Package CedarBackup2 :: Module filesystem
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup2.filesystem

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2004-2008,2010 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python (>= 2.5) 
  29  # Project  : Cedar Backup, release 2 
  30  # Revision : $Id: filesystem.py 1022 2011-10-11 23:27:49Z pronovic $ 
  31  # Purpose  : Provides filesystem-related objects. 
  32  # 
  33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  34   
  35  ######################################################################## 
  36  # Module documentation 
  37  ######################################################################## 
  38   
  39  """ 
  40  Provides filesystem-related objects. 
  41  @sort: FilesystemList, BackupFileList, PurgeItemList 
  42  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  43  """ 
  44   
  45   
  46  ######################################################################## 
  47  # Imported modules 
  48  ######################################################################## 
  49   
  50  # System modules 
  51  import os 
  52  import re 
  53  import math 
  54  import logging 
  55  import tarfile 
  56   
  57  # Cedar Backup modules 
  58  from CedarBackup2.knapsack import firstFit, bestFit, worstFit, alternateFit 
  59  from CedarBackup2.util import AbsolutePathList, UnorderedList, RegexList 
  60  from CedarBackup2.util import removeKeys, displayBytes, calculateFileAge, encodePath, dereferenceLink 
  61   
  62   
  63  ######################################################################## 
  64  # Module-wide variables 
  65  ######################################################################## 
  66   
  67  logger = logging.getLogger("CedarBackup2.log.filesystem") 
68 69 70 ######################################################################## 71 # FilesystemList class definition 72 ######################################################################## 73 74 -class FilesystemList(list):
75 76 ###################### 77 # Class documentation 78 ###################### 79 80 """ 81 Represents a list of filesystem items. 82 83 This is a generic class that represents a list of filesystem items. Callers 84 can add individual files or directories to the list, or can recursively add 85 the contents of a directory. The class also allows for up-front exclusions 86 in several forms (all files, all directories, all items matching a pattern, 87 all items whose basename matches a pattern, or all directories containing a 88 specific "ignore file"). Symbolic links are typically backed up 89 non-recursively, i.e. the link to a directory is backed up, but not the 90 contents of that link (we don't want to deal with recursive loops, etc.). 91 92 The custom methods such as L{addFile} will only add items if they exist on 93 the filesystem and do not match any exclusions that are already in place. 94 However, since a FilesystemList is a subclass of Python's standard list 95 class, callers can also add items to the list in the usual way, using 96 methods like C{append()} or C{insert()}. No validations apply to items 97 added to the list in this way; however, many list-manipulation methods deal 98 "gracefully" with items that don't exist in the filesystem, often by 99 ignoring them. 100 101 Once a list has been created, callers can remove individual items from the 102 list using standard methods like C{pop()} or C{remove()} or they can use 103 custom methods to remove specific types of entries or entries which match a 104 particular pattern. 105 106 @note: Regular expression patterns that apply to paths are assumed to be 107 bounded at front and back by the beginning and end of the string, i.e. they 108 are treated as if they begin with C{^} and end with C{$}. This is true 109 whether we are matching a complete path or a basename. 110 111 @note: Some platforms, like Windows, do not support soft links. 
On those 112 platforms, the ignore-soft-links flag can be set, but it won't do any good 113 because the operating system never reports a file as a soft link. 114 115 @sort: __init__, addFile, addDir, addDirContents, removeFiles, removeDirs, 116 removeLinks, removeMatch, removeInvalid, normalize, 117 excludeFiles, excludeDirs, excludeLinks, excludePaths, 118 excludePatterns, excludeBasenamePatterns, ignoreFile 119 """ 120 121 122 ############## 123 # Constructor 124 ############## 125
def __init__(self):
   """
   Initializes an empty list with no configured exclusions.

   The private backing fields are created first; every default is then
   assigned through the matching public property so that it passes through
   the property's setter.
   """
   list.__init__(self)
   # Backing fields, so that every attribute exists before any setter runs
   self._excludeFiles = self._excludeDirs = self._excludeLinks = False
   self._excludePaths = self._excludePatterns = self._excludeBasenamePatterns = None
   self._ignoreFile = None
   # Defaults, assigned via the public properties
   self.excludeFiles = False
   self.excludeLinks = False
   self.excludeDirs = False
   self.excludePaths = []
   self.excludePatterns = RegexList()
   self.excludeBasenamePatterns = RegexList()
   self.ignoreFile = None
143 144 145 ############# 146 # Properties 147 ############# 148
149 - def _setExcludeFiles(self, value):
150 """ 151 Property target used to set the exclude files flag. 152 No validations, but we normalize the value to C{True} or C{False}. 153 """ 154 if value: 155 self._excludeFiles = True 156 else: 157 self._excludeFiles = False
158
159 - def _getExcludeFiles(self):
160 """ 161 Property target used to get the exclude files flag. 162 """ 163 return self._excludeFiles
164
165 - def _setExcludeDirs(self, value):
166 """ 167 Property target used to set the exclude directories flag. 168 No validations, but we normalize the value to C{True} or C{False}. 169 """ 170 if value: 171 self._excludeDirs = True 172 else: 173 self._excludeDirs = False
174
175 - def _getExcludeDirs(self):
176 """ 177 Property target used to get the exclude directories flag. 178 """ 179 return self._excludeDirs
180 190 196
def _setExcludePaths(self, value):
   """
   Property target used to set the exclude paths list.

   A fresh C{AbsolutePathList} always replaces the current list; a C{None}
   value therefore results in an empty list.  Elements do not have to exist
   on disk at the time of assignment.

   @param value: Iterable of paths, or C{None}.
   @raise ValueError: If any list element is not an absolute path.
   """
   paths = AbsolutePathList()
   # Install the new list before filling it, exactly as the attribute would
   # look if extend() fails part-way through.
   self._excludePaths = paths
   if value is None:
      return
   paths.extend(value)
207
208 - def _getExcludePaths(self):
209 """ 210 Property target used to get the absolute exclude paths list. 211 """ 212 return self._excludePaths
213
def _setExcludePatterns(self, value):
   """
   Property target used to set the exclude patterns list.

   A fresh C{RegexList} always replaces the current list; a C{None} value
   therefore results in an empty list.

   @param value: Iterable of regular expression patterns, or C{None}.
   """
   patterns = RegexList()
   # Install the new list before filling it, exactly as the attribute would
   # look if extend() fails part-way through.
   self._excludePatterns = patterns
   if value is None:
      return
   patterns.extend(value)
222
223 - def _getExcludePatterns(self):
224 """ 225 Property target used to get the exclude patterns list. 226 """ 227 return self._excludePatterns
228
def _setExcludeBasenamePatterns(self, value):
   """
   Property target used to set the exclude basename patterns list.

   A fresh C{RegexList} always replaces the current list; a C{None} value
   therefore results in an empty list.

   @param value: Iterable of regular expression patterns, or C{None}.
   """
   patterns = RegexList()
   # Install the new list before filling it, exactly as the attribute would
   # look if extend() fails part-way through.
   self._excludeBasenamePatterns = patterns
   if value is None:
      return
   patterns.extend(value)
237
239 """ 240 Property target used to get the exclude basename patterns list. 241 """ 242 return self._excludeBasenamePatterns
243
244 - def _setIgnoreFile(self, value):
245 """ 246 Property target used to set the ignore file. 247 The value must be a non-empty string if it is not C{None}. 248 @raise ValueError: If the value is an empty string. 249 """ 250 if value is not None: 251 if len(value) < 1: 252 raise ValueError("The ignore file must be a non-empty string.") 253 self._ignoreFile = value
254
255 - def _getIgnoreFile(self):
256 """ 257 Property target used to get the ignore file. 258 """ 259 return self._ignoreFile
# Public properties; each one pairs a private getter with its setter and
# has no deleter.
excludeFiles = property(fget=_getExcludeFiles, fset=_setExcludeFiles, fdel=None,
                        doc="Boolean indicating whether files should be excluded.")
excludeDirs = property(fget=_getExcludeDirs, fset=_setExcludeDirs, fdel=None,
                       doc="Boolean indicating whether directories should be excluded.")
excludeLinks = property(fget=_getExcludeLinks, fset=_setExcludeLinks, fdel=None,
                        doc="Boolean indicating whether soft links should be excluded.")
excludePaths = property(fget=_getExcludePaths, fset=_setExcludePaths, fdel=None,
                        doc="List of absolute paths to be excluded.")
excludePatterns = property(fget=_getExcludePatterns, fset=_setExcludePatterns, fdel=None,
                           doc="List of regular expression patterns (matching complete path) to be excluded.")
excludeBasenamePatterns = property(fget=_getExcludeBasenamePatterns, fset=_setExcludeBasenamePatterns, fdel=None,
                                   doc="List of regular expression patterns (matching basename) to be excluded.")
ignoreFile = property(fget=_getIgnoreFile, fset=_setIgnoreFile, fdel=None,
                      doc="Name of file which will cause directory contents to be ignored.")


##############
# Add methods
##############

def addFile(self, path):
   """
   Adds a file to the list.

   The path must exist and must be a file or a link to an existing file.
   It is appended to the list unless one of the configured exclusions
   (links, files, paths, patterns, basename patterns) applies.

   @param path: File path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list (zero or one).

   @raise ValueError: If path is not a file or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = encodePath(path)
   if not (os.path.exists(path) and os.path.isfile(path)):
      logger.debug("Path [%s] is not a file or does not exist on disk." % path)
      raise ValueError("Path is not a file or does not exist on disk.")

   # Flag- and path-based exclusions
   if self.excludeLinks and os.path.islink(path):
      logger.debug("Path [%s] is excluded based on excludeLinks." % path)
      return 0
   if self.excludeFiles:
      logger.debug("Path [%s] is excluded based on excludeFiles." % path)
      return 0
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0

   # Pattern-based exclusions; patterns are safe to assume valid due to RegexList.
   # NOTE(review): "^%s$" does not group the pattern, so a pattern containing a
   # top-level "|" is only partially bounded -- confirm whether that is intended.
   for candidate in self.excludePatterns:
      encoded = encodePath(candidate)  # use same encoding as filenames
      bounded = re.compile(r"^%s$" % encoded)
      if bounded.match(path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, encoded))
         return 0
   for candidate in self.excludeBasenamePatterns:
      encoded = encodePath(candidate)  # use same encoding as filenames
      bounded = re.compile(r"^%s$" % encoded)
      if bounded.match(os.path.basename(path)):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, encoded))
         return 0

   self.append(path)
   logger.debug("Added file to list: [%s]" % path)
   return 1
317
def addDir(self, path):
   """
   Adds a directory to the list.

   The path must exist and must be a directory or a link to an existing
   directory.  It is appended to the list unless one of the configured
   exclusions (links, directories, paths, patterns, basename patterns)
   applies.  The L{ignoreFile} does not apply to this method, only to
   L{addDirContents}.

   @param path: Directory path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list (zero or one).

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = normalizeDir(encodePath(path))
   if not (os.path.exists(path) and os.path.isdir(path)):
      logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
      raise ValueError("Path is not a directory or does not exist on disk.")

   # Flag- and path-based exclusions
   if self.excludeLinks and os.path.islink(path):
      logger.debug("Path [%s] is excluded based on excludeLinks." % path)
      return 0
   if self.excludeDirs:
      logger.debug("Path [%s] is excluded based on excludeDirs." % path)
      return 0
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0

   # Pattern-based exclusions; patterns are safe to assume valid due to RegexList.
   # NOTE(review): "^%s$" does not group the pattern, so a pattern containing a
   # top-level "|" is only partially bounded -- confirm whether that is intended.
   for candidate in self.excludePatterns:
      encoded = encodePath(candidate)  # use same encoding as filenames
      bounded = re.compile(r"^%s$" % encoded)
      if bounded.match(path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, encoded))
         return 0
   for candidate in self.excludeBasenamePatterns:
      encoded = encodePath(candidate)  # use same encoding as filenames
      bounded = re.compile(r"^%s$" % encoded)
      if bounded.match(os.path.basename(path)):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, encoded))
         return 0

   self.append(path)
   logger.debug("Added directory to list: [%s]" % path)
   return 1
362
def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
   """
   Adds the contents of a directory to the list.

   The path must exist and must be a directory or a link to a directory.
   The contents of the directory (as well as the directory path itself) will
   be recursively added to the list, subject to any exclusions that are in
   place.  If you only want the directory and its immediate contents to be
   added, then pass in C{recursive=False}.

   This method only encodes and normalizes the path; the work itself is
   delegated to C{_addDirContentsInternal}.

   @note: If a directory's absolute path matches an exclude pattern or path,
   or if the directory contains the configured ignore file, then the
   directory and all of its contents will be recursively excluded from the
   list.

   @note: If the passed-in directory happens to be a soft link, it will be
   recursed.  However, the linkDepth parameter controls whether any soft
   links I{within} the directory will be recursed.  The link depth is the
   maximum depth of the tree at which soft links should be followed.  So, a
   depth of 0 does not follow any soft links, a depth of 1 follows only
   links within the passed-in directory, a depth of 2 follows the links at
   the next level down, etc.

   @note: Any invalid soft links (i.e. soft links that point to
   non-existent items) will be silently ignored.

   @note: The L{excludeDirs} flag only controls whether any given directory
   path itself is added to the list once it has been discovered.  It does
   I{not} modify any behavior related to directory recursion.

   @note: If you call this method I{on a link to a directory} that link will
   never be dereferenced (it may, however, be followed).

   @param path: Directory path whose contents should be added to the list
   @type path: String representing a path on disk

   @param recursive: Indicates whether directory contents should be added recursively.
   @type recursive: Boolean value

   @param addSelf: Indicates whether the directory itself should be added to the list.
   @type addSelf: Boolean value

   @param linkDepth: Maximum depth of the tree at which soft links should be followed
   @type linkDepth: Integer value, where zero means not to follow any soft links

   @param dereference: Indicates whether soft links, if followed, should be dereferenced
   @type dereference: Boolean value

   @return: Number of items recursively added to the list

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   normalized = normalizeDir(encodePath(path))
   # Note the argument order: the internal method takes includePath before recursive
   added = self._addDirContentsInternal(normalized, addSelf, recursive, linkDepth, dereference)
   return added
419
def _addDirContentsInternal(self, path, includePath=True, recursive=True, linkDepth=0, dereference=False):
   """
   Internal implementation of C{addDirContents}.

   This internal implementation exists due to some refactoring.  Basically,
   some subclasses have a need to add the contents of a directory, but not
   the directory itself.  This is different than the standard C{FilesystemList}
   behavior and actually ends up making a special case out of the first
   call in the recursive chain.  Since I don't want to expose the modified
   interface, C{addDirContents} ends up being wholly implemented in terms
   of this method.

   The linkDepth parameter controls whether soft links are followed when we
   are adding the contents recursively.  Any recursive calls reduce the
   value by one.  If the value is zero or less, then soft links will just be
   added as directories, but will not be followed.  This means that links
   are followed to a I{constant depth} starting from the top-most directory.

   There is one difference between soft links and directories: soft links
   that are added recursively are not placed into the list explicitly.  This
   is because if we do add the links recursively, the resulting tar file
   gets a little confused (it has a link and a directory with the same
   name).

   @note: If you call this method I{on a link to a directory} that link will
   never be dereferenced (it may, however, be followed).

   @param path: Directory path whose contents should be added to the list.
   @param includePath: Indicates whether to include the path as well as contents.
   @param recursive: Indicates whether directory contents should be added recursively.
   @param linkDepth: Depth of soft links that should be followed
   @param dereference: Indicates whether soft links, if followed, should be dereferenced

   @return: Number of items recursively added to the list

   @raise ValueError: If path is not a directory or does not exist.
   """
   added = 0
   if not os.path.exists(path) or not os.path.isdir(path):
      logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
      raise ValueError("Path is not a directory or does not exist on disk.")
   # Any exclusion that matches the directory itself prunes the whole subtree:
   # we return before listing its contents.
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return added
   for pattern in self.excludePatterns:  # safe to assume all are valid due to RegexList
      pattern = encodePath(pattern)  # use same encoding as filenames
      if re.compile(r"^%s$" % pattern).match(path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, pattern))
         return added
   for pattern in self.excludeBasenamePatterns:  # safe to assume all are valid due to RegexList
      pattern = encodePath(pattern)  # use same encoding as filenames
      if re.compile(r"^%s$" % pattern).match(os.path.basename(path)):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, pattern))
         return added
   if self.ignoreFile is not None and os.path.exists(os.path.join(path, self.ignoreFile)):
      logger.debug("Path [%s] is excluded based on ignore file." % path)
      return added
   if includePath:
      added += self.addDir(path)    # could actually be excluded by addDir, yet
   for entry in os.listdir(path):
      entrypath = os.path.join(path, entry)
      if os.path.isfile(entrypath):
         # When dereferencing, the link target is added in addition to the entry itself
         if linkDepth > 0 and dereference:
            derefpath = dereferenceLink(entrypath)
            if derefpath != entrypath:
               added += self.addFile(derefpath)
         added += self.addFile(entrypath)
      elif os.path.isdir(entrypath):
         if os.path.islink(entrypath):
            # Soft link to a directory
            if recursive:
               if linkDepth > 0:
                  newDepth = linkDepth - 1
                  if dereference:
                     # Recurse into the link target (if it differs), then list the link itself
                     derefpath = dereferenceLink(entrypath)
                     if derefpath != entrypath:
                        added += self._addDirContentsInternal(derefpath, True, recursive, newDepth, dereference)
                     added += self.addDir(entrypath)
                  else:
                     # Follow the link, but do not list the link path itself (includePath=False)
                     added += self._addDirContentsInternal(entrypath, False, recursive, newDepth, dereference)
               else:
                  # Link depth exhausted: list the link as a directory, but do not follow it
                  added += self.addDir(entrypath)
            else:
               added += self.addDir(entrypath)
         else:
            # Real directory; the link depth is reduced at every level of recursion
            if recursive:
               newDepth = linkDepth - 1
               added += self._addDirContentsInternal(entrypath, True, recursive, newDepth, dereference)
            else:
               added += self.addDir(entrypath)
   return added
510 511 512 ################# 513 # Remove methods 514 ################# 515
def removeFiles(self, pattern=None):
   """
   Removes file entries from the list.

   If C{pattern} is not passed in or is C{None}, then all file entries will
   be removed from the list.  Otherwise, only those file entries matching
   the pattern will be removed.  Any entry which does not exist on disk
   will be ignored (use L{removeInvalid} to purge those entries).

   This method might be fairly slow for large lists, since it must check the
   type of each item in the list.  If you know ahead of time that you want
   to exclude all files, then you will be better off setting L{excludeFiles}
   to C{True} before adding items to the list.

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   matcher = None  # None means "remove every file entry"
   if pattern is not None:
      try:
         pattern = encodePath(pattern)  # use same encoding as filenames
         matcher = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")
   removed = 0
   for entry in self[:]:  # iterate over a copy, since we remove as we go
      if not (os.path.exists(entry) and os.path.isfile(entry)):
         continue
      if matcher is not None and not matcher.match(entry):
         continue
      self.remove(entry)
      logger.debug("Removed path [%s] from list." % entry)
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
556
def removeDirs(self, pattern=None):
   """
   Removes directory entries from the list.

   If C{pattern} is not passed in or is C{None}, then all directory entries
   will be removed from the list.  Otherwise, only those directory entries
   matching the pattern will be removed.  Any entry which does not exist on
   disk will be ignored (use L{removeInvalid} to purge those entries).

   This method might be fairly slow for large lists, since it must check the
   type of each item in the list.  If you know ahead of time that you want
   to exclude all directories, then you will be better off setting
   L{excludeDirs} to C{True} before adding items to the list (note that this
   will not prevent you from recursively adding the I{contents} of
   directories).

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   matcher = None  # None means "remove every directory entry"
   if pattern is not None:
      try:
         pattern = encodePath(pattern)  # use same encoding as filenames
         matcher = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")
   removed = 0
   for entry in self[:]:  # iterate over a copy, since we remove as we go
      if not (os.path.exists(entry) and os.path.isdir(entry)):
         continue
      if matcher is None:
         self.remove(entry)
         logger.debug("Removed path [%s] from list." % entry)
      elif matcher.match(entry):
         self.remove(entry)
         logger.debug("Removed path [%s] from list based on pattern [%s]." % (entry, pattern))
      else:
         continue
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
599 640
def removeMatch(self, pattern):
   """
   Removes from the list all entries matching a pattern.

   This method removes from the list all entries which match the passed in
   C{pattern}.  Since there is no need to check the type of each entry, it
   is faster to call this method than to call the L{removeFiles},
   L{removeDirs} or L{removeLinks} methods individually.  If you know which
   patterns you will want to remove ahead of time, you may be better off
   setting L{excludePatterns} or L{excludeBasenamePatterns} before adding
   items to the list.

   @note: Unlike when using the exclude lists, the pattern here is I{not}
   bounded at the front and the back of the string.  You can use any pattern
   you want.

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed.
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   try:
      pattern = encodePath(pattern)  # use same encoding as filenames
      matcher = re.compile(pattern)
   except re.error:
      raise ValueError("Pattern is not a valid regular expression.")
   # Decide what to drop first, then drop it; one list item per matching entry,
   # so duplicates are each removed and counted.
   doomed = [entry for entry in self if matcher.match(entry)]
   for entry in doomed:
      self.remove(entry)
      logger.debug("Removed path [%s] from list based on pattern [%s]." % (entry, pattern))
   removed = len(doomed)
   logger.debug("Removed a total of %d entries." % removed)
   return removed
675
def removeInvalid(self):
   """
   Removes from the list all entries that do not exist on disk.

   Every entry for which C{os.path.exists} is false is removed.  No
   attention is paid to whether the entries are files or directories.

   @return: Number of entries removed.
   """
   # One item per missing entry, so duplicates are each removed and counted
   missing = [entry for entry in self if not os.path.exists(entry)]
   for entry in missing:
      self.remove(entry)
      logger.debug("Removed path [%s] from list." % entry)
   removed = len(missing)
   logger.debug("Removed a total of %d entries." % removed)
   return removed
694 695 696 ################## 697 # Utility methods 698 ################## 699
def normalize(self):
   """
   Normalizes the list, ensuring that each entry is unique.

   The list is sorted in place and then rebuilt with one copy of each
   distinct entry, so it ends up sorted as well as free of duplicates.
   The number of removed entries is logged at debug level.
   """
   orig = len(self)
   self.sort()
   # After sorting, equal entries are adjacent, so one pass that compares each
   # entry to the last one kept is enough.  An explicit loop is used rather than
   # map() for its side effects, which does nothing where map() is lazy.
   unique = []
   for entry in self:
      if not unique or unique[-1] != entry:
         unique.append(entry)
   self[:] = unique
   new = len(self)
   # Report the removed count as a non-negative number (orig - new)
   logger.debug("Completed normalizing list; removed %d items (%d originally, %d now)." % (orig-new, orig, new))
709
def verify(self):
   """
   Verifies that all entries in the list exist on disk.

   Checking stops at the first entry that does not exist.

   @return: C{True} if all entries exist, C{False} otherwise.
   """
   valid = True
   for candidate in self:
      if os.path.exists(candidate):
         continue
      logger.debug("Path [%s] is invalid; list is not valid." % candidate)
      valid = False
      break
   if valid:
      logger.debug("All entries in list are valid.")
   return valid
721


########################################################################
# SpanItem class definition
########################################################################

class SpanItem(object): # pylint: disable=R0903
   """
   Item returned by L{BackupFileList.generateSpan}.

   A plain value holder: the constructor stores its four arguments,
   unchanged, as attributes of the same names.
   """
   def __init__(self, fileList, size, capacity, utilization):
      """
      Create object.
      @param fileList: List of files
      @param size: Size (in bytes) of files
      @param capacity: Capacity value, stored as given (presumably in bytes -- confirm against generateSpan)
      @param utilization: Utilization, as a percentage (0-100)
      """
      self.fileList, self.size = fileList, size
      self.capacity, self.utilization = capacity, utilization
742
743 744 ######################################################################## 745 # BackupFileList class definition 746 ######################################################################## 747 748 -class BackupFileList(FilesystemList): # pylint: disable=R0904
749 750 ###################### 751 # Class documentation 752 ###################### 753 754 """ 755 List of files to be backed up. 756 757 A BackupFileList is a L{FilesystemList} containing a list of files to be 758 backed up. It only contains files, not directories (soft links are treated 759 like files). On top of the generic functionality provided by 760 L{FilesystemList}, this class adds functionality to keep a hash (checksum) 761 for each file in the list, and it also provides a method to calculate the 762 total size of the files in the list and a way to export the list into tar 763 form. 764 765 @sort: __init__, addDir, totalSize, generateSizeMap, generateDigestMap, 766 generateFitted, generateTarfile, removeUnchanged 767 """ 768 769 ############## 770 # Constructor 771 ############## 772
def __init__(self):
   """
   Initializes a list with no configured exclusions.

   All initialization is delegated to the L{FilesystemList} constructor.
   """
   FilesystemList.__init__(self)
776 777 778 ################################ 779 # Overridden superclass methods 780 ################################ 781
def addDir(self, path):
   """
   Adds a directory to the list.

   Note that this class does not allow directories to be added by themselves
   (a backup list contains only files).  However, since links to directories
   are technically files, we allow them to be added.

   A path that is a real directory (a directory that is not a soft link) is
   silently skipped.  Everything else is handed to the superclass method, so
   all of the superclass's existing validations and restrictions apply.

   @param path: Directory path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list.

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = normalizeDir(encodePath(path))
   isRealDirectory = os.path.isdir(path) and not os.path.islink(path)
   if isRealDirectory:
      return 0
   return FilesystemList.addDir(self, path)
809 810 811 ################## 812 # Utility methods 813 ################## 814
def totalSize(self):
   """
   Returns the total size among all files in the list.

   Only regular files are counted.  Soft links that point at files are
   ignored, as are entries which do not exist on disk.

   @return: Total size, in bytes, as a floating point value
   """
   sizes = (float(os.stat(item).st_size)
            for item in self
            if os.path.isfile(item) and not os.path.islink(item))
   return sum(sizes, 0.0)
828
def generateSizeMap(self):
   """
   Generates a mapping from file to file size in bytes.

   Soft links are included in the mapping, always with size zero.  Entries
   which are neither a soft link nor an existing file are left out.

   @return: Dictionary mapping file to file size (floating point values)
   """
   sizes = { }
   for item in self:
      if os.path.islink(item):
         size = 0.0   # links are listed, but never sized
      elif os.path.isfile(item):
         size = float(os.stat(item).st_size)
      else:
         continue
      sizes[item] = size
   return sizes
843
def generateDigestMap(self, stripPrefix=None):
   """
   Generates a mapping from file to file digest.

   Currently, the digest is an SHA hash, which should be pretty secure.  In
   the future, this might be a different kind of hash, but we guarantee that
   the type of the hash will not change unless the library major version
   number is bumped.

   Entries which do not exist on disk are ignored.

   Soft links are ignored.  We would end up generating a digest for the file
   that the soft link points at, which doesn't make any sense.

   If C{stripPrefix} is passed in, then that prefix will be stripped from
   each key when the map is generated.  This can be useful in generating two
   "relative" digest maps to be compared to one another.  Only a I{leading}
   occurrence is stripped: an entry which does not start with the prefix
   keeps its full path as its key.

   @param stripPrefix: Common prefix to be stripped from paths
   @type stripPrefix: String with any contents

   @return: Dictionary mapping file to digest value
   @see: L{removeUnchanged}
   """
   table = { }
   for entry in self:
      if os.path.isfile(entry) and not os.path.islink(entry):
         key = entry
         # Strip by position rather than with str.replace(), which would remove
         # the first occurrence of the text anywhere in the path.
         if stripPrefix is not None and entry.startswith(stripPrefix):
            key = entry[len(stripPrefix):]
         table[key] = BackupFileList._generateDigest(entry)
   return table
878 879 @staticmethod
def _generateDigest(path):
   """
   Generates an SHA digest for a given file on disk.

   The original code for this function used this simplistic implementation,
   which requires reading the entire file into memory at once in order to
   generate a digest value::

      sha.new(open(path).read()).hexdigest()

   Not surprisingly, this isn't an optimal solution.  The U{Simple file
   hashing <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259109>}
   Python Cookbook recipe describes how to incrementally generate a hash
   value by reading in chunks of data rather than reading the file all at
   once.  The recipe relies on the C{update()} method of the various Python
   hashing algorithms.

   Per the original author's tests using a 110 MB file on CD, the
   read-everything implementation required 111 seconds and the chunked
   implementation only 40-45 seconds.  The same tests showed that reading
   around 4kB (4096 bytes) at a time yields the best performance: smaller
   reads are quite a bit slower, and larger reads don't make much of a
   difference.  The read size is therefore hardcoded to 4096 until there is
   evidence that making it configurable is worthwhile.

   The file is always closed before this function returns, including when a
   read fails part-way through.

   @param path: Path to generate digest for.

   @return: ASCII-safe SHA digest for the file.
   @raise IOError: If the file cannot be opened or read (C{IOError} is an
                   alias of C{OSError} on Python 3).
   """
   # pylint: disable=C0103
   try:
      import hashlib
      s = hashlib.sha1()
   except ImportError:
      # Interpreters that pre-date hashlib only offer the old sha module
      import sha
      s = sha.new()
   readBytes = 4096  # see notes above
   f = open(path, mode="rb")  # in case platform cares about binary reads
   try:
      while True:
         readString = f.read(readBytes)
         if not readString:
            break  # an empty read means end-of-file
         s.update(readString)
   finally:
      f.close()  # don't leak the handle if read() or update() blows up
   digest = s.hexdigest()
   logger.debug("Generated digest [%s] for file [%s]." % (digest, path))
   return digest
931
def generateFitted(self, capacity, algorithm="worst_fit"):
   """
   Returns the subset of this list that fits within a given capacity.

   Callers sometimes want to include every item in a list but cannot,
   because there isn't room for all of them.  This method leaves the list
   itself untouched and hands back a separate collection containing only
   the items selected to fit, so nothing is lost if the caller decides the
   fitted result is unsatisfactory.

   The selection is delegated to the functions in the knapsack module.  The
   default algorithm is worst fit; first fit, best fit and alternate fit
   can be chosen instead.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: Copy of list with total size no larger than indicated capacity
   @raise ValueError: If the algorithm is invalid.
   """
   candidates = self._getKnapsackTable()
   fitFunction = BackupFileList._getKnapsackFunction(algorithm)
   result = fitFunction(candidates, capacity)
   # Only the first element of the knapsack result (the fitted items) is of interest here
   return result[0]
958
def generateSpan(self, capacity, algorithm="worst_fit"):
   """
   Divides the list into groups of items that each fit a given capacity.

   Sometimes, callers need to split a backup file list into a set of
   smaller lists.  For instance, this can be used to "span" the files
   across a set of discs.

   The fitting is delegated to the functions in the knapsack module.  The
   default algorithm is worst fit; first fit, best fit and alternate fit
   can be chosen instead.  The chosen function is applied repeatedly: each
   pass produces one span, and the items placed in that span are taken out
   of the running before the next pass.

   @note: If any of your items are larger than the capacity, then it won't
   be possible to find a solution.  In this case, a value error will be
   raised.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: List of L{SpanItem} objects.

   @raise ValueError: If the algorithm is invalid.
   @raise ValueError: If it's not possible to fit some items
   """
   fitFunction = BackupFileList._getKnapsackFunction(algorithm)
   remaining = self._getKnapsackTable(capacity)
   spans = []
   passNumber = 0
   while len(remaining) > 0:
      passNumber += 1
      result = fitFunction(remaining, capacity)
      fitted = result[0]
      used = result[1]
      if len(fitted) == 0:
         # Should never happen because _getKnapsackTable() rejects oversized
         # files, but without this check the loop would never terminate.
         raise ValueError("After iteration %d, unable to add any new items." % passNumber)
      removeKeys(remaining, fitted)
      percentUsed = (float(used)/float(capacity))*100.0
      spans.append(SpanItem(fitted, used, capacity, percentUsed))
   return spans
1001
def _getKnapsackTable(self, capacity=None):
   """
   Builds the dictionary that the knapsack algorithms take as input.

   Soft links are entered with a size of zero and regular files with their
   size on disk.  Any other kind of entry (including one which no longer
   exists) is left out.  When a capacity is supplied, every regular file is
   checked against it as the table is built.

   @param capacity: Optional capacity that no single file may exceed
   @return: Dictionary mapping file name to tuple of (file path, file size).
   @raise ValueError: If a capacity is given and some file is larger than it.
   """
   knapsack = {}
   for path in self:
      if os.path.islink(path):
         knapsack[path] = (path, 0.0)
         continue
      if not os.path.isfile(path):
         continue
      fileSize = float(os.stat(path).st_size)
      if capacity is not None and fileSize > capacity:
         raise ValueError("File [%s] cannot fit in capacity %s." % (path, displayBytes(capacity)))
      knapsack[path] = (path, fileSize)
   return knapsack
1018 1019 @staticmethod
def _getKnapsackFunction(algorithm):
   """
   Looks up the knapsack function that implements a named algorithm.

   Algorithm name must be one of "first_fit", "best_fit", "worst_fit",
   "alternate_fit".

   @param algorithm: Name of the algorithm
   @return: Reference to knapsack function
   @raise ValueError: If the algorithm name is unknown.
   """
   known = (
      ("first_fit", firstFit),
      ("best_fit", bestFit),
      ("worst_fit", worstFit),
      ("alternate_fit", alternateFit),
   )
   # Compared with == (rather than a dictionary lookup) so that an
   # unhashable argument still ends in ValueError below.
   for name, function in known:
      if algorithm == name:
         return function
   raise ValueError("Algorithm [%s] is invalid." % algorithm)
1038
def generateTarfile(self, path, mode='tar', ignore=False, flat=False):
   """
   Creates a tar file containing the files in the list.

   By default, this method will create uncompressed tar files.  If you pass
   in mode C{'targz'}, then it will create gzipped tar files, and if you
   pass in mode C{'tarbz2'}, then it will create bzipped tar files.

   The tar file will be created as a GNU tar archive, which enables extended
   file name lengths, etc.  Since GNU tar is so prevalent, I've decided that
   the extra functionality out-weighs the disadvantage of not being
   "standard".

   If you pass in C{flat=True}, then a "flat" archive will be created, and
   all of the files will be added to the root of the archive.  So, the file
   C{/tmp/something/whatever.txt} would be added as just C{whatever.txt}.

   By default, the whole method call fails if there are problems adding any
   of the files to the archive, resulting in an exception.  Under these
   circumstances, callers are advised that they might want to call
   L{removeInvalid()} and then attempt to create the tar file a second
   time, since the most common cause of failures is a missing file (a file
   that existed when the list was built, but is gone again by the time the
   tar file is built).

   If you want to, you can pass in C{ignore=True}, and the method will
   ignore errors encountered when adding individual files to the archive
   (but not errors opening and closing the archive itself).

   Whenever a C{TarError} is about to be thrown, we make a best-effort
   attempt to close the archive and remove the partial tar file from disk.
   Failures during that cleanup are ignored so that they cannot mask the
   original error.

   @note: No validation is done as to whether the entries in the list are
   files, since only files or soft links should be in an object like this.
   However, to be safe, everything is explicitly added to the tar archive
   non-recursively so it's safe to include soft links to directories.

   @note: The Python C{tarfile} module, which is used internally here, is
   supposed to deal properly with long filenames and links.  In my testing,
   I have found that it appears to be able to add really long filenames to
   archives, but doesn't do a good job reading them back out, even out of
   an archive it created.  Fortunately, all Cedar Backup does is add files
   to archives.

   @param path: Path of tar file to create on disk
   @type path: String representing a path on disk

   @param mode: Tar creation mode
   @type mode: One of either C{'tar'}, C{'targz'} or C{'tarbz2'}

   @param ignore: Indicates whether to ignore certain errors.
   @type ignore: Boolean

   @param flat: Creates "flat" archive by putting all items in root
   @type flat: Boolean

   @raise ValueError: If mode is not valid
   @raise ValueError: If list is empty
   @raise ValueError: If the path could not be encoded properly.
   @raise TarError: If there is a problem creating the tar file
   """
   # pylint: disable=E1101
   # Imported locally: sys.exc_info() is the one way of getting at the
   # active exception that is valid syntax on every supported interpreter.
   import sys
   path = encodePath(path)
   if len(self) == 0:
      raise ValueError("Empty list cannot be used to generate tarfile.")
   if mode == 'tar':
      tarmode = "w:"
   elif mode == 'targz':
      tarmode = "w:gz"
   elif mode == 'tarbz2':
      tarmode = "w:bz2"
   else:
      raise ValueError("Mode [%s] is not valid." % mode)

   tar = None  # stays None if tarfile.open() itself fails

   def discardArchive():
      """Best-effort close of the archive and removal of the partial file."""
      if tar is not None:
         try:
            tar.close()
         except Exception:  # the error being reported matters more than this one
            pass
      if os.path.exists(path):
         try:
            os.remove(path)
         except OSError:
            pass

   try:
      tar = tarfile.open(path, tarmode)
      try:
         tar.format = tarfile.GNU_FORMAT
      except AttributeError:
         # No GNU_FORMAT constant in this tarfile module (presumably an
         # older Python); clearing the posix flag is used instead.
         tar.posix = False
      for entry in self:
         try:
            if flat:
               tar.add(entry, arcname=os.path.basename(entry), recursive=False)
            else:
               tar.add(entry, recursive=False)
         except tarfile.TarError:
            if not ignore:
               raise
            logger.info("Unable to add file [%s]; going on anyway." % entry)
         except OSError:
            if not ignore:
               # Callers are promised TarError, so wrap the OS-level failure
               raise tarfile.TarError(sys.exc_info()[1])
            logger.info("Unable to add file [%s]; going on anyway." % entry)
      tar.close()
   except tarfile.ReadError:
      # Must come before TarError, since ReadError is a subclass of it
      discardArchive()
      raise tarfile.ReadError("Unable to open [%s]; maybe directory doesn't exist?" % path)
   except tarfile.TarError:
      error = sys.exc_info()[1]  # capture before cleanup handles exceptions of its own
      discardArchive()
      raise error
1142
def removeUnchanged(self, digestMap, captureDigest=False):
   """
   Removes unchanged entries from the list.

   This method relies on a digest map as returned from L{generateDigestMap}.
   For each entry in C{digestMap}, if the entry also exists in the current
   list I{and} the entry in the current list has the same digest value as in
   the map, the entry in the current list will be removed.

   The intended use is that a caller captures a digest map from
   C{generateDigestMap} at some point in time (perhaps the beginning of the
   week), saves it using C{pickle} or some other method, and later passes it
   to this method to filter out any files which have not changed since.

   If C{captureDigest} is passed in as C{True}, then a digest is generated
   for every regular file in the list before the removal step occurs, using
   the same rules as in L{generateDigestMap}, and the comparison is made
   against those captured digests.

   If C{captureDigest} is passed in as C{False}, a digest is generated only
   for the entries which actually need to be checked, and any entry which
   isn't a regular file currently on disk is left alone.

   The return value varies depending on C{captureDigest}.  To preserve
   backwards compatibility, if C{captureDigest} is C{False} the result is
   just the number of entries removed.  Otherwise it is a tuple of
   C{(entries removed, digest map)}, where the digest map is in exactly the
   form returned by L{generateDigestMap}.

   @note: For performance reasons, the list is rebuilt from scratch.  Its
   entries are first loaded into a temporary dictionary, unchanged entries
   are deleted from that dictionary (which is faster than the equivalent
   operation on a list), and the list contents are then replaced with the
   keys left in the dictionary.

   @param digestMap: Dictionary mapping file name to digest value.
   @type digestMap: Map as returned from L{generateDigestMap}.

   @param captureDigest: Indicates that digest information should be captured.
   @type captureDigest: Boolean

   @return: Number of entries removed, or a tuple of C{(entries removed,
            digest map)} when C{captureDigest} is set
   """
   def isRegularFile(candidate):
      """Indicates whether a path is a real file on disk, not a soft link."""
      return os.path.isfile(candidate) and not os.path.islink(candidate)

   remaining = {}
   captured = {}
   for path in self:
      if captureDigest and isRegularFile(path):
         remaining[path] = BackupFileList._generateDigest(path)
         captured[path] = remaining[path]
      else:
         remaining[path] = None

   removed = 0
   for path in digestMap:
      if path not in remaining:
         continue
      if captureDigest:
         current = remaining[path]
         if current is None:  # not a regular file when digests were captured
            continue
      else:
         if not isRegularFile(path):
            continue
         current = BackupFileList._generateDigest(path)
      if current == digestMap[path]:
         removed += 1
         del remaining[path]
         logger.debug("Discarded unchanged file [%s]." % path)

   self[:] = remaining.keys()
   if captureDigest:
      return (removed, captured)
   return removed
1225
########################################################################
# PurgeItemList class definition
########################################################################

class PurgeItemList(FilesystemList): # pylint: disable=R0904

   ######################
   # Class documentation
   ######################

   """
   List of files and directories to be purged.

   A PurgeItemList is a L{FilesystemList} containing a list of files and
   directories to be purged.  On top of the generic functionality provided by
   L{FilesystemList}, this class adds functionality to remove items that are
   too young to be purged, and to actually remove each item in the list from
   the filesystem.

   The other main difference is that when you add a directory's contents to a
   purge item list, the directory itself is not added to the list.  This way,
   if someone asks to purge within C{/opt/backup/collect}, that directory
   doesn't get removed once all of the files within it are gone.
   """

   ##############
   # Constructor
   ##############

   def __init__(self):
      """Initializes a list with no configured exclusions."""
      FilesystemList.__init__(self)


   ##############
   # Add methods
   ##############

   def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
      """
      Adds the contents of a directory to the list.

      The path must exist and must be a directory or a link to a directory.
      The contents of the directory (but I{not} the directory path itself) will
      be recursively added to the list, subject to any exclusions that are in
      place.  If you only want the directory and its contents to be added, then
      pass in C{recursive=False}.

      @note: If a directory's absolute path matches an exclude pattern or path,
      or if the directory contains the configured ignore file, then the
      directory and all of its contents will be recursively excluded from the
      list.

      @note: If the passed-in directory happens to be a soft link, it will be
      recursed.  However, the linkDepth parameter controls whether any soft
      links I{within} the directory will be recursed.  The link depth is
      maximum depth of the tree at which soft links should be followed.  So, a
      depth of 0 does not follow any soft links, a depth of 1 follows only
      links within the passed-in directory, a depth of 2 follows the links at
      the next level down, etc.

      @note: Any invalid soft links (i.e.  soft links that point to
      non-existent items) will be silently ignored.

      @note: The L{excludeDirs} flag only controls whether any given soft link
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.  (This note
      names the same flag as the next one; presumably a separate flag for
      soft links was intended -- verify against L{FilesystemList}.)

      @note: The L{excludeDirs} flag only controls whether any given directory
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.

      @note: If you call this method I{on a link to a directory} that link will
      never be dereferenced (it may, however, be followed).

      @param path: Directory path whose contents should be added to the list
      @type path: String representing a path on disk

      @param recursive: Indicates whether directory contents should be added recursively.
      @type recursive: Boolean value

      @param addSelf: Ignored in this subclass.

      @param linkDepth: Depth of soft links that should be followed
      @type linkDepth: Integer value, where zero means not to follow any soft links

      @param dereference: Indicates whether soft links, if followed, should be dereferenced
      @type dereference: Boolean value

      @return: Number of items recursively added to the list

      @raise ValueError: If path is not a directory or does not exist.
      @raise ValueError: If the path could not be encoded properly.
      """
      path = encodePath(path)
      path = normalizeDir(path)
      # The literal False is passed where the parent's add-self flag goes, so
      # the directory itself never lands in a purge list whatever addSelf says.
      return super(PurgeItemList, self)._addDirContentsInternal(path, False, recursive, linkDepth, dereference)


   ##################
   # Utility methods
   ##################

   def removeYoungFiles(self, daysOld):
      """
      Removes from the list files younger than a certain age (in days).

      Any file whose "age" in days is less than (C{<}) the value of the
      C{daysOld} parameter will be removed from the list so that it will not be
      purged later when L{purgeItems} is called.  Directories and soft links
      will be ignored, as will any file whose age cannot be determined.

      The "age" of a file is the amount of time since the file was last used,
      per the most recent of the file's C{st_atime} and C{st_mtime} values.
      Ages are rounded down to whole days before being compared.

      @note: Some people find the "sense" of this method confusing or
      "backwards".  Keep in mind that this method is used to remove items
      I{from the list}, not from the filesystem!  It removes from the list
      those items that you would I{not} want to purge because they are too
      young.  As an example, passing in C{daysOld} of zero (0) would remove
      from the list no files, which would result in purging all of the files
      later.  I would be happy to make a synonym of this method with an
      easier-to-understand "sense", if someone can suggest one.

      @param daysOld: Minimum age of files that are to be kept in the list.
      @type daysOld: Integer value >= 0.

      @return: Number of entries removed
      @raise ValueError: If C{daysOld} is negative or cannot be converted to an integer.
      """
      removed = 0
      daysOld = int(daysOld)
      if daysOld < 0:
         raise ValueError("Days old value must be an integer >= 0.")
      for entry in self[:]:  # iterate over a copy, since entries are removed as we go
         if os.path.isfile(entry) and not os.path.islink(entry):
            try:
               ageInDays = calculateFileAge(entry)
               ageInWholeDays = math.floor(ageInDays)
               if ageInWholeDays < daysOld:
                  removed += 1
                  self.remove(entry)
            except OSError:
               pass  # age could not be determined; leave the entry in the list
      return removed

   def purgeItems(self):
      """
      Purges all items in the list.

      Every item in the list will be purged.  Directories in the list will
      I{not} be purged recursively, and hence will only be removed if they are
      empty.  Errors will be ignored.

      To facilitate easy removal of directories that will end up being empty,
      the delete process happens in two passes: files first (including soft
      links), then directories.

      Soft links are removed even when they are dangling.  This matters when
      a link and its target are both in the list: once the target has been
      deleted, the link no longer "exists" as far as C{os.path.exists()} is
      concerned, and would otherwise be left behind.

      @return: Tuple containing count of (files, dirs) removed
      """
      files = 0
      dirs = 0
      for entry in self:
         # islink() does not follow the link, so it is true for dangling links too
         if os.path.islink(entry) or os.path.isfile(entry):
            try:
               os.remove(entry)
            except OSError:
               continue  # errors are deliberately ignored
            files += 1
            logger.debug("Purged file [%s]." % entry)
      for entry in self:
         if os.path.isdir(entry) and not os.path.islink(entry):
            try:
               os.rmdir(entry)
            except OSError:
               continue  # typically a directory that is not empty
            dirs += 1
            logger.debug("Purged empty directory [%s]." % entry)
      return (files, dirs)
1405
########################################################################
# Public functions
########################################################################

##########################
# normalizeDir() function
##########################

def normalizeDir(path):
   """
   Puts a directory name into its normalized form.

   A directory name is normalized by dropping a single trailing path
   separator, if there is one.  This keeps directories consistent within
   lists, even though from the user's perspective C{/path/to/dir/} and
   C{/path/to/dir} are equivalent.  A path consisting of just the separator
   (the root directory) is returned untouched.

   @param path: Path to be normalized.
   @type path: String representing a path on disk

   @return: Normalized path, which should be equivalent to the original.
   """
   endsWithSeparator = path[-1:] == os.sep
   if path == os.sep or not endsWithSeparator:
      return path
   return path[:-1]
1432
#############################
# compareContents() function
#############################

def compareContents(path1, path2, verbose=False):
   """
   Compares the contents of two directories to see if they are equivalent.

   The two directories are recursively compared.  First, we check whether they
   contain exactly the same set of files.  Then, we check to see every given
   file has exactly the same contents in both directories.

   This is all relatively simple to implement through the magic of
   L{BackupFileList.generateDigestMap}, which knows how to strip a path prefix
   off the front of each entry in the mapping it generates.  This makes our
   comparison as simple as creating a list for each path, then generating a
   digest map for each path and comparing the two.

   If no exception is thrown, the two directories are considered identical.

   If the C{verbose} flag is C{True}, then an alternate (but slower) method is
   used so that any thrown exception can indicate exactly which file caused the
   comparison to fail.  The thrown C{ValueError} exception distinguishes
   between the directories containing different files, and containing the same
   files with differing content.

   @note: Symlinks are I{not} followed for the purposes of this comparison.

   @param path1: First path to compare.
   @type path1: String representing a path on disk

   @param path2: Second path to compare.
   @type path2: String representing a path on disk

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If a directory doesn't exist or can't be read.
   @raise ValueError: If the two directories are not equivalent.
   @raise IOError: If there is an unusual problem reading the directories.
   """
   def relativeDigestMap(path):
      """Builds a digest map for a directory, keyed relative to that directory."""
      contents = BackupFileList()
      contents.addDirContents(path)
      return contents.generateDigestMap(stripPrefix=normalizeDir(path))

   try:
      path1Digest = relativeDigestMap(path1)
      path2Digest = relativeDigestMap(path2)
      compareDigestMaps(path1Digest, path2Digest, verbose)
   except IOError:
      logger.error("I/O error encountered during consistency check.")
      raise  # bare raise keeps the traceback pointing at the real failure
1486
def compareDigestMaps(digest1, digest2, verbose=False):
   """
   Checks that two digest maps are equal, raising an exception if not.

   In the default mode the two maps are compared as a whole and a generic
   error is raised on any difference.  In verbose mode the sets of keys are
   compared first and then the digests key by key, so the error can say
   whether the file sets differ or which file's contents differ.

   @param digest1: First digest to compare.
   @type digest1: Digest as returned from BackupFileList.generateDigestMap()

   @param digest2: Second digest to compare.
   @type digest2: Digest as returned from BackupFileList.generateDigestMap()

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If the two directories are not equivalent.
   """
   if verbose:
      files1 = UnorderedList(digest1.keys())
      files2 = UnorderedList(digest2.keys())
      if files1 != files2:
         raise ValueError("Directories contain a different set of files.")
      for name in files1:
         if digest1[name] != digest2[name]:
            raise ValueError("File contents for [%s] vary between directories." % name)
   elif digest1 != digest2:
      raise ValueError("Consistency check failed.")
1513