Package CedarBackup2 :: Module filesystem
[hide private]
[frames | no frames]

Source Code for Module CedarBackup2.filesystem

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2004-2008 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python (>= 2.3) 
  29  # Project  : Cedar Backup, release 2 
  30  # Revision : $Id: filesystem.py 928 2008-11-15 18:12:43Z pronovic $ 
  31  # Purpose  : Provides filesystem-related objects. 
  32  # 
  33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  34   
  35  ######################################################################## 
  36  # Module documentation 
  37  ######################################################################## 
  38   
  39  """ 
  40  Provides filesystem-related objects. 
  41  @sort: FilesystemList, BackupFileList, PurgeItemList 
  42  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  43  """ 
  44   
  45   
  46  ######################################################################## 
  47  # Imported modules 
  48  ######################################################################## 
  49   
  50  # System modules 
  51  import sys 
  52  import os 
  53  import re 
  54  import sha 
  55  import math 
  56  import logging 
  57  import tarfile 
  58   
  59  # Cedar Backup modules 
  60  from CedarBackup2.knapsack import firstFit, bestFit, worstFit, alternateFit 
  61  from CedarBackup2.util import AbsolutePathList, ObjectTypeList, UnorderedList, RegexList 
  62  from CedarBackup2.util import removeKeys, displayBytes, calculateFileAge, encodePath, dereferenceLink 
  63   
  64   
  65  ######################################################################## 
  66  # Module-wide variables 
  67  ######################################################################## 
  68   
  69  logger = logging.getLogger("CedarBackup2.log.filesystem") 
  70   
  71   
  72  ######################################################################## 
  73  # FilesystemList class definition 
  74  ######################################################################## 
  75   
76 -class FilesystemList(list):
77 78 ###################### 79 # Class documentation 80 ###################### 81 82 """ 83 Represents a list of filesystem items. 84 85 This is a generic class that represents a list of filesystem items. Callers 86 can add individual files or directories to the list, or can recursively add 87 the contents of a directory. The class also allows for up-front exclusions 88 in several forms (all files, all directories, all items matching a pattern, 89 all items whose basename matches a pattern, or all directories containing a 90 specific "ignore file"). Symbolic links are typically backed up 91 non-recursively, i.e. the link to a directory is backed up, but not the 92 contents of that link (we don't want to deal with recursive loops, etc.). 93 94 The custom methods such as L{addFile} will only add items if they exist on 95 the filesystem and do not match any exclusions that are already in place. 96 However, since a FilesystemList is a subclass of Python's standard list 97 class, callers can also add items to the list in the usual way, using 98 methods like C{append()} or C{insert()}. No validations apply to items 99 added to the list in this way; however, many list-manipulation methods deal 100 "gracefully" with items that don't exist in the filesystem, often by 101 ignoring them. 102 103 Once a list has been created, callers can remove individual items from the 104 list using standard methods like C{pop()} or C{remove()} or they can use 105 custom methods to remove specific types of entries or entries which match a 106 particular pattern. 107 108 @note: Regular expression patterns that apply to paths are assumed to be 109 bounded at front and back by the beginning and end of the string, i.e. they 110 are treated as if they begin with C{^} and end with C{$}. This is true 111 whether we are matching a complete path or a basename. 112 113 @note: Some platforms, like Windows, do not support soft links. 
On those 114 platforms, the ignore-soft-links flag can be set, but it won't do any good 115 because the operating system never reports a file as a soft link. 116 117 @sort: __init__, addFile, addDir, addDirContents, removeFiles, removeDirs, 118 removeLinks, removeMatch, removeInvalid, normalize, validate, 119 excludeFiles, excludeDirs, excludeLinks, excludePaths, 120 excludePatterns, excludeBasenamePatterns, ignoreFile 121 """ 122 123 124 ############## 125 # Constructor 126 ############## 127
def __init__(self):
   """
   Creates an empty list on which no exclusions are configured.

   The private backing attributes are seeded first.  The public properties
   are assigned afterwards so that every initial value is routed through
   its property setter.
   """
   list.__init__(self)

   # Backing attributes: flags start out False, everything else unset.
   self._excludeFiles = self._excludeDirs = self._excludeLinks = False
   self._excludePaths = None
   self._excludePatterns = None
   self._excludeBasenamePatterns = None
   self._ignoreFile = None

   # Public properties (assignment order: files, links, dirs, then lists).
   self.excludeFiles = False
   self.excludeLinks = False
   self.excludeDirs = False
   self.excludePaths = []
   self.excludePatterns = RegexList()
   self.excludeBasenamePatterns = RegexList()
   self.ignoreFile = None
145 146 147 ############# 148 # Properties 149 ############# 150
151 - def _setExcludeFiles(self, value):
152 """ 153 Property target used to set the exclude files flag. 154 No validations, but we normalize the value to C{True} or C{False}. 155 """ 156 if value: 157 self._excludeFiles = True 158 else: 159 self._excludeFiles = False
160
def _getExcludeFiles(self):
   """
   Property target used to get the exclude files flag.
   @return: Current value of the private C{_excludeFiles} attribute.
   """
   return self._excludeFiles
166
167 - def _setExcludeDirs(self, value):
168 """ 169 Property target used to set the exclude directories flag. 170 No validations, but we normalize the value to C{True} or C{False}. 171 """ 172 if value: 173 self._excludeDirs = True 174 else: 175 self._excludeDirs = False
176
def _getExcludeDirs(self):
   """
   Property target used to get the exclude directories flag.
   @return: Current value of the private C{_excludeDirs} attribute.
   """
   return self._excludeDirs
182 192 198
def _setExcludePaths(self, value):
   """
   Property target used to set the exclude paths list.

   A fresh C{AbsolutePathList} always replaces the previous one; a C{None}
   value therefore results in an empty list.  Elements do not have to
   exist on disk at the time of assignment.

   @param value: Iterable of absolute paths, or C{None}.
   @raise ValueError: If any list element is not an absolute path.
   """
   # The new container is stored before it is filled, exactly as before.
   self._absoluteExcludePaths = AbsolutePathList()
   if value is None:
      return
   self._absoluteExcludePaths.extend(value)
209
def _getExcludePaths(self):
   """
   Property target used to get the absolute exclude paths list.
   @return: The private C{_absoluteExcludePaths} list itself (not a copy).
   """
   return self._absoluteExcludePaths
215
def _setExcludePatterns(self, value):
   """
   Property target used to set the exclude patterns list.

   A fresh C{RegexList} always replaces the previous one; a C{None} value
   therefore results in an empty list.

   @param value: Iterable of regular expression patterns, or C{None}.
   """
   # The new container is stored before it is filled, exactly as before.
   self._excludePatterns = RegexList()
   if value is None:
      return
   self._excludePatterns.extend(value)
224
def _getExcludePatterns(self):
   """
   Property target used to get the exclude patterns list.
   @return: The private C{_excludePatterns} list itself (not a copy).
   """
   return self._excludePatterns
230
def _setExcludeBasenamePatterns(self, value):
   """
   Property target used to set the exclude basename patterns list.

   A fresh C{RegexList} always replaces the previous one; a C{None} value
   therefore results in an empty list.

   @param value: Iterable of regular expression patterns, or C{None}.
   """
   # The new container is stored before it is filled, exactly as before.
   self._excludeBasenamePatterns = RegexList()
   if value is None:
      return
   self._excludeBasenamePatterns.extend(value)
239
241 """ 242 Property target used to get the exclude basename patterns list. 243 """ 244 return self._excludeBasenamePatterns
245
246 - def _setIgnoreFile(self, value):
247 """ 248 Property target used to set the ignore file. 249 The value must be a non-empty string if it is not C{None}. 250 @raise ValueError: If the value is an empty string. 251 """ 252 if value is not None: 253 if len(value) < 1: 254 raise ValueError("The ignore file must be a non-empty string.") 255 self._ignoreFile = value
256
def _getIgnoreFile(self):
   """
   Property target used to get the ignore file.
   @return: Current value of the private C{_ignoreFile} attribute.
   """
   return self._ignoreFile

# Public properties.  Each one pairs a private getter with a private setter,
# has no deleter (the C{None} argument), and carries a short docstring.
excludeFiles = property(_getExcludeFiles, _setExcludeFiles, None, "Boolean indicating whether files should be excluded.")
excludeDirs = property(_getExcludeDirs, _setExcludeDirs, None, "Boolean indicating whether directories should be excluded.")
excludeLinks = property(_getExcludeLinks, _setExcludeLinks, None, "Boolean indicating whether soft links should be excluded.")
excludePaths = property(_getExcludePaths, _setExcludePaths, None, "List of absolute paths to be excluded.")
excludePatterns = property(_getExcludePatterns, _setExcludePatterns, None,
                           "List of regular expression patterns (matching complete path) to be excluded.")
excludeBasenamePatterns = property(_getExcludeBasenamePatterns, _setExcludeBasenamePatterns,
                                   None, "List of regular expression patterns (matching basename) to be excluded.")
ignoreFile = property(_getIgnoreFile, _setIgnoreFile, None, "Name of file which will cause directory contents to be ignored.")


##############
# Add methods
##############

def addFile(self, path):
   """
   Adds a single file to the list, honoring the configured exclusions.

   The path must exist and must be a file or a link to an existing file.
   Exclusions are evaluated in this order: C{excludeLinks}, C{excludeFiles},
   C{excludePaths}, C{excludePatterns} (against the whole path) and finally
   C{excludeBasenamePatterns} (against the basename).  Patterns are wrapped
   in C{^} and C{$} before being matched.

   @param path: File path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list (0 if excluded, otherwise 1).

   @raise ValueError: If path is not a file or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = encodePath(path)
   if not (os.path.exists(path) and os.path.isfile(path)):
      logger.debug("Path [%s] is not a file or does not exist on disk." % path)
      raise ValueError("Path is not a file or does not exist on disk.")

   # Flag-based and exact-path exclusions
   if self.excludeLinks and os.path.islink(path):
      logger.debug("Path [%s] is excluded based on excludeLinks." % path)
      return 0
   if self.excludeFiles:
      logger.debug("Path [%s] is excluded based on excludeFiles." % path)
      return 0
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0

   # Pattern exclusions; every pattern is assumed valid thanks to RegexList
   for rawPattern in self.excludePatterns:
      encoded = encodePath(rawPattern)  # use same encoding as filenames
      if re.match(r"^%s$" % encoded, path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, encoded))
         return 0
   for rawPattern in self.excludeBasenamePatterns:
      encoded = encodePath(rawPattern)  # use same encoding as filenames
      if re.match(r"^%s$" % encoded, os.path.basename(path)):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, encoded))
         return 0

   self.append(path)
   logger.debug("Added file to list: [%s]" % path)
   return 1
319
def addDir(self, path):
   """
   Adds a single directory to the list, honoring the configured exclusions.

   The path must exist and must be a directory or a link to an existing
   directory.  Exclusions are evaluated in this order: C{excludeLinks},
   C{excludeDirs}, C{excludePaths}, C{excludePatterns} (against the whole
   path) and finally C{excludeBasenamePatterns} (against the basename).
   The L{ignoreFile} does not apply to this method, only to
   L{addDirContents}.

   @param path: Directory path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list (0 if excluded, otherwise 1).

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = normalizeDir(encodePath(path))
   if not (os.path.exists(path) and os.path.isdir(path)):
      logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
      raise ValueError("Path is not a directory or does not exist on disk.")

   # Flag-based and exact-path exclusions
   if self.excludeLinks and os.path.islink(path):
      logger.debug("Path [%s] is excluded based on excludeLinks." % path)
      return 0
   if self.excludeDirs:
      logger.debug("Path [%s] is excluded based on excludeDirs." % path)
      return 0
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0

   # Pattern exclusions; every pattern is assumed valid thanks to RegexList
   for rawPattern in self.excludePatterns:
      encoded = encodePath(rawPattern)  # use same encoding as filenames
      if re.match(r"^%s$" % encoded, path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, encoded))
         return 0
   for rawPattern in self.excludeBasenamePatterns:
      encoded = encodePath(rawPattern)  # use same encoding as filenames
      if re.match(r"^%s$" % encoded, os.path.basename(path)):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, encoded))
         return 0

   self.append(path)
   logger.debug("Added directory to list: [%s]" % path)
   return 1
364
def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
   """
   Adds the contents of a directory to the list.

   The path must exist and must be a directory or a link to a directory.
   The contents of the directory (as well as the directory path itself)
   will be recursively added to the list, subject to any exclusions that
   are in place.  Pass C{recursive=False} to add only the directory and its
   immediate contents.

   This public method only encodes and normalizes the path; the actual work
   is delegated to C{_addDirContentsInternal}.

   @note: If a directory's absolute path matches an exclude pattern or
   path, or if the directory contains the configured ignore file, then the
   directory and all of its contents will be recursively excluded from the
   list.

   @note: If the passed-in directory happens to be a soft link, it will be
   recursed.  However, the linkDepth parameter controls whether any soft
   links I{within} the directory will be recursed.  The link depth is the
   maximum depth of the tree at which soft links should be followed.  So, a
   depth of 0 does not follow any soft links, a depth of 1 follows only
   links within the passed-in directory, a depth of 2 follows the links at
   the next level down, etc.

   @note: Any invalid soft links (i.e. soft links that point to
   non-existent items) will be silently ignored.

   @note: The L{excludeDirs} flag only controls whether any given directory
   path itself is added to the list once it has been discovered.  It does
   I{not} modify any behavior related to directory recursion.

   @note: If you call this method I{on a link to a directory} that link
   will never be dereferenced (it may, however, be followed).

   @param path: Directory path whose contents should be added to the list
   @type path: String representing a path on disk

   @param recursive: Indicates whether directory contents should be added recursively.
   @type recursive: Boolean value

   @param addSelf: Indicates whether the directory itself should be added to the list.
   @type addSelf: Boolean value

   @param linkDepth: Maximum depth of the tree at which soft links should be followed
   @type linkDepth: Integer value, where zero means not to follow any soft links

   @param dereference: Indicates whether soft links, if followed, should be dereferenced
   @type dereference: Boolean value

   @return: Number of items recursively added to the list

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   startPath = normalizeDir(encodePath(path))
   # Note the argument order: the internal method takes includePath before recursive.
   return self._addDirContentsInternal(startPath, addSelf, recursive, linkDepth, dereference)
421
def _addDirContentsInternal(self, path, includePath=True, recursive=True, linkDepth=0, dereference=False):
   """
   Internal implementation of C{addDirContents}.

   This internal implementation exists due to some refactoring.  Basically,
   some subclasses have a need to add the contents of a directory, but not
   the directory itself.  This is different than the standard C{FilesystemList}
   behavior and actually ends up making a special case out of the first
   call in the recursive chain.  Since I don't want to expose the modified
   interface, C{addDirContents} ends up being wholly implemented in terms
   of this method.

   The linkDepth parameter controls whether soft links are followed when we
   are adding the contents recursively.  Any recursive calls reduce the
   value by one.  If the value is zero or less, then soft links will just be
   added as directories, but will not be followed.  This means that links
   are followed to a I{constant depth} starting from the top-most directory.

   There is one difference between soft links and directories: soft links
   that are added recursively are not placed into the list explicitly.  This
   is because if we do add the links recursively, the resulting tar file
   gets a little confused (it has a link and a directory with the same
   name).

   @note: If you call this method I{on a link to a directory} that link will
   never be dereferenced (it may, however, be followed).

   @note: The passed-in path is used as-is; unlike C{addDirContents}, this
   method does not encode or normalize it.

   @param path: Directory path whose contents should be added to the list.
   @param includePath: Indicates whether to include the path as well as contents.
   @param recursive: Indicates whether directory contents should be added recursively.
   @param linkDepth: Depth of soft links that should be followed
   @param dereference: Indicates whether soft links, if followed, should be dereferenced

   @return: Number of items recursively added to the list

   @raise ValueError: If path is not a directory or does not exist.
   """
   # NOTE(review): the nesting below was reconstructed from a listing whose
   # indentation had been flattened -- confirm against the upstream source.
   added = 0
   if not os.path.exists(path) or not os.path.isdir(path):
      logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
      raise ValueError("Path is not a directory or does not exist on disk.")
   # Any exclusion hit on the directory itself prunes the whole subtree,
   # because we return before listing the directory.
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return added
   for pattern in self.excludePatterns:  # safe to assume all are valid due to RegexList
      pattern = encodePath(pattern)  # use same encoding as filenames
      if re.compile(r"^%s$" % pattern).match(path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, pattern))
         return added
   for pattern in self.excludeBasenamePatterns:  # safe to assume all are valid due to RegexList
      pattern = encodePath(pattern)  # use same encoding as filenames
      if re.compile(r"^%s$" % pattern).match(os.path.basename(path)):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, pattern))
         return added
   if self.ignoreFile is not None and os.path.exists(os.path.join(path, self.ignoreFile)):
      logger.debug("Path [%s] is excluded based on ignore file." % path)
      return added
   if includePath:
      added += self.addDir(path)    # could actually be excluded by addDir, yet
   for entry in os.listdir(path):
      entrypath = os.path.join(path, entry)
      if os.path.isfile(entrypath):
         # Files: optionally add the link target first, then always the entry itself.
         if linkDepth > 0 and dereference:
            derefpath = dereferenceLink(entrypath)
            if derefpath != entrypath:
               added += self.addFile(derefpath)
         added += self.addFile(entrypath)
      elif os.path.isdir(entrypath):
         if os.path.islink(entrypath):
            # Link to a directory: followed only when recursing with link depth remaining.
            if recursive:
               if linkDepth > 0:
                  newDepth = linkDepth - 1;
                  if dereference:
                     # Recurse into the target (target path included), then add the link itself.
                     derefpath = dereferenceLink(entrypath)
                     if derefpath != entrypath:
                        added += self._addDirContentsInternal(derefpath, True, recursive, newDepth, dereference)
                     added += self.addDir(entrypath)
                  else:
                     # Followed but not dereferenced: contents only (includePath=False).
                     added += self._addDirContentsInternal(entrypath, False, recursive, newDepth, dereference)
               else:
                  # No link depth left: the link is added but not followed.
                  added += self.addDir(entrypath)
            else:
               added += self.addDir(entrypath)
         else:
            # Real directory: recurse (with the depth reduced by one) or just add it.
            if recursive:
               newDepth = linkDepth - 1;
               added += self._addDirContentsInternal(entrypath, True, recursive, newDepth, dereference)
            else:
               added += self.addDir(entrypath)
   return added
512 513 514 ################# 515 # Remove methods 516 ################# 517
def removeFiles(self, pattern=None):
   """
   Removes file entries from the list.

   With no C{pattern} (or C{None}), every entry that is an existing file is
   removed.  Otherwise only existing files whose path matches the pattern
   (via C{match}, i.e. anchored at the start only) are removed.  Entries
   that do not exist on disk are left alone (use L{removeInvalid} to purge
   those entries).

   This method might be fairly slow for large lists, since it must check
   the type of each item in the list.  If you know ahead of time that you
   want to exclude all files, then you will be better off setting
   L{excludeFiles} to C{True} before adding items to the list.

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   compiled = None
   if pattern is not None:
      try:
         pattern = encodePath(pattern)  # use same encoding as filenames
         compiled = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")

   removed = 0
   for candidate in self[:]:  # iterate over a snapshot, since we mutate the list
      if not (os.path.exists(candidate) and os.path.isfile(candidate)):
         continue
      if compiled is not None and not compiled.match(candidate):
         continue
      self.remove(candidate)
      logger.debug("Removed path [%s] from list." % candidate)
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
558
def removeDirs(self, pattern=None):
   """
   Removes directory entries from the list.

   With no C{pattern} (or C{None}), every entry that is an existing
   directory is removed.  Otherwise only existing directories whose path
   matches the pattern (via C{match}, i.e. anchored at the start only) are
   removed.  Entries that do not exist on disk are left alone (use
   L{removeInvalid} to purge those entries).

   This method might be fairly slow for large lists, since it must check
   the type of each item in the list.  If you know ahead of time that you
   want to exclude all directories, then you will be better off setting
   L{excludeDirs} to C{True} before adding items to the list (note that
   this will not prevent you from recursively adding the I{contents} of
   directories).

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   compiled = None
   if pattern is not None:
      try:
         pattern = encodePath(pattern)  # use same encoding as filenames
         compiled = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")

   removed = 0
   for candidate in self[:]:  # iterate over a snapshot, since we mutate the list
      if not (os.path.exists(candidate) and os.path.isdir(candidate)):
         continue
      if compiled is None:
         self.remove(candidate)
         logger.debug("Removed path [%s] from list." % candidate)
      elif compiled.match(candidate):
         self.remove(candidate)
         logger.debug("Removed path [%s] from list based on pattern [%s]." % (candidate, pattern))
      else:
         continue
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
601 642
def removeMatch(self, pattern):
   """
   Removes from the list all entries matching a pattern.

   Every entry whose path matches C{pattern} is removed, regardless of what
   kind of filesystem object it is (or whether it exists at all).  Since
   there is no need to check the type of each entry, it is faster to call
   this method than to call the L{removeFiles}, L{removeDirs} or
   L{removeLinks} methods individually.  If you know which patterns you
   will want to remove ahead of time, you may be better off setting
   L{excludePatterns} or L{excludeBasenamePatterns} before adding items to
   the list.

   @note: Unlike when using the exclude lists, the pattern here is I{not}
   bounded at the front and the back of the string.  You can use any
   pattern you want.

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed.
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   try:
      pattern = encodePath(pattern)  # use same encoding as filenames
      compiled = re.compile(pattern)
   except re.error:
      raise ValueError("Pattern is not a valid regular expression.")

   removed = 0
   for candidate in self[:]:  # iterate over a snapshot, since we mutate the list
      if not compiled.match(candidate):
         continue
      self.remove(candidate)
      logger.debug("Removed path [%s] from list based on pattern [%s]." % (candidate, pattern))
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
677
def removeInvalid(self):
   """
   Removes from the list all entries that do not exist on disk.

   An entry is kept whenever C{os.path.exists} reports it as present; no
   attention is paid to whether the entries are files or directories.

   @return: Number of entries removed.
   """
   removed = 0
   for candidate in self[:]:  # iterate over a snapshot, since we mutate the list
      if os.path.exists(candidate):
         continue
      self.remove(candidate)
      logger.debug("Removed path [%s] from list." % candidate)
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
696 697 698 ################## 699 # Utility methods 700 ################## 701
def normalize(self):
   """
   Normalizes the list, ensuring that each entry is unique.

   The list is sorted in place and then rewritten (also in place) so that
   it contains one copy of every distinct entry, in sorted order.

   Duplicates are dropped in a single pass over the sorted list rather than
   through C{filter}/C{map} side effects, so the method does not depend on
   those built-ins being eager and avoids a C{list.remove} scan per
   duplicate.  The debug message reports the number of removed items as
   C{orig - new}, which is a non-negative count.
   """
   orig = len(self)
   self.sort()
   unique = []
   for entry in self:
      # After sorting, equal entries are adjacent; keep only the first of each run.
      if not unique or unique[-1] != entry:
         unique.append(entry)
   self[:] = unique  # slice assignment keeps the same list object for callers
   new = len(self)
   logger.debug("Completed normalizing list; removed %d items (%d originally, %d now)." % (orig-new, orig, new))
711
def verify(self):
   """
   Verifies that all entries in the list exist on disk.

   Checking stops at the first entry that C{os.path.exists} reports as
   missing.

   @return: C{True} if all entries exist, C{False} otherwise.
   """
   for candidate in self:
      if os.path.exists(candidate):
         continue
      logger.debug("Path [%s] is invalid; list is not valid." % candidate)
      return False
   logger.debug("All entries in list are valid.")
   return True
723 724 725 ######################################################################## 726 # SpanItem class definition 727 ######################################################################## 728
class SpanItem(object):
   """
   Item returned by L{BackupFileList.generateSpan}.

   Plain value holder: the constructor stores its four arguments as
   same-named public attributes and does nothing else.
   """
   def __init__(self, fileList, size, capacity, utilization):
      """
      Create object.
      @param fileList: List of files
      @param size: Size (in bytes) of files
      @param capacity: Capacity associated with the item (presumably in bytes, like C{size} -- confirm against the caller)
      @param utilization: Utilization, as a percentage (0-100)
      """
      self.fileList, self.size = fileList, size
      self.capacity, self.utilization = capacity, utilization
744 745 746 ######################################################################## 747 # BackupFileList class definition 748 ######################################################################## 749
750 -class BackupFileList(FilesystemList):
751 752 ###################### 753 # Class documentation 754 ###################### 755 756 """ 757 List of files to be backed up. 758 759 A BackupFileList is a L{FilesystemList} containing a list of files to be 760 backed up. It only contains files, not directories (soft links are treated 761 like files). On top of the generic functionality provided by 762 L{FilesystemList}, this class adds functionality to keep a hash (checksum) 763 for each file in the list, and it also provides a method to calculate the 764 total size of the files in the list and a way to export the list into tar 765 form. 766 767 @sort: __init__, addDir, totalSize, generateSizeMap, generateDigestMap, 768 generateFitted, generateTarfile, removeUnchanged 769 """ 770 771 ############## 772 # Constructor 773 ############## 774
def __init__(self):
   """
   Initializes a list with no configured exclusions.

   All setup is delegated to the C{FilesystemList} constructor; this class
   adds no state of its own here.
   """
   FilesystemList.__init__(self)
778 779 780 ################################ 781 # Overridden superclass methods 782 ################################ 783
def addDir(self, path):
   """
   Adds a directory to the list.

   Note that this class does not allow directories to be added by
   themselves (a backup list contains only files).  However, since links to
   directories are technically files, we allow them to be added.

   A path that is a real (non-link) directory is skipped and C{0} is
   returned.  Every other path is handed to the superclass method, so all
   of the superclass's existing validations and restrictions apply.

   @param path: Directory path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list.

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = normalizeDir(encodePath(path))
   isRealDirectory = os.path.isdir(path) and not os.path.islink(path)
   if isRealDirectory:
      return 0
   return FilesystemList.addDir(self, path)
811 812 813 ################## 814 # Utility methods 815 ################## 816
def totalSize(self):
   """
   Returns the total size among all files in the list.

   Only regular files are counted.  Soft links (including links that point
   at files) are ignored, as are entries which do not exist on disk.

   @return: Total size, in bytes, as a floating point value
   """
   sizes = [float(os.path.getsize(item))
            for item in self
            if os.path.isfile(item) and not os.path.islink(item)]
   return sum(sizes, 0.0)  # float start value keeps the result a float even when empty
830
def generateSizeMap(self):
   """
   Generates a mapping from file to file size in bytes.

   Soft links are included in the mapping and are always listed with size
   zero.  Regular files are listed with their size on disk.  Anything else
   (directories, entries which do not exist on disk) is left out.

   @return: Dictionary mapping file to file size (as a floating point value)
   """
   sizes = { }
   for item in self:
      if os.path.islink(item):
         sizes[item] = 0.0  # links are checked first, so their target is never measured
         continue
      if os.path.isfile(item):
         sizes[item] = float(os.path.getsize(item))
   return sizes
845
def generateDigestMap(self, stripPrefix=None):
   """
   Generates a mapping from file to file digest.

   Currently, the digest is an SHA hash, which should be pretty secure.  In
   the future, this might be a different kind of hash, but we guarantee that
   the type of the hash will not change unless the library major version
   number is bumped.

   Entries which do not exist on disk are ignored.

   Soft links are ignored.  We would end up generating a digest for the file
   that the soft link points at, which doesn't make any sense.

   If C{stripPrefix} is passed in, then that prefix will be stripped from
   each key when the map is generated.  This can be useful in generating two
   "relative" digest maps to be compared to one another.  The prefix is
   only removed when the entry actually starts with it; an entry that merely
   contains the same text further along its path keeps its full path as the
   key.

   @param stripPrefix: Common prefix to be stripped from paths
   @type stripPrefix: String with any contents

   @return: Dictionary mapping file to digest value
   @see: L{removeUnchanged}
   """
   table = { }
   for entry in self:
      if os.path.isfile(entry) and not os.path.islink(entry):
         key = entry
         # Strip a genuine leading prefix only; str.replace() would also hit
         # the first occurrence in the middle of a path.
         if stripPrefix is not None and entry.startswith(stripPrefix):
            key = entry[len(stripPrefix):]
         table[key] = BackupFileList._generateDigest(entry)
   return table
880
def _generateDigest(path):
   """
   Generates an SHA digest for a given file on disk.

   The original code for this function used this simplistic implementation,
   which requires reading the entire file into memory at once in order to
   generate a digest value::

      sha.new(open(path).read()).hexdigest()

   Not surprisingly, this isn't an optimal solution.  The U{Simple file
   hashing <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259109>}
   Python Cookbook recipe describes how to incrementally generate a hash
   value by reading in chunks of data rather than reading the file all at
   once.  The recipe relies on the C{update()} method of the various Python
   hashing algorithms.

   In the author's tests using a 110 MB file on CD, the original
   implementation required 111 seconds, while the chunked implementation
   required only 40-45 seconds.

   Practice shows that reading in around 4kB (4096 bytes) at a time yields
   the best performance.  Smaller reads are quite a bit slower, and larger
   reads don't make much of a difference.  The read size is hardcoded to
   4096 until there is evidence that making it configurable is worthwhile.

   The file is always closed before this function returns, even if a read
   fails part-way through.

   @param path: Path to generate digest for.

   @return: ASCII-safe SHA digest for the file.
   @raise IOError: If the file cannot be opened or read.
   """
   s = sha.new()
   f = open(path, mode="rb")  # in case platform cares about binary reads
   try:
      while True:
         readString = f.read(4096)  # see notes above
         if not readString:
            break  # an empty read means end-of-file
         s.update(readString)
   finally:
      f.close()  # never leak the handle, even when read() raises
   digest = s.hexdigest()
   logger.debug("Generated digest [%s] for file [%s]." % (digest, path))
   return digest
_generateDigest = staticmethod(_generateDigest)

def generateFitted(self, capacity, algorithm="worst_fit"):
   """
   Picks out the items from the list that fit within a given capacity.

   Sometimes a caller would like to include every item in a list, but
   cannot because not all of the items fit in the space available.  This
   method leaves the list itself untouched and returns a separate result
   containing only the items selected to fit, so no information is lost if
   the fitted result turns out to be unsatisfactory.

   The fitting is done using the functions in the knapsack module.  The
   worst fit algorithm is used unless a different one is requested; first
   fit, best fit and alternate fit are also available.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: First element of the knapsack function's result, i.e. the items that fit
   @raise ValueError: If the algorithm is invalid.
   """
   sizeTable = self._getKnapsackTable()
   fitFunction = BackupFileList._getKnapsackFunction(algorithm)
   fitted = fitFunction(sizeTable, capacity)
   return fitted[0]
954
def generateSpan(self, capacity, algorithm="worst_fit"):
   """
   Splits the list of items into sub-lists that fit in a given capacity.

   Sometimes, callers need to split a backup file list into a set of smaller
   lists.  For instance, you could use this to "span" the files across a set
   of discs.

   The fitting is done using the functions in the knapsack module.  By
   default, the worst fit algorithm is used, but you can also choose from
   first fit, best fit and alternate fit.

   Each pass fits as many of the remaining items as possible into one span,
   removes them from the working table, and repeats until nothing is left.

   @note: If any of your items are larger than the capacity, then it won't
   be possible to find a solution.  In this case, a value error will be
   raised.

   @note: Utilization is reported as a percentage of C{capacity}.  If the
   capacity is zero, utilization is reported as C{0.0} rather than failing
   with a division by zero.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: List of L{SpanItem} objects.

   @raise ValueError: If the algorithm is invalid.
   @raise ValueError: If it's not possible to fit some items
   """
   spanItems = []
   function = BackupFileList._getKnapsackFunction(algorithm)
   table = self._getKnapsackTable(capacity)  # raises ValueError for any file larger than capacity
   iteration = 0
   while len(table) > 0:
      iteration += 1
      fit = function(table, capacity)
      if len(fit[0]) == 0:
         # Should never happen due to validations in _getKnapsackTable(), but let's be safe
         raise ValueError("After iteration %d, unable to add any new items." % iteration)
      removeKeys(table, fit[0])  # fitted items are done; the next pass works on what's left
      capacityValue = float(capacity)
      if capacityValue == 0.0:
         utilization = 0.0  # only zero-size items can fit here; avoid dividing by zero
      else:
         utilization = (float(fit[1])/capacityValue)*100.0
      item = SpanItem(fit[0], fit[1], capacity, utilization)
      spanItems.append(item)
   return spanItems
997
def _getKnapsackTable(self, capacity=None):
   """
   Builds the dictionary structure that the knapsack algorithms work on.

   Soft links are entered with a size of zero.  Regular files are entered
   with their size on disk.  Anything else in the list is skipped.

   @param capacity: Optional capacity limit; when given, every regular file is checked against it

   @return: Dictionary mapping file path to tuple of (file path, file size).
   @raise ValueError: If a capacity is given and a regular file is larger than it.
   """
   knapsack = { }
   for path in self:
      if os.path.islink(path):
         knapsack[path] = (path, 0.0)
         continue
      if not os.path.isfile(path):
         continue
      size = float(os.stat(path).st_size)
      if capacity is not None and size > capacity:
         raise ValueError("File [%s] cannot fit in capacity %s." % (path, displayBytes(capacity)))
      knapsack[path] = (path, size)
   return knapsack
1014
def _getKnapsackFunction(algorithm):
   """
   Looks up the knapsack function that goes with an algorithm name.

   The recognized names are "first_fit", "best_fit", "worst_fit" and
   "alternate_fit".

   @param algorithm: Name of the algorithm
   @return: Reference to knapsack function
   @raise ValueError: If the algorithm name is unknown.
   """
   if algorithm == "first_fit":
      return firstFit
   if algorithm == "best_fit":
      return bestFit
   if algorithm == "worst_fit":
      return worstFit
   if algorithm == "alternate_fit":
      return alternateFit
   raise ValueError("Algorithm [%s] is invalid." % algorithm)
_getKnapsackFunction = staticmethod(_getKnapsackFunction)

def generateTarfile(self, path, mode='tar', ignore=False, flat=False):
   """
   Creates a tar file containing the files in the list.

   By default, this method will create uncompressed tar files.  If you pass
   in mode C{'targz'}, then it will create gzipped tar files, and if you
   pass in mode C{'tarbz2'}, then it will create bzipped tar files.

   The tar file will be created as a GNU tar archive, which enables extended
   file name lengths, etc.  Since GNU tar is so prevalent, I've decided that
   the extra functionality out-weighs the disadvantage of not being
   "standard".

   If you pass in C{flat=True}, then a "flat" archive will be created, and
   all of the files will be added to the root of the archive.  So, the file
   C{/tmp/something/whatever.txt} would be added as just C{whatever.txt}.

   By default, the whole method call fails if there are problems adding any
   of the files to the archive, resulting in an exception.  Under these
   circumstances, callers are advised that they might want to call
   L{removeInvalid()} and then attempt to create the tar file a second
   time, since the most common cause of failures is a missing file (a file
   that existed when the list was built, but is gone again by the time the
   tar file is built).

   If you want to, you can pass in C{ignore=True}, and the method will
   ignore errors encountered when adding individual files to the archive
   (but not errors opening and closing the archive itself).  Only
   C{tarfile.TarError} and C{OSError} are treated as per-file errors.

   We'll always attempt to remove the tarfile from disk if a tar-related
   exception will be thrown.

   @note: No validation is done as to whether the entries in the list are
   files, since only files or soft links should be in an object like this.
   However, to be safe, everything is explicitly added to the tar archive
   non-recursively so it's safe to include soft links to directories.

   @note: The Python C{tarfile} module, which is used internally here, is
   supposed to deal properly with long filenames and links.  In my testing,
   I have found that it appears to be able to add really long filenames
   to archives, but doesn't do a good job reading them back out, even out of
   an archive it created.  Fortunately, all Cedar Backup does is add files
   to archives.

   @param path: Path of tar file to create on disk
   @type path: String representing a path on disk

   @param mode: Tar creation mode
   @type mode: One of either C{'tar'}, C{'targz'} or C{'tarbz2'}

   @param ignore: Indicates whether to ignore certain errors.
   @type ignore: Boolean

   @param flat: Creates "flat" archive by putting all items in root
   @type flat: Boolean

   @raise ValueError: If mode is not valid
   @raise ValueError: If list is empty
   @raise ValueError: If the path could not be encoded properly.
   @raise TarError: If there is a problem creating the tar file
   """
   path = encodePath(path)
   if len(self) == 0: raise ValueError("Empty list cannot be used to generate tarfile.")
   # Translate the public mode name into the mode string that tarfile.open() expects.
   if(mode == 'tar'): tarmode = "w:"
   elif(mode == 'targz'): tarmode = "w:gz"
   elif(mode == 'tarbz2'): tarmode = "w:bz2"
   else: raise ValueError("Mode [%s] is not valid." % mode)
   try:
      tar = tarfile.open(path, tarmode)
      tar.posix = False    # make a GNU-compatible archive without file length limits
      for entry in self:
         try:
            if flat:
               # A flat archive keeps only the file name, dropping the directory part.
               tar.add(entry, arcname=os.path.basename(entry), recursive=False)
            else:
               tar.add(entry, recursive=False)
         except tarfile.TarError, e:
            if not ignore:
               raise e
            logger.info("Unable to add file [%s]; going on anyway." % entry)
         except OSError, e:
            if not ignore:
               # Converted to TarError so that the outer handler below performs the cleanup.
               raise tarfile.TarError(e)
            logger.info("Unable to add file [%s]; going on anyway." % entry)
         # NOTE(review): any other exception type raised by tar.add() is not
         # caught by either handler here or by the outer handlers, so it
         # propagates without the archive being closed or removed -- confirm
         # whether that is intended.
      tar.close()
   except tarfile.ReadError, e:
      # Handled before the more general TarError so the caller gets a friendlier message.
      # If tarfile.open() itself failed, "tar" is unbound; the bare except swallows that too.
      try: tar.close()
      except: pass
      if os.path.exists(path):
         try: os.remove(path)
         except: pass
      raise tarfile.ReadError("Unable to open [%s]; maybe directory doesn't exist?" % path)
   except tarfile.TarError, e:
      # Best-effort cleanup: close the archive and remove the partial file, then re-raise.
      try: tar.close()
      except: pass
      if os.path.exists(path):
         try: os.remove(path)
         except: pass
      raise e
1134
def removeUnchanged(self, digestMap, captureDigest=False):
   """
   Removes unchanged entries from the list.

   This method relies on a digest map as returned from L{generateDigestMap}.
   For each entry in C{digestMap}, if the entry also exists in the current
   list I{and} the entry in the current list has the same digest value as in
   the map, the entry in the current list will be removed.

   The idea is that a caller captures a digest map from
   C{generateDigestMap} at some point in time (perhaps the beginning of the
   week) and saves it off.  Later, the caller uses this method to filter
   out any files that are unchanged relative to the saved-off map.

   If C{captureDigest} is C{True}, a digest is generated up front for every
   regular, non-link file in the list (the same rule L{generateDigestMap}
   uses), and the comparison is a lookup into those digests.

   If C{captureDigest} is C{False}, a digest is generated only for entries
   that actually need to be checked, and any entry that isn't a regular
   file currently on disk is left alone.

   The return value depends on C{captureDigest}.  To preserve backwards
   compatibility, if C{captureDigest} is C{False} only the number of
   entries removed is returned.  Otherwise, a tuple of C{(entries removed,
   digest map)} is returned, where the digest map holds the digests
   captured up front.

   @note: For performance reasons, this method rebuilds the list from
   scratch.  The entries are loaded into a temporary dictionary, unchanged
   entries are deleted from the dictionary (faster than the equivalent
   operation on a list), and the list contents are then replaced with the
   keys left in the dictionary.

   @param digestMap: Dictionary mapping file name to digest value.
   @type digestMap: Map as returned from L{generateDigestMap}.

   @param captureDigest: Indicates that digest information should be captured.
   @type captureDigest: Boolean

   @return: Number of entries removed, or tuple of (number removed, captured digest map)
   """
   discarded = 0
   remaining = {}
   if captureDigest:
      captured = {}
      for path in self:
         if os.path.isfile(path) and not os.path.islink(path):
            current = BackupFileList._generateDigest(path)
            remaining[path] = current
            captured[path] = current
         else:
            remaining[path] = None   # marks "not a regular file", never compared
      for path in digestMap.keys():
         if path not in remaining:
            continue
         current = remaining[path]
         if current is not None and current == digestMap[path]:
            discarded += 1
            del remaining[path]
            logger.debug("Discarded unchanged file [%s]." % path)
      self[:] = remaining.keys()
      return (discarded, captured)
   for path in self:
      remaining[path] = None
   for path in digestMap.keys():
      if path in remaining and os.path.isfile(path) and not os.path.islink(path):
         if BackupFileList._generateDigest(path) == digestMap[path]:
            discarded += 1
            del remaining[path]
            logger.debug("Discarded unchanged file [%s]." % path)
   self[:] = remaining.keys()
   return discarded
1217 1218 1219 ######################################################################## 1220 # PurgeItemList class definition 1221 ######################################################################## 1222
class PurgeItemList(FilesystemList):

   ######################
   # Class documentation
   ######################

   """
   List of files and directories to be purged.

   A PurgeItemList is a L{FilesystemList} containing a list of files and
   directories to be purged.  On top of the generic functionality provided by
   L{FilesystemList}, this class adds functionality to remove items that are
   too young to be purged, and to actually remove each item in the list from
   the filesystem.

   The other main difference is that when you add a directory's contents to a
   purge item list, the directory itself is not added to the list.  This way,
   if someone asks to purge within C{/opt/backup/collect}, that directory
   doesn't get removed once all of the files within it are gone.
   """

   ##############
   # Constructor
   ##############

   def __init__(self):
      """Initializes a list with no configured exclusions."""
      FilesystemList.__init__(self)


   ##############
   # Add methods
   ##############

   def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
      """
      Adds the contents of a directory to the list.

      The path must exist and must be a directory or a link to a directory.
      The contents of the directory (but I{not} the directory path itself) will
      be recursively added to the list, subject to any exclusions that are in
      place.  If you only want the directory's immediate contents to be added,
      then pass in C{recursive=False}.

      The path is encoded and normalized (trailing separator removed), and
      the work is then delegated to the base class.

      @note: If a directory's absolute path matches an exclude pattern or path,
      or if the directory contains the configured ignore file, then the
      directory and all of its contents will be recursively excluded from the
      list.

      @note: If the passed-in directory happens to be a soft link, it will be
      recursed.  However, the linkDepth parameter controls whether any soft
      links I{within} the directory will be recursed.  The link depth is
      maximum depth of the tree at which soft links should be followed.  So, a
      depth of 0 does not follow any soft links, a depth of 1 follows only
      links within the passed-in directory, a depth of 2 follows the links at
      the next level down, etc.

      @note: Any invalid soft links (i.e.  soft links that point to
      non-existent items) will be silently ignored.

      @note: The L{excludeDirs} flag only controls whether any given directory
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.  The
      corresponding flag for soft links (presumably C{excludeLinks}; confirm
      against L{FilesystemList}) works the same way for soft link paths.

      @note: If you call this method I{on a link to a directory} that link will
      never be dereferenced (it may, however, be followed).

      @param path: Directory path whose contents should be added to the list
      @type path: String representing a path on disk

      @param recursive: Indicates whether directory contents should be added recursively.
      @type recursive: Boolean value

      @param addSelf: Ignored in this subclass.

      @param linkDepth: Depth of soft links that should be followed
      @type linkDepth: Integer value, where zero means not to follow any soft links

      @param dereference: Indicates whether soft links, if followed, should be dereferenced
      @type dereference: Boolean value

      @return: Number of items recursively added to the list

      @raise ValueError: If path is not a directory or does not exist.
      @raise ValueError: If the path could not be encoded properly.
      """
      path = encodePath(path)
      path = normalizeDir(path)
      # The caller's addSelf is deliberately not forwarded: a fixed False is
      # passed instead so the directory itself never lands in a purge list.
      return super(PurgeItemList, self)._addDirContentsInternal(path, False, recursive, linkDepth, dereference)


   ##################
   # Utility methods
   ##################

   def removeYoungFiles(self, daysOld):
      """
      Removes from the list files younger than a certain age (in days).

      Any file whose "age" in days is less than (C{<}) the value of the
      C{daysOld} parameter will be removed from the list so that it will not be
      purged later when L{purgeItems} is called.  Directories and soft links
      will be ignored.

      The "age" of a file is the amount of time since the file was last used,
      per the most recent of the file's C{st_atime} and C{st_mtime} values.
      Partial days are discarded before the comparison is made.

      @note: Some people find the "sense" of this method confusing or
      "backwards".  Keep in mind that this method is used to remove items
      I{from the list}, not from the filesystem!  It removes from the list
      those items that you would I{not} want to purge because they are too
      young.  As an example, passing in C{daysOld} of zero (0) would remove
      from the list no files, which would result in purging all of the files
      later.

      @param daysOld: Minimum age of files that are to be kept in the list.
      @type daysOld: Integer value >= 0.

      @return: Number of entries removed
      @raise ValueError: If C{daysOld} is negative.
      """
      removed = 0
      daysOld = int(daysOld)
      if daysOld < 0:
         raise ValueError("Days old value must be an integer >= 0.")
      for entry in self[:]:   # iterate over a copy, since the list is modified below
         if os.path.isfile(entry) and not os.path.islink(entry):
            try:
               ageInDays = calculateFileAge(entry)
               ageInWholeDays = math.floor(ageInDays)
               if ageInWholeDays < daysOld:
                  removed += 1
                  self.remove(entry)
            except OSError:
               pass   # age could not be determined; leave the entry in the list
      return removed

   def purgeItems(self):
      """
      Purges all items in the list.

      Every item in the list will be purged.  Directories in the list will
      I{not} be purged recursively, and hence will only be removed if they are
      empty.  Errors will be ignored.

      To facilitate easy removal of directories that will end up being empty,
      the delete process happens in two passes: files first (including soft
      links), then directories.

      Soft links are removed whether or not their target still exists, so a
      link is purged even when its target was purged earlier in the same pass.

      @return: Tuple containing count of (files, dirs) removed
      """
      files = 0
      dirs = 0
      for entry in self:
         # islink() is tested on its own, without an exists() guard: exists()
         # follows links and reports False for a dangling link, which would
         # leave the link behind.  isfile() already implies existence.
         if os.path.islink(entry) or os.path.isfile(entry):
            try:
               os.remove(entry)
               files += 1
               logger.debug("Purged file [%s]." % entry)
            except OSError:
               pass
      for entry in self:
         # Links to directories were handled as links in the first pass.
         if os.path.isdir(entry) and not os.path.islink(entry):
            try:
               os.rmdir(entry)   # only succeeds when the directory is empty
               dirs += 1
               logger.debug("Purged empty directory [%s]." % entry)
            except OSError:
               pass
      return (files, dirs)
1397 1398 1399 ######################################################################## 1400 # Public functions 1401 ######################################################################## 1402 1403 ########################## 1404 # normalizeDir() function 1405 ########################## 1406
def normalizeDir(path):
   """
   Puts a directory name into its normalized form.

   Normalizing means dropping a single trailing path separator, if there is
   one.  Directories then appear within lists in a consistent way, even
   though from the user's perspective C{/path/to/dir/} and C{/path/to/dir}
   are equivalent.  A path consisting of just the separator is returned
   unchanged.

   @param path: Path to be normalized.
   @type path: String representing a path on disk

   @return: Normalized path, which should be equivalent to the original.
   """
   if path == os.sep:
      return path   # nothing to strip from the root itself
   if path[-1:] != os.sep:
      return path
   return path[:-1]
1424 1425 1426 ############################# 1427 # compareContents() function 1428 ############################# 1429
def compareContents(path1, path2, verbose=False):
   """
   Compares the contents of two directories to see if they are equivalent.

   The two directories are recursively compared.  First, we check whether they
   contain exactly the same set of files.  Then, we check to see every given
   file has exactly the same contents in both directories.

   This is all relatively simple to implement through the magic of
   L{BackupFileList.generateDigestMap}, which knows how to strip a path prefix
   off the front of each entry in the mapping it generates.  This makes our
   comparison as simple as creating a list for each path, then generating a
   digest map for each path and comparing the two with L{compareDigestMaps}.

   If no exception is thrown, the two directories are considered identical.

   If the C{verbose} flag is C{True}, then an alternate (but slower) method is
   used so that any thrown exception can indicate exactly which file caused the
   comparison to fail.  The thrown C{ValueError} exception distinguishes
   between the directories containing different files, and containing the same
   files with differing content.

   @note: Symlinks are I{not} followed for the purposes of this comparison.

   @param path1: First path to compare.
   @type path1: String representing a path on disk

   @param path2: Second path to compare.
   @type path2: String representing a path on disk

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If a directory doesn't exist or can't be read.
   @raise ValueError: If the two directories are not equivalent.
   @raise IOError: If there is an unusual problem reading the directories.
   """
   try:
      path1List = BackupFileList()
      path1List.addDirContents(path1)
      path1Digest = path1List.generateDigestMap(stripPrefix=normalizeDir(path1))
      path2List = BackupFileList()
      path2List.addDirContents(path2)
      path2Digest = path2List.generateDigestMap(stripPrefix=normalizeDir(path2))
      compareDigestMaps(path1Digest, path2Digest, verbose)
   except IOError:
      logger.error("I/O error encountered during consistency check.")
      raise   # bare raise re-raises the same error with its original traceback
1478
def compareDigestMaps(digest1, digest2, verbose=False):
   """
   Checks that two digest maps are identical, raising an exception otherwise.

   In the default (non-verbose) mode the two maps are simply compared for
   equality.  In verbose mode the key sets are compared first and then the
   digests are compared key by key, so that the exception message can say
   what kind of difference was found and, for differing content, which key.

   @param digest1: First digest to compare.
   @type digest1: Digest as returned from BackupFileList.generateDigestMap()

   @param digest2: Second digest to compare.
   @type digest2: Digest as returned from BackupFileList.generateDigestMap()

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If the two digest maps are not equivalent.
   """
   if verbose:
      names1 = UnorderedList(digest1.keys())
      names2 = UnorderedList(digest2.keys())
      if names1 != names2:
         raise ValueError("Directories contain a different set of files.")
      for name in names1:
         if digest1[name] != digest2[name]:
            raise ValueError("File contents for [%s] vary between directories." % name)
   elif digest1 != digest2:
      raise ValueError("Consistency check failed.")
1505