1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39 """
40 Provides filesystem-related objects.
41 @sort: FilesystemList, BackupFileList, PurgeItemList
42 @author: Kenneth J. Pronovici <pronovic@ieee.org>
43 """
44
45
46
47
48
49
50
51 import sys
52 import os
53 import re
54 import math
55 import logging
56 import tarfile
57
58
59 from CedarBackup2.knapsack import firstFit, bestFit, worstFit, alternateFit
60 from CedarBackup2.util import AbsolutePathList, ObjectTypeList, UnorderedList, RegexList
61 from CedarBackup2.util import removeKeys, displayBytes, calculateFileAge, encodePath, dereferenceLink
62
63
64
65
66
67
68 logger = logging.getLogger("CedarBackup2.log.filesystem")
69
70
71
72
73
74
76
77
78
79
80
81 """
82 Represents a list of filesystem items.
83
84 This is a generic class that represents a list of filesystem items. Callers
85 can add individual files or directories to the list, or can recursively add
86 the contents of a directory. The class also allows for up-front exclusions
87 in several forms (all files, all directories, all items matching a pattern,
88 all items whose basename matches a pattern, or all directories containing a
89 specific "ignore file"). Symbolic links are typically backed up
90 non-recursively, i.e. the link to a directory is backed up, but not the
91 contents of that link (we don't want to deal with recursive loops, etc.).
92
93 The custom methods such as L{addFile} will only add items if they exist on
94 the filesystem and do not match any exclusions that are already in place.
95 However, since a FilesystemList is a subclass of Python's standard list
96 class, callers can also add items to the list in the usual way, using
97 methods like C{append()} or C{insert()}. No validations apply to items
98 added to the list in this way; however, many list-manipulation methods deal
99 "gracefully" with items that don't exist in the filesystem, often by
100 ignoring them.
101
102 Once a list has been created, callers can remove individual items from the
103 list using standard methods like C{pop()} or C{remove()} or they can use
104 custom methods to remove specific types of entries or entries which match a
105 particular pattern.
106
107 @note: Regular expression patterns that apply to paths are assumed to be
108 bounded at front and back by the beginning and end of the string, i.e. they
109 are treated as if they begin with C{^} and end with C{$}. This is true
110 whether we are matching a complete path or a basename.
111
112 @note: Some platforms, like Windows, do not support soft links. On those
113 platforms, the ignore-soft-links flag can be set, but it won't do any good
114 because the operating system never reports a file as a soft link.
115
116 @sort: __init__, addFile, addDir, addDirContents, removeFiles, removeDirs,
117 removeLinks, removeMatch, removeInvalid, normalize,
118 excludeFiles, excludeDirs, excludeLinks, excludePaths,
119 excludePatterns, excludeBasenamePatterns, ignoreFile
120 """
121
122
123
124
125
126
144
145
146
147
148
149
151 """
152 Property target used to set the exclude files flag.
153 No validations, but we normalize the value to C{True} or C{False}.
154 """
155 if value:
156 self._excludeFiles = True
157 else:
158 self._excludeFiles = False
159
161 """
162 Property target used to get the exclude files flag.
163 """
164 return self._excludeFiles
165
167 """
168 Property target used to set the exclude directories flag.
169 No validations, but we normalize the value to C{True} or C{False}.
170 """
171 if value:
172 self._excludeDirs = True
173 else:
174 self._excludeDirs = False
175
177 """
178 Property target used to get the exclude directories flag.
179 """
180 return self._excludeDirs
181
183 """
184 Property target used to set the exclude soft links flag.
185 No validations, but we normalize the value to C{True} or C{False}.
186 """
187 if value:
188 self._excludeLinks = True
189 else:
190 self._excludeLinks = False
191
193 """
194 Property target used to get the exclude soft links flag.
195 """
196 return self._excludeLinks
197
199 """
200 Property target used to set the exclude paths list.
201 A C{None} value is converted to an empty list.
202 Elements do not have to exist on disk at the time of assignment.
203 @raise ValueError: If any list element is not an absolute path.
204 """
205 self._absoluteExcludePaths = AbsolutePathList()
206 if value is not None:
207 self._absoluteExcludePaths.extend(value)
208
210 """
211 Property target used to get the absolute exclude paths list.
212 """
213 return self._absoluteExcludePaths
214
216 """
217 Property target used to set the exclude patterns list.
218 A C{None} value is converted to an empty list.
219 """
220 self._excludePatterns = RegexList()
221 if value is not None:
222 self._excludePatterns.extend(value)
223
225 """
226 Property target used to get the exclude patterns list.
227 """
228 return self._excludePatterns
229
231 """
232 Property target used to set the exclude basename patterns list.
233 A C{None} value is converted to an empty list.
234 """
235 self._excludeBasenamePatterns = RegexList()
236 if value is not None:
237 self._excludeBasenamePatterns.extend(value)
238
240 """
241 Property target used to get the exclude basename patterns list.
242 """
243 return self._excludeBasenamePatterns
244
246 """
247 Property target used to set the ignore file.
248 The value must be a non-empty string if it is not C{None}.
249 @raise ValueError: If the value is an empty string.
250 """
251 if value is not None:
252 if len(value) < 1:
253 raise ValueError("The ignore file must be a non-empty string.")
254 self._ignoreFile = value
255
257 """
258 Property target used to get the ignore file.
259 """
260 return self._ignoreFile
261
262 excludeFiles = property(_getExcludeFiles, _setExcludeFiles, None, "Boolean indicating whether files should be excluded.")
263 excludeDirs = property(_getExcludeDirs, _setExcludeDirs, None, "Boolean indicating whether directories should be excluded.")
264 excludeLinks = property(_getExcludeLinks, _setExcludeLinks, None, "Boolean indicating whether soft links should be excluded.")
265 excludePaths = property(_getExcludePaths, _setExcludePaths, None, "List of absolute paths to be excluded.")
266 excludePatterns = property(_getExcludePatterns, _setExcludePatterns, None,
267 "List of regular expression patterns (matching complete path) to be excluded.")
268 excludeBasenamePatterns = property(_getExcludeBasenamePatterns, _setExcludeBasenamePatterns,
269 None, "List of regular expression patterns (matching basename) to be excluded.")
270 ignoreFile = property(_getIgnoreFile, _setIgnoreFile, None, "Name of file which will cause directory contents to be ignored.")
271
272
273
274
275
276
278 """
279 Adds a file to the list.
280
281 The path must exist and must be a file or a link to an existing file. It
282 will be added to the list subject to any exclusions that are in place.
283
284 @param path: File path to be added to the list
285 @type path: String representing a path on disk
286
287 @return: Number of items added to the list.
288
289 @raise ValueError: If path is not a file or does not exist.
290 @raise ValueError: If the path could not be encoded properly.
291 """
292 path = encodePath(path)
293 if not os.path.exists(path) or not os.path.isfile(path):
294 logger.debug("Path [%s] is not a file or does not exist on disk." % path)
295 raise ValueError("Path is not a file or does not exist on disk.")
296 if self.excludeLinks and os.path.islink(path):
297 logger.debug("Path [%s] is excluded based on excludeLinks." % path)
298 return 0
299 if self.excludeFiles:
300 logger.debug("Path [%s] is excluded based on excludeFiles." % path)
301 return 0
302 if path in self.excludePaths:
303 logger.debug("Path [%s] is excluded based on excludePaths." % path)
304 return 0
305 for pattern in self.excludePatterns:
306 pattern = encodePath(pattern)
307 if re.compile(r"^%s$" % pattern).match(path):
308 logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, pattern))
309 return 0
310 for pattern in self.excludeBasenamePatterns:
311 pattern = encodePath(pattern)
312 if re.compile(r"^%s$" % pattern).match(os.path.basename(path)):
313 logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, pattern))
314 return 0
315 self.append(path)
316 logger.debug("Added file to list: [%s]" % path)
317 return 1
318
320 """
321 Adds a directory to the list.
322
323 The path must exist and must be a directory or a link to an existing
324 directory. It will be added to the list subject to any exclusions that
325 are in place. The L{ignoreFile} does not apply to this method, only to
326 L{addDirContents}.
327
328 @param path: Directory path to be added to the list
329 @type path: String representing a path on disk
330
331 @return: Number of items added to the list.
332
333 @raise ValueError: If path is not a directory or does not exist.
334 @raise ValueError: If the path could not be encoded properly.
335 """
336 path = encodePath(path)
337 path = normalizeDir(path)
338 if not os.path.exists(path) or not os.path.isdir(path):
339 logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
340 raise ValueError("Path is not a directory or does not exist on disk.")
341 if self.excludeLinks and os.path.islink(path):
342 logger.debug("Path [%s] is excluded based on excludeLinks." % path)
343 return 0
344 if self.excludeDirs:
345 logger.debug("Path [%s] is excluded based on excludeDirs." % path)
346 return 0
347 if path in self.excludePaths:
348 logger.debug("Path [%s] is excluded based on excludePaths." % path)
349 return 0
350 for pattern in self.excludePatterns:
351 pattern = encodePath(pattern)
352 if re.compile(r"^%s$" % pattern).match(path):
353 logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, pattern))
354 return 0
355 for pattern in self.excludeBasenamePatterns:
356 pattern = encodePath(pattern)
357 if re.compile(r"^%s$" % pattern).match(os.path.basename(path)):
358 logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, pattern))
359 return 0
360 self.append(path)
361 logger.debug("Added directory to list: [%s]" % path)
362 return 1
363
def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
    """
    Adds the contents of a directory to the list.

    The path must exist and must be a directory or a link to a directory.
    The contents of the directory (as well as the directory path itself) will
    be recursively added to the list, subject to any exclusions that are in
    place.  If you only want the directory and its immediate contents to be
    added, then pass in C{recursive=False}.

    @note: If a directory's absolute path matches an exclude pattern or path,
    or if the directory contains the configured ignore file, then the
    directory and all of its contents will be recursively excluded from the
    list.

    @note: If the passed-in directory happens to be a soft link, it will be
    recursed.  However, the linkDepth parameter controls whether any soft
    links I{within} the directory will be recursed.  The link depth is the
    maximum depth of the tree at which soft links should be followed.  So, a
    depth of 0 does not follow any soft links, a depth of 1 follows only
    links within the passed-in directory, a depth of 2 follows the links at
    the next level down, etc.

    @note: Any invalid soft links (i.e. soft links that point to
    non-existent items) will be silently ignored.

    @note: The L{excludeDirs} flag only controls whether any given directory
    path itself is added to the list once it has been discovered.  It does
    I{not} modify any behavior related to directory recursion.

    @note: If you call this method I{on a link to a directory} that link will
    never be dereferenced (it may, however, be followed).

    @param path: Directory path whose contents should be added to the list
    @type path: String representing a path on disk

    @param recursive: Indicates whether directory contents should be added recursively.
    @type recursive: Boolean value

    @param addSelf: Indicates whether the directory itself should be added to the list.
    @type addSelf: Boolean value

    @param linkDepth: Maximum depth of the tree at which soft links should be followed
    @type linkDepth: Integer value, where zero means not to follow any soft links

    @param dereference: Indicates whether soft links, if followed, should be dereferenced
    @type dereference: Boolean value

    @return: Number of items recursively added to the list

    @raise ValueError: If path is not a directory or does not exist.
    @raise ValueError: If the path could not be encoded properly.
    """
    # Encode first, then normalize, so the internal implementation only ever
    # sees the encoded, normalized form of the path.
    path = encodePath(path)
    path = normalizeDir(path)
    # Note the argument order: the internal method takes includePath (our
    # addSelf) before recursive.
    return self._addDirContentsInternal(path, addSelf, recursive, linkDepth, dereference)
420
def _addDirContentsInternal(self, path, includePath=True, recursive=True, linkDepth=0, dereference=False):
    """
    Internal implementation of C{addDirContents}.

    This internal implementation exists due to some refactoring.  Basically,
    some subclasses have a need to add the contents of a directory, but not
    the directory itself.  This is different than the standard C{FilesystemList}
    behavior and actually ends up making a special case out of the first
    call in the recursive chain.  Since I don't want to expose the modified
    interface, C{addDirContents} ends up being wholly implemented in terms
    of this method.

    The linkDepth parameter controls whether soft links are followed when we
    are adding the contents recursively.  Any recursive calls reduce the
    value by one.  If the value is zero or less, then soft links will just be
    added as directories, but will not be followed.  This means that links
    are followed to a I{constant depth} starting from the top-most directory.

    There is one difference between soft links and directories: soft links
    that are added recursively are not placed into the list explicitly.  This
    is because if we do add the links recursively, the resulting tar file
    gets a little confused (it has a link and a directory with the same
    name).

    @note: If you call this method I{on a link to a directory} that link will
    never be dereferenced (it may, however, be followed).

    @param path: Directory path whose contents should be added to the list.
    @param includePath: Indicates whether to include the path as well as contents.
    @param recursive: Indicates whether directory contents should be added recursively.
    @param linkDepth: Depth of soft links that should be followed
    @param dereference: Indicates whether soft links, if followed, should be dereferenced

    @return: Number of items recursively added to the list

    @raise ValueError: If path is not a directory or does not exist.
    """
    added = 0
    if not os.path.exists(path) or not os.path.isdir(path):
        logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
        raise ValueError("Path is not a directory or does not exist on disk.")

    # Exclusion checks: any match prunes this directory and everything below it.
    if path in self.excludePaths:
        logger.debug("Path [%s] is excluded based on excludePaths." % path)
        return added
    for pattern in self.excludePatterns:
        pattern = encodePath(pattern)
        # Patterns are anchored at both ends, so they must match the whole path.
        if re.compile(r"^%s$" % pattern).match(path):
            logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, pattern))
            return added
    for pattern in self.excludeBasenamePatterns:
        pattern = encodePath(pattern)
        if re.compile(r"^%s$" % pattern).match(os.path.basename(path)):
            logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, pattern))
            return added
    if self.ignoreFile is not None and os.path.exists(os.path.join(path, self.ignoreFile)):
        logger.debug("Path [%s] is excluded based on ignore file." % path)
        return added

    if includePath:
        added += self.addDir(path)

    for entry in os.listdir(path):
        entrypath = os.path.join(path, entry)
        if os.path.isfile(entrypath):
            # When dereferencing, add the link target (if different) as well
            # as the entry itself.
            if linkDepth > 0 and dereference:
                derefpath = dereferenceLink(entrypath)
                if derefpath != entrypath:
                    added += self.addFile(derefpath)
            added += self.addFile(entrypath)
        elif os.path.isdir(entrypath):
            if os.path.islink(entrypath):
                if recursive:
                    if linkDepth > 0:
                        newDepth = linkDepth - 1
                        if dereference:
                            derefpath = dereferenceLink(entrypath)
                            if derefpath != entrypath:
                                added += self._addDirContentsInternal(derefpath, True, recursive, newDepth, dereference)
                            added += self.addDir(entrypath)
                        else:
                            # Followed links are not themselves placed in the
                            # list (includePath=False); only their contents are.
                            added += self._addDirContentsInternal(entrypath, False, recursive, newDepth, dereference)
                    else:
                        # Link depth exhausted: add the link as a directory, do not follow.
                        added += self.addDir(entrypath)
                else:
                    added += self.addDir(entrypath)
            else:
                if recursive:
                    # The depth budget shrinks on every level, link or not.
                    newDepth = linkDepth - 1
                    added += self._addDirContentsInternal(entrypath, True, recursive, newDepth, dereference)
                else:
                    added += self.addDir(entrypath)
        # Entries that are neither files nor directories (e.g. broken links)
        # fall through and are skipped.
    return added
511
512
513
514
515
516
518 """
519 Removes file entries from the list.
520
521 If C{pattern} is not passed in or is C{None}, then all file entries will
522 be removed from the list. Otherwise, only those file entries matching
523 the pattern will be removed. Any entry which does not exist on disk
524 will be ignored (use L{removeInvalid} to purge those entries).
525
526 This method might be fairly slow for large lists, since it must check the
527 type of each item in the list. If you know ahead of time that you want
528 to exclude all files, then you will be better off setting L{excludeFiles}
529 to C{True} before adding items to the list.
530
531 @param pattern: Regular expression pattern representing entries to remove
532
533 @return: Number of entries removed
534 @raise ValueError: If the passed-in pattern is not a valid regular expression.
535 """
536 removed = 0
537 if pattern is None:
538 for entry in self[:]:
539 if os.path.exists(entry) and os.path.isfile(entry):
540 self.remove(entry)
541 logger.debug("Removed path [%s] from list." % entry)
542 removed += 1
543 else:
544 try:
545 pattern = encodePath(pattern)
546 compiled = re.compile(pattern)
547 except re.error:
548 raise ValueError("Pattern is not a valid regular expression.")
549 for entry in self[:]:
550 if os.path.exists(entry) and os.path.isfile(entry):
551 if compiled.match(entry):
552 self.remove(entry)
553 logger.debug("Removed path [%s] from list." % entry)
554 removed += 1
555 logger.debug("Removed a total of %d entries." % removed);
556 return removed
557
559 """
560 Removes directory entries from the list.
561
562 If C{pattern} is not passed in or is C{None}, then all directory entries
563 will be removed from the list. Otherwise, only those directory entries
564 matching the pattern will be removed. Any entry which does not exist on
565 disk will be ignored (use L{removeInvalid} to purge those entries).
566
567 This method might be fairly slow for large lists, since it must check the
568 type of each item in the list. If you know ahead of time that you want
569 to exclude all directories, then you will be better off setting
570 L{excludeDirs} to C{True} before adding items to the list (note that this
571 will not prevent you from recursively adding the I{contents} of
572 directories).
573
574 @param pattern: Regular expression pattern representing entries to remove
575
576 @return: Number of entries removed
577 @raise ValueError: If the passed-in pattern is not a valid regular expression.
578 """
579 removed = 0
580 if pattern is None:
581 for entry in self[:]:
582 if os.path.exists(entry) and os.path.isdir(entry):
583 self.remove(entry)
584 logger.debug("Removed path [%s] from list." % entry)
585 removed += 1
586 else:
587 try:
588 pattern = encodePath(pattern)
589 compiled = re.compile(pattern)
590 except re.error:
591 raise ValueError("Pattern is not a valid regular expression.")
592 for entry in self[:]:
593 if os.path.exists(entry) and os.path.isdir(entry):
594 if compiled.match(entry):
595 self.remove(entry)
596 logger.debug("Removed path [%s] from list based on pattern [%s]." % (entry, pattern))
597 removed += 1
598 logger.debug("Removed a total of %d entries." % removed);
599 return removed
600
602 """
603 Removes soft link entries from the list.
604
605 If C{pattern} is not passed in or is C{None}, then all soft link entries
606 will be removed from the list. Otherwise, only those soft link entries
607 matching the pattern will be removed. Any entry which does not exist on
608 disk will be ignored (use L{removeInvalid} to purge those entries).
609
610 This method might be fairly slow for large lists, since it must check the
611 type of each item in the list. If you know ahead of time that you want
612 to exclude all soft links, then you will be better off setting
613 L{excludeLinks} to C{True} before adding items to the list.
614
615 @param pattern: Regular expression pattern representing entries to remove
616
617 @return: Number of entries removed
618 @raise ValueError: If the passed-in pattern is not a valid regular expression.
619 """
620 removed = 0
621 if pattern is None:
622 for entry in self[:]:
623 if os.path.exists(entry) and os.path.islink(entry):
624 self.remove(entry)
625 logger.debug("Removed path [%s] from list." % entry)
626 removed += 1
627 else:
628 try:
629 pattern = encodePath(pattern)
630 compiled = re.compile(pattern)
631 except re.error:
632 raise ValueError("Pattern is not a valid regular expression.")
633 for entry in self[:]:
634 if os.path.exists(entry) and os.path.islink(entry):
635 if compiled.match(entry):
636 self.remove(entry)
637 logger.debug("Removed path [%s] from list based on pattern [%s]." % (entry, pattern))
638 removed += 1
639 logger.debug("Removed a total of %d entries." % removed);
640 return removed
641
643 """
644 Removes from the list all entries matching a pattern.
645
646 This method removes from the list all entries which match the passed in
647 C{pattern}. Since there is no need to check the type of each entry, it
648 is faster to call this method than to call the L{removeFiles},
649 L{removeDirs} or L{removeLinks} methods individually. If you know which
650 patterns you will want to remove ahead of time, you may be better off
651 setting L{excludePatterns} or L{excludeBasenamePatterns} before adding
652 items to the list.
653
654 @note: Unlike when using the exclude lists, the pattern here is I{not}
655 bounded at the front and the back of the string. You can use any pattern
656 you want.
657
658 @param pattern: Regular expression pattern representing entries to remove
659
660 @return: Number of entries removed.
661 @raise ValueError: If the passed-in pattern is not a valid regular expression.
662 """
663 try:
664 pattern = encodePath(pattern)
665 compiled = re.compile(pattern)
666 except re.error:
667 raise ValueError("Pattern is not a valid regular expression.")
668 removed = 0
669 for entry in self[:]:
670 if compiled.match(entry):
671 self.remove(entry)
672 logger.debug("Removed path [%s] from list based on pattern [%s]." % (entry, pattern))
673 removed += 1
674 logger.debug("Removed a total of %d entries." % removed);
675 return removed
676
678 """
679 Removes from the list all entries that do not exist on disk.
680
681 This method removes from the list all entries which do not currently
682 exist on disk in some form. No attention is paid to whether the entries
683 are files or directories.
684
685 @return: Number of entries removed.
686 """
687 removed = 0
688 for entry in self[:]:
689 if not os.path.exists(entry):
690 self.remove(entry)
691 logger.debug("Removed path [%s] from list." % entry)
692 removed += 1
693 logger.debug("Removed a total of %d entries." % removed);
694 return removed
695
696
697
698
699
700
702 """Normalizes the list, ensuring that each entry is unique."""
703 orig = len(self)
704 self.sort()
705 dups = filter(lambda x, self=self: self[x] == self[x+1], range(0, len(self) - 1))
706 items = map(lambda x, self=self: self[x], dups)
707 map(self.remove, items)
708 new = len(self)
709 logger.debug("Completed normalizing list; removed %d items (%d originally, %d now)." % (new-orig, orig, new))
710
712 """
713 Verifies that all entries in the list exist on disk.
714 @return: C{True} if all entries exist, C{False} otherwise.
715 """
716 for entry in self:
717 if not os.path.exists(entry):
718 logger.debug("Path [%s] is invalid; list is not valid." % entry)
719 return False
720 logger.debug("All entries in list are valid.")
721 return True
722
723
724
725
726
727
729 """
730 Item returned by L{BackupFileList.generateSpan}.
731 """
def __init__(self, fileList, size, capacity, utilization):
    """
    Create object.

    All values are stored on the instance exactly as passed in; no
    validation or conversion is performed.

    @param fileList: List of files
    @param size: Size (in bytes) of files
    @param capacity: Capacity that the file list was fitted against
    @param utilization: Utilization, as a percentage (0-100)
    """
    self.fileList = fileList
    self.size = size
    self.capacity = capacity
    self.utilization = utilization
743
744
745
746
747
748
750
751
752
753
754
755 """
756 List of files to be backed up.
757
758 A BackupFileList is a L{FilesystemList} containing a list of files to be
759 backed up. It only contains files, not directories (soft links are treated
760 like files). On top of the generic functionality provided by
761 L{FilesystemList}, this class adds functionality to keep a hash (checksum)
762 for each file in the list, and it also provides a method to calculate the
763 total size of the files in the list and a way to export the list into tar
764 form.
765
766 @sort: __init__, addDir, totalSize, generateSizeMap, generateDigestMap,
767 generateFitted, generateTarfile, removeUnchanged
768 """
769
770
771
772
773
777
778
779
780
781
782
784 """
785 Adds a directory to the list.
786
787 Note that this class does not allow directories to be added by themselves
788 (a backup list contains only files). However, since links to directories
789 are technically files, we allow them to be added.
790
791 This method is implemented in terms of the superclass method, with one
792 additional validation: the superclass method is only called if the
793 passed-in path is both a directory and a link. All of the superclass's
794 existing validations and restrictions apply.
795
796 @param path: Directory path to be added to the list
797 @type path: String representing a path on disk
798
799 @return: Number of items added to the list.
800
801 @raise ValueError: If path is not a directory or does not exist.
802 @raise ValueError: If the path could not be encoded properly.
803 """
804 path = encodePath(path)
805 path = normalizeDir(path)
806 if os.path.isdir(path) and not os.path.islink(path):
807 return 0
808 else:
809 return FilesystemList.addDir(self, path)
810
811
812
813
814
815
817 """
818 Returns the total size among all files in the list.
819 Only files are counted.
820 Soft links that point at files are ignored.
821 Entries which do not exist on disk are ignored.
822 @return: Total size, in bytes
823 """
824 total = 0.0
825 for entry in self:
826 if os.path.isfile(entry) and not os.path.islink(entry):
827 total += float(os.stat(entry).st_size)
828 return total
829
831 """
832 Generates a mapping from file to file size in bytes.
833 The mapping does include soft links, which are listed with size zero.
834 Entries which do not exist on disk are ignored.
835 @return: Dictionary mapping file to file size
836 """
837 table = { }
838 for entry in self:
839 if os.path.islink(entry):
840 table[entry] = 0.0
841 elif os.path.isfile(entry):
842 table[entry] = float(os.stat(entry).st_size)
843 return table
844
846 """
847 Generates a mapping from file to file digest.
848
849 Currently, the digest is an SHA hash, which should be pretty secure. In
850 the future, this might be a different kind of hash, but we guarantee that
851 the type of the hash will not change unless the library major version
852 number is bumped.
853
854 Entries which do not exist on disk are ignored.
855
856 Soft links are ignored. We would end up generating a digest for the file
857 that the soft link points at, which doesn't make any sense.
858
859 If C{stripPrefix} is passed in, then that prefix will be stripped from
860 each key when the map is generated. This can be useful in generating two
861 "relative" digest maps to be compared to one another.
862
863 @param stripPrefix: Common prefix to be stripped from paths
864 @type stripPrefix: String with any contents
865
866 @return: Dictionary mapping file to digest value
867 @see: L{removeUnchanged}
868 """
869 table = { }
870 if stripPrefix is not None:
871 for entry in self:
872 if os.path.isfile(entry) and not os.path.islink(entry):
873 table[entry.replace(stripPrefix, "", 1)] = BackupFileList._generateDigest(entry)
874 else:
875 for entry in self:
876 if os.path.isfile(entry) and not os.path.islink(entry):
877 table[entry] = BackupFileList._generateDigest(entry)
878 return table
879
881 """
882 Generates an SHA digest for a given file on disk.
883
884 The original code for this function used this simplistic implementation,
885 which requires reading the entire file into memory at once in order to
886 generate a digest value::
887
888 sha.new(open(path).read()).hexdigest()
889
890 Not surprisingly, this isn't an optimal solution. The U{Simple file
891 hashing <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259109>}
892 Python Cookbook recipe describes how to incrementally generate a hash
893 value by reading in chunks of data rather than reading the file all at
894 once. The recipe relies on the C{update()} method of the various
895 Python hashing algorithms.
896
897 In my tests using a 110 MB file on CD, the original implementation
898 requires 111 seconds. This implementation requires only 40-45 seconds,
899 which is a pretty substantial speed-up.
900
901 Experience shows that reading in around 4kB (4096 bytes) at a time yields
902 the best performance. Smaller reads are quite a bit slower, and larger
903 reads don't make much of a difference. The 4kB number makes me a little
904 suspicious, and I think it might be related to the size of a filesystem
905 read at the hardware level. However, I've decided to just hardcode 4096
906 until I have evidence that shows it's worthwhile making the read size
907 configurable.
908
909 @param path: Path to generate digest for.
910
911 @return: ASCII-safe SHA digest for the file.
912 @raise OSError: If the file cannot be opened.
913 """
914 try:
915 import hashlib
916 s = hashlib.sha1()
917 except:
918 import sha
919 s = sha.new()
920 f = open(path, mode="rb")
921 readBytes = 4096
922 while(readBytes > 0):
923 readString = f.read(readBytes)
924 s.update(readString)
925 readBytes = len(readString)
926 f.close()
927 digest = s.hexdigest()
928 logger.debug("Generated digest [%s] for file [%s]." % (digest, path))
929 return digest
930 _generateDigest = staticmethod(_generateDigest)
931
933 """
934 Generates a list of items that fit in the indicated capacity.
935
936 Sometimes, callers would like to include every item in a list, but are
937 unable to because not all of the items fit in the space available. This
938 method returns a copy of the list, containing only the items that fit in
939 a given capacity. A copy is returned so that we don't lose any
940 information if for some reason the fitted list is unsatisfactory.
941
942 The fitting is done using the functions in the knapsack module. By
943 default, the first fit algorithm is used, but you can also choose
944 from best fit, worst fit and alternate fit.
945
946 @param capacity: Maximum capacity among the files in the new list
947 @type capacity: Integer, in bytes
948
949 @param algorithm: Knapsack (fit) algorithm to use
950 @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"
951
952 @return: Copy of list with total size no larger than indicated capacity
953 @raise ValueError: If the algorithm is invalid.
954 """
955 table = self._getKnapsackTable()
956 function = BackupFileList._getKnapsackFunction(algorithm)
957 return function(table, capacity)[0]
958
960 """
961 Splits the list of items into sub-lists that fit in a given capacity.
962
963 Sometimes, callers need to split a backup file list into a set of smaller
964 lists. For instance, you could use this to "span" the files across a set
965 of discs.
966
967 The fitting is done using the functions in the knapsack module. By
968 default, the first fit algorithm is used, but you can also choose
969 from best fit, worst fit and alternate fit.
970
971 @note: If any of your items are larger than the capacity, then it won't
972 be possible to find a solution. In this case, a value error will be
973 raised.
974
975 @param capacity: Maximum capacity among the files in the new list
976 @type capacity: Integer, in bytes
977
978 @param algorithm: Knapsack (fit) algorithm to use
979 @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"
980
981 @return: List of L{SpanItem} objects.
982
983 @raise ValueError: If the algorithm is invalid.
984 @raise ValueError: If it's not possible to fit some items
985 """
986 spanItems = []
987 function = BackupFileList._getKnapsackFunction(algorithm)
988 table = self._getKnapsackTable(capacity)
989 iteration = 0
990 while len(table) > 0:
991 iteration += 1
992 fit = function(table, capacity)
993 if len(fit[0]) == 0:
994
995 raise ValueError("After iteration %d, unable to add any new items." % iteration)
996 removeKeys(table, fit[0])
997 utilization = (float(fit[1])/float(capacity))*100.0
998 item = SpanItem(fit[0], fit[1], capacity, utilization)
999 spanItems.append(item)
1000 return spanItems
1001
def _getKnapsackTable(self, capacity=None):
    """
    Builds the lookup table consumed by the knapsack (fit) functions.

    Each entry in the list that is a soft link is recorded with a size of
    zero.  Each entry that is a regular file is recorded with the size
    reported by C{os.stat}.  Entries that are neither a link nor a file
    (for instance, paths that no longer exist) are left out of the table.

    @param capacity: Optional upper bound to validate each file size against
    @type capacity: Number, in bytes, or C{None} to skip the check

    @return: Dictionary mapping file name to tuple of (file path, file size).
    @raise ValueError: If a capacity is given and some file is larger than it.
    """
    knapsack = {}
    for item in self:
        # Links are checked first so that a link to a file counts as zero bytes.
        if os.path.islink(item):
            knapsack[item] = (item, 0.0)
            continue
        if not os.path.isfile(item):
            continue
        fileSize = float(os.stat(item).st_size)
        if capacity is not None and fileSize > capacity:
            raise ValueError("File [%s] cannot fit in capacity %s." % (item, displayBytes(capacity)))
        knapsack[item] = (item, fileSize)
    return knapsack
1018
def _getKnapsackFunction(algorithm):
    """
    Looks up the knapsack function that implements a named fit algorithm.

    @param algorithm: Name of the algorithm
    @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

    @return: Reference to knapsack function
    @raise ValueError: If the algorithm name is unknown.
    """
    # Reject unknown names up front, then dispatch through a lookup table.
    if algorithm not in ("first_fit", "best_fit", "worst_fit", "alternate_fit"):
        raise ValueError("Algorithm [%s] is invalid." % algorithm)
    dispatch = {
        "first_fit": firstFit,
        "best_fit": bestFit,
        "worst_fit": worstFit,
        "alternate_fit": alternateFit,
    }
    return dispatch[algorithm]
_getKnapsackFunction = staticmethod(_getKnapsackFunction)
1038
1040 """
1041 Creates a tar file containing the files in the list.
1042
1043 By default, this method will create uncompressed tar files. If you pass
1044 in mode C{'targz'}, then it will create gzipped tar files, and if you
1045 pass in mode C{'tarbz2'}, then it will create bzipped tar files.
1046
1047 The tar file will be created as a GNU tar archive, which enables extended
1048 file name lengths, etc. Since GNU tar is so prevalent, I've decided that
1049 the extra functionality out-weighs the disadvantage of not being
1050 "standard".
1051
1052 If you pass in C{flat=True}, then a "flat" archive will be created, and
1053 all of the files will be added to the root of the archive. So, the file
1054 C{/tmp/something/whatever.txt} would be added as just C{whatever.txt}.
1055
1056 By default, the whole method call fails if there are problems adding any
1057 of the files to the archive, resulting in an exception. Under these
1058 circumstances, callers are advised that they might want to call
1059 L{removeInvalid()} and then attempt to create the tar file a second
1060 time, since the most common cause of failures is a missing file (a file
1061 that existed when the list was built, but is gone again by the time the
1062 tar file is built).
1063
1064 If you want to, you can pass in C{ignore=True}, and the method will
1065 ignore errors encountered when adding individual files to the archive
1066 (but not errors opening and closing the archive itself).
1067
1068 We'll always attempt to remove the tarfile from disk if an exception will
1069 be thrown.
1070
1071 @note: No validation is done as to whether the entries in the list are
1072 files, since only files or soft links should be in an object like this.
1073 However, to be safe, everything is explicitly added to the tar archive
1074 non-recursively so it's safe to include soft links to directories.
1075
1076 @note: The Python C{tarfile} module, which is used internally here, is
1077 supposed to deal properly with long filenames and links. In my testing,
1078 I have found that it appears to be able to add really long filenames
1079 to archives, but doesn't do a good job reading them back out, even out of
1080 an archive it created. Fortunately, all Cedar Backup does is add files
1081 to archives.
1082
1083 @param path: Path of tar file to create on disk
1084 @type path: String representing a path on disk
1085
1086 @param mode: Tar creation mode
1087 @type mode: One of either C{'tar'}, C{'targz'} or C{'tarbz2'}
1088
1089 @param ignore: Indicates whether to ignore certain errors.
1090 @type ignore: Boolean
1091
1092 @param flat: Creates "flat" archive by putting all items in root
1093 @type flat: Boolean
1094
1095 @raise ValueError: If mode is not valid
1096 @raise ValueError: If list is empty
1097 @raise ValueError: If the path could not be encoded properly.
1098 @raise TarError: If there is a problem creating the tar file
1099 """
1100 path = encodePath(path)
1101 if len(self) == 0: raise ValueError("Empty list cannot be used to generate tarfile.")
1102 if(mode == 'tar'): tarmode = "w:"
1103 elif(mode == 'targz'): tarmode = "w:gz"
1104 elif(mode == 'tarbz2'): tarmode = "w:bz2"
1105 else: raise ValueError("Mode [%s] is not valid." % mode)
1106 try:
1107 tar = tarfile.open(path, tarmode)
1108 try:
1109 tar.format = tarfile.GNU_FORMAT
1110 except:
1111 tar.posix = False
1112 for entry in self:
1113 try:
1114 if flat:
1115 tar.add(entry, arcname=os.path.basename(entry), recursive=False)
1116 else:
1117 tar.add(entry, recursive=False)
1118 except tarfile.TarError, e:
1119 if not ignore:
1120 raise e
1121 logger.info("Unable to add file [%s]; going on anyway." % entry)
1122 except OSError, e:
1123 if not ignore:
1124 raise tarfile.TarError(e)
1125 logger.info("Unable to add file [%s]; going on anyway." % entry)
1126 tar.close()
1127 except tarfile.ReadError, e:
1128 try: tar.close()
1129 except: pass
1130 if os.path.exists(path):
1131 try: os.remove(path)
1132 except: pass
1133 raise tarfile.ReadError("Unable to open [%s]; maybe directory doesn't exist?" % path)
1134 except tarfile.TarError, e:
1135 try: tar.close()
1136 except: pass
1137 if os.path.exists(path):
1138 try: os.remove(path)
1139 except: pass
1140 raise e
1141
1143 """
1144 Removes unchanged entries from the list.
1145
1146 This method relies on a digest map as returned from L{generateDigestMap}.
1147 For each entry in C{digestMap}, if the entry also exists in the current
1148 list I{and} the entry in the current list has the same digest value as in
1149 the map, the entry in the current list will be removed.
1150
1151 This method offers a convenient way for callers to filter unneeded
1152 entries from a list. The idea is that a caller will capture a digest map
1153 from C{generateDigestMap} at some point in time (perhaps the beginning of
1154 the week), and will save off that map using C{pickle} or some other
1155 method. Then, the caller could use this method sometime in the future to
1156 filter out any unchanged files based on the saved-off map.
1157
1158 If C{captureDigest} is passed-in as C{True}, then digest information will
1159 be captured for the entire list before the removal step occurs using the
1160 same rules as in L{generateDigestMap}. The check will involve a lookup
1161 into the complete digest map.
1162
1163 If C{captureDigest} is passed in as C{False}, we will only generate a
1164 digest value for files we actually need to check, and we'll ignore any
1165 entry in the list which isn't a file that currently exists on disk.
1166
1167 The return value varies depending on C{captureDigest}, as well. To
1168 preserve backwards compatibility, if C{captureDigest} is C{False}, then
1169 we'll just return a single value representing the number of entries
1170 removed. Otherwise, we'll return a tuple of C{(entries removed, digest
1171 map)}. The returned digest map will be in exactly the form returned by
1172 L{generateDigestMap}.
1173
1174 @note: For performance reasons, this method actually ends up rebuilding
1175 the list from scratch. First, we build a temporary dictionary containing
1176 all of the items from the original list. Then, we remove items as needed
1177 from the dictionary (which is faster than the equivalent operation on a
1178 list). Finally, we replace the contents of the current list based on the
1179 keys left in the dictionary. This should be transparent to the caller.
1180
1181 @param digestMap: Dictionary mapping file name to digest value.
1182 @type digestMap: Map as returned from L{generateDigestMap}.
1183
1184 @param captureDigest: Indicates that digest information should be captured.
1185 @type captureDigest: Boolean
1186
1187 @return: Number of entries removed, or a tuple of C{(entries removed, digest map)} if C{captureDigest} is C{True}
1188 """
1189 if captureDigest:
1190 removed = 0
1191 table = {}
1192 captured = {}
1193 for entry in self:
1194 if os.path.isfile(entry) and not os.path.islink(entry):
1195 table[entry] = BackupFileList._generateDigest(entry)
1196 captured[entry] = table[entry]
1197 else:
1198 table[entry] = None
1199 for entry in digestMap.keys():
1200 if table.has_key(entry):
1201 if table[entry] is not None:
1202 digest = table[entry]
1203 if digest == digestMap[entry]:
1204 removed += 1
1205 del table[entry]
1206 logger.debug("Discarded unchanged file [%s]." % entry)
1207 self[:] = table.keys()
1208 return (removed, captured)
1209 else:
1210 removed = 0
1211 table = {}
1212 for entry in self:
1213 table[entry] = None
1214 for entry in digestMap.keys():
1215 if table.has_key(entry):
1216 if os.path.isfile(entry) and not os.path.islink(entry):
1217 digest = BackupFileList._generateDigest(entry)
1218 if digest == digestMap[entry]:
1219 removed += 1
1220 del table[entry]
1221 logger.debug("Discarded unchanged file [%s]." % entry)
1222 self[:] = table.keys()
1223 return removed
1224
1225
1226
1227
1228
1229
1231
1232
1233
1234
1235
1236 """
1237 List of files and directories to be purged.
1238
1239 A PurgeItemList is a L{FilesystemList} containing a list of files and
1240 directories to be purged. On top of the generic functionality provided by
1241 L{FilesystemList}, this class adds functionality to remove items that are
1242 too young to be purged, and to actually remove each item in the list from
1243 the filesystem.
1244
1245 The other main difference is that when you add a directory's contents to a
1246 purge item list, the directory itself is not added to the list. This way,
1247 if someone asks to purge within C{/opt/backup/collect}, that directory
1248 doesn't get removed once all of the files within it are gone.
1249 """
1250
1251
1252
1253
1254
1258
1259
1260
1261
1262
1263
def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
    """
    Adds the contents of a directory to the list, but never the directory itself.

    The path must exist and must be a directory or a link to a directory.
    The contents of the directory (but I{not} the directory path itself) will
    be added to the list, subject to any exclusions that are in place.  The
    contents are added recursively unless you pass in C{recursive=False}.

    @note: If a directory's absolute path matches an exclude pattern or path,
    or if the directory contains the configured ignore file, then the
    directory and all of its contents will be recursively excluded from the
    list.

    @note: If the passed-in directory happens to be a soft link, it will be
    recursed.  However, the linkDepth parameter controls whether any soft
    links I{within} the directory will be recursed.  The link depth is the
    maximum depth of the tree at which soft links should be followed.  So, a
    depth of 0 does not follow any soft links, a depth of 1 follows only
    links within the passed-in directory, a depth of 2 follows the links at
    the next level down, etc.

    @note: Any invalid soft links (i.e. soft links that point to
    non-existent items) will be silently ignored.

    @note: The L{excludeDirs} flag only controls whether any given directory
    (or soft link) path itself is added to the list once it has been
    discovered.  It does I{not} modify any behavior related to directory
    recursion.

    @note: If you call this method I{on a link to a directory} that link will
    never be dereferenced (it may, however, be followed).

    @param path: Directory path whose contents should be added to the list
    @type path: String representing a path on disk

    @param recursive: Indicates whether directory contents should be added recursively.
    @type recursive: Boolean value

    @param addSelf: Ignored in this subclass.

    @param linkDepth: Depth of soft links that should be followed
    @type linkDepth: Integer value, where zero means not to follow any soft links

    @param dereference: Indicates whether soft links, if followed, should be dereferenced
    @type dereference: Boolean value

    @return: Number of items recursively added to the list

    @raise ValueError: If path is not a directory or does not exist.
    @raise ValueError: If the path could not be encoded properly.
    """
    normalized = normalizeDir(encodePath(path))
    # addSelf is deliberately not forwarded.  The literal False is presumably
    # the parent's "add the directory itself" flag -- confirm against
    # FilesystemList._addDirContentsInternal.
    parent = super(PurgeItemList, self)
    return parent._addDirContentsInternal(normalized, False, recursive, linkDepth, dereference)
1323
1324
1325
1326
1327
1328
1330 """
1331 Removes from the list files younger than a certain age (in days).
1332
1333 Any file whose "age" in days is less than (C{<}) the value of the
1334 C{daysOld} parameter will be removed from the list so that it will not be
1335 purged later when L{purgeItems} is called. Directories and soft links
1336 will be ignored.
1337
1338 The "age" of a file is the amount of time since the file was last used,
1339 per the most recent of the file's C{st_atime} and C{st_mtime} values.
1340
1341 @note: Some people find the "sense" of this method confusing or
1342 "backwards". Keep in mind that this method is used to remove items
1343 I{from the list}, not from the filesystem! It removes from the list
1344 those items that you would I{not} want to purge because they are too
1345 young. As an example, passing in C{daysOld} of zero (0) would remove
1346 from the list no files, which would result in purging all of the files
1347 later. I would be happy to make a synonym of this method with an
1348 easier-to-understand "sense", if someone can suggest one.
1349
1350 @param daysOld: Minimum age of files that are to be kept in the list.
1351 @type daysOld: Integer value >= 0.
1352
1353 @return: Number of entries removed
1354 """
1355 removed = 0
1356 daysOld = int(daysOld)
1357 if daysOld < 0:
1358 raise ValueError("Days old value must be an integer >= 0.")
1359 for entry in self[:]:
1360 if os.path.isfile(entry) and not os.path.islink(entry):
1361 try:
1362 ageInDays = calculateFileAge(entry)
1363 ageInWholeDays = math.floor(ageInDays)
1364 if ageInWholeDays < daysOld:
1365 removed += 1
1366 self.remove(entry)
1367 except OSError:
1368 pass
1369 return removed
1370
def purgeItems(self):
    """
    Purges all items in the list.

    Every item in the list will be purged.  Directories in the list will
    I{not} be purged recursively, and hence will only be removed if they are
    empty.  Errors will be ignored.

    To facilitate easy removal of directories that will end up being empty,
    the delete process happens in two passes: files first (including soft
    links), then directories.

    @return: Tuple containing count of (files, dirs) removed
    """
    purgedFiles = 0
    purgedDirs = 0

    # Pass one: regular files and soft links, so directories can empty out.
    for item in self:
        if not os.path.exists(item):
            continue
        if not (os.path.isfile(item) or os.path.islink(item)):
            continue
        try:
            os.remove(item)
            purgedFiles += 1
            logger.debug("Purged file [%s]." % item)
        except OSError:
            pass

    # Pass two: real directories only; rmdir fails (and is ignored) unless empty.
    for item in self:
        if not os.path.exists(item):
            continue
        if os.path.islink(item) or not os.path.isdir(item):
            continue
        try:
            os.rmdir(item)
            purgedDirs += 1
            logger.debug("Purged empty directory [%s]." % item)
        except OSError:
            pass

    return (purgedFiles, purgedDirs)
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
def normalizeDir(path):
    """
    Normalizes a directory name.

    A directory name is normalized by dropping a single trailing path
    separator, if there is one.  This keeps directories consistent within
    lists, since from the user's perspective C{/path/to/dir/} and
    C{/path/to/dir} are equivalent.  A path that consists of only the
    separator is returned unchanged.

    @param path: Path to be normalized.
    @type path: String representing a path on disk

    @return: Normalized path, which should be equivalent to the original.
    """
    # The bare separator is the filesystem root; stripping it would leave nothing.
    if path == os.sep:
        return path
    endsWithSeparator = (path[-1:] == os.sep)
    if endsWithSeparator:
        return path[:-1]
    return path
1431
1432
1433
1434
1435
1436
def compareContents(path1, path2, verbose=False):
    """
    Compares the contents of two directories to see if they are equivalent.

    The two directories are recursively compared.  First, we check whether they
    contain exactly the same set of files.  Then, we check to see every given
    file has exactly the same contents in both directories.

    This is all relatively simple to implement through the magic of
    L{BackupFileList.generateDigestMap}, which knows how to strip a path prefix
    off the front of each entry in the mapping it generates.  This makes our
    comparison as simple as creating a list for each path, then generating a
    digest map for each path and comparing the two.

    If no exception is thrown, the two directories are considered identical.

    If the C{verbose} flag is C{True}, then an alternate (but slower) method is
    used so that any thrown exception can indicate exactly which file caused the
    comparison to fail.  The thrown C{ValueError} exception distinguishes
    between the directories containing different files, and containing the same
    files with differing content.

    @note: Symlinks are I{not} followed for the purposes of this comparison.

    @param path1: First path to compare.
    @type path1: String representing a path on disk

    @param path2: Second path to compare.
    @type path2: String representing a path on disk

    @param verbose: Indicates whether a verbose response should be given.
    @type verbose: Boolean

    @raise ValueError: If a directory doesn't exist or can't be read.
    @raise ValueError: If the two directories are not equivalent.
    @raise IOError: If there is an unusual problem reading the directories.
    """
    try:
        path1List = BackupFileList()
        path1List.addDirContents(path1)
        # Strip each root so both maps are keyed by the same relative names.
        path1Digest = path1List.generateDigestMap(stripPrefix=normalizeDir(path1))
        path2List = BackupFileList()
        path2List.addDirContents(path2)
        path2Digest = path2List.generateDigestMap(stripPrefix=normalizeDir(path2))
        compareDigestMaps(path1Digest, path2Digest, verbose)
    except IOError:
        logger.error("I/O error encountered during consistency check.")
        # A bare raise re-raises the active exception with its original
        # traceback; "raise e" would restart the traceback at this line.
        raise
1485
def compareDigestMaps(digest1, digest2, verbose=False):
    """
    Compares two digest maps and throws an exception if they differ.

    In non-verbose mode the two maps are compared as a whole.  In verbose
    mode the sets of keys are compared first, and then the digest values are
    compared key by key, so that the exception can say which of the two kinds
    of difference was found.

    @param digest1: First digest to compare.
    @type digest1: Digest as returned from BackupFileList.generateDigestMap()

    @param digest2: Second digest to compare.
    @type digest2: Digest as returned from BackupFileList.generateDigestMap()

    @param verbose: Indicates whether a verbose response should be given.
    @type verbose: Boolean

    @raise ValueError: If the two directories are not equivalent.
    """
    if not verbose:
        # Fast path: a single whole-map comparison, with a generic message.
        if digest1 != digest2:
            raise ValueError("Consistency check failed.")
        return
    names1 = UnorderedList(digest1.keys())
    names2 = UnorderedList(digest2.keys())
    if names1 != names2:
        raise ValueError("Directories contain a different set of files.")
    for name in names1:
        if digest1[name] != digest2[name]:
            raise ValueError("File contents for [%s] vary between directories." % name)
1512