1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""
Provides an extension to split up large files in staging directories.

When this extension is executed, it will look through the configured Cedar
Backup staging directory for files exceeding a specified size limit, and split
them down into smaller files using the 'split' utility.  Any directory which
has already been split (as indicated by the C{cback.split} file) will be
ignored.

This extension requires a new configuration section <split> and is intended
to be run immediately after the standard stage action or immediately before the
standard store action.  Aside from its own configuration, it requires the
options and staging configuration sections in the standard Cedar Backup
configuration file.

@author: Kenneth J. Pronovici <pronovic@ieee.org>
"""
56
57
58
59
60
61
62 import os
63 import re
64 import logging
65
66
67 from CedarBackup2.filesystem import FilesystemList
68 from CedarBackup2.util import resolveCommand, executeCommand, convertSize
69 from CedarBackup2.util import changeOwnership, buildNormalizedPath
70 from CedarBackup2.util import UNIT_BYTES, UNIT_KBYTES, UNIT_MBYTES, UNIT_GBYTES
71 from CedarBackup2.xmlutil import createInputDom, addContainerNode, addStringNode
72 from CedarBackup2.xmlutil import readFirstChild, readString
73 from CedarBackup2.actions.util import findDailyDirs, writeIndicatorFile, getBackupFiles
74
75
76
77
78
79
# Module-level logger used by everything in this extension.
logger = logging.getLogger("CedarBackup2.log.extend.split")

# Command that performs the split, in the list form handed to resolveCommand().
SPLIT_COMMAND = [ "split", ]

# Name of the indicator file that marks a directory as already split.
SPLIT_INDICATOR = "cback.split"

# Units that a ByteQuantity is allowed to carry.
VALID_BYTE_UNITS = [ UNIT_BYTES, UNIT_KBYTES, UNIT_MBYTES, UNIT_GBYTES, ]
86
87
88
89
90
91
class ByteQuantity(object):

    """
    Class representing a byte quantity.

    A byte quantity has both a quantity and a byte-related unit.  Units are
    maintained using the constants from util.py.

    The quantity is maintained internally as a string so that issues of
    precision can be avoided.  It really isn't possible to store a floating
    point number here while being able to losslessly translate back and forth
    between XML and object representations.  (Perhaps the Python 2.4 Decimal
    class would have been an option, but I want to stay compatible with Python
    2.3.)

    Even though the quantity is maintained as a string, the string must be in a
    valid floating point positive number.  Technically, any floating point
    string format supported by Python is allowable.  However, it does not make
    sense to have a negative quantity of bytes in this context.

    @sort: __init__, __repr__, __str__, __cmp__, quantity, units
    """

    def __init__(self, quantity=None, units=None):
        """
        Constructor for the C{ByteQuantity} class.

        Both values are assigned through their properties, so they are
        validated on the way in.

        @param quantity: Quantity of bytes, as string ("1.25")
        @param units: Unit of bytes, one of VALID_BYTE_UNITS

        @raise ValueError: If one of the values is invalid.
        """
        self._quantity = None
        self._units = None
        self.quantity = quantity
        self.units = units

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        return "ByteQuantity(%s, %s)" % (self.quantity, self.units)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    def __cmp__(self, other):
        """
        Three-way comparison against another instance.

        The quantity is compared first and the units second.  Note that the
        quantity is compared in its stored string form, not numerically, so
        C{"1.0"} and C{"1.00"} are considered different.

        @param other: Other object to compare to.
        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
        """
        if other is None:
            return 1
        if self._quantity != other._quantity:
            if self._quantity < other._quantity:
                return -1
            else:
                return 1
        if self._units != other._units:
            if self._units < other._units:
                return -1
            else:
                return 1
        return 0

    def _setQuantity(self, value):
        """
        Property target used to set the quantity
        The value must be a non-empty string if it is not C{None}.
        @raise ValueError: If the value is not a string.
        @raise ValueError: If the value is an empty string.
        @raise ValueError: If the value is not a valid floating point number
        @raise ValueError: If the value is less than zero
        """
        if value is not None:
            try:
                if len(value) < 1:
                    raise ValueError("Quantity must be a non-empty string.")
                floatValue = float(value)
            except TypeError:
                # len() or float() rejected the type outright (for instance an
                # int was passed); report it the same way as any other invalid
                # value instead of leaking a TypeError to the caller.
                raise ValueError("Quantity must be a string containing a floating point number.")
            if floatValue < 0.0:
                raise ValueError("Quantity cannot be negative.")
        # The original string is stored, not the float, to avoid precision loss.
        self._quantity = value

    def _getQuantity(self):
        """
        Property target used to get the quantity.
        """
        return self._quantity

    def _setUnits(self, value):
        """
        Property target used to set the units value.
        If not C{None}, the units value must be one of the values in L{VALID_BYTE_UNITS}.
        @raise ValueError: If the value is not valid.
        """
        if value is not None:
            if value not in VALID_BYTE_UNITS:
                raise ValueError("Units value must be one of %s." % VALID_BYTE_UNITS)
        self._units = value

    def _getUnits(self):
        """
        Property target used to get the units value.
        """
        return self._units

    quantity = property(_getQuantity, _setQuantity, None, doc="Byte quantity, as a string")
    units = property(_getUnits, _setUnits, None, doc="Units for byte quantity, for instance UNIT_BYTES")
203
204
205
206
207
208
class SplitConfig(object):

    """
    Class representing split configuration.

    Split configuration is used for splitting staging directories.

    The following restrictions exist on data in this class:

       - The size limit must be a ByteQuantity
       - The split size must be a ByteQuantity

    @sort: __init__, __repr__, __str__, __cmp__, sizeLimit, splitSize
    """

    def __init__(self, sizeLimit=None, splitSize=None):
        """
        Constructor for the C{SplitConfig} class.

        Both values are assigned through their properties, so they are
        validated on the way in.

        @param sizeLimit: Size limit of the files, as a C{ByteQuantity}
        @param splitSize: Size that files exceeding the limit will be split into, as a C{ByteQuantity}

        @raise ValueError: If one of the values is invalid.
        """
        self._sizeLimit = None
        self._splitSize = None
        self.sizeLimit = sizeLimit
        self.splitSize = splitSize

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        return "SplitConfig(%s, %s)" % (self.sizeLimit, self.splitSize)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    def __cmp__(self, other):
        """
        Three-way comparison against another instance.

        The size limit is compared first and the split size second.

        @param other: Other object to compare to.
        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
        """
        if other is None:
            return 1
        if self._sizeLimit != other._sizeLimit:
            if self._sizeLimit < other._sizeLimit:
                return -1
            else:
                return 1
        if self._splitSize != other._splitSize:
            if self._splitSize < other._splitSize:
                return -1
            else:
                return 1
        return 0

    def _setSizeLimit(self, value):
        """
        Property target used to set the size limit.
        If not C{None}, the value must be a C{ByteQuantity} object.
        @raise ValueError: If the value is not a C{ByteQuantity}
        """
        if value is not None and not isinstance(value, ByteQuantity):
            raise ValueError("Value must be a C{ByteQuantity} object.")
        self._sizeLimit = value

    def _getSizeLimit(self):
        """
        Property target used to get the size limit.
        """
        return self._sizeLimit

    def _setSplitSize(self, value):
        """
        Property target used to set the split size.
        If not C{None}, the value must be a C{ByteQuantity} object.
        @raise ValueError: If the value is not a C{ByteQuantity}
        """
        if value is not None and not isinstance(value, ByteQuantity):
            raise ValueError("Value must be a C{ByteQuantity} object.")
        self._splitSize = value

    def _getSplitSize(self):
        """
        Property target used to get the split size.
        """
        return self._splitSize

    sizeLimit = property(_getSizeLimit, _setSizeLimit, None, doc="Size limit, as a ByteQuantity")
    splitSize = property(_getSplitSize, _setSplitSize, None, doc="Split size, as a ByteQuantity")
311
312
313
314
315
316
class LocalConfig(object):

    """
    Class representing this extension's configuration document.

    This is not a general-purpose configuration object like the main Cedar
    Backup configuration object.  Instead, it just knows how to parse and emit
    split-specific configuration values.  Third parties who need to read and
    write configuration related to this extension should access it through the
    constructor, C{validate} and C{addConfig} methods.

    @sort: __init__, __repr__, __str__, __cmp__, split, validate, addConfig
    """

    def __init__(self, xmlData=None, xmlPath=None, validate=True):
        """
        Initializes a configuration object.

        If you initialize the object without passing either C{xmlData} or
        C{xmlPath} then configuration will be empty and will be invalid until it
        is filled in properly.

        No reference to the original XML data or original path is saved off by
        this class.  Once the data has been parsed (successfully or not) this
        original information is discarded.

        Unless the C{validate} argument is C{False}, the L{LocalConfig.validate}
        method will be called (with its default arguments) against configuration
        after successfully parsing any passed-in XML.  Keep in mind that even if
        C{validate} is C{False}, it might not be possible to parse the passed-in
        XML document if lower-level validations fail.

        @note: It is strongly suggested that the C{validate} option always be set
        to C{True} (the default) unless there is a specific need to read in
        invalid configuration from disk.

        @param xmlData: XML data representing configuration.
        @type xmlData: String data.

        @param xmlPath: Path to an XML file on disk.
        @type xmlPath: Absolute path to a file on disk.

        @param validate: Validate the document after parsing it.
        @type validate: Boolean true/false.

        @raise ValueError: If both C{xmlData} and C{xmlPath} are passed-in.
        @raise ValueError: If the XML data in C{xmlData} or C{xmlPath} cannot be parsed.
        @raise ValueError: If the parsed configuration document is not valid.
        """
        self._split = None
        self.split = None
        if xmlData is not None and xmlPath is not None:
            raise ValueError("Use either xmlData or xmlPath, but not both.")
        if xmlPath is not None:
            # Read the file and close the handle explicitly rather than
            # relying on garbage collection to release it.
            handle = open(xmlPath)
            try:
                xmlData = handle.read()
            finally:
                handle.close()
        if xmlData is not None:
            self._parseXmlData(xmlData)
            if validate:
                self.validate()

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        return "LocalConfig(%s)" % (self.split,)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    def __cmp__(self, other):
        """
        Three-way comparison against another instance, based on the split
        configuration section.

        @param other: Other object to compare to.
        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
        """
        if other is None:
            return 1
        if self._split != other._split:
            if self._split < other._split:
                return -1
            else:
                return 1
        return 0

    def _setSplit(self, value):
        """
        Property target used to set the split configuration value.
        If not C{None}, the value must be a C{SplitConfig} object.
        @raise ValueError: If the value is not a C{SplitConfig}
        """
        if value is not None and not isinstance(value, SplitConfig):
            raise ValueError("Value must be a C{SplitConfig} object.")
        self._split = value

    def _getSplit(self):
        """
        Property target used to get the split configuration value.
        """
        return self._split

    split = property(_getSplit, _setSplit, None, "Split configuration in terms of a C{SplitConfig} object.")

    def validate(self):
        """
        Validates configuration represented by the object.

        Split configuration must be filled in.  Within that, both the size limit
        and split size must be filled in.

        @raise ValueError: If one of the validations fails.
        """
        if self.split is None:
            raise ValueError("Split section is required.")
        if self.split.sizeLimit is None:
            raise ValueError("Size limit must be set.")
        if self.split.splitSize is None:
            raise ValueError("Split size must be set.")

    def addConfig(self, xmlDom, parentNode):
        """
        Adds a <split> configuration section as the next child of a parent.

        Third parties should use this function to write configuration related to
        this extension.  Nothing is written if no split configuration is set.

        We add the following fields to the document::

           sizeLimit      //cb_config/split/size_limit
           splitSize      //cb_config/split/split_size

        @param xmlDom: DOM tree as from C{impl.createDocument()}.
        @param parentNode: Parent that the section should be appended to.
        """
        if self.split is not None:
            sectionNode = addContainerNode(xmlDom, parentNode, "split")
            LocalConfig._addByteQuantityNode(xmlDom, sectionNode, "size_limit", self.split.sizeLimit)
            LocalConfig._addByteQuantityNode(xmlDom, sectionNode, "split_size", self.split.splitSize)

    def _parseXmlData(self, xmlData):
        """
        Internal method to parse an XML string into the object.

        This method parses the XML document into a DOM tree (C{xmlDom}) and then
        calls a static method to parse the split configuration section.

        @param xmlData: XML data to be parsed
        @type xmlData: String data

        @raise ValueError: If the XML cannot be successfully parsed.
        """
        # Only the parent node is needed; the DOM object itself is not kept.
        (xmlDom, parentNode) = createInputDom(xmlData)
        self._split = LocalConfig._parseSplit(parentNode)

    def _parseSplit(parent):
        """
        Parses a split configuration section.

        We read the following individual fields::

           sizeLimit      //cb_config/split/size_limit
           splitSize      //cb_config/split/split_size

        @param parent: Parent node to search beneath.

        @return: C{SplitConfig} object or C{None} if the section does not exist.
        @raise ValueError: If some filled-in value is invalid.
        """
        split = None
        section = readFirstChild(parent, "split")
        if section is not None:
            split = SplitConfig()
            split.sizeLimit = LocalConfig._readByteQuantity(section, "size_limit")
            split.splitSize = LocalConfig._readByteQuantity(section, "split_size")
        return split
    _parseSplit = staticmethod(_parseSplit)

    def _readByteQuantity(parent, name):
        """
        Read a byte size value from an XML document.

        A byte size value is an interpreted string value.  If the string value
        ends with "KB", "MB" or "GB", then the string before that is
        interpreted as kilobytes, megabytes or gigabytes.  Otherwise, it is
        interpreted as bytes.

        @param parent: Parent node to search beneath.
        @param name: Name of node to search for.

        @return: ByteQuantity parsed from XML document, or C{None} if the node has no value.
        @raise ValueError: If the quantity is not valid for a C{ByteQuantity}.
        """
        data = readString(parent, name)
        if data is None:
            return None
        data = data.strip()
        for (suffix, units) in (("KB", UNIT_KBYTES), ("MB", UNIT_MBYTES), ("GB", UNIT_GBYTES)):
            if data.endswith(suffix):
                # Everything in front of the suffix is the quantity.
                return ByteQuantity(data[:-len(suffix)].strip(), units)
        return ByteQuantity(data, UNIT_BYTES)
    _readByteQuantity = staticmethod(_readByteQuantity)

    def _addByteQuantityNode(xmlDom, parentNode, nodeName, byteQuantity):
        """
        Adds a text node as the next child of a parent, to contain a byte size.

        If the C{byteQuantity} is None, then the node will be created, but will
        be empty (i.e. will contain no text node child).

        The quantity is written exactly as stored, followed by a suffix that
        reflects its units: "KB", "MB" or "GB" (for instance "1.0 GB").  Any
        other units value results in the bare quantity ("423413").  No
        conversion between units takes place.

        @param xmlDom: DOM tree as from C{impl.createDocument()}.
        @param parentNode: Parent node to create child for.
        @param nodeName: Name of the new container node.
        @param byteQuantity: ByteQuantity object to put into the XML document

        @return: Reference to the newly-created node.
        """
        if byteQuantity is None:
            byteString = None
        elif byteQuantity.units == UNIT_KBYTES:
            byteString = "%s KB" % byteQuantity.quantity
        elif byteQuantity.units == UNIT_MBYTES:
            byteString = "%s MB" % byteQuantity.quantity
        elif byteQuantity.units == UNIT_GBYTES:
            byteString = "%s GB" % byteQuantity.quantity
        else:
            byteString = byteQuantity.quantity
        return addStringNode(xmlDom, parentNode, nodeName, byteString)
    _addByteQuantityNode = staticmethod(_addByteQuantityNode)
567
568
569
570
571
572
573
574
575
576
603
604
605
606
607
608
def _splitDailyDir(dailyDir, sizeLimit, splitSize, backupUser, backupGroup):
    """
    Splits large files in a daily staging directory.

    The candidate files are whatever C{getBackupFiles} returns for the
    directory.  Each file whose size on disk is strictly greater than the size
    limit is split via L{_splitFile}, and the original file is removed once it
    has been split.  Files at or below the limit are left alone.

    @note: Presumably C{getBackupFiles} leaves out indicator files (i.e.
    C{"cback.store"}, C{"cback.stage"}, etc.); confirm against that helper.

    @param dailyDir: Daily directory to split
    @param sizeLimit: Size limit, as an object with C{quantity} and C{units} (i.e. a C{ByteQuantity})
    @param splitSize: Split size, as an object with C{quantity} and C{units} (i.e. a C{ByteQuantity})
    @param backupUser: User that target files should be owned by
    @param backupGroup: Group that target files should be owned by

    @raise ValueError: Presumably if the daily staging directory does not exist (raised by a helper, not here).
    @raise IOError: If there is a problem splitting one of the files.
    """
    logger.debug("Begin splitting contents of [%s].", dailyDir)
    fileList = getBackupFiles(dailyDir)
    # Convert the limit once, up front, so the loop compares plain byte counts.
    limitBytes = float(convertSize(sizeLimit.quantity, sizeLimit.units, UNIT_BYTES))
    for path in fileList:
        size = float(os.stat(path).st_size)
        if size > limitBytes:
            _splitFile(path, splitSize, backupUser, backupGroup, removeSource=True)
    logger.debug("Completed splitting contents of [%s].", dailyDir)
634
635
636
637
638
639
640 -def _splitFile(sourcePath, splitSize, backupUser, backupGroup, removeSource=False):
641 """
642 Splits the source file into chunks of the indicated size.
643
644 The split files will be owned by the indicated backup user and group. If
645 C{removeSource} is C{True}, then the source file will be removed after it is
646 successfully split.
647
648 @param sourcePath: Absolute path of the source file to split
649 @param splitSize: Encryption mode (only "gpg" is allowed)
650 @param backupUser: User that target files should be owned by
651 @param backupGroup: Group that target files should be owned by
652 @param removeSource: Indicates whether to remove the source file
653
654 @raise IOError: If there is a problem accessing, splitting or removing the source file.
655 """
656 cwd = os.getcwd()
657 try:
658 if not os.path.exists(sourcePath):
659 raise ValueError("Source path [%s] does not exist." % sourcePath);
660 dirname = os.path.dirname(sourcePath)
661 filename = os.path.basename(sourcePath)
662 prefix = "%s_" % filename
663 bytes = int(convertSize(splitSize.quantity, splitSize.units, UNIT_BYTES))
664 os.chdir(dirname)
665 command = resolveCommand(SPLIT_COMMAND)
666 args = [ "--verbose", "--numeric-suffixes", "--suffix-length=5", "--bytes=%d" % bytes, filename, prefix, ]
667 (result, output) = executeCommand(command, args, returnOutput=True, ignoreStderr=False)
668 if result != 0:
669 raise IOError("Error [%d] calling split for [%s]." % (result, sourcePath))
670 pattern = re.compile(r"(creating file `)(%s)(.*)(')" % prefix)
671 match = pattern.search(output[-1:][0])
672 if match is None:
673 raise IOError("Unable to parse output from split command.")
674 value = int(match.group(3).strip())
675 for index in range(0, value):
676 path = "%s%05d" % (prefix, index)
677 if not os.path.exists(path):
678 raise IOError("After call to split, expected file [%s] does not exist." % path)
679 changeOwnership(path, backupUser, backupGroup)
680 if removeSource:
681 if os.path.exists(sourcePath):
682 try:
683 os.remove(sourcePath)
684 logger.debug("Completed removing old file [%s]." % sourcePath)
685 except:
686 raise IOError("Failed to remove file [%s] after splitting it." % (sourcePath))
687 finally:
688 os.chdir(cwd)
689