Package CedarBackup2 :: Package extend :: Module split
[hide private]
[frames] | [no frames]

Source Code for Module CedarBackup2.extend.split

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2007 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # This program is free software; you can redistribute it and/or 
 15  # modify it under the terms of the GNU General Public License, 
 16  # Version 2, as published by the Free Software Foundation. 
 17  # 
 18  # This program is distributed in the hope that it will be useful, 
 19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 21  # 
 22  # Copies of the GNU General Public License are available from 
 23  # the Free Software Foundation website, http://www.gnu.org/. 
 24  # 
 25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 26  # 
 27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 28  # Language : Python (>= 2.3) 
 29  # Project  : Official Cedar Backup Extensions 
 30  # Revision : $Id: split.py 1232 2007-09-20 03:07:07Z pronovic $ 
 31  # Purpose  : Provides an extension to split up large files in staging directories. 
 32  # 
 33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 34   
 35  ######################################################################## 
 36  # Module documentation 
 37  ######################################################################## 
 38   
 39  """ 
 40  Provides an extension to split up large files in staging directories. 
 41   
 42  When this extension is executed, it will look through the configured Cedar 
 43  Backup staging directory for files exceeding a specified size limit, and split 
 44  them down into smaller files using the 'split' utility.  Any directory which 
 45  has already been split (as indicated by the C{cback.split} file) will be 
 46  ignored. 
 47   
 48  This extension requires a new configuration section <split> and is intended 
 49  to be run immediately after the standard stage action or immediately before the 
 50  standard store action.  Aside from its own configuration, it requires the 
 51  options and staging configuration sections in the standard Cedar Backup 
 52  configuration file. 
 53   
 54  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 55  """ 
 56   
 57  ######################################################################## 
 58  # Imported modules 
 59  ######################################################################## 
 60   
 61  # System modules 
 62  import os 
 63  import re 
 64  import logging 
 65   
 66  # Cedar Backup modules 
 67  from CedarBackup2.filesystem import FilesystemList 
 68  from CedarBackup2.util import resolveCommand, executeCommand, convertSize 
 69  from CedarBackup2.util import changeOwnership, buildNormalizedPath 
 70  from CedarBackup2.util import UNIT_BYTES, UNIT_KBYTES, UNIT_MBYTES, UNIT_GBYTES 
 71  from CedarBackup2.xmlutil import createInputDom, addContainerNode, addStringNode 
 72  from CedarBackup2.xmlutil import readFirstChild, readString 
 73  from CedarBackup2.actions.util import findDailyDirs, writeIndicatorFile, getBackupFiles 
 74   
 75   
 76  ######################################################################## 
 77  # Module-wide constants and variables 
 78  ######################################################################## 
 79   
 80  logger = logging.getLogger("CedarBackup2.log.extend.split") 
 81   
 82  SPLIT_COMMAND = [ "split", ] 
 83  SPLIT_INDICATOR = "cback.split" 
 84   
 85  VALID_BYTE_UNITS = [ UNIT_BYTES, UNIT_KBYTES, UNIT_MBYTES, UNIT_GBYTES, ] 
 86   
 87   
 88  ######################################################################## 
 89  # ByteQuantity class definition 
 90  ######################################################################## 
 91   
class ByteQuantity(object):

   """
   Class representing a byte quantity.

   A byte quantity has both a quantity and a byte-related unit.  Units are
   maintained using the constants from util.py.

   The quantity is maintained internally as a string so that issues of
   precision can be avoided.  It really isn't possible to store a floating
   point number here while being able to losslessly translate back and forth
   between XML and object representations.  (Perhaps the Python 2.4 Decimal
   class would have been an option, but I want to stay compatible with Python
   2.3.)

   Even though the quantity is maintained as a string, the string must be a
   valid, non-negative floating point number.  Technically, any floating
   point string format supported by Python is allowable.  However, it does
   not make sense to have a negative quantity of bytes in this context.

   @sort: __init__, __repr__, __str__, __cmp__, quantity, units
   """

   def __init__(self, quantity=None, units=None):
      """
      Constructor for the C{ByteQuantity} class.

      @param quantity: Quantity of bytes, as string ("1.25")
      @param units: Unit of bytes, one of VALID_BYTE_UNITS

      @raise ValueError: If one of the values is invalid.
      """
      self._quantity = None
      self._units = None
      # Assign through the properties so that both values get validated
      self.quantity = quantity
      self.units = units

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "ByteQuantity(%s, %s)" % (self.quantity, self.units)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __cmp__(self, other):
      """
      Definition of the comparison operator for this class.

      The quantity is compared first and the units second.  Note that the
      quantity is compared exactly as it is stored, i.e. as a string, so two
      quantities are only equal if their string forms are identical.

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      if self._quantity != other._quantity:
         if self._quantity < other._quantity:
            return -1
         else:
            return 1
      if self._units != other._units:
         if self._units < other._units:
            return -1
         else:
            return 1
      return 0

   def _setQuantity(self, value):
      """
      Property target used to set the quantity.
      The value must be a non-empty string if it is not C{None}.
      @raise ValueError: If the value is not a string or is an empty string.
      @raise ValueError: If the value is not a valid floating point number
      @raise ValueError: If the value is less than zero
      """
      if value is not None:
         try:
            isEmpty = len(value) < 1
         except TypeError:
            # Something without a length (e.g. a bare number) is not a string;
            # report it as an invalid value, per the documented contract.
            raise ValueError("Quantity must be a non-empty string.")
         if isEmpty:
            raise ValueError("Quantity must be a non-empty string.")
         try:
            floatValue = float(value)   # raises ValueError for a malformed number
         except TypeError:
            raise ValueError("Quantity must be a valid floating point number.")
         if floatValue < 0.0:
            raise ValueError("Quantity cannot be negative.")
      self._quantity = value # keep around string

   def _getQuantity(self):
      """
      Property target used to get the quantity.
      """
      return self._quantity

   def _setUnits(self, value):
      """
      Property target used to set the units value.
      If not C{None}, the units value must be one of the values in L{VALID_BYTE_UNITS}.
      @raise ValueError: If the value is not valid.
      """
      if value is not None:
         if value not in VALID_BYTE_UNITS:
            raise ValueError("Units value must be one of %s." % VALID_BYTE_UNITS)
      self._units = value

   def _getUnits(self):
      """
      Property target used to get the units value.
      """
      return self._units

   quantity = property(_getQuantity, _setQuantity, None, doc="Byte quantity, as a string")
   units = property(_getUnits, _setUnits, None, doc="Units for byte quantity, for instance UNIT_BYTES")


########################################################################
# SplitConfig class definition
########################################################################

class SplitConfig(object):

   """
   Class representing split configuration.

   Split configuration is used for splitting staging directories.

   The following restrictions exist on data in this class:

      - The size limit must be a ByteQuantity
      - The split size must be a ByteQuantity

   @sort: __init__, __repr__, __str__, __cmp__, sizeLimit, splitSize
   """

   def __init__(self, sizeLimit=None, splitSize=None):
      """
      Constructor for the C{SplitConfig} class.

      @param sizeLimit: Size limit of the files, as a C{ByteQuantity}
      @param splitSize: Size that files exceeding the limit will be split into, as a C{ByteQuantity}

      @raise ValueError: If one of the values is invalid.
      """
      self._sizeLimit = None
      self._splitSize = None
      # Go through the properties so that the type checks are applied
      self.sizeLimit = sizeLimit
      self.splitSize = splitSize

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "SplitConfig(%s, %s)" % (self.sizeLimit, self.splitSize)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __cmp__(self, other):
      """
      Definition of the comparison operator for this class.
      The size limit is compared first, then the split size.
      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      # Fields are looked up one at a time, in priority order, so that a
      # difference in an earlier field decides the result on its own.
      for field in ("_sizeLimit", "_splitSize"):
         mine = getattr(self, field)
         theirs = getattr(other, field)
         if mine != theirs:
            if mine < theirs:
               return -1
            return 1
      return 0

   def _setSizeLimit(self, value):
      """
      Property target used to set the size limit.
      If not C{None}, the value must be a C{ByteQuantity} object.
      @raise ValueError: If the value is not a C{ByteQuantity}
      """
      if value is not None and not isinstance(value, ByteQuantity):
         raise ValueError("Value must be a C{ByteQuantity} object.")
      self._sizeLimit = value

   def _getSizeLimit(self):
      """
      Property target used to get the size limit.
      """
      return self._sizeLimit

   def _setSplitSize(self, value):
      """
      Property target used to set the split size.
      If not C{None}, the value must be a C{ByteQuantity} object.
      @raise ValueError: If the value is not a C{ByteQuantity}
      """
      if value is not None and not isinstance(value, ByteQuantity):
         raise ValueError("Value must be a C{ByteQuantity} object.")
      self._splitSize = value

   def _getSplitSize(self):
      """
      Property target used to get the split size.
      """
      return self._splitSize

   sizeLimit = property(_getSizeLimit, _setSizeLimit, None, doc="Size limit, as a ByteQuantity")
   splitSize = property(_getSplitSize, _setSplitSize, None, doc="Split size, as a ByteQuantity")


########################################################################
# LocalConfig class definition
########################################################################

class LocalConfig(object):

   """
   Class representing this extension's configuration document.

   This is not a general-purpose configuration object like the main Cedar
   Backup configuration object.  Instead, it just knows how to parse and emit
   split-specific configuration values.  Third parties who need to read and
   write configuration related to this extension should access it through the
   constructor, C{validate} and C{addConfig} methods.

   @sort: __init__, __repr__, __str__, __cmp__, split, validate, addConfig
   """

   def __init__(self, xmlData=None, xmlPath=None, validate=True):
      """
      Initializes a configuration object.

      If you initialize the object without passing either C{xmlData} or
      C{xmlPath} then configuration will be empty and will be invalid until it
      is filled in properly.

      No reference to the original XML data or original path is saved off by
      this class.  Once the data has been parsed (successfully or not) this
      original information is discarded.

      Unless the C{validate} argument is C{False}, the L{LocalConfig.validate}
      method will be called (with its default arguments) against configuration
      after successfully parsing any passed-in XML.  Keep in mind that even if
      C{validate} is C{False}, it might not be possible to parse the passed-in
      XML document if lower-level validations fail.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to read in
      invalid configuration from disk.

      @param xmlData: XML data representing configuration.
      @type xmlData: String data.

      @param xmlPath: Path to an XML file on disk.
      @type xmlPath: Absolute path to a file on disk.

      @param validate: Validate the document after parsing it.
      @type validate: Boolean true/false.

      @raise ValueError: If both C{xmlData} and C{xmlPath} are passed-in.
      @raise ValueError: If the XML data in C{xmlData} or C{xmlPath} cannot be parsed.
      @raise ValueError: If the parsed configuration document is not valid.
      """
      self._split = None
      self.split = None
      if xmlData is not None and xmlPath is not None:
         raise ValueError("Use either xmlData or xmlPath, but not both.")
      if xmlPath is not None:
         # Close the file explicitly rather than relying on garbage collection
         handle = open(xmlPath)
         try:
            xmlData = handle.read()
         finally:
            handle.close()
      if xmlData is not None:
         self._parseXmlData(xmlData)
         if validate:
            self.validate()

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "LocalConfig(%s)" % (self.split)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __cmp__(self, other):
      """
      Definition of the comparison operator for this class.
      Two objects compare according to their split configuration.
      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      if self._split != other._split:
         if self._split < other._split:
            return -1
         else:
            return 1
      return 0

   def _setSplit(self, value):
      """
      Property target used to set the split configuration value.
      If not C{None}, the value must be a C{SplitConfig} object.
      @raise ValueError: If the value is not a C{SplitConfig}
      """
      if value is None:
         self._split = None
      else:
         if not isinstance(value, SplitConfig):
            raise ValueError("Value must be a C{SplitConfig} object.")
         self._split = value

   def _getSplit(self):
      """
      Property target used to get the split configuration value.
      """
      return self._split

   split = property(_getSplit, _setSplit, None, "Split configuration in terms of a C{SplitConfig} object.")

   def validate(self):
      """
      Validates configuration represented by the object.

      Split configuration must be filled in.  Within that, both the size limit
      and split size must be filled in.

      @raise ValueError: If one of the validations fails.
      """
      if self.split is None:
         raise ValueError("Split section is required.")
      if self.split.sizeLimit is None:
         raise ValueError("Size limit must be set.")
      if self.split.splitSize is None:
         raise ValueError("Split size must be set.")

   def addConfig(self, xmlDom, parentNode):
      """
      Adds a <split> configuration section as the next child of a parent.

      Third parties should use this function to write configuration related to
      this extension.

      We add the following fields to the document::

         sizeLimit      //cb_config/split/size_limit
         splitSize      //cb_config/split/split_size

      Nothing is added if no split configuration is set.

      @param xmlDom: DOM tree as from C{impl.createDocument()}.
      @param parentNode: Parent that the section should be appended to.
      """
      if self.split is not None:
         sectionNode = addContainerNode(xmlDom, parentNode, "split")
         LocalConfig._addByteQuantityNode(xmlDom, sectionNode, "size_limit", self.split.sizeLimit)
         LocalConfig._addByteQuantityNode(xmlDom, sectionNode, "split_size", self.split.splitSize)

   def _parseXmlData(self, xmlData):
      """
      Internal method to parse an XML string into the object.

      This method parses the XML document into a DOM tree (C{xmlDom}) and then
      calls a static method to parse the split configuration section.

      @param xmlData: XML data to be parsed
      @type xmlData: String data

      @raise ValueError: If the XML cannot be successfully parsed.
      """
      (xmlDom, parentNode) = createInputDom(xmlData)
      self._split = LocalConfig._parseSplit(parentNode)

   def _parseSplit(parent):
      """
      Parses a split configuration section.

      We read the following individual fields::

         sizeLimit      //cb_config/split/size_limit
         splitSize      //cb_config/split/split_size

      @param parent: Parent node to search beneath.

      @return: C{SplitConfig} object or C{None} if the section does not exist.
      @raise ValueError: If some filled-in value is invalid.
      """
      split = None
      section = readFirstChild(parent, "split")
      if section is not None:
         split = SplitConfig()
         split.sizeLimit = LocalConfig._readByteQuantity(section, "size_limit")
         split.splitSize = LocalConfig._readByteQuantity(section, "split_size")
      return split
   _parseSplit = staticmethod(_parseSplit)

   def _readByteQuantity(parent, name):
      """
      Read a byte size value from an XML document.

      A byte size value is an interpreted string value.  If the string value
      ends with "KB", "MB" or "GB", then the string before that is interpreted
      as kilobytes, megabytes or gigabytes.  Otherwise, it is interpreted as
      bytes.

      @param parent: Parent node to search beneath.
      @param name: Name of node to search for.

      @return: ByteQuantity parsed from XML document, or C{None} if the node has no value.
      @raise ValueError: If the quantity portion of the value is invalid.
      """
      data = readString(parent, name)
      if data is None:
         return None
      data = data.strip()
      for (suffix, units) in (("KB", UNIT_KBYTES), ("MB", UNIT_MBYTES), ("GB", UNIT_GBYTES)):
         if data.endswith(suffix):
            # Everything before the unit suffix is the quantity
            return ByteQuantity(data[:-len(suffix)].strip(), units)
      return ByteQuantity(data, UNIT_BYTES)
   _readByteQuantity = staticmethod(_readByteQuantity)

   def _addByteQuantityNode(xmlDom, parentNode, nodeName, byteQuantity):
      """
      Adds a text node as the next child of a parent, to contain a byte size.

      If the C{byteQuantity} is None, then the node will be created, but will
      be empty (i.e. will contain no text node child).

      The quantity is written exactly as stored, followed by a unit suffix
      that matches the stored units: "KB", "MB" or "GB" (for instance
      "1.0 GB").  A quantity in any other unit is written with no suffix
      (for instance "423413").  No unit conversion takes place.

      @param xmlDom: DOM tree as from C{impl.createDocument()}.
      @param parentNode: Parent node to create child for.
      @param nodeName: Name of the new container node.
      @param byteQuantity: ByteQuantity object to put into the XML document

      @return: Reference to the newly-created node.
      """
      if byteQuantity is None:
         byteString = None
      elif byteQuantity.units == UNIT_KBYTES:
         byteString = "%s KB" % byteQuantity.quantity
      elif byteQuantity.units == UNIT_MBYTES:
         byteString = "%s MB" % byteQuantity.quantity
      elif byteQuantity.units == UNIT_GBYTES:
         byteString = "%s GB" % byteQuantity.quantity
      else:
         byteString = byteQuantity.quantity
      return addStringNode(xmlDom, parentNode, nodeName, byteString)
   _addByteQuantityNode = staticmethod(_addByteQuantityNode)


########################################################################
# Public functions
########################################################################

###########################
# executeAction() function
###########################

def executeAction(configPath, options, config):
   """
   Executes the split backup action.

   The extension's own configuration is read from C{configPath}.  Every
   daily directory returned by C{findDailyDirs} for the configured staging
   target directory is then split, and is marked with the split indicator
   file afterwards.

   @param configPath: Path to configuration file on disk.
   @type configPath: String representing a path on disk.

   @param options: Program command-line options.
   @type options: Options object.

   @param config: Program configuration.
   @type config: Config object.

   @raise ValueError: Under many generic error conditions
   @raise IOError: If there are I/O problems reading or writing files
   """
   logger.debug("Executing split extended action.")
   # Both the options and the stage sections are needed below
   if config.options is None:
      raise ValueError("Cedar Backup configuration is not properly filled in.")
   if config.stage is None:
      raise ValueError("Cedar Backup configuration is not properly filled in.")
   local = LocalConfig(xmlPath=configPath)
   for dailyDir in findDailyDirs(config.stage.targetDir, SPLIT_INDICATOR):
      splitConfig = local.split
      _splitDailyDir(dailyDir, splitConfig.sizeLimit, splitConfig.splitSize,
                     config.options.backupUser, config.options.backupGroup)
      writeIndicatorFile(dailyDir, SPLIT_INDICATOR, config.options.backupUser, config.options.backupGroup)
   logger.info("Executed the split extended action successfully.")


##############################
# _splitDailyDir() function
##############################

def _splitDailyDir(dailyDir, sizeLimit, splitSize, backupUser, backupGroup):
   """
   Splits large files in a daily staging directory.

   The candidate files are whatever C{getBackupFiles} returns for the
   directory (indicator files are expected to be left out by that call).
   Each candidate whose size on disk is greater than the size limit is split
   into pieces, and the original file is removed afterwards.

   @param dailyDir: Daily directory to split
   @param sizeLimit: Size limit, as a C{ByteQuantity}
   @param splitSize: Split size, as a C{ByteQuantity}
   @param backupUser: User that target files should be owned by
   @param backupGroup: Group that target files should be owned by

   @raise ValueError: If a file to be split does not exist.
   @raise IOError: If there is a problem splitting or removing a file.
   """
   logger.debug("Begin splitting contents of [%s]." % dailyDir)
   candidates = getBackupFiles(dailyDir)  # ignores indicator files
   threshold = float(convertSize(sizeLimit.quantity, sizeLimit.units, UNIT_BYTES))
   for candidate in candidates:
      if not float(os.stat(candidate).st_size) > threshold:
         continue   # at or under the limit, leave the file alone
      _splitFile(candidate, splitSize, backupUser, backupGroup, removeSource=True)
   logger.debug("Completed splitting contents of [%s]." % dailyDir)


########################
# _splitFile() function
########################

def _splitFile(sourcePath, splitSize, backupUser, backupGroup, removeSource=False):
   """
   Splits the source file into chunks of the indicated size.

   The pieces are written next to the source file and are named
   C{<filename>_NNNNN}, where C{NNNNN} is a zero-padded five-digit index
   starting at zero.

   The split files will be owned by the indicated backup user and group.  If
   C{removeSource} is C{True}, then the source file will be removed after it is
   successfully split.

   The working directory is changed while the split command runs, and is
   always restored before this function returns or raises.

   @param sourcePath: Absolute path of the source file to split
   @param splitSize: Size of each piece, as a C{ByteQuantity}
   @param backupUser: User that target files should be owned by
   @param backupGroup: Group that target files should be owned by
   @param removeSource: Indicates whether to remove the source file

   @raise ValueError: If the source path does not exist.
   @raise IOError: If there is a problem accessing, splitting or removing the source file.
   """
   cwd = os.getcwd()
   try:
      if not os.path.exists(sourcePath):
         raise ValueError("Source path [%s] does not exist." % sourcePath)
      dirname = os.path.dirname(sourcePath)
      filename = os.path.basename(sourcePath)
      prefix = "%s_" % filename
      chunkBytes = int(convertSize(splitSize.quantity, splitSize.units, UNIT_BYTES))
      # Need to operate from the directory that we want files written to.  A
      # bare filename has an empty dirname, which means the current directory.
      os.chdir(dirname or os.curdir)
      command = resolveCommand(SPLIT_COMMAND)
      args = [ "--verbose", "--numeric-suffixes", "--suffix-length=5", "--bytes=%d" % chunkBytes, filename, prefix, ]
      (result, output) = executeCommand(command, args, returnOutput=True, ignoreStderr=False)
      if result != 0:
         raise IOError("Error [%d] calling split for [%s]." % (result, sourcePath))
      if not output:
         raise IOError("Unable to parse output from split command.")
      # The last line of verbose output names the last piece that was created.
      # The prefix is escaped because file names may contain regex
      # metacharacters, and either opening quote style is accepted because
      # the quoting used by split differs between versions.
      pattern = re.compile(r"(creating file [`'])(%s)(.*)(')" % re.escape(prefix))
      match = pattern.search(output[-1])
      if match is None:
         raise IOError("Unable to parse output from split command.")
      lastIndex = int(match.group(3).strip())
      # Indexes start at zero, so the last piece must be included in the range
      for index in range(0, lastIndex + 1):
         path = "%s%05d" % (prefix, index)
         if not os.path.exists(path):
            raise IOError("After call to split, expected file [%s] does not exist." % path)
         changeOwnership(path, backupUser, backupGroup)
      if removeSource:
         if os.path.exists(sourcePath):
            try:
               os.remove(sourcePath)
            except OSError:
               raise IOError("Failed to remove file [%s] after splitting it." % (sourcePath))
            else:
               logger.debug("Completed removing old file [%s]." % sourcePath)
   finally:
      os.chdir(cwd)
689