Package CedarBackup2 :: Package actions :: Module collect
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup2.actions.collect

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2004-2008,2010 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # This program is free software; you can redistribute it and/or 
 15  # modify it under the terms of the GNU General Public License, 
 16  # Version 2, as published by the Free Software Foundation. 
 17  # 
 18  # This program is distributed in the hope that it will be useful, 
 19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 21  # 
 22  # Copies of the GNU General Public License are available from 
 23  # the Free Software Foundation website, http://www.gnu.org/. 
 24  # 
 25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 26  # 
 27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 28  # Language : Python (>= 2.5) 
 29  # Project  : Cedar Backup, release 2 
 30  # Revision : $Id: collect.py 1006 2010-07-07 21:03:57Z pronovic $ 
 31  # Purpose  : Implements the standard 'collect' action. 
 32  # 
 33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 34   
 35  ######################################################################## 
 36  # Module documentation 
 37  ######################################################################## 
 38   
 39  """ 
 40  Implements the standard 'collect' action. 
 41  @sort: executeCollect 
 42  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 43  """ 
 44   
 45   
 46  ######################################################################## 
 47  # Imported modules 
 48  ######################################################################## 
 49   
 50  # System modules 
 51  import os 
 52  import logging 
 53  import pickle 
 54   
 55  # Cedar Backup modules 
 56  from CedarBackup2.filesystem import BackupFileList 
 57  from CedarBackup2.util import isStartOfWeek, changeOwnership, displayBytes, buildNormalizedPath 
 58  from CedarBackup2.actions.constants import DIGEST_EXTENSION, COLLECT_INDICATOR 
 59  from CedarBackup2.actions.util import writeIndicatorFile 
 60   
 61   
 62  ######################################################################## 
 63  # Module-wide constants and variables 
 64  ######################################################################## 
 65   
 66  logger = logging.getLogger("CedarBackup2.log.actions.collect") 
 67   
 68   
 69  ######################################################################## 
 70  # Public functions 
 71  ######################################################################## 
 72   
 73  ############################ 
 74  # executeCollect() function 
 75  ############################ 
 76   
def executeCollect(configPath, options, config):
   """
   Runs the standard 'collect' backup action.

   Each configured collect file and collect directory is examined in turn.
   An item is archived today when a full backup was requested, when its
   collect mode is C{daily} or C{incr}, or when its collect mode is
   C{weekly} and today is the configured starting day of the week.

   @note: When the collect action is complete, a collect indicator is written
   to the collect target directory, so it's obvious that the collect action
   has completed.  The stage process uses this indicator to decide whether a
   peer is ready to be staged.

   @param configPath: Path to configuration file on disk (not used here).
   @type configPath: String representing a path on disk.

   @param options: Program command-line options.
   @type options: Options object.

   @param config: Program configuration.
   @type config: Config object.

   @raise ValueError: Under many generic error conditions
   @raise TarError: If there is a problem creating a tar file
   """
   logger.debug("Executing the 'collect' action.")
   if config.options is None or config.collect is None:
      raise ValueError("Collect configuration is not properly filled in.")

   collectFiles = config.collect.collectFiles
   collectDirs = config.collect.collectDirs
   haveFiles = collectFiles is not None and len(collectFiles) >= 1
   haveDirs = collectDirs is not None and len(collectDirs) >= 1
   if not (haveFiles or haveDirs):
      raise ValueError("There must be at least one collect file or collect directory.")

   fullBackup = options.full
   logger.debug("Full backup flag is [%s]" % fullBackup)
   todayIsStart = isStartOfWeek(config.options.startingDay)
   resetDigest = fullBackup or todayIsStart
   logger.debug("Reset digest flag is [%s]" % resetDigest)

   def dueToday(mode):
      # Full backups collect everything; otherwise the collect mode decides.
      if fullBackup or mode in ('daily', 'incr'):
         return True
      return mode == 'weekly' and todayIsStart

   # Individual files first...
   if collectFiles is None:
      collectFiles = []
   for collectFile in collectFiles:
      filePath = collectFile.absolutePath
      logger.debug("Working with collect file [%s]" % filePath)
      collectMode = _getCollectMode(config, collectFile)
      archiveMode = _getArchiveMode(config, collectFile)
      digestPath = _getDigestPath(config, collectFile)
      tarfilePath = _getTarfilePath(config, collectFile, archiveMode)
      if not dueToday(collectMode):
         logger.debug("File will not be backed up, per collect mode.")
      else:
         logger.debug("File meets criteria to be backed up today.")
         _collectFile(config, filePath, tarfilePath,
                      collectMode, archiveMode, resetDigest, digestPath)
      logger.info("Completed collecting file [%s]" % filePath)

   # ...then directories.
   if collectDirs is None:
      collectDirs = []
   for collectDir in collectDirs:
      dirPath = collectDir.absolutePath
      logger.debug("Working with collect directory [%s]" % dirPath)
      collectMode = _getCollectMode(config, collectDir)
      archiveMode = _getArchiveMode(config, collectDir)
      ignoreFile = _getIgnoreFile(config, collectDir)
      linkDepth = _getLinkDepth(collectDir)
      dereference = _getDereference(collectDir)
      digestPath = _getDigestPath(config, collectDir)
      tarfilePath = _getTarfilePath(config, collectDir, archiveMode)
      (excludePaths, excludePatterns) = _getExclusions(config, collectDir)
      if not dueToday(collectMode):
         logger.debug("Directory will not be backed up, per collect mode.")
      else:
         logger.debug("Directory meets criteria to be backed up today.")
         _collectDirectory(config, dirPath, tarfilePath,
                           collectMode, archiveMode, ignoreFile, linkDepth, dereference,
                           resetDigest, digestPath, excludePaths, excludePatterns)
      logger.info("Completed collecting directory [%s]" % dirPath)

   # Signal completion to later stages.
   writeIndicatorFile(config.collect.targetDir, COLLECT_INDICATOR,
                      config.options.backupUser, config.options.backupGroup)
   logger.info("Executed the 'collect' action successfully.")
145 146 147 ######################################################################## 148 # Private utility functions 149 ######################################################################## 150 151 ########################## 152 # _collectFile() function 153 ########################## 154
def _collectFile(config, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Collects a single configured collect file into a tarfile.

   A backup list containing only the indicated file is built and handed to
   L{_executeBackup}, which creates the tarfile and, for incremental
   collection, manages the digest according to the reset digest flag.

   The caller decides which collect and archive modes apply, since they can
   be set both on the collect configuration and on the collect file itself.

   @param config: Config object.
   @param absolutePath: Absolute path of file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   singleFileList = BackupFileList()
   singleFileList.addFile(absolutePath)
   _executeBackup(config, singleFileList, absolutePath, tarfilePath,
                  collectMode, archiveMode, resetDigest, digestPath)
179 180 181 ############################### 182 # _collectDirectory() function 183 ############################### 184
def _collectDirectory(config, absolutePath, tarfilePath, collectMode, archiveMode,
                      ignoreFile, linkDepth, dereference, resetDigest, digestPath,
                      excludePaths, excludePatterns):
   """
   Collects a configured collect directory into a tarfile.

   A backup list is configured with the ignore file and the exclusions, then
   populated with the contents of the directory.  The list is handed to
   L{_executeBackup}, which creates the tarfile and, for incremental
   collection, manages the digest according to the reset digest flag.

   The caller decides which collect and archive modes apply, since they can
   be set both on the collect configuration and on the collect directory
   itself.

   @param config: Config object.
   @param absolutePath: Absolute path of directory to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param ignoreFile: Ignore file to use.
   @param linkDepth: Link depth value to use.
   @param dereference: Dereference flag to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   @param excludePaths: List of absolute paths to exclude.
   @param excludePatterns: List of patterns to exclude.
   """
   dirContents = BackupFileList()
   # Filters are set on the list before the directory contents are added.
   dirContents.ignoreFile = ignoreFile
   dirContents.excludePaths = excludePaths
   dirContents.excludePatterns = excludePatterns
   dirContents.addDirContents(absolutePath, linkDepth=linkDepth, dereference=dereference)
   _executeBackup(config, dirContents, absolutePath, tarfilePath,
                  collectMode, archiveMode, resetDigest, digestPath)
219 220 221 ############################ 222 # _executeBackup() function 223 ############################ 224
def _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Execute the backup process for the indicated backup list.

   This function consolidates functionality shared by L{_collectFile} and
   L{_collectDirectory}.  Those functions build the backup list; this
   function creates the tarfile and manages the digest file on disk.

   When the collect mode is C{incr}, files unchanged relative to the old
   digest are removed from the list before the tarfile is generated, and the
   newly captured digest is always written afterwards.  If the reset digest
   flag is set, the old digest is treated as empty rather than loaded from
   disk.  For any other collect mode no digest is used.

   For collect files, the digest file will always just contain the single
   file that is being backed up.  This might be a little wasteful in terms
   of the number of files that we keep around, but it's consistent and easy
   to understand.

   @param config: Config object.
   @param backupList: List to execute backup for
   @param absolutePath: Absolute path of directory or file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   if collectMode != 'incr':
      useDigest = False
      newDigest = None
      logger.debug("Collect mode is [%s]; no digest will be used." % collectMode)
   else:
      useDigest = True
      if not resetDigest:
         logger.debug("Based on resetDigest flag, digest will loaded from disk.")
         priorDigest = _loadDigest(digestPath)
      else:
         logger.debug("Based on resetDigest flag, digest will be cleared.")
         priorDigest = {}
      (removed, newDigest) = backupList.removeUnchanged(priorDigest, captureDigest=True)
      logger.debug("Removed %d unchanged files based on digest values." % removed)

   fileCount = len(backupList)
   sizeText = displayBytes(backupList.totalSize())
   if fileCount == 1 and backupList[0] == absolutePath:  # special case for individual file
      logger.info("Backing up file [%s] (%s)." % (absolutePath, sizeText))
   else:
      logger.info("Backing up %d files in [%s] (%s)." % (fileCount, absolutePath, sizeText))

   # No tarfile is created when there is nothing to back up.
   if fileCount > 0:
      backupList.generateTarfile(tarfilePath, archiveMode, True)
      changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)

   # In incremental mode the new digest is written even if nothing was archived.
   if useDigest:
      _writeDigest(config, newDigest, digestPath)
274 275 276 ######################### 277 # _loadDigest() function 278 ######################### 279
def _loadDigest(digestPath):
   """
   Loads the indicated digest path from disk into a dictionary.

   If we can't load the digest successfully (either because it doesn't exist
   or for some other reason), then an empty dictionary will be returned - but
   the condition will be logged.

   @note: The digest is a pickle.  Unpickling can execute arbitrary code, so
   the digest file must only ever come from a trusted location.

   @param digestPath: Path to the digest file on disk.

   @return: Dictionary representing contents of digest path.
   """
   if not os.path.isfile(digestPath):
      logger.debug("Digest [%s] does not exist on disk." % digestPath)
      return {}
   try:
      # Binary mode: pickle data is a byte stream.  On POSIX under Python 2
      # this reads exactly the same bytes as text mode did.
      handle = open(digestPath, "rb")
      try:
         digest = pickle.load(handle)
      finally:
         handle.close()  # always release the file handle, even on failure
      logger.debug("Loaded digest [%s] from disk: %d entries." % (digestPath, len(digest)))
   except Exception:
      # Best-effort by design: any load failure yields an empty digest.
      # Unlike a bare except, KeyboardInterrupt and SystemExit still propagate.
      digest = {}
      logger.error("Failed loading digest [%s] from disk." % digestPath)
   return digest
303 304 305 ########################## 306 # _writeDigest() function 307 ########################## 308
def _writeDigest(config, digest, digestPath):
   """
   Writes the digest dictionary to the indicated digest path on disk.

   If we can't write the digest successfully for any reason, we'll log the
   condition but won't throw an exception.

   After a successful write, ownership of the digest file is changed to the
   configured backup user and group.

   @param config: Config object.
   @param digest: Digest dictionary to write to disk.
   @param digestPath: Path to the digest file on disk.
   """
   try:
      # Binary mode: pickle data is a byte stream.  On POSIX under Python 2
      # this writes exactly the same bytes as text mode did.
      handle = open(digestPath, "wb")
      try:
         pickle.dump(digest, handle)
      finally:
         # Close explicitly so the data is flushed to disk before ownership
         # is changed, and so the handle is released even if the dump fails.
         handle.close()
      changeOwnership(digestPath, config.options.backupUser, config.options.backupGroup)
      logger.debug("Wrote new digest [%s] to disk: %d entries." % (digestPath, len(digest)))
   except Exception:
      # Best-effort by design: failure is logged, never raised.  Unlike a
      # bare except, KeyboardInterrupt and SystemExit still propagate.
      logger.error("Failed to write digest [%s] to disk." % digestPath)
326 327 328 ######################################################################## 329 # Private attribute "getter" functions 330 ######################################################################## 331 332 ############################# 333 # _getCollectMode() function 334 ############################# 335
def _getCollectMode(config, item):
   """
   Determines the collect mode for a collect directory or file.
   A mode set on the item itself wins; otherwise the mode from the collect
   section of the configuration is used.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Collect mode to use.
   """
   chosenMode = item.collectMode
   if chosenMode is None:
      chosenMode = config.collect.collectMode
   logger.debug("Collect mode is [%s]" % chosenMode)
   return chosenMode
350 351 352 ############################# 353 # _getArchiveMode() function 354 ############################# 355
def _getArchiveMode(config, item):
   """
   Determines the archive mode for a collect directory or file.
   A mode set on the item itself wins; otherwise the mode from the collect
   section of the configuration is used.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Archive mode to use.
   """
   chosenMode = item.archiveMode
   if chosenMode is None:
      chosenMode = config.collect.archiveMode
   logger.debug("Archive mode is [%s]" % chosenMode)
   return chosenMode
370 371 372 ############################ 373 # _getIgnoreFile() function 374 ############################ 375
def _getIgnoreFile(config, item):
   """
   Determines the ignore file for a collect directory or file.
   A value set on the item itself wins; otherwise the value from the collect
   section of the configuration is used.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Ignore file to use.
   """
   chosenIgnoreFile = item.ignoreFile
   if chosenIgnoreFile is None:
      chosenIgnoreFile = config.collect.ignoreFile
   logger.debug("Ignore file is [%s]" % chosenIgnoreFile)
   return chosenIgnoreFile
390 391 392 ############################ 393 # _getLinkDepth() function 394 ############################ 395
def _getLinkDepth(item):
   """
   Determines the link depth for a collect directory.
   The value set on the directory is used when present; otherwise the link
   depth defaults to 0 (zero).
   @param item: C{CollectDir} object
   @return: Link depth to use.
   """
   depth = item.linkDepth
   if depth is None:
      depth = 0
   logger.debug("Link depth is [%d]" % depth)
   return depth
409 410 411 ############################ 412 # _getDereference() function 413 ############################ 414
def _getDereference(item):
   """
   Determines the dereference flag for a collect directory.
   The value set on the directory is used when present; otherwise the flag
   defaults to False.
   @param item: C{CollectDir} object
   @return: Dereference flag to use.
   """
   flag = item.dereference
   if flag is None:
      flag = False
   logger.debug("Dereference flag is [%s]" % flag)
   return flag
428 429 430 ############################ 431 # _getDigestPath() function 432 ############################ 433
def _getDigestPath(config, item):
   """
   Builds the digest path associated with a collect directory or file.
   The digest lives in the configured working directory and is named after
   the normalized absolute path of the item, plus the digest extension.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Absolute path to the digest associated with the collect directory or file.
   """
   digestName = "%s.%s" % (buildNormalizedPath(item.absolutePath), DIGEST_EXTENSION)
   result = os.path.join(config.options.workingDir, digestName)
   logger.debug("Digest path is [%s]" % result)
   return result
446 447 448 ############################# 449 # _getTarfilePath() function 450 ############################# 451
def _getTarfilePath(config, item, archiveMode):
   """
   Gets the tarfile path (including correct extension) associated with a
   collect directory or file.

   The tarfile lives in the configured collect target directory and is named
   after the normalized absolute path of the item, plus an extension chosen
   from the archive mode (C{tar}, C{targz} or C{tarbz2}).

   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @param archiveMode: Archive mode to use for this tarfile.
   @return: Absolute path to the tarfile associated with the collect directory or file.
   @raise ValueError: If the archive mode is not one of the known modes.
   """
   if archiveMode == 'tar':
      extension = "tar"
   elif archiveMode == 'targz':
      extension = "tar.gz"
   elif archiveMode == 'tarbz2':
      extension = "tar.bz2"
   else:
      # Without this branch an unknown mode left 'extension' unbound and
      # surfaced as a confusing UnboundLocalError further down.
      raise ValueError("Unknown archive mode [%s]; cannot determine tarfile extension." % (archiveMode,))
   normalized = buildNormalizedPath(item.absolutePath)
   filename = "%s.%s" % (normalized, extension)
   tarfilePath = os.path.join(config.collect.targetDir, filename)
   logger.debug("Tarfile path is [%s]" % tarfilePath)
   return tarfilePath
471 472 473 ############################ 474 # _getExclusions() function 475 ############################ 476
def _getExclusions(config, collectDir):
   """
   Gets exclusions (paths and patterns) associated with a collect directory.

   The returned paths value is a list of absolute paths to be excluded from
   the backup for the directory.  It holds, in order: the absolute exclude
   paths from the collect configuration, the absolute exclude paths from the
   collect directory, and the collect directory's relative exclude paths
   joined onto the directory's absolute path.

   The returned patterns value is a list of patterns to be excluded from the
   backup for the directory.  It holds the patterns from the collect
   configuration followed by those from the collect directory itself.

   @param config: Config object.
   @param collectDir: Collect directory object.

   @return: Tuple (files, patterns) indicating what to exclude.
   """
   excludedPaths = []
   for absoluteSource in (config.collect.absoluteExcludePaths, collectDir.absoluteExcludePaths):
      if absoluteSource is not None:
         excludedPaths.extend(absoluteSource)
   relativeSource = collectDir.relativeExcludePaths
   if relativeSource is not None:
      # Relative exclusions are anchored at the collect directory itself.
      excludedPaths.extend([os.path.join(collectDir.absolutePath, relative) for relative in relativeSource])

   excludedPatterns = []
   for patternSource in (config.collect.excludePatterns, collectDir.excludePatterns):
      if patternSource is not None:
         excludedPatterns.extend(patternSource)

   logger.debug("Exclude paths: %s" % excludedPaths)
   logger.debug("Exclude patterns: %s" % excludedPatterns)
   return (excludedPaths, excludedPatterns)
511