Package CedarBackup2 :: Package actions :: Module collect
[hide private]
[frames | no frames]

Source Code for Module CedarBackup2.actions.collect

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2004-2007 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # This program is free software; you can redistribute it and/or 
 15  # modify it under the terms of the GNU General Public License, 
 16  # Version 2, as published by the Free Software Foundation. 
 17  # 
 18  # This program is distributed in the hope that it will be useful, 
 19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 21  # 
 22  # Copies of the GNU General Public License are available from 
 23  # the Free Software Foundation website, http://www.gnu.org/. 
 24  # 
 25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 26  # 
 27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 28  # Language : Python (>= 2.3) 
 29  # Project  : Cedar Backup, release 2 
 30  # Revision : $Id: collect.py 1181 2007-03-25 16:18:22Z pronovic $ 
 31  # Purpose  : Implements the standard 'collect' action. 
 32  # 
 33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 34   
 35  ######################################################################## 
 36  # Module documentation 
 37  ######################################################################## 
 38   
 39  """ 
 40  Implements the standard 'collect' action. 
 41  @sort: executeCollect 
 42  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 43  """ 
 44   
 45   
 46  ######################################################################## 
 47  # Imported modules 
 48  ######################################################################## 
 49   
 50  # System modules 
 51  import os 
 52  import logging 
 53  import pickle 
 54   
 55  # Cedar Backup modules 
 56  from CedarBackup2.filesystem import BackupFileList 
 57  from CedarBackup2.util import isStartOfWeek, changeOwnership, displayBytes, buildNormalizedPath 
 58  from CedarBackup2.actions.constants import DIGEST_EXTENSION, COLLECT_INDICATOR 
 59  from CedarBackup2.actions.util import writeIndicatorFile 
 60   
 61   
 62  ######################################################################## 
 63  # Module-wide constants and variables 
 64  ######################################################################## 
 65   
 66  logger = logging.getLogger("CedarBackup2.log.actions.collect") 
 67   
 68   
 69  ######################################################################## 
 70  # Public functions 
 71  ######################################################################## 
 72   
 73  ############################ 
 74  # executeCollect() function 
 75  ############################ 
 76   
def executeCollect(configPath, options, config):
   """
   Runs the standard 'collect' action for every configured file and directory.

   Each configured collect file and collect directory is examined in turn.
   An item is backed up today when a full backup was requested, when its
   collect mode is C{daily} or C{incr}, or when its collect mode is C{weekly}
   and today is the configured start of the week.

   @note: After all items have been processed, a collect indicator is written
   to the collect directory so it is obvious that the collect action has
   completed.  The stage process uses this indicator to decide whether a peer
   is ready to be staged.

   @param configPath: Path to configuration file on disk.
   @type configPath: String representing a path on disk.

   @param options: Program command-line options.
   @type options: Options object.

   @param config: Program configuration.
   @type config: Config object.

   @raise ValueError: Under many generic error conditions
   @raise TarError: If there is a problem creating a tar file
   """
   logger.debug("Executing the 'collect' action.")
   if config.options is None or config.collect is None:
      raise ValueError("Collect configuration is not properly filled in.")
   collectFiles = config.collect.collectFiles
   collectDirs = config.collect.collectDirs
   haveFiles = collectFiles is not None and len(collectFiles) >= 1
   haveDirs = collectDirs is not None and len(collectDirs) >= 1
   if not haveFiles and not haveDirs:
      raise ValueError("There must be at least one collect file or collect directory.")
   fullBackup = options.full
   logger.debug("Full backup flag is [%s]" % fullBackup)
   todayIsStart = isStartOfWeek(config.options.startingDay)
   # A full backup or the first day of the week both invalidate old digests.
   resetDigest = fullBackup or todayIsStart
   logger.debug("Reset digest flag is [%s]" % resetDigest)
   for collectFile in (collectFiles or []):
      sourcePath = collectFile.absolutePath
      logger.debug("Working with collect file [%s]" % sourcePath)
      collectMode = _getCollectMode(config, collectFile)
      archiveMode = _getArchiveMode(config, collectFile)
      digestPath = _getDigestPath(config, collectFile)
      tarfilePath = _getTarfilePath(config, collectFile, archiveMode)
      dueToday = fullBackup or collectMode in ('daily', 'incr') or (collectMode == 'weekly' and todayIsStart)
      if dueToday:
         logger.debug("File meets criteria to be backed up today.")
         _collectFile(config, sourcePath, tarfilePath,
                      collectMode, archiveMode, resetDigest, digestPath)
      else:
         logger.debug("File will not be backed up, per collect mode.")
      logger.info("Completed collecting file [%s]" % sourcePath)
   for collectDir in (collectDirs or []):
      sourcePath = collectDir.absolutePath
      logger.debug("Working with collect directory [%s]" % sourcePath)
      collectMode = _getCollectMode(config, collectDir)
      archiveMode = _getArchiveMode(config, collectDir)
      ignoreFile = _getIgnoreFile(config, collectDir)
      digestPath = _getDigestPath(config, collectDir)
      tarfilePath = _getTarfilePath(config, collectDir, archiveMode)
      (excludePaths, excludePatterns) = _getExclusions(config, collectDir)
      dueToday = fullBackup or collectMode in ('daily', 'incr') or (collectMode == 'weekly' and todayIsStart)
      if dueToday:
         logger.debug("Directory meets criteria to be backed up today.")
         _collectDirectory(config, sourcePath, tarfilePath,
                           collectMode, archiveMode, ignoreFile, resetDigest,
                           digestPath, excludePaths, excludePatterns)
      else:
         logger.debug("Directory will not be backed up, per collect mode.")
      logger.info("Completed collecting directory [%s]" % sourcePath)
   writeIndicatorFile(config.collect.targetDir, COLLECT_INDICATOR,
                      config.options.backupUser, config.options.backupGroup)
   logger.info("Executed the 'collect' action successfully.")
143 144 145 ######################################################################## 146 # Private utility functions 147 ######################################################################## 148 149 ########################## 150 # _collectFile() function 151 ########################## 152
def _collectFile(config, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Collects a single configured collect file into a tarfile.

   A backup list containing only the indicated file is built and handed to
   L{_executeBackup}, which writes the tarfile.  For files collected
   incrementally, the digest path and reset digest flag are passed through
   to L{_executeBackup}, which handles the digest.

   The caller is responsible for resolving the collect and archive modes,
   because they may be set either on the collect configuration or on the
   collect file itself.

   @param config: Config object.
   @param absolutePath: Absolute path of file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   singleFileList = BackupFileList()
   singleFileList.addFile(absolutePath)
   _executeBackup(config, singleFileList, absolutePath, tarfilePath,
                  collectMode, archiveMode, resetDigest, digestPath)
177 178 179 ############################### 180 # _collectDirectory() function 181 ############################### 182
def _collectDirectory(config, absolutePath, tarfilePath, collectMode, archiveMode,
                      ignoreFile, resetDigest, digestPath, excludePaths, excludePatterns):
   """
   Collects a configured collect directory into a tarfile.

   A backup list is configured with the ignore file and exclusions, filled
   with the contents of the indicated directory, and handed to
   L{_executeBackup}, which writes the tarfile.  For directories collected
   incrementally, the digest path and reset digest flag are passed through
   to L{_executeBackup}, which handles the digest.

   The caller is responsible for resolving the collect and archive modes,
   because they may be set either on the collect configuration or on the
   collect directory itself.

   @param config: Config object.
   @param absolutePath: Absolute path of directory to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param ignoreFile: Ignore file to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   @param excludePaths: List of absolute paths to exclude.
   @param excludePatterns: List of patterns to exclude.
   """
   dirContents = BackupFileList()
   # Exclusions must be in place before the directory is walked.
   dirContents.ignoreFile = ignoreFile
   dirContents.excludePaths = excludePaths
   dirContents.excludePatterns = excludePatterns
   dirContents.addDirContents(absolutePath)
   _executeBackup(config, dirContents, absolutePath, tarfilePath,
                  collectMode, archiveMode, resetDigest, digestPath)
214 215 216 ############################ 217 # _executeBackup() function 218 ############################ 219
def _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Executes the backup process for the indicated backup list.

   This function consolidates behavior shared by L{_collectFile} and
   L{_collectDirectory}.  Those functions build the backup list; this one
   writes the tarfile and, for the C{incr} collect mode, manages the digest
   file on disk.

   In C{incr} mode the old digest is either discarded (reset digest flag set)
   or loaded from disk, unchanged files are removed from the backup list, and
   a new digest is always written afterwards.  In every other mode no digest
   is used.  A tarfile is only generated when the backup list is non-empty.

   For collect files, the digest file will always just contain the single
   file that is being backed up.  This might be a little wasteful in terms of
   the number of files that we keep around, but it's consistent and easy to
   understand.

   @param config: Config object.
   @param backupList: List to execute backup for
   @param absolutePath: Absolute path of directory or file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   incremental = (collectMode == 'incr')
   newDigest = None
   if incremental:
      if resetDigest:
         logger.debug("Based on resetDigest flag, digest will be cleared.")
         oldDigest = {}
      else:
         logger.debug("Based on resetDigest flag, digest will loaded from disk.")
         oldDigest = _loadDigest(digestPath)
      (removed, newDigest) = backupList.removeUnchanged(oldDigest, captureDigest=True)
      logger.debug("Removed %d unchanged files based on digest values." % removed)
   else:
      logger.debug("Collect mode is [%s]; no digest will be used." % collectMode)
   fileCount = len(backupList)
   if fileCount == 1 and backupList[0] == absolutePath:  # special case for individual file
      summary = "Backing up file [%s] (%s)." % (absolutePath, displayBytes(backupList.totalSize()))
   else:
      summary = "Backing up %d files in [%s] (%s)." % (fileCount, absolutePath, displayBytes(backupList.totalSize()))
   logger.info(summary)
   if fileCount > 0:
      backupList.generateTarfile(tarfilePath, archiveMode, True)
      changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)
   if incremental:
      # The new digest is written even when nothing needed to be backed up.
      _writeDigest(config, newDigest, digestPath)
269 270 271 ######################### 272 # _loadDigest() function 273 ######################### 274
275 -def _loadDigest(digestPath):
276 """ 277 Loads the indicated digest path from disk into a dictionary. 278 279 If we can't load the digest successfully (either because it doesn't exist or 280 for some other reason), then an empty dictionary will be returned - but the 281 condition will be logged. 282 283 @param digestPath: Path to the digest file on disk. 284 285 @return: Dictionary representing contents of digest path. 286 """ 287 if not os.path.isfile(digestPath): 288 digest = {} 289 logger.debug("Digest [%s] does not exist on disk." % digestPath) 290 else: 291 try: 292 digest = pickle.load(open(digestPath, "r")) 293 logger.debug("Loaded digest [%s] from disk: %d entries." % (digestPath, len(digest))) 294 except: 295 digest = {} 296 logger.error("Failed loading digest [%s] from disk." % digestPath) 297 return digest
298 299 300 ########################## 301 # _writeDigest() function 302 ########################## 303
304 -def _writeDigest(config, digest, digestPath):
305 """ 306 Writes the digest dictionary to the indicated digest path on disk. 307 308 If we can't write the digest successfully for any reason, we'll log the 309 condition but won't throw an exception. 310 311 @param config: Config object. 312 @param digest: Digest dictionary to write to disk. 313 @param digestPath: Path to the digest file on disk. 314 """ 315 try: 316 pickle.dump(digest, open(digestPath, "w")) 317 changeOwnership(digestPath, config.options.backupUser, config.options.backupGroup) 318 logger.debug("Wrote new digest [%s] to disk: %d entries." % (digestPath, len(digest))) 319 except: 320 logger.error("Failed to write digest [%s] to disk." % digestPath)
321 322 323 ######################################################################## 324 # Private attribute "getter" functions 325 ######################################################################## 326 327 ############################# 328 # _getCollectMode() function 329 ############################# 330
331 -def _getCollectMode(config, item):
332 """ 333 Gets the collect mode that should be used for a collect directory or file. 334 If possible, use the one on the file or directory, otherwise take from collect section. 335 @param config: Config object. 336 @param item: C{CollectFile} or C{CollectDir} object 337 @return: Collect mode to use. 338 """ 339 if item.collectMode is None: 340 collectMode = config.collect.collectMode 341 else: 342 collectMode = item.collectMode 343 logger.debug("Collect mode is [%s]" % collectMode) 344 return collectMode
345 346 347 ############################# 348 # _getArchiveMode() function 349 ############################# 350
351 -def _getArchiveMode(config, item):
352 """ 353 Gets the archive mode that should be used for a collect directory or file. 354 If possible, use the one on the file or directory, otherwise take from collect section. 355 @param config: Config object. 356 @param item: C{CollectFile} or C{CollectDir} object 357 @return: Archive mode to use. 358 """ 359 if item.archiveMode is None: 360 archiveMode = config.collect.archiveMode 361 else: 362 archiveMode = item.archiveMode 363 logger.debug("Archive mode is [%s]" % archiveMode) 364 return archiveMode
365 366 367 ############################ 368 # _getIgnoreFile() function 369 ############################ 370
371 -def _getIgnoreFile(config, item):
372 """ 373 Gets the ignore file that should be used for a collect directory or file. 374 If possible, use the one on the file or directory, otherwise take from collect section. 375 @param config: Config object. 376 @param item: C{CollectFile} or C{CollectDir} object 377 @return: Ignore file to use. 378 """ 379 if item.ignoreFile is None: 380 ignoreFile = config.collect.ignoreFile 381 else: 382 ignoreFile = item.ignoreFile 383 logger.debug("Ignore file is [%s]" % ignoreFile) 384 return ignoreFile
385 386 387 ############################ 388 # _getDigestPath() function 389 ############################ 390
def _getDigestPath(config, item):
   """
   Builds the digest path associated with a collect directory or file.

   The digest file name is the normalized form of the item's absolute path
   followed by the digest extension, and it lives in the configured working
   directory.

   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Absolute path to the digest associated with the collect directory or file.
   """
   digestName = "%s.%s" % (buildNormalizedPath(item.absolutePath), DIGEST_EXTENSION)
   digestPath = os.path.join(config.options.workingDir, digestName)
   logger.debug("Digest path is [%s]" % digestPath)
   return digestPath
403 404 405 ############################# 406 # _getTarfilePath() function 407 ############################# 408
409 -def _getTarfilePath(config, item, archiveMode):
410 """ 411 Gets the tarfile path (including correct extension) associated with a collect directory. 412 @param config: Config object. 413 @param item: C{CollectFile} or C{CollectDir} object 414 @param archiveMode: Archive mode to use for this tarfile. 415 @return: Absolute path to the tarfile associated with the collect directory. 416 """ 417 if archiveMode == 'tar': 418 extension = "tar" 419 elif archiveMode == 'targz': 420 extension = "tar.gz" 421 elif archiveMode == 'tarbz2': 422 extension = "tar.bz2" 423 normalized = buildNormalizedPath(item.absolutePath) 424 filename = "%s.%s" % (normalized, extension) 425 tarfilePath = os.path.join(config.collect.targetDir, filename) 426 logger.debug("Tarfile path is [%s]" % tarfilePath) 427 return tarfilePath
428 429 430 ############################ 431 # _getExclusions() function 432 ############################ 433
434 -def _getExclusions(config, collectDir):
435 """ 436 Gets exclusions (file and patterns) associated with a collect directory. 437 438 The returned files value is a list of absolute paths to be excluded from the 439 backup for a given directory. It is derived from the collect configuration 440 absolute exclude paths and the collect directory's absolute and relative 441 exclude paths. 442 443 The returned patterns value is a list of patterns to be excluded from the 444 backup for a given directory. It is derived from the list of patterns from 445 the collect configuration and from the collect directory itself. 446 447 @param config: Config object. 448 @param collectDir: Collect directory object. 449 450 @return: Tuple (files, patterns) indicating what to exclude. 451 """ 452 paths = [] 453 if config.collect.absoluteExcludePaths is not None: 454 paths.extend(config.collect.absoluteExcludePaths) 455 if collectDir.absoluteExcludePaths is not None: 456 paths.extend(collectDir.absoluteExcludePaths) 457 if collectDir.relativeExcludePaths is not None: 458 for relativePath in collectDir.relativeExcludePaths: 459 paths.append(os.path.join(collectDir.absolutePath, relativePath)) 460 patterns = [] 461 if config.collect.excludePatterns is not None: 462 patterns.extend(config.collect.excludePatterns) 463 if collectDir.excludePatterns is not None: 464 patterns.extend(collectDir.excludePatterns) 465 logger.debug("Exclude paths: %s" % paths) 466 logger.debug("Exclude patterns: %s" % patterns) 467 return(paths, patterns)
468