Package CedarBackup2 :: Module util
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup2.util

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2004-2007 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # Portions copyright (c) 2001, 2002 Python Software Foundation. 
  15  # All Rights Reserved. 
  16  # 
  17  # This program is free software; you can redistribute it and/or 
  18  # modify it under the terms of the GNU General Public License, 
  19  # Version 2, as published by the Free Software Foundation. 
  20  # 
  21  # This program is distributed in the hope that it will be useful, 
  22  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  23  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  24  # 
  25  # Copies of the GNU General Public License are available from 
  26  # the Free Software Foundation website, http://www.gnu.org/. 
  27  # 
  28  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  29  # 
  30  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  31  # Language : Python (>= 2.3) 
  32  # Project  : Cedar Backup, release 2 
  33  # Revision : $Id: util.py 1232 2007-09-20 03:07:07Z pronovic $ 
  34  # Purpose  : Provides general-purpose utilities. 
  35  # 
  36  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  37   
  38  ######################################################################## 
  39  # Module documentation 
  40  ######################################################################## 
  41   
  42  """ 
  43  Provides general-purpose utilities.  
  44   
  45  @sort: AbsolutePathList, ObjectTypeList, RestrictedValueList, RegexMatchList, 
  46         RegexList, _Vertex, DirectedGraph, PathResolverSingleton,  
  47         sortDict, convertSize, getUidGid, changeOwnership, splitCommandLine, 
  48         resolveCommand, executeCommand, calculateFileAge, encodePath, nullDevice, 
  49         deriveDayOfWeek, isStartOfWeek, buildNormalizedPath,  
  50         ISO_SECTOR_SIZE, BYTES_PER_SECTOR,  
  51         BYTES_PER_KBYTE, BYTES_PER_MBYTE, BYTES_PER_GBYTE, KBYTES_PER_MBYTE, MBYTES_PER_GBYTE,  
  52         SECONDS_PER_MINUTE, MINUTES_PER_HOUR, HOURS_PER_DAY, SECONDS_PER_DAY,  
  53         UNIT_BYTES, UNIT_KBYTES, UNIT_MBYTES, UNIT_GBYTES, UNIT_SECTORS 
  54   
  55  @var ISO_SECTOR_SIZE: Size of an ISO image sector, in bytes. 
  56  @var BYTES_PER_SECTOR: Number of bytes (B) per ISO sector. 
  57  @var BYTES_PER_KBYTE: Number of bytes (B) per kilobyte (kB). 
  58  @var BYTES_PER_MBYTE: Number of bytes (B) per megabyte (MB). 
  59  @var BYTES_PER_GBYTE: Number of bytes (B) per gigabyte (GB). 
  60  @var KBYTES_PER_MBYTE: Number of kilobytes (kB) per megabyte (MB). 
  61  @var MBYTES_PER_GBYTE: Number of megabytes (MB) per gigabyte (GB). 
  62  @var SECONDS_PER_MINUTE: Number of seconds per minute. 
  63  @var MINUTES_PER_HOUR: Number of minutes per hour. 
  64  @var HOURS_PER_DAY: Number of hours per day. 
  65  @var SECONDS_PER_DAY: Number of seconds per day. 
  66  @var UNIT_BYTES: Constant representing the byte (B) unit for conversion. 
  67  @var UNIT_KBYTES: Constant representing the kilobyte (kB) unit for conversion. 
  68  @var UNIT_MBYTES: Constant representing the megabyte (MB) unit for conversion. 
  69  @var UNIT_GBYTES: Constant representing the gigabyte (GB) unit for conversion. 
  70  @var UNIT_SECTORS: Constant representing the ISO sector unit for conversion. 
  71   
  72  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  73  """ 
  74   
  75   
  76  ######################################################################## 
  77  # Imported modules 
  78  ######################################################################## 
  79   
  80  import sys 
  81  import math 
  82  import os 
  83  import re 
  84  import time 
  85  import logging 
  86  import string 
  87   
  88  try: 
  89     import pwd 
  90     import grp 
  91     _UID_GID_AVAILABLE = True    
  92  except ImportError: 
  93     _UID_GID_AVAILABLE = False    
  94   
  95  try: 
  96     from subprocess import Popen 
  97     _PIPE_IMPLEMENTATION = "subprocess.Popen" 
  98  except ImportError: 
  99     try: 
 100        from popen2 import Popen4 
 101        _PIPE_IMPLEMENTATION = "popen2.Popen4" 
 102     except ImportError: 
 103        raise ImportError("Unable to import either subprocess.Popen or popen2.Popen4 for use by Pipe class.") 
 104   
 105   
 106  ######################################################################## 
 107  # Module-wide constants and variables 
 108  ######################################################################## 
 109   
 110  logger = logging.getLogger("CedarBackup2.log.util")       # logger for this module's own diagnostic messages
 111  outputLogger = logging.getLogger("CedarBackup2.output")   # NOTE(review): presumably carries output of executed commands -- confirm against its users
 112   
 113  ISO_SECTOR_SIZE    = 2048.0   # in bytes 
 114  BYTES_PER_SECTOR   = ISO_SECTOR_SIZE   # alias for ISO_SECTOR_SIZE
 115   
 116  BYTES_PER_KBYTE    = 1024.0   # float literal, so every derived size below is a float as well
 117  KBYTES_PER_MBYTE   = 1024.0 
 118  MBYTES_PER_GBYTE   = 1024.0 
 119  BYTES_PER_MBYTE    = BYTES_PER_KBYTE * KBYTES_PER_MBYTE 
 120  BYTES_PER_GBYTE    = BYTES_PER_MBYTE * MBYTES_PER_GBYTE 
 121   
 122  SECONDS_PER_MINUTE = 60 
 123  MINUTES_PER_HOUR   = 60 
 124  HOURS_PER_DAY      = 24 
 125  SECONDS_PER_DAY    = SECONDS_PER_MINUTE * MINUTES_PER_HOUR * HOURS_PER_DAY   # 86400
 126   
 127  UNIT_BYTES         = 0 
 128  UNIT_KBYTES        = 1 
 129  UNIT_MBYTES        = 2 
 130  UNIT_GBYTES        = 4   # NOTE(review): numbered out of sequence (sectors holds 3); presumably kept for compatibility -- check callers before renumbering
 131  UNIT_SECTORS       = 3 
 132   
 133  MTAB_FILE          = "/etc/mtab" 
 134   
 135  MOUNT_COMMAND      = [ "mount", ]    # command given in argument-list form
 136  UMOUNT_COMMAND     = [ "umount", ]   # command given in argument-list form
 137   
 138  DEFAULT_LANGUAGE   = "C" 
 139  LANG_VAR           = "LANG" 
 140  LOCALE_VARS        = [ "LC_ADDRESS", "LC_ALL", "LC_COLLATE",    # NOTE(review): presumably the locale variables overridden when running commands -- confirm
 141                         "LC_CTYPE", "LC_IDENTIFICATION",  
 142                         "LC_MEASUREMENT", "LC_MESSAGES",  
 143                         "LC_MONETARY", "LC_NAME", "LC_NUMERIC", 
 144                         "LC_PAPER", "LC_TELEPHONE", "LC_TIME", ] 
 145   
 146   
 147  ######################################################################## 
 148  # UnorderedList class definition 
 149  ######################################################################## 
 150   
class UnorderedList(list):

   """
   List whose comparison operators ignore the order of its contents.

   Sometimes it is convenient to hold a collection (say, a set of paths) in
   a list even though only the contents matter and not their position.  For
   comparison purposes, two such lists should be equal whenever they hold
   exactly the same items, regardless of order.

   The comparison is implemented by copying both operands, sorting the
   copies, and comparing the sorted copies.  This only works if two lists
   with the same members are guaranteed to sort into the same order.
   Comparing sets instead was rejected because it would lose information
   about duplicate items.

   The C{__eq__}, C{__ne__}, C{__ge__}, C{__gt__}, C{__le__} and C{__lt__}
   methods are all overridden to return the result of the standard list
   operation I{applied to the sorted copies}.  Every comparison copies and
   sorts both operands, so this class is best kept to small lists.
   """

   def _sortedPair(self, other):
      """
      Builds independently sorted copies of this list and of C{other}.
      @param other: Other sequence to copy and sort; must support slicing.
      @return: Tuple C{(sortedSelf, sortedOther)}.
      """
      mine = self[:]
      theirs = other[:]
      mine.sort()
      theirs.sort()
      return mine, theirs

   def __eq__(self, other):
      """
      Definition of C{==} operator for this class.
      @param other: Other object to compare to.
      @return: True/false depending on whether C{self == other}.
      """
      if other is None:
         return False
      mine, theirs = self._sortedPair(other)
      return list.__eq__(mine, theirs)

   def __ne__(self, other):
      """
      Definition of C{!=} operator for this class.
      @param other: Other object to compare to.
      @return: True/false depending on whether C{self != other}.
      """
      if other is None:
         return True
      mine, theirs = self._sortedPair(other)
      return list.__ne__(mine, theirs)

   def __ge__(self, other):
      """
      Definition of S{>=} operator for this class.
      @param other: Other object to compare to.
      @return: True/false depending on whether C{self >= other}.
      """
      if other is None:
         return True
      mine, theirs = self._sortedPair(other)
      return list.__ge__(mine, theirs)

   def __gt__(self, other):
      """
      Definition of C{>} operator for this class.
      @param other: Other object to compare to.
      @return: True/false depending on whether C{self > other}.
      """
      if other is None:
         return True
      mine, theirs = self._sortedPair(other)
      return list.__gt__(mine, theirs)

   def __le__(self, other):
      """
      Definition of S{<=} operator for this class.
      @param other: Other object to compare to.
      @return: True/false depending on whether C{self <= other}.
      """
      if other is None:
         return False
      mine, theirs = self._sortedPair(other)
      return list.__le__(mine, theirs)

   def __lt__(self, other):
      """
      Definition of C{<} operator for this class.
      @param other: Other object to compare to.
      @return: True/false depending on whether C{self < other}.
      """
      if other is None:
         return False
      mine, theirs = self._sortedPair(other)
      return list.__lt__(mine, theirs)
264 265 266 ######################################################################## 267 # AbsolutePathList class definition 268 ######################################################################## 269
class AbsolutePathList(UnorderedList):

   """
   Class representing a list of absolute paths.

   This is an unordered list.

   We override the C{append}, C{insert} and C{extend} methods to ensure that
   any item added to the list is an absolute path.

   Each item added to the list is encoded using L{encodePath}.  If we don't do
   this, we have problems trying certain operations between strings and unicode
   objects, particularly for "odd" filenames that can't be encoded in standard
   ASCII.
   """

   def append(self, item):
      """
      Overrides the standard C{append} method.
      @param item: Path to add to the end of the list.
      @raise ValueError: If item is not an absolute path.
      """
      if not os.path.isabs(item):
         raise ValueError("Not an absolute path: [%s]" % item)
      list.append(self, encodePath(item))

   def insert(self, index, item):
      """
      Overrides the standard C{insert} method.
      @param index: Position at which to insert the item.
      @param item: Path to insert.
      @raise ValueError: If item is not an absolute path.
      """
      if not os.path.isabs(item):
         raise ValueError("Not an absolute path: [%s]" % item)
      list.insert(self, index, encodePath(item))

   def extend(self, seq):
      """
      Overrides the standard C{extend} method.

      Nothing is added to the list unless every item in C{seq} is valid.

      @param seq: Iterable of paths to add to the end of the list.
      @raise ValueError: If any item is not an absolute path.
      """
      # Snapshot the input first: it is walked twice (validate, then add), so
      # a one-shot iterator would otherwise be empty by the second pass, and
      # extending the list with itself would never terminate.
      items = list(seq)
      for item in items:
         if not os.path.isabs(item):
            raise ValueError("Not an absolute path: [%s]" % item)
      list.extend(self, [encodePath(item) for item in items])
314 315 316 ######################################################################## 317 # ObjectTypeList class definition 318 ######################################################################## 319
class ObjectTypeList(UnorderedList):

   """
   Class representing a list containing only objects with a certain type.

   This is an unordered list.

   We override the C{append}, C{insert} and C{extend} methods to ensure that
   any item added to the list matches the type that is requested.  The
   comparison uses the built-in C{isinstance}, which should allow subclasses
   of the requested type to be added to the list as well.

   The C{objectName} value will be used in exceptions, i.e. C{"Item must be a
   CollectDir object."} if C{objectName} is C{"CollectDir"}.
   """

   def __init__(self, objectType, objectName):
      """
      Initializes a typed list for a particular type.
      @param objectType: Type that the list elements must match.
      @param objectName: Short string containing the "name" of the type.
      """
      self.objectType = objectType
      self.objectName = objectName

   def append(self, item):
      """
      Overrides the standard C{append} method.
      @param item: Object to add to the end of the list.
      @raise ValueError: If item does not match requested type.
      """
      if not isinstance(item, self.objectType):
         raise ValueError("Item must be a %s object." % self.objectName)
      list.append(self, item)

   def insert(self, index, item):
      """
      Overrides the standard C{insert} method.
      @param index: Position at which to insert the item.
      @param item: Object to insert.
      @raise ValueError: If item does not match requested type.
      """
      if not isinstance(item, self.objectType):
         raise ValueError("Item must be a %s object." % self.objectName)
      list.insert(self, index, item)

   def extend(self, seq):
      """
      Overrides the standard C{extend} method.

      Nothing is added to the list unless every item in C{seq} is valid.

      @param seq: Iterable of objects to add to the end of the list.
      @raise ValueError: If any item does not match requested type.
      """
      # Snapshot the input first: it is walked twice (validate, then add), so
      # a one-shot iterator would otherwise be empty by the second pass.
      items = list(seq)
      for item in items:
         if not isinstance(item, self.objectType):
            raise ValueError("All items must be %s objects." % self.objectName)
      list.extend(self, items)
372 373 374 ######################################################################## 375 # RestrictedContentList class definition 376 ######################################################################## 377
class RestrictedContentList(UnorderedList):

   """
   Class representing a list containing only objects with certain values.

   This is an unordered list.

   We override the C{append}, C{insert} and C{extend} methods to ensure that
   any item added to the list is among the valid values.  We use a standard
   comparison, so pretty much anything can be in the list of valid values.

   The C{valuesDescr} value will be used in exceptions, i.e. C{"Item must be
   one of values in VALID_ACTIONS"} if C{valuesDescr} is C{"VALID_ACTIONS"}.

   @note:  This class doesn't make any attempt to trap for nonsensical
   arguments.  All of the values in the values list should be of the same type
   (i.e. strings).  Then, all list operations also need to be of that type
   (i.e. you should always insert or append just strings).  If you mix types --
   for instance lists and strings -- you will likely see AttributeError
   exceptions or other problems.
   """

   def __init__(self, valuesList, valuesDescr, prefix=None):
      """
      Initializes a list restricted to containing certain values.
      @param valuesList: List of valid values.
      @param valuesDescr: Short string describing list of values.
      @param prefix: Prefix to use in error messages (None results in prefix "Item")
      """
      self.prefix = "Item"
      if prefix is not None: self.prefix = prefix
      self.valuesList = valuesList
      self.valuesDescr = valuesDescr

   def append(self, item):
      """
      Overrides the standard C{append} method.
      @param item: Value to add to the end of the list.
      @raise ValueError: If item is not in the values list.
      """
      if item not in self.valuesList:
         raise ValueError("%s must be one of the values in %s." % (self.prefix, self.valuesDescr))
      list.append(self, item)

   def insert(self, index, item):
      """
      Overrides the standard C{insert} method.
      @param index: Position at which to insert the item.
      @param item: Value to insert.
      @raise ValueError: If item is not in the values list.
      """
      if item not in self.valuesList:
         raise ValueError("%s must be one of the values in %s." % (self.prefix, self.valuesDescr))
      list.insert(self, index, item)

   def extend(self, seq):
      """
      Overrides the standard C{extend} method.

      Nothing is added to the list unless every item in C{seq} is valid.

      @param seq: Iterable of values to add to the end of the list.
      @raise ValueError: If any item is not in the values list.
      """
      # Snapshot the input first: it is walked twice (validate, then add), so
      # a one-shot iterator would otherwise be empty by the second pass.
      items = list(seq)
      for item in items:
         if item not in self.valuesList:
            raise ValueError("%s must be one of the values in %s." % (self.prefix, self.valuesDescr))
      list.extend(self, items)
439 440 441 ######################################################################## 442 # RegexMatchList class definition 443 ######################################################################## 444
class RegexMatchList(UnorderedList):

   """
   Class representing a list containing only strings that match a regular expression.

   If C{emptyAllowed} is passed in as C{False}, then empty strings are
   explicitly disallowed, even if they happen to match the regular expression.
   (C{None} values are always disallowed, since string operations are not
   permitted on C{None}.)

   This is an unordered list.

   We override the C{append}, C{insert} and C{extend} methods to ensure that
   any item added to the list matches the indicated regular expression.

   @note: If you try to put values that are not strings into the list, you will
   likely get either TypeError or AttributeError exceptions as a result.
   """

   def __init__(self, valuesRegex, emptyAllowed=True, prefix=None):
      """
      Initializes a list restricted to containing certain values.
      @param valuesRegex: Regular expression that must be matched, as a string
      @param emptyAllowed: Indicates whether empty strings are allowed (C{None} is never allowed).
      @param prefix: Prefix to use in error messages (None results in prefix "Item")
      """
      self.prefix = "Item"
      if prefix is not None: self.prefix = prefix
      self.valuesRegex = valuesRegex
      self.emptyAllowed = emptyAllowed
      self.pattern = re.compile(self.valuesRegex)

   def append(self, item):
      """
      Overrides the standard C{append} method.
      @param item: String to add to the end of the list.
      @raise ValueError: If item is None
      @raise ValueError: If item is empty and empty values are not allowed
      @raise ValueError: If item does not match the configured regular expression
      """
      if item is None or (not self.emptyAllowed and item == ""):
         raise ValueError("%s cannot be empty." % self.prefix)
      if not self.pattern.search(item):
         raise ValueError("%s is not valid: [%s]" % (self.prefix, item))
      list.append(self, item)

   def insert(self, index, item):
      """
      Overrides the standard C{insert} method.
      @param index: Position at which to insert the item.
      @param item: String to insert.
      @raise ValueError: If item is None
      @raise ValueError: If item is empty and empty values are not allowed
      @raise ValueError: If item does not match the configured regular expression
      """
      if item is None or (not self.emptyAllowed and item == ""):
         raise ValueError("%s cannot be empty." % self.prefix)
      if not self.pattern.search(item):
         raise ValueError("%s is not valid [%s]" % (self.prefix, item))
      list.insert(self, index, item)

   def extend(self, seq):
      """
      Overrides the standard C{extend} method.

      Nothing is added to the list unless every item in C{seq} is valid.

      @param seq: Iterable of strings to add to the end of the list.
      @raise ValueError: If any item is None
      @raise ValueError: If any item is empty and empty values are not allowed
      @raise ValueError: If any item does not match the configured regular expression
      """
      # Snapshot the input first: it is walked twice (validate, then add), so
      # a one-shot iterator would otherwise be empty by the second pass.
      items = list(seq)
      for item in items:
         if item is None or (not self.emptyAllowed and item == ""):
            # The prefix must be interpolated with %; passing it as a second
            # argument would leave a literal "%s" in the exception message.
            raise ValueError("%s cannot be empty." % self.prefix)
         if not self.pattern.search(item):
            raise ValueError("%s is not valid: [%s]" % (self.prefix, item))
      list.extend(self, items)
516 517 518 ######################################################################## 519 # RegexList class definition 520 ######################################################################## 521
class RegexList(UnorderedList):

   """
   Class representing a list of valid regular expression strings.

   This is an unordered list.

   We override the C{append}, C{insert} and C{extend} methods to ensure that
   any item added to the list is a valid regular expression.
   """

   def append(self, item):
      """
      Overrides the standard C{append} method.
      @param item: Regular expression string to add to the end of the list.
      @raise ValueError: If item is not a valid regular expression.
      """
      try:
         re.compile(item)
      except re.error:
         raise ValueError("Not a valid regular expression: [%s]" % item)
      list.append(self, item)

   def insert(self, index, item):
      """
      Overrides the standard C{insert} method.
      @param index: Position at which to insert the item.
      @param item: Regular expression string to insert.
      @raise ValueError: If item is not a valid regular expression.
      """
      try:
         re.compile(item)
      except re.error:
         raise ValueError("Not a valid regular expression: [%s]" % item)
      list.insert(self, index, item)

   def extend(self, seq):
      """
      Overrides the standard C{extend} method.

      Nothing is added to the list unless every item in C{seq} is valid.

      @param seq: Iterable of regular expression strings to add to the end of the list.
      @raise ValueError: If any item is not a valid regular expression.
      """
      # Snapshot the input first: it is walked twice (validate, then add), so
      # a one-shot iterator would otherwise be empty by the second pass, and
      # extending the list with itself would never terminate.
      items = list(seq)
      for item in items:
         try:
            re.compile(item)
         except re.error:
            raise ValueError("Not a valid regular expression: [%s]" % item)
      list.extend(self, items)
567 568 569 ######################################################################## 570 # Directed graph implementation 571 ######################################################################## 572
class _Vertex(object):

   """
   Represents a vertex (or node) in a directed graph.

   @ivar name: Name of the vertex.
   @ivar endpoints: List of vertices that this vertex has an edge pointing to.
   @ivar state: Search state assigned while the graph is being sorted.
   """

   def __init__(self, name):
      """
      Constructor.
      @param name: Name of this graph vertex.
      @type name: String value.
      """
      self.name = name
      self.endpoints = []
      self.state = None

class DirectedGraph(object):

   """
   Represents a directed graph.

   A graph B{G=(V,E)} consists of a set of vertices B{V} together with a set
   B{E} of vertex pairs or edges.  In a directed graph, each edge also has an
   associated direction (from vertex B{v1} to vertex B{v2}).  A C{DirectedGraph}
   object provides a way to construct a directed graph and execute a depth-
   first search.

   This data structure was designed based on the graphing chapter in
   U{The Algorithm Design Manual<http://www2.toki.or.id/book/AlgDesignManual/>},
   by Steven S. Skiena.

   This class is intended to be used by Cedar Backup for dependency ordering.
   Because of this, it's not quite general-purpose.  Unlike a "general" graph,
   every vertex in this graph has at least one edge pointing to it, from a
   special "start" vertex.  This is so no vertices get "lost" either because
   they have no dependencies or because nothing depends on them.
   """

   _UNDISCOVERED = 0
   _DISCOVERED   = 1
   _EXPLORED     = 2

   def __init__(self, name):
      """
      Directed graph constructor.

      @param name: Name of this graph.
      @type name: String value.
      @raise ValueError: If the graph name is C{None} or empty.
      """
      if name is None or name == "":
         raise ValueError("Graph name must be non-empty.")
      self._name = name
      self._vertices = {}
      self._startVertex = _Vertex(None)  # start vertex is only vertex with no name

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "DirectedGraph(%s)" % self.name

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __cmp__(self, other):
      """
      Definition of equals operator for this class.
      The graph name is compared first, then the vertex mapping.
      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      if self._name != other._name:
         if self._name < other._name:
            return -1
         else:
            return 1
      if self._vertices != other._vertices:
         if self._vertices < other._vertices:
            return -1
         else:
            return 1
      return 0

   def _getName(self):
      """
      Property target used to get the graph name.
      """
      return self._name

   name = property(_getName, None, None, "Name of the graph.")

   def createVertex(self, name):
      """
      Creates a named vertex.

      Creating a vertex whose name already exists is a no-op: the existing
      vertex and its edges are kept.

      @param name: vertex name
      @raise ValueError: If the vertex name is C{None} or empty.
      """
      if name is None or name == "":
         raise ValueError("Vertex name must be non-empty.")
      if name in self._vertices:
         # Registering a second vertex under the same name would leave the
         # first one attached to the start vertex but missing from the
         # mapping; its state would never be reset, and the sort would then
         # report a cycle that does not exist.
         return
      vertex = _Vertex(name)
      self._startVertex.endpoints.append(vertex)  # so every vertex is connected at least once
      self._vertices[name] = vertex

   def createEdge(self, start, finish):
      """
      Adds an edge with an associated direction, from C{start} vertex to C{finish} vertex.

      No edge is added unless both vertices exist.

      @param start: Name of start vertex.
      @param finish: Name of finish vertex.
      @raise ValueError: If one of the named vertices is unknown.
      """
      for vertexName in (start, finish):
         if vertexName not in self._vertices:
            # repr() yields the quoted form of the name, which is how a
            # failed dictionary lookup renders its key in this message.
            raise ValueError("Vertex [%s] could not be found." % repr(vertexName))
      self._vertices[start].endpoints.append(self._vertices[finish])

   def topologicalSort(self):
      """
      Implements a topological sort of the graph.

      This method also enforces that the graph is a directed acyclic graph,
      which is a requirement of a topological sort.

      A directed acyclic graph (or "DAG") is a directed graph with no directed
      cycles.  A topological sort of a DAG is an ordering on the vertices such
      that all edges go from left to right.  Only an acyclic graph can have a
      topological sort, but any DAG has at least one topological sort.

      Since a topological sort only makes sense for an acyclic graph, this
      method throws an exception if a cycle is found.

      @note: If a particular vertex has no edges, then its position in the
      final list depends on the order in which the vertices were created in the
      graph.  If you're using this method to determine a dependency order, this
      makes sense: a vertex with no dependencies can go anywhere (and will).

      @return: Ordering on the vertices so that all edges go from left to right.

      @raise ValueError: If a cycle is found in the graph.
      """
      ordering = []
      for vertex in self._vertices.values():
         vertex.state = self._UNDISCOVERED
      # Every vertex is an endpoint of the start vertex, so a single search
      # from the start vertex reaches all of them.
      self._topologicalSort(self._startVertex, ordering)
      return ordering

   def _topologicalSort(self, vertex, ordering):
      """
      Recursive depth first search function implementing topological sort.
      @param vertex: Vertex to search
      @param ordering: List of vertices in proper order
      @raise ValueError: If a cycle is found in the graph.
      """
      vertex.state = self._DISCOVERED
      for endpoint in vertex.endpoints:
         if endpoint.state == self._UNDISCOVERED:
            self._topologicalSort(endpoint, ordering)
         elif endpoint.state != self._EXPLORED:
            # The endpoint is still being searched further up the call
            # stack, so following this edge would loop back onto it.
            raise ValueError("Cycle found in graph (found '%s' while searching '%s')." % (vertex.name, endpoint.name))
      if vertex.name is not None:
         # A vertex is placed ahead of everything reachable from it; the
         # unnamed start vertex is never part of the result.
         ordering.insert(0, vertex.name)
      vertex.state = self._EXPLORED
747 748 749 ######################################################################## 750 # PathResolverSingleton class definition 751 ######################################################################## 752
753 -class PathResolverSingleton(object):
754 755 """ 756 Singleton used for resolving executable paths. 757 758 Various functions throughout Cedar Backup (including extensions) need a way 759 to resolve the path of executables that they use. For instance, the image 760 functionality needs to find the C{mkisofs} executable, and the Subversion 761 extension needs to find the C{svnlook} executable. Cedar Backup's original 762 behavior was to assume that the simple name (C{"svnlook"} or whatever) was 763 available on the caller's C{$PATH}, and to fail otherwise. However, this 764 turns out to be less than ideal, since for instance the root user might not 765 always have executables like C{svnlook} in its path. 766 767 One solution is to specify a path (either via an absolute path or some sort 768 of path insertion or path appending mechanism) that would apply to the 769 C{executeCommand()} function. This is not difficult to implement, but it 770 seem like kind of a "big hammer" solution. Besides that, it might also 771 represent a security flaw (for instance, I prefer not to mess with root's 772 C{$PATH} on the application level if I don't have to). 773 774 The alternative is to set up some sort of configuration for the path to 775 certain executables, i.e. "find C{svnlook} in C{/usr/local/bin/svnlook}" or 776 whatever. This PathResolverSingleton aims to provide a good solution to the 777 mapping problem. Callers of all sorts (extensions or not) can get an 778 instance of the singleton. Then, they call the C{lookup} method to try and 779 resolve the executable they are looking for. Through the C{lookup} method, 780 the caller can also specify a default to use if a mapping is not found. 781 This way, with no real effort on the part of the caller, behavior can neatly 782 degrade to something equivalent to the current behavior if there is no 783 special mapping or if the singleton was never initialized in the first 784 place. 
785 786 Even better, extensions automagically get access to the same resolver 787 functionality, and they don't even need to understand how the mapping 788 happens. All extension authors need to do is document what executables 789 their code requires, and the standard resolver configuration section will 790 meet their needs. 791 792 The class should be initialized once through the constructor somewhere in 793 the main routine. Then, the main routine should call the L{fill} method to 794 fill in the resolver's internal structures. Everyone else who needs to 795 resolve a path will get an instance of the class using L{getInstance} and 796 will then just call the L{lookup} method. 797 798 @cvar _instance: Holds a reference to the singleton 799 @ivar _mapping: Internal mapping from resource name to path. 800 """ 801 802 _instance = None # Holds a reference to singleton instance 803
804 - class _Helper:
805 """Helper class to provide a singleton factory method."""
806 - def __call__(self, *args, **kw):
811 812 getInstance = _Helper() # Method that callers will use to get an instance 813
814 - def __init__(self):
815 """Singleton constructor, which just creates the singleton instance.""" 816 if PathResolverSingleton._instance is not None: 817 raise RuntimeError("Only one instance of PathResolverSingleton is allowed!") 818 PathResolverSingleton._instance = self 819 self._mapping = { }
820
821 - def lookup(self, name, default=None):
822 """ 823 Looks up name and returns the resolved path associated with the name. 824 @param name: Name of the path resource to resolve. 825 @param default: Default to return if resource cannot be resolved. 826 @return: Resolved path associated with name, or default if name can't be resolved. 827 """ 828 value = default 829 if name in self._mapping.keys(): 830 value = self._mapping[name] 831 logger.debug("Resolved command [%s] to [%s]." % (name, value)) 832 return value
833
834 - def fill(self, mapping):
835 """ 836 Fills in the singleton's internal mapping from name to resource. 837 @param mapping: Mapping from resource name to path. 838 @type mapping: Dictionary mapping name to path, both as strings. 839 """ 840 self._mapping = { } 841 for key in mapping.keys(): 842 self._mapping[key] = mapping[key]
843 844 845 ######################################################################## 846 # Pipe class definition 847 ######################################################################## 848 849 if _PIPE_IMPLEMENTATION == "subprocess.Popen": 850 851 from subprocess import STDOUT, PIPE 852
class Pipe(Popen):
   """
   Specialized pipe class for use by C{executeCommand}.

   The L{executeCommand} function needs a specialized way of interacting
   with a pipe.  First, C{executeCommand} only reads from the pipe, and
   never writes to it.  Second, C{executeCommand} needs a way to discard all
   output written to C{stderr}, as a means of simulating the shell
   C{2>/dev/null} construct.

   All of this functionality is provided (in Python 2.4 or later) by the
   C{subprocess.Popen} class, so when that class is available, we'll use it.
   Otherwise, there's another implementation based on C{popen2.Popen4},
   which unfortunately only works on UNIX platforms.
   """
   def __init__(self, cmd, bufsize=-1, ignoreStderr=False):
      """
      Starts the command with its C{stdout} attached to a readable pipe.

      @param cmd: Command to execute, as a list of arguments (no shell is involved).
      @param bufsize: Buffer size handed through to C{Popen}.
      @param ignoreStderr: If C{True}, send C{stderr} to the null device;
      otherwise C{stderr} is merged into C{stdout}.
      """
      if not ignoreStderr:
         Popen.__init__(self, shell=False, args=cmd, bufsize=bufsize, stdin=None, stdout=PIPE, stderr=STDOUT)
      else:
         nullFd = os.open(nullDevice(), os.O_RDWR)
         try:
            Popen.__init__(self, shell=False, args=cmd, bufsize=bufsize, stdin=None, stdout=PIPE, stderr=nullFd)
         finally:
            # The child holds its own copy of the descriptor once it has been
            # started, so the parent's copy must be closed or it leaks one
            # descriptor per executed command.
            os.close(nullFd)
      self.fromchild = self.stdout  # for compatibility with original interface based on popen2.Popen4
875 876 else: # _PIPE_IMPLEMENTATION == "popen2.Popen4" 877 878 from popen2 import _cleanup, _active 879
class Pipe(Popen4):
   """
   Specialized pipe class for use by C{executeCommand}.

   The L{executeCommand} function needs a specialized way of interacting with a
   pipe that isn't satisfied by the standard C{Popen3} and C{Popen4} classes in
   C{popen2}.  First, C{executeCommand} only reads from the pipe, and never
   writes to it.  Second, C{executeCommand} needs a way to discard all output
   written to C{stderr}, as a means of simulating the shell C{2>/dev/null}
   construct.

   This class inherits from C{Popen4}.  If the C{ignoreStderr} flag is passed in
   as C{False}, then the standard C{Popen4} constructor will be called and
   C{stdout} and C{stderr} will be intermingled in the output.

   Otherwise, we'll call a custom version of the constructor which was
   basically stolen from the real constructor in C{python2.3/Lib/popen2.py}.
   This custom constructor will redirect the C{stderr} file descriptor to
   C{/dev/null}.  I've done this based on a suggestion from Donn Cave on
   comp.lang.python.

   In either case, the C{tochild} file object is always closed before returning
   from the constructor, since it is never needed by C{executeCommand}.

   I really wish there were a prettier way to do this.  Unfortunately, I
   need access to the guts of the constructor implementation because of the
   way the pipe process is forked, etc.  It doesn't work to just call the
   superclass constructor and then modify a few things afterwards.  Even
   worse, I have to access private C{popen2} module members C{_cleanup} and
   C{_active} in order to duplicate the implementation.

   Hopefully this whole thing will continue to work properly.  At least we
   can use the other L{subprocess.Popen}-based implementation when that
   class is available.

   @copyright: Some of this code, prior to customization, was originally part
   of the Python 2.3 codebase.  Python code is copyright (c) 2001, 2002 Python
   Software Foundation; All Rights Reserved.
   """

   def __init__(self, cmd, bufsize=-1, ignoreStderr=False):
      """
      Starts the command, optionally discarding its C{stderr}.

      @param cmd: Command to execute, passed through to C{Popen4} or C{_run_child}.
      @param bufsize: Buffer size used for the pipe file objects.
      @param ignoreStderr: If C{True}, redirect the child's C{stderr} to the null device.
      """
      if not ignoreStderr:
         Popen4.__init__(self, cmd, bufsize)
      else:
         _cleanup()
         p2cread, p2cwrite = os.pipe()     # parent-to-child pipe (child's stdin)
         c2pread, c2pwrite = os.pipe()     # child-to-parent pipe (child's stdout)
         self.pid = os.fork()
         if self.pid == 0: # Child
            os.dup2(p2cread, 0)
            os.dup2(c2pwrite, 1)
            # Point descriptor 2 (stderr) at the null device, then drop the
            # temporary descriptor that was used to open it.
            devnull = nullDevice()
            null = os.open(devnull, os.O_RDWR)
            os.dup2(null, 2)
            os.close(null)
            self._run_child(cmd)
         # Parent: close the pipe ends that belong to the child and wrap the
         # remaining ends in file objects.
         os.close(p2cread)
         self.tochild = os.fdopen(p2cwrite, 'w', bufsize)
         os.close(c2pwrite)
         self.fromchild = os.fdopen(c2pread, 'r', bufsize)
         _active.append(self)
      self.tochild.close()       # we'll never write to it, and this way we don't confuse anything.
942 943 944 ######################################################################## 945 # General utility functions 946 ######################################################################## 947 948 ###################### 949 # sortDict() function 950 ###################### 951
def sortDict(d):
   """
   Returns the keys of the dictionary sorted by value.

   Keys whose values compare equal keep the relative order in which the
   dictionary yields them.  The keys themselves are never compared.

   The implementation uses the decorate-sort-undecorate idiom, which avoids
   calling a Python-level comparison function for every pair of items and
   still works on Python 2.3.

   @param d: Dictionary to operate on
   @return: List of dictionary keys sorted in order by dictionary value.
   """
   # The position is placed between value and key so that ties on the value
   # are settled by the original position, never by comparing keys.
   decorated = [ (value, position, key) for (position, (key, value)) in enumerate(d.items()) ]
   decorated.sort()
   return [ key for (value, position, key) in decorated ]
962 963 964 ######################## 965 # removeKeys() function 966 ######################## 967
def removeKeys(d, keys):
   """
   Deletes every listed key from the dictionary, modifying it in-place.

   Processing stops at the first key that is missing; keys listed before it
   have already been removed by then.

   @param d: Dictionary to operate on
   @param keys: List of keys to remove
   @raise KeyError: If one of the keys does not exist
   """
   for unwanted in keys:
      del d[unwanted]
979 980 981 ######################### 982 # convertSize() function 983 ######################### 984
def convertSize(size, fromUnit, toUnit):
   """
   Converts a size in one unit to a size in another unit.

   The value is first scaled to bytes and then scaled to the target unit,
   so every conversion goes through a single table of factors.

   The available units are:

      - C{UNIT_BYTES} - Bytes
      - C{UNIT_KBYTES} - Kilobytes, where 1 kB = 1024 B
      - C{UNIT_MBYTES} - Megabytes, where 1 MB = 1024 kB
      - C{UNIT_GBYTES} - Gigabytes, where 1 GB = 1024 MB
      - C{UNIT_SECTORS} - Sectors, where 1 sector = 2048 B

   @param size: Size to convert
   @type size: Integer or float value in units of C{fromUnit}

   @param fromUnit: Unit to convert from
   @type fromUnit: One of the units listed above

   @param toUnit: Unit to convert to
   @type toUnit: One of the units listed above

   @return: Number converted to new unit, as a float.
   @raise ValueError: If the size is C{None} or one of the units is invalid.
   """
   if size is None:
      raise ValueError("Cannot convert size of None.")

   # (unit, bytes per unit) pairs, searched in order using equality
   factors = [ (UNIT_BYTES, 1.0),
               (UNIT_KBYTES, BYTES_PER_KBYTE),
               (UNIT_MBYTES, BYTES_PER_MBYTE),
               (UNIT_GBYTES, BYTES_PER_GBYTE),
               (UNIT_SECTORS, BYTES_PER_SECTOR), ]

   def bytesPerUnit(unit):
      """Returns the number of bytes in one unit, or None for an unknown unit."""
      for (candidate, factor) in factors:
         if unit == candidate:
            return factor
      return None

   sourceFactor = bytesPerUnit(fromUnit)
   if sourceFactor is None:
      raise ValueError("Unknown 'from' unit %s." % fromUnit)
   byteSize = float(size) * sourceFactor

   targetFactor = bytesPerUnit(toUnit)
   if targetFactor is None:
      raise ValueError("Unknown 'to' unit %s." % toUnit)
   return byteSize / targetFactor
1039 1040 1041 ########################## 1042 # displayBytes() function 1043 ########################## 1044
def displayBytes(bytes, digits=2):
   """
   Formats a byte quantity as a human-readable string.

   A raw number like "72372224 bytes" is hard to read; something like
   "69.02 MB" is much easier.  Wherever you would otherwise write::

      print "Size: %s bytes" % bytes

   use this instead::

      print "Size: %s" % displayBytes(bytes)

   The unit (bytes, kB, MB or GB) is chosen from the magnitude of the value.
   The requested number of digits is shown after the decimal point, rounded
   by the rules of Python's standard C{%f} format specifier.  Values smaller
   than 1 kB are shown in bytes without a decimal point, since a fractional
   byte is meaningless.

   @param bytes: Byte quantity.
   @type bytes: Integer number of bytes.

   @param digits: Number of digits to display after the decimal point.
   @type digits: Integer value, typically 2-5.

   @return: String, formatted for sensible display.
   @raise ValueError: If the byte quantity is C{None}.
   """
   if bytes is None:
      raise ValueError("Cannot display byte value of None.")
   quantity = float(bytes)
   if math.fabs(quantity) < BYTES_PER_KBYTE:
      return "%.0f bytes" % quantity
   if math.fabs(quantity) < BYTES_PER_MBYTE:
      (suffix, divisor) = ("f kB", BYTES_PER_KBYTE)
   elif math.fabs(quantity) < BYTES_PER_GBYTE:
      (suffix, divisor) = ("f MB", BYTES_PER_MBYTE)
   else:
      (suffix, divisor) = ("f GB", BYTES_PER_GBYTE)
   template = "%." + "%d" % digits + suffix    # i.e. "%.2f kB"
   return template % (quantity / divisor)
1090 1091 1092 ################################## 1093 # getFunctionReference() function 1094 ################################## 1095
def getFunctionReference(module, function):
   """
   Gets a reference to a named function.

   This does some hokey-pokey to get back a reference to a dynamically named
   function.  For instance, say you wanted to get a reference to the
   C{os.path.isdir} function.  You could use::

      myfunc = getFunctionReference("os.path", "isdir")

   Although we won't bomb out directly, behavior is pretty much undefined if
   you pass in C{None} or C{""} for either C{module} or C{function}.

   The only validation we enforce is that whatever we get back must be
   callable.

   The approach is derived from the internals of the Python unittest
   implementation: import the longest leading part of the dotted name that
   can be imported, then walk the remaining parts as attributes.

   @param module: Name of module associated with function.
   @type module: Something like "os.path" or "CedarBackup2.util"

   @param function: Name of function
   @type function: Something like "isdir" or "getUidGid"

   @return: Reference to function associated with name.

   @raise ImportError: If not even the first part of the name can be imported.
   @raise AttributeError: If a later part of the name cannot be found.
   @raise ValueError: If the resulting reference is not callable.

   @copyright: Some of this code, prior to customization, was originally part
   of the Python 2.3 codebase.  Python code is copyright (c) 2001, 2002 Python
   Software Foundation; All Rights Reserved.
   """
   parts = []
   if module is not None and module != "":
      parts = module.split(".")
   if function is not None and function != "":
      parts.append(function)
   # Keep the imported object in its own variable, so the module name that
   # the caller passed in is still available for the error message below.
   imported = module
   candidate = parts[:]
   while candidate:
      try:
         # __import__("a.b.c") returns the top-level package "a"
         imported = __import__(".".join(candidate))
         break
      except ImportError:
         del candidate[-1]
         if not candidate: raise
   obj = imported
   for part in parts[1:]:     # the first part is the top-level package itself
      obj = getattr(obj, part)
   if not callable(obj):
      raise ValueError("Reference to %s.%s is not callable." % (module, function))
   return obj
1150 1151 1152 ####################### 1153 # getUidGid() function 1154 ####################### 1155
def getUidGid(user, group):
   """
   Translates a user/group name pair into the matching numeric uid/gid pair.

   When user/group functionality is not available on the platform, nothing
   is looked up and C{(0,0)} is returned.

   @param user: User name
   @type user: User name as a string

   @param group: Group name
   @type group: Group name as a string

   @return: Tuple C{(uid, gid)} matching passed-in user and group.
   @raise ValueError: If the ownership user/group values are invalid
   """
   if not _UID_GID_AVAILABLE:
      return (0,0)
   try:
      uid = pwd.getpwnam(user)[2]
      gid = grp.getgrnam(group)[2]
      logger.debug("Translated [%s:%s] into [%d:%d]." % (user, group, uid, gid))
      return (uid, gid)
   except Exception:
      e = sys.exc_info()[1]   # the exception instance, fetched in a way every Python version parses
      logger.debug("Error looking up uid and gid for [%s:%s]: %s" % (user, group, e))
      raise ValueError("Unable to lookup up uid and gid for passed in user/group.")
1182 1183 1184 ############################# 1185 # changeOwnership() function 1186 ############################# 1187
def changeOwnership(path, user, group):
   """
   Changes ownership of path to match the user and group.

   Nothing is attempted when user/group functionality is not available on
   the platform, when either the user or the group is C{None}, or when the
   current process is not running as root.  A failure while looking up the
   ids or changing the owner is logged as an error and not raised.

   @param path: Path whose ownership to change.
   @param user: User which owns file.
   @param group: Group which owns file.
   """
   if not _UID_GID_AVAILABLE:
      return
   if user is None or group is None:
      logger.debug("User or group is None, so not attempting to change owner on [%s]." % path)
      return
   if os.getuid() != 0:
      logger.debug("Not root, so not attempting to change owner on [%s]." % path)
      return
   try:
      (uid, gid) = getUidGid(user, group)
      os.chown(path, uid, gid)
   except Exception:
      e = sys.exc_info()[1]   # the exception instance, fetched in a way every Python version parses
      logger.error("Error changing ownership of [%s]: %s" % (path, e))
1210 1211 1212 ############################## 1213 # splitCommandLine() function 1214 ############################## 1215
def splitCommandLine(commandLine):
   """
   Breaks a command line string into its individual arguments.

   There is no "standard" way to parse a command line string, and doing it
   portably really means emulating the shell's argument processing.  This
   function is much simpler than that: arguments are separated by spaces,
   and only double quotes (C{"}) group several words into one argument.
   Single quotes (C{'}) get no special treatment.  Keep this in mind when
   building your command line.

   The parsing method was found in Google Groups and tweaked for use here.

   @param commandLine: Command line string
   @type commandLine: String, i.e. "cback --verbose stage store"

   @return: List of arguments, suitable for passing to C{popen2}.

   @raise ValueError: If the command line is None.
   """
   if commandLine is None:
      raise ValueError("Cannot split command line of None.")
   # A token is either a run without spaces or quotes, or a quoted section;
   # the quote characters themselves are dropped from the result.
   tokens = re.findall('[^ "]+|"[^"]+"', commandLine)
   return [ token.replace('"', '') for token in tokens ]
1242 1243 1244 ############################ 1245 # resolveCommand() function 1246 ############################ 1247
def resolveCommand(command):
   """
   Resolves the real path to a command through the path resolver mechanism.

   Extensions and standard Cedar Backup functionality alike need to find
   the "real" location of the executables they run.  Usually those are
   simply expected on the system path, but some callers must be able to
   configure an alternate location.

   The mappings are kept in one central place, the L{PathResolverSingleton}.
   This function wraps access to that singleton and is what any code should
   call when it needs to find a command.

   The command must be passed in list form, as used throughout Cedar Backup
   (something like C{["svnlook", ]}).  Only the first element is looked up,
   and the result is a new list; the passed-in list is not modified.

   If no mapping exists for the command, the command name itself is used,
   so the result falls back to the default behavior.

   @param command: Command to resolve.
   @type command: List form of command, i.e. C{["svnlook", ]}.

   @return: Path to command or just command itself if no mapping exists.
   """
   resolver = PathResolverSingleton.getInstance()
   executable = command[0]
   resolved = command[:]
   resolved[0] = resolver.lookup(executable, executable)   # the name is its own default
   return resolved
1283 1284 1285 ############################ 1286 # executeCommand() function 1287 ############################ 1288
def executeCommand(command, args, returnOutput=False, ignoreStderr=False, doNotLog=False, outputFile=None):
   """
   Executes a shell command, hopefully in a safe way.

   This function exists to replace direct calls to C{os.popen} in the Cedar
   Backup code.  It's not safe to call a function such as C{os.popen()} with
   untrusted arguments, since that can cause problems if the string contains
   non-safe variables or other constructs (imagine that the argument is
   C{$WHATEVER}, but C{$WHATEVER} contains something like C{"; rm -fR ~/;
   echo"} in the current environment).

   Instead, it's safer to pass a list of arguments in the style supported by
   C{popen2} or C{popen4}.  This function actually uses a specialized C{Pipe}
   class implemented using either C{subprocess.Popen} or C{popen2.Popen4}.

   Under the normal case, this function will return a tuple of C{(status,
   None)} where the status is the wait-encoded return status of the call per
   the C{popen2.Popen4} documentation.  If C{returnOutput} is passed in as
   C{True}, the function will return a tuple of C{(status, output)} where
   C{output} is a list of strings, one entry per line in the output from the
   command.  Output is always logged to the C{outputLogger.info()} target,
   regardless of whether it's returned.

   If an C{OSError} occurs before the child process could be created, the
   status is C{256} and the output is an empty list (or C{None}).  If it
   occurs afterwards, the status comes from waiting on the child, and the
   output is whatever was collected so far, or a one-element list holding
   the exception if nothing was collected.

   By default, C{stdout} and C{stderr} will be intermingled in the output.
   However, if you pass in C{ignoreStderr=True}, then only C{stdout} will be
   included in the output.

   The C{doNotLog} parameter exists so that callers can force the function to
   not log command output to the debug log.  Normally, you would want to log.
   However, if you're using this function to write huge output files (i.e.
   database backups written to C{stdout}) then you might want to avoid putting
   all that information into the debug log.

   The C{outputFile} parameter exists to make it easier for a caller to push
   output into a file, i.e. as a substitute for redirection to a file.  If this
   value is passed in, each time a line of output is generated, it will be
   written to the file using C{outputFile.write()}.  At the end, the file
   descriptor will be flushed using C{outputFile.flush()}.  The caller
   maintains responsibility for closing the file object appropriately.

   @note: I know that it's a bit confusing that the command and the arguments
   are both lists.  I could have just required the caller to pass in one big
   list.  However, I think it makes some sense to keep the command (the
   constant part of what we're executing, i.e. C{"scp -B"}) separate from its
   arguments, even if they both end up looking kind of similar.

   @note: You cannot redirect output via shell constructs (i.e. C{>file},
   C{2>/dev/null}, etc.) using this function.  The redirection string would be
   passed to the command just like any other argument.  However, you can
   implement the equivalent to redirection using C{ignoreStderr} and
   C{outputFile}, as discussed above.

   @note: The operating system environment is partially sanitized before
   the command is invoked.  See L{sanitizeEnvironment} for details.

   @param command: Shell command to execute
   @type command: List of individual arguments that make up the command

   @param args: List of arguments to the command
   @type args: List of additional arguments to the command

   @param returnOutput: Indicates whether to return the output of the command
   @type returnOutput: Boolean C{True} or C{False}

   @param ignoreStderr: Indicates whether C{stderr} should be discarded
   @type ignoreStderr: Boolean C{True} or C{False}

   @param doNotLog: Indicates that output should not be logged.
   @type doNotLog: Boolean C{True} or C{False}

   @param outputFile: File object that all output should be written to.
   @type outputFile: File object as returned from C{open()} or C{file()}.

   @return: Tuple of C{(result, output)} as described above.
   """
   logger.debug("Executing command %s with args %s." % (command, args))
   outputLogger.info("Executing command %s with args %s." % (command, args))
   if doNotLog:
      logger.debug("Note: output will not be logged, per the doNotLog flag.")
      outputLogger.info("Note: output will not be logged, per the doNotLog flag.")
   output = []
   fields = command[:]        # make sure to copy it so we don't destroy it
   fields.extend(args)
   pipe = None                # stays None if we fail before the child process exists
   try:
      sanitizeEnvironment()   # make sure we have a consistent environment
      pipe = Pipe(fields, ignoreStderr=ignoreStderr)
      while True:
         line = pipe.fromchild.readline()
         if not line: break
         if returnOutput: output.append(line)
         if outputFile is not None: outputFile.write(line)
         if not doNotLog:
            # Log line-by-line so the log will (hopefully) get updated in
            # realtime.  Only a trailing newline is removed; a final line
            # that lacks one is logged intact.
            if line.endswith("\n"):
               outputLogger.info(line[:-1])
            else:
               outputLogger.info(line)
      if outputFile is not None:
         try: # note, not every file-like object can be flushed
            outputFile.flush()
         except Exception:
            pass   # flushing is best-effort only
      if returnOutput:
         return (pipe.wait(), output)
      else:
         return (pipe.wait(), None)
   except OSError:
      e = sys.exc_info()[1]   # the exception instance, fetched in a way every Python version parses
      if pipe is None:        # we never got as far as creating the child process
         if returnOutput:
            return (256, [])
         else:
            return (256, None)
      if returnOutput:
         if output != []:
            return (pipe.wait(), output)
         else:
            return (pipe.wait(), [ e, ])
      else:
         return (pipe.wait(), None)
1400 1401 1402 ############################## 1403 # calculateFileAge() function 1404 ############################## 1405
def calculateFileAge(file):
   """
   Calculates the age (in days) of a file.

   The "age" is the time that has passed since the file was last used,
   where "last used" is the more recent of the file's C{st_atime} and
   C{st_mtime} values.

   The function is meant for files, but it will probably work with anything
   on the filesystem that can be passed to C{os.stat}.

   @param file: Path to a file on disk.

   @return: Age of the file in days.
   @raise OSError: If the file doesn't exist.
   """
   now = int(time.time())
   stats = os.stat(file)
   # the most recent timestamp is the largest one
   secondsUnused = now - max(stats.st_atime, stats.st_mtime)
   return secondsUnused / SECONDS_PER_DAY
1426 1427 1428 ################### 1429 # mount() function 1430 ################### 1431
def mount(devicePath, mountPoint, fsType):
   """
   Mounts the indicated device at the indicated mount point.

   As an example, a CD could be mounted with device path C{/dev/cdrw},
   mount point C{/media/cdrw} and filesystem type C{iso9660}.  Any
   filesystem type supported by C{mount} on your platform can be used.  If
   the type is C{None}, no C{-t} option is passed, which leaves it to
   C{mount} to auto-detect the type.  This may or may not work on all
   systems.

   @note: This only works on platforms that have a concept of "mounting" a
   filesystem through a command-line C{"mount"} command, like UNIXes.  It
   won't work on Windows.

   @param devicePath: Path of device to be mounted.
   @param mountPoint: Path that device should be mounted at.
   @param fsType: Type of the filesystem assumed to be available via the device.

   @raise IOError: If the device cannot be mounted.
   """
   args = [ devicePath, mountPoint ]
   if fsType is not None:
      args = [ "-t", fsType, ] + args
   command = resolveCommand(MOUNT_COMMAND)
   status = executeCommand(command, args, returnOutput=False, ignoreStderr=True)[0]
   if status != 0:
      raise IOError("Error [%d] mounting [%s] at [%s] as [%s]." % (status, devicePath, mountPoint, fsType))
1460 1461 1462 ##################### 1463 # unmount() function 1464 ##################### 1465
def unmount(mountPoint, removeAfter=False, attempts=1, waitSeconds=0):
   """
   Unmounts whatever device is mounted at the indicated mount point.

   Sometimes, it might not be possible to unmount the mount point immediately,
   if there are still files open there.  Use the C{attempts} and C{waitSeconds}
   arguments to indicate how many unmount attempts to make and how many seconds
   to wait between attempts.  If you pass in zero attempts, no attempts will be
   made (duh).

   If the indicated mount point is not really a mount point per
   C{os.path.ismount()}, then it will be ignored.  This seems to be a safer
   check than looking through C{/etc/mtab}, since C{ismount()} is already in
   the Python standard library and is documented as working on all POSIX
   systems.

   If C{removeAfter} is C{True}, then the mount point will be removed using
   C{os.rmdir()} after the unmount action succeeds.  If for some reason the
   mount point is not a directory, then it will not be removed.

   @note: This only works on platforms that have a concept of "mounting" a
   filesystem through a command-line C{"mount"} command, like UNIXes.  It
   won't work on Windows.

   @note: Attempt numbers written to the log start at zero.

   @param mountPoint: Mount point to be unmounted.
   @param removeAfter: Remove the mount point after unmounting it.
   @param attempts: Number of times to attempt the unmount.
   @param waitSeconds: Number of seconds to wait between repeated attempts.

   @raise IOError: If the mount point is still mounted after attempts are exhausted.
   """
   if os.path.ismount(mountPoint):
      for attempt in range(0, attempts):
         logger.debug("Making attempt %d to unmount [%s]." % (attempt, mountPoint))
         command = resolveCommand(UMOUNT_COMMAND)
         result = executeCommand(command, [ mountPoint, ], returnOutput=False, ignoreStderr=True)[0]
         if result != 0:
            logger.error("Error [%d] unmounting [%s] on attempt %d." % (result, mountPoint, attempt))
         elif os.path.ismount(mountPoint):
            # the command reported success, but the mount is still there
            logger.error("After attempt %d, [%s] is still mounted." % (attempt, mountPoint))
         else:
            logger.debug("Successfully unmounted [%s] on attempt %d." % (mountPoint, attempt))
            break  # this will cause us to skip the loop else: clause
         if attempt+1 < attempts:  # i.e. this isn't the last attempt
            if waitSeconds > 0:
               logger.info("Sleeping %d second(s) before next unmount attempt." % waitSeconds)
               time.sleep(waitSeconds)
      else:
         # Reached only when the loop ends without a break, i.e. no attempt
         # was seen to succeed (this includes the zero-attempts case).
         if os.path.ismount(mountPoint):
            raise IOError("Unable to unmount [%s] after %d attempts." % (mountPoint, attempts))
         logger.info("Mount point [%s] seems to have finally gone away." % mountPoint)
      # NOTE(review): nesting reconstructed from a flattened listing; the
      # removal is assumed to sit inside the outer ismount() check -- confirm.
      if os.path.isdir(mountPoint) and removeAfter:
         logger.debug("Removing mount point [%s]." % mountPoint)
         os.rmdir(mountPoint)
1520 1521 1522 ########################### 1523 # deviceMounted() function 1524 ########################### 1525
def deviceMounted(devicePath):
   """
   Indicates whether a specific filesystem device is currently mounted.

   We determine whether the device is mounted by looking through the system's
   C{mtab} file.  This file shows every currently-mounted filesystem, ordered
   by device.  We only do the check if the C{mtab} file exists and is readable.
   Otherwise, we assume that the device is not mounted.

   The device matches if an entry names either the path as passed in or the
   path with symbolic links resolved.  Lines with fewer than two fields
   (blank lines, for instance) are skipped.

   @note: This only works on platforms that have a concept of an mtab file
   to show mounted volumes, like UNIXes.  It won't work on Windows.

   @param devicePath: Path of device to be checked

   @return: True if device is mounted, false otherwise.
   """
   if os.path.exists(MTAB_FILE) and os.access(MTAB_FILE, os.R_OK):
      realPath = os.path.realpath(devicePath)
      mtab = open(MTAB_FILE)
      try:
         lines = mtab.readlines()
      finally:
         mtab.close()   # don't rely on garbage collection to release the file
      for line in lines:
         fields = line.split(None, 2)
         if len(fields) < 2:
            continue    # not a usable entry, ignore it rather than blow up
         (mountDevice, mountPoint) = (fields[0], fields[1])
         if mountDevice in [ devicePath, realPath, ]:
            logger.debug("Device [%s] is mounted at [%s]." % (devicePath, mountPoint))
            return True
   return False
1551 1552 1553 ######################## 1554 # encodePath() function 1555 ######################## 1556
def encodePath(path):
   """
   Safely encodes a filesystem path.

   Many Python filesystem functions, such as C{os.listdir}, behave differently
   depending on whether they get unicode or simple string arguments.  For
   instance, C{os.listdir} generally returns unicode path names if it is
   passed a unicode argument, and string path names if it is passed a string
   argument.

   This behavior is not as consistent as one might like.  C{os.listdir}
   "gives up" on a filename that it can't decode using the current locale
   settings and returns that one name as a simple string.  The result is a
   list containing both unicode and simple string paths, which has
   consequences later, because functions like C{os.path.join} blow up when
   given one string path and one unicode path.

   On comp.lang.python, Martin v. Loewis explained the background, which is
   summarized here:

      - In POSIX, file names are primarily byte strings, not character
        strings.  Only a few bytes (such as C{'.'} and C{'/'}) are
        interpreted as characters by the OS layers.

      - To interpret a file name as characters, the encoding must be known,
        but there is no operating system API that carries a file system
        encoding.  By convention it comes from the locale, in particular
        the LC_CTYPE facet, taken from the environment variables LC_CTYPE,
        LC_ALL and LANG (searched in this order).

      - If LANG is not set, the "C" locale is assumed, which uses ASCII.
        In this locale a name such as C{'\\xe2\\x99\\xaa\\xe2\\x99\\xac'}
        cannot be converted to Unicode.

      - When such a name is found while unicode results were requested,
        Python could skip the name, return it as a byte string, or refuse
        to list the directory at all.  Python chose to return the byte
        string, which lets the application build either of the other
        strategies on top of that.

   His suggested solution is to sensibly encode a path before passing it to
   the filesystem functions, rather than passing in unicode paths.  That is
   what this function does.  Any function which takes a filesystem path as
   an argument should encode it first, before using it for anything else.

   The encoded result differs between systems.  With filesystem encoding
   "ISO-8859-1", the path C{u"\\xe2\\x99\\xaa\\xe2\\x99\\xac"} becomes the
   string C{"\\xe2\\x99\\xaa\\xe2\\x99\\xac"}, while with "utf-8" it becomes
   C{"\\xc3\\xa2\\xc2\\x99\\xc2\\xaa\\xc3\\xa2\\xc2\\x99\\xc2\\xac"}.  The
   original author reports that a quick test (write to the first filename,
   open the second) showed both strings referring to the same file on disk.

   @note: As a special case, if C{path} is C{None}, then this function will
   return C{None}.  Paths that are not unicode objects are returned
   unchanged.

   @note: Some examples of encoding values reported by the original author:
   a Debian sarge box with an ext3 filesystem has Python filesystem encoding
   C{ISO-8859-1}; another Debian box with an xfs filesystem has
   C{ANSI_X3.4-1968}; an iBook G4 running Mac OS X 10.4 and a SuSE 9.3 box
   both have C{UTF-8}.

   @note: Just because a filesystem has C{UTF-8} encoding doesn't mean that it
   will be able to handle all extended-character filenames.  For instance,
   certain extended-character (but not UTF-8) filenames -- like the ones in the
   regression test tar file C{test/data/tree13.tar.gz} -- are not valid under
   Mac OS X, and it's not even possible to extract them from the tarfile on
   that platform.

   @param path: Path to encode

   @return: Path, as a string, encoded appropriately
   @raise ValueError: If the path cannot be encoded properly.
   """
   if path is None or not isinstance(path, unicode):
      return path
   # fall back on the default encoding if no filesystem encoding is reported
   encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
   try:
      return path.encode(encoding)
   except UnicodeError:
      raise ValueError("Path could not be safely encoded as %s." % encoding)
1658 1659 1660 ######################## 1661 # nullDevice() function 1662 ######################## 1663
def nullDevice():
   """
   Returns the name of the null device for this platform, as portably as possible.

   On a UNIX-like system the null device is something like C{/dev/null}; other
   platforms use a different name.

   When the C{os} module provides C{devnull} (Python 2.4 and better), that
   value is returned directly.  On older interpreters the attribute does not
   exist, so we fall back to a guess based on C{platform.platform()}:
   C{"NUL"} if the platform string starts with C{"Windows"}, and
   C{"/dev/null"} (the UNIX-like default) otherwise.

   @return: Name of the null device, as a string.
   """
   device = getattr(os, "devnull", None)
   if device is not None:
      return device
   # Older Python without os.devnull: guess from the platform name
   import platform
   onWindows = platform.platform().startswith("Windows")
   if onWindows:
      return "NUL"
   return "/dev/null"
1684 1685 1686 ############################## 1687 # deriveDayOfWeek() function 1688 ############################## 1689
def deriveDayOfWeek(dayName):
   """
   Maps an English day name onto the weekday number used by C{time.localtime}.

   The lookup is case-insensitive, so for instance both C{"monday"} and
   C{"Monday"} are converted to the number C{0}.

   @param dayName: Day of week to convert
   @type dayName: string, i.e. C{"monday"}, C{"tuesday"}, etc.

   @returns: Integer, where Monday is 0 and Sunday is 6; or -1 if the name is not a recognized day.
   """
   dayNumbers = {
      "monday": 0,
      "tuesday": 1,
      "wednesday": 2,
      "thursday": 3,
      "friday": 4,
      "saturday": 5,
      "sunday": 6,
   }
   # Unrecognized names yield -1 instead of raising an exception
   return dayNumbers.get(dayName.lower(), -1)
1717 1718 1719 ########################### 1720 # isStartOfWeek() function 1721 ########################### 1722
def isStartOfWeek(startingDay):
   """
   Reports whether "today" is the configured backup starting day.

   The current local weekday (C{time.localtime().tm_wday}) is compared with
   the numeric value that L{deriveDayOfWeek} produces for C{startingDay}.  The
   outcome is also written to the debug log.

   @param startingDay: Configured starting day.
   @type startingDay: string, i.e. C{"monday"}, C{"tuesday"}, etc.

   @return: Boolean indicating whether today is the starting day.
   """
   today = time.localtime().tm_wday
   isStart = (today == deriveDayOfWeek(startingDay))
   if not isStart:
      logger.debug("Today is NOT the start of the week.")
      return isStart
   logger.debug("Today is the start of the week.")
   return isStart
1741 1742 1743 ################################# 1744 # buildNormalizedPath() function 1745 ################################# 1746
def buildNormalizedPath(path):
   """
   Builds a "normalized" form of a path that is itself usable as a file name.

   Turning a complete path into a valid file name means converting or removing
   characters that are significant to the filesystem: the path separators, and
   a leading C{'.'} (which would cause the file to be hidden in a listing).

   This is a one-way transformation; the original path cannot be safely
   recovered from the normalized one.

   The steps, applied in order, are:

      - a leading C{'/'} is removed, then a leading C{'\\'} is removed
      - a leading C{'.'} is converted to C{'_'}
      - every remaining C{'/'} or C{'\\'} is converted to C{'-'}
      - every whitespace character is converted to C{'_'}

   As special cases, an empty path is returned unchanged, and a path
   consisting only of a single C{'/'} or C{'\\'} is converted to C{'-'}.

   @param path: Path to normalize

   @return: Normalized path as described above.

   @raise ValueError: If the path is None
   """
   if path is None:
      raise ValueError("Cannot normalize path None.")
   if len(path) == 0:
      return path
   if path in ("/", "\\"):
      return "-"
   # Order matters: the leading-character rules must run before the global ones
   substitutions = (
      (r"^\/", ""),    # remove leading '/'
      (r"^\\", ""),    # remove leading '\'
      (r"^\.", "_"),   # convert leading '.' to '_' so file won't be hidden
      (r"\/", "-"),    # convert all '/' characters to '-'
      (r"\\", "-"),    # convert all '\' characters to '-'
      (r"\s", "_"),    # convert all whitespace to '_'
   )
   result = path
   for pattern, replacement in substitutions:
      result = re.sub(pattern, replacement, result)
   return result
1790 1791 1792 ################################# 1793 # sanitizeEnvironment() function 1794 ################################# 1795
def sanitizeEnvironment():
   """
   Sanitizes the operating system environment.

   The operating system environment is contained in C{os.environ}.  This
   function sanitizes the contents of that dictionary.

   Currently, all it does is reset the locale (removing every variable named
   in L{LOCALE_VARS}) and force the language variable (L{LANG_VAR}) to
   L{DEFAULT_LANGUAGE} when it is set to something else.  This way, we can
   count on consistent localization regardless of what the end-user has
   configured.  This is important for code that needs to parse program output.

   The C{os.environ} dictionary is modified in-place.  If the language variable
   is already set to the proper value, it is not re-set, so we can avoid the
   memory leaks that are documented to occur on BSD-based systems.  If the
   language variable is not set at all, it is left unset.

   @return: Copy of the sanitized environment.
   """
   # Membership is tested with "in" rather than the deprecated has_key(),
   # which no longer exists on Python 3 mappings.
   for var in LOCALE_VARS:
      if var in os.environ:
         del os.environ[var]
   if LANG_VAR in os.environ:
      if os.environ[LANG_VAR] != DEFAULT_LANGUAGE: # no need to reset if it exists (avoid leaks on BSD systems)
         os.environ[LANG_VAR] = DEFAULT_LANGUAGE
   return os.environ.copy()
1821