Package cloudfiles :: Module storage_object
[frames | no frames]

Source Code for Module cloudfiles.storage_object

  1  """ 
  2  Object operations 
  3   
  4  An Object is analogous to a file on a conventional filesystem. You can 
  5  read data from, or write data to your Objects. You can also associate 
  6  arbitrary metadata with them. 
  7   
  8  See COPYING for license information. 
  9  """ 
 10   
 11  try: 
 12      from hashlib import md5 
 13  except ImportError: 
 14      from md5 import md5 
 15  import StringIO 
 16  import mimetypes 
 17  import os 
 18   
 19  from urllib  import quote 
 20  from errors  import ResponseError, NoSuchObject, \ 
 21                      InvalidObjectName, IncompleteSend, \ 
 22                      InvalidMetaName, InvalidMetaValue 
 23   
 24  from socket  import timeout 
 25  import consts 
 26  from utils   import unicode_quote, requires_name 
# Because HTTPResponse objects *have* to have read() called on them
# before they can be used again ...
# pylint: disable-msg=W0612


class Object(object):
    """
    Storage data representing an object, (metadata and data).

    @undocumented: _make_headers
    @undocumented: _name_check
    @undocumented: _initialize
    @undocumented: compute_md5sum
    @undocumented: __get_conn_for_write
    @ivar name: the object's name (generally treat as read-only)
    @type name: str
    @ivar content_type: the object's content-type (set or read)
    @type content_type: str
    @ivar metadata: metadata associated with the object (set or read)
    @type metadata: dict
    @ivar manifest: value sent as the X-Object-Manifest header (set or read)
    @type manifest: str
    @ivar size: the object's size (cached)
    @type size: number
    @ivar last_modified: date and time of last file modification (cached)
    @type last_modified: str
    @ivar container: the object's container (generally treat as read-only)
    @type container: L{Container}
    """
    # R/O support of the legacy objsum attr.
    objsum = property(lambda self: self._etag)

    def __set_etag(self, value):
        """
        Store a caller-supplied etag and remember that it was assigned by
        hand, so write()/send()/copy_*() do not discard or recompute it.
        """
        self._etag = value
        self._etag_override = True

    etag = property(lambda self: self._etag, __set_etag)

    def __init__(self, container, name=None,
                 force_exists=False, object_record=None):
        """
        Storage objects rarely if ever need to be instantiated directly by the
        user.

        Instead, use the L{create_object<Container.create_object>},
        L{get_object<Container.get_object>},
        L{list_objects<Container.list_objects>} and other
        methods on its parent L{Container} object.

        @param container: the container holding this object
        @type container: L{Container}
        @param name: the object's name (ignored when object_record is given)
        @type name: str
        @param force_exists: raise L{NoSuchObject} if the HEAD lookup does
                             not find the object
        @type force_exists: boolean
        @param object_record: a listing record with the keys 'name',
                              'content_type', 'bytes', 'last_modified' and
                              'hash'; when given, no remote lookup is made
        @type object_record: dict
        """
        self.container = container
        self.last_modified = None
        self.metadata = {}
        self.manifest = None
        self._etag_override = False
        if object_record:
            self.name = object_record['name']
            self.content_type = object_record['content_type']
            self.size = object_record['bytes']
            self.last_modified = object_record['last_modified']
            self._etag = object_record['hash']
        else:
            self.name = name
            self.content_type = None
            self.size = None
            self._etag = None
            if not self._initialize() and force_exists:
                raise NoSuchObject(self.name)

    @requires_name(InvalidObjectName)
    def read(self, size=-1, offset=0, hdrs=None, buffer=None, callback=None):
        """
        Read the content from the remote storage object.

        By default this method will buffer the response in memory and
        return it as a string. However, if a file-like object is passed
        in using the buffer keyword, the response will be written to it
        instead.

        A callback can be passed in for reporting on the progress of
        the download. The callback should accept two integers, the first
        will be for the amount of data written so far, the second for
        the total size of the transfer. Note: This option is only
        applicable when used in conjunction with the buffer option.

        >>> test_object.write('hello')
        >>> test_object.read()
        'hello'

        @param size: combined with offset, defines the length of data to be
                     read; a Range header is only sent when size > 0
        @type size: number
        @param offset: combined with size, defines the start location to be
                       read (ignored unless size > 0)
        @type offset: number
        @param hdrs: an optional dict of headers to send with the request
                     (it is copied, never modified)
        @type hdrs: dictionary
        @param buffer: an optional file-like object to write the content to
        @type buffer: file-like object
        @param callback: function to be used as a progress callback
        @type callback: callable(transferred, size)
        @rtype: str or None
        @return: a string of all data in the object, or None if a buffer is
                 used
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()
        if size > 0:
            byte_range = 'bytes=%d-%d' % (offset, (offset + size) - 1)
            # Work on a copy so the caller's dict is left untouched.
            if hdrs:
                hdrs = dict(hdrs)
            else:
                hdrs = {}
            hdrs['Range'] = byte_range
        response = self.container.conn.make_request(
            'GET', path=[self.container.name, self.name], hdrs=hdrs)
        if (response.status < 200) or (response.status > 299):
            # Drain the body so the connection can be reused.
            response.read()
            raise ResponseError(response.status, response.reason)

        if not hasattr(buffer, 'write'):
            return response.read()

        transferred = 0
        scratch = response.read(8192)
        while len(scratch) > 0:
            buffer.write(scratch)
            transferred += len(scratch)
            if callable(callback):
                callback(transferred, self.size)
            scratch = response.read(8192)
        return None

    def save_to_filename(self, filename, callback=None):
        """
        Save the contents of the object to filename.

        >>> container = connection['container1']
        >>> obj = container.get_object('backup_file')
        >>> obj.save_to_filename('./backup_file')

        @param filename: name of the file
        @type filename: str
        @param callback: function to be used as a progress callback
        @type callback: callable(transferred, size)
        """
        fobj = open(filename, 'wb')
        try:
            self.read(buffer=fobj, callback=callback)
        finally:
            fobj.close()

    @requires_name(InvalidObjectName)
    def stream(self, chunksize=8192, hdrs=None):
        """
        Return a generator of the remote storage object's data.

        Warning: The HTTP response is only complete after this generator
        has raised a StopIteration. No other methods can be called until
        this has occurred.

        >>> test_object.write('hello')
        >>> test_object.stream()
        <generator object at 0xb77939cc>
        >>> '-'.join(test_object.stream(chunksize=1))
        'h-e-l-l-o'

        @param chunksize: size in bytes yielded by the generator
        @type chunksize: number
        @param hdrs: an optional dict of headers to send in the request
        @type hdrs: dict
        @rtype: str generator
        @return: a generator which yields strings as the object is downloaded
        @raise ResponseError: (on first iteration) if the server answers
                              with a non-2xx status
        """
        self._name_check()
        response = self.container.conn.make_request(
            'GET', path=[self.container.name, self.name], hdrs=hdrs)
        if response.status < 200 or response.status > 299:
            response.read()
            raise ResponseError(response.status, response.reason)
        buff = response.read(chunksize)
        while len(buff) > 0:
            yield buff
            buff = response.read(chunksize)
        # I hate you httplib
        # (one final unsized read() so the response is fully consumed)
        response.read()

    @requires_name(InvalidObjectName)
    def sync_metadata(self):
        """
        Commits the metadata to the remote storage system.

        >>> test_object = container['paradise_lost.pdf']
        >>> test_object.metadata = {'author': 'John Milton'}
        >>> test_object.sync_metadata()

        Object metadata can be set and retrieved through the object's
        .metadata attribute. Nothing is sent when the metadata is empty.

        @raise ResponseError: if the server answers with anything but 202
        """
        self._name_check()
        if self.metadata:
            headers = self._make_headers()
            headers['Content-Length'] = "0"
            response = self.container.conn.make_request(
                'POST', [self.container.name, self.name], hdrs=headers,
                data='')
            response.read()
            if response.status != 202:
                raise ResponseError(response.status, response.reason)

    @requires_name(InvalidObjectName)
    def sync_manifest(self):
        """
        Commits the manifest to the remote storage system.

        >>> test_object = container['paradise_lost.pdf']
        >>> test_object.manifest = 'container/prefix'
        >>> test_object.sync_manifest()

        Object manifests can be set and retrieved through the object's
        .manifest attribute. Nothing is sent when the manifest is unset.

        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()
        if self.manifest:
            headers = self._make_headers()
            headers['Content-Length'] = "0"
            response = self.container.conn.make_request(
                'POST', [self.container.name, self.name], hdrs=headers,
                data='')
            response.read()
            if response.status < 200 or response.status > 299:
                raise ResponseError(response.status, response.reason)

    def __get_conn_for_write(self):
        """
        Start a PUT request for this object on the shared connection.

        The request line and all headers are sent; the caller is expected
        to send the body and then collect the response.

        @return: the connection object, ready to receive the request body
        """
        headers = self._make_headers()
        headers['X-Auth-Token'] = self.container.conn.token

        path = "/%s/%s/%s" % (self.container.conn.uri.rstrip('/'),
                              quote(self.container.name), quote(self.name))

        # Requests are handled a little differently for writes ...
        http = self.container.conn.connection

        # TODO: more/better exception handling please
        http.putrequest('PUT', path)
        for hdr in headers:
            http.putheader(hdr, headers[hdr])
        http.putheader('User-Agent', self.container.conn.user_agent)
        http.endheaders()
        return http

    # pylint: disable-msg=W0622
    @requires_name(InvalidObjectName)
    def write(self, data='', verify=True, callback=None):
        """
        Write data to the remote storage system.

        When verify is True and no etag has been assigned by hand, an md5
        checksum of the data actually sent is computed locally and stored
        in the etag attribute once the upload has been sent. If an etag
        was assigned by hand it is sent as the ETag request header and is
        left untouched. When verification is disabled, (verify=False), the
        etag attribute will be set to the value returned by the server,
        not one calculated locally. When disabling verification, there is
        no guarantee that what you think was uploaded matches what was
        actually stored. Use this option carefully. You have been warned.

        A callback can be passed in for reporting on the progress of
        the upload. The callback should accept two integers, the first
        will be for the amount of data written so far, the second for
        the total size of the transfer.

        >>> test_object = container.create_object('file.txt')
        >>> test_object.content_type = 'text/plain'
        >>> fp = open('./file.txt')
        >>> test_object.write(fp)

        @param data: the data to be written
        @type data: str or file
        @param verify: enable/disable checksum verification
        @type verify: boolean
        @param callback: function to be used as a progress callback
        @type callback: callable(transferred, size)
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()
        if isinstance(data, file):
            # pylint: disable-msg=E1101
            try:
                data.flush()
            except IOError:
                pass  # If the file descriptor is read-only this will fail
            self.size = int(os.fstat(data.fileno())[6])
        else:
            data = StringIO.StringIO(data)
            self.size = data.len

        # If override is set (and _etag is not None), then the etag has
        # been manually assigned and we will not calculate our own.
        if not self._etag_override:
            self._etag = None

        if not self.content_type:
            # pylint: disable-msg=E1101
            guessed = None
            if hasattr(data, 'name'):
                guessed = mimetypes.guess_type(data.name)[0]
            self.content_type = guessed or 'application/octet-stream'

        http = self.__get_conn_for_write()

        response = None
        transferred = 0
        running_checksum = md5()
        compute_checksum = verify and not self._etag_override

        buff = data.read(4096)
        try:
            while len(buff) > 0:
                http.send(buff)
                if compute_checksum:
                    running_checksum.update(buff)
                # Count the chunk that was just sent *before* fetching the
                # next one, so the callback reports real progress and ends
                # at the full size.
                transferred += len(buff)
                if callable(callback):
                    callback(transferred, self.size)
                buff = data.read(4096)
            response = http.getresponse()
            response.read()
        except timeout:
            if response:
                # pylint: disable-msg=E1101
                response.read()
            raise
        else:
            if compute_checksum:
                self._etag = running_checksum.hexdigest()

        # ----------------------------------------------------------------

        if (response.status < 200) or (response.status > 299):
            raise ResponseError(response.status, response.reason)

        # If verification has been disabled for this write, then set the
        # instances etag attribute to what the server returns to us.
        if not verify:
            for hdr in response.getheaders():
                if hdr[0].lower() == 'etag':
                    self._etag = hdr[1]

    @requires_name(InvalidObjectName)
    def copy_to(self, container_name, name):
        """
        Copy an object's contents to another location.

        A COPY request is issued on this object with a Destination header
        of "container_name/name". Afterwards the etag attribute is set to
        the ETag header returned by the server, if any.

        @param container_name: name of the destination container
        @type container_name: str
        @param name: name of the destination object
        @type name: str
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()
        self._name_check(name)

        # This method implicitly disables verification.
        if not self._etag_override:
            self._etag = None

        headers = self._make_headers()
        headers['Destination'] = "%s/%s" % (container_name, name)
        headers['Content-Length'] = "0"
        response = self.container.conn.make_request(
            'COPY', [self.container.name, self.name], hdrs=headers, data='')
        response.read()

        if response.status < 200 or response.status > 299:
            raise ResponseError(response.status, response.reason)

        # Reset the etag to what the server returns.
        for hdr in response.getheaders():
            if hdr[0].lower() == 'etag':
                self._etag = hdr[1]

    @requires_name(InvalidObjectName)
    def copy_from(self, container_name, name):
        """
        Copy another object's contents to this object.

        A zero-length PUT is issued on this object with an X-Copy-From
        header of "container_name/name". Afterwards the etag attribute is
        set to the ETag header returned by the server, if any.

        @param container_name: name of the source container
        @type container_name: str
        @param name: name of the source object
        @type name: str
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()
        self._name_check(name)

        # This method implicitly disables verification.
        if not self._etag_override:
            self._etag = None

        headers = self._make_headers()
        headers['X-Copy-From'] = "%s/%s" % (container_name, name)
        headers['Content-Length'] = "0"
        response = self.container.conn.make_request(
            'PUT', [self.container.name, self.name], hdrs=headers, data='')
        response.read()

        if response.status < 200 or response.status > 299:
            raise ResponseError(response.status, response.reason)

        # Reset the etag to what the server returns.
        for hdr in response.getheaders():
            if hdr[0].lower() == 'etag':
                self._etag = hdr[1]

    @requires_name(InvalidObjectName)
    def send(self, iterable):
        """
        Write potentially transient data to the remote storage system using a
        generator or stream.

        If the object's size is not set, chunked transfer encoding will be
        used to upload the file.

        If the object's size attribute is set, it will be used as the
        Content-Length. If the generator raises StopIteration prior to
        yielding the right number of bytes, an IncompleteSend exception is
        raised.

        If the content_type attribute is not set then a value of
        application/octet-stream will be used.

        If an md5 checksum is assigned to the etag property before calling
        this method it is sent as the ETag request header; otherwise no
        checksum is sent, (verification can be performed afterward though
        by using the etag attribute which is set to the value returned by
        the server).

        >>> test_object = container.create_object('backup.tar.gz')
        >>> pfd = os.popen('tar -czvf - ./data/', 'r')
        >>> test_object.send(pfd)

        @param iterable: stream or generator which yields the content to upload
        @type iterable: generator or stream
        @raise IncompleteSend: if size is set and fewer bytes were yielded
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()

        if hasattr(iterable, 'read'):

            def file_iterator(fobj):
                # Falling off the end finishes the generator; an explicit
                # StopIteration is unnecessary.
                chunk = fobj.read(4095)
                while chunk:
                    yield chunk
                    chunk = fobj.read(4095)

            iterable = file_iterator(iterable)

        # This method implicitly disables verification.
        if not self._etag_override:
            self._etag = None

        if not self.content_type:
            self.content_type = 'application/octet-stream'

        path = "/%s/%s/%s" % (self.container.conn.uri.rstrip('/'),
                              unicode_quote(self.container.name),
                              unicode_quote(self.name))
        chunked = self.size is None
        headers = self._make_headers()
        if chunked:
            del headers['Content-Length']
            headers['Transfer-Encoding'] = 'chunked'
        headers['X-Auth-Token'] = self.container.conn.token
        headers['User-Agent'] = self.container.conn.user_agent
        http = self.container.conn.connection
        http.putrequest('PUT', path)
        for key in headers:
            http.putheader(key, headers[key])
        http.endheaders()

        response = None
        transferred = 0
        try:
            for chunk in iterable:
                if not chunk:
                    # A zero-length chunk is the chunked-encoding
                    # terminator; sending one here would end the upload
                    # prematurely.
                    continue
                if chunked:
                    http.send("%X\r\n" % len(chunk))
                    http.send(chunk)
                    http.send("\r\n")
                else:
                    http.send(chunk)
                transferred += len(chunk)
            if chunked:
                http.send("0\r\n\r\n")
            # If the generator didn't yield enough data, stop, drop, and roll.
            elif transferred < self.size:
                raise IncompleteSend()
            response = http.getresponse()
            response.read()
        except timeout:
            if response:
                # pylint: disable-msg=E1101
                response.read()
            raise

        if (response.status < 200) or (response.status > 299):
            raise ResponseError(response.status, response.reason)

        for hdr in response.getheaders():
            if hdr[0].lower() == 'etag':
                self._etag = hdr[1]

    def load_from_filename(self, filename, verify=True, callback=None):
        """
        Put the contents of the named file into remote storage.

        >>> test_object = container.create_object('file.txt')
        >>> test_object.content_type = 'text/plain'
        >>> test_object.load_from_filename('./my_file.txt')

        @param filename: path to the file
        @type filename: str
        @param verify: enable/disable checksum verification
        @type verify: boolean
        @param callback: function to be used as a progress callback
        @type callback: callable(transferred, size)
        """
        fobj = open(filename, 'rb')
        try:
            self.write(fobj, verify=verify, callback=callback)
        finally:
            # Close the file even when the upload fails.
            fobj.close()

    def _initialize(self):
        """
        Initialize the Object with values from the remote service (if any).

        Issues a HEAD request and copies the manifest, content type,
        X-Object-Meta-* metadata, etag, size and last-modified values from
        the response headers onto this instance.

        @rtype: boolean
        @return: False if the object has no name or the server answered
                 404, True otherwise
        @raise ResponseError: on any other non-2xx status
        """
        if not self.name:
            return False

        response = self.container.conn.make_request(
            'HEAD', [self.container.name, self.name])
        response.read()
        if response.status == 404:
            return False
        if (response.status < 200) or (response.status > 299):
            raise ResponseError(response.status, response.reason)
        for hdr in response.getheaders():
            header_name = hdr[0].lower()
            if header_name == 'x-object-manifest':
                self.manifest = hdr[1]
            if header_name == 'content-type':
                self.content_type = hdr[1]
            if header_name.startswith('x-object-meta-'):
                # 14 == len('x-object-meta-'); keep the rest as the key.
                self.metadata[hdr[0][14:]] = hdr[1]
            if header_name == 'etag':
                self._etag = hdr[1]
                self._etag_override = False
            if header_name == 'content-length':
                self.size = int(hdr[1])
            if header_name == 'last-modified':
                self.last_modified = hdr[1]
        return True

    def __str__(self):
        return self.name

    def _name_check(self, name=None):
        """
        Raise L{InvalidObjectName} if name (default: this object's own
        name) is longer than consts.object_name_limit.
        """
        if name is None:
            name = self.name
        if len(name) > consts.object_name_limit:
            raise InvalidObjectName(name)

    def _make_headers(self):
        """
        Returns a dictionary representing http headers based on the
        respective instance attributes.

        @rtype: dict
        @return: Content-Length, Content-Type, and, when set,
                 X-Object-Manifest, ETag and one X-Object-Meta-* entry per
                 metadata item
        @raise InvalidMetaName: if a metadata key exceeds
                                consts.meta_name_limit
        @raise InvalidMetaValue: if a metadata value exceeds
                                 consts.meta_value_limit
        """
        headers = {}
        # An unset (None) or zero size is reported as "0" rather than
        # leaking the literal string "None" into the header.
        if self.size:
            headers['Content-Length'] = str(self.size)
        else:
            headers['Content-Length'] = "0"
        if self.manifest:
            headers['X-Object-Manifest'] = self.manifest
        if self._etag:
            headers['ETag'] = self._etag

        if self.content_type:
            headers['Content-Type'] = self.content_type
        else:
            headers['Content-Type'] = 'application/octet-stream'
        for key in self.metadata:
            value = self.metadata[key]
            if len(key) > consts.meta_name_limit:
                raise InvalidMetaName(key)
            if len(value) > consts.meta_value_limit:
                raise InvalidMetaValue(value)
            headers['X-Object-Meta-' + key] = value
        return headers

    @classmethod
    def compute_md5sum(cls, fobj):
        """
        Given an open file object, returns the md5 hexdigest of the data.

        The file is read from its current position to the end and is then
        rewound to the beginning.

        @param fobj: an open, seekable file object
        @type fobj: file
        @rtype: str
        @return: the hexadecimal md5 digest of the data read
        """
        checksum = md5()
        buff = fobj.read(4096)
        while buff:
            checksum.update(buff)
            buff = fobj.read(4096)
        fobj.seek(0)
        return checksum.hexdigest()

    def public_uri(self):
        """
        Retrieve the URI for this object, if its container is public.

        >>> container1 = connection['container1']
        >>> container1.make_public()
        >>> container1.create_object('file.txt').write('testing')
        >>> container1['file.txt'].public_uri()
        'http://c00061.cdn.cloudfiles.rackspacecloud.com/file.txt'

        @return: the public URI for this object
        @rtype: str
        """
        return "%s/%s" % (self.container.public_uri().rstrip('/'),
                          quote(self.name))

    def public_ssl_uri(self):
        """
        Retrieve the SSL URI for this object, if its container is public.

        >>> container1 = connection['container1']
        >>> container1.make_public()
        >>> container1.create_object('file.txt').write('testing')
        >>> container1['file.txt'].public_ssl_uri()
        'https://c61.ssl.cf0.rackcdn.com/file.txt'

        @return: the public SSL URI for this object
        @rtype: str
        """
        return "%s/%s" % (self.container.public_ssl_uri().rstrip('/'),
                          quote(self.name))

    def purge_from_cdn(self, email=None):
        """
        Purge Edge cache for this object.
        You will be notified by email if one is provided when the
        job completes.

        >>> obj.purge_from_cdn("user@domain.com")

        or

        >>> obj.purge_from_cdn("user@domain.com,user2@domain.com")

        or

        >>> obj.purge_from_cdn()

        @param email: A Valid email address
        @type email: str
        @raise CDNNotEnabled: if the connection has no CDN support enabled
        @raise ResponseError: if the server answers with a non-2xx status
        """
        if not self.container.conn.cdn_enabled:
            # CDNNotEnabled is not among this module's top-level imports.
            # NOTE(review): assumed to live in the errors module next to
            # the other exceptions used here -- confirm.
            from errors import CDNNotEnabled
            raise CDNNotEnabled()

        if email:
            hdrs = {"X-Purge-Email": email}
            response = self.container.conn.cdn_request(
                'DELETE', [self.container.name, self.name], hdrs=hdrs)
        else:
            response = self.container.conn.cdn_request(
                'DELETE', [self.container.name, self.name])
        # Drain the body so the connection can be used again.
        response.read()

        if (response.status < 200) or (response.status > 299):
            raise ResponseError(response.status, response.reason)
683
class ObjectResults(object):
    """
    An iterable results set object for Objects.

    This class implements dictionary- and list-like interfaces. Records
    are kept as the raw listing dicts and are only turned into L{Object}
    instances when they are fetched by index or slice.
    """
    def __init__(self, container, objects=None):
        """
        @param container: the container the listed objects belong to
        @type container: L{Container}
        @param objects: listing records; records without a 'name' key are
                        skipped
        @type objects: list of dict
        """
        if objects is None:
            objects = []
        self._names = []
        self._objects = []
        for obj in objects:
            try:
                self._names.append(obj['name'])
            except KeyError:
                # pseudo-objects from a delimiter query don't have names
                continue
            else:
                self._objects.append(obj)
        self.container = container

    def __getitem__(self, key):
        """
        Return the L{Object} at an integer index, or a list of L{Object}s
        for a slice.
        """
        if isinstance(key, slice):
            # Slices arrive here whenever __getslice__ is not consulted
            # (extended slices, or interpreters without __getslice__).
            return [Object(self.container, object_record=record)
                    for record in self._objects[key]]
        return Object(self.container, object_record=self._objects[key])

    def __getslice__(self, i, j):
        return [Object(self.container, object_record=k)
                for k in self._objects[i:j]]

    def __contains__(self, item):
        """
        True if item is the name of a listed object, or one of the raw
        listing records.
        """
        # index() and count() operate on names, so membership must accept
        # names too; raw records remain accepted as before.
        return item in self._names or item in self._objects

    def __len__(self):
        return len(self._objects)

    def __repr__(self):
        return 'ObjectResults: %s objects' % len(self._objects)
    __str__ = __repr__

    def index(self, value, *args):
        """
        returns an integer for the first index of value

        @param value: the object name to look for
        @type value: str
        @raise ValueError: if no listed object has that name
        """
        return self._names.index(value, *args)

    def count(self, value):
        """
        returns the number of occurrences of value

        @param value: the object name to count
        @type value: str
        """
        return self._names.count(value)
734 735 # vim:set ai sw=4 ts=4 tw=0 expandtab: 736