Package cloudfiles :: Module storage_object
[frames] | no frames]

Source Code for Module cloudfiles.storage_object

  1  """ 
  2  Object operations 
  3   
  4  An Object is analogous to a file on a conventional filesystem. You can 
  5  read data from, or write data to your Objects. You can also associate 
  6  arbitrary metadata with them. 
  7   
  8  See COPYING for license information. 
  9  """ 
 10   
 11  try: 
 12      from hashlib import md5 
 13  except ImportError: 
 14      from md5 import md5 
 15  import StringIO 
 16  import mimetypes 
 17  import os 
 18   
 19  from urllib  import quote 
 20  from errors  import ResponseError, NoSuchObject, \ 
 21                      InvalidObjectName, IncompleteSend, \ 
 22                      InvalidMetaName, InvalidMetaValue 
 23   
 24  from socket  import timeout 
 25  import consts 
 26  from utils   import requires_name 
# Because HTTPResponse objects *have* to have read() called on them
# before they can be used again ...
# pylint: disable-msg=W0612

class Object(object):
    """
    Storage data representing an object, (metadata and data).

    @undocumented: _make_headers
    @undocumented: _name_check
    @undocumented: _initialize
    @undocumented: compute_md5sum
    @undocumented: __get_conn_for_write
    @ivar name: the object's name (generally treat as read-only)
    @type name: str
    @ivar content_type: the object's content-type (set or read)
    @type content_type: str
    @ivar metadata: metadata associated with the object (set or read)
    @type metadata: dict
    @ivar size: the object's size (cached)
    @type size: number
    @ivar last_modified: date and time of last file modification (cached)
    @type last_modified: str
    @ivar container: the object's container (generally treat as read-only)
    @type container: L{Container}
    """
    # R/O support of the legacy objsum attr.
    objsum = property(lambda self: self._etag)

    def __set_etag(self, value):
        # Assigning through the property marks the etag as user supplied,
        # which stops write()/send() from clearing or recomputing it.
        self._etag = value
        self._etag_override = True

    etag = property(lambda self: self._etag, __set_etag)

    def __init__(self, container, name=None, force_exists=False,
                 object_record=None):
        """
        Storage objects rarely if ever need to be instantiated directly by the
        user.

        Instead, use the L{create_object<Container.create_object>},
        L{get_object<Container.get_object>},
        L{list_objects<Container.list_objects>} and other
        methods on its parent L{Container} object.

        @param container: the container this object belongs to
        @type container: L{Container}
        @param name: the object's name (ignored when object_record is given)
        @type name: str
        @param force_exists: raise L{NoSuchObject} if the remote lookup
                             reports that the object does not exist
        @type force_exists: boolean
        @param object_record: a listing record with the keys 'name',
                              'content_type', 'bytes', 'last_modified' and
                              'hash'; when given, no remote lookup is made
        @type object_record: dict
        """
        self.container = container
        self.last_modified = None
        self.metadata = {}
        if object_record:
            self.name = object_record['name']
            self.content_type = object_record['content_type']
            self.size = object_record['bytes']
            self.last_modified = object_record['last_modified']
            self._etag = object_record['hash']
            self._etag_override = False
        else:
            self.name = name
            self.content_type = None
            self.size = None
            self._etag = None
            self._etag_override = False
            if not self._initialize() and force_exists:
                raise NoSuchObject(self.name)

    @requires_name(InvalidObjectName)
    def read(self, size=-1, offset=0, hdrs=None, buffer=None, callback=None):
        """
        Read the content from the remote storage object.

        By default this method will buffer the response in memory and
        return it as a string. However, if a file-like object is passed
        in using the buffer keyword, the response will be written to it
        instead.

        A callback can be passed in for reporting on the progress of
        the download. The callback should accept two integers, the first
        will be for the amount of data written so far, the second for
        the total size of the transfer. Note: This option is only
        applicable when used in conjunction with the buffer option.

        >>> test_object.write('hello')
        >>> test_object.read()
        'hello'

        @param size: combined with offset, defines the length of data to be
                     read; when not positive, everything from offset to the
                     end of the object is read
        @type size: number
        @param offset: combined with size, defines the start location to be
                       read
        @type offset: number
        @param hdrs: an optional dict of headers to send with the request
                     (never modified by this method)
        @type hdrs: dictionary
        @param buffer: an optional file-like object to write the content to
        @type buffer: file-like object
        @param callback: function to be used as a progress callback
        @type callback: callable(transferred, size)
        @rtype: str or None
        @return: a string of all data in the object, or None if a buffer is used
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()

        byte_range = None
        if size > 0:
            byte_range = 'bytes=%d-%d' % (offset, (offset + size) - 1)
        elif offset > 0:
            # An offset without a size used to be ignored silently; request
            # an open-ended range instead.
            byte_range = 'bytes=%d-' % offset
        if byte_range is not None:
            # Work on a copy so the caller's dict is left untouched.
            hdrs = dict(hdrs or {})
            hdrs['Range'] = byte_range

        response = self.container.conn.make_request(
            'GET', path=[self.container.name, self.name], hdrs=hdrs)
        if (response.status < 200) or (response.status > 299):
            # Drain the body so the connection can be reused.
            response.read()
            raise ResponseError(response.status, response.reason)

        if not hasattr(buffer, 'write'):
            return response.read()

        transferred = 0
        scratch = response.read(8192)
        while len(scratch) > 0:
            buffer.write(scratch)
            transferred += len(scratch)
            if callable(callback):
                callback(transferred, self.size)
            scratch = response.read(8192)
        return None

    def save_to_filename(self, filename, callback=None):
        """
        Save the contents of the object to filename.

        >>> container = connection['container1']
        >>> obj = container.get_object('backup_file')
        >>> obj.save_to_filename('./backup_file')

        @param filename: name of the file
        @type filename: str
        @param callback: function to be used as a progress callback
        @type callback: callable(transferred, size)
        """
        # Open outside the try block: if open() fails there is nothing to
        # close, and the original error must not be masked.
        fobj = open(filename, 'wb')
        try:
            self.read(buffer=fobj, callback=callback)
        finally:
            fobj.close()

    @requires_name(InvalidObjectName)
    def stream(self, chunksize=8192, hdrs=None):
        """
        Return a generator of the remote storage object's data.

        Warning: The HTTP response is only complete after this generator
        has raised a StopIteration. No other methods can be called until
        this has occurred.

        >>> test_object.write('hello')
        >>> test_object.stream()
        <generator object at 0xb77939cc>
        >>> '-'.join(test_object.stream(chunksize=1))
        'h-e-l-l-o'

        @param chunksize: size in bytes yielded by the generator
        @type chunksize: number
        @param hdrs: an optional dict of headers to send in the request
        @type hdrs: dict
        @rtype: str generator
        @return: a generator which yields strings as the object is downloaded
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()
        response = self.container.conn.make_request(
            'GET', path=[self.container.name, self.name], hdrs=hdrs)
        if response.status < 200 or response.status > 299:
            response.read()
            raise ResponseError(response.status, response.reason)
        buff = response.read(chunksize)
        while len(buff) > 0:
            yield buff
            buff = response.read(chunksize)
        # I hate you httplib
        response.read()

    @requires_name(InvalidObjectName)
    def sync_metadata(self):
        """
        Commits the metadata to the remote storage system.

        >>> test_object = container['paradise_lost.pdf']
        >>> test_object.metadata = {'author': 'John Milton'}
        >>> test_object.sync_metadata()

        Object metadata can be set and retrieved through the object's
        .metadata attribute. Nothing is sent when the metadata dict is empty.

        @raise ResponseError: if the server does not answer with status 202
        """
        self._name_check()
        if not self.metadata:
            return
        headers = self._make_headers()
        headers['Content-Length'] = 0
        response = self.container.conn.make_request(
            'POST', [self.container.name, self.name], hdrs=headers, data=''
        )
        response.read()
        if response.status != 202:
            raise ResponseError(response.status, response.reason)

    def __get_conn_for_write(self):
        """
        Start a PUT request for this object on the shared connection and
        return the connection with all headers sent, ready for the body.
        """
        headers = self._make_headers()

        headers['X-Auth-Token'] = self.container.conn.token

        path = "/%s/%s/%s" % (self.container.conn.uri.rstrip('/'),
                              quote(self.container.name), quote(self.name))

        # Requests are handled a little differently for writes ...
        http = self.container.conn.connection

        # TODO: more/better exception handling please
        http.putrequest('PUT', path)
        for hdr in headers:
            http.putheader(hdr, headers[hdr])
        http.putheader('User-Agent', consts.user_agent)
        http.endheaders()
        return http

    # pylint: disable-msg=W0622
    @requires_name(InvalidObjectName)
    def write(self, data='', verify=True, callback=None):
        """
        Write data to the remote storage system.

        By default, server-side verification is enabled, (verify=True), and
        end-to-end verification is performed using an md5 checksum. When
        verification is disabled, (verify=False), the etag attribute will
        be set to the value returned by the server, not one calculated
        locally. When disabling verification, there is no guarantee that
        what you think was uploaded matches what was actually stored. Use
        this option carefully. You have been warned.

        A callback can be passed in for reporting on the progress of
        the upload. The callback should accept two integers, the first
        will be for the amount of data written so far, the second for
        the total size of the transfer.

        >>> test_object = container.create_object('file.txt')
        >>> test_object.content_type = 'text/plain'
        >>> fp = open('./file.txt')
        >>> test_object.write(fp)

        @param data: the data to be written
        @type data: str or file
        @param verify: enable/disable server-side checksum verification
        @type verify: boolean
        @param callback: function to be used as a progress callback
        @type callback: callable(transferred, size)
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()
        if isinstance(data, file):
            # pylint: disable-msg=E1101
            try:
                data.flush()
            except IOError:
                pass  # If the file descriptor is read-only this will fail
            self.size = int(os.fstat(data.fileno())[6])
        else:
            data = StringIO.StringIO(data)
            self.size = data.len

        # If override is set (and _etag is not None), then the etag has
        # been manually assigned and we will not calculate our own.
        if not self._etag_override:
            self._etag = None

        if not self.content_type:
            # pylint: disable-msg=E1101
            guessed = None
            if hasattr(data, 'name'):
                guessed = mimetypes.guess_type(data.name)[0]
            self.content_type = guessed or 'application/octet-stream'

        http = self.__get_conn_for_write()

        response = None
        transferred = 0
        running_checksum = md5()

        buff = data.read(4096)
        try:
            while len(buff) > 0:
                http.send(buff)
                if verify and not self._etag_override:
                    running_checksum.update(buff)
                # Count the chunk that was just sent *before* fetching the
                # next one, so the callback reports real progress and ends
                # at the full size.
                transferred += len(buff)
                if callable(callback):
                    callback(transferred, self.size)
                buff = data.read(4096)
            response = http.getresponse()
            buff = response.read()
        except timeout:
            if response is not None:
                # pylint: disable-msg=E1101
                response.read()
            # Bare raise keeps the original traceback.
            raise
        else:
            # NOTE(review): the locally computed digest is stored here but is
            # never compared with the ETag returned by the server -- confirm
            # whether such a comparison is intended.
            if verify and not self._etag_override:
                self._etag = running_checksum.hexdigest()

        # ----------------------------------------------------------------

        if (response.status < 200) or (response.status > 299):
            raise ResponseError(response.status, response.reason)

        # If verification has been disabled for this write, then set the
        # instances etag attribute to what the server returns to us.
        if not verify:
            for hdr in response.getheaders():
                if hdr[0].lower() == 'etag':
                    self._etag = hdr[1]

    @requires_name(InvalidObjectName)
    def send(self, iterable):
        """
        Write potentially transient data to the remote storage system using a
        generator or stream.

        If the object's size is not set, chunked transfer encoding will be
        used to upload the file.

        If the object's size attribute is set, it will be used as the
        Content-Length. If the generator raises StopIteration prior to yielding
        the right number of bytes, an IncompleteSend exception is raised.

        If the content_type attribute is not set then a value of
        application/octet-stream will be used.

        Server-side verification will be performed if an md5 checksum is
        assigned to the etag property before calling this method,
        otherwise no verification will be performed, (verification
        can be performed afterward though by using the etag attribute
        which is set to the value returned by the server).

        >>> test_object = container.create_object('backup.tar.gz')
        >>> pfd = os.popen('tar -czvf - ./data/', 'r')
        >>> test_object.send(pfd)

        @param iterable: stream or generator which yields the content to upload
        @type iterable: generator or stream
        @raise IncompleteSend: if size is set and fewer bytes were yielded
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()

        if hasattr(iterable, 'read'):
            def file_iterator(stream):
                # Falling off the end finishes the generator; an explicit
                # "raise StopIteration" is unnecessary (and an error under
                # PEP 479).
                chunk = stream.read(4095)
                while chunk:
                    yield chunk
                    chunk = stream.read(4095)
            iterable = file_iterator(iterable)

        # This method implicitly disables verification
        if not self._etag_override:
            self._etag = None

        if not self.content_type:
            self.content_type = 'application/octet-stream'

        path = "/%s/%s/%s" % (self.container.conn.uri.rstrip('/'),
                              quote(self.container.name), quote(self.name))
        headers = self._make_headers()
        chunked = self.size is None
        if chunked:
            del headers['Content-Length']
            headers['Transfer-Encoding'] = 'chunked'
        headers['X-Auth-Token'] = self.container.conn.token
        headers['User-Agent'] = consts.user_agent
        http = self.container.conn.connection
        http.putrequest('PUT', path)
        for key in headers:
            http.putheader(key, headers[key])
        http.endheaders()

        response = None
        transferred = 0
        try:
            for chunk in iterable:
                if not chunk:
                    # A zero-length chunk would go out as "0\r\n\r\n", which
                    # is the chunked-encoding terminator and would cut the
                    # upload short.
                    continue
                if chunked:
                    http.send("%X\r\n" % len(chunk))
                    http.send(chunk)
                    http.send("\r\n")
                else:
                    http.send(chunk)
                transferred += len(chunk)
            if chunked:
                http.send("0\r\n\r\n")
            # If the generator didn't yield enough data, stop, drop, and roll.
            elif transferred < self.size:
                raise IncompleteSend()
            response = http.getresponse()
            response.read()
        except timeout:
            if response is not None:
                # pylint: disable-msg=E1101
                response.read()
            # Bare raise keeps the original traceback.
            raise

        if (response.status < 200) or (response.status > 299):
            raise ResponseError(response.status, response.reason)

        for hdr in response.getheaders():
            if hdr[0].lower() == 'etag':
                self._etag = hdr[1]

    def load_from_filename(self, filename, verify=True, callback=None):
        """
        Put the contents of the named file into remote storage.

        >>> test_object = container.create_object('file.txt')
        >>> test_object.content_type = 'text/plain'
        >>> test_object.load_from_filename('./my_file.txt')

        @param filename: path to the file
        @type filename: str
        @param verify: enable/disable server-side checksum verification
        @type verify: boolean
        @param callback: function to be used as a progress callback
        @type callback: callable(transferred, size)
        """
        fobj = open(filename, 'rb')
        try:
            self.write(fobj, verify=verify, callback=callback)
        finally:
            # Close the file even when the upload fails.
            fobj.close()

    def _initialize(self):
        """
        Initialize the Object with values from the remote service (if any).

        @rtype: boolean
        @return: False if the object has no name or the server answered 404,
                 True once the attributes were filled in from the response
        @raise ResponseError: on any other non-2xx status
        """
        if not self.name:
            return False

        response = self.container.conn.make_request(
            'HEAD', [self.container.name, self.name]
        )
        response.read()
        if response.status == 404:
            return False
        if (response.status < 200) or (response.status > 299):
            raise ResponseError(response.status, response.reason)
        for hdr in response.getheaders():
            header_name = hdr[0]
            lowered = header_name.lower()
            if lowered == 'content-type':
                self.content_type = hdr[1]
            elif lowered.startswith('x-object-meta-'):
                # Strip the 14-character "x-object-meta-" prefix.
                self.metadata[header_name[14:]] = hdr[1]
            elif lowered == 'etag':
                self._etag = hdr[1]
                self._etag_override = False
            elif lowered == 'content-length':
                self.size = int(hdr[1])
            elif lowered == 'last-modified':
                self.last_modified = hdr[1]
        return True

    def __str__(self):
        return self.name

    def _name_check(self):
        """
        Raise InvalidObjectName if the name exceeds consts.object_name_limit.
        """
        if len(self.name) > consts.object_name_limit:
            raise InvalidObjectName(self.name)

    def _make_headers(self):
        """
        Returns a dictionary representing http headers based on the
        respective instance attributes.

        @raise InvalidMetaName: if a metadata key exceeds
                                consts.meta_name_limit
        @raise InvalidMetaValue: if a metadata value exceeds
                                 consts.meta_value_limit
        """
        headers = {}
        headers['Content-Length'] = self.size or 0
        if self._etag:
            headers['ETag'] = self._etag

        headers['Content-Type'] = (self.content_type or
                                   'application/octet-stream')

        for key in self.metadata:
            value = self.metadata[key]
            if len(key) > consts.meta_name_limit:
                raise InvalidMetaName(key)
            if len(value) > consts.meta_value_limit:
                raise InvalidMetaValue(value)
            headers['X-Object-Meta-' + key] = value
        return headers

    @classmethod
    def compute_md5sum(cls, fobj):
        """
        Given an open file object, returns the md5 hexdigest of the data.

        The file is read from its current position in 4096-byte blocks and
        rewound to the start afterwards.
        """
        checksum = md5()
        buff = fobj.read(4096)
        while buff:
            checksum.update(buff)
            buff = fobj.read(4096)
        fobj.seek(0)
        return checksum.hexdigest()

    def public_uri(self):
        """
        Retrieve the URI for this object, if its container is public.

        >>> container1 = connection['container1']
        >>> container1.make_public()
        >>> container1.create_object('file.txt').write('testing')
        >>> container1['file.txt'].public_uri()
        'http://c00061.cdn.cloudfiles.rackspacecloud.com/file.txt'

        @return: the public URI for this object
        @rtype: str
        """
        return "%s/%s" % (self.container.public_uri().rstrip('/'),
                          quote(self.name))
538
class ObjectResults(object):
    """
    An iterable results set object for Objects.

    This class implements dictionary- and list-like interfaces.

    The set is built from a list of listing records (dicts which carry at
    least a 'name' key); L{Object} instances are created on demand when
    items are fetched.
    """
    def __init__(self, container, objects=None):
        """
        @param container: the container the listed objects belong to
        @type container: L{Container}
        @param objects: listing records, each with at least a 'name' key
        @type objects: list of dict
        """
        self._objects = objects or []
        self._names = [obj['name'] for obj in self._objects]
        self.container = container

    def __getitem__(self, key):
        """
        Return the L{Object} at an integer index, or a list of Objects when
        key is a slice object (e.g. an extended slice such as results[::2]).
        """
        if isinstance(key, slice):
            return [Object(self.container, object_record=record)
                    for record in self._objects[key]]
        return Object(self.container, object_record=self._objects[key])

    def __getslice__(self, i, j):
        return [Object(self.container, object_record=record)
                for record in self._objects[i:j]]

    def __contains__(self, item):
        """
        True if item is the name of a listed object or one of the raw
        listing records.

        index() and count() work on names, so membership accepts names too;
        raw records are still recognised for backward compatibility.
        """
        return item in self._names or item in self._objects

    def __len__(self):
        return len(self._objects)

    def __repr__(self):
        return 'ObjectResults: %s objects' % len(self._objects)
    __str__ = __repr__

    def index(self, value, *args):
        """
        returns an integer for the first index of value
        """
        return self._names.index(value, *args)

    def count(self, value):
        """
        returns the number of occurrences of value
        """
        return self._names.count(value)
577 578 # vim:set ai sw=4 ts=4 tw=0 expandtab: 579