Package restkit :: Module http
[hide private]
[frames | no frames]

Source Code for Module restkit.http

  1  # -*- coding: utf-8 - 
  2  # 
  3  # This file is part of restkit released under the MIT license.  
  4  # See the NOTICE for more information. 
  5   
  6  import os 
  7  import re 
  8  import sys 
  9  import urlparse 
 10  import zlib 
 11   
 12  try: 
 13      from cStringIO import StringIO 
 14  except ImportError: 
 15      from StringIO import StringIO 
 16   
 17  from .datastructures import MultiDict 
 18  from .errors import NoMoreData, ChunkMissingTerminator, \ 
 19  InvalidChunkSize, InvalidRequestLine, InvalidHTTPVersion, \ 
 20  InvalidHTTPStatus, InvalidHeader, InvalidHeaderName, HeaderLimit 
 21   
 22   
class Unreader(object):
    """Socket reader with a push-back buffer.

    Data handed to unread() is stored in an in-memory buffer and is
    served by the following read() calls before anything new is pulled
    from the socket.
    """

    def __init__(self, sock, max_chunk=8192):
        # max_chunk is the byte count passed to every sock.recv() call.
        self.sock = sock
        self.max_chunk = max_chunk
        self.buf = StringIO()

    def _data(self):
        """Fetch one chunk (at most max_chunk bytes) from the socket."""
        return self.sock.recv(self.max_chunk)

    def _flush(self):
        """Return everything buffered and start over with an empty buffer."""
        pending = self.buf.getvalue()
        self.buf = StringIO()
        return pending

    def read(self, size=None):
        """Return buffered and/or freshly received data.

        With size None (or negative) the buffered data is returned if
        there is any, otherwise the result of a single recv().  With a
        positive size, data is received until at least size bytes are
        buffered or recv() returns nothing; at most size bytes are
        returned and the surplus stays buffered.

        Raises TypeError when size is neither None nor an int/long.
        """
        if size is not None:
            if not isinstance(size, (int, long)):
                raise TypeError("size parameter must be an int or long.")
            if size == 0:
                return ""
            if size < 0:
                size = None

        # Make tell() report the amount of buffered data.
        self.buf.seek(0, os.SEEK_END)

        if size is None:
            if self.buf.tell():
                return self._flush()
            return self._data()

        while self.buf.tell() < size:
            chunk = self._data()
            if not len(chunk):
                # Nothing more to receive: hand out whatever is buffered.
                return self._flush()
            self.buf.write(chunk)

        pending = self._flush()
        self.buf.write(pending[size:])
        return pending[:size]

    def unread(self, data):
        """Append data to the buffer so later read() calls return it."""
        self.buf.seek(0, os.SEEK_END)
        self.buf.write(data)
65
class ChunkedReader(object):
    """Decode a chunked (``Transfer-Encoding: chunked``) body.

    Raw data is pulled from ``unreader``; the decoded payload is handed
    out through read().  Trailer headers found after the last chunk are
    stored on ``req.trailers`` using ``req.parse_headers``.
    """

    def __init__(self, req, unreader):
        self.unreader = unreader
        self.req = req
        # Generator yielding decoded payload pieces; set to None once
        # it is exhausted.
        self.parser = self.parse_chunked(unreader)
        self.buf = StringIO()

    def read(self, size):
        """Return up to ``size`` decoded bytes ("" once the body is done).

        Raises TypeError for a non-integral size and ValueError for a
        negative one.  A size of 0 returns "" like the other readers do.
        """
        if not isinstance(size, (int, long)):
            raise TypeError("size must be an integral type")
        if size < 0:
            raise ValueError("Size must be positive.")
        if size == 0:
            return ""

        if self.parser and self.buf.tell() < size:
            # A for loop (instead of parser.next()) drives the generator
            # without depending on the iterator method name.
            for piece in self.parser:
                self.buf.write(piece)
                if self.buf.tell() >= size:
                    break
            else:
                self.parser = None

        data = self.buf.getvalue()
        ret, rest = data[:size], data[size:]
        # Fresh buffer: the write position is then always at the end.
        self.buf = StringIO()
        self.buf.write(rest)
        return ret

    def parse_trailers(self, unreader, data, eof=False):
        """Consume the trailer section that follows the last chunk.

        ``data`` is what was already read after the zero-size chunk
        line.  Anything after the trailers is pushed back to unreader.
        """
        buf = StringIO()
        buf.write(data)

        idx = buf.getvalue().find("\r\n\r\n")
        done = buf.getvalue()[:2] == "\r\n"

        while idx < 0 and not done:
            self.get_data(unreader, buf)
            idx = buf.getvalue().find("\r\n\r\n")
            done = buf.getvalue()[:2] == "\r\n"
        if done:
            # An immediate blank line means there are no trailers.
            unreader.unread(buf.getvalue()[2:])
            return ""
        self.req.trailers = self.req.parse_headers(buf.getvalue()[:idx])
        unreader.unread(buf.getvalue()[idx+4:])

    def parse_chunked(self, unreader):
        """Generator yielding the payload of each chunk, piece by piece.

        Raises NoMoreData when the stream ends inside a chunk or before
        its terminating CRLF, and ChunkMissingTerminator when a chunk is
        not followed by CRLF.
        """
        (size, rest) = self.parse_chunk_size(unreader)
        while size > 0:
            while size > len(rest):
                size -= len(rest)
                yield rest
                rest = unreader.read()
                if not rest:
                    raise NoMoreData()
            yield rest[:size]
            # Remove \r\n after chunk
            rest = rest[size:]
            while len(rest) < 2:
                more = unreader.read()
                if not more:
                    # Without this check a closed connection would make
                    # this loop spin forever.
                    raise NoMoreData()
                rest += more
            if rest[:2] != '\r\n':
                raise ChunkMissingTerminator(rest[:2])
            (size, rest) = self.parse_chunk_size(unreader, data=rest[2:])

    def parse_chunk_size(self, unreader, data=None):
        """Parse a chunk-size line.

        Returns ``(size, rest)`` where rest is the data following the
        line.  For the final zero-size chunk the trailers are consumed
        and ``(0, None)`` is returned.  Raises InvalidChunkSize when the
        size is not hexadecimal.
        """
        buf = StringIO()
        if data is not None:
            buf.write(data)

        idx = buf.getvalue().find("\r\n")
        while idx < 0:
            self.get_data(unreader, buf)
            idx = buf.getvalue().find("\r\n")

        data = buf.getvalue()
        line, rest_chunk = data[:idx], data[idx+2:]

        # Chunk extensions (everything after ";") are ignored.
        chunk_size = line.split(";", 1)[0].strip()
        try:
            chunk_size = int(chunk_size, 16)
        except ValueError:
            raise InvalidChunkSize(chunk_size)

        if chunk_size == 0:
            try:
                self.parse_trailers(unreader, rest_chunk)
            except NoMoreData:
                # Stream ended before the trailers were complete; the
                # body itself is complete, so this is tolerated.
                pass
            return (0, None)
        return (chunk_size, rest_chunk)

    def get_data(self, unreader, buf):
        """Append one unreader.read() to buf; raise NoMoreData at EOF."""
        data = unreader.read()
        if not data:
            raise NoMoreData()
        buf.write(data)
162 163
class LengthReader(object):
    """Reader for a body whose size is given up front (``length``)."""

    def __init__(self, req, unreader, length):
        self.req = req
        self.unreader = unreader
        # Number of body bytes that have not been handed out yet.
        self.length = length

    def read(self, size):
        """Return up to ``size`` bytes, never more than what is left.

        Raises TypeError for a non-integral size and ValueError when the
        effective size is negative.  Surplus data is pushed back to the
        unreader.
        """
        if not isinstance(size, (int, long)):
            raise TypeError("size must be an integral type")

        size = min(self.length, size)
        if size < 0:
            raise ValueError("Size must be positive.")
        if size == 0:
            return ""

        chunks = []
        received = 0
        while True:
            data = self.unreader.read()
            if not data:
                break
            chunks.append(data)
            received += len(data)
            if received >= size:
                break

        payload = "".join(chunks)
        self.unreader.unread(payload[size:])
        self.length -= size
        return payload[:size]
194
class EOFReader(object):
    """Reader for a body that simply runs until the stream ends."""

    def __init__(self, req, unreader):
        self.req = req
        self.unreader = unreader
        self.buf = StringIO()
        # Set once unreader.read() has returned no data.
        self.finished = False

    def read(self, size):
        """Return up to ``size`` bytes ("" once the stream is exhausted).

        The unreader is only consulted while fewer than ``size`` bytes
        are buffered, so a request that can be answered from the buffer
        never triggers another (possibly blocking) read.

        Raises TypeError for a non-integral size and ValueError for a
        negative one.
        """
        if not isinstance(size, (int, long)):
            raise TypeError("size must be an integral type")
        if size < 0:
            raise ValueError("Size must be positive.")
        if size == 0:
            return ""

        if not self.finished:
            while self.buf.tell() < size:
                data = self.unreader.read()
                if not data:
                    self.finished = True
                    break
                self.buf.write(data)

        data = self.buf.getvalue()
        ret, rest = data[:size], data[size:]
        # Fresh buffer: the write position is then always at the end.
        self.buf = StringIO()
        self.buf.write(rest)
        return ret
232
class Body(object):
    """File-like, iterable view over a body reader.

    ``reader`` must provide ``read(size)``.  ``closed`` becomes True
    once the reader has returned no more data.
    """

    def __init__(self, reader):
        self.reader = reader
        # Data already read from the reader but not yet handed out.
        self.buf = StringIO()
        self.closed = False

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration when none is left."""
        ret = self.readline()
        if not ret:
            raise StopIteration()
        return ret

    # Same method under the name used by the newer iterator protocol.
    __next__ = next

    def discard(self):
        """Read and throw away the rest of the body, 8192 bytes at a time."""
        data = self.read(8192)
        while data:
            # Bounded read: an unbounded read() here would load the
            # whole remaining body into memory just to drop it.
            data = self.read(8192)

    def getsize(self, size):
        """Normalise a size argument; None or negative means "no limit".

        Raises TypeError when size is neither None nor an int/long.
        """
        if size is None:
            return sys.maxint
        elif not isinstance(size, (int, long)):
            raise TypeError("size must be an integral type")
        elif size < 0:
            return sys.maxint
        return size

    def _take(self, count):
        """Remove and return the first ``count`` buffered bytes."""
        data = self.buf.getvalue()
        # Fresh buffer: the write position is then always at the end.
        self.buf = StringIO()
        self.buf.write(data[count:])
        return data[:count]

    def read(self, size=None):
        """Return up to ``size`` bytes (everything when size is None)."""
        size = self.getsize(size)
        if size == 0:
            return ""

        while size > self.buf.tell():
            data = self.reader.read(1024)
            if not len(data):
                self.closed = True
                break
            self.buf.write(data)

        return self._take(size)

    def readline(self, size=None):
        """Return one line including its "\\n", limited to ``size`` bytes.

        The limit also applies to data that is already buffered.
        """
        size = self.getsize(size)
        if size == 0:
            return ""

        line = self.buf.getvalue()
        idx = line.find("\n")
        if idx >= 0:
            return self._take(min(idx + 1, size))
        if len(line) >= size:
            return self._take(size)

        # No newline buffered yet: extend the partial line one byte at
        # a time so nothing beyond the end of the line is consumed.
        self.buf = StringIO()
        ch = ""
        pieces = [line]
        lsize = len(line)
        while lsize < size and ch != "\n":
            ch = self.reader.read(1)
            if not len(ch):
                self.closed = True
                break
            lsize += 1
            pieces.append(ch)
        return "".join(pieces)

    def readlines(self, size=None):
        """Read the whole remaining body and return it as a list of lines.

        Every line keeps its trailing "\\n"; ``size`` is ignored.
        """
        parts = self.read().split("\n")
        # The last element is the text after the final "\n" (maybe "").
        tail = parts.pop()
        lines = [part + "\n" for part in parts]
        if tail:
            lines.append(tail)
        return lines
325 326
class GzipBody(Body):
    """Body that gunzips the data returned by its reader.

    ``self.buf`` always holds *decompressed* data that has not been
    handed out yet, so read() and readline() can be mixed freely and
    ``size`` always counts decompressed bytes.
    """

    def __init__(self, reader):
        super(GzipBody, self).__init__(reader)
        # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
        self._d = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def _decompress(self, data):
        return self._d.decompress(data)

    def _pull(self):
        """Read one compressed chunk and buffer its decompressed form.

        Returns the decompressed chunk (possibly empty), or None when
        the reader has no more data; ``closed`` is set in that case.
        """
        data = self.reader.read(1024)
        if not len(data):
            self.closed = True
            return None
        chunk = self._decompress(data)
        self.buf.write(chunk)
        return chunk

    def _consume(self, count):
        """Remove and return the first ``count`` buffered bytes."""
        data = self.buf.getvalue()
        # Fresh buffer: the write position is then always at the end.
        self.buf = StringIO()
        self.buf.write(data[count:])
        return data[:count]

    def read(self, size=None):
        """Return up to ``size`` decompressed bytes.

        "" is only returned once the reader is exhausted, never merely
        because one compressed chunk produced no output.
        """
        size = self.getsize(size)
        if size == 0:
            return ""

        while size > self.buf.tell():
            if self._pull() is None:
                break

        return self._consume(size)

    def readline(self, size=None):
        """Return one decompressed line, limited to ``size`` bytes."""
        size = self.getsize(size)
        if size == 0:
            return ""

        idx = self.buf.getvalue().find("\n")
        while idx < 0 and self.buf.tell() < size:
            offset = self.buf.tell()
            chunk = self._pull()
            if chunk is None:
                break
            # Only the new chunk can contain the first newline.
            pos = chunk.find("\n")
            if pos >= 0:
                idx = offset + pos

        if idx < 0:
            # No newline: we ran out of data or reached the size limit.
            rlen = min(size, self.buf.tell())
        else:
            # Trim back if the line is longer than the size limit.
            rlen = min(idx + 1, size)

        return self._consume(rlen)
391 392
class DeflateBody(GzipBody):
    """GzipBody variant whose decompressor uses zlib's default settings."""

    def __init__(self, reader):
        super(DeflateBody, self).__init__(reader)
        # Replace the gzip decompressor installed by GzipBody.__init__.
        decompressor = zlib.decompressobj()
        self._d = decompressor
397 398
class Request(object):
    """Parse the head of an HTTP message read from an unreader.

    The first line is expected to look like ``HTTP/x.y <status> <reason>``.
    After construction ``version``, ``status``, ``status_int``,
    ``reason`` and ``headers`` are filled in and ``body`` is a Body
    (or GzipBody / DeflateBody) wrapping the matching reader.
    """

    # Compiled once for the class; still reachable as self.versre etc.
    versre = re.compile(r"HTTP/(\d+)\.(\d+)")
    stare = re.compile(r"(\d{3})\s*(.*)")
    # Characters that may not appear in a header name.
    hdrre = re.compile(r'[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\"]')

    def __init__(self, unreader, decompress=True,
            max_status_line_garbage=None,
            max_header_count=0):
        """Parse the message head immediately.

        decompress -- wrap gzip/deflate encoded bodies in a
            decompressing Body.
        max_status_line_garbage -- how many invalid lines may precede
            the status line (None: no limit).
        max_header_count -- maximum number of headers (0: no limit).
        """
        self.unreader = unreader
        self.version = None
        self.headers = MultiDict()
        self.trailers = []
        self.body = None
        self.encoding = None
        self.status = None
        self.reason = None
        self.status_int = None
        self.decompress = decompress

        if max_status_line_garbage is None:
            max_status_line_garbage = sys.maxint
        self.max_status_line_garbage = max_status_line_garbage

        self.max_header_count = max_header_count

        unused = self.parse(self.unreader)
        # Whatever follows the headers belongs to the body.
        self.unreader.unread(unused)
        self.set_body_reader()

    def get_data(self, unreader, buf, stop=False):
        """Append one unreader.read() to buf.

        At end of stream raises StopIteration when ``stop`` is true,
        otherwise NoMoreData carrying what was buffered so far.
        """
        data = unreader.read()
        if not data:
            if stop:
                raise StopIteration()
            raise NoMoreData(buf.getvalue())
        buf.write(data)

    def parse(self, unreader):
        """Parse status line and headers; return the unconsumed data."""
        buf = StringIO()

        self.get_data(unreader, buf, stop=True)

        # Parse request first line
        # With HTTP/1.1 persistent connections, the problem arises
        # that broken scripts could return a wrong Content-Length
        # (there are more bytes sent than specified). Unfortunately,
        # in some cases, this cannot be detected after the bad response,
        # but only before the next one. So we retry to read the line
        # until we go over max_status_line_garbage tries.
        tries = 0
        while True:
            idx = buf.getvalue().find("\r\n")
            while idx < 0:
                self.get_data(unreader, buf)
                idx = buf.getvalue().find("\r\n")

            # Consume the line (and its \r\n) whether it parses or not.
            data = buf.getvalue()
            line, rest = data[:idx], data[idx+2:]
            buf = StringIO()
            buf.write(rest)

            try:
                self.parse_first_line(line)
                break
            except (InvalidRequestLine, InvalidHTTPVersion,
                    InvalidHTTPStatus):
                if tries > self.max_status_line_garbage:
                    # sys.exc_info() is valid on every Python version.
                    e = sys.exc_info()[1]
                    raise InvalidRequestLine("Status line not found %s"
                            % str(e))

            # increase number of tries
            tries += 1

        # parse headers
        idx = buf.getvalue().find("\r\n\r\n")
        done = buf.getvalue()[:2] == "\r\n"
        while idx < 0 and not done:
            self.get_data(unreader, buf)
            idx = buf.getvalue().find("\r\n\r\n")
            done = buf.getvalue()[:2] == "\r\n"
        if done:
            # Blank line right after the status line: no headers.
            self.unreader.unread(buf.getvalue()[2:])
            return ""

        self.headers = self.parse_headers(buf.getvalue()[:idx])

        return buf.getvalue()[idx+4:]

    def parse_first_line(self, line):
        """Fill version, status, status_int and reason from ``line``.

        Raises InvalidRequestLine, InvalidHTTPVersion or
        InvalidHTTPStatus when the line is malformed.
        """
        bits = line.split(None, 1)
        if len(bits) != 2:
            raise InvalidRequestLine(line)

        # version
        matchv = self.versre.match(bits[0])
        if matchv is None:
            raise InvalidHTTPVersion(bits[0])
        self.version = (int(matchv.group(1)), int(matchv.group(2)))

        # status
        matchs = self.stare.match(bits[1])
        if matchs is None:
            raise InvalidHTTPStatus(bits[1])

        self.status = bits[1]
        self.status_int = int(matchs.group(1))
        # The whole reason phrase, e.g. "Not Found", not just "Not".
        self.reason = matchs.group(2).strip()

    def parse_headers(self, data):
        """Parse a header block (without the final blank line).

        Returns a MultiDict.  Raises InvalidHeader for a line without
        ":", InvalidHeaderName for a name with forbidden characters and
        HeaderLimit when more than max_header_count headers are present.
        """
        headers = MultiDict()

        # Split lines on \r\n keeping the \r\n on each line
        lines = [line + "\r\n" for line in data.split("\r\n")]
        total = len(lines)

        # Parse headers into key/value pairs paying attention
        # to continuation lines.
        hdr_count = 0
        pos = 0
        while pos < total:
            if self.max_header_count and \
                    hdr_count >= self.max_header_count:
                raise HeaderLimit(self.max_header_count)

            # Parse initial header name : value pair.
            curr = lines[pos]
            pos += 1
            if curr.find(":") < 0:
                raise InvalidHeader(curr.strip())
            name, value = curr.split(":", 1)
            name = name.rstrip(" \t")
            if self.hdrre.search(name.upper()):
                raise InvalidHeaderName(name)
            name, value = name.strip(), [value.lstrip()]

            # Consume value continuation lines
            while pos < total and lines[pos].startswith((" ", "\t")):
                value.append(lines[pos])
                pos += 1
            value = ''.join(value).rstrip()

            headers.add(name, value)
            hdr_count += 1

        return headers

    def set_body_reader(self):
        """Choose the body reader and Body wrapper from the headers."""
        clen = self.headers.iget('content-length')
        te = self.headers.iget('transfer-encoding')
        encoding = self.headers.iget('content-encoding')

        # A chunked transfer coding overrides any Content-Length.
        chunked = te is not None and te.lower() == "chunked"

        clength = None
        if not chunked and clen is not None:
            try:
                clength = int(clen)
            except ValueError:
                # Unparsable length: fall back to reading until EOF.
                pass

        if encoding:
            self.encoding = encoding.lower()

        if chunked:
            reader = ChunkedReader(self, self.unreader)
        elif clength is not None:
            reader = LengthReader(self, self.unreader, clength)
        else:
            reader = EOFReader(self, self.unreader)

        if self.decompress and self.encoding in ('gzip', 'deflate',):
            if self.encoding == "gzip":
                self.body = GzipBody(reader)
            else:
                self.body = DeflateBody(reader)
        else:
            self.body = Body(reader)

    def should_close(self):
        """Tell whether the connection must be closed after this message.

        An explicit ``Connection: close`` / ``keep-alive`` header wins;
        otherwise HTTP/1.0 and older close, newer versions keep alive.
        """
        connection = self.headers.iget("connection")

        if connection is not None:
            token = connection.lower().strip()
            if token == "close":
                return True
            elif token == "keep-alive":
                return False
        return self.version <= (1, 0)
585