1
2 """Asynchronous HTTP/1.1 client library
3
4 This module is an attempt to combine the best features of httplib with
5 the scalability of asynchat.
6
7 I have pasted as much code as I could from httplib (Python 2.0) because it
8 is a well written and widely used interface. This may be a mistake,
9 because the behavior of AsyncHTTPConnection is quite different from that of
10 httplib.HTTPConnection"""
11
12 __contact__="""
13 Doug Fort <dougfort@downright.com>
14 Senior Meat Manager
15 Downright Software LLC
16 http://www.dougfort.com
17 """
18 __author__="""
19 Downright Software LLC
20 http://www.downright.com
21 """
22 __copyright__="""
23 Copyright (c) 2001 Downright Software LLC. All Rights Reserved.
24
25 Distributed and Licensed under the provisions of the Python Open Source License
26 Agreement which is included by reference. (See 'Front Matter' in the latest
27 Python documentation)
28
29 WARRANTIES
30 YOU UNDERSTAND AND AGREE THAT:
31
32 a. YOUR USE OF THE PACKAGE IS AT YOUR SOLE RISK. THE PACKAGE IS PROVIDED ON
33 AN 'AS IS' AND 'AS AVAILABLE' BASIS. DOWNRIGHT EXPRESSLY DISCLAIMS ALL
34 WARRANTIES OF ANY KIND, WHETHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED
35 TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
36 AND NON-INFRINGEMENT.
37
38 b. DOWNRIGHT MAKES NO WARRANTY THAT (1) THE PACKAGE WILL MEET YOUR
39 REQUIREMENTS, (2) THE PACKAGE WILL BE UNINTERRUPTED, TIMELY, SECURE, OR
40 ERROR-FREE, (3) THE RESULTS THAT MAY BE OBTAINED FROM THE USE OF THE PACKAGE
41 WILL BE ACCURATE OR RELIABLE, (4) THE OTHER MATERIAL PURCHASED OR OBTAINED BY
42 YOU THROUGH THE PACKAGE WILL MEET YOUR EXPECTATIONS,, AND (5) ANY ERRORS IN
43 THE PACKAGE WILL BE CORRECTED.
44
45 c. ANY MATERIALS DOWNLOADED OR OTHERWISE OBTAINED THROUGH THE USE OF THE
46 PACKAGE IS DONE AT YOUR OWN DISCRETION AND RISK AND THAT YOU WILL BE SOLELY
47 RESPONSIBLE FOR ANY DAMAGE TO YOUR COMPUTER SYSTEM OR LOSS OF DATA THAT
48 RESULTS FROM THE DOWNLOAD OF ANY SUCH MATERIAL.
49
50 d. NO ADVICE OR INFORMATION, WHETHER ORAL OR WRITTEN, OBTAINED BY YOU FROM
51 DOWNRIGHT OR THROUGH OR FROM THE PACKAGE SHALL CREATE ANY WARRANTY NOT
52 EXPRESSLY STATED IN THE TOS.
53
54 LIMITATION OF LIABILITY
55 YOU EXPRESSLY UNDERSTAND AND AGREE THAT DOWNRIGHT SHALL NOT BE LIABLE FOR ANY
56 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, CONSEQUENTIAL OR EXEMPLARY DAMAGES,
57 INCLUDING BUT NOT LIMITED TO, DAMAGES FOR LOSS OF PROFITS, GOODWILL, USE,
58 DATA OR OTHER INTANGIBLE LOSSES (EVEN IF DOWNRIGHT HAS BEEN ADVISED OF SUCH
59 DAMAGES), RESULTING FROM:
60 (1) THE USE OR THE INABILITY TO USE THE PACKAGE;
61 (2) THE COST OF PROCUREMENT OF SUBSTITUTE GOODS AND SERVICES RESULTING FROM
62 ANY GOODS, DATA, INFORMATION OR SERVICES PURCHASED OR OBTAINED OR MESSAGES
63 RECEIVED OR TRANSACTIONS ENTERED INTO THROUGH OR FROM THE PACKAGE;
64 (3) UNAUTHORIZED ACCESS TO OR ALTERATION OF YOUR TRANSMISSIONS OR DATA;
65 (4) STATEMENTS OF CONDUCT OF ANY THIRD PARTY ON THE PACKAGE; OR
66 (5) ANY OTHER MATTER RELATING TO THE PACKAGE.
67 """
68 __version__="0.20"
69
70 import sys
71 import asynchat
72 import asyncore
73 import socket
74 import time
75 import string
76 import cStringIO
77 import mimetools
78
# Port used for plain HTTP; the connection class's default_port attribute is
# set to this value.
79 HTTP_PORT = 80
# Port used for HTTPS; not referenced anywhere else in the visible source.
80 HTTPS_PORT = 443
81
82
84 """
85 This class attempts to mimic HTTPResponse from httplib.
86 The major difference is that it is NOT DYNAMIC:
87 All the reading has already been done
88 """
90 """
91 This constructor builds everything in the response
92 object except the body. It expects a file object
93 containing the header text returned by the server
94 """
95 self.debuglevel = debuglevel
96
97
98 self._replyline = fp.readline()
99 if self.debuglevel > 0:
100 print "reply: %s" % (self._replyline)
101
102 replylist = string.split(self._replyline, None, 2)
103
104 if len(replylist) == 3:
105 version, status, reason = replylist
106 elif len(replylist) == 2:
107 version, status = replylist
108 reason = ""
109 else:
110 raise BadStatusLine(self._replyline, name=str(self))
111
112 if version[:5] != 'HTTP/':
113 raise BadStatusLine(self._replyline, name=str(self))
114
115 try:
116 self.status = int(status)
117 except:
118 raise BadStatusLine(self._replyline, name=str(self))
119
120 self.reason = string.strip(reason)
121
122 if version == 'HTTP/1.0':
123 self.version = 10
124 elif version.startswith('HTTP/1.'):
125 self.version = 11
126 else:
127 raise UnknownProtocol(self._replyline, name=str(self))
128
129 self.msg = mimetools.Message(fp, 0)
130 if self.debuglevel > 0:
131 for hdr in self.msg.headers:
132 print "header: %s" % (string.strip(hdr))
133
134 self.body = None
135
137 return "AsyncHTTPResponse %s" % (self._replyline)
138
143
145 if self.body is None:
146 raise ResponseNotReady(name=str(self))
147 return self.body
148
# Not referenced anywhere else in the visible source.
149 _CHUNK_REQUEST_SIZE = 8192
150
# Connection states. __set_state() stores one of these and uses it as the key
# into _TERMINATOR_MAP to pick the found_terminator handler. putrequest()
# compares the current state with `is`, so these exact objects must be used.
# No assignment of _STATE_IDLE is visible in this listing.
151 _STATE_IDLE = "asynchttp._STATE_IDLE"
# Entered when connect() starts a connection attempt.
152 _STATE_CONNECTING = "asynchttp._STATE_CONNECTING"
# Entered on the connect notification and again once a response is complete;
# putrequest() refuses to run in any other state.
153 _STATE_ACTIVE = "asynchttp._STATE_ACTIVE"
# Entered when the request is pushed; terminator is the blank line "\r\n\r\n".
154 _STATE_ACCEPTING_HEADERS = "asynchttp._STATE_ACCEPTING_HEADERS"
# Entered after the headers of a non-chunked response; terminator is the
# Content-Length value, or None when that header is absent.
155 _STATE_REQUESTING_BODY = "asynchttp._STATE_REQUESTING_BODY"
# Waiting for a chunk-size line; terminator is "\r\n".
156 _STATE_CHUNK_START = "asynchttp._STATE_CHUNK_START"
# Reading chunk data; terminator is a byte count.
157 _STATE_CHUNK_BODY = "asynchttp._STATE_CHUNK_BODY"
# Entered after a zero-size chunk; reads up to the closing "\r\n\r\n".
158 _STATE_CHUNK_RESIDUE = "asynchttp._STATE_CHUNK_RESIDUE"
159
161
162 _http_vsn = 11
163 _http_vsn_str = 'HTTP/1.1'
164
165 response_class = AsyncHTTPResponse
166 default_port = HTTP_PORT
167 auto_open = 1
168 debuglevel = 0
169
170 - def __init__(self, host=None, port=None):
201
203 if host and port is None:
204 i = string.find(host, ':')
205 if i >= 0:
206 port = int(host[i+1:])
207 host = host[:i]
208 else:
209 port = self.default_port
210
211 self.host = host
212 self.port = port
213
216
218 """
219 Connect to the host and port specified in __init__.
220 Add ourselves to the asyncore polling group
221 """
222 self.__set_state(_STATE_CONNECTING)
223 self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
224 if self.debuglevel > 0:
225 print "connecting: (%s, %s)" % (self.host, self.port)
226
227 asyncore.dispatcher.connect(self, (self.host, self.port))
228
230 """
231 Close the connection to the HTTP server.
232 And remove ourselves from the asyncore polling group
233 """
234 if self.debuglevel > 0:
235 print "asynchttp.close() (%s, %s)" % (self.host, self.port)
236
237 self.connected = 0
238
239 if self.socket:
240 asynchat.async_chat.close(self)
241
242 self._set_hostport(None, None)
243
245 """
246 Send `str' to the server.
247 Actually, we just append str to the block of text to be sent
248 to the server when getresponse is called.
249
250 Note: the name was changed from httplib's 'HTTPConnection.send()'
251 because it conflicts with asynchat
252 """
253 if self.debuglevel > 0:
254 print "send_entity %s"
255
256 self._requestfp.write(str)
257
259 """Send a request to the server.
260
261 `method' specifies an HTTP request method, e.g. 'GET'.
262 `url' specifies the object being requested, e.g. '/index.html'.
263
264 This function actually only starts accumulating the request:
265 nothing gets sent to the server until getresponse() is called.
266 """
267 if self.debuglevel > 0:
268 print "putrequest %s %s" % (method, url)
269
270 if not self.__state is _STATE_ACTIVE:
271 raise RequestNotReady(
272 "Invalid putrequest() %s" % (self.__state),
273 name=str(self)
274 )
275
276 self._requestfp = cStringIO.StringIO()
277
278 if not url:
279 url = '/'
280 self._requestfp.write(
281 '%s %s %s\r\n' % (method, url, self._http_vsn_str)
282 )
283
284 self._headerdict = {}
285
286 if self._http_vsn == 11:
287
288
289
290
291
292
293
294
295 self.putheader('Host', self.host)
296
297
298
299
300
301
302
303
304
305 self.putheader('Accept-Encoding', 'identity')
306
307
308
309
310
311
312
313
314
316 """
317 Send a request header line to the server.
318
319 For example: h.putheader('Accept', 'text/html')
320 We don't actually send the header here, we stick it
321 in a dictionary, to be sent when getresponse() is
322 called. If you call putheader() with a duplicate
323 key, it will wipe out the existing entry.
324 """
325 if self.debuglevel > 0:
326 print "putheader %s: %s" % (header, value)
327
328 self._headerdict[header] = value
329
331 """
332 Indicate that the last header line has been sent to the server.
333 Actually, we just copy the header dictionary into the request
334 stream to be sent when getresponse() is called.
335 """
336 if self.debuglevel > 0:
337 print "endheaders"
338
339 for header, value in self._headerdict.items():
340 self._requestfp.write(
341 '%s: %s\r\n' % (header, value)
342 )
343
344 self._requestfp.write('\r\n')
345
def request(self, method, url, body=None, headers=None):
    """
    Send a complete request to the server.

    This is a thin wrapper: it passes its arguments straight on to
    self._send_request().

    `method'  specifies an HTTP request method, e.g. 'GET'.
    `url'     specifies the object being requested, e.g. '/index.html'.
    `body'    is the optional entity body; None means no body.
    `headers' is an optional mapping of header names to values; None
              (the default) means no extra headers.
    """
    if self.debuglevel > 0:
        # Parenthesised single-argument form prints the same text under
        # the Python 2 print statement.
        print("request")

    # Build a fresh dict per call instead of sharing one mutable default
    # object between every call that omits `headers'.
    if headers is None:
        headers = {}

    self._send_request(method, url, body, headers)
354
356 if self.debuglevel > 0:
357 print "_send_request"
358
359 self.putrequest(method, url)
360
361 if body:
362 self.putheader('Content-Length', str(len(body)))
363
364 for hdr, value in headers.items():
365 self.putheader(hdr, value)
366
367 self.endheaders()
368
369 if body:
370 self.send_entity(body)
371
373 """
374 Get the response from the server.
375 This actually starts the process of sending the request
376 to the server. The response will be delivered in handle_response
377 """
378 self.__set_state(_STATE_ACCEPTING_HEADERS)
379
380 self.push(self._requestfp.getvalue())
381
382 self._requestfp = None
383
384
385 self.set_terminator("\r\n\r\n")
386
387 self._responsefp = cStringIO.StringIO()
388
390 """
391 Notification from asyncore that we are connected
392 """
393 self.__set_state(_STATE_ACTIVE)
394 if self.debuglevel > 0:
395 print "connected: (%s, %s)" % (self.host, self.port)
396
398 """
399 Notification from asyncore that the server has closed
400 its end of the connection.
401 If auto_open is TRUE, we will attempt to reopen the
402 connection.
403 """
404 if self.debuglevel > 0:
405 print "closed by server: (%s, %s) %s" % (
406 self.host, self.port, self.__state
407 )
408
409
410
411
412 if self.__state in [
413 _STATE_REQUESTING_BODY,
414 _STATE_CHUNK_BODY,
415 _STATE_CHUNK_RESIDUE
416 ]:
417 self.found_terminator()
418 return
419
420 asynchat.async_chat.handle_close(self)
421
422
423 if AsyncHTTPConnection.auto_open and self.host:
424 self.connect()
425
427
428 try:
429 data = self.recv (self.ac_in_buffer_size)
430 except socket.error, why:
431 self.handle_error()
432 return
433
434 self.ac_in_buffer = self.ac_in_buffer + data
435
436
437
438
439
440
441 while self.ac_in_buffer:
442 lb = len(self.ac_in_buffer)
443 terminator = self.get_terminator()
444 if terminator is None:
445
446 self.collect_incoming_data (self.ac_in_buffer)
447 self.ac_in_buffer = ''
448 elif type(terminator) == type(0):
449
450 n = terminator
451 if lb < n:
452 self.collect_incoming_data (self.ac_in_buffer)
453 self.ac_in_buffer = ''
454 self.terminator = self.terminator - lb
455 else:
456 self.collect_incoming_data (self.ac_in_buffer)
457 self.ac_in_buffer = ''
458 self.terminator = 0
459 self.found_terminator()
460 else:
461
462
463
464
465
466
467
468 terminator_len = len(terminator)
469 index = string.find (self.ac_in_buffer, terminator)
470 if index != -1:
471
472 if index > 0:
473
474 self.collect_incoming_data (self.ac_in_buffer[:index])
475 self.ac_in_buffer = self.ac_in_buffer[index+terminator_len:]
476
477 self.found_terminator()
478 else:
479
480 index = asynchat.find_prefix_at_end (self.ac_in_buffer, terminator)
481 if index:
482 if index != lb:
483
484 self.collect_incoming_data (self.ac_in_buffer[:-index])
485 self.ac_in_buffer = self.ac_in_buffer[-index:]
486 break
487 else:
488
489 self.collect_incoming_data (self.ac_in_buffer)
490 self.ac_in_buffer = ''
491
492
499
501 """
502 asynchat calls this with data as it comes in
503 """
504 if not self._responsefp:
505 raise UnexpectedData(
506 "%s '%s' '%s' '%s'" % (
507 self.__state,
508 data,
509 self.get_terminator(),
510 self.ac_in_buffer
511 ), name=str(self))
512
513 self._responsefp.write(data)
514
516 """
517 overload asynchat.found_terminator
518 This function will only be called when someone is badly confused
519 """
520 raise UnexpectedTerminator(
521 "%s '%s'" % (self.__state, self.get_terminator()),
522 name=str(self)
523 )
524
526 """
527 overload asynchat.found_terminator for
528 _STATE_ACCEPTING_HEADERS
529 We assume that we have hit the blank line terminator after the
530 HTTP response headers.
531 """
532 self._responsefp.seek(0)
533 self.response = self.response_class(
534 self._responsefp,
535 self.debuglevel
536 )
537
538 self._willclose = string.lower(
539 self.response.getheader("connection", "")
540 ) == "close"
541
542 transferencoding = string.lower(
543 self.response.getheader("transfer-encoding", "")
544 )
545
546
547 self._responsefp = cStringIO.StringIO()
548
549 if transferencoding:
550 if transferencoding == "chunked":
551 self._chunkfp = cStringIO.StringIO()
552 self.set_terminator("\r\n")
553 self.__set_state(_STATE_CHUNK_START)
554 return
555
556 raise UnknownTransferEncoding(
557 self.response.getheader("transfer-encoding", ""),
558 name=str(self)
559 )
560
561 contentlengthstr = self.response.getheader(
562 "content-length", None
563 )
564 if contentlengthstr:
565 contentlength = int(contentlengthstr)
566 else:
567 contentlength = None
568
569 self.set_terminator(contentlength)
570 self.__set_state(_STATE_REQUESTING_BODY)
571
def _body_data(self):
    """
    found_terminator handler for _STATE_REQUESTING_BODY.

    Treats everything collected in the response buffer as the complete
    body: stores it on the response object, drops the buffer, closes
    the connection if the close flag is set, returns to _STATE_ACTIVE
    and finally calls handle_response().
    """
    body_text = self._responsefp.getvalue()
    self.response.body = body_text

    # The buffer is finished with once the body has been copied out.
    self._responsefp = None

    if self._willclose:
        self.close()

    self.__set_state(_STATE_ACTIVE)

    # Hand the finished response over only after the state is updated.
    self.handle_response()
588
590 """
591 Assume that chunkbuffer contains some text, beginning with
592 a line containing the chunk size in hex.
593 """
594
595
596 splitlist = self._chunkbuffer.lstrip().split("\r\n",1)
597 if len(splitlist) == 1:
598 chunkline, self._chunkbuffer = splitlist[0], ''
599 else:
600 chunkline, self._chunkbuffer = splitlist
601
602 i = string.find(chunkline, ';')
603 if i >= 0:
604 chunkline = chunkline[:i]
605
606 try:
607 chunksize = string.atoi(chunkline, 16)
608 except:
609 raise InvalidChunk(
610 "Can't compute chunk size from '%s' '%s'" % (
611 chunkline, self._chunkbuffer
612 ))
613
614 if self.debuglevel > 0:
615 print "chunksize = '%d" % (chunksize)
616
617 return chunksize
618
620 """
621 overload asynchat.found_terminator for
622 _STATE_CHUNK_START
623 Assumes we got a hit on terminator '\r\n'
624 """
625 self._chunkbuffer = self._responsefp.getvalue()
626 self._chunksize = self._get_chunk_size()
627 if self._chunksize == 0:
628 if self.debuglevel > 0:
629 print "0 size Chunk: ending chunk processing"
630 self.response.body = self._chunkfp.getvalue()
631 self._chunkfp = None
632 self.set_terminator("\r\n\r\n")
633 self._responsefp = cStringIO.StringIO()
634 self.__set_state(_STATE_CHUNK_RESIDUE)
635 return
636
637 self.set_terminator(self._chunksize+2)
638 self._responsefp = cStringIO.StringIO()
639 self.__set_state(_STATE_CHUNK_BODY)
640
642 """
643 overload asynchat.found_terminator for
644 _STATE_CHUNK_BODY
645 """
646 self._chunkbuffer += self._responsefp.getvalue()
647
648 while self._chunkbuffer:
649 chunk_plus_crlf_size = self._chunksize+2
650 if len(self._chunkbuffer) > chunk_plus_crlf_size:
651 chunkbody = self._chunkbuffer[:chunk_plus_crlf_size]
652 self._chunkbuffer = self._chunkbuffer[chunk_plus_crlf_size:]
653 self._chunkbuffer = self._chunkbuffer.lstrip()
654 else:
655 chunkbody = self._chunkbuffer
656 self._chunkbuffer = ''
657
658 self._chunkfp.write(chunkbody)
659
660 if not self._chunkbuffer:
661 break
662
663
664
665 if self._chunkbuffer.find("\r\n") < 0:
666 self._responsefp = cStringIO.StringIO()
667 self.set_terminator("\r\n")
668 self.__set_state(_STATE_CHUNK_START)
669 return
670
671 self._chunksize = self._get_chunk_size()
672 if self._chunksize == 0:
673 if self.debuglevel > 0:
674 print "0 size Chunk: ending chunk processing"
675 self.response.body = self._chunkfp.getvalue()
676 self._chunkfp = None
677
678
679
680
681 if self._chunkbuffer:
682 self._chunkbuffer = ""
683 self.__set_state(_STATE_ACTIVE)
684
685 if self._willclose:
686 self.close()
687
688
689 self.handle_response()
690 return
691
692
693
694
695 self.set_terminator("\r\n\r\n")
696 self._responsefp = cStringIO.StringIO()
697 self.__set_state(_STATE_CHUNK_RESIDUE)
698 return
699
700
701
702
703 chunk_plus_crlf_size = self._chunksize+2
704 bufsize = len(self._chunkbuffer)
705 if bufsize < chunk_plus_crlf_size:
706 self.set_terminator(chunk_plus_crlf_size - bufsize)
707 self._responsefp = cStringIO.StringIO()
708 self.__set_state(_STATE_CHUNK_BODY)
709 return
710
711
712
713
714
715
716
717 self._responsefp = cStringIO.StringIO()
718 self.set_terminator("\r\n")
719 self.__set_state(_STATE_CHUNK_START)
720
722 """
723 overload asynchat.found_terminator for
724 _STATE_CHUNK_RESIDUE
725 """
726 residue = string.strip(self._responsefp.getvalue())
727 if self.debuglevel > 0 and residue:
728 print "chunk residue '%s'" % (residue)
729
730 self._responsefp = None
731
732 if self._willclose:
733 self.close()
734
735 self.__set_state(_STATE_ACTIVE)
736
737
738 self.handle_response()
739
741 """
742 This is an abstract function, the user MUST overload it
743 """
744 raise HandleResponse(
745 "Call to AsyncHTTPConnection.handle_response", name=str(self)
746 )
747
749 """
750 Change state by setting found_terminator
751 """
752 if self.debuglevel > 0:
753 print "%s to %s" % (self.__state, next_state)
754 self.__state = next_state
755 self.found_terminator = self._TERMINATOR_MAP[self.__state]
756
757
def __init__(self, message="", name=""):
    """
    Record the error text and the name of the object reporting it.

    `message' is stored as self._message and `name' as self._name;
    both default to the empty string.
    """
    self._message, self._name = message, name
762
764 return "%s %s" % (self._name, self._message)
765
768
771
774
777
780
783
786
789
792
795
798
815
if __name__ == "__main__":
    # Code for command-line testing: connect to <host>:<port> with the
    # test connection class defined elsewhere in this file, run the
    # asyncore loop, and print the body of a 200 response.
    if len(sys.argv) < 4:
        print("Usage: asynchttp.py <host> <port> <request>")
        sys.exit(-1)

    tester = __test_AsyncHTTPConnection(
        sys.argv[1],
        int(sys.argv[2]),
        sys.argv[3]
    )

    tester.connect()

    # Blocks until the asyncore loop returns.
    asyncore.loop()

    # `response' is only assigned once reply headers have been parsed, so
    # its absence means nothing usable came back from the server.
    if not hasattr(tester, "response"):
        print("No response")
        sys.exit(-1)

    # Only a successful reply has its body printed; any other status
    # produces no output.
    if tester.response.status == 200:
        print(tester.response.body)
851