# # # $Id: ahttplib.py,v 1.36 2004/03/23 03:19:40 aweil Exp $ # # # # ######################################################### # # # # - It would be nice REPLACE putheader with putheaders(dic) # # # # ######################################################### # import socket import errno import async2 import types #our target completer checks types.. import SimpleChannel #used in http_sock to write/read to a string.. SSL_ERRORSET = [socket.SSL_ERROR_EOF,\ socket.SSL_ERROR_INVALID_ERROR_CODE, socket.SSL_ERROR_SSL,\ socket.SSL_ERROR_SYSCALL, socket.SSL_ERROR_WANT_CONNECT,\ socket.SSL_ERROR_WANT_READ, socket.SSL_ERROR_WANT_WRITE,\ socket.SSL_ERROR_WANT_X509_LOOKUP, socket.SSL_ERROR_ZERO_RETURN] HTTP_PORT = 80 newline = '\r\n' _header = '%s: %s'+newline # Errors _state_error = 'Invalid state for requested operation' _cant_connect = 'Can\'t connect' enable_log = False def log(s): global enable_log if enable_log: print s def __make_http_socket_over ( dispatcher_specie ): """ This function makes http_socket over an asyncronous socket dispatcher given. Specially crafted to be called with: dispatcher / ssldispatcher """ class _int_http_socket(dispatcher_specie): _init = 'init' _connected = 'connected' _closed = 'closed' def __init__(self, target, request=''): """target should be (host, port)""" dispatcher_specie.__init__(self) self._status = self.__class__._init self.create_socket(socket.AF_INET, socket.SOCK_STREAM) self.target=target self.buffer = request #we are the writer! self._reset_recv('') self.connect( target ) def __str__ (self): return '<%s httpsocket->%s>'%(self.status(),repr(self.target)) def status (self): return self._status def connected (self): return self.status() == self.__class__._connected def closed (self): return self.status() == self.__class__._closed def _reset_recv(self, data): self.channel = SimpleChannel.SimpleChannel() self.writeon = self.channel.get_writer() self.append(data) def append_data(self, request): #if self.closed: # raise Exception('http socket closed!') self.buffer+=request def handle_connect(self): self._status = self.__class__._connected def append(self, str): """ Appends a string to received data """ self.writeon.write(str) def handle_read(self): try: data = self.recv(8192) self.append(data) #log('data:'+repr(data)) except socket.sslerror, e: print 'SSLError ->',str(e) if e[0] == socket.SSL_ERROR_EOF: self.handle_close() elif e[0] in SSL_ERRORSET: self.handle_error() except Exception, e: print '\rException reading!!!:',e #raise def writable(self): return len(self.buffer) > 0 def handle_write(self): try: sent = self.send(self.buffer) self.buffer = self.buffer[sent:] except socket.sslerror, e: print 'SSLError ->',str(e) if e[0] == socket.SSL_ERROR_EOF: self.handle_close() elif e[0] in SSL_ERRORSET: self.handle_error() except Exception, e: print '\rException writting!!!:',e def handle_close(self): self._status = self.__class__._closed #XXX TODO def set_socket(self, socket): ret = dispatcher_specie.set_socket(self, socket) self.channel = SimpleChannel.SimpleChannel() self.writeon = self.channel.get_writer() return ret #__make_http_socket_over's body: return _int_http_socket class ObjectMerger: """ Object 'Merger'. Returns an instance that takes methods from two given instances/objects. In order, as if one have the subclass methods of the other. """ def __init__(self, super, _self): self.___super = super self.___self = _self def __getattr__ ( self, name ): if not name.startswith('___'): super = self.___super _self = self.___self if hasattr(_self, name): return getattr(_self, name) elif hasattr(super, name): return getattr(super, name) return getattr(ObjectMerger, name) class AttrXlater: """Attribute translator""" def __init__(self, _self, methodxchange={}): self.___mxlate = methodxchange self.___self = _self def __getattr__ ( self, name ): _self = self.___self if not name.startswith('___'): if self.___mxlate.has_key(name): name = self.___mxlate[name] if hasattr(_self, name): return getattr(_self, name) return getattr(AttrXlater, name) class ssldispatcher(async2.dispatcher): def handle_connect_event(self): try: sslsock = socket.ssl(self.socket) #no certs this time, sorry except Exception, e: print 'EXCEPTION!!! not catching!', e print 'calling handle expt event()' self.handle_expt_event() else: #we must use the ssl from here.. oldsocket = self.socket newsocket = AttrXlater( \ ObjectMerger(oldsocket, sslsock), \ {'recv':'read','send':'write'}) self.set_socket(newsocket) #everything went fine, so.. go on! async2.dispatcher.handle_connect_event(self) class multidispatcher(async2.dispatcher): """kind of weird protocol stack""" def __init__(self, sock=None, map=None): async2.dispatcher.__init__(self, sock=None, map=None) self.socketstack = [] def push_socket(self, newsocket): self.socketstack.append(self.set_socket(newsocket)) def pop_socket(self): return self.set_socket(self.socketstack.pop()) http_socket = __make_http_socket_over (multidispatcher) https_socket = __make_http_socket_over (ssldispatcher) class ConnectionException(Exception): pass class ResponseException(Exception): def __init__(self, str): self.value = str def __str__(self): return repr(self.value) class HttpResponseReader: """ HttpResponseReader main objetive is to recognize different fields from a http request's answer and provide it. """ # response status _response = 'response' _headers = 'headers' _body = 'body' _finished = 'finished' def __init__(self, stream, errors='strict'): self._status = HttpResponseReader._response self.stream = stream self.errors = errors self.buffer = '' self.headers = {} self.code = None self.__clen = None self.rcode = 0 #default response code self.httpver = '1.0' #internal vars self.statusmsg = '' self.remain = '' def status(self): return self._status def finished(self): return self.status()==HttpResponseReader._finished def __str__ (self): return 'Response([%s,%d,%s],%s,%s,%s)' % \ (self.httpver, self.rcode, self.statusmsg[:10], \ str(self.headers)[:10], self.buffer[:15], self.remain[:10]) def read(self, size=1024*1024): self.buffer+=self.stream.read(size) cont=True while cont: cont = self.peek() if self.finished(): return self return '' def peek(self): """ Tries to read one piece of data from our buffer Returns True if made some change, and False if no modified anything """ if self._status==HttpResponseReader._finished: #print 'finished' return False elif self._status==HttpResponseReader._response: #print 'in response' sp = self.buffer.split(newline,1) if len(sp)<>2: return False statusline, self.buffer = sp log('status line:'+statusline) try: self.httpver, statusline=statusline.split(' ',1) try: rcode, self.statusline=statusline.split(' ',1) self.rcode = int(rcode) if self.rcode>=400: self._status=HttpResponseReader._finished return False if self.rcode>=500: self._status=HttpResponseReader._finished return False except: pass except: pass self._status=HttpResponseReader._headers elif self._status==HttpResponseReader._headers: #print 'in headers' sp = self.buffer.split(newline,1) if len(sp)==2: header, self.buffer = sp else: return False if len(header)==0: if self.rcode==100: self._status=HttpResponseReader._response elif self.rcode<400: self._status=HttpResponseReader._body else: raise ResponseException('Response code=%d(%s)'% \ (self.rcode,header)) return True sp = header.split(': ',1) if len(sp)<>2: #reset would be welcomed raise ResponseException('Invalid header: "%s"'%header) self.headers[sp[0]]=sp[1] elif self._status==HttpResponseReader._body: #print 'in body' if self.__clen==None: try: self.__clen = int(self.headers['Content-Length']) except: #if there's no Content-Length header, we finish. #returning this object, providing read() function #to return received contents! log('no content-length') self._status = HttpResponseReader._finished return True clen=self.__clen log('len %d - clen %d'%(len(self.buffer),clen)) if len(self.buffer)>=clen: self.remain = self.buffer[clen:] self.buffer = self.buffer[:clen] self._status = HttpResponseReader._finished else: return False return True def show(self): print '\r'+(70*'-') print 'received: ',repr(self.buffer+'.. ') print 'statusmsg',self.statusline, ' rcode:',self.rcode print 'buffer',self.buffer print 'headers',self.headers print 'remain',self.remain def complete_peer(dst, port=80, proto='http'): if type(dst)==types.StringType: dst = (proto, dst, port) return dst _default_http_ver = '1.0' _http_ver_11 = '1.1' _HTTP_HOST_HDR = 'Host' class AHttpConnection: # status _idle = 'idle' _connected = 'connected' _finished = 'finished' socket_class = http_socket default_port = HTTP_PORT default_proto = 'http' def __init__ ( self, dst, proxy=None, http_version=_default_http_ver): self.sock=None self.headers={} self.response=None self.respvalue=None self._status=AHttpConnection._idle self.httpver = http_version superc = self.__class__ self.dst = complete_peer(dst, port=superc.default_port, \ proto=superc.default_proto) ## ##by request variables ## #self._headers_sent = {} # contains headers sent in last request #now, send it always! self._headers_sent = False self.version = http_version def request( self, request, method='GET', headers={}, data='', version=None): """Makes a complete request calling subfunctions""" self.putrequest(request, method, headers=headers, version=version) self.endheaders() self.send(data+newline) # medium level functions def connect ( self ): if self.status()==AHttpConnection._connected: return self.sock = self.__class__.socket_class( (self.dst[1],self.dst[2]) ) self._status = AHttpConnection._connected def putrequest( self, request, method='GET', headers={}, version=None): if version==None: version=self.httpver self.version = version self.connect() _request = method + ' ' + request + ' HTTP/'+ version self._send(_request+newline) for key in headers.keys(): self.putheader(key, headers[key]) def putheader( self, key, value ): self._send(_header%(key,value)) #self._headers_sent[key] = value self._headers_sent = True def endheaders( self ): global _HTTP_HOST_HDR if self.version==_http_ver_11: #if not self._headers_sent.has_key(_HTTP_HOST_HDR): #now always send Host header! yeaph! self.putheader(_HTTP_HOST_HDR, self.dst[1]) if self._headers_sent: self._send(newline) self._headers_sent = {} def read( self ): return self.get_response() def get_response( self ): """ -If we don't have a connect, create it and read(). -If we have a connection: -read -keep it until it finish. -If finished: return it, else return none """ log('getresp') if self.status() <> AHttpConnection._connected: #print 'not connected' log('not connected') raise ConnectionException('not connected') if self.response==None: log('build response reader') self.response=HttpResponseReader(self.sock.channel.get_reader()) if not self.response.finished(): log('not finished.. tring to read') self.response.read() if not self.response.finished(): return None log('finished.. go on!') #we finished return the response received! response=self.response self.response=None self.sock._reset_recv(response.remain) return response ## new functions def status (self): return self._status def finished( self ): return self.status()==AHttpConnection._finished def close ( self ): self.sock.close() self._status = AHttpConnection._finished def send( self, data ): """ our send function is non blocking """ self._send(data) ## ## internal functions ## def _send( self, data ): self.sock.append_data( data ) ## ## weird functions ## def signal_ssl_socket(self): """ This function switchs the socket used by asyncronous socket dispatcher """ prevsock = self.sock.socket try: #is this blocking? i think.. sslsock = socket.ssl(prevsock) #no certs this time, sorry except Exception, e: #we shoould raise something heavy here #'cause it used to skip connect() exceptions print 'EXCEPTION!!! not catching!', e print 're raising error!' raise else: #we must use the ssl from here.. newsocket = AttrXlater( \ ObjectMerger(prevsock, sslsock), \ {'recv':'read','send':'write'}) self.sock.push_socket(newsocket) return True #everything went fine, so.. go on! class AHttpsConnection(AHttpConnection): socket_class = https_socket default_port = 443 default_proto = 'https' class AProxyHttpConnection(AHttpConnection): _sslidle = 'idle' _sslhandshake = 'sslhands' _sslconnected = 'connected' def __init__ ( self, dst, proxy, http_version=_default_http_ver, \ proxy_http_ver=None): #setup target to know the kind of end-connection. self.target= complete_peer(dst) self.proxy = complete_peer(proxy) AHttpConnection.__init__( self, dst, http_version) self.cached_request = None self.sslstat = AProxyHttpConnection._sslidle self._proxied_headers = None #keeps sent headers.. self.proxy_http_version = proxy_http_ver def connect ( self ): if self.status()==AHttpConnection._connected: return self.sock = self.__class__.socket_class( (self.proxy[1],self.proxy[2]) ) self._status = AHttpConnection._connected def putrequest( self, request, method='GET', headers={}, version=None): if self.target[0]=='http': #simplest proxy connection, append proxy _request= 'http://'+str(self.target[1]) if self.proxy[2]<>80: _request+=':'+str(self.target[2]) if request[0]<>'/': _request+='/' _request += request elif self.target[0]=='https': #self.proxy_http_version #we must switch method if self.sslstat <> AProxyHttpConnection._sslconnected: self.cached_request = (request, method, version, headers) method = 'CONNECT' _request = '%s:%d'%(self.target[1], self.target[2]) headers = {} else: #here the requests are directly to the destination host.. #rewrite no required! _request = request #finally all keeps calling the superclass.. AHttpConnection.putrequest(self, _request, method, headers, version) def get_response (self): if self.target[0]<>'https': return AHttpConnection.get_response(self) #we have an ssl connection.. what = self.sock.channel.st1.getvalue() #log('RECV<'+repr(what)+'>') if self.sslstat == AProxyHttpConnection._sslidle: r = AHttpConnection.get_response(self) if r<>None: self.sslstat = AProxyHttpConnection._sslhandshake self.response = r #we setup the response again.. return None #keep connecting.. elif self.sslstat == AProxyHttpConnection._sslhandshake: if self.signal_ssl_socket(): self.sslstat = AProxyHttpConnection._sslconnected return None elif self.sslstat == AProxyHttpConnection._sslconnected: #inject inside request if self.cached_request<>None: new = self.cached_request self.cached_request = None ##request, method='GET', headers={}, data='', version=None ##Internally we know that our sslstat is connected.. self.request(request=new[0], method=new[1], headers=new[3], \ version=new[2]) #return '' self.response = None return AHttpConnection.get_response(self) def get_system_proxy_config(proto='http'): pdic = {'http':('http','localhost', 3128)} #return ('192.168.254.254', 80) return pdic[proto] def mini_loop(conn): #some connection to loop r = None while r==None: async2.looponce(0.5) try: r = conn.get_response() except ResponseException, e: print e log(str(e)) except Exception, e: print e pass return r if __name__=='__main__': import unittest import sys #for argv import StringIO #for Response tests class testHttpResponseReader(unittest.TestCase): """This class tests handling of responses given by http servers""" def test_resp_reader_1(self): """Test HttpResponseReader()""" test='' test+='HTTP/1.1 200 OK\r\n' test+='Server: Microsoft-IIS/5.0\r\n' test+='Date: Wed, 11 Feb 2004 00:58:39 GMT\r\n' test+='X-MSN-Messenger: SessionID=367754021.32470; GW-IP=' \ '207.46.110.25\r\n' test+='Content-Length: 18\r\n' test+='Content-type: application/x-msn-messenger\r\n' test+='\r\n' test+='VER 1 MSNP8 CVR0\r\n' ios = StringIO.StringIO(test) rr = HttpResponseReader(ios) r = rr.read() self.failIf(r=='') #print r def test_resp_reader_2(self): """Test2 HttpResponseReader()""" # # This empty response caused troubles # test ='HTTP/1.1 200 OK\r\n' test+='Server: Microsoft-IIS/5.0\r\n' test+='Date: Tue, 17 Feb 2004 04:59:04 GMT\r\n' test+='X-MSN-Messenger: SessionID=646347394.16934; GW-IP=' \ '207.46.110.47\r\n' test+='Content-Length: 0\r\n' test+='Content-type: application/x-msn-messenger\r\n' test+='\r\n' #print 'len is:',len(test) assert len(test)==210 ios = StringIO.StringIO(test) rr = HttpResponseReader(ios) r = rr.read() self.failIf(r=='') #print r def test_resp_reader_3(self): """Test3 HttpResponseReader()""" test ='HTTP/1.1 100 Continue\r\n' test+='Server: Microsoft-IIS/5.0\r\n' test+='Date: Tue, 17 Feb 2004 04:59:04 GMT\r\n' test+='\r\n' test+='HTTP/1.1 200 OK\r\n' test+='Server: Microsoft-IIS/5.0\r\n' test+='Date: Tue, 17 Feb 2004 04:59:04 GMT\r\n' test+='X-MSN-Messenger: SessionID=646347394.16934; GW-IP=' \ '207.46.110.47\r\n' test+='Content-Length: 0\r\n' test+='Content-type: application/x-msn-messenger\r\n' test+='\r\n' #print 'len is:',len(test) ios = StringIO.StringIO(test) rr = HttpResponseReader(ios) r = rr.read() self.failIf(r=='') #print r def test_new(self): """Test4 HttpResponseReader() - A wrong response..""" test ='HTTP/1.1 200 OK\r\n' test+='Server: Microsoft-IIS/4.0\r\n' test+='Content-Location: https://32.104.16.39/Default.htm\r\n' test+='Date: Sun, 07 Mar 2004 01:22:59 GMT\r\n' test+='Content-Type: text/html\r\n' test+='Accept-Ranges: bytes\r\n' test+='Last-Modified: Mon, 08 Jul 2002 13:40:42 GMT\r\n' test+='ETag: "06992d8526c21:1b2b"\r\n' test+='Content-Length: 287\r\n' test+='\r\n' test+='\r\n' test+='\r\n' test+='\r\n' test+=''') class testHttpConnection(unittest.TestCase): """This class tests simplest http servers connection""" def test_simplest_connection(self): """1 Test AHttpConnection()""" print dsthost = 'localhost' if len(sys.argv)>1: dsthost = sys.argv[1] h = AHttpConnection(dsthost, http_version='1.0') h.connect() h.request('/') r = mini_loop(h) assert r<>None self.failIf(r==None) print 'received: ',repr(r.buffer[:20]+'..') h.close() def test_simple_connection(self): """1 Test AHttpConnection()/1.1""" print dsthost = 'localhost' if len(sys.argv)>1: dsthost = sys.argv[1] h = AHttpConnection(dsthost, http_version='1.1') h.connect() h.request('/') r = mini_loop(h) assert r<>None self.failIf(r==None) print 'received: ',repr(r.buffer[:20]+'..') h.close() class testProxyHttpConnection(unittest.TestCase): """Test some different proxy usage""" def test_proxy_connection(self): """ 2 Test AHttpConnection() - via proxy""" print h = AProxyHttpConnection('www.google.com', \ proxy=get_system_proxy_config(), http_version='1.0') h.connect() h.request('/') r = mini_loop(h) self.failIf(r==None) print 'received: ',repr(r.buffer[:20]+'.. ') h.sock.close() def test_proxy_ssl_connection(self): """3 Test AHttpConnection() -> proxied ssl""" h = AProxyHttpConnection( ('https','www.bostonaccess.com.ar', 443),\ proxy=('http','localhost',8888), http_version='1.0') h.connect() h.request('/',method='GET') r = mini_loop(h) self.failIf(r==None) print print 'received: ',repr(r.buffer[:20]+'.. ') self.failIf(not \ r.buffer.startswith('