[ ]
宙飒天下网 1. 用httptools解析请求和响应头
httptools 是一个用 Cython 编写的 HTTP 解析库,可用于解析 HTTP 请求和响应。请求解析器与响应解析器的接口和用法类似,本文以解析请求头为例。
httptools 采用回调方式进行解析:把一个实现了 on_header、on_headers_complete、on_body、on_message_complete 等回调方法的对象传给解析器,解析器在解析到相应内容时调用这些方法。
import httptools
# Sample raw HTTP/1.1 GET request used as input for the parser below.
# NOTE(review): in this listing the request line and the headers are separated
# by spaces; an HTTP parser expects line breaks between them -- confirm against
# the original listing before running.
# NOTE(review): the Cookie header appears to contain real session values;
# consider redacting them.
head = b'''GET / HTTP/1.1 Host: github.com Connection: keep-alive Cache-Control: max-age=0 Upgrade-Insecure-Requests: 1 User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 Accept-Encoding: gzip, deflate, sdch, br Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.6,fr;q=0.4,en;q=0.2,ja;q=0.2 Cookie: _octo=GH1.1.1989111283.1493917476; logged_in=yes; dotcom_user=hsz1273327; _gat=1; user_session=mrt4FoCQYKoRaeE7L3ZwWyGJiJHfA2SFq3C0BgftZ2tFrep9; __Host-user_session_same_site=mrt4FoCQYKoRaeE7L3ZwWyGJiJHfA2SFq3C0BgftZ2tFrep9; _gh_sess=eyJzZXNzaW9uX2lkIjoiMmVhYThhM2QxMDhiMjFiNzE5YjhjNGRlMTIyOTM2ZDIiLCJsYXN0X3JlYWRfZnJvbV9yZXBsaWNhcyI6MTQ5ODk3MTg4OTYyNCwiY29udGV4dCI6Ii8iLCJsYXN0X3dyaXRlIjoxNDk4OTIzMTgwNjQzLCJyZXR1cm5fdG8iOiIvcmJhcnJvaXMvZmFjdG9yeWJveSIsInNweV9yZXBvIjoiTWFnaWNTdGFjay9odHRwdG9vbHMiLCJzcHlfcmVwb19hdCI6MTQ5ODk3MTcyNSwiZmxhc2giOnsiZGlzY2FyZCI6WyJhbmFseXRpY3NfbG9jYXRpb24iXSwiZmxhc2hlcyI6eyJhbmFseXRpY3NfbG9jYXRpb24iOiIvZGFzaGJvYXJkIn19fQ%3D%3D--cc0fb066d6afd219a9483d1cb131a26c5608de88; _ga=GA1.2.442677063.1493917476; tz=Asia%2FShanghai '''
class Protocol:
    """Collect the headers of an HTTP request parsed by httptools.

    The instance hands itself to ``httptools.HttpRequestParser`` as the
    callback object; the ``on_*`` methods below are the callbacks, and
    ``data_received`` feeds raw bytes to the parser.
    """

    def __init__(self):
        # Parsed headers, keyed by header name exactly as the parser reports it.
        self._header = {}
        # Most recent body chunk; stays None until on_body is called.
        self._body = None
        self._parser = httptools.HttpRequestParser(self)
        self._total_request_size = 0
        self.request_max_size = None

    @property
    def parser(self):
        """Return the underlying ``HttpRequestParser``."""
        return self._parser

    @property
    def header(self):
        """Return the dict of headers collected so far."""
        return self._header

    def data_received(self, data):
        """Feed a chunk of raw request bytes to the parser."""
        self.parser.feed_data(data)

    def update_header(self, head):
        """Print ``head`` and merge it into the collected headers."""
        print(head)
        self._header.update(head)

    def on_header(self, name, value):
        """Parser callback: record one header name/value pair."""
        # Must go through self: the original referenced the module-level
        # name ``m``, which broke every instance not bound to that name.
        self.update_header({name: value})

    def on_headers_complete(self):
        """Parser callback: all headers have been parsed."""
        print("header complete")

    def on_body(self, body):
        """Parser callback: store the received body data."""
        # NOTE(review): each call overwrites the previous value; if the parser
        # delivers a body in several chunks only the last is kept -- confirm.
        self._body = body

    def on_message_complete(self):
        """Parser callback: the whole message has been parsed."""
        print("message")
# Create the callback object; its constructor also builds the HttpRequestParser.
m = Protocol()
# Feed the raw request bytes to the parser, which reports headers through
# the object's on_* callbacks.
m.data_received(head)
{b'Host': b'github.com'} {b'Connection': b'keep-alive'} {b'Cache-Control': b'max-age=0'} {b'Upgrade-Insecure-Requests': b'1'} {b'User-Agent': b'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'} {b'Accept': b'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'} {b'Accept-Encoding': b'gzip, deflate, sdch, br'} {b'Accept-Language': b'zh-CN,zh;q=0.8,zh-TW;q=0.6,fr;q=0.4,en;q=0.2,ja;q=0.2'}
m.header
{b'Accept': b'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', b'Accept-Encoding': b'gzip, deflate, sdch, br', b'Accept-Language': b'zh-CN,zh;q=0.8,zh-TW;q=0.6,fr;q=0.4,en;q=0.2,ja;q=0.2', b'Cache-Control': b'max-age=0', b'Connection': b'keep-alive', b'Host': b'github.com', b'Upgrade-Insecure-Requests': b'1', b'User-Agent': b'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
m.parser.get_http_version()
'1.1'
head = b'''GET / HTTP/1.1 Host: github.com Connection: keep-alive Cache-Control: max-age=0 Upgrade-Insecure-Requests: 1 User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 Accept-Encoding: gzip, deflate, sdch, br Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.6,fr;q=0.4,en;q=0.2,ja;q=0.2 Cookie: _octo=GH1.1.1989111283.1493917476; logged_in=yes; dotcom_user=hsz1273327; _gat=1; user_session=mrt4FoCQYKoRaeE7L3ZwWyGJiJHfA2SFq3C0BgftZ2tFrep9; __Host-user_session_same_site=mrt4FoCQYKoRaeE7L3ZwWyGJiJHfA2SFq3C0BgftZ2tFrep9; _gh_sess=eyJzZXNzaW9uX2lkIjoiMmVhYThhM2QxMDhiMjFiNzE5YjhjNGRlMTIyOTM2ZDIiLCJsYXN0X3JlYWRfZnJvbV9yZXBsaWNhcyI6MTQ5ODk3MTg4OTYyNCwiY29udGV4dCI6Ii8iLCJsYXN0X3dyaXRlIjoxNDk4OTIzMTgwNjQzLCJyZXR1cm5fdG8iOiIvcmJhcnJvaXMvZmFjdG9yeWJveSIsInNweV9yZXBvIjoiTWFnaWNTdGFjay9odHRwdG9vbHMiLCJzcHlfcmVwb19hdCI6MTQ5ODk3MTcyNSwiZmxhc2giOnsiZGlzY2FyZCI6WyJhbmFseXRpY3NfbG9jYXRpb24iXSwiZmxhc2hlcyI6eyJhbmFseXRpY3NfbG9jYXRpb24iOiIvZGFzaGJvYXJkIn19fQ%3D%3D--cc0fb066d6afd219a9483d1cb131a26c5608de88; _ga=GA1.2.442677063.1493917476; tz=Asia%2FShanghai '''
0
head = b'''GET / HTTP/1.1 Host: github.com Connection: keep-alive Cache-Control: max-age=0 Upgrade-Insecure-Requests: 1 User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 Accept-Encoding: gzip, deflate, sdch, br Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.6,fr;q=0.4,en;q=0.2,ja;q=0.2 Cookie: _octo=GH1.1.1989111283.1493917476; logged_in=yes; dotcom_user=hsz1273327; _gat=1; user_session=mrt4FoCQYKoRaeE7L3ZwWyGJiJHfA2SFq3C0BgftZ2tFrep9; __Host-user_session_same_site=mrt4FoCQYKoRaeE7L3ZwWyGJiJHfA2SFq3C0BgftZ2tFrep9; _gh_sess=eyJzZXNzaW9uX2lkIjoiMmVhYThhM2QxMDhiMjFiNzE5YjhjNGRlMTIyOTM2ZDIiLCJsYXN0X3JlYWRfZnJvbV9yZXBsaWNhcyI6MTQ5ODk3MTg4OTYyNCwiY29udGV4dCI6Ii8iLCJsYXN0X3dyaXRlIjoxNDk4OTIzMTgwNjQzLCJyZXR1cm5fdG8iOiIvcmJhcnJvaXMvZmFjdG9yeWJveSIsInNweV9yZXBvIjoiTWFnaWNTdGFjay9odHRwdG9vbHMiLCJzcHlfcmVwb19hdCI6MTQ5ODk3MTcyNSwiZmxhc2giOnsiZGlzY2FyZCI6WyJhbmFseXRpY3NfbG9jYXRpb24iXSwiZmxhc2hlcyI6eyJhbmFseXRpY3NfbG9jYXRpb24iOiIvZGFzaGJvYXJkIn19fQ%3D%3D--cc0fb066d6afd219a9483d1cb131a26c5608de88; _ga=GA1.2.442677063.1493917476; tz=Asia%2FShanghai '''
1
还没有评论,来说两句吧...