Scrapyソースコード注釈--CookiesMiddleware
3742 ワード
class CookiesMiddleware(object):
    """Scrapy middleware that carries cookies between requests and responses.

    Cookies live in ``self.jars``, a ``defaultdict`` that creates a
    ``CookieJar`` the first time a key is looked up. The key is taken from
    ``request.meta['cookiejar']`` and is ``None`` when that meta entry is
    absent, so all requests without the entry share the single jar stored
    under ``None``.

    Using different keys gives one spider several independent cookie
    sessions, for example::

        for i, url in enumerate(urls):
            yield scrapy.Request(url, meta={'cookiejar': i},
                                 callback=self.parse_page)

    The key is read from each request's own ``meta``; nothing in this class
    copies it onto later requests, so follow-up requests have to pass
    ``cookiejar`` along themselves to stay in the same session.

    Requests whose ``meta`` has a truthy ``dont_merge_cookies`` are skipped by
    both ``process_request`` and ``process_response``.
    """

    def __init__(self, debug=False):
        """Create the per-key jar mapping and remember the debug flag.

        :param debug: stored as ``self.debug``; it is not read by any method
            visible in this excerpt (presumably used by the elided
            ``_debug_*`` helpers -- confirm against the full source).
        """
        # defaultdict: looking up an unknown key creates a fresh CookieJar.
        self.jars = defaultdict(CookieJar)
        self.debug = debug

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from crawler settings.

        :param crawler: object exposing ``settings.getbool``.
        :returns: an instance whose ``debug`` is the ``COOKIES_DEBUG`` setting.
        :raises NotConfigured: when ``COOKIES_ENABLED`` is false.

        The original notes give the defaults as ``COOKIES_ENABLED = True`` and
        ``COOKIES_DEBUG = False``, and describe ``COOKIES_DEBUG`` as logging
        the cookies sent in requests (``Cookie`` header) and received in
        responses (``Set-Cookie`` header).
        """
        if not crawler.settings.getbool('COOKIES_ENABLED'):
            raise NotConfigured
        return cls(crawler.settings.getbool('COOKIES_DEBUG'))

    def process_request(self, request, spider):
        """Merge the request's cookies into its jar and set the Cookie header.

        :param request: outgoing request; its ``meta``, ``cookies`` and
            ``headers`` are read, and ``headers`` is modified.
        :param spider: passed through to ``_debug_cookie``.
        :returns: ``None`` in every case.
        """
        if request.meta.get('dont_merge_cookies', False):
            return

        # None when the request carries no 'cookiejar' meta entry.
        cookiejarkey = request.meta.get("cookiejar")
        # Creates the jar on first use of this key (e.g. {None: CookieJar}).
        jar = self.jars[cookiejarkey]

        # Turn the cookies attached to the request into cookie objects and
        # offer each one to the jar.
        cookies = self._get_request_cookies(jar, request)
        for cookie in cookies:
            jar.set_cookie_if_ok(cookie, request)

        # set Cookie header: drop whatever header is already there, then let
        # the jar write the header from its stored cookies.
        request.headers.pop('Cookie', None)
        jar.add_cookie_header(request)
        self._debug_cookie(request, spider)

    def process_response(self, request, response, spider):
        """Store cookies from the response in the request's jar.

        :param request: the request that produced ``response``; its ``meta``
            selects the jar.
        :param response: the received response.
        :param spider: passed through to ``_debug_set_cookie``.
        :returns: ``response``, the same object that was passed in.
        """
        if request.meta.get('dont_merge_cookies', False):
            return response

        # extract cookies from Set-Cookie and drop invalid/expired cookies
        cookiejarkey = request.meta.get("cookiejar")
        jar = self.jars[cookiejarkey]
        jar.extract_cookies(response, request)
        self._debug_set_cookie(response, spider)

        return response

    # The excerpt elides code here. _debug_cookie and _debug_set_cookie,
    # which are called above, are not shown.
    ...
    ...

    def _format_cookie(self, cookie):
        """Render a cookie mapping as a cookie string.

        :param cookie: mapping with ``'name'`` and ``'value'`` keys and
            optional ``'path'`` and ``'domain'`` keys.
        :returns: ``'name=value'`` followed by ``'; Path=...'`` and
            ``'; Domain=...'`` for each of those keys that has a truthy value.
        :raises KeyError: when ``'name'`` or ``'value'`` is missing.
        """
        cookie_str = '%s=%s' % (cookie['name'], cookie['value'])

        if cookie.get('path'):
            cookie_str += '; Path=%s' % cookie['path']
        if cookie.get('domain'):
            cookie_str += '; Domain=%s' % cookie['domain']

        return cookie_str

    def _get_request_cookies(self, jar, request):
        """Convert ``request.cookies`` into cookie objects for ``jar``.

        :param jar: the jar whose ``make_cookies`` does the parsing.
        :param request: request whose ``cookies`` is either a dict of
            name -> value or an iterable of cookie mappings accepted by
            ``_format_cookie``.
        :returns: whatever ``jar.make_cookies`` returns for those cookies.
        """
        if isinstance(request.cookies, dict):
            cookie_list = [{'name': k, 'value': v}
                           for k, v in six.iteritems(request.cookies)]
        else:
            cookie_list = request.cookies

        cookies = [self._format_cookie(x) for x in cookie_list]
        headers = {'Set-Cookie': cookies}
        # Wrap the cookies in a throwaway Response as Set-Cookie headers so
        # that jar.make_cookies can parse them.
        response = Response(request.url, headers=headers)

        return jar.make_cookies(response, request)
CookiesMiddlewareはデフォルトで、リクエストとレスポンスの間でのクッキーの受け渡しと設定を行う。また、
scrapy.Request(url, meta={'cookiejar': n})
によって、1つのSpiderで複数のクッキーセッションを扱うこともできる。前回のブログ「Scrapyフレームワーク--cookieの取得/伝達/ローカル保存」で残っていた疑問も、ソースコードを読むことで解決した。