Scrapyソース注記--CookiesMiddleware

3742 ワード

class CookiesMiddleware(object):
    """Downloader middleware that tracks cookies across requests and responses.

    The middleware keeps ``self.jars``, a mapping from a cookiejar key to a
    ``CookieJar``.  Cookies sent by a web server are extracted from each
    response into the matching jar, and the jar's cookies are written into the
    ``Cookie`` header of later requests that use the same jar.

    A single spider can keep several independent cookie sessions by passing a
    ``cookiejar`` key in ``Request.meta``.  Requests that carry no such key
    all share the jar stored under the key ``None``.

    Example::

        for i, url in enumerate(urls):
            yield scrapy.Request(url, meta={'cookiejar': i},
                                 callback=self.parse_page)

    Note: the ``cookiejar`` value in ``meta`` is only the key that selects a
    jar; the ``CookieJar`` objects themselves are stored in ``self.jars``.
    """

    def __init__(self, debug=False):
        # One CookieJar per session key, created lazily on first lookup.
        self.jars = defaultdict(CookieJar)
        # Value of the COOKIES_DEBUG setting when built through from_crawler().
        self.debug = debug

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from crawler settings.

        Raises ``NotConfigured`` when the COOKIES_ENABLED setting is false,
        so the middleware is not used.
        """
        # COOKIES_ENABLED (True by default) turns this middleware on.
        # COOKIES_DEBUG (False by default), when enabled, makes Scrapy log the
        # cookies sent in requests (Cookie header) and the cookies received
        # in responses (Set-Cookie header).
        if not crawler.settings.getbool('COOKIES_ENABLED'):
            raise NotConfigured
        return cls(crawler.settings.getbool('COOKIES_DEBUG'))

    def process_request(self, request, spider):
        """Merge the request's own cookies into its jar and set ``Cookie``."""
        if request.meta.get('dont_merge_cookies', False):
            return
        # Key of the session this request belongs to; None when the request
        # meta has no 'cookiejar' entry.
        cookiejarkey = request.meta.get("cookiejar")
        # defaultdict: an unseen key (including None) gets a fresh CookieJar.
        jar = self.jars[cookiejarkey]
        # Turn the cookies given on the Request object into cookie objects
        # and store the acceptable ones in the jar.
        cookies = self._get_request_cookies(jar, request)
        for cookie in cookies:
            jar.set_cookie_if_ok(cookie, request)
        # Drop any pre-existing Cookie header, then let the jar write the
        # header from everything it holds for this request.
        request.headers.pop('Cookie', None)
        jar.add_cookie_header(request)
        self._debug_cookie(request, spider)

    def process_response(self, request, response, spider):
        """Store cookies from the response in the request's jar.

        Returns the response unchanged.
        """
        if request.meta.get('dont_merge_cookies', False):
            return response
        # extract cookies from Set-Cookie and drop invalid/expired cookies
        cookiejarkey = request.meta.get("cookiejar")
        jar = self.jars[cookiejarkey]
        # Same jar that process_request used for this request's key.
        jar.extract_cookies(response, request)
        self._debug_set_cookie(response, spider)

        return response

    # _debug_cookie() and _debug_set_cookie(), called above, are elided from
    # this excerpt.
    ...
    ...

    def _format_cookie(self, cookie):
        """Render a cookie dict as a ``Set-Cookie`` style string.

        ``cookie`` must have 'name' and 'value' keys; 'path' and 'domain' are
        appended only when present and truthy.
        """
        cookie_str = '%s=%s' % (cookie['name'], cookie['value'])

        if cookie.get('path'):
            cookie_str += '; Path=%s' % cookie['path']
        if cookie.get('domain'):
            cookie_str += '; Domain=%s' % cookie['domain']

        return cookie_str

    def _get_request_cookies(self, jar, request):
        """Convert ``request.cookies`` into cookie objects via ``jar``.

        ``request.cookies`` may be a dict of name -> value or an iterable of
        cookie dicts.  Returns an empty list when the request has no cookies.
        """
        # Nothing to convert: skip building a throwaway Response.
        if not request.cookies:
            return []

        if isinstance(request.cookies, dict):
            cookie_list = [{'name': k, 'value': v}
                           for k, v in request.cookies.items()]
        else:
            cookie_list = request.cookies

        cookies = [self._format_cookie(x) for x in cookie_list]
        headers = {'Set-Cookie': cookies}
        # Wrap the cookies in a synthetic response so the jar can parse them
        # the same way it parses real Set-Cookie headers.
        response = Response(request.url, headers=headers)
        return jar.make_cookies(response, request)

CookiesMiddlewareはデフォルトで、リクエストとレスポンスの間でのクッキーの受け渡しと設定を行う。また、scrapy.Request(url, meta={'cookiejar': n})を使えば、1つのSpiderで複数のクッキーセッションを扱うこともできる。前回のブログ「Scrapyフレームワーク--cookieの取得/伝達/ローカル保存」で残っていた疑問も、ソースコードを読むことで解決した。