class CnkiSearchHeadersDownloaderMiddleware:
    """Downloader middleware that applies fixed headers and cookies to requests.

    The headers mapping and the raw cookie string are read from the crawler
    settings ``SEARCH_REQUEST_HEADERS`` and ``SEARCH_REQUEST_COOKIES_STR``.
    The cookie string is parsed once at construction time.
    """

    def __init__(self, custom_headers: dict, cookies_str: str):
        """Store the headers and pre-parse the cookie string.

        Args:
            custom_headers: Header mapping applied to every request.
            cookies_str: Cookies in ``"name=value; name2=value2"`` form.
                An empty string or ``None`` results in no cookies.
        """
        self.custom_headers = custom_headers
        self.custom_cookies = self._parse_cookies_str(cookies_str)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from the crawler's settings."""
        return cls(
            custom_headers=crawler.settings['SEARCH_REQUEST_HEADERS'],
            cookies_str=crawler.settings['SEARCH_REQUEST_COOKIES_STR'],
        )

    def _parse_cookies_str(self, cookies_str):
        """Parse a ``;``-separated ``name=value`` string into a dict.

        Names and values are stripped of surrounding whitespace. Only the
        first ``=`` of a fragment separates name from value, so values may
        themselves contain ``=``. Fragments without ``=`` or with an empty
        name (e.g. the empty piece after a trailing ``;``) are skipped
        instead of raising.

        Args:
            cookies_str: The raw cookie string; may be empty or ``None``.

        Returns:
            A new ``dict`` mapping cookie names to values.
        """
        cookies = {}
        # A missing or empty setting simply means "no cookies".
        if not cookies_str:
            return cookies
        for fragment in cookies_str.split(';'):
            name, separator, value = fragment.partition('=')
            name = name.strip()
            if not separator or not name:
                # Malformed or empty fragment: nothing usable to store.
                continue
            cookies[name] = value.strip()
        return cookies

    def process_request(self, request, spider):
        """Replace the request's headers and cookies with the configured ones.

        Any headers already present on the request are discarded, because the
        ``headers`` attribute is reassigned rather than updated.

        Returns:
            ``None``.
        """
        request.headers = Headers(self.custom_headers)
        # Hand each request its own copy so that a mutation of one request's
        # cookies cannot leak into the shared template or other requests.
        request.cookies = dict(self.custom_cookies)
        return None