diff --git a/science_article_cnki/science_article_cnki/settings.py b/science_article_cnki/science_article_cnki/settings.py index 7a7c421..a33ba65 100644 --- a/science_article_cnki/science_article_cnki/settings.py +++ b/science_article_cnki/science_article_cnki/settings.py @@ -16,7 +16,7 @@ ADDONS = {} # Crawl responsibly by identifying yourself (and your website) on the user-agent -USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36' +USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36' # Obey robots.txt rules ROBOTSTXT_OBEY = False @@ -39,11 +39,11 @@ COOKIES_ENABLED = True #} SEARCH_REQUEST_HEADERS = { 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'Cookie': 'Hm_lvt_dcec09ba2227fd02c55623c1bb82776a=1739256689; UM_distinctid=197b0769b48ea3-0de0b4b2dd761f-26001051-1fa400-197b0769b49cc6; Ecp_ClientId=e250627180800765334; Ecp_ClientIp=111.186.53.36; cnkiUserKey=1b8e7dbe-3c98-864f-2b80-84b544af32af; _c_WBKFRo=UO8UFAxWLjMjlOxhuKvmtkZ4yYaXr8dPZXuhVFea; Ecp_loginuserbk=SJTU; tfstk=g5GqYEZ0ZId4NHSWG0FNzQCb6QNYs5-QjfZ_SV0gloqDDdFa7uoTCSMjSA5ZJuEOhdn6_lmxYPZ0DxMNb0nUXt99nAPZ2q5jhfuO_P0iXEE6kLgxk5FMAHTBOq3vhen9f3NMS4V_773PuGuxk5Q-60hJAqQN2mSLS5mgZz4gS540ItYPZPqliPf0SgYzWuVgSrX0ZT4_uGb0Sc0kzPEuolmgsUPu2PVgjcViG50mS_zQnU-thdfV8NPaxqqPs67Lu-cB9u5Mabzqzugc-1fiaryqZpcfbM2jI2eKGqONwSgEE74qjBx0ex0r_Jh9Csg0ZoPxa-bMXocxSfPYTNAmzSr4KbwXO1mnzVDQUbTH9SP0mANx5w-jzjojkbu1STV4GYyEgWAdmlMS8fzZ6hdrYqDnjASP1GUobXlt3GXanzUzAU8z4y3oBzrYp_6OB8VLzkTblOBTnzUzAU8PBOeu2zrBlr1..; Ecp_session=1; SID_sug=018104; knsLeftGroupSelectItem=; dsorders=CF; dsortypes=cur%20DESC; knsadv-searchtype=%7B%22BLZOG7CK%22%3A%22gradeSearch%2CmajorSearch%22%2C%22MPMFIG1A%22%3A%22gradeSearch%2CmajorSearch%2CsentenceSearch%22%2C%22T2VC03OH%22%3A%22gradeSearch%2CmajorSearch%22%2C%22JQIRZIYA%22%3A%22gradeSearch%2CmajorSearch%2CsentenceSearch%22%2C%22S81HNSV3%22%3A%22gradeSearch%22%2C%22YSTT4HG0%22%3A%22gradeSearch%2CmajorSearch%2CauthorSearch%2CsentenceSearch%22%2C%22ML4DRIDX%22%3A%22gradeSearch%2CmajorSearch%22%2C%22WQ0UVIAA%22%3A%22gradeSearch%2CmajorSearch%22%2C%22VUDIXAIY%22%3A%22gradeSearch%2CmajorSearch%22%2C%22LIQN9Z3G%22%3A%22gradeSearch%22%2C%22NN3FJMUV%22%3A%22gradeSearch%2CmajorSearch%2CauthorSearch%2CsentenceSearch%22%2C%22LSTPFY1C%22%3A%22gradeSearch%2CmajorSearch%2CsentenceSearch%22%2C%22HHCPM1F8%22%3A%22gradeSearch%2CmajorSearch%22%2C%22OORPU5FE%22%3A%22gradeSearch%2CmajorSearch%22%2C%22WD0FTY92%22%3A%22gradeSearch%2CmajorSearch%2CauthorSearch%2CsentenceSearch%22%2C%22BPBAFJ5S%22%3A%22gradeSearch%2CmajorSearch%2CauthorSearch%2CsentenceSearch%22%2C%22EMRPGLPA%22%3A%22gradeSearch%2CmajorSearch%22%2C%22PWFIRAGL%22%3A%22gradeSearch%2CmajorSearch%2CsentenceSearch%22%2C%22U8J8LYLV%22%3A%22gradeSearch%2CmajorSearch%22%2C%22R79MZMCB%22%3A%22gradeSearch%22%2C%22J708GVCE%22%3A%22gradeSearch%2CmajorSearch%22%2C%228JBZLDJQ%22%3A%22gradeSearch%2CmajorSearch%2CsentenceSearch%22%2C%22HR1YT1Z9%22%3A%22gradeSearch%2CmajorSearch%22%2C%22JUP3MUPD%22%3A%22gradeSearch%2CmajorSearch%2CauthorSearch%2CsentenceSearch%22%2C%22NLBO1Z6R%22%3A%22gradeSearch%2CmajorSearch%22%2C%22RMJLXHZ3%22%3A%22gradeSearch%2CmajorSearch%2CsentenceSearch%22%2C%221UR4K4HZ%22%3A%22gradeSearch%2CmajorSearch%2CauthorSearch%2CsentenceSearch%22%2C%22NB3BWEHK%22%3A%22gradeSearch%2CmajorSearch%22%2C%22XVLO76FD%22%3A%22gradeSearch%2CmajorSearch%22%7D; Ecp_IpLoginFail=25121149.65.252.186; SID_kns_new=kns018106; SID_restapi=kns018110; KNS2COOKIE=1765437722.656.114388.232155|b25e41a932fd162af3b8c5cff4059fc3; dblang=both; createtime-advInput=2025-12-11%2015%3A22%3A21; searchTimeFlags=1', 'Origin': 'https://kns.cnki.net', 'Referer': 'https://kns.cnki.net/kns8s/AdvSearch?crossids=YSTT4HG0%2CLSTPFY1C%2CJUP3MUPD%2CMPMFIG1A%2CWQ0UVIAA%2CBLZOG7CK%2CPWFIRAGL%2CEMRPGLPA%2CNLBO1Z6R%2CNN3FJMUV', 'User-Agent': USER_AGENT, } +SEARCH_REQUEST_COOKIES_STR = 'Ecp_notFirstLogin=qkFgu9; Ecp_ClientId=o240823084800102418; Ecp_loginuserbk=SJTU; cnkiUserKey=eef4d3aa-1096-bc9e-dff0-74349179c2cc; Ecp_ClientIp=111.186.52.67; UM_distinctid=19366f14e7a832-0f92ef85a35cb5-26001051-1fa400-19366f14e7c14f2; Hm_lvt_dcec09ba2227fd02c55623c1bb82776a=1734079899; Ecp_session=1; SID_kns_new=kns018104; SID_sug=018104; knsLeftGroupSelectItem=; updatetime-advInput=2024-12-19+17%3A42%3A08; knsadv-searchtype=%7B%22BLZOG7CK%22%3A%22gradeSearch%2CmajorSearch%22%2C%22MPMFIG1A%22%3A%22gradeSearch%2CmajorSearch%2CsentenceSearch%22%2C%22T2VC03OH%22%3A%22gradeSearch%2CmajorSearch%22%2C%22JQIRZIYA%22%3A%22gradeSearch%2CmajorSearch%2CsentenceSearch%22%2C%22S81HNSV3%22%3A%22gradeSearch%22%2C%22YSTT4HG0%22%3A%22gradeSearch%2CmajorSearch%2CauthorSearch%2CsentenceSearch%22%2C%22ML4DRIDX%22%3A%22gradeSearch%2CmajorSearch%22%2C%22WQ0UVIAA%22%3A%22gradeSearch%2CmajorSearch%22%2C%22VUDIXAIY%22%3A%22gradeSearch%2CmajorSearch%22%2C%22NN3FJMUV%22%3A%22gradeSearch%2CmajorSearch%2CauthorSearch%2CsentenceSearch%22%2C%22LSTPFY1C%22%3A%22gradeSearch%2CmajorSearch%2CsentenceSearch%22%2C%22HHCPM1F8%22%3A%22gradeSearch%2CmajorSearch%22%2C%22OORPU5FE%22%3A%22gradeSearch%2CmajorSearch%22%2C%22WD0FTY92%22%3A%22gradeSearch%2CmajorSearch%2CauthorSearch%2CsentenceSearch%22%2C%22BPBAFJ5S%22%3A%22gradeSearch%2CmajorSearch%2CauthorSearch%2CsentenceSearch%22%2C%22EMRPGLPA%22%3A%22gradeSearch%2CmajorSearch%22%2C%22PWFIRAGL%22%3A%22gradeSearch%2CmajorSearch%2CsentenceSearch%22%2C%22U8J8LYLV%22%3A%22gradeSearch%2CmajorSearch%22%2C%22R79MZMCB%22%3A%22gradeSearch%22%2C%22J708GVCE%22%3A%22gradeSearch%2CmajorSearch%22%2C%22HR1YT1Z9%22%3A%22gradeSearch%2CmajorSearch%22%2C%22JUP3MUPD%22%3A%22gradeSearch%2CmajorSearch%2CauthorSearch%2CsentenceSearch%22%2C%22NLBO1Z6R%22%3A%22gradeSearch%2CmajorSearch%22%2C%22RMJLXHZ3%22%3A%22gradeSearch%2CmajorSearch%2CsentenceSearch%22%2C%221UR4K4HZ%22%3A%22gradeSearch%2CmajorSearch%2CauthorSearch%2CsentenceSearch%22%2C%22NB3BWEHK%22%3A%22gradeSearch%2CmajorSearch%22%2C%22XVLO76FD%22%3A%22gradeSearch%2CmajorSearch%22%7D; createtime-advInput=2024-12-20%2014%3A37%3A03; LID=WEEvREcwSlJHSldSdmVpanJGNW9JQS9sbkNrOUFycHJkRzF3eXgyTGlWbz0=$9A4hF_YAuvQ5obgVAqNKPCYcEjKensW4IQMovwHtwkF4VYPoHbKxJw!!; Ecp_LoginStuts={"IsAutoLogin":false,"UserName":"SJTU","ShowName":"%E4%B8%8A%E6%B5%B7%E4%BA%A4%E9%80%9A%E5%A4%A7%E5%AD%A6","UserType":"bk","BUserName":"","BShowName":"","BUserType":"","r":"qkFgu9","Members":[]}; KNS2COOKIE=1734680479.883.14106.830885|b25e41a932fd162af3b8c5cff4059fc3; dblang=both; c_m_LinID=LinID=WEEvREcwSlJHSldSdmVpanJGNW9JQS9sbkNrOUFycHJkRzF3eXgyTGlWbz0=$9A4hF_YAuvQ5obgVAqNKPCYcEjKensW4IQMovwHtwkF4VYPoHbKxJw!!&ot=12%2F20%2F2024%2016%3A01%3A27; c_m_expire=2024-12-20%2016%3A01%3A27; tfstk=gnXZLQYMKRewdgBaoHvqL9aIUYp9sd45ntTXmijDfFYG5iTcTZbBCGsccx-D-NdjCxY18pQRVAC_6ITq0dBC1xT_WKScPKz7P8w5XGpynzaShW0gBdKqnncilpDHmK-i1ZwdGGpvnyaM9UCdXabz7TCMnkJH4ncDnxYMtk-6qKDMiAcn-eKDnKADjDYH4nmioAYgYMYpDKxcoCcmtGjmL3Og25LCsWPKUCYljekmU0KHslSnGAMsnhA9rBxrnH6ebC8ljOHkrv-hd9RWOmayKgCCSHJz3vvwaOBytO4K3BQ2-IWMh0kcYNshNIWgD5IF3FRlIBoS3dIpmZAV9zkWbd1eaO5TD2jGPF5kBiiz5MRPTQKHtmlMC_s5HQXgQ4LBwn7y4NuN4DuvxG5lH1umgCxpYUZUY7E40mtBH0LEMjdHeH87fhGxMCxpYUZUYjhvteKePlt1.; searchTimeFlags=1; updatetime-advInput=2024-12-19+17%3A42%3A08' # Enable or disable spider middlewares # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html diff --git a/science_article_cnki/science_article_cnki/spiders/cnki_cited_number.py b/science_article_cnki/science_article_cnki/spiders/cnki_cited_number.py index ea4e045..ae149b4 100644 --- a/science_article_cnki/science_article_cnki/spiders/cnki_cited_number.py +++ b/science_article_cnki/science_article_cnki/spiders/cnki_cited_number.py @@ -33,7 +33,7 @@ class CnkiCitedNumberSpider(scrapy.Spider): # 比如判断如果没有参数从数据库中读取 return super().from_crawler(crawler, *args, **kwargs) - def __init__(self, query: str = None, resource_type: str = "JOURNAL", query_condition: dict = None, **kwargs: Any): + def __init__(self, query: str = None, resource_type: str = "学术期刊", query_condition: dict = None, **kwargs: Any): super().__init__(**kwargs) self.query = query self.resource_type = resource_type