From 131b760adffd2cdc2c3440caef080fbc831725a7 Mon Sep 17 00:00:00 2001
From: zhaoxiangpeng <1943364377@qq.com>
Date: Sat, 10 Jan 2026 00:07:54 +0800
Subject: [PATCH] =?UTF-8?q?cnki:=E6=B5=8B=E8=AF=95:=E5=90=8C=E4=B8=80?=
 =?UTF-8?q?=E4=B8=AA=E7=AD=9B=E9=80=89=E7=B1=BB=E5=9E=8B=E5=8F=AF=E4=BB=A5?=
 =?UTF-8?q?=E5=8A=A0=E5=85=A5=E5=A4=9A=E4=B8=AA=E7=AD=9B=E9=80=89=E9=A1=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (commentary area below the three-dash line; not part of the
commit message):

* The encoded Subject decodes to
  "cnki:测试:同一个筛选类型可以加入多个筛选项", roughly
  "cnki: test: one filter type can hold several filter options".
* starter_by_year(): the inner generator `f` declares `range_list` with a
  default of None but iterates it unconditionally. The only call site in this
  patch passes a list, so the default is never exercised here.
* starter_by_year(): `f` yields each `process.crawl(...)` result under
  `defer.inlineCallbacks`, one year per loop iteration. The value returned by
  `f(...)` itself is not kept.
* starter() schedules the same spider without any init parameters; the
  `__main__` guard calls starter_by_year() only.
* test_add_con(): builds a query body through `model.adv_refine_search` and
  repeated `model.add_muti_group` calls, pretty-prints it and returns it.
  It contains no assert statements, so it can only fail if a call raises.

 .../starter/crawl_signal_result.py            | 49 +++++++++++++++++++
 science_article_cnki/tests/test_queryJson.py  | 34 +++++++++++++
 2 files changed, 83 insertions(+)
 create mode 100644 science_article_cnki/starter/crawl_signal_result.py
 create mode 100644 science_article_cnki/tests/test_queryJson.py

diff --git a/science_article_cnki/starter/crawl_signal_result.py b/science_article_cnki/starter/crawl_signal_result.py
new file mode 100644
index 0000000..22562fd
--- /dev/null
+++ b/science_article_cnki/starter/crawl_signal_result.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+# @Time : 2026/1/6 14:31
+# @Author : zhaoxiangpeng
+# @File : crawl_signal_result.py
+from twisted.internet import defer
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+from science_article_cnki.spiders.cnki_article_tag_source import CnkiArticleTagSourceSpider
+
+"""
+def test_starter():
+    y = 2024
+    init_params = {
+        'query': '(作者单位:西南科技大学(模糊))',
+        'query_condition': {'year': str(y)}
+    }
+
+    process = CrawlerProcess(get_project_settings())
+    process.crawl(CnkiArticleTagSourceSpider, **init_params)
+    process.start()
+"""
+
+
+def starter_by_year():
+    @defer.inlineCallbacks
+    def f(range_list: list = None):
+        for y in range_list:
+            init_params = {
+                'query': '(作者单位:西南科技大学(模糊))',
+                'query_condition': {'year': str(y)},
+                'filters': [
+                    dict(project="年度", value=f"{y}", text_or_title=f"{y}年"),
+                ]
+            }
+            yield process.crawl(CnkiArticleTagSourceSpider, **init_params)
+
+    process = CrawlerProcess(get_project_settings())
+    f(list(range(2000, 2001)))
+    process.start()
+
+
+def starter():
+    process = CrawlerProcess(get_project_settings())
+    process.crawl(CnkiArticleTagSourceSpider)
+    process.start()
+
+
+if __name__ == '__main__':
+    starter_by_year()
diff --git a/science_article_cnki/tests/test_queryJson.py b/science_article_cnki/tests/test_queryJson.py
new file mode 100644
index 0000000..b6238aa
--- /dev/null
+++ b/science_article_cnki/tests/test_queryJson.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+# @Time : 2026/1/9 15:27
+# @Author : zhaoxiangpeng
+# @File : test_queryJson.py
+
+from pprint import pprint
+from science_article_cnki.models import cnki_model as model
+
+
+def test_add_con():
+    years = [2022, 2023, 2024]
+    init_params = {
+        'query': '(作者单位:西南科技大学(模糊))',
+        'filters': [
+            dict(project="年度", value=years, text_or_title=[f"{y}年" for y in years])
+        ]
+    }
+    m = init_params
+    filters = m.get('filters')
+    query_body = model.adv_refine_search(**m)
+    for f in filters:
+        model.add_muti_group(**f, base_query=query_body)
+
+    f2 = dict(project="年度", value=2025, text_or_title=f"{2025}年")
+    model.add_muti_group(**f2, base_query=query_body)
+
+    f3 = dict(project="年度", value=[2023, 2025], text_or_title=["2023年", "2025年"])
+    model.add_muti_group(**f3, base_query=query_body)
+
+    # f4 = dict(project="来源类别", value="CSSCI", text_or_title="CSSCI")
+    # model.add_muti_group(**f4, base_query=query_body)
+
+    pprint(query_body)
+    return query_body