cnki:测试:同一个筛选类型可以加入多个筛选项

main
zhaoxiangpeng 4 weeks ago
parent 7a5b8f787c
commit 131b760adf

@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# @Time : 2026/1/6 14:31
# @Author : zhaoxiangpeng
# @File : crawl_signal_result.py
from twisted.internet import defer
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from science_article_cnki.spiders.cnki_article_tag_source import CnkiArticleTagSourceSpider
"""
def test_starter():
y = 2024
init_params = {
'query': '(作者单位:西南科技大学(模糊)',
'query_condition': {'year': str(y)}
}
process = CrawlerProcess(get_project_settings())
process.crawl(CnkiArticleTagSourceSpider, **init_params)
process.start()
"""
def starter_by_year(years=None):
    """Crawl with ``CnkiArticleTagSourceSpider`` once per publication year.

    One ``CrawlerProcess`` is created and a separate crawl is scheduled for
    every year. The crawls are chained inside a ``defer.inlineCallbacks``
    coroutine, so each ``process.crawl`` call is only made after the Deferred
    yielded for the previous year has fired.

    Each crawl receives the year twice: as ``query_condition['year']`` and as
    a ``"年度"`` entry in ``filters``.

    Args:
        years: Iterable of years to crawl. When omitted, ``range(2000, 2001)``
            is used (the single year 2000), which is the range this function
            crawled before the parameter was introduced.
    """
    # Materialise up front: accepts any iterable and guarantees the loop below
    # never receives ``None``.
    year_list = list(range(2000, 2001)) if years is None else list(years)

    process = CrawlerProcess(get_project_settings())

    @defer.inlineCallbacks
    def crawl_sequentially(range_list):
        for y in range_list:
            init_params = {
                'query': '(作者单位:西南科技大学(模糊)',
                'query_condition': {'year': str(y)},
                'filters': [
                    dict(project="年度", value=f"{y}", text_or_title=f"{y}"),
                ],
            }
            # Yielding the crawl's Deferred suspends this coroutine until it
            # fires, which is what serialises the per-year crawls.
            yield process.crawl(CnkiArticleTagSourceSpider, **init_params)

    # Schedule the chain first, then hand control to Scrapy.
    crawl_sequentially(year_list)
    process.start()
def starter():
    """Run ``CnkiArticleTagSourceSpider`` once, without any spider arguments.

    The crawler process is configured from the project settings returned by
    ``get_project_settings()``.
    """
    settings = get_project_settings()
    crawler_process = CrawlerProcess(settings)
    crawler_process.crawl(CnkiArticleTagSourceSpider)
    crawler_process.start()
# Script entry point: when this file is executed directly, run the
# year-by-year crawl.
if __name__ == '__main__':
    starter_by_year()

@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
# @Time : 2026/1/9 15:27
# @Author : zhaoxiangpeng
# @File : test_queryJson.py
from pprint import pprint
from science_article_cnki.models import cnki_model as model
def test_add_con():
    """Build a query body and add several "年度" filter groups to it.

    The scenario covers adding more than one filter item under the same
    filter type: first the multi-year filter from the initial parameters,
    then a single year, then another list of years whose titles carry a
    "年" suffix. The resulting body is pretty-printed for manual inspection
    and returned; nothing is asserted here.
    """
    years = [2022, 2023, 2024]
    year_titles = [f"{y}" for y in years]
    init_params = {
        'query': '(作者单位:西南科技大学(模糊)',
        'filters': [
            dict(project="年度", value=years, text_or_title=year_titles),
        ],
    }

    query_body = model.adv_refine_search(**init_params)

    # First, the filter groups that were part of the initial parameters.
    for initial_group in init_params['filters']:
        model.add_muti_group(**initial_group, base_query=query_body)

    # Then two more groups of the same filter type, in this order:
    # a scalar year followed by a list of years.
    single_year_group = dict(project="年度", value=2025, text_or_title=f"{2025}")
    year_list_group = dict(
        project="年度",
        value=[2023, 2025],
        text_or_title=["2023年", "2025年"],
    )
    for extra_group in (single_year_group, year_list_group):
        model.add_muti_group(**extra_group, base_query=query_body)

    # NOTE: a group for a different filter type (project="来源类别",
    # value="CSSCI") was drafted in the original but left disabled.

    pprint(query_body)
    return query_body
Loading…
Cancel
Save