|
|
|
@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
# @Time : 2026/1/6 14:31
|
|
|
|
|
|
|
|
# @Author : zhaoxiangpeng
|
|
|
|
|
|
|
|
# @File : crawl_signal_result.py
|
|
|
|
|
|
|
|
from twisted.internet import defer
|
|
|
|
|
|
|
|
from scrapy.crawler import CrawlerProcess
|
|
|
|
|
|
|
|
from scrapy.utils.project import get_project_settings
|
|
|
|
|
|
|
|
from science_article_cnki.spiders.cnki_article_tag_source import CnkiArticleTagSourceSpider
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
def test_starter():
|
|
|
|
|
|
|
|
y = 2024
|
|
|
|
|
|
|
|
init_params = {
|
|
|
|
|
|
|
|
'query': '(作者单位:西南科技大学(模糊))',
|
|
|
|
|
|
|
|
'query_condition': {'year': str(y)}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
process = CrawlerProcess(get_project_settings())
|
|
|
|
|
|
|
|
process.crawl(CnkiArticleTagSourceSpider, **init_params)
|
|
|
|
|
|
|
|
process.start()
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def starter_by_year(years=None):
    """Run CnkiArticleTagSourceSpider once per year, one crawl after another.

    A single ``CrawlerProcess`` is shared by every run. The next year's crawl
    is scheduled only after the deferred returned by the previous
    ``process.crawl`` call has fired, so the years are processed sequentially
    instead of concurrently.

    Args:
        years: Iterable of years (anything accepted by ``str()``/f-strings,
            normally ints) to crawl. When ``None`` the previously hard-coded
            ``range(2000, 2001)`` is used, i.e. only the year 2000.
    """
    if years is None:
        years = range(2000, 2001)

    process = CrawlerProcess(get_project_settings())

    @defer.inlineCallbacks
    def crawl_sequentially(range_list):
        # One spider run per year; the keyword arguments are handed to the
        # spider unchanged (how the spider interprets them is not visible
        # in this file).
        for y in range_list:
            init_params = {
                'query': '(作者单位:西南科技大学(模糊))',
                'query_condition': {'year': str(y)},
                'filters': [
                    dict(project="年度", value=f"{y}", text_or_title=f"{y}年"),
                ],
            }
            # Yielding the crawl deferred suspends this generator until the
            # current run is finished, which is what serialises the years.
            yield process.crawl(CnkiArticleTagSourceSpider, **init_params)

    # NOTE(review): the deferred returned here is not observed; an exception
    # raised by one crawl would end the loop and skip the remaining years.
    # Confirm that this is acceptable.
    crawl_sequentially(list(years))
    process.start()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def starter():
    """Run CnkiArticleTagSourceSpider a single time with no spider arguments.

    The crawler process is built from the project settings, the spider is
    scheduled without extra keyword arguments, and the process is started.
    """
    settings = get_project_settings()
    crawler_process = CrawlerProcess(settings)
    crawler_process.crawl(CnkiArticleTagSourceSpider)
    crawler_process.start()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: launch the per-year sequential crawl.
    starter_by_year()
|