# -*- coding: utf-8 -*-
# @Time : 2026/1/6 14:31
# @Author : zhaoxiangpeng
# @File : crawl_signal_result.py
"""Script entry points that launch ``CnkiArticleTagSourceSpider`` crawls."""
import logging
from typing import Iterable, Optional

from twisted.internet import defer
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from science_article_cnki.spiders.cnki_article_tag_source import CnkiArticleTagSourceSpider

logger = logging.getLogger(__name__)

# Disabled single-year example, kept for reference as a no-op string literal.
"""
def test_starter():
    y = 2024
    init_params = {
        'query': '(作者单位:西南科技大学(模糊))',
        'query_condition': {'year': str(y)}
    }
    process = CrawlerProcess(get_project_settings())
    process.crawl(CnkiArticleTagSourceSpider, **init_params)
    process.start()
"""


def starter_by_year(years: Optional[Iterable[int]] = None):
    """Crawl the spider once per year, one year after another.

    Args:
        years: Years to crawl, in order. When omitted, the original
            hard-coded range ``range(2000, 2001)`` (i.e. only 2000) is used.

    Each year gets its own spider run whose init arguments carry the fixed
    affiliation query, a ``query_condition`` with that year, and a matching
    ``filters`` entry. The call returns when ``process.start()`` returns.
    """
    year_list = list(range(2000, 2001)) if years is None else list(years)
    process = CrawlerProcess(get_project_settings())

    @defer.inlineCallbacks
    def crawl_each_year(range_list):
        for y in range_list:
            init_params = {
                'query': '(作者单位:西南科技大学(模糊))',
                'query_condition': {'year': str(y)},
                'filters': [
                    dict(project="年度", value=f"{y}", text_or_title=f"{y}年"),
                ],
            }
            # Yielding the Deferred suspends the loop until this crawl has
            # finished, so the years are processed sequentially.
            yield process.crawl(CnkiArticleTagSourceSpider, **init_params)

    def report_failure(failure):
        # Without an errback the failure would only show up as an
        # "Unhandled error in Deferred" when the Deferred is collected.
        logger.error("Year-by-year crawl aborted:\n%s", failure.getTraceback())

    crawl_each_year(year_list).addErrback(report_failure)
    process.start()


def starter():
    """Run the spider once with no extra init arguments."""
    process = CrawlerProcess(get_project_settings())
    process.crawl(CnkiArticleTagSourceSpider)
    process.start()


if __name__ == '__main__':
    starter_by_year()