# -*- coding: utf-8 -*-
# @Time : 2026/1/5 09:18
# @Author : zhaoxiangpeng
# @File : crawl_cited_number.py
"""Command-line launchers for ``CnkiCitedNumberSpider``.

Run this file directly to start one crawl per year (see
``starter_by_year``); ``starter`` launches a single crawl with no
spider arguments.
"""

from typing import Iterable

from twisted.internet import defer
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from science_article_cnki.spiders.cnki_cited_number import CnkiCitedNumberSpider

# Defaults used when ``starter_by_year`` is called without arguments.
DEFAULT_QUERY = '(作者单位:大连东软信息学院(模糊))'
DEFAULT_YEARS = range(2021, 2026)  # 2021..2025 inclusive

# Disabled single-year example, kept for reference only (this string is
# never executed).
"""
def test_starter():
    y = 2025
    init_params = {
        'query': '(作者单位:河北工程技术学院(模糊))',
        'query_condition': {'year': str(y)}
    }
    process = CrawlerProcess(get_project_settings())
    process.crawl(CnkiCitedNumberSpider, **init_params)
    process.start()
"""


def starter_by_year(query: str = DEFAULT_QUERY,
                    years: Iterable[int] = DEFAULT_YEARS):
    """Run one crawl per year, one after another, in a single process.

    Args:
        query: Search expression handed to the spider as its ``query``
            keyword argument.
        years: Years to crawl, in order. Each one is passed to the spider
            as ``query_condition={'year': '<year>'}``.

    The call blocks until ``process.start()`` returns.
    """
    # Materialize once so a generator argument is not consumed lazily while
    # the reactor is running, and so a bad argument fails before any crawl.
    year_list = list(years)
    process = CrawlerProcess(get_project_settings())

    @defer.inlineCallbacks
    def crawl_sequentially():
        for year in year_list:
            init_params = {
                'query': query,
                'query_condition': {'year': str(year)},
            }
            # Yielding the Deferred returned by ``crawl`` suspends this
            # coroutine until that crawl is done, so the crawls do not
            # overlap.
            yield process.crawl(CnkiCitedNumberSpider, **init_params)

    # Schedule the first crawl before starting the reactor; the remaining
    # ones are chained as each Deferred fires.
    crawl_sequentially()
    process.start()


def starter():
    """Run a single crawl of ``CnkiCitedNumberSpider`` with no spider arguments."""
    process = CrawlerProcess(get_project_settings())
    process.crawl(CnkiCitedNumberSpider)
    process.start()


if __name__ == '__main__':
    starter_by_year()