# -*- coding: utf-8 -*-
# @Time : 2026/1/12 14:13
# @Author : zhaoxiangpeng
# @File : crawl_crossdb_article.py
"""Script entry points that launch ``CnkiArticleCrossdbSpider`` from Python.

Each ``starter*`` function builds its own ``CrawlerProcess`` from the project
settings, schedules one or more crawls and then blocks in ``process.start()``.
"""
from typing import Iterable, Optional

from twisted.internet import defer
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from science_article_cnki.spiders.cnki_article_crossdb import CnkiArticleCrossdbSpider


def starter_by_year(
    years: Optional[Iterable[int]] = None,
    query_id: int = 1609,
    query: str = '(作者单位:河北工程技术学院(模糊))',
):
    """Schedule one crawl per year, one after another, then run the process.

    Every crawl receives the same ``query_id`` / ``query`` and a single
    "年度" filter entry for its own year.

    Args:
        years: Years to crawl. ``None`` (the default) means
            ``range(2021, 2022)``, i.e. only 2021.
        query_id: Passed to the spider as the ``query_id`` keyword argument.
        query: Passed to the spider as the ``query`` keyword argument.
    """
    # Materialise once so generators/iterators are accepted as well as lists.
    year_list = list(range(2021, 2022)) if years is None else list(years)
    process = CrawlerProcess(get_project_settings())

    @defer.inlineCallbacks
    def crawl_each_year():
        for y in year_list:
            init_params = {
                'query_id': query_id,
                'query': query,
                # 'query_condition': {'year': str(y)},
                'filters': [
                    dict(project="年度", value=f"{y}", text_or_title=f"{y}年"),
                ],
            }
            # Yielding the Deferred from crawl() suspends this generator until
            # that crawl is done, so the years are processed sequentially.
            yield process.crawl(CnkiArticleCrossdbSpider, **init_params)

    # NOTE(review): the Deferred returned here is not observed; if one crawl's
    # Deferred fails, the loop ends and later years are not scheduled --
    # confirm that this is acceptable.
    crawl_each_year()
    process.start()


def starter_more_year(
    years: Optional[Iterable[int]] = None,
    query_id: int = 1611,
    query: str = '(作者单位:武昌首义学院(模糊))',
):
    """Run a single crawl whose "年度" filter covers several years at once.

    Args:
        years: Years to include in the filter. ``None`` (the default) means
            ``range(2021, 2026)``, i.e. 2021 through 2025.
        query_id: Passed to the spider as the ``query_id`` keyword argument.
        query: Passed to the spider as the ``query`` keyword argument.
    """
    # Materialise once: the sequence is iterated twice below.
    year_list = list(range(2021, 2026)) if years is None else list(years)
    year_filter = dict(
        project="年度",
        value=[f"{y}" for y in year_list],
        text_or_title=[f"{y}年" for y in year_list],
    )
    init_params = {
        'query_id': query_id,
        'query': query,
        'filters': [year_filter],
    }

    process = CrawlerProcess(get_project_settings())
    # Only one crawl is scheduled, so no inlineCallbacks chaining is needed;
    # this mirrors starter().
    process.crawl(CnkiArticleCrossdbSpider, **init_params)
    process.start()


def starter():
    """Run the spider once with no extra keyword arguments."""
    process = CrawlerProcess(get_project_settings())
    process.crawl(CnkiArticleCrossdbSpider)
    process.start()


if __name__ == '__main__':
    starter_more_year()