# -*- coding: utf-8 -*-
# @Time : 2026/1/20 17:06
# @Author : zhaoxiangpeng
# @File : crawl_article_by_id.py
import json

import redis
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from science_article_cssci.spiders.cssci_article_by_id import CssciArticleByIdSpider


def push_task():
    """Push a batch of CSSCI article ids onto the spider's Redis start-urls queue."""
    settings = get_project_settings()
    r = redis.StrictRedis.from_url(settings.get("REDIS_URL"))
    r.lpush(
        "cssci_article_by_id:start_urls",
        *[
            json.dumps({'third_id': '11G0412025010007'}, ensure_ascii=False),
            json.dumps({'third_id': '11C1172023010002'}, ensure_ascii=False),
            json.dumps({'third_id': '11J0092023020008'}, ensure_ascii=False),
            json.dumps({'third_id': '44Z0712023010003'}, ensure_ascii=False),
            json.dumps({'third_id': '11D1022023010001'}, ensure_ascii=False),
            json.dumps({'third_id': '22D1042023010007'}, ensure_ascii=False),
        ])


def starter():
    """Run CssciArticleByIdSpider in a blocking CrawlerProcess using the project settings."""
    process = CrawlerProcess(get_project_settings())
    process.crawl(CssciArticleByIdSpider)
    process.start()


if __name__ == '__main__':
    # Seed the Redis queue first, then start the crawler so it picks up the queued ids.
    push_task()
    starter()