From 61129eadf29454295b96324414b9d291e012049f Mon Sep 17 00:00:00 2001
From: zhaoxiangpeng <1943364377@qq.com>
Date: Tue, 10 Mar 2026 15:13:12 +0800
Subject: [PATCH] =?UTF-8?q?cssci:=E6=A0=B9=E6=8D=AEid=E8=BF=9B=E8=A1=8C?=
 =?UTF-8?q?=E9=87=87=E9=9B=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../starter/crawl_article_by_id.py            | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 science_article_cssci/starter/crawl_article_by_id.py

diff --git a/science_article_cssci/starter/crawl_article_by_id.py b/science_article_cssci/starter/crawl_article_by_id.py
new file mode 100644
index 0000000..480aa18
--- /dev/null
+++ b/science_article_cssci/starter/crawl_article_by_id.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+# @Time : 2026/1/20 17:06
+# @Author : zhaoxiangpeng
+# @File : crawl_article_by_id.py
+"""Seed the Redis start-url queue with CSSCI article ids, then run the spider."""
+import json
+from typing import List, Optional
+
+import redis
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+
+from science_article_cssci.spiders.cssci_article_by_id import CssciArticleByIdSpider
+
+# Redis list the id-based spider reads its start requests from.
+REDIS_START_URLS_KEY = "cssci_article_by_id:start_urls"
+
+# Default batch of CSSCI third-party ids to crawl.
+DEFAULT_THIRD_IDS: List[str] = [
+    '11G0412025010007',
+    '11C1172023010002',
+    '11J0092023020008',
+    '44Z0712023010003',
+    '11D1022023010001',
+    '22D1042023010007',
+]
+
+
+def push_task(third_ids: Optional[List[str]] = None) -> None:
+    """Push one JSON task message per id onto the spider's Redis queue.
+
+    :param third_ids: ids to enqueue; falls back to DEFAULT_THIRD_IDS.
+    """
+    ids = DEFAULT_THIRD_IDS if third_ids is None else third_ids
+    if not ids:  # lpush with zero values raises; treat empty batch as a no-op
+        return
+    settings = get_project_settings()
+    client = redis.StrictRedis.from_url(settings.get("REDIS_URL"))
+    try:
+        client.lpush(
+            REDIS_START_URLS_KEY,
+            *[json.dumps({'third_id': tid}, ensure_ascii=False) for tid in ids],
+        )
+    finally:
+        client.close()  # do not leak the connection
+
+
+def starter() -> None:
+    """Run CssciArticleByIdSpider in a blocking CrawlerProcess."""
+    process = CrawlerProcess(get_project_settings())
+    process.crawl(CssciArticleByIdSpider)
+    process.start()  # blocks until the crawl finishes
+
+
+if __name__ == '__main__':
+    push_task()
+    starter()