From efe077695d5fda714d5f48814cd7736903023708 Mon Sep 17 00:00:00 2001
From: zhaoxiangpeng <1943364377@qq.com>
Date: Mon, 12 Jan 2026 11:05:57 +0800
Subject: [PATCH] =?UTF-8?q?cnki:=E9=80=9A=E8=BF=87id=E4=B8=8B=E8=BD=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../spiders/cnki_ids_download.py | 50 +++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 science_article_cnki/science_article_cnki/spiders/cnki_ids_download.py

diff --git a/science_article_cnki/science_article_cnki/spiders/cnki_ids_download.py b/science_article_cnki/science_article_cnki/spiders/cnki_ids_download.py
new file mode 100644
index 0000000..6b113ef
--- /dev/null
+++ b/science_article_cnki/science_article_cnki/spiders/cnki_ids_download.py
@@ -0,0 +1,50 @@
+from typing import Any, List, Dict, Optional, Self, AsyncIterator
+
+import scrapy
+from scrapy.crawler import Crawler
+from science_article_cnki.models import cnki_model as model
+from science_article_cnki.configs import cnki as config
+
+
+class CnkiIdsDownloadSpider(scrapy.Spider):
+    """Post a caller-supplied list of ids to the CNKI export endpoint."""
+
+    name = "cnki_ids_download"
+    allowed_domains = ["cnki.net"]
+    start_urls = ["https://cnki.net"]
+
+    @classmethod
+    def from_crawler(cls, crawler: Crawler, *args: Any, **kwargs: Any) -> Self:
+        """Create the spider via Scrapy's factory, forwarding all arguments."""
+        return super().from_crawler(crawler, *args, **kwargs)
+
+    def __init__(
+        self,
+        *args: Any,
+        id_list: Optional[List[Dict[str, str]]] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Store the ids to export.
+
+        Remaining arguments are forwarded to ``scrapy.Spider`` so that
+        ``from_crawler`` can pass spider arguments without a ``TypeError``.
+        """
+        super().__init__(*args, **kwargs)
+        self.id_list: Optional[List[Dict[str, str]]] = id_list
+
+    async def start(self) -> AsyncIterator[Any]:
+        """Yield one POST to the export endpoint for ``self.id_list``."""
+        if not self.id_list:
+            # Without ids there is nothing meaningful to post.
+            self.logger.warning("No ids supplied; nothing to download")
+            return
+        # NOTE(review): assumes model.export_data accepts id_list in the
+        # shape stored here - confirm against cnki_model.
+        yield scrapy.FormRequest(
+            config.CNKI_EXPORT_XLS_OLD_API,
+            method='POST',
+            formdata=model.export_data(self.id_list),
+        )
+
+    def parse(self, response):
+        """Handle the export response (not implemented yet)."""