from typing import Any, List, Dict, Self, AsyncIterator

import scrapy
from scrapy.crawler import Crawler
from science_article_cnki.models import cnki_model as model
from science_article_cnki.configs import cnki as config


class CnkiIdsDownloadSpider(scrapy.Spider):
    """Spider that posts a list of record IDs to the CNKI export endpoint.

    The IDs are supplied at construction time through the ``id_list``
    keyword (for example ``crawler.crawl(CnkiIdsDownloadSpider, id_list=...)``)
    and are turned into form data by ``model.export_data``.

    NOTE(review): the exact structure ``model.export_data`` expects is not
    visible from this module; the ``List[Dict[str, str]]`` annotation is kept
    from the original attribute declaration -- confirm against the model.
    """

    name = "cnki_ids_download"
    allowed_domains = ["cnki.net"]
    start_urls = ["https://cnki.net"]

    @classmethod
    def from_crawler(cls, crawler: Crawler, *args: Any, **kwargs: Any) -> Self:
        """Build the spider through Scrapy's standard factory.

        Positional and keyword arguments are forwarded unchanged to the
        base implementation, which in turn passes them to ``__init__``.
        """
        return super().from_crawler(crawler, *args, **kwargs)

    def __init__(
        self,
        *args: Any,
        id_list: List[Dict[str, str]] | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialise the spider.

        Args:
            *args: Positional arguments forwarded to ``scrapy.Spider``.
            id_list: Records to export. Keyword-only so it cannot be
                confused with the base class' positional ``name`` argument.
                Defaults to ``None`` (nothing to export).
            **kwargs: Remaining spider arguments forwarded to
                ``scrapy.Spider``.
        """
        # Accept and forward arbitrary arguments: ``from_crawler`` calls
        # ``cls(*args, **kwargs)``, so a zero-argument ``__init__`` would
        # raise TypeError as soon as any spider argument is supplied.
        super().__init__(*args, **kwargs)
        self.id_list: List[Dict[str, str]] | None = id_list

    async def start(self) -> AsyncIterator[Any]:
        """Yield the single export request for ``self.id_list``.

        Nothing is yielded when no IDs were provided; a warning is logged
        instead so an empty run is visible in the crawl log.
        """
        if not self.id_list:
            self.logger.warning(
                "%s started without id_list; no export request issued",
                self.name,
            )
            return

        yield scrapy.FormRequest(
            config.CNKI_EXPORT_XLS_OLD_API,
            method='POST',
            formdata=model.export_data(self.id_list),
        )

    def parse(self, response):
        """Default callback for the export response.

        Currently a no-op: the response is not processed here.
        """
        pass