cnki:通过id下载
parent
8b2862774e
commit
efe077695d
@ -0,0 +1,30 @@
|
|||||||
|
from typing import Any, List, Dict, Self, AsyncIterator
|
||||||
|
|
||||||
|
import scrapy
|
||||||
|
from scrapy.crawler import Crawler
|
||||||
|
from science_article_cnki.models import cnki_model as model
|
||||||
|
from science_article_cnki.configs import cnki as config
|
||||||
|
|
||||||
|
|
||||||
|
class CnkiIdsDownloadSpider(scrapy.Spider):
    """Spider that requests a CNKI export for a list of record ids.

    A single POST is sent to ``config.CNKI_EXPORT_XLS_OLD_API`` with the form
    payload produced by ``model.export_data``. The response is delivered to
    :meth:`parse`, which is currently a placeholder.
    """

    name = "cnki_ids_download"
    allowed_domains = ["cnki.net"]
    # Not used for scheduling: ``start`` below yields the only initial request.
    start_urls = ["https://cnki.net"]

    @classmethod
    def from_crawler(cls, crawler: Crawler, *args: Any, **kwargs: Any) -> Self:
        """Build the spider through Scrapy's standard factory.

        Kept as an explicit hook; it only delegates to the base class, which
        forwards ``*args`` / ``**kwargs`` to ``__init__``.
        """
        return super().from_crawler(crawler, *args, **kwargs)

    def __init__(
        self,
        *args: Any,
        id_list: List[Dict[str, str]] | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialise the spider.

        Args:
            *args: Positional arguments forwarded to ``scrapy.Spider``.
            id_list: Records to export. ``None`` (the default) means nothing
                has been supplied yet; the attribute may also be assigned
                after construction.
            **kwargs: Keyword arguments forwarded to ``scrapy.Spider``.
        """
        # Accept and forward arguments so that ``from_crawler`` (which passes
        # them through to the constructor) does not fail with a TypeError.
        super().__init__(*args, **kwargs)
        self.id_list: List[Dict[str, str]] | None = id_list

    async def start(self) -> AsyncIterator[Any]:
        """Yield the single export request for ``self.id_list``.

        Nothing is yielded when ``self.id_list`` is ``None`` or empty.
        """
        if not self.id_list:
            self.logger.warning(
                "%s: id_list is empty, no export request scheduled", self.name
            )
            return

        # The original code referenced an undefined name ``ids`` here, which
        # raised NameError on first iteration.
        # NOTE(review): assumes model.export_data accepts the id_list value
        # as-is -- confirm against its signature.
        yield scrapy.FormRequest(
            config.CNKI_EXPORT_XLS_OLD_API,
            method="POST",
            formdata=model.export_data(self.id_list),
        )

    def parse(self, response):
        """Default callback for the export response; not implemented yet."""
        pass
|
||||||
Loading…
Reference in New Issue