cnki:通过id下载
parent
8b2862774e
commit
efe077695d
@ -0,0 +1,30 @@
|
||||
from typing import Any, List, Dict, Self, AsyncIterator
|
||||
|
||||
import scrapy
|
||||
from scrapy.crawler import Crawler
|
||||
from science_article_cnki.models import cnki_model as model
|
||||
from science_article_cnki.configs import cnki as config
|
||||
|
||||
|
||||
class CnkiIdsDownloadSpider(scrapy.Spider):
    """Spider that submits a batch of record ids to the CNKI export endpoint.

    The ids are held in ``self.id_list``. They can be supplied at
    construction time through the ``id_list`` keyword argument, or assigned
    to the attribute before the crawl starts. A single POST form request is
    sent to ``config.CNKI_EXPORT_XLS_OLD_API`` with the form payload produced
    by ``model.export_data``.
    """

    name = "cnki_ids_download"
    allowed_domains = ["cnki.net"]
    start_urls = ["https://cnki.net"]

    @classmethod
    def from_crawler(cls, crawler: Crawler, *args: Any, **kwargs: Any) -> Self:
        """Build the spider through Scrapy's standard factory.

        Positional and keyword arguments are forwarded unchanged to the base
        implementation, which passes them on to ``__init__``.
        """
        return super().from_crawler(crawler, *args, **kwargs)

    def __init__(
        self,
        *args: Any,
        id_list: List[Dict[str, str]] | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialise the spider.

        Args:
            *args: Positional arguments forwarded to ``scrapy.Spider``.
            id_list: Records to export; ``None`` when they will be assigned
                later. Keyword-only so it cannot collide with the base
                class's positional ``name`` parameter.
            **kwargs: Spider arguments forwarded to ``scrapy.Spider``.
        """
        # Forward everything: from_crawler passes spider arguments through,
        # and an __init__ that accepts none would raise TypeError.
        super().__init__(*args, **kwargs)
        self.id_list: List[Dict[str, str]] | None = id_list

    async def start(self) -> AsyncIterator[Any]:
        """Yield the export request for the configured ids.

        Nothing is yielded (and a warning is logged) when no ids are set, so
        an empty export request is never sent.
        """
        if not self.id_list:
            self.logger.warning(
                "%s: no ids supplied, nothing to download", self.name
            )
            return

        # NOTE(review): model.export_data is assumed to accept the id list in
        # the shape stored on the spider -- confirm against cnki_model.
        yield scrapy.FormRequest(
            config.CNKI_EXPORT_XLS_OLD_API,
            method="POST",
            formdata=model.export_data(self.id_list),
            callback=self.parse,
        )

    def parse(self, response):
        """Handle the export response.

        Placeholder: the response is currently ignored.
        """
        pass
|
||||
Loading…
Reference in New Issue