From a95f242bd5eb8849613f849ffb7512bfe2cb7fd6 Mon Sep 17 00:00:00 2001 From: zhaoxiangpeng <1943364377@qq.com> Date: Wed, 14 Jan 2026 09:35:55 +0800 Subject: [PATCH] =?UTF-8?q?cnki:=E6=9B=B4=E6=94=B9v=E7=9A=84=E5=8F=96?= =?UTF-8?q?=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../science_article_cnki/spiders/cnki_article_crossdb.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/science_article_cnki/science_article_cnki/spiders/cnki_article_crossdb.py b/science_article_cnki/science_article_cnki/spiders/cnki_article_crossdb.py index 97bda17..43f1bb7 100644 --- a/science_article_cnki/science_article_cnki/spiders/cnki_article_crossdb.py +++ b/science_article_cnki/science_article_cnki/spiders/cnki_article_crossdb.py @@ -78,6 +78,7 @@ class CnkiArticleCrossdbSpider(scrapy.Spider): # ---------------------------------------------- 提取列表文章的逻辑 ---------------------------------------------- tr_nodes = response.xpath('//div[@id="gridTable"]//table[@class="result-table-list"]/tbody/tr') for tr_node in tr_nodes: + check_v = tr_node.xpath('./td[@class="seq"]/input/@value').get() # 下载导出用的v article_title = tr_node.xpath('./td[@class="name"]/a//text()').getall() # 文章标题 article_title = article_title and ''.join(article_title) article_link = tr_node.xpath('./td[@class="name"]/a/@href').get() # 文章链接(有v值) @@ -87,7 +88,7 @@ class CnkiArticleCrossdbSpider(scrapy.Spider): cited_str = tr_node.xpath('./td[@class="quote"]/span/a/text()').get() # 被引量字符串 param = tools.url_parse(article_link) - v = param.get('v') + v = check_v ti_format = ti2format(article_title) ti_unique = ti2unique_type2(ti=ti_format, so=source_title) @@ -131,6 +132,7 @@ class CnkiArticleCrossdbSpider(scrapy.Spider): # ---------------------------------------------- 提取列表文章的逻辑 ---------------------------------------------- tr_nodes = response.xpath('//div[@id="gridTable"]//table[@class="result-table-list"]/tbody/tr') for tr_node in tr_nodes: + check_v = tr_node.xpath('./td[@class="seq"]/input/@value').get() # 下载导出用的v article_title = tr_node.xpath('./td[@class="name"]/a/text()').get() # 文章标题 article_link = tr_node.xpath('./td[@class="name"]/a/@href').get() # 文章链接(有v值) source_title = tr_node.xpath('./td[@class="source"]/*/a/text()').get() # 出版物名称(刊名) @@ -139,7 +141,7 @@ class CnkiArticleCrossdbSpider(scrapy.Spider): cited_str = tr_node.xpath('./td[@class="quote"]/span/a/text()').get() # 被引量字符串 param = tools.url_parse(article_link) - v = param.get('v') + v = check_v ti_format = ti2format(article_title) ti_unique = ti2unique_type2(ti=ti_format, so=source_title) if third_id: