|
|
|
@ -78,6 +78,7 @@ class CnkiArticleCrossdbSpider(scrapy.Spider):
|
|
|
|
# ---------------------------------------------- 提取列表文章的逻辑 ----------------------------------------------
|
|
|
|
# ---------------------------------------------- 提取列表文章的逻辑 ----------------------------------------------
|
|
|
|
tr_nodes = response.xpath('//div[@id="gridTable"]//table[@class="result-table-list"]/tbody/tr')
|
|
|
|
tr_nodes = response.xpath('//div[@id="gridTable"]//table[@class="result-table-list"]/tbody/tr')
|
|
|
|
for tr_node in tr_nodes:
|
|
|
|
for tr_node in tr_nodes:
|
|
|
|
|
|
|
|
check_v = tr_node.xpath('./td[@class="seq"]/input/@value').get() # 下载导出用的v
|
|
|
|
article_title = tr_node.xpath('./td[@class="name"]/a//text()').getall() # 文章标题
|
|
|
|
article_title = tr_node.xpath('./td[@class="name"]/a//text()').getall() # 文章标题
|
|
|
|
article_title = article_title and ''.join(article_title)
|
|
|
|
article_title = article_title and ''.join(article_title)
|
|
|
|
article_link = tr_node.xpath('./td[@class="name"]/a/@href').get() # 文章链接(有v值)
|
|
|
|
article_link = tr_node.xpath('./td[@class="name"]/a/@href').get() # 文章链接(有v值)
|
|
|
|
@ -87,7 +88,7 @@ class CnkiArticleCrossdbSpider(scrapy.Spider):
|
|
|
|
cited_str = tr_node.xpath('./td[@class="quote"]/span/a/text()').get() # 被引量字符串
|
|
|
|
cited_str = tr_node.xpath('./td[@class="quote"]/span/a/text()').get() # 被引量字符串
|
|
|
|
|
|
|
|
|
|
|
|
param = tools.url_parse(article_link)
|
|
|
|
param = tools.url_parse(article_link)
|
|
|
|
v = param.get('v')
|
|
|
|
v = check_v
|
|
|
|
ti_format = ti2format(article_title)
|
|
|
|
ti_format = ti2format(article_title)
|
|
|
|
ti_unique = ti2unique_type2(ti=ti_format, so=source_title)
|
|
|
|
ti_unique = ti2unique_type2(ti=ti_format, so=source_title)
|
|
|
|
|
|
|
|
|
|
|
|
@ -131,6 +132,7 @@ class CnkiArticleCrossdbSpider(scrapy.Spider):
|
|
|
|
# ---------------------------------------------- 提取列表文章的逻辑 ----------------------------------------------
|
|
|
|
# ---------------------------------------------- 提取列表文章的逻辑 ----------------------------------------------
|
|
|
|
tr_nodes = response.xpath('//div[@id="gridTable"]//table[@class="result-table-list"]/tbody/tr')
|
|
|
|
tr_nodes = response.xpath('//div[@id="gridTable"]//table[@class="result-table-list"]/tbody/tr')
|
|
|
|
for tr_node in tr_nodes:
|
|
|
|
for tr_node in tr_nodes:
|
|
|
|
|
|
|
|
check_v = tr_node.xpath('./td[@class="seq"]/input/@value').get() # 下载导出用的v
|
|
|
|
article_title = tr_node.xpath('./td[@class="name"]/a/text()').get() # 文章标题
|
|
|
|
article_title = tr_node.xpath('./td[@class="name"]/a/text()').get() # 文章标题
|
|
|
|
article_link = tr_node.xpath('./td[@class="name"]/a/@href').get() # 文章链接(有v值)
|
|
|
|
article_link = tr_node.xpath('./td[@class="name"]/a/@href').get() # 文章链接(有v值)
|
|
|
|
source_title = tr_node.xpath('./td[@class="source"]/*/a/text()').get() # 出版物名称(刊名)
|
|
|
|
source_title = tr_node.xpath('./td[@class="source"]/*/a/text()').get() # 出版物名称(刊名)
|
|
|
|
@ -139,7 +141,7 @@ class CnkiArticleCrossdbSpider(scrapy.Spider):
|
|
|
|
cited_str = tr_node.xpath('./td[@class="quote"]/span/a/text()').get() # 被引量字符串
|
|
|
|
cited_str = tr_node.xpath('./td[@class="quote"]/span/a/text()').get() # 被引量字符串
|
|
|
|
|
|
|
|
|
|
|
|
param = tools.url_parse(article_link)
|
|
|
|
param = tools.url_parse(article_link)
|
|
|
|
v = param.get('v')
|
|
|
|
v = check_v
|
|
|
|
ti_format = ti2format(article_title)
|
|
|
|
ti_format = ti2format(article_title)
|
|
|
|
ti_unique = ti2unique_type2(ti=ti_format, so=source_title)
|
|
|
|
ti_unique = ti2unique_type2(ti=ti_format, so=source_title)
|
|
|
|
if third_id:
|
|
|
|
if third_id:
|
|
|
|
|