|
|
@ -13,6 +13,8 @@
|
|
|
|
# 2.2.发文信息的结果与引文信息的结果进行匹配
|
|
|
|
# 2.2.发文信息的结果与引文信息的结果进行匹配
|
|
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
import os
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
import pandas as pd
|
|
|
|
from pymongo import MongoClient
|
|
|
|
from pymongo import MongoClient
|
|
|
|
|
|
|
|
|
|
|
@ -90,6 +92,9 @@ def ref_str2dic(text):
|
|
|
|
var1.append(ref_copy[:idx_t])
|
|
|
|
var1.append(ref_copy[:idx_t])
|
|
|
|
ref_copy = ref_copy[idx_t + INFO_SPLIT_SYMBOL_LENGTH:]
|
|
|
|
ref_copy = ref_copy[idx_t + INFO_SPLIT_SYMBOL_LENGTH:]
|
|
|
|
au, py, so = var1
|
|
|
|
au, py, so = var1
|
|
|
|
|
|
|
|
# 年份要判断,要满足4位数字
|
|
|
|
|
|
|
|
if not re.match(r'\d{4}', py):
|
|
|
|
|
|
|
|
py = None
|
|
|
|
doi_idx = ref.find(DOI_SPLIT_SYMBOL)
|
|
|
|
doi_idx = ref.find(DOI_SPLIT_SYMBOL)
|
|
|
|
model = dict(au=au, py=py, so=so)
|
|
|
|
model = dict(au=au, py=py, so=so)
|
|
|
|
if doi_idx != -1:
|
|
|
|
if doi_idx != -1:
|
|
|
|