Compare commits

..

2 Commits

@ -29,7 +29,7 @@ INFO_SPLIT_SYMBOL_LENGTH = len(INFO_SPLIT_SYMBOL)
# 引文表需要有一个UT字段用来作为主键
REF_RECORD_TABLE_FIELD = ['UT', 'doi']
# 完整记录要保留的字段, {'exported.%s' % key.lower() : 1 for key in FULL_RECORD_TABLE_FIELD}
FULL_RECORD_TABLE_FIELD = ['DI', 'SO', 'PT', 'UT', 'AB', 'SN', 'EI', 'BN', 'PY']
FULL_RECORD_TABLE_FIELD = ['DI', 'SO', 'DT', 'UT', 'AB', 'SN', 'EI', 'BN', 'PY']
TABLE_HEAD_TRANS = {k.lower(): k for k in FULL_RECORD_TABLE_FIELD}
MONGODB_REMOTE_CONFIG = dict(
@ -50,7 +50,7 @@ def find_doi_data_from_mongo(doi_list: list):
find_results = collection.find(
filter={"exported.di": {"$in": doi_list}},
projection={"_id": 0, "third_id": 1, 'exported.ab': 1, 'exported.bn': 1, 'exported.di': 1, 'exported.ei': 1,
'exported.is': 1, 'exported.pt': 1, 'exported.sn': 1, 'exported.so': 1}
'exported.is': 1, 'exported.dt': 1, 'exported.sn': 1, 'exported.so': 1}
).collation({"locale": "en", "strength": 2}) # 忽略大小写
for document in find_results:
exported: dict = document.get('exported')
@ -96,7 +96,8 @@ def ref_str2dic(text):
if not re.match(r'\d{4}', py):
py = None
doi_idx = ref.find(DOI_SPLIT_SYMBOL)
model = dict(au=au, py=py, so=so)
# 把参考文献字段也加进去
model = dict(au=au, py=py, so=so, ref=ref)
if doi_idx != -1:
doi_text = ref[doi_idx + DOI_SPLIT_SYMBOL_LENGTH:]
if doi_text.startswith('['):
@ -191,7 +192,7 @@ def step_1_3():
ref_table['hot'] = None
# 保留需要使用的列
ref_table = ref_table[
['third_id', '作者', '年份', '刊名-简称', 'doi', 'citedTitle', 'citedWork', 'hot', 'PT', 'UT', 'AB', 'SN', 'online issn',
['third_id', '作者', '年份', '刊名-简称', 'doi', 'citedTitle', 'citedWork', 'hot', 'DT', 'UT', 'AB', 'SN', 'online issn',
'isbn']]
ref_table.to_csv(os.path.join(ROOT_PATH, '标准doi扩展完整字段.csv'), sep='\t',
@ -226,9 +227,9 @@ def step_2_1(table: pd.DataFrame, ai_result_table):
def main_step1():
# step_1_1()
step_1_1()
# step_1_2()
step_1_3() # WOS:000426769900009
# step_1_3() # WOS:000426769900009
def main_step2():
@ -249,5 +250,5 @@ def main_step2():
if __name__ == '__main__':
# main_step1()
main_step2()
main_step1()
# main_step2()

Loading…
Cancel
Save