添加单条参考文献字段（根据参考文件拆分后单条参考文献）

PT字段修改为DT字段
1 changed file with 9 additions and 8 deletions
--- a/bnu_wos/20250108_func.py
+++ b/bnu_wos/20250108_func.py
@@ -29,7 +29,7 @@ INFO_SPLIT_SYMBOL_LENGTH = len(INFO_SPLIT_SYMBOL)
 # 引文表需要有一个UT字段用来作为主键
 REF_RECORD_TABLE_FIELD = ['UT', 'doi']
 #  完整记录要保留的字段, {'exported.%s' % key.lower() : 1 for key in FULL_RECORD_TABLE_FIELD}
-FULL_RECORD_TABLE_FIELD = ['DI', 'SO', 'PT', 'UT', 'AB', 'SN', 'EI', 'BN', 'PY']
+FULL_RECORD_TABLE_FIELD = ['DI', 'SO', 'DT', 'UT', 'AB', 'SN', 'EI', 'BN', 'PY']
 TABLE_HEAD_TRANS = {k.lower(): k for k in FULL_RECORD_TABLE_FIELD}
 MONGODB_REMOTE_CONFIG = dict(
@@ -50,7 +50,7 @@ def find_doi_data_from_mongo(doi_list: list):
    find_results = collection.find(
        filter={"exported.di": {"$in": doi_list}},
        projection={"_id": 0, "third_id": 1, 'exported.ab': 1, 'exported.bn': 1, 'exported.di': 1, 'exported.ei': 1,
-                    'exported.is': 1, 'exported.pt': 1, 'exported.sn': 1, 'exported.so': 1}
+                    'exported.is': 1, 'exported.dt': 1, 'exported.sn': 1, 'exported.so': 1}
    ).collation({"locale": "en", "strength": 2})  # 忽略大小写
    for document in find_results:
        exported: dict = document.get('exported')
@@ -96,7 +96,8 @@ def ref_str2dic(text):
        if not re.match(r'\d{4}', py):
            py = None
        doi_idx = ref.find(DOI_SPLIT_SYMBOL)
-        model = dict(au=au, py=py, so=so)
+        # 把参考文献字段也加进去
+        model = dict(au=au, py=py, so=so, ref=ref)
        if doi_idx != -1:
            doi_text = ref[doi_idx + DOI_SPLIT_SYMBOL_LENGTH:]
            if doi_text.startswith('['):
@@ -191,7 +192,7 @@ def step_1_3():
    ref_table['hot'] = None
    # 保留需要使用的列
    ref_table = ref_table[
-        ['third_id', '作者', '年份', '刊名-简称', 'doi', 'citedTitle', 'citedWork', 'hot', 'PT', 'UT', 'AB', 'SN', 'online issn',
+        ['third_id', '作者', '年份', '刊名-简称', 'doi', 'citedTitle', 'citedWork', 'hot', 'DT', 'UT', 'AB', 'SN', 'online issn',
         'isbn']]
    ref_table.to_csv(os.path.join(ROOT_PATH, '标准doi扩展完整字段.csv'), sep='\t',
@@ -226,9 +227,9 @@ def step_2_1(table: pd.DataFrame, ai_result_table):
 def main_step1():
-    # step_1_1()
+    step_1_1()
    # step_1_2()
-    step_1_3()  # WOS:000426769900009
+    # step_1_3()  # WOS:000426769900009
 def main_step2():
@@ -249,5 +250,5 @@ def main_step2():
 if __name__ == '__main__':
-    # main_step1()
+    main_step1()
-    main_step2()
+    # main_step2()
Author	SHA1	Message	Date
zhaoxiangpeng	abe12a41fe	添加单条参考文献字段（根据参考文件拆分后单条参考文献）	5 months ago
zhaoxiangpeng	96de736c58	PT字段修改为DT字段	5 months ago