用正则判断年份的4位数字

main
zhaoxiangpeng 3 months ago
parent 4d096a3561
commit 577531a873

@ -13,6 +13,8 @@
# 2.2.发文信息的结果与引文信息的结果进行匹配 # 2.2.发文信息的结果与引文信息的结果进行匹配
import os import os
import re
import pandas as pd import pandas as pd
from pymongo import MongoClient from pymongo import MongoClient
@ -90,6 +92,9 @@ def ref_str2dic(text):
var1.append(ref_copy[:idx_t]) var1.append(ref_copy[:idx_t])
ref_copy = ref_copy[idx_t + INFO_SPLIT_SYMBOL_LENGTH:] ref_copy = ref_copy[idx_t + INFO_SPLIT_SYMBOL_LENGTH:]
au, py, so = var1 au, py, so = var1
# The year field must begin with 4 digits (re.match anchors only at the start); otherwise it is set to None
if not re.match(r'\d{4}', py):
py = None
doi_idx = ref.find(DOI_SPLIT_SYMBOL) doi_idx = ref.find(DOI_SPLIT_SYMBOL)
model = dict(au=au, py=py, so=so) model = dict(au=au, py=py, so=so)
if doi_idx != -1: if doi_idx != -1:

Loading…
Cancel
Save