You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
46 lines
2.0 KiB
Python
46 lines
2.0 KiB
Python
# -*- coding: utf-8 -*-
|
|
# @Time : 2024/4/26 14:15
|
|
# @Author : zhaoxiangpeng
|
|
# @File : BCR_20240426.py
|
|
|
|
import os
|
|
import pandas as pd
|
|
from loguru import logger
|
|
|
|
# Working directory for the 2024-04 BCR run: func1 reads the 2024 supplementary
# CSV from this directory and writes its tab-separated result back into it.
ROOT_PATH = 'Y:\\zhaoxiangpeng\\BCR202404'
|
|
|
|
|
|
def func1(root_path=None, bcr22_path='Y:\\zhaoxiangpeng\\BCR2022'):
    """Fill the '2021' column of the 2024 supplementary data from the 2022 BCR data.

    Reads the de-duplicated 2022 table, keeps one '2021' value per EID (the
    first occurrence), left-joins it onto the 2024 supplementary export by
    'EID', reorders the columns into the fixed output layout, prints the
    result and writes it as a tab-separated file.

    Args:
        root_path: Directory that holds the 2024 supplementary CSV and that
            receives the output file. Defaults to the module-level ROOT_PATH.
        bcr22_path: Directory that holds the merged 2022 CSV.

    Raises:
        FileNotFoundError: If either input CSV is missing.
        KeyError: If a required column is absent after the merge. NOTE(review):
            this also happens when the 2024 export already carries its own
            '2021' column, because the merge then produces suffixed
            '2021_x'/'2021_y' columns instead of '2021' -- confirm the export
            never contains that column.
    """
    if root_path is None:
        # Resolved at call time so the default follows the module constant.
        root_path = ROOT_PATH

    # The 2022 data is read from the single merged CSV (the earlier approach of
    # concatenating the three xlsx shards is no longer needed).
    bcr22_table = pd.read_csv(os.path.join(bcr22_path, 'eid去重保留最大grandTotal.csv'))
    # Only the join key and the value being back-filled are needed; keeping one
    # row per EID guarantees the left join cannot multiply 2024 rows.
    bcr22_table = bcr22_table[['EID', '2021']].drop_duplicates(subset=['EID'])

    # 2024 supplementary data (rows whose Grand Total was empty).
    bcr24_extend = pd.read_csv(
        os.path.join(root_path, 'Grand Total为空-20240410 11时06分下载.csv'),
        index_col=False,
    )

    # Left join: every 2024 row is kept; EIDs unknown to the 2022 table get NaN.
    new_table = pd.merge(bcr24_extend, bcr22_table, how='left', on=['EID'])

    # Fixed output layout, with '2021' placed ahead of the other year columns.
    output_columns = [
        '作者', '作者 ID', '标题', '年份', '来源出版物名称', '卷', '期', '论文编号', '起始页码',
        '结束页码', '页码计数', '施引文献', 'DOI', '链接', '归属机构', '带归属机构的作者',
        '通讯地址',
        '编者', '出版商', 'ISSN', 'ISBN', 'CODEN', 'PubMed ID', '原始文献语言',
        '来源出版物名称缩写', '文献类型', '出版阶段', '访问类型', '来源出版物', 'EID', 'Sort Year',
        '2021', '2022', '2023', '2024', 'Grand Total',
    ]
    new_table = new_table[output_columns]

    print(new_table)
    new_table.to_csv(os.path.join(root_path, '补充数据填充2021年total.txt'), sep='\t', index=False)
|
|
|
|
|
|
# Script entry point: run the back-fill only when executed directly, not on import.
if __name__ == '__main__':
    func1()
|