You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

46 lines
2.0 KiB
Python

# -*- coding: utf-8 -*-
# @Time : 2024/4/26 14:15
# @Author : zhaoxiangpeng
# @File : BCR_20240426.py
import os
import pandas as pd
from loguru import logger
# Directory (drive-letter path) that func1 reads the 2024 supplementary CSV from
# and writes the filled result into.
ROOT_PATH = 'Y:\\zhaoxiangpeng\\BCR202404'
def func1():
    """
    Fill the 2024 supplementary export with '2021' values taken from the 2022 BCR data.

    Steps:
      1. Read the merged 2022 CSV, keep only the 'EID' and '2021' columns and
         drop repeated EIDs (first occurrence wins).
      2. Read the 2024 supplementary CSV from ROOT_PATH.
      3. Left-join the 2022 values onto the 2024 rows by 'EID', so every 2024
         row is kept and rows without a 2022 match get an empty '2021'.
      4. Select the output columns in a fixed order, print the result and write
         it to ROOT_PATH as a tab-separated file without the index.

    Takes no arguments and returns nothing; its only effects are the printed
    table and the written file.
    """
    bcr22_path = 'Y:\\zhaoxiangpeng\\BCR2022'

    # The 2022 data is read from the single merged CSV. A disabled code path
    # that concatenated three sharded Excel files used to live here; it was
    # dead code and has been removed together with its unused file list.
    bcr22_table = pd.read_csv(os.path.join(bcr22_path, 'eid去重保留最大grandTotal.csv'))
    # Non-inplace form: builds the de-duplicated lookup table in one expression
    # instead of mutating a frame that was just derived by column selection.
    bcr22_table = bcr22_table[['EID', '2021']].drop_duplicates(subset=['EID'])

    # 2024 supplementary data
    bcr24_extend = pd.read_csv(
        os.path.join(ROOT_PATH, 'Grand Total为空-20240410 11时06分下载.csv'),
        index_col=False,
    )

    new_table = pd.merge(bcr24_extend, bcr22_table, how='left', on=['EID'])

    # NOTE(review): the two '' entries below look like column names that were
    # lost at some point (possibly the volume/issue columns of the export).
    # Unless the merged frame really has columns labelled '', this selection
    # raises KeyError -- confirm the intended names against the CSV header.
    output_columns = [
        '作者', '作者 ID', '标题', '年份', '来源出版物名称', '', '', '论文编号', '起始页码',
        '结束页码', '页码计数', '施引文献', 'DOI', '链接', '归属机构', '带归属机构的作者',
        '通讯地址',
        '编者', '出版商', 'ISSN', 'ISBN', 'CODEN', 'PubMed ID', '原始文献语言',
        '来源出版物名称缩写', '文献类型', '出版阶段', '访问类型', '来源出版物', 'EID', 'Sort Year',
        '2021', '2022', '2023', '2024', 'Grand Total',
    ]
    new_table = new_table[output_columns]

    print(new_table)
    new_table.to_csv(os.path.join(ROOT_PATH, '补充数据填充2021年total.txt'), sep='\t', index=False)
# Script entry point: run the fill job only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    func1()