# -*- coding: utf-8 -*-
# @Time : 2022/6/6 10:26
# @Author : ZhaoXiangPeng
# @File : 合并统计.py
"""Merge per-file citation counts and aggregate them per DOI.

Each entry returned by ``load_file(path)`` is appended to ``path`` and read
as a CSV that must provide ``doi``, ``count`` and ``issn`` columns.  Rows are
kept only when their ``doi`` matches a ``DI`` value of the reference search
result; the surviving rows are summed per DOI and written to a single CSV.
"""
import pandas as pd

from 数据处理.文件合并 import join, load_file

path = 'E:/inspec合并/'
files = load_file(path)
print(files)

# Column layout of the aggregated output file.
OUTPUT_COLUMNS = ['doi', 'count', 'issn']


def count(file_name: str) -> pd.DataFrame:
    """Load one per-file citation table.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        The file's contents as a DataFrame, unmodified (no aggregation is
        performed here).
    """
    return pd.read_csv(file_name)


def _join_issn(values: pd.Series) -> str:
    """Return the distinct non-null values of *values* joined by ``'; '``.

    Missing values are dropped and the rest are converted to ``str`` so that
    ``str.join`` cannot fail on NaN or numeric cells; sorting makes the
    result reproducible between runs.
    """
    distinct = values.dropna().astype(str).unique()
    return '; '.join(sorted(distinct))


df0 = pd.read_csv(r'F:\工作数据存储2022\20220526_inspec测试\Digital twin searching result 2.csv')
# Only the DOI column is needed as the join key; de-duplicate it so a DOI
# listed twice in the reference file does not double its counts.
df0 = df0[['DI']].drop_duplicates()

matched_frames = []
for f in files:
    t_df = count(path + f)
    # Reset the index, then link on doi to fetch count.
    # NOTE(review): with count() returning the raw frame this only adds an
    # unused 'index' column - presumably a leftover; confirm before removing.
    t_df = t_df.reset_index()
    ts = pd.merge(df0, t_df, how='left', left_on=['DI'], right_on=['doi'])
    # Keep only reference DOIs that actually found a count in this file.
    ts = ts[ts['count'].notnull()]
    matched_frames.append(ts)

# Concatenate once instead of growing the frame inside the loop; fall back to
# an empty, correctly-shaped frame when there was nothing to read.
if matched_frames:
    big_df = pd.concat(matched_frames, ignore_index=True)
else:
    big_df = pd.DataFrame(columns=OUTPUT_COLUMNS)

# Group by the scalar key 'doi' (not ['doi']): with a length-1 list, pandas
# >= 2.0 yields 1-tuples as group keys, which would be written to the CSV as
# "('<doi>',)" instead of the DOI itself.
pp = [
    {'doi': doi, 'count': g['count'].sum(), 'issn': _join_issn(g['issn'])}
    for doi, g in big_df.groupby('doi')
]
# Passing the columns explicitly keeps the header even when no row matched.
df = pd.DataFrame(data=pp, columns=OUTPUT_COLUMNS)
df.to_csv('inspec数据库施引测试数据.csv', index=False)