You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
41 lines
1.2 KiB
Python
41 lines
1.2 KiB
Python
# -*- coding: utf-8 -*-
|
|
# @Time : 2022/6/6 10:26
|
|
# @Author : ZhaoXiangPeng
|
|
# @File : 合并统计.py
|
|
|
|
import pandas as pd
|
|
from 数据处理.文件合并 import join, load_file
|
|
|
|
# Directory containing the CSV files that the rest of this script reads.
path = 'E:/inspec合并/'

# NOTE(review): the entries returned by load_file are prefixed with `path`
# again where they are read further down, so they are presumably bare file
# names rather than full paths -- confirm against load_file.
files = load_file(path)

# Echo the discovered inputs so a run can be checked by eye.
print(files)
|
|
|
|
|
|
def count(file_name: str) -> pd.DataFrame:
    """Load one CSV file and return its contents unchanged.

    No per-DOI aggregation happens here; the rows come back exactly as
    ``pandas.read_csv`` parsed them.

    Args:
        file_name: Location of the CSV file to read.

    Returns:
        The parsed file as a DataFrame.
    """
    return pd.read_csv(file_name)
|
|
|
|
|
|
def _join_issns(values: pd.Series) -> str:
    """Return the distinct non-null entries of *values* joined with '; '."""
    # Missing ISSNs are NaN (a float) and would make str.join raise, so they
    # are dropped and the rest coerced to str. Sorting keeps the output
    # reproducible; iteration order of a set of strings can differ per run.
    return '; '.join(sorted(values.dropna().astype(str).unique()))


# Reference list: the distinct values of the 'DI' column of the search
# result. They are matched against the 'doi' column of every input file.
df0 = pd.read_csv(r'F:\工作数据存储2022\20220526_inspec测试\Digital twin searching result 2.csv')
# Build a new frame instead of de-duplicating a column selection in place,
# which triggers pandas' SettingWithCopyWarning.
df0 = df0[['DI']].drop_duplicates()

# Collect the matched rows of each file and concatenate once after the loop;
# concatenating inside the loop re-copies the accumulated frame every time.
matched_parts = []
for f in files:
    # count() hands back the frame exactly as read from the CSV, so no index
    # reset is needed before merging on the 'doi' column.
    t_df = count(path + f)
    # Left merge followed by the not-null filter keeps only reference DOIs
    # that have a 'count' value in this file.
    ts = pd.merge(df0, t_df, how='left', left_on=['DI'], right_on=['doi'])
    matched_parts.append(ts[ts['count'].notnull()])

if matched_parts:
    big_df = pd.concat(matched_parts, ignore_index=True)
else:
    # No input files: keep the expected columns so the grouping below still
    # works and a header-only CSV is written instead of crashing.
    big_df = pd.DataFrame(columns=['DI', 'doi', 'count', 'issn'])

# Group by the bare column name (not a one-element list) so each group key is
# the DOI itself; with a list, pandas >= 2 yields 1-tuples as keys.
rows = [
    {'doi': doi, 'count': group['count'].sum(), 'issn': _join_issns(group['issn'])}
    for doi, group in big_df.groupby('doi')
]

# One output row per DOI: summed count and the ISSNs it was seen with.
df = pd.DataFrame(rows, columns=['doi', 'count', 'issn'])
df.to_csv('inspec数据库施引测试数据.csv', index=False)
|
|
|