You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

57 lines
1.5 KiB
Python

# -*- coding: utf-8 -*-
# @Time : 2022/6/21 8:53
# @Author : ZhaoXiangPeng
# @File : utils.py
import pandas as pd
import datetime
import os
def get_today(fmt='%Y%m%d'):
    """Return today's local date rendered with the strftime pattern *fmt*.

    With the default pattern the result looks like ``20220621``.
    """
    current_date = datetime.date.today()
    return current_date.strftime(fmt)
def get_row_top(row: pd.Series, num: int = 3):
    """Return the short names of the highest-valued entries of *row*.

    The row is ordered from largest to smallest value and the first *num*
    index labels are kept. Each label is reduced to the text after its last
    ``'_'`` (labels without an underscore are kept whole), and the pieces are
    joined with ``';'``. Index labels must be strings.
    """
    ranked = row.sort_values(ascending=False)
    leading_labels = ranked.index[:num]
    return ';'.join(label.split('_')[-1] for label in leading_labels)
def get_row_top_join_sub(row: pd.Series, num: int = 3, split: float = 0.9, split_on: bool = True):
    """Format the highest-valued entries of *row* as ``'label,score'`` pairs.

    row: Series of scores keyed by label.
    num: maximum number of entries to report.
    split: score threshold used when *split_on* is true.
    split_on: when true, *num* is capped by the number of entries whose score
        is at least *split*; if no entry reaches the threshold, exactly one
        entry (the highest) is reported.

    Returns the selected entries, highest first, joined with ``'; '``.
    """
    if split_on:
        # Count how many scores clear the threshold; that count limits `num`.
        passing_count = len(row[row.values >= split])
        if passing_count == 0:
            num = 1
        elif passing_count < num:
            num = passing_count
    best_scores = row.sort_values(ascending=False)[:num].to_dict()
    return '; '.join(f'{sub},{score}' for sub, score in best_scores.items())
def merge_table(filepath, output_path: str = None):
    """
    Concatenate every file found directly inside a directory into one DataFrame.

    filepath: path of the input directory; each regular file in it is read
        with ``pd.read_csv`` (a trailing ``/`` is optional).
    output_path: if not empty, the merged table is also saved into this
        directory as ``merge_table_<date>.csv``, where ``<date>`` comes from
        ``get_today()``.

    Returns the concatenated DataFrame, or an empty DataFrame when the
    directory contains no files. Row indexes of the individual files are kept
    as read (they are not renumbered).
    """
    frames = []
    # os.listdir returns names in arbitrary order; sort so the merged row
    # order is reproducible from run to run.
    for filename in sorted(os.listdir(filepath)):
        full_path = os.path.join(filepath, filename)
        # Sub-directories cannot be parsed as CSV, so skip them.
        if not os.path.isfile(full_path):
            continue
        frames.append(pd.read_csv(full_path))
    # Concatenate once instead of growing a frame inside the loop (which
    # re-copies all accumulated rows each time). pd.concat rejects an empty
    # list, so fall back to an empty frame.
    return_df = pd.concat(frames) if frames else pd.DataFrame()
    if output_path:
        day = get_today()
        return_df.to_csv(os.path.join(output_path, f'merge_table_{day}.csv'), index=False)
    return return_df