You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
57 lines
1.5 KiB
Python
57 lines
1.5 KiB
Python
# -*- coding: utf-8 -*-
|
|
# @Time : 2022/6/21 8:53
|
|
# @Author : ZhaoXiangPeng
|
|
# @File : utils.py
|
|
|
|
import pandas as pd
|
|
import datetime
|
|
import os
|
|
|
|
|
|
def get_today(fmt='%Y%m%d'):
    """Return the current local date formatted with the strftime pattern *fmt*.

    Args:
        fmt: A ``strftime`` format string; the default yields e.g. ``20220621``.

    Returns:
        Today's date rendered as a string.
    """
    today = datetime.date.today()
    return today.strftime(fmt)
|
|
|
|
|
|
def get_row_top(row: pd.Series, num: int = 3):
    """Return the labels of the *num* largest values in *row*, joined by ';'.

    Each label is reduced to the text after its last underscore before
    joining; a label without an underscore is kept whole.

    Args:
        row: Series whose index labels are strings.
        num: Maximum number of labels to return.

    Returns:
        The shortened labels in descending order of value, separated by ';'.
    """
    ranked = row.sort_values(ascending=False)
    leading_labels = ranked.index[:num]
    return ';'.join(label.rsplit('_', 1)[-1] for label in leading_labels)
|
|
|
|
|
|
def get_row_top_join_sub(row: pd.Series, num: int = 3, split: float = 0.9, split_on: bool = True):
    """Format the highest-valued entries of *row* as ``label,value`` pairs.

    Args:
        row: Series of values keyed by label.
        num: Maximum number of entries to report.
        split: Threshold used when *split_on* is true.
        split_on: When true, the number of reported entries is capped at the
            count of values that are ``>= split``; if no value reaches the
            threshold, exactly one entry (the largest) is still reported.

    Returns:
        The selected entries in descending order of value, each rendered as
        ``label,value`` and joined with ``'; '``.
    """
    limit = num
    if split_on:
        passing = len(row[row.values >= split])
        # Always report at least the single best entry, even when nothing
        # reaches the threshold.
        limit = 1 if passing == 0 else min(num, passing)
    leaders = row.sort_values(ascending=False)[:limit].to_dict()
    return '; '.join(f'{label},{value}' for label, value in leaders.items())
|
|
|
|
|
|
def merge_table(filepath, output_path: str = None):
    """Read every file in a directory as CSV and stack them into one table.

    Args:
        filepath: Directory containing the input files. A trailing slash is
            optional.
        output_path: If non-empty, the merged table is also saved into this
            directory as ``merge_table_<date>.csv``, where ``<date>`` comes
            from ``get_today()``.

    Returns:
        A DataFrame holding the rows of all input files, stacked in
        file-name order (each file keeps its own row index). An empty
        DataFrame is returned when the directory contains no files.
    """
    frames = []
    # os.listdir() returns names in arbitrary order; sorting makes the row
    # order of the merged table reproducible.
    for filename in sorted(os.listdir(filepath)):
        full_path = os.path.join(filepath, filename)
        # A sub-directory can never be parsed as CSV, so skip it instead of
        # letting read_csv fail.
        if not os.path.isfile(full_path):
            continue
        frames.append(pd.read_csv(full_path))
    # Concatenate once: growing a DataFrame inside the loop copies all rows
    # on every iteration and depends on how pandas treats an empty seed frame.
    return_df = pd.concat(frames) if frames else pd.DataFrame()
    if output_path:
        day = get_today()
        target = os.path.join(output_path, f'merge_table_{day}.csv')
        return_df.to_csv(target, index=False)
    return return_df
|