# -*- coding: utf-8 -*-
# @Time : 2022/6/21 8:53
# @Author : ZhaoXiangPeng
# @File : utils.py

import datetime
import os
from typing import Optional

import pandas as pd


def get_today(fmt: str = '%Y%m%d') -> str:
    """Return today's local date formatted with *fmt*.

    Args:
        fmt: A ``strftime`` format string. Defaults to ``'%Y%m%d'``.

    Returns:
        The formatted date string.
    """
    return datetime.date.today().strftime(fmt)


def get_row_top(row: pd.Series, num: int = 3) -> str:
    """Return the suffixes of the ``num`` highest-valued labels in *row*.

    The row is sorted by value in descending order; for each of the first
    ``num`` index labels, only the text after the last ``'_'`` is kept.

    Args:
        row: Series whose values are sortable.
        num: Number of top entries to keep.

    Returns:
        The kept label suffixes joined with ``';'``.
    """
    top_labels = row.sort_values(ascending=False)[:num].index
    # str() keeps non-string labels from raising AttributeError; for string
    # labels rsplit('_', 1)[-1] is the same as split('_')[-1].
    return ';'.join(str(label).rsplit('_', 1)[-1] for label in top_labels)


def get_row_top_join_sub(row: pd.Series, num: int = 3, split: float = 0.9,
                         split_on: bool = True) -> str:
    """Return the top entries of *row* formatted as ``'label,value'`` pairs.

    When ``split_on`` is true, ``num`` is capped at the number of values that
    are ``>= split``; if no value reaches the threshold, exactly one entry
    (the highest) is returned.

    Args:
        row: Series whose values are sortable and comparable with ``split``.
        num: Maximum number of entries to return.
        split: Threshold a value must reach to count as qualifying.
        split_on: Whether to apply the threshold-based cap on ``num``.

    Returns:
        ``'label,value'`` pairs in descending value order, joined with ``'; '``.
    """
    if split_on:
        qualifying = len(row[row.values >= split])
        if qualifying == 0:
            # Nothing reaches the threshold: still report the single best entry.
            num = 1
        elif qualifying < num:
            num = qualifying
    top_entries = row.sort_values(ascending=False)[:num].to_dict()
    return '; '.join(f'{sub},{score}' for sub, score in top_entries.items())


def merge_table(filepath: str, output_path: Optional[str] = None) -> pd.DataFrame:
    """Concatenate every file in a directory, read as CSV, into one DataFrame.

    Args:
        filepath: Path of the input directory. Every regular file directly
            inside it is read with ``pandas.read_csv``; subdirectories are
            skipped.
        output_path: If not empty, the merged table is also written to
            ``<output_path>/merge_table_<YYYYMMDD>.csv`` (the directory is
            created when missing).

    Returns:
        The merged DataFrame; an empty DataFrame when the directory contains
        no files. Original row indices are kept (the index is not reset).
    """
    frames = []
    # Sorted so the row order of the result is deterministic; os.listdir
    # returns entries in arbitrary order.
    for filename in sorted(os.listdir(filepath)):
        full_path = os.path.join(filepath, filename)
        if not os.path.isfile(full_path):
            continue
        frames.append(pd.read_csv(full_path))

    # Concatenate once: avoids re-copying the accumulated frame on every
    # iteration and avoids concatenating with an empty seed DataFrame.
    return_df = pd.concat(frames) if frames else pd.DataFrame()

    if output_path:
        os.makedirs(output_path, exist_ok=True)
        day = get_today()
        return_df.to_csv(os.path.join(output_path, f'merge_table_{day}.csv'), index=False)
    return return_df