# -*- coding: utf-8 -*-
# @Time : 2024/2/4 10:55
# @Author : zhaoxiangpeng
# @File : utils.py

import os
from typing import List, Optional, Union

import pandas as pd


def read_file(path_or_files: Union[List[str], str], path: bool = True):
    """Read one or more tab-separated files into a single DataFrame.

    Args:
        path_or_files: Either a directory path, a single file path, or a
            list of file paths.
        path: When true and ``path_or_files`` is a string, the string is
            treated as a directory and every regular file directly inside
            it is read. When false, a string is treated as one file path.

    Returns:
        The row-wise concatenation of all tables that were read. The
        original per-file indexes are kept. An empty DataFrame is returned
        when there is nothing to read.
    """
    if isinstance(path_or_files, str):
        if path:
            directory = path_or_files
            # Sort so the row order does not depend on the file system's
            # listing order, and skip sub-directories that read_csv cannot
            # open.
            path_or_files = [
                full_name
                for full_name in (
                    os.path.join(directory, entry)
                    for entry in sorted(os.listdir(directory))
                )
                if os.path.isfile(full_name)
            ]
        else:
            # A bare string is a single file; iterating it directly would
            # walk over its characters.
            path_or_files = [path_or_files]

    tables = [
        pd.read_csv(file, sep='\t', low_memory=False)
        for file in path_or_files
    ]
    if not tables:
        # pd.concat raises on an empty list.
        return pd.DataFrame()
    # Concatenate once instead of growing the frame inside the loop, which
    # would re-copy all previously read rows for every file.
    return pd.concat(tables)


def export_small_file(big_table, export_path: Optional[str] = None,
                      split: int = int(8e5)):
    """Export a large table as several smaller Excel files.

    The table is cut into consecutive chunks of at most ``split`` rows and
    each chunk is written as ``1.xlsx``, ``2.xlsx``, ... inside
    ``export_path``.

    Args:
        big_table: The DataFrame to export.
        export_path: Output directory. Defaults to the current working
            directory when ``None``; it is created if it does not exist.
        split: Maximum number of rows per output file.
    """
    if export_path is None:
        export_path = os.curdir
    os.makedirs(export_path, exist_ok=True)

    row_count = big_table.shape[0]
    for file_idx, start in enumerate(range(0, row_count, split), start=1):
        chunk = big_table.iloc[start:start + split]
        chunk.to_excel(
            os.path.join(export_path, '%s.xlsx' % file_idx), index=False
        )


def str2float(string, replace=0):
    """Convert ``string`` to ``float``, returning ``replace`` on failure.

    Args:
        string: The value to convert.
        replace: Value returned when the conversion is not possible.

    Returns:
        The converted float, or ``replace`` when ``float()`` raises
        ``ValueError``, ``TypeError`` or ``OverflowError``.
    """
    try:
        return float(string)
    except (ValueError, TypeError, OverflowError):
        return replace


def str2int(string, replace=0):
    """Convert ``string`` to ``int``, returning ``replace`` on failure.

    Args:
        string: The value to convert.
        replace: Value returned when the conversion is not possible.

    Returns:
        The converted int, or ``replace`` when ``int()`` raises
        ``ValueError``, ``TypeError`` or ``OverflowError`` (for example
        for an infinite float).
    """
    try:
        return int(string)
    except (ValueError, TypeError, OverflowError):
        return replace