You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
51 lines
1.3 KiB
Python
51 lines
1.3 KiB
Python
# -*- coding: utf-8 -*-
|
|
# @Time : 2024/2/4 10:55
|
|
# @Author : zhaoxiangpeng
|
|
# @File : utils.py
|
|
|
|
import os
|
|
from typing import List, Union
|
|
import pandas as pd
|
|
|
|
|
|
def read_file(path_or_files: Union[List[str], str], path: bool = True):
    """Read one or more tab-separated files into a single DataFrame.

    Args:
        path_or_files: A directory path, a single file path, or a list of
            file paths.
        path: When true and ``path_or_files`` is a string, the string is
            treated as a directory and every entry in it is read. When
            false, a string is treated as the path of one file.

    Returns:
        The row-wise concatenation of all tables, with each file's own row
        index kept as-is. An empty DataFrame when there is nothing to read.
    """
    if isinstance(path_or_files, str):
        if path:
            # os.listdir() returns entries in arbitrary order; sort them so
            # the row order of the result is reproducible.
            path_or_files = [
                os.path.join(path_or_files, name)
                for name in sorted(os.listdir(path_or_files))
            ]
        else:
            # A bare string would otherwise be iterated character by
            # character in the loop below.
            path_or_files = [path_or_files]

    tables = [
        pd.read_csv(file, sep='\t', low_memory=False)
        for file in path_or_files
    ]
    if not tables:
        # pd.concat() raises ValueError on an empty sequence.
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated table per file.
    return pd.concat(tables)
|
|
|
|
|
|
def export_small_file(big_table, export_path: Union[str, None] = None, split: int = int(8e5)):
    """Export a large table as a series of smaller Excel files.

    The rows are cut into consecutive chunks of at most ``split`` rows, and
    each chunk is written to ``<export_path>/<n>.xlsx`` where ``n`` counts
    up from 1. An empty table produces no files.

    Args:
        big_table: The DataFrame to split.
        export_path: Directory that receives the files; it is created when
            missing. ``None`` or an empty string means the current working
            directory.
        split: Maximum number of rows per file; must be positive.

    Raises:
        ValueError: If ``split`` is not positive.
    """
    if split <= 0:
        raise ValueError('split must be a positive integer, got %r' % (split,))
    if not export_path:
        # The declared default is None, which os.path.join() cannot handle.
        export_path = os.curdir
    os.makedirs(export_path, exist_ok=True)

    row_count = big_table.shape[0]
    for file_idx, start in enumerate(range(0, row_count, split), start=1):
        # iloc keeps the slice positional regardless of the index type.
        chunk = big_table.iloc[start: start + split]
        chunk.to_excel(os.path.join(export_path, '%s.xlsx' % file_idx), index=False)
|
|
|
|
|
|
def str2float(string, replace=0):
    """Convert ``string`` to a float, falling back to ``replace``.

    Args:
        string: The value to convert; anything ``float()`` accepts.
        replace: Value returned when the conversion fails.

    Returns:
        ``float(string)``, or ``replace`` when ``float()`` raises
        ``ValueError``, ``TypeError`` or ``OverflowError``.
    """
    try:
        return float(string)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers integers too large to represent as a float.
        return replace
|
|
|
|
|
|
def str2int(string, replace=0):
    """Convert ``string`` to an int, falling back to ``replace``.

    Args:
        string: The value to convert; anything ``int()`` accepts.
        replace: Value returned when the conversion fails.

    Returns:
        ``int(string)``, or ``replace`` when ``int()`` raises ``ValueError``,
        ``TypeError`` or ``OverflowError``.
    """
    try:
        return int(string)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is what int() raises for an infinite float.
        return replace
|