You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
44 lines
1.1 KiB
Python
44 lines
1.1 KiB
Python
# -*- coding: utf-8 -*-
# @Time : 2022/8/5 8:58
# @Author : ZAOXG
# @File : file_read.py
|
|
|
|
import os
import warnings

import chardet
import pandas as pd
|
|
|
|
# Names exported by ``from <module> import *``.
__all__ = ['read_data']
|
|
|
|
# Dispatch table: file-type key -> pandas reader callable used to load it.
# Two spreadsheet extensions share ``pd.read_excel``; ``xls2`` is routed to
# ``pd.read_html``.
file_type_operation = dict(
    csv=pd.read_csv,
    xlsx=pd.read_excel,
    xls=pd.read_excel,
    txt=pd.read_table,
    xls2=pd.read_html,
)
|
|
|
|
|
|
def read_data(file: str, **kwargs) -> pd.DataFrame:
    """Load a tabular file with the pandas reader chosen by its extension.

    The extension (text after the last dot of the file *name*, lower-cased)
    is looked up in ``file_type_operation``. A name with no extension, or
    with nothing after the final dot, is read as ``'txt'``. If the reader
    raises ``UnicodeDecodeError``, the raw bytes are handed to
    ``chardet.detect`` and the read is retried exactly once with the
    detected encoding.

    Args:
        file: Path of the file to read.
        **kwargs: Forwarded to the selected pandas reader. On the retry an
            ``encoding`` entry is added (or overridden) with the detected
            encoding; the caller's mapping itself is not modified.

    Returns:
        Whatever the selected reader returns.
        NOTE(review): the ``'xls2'`` entry maps to ``pd.read_html``, which
        returns a list of DataFrames rather than a single DataFrame --
        confirm callers of that file type expect a list.

    Raises:
        KeyError: If the extension has no entry in ``file_type_operation``.
        UnicodeDecodeError: If no encoding could be detected, or the retry
            with the detected encoding fails as well.
    """
    # Only the final path component may contribute an extension; splitting
    # the whole path on '.' would misread './data/report' or 'v1.2/report'.
    base_name = os.path.basename(file)
    _, dot, suffix = base_name.rpartition('.')
    file_type = suffix.lower() if dot and suffix else 'txt'

    try:
        reader = file_type_operation[file_type]
    except KeyError:
        # Same exception type as the bare dict lookup, but with a message
        # that tells the caller what went wrong and what is accepted.
        raise KeyError(
            'unsupported file type %r (from %r); supported types: %s'
            % (file_type, file, ', '.join(sorted(file_type_operation)))
        ) from None

    try:
        return reader(file, **kwargs)
    except UnicodeDecodeError:
        warnings.warn('%s 编码异常,启用检查' % file)
        with open(file, 'rb') as f:
            raw = f.read()
        encoding = chardet.detect(raw)['encoding']
        if encoding is None:
            # Detection gave nothing to retry with: surface the original
            # decode error instead of looping on the same failure.
            raise
        warnings.warn('%s 尝试使用 "%s" 解码' % (file, encoding))
        # Retry once, directly. Re-entering read_data here would detect the
        # same encoding again and recurse without bound if it also fails.
        return reader(file, **{**kwargs, 'encoding': encoding})
|