You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

44 lines
1.1 KiB
Python

# -*- coding: utf-8 -*-
# @Time : 2022/8/5 8:58
# @Author : ZAOXG
# @File : file_read.py
import os
import warnings

import chardet
import pandas as pd
__all__ = [
    'read_data',
]

# Maps a lower-cased file extension (without the leading dot) to the pandas
# reader used to load files of that type.
file_type_operation = {
    'csv': pd.read_csv,
    'xlsx': pd.read_excel,
    'xls': pd.read_excel,
    'txt': pd.read_table,
    # NOTE(review): pd.read_html returns a *list* of DataFrames, so this entry
    # does not match read_data's annotated return type. Presumably meant for
    # HTML tables saved with an Excel-like extension -- confirm with callers.
    'xls2': pd.read_html,
}


def read_data(file: str, **kwargs) -> pd.DataFrame:
    """Load *file* with the pandas reader that matches its extension.

    The extension is taken from the final path component only and compared
    case-insensitively against ``file_type_operation``. A path without an
    extension is read as ``'txt'`` (``pd.read_table``).

    If the reader raises ``UnicodeDecodeError``, the file's raw bytes are
    passed to ``chardet`` and the read is retried exactly once with the
    detected encoding (overriding any ``encoding`` given in *kwargs*).

    Args:
        file: Path of the file to read.
        **kwargs: Forwarded unchanged to the selected pandas reader.

    Returns:
        Whatever the selected pandas reader returns.

    Raises:
        KeyError: The file's extension has no entry in
            ``file_type_operation``.
        UnicodeDecodeError: Decoding failed and either no encoding could be
            detected or the retry with the detected encoding failed too.
    """
    # splitext only inspects the last path component, so a dot in a directory
    # name (e.g. "data.v2/table") is not mistaken for a file extension.
    extension = os.path.splitext(file)[1]
    file_type = extension[1:].lower() if extension else 'txt'

    try:
        reader = file_type_operation[file_type]
    except KeyError:
        raise KeyError(
            'unsupported file type %r for %r; expected one of %s'
            % (file_type, file, sorted(file_type_operation))
        ) from None

    try:
        return reader(file, **kwargs)
    except UnicodeDecodeError:
        warnings.warn('%s 编码异常,启用检查' % file)
        with open(file, 'rb') as f:
            raw = f.read()
        encoding = chardet.detect(raw)['encoding']
        if encoding is None:
            # Nothing better to try: surface the original decoding error
            # instead of retrying forever with the same settings.
            raise
        kwargs.update(encoding=encoding)
        warnings.warn('%s 尝试使用 "%s" 解码' % (file, encoding))
        # Retry once, directly through the reader (no recursion), so a second
        # failure propagates to the caller.
        return reader(file, **kwargs)