# -*- coding: utf-8 -*-
# @Time : 2022/8/5 8:58
# @Author : ZAOXG
# @File : file_read.py
"""Load tabular files with pandas, picking the reader from the file extension."""

import os
import warnings

import pandas as pd

try:
    import chardet
except ImportError:
    # chardet is only needed by the encoding fallback in read_data; without
    # it the original decode error is simply propagated to the caller.
    chardet = None

__all__ = [
    'read_data'
]

# Maps a lower-cased file extension to the pandas reader used for it.
file_type_operation = {
    'csv': pd.read_csv,
    'xlsx': pd.read_excel,
    'xls': pd.read_excel,
    'txt': pd.read_table,
    'xls2': pd.read_html
}


def read_data(file: str, **kwargs) -> pd.DataFrame:
    """Read *file* with the pandas reader registered for its extension.

    The extension is taken from the final path component only and compared
    case-insensitively against ``file_type_operation``; a name without any
    extension is treated as ``txt``. If the reader raises
    ``UnicodeDecodeError``, the file's raw bytes are passed to chardet and
    the read is retried exactly once with the detected encoding.

    Args:
        file: Path of the file to read (``str`` or ``os.PathLike``).
        **kwargs: Forwarded unchanged to the selected pandas reader.

    Returns:
        Whatever the selected reader returns. NOTE: the ``xls2`` entry maps
        to ``pd.read_html``, which returns a list of DataFrames rather than
        a single DataFrame.

    Raises:
        KeyError: The extension has no entry in ``file_type_operation``.
        UnicodeDecodeError: Decoding failed and chardet is unavailable,
            found no encoding, suggested the encoding already in use, or
            the retry with the detected encoding failed as well.
    """
    # Only the last path component may contribute an extension; otherwise a
    # dot in a directory name ("./data/report") is mistaken for one.
    base_name = os.path.basename(os.fspath(file))
    if '.' in base_name:
        file_type = base_name.rsplit('.', 1)[-1].lower()
    else:
        file_type = 'txt'

    try:
        reader = file_type_operation[file_type]
    except KeyError:
        raise KeyError(
            'unsupported file type %r for %s' % (file_type, file)
        ) from None

    try:
        return reader(file, **kwargs)
    except UnicodeDecodeError:
        if chardet is None:
            raise
        warnings.warn('%s 编码异常,启用检查' % file)
        with open(file, 'rb') as f:
            data = f.read()
        encoding = chardet.detect(data)['encoding']
        current = kwargs.get('encoding')
        # Retrying with no encoding, or with the one that just failed, can
        # only fail again, so surface the original error instead.
        if encoding is None or (
            current is not None
            and str(current).lower() == encoding.lower()
        ):
            raise
        warnings.warn('%s 尝试使用 "%s" 解码' % (file, encoding))
        kwargs.update(encoding=encoding)
        # Single retry: a second decode failure propagates rather than
        # recursing without bound.
        return reader(file, **kwargs)