You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

54 lines
1.6 KiB
Python

# -*- coding: utf-8 -*-
# @Time : 2024/3/11 13:48
# @Author : zhaoxiangpeng
# @File : BCR_20240311.py
import os
import pandas as pd
from loguru import logger
# When true, step0 adds a 'SOURCE' column to every table it reads, holding
# the path of the file the rows were loaded from.
ADD_SOURCE: bool = True
# Root working directory; the input and output folders used by step0 are
# resolved relative to it.
BASE_PATH: str = 'Y:\\BCR\\202407'
def load_all_small_file(path: str):
    """Yield the full path of every entry one level below ``path``.

    ``path`` is expected to contain sub-directories. Each sub-directory is
    listed and every entry inside it is yielded as
    ``os.path.join(path, sub_directory, entry)``.

    Top-level entries that are not directories are skipped, because calling
    ``os.listdir`` on them would raise ``NotADirectoryError``. Both levels
    are visited in sorted name order, since ``os.listdir`` returns names in
    arbitrary order and callers concatenate the files in the order yielded.

    :param path: root directory whose sub-directories hold the files.
    :return: generator of full paths (``str``).
    :raises OSError: propagated from ``os.listdir`` when ``path`` or one of
        its sub-directories cannot be listed.
    """
    for dir_name in sorted(os.listdir(path)):
        sub_dir = os.path.join(path, dir_name)
        if not os.path.isdir(sub_dir):
            # A stray file next to the sub-directories cannot be listed.
            continue
        for file_name in sorted(os.listdir(sub_dir)):
            yield os.path.join(sub_dir, file_name)
def step0():
    """Merge all collected CSV files and re-export the result in chunks.

    Every path yielded by ``load_all_small_file`` for the
    ``API分工原始采集记录`` folder under ``BASE_PATH`` is read with
    ``pandas.read_csv`` (malformed lines are skipped). When ``ADD_SOURCE``
    is true, a ``SOURCE`` column holding the input file path is added to
    each table. The tables are concatenated and written to the ``After``
    folder under ``BASE_PATH`` in slices of 1,000,000 rows, each slice as
    both a tab-separated ``<n>.txt`` and an ``<n>.xlsx`` file, with ``n``
    starting at 1.

    Nothing is written when no input file is found.
    """
    source_dir = os.path.join(BASE_PATH, "API分工原始采集记录")

    # Collect the frames and concatenate once; concatenating inside the
    # loop re-copies the accumulated table for every input file.
    tables = []
    for file_path in load_all_small_file(source_dir):
        logger.debug('当前处理 %s' % file_path)
        table = pd.read_csv(file_path, on_bad_lines='skip', low_memory=False, index_col=False)
        if ADD_SOURCE:
            # Tag each row with the file it was loaded from.
            table['SOURCE'] = file_path
        logger.debug('表头: %s' % table.columns.values.tolist())
        tables.append(table)

    if not tables:
        # pd.concat raises ValueError on an empty list; nothing to export.
        return

    big_table = pd.concat(tables)

    save_path = os.path.join(BASE_PATH, "After")
    # The export calls below do not create missing parent directories.
    os.makedirs(save_path, exist_ok=True)

    split = 1000000
    row_count = big_table.shape[0]
    for file_idx, start in enumerate(range(0, row_count, split), start=1):
        # Positional row slice; the last chunk may be shorter than `split`.
        chunk = big_table[start: start + split]
        chunk.to_csv(os.path.join(save_path, '%s.txt' % file_idx), sep='\t', index=False)
        chunk.to_excel(os.path.join(save_path, '%s.xlsx' % file_idx), index=False)
# Run the merge/export pipeline only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    step0()