Quant_Code/2.数据下载与处理/数据转换最终版/merged_tickdata__BIT_20240522.py
Win_home f925dff46b Enhance trading workflow with new order flow management
- Added dingdanliu_nb_mflow for improved order processing
- Updated related scripts and configurations to support new functionality
2025-03-15 22:45:08 +08:00

import pandas as pd
import os
from datetime import time as s_time
from datetime import datetime
import chardet
import numpy as np
def split_alpha_numeric(string):
    """Split a string into its alphabetic part and its numeric part."""
    alpha_chars = ""
    numeric_chars = ""
    for char in string:
        if char.isalpha():
            alpha_chars += char
        elif char.isdigit():
            numeric_chars += char
    return alpha_chars, numeric_chars
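# Illustrative sketch (not part of the original script): expected behaviour of
# split_alpha_numeric on a contract-style symbol. The sample input "rb2410" is
# a hypothetical value chosen purely for demonstration.
#   >>> split_alpha_numeric("rb2410")
#   ('rb', '2410')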
def merged_old_unprocessed_tickdata(all_csv_files, sp_char):
    """Merge every raw tick-data CSV whose file name contains sp_char into one DataFrame."""
    csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
    print("csv_files:", csv_files)
    merged_up_df = pd.DataFrame()
    dir = os.getcwd()
    fileNum_errors = 0
    # Iterate over each CSV file
    for file in csv_files:
        try:
            df = pd.read_csv(file,
                             header=0,
                             encoding='gbk',
                             low_memory=False,
                             # skiprows=0,
                             # parse_dates=['时间']  # note: parse dates here so the data can be sorted by time later
                             )
        except Exception:
            # The file could not be read as GBK: detect its real encoding, log it, and skip it
            file_path = os.path.join(dir, file)
            fileNum_errors += 1
            with open(file_path, 'rb') as fh:
                data = fh.read()
            # Detect the actual encoding with chardet
            detected_encoding = chardet.detect(data)['encoding']
            msg = "%s: %s is not GBK-encoded (detected %s); it needs to be converted to GBK. Total errors so far: %s" % (
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'), file_path, detected_encoding, fileNum_errors)
            print(msg)
            with open('output_error.txt', 'a') as f:
                print(msg, file=f)
            continue  # skip this file so a stale or undefined df is not merged
        # Drop duplicate rows within this file
        df.drop_duplicates(inplace=True)
        # Append the data to the merged DataFrame
        merged_up_df = pd.concat([merged_up_df, df], ignore_index=True)
    # Drop rows duplicated across all columns of the merged data
    merged_up_df.drop_duplicates(subset=merged_up_df.columns.tolist(), inplace=True)
    # Reset the row index
    merged_up_df.reset_index(inplace=True, drop=True)
    # merged_up_df, alpha_chars, code_value = insert_main_contract(merged_up_df)
    # Print a status message
    # print("Unprocessed CSV files merged successfully by year")
    return merged_up_df  # , alpha_chars, code_value
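
# --- Illustrative usage sketch (added for clarity; not part of the original script) ---
# Assumes the raw tick-data CSVs sit in the current working directory and that the
# filename marker "BIT" is the sp_char of interest; both are assumptions made only
# for this example.
if __name__ == "__main__":
    import glob
    all_csv_files = glob.glob("*.csv")  # collect candidate CSV file names
    merged_df = merged_old_unprocessed_tickdata(all_csv_files, "BIT")
    print("merged rows:", len(merged_df))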