Enhance trading workflow with new order-flow management
- Added dingdanliu_nb_mflow for improved order processing
- Updated related scripts and configurations to support the new functionality
This commit is contained in:
68
2.数据下载与处理/数据转换最终版/merged_tickdata__BIT_20240522.py
Normal file
68
2.数据下载与处理/数据转换最终版/merged_tickdata__BIT_20240522.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
from datetime import time as s_time
|
||||
from datetime import datetime
|
||||
import chardet
|
||||
import numpy as np
|
||||
|
||||
|
||||
def split_alpha_numeric(string):
    """Partition *string* into its alphabetic and numeric characters.

    Characters that are neither letters nor digits are discarded;
    the relative order of kept characters is preserved.

    Args:
        string: Any string, e.g. a contract symbol like ``"AG2406"``.

    Returns:
        tuple[str, str]: ``(letters, digits)`` joined in original order.
    """
    letters = "".join(ch for ch in string if ch.isalpha())
    digits = "".join(ch for ch in string if ch.isdigit())
    return letters, digits
|
||||
|
||||
|
||||
|
||||
def merged_old_unprocessed_tickdata(all_csv_files, sp_char):
    """Merge every CSV file whose path contains *sp_char* into one DataFrame.

    Each selected file is read as GBK. Files that fail to read are reported
    (console and ``output_error.txt``) with their chardet-detected encoding
    and skipped. Per-file duplicate rows are dropped before concatenation,
    and fully duplicated rows across files are dropped afterwards.

    Args:
        all_csv_files: Candidate CSV file paths (relative or absolute).
        sp_char: Substring used to select files (e.g. a contract code).

    Returns:
        pd.DataFrame: Deduplicated concatenation of all readable matching
        files with a fresh RangeIndex; empty if nothing matched or was
        readable.
    """
    csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
    print("csv_files:", csv_files)

    merged_up_df = pd.DataFrame()
    base_dir = os.getcwd()  # renamed from `dir`, which shadowed the builtin
    fileNum_errors = 0

    # Loop over each csv file
    for file in csv_files:
        try:
            df = pd.read_csv(file,
                             header=0,
                             encoding='gbk',
                             low_memory=False,
                             )
        # Was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt.
        except Exception:
            file_path = os.path.join(base_dir, file)
            fileNum_errors += 1
            # Use a distinct handle name: the original reused `file` here,
            # clobbering the loop variable.
            with open(file_path, 'rb') as fh:
                data = fh.read()

            # Detect the actual encoding with chardet for the error report.
            detected_encoding = chardet.detect(data)['encoding']
            msg = "%s:%s当前文件不为gbk格式,其文件格式为%s,需要转换为gbk格式,错误总数为%s" % (
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                file_path, detected_encoding, fileNum_errors)
            print(msg)
            with open('output_error.txt', 'a') as f:
                print(msg, file=f)
            # BUG FIX: the original fell through and re-merged the previous
            # file's `df` (or raised NameError on the first file). Skip the
            # unreadable file instead.
            continue

        # Drop duplicate rows within this file
        df.drop_duplicates(inplace=True)
        # Append to the running merge
        merged_up_df = pd.concat([merged_up_df, df], ignore_index=True)

    # Guard: drop_duplicates(subset=[]) raises on a column-less empty frame
    # (nothing matched / nothing readable) — return the empty frame as-is.
    if merged_up_df.empty:
        return merged_up_df

    # Drop rows duplicated across files
    merged_up_df.drop_duplicates(subset=merged_up_df.columns.tolist(), inplace=True)
    # Reset the row index
    merged_up_df.reset_index(inplace=True, drop=True)

    return merged_up_df
|
||||
|
||||
Reference in New Issue
Block a user