增加交易策略、交易指标、量化库代码等文件夹
This commit is contained in:
@@ -0,0 +1,132 @@
|
||||
'''
|
||||
Author: zhoujie2104231 zhoujie@me.com
|
||||
# Date: 2024-02-25 17:51:46
|
||||
LastEditors: zhoujie2104231 zhoujie@me.com
|
||||
LastEditTime: 2024-03-07 22:48:22
|
||||
# 使用说明:使用前需要调整的相关参数如下
|
||||
# 1.确保在csv文件所在的文件夹下运行本python脚本。
|
||||
# 2.统一代码的添加:主力连续为888,指数连续可以用999,次主力连续可以使用889,其他的可以不用添加统一代码,注释掉。
|
||||
# 3.文件夹下的文件名按照datetime进行排序修改
|
||||
# 4.data按照时间排序需要根据参数修改['业务日期','最后修改时间','最后修改毫秒'],如果前面文件名按照时间修改好了,不用修改
|
||||
# 5.读取csv文件时使用gbk或者utf-8编码
|
||||
'''
|
||||
|
||||
import pandas as pd
|
||||
import os
|
||||
|
||||
# import datetime as dt
|
||||
|
||||
def split_alpha_numeric(string):
    """Split a string into its alphabetic and its numeric characters.

    Each character is classified with ``str.isalpha`` / ``str.isdigit``.
    Characters that are neither (punctuation, whitespace, ...) are dropped.
    The relative order of the characters is preserved within each group.

    Args:
        string: The string to split.

    Returns:
        A tuple ``(alpha_chars, numeric_chars)`` of two strings: the first
        holds the alphabetic characters, the second the numeric characters.
    """
    # Accumulate in lists and join once at the end instead of growing two
    # strings with ``+=`` on every character.
    alpha_parts = []
    numeric_parts = []

    for char in string:
        if char.isalpha():
            alpha_parts.append(char)
        elif char.isdigit():
            numeric_parts.append(char)

    return "".join(alpha_parts), "".join(numeric_parts)
|
||||
|
||||
# Approach 1: merge the CSV files located directly in the current directory.
all_csv_files = [name for name in os.listdir('.') if name.endswith('.csv')]

# Substring that a CSV file name must contain to be included in the merge.
sp_char = '_2021'

# Keep only the CSV files whose name contains sp_char. The names are sorted so
# the merge order does not depend on the arbitrary order of os.listdir().
# (A disabled variant sorted numerically instead:
#  csv_files.sort(key=lambda x: int(x.split('.')[0])).)
csv_files = sorted(name for name in all_csv_files if sp_char in name)
print("csv_files:", csv_files)

# Alternative approach (disabled): walk a folder tree instead of listing the
# current directory and keep the CSV files whose name (without extension) has
# name_chr at characters 8-13. Needs `import re` if re-enabled.
# folder_path = "D:/data_transfer/ag888"
# name_chr = "202309"
# csv_files = []
# for root, dirs, files in os.walk(folder_path):
#     for file in files:
#         if file.endswith('.csv'):
#             filename = os.path.splitext(file)[0]
#             match_file = re.search(r'(?<=^.{7}).{6}(?=.{2})', filename)
#             if match_file is not None and match_file.group() == name_chr:
#                 csv_files.append(filename + ".csv")

# Stop with a clear message instead of failing later with a KeyError on the
# "合约代码" column of an empty DataFrame.
if not csv_files:
    raise SystemExit("当前目录下没有文件名包含 %s 的csv文件" % sp_char)

# Read every file using its first row as the header and drop duplicate rows
# per file. If decoding fails, change the encoding (e.g. to 'utf-8').
frames = [
    pd.read_csv(name, header=0, encoding='gbk').drop_duplicates()
    for name in csv_files
]

# Concatenate once; calling pd.concat inside the loop would copy the
# accumulated data again on every iteration.
merged_df = pd.concat(frames, ignore_index=True)
del frames

# Drop rows that are duplicated across files (compared on all columns).
merged_df.drop_duplicates(inplace=True)

# Rebuild a contiguous row index.
merged_df.reset_index(inplace=True, drop=True)

if merged_df.empty:
    raise SystemExit("文件名包含 %s 的csv文件中没有数据" % sp_char)

print("合约代码:", merged_df["合约代码"])

# Derive the unified code from the letters of the first contract code.
alpha_chars, _ = split_alpha_numeric(merged_df["合约代码"].iloc[0])
print("Alphabetical characters:", alpha_chars)

# Suffix convention (from the file header): 888 = main continuous contract,
# 999 = index continuous, 889 = secondary main continuous. The value is stored
# in a new "统一代码" column.
code_value = alpha_chars + "888"
print("code_value characters:", code_value)
merged_df.insert(loc=1, column="统一代码", value=code_value)

# Create the output folder if it does not exist yet.
folder_path = "合成tick数据2019-2021"
os.makedirs(folder_path, exist_ok=True)

# Alternative ordering (disabled): sort by
# ['业务日期', '最后修改时间', '最后修改毫秒'] instead of the '时间' column.

# Sort chronologically. A stable sort keeps rows that share a timestamp in
# their merged order; the default quicksort does not guarantee that.
merged_df['时间'] = pd.to_datetime(merged_df['时间'])
sorted_merged_df = merged_df.sort_values(by=['时间'], ascending=True, kind='mergesort')
output_path = os.path.join(folder_path, '%s%s.csv' % (code_value, sp_char))
sorted_merged_df.to_csv(output_path, index=False)

# Release the large frames.
del merged_df
del sorted_merged_df

# Report success.
print("CSV文件合并成功!")
|
||||
@@ -0,0 +1,134 @@
|
||||
'''
|
||||
Author: zhoujie2104231 zhoujie@me.com
|
||||
# Date: 2024-02-25 17:51:46
|
||||
LastEditors: zhoujie2104231 zhoujie@me.com
|
||||
LastEditTime: 2024-03-17 16:59:35
|
||||
# 使用说明:使用前需要调整的相关参数如下
|
||||
# 1.确保在csv文件所在的文件夹下运行本python脚本。
|
||||
# 2.统一代码的添加:主力连续为888,指数连续可以用999,次主力连续可以使用889,其他的可以不用添加统一代码,注释掉。
|
||||
# 3.文件夹下的文件名按照datetime进行排序修改
|
||||
# 4.data按照时间排序需要根据参数修改['业务日期','最后修改时间','最后修改毫秒'],如果前面文件名按照时间修改好了,不用修改
|
||||
# 5.读取csv文件时使用gbk或者utf-8编码
|
||||
'''
|
||||
|
||||
import pandas as pd
|
||||
import os
|
||||
|
||||
# import datetime as dt
|
||||
|
||||
def split_alpha_numeric(string):
    """Split a string into its alphabetic and its numeric characters.

    Each character is classified with ``str.isalpha`` / ``str.isdigit``.
    Characters that are neither (punctuation, whitespace, ...) are dropped.
    The relative order of the characters is preserved within each group.

    Args:
        string: The string to split.

    Returns:
        A tuple ``(alpha_chars, numeric_chars)`` of two strings: the first
        holds the alphabetic characters, the second the numeric characters.
    """
    # Accumulate in lists and join once at the end instead of growing two
    # strings with ``+=`` on every character.
    alpha_parts = []
    numeric_parts = []

    for char in string:
        if char.isalpha():
            alpha_parts.append(char)
        elif char.isdigit():
            numeric_parts.append(char)

    return "".join(alpha_parts), "".join(numeric_parts)
|
||||
|
||||
# Approach 1: merge the CSV files located directly in the current directory.
all_csv_files = [name for name in os.listdir('.') if name.endswith('.csv')]

# Substring that a CSV file name must contain to be included in the merge.
sp_char = '_2023'

# Keep only the CSV files whose name contains sp_char. The names are sorted so
# the merge order does not depend on the arbitrary order of os.listdir().
# (A disabled variant sorted numerically instead:
#  csv_files.sort(key=lambda x: int(x.split('.')[0])).)
csv_files = sorted(name for name in all_csv_files if sp_char in name)
print("csv_files:", csv_files)

# Alternative approach (disabled): walk a folder tree instead of listing the
# current directory and keep the CSV files whose name (without extension) has
# name_chr at characters 8-13. Needs `import re` if re-enabled.
# folder_path = "D:/data_transfer/ag888"
# name_chr = "202309"
# csv_files = []
# for root, dirs, files in os.walk(folder_path):
#     for file in files:
#         if file.endswith('.csv'):
#             filename = os.path.splitext(file)[0]
#             match_file = re.search(r'(?<=^.{7}).{6}(?=.{2})', filename)
#             if match_file is not None and match_file.group() == name_chr:
#                 csv_files.append(filename + ".csv")

# Stop with a clear message instead of failing later with a KeyError on the
# "合约代码" column of an empty DataFrame.
if not csv_files:
    raise SystemExit("当前目录下没有文件名包含 %s 的csv文件" % sp_char)

# Read every file using its first row as the header and drop duplicate rows
# per file. If decoding fails, change the encoding (e.g. to 'utf-8').
frames = [
    pd.read_csv(name, header=0, encoding='gbk').drop_duplicates()
    for name in csv_files
]

# Concatenate once; calling pd.concat inside the loop would copy the
# accumulated data again on every iteration.
merged_df = pd.concat(frames, ignore_index=True)
del frames

# Drop rows that are duplicated across files (compared on all columns).
merged_df.drop_duplicates(inplace=True)

# Rebuild a contiguous row index.
merged_df.reset_index(inplace=True, drop=True)

if merged_df.empty:
    raise SystemExit("文件名包含 %s 的csv文件中没有数据" % sp_char)

print("合约代码:", merged_df["合约代码"])

# Derive the unified code from the letters of the first contract code.
alpha_chars, _ = split_alpha_numeric(merged_df["合约代码"].iloc[0])
print("Alphabetical characters:", alpha_chars)

# Suffix convention (from the file header): 888 = main continuous contract,
# 999 = index continuous, 889 = secondary main continuous. The value is stored
# in a new "统一代码" column.
code_value = alpha_chars + "888"
print("code_value characters:", code_value)
merged_df.insert(loc=1, column="统一代码", value=code_value)

# Create the output folder if it does not exist yet.
folder_path = "合成tick数据2022-2023"
os.makedirs(folder_path, exist_ok=True)

# Sort by business date, then last-modified time, then last-modified
# milliseconds, and write the result into the output folder.
sorted_merged_df = merged_df.sort_values(by=['业务日期', '最后修改时间', '最后修改毫秒'], ascending=[True, True, True])
output_path = os.path.join(folder_path, '%s%s.csv' % (code_value, sp_char))
sorted_merged_df.to_csv(output_path, index=False)

# Release the large frames.
del merged_df
del sorted_merged_df

# Alternative ordering (disabled): convert the '时间' column with
# pd.to_datetime and sort by it instead of the three columns above; or, if the
# input files are already in chronological order, write merged_df unsorted.

# Report success.
print("CSV文件合并成功!")
|
||||
Reference in New Issue
Block a user