Add folders for trading strategies, trading indicators, quant-library code, etc.

This commit is contained in:
Win_home
2025-04-27 15:54:09 +08:00
parent ca3b209096
commit f57150dae8
589 changed files with 854346 additions and 1757 deletions


@@ -0,0 +1,95 @@
import os
from chardet.universaldetector import UniversalDetector
import chardet
def get_filelist(path):
"""
获取路径下所有csv文件的路径列表
"""
Filelist = []
for home, dirs, files in os.walk(path):
for filename in files:
if ".csv" in filename:
Filelist.append(os.path.join(home, filename))
return Filelist
def read_file(file):
"""
逐个读取文件的内容
"""
with open(file, 'rb') as f:
return f.read()
def get_encode_info(file):
"""
逐个读取文件的编码方式
"""
with open(file, 'rb') as f:
# data = f.read()
# detected_encoding = chardet.detect(data)['encoding']
detector = UniversalDetector()
for line in f.readlines():
detector.feed(line)
if detector.done:
break
detector.close()
return detector.result['encoding']
def convert_encode2gbk(file, original_encode, des_encode):
"""
将文件的编码方式转换为gbk并写入原先的文件中。
"""
file_content = read_file(file)
file_decode = file_content.decode(original_encode, 'ignore')
file_encode = file_decode.encode(des_encode)
with open(file, 'wb') as f:
f.write(file_encode)
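# Example usage (hypothetical path; assumes the file was detected as utf-8 and should become gbk):
# convert_encode2gbk(r"D:\data\sample.csv", "utf-8", "gbk")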
def read_and_convert(path):
"""
读取文件并转换
"""
Filelist = get_filelist(path=path)
fileNum= 0
for filename in Filelist:
try:
file_content = read_file(filename)
print("filename:", filename)
encode_info = get_encode_info(filename)
print("encode_info", encode_info)
if encode_info != 'gbk':
fileNum +=1
convert_encode2gbk(filename, encode_info, 'gbk')
print('成功转换 %s 个文件 %s '%(fileNum,filename))
except Exception:  # Exception instead of BaseException, so Ctrl-C is not swallowed
print(filename,'存在问题,请检查!')
def recheck_again(path):
"""
再次判断文件是否为gbk
"""
print('---------------------以下文件仍存在问题---------------------')
Filelist = get_filelist(path)
for filename in Filelist:
encode_info_ch = get_encode_info(filename)
if encode_info_ch != 'gbk':
print(filename,'的编码方式是:',encode_info_ch)
print('--------------------------检查结束--------------------------')
# if __name__ == "__main__":
# """
# 输入文件路径
# """
# path = r"D:\data"
# read_and_convert(path)
# recheck_again(path)
# print('转换结束!')


@@ -0,0 +1,515 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "2d85dda4",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"from file_format_conversion import file_format_conversion\n",
"from ffc import read_and_convert, recheck_again"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe51b707",
"metadata": {},
"outputs": [],
"source": [
"new_directory = \"D:/data_all/doing\" # \"E:/data/大商所/test\"\n",
"os.chdir(new_directory) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2694b45",
"metadata": {},
"outputs": [],
"source": [
"read_and_convert(new_directory)\n",
"recheck_again(new_directory)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6ba7f98",
"metadata": {},
"outputs": [],
"source": [
"# 文件格式检查并转换替换\n",
"for root, dirs, files in os.walk('.'):\n",
" if len(dirs) > 0:\n",
" for dir in dirs:\n",
" # 获取二级子文件夹中的所有 CSV 文件\n",
" all_csv_files = [os.path.join(dir, file) for file in os.listdir(dir) if file.endswith('.csv')]\n",
"\n",
" sp_old_chars = ['_2019', '_2020', '_2021']\n",
" for sp_old_char in sp_old_chars:\n",
" csv_old_files = [sp_file for sp_file in all_csv_files if sp_old_char in sp_file]\n",
" if len(csv_old_files) > 0:\n",
" old_df, old_code_value = file_format_conversion(csv_old_files, sp_old_char)\n",
"\n",
" # sp_new_chars = ['_2022', '_2023']\n",
" # for sp_new_char in sp_new_chars:\n",
" # csv_new_files = [sp_file for sp_file in all_csv_files if sp_new_char in sp_file]\n",
" # if len(csv_new_files) > 0:\n",
" # new_df, new_code_value = file_format_conversion(csv_new_files, sp_new_char)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9c211c7",
"metadata": {},
"outputs": [],
"source": [
"import chardet\n",
"csv_file = \"D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv\"\n",
"with open(csv_file, 'rb') as f:\n",
" data = f.read()\n",
"\n",
"detected_encoding = chardet.detect(data)['encoding']\n",
"print(\"当前文件编码格式:\", detected_encoding)\n",
"\n",
"if detected_encoding and detected_encoding != 'gbk':\n",
" print(\"当前文件不为gbk格式:\", csv_file)\n",
" print(\"当前文件编码格式:\", detected_encoding)\n",
" # convert_csv_to_gbk(csv_file,detected_encoding)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5cf73aa",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"\n",
"# 读取 ISO-8859-1 编码的 CSV 文件\n",
"df = pd.read_csv('D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv', encoding='iso-8859-1', error_bad_lines= False)\n",
"\n",
"# # 将数据框转换为 GBK 编码\n",
"# df = df.to_csv('D:/data_all/doing/bu主力连续/bu主力连续_20190530_bak.csv', index=False, encoding='gbk')\n",
"\n",
"# # 替换原始 CSV 文件\n",
"# os.replace('D:/data_all/doing/bu主力连续/bu主力连续_20190530_bak.csv', 'D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d81f4d0",
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "714d5286",
"metadata": {},
"outputs": [],
"source": [
"df = df.to_csv('D:/data_all/doing/bu主力连续/bu主力连续_20190530_bak.csv', index=False, encoding='gbk')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d4ff2c6",
"metadata": {},
"outputs": [],
"source": [
"f.close"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ebc8f7ab",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"csv_file = \"D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv\"\n",
"csv_df = pd.read_csv(csv_file,encoding=\"ISO-8859-1\")#detected_encoding"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1beee390",
"metadata": {},
"outputs": [],
"source": [
"import codecs\n",
" \n",
"def convert_encoding(input_str, input_encoding='iso-8859-1', output_encoding='gbk'):\n",
" return codecs.encode(input_str, output_encoding, input_encoding)\n",
" \n",
"input_str = csv_df\n",
"converted_str = convert_encoding(input_str, 'iso-8859-1', 'gbk')\n",
"print(converted_str)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8d36cfc",
"metadata": {},
"outputs": [],
"source": [
"import csv\n",
"import codecs\n",
"from csv import reader\n",
"from csv import writer\n",
" \n",
"# 指定原始文件和目标文件路径\n",
"input_file_path = 'D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv'\n",
"output_file_path = 'D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv'\n",
" \n",
"# 打开原始文件和目标文件\n",
"with open(input_file_path, 'r', encoding='iso-8859-1') as input_file, \\\n",
" codecs.open(output_file_path, 'w', 'gbk') as output_file:\n",
" # 创建读取器和写入器\n",
" input_reader = reader(input_file)\n",
" output_writer = writer(output_file)\n",
" \n",
" # 读取并写入数据\n",
" for row in input_reader:\n",
" output_writer.writerow(row)\n",
" \n",
"print(f'文件编码从ISO-8859-1转换为GBK已保存为 {output_file_path}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c814155",
"metadata": {},
"outputs": [],
"source": [
"import csv\n",
"import codecs\n",
"import os\n",
" \n",
"input_file_name = 'D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv'\n",
"output_file_name = 'D:/data_all/doing/bu主力连续/bu主力连续_20190530_bak.csv'\n",
" \n",
"# 打开原始ISO-8859-1编码的CSV文件\n",
"with codecs.open(input_file_name, 'r', 'iso-8859-1') as input_file:\n",
" reader = csv.reader(input_file)\n",
" # 打开目标GBK编码的CSV文件进行写入\n",
" with open(output_file_name, 'w', newline='', encoding='utf-8') as output_file:\n",
" writer = csv.writer(output_file)\n",
" for row in reader:\n",
" writer.writerow(row)\n",
" \n",
"# 删除原始文件\n",
"os.remove(input_file_name)\n",
"# 重命名新文件为原始文件名\n",
"os.rename(output_file_name, input_file_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb75d9a0",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(\"D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv\",encoding=\"gbk\")\n",
"\n",
"# df.to_csv(\"D:/data_all/doing/bu主力连续/bu主力连续_20190530_bak.csv\", encoding=\"gbk\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b9687682",
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e300950",
"metadata": {},
"outputs": [],
"source": [
"print(csv_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ddf98c52",
"metadata": {},
"outputs": [],
"source": [
"os.remove(csv_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "364add14",
"metadata": {},
"outputs": [],
"source": [
"import csv\n",
"import codecs\n",
" \n",
"# # 输入输出文件路径\n",
"# input_file_path = 'utf8_file.csv'\n",
"# output_file_path = 'gbk_file.csv'\n",
" \n",
"# 打开UTF-8编码的CSV文件进行读取\n",
"with open(csv_file, 'r', encoding= detected_encoding) as input_file:\n",
" reader = csv.reader(csv_file)\n",
" os.remove(csv_file)\n",
" # 使用codecs打开GBK编码的CSV文件进行写入\n",
" with codecs.open(csv_file, 'w', encoding='gbk') as output_file:\n",
" writer = csv.writer(csv_file)\n",
" for row in reader:\n",
" writer.writerow(row)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6f546c6",
"metadata": {},
"outputs": [],
"source": [
"\n",
"csv_df.to_csv(csv_file, index=False, encoding='gbk')"
]
},
{
"cell_type": "markdown",
"id": "a028b88e",
"metadata": {},
"source": [
"# 列错读取错误文件\n",
"当前读取文件读取错误: bu主力连续\\bu主力连续_20190530.csv\n",
"当前读取文件读取错误: bu主力连续\\bu主力连续_20190701.csv"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "200715a8",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"df = pd.read_csv(\n",
" \"D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv\", \n",
" header=0,\n",
" # usecols=[ 1, 2, 3, 7, 12, 13, 14, 15],\n",
" # names=[\n",
" # \"合约代码\",\n",
" # \"时间\",\n",
" # \"最新\",\n",
" # \"成交量\",\n",
" # \"买一价\",\n",
" # \"卖一价\",\n",
" # \"买一量\",\n",
" # \"卖一量\",\n",
" # ],\n",
" encoding='ISO-8859-1',#ISO-8859-1\n",
" # skiprows=0,\n",
" # parse_dates=['时间']\n",
" )\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8de54b6",
"metadata": {},
"outputs": [],
"source": [
"from chardet.universaldetector import UniversalDetector\n",
"import chardet\n",
"with open(\"D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv\", 'rb') as f:\n",
" data = f.read()\n",
" detected_encoding = chardet.detect(data)['encoding']\n",
"print(detected_encoding)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "d874e66b",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd \n",
"import chardet \n",
"# 读取CSV文件 \n",
"df = pd.read_csv('D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv',encoding=\"ISO-8859-1\", error_bad_lines=False, warn_bad_lines=True) \n"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "55c69d0a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CSV文件字符编码为: ISO-8859-1\n"
]
}
],
"source": [
"# 检测CSV文件字符编码 \n",
"with open('D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv', 'rb') as f: \n",
" result = chardet.detect(f.read()) \n",
" encoding = result['encoding'] \n",
" print('CSV文件字符编码为:', encoding) \n",
" # 转换CSV文件编码格式 \n"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "bee6800a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CSV文件已转换为GBK编码格式\n"
]
}
],
"source": [
"if encoding != 'GBK': \n",
" df.to_csv('D:/data_all/doing/bu主力连续/bu主力连续_20190530_gbk.csv', encoding='utf-8', index=False) \n",
" print('CSV文件已转换为GBK编码格式') \n",
"else: print('CSV文件无需转换')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c61930c",
"metadata": {},
"outputs": [],
"source": [
"#'gbk' codec can't decode byte 0xc3 in position 189076: illegal multibyte sequence\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b22d988",
"metadata": {},
"outputs": [],
"source": [
"df = df.sort_values(by = ['datetime'], ascending=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc14352a",
"metadata": {},
"outputs": [],
"source": [
"df['datetime'] = sorted(df['datetime'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2dc49f0c",
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "befec0c4",
"metadata": {},
"outputs": [],
"source": [
"import chardet"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6397fffb",
"metadata": {},
"outputs": [],
"source": [
"# 假设file_path是你要读取的文件路径\n",
"with open(\"D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv\", 'rb') as file:\n",
" data = file.read()\n",
" \n",
"# 使用chardet检测编码\n",
"detected_encoding = chardet.detect(data)['encoding']\n",
" \n",
"# # 如果检测到的编码不是gbk可以尝试转换编码后再读取\n",
"# if detected_encoding and detected_encoding != 'gbk':\n",
"# with open(file_path, 'rb') as file:\n",
"# data = file.read().decode(detected_encoding)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2433c60",
"metadata": {},
"outputs": [],
"source": [
"print(detected_encoding)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}


@@ -0,0 +1,66 @@
import pandas as pd
import os
# from datetime import time as s_time
# from datetime import datetime
import chardet
import csv
# has_common_keys(commodity_day_dict, commodity_night_dict,financial_time_dict)
# import chardet
# # 假设file_path是你要读取的文件路径
# with open(file_path, 'rb') as file:
# data = file.read()
# # 使用chardet检测编码
# detected_encoding = chardet.detect(data)['encoding']
# # 如果检测到的编码不是gbk可以尝试转换编码后再读取
# if detected_encoding and detected_encoding != 'gbk':
# with open(file_path, 'rb') as file:
# data = file.read().decode(detected_encoding)
def file_format_conversion(all_csv_files, sp_char):
# 获取当前目录下的所有文件名包含sp_char的csv文件
csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
print("csv_files:", csv_files)
merged_df = pd.DataFrame()
dir = os.getcwd()
# 循环遍历每个csv文件
for csv_file in csv_files:
# file_path = os.path.join(dir, file)
# 读取csv文件并使用第一行为列标题编译不通过可以改为gbk
with open(csv_file, 'rb') as f:
data = f.read()
detected_encoding = chardet.detect(data)['encoding']
if detected_encoding and detected_encoding != 'gbk':
print("当前文件不为gbk格式:", csv_file)
convert_csv_to_gbk(csv_file,detected_encoding)
# with open(file_path, 'rb') as file:
# data = file.read().decode(detected_encoding)
# 定义一个函数来处理单个CSV文件
def convert_csv_to_gbk(csv_file, encoding_type):
# Read the CSV file with the encoding detected above
with open(csv_file, 'r', encoding=encoding_type) as f:
reader = csv.reader(f)
rows = list(reader)
# Write the rows to a temporary file in GBK
new_file = csv_file.replace('.csv', '_gbk.csv')
with open(new_file, 'w', newline='', encoding='gbk') as f:
writer = csv.writer(f)
writer.writerows(rows)
# Remove the original file, then rename the converted file back to the original name
os.remove(csv_file)
os.rename(new_file, csv_file)
print("当前文件已经转换为gbk格式:", csv_file)


@@ -0,0 +1,351 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "2d85dda4",
"metadata": {},
"outputs": [],
"source": [
"'''\n",
"Author: zhoujie2104231 zhoujie@me.com\n",
"Date: 2024-04-07 19:26:52\n",
"LastEditors: zhoujie2104231 zhoujie@me.com\n",
"LastEditTime: 2024-04-07 20:56:21\n",
"FilePath: \"/Gitee_Code/trading_strategy/SS_Code\\SF08\\使用文档\\数据转换最终版/merged_by_year.ipynb\"\n",
"Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n",
"'''\n",
"import os\n",
"import pandas as pd\n",
"from merged_tickdata import merged_old_tickdata, merged_new_tickdata, all_dict\n",
"# from merged_tickdata_tmp import merged_old_tickdata, merged_new_tickdata, all_dict"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "fe51b707",
"metadata": {},
"outputs": [],
"source": [
"new_directory = \"D:/data_all/doing\" # \"E:/data/大商所/test\"\n",
"os.chdir(new_directory) "
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "3356d8ff",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"csv_files: ['bu主力连续\\\\bu主力连续_20190703.csv', 'bu主力连续\\\\bu主力连续_20190528.csv', 'bu主力连续\\\\bu主力连续_20190529.csv', 'bu主力连续\\\\bu主力连续_20190530.csv', 'bu主力连续\\\\bu主力连续_20190531.csv', 'bu主力连续\\\\bu主力连续_20190603.csv', 'bu主力连续\\\\bu主力连续_20190604.csv', 'bu主力连续\\\\bu主力连续_20190605.csv', 'bu主力连续\\\\bu主力连续_20190606.csv', 'bu主力连续\\\\bu主力连续_20190610.csv', 'bu主力连续\\\\bu主力连续_20190611.csv', 'bu主力连续\\\\bu主力连续_20190612.csv', 'bu主力连续\\\\bu主力连续_20190613.csv', 'bu主力连续\\\\bu主力连续_20190614.csv', 'bu主力连续\\\\bu主力连续_20190617.csv', 'bu主力连续\\\\bu主力连续_20190618.csv', 'bu主力连续\\\\bu主力连续_20190619.csv', 'bu主力连续\\\\bu主力连续_20190620.csv', 'bu主力连续\\\\bu主力连续_20190621.csv', 'bu主力连续\\\\bu主力连续_20190624.csv', 'bu主力连续\\\\bu主力连续_20190625.csv', 'bu主力连续\\\\bu主力连续_20190626.csv', 'bu主力连续\\\\bu主力连续_20190627.csv', 'bu主力连续\\\\bu主力连续_20190628.csv', 'bu主力连续\\\\bu主力连续_20190701.csv', 'bu主力连续\\\\bu主力连续_20190702.csv']\n",
"当前读取文件读取错误: <_io.BufferedReader name='D:\\\\data_all\\\\doing\\\\bu主力连续\\\\bu主力连续_20190530.csv'>\n",
"当前读取文件正确解码格式 ISO-8859-1\n",
"当前读取文件读取错误: <_io.BufferedReader name='D:\\\\data_all\\\\doing\\\\bu主力连续\\\\bu主力连续_20190701.csv'>\n",
"当前读取文件正确解码格式 ISO-8859-1\n",
"code_value characters: bu888\n",
"按照夜盘截止交易时间为23:00筛选商品期货品种\n",
"bu888_2019数据生成成功!\n",
"bu888_2019.CSV文件合并成功\n"
]
}
],
"source": [
"for root, dirs, files in os.walk('.'):\n",
" if len(dirs) > 0:\n",
" for dir in dirs:\n",
" # 获取二级子文件夹中的所有 CSV 文件\n",
" all_csv_files = [os.path.join(dir, file) for file in os.listdir(dir) if file.endswith('.csv')]\n",
"\n",
" sp_old_chars = ['_2019', '_2020', '_2021']\n",
" for sp_old_char in sp_old_chars:\n",
" csv_old_files = [sp_file for sp_file in all_csv_files if sp_old_char in sp_file]\n",
" if len(csv_old_files) > 0:\n",
" old_df, old_code_value = merged_old_tickdata(csv_old_files, sp_old_char)\n",
" folder_path = str('D:/data_merged/上期所/%s'%(old_code_value))\n",
"\n",
" if not os.path.exists(folder_path):\n",
" os.makedirs(folder_path)\n",
" \n",
" old_df.to_csv('%s/%s%s.csv'%(folder_path,old_code_value,sp_old_char), index=False)\n",
" print(\"%s%s.CSV文件合并成功\"%(old_code_value,sp_old_char))\n",
"\n",
" sp_new_chars = ['_2022', '_2023']\n",
" for sp_new_char in sp_new_chars:\n",
" csv_new_files = [sp_file for sp_file in all_csv_files if sp_new_char in sp_file]\n",
" if len(csv_new_files) > 0:\n",
" new_df, new_code_value = merged_new_tickdata(csv_new_files, sp_new_char)\n",
" new_df.head()\n",
" folder_path = str('D:/data_merged/上期所/%s'%(new_code_value))\n",
"\n",
" if not os.path.exists(folder_path):\n",
" os.makedirs(folder_path)\n",
" \n",
" new_df.to_csv('%s/%s%s.csv'%(folder_path,new_code_value,sp_new_char), index=False)\n",
" print(\"%s%s.CSV文件合并成功\"%(new_code_value,sp_new_char))"
]
},
{
"cell_type": "markdown",
"id": "a028b88e",
"metadata": {},
"source": [
"# 列错读取错误文件\n",
"当前读取文件读取错误: bu主力连续\\bu主力连续_20190530.csv\n",
"当前读取文件读取错误: bu主力连续\\bu主力连续_20190701.csv"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "200715a8",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"df = pd.read_csv(\n",
" \"D:/data_merged/上期所/bu888/bu888_2019.csv\", \n",
" header=0,\n",
" # usecols=[ 1, 2, 3, 7, 12, 13, 14, 15],\n",
" # names=[\n",
" # \"合约代码\",\n",
" # \"时间\",\n",
" # \"最新\",\n",
" # \"成交量\",\n",
" # \"买一价\",\n",
" # \"卖一价\",\n",
" # \"买一量\",\n",
" # \"卖一量\",\n",
" # ],\n",
" encoding='gbk',#ISO-8859-1\n",
" # skiprows=0,\n",
" # parse_dates=['时间']\n",
" )\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "3c61930c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>main_contract</th>\n",
" <th>symbol</th>\n",
" <th>datetime</th>\n",
" <th>lastprice</th>\n",
" <th>volume</th>\n",
" <th>bid_p</th>\n",
" <th>ask_p</th>\n",
" <th>bid_v</th>\n",
" <th>ask_v</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>bu888</td>\n",
" <td>bu1912</td>\n",
" <td>2019-05-27 21:00:00.500</td>\n",
" <td>3252.0</td>\n",
" <td>734</td>\n",
" <td>3252.0</td>\n",
" <td>3256.0</td>\n",
" <td>288</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>bu888</td>\n",
" <td>bu1912</td>\n",
" <td>2019-05-27 21:00:01.000</td>\n",
" <td>3250.0</td>\n",
" <td>3240</td>\n",
" <td>3250.0</td>\n",
" <td>3252.0</td>\n",
" <td>43</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>bu888</td>\n",
" <td>bu1912</td>\n",
" <td>2019-05-27 21:00:01.500</td>\n",
" <td>3250.0</td>\n",
" <td>2226</td>\n",
" <td>3250.0</td>\n",
" <td>3252.0</td>\n",
" <td>15</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>bu888</td>\n",
" <td>bu1912</td>\n",
" <td>2019-05-27 21:00:02.000</td>\n",
" <td>3248.0</td>\n",
" <td>962</td>\n",
" <td>3246.0</td>\n",
" <td>3248.0</td>\n",
" <td>110</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>bu888</td>\n",
" <td>bu1912</td>\n",
" <td>2019-05-27 21:00:02.500</td>\n",
" <td>3248.0</td>\n",
" <td>1178</td>\n",
" <td>3248.0</td>\n",
" <td>3250.0</td>\n",
" <td>10</td>\n",
" <td>110</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" main_contract symbol datetime lastprice volume bid_p \\\n",
"0 bu888 bu1912 2019-05-27 21:00:00.500 3252.0 734 3252.0 \n",
"1 bu888 bu1912 2019-05-27 21:00:01.000 3250.0 3240 3250.0 \n",
"2 bu888 bu1912 2019-05-27 21:00:01.500 3250.0 2226 3250.0 \n",
"3 bu888 bu1912 2019-05-27 21:00:02.000 3248.0 962 3246.0 \n",
"4 bu888 bu1912 2019-05-27 21:00:02.500 3248.0 1178 3248.0 \n",
"\n",
" ask_p bid_v ask_v \n",
"0 3256.0 288 15 \n",
"1 3252.0 43 38 \n",
"2 3252.0 15 19 \n",
"3 3248.0 110 8 \n",
"4 3250.0 10 110 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#'gbk' codec can't decode byte 0xc3 in position 189076: illegal multibyte sequence\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b22d988",
"metadata": {},
"outputs": [],
"source": [
"df = df.sort_values(by = ['datetime'], ascending=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc14352a",
"metadata": {},
"outputs": [],
"source": [
"df['datetime'] = sorted(df['datetime'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2dc49f0c",
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "befec0c4",
"metadata": {},
"outputs": [],
"source": [
"import chardet"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6397fffb",
"metadata": {},
"outputs": [],
"source": [
"# 假设file_path是你要读取的文件路径\n",
"with open(\"D:/data_all/doing/bu主力连续/bu主力连续_20190530.csv\", 'rb') as file:\n",
" data = file.read()\n",
" \n",
"# 使用chardet检测编码\n",
"detected_encoding = chardet.detect(data)['encoding']\n",
" \n",
"# # 如果检测到的编码不是gbk可以尝试转换编码后再读取\n",
"# if detected_encoding and detected_encoding != 'gbk':\n",
"# with open(file_path, 'rb') as file:\n",
"# data = file.read().decode(detected_encoding)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2433c60",
"metadata": {},
"outputs": [],
"source": [
"print(detected_encoding)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}


@@ -0,0 +1,335 @@
import pandas as pd
import os
from datetime import time as s_time
from datetime import datetime
import chardet
# 日盘商品期货交易品种
commodity_day_dict = {'bb': s_time(15,00), 'jd': s_time(15,00), 'lh': s_time(15,00), 'l': s_time(15,00), 'fb': s_time(15,00), 'ec': s_time(15,00),
'AP': s_time(15,00), 'CJ': s_time(15,00), 'JR': s_time(15,00), 'LR': s_time(15,00), 'RS': s_time(15,00), 'PK': s_time(15,00),
'PM': s_time(15,00), 'PX': s_time(15,00), 'RI': s_time(15,00), 'ao': s_time(15,00), 'br': s_time(15,00), 'wr': s_time(15,00),}
# 夜盘商品期货交易品种
commodity_night_dict = {'sc': s_time(2,30), 'bc': s_time(1,0), 'lu': s_time(23,0), 'nr': s_time(23,0),'au': s_time(2,30), 'ag': s_time(2,30),
'ss': s_time(1,0), 'sn': s_time(1,0), 'ni': s_time(1,0), 'pb': s_time(1,0),'zn': s_time(1,0), 'al': s_time(1,0), 'cu': s_time(1,0),
'ru': s_time(23,0), 'rb': s_time(23,0), 'hc': s_time(23,0), 'fu': s_time(23,0), 'bu': s_time(23,0), 'sp': s_time(23,0),
'PF': s_time(23,0), 'SR': s_time(23,0), 'CF': s_time(23,0), 'CY': s_time(23,0), 'RM': s_time(23,0), 'MA': s_time(23,0),
'TA': s_time(23,0), 'ZC': s_time(23,0), 'FG': s_time(23,0), 'OI': s_time(23,0), 'SA': s_time(23,0),
'p': s_time(23,0), 'j': s_time(23,0), 'jm': s_time(23,0), 'i': s_time(23,0), 'l': s_time(23,0), 'v': s_time(23,0),
'pp': s_time(23,0), 'eg': s_time(23,0), 'c': s_time(23,0), 'cs': s_time(23,0), 'y': s_time(23,0), 'm': s_time(23,0),
'a': s_time(23,0), 'b': s_time(23,0), 'rr': s_time(23,0), 'eb': s_time(23,0), 'pg': s_time(23,0)}
# 金融期货交易品种
financial_time_dict = {'IH': s_time(15,00), 'IF': s_time(15,00), 'IC': s_time(15,00), 'IM': s_time(15,00),'T': s_time(15,00), 'TS': s_time(15,00),
'TF': s_time(15,00), 'TL': s_time(15,00)}
# 所有已列入的筛选品种
all_dict = {k: v for d in [commodity_day_dict, commodity_night_dict, financial_time_dict] for k, v in d.items()}
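# Note: 'l' appears in both commodity_day_dict and commodity_night_dict above; in this merge the
# later dictionary wins, so all_dict['l'] ends up with the night-session value s_time(23, 0).
# Quick check for such overlapping keys (illustrative snippet):
# overlap = set(commodity_day_dict) & set(commodity_night_dict)
# print(overlap)  # -> {'l'}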
# def has_common_keys(*dicts):
# keys_union = set().union(*dicts) # 计算所有字典键的并集
# keys_intersection = set().intersection(*dicts) # 计算所有字典键的交集
# return len(keys_intersection) > 0
# has_common_keys(commodity_day_dict, commodity_night_dict,financial_time_dict)
# import chardet
# # 假设file_path是你要读取的文件路径
# with open(file_path, 'rb') as file:
# data = file.read()
# # 使用chardet检测编码
# detected_encoding = chardet.detect(data)['encoding']
# # 如果检测到的编码不是gbk可以尝试转换编码后再读取
# if detected_encoding and detected_encoding != 'gbk':
# with open(file_path, 'rb') as file:
# data = file.read().decode(detected_encoding)
def split_alpha_numeric(string):
alpha_chars = ""
numeric_chars = ""
for char in string:
if char.isalpha():
alpha_chars += char
elif char.isdigit():
numeric_chars += char
return alpha_chars, numeric_chars
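# Example: split_alpha_numeric("bu1912") returns ("bu", "1912"); the continuous dominant-contract
# code used below is then alpha_chars + "888", e.g. "bu888".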
def find_files(all_csv_files):
all_csv_files = sorted(all_csv_files)
sp_old_chars = ['_2019','_2020','_2021']
sp_old_chars = sorted(sp_old_chars)
sp_new_chars = ['_2022','_2023']
sp_new_chars = sorted(sp_new_chars)
csv_old_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_old_chars)]
csv_new_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_new_chars)]
return csv_old_files, csv_new_files
def merged_old_tickdata(all_csv_files, sp_char):
merged_up_df = pd.DataFrame()
merged_up_df = merged_old_unprocessed_tickdata(all_csv_files, sp_char)
# 获取当前目录下的所有文件名包含sp_char的csv文件
# 添加主力连续的合约代码主力连续为888指数连续可以用999次主力连续可以使用889表头用“统一代码”
alpha_chars, numeric_chars = split_alpha_numeric(merged_up_df.loc[0,'合约代码'])
code_value = alpha_chars + "888"
print("code_value characters:", code_value)
# "统一代码" may already have been inserted by merged_old_unprocessed_tickdata, so avoid inserting it twice
if "统一代码" not in merged_up_df.columns:
merged_up_df.insert(loc=0, column="统一代码", value=code_value)
# Warn (instead of looping forever) when the product is not listed in any of the filter dictionaries
if alpha_chars not in all_dict.keys():
print("%s期货品种未列入所有筛选条件中!!!"%(code_value))
# Build the standardized frame from merged_up_df returned above
merged_df = pd.DataFrame({'main_contract':merged_up_df['统一代码'],'symbol':merged_up_df['合约代码'],'datetime':merged_up_df['时间'],'lastprice':merged_up_df['最新'],'volume':merged_up_df['成交量'],
'bid_p':merged_up_df['买一价'],'ask_p':merged_up_df['卖一价'],'bid_v':merged_up_df['买一量'],'ask_v':merged_up_df['卖一量']})
merged_df['tmp_time'] = merged_df['datetime'].dt.strftime('%H:%M:%S.%f')
merged_df['time'] = merged_df['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time
del merged_df['tmp_time']
if alpha_chars in financial_time_dict.keys():
drop_index1 = pd.DataFrame().index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 0, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) | (merged_df['time'] < s_time(9, 30, 0, 000000))].index
drop_index4 = pd.DataFrame().index
print("按照中金所交易时间筛选金融期货品种")
# else:
elif alpha_chars in commodity_night_dict.keys():
if commodity_night_dict[alpha_chars] == s_time(23,00):
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) & (merged_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = merged_df.loc[(merged_df['time'] > s_time(23, 0, 0, 000000)) | (merged_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为23:00筛选商品期货品种")
elif commodity_night_dict[alpha_chars] == s_time(1,00):
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) & (merged_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = merged_df.loc[(merged_df['time'] > s_time(1, 0, 0, 000000)) & (merged_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为1:00筛选商品期货品种")
elif commodity_night_dict[alpha_chars] == s_time(2,30):
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) & (merged_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = merged_df.loc[(merged_df['time'] > s_time(2, 30, 0, 000000)) & (merged_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为2:30筛选商品期货品种")
else:
print("夜盘截止交易时间未设置或者设置错误!!!")
elif alpha_chars in commodity_day_dict.keys():
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) | (merged_df['time'] < s_time(9, 0, 0, 000000))].index
drop_index4 = pd.DataFrame().index
print("按照无夜盘筛选商品期货品种")
else:
print("%s期货品种未列入筛选条件中!!!"%(code_value))
# 清理不在交易时间段的数据
merged_df.drop(labels=drop_index1, axis=0, inplace=True)
merged_df.drop(drop_index2, axis=0, inplace=True)
merged_df.drop(drop_index3, axis=0, inplace=True)
merged_df.drop(drop_index4, axis=0, inplace=True)
del merged_df['time']
# merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])
# Sort whole rows by datetime; sorting only the datetime column would misalign it with the other columns
merged_df = merged_df.sort_values(by=['datetime'], ascending=True).reset_index(drop=True)
print("%s%s数据生成成功!"%(code_value,sp_char))
return merged_df, code_value
def merged_new_tickdata(all_csv_files, sp_char):
# 获取当前目录下的所有文件名包含sp_char的csv文件
csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
print("csv_files:", csv_files)
merged_df = pd.DataFrame()
dir = os.getcwd()
# 循环遍历每个csv文件
for file in csv_files:
# 读取csv文件并使用第一行为列标题编译不通过可以改为gbk
try:
df = pd.read_csv(
file,
header=0,
usecols=[0, 1, 4, 11, 20, 21, 22, 23, 24, 25, 43],
names=[
"交易日",
"合约代码",
"最新价",
"数量",
"最后修改时间",
"最后修改毫秒",
"申买价一",
"申买量一",
"申卖价一",
"申卖量一",
"业务日期",
],
encoding='gbk',
# skiprows=0,
parse_dates=['业务日期','最后修改时间','最后修改毫秒'])#注意此处增加的排序,为了后面按时间排序
except Exception:
# On a read failure, detect and report the file's real encoding, then skip the file
file_path = os.path.join(dir, file)
with open(file_path, 'rb') as fobj:
data = fobj.read()
detected_encoding = chardet.detect(data)['encoding']
print("当前读取文件读取错误:", file_path)
print("当前读取文件正确解码格式", detected_encoding)
continue
# 删除重复行
df.drop_duplicates(inplace=True)
# 将数据合并到新的DataFrame中
merged_df = pd.concat([merged_df, df], ignore_index=True)
# 删除重复列
merged_df.drop_duplicates(subset = merged_df.columns.tolist(), inplace=True)
# 重置行索引
merged_df.reset_index(inplace=True, drop=True)
#print("合约代码:", merged_df["合约代码"])
# 插入新的数据
alpha_chars, numeric_chars = split_alpha_numeric(merged_df.loc[0,'合约代码'])
# print("Alphabetical characters:", alpha_chars)
# 添加主力连续的合约代码主力连续为888指数连续可以用999次主力连续可以使用889表头用“统一代码”
code_value = alpha_chars + "888"
print("code_value characters:", code_value)
merged_df.insert(loc=1, column="统一代码", value=code_value)
# Warn (instead of looping forever) when the product is not listed in any of the filter dictionaries
if alpha_chars not in all_dict.keys():
print("%s期货品种未列入所有筛选条件中!!!"%(code_value))
#日期修正
#merged_df['业务日期'] = pd.to_datetime(merged_df['业务日期'])
merged_df['业务日期'] = merged_df['业务日期'].dt.strftime('%Y-%m-%d')
merged_df['datetime'] = merged_df['业务日期'] + ' '+merged_df['最后修改时间'].dt.time.astype(str) + '.' + merged_df['最后修改毫秒'].astype(str)
# 将'datetime' 列的数据类型更改为 datetime 格式如果数据转换少8个小时可以用timedelta处理
merged_df['datetime'] = pd.to_datetime(merged_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')
#计算瞬时成交量
merged_df['volume'] = merged_df['数量'] - merged_df['数量'].shift(1)
merged_df['volume'] = merged_df['volume'].fillna(0)
merged_df =pd.DataFrame({'main_contract':merged_df['统一代码'],'symbol':merged_df['合约代码'],'datetime':merged_df['datetime'],'lastprice':merged_df['最新价'],'volume':merged_df['volume'],
'bid_p':merged_df['申买价一'],'ask_p':merged_df['申卖价一'],'bid_v':merged_df['申买量一'],'ask_v':merged_df['申卖量一']})
merged_df['tmp_time'] = merged_df['datetime'].dt.strftime('%H:%M:%S.%f')
merged_df['time'] = merged_df['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time
del merged_df['tmp_time']
if alpha_chars in financial_time_dict.keys():
drop_index1 = pd.DataFrame().index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 0, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) | (merged_df['time'] < s_time(9, 30, 0, 000000))].index
drop_index4 = pd.DataFrame().index
print("按照中金所交易时间筛选金融期货品种")
# else:
elif alpha_chars in commodity_night_dict.keys():
if commodity_night_dict[alpha_chars] == s_time(23,00):
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) & (merged_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = merged_df.loc[(merged_df['time'] > s_time(23, 0, 0, 000000)) | (merged_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为23:00筛选商品期货品种")
elif commodity_night_dict[alpha_chars] == s_time(1,00):
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) & (merged_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = merged_df.loc[(merged_df['time'] > s_time(1, 0, 0, 000000)) & (merged_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为1:00筛选商品期货品种")
elif commodity_night_dict[alpha_chars] == s_time(2,30):
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) & (merged_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = merged_df.loc[(merged_df['time'] > s_time(2, 30, 0, 000000)) & (merged_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为2:30筛选商品期货品种")
else:
print("夜盘截止交易时间未设置或者设置错误!!!")
elif alpha_chars in commodity_day_dict.keys():
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) | (merged_df['time'] < s_time(9, 0, 0, 000000))].index
drop_index4 = pd.DataFrame().index
print("按照无夜盘筛选商品期货品种")
else:
print("%s期货品种未列入筛选条件中!!!"%(code_value))
# 清理不在交易时间段的数据
merged_df.drop(labels=drop_index1, axis=0, inplace=True)
merged_df.drop(drop_index2, axis=0, inplace=True)
merged_df.drop(drop_index3, axis=0, inplace=True)
merged_df.drop(drop_index4, axis=0, inplace=True)
del merged_df['time']
# Sort whole rows by datetime; sorting only the datetime column would misalign it with the other columns
merged_df = merged_df.sort_values(by=['datetime'], ascending=True).reset_index(drop=True)
print("%s%s数据生成成功!"%(code_value,sp_char))
return merged_df, code_value
def merged_old_unprocessed_tickdata(all_csv_files, sp_char):
csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
print("csv_files:", csv_files)
merged_df = pd.DataFrame()
dir = os.getcwd()
# 循环遍历每个csv文件
for file in csv_files:
try:
# 读取csv文件并使用第一行为列标题编译不通过可以改为gbk
df = pd.read_csv(file, header=0, encoding='gbk')
except Exception:
# On a read failure, detect and report the file's real encoding, then skip the file
file_path = os.path.join(dir, file)
with open(file_path, 'rb') as fobj:
data = fobj.read()
detected_encoding = chardet.detect(data)['encoding']
print("当前读取文件读取错误:", file_path)
print("当前读取文件正确解码格式", detected_encoding)
continue
# 删除重复行
df.drop_duplicates(inplace=True)
# 将数据合并到新的DataFrame中
merged_df = pd.concat([merged_df, df], ignore_index=True)
# 删除重复列
merged_df.drop_duplicates(subset=merged_df.columns.tolist(), inplace=True)
# 重置行索引
merged_df.reset_index(inplace=True, drop=True)
# 插入新的数据
alpha_chars, numeric_chars = split_alpha_numeric(merged_df.loc[0,'合约代码'])
# 添加主力连续的合约代码主力连续为888指数连续可以用999次主力连续可以使用889表头用“统一代码”
code_value = alpha_chars + "888"
print("code_value characters:", code_value)
merged_df.insert(loc=1,column="统一代码", value=code_value)
# 将合并后的数据保存到csv文件中
folder_path = "合成tick数据2019-2021"
if not os.path.exists(folder_path):
os.mkdir('合成tick数据2019-2021')
# sorted_merged_df = merged_df.sort_values(by= ['业务日期','最后修改时间','最后修改毫秒'], ascending=[True, True, True])
# sorted_merged_df.to_csv('./合成tick数据/%s.csv'%(code_value), index=False)
merged_df['时间'] = pd.to_datetime(merged_df['时间'])
sorted_merged_df = merged_df.sort_values(by = ['时间'], ascending=True)
sorted_merged_df.to_csv('./合成tick数据2019-2021/%s%s.csv'%(code_value,sp_char), index=False)
del merged_df
#merged_df.to_csv('./合成tick数据/%s.csv'%(code_value), index=False) # data already sorted by time; could be used directly if the source folders are time-ordered
# Report success and return the merged, time-sorted frame so merged_old_tickdata can keep processing it
print("CSV文件合并成功")
return sorted_merged_df


@@ -0,0 +1,281 @@
import pandas as pd
import os
from datetime import time as s_time
from datetime import datetime
# 日盘商品期货交易品种
commodity_day_dict = {'bb': s_time(15,00), 'jd': s_time(15,00), 'lh': s_time(15,00), 'l': s_time(15,00), 'fb': s_time(15,00),}
# 夜盘商品期货交易品种
commodity_night_dict = {'sc': s_time(2,30), 'bc': s_time(1,0), 'lu': s_time(23,0), 'nr': s_time(23,0),'au': s_time(2,30), 'ag': s_time(2,30),
'ss': s_time(1,0), 'sn': s_time(1,0), 'ni': s_time(1,0), 'pb': s_time(1,0),'zn': s_time(1,0), 'al': s_time(1,0), 'cu': s_time(1,0),
'ru': s_time(23,0), 'rb': s_time(23,0), 'hc': s_time(23,0), 'fu': s_time(23,0), 'bu': s_time(23,0), 'sp': s_time(23,0),
'PF': s_time(23,0), 'SR': s_time(23,0), 'CF': s_time(23,0), 'CY': s_time(23,0), 'RM': s_time(23,0), 'MA': s_time(23,0),
'TA': s_time(23,0), 'ZC': s_time(23,0), 'FG': s_time(23,0), 'OI': s_time(23,0), 'SA': s_time(23,0),
'p': s_time(23,0), 'j': s_time(23,0), 'jm': s_time(23,0), 'i': s_time(23,0), 'l': s_time(23,0), 'v': s_time(23,0),
'pp': s_time(23,0), 'eg': s_time(23,0), 'c': s_time(23,0), 'cs': s_time(23,0), 'y': s_time(23,0), 'm': s_time(23,0),
'a': s_time(23,0), 'b': s_time(23,0), 'rr': s_time(23,0), 'eb': s_time(23,0), 'pg': s_time(23,0)}
# 金融期货交易品种
financial_time_dict = {'IH': s_time(15,00), 'IF': s_time(15,00), 'IC': s_time(15,00), 'IM': s_time(15,00),'T': s_time(15,00), 'TS': s_time(15,00),
'TF': s_time(15,00), 'TL': s_time(15,00)}
# 所有已列入的筛选品种
all_dict = {k: v for d in [commodity_day_dict, commodity_night_dict, financial_time_dict] for k, v in d.items()}
# def has_common_keys(*dicts):
# keys_union = set().union(*dicts) # 计算所有字典键的并集
# keys_intersection = set().intersection(*dicts) # 计算所有字典键的交集
# return len(keys_intersection) > 0
# has_common_keys(commodity_day_dict, commodity_night_dict,financial_time_dict)
def split_alpha_numeric(string):
alpha_chars = ""
numeric_chars = ""
for char in string:
if char.isalpha():
alpha_chars += char
elif char.isdigit():
numeric_chars += char
return alpha_chars, numeric_chars
def find_files(all_csv_files):
all_csv_files = sorted(all_csv_files)
sp_old_chars = ['_2019','_2020','_2021']
sp_old_chars = sorted(sp_old_chars)
sp_new_chars = ['_2022','_2023']
sp_new_chars = sorted(sp_new_chars)
csv_old_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_old_chars)]
csv_new_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_new_chars)]
return csv_old_files, csv_new_files
def merged_old_tickdata(all_csv_files, sp_char):
# 获取当前目录下的所有文件名包含sp_char的csv文件
csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
print("csv_files:", csv_files)
merged_df = pd.DataFrame()
# 循环遍历每个csv文件
for file in csv_files:
# 读取csv文件并使用第一行为列标题编译不通过可以改为gbk
df = pd.read_csv(
file,
header=0,
usecols=[ 1, 2, 3, 4, 8, 13, 14, 15, 16],
names=[
"统一代码",
"合约代码",
"时间",
"最新",
"成交量",
"买一价",
"卖一价",
"买一量",
"卖一量",
],
encoding='utf-8',
# skiprows=0,
parse_dates=['时间'])#注意此处增加的排序,为了后面按时间排序
# 删除重复行
df.drop_duplicates(inplace=True)
# 将数据合并到新的DataFrame中
merged_df = pd.concat([merged_df, df], ignore_index=True)
# 删除重复列
merged_df.drop_duplicates(subset = merged_df.columns.tolist(), inplace=True)
# 重置行索引
merged_df.reset_index(inplace=True, drop=True)
# print("合约代码:", merged_df["合约代码"])
# # 插入新的数据
alpha_chars, numeric_chars = split_alpha_numeric(merged_df.loc[0,'合约代码'])
# print("Alphabetical characters:", alpha_chars)
# 添加主力连续的合约代码主力连续为888指数连续可以用999次主力连续可以使用889表头用“统一代码”
code_value = alpha_chars + "888"
print("code_value characters:", code_value)
# merged_df.insert(loc=0,column="统一代码", value=code_value)
# Warn (instead of looping forever) when the product is not listed in any of the filter dictionaries
if alpha_chars not in all_dict.keys():
print("%s期货品种未列入所有筛选条件中!!!"%(code_value))
# merged_df['时间'] = pd.to_datetime(merged_df['时间'])
merged_df =pd.DataFrame({'main_contract':merged_df['统一代码'],'symbol':merged_df['合约代码'],'datetime':merged_df['时间'],'lastprice':merged_df['最新'],'volume':merged_df['成交量'],
'bid_p':merged_df['买一价'],'ask_p':merged_df['卖一价'],'bid_v':merged_df['买一量'],'ask_v':merged_df['卖一量']})
merged_df['tmp_time'] = merged_df['datetime'].dt.strftime('%H:%M:%S.%f')
merged_df['time'] = merged_df['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time
del merged_df['tmp_time']
if alpha_chars in financial_time_dict.keys():
drop_index1 = pd.DataFrame().index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 0, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) | (merged_df['time'] < s_time(9, 30, 0, 000000))].index
drop_index4 = pd.DataFrame().index
print("按照中金所交易时间筛选金融期货品种")
# else:
elif alpha_chars in commodity_night_dict.keys():
if commodity_night_dict[alpha_chars] == s_time(23,00):
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) & (merged_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = merged_df.loc[(merged_df['time'] > s_time(23, 0, 0, 000000)) | (merged_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为23:00筛选商品期货品种")
elif commodity_night_dict[alpha_chars] == s_time(1,00):
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) & (merged_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = merged_df.loc[(merged_df['time'] > s_time(1, 0, 0, 000000)) & (merged_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为1:00筛选商品期货品种")
elif commodity_night_dict[alpha_chars] == s_time(2,30):
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) & (merged_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = merged_df.loc[(merged_df['time'] > s_time(2, 30, 0, 000000)) & (merged_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为2:30筛选商品期货品种")
else:
print("夜盘截止交易时间未设置或者设置错误!!!")
elif alpha_chars in commodity_day_dict.keys():
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) | (merged_df['time'] < s_time(9, 0, 0, 000000))].index
drop_index4 = pd.DataFrame().index
print("按照无夜盘筛选商品期货品种")
else:
print("%s期货品种未列入筛选条件中!!!"%(code_value))
# 清理不在交易时间段的数据
merged_df.drop(labels=drop_index1, axis=0, inplace=True)
merged_df.drop(drop_index2, axis=0, inplace=True)
merged_df.drop(drop_index3, axis=0, inplace=True)
merged_df.drop(drop_index4, axis=0, inplace=True)
del merged_df['time']
# Sort whole rows by datetime so the merged data is in time order
merged_df = merged_df.sort_values(by=['datetime'], ascending=True).reset_index(drop=True)
print("%s%s数据生成成功!"%(code_value,sp_char))
return merged_df, code_value
def merged_new_tickdata(all_csv_files, sp_char):
# 获取当前目录下的所有文件名包含sp_char的csv文件
csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
print("csv_files:", csv_files)
merged_df = pd.DataFrame()
# 循环遍历每个csv文件
for file in csv_files:
# 读取csv文件并使用第一行为列标题编译不通过可以改为gbk
df = pd.read_csv(
file,
header=0,
usecols=[0, 1, 2, 5, 12, 21, 22, 23, 24, 25, 26, 44],
names=[
"交易日",
"统一代码",
"合约代码",
"最新价",
"数量",
"最后修改时间",
"最后修改毫秒",
"申买价一",
"申买量一",
"申卖价一",
"申卖量一",
"业务日期",
],
encoding='utf-8',
# skiprows=0,
parse_dates=['业务日期','最后修改时间','最后修改毫秒'])#注意此处增加的排序,为了后面按时间排序
# 删除重复行
df.drop_duplicates(inplace=True)
# 将数据合并到新的DataFrame中
merged_df = pd.concat([merged_df, df], ignore_index=True)
# 删除重复列
merged_df.drop_duplicates(subset = merged_df.columns.tolist(), inplace=True)
# 重置行索引
merged_df.reset_index(inplace=True, drop=True)
#print("合约代码:", merged_df["合约代码"])
# 插入新的数据
alpha_chars, numeric_chars = split_alpha_numeric(merged_df.loc[0,'合约代码'])
# print("Alphabetical characters:", alpha_chars)
# 添加主力连续的合约代码主力连续为888指数连续可以用999次主力连续可以使用889表头用“统一代码”
code_value = alpha_chars + "888"
print("code_value characters:", code_value)
# merged_df.insert(loc=1, column="统一代码", value=code_value)
# Warn (instead of looping forever) when the product is not listed in any of the filter dictionaries
if alpha_chars not in all_dict.keys():
print("%s期货品种未列入所有筛选条件中!!!"%(code_value))
#日期修正
#merged_df['业务日期'] = pd.to_datetime(merged_df['业务日期'])
merged_df['业务日期'] = merged_df['业务日期'].dt.strftime('%Y-%m-%d')
merged_df['datetime'] = merged_df['业务日期'] + ' '+merged_df['最后修改时间'].dt.time.astype(str) + '.' + merged_df['最后修改毫秒'].astype(str)
# 将'datetime' 列的数据类型更改为 datetime 格式如果数据转换少8个小时可以用timedelta处理
merged_df['datetime'] = pd.to_datetime(merged_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')
#计算瞬时成交量
merged_df['volume'] = merged_df['数量'] - merged_df['数量'].shift(1)
merged_df['volume'] = merged_df['volume'].fillna(0)
merged_df =pd.DataFrame({'main_contract':merged_df['统一代码'],'symbol':merged_df['合约代码'],'datetime':merged_df['datetime'],'lastprice':merged_df['最新价'],'volume':merged_df['volume'],
'bid_p':merged_df['申买价一'],'ask_p':merged_df['申卖价一'],'bid_v':merged_df['申买量一'],'ask_v':merged_df['申卖量一']})
merged_df['tmp_time'] = merged_df['datetime'].dt.strftime('%H:%M:%S.%f')
merged_df['time'] = merged_df['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time
del merged_df['tmp_time']
if alpha_chars in financial_time_dict.keys():
drop_index1 = pd.DataFrame().index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 0, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) | (merged_df['time'] < s_time(9, 30, 0, 000000))].index
drop_index4 = pd.DataFrame().index
print("按照中金所交易时间筛选金融期货品种")
# else:
elif alpha_chars in commodity_night_dict.keys():
if commodity_night_dict[alpha_chars] == s_time(23,00):
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) & (merged_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = merged_df.loc[(merged_df['time'] > s_time(23, 0, 0, 000000)) | (merged_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为23:00筛选商品期货品种")
elif commodity_night_dict[alpha_chars] == s_time(1,00):
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) & (merged_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = merged_df.loc[(merged_df['time'] > s_time(1, 0, 0, 000000)) & (merged_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为1:00筛选商品期货品种")
elif commodity_night_dict[alpha_chars] == s_time(2,30):
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) & (merged_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = merged_df.loc[(merged_df['time'] > s_time(2, 30, 0, 000000)) & (merged_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为2:30筛选商品期货品种")
else:
print("夜盘截止交易时间未设置或者设置错误!!!")
elif alpha_chars in commodity_day_dict.keys():
drop_index1 = merged_df.loc[(merged_df['time'] > s_time(10, 15, 0, 000000)) & (merged_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = merged_df.loc[(merged_df['time'] > s_time(11, 30, 0, 000000)) & (merged_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = merged_df.loc[(merged_df['time'] > s_time(15, 0, 0, 000000)) | (merged_df['time'] < s_time(9, 0, 0, 000000))].index
drop_index4 = pd.DataFrame().index
print("按照无夜盘筛选商品期货品种")
else:
print("%s期货品种未列入筛选条件中!!!"%(code_value))
# 清理不在交易时间段的数据
merged_df.drop(labels=drop_index1, axis=0, inplace=True)
merged_df.drop(drop_index2, axis=0, inplace=True)
merged_df.drop(drop_index3, axis=0, inplace=True)
merged_df.drop(drop_index4, axis=0, inplace=True)
del merged_df['time']
# Sort whole rows by datetime so the merged data is in time order
merged_df = merged_df.sort_values(by=['datetime'], ascending=True).reset_index(drop=True)
print("%s%s数据生成成功!"%(code_value,sp_char))
return merged_df, code_value


@@ -0,0 +1,454 @@
'''
Detailed description of this code:
# WeChat official account: 松鼠Quant
# Homepage: www.quant789.com
# This strategy is for learning and exchange only; investors bear full responsibility for live-trading profits and losses!!!
# Copyright belongs to 松鼠Quant; forwarding or reselling the source code is forbidden and violators will be prosecuted.
1. Import the required modules and libraries:
backtrader for the backtesting engine
datetime for handling dates and times
GenericCSVData for loading data from CSV files
numpy for numerical operations
time for time-related operations
matplotlib.pyplot for plotting
2. Define the custom commission scheme MyCommission:
inherits from bt.CommInfoBase
3. Define the custom data-feed class GenericCSV_SIG:
inherits from GenericCSVData and adds two extra lines, 'sig' and 'delta'
defines the parameters 'sig' and 'delta'
4. Define the MyStrategy_固定止损_跟踪止盈 class:
inherits from bt.Strategy, backtrader's base strategy class
defines two parameters: trailing_stop_percent and fixed_stop_loss_percent
initializes the strategy and sets up its variables and indicators
implements the next method, which is called on every new bar of the data feed
updates the trailing take-profit price from the current bar
implements trailing take-profit exits and fixed stop-loss exits
opens and manages long and short positions according to the signal
prints debug information while the strategy runs
5. The if __name__ == "__main__": block:
sets up the backtest environment with a Cerebro instance
loads data from a CSV file through the GenericCSV_SIG feed
adds the data feed and the strategy to the Cerebro instance
adds observers and analyzers to evaluate performance
sets the initial capital and broker parameters
runs the backtest and collects the results
prints the backtest report: returns, drawdown, win rate and trade statistics
plots the backtest results with matplotlib
Usage notes: adjust the following before running
1. Run Python from the folder that contains the CSV files and change the CSV file name to the one you want to backtest.
2. In MyStrategy_固定止损_跟踪止盈 you can change the trailing percentage and the moving-average periods.
3. In __init__ you can change the lot size (Lots).
4. In next: (a) adjust the end-of-session flattening times, which differ between products; (b) window_size and window_size_delta currently use a period of 10; (c) adjust the "long entry combination" and "short entry combination" conditions.
5. In __main__: (a) set the backtest period via fromdate and todate; (b) set the initial capital, commission, per-lot margin and contract multiplier according to your trading platform.
'''
# The calculation rule for windows_size (daily bars) still needs further study
import backtrader as bt
from datetime import datetime
from datetime import time as s_time
from backtrader.feeds import GenericCSVData
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
import os
# 导入表头解决图标中中文显示问题
from pylab import mpl
手续费汇总=0
class GenericCSV_SIG(GenericCSVData):
# 从基类继承,添加一个 'sig'delta
lines = ('sig','delta')
# 添加参数为从基类继承的参数
params = (('sig',6),('delta', 8))
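# The header notes above mention a custom commission scheme, MyCommission, derived from bt.CommInfoBase;
# it is not visible in this excerpt. Below is a minimal sketch of such a fixed-fee futures scheme. All
# parameter names and values here are illustrative assumptions, not the actual implementation of this file.
class MyCommission(bt.CommInfoBase):
    params = (
        ('commission', 2.0),   # fixed fee per contract (illustrative value)
        ('mult', 10),          # contract multiplier (illustrative value)
        ('margin', 5000),      # margin per contract (illustrative value)
        ('stocklike', False),  # futures-style instrument
        ('commtype', bt.CommInfoBase.COMM_FIXED),  # charge a fixed amount per contract
    )
    def _getcommission(self, size, price, pseudoexec):
        # charge the fixed fee for every contract traded
        return abs(size) * self.p.commission
# A feed built from GenericCSV_SIG is created like any GenericCSVData feed, with the extra 'sig' and
# 'delta' columns mapped by the params above (columns 6 and 8 by default), for example:
# data = GenericCSV_SIG(dataname='xxx.csv', dtformat='%Y-%m-%d %H:%M:%S.%f',
#                       datetime=0, open=1, high=2, low=3, close=4, volume=5, sig=6, delta=8)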
class MyStrategy_固定止损_跟踪止盈(bt.Strategy):
params = (
('trailing_stop_percent', 0.02), # 跟踪止盈百分比
('fixed_stop_loss_percent', 0.01), # 固定止损百分比
# ('sma1_period', 60), # 移动平均线周期
# ('sma2_period',120),
)
def __init__(self):
self.Lots=1 #下单手数
self.signal = self.datas[0].sig # 使用sig字段作为策略的信号字段
self.delta= self.datas[0].delta
# 获取数据序列别名列表
line_aliases = self.datas[0].getlinealiases()
self.pos=0
print(line_aliases)
self.high=self.datas[0].high
self.low=self.datas[0].low
self.closes=self.datas[0].close
self.open=self.datas[0].open
self.trailing_stop_percent = self.params.trailing_stop_percent
self.short_trailing_stop_price = 0
self.long_trailing_stop_price = 0
self.fixed_stop_loss_percent = self.params.fixed_stop_loss_percent
self.sl_long_price=0
self.sl_shor_price=0
#240884432
self.out_long=0
self.out_short=0
self.rinei_ma=[]
self.rinei_mean=0
self.datetime_list= []
self.high_list = []
self.low_list = []
self.close_list = []
self.opens_list = []
self.deltas_list = []
self.delta_cumsum=[]
self.barN = 0
# self.sma1 = bt.indicators.SimpleMovingAverage(
# self.data, period=self.params.sma1_period
# )
# self.sma2 = bt.indicators.SimpleMovingAverage(
# self.data, period=self.params.sma2_period
# )
self.df = pd.DataFrame(columns=['datetime', 'high', 'low', 'close', 'open', 'delta', 'delta_cumsum'])
self.trader_df=pd.DataFrame(columns=['open', 'high', 'low', 'close', 'volume', 'openInterest','delta'])
def log(self, txt, dt=None):
'''可选,构建策略打印日志的函数:可用于打印订单记录或交易记录等'''
dt = dt or self.datas[0].datetime.date(0)
print('%s, %s' % (dt.isoformat(), txt))
def notify_order(self, order):
# 未被处理的订单
if order.status in [order.Submitted, order.Accepted]:
return
# 已经处理的订单
if order.status in [order.Completed, order.Canceled, order.Margin]:
global 手续费汇总
if order.isbuy():
手续费汇总 +=order.executed.comm
self.log(
'BUY EXECUTED, 订单编号:%.0f,成交价格: %.2f, 手续费滑点:%.2f, 成交量: %.2f, 品种: %s,手续费汇总:%.2f' %
(order.ref, # 订单编号
order.executed.price, # 成交价
order.executed.comm, # 佣金
order.executed.size, # 成交量
order.data._name,# 品种名称
手续费汇总))
else: # Sell
手续费汇总 +=order.executed.comm
self.log('SELL EXECUTED, 订单编号:%.0f,成交价格: %.2f, 手续费滑点:%.2f, 成交量: %.2f, 品种: %s,手续费汇总:%.2f' %
(order.ref,
order.executed.price,
order.executed.comm,
order.executed.size,
order.data._name,
手续费汇总))
def next(self):
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
#bar线计数初始化
self.barN += 1
position = self.getposition(self.datas[0]).size
#时间轴
dt = bt.num2date(self.data.datetime[0])
#更新跟踪止损价格
def 每日重置数据():
# 获取当前时间
current_time = dt.time()
#print(current_time)
# Flattening time window 1 (day-session close): 14:55 to 15:00
clearing_time1_start = s_time(14, 55)
clearing_time1_end = s_time(15, 0)
# Flattening time window 2 (night-session close; 22:55 to 23:00 here, adjust per product)
clearing_time2_start = s_time(22, 55)
clearing_time2_end = s_time(23, 0)
# 创建一个标志变量
clearing_executed = False
if clearing_time1_start <= current_time <= clearing_time1_end and not clearing_executed :
clearing_executed = True # 设置标志变量为已执行
self.rinei_ma=[]
self.delta_cumsum=[]
self.deltas_list=[]
elif clearing_time2_start <= current_time <= clearing_time2_end and not clearing_executed :
clearing_executed = True # 设置标志变量为已执行
self.rinei_ma=[]
self.delta_cumsum=[]
self.deltas_list=[]
# 如果不在任何时间范围内,可以执行其他操作
else:
self.rinei_ma.append(self.closes[0])
self.rinei_mean = np.mean(self.rinei_ma)
#self.delta_cumsum=[]
#self.deltas_list=[]
#print('rinei_ma',self.rinei_ma)
clearing_executed = False
pass
return clearing_executed
run_kg=每日重置数据()
#过滤成交量为0或小于0
if self.data.volume[0] <= 0 :
return
#print(f'volume,{self.data.volume[0]}')
if self.long_trailing_stop_price >0 and self.pos>0:
#print('datetime+sig: ',dt,'旧多头出线',self.long_trailing_stop_price,'low',self.low[0])
self.long_trailing_stop_price = self.low[0] if self.long_trailing_stop_price<self.low[0] else self.long_trailing_stop_price
#print('datetime+sig: ',dt,'多头出线',self.long_trailing_stop_price)
if self.short_trailing_stop_price >0 and self.pos<0:
#print('datetime+sig: ',dt,'旧空头出线',self.short_trailing_stop_price,'high',self.high[0])
self.short_trailing_stop_price = self.high[0] if self.high[0] <self.short_trailing_stop_price else self.short_trailing_stop_price
#print('datetime+sig: ',dt,'空头出线',self.short_trailing_stop_price)
self.out_long=self.long_trailing_stop_price * (1 - self.trailing_stop_percent)
self.out_short=self.short_trailing_stop_price*(1 + self.trailing_stop_percent)
#print('datetime+sig: ',dt,'空头出线',self.out_short)
#print('datetime+sig: ',dt,'多头出线',self.out_long)
# 跟踪出场
if self.out_long >0:
if self.low[0] < self.out_long and self.pos>0 and self.sl_long_price>0 and self.low[0]>self.sl_long_price:
print('--多头止盈出场datetime+sig: ',dt,'Trailing stop triggered: Closing position','TR',self.out_long,'low', self.low[0])
self.close(data=self.data, price=self.data.close[0],size=self.Lots, exectype=bt.Order.Market)
self.long_trailing_stop_price = 0
self.sl_long_price=0
self.out_long=0
self.pos = 0
if self.out_short>0:
if self.high[0] > self.out_short and self.pos<0 and self.sl_shor_price>0 and self.high[0]<self.sl_shor_price:
print('--空头止盈出场datetime+sig: ',dt,'Trailing stop triggered: Closing position: ','TR',self.out_short,'high', self.high[0])
self.close(data=self.data, price=self.data.close[0],size=self.Lots, exectype=bt.Order.Market)
self.short_trailing_stop_price = 0
self.sl_shor_price=0
                self.out_short = 0
self.pos = 0
# 固定止损
self.fixed_stop_loss_L = self.sl_long_price * (1 - self.fixed_stop_loss_percent)
if self.sl_long_price>0 and self.fixed_stop_loss_L>0 and self.pos > 0 and self.closes[0] < self.fixed_stop_loss_L:
print('--多头止损datetime+sig: ', dt, 'Fixed stop loss triggered: Closing position', 'SL', self.fixed_stop_loss_L, 'close', self.closes[0])
self.close(data=self.data, price=self.data.close[0],size=self.Lots, exectype=bt.Order.Market)
self.long_trailing_stop_price = 0
self.sl_long_price=0
self.out_long = 0
self.pos = 0
self.fixed_stop_loss_S = self.sl_shor_price * (1 + self.fixed_stop_loss_percent)
if self.sl_shor_price>0 and self.fixed_stop_loss_S>0 and self.pos < 0 and self.closes[0] > self.fixed_stop_loss_S:
print('--空头止损datetime+sig: ', dt, 'Fixed stop loss triggered: Closing position', 'SL', self.fixed_stop_loss_S, 'close', self.closes[0])
self.close(data=self.data, price=self.data.close[0], size=self.Lots,exectype=bt.Order.Market)
self.short_trailing_stop_price = 0
self.sl_shor_price=0
self.out_short = 0
self.pos = 0
# 更新最高价和最低价的列表
self.datetime_list.append(dt)
self.high_list.append(self.data.high[0])
self.low_list.append(self.data.low[0])
self.close_list.append(self.data.close[0])
self.opens_list.append(self.data.open[0])
self.deltas_list.append(self.data.delta[0])
# 计算delta累计
self.delta_cumsum.append(sum(self.deltas_list))
# 将当前行数据添加到 DataFrame
# new_row = {
# 'datetime': dt,
# 'high': self.data.high[0],
# 'low': self.data.low[0],
# 'close': self.data.close[0],
# 'open': self.data.open[0],
# 'delta': self.data.delta[0],
# 'delta_cumsum': sum(self.deltas_list)
# }
# # 使用pandas.concat代替append
# self.df = pd.concat([self.df, pd.DataFrame([new_row])], ignore_index=True)
# # 检查文件是否存在
# csv_file_path = f"output.csv"
# if os.path.exists(csv_file_path):
# # 仅保存最后一行数据
# self.df.tail(1).to_csv(csv_file_path, mode='a', header=False, index=False)
# else:
# # 创建新文件并保存整个DataFrame
# self.df.to_csv(csv_file_path, index=False)
#
if run_kg==False : #
# # 构建delta的正数和负数
# positive_nums = [x for x in self.data.delta if x > 0]
# negative_nums = [x for x in self.data.delta if x < 0]
# positive_sums = [x for x in self.delta_cumsum if x > 0]
# negative_sums = [x for x in self.delta_cumsum if x < 0]
# #
# # 开多组合= self.rinei_mean>0 and self.closes[0]>self.rinei_mean and self.signal[0] >1 and self.data.delta[0]>1000 and self.delta_cumsum[-1]>1500
# # 开空组合= self.rinei_mean>0 and self.closes[0]<self.rinei_mean and self.signal[0] <-1 and self.data.delta[0]<-1000 and self.delta_cumsum[-1]<-1500
# 开多组合= self.rinei_mean>0 and self.closes[0]>self.rinei_mean and self.signal[0] > 1 and self.data.delta[0]>max(self.data.delta[-60:-1]) #and self.delta_cumsum[-1] > np.max(self.delta_cumsum[-61:-2]) #np.mean(self.data.delta_cumsum[-61:-2])
# 开空组合= self.rinei_mean>0 and self.closes[0]<self.rinei_mean and self.signal[0] <-1 and self.data.delta[0]<min(self.data.delta[-60:-1]) #and self.delta_cumsum[-1] < np.min(self.delta_cumsum[-61:-2]) #np.mean(self.data.delta_cumsum[-61:-2])
#print(self.delta_cumsum)
开多组合= self.rinei_mean>0 and self.closes[0]>self.rinei_mean and self.signal[0] >1 and self.data.delta[0]>1500 and self.delta_cumsum[-1]>2000
开空组合= self.rinei_mean>0 and self.closes[0]<self.rinei_mean and self.signal[0] <-1 and self.data.delta[0]<-1500 and self.delta_cumsum[-1]<-2000
平多条件=self.pos<0 and self.signal[0] >1
平空条件=self.pos>0 and self.signal[0] <-1
if self.pos !=1 : #
if 平多条件:
#print('datetime+sig: ', dt, 'Fixed stop loss triggered: Closing position', 'SL', self.fixed_stop_loss_S, 'close', self.closes[0])
self.close(data=self.data, price=self.data.close[0], exectype=bt.Order.Market)
self.short_trailing_stop_price = 0
self.sl_shor_price=0
self.out_short = 0
self.pos = 0
if 开多组合 : #
self.buy(data=self.data, price=self.data.close[0], size=1, exectype=bt.Order.Market)
self.pos=1
self.long_trailing_stop_price=self.low[0]
self.sl_long_price=self.data.open[0]
#print('datetime+sig: ',dt,' sig: ',self.signal[0],'保存多头价格: ',self.long_trailing_stop_price)
if self.pos !=-1 : #
if 平空条件:
#print('datetime+sig: ', dt, 'Fixed stop loss triggered: Closing position', 'SL', self.fixed_stop_loss_L, 'close', self.closes[0])
self.close(data=self.data, price=self.data.close[0], exectype=bt.Order.Market)
self.long_trailing_stop_price = 0
self.sl_long_price=0
self.out_long = 0
self.pos = 0
if 开空组合: #
self.sell(data=self.data, price=self.data.close[0], size=1, exectype=bt.Order.Market)
self.pos=-1
self.short_trailing_stop_price=self.high[0]
self.sl_shor_price=self.data.open[0]
#print('datetime+sig: ',dt,' sig: ',self.signal[0],'保存空头价格: ',self.short_trailing_stop_price)
if __name__ == "__main__":
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
# 创建Cerebro实例
cerebro = bt.Cerebro()
#数据
csv_file='./tick生成的OF数据-own/back_ofdata_dj.csv' #
# 从CSV文件加载数据
data = GenericCSV_SIG(
dataname=csv_file,
fromdate=datetime(2023,1,1),
todate=datetime(2023,12,29),
timeframe=bt.TimeFrame.Minutes,
nullvalue=0.0,
dtformat='%Y-%m-%d %H:%M:%S',
datetime=0,
high=3,
low=4,
open=2,
close=1,
volume=5,
openinterest=None,
sig=6,
delta=8
)
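    # 上面的列号与 back_data() 生成的 back_ofdata_dj.csv 列顺序一一对应:
    # 0=datetime, 1=close, 2=open, 3=high, 4=low, 5=volume, 6=sig, 7=symbol, 8=delta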
# 添加数据到Cerebro实例
cerebro.adddata(data)
# 添加策略到Cerebro实例
cerebro.addstrategy(MyStrategy_固定止损_跟踪止盈)
# 添加观察者和分析器到Cerebro实例
#cerebro.addobserver(bt.observers.BuySell)
cerebro.addobserver(bt.observers.Value)
cerebro.addanalyzer(bt.analyzers.Returns, _name='returns')
cerebro.addanalyzer(bt.analyzers.DrawDown, _name='drawdown')
cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name='trades')
初始资金=10000
cerebro.broker.setcash(初始资金) # 设置初始资金
#手续费,单手保证金,合约倍数
cerebro.broker.setcommission(commission=14, margin=5000.0,mult=10)#回测参数
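    # 按 backtrader 的约定:指定 margin 后commission 按每手固定金额收取(此处 14 元/手,作为手续费加滑点的近似),
    # margin 为每手占用保证金 5000 元mult 为合约乘数,即价格每变动 1 点、每手盈亏 10 元。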
# 运行回测
result = cerebro.run()
# 获取策略分析器中的结果
analyzer = result[0].analyzers
total_trades = analyzer.trades.get_analysis()['total']['total']
winning_trades = analyzer.trades.get_analysis()['won']['total']
# 获取TradeAnalyzer分析器的结果
trade_analyzer_result = analyzer.trades.get_analysis()
# 获取总收益额
total_profit = trade_analyzer_result.pnl.net.total
if total_trades > 0:
win_rate = winning_trades / total_trades
else:
win_rate = 0.0
# 打印回测报告
print('回测报告:')
print('期初权益', 初始资金)
print('期末权益', 初始资金+round(total_profit))
print('盈亏额', round(total_profit))
print('最大回撤率,', round(analyzer.drawdown.get_analysis()['drawdown'],2),'%')
print('胜率,', round(win_rate*100,2),'%')
print("交易次数,", total_trades)
print("盈利次数,", winning_trades)
print("亏损次数,", total_trades - winning_trades)
print('总手续费+滑点,', 手续费汇总)
手续费汇总=0
# 设置中文显示
mpl.rcParams["font.sans-serif"] = ["SimHei"]
mpl.rcParams["axes.unicode_minus"] = False
# 保存回测图像文件
plot = cerebro.plot()[0][0]
plot_filename = os.path.splitext(os.path.basename(csv_file))[0] +'ss'+ '_plot.png'
    plot_path = os.path.join('部分回测报告', plot_filename)
    os.makedirs('部分回测报告', exist_ok=True)  # 输出文件夹不存在时先创建,避免 savefig 报错
    plot.savefig(plot_path)
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!

View File

@@ -0,0 +1,309 @@
'''逐行解释代码:
1.导入所需的模块和库,包括 time、table来自 matplotlib.pyplot、pandas、numpy、numba 和 operator。
2.定义了一个名为 process 的函数,用于处理买卖盘的字典数据。
3.定义了一个名为 dataload 的函数,用于读取并处理 tick 数据,生成分钟级别的 bar 数据。
4.定义了一个名为 orderflow_df_new 的函数,用于处理 tick 数据和分钟级别的 bar 数据,生成订单流数据。
5.定义了一个名为 GetOrderFlow_dj 的函数,用于计算订单流的指标(堆积)。
6.定义了一个名为 back_data 的函数,用于保存回测数据。
7.在 if __name__ == "__main__": 下首先读取csv数据并调用 dataload() 函数,得到 tick 数据和分钟级别的 bar 数据。
然后调用 orderflow_df_new() 函数,传入 tick 数据和 bar 数据,生成订单流数据 ofdata。
打印输出 ofdata。
8.调用 back_data() 函数,将订单流数据保存为回测数据。
打印输出 "done",表示程序执行完毕。
总体而言,该代码的功能是从 tick 数据中生成分钟级别的 bar 数据,然后根据 bar 数据计算订单流,并将订单流数据保存为回测数据。
使用说明:使用前需要调整的相关参数如下
1.确定python到csv文件夹下运行,修改csv文件为需要运行的csv
2.dataload函数一、确认 datetime 及其他字段名key与数据表头一致不一致则修改二、resample 中 rule 的取样周期可按需修改本脚本当前为1T即1分钟。
3.back_data函数和main中需要注意修改相应的时间节点将开盘的初始数据设置为0
4.如果生成的时间和实际时间相差8小时可以调用timedelta函数修改
'''
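# 使用说明第 2、4 点的最小示意(仅为示例,并非下方脚本已有的逻辑):
#   改用 5 分钟 bar把 dataload 中 resample 的 rule='1T' 改为 rule='5T'
#   生成时间比实际少 8 小时df['datetime'] = pd.to_datetime(df['datetime']) + timedelta(hours=8)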
# GetOrderFlow_dj函数需要进一步了解先不修改
import time
from matplotlib.pyplot import table
from datetime import timedelta
import pandas as pd
import numpy as np
from numba import *
from numba import cuda
import operator
import os
# 对于含时区的datetime可以通过timedelta来修改数据
#from datetime import datetime, timedelta
#os.environ['tz'] = 'Asia/ShangHai'
#time.tzset()
def process(bidDict,askDict):
bidDictResult,askDictResult = {},{}
sList = sorted(set(list(bidDict.keys()) + list(askDict.keys())))
#print('bidDict:',list(bidDict.keys()))
#print('askDict:',list(askDict.keys()))
#print('sList:',sList)
#240884432
for s in sList:
if s in bidDict:
bidDictResult[s] = bidDict[s]
else:
bidDictResult[s] = 0
if s in askDict:
askDictResult[s] = askDict[s]
else:
askDictResult[s] = 0
return bidDictResult,askDictResult
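# process() 的一个用法示例(数字为假设):
#   bid = {3500: 5}, ask = {3501: 7}
#   process(bid, ask) -> ({3500: 5, 3501: 0}, {3500: 0, 3501: 7})
#   即把买卖两侧的价位补齐成同一组价格档,缺失档位的量记为 0方便逐档对比。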
def dataload(data):
#日期修正
data['业务日期'] = data['业务日期'].dt.strftime('%Y-%m-%d')
data['datetime'] = data['业务日期'] + ' '+data['最后修改时间'].dt.time.astype(str) + '.' + data['最后修改毫秒'].astype(str)
# 将 'datetime' 列的数据类型更改为 datetime 格式如果数据转换少8个小时可以用timedelta处理
data['datetime'] = pd.to_datetime(data['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')
# 如果需要,可以将 datetime 列格式化为字符串
#data['formatted_date'] = data['datetime'].dt.strftime('%Y-%m-%d %H:%M:%S.%f')
#计算瞬时成交量
data['volume'] = data['数量'] - data['数量'].shift(1)
data['volume'] = data['volume'].fillna(0)
#整理好要用的tick数据元素,具体按照数据的表头进行修改
tickdata =pd.DataFrame({'datetime':data['datetime'],'symbol':data['合约代码'],'lastprice':data['最新价'],
'volume':data['volume'],'bid_p':data['申买价一'],'bid_v':data['申买量一'],'ask_p':data['申卖价一'],'ask_v':data['申卖量一']})
#tickdata['datetime'] = pd.to_datetime(tickdata['datetime'])
tickdata['open'] = tickdata['lastprice']
tickdata['high'] = tickdata['lastprice']
tickdata['low'] = tickdata['lastprice']
tickdata['close'] = tickdata['lastprice']
tickdata['starttime'] = tickdata['datetime']
# # 找到满足条件的行的索引
# condition = tickdata['datetime'].dt.time == pd.to_datetime('22:59:59').time()
# indexes_to_update = tickdata.index[condition]
# # 遍历索引,将不一致的日期更新为上一行的日期
# for idx in indexes_to_update:
# if idx > 0:
# tickdata.at[idx, 'datetime'] = tickdata.at[idx - 1, 'datetime'].replace(hour=22, minute=59, second=59)
# 确保日期列按升序排序
tickdata.sort_values(by='datetime', inplace=True)
# 时序重采样 https://zhuanlan.zhihu.com/p/70353374
bardata = tickdata.resample(on = 'datetime',rule = '1T',label = 'right',closed = 'right').agg({'starttime':'first','symbol':'last','open':'first','high':'max','low':'min','close':'last','volume':'sum'}).reset_index(drop = False)
#240884432
bardata =bardata.dropna().reset_index(drop = True)
return tickdata,bardata
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
def orderflow_df_new(df_tick,df_min):
df_of=pd.DataFrame({})
t1 = time.time()
startArray = pd.to_datetime(df_min['starttime']).values
voluememin= df_min['volume'].values
highs=df_min['high'].values
lows=df_min['low'].values
opens=df_min['open'].values
closes=df_min['close'].values
endArray = pd.to_datetime(df_min['datetime']).values
tTickArray = pd.to_datetime(df_tick['datetime']).values
bp1TickArray = df_tick['bid_p'].values
ap1TickArray = df_tick['ask_p'].values
lastTickArray = df_tick['lastprice'].values
volumeTickArray = df_tick['volume'].values
symbolarray = df_tick['symbol'].values
indexFinal = 0
for index,tEnd in enumerate(endArray):
start = startArray[index]
bidDict = {}
askDict = {}
bar_vol=voluememin[index]
bar_close=closes[index]
bar_open=opens[index]
bar_low=lows[index]
bar_high=highs[index]
bar_symbol=symbolarray[index]
dt=endArray[index]
for indexTick in range(indexFinal,len(df_tick)):
if tTickArray[indexTick] > tEnd:
break
elif (tTickArray[indexTick] >= start) & (tTickArray[indexTick] <= tEnd):
if indexTick==0:
Bp = round(bp1TickArray[indexTick],2)
Ap = round(ap1TickArray[indexTick],2)
else:
Bp = round(bp1TickArray[indexTick - 1],2)
Ap = round(ap1TickArray[indexTick - 1],2)
LastPrice = round(lastTickArray[indexTick],2)
Volume = volumeTickArray[indexTick]
if LastPrice >= Ap:
if LastPrice in askDict.keys():
askDict[LastPrice] += Volume
else:
askDict[LastPrice] = Volume
if LastPrice <= Bp:
if LastPrice in bidDict.keys():
bidDict[LastPrice] += Volume
else:
bidDict[LastPrice] = Volume
indexFinal = indexTick
bidDictResult,askDictResult = process(bidDict,askDict)
bidDictResult=dict(sorted(bidDictResult.items(),key=operator.itemgetter(0)))
askDictResult=dict(sorted(askDictResult.items(),key=operator.itemgetter(0)))
prinslist=list(bidDictResult.keys())
asklist=list(askDictResult.values())
bidlist=list(bidDictResult.values())
delta=(sum(askDictResult.values()) - sum(bidDictResult.values()))
df=pd.DataFrame({'price':pd.Series([prinslist]),'Ask':pd.Series([asklist]),'Bid':pd.Series([bidlist])})
df['symbol']=bar_symbol
df['datetime']=dt
df['delta']=str(delta)
df['close']=bar_close
df['open']=bar_open
df['high']=bar_high
df['low']=bar_low
df['volume']=bar_vol
# 过滤'volume'列小于等于0的行
df = df[df['volume'] > 0]
# 重新排序DataFrame按照'datetime'列进行升序排序
df = df.sort_values(by='datetime', ascending=True)
# 重新设置索引,以便索引能够正确对齐
df = df.reset_index(drop=True)
#df['ticktime']=tTickArray[indexTick]
df['dj']=GetOrderFlow_dj(df)
#print(df)
df_of = pd.concat([df_of, df], ignore_index=True)
print(time.time() - t1)
return df_of
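# delta 的含义示例(数字为假设):某根 bar 内以卖一价及以上成交(主动买)共 120 手,
# 以买一价及以下成交(主动卖)共 80 手,则 delta = 120 - 80 = +40表示净主动买入为 40 手。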
def GetOrderFlow_dj(kData):
itemAskBG=['rgb(0,255,255)', 'rgb(255,0,255)', "rgb(255,182,193)"] # 买盘背景色
itemBidBG=['rgb(173,255,47)', 'rgb(255,127,80)', "rgb(32,178,170)"] # 卖盘背景色
Config={
'Value1':3,
'Value2':3,
'Value3':3,
'Value4':True,
}
aryData=kData
djcout=0
for index,row in aryData.iterrows():
kItem=aryData.iloc[index]
high=kItem['high']
low=kItem['low']
close=kItem['close']
open=kItem['open']
dtime=kItem['datetime']
price_s=kItem['price']
Ask_s=kItem['Ask']
Bid_s=kItem['Bid']
delta=kItem['delta']
price_s=price_s
Ask_s=Ask_s
Bid_s=Bid_s
gj=0
xq=0
gxx=0
xxx=0
for i in np.arange (0, len(price_s),1) :
duiji={
'price':0,
'time':0,
'longshort':0,
'cout':0,
'color':'blue'
}
if i==0 :
delta=delta
order= {
"Price":price_s[i],
"Bid":{ "Value":Bid_s[i]},
"Ask":{ "Value":Ask_s[i]}
}
if i>=0 and i<len(price_s)-1:
if (order["Bid"]["Value"]>Ask_s[i+1]*int(Config['Value1'])):
order["Bid"]["Color"]=itemAskBG[1]
gxx+=1
gj+=1
if gj>=int(Config['Value2']) and Config['Value4']==True:
duiji['price']=price_s[i]
duiji['time']=dtime
duiji['longshort']=-1
duiji['cout']=gj
duiji['color']='rgba(0,139,0,0.45)'#绿色
if float(duiji['price'])>0:
djcout+=-1
else :
gj=0
if i>=1 and i<len(price_s)-1:
if (order["Ask"]["Value"]>Bid_s[i-1]*int(Config['Value1'])):
xq+=1
xxx+=1
order["Ask"]["Color"]=itemBidBG[1]
if xq>=int(Config['Value2']) and Config['Value4']==True:
duiji['price']=price_s[i]
duiji['time']=dtime
duiji['longshort']=1
duiji['cout']=xq
duiji['color']='rgba(255,0,0,0.45)' #红色
if float(duiji['price'])>0:
djcout+=1
else :
xq=0
return djcout
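# GetOrderFlow_dj() 逻辑小结(对应上面代码):价位按升序排列,
# 当某价位的 Bid 成交量(主动卖)大于上方一档 Ask 成交量(主动买)的 Value1 倍时记一次失衡,
# 连续失衡达到 Value2 档即构成一次空头堆积djcout 减 1反之 Ask 对下方一档 Bid 的失衡
# 连续达到 Value2 档构成多头堆积djcout 加 1。返回的 djcout 即该 bar 的堆积净计数,
# 在 back_data() 中作为回测信号列 sig 保存。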
def back_data(df):
# 创建新的DataFrame并填充需要的列
new_df = pd.DataFrame()
new_df['datetime'] = pd.to_datetime(df['datetime'], format='%Y/%m/%d %H:%M')
new_df['close'] = df['close']
new_df['open'] = df['open']
new_df['high'] = df['high']
new_df['low'] = df['low']
new_df['volume'] = df['volume']
new_df['sig'] = df['dj']
new_df['symbol'] = df['symbol']
new_df['delta'] = df['delta']
new_df.to_csv(f'./tick生成的OF数据/back_ofdata_dj.csv',index=False)
#new_df.to_csv(f'{sym}back_ofdata_dj.csv',index=False)
if __name__ == "__main__":
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
data=pd.read_csv('rb主力连续_20230103.csv',encoding='GBK',parse_dates=['业务日期','最后修改时间']) #
print(data)
tick,bar=dataload(data)
ofdata = orderflow_df_new(tick,bar)
print(ofdata)
#保存orderflow数据
# os.mkdir('./tick生成的OF数据')或者在to_csv中修改生成的文件名
folder_path = "tick生成的OF数据"
    if not os.path.exists(folder_path):
        os.mkdir(folder_path)
ofdata.to_csv('./tick生成的OF数据/ofdata_dj.csv')
#保存回测数据
back_data(ofdata)
print('done')

View File

@@ -0,0 +1,313 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 54,
"id": "30ee221d",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "c826c49d",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('E:/data/ag/tick生成的OF数据/back_ofdata_dj.csv')"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "887ba88c",
"metadata": {},
"outputs": [],
"source": [
"delta_values = df['delta'].abs()"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "a662ed7c",
"metadata": {},
"outputs": [],
"source": [
"# 计算正数部分的百分位数、中位数和标准差\n",
"percentile = delta_values.quantile(0.70)\n",
"median = delta_values.median()\n",
"std = delta_values.std()"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "69d1e7cb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"全部数据绝对值的百分位740.0\n",
"全部数据绝对值的中位数426.0\n",
"全部数据绝对值的标准差722.8068551884389\n"
]
}
],
"source": [
"# 打印相关结果\n",
"print(f\"全部数据绝对值的百分位:{percentile}\")\n",
"print(f\"全部数据绝对值的中位数:{median}\")\n",
"print(f\"全部数据绝对值的标准差:{std}\")"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "979f66c9",
"metadata": {},
"outputs": [],
"source": [
"# positive_values = [x for x in delta_values if x > 0]\n",
"# negative_values = [x for x in delta_values if x < 0]\n",
"positive_values = df['delta'][df['delta'] > 0]\n",
"negative_values = df['delta'][df['delta'] < 0]"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "3f0334e8",
"metadata": {},
"outputs": [],
"source": [
"# 计算正数部分的百分位数、中位数和标准差\n",
"positive_percentile = positive_values.quantile(0.70)\n",
"positive_median = positive_values.median()\n",
"positive_std = positive_values.std()"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "9daebce5",
"metadata": {},
"outputs": [],
"source": [
"# 计算负数部分的百分位数、中位数和标准差\n",
"negative_percentile = negative_values.quantile(0.70)\n",
"negative_median = negative_values.median()\n",
"negative_std = negative_values.std()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "0406696f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"正数的百分位1014.8000000000011\n",
"正数的中位数432.0\n",
"正数的标准差708.9579385007327\n",
"负数的百分位:-998.0\n",
"负数的中位数:-421.0\n",
"负数的标准差736.3260520762277\n"
]
}
],
"source": [
"# 打印相关结果\n",
"print(f\"正数的百分位:{positive_percentile}\")\n",
"print(f\"正数的中位数:{positive_median}\")\n",
"print(f\"正数的标准差:{positive_std}\")\n",
"\n",
"print(f\"负数的百分位:{negative_percentile}\")\n",
"print(f\"负数的中位数:{negative_median}\")\n",
"print(f\"负数的标准差:{negative_std}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bd9ed5a7",
"metadata": {},
"outputs": [],
"source": [
"# positive_values.tail()\n",
"# negative_values.tail()\n",
"# pos_qua_nums = positive_values.iloc[-120:-1].quantile(0.95)\n",
"# print(pos_qua_nums)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39fe34e9",
"metadata": {},
"outputs": [],
"source": [
"df['delta_cumsum'] = df['delta'].cumsum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea2ad82f",
"metadata": {},
"outputs": [],
"source": [
"df['delta'].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f71b1dc3",
"metadata": {},
"outputs": [],
"source": [
"df['delta_cumsum'].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29cf2703",
"metadata": {},
"outputs": [],
"source": [
"delta_cumsum_values = df['delta_cumsum']#.abs()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7f8ff173",
"metadata": {},
"outputs": [],
"source": [
"# 计算和值的正数部分的百分位数、中位数和标准差\n",
"cumsum_percentile = delta_cumsum_values.quantile(0.95)\n",
"cumsum_median = delta_cumsum_values.median()\n",
"cumsum_std = delta_cumsum_values.std()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2c57b8a",
"metadata": {},
"outputs": [],
"source": [
"# 打印相关结果\n",
"print(f\"和值的全部数据绝对值的百分位:{cumsum_percentile}\")\n",
"print(f\"和值的全部数据绝对值的中位数:{cumsum_median}\")\n",
"print(f\"和值的全部数据绝对值的标准差:{cumsum_std}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "82860da9",
"metadata": {},
"outputs": [],
"source": [
"positive_cumsum_values = df['delta_cumsum'][df['delta_cumsum'] > 0]\n",
"negative_cumsum_values = df['delta_cumsum'][df['delta_cumsum'] < 0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56f9f922",
"metadata": {},
"outputs": [],
"source": [
"positive_cumsum_values.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b8dcab86",
"metadata": {},
"outputs": [],
"source": [
"negative_cumsum_values.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db782086",
"metadata": {},
"outputs": [],
"source": [
"# 计算正数部分的百分位数、中位数和标准差\n",
"positive_cumsum_percentile = positive_cumsum_values.quantile(0.7)\n",
"positive_cumsum_median = positive_cumsum_values.median()\n",
"positive_cumsum_std = positive_cumsum_values.std()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2430d9b7",
"metadata": {},
"outputs": [],
"source": [
"# 计算负数部分的百分位数、中位数和标准差\n",
"negative_cumsum_percentile = negative_cumsum_values.quantile(0.7)\n",
"negative_cumsum_median = negative_cumsum_values.median()\n",
"negative_cumsum_std = negative_cumsum_values.std()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "698948f6",
"metadata": {},
"outputs": [],
"source": [
"# 打印相关结果\n",
"print(f\"和值的正数百分位:{positive_cumsum_percentile}\")\n",
"print(f\"和值的正的中位数:{positive_cumsum_median}\")\n",
"print(f\"和值的正数标准差:{positive_cumsum_std}\")\n",
"\n",
"print(f\"和值的负数百分位:{negative_cumsum_percentile}\")\n",
"print(f\"和值的负数中位数:{negative_cumsum_median}\")\n",
"print(f\"和值的负数标准差:{negative_cumsum_std}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,892 @@
'''
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
该代码的主要目的是处理Tick数据并生成交易信号。代码中定义了一个tickcome函数它接收到Tick数据后会进行一系列的处理包括构建Tick字典、更新上一个Tick的成交量、保存Tick数据、生成K线数据等。其中涉及到的一些函数有
on_tick(tick): 处理单个Tick数据根据Tick数据生成K线数据。
tickdata(df, symbol): 处理Tick数据生成K线数据。
orderflow_df_new(df_tick, df_min, symbol): 处理Tick和K线数据生成订单流数据。
GetOrderFlow_dj(kData): 计算订单流的信号指标。
除此之外代码中还定义了一个MyTrader类继承自TraderApiBase用于实现交易相关的功能。
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
'''
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import Process, Queue
import queue
import threading
from AlgoPlus.CTP.MdApi import run_tick_engine
from AlgoPlus.CTP.FutureAccount import get_simulate_account
from AlgoPlus.CTP.FutureAccount import FutureAccount
from AlgoPlus.CTP.TraderApiBase import TraderApiBase
from AlgoPlus.ta.time_bar import tick_to_bar
import pandas as pd
from datetime import datetime, timedelta
from datetime import time as s_time
import operator
import time
import numpy as np
import os
import re
# 加入邮件通知
import smtplib
from email.mime.text import MIMEText # 导入 MIMEText 类发送纯文本邮件
from email.mime.multipart import MIMEMultipart # 导入 MIMEMultipart 类发送带有附件的邮件
from email.mime.application import MIMEApplication # 导入 MIMEApplication 类发送二进制附件
## 配置邮件信息
receivers = ["240884432@qq.com"] # 设置邮件接收人地址
subject = "订单流策略交易信号" # 设置邮件主题
#text = " " # 设置邮件正文
# file_path = "test.txt" # 设置邮件附件文件路径
## 配置邮件服务器信息
smtp_server = "smtp.qq.com" # 设置发送邮件的 SMTP 服务器地址
smtp_port = 465 # 设置发送邮件的 SMTP 服务器端口号QQ 邮箱 SSL 端口为 465普通 SMTP 为 25
sender = "240884432@qq.com" # 设置发送邮件的邮箱地址
username = "240884432@qq.com" # 设置发送邮件的邮箱用户名
password = "ifjgwlnzdvrfbjgf" #zrmpcgttataabhjh设置发送邮件的邮箱密码或授权码
tickdatadict = {} # 存储Tick数据的字典
quotedict = {} # 存储行情数据的字典
ofdatadict = {} # 存储K线数据的字典
trade_dfs = {} #pd.DataFrame({}) # 存储交易数据的DataFrame对象
previous_volume = {} # 上一个Tick的成交量
tsymbollist={}
clearing_time_dict = {'sc': s_time(2,30), 'bc': s_time(1,0), 'lu': s_time(23,0), 'nr': s_time(23,0),'au': s_time(2,30), 'ag': s_time(2,30),
'ss': s_time(1,0), 'sn': s_time(1,0), 'ni': s_time(1,0), 'pb': s_time(1,0),'zn': s_time(1,0), 'al': s_time(1,0), 'cu': s_time(1,0),
'ru': s_time(23,0), 'rb': s_time(23,0), 'hc': s_time(23,0), 'fu': s_time(23,0), 'bu': s_time(23,0), 'sp': s_time(23,0),
'PF': s_time(23,0), 'SR': s_time(23,0), 'CF': s_time(23,0), 'CY': s_time(23,0), 'RM': s_time(23,0), 'MA': s_time(23,0),
'TA': s_time(23,0), 'ZC': s_time(23,0), 'FG': s_time(23,0), 'OI': s_time(23,0), 'SA': s_time(23,0),
'p': s_time(23,0), 'j': s_time(23,0), 'jm': s_time(23,0), 'i': s_time(23,0), 'l': s_time(23,0), 'v': s_time(23,0),
'pp': s_time(23,0), 'eg': s_time(23,0), 'c': s_time(23,0), 'cs': s_time(23,0), 'y': s_time(23,0), 'm': s_time(23,0),
'a': s_time(23,0), 'b': s_time(23,0), 'rr': s_time(23,0), 'eb': s_time(23,0), 'pg': s_time(23,0)}
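# clearing_time_dict品种代码 -> 夜盘收盘时间,供下方 day_data_reset() 在收盘后约 15 分钟的窗口内
# 清空该品种当日行情缓存trade_dfs使用。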
def send_mail(text):
msg = MIMEMultipart()
msg["From"] = sender
msg["To"] = ";".join(receivers)
msg["Subject"] = subject
msg.attach(MIMEText(text, "plain", "utf-8"))
smtp = smtplib.SMTP_SSL(smtp_server, smtp_port)
smtp.login(username, password)
smtp.sendmail(sender, receivers, msg.as_string())
smtp.quit()
#交易程序---------------------------------------------------------------------------------------------------------------------------------------------------------------------
class ParamObj:
# 策略需要用到的参数,在新建合约对象的时候传入!!
# 策略需要用到的参数,在新建合约对象的时候传入!!
# 策略需要用到的参数,在新建合约对象的时候传入!!
symbol = None #合约名称
Lots = None #下单手数
py = None #设置委托价格的偏移,更加容易促成成交
trailing_stop_percent = None #跟踪出场参数
fixed_stop_loss_percent = None #固定出场参数
dj_X = None #开仓的堆积参数
delta = None #开仓的delta参数
sum_delta = None #开仓的delta累积参数
失衡=None
堆积=None
周期=None
# 策略需要用到的变量
cont_df = 0
pos = 0
short_trailing_stop_price = 0
long_trailing_stop_price = 0
sl_long_price = 0
sl_shor_price = 0
out_long = 0
out_short = 0
clearing_executed = False
kgdata = True
def __init__(self, symbol, Lots, py, trailing_stop_percent, fixed_stop_loss_percent, dj_X, delta, sum_delta,失衡,堆积,周期):
self.symbol = symbol
self.Lots = Lots
self.py = py
self.trailing_stop_percent = trailing_stop_percent
self.fixed_stop_loss_percent = fixed_stop_loss_percent
self.dj_X = dj_X
self.delta = delta
self.sum_delta = sum_delta
self.失衡=失衡
self.堆积=堆积
self.周期=周期
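# ParamObj 用法示意(合约与参数取值均为假设,不构成实盘建议):
#   param_dict = {
#       'rb2405': ParamObj(symbol='rb2405', Lots=1, py=1,
#                          trailing_stop_percent=0.02, fixed_stop_loss_percent=0.01,
#                          dj_X=1, delta=1500, sum_delta=2000,
#                          失衡=3, 堆积=3, 周期='1T'),
#   }
#   随后由 run_trader(param_dict, ...) 传入 MyTrader.start(param_dict)。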
class MyTrader(TraderApiBase):
def __init__(self, broker_id, td_server, investor_id, password, app_id, auth_code, md_queue=None, page_dir='', private_resume_type=2, public_resume_type=2):
self.param_dict = {}
self.queue_dict = {}
self.品种=' '
# 邮件通知模块
def tickcome(self,md_queue):
global previous_volume
data=md_queue
instrument_id = data['InstrumentID'].decode() # 品种代码
ActionDay = data['ActionDay'].decode() # 交易日日期
update_time = data['UpdateTime'].decode() # 更新时间
update_millisec = str(data['UpdateMillisec']) # 更新毫秒数
created_at = ActionDay[:4] + '-' + ActionDay[4:6] + '-' + ActionDay[6:] + ' ' + update_time + '.' + update_millisec #创建时间
# 构建tick字典
tick = {
'symbol': instrument_id, # 品种代码和交易所ID
'created_at':datetime.strptime(created_at, "%Y-%m-%d %H:%M:%S.%f"),
#'created_at': created_at, # 创建时间
'price': float(data['LastPrice']), # 最新价
'last_volume': int(data['Volume']) - previous_volume.get(instrument_id, 0) if previous_volume.get(instrument_id, 0) != 0 else 0, # 瞬时成交量
'bid_p': float(data['BidPrice1']), # 买价
'bid_v': int(data['BidVolume1']), # 买量
'ask_p': float(data['AskPrice1']), # 卖价
'ask_v': int(data['AskVolume1']), # 卖量
'UpperLimitPrice': float(data['UpperLimitPrice']), # 涨停价
'LowerLimitPrice': float(data['LowerLimitPrice']), # 跌停价
'TradingDay': data['TradingDay'].decode(), # 交易日日期
'cum_volume': int(data['Volume']), # 最新总成交量
'cum_amount': float(data['Turnover']), # 最新总成交额
'cum_position': int(data['OpenInterest']), # 合约持仓量
}
# print('&&&&&&&&',instrument_id, tick['created_at'],'vol:',tick['last_volume'])
# 更新上一个Tick的成交量
previous_volume[instrument_id] = int(data['Volume'])
if tick['last_volume']>0:
#print(tick['created_at'],'vol:',tick['last_volume'])
# 处理Tick数据
self.on_tick(tick)
def can_time(self,hour, minute):
hour = str(hour)
minute = str(minute)
if len(minute) == 1:
minute = "0" + minute
return int(hour + minute)
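    # 示例can_time(9, 5) 返回 905can_time(21, 30) 返回 2130以整数形式方便比较时间区间。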
def on_tick(self,tick):
tm=self.can_time(tick['created_at'].hour,tick['created_at'].minute)
#print(tick['symbol'])
#print(1)
#if tm>1500 and tm<2100 :
# return
if tick['last_volume']==0:
return
quotes = tick
timetick=str(tick['created_at']).replace('+08:00', '')
tsymbol=tick['symbol']
if tsymbol not in tsymbollist.keys():
# 获取tick的买卖价和买卖量
tsymbollist[tsymbol]=tick
bid_p=quotes['bid_p']
ask_p=quotes['ask_p']
bid_v=quotes['bid_v']
ask_v=quotes['ask_v']
else:
# 获取上一个tick的买卖价和买卖量
rquotes =tsymbollist[tsymbol]
bid_p=rquotes['bid_p']
ask_p=rquotes['ask_p']
bid_v=rquotes['bid_v']
ask_v=rquotes['ask_v']
tsymbollist[tsymbol]=tick
tick_dt=pd.DataFrame({'datetime':timetick,'symbol':tick['symbol'],'mainsym':tick['symbol'].rstrip('0123456789').upper(),'lastprice':tick['price'],
'vol':tick['last_volume'],
'bid_p':bid_p,'ask_p':ask_p,'bid_v':bid_v,'ask_v':ask_v},index=[0])
sym = tick_dt['symbol'][0]
#print(tick_dt)
self.tickdata(tick_dt,sym)
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
def data_of(self,symbol, df):
global trade_dfs
# 将df数据合并到trader_df中
# if symbol not in trade_dfs.keys():
# trade_df = pd.DataFrame({})
# else:
# trade_df = trade_dfs[symbol]
trade_dfs[symbol] = pd.concat([trade_dfs[symbol], df], ignore_index=True)
# print('!!!!!!!!!!!trader_df: ', symbol, df['datetime'].iloc[-1])
#print(trader_df)
def process(self,bidDict, askDict, symbol):
try:
# 尝试从quotedict中获取对应品种的报价数据
dic = quotedict[symbol]
bidDictResult = dic['bidDictResult']
askDictResult = dic['askDictResult']
except:
# 如果获取失败则初始化bidDictResult和askDictResult为空字典
bidDictResult, askDictResult = {}, {}
# 将所有买盘字典和卖盘字典的key合并并按升序排序
sList = sorted(set(list(bidDict.keys()) + list(askDict.keys())))
# 遍历所有的key将相同key的值进行累加
for s in sList:
if s in bidDict:
if s in bidDictResult:
bidDictResult[s] = int(bidDict[s]) + bidDictResult[s]
else:
bidDictResult[s] = int(bidDict[s])
if s not in askDictResult:
askDictResult[s] = 0
else:
if s in askDictResult:
askDictResult[s] = int(askDict[s]) + askDictResult[s]
else:
askDictResult[s] = int(askDict[s])
if s not in bidDictResult:
bidDictResult[s] = 0
# 构建包含bidDictResult和askDictResult的字典并存入quotedict中
df = {'bidDictResult': bidDictResult, 'askDictResult': askDictResult}
quotedict[symbol] = df
return bidDictResult, askDictResult
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
def tickdata(self,df,symbol):
tickdata =pd.DataFrame({'datetime':df['datetime'],'symbol':df['symbol'],'lastprice':df['lastprice'],
'volume':df['vol'],'bid_p':df['bid_p'],'bid_v':df['bid_v'],'ask_p':df['ask_p'],'ask_v':df['ask_v']})
try:
if symbol in tickdatadict.keys():
rdf=tickdatadict[symbol]
rdftm=pd.to_datetime(rdf['bartime'][0]).strftime('%Y-%m-%d %H:%M:%S')
now=str(tickdata['datetime'][0])
if now>rdftm:
try:
oo=ofdatadict[symbol]
self.data_of(symbol, oo)
#print('oo',oo)
if symbol in quotedict.keys():
quotedict.pop(symbol)
if symbol in tickdatadict.keys():
tickdatadict.pop(symbol)
if symbol in ofdatadict.keys():
ofdatadict.pop(symbol)
except IOError as e:
print('rdftm捕获到异常',e)
tickdata['bartime'] = pd.to_datetime(tickdata['datetime'])
tickdata['open'] = tickdata['lastprice']
tickdata['high'] = tickdata['lastprice']
tickdata['low'] = tickdata['lastprice']
tickdata['close'] = tickdata['lastprice']
tickdata['starttime'] = tickdata['datetime']
else:
tickdata['bartime'] = rdf['bartime']
tickdata['open'] = rdf['open']
tickdata['high'] = max(tickdata['lastprice'].values,rdf['high'].values)
tickdata['low'] = min(tickdata['lastprice'].values,rdf['low'].values)
tickdata['close'] = tickdata['lastprice']
tickdata['volume']=df['vol']+rdf['volume'].values
tickdata['starttime'] = rdf['starttime']
else :
print('新bar的第一个tick进入')
tickdata['bartime'] = pd.to_datetime(tickdata['datetime'])
tickdata['open'] = tickdata['lastprice']
tickdata['high'] = tickdata['lastprice']
tickdata['low'] = tickdata['lastprice']
tickdata['close'] = tickdata['lastprice']
tickdata['starttime'] = tickdata['datetime']
except IOError as e:
print('捕获到异常',e)
tickdata['bartime'] = pd.to_datetime(tickdata['bartime'])
param = self.param_dict[self.品种]
bardata = tickdata.resample(on = 'bartime',rule = param.周期,label = 'right',closed = 'right').agg({'starttime':'first','symbol':'last','open':'first','high':'max','low':'min','close':'last','volume':'sum'}).reset_index(drop = False)
bardata =bardata.dropna().reset_index(drop = True)
bardata['bartime'] = pd.to_datetime(bardata['bartime'][0]).strftime('%Y-%m-%d %H:%M:%S')
tickdatadict[symbol]=bardata
tickdata['volume']=df['vol'].values
#print(bardata['symbol'].values,bardata['bartime'].values)
self.orderflow_df_new(tickdata,bardata,symbol)
# time.sleep(0.5)
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
def orderflow_df_new(self,df_tick,df_min,symbol):
startArray = pd.to_datetime(df_min['starttime']).values
voluememin= df_min['volume'].values
highs=df_min['high'].values
lows=df_min['low'].values
opens=df_min['open'].values
closes=df_min['close'].values
#endArray = pd.to_datetime(df_min['bartime']).values
endArray = df_min['bartime'].values
#print(endArray)
deltaArray = np.zeros((len(endArray),))
tTickArray = pd.to_datetime(df_tick['datetime']).values
bp1TickArray = df_tick['bid_p'].values
ap1TickArray = df_tick['ask_p'].values
lastTickArray = df_tick['lastprice'].values
volumeTickArray = df_tick['volume'].values
symbolarray = df_tick['symbol'].values
indexFinal = 0
for index,tEnd in enumerate(endArray):
dt=endArray[index]
start = startArray[index]
bidDict = {}
askDict = {}
bar_vol=voluememin[index]
bar_close=closes[index]
bar_open=opens[index]
bar_low=lows[index]
bar_high=highs[index]
bar_symbol=symbolarray[index]
# for indexTick in range(indexFinal,len(df_tick)):
# if tTickArray[indexTick] >= tEnd:
# break
# elif (tTickArray[indexTick] >= start) & (tTickArray[indexTick] < tEnd):
Bp = round(bp1TickArray[0],4)
Ap = round(ap1TickArray[0],4)
LastPrice = round(lastTickArray[0],4)
Volume = volumeTickArray[0]
if LastPrice >= Ap:
if str(LastPrice) in askDict.keys():
askDict[str(LastPrice)] += Volume
else:
askDict[str(LastPrice)] = Volume
if LastPrice <= Bp:
if str(LastPrice) in bidDict.keys():
bidDict[str(LastPrice)] += Volume
else:
bidDict[str(LastPrice)] = Volume
# indexFinal = indexTick
bidDictResult,askDictResult = self.process(bidDict,askDict,symbol)
bidDictResult=dict(sorted(bidDictResult.items(),key=operator.itemgetter(0)))
askDictResult=dict(sorted(askDictResult.items(),key=operator.itemgetter(0)))
prinslist=list(bidDictResult.keys())
asklist=list(askDictResult.values())
bidlist=list(bidDictResult.values())
delta=(sum(askDictResult.values()) - sum(bidDictResult.values()))
#print(prinslist,asklist,bidlist)
#print(len(prinslist),len(bidDictResult),len(askDictResult))
df=pd.DataFrame({'price':pd.Series([prinslist]),'Ask':pd.Series([asklist]),'Bid':pd.Series([bidlist])})
#df=pd.DataFrame({'price':pd.Series(bidDictResult.keys()),'Ask':pd.Series(askDictResult.values()),'Bid':pd.Series(bidDictResult.values())})
df['symbol']=bar_symbol
df['datetime']=dt
df['delta']=str(delta)
df['close']=bar_close
df['open']=bar_open
df['high']=bar_high
df['low']=bar_low
df['volume']=bar_vol
#df['ticktime']=tTickArray[0]
df['dj'] = self.GetOrderFlow_dj(df)
ofdatadict[symbol]=df
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
def GetOrderFlow_dj(self,kData):
param = self.param_dict[self.品种]
Config = {
'Value1': param.失衡,
'Value2': param.堆积,
'Value4': True,
}
aryData = kData
djcout = 0
# 遍历kData中的每一行计算djcout指标
for index, row in aryData.iterrows():
kItem = aryData.iloc[index]
high = kItem['high']
low = kItem['low']
close = kItem['close']
open = kItem['open']
dtime = kItem['datetime']
price_s = kItem['price']
Ask_s = kItem['Ask']
Bid_s = kItem['Bid']
delta = kItem['delta']
price_s = price_s
Ask_s = Ask_s
Bid_s = Bid_s
gj = 0
xq = 0
gxx = 0
xxx = 0
# 遍历price_s中的每一个元素计算相关指标
for i in np.arange(0, len(price_s), 1):
duiji = {
'price': 0,
'time': 0,
'longshort': 0,
}
if i == 0:
delta = delta
order= {
"Price":price_s[i],
"Bid":{ "Value":Bid_s[i]},
"Ask":{ "Value":Ask_s[i]}
}
#空头堆积
if i >= 0 and i < len(price_s) - 1:
if (order["Bid"]["Value"] > Ask_s[i + 1] * int(Config['Value1'])):
gxx += 1
gj += 1
if gj >= int(Config['Value2']) and Config['Value4'] == True:
duiji['price'] = price_s[i]
duiji['time'] = dtime
duiji['longshort'] = -1
if float(duiji['price']) > 0:
djcout += -1
else:
gj = 0
#多头堆积
if i >= 1 and i < len(price_s) - 1:
if (order["Ask"]["Value"] > Bid_s[i - 1] * int(Config['Value1'])):
xq += 1
xxx += 1
if xq >= int(Config['Value2']) and Config['Value4'] == True:
duiji['price'] = price_s[i]
duiji['time'] = dtime
duiji['longshort'] = 1
if float(duiji['price']) > 0:
djcout += 1
else:
xq = 0
# 返回计算得到的djcout值
return djcout
#读取保存的数据
def read_to_csv(self,symbol):
# 文件夹路径和文件路径
# 使用正则表达式提取英文字母并重新赋值给symbol
param = self.param_dict[symbol]
# symbol = ''.join(re.findall('[a-zA-Z]', str(symbol)))
folder_path = "traderdata"
file_path = os.path.join(folder_path, f"{str(symbol)}_traderdata.csv")
# 如果文件夹不存在则创建
if not os.path.exists(folder_path):
os.makedirs(folder_path)
# 读取保留的模型数据CSV文件
if os.path.exists(file_path):
df = pd.read_csv(file_path)
if not df.empty and param.kgdata==True:
# 选择最后一行数据
row = df.iloc[-1]
# 根据CSV文件的列名将数据赋值给相应的属性
param.pos = int(row['pos'])
param.short_trailing_stop_price = float(row['short_trailing_stop_price'])
param.long_trailing_stop_price = float(row['long_trailing_stop_price'])
param.sl_long_price = float(row['sl_long_price'])
param.sl_shor_price = float(row['sl_shor_price'])
# param.out_long = int(row['out_long'])
# param.out_short = int(row['out_short'])
print("找到历史交易数据文件,已经更新持仓,止损止盈数据", df.iloc[-1])
param.kgdata=False
else:
pass
#print("没有找到历史交易数据文件", file_path)
#如果没有找到CSV则初始化变量
pass
#保存数据
def save_to_csv(self,symbol):
param = self.param_dict[symbol]
# 使用正则表达式提取英文字母并重新赋值给symbol
# symbol = ''.join(re.findall('[a-zA-Z]', str(symbol)))
# 创建DataFrame
data = {
'datetime': [trade_dfs[symbol]['datetime'].iloc[-1]],
'pos': [param.pos],
'short_trailing_stop_price': [param.short_trailing_stop_price],
'long_trailing_stop_price': [param.long_trailing_stop_price],
'sl_long_price': [param.sl_long_price],
'sl_shor_price': [param.sl_shor_price],
# 'out_long': [param.out_long],
# 'out_short': [param.out_short]
}
df = pd.DataFrame(data)
# 将DataFrame保存到CSV文件
df.to_csv(f"traderdata/{str(symbol)}_traderdata.csv", index=False)
#每日收盘重置数据
def day_data_reset(self, symbol):
param = self.param_dict[symbol]
sec = ''.join(re.findall('[a-zA-Z]', str(symbol)))
# 获取当前时间
current_time = datetime.now().time()
# 第一时间范围(日盘收盘)
clearing_time1_start = s_time(15,00)
clearing_time1_end = s_time(15,15)
# 创建一个标志变量,用于记录是否已经执行过
param.clearing_executed = False
# 检查当前时间第一个操作的时间范围内
if clearing_time1_start <= current_time <= clearing_time1_end and not param.clearing_executed :
param.clearing_executed = True # 设置标志变量为已执行
trade_dfs[symbol].drop(trade_dfs[symbol].index,inplace=True)#清除当天的行情数据
# 检查当前时间是否在第二个操作的时间范围内(夜盘收盘)
elif sec in clearing_time_dict.keys():
clearing_time2_start = clearing_time_dict[sec]
clearing_time2_end = s_time(clearing_time2_start.hour, clearing_time2_start.minute+15)
if clearing_time2_start <= current_time <= clearing_time2_end and not param.clearing_executed :
param.clearing_executed = True # 设置标志变量为已执行
trade_dfs[symbol].drop(trade_dfs[symbol].index,inplace=True) #清除当天的行情数据
else:
param.clearing_executed = False
pass
return param.clearing_executed
def OnRtnTrade(self, pTrade):
print("||成交回报||", pTrade)
def OnRspOrderInsert(self, pInputOrder, pRspInfo, nRequestID, bIsLast):
print("||OnRspOrderInsert||", pInputOrder, pRspInfo, nRequestID, bIsLast)
# 订单状态通知
def OnRtnOrder(self, pOrder):
print("||订单回报||", pOrder)
def cal_sig(self, symbol_queue):
while True:
try:
data = symbol_queue.get(block=True, timeout=5) # 如果5秒没收到新的tick行情则抛出异常
instrument_id = data['InstrumentID'].decode() # 品种代码
size = symbol_queue.qsize()
if size > 1:
print(f'当前{instrument_id}共享队列长度为{size}, 有点阻塞!!!!!')
self.read_to_csv(instrument_id)
self.day_data_reset(instrument_id)
param = self.param_dict[instrument_id]
self.品种=instrument_id
self.tickcome(data)
trade_df = trade_dfs[instrument_id]
#新K线开始启动交易程序 and 保存行情数据
self.read_to_csv(instrument_id)
# size = symbol_queue.qsize()
# if size > 2:
# print(f'!!!!!当前{instrument_id}共享队列长度为:',size)
if len(trade_df)>param.cont_df:
# 检查文件是否存在
csv_file_path = f"traderdata/{instrument_id}_ofdata.csv"
if os.path.exists(csv_file_path):
# 仅保存最后一行数据
trade_df.tail(1).to_csv(csv_file_path, mode='a', header=False, index=False)
else:
# 创建新文件并保存整个DataFrame
trade_df.to_csv(csv_file_path, index=False)
# 更新跟踪止损价格
if param.long_trailing_stop_price >0 and param.pos>0:
#print('datetime+sig: ',dt,'旧多头出线',param.long_trailing_stop_price,'low',self.low[0])
param.long_trailing_stop_price = trade_df['low'].iloc[-1] if param.long_trailing_stop_price<trade_df['low'].iloc[-1] else param.long_trailing_stop_price
self.save_to_csv(instrument_id)
#print('datetime+sig: ',dt,'多头出线',param.long_trailing_stop_price)
if param.short_trailing_stop_price >0 and param.pos<0:
#print('datetime+sig: ',dt,'旧空头出线',param.short_trailing_stop_price,'high',self.high[0])
param.short_trailing_stop_price = trade_df['high'].iloc[-1] if trade_df['high'].iloc[-1] <param.short_trailing_stop_price else param.short_trailing_stop_price
self.save_to_csv(instrument_id)
#print('datetime+sig: ',dt,'空头出线',param.short_trailing_stop_price)
param.out_long=param.long_trailing_stop_price * (1 - param.trailing_stop_percent)
param.out_short=param.short_trailing_stop_price*(1 + param.trailing_stop_percent)
#print('datetime+sig: ',dt,'空头出线',param.out_short)
#print('datetime+sig: ',dt,'多头出线',param.out_long)
# 跟踪出场
if param.out_long >0:
print('datetime+sig: ',trade_df['datetime'].iloc[-1],'预设——多头止盈——','TR',param.out_long,'low', trade_df['low'].iloc[-1])
if trade_df['low'].iloc[-1] < param.out_long and param.pos>0 and param.sl_long_price>0 and trade_df['low'].iloc[-1]>param.sl_long_price:
print('datetime+sig: ',trade_df['datetime'].iloc[-1],'多头止盈','TR',param.out_long,'low', trade_df['low'].iloc[-1])
#平多
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-param.py,param.Lots,b'1',b'1')
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-param.py,param.Lots,b'1',b'3')
param.long_trailing_stop_price = 0
param.out_long=0
param.sl_long_price=0
param.pos = 0
self.save_to_csv(instrument_id)
if param.out_short>0:
print('datetime+sig: ',trade_df['datetime'].iloc[-1],'预设——空头止盈——: ','TR',param.out_short,'high', trade_df['high'].iloc[-1])
if trade_df['high'].iloc[-1] > param.out_short and param.pos<0 and param.sl_shor_price>0 and trade_df['high'].iloc[-1]<param.sl_shor_price:
print('datetime+sig: ',trade_df['datetime'].iloc[-1],'空头止盈: ','TR',param.out_short,'high', trade_df['high'].iloc[-1])
#平空
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+param.py,param.Lots,b'0',b'1')
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+param.py,param.Lots,b'0',b'3')
param.short_trailing_stop_price = 0
param.sl_shor_price=0
                    param.out_short = 0
param.pos = 0
self.save_to_csv(instrument_id)
# 固定止损
fixed_stop_loss_L = param.sl_long_price * (1 - param.fixed_stop_loss_percent)
if param.pos>0:
print('datetime+sig: ', trade_df['datetime'].iloc[-1], '预设——多头止损', 'SL', fixed_stop_loss_L, 'close', trade_df['close'].iloc[-1])
if param.sl_long_price>0 and fixed_stop_loss_L>0 and param.pos > 0 and trade_df['close'].iloc[-1] < fixed_stop_loss_L:
print('datetime+sig: ', trade_df['datetime'].iloc[-1], '多头止损', 'SL', fixed_stop_loss_L, 'close', trade_df['close'].iloc[-1])
#平多
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-param.py,param.Lots,b'1',b'1')
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-param.py,param.Lots,b'1',b'3')
param.long_trailing_stop_price = 0
param.sl_long_price=0
param.out_long = 0
param.pos = 0
self.save_to_csv(instrument_id)
fixed_stop_loss_S = param.sl_shor_price * (1 + param.fixed_stop_loss_percent)
if param.pos<0:
print('datetime+sig: ', trade_df['datetime'].iloc[-1], '预设——空头止损', 'SL', fixed_stop_loss_S, 'close', trade_df['close'].iloc[-1])
if param.sl_shor_price>0 and fixed_stop_loss_S>0 and param.pos < 0 and trade_df['close'].iloc[-1] > fixed_stop_loss_S:
print('datetime+sig: ', trade_df['datetime'].iloc[-1], '空头止损', 'SL', fixed_stop_loss_S, 'close', trade_df['close'].iloc[-1])
#平空
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+param.py,param.Lots,b'0',b'1')
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+param.py,param.Lots,b'0',b'3')
param.short_trailing_stop_price = 0
param.sl_shor_price=0
param.out_short = 0
param.pos = 0
self.save_to_csv(instrument_id)
#日均线
trade_df['dayma']=trade_df['close'].mean()
# 计算累积的delta值
trade_df['delta'] = trade_df['delta'].astype(float)
trade_df['delta累计'] = trade_df['delta'].cumsum()
#大于日均线
开多1=trade_df['dayma'].iloc[-1] > 0 and trade_df['close'].iloc[-1] > trade_df['dayma'].iloc[-1]
#累计多空净量大于X
开多4=trade_df['delta累计'].iloc[-1] > param.sum_delta and trade_df['delta'].iloc[-1] > param.delta
#小于日均线
开空1=trade_df['dayma'].iloc[-1]>0 and trade_df['close'].iloc[-1] < trade_df['dayma'].iloc[-1]
#累计多空净量小于X
开空4=trade_df['delta累计'].iloc[-1] < -param.sum_delta and trade_df['delta'].iloc[-1] < -param.delta
开多组合= 开多1 and 开多4 and trade_df['dj'].iloc[-1]>param.dj_X
开空条件= 开空1 and 开空4 and trade_df['dj'].iloc[-1]<-param.dj_X
平多条件=trade_df['dj'].iloc[-1]<-param.dj_X
平空条件=trade_df['dj'].iloc[-1]>param.dj_X
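                    # 条件小结:开多 = 收盘价站上日内均价 且 delta、delta累计 均超过阈值 且 堆积数 dj > dj_X
                    # 开空与之对称dj 低于 -dj_X 时平多dj 高于 dj_X 时平空(反手前先平仓)。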
#开仓
#多头开仓条件
if param.pos<0 and 平空条件 :
print('平空: ','ExchangeID: ',data['ExchangeID'],'InstrumentID',data['InstrumentID'],'AskPrice1',data['AskPrice1']+param.py)
#平空
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+param.py,param.Lots,b'0',b'1')
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+param.py,param.Lots,b'0',b'3')
param.pos=0
param.sl_shor_price=0
param.short_trailing_stop_price=0
print('datetime+sig: ', trade_df['datetime'].iloc[-1], '反手平空:', '平仓价格:', data['AskPrice1']+param.py,'堆积数:', trade_df['dj'].iloc[-1])
self.save_to_csv(instrument_id)
# 发送邮件
text = f"平空交易: 交易品种为{data['InstrumentID']}, 交易时间为{trade_df['datetime'].iloc[-1]}, 反手平空的平仓价格为{data['AskPrice1']+param.py}, 交易手数位{param.Lots}"
send_mail(text)
if param.pos==0 and 开多组合:
print('开多: ','ExchangeID: ',data['ExchangeID'],'InstrumentID',data['InstrumentID'],'AskPrice1',data['AskPrice1']+param.py)
#开多
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+param.py,param.Lots,b'0',b'0')
print('datetime+sig: ', trade_df['datetime'].iloc[-1], '多头开仓', '开仓价格:', data['AskPrice1']+param.py,'堆积数:', trade_df['dj'].iloc[-1])
param.pos=1
param.long_trailing_stop_price=data['AskPrice1']
param.sl_long_price=data['AskPrice1']
self.save_to_csv(instrument_id)
# 发送邮件
text = f"开多交易: 交易品种为{data['InstrumentID']}, 交易时间为{trade_df['datetime'].iloc[-1]}, 多头开仓的开仓价格{data['AskPrice1']+param.py}, 交易手数位{param.Lots}"
send_mail(text)
if param.pos>0 and 平多条件 :
print('平多: ','ExchangeID: ',data['ExchangeID'],'InstrumentID',data['InstrumentID'],'BidPrice1',data['BidPrice1']-param.py)
#平多
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-param.py,param.Lots,b'1',b'1')
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-param.py,param.Lots,b'1',b'3')
param.pos=0
param.long_trailing_stop_price=0
param.sl_long_price=0
print('datetime+sig: ', trade_df['datetime'].iloc[-1], '反手平多', '平仓价格:', data['BidPrice1']-param.py,'堆积数:', trade_df['dj'].iloc[-1])
self.save_to_csv(instrument_id)
#发送邮件
text = f"平多交易: 交易品种为{data['InstrumentID']}, 交易时间为{trade_df['datetime'].iloc[-1]}, 反手平多的平仓价格{data['BidPrice1']-param.py}, 交易手数位{param.Lots}"
send_mail(text)
if param.pos==0 and 开空条件 :
print('开空: ','ExchangeID: ',data['ExchangeID'],'InstrumentID',data['InstrumentID'],'BidPrice1',data['BidPrice1'])
#开空
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-param.py,param.Lots,b'1',b'0')
print('datetime+sig: ', trade_df['datetime'].iloc[-1], '空头开仓', '开仓价格:', data['BidPrice1']-param.py,'堆积数:', trade_df['dj'].iloc[-1])
param.pos=-1
param.short_trailing_stop_price=data['BidPrice1']
param.sl_shor_price=data['BidPrice1']
self.save_to_csv(instrument_id)
# 发送邮件
text = f"开空交易: 交易品种为{data['InstrumentID']}, 交易时间为{trade_df['datetime'].iloc[-1]}, 空头开仓的开仓价格{data['BidPrice1']-param.py}, 交易手数位{param.Lots}"
send_mail(text)
print(trade_df)
param.cont_df=len(trade_df)
except queue.Empty:
# print(f"当前合约队列为空,等待新数据插入。")
pass
# 将CTP推送的行情数据分发给对应线程队列去执行
def distribute_tick(self):
while True:
if self.status == 0:
data = None
while not self.md_queue.empty():
data = self.md_queue.get(block=False)
instrument_id = data['InstrumentID'].decode() # 品种代码
try:
self.queue_dict[instrument_id].put(data, block=False) # 往对应合约队列中插入行情
# print(f"{instrument_id}合约数据插入。")
except queue.Full:
# 当某个线程阻塞导致对应队列容量超限时抛出异常,不会影响其他合约的信号计算
print(f"{instrument_id}合约信号计算阻塞导致对应队列已满,请检查对应代码逻辑后重启。")
else:
time.sleep(1)
def start(self, param_dict):
threads = []
self.param_dict = param_dict
for symbol in param_dict.keys():
trade_dfs[symbol] = pd.DataFrame({})
self.queue_dict[symbol] = queue.Queue(20) #为每个合约创建一个限制数为20的队列,当计算发生阻塞导致队列达到限制数时会抛出异常
t = threading.Thread(target=self.cal_sig, args=(self.queue_dict[symbol],)) # 为每个合约单独创建一个线程计算开仓逻辑
threads.append(t)
t.start()
self.distribute_tick()
for t in threads:
t.join()
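# 架构示意(仅作说明)start()为每个合约创建独立队列和计算线程(cal_sig),
# 再由distribute_tick()把CTP行情按InstrumentID分发到对应合约的队列实现多合约并行计算信号互不阻塞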
def run_trader(param_dict, broker_id, td_server, investor_id, password, app_id, auth_code, md_queue=None, page_dir='', private_resume_type=2, public_resume_type=2):
my_trader = MyTrader(broker_id, td_server, investor_id, password, app_id, auth_code, md_queue, page_dir, private_resume_type, public_resume_type)
my_trader.start(param_dict)
if __name__ == '__main__':
#global symbol
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
#注意运行前请先安装好algoplus,
# pip install AlgoPlus
#http://www.algo.plus/ctp/python/0103001.html
# 实盘参数字典,需要实盘交易的合约,新建对应的参数对象即可,以下参数仅供测试使用,不作为实盘参考!!!!
# 实盘参数字典,需要实盘交易的合约,新建对应的参数对象即可,以下参数仅供测试使用,不作为实盘参考!!!!
# 实盘参数字典,需要实盘交易的合约,新建对应的参数对象即可,以下参数仅供测试使用,不作为实盘参考!!!!
param_dict = {}
param_dict['rb2410'] = ParamObj(symbol='rb2410', Lots=1, py=5, trailing_stop_percent=0.02, fixed_stop_loss_percent=0.01,dj_X=1,delta=1500,sum_delta=2000,失衡=3,堆积=3,周期='1T')
# param_dict['ni2405'] = ParamObj(symbol='ni2405', Lots=1, py=5, trailing_stop_percent=0.02, fixed_stop_loss_percent=0.01,dj_X=0,delta=1500,sum_delta=2000,失衡=3,堆积=3,周期='1T')
# param_dict['j2405'] = ParamObj(symbol='j2405', Lots=1, py=5, trailing_stop_percent=0.02, fixed_stop_loss_percent=0.01,dj_X=0,delta=15,sum_delta=20,失衡=3,堆积=3,周期='1T')
# param_dict['TA405'] = ParamObj(symbol='TA405', Lots=1, py=5, trailing_stop_percent=0.02, fixed_stop_loss_percent=0.01,dj_X=0,delta=15,sum_delta=20,失衡=3,堆积=3,周期='1T')
# param_dict['au2406'] = ParamObj(symbol='au2406', Lots=1, py=5, trailing_stop_percent=0.02, fixed_stop_loss_percent=0.01,dj_X=0,delta=15,sum_delta=20,失衡=3,堆积=3,周期='1T')
# param_dict['sc2405'] = ParamObj(symbol='sc2405', Lots=1, py=5, trailing_stop_percent=0.02, fixed_stop_loss_percent=0.01,dj_X=0,delta=15,sum_delta=20,失衡=3,堆积=3,周期='1T')
# param_dict['bc2406'] = ParamObj(symbol='bc2406', Lots=1, py=5, trailing_stop_percent=0.02, fixed_stop_loss_percent=0.01,dj_X=0,delta=15,sum_delta=20,失衡=3,堆积=3,周期='1T')
# param_dict['lu2406'] = ParamObj(symbol='lu2406', Lots=1, py=5, trailing_stop_percent=0.02, fixed_stop_loss_percent=0.01,dj_X=0,delta=15,sum_delta=20,失衡=3,堆积=3,周期='1T')
#用simnow模拟不要忘记屏蔽下方实盘的future_account字典
# future_account = get_simulate_account(
# investor_id='135858', # simnow账户注意是登录账户的IDSIMNOW个人首页查看
# password='Zj82334475', # simnow密码
# server_name='电信1', # 电信1、电信2、移动、TEST、N视界
# subscribe_list=list(param_dict.keys()), # 合约列表
# )
#实盘用这个不要忘记屏蔽上方simnow的future_account字典
future_account = FutureAccount(
broker_id='8888', # 期货公司BrokerID
server_dict={'TDServer': "103.140.14.210:43205", 'MDServer': '103.140.14.210:43173'}, # TDServer为交易服务器MDServer为行情服务器。服务器地址格式为"ip:port。"
reserve_server_dict={}, # 备用服务器地址
investor_id='155878', # 账户
password='Zj82334475', # 密码
app_id='vntech_vnpy_2.0', # 认证使用AppID
auth_code='N46EKN6TJ9U7V06V', # 认证使用授权码
subscribe_list=list(param_dict.keys()), # 订阅合约列表
md_flow_path='./log', # MdApi流文件存储地址默认MD_LOCATION
td_flow_path='./log', # TraderApi流文件存储地址默认TD_LOCATION
)
print('开始',len(future_account.subscribe_list))
# 共享队列
share_queue = Queue(maxsize=200)
# 行情进程
md_process = Process(target=run_tick_engine, args=(future_account, [share_queue]))
# 交易进程
trader_process = Process(target=run_trader, args=(
param_dict,
future_account.broker_id,
future_account.server_dict['TDServer'],
future_account.investor_id,
future_account.password,
future_account.app_id,
future_account.auth_code,
share_queue, # 队列
future_account.td_flow_path
))
md_process.start()
trader_process.start()
md_process.join()
trader_process.join()

View File

@@ -0,0 +1,780 @@
'''
使用说明:使用前需要调整的相关参数如下
1.确定python到csv文件夹下运行,修改csv文件为需要运行的csv
2.配置邮件信息和参数。
3.tickdata函数修改时间重采样resample中rule周期如5T为所需交易周期
4.GetOrderFlow_dj函数堆积判断的Config参数暂时均为3
5.MyTrader类:
1) init函数初始化:委托价格的偏移、跟踪出场、固定出场参数、交易手数的设置;
2) day_data_reset函数:每日收盘重置数据,按照交易品种设置。
3) Join函数修改“开多组合”和“开空条件”
6. __main__函数设置交易账户变量
该代码的主要目的是处理Tick数据并生成交易信号。代码中定义了一个tickcome函数它接收到Tick数据后会进行一系列的处理包括构建Tick字典、更新上一个Tick的成交量、保存Tick数据、生成K线数据等。其中涉及到的一些函数有
on_tick(tick): 处理单个Tick数据根据Tick数据生成K线数据。
tickdata(df, symbol): 处理Tick数据生成K线数据。
orderflow_df_new(df_tick, df_min, symbol): 处理Tick和K线数据生成订单流数据。
GetOrderFlow_dj(kData): 计算订单流的信号指标。
除此之外代码中还定义了一个MyTrader类继承自TraderApiBase用于实现交易相关的功能。
'''
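# 数据流转示意(与上文函数说明对应,仅为便于阅读的梳理,非额外逻辑):
# CTP行情dict --tickcome()--> tick字典 --on_tick()--> 单笔tick_dt
# --tickdata()--> 按rule周期重采样出bar(bardata) --orderflow_df_new()--> 订单流df(价格档/Ask/Bid/delta)
# --GetOrderFlow_dj()--> 堆积数dj --data_of()--> 追加到trader_df供MyTrader.Join()产生交易信号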
# 需要完善__main__函数中手动设置subscribe_list变量通过时间判断是否需要进行换月修改并发送邮件通知
from multiprocessing import Process, Queue
from AlgoPlus.CTP.MdApi import run_tick_engine
from AlgoPlus.CTP.FutureAccount import get_simulate_account
from AlgoPlus.CTP.FutureAccount import FutureAccount
from AlgoPlus.CTP.TraderApiBase import TraderApiBase
from AlgoPlus.ta.time_bar import tick_to_bar
import pandas as pd
from datetime import datetime
from datetime import time as s_time
import operator
import time
import numpy as np
import os
import re
# 加入邮件通知
import smtplib
from email.mime.text import MIMEText # 导入 MIMEText 类发送纯文本邮件
from email.mime.multipart import MIMEMultipart # 导入 MIMEMultipart 类发送带有附件的邮件
from email.mime.application import MIMEApplication # 导入 MIMEApplication 类发送二进制附件
## 配置邮件信息
receivers = ["***@qq.com"] # 设置邮件接收人地址
subject = "订单流策略交易信号" # 设置邮件主题
#text = " " # 设置邮件正文
# file_path = "test.txt" # 设置邮件附件文件路径
## 配置邮件服务器信息
smtp_server = "smtp.qq.com" # 设置发送邮件的 SMTP 服务器地址
smtp_port = 465 # 设置发送邮件的 SMTP 服务器端口号,一般为 25 端口 465
sender = "***@qq.com" # 设置发送邮件的邮箱地址
username = "***@qq.com" # 设置发送邮件的邮箱用户名
password = "zrmpcgttataabhjh" #zrmpcgttataabhjh设置发送邮件的邮箱密码或授权码
tickdatadict = {} # 存储Tick数据的字典
quotedict = {} # 存储行情数据的字典
ofdatadict = {} # 存储K线数据的字典
trader_df = pd.DataFrame({}) # 存储交易数据的DataFrame对象
previous_volume = {} # 上一个Tick的成交量
tsymbollist={}
# 邮件通知模块
def send_mail(text):
msg = MIMEMultipart()
msg["From"] = sender
msg["To"] = ";".join(receivers)
msg["Subject"] = subject
msg.attach(MIMEText(text, "plain", "utf-8"))
smtp = smtplib.SMTP_SSL(smtp_server, smtp_port)
smtp.login(username, password)
smtp.sendmail(sender, receivers, msg.as_string())
smtp.quit()
def tickcome(md_queue):
global previous_volume
data=md_queue
instrument_id = data['InstrumentID'].decode() # 品种代码
ActionDay = data['ActionDay'].decode() # 交易日日期
update_time = data['UpdateTime'].decode() # 更新时间
update_millisec = str(data['UpdateMillisec']) # 更新毫秒数
created_at = ActionDay[:4] + '-' + ActionDay[4:6] + '-' + ActionDay[6:] + ' ' + update_time + '.' + update_millisec #创建时间
# 构建tick字典
tick = {
'symbol': instrument_id, # 品种代码和交易所ID
'created_at':datetime.strptime(created_at, "%Y-%m-%d %H:%M:%S.%f"),
#'created_at': created_at, # 创建时间
'price': float(data['LastPrice']), # 最新价
'last_volume': int(data['Volume']) - previous_volume.get(instrument_id, 0) if previous_volume.get(instrument_id, 0) != 0 else 0, # 瞬时成交量
'bid_p': float(data['BidPrice1']), # 买价
'bid_v': int(data['BidVolume1']), # 买量
'ask_p': float(data['AskPrice1']), # 卖价
'ask_v': int(data['AskVolume1']), # 卖量
'UpperLimitPrice': float(data['UpperLimitPrice']), # 涨停价
'LowerLimitPrice': float(data['LowerLimitPrice']), # 跌停价
'TradingDay': data['TradingDay'].decode(), # 交易日日期
'cum_volume': int(data['Volume']), # 最新总成交量
'cum_amount': float(data['Turnover']), # 最新总成交额
'cum_position': int(data['OpenInterest']), # 合约持仓量
}
# 更新上一个Tick的成交量
previous_volume[instrument_id] = int(data['Volume'])
if tick['last_volume']>0:
#print(tick['created_at'],'vol:',tick['last_volume'])
# 处理Tick数据
on_tick(tick)
def can_time(hour, minute):
hour = str(hour)
minute = str(minute)
if len(minute) == 1:
minute = "0" + minute
return int(hour + minute)
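# 示例仅作说明can_time(9, 5) -> 905can_time(21, 30) -> 2130,便于用整数直接比较时间段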
def on_tick(tick):
tm=can_time(tick['created_at'].hour,tick['created_at'].minute)
#print(tick['symbol'])
#print(1)
#if tm>1500 and tm<2100 :
# return
if tick['last_volume']==0:
return
quotes = tick
timetick=str(tick['created_at']).replace('+08:00', '')
tsymbol=tick['symbol']
if tsymbol not in tsymbollist.keys():
# 获取tick的买卖价和买卖量
tsymbollist[tsymbol]=tick
bid_p=quotes['bid_p']
ask_p=quotes['ask_p']
bid_v=quotes['bid_v']
ask_v=quotes['ask_v']
else:
# 获取上一个tick的买卖价和买卖量
rquotes =tsymbollist[tsymbol]
bid_p=rquotes['bid_p']
ask_p=rquotes['ask_p']
bid_v=rquotes['bid_v']
ask_v=rquotes['ask_v']
tsymbollist[tsymbol]=tick
tick_dt=pd.DataFrame({'datetime':timetick,'symbol':tick['symbol'],'mainsym':tick['symbol'].rstrip('0123456789').upper(),'lastprice':tick['price'],
'vol':tick['last_volume'],
'bid_p':bid_p,'ask_p':ask_p,'bid_v':bid_v,'ask_v':ask_v},index=[0])
sym = tick_dt['symbol'][0]
#print(tick_dt)
tickdata(tick_dt,sym)
# 这个函数的主要目的是将输入的买盘和卖盘字典合并、排序、累加并将处理后的结果存储在一个全局字典quotedict中同时返回这个结果。
def data_of(df):
global trader_df
# 将df数据合并到trader_df中
trader_df = pd.concat([trader_df, df], ignore_index=True)
#print('trader_df: ', len(trader_df))
#print(trader_df)
def process(bidDict, askDict, symbol):
try:
# 尝试从quotedict中获取对应品种的报价数据
dic = quotedict[symbol]
bidDictResult = dic['bidDictResult']
askDictResult = dic['askDictResult']
except KeyError:
# 如果获取失败则初始化bidDictResult和askDictResult为空字典
bidDictResult, askDictResult = {}, {}
# 将所有买盘字典和卖盘字典的key合并并按升序排序
sList = sorted(set(list(bidDict.keys()) + list(askDict.keys())))
# 遍历所有的key将相同key的值进行累加
for s in sList:
if s in bidDict:
if s in bidDictResult:
bidDictResult[s] = int(bidDict[s]) + bidDictResult[s]
else:
bidDictResult[s] = int(bidDict[s])
if s not in askDictResult:
askDictResult[s] = 0
else:
if s in askDictResult:
askDictResult[s] = int(askDict[s]) + askDictResult[s]
else:
askDictResult[s] = int(askDict[s])
if s not in bidDictResult:
bidDictResult[s] = 0
# 构建包含bidDictResult和askDictResult的字典并存入quotedict中
df = {'bidDictResult': bidDictResult, 'askDictResult': askDictResult}
quotedict[symbol] = df
return bidDictResult, askDictResult
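# 累加示意(假设同一bar内先后两笔数据仅作说明
# 第一次 process({'3308': 2}, {'3311': 5}, 'rb2410') -> bid={'3308': 2, '3311': 0}, ask={'3308': 0, '3311': 5}
# 第二次 process({'3308': 3}, {}, 'rb2410') -> bid['3308']累加为5其余价位保持不变结果缓存在quotedict['rb2410']中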
def tickdata(df,symbol):
tickdata =pd.DataFrame({'datetime':df['datetime'],'symbol':df['symbol'],'lastprice':df['lastprice'],
'volume':df['vol'],'bid_p':df['bid_p'],'bid_v':df['bid_v'],'ask_p':df['ask_p'],'ask_v':df['ask_v']})
try:
if symbol in tickdatadict.keys():
rdf=tickdatadict[symbol]
rdftm=pd.to_datetime(rdf['bartime'][0]).strftime('%Y-%m-%d %H:%M:%S')
now=str(tickdata['datetime'][0])
if now>rdftm:
try:
oo=ofdatadict[symbol]
data_of(oo)
#print('oo',oo)
if symbol in quotedict.keys():
quotedict.pop(symbol)
if symbol in tickdatadict.keys():
tickdatadict.pop(symbol)
if symbol in ofdatadict.keys():
ofdatadict.pop(symbol)
except Exception as e:
print('rdftm捕获到异常',e)
tickdata['bartime'] = pd.to_datetime(tickdata['datetime'])
tickdata['open'] = tickdata['lastprice']
tickdata['high'] = tickdata['lastprice']
tickdata['low'] = tickdata['lastprice']
tickdata['close'] = tickdata['lastprice']
tickdata['starttime'] = tickdata['datetime']
else:
tickdata['bartime'] = rdf['bartime']
tickdata['open'] = rdf['open']
tickdata['high'] = max(tickdata['lastprice'].values,rdf['high'].values)
tickdata['low'] = min(tickdata['lastprice'].values,rdf['low'].values)
tickdata['close'] = tickdata['lastprice']
tickdata['volume']=df['vol']+rdf['volume'].values
tickdata['starttime'] = rdf['starttime']
else :
print('新bar的第一个tick进入')
tickdata['bartime'] = pd.to_datetime(tickdata['datetime'])
tickdata['open'] = tickdata['lastprice']
tickdata['high'] = tickdata['lastprice']
tickdata['low'] = tickdata['lastprice']
tickdata['close'] = tickdata['lastprice']
tickdata['starttime'] = tickdata['datetime']
except Exception as e:
print('捕获到异常',e)
tickdata['bartime'] = pd.to_datetime(tickdata['bartime'])
bardata = tickdata.resample(on = 'bartime',rule = '1T',label = 'right',closed = 'right').agg({'starttime':'first','symbol':'last','open':'first','high':'max','low':'min','close':'last','volume':'sum'}).reset_index(drop = False)
bardata =bardata.dropna().reset_index(drop = True)
bardata['bartime'] = pd.to_datetime(bardata['bartime'][0]).strftime('%Y-%m-%d %H:%M:%S')
tickdatadict[symbol]=bardata
tickdata['volume']=df['vol'].values
#print(bardata['symbol'].values,bardata['bartime'].values)
orderflow_df_new(tickdata,bardata,symbol)
# time.sleep(0.5)
def orderflow_df_new(df_tick,df_min,symbol):
startArray = pd.to_datetime(df_min['starttime']).values
voluememin= df_min['volume'].values
highs=df_min['high'].values
lows=df_min['low'].values
opens=df_min['open'].values
closes=df_min['close'].values
#endArray = pd.to_datetime(df_min['bartime']).values
endArray = df_min['bartime'].values
#print(endArray)
deltaArray = np.zeros((len(endArray),))
tTickArray = pd.to_datetime(df_tick['datetime']).values
bp1TickArray = df_tick['bid_p'].values
ap1TickArray = df_tick['ask_p'].values
lastTickArray = df_tick['lastprice'].values
volumeTickArray = df_tick['volume'].values
symbolarray = df_tick['symbol'].values
indexFinal = 0
for index,tEnd in enumerate(endArray):
dt=endArray[index]
start = startArray[index]
bidDict = {}
askDict = {}
bar_vol=voluememin[index]
bar_close=closes[index]
bar_open=opens[index]
bar_low=lows[index]
bar_high=highs[index]
bar_symbol=symbolarray[index]
# for indexTick in range(indexFinal,len(df_tick)):
# if tTickArray[indexTick] >= tEnd:
# break
# elif (tTickArray[indexTick] >= start) & (tTickArray[indexTick] < tEnd):
Bp = round(bp1TickArray[0],4)
Ap = round(ap1TickArray[0],4)
LastPrice = round(lastTickArray[0],4)
Volume = volumeTickArray[0]
if LastPrice >= Ap:
if str(LastPrice) in askDict.keys():
askDict[str(LastPrice)] += Volume
else:
askDict[str(LastPrice)] = Volume
if LastPrice <= Bp:
if str(LastPrice) in bidDict.keys():
bidDict[str(LastPrice)] += Volume
else:
bidDict[str(LastPrice)] = Volume
# indexFinal = indexTick
bidDictResult,askDictResult = process(bidDict,askDict,symbol)
bidDictResult=dict(sorted(bidDictResult.items(),key=operator.itemgetter(0)))
askDictResult=dict(sorted(askDictResult.items(),key=operator.itemgetter(0)))
prinslist=list(bidDictResult.keys())
asklist=list(askDictResult.values())
bidlist=list(bidDictResult.values())
delta=(sum(askDictResult.values()) - sum(bidDictResult.values()))
#print(prinslist,asklist,bidlist)
#print(len(prinslist),len(bidDictResult),len(askDictResult))
df=pd.DataFrame({'price':pd.Series([prinslist]),'Ask':pd.Series([asklist]),'Bid':pd.Series([bidlist])})
#df=pd.DataFrame({'price':pd.Series(bidDictResult.keys()),'Ask':pd.Series(askDictResult.values()),'Bid':pd.Series(bidDictResult.values())})
df['symbol']=bar_symbol
df['datetime']=dt
df['delta']=str(delta)
df['close']=bar_close
df['open']=bar_open
df['high']=bar_high
df['low']=bar_low
df['volume']=bar_vol
#df['ticktime']=tTickArray[0]
df['dj'] = GetOrderFlow_dj(df)
ofdatadict[symbol]=df
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
def GetOrderFlow_dj(kData):
Config = {
'Value1': 3,
'Value2': 3,
'Value3': 3,
'Value4': True,
}
aryData = kData
djcout = 0
# 遍历kData中的每一行计算djcout指标
for index, row in aryData.iterrows():
kItem = aryData.iloc[index]
high = kItem['high']
low = kItem['low']
close = kItem['close']
open = kItem['open']
dtime = kItem['datetime']
price_s = kItem['price']
Ask_s = kItem['Ask']
Bid_s = kItem['Bid']
delta = kItem['delta']
price_s = price_s
Ask_s = Ask_s
Bid_s = Bid_s
gj = 0
xq = 0
gxx = 0
xxx = 0
# 遍历price_s中的每一个元素计算相关指标
for i in np.arange(0, len(price_s), 1):
duiji = {
'price': 0,
'time': 0,
'longshort': 0,
}
if i == 0:
delta = delta
order = {
"Price": price_s[i],
"Bid": { "Value":Bid_s[i]},
"Ask": { "Value":Ask_s[i]}
}
#空头堆积
if i >= 0 and i < len(price_s) - 1:
if (order["Bid"]["Value"] > Ask_s[i + 1] * int(Config['Value1'])):
gxx += 1
gj += 1
if gj >= int(Config['Value2']) and Config['Value4'] == True:
duiji['price'] = price_s[i]
duiji['time'] = dtime
duiji['longshort'] = -1
if float(duiji['price']) > 0:
djcout += -1
else:
gj = 0
#多头堆积
if i >= 1 and i < len(price_s) - 1:
if (order["Ask"]["Value"] > Bid_s[i - 1] * int(Config['Value1'])):
xq += 1
xxx += 1
if xq >= int(Config['Value2']) and Config['Value4'] == True:
duiji['price'] = price_s[i]
duiji['time'] = dtime
duiji['longshort'] = 1
if float(duiji['price']) > 0:
djcout += 1
else:
xq = 0
# 返回计算得到的djcout值
return djcout
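# 堆积计数示意按上方Config的Value1=Value2=3仅作说明
# 多头堆积某价位的主动买量Ask > 相邻更低一档的主动卖量Bid*3连续满足3档则djcout加1
# 空头堆积某价位的主动卖量Bid > 相邻更高一档的主动买量Ask*3连续满足3档则djcout减1
# 例如 price=[3300,3301,3302,3303,3304]Ask=[5,30,30,30,5]Bid=[1,1,1,1,1]
# 多头条件在i=1,2,3连续成立GetOrderFlow_dj返回djcout=+1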
#交易程序---------------------------------------------------------------------------------------------------------------------------------------------------------------------
class MyTrader(TraderApiBase):
def __init__(self, broker_id, td_server, investor_id, password, app_id, auth_code, md_queue=None, page_dir='', private_resume_type=2, public_resume_type=2):
self.py=5 #设置委托价格的偏移,更加容易促成成交。仅螺纹,其他品种根据最小点波动,自己设置
self.cont_df=0
self.trailing_stop_percent = 0.02 #跟踪出场参数
self.fixed_stop_loss_percent = 0.01 #固定出场参数
self.dj_X=1 #开仓的堆积参数
self.pos=0
self.Lots=1
self.short_trailing_stop_price = 0
self.long_trailing_stop_price = 0
self.sl_long_price=0
self.sl_shor_price=0
self.out_long=0
self.out_short=0
self.clearing_executed=False
self.kgdata=True
#读取保存的数据
def read_to_csv(self,symbol):
# 文件夹路径和文件路径
# 使用正则表达式提取英文字母并重新赋值给symbol
symbol = ''.join(re.findall('[a-zA-Z]', str(symbol)))
folder_path = "traderdata"
file_path = os.path.join(folder_path, f"{str(symbol)}traderdata.csv")
# 如果文件夹不存在则创建
if not os.path.exists(folder_path):
os.makedirs(folder_path)
# 读取保留的模型数据CSV文件
if os.path.exists(file_path):
df = pd.read_csv(file_path)
if not df.empty and self.kgdata==True:
# 选择最后一行数据
row = df.iloc[-1]
# 根据CSV文件的列名将数据赋值给相应的属性
self.pos = int(row['pos'])
self.short_trailing_stop_price = float(row['short_trailing_stop_price'])
self.long_trailing_stop_price = float(row['long_trailing_stop_price'])
self.sl_long_price = float(row['sl_long_price'])
self.sl_shor_price = float(row['sl_shor_price'])
# self.out_long = int(row['out_long'])
# self.out_short = int(row['out_short'])
print("找到历史交易数据文件,已经更新持仓,止损止盈数据", df.iloc[-1])
self.kgdata=False
else:
pass
#print("没有找到历史交易数据文件", file_path)
#如果没有找到CSV则初始化变量
pass
#保存数据
def save_to_csv(self,symbol):
# 使用正则表达式提取英文字母并重新赋值给symbol
symbol = ''.join(re.findall('[a-zA-Z]', str(symbol)))
# 创建DataFrame
data = {
'datetime': [trader_df['datetime'].iloc[-1]],
'pos': [self.pos],
'short_trailing_stop_price': [self.short_trailing_stop_price],
'long_trailing_stop_price': [self.long_trailing_stop_price],
'sl_long_price': [self.sl_long_price],
'sl_shor_price': [self.sl_shor_price],
# 'out_long': [self.out_long],
# 'out_short': [self.out_short]
}
df = pd.DataFrame(data)
# 将DataFrame保存到CSV文件
df.to_csv(f"traderdata/{str(symbol)}traderdata.csv", index=False)
#每日收盘重置数据
def day_data_reset(self):
# 获取当前时间
current_time = datetime.now().time()
# 第一时间范围
clearing_time1_start = s_time(15,00)
clearing_time1_end = s_time(15,15)
# 第二时间范围
clearing_time2_start = s_time(23,0)
clearing_time2_end = s_time(23,15)
# 标志变量self.clearing_executed已在__init__中初始化用于保证每个清理时间段内只清空一次当天行情数据
# 检查当前时间第一个操作的时间范围内
if clearing_time1_start <= current_time <= clearing_time1_end and not self.clearing_executed :
self.clearing_executed = True # 设置标志变量为已执行
trader_df.drop(trader_df.index,inplace=True)#清除当天的行情数据
# 检查当前时间是否在第二个操作的时间范围内
elif clearing_time2_start <= current_time <= clearing_time2_end and not self.clearing_executed :
self.clearing_executed = True # 设置标志变量为已执行
trader_df.drop(trader_df.index,inplace=True) #清除当天的行情数据
else:
self.clearing_executed = False
pass
return self.clearing_executed
def OnRtnTrade(self, pTrade):
print("||成交回报||", pTrade)
def OnRspOrderInsert(self, pInputOrder, pRspInfo, nRequestID, bIsLast):
print("||OnRspOrderInsert||", pInputOrder, pRspInfo, nRequestID, bIsLast)
# 订单状态通知
def OnRtnOrder(self, pOrder):
print("||订单回报||", pOrder)
def Join(self):
data = None
while True:
if self.status == 0:
while not self.md_queue.empty():
data = self.md_queue.get(block=False)
instrument_id = data['InstrumentID'].decode() # 品种代码
self.read_to_csv(instrument_id)
self.day_data_reset()
tickcome(data)
#新K线开始启动交易程序 and 保存行情数据
if len(trader_df)>self.cont_df:
# 检查文件是否存在
csv_file_path = f"traderdata/{instrument_id}_ofdata.csv"
if os.path.exists(csv_file_path):
# 仅保存最后一行数据
trader_df.tail(1).to_csv(csv_file_path, mode='a', header=False, index=False)
else:
# 创建新文件并保存整个DataFrame
trader_df.to_csv(csv_file_path, index=False)
# 更新跟踪止损价格
if self.long_trailing_stop_price >0 and self.pos>0:
#print('datetime+sig: ',dt,'旧多头出线',self.long_trailing_stop_price,'low',self.low[0])
self.long_trailing_stop_price = trader_df['low'].iloc[-1] if self.long_trailing_stop_price<trader_df['low'].iloc[-1] else self.long_trailing_stop_price
self.save_to_csv(instrument_id)
#print('datetime+sig: ',dt,'多头出线',self.long_trailing_stop_price)
if self.short_trailing_stop_price >0 and self.pos<0:
#print('datetime+sig: ',dt,'旧空头出线',self.short_trailing_stop_price,'high',self.high[0])
self.short_trailing_stop_price = trader_df['high'].iloc[-1] if trader_df['high'].iloc[-1] <self.short_trailing_stop_price else self.short_trailing_stop_price
self.save_to_csv(instrument_id)
#print('datetime+sig: ',dt,'空头出线',self.short_trailing_stop_price)
self.out_long=self.long_trailing_stop_price * (1 - self.trailing_stop_percent)
self.out_short=self.short_trailing_stop_price*(1 + self.trailing_stop_percent)
#print('datetime+sig: ',dt,'空头出线',self.out_short)
#print('datetime+sig: ',dt,'多头出线',self.out_long)
# 跟踪出场
if self.out_long >0:
print('datetime+sig: ',trader_df['datetime'].iloc[-1],'预设——多头止盈——','TR',self.out_long,'low', trader_df['low'].iloc[-1])
if trader_df['low'].iloc[-1] < self.out_long and self.pos>0 and self.sl_long_price>0 and trader_df['low'].iloc[-1]>self.sl_long_price:
print('datetime+sig: ',trader_df['datetime'].iloc[-1],'多头止盈','TR',self.out_long,'low', trader_df['low'].iloc[-1])
#平多
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-self.py,self.Lots,b'1',b'1')
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-self.py,self.Lots,b'1',b'3')
self.long_trailing_stop_price = 0
self.out_long=0
self.sl_long_price=0
self.pos = 0
self.save_to_csv(instrument_id)
if self.out_short>0:
print('datetime+sig: ',trader_df['datetime'].iloc[-1],'预设——空头止盈——: ','TR',self.out_short,'high', trader_df['high'].iloc[-1])
if trader_df['high'].iloc[-1] > self.out_short and self.pos<0 and self.sl_shor_price>0 and trader_df['high'].iloc[-1]<self.sl_shor_price:
print('datetime+sig: ',trader_df['datetime'].iloc[-1],'空头止盈: ','TR',self.out_short,'high', trader_df['high'].iloc[-1])
#平空
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+self.py,self.Lots,b'0',b'1')
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+self.py,self.Lots,b'0',b'3')
self.short_trailing_stop_price = 0
self.sl_shor_price=0
self.out_short=0
self.pos = 0
self.save_to_csv(instrument_id)
# 固定止损
self.fixed_stop_loss_L = self.sl_long_price * (1 - self.fixed_stop_loss_percent)
if self.pos>0:
print('datetime+sig: ', trader_df['datetime'].iloc[-1], '预设——多头止损', 'SL', self.fixed_stop_loss_L, 'close', trader_df['close'].iloc[-1])
if self.sl_long_price>0 and self.fixed_stop_loss_L>0 and self.pos > 0 and trader_df['close'].iloc[-1] < self.fixed_stop_loss_L:
print('datetime+sig: ', trader_df['datetime'].iloc[-1], '多头止损', 'SL', self.fixed_stop_loss_L, 'close', trader_df['close'].iloc[-1])
#平多
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-self.py,self.Lots,b'1',b'1')
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-self.py,self.Lots,b'1',b'3')
self.long_trailing_stop_price = 0
self.sl_long_price=0
self.out_long = 0
self.pos = 0
self.save_to_csv(instrument_id)
self.fixed_stop_loss_S = self.sl_shor_price * (1 + self.fixed_stop_loss_percent)
if self.pos<0:
print('datetime+sig: ', trader_df['datetime'].iloc[-1], '预设——空头止损', 'SL', self.fixed_stop_loss_S, 'close', trader_df['close'].iloc[-1])
if self.sl_shor_price>0 and self.fixed_stop_loss_S>0 and self.pos < 0 and trader_df['close'].iloc[-1] > self.fixed_stop_loss_S:
print('datetime+sig: ', trader_df['datetime'].iloc[-1], '空头止损', 'SL', self.fixed_stop_loss_S, 'close', trader_df['close'].iloc[-1])
#平空
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+self.py,self.Lots,b'0',b'1')
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+self.py,self.Lots,b'0',b'3')
self.short_trailing_stop_price = 0
self.sl_shor_price=0
self.out_short = 0
self.pos = 0
self.save_to_csv(instrument_id)
#日均线
trader_df['dayma']=trader_df['close'].mean()
# 计算累积的delta值
trader_df['delta'] = trader_df['delta'].astype(float)
trader_df['delta累计'] = trader_df['delta'].cumsum()
#大于日均线
开多1=trader_df['dayma'].iloc[-1] > 0 and trader_df['close'].iloc[-1] > trader_df['dayma'].iloc[-1]
#累计多空净量大于X
开多4=trader_df['delta累计'].iloc[-1] > 2000 and trader_df['delta'].iloc[-1] > 1500
#小于日均线
开空1=trader_df['dayma'].iloc[-1]>0 and trader_df['close'].iloc[-1] < trader_df['dayma'].iloc[-1]
#累计多空净量小于X
开空4=trader_df['delta累计'].iloc[-1] < -2000 and trader_df['delta'].iloc[-1] < -1500
开多组合= 开多1 and 开多4 and trader_df['dj'].iloc[-1]>self.dj_X
开空条件= 开空1 and 开空4 and trader_df['dj'].iloc[-1]<-self.dj_X
平多条件=trader_df['dj'].iloc[-1]<-self.dj_X
平空条件=trader_df['dj'].iloc[-1]>self.dj_X
#开仓
#多头开仓条件
if self.pos<0 and 平空条件 :
print('平空: ','ExchangeID: ',data['ExchangeID'],'InstrumentID',data['InstrumentID'],'AskPrice1',data['AskPrice1']+self.py)
#insert_order最后两个参数买卖方向b'0'买、b'1'卖开平标志0开仓、1平仓、2强平、3平今、4平昨、5强减、6本地强平
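#调用示意(与本文件用法一致,仅作说明):
# 开多self.insert_order(exchange, instrument, 卖一价+py, Lots, b'0', b'0')  买入+开仓
# 平多self.insert_order(exchange, instrument, 买一价-py, Lots, b'1', b'1'/b'3')  卖出+平仓/平今
# 开空self.insert_order(exchange, instrument, 买一价-py, Lots, b'1', b'0')  卖出+开仓
# 平空self.insert_order(exchange, instrument, 卖一价+py, Lots, b'0', b'1'/b'3')  买入+平仓/平今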
#平空
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+self.py,self.Lots,b'0',b'1')
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+self.py,self.Lots,b'0',b'3')
self.pos=0
self.sl_shor_price=0
self.short_trailing_stop_price=0
print('datetime+sig: ', trader_df['datetime'].iloc[-1], '反手平空:', '平仓价格:', data['AskPrice1']+self.py,'堆积数:', trader_df['dj'].iloc[-1])
self.save_to_csv(instrument_id)
#发送邮件
text = f"平空交易: 交易品种为{data['InstrumentID']}, 交易时间为{trader_df['datetime'].iloc[-1]}, 反手平空的平仓价格{data['AskPrice1']+self.py}"
send_mail(text)
if self.pos==0 and 开多组合:
print('开多: ','ExchangeID: ',data['ExchangeID'],'InstrumentID',data['InstrumentID'],'AskPrice1',data['AskPrice1']+self.py)
#开多
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['AskPrice1']+self.py,self.Lots,b'0',b'0')
print('datetime+sig: ', trader_df['datetime'].iloc[-1], '多头开仓', '开仓价格:', data['AskPrice1']+self.py,'堆积数:', trader_df['dj'].iloc[-1])
self.pos=1
self.long_trailing_stop_price=data['AskPrice1']
self.sl_long_price=data['AskPrice1']
self.save_to_csv(instrument_id)
#发送邮件
text = f"开多交易: 交易品种为{data['InstrumentID']}, 交易时间为{trader_df['datetime'].iloc[-1]}, 多头开仓的开仓价格{data['AskPrice1']+self.py}预设——多头止盈——TR{self.out_long},多头止损SL{self.fixed_stop_loss_L}"
send_mail(text)
if self.pos>0 and 平多条件 :
print('平多: ','ExchangeID: ',data['ExchangeID'],'InstrumentID',data['InstrumentID'],'BidPrice1',data['BidPrice1']-self.py)
#平多
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-self.py,self.Lots,b'1',b'1')
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-self.py,self.Lots,b'1',b'3')
self.pos=0
self.long_trailing_stop_price=0
self.sl_long_price=0
print('datetime+sig: ', trader_df['datetime'].iloc[-1], '反手平多', '平仓价格:', data['BidPrice1']-self.py,'堆积数:', trader_df['dj'].iloc[-1])
self.save_to_csv(instrument_id)
#发送邮件
text = f"平多交易: 交易品种为{data['InstrumentID']}, 交易时间为{trader_df['datetime'].iloc[-1]}, 反手平多的平仓价格{data['BidPrice1']-self.py}"
send_mail(text)
if self.pos==0 and 开空条件 :
print('开空: ','ExchangeID: ',data['ExchangeID'],'InstrumentID',data['InstrumentID'],'BidPrice1',data['BidPrice1'])
#开空
self.insert_order(data['ExchangeID'], data['InstrumentID'], data['BidPrice1']-self.py,self.Lots,b'1',b'0')
print('datetime+sig: ', trader_df['datetime'].iloc[-1], '空头开仓', '开仓价格:', data['BidPrice1']-self.py,'堆积数:', trader_df['dj'].iloc[-1])
self.pos=-1
self.short_trailing_stop_price=data['BidPrice1']
self.sl_shor_price=data['BidPrice1']
self.save_to_csv(instrument_id)
# 发送邮件
text = f"开空交易: 交易品种为{data['InstrumentID']}, 交易时间为{trader_df['datetime'].iloc[-1]}, 空头开仓的开仓价格{data['BidPrice1']-self.py},预设——空头止盈——TR{self.out_short},空头止损{self.fixed_stop_loss_S}"
send_mail(text)
print(trader_df)
self.cont_df=len(trader_df)
else:
time.sleep(1)
def run_trader(broker_id, td_server, investor_id, password, app_id, auth_code, md_queue=None, page_dir='', private_resume_type=2, public_resume_type=2):
my_trader = MyTrader(broker_id, td_server, investor_id, password, app_id, auth_code, md_queue, page_dir, private_resume_type, public_resume_type)
my_trader.Join()
if __name__ == '__main__':
#global symbol
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
#注意运行前请先安装好algoplus,
# pip install AlgoPlus
#http://www.algo.plus/ctp/python/0103001.html
#用simnow模拟不要忘记屏蔽下方实盘的future_account字典
future_account = get_simulate_account(
investor_id='135858', # simnow账户注意是登录账户的IDSIMNOW个人首页查看
password='Zj82334475', # simnow密码
server_name='TEST', # 电信1、电信2、移动、TEST、N视界
subscribe_list=[b'rb2405'], # 合约列表
)
#实盘用这个不要忘记屏蔽上方simnow的future_account字典
# future_account = FutureAccount(
# broker_id='', # 期货公司BrokerID
# server_dict={'TDServer': "ip:port", 'MDServer': 'ip:port'}, # TDServer为交易服务器MDServer为行情服务器。服务器地址格式为"ip:port。"
# reserve_server_dict={}, # 备用服务器地址
# investor_id='', # 账户
# password='', # 密码
# app_id='simnow_client_test', # 认证使用AppID
# auth_code='0000000000000000', # 认证使用授权码
# subscribe_list=[b'rb2405'], # 订阅合约列表
# md_flow_path='./log', # MdApi流文件存储地址默认MD_LOCATION
# td_flow_path='./log', # TraderApi流文件存储地址默认TD_LOCATION
# )
print('开始',len(future_account.subscribe_list))
# 共享队列
share_queue = Queue(maxsize=200)
# 行情进程
md_process = Process(target=run_tick_engine, args=(future_account, [share_queue]))
# 交易进程
trader_process = Process(target=run_trader, args=(
future_account.broker_id,
future_account.server_dict['TDServer'],
future_account.investor_id,
future_account.password,
future_account.app_id,
future_account.auth_code,
share_queue, # 队列
future_account.td_flow_path
))
md_process.start()
trader_process.start()
# success = f"行情和交易启动成功!{future_account.subscribe_list}"
# send_mail(success)
md_process.join()
trader_process.join()

View File

@@ -0,0 +1,38 @@
'''
Author: zhoujie2104231 zhoujie@me.com
Date: 2024-02-25 11:17:14
LastEditors: zhoujie2104231 zhoujie@me.com
LastEditTime: 2024-02-25 21:40:34
# 使用说明:使用前需要调整的相关参数如下
# 1.确定python到csv文件夹下运行,并修改到对应的csv文件
# 2.设置按照index拆分的表名此处是按照“合约代码”的不同进行拆分
# 3.使用gbk或者utf-8编译
'''
import csv
import os
import pandas as pd
def read_large_csv(file_path, chunk_size):
reader = pd.read_csv(file_path, iterator=True, encoding="utf-8")
chunks = []
while True:
try:
chunk = reader.get_chunk(chunk_size)
chunks.append(chunk)
except StopIteration:
break
return pd.concat(chunks, ignore_index=True)
filepath = './合成tick数据/merged_data_new.csv'
chunk_size = 10000
data = read_large_csv(filepath, chunk_size)
groups = data.groupby(data['合约代码'])
folder_path = "split_csvs"
if not os.path.exists(folder_path):
os.mkdir('split_csvs')
for group in groups:
group[1].to_csv('./split_csvs/{}.csv'.format(str(group[0])), index = False, encoding = 'utf-8')
print("%s.csv创建成功" %(group[0]))

View File

@@ -0,0 +1,64 @@
'''
Author: zhoujie2104231 zhoujie@me.com
Date: 2024-02-25 16:19:47
LastEditors: zhoujie2104231 zhoujie@me.com
LastEditTime: 2024-02-25 16:22:11
FilePath: \Gitee_Code\trading_strategy\SS_Code\SF08\split_data_finall.py
Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
'''
import csv
import os
import pandas as pd
def read_large_csv(file_path, chunk_size):
reader = pd.read_csv(file_path, iterator=True, encoding="utf-8")
chunks = []
while True:
try:
chunk = reader.get_chunk(chunk_size)
chunks.append(chunk)
except StopIteration:
break
return pd.concat(chunks, ignore_index=True)
# 读取原始CSV文件
# with open('merged_data_new.csv', 'r', encoding="utf-8") as f:
# reader = csv.reader(f)
# data = list(reader)
# print("读取文件成功")
filepath = 'merged_data_new.csv'
chunk_size = 10000
data = read_large_csv(filepath, chunk_size)
print("读取文件成功")
# 创建一个字典key为symbol列的值value为一个列表存放与该symbol值相关的数据行
symbol_data = {}
header_row = data.columns.tolist()  # pandas已将首行解析为列标题
for row in data.itertuples(index=False, name=None):  # 逐行遍历DataFrame每行为一个元组
    symbol = row[2]
    if symbol not in symbol_data:
        symbol_data[symbol] = []
    symbol_data[symbol].append(list(row))
print("数据字典创建成功")
# 创建与symbol列值对应的目录
folder_path = "split_csvs"
if not os.path.exists(folder_path):
os.mkdir('split_csvs')
for symbol in symbol_data:
    os.makedirs(os.path.join('split_csvs', symbol), exist_ok=True)
# 将数据写入拆分后的CSV文件中
for symbol, rows in symbol_data.items():
with open(os.path.join('split_csvs', symbol, f'{symbol}.csv'), 'w', newline='') as f:
writer = csv.writer(f)
writer.writerows([header_row])
writer.writerows(rows)
print("csv拆分成功")

View File

@@ -0,0 +1,132 @@
'''
Author: zhoujie2104231 zhoujie@me.com
# Date: 2024-02-25 17:51:46
LastEditors: zhoujie2104231 zhoujie@me.com
LastEditTime: 2024-03-07 22:48:22
# 使用说明:使用前需要调整的相关参数如下
# 1.确定python到csv文件夹下运行,
# 2.统一代码的添加:主力连续为888,指数连续可以用999,次主力连续可以使用889,其他的可以不用添加统一代码,注释掉。
# 3.文件夹下的文件名按照datetime进行排序修改
# 4.data按照时间排序需要根据参数修改['业务日期','最后修改时间','最后修改毫秒'],如果前面文件名按照时间修改好了,不用修改
# 5.使用gbk或者utf-8编译
'''
import pandas as pd
import os
# import datetime as dt
def split_alpha_numeric(string):
"""
Split a string into alphabetical and numerical characters.
Args:
string: The string to split.
Returns:
A tuple containing two strings, the first containing the alphabetical
characters and the second containing the numerical characters.
"""
alpha_chars = ""
numeric_chars = ""
for char in string:
if char.isalpha():
alpha_chars += char
elif char.isdigit():
numeric_chars += char
return alpha_chars, numeric_chars
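# 示例仅作说明split_alpha_numeric("rb2410") -> ("rb", "2410")split_alpha_numeric("TA405") -> ("TA", "405")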
#第一种方法:
# 获取当前目录下的所有csv文件
all_csv_files = [file for file in os.listdir('.') if file.endswith('.csv')]
# csv需要筛选的文件名字符
sp_char = '_2021'
# 获取当前目录下的所有文件名包含sp_char的csv文件
csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
print("csv_files:", csv_files)
# 另一种遍历方式
# folder_path = "D:/data_transfer/ag888"
# name_chr = "202309"
# csv_files = []
# for root, dirs, files in os.walk(folder_path):
# for file in files:
# if file.endswith('.csv'):
# # 获取文件名(不包含扩展名)
# filename = os.path.splitext(file)[0]
# match_file = re.search(r'(?<=^.{7}).{6}(?=.{2})',filename)
# try:
# if match_file.group() == name_chr:#
# full_filename = filename + ".csv"
# csv_files.append(full_filename)
# else:
# #print("文件夹中有csv文件但没有文件名含%s的csv文件"%(name_chr))
# pass
# except AttributeError:
# continue
# else:
# #print("文件夹中没有csv文件")
# pass
# 将当前的数据按照文件名进行排序生成list文件
#csv_files.sort(key=lambda x: int(x.split('.')[0]))
# 创建新的DataFrame来存储合并后的数据
merged_df = pd.DataFrame()
# 循环遍历每个csv文件
for file in csv_files:
# 读取csv文件并使用第一行为列标题编译不通过可以改为gbk
df = pd.read_csv(file, header=0, encoding='gbk')
# 删除重复行
df.drop_duplicates(inplace=True)
# 将数据合并到新的DataFrame中
merged_df = pd.concat([merged_df, df], ignore_index=True)
# 删除重复列
merged_df.drop_duplicates(subset=merged_df.columns.tolist(), inplace=True)
# 重置行索引
merged_df.reset_index(inplace=True, drop=True)
print("合约代码:", merged_df["合约代码"])
# 插入新的数据
# code_value = csv_files[0].split
# merged_df.insert(loc=1,column="统一代码", value="rb888")
alpha_chars, numeric_chars = split_alpha_numeric(merged_df["合约代码"][0])
print("Alphabetical characters:", alpha_chars)
# print("Numerical characters:", numeric_chars[1])
# 添加主力连续的合约代码主力连续为888指数连续可以用999次主力连续可以使用889表头用“统一代码”
code_value = alpha_chars + "888"
print("code_value characters:", code_value)
merged_df.insert(loc=1,column="统一代码", value=code_value)
# 将合并后的数据保存到csv文件中
folder_path = "合成tick数据2019-2021"
if not os.path.exists(folder_path):
os.mkdir('合成tick数据2019-2021')
# sorted_merged_df = merged_df.sort_values(by= ['业务日期','最后修改时间','最后修改毫秒'], ascending=[True, True, True])
# sorted_merged_df.to_csv('./合成tick数据/%s.csv'%(code_value), index=False)
merged_df['时间'] = pd.to_datetime(merged_df['时间'])
sorted_merged_df = merged_df.sort_values(by = ['时间'], ascending=True)
sorted_merged_df.to_csv('./合成tick数据2019-2021/%s%s.csv'%(code_value,sp_char), index=False)
del merged_df
del sorted_merged_df
#merged_df.to_csv('./合成tick数据/%s.csv'%(code_value), index=False) #数据按照时间排序,前面文件夹按照时间修改好了可以直接用这里
# 打印提示信息
print("CSV文件合并成功")

View File

@@ -0,0 +1,134 @@
'''
Author: zhoujie2104231 zhoujie@me.com
# Date: 2024-02-25 17:51:46
LastEditors: zhoujie2104231 zhoujie@me.com
LastEditTime: 2024-03-17 16:59:35
# 使用说明:使用前需要调整的相关参数如下
# 1.确定python到csv文件夹下运行,
# 2.统一代码的添加:主力连续为888,指数连续可以用999,次主力连续可以使用889,其他的可以不用添加统一代码,注释掉。
# 3.文件夹下的文件名按照datetime进行排序修改
# 4.data按照时间排序需要根据参数修改['业务日期','最后修改时间','最后修改毫秒'],如果前面文件名按照时间修改好了,不用修改
# 5.使用gbk或者utf-8编译
'''
import pandas as pd
import os
# import datetime as dt
def split_alpha_numeric(string):
"""
Split a string into alphabetical and numerical characters.
Args:
string: The string to split.
Returns:
A tuple containing two strings, the first containing the alphabetical
characters and the second containing the numerical characters.
"""
alpha_chars = ""
numeric_chars = ""
for char in string:
if char.isalpha():
alpha_chars += char
elif char.isdigit():
numeric_chars += char
return alpha_chars, numeric_chars
#第一种方法:
# 获取当前目录下的所有csv文件
all_csv_files = [file for file in os.listdir('.') if file.endswith('.csv')]
# csv需要筛选的文件名字符
sp_char = '_2023'
# 获取当前目录下的所有文件名包含sp_char的csv文件
csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
print("csv_files:", csv_files)
# 另一种遍历方式
# folder_path = "D:/data_transfer/ag888"
# name_chr = "202309"
# csv_files = []
# for root, dirs, files in os.walk(folder_path):
# for file in files:
# if file.endswith('.csv'):
# # 获取文件名(不包含扩展名)
# filename = os.path.splitext(file)[0]
# match_file = re.search(r'(?<=^.{7}).{6}(?=.{2})',filename)
# try:
# if match_file.group() == name_chr:#
# full_filename = filename + ".csv"
# csv_files.append(full_filename)
# else:
# #print("文件夹中有csv文件但没有文件名含%s的csv文件"%(name_chr))
# pass
# except AttributeError:
# continue
# else:
# #print("文件夹中没有csv文件")
# pass
# 将当前的数据按照文件名进行排序生成list文件
#csv_files.sort(key=lambda x: int(x.split('.')[0]))
# 创建新的DataFrame来存储合并后的数据
merged_df = pd.DataFrame()
# 循环遍历每个csv文件
for file in csv_files:
# 读取csv文件并使用第一行为列标题编译不通过可以改为gbk
df = pd.read_csv(file, header=0, encoding='gbk')
# 删除重复行
df.drop_duplicates(inplace=True)
# 将数据合并到新的DataFrame中
merged_df = pd.concat([merged_df, df], ignore_index=True)
# 删除重复列
merged_df.drop_duplicates(subset=merged_df.columns.tolist(), inplace=True)
# 重置行索引
merged_df.reset_index(inplace=True, drop=True)
print("合约代码:", merged_df["合约代码"])
# 插入新的数据
# code_value = csv_files[0].split
# merged_df.insert(loc=1,column="统一代码", value="rb888")
alpha_chars, numeric_chars = split_alpha_numeric(merged_df["合约代码"][0])
print("Alphabetical characters:", alpha_chars)
# print("Numerical characters:", numeric_chars[1])
# 添加主力连续的合约代码主力连续为888指数连续可以用999次主力连续可以使用889表头用“统一代码”
code_value = alpha_chars + "888"
print("code_value characters:", code_value)
merged_df.insert(loc=1,column="统一代码", value=code_value)
# 将合并后的数据保存到csv文件中
folder_path = "合成tick数据2022-2023"
if not os.path.exists(folder_path):
os.mkdir('合成tick数据2022-2023')
sorted_merged_df = merged_df.sort_values(by= ['业务日期','最后修改时间','最后修改毫秒'], ascending=[True, True, True])
sorted_merged_df.to_csv('./合成tick数据2022-2023/%s%s.csv'%(code_value,sp_char), index=False)
del merged_df
del sorted_merged_df
# merged_df['时间'] = pd.to_datetime(merged_df['时间'])
# sorted_merged_df = merged_df.sort_values(by = ['时间'], ascending=True)
# sorted_merged_df.to_csv('./合成tick数据/%s.csv'%(code_value), index=False)
#merged_df.to_csv('./合成tick数据/%s.csv'%(code_value), index=False) #数据按照时间排序,前面文件夹按照时间修改好了可以直接用这里
# 打印提示信息
print("CSV文件合并成功")

View File

@@ -0,0 +1,526 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"root_path = r\"C:/Users/zhouj/Desktop/data\"\n",
"output_path = r\"C:/Users/zhouj/Desktop/a88.csv\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 等差复权\n",
"adjust = df['close'].shift() - df['open']\n",
"adjust = np.where(df['symbol'] != df['symbol'].shift(), adjust, 0)\n",
"df['open_adj'] = df['open'] + adjust.cumsum()\n",
"df['close_adj'] = df['close'] + adjust.cumsum()\n",
"df['low_adj'] = df['low'] + adjust.cumsum()\n",
"df['high_adj'] = df['high'] + adjust.cumsum()\n",
"# 等比复权\n",
"adjust = df['close'].shift() / df['open']\n",
"adjust = np.where(df['symbol'] != df['symbol'].shift(), adjust, 1)\n",
"df['open_adj'] = df['open'] * adjust.cumprod()\n",
"df['close_adj'] = df['close'] * adjust.cumprod()\n",
"df['low_adj'] = df['low'] * adjust.cumprod()\n",
"df['high_adj'] = df['high'] * adjust.cumprod()\n"
]
},
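  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 等差复权最小示例仅作说明demo数据为虚构的换月行情实际单元格按close.shift()-open计算此处用close近似\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "demo = pd.DataFrame({'symbol': ['a1905', 'a1905', 'a1909', 'a1909'],\n",
    "                     'close':  [3310.0, 3309.0, 3412.0, 3411.0]})\n",
    "adjust = np.where(demo['symbol'] != demo['symbol'].shift(), demo['close'].shift() - demo['close'], 0)\n",
    "adjust = pd.Series(adjust).fillna(0)\n",
    "demo['close_adj'] = demo['close'] + adjust.cumsum()\n",
    "print(demo)  # 换月后a1909的价格被整体平移-103点在拼接处与a1905末值3309衔接"
   ]
  },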
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"files = []\n",
"\n",
"for r, ds, fs in os.walk(root_path):\n",
" for f in fs:\n",
" # if f[0:4] == '2023':\n",
" abs_filepath = os.path.join(r, f)\n",
" files.append(abs_filepath)\n",
"files = sorted(files)\n",
"\n",
"df = pd.DataFrame()\n",
"for f in files:\n",
" df_temp = pd.read_csv(\n",
" f,\n",
" usecols=[1, 2, 3, 4, 8, 13, 14, 15, 16],\n",
" names=[\n",
" \"统一代码\",\n",
" \"合约代码\",\n",
" \"时间\",\n",
" \"最新\",\n",
" \"成交量\",\n",
" \"买一价\",\n",
" \"卖一价\",\n",
" \"买一量\",\n",
" \"卖一量\",\n",
" ],\n",
" skiprows=1,\n",
" encoding=\"utf-8\",\n",
" )\n",
" # df_temp = pd.read_csv(f, usecols=[0,5], names=[\n",
" # 'datetime', 'volume'])\n",
" df = pd.concat([df, df_temp])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# files = []\n",
"\n",
"# for r, ds, fs in os.walk(root_path):\n",
"# for f in fs:\n",
"# # if f[0:4] == '2023':\n",
"# abs_filepath = os.path.join(r, f)\n",
"# files.append(abs_filepath)\n",
"# files = sorted(files)\n",
"\n",
"# df = pd.DataFrame()\n",
"# for f in files:\n",
"# df_temp = pd.read_csv(\n",
"# f,\n",
"# usecols=[0, 1, 4, 11, 20, 21, 22, 23, 24, 25],\n",
"# names=[\n",
"# \"交易日\",\n",
"# \"合约代码\",\n",
"# \"最新价\",\n",
"# \"数量\",\n",
"# \"最后修改时间\",\n",
"# \"最后修改毫秒\",\n",
"# \"申买价一\",\n",
"# \"申买量一\",\n",
"# \"申卖价一\",\n",
"# \"申卖量一\",\n",
"# ],\n",
"# skiprows=1,\n",
"# encoding=\"gbk\",\n",
"# )\n",
"# # df_temp = pd.read_csv(f, usecols=[0,5], names=[\n",
"# # 'datetime', 'volume'])\n",
"# df = pd.concat([df, df_temp])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()\n",
"# 21754840"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 统一代码 合约代码 时间 最新 成交量 买一价 卖一价 \\\n",
"1305669 a888 a1905 2019-04-22 15:00:00.568 3309.0 0 3308.0 3311.0 \n",
"1305670 a888 a1905 2019-04-22 15:00:36.638 3309.0 0 3308.0 3311.0 \n",
"1305671 a888 a1909 2019-04-22 20:59:00.014 3412.0 224 3411.0 3412.0 \n",
"1305672 a888 a1909 2019-04-22 21:00:00.461 3412.0 108 3412.0 3413.0 \n",
"1305673 a888 a1909 2019-04-22 21:00:00.958 3411.0 150 3410.0 3411.0 \n",
"\n",
" 买一量 卖一量 \n",
"1305669 25 10 \n",
"1305670 25 10 \n",
"1305671 2 8 \n",
"1305672 10 19 \n",
"1305673 43 3 \n"
]
}
],
"source": [
"print(df.loc[1305669:1305673])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 等比复权\n",
"# df['复权因子'] = df['卖一价'].shift() / df['买一价']\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['卖一价'].shift() / df['买一价'], 1)\n",
"df['复权因子'] = df['复权因子'].fillna(1)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['买一价_adj'] = df['买一价'] * df['复权因子'].cumprod()\n",
"df['卖一价_adj'] = df['卖一价'] * df['复权因子'].cumprod()\n",
"df['最新_adj'] = df['最新'] * df['复权因子'].cumprod()\n",
"# df['low_adj'] = df['low'] * adjust.cumprod()\n",
"# df['high_adj'] = df['high'] * adjust.cumprod()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# 等差复权\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['卖一价'].shift() - df['买一价'], 0)\n",
"df['复权因子'] = df['复权因子'].fillna(0)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['买一价_adj'] = df['买一价'] + df['复权因子'].cumsum()\n",
"df['卖一价_adj'] = df['卖一价'] + df['复权因子'].cumsum()\n",
"df['最新_adj'] = df['最新'] + df['复权因子'].cumsum()\n",
"# df['low_adj'] = df['low'] + df['复权因子'].cumsum()\n",
"# df['high_adj'] = df['high'] + df['复权因子'].cumsum()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 统一代码 合约代码 时间 最新 成交量 买一价 卖一价 \\\n",
"1305669 a888 a1905 2019-04-22 15:00:00.568 3309.0 0 3308.0 3311.0 \n",
"1305670 a888 a1905 2019-04-22 15:00:36.638 3309.0 0 3308.0 3311.0 \n",
"1305671 a888 a1909 2019-04-22 20:59:00.014 3412.0 224 3411.0 3412.0 \n",
"1305672 a888 a1909 2019-04-22 21:00:00.461 3412.0 108 3412.0 3413.0 \n",
"1305673 a888 a1909 2019-04-22 21:00:00.958 3411.0 150 3410.0 3411.0 \n",
"\n",
" 买一量 卖一量 复权因子 买一价_adj 卖一价_adj 最新_adj \n",
"1305669 25 10 0.0 3308.0 3311.0 3309.0 \n",
"1305670 25 10 0.0 3308.0 3311.0 3309.0 \n",
"1305671 2 8 -100.0 3311.0 3312.0 3312.0 \n",
"1305672 10 19 0.0 3312.0 3313.0 3312.0 \n",
"1305673 43 3 0.0 3310.0 3311.0 3311.0 \n"
]
}
],
"source": [
"print(df.loc[1305669:1305673])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"df['买一价'] = df['买一价_adj']\n",
"df['卖一价'] = df['卖一价_adj']\n",
"df['最新'] = df['最新_adj']"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 统一代码 合约代码 时间 最新 成交量 买一价 卖一价 \\\n",
"1305669 a888 a1905 2019-04-22 15:00:00.568 3309.0 0 3308.0 3311.0 \n",
"1305670 a888 a1905 2019-04-22 15:00:36.638 3309.0 0 3308.0 3311.0 \n",
"1305671 a888 a1909 2019-04-22 20:59:00.014 3312.0 224 3311.0 3312.0 \n",
"1305672 a888 a1909 2019-04-22 21:00:00.461 3312.0 108 3312.0 3313.0 \n",
"1305673 a888 a1909 2019-04-22 21:00:00.958 3311.0 150 3310.0 3311.0 \n",
"\n",
" 买一量 卖一量 复权因子 买一价_adj 卖一价_adj 最新_adj \n",
"1305669 25 10 0.0 3308.0 3311.0 3309.0 \n",
"1305670 25 10 0.0 3308.0 3311.0 3309.0 \n",
"1305671 2 8 -100.0 3311.0 3312.0 3312.0 \n",
"1305672 10 19 0.0 3312.0 3313.0 3312.0 \n",
"1305673 43 3 0.0 3310.0 3311.0 3311.0 \n"
]
}
],
"source": [
"print(df.loc[1305669:1305673])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# df.drop('复权因子', axis=1)\n",
"# df.drop('买一价_adj', axis=1)\n",
"# df.drop('卖一价_adj', axis=1)\n",
"del df['复权因子']\n",
"del df['买一价_adj']\n",
"del df['卖一价_adj']\n",
"del df['最新_adj']"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 统一代码 合约代码 时间 最新 成交量 买一价 卖一价 \\\n",
"1305670 a888 a1905 2019-04-22 15:00:36.638 3309.0 0 3308.0 3311.0 \n",
"1305671 a888 a1909 2019-04-22 20:59:00.014 3312.0 224 3311.0 3312.0 \n",
"1305672 a888 a1909 2019-04-22 21:00:00.461 3312.0 108 3312.0 3313.0 \n",
"1305673 a888 a1909 2019-04-22 21:00:00.958 3311.0 150 3310.0 3311.0 \n",
"1305674 a888 a1909 2019-04-22 21:00:01.464 3312.0 86 3311.0 3312.0 \n",
"\n",
" 买一量 卖一量 \n",
"1305670 25 10 \n",
"1305671 2 8 \n",
"1305672 10 19 \n",
"1305673 43 3 \n",
"1305674 18 80 \n"
]
}
],
"source": [
"print(df.loc[1305670:1305674])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(output_path, index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drop_index1 = df.query('最后修改时间>\"15:00:00\" & 最后修改时间<\"21:00:00\"')[\n",
" \"最后修改时间\"\n",
"].index\n",
"# drop_index1 = df.query('最后修改时间>\"15:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index2 = df.query('最后修改时间>\"01:00:00\" & 最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index2 = df.query('最后修改时间>\"01:00:00\" & 最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"drop_index2 = df.query('最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"drop_index3 = df.query('最后修改时间>\"23:00:00\" & 最后修改时间<\"23:59:59\"')[\n",
" \"最后修改时间\"\n",
"].index\n",
"drop_index4 = df.query('最后修改时间>\"11:30:00\" & 最后修改时间<\"13:30:00\"')[\n",
" \"最后修改时间\"\n",
"].index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.drop(labels=drop_index1, axis=0, inplace=True)\n",
"df.drop(drop_index2, axis=0, inplace=True)\n",
"df.drop(drop_index3, axis=0, inplace=True)\n",
"df.drop(drop_index4, axis=0, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[\"datetime\"] = pd.to_datetime(\n",
" pd.to_datetime(df[\"交易日\"].astype(str)).astype(str)\n",
" + \" \"\n",
" + df[\"最后修改时间\"].astype(str)\n",
" + \".\"\n",
" + df[\"最后修改毫秒\"].astype(str)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.rename(\n",
" columns={\n",
" \"最新价\": \"lastprice\",\n",
" \"数量\": \"volume\",\n",
" \"申买价一\": \"bid_p\",\n",
" \"申买量一\": \"bid_v\",\n",
" \"申卖价一\": \"ask_p\",\n",
" \"申卖量一\": \"ask_v\",\n",
" \"合约代码\": \"symbol\",\n",
" },\n",
" inplace=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[\"vol_diff\"] = df[\"volume\"].diff()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.loc[df[\"vol_diff\"].isnull(), \"vol_diff\"] = df.loc[df[\"vol_diff\"].isnull(), \"volume\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[\"volume\"] = df[\"vol_diff\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(output_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "orderflow",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,273 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 使用说明:\n",
" 1.需要修改chdir到当前目录\n",
" 2.需要修改最后输出的文件名称\n",
" 3.依据情况需要修改保留的列数"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"文件中所有CSV文件: ['ag888_2019.csv', 'ag888_2020.csv', 'ag888_2021.csv', 'ag888_2022.csv', 'ag888_2022_2023.csv', 'ag888_2023.csv']\n",
"需要筛选的文件名关键字: ['_2022']\n",
"使用新年份格式采集!!!\n",
"筛选结果后的CSV文件: ['ag888_2022.csv', 'ag888_2022_2023.csv']\n"
]
}
],
"source": [
"os.chdir('E:/data/ag')\n",
"all_csv_files = [file for file in os.listdir('.') if file.endswith('.csv')]\n",
"all_csv_files = sorted(all_csv_files)\n",
"print(\"文件中所有CSV文件:\",all_csv_files)\n",
"\n",
"sp_chars = ['_2022']\n",
"sp_chars = sorted(sp_chars)\n",
"print(\"需要筛选的文件名关键字:\",sp_chars)\n",
"\n",
"# 设置后面数据的采集对于的行数# 用 \"old_type\" 或者 \"new_type\" 区分\n",
"if all(char in ['_2019','_2020','_2021'] for char in sp_chars):\n",
" year_type = 'old_type'\n",
" print(\"使用旧年份格式采集!!!\")\n",
"elif all(char in ['_2022','_2023'] for char in sp_chars):\n",
" year_type = 'new_type' \n",
" print(\"使用新年份格式采集!!!\")\n",
"else:\n",
" print(\"文件夹中CSV没有相关年份的数据或者新旧年份混用!!!\")\n",
"\n",
"csv_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_chars)]\n",
"print(\"筛选结果后的CSV文件:\",csv_files)"
]
},
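  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 两种年份格式对应读取的列与下一个读取单元的usecols/names一致仅作核对说明\n",
    "col_layout = {\n",
    "    'old_type': [\"统一代码\", \"合约代码\", \"时间\", \"最新\", \"成交量\", \"买一价\", \"卖一价\", \"买一量\", \"卖一量\"],\n",
    "    'new_type': [\"交易日\", \"统一代码\", \"合约代码\", \"最新价\", \"数量\", \"最后修改时间\", \"最后修改毫秒\",\n",
    "                 \"申买价一\", \"申买量一\", \"申卖价一\", \"申卖量一\", \"业务日期\"],\n",
    "}\n",
    "print(year_type, col_layout[year_type])"
   ]
  },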
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame()\n",
"for f in csv_files:\n",
" if year_type == 'old_type':\n",
" df_temp = pd.read_csv(\n",
" f,\n",
" usecols=[1, 2, 3, 4, 8, 13, 14, 15, 16],\n",
" names=[\n",
" \"统一代码\",\n",
" \"合约代码\",\n",
" \"时间\",\n",
" \"最新\",\n",
" \"成交量\",\n",
" \"买一价\",\n",
" \"卖一价\",\n",
" \"买一量\",\n",
" \"卖一量\",\n",
" ],\n",
" skiprows=1,\n",
" encoding=\"utf-8\",\n",
" )\n",
" elif year_type == 'new_type':\n",
" df_temp = pd.read_csv(\n",
" f,\n",
" usecols=[0, 1, 2, 5, 12, 21, 22, 23, 24, 25, 26, 44],\n",
" names=[\n",
" \"交易日\",\n",
" \"统一代码\",\n",
" \"合约代码\",\n",
" \"最新价\",\n",
" \"数量\",\n",
" \"最后修改时间\",\n",
" \"最后修改毫秒\",\n",
" \"申买价一\",\n",
" \"申买量一\",\n",
" \"申卖价一\",\n",
" \"申卖量一\",\n",
" \"业务日期\",\n",
" ],\n",
" skiprows=1,\n",
" encoding=\"utf-8\",\n",
" )\n",
"\n",
" # df_temp = pd.read_csv(f, usecols=[0,5], names=[\n",
" # 'datetime', 'volume'])\n",
" df = pd.concat([df, df_temp])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 查看数据的头部和尾部head()、tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看dataframe的基本情况\n",
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 等比复权,先不考虑\n",
"# df['复权因子'] = df['卖一价'].shift() / df['买一价']\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['卖一价'].shift() / df['买一价'], 1)\n",
"df['复权因子'] = df['复权因子'].fillna(1)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['买一价_adj'] = df['买一价'] * df['复权因子'].cumprod()\n",
"df['卖一价_adj'] = df['卖一价'] * df['复权因子'].cumprod()\n",
"df['最新_adj'] = df['最新'] * df['复权因子'].cumprod()\n",
"# df['low_adj'] = df['low'] * adjust.cumprod()\n",
"# df['high_adj'] = df['high'] * adjust.cumprod()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 等差复权\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['申卖价一'].shift() - df['申买价一'], 0)\n",
"df['复权因子'] = df['复权因子'].fillna(0)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['申买价一_adj'] = df['申买价一'] + df['复权因子'].cumsum()\n",
"df['申卖价一_adj'] = df['申卖价一'] + df['复权因子'].cumsum()\n",
"df['最新价_adj'] = df['最新价'] + df['复权因子'].cumsum()\n",
"# df['low_adj'] = df['low'] + df['复权因子'].cumsum()\n",
"# df['high_adj'] = df['high'] + df['复权因子'].cumsum()"
]
},
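{
"cell_type": "markdown",
"metadata": {},
"source": [
"Toy illustration of the additive adjustment above (made-up prices, not real quotes): at the contract roll the factor equals the previous ask minus the new bid, and its cumulative sum lifts the later contract so the adjusted series stays continuous."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Synthetic two-contract example of the additive back-adjustment; only the bid column is carried through.\n",
"toy = pd.DataFrame({\n",
"    '合约代码': ['ag2206', 'ag2206', 'ag2212', 'ag2212'],\n",
"    '申买价一': [4700.0, 4702.0, 4650.0, 4651.0],\n",
"    '申卖价一': [4701.0, 4703.0, 4651.0, 4652.0],\n",
"})\n",
"toy['复权因子'] = np.where(toy['合约代码'] != toy['合约代码'].shift(),\n",
"                       toy['申卖价一'].shift() - toy['申买价一'], 0)\n",
"toy['复权因子'] = toy['复权因子'].fillna(0)\n",
"toy['申买价一_adj'] = toy['申买价一'] + toy['复权因子'].cumsum()\n",
"print(toy)  # the roll row is lifted by 4703 - 4650 = 53, keeping the adjusted bid continuous"
]
},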
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查找换期需要复权的索引\n",
"non_zero_indices = df[df['复权因子'] != 0].index\n",
"print(non_zero_indices)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看未调整买价、卖价和最新价的数据\n",
"df.loc[non_zero_indices[0]-5:non_zero_indices[0]+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 将调整后的数值替换原来的值\n",
"df['申买价一'] = df['申买价一_adj']\n",
"df['申卖价一'] = df['申卖价一_adj']\n",
"df['最新价'] = df['最新价_adj']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看调整买价、卖价和最新价的数据\n",
"df.loc[non_zero_indices[0]-5:non_zero_indices[0]+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 删除多余的值\n",
"del df['复权因子']\n",
"del df['申买价一_adj']\n",
"del df['申卖价一_adj']\n",
"del df['最新价_adj']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.loc[non_zero_indices[0]-5:non_zero_indices[0]+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('./ag888_2022_2023.csv', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "orderflow",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,428 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"root_path = r\"E:/data/ag\"\n",
"output_path = r\"E:/data/ag/ag888.csv\"\n",
"# df_tmp = pd.read_csv('E:/data/rb/rb888_2023.csv',encoding=\"utf-8\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"files = []\n",
"\n",
"for r, ds, fs in os.walk(root_path):\n",
" for f in fs:\n",
" # if f[0:4] == '2023':\n",
" abs_filepath = os.path.join(r, f)\n",
" files.append(abs_filepath)\n",
"files = sorted(files)\n",
"\n",
"df = pd.DataFrame()\n",
"for f in files:\n",
" df_temp = pd.read_csv(\n",
" f,\n",
" usecols=[0, 1, 2, 5, 12, 21, 22, 23, 24, 25, 26, 44],\n",
" names=[\n",
" \"交易日\",\n",
" \"统一代码\",\n",
" \"合约代码\",\n",
" \"最新价\",\n",
" \"数量\",\n",
" \"最后修改时间\",\n",
" \"最后修改毫秒\",\n",
" \"申买价一\",\n",
" \"申买量一\",\n",
" \"申卖价一\",\n",
" \"申卖量一\",\n",
" \"业务日期\",\n",
" ],\n",
" skiprows=1,\n",
" encoding=\"utf-8\",\n",
" )\n",
" # df_temp = pd.read_csv(f, usecols=[0,5], names=[\n",
" # 'datetime', 'volume'])\n",
" df = pd.concat([df, df_temp])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#df_tmp = pd.read_csv('E:/data/rb/rb888_2023.csv',encoding=\"utf-8\")\n",
"#df_tmp.tail()\n",
"#df_tmp.tail().to_csv(\"E:/data/rb/rb_tail.csv\",index= False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()\n",
"# 21754840"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.loc[2493107:2493111]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 等比复权,先不考虑\n",
"# df['复权因子'] = df['卖一价'].shift() / df['买一价']\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['卖一价'].shift() / df['买一价'], 1)\n",
"df['复权因子'] = df['复权因子'].fillna(1)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['买一价_adj'] = df['买一价'] * df['复权因子'].cumprod()\n",
"df['卖一价_adj'] = df['卖一价'] * df['复权因子'].cumprod()\n",
"df['最新_adj'] = df['最新'] * df['复权因子'].cumprod()\n",
"# df['low_adj'] = df['low'] * adjust.cumprod()\n",
"# df['high_adj'] = df['high'] * adjust.cumprod()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 等差复权\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['申卖价一'].shift() - df['申买价一'], 0)\n",
"df['复权因子'] = df['复权因子'].fillna(0)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['申买价一_adj'] = df['申买价一'] + df['复权因子'].cumsum()\n",
"df['申卖价一_adj'] = df['申卖价一'] + df['复权因子'].cumsum()\n",
"df['最新价_adj'] = df['最新价'] + df['复权因子'].cumsum()\n",
"# df['low_adj'] = df['low'] + df['复权因子'].cumsum()\n",
"# df['high_adj'] = df['high'] + df['复权因子'].cumsum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.loc[391880:391890]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df['申买价一'] = df['申买价一_adj']\n",
"df['申卖价一'] = df['申卖价一_adj']\n",
"df['最新价'] = df['最新价_adj']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.loc[391880:391890]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"non_zero_indices = df[df['复权因子'] != 0].index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(non_zero_indices)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# df.drop('复权因子', axis=1)\n",
"# df.drop('买一价_adj', axis=1)\n",
"# df.drop('卖一价_adj', axis=1)\n",
"del df['复权因子']\n",
"del df['申买价一_adj']\n",
"del df['申卖价一_adj']\n",
"del df['最新价_adj']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.loc[391880:391890]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(output_path, index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head().to_csv(\"E:/data/rb/rb_ch_temp.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drop_index1 = df.query('最后修改时间>\"15:00:00\" & 最后修改时间<\"21:00:00\"')[\n",
" \"最后修改时间\"\n",
"].index\n",
"# drop_index1 = df.query('最后修改时间>\"15:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index2 = df.query('最后修改时间>\"01:00:00\" & 最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index2 = df.query('最后修改时间>\"01:00:00\" & 最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"drop_index2 = df.query('最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"drop_index3 = df.query('最后修改时间>\"23:00:00\" & 最后修改时间<\"23:59:59\"')[\n",
" \"最后修改时间\"\n",
"].index\n",
"drop_index4 = df.query('最后修改时间>\"11:30:00\" & 最后修改时间<\"13:30:00\"')[\n",
" \"最后修改时间\"\n",
"].index"
]
},
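{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same session filter written as a single boolean mask (a sketch with the same time windows as the four query() calls above); it is left commented out so the drop() cells below keep working unchanged."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One keep-mask over the raw time strings, equivalent to the four drop-index queries above.\n",
"t = df['最后修改时间']\n",
"keep = ~(\n",
"    ((t > '15:00:00') & (t < '21:00:00'))\n",
"    | (t < '09:00:00')\n",
"    | ((t > '23:00:00') & (t < '23:59:59'))\n",
"    | ((t > '11:30:00') & (t < '13:30:00'))\n",
")\n",
"print(keep.sum(), 'rows fall inside the trading sessions')\n",
"# df = df[keep]  # would replace the four drop() calls in the next cell"
]
},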
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.drop(labels=drop_index1, axis=0, inplace=True)\n",
"df.drop(drop_index2, axis=0, inplace=True)\n",
"df.drop(drop_index3, axis=0, inplace=True)\n",
"df.drop(drop_index4, axis=0, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[\"datetime\"] = pd.to_datetime(\n",
" pd.to_datetime(df[\"交易日\"].astype(str)).astype(str)\n",
" + \" \"\n",
" + df[\"最后修改时间\"].astype(str)\n",
" + \".\"\n",
" + df[\"最后修改毫秒\"].astype(str)\n",
")"
]
},
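{
"cell_type": "markdown",
"metadata": {},
"source": [
"Cross-check sketch: the same timestamp can be assembled with pd.to_timedelta, assuming 最后修改毫秒 is an integer millisecond count; the final comparison should print True when both constructions agree."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Alternative construction of the tick timestamp using timedeltas instead of string concatenation.\n",
"dt_alt = (\n",
"    pd.to_datetime(df['交易日'].astype(str), format='%Y%m%d')\n",
"    + pd.to_timedelta(df['最后修改时间'])\n",
"    + pd.to_timedelta(df['最后修改毫秒'], unit='ms')\n",
")\n",
"print((dt_alt == df['datetime']).all())"
]
},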
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.rename(\n",
" columns={\n",
" \"最新价\": \"lastprice\",\n",
" \"数量\": \"volume\",\n",
" \"申买价一\": \"bid_p\",\n",
" \"申买量一\": \"bid_v\",\n",
" \"申卖价一\": \"ask_p\",\n",
" \"申卖量一\": \"ask_v\",\n",
" \"合约代码\": \"symbol\",\n",
" },\n",
" inplace=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[\"vol_diff\"] = df[\"volume\"].diff()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.loc[df[\"vol_diff\"].isnull(), \"vol_diff\"] = df.loc[df[\"vol_diff\"].isnull(), \"volume\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[\"volume\"] = df[\"vol_diff\"]"
]
},
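{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note (toy numbers below, not applied to df): the raw 数量 column is a cumulative session total, so the plain diff() used above goes negative at the first tick of each new trading day; grouping the diff by trading day avoids that if per-tick volume must stay non-negative."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Toy comparison: plain diff() versus a per-day diff() of a cumulative volume counter.\n",
"toy = pd.DataFrame({\n",
"    '交易日': [20220104, 20220104, 20220105, 20220105],\n",
"    'volume': [100, 250, 30, 80],   # cumulative volume within each day (made-up values)\n",
"})\n",
"plain = toy['volume'].diff()                                            # -220 at the day change\n",
"per_day = toy.groupby('交易日')['volume'].diff().fillna(toy['volume'])  # restarts each day\n",
"print(pd.DataFrame({'plain': plain, 'per_day': per_day}))"
]
},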
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(output_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "orderflow",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,222 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"os.chdir('E:/data/ag')\n",
"all_csv_files = [file for file in os.listdir('.') if file.endswith('.csv')]\n",
"all_csv_files = sorted(all_csv_files)\n",
"print(\"文件中所有CSV文件:\",all_csv_files)\n",
"\n",
"sp_chars = ['_2023','_2022']\n",
"sp_chars = sorted(sp_chars)\n",
"print(\"需要筛选的文件名关键字:\",sp_chars)\n",
"\n",
"csv_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_chars)]\n",
"print(\"筛选结果后的CSV文件:\",csv_files)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame()\n",
"for f in csv_files:\n",
" df_temp = pd.read_csv(\n",
" f,\n",
" usecols=[0, 1, 2, 5, 12, 21, 22, 23, 24, 25, 26, 44],\n",
" names=[\n",
" \"交易日\",\n",
" \"统一代码\",\n",
" \"合约代码\",\n",
" \"最新价\",\n",
" \"数量\",\n",
" \"最后修改时间\",\n",
" \"最后修改毫秒\",\n",
" \"申买价一\",\n",
" \"申买量一\",\n",
" \"申卖价一\",\n",
" \"申卖量一\",\n",
" \"业务日期\",\n",
" ],\n",
" skiprows=1,\n",
" encoding=\"utf-8\",\n",
" )\n",
" # df_temp = pd.read_csv(f, usecols=[0,5], names=[\n",
" # 'datetime', 'volume'])\n",
" df = pd.concat([df, df_temp])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 查看数据的头部和尾部head()、tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看dataframe的基本情况\n",
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 等比复权,先不考虑\n",
"# df['复权因子'] = df['卖一价'].shift() / df['买一价']\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['卖一价'].shift() / df['买一价'], 1)\n",
"df['复权因子'] = df['复权因子'].fillna(1)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['买一价_adj'] = df['买一价'] * df['复权因子'].cumprod()\n",
"df['卖一价_adj'] = df['卖一价'] * df['复权因子'].cumprod()\n",
"df['最新_adj'] = df['最新'] * df['复权因子'].cumprod()\n",
"# df['low_adj'] = df['low'] * adjust.cumprod()\n",
"# df['high_adj'] = df['high'] * adjust.cumprod()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 等差复权\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['申卖价一'].shift() - df['申买价一'], 0)\n",
"df['复权因子'] = df['复权因子'].fillna(0)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['申买价一_adj'] = df['申买价一'] + df['复权因子'].cumsum()\n",
"df['申卖价一_adj'] = df['申卖价一'] + df['复权因子'].cumsum()\n",
"df['最新价_adj'] = df['最新价'] + df['复权因子'].cumsum()\n",
"# df['low_adj'] = df['low'] + df['复权因子'].cumsum()\n",
"# df['high_adj'] = df['high'] + df['复权因子'].cumsum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查找换期需要复权的索引\n",
"non_zero_indices = df[df['复权因子'] != 0].index\n",
"print(non_zero_indices)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看未调整买价、卖价和最新价的数据\n",
"df.loc[non_zero_indices[0]-5:non_zero_indices[0]+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 将调整后的数值替换原来的值\n",
"df['申买价一'] = df['申买价一_adj']\n",
"df['申卖价一'] = df['申卖价一_adj']\n",
"df['最新价'] = df['最新价_adj']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看调整买价、卖价和最新价的数据\n",
"df.loc[non_zero_indices[0]-5:non_zero_indices[0]+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 删除多余的值\n",
"del df['复权因子']\n",
"del df['申买价一_adj']\n",
"del df['申卖价一_adj']\n",
"del df['最新价_adj']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.loc[non_zero_indices[0]-5:non_zero_indices[0]+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('./ag888_2022_2023.csv', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "orderflow",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,958 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"root_path = r\".\\tick\\rb\"\n",
"output_path = r\".\\data\\rb.csv\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"files = []\n",
"\n",
"for r, ds, fs in os.walk(root_path):\n",
" for f in fs:\n",
" # if f[0:4] == '2023':\n",
" abs_filepath = os.path.join(r, f)\n",
" files.append(abs_filepath)\n",
"files = sorted(files)\n",
"\n",
"df = pd.DataFrame()\n",
"for f in files:\n",
" df_temp = pd.read_csv(\n",
" f,\n",
" usecols=[0, 1, 4, 11, 20, 21, 22, 23, 24, 25],\n",
" names=[\n",
" \"交易日\",\n",
" \"合约代码\",\n",
" \"最新价\",\n",
" \"数量\",\n",
" \"最后修改时间\",\n",
" \"最后修改毫秒\",\n",
" \"申买价一\",\n",
" \"申买量一\",\n",
" \"申卖价一\",\n",
" \"申卖量一\",\n",
" ],\n",
" skiprows=1,\n",
" encoding=\"gbk\",\n",
" )\n",
" # df_temp = pd.read_csv(f, usecols=[0,5], names=[\n",
" # 'datetime', 'volume'])\n",
" df = pd.concat([df, df_temp])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>交易日</th>\n",
" <th>合约代码</th>\n",
" <th>最新价</th>\n",
" <th>数量</th>\n",
" <th>最后修改时间</th>\n",
" <th>最后修改毫秒</th>\n",
" <th>申买价一</th>\n",
" <th>申买量一</th>\n",
" <th>申卖价一</th>\n",
" <th>申卖量一</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>41323</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1201905</td>\n",
" <td>14:59:59</td>\n",
" <td>0</td>\n",
" <td>4002.0</td>\n",
" <td>247</td>\n",
" <td>4003.0</td>\n",
" <td>116</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41324</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1202028</td>\n",
" <td>14:59:59</td>\n",
" <td>500</td>\n",
" <td>4002.0</td>\n",
" <td>224</td>\n",
" <td>4003.0</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41325</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:00:00</td>\n",
" <td>0</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41326</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:00:00</td>\n",
" <td>500</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41327</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:17:29</td>\n",
" <td>500</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 交易日 合约代码 最新价 数量 最后修改时间 最后修改毫秒 申买价一 申买量一 \\\n",
"41323 20231229 rb2405 4003.0 1201905 14:59:59 0 4002.0 247 \n",
"41324 20231229 rb2405 4003.0 1202028 14:59:59 500 4002.0 224 \n",
"41325 20231229 rb2405 4002.0 1202060 15:00:00 0 4003.0 23 \n",
"41326 20231229 rb2405 4002.0 1202060 15:00:00 500 4003.0 23 \n",
"41327 20231229 rb2405 4002.0 1202060 15:17:29 500 4003.0 23 \n",
"\n",
" 申卖价一 申卖量一 \n",
"41323 4003.0 116 \n",
"41324 4003.0 16 \n",
"41325 4004.0 7 \n",
"41326 4004.0 7 \n",
"41327 4004.0 7 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>交易日</th>\n",
" <th>合约代码</th>\n",
" <th>最新价</th>\n",
" <th>数量</th>\n",
" <th>最后修改时间</th>\n",
" <th>最后修改毫秒</th>\n",
" <th>申买价一</th>\n",
" <th>申买量一</th>\n",
" <th>申卖价一</th>\n",
" <th>申卖量一</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4302.0</td>\n",
" <td>4643</td>\n",
" <td>08:59:00</td>\n",
" <td>500</td>\n",
" <td>4302.0</td>\n",
" <td>115</td>\n",
" <td>4305.0</td>\n",
" <td>96</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4305.0</td>\n",
" <td>5750</td>\n",
" <td>09:00:00</td>\n",
" <td>500</td>\n",
" <td>4305.0</td>\n",
" <td>359</td>\n",
" <td>4310.0</td>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4306.0</td>\n",
" <td>8039</td>\n",
" <td>09:00:01</td>\n",
" <td>0</td>\n",
" <td>4306.0</td>\n",
" <td>18</td>\n",
" <td>4308.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4308.0</td>\n",
" <td>9065</td>\n",
" <td>09:00:01</td>\n",
" <td>500</td>\n",
" <td>4308.0</td>\n",
" <td>43</td>\n",
" <td>4310.0</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4310.0</td>\n",
" <td>9682</td>\n",
" <td>09:00:02</td>\n",
" <td>0</td>\n",
" <td>4311.0</td>\n",
" <td>4</td>\n",
" <td>4314.0</td>\n",
" <td>19</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 交易日 合约代码 最新价 数量 最后修改时间 最后修改毫秒 申买价一 申买量一 申卖价一 \\\n",
"0 20220104 rb2205 4302.0 4643 08:59:00 500 4302.0 115 4305.0 \n",
"1 20220104 rb2205 4305.0 5750 09:00:00 500 4305.0 359 4310.0 \n",
"2 20220104 rb2205 4306.0 8039 09:00:01 0 4306.0 18 4308.0 \n",
"3 20220104 rb2205 4308.0 9065 09:00:01 500 4308.0 43 4310.0 \n",
"4 20220104 rb2205 4310.0 9682 09:00:02 0 4311.0 4 4314.0 \n",
"\n",
" 申卖量一 \n",
"0 96 \n",
"1 36 \n",
"2 7 \n",
"3 74 \n",
"4 19 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"df.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 19813536 entries, 0 to 19813535\n",
"Data columns (total 10 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 交易日 int64 \n",
" 1 合约代码 object \n",
" 2 最新价 float64\n",
" 3 数量 int64 \n",
" 4 最后修改时间 object \n",
" 5 最后修改毫秒 int64 \n",
" 6 申买价一 float64\n",
" 7 申买量一 int64 \n",
" 8 申卖价一 float64\n",
" 9 申卖量一 int64 \n",
"dtypes: float64(3), int64(5), object(2)\n",
"memory usage: 1.5+ GB\n"
]
}
],
"source": [
"df.info()\n",
"# 21754840"
]
},
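{
"cell_type": "markdown",
"metadata": {},
"source": [
"Memory note (a sketch with assumed dtypes): at roughly 19.8 million rows the default dtypes take about 1.5 GB, and an explicit dtype map in read_csv can cut that substantially. float32 trades some precision and per-file categoricals may upcast to object after concat, so this is optional."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Possible drop-in kwargs for the read_csv loop above (the dtype choices are assumptions, verify against the data).\n",
"read_kwargs = dict(\n",
"    usecols=[0, 1, 4, 11, 20, 21, 22, 23, 24, 25],\n",
"    names=['交易日', '合约代码', '最新价', '数量', '最后修改时间',\n",
"           '最后修改毫秒', '申买价一', '申买量一', '申卖价一', '申卖量一'],\n",
"    skiprows=1,\n",
"    encoding='gbk',\n",
"    dtype={'合约代码': 'category', '数量': 'int32', '最后修改毫秒': 'int16',\n",
"           '申买量一': 'int32', '申卖量一': 'int32',\n",
"           '最新价': 'float32', '申买价一': 'float32', '申卖价一': 'float32'},\n",
")\n",
"print(sorted(read_kwargs['dtype']))\n",
"# df_temp = pd.read_csv(f, **read_kwargs)  # inside the loop, instead of the literal arguments"
]
},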
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"drop_index1 = df.query('最后修改时间>\"15:00:00\" & 最后修改时间<\"21:00:00\"')[\n",
" \"最后修改时间\"\n",
"].index\n",
"# drop_index1 = df.query('最后修改时间>\"15:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index2 = df.query('最后修改时间>\"01:00:00\" & 最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index2 = df.query('最后修改时间>\"01:00:00\" & 最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"drop_index2 = df.query('最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"drop_index3 = df.query('最后修改时间>\"23:00:00\" & 最后修改时间<\"23:59:59\"')[\n",
" \"最后修改时间\"\n",
"].index\n",
"drop_index4 = df.query('最后修改时间>\"11:30:00\" & 最后修改时间<\"13:30:00\"')[\n",
" \"最后修改时间\"\n",
"].index"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"df.drop(labels=drop_index1, axis=0, inplace=True)\n",
"df.drop(drop_index2, axis=0, inplace=True)\n",
"df.drop(drop_index3, axis=0, inplace=True)\n",
"df.drop(drop_index4, axis=0, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>交易日</th>\n",
" <th>合约代码</th>\n",
" <th>最新价</th>\n",
" <th>数量</th>\n",
" <th>最后修改时间</th>\n",
" <th>最后修改毫秒</th>\n",
" <th>申买价一</th>\n",
" <th>申买量一</th>\n",
" <th>申卖价一</th>\n",
" <th>申卖量一</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>19813530</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1201836</td>\n",
" <td>14:59:58</td>\n",
" <td>500</td>\n",
" <td>4002.0</td>\n",
" <td>288</td>\n",
" <td>4003.0</td>\n",
" <td>140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19813531</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1201905</td>\n",
" <td>14:59:59</td>\n",
" <td>0</td>\n",
" <td>4002.0</td>\n",
" <td>247</td>\n",
" <td>4003.0</td>\n",
" <td>116</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19813532</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1202028</td>\n",
" <td>14:59:59</td>\n",
" <td>500</td>\n",
" <td>4002.0</td>\n",
" <td>224</td>\n",
" <td>4003.0</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19813533</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:00:00</td>\n",
" <td>0</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19813534</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:00:00</td>\n",
" <td>500</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 交易日 合约代码 最新价 数量 最后修改时间 最后修改毫秒 申买价一 申买量一 \\\n",
"19813530 20231229 rb2405 4003.0 1201836 14:59:58 500 4002.0 288 \n",
"19813531 20231229 rb2405 4003.0 1201905 14:59:59 0 4002.0 247 \n",
"19813532 20231229 rb2405 4003.0 1202028 14:59:59 500 4002.0 224 \n",
"19813533 20231229 rb2405 4002.0 1202060 15:00:00 0 4003.0 23 \n",
"19813534 20231229 rb2405 4002.0 1202060 15:00:00 500 4003.0 23 \n",
"\n",
" 申卖价一 申卖量一 \n",
"19813530 4003.0 140 \n",
"19813531 4003.0 116 \n",
"19813532 4003.0 16 \n",
"19813533 4004.0 7 \n",
"19813534 4004.0 7 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 19812430 entries, 1 to 19813534\n",
"Data columns (total 10 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 交易日 int64 \n",
" 1 合约代码 object \n",
" 2 最新价 float64\n",
" 3 数量 int64 \n",
" 4 最后修改时间 object \n",
" 5 最后修改毫秒 int64 \n",
" 6 申买价一 float64\n",
" 7 申买量一 int64 \n",
" 8 申卖价一 float64\n",
" 9 申卖量一 int64 \n",
"dtypes: float64(3), int64(5), object(2)\n",
"memory usage: 1.6+ GB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"df.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"df[\"datetime\"] = pd.to_datetime(\n",
" pd.to_datetime(df[\"交易日\"].astype(str)).astype(str)\n",
" + \" \"\n",
" + df[\"最后修改时间\"].astype(str)\n",
" + \".\"\n",
" + df[\"最后修改毫秒\"].astype(str)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>交易日</th>\n",
" <th>合约代码</th>\n",
" <th>最新价</th>\n",
" <th>数量</th>\n",
" <th>最后修改时间</th>\n",
" <th>最后修改毫秒</th>\n",
" <th>申买价一</th>\n",
" <th>申买量一</th>\n",
" <th>申卖价一</th>\n",
" <th>申卖量一</th>\n",
" <th>datetime</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>19812425</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1201836</td>\n",
" <td>14:59:58</td>\n",
" <td>500</td>\n",
" <td>4002.0</td>\n",
" <td>288</td>\n",
" <td>4003.0</td>\n",
" <td>140</td>\n",
" <td>2023-12-29 14:59:58.500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19812426</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1201905</td>\n",
" <td>14:59:59</td>\n",
" <td>0</td>\n",
" <td>4002.0</td>\n",
" <td>247</td>\n",
" <td>4003.0</td>\n",
" <td>116</td>\n",
" <td>2023-12-29 14:59:59.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19812427</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1202028</td>\n",
" <td>14:59:59</td>\n",
" <td>500</td>\n",
" <td>4002.0</td>\n",
" <td>224</td>\n",
" <td>4003.0</td>\n",
" <td>16</td>\n",
" <td>2023-12-29 14:59:59.500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19812428</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:00:00</td>\n",
" <td>0</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" <td>2023-12-29 15:00:00.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19812429</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:00:00</td>\n",
" <td>500</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" <td>2023-12-29 15:00:00.500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 交易日 合约代码 最新价 数量 最后修改时间 最后修改毫秒 申买价一 申买量一 \\\n",
"19812425 20231229 rb2405 4003.0 1201836 14:59:58 500 4002.0 288 \n",
"19812426 20231229 rb2405 4003.0 1201905 14:59:59 0 4002.0 247 \n",
"19812427 20231229 rb2405 4003.0 1202028 14:59:59 500 4002.0 224 \n",
"19812428 20231229 rb2405 4002.0 1202060 15:00:00 0 4003.0 23 \n",
"19812429 20231229 rb2405 4002.0 1202060 15:00:00 500 4003.0 23 \n",
"\n",
" 申卖价一 申卖量一 datetime \n",
"19812425 4003.0 140 2023-12-29 14:59:58.500 \n",
"19812426 4003.0 116 2023-12-29 14:59:59.000 \n",
"19812427 4003.0 16 2023-12-29 14:59:59.500 \n",
"19812428 4004.0 7 2023-12-29 15:00:00.000 \n",
"19812429 4004.0 7 2023-12-29 15:00:00.500 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"df.rename(\n",
" columns={\n",
" \"最新价\": \"lastprice\",\n",
" \"数量\": \"volume\",\n",
" \"申买价一\": \"bid_p\",\n",
" \"申买量一\": \"bid_v\",\n",
" \"申卖价一\": \"ask_p\",\n",
" \"申卖量一\": \"ask_v\",\n",
" \"合约代码\": \"symbol\",\n",
" },\n",
" inplace=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"df[\"vol_diff\"] = df[\"volume\"].diff()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>交易日</th>\n",
" <th>symbol</th>\n",
" <th>lastprice</th>\n",
" <th>volume</th>\n",
" <th>最后修改时间</th>\n",
" <th>最后修改毫秒</th>\n",
" <th>bid_p</th>\n",
" <th>bid_v</th>\n",
" <th>ask_p</th>\n",
" <th>ask_v</th>\n",
" <th>datetime</th>\n",
" <th>vol_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4305.0</td>\n",
" <td>5750</td>\n",
" <td>09:00:00</td>\n",
" <td>500</td>\n",
" <td>4305.0</td>\n",
" <td>359</td>\n",
" <td>4310.0</td>\n",
" <td>36</td>\n",
" <td>2022-01-04 09:00:00.500</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4306.0</td>\n",
" <td>8039</td>\n",
" <td>09:00:01</td>\n",
" <td>0</td>\n",
" <td>4306.0</td>\n",
" <td>18</td>\n",
" <td>4308.0</td>\n",
" <td>7</td>\n",
" <td>2022-01-04 09:00:01.000</td>\n",
" <td>2289.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4308.0</td>\n",
" <td>9065</td>\n",
" <td>09:00:01</td>\n",
" <td>500</td>\n",
" <td>4308.0</td>\n",
" <td>43</td>\n",
" <td>4310.0</td>\n",
" <td>74</td>\n",
" <td>2022-01-04 09:00:01.500</td>\n",
" <td>1026.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4310.0</td>\n",
" <td>9682</td>\n",
" <td>09:00:02</td>\n",
" <td>0</td>\n",
" <td>4311.0</td>\n",
" <td>4</td>\n",
" <td>4314.0</td>\n",
" <td>19</td>\n",
" <td>2022-01-04 09:00:02.000</td>\n",
" <td>617.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4314.0</td>\n",
" <td>10328</td>\n",
" <td>09:00:02</td>\n",
" <td>500</td>\n",
" <td>4314.0</td>\n",
" <td>137</td>\n",
" <td>4316.0</td>\n",
" <td>19</td>\n",
" <td>2022-01-04 09:00:02.500</td>\n",
" <td>646.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 交易日 symbol lastprice volume 最后修改时间 最后修改毫秒 bid_p bid_v \\\n",
"0 20220104 rb2205 4305.0 5750 09:00:00 500 4305.0 359 \n",
"1 20220104 rb2205 4306.0 8039 09:00:01 0 4306.0 18 \n",
"2 20220104 rb2205 4308.0 9065 09:00:01 500 4308.0 43 \n",
"3 20220104 rb2205 4310.0 9682 09:00:02 0 4311.0 4 \n",
"4 20220104 rb2205 4314.0 10328 09:00:02 500 4314.0 137 \n",
"\n",
" ask_p ask_v datetime vol_diff \n",
"0 4310.0 36 2022-01-04 09:00:00.500 NaN \n",
"1 4308.0 7 2022-01-04 09:00:01.000 2289.0 \n",
"2 4310.0 74 2022-01-04 09:00:01.500 1026.0 \n",
"3 4314.0 19 2022-01-04 09:00:02.000 617.0 \n",
"4 4316.0 19 2022-01-04 09:00:02.500 646.0 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"df.loc[df[\"vol_diff\"].isnull(), \"vol_diff\"] = df.loc[df[\"vol_diff\"].isnull(), \"volume\"]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"df[\"volume\"] = df[\"vol_diff\"]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(output_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "orderflow",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,801 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 使用说明:\n",
" 1.需要修改chdir到当前目录\n",
" 2.需要修改最后输出的文件名称\n",
" 3.依据情况需要修改保留的列数\n",
" 4.不同品种的交易时间不一样,要修改删除"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import datetime as datetime"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"os.chdir('E:/data/ru')\n",
"all_csv_files = [file for file in os.listdir('.') if file.endswith('.csv')]\n",
"all_csv_files = sorted(all_csv_files)\n",
"print(\"文件中所有CSV文件:\",all_csv_files)\n",
"\n",
"sp_old_chars = ['_2019','_2020','_2021']\n",
"sp_old_chars = sorted(sp_old_chars)\n",
"print(\"旧格式文件名关键字:\",sp_old_chars)\n",
"sp_new_chars = ['_2022','_2023']\n",
"sp_new_chars = sorted(sp_new_chars)\n",
"print(\"新格式文件名关键字:\",sp_new_chars)\n",
"\n",
"# # 设置后面数据的采集对于的行数# 用 \"old_type\" 或者 \"new_type\" 区分\n",
"# if all(char in ['_2019','_2020','_2021'] for char in sp_old_chars):\n",
"# year_type = 'old_type'\n",
"# print(\"使用旧年份格式采集!!!\")\n",
"# elif all(char in ['_2022','_2023'] for char in sp_chars):\n",
"# year_type = 'new_type' \n",
"# print(\"使用新年份格式采集!!!\")\n",
"# else:\n",
"# print(\"文件夹中CSV没有相关年份的数据或者新旧年份混用!!!\")\n",
"\n",
"csv_old_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_old_chars)]\n",
"print(\"筛选结果后的CSV文件:\",csv_old_files)\n",
"csv_new_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_new_chars)]\n",
"print(\"筛选结果后的CSV文件:\",csv_new_files)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old = pd.DataFrame()\n",
"for f in csv_old_files:\n",
" df_old_temp = pd.read_csv(\n",
" f,\n",
" usecols=[1, 2, 3, 4, 8, 13, 14, 15, 16],\n",
" names=[\n",
" \"统一代码\",\n",
" \"合约代码\",\n",
" \"时间\",\n",
" \"最新\",\n",
" \"成交量\",\n",
" \"买一价\",\n",
" \"卖一价\",\n",
" \"买一量\",\n",
" \"卖一量\",\n",
" ],\n",
" skiprows=1,\n",
" encoding=\"utf-8\",\n",
" parse_dates=['时间']#注意此处增加的排序,为了后面按时间排序\n",
" )\n",
" # df_temp = pd.read_csv(f, usecols=[0,5], names=[\n",
" # 'datetime', 'volume'])\n",
" df_old = pd.concat([df_old, df_old_temp])\n",
"del df_old_temp"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old =pd.DataFrame({'main_contract':df_old['统一代码'],'symbol':df_old['合约代码'],'datetime':df_old['时间'],'lastprice':df_old['最新'],'volume':df_old['成交量'],\n",
" 'bid_p':df_old['买一价'],'ask_p':df_old['卖一价'],'bid_v':df_old['买一量'],'ask_v':df_old['卖一量']})"
]
},
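{
"cell_type": "markdown",
"metadata": {},
"source": [
"Equivalent sketch: the old-format columns can also be standardised with a rename map instead of rebuilding the frame column by column; the mapping below simply mirrors the assignment above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Same old-format -> standard-name mapping as the cell above, kept as a reusable dict.\n",
"OLD_TO_STD = {\n",
"    '统一代码': 'main_contract', '合约代码': 'symbol', '时间': 'datetime',\n",
"    '最新': 'lastprice', '成交量': 'volume',\n",
"    '买一价': 'bid_p', '卖一价': 'ask_p', '买一量': 'bid_v', '卖一量': 'ask_v',\n",
"}\n",
"print(OLD_TO_STD)\n",
"# df_old = df_old.rename(columns=OLD_TO_STD)[list(OLD_TO_STD.values())]  # alternative to the rebuild above"
]
},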
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old['time'] = df_old['datetime'].dt.strftime('%H:%M:%S')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 不同品种交易时间不一样,需要修改\n",
"# 商品期货\n",
"drop_index1 = df_old.query('time>\"15:00:00\" & time<\"21:00:00\"')[\"time\"].index\n",
"# drop_index1 = df_old.query('time>\"15:00:00\"')[\"time\"].index\n",
"# drop_index2 = df_old.query('time>\"01:00:00\" & time<\"09:00:00\"')[\"time\"].index\n",
"#drop_index2 = df_old.query('time>\"02:30:00\" & time<\"09:00:00\"')[\"time\"].index\n",
"drop_index2 = df_old.query('time<\"09:00:00\"')[\"time\"].index\n",
"drop_index3 = df_old.query('time>\"23:00:00\" & time<\"23:59:59\"')[\"time\"].index\n",
"# drop_index3 = df_old.query('time>\"11:30:00\" & time<\"13:30:00\"')[\"time\"].index\n",
"drop_index4 = df_old.query('time>\"10:15:00\" & time<\"10:30:00\"')[\"time\"].index\n",
"\n",
"# 清理不在交易时间段的数据\n",
"df_old.drop(labels=drop_index1, axis=0, inplace=True)\n",
"df_old.drop(drop_index2, axis=0, inplace=True)\n",
"df_old.drop(drop_index3, axis=0, inplace=True)\n",
"df_old.drop(drop_index4, axis=0, inplace=True)\n",
"\n",
"df_old.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"del df_old['time']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_new = pd.DataFrame()\n",
"for f in csv_new_files:\n",
" df_new_temp = pd.read_csv(\n",
" f,\n",
" usecols=[0, 1, 2, 5, 12, 21, 22, 23, 24, 25, 26, 44],\n",
" names=[\n",
" \"交易日\",\n",
" \"统一代码\",\n",
" \"合约代码\",\n",
" \"最新价\",\n",
" \"数量\",\n",
" \"最后修改时间\",\n",
" \"最后修改毫秒\",\n",
" \"申买价一\",\n",
" \"申买量一\",\n",
" \"申卖价一\",\n",
" \"申卖量一\",\n",
" \"业务日期\",\n",
" ],\n",
" skiprows=1,\n",
" encoding=\"utf-8\",\n",
" parse_dates=['业务日期','最后修改时间','最后修改毫秒']#注意此处增加的排序,为了后面按时间排序\n",
" )\n",
"\n",
" # df_temp = pd.read_csv(f, usecols=[0,5], names=[\n",
" # 'datetime', 'volume'])\n",
" df_new = pd.concat([df_new, df_new_temp])\n",
"del df_new_temp"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 不同品种交易时间不一样,需要修改\n",
"# 商品期货\n",
"drop_index1 = df_new.query('最后修改时间>\"15:00:00\" & 最后修改时间<\"21:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index1 = df_new.query('最后修改时间>\"15:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index2 = df_new.query('最后修改时间>\"01:00:00\" & 最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index2 = df_new.query('最后修改时间>\"02:30:00\" & 最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"drop_index2 = df_new.query('最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"drop_index3 = df_new.query('最后修改时间>\"23:00:00\" & 最后修改时间<\"23:59:59\"')[\"最后修改时间\"].index\n",
"# drop_index3 = df_new.query('最后修改时间>\"11:30:00\" & 最后修改时间<\"13:30:00\"')[\"最后修改时间\"].index\n",
"drop_index4 = df_new.query('最后修改时间>\"10:15:00\" & 最后修改时间<\"10:30:00\"')[\"最后修改时间\"].index\n",
"\n",
"# 清理不在交易时间段的数据\n",
"df_new.drop(labels=drop_index1, axis=0, inplace=True)\n",
"df_new.drop(drop_index2, axis=0, inplace=True)\n",
"df_new.drop(drop_index3, axis=0, inplace=True)\n",
"df_new.drop(drop_index4, axis=0, inplace=True)\n",
"\n",
"df_new.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#日期修正\n",
"#df_new['业务日期'] = pd.to_datetime(df_new['业务日期'])\n",
"df_new['业务日期'] = df_new['业务日期'].dt.strftime('%Y-%m-%d')\n",
"df_new['datetime'] = df_new['业务日期'] + ' '+df_new['最后修改时间'].dt.time.astype(str) + '.' + df_new['最后修改毫秒'].astype(str)\n",
"# 将 'datetime' 列的数据类型更改为 datetime 格式如果数据转换少8个小时可以用timedelta处理\n",
"df_new['datetime'] = pd.to_datetime(df_new['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')\n",
"# 如果需要,可以将 datetime 列格式化为字符串\n",
"#df_new['formatted_date'] = df_new['datetime'].dt.strftime('%Y-%m-%d %H:%M:%S.%f')\n",
"#计算瞬时成交量\n",
"df_new['volume'] = df_new['数量'] - df_new['数量'].shift(1)\n",
"df_new['volume'] = df_new['volume'].fillna(0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_new =pd.DataFrame({'main_contract':df_new['统一代码'],'symbol':df_new['合约代码'],'datetime':df_new['datetime'],'lastprice':df_new['最新价'],'volume':df_new['volume'],\n",
" 'bid_p':df_new['申买价一'],'ask_p':df_new['申卖价一'],'bid_v':df_new['申买量一'],'ask_v':df_new['申卖量一']})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_new.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_new.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame()\n",
"df = pd.concat([df_old, df_new],axis=0, ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"del df_old,df_new"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df['main_contract'] = df['main_contract'].astype(str)\n",
"df['symbol'] = df['symbol'].astype(str)\n",
"df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')\n",
"df['lastprice'] = df['lastprice'].astype(float)\n",
"df['volume'] = df['volume'].astype(int)\n",
"df['bid_p'] = df['bid_p'].astype(float)\n",
"df['ask_p'] = df['ask_p'].astype(float)\n",
"df['bid_v'] = df['bid_v'].astype(int)\n",
"df['ask_v'] = df['ask_v'].astype(int)\n",
"#df = df_old.append(df_new, ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 删除重复行\n",
"df.drop_duplicates(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 确保日期列按升序排序\n",
"df.sort_values(by='datetime', inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 查看数据的头部和尾部head()、tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看dataframe的基本情况\n",
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 等比复权,先不考虑\n",
"# df['复权因子'] = df['卖一价'].shift() / df['买一价']\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['卖一价'].shift() / df['买一价'], 1)\n",
"df['复权因子'] = df['复权因子'].fillna(1)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['买一价_adj'] = df['买一价'] * df['复权因子'].cumprod()\n",
"df['卖一价_adj'] = df['卖一价'] * df['复权因子'].cumprod()\n",
"df['最新_adj'] = df['最新'] * df['复权因子'].cumprod()\n",
"# df['low_adj'] = df['low'] * adjust.cumprod()\n",
"# df['high_adj'] = df['high'] * adjust.cumprod()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 等差复权\n",
"df['复权因子'] = np.where(df['symbol'] != df['symbol'].shift(), df['ask_p'].shift() - df['bid_p'], 0)\n",
"df['复权因子'] = df['复权因子'].fillna(0)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['bid_p_adj'] = df['bid_p'] + df['复权因子'].cumsum()\n",
"df['ask_p_adj'] = df['ask_p'] + df['复权因子'].cumsum()\n",
"df['lastprice_adj'] = df['lastprice'] + df['复权因子'].cumsum()\n",
"# df['low_adj'] = df['low'] + df['复权因子'].cumsum()\n",
"# df['high_adj'] = df['high'] + df['复权因子'].cumsum()\n",
"# df_new =pd.DataFrame({'main_contract':df_new['统一代码'],'symbol':df_new['合约代码'],'datetime':df_new['datetime'],'lastprice':df_new['最新价'],'volume':df_new['volume'],\n",
"# 'bid_p':df_new['申买价一'],'ask_p':df_new['申卖量一'],'bid_v':df_new['申买量一'],'ask_v':df_new['申卖量一']})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(df['复权因子'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"contains_null = df.isnull().values.any()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(contains_null)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查找换期需要复权的索引\n",
"non_zero_indices = df[df['复权因子'] != 0].index\n",
"print(non_zero_indices)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看未调整买价、卖价和最新价的数据\n",
"df.loc[non_zero_indices[0]-5:non_zero_indices[0]+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 将调整后的数值替换原来的值\n",
"df['bid_p'] = df['bid_p_adj']\n",
"df['ask_p'] = df['ask_p_adj']\n",
"df['lastprice'] = df['lastprice_adj']\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看调整买价、卖价和最新价的数据\n",
"df.loc[non_zero_indices[0]-5:non_zero_indices[0]+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 删除多余的值\n",
"del df['复权因子']\n",
"del df['bid_p_adj']\n",
"del df['ask_p_adj']\n",
"del df['lastprice_adj']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.loc[non_zero_indices[0]-5:non_zero_indices[0]+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('./ru888.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"del df"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import datetime as datetime\n",
"import pyarrow as pa\n",
"import pyarrow.feather as feather"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# feature 测试\n",
"df = pd.read_csv('E:/data/ru/ru888.csv',encoding='UTF-8',parse_dates=['datetime'])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"feather.write_feather(df, 'df_feather.feather')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"df = feather.read_feather('df_feather.feather')"
]
},
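{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optional sketch: Feather also supports compression, which usually shrinks large tick files noticeably for a small read-time cost; whether the 'zstd' codec is available depends on the pyarrow build, so treat it as an assumption."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Round-trip with compression; compare the on-disk size with the uncompressed file written above.\n",
"feather.write_feather(df, 'df_feather_zstd.feather', compression='zstd')\n",
"df_check = feather.read_feather('df_feather_zstd.feather')\n",
"print(len(df_check) == len(df))"
]
},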
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>main_contract</th>\n",
" <th>symbol</th>\n",
" <th>datetime</th>\n",
" <th>lastprice</th>\n",
" <th>volume</th>\n",
" <th>bid_p</th>\n",
" <th>ask_p</th>\n",
" <th>bid_v</th>\n",
" <th>ask_v</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>44287432</th>\n",
" <td>ru888</td>\n",
" <td>ru2405</td>\n",
" <td>2023-12-29 14:59:58.500</td>\n",
" <td>6755.0</td>\n",
" <td>27</td>\n",
" <td>6750.0</td>\n",
" <td>6755.0</td>\n",
" <td>128</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44287433</th>\n",
" <td>ru888</td>\n",
" <td>ru2405</td>\n",
" <td>2023-12-29 14:59:59.000</td>\n",
" <td>6760.0</td>\n",
" <td>27</td>\n",
" <td>6755.0</td>\n",
" <td>6760.0</td>\n",
" <td>2</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44287434</th>\n",
" <td>ru888</td>\n",
" <td>ru2405</td>\n",
" <td>2023-12-29 14:59:59.500</td>\n",
" <td>6760.0</td>\n",
" <td>17</td>\n",
" <td>6760.0</td>\n",
" <td>6765.0</td>\n",
" <td>35</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44287435</th>\n",
" <td>ru888</td>\n",
" <td>ru2405</td>\n",
" <td>2023-12-29 15:00:00.000</td>\n",
" <td>6760.0</td>\n",
" <td>6</td>\n",
" <td>6760.0</td>\n",
" <td>6765.0</td>\n",
" <td>45</td>\n",
" <td>42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44287436</th>\n",
" <td>ru888</td>\n",
" <td>ru2405</td>\n",
" <td>2023-12-29 15:00:00.500</td>\n",
" <td>6760.0</td>\n",
" <td>0</td>\n",
" <td>6760.0</td>\n",
" <td>6765.0</td>\n",
" <td>45</td>\n",
" <td>42</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" main_contract symbol datetime lastprice volume \\\n",
"44287432 ru888 ru2405 2023-12-29 14:59:58.500 6755.0 27 \n",
"44287433 ru888 ru2405 2023-12-29 14:59:59.000 6760.0 27 \n",
"44287434 ru888 ru2405 2023-12-29 14:59:59.500 6760.0 17 \n",
"44287435 ru888 ru2405 2023-12-29 15:00:00.000 6760.0 6 \n",
"44287436 ru888 ru2405 2023-12-29 15:00:00.500 6760.0 0 \n",
"\n",
" bid_p ask_p bid_v ask_v \n",
"44287432 6750.0 6755.0 128 15 \n",
"44287433 6755.0 6760.0 2 14 \n",
"44287434 6760.0 6765.0 35 33 \n",
"44287435 6760.0 6765.0 45 42 \n",
"44287436 6760.0 6765.0 45 42 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}