# Files
# Quant_Code/1.交易策略/999.其他策略/1.松鼠SF08_基于盘口数据的择时趋势策略/使用文档/一键生成OF数据/ofdata_dj完整OF数据生成CSV.py
#
# 320 lines
# 12 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
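'''Step-by-step overview of this script:
1. Import the required modules and libraries: time, table from matplotlib.pyplot, timedelta from datetime, pandas, numpy, numba, operator and os.
2. Define process, which aligns the bid and ask volume dictionaries on a common set of prices.
3. Define dataload, which cleans the tick data and resamples it into minute-level bar data.
4. Define orderflow_df_new, which combines the tick data and the minute-level bar data into order-flow data.
5. Define GetOrderFlow_dj, which computes the order-flow "stacking" (堆积) indicator.
6. Define back_data, which saves the data used for backtesting.
7. Under if __name__ == "__main__": the tick CSV is read with pd.read_csv and passed to dataload() to obtain the tick data and the minute-level bar data.
   orderflow_df_new() is then called with the tick data and the bar data to produce the order-flow data ofdata.
   ofdata is printed.
8. back_data() is called to save the order-flow data as backtest data.
   "done" is printed to signal that the program has finished.
Overall, the script builds minute-level bars from tick data, computes the order flow for each bar, and saves the order-flow data as backtest data.
# WeChat official account: 松鼠Quant
# Homepage: www.quant789.com
# This strategy is for study and discussion only; investors bear sole responsibility for any live-trading profit or loss!
# Copyright belongs to 松鼠Quant. Forwarding or reselling the source code is prohibited; violators will be pursued.
# Before use:
1. Change the file path passed to read_csv.
2. Change the bar period passed to resample.
3. Change the output locations: folder_path and the to_csv paths.
'''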
import time
from matplotlib.pyplot import table
from datetime import timedelta
import pandas as pd
import numpy as np
from numba import *
from numba import cuda
import operator
import os
def process(bidDict,askDict):
    """Align the bid and ask volume maps onto one shared price ladder.

    Args:
        bidDict: Mapping of price -> volume traded at the bid.
        askDict: Mapping of price -> volume traded at the ask.

    Returns:
        A ``(bids, asks)`` tuple of new dicts. Both contain every price that
        appears in either input, inserted in ascending price order; a price
        missing from one side is filled with ``0`` on that side.
    """
    # Union of both key sets, sorted once so both results share the same order.
    price_levels = sorted(set(bidDict) | set(askDict))
    aligned_bids = {level: bidDict.get(level, 0) for level in price_levels}
    aligned_asks = {level: askDict.get(level, 0) for level in price_levels}
    return aligned_bids, aligned_asks
def dataload(data, rule='1min'):
    """Clean raw tick data and aggregate it into OHLCV bars.

    Args:
        data: Tick DataFrame providing the columns ``datetime``, ``symbol``,
            ``lastprice``, ``volume``, ``bid_p``, ``bid_v``, ``ask_p`` and
            ``ask_v``. ``datetime`` is parsed with the format
            ``%Y-%m-%d %H:%M:%S.%f``; values that cannot be parsed become
            ``NaT``. Missing ``volume`` values are treated as 0. The input
            frame itself is left unmodified.
        rule: pandas offset alias for the bar period (default: one minute).

    Returns:
        A ``(tickdata, bardata)`` tuple.

        ``tickdata`` holds the tick columns listed above plus ``open``,
        ``high``, ``low`` and ``close`` (all copies of ``lastprice``) and
        ``starttime`` (a copy of ``datetime``), sorted by ``datetime``.

        ``bardata`` has one row per non-empty bar with the columns
        ``datetime`` (right edge of the bar), ``starttime`` (time of the first
        tick in the bar), ``symbol``, ``open``, ``high``, ``low``, ``close``
        and ``volume``.
    """
    # Work on derived Series instead of writing back into the caller's frame.
    timestamps = pd.to_datetime(data['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')
    volumes = data['volume'].fillna(0)

    # Keep only the tick fields that the order-flow computation needs.
    tickdata = pd.DataFrame({
        'datetime': timestamps,
        'symbol': data['symbol'],
        'lastprice': data['lastprice'],
        'volume': volumes,
        'bid_p': data['bid_p'],
        'bid_v': data['bid_v'],
        'ask_p': data['ask_p'],
        'ask_v': data['ask_v'],
    })
    # Every tick is its own one-point OHLC so a plain aggregation yields bars.
    for column in ('open', 'high', 'low', 'close'):
        tickdata[column] = tickdata['lastprice']
    tickdata['starttime'] = tickdata['datetime']

    # A stable sort keeps the arrival order of ticks that share a timestamp,
    # which matters for the 'first'/'last' aggregations below.
    tickdata = tickdata.sort_values(by='datetime', kind='mergesort')

    # Bars are closed and labelled on their right edge: a tick stamped exactly
    # on the boundary belongs to the bar that ends there.
    bardata = (
        tickdata.resample(rule, on='datetime', label='right', closed='right')
        .agg({
            'starttime': 'first',
            'symbol': 'last',
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum',
        })
        .reset_index(drop=False)
    )
    # Periods without any tick come out as NaN rows; drop them.
    bardata = bardata.dropna().reset_index(drop=True)
    return tickdata, bardata
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
def orderflow_df_new(df_tick,df_min):
    """Build one order-flow row per bar from tick data.

    For every bar, the ticks whose timestamp lies in ``[starttime, datetime]``
    of that bar are classified against the best bid/ask quoted on the
    *previous* tick (the very first tick uses its own quote):

    * ``lastprice >= ask`` adds the tick volume to the ask side at that price;
    * ``lastprice <= bid`` adds the tick volume to the bid side at that price.

    Both checks are applied independently, and prices are rounded to two
    decimals before being used as keys.

    Args:
        df_tick: Tick DataFrame sorted by ``datetime`` with the columns
            ``datetime``, ``bid_p``, ``ask_p``, ``lastprice``, ``volume`` and
            ``symbol``.
        df_min: Bar DataFrame with the columns ``starttime``, ``datetime``,
            ``open``, ``high``, ``low``, ``close``, ``volume`` and normally
            ``symbol``.

    Returns:
        DataFrame with one row per bar whose volume is greater than 0 and the
        columns ``price``, ``Ask``, ``Bid`` (parallel lists ordered by
        ascending price), ``symbol``, ``datetime``, ``delta`` (ask volume minus
        bid volume, stored as a string), ``close``, ``open``, ``high``,
        ``low``, ``volume`` and ``dj`` (result of ``GetOrderFlow_dj``).
        An empty DataFrame is returned when ``df_min`` has no rows.
    """
    started = time.time()

    bar_starts = pd.to_datetime(df_min['starttime']).values
    bar_ends = pd.to_datetime(df_min['datetime']).values
    bar_opens = df_min['open'].values
    bar_highs = df_min['high'].values
    bar_lows = df_min['low'].values
    bar_closes = df_min['close'].values
    bar_volumes = df_min['volume'].values
    # The symbol must come from the bar frame: a bar index is not a valid
    # position in the tick-level symbol array. The tick array is kept only as
    # a fallback for bar frames that carry no 'symbol' column.
    if 'symbol' in df_min.columns:
        bar_symbols = df_min['symbol'].values
    else:
        bar_symbols = df_tick['symbol'].values

    tick_times = pd.to_datetime(df_tick['datetime']).values
    tick_bids = df_tick['bid_p'].values
    tick_asks = df_tick['ask_p'].values
    tick_prices = df_tick['lastprice'].values
    tick_volumes = df_tick['volume'].values
    tick_count = len(tick_times)

    frames = []
    cursor = 0  # position of the first tick not yet consumed by a bar
    for bar_idx, bar_end in enumerate(bar_ends):
        bar_start = bar_starts[bar_idx]
        bid_volume = {}
        ask_volume = {}

        # Ticks are sorted, so each bar resumes where the previous one stopped.
        while cursor < tick_count:
            tick_time = tick_times[cursor]
            if tick_time > bar_end:
                break
            # Comparisons against NaT are False, so unparsable ticks are skipped.
            if bar_start <= tick_time <= bar_end:
                # Classify against the quote that was standing before this trade.
                quote_idx = cursor - 1 if cursor else 0
                best_bid = round(tick_bids[quote_idx], 2)
                best_ask = round(tick_asks[quote_idx], 2)
                last_price = round(tick_prices[cursor], 2)
                volume = tick_volumes[cursor]
                if last_price >= best_ask:
                    ask_volume[last_price] = ask_volume.get(last_price, 0) + volume
                if last_price <= best_bid:
                    bid_volume[last_price] = bid_volume.get(last_price, 0) + volume
            cursor += 1

        # process() returns both sides on the same ascending price ladder.
        aligned_bids, aligned_asks = process(bid_volume, ask_volume)
        price_levels = list(aligned_bids.keys())
        ask_sizes = list(aligned_asks.values())
        bid_sizes = list(aligned_bids.values())
        delta = sum(ask_sizes) - sum(bid_sizes)

        # Wrapping each list in a one-element Series stores it as a single cell.
        row = pd.DataFrame({
            'price': pd.Series([price_levels]),
            'Ask': pd.Series([ask_sizes]),
            'Bid': pd.Series([bid_sizes]),
        })
        row['symbol'] = bar_symbols[bar_idx]
        row['datetime'] = bar_end
        row['delta'] = str(delta)  # kept as a string, as consumers of this frame receive it today
        row['close'] = bar_closes[bar_idx]
        row['open'] = bar_opens[bar_idx]
        row['high'] = bar_highs[bar_idx]
        row['low'] = bar_lows[bar_idx]
        row['volume'] = bar_volumes[bar_idx]

        # Drop bars without traded volume.
        row = row[row['volume'] > 0].reset_index(drop=True)
        row['dj'] = GetOrderFlow_dj(row)
        frames.append(row)

    # Concatenate once at the end; growing a frame inside the loop is quadratic.
    df_of = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame({})
    print(time.time() - started)
    return df_of
def GetOrderFlow_dj(kData, imbalance_ratio=3, min_stack=3, enabled=True):
    """Score stacked bid/ask imbalances ("duiji") over the rows of ``kData``.

    Each row carries three parallel lists: ``price``, ``Ask`` and ``Bid``.
    Walking the lists from index 0 upwards, two diagonal comparisons are made
    at every index ``i``:

    * bid side (all but the last index): ``Bid[i] > Ask[i + 1] * imbalance_ratio``;
    * ask side (all but the first index): ``Ask[i] > Bid[i - 1] * imbalance_ratio``.

    Each side keeps a run counter of consecutive hits that is reset to 0 as
    soon as its comparison fails. Once a run reaches ``min_stack`` (and while
    it continues), every further hit at a price greater than 0 changes the
    score: -1 for a bid-side hit, +1 for an ask-side hit.

    Args:
        kData: DataFrame with the list-valued columns ``price``, ``Ask`` and
            ``Bid``. Other columns are ignored and any index is accepted.
        imbalance_ratio: Multiple by which one side must exceed the opposite
            side of the adjacent entry to count as an imbalance.
        min_stack: Number of consecutive imbalances required before the score
            starts to change.
        enabled: When false, nothing is scored and 0 is returned.

    Returns:
        The net score accumulated over all rows (0 for an empty frame).
    """
    if kData.empty:
        return 0

    stack_score = 0
    # Iterate the cell values directly so the result does not depend on the
    # frame's index labels.
    for prices, asks, bids in zip(kData['price'], kData['Ask'], kData['Bid']):
        level_count = len(prices)
        bid_run = 0  # consecutive bid-side imbalances
        ask_run = 0  # consecutive ask-side imbalances
        for i in range(level_count):
            # Bid at this entry versus ask at the next entry.
            if i < level_count - 1:
                if bids[i] > asks[i + 1] * imbalance_ratio:
                    bid_run += 1
                    if bid_run >= min_stack and enabled and float(prices[i]) > 0:
                        stack_score -= 1
                else:
                    bid_run = 0
            # Ask at this entry versus bid at the previous entry.
            if i >= 1:
                if asks[i] > bids[i - 1] * imbalance_ratio:
                    ask_run += 1
                    if ask_run >= min_stack and enabled and float(prices[i]) > 0:
                        stack_score += 1
                else:
                    ask_run = 0
    return stack_score
def back_data(df, output_path='./rb888_rs_2022_back_ofdata_dj.csv'):
    """Write the backtest view of an order-flow frame to a CSV file.

    The output columns are, in order: ``datetime``, ``close``, ``open``,
    ``high``, ``low``, ``volume``, ``sig`` (taken from the ``dj`` column),
    ``symbol`` and ``delta``. The index is not written.

    Args:
        df: Order-flow DataFrame providing the columns ``datetime``, ``close``,
            ``open``, ``high``, ``low``, ``volume``, ``dj``, ``symbol`` and
            ``delta``.
        output_path: Destination CSV path. The default is relative to the
            current working directory.
    """
    new_df = pd.DataFrame({
        # No explicit format: the column is normally already datetime-typed,
        # and a fixed format would reject ISO-formatted strings.
        'datetime': pd.to_datetime(df['datetime']),
        'close': df['close'],
        'open': df['open'],
        'high': df['high'],
        'low': df['low'],
        'volume': df['volume'],
        'sig': df['dj'],
        'symbol': df['symbol'],
        'delta': df['delta'],
    })
    new_df.to_csv(output_path, index=False)
if __name__ == "__main__":
    # WeChat official account: 松鼠Quant
    # Homepage: www.quant789.com
    # This strategy is for study and discussion only; investors bear sole
    # responsibility for any live-trading profit or loss!
    # Copyright belongs to 松鼠Quant. Forwarding or reselling the source code
    # is prohibited; violators will be pursued.

    # Load the raw tick file; 'datetime' is parsed while reading.
    data = pd.read_csv(
        'D:/data_transfer/data_rs_merged/上期所/rb888/rb888_rs_2022.csv',
        encoding='GBK',
        parse_dates=['datetime'],
    )
    print(data)

    # Ticks -> bars -> per-bar order flow.
    tick, bar = dataload(data)
    ofdata = orderflow_df_new(tick, bar)
    print(ofdata)

    # Save the order-flow data. exist_ok avoids the check-then-create race of
    # testing for the folder first.
    folder_path = 'D:/of_data/tick生成的OF数据/data_rs_merged/上期所/rb888/'
    os.makedirs(folder_path, exist_ok=True)

    # Report the current working directory.
    current_directory = os.getcwd()
    print("当前工作目录:", current_directory)

    # Switch into the output folder: both CSV files below are written with
    # paths relative to the working directory.
    os.chdir(folder_path)
    updated_directory = os.getcwd()
    print("已更改为新的工作目录:", updated_directory)

    ofdata.to_csv('./rb888_rs_2022_ofdata_dj.csv')

    # Save the backtest data.
    back_data(ofdata)
    print('done')