Files

310 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
'''逐行解释代码:
1.导入所需的模块和库,包括 time、table来自 matplotlib.pyplot、pandas、numpy、numba 和 operator。
2.定义了一个名为 process 的函数,用于处理买卖盘的字典数据。
3.定义了一个名为 dataload 的函数,用于处理已读取的 tick 数据,生成分钟级别的 bar 数据。
4.定义了一个名为 orderflow_df_new 的函数,用于处理 tick 数据和分钟级别的 bar 数据,生成订单流数据。
5.定义了一个名为 GetOrderFlow_dj 的函数,用于计算订单流的指标(堆积)。
6.定义了一个名为 back_data 的函数,用于保存回测数据。
7.在 if __name__ == "__main__": 下,首先用 pd.read_csv 读取 tick 数据,再调用 dataload() 函数得到 tick 数据和分钟级别的 bar 数据。
然后调用 orderflow_df_new() 函数,传入 tick 数据和 bar 数据,生成订单流数据 ofdata。
打印输出 ofdata。
8.调用 back_data() 函数,将订单流数据保存为回测数据。
打印输出 "done",表示程序执行完毕。
总体而言,该代码的功能是从 tick 数据中生成分钟级别的 bar 数据,然后根据 bar 数据计算订单流,并将订单流数据保存为回测数据。
使用说明:使用前需要调整的相关参数如下
1.确定python到csv文件夹下运行,修改csv文件为需要运行的csv
2.dataload函数:一、确认 datetime 列及其他列名(key)是否与当前数据一致,不一致则修改;二、按需修改 resample 函数中 rule 的取样周期,当前代码为 1T,即 1 分钟(例如 5T 为 5 分钟)。
3.back_data函数和main中需要注意修改相应的时间节点将开盘的初始数据设置为0
4.如果生成的时间和实际时间相差8小时可以调用timedelta函数修改
'''
# GetOrderFlow_dj函数需要进一步了解先不修改
import time
from matplotlib.pyplot import table
from datetime import timedelta
import pandas as pd
import numpy as np
from numba import *
from numba import cuda
import operator
import os
# 对于含时区的datetime可以通过timedelta来修改数据
#from datetime import datetime, timedelta
#os.environ['tz'] = 'Asia/ShangHai'
#time.tzset()
def process(bidDict,askDict):
    """Align the bid and ask volume maps on one shared, sorted set of keys.

    Args:
        bidDict: Mapping of price -> volume for the bid side.
        askDict: Mapping of price -> volume for the ask side.

    Returns:
        A ``(bidDictResult, askDictResult)`` tuple of new dicts. Each one
        contains every key found in either input, inserted in ascending key
        order; a key missing from one side is filled with 0. The inputs are
        left untouched.
    """
    # Union of the price levels seen on either side, ascending.
    prices = sorted(set(bidDict) | set(askDict))
    bidDictResult = {price: bidDict.get(price, 0) for price in prices}
    askDictResult = {price: askDict.get(price, 0) for price in prices}
    return bidDictResult, askDictResult
def dataload(data):
    """Turn a raw tick-quote frame into a tick table and 1-minute bars.

    Args:
        data: DataFrame holding the raw ticks. The columns read are
            '业务日期' and '最后修改时间' (both must be datetime-typed, e.g.
            loaded with ``parse_dates``), '最后修改毫秒', '数量', '合约代码',
            '最新价', '申买价一', '申买量一', '申卖价一' and '申卖量一'.
            Adjust the column names here if the data header differs.
            The frame is not modified.

    Returns:
        ``(tickdata, bardata)``.

        ``tickdata`` has the columns datetime, symbol, lastprice, volume,
        bid_p, bid_v, ask_p, ask_v, open, high, low, close and starttime,
        sorted by datetime.

        ``bardata`` is ``tickdata`` resampled to 1-minute bars (right-closed,
        right-labelled) with the columns datetime, starttime, symbol, open,
        high, low, close and volume; bars containing missing values are
        dropped.
    """
    # Timestamp = business date + time of day + millisecond offset.
    # The milliseconds are added as a timedelta instead of being pasted after
    # a '.', because '.5' would be parsed as 500 ms rather than 5 ms.
    # If the resulting times are off by 8 hours, shift them with timedelta.
    date_text = data['业务日期'].dt.strftime('%Y-%m-%d')
    time_text = data['最后修改时间'].dt.time.astype(str)
    stamp = pd.to_datetime(date_text + ' ' + time_text, errors='coerce', format='%Y-%m-%d %H:%M:%S')
    millis = pd.to_timedelta(pd.to_numeric(data['最后修改毫秒'], errors='coerce'), unit='ms')
    tick_time = stamp + millis

    # Per-tick volume is the row-to-row difference of '数量'; the first row
    # (and any other gap) becomes 0.
    tick_volume = data['数量'].diff().fillna(0)

    # Collect the tick fields that are used downstream.
    tickdata = pd.DataFrame({
        'datetime': tick_time,
        'symbol': data['合约代码'],
        'lastprice': data['最新价'],
        'volume': tick_volume,
        'bid_p': data['申买价一'],
        'bid_v': data['申买量一'],
        'ask_p': data['申卖价一'],
        'ask_v': data['申卖量一'],
    })
    # Seed the OHLC columns with the last price so that the resample below
    # can aggregate them with first / max / min / last.
    for column in ('open', 'high', 'low', 'close'):
        tickdata[column] = tickdata['lastprice']
    tickdata['starttime'] = tickdata['datetime']

    # Ascending time order; a stable sort keeps ticks that share a timestamp
    # in their original order.
    tickdata.sort_values(by='datetime', kind='mergesort', inplace=True)

    # Time-series resampling, see https://zhuanlan.zhihu.com/p/70353374
    # '1min' is the non-deprecated spelling of the old '1T' alias.
    bardata = tickdata.resample(on='datetime', rule='1min', label='right', closed='right').agg({
        'starttime': 'first',
        'symbol': 'last',
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }).reset_index(drop=False)
    bardata = bardata.dropna().reset_index(drop=True)
    return tickdata, bardata
#公众号松鼠Quant
#主页www.quant789.com
#本策略仅作学习交流使用,实盘交易盈亏投资者个人负责!!!
#版权归松鼠Quant所有禁止转发、转卖源码违者必究。
def orderflow_df_new(df_tick,df_min):
    """Aggregate ticks into one order-flow row per bar.

    For every bar in ``df_min`` the ticks whose timestamp lies within
    ``[starttime, datetime]`` of that bar are inspected. Each tick's last
    price is compared with the best bid / best ask of the *previous* tick
    (the first tick uses its own quotes), all rounded to 2 decimals:

    * last price >= previous ask  -> volume is added to the ask side,
    * last price <= previous bid  -> volume is added to the bid side.

    The two tests are independent, so a price can count on both sides; a
    price strictly between bid and ask is ignored.

    Args:
        df_tick: Tick frame with the columns datetime, bid_p, ask_p,
            lastprice, volume and symbol, sorted by datetime.
        df_min: Bar frame with the columns starttime, datetime, open, high,
            low, close and volume, in ascending time order. When it has a
            'symbol' column, the bar's symbol is taken from it.

    Returns:
        DataFrame with one row per bar whose volume is > 0 and the columns
        price, Ask, Bid (lists aligned by price level), symbol, datetime,
        delta (ask total minus bid total, stored as ``str``), close, open,
        high, low, volume and dj (result of ``GetOrderFlow_dj`` for the bar).
        An empty DataFrame is returned when ``df_min`` has no rows.
    """
    t1 = time.time()
    startArray = pd.to_datetime(df_min['starttime']).values
    endArray = pd.to_datetime(df_min['datetime']).values
    voluememin = df_min['volume'].values
    highs = df_min['high'].values
    lows = df_min['low'].values
    opens = df_min['open'].values
    closes = df_min['close'].values
    tTickArray = pd.to_datetime(df_tick['datetime']).values
    bp1TickArray = df_tick['bid_p'].values
    ap1TickArray = df_tick['ask_p'].values
    lastTickArray = df_tick['lastprice'].values
    volumeTickArray = df_tick['volume'].values
    # The symbol array is indexed by *bar* below, so it has to come from the
    # bar frame; the tick symbols are only a fallback for bar frames that
    # carry no 'symbol' column.
    if 'symbol' in df_min.columns:
        symbolarray = df_min['symbol'].values
    else:
        symbolarray = df_tick['symbol'].values

    tickCount = len(tTickArray)
    frames = []
    # Ticks and bars are both time-ordered, so every bar resumes scanning
    # where the previous one stopped.
    indexFinal = 0
    for index, tEnd in enumerate(endArray):
        start = startArray[index]
        bidDict = {}
        askDict = {}
        for indexTick in range(indexFinal, tickCount):
            tickTime = tTickArray[indexTick]
            if tickTime > tEnd:
                break
            if (tickTime >= start) and (tickTime <= tEnd):
                # Quotes in force before this trade: the previous tick's
                # level-1 prices (the very first tick uses its own).
                quoteIndex = max(indexTick - 1, 0)
                Bp = round(bp1TickArray[quoteIndex], 2)
                Ap = round(ap1TickArray[quoteIndex], 2)
                LastPrice = round(lastTickArray[indexTick], 2)
                Volume = volumeTickArray[indexTick]
                if LastPrice >= Ap:
                    askDict[LastPrice] = askDict.get(LastPrice, 0) + Volume
                if LastPrice <= Bp:
                    bidDict[LastPrice] = bidDict.get(LastPrice, 0) + Volume
                indexFinal = indexTick

        # process() returns both sides on the same ascending price ladder.
        bidDictResult, askDictResult = process(bidDict, askDict)
        prinslist = list(bidDictResult.keys())
        asklist = list(askDictResult.values())
        bidlist = list(bidDictResult.values())
        delta = sum(asklist) - sum(bidlist)

        df = pd.DataFrame({'price': pd.Series([prinslist]), 'Ask': pd.Series([asklist]), 'Bid': pd.Series([bidlist])})
        df['symbol'] = symbolarray[index]
        df['datetime'] = tEnd
        df['delta'] = str(delta)
        df['close'] = closes[index]
        df['open'] = opens[index]
        df['high'] = highs[index]
        df['low'] = lows[index]
        df['volume'] = voluememin[index]
        # Drop the bar when its volume is not positive, then realign the index.
        df = df[df['volume'] > 0].reset_index(drop=True)
        df['dj'] = GetOrderFlow_dj(df)
        frames.append(df)

    # Concatenate once instead of once per bar. Empty (filtered-out) bars add
    # no rows, so they are only used to keep the column layout when nothing
    # else is left.
    keptFrames = [frame for frame in frames if not frame.empty]
    if keptFrames:
        df_of = pd.concat(keptFrames, ignore_index=True)
    elif frames:
        df_of = pd.concat(frames, ignore_index=True)
    else:
        df_of = pd.DataFrame({})
    print(time.time() - t1)
    return df_of
def GetOrderFlow_dj(kData, imbalance_ratio=3, min_stack=3, count_stacks=True):
    """Score stacked bid/ask imbalances across the bars in ``kData``.

    Each row of ``kData`` holds three equally long sequences in the columns
    'price', 'Ask' and 'Bid' (one entry per price level). Within a row the
    levels are walked from first to last, excluding the last level:

    * Bid side: level ``i`` is imbalanced when
      ``Bid[i] > Ask[i + 1] * imbalance_ratio``. Consecutive imbalanced
      levels form a run; every level at which the run length is at least
      ``min_stack`` (and whose price is > 0) subtracts 1 from the score.
    * Ask side (levels 1 and up): level ``i`` is imbalanced when
      ``Ask[i] > Bid[i - 1] * imbalance_ratio``; runs are counted the same
      way and add 1 to the score.

    A level that is not imbalanced resets the run of its side to 0. Runs are
    restarted for every row; the score accumulates over all rows.

    NOTE(review): the original indentation of this function was lost; each
    ``else`` reset is taken to pair with its imbalance test — confirm against
    the upstream source.

    Args:
        kData: DataFrame with the columns 'price', 'Ask' and 'Bid'.
        imbalance_ratio: Multiple by which one side must exceed the other.
        min_stack: Minimum run length that counts as a stack.
        count_stacks: When False, runs are tracked but never scored.

    Returns:
        The accumulated integer score (0 for an empty frame).
    """
    djcout = 0
    # Iterate the column values directly so the result does not depend on
    # the frame's index labels.
    for prices, asks, bids in zip(kData['price'], kData['Ask'], kData['Bid']):
        bidRun = 0
        askRun = 0
        # The last level is never evaluated on either side.
        for i in range(len(prices) - 1):
            if bids[i] > asks[i + 1] * imbalance_ratio:
                bidRun += 1
                if count_stacks and bidRun >= min_stack and float(prices[i]) > 0:
                    djcout -= 1
            else:
                bidRun = 0
            if i >= 1:
                if asks[i] > bids[i - 1] * imbalance_ratio:
                    askRun += 1
                    if count_stacks and askRun >= min_stack and float(prices[i]) > 0:
                        djcout += 1
                else:
                    askRun = 0
    return djcout
def back_data(df, out_path='./tick生成的OF数据/back_ofdata_dj.csv'):
    """Write the back-test subset of the order-flow frame to a CSV file.

    Args:
        df: Order-flow frame with the columns datetime, close, open, high,
            low, volume, dj, symbol and delta.
        out_path: Destination CSV path. The parent directory is created when
            it does not exist yet.

    The output columns are datetime, close, open, high, low, volume, sig
    (copied from 'dj'), symbol and delta, written without the index.
    """
    # Build a new frame that holds only the columns needed for back-testing.
    new_df = pd.DataFrame({
        'datetime': pd.to_datetime(df['datetime'], format='%Y/%m/%d %H:%M'),
        'close': df['close'],
        'open': df['open'],
        'high': df['high'],
        'low': df['low'],
        'volume': df['volume'],
        'sig': df['dj'],
        'symbol': df['symbol'],
        'delta': df['delta'],
    })
    # to_csv does not create missing directories, so make sure it is there.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    new_df.to_csv(out_path, index=False)
if __name__ == "__main__":
    # WeChat official account: 松鼠Quant
    # Homepage: www.quant789.com
    # This strategy is for study and exchange only; the investor is
    # personally responsible for any profit or loss in live trading!
    # Copyright belongs to 松鼠Quant; forwarding or reselling the source code
    # is prohibited.

    # Load the raw tick CSV; the date and time columns are parsed up front
    # because dataload() uses their .dt accessors.
    data = pd.read_csv('rb主力连续_20230103.csv', encoding='GBK', parse_dates=['业务日期', '最后修改时间'])
    print(data)
    tick, bar = dataload(data)
    ofdata = orderflow_df_new(tick, bar)
    print(ofdata)

    # Save the order-flow data; the output folder is created when missing.
    folder_path = "tick生成的OF数据"
    os.makedirs(folder_path, exist_ok=True)
    ofdata.to_csv(os.path.join(folder_path, 'ofdata_dj.csv'))

    # Save the back-test data.
    back_data(ofdata)
    print('done')