"""Helpers for cleaning futures tick data.

The module builds a normalised tick frame from raw exchange columns, removes
ticks that fall outside each product's trading sessions, tags rows with a
continuous-contract code and applies an additive price adjustment across
contract rollovers.
"""
import os
from datetime import datetime
from datetime import time as s_time

import numpy as np
import pandas as pd

try:
    import chardet
except ImportError:
    # Nothing in this section uses chardet; do not make it a hard requirement
    # just to import the helpers below.
    chardet = None


# Commodity futures that trade in the day session only (value = closing time).
# NOTE(review): 'l' is also listed in commodity_night_dict; the night entry
# wins both in all_dict and in filter_tickdata_time.
commodity_day_dict = {
    'bb': s_time(15, 0), 'jd': s_time(15, 0), 'lh': s_time(15, 0),
    'l': s_time(15, 0), 'fb': s_time(15, 0), 'ec': s_time(15, 0),
    'AP': s_time(15, 0), 'CJ': s_time(15, 0), 'JR': s_time(15, 0),
    'LR': s_time(15, 0), 'RS': s_time(15, 0), 'PK': s_time(15, 0),
    'PM': s_time(15, 0), 'PX': s_time(15, 0), 'RI': s_time(15, 0),
    'SF': s_time(15, 0), 'SM': s_time(15, 0), 'UR': s_time(15, 0),
    'WH': s_time(15, 0), 'ao': s_time(15, 0), 'br': s_time(15, 0),
    'wr': s_time(15, 0),
}

# Commodity futures with a night session (value = night-session closing time).
commodity_night_dict = {
    'sc': s_time(2, 30), 'bc': s_time(1, 0), 'lu': s_time(23, 0),
    'nr': s_time(23, 0), 'au': s_time(2, 30), 'ag': s_time(2, 30),
    'ss': s_time(1, 0), 'sn': s_time(1, 0), 'ni': s_time(1, 0),
    'pb': s_time(1, 0), 'zn': s_time(1, 0), 'al': s_time(1, 0),
    'cu': s_time(1, 0), 'ru': s_time(23, 0), 'rb': s_time(23, 0),
    'hc': s_time(23, 0), 'fu': s_time(23, 0), 'bu': s_time(23, 0),
    'sp': s_time(23, 0), 'PF': s_time(23, 0), 'SR': s_time(23, 0),
    'CF': s_time(23, 0), 'CY': s_time(23, 0), 'RM': s_time(23, 0),
    'MA': s_time(23, 0), 'TA': s_time(23, 0), 'ZC': s_time(23, 0),
    'FG': s_time(23, 0), 'OI': s_time(23, 0), 'SA': s_time(23, 0),
    'p': s_time(23, 0), 'j': s_time(23, 0), 'jm': s_time(23, 0),
    'i': s_time(23, 0), 'l': s_time(23, 0), 'v': s_time(23, 0),
    'pp': s_time(23, 0), 'eg': s_time(23, 0), 'c': s_time(23, 0),
    'cs': s_time(23, 0), 'y': s_time(23, 0), 'm': s_time(23, 0),
    'a': s_time(23, 0), 'b': s_time(23, 0), 'rr': s_time(23, 0),
    'eb': s_time(23, 0), 'pg': s_time(23, 0), 'SH': s_time(23, 0),
}

# Financial futures (value = closing time).
financial_time_dict = {
    'IH': s_time(15, 0), 'IF': s_time(15, 0), 'IC': s_time(15, 0),
    'IM': s_time(15, 0), 'T': s_time(15, 15), 'TS': s_time(15, 15),
    'TF': s_time(15, 15), 'TL': s_time(15, 15),
}

# Every product that has a filtering rule; later tables override earlier ones.
all_dict = {**commodity_day_dict, **commodity_night_dict, **financial_time_dict}


def split_alpha_numeric(string):
    """Split *string* into its letters and its digits.

    Returns a ``(letters, digits)`` tuple; each part keeps the characters in
    their original order and every other character is discarded.
    """
    alpha_chars = "".join(ch for ch in string if ch.isalpha())
    # isalpha() and isdigit() never both hold for one character, so the
    # original if/elif chain reduces to two independent filters.
    numeric_chars = "".join(ch for ch in string if ch.isdigit())
    return alpha_chars, numeric_chars


def merged_new_tickdata(merged_up_df, alpha_chars):
    """Build a normalised, session-filtered, time-sorted tick frame.

    Args:
        merged_up_df: raw tick frame holding the columns read below
            ('统一代码', '合约代码', '交易日', '最后修改时间', '最后修改毫秒',
            '最新价', '数量', '申买价一', '申卖价一', '申买量一', '申卖量一').
            As a side effect it gains 'datetime' and 'volume' columns.
        alpha_chars: product code passed on to ``filter_tickdata_time``.

    Returns:
        A new frame with columns main_contract, symbol, datetime, lastprice,
        volume, bid_p, ask_p, bid_v, ask_v, sorted by datetime. Rows whose
        timestamp cannot be parsed are dropped.
    """
    # Zero-pad the millisecond field: '%f' right-pads short fractions, so an
    # unpadded "5" would be read as 0.5 s instead of 5 ms.
    stamp_text = (
        merged_up_df['交易日'].astype(str)
        + ' ' + merged_up_df['最后修改时间'].astype(str)
        + '.' + merged_up_df['最后修改毫秒'].astype(str).str.zfill(3)
    )
    merged_up_df['datetime'] = pd.to_datetime(
        stamp_text, errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')

    # Per-tick traded volume (difference of the running total).
    # NOTE(review): the output frame below still carries the running total
    # '数量' as 'volume', exactly as before; confirm which one is intended.
    merged_up_df['volume'] = merged_up_df['数量'].diff().fillna(0)

    merged_df = pd.DataFrame({
        'main_contract': merged_up_df['统一代码'],
        'symbol': merged_up_df['合约代码'],
        'datetime': merged_up_df['datetime'],
        'lastprice': merged_up_df['最新价'],
        'volume': merged_up_df['数量'],
        'bid_p': merged_up_df['申买价一'],
        'ask_p': merged_up_df['申卖价一'],
        'bid_v': merged_up_df['申买量一'],
        'ask_v': merged_up_df['申卖量一'],
    })

    # errors='coerce' turns bad timestamps into NaT; such rows cannot be
    # compared against session boundaries, so drop them and say so.
    parsed = merged_df['datetime'].notna()
    if not parsed.all():
        print("丢弃%d条时间无法解析的记录" % int((~parsed).sum()))
        merged_df = merged_df[parsed].copy()

    # Temporary time-of-day column used only by the session filter.
    merged_df['time'] = merged_df['datetime'].dt.time
    merged_df = filter_tickdata_time(merged_df, alpha_chars)
    del merged_df['time']

    # Stable sort keeps the arrival order of ticks that share a timestamp.
    merged_df.sort_values(by=['datetime'], kind='mergesort', inplace=True)
    return merged_df


def _between(times, start, end):
    """Return a boolean mask of the values strictly inside (start, end)."""
    return (times > start) & (times < end)


def _commodity_day_breaks(times):
    """Return a mask of the 10:15-10:30 and 11:30-13:30 commodity breaks."""
    return (_between(times, s_time(10, 15), s_time(10, 30))
            | _between(times, s_time(11, 30), s_time(13, 30)))


def filter_tickdata_time(filter_df, alpha_chars):
    """Drop, in place, the ticks that fall outside the product's sessions.

    Args:
        filter_df: frame with a 'time' column of ``datetime.time`` values.
        alpha_chars: product code looked up in ``financial_time_dict``,
            ``commodity_night_dict`` and ``commodity_day_dict`` (in that order).

    Returns:
        ``filter_df`` itself. When the product is unknown, or its night close
        is not one of 23:00 / 1:00 / 2:30, a message is printed and the frame
        is returned unfiltered.
    """
    times = filter_df['time']

    if alpha_chars in financial_time_dict:
        # Locally stored timestamps lag slightly, hence the extra 0.5 s after
        # each financial session end.
        closed = _between(times, s_time(11, 30, 0, 500000), s_time(13, 0))
        if alpha_chars in ['IH', 'IF', 'IC', 'IM']:
            closed |= (times > s_time(15, 0, 0, 500000)) | (times < s_time(9, 30))
            print("按照中金所股指期货交易时间筛选金融期货品种")
        else:
            closed |= (times > s_time(15, 15, 0, 500000)) | (times < s_time(9, 30))
            print("按照中金所国债期货交易时间筛选金融期货品种")
        print("按照中金所交易时间筛选金融期货品种")
    elif alpha_chars in commodity_night_dict:
        night_close = commodity_night_dict[alpha_chars]
        if night_close == s_time(23, 0):
            # Night session ends before midnight: closed from 23:00 through
            # to the 9:00 open, which wraps around midnight.
            overnight = (times > s_time(23, 0)) | (times < s_time(9, 0))
            print("按照夜盘截止交易时间为23:00筛选商品期货品种")
        elif night_close == s_time(1, 0):
            overnight = _between(times, s_time(1, 0), s_time(9, 0))
            print("按照夜盘截止交易时间为1:00筛选商品期货品种")
        elif night_close == s_time(2, 30):
            overnight = _between(times, s_time(2, 30), s_time(9, 0))
            print("按照夜盘截止交易时间为2:30筛选商品期货品种")
        else:
            print("夜盘截止交易时间未设置或者设置错误!!!")
            # No rule to apply; previously this fell through to undefined
            # drop indexes and raised UnboundLocalError.
            return filter_df
        closed = (_commodity_day_breaks(times)
                  | _between(times, s_time(15, 0), s_time(21, 0))
                  | overnight)
    elif alpha_chars in commodity_day_dict:
        closed = (_commodity_day_breaks(times)
                  | (times > s_time(15, 0)) | (times < s_time(9, 0)))
        print("按照无夜盘筛选商品期货品种")
    else:
        print("%s期货品种未执行时间筛选中!!!"%(alpha_chars))
        # Unknown product: leave the data untouched instead of crashing.
        return filter_df

    # Remove everything outside the trading sessions in a single in-place drop.
    filter_df.drop(index=filter_df.index[closed.to_numpy()], inplace=True)
    return filter_df


def insert_main_contract(df):
    """Prepend a '统一代码' column holding the continuous-contract code.

    The code is the letters of the contract found at index label 0 of
    '合约代码' followed by "889". Per the original author's convention 888
    denotes the main continuous contract, 999 the index continuous contract
    and 889 the secondary-main continuous contract.

    Args:
        df: frame with a '合约代码' column and a row labelled 0; it is
            modified in place.

    Returns:
        ``(df, alpha_chars, code_value)``.
    """
    alpha_chars, _ = split_alpha_numeric(df.loc[0, '合约代码'])
    code_value = alpha_chars + "889"
    print("code_value characters:", code_value)
    df.insert(loc=0, column="统一代码", value=code_value)
    return df, alpha_chars, code_value


def reinstatement_tickdata(merged_rs_df):
    """Normalise column dtypes and apply an additive rollover adjustment.

    At every row where 'symbol' differs from the previous row, the gap
    ``previous ask_p - current bid_p`` is added to a running offset; that
    offset is then added to bid_p, ask_p and lastprice (rounded to 4 places).
    The first row contributes no offset.

    Args:
        merged_rs_df: frame with main_contract, symbol, datetime, lastprice,
            volume, bid_p, ask_p, bid_v, ask_v columns; modified in place.

    Returns:
        ``merged_rs_df`` itself.
    """
    merged_rs_df['main_contract'] = merged_rs_df['main_contract'].astype(str)
    merged_rs_df['symbol'] = merged_rs_df['symbol'].astype(str)
    merged_rs_df['datetime'] = pd.to_datetime(
        merged_rs_df['datetime'], errors='coerce',
        format='%Y-%m-%d %H:%M:%S.%f')
    for int_column in ('volume', 'bid_v', 'ask_v'):
        merged_rs_df[int_column] = merged_rs_df[int_column].astype(int)

    # Additive adjustment (a ratio-based adjustment is not applied for now).
    is_rollover = merged_rs_df['symbol'] != merged_rs_df['symbol'].shift()
    gap = merged_rs_df['ask_p'].shift() - merged_rs_df['bid_p']
    step = pd.Series(np.where(is_rollover, gap, 0), index=merged_rs_df.index)
    # The first row has no predecessor, so its gap is NaN; treat it as 0.
    offset = step.fillna(0).cumsum()

    # Overwrite the raw prices with the adjusted ones.
    for price_column in ('bid_p', 'ask_p', 'lastprice'):
        merged_rs_df[price_column] = (merged_rs_df[price_column] + offset).round(4)
    return merged_rs_df