Enhance trading workflow with new order flow management
- Added dingdanliu_nb_mflow for improved order processing - Updated related scripts and configurations to support new functionality
This commit is contained in:
698
2.数据下载与处理/Tushare_get_data.ipynb
Normal file
698
2.数据下载与处理/Tushare_get_data.ipynb
Normal file
@@ -0,0 +1,698 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ModuleNotFoundError",
|
||||
"evalue": "No module named 'tushare'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtushare\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mts\u001b[39;00m\n\u001b[0;32m 2\u001b[0m ts\u001b[38;5;241m.\u001b[39mset_token(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m78282dabb315ee578fb73a9b328f493026e97d5af709acb331b7b348\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 3\u001b[0m pro \u001b[38;5;241m=\u001b[39m ts\u001b[38;5;241m.\u001b[39mpro_api()\n",
|
||||
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'tushare'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import tushare as ts\n",
|
||||
"ts.set_token('78282dabb315ee578fb73a9b328f493026e97d5af709acb331b7b348')\n",
|
||||
"pro = ts.pro_api()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"300\n",
|
||||
"<class 'int'>\n",
|
||||
"0.15\n",
|
||||
"<class 'float'>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"import pandas as pd\n",
|
||||
"fees_df = pd.read_csv('./futures_fees_info.csv', usecols= [1, 4, 17, 19, 25],names=['合约', '合约乘数', '做多保证金率', '做空保证金率', '品种代码'])\n",
|
||||
"data0 = int(fees_df[fees_df['合约'] == 'IH2407']['合约乘数'].iloc[0])\n",
|
||||
"\n",
|
||||
"print(data0)\n",
|
||||
"print(type(data0))\n",
|
||||
"data1 = float(fees_df[fees_df['合约'] == 'IH2407']['做多保证金率'].iloc[0])\n",
|
||||
"print(data1)\n",
|
||||
"print(type(data1))\n",
|
||||
"# fees_df[fees_df['合约'] == 'IH2407']['做空保证金率'].iloc[0]\n",
|
||||
"# (fees_df[fees_df['合约'] == 'IH2407']['做多保证金率'].iloc[0] + fees_df[fees_df['合约'] == 'IH2407']['做空保证金率'].iloc[0])/2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"import pandas as pd\n",
|
||||
"fees_df = pd.read_csv('./futures_fees_info.csv', usecols= [1, 4, 17, 19],names=['合约', '合约乘数', '做多保证金率', '做空保证金率'])\n",
|
||||
"contacts_df = pd.read_csv('./main_contacts.csv', usecols= [16, 17],names=['主连代码', '品种代码'])\n",
|
||||
"\n",
|
||||
"def get_main_contact_on_time(main_symbol_code):\n",
|
||||
" data_str = ''\n",
|
||||
" alpha_chars = ''\n",
|
||||
" numeric_chars = ''\n",
|
||||
" main_code = ''\n",
|
||||
"\n",
|
||||
" # main_symbol = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[0]\n",
|
||||
" # exchange_id = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[1]\n",
|
||||
" main_symbol = contacts_df[contacts_df['品种代码'] == main_symbol_code]['主连代码'].iloc[0]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" # # 拆分主连合约代码为交易标识和交易所代码(交易市场)\n",
|
||||
" # main_symbol = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[0]\n",
|
||||
" # exchange_id = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[1]\n",
|
||||
"\n",
|
||||
" # # 拆分交易标识中的合约产品代码和交割月份\n",
|
||||
" # for char in main_symbol:\n",
|
||||
" # if char.isalpha():\n",
|
||||
" # alpha_chars += char\n",
|
||||
" # elif char.isdigit():\n",
|
||||
" # numeric_chars += char\n",
|
||||
" \n",
|
||||
" # # 监理交易所映射\n",
|
||||
" # exchange = {'CFX': 'CFFEX', 'SHF':'SHFE', 'DCE':'DCE', 'GFE':'GFEX', 'INE':'INE', 'ZCE':'CZCE'}\n",
|
||||
"\n",
|
||||
" # # 计算per_unit:交易单位(每手)和转换后交易所识别的main_code:主连代码\n",
|
||||
" # if exchange_id == 'CFX' or exchange_id == 'SHF' or exchange_id == 'DCE' or exchange_id == 'GFE' or exchange_id == 'INE':\n",
|
||||
" # df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
|
||||
" # # ts_code = df[df['symbol'] == main_symbol]['ts_code'].iloc[0]\n",
|
||||
" # per_unit = df[df['symbol'] == main_symbol]['per_unit'].iloc[0]\n",
|
||||
"\n",
|
||||
" # # ds = pro.fut_settle(trade_date = data_str, ts_code =ts_code)\n",
|
||||
" # # ds['margin_rate'] = (ds['long_margin_rate'] + ds['short_margin_rate'])/2\n",
|
||||
" # # margin_rate = ds['margin_rate'].iloc[0]\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" # if exchange_id == 'CFX':\n",
|
||||
" # main_code = main_symbol\n",
|
||||
" # elif exchange_id == 'SHF' or exchange_id == 'DCE' or exchange_id == 'GFE' or exchange_id == 'INE':\n",
|
||||
" # lower_alpha_chars = str.lower(alpha_chars) \n",
|
||||
" # main_code = lower_alpha_chars + numeric_chars\n",
|
||||
" # elif exchange_id == 'ZCE':\n",
|
||||
" # true_numeric_chars = numeric_chars[1:]\n",
|
||||
" # main_code = alpha_chars + true_numeric_chars \n",
|
||||
" # df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
|
||||
" # per_unit = df[df['symbol'] == main_code]['per_unit'].iloc[0]\n",
|
||||
" # main_code = alpha_chars + true_numeric_chars\n",
|
||||
"\n",
|
||||
" # print(\"最终使用的主连代码:\",main_code) \n",
|
||||
" # print(\"%s的交易单位(每手):%s\"%(main_symbol, per_unit))\n",
|
||||
" return main_symbol\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'IH2407'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"get_main_contact_on_time('IH')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"def get_main_contact_on_time(main_symbol_code):\n",
|
||||
" data_str = ''\n",
|
||||
" alpha_chars = ''\n",
|
||||
" numeric_chars = ''\n",
|
||||
" main_code = ''\n",
|
||||
"\n",
|
||||
" # 获取主连合约代码,如果是当天15点前日盘,则获取前一天的合约代码,如果是当天15点后晚盘,则获取今天的的合约代码\n",
|
||||
" now = datetime.now()\n",
|
||||
" if now.hour < 15:\n",
|
||||
" data_str = (now - timedelta(days=1)).date().strftime('%Y%m%d')\n",
|
||||
" else:\n",
|
||||
" data_str = now.date().strftime('%Y%m%d')\n",
|
||||
"\n",
|
||||
" # 拆分主连合约代码为交易标识和交易所代码(交易市场)\n",
|
||||
" main_symbol = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[0]\n",
|
||||
" exchange_id = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[1]\n",
|
||||
"\n",
|
||||
" # 拆分交易标识中的合约产品代码和交割月份\n",
|
||||
" for char in main_symbol:\n",
|
||||
" if char.isalpha():\n",
|
||||
" alpha_chars += char\n",
|
||||
" elif char.isdigit():\n",
|
||||
" numeric_chars += char\n",
|
||||
" \n",
|
||||
" # 监理交易所映射\n",
|
||||
" exchange = {'CFX': 'CFFEX', 'SHF':'SHFE', 'DCE':'DCE', 'GFE':'GFEX', 'INE':'INE', 'ZCE':'CZCE'}\n",
|
||||
"\n",
|
||||
" # 计算per_unit:交易单位(每手)和转换后交易所识别的main_code:主连代码\n",
|
||||
" if exchange_id == 'CFX' or exchange_id == 'SHF' or exchange_id == 'DCE' or exchange_id == 'GFE' or exchange_id == 'INE':\n",
|
||||
" df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
|
||||
" # ts_code = df[df['symbol'] == main_symbol]['ts_code'].iloc[0]\n",
|
||||
" per_unit = df[df['symbol'] == main_symbol]['per_unit'].iloc[0]\n",
|
||||
"\n",
|
||||
" # ds = pro.fut_settle(trade_date = data_str, ts_code =ts_code)\n",
|
||||
" # ds['margin_rate'] = (ds['long_margin_rate'] + ds['short_margin_rate'])/2\n",
|
||||
" # margin_rate = ds['margin_rate'].iloc[0]\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" if exchange_id == 'CFX':\n",
|
||||
" main_code = main_symbol\n",
|
||||
" elif exchange_id == 'SHF' or exchange_id == 'DCE' or exchange_id == 'GFE' or exchange_id == 'INE':\n",
|
||||
" lower_alpha_chars = str.lower(alpha_chars) \n",
|
||||
" main_code = lower_alpha_chars + numeric_chars\n",
|
||||
" elif exchange_id == 'ZCE':\n",
|
||||
" true_numeric_chars = numeric_chars[1:]\n",
|
||||
" main_code = alpha_chars + true_numeric_chars \n",
|
||||
" df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
|
||||
" per_unit = df[df['symbol'] == main_code]['per_unit'].iloc[0]\n",
|
||||
" main_code = alpha_chars + true_numeric_chars\n",
|
||||
"\n",
|
||||
" print(\"最终使用的主连代码:\",main_code) \n",
|
||||
" print(\"%s的交易单位(每手):%s\"%(main_symbol, per_unit))\n",
|
||||
" return main_code\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sb_1 = get_main_contact_on_time('IH.CFX')\n",
|
||||
"sb_2 = get_main_contact_on_time('cu.SHF')\n",
|
||||
"sb_3 = get_main_contact_on_time('eb.DCE')\n",
|
||||
"sb_4 = get_main_contact_on_time('si.GFE')\n",
|
||||
"sb_5 = get_main_contact_on_time('sc.INE') \n",
|
||||
"sb_6 = get_main_contact_on_time('SA.ZCE')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# df = pro.fut_basic(exchange='DCE', fut_type='1',fut_code = 'j' , fields='ts_code,symbol,exchange,name,fut_code,multiplier,trade_unit,per_unit,quote_unit,quote_unit_desc,d_mode_desc,list_date,delist_date,d_month,last_ddate,trade_time_desc')\n",
|
||||
"# df = pro.fut_basic(exchange='SHFE', fut_type='1', fut_code = 'au', fields='ts_code,symbol,name,list_date,delist_date')\n",
|
||||
"df = pro.fut_basic(exchange='CZCE', fut_type='1', fut_code = 'SA', fields='ts_code,symbol,exchange,name,fut_code,per_unit')\n",
|
||||
"# index_of_value = df.index[df['symbol'] == 'AU2408']\n",
|
||||
"df.head()\n",
|
||||
"value = df[df['symbol'] == 'SA409']['per_unit'].iloc[0]\n",
|
||||
"print(value)\n",
|
||||
"# df.loc[index_of_value, 'per_unit'].value\n",
|
||||
"# df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pro.fut_mapping(ts_code='SA.ZCE')\n",
|
||||
"print(df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# ds = pro.fut_settle(trade_date = '20240625', ts_code ='SA2409.ZCE')\n",
|
||||
"# ds = pro.fut_settle(trade_date='20230625', exchange='ZCE')\n",
|
||||
"# ds = pro.fut_settle(ts_code='SA409.ZCE', exchange='CZCE')\n",
|
||||
"# pro.fut_settle(trade_date='20181114', exchange='CZCE')\n",
|
||||
"pro.fut_settle(ts_code='AP2510.ZCE', exchange='CZCE')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ds.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ds['margin_rate'] = round((ds['long_margin_rate'] + ds['short_margin_rate'])/2,2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ds['margin_rate'].iloc[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"value = df.loc[index_of_value, 'per_unit'].iloc[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(value)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_IH = pro.fut_mapping(ts_code='IH.CFX')\n",
|
||||
"print(df_IH)\n",
|
||||
"df_IH.to_csv(r\"E:\\data\\mapping_ts_code_IH.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_IF = pro.fut_mapping(ts_code='IF.CFX')\n",
|
||||
"print(df_IF)\n",
|
||||
"df_IF.to_csv(r\"E:\\data\\mapping_ts_code_IF.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_IC = pro.fut_mapping(ts_code='IC.CFX')\n",
|
||||
"print(df_IC)\n",
|
||||
"df_IC.to_csv(r\"E:\\data\\mapping_ts_code_IC.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_IM = pro.fut_mapping(ts_code='IM.CFX')\n",
|
||||
"print(df_IM)\n",
|
||||
"df_IM.to_csv(r\"E:\\data\\mapping_ts_code_IM.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_TF = pro.fut_mapping(ts_code='TF.CFX')\n",
|
||||
"print(df_TF)\n",
|
||||
"df_TF.to_csv(r\"E:\\data\\mapping_ts_code_TF.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_T = pro.fut_mapping(ts_code='T.CFX')\n",
|
||||
"print(df_T)\n",
|
||||
"df_T.to_csv(r\"E:\\data\\mapping_ts_code_T.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_TS = pro.fut_mapping(ts_code='TS.CFX')\n",
|
||||
"print(df_TS)\n",
|
||||
"df_TS.to_csv(r\"E:\\data\\mapping_ts_code_TS.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_TL = pro.fut_mapping(TL_code='TL.CFX')\n",
|
||||
"print(df_TL)\n",
|
||||
"df_TL.to_csv(r\"E:\\data\\mapping_TL_code_TL.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "NameError",
|
||||
"evalue": "name 'pro' is not defined",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m df_TL \u001b[38;5;241m=\u001b[39m \u001b[43mpro\u001b[49m\u001b[38;5;241m.\u001b[39mfut_mapping(ts_code\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTL.CFX\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(df_TL)\n\u001b[0;32m 3\u001b[0m df_TL\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mD:\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mmapping_TL_code_TL.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||
"\u001b[1;31mNameError\u001b[0m: name 'pro' is not defined"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_TL = pro.fut_mapping(ts_code='TL.CFX')\n",
|
||||
"print(df_TL)\n",
|
||||
"df_TL.to_csv(r\"D:\\data\\mapping_TL_code_TL.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import akshare as ak\n",
|
||||
"\n",
|
||||
"futures_comm_info_df = ak.futures_comm_info(symbol=\"上海国际能源交易中心\")\n",
|
||||
"print(futures_comm_info_df[\"保证金-买开\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"futures_fees_info_df = ak.futures_fees_info()\n",
|
||||
"print(futures_fees_info_df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"futures_fees_info_df.to_csv(r'./futures_fees_info.csv', index=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"futures_display_main_sina_df = ak.futures_display_main_sina()\n",
|
||||
"print(futures_display_main_sina_df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"url = 'https://www.9qihuo.com/hangqing' #上期所铜结算参数地址https://www.9qihuo.com/hangqing\n",
|
||||
"data =pd.read_html(url) #读取网页上的表格\n",
|
||||
"dt=data[4].drop([0],axis=0).append(data[5],ignore_index=True) #提取结算参数到DataFrame格式\n",
|
||||
"#调整格式\n",
|
||||
"dt.columns=dt.iloc[0]\n",
|
||||
"dt.drop([0],axis=0,inplace=True) \n",
|
||||
"dt.set_index('合约代码',inplace=True)\n",
|
||||
"print(dt) #输出铜的结算参数"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"import csv\n",
|
||||
"\n",
|
||||
"# 目标网址\n",
|
||||
"url = \"https://www.9qihuo.com/hangqing\"\n",
|
||||
"\n",
|
||||
"# 发送GET请求,禁用SSL验证\n",
|
||||
"response = requests.get(url, verify=False)\n",
|
||||
"response.encoding = 'utf-8' # 确保编码正确\n",
|
||||
"\n",
|
||||
"# 解析网页内容\n",
|
||||
"soup = BeautifulSoup(response.text, 'lxml')\n",
|
||||
"\n",
|
||||
"# 找到目标表格\n",
|
||||
"table = soup.find('table', {'id': 'tblhangqinglist'})\n",
|
||||
"\n",
|
||||
"# 初始化CSV文件\n",
|
||||
"with open('main_contacts.csv', mode='w', newline='', encoding='utf-8') as file:\n",
|
||||
" writer = csv.writer(file)\n",
|
||||
" \n",
|
||||
" # 遍历表格的所有行\n",
|
||||
" for row in table.find_all('tr'):\n",
|
||||
" # 获取每一行的所有单元格\n",
|
||||
" cols = row.find_all(['th', 'td'])\n",
|
||||
" # 提取文本内容并写入CSV文件\n",
|
||||
" writer.writerow([col.text.strip() for col in cols])\n",
|
||||
"\n",
|
||||
"print(\"表格已成功保存为main_contacts.csv\")\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"df = pd.read_csv('./main_contacts.csv')\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df['交易品种'] = df['合约'].str.split(r'[()]', n=1, expand=True)[0]\n",
|
||||
"df['主连代码'] = df['合约'].str.split(r'[()]', n=2, expand=True)[1]\n",
|
||||
"\n",
|
||||
"# df['品种代码'] = df['主连代码'].str.split(str.isalpha(df['主连代码']), n=1, expand=True)[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"# 创建示例DataFrame\n",
|
||||
"\n",
|
||||
"# 定义拆分字母和数字的函数\n",
|
||||
"def split_alpha_numeric(s):\n",
|
||||
" match = re.match(r\"([a-zA-Z]+)([0-9]+)\", s)\n",
|
||||
" if match:\n",
|
||||
" return match.groups()\n",
|
||||
" else:\n",
|
||||
" return (s, None) # 如果没有匹配,返回原始字符串和None\n",
|
||||
"\n",
|
||||
"# 应用函数并创建新列\n",
|
||||
"df[['品种代码', '交割月份']] = df['主连代码'].apply(lambda x: pd.Series(split_alpha_numeric(x)))\n",
|
||||
"\n",
|
||||
"print(df)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.to_csv('./main_contacts_all.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import subprocess\n",
|
||||
"import schedule\n",
|
||||
"import time\n",
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"# jerome:增加akshare库\n",
|
||||
"import akshare as ak\n",
|
||||
"\n",
|
||||
"# jerome:增加下列库用于爬虫获取主力连续代码\n",
|
||||
"import pandas as pd\n",
|
||||
"import requests\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"import csv\n",
|
||||
"import re\n",
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_futures_fees_info():\n",
|
||||
" futures_fees_info_df = ak.futures_fees_info()\n",
|
||||
" futures_fees_info_df.to_csv(r'./futures_fees_info.csv', index=False)\n",
|
||||
"\n",
|
||||
"def get_main_contacts():\n",
|
||||
" url = \"https://www.9qihuo.com/hangqing\"\n",
|
||||
"\n",
|
||||
" # 发送GET请求,禁用SSL验证\n",
|
||||
" response = requests.get(url, verify=False)\n",
|
||||
" response.encoding = 'utf-8' # 确保编码正确\n",
|
||||
"\n",
|
||||
" # 解析网页内容\n",
|
||||
" soup = BeautifulSoup(response.text, 'lxml')\n",
|
||||
"\n",
|
||||
" # 找到目标表格\n",
|
||||
" table = soup.find('table', {'id': 'tblhangqinglist'})\n",
|
||||
"\n",
|
||||
" # 初始化CSV文件\n",
|
||||
" with open('tmp_main_contacts.csv', mode='w', newline='', encoding='utf-8') as file:\n",
|
||||
" writer = csv.writer(file)\n",
|
||||
" \n",
|
||||
" # 遍历表格的所有行\n",
|
||||
" for row in table.find_all('tr'):\n",
|
||||
" # 获取每一行的所有单元格\n",
|
||||
" cols = row.find_all(['th', 'td'])\n",
|
||||
" # 提取文本内容并写入CSV文件\n",
|
||||
" writer.writerow([col.text.strip() for col in cols])\n",
|
||||
"\n",
|
||||
" df = pd.read_csv('./tmp_main_contacts.csv',encoding='utf-8')\n",
|
||||
" df['交易品种'] = df['合约'].str.split(r'[()]', n=1, expand=True)[0]\n",
|
||||
" df['主连代码'] = df['合约'].str.split(r'[()]', n=2, expand=True)[1]\n",
|
||||
"\n",
|
||||
" df[['品种代码', '交割月份']] = df['主连代码'].apply(lambda x: pd.Series(split_alpha_numeric(x)))\n",
|
||||
" df.to_csv('./main_contacts.csv')\n",
|
||||
"\n",
|
||||
" print(\"期货主力品种表已经保存为main_contacts.csv\")\n",
|
||||
" os.remove(\"./tmp_main_contacts.csv\")\n",
|
||||
"\n",
|
||||
"# 拆分字母和数字的函数\n",
|
||||
"def split_alpha_numeric(s):\n",
|
||||
" match = re.match(r\"([a-zA-Z]+)([0-9]+)\", s)\n",
|
||||
" if match:\n",
|
||||
" return match.groups()\n",
|
||||
" else:\n",
|
||||
" return (s, None) # 如果没有匹配,返回原始字符串和None"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"get_futures_fees_info()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"期货主力品种表已经保存为main_contacts.csv\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"get_main_contacts()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
180
2.数据下载与处理/download_data.ipynb
Normal file
180
2.数据下载与处理/download_data.ipynb
Normal file
@@ -0,0 +1,180 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 配置迅投研数据服务\n",
|
||||
"from vnpy.trader.setting import SETTINGS\n",
|
||||
"\n",
|
||||
"SETTINGS[\"datafeed.name\"] = \"xt\"\n",
|
||||
"SETTINGS[\"datafeed.username\"] = \"token\"\n",
|
||||
"SETTINGS[\"datafeed.password\"] = \"ef326f853a744c58572f0158d470912c38a09552\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 加载功能模块\n",
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"from vnpy.trader.datafeed import get_datafeed\n",
|
||||
"from vnpy.trader.object import HistoryRequest, Exchange, Interval\n",
|
||||
"\n",
|
||||
"from vnpy_sqlite import Database as SqliteDatabase\n",
|
||||
"#from elite_database import Database as EliteDatabase\n",
|
||||
"\n",
|
||||
"#增加\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 初始化数据服务\n",
|
||||
"datafeed = get_datafeed()\n",
|
||||
"datafeed.init()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 交易所映射关系\n",
|
||||
"EXCHANGE_XT2VT = {\n",
|
||||
" \"SH\": Exchange.SSE,\n",
|
||||
" \"SZ\": Exchange.SZSE,\n",
|
||||
" \"BJ\": Exchange.BSE,\n",
|
||||
" \"SF\": Exchange.SHFE,\n",
|
||||
" \"IF\": Exchange.CFFEX,\n",
|
||||
" \"INE\": Exchange.INE,\n",
|
||||
" \"DF\": Exchange.DCE,\n",
|
||||
" \"ZF\": Exchange.CZCE,\n",
|
||||
" \"GF\": Exchange.GFEX\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"数据长度 41336\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 查询期货历史数据\n",
|
||||
"req = HistoryRequest(\n",
|
||||
" symbol=\"rb00\", # 加权指数 \n",
|
||||
" # symbol=\"IF00\", # 主力连续(未平滑)\n",
|
||||
" # exchange=Exchange.CFFEX,\n",
|
||||
" exchange = EXCHANGE_XT2VT[\"SF\"],\n",
|
||||
" start=datetime(2023, 1, 1),\n",
|
||||
" end=datetime(2023, 11, 24),#end=datetime.now(),\n",
|
||||
" interval=Interval.TICK\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"ticks = datafeed.query_tick_history(req)\n",
|
||||
"print(\"数据长度\", len(ticks))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 创建Elite数据库实例并写入数据\n",
|
||||
"#db2 = EliteDatabase()\n",
|
||||
"#db2.save_bar_data(bars)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.DataFrame(ticks)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 创建CSV文件并写入数据\n",
|
||||
"filepath = \"rb00_11.csv\" # CSV文件保存路径及文件名\n",
|
||||
"df.to_csv(filepath, index=False) # index参数设置为False表示不包含索引列\n",
|
||||
"#df.to_csv(filepath, mode='a', index=False, header=False) # index参数设置为False表示不包含索引列"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 读取CSV文件\n",
|
||||
"data = pd.read_csv(\"IC0.csv\")\n",
|
||||
"# 对数据进行排序\n",
|
||||
"sorted_data = data.sort_values(by='datetime')\n",
|
||||
"# 将排序结果写入CSV文件\n",
|
||||
"sorted_data.to_csv('sort_IC00.csv', index=False)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "1b43cb0bd93d5abbadd54afed8252f711d4681fe6223ad6b67ffaee289648f85"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
241
2.数据下载与处理/main_contact_merged.ipynb
Normal file
241
2.数据下载与处理/main_contact_merged.ipynb
Normal file
@@ -0,0 +1,241 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"map_file = r\"D:\\data\\mapping_ts_code_IH.csv\" #主力合约统计表\n",
|
||||
"file_path = str(\"F:/2022_tickdata/marketdatacsv\") #csv文件绝对地址前缀\n",
|
||||
"\n",
|
||||
"header_file = r\"D:\\data\\fut_marketdata_head.csv\" # 包含表头的 CSV 文件名\n",
|
||||
"# data_file = r\"D:\\combined_market_data.csv\" # 包含数据的 CSV 文件名\n",
|
||||
"output_file = r\"D:\\IH888_up_2022.csv\" # 合并后的输出文件名\n",
|
||||
"total_code = 'IH888'\n",
|
||||
"\n",
|
||||
"sp_chars = ['csv2022'] #'csv2021', 'csv2022',需要查找的主力年份文件"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.read_csv(map_file, index_col=0, encoding='utf', low_memory=False)\n",
|
||||
"df['mapping_ts_code_new'] = df['mapping_ts_code'].apply(lambda x: x.split('.')[0])\n",
|
||||
"df['temp_path']= file_path\n",
|
||||
"df['final_path'] = df['temp_path'].astype(str) + df['trade_date'].astype(str) + '/' + df['mapping_ts_code_new'] + '.csv'\n",
|
||||
"del df['mapping_ts_code_new'], df['temp_path']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.tail()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"import time as s_time\n",
|
||||
"import datetime\n",
|
||||
"import pandas as pd\n",
|
||||
"for sp_char in sp_chars:\n",
|
||||
" csv_files = [sp_file for sp_file in df['final_path'] if sp_char in sp_file]\n",
|
||||
" print(csv_files[:5])\n",
|
||||
" print(csv_files[-5:])\n",
|
||||
" dfs = pd.DataFrame()\n",
|
||||
" for file_path in csv_files:\n",
|
||||
" df_temp = pd.read_csv(file_path) \n",
|
||||
" print('读取%s成功'%(file_path))\n",
|
||||
" # df_temp.columns = ['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
|
||||
" # df_temp['datetime'] = df_temp['交易日'].astype(str) + ' '+df_temp['最后修改时间'].astype(str) + '.' + df_temp['最后修改毫秒'].astype(str)\n",
|
||||
" # df_temp['datetime'] = pd.to_datetime(df_temp['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')\n",
|
||||
" # df_temp['tmp_time'] = df_temp['datetime'].dt.strftime('%H:%M:%S.%f')\n",
|
||||
" # df_temp['time'] = df_temp['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time\n",
|
||||
" # drop_index1 = df_temp.loc[(df_temp['time'] > s_time(11, 30, 0)) & (df_temp['time'] < s_time(13, 0, 0))].index\n",
|
||||
" # drop_index2 = df_temp.loc[(df_temp['time'] > s_time(15, 0, 0)) | (df_temp['time'] < s_time(9, 30, 0))].index\n",
|
||||
" # df_temp.drop(drop_index1, axis=0, inplace=True)\n",
|
||||
" # df_temp.drop(drop_index2, axis=0, inplace=True)\n",
|
||||
" # dfs.append(df_temp)\n",
|
||||
" # df_temp.columns=['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
|
||||
" df_temp.columns = ['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
|
||||
" # print(df_temp.tail())\n",
|
||||
" # # print(\"表头添加成功!\")\n",
|
||||
" # dfs = pd.concat([dfs, df_temp],ignore_index=True, axis= 0)# \n",
|
||||
" # print(dfs.tail())\n",
|
||||
" # dfs = pd.concat([df_temp, ignore_index=True)\n",
|
||||
" dfs = pd.concat([dfs, df_temp], ignore_index=True)\n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dfs.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dfs.tail()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"combined_df = dfs.sort_values(by = ['交易日', '最后修改时间', '最后修改毫秒'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"combined_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"combined_df.tail()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"combined_df.insert(0,'统一代码', total_code)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"combined_df.tail()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"combined_df.to_csv(output_file, index=False)\n",
|
||||
"print(\"合并完成,并已导出到%s文件。\"%(output_file))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 以下为其他代码\n",
|
||||
"import pandas as pd\n",
|
||||
" \n",
|
||||
"try:\n",
|
||||
" file_path = 'path/to/your/file.csv' # 替换为你的文件路径\n",
|
||||
" df = pd.read_csv(file_path)\n",
|
||||
"except FileNotFoundError:\n",
|
||||
" print(f\"无法找到文件:{file_path}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import os\n",
|
||||
"for k in ['2021']:# , '2023'\n",
|
||||
" for v in [ 'IH', 'IF', 'IC', 'IM', 'T', 'TF', 'TL', 'TS']: \n",
|
||||
" print('当前年份为:%s,品种为:%s'%(k,v))\n",
|
||||
" map_file = 'D:/data/mapping_ts_code_%s.csv'%(v) #v\n",
|
||||
" file_path = 'F:/%s_tickdata/marketdatacsv'%(k) #csv文件绝对地址前缀\n",
|
||||
" output_file = 'D:/%s888_up_%s.csv'%(v,k) # 合并后的输出文件名\n",
|
||||
" total_code = '%s888'%(v)\n",
|
||||
" sp_chars = ['csv%s'%(k)] #'csv2021', 'csv2022',需要查找的主力年份文件\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" df = pd.read_csv(map_file, index_col=0, encoding='utf', low_memory=False)\n",
|
||||
" except FileNotFoundError:\n",
|
||||
" raise ValueError(\"主力合约统计表文件不存在,请检查文件路径是否正确。\")\n",
|
||||
" df['mapping_ts_code_new'] = df['mapping_ts_code'].apply(lambda x: x.split('.')[0])\n",
|
||||
" df['temp_path']= file_path\n",
|
||||
" df['final_path'] = df['temp_path'].astype(str) + df['trade_date'].astype(str) + '/' + df['mapping_ts_code_new'] + '.csv'\n",
|
||||
" del df['mapping_ts_code_new'], df['temp_path']\n",
|
||||
"\n",
|
||||
" for sp_char in sp_chars:\n",
|
||||
" csv_files = [sp_file for sp_file in df['final_path'] if sp_char in sp_file]\n",
|
||||
" if csv_files:\n",
|
||||
" print(csv_files[:5])\n",
|
||||
" print(csv_files[-5:])\n",
|
||||
" dfs = pd.DataFrame()\n",
|
||||
" for path in csv_files:\n",
|
||||
" try:\n",
|
||||
" df_temp = pd.read_csv(path) \n",
|
||||
" # print('读取%s成功'%(path))\n",
|
||||
" except FileNotFoundError:\n",
|
||||
" raise ValueError(\"%s文件不存在,请检查文件路径是否正确。\"%(path))\n",
|
||||
" break\n",
|
||||
" df_temp.columns = ['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
|
||||
" dfs = pd.concat([dfs, df_temp], ignore_index=True)\n",
|
||||
" combined_df = dfs.sort_values(by = ['交易日', '最后修改时间', '最后修改毫秒'])\n",
|
||||
" combined_df.insert(0,'统一代码', total_code)\n",
|
||||
" combined_df.to_csv(output_file, index=False)\n",
|
||||
" print(\"合并完成,并已导出到%s文件。\"%(output_file))\n",
|
||||
" else:\n",
|
||||
" print('品种%s在%s年无数据!'%(v,k))\n",
|
||||
" continue\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
138
2.数据下载与处理/shelve合并数据脚本.ipynb
Normal file
138
2.数据下载与处理/shelve合并数据脚本.ipynb
Normal file
@@ -0,0 +1,138 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import shelve\n",
|
||||
"\n",
|
||||
"# 要合并的shelve数据库路径\n",
|
||||
"shelve_files = ['D:/contract_data1.dat', 'D:/contract_data2.dat', 'D:/contract_data3.dat']\n",
|
||||
"# 合并后的新数据库路径\n",
|
||||
"new_shelve_file = 'D:/contract_data3.dat'\n",
|
||||
"\n",
|
||||
"# 创建一个新的shelve数据库来存储合并后的内容\n",
|
||||
"with shelve.open(new_shelve_file, writeback=True) as new_db:\n",
|
||||
" for shelve_file in shelve_files:\n",
|
||||
" try:\n",
|
||||
" with shelve.open(shelve_file) as db:\n",
|
||||
" for key in db:\n",
|
||||
" if key in new_db:\n",
|
||||
" print(f\"Warning: Key {key} already exists in the new database. Overwriting.\")\n",
|
||||
" new_db[key] = db[key]\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error processing {shelve_file}: {e}\")\n",
|
||||
"\n",
|
||||
"print(f\"Databases merged into {new_shelve_file}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import shelve\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# 要合并的shelve数据库路径\n",
|
||||
"shelve_files = [r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview1', r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview2', r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview3']\n",
|
||||
"# 合并后的新数据库路径\n",
|
||||
"new_shelve_file = r'D:\\bar_overview'\n",
|
||||
"\n",
|
||||
"# 创建一个新的shelve数据库来存储合并后的内容\n",
|
||||
"with shelve.open(new_shelve_file, writeback=True) as new_db:\n",
|
||||
" for shelve_file in shelve_files:\n",
|
||||
" # 检查文件是否存在\n",
|
||||
" if not os.path.exists(shelve_file):\n",
|
||||
" print(f\"错误:文件 {shelve_file} 不存在。\")\n",
|
||||
" continue\n",
|
||||
" try:\n",
|
||||
" # 打开并读取shelve数据库\n",
|
||||
" with shelve.open(shelve_file) as db:\n",
|
||||
" for key in db:\n",
|
||||
" if key in new_db:\n",
|
||||
" print(f\"警告:键 {key} 已存在于新数据库中。将覆盖。\")\n",
|
||||
" new_db[key] = db[key]\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"处理文件 {shelve_file} 时出错:{e}\")\n",
|
||||
" if 'db type could not be determined' in str(e):\n",
|
||||
" print(f\"提示:文件 {shelve_file} 可能已损坏或不是一个shelve数据库。\")\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
"print(f\"数据库已合并到 {new_shelve_file}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import shelve\n",
|
||||
"f_shelve = shelve.open(r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview1') # 创建一个文件句柄\n",
|
||||
"# 使用for循环打印内容\n",
|
||||
"for k,v in f_shelve.items():\n",
|
||||
" print(k,v)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import shelve\n",
|
||||
"\n",
|
||||
"# 打开所有源 shelve 数据库\n",
|
||||
"db1 = shelve.open(r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview1')\n",
|
||||
"db2 = shelve.open(r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview2')\n",
|
||||
"db3 = shelve.open(r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview3')\n",
|
||||
"\n",
|
||||
"# 创建一个新的目标 shelve 数据库\n",
|
||||
"merged_db = shelve.open(r'D:\\bar_overview')\n",
|
||||
"\n",
|
||||
"# 将第一个数据库的所有条目添加到新的数据库中\n",
|
||||
"for key in db1:\n",
|
||||
" merged_db[key] = db1[key]\n",
|
||||
"\n",
|
||||
"# 将第二个数据库的所有条目添加到新的数据库中\n",
|
||||
"for key in db2:\n",
|
||||
" merged_db[key] = db2[key]\n",
|
||||
"\n",
|
||||
"# 将第三个数据库的所有条目添加到新的数据库中\n",
|
||||
"for key in db3:\n",
|
||||
" merged_db[key] = db3[key]\n",
|
||||
"\n",
|
||||
"# 关闭所有数据库\n",
|
||||
"db1.close()\n",
|
||||
"db2.close()\n",
|
||||
"db3.close()\n",
|
||||
"merged_db.close()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
309
2.数据下载与处理/sqlite导出为csv文件.ipynb
Normal file
309
2.数据下载与处理/sqlite导出为csv文件.ipynb
Normal file
@@ -0,0 +1,309 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import sqlite3\n",
|
||||
"# import pandas as pd\n",
|
||||
"\n",
|
||||
"# # 连接到SQLite数据库\n",
|
||||
"# conn = sqlite3.connect('database.db')\n",
|
||||
"\n",
|
||||
"# # 从数据库中读取表数据到DataFrame\n",
|
||||
"# table_name = 'your_table_name' # 替换为实际表名\n",
|
||||
"# query = f\"SELECT * FROM {table_name}\"\n",
|
||||
"# df = pd.read_sql_query(query, conn)\n",
|
||||
"\n",
|
||||
"# 按照“本地代码”分组并导出为CSV文件\n",
|
||||
"for local_code, group in df.groupby('本地代码'):\n",
|
||||
" # 为每个“本地代码”生成一个CSV文件,文件名使用该代码值\n",
|
||||
" csv_filename = f\"{local_code}.csv\"\n",
|
||||
" group.to_csv(csv_filename, index=False, encoding='utf-8-sig')\n",
|
||||
" print(f\"数据已导出到 {csv_filename}\")\n",
|
||||
"\n",
|
||||
"# 关闭数据库连接\n",
|
||||
"conn.close()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sqlite3\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 连接到SQLite数据库\n",
|
||||
"conn = sqlite3.connect(r'D:\\of_data\\database.db')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 从数据库中读取表数据到DataFrame\n",
|
||||
"table_name = 'dbbardata' # 替换为实际表名\n",
|
||||
"query = f\"SELECT * FROM {table_name}\"\n",
|
||||
"df = pd.read_sql_query(query, conn)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"del(df['id'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"del group"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"数据已导出到 AP00_CZCE.csv\n",
|
||||
"数据已导出到 APJQ00_CZCE.csv\n",
|
||||
"数据已导出到 CF00_CZCE.csv\n",
|
||||
"数据已导出到 CFJQ00_CZCE.csv\n",
|
||||
"数据已导出到 CJ00_CZCE.csv\n",
|
||||
"数据已导出到 CJJQ00_CZCE.csv\n",
|
||||
"数据已导出到 CY00_CZCE.csv\n",
|
||||
"数据已导出到 CYJQ00_CZCE.csv\n",
|
||||
"数据已导出到 FG00_CZCE.csv\n",
|
||||
"数据已导出到 FGJQ00_CZCE.csv\n",
|
||||
"数据已导出到 IC00_CFFEX.csv\n",
|
||||
"数据已导出到 ICJQ00_CFFEX.csv\n",
|
||||
"数据已导出到 IF00_CFFEX.csv\n",
|
||||
"数据已导出到 IFJQ00_CFFEX.csv\n",
|
||||
"数据已导出到 IH00_CFFEX.csv\n",
|
||||
"数据已导出到 IHJQ00_CFFEX.csv\n",
|
||||
"数据已导出到 IM00_CFFEX.csv\n",
|
||||
"数据已导出到 IMJQ00_CFFEX.csv\n",
|
||||
"数据已导出到 JR00_CZCE.csv\n",
|
||||
"数据已导出到 JRJQ00_CZCE.csv\n",
|
||||
"数据已导出到 LR00_CZCE.csv\n",
|
||||
"数据已导出到 LRJQ00_CZCE.csv\n",
|
||||
"数据已导出到 MA00_CZCE.csv\n",
|
||||
"数据已导出到 MAJQ00_CZCE.csv\n",
|
||||
"数据已导出到 OI00_CZCE.csv\n",
|
||||
"数据已导出到 OIJQ00_CZCE.csv\n",
|
||||
"数据已导出到 PF00_CZCE.csv\n",
|
||||
"数据已导出到 PFJQ00_CZCE.csv\n",
|
||||
"数据已导出到 PK00_CZCE.csv\n",
|
||||
"数据已导出到 PKJQ00_CZCE.csv\n",
|
||||
"数据已导出到 PM00_CZCE.csv\n",
|
||||
"数据已导出到 PMJQ00_CZCE.csv\n",
|
||||
"数据已导出到 PX00_CZCE.csv\n",
|
||||
"数据已导出到 PXJQ00_CZCE.csv\n",
|
||||
"数据已导出到 RI00_CZCE.csv\n",
|
||||
"数据已导出到 RIJQ00_CZCE.csv\n",
|
||||
"数据已导出到 RM00_CZCE.csv\n",
|
||||
"数据已导出到 RMJQ00_CZCE.csv\n",
|
||||
"数据已导出到 RS00_CZCE.csv\n",
|
||||
"数据已导出到 RSJQ00_CZCE.csv\n",
|
||||
"数据已导出到 SA00_CZCE.csv\n",
|
||||
"数据已导出到 SAJQ00_CZCE.csv\n",
|
||||
"数据已导出到 SF00_CZCE.csv\n",
|
||||
"数据已导出到 SFJQ00_CZCE.csv\n",
|
||||
"数据已导出到 SH00_CZCE.csv\n",
|
||||
"数据已导出到 SHJQ00_CZCE.csv\n",
|
||||
"数据已导出到 SM00_CZCE.csv\n",
|
||||
"数据已导出到 SMJQ00_CZCE.csv\n",
|
||||
"数据已导出到 SR00_CZCE.csv\n",
|
||||
"数据已导出到 SRJQ00_CZCE.csv\n",
|
||||
"数据已导出到 T00_CFFEX.csv\n",
|
||||
"数据已导出到 TA00_CZCE.csv\n",
|
||||
"数据已导出到 TAJQ00_CZCE.csv\n",
|
||||
"数据已导出到 TF00_CFFEX.csv\n",
|
||||
"数据已导出到 TFJQ00_CFFEX.csv\n",
|
||||
"数据已导出到 TJQ00_CFFEX.csv\n",
|
||||
"数据已导出到 TL00_CFFEX.csv\n",
|
||||
"数据已导出到 TLJQ00_CFFEX.csv\n",
|
||||
"数据已导出到 TS00_CFFEX.csv\n",
|
||||
"数据已导出到 TSJQ00_CFFEX.csv\n",
|
||||
"数据已导出到 UR00_CZCE.csv\n",
|
||||
"数据已导出到 URJQ00_CZCE.csv\n",
|
||||
"数据已导出到 WH00_CZCE.csv\n",
|
||||
"数据已导出到 WHJQ00_CZCE.csv\n",
|
||||
"数据已导出到 ZC00_CZCE.csv\n",
|
||||
"数据已导出到 ZCJQ00_CZCE.csv\n",
|
||||
"数据已导出到 a00_DCE.csv\n",
|
||||
"数据已导出到 aJQ00_DCE.csv\n",
|
||||
"数据已导出到 ag00_SHFE.csv\n",
|
||||
"数据已导出到 agJQ00_SHFE.csv\n",
|
||||
"数据已导出到 al00_SHFE.csv\n",
|
||||
"数据已导出到 alJQ00_SHFE.csv\n",
|
||||
"数据已导出到 ao00_SHFE.csv\n",
|
||||
"数据已导出到 aoJQ00_SHFE.csv\n",
|
||||
"数据已导出到 au00_SHFE.csv\n",
|
||||
"数据已导出到 auJQ00_SHFE.csv\n",
|
||||
"数据已导出到 b00_DCE.csv\n",
|
||||
"数据已导出到 bJQ00_DCE.csv\n",
|
||||
"数据已导出到 bb00_DCE.csv\n",
|
||||
"数据已导出到 bbJQ00_DCE.csv\n",
|
||||
"数据已导出到 bc00_INE.csv\n",
|
||||
"数据已导出到 bcJQ00_INE.csv\n",
|
||||
"数据已导出到 br00_SHFE.csv\n",
|
||||
"数据已导出到 brJQ00_SHFE.csv\n",
|
||||
"数据已导出到 bu00_SHFE.csv\n",
|
||||
"数据已导出到 buJQ00_SHFE.csv\n",
|
||||
"数据已导出到 c00_DCE.csv\n",
|
||||
"数据已导出到 cJQ00_DCE.csv\n",
|
||||
"数据已导出到 cs00_DCE.csv\n",
|
||||
"数据已导出到 csJQ00_DCE.csv\n",
|
||||
"数据已导出到 cu00_SHFE.csv\n",
|
||||
"数据已导出到 cuJQ00_SHFE.csv\n",
|
||||
"数据已导出到 eb00_DCE.csv\n",
|
||||
"数据已导出到 ebJQ00_DCE.csv\n",
|
||||
"数据已导出到 ec00_INE.csv\n",
|
||||
"数据已导出到 ecJQ00_INE.csv\n",
|
||||
"数据已导出到 eg00_DCE.csv\n",
|
||||
"数据已导出到 egJQ00_DCE.csv\n",
|
||||
"数据已导出到 fb00_DCE.csv\n",
|
||||
"数据已导出到 fbJQ00_DCE.csv\n",
|
||||
"数据已导出到 fu00_SHFE.csv\n",
|
||||
"数据已导出到 fuJQ00_SHFE.csv\n",
|
||||
"数据已导出到 hc00_SHFE.csv\n",
|
||||
"数据已导出到 hcJQ00_SHFE.csv\n",
|
||||
"数据已导出到 i00_DCE.csv\n",
|
||||
"数据已导出到 iJQ00_DCE.csv\n",
|
||||
"数据已导出到 j00_DCE.csv\n",
|
||||
"数据已导出到 jJQ00_DCE.csv\n",
|
||||
"数据已导出到 jd00_DCE.csv\n",
|
||||
"数据已导出到 jdJQ00_DCE.csv\n",
|
||||
"数据已导出到 jm00_DCE.csv\n",
|
||||
"数据已导出到 jmJQ00_DCE.csv\n",
|
||||
"数据已导出到 l00_DCE.csv\n",
|
||||
"数据已导出到 lJQ00_DCE.csv\n",
|
||||
"数据已导出到 lc00_GFEX.csv\n",
|
||||
"数据已导出到 lcJQ00_GFEX.csv\n",
|
||||
"数据已导出到 lh00_DCE.csv\n",
|
||||
"数据已导出到 lhJQ00_DCE.csv\n",
|
||||
"数据已导出到 lu00_INE.csv\n",
|
||||
"数据已导出到 luJQ00_INE.csv\n",
|
||||
"数据已导出到 m00_DCE.csv\n",
|
||||
"数据已导出到 mJQ00_DCE.csv\n",
|
||||
"数据已导出到 ni00_SHFE.csv\n",
|
||||
"数据已导出到 niJQ00_SHFE.csv\n",
|
||||
"数据已导出到 nr00_INE.csv\n",
|
||||
"数据已导出到 nrJQ00_INE.csv\n",
|
||||
"数据已导出到 p00_DCE.csv\n",
|
||||
"数据已导出到 pJQ00_DCE.csv\n",
|
||||
"数据已导出到 pb00_SHFE.csv\n",
|
||||
"数据已导出到 pbJQ00_SHFE.csv\n",
|
||||
"数据已导出到 pg00_DCE.csv\n",
|
||||
"数据已导出到 pgJQ00_DCE.csv\n",
|
||||
"数据已导出到 pp00_DCE.csv\n",
|
||||
"数据已导出到 ppJQ00_DCE.csv\n",
|
||||
"数据已导出到 rb00_SHFE.csv\n",
|
||||
"数据已导出到 rbJQ00_SHFE.csv\n",
|
||||
"数据已导出到 rr00_DCE.csv\n",
|
||||
"数据已导出到 rrJQ00_DCE.csv\n",
|
||||
"数据已导出到 ru00_SHFE.csv\n",
|
||||
"数据已导出到 ruJQ00_SHFE.csv\n",
|
||||
"数据已导出到 sc00_INE.csv\n",
|
||||
"数据已导出到 scJQ00_INE.csv\n",
|
||||
"数据已导出到 si00_GFEX.csv\n",
|
||||
"数据已导出到 siJQ00_GFEX.csv\n",
|
||||
"数据已导出到 sn00_SHFE.csv\n",
|
||||
"数据已导出到 snJQ00_SHFE.csv\n",
|
||||
"数据已导出到 sp00_SHFE.csv\n",
|
||||
"数据已导出到 spJQ00_SHFE.csv\n",
|
||||
"数据已导出到 ss00_SHFE.csv\n",
|
||||
"数据已导出到 ssJQ00_SHFE.csv\n",
|
||||
"数据已导出到 v00_DCE.csv\n",
|
||||
"数据已导出到 vJQ00_DCE.csv\n",
|
||||
"数据已导出到 wr00_SHFE.csv\n",
|
||||
"数据已导出到 wrJQ00_SHFE.csv\n",
|
||||
"数据已导出到 y00_DCE.csv\n",
|
||||
"数据已导出到 yJQ00_DCE.csv\n",
|
||||
"数据已导出到 zn00_SHFE.csv\n",
|
||||
"数据已导出到 znJQ00_SHFE.csv\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for local_code, group in df.groupby('symbol'):\n",
|
||||
" # 为每个“本地代码”生成一个CSV文件,文件名使用该代码值\n",
|
||||
" exchange = group.exchange.iloc[0]\n",
|
||||
" csv_filename = f\"{local_code}_{exchange}.csv\"\n",
|
||||
" group.to_csv(csv_filename, index=False, encoding='utf-8-sig')\n",
|
||||
" print(f\"数据已导出到 {csv_filename}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"conn.close()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
371
2.数据下载与处理/ssquant_download/ssquant_download.ipynb
Normal file
371
2.数据下载与处理/ssquant_download/ssquant_download.ipynb
Normal file
@@ -0,0 +1,371 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"from ssquant.SQDATA import TakeData\n",
|
||||
"\n",
|
||||
"#注意首先需要pip install ssquant\n",
|
||||
"#否则链接不到数据库\n",
|
||||
"#输入俱乐部的账号密码即可调用,注意保密。\n",
|
||||
"#目前数据是2019年1月-至今\n",
|
||||
"#每日下午收盘后3点30分录入当天数据。\n",
|
||||
"#有任何疑问可以再群里提出,或者私信我(慕金龙)\n",
|
||||
"#官网: quant789.com\n",
|
||||
"#公众号:松鼠Quant\n",
|
||||
"#客服微信: viquant01\n",
|
||||
"\n",
|
||||
"#只能调取分钟及以上数据,tick数据每月底更新到百度网盘下载\n",
|
||||
"\n",
|
||||
"'''\n",
|
||||
"获取数据-\n",
|
||||
"品种:symbol,不区分大小写\n",
|
||||
"起始时间:start_date,\n",
|
||||
"结束时间:end_date(包含当天),\n",
|
||||
"周期kline_period:1M..5M..NM(分钟),1D(天),1W(周),1Y(月)\n",
|
||||
"复权adjust_type:0(不复权)1(后复权)\n",
|
||||
"注意:\n",
|
||||
"1.请正确输入账号密码\n",
|
||||
"2.不要挂代理访问数据库\n",
|
||||
"3.暂时没有股指数据,下个月补齐。\n",
|
||||
"'''\n",
|
||||
" \n",
|
||||
"# username='俱乐部账号' password='密码'\n",
|
||||
"client = TakeData(username='77777@qq.com', password='7777')\n",
|
||||
"data = client.get_data(\n",
|
||||
" symbol='rb888',\n",
|
||||
" start_date='2023-01-02',\n",
|
||||
" end_date='2024-01-03',\n",
|
||||
" kline_period='60M',\n",
|
||||
" adjust_type=1\n",
|
||||
")\n",
|
||||
"print(data)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"'''\n",
|
||||
"datetime:时间,\n",
|
||||
"\n",
|
||||
"symbol:品种,\n",
|
||||
"\n",
|
||||
"open:开盘价,\n",
|
||||
"\n",
|
||||
"high:最高价,\n",
|
||||
"\n",
|
||||
"low:最低价,\n",
|
||||
"\n",
|
||||
"close:收盘价,\n",
|
||||
"\n",
|
||||
"volume:成交量(单bar),\n",
|
||||
"\n",
|
||||
"amount:成交金额(单bar),\n",
|
||||
"\n",
|
||||
"openint:持仓量(单bar),\n",
|
||||
"\n",
|
||||
"cumulative_openint:累计持仓量,\n",
|
||||
"\n",
|
||||
"open_bidp , open_askp: K线第一个价格的买一价格和卖一价格\n",
|
||||
"\n",
|
||||
"close_bidp , close_askp: K线最后一个价格的买一价格和卖一价格\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" datetime symbol open high low close volume amount cumulative_openint openint open_bidp open_askp close_bidp close_askp\n",
|
||||
"0 2023-01-03 10:00:00 rb2305 4081.0 4081.0 4016.0 4037.0 737537 29782187220 1883481 -48415 4081.0 4084.0 4037.0 4038.0\n",
|
||||
"1 2023-01-03 11:00:00 rb2305 4038.0 4056.0 4037.0 4042.0 158548 6415696920 1887716 4235 4037.0 4038.0 4042.0 4044.0\n",
|
||||
"2 2023-01-03 12:00:00 rb2305 4044.0 4054.0 4037.0 4051.0 67448 2728130300 1890125 2409 4043.0 4044.0 4050.0 4051.0\n",
|
||||
"3 2023-01-03 14:00:00 rb2305 4055.0 4065.0 4045.0 4058.0 110181 4469698600 1895841 5723 4050.0 4051.0 4058.0 4059.0\n",
|
||||
"4 2023-01-03 15:00:00 rb2305 4059.0 4074.0 4056.0 4063.0 167932 6824213940 1882723 -13125 4058.0 4059.0 4062.0 4063.0\n",
|
||||
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
|
||||
"1689 2024-01-03 12:00:00 rb2405 4055.0 4057.0 4044.0 4049.0 79745 3229361570 1597387 -6515 4054.0 4055.0 4049.0 4050.0\n",
|
||||
"1690 2024-01-03 14:00:00 rb2405 4050.0 4056.0 4046.0 4049.0 55040 2229498750 1598566 1179 4050.0 4051.0 4049.0 4050.0\n",
|
||||
"1691 2024-01-03 15:00:00 rb2405 4050.0 4064.0 4048.0 4055.0 148845 6038835190 1583796 -14770 4049.0 4050.0 4055.0 4056.0\n",
|
||||
"1692 2024-01-03 22:00:00 rb2405 4054.0 4054.0 4040.0 4049.0 181753 7354584770 1582419 990 4053.0 4054.0 4048.0 4049.0\n",
|
||||
"1693 2024-01-03 23:00:00 rb2405 4049.0 4057.0 4042.0 4049.0 104712 4240341050 1574287 -8132 4048.0 4049.0 4049.0 4050.0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from ssquant.SQDATA import TakeData"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"out_path = r'D:/data'\n",
|
||||
"symbol_name = 'rb888' #主力连续888 次主力合约777\n",
|
||||
"time_period = '1M'\n",
|
||||
"start_time = '2000-01-01'\n",
|
||||
"end_time = '2019-01-31'\n",
|
||||
"adjust_k = 'Faj' #Naj:Non adjust,Faj:Forward adjust,后复权\n",
|
||||
"\n",
|
||||
"if adjust_k == 'Naj':\n",
|
||||
" adjust_tmp = 0\n",
|
||||
"elif adjust_k == 'Faj':\n",
|
||||
" adjust_tmp = 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"client = TakeData(username='240884432@qq.com', password='Zj123!@#')\n",
|
||||
"data = client.get_data(\n",
|
||||
" symbol=symbol_name,\n",
|
||||
" start_date=start_time,\n",
|
||||
" end_date=end_time,\n",
|
||||
" kline_period=time_period,\n",
|
||||
" adjust_type= adjust_tmp\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"头部文件为:--------------------\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>datetime</th>\n",
|
||||
" <th>symbol</th>\n",
|
||||
" <th>open</th>\n",
|
||||
" <th>high</th>\n",
|
||||
" <th>low</th>\n",
|
||||
" <th>close</th>\n",
|
||||
" <th>volume</th>\n",
|
||||
" <th>amount</th>\n",
|
||||
" <th>cumulative_openint</th>\n",
|
||||
" <th>openint</th>\n",
|
||||
" <th>open_bidp</th>\n",
|
||||
" <th>open_askp</th>\n",
|
||||
" <th>close_bidp</th>\n",
|
||||
" <th>close_askp</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2019-01-02 09:01:00</td>\n",
|
||||
" <td>rb1905</td>\n",
|
||||
" <td>3399</td>\n",
|
||||
" <td>3405</td>\n",
|
||||
" <td>3389</td>\n",
|
||||
" <td>3401</td>\n",
|
||||
" <td>69562</td>\n",
|
||||
" <td>2362607160</td>\n",
|
||||
" <td>2383714</td>\n",
|
||||
" <td>16864</td>\n",
|
||||
" <td>3399.0</td>\n",
|
||||
" <td>3400.0</td>\n",
|
||||
" <td>3400.0</td>\n",
|
||||
" <td>3401.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2019-01-02 09:02:00</td>\n",
|
||||
" <td>rb1905</td>\n",
|
||||
" <td>3401</td>\n",
|
||||
" <td>3430</td>\n",
|
||||
" <td>3401</td>\n",
|
||||
" <td>3410</td>\n",
|
||||
" <td>88696</td>\n",
|
||||
" <td>3034283200</td>\n",
|
||||
" <td>2399530</td>\n",
|
||||
" <td>-12248</td>\n",
|
||||
" <td>3401.0</td>\n",
|
||||
" <td>3402.0</td>\n",
|
||||
" <td>3409.0</td>\n",
|
||||
" <td>3410.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2019-01-02 09:03:00</td>\n",
|
||||
" <td>rb1905</td>\n",
|
||||
" <td>3409</td>\n",
|
||||
" <td>3414</td>\n",
|
||||
" <td>3409</td>\n",
|
||||
" <td>3412</td>\n",
|
||||
" <td>22828</td>\n",
|
||||
" <td>778740580</td>\n",
|
||||
" <td>2387356</td>\n",
|
||||
" <td>1180</td>\n",
|
||||
" <td>3409.0</td>\n",
|
||||
" <td>3410.0</td>\n",
|
||||
" <td>3411.0</td>\n",
|
||||
" <td>3412.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2019-01-02 09:04:00</td>\n",
|
||||
" <td>rb1905</td>\n",
|
||||
" <td>3412</td>\n",
|
||||
" <td>3413</td>\n",
|
||||
" <td>3403</td>\n",
|
||||
" <td>3404</td>\n",
|
||||
" <td>17378</td>\n",
|
||||
" <td>592413220</td>\n",
|
||||
" <td>2388158</td>\n",
|
||||
" <td>54</td>\n",
|
||||
" <td>3411.0</td>\n",
|
||||
" <td>3412.0</td>\n",
|
||||
" <td>3404.0</td>\n",
|
||||
" <td>3405.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2019-01-02 09:05:00</td>\n",
|
||||
" <td>rb1905</td>\n",
|
||||
" <td>3405</td>\n",
|
||||
" <td>3409</td>\n",
|
||||
" <td>3405</td>\n",
|
||||
" <td>3405</td>\n",
|
||||
" <td>15770</td>\n",
|
||||
" <td>537276980</td>\n",
|
||||
" <td>2388190</td>\n",
|
||||
" <td>1674</td>\n",
|
||||
" <td>3405.0</td>\n",
|
||||
" <td>3406.0</td>\n",
|
||||
" <td>3405.0</td>\n",
|
||||
" <td>3406.0</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" datetime symbol open high low close volume amount \\\n",
|
||||
"0 2019-01-02 09:01:00 rb1905 3399 3405 3389 3401 69562 2362607160 \n",
|
||||
"1 2019-01-02 09:02:00 rb1905 3401 3430 3401 3410 88696 3034283200 \n",
|
||||
"2 2019-01-02 09:03:00 rb1905 3409 3414 3409 3412 22828 778740580 \n",
|
||||
"3 2019-01-02 09:04:00 rb1905 3412 3413 3403 3404 17378 592413220 \n",
|
||||
"4 2019-01-02 09:05:00 rb1905 3405 3409 3405 3405 15770 537276980 \n",
|
||||
"\n",
|
||||
" cumulative_openint openint open_bidp open_askp close_bidp close_askp \n",
|
||||
"0 2383714 16864 3399.0 3400.0 3400.0 3401.0 \n",
|
||||
"1 2399530 -12248 3401.0 3402.0 3409.0 3410.0 \n",
|
||||
"2 2387356 1180 3409.0 3410.0 3411.0 3412.0 \n",
|
||||
"3 2388158 54 3411.0 3412.0 3404.0 3405.0 \n",
|
||||
"4 2388190 1674 3405.0 3406.0 3405.0 3406.0 "
|
||||
]
|
||||
},
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print('头部文件为:--------------------')\n",
|
||||
"data.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
" \n",
|
||||
"# 假设你有一个字符串,表示时间,格式为 'YYYY-MM-DD HH:MM:SS'\n",
|
||||
"real_start_time = data.iloc[0,0]\n",
|
||||
" \n",
|
||||
"# 使用datetime.strptime将字符串转换为时间\n",
|
||||
"time_obj = datetime.strptime(real_start_time, '%Y-%m-%d %H:%M:%S')\n",
|
||||
" \n",
|
||||
"# 获取年月日\n",
|
||||
"year = time_obj.year\n",
|
||||
"month = time_obj.month\n",
|
||||
"day = time_obj.day\n",
|
||||
" \n",
|
||||
"print(f'年: {year}, 月: {month}, 日: {day}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('尾部文件为:--------------------')\n",
|
||||
"data.tail()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import datetime\n",
|
||||
"real_start_time = pd.to_datetime(data.iloc[0,0]).date().strftime('%Y-%m-%d')\n",
|
||||
"real_end_time = pd.to_datetime(data.iloc[-1,0]).date().strftime('%Y-%m-%d')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.to_csv('%s/%s_%s_%s(%s_%s).csv'%(out_path,symbol_name,time_period,adjust_k,real_start_time,real_end_time), index=False)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
79
2.数据下载与处理/ssquant_download/数据库使用示例.py
Normal file
79
2.数据下载与处理/ssquant_download/数据库使用示例.py
Normal file
@@ -0,0 +1,79 @@
|
||||
from ssquant.SQDATA import TakeData

# NOTE: `pip install ssquant` is required first, otherwise the database
# cannot be reached.
# Log in with your club account credentials and keep them confidential.
# Coverage starts January 2019 and runs to the present; each trading day's
# data is loaded at 15:30, after the close.
# Questions: raise them in the group or message the maintainer (慕金龙).
# Website: quant789.com
# Official account: 松鼠Quant
# Support WeChat: viquant01

# Only minute-level and coarser bars are served here; tick data is uploaded
# to Baidu Netdisk at the end of every month.

'''
获取数据-
品种:symbol,不区分大小写
起始时间:start_date,
结束时间:end_date(包含当天),
周期kline_period:1M..5M..NM(分钟),1D(天),1W(周),1Y(月)
复权adjust_type:0(不复权)1(后复权)
注意:
1.请正确输入账号密码
2.不要挂代理访问数据库
3.暂时没有股指数据,下个月补齐。
'''

# username='俱乐部账号' password='密码'
client = TakeData(username='77777@qq.com', password='7777')

# Collect the request parameters in one place, then issue the query.
query = dict(
    symbol='rb888',
    start_date='2023-01-02',
    end_date='2024-01-03',
    kline_period='60M',
    adjust_type=1,
)
data = client.get_data(**query)
print(data)


'''
datetime:时间,

symbol:品种,

open:开盘价,

high:最高价,

low:最低价,

close:收盘价,

volume:成交量(单bar),

amount:成交金额(单bar),

openint:持仓量(单bar),

cumulative_openint:累计持仓量,

open_bidp , open_askp: K线第一个价格的买一价格和卖一价格

close_bidp , close_askp: K线最后一个价格的买一价格和卖一价格


                 datetime  symbol    open    high     low   close  volume       amount  cumulative_openint  openint  open_bidp  open_askp  close_bidp  close_askp
0     2023-01-03 10:00:00  rb2305  4081.0  4081.0  4016.0  4037.0  737537  29782187220             1883481   -48415     4081.0     4084.0      4037.0      4038.0
1     2023-01-03 11:00:00  rb2305  4038.0  4056.0  4037.0  4042.0  158548   6415696920             1887716     4235     4037.0     4038.0      4042.0      4044.0
2     2023-01-03 12:00:00  rb2305  4044.0  4054.0  4037.0  4051.0   67448   2728130300             1890125     2409     4043.0     4044.0      4050.0      4051.0
3     2023-01-03 14:00:00  rb2305  4055.0  4065.0  4045.0  4058.0  110181   4469698600             1895841     5723     4050.0     4051.0      4058.0      4059.0
4     2023-01-03 15:00:00  rb2305  4059.0  4074.0  4056.0  4063.0  167932   6824213940             1882723   -13125     4058.0     4059.0      4062.0      4063.0
...                   ...     ...     ...     ...     ...     ...     ...          ...                 ...      ...        ...        ...         ...         ...
1689  2024-01-03 12:00:00  rb2405  4055.0  4057.0  4044.0  4049.0   79745   3229361570             1597387    -6515     4054.0     4055.0      4049.0      4050.0
1690  2024-01-03 14:00:00  rb2405  4050.0  4056.0  4046.0  4049.0   55040   2229498750             1598566     1179     4050.0     4051.0      4049.0      4050.0
1691  2024-01-03 15:00:00  rb2405  4050.0  4064.0  4048.0  4055.0  148845   6038835190             1583796   -14770     4049.0     4050.0      4055.0      4056.0
1692  2024-01-03 22:00:00  rb2405  4054.0  4054.0  4040.0  4049.0  181753   7354584770             1582419      990     4053.0     4054.0      4048.0      4049.0
1693  2024-01-03 23:00:00  rb2405  4049.0  4057.0  4042.0  4049.0  104712   4240341050             1574287    -8132     4048.0     4049.0      4049.0      4050.0
'''
|
||||
65
2.数据下载与处理/ssquant_download/数据库读取_生产K线图.py
Normal file
65
2.数据下载与处理/ssquant_download/数据库读取_生产K线图.py
Normal file
@@ -0,0 +1,65 @@
|
||||
from ssquant.SQDATA import TakeData
|
||||
from pyecharts import options as opts
|
||||
from pyecharts.charts import Kline, Bar, Grid
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def plotK(data):
    """Render an interactive candlestick chart of *data* to 'K线图.html'.

    Expects a DataFrame with a DatetimeIndex and columns
    'open', 'close', 'low', 'high', 'symbol'. Every bar where the
    underlying contract code ('symbol') changes is marked with a red
    vertical line labelled with the old and new contract.
    """
    ohlc_rows = data[['open', 'close', 'low', 'high']].values.tolist()
    x_labels = data.index.strftime('%Y-%m-%d %H:%M:%S').tolist()
    contracts = data['symbol'].values.tolist()

    # Flag each contract-roll position with a vertical mark line.
    roll_marks = [
        opts.MarkLineItem(x=x_labels[i], name=f'前一个合约{contracts[i-1]},当前合约{contracts[i]}')
        for i in range(1, len(contracts))
        if contracts[i] != contracts[i - 1]
    ]

    # Zoom controls: a draggable slider plus mouse-wheel zooming,
    # linked across axes 0-4.
    zoom_controls = [
        opts.DataZoomOpts(type_="slider", xaxis_index=[0, 1, 2, 3, 4], range_start=50, range_end=100),
        opts.DataZoomOpts(type_="inside", xaxis_index=[0, 1, 2, 3, 4]),
    ]

    chart = Kline(init_opts=opts.InitOpts(width="100%", height="900px"))
    chart.add_xaxis(x_labels)
    chart.add_yaxis(
        'K线图表',
        ohlc_rows,
        markline_opts=opts.MarkLineOpts(
            data=roll_marks,
            symbol='none',
            linestyle_opts=opts.LineStyleOpts(color="red"),
        ),
    )
    chart.set_global_opts(
        datazoom_opts=zoom_controls,
        toolbox_opts=opts.ToolboxOpts(is_show=True, pos_top="0%", pos_right="80%"),
        legend_opts=opts.LegendOpts(pos_left='40%'),
    )
    chart.render('K线图.html')
|
||||
|
||||
|
||||
|
||||
'''
获取数据-
品种:symbol,
起始时间:start_date,
结束时间:end_date(包含当天),
周期kline_period:1M..5M..NM(分钟),1D(天),1W(周),1Y(月)
复权adjust_type:0(不复权)1(后复权)
'''

# Fill in your club credentials below: username='俱乐部账号' password='密码'
client = TakeData(username='1234@qq.com', password='123')

# One query: 60-minute bars for the rb continuous contract, back-adjusted.
request = dict(
    symbol='rb888',
    start_date='2023-12-28',
    end_date='2024-01-17',
    kline_period='60M',
    adjust_type=1,
)
data = client.get_data(**request)

# Promote the bar timestamp to a proper DatetimeIndex for plotting.
data.set_index("datetime", inplace=True)
data.index = pd.to_datetime(data.index)
print(data)

# Write the candlestick chart next to this script.
plotK(data)
|
||||
|
||||
|
||||
|
||||
249
2.数据下载与处理/ssquant_download/松鼠数据下载脚本.ipynb
Normal file
249
2.数据下载与处理/ssquant_download/松鼠数据下载脚本.ipynb
Normal file
@@ -0,0 +1,249 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1a846b12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"from ssquant.SQDATA import TakeData\n",
|
||||
"\n",
|
||||
"#注意首先需要pip install ssquant\n",
|
||||
"#否则链接不到数据库\n",
|
||||
"#输入俱乐部的账号密码即可调用,注意保密。\n",
|
||||
"#目前数据是2019年1月-至今\n",
|
||||
"#每日下午收盘后3点30分录入当天数据。\n",
|
||||
"#有任何疑问可以再群里提出,或者私信我(慕金龙)\n",
|
||||
"#官网: quant789.com\n",
|
||||
"#公众号:松鼠Quant\n",
|
||||
"#客服微信: viquant01\n",
|
||||
"\n",
|
||||
"#只能调取分钟及以上数据,tick数据每月底更新到百度网盘下载\n",
|
||||
"\n",
|
||||
"'''\n",
|
||||
"获取数据-\n",
|
||||
"品种:symbol,不区分大小写\n",
|
||||
"起始时间:start_date,\n",
|
||||
"结束时间:end_date(包含当天),\n",
|
||||
"周期kline_period:1M..5M..NM(分钟),1D(天),1W(周),1Y(月)\n",
|
||||
"复权adjust_type:0(不复权)1(后复权)\n",
|
||||
"注意:\n",
|
||||
"1.请正确输入账号密码\n",
|
||||
"2.不要挂代理访问数据库\n",
|
||||
"3.暂时没有股指数据,下个月补齐。\n",
|
||||
"'''\n",
|
||||
" \n",
|
||||
"# username='俱乐部账号' password='密码'\n",
|
||||
"client = TakeData(username='240884432@qq.com', password='7777')\n",
|
||||
"data = client.get_data(\n",
|
||||
" symbol='rb888',\n",
|
||||
" start_date='2023-01-02',\n",
|
||||
" end_date='2024-01-03',\n",
|
||||
" kline_period='60M',\n",
|
||||
" adjust_type=1\n",
|
||||
")\n",
|
||||
"print(data)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"'''\n",
|
||||
"datetime:时间,\n",
|
||||
"\n",
|
||||
"symbol:品种,\n",
|
||||
"\n",
|
||||
"open:开盘价,\n",
|
||||
"\n",
|
||||
"high:最高价,\n",
|
||||
"\n",
|
||||
"low:最低价,\n",
|
||||
"\n",
|
||||
"close:收盘价,\n",
|
||||
"\n",
|
||||
"volume:成交量(单bar),\n",
|
||||
"\n",
|
||||
"amount:成交金额(单bar),\n",
|
||||
"\n",
|
||||
"openint:持仓量(单bar),\n",
|
||||
"\n",
|
||||
"cumulative_openint:累计持仓量,\n",
|
||||
"\n",
|
||||
"open_bidp , open_askp: K线第一个价格的买一价格和卖一价格\n",
|
||||
"\n",
|
||||
"close_bidp , close_askp: K线最后一个价格的买一价格和卖一价格\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" datetime symbol open high low close volume amount cumulative_openint openint open_bidp open_askp close_bidp close_askp\n",
|
||||
"0 2023-01-03 10:00:00 rb2305 4081.0 4081.0 4016.0 4037.0 737537 29782187220 1883481 -48415 4081.0 4084.0 4037.0 4038.0\n",
|
||||
"1 2023-01-03 11:00:00 rb2305 4038.0 4056.0 4037.0 4042.0 158548 6415696920 1887716 4235 4037.0 4038.0 4042.0 4044.0\n",
|
||||
"2 2023-01-03 12:00:00 rb2305 4044.0 4054.0 4037.0 4051.0 67448 2728130300 1890125 2409 4043.0 4044.0 4050.0 4051.0\n",
|
||||
"3 2023-01-03 14:00:00 rb2305 4055.0 4065.0 4045.0 4058.0 110181 4469698600 1895841 5723 4050.0 4051.0 4058.0 4059.0\n",
|
||||
"4 2023-01-03 15:00:00 rb2305 4059.0 4074.0 4056.0 4063.0 167932 6824213940 1882723 -13125 4058.0 4059.0 4062.0 4063.0\n",
|
||||
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
|
||||
"1689 2024-01-03 12:00:00 rb2405 4055.0 4057.0 4044.0 4049.0 79745 3229361570 1597387 -6515 4054.0 4055.0 4049.0 4050.0\n",
|
||||
"1690 2024-01-03 14:00:00 rb2405 4050.0 4056.0 4046.0 4049.0 55040 2229498750 1598566 1179 4050.0 4051.0 4049.0 4050.0\n",
|
||||
"1691 2024-01-03 15:00:00 rb2405 4050.0 4064.0 4048.0 4055.0 148845 6038835190 1583796 -14770 4049.0 4050.0 4055.0 4056.0\n",
|
||||
"1692 2024-01-03 22:00:00 rb2405 4054.0 4054.0 4040.0 4049.0 181753 7354584770 1582419 990 4053.0 4054.0 4048.0 4049.0\n",
|
||||
"1693 2024-01-03 23:00:00 rb2405 4049.0 4057.0 4042.0 4049.0 104712 4240341050 1574287 -8132 4048.0 4049.0 4049.0 4050.0\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "65b4b7aa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from ssquant.SQDATA import TakeData\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "edd4f1e5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" datetime symbol open high low close volume \\\n",
|
||||
"0 2023-01-03 10:00:00 rb2305 4081 4081 4016 4037 737537 \n",
|
||||
"1 2023-01-03 11:00:00 rb2305 4038 4056 4037 4042 158548 \n",
|
||||
"2 2023-01-03 12:00:00 rb2305 4044 4054 4037 4051 67448 \n",
|
||||
"3 2023-01-03 14:00:00 rb2305 4055 4065 4045 4058 110181 \n",
|
||||
"4 2023-01-03 15:00:00 rb2305 4059 4074 4056 4063 167932 \n",
|
||||
".. ... ... ... ... ... ... ... \n",
|
||||
"112 2023-02-01 12:00:00 rb2305 4126 4129 4105 4107 193291 \n",
|
||||
"113 2023-02-01 14:00:00 rb2305 4108 4117 4100 4109 137182 \n",
|
||||
"114 2023-02-01 15:00:00 rb2305 4109 4114 4075 4084 378930 \n",
|
||||
"115 2023-02-01 22:00:00 rb2305 4092 4104 4087 4103 207519 \n",
|
||||
"116 2023-02-01 23:00:00 rb2305 4102 4109 4075 4098 189724 \n",
|
||||
"\n",
|
||||
" amount cumulative_openint openint open_bidp open_askp \\\n",
|
||||
"0 29782187220 1883481 -48415 4081 4084 \n",
|
||||
"1 6415696920 1887716 4235 4037 4038 \n",
|
||||
"2 2728130300 1890125 2409 4043 4044 \n",
|
||||
"3 4469698600 1895841 5723 4050 4051 \n",
|
||||
"4 6824213940 1882723 -13125 4058 4059 \n",
|
||||
".. ... ... ... ... ... \n",
|
||||
"112 7954826320 1984919 3490 4125 4126 \n",
|
||||
"113 5634834380 1998312 13394 4108 4109 \n",
|
||||
"114 15503896450 1994915 -3398 4109 4110 \n",
|
||||
"115 8500232870 1988628 -5587 4091 4092 \n",
|
||||
"116 7757206650 1973544 -15099 4101 4102 \n",
|
||||
"\n",
|
||||
" close_bidp close_askp \n",
|
||||
"0 4037 4038 \n",
|
||||
"1 4042 4044 \n",
|
||||
"2 4050 4051 \n",
|
||||
"3 4058 4059 \n",
|
||||
"4 4062 4063 \n",
|
||||
".. ... ... \n",
|
||||
"112 4106 4107 \n",
|
||||
"113 4108 4109 \n",
|
||||
"114 4084 4085 \n",
|
||||
"115 4102 4103 \n",
|
||||
"116 4098 4099 \n",
|
||||
"\n",
|
||||
"[117 rows x 14 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"client = TakeData(username='240884432@qq.com', password='Zj123!@#')\n",
|
||||
"data = client.get_data(\n",
|
||||
" symbol='rb888',\n",
|
||||
" start_date='2023-01-01',\n",
|
||||
" end_date='2023-02-01',\n",
|
||||
" kline_period='60M',\n",
|
||||
" adjust_type=1\n",
|
||||
")\n",
|
||||
"print(data)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "25c70609",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" symbol open high low close volume amount \\\n",
|
||||
"datetime \n",
|
||||
"2023-01-03 10:00:00 rb2305 4081 4081 4016 4037 737537 29782187220 \n",
|
||||
"2023-01-03 11:00:00 rb2305 4038 4056 4037 4042 158548 6415696920 \n",
|
||||
"2023-01-03 12:00:00 rb2305 4044 4054 4037 4051 67448 2728130300 \n",
|
||||
"2023-01-03 14:00:00 rb2305 4055 4065 4045 4058 110181 4469698600 \n",
|
||||
"2023-01-03 15:00:00 rb2305 4059 4074 4056 4063 167932 6824213940 \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"2023-02-01 12:00:00 rb2305 4126 4129 4105 4107 193291 7954826320 \n",
|
||||
"2023-02-01 14:00:00 rb2305 4108 4117 4100 4109 137182 5634834380 \n",
|
||||
"2023-02-01 15:00:00 rb2305 4109 4114 4075 4084 378930 15503896450 \n",
|
||||
"2023-02-01 22:00:00 rb2305 4092 4104 4087 4103 207519 8500232870 \n",
|
||||
"2023-02-01 23:00:00 rb2305 4102 4109 4075 4098 189724 7757206650 \n",
|
||||
"\n",
|
||||
" cumulative_openint openint open_bidp open_askp \\\n",
|
||||
"datetime \n",
|
||||
"2023-01-03 10:00:00 1883481 -48415 4081 4084 \n",
|
||||
"2023-01-03 11:00:00 1887716 4235 4037 4038 \n",
|
||||
"2023-01-03 12:00:00 1890125 2409 4043 4044 \n",
|
||||
"2023-01-03 14:00:00 1895841 5723 4050 4051 \n",
|
||||
"2023-01-03 15:00:00 1882723 -13125 4058 4059 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"2023-02-01 12:00:00 1984919 3490 4125 4126 \n",
|
||||
"2023-02-01 14:00:00 1998312 13394 4108 4109 \n",
|
||||
"2023-02-01 15:00:00 1994915 -3398 4109 4110 \n",
|
||||
"2023-02-01 22:00:00 1988628 -5587 4091 4092 \n",
|
||||
"2023-02-01 23:00:00 1973544 -15099 4101 4102 \n",
|
||||
"\n",
|
||||
" close_bidp close_askp \n",
|
||||
"datetime \n",
|
||||
"2023-01-03 10:00:00 4037 4038 \n",
|
||||
"2023-01-03 11:00:00 4042 4044 \n",
|
||||
"2023-01-03 12:00:00 4050 4051 \n",
|
||||
"2023-01-03 14:00:00 4058 4059 \n",
|
||||
"2023-01-03 15:00:00 4062 4063 \n",
|
||||
"... ... ... \n",
|
||||
"2023-02-01 12:00:00 4106 4107 \n",
|
||||
"2023-02-01 14:00:00 4108 4109 \n",
|
||||
"2023-02-01 15:00:00 4084 4085 \n",
|
||||
"2023-02-01 22:00:00 4102 4103 \n",
|
||||
"2023-02-01 23:00:00 4098 4099 \n",
|
||||
"\n",
|
||||
"[117 rows x 13 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data.set_index(\"datetime\", inplace=True)\n",
|
||||
"data.index = pd.to_datetime(data.index)\n",
|
||||
"print(data)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
2
2.数据下载与处理/ssquant_download/说明.txt
Normal file
2
2.数据下载与处理/ssquant_download/说明.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
1.使用“数据库使用示例.py”获取数据,每天下午3点50分后更新当天数据。
2.每月初会更新上个月所有品种的tick数据和1m数据
|
||||
610
2.数据下载与处理/tick数据转分钟数据脚本/tick_to_min.ipynb
Normal file
610
2.数据下载与处理/tick数据转分钟数据脚本/tick_to_min.ipynb
Normal file
@@ -0,0 +1,610 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"file_path_888 = r\"E:\\data\\data_rs_merged\\中金所\\IM888\\IM888_rs_2023.csv\"\n",
|
||||
"df_888 = pd.read_csv(file_path_888, encoding='utf-8')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>main_contract</th>\n",
|
||||
" <th>symbol</th>\n",
|
||||
" <th>datetime</th>\n",
|
||||
" <th>lastprice</th>\n",
|
||||
" <th>volume</th>\n",
|
||||
" <th>bid_p</th>\n",
|
||||
" <th>ask_p</th>\n",
|
||||
" <th>bid_v</th>\n",
|
||||
" <th>ask_v</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>IM2301</td>\n",
|
||||
" <td>2023-01-03 09:30:00.200</td>\n",
|
||||
" <td>6280.0</td>\n",
|
||||
" <td>46</td>\n",
|
||||
" <td>6276.0</td>\n",
|
||||
" <td>6277.0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>IM2301</td>\n",
|
||||
" <td>2023-01-03 09:30:00.700</td>\n",
|
||||
" <td>6277.0</td>\n",
|
||||
" <td>61</td>\n",
|
||||
" <td>6278.0</td>\n",
|
||||
" <td>6278.8</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>16</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>IM2301</td>\n",
|
||||
" <td>2023-01-03 09:30:01.200</td>\n",
|
||||
" <td>6277.2</td>\n",
|
||||
" <td>81</td>\n",
|
||||
" <td>6277.2</td>\n",
|
||||
" <td>6278.8</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>IM2301</td>\n",
|
||||
" <td>2023-01-03 09:30:01.700</td>\n",
|
||||
" <td>6277.8</td>\n",
|
||||
" <td>90</td>\n",
|
||||
" <td>6277.8</td>\n",
|
||||
" <td>6278.6</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>IM2301</td>\n",
|
||||
" <td>2023-01-03 09:30:02.200</td>\n",
|
||||
" <td>6278.8</td>\n",
|
||||
" <td>112</td>\n",
|
||||
" <td>6278.8</td>\n",
|
||||
" <td>6280.0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>7</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" main_contract symbol datetime lastprice volume bid_p \\\n",
|
||||
"0 IM888 IM2301 2023-01-03 09:30:00.200 6280.0 46 6276.0 \n",
|
||||
"1 IM888 IM2301 2023-01-03 09:30:00.700 6277.0 61 6278.0 \n",
|
||||
"2 IM888 IM2301 2023-01-03 09:30:01.200 6277.2 81 6277.2 \n",
|
||||
"3 IM888 IM2301 2023-01-03 09:30:01.700 6277.8 90 6277.8 \n",
|
||||
"4 IM888 IM2301 2023-01-03 09:30:02.200 6278.8 112 6278.8 \n",
|
||||
"\n",
|
||||
" ask_p bid_v ask_v \n",
|
||||
"0 6277.0 1 3 \n",
|
||||
"1 6278.8 1 16 \n",
|
||||
"2 6278.8 1 5 \n",
|
||||
"3 6278.6 3 4 \n",
|
||||
"4 6280.0 1 7 "
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_888.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 重命名列以便处理\n",
|
||||
"# df_888.rename(columns={'时间': 'datetime', '最新': 'price', '成交量': 'volume'}, inplace=True)\n",
|
||||
"df_888.rename(columns={'datetime': 'datetime', 'lastprice': 'price', 'volume': 'volume'}, inplace=True)\n",
|
||||
"\n",
|
||||
"# 确保datetime列是datetime类型\n",
|
||||
"df_888['datetime'] = pd.to_datetime(df_888['datetime'])\n",
|
||||
"\n",
|
||||
"# 设置datetime列为索引\n",
|
||||
"df_888.set_index('datetime', inplace=True)\n",
|
||||
"\n",
|
||||
"# 使用resample方法将数据重新采样为1分钟数据\n",
|
||||
"df_resampled = df_888.resample('1T').agg({\n",
|
||||
" 'price': ['first', 'max', 'min', 'last'],\n",
|
||||
" 'volume': 'sum'\n",
|
||||
"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead tr th {\n",
|
||||
" text-align: left;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead tr:last-of-type th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr>\n",
|
||||
" <th></th>\n",
|
||||
" <th colspan=\"4\" halign=\"left\">price</th>\n",
|
||||
" <th>volume</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th></th>\n",
|
||||
" <th>first</th>\n",
|
||||
" <th>max</th>\n",
|
||||
" <th>min</th>\n",
|
||||
" <th>last</th>\n",
|
||||
" <th>sum</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>datetime</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:30:00</th>\n",
|
||||
" <td>6280.0</td>\n",
|
||||
" <td>6306.4</td>\n",
|
||||
" <td>6277.0</td>\n",
|
||||
" <td>6302.0</td>\n",
|
||||
" <td>66894</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:31:00</th>\n",
|
||||
" <td>6302.0</td>\n",
|
||||
" <td>6320.0</td>\n",
|
||||
" <td>6302.0</td>\n",
|
||||
" <td>6318.8</td>\n",
|
||||
" <td>172512</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:32:00</th>\n",
|
||||
" <td>6319.8</td>\n",
|
||||
" <td>6328.0</td>\n",
|
||||
" <td>6314.8</td>\n",
|
||||
" <td>6314.8</td>\n",
|
||||
" <td>238716</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:33:00</th>\n",
|
||||
" <td>6313.0</td>\n",
|
||||
" <td>6325.0</td>\n",
|
||||
" <td>6310.4</td>\n",
|
||||
" <td>6312.4</td>\n",
|
||||
" <td>297675</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:34:00</th>\n",
|
||||
" <td>6311.0</td>\n",
|
||||
" <td>6323.2</td>\n",
|
||||
" <td>6311.0</td>\n",
|
||||
" <td>6319.4</td>\n",
|
||||
" <td>352184</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" price volume\n",
|
||||
" first max min last sum\n",
|
||||
"datetime \n",
|
||||
"2023-01-03 09:30:00 6280.0 6306.4 6277.0 6302.0 66894\n",
|
||||
"2023-01-03 09:31:00 6302.0 6320.0 6302.0 6318.8 172512\n",
|
||||
"2023-01-03 09:32:00 6319.8 6328.0 6314.8 6314.8 238716\n",
|
||||
"2023-01-03 09:33:00 6313.0 6325.0 6310.4 6312.4 297675\n",
|
||||
"2023-01-03 09:34:00 6311.0 6323.2 6311.0 6319.4 352184"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_resampled.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'IM888'"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_888['main_contract'][1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# df_resampled['symbol'] = df_888['main_contract'][1]\n",
|
||||
"df_resampled.insert(0, 'symbol', df_888['main_contract'][1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead tr th {\n",
|
||||
" text-align: left;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead tr:last-of-type th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr>\n",
|
||||
" <th></th>\n",
|
||||
" <th>symbol</th>\n",
|
||||
" <th colspan=\"4\" halign=\"left\">price</th>\n",
|
||||
" <th>volume</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th>first</th>\n",
|
||||
" <th>max</th>\n",
|
||||
" <th>min</th>\n",
|
||||
" <th>last</th>\n",
|
||||
" <th>sum</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>datetime</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:30:00</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>6280.0</td>\n",
|
||||
" <td>6306.4</td>\n",
|
||||
" <td>6277.0</td>\n",
|
||||
" <td>6302.0</td>\n",
|
||||
" <td>66894</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:31:00</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>6302.0</td>\n",
|
||||
" <td>6320.0</td>\n",
|
||||
" <td>6302.0</td>\n",
|
||||
" <td>6318.8</td>\n",
|
||||
" <td>172512</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:32:00</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>6319.8</td>\n",
|
||||
" <td>6328.0</td>\n",
|
||||
" <td>6314.8</td>\n",
|
||||
" <td>6314.8</td>\n",
|
||||
" <td>238716</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:33:00</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>6313.0</td>\n",
|
||||
" <td>6325.0</td>\n",
|
||||
" <td>6310.4</td>\n",
|
||||
" <td>6312.4</td>\n",
|
||||
" <td>297675</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:34:00</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>6311.0</td>\n",
|
||||
" <td>6323.2</td>\n",
|
||||
" <td>6311.0</td>\n",
|
||||
" <td>6319.4</td>\n",
|
||||
" <td>352184</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" symbol price volume\n",
|
||||
" first max min last sum\n",
|
||||
"datetime \n",
|
||||
"2023-01-03 09:30:00 IM888 6280.0 6306.4 6277.0 6302.0 66894\n",
|
||||
"2023-01-03 09:31:00 IM888 6302.0 6320.0 6302.0 6318.8 172512\n",
|
||||
"2023-01-03 09:32:00 IM888 6319.8 6328.0 6314.8 6314.8 238716\n",
|
||||
"2023-01-03 09:33:00 IM888 6313.0 6325.0 6310.4 6312.4 297675\n",
|
||||
"2023-01-03 09:34:00 IM888 6311.0 6323.2 6311.0 6319.4 352184"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_resampled.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 重命名列名以符合K线数据的标准命名\n",
|
||||
"df_resampled.columns = ['open', 'high', 'low', 'close', 'volume', 'symbol']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>open</th>\n",
|
||||
" <th>high</th>\n",
|
||||
" <th>low</th>\n",
|
||||
" <th>close</th>\n",
|
||||
" <th>volume</th>\n",
|
||||
" <th>symbol</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>datetime</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:30:00</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>6280.0</td>\n",
|
||||
" <td>6306.4</td>\n",
|
||||
" <td>6277.0</td>\n",
|
||||
" <td>6302.0</td>\n",
|
||||
" <td>66894</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:31:00</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>6302.0</td>\n",
|
||||
" <td>6320.0</td>\n",
|
||||
" <td>6302.0</td>\n",
|
||||
" <td>6318.8</td>\n",
|
||||
" <td>172512</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:32:00</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>6319.8</td>\n",
|
||||
" <td>6328.0</td>\n",
|
||||
" <td>6314.8</td>\n",
|
||||
" <td>6314.8</td>\n",
|
||||
" <td>238716</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:33:00</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>6313.0</td>\n",
|
||||
" <td>6325.0</td>\n",
|
||||
" <td>6310.4</td>\n",
|
||||
" <td>6312.4</td>\n",
|
||||
" <td>297675</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2023-01-03 09:34:00</th>\n",
|
||||
" <td>IM888</td>\n",
|
||||
" <td>6311.0</td>\n",
|
||||
" <td>6323.2</td>\n",
|
||||
" <td>6311.0</td>\n",
|
||||
" <td>6319.4</td>\n",
|
||||
" <td>352184</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" open high low close volume symbol\n",
|
||||
"datetime \n",
|
||||
"2023-01-03 09:30:00 IM888 6280.0 6306.4 6277.0 6302.0 66894\n",
|
||||
"2023-01-03 09:31:00 IM888 6302.0 6320.0 6302.0 6318.8 172512\n",
|
||||
"2023-01-03 09:32:00 IM888 6319.8 6328.0 6314.8 6314.8 238716\n",
|
||||
"2023-01-03 09:33:00 IM888 6313.0 6325.0 6310.4 6312.4 297675\n",
|
||||
"2023-01-03 09:34:00 IM888 6311.0 6323.2 6311.0 6319.4 352184"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_resampled.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1分钟历史数据已保存至E:\\data\\data_rs_merged\\中金所\\IM888\\IM888_rs_2023_1min.csv\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 删除存在NA值的行(如果有的时间段没有交易数据)\n",
|
||||
"df_resampled.dropna(inplace=True)\n",
|
||||
"# df_resampled['symbol'] = df_888['统一代码']\n",
|
||||
"# df_resampled.insert(loc=0, column='main_contract', value=df_888['main_contract'])\n",
|
||||
"# df_resampled['symbol'] = df_888['main_contract']\n",
|
||||
"# 将重新采样的数据写入新的CSV文件\n",
|
||||
"output_file = r\"E:\\data\\data_rs_merged\\中金所\\IM888\\IM888_rs_2023_1min.csv\"\n",
|
||||
"df_resampled.to_csv(output_file)\n",
|
||||
"\n",
|
||||
"print(f'1分钟历史数据已保存至{output_file}')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
33
2.数据下载与处理/tick数据转分钟数据脚本/tick_to_min.py
Normal file
33
2.数据下载与处理/tick数据转分钟数据脚本/tick_to_min.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import pandas as pd

# Convert one tick-level CSV export into 1-minute OHLCV bars.

# Read the tick CSV (the export is GBK encoded)
file_path = 'C:/Users/zhouj/Desktop/a次主力连续_20190103.csv'
df = pd.read_csv(file_path, encoding='gbk')

# Rename the Chinese columns for easier handling
df.rename(columns={'时间': 'datetime', '最新': 'price', '成交量': 'volume'}, inplace=True)

# Ensure the datetime column has datetime dtype and use it as the index
df['datetime'] = pd.to_datetime(df['datetime'])
df.set_index('datetime', inplace=True)

# Resample ticks into 1-minute bars.
# FIX: use '1min' instead of '1T' — the 'T' offset alias is deprecated in
# pandas >= 2.2 and scheduled for removal.
df_resampled = df.resample('1min').agg({
    'price': ['first', 'max', 'min', 'last'],
    'volume': 'sum'
})

# Flatten the MultiIndex columns into standard OHLCV names
df_resampled.columns = ['open', 'high', 'low', 'close', 'volume']

# Drop intervals that had no trades at all
df_resampled.dropna(inplace=True)

# Write the resampled bars to a new CSV file
output_file = 'C:/Users/zhouj/Desktop/tic_data_1min.csv'
df_resampled.to_csv(output_file)

print(f'1分钟历史数据已保存至{output_file}')
|
||||
|
||||
135
2.数据下载与处理/update_data._2py.py
Normal file
135
2.数据下载与处理/update_data._2py.py
Normal file
@@ -0,0 +1,135 @@
|
||||
from multiprocessing import Process
|
||||
from datetime import datetime
|
||||
|
||||
from vnpy.trader.database import BarOverview
|
||||
from vnpy.trader.datafeed import get_datafeed
|
||||
from vnpy.trader.database import get_database
|
||||
from vnpy.trader.object import BarData, HistoryRequest
|
||||
from vnpy.trader.constant import Exchange, Interval
|
||||
|
||||
import re
|
||||
|
||||
# 交易所映射关系
|
||||
EXCHANGE_XT2VT = {
|
||||
"SH": Exchange.SSE,
|
||||
"SZ": Exchange.SZSE,
|
||||
"BJ": Exchange.BSE,
|
||||
"SF": Exchange.SHFE,
|
||||
"IF": Exchange.CFFEX,
|
||||
"INE": Exchange.INE,
|
||||
"DF": Exchange.DCE,
|
||||
"ZF": Exchange.CZCE,
|
||||
"GF": Exchange.GFEX
|
||||
}
|
||||
|
||||
# 开始查询时间
|
||||
START_TIME = datetime(2018, 1, 1)
|
||||
|
||||
|
||||
def update_history_data() -> None:
    """Download the full historical contract list via xtquant."""
    # Import inside the function: xtquant must only be loaded in the child process.
    from xtquant.xtdata import download_history_data

    # Bring up the data feed before downloading
    feed = get_datafeed()
    feed.init()

    # An empty symbol with the "historycontract" period fetches the whole list
    download_history_data("", "historycontract")

    print("xtquant历史合约信息下载完成")
|
||||
|
||||
|
||||
def update_bar_data(
    sector_name: str,
    interval: Interval = Interval.MINUTE
) -> None:
    """Incrementally download and store K-line data for every contract of a sector.

    Parameters
    ----------
    sector_name : str
        xtquant sector name (e.g. "上期所") whose contracts are updated.
    interval : Interval
        Bar interval to download; minute bars by default.
    """
    # Load xtquant inside the function so the module is only imported here
    from xtquant.xtdata import (
        get_stock_list_in_sector,
        get_instrument_detail
    )

    # Initialise the data feed service
    datafeed = get_datafeed()
    datafeed.init()

    # Connect to the database
    database = get_database()

    # Single timestamp used for both the expiry check and the query end
    now: datetime = datetime.now()

    # Index the bar overviews already stored locally by vt_symbol
    data: list[BarOverview] = database.get_bar_overview()

    overviews: dict[str, BarOverview] = {}
    for o in data:
        vt_symbol: str = f"{o.symbol}.{o.exchange.value}"
        overviews[vt_symbol] = o

    # Query the sector's (historical) contract codes from the exchange
    xt_symbols: list[str] = get_stock_list_in_sector(sector_name)

    for xt_symbol in xt_symbols:
        # Query contract detail
        data: dict = get_instrument_detail(xt_symbol, True)

        # Parse the expiry date when present
        expiry: datetime = None
        if data["ExpireDate"]:
            expiry = datetime.strptime(data["ExpireDate"], "%Y%m%d")

        # Split the xt code into symbol and exchange suffix
        symbol, xt_exchange = xt_symbol.split(".")

        # Strip trailing digits to get the product code (e.g. "cu2401" -> "cu")
        symbol_main = re.split(r'(\d+)', symbol)[0]

        # Build the local code of the continuous contract.
        # BUGFIX: the original expression
        #   f"{symbol_main}+'JQ00'.{exchange.value}" or f"{symbol_main}+'00'.{exchange.value}"
        # always evaluated to the *first* literal string (non-empty str is
        # truthy), embedding the characters "+'JQ00'" in the symbol, so the
        # overview lookup below could never match. The "JQ00" suffix appears
        # to be the intended one — confirm against the naming used in the
        # database before relying on incremental updates.
        exchange: Exchange = EXCHANGE_XT2VT[xt_exchange]
        vt_symbol: str = f"{symbol_main}JQ00.{exchange.value}"

        # Look up the stored data overview
        overview: BarOverview = overviews.get(vt_symbol, None)

        # Skip contracts that already expired
        if overview and expiry and expiry < now:
            continue

        # Incremental query: resume from the stored end, else from START_TIME
        start: datetime = START_TIME
        if overview:
            start = overview.end

        # Query and persist the bar history
        req: HistoryRequest = HistoryRequest(
            symbol=symbol,
            exchange=exchange,
            start=start,
            end=now,
            interval=interval
        )

        bars: list[BarData] = datafeed.query_bar_history(req)

        if bars:
            database.save_bar_data(bars)

            start_dt: datetime = bars[0].datetime
            end_dt: datetime = bars[-1].datetime
            msg: str = f"{vt_symbol}数据更新成功,{start_dt} - {end_dt}"
            print(msg)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Refresh the historical contract list in a child process first
    worker: Process = Process(target=update_history_data)
    worker.start()
    worker.join()  # block until the child finishes

    # Then update bar data for active and expired SHFE sectors
    for sector in ("上期所", "过期上期所"):
        update_bar_data(sector)
|
||||
184
2.数据下载与处理/update_data.py
Normal file
184
2.数据下载与处理/update_data.py
Normal file
@@ -0,0 +1,184 @@
|
||||
from multiprocessing import Process
|
||||
from datetime import datetime
|
||||
|
||||
from vnpy.trader.database import BarOverview
|
||||
from vnpy.trader.datafeed import get_datafeed
|
||||
from vnpy.trader.object import ContractData, BarData, HistoryRequest
|
||||
from vnpy.trader.constant import Exchange, Product, OptionType, Interval
|
||||
from vnpy.trader.setting import SETTINGS
|
||||
|
||||
from elite_database import EliteDatabase
|
||||
|
||||
|
||||
# 配置迅投研数据服务
|
||||
SETTINGS["datafeed.name"] = "xt"
|
||||
SETTINGS["datafeed.username"] = "token"
|
||||
SETTINGS["datafeed.password"] = ""
|
||||
|
||||
|
||||
# 交易所映射关系
|
||||
EXCHANGE_XT2VT = {
|
||||
"SH": Exchange.SSE,
|
||||
"SZ": Exchange.SZSE,
|
||||
"BJ": Exchange.BSE,
|
||||
"SF": Exchange.SHFE,
|
||||
"IF": Exchange.CFFEX,
|
||||
"INE": Exchange.INE,
|
||||
"DF": Exchange.DCE,
|
||||
"ZF": Exchange.CZCE,
|
||||
"GF": Exchange.GFEX
|
||||
}
|
||||
|
||||
|
||||
def update_history_data() -> None:
    """Fetch the historical contract list through xtquant."""
    # xtquant is imported locally so only the child process loads it
    from xtquant.xtdata import download_history_data

    # The data feed must be initialised before any download
    datafeed = get_datafeed()
    datafeed.init()

    # Empty symbol + "historycontract" period downloads the complete list
    download_history_data("", "historycontract")

    print("xtquant历史合约信息下载完成")
|
||||
|
||||
|
||||
def update_contract_data(sector_name: str) -> None:
    """Query the option contracts of an xtquant sector and save them to the database.

    Non-option instruments (symbols without "-") are ignored; contracts with
    an unknown call/put flag are logged and skipped.
    """
    # Load xtquant inside the function so the module is only imported here
    from xtquant.xtdata import (
        get_stock_list_in_sector,
        get_instrument_detail
    )

    # Initialise the data feed service
    datafeed = get_datafeed()
    datafeed.init()

    # Historical contract codes of the sector (e.g. CFFEX)
    vt_symbols: list[str] = get_stock_list_in_sector(sector_name)

    contracts: list[ContractData] = []

    for xt_symbol in vt_symbols:
        # Split the xt code into symbol and exchange suffix
        symbol, xt_exchange = xt_symbol.split(".")

        # Option symbols look like "IO2401-C-3900"; skip everything else
        if "-" not in symbol:
            continue

        data: dict = get_instrument_detail(xt_symbol, True)

        # Call/put flag is the second dash-separated field
        type_str = data["InstrumentID"].split("-")[1]
        if type_str == "C":
            option_type = OptionType.CALL
        elif type_str == "P":
            option_type = OptionType.PUT
        else:
            # BUGFIX: previously an unknown flag silently reused option_type
            # from the previous loop iteration (or raised NameError on the
            # first option) — skip the contract instead.
            print(f"未知期权类型标志{type_str},跳过合约:{xt_symbol}")
            continue

        option_underlying: str = data["InstrumentID"].split("-")[0]

        contract: ContractData = ContractData(
            symbol=data["InstrumentID"],
            # Option market suffixes carry an extra "O" — presumably this
            # strips it (e.g. "SHO" -> "SH"); confirm against xtquant docs
            exchange=EXCHANGE_XT2VT[xt_exchange.replace("O", "")],
            name=data["InstrumentName"],
            product=Product.OPTION,
            size=data["VolumeMultiple"],
            pricetick=data["PriceTick"],
            min_volume=data["MinLimitOrderVolume"],
            option_strike=data["ExtendInfo"]["OptExercisePrice"],
            option_listed=datetime.strptime(data["OpenDate"], "%Y%m%d"),
            option_expiry=datetime.strptime(data["ExpireDate"], "%Y%m%d"),
            option_underlying=option_underlying,
            option_portfolio=data["ProductID"],
            option_index=str(data["ExtendInfo"]["OptExercisePrice"]),
            option_type=option_type,
            gateway_name="XT"
        )
        contracts.append(contract)

    # Persist the collected contracts
    database: EliteDatabase = EliteDatabase()
    database.save_contract_data(contracts)

    print("合约信息更新成功", len(contracts))
|
||||
|
||||
|
||||
def update_bar_data() -> None:
    """Incrementally download minute bars for every option contract on record.

    Contracts that already expired and have data stored are skipped; for the
    rest the query resumes from the stored overview end (or 2018-01-01).
    """
    # Initialise the data feed service
    datafeed = get_datafeed()
    datafeed.init()

    # Single timestamp reused for both the expiry check and the query end.
    # CONSISTENCY FIX: the original called datetime.now() a second time for
    # the request end, so the two could disagree.
    now: datetime = datetime.now()

    # Load the stored contract information
    database: EliteDatabase = EliteDatabase()
    contracts: list[ContractData] = database.load_contract_data()

    # Index the stored minute-bar overviews by vt_symbol
    data: list[BarOverview] = database.get_bar_overview()

    overviews: dict[str, BarOverview] = {}
    for o in data:
        # Keep only minute-interval overviews
        if o.interval != Interval.MINUTE:
            continue

        vt_symbol: str = f"{o.symbol}.{o.exchange.value}"
        overviews[vt_symbol] = o

    # Walk every known contract
    for contract in contracts:
        # Skip instruments without an expiry (not options)
        if not contract.option_expiry:
            continue

        # Look up the stored overview
        overview: BarOverview = overviews.get(contract.vt_symbol, None)

        # Skip contracts that expired and already have data
        if overview and contract.option_expiry < now:
            continue

        # Incremental query: resume from the stored end, else from 2018
        start: datetime = datetime(2018, 1, 1)
        if overview:
            start = overview.end

        # Query and persist the bar history
        req: HistoryRequest = HistoryRequest(
            symbol=contract.symbol,
            exchange=contract.exchange,
            start=start,
            end=now,
            interval=Interval.MINUTE
        )

        bars: list[BarData] = datafeed.query_bar_history(req)

        if bars:
            database.save_bar_data(bars)

            start_dt: datetime = bars[0].datetime
            end_dt: datetime = bars[-1].datetime
            msg: str = f"{contract.vt_symbol}数据更新成功,{start_dt} - {end_dt}"
            print(msg)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Refresh the historical contract list in a child process
    worker: Process = Process(target=update_history_data)
    worker.start()
    worker.join()  # wait for the child to finish

    # Refresh option contract info for active and expired CFFEX sectors
    for sector in ("中金所", "过期中金所"):
        update_contract_data(sector)

    # Bar history update (disabled)
    # update_bar_data()
|
||||
594
2.数据下载与处理/数据转换最终版/merged_by_year_20240510.ipynb
Normal file
594
2.数据下载与处理/数据转换最终版/merged_by_year_20240510.ipynb
Normal file
File diff suppressed because one or more lines are too long
344
2.数据下载与处理/数据转换最终版/merged_by_year_20240724.ipynb
Normal file
344
2.数据下载与处理/数据转换最终版/merged_by_year_20240724.ipynb
Normal file
File diff suppressed because one or more lines are too long
444
2.数据下载与处理/数据转换最终版/merged_by_year_BIT_20240522.ipynb
Normal file
444
2.数据下载与处理/数据转换最终版/merged_by_year_BIT_20240522.ipynb
Normal file
File diff suppressed because one or more lines are too long
342
2.数据下载与处理/数据转换最终版/merged_tickdata_20240510.py
Normal file
342
2.数据下载与处理/数据转换最终版/merged_tickdata_20240510.py
Normal file
@@ -0,0 +1,342 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
from datetime import time as s_time
|
||||
from datetime import datetime
|
||||
import chardet
|
||||
import numpy as np
|
||||
|
||||
# 日盘商品期货交易品种
|
||||
commodity_day_dict = {'bb': s_time(15,00), 'jd': s_time(15,00), 'lh': s_time(15,00), 'l': s_time(15,00), 'fb': s_time(15,00), 'ec': s_time(15,00),
|
||||
'AP': s_time(15,00), 'CJ': s_time(15,00), 'JR': s_time(15,00), 'LR': s_time(15,00), 'RS': s_time(15,00), 'PK': s_time(15,00),
|
||||
'PM': s_time(15,00), 'PX': s_time(15,00), 'RI': s_time(15,00), 'SF': s_time(15,00), 'SM': s_time(15,00), 'UR': s_time(15,00),
|
||||
'WH': s_time(15,00), 'ao': s_time(15,00), 'br': s_time(15,00), 'wr': s_time(15,00),}
|
||||
|
||||
# 夜盘商品期货交易品种
|
||||
commodity_night_dict = {'sc': s_time(2,30), 'bc': s_time(1,0), 'lu': s_time(23,0), 'nr': s_time(23,0),'au': s_time(2,30), 'ag': s_time(2,30),
|
||||
'ss': s_time(1,0), 'sn': s_time(1,0), 'ni': s_time(1,0), 'pb': s_time(1,0),'zn': s_time(1,0), 'al': s_time(1,0), 'cu': s_time(1,0),
|
||||
'ru': s_time(23,0), 'rb': s_time(23,0), 'hc': s_time(23,0), 'fu': s_time(23,0), 'bu': s_time(23,0), 'sp': s_time(23,0),
|
||||
'PF': s_time(23,0), 'SR': s_time(23,0), 'CF': s_time(23,0), 'CY': s_time(23,0), 'RM': s_time(23,0), 'MA': s_time(23,0),
|
||||
'TA': s_time(23,0), 'ZC': s_time(23,0), 'FG': s_time(23,0), 'OI': s_time(23,0), 'SA': s_time(23,0),
|
||||
'p': s_time(23,0), 'j': s_time(23,0), 'jm': s_time(23,0), 'i': s_time(23,0), 'l': s_time(23,0), 'v': s_time(23,0),
|
||||
'pp': s_time(23,0), 'eg': s_time(23,0), 'c': s_time(23,0), 'cs': s_time(23,0), 'y': s_time(23,0), 'm': s_time(23,0),
|
||||
'a': s_time(23,0), 'b': s_time(23,0), 'rr': s_time(23,0), 'eb': s_time(23,0), 'pg': s_time(23,0), 'SH': s_time(23,00)}
|
||||
|
||||
# 金融期货交易品种
|
||||
financial_time_dict = {'IH': s_time(15,00), 'IF': s_time(15,00), 'IC': s_time(15,00), 'IM': s_time(15,00),'T': s_time(15,00), 'TS': s_time(15,00),
|
||||
'TF': s_time(15,00), 'TL': s_time(15,00)}
|
||||
|
||||
# 所有已列入的筛选品种
|
||||
all_dict = {k: v for d in [commodity_day_dict, commodity_night_dict, financial_time_dict] for k, v in d.items()}
|
||||
|
||||
def split_alpha_numeric(string):
    """Split *string* into its alphabetic part and its numeric part.

    Returns a tuple ``(letters, digits)`` with, in original order, all
    alphabetic characters and all digit characters of the input; any other
    characters are discarded.
    """
    letters = "".join(ch for ch in string if ch.isalpha())
    digits = "".join(ch for ch in string if ch.isdigit())
    return letters, digits
|
||||
|
||||
def merged_old_tickdata(merged_up_df, sp_char, alpha_chars, code_value):
    """Normalise merged old-format tick data and filter it to trading hours.

    Parameters: merged_up_df raw merged frame; sp_char year tag (logging only);
    alpha_chars product letter code; code_value unified continuous code
    (logging only). Returns the cleaned frame.
    """
    # BUGFIX: the original "while alpha_chars not in all_dict: print; continue"
    # spun forever for an unknown product (and never executed otherwise).
    if alpha_chars not in all_dict:
        raise ValueError("%s期货品种未列入所有筛选条件中!!!" % (code_value))

    # Project the Chinese-named columns onto the unified schema
    merged_df = pd.DataFrame({
        'main_contract': merged_up_df['统一代码'],
        'symbol': merged_up_df['合约代码'],
        'datetime': merged_up_df['时间'],
        'lastprice': merged_up_df['最新'],
        'volume': merged_up_df['成交量'],
        'bid_p': merged_up_df['买一价'],
        'ask_p': merged_up_df['卖一价'],
        'bid_v': merged_up_df['买一量'],
        'ask_v': merged_up_df['卖一量'],
    })

    del merged_up_df

    merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])
    # Derive a time-of-day column used by the session filter
    merged_df['tmp_time'] = merged_df['datetime'].dt.strftime('%H:%M:%S.%f')
    merged_df['time'] = merged_df['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time
    del merged_df['tmp_time']

    merged_df = filter_tickdata_time(merged_df, alpha_chars)
    del merged_df['time']
    # NOTE(review): this sorts only the datetime *column*, leaving the other
    # columns in their original row order — correct only if the data is
    # already chronologically ordered; confirm (the newer variant uses
    # sort_values instead).
    merged_df['datetime'] = sorted(merged_df['datetime'])
    print("%s%s数据生成成功!" % (code_value, sp_char))

    return merged_df
|
||||
|
||||
def merged_new_tickdata(merged_up_df, sp_char, alpha_chars, code_value):
    """Normalise merged new-format (CTP-style) tick data and filter trading hours.

    Builds a unified frame (main_contract/symbol/datetime/lastprice/volume and
    level-1 bid/ask), filters it with filter_tickdata_time() and sorts it by
    datetime. Returns the cleaned frame.
    """
    # BUGFIX: the original "while alpha_chars not in all_dict: print; continue"
    # spun forever for an unknown product (and never executed otherwise).
    if alpha_chars not in all_dict:
        raise ValueError("%s期货品种未列入所有筛选条件中!!!" % (code_value))

    # Compose a full timestamp from business date + modification time + millisecond
    merged_up_df['datetime'] = merged_up_df['业务日期'].astype(str) + ' ' + merged_up_df['最后修改时间'].astype(str) + '.' + merged_up_df['最后修改毫秒'].astype(str)
    # Convert to datetime dtype (apply a timedelta here if the data is 8h off)
    merged_up_df['datetime'] = pd.to_datetime(merged_up_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')
    # Instantaneous volume from the cumulative 数量 column
    merged_up_df['volume'] = merged_up_df['数量'] - merged_up_df['数量'].shift(1)
    merged_up_df['volume'] = merged_up_df['volume'].fillna(0)

    # Project onto the unified schema
    merged_df = pd.DataFrame({
        'main_contract': merged_up_df['统一代码'],
        'symbol': merged_up_df['合约代码'],
        'datetime': merged_up_df['datetime'],
        'lastprice': merged_up_df['最新价'],
        'volume': merged_up_df['volume'],
        'bid_p': merged_up_df['申买价一'],
        'ask_p': merged_up_df['申卖价一'],
        'bid_v': merged_up_df['申买量一'],
        'ask_v': merged_up_df['申卖量一'],
    })

    del merged_up_df

    # Time-of-day column used by the session filter
    merged_df['tmp_time'] = merged_df['datetime'].dt.strftime('%H:%M:%S.%f')
    merged_df['time'] = merged_df['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time
    del merged_df['tmp_time']

    merged_df = filter_tickdata_time(merged_df, alpha_chars)

    del merged_df['time']
    # BUGFIX: the original assigned the None returned by the in-place sort to
    # an unused variable (sorted_merged_df).
    merged_df.sort_values(by=['datetime'], inplace=True)
    print("%s%s数据生成成功!" % (code_value, sp_char))

    return merged_df
|
||||
|
||||
def filter_tickdata_time(filter_df, alpha_chars):
    """Drop rows of *filter_df* whose 'time' falls outside the trading sessions
    of the product identified by *alpha_chars*.

    Session boundaries come from the module-level financial_time_dict /
    commodity_night_dict / commodity_day_dict tables. The frame must carry a
    'time' column holding time-of-day values; out-of-session rows are removed
    in place and the frame is returned.
    """
    # BUGFIX: default every drop index to empty so the drop calls below can
    # never raise NameError (the original left them unset on the fallback
    # branches, e.g. an unrecognised night-session end time).
    empty_index = pd.DataFrame().index
    drop_index1 = drop_index2 = drop_index3 = drop_index4 = empty_index

    t = filter_df['time']

    if alpha_chars in financial_time_dict:
        # CFFEX: 09:30-11:30 and 13:00-15:00, no mid-morning break
        drop_index2 = filter_df.loc[(t > s_time(11, 30)) & (t < s_time(13, 0))].index
        drop_index3 = filter_df.loc[(t > s_time(15, 0)) | (t < s_time(9, 30))].index
        print("按照中金所交易时间筛选金融期货品种")

    elif alpha_chars in commodity_night_dict:
        # Day-session gaps shared by every night-trading commodity
        drop_index1 = filter_df.loc[(t > s_time(10, 15)) & (t < s_time(10, 30))].index
        drop_index2 = filter_df.loc[(t > s_time(11, 30)) & (t < s_time(13, 30))].index
        drop_index3 = filter_df.loc[(t > s_time(15, 0)) & (t < s_time(21, 0))].index

        night_end = commodity_night_dict[alpha_chars]
        if night_end == s_time(23, 0):
            drop_index4 = filter_df.loc[(t > s_time(23, 0)) | (t < s_time(9, 0))].index
            print("按照夜盘截止交易时间为23:00筛选商品期货品种")
        elif night_end == s_time(1, 0):
            drop_index4 = filter_df.loc[(t > s_time(1, 0)) & (t < s_time(9, 0))].index
            print("按照夜盘截止交易时间为1:00筛选商品期货品种")
        elif night_end == s_time(2, 30):
            drop_index4 = filter_df.loc[(t > s_time(2, 30)) & (t < s_time(9, 0))].index
            print("按照夜盘截止交易时间为2:30筛选商品期货品种")
        else:
            print("夜盘截止交易时间未设置或者设置错误!!!")

    elif alpha_chars in commodity_day_dict:
        # Day-only commodities: 09:00-10:15, 10:30-11:30, 13:30-15:00
        drop_index1 = filter_df.loc[(t > s_time(10, 15)) & (t < s_time(10, 30))].index
        drop_index2 = filter_df.loc[(t > s_time(11, 30)) & (t < s_time(13, 30))].index
        drop_index3 = filter_df.loc[(t > s_time(15, 0)) | (t < s_time(9, 0))].index
        print("按照无夜盘筛选商品期货品种")

    else:
        print("%s期货品种未执行时间筛选中!!!" % (alpha_chars))

    # Remove the out-of-session rows
    filter_df.drop(labels=drop_index1, axis=0, inplace=True)
    filter_df.drop(drop_index2, axis=0, inplace=True)
    filter_df.drop(drop_index3, axis=0, inplace=True)
    filter_df.drop(drop_index4, axis=0, inplace=True)

    return filter_df
|
||||
|
||||
def insert_main_contract(df):
    """Prepend a unified continuous-contract code column ("统一代码") to *df*.

    The code is the product letters of the first row's 合约代码 plus the
    suffix "889" (secondary main continuous; 888 = main, 999 = index).
    Returns ``(df, product_letters, unified_code)``.
    """
    letters, _digits = split_alpha_numeric(df.loc[0, '合约代码'])
    unified_code = letters + "889"
    print("code_value characters:", unified_code)
    df.insert(loc=0, column="统一代码", value=unified_code)

    return df, letters, unified_code
|
||||
|
||||
def merged_old_unprocessed_tickdata(all_csv_files, sp_char):
    """Merge the old-format tick CSVs whose names contain *sp_char*.

    Files that cannot be read as GBK are logged (console + output_error.txt)
    with their detected encoding and skipped. Returns
    ``(merged frame, product letters, unified code)``.
    """
    csv_files = [name for name in all_csv_files if sp_char in name]
    print("csv_files:", csv_files)

    merged_up_df = pd.DataFrame()
    work_dir = os.getcwd()
    file_errors = 0

    for csv_name in csv_files:
        try:
            # First row as header; the exports are GBK encoded
            df = pd.read_csv(csv_name, header=0, encoding='gbk', low_memory=False)
        except Exception:
            # BUGFIX: the original bare except fell through into the merge
            # with a stale (or undefined) df, and also shadowed the loop
            # variable with the open() file handle. Log and skip instead.
            file_path = os.path.join(work_dir, csv_name)
            file_errors += 1
            with open(file_path, 'rb') as handle:
                raw = handle.read()

            # Detect the real encoding so the bad file can be reported
            detected_encoding = chardet.detect(raw)['encoding']
            msg = "%s:%s当前文件不为gbk格式,其文件格式为%s,需要转换为gbk格式,错误总数为%s" % (
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'), file_path, detected_encoding, file_errors)
            print(msg)
            with open('output_error.txt', 'a') as f:
                print(msg, file=f)
            continue

        # De-duplicate within the file, then append to the merged frame
        df.drop_duplicates(inplace=True)
        merged_up_df = pd.concat([merged_up_df, df], ignore_index=True)

    # De-duplicate across files and reset the index
    merged_up_df.drop_duplicates(subset=merged_up_df.columns.tolist(), inplace=True)
    merged_up_df.reset_index(inplace=True, drop=True)

    merged_up_df, alpha_chars, code_value = insert_main_contract(merged_up_df)

    return merged_up_df, alpha_chars, code_value
|
||||
|
||||
def merged_new_unprocessed_tickdata(all_csv_files, sp_char):
    """Merge the new-format (CTP-style) tick CSVs whose names contain *sp_char*.

    Unreadable (non-GBK) files are logged to the console and output_error.txt
    with their detected encoding and skipped. Returns
    ``(merged frame, product letters, unified code)``.
    """
    csv_files = [name for name in all_csv_files if sp_char in name]
    print("csv_files:", csv_files)

    merged_up_df = pd.DataFrame()
    work_dir = os.getcwd()
    file_errors = 0

    for csv_name in csv_files:
        try:
            # First row as header; CTP exports are GBK encoded
            df = pd.read_csv(csv_name, header=0, encoding='gbk', low_memory=False)
        except Exception:
            # BUGFIX: the original bare except continued into the merge with a
            # stale (or undefined) df, and shadowed the loop variable with the
            # open() file handle. Log and skip instead.
            file_path = os.path.join(work_dir, csv_name)
            file_errors += 1
            with open(file_path, 'rb') as handle:
                raw = handle.read()

            # Detect the real encoding so the bad file can be reported
            detected_encoding = chardet.detect(raw)['encoding']
            msg = "%s:%s当前文件不为gbk格式,其文件格式为%s,需要转换为gbk格式,错误总数为%s" % (
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'), file_path, detected_encoding, file_errors)
            print(msg)
            with open('output_error.txt', 'a') as f:
                print(msg, file=f)
            continue

        # De-duplicate within the file, then append to the merged frame
        df.drop_duplicates(inplace=True)
        merged_up_df = pd.concat([merged_up_df, df], ignore_index=True)

    # De-duplicate across files and reset the index
    merged_up_df.drop_duplicates(subset=merged_up_df.columns.tolist(), inplace=True)
    merged_up_df.reset_index(inplace=True, drop=True)

    merged_up_df, alpha_chars, code_value = insert_main_contract(merged_up_df)

    return merged_up_df, alpha_chars, code_value
|
||||
|
||||
def reinstatement_tickdata(merged_rs_df):
    """Back-adjust ("复权") a merged continuous tick series across contract rolls.

    Normalises column dtypes, then applies an additive adjustment: wherever
    the contract symbol changes from the previous row, the gap between the
    previous ask and the current bid is accumulated and added to the bid/ask/
    last prices (rounded to 4 decimals). Returns the adjusted frame.
    """
    # Normalise dtypes
    for col in ('main_contract', 'symbol'):
        merged_rs_df[col] = merged_rs_df[col].astype(str)
    merged_rs_df['datetime'] = pd.to_datetime(merged_rs_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')
    for col in ('volume', 'bid_v', 'ask_v'):
        merged_rs_df[col] = merged_rs_df[col].astype(int)

    # Additive adjustment factor: non-zero only where the contract rolls
    rolled = merged_rs_df['symbol'] != merged_rs_df['symbol'].shift()
    merged_rs_df['复权因子'] = np.where(rolled, merged_rs_df['ask_p'].shift() - merged_rs_df['bid_p'], 0)
    merged_rs_df['复权因子'] = merged_rs_df['复权因子'].fillna(0)

    cumulative = merged_rs_df['复权因子'].cumsum()
    merged_rs_df['bid_p_adj'] = merged_rs_df['bid_p'] + cumulative
    merged_rs_df['ask_p_adj'] = merged_rs_df['ask_p'] + cumulative
    merged_rs_df['lastprice_adj'] = merged_rs_df['lastprice'] + cumulative

    # Replace the raw prices with the rounded adjusted values
    merged_rs_df['bid_p'] = merged_rs_df['bid_p_adj'].round(4)
    merged_rs_df['ask_p'] = merged_rs_df['ask_p_adj'].round(4)
    merged_rs_df['lastprice'] = merged_rs_df['lastprice_adj'].round(4)

    # Drop the helper columns
    merged_rs_df.drop(columns=['复权因子', 'bid_p_adj', 'ask_p_adj', 'lastprice_adj'], inplace=True)

    return merged_rs_df
|
||||
|
||||
# def find_files(all_csv_files):
|
||||
# all_csv_files = sorted(all_csv_files)
|
||||
# sp_old_chars = ['_2019','_2020','_2021']
|
||||
# sp_old_chars = sorted(sp_old_chars)
|
||||
# sp_new_chars = ['_2022','_2023']
|
||||
# sp_new_chars = sorted(sp_new_chars)
|
||||
# csv_old_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_old_chars)]
|
||||
# csv_new_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_new_chars)]
|
||||
|
||||
# return csv_old_files, csv_new_files
|
||||
174
2.数据下载与处理/数据转换最终版/merged_tickdata_20240724.py
Normal file
174
2.数据下载与处理/数据转换最终版/merged_tickdata_20240724.py
Normal file
@@ -0,0 +1,174 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
from datetime import time as s_time
|
||||
from datetime import datetime
|
||||
import chardet
|
||||
import numpy as np
|
||||
|
||||
# 日盘商品期货交易品种
|
||||
commodity_day_dict = {'bb': s_time(15,00), 'jd': s_time(15,00), 'lh': s_time(15,00), 'l': s_time(15,00), 'fb': s_time(15,00), 'ec': s_time(15,00),
|
||||
'AP': s_time(15,00), 'CJ': s_time(15,00), 'JR': s_time(15,00), 'LR': s_time(15,00), 'RS': s_time(15,00), 'PK': s_time(15,00),
|
||||
'PM': s_time(15,00), 'PX': s_time(15,00), 'RI': s_time(15,00), 'SF': s_time(15,00), 'SM': s_time(15,00), 'UR': s_time(15,00),
|
||||
'WH': s_time(15,00), 'ao': s_time(15,00), 'br': s_time(15,00), 'wr': s_time(15,00),}
|
||||
|
||||
# 夜盘商品期货交易品种
|
||||
commodity_night_dict = {'sc': s_time(2,30), 'bc': s_time(1,0), 'lu': s_time(23,0), 'nr': s_time(23,0),'au': s_time(2,30), 'ag': s_time(2,30),
|
||||
'ss': s_time(1,0), 'sn': s_time(1,0), 'ni': s_time(1,0), 'pb': s_time(1,0),'zn': s_time(1,0), 'al': s_time(1,0), 'cu': s_time(1,0),
|
||||
'ru': s_time(23,0), 'rb': s_time(23,0), 'hc': s_time(23,0), 'fu': s_time(23,0), 'bu': s_time(23,0), 'sp': s_time(23,0),
|
||||
'PF': s_time(23,0), 'SR': s_time(23,0), 'CF': s_time(23,0), 'CY': s_time(23,0), 'RM': s_time(23,0), 'MA': s_time(23,0),
|
||||
'TA': s_time(23,0), 'ZC': s_time(23,0), 'FG': s_time(23,0), 'OI': s_time(23,0), 'SA': s_time(23,0),
|
||||
'p': s_time(23,0), 'j': s_time(23,0), 'jm': s_time(23,0), 'i': s_time(23,0), 'l': s_time(23,0), 'v': s_time(23,0),
|
||||
'pp': s_time(23,0), 'eg': s_time(23,0), 'c': s_time(23,0), 'cs': s_time(23,0), 'y': s_time(23,0), 'm': s_time(23,0),
|
||||
'a': s_time(23,0), 'b': s_time(23,0), 'rr': s_time(23,0), 'eb': s_time(23,0), 'pg': s_time(23,0), 'SH': s_time(23,00)}
|
||||
|
||||
# 金融期货交易品种
|
||||
financial_time_dict = {'IH': s_time(15,00), 'IF': s_time(15,00), 'IC': s_time(15,00), 'IM': s_time(15,00),'T': s_time(15,15), 'TS': s_time(15,15),
|
||||
'TF': s_time(15,15), 'TL': s_time(15,15)}
|
||||
|
||||
# 所有已列入的筛选品种
|
||||
all_dict = {k: v for d in [commodity_day_dict, commodity_night_dict, financial_time_dict] for k, v in d.items()}
|
||||
|
||||
def split_alpha_numeric(string):
    """Separate *string* into (alphabetic characters, digit characters).

    Characters that are neither letters nor digits are ignored; the relative
    order within each group is preserved.
    """
    letters = []
    digits = []
    for ch in string:
        if ch.isalpha():
            letters.append(ch)
        elif ch.isdigit():
            digits.append(ch)
    return "".join(letters), "".join(digits)
|
||||
|
||||
def merged_new_tickdata(merged_up_df, alpha_chars):
    """Normalise merged new-format (CTP-style) tick data and filter trading hours.

    Builds a unified frame (main_contract/symbol/datetime/lastprice/volume and
    level-1 bid/ask), filters it with filter_tickdata_time() and sorts it by
    datetime. Returns the cleaned frame.
    """
    # Compose a full timestamp from trade date + modification time + millisecond.
    # NOTE(review): assumes 交易日 is already formatted %Y-%m-%d — confirm,
    # otherwise the to_datetime below coerces every row to NaT.
    merged_up_df['datetime'] = merged_up_df['交易日'].astype(str) + ' ' + merged_up_df['最后修改时间'].astype(str) + '.' + merged_up_df['最后修改毫秒'].astype(str)
    # Convert to datetime dtype (apply a timedelta here if the data is 8h off)
    merged_up_df['datetime'] = pd.to_datetime(merged_up_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')
    # Instantaneous volume from the cumulative 数量 column
    merged_up_df['volume'] = merged_up_df['数量'] - merged_up_df['数量'].shift(1)
    merged_up_df['volume'] = merged_up_df['volume'].fillna(0)

    # NOTE(review): the output's 'volume' column below is taken from the
    # cumulative 数量 field, so the instantaneous 'volume' computed above is
    # unused here (the 20240510 variant exports the instantaneous value) —
    # confirm which is intended.
    merged_df = pd.DataFrame({
        'main_contract': merged_up_df['统一代码'],
        'symbol': merged_up_df['合约代码'],
        'datetime': merged_up_df['datetime'],
        'lastprice': merged_up_df['最新价'],
        'volume': merged_up_df['数量'],
        'bid_p': merged_up_df['申买价一'],
        'ask_p': merged_up_df['申卖价一'],
        'bid_v': merged_up_df['申买量一'],
        'ask_v': merged_up_df['申卖量一'],
    })

    del merged_up_df

    # Time-of-day column used by the session filter
    merged_df['tmp_time'] = merged_df['datetime'].dt.strftime('%H:%M:%S.%f')
    merged_df['time'] = merged_df['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time
    del merged_df['tmp_time']

    merged_df = filter_tickdata_time(merged_df, alpha_chars)

    del merged_df['time']
    # BUGFIX: the original assigned the None returned by the in-place sort to
    # an unused variable (sorted_merged_df).
    merged_df.sort_values(by=['datetime'], inplace=True)

    return merged_df
|
||||
|
||||
def filter_tickdata_time(filter_df, alpha_chars):
    """Drop tick rows that fall outside the product's trading session.

    Parameters
    ----------
    filter_df : pandas.DataFrame
        Ticks with a 'time' column of ``datetime.time`` values.
    alpha_chars : str
        Product letters, looked up in the module-level session tables
        financial_time_dict / commodity_night_dict / commodity_day_dict.

    Returns
    -------
    pandas.DataFrame
        ``filter_df`` with out-of-session rows removed (mutated in place).

    Because locally written data lags slightly, session-end cutoffs are
    extended by half a second where noted.
    """
    # Default every drop set to empty so the drop() calls at the bottom are
    # always defined.  The original left these unbound on the fall-through
    # paths (unknown product, misconfigured night session) and raised
    # NameError there.
    empty_index = pd.DataFrame().index
    drop_index1 = empty_index
    drop_index2 = empty_index
    drop_index3 = empty_index
    drop_index4 = empty_index

    if alpha_chars in financial_time_dict.keys():
        # CFFEX lunch break (end padded by 0.5 s for the write-lag).
        drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 500000)) & (filter_df['time'] < s_time(13, 0, 0, 0))].index
        if alpha_chars in ['IH', 'IF', 'IC', 'IM']:
            # Stock-index futures trade 09:30-15:00.
            drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 500000)) | (filter_df['time'] < s_time(9, 30, 0, 0))].index
            print("按照中金所股指期货交易时间筛选金融期货品种")
        else:
            # Treasury-bond futures trade 09:30-15:15.
            drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 15, 0, 500000)) | (filter_df['time'] < s_time(9, 30, 0, 0))].index
            print("按照中金所国债期货交易时间筛选金融期货品种")
        print("按照中金所交易时间筛选金融期货品种")

    elif alpha_chars in commodity_night_dict.keys():
        # Day-session gaps shared by every night-trading commodity.
        drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 0)) & (filter_df['time'] < s_time(10, 30, 0, 0))].index
        drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 0)) & (filter_df['time'] < s_time(13, 30, 0, 0))].index
        drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 0)) & (filter_df['time'] < s_time(21, 0, 0, 0))].index
        night_end = commodity_night_dict[alpha_chars]
        if night_end == s_time(23, 0):
            # Night session ends the same day; anything past 23:00 or before
            # the 09:00 day open is out of session.
            drop_index4 = filter_df.loc[(filter_df['time'] > s_time(23, 0, 0, 0)) | (filter_df['time'] < s_time(9, 0, 0, 0))].index
            print("按照夜盘截止交易时间为23:00筛选商品期货品种")
        elif night_end == s_time(1, 0):
            # Night session crosses midnight and ends 01:00.
            drop_index4 = filter_df.loc[(filter_df['time'] > s_time(1, 0, 0, 0)) & (filter_df['time'] < s_time(9, 0, 0, 0))].index
            print("按照夜盘截止交易时间为1:00筛选商品期货品种")
        elif night_end == s_time(2, 30):
            # Night session crosses midnight and ends 02:30.
            drop_index4 = filter_df.loc[(filter_df['time'] > s_time(2, 30, 0, 0)) & (filter_df['time'] < s_time(9, 0, 0, 0))].index
            print("按照夜盘截止交易时间为2:30筛选商品期货品种")
        else:
            print("夜盘截止交易时间未设置或者设置错误!!!")

    elif alpha_chars in commodity_day_dict.keys():
        # Day-only commodities: 09:00-15:00 with the usual breaks.
        drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 0)) & (filter_df['time'] < s_time(10, 30, 0, 0))].index
        drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 0)) & (filter_df['time'] < s_time(13, 30, 0, 0))].index
        drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 0)) | (filter_df['time'] < s_time(9, 0, 0, 0))].index
        print("按照无夜盘筛选商品期货品种")

    else:
        # Unknown product: no filtering, but the drops below are now no-ops
        # instead of a crash.
        print("%s期货品种未执行时间筛选中!!!"%(alpha_chars))

    # Remove every row flagged as outside the trading session.
    filter_df.drop(labels=drop_index1, axis=0, inplace=True)
    filter_df.drop(drop_index2, axis=0, inplace=True)
    filter_df.drop(drop_index3, axis=0, inplace=True)
    filter_df.drop(drop_index4, axis=0, inplace=True)

    return filter_df
|
||||
|
||||
def insert_main_contract(df):
    """Prepend a continuous-contract code column ('统一代码') to *df*.

    Convention per the original notes: 888 = main continuous, 999 = index
    continuous, 889 = secondary-main continuous.  This function tags rows
    with the secondary-main code (product letters + "889").

    Returns the tagged DataFrame, the product letters, and the code.
    """
    letters, _digits = split_alpha_numeric(df.loc[0, '合约代码'])
    continuous_code = letters + "889"
    print("code_value characters:", continuous_code)
    df.insert(loc=0, column="统一代码", value=continuous_code)
    return df, letters, continuous_code
|
||||
|
||||
def reinstatement_tickdata(merged_rs_df):
    """Back-adjust tick prices across contract rollovers (difference method).

    Expects columns main_contract, symbol, datetime, lastprice, volume,
    bid_p, ask_p, bid_v, ask_v.  Returns the same frame with lastprice,
    bid_p and ask_p shifted by the cumulative rollover gap and rounded to
    4 decimal places.
    """
    # Normalize dtypes.
    for col in ('main_contract', 'symbol'):
        merged_rs_df[col] = merged_rs_df[col].astype(str)
    merged_rs_df['datetime'] = pd.to_datetime(merged_rs_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')
    for col in ('volume', 'bid_v', 'ask_v'):
        merged_rs_df[col] = merged_rs_df[col].astype(int)

    # Difference-style adjustment: on every contract switch, record the gap
    # between the previous tick's ask and the new tick's bid; zero elsewhere.
    # (The first row compares against NaN, yielding NaN, which fillna zeroes.)
    rollover = merged_rs_df['symbol'] != merged_rs_df['symbol'].shift()
    merged_rs_df['复权因子'] = np.where(rollover, merged_rs_df['ask_p'].shift() - merged_rs_df['bid_p'], 0)
    merged_rs_df['复权因子'] = merged_rs_df['复权因子'].fillna(0)

    # Apply the running sum of gaps to all three price columns.
    merged_rs_df['bid_p_adj'] = merged_rs_df['bid_p'] + merged_rs_df['复权因子'].cumsum()
    merged_rs_df['ask_p_adj'] = merged_rs_df['ask_p'] + merged_rs_df['复权因子'].cumsum()
    merged_rs_df['lastprice_adj'] = merged_rs_df['lastprice'] + merged_rs_df['复权因子'].cumsum()

    # Overwrite the raw prices with the adjusted values.
    merged_rs_df['bid_p'] = merged_rs_df['bid_p_adj'].round(4)
    merged_rs_df['ask_p'] = merged_rs_df['ask_p_adj'].round(4)
    merged_rs_df['lastprice'] = merged_rs_df['lastprice_adj'].round(4)

    # Drop the scratch columns.
    for col in ('复权因子', 'bid_p_adj', 'ask_p_adj', 'lastprice_adj'):
        del merged_rs_df[col]

    return merged_rs_df
|
||||
68
2.数据下载与处理/数据转换最终版/merged_tickdata__BIT_20240522.py
Normal file
68
2.数据下载与处理/数据转换最终版/merged_tickdata__BIT_20240522.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
from datetime import time as s_time
|
||||
from datetime import datetime
|
||||
import chardet
|
||||
import numpy as np
|
||||
|
||||
|
||||
def split_alpha_numeric(string):
    """Split *string* into its alphabetic and numeric characters.

    Returns a ``(letters, digits)`` pair; characters that are neither are
    dropped.  Order within each group follows the input.
    """
    letters = []
    digits = []
    for ch in string:
        if ch.isalpha():
            letters.append(ch)
        elif ch.isdigit():
            digits.append(ch)
    return "".join(letters), "".join(digits)
|
||||
|
||||
|
||||
|
||||
def merged_old_unprocessed_tickdata(all_csv_files, sp_char):
    """Concatenate one product's raw per-year tick CSVs into a single frame.

    Parameters
    ----------
    all_csv_files : list of str
        Candidate CSV paths/names.
    sp_char : str
        Substring that selects this product's files.

    Returns
    -------
    pandas.DataFrame
        The selected files concatenated, de-duplicated, index reset.
        Files that fail to read as GBK are retried with their detected
        encoding; files that still fail are logged to output_error.txt
        and skipped.
    """
    csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
    print("csv_files:", csv_files)
    merged_up_df = pd.DataFrame()
    work_dir = os.getcwd()
    fileNum_errors = 0

    for csv_name in csv_files:
        try:
            df = pd.read_csv(csv_name,
                             header=0,
                             encoding='gbk',
                             low_memory=False)
        except Exception:
            # The original shadowed the loop variable with the open file
            # handle here and then fell through and concatenated a stale
            # (or, for the first file, undefined) df.
            file_path = os.path.join(work_dir, csv_name)
            fileNum_errors += 1
            with open(file_path, 'rb') as fh:
                data = fh.read()

            # Detect the real encoding with chardet and log the anomaly.
            detected_encoding = chardet.detect(data)['encoding']
            print("%s:%s当前文件不为gbk格式,其文件格式为%s,需要转换为gbk格式,错误总数为%s"%(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),file_path,detected_encoding,fileNum_errors))

            with open('output_error.txt', 'a') as f:
                print("%s:%s当前文件不为gbk格式,其文件格式为%s,需要转换为gbk格式,错误总数为%s"%(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),file_path,detected_encoding,fileNum_errors), file = f)

            # Retry with the detected encoding; skip the file entirely if
            # that also fails, instead of reusing the previous file's data.
            try:
                df = pd.read_csv(csv_name,
                                 header=0,
                                 encoding=detected_encoding,
                                 low_memory=False)
            except Exception:
                continue

        # Drop duplicate rows within the file, then merge.
        df.drop_duplicates(inplace=True)
        merged_up_df = pd.concat([merged_up_df, df], ignore_index=True)

    # Drop rows duplicated across files and reset the index.
    merged_up_df.drop_duplicates(subset=merged_up_df.columns.tolist(), inplace=True)
    merged_up_df.reset_index(inplace=True, drop=True)

    return merged_up_df
|
||||
|
||||
85
2.数据下载与处理/每日更新数据库.py
Normal file
85
2.数据下载与处理/每日更新数据库.py
Normal file
@@ -0,0 +1,85 @@
|
||||
import os
|
||||
|
||||
import requests
|
||||
import time
|
||||
from datetime import datetime
|
||||
from requests.adapters import HTTPAdapter
|
||||
import pandas as pd
|
||||
|
||||
# Pandas console-display tuning for this script's print(df) output.
pd.set_option('display.max_rows', 1000)
pd.set_option('expand_frame_repr', False) # do not wrap when there are too many columns
# Align columns in terminal output when cells contain wide (CJK) characters.
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)
|
||||
|
||||
|
||||
def requestForNew(url):
    """GET *url* with transport-level retries; return the Response.

    Mounts an adapter that retries up to 3 times on both schemes, asks
    the server to close the connection, and uses a 30 s timeout.  When
    the response body is empty, prints a failure notice and returns None.
    """
    session = requests.Session()
    retry_adapter = HTTPAdapter(max_retries=3)
    session.mount('http://', retry_adapter)
    session.mount('https://', retry_adapter)
    session.keep_alive = False
    response = session.get(url, headers={'Connection': 'close'}, timeout=30)
    if not response.content:
        print("链接失败", response)
        return None
    return response
|
||||
|
||||
|
||||
def getDate():
    """Return the quote date of the Shanghai Composite (sh000001).

    Fetches the Sina realtime quote line and extracts the date field,
    which sits fourth from the end of the comma-separated payload.
    """
    quote_text = requestForNew('http://hq.sinajs.cn/list=sh000001').text
    return str(quote_text.split(',')[-4])
|
||||
|
||||
|
||||
# 通过新浪财经获取每日更新的股票代码
|
||||
# 通过新浪财经获取每日更新的股票代码
def getStockCodeForEveryday():
    """Download today's snapshot for every A-share from Sina's quote API.

    Pages through the hs_a node (80 rows per page, pages 1-99) until the
    first empty page, renames the Chinese columns, stamps the trade date,
    and returns the trimmed DataFrame.
    """
    frames = []
    for page in range(1, 100):
        # 1~100 pages is plenty; the loop stops at the first empty page.
        url = 'http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=' \
              + str(page) + '&num=80&sort=changepercent&asc=0&node=hs_a&symbol=&_s_r_a=page'
        content = requestForNew(url).json()
        if not content:
            print("股票信息,获取完毕。")
            break
        print("正在读取页面" + str(page))
        time.sleep(3)
        frames.append(pd.DataFrame(content, dtype='float'))

    # DataFrame.append was removed in pandas 2.0 — accumulate the pages
    # and concatenate once instead.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    rename_dict = {'symbol': '股票代码', 'code': '交易日期', 'name': '股票名称', 'open': '开盘价',
                   'settlement': '前收盘价', 'trade': '收盘价', 'high': '最高价', 'low': '最低价',
                   'buy': '买一', 'sell': '卖一', 'volume': '成交量', 'amount': '成交额',
                   'changepercent': '涨跌幅', 'pricechange': '涨跌额',
                   'mktcap': '总市值', 'nmc': '流通市值', 'ticktime': '数据更新时间', 'per': 'per', 'pb': '市净率',
                   'turnoverratio': '换手率'}
    df.rename(columns=rename_dict, inplace=True)
    tradeDate = getDate()
    # The float-mangled 'code' column is repurposed as the trade date.
    df['交易日期'] = tradeDate
    df = df[['股票代码', '股票名称', '交易日期', '开盘价', '最高价', '最低价', '收盘价', '前收盘价', '成交量', '成交额', '流通市值', '总市值']]
    return df
|
||||
|
||||
|
||||
# Script entry: fetch today's snapshot for all A-shares and append each
# stock's row to its own CSV under ./data/.
df = getStockCodeForEveryday()
print(df)

# The original crashed with FileNotFoundError when ./data/ did not exist.
os.makedirs('./data', exist_ok=True)

for i in df.index:
    t = df.iloc[i:i + 1, :]
    stock_code = t.iloc[0]['股票代码']

    # Per-stock output file.
    path = './data/' \
        + stock_code + '.csv'
    if os.path.exists(path):
        # Existing file (not a new listing): append the daily row, no header.
        t.to_csv(path, header=None, index=False, mode='a', encoding='gbk')
    else:
        # New listing: write the banner header line first, then the row.
        pd.DataFrame(columns=['数据由邢不行整理']).to_csv(path, index=False, encoding='gbk')
        t.to_csv(path, index=False, mode='a', encoding='gbk')
    print(stock_code)
|
||||
Reference in New Issue
Block a user