Enhance trading workflow with new order flow management
- Added dingdanliu_nb_mflow for improved order processing - Updated related scripts and configurations to support new functionality
This commit is contained in:
698
2.数据下载与处理/Tushare_get_data.ipynb
Normal file
698
2.数据下载与处理/Tushare_get_data.ipynb
Normal file
@@ -0,0 +1,698 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "ModuleNotFoundError",
|
||||||
|
"evalue": "No module named 'tushare'",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"Cell \u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtushare\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mts\u001b[39;00m\n\u001b[0;32m 2\u001b[0m ts\u001b[38;5;241m.\u001b[39mset_token(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m78282dabb315ee578fb73a9b328f493026e97d5af709acb331b7b348\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 3\u001b[0m pro \u001b[38;5;241m=\u001b[39m ts\u001b[38;5;241m.\u001b[39mpro_api()\n",
|
||||||
|
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'tushare'"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import tushare as ts\n",
|
||||||
|
"ts.set_token('78282dabb315ee578fb73a9b328f493026e97d5af709acb331b7b348')\n",
|
||||||
|
"pro = ts.pro_api()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 32,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"300\n",
|
||||||
|
"<class 'int'>\n",
|
||||||
|
"0.15\n",
|
||||||
|
"<class 'float'>\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from datetime import datetime, timedelta\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"fees_df = pd.read_csv('./futures_fees_info.csv', usecols= [1, 4, 17, 19, 25],names=['合约', '合约乘数', '做多保证金率', '做空保证金率', '品种代码'])\n",
|
||||||
|
"data0 = int(fees_df[fees_df['合约'] == 'IH2407']['合约乘数'].iloc[0])\n",
|
||||||
|
"\n",
|
||||||
|
"print(data0)\n",
|
||||||
|
"print(type(data0))\n",
|
||||||
|
"data1 = float(fees_df[fees_df['合约'] == 'IH2407']['做多保证金率'].iloc[0])\n",
|
||||||
|
"print(data1)\n",
|
||||||
|
"print(type(data1))\n",
|
||||||
|
"# fees_df[fees_df['合约'] == 'IH2407']['做空保证金率'].iloc[0]\n",
|
||||||
|
"# (fees_df[fees_df['合约'] == 'IH2407']['做多保证金率'].iloc[0] + fees_df[fees_df['合约'] == 'IH2407']['做空保证金率'].iloc[0])/2"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from datetime import datetime, timedelta\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"fees_df = pd.read_csv('./futures_fees_info.csv', usecols= [1, 4, 17, 19],names=['合约', '合约乘数', '做多保证金率', '做空保证金率'])\n",
|
||||||
|
"contacts_df = pd.read_csv('./main_contacts.csv', usecols= [16, 17],names=['主连代码', '品种代码'])\n",
|
||||||
|
"\n",
|
||||||
|
"def get_main_contact_on_time(main_symbol_code):\n",
|
||||||
|
" data_str = ''\n",
|
||||||
|
" alpha_chars = ''\n",
|
||||||
|
" numeric_chars = ''\n",
|
||||||
|
" main_code = ''\n",
|
||||||
|
"\n",
|
||||||
|
" # main_symbol = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[0]\n",
|
||||||
|
" # exchange_id = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[1]\n",
|
||||||
|
" main_symbol = contacts_df[contacts_df['品种代码'] == main_symbol_code]['主连代码'].iloc[0]\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" # # 拆分主连合约代码为交易标识和交易所代码(交易市场)\n",
|
||||||
|
" # main_symbol = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[0]\n",
|
||||||
|
" # exchange_id = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[1]\n",
|
||||||
|
"\n",
|
||||||
|
" # # 拆分交易标识中的合约产品代码和交割月份\n",
|
||||||
|
" # for char in main_symbol:\n",
|
||||||
|
" # if char.isalpha():\n",
|
||||||
|
" # alpha_chars += char\n",
|
||||||
|
" # elif char.isdigit():\n",
|
||||||
|
" # numeric_chars += char\n",
|
||||||
|
" \n",
|
||||||
|
" # # 监理交易所映射\n",
|
||||||
|
" # exchange = {'CFX': 'CFFEX', 'SHF':'SHFE', 'DCE':'DCE', 'GFE':'GFEX', 'INE':'INE', 'ZCE':'CZCE'}\n",
|
||||||
|
"\n",
|
||||||
|
" # # 计算per_unit:交易单位(每手)和转换后交易所识别的main_code:主连代码\n",
|
||||||
|
" # if exchange_id == 'CFX' or exchange_id == 'SHF' or exchange_id == 'DCE' or exchange_id == 'GFE' or exchange_id == 'INE':\n",
|
||||||
|
" # df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
|
||||||
|
" # # ts_code = df[df['symbol'] == main_symbol]['ts_code'].iloc[0]\n",
|
||||||
|
" # per_unit = df[df['symbol'] == main_symbol]['per_unit'].iloc[0]\n",
|
||||||
|
"\n",
|
||||||
|
" # # ds = pro.fut_settle(trade_date = data_str, ts_code =ts_code)\n",
|
||||||
|
" # # ds['margin_rate'] = (ds['long_margin_rate'] + ds['short_margin_rate'])/2\n",
|
||||||
|
" # # margin_rate = ds['margin_rate'].iloc[0]\n",
|
||||||
|
" \n",
|
||||||
|
" \n",
|
||||||
|
" # if exchange_id == 'CFX':\n",
|
||||||
|
" # main_code = main_symbol\n",
|
||||||
|
" # elif exchange_id == 'SHF' or exchange_id == 'DCE' or exchange_id == 'GFE' or exchange_id == 'INE':\n",
|
||||||
|
" # lower_alpha_chars = str.lower(alpha_chars) \n",
|
||||||
|
" # main_code = lower_alpha_chars + numeric_chars\n",
|
||||||
|
" # elif exchange_id == 'ZCE':\n",
|
||||||
|
" # true_numeric_chars = numeric_chars[1:]\n",
|
||||||
|
" # main_code = alpha_chars + true_numeric_chars \n",
|
||||||
|
" # df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
|
||||||
|
" # per_unit = df[df['symbol'] == main_code]['per_unit'].iloc[0]\n",
|
||||||
|
" # main_code = alpha_chars + true_numeric_chars\n",
|
||||||
|
"\n",
|
||||||
|
" # print(\"最终使用的主连代码:\",main_code) \n",
|
||||||
|
" # print(\"%s的交易单位(每手):%s\"%(main_symbol, per_unit))\n",
|
||||||
|
" return main_symbol\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'IH2407'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"get_main_contact_on_time('IH')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from datetime import datetime, timedelta\n",
|
||||||
|
"def get_main_contact_on_time(main_symbol_code):\n",
|
||||||
|
" data_str = ''\n",
|
||||||
|
" alpha_chars = ''\n",
|
||||||
|
" numeric_chars = ''\n",
|
||||||
|
" main_code = ''\n",
|
||||||
|
"\n",
|
||||||
|
" # 获取主连合约代码,如果是当天15点前日盘,则获取前一天的合约代码,如果是当天15点后晚盘,则获取今天的的合约代码\n",
|
||||||
|
" now = datetime.now()\n",
|
||||||
|
" if now.hour < 15:\n",
|
||||||
|
" data_str = (now - timedelta(days=1)).date().strftime('%Y%m%d')\n",
|
||||||
|
" else:\n",
|
||||||
|
" data_str = now.date().strftime('%Y%m%d')\n",
|
||||||
|
"\n",
|
||||||
|
" # 拆分主连合约代码为交易标识和交易所代码(交易市场)\n",
|
||||||
|
" main_symbol = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[0]\n",
|
||||||
|
" exchange_id = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[1]\n",
|
||||||
|
"\n",
|
||||||
|
" # 拆分交易标识中的合约产品代码和交割月份\n",
|
||||||
|
" for char in main_symbol:\n",
|
||||||
|
" if char.isalpha():\n",
|
||||||
|
" alpha_chars += char\n",
|
||||||
|
" elif char.isdigit():\n",
|
||||||
|
" numeric_chars += char\n",
|
||||||
|
" \n",
|
||||||
|
" # 监理交易所映射\n",
|
||||||
|
" exchange = {'CFX': 'CFFEX', 'SHF':'SHFE', 'DCE':'DCE', 'GFE':'GFEX', 'INE':'INE', 'ZCE':'CZCE'}\n",
|
||||||
|
"\n",
|
||||||
|
" # 计算per_unit:交易单位(每手)和转换后交易所识别的main_code:主连代码\n",
|
||||||
|
" if exchange_id == 'CFX' or exchange_id == 'SHF' or exchange_id == 'DCE' or exchange_id == 'GFE' or exchange_id == 'INE':\n",
|
||||||
|
" df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
|
||||||
|
" # ts_code = df[df['symbol'] == main_symbol]['ts_code'].iloc[0]\n",
|
||||||
|
" per_unit = df[df['symbol'] == main_symbol]['per_unit'].iloc[0]\n",
|
||||||
|
"\n",
|
||||||
|
" # ds = pro.fut_settle(trade_date = data_str, ts_code =ts_code)\n",
|
||||||
|
" # ds['margin_rate'] = (ds['long_margin_rate'] + ds['short_margin_rate'])/2\n",
|
||||||
|
" # margin_rate = ds['margin_rate'].iloc[0]\n",
|
||||||
|
" \n",
|
||||||
|
" \n",
|
||||||
|
" if exchange_id == 'CFX':\n",
|
||||||
|
" main_code = main_symbol\n",
|
||||||
|
" elif exchange_id == 'SHF' or exchange_id == 'DCE' or exchange_id == 'GFE' or exchange_id == 'INE':\n",
|
||||||
|
" lower_alpha_chars = str.lower(alpha_chars) \n",
|
||||||
|
" main_code = lower_alpha_chars + numeric_chars\n",
|
||||||
|
" elif exchange_id == 'ZCE':\n",
|
||||||
|
" true_numeric_chars = numeric_chars[1:]\n",
|
||||||
|
" main_code = alpha_chars + true_numeric_chars \n",
|
||||||
|
" df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
|
||||||
|
" per_unit = df[df['symbol'] == main_code]['per_unit'].iloc[0]\n",
|
||||||
|
" main_code = alpha_chars + true_numeric_chars\n",
|
||||||
|
"\n",
|
||||||
|
" print(\"最终使用的主连代码:\",main_code) \n",
|
||||||
|
" print(\"%s的交易单位(每手):%s\"%(main_symbol, per_unit))\n",
|
||||||
|
" return main_code\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sb_1 = get_main_contact_on_time('IH.CFX')\n",
|
||||||
|
"sb_2 = get_main_contact_on_time('cu.SHF')\n",
|
||||||
|
"sb_3 = get_main_contact_on_time('eb.DCE')\n",
|
||||||
|
"sb_4 = get_main_contact_on_time('si.GFE')\n",
|
||||||
|
"sb_5 = get_main_contact_on_time('sc.INE') \n",
|
||||||
|
"sb_6 = get_main_contact_on_time('SA.ZCE')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# df = pro.fut_basic(exchange='DCE', fut_type='1',fut_code = 'j' , fields='ts_code,symbol,exchange,name,fut_code,multiplier,trade_unit,per_unit,quote_unit,quote_unit_desc,d_mode_desc,list_date,delist_date,d_month,last_ddate,trade_time_desc')\n",
|
||||||
|
"# df = pro.fut_basic(exchange='SHFE', fut_type='1', fut_code = 'au', fields='ts_code,symbol,name,list_date,delist_date')\n",
|
||||||
|
"df = pro.fut_basic(exchange='CZCE', fut_type='1', fut_code = 'SA', fields='ts_code,symbol,exchange,name,fut_code,per_unit')\n",
|
||||||
|
"# index_of_value = df.index[df['symbol'] == 'AU2408']\n",
|
||||||
|
"df.head()\n",
|
||||||
|
"value = df[df['symbol'] == 'SA409']['per_unit'].iloc[0]\n",
|
||||||
|
"print(value)\n",
|
||||||
|
"# df.loc[index_of_value, 'per_unit'].value\n",
|
||||||
|
"# df.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df = pro.fut_mapping(ts_code='SA.ZCE')\n",
|
||||||
|
"print(df)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# ds = pro.fut_settle(trade_date = '20240625', ts_code ='SA2409.ZCE')\n",
|
||||||
|
"# ds = pro.fut_settle(trade_date='20230625', exchange='ZCE')\n",
|
||||||
|
"# ds = pro.fut_settle(ts_code='SA409.ZCE', exchange='CZCE')\n",
|
||||||
|
"# pro.fut_settle(trade_date='20181114', exchange='CZCE')\n",
|
||||||
|
"pro.fut_settle(ts_code='AP2510.ZCE', exchange='CZCE')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ds.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ds['margin_rate'] = round((ds['long_margin_rate'] + ds['short_margin_rate'])/2,2)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ds['margin_rate'].iloc[0]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"value = df.loc[index_of_value, 'per_unit'].iloc[0]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(value)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df_IH = pro.fut_mapping(ts_code='IH.CFX')\n",
|
||||||
|
"print(df_IH)\n",
|
||||||
|
"df_IH.to_csv(r\"E:\\data\\mapping_ts_code_IH.csv\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df_IF = pro.fut_mapping(ts_code='IF.CFX')\n",
|
||||||
|
"print(df_IF)\n",
|
||||||
|
"df_IF.to_csv(r\"E:\\data\\mapping_ts_code_IF.csv\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df_IC = pro.fut_mapping(ts_code='IC.CFX')\n",
|
||||||
|
"print(df_IC)\n",
|
||||||
|
"df_IC.to_csv(r\"E:\\data\\mapping_ts_code_IC.csv\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df_IM = pro.fut_mapping(ts_code='IM.CFX')\n",
|
||||||
|
"print(df_IM)\n",
|
||||||
|
"df_IM.to_csv(r\"E:\\data\\mapping_ts_code_IM.csv\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df_TF = pro.fut_mapping(ts_code='TF.CFX')\n",
|
||||||
|
"print(df_TF)\n",
|
||||||
|
"df_TF.to_csv(r\"E:\\data\\mapping_ts_code_TF.csv\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df_T = pro.fut_mapping(ts_code='T.CFX')\n",
|
||||||
|
"print(df_T)\n",
|
||||||
|
"df_T.to_csv(r\"E:\\data\\mapping_ts_code_T.csv\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df_TS = pro.fut_mapping(ts_code='TS.CFX')\n",
|
||||||
|
"print(df_TS)\n",
|
||||||
|
"df_TS.to_csv(r\"E:\\data\\mapping_ts_code_TS.csv\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df_TL = pro.fut_mapping(ts_code='TL.CFX')\n",
|
||||||
|
"print(df_TL)\n",
|
||||||
|
"df_TL.to_csv(r\"E:\\data\\mapping_TL_code_TL.csv\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "NameError",
|
||||||
|
"evalue": "name 'pro' is not defined",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m df_TL \u001b[38;5;241m=\u001b[39m \u001b[43mpro\u001b[49m\u001b[38;5;241m.\u001b[39mfut_mapping(ts_code\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTL.CFX\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(df_TL)\n\u001b[0;32m 3\u001b[0m df_TL\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mD:\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mmapping_TL_code_TL.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||||
|
"\u001b[1;31mNameError\u001b[0m: name 'pro' is not defined"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_TL = pro.fut_mapping(ts_code='TL.CFX')\n",
|
||||||
|
"print(df_TL)\n",
|
||||||
|
"df_TL.to_csv(r\"D:\\data\\mapping_TL_code_TL.csv\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import akshare as ak\n",
|
||||||
|
"\n",
|
||||||
|
"futures_comm_info_df = ak.futures_comm_info(symbol=\"上海国际能源交易中心\")\n",
|
||||||
|
"print(futures_comm_info_df[\"保证金-买开\"])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"futures_fees_info_df = ak.futures_fees_info()\n",
|
||||||
|
"print(futures_fees_info_df)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"futures_fees_info_df.to_csv(r'./futures_fees_info.csv', index=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"futures_display_main_sina_df = ak.futures_display_main_sina()\n",
|
||||||
|
"print(futures_display_main_sina_df)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"url = 'https://www.9qihuo.com/hangqing' #上期所铜结算参数地址https://www.9qihuo.com/hangqing\n",
|
||||||
|
"data =pd.read_html(url) #读取网页上的表格\n",
|
||||||
|
"dt=pd.concat([data[4].drop([0],axis=0), data[5]], ignore_index=True) #提取结算参数到DataFrame格式\n",
|
||||||
|
"#调整格式\n",
|
||||||
|
"dt.columns=dt.iloc[0]\n",
|
||||||
|
"dt.drop([0],axis=0,inplace=True) \n",
|
||||||
|
"dt.set_index('合约代码',inplace=True)\n",
|
||||||
|
"print(dt) #输出铜的结算参数"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import requests\n",
|
||||||
|
"from bs4 import BeautifulSoup\n",
|
||||||
|
"import csv\n",
|
||||||
|
"\n",
|
||||||
|
"# 目标网址\n",
|
||||||
|
"url = \"https://www.9qihuo.com/hangqing\"\n",
|
||||||
|
"\n",
|
||||||
|
"# 发送GET请求,禁用SSL验证\n",
|
||||||
|
"response = requests.get(url, verify=False)\n",
|
||||||
|
"response.encoding = 'utf-8' # 确保编码正确\n",
|
||||||
|
"\n",
|
||||||
|
"# 解析网页内容\n",
|
||||||
|
"soup = BeautifulSoup(response.text, 'lxml')\n",
|
||||||
|
"\n",
|
||||||
|
"# 找到目标表格\n",
|
||||||
|
"table = soup.find('table', {'id': 'tblhangqinglist'})\n",
|
||||||
|
"\n",
|
||||||
|
"# 初始化CSV文件\n",
|
||||||
|
"with open('main_contacts.csv', mode='w', newline='', encoding='utf-8') as file:\n",
|
||||||
|
" writer = csv.writer(file)\n",
|
||||||
|
" \n",
|
||||||
|
" # 遍历表格的所有行\n",
|
||||||
|
" for row in table.find_all('tr'):\n",
|
||||||
|
" # 获取每一行的所有单元格\n",
|
||||||
|
" cols = row.find_all(['th', 'td'])\n",
|
||||||
|
" # 提取文本内容并写入CSV文件\n",
|
||||||
|
" writer.writerow([col.text.strip() for col in cols])\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"表格已成功保存为main_contacts.csv\")\n",
|
||||||
|
"\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"df = pd.read_csv('./main_contacts.csv')\n",
|
||||||
|
"df.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df['交易品种'] = df['合约'].str.split(r'[()]', n=1, expand=True)[0]\n",
|
||||||
|
"df['主连代码'] = df['合约'].str.split(r'[()]', n=2, expand=True)[1]\n",
|
||||||
|
"\n",
|
||||||
|
"# df['品种代码'] = df['主连代码'].str.split(str.isalpha(df['主连代码']), n=1, expand=True)[0]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import re\n",
|
||||||
|
"\n",
|
||||||
|
"# 创建示例DataFrame\n",
|
||||||
|
"\n",
|
||||||
|
"# 定义拆分字母和数字的函数\n",
|
||||||
|
"def split_alpha_numeric(s):\n",
|
||||||
|
" match = re.match(r\"([a-zA-Z]+)([0-9]+)\", s)\n",
|
||||||
|
" if match:\n",
|
||||||
|
" return match.groups()\n",
|
||||||
|
" else:\n",
|
||||||
|
" return (s, None) # 如果没有匹配,返回原始字符串和None\n",
|
||||||
|
"\n",
|
||||||
|
"# 应用函数并创建新列\n",
|
||||||
|
"df[['品种代码', '交割月份']] = df['主连代码'].apply(lambda x: pd.Series(split_alpha_numeric(x)))\n",
|
||||||
|
"\n",
|
||||||
|
"print(df)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df.to_csv('./main_contacts_all.csv')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 39,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import subprocess\n",
|
||||||
|
"import schedule\n",
|
||||||
|
"import time\n",
|
||||||
|
"from datetime import datetime\n",
|
||||||
|
"\n",
|
||||||
|
"# jerome:增加akshare库\n",
|
||||||
|
"import akshare as ak\n",
|
||||||
|
"\n",
|
||||||
|
"# jerome:增加下列库用于爬虫获取主力连续代码\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import requests\n",
|
||||||
|
"from bs4 import BeautifulSoup\n",
|
||||||
|
"import csv\n",
|
||||||
|
"import re\n",
|
||||||
|
"import os"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 48,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def get_futures_fees_info():\n",
|
||||||
|
" futures_fees_info_df = ak.futures_fees_info()\n",
|
||||||
|
" futures_fees_info_df.to_csv(r'./futures_fees_info.csv', index=False)\n",
|
||||||
|
"\n",
|
||||||
|
"def get_main_contacts():\n",
|
||||||
|
" url = \"https://www.9qihuo.com/hangqing\"\n",
|
||||||
|
"\n",
|
||||||
|
" # 发送GET请求,禁用SSL验证\n",
|
||||||
|
" response = requests.get(url, verify=False)\n",
|
||||||
|
" response.encoding = 'utf-8' # 确保编码正确\n",
|
||||||
|
"\n",
|
||||||
|
" # 解析网页内容\n",
|
||||||
|
" soup = BeautifulSoup(response.text, 'lxml')\n",
|
||||||
|
"\n",
|
||||||
|
" # 找到目标表格\n",
|
||||||
|
" table = soup.find('table', {'id': 'tblhangqinglist'})\n",
|
||||||
|
"\n",
|
||||||
|
" # 初始化CSV文件\n",
|
||||||
|
" with open('tmp_main_contacts.csv', mode='w', newline='', encoding='utf-8') as file:\n",
|
||||||
|
" writer = csv.writer(file)\n",
|
||||||
|
" \n",
|
||||||
|
" # 遍历表格的所有行\n",
|
||||||
|
" for row in table.find_all('tr'):\n",
|
||||||
|
" # 获取每一行的所有单元格\n",
|
||||||
|
" cols = row.find_all(['th', 'td'])\n",
|
||||||
|
" # 提取文本内容并写入CSV文件\n",
|
||||||
|
" writer.writerow([col.text.strip() for col in cols])\n",
|
||||||
|
"\n",
|
||||||
|
" df = pd.read_csv('./tmp_main_contacts.csv',encoding='utf-8')\n",
|
||||||
|
" df['交易品种'] = df['合约'].str.split(r'[()]', n=1, expand=True)[0]\n",
|
||||||
|
" df['主连代码'] = df['合约'].str.split(r'[()]', n=2, expand=True)[1]\n",
|
||||||
|
"\n",
|
||||||
|
" df[['品种代码', '交割月份']] = df['主连代码'].apply(lambda x: pd.Series(split_alpha_numeric(x)))\n",
|
||||||
|
" df.to_csv('./main_contacts.csv')\n",
|
||||||
|
"\n",
|
||||||
|
" print(\"期货主力品种表已经保存为main_contacts.csv\")\n",
|
||||||
|
" os.remove(\"./tmp_main_contacts.csv\")\n",
|
||||||
|
"\n",
|
||||||
|
"# 拆分字母和数字的函数\n",
|
||||||
|
"def split_alpha_numeric(s):\n",
|
||||||
|
" match = re.match(r\"([a-zA-Z]+)([0-9]+)\", s)\n",
|
||||||
|
" if match:\n",
|
||||||
|
" return match.groups()\n",
|
||||||
|
" else:\n",
|
||||||
|
" return (s, None) # 如果没有匹配,返回原始字符串和None"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 44,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"get_futures_fees_info()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 49,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"期货主力品种表已经保存为main_contacts.csv\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"get_main_contacts()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
180
2.数据下载与处理/download_data.ipynb
Normal file
180
2.数据下载与处理/download_data.ipynb
Normal file
@@ -0,0 +1,180 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 配置迅投研数据服务\n",
|
||||||
|
"from vnpy.trader.setting import SETTINGS\n",
|
||||||
|
"\n",
|
||||||
|
"SETTINGS[\"datafeed.name\"] = \"xt\"\n",
|
||||||
|
"SETTINGS[\"datafeed.username\"] = \"token\"\n",
|
||||||
|
"SETTINGS[\"datafeed.password\"] = \"ef326f853a744c58572f0158d470912c38a09552\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 加载功能模块\n",
|
||||||
|
"from datetime import datetime\n",
|
||||||
|
"\n",
|
||||||
|
"from vnpy.trader.datafeed import get_datafeed\n",
|
||||||
|
"from vnpy.trader.object import HistoryRequest, Exchange, Interval\n",
|
||||||
|
"\n",
|
||||||
|
"from vnpy_sqlite import Database as SqliteDatabase\n",
|
||||||
|
"#from elite_database import Database as EliteDatabase\n",
|
||||||
|
"\n",
|
||||||
|
"#增加\n",
|
||||||
|
"import pandas as pd"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"True"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# 初始化数据服务\n",
|
||||||
|
"datafeed = get_datafeed()\n",
|
||||||
|
"datafeed.init()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 交易所映射关系\n",
|
||||||
|
"EXCHANGE_XT2VT = {\n",
|
||||||
|
" \"SH\": Exchange.SSE,\n",
|
||||||
|
" \"SZ\": Exchange.SZSE,\n",
|
||||||
|
" \"BJ\": Exchange.BSE,\n",
|
||||||
|
" \"SF\": Exchange.SHFE,\n",
|
||||||
|
" \"IF\": Exchange.CFFEX,\n",
|
||||||
|
" \"INE\": Exchange.INE,\n",
|
||||||
|
" \"DF\": Exchange.DCE,\n",
|
||||||
|
" \"ZF\": Exchange.CZCE,\n",
|
||||||
|
" \"GF\": Exchange.GFEX\n",
|
||||||
|
"}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"数据长度 41336\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# 查询期货历史数据\n",
|
||||||
|
"req = HistoryRequest(\n",
|
||||||
|
" symbol=\"rb00\", # 加权指数 \n",
|
||||||
|
" # symbol=\"IF00\", # 主力连续(未平滑)\n",
|
||||||
|
" # exchange=Exchange.CFFEX,\n",
|
||||||
|
" exchange = EXCHANGE_XT2VT[\"SF\"],\n",
|
||||||
|
" start=datetime(2023, 1, 1),\n",
|
||||||
|
" end=datetime(2023, 11, 24),#end=datetime.now(),\n",
|
||||||
|
" interval=Interval.TICK\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"ticks = datafeed.query_tick_history(req)\n",
|
||||||
|
"print(\"数据长度\", len(ticks))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 创建Elite数据库实例并写入数据\n",
|
||||||
|
"#db2 = EliteDatabase()\n",
|
||||||
|
"#db2.save_bar_data(bars)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df = pd.DataFrame(ticks)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 创建CSV文件并写入数据\n",
|
||||||
|
"filepath = \"rb00_11.csv\" # CSV文件保存路径及文件名\n",
|
||||||
|
"df.to_csv(filepath, index=False) # index参数设置为False表示不包含索引列\n",
|
||||||
|
"#df.to_csv(filepath, mode='a', index=False, header=False) # index参数设置为False表示不包含索引列"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 读取CSV文件\n",
|
||||||
|
"data = pd.read_csv(\"IC0.csv\")\n",
|
||||||
|
"# 对数据进行排序\n",
|
||||||
|
"sorted_data = data.sort_values(by='datetime')\n",
|
||||||
|
"# 将排序结果写入CSV文件\n",
|
||||||
|
"sorted_data.to_csv('sort_IC00.csv', index=False)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.9"
|
||||||
|
},
|
||||||
|
"vscode": {
|
||||||
|
"interpreter": {
|
||||||
|
"hash": "1b43cb0bd93d5abbadd54afed8252f711d4681fe6223ad6b67ffaee289648f85"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
241
2.数据下载与处理/main_contact_merged.ipynb
Normal file
241
2.数据下载与处理/main_contact_merged.ipynb
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"map_file = r\"D:\\data\\mapping_ts_code_IH.csv\" #主力合约统计表\n",
|
||||||
|
"file_path = str(\"F:/2022_tickdata/marketdatacsv\") #csv文件绝对地址前缀\n",
|
||||||
|
"\n",
|
||||||
|
"header_file = r\"D:\\data\\fut_marketdata_head.csv\" # 包含表头的 CSV 文件名\n",
|
||||||
|
"# data_file = r\"D:\\combined_market_data.csv\" # 包含数据的 CSV 文件名\n",
|
||||||
|
"output_file = r\"D:\\IH888_up_2022.csv\" # 合并后的输出文件名\n",
|
||||||
|
"total_code = 'IH888'\n",
|
||||||
|
"\n",
|
||||||
|
"sp_chars = ['csv2022'] #'csv2021', 'csv2022',需要查找的主力年份文件"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df = pd.read_csv(map_file, index_col=0, encoding='utf', low_memory=False)\n",
|
||||||
|
"df['mapping_ts_code_new'] = df['mapping_ts_code'].apply(lambda x: x.split('.')[0])\n",
|
||||||
|
"df['temp_path']= file_path\n",
|
||||||
|
"df['final_path'] = df['temp_path'].astype(str) + df['trade_date'].astype(str) + '/' + df['mapping_ts_code_new'] + '.csv'\n",
|
||||||
|
"del df['mapping_ts_code_new'], df['temp_path']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df.tail()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"import time as s_time\n",
|
||||||
|
"import datetime\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"for sp_char in sp_chars:\n",
|
||||||
|
" csv_files = [sp_file for sp_file in df['final_path'] if sp_char in sp_file]\n",
|
||||||
|
" print(csv_files[:5])\n",
|
||||||
|
" print(csv_files[-5:])\n",
|
||||||
|
" dfs = pd.DataFrame()\n",
|
||||||
|
" for file_path in csv_files:\n",
|
||||||
|
" df_temp = pd.read_csv(file_path) \n",
|
||||||
|
" print('读取%s成功'%(file_path))\n",
|
||||||
|
" # df_temp.columns = ['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
|
||||||
|
" # df_temp['datetime'] = df_temp['交易日'].astype(str) + ' '+df_temp['最后修改时间'].astype(str) + '.' + df_temp['最后修改毫秒'].astype(str)\n",
|
||||||
|
" # df_temp['datetime'] = pd.to_datetime(df_temp['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')\n",
|
||||||
|
" # df_temp['tmp_time'] = df_temp['datetime'].dt.strftime('%H:%M:%S.%f')\n",
|
||||||
|
" # df_temp['time'] = df_temp['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time\n",
|
||||||
|
" # drop_index1 = df_temp.loc[(df_temp['time'] > s_time(11, 30, 0)) & (df_temp['time'] < s_time(13, 0, 0))].index\n",
|
||||||
|
" # drop_index2 = df_temp.loc[(df_temp['time'] > s_time(15, 0, 0)) | (df_temp['time'] < s_time(9, 30, 0))].index\n",
|
||||||
|
" # df_temp.drop(drop_index1, axis=0, inplace=True)\n",
|
||||||
|
" # df_temp.drop(drop_index2, axis=0, inplace=True)\n",
|
||||||
|
" # dfs.append(df_temp)\n",
|
||||||
|
" # df_temp.columns=['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
|
||||||
|
" df_temp.columns = ['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
|
||||||
|
" # print(df_temp.tail())\n",
|
||||||
|
" # # print(\"表头添加成功!\")\n",
|
||||||
|
" # dfs = pd.concat([dfs, df_temp],ignore_index=True, axis= 0)# \n",
|
||||||
|
" # print(dfs.tail())\n",
|
||||||
|
" # dfs = pd.concat([df_temp, ignore_index=True)\n",
|
||||||
|
" dfs = pd.concat([dfs, df_temp], ignore_index=True)\n",
|
||||||
|
" \n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dfs.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dfs.tail()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = dfs.sort_values(by = ['交易日', '最后修改时间', '最后修改毫秒'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df.tail()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df.insert(0,'统一代码', total_code)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df.tail()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df.to_csv(output_file, index=False)\n",
|
||||||
|
"print(\"合并完成,并已导出到%s文件。\"%(output_file))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### 以下为其他代码\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
" \n",
|
||||||
|
"try:\n",
|
||||||
|
" file_path = 'path/to/your/file.csv' # 替换为你的文件路径\n",
|
||||||
|
" df = pd.read_csv(file_path)\n",
|
||||||
|
"except FileNotFoundError:\n",
|
||||||
|
" print(f\"无法找到文件:{file_path}\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import os\n",
|
||||||
|
"for k in ['2021']:# , '2023'\n",
|
||||||
|
" for v in [ 'IH', 'IF', 'IC', 'IM', 'T', 'TF', 'TL', 'TS']: \n",
|
||||||
|
" print('当前年份为:%s,品种为:%s'%(k,v))\n",
|
||||||
|
" map_file = 'D:/data/mapping_ts_code_%s.csv'%(v) #v\n",
|
||||||
|
" file_path = 'F:/%s_tickdata/marketdatacsv'%(k) #csv文件绝对地址前缀\n",
|
||||||
|
" output_file = 'D:/%s888_up_%s.csv'%(v,k) # 合并后的输出文件名\n",
|
||||||
|
" total_code = '%s888'%(v)\n",
|
||||||
|
" sp_chars = ['csv%s'%(k)] #'csv2021', 'csv2022',需要查找的主力年份文件\n",
|
||||||
|
"\n",
|
||||||
|
" try:\n",
|
||||||
|
" df = pd.read_csv(map_file, index_col=0, encoding='utf', low_memory=False)\n",
|
||||||
|
" except FileNotFoundError:\n",
|
||||||
|
" raise ValueError(\"主力合约统计表文件不存在,请检查文件路径是否正确。\")\n",
|
||||||
|
" df['mapping_ts_code_new'] = df['mapping_ts_code'].apply(lambda x: x.split('.')[0])\n",
|
||||||
|
" df['temp_path']= file_path\n",
|
||||||
|
" df['final_path'] = df['temp_path'].astype(str) + df['trade_date'].astype(str) + '/' + df['mapping_ts_code_new'] + '.csv'\n",
|
||||||
|
" del df['mapping_ts_code_new'], df['temp_path']\n",
|
||||||
|
"\n",
|
||||||
|
" for sp_char in sp_chars:\n",
|
||||||
|
" csv_files = [sp_file for sp_file in df['final_path'] if sp_char in sp_file]\n",
|
||||||
|
" if csv_files:\n",
|
||||||
|
" print(csv_files[:5])\n",
|
||||||
|
" print(csv_files[-5:])\n",
|
||||||
|
" dfs = pd.DataFrame()\n",
|
||||||
|
" for path in csv_files:\n",
|
||||||
|
" try:\n",
|
||||||
|
" df_temp = pd.read_csv(path) \n",
|
||||||
|
" # print('读取%s成功'%(path))\n",
|
||||||
|
" except FileNotFoundError:\n",
|
||||||
|
" raise ValueError(\"%s文件不存在,请检查文件路径是否正确。\"%(path))\n",
|
||||||
|
" break\n",
|
||||||
|
" df_temp.columns = ['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
|
||||||
|
" dfs = pd.concat([dfs, df_temp], ignore_index=True)\n",
|
||||||
|
" combined_df = dfs.sort_values(by = ['交易日', '最后修改时间', '最后修改毫秒'])\n",
|
||||||
|
" combined_df.insert(0,'统一代码', total_code)\n",
|
||||||
|
" combined_df.to_csv(output_file, index=False)\n",
|
||||||
|
" print(\"合并完成,并已导出到%s文件。\"%(output_file))\n",
|
||||||
|
" else:\n",
|
||||||
|
" print('品种%s在%s年无数据!'%(v,k))\n",
|
||||||
|
" continue\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
138
2.数据下载与处理/shelve合并数据脚本.ipynb
Normal file
138
2.数据下载与处理/shelve合并数据脚本.ipynb
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import shelve\n",
|
||||||
|
"\n",
|
||||||
|
"# 要合并的shelve数据库路径\n",
|
||||||
|
"shelve_files = ['D:/contract_data1.dat', 'D:/contract_data2.dat', 'D:/contract_data3.dat']\n",
|
||||||
|
"# 合并后的新数据库路径\n",
|
||||||
|
"new_shelve_file = 'D:/contract_data3.dat'\n",
|
||||||
|
"\n",
|
||||||
|
"# 创建一个新的shelve数据库来存储合并后的内容\n",
|
||||||
|
"with shelve.open(new_shelve_file, writeback=True) as new_db:\n",
|
||||||
|
" for shelve_file in shelve_files:\n",
|
||||||
|
" try:\n",
|
||||||
|
" with shelve.open(shelve_file) as db:\n",
|
||||||
|
" for key in db:\n",
|
||||||
|
" if key in new_db:\n",
|
||||||
|
" print(f\"Warning: Key {key} already exists in the new database. Overwriting.\")\n",
|
||||||
|
" new_db[key] = db[key]\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" print(f\"Error processing {shelve_file}: {e}\")\n",
|
||||||
|
"\n",
|
||||||
|
"print(f\"Databases merged into {new_shelve_file}\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import shelve\n",
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"# 要合并的shelve数据库路径\n",
|
||||||
|
"shelve_files = [r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview1', r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview2', r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview3']\n",
|
||||||
|
"# 合并后的新数据库路径\n",
|
||||||
|
"new_shelve_file = r'D:\\bar_overview'\n",
|
||||||
|
"\n",
|
||||||
|
"# 创建一个新的shelve数据库来存储合并后的内容\n",
|
||||||
|
"with shelve.open(new_shelve_file, writeback=True) as new_db:\n",
|
||||||
|
" for shelve_file in shelve_files:\n",
|
||||||
|
" # 检查文件是否存在\n",
|
||||||
|
" if not os.path.exists(shelve_file):\n",
|
||||||
|
" print(f\"错误:文件 {shelve_file} 不存在。\")\n",
|
||||||
|
" continue\n",
|
||||||
|
" try:\n",
|
||||||
|
" # 打开并读取shelve数据库\n",
|
||||||
|
" with shelve.open(shelve_file) as db:\n",
|
||||||
|
" for key in db:\n",
|
||||||
|
" if key in new_db:\n",
|
||||||
|
" print(f\"警告:键 {key} 已存在于新数据库中。将覆盖。\")\n",
|
||||||
|
" new_db[key] = db[key]\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" print(f\"处理文件 {shelve_file} 时出错:{e}\")\n",
|
||||||
|
" if 'db type could not be determined' in str(e):\n",
|
||||||
|
" print(f\"提示:文件 {shelve_file} 可能已损坏或不是一个shelve数据库。\")\n",
|
||||||
|
" continue\n",
|
||||||
|
"\n",
|
||||||
|
"print(f\"数据库已合并到 {new_shelve_file}\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import shelve\n",
|
||||||
|
"f_shelve = shelve.open(r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview1') # 创建一个文件句柄\n",
|
||||||
|
"# 使用for循环打印内容\n",
|
||||||
|
"for k,v in f_shelve.items():\n",
|
||||||
|
" print(k,v)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import shelve\n",
|
||||||
|
"\n",
|
||||||
|
"# 打开所有源 shelve 数据库\n",
|
||||||
|
"db1 = shelve.open(r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview1')\n",
|
||||||
|
"db2 = shelve.open(r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview2')\n",
|
||||||
|
"db3 = shelve.open(r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview3')\n",
|
||||||
|
"\n",
|
||||||
|
"# 创建一个新的目标 shelve 数据库\n",
|
||||||
|
"merged_db = shelve.open(r'D:\\bar_overview')\n",
|
||||||
|
"\n",
|
||||||
|
"# 将第一个数据库的所有条目添加到新的数据库中\n",
|
||||||
|
"for key in db1:\n",
|
||||||
|
" merged_db[key] = db1[key]\n",
|
||||||
|
"\n",
|
||||||
|
"# 将第二个数据库的所有条目添加到新的数据库中\n",
|
||||||
|
"for key in db2:\n",
|
||||||
|
" merged_db[key] = db2[key]\n",
|
||||||
|
"\n",
|
||||||
|
"# 将第三个数据库的所有条目添加到新的数据库中\n",
|
||||||
|
"for key in db3:\n",
|
||||||
|
" merged_db[key] = db3[key]\n",
|
||||||
|
"\n",
|
||||||
|
"# 关闭所有数据库\n",
|
||||||
|
"db1.close()\n",
|
||||||
|
"db2.close()\n",
|
||||||
|
"db3.close()\n",
|
||||||
|
"merged_db.close()\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
309
2.数据下载与处理/sqlite导出为csv文件.ipynb
Normal file
309
2.数据下载与处理/sqlite导出为csv文件.ipynb
Normal file
@@ -0,0 +1,309 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# import sqlite3\n",
|
||||||
|
"# import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"# # 连接到SQLite数据库\n",
|
||||||
|
"# conn = sqlite3.connect('database.db')\n",
|
||||||
|
"\n",
|
||||||
|
"# # 从数据库中读取表数据到DataFrame\n",
|
||||||
|
"# table_name = 'your_table_name' # 替换为实际表名\n",
|
||||||
|
"# query = f\"SELECT * FROM {table_name}\"\n",
|
||||||
|
"# df = pd.read_sql_query(query, conn)\n",
|
||||||
|
"\n",
|
||||||
|
"# 按照“本地代码”分组并导出为CSV文件\n",
|
||||||
|
"for local_code, group in df.groupby('本地代码'):\n",
|
||||||
|
" # 为每个“本地代码”生成一个CSV文件,文件名使用该代码值\n",
|
||||||
|
" csv_filename = f\"{local_code}.csv\"\n",
|
||||||
|
" group.to_csv(csv_filename, index=False, encoding='utf-8-sig')\n",
|
||||||
|
" print(f\"数据已导出到 {csv_filename}\")\n",
|
||||||
|
"\n",
|
||||||
|
"# 关闭数据库连接\n",
|
||||||
|
"conn.close()\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sqlite3\n",
|
||||||
|
"import pandas as pd"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 连接到SQLite数据库\n",
|
||||||
|
"conn = sqlite3.connect(r'D:\\of_data\\database.db')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 从数据库中读取表数据到DataFrame\n",
|
||||||
|
"table_name = 'dbbardata' # 替换为实际表名\n",
|
||||||
|
"query = f\"SELECT * FROM {table_name}\"\n",
|
||||||
|
"df = pd.read_sql_query(query, conn)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"del(df['id'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"del group"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"数据已导出到 AP00_CZCE.csv\n",
|
||||||
|
"数据已导出到 APJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 CF00_CZCE.csv\n",
|
||||||
|
"数据已导出到 CFJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 CJ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 CJJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 CY00_CZCE.csv\n",
|
||||||
|
"数据已导出到 CYJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 FG00_CZCE.csv\n",
|
||||||
|
"数据已导出到 FGJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 IC00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 ICJQ00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 IF00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 IFJQ00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 IH00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 IHJQ00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 IM00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 IMJQ00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 JR00_CZCE.csv\n",
|
||||||
|
"数据已导出到 JRJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 LR00_CZCE.csv\n",
|
||||||
|
"数据已导出到 LRJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 MA00_CZCE.csv\n",
|
||||||
|
"数据已导出到 MAJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 OI00_CZCE.csv\n",
|
||||||
|
"数据已导出到 OIJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 PF00_CZCE.csv\n",
|
||||||
|
"数据已导出到 PFJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 PK00_CZCE.csv\n",
|
||||||
|
"数据已导出到 PKJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 PM00_CZCE.csv\n",
|
||||||
|
"数据已导出到 PMJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 PX00_CZCE.csv\n",
|
||||||
|
"数据已导出到 PXJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 RI00_CZCE.csv\n",
|
||||||
|
"数据已导出到 RIJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 RM00_CZCE.csv\n",
|
||||||
|
"数据已导出到 RMJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 RS00_CZCE.csv\n",
|
||||||
|
"数据已导出到 RSJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 SA00_CZCE.csv\n",
|
||||||
|
"数据已导出到 SAJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 SF00_CZCE.csv\n",
|
||||||
|
"数据已导出到 SFJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 SH00_CZCE.csv\n",
|
||||||
|
"数据已导出到 SHJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 SM00_CZCE.csv\n",
|
||||||
|
"数据已导出到 SMJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 SR00_CZCE.csv\n",
|
||||||
|
"数据已导出到 SRJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 T00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 TA00_CZCE.csv\n",
|
||||||
|
"数据已导出到 TAJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 TF00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 TFJQ00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 TJQ00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 TL00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 TLJQ00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 TS00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 TSJQ00_CFFEX.csv\n",
|
||||||
|
"数据已导出到 UR00_CZCE.csv\n",
|
||||||
|
"数据已导出到 URJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 WH00_CZCE.csv\n",
|
||||||
|
"数据已导出到 WHJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 ZC00_CZCE.csv\n",
|
||||||
|
"数据已导出到 ZCJQ00_CZCE.csv\n",
|
||||||
|
"数据已导出到 a00_DCE.csv\n",
|
||||||
|
"数据已导出到 aJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 ag00_SHFE.csv\n",
|
||||||
|
"数据已导出到 agJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 al00_SHFE.csv\n",
|
||||||
|
"数据已导出到 alJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 ao00_SHFE.csv\n",
|
||||||
|
"数据已导出到 aoJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 au00_SHFE.csv\n",
|
||||||
|
"数据已导出到 auJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 b00_DCE.csv\n",
|
||||||
|
"数据已导出到 bJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 bb00_DCE.csv\n",
|
||||||
|
"数据已导出到 bbJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 bc00_INE.csv\n",
|
||||||
|
"数据已导出到 bcJQ00_INE.csv\n",
|
||||||
|
"数据已导出到 br00_SHFE.csv\n",
|
||||||
|
"数据已导出到 brJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 bu00_SHFE.csv\n",
|
||||||
|
"数据已导出到 buJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 c00_DCE.csv\n",
|
||||||
|
"数据已导出到 cJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 cs00_DCE.csv\n",
|
||||||
|
"数据已导出到 csJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 cu00_SHFE.csv\n",
|
||||||
|
"数据已导出到 cuJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 eb00_DCE.csv\n",
|
||||||
|
"数据已导出到 ebJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 ec00_INE.csv\n",
|
||||||
|
"数据已导出到 ecJQ00_INE.csv\n",
|
||||||
|
"数据已导出到 eg00_DCE.csv\n",
|
||||||
|
"数据已导出到 egJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 fb00_DCE.csv\n",
|
||||||
|
"数据已导出到 fbJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 fu00_SHFE.csv\n",
|
||||||
|
"数据已导出到 fuJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 hc00_SHFE.csv\n",
|
||||||
|
"数据已导出到 hcJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 i00_DCE.csv\n",
|
||||||
|
"数据已导出到 iJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 j00_DCE.csv\n",
|
||||||
|
"数据已导出到 jJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 jd00_DCE.csv\n",
|
||||||
|
"数据已导出到 jdJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 jm00_DCE.csv\n",
|
||||||
|
"数据已导出到 jmJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 l00_DCE.csv\n",
|
||||||
|
"数据已导出到 lJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 lc00_GFEX.csv\n",
|
||||||
|
"数据已导出到 lcJQ00_GFEX.csv\n",
|
||||||
|
"数据已导出到 lh00_DCE.csv\n",
|
||||||
|
"数据已导出到 lhJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 lu00_INE.csv\n",
|
||||||
|
"数据已导出到 luJQ00_INE.csv\n",
|
||||||
|
"数据已导出到 m00_DCE.csv\n",
|
||||||
|
"数据已导出到 mJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 ni00_SHFE.csv\n",
|
||||||
|
"数据已导出到 niJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 nr00_INE.csv\n",
|
||||||
|
"数据已导出到 nrJQ00_INE.csv\n",
|
||||||
|
"数据已导出到 p00_DCE.csv\n",
|
||||||
|
"数据已导出到 pJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 pb00_SHFE.csv\n",
|
||||||
|
"数据已导出到 pbJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 pg00_DCE.csv\n",
|
||||||
|
"数据已导出到 pgJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 pp00_DCE.csv\n",
|
||||||
|
"数据已导出到 ppJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 rb00_SHFE.csv\n",
|
||||||
|
"数据已导出到 rbJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 rr00_DCE.csv\n",
|
||||||
|
"数据已导出到 rrJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 ru00_SHFE.csv\n",
|
||||||
|
"数据已导出到 ruJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 sc00_INE.csv\n",
|
||||||
|
"数据已导出到 scJQ00_INE.csv\n",
|
||||||
|
"数据已导出到 si00_GFEX.csv\n",
|
||||||
|
"数据已导出到 siJQ00_GFEX.csv\n",
|
||||||
|
"数据已导出到 sn00_SHFE.csv\n",
|
||||||
|
"数据已导出到 snJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 sp00_SHFE.csv\n",
|
||||||
|
"数据已导出到 spJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 ss00_SHFE.csv\n",
|
||||||
|
"数据已导出到 ssJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 v00_DCE.csv\n",
|
||||||
|
"数据已导出到 vJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 wr00_SHFE.csv\n",
|
||||||
|
"数据已导出到 wrJQ00_SHFE.csv\n",
|
||||||
|
"数据已导出到 y00_DCE.csv\n",
|
||||||
|
"数据已导出到 yJQ00_DCE.csv\n",
|
||||||
|
"数据已导出到 zn00_SHFE.csv\n",
|
||||||
|
"数据已导出到 znJQ00_SHFE.csv\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"for local_code, group in df.groupby('symbol'):\n",
|
||||||
|
" # 为每个“本地代码”生成一个CSV文件,文件名使用该代码值\n",
|
||||||
|
" exchange = group.exchange.iloc[0]\n",
|
||||||
|
" csv_filename = f\"{local_code}_{exchange}.csv\"\n",
|
||||||
|
" group.to_csv(csv_filename, index=False, encoding='utf-8-sig')\n",
|
||||||
|
" print(f\"数据已导出到 {csv_filename}\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 17,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"conn.close()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
371
2.数据下载与处理/ssquant_download/ssquant_download.ipynb
Normal file
371
2.数据下载与处理/ssquant_download/ssquant_download.ipynb
Normal file
@@ -0,0 +1,371 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"from ssquant.SQDATA import TakeData\n",
|
||||||
|
"\n",
|
||||||
|
"#注意首先需要pip install ssquant\n",
|
||||||
|
"#否则链接不到数据库\n",
|
||||||
|
"#输入俱乐部的账号密码即可调用,注意保密。\n",
|
||||||
|
"#目前数据是2019年1月-至今\n",
|
||||||
|
"#每日下午收盘后3点30分录入当天数据。\n",
|
||||||
|
"#有任何疑问可以再群里提出,或者私信我(慕金龙)\n",
|
||||||
|
"#官网: quant789.com\n",
|
||||||
|
"#公众号:松鼠Quant\n",
|
||||||
|
"#客服微信: viquant01\n",
|
||||||
|
"\n",
|
||||||
|
"#只能调取分钟及以上数据,tick数据每月底更新到百度网盘下载\n",
|
||||||
|
"\n",
|
||||||
|
"'''\n",
|
||||||
|
"获取数据-\n",
|
||||||
|
"品种:symbol,不区分大小写\n",
|
||||||
|
"起始时间:start_date,\n",
|
||||||
|
"结束时间:end_date(包含当天),\n",
|
||||||
|
"周期kline_period:1M..5M..NM(分钟),1D(天),1W(周),1Y(月)\n",
|
||||||
|
"复权adjust_type:0(不复权)1(后复权)\n",
|
||||||
|
"注意:\n",
|
||||||
|
"1.请正确输入账号密码\n",
|
||||||
|
"2.不要挂代理访问数据库\n",
|
||||||
|
"3.暂时没有股指数据,下个月补齐。\n",
|
||||||
|
"'''\n",
|
||||||
|
" \n",
|
||||||
|
"# username='俱乐部账号' password='密码'\n",
|
||||||
|
"client = TakeData(username='77777@qq.com', password='7777')\n",
|
||||||
|
"data = client.get_data(\n",
|
||||||
|
" symbol='rb888',\n",
|
||||||
|
" start_date='2023-01-02',\n",
|
||||||
|
" end_date='2024-01-03',\n",
|
||||||
|
" kline_period='60M',\n",
|
||||||
|
" adjust_type=1\n",
|
||||||
|
")\n",
|
||||||
|
"print(data)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"'''\n",
|
||||||
|
"datetime:时间,\n",
|
||||||
|
"\n",
|
||||||
|
"symbol:品种,\n",
|
||||||
|
"\n",
|
||||||
|
"open:开盘价,\n",
|
||||||
|
"\n",
|
||||||
|
"high:最高价,\n",
|
||||||
|
"\n",
|
||||||
|
"low:最低价,\n",
|
||||||
|
"\n",
|
||||||
|
"close:收盘价,\n",
|
||||||
|
"\n",
|
||||||
|
"volume:成交量(单bar),\n",
|
||||||
|
"\n",
|
||||||
|
"amount:成交金额(单bar),\n",
|
||||||
|
"\n",
|
||||||
|
"openint:持仓量(单bar),\n",
|
||||||
|
"\n",
|
||||||
|
"cumulative_openint:累计持仓量,\n",
|
||||||
|
"\n",
|
||||||
|
"open_bidp , open_askp: K线第一个价格的买一价格和卖一价格\n",
|
||||||
|
"\n",
|
||||||
|
"close_bidp , close_askp: K线最后一个价格的买一价格和卖一价格\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" datetime symbol open high low close volume amount cumulative_openint openint open_bidp open_askp close_bidp close_askp\n",
|
||||||
|
"0 2023-01-03 10:00:00 rb2305 4081.0 4081.0 4016.0 4037.0 737537 29782187220 1883481 -48415 4081.0 4084.0 4037.0 4038.0\n",
|
||||||
|
"1 2023-01-03 11:00:00 rb2305 4038.0 4056.0 4037.0 4042.0 158548 6415696920 1887716 4235 4037.0 4038.0 4042.0 4044.0\n",
|
||||||
|
"2 2023-01-03 12:00:00 rb2305 4044.0 4054.0 4037.0 4051.0 67448 2728130300 1890125 2409 4043.0 4044.0 4050.0 4051.0\n",
|
||||||
|
"3 2023-01-03 14:00:00 rb2305 4055.0 4065.0 4045.0 4058.0 110181 4469698600 1895841 5723 4050.0 4051.0 4058.0 4059.0\n",
|
||||||
|
"4 2023-01-03 15:00:00 rb2305 4059.0 4074.0 4056.0 4063.0 167932 6824213940 1882723 -13125 4058.0 4059.0 4062.0 4063.0\n",
|
||||||
|
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
|
||||||
|
"1689 2024-01-03 12:00:00 rb2405 4055.0 4057.0 4044.0 4049.0 79745 3229361570 1597387 -6515 4054.0 4055.0 4049.0 4050.0\n",
|
||||||
|
"1690 2024-01-03 14:00:00 rb2405 4050.0 4056.0 4046.0 4049.0 55040 2229498750 1598566 1179 4050.0 4051.0 4049.0 4050.0\n",
|
||||||
|
"1691 2024-01-03 15:00:00 rb2405 4050.0 4064.0 4048.0 4055.0 148845 6038835190 1583796 -14770 4049.0 4050.0 4055.0 4056.0\n",
|
||||||
|
"1692 2024-01-03 22:00:00 rb2405 4054.0 4054.0 4040.0 4049.0 181753 7354584770 1582419 990 4053.0 4054.0 4048.0 4049.0\n",
|
||||||
|
"1693 2024-01-03 23:00:00 rb2405 4049.0 4057.0 4042.0 4049.0 104712 4240341050 1574287 -8132 4048.0 4049.0 4049.0 4050.0"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from ssquant.SQDATA import TakeData"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"out_path = r'D:/data'\n",
|
||||||
|
"symbol_name = 'rb888' #主力连续888 次主力合约777\n",
|
||||||
|
"time_period = '1M'\n",
|
||||||
|
"start_time = '2000-01-01'\n",
|
||||||
|
"end_time = '2019-01-31'\n",
|
||||||
|
"adjust_k = 'Faj' #Naj:Non adjust,Faj:Forward adjust,后复权\n",
|
||||||
|
"\n",
|
||||||
|
"if adjust_k == 'Naj':\n",
|
||||||
|
" adjust_tmp = 0\n",
|
||||||
|
"elif adjust_k == 'Faj':\n",
|
||||||
|
" adjust_tmp = 1"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"client = TakeData(username='240884432@qq.com', password='Zj123!@#')\n",
|
||||||
|
"data = client.get_data(\n",
|
||||||
|
" symbol=symbol_name,\n",
|
||||||
|
" start_date=start_time,\n",
|
||||||
|
" end_date=end_time,\n",
|
||||||
|
" kline_period=time_period,\n",
|
||||||
|
" adjust_type= adjust_tmp\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 49,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"头部文件为:--------------------\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>datetime</th>\n",
|
||||||
|
" <th>symbol</th>\n",
|
||||||
|
" <th>open</th>\n",
|
||||||
|
" <th>high</th>\n",
|
||||||
|
" <th>low</th>\n",
|
||||||
|
" <th>close</th>\n",
|
||||||
|
" <th>volume</th>\n",
|
||||||
|
" <th>amount</th>\n",
|
||||||
|
" <th>cumulative_openint</th>\n",
|
||||||
|
" <th>openint</th>\n",
|
||||||
|
" <th>open_bidp</th>\n",
|
||||||
|
" <th>open_askp</th>\n",
|
||||||
|
" <th>close_bidp</th>\n",
|
||||||
|
" <th>close_askp</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>2019-01-02 09:01:00</td>\n",
|
||||||
|
" <td>rb1905</td>\n",
|
||||||
|
" <td>3399</td>\n",
|
||||||
|
" <td>3405</td>\n",
|
||||||
|
" <td>3389</td>\n",
|
||||||
|
" <td>3401</td>\n",
|
||||||
|
" <td>69562</td>\n",
|
||||||
|
" <td>2362607160</td>\n",
|
||||||
|
" <td>2383714</td>\n",
|
||||||
|
" <td>16864</td>\n",
|
||||||
|
" <td>3399.0</td>\n",
|
||||||
|
" <td>3400.0</td>\n",
|
||||||
|
" <td>3400.0</td>\n",
|
||||||
|
" <td>3401.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>2019-01-02 09:02:00</td>\n",
|
||||||
|
" <td>rb1905</td>\n",
|
||||||
|
" <td>3401</td>\n",
|
||||||
|
" <td>3430</td>\n",
|
||||||
|
" <td>3401</td>\n",
|
||||||
|
" <td>3410</td>\n",
|
||||||
|
" <td>88696</td>\n",
|
||||||
|
" <td>3034283200</td>\n",
|
||||||
|
" <td>2399530</td>\n",
|
||||||
|
" <td>-12248</td>\n",
|
||||||
|
" <td>3401.0</td>\n",
|
||||||
|
" <td>3402.0</td>\n",
|
||||||
|
" <td>3409.0</td>\n",
|
||||||
|
" <td>3410.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>2019-01-02 09:03:00</td>\n",
|
||||||
|
" <td>rb1905</td>\n",
|
||||||
|
" <td>3409</td>\n",
|
||||||
|
" <td>3414</td>\n",
|
||||||
|
" <td>3409</td>\n",
|
||||||
|
" <td>3412</td>\n",
|
||||||
|
" <td>22828</td>\n",
|
||||||
|
" <td>778740580</td>\n",
|
||||||
|
" <td>2387356</td>\n",
|
||||||
|
" <td>1180</td>\n",
|
||||||
|
" <td>3409.0</td>\n",
|
||||||
|
" <td>3410.0</td>\n",
|
||||||
|
" <td>3411.0</td>\n",
|
||||||
|
" <td>3412.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>2019-01-02 09:04:00</td>\n",
|
||||||
|
" <td>rb1905</td>\n",
|
||||||
|
" <td>3412</td>\n",
|
||||||
|
" <td>3413</td>\n",
|
||||||
|
" <td>3403</td>\n",
|
||||||
|
" <td>3404</td>\n",
|
||||||
|
" <td>17378</td>\n",
|
||||||
|
" <td>592413220</td>\n",
|
||||||
|
" <td>2388158</td>\n",
|
||||||
|
" <td>54</td>\n",
|
||||||
|
" <td>3411.0</td>\n",
|
||||||
|
" <td>3412.0</td>\n",
|
||||||
|
" <td>3404.0</td>\n",
|
||||||
|
" <td>3405.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>2019-01-02 09:05:00</td>\n",
|
||||||
|
" <td>rb1905</td>\n",
|
||||||
|
" <td>3405</td>\n",
|
||||||
|
" <td>3409</td>\n",
|
||||||
|
" <td>3405</td>\n",
|
||||||
|
" <td>3405</td>\n",
|
||||||
|
" <td>15770</td>\n",
|
||||||
|
" <td>537276980</td>\n",
|
||||||
|
" <td>2388190</td>\n",
|
||||||
|
" <td>1674</td>\n",
|
||||||
|
" <td>3405.0</td>\n",
|
||||||
|
" <td>3406.0</td>\n",
|
||||||
|
" <td>3405.0</td>\n",
|
||||||
|
" <td>3406.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" datetime symbol open high low close volume amount \\\n",
|
||||||
|
"0 2019-01-02 09:01:00 rb1905 3399 3405 3389 3401 69562 2362607160 \n",
|
||||||
|
"1 2019-01-02 09:02:00 rb1905 3401 3430 3401 3410 88696 3034283200 \n",
|
||||||
|
"2 2019-01-02 09:03:00 rb1905 3409 3414 3409 3412 22828 778740580 \n",
|
||||||
|
"3 2019-01-02 09:04:00 rb1905 3412 3413 3403 3404 17378 592413220 \n",
|
||||||
|
"4 2019-01-02 09:05:00 rb1905 3405 3409 3405 3405 15770 537276980 \n",
|
||||||
|
"\n",
|
||||||
|
" cumulative_openint openint open_bidp open_askp close_bidp close_askp \n",
|
||||||
|
"0 2383714 16864 3399.0 3400.0 3400.0 3401.0 \n",
|
||||||
|
"1 2399530 -12248 3401.0 3402.0 3409.0 3410.0 \n",
|
||||||
|
"2 2387356 1180 3409.0 3410.0 3411.0 3412.0 \n",
|
||||||
|
"3 2388158 54 3411.0 3412.0 3404.0 3405.0 \n",
|
||||||
|
"4 2388190 1674 3405.0 3406.0 3405.0 3406.0 "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 49,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print('头部文件为:--------------------')\n",
|
||||||
|
"data.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from datetime import datetime\n",
|
||||||
|
" \n",
|
||||||
|
"# 假设你有一个字符串,表示时间,格式为 'YYYY-MM-DD HH:MM:SS'\n",
|
||||||
|
"real_start_time = data.iloc[0,0]\n",
|
||||||
|
" \n",
|
||||||
|
"# 使用datetime.strptime将字符串转换为时间\n",
|
||||||
|
"time_obj = datetime.strptime(real_start_time, '%Y-%m-%d %H:%M:%S')\n",
|
||||||
|
" \n",
|
||||||
|
"# 获取年月日\n",
|
||||||
|
"year = time_obj.year\n",
|
||||||
|
"month = time_obj.month\n",
|
||||||
|
"day = time_obj.day\n",
|
||||||
|
" \n",
|
||||||
|
"print(f'年: {year}, 月: {month}, 日: {day}')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print('尾部文件为:--------------------')\n",
|
||||||
|
"data.tail()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import datetime\n",
|
||||||
|
"real_start_time = pd.to_datetime(data.iloc[0,0]).date().strftime('%Y-%m-%d')\n",
|
||||||
|
"real_end_time = pd.to_datetime(data.iloc[-1,0]).date().strftime('%Y-%m-%d')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data.to_csv('%s/%s_%s_%s(%s_%s).csv'%(out_path,symbol_name,time_period,adjust_k,real_start_time,real_end_time), index=False)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
79
2.数据下载与处理/ssquant_download/数据库使用示例.py
Normal file
79
2.数据下载与处理/ssquant_download/数据库使用示例.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
from ssquant.SQDATA import TakeData
|
||||||
|
|
||||||
|
#注意首先需要pip install ssquant
|
||||||
|
#否则链接不到数据库
|
||||||
|
#输入俱乐部的账号密码即可调用,注意保密。
|
||||||
|
#目前数据是2019年1月-至今
|
||||||
|
#每日下午收盘后3点30分录入当天数据。
|
||||||
|
#有任何疑问可以再群里提出,或者私信我(慕金龙)
|
||||||
|
#官网: quant789.com
|
||||||
|
#公众号:松鼠Quant
|
||||||
|
#客服微信: viquant01
|
||||||
|
|
||||||
|
#只能调取分钟及以上数据,tick数据每月底更新到百度网盘下载
|
||||||
|
|
||||||
|
'''
|
||||||
|
获取数据-
|
||||||
|
品种:symbol,不区分大小写
|
||||||
|
起始时间:start_date,
|
||||||
|
结束时间:end_date(包含当天),
|
||||||
|
周期kline_period:1M..5M..NM(分钟),1D(天),1W(周),1Y(月)
|
||||||
|
复权adjust_type:0(不复权)1(后复权)
|
||||||
|
注意:
|
||||||
|
1.请正确输入账号密码
|
||||||
|
2.不要挂代理访问数据库
|
||||||
|
3.暂时没有股指数据,下个月补齐。
|
||||||
|
'''
|
||||||
|
|
||||||
|
# username='俱乐部账号' password='密码'
|
||||||
|
client = TakeData(username='77777@qq.com', password='7777')
|
||||||
|
data = client.get_data(
|
||||||
|
symbol='rb888',
|
||||||
|
start_date='2023-01-02',
|
||||||
|
end_date='2024-01-03',
|
||||||
|
kline_period='60M',
|
||||||
|
adjust_type=1
|
||||||
|
)
|
||||||
|
print(data)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
datetime:时间,
|
||||||
|
|
||||||
|
symbol:品种,
|
||||||
|
|
||||||
|
open:开盘价,
|
||||||
|
|
||||||
|
high:最高价,
|
||||||
|
|
||||||
|
low:最低价,
|
||||||
|
|
||||||
|
close:收盘价,
|
||||||
|
|
||||||
|
volume:成交量(单bar),
|
||||||
|
|
||||||
|
amount:成交金额(单bar),
|
||||||
|
|
||||||
|
openint:持仓量(单bar),
|
||||||
|
|
||||||
|
cumulative_openint:累计持仓量,
|
||||||
|
|
||||||
|
open_bidp , open_askp: K线第一个价格的买一价格和卖一价格
|
||||||
|
|
||||||
|
close_bidp , close_askp: K线最后一个价格的买一价格和卖一价格
|
||||||
|
|
||||||
|
|
||||||
|
datetime symbol open high low close volume amount cumulative_openint openint open_bidp open_askp close_bidp close_askp
|
||||||
|
0 2023-01-03 10:00:00 rb2305 4081.0 4081.0 4016.0 4037.0 737537 29782187220 1883481 -48415 4081.0 4084.0 4037.0 4038.0
|
||||||
|
1 2023-01-03 11:00:00 rb2305 4038.0 4056.0 4037.0 4042.0 158548 6415696920 1887716 4235 4037.0 4038.0 4042.0 4044.0
|
||||||
|
2 2023-01-03 12:00:00 rb2305 4044.0 4054.0 4037.0 4051.0 67448 2728130300 1890125 2409 4043.0 4044.0 4050.0 4051.0
|
||||||
|
3 2023-01-03 14:00:00 rb2305 4055.0 4065.0 4045.0 4058.0 110181 4469698600 1895841 5723 4050.0 4051.0 4058.0 4059.0
|
||||||
|
4 2023-01-03 15:00:00 rb2305 4059.0 4074.0 4056.0 4063.0 167932 6824213940 1882723 -13125 4058.0 4059.0 4062.0 4063.0
|
||||||
|
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
|
||||||
|
1689 2024-01-03 12:00:00 rb2405 4055.0 4057.0 4044.0 4049.0 79745 3229361570 1597387 -6515 4054.0 4055.0 4049.0 4050.0
|
||||||
|
1690 2024-01-03 14:00:00 rb2405 4050.0 4056.0 4046.0 4049.0 55040 2229498750 1598566 1179 4050.0 4051.0 4049.0 4050.0
|
||||||
|
1691 2024-01-03 15:00:00 rb2405 4050.0 4064.0 4048.0 4055.0 148845 6038835190 1583796 -14770 4049.0 4050.0 4055.0 4056.0
|
||||||
|
1692 2024-01-03 22:00:00 rb2405 4054.0 4054.0 4040.0 4049.0 181753 7354584770 1582419 990 4053.0 4054.0 4048.0 4049.0
|
||||||
|
1693 2024-01-03 23:00:00 rb2405 4049.0 4057.0 4042.0 4049.0 104712 4240341050 1574287 -8132 4048.0 4049.0 4049.0 4050.0
|
||||||
|
'''
|
||||||
65
2.数据下载与处理/ssquant_download/数据库读取_生产K线图.py
Normal file
65
2.数据下载与处理/ssquant_download/数据库读取_生产K线图.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
from ssquant.SQDATA import TakeData
|
||||||
|
from pyecharts import options as opts
|
||||||
|
from pyecharts.charts import Kline, Bar, Grid
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def plotK(data):
|
||||||
|
# 示例数据(您需要替换为您的实际数据)
|
||||||
|
kline_data = data[['open', 'close', 'low', 'high']].values.tolist()
|
||||||
|
dates = data.index.strftime('%Y-%m-%d %H:%M:%S').tolist()
|
||||||
|
symbol_data = data['symbol'].values.tolist()
|
||||||
|
# 标记 symbol 变化的位置
|
||||||
|
markline_data = []
|
||||||
|
for i in range(1, len(symbol_data)):
|
||||||
|
if symbol_data[i] != symbol_data[i-1]:
|
||||||
|
# 当前 symbol 与前一个不同时,添加红色竖线
|
||||||
|
markline_data.append(opts.MarkLineItem(x=dates[i], name=f'前一个合约{symbol_data[i-1]},当前合约{symbol_data[i]}'))
|
||||||
|
|
||||||
|
|
||||||
|
# 数据缩放组件配置
|
||||||
|
datazoom_slider = opts.DataZoomOpts(type_="slider", xaxis_index=[0, 1, 2, 3,4], range_start=50, range_end=100)
|
||||||
|
datazoom_inside = opts.DataZoomOpts(type_="inside", xaxis_index=[0, 1, 2, 3,4])
|
||||||
|
|
||||||
|
# 创建 K 线图
|
||||||
|
kline = (
|
||||||
|
Kline(init_opts=opts.InitOpts(width="100%", height="900px"))
|
||||||
|
.add_xaxis(dates)
|
||||||
|
.add_yaxis('K线图表', kline_data,markline_opts=opts.MarkLineOpts(data=markline_data, symbol='none', linestyle_opts=opts.LineStyleOpts(color="red")))#"ssss",
|
||||||
|
.set_global_opts(
|
||||||
|
datazoom_opts=[datazoom_slider, datazoom_inside],
|
||||||
|
toolbox_opts=opts.ToolboxOpts(is_show=True, pos_top="0%", pos_right="80%"),
|
||||||
|
legend_opts=opts.LegendOpts(pos_left='40%'), # 调整图例位置到底部
|
||||||
|
)
|
||||||
|
)
|
||||||
|
kline.render('K线图.html')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
获取数据-
|
||||||
|
品种:symbol,
|
||||||
|
起始时间:start_date,
|
||||||
|
结束时间:end_date(包含当天),
|
||||||
|
周期kline_period:1M..5M..NM(分钟),1D(天),1W(周),1Y(月)
|
||||||
|
复权adjust_type:0(不复权)1(后复权)
|
||||||
|
'''
|
||||||
|
|
||||||
|
# 请在下方输入你的俱乐部账号密码,username='俱乐部账号' password='密码'
|
||||||
|
|
||||||
|
client = TakeData(username='1234@qq.com', password='123')
|
||||||
|
data = client.get_data(
|
||||||
|
symbol='rb888',
|
||||||
|
start_date='2023-12-28',
|
||||||
|
end_date='2024-01-17',
|
||||||
|
kline_period='60M',
|
||||||
|
adjust_type=1
|
||||||
|
)
|
||||||
|
data.set_index("datetime", inplace=True)
|
||||||
|
data.index = pd.to_datetime(data.index)
|
||||||
|
print(data)
|
||||||
|
#生产K线图表到脚本同目录下
|
||||||
|
plotK(data)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
249
2.数据下载与处理/ssquant_download/松鼠数据下载脚本.ipynb
Normal file
249
2.数据下载与处理/ssquant_download/松鼠数据下载脚本.ipynb
Normal file
@@ -0,0 +1,249 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "1a846b12",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"from ssquant.SQDATA import TakeData\n",
|
||||||
|
"\n",
|
||||||
|
"#注意首先需要pip install ssquant\n",
|
||||||
|
"#否则链接不到数据库\n",
|
||||||
|
"#输入俱乐部的账号密码即可调用,注意保密。\n",
|
||||||
|
"#目前数据是2019年1月-至今\n",
|
||||||
|
"#每日下午收盘后3点30分录入当天数据。\n",
|
||||||
|
"#有任何疑问可以再群里提出,或者私信我(慕金龙)\n",
|
||||||
|
"#官网: quant789.com\n",
|
||||||
|
"#公众号:松鼠Quant\n",
|
||||||
|
"#客服微信: viquant01\n",
|
||||||
|
"\n",
|
||||||
|
"#只能调取分钟及以上数据,tick数据每月底更新到百度网盘下载\n",
|
||||||
|
"\n",
|
||||||
|
"'''\n",
|
||||||
|
"获取数据-\n",
|
||||||
|
"品种:symbol,不区分大小写\n",
|
||||||
|
"起始时间:start_date,\n",
|
||||||
|
"结束时间:end_date(包含当天),\n",
|
||||||
|
"周期kline_period:1M..5M..NM(分钟),1D(天),1W(周),1Y(月)\n",
|
||||||
|
"复权adjust_type:0(不复权)1(后复权)\n",
|
||||||
|
"注意:\n",
|
||||||
|
"1.请正确输入账号密码\n",
|
||||||
|
"2.不要挂代理访问数据库\n",
|
||||||
|
"3.暂时没有股指数据,下个月补齐。\n",
|
||||||
|
"'''\n",
|
||||||
|
" \n",
|
||||||
|
"# username='俱乐部账号' password='密码'\n",
|
||||||
|
"client = TakeData(username='240884432@qq.com', password='7777')\n",
|
||||||
|
"data = client.get_data(\n",
|
||||||
|
" symbol='rb888',\n",
|
||||||
|
" start_date='2023-01-02',\n",
|
||||||
|
" end_date='2024-01-03',\n",
|
||||||
|
" kline_period='60M',\n",
|
||||||
|
" adjust_type=1\n",
|
||||||
|
")\n",
|
||||||
|
"print(data)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"'''\n",
|
||||||
|
"datetime:时间,\n",
|
||||||
|
"\n",
|
||||||
|
"symbol:品种,\n",
|
||||||
|
"\n",
|
||||||
|
"open:开盘价,\n",
|
||||||
|
"\n",
|
||||||
|
"high:最高价,\n",
|
||||||
|
"\n",
|
||||||
|
"low:最低价,\n",
|
||||||
|
"\n",
|
||||||
|
"close:收盘价,\n",
|
||||||
|
"\n",
|
||||||
|
"volume:成交量(单bar),\n",
|
||||||
|
"\n",
|
||||||
|
"amount:成交金额(单bar),\n",
|
||||||
|
"\n",
|
||||||
|
"openint:持仓量(单bar),\n",
|
||||||
|
"\n",
|
||||||
|
"cumulative_openint:累计持仓量,\n",
|
||||||
|
"\n",
|
||||||
|
"open_bidp , open_askp: K线第一个价格的买一价格和卖一价格\n",
|
||||||
|
"\n",
|
||||||
|
"close_bidp , close_askp: K线最后一个价格的买一价格和卖一价格\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" datetime symbol open high low close volume amount cumulative_openint openint open_bidp open_askp close_bidp close_askp\n",
|
||||||
|
"0 2023-01-03 10:00:00 rb2305 4081.0 4081.0 4016.0 4037.0 737537 29782187220 1883481 -48415 4081.0 4084.0 4037.0 4038.0\n",
|
||||||
|
"1 2023-01-03 11:00:00 rb2305 4038.0 4056.0 4037.0 4042.0 158548 6415696920 1887716 4235 4037.0 4038.0 4042.0 4044.0\n",
|
||||||
|
"2 2023-01-03 12:00:00 rb2305 4044.0 4054.0 4037.0 4051.0 67448 2728130300 1890125 2409 4043.0 4044.0 4050.0 4051.0\n",
|
||||||
|
"3 2023-01-03 14:00:00 rb2305 4055.0 4065.0 4045.0 4058.0 110181 4469698600 1895841 5723 4050.0 4051.0 4058.0 4059.0\n",
|
||||||
|
"4 2023-01-03 15:00:00 rb2305 4059.0 4074.0 4056.0 4063.0 167932 6824213940 1882723 -13125 4058.0 4059.0 4062.0 4063.0\n",
|
||||||
|
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
|
||||||
|
"1689 2024-01-03 12:00:00 rb2405 4055.0 4057.0 4044.0 4049.0 79745 3229361570 1597387 -6515 4054.0 4055.0 4049.0 4050.0\n",
|
||||||
|
"1690 2024-01-03 14:00:00 rb2405 4050.0 4056.0 4046.0 4049.0 55040 2229498750 1598566 1179 4050.0 4051.0 4049.0 4050.0\n",
|
||||||
|
"1691 2024-01-03 15:00:00 rb2405 4050.0 4064.0 4048.0 4055.0 148845 6038835190 1583796 -14770 4049.0 4050.0 4055.0 4056.0\n",
|
||||||
|
"1692 2024-01-03 22:00:00 rb2405 4054.0 4054.0 4040.0 4049.0 181753 7354584770 1582419 990 4053.0 4054.0 4048.0 4049.0\n",
|
||||||
|
"1693 2024-01-03 23:00:00 rb2405 4049.0 4057.0 4042.0 4049.0 104712 4240341050 1574287 -8132 4048.0 4049.0 4049.0 4050.0\n",
|
||||||
|
"'''"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"id": "65b4b7aa",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from ssquant.SQDATA import TakeData\n",
|
||||||
|
"import pandas as pd"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"id": "edd4f1e5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" datetime symbol open high low close volume \\\n",
|
||||||
|
"0 2023-01-03 10:00:00 rb2305 4081 4081 4016 4037 737537 \n",
|
||||||
|
"1 2023-01-03 11:00:00 rb2305 4038 4056 4037 4042 158548 \n",
|
||||||
|
"2 2023-01-03 12:00:00 rb2305 4044 4054 4037 4051 67448 \n",
|
||||||
|
"3 2023-01-03 14:00:00 rb2305 4055 4065 4045 4058 110181 \n",
|
||||||
|
"4 2023-01-03 15:00:00 rb2305 4059 4074 4056 4063 167932 \n",
|
||||||
|
".. ... ... ... ... ... ... ... \n",
|
||||||
|
"112 2023-02-01 12:00:00 rb2305 4126 4129 4105 4107 193291 \n",
|
||||||
|
"113 2023-02-01 14:00:00 rb2305 4108 4117 4100 4109 137182 \n",
|
||||||
|
"114 2023-02-01 15:00:00 rb2305 4109 4114 4075 4084 378930 \n",
|
||||||
|
"115 2023-02-01 22:00:00 rb2305 4092 4104 4087 4103 207519 \n",
|
||||||
|
"116 2023-02-01 23:00:00 rb2305 4102 4109 4075 4098 189724 \n",
|
||||||
|
"\n",
|
||||||
|
" amount cumulative_openint openint open_bidp open_askp \\\n",
|
||||||
|
"0 29782187220 1883481 -48415 4081 4084 \n",
|
||||||
|
"1 6415696920 1887716 4235 4037 4038 \n",
|
||||||
|
"2 2728130300 1890125 2409 4043 4044 \n",
|
||||||
|
"3 4469698600 1895841 5723 4050 4051 \n",
|
||||||
|
"4 6824213940 1882723 -13125 4058 4059 \n",
|
||||||
|
".. ... ... ... ... ... \n",
|
||||||
|
"112 7954826320 1984919 3490 4125 4126 \n",
|
||||||
|
"113 5634834380 1998312 13394 4108 4109 \n",
|
||||||
|
"114 15503896450 1994915 -3398 4109 4110 \n",
|
||||||
|
"115 8500232870 1988628 -5587 4091 4092 \n",
|
||||||
|
"116 7757206650 1973544 -15099 4101 4102 \n",
|
||||||
|
"\n",
|
||||||
|
" close_bidp close_askp \n",
|
||||||
|
"0 4037 4038 \n",
|
||||||
|
"1 4042 4044 \n",
|
||||||
|
"2 4050 4051 \n",
|
||||||
|
"3 4058 4059 \n",
|
||||||
|
"4 4062 4063 \n",
|
||||||
|
".. ... ... \n",
|
||||||
|
"112 4106 4107 \n",
|
||||||
|
"113 4108 4109 \n",
|
||||||
|
"114 4084 4085 \n",
|
||||||
|
"115 4102 4103 \n",
|
||||||
|
"116 4098 4099 \n",
|
||||||
|
"\n",
|
||||||
|
"[117 rows x 14 columns]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"client = TakeData(username='240884432@qq.com', password='Zj123!@#')\n",
|
||||||
|
"data = client.get_data(\n",
|
||||||
|
" symbol='rb888',\n",
|
||||||
|
" start_date='2023-01-01',\n",
|
||||||
|
" end_date='2023-02-01',\n",
|
||||||
|
" kline_period='60M',\n",
|
||||||
|
" adjust_type=1\n",
|
||||||
|
")\n",
|
||||||
|
"print(data)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"id": "25c70609",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" symbol open high low close volume amount \\\n",
|
||||||
|
"datetime \n",
|
||||||
|
"2023-01-03 10:00:00 rb2305 4081 4081 4016 4037 737537 29782187220 \n",
|
||||||
|
"2023-01-03 11:00:00 rb2305 4038 4056 4037 4042 158548 6415696920 \n",
|
||||||
|
"2023-01-03 12:00:00 rb2305 4044 4054 4037 4051 67448 2728130300 \n",
|
||||||
|
"2023-01-03 14:00:00 rb2305 4055 4065 4045 4058 110181 4469698600 \n",
|
||||||
|
"2023-01-03 15:00:00 rb2305 4059 4074 4056 4063 167932 6824213940 \n",
|
||||||
|
"... ... ... ... ... ... ... ... \n",
|
||||||
|
"2023-02-01 12:00:00 rb2305 4126 4129 4105 4107 193291 7954826320 \n",
|
||||||
|
"2023-02-01 14:00:00 rb2305 4108 4117 4100 4109 137182 5634834380 \n",
|
||||||
|
"2023-02-01 15:00:00 rb2305 4109 4114 4075 4084 378930 15503896450 \n",
|
||||||
|
"2023-02-01 22:00:00 rb2305 4092 4104 4087 4103 207519 8500232870 \n",
|
||||||
|
"2023-02-01 23:00:00 rb2305 4102 4109 4075 4098 189724 7757206650 \n",
|
||||||
|
"\n",
|
||||||
|
" cumulative_openint openint open_bidp open_askp \\\n",
|
||||||
|
"datetime \n",
|
||||||
|
"2023-01-03 10:00:00 1883481 -48415 4081 4084 \n",
|
||||||
|
"2023-01-03 11:00:00 1887716 4235 4037 4038 \n",
|
||||||
|
"2023-01-03 12:00:00 1890125 2409 4043 4044 \n",
|
||||||
|
"2023-01-03 14:00:00 1895841 5723 4050 4051 \n",
|
||||||
|
"2023-01-03 15:00:00 1882723 -13125 4058 4059 \n",
|
||||||
|
"... ... ... ... ... \n",
|
||||||
|
"2023-02-01 12:00:00 1984919 3490 4125 4126 \n",
|
||||||
|
"2023-02-01 14:00:00 1998312 13394 4108 4109 \n",
|
||||||
|
"2023-02-01 15:00:00 1994915 -3398 4109 4110 \n",
|
||||||
|
"2023-02-01 22:00:00 1988628 -5587 4091 4092 \n",
|
||||||
|
"2023-02-01 23:00:00 1973544 -15099 4101 4102 \n",
|
||||||
|
"\n",
|
||||||
|
" close_bidp close_askp \n",
|
||||||
|
"datetime \n",
|
||||||
|
"2023-01-03 10:00:00 4037 4038 \n",
|
||||||
|
"2023-01-03 11:00:00 4042 4044 \n",
|
||||||
|
"2023-01-03 12:00:00 4050 4051 \n",
|
||||||
|
"2023-01-03 14:00:00 4058 4059 \n",
|
||||||
|
"2023-01-03 15:00:00 4062 4063 \n",
|
||||||
|
"... ... ... \n",
|
||||||
|
"2023-02-01 12:00:00 4106 4107 \n",
|
||||||
|
"2023-02-01 14:00:00 4108 4109 \n",
|
||||||
|
"2023-02-01 15:00:00 4084 4085 \n",
|
||||||
|
"2023-02-01 22:00:00 4102 4103 \n",
|
||||||
|
"2023-02-01 23:00:00 4098 4099 \n",
|
||||||
|
"\n",
|
||||||
|
"[117 rows x 13 columns]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"data.set_index(\"datetime\", inplace=True)\n",
|
||||||
|
"data.index = pd.to_datetime(data.index)\n",
|
||||||
|
"print(data)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
2
2.数据下载与处理/ssquant_download/说明.txt
Normal file
2
2.数据下载与处理/ssquant_download/说明.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
1.使用数据库示例.py调取数据,每日下午3点50分后更新当日数据。
|
||||||
|
2.次月初更新上个月所有的tick数据和1m数据
|
||||||
610
2.数据下载与处理/tick数据转分钟数据脚本/tick_to_min.ipynb
Normal file
610
2.数据下载与处理/tick数据转分钟数据脚本/tick_to_min.ipynb
Normal file
@@ -0,0 +1,610 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 13,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"file_path_888 = r\"E:\\data\\data_rs_merged\\中金所\\IM888\\IM888_rs_2023.csv\"\n",
|
||||||
|
"df_888 = pd.read_csv(file_path_888, encoding='utf-8')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 14,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>main_contract</th>\n",
|
||||||
|
" <th>symbol</th>\n",
|
||||||
|
" <th>datetime</th>\n",
|
||||||
|
" <th>lastprice</th>\n",
|
||||||
|
" <th>volume</th>\n",
|
||||||
|
" <th>bid_p</th>\n",
|
||||||
|
" <th>ask_p</th>\n",
|
||||||
|
" <th>bid_v</th>\n",
|
||||||
|
" <th>ask_v</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>IM2301</td>\n",
|
||||||
|
" <td>2023-01-03 09:30:00.200</td>\n",
|
||||||
|
" <td>6280.0</td>\n",
|
||||||
|
" <td>46</td>\n",
|
||||||
|
" <td>6276.0</td>\n",
|
||||||
|
" <td>6277.0</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>IM2301</td>\n",
|
||||||
|
" <td>2023-01-03 09:30:00.700</td>\n",
|
||||||
|
" <td>6277.0</td>\n",
|
||||||
|
" <td>61</td>\n",
|
||||||
|
" <td>6278.0</td>\n",
|
||||||
|
" <td>6278.8</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>16</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>IM2301</td>\n",
|
||||||
|
" <td>2023-01-03 09:30:01.200</td>\n",
|
||||||
|
" <td>6277.2</td>\n",
|
||||||
|
" <td>81</td>\n",
|
||||||
|
" <td>6277.2</td>\n",
|
||||||
|
" <td>6278.8</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>IM2301</td>\n",
|
||||||
|
" <td>2023-01-03 09:30:01.700</td>\n",
|
||||||
|
" <td>6277.8</td>\n",
|
||||||
|
" <td>90</td>\n",
|
||||||
|
" <td>6277.8</td>\n",
|
||||||
|
" <td>6278.6</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>IM2301</td>\n",
|
||||||
|
" <td>2023-01-03 09:30:02.200</td>\n",
|
||||||
|
" <td>6278.8</td>\n",
|
||||||
|
" <td>112</td>\n",
|
||||||
|
" <td>6278.8</td>\n",
|
||||||
|
" <td>6280.0</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>7</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" main_contract symbol datetime lastprice volume bid_p \\\n",
|
||||||
|
"0 IM888 IM2301 2023-01-03 09:30:00.200 6280.0 46 6276.0 \n",
|
||||||
|
"1 IM888 IM2301 2023-01-03 09:30:00.700 6277.0 61 6278.0 \n",
|
||||||
|
"2 IM888 IM2301 2023-01-03 09:30:01.200 6277.2 81 6277.2 \n",
|
||||||
|
"3 IM888 IM2301 2023-01-03 09:30:01.700 6277.8 90 6277.8 \n",
|
||||||
|
"4 IM888 IM2301 2023-01-03 09:30:02.200 6278.8 112 6278.8 \n",
|
||||||
|
"\n",
|
||||||
|
" ask_p bid_v ask_v \n",
|
||||||
|
"0 6277.0 1 3 \n",
|
||||||
|
"1 6278.8 1 16 \n",
|
||||||
|
"2 6278.8 1 5 \n",
|
||||||
|
"3 6278.6 3 4 \n",
|
||||||
|
"4 6280.0 1 7 "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 14,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_888.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 重命名列以便处理\n",
|
||||||
|
"# df_888.rename(columns={'时间': 'datetime', '最新': 'price', '成交量': 'volume'}, inplace=True)\n",
|
||||||
|
"df_888.rename(columns={'datetime': 'datetime', 'lastprice': 'price', 'volume': 'volume'}, inplace=True)\n",
|
||||||
|
"\n",
|
||||||
|
"# 确保datetime列是datetime类型\n",
|
||||||
|
"df_888['datetime'] = pd.to_datetime(df_888['datetime'])\n",
|
||||||
|
"\n",
|
||||||
|
"# 设置datetime列为索引\n",
|
||||||
|
"df_888.set_index('datetime', inplace=True)\n",
|
||||||
|
"\n",
|
||||||
|
"# 使用resample方法将数据重新采样为1分钟数据\n",
|
||||||
|
"df_resampled = df_888.resample('1T').agg({\n",
|
||||||
|
" 'price': ['first', 'max', 'min', 'last'],\n",
|
||||||
|
" 'volume': 'sum'\n",
|
||||||
|
"})"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead tr th {\n",
|
||||||
|
" text-align: left;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead tr:last-of-type th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th colspan=\"4\" halign=\"left\">price</th>\n",
|
||||||
|
" <th>volume</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>first</th>\n",
|
||||||
|
" <th>max</th>\n",
|
||||||
|
" <th>min</th>\n",
|
||||||
|
" <th>last</th>\n",
|
||||||
|
" <th>sum</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>datetime</th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:30:00</th>\n",
|
||||||
|
" <td>6280.0</td>\n",
|
||||||
|
" <td>6306.4</td>\n",
|
||||||
|
" <td>6277.0</td>\n",
|
||||||
|
" <td>6302.0</td>\n",
|
||||||
|
" <td>66894</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:31:00</th>\n",
|
||||||
|
" <td>6302.0</td>\n",
|
||||||
|
" <td>6320.0</td>\n",
|
||||||
|
" <td>6302.0</td>\n",
|
||||||
|
" <td>6318.8</td>\n",
|
||||||
|
" <td>172512</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:32:00</th>\n",
|
||||||
|
" <td>6319.8</td>\n",
|
||||||
|
" <td>6328.0</td>\n",
|
||||||
|
" <td>6314.8</td>\n",
|
||||||
|
" <td>6314.8</td>\n",
|
||||||
|
" <td>238716</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:33:00</th>\n",
|
||||||
|
" <td>6313.0</td>\n",
|
||||||
|
" <td>6325.0</td>\n",
|
||||||
|
" <td>6310.4</td>\n",
|
||||||
|
" <td>6312.4</td>\n",
|
||||||
|
" <td>297675</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:34:00</th>\n",
|
||||||
|
" <td>6311.0</td>\n",
|
||||||
|
" <td>6323.2</td>\n",
|
||||||
|
" <td>6311.0</td>\n",
|
||||||
|
" <td>6319.4</td>\n",
|
||||||
|
" <td>352184</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" price volume\n",
|
||||||
|
" first max min last sum\n",
|
||||||
|
"datetime \n",
|
||||||
|
"2023-01-03 09:30:00 6280.0 6306.4 6277.0 6302.0 66894\n",
|
||||||
|
"2023-01-03 09:31:00 6302.0 6320.0 6302.0 6318.8 172512\n",
|
||||||
|
"2023-01-03 09:32:00 6319.8 6328.0 6314.8 6314.8 238716\n",
|
||||||
|
"2023-01-03 09:33:00 6313.0 6325.0 6310.4 6312.4 297675\n",
|
||||||
|
"2023-01-03 09:34:00 6311.0 6323.2 6311.0 6319.4 352184"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_resampled.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 17,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'IM888'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 17,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_888['main_contract'][1]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 18,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# df_resampled['symbol'] = df_888['main_contract'][1]\n",
|
||||||
|
"df_resampled.insert(0, 'symbol', df_888['main_contract'][1])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 19,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead tr th {\n",
|
||||||
|
" text-align: left;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead tr:last-of-type th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>symbol</th>\n",
|
||||||
|
" <th colspan=\"4\" halign=\"left\">price</th>\n",
|
||||||
|
" <th>volume</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>first</th>\n",
|
||||||
|
" <th>max</th>\n",
|
||||||
|
" <th>min</th>\n",
|
||||||
|
" <th>last</th>\n",
|
||||||
|
" <th>sum</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>datetime</th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:30:00</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>6280.0</td>\n",
|
||||||
|
" <td>6306.4</td>\n",
|
||||||
|
" <td>6277.0</td>\n",
|
||||||
|
" <td>6302.0</td>\n",
|
||||||
|
" <td>66894</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:31:00</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>6302.0</td>\n",
|
||||||
|
" <td>6320.0</td>\n",
|
||||||
|
" <td>6302.0</td>\n",
|
||||||
|
" <td>6318.8</td>\n",
|
||||||
|
" <td>172512</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:32:00</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>6319.8</td>\n",
|
||||||
|
" <td>6328.0</td>\n",
|
||||||
|
" <td>6314.8</td>\n",
|
||||||
|
" <td>6314.8</td>\n",
|
||||||
|
" <td>238716</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:33:00</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>6313.0</td>\n",
|
||||||
|
" <td>6325.0</td>\n",
|
||||||
|
" <td>6310.4</td>\n",
|
||||||
|
" <td>6312.4</td>\n",
|
||||||
|
" <td>297675</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:34:00</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>6311.0</td>\n",
|
||||||
|
" <td>6323.2</td>\n",
|
||||||
|
" <td>6311.0</td>\n",
|
||||||
|
" <td>6319.4</td>\n",
|
||||||
|
" <td>352184</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" symbol price volume\n",
|
||||||
|
" first max min last sum\n",
|
||||||
|
"datetime \n",
|
||||||
|
"2023-01-03 09:30:00 IM888 6280.0 6306.4 6277.0 6302.0 66894\n",
|
||||||
|
"2023-01-03 09:31:00 IM888 6302.0 6320.0 6302.0 6318.8 172512\n",
|
||||||
|
"2023-01-03 09:32:00 IM888 6319.8 6328.0 6314.8 6314.8 238716\n",
|
||||||
|
"2023-01-03 09:33:00 IM888 6313.0 6325.0 6310.4 6312.4 297675\n",
|
||||||
|
"2023-01-03 09:34:00 IM888 6311.0 6323.2 6311.0 6319.4 352184"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 19,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_resampled.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 20,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# 重命名列名以符合K线数据的标准命名\n",
|
||||||
|
"df_resampled.columns = ['open', 'high', 'low', 'close', 'volume', 'symbol']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 21,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>open</th>\n",
|
||||||
|
" <th>high</th>\n",
|
||||||
|
" <th>low</th>\n",
|
||||||
|
" <th>close</th>\n",
|
||||||
|
" <th>volume</th>\n",
|
||||||
|
" <th>symbol</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>datetime</th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:30:00</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>6280.0</td>\n",
|
||||||
|
" <td>6306.4</td>\n",
|
||||||
|
" <td>6277.0</td>\n",
|
||||||
|
" <td>6302.0</td>\n",
|
||||||
|
" <td>66894</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:31:00</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>6302.0</td>\n",
|
||||||
|
" <td>6320.0</td>\n",
|
||||||
|
" <td>6302.0</td>\n",
|
||||||
|
" <td>6318.8</td>\n",
|
||||||
|
" <td>172512</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:32:00</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>6319.8</td>\n",
|
||||||
|
" <td>6328.0</td>\n",
|
||||||
|
" <td>6314.8</td>\n",
|
||||||
|
" <td>6314.8</td>\n",
|
||||||
|
" <td>238716</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:33:00</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>6313.0</td>\n",
|
||||||
|
" <td>6325.0</td>\n",
|
||||||
|
" <td>6310.4</td>\n",
|
||||||
|
" <td>6312.4</td>\n",
|
||||||
|
" <td>297675</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2023-01-03 09:34:00</th>\n",
|
||||||
|
" <td>IM888</td>\n",
|
||||||
|
" <td>6311.0</td>\n",
|
||||||
|
" <td>6323.2</td>\n",
|
||||||
|
" <td>6311.0</td>\n",
|
||||||
|
" <td>6319.4</td>\n",
|
||||||
|
" <td>352184</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" open high low close volume symbol\n",
|
||||||
|
"datetime \n",
|
||||||
|
"2023-01-03 09:30:00 IM888 6280.0 6306.4 6277.0 6302.0 66894\n",
|
||||||
|
"2023-01-03 09:31:00 IM888 6302.0 6320.0 6302.0 6318.8 172512\n",
|
||||||
|
"2023-01-03 09:32:00 IM888 6319.8 6328.0 6314.8 6314.8 238716\n",
|
||||||
|
"2023-01-03 09:33:00 IM888 6313.0 6325.0 6310.4 6312.4 297675\n",
|
||||||
|
"2023-01-03 09:34:00 IM888 6311.0 6323.2 6311.0 6319.4 352184"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 21,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_resampled.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 22,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"1分钟历史数据已保存至E:\\data\\data_rs_merged\\中金所\\IM888\\IM888_rs_2023_1min.csv\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# 删除存在NA值的行(如果有的时间段没有交易数据)\n",
|
||||||
|
"df_resampled.dropna(inplace=True)\n",
|
||||||
|
"# df_resampled['symbol'] = df_888['统一代码']\n",
|
||||||
|
"# df_resampled.insert(loc=0, column='main_contract', value=df_888['main_contract'])\n",
|
||||||
|
"# df_resampled['symbol'] = df_888['main_contract']\n",
|
||||||
|
"# 将重新采样的数据写入新的CSV文件\n",
|
||||||
|
"output_file = r\"E:\\data\\data_rs_merged\\中金所\\IM888\\IM888_rs_2023_1min.csv\"\n",
|
||||||
|
"df_resampled.to_csv(output_file)\n",
|
||||||
|
"\n",
|
||||||
|
"print(f'1分钟历史数据已保存至{output_file}')"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
33
2.数据下载与处理/tick数据转分钟数据脚本/tick_to_min.py
Normal file
33
2.数据下载与处理/tick数据转分钟数据脚本/tick_to_min.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
import pandas as pd


def resample_ticks_to_1min(df: pd.DataFrame) -> pd.DataFrame:
    """Convert raw tick rows into 1-minute OHLCV bars.

    Expects the raw Chinese column names 时间 (timestamp), 最新 (last
    price) and 成交量 (volume). Minutes containing no tick are dropped.

    Returns a DataFrame indexed by minute with columns
    ``open, high, low, close, volume``.
    """
    # Normalise the column names we operate on.
    ticks = df.rename(columns={'时间': 'datetime', '最新': 'price', '成交量': 'volume'})

    # Index by timestamp so resample() can bin by wall-clock minute.
    ticks['datetime'] = pd.to_datetime(ticks['datetime'])
    ticks = ticks.set_index('datetime')

    # '1min' replaces the '1T' offset alias, which is deprecated in
    # recent pandas releases (behaviour is identical).
    bars = ticks.resample('1min').agg({
        'price': ['first', 'max', 'min', 'last'],
        'volume': 'sum'
    })

    # Flatten the MultiIndex columns to the standard K-line names.
    bars.columns = ['open', 'high', 'low', 'close', 'volume']

    # Drop minutes without any trade data (price columns are NaN there).
    return bars.dropna()


def main(file_path='C:/Users/zhouj/Desktop/a次主力连续_20190103.csv',
         output_file='C:/Users/zhouj/Desktop/tic_data_1min.csv'):
    """Read a tick CSV, resample it to 1-minute bars, write the result."""
    # Source files are GBK-encoded exports.
    df = pd.read_csv(file_path, encoding='gbk')

    df_resampled = resample_ticks_to_1min(df)

    df_resampled.to_csv(output_file)
    print(f'1分钟历史数据已保存至{output_file}')


if __name__ == '__main__':
    main()
|
||||||
|
|
||||||
135
2.数据下载与处理/update_data._2py.py
Normal file
135
2.数据下载与处理/update_data._2py.py
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
from multiprocessing import Process
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from vnpy.trader.database import BarOverview
|
||||||
|
from vnpy.trader.datafeed import get_datafeed
|
||||||
|
from vnpy.trader.database import get_database
|
||||||
|
from vnpy.trader.object import BarData, HistoryRequest
|
||||||
|
from vnpy.trader.constant import Exchange, Interval
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Maps the exchange suffix used in XtQuant symbol codes (e.g. "rb2405.SF")
# to the corresponding vn.py Exchange enum member.
EXCHANGE_XT2VT = {
    "SH": Exchange.SSE,    # Shanghai Stock Exchange
    "SZ": Exchange.SZSE,   # Shenzhen Stock Exchange
    "BJ": Exchange.BSE,    # Beijing Stock Exchange
    "SF": Exchange.SHFE,   # Shanghai Futures Exchange
    "IF": Exchange.CFFEX,  # China Financial Futures Exchange
    "INE": Exchange.INE,   # Shanghai International Energy Exchange
    "DF": Exchange.DCE,    # Dalian Commodity Exchange
    "ZF": Exchange.CZCE,   # Zhengzhou Commodity Exchange
    "GF": Exchange.GFEX,   # Guangzhou Futures Exchange
}

# Earliest datetime from which bar history is requested when a contract
# has no locally stored data yet.
START_TIME = datetime(2018, 1, 1)
|
||||||
|
|
||||||
|
|
||||||
|
def update_history_data() -> None:
    """Download the XtQuant historical contract catalogue.

    xtquant is imported inside the function so that the library is only
    loaded in the (sub)process that actually runs this task.
    """
    from xtquant.xtdata import download_history_data

    # Bring up the configured datafeed before calling into XtQuant.
    feed = get_datafeed()
    feed.init()

    # An empty symbol with period "historycontract" fetches the complete
    # historical contract list.
    download_history_data("", "historycontract")
    print("xtquant历史合约信息下载完成")
|
||||||
|
|
||||||
|
|
||||||
|
def update_bar_data(
    sector_name: str,
    interval: Interval = Interval.MINUTE
) -> None:
    """Incrementally download bar history for every contract in a sector.

    For each XtQuant symbol in *sector_name*, builds the local
    continuous-contract code, checks what is already stored in the
    database, skips contracts that expired with data present, and queries
    only the missing range before saving it.

    Parameters
    ----------
    sector_name : str
        XtQuant sector name, e.g. "上期所" or "过期上期所".
    interval : Interval
        Bar interval to download (minute bars by default).
    """
    # xtquant is imported lazily so it is only loaded in the process
    # that actually performs the download.
    from xtquant.xtdata import (
        get_stock_list_in_sector,
        get_instrument_detail
    )

    # Initialise the configured datafeed and open the database.
    datafeed = get_datafeed()
    datafeed.init()
    database = get_database()

    # Single timestamp reused for expiry checks and as the query end.
    now: datetime = datetime.now()

    # Index the locally stored bar overviews by vt_symbol.
    overviews: dict[str, BarOverview] = {}
    for overview_item in database.get_bar_overview():
        overviews[f"{overview_item.symbol}.{overview_item.exchange.value}"] = overview_item

    # All historical contract codes of the sector.
    xt_symbols: list[str] = get_stock_list_in_sector(sector_name)

    for xt_symbol in xt_symbols:
        # Contract details (True -> read the downloaded history file).
        # Renamed from `data`, which shadowed the overview list above.
        detail: dict = get_instrument_detail(xt_symbol, True)

        # Contract expiry, when the detail record provides one.
        expiry = None
        if detail["ExpireDate"]:
            expiry = datetime.strptime(detail["ExpireDate"], "%Y%m%d")

        # Split the XtQuant code into symbol and exchange suffix.
        symbol, xt_exchange = xt_symbol.split(".")

        # Product letters, e.g. "rb2405" -> "rb".
        symbol_main = re.split(r'(\d+)', symbol)[0]

        # Build the local continuous-contract code.
        exchange: Exchange = EXCHANGE_XT2VT[xt_exchange]
        # BUG FIX: the original expression
        #   f"{symbol_main}+'JQ00'.{exchange.value}" or f"{symbol_main}+'00'.{exchange.value}"
        # embedded the literal text +'JQ00' in the key, and the `or` arm
        # was dead code (the first f-string is never empty), so the
        # overview lookup below could never match and every run
        # re-downloaded from START_TIME. The intended key appears to be
        # the JQ00 continuous code — TODO confirm against the keys
        # actually stored in the database (JQ00 vs 00 suffix).
        vt_symbol: str = f"{symbol_main}JQ00.{exchange.value}"

        # Previously stored data range for this contract, if any.
        overview: BarOverview = overviews.get(vt_symbol, None)

        # Expired contracts with data already stored need no update.
        if overview and expiry and expiry < now:
            continue

        # Incremental query: resume from the stored end time.
        start: datetime = START_TIME
        if overview:
            start = overview.end

        # Query the missing range and persist it.
        req: HistoryRequest = HistoryRequest(
            symbol=symbol,
            exchange=exchange,
            start=start,
            end=now,
            interval=interval
        )

        bars: list[BarData] = datafeed.query_bar_history(req)

        if bars:
            database.save_bar_data(bars)

            start_dt: datetime = bars[0].datetime
            end_dt: datetime = bars[-1].datetime
            msg: str = f"{vt_symbol}数据更新成功,{start_dt} - {end_dt}"
            print(msg)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# 使用子进程更新历史合约信息
|
||||||
|
process: Process = Process(target=update_history_data)
|
||||||
|
process.start()
|
||||||
|
process.join() # 等待子进程执行完成
|
||||||
|
|
||||||
|
# 更新历史数据
|
||||||
|
update_bar_data("上期所")
|
||||||
|
update_bar_data("过期上期所")
|
||||||
184
2.数据下载与处理/update_data.py
Normal file
184
2.数据下载与处理/update_data.py
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
from multiprocessing import Process
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from vnpy.trader.database import BarOverview
|
||||||
|
from vnpy.trader.datafeed import get_datafeed
|
||||||
|
from vnpy.trader.object import ContractData, BarData, HistoryRequest
|
||||||
|
from vnpy.trader.constant import Exchange, Product, OptionType, Interval
|
||||||
|
from vnpy.trader.setting import SETTINGS
|
||||||
|
|
||||||
|
from elite_database import EliteDatabase
|
||||||
|
|
||||||
|
|
||||||
|
# Datafeed configuration: use the XtQuant ("迅投研") data service.
SETTINGS["datafeed.name"] = "xt"
SETTINGS["datafeed.username"] = "token"
SETTINGS["datafeed.password"] = ""  # access token goes here


# Maps the exchange suffix used in XtQuant symbol codes to the
# corresponding vn.py Exchange enum member.
EXCHANGE_XT2VT = {
    "SH": Exchange.SSE,    # Shanghai Stock Exchange
    "SZ": Exchange.SZSE,   # Shenzhen Stock Exchange
    "BJ": Exchange.BSE,    # Beijing Stock Exchange
    "SF": Exchange.SHFE,   # Shanghai Futures Exchange
    "IF": Exchange.CFFEX,  # China Financial Futures Exchange
    "INE": Exchange.INE,   # Shanghai International Energy Exchange
    "DF": Exchange.DCE,    # Dalian Commodity Exchange
    "ZF": Exchange.CZCE,   # Zhengzhou Commodity Exchange
    "GF": Exchange.GFEX,   # Guangzhou Futures Exchange
}
|
||||||
|
|
||||||
|
|
||||||
|
def update_history_data() -> None:
    """Download the XtQuant historical contract catalogue.

    xtquant is imported inside the function so that the library is only
    loaded in the (sub)process that actually runs this task.
    """
    from xtquant.xtdata import download_history_data

    # Start the configured datafeed before calling into XtQuant.
    service = get_datafeed()
    service.init()

    # Empty symbol + "historycontract" period downloads the complete
    # historical contract list.
    download_history_data("", "historycontract")
    print("xtquant历史合约信息下载完成")
|
||||||
|
|
||||||
|
|
||||||
|
def update_contract_data(sector_name: str) -> None:
    """Download option contract metadata for *sector_name* and store it.

    Only option contracts (symbols containing "-", e.g. "IO2301-C-4000")
    are collected; everything else in the sector is skipped.

    Raises
    ------
    ValueError
        If an option symbol carries a type flag other than "C" or "P".
    """
    # xtquant is imported lazily so it is only loaded in the process
    # that actually performs the download.
    from xtquant.xtdata import (
        get_stock_list_in_sector,
        get_instrument_detail
    )

    # Initialise the configured datafeed.
    datafeed = get_datafeed()
    datafeed.init()

    # All historical contract codes in the sector.
    vt_symbols: list[str] = get_stock_list_in_sector(sector_name)

    contracts: list[ContractData] = []

    for xt_symbol in vt_symbols:
        # Split the XtQuant code into symbol and exchange suffix.
        symbol, xt_exchange = xt_symbol.split(".")

        # Option symbols look like "IO2301-C-4000"; skip non-options.
        if "-" not in symbol:
            continue

        data: dict = get_instrument_detail(xt_symbol, True)

        # Underlying code and call/put flag from "UNDERLYING-C|P-STRIKE".
        option_underlying, type_str = data["InstrumentID"].split("-")[:2]
        if type_str == "C":
            option_type = OptionType.CALL
        elif type_str == "P":
            option_type = OptionType.PUT
        else:
            # BUG FIX: previously option_type stayed unbound here and the
            # ContractData construction below crashed with NameError.
            raise ValueError(
                f"Unknown option type flag {type_str!r} in {data['InstrumentID']}"
            )

        strike = data["ExtendInfo"]["OptExercisePrice"]

        contract: ContractData = ContractData(
            symbol=data["InstrumentID"],
            # NOTE(review): presumably strips the "O" marker from option
            # exchange suffixes (e.g. "SHO" -> "SH") — confirm this is
            # safe for every suffix that may contain an "O".
            exchange=EXCHANGE_XT2VT[xt_exchange.replace("O", "")],
            name=data["InstrumentName"],
            product=Product.OPTION,
            size=data["VolumeMultiple"],
            pricetick=data["PriceTick"],
            min_volume=data["MinLimitOrderVolume"],
            option_strike=strike,
            option_listed=datetime.strptime(data["OpenDate"], "%Y%m%d"),
            option_expiry=datetime.strptime(data["ExpireDate"], "%Y%m%d"),
            option_underlying=option_underlying,
            option_portfolio=data["ProductID"],
            option_index=str(strike),
            option_type=option_type,
            gateway_name="XT"
        )
        contracts.append(contract)

    # Persist everything in one batch.
    database: EliteDatabase = EliteDatabase()
    database.save_contract_data(contracts)

    print("合约信息更新成功", len(contracts))
|
||||||
|
|
||||||
|
|
||||||
|
def update_bar_data() -> None:
    """Incrementally download minute bars for every stored option contract.

    Walks the contracts saved by update_contract_data, skips contracts
    whose history is already complete (expired with data stored), and
    queries only the range after the last stored bar.
    """
    # Initialise the configured datafeed.
    datafeed = get_datafeed()
    datafeed.init()

    # Single timestamp reused for expiry checks and as the query end.
    now: datetime = datetime.now()

    # Contracts previously saved to the database.
    database: EliteDatabase = EliteDatabase()
    contracts: list[ContractData] = database.load_contract_data()

    # Summary of bar data already stored locally.
    data: list[BarOverview] = database.get_bar_overview()

    # Index the minute-bar overviews by vt_symbol.
    overviews: dict[str, BarOverview] = {}
    for o in data:
        # Only minute bars are tracked here.
        if o.interval != Interval.MINUTE:
            continue

        vt_symbol: str = f"{o.symbol}.{o.exchange.value}"
        overviews[vt_symbol] = o

    for contract in contracts:
        # Contracts without an expiry cannot be range-checked; skip them.
        if not contract.option_expiry:
            continue

        # Previously stored data range for this contract, if any.
        overview: BarOverview = overviews.get(contract.vt_symbol, None)

        # Expired contracts with data already stored need no update.
        if overview and contract.option_expiry < now:
            continue

        # Default query start for contracts with no stored data.
        start: datetime = datetime(2018, 1, 1)

        # Incremental query: resume from the stored end time.
        if overview:
            start = overview.end

        # Query the missing range and persist it.
        req: HistoryRequest = HistoryRequest(
            symbol=contract.symbol,
            exchange=contract.exchange,
            start=start,
            # Consistency fix: reuse the `now` captured above (the
            # original called datetime.now() a second time here).
            end=now,
            interval=Interval.MINUTE
        )

        bars: list[BarData] = datafeed.query_bar_history(req)

        if bars:
            database.save_bar_data(bars)

            start_dt: datetime = bars[0].datetime
            end_dt: datetime = bars[-1].datetime
            msg: str = f"{contract.vt_symbol}数据更新成功,{start_dt} - {end_dt}"
            print(msg)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# 使用子进程更新历史合约信息
|
||||||
|
process: Process = Process(target=update_history_data)
|
||||||
|
process.start()
|
||||||
|
process.join() # 等待子进程执行完成
|
||||||
|
|
||||||
|
# 更新合约信息
|
||||||
|
update_contract_data("中金所")
|
||||||
|
update_contract_data("过期中金所")
|
||||||
|
|
||||||
|
# 更新历史数据
|
||||||
|
# update_bar_data()
|
||||||
594
2.数据下载与处理/数据转换最终版/merged_by_year_20240510.ipynb
Normal file
594
2.数据下载与处理/数据转换最终版/merged_by_year_20240510.ipynb
Normal file
File diff suppressed because one or more lines are too long
344
2.数据下载与处理/数据转换最终版/merged_by_year_20240724.ipynb
Normal file
344
2.数据下载与处理/数据转换最终版/merged_by_year_20240724.ipynb
Normal file
File diff suppressed because one or more lines are too long
444
2.数据下载与处理/数据转换最终版/merged_by_year_BIT_20240522.ipynb
Normal file
444
2.数据下载与处理/数据转换最终版/merged_by_year_BIT_20240522.ipynb
Normal file
File diff suppressed because one or more lines are too long
342
2.数据下载与处理/数据转换最终版/merged_tickdata_20240510.py
Normal file
342
2.数据下载与处理/数据转换最终版/merged_tickdata_20240510.py
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
from datetime import time as s_time
|
||||||
|
from datetime import datetime
|
||||||
|
import chardet
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Session cut-off times per product code.
#
# Day-session-only commodity futures: no night session, trading ends at
# 15:00 for every product in this group.
commodity_day_dict = {
    sym: s_time(15, 0)
    for sym in (
        'bb', 'jd', 'lh', 'l', 'fb', 'ec',
        'AP', 'CJ', 'JR', 'LR', 'RS', 'PK',
        'PM', 'PX', 'RI', 'SF', 'SM', 'UR',
        'WH', 'ao', 'br', 'wr',
    )
}

# Commodity futures with a night session, mapped to the night-session
# close time (23:00, 01:00 or 02:30 depending on the product).
commodity_night_dict = {
    'sc': s_time(2, 30), 'bc': s_time(1, 0), 'lu': s_time(23, 0), 'nr': s_time(23, 0),
    'au': s_time(2, 30), 'ag': s_time(2, 30), 'ss': s_time(1, 0), 'sn': s_time(1, 0),
    'ni': s_time(1, 0), 'pb': s_time(1, 0), 'zn': s_time(1, 0), 'al': s_time(1, 0),
    'cu': s_time(1, 0), 'ru': s_time(23, 0), 'rb': s_time(23, 0), 'hc': s_time(23, 0),
    'fu': s_time(23, 0), 'bu': s_time(23, 0), 'sp': s_time(23, 0), 'PF': s_time(23, 0),
    'SR': s_time(23, 0), 'CF': s_time(23, 0), 'CY': s_time(23, 0), 'RM': s_time(23, 0),
    'MA': s_time(23, 0), 'TA': s_time(23, 0), 'ZC': s_time(23, 0), 'FG': s_time(23, 0),
    'OI': s_time(23, 0), 'SA': s_time(23, 0), 'p': s_time(23, 0), 'j': s_time(23, 0),
    'jm': s_time(23, 0), 'i': s_time(23, 0), 'l': s_time(23, 0), 'v': s_time(23, 0),
    'pp': s_time(23, 0), 'eg': s_time(23, 0), 'c': s_time(23, 0), 'cs': s_time(23, 0),
    'y': s_time(23, 0), 'm': s_time(23, 0), 'a': s_time(23, 0), 'b': s_time(23, 0),
    'rr': s_time(23, 0), 'eb': s_time(23, 0), 'pg': s_time(23, 0), 'SH': s_time(23, 0),
}

# Financial futures (CFFEX): all close at 15:00.
financial_time_dict = {
    sym: s_time(15, 0)
    for sym in ('IH', 'IF', 'IC', 'IM', 'T', 'TS', 'TF', 'TL')
}

# Every covered product in one lookup table; later dicts win on duplicate
# keys (e.g. 'l' appears in both the day and the night dict).
all_dict = {**commodity_day_dict, **commodity_night_dict, **financial_time_dict}
|
||||||
|
|
||||||
|
def split_alpha_numeric(string):
    """Split *string* into its alphabetic and numeric characters.

    Returns a ``(letters, digits)`` tuple. Characters that are neither
    letters nor digits are discarded; ordering within each group follows
    the original string.
    """
    letters = "".join(ch for ch in string if ch.isalpha())
    digits = "".join(ch for ch in string if ch.isdigit())
    return letters, digits
|
||||||
|
|
||||||
|
def merged_old_tickdata(merged_up_df, sp_char, alpha_chars, code_value):
    """Normalise an old-format merged tick DataFrame into the unified schema.

    Parameters
    ----------
    merged_up_df : pandas.DataFrame
        Raw concatenated tick rows with the old Chinese column names
        (统一代码/合约代码/时间/最新/成交量/买一价/卖一价/买一量/卖一量).
    sp_char : str
        Tag used only for progress logging.
    alpha_chars : str
        Product code letters (e.g. "rb"); must be a key of ``all_dict``.
    code_value : str
        Continuous-contract code used for logging and error messages.

    Returns
    -------
    pandas.DataFrame
        Columns main_contract, symbol, datetime, lastprice, volume,
        bid_p, ask_p, bid_v, ask_v — filtered to trading hours and
        sorted by datetime.

    Raises
    ------
    ValueError
        If *alpha_chars* is not covered by the session schedule dicts.
    """
    # BUG FIX: the original used ``while ...: print(...); continue`` here,
    # which loops forever when the product is unknown. Fail fast instead.
    if alpha_chars not in all_dict:
        raise ValueError("%s期货品种未列入所有筛选条件中!!!" % (code_value))

    merged_df = pd.DataFrame({
        'main_contract': merged_up_df['统一代码'],
        'symbol': merged_up_df['合约代码'],
        'datetime': merged_up_df['时间'],
        'lastprice': merged_up_df['最新'],
        'volume': merged_up_df['成交量'],
        'bid_p': merged_up_df['买一价'],
        'ask_p': merged_up_df['卖一价'],
        'bid_v': merged_up_df['买一量'],
        'ask_v': merged_up_df['卖一量'],
    })
    # Release the (potentially large) source frame early.
    del merged_up_df

    merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])

    # Derive a time-of-day column used by the trading-hours filter.
    merged_df['tmp_time'] = merged_df['datetime'].dt.strftime('%H:%M:%S.%f')
    merged_df['time'] = merged_df['tmp_time'].apply(
        lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time
    del merged_df['tmp_time']

    merged_df = filter_tickdata_time(merged_df, alpha_chars)
    del merged_df['time']

    # BUG FIX: the original assigned ``sorted(merged_df['datetime'])`` back
    # into the column, which reorders the timestamps independently of the
    # other columns and silently corrupts the rows. Sort whole rows
    # instead (matching merged_new_tickdata).
    merged_df.sort_values(by=['datetime'], inplace=True)

    print("%s%s数据生成成功!" % (code_value, sp_char))

    return merged_df
|
||||||
|
|
||||||
|
def merged_new_tickdata(merged_up_df, sp_char, alpha_chars, code_value):
    """Normalise a new-format merged tick DataFrame into the unified schema.

    New-format files store the date (业务日期), time (最后修改时间) and
    millisecond part (最后修改毫秒) in separate columns, plus a cumulative
    volume (数量) that must be differenced to get per-tick volume.

    Returns a DataFrame with columns main_contract, symbol, datetime,
    lastprice, volume, bid_p, ask_p, bid_v, ask_v — filtered to trading
    hours and sorted by datetime.

    Raises
    ------
    ValueError
        If *alpha_chars* is not covered by the session schedule dicts.
    """
    # BUG FIX: the original used ``while ...: print(...); continue`` here,
    # which loops forever when the product is unknown. Fail fast instead.
    if alpha_chars not in all_dict:
        raise ValueError("%s期货品种未列入所有筛选条件中!!!" % (code_value))

    # Assemble a full timestamp from the date + time + millisecond columns.
    merged_up_df['datetime'] = (
        merged_up_df['业务日期'].astype(str) + ' '
        + merged_up_df['最后修改时间'].astype(str) + '.'
        + merged_up_df['最后修改毫秒'].astype(str)
    )
    # Unparseable rows become NaT (errors='coerce'). If the values come
    # out 8 hours off, adjust with a timedelta here.
    merged_up_df['datetime'] = pd.to_datetime(
        merged_up_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')

    # Instantaneous volume = first difference of the cumulative 数量 column.
    merged_up_df['volume'] = (merged_up_df['数量'] - merged_up_df['数量'].shift(1)).fillna(0)

    merged_df = pd.DataFrame({
        'main_contract': merged_up_df['统一代码'],
        'symbol': merged_up_df['合约代码'],
        'datetime': merged_up_df['datetime'],
        'lastprice': merged_up_df['最新价'],
        'volume': merged_up_df['volume'],
        'bid_p': merged_up_df['申买价一'],
        'ask_p': merged_up_df['申卖价一'],
        'bid_v': merged_up_df['申买量一'],
        'ask_v': merged_up_df['申卖量一'],
    })
    # Release the (potentially large) source frame early.
    del merged_up_df

    # Derive a time-of-day column used by the trading-hours filter.
    merged_df['tmp_time'] = merged_df['datetime'].dt.strftime('%H:%M:%S.%f')
    merged_df['time'] = merged_df['tmp_time'].apply(
        lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time
    del merged_df['tmp_time']

    merged_df = filter_tickdata_time(merged_df, alpha_chars)
    del merged_df['time']

    # Sort whole rows by timestamp. (The original bound the result of
    # ``sort_values(..., inplace=True)`` — which is always None — to an
    # unused variable; the in-place sort itself is what matters.)
    merged_df.sort_values(by=['datetime'], inplace=True)

    print("%s%s数据生成成功!" % (code_value, sp_char))

    return merged_df
|
||||||
|
|
||||||
|
def filter_tickdata_time(filter_df, alpha_chars):
    """Drop tick rows that fall outside the product's trading sessions.

    Parameters
    ----------
    filter_df : pandas.DataFrame
        Must carry a ``time`` column holding ``datetime.time`` values.
    alpha_chars : str
        Product code used to look up the session schedule in the
        module-level dicts (financial_time_dict, commodity_night_dict,
        commodity_day_dict).

    Returns
    -------
    pandas.DataFrame
        The same DataFrame with out-of-session rows dropped in place.
    """
    # BUG FIX: default every drop set to an empty index so the drop calls
    # at the bottom are always safe. The original left these names
    # unbound on the fallback branches (unknown night close / unknown
    # product), which raised NameError instead of filtering.
    empty_index = filter_df.index[:0]
    drop_index1 = drop_index2 = drop_index3 = drop_index4 = empty_index

    if alpha_chars in financial_time_dict:
        # CFFEX products trade 09:30-11:30 and 13:00-15:00 only.
        drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 0)) & (filter_df['time'] < s_time(13, 0, 0, 0))].index
        drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 0)) | (filter_df['time'] < s_time(9, 30, 0, 0))].index
        print("按照中金所交易时间筛选金融期货品种")

    elif alpha_chars in commodity_night_dict:
        # Day-session breaks shared by every night-trading commodity.
        drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 0)) & (filter_df['time'] < s_time(10, 30, 0, 0))].index
        drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 0)) & (filter_df['time'] < s_time(13, 30, 0, 0))].index
        drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 0)) & (filter_df['time'] < s_time(21, 0, 0, 0))].index

        night_close = commodity_night_dict[alpha_chars]
        if night_close == s_time(23, 0):
            drop_index4 = filter_df.loc[(filter_df['time'] > s_time(23, 0, 0, 0)) | (filter_df['time'] < s_time(9, 0, 0, 0))].index
            print("按照夜盘截止交易时间为23:00筛选商品期货品种")
        elif night_close == s_time(1, 0):
            drop_index4 = filter_df.loc[(filter_df['time'] > s_time(1, 0, 0, 0)) & (filter_df['time'] < s_time(9, 0, 0, 0))].index
            print("按照夜盘截止交易时间为1:00筛选商品期货品种")
        elif night_close == s_time(2, 30):
            drop_index4 = filter_df.loc[(filter_df['time'] > s_time(2, 30, 0, 0)) & (filter_df['time'] < s_time(9, 0, 0, 0))].index
            print("按照夜盘截止交易时间为2:30筛选商品期货品种")
        else:
            # Unknown night close: apply only the day-session filter above.
            print("夜盘截止交易时间未设置或者设置错误!!!")

    elif alpha_chars in commodity_day_dict:
        # Day-only commodities: 09:00-10:15, 10:30-11:30, 13:30-15:00.
        drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 0)) & (filter_df['time'] < s_time(10, 30, 0, 0))].index
        drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 0)) & (filter_df['time'] < s_time(13, 30, 0, 0))].index
        drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 0)) | (filter_df['time'] < s_time(9, 0, 0, 0))].index
        print("按照无夜盘筛选商品期货品种")

    else:
        # Product not covered by any schedule dict: no rows are dropped.
        print("%s期货品种未执行时间筛选中!!!" % (alpha_chars))

    # Remove all rows collected above.
    filter_df.drop(labels=drop_index1, axis=0, inplace=True)
    filter_df.drop(drop_index2, axis=0, inplace=True)
    filter_df.drop(drop_index3, axis=0, inplace=True)
    filter_df.drop(drop_index4, axis=0, inplace=True)

    return filter_df
|
||||||
|
|
||||||
|
def insert_main_contract(df):
    """Prepend a continuous-contract code column ("统一代码") to *df*.

    The product letters are taken from the first row's 合约代码 and suffixed
    with "889" (secondary-main continuous; 888 = main continuous, 999 = index).
    Returns the mutated frame, the product letters, and the generated code.
    """
    # Derive the product symbol (letters only) from the first contract code.
    product, _digits = split_alpha_numeric(df.loc[0, '合约代码'])
    continuous_code = product + "889"
    print("code_value characters:", continuous_code)
    # Insert as the left-most column so downstream exports lead with it.
    df.insert(loc=0, column="统一代码", value=continuous_code)
    return df, product, continuous_code
|
||||||
|
|
||||||
|
def merged_old_unprocessed_tickdata(all_csv_files, sp_char):
    """Merge the old-format raw tick CSVs whose filename contains *sp_char*.

    Files are expected to be GBK-encoded.  Unreadable files are logged to
    output_error.txt together with their chardet-detected encoding and are
    skipped.  Returns the merged frame plus the product letters and the
    continuous code produced by insert_main_contract().
    """
    csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
    print("csv_files:", csv_files)
    merged_up_df = pd.DataFrame()
    work_dir = os.getcwd()
    file_num_errors = 0

    for csv_file in csv_files:
        try:
            # Row 0 is the header; the dump is written in GBK.
            df = pd.read_csv(csv_file,
                             header=0,
                             encoding='gbk',
                             low_memory=False,
                             )
        except Exception:
            # BUG FIX: the original shadowed the loop variable with the file
            # handle and then re-merged the previous iteration's df (or hit a
            # NameError on the first failure).  Log, count, and skip the file.
            file_path = os.path.join(work_dir, csv_file)
            file_num_errors += 1
            with open(file_path, 'rb') as fh:
                raw = fh.read()
            # Detect the real encoding so the operator can convert the file.
            detected_encoding = chardet.detect(raw)['encoding']
            message = "%s:%s当前文件不为gbk格式,其文件格式为%s,需要转换为gbk格式,错误总数为%s" % (
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                file_path, detected_encoding, file_num_errors)
            print(message)
            with open('output_error.txt', 'a') as f:
                print(message, file=f)
            continue

        # Drop duplicate rows within the file, then append to the result.
        df.drop_duplicates(inplace=True)
        merged_up_df = pd.concat([merged_up_df, df], ignore_index=True)

    # Drop duplicates that span file boundaries and re-index.
    merged_up_df.drop_duplicates(subset=merged_up_df.columns.tolist(), inplace=True)
    merged_up_df.reset_index(inplace=True, drop=True)

    merged_up_df, alpha_chars, code_value = insert_main_contract(merged_up_df)
    return merged_up_df, alpha_chars, code_value
|
||||||
|
|
||||||
|
def merged_new_unprocessed_tickdata(all_csv_files, sp_char):
    """Merge the new-format (CTP dump) raw tick CSVs whose filename contains *sp_char*.

    Files are expected to be GBK-encoded.  Unreadable files are logged to
    output_error.txt together with their chardet-detected encoding and are
    skipped.  Returns the merged frame plus the product letters and the
    continuous code produced by insert_main_contract().
    """
    csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
    print("csv_files:", csv_files)
    merged_up_df = pd.DataFrame()
    work_dir = os.getcwd()
    file_num_errors = 0

    for csv_file in csv_files:
        try:
            # Row 0 is the header; the dump is written in GBK.
            df = pd.read_csv(csv_file,
                             header=0,
                             encoding='gbk',
                             low_memory=False,
                             )
        except Exception:
            # BUG FIX: the original shadowed the loop variable with the file
            # handle and then re-merged the previous iteration's df (or hit a
            # NameError on the first failure).  Log, count, and skip the file.
            file_path = os.path.join(work_dir, csv_file)
            file_num_errors += 1
            with open(file_path, 'rb') as fh:
                raw = fh.read()
            # Detect the real encoding so the operator can convert the file.
            detected_encoding = chardet.detect(raw)['encoding']
            message = "%s:%s当前文件不为gbk格式,其文件格式为%s,需要转换为gbk格式,错误总数为%s" % (
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                file_path, detected_encoding, file_num_errors)
            print(message)
            with open('output_error.txt', 'a') as f:
                print(message, file=f)
            continue

        # Drop duplicate rows within the file, then append to the result.
        df.drop_duplicates(inplace=True)
        merged_up_df = pd.concat([merged_up_df, df], ignore_index=True)

    # Drop duplicates that span file boundaries and re-index.
    merged_up_df.drop_duplicates(subset=merged_up_df.columns.tolist(), inplace=True)
    merged_up_df.reset_index(inplace=True, drop=True)

    merged_up_df, alpha_chars, code_value = insert_main_contract(merged_up_df)
    return merged_up_df, alpha_chars, code_value
|
||||||
|
|
||||||
|
def reinstatement_tickdata(merged_rs_df):
    """Normalise dtypes and apply arithmetic (difference) back-adjustment in place.

    At every row where the underlying contract (symbol) rolls over, the gap
    between the previous ask and the new bid is accumulated and added onto the
    bid/ask/last prices, so the stitched continuous series has no price jumps.
    Returns the same (mutated) frame.
    """
    frame = merged_rs_df

    # Normalise the column dtypes first.
    for col in ('main_contract', 'symbol'):
        frame[col] = frame[col].astype(str)
    frame['datetime'] = pd.to_datetime(frame['datetime'], errors='coerce',
                                       format='%Y-%m-%d %H:%M:%S.%f')
    for col in ('volume', 'bid_v', 'ask_v'):
        frame[col] = frame[col].astype(int)

    # Arithmetic adjustment factor: non-zero only on rollover rows.
    # (Proportional adjustment was considered and deliberately not used.)
    rollover = frame['symbol'] != frame['symbol'].shift()
    frame['复权因子'] = np.where(rollover, frame['ask_p'].shift() - frame['bid_p'], 0)
    frame['复权因子'] = frame['复权因子'].fillna(0)

    # Cumulative offset applied to every price column.
    offset = frame['复权因子'].cumsum()
    frame['bid_p_adj'] = frame['bid_p'] + offset
    frame['ask_p_adj'] = frame['ask_p'] + offset
    frame['lastprice_adj'] = frame['lastprice'] + offset

    # Overwrite the raw prices with the adjusted, 4-dp-rounded values.
    for col in ('bid_p', 'ask_p', 'lastprice'):
        frame[col] = frame[col + '_adj'].round(4)

    # Drop the scratch columns.
    for col in ('复权因子', 'bid_p_adj', 'ask_p_adj', 'lastprice_adj'):
        del frame[col]

    return frame
|
||||||
|
|
||||||
|
# def find_files(all_csv_files):
|
||||||
|
# all_csv_files = sorted(all_csv_files)
|
||||||
|
# sp_old_chars = ['_2019','_2020','_2021']
|
||||||
|
# sp_old_chars = sorted(sp_old_chars)
|
||||||
|
# sp_new_chars = ['_2022','_2023']
|
||||||
|
# sp_new_chars = sorted(sp_new_chars)
|
||||||
|
# csv_old_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_old_chars)]
|
||||||
|
# csv_new_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_new_chars)]
|
||||||
|
|
||||||
|
# return csv_old_files, csv_new_files
|
||||||
174
2.数据下载与处理/数据转换最终版/merged_tickdata_20240724.py
Normal file
174
2.数据下载与处理/数据转换最终版/merged_tickdata_20240724.py
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
from datetime import time as s_time
|
||||||
|
from datetime import datetime
|
||||||
|
import chardet
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Day-session-only commodity futures: products that close at 15:00 and have
# no night session.
commodity_day_dict = {'bb': s_time(15,00), 'jd': s_time(15,00), 'lh': s_time(15,00), 'l': s_time(15,00), 'fb': s_time(15,00), 'ec': s_time(15,00),
                      'AP': s_time(15,00), 'CJ': s_time(15,00), 'JR': s_time(15,00), 'LR': s_time(15,00), 'RS': s_time(15,00), 'PK': s_time(15,00),
                      'PM': s_time(15,00), 'PX': s_time(15,00), 'RI': s_time(15,00), 'SF': s_time(15,00), 'SM': s_time(15,00), 'UR': s_time(15,00),
                      'WH': s_time(15,00), 'ao': s_time(15,00), 'br': s_time(15,00), 'wr': s_time(15,00),}

# Night-session commodity futures, mapped to the closing time of the night
# session (23:00, 01:00 or 02:30).
commodity_night_dict = {'sc': s_time(2,30), 'bc': s_time(1,0), 'lu': s_time(23,0), 'nr': s_time(23,0),'au': s_time(2,30), 'ag': s_time(2,30),
                        'ss': s_time(1,0), 'sn': s_time(1,0), 'ni': s_time(1,0), 'pb': s_time(1,0),'zn': s_time(1,0), 'al': s_time(1,0), 'cu': s_time(1,0),
                        'ru': s_time(23,0), 'rb': s_time(23,0), 'hc': s_time(23,0), 'fu': s_time(23,0), 'bu': s_time(23,0), 'sp': s_time(23,0),
                        'PF': s_time(23,0), 'SR': s_time(23,0), 'CF': s_time(23,0), 'CY': s_time(23,0), 'RM': s_time(23,0), 'MA': s_time(23,0),
                        'TA': s_time(23,0), 'ZC': s_time(23,0), 'FG': s_time(23,0), 'OI': s_time(23,0), 'SA': s_time(23,0),
                        'p': s_time(23,0), 'j': s_time(23,0), 'jm': s_time(23,0), 'i': s_time(23,0), 'l': s_time(23,0), 'v': s_time(23,0),
                        'pp': s_time(23,0), 'eg': s_time(23,0), 'c': s_time(23,0), 'cs': s_time(23,0), 'y': s_time(23,0), 'm': s_time(23,0),
                        'a': s_time(23,0), 'b': s_time(23,0), 'rr': s_time(23,0), 'eb': s_time(23,0), 'pg': s_time(23,0), 'SH': s_time(23,00)}

# Financial futures (CFFEX): index futures close at 15:00, bond futures at 15:15.
financial_time_dict = {'IH': s_time(15,00), 'IF': s_time(15,00), 'IC': s_time(15,00), 'IM': s_time(15,00),'T': s_time(15,15), 'TS': s_time(15,15),
                       'TF': s_time(15,15), 'TL': s_time(15,15)}

# Union of every product covered by the session filter.
# NOTE(review): 'l' appears in BOTH the day and the night dict; the night
# entry wins in this merge because commodity_night_dict comes later — confirm
# which session table is actually intended for it.
all_dict = {k: v for d in [commodity_day_dict, commodity_night_dict, financial_time_dict] for k, v in d.items()}
|
||||||
|
|
||||||
|
def split_alpha_numeric(string):
    """Split *string* into its alphabetic and numeric characters.

    E.g. "rb2410" -> ("rb", "2410").  Characters that are neither letters
    nor digits are dropped; relative order within each group is preserved.
    """
    letters = "".join(ch for ch in string if ch.isalpha())
    digits = "".join(ch for ch in string if ch.isdigit())
    return letters, digits
|
||||||
|
|
||||||
|
def merged_new_tickdata(merged_up_df, alpha_chars):
    """Convert a merged new-format (CTP) raw tick frame into the unified schema.

    Builds a full timestamp from 交易日/最后修改时间/最后修改毫秒, derives the
    per-tick (instantaneous) volume from the cumulative 数量 column, maps the
    columns onto the unified English schema, drops out-of-session rows via
    filter_tickdata_time(), and returns the frame sorted by datetime.
    """
    # Full timestamp: trade date + time-of-day + millisecond field as the fraction.
    merged_up_df['datetime'] = (merged_up_df['交易日'].astype(str) + ' '
                                + merged_up_df['最后修改时间'].astype(str) + '.'
                                + merged_up_df['最后修改毫秒'].astype(str))
    # NOTE(review): the format assumes 交易日 looks like YYYY-MM-DD; rows that
    # do not parse are coerced to NaT.  (If the converted times are 8 h off,
    # apply a timedelta here.)
    merged_up_df['datetime'] = pd.to_datetime(merged_up_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')

    # Instantaneous volume = first difference of the cumulative 数量 column.
    merged_up_df['volume'] = merged_up_df['数量'] - merged_up_df['数量'].shift(1)
    merged_up_df['volume'] = merged_up_df['volume'].fillna(0)

    # Unified schema.  BUG FIX: 'volume' now carries the per-tick volume
    # computed above; the original exported the cumulative 数量 column even
    # though the instantaneous volume had just been calculated.
    merged_df = pd.DataFrame({'main_contract': merged_up_df['统一代码'],
                              'symbol': merged_up_df['合约代码'],
                              'datetime': merged_up_df['datetime'],
                              'lastprice': merged_up_df['最新价'],
                              'volume': merged_up_df['volume'],
                              'bid_p': merged_up_df['申买价一'],
                              'ask_p': merged_up_df['申卖价一'],
                              'bid_v': merged_up_df['申买量一'],
                              'ask_v': merged_up_df['申卖量一']})
    del merged_up_df

    # Helper column with the time-of-day, used only by the session filter.
    # (Replaces the original strftime/strptime round-trip, which produced the
    # exact same time objects.)
    merged_df['time'] = merged_df['datetime'].dt.time
    merged_df = filter_tickdata_time(merged_df, alpha_chars)
    del merged_df['time']

    # Sort in place; sort_values(..., inplace=True) returns None, so the
    # original's assignment of its result was dead code and is dropped.
    merged_df.sort_values(by=['datetime'], inplace=True)
    return merged_df
|
||||||
|
|
||||||
|
def filter_tickdata_time(filter_df, alpha_chars):
    """Drop rows of *filter_df* whose 'time' lies outside the product's trading session.

    *alpha_chars* is the product letter code; the session tables are the
    module-level financial_time_dict / commodity_night_dict / commodity_day_dict.
    Because ticks land on disk with a small delay, the CFFEX session ends are
    padded by 0.5 s.  Returns the filtered frame (mutated in place).
    """
    # BUG FIX: pre-initialise all four drop indexes as empty.  In the
    # original, the "unknown product" branch and the "bad night-session time"
    # branch left them undefined, so the drop() calls below raised NameError.
    # Such products are now simply left unfiltered, matching the printed message.
    drop_index1 = pd.DataFrame().index
    drop_index2 = pd.DataFrame().index
    drop_index3 = pd.DataFrame().index
    drop_index4 = pd.DataFrame().index

    if alpha_chars in financial_time_dict.keys():
        # CFFEX: no 10:15 break; lunch break 11:30-13:00.
        drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 500000)) & (filter_df['time'] < s_time(13, 0, 0, 000000))].index
        if alpha_chars in ['IH', 'IF', 'IC', 'IM']:
            # Equity-index futures: keep 09:30-15:00.
            drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 500000)) | (filter_df['time'] < s_time(9, 30, 0, 000000))].index
            print("按照中金所股指期货交易时间筛选金融期货品种")
        else:
            # Treasury futures: keep 09:30-15:15.
            drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 15, 0, 500000)) | (filter_df['time'] < s_time(9, 30, 0, 000000))].index
            print("按照中金所国债期货交易时间筛选金融期货品种")
        print("按照中金所交易时间筛选金融期货品种")

    elif alpha_chars in commodity_night_dict.keys():
        if commodity_night_dict[alpha_chars] == s_time(23, 00):
            # Daytime breaks plus the 23:00-09:00 overnight gap.
            drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 000000)) & (filter_df['time'] < s_time(10, 30, 0, 000000))].index
            drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 30, 0, 000000))].index
            drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) & (filter_df['time'] < s_time(21, 0, 0, 000000))].index
            drop_index4 = filter_df.loc[(filter_df['time'] > s_time(23, 0, 0, 000000)) | (filter_df['time'] < s_time(9, 0, 0, 000000))].index
            print("按照夜盘截止交易时间为23:00筛选商品期货品种")

        elif commodity_night_dict[alpha_chars] == s_time(1, 00):
            # Night session runs past midnight until 01:00.
            drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 000000)) & (filter_df['time'] < s_time(10, 30, 0, 000000))].index
            drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 30, 0, 000000))].index
            drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) & (filter_df['time'] < s_time(21, 0, 0, 000000))].index
            drop_index4 = filter_df.loc[(filter_df['time'] > s_time(1, 0, 0, 000000)) & (filter_df['time'] < s_time(9, 0, 0, 000000))].index
            print("按照夜盘截止交易时间为1:00筛选商品期货品种")

        elif commodity_night_dict[alpha_chars] == s_time(2, 30):
            # Night session runs past midnight until 02:30.
            drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 000000)) & (filter_df['time'] < s_time(10, 30, 0, 000000))].index
            drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 30, 0, 000000))].index
            drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) & (filter_df['time'] < s_time(21, 0, 0, 000000))].index
            drop_index4 = filter_df.loc[(filter_df['time'] > s_time(2, 30, 0, 000000)) & (filter_df['time'] < s_time(9, 0, 0, 000000))].index
            print("按照夜盘截止交易时间为2:30筛选商品期货品种")

        else:
            # Unrecognised night-session close time: warn and leave unfiltered.
            print("夜盘截止交易时间未设置或者设置错误!!!")

    elif alpha_chars in commodity_day_dict.keys():
        # Day-only products: breaks plus everything outside 09:00-15:00.
        drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 000000)) & (filter_df['time'] < s_time(10, 30, 0, 000000))].index
        drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 30, 0, 000000))].index
        drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) | (filter_df['time'] < s_time(9, 0, 0, 000000))].index
        print("按照无夜盘筛选商品期货品种")

    else:
        # Product not in any session table: warn and leave it unfiltered.
        print("%s期货品种未执行时间筛选中!!!"%(alpha_chars))

    # Remove every row that fell into a break / out-of-session window.
    filter_df.drop(labels=drop_index1, axis=0, inplace=True)
    filter_df.drop(drop_index2, axis=0, inplace=True)
    filter_df.drop(drop_index3, axis=0, inplace=True)
    filter_df.drop(drop_index4, axis=0, inplace=True)

    return filter_df
|
||||||
|
|
||||||
|
def insert_main_contract(df):
    """Prepend a continuous-contract code column ("统一代码") to *df*.

    The product letters are taken from the first row's 合约代码 and suffixed
    with "889" (secondary-main continuous; 888 = main continuous, 999 = index).
    Returns the mutated frame, the product letters, and the generated code.
    """
    # Derive the product symbol (letters only) from the first contract code.
    product, _digits = split_alpha_numeric(df.loc[0, '合约代码'])
    continuous_code = product + "889"
    print("code_value characters:", continuous_code)
    # Insert as the left-most column so downstream exports lead with it.
    df.insert(loc=0, column="统一代码", value=continuous_code)
    return df, product, continuous_code
|
||||||
|
|
||||||
|
def reinstatement_tickdata(merged_rs_df):
    """Normalise dtypes and apply arithmetic (difference) back-adjustment in place.

    At every row where the underlying contract (symbol) rolls over, the gap
    between the previous ask and the new bid is accumulated and added onto the
    bid/ask/last prices, so the stitched continuous series has no price jumps.
    Returns the same (mutated) frame.
    """
    frame = merged_rs_df

    # Normalise the column dtypes first.
    for col in ('main_contract', 'symbol'):
        frame[col] = frame[col].astype(str)
    frame['datetime'] = pd.to_datetime(frame['datetime'], errors='coerce',
                                       format='%Y-%m-%d %H:%M:%S.%f')
    for col in ('volume', 'bid_v', 'ask_v'):
        frame[col] = frame[col].astype(int)

    # Arithmetic adjustment factor: non-zero only on rollover rows.
    # (Proportional adjustment was considered and deliberately not used.)
    rollover = frame['symbol'] != frame['symbol'].shift()
    frame['复权因子'] = np.where(rollover, frame['ask_p'].shift() - frame['bid_p'], 0)
    frame['复权因子'] = frame['复权因子'].fillna(0)

    # Cumulative offset applied to every price column.
    offset = frame['复权因子'].cumsum()
    frame['bid_p_adj'] = frame['bid_p'] + offset
    frame['ask_p_adj'] = frame['ask_p'] + offset
    frame['lastprice_adj'] = frame['lastprice'] + offset

    # Overwrite the raw prices with the adjusted, 4-dp-rounded values.
    for col in ('bid_p', 'ask_p', 'lastprice'):
        frame[col] = frame[col + '_adj'].round(4)

    # Drop the scratch columns.
    for col in ('复权因子', 'bid_p_adj', 'ask_p_adj', 'lastprice_adj'):
        del frame[col]

    return frame
|
||||||
68
2.数据下载与处理/数据转换最终版/merged_tickdata__BIT_20240522.py
Normal file
68
2.数据下载与处理/数据转换最终版/merged_tickdata__BIT_20240522.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
from datetime import time as s_time
|
||||||
|
from datetime import datetime
|
||||||
|
import chardet
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def split_alpha_numeric(string):
    """Split *string* into its alphabetic and numeric characters.

    E.g. "rb2410" -> ("rb", "2410").  Characters that are neither letters
    nor digits are dropped; relative order within each group is preserved.
    """
    letters = "".join(ch for ch in string if ch.isalpha())
    digits = "".join(ch for ch in string if ch.isdigit())
    return letters, digits
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def merged_old_unprocessed_tickdata(all_csv_files, sp_char):
    """Merge the raw tick CSVs whose filename contains *sp_char* (BIT variant).

    Files are expected to be GBK-encoded.  Unreadable files are logged to
    output_error.txt together with their chardet-detected encoding and are
    skipped.  Returns the merged, de-duplicated frame (no continuous-code
    column is inserted in this variant).
    """
    csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
    print("csv_files:", csv_files)
    merged_up_df = pd.DataFrame()
    work_dir = os.getcwd()
    file_num_errors = 0

    for csv_file in csv_files:
        try:
            # Row 0 is the header; the dump is written in GBK.
            df = pd.read_csv(csv_file,
                             header=0,
                             encoding='gbk',
                             low_memory=False,
                             )
        except Exception:
            # BUG FIX: the original shadowed the loop variable with the file
            # handle and then re-merged the previous iteration's df (or hit a
            # NameError on the first failure).  Log, count, and skip the file.
            file_path = os.path.join(work_dir, csv_file)
            file_num_errors += 1
            with open(file_path, 'rb') as fh:
                raw = fh.read()
            # Detect the real encoding so the operator can convert the file.
            detected_encoding = chardet.detect(raw)['encoding']
            message = "%s:%s当前文件不为gbk格式,其文件格式为%s,需要转换为gbk格式,错误总数为%s" % (
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                file_path, detected_encoding, file_num_errors)
            print(message)
            with open('output_error.txt', 'a') as f:
                print(message, file=f)
            continue

        # Drop duplicate rows within the file, then append to the result.
        df.drop_duplicates(inplace=True)
        merged_up_df = pd.concat([merged_up_df, df], ignore_index=True)

    # Drop duplicates that span file boundaries and re-index.
    merged_up_df.drop_duplicates(subset=merged_up_df.columns.tolist(), inplace=True)
    merged_up_df.reset_index(inplace=True, drop=True)

    return merged_up_df
|
||||||
|
|
||||||
85
2.数据下载与处理/每日更新数据库.py
Normal file
85
2.数据下载与处理/每日更新数据库.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
from requests.adapters import HTTPAdapter
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Pandas console-display configuration for the debug prints below.
pd.set_option('display.max_rows', 1000)
pd.set_option('expand_frame_repr', False)  # do not wrap when there are many columns
# Keep columns aligned when printing CJK (full-width) text.
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)
|
||||||
|
|
||||||
|
|
||||||
|
def requestForNew(url):
    """GET *url* with up to 3 retries and return the requests Response.

    Raises ConnectionError when the server answers with an empty body.
    (The original printed a message and implicitly returned None, which made
    every caller fail later with a confusing AttributeError on .text/.json.)
    """
    session = requests.Session()
    # Retry transient failures up to three times on both schemes.
    session.mount('http://', HTTPAdapter(max_retries=3))
    session.mount('https://', HTTPAdapter(max_retries=3))
    session.keep_alive = False
    response = session.get(url, headers={'Connection': 'close'}, timeout=30)
    if response.content:
        return response
    print("链接失败", response)
    raise ConnectionError("empty response from %s" % url)
|
||||||
|
|
||||||
|
|
||||||
|
def getDate():
    """Return the latest trade date (str) from Sina's SH composite index quote."""
    url = 'http://hq.sinajs.cn/list=sh000001'
    payload = requestForNew(url).text
    # The quote is one comma-separated record; the 4th field from the end
    # carries the quote date.
    fields = payload.split(',')
    return str(fields[-4])
|
||||||
|
|
||||||
|
|
||||||
|
# 通过新浪财经获取每日更新的股票代码
|
||||||
|
def getStockCodeForEveryday():
    """Fetch the daily A-share snapshot from Sina, page by page.

    Pages until the API returns an empty list (so new listings are covered),
    renames the columns to Chinese, stamps the trade date from getDate(),
    and returns the frame restricted to the standard column subset.
    """
    frames = []
    for page in range(1, 100):
        # Pages 1..99 are more than enough; the API returns [] past the end.
        url = 'http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=' \
              + str(page) + '&num=80&sort=changepercent&asc=0&node=hs_a&symbol=&_s_r_a=page'
        content = requestForNew(url).json()
        if not content:
            print("股票信息,获取完毕。")
            break
        print("正在读取页面" + str(page))
        # Be polite to the endpoint between pages.
        time.sleep(3)
        # BUG FIX: DataFrame.append() was removed in pandas 2.0; collect the
        # pages and concatenate once at the end instead.
        frames.append(pd.DataFrame(content, dtype='float'))
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    rename_dict = {'symbol': '股票代码', 'code': '交易日期', 'name': '股票名称', 'open': '开盘价',
                   'settlement': '前收盘价', 'trade': '收盘价', 'high': '最高价', 'low': '最低价',
                   'buy': '买一', 'sell': '卖一', 'volume': '成交量', 'amount': '成交额',
                   'changepercent': '涨跌幅', 'pricechange': '涨跌额',
                   'mktcap': '总市值', 'nmc': '流通市值', 'ticktime': '数据更新时间', 'per': 'per', 'pb': '市净率',
                   'turnoverratio': '换手率'}
    df.rename(columns=rename_dict, inplace=True)
    # The float-converted 'code' column is overwritten with the trade date.
    tradeDate = getDate()
    df['交易日期'] = tradeDate
    df = df[['股票代码', '股票名称', '交易日期', '开盘价', '最高价', '最低价', '收盘价', '前收盘价', '成交量', '成交额', '流通市值', '总市值']]
    return df
|
||||||
|
|
||||||
|
|
||||||
|
# Fetch today's A-share snapshot and append each stock's row to its own
# per-code CSV history file under ./data/.
df = getStockCodeForEveryday()
print(df)

for i in df.index:
    # One-row slice so to_csv keeps the tabular form.
    t = df.iloc[i:i + 1, :]
    stock_code = t.iloc[0]['股票代码']

    # Path of the per-stock history file.
    path = './data/' \
        + stock_code + '.csv'
    # File exists: not a new listing — append without repeating the header.
    if os.path.exists(path):
        t.to_csv(path, header=None, index=False, mode='a', encoding='gbk')
    # File missing: a new listing.
    else:
        # Write the banner header line first, then the data row.
        pd.DataFrame(columns=['数据由邢不行整理']).to_csv(path, index=False, encoding='gbk')
        t.to_csv(path, index=False, mode='a', encoding='gbk')
    print(stock_code)
|
||||||
Reference in New Issue
Block a user