Enhance trading workflow with new order-flow management

- Add dingdanliu_nb_mflow for improved order-flow processing
- Update related scripts and configurations to support the new functionality
Win_home committed on 2025-03-15 22:45:08 +08:00
parent e2c54c6409
commit f925dff46b
21 changed files with 5345 additions and 0 deletions

View File

@@ -0,0 +1,698 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'tushare'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtushare\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mts\u001b[39;00m\n\u001b[0;32m 2\u001b[0m ts\u001b[38;5;241m.\u001b[39mset_token(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m78282dabb315ee578fb73a9b328f493026e97d5af709acb331b7b348\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 3\u001b[0m pro \u001b[38;5;241m=\u001b[39m ts\u001b[38;5;241m.\u001b[39mpro_api()\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'tushare'"
]
}
],
"source": [
"import tushare as ts\n",
"ts.set_token('78282dabb315ee578fb73a9b328f493026e97d5af709acb331b7b348')\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"300\n",
"<class 'int'>\n",
"0.15\n",
"<class 'float'>\n"
]
}
],
"source": [
"from datetime import datetime, timedelta\n",
"import pandas as pd\n",
"fees_df = pd.read_csv('./futures_fees_info.csv', usecols= [1, 4, 17, 19, 25],names=['合约', '合约乘数', '做多保证金率', '做空保证金率', '品种代码'])\n",
"data0 = int(fees_df[fees_df['合约'] == 'IH2407']['合约乘数'].iloc[0])\n",
"\n",
"print(data0)\n",
"print(type(data0))\n",
"data1 = float(fees_df[fees_df['合约'] == 'IH2407']['做多保证金率'].iloc[0])\n",
"print(data1)\n",
"print(type(data1))\n",
"# fees_df[fees_df['合约'] == 'IH2407']['做空保证金率'].iloc[0]\n",
"# (fees_df[fees_df['合约'] == 'IH2407']['做多保证金率'].iloc[0] + fees_df[fees_df['合约'] == 'IH2407']['做空保证金率'].iloc[0])/2"
]
},
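{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch, assuming fees_df from the cell above: average the long\n",
"# and short margin rates for one contract, realising the computation the\n",
"# commented-out lines above hint at.\n",
"row = fees_df[fees_df['合约'] == 'IH2407']\n",
"avg_margin_rate = (float(row['做多保证金率'].iloc[0]) + float(row['做空保证金率'].iloc[0])) / 2\n",
"print(avg_margin_rate)"
]
},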
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime, timedelta\n",
"import pandas as pd\n",
"fees_df = pd.read_csv('./futures_fees_info.csv', usecols= [1, 4, 17, 19],names=['合约', '合约乘数', '做多保证金率', '做空保证金率'])\n",
"contacts_df = pd.read_csv('./main_contacts.csv', usecols= [16, 17],names=['主连代码', '品种代码'])\n",
"\n",
"def get_main_contact_on_time(main_symbol_code):\n",
" data_str = ''\n",
" alpha_chars = ''\n",
" numeric_chars = ''\n",
" main_code = ''\n",
"\n",
" # main_symbol = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[0]\n",
" # exchange_id = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[1]\n",
" main_symbol = contacts_df[contacts_df['品种代码'] == main_symbol_code]['主连代码'].iloc[0]\n",
"\n",
"\n",
" # # 拆分主连合约代码为交易标识和交易所代码(交易市场)\n",
" # main_symbol = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[0]\n",
" # exchange_id = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[1]\n",
"\n",
" # # 拆分交易标识中的合约产品代码和交割月份\n",
" # for char in main_symbol:\n",
" # if char.isalpha():\n",
" # alpha_chars += char\n",
" # elif char.isdigit():\n",
" # numeric_chars += char\n",
" \n",
" # # 监理交易所映射\n",
" # exchange = {'CFX': 'CFFEX', 'SHF':'SHFE', 'DCE':'DCE', 'GFE':'GFEX', 'INE':'INE', 'ZCE':'CZCE'}\n",
"\n",
" # # 计算per_unit交易单位(每手)和转换后交易所识别的main_code主连代码\n",
" # if exchange_id == 'CFX' or exchange_id == 'SHF' or exchange_id == 'DCE' or exchange_id == 'GFE' or exchange_id == 'INE':\n",
" # df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
" # # ts_code = df[df['symbol'] == main_symbol]['ts_code'].iloc[0]\n",
" # per_unit = df[df['symbol'] == main_symbol]['per_unit'].iloc[0]\n",
"\n",
" # # ds = pro.fut_settle(trade_date = data_str, ts_code =ts_code)\n",
" # # ds['margin_rate'] = (ds['long_margin_rate'] + ds['short_margin_rate'])/2\n",
" # # margin_rate = ds['margin_rate'].iloc[0]\n",
" \n",
" \n",
" # if exchange_id == 'CFX':\n",
" # main_code = main_symbol\n",
" # elif exchange_id == 'SHF' or exchange_id == 'DCE' or exchange_id == 'GFE' or exchange_id == 'INE':\n",
" # lower_alpha_chars = str.lower(alpha_chars) \n",
" # main_code = lower_alpha_chars + numeric_chars\n",
" # elif exchange_id == 'ZCE':\n",
" # true_numeric_chars = numeric_chars[1:]\n",
" # main_code = alpha_chars + true_numeric_chars \n",
" # df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
" # per_unit = df[df['symbol'] == main_code]['per_unit'].iloc[0]\n",
" # main_code = alpha_chars + true_numeric_chars\n",
"\n",
" # print(\"最终使用的主连代码:\",main_code) \n",
" # print(\"%s的交易单位(每手):%s\"%(main_symbol, per_unit))\n",
" return main_symbol\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'IH2407'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_main_contact_on_time('IH')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime, timedelta\n",
"def get_main_contact_on_time(main_symbol_code):\n",
" data_str = ''\n",
" alpha_chars = ''\n",
" numeric_chars = ''\n",
" main_code = ''\n",
"\n",
" # 获取主连合约代码如果是当天15点前日盘则获取前一天的合约代码如果是当天15点后晚盘则获取今天的的合约代码\n",
" now = datetime.now()\n",
" if now.hour < 15:\n",
" data_str = (now - timedelta(days=1)).date().strftime('%Y%m%d')\n",
" else:\n",
" data_str = now.date().strftime('%Y%m%d')\n",
"\n",
" # 拆分主连合约代码为交易标识和交易所代码(交易市场)\n",
" main_symbol = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[0]\n",
" exchange_id = pro.fut_mapping(ts_code=main_symbol_code, trade_date = data_str).loc[0,'mapping_ts_code'].split('.')[1]\n",
"\n",
" # 拆分交易标识中的合约产品代码和交割月份\n",
" for char in main_symbol:\n",
" if char.isalpha():\n",
" alpha_chars += char\n",
" elif char.isdigit():\n",
" numeric_chars += char\n",
" \n",
" # 监理交易所映射\n",
" exchange = {'CFX': 'CFFEX', 'SHF':'SHFE', 'DCE':'DCE', 'GFE':'GFEX', 'INE':'INE', 'ZCE':'CZCE'}\n",
"\n",
" # 计算per_unit交易单位(每手)和转换后交易所识别的main_code主连代码\n",
" if exchange_id == 'CFX' or exchange_id == 'SHF' or exchange_id == 'DCE' or exchange_id == 'GFE' or exchange_id == 'INE':\n",
" df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
" # ts_code = df[df['symbol'] == main_symbol]['ts_code'].iloc[0]\n",
" per_unit = df[df['symbol'] == main_symbol]['per_unit'].iloc[0]\n",
"\n",
" # ds = pro.fut_settle(trade_date = data_str, ts_code =ts_code)\n",
" # ds['margin_rate'] = (ds['long_margin_rate'] + ds['short_margin_rate'])/2\n",
" # margin_rate = ds['margin_rate'].iloc[0]\n",
" \n",
" \n",
" if exchange_id == 'CFX':\n",
" main_code = main_symbol\n",
" elif exchange_id == 'SHF' or exchange_id == 'DCE' or exchange_id == 'GFE' or exchange_id == 'INE':\n",
" lower_alpha_chars = str.lower(alpha_chars) \n",
" main_code = lower_alpha_chars + numeric_chars\n",
" elif exchange_id == 'ZCE':\n",
" true_numeric_chars = numeric_chars[1:]\n",
" main_code = alpha_chars + true_numeric_chars \n",
" df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
" per_unit = df[df['symbol'] == main_code]['per_unit'].iloc[0]\n",
" main_code = alpha_chars + true_numeric_chars\n",
"\n",
" print(\"最终使用的主连代码:\",main_code) \n",
" print(\"%s的交易单位(每手):%s\"%(main_symbol, per_unit))\n",
" return main_code\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sb_1 = get_main_contact_on_time('IH.CFX')\n",
"sb_2 = get_main_contact_on_time('cu.SHF')\n",
"sb_3 = get_main_contact_on_time('eb.DCE')\n",
"sb_4 = get_main_contact_on_time('si.GFE')\n",
"sb_5 = get_main_contact_on_time('sc.INE') \n",
"sb_6 = get_main_contact_on_time('SA.ZCE')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# df = pro.fut_basic(exchange='DCE', fut_type='1',fut_code = 'j' , fields='ts_code,symbol,exchange,name,fut_code,multiplier,trade_unit,per_unit,quote_unit,quote_unit_desc,d_mode_desc,list_date,delist_date,d_month,last_ddate,trade_time_desc')\n",
"# df = pro.fut_basic(exchange='SHFE', fut_type='1', fut_code = 'au', fields='ts_code,symbol,name,list_date,delist_date')\n",
"df = pro.fut_basic(exchange='CZCE', fut_type='1', fut_code = 'SA', fields='ts_code,symbol,exchange,name,fut_code,per_unit')\n",
"# index_of_value = df.index[df['symbol'] == 'AU2408']\n",
"df.head()\n",
"value = df[df['symbol'] == 'SA409']['per_unit'].iloc[0]\n",
"print(value)\n",
"# df.loc[index_of_value, 'per_unit'].value\n",
"# df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pro.fut_mapping(ts_code='SA.ZCE')\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ds = pro.fut_settle(trade_date = '20240625', ts_code ='SA2409.ZCE')\n",
"# ds = pro.fut_settle(trade_date='20230625', exchange='ZCE')\n",
"# ds = pro.fut_settle(ts_code='SA409.ZCE', exchange='CZCE')\n",
"# pro.fut_settle(trade_date='20181114', exchange='CZCE')\n",
"pro.fut_settle(ts_code='AP2510.ZCE', exchange='CZCE')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds['margin_rate'] = round((ds['long_margin_rate'] + ds['short_margin_rate'])/2,2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds['margin_rate'].iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"value = df.loc[index_of_value, 'per_unit'].iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(value)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_IH = pro.fut_mapping(ts_code='IH.CFX')\n",
"print(df_IH)\n",
"df_IH.to_csv(r\"E:\\data\\mapping_ts_code_IH.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_IF = pro.fut_mapping(ts_code='IF.CFX')\n",
"print(df_IF)\n",
"df_IF.to_csv(r\"E:\\data\\mapping_ts_code_IF.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_IC = pro.fut_mapping(ts_code='IC.CFX')\n",
"print(df_IC)\n",
"df_IC.to_csv(r\"E:\\data\\mapping_ts_code_IC.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_IM = pro.fut_mapping(ts_code='IM.CFX')\n",
"print(df_IM)\n",
"df_IM.to_csv(r\"E:\\data\\mapping_ts_code_IM.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_TF = pro.fut_mapping(ts_code='TF.CFX')\n",
"print(df_TF)\n",
"df_TF.to_csv(r\"E:\\data\\mapping_ts_code_TF.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_T = pro.fut_mapping(ts_code='T.CFX')\n",
"print(df_T)\n",
"df_T.to_csv(r\"E:\\data\\mapping_ts_code_T.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_TS = pro.fut_mapping(ts_code='TS.CFX')\n",
"print(df_TS)\n",
"df_TS.to_csv(r\"E:\\data\\mapping_ts_code_TS.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_TL = pro.fut_mapping(TL_code='TL.CFX')\n",
"print(df_TL)\n",
"df_TL.to_csv(r\"E:\\data\\mapping_TL_code_TL.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'pro' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m df_TL \u001b[38;5;241m=\u001b[39m \u001b[43mpro\u001b[49m\u001b[38;5;241m.\u001b[39mfut_mapping(ts_code\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTL.CFX\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(df_TL)\n\u001b[0;32m 3\u001b[0m df_TL\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mD:\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mmapping_TL_code_TL.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[1;31mNameError\u001b[0m: name 'pro' is not defined"
]
}
],
"source": [
"df_TL = pro.fut_mapping(ts_code='TL.CFX')\n",
"print(df_TL)\n",
"df_TL.to_csv(r\"D:\\data\\mapping_TL_code_TL.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import akshare as ak\n",
"\n",
"futures_comm_info_df = ak.futures_comm_info(symbol=\"上海国际能源交易中心\")\n",
"print(futures_comm_info_df[\"保证金-买开\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"futures_fees_info_df = ak.futures_fees_info()\n",
"print(futures_fees_info_df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"futures_fees_info_df.to_csv(r'./futures_fees_info.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"futures_display_main_sina_df = ak.futures_display_main_sina()\n",
"print(futures_display_main_sina_df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"url = 'https://www.9qihuo.com/hangqing' #上期所铜结算参数地址https://www.9qihuo.com/hangqing\n",
"data =pd.read_html(url) #读取网页上的表格\n",
"dt=data[4].drop([0],axis=0).append(data[5],ignore_index=True) #提取结算参数到DataFrame格式\n",
"#调整格式\n",
"dt.columns=dt.iloc[0]\n",
"dt.drop([0],axis=0,inplace=True) \n",
"dt.set_index('合约代码',inplace=True)\n",
"print(dt) #输出铜的结算参数"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import csv\n",
"\n",
"# 目标网址\n",
"url = \"https://www.9qihuo.com/hangqing\"\n",
"\n",
"# 发送GET请求禁用SSL验证\n",
"response = requests.get(url, verify=False)\n",
"response.encoding = 'utf-8' # 确保编码正确\n",
"\n",
"# 解析网页内容\n",
"soup = BeautifulSoup(response.text, 'lxml')\n",
"\n",
"# 找到目标表格\n",
"table = soup.find('table', {'id': 'tblhangqinglist'})\n",
"\n",
"# 初始化CSV文件\n",
"with open('main_contacts.csv', mode='w', newline='', encoding='utf-8') as file:\n",
" writer = csv.writer(file)\n",
" \n",
" # 遍历表格的所有行\n",
" for row in table.find_all('tr'):\n",
" # 获取每一行的所有单元格\n",
" cols = row.find_all(['th', 'td'])\n",
" # 提取文本内容并写入CSV文件\n",
" writer.writerow([col.text.strip() for col in cols])\n",
"\n",
"print(\"表格已成功保存为main_contacts.csv\")\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"df = pd.read_csv('./main_contacts.csv')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df['交易品种'] = df['合约'].str.split(r'[()]', n=1, expand=True)[0]\n",
"df['主连代码'] = df['合约'].str.split(r'[()]', n=2, expand=True)[1]\n",
"\n",
"# df['品种代码'] = df['主连代码'].str.split(str.isalpha(df['主连代码']), n=1, expand=True)[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import re\n",
"\n",
"# 创建示例DataFrame\n",
"\n",
"# 定义拆分字母和数字的函数\n",
"def split_alpha_numeric(s):\n",
" match = re.match(r\"([a-zA-Z]+)([0-9]+)\", s)\n",
" if match:\n",
" return match.groups()\n",
" else:\n",
" return (s, None) # 如果没有匹配返回原始字符串和None\n",
"\n",
"# 应用函数并创建新列\n",
"df[['品种代码', '交割月份']] = df['主连代码'].apply(lambda x: pd.Series(split_alpha_numeric(x)))\n",
"\n",
"print(df)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('./main_contacts_all.csv')"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
"import schedule\n",
"import time\n",
"from datetime import datetime\n",
"\n",
"# jerome增加akshare库\n",
"import akshare as ak\n",
"\n",
"# jerome:增加下列库用于爬虫获取主力连续代码\n",
"import pandas as pd\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import csv\n",
"import re\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"def get_futures_fees_info():\n",
" futures_fees_info_df = ak.futures_fees_info()\n",
" futures_fees_info_df.to_csv(r'./futures_fees_info.csv', index=False)\n",
"\n",
"def get_main_contacts():\n",
" url = \"https://www.9qihuo.com/hangqing\"\n",
"\n",
" # 发送GET请求禁用SSL验证\n",
" response = requests.get(url, verify=False)\n",
" response.encoding = 'utf-8' # 确保编码正确\n",
"\n",
" # 解析网页内容\n",
" soup = BeautifulSoup(response.text, 'lxml')\n",
"\n",
" # 找到目标表格\n",
" table = soup.find('table', {'id': 'tblhangqinglist'})\n",
"\n",
" # 初始化CSV文件\n",
" with open('tmp_main_contacts.csv', mode='w', newline='', encoding='utf-8') as file:\n",
" writer = csv.writer(file)\n",
" \n",
" # 遍历表格的所有行\n",
" for row in table.find_all('tr'):\n",
" # 获取每一行的所有单元格\n",
" cols = row.find_all(['th', 'td'])\n",
" # 提取文本内容并写入CSV文件\n",
" writer.writerow([col.text.strip() for col in cols])\n",
"\n",
" df = pd.read_csv('./tmp_main_contacts.csv',encoding='utf-8')\n",
" df['交易品种'] = df['合约'].str.split(r'[()]', n=1, expand=True)[0]\n",
" df['主连代码'] = df['合约'].str.split(r'[()]', n=2, expand=True)[1]\n",
"\n",
" df[['品种代码', '交割月份']] = df['主连代码'].apply(lambda x: pd.Series(split_alpha_numeric(x)))\n",
" df.to_csv('./main_contacts.csv')\n",
"\n",
" print(\"期货主力品种表已经保存为main_contacts.csv\")\n",
" os.remove(\"./tmp_main_contacts.csv\")\n",
"\n",
"# 拆分字母和数字的函数\n",
"def split_alpha_numeric(s):\n",
" match = re.match(r\"([a-zA-Z]+)([0-9]+)\", s)\n",
" if match:\n",
" return match.groups()\n",
" else:\n",
" return (s, None) # 如果没有匹配返回原始字符串和None"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"get_futures_fees_info()"
]
},
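{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch of how the refresh could be scheduled: `schedule` and\n",
"# `time` are imported above but never used, so this wiring is an assumption,\n",
"# not part of the original workflow.\n",
"schedule.every().day.at('15:30').do(get_futures_fees_info)\n",
"schedule.every().day.at('15:30').do(get_main_contacts)\n",
"\n",
"# Uncomment to run; the loop blocks the notebook kernel.\n",
"# while True:\n",
"#     schedule.run_pending()\n",
"#     time.sleep(60)"
]
},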
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"期货主力品种表已经保存为main_contacts.csv\n"
]
}
],
"source": [
"get_main_contacts()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,180 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# 配置迅投研数据服务\n",
"from vnpy.trader.setting import SETTINGS\n",
"\n",
"SETTINGS[\"datafeed.name\"] = \"xt\"\n",
"SETTINGS[\"datafeed.username\"] = \"token\"\n",
"SETTINGS[\"datafeed.password\"] = \"ef326f853a744c58572f0158d470912c38a09552\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# 加载功能模块\n",
"from datetime import datetime\n",
"\n",
"from vnpy.trader.datafeed import get_datafeed\n",
"from vnpy.trader.object import HistoryRequest, Exchange, Interval\n",
"\n",
"from vnpy_sqlite import Database as SqliteDatabase\n",
"#from elite_database import Database as EliteDatabase\n",
"\n",
"#增加\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 初始化数据服务\n",
"datafeed = get_datafeed()\n",
"datafeed.init()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# 交易所映射关系\n",
"EXCHANGE_XT2VT = {\n",
" \"SH\": Exchange.SSE,\n",
" \"SZ\": Exchange.SZSE,\n",
" \"BJ\": Exchange.BSE,\n",
" \"SF\": Exchange.SHFE,\n",
" \"IF\": Exchange.CFFEX,\n",
" \"INE\": Exchange.INE,\n",
" \"DF\": Exchange.DCE,\n",
" \"ZF\": Exchange.CZCE,\n",
" \"GF\": Exchange.GFEX\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"数据长度 41336\n"
]
}
],
"source": [
"# 查询期货历史数据\n",
"req = HistoryRequest(\n",
" symbol=\"rb00\", # 加权指数 \n",
" # symbol=\"IF00\", # 主力连续(未平滑)\n",
" # exchange=Exchange.CFFEX,\n",
" exchange = EXCHANGE_XT2VT[\"SF\"],\n",
" start=datetime(2023, 1, 1),\n",
" end=datetime(2023, 11, 24),#end=datetime.now(),\n",
" interval=Interval.TICK\n",
")\n",
"\n",
"ticks = datafeed.query_tick_history(req)\n",
"print(\"数据长度\", len(ticks))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 创建Elite数据库实例并写入数据\n",
"#db2 = EliteDatabase()\n",
"#db2.save_bar_data(bars)"
]
},
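{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch, assuming the unused SqliteDatabase import above was meant\n",
"# for local storage: query minute bars and save them with vn.py's database API.\n",
"bar_req = HistoryRequest(\n",
"    symbol=\"rb00\",\n",
"    exchange=EXCHANGE_XT2VT[\"SF\"],\n",
"    start=datetime(2023, 1, 1),\n",
"    end=datetime(2023, 11, 24),\n",
"    interval=Interval.MINUTE\n",
")\n",
"bars = datafeed.query_bar_history(bar_req)\n",
"db = SqliteDatabase()\n",
"db.save_bar_data(bars)"
]
},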
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(ticks)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 创建CSV文件并写入数据\n",
"filepath = \"rb00_11.csv\" # CSV文件保存路径及文件名\n",
"df.to_csv(filepath, index=False) # index参数设置为False表示不包含索引列\n",
"#df.to_csv(filepath, mode='a', index=False, header=False) # index参数设置为False表示不包含索引列"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 读取CSV文件\n",
"data = pd.read_csv(\"IC0.csv\")\n",
"# 对数据进行排序\n",
"sorted_data = data.sort_values(by='datetime')\n",
"# 将排序结果写入CSV文件\n",
"sorted_data.to_csv('sort_IC00.csv', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"vscode": {
"interpreter": {
"hash": "1b43cb0bd93d5abbadd54afed8252f711d4681fe6223ad6b67ffaee289648f85"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -0,0 +1,241 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"map_file = r\"D:\\data\\mapping_ts_code_IH.csv\" #主力合约统计表\n",
"file_path = str(\"F:/2022_tickdata/marketdatacsv\") #csv文件绝对地址前缀\n",
"\n",
"header_file = r\"D:\\data\\fut_marketdata_head.csv\" # 包含表头的 CSV 文件名\n",
"# data_file = r\"D:\\combined_market_data.csv\" # 包含数据的 CSV 文件名\n",
"output_file = r\"D:\\IH888_up_2022.csv\" # 合并后的输出文件名\n",
"total_code = 'IH888'\n",
"\n",
"sp_chars = ['csv2022'] #'csv2021', 'csv2022',需要查找的主力年份文件"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(map_file, index_col=0, encoding='utf', low_memory=False)\n",
"df['mapping_ts_code_new'] = df['mapping_ts_code'].apply(lambda x: x.split('.')[0])\n",
"df['temp_path']= file_path\n",
"df['final_path'] = df['temp_path'].astype(str) + df['trade_date'].astype(str) + '/' + df['mapping_ts_code_new'] + '.csv'\n",
"del df['mapping_ts_code_new'], df['temp_path']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import time as s_time\n",
"import datetime\n",
"import pandas as pd\n",
"for sp_char in sp_chars:\n",
" csv_files = [sp_file for sp_file in df['final_path'] if sp_char in sp_file]\n",
" print(csv_files[:5])\n",
" print(csv_files[-5:])\n",
" dfs = pd.DataFrame()\n",
" for file_path in csv_files:\n",
" df_temp = pd.read_csv(file_path) \n",
" print('读取%s成功'%(file_path))\n",
" # df_temp.columns = ['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
" # df_temp['datetime'] = df_temp['交易日'].astype(str) + ' '+df_temp['最后修改时间'].astype(str) + '.' + df_temp['最后修改毫秒'].astype(str)\n",
" # df_temp['datetime'] = pd.to_datetime(df_temp['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')\n",
" # df_temp['tmp_time'] = df_temp['datetime'].dt.strftime('%H:%M:%S.%f')\n",
" # df_temp['time'] = df_temp['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time\n",
" # drop_index1 = df_temp.loc[(df_temp['time'] > s_time(11, 30, 0)) & (df_temp['time'] < s_time(13, 0, 0))].index\n",
" # drop_index2 = df_temp.loc[(df_temp['time'] > s_time(15, 0, 0)) | (df_temp['time'] < s_time(9, 30, 0))].index\n",
" # df_temp.drop(drop_index1, axis=0, inplace=True)\n",
" # df_temp.drop(drop_index2, axis=0, inplace=True)\n",
" # dfs.append(df_temp)\n",
" # df_temp.columns=['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
" df_temp.columns = ['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
" # print(df_temp.tail())\n",
" # # print(\"表头添加成功!\")\n",
" # dfs = pd.concat([dfs, df_temp],ignore_index=True, axis= 0)# \n",
" # print(dfs.tail())\n",
" # dfs = pd.concat([df_temp, ignore_index=True)\n",
" dfs = pd.concat([dfs, df_temp], ignore_index=True)\n",
" \n"
]
},
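{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch, assuming dfs from the loop above: combine the trade-date,\n",
"# time and millisecond columns into one datetime column. The format string is\n",
"# an assumption about the raw file layout and may need adjusting.\n",
"dfs['datetime'] = pd.to_datetime(\n",
"    dfs['交易日'].astype(str) + ' ' + dfs['最后修改时间'].astype(str) + '.' + dfs['最后修改毫秒'].astype(str),\n",
"    errors='coerce', format='%Y%m%d %H:%M:%S.%f'\n",
")"
]
},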
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dfs.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dfs.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"combined_df = dfs.sort_values(by = ['交易日', '最后修改时间', '最后修改毫秒'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"combined_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"combined_df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"combined_df.insert(0,'统一代码', total_code)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"combined_df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"combined_df.to_csv(output_file, index=False)\n",
"print(\"合并完成,并已导出到%s文件。\"%(output_file))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 以下为其他代码\n",
"import pandas as pd\n",
" \n",
"try:\n",
" file_path = 'path/to/your/file.csv' # 替换为你的文件路径\n",
" df = pd.read_csv(file_path)\n",
"except FileNotFoundError:\n",
" print(f\"无法找到文件:{file_path}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"for k in ['2021']:# , '2023'\n",
" for v in [ 'IH', 'IF', 'IC', 'IM', 'T', 'TF', 'TL', 'TS']: \n",
" print('当前年份为:%s,品种为:%s'%(k,v))\n",
" map_file = 'D:/data/mapping_ts_code_%s.csv'%(v) #v\n",
" file_path = 'F:/%s_tickdata/marketdatacsv'%(k) #csv文件绝对地址前缀\n",
" output_file = 'D:/%s888_up_%s.csv'%(v,k) # 合并后的输出文件名\n",
" total_code = '%s888'%(v)\n",
" sp_chars = ['csv%s'%(k)] #'csv2021', 'csv2022',需要查找的主力年份文件\n",
"\n",
" try:\n",
" df = pd.read_csv(map_file, index_col=0, encoding='utf', low_memory=False)\n",
" except FileNotFoundError:\n",
" raise ValueError(\"主力合约统计表文件不存在,请检查文件路径是否正确。\")\n",
" df['mapping_ts_code_new'] = df['mapping_ts_code'].apply(lambda x: x.split('.')[0])\n",
" df['temp_path']= file_path\n",
" df['final_path'] = df['temp_path'].astype(str) + df['trade_date'].astype(str) + '/' + df['mapping_ts_code_new'] + '.csv'\n",
" del df['mapping_ts_code_new'], df['temp_path']\n",
"\n",
" for sp_char in sp_chars:\n",
" csv_files = [sp_file for sp_file in df['final_path'] if sp_char in sp_file]\n",
" if csv_files:\n",
" print(csv_files[:5])\n",
" print(csv_files[-5:])\n",
" dfs = pd.DataFrame()\n",
" for path in csv_files:\n",
" try:\n",
" df_temp = pd.read_csv(path) \n",
" # print('读取%s成功'%(path))\n",
" except FileNotFoundError:\n",
" raise ValueError(\"%s文件不存在请检查文件路径是否正确。\"%(path))\n",
" break\n",
" df_temp.columns = ['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
" dfs = pd.concat([dfs, df_temp], ignore_index=True)\n",
" combined_df = dfs.sort_values(by = ['交易日', '最后修改时间', '最后修改毫秒'])\n",
" combined_df.insert(0,'统一代码', total_code)\n",
" combined_df.to_csv(output_file, index=False)\n",
" print(\"合并完成,并已导出到%s文件。\"%(output_file))\n",
" else:\n",
" print('品种%s在%s年无数据!'%(v,k))\n",
" continue\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,138 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import shelve\n",
"\n",
"# 要合并的shelve数据库路径\n",
"shelve_files = ['D:/contract_data1.dat', 'D:/contract_data2.dat', 'D:/contract_data3.dat']\n",
"# 合并后的新数据库路径\n",
"new_shelve_file = 'D:/contract_data3.dat'\n",
"\n",
"# 创建一个新的shelve数据库来存储合并后的内容\n",
"with shelve.open(new_shelve_file, writeback=True) as new_db:\n",
" for shelve_file in shelve_files:\n",
" try:\n",
" with shelve.open(shelve_file) as db:\n",
" for key in db:\n",
" if key in new_db:\n",
" print(f\"Warning: Key {key} already exists in the new database. Overwriting.\")\n",
" new_db[key] = db[key]\n",
" except Exception as e:\n",
" print(f\"Error processing {shelve_file}: {e}\")\n",
"\n",
"print(f\"Databases merged into {new_shelve_file}\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import shelve\n",
"import os\n",
"\n",
"# 要合并的shelve数据库路径\n",
"shelve_files = [r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview1', r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview2', r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview3']\n",
"# 合并后的新数据库路径\n",
"new_shelve_file = r'D:\\bar_overview'\n",
"\n",
"# 创建一个新的shelve数据库来存储合并后的内容\n",
"with shelve.open(new_shelve_file, writeback=True) as new_db:\n",
" for shelve_file in shelve_files:\n",
" # 检查文件是否存在\n",
" if not os.path.exists(shelve_file):\n",
" print(f\"错误:文件 {shelve_file} 不存在。\")\n",
" continue\n",
" try:\n",
" # 打开并读取shelve数据库\n",
" with shelve.open(shelve_file) as db:\n",
" for key in db:\n",
" if key in new_db:\n",
" print(f\"警告:键 {key} 已存在于新数据库中。将覆盖。\")\n",
" new_db[key] = db[key]\n",
" except Exception as e:\n",
" print(f\"处理文件 {shelve_file} 时出错:{e}\")\n",
" if 'db type could not be determined' in str(e):\n",
" print(f\"提示:文件 {shelve_file} 可能已损坏或不是一个shelve数据库。\")\n",
" continue\n",
"\n",
"print(f\"数据库已合并到 {new_shelve_file}\")\n"
]
},
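{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal sanity check, assuming new_shelve_file from the cell above:\n",
"# count the keys that ended up in the merged database.\n",
"with shelve.open(new_shelve_file) as merged:\n",
"    print('merged keys:', len(list(merged.keys())))"
]
},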
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import shelve\n",
"f_shelve = shelve.open(r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview1') # 创建一个文件句柄\n",
"# 使用for循环打印内容\n",
"for k,v in f_shelve.items():\n",
" print(k,v)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import shelve\n",
"\n",
"# 打开所有源 shelve 数据库\n",
"db1 = shelve.open(r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview1')\n",
"db2 = shelve.open(r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview2')\n",
"db3 = shelve.open(r'C:\\Users\\Administrator\\.vntrader\\elite_db\\bar_overview3')\n",
"\n",
"# 创建一个新的目标 shelve 数据库\n",
"merged_db = shelve.open(r'D:\\bar_overview')\n",
"\n",
"# 将第一个数据库的所有条目添加到新的数据库中\n",
"for key in db1:\n",
" merged_db[key] = db1[key]\n",
"\n",
"# 将第二个数据库的所有条目添加到新的数据库中\n",
"for key in db2:\n",
" merged_db[key] = db2[key]\n",
"\n",
"# 将第三个数据库的所有条目添加到新的数据库中\n",
"for key in db3:\n",
" merged_db[key] = db3[key]\n",
"\n",
"# 关闭所有数据库\n",
"db1.close()\n",
"db2.close()\n",
"db3.close()\n",
"merged_db.close()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,309 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import sqlite3\n",
"# import pandas as pd\n",
"\n",
"# # 连接到SQLite数据库\n",
"# conn = sqlite3.connect('database.db')\n",
"\n",
"# # 从数据库中读取表数据到DataFrame\n",
"# table_name = 'your_table_name' # 替换为实际表名\n",
"# query = f\"SELECT * FROM {table_name}\"\n",
"# df = pd.read_sql_query(query, conn)\n",
"\n",
"# 按照“本地代码”分组并导出为CSV文件\n",
"for local_code, group in df.groupby('本地代码'):\n",
" # 为每个“本地代码”生成一个CSV文件文件名使用该代码值\n",
" csv_filename = f\"{local_code}.csv\"\n",
" group.to_csv(csv_filename, index=False, encoding='utf-8-sig')\n",
" print(f\"数据已导出到 {csv_filename}\")\n",
"\n",
"# 关闭数据库连接\n",
"conn.close()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sqlite3\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 连接到SQLite数据库\n",
"conn = sqlite3.connect(r'D:\\of_data\\database.db')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 从数据库中读取表数据到DataFrame\n",
"table_name = 'dbbardata' # 替换为实际表名\n",
"query = f\"SELECT * FROM {table_name}\"\n",
"df = pd.read_sql_query(query, conn)"
]
},
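{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch for confirming the table name: list every table in the\n",
"# SQLite file via sqlite_master, using the conn opened above.\n",
"tables = pd.read_sql_query(\"SELECT name FROM sqlite_master WHERE type='table'\", conn)\n",
"print(tables)"
]
},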
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"del(df['id'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"del group"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"数据已导出到 AP00_CZCE.csv\n",
"数据已导出到 APJQ00_CZCE.csv\n",
"数据已导出到 CF00_CZCE.csv\n",
"数据已导出到 CFJQ00_CZCE.csv\n",
"数据已导出到 CJ00_CZCE.csv\n",
"数据已导出到 CJJQ00_CZCE.csv\n",
"数据已导出到 CY00_CZCE.csv\n",
"数据已导出到 CYJQ00_CZCE.csv\n",
"数据已导出到 FG00_CZCE.csv\n",
"数据已导出到 FGJQ00_CZCE.csv\n",
"数据已导出到 IC00_CFFEX.csv\n",
"数据已导出到 ICJQ00_CFFEX.csv\n",
"数据已导出到 IF00_CFFEX.csv\n",
"数据已导出到 IFJQ00_CFFEX.csv\n",
"数据已导出到 IH00_CFFEX.csv\n",
"数据已导出到 IHJQ00_CFFEX.csv\n",
"数据已导出到 IM00_CFFEX.csv\n",
"数据已导出到 IMJQ00_CFFEX.csv\n",
"数据已导出到 JR00_CZCE.csv\n",
"数据已导出到 JRJQ00_CZCE.csv\n",
"数据已导出到 LR00_CZCE.csv\n",
"数据已导出到 LRJQ00_CZCE.csv\n",
"数据已导出到 MA00_CZCE.csv\n",
"数据已导出到 MAJQ00_CZCE.csv\n",
"数据已导出到 OI00_CZCE.csv\n",
"数据已导出到 OIJQ00_CZCE.csv\n",
"数据已导出到 PF00_CZCE.csv\n",
"数据已导出到 PFJQ00_CZCE.csv\n",
"数据已导出到 PK00_CZCE.csv\n",
"数据已导出到 PKJQ00_CZCE.csv\n",
"数据已导出到 PM00_CZCE.csv\n",
"数据已导出到 PMJQ00_CZCE.csv\n",
"数据已导出到 PX00_CZCE.csv\n",
"数据已导出到 PXJQ00_CZCE.csv\n",
"数据已导出到 RI00_CZCE.csv\n",
"数据已导出到 RIJQ00_CZCE.csv\n",
"数据已导出到 RM00_CZCE.csv\n",
"数据已导出到 RMJQ00_CZCE.csv\n",
"数据已导出到 RS00_CZCE.csv\n",
"数据已导出到 RSJQ00_CZCE.csv\n",
"数据已导出到 SA00_CZCE.csv\n",
"数据已导出到 SAJQ00_CZCE.csv\n",
"数据已导出到 SF00_CZCE.csv\n",
"数据已导出到 SFJQ00_CZCE.csv\n",
"数据已导出到 SH00_CZCE.csv\n",
"数据已导出到 SHJQ00_CZCE.csv\n",
"数据已导出到 SM00_CZCE.csv\n",
"数据已导出到 SMJQ00_CZCE.csv\n",
"数据已导出到 SR00_CZCE.csv\n",
"数据已导出到 SRJQ00_CZCE.csv\n",
"数据已导出到 T00_CFFEX.csv\n",
"数据已导出到 TA00_CZCE.csv\n",
"数据已导出到 TAJQ00_CZCE.csv\n",
"数据已导出到 TF00_CFFEX.csv\n",
"数据已导出到 TFJQ00_CFFEX.csv\n",
"数据已导出到 TJQ00_CFFEX.csv\n",
"数据已导出到 TL00_CFFEX.csv\n",
"数据已导出到 TLJQ00_CFFEX.csv\n",
"数据已导出到 TS00_CFFEX.csv\n",
"数据已导出到 TSJQ00_CFFEX.csv\n",
"数据已导出到 UR00_CZCE.csv\n",
"数据已导出到 URJQ00_CZCE.csv\n",
"数据已导出到 WH00_CZCE.csv\n",
"数据已导出到 WHJQ00_CZCE.csv\n",
"数据已导出到 ZC00_CZCE.csv\n",
"数据已导出到 ZCJQ00_CZCE.csv\n",
"数据已导出到 a00_DCE.csv\n",
"数据已导出到 aJQ00_DCE.csv\n",
"数据已导出到 ag00_SHFE.csv\n",
"数据已导出到 agJQ00_SHFE.csv\n",
"数据已导出到 al00_SHFE.csv\n",
"数据已导出到 alJQ00_SHFE.csv\n",
"数据已导出到 ao00_SHFE.csv\n",
"数据已导出到 aoJQ00_SHFE.csv\n",
"数据已导出到 au00_SHFE.csv\n",
"数据已导出到 auJQ00_SHFE.csv\n",
"数据已导出到 b00_DCE.csv\n",
"数据已导出到 bJQ00_DCE.csv\n",
"数据已导出到 bb00_DCE.csv\n",
"数据已导出到 bbJQ00_DCE.csv\n",
"数据已导出到 bc00_INE.csv\n",
"数据已导出到 bcJQ00_INE.csv\n",
"数据已导出到 br00_SHFE.csv\n",
"数据已导出到 brJQ00_SHFE.csv\n",
"数据已导出到 bu00_SHFE.csv\n",
"数据已导出到 buJQ00_SHFE.csv\n",
"数据已导出到 c00_DCE.csv\n",
"数据已导出到 cJQ00_DCE.csv\n",
"数据已导出到 cs00_DCE.csv\n",
"数据已导出到 csJQ00_DCE.csv\n",
"数据已导出到 cu00_SHFE.csv\n",
"数据已导出到 cuJQ00_SHFE.csv\n",
"数据已导出到 eb00_DCE.csv\n",
"数据已导出到 ebJQ00_DCE.csv\n",
"数据已导出到 ec00_INE.csv\n",
"数据已导出到 ecJQ00_INE.csv\n",
"数据已导出到 eg00_DCE.csv\n",
"数据已导出到 egJQ00_DCE.csv\n",
"数据已导出到 fb00_DCE.csv\n",
"数据已导出到 fbJQ00_DCE.csv\n",
"数据已导出到 fu00_SHFE.csv\n",
"数据已导出到 fuJQ00_SHFE.csv\n",
"数据已导出到 hc00_SHFE.csv\n",
"数据已导出到 hcJQ00_SHFE.csv\n",
"数据已导出到 i00_DCE.csv\n",
"数据已导出到 iJQ00_DCE.csv\n",
"数据已导出到 j00_DCE.csv\n",
"数据已导出到 jJQ00_DCE.csv\n",
"数据已导出到 jd00_DCE.csv\n",
"数据已导出到 jdJQ00_DCE.csv\n",
"数据已导出到 jm00_DCE.csv\n",
"数据已导出到 jmJQ00_DCE.csv\n",
"数据已导出到 l00_DCE.csv\n",
"数据已导出到 lJQ00_DCE.csv\n",
"数据已导出到 lc00_GFEX.csv\n",
"数据已导出到 lcJQ00_GFEX.csv\n",
"数据已导出到 lh00_DCE.csv\n",
"数据已导出到 lhJQ00_DCE.csv\n",
"数据已导出到 lu00_INE.csv\n",
"数据已导出到 luJQ00_INE.csv\n",
"数据已导出到 m00_DCE.csv\n",
"数据已导出到 mJQ00_DCE.csv\n",
"数据已导出到 ni00_SHFE.csv\n",
"数据已导出到 niJQ00_SHFE.csv\n",
"数据已导出到 nr00_INE.csv\n",
"数据已导出到 nrJQ00_INE.csv\n",
"数据已导出到 p00_DCE.csv\n",
"数据已导出到 pJQ00_DCE.csv\n",
"数据已导出到 pb00_SHFE.csv\n",
"数据已导出到 pbJQ00_SHFE.csv\n",
"数据已导出到 pg00_DCE.csv\n",
"数据已导出到 pgJQ00_DCE.csv\n",
"数据已导出到 pp00_DCE.csv\n",
"数据已导出到 ppJQ00_DCE.csv\n",
"数据已导出到 rb00_SHFE.csv\n",
"数据已导出到 rbJQ00_SHFE.csv\n",
"数据已导出到 rr00_DCE.csv\n",
"数据已导出到 rrJQ00_DCE.csv\n",
"数据已导出到 ru00_SHFE.csv\n",
"数据已导出到 ruJQ00_SHFE.csv\n",
"数据已导出到 sc00_INE.csv\n",
"数据已导出到 scJQ00_INE.csv\n",
"数据已导出到 si00_GFEX.csv\n",
"数据已导出到 siJQ00_GFEX.csv\n",
"数据已导出到 sn00_SHFE.csv\n",
"数据已导出到 snJQ00_SHFE.csv\n",
"数据已导出到 sp00_SHFE.csv\n",
"数据已导出到 spJQ00_SHFE.csv\n",
"数据已导出到 ss00_SHFE.csv\n",
"数据已导出到 ssJQ00_SHFE.csv\n",
"数据已导出到 v00_DCE.csv\n",
"数据已导出到 vJQ00_DCE.csv\n",
"数据已导出到 wr00_SHFE.csv\n",
"数据已导出到 wrJQ00_SHFE.csv\n",
"数据已导出到 y00_DCE.csv\n",
"数据已导出到 yJQ00_DCE.csv\n",
"数据已导出到 zn00_SHFE.csv\n",
"数据已导出到 znJQ00_SHFE.csv\n"
]
}
],
"source": [
"for local_code, group in df.groupby('symbol'):\n",
" # 为每个“本地代码”生成一个CSV文件文件名使用该代码值\n",
" exchange = group.exchange.iloc[0]\n",
" csv_filename = f\"{local_code}_{exchange}.csv\"\n",
" group.to_csv(csv_filename, index=False, encoding='utf-8-sig')\n",
" print(f\"数据已导出到 {csv_filename}\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"conn.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,371 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"from ssquant.SQDATA import TakeData\n",
"\n",
"#注意首先需要pip install ssquant\n",
"#否则链接不到数据库\n",
"#输入俱乐部的账号密码即可调用,注意保密。\n",
"#目前数据是2019年1月-至今\n",
"#每日下午收盘后3点30分录入当天数据。\n",
"#有任何疑问可以再群里提出,或者私信我(慕金龙)\n",
"#官网: quant789.com\n",
"#公众号松鼠Quant\n",
"#客服微信: viquant01\n",
"\n",
"#只能调取分钟及以上数据tick数据每月底更新到百度网盘下载\n",
"\n",
"'''\n",
"获取数据-\n",
"品种:symbol,不区分大小写\n",
"起始时间:start_date,\n",
"结束时间:end_date(包含当天),\n",
"周期kline_period:1M..5M..NM(分钟),1D(天),1W(周),1Y(月)\n",
"复权adjust_type:0(不复权)1(后复权)\n",
"注意:\n",
"1.请正确输入账号密码\n",
"2.不要挂代理访问数据库\n",
"3.暂时没有股指数据,下个月补齐。\n",
"'''\n",
" \n",
"# username='俱乐部账号' password='密码'\n",
"client = TakeData(username='77777@qq.com', password='7777')\n",
"data = client.get_data(\n",
" symbol='rb888',\n",
" start_date='2023-01-02',\n",
" end_date='2024-01-03',\n",
" kline_period='60M',\n",
" adjust_type=1\n",
")\n",
"print(data)\n",
"\n",
"\n",
"\n",
"'''\n",
"datetime:时间,\n",
"\n",
"symbol:品种,\n",
"\n",
"open:开盘价,\n",
"\n",
"high:最高价,\n",
"\n",
"low:最低价,\n",
"\n",
"close:收盘价,\n",
"\n",
"volume:成交量(单bar),\n",
"\n",
"amount:成交金额(单bar),\n",
"\n",
"openint:持仓量(单bar),\n",
"\n",
"cumulative_openint:累计持仓量,\n",
"\n",
"open_bidp , open_askp: K线第一个价格的买一价格和卖一价格\n",
"\n",
"close_bidp , close_askp: K线最后一个价格的买一价格和卖一价格\n",
"\n",
"\n",
" datetime symbol open high low close volume amount cumulative_openint openint open_bidp open_askp close_bidp close_askp\n",
"0 2023-01-03 10:00:00 rb2305 4081.0 4081.0 4016.0 4037.0 737537 29782187220 1883481 -48415 4081.0 4084.0 4037.0 4038.0\n",
"1 2023-01-03 11:00:00 rb2305 4038.0 4056.0 4037.0 4042.0 158548 6415696920 1887716 4235 4037.0 4038.0 4042.0 4044.0\n",
"2 2023-01-03 12:00:00 rb2305 4044.0 4054.0 4037.0 4051.0 67448 2728130300 1890125 2409 4043.0 4044.0 4050.0 4051.0\n",
"3 2023-01-03 14:00:00 rb2305 4055.0 4065.0 4045.0 4058.0 110181 4469698600 1895841 5723 4050.0 4051.0 4058.0 4059.0\n",
"4 2023-01-03 15:00:00 rb2305 4059.0 4074.0 4056.0 4063.0 167932 6824213940 1882723 -13125 4058.0 4059.0 4062.0 4063.0\n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"1689 2024-01-03 12:00:00 rb2405 4055.0 4057.0 4044.0 4049.0 79745 3229361570 1597387 -6515 4054.0 4055.0 4049.0 4050.0\n",
"1690 2024-01-03 14:00:00 rb2405 4050.0 4056.0 4046.0 4049.0 55040 2229498750 1598566 1179 4050.0 4051.0 4049.0 4050.0\n",
"1691 2024-01-03 15:00:00 rb2405 4050.0 4064.0 4048.0 4055.0 148845 6038835190 1583796 -14770 4049.0 4050.0 4055.0 4056.0\n",
"1692 2024-01-03 22:00:00 rb2405 4054.0 4054.0 4040.0 4049.0 181753 7354584770 1582419 990 4053.0 4054.0 4048.0 4049.0\n",
"1693 2024-01-03 23:00:00 rb2405 4049.0 4057.0 4042.0 4049.0 104712 4240341050 1574287 -8132 4048.0 4049.0 4049.0 4050.0"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from ssquant.SQDATA import TakeData"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"out_path = r'D:/data'\n",
"symbol_name = 'rb888' #主力连续888 次主力合约777\n",
"time_period = '1M'\n",
"start_time = '2000-01-01'\n",
"end_time = '2019-01-31'\n",
"adjust_k = 'Faj' #Naj:Non adjust,Faj:Forward adjust,后复权\n",
"\n",
"if adjust_k == 'Naj':\n",
" adjust_tmp = 0\n",
"elif adjust_k == 'Faj':\n",
" adjust_tmp = 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client = TakeData(username='240884432@qq.com', password='Zj123!@#')\n",
"data = client.get_data(\n",
" symbol=symbol_name,\n",
" start_date=start_time,\n",
" end_date=end_time,\n",
" kline_period=time_period,\n",
" adjust_type= adjust_tmp\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"头部文件为:--------------------\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>datetime</th>\n",
" <th>symbol</th>\n",
" <th>open</th>\n",
" <th>high</th>\n",
" <th>low</th>\n",
" <th>close</th>\n",
" <th>volume</th>\n",
" <th>amount</th>\n",
" <th>cumulative_openint</th>\n",
" <th>openint</th>\n",
" <th>open_bidp</th>\n",
" <th>open_askp</th>\n",
" <th>close_bidp</th>\n",
" <th>close_askp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2019-01-02 09:01:00</td>\n",
" <td>rb1905</td>\n",
" <td>3399</td>\n",
" <td>3405</td>\n",
" <td>3389</td>\n",
" <td>3401</td>\n",
" <td>69562</td>\n",
" <td>2362607160</td>\n",
" <td>2383714</td>\n",
" <td>16864</td>\n",
" <td>3399.0</td>\n",
" <td>3400.0</td>\n",
" <td>3400.0</td>\n",
" <td>3401.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2019-01-02 09:02:00</td>\n",
" <td>rb1905</td>\n",
" <td>3401</td>\n",
" <td>3430</td>\n",
" <td>3401</td>\n",
" <td>3410</td>\n",
" <td>88696</td>\n",
" <td>3034283200</td>\n",
" <td>2399530</td>\n",
" <td>-12248</td>\n",
" <td>3401.0</td>\n",
" <td>3402.0</td>\n",
" <td>3409.0</td>\n",
" <td>3410.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2019-01-02 09:03:00</td>\n",
" <td>rb1905</td>\n",
" <td>3409</td>\n",
" <td>3414</td>\n",
" <td>3409</td>\n",
" <td>3412</td>\n",
" <td>22828</td>\n",
" <td>778740580</td>\n",
" <td>2387356</td>\n",
" <td>1180</td>\n",
" <td>3409.0</td>\n",
" <td>3410.0</td>\n",
" <td>3411.0</td>\n",
" <td>3412.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2019-01-02 09:04:00</td>\n",
" <td>rb1905</td>\n",
" <td>3412</td>\n",
" <td>3413</td>\n",
" <td>3403</td>\n",
" <td>3404</td>\n",
" <td>17378</td>\n",
" <td>592413220</td>\n",
" <td>2388158</td>\n",
" <td>54</td>\n",
" <td>3411.0</td>\n",
" <td>3412.0</td>\n",
" <td>3404.0</td>\n",
" <td>3405.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2019-01-02 09:05:00</td>\n",
" <td>rb1905</td>\n",
" <td>3405</td>\n",
" <td>3409</td>\n",
" <td>3405</td>\n",
" <td>3405</td>\n",
" <td>15770</td>\n",
" <td>537276980</td>\n",
" <td>2388190</td>\n",
" <td>1674</td>\n",
" <td>3405.0</td>\n",
" <td>3406.0</td>\n",
" <td>3405.0</td>\n",
" <td>3406.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" datetime symbol open high low close volume amount \\\n",
"0 2019-01-02 09:01:00 rb1905 3399 3405 3389 3401 69562 2362607160 \n",
"1 2019-01-02 09:02:00 rb1905 3401 3430 3401 3410 88696 3034283200 \n",
"2 2019-01-02 09:03:00 rb1905 3409 3414 3409 3412 22828 778740580 \n",
"3 2019-01-02 09:04:00 rb1905 3412 3413 3403 3404 17378 592413220 \n",
"4 2019-01-02 09:05:00 rb1905 3405 3409 3405 3405 15770 537276980 \n",
"\n",
" cumulative_openint openint open_bidp open_askp close_bidp close_askp \n",
"0 2383714 16864 3399.0 3400.0 3400.0 3401.0 \n",
"1 2399530 -12248 3401.0 3402.0 3409.0 3410.0 \n",
"2 2387356 1180 3409.0 3410.0 3411.0 3412.0 \n",
"3 2388158 54 3411.0 3412.0 3404.0 3405.0 \n",
"4 2388190 1674 3405.0 3406.0 3405.0 3406.0 "
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print('头部文件为:--------------------')\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime\n",
" \n",
"# 假设你有一个字符串,表示时间,格式为 'YYYY-MM-DD HH:MM:SS'\n",
"real_start_time = data.iloc[0,0]\n",
" \n",
"# 使用datetime.strptime将字符串转换为时间\n",
"time_obj = datetime.strptime(real_start_time, '%Y-%m-%d %H:%M:%S')\n",
" \n",
"# 获取年月日\n",
"year = time_obj.year\n",
"month = time_obj.month\n",
"day = time_obj.day\n",
" \n",
"print(f'年: {year}, 月: {month}, 日: {day}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('尾部文件为:--------------------')\n",
"data.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import datetime\n",
"real_start_time = pd.to_datetime(data.iloc[0,0]).date().strftime('%Y-%m-%d')\n",
"real_end_time = pd.to_datetime(data.iloc[-1,0]).date().strftime('%Y-%m-%d')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.to_csv('%s/%s_%s_%s(%s_%s).csv'%(out_path,symbol_name,time_period,adjust_k,real_start_time,real_end_time), index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,79 @@
from ssquant.SQDATA import TakeData
#Note: you need to pip install ssquant first,
#otherwise the database cannot be reached.
#Call it with your club account and password, and keep them confidential.
#Data currently runs from January 2019 to the present.
#Each day's data is loaded at 15:30, after the afternoon close.
#Questions can be raised in the group or sent to me directly (慕金龙).
#Website: quant789.com
#WeChat official account: 松鼠Quant
#Support WeChat: viquant01
#Only minute-level and coarser data can be fetched here; tick data is uploaded to Baidu Netdisk at the end of each month.
'''
Fetching data:
symbol: instrument, case-insensitive
start_date: start date
end_date: end date (inclusive)
kline_period: 1M..5M..NM (minutes), 1D (day), 1W (week), 1Y (month)
adjust_type: 0 (unadjusted), 1 (back-adjusted)
Notes:
1. Enter the account and password correctly.
2. Do not access the database through a proxy.
3. No index-futures data yet; it will be added next month.
'''
# username='club account' password='password'
client = TakeData(username='77777@qq.com', password='7777')
data = client.get_data(
    symbol='rb888',
    start_date='2023-01-02',
    end_date='2024-01-03',
    kline_period='60M',
    adjust_type=1
)
print(data)
'''
datetime: timestamp,
symbol: instrument,
open: open price,
high: high price,
low: low price,
close: close price,
volume: traded volume (per bar),
amount: traded amount (per bar),
openint: open interest (per bar),
cumulative_openint: cumulative open interest,
open_bidp, open_askp: best bid and ask at the first price of the bar
close_bidp, close_askp: best bid and ask at the last price of the bar
datetime symbol open high low close volume amount cumulative_openint openint open_bidp open_askp close_bidp close_askp
0 2023-01-03 10:00:00 rb2305 4081.0 4081.0 4016.0 4037.0 737537 29782187220 1883481 -48415 4081.0 4084.0 4037.0 4038.0
1 2023-01-03 11:00:00 rb2305 4038.0 4056.0 4037.0 4042.0 158548 6415696920 1887716 4235 4037.0 4038.0 4042.0 4044.0
2 2023-01-03 12:00:00 rb2305 4044.0 4054.0 4037.0 4051.0 67448 2728130300 1890125 2409 4043.0 4044.0 4050.0 4051.0
3 2023-01-03 14:00:00 rb2305 4055.0 4065.0 4045.0 4058.0 110181 4469698600 1895841 5723 4050.0 4051.0 4058.0 4059.0
4 2023-01-03 15:00:00 rb2305 4059.0 4074.0 4056.0 4063.0 167932 6824213940 1882723 -13125 4058.0 4059.0 4062.0 4063.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1689 2024-01-03 12:00:00 rb2405 4055.0 4057.0 4044.0 4049.0 79745 3229361570 1597387 -6515 4054.0 4055.0 4049.0 4050.0
1690 2024-01-03 14:00:00 rb2405 4050.0 4056.0 4046.0 4049.0 55040 2229498750 1598566 1179 4050.0 4051.0 4049.0 4050.0
1691 2024-01-03 15:00:00 rb2405 4050.0 4064.0 4048.0 4055.0 148845 6038835190 1583796 -14770 4049.0 4050.0 4055.0 4056.0
1692 2024-01-03 22:00:00 rb2405 4054.0 4054.0 4040.0 4049.0 181753 7354584770 1582419 990 4053.0 4054.0 4048.0 4049.0
1693 2024-01-03 23:00:00 rb2405 4049.0 4057.0 4042.0 4049.0 104712 4240341050 1574287 -8132 4048.0 4049.0 4049.0 4050.0
'''

View File

@@ -0,0 +1,65 @@
from ssquant.SQDATA import TakeData
from pyecharts import options as opts
from pyecharts.charts import Kline, Bar, Grid
import pandas as pd
def plotK(data):
    # Sample data layout (replace with your actual data)
    kline_data = data[['open', 'close', 'low', 'high']].values.tolist()
    dates = data.index.strftime('%Y-%m-%d %H:%M:%S').tolist()
    symbol_data = data['symbol'].values.tolist()
    # Mark the positions where the contract symbol changes
    markline_data = []
    for i in range(1, len(symbol_data)):
        if symbol_data[i] != symbol_data[i-1]:
            # Add a red vertical line wherever the symbol differs from the previous bar
            markline_data.append(opts.MarkLineItem(x=dates[i], name=f'previous contract {symbol_data[i-1]}, current contract {symbol_data[i]}'))
    # Data-zoom components: only one x axis exists in this standalone chart, so index [0]
    datazoom_slider = opts.DataZoomOpts(type_="slider", xaxis_index=[0], range_start=50, range_end=100)
    datazoom_inside = opts.DataZoomOpts(type_="inside", xaxis_index=[0])
    # Build the candlestick chart
    kline = (
        Kline(init_opts=opts.InitOpts(width="100%", height="900px"))
        .add_xaxis(dates)
        .add_yaxis('K线图表', kline_data, markline_opts=opts.MarkLineOpts(data=markline_data, symbol='none', linestyle_opts=opts.LineStyleOpts(color="red")))
        .set_global_opts(
            datazoom_opts=[datazoom_slider, datazoom_inside],
            toolbox_opts=opts.ToolboxOpts(is_show=True, pos_top="0%", pos_right="80%"),
            legend_opts=opts.LegendOpts(pos_left='40%'),  # legend position
        )
    )
    kline.render('K线图.html')
'''
Fetching data:
symbol: instrument
start_date: start date
end_date: end date (inclusive)
kline_period: 1M..5M..NM (minutes), 1D (day), 1W (week), 1Y (month)
adjust_type: 0 (unadjusted), 1 (back-adjusted)
'''
# Enter your club account and password below: username='club account' password='password'
client = TakeData(username='1234@qq.com', password='123')
data = client.get_data(
    symbol='rb888',
    start_date='2023-12-28',
    end_date='2024-01-17',
    kline_period='60M',
    adjust_type=1
)
data.set_index("datetime", inplace=True)
data.index = pd.to_datetime(data.index)
print(data)
# Render the candlestick chart into the script's directory
plotK(data)

View File

@@ -0,0 +1,249 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "1a846b12",
"metadata": {},
"source": [
"from ssquant.SQDATA import TakeData\n",
"\n",
"#注意首先需要pip install ssquant\n",
"#否则链接不到数据库\n",
"#输入俱乐部的账号密码即可调用,注意保密。\n",
"#目前数据是2019年1月-至今\n",
"#每日下午收盘后3点30分录入当天数据。\n",
"#有任何疑问可以再群里提出,或者私信我(慕金龙)\n",
"#官网: quant789.com\n",
"#公众号松鼠Quant\n",
"#客服微信: viquant01\n",
"\n",
"#只能调取分钟及以上数据tick数据每月底更新到百度网盘下载\n",
"\n",
"'''\n",
"获取数据-\n",
"品种:symbol,不区分大小写\n",
"起始时间:start_date,\n",
"结束时间:end_date(包含当天),\n",
"周期kline_period:1M..5M..NM(分钟),1D(天),1W(周),1Y(月)\n",
"复权adjust_type:0(不复权)1(后复权)\n",
"注意:\n",
"1.请正确输入账号密码\n",
"2.不要挂代理访问数据库\n",
"3.暂时没有股指数据,下个月补齐。\n",
"'''\n",
" \n",
"# username='俱乐部账号' password='密码'\n",
"client = TakeData(username='240884432@qq.com', password='7777')\n",
"data = client.get_data(\n",
" symbol='rb888',\n",
" start_date='2023-01-02',\n",
" end_date='2024-01-03',\n",
" kline_period='60M',\n",
" adjust_type=1\n",
")\n",
"print(data)\n",
"\n",
"\n",
"\n",
"'''\n",
"datetime:时间,\n",
"\n",
"symbol:品种,\n",
"\n",
"open:开盘价,\n",
"\n",
"high:最高价,\n",
"\n",
"low:最低价,\n",
"\n",
"close:收盘价,\n",
"\n",
"volume:成交量(单bar),\n",
"\n",
"amount:成交金额(单bar),\n",
"\n",
"openint:持仓量(单bar),\n",
"\n",
"cumulative_openint:累计持仓量,\n",
"\n",
"open_bidp , open_askp: K线第一个价格的买一价格和卖一价格\n",
"\n",
"close_bidp , close_askp: K线最后一个价格的买一价格和卖一价格\n",
"\n",
"\n",
" datetime symbol open high low close volume amount cumulative_openint openint open_bidp open_askp close_bidp close_askp\n",
"0 2023-01-03 10:00:00 rb2305 4081.0 4081.0 4016.0 4037.0 737537 29782187220 1883481 -48415 4081.0 4084.0 4037.0 4038.0\n",
"1 2023-01-03 11:00:00 rb2305 4038.0 4056.0 4037.0 4042.0 158548 6415696920 1887716 4235 4037.0 4038.0 4042.0 4044.0\n",
"2 2023-01-03 12:00:00 rb2305 4044.0 4054.0 4037.0 4051.0 67448 2728130300 1890125 2409 4043.0 4044.0 4050.0 4051.0\n",
"3 2023-01-03 14:00:00 rb2305 4055.0 4065.0 4045.0 4058.0 110181 4469698600 1895841 5723 4050.0 4051.0 4058.0 4059.0\n",
"4 2023-01-03 15:00:00 rb2305 4059.0 4074.0 4056.0 4063.0 167932 6824213940 1882723 -13125 4058.0 4059.0 4062.0 4063.0\n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"1689 2024-01-03 12:00:00 rb2405 4055.0 4057.0 4044.0 4049.0 79745 3229361570 1597387 -6515 4054.0 4055.0 4049.0 4050.0\n",
"1690 2024-01-03 14:00:00 rb2405 4050.0 4056.0 4046.0 4049.0 55040 2229498750 1598566 1179 4050.0 4051.0 4049.0 4050.0\n",
"1691 2024-01-03 15:00:00 rb2405 4050.0 4064.0 4048.0 4055.0 148845 6038835190 1583796 -14770 4049.0 4050.0 4055.0 4056.0\n",
"1692 2024-01-03 22:00:00 rb2405 4054.0 4054.0 4040.0 4049.0 181753 7354584770 1582419 990 4053.0 4054.0 4048.0 4049.0\n",
"1693 2024-01-03 23:00:00 rb2405 4049.0 4057.0 4042.0 4049.0 104712 4240341050 1574287 -8132 4048.0 4049.0 4049.0 4050.0\n",
"'''"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "65b4b7aa",
"metadata": {},
"outputs": [],
"source": [
"from ssquant.SQDATA import TakeData\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "edd4f1e5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" datetime symbol open high low close volume \\\n",
"0 2023-01-03 10:00:00 rb2305 4081 4081 4016 4037 737537 \n",
"1 2023-01-03 11:00:00 rb2305 4038 4056 4037 4042 158548 \n",
"2 2023-01-03 12:00:00 rb2305 4044 4054 4037 4051 67448 \n",
"3 2023-01-03 14:00:00 rb2305 4055 4065 4045 4058 110181 \n",
"4 2023-01-03 15:00:00 rb2305 4059 4074 4056 4063 167932 \n",
".. ... ... ... ... ... ... ... \n",
"112 2023-02-01 12:00:00 rb2305 4126 4129 4105 4107 193291 \n",
"113 2023-02-01 14:00:00 rb2305 4108 4117 4100 4109 137182 \n",
"114 2023-02-01 15:00:00 rb2305 4109 4114 4075 4084 378930 \n",
"115 2023-02-01 22:00:00 rb2305 4092 4104 4087 4103 207519 \n",
"116 2023-02-01 23:00:00 rb2305 4102 4109 4075 4098 189724 \n",
"\n",
" amount cumulative_openint openint open_bidp open_askp \\\n",
"0 29782187220 1883481 -48415 4081 4084 \n",
"1 6415696920 1887716 4235 4037 4038 \n",
"2 2728130300 1890125 2409 4043 4044 \n",
"3 4469698600 1895841 5723 4050 4051 \n",
"4 6824213940 1882723 -13125 4058 4059 \n",
".. ... ... ... ... ... \n",
"112 7954826320 1984919 3490 4125 4126 \n",
"113 5634834380 1998312 13394 4108 4109 \n",
"114 15503896450 1994915 -3398 4109 4110 \n",
"115 8500232870 1988628 -5587 4091 4092 \n",
"116 7757206650 1973544 -15099 4101 4102 \n",
"\n",
" close_bidp close_askp \n",
"0 4037 4038 \n",
"1 4042 4044 \n",
"2 4050 4051 \n",
"3 4058 4059 \n",
"4 4062 4063 \n",
".. ... ... \n",
"112 4106 4107 \n",
"113 4108 4109 \n",
"114 4084 4085 \n",
"115 4102 4103 \n",
"116 4098 4099 \n",
"\n",
"[117 rows x 14 columns]\n"
]
}
],
"source": [
"client = TakeData(username='240884432@qq.com', password='Zj123!@#')\n",
"data = client.get_data(\n",
" symbol='rb888',\n",
" start_date='2023-01-01',\n",
" end_date='2023-02-01',\n",
" kline_period='60M',\n",
" adjust_type=1\n",
")\n",
"print(data)\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "25c70609",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" symbol open high low close volume amount \\\n",
"datetime \n",
"2023-01-03 10:00:00 rb2305 4081 4081 4016 4037 737537 29782187220 \n",
"2023-01-03 11:00:00 rb2305 4038 4056 4037 4042 158548 6415696920 \n",
"2023-01-03 12:00:00 rb2305 4044 4054 4037 4051 67448 2728130300 \n",
"2023-01-03 14:00:00 rb2305 4055 4065 4045 4058 110181 4469698600 \n",
"2023-01-03 15:00:00 rb2305 4059 4074 4056 4063 167932 6824213940 \n",
"... ... ... ... ... ... ... ... \n",
"2023-02-01 12:00:00 rb2305 4126 4129 4105 4107 193291 7954826320 \n",
"2023-02-01 14:00:00 rb2305 4108 4117 4100 4109 137182 5634834380 \n",
"2023-02-01 15:00:00 rb2305 4109 4114 4075 4084 378930 15503896450 \n",
"2023-02-01 22:00:00 rb2305 4092 4104 4087 4103 207519 8500232870 \n",
"2023-02-01 23:00:00 rb2305 4102 4109 4075 4098 189724 7757206650 \n",
"\n",
" cumulative_openint openint open_bidp open_askp \\\n",
"datetime \n",
"2023-01-03 10:00:00 1883481 -48415 4081 4084 \n",
"2023-01-03 11:00:00 1887716 4235 4037 4038 \n",
"2023-01-03 12:00:00 1890125 2409 4043 4044 \n",
"2023-01-03 14:00:00 1895841 5723 4050 4051 \n",
"2023-01-03 15:00:00 1882723 -13125 4058 4059 \n",
"... ... ... ... ... \n",
"2023-02-01 12:00:00 1984919 3490 4125 4126 \n",
"2023-02-01 14:00:00 1998312 13394 4108 4109 \n",
"2023-02-01 15:00:00 1994915 -3398 4109 4110 \n",
"2023-02-01 22:00:00 1988628 -5587 4091 4092 \n",
"2023-02-01 23:00:00 1973544 -15099 4101 4102 \n",
"\n",
" close_bidp close_askp \n",
"datetime \n",
"2023-01-03 10:00:00 4037 4038 \n",
"2023-01-03 11:00:00 4042 4044 \n",
"2023-01-03 12:00:00 4050 4051 \n",
"2023-01-03 14:00:00 4058 4059 \n",
"2023-01-03 15:00:00 4062 4063 \n",
"... ... ... \n",
"2023-02-01 12:00:00 4106 4107 \n",
"2023-02-01 14:00:00 4108 4109 \n",
"2023-02-01 15:00:00 4084 4085 \n",
"2023-02-01 22:00:00 4102 4103 \n",
"2023-02-01 23:00:00 4098 4099 \n",
"\n",
"[117 rows x 13 columns]\n"
]
}
],
"source": [
"data.set_index(\"datetime\", inplace=True)\n",
"data.index = pd.to_datetime(data.index)\n",
"print(data)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,2 @@
1. Use 数据库示例.py to fetch data; each trading day's data is updated after 3:50 pm.
2. At the start of each month, the previous month's tick and 1m data are updated.

View File

@@ -0,0 +1,610 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"file_path_888 = r\"E:\\data\\data_rs_merged\\中金所\\IM888\\IM888_rs_2023.csv\"\n",
"df_888 = pd.read_csv(file_path_888, encoding='utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>main_contract</th>\n",
" <th>symbol</th>\n",
" <th>datetime</th>\n",
" <th>lastprice</th>\n",
" <th>volume</th>\n",
" <th>bid_p</th>\n",
" <th>ask_p</th>\n",
" <th>bid_v</th>\n",
" <th>ask_v</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>IM888</td>\n",
" <td>IM2301</td>\n",
" <td>2023-01-03 09:30:00.200</td>\n",
" <td>6280.0</td>\n",
" <td>46</td>\n",
" <td>6276.0</td>\n",
" <td>6277.0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>IM888</td>\n",
" <td>IM2301</td>\n",
" <td>2023-01-03 09:30:00.700</td>\n",
" <td>6277.0</td>\n",
" <td>61</td>\n",
" <td>6278.0</td>\n",
" <td>6278.8</td>\n",
" <td>1</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>IM888</td>\n",
" <td>IM2301</td>\n",
" <td>2023-01-03 09:30:01.200</td>\n",
" <td>6277.2</td>\n",
" <td>81</td>\n",
" <td>6277.2</td>\n",
" <td>6278.8</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>IM888</td>\n",
" <td>IM2301</td>\n",
" <td>2023-01-03 09:30:01.700</td>\n",
" <td>6277.8</td>\n",
" <td>90</td>\n",
" <td>6277.8</td>\n",
" <td>6278.6</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>IM888</td>\n",
" <td>IM2301</td>\n",
" <td>2023-01-03 09:30:02.200</td>\n",
" <td>6278.8</td>\n",
" <td>112</td>\n",
" <td>6278.8</td>\n",
" <td>6280.0</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" main_contract symbol datetime lastprice volume bid_p \\\n",
"0 IM888 IM2301 2023-01-03 09:30:00.200 6280.0 46 6276.0 \n",
"1 IM888 IM2301 2023-01-03 09:30:00.700 6277.0 61 6278.0 \n",
"2 IM888 IM2301 2023-01-03 09:30:01.200 6277.2 81 6277.2 \n",
"3 IM888 IM2301 2023-01-03 09:30:01.700 6277.8 90 6277.8 \n",
"4 IM888 IM2301 2023-01-03 09:30:02.200 6278.8 112 6278.8 \n",
"\n",
" ask_p bid_v ask_v \n",
"0 6277.0 1 3 \n",
"1 6278.8 1 16 \n",
"2 6278.8 1 5 \n",
"3 6278.6 3 4 \n",
"4 6280.0 1 7 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_888.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# 重命名列以便处理\n",
"# df_888.rename(columns={'时间': 'datetime', '最新': 'price', '成交量': 'volume'}, inplace=True)\n",
"df_888.rename(columns={'datetime': 'datetime', 'lastprice': 'price', 'volume': 'volume'}, inplace=True)\n",
"\n",
"# 确保datetime列是datetime类型\n",
"df_888['datetime'] = pd.to_datetime(df_888['datetime'])\n",
"\n",
"# 设置datetime列为索引\n",
"df_888.set_index('datetime', inplace=True)\n",
"\n",
"# 使用resample方法将数据重新采样为1分钟数据\n",
"df_resampled = df_888.resample('1T').agg({\n",
" 'price': ['first', 'max', 'min', 'last'],\n",
" 'volume': 'sum'\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"4\" halign=\"left\">price</th>\n",
" <th>volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>first</th>\n",
" <th>max</th>\n",
" <th>min</th>\n",
" <th>last</th>\n",
" <th>sum</th>\n",
" </tr>\n",
" <tr>\n",
" <th>datetime</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2023-01-03 09:30:00</th>\n",
" <td>6280.0</td>\n",
" <td>6306.4</td>\n",
" <td>6277.0</td>\n",
" <td>6302.0</td>\n",
" <td>66894</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-03 09:31:00</th>\n",
" <td>6302.0</td>\n",
" <td>6320.0</td>\n",
" <td>6302.0</td>\n",
" <td>6318.8</td>\n",
" <td>172512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-03 09:32:00</th>\n",
" <td>6319.8</td>\n",
" <td>6328.0</td>\n",
" <td>6314.8</td>\n",
" <td>6314.8</td>\n",
" <td>238716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-03 09:33:00</th>\n",
" <td>6313.0</td>\n",
" <td>6325.0</td>\n",
" <td>6310.4</td>\n",
" <td>6312.4</td>\n",
" <td>297675</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-03 09:34:00</th>\n",
" <td>6311.0</td>\n",
" <td>6323.2</td>\n",
" <td>6311.0</td>\n",
" <td>6319.4</td>\n",
" <td>352184</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" price volume\n",
" first max min last sum\n",
"datetime \n",
"2023-01-03 09:30:00 6280.0 6306.4 6277.0 6302.0 66894\n",
"2023-01-03 09:31:00 6302.0 6320.0 6302.0 6318.8 172512\n",
"2023-01-03 09:32:00 6319.8 6328.0 6314.8 6314.8 238716\n",
"2023-01-03 09:33:00 6313.0 6325.0 6310.4 6312.4 297675\n",
"2023-01-03 09:34:00 6311.0 6323.2 6311.0 6319.4 352184"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_resampled.head()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'IM888'"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_888['main_contract'][1]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# df_resampled['symbol'] = df_888['main_contract'][1]\n",
"df_resampled.insert(0, 'symbol', df_888['main_contract'][1])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th>symbol</th>\n",
" <th colspan=\"4\" halign=\"left\">price</th>\n",
" <th>volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>first</th>\n",
" <th>max</th>\n",
" <th>min</th>\n",
" <th>last</th>\n",
" <th>sum</th>\n",
" </tr>\n",
" <tr>\n",
" <th>datetime</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2023-01-03 09:30:00</th>\n",
" <td>IM888</td>\n",
" <td>6280.0</td>\n",
" <td>6306.4</td>\n",
" <td>6277.0</td>\n",
" <td>6302.0</td>\n",
" <td>66894</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-03 09:31:00</th>\n",
" <td>IM888</td>\n",
" <td>6302.0</td>\n",
" <td>6320.0</td>\n",
" <td>6302.0</td>\n",
" <td>6318.8</td>\n",
" <td>172512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-03 09:32:00</th>\n",
" <td>IM888</td>\n",
" <td>6319.8</td>\n",
" <td>6328.0</td>\n",
" <td>6314.8</td>\n",
" <td>6314.8</td>\n",
" <td>238716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-03 09:33:00</th>\n",
" <td>IM888</td>\n",
" <td>6313.0</td>\n",
" <td>6325.0</td>\n",
" <td>6310.4</td>\n",
" <td>6312.4</td>\n",
" <td>297675</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-03 09:34:00</th>\n",
" <td>IM888</td>\n",
" <td>6311.0</td>\n",
" <td>6323.2</td>\n",
" <td>6311.0</td>\n",
" <td>6319.4</td>\n",
" <td>352184</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" symbol price volume\n",
" first max min last sum\n",
"datetime \n",
"2023-01-03 09:30:00 IM888 6280.0 6306.4 6277.0 6302.0 66894\n",
"2023-01-03 09:31:00 IM888 6302.0 6320.0 6302.0 6318.8 172512\n",
"2023-01-03 09:32:00 IM888 6319.8 6328.0 6314.8 6314.8 238716\n",
"2023-01-03 09:33:00 IM888 6313.0 6325.0 6310.4 6312.4 297675\n",
"2023-01-03 09:34:00 IM888 6311.0 6323.2 6311.0 6319.4 352184"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_resampled.head()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# 重命名列名以符合K线数据的标准命名\n",
"df_resampled.columns = ['open', 'high', 'low', 'close', 'volume', 'symbol']"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>open</th>\n",
" <th>high</th>\n",
" <th>low</th>\n",
" <th>close</th>\n",
" <th>volume</th>\n",
" <th>symbol</th>\n",
" </tr>\n",
" <tr>\n",
" <th>datetime</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2023-01-03 09:30:00</th>\n",
" <td>IM888</td>\n",
" <td>6280.0</td>\n",
" <td>6306.4</td>\n",
" <td>6277.0</td>\n",
" <td>6302.0</td>\n",
" <td>66894</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-03 09:31:00</th>\n",
" <td>IM888</td>\n",
" <td>6302.0</td>\n",
" <td>6320.0</td>\n",
" <td>6302.0</td>\n",
" <td>6318.8</td>\n",
" <td>172512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-03 09:32:00</th>\n",
" <td>IM888</td>\n",
" <td>6319.8</td>\n",
" <td>6328.0</td>\n",
" <td>6314.8</td>\n",
" <td>6314.8</td>\n",
" <td>238716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-03 09:33:00</th>\n",
" <td>IM888</td>\n",
" <td>6313.0</td>\n",
" <td>6325.0</td>\n",
" <td>6310.4</td>\n",
" <td>6312.4</td>\n",
" <td>297675</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2023-01-03 09:34:00</th>\n",
" <td>IM888</td>\n",
" <td>6311.0</td>\n",
" <td>6323.2</td>\n",
" <td>6311.0</td>\n",
" <td>6319.4</td>\n",
" <td>352184</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" open high low close volume symbol\n",
"datetime \n",
"2023-01-03 09:30:00 IM888 6280.0 6306.4 6277.0 6302.0 66894\n",
"2023-01-03 09:31:00 IM888 6302.0 6320.0 6302.0 6318.8 172512\n",
"2023-01-03 09:32:00 IM888 6319.8 6328.0 6314.8 6314.8 238716\n",
"2023-01-03 09:33:00 IM888 6313.0 6325.0 6310.4 6312.4 297675\n",
"2023-01-03 09:34:00 IM888 6311.0 6323.2 6311.0 6319.4 352184"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_resampled.head()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1分钟历史数据已保存至E:\\data\\data_rs_merged\\中金所\\IM888\\IM888_rs_2023_1min.csv\n"
]
}
],
"source": [
"# 删除存在NA值的行如果有的时间段没有交易数据\n",
"df_resampled.dropna(inplace=True)\n",
"# df_resampled['symbol'] = df_888['统一代码']\n",
"# df_resampled.insert(loc=0, column='main_contract', value=df_888['main_contract'])\n",
"# df_resampled['symbol'] = df_888['main_contract']\n",
"# 将重新采样的数据写入新的CSV文件\n",
"output_file = r\"E:\\data\\data_rs_merged\\中金所\\IM888\\IM888_rs_2023_1min.csv\"\n",
"df_resampled.to_csv(output_file)\n",
"\n",
"print(f'1分钟历史数据已保存至{output_file}')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,33 @@
import pandas as pd
# Read the uploaded CSV file
file_path = 'C:/Users/zhouj/Desktop/a次主力连续_20190103.csv'
df = pd.read_csv(file_path, encoding='gbk')
# Rename columns for consistent handling
df.rename(columns={'时间': 'datetime', '最新': 'price', '成交量': 'volume'}, inplace=True)
# Ensure the datetime column has datetime dtype
df['datetime'] = pd.to_datetime(df['datetime'])
# Set datetime as the index
df.set_index('datetime', inplace=True)
# Resample the ticks to 1-minute OHLCV bars
df_resampled = df.resample('1T').agg({
    'price': ['first', 'max', 'min', 'last'],
    'volume': 'sum'
})
# Rename columns to standard K-line names
df_resampled.columns = ['open', 'high', 'low', 'close', 'volume']
# Drop rows containing NA values (periods with no trades)
df_resampled.dropna(inplace=True)
# Write the resampled data to a new CSV file
output_file = 'C:/Users/zhouj/Desktop/tic_data_1min.csv'
df_resampled.to_csv(output_file)
print(f'1-minute history saved to {output_file}')

View File

@@ -0,0 +1,135 @@
from multiprocessing import Process
from datetime import datetime
from vnpy.trader.database import BarOverview
from vnpy.trader.datafeed import get_datafeed
from vnpy.trader.database import get_database
from vnpy.trader.object import BarData, HistoryRequest
from vnpy.trader.constant import Exchange, Interval
import re
# Exchange mapping (XT suffix -> vn.py Exchange)
EXCHANGE_XT2VT = {
"SH": Exchange.SSE,
"SZ": Exchange.SZSE,
"BJ": Exchange.BSE,
"SF": Exchange.SHFE,
"IF": Exchange.CFFEX,
"INE": Exchange.INE,
"DF": Exchange.DCE,
"ZF": Exchange.CZCE,
"GF": Exchange.GFEX
}
# Query start time
START_TIME = datetime(2018, 1, 1)
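# Bars earlier than this are never requested; incremental runs resume from each
# symbol's stored overview end time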
def update_history_data() -> None:
    """Update historical contract information"""
    # Load xtquant inside the subprocess
    from xtquant.xtdata import download_history_data
    # Initialize the datafeed
    datafeed = get_datafeed()
    datafeed.init()
    # Download historical contract information
    download_history_data("", "historycontract")
    print("xtquant historical contract download finished")
def update_bar_data(
    sector_name: str,
    interval: Interval = Interval.MINUTE
) -> None:
    """Update K-line data"""
    # Load xtquant inside the subprocess
    from xtquant.xtdata import (
        get_stock_list_in_sector,
        get_instrument_detail
    )
    # Initialize the datafeed
    datafeed = get_datafeed()
    datafeed.init()
    # Connect to the database
    database = get_database()
    # Current timestamp
    now: datetime = datetime.now()
    # Load the overview of bars already stored locally
    data: list[BarOverview] = database.get_bar_overview()
    overviews: dict[str, BarOverview] = {}
    for o in data:
        vt_symbol: str = f"{o.symbol}.{o.exchange.value}"
        overviews[vt_symbol] = o
    # Query the exchange's historical contract codes for the sector
    xt_symbols: list[str] = get_stock_list_in_sector(sector_name)
    # Iterate over the list and query contract details
    for xt_symbol in xt_symbols:
        # Query contract details
        data: dict = get_instrument_detail(xt_symbol, True)
        # Contract expiry time
        expiry: datetime = None
        if data["ExpireDate"]:
            expiry = datetime.strptime(data["ExpireDate"], "%Y%m%d")
        # Split the xtquant code into symbol and exchange
        symbol, xt_exchange = xt_symbol.split(".")
        symbol_main = re.split(r'(\d+)', symbol)[0]
        # Build the local continuous-contract code. The original expression embedded
        # literal "+'JQ00'" text in the f-string and used a no-op `or`; the "JQ00"
        # suffix is assumed here - use f"{symbol_main}00.{exchange.value}" instead if
        # your database keys continuous contracts with a plain "00" suffix
        exchange: Exchange = EXCHANGE_XT2VT[xt_exchange]
        vt_symbol: str = f"{symbol_main}JQ00.{exchange.value}"
        # Look up the stored overview
        overview: BarOverview = overviews.get(vt_symbol, None)
        # Skip contracts that have already expired
        if overview and expiry and expiry < now:
            continue
        # Incremental query: resume from the stored end time
        start: datetime = START_TIME
        if overview:
            start = overview.end
        # Query the data and save it to the database
        req: HistoryRequest = HistoryRequest(
            symbol=symbol,
            exchange=exchange,
            start=start,
            end=now,
            interval=interval
        )
        bars: list[BarData] = datafeed.query_bar_history(req)
        if bars:
            database.save_bar_data(bars)
            start_dt: datetime = bars[0].datetime
            end_dt: datetime = bars[-1].datetime
            msg: str = f"{vt_symbol} data updated, {start_dt} - {end_dt}"
            print(msg)
if __name__ == "__main__":
    # Update historical contract information in a subprocess
    process: Process = Process(target=update_history_data)
    process.start()
    process.join()  # Wait for the subprocess to finish
    # Update historical data
    update_bar_data("上期所")
    update_bar_data("过期上期所")

View File

@@ -0,0 +1,184 @@
from multiprocessing import Process
from datetime import datetime
from vnpy.trader.database import BarOverview
from vnpy.trader.datafeed import get_datafeed
from vnpy.trader.object import ContractData, BarData, HistoryRequest
from vnpy.trader.constant import Exchange, Product, OptionType, Interval
from vnpy.trader.setting import SETTINGS
from elite_database import EliteDatabase
# Configure the xtquant (迅投研) datafeed
SETTINGS["datafeed.name"] = "xt"
SETTINGS["datafeed.username"] = "token"
SETTINGS["datafeed.password"] = ""
# Exchange mapping (XT suffix -> vn.py Exchange)
EXCHANGE_XT2VT = {
"SH": Exchange.SSE,
"SZ": Exchange.SZSE,
"BJ": Exchange.BSE,
"SF": Exchange.SHFE,
"IF": Exchange.CFFEX,
"INE": Exchange.INE,
"DF": Exchange.DCE,
"ZF": Exchange.CZCE,
"GF": Exchange.GFEX
}
def update_history_data() -> None:
    """Update historical contract information"""
    # Load xtquant inside the subprocess
    from xtquant.xtdata import download_history_data
    # Initialize the datafeed
    datafeed = get_datafeed()
    datafeed.init()
    # Download historical contract information
    download_history_data("", "historycontract")
    print("xtquant historical contract download finished")
def update_contract_data(sector_name: str) -> None:
    """Update contract data"""
    # Load xtquant inside the subprocess
    from xtquant.xtdata import (
        get_stock_list_in_sector,
        get_instrument_detail
    )
    # Initialize the datafeed
    datafeed = get_datafeed()
    datafeed.init()
    # Query the CFFEX historical contract codes
    vt_symbols: list[str] = get_stock_list_in_sector(sector_name)
    # Iterate over the list and query contract details
    contracts: list[ContractData] = []
    for xt_symbol in vt_symbols:
        # Split the XT code
        symbol, xt_exchange = xt_symbol.split(".")
        # Keep only option contracts
        if "-" in symbol:
            data: dict = get_instrument_detail(xt_symbol, True)
            type_str = data["InstrumentID"].split("-")[1]
            if type_str == "C":
                option_type = OptionType.CALL
            elif type_str == "P":
                option_type = OptionType.PUT
            option_underlying: str = data["InstrumentID"].split("-")[0]
            contract: ContractData = ContractData(
                symbol=data["InstrumentID"],
                exchange=EXCHANGE_XT2VT[xt_exchange.replace("O", "")],  # option boards carry an extra "O" in the exchange code
                name=data["InstrumentName"],
                product=Product.OPTION,
                size=data["VolumeMultiple"],
                pricetick=data["PriceTick"],
                min_volume=data["MinLimitOrderVolume"],
                option_strike=data["ExtendInfo"]["OptExercisePrice"],
                option_listed=datetime.strptime(data["OpenDate"], "%Y%m%d"),
                option_expiry=datetime.strptime(data["ExpireDate"], "%Y%m%d"),
                option_underlying=option_underlying,
                option_portfolio=data["ProductID"],
                option_index=str(data["ExtendInfo"]["OptExercisePrice"]),
                option_type=option_type,
                gateway_name="XT"
            )
            contracts.append(contract)
    # Save the contract information to the database
    database: EliteDatabase = EliteDatabase()
    database.save_contract_data(contracts)
    print("Contract information updated successfully:", len(contracts))
def update_bar_data() -> None:
    """Update K-line data"""
    # Initialize the datafeed
    datafeed = get_datafeed()
    datafeed.init()
    # Current timestamp
    now: datetime = datetime.now()
    # Load contract information
    database: EliteDatabase = EliteDatabase()
    contracts: list[ContractData] = database.load_contract_data()
    # Load the stored data overview
    data: list[BarOverview] = database.get_bar_overview()
    overviews: dict[str, BarOverview] = {}
    for o in data:
        # Keep only minute-bar overviews
        if o.interval != Interval.MINUTE:
            continue
        vt_symbol: str = f"{o.symbol}.{o.exchange.value}"
        overviews[vt_symbol] = o
    # Iterate over all contracts
    for contract in contracts:
        # Skip contracts without an expiry time
        if not contract.option_expiry:
            continue
        # Look up the stored overview
        overview: BarOverview = overviews.get(contract.vt_symbol, None)
        # Skip contracts that have already expired
        if overview and contract.option_expiry < now:
            continue
        # Initialize the query start time
        start: datetime = datetime(2018, 1, 1)
        # Incremental query: resume from the stored end time
        if overview:
            start = overview.end
        # Query the data and save it to the database
        req: HistoryRequest = HistoryRequest(
            symbol=contract.symbol,
            exchange=contract.exchange,
            start=start,
            end=datetime.now(),
            interval=Interval.MINUTE
        )
        bars: list[BarData] = datafeed.query_bar_history(req)
        if bars:
            database.save_bar_data(bars)
            start_dt: datetime = bars[0].datetime
            end_dt: datetime = bars[-1].datetime
            msg: str = f"{contract.vt_symbol} data updated, {start_dt} - {end_dt}"
            print(msg)
if __name__ == "__main__":
    # Update historical contract information in a subprocess
    process: Process = Process(target=update_history_data)
    process.start()
    process.join()  # Wait for the subprocess to finish
    # Update contract information
    update_contract_data("中金所")
    update_contract_data("过期中金所")
    # Update historical data
    # update_bar_data()

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,342 @@
import pandas as pd
import os
from datetime import time as s_time
from datetime import datetime
import chardet
import numpy as np
# Day-session-only commodity futures products
commodity_day_dict = {'bb': s_time(15,00), 'jd': s_time(15,00), 'lh': s_time(15,00), 'l': s_time(15,00), 'fb': s_time(15,00), 'ec': s_time(15,00),
'AP': s_time(15,00), 'CJ': s_time(15,00), 'JR': s_time(15,00), 'LR': s_time(15,00), 'RS': s_time(15,00), 'PK': s_time(15,00),
'PM': s_time(15,00), 'PX': s_time(15,00), 'RI': s_time(15,00), 'SF': s_time(15,00), 'SM': s_time(15,00), 'UR': s_time(15,00),
'WH': s_time(15,00), 'ao': s_time(15,00), 'br': s_time(15,00), 'wr': s_time(15,00),}
# Commodity futures products with a night session (value = night close)
commodity_night_dict = {'sc': s_time(2,30), 'bc': s_time(1,0), 'lu': s_time(23,0), 'nr': s_time(23,0),'au': s_time(2,30), 'ag': s_time(2,30),
'ss': s_time(1,0), 'sn': s_time(1,0), 'ni': s_time(1,0), 'pb': s_time(1,0),'zn': s_time(1,0), 'al': s_time(1,0), 'cu': s_time(1,0),
'ru': s_time(23,0), 'rb': s_time(23,0), 'hc': s_time(23,0), 'fu': s_time(23,0), 'bu': s_time(23,0), 'sp': s_time(23,0),
'PF': s_time(23,0), 'SR': s_time(23,0), 'CF': s_time(23,0), 'CY': s_time(23,0), 'RM': s_time(23,0), 'MA': s_time(23,0),
'TA': s_time(23,0), 'ZC': s_time(23,0), 'FG': s_time(23,0), 'OI': s_time(23,0), 'SA': s_time(23,0),
'p': s_time(23,0), 'j': s_time(23,0), 'jm': s_time(23,0), 'i': s_time(23,0), 'l': s_time(23,0), 'v': s_time(23,0),
'pp': s_time(23,0), 'eg': s_time(23,0), 'c': s_time(23,0), 'cs': s_time(23,0), 'y': s_time(23,0), 'm': s_time(23,0),
'a': s_time(23,0), 'b': s_time(23,0), 'rr': s_time(23,0), 'eb': s_time(23,0), 'pg': s_time(23,0), 'SH': s_time(23,00)}
# Financial futures products
financial_time_dict = {'IH': s_time(15,00), 'IF': s_time(15,00), 'IC': s_time(15,00), 'IM': s_time(15,00),'T': s_time(15,00), 'TS': s_time(15,00),
'TF': s_time(15,00), 'TL': s_time(15,00)}
# All products covered by the filters
all_dict = {k: v for d in [commodity_day_dict, commodity_night_dict, financial_time_dict] for k, v in d.items()}
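# Note: later dicts win on duplicate keys (e.g. 'l' appears in both the day and
# night dicts, so its night close of 23:00 is the value that takes effect)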
def split_alpha_numeric(string):
alpha_chars = ""
numeric_chars = ""
for char in string:
if char.isalpha():
alpha_chars += char
elif char.isdigit():
numeric_chars += char
return alpha_chars, numeric_chars
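# Example: split_alpha_numeric("rb2305") -> ("rb", "2305")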
def merged_old_tickdata(merged_up_df, sp_char, alpha_chars, code_value):
    # merged_up_df = pd.DataFrame()
    # merged_up_df,alpha_chars,code_value = merged_old_unprocessed_tickdata(all_csv_files, sp_char)
    # The original `while ...: continue` here spun forever on unknown products; bail out instead
    if alpha_chars not in all_dict.keys():
        print("%s futures product is not covered by the filter rules!!!" % (code_value))
        return None
    merged_df = pd.DataFrame({'main_contract': merged_up_df['统一代码'], 'symbol': merged_up_df['合约代码'], 'datetime': merged_up_df['时间'], 'lastprice': merged_up_df['最新'], 'volume': merged_up_df['成交量'],
                              'bid_p': merged_up_df['买一价'], 'ask_p': merged_up_df['卖一价'], 'bid_v': merged_up_df['买一量'], 'ask_v': merged_up_df['卖一量']})
    del merged_up_df
    merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])
    merged_df['tmp_time'] = merged_df['datetime'].dt.strftime('%H:%M:%S.%f')
    merged_df['time'] = merged_df['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time
    del merged_df['tmp_time']
    merged_df = filter_tickdata_time(merged_df, alpha_chars)
    del merged_df['time']
    # Sort whole rows by datetime; the original assigned sorted(column) back, which
    # reorders only the datetime column and misaligns it against the prices
    merged_df.sort_values(by='datetime', inplace=True)
    print("%s%s data generated successfully!" % (code_value, sp_char))
    return merged_df
def merged_new_tickdata(merged_up_df, sp_char, alpha_chars, code_value):
    # merged_up_df = pd.DataFrame()
    # merged_up_df,alpha_chars,code_value = merged_new_unprocessed_tickdata(all_csv_files, sp_char)
    # The original `while ...: continue` here spun forever on unknown products; bail out instead
    if alpha_chars not in all_dict.keys():
        print("%s futures product is not covered by the filter rules!!!" % (code_value))
        return None
    # Date fix-ups (kept for reference)
    # merged_df['业务日期'] = pd.to_datetime(merged_df['业务日期'])
    # merged_df['业务日期'] = merged_df['业务日期'].dt.strftime('%Y-%m-%d')
    # merged_df['最后修改时间'] = pd.to_datetime(merged_df['最后修改时间'])
    merged_up_df['datetime'] = merged_up_df['业务日期'].astype(str) + ' ' + merged_up_df['最后修改时间'].astype(str) + '.' + merged_up_df['最后修改毫秒'].astype(str)
    # Cast 'datetime' to datetime dtype (if the result is 8 hours off, adjust with a timedelta)
    merged_up_df['datetime'] = pd.to_datetime(merged_up_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')
    # Instantaneous volume = first difference of the cumulative 数量 field
    # (the difference can go negative where the cumulative counter resets at session boundaries)
    merged_up_df['volume'] = merged_up_df['数量'] - merged_up_df['数量'].shift(1)
    merged_up_df['volume'] = merged_up_df['volume'].fillna(0)
    merged_df = pd.DataFrame({'main_contract': merged_up_df['统一代码'], 'symbol': merged_up_df['合约代码'], 'datetime': merged_up_df['datetime'], 'lastprice': merged_up_df['最新价'], 'volume': merged_up_df['volume'],
                              'bid_p': merged_up_df['申买价一'], 'ask_p': merged_up_df['申卖价一'], 'bid_v': merged_up_df['申买量一'], 'ask_v': merged_up_df['申卖量一']})
    del merged_up_df
    # merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])
    merged_df['tmp_time'] = merged_df['datetime'].dt.strftime('%H:%M:%S.%f')
    merged_df['time'] = merged_df['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time
    del merged_df['tmp_time']
    merged_df = filter_tickdata_time(merged_df, alpha_chars)
    del merged_df['time']
    # sort_values(inplace=True) returns None, so no assignment is needed
    merged_df.sort_values(by=['datetime'], inplace=True)
    print("%s%s data generated successfully!" % (code_value, sp_char))
    return merged_df
def filter_tickdata_time(filter_df, alpha_chars):
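    # Build index sets of ticks outside the product's trading sessions (morning
    # break, lunch break, afternoon-close/night-open gap, past the night close)
    # and drop them at the end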
if alpha_chars in financial_time_dict.keys():
drop_index1 = pd.DataFrame().index
drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 0, 0, 000000))].index
drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) | (filter_df['time'] < s_time(9, 30, 0, 000000))].index
drop_index4 = pd.DataFrame().index
print("按照中金所交易时间筛选金融期货品种")
elif alpha_chars in commodity_night_dict.keys():
if commodity_night_dict[alpha_chars] == s_time(23,00):
drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 000000)) & (filter_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) & (filter_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = filter_df.loc[(filter_df['time'] > s_time(23, 0, 0, 000000)) | (filter_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为23:00筛选商品期货品种")
elif commodity_night_dict[alpha_chars] == s_time(1,00):
drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 000000)) & (filter_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) & (filter_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = filter_df.loc[(filter_df['time'] > s_time(1, 0, 0, 000000)) & (filter_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为1:00筛选商品期货品种")
elif commodity_night_dict[alpha_chars] == s_time(2,30):
drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 000000)) & (filter_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) & (filter_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = filter_df.loc[(filter_df['time'] > s_time(2, 30, 0, 000000)) & (filter_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为2:30筛选商品期货品种")
        else:
            # Initialize empty indexes so the drops below cannot raise NameError
            drop_index1 = drop_index2 = drop_index3 = drop_index4 = pd.DataFrame().index
            print("Night-session closing time is unset or misconfigured!!!")
elif alpha_chars in commodity_day_dict.keys():
drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 000000)) & (filter_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) | (filter_df['time'] < s_time(9, 0, 0, 000000))].index
drop_index4 = pd.DataFrame().index
print("按照无夜盘筛选商品期货品种")
    else:
        # Initialize empty indexes so the drops below cannot raise NameError
        drop_index1 = drop_index2 = drop_index3 = drop_index4 = pd.DataFrame().index
        print("%s futures product has no time filter applied!!!" % (alpha_chars))
    # Drop the rows that fall outside trading hours
filter_df.drop(labels=drop_index1, axis=0, inplace=True)
filter_df.drop(drop_index2, axis=0, inplace=True)
filter_df.drop(drop_index3, axis=0, inplace=True)
filter_df.drop(drop_index4, axis=0, inplace=True)
return filter_df
def insert_main_contract(df):
    # Insert the continuous-contract code: 888 = main continuous, 999 = index continuous,
    # 889 = sub-main continuous; the header column is "统一代码"
alpha_chars, numeric_chars = split_alpha_numeric(df.loc[0,'合约代码'])
code_value = alpha_chars + "889"
print("code_value characters:", code_value)
df.insert(loc=0,column="统一代码", value=code_value)
return df, alpha_chars, code_value
def merged_old_unprocessed_tickdata(all_csv_files, sp_char):
csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
print("csv_files:", csv_files)
merged_up_df = pd.DataFrame()
dir = os.getcwd()
fileNum_errors = 0
    # Loop over each csv file
for file in csv_files:
try:
            # Read the csv with the first row as the header (try another encoding if gbk fails)
df = pd.read_csv(file,
header=0,
# usecols=[ 1, 2, 3, 7, 12, 13, 14, 15],
# names=[
# "合约代码",
# "时间",
# "最新",
# "成交量",
# "买一价",
# "卖一价",
# "买一量",
# "卖一量",
# ],
encoding='gbk',
low_memory= False,
# skiprows=0,
                # parse_dates=['时间'] # parse the time column so rows can be sorted by time later
)
        except:
            file_path = os.path.join(dir, file)
            fileNum_errors += 1
            with open(file_path, 'rb') as file:
                data = file.read()
            # Detect the actual encoding with chardet
            detected_encoding = chardet.detect(data)['encoding']
            print("%s: %s is not gbk-encoded (detected %s); convert it to gbk. Errors so far: %s" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), file_path, detected_encoding, fileNum_errors))
            with open('output_error.txt', 'a') as f:
                print("%s: %s is not gbk-encoded (detected %s); convert it to gbk. Errors so far: %s" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), file_path, detected_encoding, fileNum_errors), file=f)
            # Skip this file, otherwise the previous iteration's df would be merged again below
            continue
        # Drop duplicate rows within this file
        df.drop_duplicates(inplace=True)
        # Append this file's data to the merged DataFrame
        merged_up_df = pd.concat([merged_up_df, df], ignore_index=True)
    # Drop rows duplicated across files (all columns considered)
    merged_up_df.drop_duplicates(subset=merged_up_df.columns.tolist(), inplace=True)
    # Reset the row index
    merged_up_df.reset_index(inplace=True, drop=True)
    merged_up_df, alpha_chars, code_value = insert_main_contract(merged_up_df)
    # print("Yearly unprocessed CSV files merged successfully")
    return merged_up_df, alpha_chars, code_value
def merged_new_unprocessed_tickdata(all_csv_files, sp_char):
csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
print("csv_files:", csv_files)
merged_up_df = pd.DataFrame()
dir = os.getcwd()
fileNum_errors = 0
    # Loop over each csv file
for file in csv_files:
try:
            # Read the csv with the first row as the header (try another encoding if gbk fails)
df = pd.read_csv(
file,
header=0,
# usecols=[0, 1, 4, 11, 20, 21, 22, 23, 24, 25, 43],
# names=[
# "交易日",
# "合约代码",
# "最新价",
# "数量",
# "最后修改时间",
# "最后修改毫秒",
# "申买价一",
# "申买量一",
# "申卖价一",
# "申卖量一",
# "业务日期",
# ],
encoding='gbk',
low_memory= False,
# skiprows=0,
                # parse_dates=['业务日期','最后修改时间','最后修改毫秒'] # parse the time columns so rows can be sorted by time later
)
        except:
            file_path = os.path.join(dir, file)
            fileNum_errors += 1
            with open(file_path, 'rb') as file:
                data = file.read()
            # Detect the actual encoding with chardet
            detected_encoding = chardet.detect(data)['encoding']
            print("%s: %s is not gbk-encoded (detected %s); convert it to gbk. Errors so far: %s" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), file_path, detected_encoding, fileNum_errors))
            with open('output_error.txt', 'a') as f:
                print("%s: %s is not gbk-encoded (detected %s); convert it to gbk. Errors so far: %s" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), file_path, detected_encoding, fileNum_errors), file=f)
            # Skip this file, otherwise the previous iteration's df would be merged again below
            continue
        # Drop duplicate rows within this file
        df.drop_duplicates(inplace=True)
        # Append this file's data to the merged DataFrame
        merged_up_df = pd.concat([merged_up_df, df], ignore_index=True)
    # Drop rows duplicated across files (all columns considered)
    merged_up_df.drop_duplicates(subset=merged_up_df.columns.tolist(), inplace=True)
    # Reset the row index
    merged_up_df.reset_index(inplace=True, drop=True)
    merged_up_df, alpha_chars, code_value = insert_main_contract(merged_up_df)
    # print("Yearly unprocessed CSV files merged successfully")
    return merged_up_df, alpha_chars, code_value
def reinstatement_tickdata(merged_rs_df):
merged_rs_df['main_contract'] = merged_rs_df['main_contract'].astype(str)
merged_rs_df['symbol'] = merged_rs_df['symbol'].astype(str)
merged_rs_df['datetime'] = pd.to_datetime(merged_rs_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')
# merged_rs_df['lastprice'] = merged_rs_df['lastprice'].astype(float)
merged_rs_df['volume'] = merged_rs_df['volume'].astype(int)
# merged_rs_df['bid_p'] = merged_rs_df['bid_p'].astype(float)
# merged_rs_df['ask_p'] = merged_rs_df['ask_p'].astype(float)
merged_rs_df['bid_v'] = merged_rs_df['bid_v'].astype(int)
merged_rs_df['ask_v'] = merged_rs_df['ask_v'].astype(int)
    # Ratio-based (multiplicative) adjustment, not used for now
    # df['复权因子'] = df['卖一价'].shift() / df['买一价']
    # df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['卖一价'].shift() / df['买一价'], 1)
    # df['复权因子'] = df['复权因子'].fillna(1)
    # df['买一价_adj'] = df['买一价'] * df['复权因子'].cumprod()
    # df['卖一价_adj'] = df['卖一价'] * df['复权因子'].cumprod()
    # df['最新_adj'] = df['最新'] * df['复权因子'].cumprod()
    # Difference-based (additive) adjustment
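    # At each contract roll (symbol change) the factor is the old contract's last
    # ask minus the new contract's first bid; the cumulative sum of these gaps is
    # added to all later prices so the stitched series has no jump at rolls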
merged_rs_df['复权因子'] = np.where(merged_rs_df['symbol'] != merged_rs_df['symbol'].shift(), merged_rs_df['ask_p'].shift() - merged_rs_df['bid_p'], 0)
merged_rs_df['复权因子'] = merged_rs_df['复权因子'].fillna(0)
merged_rs_df['bid_p_adj'] = merged_rs_df['bid_p'] + merged_rs_df['复权因子'].cumsum()
merged_rs_df['ask_p_adj'] = merged_rs_df['ask_p'] + merged_rs_df['复权因子'].cumsum()
merged_rs_df['lastprice_adj'] = merged_rs_df['lastprice'] + merged_rs_df['复权因子'].cumsum()
    # Overwrite the original values with the adjusted ones
merged_rs_df['bid_p'] = merged_rs_df['bid_p_adj'].round(4)
merged_rs_df['ask_p'] = merged_rs_df['ask_p_adj'].round(4)
merged_rs_df['lastprice'] = merged_rs_df['lastprice_adj'].round(4)
    # Drop the helper columns
del merged_rs_df['复权因子']
del merged_rs_df['bid_p_adj']
del merged_rs_df['ask_p_adj']
del merged_rs_df['lastprice_adj']
return merged_rs_df
# def find_files(all_csv_files):
# all_csv_files = sorted(all_csv_files)
# sp_old_chars = ['_2019','_2020','_2021']
# sp_old_chars = sorted(sp_old_chars)
# sp_new_chars = ['_2022','_2023']
# sp_new_chars = sorted(sp_new_chars)
# csv_old_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_old_chars)]
# csv_new_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_new_chars)]
# return csv_old_files, csv_new_files

View File

@@ -0,0 +1,174 @@
import pandas as pd
import os
from datetime import time as s_time
from datetime import datetime
import chardet
import numpy as np
# Day-session-only commodity futures products
commodity_day_dict = {'bb': s_time(15,00), 'jd': s_time(15,00), 'lh': s_time(15,00), 'l': s_time(15,00), 'fb': s_time(15,00), 'ec': s_time(15,00),
'AP': s_time(15,00), 'CJ': s_time(15,00), 'JR': s_time(15,00), 'LR': s_time(15,00), 'RS': s_time(15,00), 'PK': s_time(15,00),
'PM': s_time(15,00), 'PX': s_time(15,00), 'RI': s_time(15,00), 'SF': s_time(15,00), 'SM': s_time(15,00), 'UR': s_time(15,00),
'WH': s_time(15,00), 'ao': s_time(15,00), 'br': s_time(15,00), 'wr': s_time(15,00),}
# Commodity futures products with a night session (value = night close)
commodity_night_dict = {'sc': s_time(2,30), 'bc': s_time(1,0), 'lu': s_time(23,0), 'nr': s_time(23,0),'au': s_time(2,30), 'ag': s_time(2,30),
'ss': s_time(1,0), 'sn': s_time(1,0), 'ni': s_time(1,0), 'pb': s_time(1,0),'zn': s_time(1,0), 'al': s_time(1,0), 'cu': s_time(1,0),
'ru': s_time(23,0), 'rb': s_time(23,0), 'hc': s_time(23,0), 'fu': s_time(23,0), 'bu': s_time(23,0), 'sp': s_time(23,0),
'PF': s_time(23,0), 'SR': s_time(23,0), 'CF': s_time(23,0), 'CY': s_time(23,0), 'RM': s_time(23,0), 'MA': s_time(23,0),
'TA': s_time(23,0), 'ZC': s_time(23,0), 'FG': s_time(23,0), 'OI': s_time(23,0), 'SA': s_time(23,0),
'p': s_time(23,0), 'j': s_time(23,0), 'jm': s_time(23,0), 'i': s_time(23,0), 'l': s_time(23,0), 'v': s_time(23,0),
'pp': s_time(23,0), 'eg': s_time(23,0), 'c': s_time(23,0), 'cs': s_time(23,0), 'y': s_time(23,0), 'm': s_time(23,0),
'a': s_time(23,0), 'b': s_time(23,0), 'rr': s_time(23,0), 'eb': s_time(23,0), 'pg': s_time(23,0), 'SH': s_time(23,00)}
# Financial futures products
financial_time_dict = {'IH': s_time(15,00), 'IF': s_time(15,00), 'IC': s_time(15,00), 'IM': s_time(15,00),'T': s_time(15,15), 'TS': s_time(15,15),
'TF': s_time(15,15), 'TL': s_time(15,15)}
# All products covered by the filters
all_dict = {k: v for d in [commodity_day_dict, commodity_night_dict, financial_time_dict] for k, v in d.items()}
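# Note: when a key appears in more than one dict (e.g. 'l' is in both the day
# and night dicts), the later dict wins, so the night close takes effect for it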
def split_alpha_numeric(string):
alpha_chars = ""
numeric_chars = ""
for char in string:
if char.isalpha():
alpha_chars += char
elif char.isdigit():
numeric_chars += char
return alpha_chars, numeric_chars
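# Example: split_alpha_numeric("rb2305") -> ("rb", "2305")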
def merged_new_tickdata(merged_up_df, alpha_chars):
    merged_up_df['datetime'] = merged_up_df['交易日'].astype(str) + ' ' + merged_up_df['最后修改时间'].astype(str) + '.' + merged_up_df['最后修改毫秒'].astype(str)
    # Cast 'datetime' to datetime dtype (if the result is 8 hours off, adjust with a timedelta)
    merged_up_df['datetime'] = pd.to_datetime(merged_up_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')
    # Instantaneous volume = first difference of the cumulative 数量 field
    merged_up_df['volume'] = merged_up_df['数量'] - merged_up_df['数量'].shift(1)
    merged_up_df['volume'] = merged_up_df['volume'].fillna(0)
    # Use the differenced 'volume' here; the original passed the cumulative 数量 field,
    # which left the computed instantaneous volume unused
    merged_df = pd.DataFrame({'main_contract': merged_up_df['统一代码'], 'symbol': merged_up_df['合约代码'], 'datetime': merged_up_df['datetime'], 'lastprice': merged_up_df['最新价'], 'volume': merged_up_df['volume'],
                              'bid_p': merged_up_df['申买价一'], 'ask_p': merged_up_df['申卖价一'], 'bid_v': merged_up_df['申买量一'], 'ask_v': merged_up_df['申卖量一']})
    del merged_up_df
    # merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])
    merged_df['tmp_time'] = merged_df['datetime'].dt.strftime('%H:%M:%S.%f')
    merged_df['time'] = merged_df['tmp_time'].apply(lambda x: datetime.strptime(x, '%H:%M:%S.%f')).dt.time
    del merged_df['tmp_time']
    merged_df = filter_tickdata_time(merged_df, alpha_chars)
    del merged_df['time']
    # sort_values(inplace=True) returns None, so no assignment is needed
    merged_df.sort_values(by=['datetime'], inplace=True)
    # print("%s%s data generated successfully!" % (code_value, sp_char))
    return merged_df
def filter_tickdata_time(filter_df, alpha_chars):
    # Ticks land locally with a small delay, so each session's end time below is padded slightly
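    # Build index sets of ticks outside the product's trading sessions (morning
    # break, lunch break, afternoon-close/night-open gap, past the night close)
    # and drop them at the end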
if alpha_chars in financial_time_dict.keys():
drop_index1 = pd.DataFrame().index
drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 500000)) & (filter_df['time'] < s_time(13, 0, 0, 000000))].index
if alpha_chars in ['IH', 'IF', 'IC', 'IM']:
drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 500000)) | (filter_df['time'] < s_time(9, 30, 0, 000000))].index
print("按照中金所股指期货交易时间筛选金融期货品种")
else:
drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 15, 0, 500000)) | (filter_df['time'] < s_time(9, 30, 0, 000000))].index
print("按照中金所国债期货交易时间筛选金融期货品种")
drop_index4 = pd.DataFrame().index
print("按照中金所交易时间筛选金融期货品种")
elif alpha_chars in commodity_night_dict.keys():
if commodity_night_dict[alpha_chars] == s_time(23,00):
drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 000000)) & (filter_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) & (filter_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = filter_df.loc[(filter_df['time'] > s_time(23, 0, 0, 000000)) | (filter_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为23:00筛选商品期货品种")
elif commodity_night_dict[alpha_chars] == s_time(1,00):
drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 000000)) & (filter_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) & (filter_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = filter_df.loc[(filter_df['time'] > s_time(1, 0, 0, 000000)) & (filter_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为1:00筛选商品期货品种")
elif commodity_night_dict[alpha_chars] == s_time(2,30):
drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 000000)) & (filter_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) & (filter_df['time'] < s_time(21, 0, 0, 000000))].index
drop_index4 = filter_df.loc[(filter_df['time'] > s_time(2, 30, 0, 000000)) & (filter_df['time'] < s_time(9, 0, 0, 000000))].index
print("按照夜盘截止交易时间为2:30筛选商品期货品种")
        else:
            # Initialize empty indexes so the drops below cannot raise NameError
            drop_index1 = drop_index2 = drop_index3 = drop_index4 = pd.DataFrame().index
            print("Night-session closing time is unset or misconfigured!!!")
elif alpha_chars in commodity_day_dict.keys():
drop_index1 = filter_df.loc[(filter_df['time'] > s_time(10, 15, 0, 000000)) & (filter_df['time'] < s_time(10, 30, 0, 000000))].index
drop_index2 = filter_df.loc[(filter_df['time'] > s_time(11, 30, 0, 000000)) & (filter_df['time'] < s_time(13, 30, 0, 000000))].index
drop_index3 = filter_df.loc[(filter_df['time'] > s_time(15, 0, 0, 000000)) | (filter_df['time'] < s_time(9, 0, 0, 000000))].index
drop_index4 = pd.DataFrame().index
print("按照无夜盘筛选商品期货品种")
    else:
        # Initialize empty indexes so the drops below cannot raise NameError
        drop_index1 = drop_index2 = drop_index3 = drop_index4 = pd.DataFrame().index
        print("%s futures product has no time filter applied!!!" % (alpha_chars))
    # Drop the rows that fall outside trading hours
filter_df.drop(labels=drop_index1, axis=0, inplace=True)
filter_df.drop(drop_index2, axis=0, inplace=True)
filter_df.drop(drop_index3, axis=0, inplace=True)
filter_df.drop(drop_index4, axis=0, inplace=True)
return filter_df
def insert_main_contract(df):
    # Insert the continuous-contract code: 888 = main continuous, 999 = index continuous,
    # 889 = sub-main continuous; the header column is "统一代码"
alpha_chars, numeric_chars = split_alpha_numeric(df.loc[0,'合约代码'])
code_value = alpha_chars + "889"
print("code_value characters:", code_value)
df.insert(loc=0,column="统一代码", value=code_value)
return df, alpha_chars, code_value
def reinstatement_tickdata(merged_rs_df):
merged_rs_df['main_contract'] = merged_rs_df['main_contract'].astype(str)
merged_rs_df['symbol'] = merged_rs_df['symbol'].astype(str)
merged_rs_df['datetime'] = pd.to_datetime(merged_rs_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')
# merged_rs_df['lastprice'] = merged_rs_df['lastprice'].astype(float)
merged_rs_df['volume'] = merged_rs_df['volume'].astype(int)
# merged_rs_df['bid_p'] = merged_rs_df['bid_p'].astype(float)
# merged_rs_df['ask_p'] = merged_rs_df['ask_p'].astype(float)
merged_rs_df['bid_v'] = merged_rs_df['bid_v'].astype(int)
merged_rs_df['ask_v'] = merged_rs_df['ask_v'].astype(int)
    # Ratio-based (multiplicative) adjustment, not used for now
    # df['复权因子'] = df['卖一价'].shift() / df['买一价']
    # df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['卖一价'].shift() / df['买一价'], 1)
    # df['复权因子'] = df['复权因子'].fillna(1)
    # df['买一价_adj'] = df['买一价'] * df['复权因子'].cumprod()
    # df['卖一价_adj'] = df['卖一价'] * df['复权因子'].cumprod()
    # df['最新_adj'] = df['最新'] * df['复权因子'].cumprod()
    # Difference-based (additive) adjustment
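    # The factor at each roll is the previous contract's last ask minus the new
    # contract's first bid; its cumulative sum keeps the stitched series continuous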
merged_rs_df['复权因子'] = np.where(merged_rs_df['symbol'] != merged_rs_df['symbol'].shift(), merged_rs_df['ask_p'].shift() - merged_rs_df['bid_p'], 0)
merged_rs_df['复权因子'] = merged_rs_df['复权因子'].fillna(0)
merged_rs_df['bid_p_adj'] = merged_rs_df['bid_p'] + merged_rs_df['复权因子'].cumsum()
merged_rs_df['ask_p_adj'] = merged_rs_df['ask_p'] + merged_rs_df['复权因子'].cumsum()
merged_rs_df['lastprice_adj'] = merged_rs_df['lastprice'] + merged_rs_df['复权因子'].cumsum()
    # Overwrite the original values with the adjusted ones
merged_rs_df['bid_p'] = merged_rs_df['bid_p_adj'].round(4)
merged_rs_df['ask_p'] = merged_rs_df['ask_p_adj'].round(4)
merged_rs_df['lastprice'] = merged_rs_df['lastprice_adj'].round(4)
    # Drop the helper columns
del merged_rs_df['复权因子']
del merged_rs_df['bid_p_adj']
del merged_rs_df['ask_p_adj']
del merged_rs_df['lastprice_adj']
return merged_rs_df

View File

@@ -0,0 +1,68 @@
import pandas as pd
import os
from datetime import time as s_time
from datetime import datetime
import chardet
import numpy as np
def split_alpha_numeric(string):
alpha_chars = ""
numeric_chars = ""
for char in string:
if char.isalpha():
alpha_chars += char
elif char.isdigit():
numeric_chars += char
return alpha_chars, numeric_chars
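# Example: split_alpha_numeric("rb2305") -> ("rb", "2305")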
def merged_old_unprocessed_tickdata(all_csv_files, sp_char):
csv_files = [sp_file for sp_file in all_csv_files if sp_char in sp_file]
print("csv_files:", csv_files)
merged_up_df = pd.DataFrame()
dir = os.getcwd()
fileNum_errors = 0
    # Loop over each csv file
for file in csv_files:
try:
df = pd.read_csv(file,
header=0,
encoding='gbk',
low_memory= False,
# skiprows=0,
                # parse_dates=['时间'] # parse the time column so rows can be sorted by time later
)
        except:
            file_path = os.path.join(dir, file)
            fileNum_errors += 1
            with open(file_path, 'rb') as file:
                data = file.read()
            # Detect the actual encoding with chardet
            detected_encoding = chardet.detect(data)['encoding']
            print("%s: %s is not gbk-encoded (detected %s); convert it to gbk. Errors so far: %s" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), file_path, detected_encoding, fileNum_errors))
            with open('output_error.txt', 'a') as f:
                print("%s: %s is not gbk-encoded (detected %s); convert it to gbk. Errors so far: %s" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), file_path, detected_encoding, fileNum_errors), file=f)
            # Skip this file, otherwise the previous iteration's df would be merged again below
            continue
        # Drop duplicate rows within this file
        df.drop_duplicates(inplace=True)
        # Append this file's data to the merged DataFrame
        merged_up_df = pd.concat([merged_up_df, df], ignore_index=True)
    # Drop rows duplicated across files (all columns considered)
    merged_up_df.drop_duplicates(subset=merged_up_df.columns.tolist(), inplace=True)
    # Reset the row index
    merged_up_df.reset_index(inplace=True, drop=True)
    # merged_up_df,alpha_chars,code_value = insert_main_contract(merged_up_df)
    # print("Yearly unprocessed CSV files merged successfully")
    return merged_up_df  # ,alpha_chars,code_value

View File

@@ -0,0 +1,85 @@
import os
import requests
import time
from datetime import datetime
from requests.adapters import HTTPAdapter
import pandas as pd
pd.set_option('display.max_rows', 1000)
pd.set_option('expand_frame_repr', False)  # do not wrap when there are many columns
# Align wide (CJK) columns in console output
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)
def requestForNew(url):
session = requests.Session()
session.mount('http://', HTTPAdapter(max_retries=3))
session.mount('https://', HTTPAdapter(max_retries=3))
session.keep_alive = False
response = session.get(url, headers={'Connection': 'close'}, timeout=30)
if response.content:
return response
else:
print("链接失败", response)
def getDate():
url = 'http://hq.sinajs.cn/list=sh000001'
response = requestForNew(url).text
data_date = str(response.split(',')[-4])
    # The quote string is comma-separated; the fourth field from the end is the Shanghai Composite's quote date
return data_date
# Fetch the daily-updated stock list from Sina Finance
def getStockCodeForEveryday():
    df = pd.DataFrame()
    for page in range(1, 100):
        # Pages 1-100 are plenty; no need to worry about newly added pages
        url = 'http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeData?page=' \
              + str(page) + '&num=80&sort=changepercent&asc=0&node=hs_a&symbol=&_s_r_a=page'
        # print(url)
        content = requestForNew(url).json()
        if not content:
            # `if content == []:` would also work
            print("Stock list fetched completely.")
            break
        print("Reading page " + str(page))
        time.sleep(3)
        # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
        df = pd.concat([df, pd.DataFrame(content, dtype='float')], ignore_index=True)
    rename_dict = {'symbol': '股票代码', 'code': '交易日期', 'name': '股票名称', 'open': '开盘价',
                   'settlement': '前收盘价', 'trade': '收盘价', 'high': '最高价', 'low': '最低价',
                   'buy': '买一', 'sell': '卖一', 'volume': '成交量', 'amount': '成交额',
                   'changepercent': '涨跌幅', 'pricechange': '涨跌额',
                   'mktcap': '总市值', 'nmc': '流通市值', 'ticktime': '数据更新时间', 'per': 'per', 'pb': '市净率',
                   'turnoverratio': '换手率'}
    df.rename(columns=rename_dict, inplace=True)
    tradeDate = getDate()
    # 'code' was cast to float above, so overwrite its renamed column (交易日期) with the trade date
    df['交易日期'] = tradeDate
    df = df[['股票代码', '股票名称', '交易日期', '开盘价', '最高价', '最低价', '收盘价', '前收盘价', '成交量', '成交额', '流通市值', '总市值']]
    return df
df = getStockCodeForEveryday()
print(df)
for i in df.index:
    t = df.iloc[i:i + 1, :]
    stock_code = t.iloc[0]['股票代码']
    # Build the output file path
    path = './data/' \
        + stock_code + '.csv'
    # The file exists: not a newly listed stock, so append without a header
    if os.path.exists(path):
        t.to_csv(path, header=None, index=False, mode='a', encoding='gbk')
    # The file does not exist: a newly listed stock
    else:
        # Write the header line first
        pd.DataFrame(columns=['数据由邢不行整理']).to_csv(path, index=False, encoding='gbk')
        t.to_csv(path, index=False, mode='a', encoding='gbk')
    print(stock_code)