Files
Quant_Code/2.数据下载与处理/Tushare_get_data.ipynb
Win_home f925dff46b Enhance trading workflow with new order flow management
- Added dingdanliu_nb_mflow for improved order processing
- Updated related scripts and configurations to support new functionality
2025-03-15 22:45:08 +08:00

699 lines
23 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import tushare as ts\n",
"\n",
"# Read the Tushare API token from the TUSHARE_TOKEN environment variable so the\n",
"# credential is never stored in the notebook source or in its saved outputs.\n",
"# NOTE(review): the token that was previously committed in this cell should be rotated.\n",
"ts.set_token(os.environ['TUSHARE_TOKEN'])\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"300\n",
"<class 'int'>\n",
"0.15\n",
"<class 'float'>\n"
]
}
],
"source": [
"from datetime import datetime, timedelta\n",
"import pandas as pd\n",
"# Load only the columns needed from the fee table and give them explicit names.\n",
"fees_df = pd.read_csv(\n",
"    './futures_fees_info.csv',\n",
"    usecols=[1, 4, 17, 19, 25],\n",
"    names=['合约', '合约乘数', '做多保证金率', '做空保证金率', '品种代码'],\n",
")\n",
"\n",
"# Filter the IH2407 rows once and reuse the selection for both lookups.\n",
"_ih2407_rows = fees_df.loc[fees_df['合约'] == 'IH2407']\n",
"\n",
"data0 = int(_ih2407_rows['合约乘数'].iloc[0])\n",
"print(data0, type(data0), sep='\\n')\n",
"\n",
"data1 = float(_ih2407_rows['做多保证金率'].iloc[0])\n",
"print(data1, type(data1), sep='\\n')\n",
"# fees_df[fees_df['合约'] == 'IH2407']['做空保证金率'].iloc[0]\n",
"# (fees_df[fees_df['合约'] == 'IH2407']['做多保证金率'].iloc[0] + fees_df[fees_df['合约'] == 'IH2407']['做空保证金率'].iloc[0])/2"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime, timedelta\n",
"import pandas as pd\n",
"fees_df = pd.read_csv('./futures_fees_info.csv', usecols= [1, 4, 17, 19],names=['合约', '合约乘数', '做多保证金率', '做空保证金率'])\n",
"contacts_df = pd.read_csv('./main_contacts.csv', usecols= [16, 17],names=['主连代码', '品种代码'])\n",
"\n",
"def get_main_contact_on_time(main_symbol_code):\n",
"    \"\"\"Return the main-contract code recorded for a product code.\n",
"\n",
"    Looks up ``main_symbol_code`` in the ``品种代码`` column of the notebook-level\n",
"    ``contacts_df`` and returns the ``主连代码`` value of the first matching row.\n",
"\n",
"    Raises:\n",
"        IndexError: if no row of ``contacts_df`` matches ``main_symbol_code``.\n",
"    \"\"\"\n",
"    matching_codes = contacts_df.loc[contacts_df['品种代码'] == main_symbol_code, '主连代码']\n",
"    return matching_codes.iloc[0]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'IH2407'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_main_contact_on_time('IH')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime, timedelta\n",
"def get_main_contact_on_time(main_symbol_code):\n",
"    \"\"\"Resolve a continuous-contract code to the main contract code used for trading.\n",
"\n",
"    Args:\n",
"        main_symbol_code: continuous-contract code passed to ``pro.fut_mapping``,\n",
"            e.g. 'IH.CFX' or 'SA.ZCE'.\n",
"\n",
"    Returns:\n",
"        The mapped contract symbol: unchanged for CFX; product letters lower-cased for\n",
"        SHF/DCE/GFE/INE; first digit of the numeric part dropped for ZCE.\n",
"\n",
"    Raises:\n",
"        KeyError: if ``pro.fut_mapping`` returns no row for the selected trade date.\n",
"        ValueError: if the mapped contract belongs to an exchange not handled here.\n",
"    \"\"\"\n",
"    # Before 15:00 local time use the previous calendar day's mapping, otherwise today's.\n",
"    # NOTE(review): the previous calendar day may not be a trading day, in which case the\n",
"    # mapping query can come back empty.\n",
"    now = datetime.now()\n",
"    reference_day = now if now.hour >= 15 else now - timedelta(days=1)\n",
"    data_str = reference_day.date().strftime('%Y%m%d')\n",
"\n",
"    # Query the mapping once and split it into the trading symbol and the exchange suffix.\n",
"    mapping = pro.fut_mapping(ts_code=main_symbol_code, trade_date=data_str)\n",
"    if mapping.empty:\n",
"        raise KeyError('no fut_mapping row for %s on %s' % (main_symbol_code, data_str))\n",
"    mapping_parts = mapping.loc[0, 'mapping_ts_code'].split('.')\n",
"    main_symbol, exchange_id = mapping_parts[0], mapping_parts[1]\n",
"\n",
"    # Separate the product letters from the delivery-month digits.\n",
"    alpha_chars = ''.join(char for char in main_symbol if char.isalpha())\n",
"    numeric_chars = ''.join(char for char in main_symbol if char.isdigit())\n",
"\n",
"    # Exchange suffix used in ts_code -> exchange name expected by fut_basic.\n",
"    exchange = {'CFX': 'CFFEX', 'SHF':'SHFE', 'DCE':'DCE', 'GFE':'GFEX', 'INE':'INE', 'ZCE':'CZCE'}\n",
"    if exchange_id not in exchange:\n",
"        raise ValueError('unsupported exchange suffix: %s' % exchange_id)\n",
"\n",
"    if exchange_id == 'CFX':\n",
"        main_code = main_symbol\n",
"    elif exchange_id == 'ZCE':\n",
"        # ZCE symbols drop the leading digit of the numeric part.\n",
"        main_code = alpha_chars + numeric_chars[1:]\n",
"    else:\n",
"        main_code = alpha_chars.lower() + numeric_chars\n",
"\n",
"    # per_unit is looked up by the converted code for ZCE and by the mapped symbol otherwise.\n",
"    df = pro.fut_basic(exchange = exchange[exchange_id], fut_type='1', fut_code = alpha_chars, fields='ts_code,symbol,exchange,name,per_unit')\n",
"    lookup_symbol = main_code if exchange_id == 'ZCE' else main_symbol\n",
"    per_unit = df[df['symbol'] == lookup_symbol]['per_unit'].iloc[0]\n",
"\n",
"    print(\"最终使用的主连代码:\",main_code)\n",
"    print(\"%s的交易单位(每手):%s\"%(main_symbol, per_unit))\n",
"    return main_code\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sb_1 = get_main_contact_on_time('IH.CFX')\n",
"sb_2 = get_main_contact_on_time('cu.SHF')\n",
"sb_3 = get_main_contact_on_time('eb.DCE')\n",
"sb_4 = get_main_contact_on_time('si.GFE')\n",
"sb_5 = get_main_contact_on_time('sc.INE') \n",
"sb_6 = get_main_contact_on_time('SA.ZCE')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# df = pro.fut_basic(exchange='DCE', fut_type='1',fut_code = 'j' , fields='ts_code,symbol,exchange,name,fut_code,multiplier,trade_unit,per_unit,quote_unit,quote_unit_desc,d_mode_desc,list_date,delist_date,d_month,last_ddate,trade_time_desc')\n",
"# df = pro.fut_basic(exchange='SHFE', fut_type='1', fut_code = 'au', fields='ts_code,symbol,name,list_date,delist_date')\n",
"df = pro.fut_basic(exchange='CZCE', fut_type='1', fut_code = 'SA', fields='ts_code,symbol,exchange,name,fut_code,per_unit')\n",
"# index_of_value = df.index[df['symbol'] == 'AU2408']\n",
"df.head()\n",
"value = df[df['symbol'] == 'SA409']['per_unit'].iloc[0]\n",
"print(value)\n",
"# df.loc[index_of_value, 'per_unit'].value\n",
"# df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pro.fut_mapping(ts_code='SA.ZCE')\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ds = pro.fut_settle(trade_date = '20240625', ts_code ='SA2409.ZCE')\n",
"# ds = pro.fut_settle(trade_date='20230625', exchange='ZCE')\n",
"# ds = pro.fut_settle(ts_code='SA409.ZCE', exchange='CZCE')\n",
"# pro.fut_settle(trade_date='20181114', exchange='CZCE')\n",
"# Bind the result to ``ds``: the cells below read ``ds`` and would otherwise raise NameError.\n",
"ds = pro.fut_settle(ts_code='AP2510.ZCE', exchange='CZCE')\n",
"ds"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds['margin_rate'] = round((ds['long_margin_rate'] + ds['short_margin_rate'])/2,2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds['margin_rate'].iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"value = df.loc[index_of_value, 'per_unit'].iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(value)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_IH = pro.fut_mapping(ts_code='IH.CFX')\n",
"print(df_IH)\n",
"df_IH.to_csv(r\"E:\\data\\mapping_ts_code_IH.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_IF = pro.fut_mapping(ts_code='IF.CFX')\n",
"print(df_IF)\n",
"df_IF.to_csv(r\"E:\\data\\mapping_ts_code_IF.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_IC = pro.fut_mapping(ts_code='IC.CFX')\n",
"print(df_IC)\n",
"df_IC.to_csv(r\"E:\\data\\mapping_ts_code_IC.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_IM = pro.fut_mapping(ts_code='IM.CFX')\n",
"print(df_IM)\n",
"df_IM.to_csv(r\"E:\\data\\mapping_ts_code_IM.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_TF = pro.fut_mapping(ts_code='TF.CFX')\n",
"print(df_TF)\n",
"df_TF.to_csv(r\"E:\\data\\mapping_ts_code_TF.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_T = pro.fut_mapping(ts_code='T.CFX')\n",
"print(df_T)\n",
"df_T.to_csv(r\"E:\\data\\mapping_ts_code_T.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_TS = pro.fut_mapping(ts_code='TS.CFX')\n",
"print(df_TS)\n",
"df_TS.to_csv(r\"E:\\data\\mapping_ts_code_TS.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use the ``ts_code`` keyword, matching every other fut_mapping call in this notebook.\n",
"df_TL = pro.fut_mapping(ts_code='TL.CFX')\n",
"print(df_TL)\n",
"df_TL.to_csv(r\"E:\\data\\mapping_TL_code_TL.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'pro' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m df_TL \u001b[38;5;241m=\u001b[39m \u001b[43mpro\u001b[49m\u001b[38;5;241m.\u001b[39mfut_mapping(ts_code\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTL.CFX\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(df_TL)\n\u001b[0;32m 3\u001b[0m df_TL\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mD:\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mmapping_TL_code_TL.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[1;31mNameError\u001b[0m: name 'pro' is not defined"
]
}
],
"source": [
"df_TL = pro.fut_mapping(ts_code='TL.CFX')\n",
"print(df_TL)\n",
"df_TL.to_csv(r\"D:\\data\\mapping_TL_code_TL.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import akshare as ak\n",
"\n",
"futures_comm_info_df = ak.futures_comm_info(symbol=\"上海国际能源交易中心\")\n",
"print(futures_comm_info_df[\"保证金-买开\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"futures_fees_info_df = ak.futures_fees_info()\n",
"print(futures_fees_info_df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"futures_fees_info_df.to_csv(r'./futures_fees_info.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"futures_display_main_sina_df = ak.futures_display_main_sina()\n",
"print(futures_display_main_sina_df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"url = 'https://www.9qihuo.com/hangqing'  # page that holds the settlement-parameter tables\n",
"data = pd.read_html(url)  # read every HTML table on the page\n",
"# DataFrame.append was removed in pandas 2.0; pd.concat produces the same stacked frame.\n",
"dt = pd.concat([data[4].drop([0], axis=0), data[5]], ignore_index=True)\n",
"# Promote the first row to column headers, drop it from the data, then index by contract code.\n",
"dt.columns = dt.iloc[0]\n",
"dt = dt.drop([0], axis=0).set_index('合约代码')\n",
"print(dt)  # show the settlement parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import csv\n",
"\n",
"# Target URL\n",
"url = \"https://www.9qihuo.com/hangqing\"\n",
"\n",
"# NOTE(review): verify=False disables TLS certificate verification. It is kept because the\n",
"# original disabled it on purpose; confirm it is still required for this site.\n",
"# The timeout keeps the cell from hanging forever and raise_for_status surfaces HTTP errors.\n",
"response = requests.get(url, verify=False, timeout=30)\n",
"response.raise_for_status()\n",
"response.encoding = 'utf-8'  # decode the body as UTF-8\n",
"\n",
"# Parse the page and locate the quote table.\n",
"soup = BeautifulSoup(response.text, 'lxml')\n",
"table = soup.find('table', {'id': 'tblhangqinglist'})\n",
"if table is None:\n",
"    # Fail before opening the output file so an existing CSV is not truncated.\n",
"    raise RuntimeError(\"table 'tblhangqinglist' not found in the downloaded page\")\n",
"\n",
"# Write one CSV row per table row, using the stripped text of each header/data cell.\n",
"with open('main_contacts.csv', mode='w', newline='', encoding='utf-8') as file:\n",
"    writer = csv.writer(file)\n",
"    writer.writerows(\n",
"        [col.text.strip() for col in row.find_all(['th', 'td'])]\n",
"        for row in table.find_all('tr')\n",
"    )\n",
"\n",
"print(\"表格已成功保存为main_contacts.csv\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"df = pd.read_csv('./main_contacts.csv')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df['交易品种'] = df['合约'].str.split(r'[()]', n=1, expand=True)[0]\n",
"df['主连代码'] = df['合约'].str.split(r'[()]', n=2, expand=True)[1]\n",
"\n",
"# df['品种代码'] = df['主连代码'].str.split(str.isalpha(df['主连代码']), n=1, expand=True)[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import re\n",
"\n",
"# 创建示例DataFrame\n",
"\n",
"# 定义拆分字母和数字的函数\n",
"def split_alpha_numeric(s):\n",
"    \"\"\"Split a leading run of ASCII letters from the digits that follow it.\n",
"\n",
"    Returns ``(letters, digits)`` when ``s`` starts with one or more ASCII letters\n",
"    immediately followed by one or more digits; otherwise returns ``(s, None)``.\n",
"    Non-string input (for example NaN or None coming from a missing cell) is passed\n",
"    through as ``(s, None)`` instead of raising TypeError inside ``re.match``.\n",
"    \"\"\"\n",
"    if not isinstance(s, str):\n",
"        return (s, None)\n",
"    match = re.match(r\"([a-zA-Z]+)([0-9]+)\", s)\n",
"    return match.groups() if match else (s, None)\n",
"\n",
"# 应用函数并创建新列\n",
"df[['品种代码', '交割月份']] = df['主连代码'].apply(lambda x: pd.Series(split_alpha_numeric(x)))\n",
"\n",
"print(df)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('./main_contacts_all.csv')"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
"import schedule\n",
"import time\n",
"from datetime import datetime\n",
"\n",
"# jerome增加akshare库\n",
"import akshare as ak\n",
"\n",
"# jerome:增加下列库用于爬虫获取主力连续代码\n",
"import pandas as pd\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import csv\n",
"import re\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"def get_futures_fees_info():\n",
"    \"\"\"Fetch the futures fee table through akshare and save it as ./futures_fees_info.csv.\"\"\"\n",
"    ak.futures_fees_info().to_csv(r'./futures_fees_info.csv', index=False)\n",
"\n",
"def get_main_contacts():\n",
"    \"\"\"Scrape the main-contract quote table and save it as ./main_contacts.csv.\n",
"\n",
"    Downloads https://www.9qihuo.com/hangqing, dumps the ``tblhangqinglist`` table to a\n",
"    temporary CSV, reloads it with pandas, derives the ``交易品种``, ``主连代码``,\n",
"    ``品种代码`` and ``交割月份`` columns from the ``合约`` column and writes the result\n",
"    to ./main_contacts.csv. The temporary file is removed even if reading it fails.\n",
"\n",
"    Raises:\n",
"        requests.HTTPError: if the page request returns an error status.\n",
"        RuntimeError: if the page does not contain the ``tblhangqinglist`` table.\n",
"    \"\"\"\n",
"    url = \"https://www.9qihuo.com/hangqing\"\n",
"    tmp_path = './tmp_main_contacts.csv'\n",
"\n",
"    # NOTE(review): verify=False disables TLS certificate verification. It is kept because\n",
"    # the original disabled it on purpose; confirm it is still required for this site.\n",
"    # The timeout keeps the call from hanging forever.\n",
"    response = requests.get(url, verify=False, timeout=30)\n",
"    response.raise_for_status()\n",
"    response.encoding = 'utf-8'  # decode the body as UTF-8\n",
"\n",
"    # Parse the page and locate the quote table.\n",
"    soup = BeautifulSoup(response.text, 'lxml')\n",
"    table = soup.find('table', {'id': 'tblhangqinglist'})\n",
"    if table is None:\n",
"        raise RuntimeError(\"table 'tblhangqinglist' not found in the downloaded page\")\n",
"\n",
"    try:\n",
"        # Round-trip through a CSV file so pandas parses the header row and cell values.\n",
"        with open(tmp_path, mode='w', newline='', encoding='utf-8') as file:\n",
"            writer = csv.writer(file)\n",
"            writer.writerows(\n",
"                [col.text.strip() for col in row.find_all(['th', 'td'])]\n",
"                for row in table.find_all('tr')\n",
"            )\n",
"        df = pd.read_csv(tmp_path, encoding='utf-8')\n",
"    finally:\n",
"        # Always clean up the temporary file, including on failure.\n",
"        if os.path.exists(tmp_path):\n",
"            os.remove(tmp_path)\n",
"\n",
"    # Split the 合约 text on parentheses: part 0 is the product name, part 1 the contract code.\n",
"    contract_parts = df['合约'].str.split(r'[()]', n=2, expand=True)\n",
"    df['交易品种'] = contract_parts[0]\n",
"    df['主连代码'] = contract_parts[1]\n",
"\n",
"    df[['品种代码', '交割月份']] = df['主连代码'].apply(lambda x: pd.Series(split_alpha_numeric(x)))\n",
"    # The index column is written on purpose: another cell reads this file by column position.\n",
"    df.to_csv('./main_contacts.csv')\n",
"\n",
"    print(\"期货主力品种表已经保存为main_contacts.csv\")\n",
"\n",
"# 拆分字母和数字的函数\n",
"def split_alpha_numeric(s):\n",
"    \"\"\"Split a leading run of ASCII letters from the digits that follow it.\n",
"\n",
"    Returns ``(letters, digits)`` when ``s`` starts with one or more ASCII letters\n",
"    immediately followed by one or more digits; otherwise returns ``(s, None)``.\n",
"    Non-string input (for example NaN or None coming from a missing cell) is passed\n",
"    through as ``(s, None)`` instead of raising TypeError inside ``re.match``.\n",
"    \"\"\"\n",
"    if not isinstance(s, str):\n",
"        return (s, None)\n",
"    match = re.match(r\"([a-zA-Z]+)([0-9]+)\", s)\n",
"    return match.groups() if match else (s, None)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"get_futures_fees_info()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"期货主力品种表已经保存为main_contacts.csv\n"
]
}
],
"source": [
"get_main_contacts()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}