增加交易策略、交易指标、量化库代码等文件夹

This commit is contained in:
Win_home
2025-04-27 15:54:09 +08:00
parent ca3b209096
commit f57150dae8
589 changed files with 854346 additions and 1757 deletions

View File

@@ -0,0 +1,801 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 使用说明:\n",
" 1.需要修改chdir到当前目录\n",
" 2.需要修改最后输出的文件名称\n",
" 3.依据情况需要修改保留的列数\n",
" 4.不同品种的交易时间不一样,要修改删除"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import datetime as datetime"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"os.chdir('E:/data/ru')\n",
"all_csv_files = [file for file in os.listdir('.') if file.endswith('.csv')]\n",
"all_csv_files = sorted(all_csv_files)\n",
"print(\"文件中所有CSV文件:\",all_csv_files)\n",
"\n",
"sp_old_chars = ['_2019','_2020','_2021']\n",
"sp_old_chars = sorted(sp_old_chars)\n",
"print(\"旧格式文件名关键字:\",sp_old_chars)\n",
"sp_new_chars = ['_2022','_2023']\n",
"sp_new_chars = sorted(sp_new_chars)\n",
"print(\"新格式文件名关键字:\",sp_new_chars)\n",
"\n",
"# # 设置后面数据的采集对于的行数# 用 \"old_type\" 或者 \"new_type\" 区分\n",
"# if all(char in ['_2019','_2020','_2021'] for char in sp_old_chars):\n",
"# year_type = 'old_type'\n",
"# print(\"使用旧年份格式采集!!!\")\n",
"# elif all(char in ['_2022','_2023'] for char in sp_chars):\n",
"# year_type = 'new_type' \n",
"# print(\"使用新年份格式采集!!!\")\n",
"# else:\n",
"# print(\"文件夹中CSV没有相关年份的数据或者新旧年份混用!!!\")\n",
"\n",
"csv_old_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_old_chars)]\n",
"print(\"筛选结果后的CSV文件:\",csv_old_files)\n",
"csv_new_files = [file for file in all_csv_files if any(sp_char in file for sp_char in sp_new_chars)]\n",
"print(\"筛选结果后的CSV文件:\",csv_new_files)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old = pd.DataFrame()\n",
"for f in csv_old_files:\n",
" df_old_temp = pd.read_csv(\n",
" f,\n",
" usecols=[1, 2, 3, 4, 8, 13, 14, 15, 16],\n",
" names=[\n",
" \"统一代码\",\n",
" \"合约代码\",\n",
" \"时间\",\n",
" \"最新\",\n",
" \"成交量\",\n",
" \"买一价\",\n",
" \"卖一价\",\n",
" \"买一量\",\n",
" \"卖一量\",\n",
" ],\n",
" skiprows=1,\n",
" encoding=\"utf-8\",\n",
" parse_dates=['时间']#注意此处增加的排序,为了后面按时间排序\n",
" )\n",
" # df_temp = pd.read_csv(f, usecols=[0,5], names=[\n",
" # 'datetime', 'volume'])\n",
" df_old = pd.concat([df_old, df_old_temp])\n",
"del df_old_temp"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old =pd.DataFrame({'main_contract':df_old['统一代码'],'symbol':df_old['合约代码'],'datetime':df_old['时间'],'lastprice':df_old['最新'],'volume':df_old['成交量'],\n",
" 'bid_p':df_old['买一价'],'ask_p':df_old['卖一价'],'bid_v':df_old['买一量'],'ask_v':df_old['卖一量']})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old['time'] = df_old['datetime'].dt.strftime('%H:%M:%S')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 不同品种交易时间不一样,需要修改\n",
"# 商品期货\n",
"drop_index1 = df_old.query('time>\"15:00:00\" & time<\"21:00:00\"')[\"time\"].index\n",
"# drop_index1 = df_old.query('time>\"15:00:00\"')[\"time\"].index\n",
"# drop_index2 = df_old.query('time>\"01:00:00\" & time<\"09:00:00\"')[\"time\"].index\n",
"#drop_index2 = df_old.query('time>\"02:30:00\" & time<\"09:00:00\"')[\"time\"].index\n",
"drop_index2 = df_old.query('time<\"09:00:00\"')[\"time\"].index\n",
"drop_index3 = df_old.query('time>\"23:00:00\" & time<\"23:59:59\"')[\"time\"].index\n",
"# drop_index3 = df_old.query('time>\"11:30:00\" & time<\"13:30:00\"')[\"time\"].index\n",
"drop_index4 = df_old.query('time>\"10:15:00\" & time<\"10:30:00\"')[\"time\"].index\n",
"\n",
"# 清理不在交易时间段的数据\n",
"df_old.drop(labels=drop_index1, axis=0, inplace=True)\n",
"df_old.drop(drop_index2, axis=0, inplace=True)\n",
"df_old.drop(drop_index3, axis=0, inplace=True)\n",
"df_old.drop(drop_index4, axis=0, inplace=True)\n",
"\n",
"df_old.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"del df_old['time']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_new = pd.DataFrame()\n",
"for f in csv_new_files:\n",
" df_new_temp = pd.read_csv(\n",
" f,\n",
" usecols=[0, 1, 2, 5, 12, 21, 22, 23, 24, 25, 26, 44],\n",
" names=[\n",
" \"交易日\",\n",
" \"统一代码\",\n",
" \"合约代码\",\n",
" \"最新价\",\n",
" \"数量\",\n",
" \"最后修改时间\",\n",
" \"最后修改毫秒\",\n",
" \"申买价一\",\n",
" \"申买量一\",\n",
" \"申卖价一\",\n",
" \"申卖量一\",\n",
" \"业务日期\",\n",
" ],\n",
" skiprows=1,\n",
" encoding=\"utf-8\",\n",
" parse_dates=['业务日期','最后修改时间','最后修改毫秒']#注意此处增加的排序,为了后面按时间排序\n",
" )\n",
"\n",
" # df_temp = pd.read_csv(f, usecols=[0,5], names=[\n",
" # 'datetime', 'volume'])\n",
" df_new = pd.concat([df_new, df_new_temp])\n",
"del df_new_temp"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 不同品种交易时间不一样,需要修改\n",
"# 商品期货\n",
"drop_index1 = df_new.query('最后修改时间>\"15:00:00\" & 最后修改时间<\"21:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index1 = df_new.query('最后修改时间>\"15:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index2 = df_new.query('最后修改时间>\"01:00:00\" & 最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index2 = df_new.query('最后修改时间>\"02:30:00\" & 最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"drop_index2 = df_new.query('最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"drop_index3 = df_new.query('最后修改时间>\"23:00:00\" & 最后修改时间<\"23:59:59\"')[\"最后修改时间\"].index\n",
"# drop_index3 = df_new.query('最后修改时间>\"11:30:00\" & 最后修改时间<\"13:30:00\"')[\"最后修改时间\"].index\n",
"drop_index4 = df_new.query('最后修改时间>\"10:15:00\" & 最后修改时间<\"10:30:00\"')[\"最后修改时间\"].index\n",
"\n",
"# 清理不在交易时间段的数据\n",
"df_new.drop(labels=drop_index1, axis=0, inplace=True)\n",
"df_new.drop(drop_index2, axis=0, inplace=True)\n",
"df_new.drop(drop_index3, axis=0, inplace=True)\n",
"df_new.drop(drop_index4, axis=0, inplace=True)\n",
"\n",
"df_new.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#日期修正\n",
"#df_new['业务日期'] = pd.to_datetime(df_new['业务日期'])\n",
"df_new['业务日期'] = df_new['业务日期'].dt.strftime('%Y-%m-%d')\n",
"df_new['datetime'] = df_new['业务日期'] + ' '+df_new['最后修改时间'].dt.time.astype(str) + '.' + df_new['最后修改毫秒'].astype(str)\n",
"# 将 'datetime' 列的数据类型更改为 datetime 格式如果数据转换少8个小时可以用timedelta处理\n",
"df_new['datetime'] = pd.to_datetime(df_new['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')\n",
"# 如果需要,可以将 datetime 列格式化为字符串\n",
"#df_new['formatted_date'] = df_new['datetime'].dt.strftime('%Y-%m-%d %H:%M:%S.%f')\n",
"#计算瞬时成交量\n",
"df_new['volume'] = df_new['数量'] - df_new['数量'].shift(1)\n",
"df_new['volume'] = df_new['volume'].fillna(0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_new =pd.DataFrame({'main_contract':df_new['统一代码'],'symbol':df_new['合约代码'],'datetime':df_new['datetime'],'lastprice':df_new['最新价'],'volume':df_new['volume'],\n",
" 'bid_p':df_new['申买价一'],'ask_p':df_new['申卖价一'],'bid_v':df_new['申买量一'],'ask_v':df_new['申卖量一']})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_old.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_new.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_new.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame()\n",
"df = pd.concat([df_old, df_new],axis=0, ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"del df_old,df_new"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df['main_contract'] = df['main_contract'].astype(str)\n",
"df['symbol'] = df['symbol'].astype(str)\n",
"df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')\n",
"df['lastprice'] = df['lastprice'].astype(float)\n",
"df['volume'] = df['volume'].astype(int)\n",
"df['bid_p'] = df['bid_p'].astype(float)\n",
"df['ask_p'] = df['ask_p'].astype(float)\n",
"df['bid_v'] = df['bid_v'].astype(int)\n",
"df['ask_v'] = df['ask_v'].astype(int)\n",
"#df = df_old.append(df_new, ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 删除重复行\n",
"df.drop_duplicates(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 确保日期列按升序排序\n",
"df.sort_values(by='datetime', inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 查看数据的头部和尾部head()、tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看dataframe的基本情况\n",
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 等比复权,先不考虑\n",
"# df['复权因子'] = df['卖一价'].shift() / df['买一价']\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['卖一价'].shift() / df['买一价'], 1)\n",
"df['复权因子'] = df['复权因子'].fillna(1)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['买一价_adj'] = df['买一价'] * df['复权因子'].cumprod()\n",
"df['卖一价_adj'] = df['卖一价'] * df['复权因子'].cumprod()\n",
"df['最新_adj'] = df['最新'] * df['复权因子'].cumprod()\n",
"# df['low_adj'] = df['low'] * adjust.cumprod()\n",
"# df['high_adj'] = df['high'] * adjust.cumprod()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 等差复权\n",
"df['复权因子'] = np.where(df['symbol'] != df['symbol'].shift(), df['ask_p'].shift() - df['bid_p'], 0)\n",
"df['复权因子'] = df['复权因子'].fillna(0)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['bid_p_adj'] = df['bid_p'] + df['复权因子'].cumsum()\n",
"df['ask_p_adj'] = df['ask_p'] + df['复权因子'].cumsum()\n",
"df['lastprice_adj'] = df['lastprice'] + df['复权因子'].cumsum()\n",
"# df['low_adj'] = df['low'] + df['复权因子'].cumsum()\n",
"# df['high_adj'] = df['high'] + df['复权因子'].cumsum()\n",
"# df_new =pd.DataFrame({'main_contract':df_new['统一代码'],'symbol':df_new['合约代码'],'datetime':df_new['datetime'],'lastprice':df_new['最新价'],'volume':df_new['volume'],\n",
"# 'bid_p':df_new['申买价一'],'ask_p':df_new['申卖量一'],'bid_v':df_new['申买量一'],'ask_v':df_new['申卖量一']})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(df['复权因子'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"contains_null = df.isnull().values.any()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(contains_null)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查找换期需要复权的索引\n",
"non_zero_indices = df[df['复权因子'] != 0].index\n",
"print(non_zero_indices)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看未调整买价、卖价和最新价的数据\n",
"df.loc[non_zero_indices[0]-5:non_zero_indices[0]+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 将调整后的数值替换原来的值\n",
"df['bid_p'] = df['bid_p_adj']\n",
"df['ask_p'] = df['ask_p_adj']\n",
"df['lastprice'] = df['lastprice_adj']\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看调整买价、卖价和最新价的数据\n",
"df.loc[non_zero_indices[0]-5:non_zero_indices[0]+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 删除多余的值\n",
"del df['复权因子']\n",
"del df['bid_p_adj']\n",
"del df['ask_p_adj']\n",
"del df['lastprice_adj']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.loc[non_zero_indices[0]-5:non_zero_indices[0]+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('./ru888.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"del df"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import datetime as datetime\n",
"import pyarrow as pa\n",
"import pyarrow.feather as feather"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# feature 测试\n",
"df = pd.read_csv('E:/data/ru/ru888.csv',encoding='UTF-8',parse_dates=['datetime'])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"feather.write_feather(df, 'df_feather.feather')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"df = feather.read_feather('df_feather.feather')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>main_contract</th>\n",
" <th>symbol</th>\n",
" <th>datetime</th>\n",
" <th>lastprice</th>\n",
" <th>volume</th>\n",
" <th>bid_p</th>\n",
" <th>ask_p</th>\n",
" <th>bid_v</th>\n",
" <th>ask_v</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>44287432</th>\n",
" <td>ru888</td>\n",
" <td>ru2405</td>\n",
" <td>2023-12-29 14:59:58.500</td>\n",
" <td>6755.0</td>\n",
" <td>27</td>\n",
" <td>6750.0</td>\n",
" <td>6755.0</td>\n",
" <td>128</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44287433</th>\n",
" <td>ru888</td>\n",
" <td>ru2405</td>\n",
" <td>2023-12-29 14:59:59.000</td>\n",
" <td>6760.0</td>\n",
" <td>27</td>\n",
" <td>6755.0</td>\n",
" <td>6760.0</td>\n",
" <td>2</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44287434</th>\n",
" <td>ru888</td>\n",
" <td>ru2405</td>\n",
" <td>2023-12-29 14:59:59.500</td>\n",
" <td>6760.0</td>\n",
" <td>17</td>\n",
" <td>6760.0</td>\n",
" <td>6765.0</td>\n",
" <td>35</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44287435</th>\n",
" <td>ru888</td>\n",
" <td>ru2405</td>\n",
" <td>2023-12-29 15:00:00.000</td>\n",
" <td>6760.0</td>\n",
" <td>6</td>\n",
" <td>6760.0</td>\n",
" <td>6765.0</td>\n",
" <td>45</td>\n",
" <td>42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44287436</th>\n",
" <td>ru888</td>\n",
" <td>ru2405</td>\n",
" <td>2023-12-29 15:00:00.500</td>\n",
" <td>6760.0</td>\n",
" <td>0</td>\n",
" <td>6760.0</td>\n",
" <td>6765.0</td>\n",
" <td>45</td>\n",
" <td>42</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" main_contract symbol datetime lastprice volume \\\n",
"44287432 ru888 ru2405 2023-12-29 14:59:58.500 6755.0 27 \n",
"44287433 ru888 ru2405 2023-12-29 14:59:59.000 6760.0 27 \n",
"44287434 ru888 ru2405 2023-12-29 14:59:59.500 6760.0 17 \n",
"44287435 ru888 ru2405 2023-12-29 15:00:00.000 6760.0 6 \n",
"44287436 ru888 ru2405 2023-12-29 15:00:00.500 6760.0 0 \n",
"\n",
" bid_p ask_p bid_v ask_v \n",
"44287432 6750.0 6755.0 128 15 \n",
"44287433 6755.0 6760.0 2 14 \n",
"44287434 6760.0 6765.0 35 33 \n",
"44287435 6760.0 6765.0 45 42 \n",
"44287436 6760.0 6765.0 45 42 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}