增加交易策略、交易指标、量化库代码等文件夹

This commit is contained in:
Win_home
2025-04-27 15:54:09 +08:00
parent ca3b209096
commit f57150dae8
589 changed files with 854346 additions and 1757 deletions

View File

@@ -0,0 +1,526 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"root_path = r\"C:/Users/zhouj/Desktop/data\"\n",
"output_path = r\"C:/Users/zhouj/Desktop/a88.csv\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 等差复权\n",
"adjust = df['close'].shift() - df['open']\n",
"adjust = np.where(df['symbol'] != df['symbol'].shift(), adjust, 0)\n",
"df['open_adj'] = df['open'] + adjust.cumsum()\n",
"df['close_adj'] = df['close'] + adjust.cumsum()\n",
"df['low_adj'] = df['low'] + adjust.cumsum()\n",
"df['high_adj'] = df['high'] + adjust.cumsum()\n",
"# 等比复权\n",
"adjust = df['close'].shift() / df['open']\n",
"adjust = np.where(df['symbol'] != df['symbol'].shift(), adjust, 1)\n",
"df['open_adj'] = df['open'] * adjust.cumprod()\n",
"df['close_adj'] = df['close'] * adjust.cumprod()\n",
"df['low_adj'] = df['low'] * adjust.cumprod()\n",
"df['high_adj'] = df['high'] * adjust.cumprod()\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"files = []\n",
"\n",
"for r, ds, fs in os.walk(root_path):\n",
" for f in fs:\n",
" # if f[0:4] == '2023':\n",
" abs_filepath = os.path.join(r, f)\n",
" files.append(abs_filepath)\n",
"files = sorted(files)\n",
"\n",
"df = pd.DataFrame()\n",
"for f in files:\n",
" df_temp = pd.read_csv(\n",
" f,\n",
" usecols=[1, 2, 3, 4, 8, 13, 14, 15, 16],\n",
" names=[\n",
" \"统一代码\",\n",
" \"合约代码\",\n",
" \"时间\",\n",
" \"最新\",\n",
" \"成交量\",\n",
" \"买一价\",\n",
" \"卖一价\",\n",
" \"买一量\",\n",
" \"卖一量\",\n",
" ],\n",
" skiprows=1,\n",
" encoding=\"utf-8\",\n",
" )\n",
" # df_temp = pd.read_csv(f, usecols=[0,5], names=[\n",
" # 'datetime', 'volume'])\n",
" df = pd.concat([df, df_temp])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# files = []\n",
"\n",
"# for r, ds, fs in os.walk(root_path):\n",
"# for f in fs:\n",
"# # if f[0:4] == '2023':\n",
"# abs_filepath = os.path.join(r, f)\n",
"# files.append(abs_filepath)\n",
"# files = sorted(files)\n",
"\n",
"# df = pd.DataFrame()\n",
"# for f in files:\n",
"# df_temp = pd.read_csv(\n",
"# f,\n",
"# usecols=[0, 1, 4, 11, 20, 21, 22, 23, 24, 25],\n",
"# names=[\n",
"# \"交易日\",\n",
"# \"合约代码\",\n",
"# \"最新价\",\n",
"# \"数量\",\n",
"# \"最后修改时间\",\n",
"# \"最后修改毫秒\",\n",
"# \"申买价一\",\n",
"# \"申买量一\",\n",
"# \"申卖价一\",\n",
"# \"申卖量一\",\n",
"# ],\n",
"# skiprows=1,\n",
"# encoding=\"gbk\",\n",
"# )\n",
"# # df_temp = pd.read_csv(f, usecols=[0,5], names=[\n",
"# # 'datetime', 'volume'])\n",
"# df = pd.concat([df, df_temp])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()\n",
"# 21754840"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 统一代码 合约代码 时间 最新 成交量 买一价 卖一价 \\\n",
"1305669 a888 a1905 2019-04-22 15:00:00.568 3309.0 0 3308.0 3311.0 \n",
"1305670 a888 a1905 2019-04-22 15:00:36.638 3309.0 0 3308.0 3311.0 \n",
"1305671 a888 a1909 2019-04-22 20:59:00.014 3412.0 224 3411.0 3412.0 \n",
"1305672 a888 a1909 2019-04-22 21:00:00.461 3412.0 108 3412.0 3413.0 \n",
"1305673 a888 a1909 2019-04-22 21:00:00.958 3411.0 150 3410.0 3411.0 \n",
"\n",
" 买一量 卖一量 \n",
"1305669 25 10 \n",
"1305670 25 10 \n",
"1305671 2 8 \n",
"1305672 10 19 \n",
"1305673 43 3 \n"
]
}
],
"source": [
"print(df.loc[1305669:1305673])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 等比复权\n",
"# df['复权因子'] = df['卖一价'].shift() / df['买一价']\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['卖一价'].shift() / df['买一价'], 1)\n",
"df['复权因子'] = df['复权因子'].fillna(1)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['买一价_adj'] = df['买一价'] * df['复权因子'].cumprod()\n",
"df['卖一价_adj'] = df['卖一价'] * df['复权因子'].cumprod()\n",
"df['最新_adj'] = df['最新'] * df['复权因子'].cumprod()\n",
"# df['low_adj'] = df['low'] * adjust.cumprod()\n",
"# df['high_adj'] = df['high'] * adjust.cumprod()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# 等差复权\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['卖一价'].shift() - df['买一价'], 0)\n",
"df['复权因子'] = df['复权因子'].fillna(0)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['买一价_adj'] = df['买一价'] + df['复权因子'].cumsum()\n",
"df['卖一价_adj'] = df['卖一价'] + df['复权因子'].cumsum()\n",
"df['最新_adj'] = df['最新'] + df['复权因子'].cumsum()\n",
"# df['low_adj'] = df['low'] + df['复权因子'].cumsum()\n",
"# df['high_adj'] = df['high'] + df['复权因子'].cumsum()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 统一代码 合约代码 时间 最新 成交量 买一价 卖一价 \\\n",
"1305669 a888 a1905 2019-04-22 15:00:00.568 3309.0 0 3308.0 3311.0 \n",
"1305670 a888 a1905 2019-04-22 15:00:36.638 3309.0 0 3308.0 3311.0 \n",
"1305671 a888 a1909 2019-04-22 20:59:00.014 3412.0 224 3411.0 3412.0 \n",
"1305672 a888 a1909 2019-04-22 21:00:00.461 3412.0 108 3412.0 3413.0 \n",
"1305673 a888 a1909 2019-04-22 21:00:00.958 3411.0 150 3410.0 3411.0 \n",
"\n",
" 买一量 卖一量 复权因子 买一价_adj 卖一价_adj 最新_adj \n",
"1305669 25 10 0.0 3308.0 3311.0 3309.0 \n",
"1305670 25 10 0.0 3308.0 3311.0 3309.0 \n",
"1305671 2 8 -100.0 3311.0 3312.0 3312.0 \n",
"1305672 10 19 0.0 3312.0 3313.0 3312.0 \n",
"1305673 43 3 0.0 3310.0 3311.0 3311.0 \n"
]
}
],
"source": [
"print(df.loc[1305669:1305673])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"df['买一价'] = df['买一价_adj']\n",
"df['卖一价'] = df['卖一价_adj']\n",
"df['最新'] = df['最新_adj']"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 统一代码 合约代码 时间 最新 成交量 买一价 卖一价 \\\n",
"1305669 a888 a1905 2019-04-22 15:00:00.568 3309.0 0 3308.0 3311.0 \n",
"1305670 a888 a1905 2019-04-22 15:00:36.638 3309.0 0 3308.0 3311.0 \n",
"1305671 a888 a1909 2019-04-22 20:59:00.014 3312.0 224 3311.0 3312.0 \n",
"1305672 a888 a1909 2019-04-22 21:00:00.461 3312.0 108 3312.0 3313.0 \n",
"1305673 a888 a1909 2019-04-22 21:00:00.958 3311.0 150 3310.0 3311.0 \n",
"\n",
" 买一量 卖一量 复权因子 买一价_adj 卖一价_adj 最新_adj \n",
"1305669 25 10 0.0 3308.0 3311.0 3309.0 \n",
"1305670 25 10 0.0 3308.0 3311.0 3309.0 \n",
"1305671 2 8 -100.0 3311.0 3312.0 3312.0 \n",
"1305672 10 19 0.0 3312.0 3313.0 3312.0 \n",
"1305673 43 3 0.0 3310.0 3311.0 3311.0 \n"
]
}
],
"source": [
"print(df.loc[1305669:1305673])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# df.drop('复权因子', axis=1)\n",
"# df.drop('买一价_adj', axis=1)\n",
"# df.drop('卖一价_adj', axis=1)\n",
"del df['复权因子']\n",
"del df['买一价_adj']\n",
"del df['卖一价_adj']\n",
"del df['最新_adj']"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 统一代码 合约代码 时间 最新 成交量 买一价 卖一价 \\\n",
"1305670 a888 a1905 2019-04-22 15:00:36.638 3309.0 0 3308.0 3311.0 \n",
"1305671 a888 a1909 2019-04-22 20:59:00.014 3312.0 224 3311.0 3312.0 \n",
"1305672 a888 a1909 2019-04-22 21:00:00.461 3312.0 108 3312.0 3313.0 \n",
"1305673 a888 a1909 2019-04-22 21:00:00.958 3311.0 150 3310.0 3311.0 \n",
"1305674 a888 a1909 2019-04-22 21:00:01.464 3312.0 86 3311.0 3312.0 \n",
"\n",
" 买一量 卖一量 \n",
"1305670 25 10 \n",
"1305671 2 8 \n",
"1305672 10 19 \n",
"1305673 43 3 \n",
"1305674 18 80 \n"
]
}
],
"source": [
"print(df.loc[1305670:1305674])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(output_path, index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"drop_index1 = df.query('最后修改时间>\"15:00:00\" & 最后修改时间<\"21:00:00\"')[\n",
" \"最后修改时间\"\n",
"].index\n",
"# drop_index1 = df.query('最后修改时间>\"15:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index2 = df.query('最后修改时间>\"01:00:00\" & 最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"# drop_index2 = df.query('最后修改时间>\"01:00:00\" & 最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"drop_index2 = df.query('最后修改时间<\"09:00:00\"')[\"最后修改时间\"].index\n",
"drop_index3 = df.query('最后修改时间>\"23:00:00\" & 最后修改时间<\"23:59:59\"')[\n",
" \"最后修改时间\"\n",
"].index\n",
"drop_index4 = df.query('最后修改时间>\"11:30:00\" & 最后修改时间<\"13:30:00\"')[\n",
" \"最后修改时间\"\n",
"].index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.drop(labels=drop_index1, axis=0, inplace=True)\n",
"df.drop(drop_index2, axis=0, inplace=True)\n",
"df.drop(drop_index3, axis=0, inplace=True)\n",
"df.drop(drop_index4, axis=0, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[\"datetime\"] = pd.to_datetime(\n",
" pd.to_datetime(df[\"交易日\"].astype(str)).astype(str)\n",
" + \" \"\n",
" + df[\"最后修改时间\"].astype(str)\n",
" + \".\"\n",
" + df[\"最后修改毫秒\"].astype(str)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.rename(\n",
" columns={\n",
" \"最新价\": \"lastprice\",\n",
" \"数量\": \"volume\",\n",
" \"申买价一\": \"bid_p\",\n",
" \"申买量一\": \"bid_v\",\n",
" \"申卖价一\": \"ask_p\",\n",
" \"申卖量一\": \"ask_v\",\n",
" \"合约代码\": \"symbol\",\n",
" },\n",
" inplace=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[\"vol_diff\"] = df[\"volume\"].diff()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.loc[df[\"vol_diff\"].isnull(), \"vol_diff\"] = df.loc[df[\"vol_diff\"].isnull(), \"volume\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[\"volume\"] = df[\"vol_diff\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(output_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "orderflow",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}