429 lines
10 KiB
Plaintext
429 lines
10 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import os"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"root_path = r\"E:/data/ag\"\n",
|
|
"output_path = r\"E:/data/ag/ag888.csv\"\n",
|
|
"# df_tmp = pd.read_csv('E:/data/rb/rb888_2023.csv',encoding=\"utf-8\")"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Collect every file under root_path, in sorted path order.\n",
"# output_path is located inside root_path, so it is excluded here; otherwise a second\n",
"# run would read the previously merged output back in as if it were raw input.\n",
"# (To restrict the load to one year, add a test such as name[0:4] == '2023'.)\n",
"output_norm = os.path.normcase(os.path.abspath(output_path))\n",
"candidates = (\n",
"    os.path.join(folder, name)\n",
"    for folder, _dirs, names in os.walk(root_path)\n",
"    for name in names\n",
")\n",
"files = sorted(\n",
"    path for path in candidates\n",
"    if os.path.normcase(os.path.abspath(path)) != output_norm\n",
")\n",
"\n",
"# Names given to the twelve columns selected by position below.\n",
"tick_columns = [\n",
"    \"交易日\",\n",
"    \"统一代码\",\n",
"    \"合约代码\",\n",
"    \"最新价\",\n",
"    \"数量\",\n",
"    \"最后修改时间\",\n",
"    \"最后修改毫秒\",\n",
"    \"申买价一\",\n",
"    \"申买量一\",\n",
"    \"申卖价一\",\n",
"    \"申卖量一\",\n",
"    \"业务日期\",\n",
"]\n",
"\n",
"# Read each file once and concatenate a single time; calling pd.concat inside the loop\n",
"# re-copies all rows accumulated so far on every iteration.\n",
"frames = [\n",
"    pd.read_csv(\n",
"        path,\n",
"        usecols=[0, 1, 2, 5, 12, 21, 22, 23, 24, 25, 26, 44],\n",
"        names=tick_columns,\n",
"        skiprows=1,  # skip each file's own header row\n",
"        encoding=\"utf-8\",\n",
"    )\n",
"    for path in files\n",
"]\n",
"# With no input files, fall back to an empty frame (pd.concat rejects an empty list).\n",
"df = pd.concat(frames) if frames else pd.DataFrame()"
]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.tail()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#df_tmp = pd.read_csv('E:/data/rb/rb888_2023.csv',encoding=\"utf-8\")\n",
|
|
"#df_tmp.tail()\n",
|
|
"#df_tmp.tail().to_csv(\"E:/data/rb/rb_tail.csv\",index= False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.reset_index(drop=True, inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.info()\n",
|
|
"# 21754840"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.loc[2493107:2493111]"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Ratio (geometric) back-adjustment across contract rollovers.\n",
"# Not applied for now: flip the flag to try it. The column names match the ones assigned\n",
"# when the files are loaded (申买价一 / 申卖价一 / 最新价).\n",
"APPLY_RATIO_ADJUST = False\n",
"\n",
"if APPLY_RATIO_ADJUST:\n",
"    # A rollover row is one whose contract code differs from the previous row.\n",
"    is_roll = df['合约代码'] != df['合约代码'].shift()\n",
"    # Factor = previous ask / current bid on a rollover row, 1 elsewhere.\n",
"    df['复权因子'] = np.where(is_roll, df['申卖价一'].shift() / df['申买价一'], 1)\n",
"    # The first row has no predecessor, so its factor is NaN; treat it as neutral.\n",
"    df['复权因子'] = df['复权因子'].fillna(1)\n",
"    cum_ratio = df['复权因子'].cumprod()\n",
"    for price_col in ('申买价一', '申卖价一', '最新价'):\n",
"        df[price_col + '_adj'] = df[price_col] * cum_ratio"
]
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Additive (difference) back-adjustment across contract rollovers.\n",
"# On a row where the contract code changes, the offset is the previous row's ask minus\n",
"# the current row's bid; every other row contributes 0.\n",
"contract = df['合约代码']\n",
"gap = df['申卖价一'].shift() - df['申买价一']\n",
"df['复权因子'] = np.where(contract != contract.shift(), gap, 0)\n",
"df['复权因子'] = df['复权因子'].fillna(0)\n",
"\n",
"# Running sum of the offsets, added to each price column.\n",
"cum_offset = df['复权因子'].cumsum()\n",
"for price_col in ('申买价一', '申卖价一', '最新价'):\n",
"    df[price_col + '_adj'] = df[price_col] + cum_offset"
]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.loc[391880:391890]"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Overwrite the raw price columns with their adjusted counterparts.\n",
"for price_col in ('申买价一', '申卖价一', '最新价'):\n",
"    df[price_col] = df[price_col + '_adj']"
]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.loc[391880:391890]"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Row labels where the adjustment factor is non-zero.\n",
"non_zero_indices = df.index[df['复权因子'] != 0]"
]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(non_zero_indices)"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Remove the helper columns that were created for the price adjustment.\n",
"df.drop(columns=['复权因子', '申买价一_adj', '申卖价一_adj', '最新价_adj'], inplace=True)"
]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.loc[391880:391890]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.to_csv(output_path, index=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.head().to_csv(\"E:/data/rb/rb_ch_temp.csv\")"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Collect the row labels whose time-of-day falls in a window that should be removed.\n",
"# The comparison is textual; this assumes 最后修改时间 holds zero-padded HH:MM:SS strings.\n",
"tick_time = df[\"最后修改时间\"]\n",
"\n",
"# Strictly between 15:00:00 and 21:00:00.\n",
"drop_index1 = df.index[(tick_time > \"15:00:00\") & (tick_time < \"21:00:00\")]\n",
"# Everything before 09:00:00 (an earlier variant only removed 01:00:00-09:00:00).\n",
"drop_index2 = df.index[tick_time < \"09:00:00\"]\n",
"# Everything after 23:00:00. No upper bound is applied: a strict `< \"23:59:59\"` bound\n",
"# would leave ticks stamped exactly 23:59:59 in the data.\n",
"drop_index3 = df.index[tick_time > \"23:00:00\"]\n",
"# Strictly between 11:30:00 and 13:30:00.\n",
"drop_index4 = df.index[(tick_time > \"11:30:00\") & (tick_time < \"13:30:00\")]"
]
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Remove the rows selected by each time-window filter, one filter at a time.\n",
"for unwanted in (drop_index1, drop_index2, drop_index3, drop_index4):\n",
"    df.drop(index=unwanted, inplace=True)"
]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.tail()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.reset_index(drop=True, inplace=True)"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build a full timestamp from trading day + time-of-day + millisecond.\n",
"# The millisecond value is zero-padded to three digits: without padding, a value such\n",
"# as 5 would be rendered as \".5\" and parsed as 500 ms instead of 5 ms.\n",
"# NOTE(review): the date part comes from 交易日; confirm that is intended for\n",
"# night-session ticks rather than 业务日期.\n",
"day_text = pd.to_datetime(df[\"交易日\"].astype(str)).astype(str)\n",
"time_text = df[\"最后修改时间\"].astype(str)\n",
"msec_text = df[\"最后修改毫秒\"].astype(str).str.zfill(3)\n",
"df[\"datetime\"] = pd.to_datetime(day_text + \" \" + time_text + \".\" + msec_text)"
]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.tail()"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Map the Chinese column headers to short English names.\n",
"column_map = {\n",
"    \"最新价\": \"lastprice\",\n",
"    \"数量\": \"volume\",\n",
"    \"申买价一\": \"bid_p\",\n",
"    \"申买量一\": \"bid_v\",\n",
"    \"申卖价一\": \"ask_p\",\n",
"    \"申卖量一\": \"ask_v\",\n",
"    \"合约代码\": \"symbol\",\n",
"}\n",
"df.rename(columns=column_map, inplace=True)"
]
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Per-tick traded volume derived from the running volume column.\n",
"# NOTE(review): this assumes volume is a cumulative counter, as the use of diff() implies.\n",
"# A plain diff() goes negative wherever the counter restarts and is meaningless across a\n",
"# contract change, so on those rows the raw counter value is used instead.\n",
"vol_delta = df[\"volume\"].diff()\n",
"counter_restart = (vol_delta < 0) | (df[\"symbol\"] != df[\"symbol\"].shift())\n",
"df[\"vol_diff\"] = vol_delta.mask(counter_restart, df[\"volume\"])"
]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rows with no computed difference fall back to the raw volume value.\n",
"df[\"vol_diff\"] = df[\"vol_diff\"].fillna(df[\"volume\"])"
]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df[\"volume\"] = df[\"vol_diff\"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.to_csv(output_path)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "orderflow",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.9"
|
|
},
|
|
"orig_nbformat": 4
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|