Files

959 lines
29 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Locations of the raw tick files and of the merged CSV. os.path.join picks the\n",
"# separator of the host OS, so the notebook also runs outside Windows.\n",
"root_path = os.path.join(\".\", \"tick\", \"rb\")\n",
"output_path = os.path.join(\".\", \"data\", \"rb.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Gather every file below root_path; sorting the full paths fixes the load order.\n",
"files = sorted(\n",
"    os.path.join(folder, filename)\n",
"    for folder, _subdirs, filenames in os.walk(root_path)\n",
"    for filename in filenames\n",
")\n",
"\n",
"# Parse each file once and concatenate a single time afterwards. Growing `df`\n",
"# with pd.concat inside the loop re-copies every row loaded so far per file.\n",
"frames = [\n",
"    pd.read_csv(\n",
"        path,\n",
"        # Column positions to keep; `names` labels these positions in order.\n",
"        usecols=[0, 1, 4, 11, 20, 21, 22, 23, 24, 25],\n",
"        names=[\n",
"            \"交易日\",\n",
"            \"合约代码\",\n",
"            \"最新价\",\n",
"            \"数量\",\n",
"            \"最后修改时间\",\n",
"            \"最后修改毫秒\",\n",
"            \"申买价一\",\n",
"            \"申买量一\",\n",
"            \"申卖价一\",\n",
"            \"申卖量一\",\n",
"        ],\n",
"        # Discards the first line of each file (presumably a header row - confirm).\n",
"        skiprows=1,\n",
"        encoding=\"gbk\",\n",
"    )\n",
"    for path in files\n",
"]\n",
"\n",
"# Row labels still restart at 0 for each file (no ignore_index), and an empty\n",
"# file list still produces an empty frame instead of a pd.concat error.\n",
"df = pd.concat(frames) if frames else pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>交易日</th>\n",
" <th>合约代码</th>\n",
" <th>最新价</th>\n",
" <th>数量</th>\n",
" <th>最后修改时间</th>\n",
" <th>最后修改毫秒</th>\n",
" <th>申买价一</th>\n",
" <th>申买量一</th>\n",
" <th>申卖价一</th>\n",
" <th>申卖量一</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>41323</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1201905</td>\n",
" <td>14:59:59</td>\n",
" <td>0</td>\n",
" <td>4002.0</td>\n",
" <td>247</td>\n",
" <td>4003.0</td>\n",
" <td>116</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41324</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1202028</td>\n",
" <td>14:59:59</td>\n",
" <td>500</td>\n",
" <td>4002.0</td>\n",
" <td>224</td>\n",
" <td>4003.0</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41325</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:00:00</td>\n",
" <td>0</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41326</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:00:00</td>\n",
" <td>500</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41327</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:17:29</td>\n",
" <td>500</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 交易日 合约代码 最新价 数量 最后修改时间 最后修改毫秒 申买价一 申买量一 \\\n",
"41323 20231229 rb2405 4003.0 1201905 14:59:59 0 4002.0 247 \n",
"41324 20231229 rb2405 4003.0 1202028 14:59:59 500 4002.0 224 \n",
"41325 20231229 rb2405 4002.0 1202060 15:00:00 0 4003.0 23 \n",
"41326 20231229 rb2405 4002.0 1202060 15:00:00 500 4003.0 23 \n",
"41327 20231229 rb2405 4002.0 1202060 15:17:29 500 4003.0 23 \n",
"\n",
" 申卖价一 申卖量一 \n",
"41323 4003.0 116 \n",
"41324 4003.0 16 \n",
"41325 4004.0 7 \n",
"41326 4004.0 7 \n",
"41327 4004.0 7 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the last five rows of the concatenated frame.\n",
"df.tail(5)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>交易日</th>\n",
" <th>合约代码</th>\n",
" <th>最新价</th>\n",
" <th>数量</th>\n",
" <th>最后修改时间</th>\n",
" <th>最后修改毫秒</th>\n",
" <th>申买价一</th>\n",
" <th>申买量一</th>\n",
" <th>申卖价一</th>\n",
" <th>申卖量一</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4302.0</td>\n",
" <td>4643</td>\n",
" <td>08:59:00</td>\n",
" <td>500</td>\n",
" <td>4302.0</td>\n",
" <td>115</td>\n",
" <td>4305.0</td>\n",
" <td>96</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4305.0</td>\n",
" <td>5750</td>\n",
" <td>09:00:00</td>\n",
" <td>500</td>\n",
" <td>4305.0</td>\n",
" <td>359</td>\n",
" <td>4310.0</td>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4306.0</td>\n",
" <td>8039</td>\n",
" <td>09:00:01</td>\n",
" <td>0</td>\n",
" <td>4306.0</td>\n",
" <td>18</td>\n",
" <td>4308.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4308.0</td>\n",
" <td>9065</td>\n",
" <td>09:00:01</td>\n",
" <td>500</td>\n",
" <td>4308.0</td>\n",
" <td>43</td>\n",
" <td>4310.0</td>\n",
" <td>74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4310.0</td>\n",
" <td>9682</td>\n",
" <td>09:00:02</td>\n",
" <td>0</td>\n",
" <td>4311.0</td>\n",
" <td>4</td>\n",
" <td>4314.0</td>\n",
" <td>19</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 交易日 合约代码 最新价 数量 最后修改时间 最后修改毫秒 申买价一 申买量一 申卖价一 \\\n",
"0 20220104 rb2205 4302.0 4643 08:59:00 500 4302.0 115 4305.0 \n",
"1 20220104 rb2205 4305.0 5750 09:00:00 500 4305.0 359 4310.0 \n",
"2 20220104 rb2205 4306.0 8039 09:00:01 0 4306.0 18 4308.0 \n",
"3 20220104 rb2205 4308.0 9065 09:00:01 500 4308.0 43 4310.0 \n",
"4 20220104 rb2205 4310.0 9682 09:00:02 0 4311.0 4 4314.0 \n",
"\n",
" 申卖量一 \n",
"0 96 \n",
"1 36 \n",
"2 7 \n",
"3 74 \n",
"4 19 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the first five rows of the concatenated frame.\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Swap the row labels for one continuous 0..n-1 range; the old labels are discarded.\n",
"df = df.reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 19813536 entries, 0 to 19813535\n",
"Data columns (total 10 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 交易日 int64 \n",
" 1 合约代码 object \n",
" 2 最新价 float64\n",
" 3 数量 int64 \n",
" 4 最后修改时间 object \n",
" 5 最后修改毫秒 int64 \n",
" 6 申买价一 float64\n",
" 7 申买量一 int64 \n",
" 8 申卖价一 float64\n",
" 9 申卖量一 int64 \n",
"dtypes: float64(3), int64(5), object(2)\n",
"memory usage: 1.5+ GB\n"
]
}
],
"source": [
"# Column dtypes, row count and memory footprint of the loaded frame.\n",
"df.info()\n",
"# NOTE(review): 21754840 was left here without explanation - presumably a row\n",
"# count from an earlier run; confirm or remove."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Row labels of ticks that fall outside the time windows this notebook keeps.\n",
"# The times are compared as text, which orders correctly as long as they are\n",
"# zero-padded HH:MM:SS strings (assumed - confirm against the raw files).\n",
"# Bounds are strict, so ticks stamped exactly on a window edge are kept.\n",
"tick_time = df[\"最后修改时间\"]\n",
"\n",
"# Later than 15:00:00 and earlier than 21:00:00.\n",
"drop_index1 = df.index[(tick_time > \"15:00:00\") & (tick_time < \"21:00:00\")]\n",
"# Anything earlier than 09:00:00.\n",
"drop_index2 = df.index[tick_time < \"09:00:00\"]\n",
"# Later than 23:00:00. There is no upper bound, so a tick stamped exactly\n",
"# 23:59:59 is dropped as well.\n",
"drop_index3 = df.index[tick_time > \"23:00:00\"]\n",
"# Later than 11:30:00 and earlier than 13:30:00.\n",
"drop_index4 = df.index[(tick_time > \"11:30:00\") & (tick_time < \"13:30:00\")]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Remove all four label sets in a single pass; dropping them one after another\n",
"# rebuilds the whole frame four times.\n",
"df = df.drop(index=drop_index1.append([drop_index2, drop_index3, drop_index4]))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>交易日</th>\n",
" <th>合约代码</th>\n",
" <th>最新价</th>\n",
" <th>数量</th>\n",
" <th>最后修改时间</th>\n",
" <th>最后修改毫秒</th>\n",
" <th>申买价一</th>\n",
" <th>申买量一</th>\n",
" <th>申卖价一</th>\n",
" <th>申卖量一</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>19813530</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1201836</td>\n",
" <td>14:59:58</td>\n",
" <td>500</td>\n",
" <td>4002.0</td>\n",
" <td>288</td>\n",
" <td>4003.0</td>\n",
" <td>140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19813531</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1201905</td>\n",
" <td>14:59:59</td>\n",
" <td>0</td>\n",
" <td>4002.0</td>\n",
" <td>247</td>\n",
" <td>4003.0</td>\n",
" <td>116</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19813532</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1202028</td>\n",
" <td>14:59:59</td>\n",
" <td>500</td>\n",
" <td>4002.0</td>\n",
" <td>224</td>\n",
" <td>4003.0</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19813533</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:00:00</td>\n",
" <td>0</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19813534</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:00:00</td>\n",
" <td>500</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 交易日 合约代码 最新价 数量 最后修改时间 最后修改毫秒 申买价一 申买量一 \\\n",
"19813530 20231229 rb2405 4003.0 1201836 14:59:58 500 4002.0 288 \n",
"19813531 20231229 rb2405 4003.0 1201905 14:59:59 0 4002.0 247 \n",
"19813532 20231229 rb2405 4003.0 1202028 14:59:59 500 4002.0 224 \n",
"19813533 20231229 rb2405 4002.0 1202060 15:00:00 0 4003.0 23 \n",
"19813534 20231229 rb2405 4002.0 1202060 15:00:00 500 4003.0 23 \n",
"\n",
" 申卖价一 申卖量一 \n",
"19813530 4003.0 140 \n",
"19813531 4003.0 116 \n",
"19813532 4003.0 16 \n",
"19813533 4004.0 7 \n",
"19813534 4004.0 7 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the last five rows that remain after the drop.\n",
"df.tail(5)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 19812430 entries, 1 to 19813534\n",
"Data columns (total 10 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 交易日 int64 \n",
" 1 合约代码 object \n",
" 2 最新价 float64\n",
" 3 数量 int64 \n",
" 4 最后修改时间 object \n",
" 5 最后修改毫秒 int64 \n",
" 6 申买价一 float64\n",
" 7 申买量一 int64 \n",
" 8 申卖价一 float64\n",
" 9 申卖量一 int64 \n",
"dtypes: float64(3), int64(5), object(2)\n",
"memory usage: 1.6+ GB\n"
]
}
],
"source": [
"# Row count, dtypes and memory footprint after the drop.\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Dropping rows left gaps in the labels; renumber them 0..n-1 again.\n",
"df = df.reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Timestamp = date from 交易日 + time of day + milliseconds, added as\n",
"# datetime/timedelta values. Gluing the parts into \"date time.ms\" text would\n",
"# read an unpadded millisecond value as a decimal fraction (\".50\" parses as\n",
"# 500 ms) and would parse one long string per row.\n",
"# NOTE(review): the date part is 交易日. If ticks from an evening session carry\n",
"# the following trading day, these timestamps are not wall-clock times -\n",
"# confirm before sorting or resampling on this column.\n",
"df[\"datetime\"] = (\n",
"    pd.to_datetime(df[\"交易日\"].astype(str), format=\"%Y%m%d\")\n",
"    + pd.to_timedelta(df[\"最后修改时间\"].astype(str))\n",
"    + pd.to_timedelta(df[\"最后修改毫秒\"], unit=\"ms\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>交易日</th>\n",
" <th>合约代码</th>\n",
" <th>最新价</th>\n",
" <th>数量</th>\n",
" <th>最后修改时间</th>\n",
" <th>最后修改毫秒</th>\n",
" <th>申买价一</th>\n",
" <th>申买量一</th>\n",
" <th>申卖价一</th>\n",
" <th>申卖量一</th>\n",
" <th>datetime</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>19812425</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1201836</td>\n",
" <td>14:59:58</td>\n",
" <td>500</td>\n",
" <td>4002.0</td>\n",
" <td>288</td>\n",
" <td>4003.0</td>\n",
" <td>140</td>\n",
" <td>2023-12-29 14:59:58.500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19812426</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1201905</td>\n",
" <td>14:59:59</td>\n",
" <td>0</td>\n",
" <td>4002.0</td>\n",
" <td>247</td>\n",
" <td>4003.0</td>\n",
" <td>116</td>\n",
" <td>2023-12-29 14:59:59.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19812427</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4003.0</td>\n",
" <td>1202028</td>\n",
" <td>14:59:59</td>\n",
" <td>500</td>\n",
" <td>4002.0</td>\n",
" <td>224</td>\n",
" <td>4003.0</td>\n",
" <td>16</td>\n",
" <td>2023-12-29 14:59:59.500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19812428</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:00:00</td>\n",
" <td>0</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" <td>2023-12-29 15:00:00.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19812429</th>\n",
" <td>20231229</td>\n",
" <td>rb2405</td>\n",
" <td>4002.0</td>\n",
" <td>1202060</td>\n",
" <td>15:00:00</td>\n",
" <td>500</td>\n",
" <td>4003.0</td>\n",
" <td>23</td>\n",
" <td>4004.0</td>\n",
" <td>7</td>\n",
" <td>2023-12-29 15:00:00.500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 交易日 合约代码 最新价 数量 最后修改时间 最后修改毫秒 申买价一 申买量一 \\\n",
"19812425 20231229 rb2405 4003.0 1201836 14:59:58 500 4002.0 288 \n",
"19812426 20231229 rb2405 4003.0 1201905 14:59:59 0 4002.0 247 \n",
"19812427 20231229 rb2405 4003.0 1202028 14:59:59 500 4002.0 224 \n",
"19812428 20231229 rb2405 4002.0 1202060 15:00:00 0 4003.0 23 \n",
"19812429 20231229 rb2405 4002.0 1202060 15:00:00 500 4003.0 23 \n",
"\n",
" 申卖价一 申卖量一 datetime \n",
"19812425 4003.0 140 2023-12-29 14:59:58.500 \n",
"19812426 4003.0 116 2023-12-29 14:59:59.000 \n",
"19812427 4003.0 16 2023-12-29 14:59:59.500 \n",
"19812428 4004.0 7 2023-12-29 15:00:00.000 \n",
"19812429 4004.0 7 2023-12-29 15:00:00.500 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the last five rows, now including the datetime column.\n",
"df.tail(5)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Give the contract, price, volume and best bid/ask columns English names.\n",
"# Columns not listed here keep their current labels.\n",
"df = df.rename(\n",
"    columns={\n",
"        \"合约代码\": \"symbol\",\n",
"        \"最新价\": \"lastprice\",\n",
"        \"数量\": \"volume\",\n",
"        \"申买价一\": \"bid_p\",\n",
"        \"申买量一\": \"bid_v\",\n",
"        \"申卖价一\": \"ask_p\",\n",
"        \"申卖量一\": \"ask_v\",\n",
"    }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Row-to-row change of `volume`, taken separately for every (交易日, symbol)\n",
"# pair. `volume` looks like a running total that restarts with each trading day\n",
"# (about 1.2 million on the last tick of 20231229 versus a few thousand on the\n",
"# first ticks of 20220104), so a plain diff() over the whole column would yield a\n",
"# large negative value wherever the day or the contract changes.\n",
"# The first row of each group has no predecessor and is left as NaN.\n",
"df[\"vol_diff\"] = df.groupby([\"交易日\", \"symbol\"], sort=False)[\"volume\"].diff()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>交易日</th>\n",
" <th>symbol</th>\n",
" <th>lastprice</th>\n",
" <th>volume</th>\n",
" <th>最后修改时间</th>\n",
" <th>最后修改毫秒</th>\n",
" <th>bid_p</th>\n",
" <th>bid_v</th>\n",
" <th>ask_p</th>\n",
" <th>ask_v</th>\n",
" <th>datetime</th>\n",
" <th>vol_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4305.0</td>\n",
" <td>5750</td>\n",
" <td>09:00:00</td>\n",
" <td>500</td>\n",
" <td>4305.0</td>\n",
" <td>359</td>\n",
" <td>4310.0</td>\n",
" <td>36</td>\n",
" <td>2022-01-04 09:00:00.500</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4306.0</td>\n",
" <td>8039</td>\n",
" <td>09:00:01</td>\n",
" <td>0</td>\n",
" <td>4306.0</td>\n",
" <td>18</td>\n",
" <td>4308.0</td>\n",
" <td>7</td>\n",
" <td>2022-01-04 09:00:01.000</td>\n",
" <td>2289.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4308.0</td>\n",
" <td>9065</td>\n",
" <td>09:00:01</td>\n",
" <td>500</td>\n",
" <td>4308.0</td>\n",
" <td>43</td>\n",
" <td>4310.0</td>\n",
" <td>74</td>\n",
" <td>2022-01-04 09:00:01.500</td>\n",
" <td>1026.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4310.0</td>\n",
" <td>9682</td>\n",
" <td>09:00:02</td>\n",
" <td>0</td>\n",
" <td>4311.0</td>\n",
" <td>4</td>\n",
" <td>4314.0</td>\n",
" <td>19</td>\n",
" <td>2022-01-04 09:00:02.000</td>\n",
" <td>617.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20220104</td>\n",
" <td>rb2205</td>\n",
" <td>4314.0</td>\n",
" <td>10328</td>\n",
" <td>09:00:02</td>\n",
" <td>500</td>\n",
" <td>4314.0</td>\n",
" <td>137</td>\n",
" <td>4316.0</td>\n",
" <td>19</td>\n",
" <td>2022-01-04 09:00:02.500</td>\n",
" <td>646.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 交易日 symbol lastprice volume 最后修改时间 最后修改毫秒 bid_p bid_v \\\n",
"0 20220104 rb2205 4305.0 5750 09:00:00 500 4305.0 359 \n",
"1 20220104 rb2205 4306.0 8039 09:00:01 0 4306.0 18 \n",
"2 20220104 rb2205 4308.0 9065 09:00:01 500 4308.0 43 \n",
"3 20220104 rb2205 4310.0 9682 09:00:02 0 4311.0 4 \n",
"4 20220104 rb2205 4314.0 10328 09:00:02 500 4314.0 137 \n",
"\n",
" ask_p ask_v datetime vol_diff \n",
"0 4310.0 36 2022-01-04 09:00:00.500 NaN \n",
"1 4308.0 7 2022-01-04 09:00:01.000 2289.0 \n",
"2 4310.0 74 2022-01-04 09:00:01.500 1026.0 \n",
"3 4314.0 19 2022-01-04 09:00:02.000 617.0 \n",
"4 4316.0 19 2022-01-04 09:00:02.500 646.0 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the first five rows, now including vol_diff.\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Rows whose vol_diff is NaN take that row's own `volume` value instead.\n",
"df[\"vol_diff\"] = df[\"vol_diff\"].fillna(df[\"volume\"])"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Overwrite `volume` with the vol_diff values; vol_diff itself stays in the frame.\n",
"df[\"volume\"] = df.loc[:, \"vol_diff\"]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Make sure the destination folder exists, then write the frame. The row index\n",
"# is written as the first, unnamed CSV column (the to_csv default).\n",
"out_dir = os.path.dirname(output_path)\n",
"if out_dir:\n",
"    os.makedirs(out_dir, exist_ok=True)\n",
"df.to_csv(output_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "orderflow",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}