Files

223 lines
5.9 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Switch into the data directory so the file names below can stay relative.\n",
"os.chdir('E:/data/ag')\n",
"all_csv_files = sorted(name for name in os.listdir('.') if name.endswith('.csv'))\n",
"print(\"文件中所有CSV文件:\",all_csv_files)\n",
"\n",
"# A file is selected when its name contains at least one of these substrings.\n",
"sp_chars = sorted(['_2023','_2022'])\n",
"print(\"需要筛选的文件名关键字:\",sp_chars)\n",
"\n",
"csv_files = [\n",
"    name\n",
"    for name in all_csv_files\n",
"    if any(keyword in name for keyword in sp_chars)\n",
"]\n",
"print(\"筛选结果后的CSV文件:\",csv_files)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read every selected CSV file and combine them into a single DataFrame.\n",
"#\n",
"# The frames are collected in a list and concatenated once. Calling pd.concat\n",
"# inside the loop copies all rows read so far on every iteration, which makes\n",
"# the load quadratic in the total row count.\n",
"\n",
"# Positional columns to keep; they are paired in order with col_names.\n",
"use_cols = [0, 1, 2, 5, 12, 21, 22, 23, 24, 25, 26, 44]\n",
"col_names = [\n",
"    \"交易日\",\n",
"    \"统一代码\",\n",
"    \"合约代码\",\n",
"    \"最新价\",\n",
"    \"数量\",\n",
"    \"最后修改时间\",\n",
"    \"最后修改毫秒\",\n",
"    \"申买价一\",\n",
"    \"申买量一\",\n",
"    \"申卖价一\",\n",
"    \"申卖量一\",\n",
"    \"业务日期\",\n",
"]\n",
"\n",
"frames = [\n",
"    pd.read_csv(\n",
"        f,\n",
"        usecols=use_cols,\n",
"        names=col_names,\n",
"        skiprows=1,  # skip the first line of each file (presumably its header row)\n",
"        encoding=\"utf-8\",\n",
"    )\n",
"    for f in csv_files\n",
"]\n",
"\n",
"# pd.concat raises ValueError on an empty list, so fall back to the empty\n",
"# DataFrame that results when no file matched.\n",
"df = pd.concat(frames) if frames else pd.DataFrame()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 重置索引并查看数据概况:reset_index()、info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Replace the index with a fresh 0..n-1 range and discard the old one.\n",
"df = df.reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 查看dataframe的基本情况\n",
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 等比复权,先不考虑\n",
"\n",
"以下代码仅作参考,未执行。注意:其中的列名(卖一价、买一价、最新)与本笔记本读取数据时使用的列名(申卖价一、申买价一、最新价)不一致,启用前需先修改。\n",
"\n",
"```python\n",
"# df['复权因子'] = df['卖一价'].shift() / df['买一价']\n",
"df['复权因子'] = np.where(df['合约代码'] != df['合约代码'].shift(), df['卖一价'].shift() / df['买一价'], 1)\n",
"df['复权因子'] = df['复权因子'].fillna(1)\n",
"# df['复权因子'].loc[0] = 1\n",
"df['买一价_adj'] = df['买一价'] * df['复权因子'].cumprod()\n",
"df['卖一价_adj'] = df['卖一价'] * df['复权因子'].cumprod()\n",
"df['最新_adj'] = df['最新'] * df['复权因子'].cumprod()\n",
"# df['low_adj'] = df['low'] * adjust.cumprod()\n",
"# df['high_adj'] = df['high'] * adjust.cumprod()\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Additive (difference-based) price adjustment across contract switches.\n",
"\n",
"# A row is a switch point when its contract code differs from the previous row's.\n",
"is_switch = df['合约代码'] != df['合约代码'].shift()\n",
"# Gap at each row: the previous row's ask price minus this row's bid price.\n",
"gap = df['申卖价一'].shift() - df['申买价一']\n",
"# Keep the gap only at switch points; every other row contributes 0.\n",
"df['复权因子'] = np.where(is_switch, gap, 0)\n",
"# A NaN gap (e.g. the first row, which has no previous row) means no adjustment.\n",
"df['复权因子'] = df['复权因子'].fillna(0)\n",
"\n",
"# The running total of the gaps is the offset added to every price from that row on.\n",
"offset = df['复权因子'].cumsum()\n",
"df['申买价一_adj'] = df['申买价一'] + offset\n",
"df['申卖价一_adj'] = df['申卖价一'] + offset\n",
"df['最新价_adj'] = df['最新价'] + offset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Index labels of the rows whose adjustment factor is non-zero, i.e. the\n",
"# contract-switch rows where an adjustment is applied.\n",
"has_adjustment = df['复权因子'].ne(0)\n",
"non_zero_indices = df.loc[has_adjustment].index\n",
"print(non_zero_indices)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show the rows around the first adjusted row while the bid, ask and\n",
"# last-price columns still hold their unadjusted values.\n",
"first_switch = non_zero_indices[0]\n",
"df.loc[first_switch - 5:first_switch + 5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Overwrite each original price column with its adjusted '_adj' counterpart.\n",
"for price_col in ('申买价一', '申卖价一', '最新价'):\n",
"    df[price_col] = df[price_col + '_adj']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show the rows around the first adjusted row, to inspect the adjusted\n",
"# bid, ask and last-price values.\n",
"first_switch = non_zero_indices[0]\n",
"df.loc[first_switch - 5:first_switch + 5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Remove the adjustment factor and the temporary '_adj' columns.\n",
"df = df.drop(columns=['复权因子', '申买价一_adj', '申卖价一_adj', '最新价_adj'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Display the rows within 5 index labels of the first adjusted row.\n",
"first_switch = non_zero_indices[0]\n",
"df.loc[first_switch - 5:first_switch + 5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('./ag888_2022_2023.csv', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "orderflow",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}