{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Merge main-contract tick CSVs into one file\n",
    "\n",
    "Builds one tick-CSV path per row of the main-contract mapping table, loads the files selected by `sp_chars`, sorts the ticks by time and writes them to `output_file` with a leading `统一代码` column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import os\n",
    "import time as s_time\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "map_file = r\"D:\\data\\mapping_ts_code_IH.csv\"  # main-contract mapping table\n",
    "# Absolute prefix of the tick CSV paths; the trade date is appended to it with no separator.\n",
    "file_path = \"F:/2022_tickdata/marketdatacsv\"\n",
    "\n",
    "header_file = r\"D:\\data\\fut_marketdata_head.csv\"  # CSV holding the header row (not read by this notebook)\n",
    "# data_file = r\"D:\\combined_market_data.csv\"  # CSV holding the data\n",
    "output_file = r\"D:\\IH888_up_2022.csv\"  # merged output file\n",
    "total_code = 'IH888'\n",
    "\n",
    "# Path fragments that select the years to merge, e.g. 'csv2021', 'csv2022'.\n",
    "sp_chars = ['csv2022']\n",
    "\n",
    "# Column names assigned positionally to every tick file.\n",
    "TICK_COLUMNS = ['交易日','合约代码','最后修改时间','最后修改毫秒','最新价','数量','申买价一','申买量一','申卖价一','申卖量一','当日均价','成交金额','持仓量','涨停价','跌停价']\n",
    "# Sort order of the merged ticks: trade date, last-modified time, last-modified millisecond.\n",
    "SORT_KEYS = ['交易日', '最后修改时间', '最后修改毫秒']\n",
    "# Name of the column that carries the unified contract code in the output.\n",
    "UNIFIED_CODE_COLUMN = '统一代码'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_tick_paths(mapping, path_prefix):\n",
    "    \"\"\"Return a copy of ``mapping`` with a ``final_path`` column added.\n",
    "\n",
    "    Each path is ``path_prefix + trade_date + '/' + contract + '.csv'``, where\n",
    "    ``contract`` is the part of ``mapping_ts_code`` before its first '.'.\n",
    "    \"\"\"\n",
    "    contract = mapping['mapping_ts_code'].str.split('.').str[0]\n",
    "    final_path = str(path_prefix) + mapping['trade_date'].astype(str) + '/' + contract + '.csv'\n",
    "    return mapping.assign(final_path=final_path)\n",
    "\n",
    "\n",
    "def select_paths(paths, tags):\n",
    "    \"\"\"Return, in their original order, the entries of ``paths`` containing at least one of ``tags``.\"\"\"\n",
    "    return [path for path in paths if any(tag in path for tag in tags)]\n",
    "\n",
    "\n",
    "def load_tick_files(paths, verbose=False):\n",
    "    \"\"\"Read every CSV in ``paths`` and stack the rows into one DataFrame.\n",
    "\n",
    "    Args:\n",
    "        paths: iterable of CSV file paths.\n",
    "        verbose: when True, print one confirmation line per file read.\n",
    "\n",
    "    Returns:\n",
    "        A DataFrame with the columns ``TICK_COLUMNS`` and a fresh 0..n-1 index;\n",
    "        empty (but with those columns) when ``paths`` is empty.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if a file does not exist (chained from the FileNotFoundError),\n",
    "            or, from pandas, if a file does not have exactly 15 columns.\n",
    "    \"\"\"\n",
    "    frames = []\n",
    "    for path in paths:\n",
    "        try:\n",
    "            frame = pd.read_csv(path)\n",
    "        except FileNotFoundError as exc:\n",
    "            raise ValueError(\"%s文件不存在,请检查文件路径是否正确。\"%(path)) from exc\n",
    "        if verbose:\n",
    "            print('读取%s成功'%(path))\n",
    "        # NOTE(review): read_csv consumes the first row as a header before the names are\n",
    "        # replaced here; if the tick files carry no header row, the first tick of every\n",
    "        # file is dropped -- confirm against the raw files.\n",
    "        frame.columns = TICK_COLUMNS\n",
    "        frames.append(frame)\n",
    "    if not frames:\n",
    "        return pd.DataFrame(columns=TICK_COLUMNS)\n",
    "    # Concatenate once: growing a frame inside the loop re-copies all earlier rows each time.\n",
    "    return pd.concat(frames, ignore_index=True)\n",
    "\n",
    "\n",
    "def export_merged(ticks, unified_code, out_path):\n",
    "    \"\"\"Sort ``ticks`` by ``SORT_KEYS``, prepend the unified-code column and write a CSV.\n",
    "\n",
    "    Returns the sorted frame that was written (``ticks`` itself is not modified).\n",
    "    \"\"\"\n",
    "    merged = ticks.sort_values(by=SORT_KEYS)\n",
    "    merged.insert(0, UNIFIED_CODE_COLUMN, unified_code)\n",
    "    merged.to_csv(out_path, index=False)\n",
    "    return merged\n",
    "\n",
    "\n",
    "def merge_product_year(product, year):\n",
    "    \"\"\"Merge one product's tick files for one year into ``D:/<product>888_up_<year>.csv``.\n",
    "\n",
    "    All paths are derived locally, so the notebook-level configuration is left untouched.\n",
    "\n",
    "    Returns:\n",
    "        True when a file was written, False when no tick path matched the year.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if the mapping table or one of the tick files does not exist.\n",
    "    \"\"\"\n",
    "    map_path = 'D:/data/mapping_ts_code_%s.csv'%(product)\n",
    "    path_prefix = 'F:/%s_tickdata/marketdatacsv'%(year)  # absolute prefix of the tick CSV paths\n",
    "    out_path = 'D:/%s888_up_%s.csv'%(product, year)  # merged output file\n",
    "    unified_code = '%s888'%(product)\n",
    "    year_tag = 'csv%s'%(year)  # path fragment selecting this year's files\n",
    "\n",
    "    try:\n",
    "        mapping = pd.read_csv(map_path, index_col=0, encoding='utf-8', low_memory=False)\n",
    "    except FileNotFoundError as exc:\n",
    "        raise ValueError(\"主力合约统计表文件不存在,请检查文件路径是否正确。\") from exc\n",
    "\n",
    "    paths = select_paths(build_tick_paths(mapping, path_prefix)['final_path'], [year_tag])\n",
    "    if not paths:\n",
    "        print('品种%s在%s年无数据!'%(product, year))\n",
    "        return False\n",
    "\n",
    "    print(paths[:5])\n",
    "    print(paths[-5:])\n",
    "    export_merged(load_tick_files(paths), unified_code, out_path)\n",
    "    print(\"合并完成,并已导出到%s文件。\"%(out_path))\n",
    "    return True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mapping = pd.read_csv(map_file, index_col=0, encoding='utf-8', low_memory=False)\n",
    "df = build_tick_paths(mapping, file_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "csv_files = select_paths(df['final_path'], sp_chars)\n",
    "assert csv_files, 'no tick file path contains any of %r' % (sp_chars,)\n",
    "print(csv_files[:5])\n",
    "print(csv_files[-5:])\n",
    "\n",
    "# A trading-session filter (keep 09:30-11:30 and 13:00-15:00) existed here only as\n",
    "# commented-out code; ticks are merged unfiltered.\n",
    "dfs = load_tick_files(csv_files, verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfs.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfs.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "combined_df = dfs.sort_values(by=SORT_KEYS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "combined_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "combined_df.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Guarded so that re-running this cell does not fail on the already-inserted column.\n",
    "if UNIFIED_CODE_COLUMN not in combined_df.columns:\n",
    "    combined_df.insert(0, UNIFIED_CODE_COLUMN, total_code)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "combined_df.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "combined_df.to_csv(output_file, index=False)\n",
    "print(\"合并完成,并已导出到%s文件。\"%(output_file))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Other code (以下为其他代码)\n",
    "\n",
    "```python\n",
    "import pandas as pd\n",
    "\n",
    "try:\n",
    "    file_path = 'path/to/your/file.csv'  # replace with your file path\n",
    "    df = pd.read_csv(file_path)\n",
    "except FileNotFoundError:\n",
    "    print(f\"无法找到文件:{file_path}\")\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Batch run: one merged file per (year, product) pair.\n",
    "for year in ['2021']:  # , '2023'\n",
    "    for product in ['IH', 'IF', 'IC', 'IM', 'T', 'TF', 'TL', 'TS']:\n",
    "        print('当前年份为:%s,品种为:%s'%(year, product))\n",
    "        merge_product_year(product, year)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}