Files
Quant_Code/2.数据下载与处理/数据转换最终版/merged_by_year_BIT_20240522.ipynb
Win_home f925dff46b Enhance trading workflow with new order flow management
- Added dingdanliu_nb_mflow for improved order processing
- Updated related scripts and configurations to support new functionality
2025-03-15 22:45:08 +08:00

445 lines
38 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "2d85dda4",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"from merged_tickdata__BIT_20240522 import merged_old_tickdata, merged_new_tickdata, merged_new_unprocessed_tickdata,merged_old_unprocessed_tickdata, reinstatement_tickdata"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "fe51b707",
"metadata": {},
"outputs": [],
"source": [
"# 相关文件保存路径,需要修改:csv_directory为需要处理的文件原始路径out_up_path为csv_directory进行了按年份合并的文件保存路径\n",
"# out_up_path为按年份合并后处理了重复数据、清除了交易时间外数据和统一表头了的数据out_rs_path为out_path文件进行了复权处理后的数据\n",
"csv_directory = str(\"D:/tmp\") \n",
"out_up_path = str('D:/data_transfer/data_up_merged/BIT')\n",
"out_path = str('D:/data_transfer/data_merged/BIT')\n",
"out_rs_path = str('D:/data_transfer/data_rs_merged/BIT')\n",
"# 需要处理的年份数据csv数据中有含有\"_year\"的文件名\n",
"sp_old_chars = ['ETCUSDT']\n",
"sp_new_chars = ['_2022', '_2023']"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "3356d8ff",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"csv_files: ['ETCUSDT\\\\ETCUSDT-5m-2023-05-01.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-02.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-03.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-04.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-05.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-06.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-07.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-08.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-09.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-10.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-11.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-12.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-13.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-14.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-15.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-16.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-17.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-18.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-19.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-20.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-21.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-22.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-23.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-24.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-25.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-26.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-27.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-28.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-29.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-30.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-05-31.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-01.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-02.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-03.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-04.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-05.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-06.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-07.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-08.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-09.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-10.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-11.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-12.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-13.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-14.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-15.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-16.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-17.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-18.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-19.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-20.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-21.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-22.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-23.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-24.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-25.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-26.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-27.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-28.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-29.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-06-30.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-01.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-02.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-03.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-04.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-05.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-06.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-07.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-08.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-09.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-10.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-11.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-12.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-13.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-14.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-15.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-16.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-17.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-18.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-19.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-20.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-21.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-22.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-23.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-24.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-25.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-26.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-27.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-28.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-29.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-30.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-07-31.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-01.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-02.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-03.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-04.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-05.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-06.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-07.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-08.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-09.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-10.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-11.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-12.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-13.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-14.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-15.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-16.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-17.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-18.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-19.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-20.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-21.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-22.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-23.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-24.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-25.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-26.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-27.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-28.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-29.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-30.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-08-31.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-01.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-02.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-03.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-04.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-05.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-06.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-07.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-08.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-09.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-10.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-11.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-12.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-13.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-14.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-15.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-16.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-17.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-18.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-19.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-20.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-21.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-22.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-23.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-24.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-25.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-26.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-27.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-28.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-29.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-09-30.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-01.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-02.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-03.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-04.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-05.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-06.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-07.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-08.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-09.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-10.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-11.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-12.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-13.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-14.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-15.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-16.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-17.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-18.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-19.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-20.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-21.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-22.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-23.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-24.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-25.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-26.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-27.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-28.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-29.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-30.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-10-31.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-01.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-02.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-03.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-04.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-05.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-06.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-07.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-08.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-09.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-10.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-11.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-12.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-13.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-14.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-15.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-16.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-17.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-18.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-19.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-20.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-21.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-22.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-23.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-24.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-25.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-26.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-27.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-28.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-29.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-11-30.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-01.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-02.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-03.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-04.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-05.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-06.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-07.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-08.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-09.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-10.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-11.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-12.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-13.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-14.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-15.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-16.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-17.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-18.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-19.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-20.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-21.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-22.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-23.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-24.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-25.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-26.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-27.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-28.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-29.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-30.csv', 'ETCUSDT\\\\ETCUSDT-5m-2023-12-31.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-01.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-02.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-03.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-04.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-05.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-06.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-07.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-08.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-09.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-10.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-11.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-12.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-13.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-14.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-15.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-16.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-17.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-18.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-19.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-20.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-21.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-22.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-23.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-24.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-25.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-26.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-27.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-28.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-29.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-30.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-01-31.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-01.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-02.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-03.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-04.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-05.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-06.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-07.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-08.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-09.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-10.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-11.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-12.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-13.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-14.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-15.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-16.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-17.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-18.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-19.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-20.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-21.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-22.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-23.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-24.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-25.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-26.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-27.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-28.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-02-29.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-01.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-02.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-03.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-04.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-05.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-06.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-07.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-08.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-09.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-10.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-11.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-12.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-13.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-14.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-15.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-16.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-17.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-18.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-19.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-20.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-21.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-22.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-23.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-24.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-25.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-26.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-27.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-28.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-29.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-30.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-03-31.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-01.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-02.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-03.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-04.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-05.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-06.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-07.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-08.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-09.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-10.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-11.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-12.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-13.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-14.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-15.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-16.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-17.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-18.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-19.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-20.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-21.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-22.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-23.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-24.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-25.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-26.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-27.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-28.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-29.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-04-30.csv', 'ETCUSDT\\\\ETCUSDT-5m-2024-05-01.csv']\n"
]
},
{
"ename": "KeyError",
"evalue": "'合约代码'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32mc:\\veighna_elite_simulation\\lib\\site-packages\\pandas\\core\\indexes\\base.py:3800\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 3799\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 3800\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3801\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
"File \u001b[1;32mc:\\veighna_elite_simulation\\lib\\site-packages\\pandas\\_libs\\index.pyx:138\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32mc:\\veighna_elite_simulation\\lib\\site-packages\\pandas\\_libs\\index.pyx:165\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:5745\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
"File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:5753\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
"\u001b[1;31mKeyError\u001b[0m: '合约代码'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[5], line 20\u001b[0m\n\u001b[0;32m 17\u001b[0m csv_old_files \u001b[38;5;241m=\u001b[39m [sp_file \u001b[38;5;28;01mfor\u001b[39;00m sp_file \u001b[38;5;129;01min\u001b[39;00m all_csv_files \u001b[38;5;28;01mif\u001b[39;00m sp_old_char \u001b[38;5;129;01min\u001b[39;00m sp_file]\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(csv_old_files) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# 生成按年份未处理的CSV文件\u001b[39;00m\n\u001b[1;32m---> 20\u001b[0m old_up_df \u001b[38;5;241m=\u001b[39m \u001b[43mmerged_old_unprocessed_tickdata\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcsv_old_files\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msp_old_char\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 21\u001b[0m folder_up_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m%\u001b[39m(out_up_path))\n\u001b[0;32m 22\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(folder_up_path):\n",
"File \u001b[1;32md:\\Gitee_Code\\trading_strategy\\SS_Code\\SF08\\使用文档\\数据转换最终版\\merged_tickdata__BIT_20240522.py:224\u001b[0m, in \u001b[0;36mmerged_old_unprocessed_tickdata\u001b[1;34m(all_csv_files, sp_char)\u001b[0m\n\u001b[0;32m 221\u001b[0m \u001b[38;5;66;03m# 重置行索引\u001b[39;00m\n\u001b[0;32m 222\u001b[0m merged_up_df\u001b[38;5;241m.\u001b[39mreset_index(inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, drop\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m--> 224\u001b[0m merged_up_df,alpha_chars,code_value \u001b[38;5;241m=\u001b[39m \u001b[43minsert_main_contract\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmerged_up_df\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 225\u001b[0m \u001b[38;5;66;03m# 打印提示信息\u001b[39;00m\n\u001b[0;32m 226\u001b[0m \u001b[38;5;66;03m# print(\"按年份未处理的CSV文件合并成功\")\u001b[39;00m\n\u001b[0;32m 228\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m merged_up_df,alpha_chars,code_value\n",
"File \u001b[1;32md:\\Gitee_Code\\trading_strategy\\SS_Code\\SF08\\使用文档\\数据转换最终版\\merged_tickdata__BIT_20240522.py:163\u001b[0m, in \u001b[0;36minsert_main_contract\u001b[1;34m(df)\u001b[0m\n\u001b[0;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minsert_main_contract\u001b[39m(df):\n\u001b[0;32m 162\u001b[0m \u001b[38;5;66;03m# 添加主力连续的合约代码主力连续为888指数连续可以用999次主力连续可以使用889表头用“统一代码”\u001b[39;00m\n\u001b[1;32m--> 163\u001b[0m alpha_chars, numeric_chars \u001b[38;5;241m=\u001b[39m split_alpha_numeric(\u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloc\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m合约代码\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m)\n\u001b[0;32m 164\u001b[0m code_value \u001b[38;5;241m=\u001b[39m alpha_chars \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m888\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 165\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcode_value characters:\u001b[39m\u001b[38;5;124m\"\u001b[39m, code_value)\n",
"File \u001b[1;32mc:\\veighna_elite_simulation\\lib\\site-packages\\pandas\\core\\indexing.py:1067\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 1065\u001b[0m key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtuple\u001b[39m(com\u001b[38;5;241m.\u001b[39mapply_if_callable(x, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m key)\n\u001b[0;32m 1066\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_scalar_access(key):\n\u001b[1;32m-> 1067\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_value\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtakeable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_takeable\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1068\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_tuple(key)\n\u001b[0;32m 1069\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 1070\u001b[0m \u001b[38;5;66;03m# we by definition only have the 0th axis\u001b[39;00m\n",
"File \u001b[1;32mc:\\veighna_elite_simulation\\lib\\site-packages\\pandas\\core\\frame.py:3915\u001b[0m, in \u001b[0;36mDataFrame._get_value\u001b[1;34m(self, index, col, takeable)\u001b[0m\n\u001b[0;32m 3912\u001b[0m series \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ixs(col, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m 3913\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m series\u001b[38;5;241m.\u001b[39m_values[index]\n\u001b[1;32m-> 3915\u001b[0m series \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_item_cache\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcol\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3916\u001b[0m engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex\u001b[38;5;241m.\u001b[39m_engine\n\u001b[0;32m 3918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex, MultiIndex):\n\u001b[0;32m 3919\u001b[0m \u001b[38;5;66;03m# CategoricalIndex: Trying to use the engine fastpath may give incorrect\u001b[39;00m\n\u001b[0;32m 3920\u001b[0m \u001b[38;5;66;03m# results if our categories are integers that dont match our codes\u001b[39;00m\n\u001b[0;32m 3921\u001b[0m \u001b[38;5;66;03m# IntervalIndex: IntervalTree has no get_loc\u001b[39;00m\n",
"File \u001b[1;32mc:\\veighna_elite_simulation\\lib\\site-packages\\pandas\\core\\frame.py:4272\u001b[0m, in \u001b[0;36mDataFrame._get_item_cache\u001b[1;34m(self, item)\u001b[0m\n\u001b[0;32m 4267\u001b[0m res \u001b[38;5;241m=\u001b[39m cache\u001b[38;5;241m.\u001b[39mget(item)\n\u001b[0;32m 4268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m res \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 4269\u001b[0m \u001b[38;5;66;03m# All places that call _get_item_cache have unique columns,\u001b[39;00m\n\u001b[0;32m 4270\u001b[0m \u001b[38;5;66;03m# pending resolution of GH#33047\u001b[39;00m\n\u001b[1;32m-> 4272\u001b[0m loc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mitem\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4273\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ixs(loc, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m 4275\u001b[0m cache[item] \u001b[38;5;241m=\u001b[39m res\n",
"File \u001b[1;32mc:\\veighna_elite_simulation\\lib\\site-packages\\pandas\\core\\indexes\\base.py:3802\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 3800\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[0;32m 3801\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m-> 3802\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[0;32m 3803\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[0;32m 3804\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[0;32m 3805\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[0;32m 3806\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[0;32m 3807\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
"\u001b[1;31mKeyError\u001b[0m: '合约代码'"
]
}
],
"source": [
"'''\n",
"Author: zhoujie2104231 zhoujie@me.com\n",
"Date: 2024-05-24 00:01:21\n",
"LastEditors: zhoujie2104231 zhoujie@me.com\n",
"LastEditTime: 2024-05-24 00:14:06\n",
"Description: \n",
"\n",
"'''\n",
"os.chdir(csv_directory) \n",
"for root, dirs, files in os.walk('.'):\n",
" if len(dirs) > 0:\n",
" for dir in dirs:\n",
" # 获取二级子文件夹中的所有 CSV 文件\n",
" all_csv_files = [os.path.join(dir, file) for file in os.listdir(dir) if file.endswith('.csv')] \n",
" \n",
" for sp_old_char in sp_old_chars:\n",
" csv_old_files = [sp_file for sp_file in all_csv_files if sp_old_char in sp_file]\n",
" if len(csv_old_files) > 0:\n",
" # 生成按年份未处理的CSV文件\n",
" old_up_df = merged_old_unprocessed_tickdata(csv_old_files, sp_old_char)\n",
" folder_up_path = str('%s/%s'%(out_up_path))\n",
" if not os.path.exists(folder_up_path):\n",
" os.makedirs(folder_up_path) \n",
" old_up_df.to_csv('%s/%s_up%s.csv'%(folder_up_path,sp_old_char), index=False)\n",
" print(\"按年份未处理的%s_up%s.CSV文件合并成功!\"%(sp_old_char))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ff42c1f",
"metadata": {},
"outputs": [],
"source": [
"# import chardet\n",
"# for root, dirs, files in os.walk('.'):\n",
"# if len(dirs) > 0:\n",
"# for dir in dirs:\n",
"# all_csv_files = [os.path.join(dir, file) for file in os.listdir(dir) if file.endswith('.csv')]\n",
"# fileNum_corrects = 0\n",
"# fileNum_errors = 0\n",
"\n",
"# for csv_file in all_csv_files:\n",
"# with open(csv_file, 'rb') as f:\n",
"# data = f.read() \n",
"# detected_encoding = chardet.detect(data)['encoding']\n",
"\n",
"# if (detected_encoding and detected_encoding != 'gbk') and (detected_encoding and detected_encoding != 'GB2312'):\n",
"# fileNum_errors += 1\n",
"# with open('output_error.txt', 'a') as f:\n",
"# print(\"%s当前文件不为gbk格式,其文件格式为%s,需要转换为gbk格式,错误总数为%s\"%(csv_file,detected_encoding,fileNum_errors), file = f)\n",
"# print(\"%s当前文件不为gbk格式,其文件格式为%s,需要转换为gbk格式,错误总数为%s\"%(csv_file,detected_encoding,fileNum_errors))\n",
"# # print(\"%s当前文件不为gbk格式,其文件格式为%s,需要转换为gbk格式\"%(csv_file,detected_encoding))\n",
"# else:\n",
"# fileNum_corrects += 1\n",
"# with open('output.txt', 'a') as f:\n",
"# print(\"%s当前文件为gbk或者GB2312格式,无需要转换,正确总数为%s\"%(csv_file, fileNum_corrects), file = f)\n",
"# if fileNum_errors >0:\n",
"# print(\"存在错误文件,请核查!!!\")\n",
" \n",
"# print(\"查询完毕!!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f6e93e2",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"merged_rs_df = pd.read_csv('D:\\data_transfer\\data_up_merged\\大商所\\j888\\j888_up_2020.csv', encoding='utf', low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "33b31d28",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"merged_rs_df.replace([np.inf, -np.inf], np.nan)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2df07dd",
"metadata": {},
"outputs": [],
"source": [
"merged_rs_df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "febd8fc4",
"metadata": {},
"outputs": [],
"source": [
"# # 检测NaN值\n",
"# nan_mask = df['A'].isna()\n",
"# print(df[nan_mask])\n",
" \n",
"# # 检测无穷值\n",
"# inf_mask = df['A'].isinf()\n",
"# print(df[inf_mask])\n",
" \n",
"# # 如果你想要在整个DataFrame中查找所有的NaN和无穷值可以使用\n",
"# nan_and_inf = df.isna() | df.isinf()\n",
"# print(df[nan_and_inf])\n",
"\n",
"# 检测NaN值\n",
"nan_mask = merged_rs_df.isna() # merged_rs_df['成交量']\n",
"print(merged_rs_df[nan_mask])\n",
"\n",
"nan_index = merged_rs_df[nan_mask].index\n",
"print(nan_index)\n",
"# nan_index_in_column_A = merged_rs_df['成交量'].isna().index\n",
"# print(\"NaN indices in column '成交量':\", nan_index_in_column_A)\n",
" \n",
"# 检测无穷值\n",
"# inf_mask = pd.isinf(merged_rs_df['成交量'])\n",
"# print(merged_rs_df[inf_mask])\n",
" \n",
"# 如果你想要在整个DataFrame中查找所有的NaN和无穷值可以使用\n",
"# nan_and_inf = df.isna() | df.isinf()\n",
"# print(df[nan_and_inf])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "98b01523",
"metadata": {},
"outputs": [],
"source": [
"merged_rs_df.iloc[nan_index]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8efb7d08",
"metadata": {},
"outputs": [],
"source": [
"# merged_rs_df = merged_rs_df.drop(4017556)\n",
"merged_rs_df = merged_rs_df.drop(merged_rs_df.index[nan_index])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "71702d76",
"metadata": {},
"outputs": [],
"source": [
"merged_rs_df.iloc[nan_index]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60c8bc6c",
"metadata": {},
"outputs": [],
"source": [
"# merged_rs_df['成交量'].replace(np.nan,0,inplace=True)\n",
"# merged_rs_df['成交量'].replace(np.inf,0,inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "808dd229",
"metadata": {},
"outputs": [],
"source": [
"merged_rs_df['volume'] = merged_rs_df['成交量'].astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b07ec27",
"metadata": {},
"outputs": [],
"source": [
"merged_rs_df['volume'] = merged_rs_df['volume'].astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b4cd024",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('mode.use_inf_as_na', True)\n",
"nan_index_in_column_A = merged_rs_df['volume'].isna().index\n",
"print(\"NaN indices in column 'A':\", nan_index_in_column_A)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55655ddb",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"pd.set_option('mode.use_inf_as_na', True)\n",
"inf_index_in_column_A = merged_rs_df['volume'].isinf().index\n",
"print(\"Inf indices in column 'A':\", inf_index_in_column_A)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4761b95d",
"metadata": {},
"outputs": [],
"source": [
"not_int_values = merged_rs_df['成交量'].apply(lambda x: not isinstance(pd.to_numeric(x, errors='coerce'), float))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8b97f31",
"metadata": {},
"outputs": [],
"source": [
"print(merged_rs_df.loc[not_int_values, '成交量'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f639067",
"metadata": {},
"outputs": [],
"source": [
"merged_rs_df.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "59b47b33",
"metadata": {},
"outputs": [],
"source": [
"merged_rs_df['main_contract'] = merged_rs_df['main_contract'].astype(str)\n",
"merged_rs_df['symbol'] = merged_rs_df['symbol'].astype(str)\n",
"merged_rs_df['datetime'] = pd.to_datetime(merged_rs_df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M:%S.%f')\n",
"merged_rs_df['lastprice'] = merged_rs_df['lastprice'].astype(float)\n",
"merged_rs_df['volume'] = merged_rs_df['volume'].astype(int)\n",
"merged_rs_df['bid_p'] = merged_rs_df['bid_p'].astype(float)\n",
"merged_rs_df['ask_p'] = merged_rs_df['ask_p'].astype(float)\n",
"merged_rs_df['bid_v'] = merged_rs_df['bid_v'].astype(int)\n",
"merged_rs_df['ask_v'] = merged_rs_df['ask_v'].astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8edc4f4e",
"metadata": {},
"outputs": [],
"source": [
"# 要查找的值\n",
"value_to_find = '0l4276.0'\n",
" \n",
"# 查找值的索引\n",
"index_of_value = merged_rs_df.index[merged_rs_df['volume'] == value_to_find].tolist()\n",
"print(index_of_value)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a2d43b8",
"metadata": {},
"outputs": [],
"source": [
"# 要查找的值\n",
"value_to_find = '3564.0<70'\n",
" \n",
"# 查找值的索引\n",
"index_of_value = merged_rs_df.index[merged_rs_df['lastprice'] == value_to_find].tolist()\n",
"print(index_of_value)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35491aea",
"metadata": {},
"outputs": [],
"source": [
"merged_rs_df.loc[9748911-5:9748911+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e9bc02b5",
"metadata": {},
"outputs": [],
"source": [
"merged_rs_df.loc[9748911,'volume'] = 0 \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac80c04d",
"metadata": {},
"outputs": [],
"source": [
"merged_rs_df.loc[2079318-5:2079318+5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60a21f7a",
"metadata": {},
"outputs": [],
"source": [
"merged_rs_df.to_csv('D:/ag888_2019.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5fd5e0a8",
"metadata": {},
"outputs": [],
"source": [
"# 等差复权\n",
"import numpy as np\n",
"merged_rs_df['复权因子'] = np.where(merged_rs_df['symbol'] != merged_rs_df['symbol'].shift(), merged_rs_df['ask_p'].shift() - merged_rs_df['bid_p'], 0)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}