{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# GRU-SAC Trading Pipeline: Step-by-Step Walkthrough\n",
"\n",
"This notebook demonstrates how to instantiate and run the refactored `TradingPipeline` class **sequentially**, executing each major step individually.\n",
"\n",
"**Goal:** Run the complete pipeline (data loading, feature engineering, GRU training/loading, calibration, SAC loading, backtesting) using a configuration file, inspecting the inputs and outputs at each stage."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Imports and Setup\n",
"\n",
"Import necessary libraries and configure path variables to locate the project code."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Initial sys.path: ['/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/home/yasha/develop/gru_sac_predictor/.venv/lib/python3.10/site-packages']\n",
"Notebook directory (notebook_dir): /home/yasha/develop/gru_sac_predictor/gru_sac_predictor/notebooks\n",
"Calculated path for imports (package_root_for_imports): /home/yasha/develop/gru_sac_predictor/gru_sac_predictor\n",
"Checking if /home/yasha/develop/gru_sac_predictor/gru_sac_predictor is in sys.path...\n",
"Path not found. Adding /home/yasha/develop/gru_sac_predictor/gru_sac_predictor to sys.path.\n",
"sys.path after insert: ['/home/yasha/develop/gru_sac_predictor/gru_sac_predictor', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/home/yasha/develop/gru_sac_predictor/.venv/lib/python3.10/site-packages']\n",
"Project root for config/data (project_root): /home/yasha/develop/gru_sac_predictor\n",
"Attempting to import TradingPipeline...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-04-18 03:17:10.421895: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
"E0000 00:00:1744946230.439676 157301 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
"E0000 00:00:1744946230.445571 157301 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"/home/yasha/develop/gru_sac_predictor/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
"  from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Successfully imported TradingPipeline.\n"
]
}
],
"source": [
"import os\n",
"import sys\n",
"import yaml\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.image as mpimg\n",
"import logging\n",
"\n",
"print(f'Initial sys.path: {sys.path}')\n",
"\n",
"# --- Path Setup ---\n",
"# Initialize project_root to None\n",
"project_root = None\n",
"package_root_for_imports = None # Initialize separately for clarity\n",
"try:\n",
"    notebook_dir = os.path.abspath('') # Get current directory (should be notebooks/)\n",
"    print(f'Notebook directory (notebook_dir): {notebook_dir}')\n",
"\n",
"    # Go up ONE level to get the package root directory\n",
"    # Since notebook is in .../gru_sac_predictor/notebooks/, parent is .../gru_sac_predictor/\n",
"    package_root_for_imports = os.path.dirname(notebook_dir)\n",
"    print(f'Calculated path for imports (package_root_for_imports): {package_root_for_imports}')\n",
"\n",
"    # Add the calculated path to sys.path to allow imports\n",
"    print(f'Checking if {package_root_for_imports} is in sys.path...')\n",
"    if package_root_for_imports not in sys.path:\n",
"        print(f'Path not found. Adding {package_root_for_imports} to sys.path.')\n",
"        sys.path.insert(0, package_root_for_imports)\n",
"        print(f'sys.path after insert: {sys.path}')\n",
"    else:\n",
"        print(f'Path {package_root_for_imports} already in sys.path.')\n",
"\n",
"    # Define project_root consistently, used later for finding config.yaml\n",
"    # It should be the *outer* directory containing the package and config\n",
"    project_root = os.path.dirname(package_root_for_imports) # Go up one more level\n",
"    print(f'Project root for config/data (project_root): {project_root}')\n",
"\n",
"except Exception as e:\n",
"    print(f'Error during path setup: {e}')\n",
"\n",
"# --- Import the main pipeline class ---\n",
"print(\"Attempting to import TradingPipeline...\")\n",
"try:\n",
"    # Import relative to the package root added to sys.path\n",
"    from src.trading_pipeline import TradingPipeline\n",
"    print('Successfully imported TradingPipeline.')\n",
"except ImportError as e:\n",
"    print(f'ERROR: Failed to import TradingPipeline: {e}')\n",
"    print(f'Final sys.path before error: {sys.path}')\n",
"    print(\"Please verify the project structure and the paths added to sys.path.\")\n",
"except Exception as e: # Catch other potential errors\n",
"    print(f'An unexpected error occurred during import: {e}')\n",
"    print(f'Final sys.path before error: {sys.path}')\n",
"\n",
"# Configure basic logging for the notebook\n",
"logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
"\n",
"# Set pandas display options for better inspection\n",
"pd.set_option('display.max_columns', None) # Show all columns\n",
"pd.set_option('display.max_rows', 100) # Show more rows if needed\n",
"pd.set_option('display.width', 1000) # Wider display"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Configuration\n",
"\n",
"Specify the path to the configuration file (`config.yaml`). This file defines all parameters for the data, models, training, and backtesting."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using config file: /home/yasha/develop/gru_sac_predictor/gru_sac_predictor/config.yaml\n",
"Config file found.\n"
]
}
],
"source": [
"# Path to the configuration file\n",
"# Assumes config.yaml lives inside the package root (i.e. at <project_root>/gru_sac_predictor/config.yaml)\n",
"config_rel_path = 'gru_sac_predictor/config.yaml' # Relative to project_root defined above\n",
"config_abs_path = None\n",
"\n",
"# Construct absolute path relative to the project root identified earlier\n",
"if 'project_root' in locals() and project_root: # Check if project_root was successfully determined\n",
"    config_abs_path = os.path.join(project_root, config_rel_path)\n",
"else:\n",
"    print('ERROR: project_root not defined. Cannot find config file.')\n",
"\n",
"if config_abs_path:\n",
"    print(f'Using config file: {config_abs_path}')\n",
"    # Verify the config file exists\n",
"    if not os.path.exists(config_abs_path):\n",
"        print(f'ERROR: Config file not found at {config_abs_path}')\n",
"    else:\n",
"        print('Config file found.')\n",
"        # Optionally load and display config for verification\n",
"        try:\n",
"            with open(config_abs_path, 'r') as f:\n",
"                config_data = yaml.safe_load(f)\n",
"            # print('\\nConfiguration:')\n",
"            # print(yaml.dump(config_data, default_flow_style=False)) # Pretty print\n",
"        except Exception as e:\n",
"            print(f'Error reading config file: {e}')"
]
},
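{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check, the parsed configuration can be inspected before instantiating the pipeline. The snippet below is a minimal illustrative sketch: the section name it probes (`backtest`) is an assumption about the layout of `config.yaml`, not a guaranteed key."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative config inspection -- the 'backtest' section name is an assumption\n",
"if 'config_data' in locals() and isinstance(config_data, dict):\n",
"    print('Top-level config sections:', list(config_data.keys()))\n",
"    if isinstance(config_data.get('backtest'), dict):\n",
"        print('Backtest settings:', config_data['backtest'])\n",
"else:\n",
"    print('config_data not loaded; run the cell above first.')"
]
},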
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Instantiate the Pipeline\n",
"\n",
"Create an instance of the `TradingPipeline` class, passing the path to the configuration file. This initializes the pipeline object but does not run any steps yet."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Instantiating TradingPipeline...\n",
"2025-04-18 03:17:13,554 - root - INFO - Using Base Models Directory: /home/yasha/develop/gru_sac_predictor/models\n",
"2025-04-18 03:17:13,555 - root - INFO - Using results directory: /home/yasha/develop/gru_sac_predictor/results/20250418_031713\n",
"2025-04-18 03:17:13,555 - root - INFO - Using logs directory: /home/yasha/develop/gru_sac_predictor/logs/20250418_031713\n",
"2025-04-18 03:17:13,556 - root - INFO - Using models directory: /home/yasha/develop/gru_sac_predictor/models/20250418_031713\n",
"2025-04-18 03:17:13,557 - root - INFO - Logging setup complete. Log file: /home/yasha/develop/gru_sac_predictor/logs/20250418_031713/pipeline_20250418_031713.log\n",
"2025-04-18 03:17:13,558 - root - INFO - --- Starting Pipeline Run: 20250418_031713 ---\n",
"2025-04-18 03:17:13,559 - root - INFO - Using config: /home/yasha/develop/gru_sac_predictor/gru_sac_predictor/config.yaml\n",
"2025-04-18 03:17:13,560 - root - INFO - Resolved relative db_dir '../../data/crypto_market_data' to absolute path: /home/yasha/data/crypto_market_data\n",
"2025-04-18 03:17:13,561 - gru_sac_predictor.src.data_loader - INFO - Initialized DataLoader with db_dir='/home/yasha/data/crypto_market_data'\n",
"2025-04-18 03:17:13,562 - gru_sac_predictor.src.data_loader - WARNING - Database directory does not exist: /home/yasha/data/crypto_market_data\n",
"2025-04-18 03:17:13,563 - gru_sac_predictor.src.feature_engineer - INFO - FeatureEngineer initialized with minimal whitelist: ['return_1m', 'return_15m', 'return_60m', 'ATR_14', 'volatility_14d', 'chaikin_AD_10', 'svi_10', 'EMA_10', 'EMA_50', 'MACD', 'MACD_signal', 'hour_sin', 'hour_cos']\n",
"2025-04-18 03:17:13,564 - gru_sac_predictor.src.gru_model_handler - INFO - GRUModelHandler initialized for run 20250418_031713 in /home/yasha/develop/gru_sac_predictor/models/20250418_031713\n",
"2025-04-18 03:17:13,564 - gru_sac_predictor.src.calibrator - INFO - Calibrator initialized with edge threshold: 0.55\n",
"2025-04-18 03:17:13,565 - gru_sac_predictor.src.backtester - INFO - Backtester initialized.\n",
"2025-04-18 03:17:13,566 - gru_sac_predictor.src.backtester - INFO - Initial Capital: 10000.00\n",
"2025-04-18 03:17:13,566 - gru_sac_predictor.src.backtester - INFO - Transaction Cost: 0.0500%\n",
"2025-04-18 03:17:13,567 - gru_sac_predictor.src.backtester - INFO - Edge Threshold: 0.550\n",
"2025-04-18 03:17:13,575 - root - INFO - Saved run configuration to /home/yasha/develop/gru_sac_predictor/results/20250418_031713/run_config.yaml\n",
"TradingPipeline instantiated successfully.\n",
"Run ID: 20250418_031713\n",
"Results Dir: /home/yasha/develop/gru_sac_predictor/results/20250418_031713\n",
"Log Dir: /home/yasha/develop/gru_sac_predictor/logs/20250418_031713\n",
"Models Dir: /home/yasha/develop/gru_sac_predictor/models/20250418_031713\n"
]
}
],
"source": [
"pipeline_instance = None # Define outside try block\n",
"if 'TradingPipeline' in locals() and config_abs_path and os.path.exists(config_abs_path):\n",
"    try:\n",
"        # Instantiate the pipeline\n",
"        print('Instantiating TradingPipeline...')\n",
"        pipeline_instance = TradingPipeline(config_path=config_abs_path)\n",
"        print('TradingPipeline instantiated successfully.')\n",
"        print(f'Run ID: {pipeline_instance.run_id}')\n",
"        print(f'Results Dir: {pipeline_instance.dirs[\"results\"]}')\n",
"        print(f'Log Dir: {pipeline_instance.dirs[\"logs\"]}')\n",
"        print(f'Models Dir: {pipeline_instance.dirs[\"models\"]}')\n",
"\n",
"    except FileNotFoundError as e:\n",
"        print(f'ERROR during pipeline instantiation (FileNotFound): {e}')\n",
"    except Exception as e:\n",
"        print(f'An error occurred during pipeline instantiation: {e}')\n",
"        logging.error('Pipeline instantiation failed.', exc_info=True) # Log traceback\n",
"else:\n",
"    print('TradingPipeline class not imported, config path invalid, or config file not found. Cannot instantiate pipeline.')"
]
},
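{
"cell_type": "markdown",
"metadata": {},
"source": [
"The log output above shows the backtester configured with an initial capital of 10,000, a 0.05% transaction cost, and a 0.55 edge threshold. As a quick back-of-the-envelope check of what that cost means per trade (assuming the cost applies to the full notional on each transaction):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick arithmetic on the logged backtester settings (values taken from the log above)\n",
"initial_capital = 10_000.0\n",
"cost_rate = 0.0005  # 0.05% per transaction\n",
"round_trip_cost = 2 * cost_rate * initial_capital  # enter + exit at full size\n",
"print(f'Cost of one full-size round trip: ${round_trip_cost:.2f}')  # $10.00"
]
},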
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Step 1: Load Data\n",
"\n",
"Call the `load_data` method to fetch the raw market data based on the configuration."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"=== Running Step 1: Load Data ===\n",
"2025-04-18 03:17:15,747 - root - INFO - --- Notebook Step: Load Data (Calling load_and_preprocess_data) ---\n",
"2025-04-18 03:17:15,749 - root - INFO - --- Stage: Loading and Preprocessing Data ---\n",
"2025-04-18 03:17:15,751 - gru_sac_predictor.src.data_loader - INFO - Loading data for SOL-USDT (bnbspot) from 2024-06-01 to 2025-03-10, interval 1min\n",
"2025-04-18 03:17:15,767 - gru_sac_predictor.src.data_loader - INFO - Scanning for DB files recursively in: /home/yasha/data/crypto_market_data\n",
"2025-04-18 03:17:15,769 - gru_sac_predictor.src.data_loader - ERROR - Database directory /home/yasha/data/crypto_market_data does not exist\n",
"2025-04-18 03:17:15,773 - gru_sac_predictor.src.data_loader - ERROR - No relevant DB files found and no fallback files available.\n",
"2025-04-18 03:17:15,774 - gru_sac_predictor.src.data_loader - ERROR - No relevant database files found for the specified date range.\n",
"2025-04-18 03:17:15,779 - root - ERROR - Failed to load data. Exiting.\n"
]
},
{
"ename": "SystemExit",
"evalue": "1",
"output_type": "error",
"traceback": [
"An exception has occurred, use %tb to see the full traceback.\n",
"\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m 1\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/yasha/develop/gru_sac_predictor/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py:3587: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n",
"  warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n"
]
}
],
"source": [
"if pipeline_instance:\n",
"    try:\n",
"        print('\\n=== Running Step 1: Load Data ===')\n",
"        pipeline_instance.load_data()\n",
"        print('load_data() finished.')\n",
"\n",
"        print('\\n--- Inspecting Raw Data ---')\n",
"        if pipeline_instance.raw_data is not None:\n",
"            print(f'Shape of raw_data: {pipeline_instance.raw_data.shape}')\n",
"            display(pipeline_instance.raw_data.head())\n",
"            display(pipeline_instance.raw_data.tail())\n",
"            display(pipeline_instance.raw_data.isnull().sum()) # Check for NaNs\n",
"        else:\n",
"            print('raw_data attribute is None.')\n",
"\n",
"    except Exception as e:\n",
"        print(f'An error occurred during Load Data step: {e}')\n",
"        logging.error('Load Data step failed.', exc_info=True)\n",
"else:\n",
"    print('Pipeline not instantiated. Cannot run step.')"
]
},
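{
"cell_type": "markdown",
"metadata": {},
"source": [
"The output above shows the run aborting with `SystemExit` because the resolved database directory (`/home/yasha/data/crypto_market_data`) does not exist on this machine. Before re-running, you can check where the configured `db_dir` resolves to. The sketch below assumes the key sits under a `data:` section of the config and that relative paths resolve against `project_root`; both are assumptions about this project's conventions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical pre-flight check -- the 'data'/'db_dir' config layout is an assumption\n",
"db_dir = (config_data.get('data') or {}).get('db_dir') if 'config_data' in locals() else None\n",
"if db_dir:\n",
"    resolved = os.path.abspath(os.path.join(project_root, db_dir))\n",
"    print(f'Configured db_dir: {db_dir} -> {resolved}')\n",
"    print('Exists:', os.path.isdir(resolved))\n",
"else:\n",
"    print('No db_dir found in config_data; check the config layout.')"
]
},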
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5. Step 2: Engineer Features\n",
"\n",
"Call the `engineer_features` method to create technical indicators and other features from the raw data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if pipeline_instance and pipeline_instance.raw_data is not None:\n",
"    try:\n",
"        print('\\n=== Running Step 2: Engineer Features ===')\n",
"        pipeline_instance.engineer_features()\n",
"        print('engineer_features() finished.')\n",
"\n",
"        print('\\n--- Inspecting Features DataFrame ---')\n",
"        if pipeline_instance.features_df is not None:\n",
"            print(f'Shape of features_df: {pipeline_instance.features_df.shape}')\n",
"            display(pipeline_instance.features_df.head())\n",
"            display(pipeline_instance.features_df.tail())\n",
"            display(pipeline_instance.features_df.isnull().sum()) # Check for NaNs introduced by features\n",
"        else:\n",
"            print('features_df attribute is None.')\n",
"\n",
"    except Exception as e:\n",
"        print(f'An error occurred during Engineer Features step: {e}')\n",
"        logging.error('Engineer Features step failed.', exc_info=True)\n",
"else:\n",
"    print('Pipeline not instantiated or raw_data missing. Cannot run step.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6. Step 3: Prepare Sequences\n",
"\n",
"Call the `prepare_sequences` method to split the data into train/validation/test sets and create sequences suitable for the GRU model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if pipeline_instance and pipeline_instance.features_df is not None:\n",
"    try:\n",
"        print('\\n=== Running Step 3: Prepare Sequences ===')\n",
"        pipeline_instance.prepare_sequences()\n",
"        print('prepare_sequences() finished.')\n",
"\n",
"        print('\\n--- Inspecting Sequences and Targets ---')\n",
"        # Assuming attributes like train_sequences, val_targets etc. exist\n",
"        for name in ['train_sequences', 'val_sequences', 'test_sequences',\n",
"                     'train_targets', 'val_targets', 'test_targets',\n",
"                     'train_indices', 'val_indices', 'test_indices']:\n",
"            attr = getattr(pipeline_instance, name, None)\n",
"            if attr is not None:\n",
"                # Check if it's numpy array or pandas series/df before getting shape\n",
"                if hasattr(attr, 'shape'):\n",
"                    print(f'{name} shape: {attr.shape}')\n",
"                else:\n",
"                    print(f'{name} type: {type(attr)}, length: {len(attr)}') # For lists like indices\n",
"            else:\n",
"                print(f'{name} attribute is None.')\n",
"\n",
"    except Exception as e:\n",
"        print(f'An error occurred during Prepare Sequences step: {e}')\n",
"        logging.error('Prepare Sequences step failed.', exc_info=True)\n",
"else:\n",
"    print('Pipeline not instantiated or features_df missing. Cannot run step.')\n"
]
},
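{
"cell_type": "markdown",
"metadata": {},
"source": [
"The actual splitting and windowing logic lives inside `prepare_sequences`; conceptually, it turns a feature matrix into overlapping lookback windows paired with next-step targets. The sketch below illustrates that idea with NumPy on synthetic data: the `lookback` value and the feature/target layout are assumptions for illustration, not the pipeline's actual parameters (the 13 columns simply mirror the feature whitelist logged earlier)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def make_windows(features, targets, lookback=60):\n",
"    \"\"\"Illustrative sliding-window builder: X[i] holds the `lookback` rows of\n",
"    features ending at t = i + lookback - 1; y[i] is the target one step later.\"\"\"\n",
"    X = np.stack([features[i:i + lookback] for i in range(len(features) - lookback)])\n",
"    y = targets[lookback:]\n",
"    return X, y\n",
"\n",
"# Synthetic example: 1000 timesteps, 13 features\n",
"feats = np.random.randn(1000, 13)\n",
"targs = np.random.randn(1000)\n",
"X_demo, y_demo = make_windows(feats, targs, lookback=60)\n",
"print(X_demo.shape, y_demo.shape)  # (940, 60, 13) (940,)"
]
},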
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7. Step 4: Train or Load GRU Model\n",
"\n",
"Call `train_or_load_gru` to either train a new GRU model or load a pre-trained one, based on the configuration flags."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if pipeline_instance and pipeline_instance.train_sequences is not None: # Check if sequences are ready\n",
"    try:\n",
"        print('\\n=== Running Step 4: Train or Load GRU ===')\n",
"        pipeline_instance.train_or_load_gru()\n",
"        print('train_or_load_gru() finished.')\n",
"\n",
"        print('\\n--- Inspecting GRU Handler ---')\n",
"        if pipeline_instance.gru_handler is not None:\n",
"            print(f'GRU Handler instantiated: {pipeline_instance.gru_handler}')\n",
"            # Potentially inspect model summary if handler exposes it\n",
"            # print(pipeline_instance.gru_handler.model.summary())\n",
"            print(f'GRU Predictions available (val): {hasattr(pipeline_instance.gru_handler, \"val_predictions\")}')\n",
"            print(f'GRU Predictions available (test): {hasattr(pipeline_instance.gru_handler, \"test_predictions\")}')\n",
"        else:\n",
"            print('gru_handler attribute is None.')\n",
"\n",
"    except Exception as e:\n",
"        print(f'An error occurred during Train/Load GRU step: {e}')\n",
"        logging.error('Train/Load GRU step failed.', exc_info=True)\n",
"else:\n",
"    print('Pipeline not instantiated or sequences missing. Cannot run step.')"
]
},
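{
"cell_type": "markdown",
"metadata": {},
"source": [
"For orientation, a generic GRU binary classifier over `(lookback, n_features)` windows might look like the sketch below. This is **not** the pipeline's architecture, which is defined by `GRUModelHandler` and the config; every hyperparameter here (layer size, dropout, optimizer) is a placeholder."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"def build_demo_gru(lookback=60, n_features=13):\n",
"    \"\"\"Generic GRU classifier sketch -- placeholder hyperparameters only.\"\"\"\n",
"    model = tf.keras.Sequential([\n",
"        tf.keras.layers.Input(shape=(lookback, n_features)),\n",
"        tf.keras.layers.GRU(64),\n",
"        tf.keras.layers.Dropout(0.2),\n",
"        tf.keras.layers.Dense(1, activation='sigmoid'),\n",
"    ])\n",
"    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
"    return model\n",
"\n",
"build_demo_gru().summary()"
]
},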
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 8. Step 5: Calibrate Predictions\n",
"\n",
"Call `calibrate_predictions` to use the validation set predictions from the GRU to find an optimal probability threshold or apply other calibration techniques."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if pipeline_instance and pipeline_instance.gru_handler is not None and hasattr(pipeline_instance.gru_handler, 'val_predictions'):\n",
"    try:\n",
"        print('\\n=== Running Step 5: Calibrate Predictions ===')\n",
"        pipeline_instance.calibrate_predictions()\n",
"        print('calibrate_predictions() finished.')\n",
"\n",
"        print('\\n--- Inspecting Calibration Results ---')\n",
"        if pipeline_instance.calibrator is not None:\n",
"            print(f'Calibrator object: {pipeline_instance.calibrator}')\n",
"            print(f'Optimal threshold: {getattr(pipeline_instance, \"optimal_threshold\", \"Not set\")}')\n",
"            print(f'Calibrated Val Probs exist: {hasattr(pipeline_instance.calibrator, \"calibrated_val_probabilities\")}')\n",
"            print(f'Calibrated Test Probs exist: {hasattr(pipeline_instance.calibrator, \"calibrated_test_probabilities\")}')\n",
"\n",
"        else:\n",
"            print('calibrator attribute is None.')\n",
"\n",
"    except Exception as e:\n",
"        print(f'An error occurred during Calibrate Predictions step: {e}')\n",
"        logging.error('Calibrate Predictions step failed.', exc_info=True)\n",
"else:\n",
"    print('Pipeline not instantiated or GRU validation predictions missing. Cannot run step.')\n"
]
},
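{
"cell_type": "markdown",
"metadata": {},
"source": [
"To make the idea of an \"optimal probability threshold\" concrete, the sketch below sweeps candidate cutoffs over validation probabilities and keeps the one that maximises the accuracy of the implied signal. The scoring rule and sweep range are assumptions for illustration; the actual `Calibrator` logic (initialised above with an edge threshold of 0.55) may differ."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def sweep_threshold(probs, labels):\n",
"    \"\"\"Illustrative threshold search: maximise signal accuracy on validation data.\"\"\"\n",
"    best_t, best_acc = 0.5, -1.0\n",
"    for t in np.arange(0.50, 0.71, 0.01):\n",
"        acc = ((probs >= t).astype(int) == labels).mean()\n",
"        if acc > best_acc:\n",
"            best_t, best_acc = t, acc\n",
"    return best_t, best_acc\n",
"\n",
"# Synthetic validation probabilities and labels, just to exercise the function\n",
"rng = np.random.default_rng(0)\n",
"demo_probs = rng.uniform(0, 1, size=500)\n",
"demo_labels = (demo_probs + rng.normal(0, 0.2, size=500) > 0.5).astype(int)\n",
"print(sweep_threshold(demo_probs, demo_labels))"
]
},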
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 9. Step 6: Prepare SAC Agent for Backtest\n",
"\n",
"Call `train_or_load_sac`. This step might involve triggering offline SAC training (if configured) or simply identifying and setting the path to the pre-trained SAC agent policy to be used in the backtest."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Note: Actual SAC training might be complex to run directly inline.\n",
"# This step often just prepares the necessary info (like the agent path) for the backtester.\n",
"if pipeline_instance:\n",
"    try:\n",
"        print('\\n=== Running Step 6: Train or Load SAC (Prepare for Backtest) ===')\n",
"        # This might just set an attribute like sac_agent_path based on config\n",
"        pipeline_instance.train_or_load_sac()\n",
"        print('train_or_load_sac() finished.')\n",
"\n",
"        print('\\n--- Inspecting SAC Agent Info ---')\n",
"        # Check the attribute storing the path or relevant SAC info\n",
"        print(f'SAC Agent Path for backtest: {getattr(pipeline_instance, \"sac_agent_path\", \"Not set\")}')\n",
"\n",
"    except Exception as e:\n",
"        print(f'An error occurred during Train/Load SAC step: {e}')\n",
"        logging.error('Train/Load SAC step failed.', exc_info=True)\n",
"else:\n",
"    print('Pipeline not instantiated. Cannot run step.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 10. Step 7: Run Backtest\n",
"\n",
"Execute the trading simulation using the test data, GRU predictions (calibrated), and the loaded SAC agent policy."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check if necessary components are ready\n",
"backtest_ready = (\n",
"    pipeline_instance and\n",
"    pipeline_instance.test_sequences is not None and\n",
"    pipeline_instance.test_targets is not None and\n",
"    pipeline_instance.test_indices is not None and\n",
"    pipeline_instance.gru_handler is not None and\n",
"    pipeline_instance.calibrator is not None and # Ensure calibration ran\n",
"    getattr(pipeline_instance, \"optimal_threshold\", None) is not None and\n",
"    getattr(pipeline_instance, \"sac_agent_path\", None) is not None\n",
")\n",
"\n",
"if backtest_ready:\n",
"    try:\n",
"        print('\\n=== Running Step 7: Run Backtest ===')\n",
"        pipeline_instance.run_backtest()\n",
"        print('run_backtest() finished.')\n",
"\n",
"        print('\\n--- Inspecting Backtest Results ---')\n",
"        if pipeline_instance.backtest_metrics:\n",
"            print('\\n--- Backtest Metrics --- ')\n",
"            metrics = pipeline_instance.backtest_metrics\n",
"            metrics['Run ID'] = pipeline_instance.run_id # Add run ID for context\n",
"            for key, value in metrics.items():\n",
"                if key == \"Confusion Matrix (GRU Signal vs Actual Dir)\":\n",
"                    print(f'{key}:\\n{np.array(value)}')\n",
"                elif key == \"Classification Report (GRU Signal)\":\n",
"                    print(f'{key}:\\n{value}')\n",
"                elif isinstance(value, float):\n",
"                    print(f'{key}: {value:.4f}')\n",
"                else:\n",
"                    print(f'{key}: {value}')\n",
"        else:\n",
"            print('Backtest metrics not available.')\n",
"\n",
"        if pipeline_instance.backtest_results_df is not None:\n",
"            print('\\n--- Backtest Results DataFrame (Head) --- ')\n",
"            display(pipeline_instance.backtest_results_df.head())\n",
"            print('\\n--- Backtest Results DataFrame (Tail) --- ')\n",
"            display(pipeline_instance.backtest_results_df.tail())\n",
"            print('\\n--- Backtest Results DataFrame (Description) --- ')\n",
"            display(pipeline_instance.backtest_results_df.describe())\n",
"        else:\n",
"            print('Backtest results DataFrame not available.')\n",
"\n",
"\n",
"    except Exception as e:\n",
"        print(f'An error occurred during Run Backtest step: {e}')\n",
"        logging.error('Run Backtest step failed.', exc_info=True)\n",
"else:\n",
"    print('Pipeline not instantiated or prerequisites for backtest are missing. Cannot run step.')\n",
"    print(f\"Prerequisites check: pipeline={bool(pipeline_instance)}, test_sequences={pipeline_instance.test_sequences is not None if pipeline_instance else False}, \"\n",
"          f\"test_targets={pipeline_instance.test_targets is not None if pipeline_instance else False}, test_indices={pipeline_instance.test_indices is not None if pipeline_instance else False}, \"\n",
"          f\"gru_handler={pipeline_instance.gru_handler is not None if pipeline_instance else False}, calibrator={pipeline_instance.calibrator is not None if pipeline_instance else False}, \"\n",
"          f\"optimal_T={getattr(pipeline_instance, 'optimal_threshold', None) is not None}, sac_path={getattr(pipeline_instance, 'sac_agent_path', None) is not None}\")\n"
]
},
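{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to recompute headline metrics from the detailed results yourself, the sketch below shows the standard formulas applied to an equity-curve column. The column name `portfolio_value` and the 1-minute annualisation factor are assumptions; adjust both to match the actual layout of `backtest_results_df`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def summarize_equity(equity, periods_per_year=365 * 24 * 60):\n",
"    \"\"\"Illustrative metrics from an equity curve: total return, annualised Sharpe, max drawdown.\"\"\"\n",
"    rets = equity.pct_change().dropna()\n",
"    total_return = equity.iloc[-1] / equity.iloc[0] - 1\n",
"    sharpe = np.sqrt(periods_per_year) * rets.mean() / rets.std()\n",
"    drawdown = equity / equity.cummax() - 1\n",
"    return {'total_return': total_return, 'sharpe': sharpe, 'max_drawdown': drawdown.min()}\n",
"\n",
"# Hypothetical usage -- assumes a 'portfolio_value' column exists:\n",
"# print(summarize_equity(pipeline_instance.backtest_results_df['portfolio_value']))"
]
},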
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 11. Step 8: Save Results\n",
"\n",
"Save the calculated metrics, the detailed backtest results DataFrame, and any generated plots to the run-specific output directory."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if pipeline_instance and pipeline_instance.backtest_results_df is not None and pipeline_instance.backtest_metrics:\n",
"    try:\n",
"        print('\\n=== Running Step 8: Save Results ===')\n",
"        pipeline_instance.save_results()\n",
"        print('save_results() finished.')\n",
"        print(f'Results should be saved in: {pipeline_instance.dirs[\"results\"]}')\n",
"\n",
"    except Exception as e:\n",
"        print(f'An error occurred during Save Results step: {e}')\n",
"        logging.error('Save Results step failed.', exc_info=True)\n",
"else:\n",
"    print('Pipeline not instantiated or backtest results/metrics missing. Cannot run step.')\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 12. Display Saved Plots\n",
"\n",
"Load and display the plots generated and saved during the pipeline execution (especially during calibration and backtesting/saving)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# This code assumes plots were generated and saved by previous steps (like calibrate or save_results)\n",
"if pipeline_instance is not None and pipeline_instance.dirs.get('results'):\n",
"    results_dir = pipeline_instance.dirs['results']\n",
"    run_id = pipeline_instance.run_id\n",
"    print(f'\\nLooking for plots in: {results_dir}\\n')\n",
"\n",
"    plot_files = [\n",
"        f'backtest_summary_{run_id}.png',\n",
"        f'confusion_matrix_{run_id}.png',\n",
"        f'reliability_curve_val_{run_id}.png', # Generated by calibration\n",
"        f'calibration_curve_test_{run_id}.png' # Potentially generated by backtester/save_results\n",
"        # Add any other plot filenames generated by your pipeline\n",
"    ]\n",
"\n",
"    plot_found = False\n",
"    for plot_file in plot_files:\n",
"        plot_path = os.path.join(results_dir, plot_file)\n",
"        if os.path.exists(plot_path):\n",
"            plot_found = True\n",
"            print(f'--- Displaying: {plot_file} ---')\n",
"            try:\n",
"                img = mpimg.imread(plot_path)\n",
"                # Determine appropriate figure size based on plot type\n",
"                figsize = (15, 12) if 'summary' in plot_file else (8, 7)\n",
"                plt.figure(figsize=figsize)\n",
"                plt.imshow(img)\n",
"                plt.axis('off') # Hide axes for image display\n",
"                plt.title(plot_file)\n",
"                plt.show()\n",
"            except Exception as e:\n",
"                print(f' Error loading/displaying plot {plot_file}: {e}')\n",
"        else:\n",
"            print(f'Plot not found: {plot_path}')\n",
"\n",
"    if not plot_found:\n",
"        print(\"No standard plots found in the results directory.\")\n",
"\n",
"else:\n",
"    print('\\nPipeline object not found or results directory is not available. Cannot display plots.')\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 13. Conclusion\n",
"\n",
"This notebook demonstrated the step-by-step workflow of using the `TradingPipeline`. By running each step individually, we could inspect the intermediate outputs. You can modify the `config.yaml` file to experiment with different parameters, data ranges, and control flags, then re-run the relevant steps of this notebook. The final results (metrics, plots, detailed CSV) are saved in the run-specific directory under the main project's `results/` folder."
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}