diff --git a/gru_sac_predictor/src/trading_pipeline.py b/gru_sac_predictor/src/trading_pipeline.py index 9bfd264b..f7815d1b 100644 --- a/gru_sac_predictor/src/trading_pipeline.py +++ b/gru_sac_predictor/src/trading_pipeline.py @@ -20,6 +20,8 @@ import matplotlib.pyplot as plt import seaborn as sns import torch # Added for SAC weight aggregation from collections import OrderedDict # Added for SAC weight aggregation +import time +import shutil # Determine the project root directory based on the script location # This assumes the script is in src/ and the project root is two levels up @@ -71,9 +73,9 @@ import scipy.stats as st # --- Import edge_filtered_accuracy (Task 6.1/6.2) --- # try: # Ensure both metrics are imported - from .metrics import edge_filtered_accuracy, calculate_brier_score, _calculate_optimal_edge_threshold + from gru_sac_predictor.src.metrics import edge_filtered_accuracy, calculate_brier_score, _calculate_optimal_edge_threshold except ImportError: - logging.error("Failed to import metrics from .metrics. Validation check will fail.") + logging.error("Failed to import metrics from gru_sac_predictor.src.metrics. Validation check will fail.") # Define placeholders def edge_filtered_accuracy(*args, **kwargs): return np.nan, 0 def calculate_brier_score(*args, **kwargs): return np.nan @@ -81,185 +83,85 @@ except ImportError: # --- End Import --- # # --- End imports for baseline --- # +# --- Import Stage Functions --- # +from gru_sac_predictor.src.pipeline_stages.data_processing import ( + load_and_preprocess, + engineer_features_for_fold, + define_labels_and_align_fold, + split_data_fold +) +from gru_sac_predictor.src.pipeline_stages.feature_processing import ( + scale_features_fold, + select_features_fold, + prune_features_fold # Added import +) +from gru_sac_predictor.src.pipeline_stages.sequence_creation import ( + create_sequences_fold # Added import +) +from gru_sac_predictor.src.pipeline_stages.evaluation import run_baseline_checks_fold # Added baseline check import +from gru_sac_predictor.src.pipeline_stages.evaluation import run_gru_validation_checks_fold # Added validation check import +from gru_sac_predictor.src.pipeline_stages.evaluation import run_backtest_fold # Import the new backtest function +from gru_sac_predictor.src.pipeline_stages.modelling import train_or_load_gru_fold +from gru_sac_predictor.src.pipeline_stages.modelling import calibrate_probabilities_fold +from gru_sac_predictor.src.pipeline_stages.modelling import train_or_load_sac_fold +from gru_sac_predictor.src.pipeline_stages.modelling import aggregate_sac_agents + logger = logging.getLogger(__name__) # Use module-level logger # --- Refactored Label Generation Logic --- # -def _generate_direction_labels(df: pd.DataFrame, config: dict) -> tuple[pd.DataFrame, str]: - """ - Calculates forward returns and generates binary, soft binary, or ternary direction labels. - - Args: - df (pd.DataFrame): DataFrame containing at least a 'close' column and DatetimeIndex. - config (dict): Pipeline configuration dictionary, expecting keys under 'gru' and 'data'. - - Returns: - tuple[pd.DataFrame, str]: - - DataFrame with added forward return and direction label columns. - - Name of the generated direction label column. 
- """ - if 'close' not in df.columns: - raise ValueError("'close' column missing in input DataFrame for label generation.") - - gru_cfg = config.get('gru', {}) - data_cfg = config.get('data', {}) - horizon = gru_cfg.get('prediction_horizon', 5) - use_ternary = gru_cfg.get('use_ternary', False) - - target_ret_col = f'fwd_log_ret_{horizon}' - - # --- Calculate Forward Log Return --- # - shifted_close = df['close'].shift(-horizon) - fwd_returns = np.log(shifted_close / df['close']) - df[target_ret_col] = fwd_returns - - # --- Generate Direction Label (Binary/Soft or Ternary) --- # - if use_ternary: - k = gru_cfg.get('flat_sigma_multiplier', 0.25) - target_dir_col = f'direction_label3_{horizon}' - logging.info(f"Generating ternary labels ({target_dir_col}) with k={k}...") - - sigma_n = fwd_returns.rolling(window=horizon, min_periods=max(1, horizon//2)).std() - eps = k * sigma_n - - conditions = [fwd_returns > eps, fwd_returns < -eps] - choices = [2, 0] # 2=up, 0=down - ordinal_labels = np.select(conditions, choices, default=1).astype(int) # 1=flat - - # --- Log Distribution & Check Balance --- # - # Temporarily add ordinal labels for check, handle NaNs from rolling sigma - df['_ordinal_label_temp'] = ordinal_labels - valid_mask_for_dist = ~np.isnan(eps) & ~np.isnan(fwd_returns) - ordinal_labels_valid = df.loc[valid_mask_for_dist, '_ordinal_label_temp'] - - if not ordinal_labels_valid.empty: - counts = np.bincount(ordinal_labels_valid, minlength=3) - total_valid = len(ordinal_labels_valid) - dist_pct = counts / total_valid * 100 - log_msg = (f"Label dist (n={total_valid}): " - f"Down(0)={dist_pct[0]:.1f}%, Flat(1)={dist_pct[1]:.1f}%, Up(2)={dist_pct[2]:.1f}%") - logging.info(log_msg) - - min_pct_threshold = 10.0 # As per implementation - if any(p < min_pct_threshold for p in dist_pct): - error_msg = f"Label imbalance detected! Min class percentage is {np.min(dist_pct):.1f}% (Threshold: {min_pct_threshold}%). Check data or flat_sigma_multiplier (k={k})." - logging.error(error_msg) - # Consider raising or exiting - currently only logs/prints - print(f"ERROR: {error_msg}") - else: - logging.warning("Could not calculate label distribution (no valid sigma or returns).") - # --- End Distribution Check --- # - - # --- One-hot encode --- # - try: - # Use the valid mask determined earlier - y_cat_full = np.full((len(df), 3), np.nan, dtype=np.float32) - if ordinal_labels_valid.empty: - logging.warning("No valid ordinal labels to one-hot encode.") - else: - y_cat_valid = to_categorical(ordinal_labels_valid, num_classes=3) - y_cat_full[valid_mask_for_dist] = y_cat_valid.astype(np.float32) - - # Assign the list of arrays (or NaNs) - df[target_dir_col] = list(y_cat_full) - - except Exception as e: - logging.error(f"Error during one-hot encoding: {e}", exc_info=True) - raise # Re-raise exception to halt pipeline if encoding fails - finally: - # Clean up temporary column regardless of success/failure - if '_ordinal_label_temp' in df.columns: - df.drop(columns=['_ordinal_label_temp'], inplace=True) - # --- End One-hot Encoding --- # - - else: # Binary / Soft Binary - target_dir_col = f'direction_label_{horizon}' - label_smoothing = data_cfg.get('label_smoothing', 0.0) - if not (0.0 <= label_smoothing < 1.0): - logging.warning(f"Invalid label_smoothing value ({label_smoothing}). Must be in [0.0, 1.0). 
Disabling smoothing.") - label_smoothing = 0.0 - - if label_smoothing > 0.0: - high_label = 1.0 - label_smoothing / 2.0 - low_label = label_smoothing / 2.0 - logging.info(f"Applying label smoothing: {label_smoothing:.2f} -> labels [{low_label:.2f}, {high_label:.2f}] for {target_dir_col}") - df[target_dir_col] = np.where(fwd_returns > 0, high_label, low_label).astype(np.float32) - else: - logging.info(f"Using hard binary labels (0.0 / 1.0) for {target_dir_col}") - df[target_dir_col] = (fwd_returns > 0).astype(np.float32) - - # --- Drop Rows with NaN Targets --- # - initial_rows = len(df) - - # Create mask for NaNs in the direction column (handle scalar or list/array NaNs) - if use_ternary: - # Check if elements are lists AND all values inside are NaN - nan_mask_dir = df[target_dir_col].apply(lambda x: isinstance(x, list) and np.all(np.isnan(x))) - else: - # Standard check for scalar NaN - nan_mask_dir = df[target_dir_col].isna() - - # Combine with NaN check for forward returns - nan_mask_combined = df[target_ret_col].isna() | nan_mask_dir - - df_clean = df[~nan_mask_combined].copy() # Use .copy() to avoid SettingWithCopyWarning later - - final_rows = len(df_clean) - if final_rows < initial_rows: - logging.info(f"Dropped {initial_rows - final_rows} rows due to NaN targets (horizon={horizon}).") - - if df_clean.empty: - logging.error("DataFrame is empty after defining labels and dropping NaNs. Exiting.") - # Returning empty DataFrame, caller should handle exit - return pd.DataFrame(), target_dir_col - - return df_clean, target_dir_col +# [Function _generate_direction_labels removed - Moved to data_processing.py] # --- End Refactored Label Generation --- # class TradingPipeline: """Orchestrates the entire trading strategy pipeline.""" - def __init__(self, config_path: str, cli_args: argparse.Namespace = None, io_manager: Optional[Any] = None): + def __init__(self, config: dict, io_manager: Optional[Any] = None): """ Initialize the pipeline with configuration, optional CLI args, and IOManager. Args: - config_path (str): Path to the configuration file. - cli_args (argparse.Namespace, optional): Parsed command-line arguments. Defaults to None. + config (dict): The loaded configuration dictionary. io_manager (IOManager, optional): Initialized IOManager instance. Defaults to None. """ - self.config_path = config_path - self.config = self._load_config() + # Store the passed config dictionary directly + self.config = config # Run ID and Git SHA should be generated *before* logger/io setup in run.py - # If pipeline is instantiated directly, generate them here. - # TODO: Consider passing run_id and git_sha directly from run.py? + # and passed via the IOManager. if io_manager is None: - # Attempt to generate run_id if not provided via IOManager - try: - from .utils.run_id import make_run_id, get_git_sha - self.run_id = make_run_id() - self.git_sha = get_git_sha(short=False) or "unknown" - except ImportError: - # Fallback if run outside standard structure - self.run_id = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_fallback") - self.git_sha = "unknown" - logger_to_use = logging # Use root logger if no io/logger setup provided + # IOManager is considered essential for proper operation. + # Raise an error or handle appropriately if not provided. + # For now, log critical error and exit, assuming IOManager is required. + # TODO: Decide final handling if IOManager *can* be optional. + logging.critical("IOManager not provided during TradingPipeline initialization. 
Cannot proceed.") + raise ValueError("IOManager instance is required for TradingPipeline.") + # Fallback removed - rely on IOManager + # self.run_id = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_fallback") + # self.git_sha = "unknown" + # logger_to_use = logging # Use root logger if no io/logger setup provided else: + # --- Retrieve run_id and git_sha FROM io_manager --- + if not hasattr(io_manager, 'run_id') or not io_manager.run_id: + raise ValueError("IOManager instance provided, but does not have a 'run_id' attribute set.") self.run_id = io_manager.run_id - # TODO: Pass git_sha via io_manager or constructor? - # For now, re-fetch it or get from src.__init__? - try: - from . import GIT_SHA - self.git_sha = GIT_SHA - except ImportError: - self.git_sha = "unknown" # Fallback - logger_to_use = logging.getLogger() # Assume logger was set up - - self.io = io_manager or self._setup_io_manual() # Use provided or setup manually - self.pipeline_version = "3.0.0" # Placeholder version - # --- Handle CLI Overrides --- # - # ... (rest of existing override logic) ... + # Assume git_sha is also an attribute of the initialized IOManager + if not hasattr(io_manager, 'git_sha') or not io_manager.git_sha: + # logging.warning("IOManager instance does not have 'git_sha' attribute. Using 'unknown'.") + # self.git_sha = "unknown" # Or raise error if mandatory + # For now, let's assume it's mandatory for traceability + raise ValueError("IOManager instance provided, but does not have a 'git_sha' attribute set.") + self.git_sha = io_manager.git_sha + # --- End Retrieval --- + + logger_to_use = logging.getLogger() # Assume logger was set up by IOManager/run.py + + # self.io is now guaranteed to be an IOManager instance if we proceed past checks + self.io = io_manager + # Store git_sha on self as well, retrieved via IOManager + # Ensure git_sha exists on io_manager (already checked above, but good practice) + self.git_sha = getattr(io_manager, 'git_sha', 'unknown_in_pipeline') + self.pipeline_version = "3.0.0" # Placeholder version # --- Directory Setup (Now handled by IOManager if provided) --- # if self.io: @@ -280,16 +182,34 @@ class TradingPipeline: self._setup_logging_manual() # Fallback logging # --- End Directory Setup --- # + # --- Feature Whitelist Modification --- # + # Add 'bar_imputed' to the minimal whitelist if it's not already there + if 'bar_imputed' not in minimal_whitelist: + minimal_whitelist.append('bar_imputed') + logger.info("Added 'bar_imputed' to minimal_whitelist.") + # --- End Whitelist Modification --- # + # Log Banner (Moved to run.py which has version info) # logger_to_use.info(...) # --- Initialize Components --- # - self.data_loader = DataLoader(self.config) - self.feature_engineer = FeatureEngineer(self.config) - self.calibrator = Calibrator(self.config) # Initialize Calibrator + # Extract db_directory from config before passing to DataLoader + data_cfg = self.config.get('data', {}) + db_directory_path = data_cfg.get('db_dir', 'data/db') # Use 'db_dir' key, provide a default + if not db_directory_path or not isinstance(db_directory_path, str): + # Adjust error message to reflect the correct key 'db_dir' + logger.error(f"Invalid or missing 'db_dir' in config['data']. Found: {db_directory_path}. 
Using default 'data/db'.") + db_directory_path = 'data/db' # Fallback + + self.data_loader = DataLoader(db_dir=db_directory_path) + self.feature_engineer = FeatureEngineer(self.config) + # Extract edge threshold for Calibrator initialization + calibration_cfg = self.config.get('calibration', {}) + initial_edge_threshold = calibration_cfg.get('edge_threshold', 0.1) # Get edge from config + self.calibrator = Calibrator(edge_threshold=initial_edge_threshold) # --- Vector Calibrator (Task 4) --- # if VECTOR_CALIBRATOR_AVAILABLE: - self.vector_calibrator = VectorCalibrator(config=self.config) + self.vector_calibrator = VectorCalibrator() # Initialize without config else: self.vector_calibrator = None # --- End Vector Calibrator --- # @@ -353,6 +273,16 @@ class TradingPipeline: self.use_ternary = self.config.get('gru', {}).get('use_ternary', False) # Cache ternary flag self.aggregated_metrics: Optional[dict] = None # Aggregated metrics across folds self.optimized_edge_threshold: Optional[float] = None # Store optimized edge threshold per fold + # --- Add attributes for baseline filtering --- # + self.fwd_returns_aligned: Optional[pd.Series] = None + self.eps_aligned: Optional[pd.Series] = None + # Attributes for split returns/eps needed by baseline check + self.fwd_ret_train: Optional[pd.Series] = None + self.eps_train: Optional[pd.Series] = None + self.fwd_ret_val: Optional[pd.Series] = None + self.eps_val: Optional[pd.Series] = None + self.y_dir_val_ordinal: Optional[pd.Series] = None # <<< ADDED + # --- End Add --- # # --- End Initialize state variables --- # # Save config handled by run.py via IOManager typically @@ -404,1882 +334,186 @@ class TradingPipeline: # Remove original _setup_directories, _setup_logging, _save_run_config # Remove _generate_run_id (now done externally or via fallback) - def _load_config(self) -> dict: - """Loads the YAML configuration file.""" - try: - # Try loading relative to the script first (if running from src) - if not os.path.isabs(self.config_path): - potential_path = os.path.join(script_dir, self.config_path) - if not os.path.exists(potential_path): - # If not found relative to script, try relative to project root - potential_path = os.path.join(project_root, self.config_path) - if not os.path.exists(potential_path): - # If still not found, try relative to CWD as last resort - potential_path = os.path.abspath(self.config_path) - - if os.path.exists(potential_path): - self.config_path = potential_path - else: - # Try one level up from project root (common structure) - potential_path = os.path.join(os.path.dirname(project_root), 'gru_sac_predictor', 'config.yaml') - if os.path.exists(potential_path): - self.config_path = potential_path - else: - raise FileNotFoundError(f"Config file not found at relative paths, CWD, or common location: {self.config_path}") - - # --- ADDED DEBUGGING --- - logging.info(f"Attempting to load config from resolved path: {self.config_path}") - # --- END DEBUGGING --- - - with open(self.config_path, 'r') as f: - config = yaml.safe_load(f) - - # --- ADDED DEBUGGING --- - if isinstance(config, dict): - logging.info(f"Successfully loaded YAML. Top-level keys found: {list(config.keys())}") - else: - logging.warning(f"YAML loaded, but result is not a dictionary. Type: {type(config)}. 
Content snippet: {str(config)[:200]}") - # --- END DEBUGGING --- - - # Basic validation - if 'data' not in config or 'gru' not in config or 'sac' not in config: - raise ValueError("Config file missing essential sections: data, gru, sac") - # Validate calibration config if present - if 'calibration' in config and 'edge_threshold' not in config['calibration']: - logging.warning("'edge_threshold' not found in calibration config, using default 0.55") - config['calibration']['edge_threshold'] = 0.55 # Add default if missing - elif 'calibration' not in config: - logging.warning("'calibration' section not found in config, using default edge_threshold 0.55") - config['calibration'] = {'edge_threshold': 0.55} # Add default section - - return config - except FileNotFoundError: - print(f"ERROR: Configuration file not found at '{self.config_path}'") - sys.exit(1) - except yaml.YAMLError as e: - print(f"ERROR: Error parsing configuration file '{self.config_path}': {e}") - sys.exit(1) - except Exception as e: - print(f"ERROR: An unexpected error occurred while loading config: {e}") - sys.exit(1) - # --- Internal Pipeline Steps --- def load_and_preprocess_data(self): - """Loads and preprocesses data using DataLoader.""" - logging.info("--- Stage: Loading and Preprocessing Data ---") - # Error handling for data_loader - if self.data_loader is None: - logging.error("DataLoader not initialized. Cannot load data.") - sys.exit(1) - - # Load data and summary - self.df_raw, self.load_summary = self.data_loader.load_data() + """Loads and preprocesses data by calling the stage function.""" + logger.info("--- Calling Stage: Loading and Preprocessing Data ---") + df_raw, load_summary = load_and_preprocess( + data_loader=self.data_loader, + io=self.io, + run_id=self.run_id, + config=self.config + ) + self.df_raw = df_raw + self.load_summary = load_summary + if self.df_raw is None: + logger.error("Data loading stage failed. Exiting pipeline.") + sys.exit(1) # Exit if loading failed - if self.df_raw is None or self.df_raw.empty: - logging.error("Data loading failed or returned empty DataFrame. Exiting.") - sys.exit(1) - - # Calculate memory usage and log info - mem_usage = self.df_raw.memory_usage(deep=True).sum() / (1024**2) - if self.load_summary: - logging.info(f"Data loading summary: {self.load_summary}") - else: - logging.warning("No load summary returned by DataLoader.") - logging.info(f"Loaded data: {self.df_raw.shape[0]} rows, {self.df_raw.shape[1]} columns. 
Memory: {mem_usage:.2f} MB") - logging.info(f"Time range: {self.df_raw.index.min()} to {self.df_raw.index.max()}") - - # --- V3 Output Contract: Stage 1 Artifacts --- - if self.io: - if self.load_summary: - # Add context to summary before saving - save_summary = self.load_summary.copy() # Don't modify original - save_summary['run_id'] = self.run_id - save_summary['timestamp_utc'] = datetime.now(timezone.utc).isoformat() - try: - self.io.save_json( - save_summary, - "preprocess_summary", - section='results', - use_txt=True # Save as .txt as requested - ) - logging.info("Saved preprocessing summary to results//preprocess_summary.txt") - except Exception as e: - logging.error(f"Failed to save preprocessing summary using IOManager: {e}") - else: - logging.warning("Load summary dictionary is None, cannot save preprocess_summary.txt") - - if self.df_raw is not None and not self.df_raw.empty: - try: - self.io.save_df( - self.df_raw.head(20), - "head_preprocessed", - section='results' - ) - logging.info("Saved head of preprocessed data to results//head_preprocessed.{csv/parquet}") - except Exception as e: - logging.error(f"Failed to save head of preprocessed data using IOManager: {e}") - else: - logging.warning("Raw dataframe (df_raw) is None or empty, cannot save head_preprocessed.") - - else: - logging.warning("IOManager not available, skipping saving of Stage 1 artifacts (preprocess_summary, head_preprocessed).") - # --- End V3 Output Contract --- - - # --- V3 Output Contract: Stage 2 Artifact (Label Histogram) --- - if self.io and self.config.get('control', {}).get('generate_plots', True): - logging.info("Generating training label distribution histogram...") - try: - # Get the target directory column name (handle ternary/binary) - horizon = self.config['gru'].get('prediction_horizon', 5) - target_dir_col = f'direction_label3_{horizon}' if self.use_ternary else f'direction_label_{horizon}' - - if target_dir_col not in self.y_train.columns: - logging.error(f"Target column '{target_dir_col}' not found in y_train. Cannot generate label histogram.") - elif self.y_train.empty: - logging.warning("y_train is empty. 
Skipping label histogram.") - else: - # Prepare data for plotting - if self.use_ternary: - # Convert one-hot back to ordinal for counting - labels_ordinal = np.argmax(np.stack(self.y_train[target_dir_col].values), axis=1) - label_counts = pd.Series(labels_ordinal).value_counts().sort_index() - class_names = ['Down (0)', 'Flat (1)', 'Up (2)'] - # Ensure all classes are present, even if count is 0 - label_counts = label_counts.reindex([0, 1, 2], fill_value=0) - title_suffix = f" (ε multiplier k={self.config.get('gru', {}).get('flat_sigma_multiplier', 'N/A')})" - else: # Binary - labels_ordinal = self.y_train[target_dir_col] - label_counts = labels_ordinal.value_counts().sort_index() - # Map 0/1 or smoothed values to names - # Simple approach: Count values close to 0 as Down, close to 1 as Up - down_count = (labels_ordinal < 0.5).sum() - up_count = (labels_ordinal >= 0.5).sum() - label_counts = pd.Series([down_count, up_count], index=[0, 1]) - class_names = ['Down (0)', 'Up (1)'] - title_suffix = "" - - # Get figure settings - fig_dpi = self.config.get('output', {}).get('figure_dpi', 150) - fig_size = self.config.get('output', {}).get('figure_size', [16, 9]) - footer_text = "© GRU-SAC v3" - - plt.style.use('seaborn-v0_8-darkgrid') - fig, ax = plt.subplots(figsize=fig_size) - - bars = ax.bar(class_names, label_counts.values, color=sns.color_palette('viridis', len(class_names))) - - # Add percentages on bars - total_samples = label_counts.sum() - if total_samples > 0: - for bar in bars: - height = bar.get_height() - percentage = f'{(height / total_samples) * 100:.1f}%' - ax.annotate(percentage, - xy=(bar.get_x() + bar.get_width() / 2, height), - xytext=(0, 3), # 3 points vertical offset - textcoords="offset points", - ha='center', va='bottom', fontsize=10) - - ax.set_ylabel('Count', fontsize=12) - ax.set_title(f'Training Set Label Distribution{title_suffix}', fontsize=16) - ax.tick_params(axis='x', rotation=0, labelsize=10) - ax.tick_params(axis='y', labelsize=10) - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - - # Add footer - plt.figtext(0.99, 0.01, footer_text, horizontalalignment='right', - verticalalignment='bottom', fontsize=8, color='gray') - - plt.tight_layout(rect=[0, 0.03, 1, 0.95]) - - # Save figure using IOManager - self.io.save_figure(fig, "label_histogram", section='results') - logging.info("Training label histogram saved.") - plt.close(fig) - - except Exception as e: - logging.error(f"Failed to generate or save training label histogram: {e}", exc_info=True) - elif not self.io: - logging.warning("IOManager not available, skipping training label histogram.") - # --- End V3 Output Contract --- - - def engineer_features(self, df: pd.DataFrame) -> pd.DataFrame: - """Adds features using FeatureEngineer.""" - logging.info("--- Stage: Engineering Features --- ") - if df is None or df.empty: - logging.error("Input DataFrame is empty. 
Cannot engineer features.") - # Return empty DataFrame to indicate failure - return pd.DataFrame() - - # Add base features (cyclical, imbalance, TA) - # Ensure FeatureEngineer.add_base_features accepts df - df_engineered = self.feature_engineer.add_base_features(df.copy()) - - # Drop rows with NaNs potentially introduced by feature engineering - initial_rows = len(df_engineered) - df_engineered.dropna(inplace=True) - if len(df_engineered) < initial_rows: - logging.warning(f"Dropped {initial_rows - len(df_engineered)} rows with NaN values after feature engineering.") + def engineer_features(self, df_input: pd.DataFrame) -> pd.DataFrame: + """Adds features for a fold by calling the stage function.""" + logger.info("--- Calling Stage: Engineering Features --- ") + # Call the refactored function + # --- Added missing io and config arguments --- # + df_engineered = engineer_features_for_fold( + df=df_input, + feature_engineer=self.feature_engineer, + io=self.io, # Pass the IOManager instance + config=self.config, # Pass the pipeline config + target_col=self.target_columns[0] if hasattr(self, 'target_columns') and self.target_columns else None # Pass target for sorting corr plot + ) + # --- End argument addition --- # + # Return the result (no need to store on self.df_engineered_full here, + # the execute method will handle passing it to the next stage) if df_engineered.empty: - logging.error("DataFrame is empty after feature engineering and NaN removal.") - # Return empty DataFrame - return pd.DataFrame() + logger.error("Feature engineering stage failed or resulted in empty DataFrame.") + # Depending on context (fold vs full), might need sys.exit or return empty + # For now, return the empty df, caller should handle. - logging.info(f"Feature engineering complete for this fold. Shape: {df_engineered.shape}") return df_engineered def define_labels_and_align(self, df_engineered: pd.DataFrame) -> Tuple[pd.DataFrame, str, List[str]]: - """Defines prediction labels (returns, direction) and aligns with features for a given DataFrame.""" - logging.info("--- Stage: Defining Labels and Aligning for Fold --- ") - if df_engineered is None or df_engineered.empty: - logging.error("Engineered data (DataFrame) is empty. 
Cannot define labels.") - # Return empty tuple to signal failure - return pd.DataFrame(), "", [] - - # --- Call the refactored label generation function --- # - try: - # Pass the df_engineered directly - df_labeled_aligned, target_dir_col = _generate_direction_labels( - df_engineered.copy(), # Pass a copy - self.config - ) - except Exception as e: - logging.error(f"Label generation failed for fold: {e}.", exc_info=True) - # Return empty tuple - return pd.DataFrame(), "", [] + """Defines prediction labels and aligns features for a fold by calling the stage function.""" + logger.info("--- Calling Stage: Defining Labels and Aligning --- ") + # Call the refactored function from data_processing module + # --- Updated call to capture fwd_returns and eps --- # + df_labeled_aligned, target_dir_col, target_cols, fwd_returns_aligned, eps_aligned = define_labels_and_align_fold( + df_engineered=df_engineered, # Pass the engineered data for the fold + config=self.config + ) + # --- End Updated call --- # + # Check for failure if df_labeled_aligned.empty: - logging.error("Label generation resulted in an empty DataFrame for fold.") - # Return empty tuple + logger.error("Label definition and alignment stage failed.") + # Return empty tuple to signal failure upstream + # Note: The stage function now returns empty series/None on failure too return pd.DataFrame(), "", [] - # --- End Label Generation Call --- # - - # Separate features (X) and targets (y) - horizon = self.config['gru'].get('prediction_horizon', 5) - target_ret_col = f'fwd_log_ret_{horizon}' - target_cols = [target_ret_col, target_dir_col] - - # Ensure the columns actually exist - if not all(col in df_labeled_aligned.columns for col in target_cols): - logging.error(f"Generated label/return columns ({target_cols}) not found in DataFrame after label generation for fold.") - # Return empty tuple - return pd.DataFrame(), "", [] - - # We don't need to store X_raw_aligned, y_aligned etc. on self here, - # as the split_data method will operate on df_labeled_aligned - # We just need to return the result and the target column names. - logging.info(f"Labels defined and aligned for fold. Shape: {df_labeled_aligned.shape}") - # Return the labeled/aligned df, target dir col, and all target cols + # Store the target column names on self for use in subsequent steps (like split_data) + self.target_dir_col = target_dir_col + self.target_columns = target_cols + # --- Store aligned returns and eps on self --- # + self.fwd_returns_aligned = fwd_returns_aligned + self.eps_aligned = eps_aligned + # --- End Store --- # + + # Return the main results to the caller (execute method) return df_labeled_aligned, target_dir_col, target_cols - # --- Remove plot generation from here, move to end-of-run if needed --- # - # Heatmap generation doesn't make sense per-fold usually. + def split_data(self, df_labeled_aligned_fold: pd.DataFrame, fold_dates: Optional[Tuple] = None): + """Splits data for a fold by calling the stage function.""" + logger.info(f"--- Calling Stage: Splitting Data for Fold {self.current_fold} --- ") - def split_data(self): - """Splits features and targets into train, validation, and test sets chronologically.""" - logging.info("--- Stage: Splitting Data ---") - if self.X_raw_aligned is None or self.y_aligned is None: - logging.error("Aligned features/targets not available for splitting.") - sys.exit(1) - if not isinstance(self.X_raw_aligned.index, pd.DatetimeIndex): - logging.error("Feature index must be DatetimeIndex for chronological split. 
Aborting.") - sys.exit(1) - - split_cfg = self.config['split_ratios'] - train_ratio = split_cfg['train'] - val_ratio = split_cfg['validation'] - test_ratio = round(1.0 - train_ratio - val_ratio, 2) - logger.info(f"Using split ratios: Train={train_ratio:.2f}, Val={val_ratio:.2f}, Test={test_ratio:.2f}") - - total_len = len(self.X_raw_aligned) - train_end_idx = int(total_len * train_ratio) - val_end_idx = int(total_len * (train_ratio + val_ratio)) - - # Split features - self.X_train_raw = self.X_raw_aligned.iloc[:train_end_idx] - self.X_val_raw = self.X_raw_aligned.iloc[train_end_idx:val_end_idx] - self.X_test_raw = self.X_raw_aligned.iloc[val_end_idx:] - - # Split targets - self.y_train = self.y_aligned.iloc[:train_end_idx] - self.y_val = self.y_aligned.iloc[train_end_idx:val_end_idx] - self.y_test = self.y_aligned.iloc[val_end_idx:] - - # Split original engineered dataframe to keep original columns for backtesting/plotting - self.df_train_original = self.df_engineered_full.iloc[:train_end_idx] - self.df_val_original = self.df_engineered_full.iloc[train_end_idx:val_end_idx] - self.df_test_original = self.df_engineered_full.iloc[val_end_idx:] - - # Keep separate handle to direction target for training feature selector - self.y_dir_train = self.y_dir_aligned.iloc[:train_end_idx] - - logging.info(f"Data split complete:") - logging.info(f" Train: X={self.X_train_raw.shape}, y={self.y_train.shape} ({self.X_train_raw.index.min()} to {self.X_train_raw.index.max()})") - logging.info(f" Val: X={self.X_val_raw.shape}, y={self.y_val.shape} ({self.X_val_raw.index.min()} to {self.X_val_raw.index.max()})") - logging.info(f" Test: X={self.X_test_raw.shape}, y={self.y_test.shape} ({self.X_test_raw.index.min()} to {self.X_test_raw.index.max()})") - - if len(self.X_train_raw) == 0 or len(self.X_val_raw) == 0 or len(self.X_test_raw) == 0: - logging.error("One or more data splits are empty. Check data length and split ratios. Aborting.") - sys.exit(1) - - def select_and_prune_features(self): - """Performs feature selection (e.g., VIF, L1) on RAW/ENGINEERED data - and then prunes the SCALED data splits based on the selection.""" - logging.info("--- Stage: Selecting Features (on Raw/Engineered Data) and Pruning Scaled Data ---") - - # --- MODIFIED: Input for SELECTION is now X_*_raw --- # - if self.X_train_raw is None or self.y_dir_train is None: - logging.error("Raw training data (X_train_raw, y_dir_train) not available for feature selection.") - sys.exit(1) - # --- End Modification --- # - - # Perform feature selection using the RAW/ENGINEERED training set - # FeatureEngineer.select_features handles imputation if needed - # Note: VIF should ideally run on unscaled data. 
- logging.info("Selecting features based on raw/engineered training data...") - self.final_whitelist = self.feature_engineer.select_features( - self.X_train_raw, # Use raw/engineered data for selection - self.y_dir_train, + # Call the stage function, passing necessary state stored on self + # Note: This function raises SystemExit on failure, so no need for explicit return check here + # --- Updated call to capture split fwd_ret and eps --- # + ( + X_train_raw, X_val_raw, X_test_raw, + y_train, y_val, y_test, + df_train_original, df_val_original, df_test_original, + y_dir_train_ordinal, + fwd_ret_train, fwd_ret_val, + eps_train, eps_val, + y_dir_val_ordinal # <<< ADDED + ) = split_data_fold( + df_labeled_aligned=df_labeled_aligned_fold, + fwd_returns_aligned=self.fwd_returns_aligned, # Pass the aligned series + eps_aligned=self.eps_aligned, # Pass the aligned series + config=self.config, + target_columns=self.target_columns, + target_dir_col=self.target_dir_col, + fold_dates=fold_dates, + current_fold=self.current_fold ) - - # --- Save the final whitelist using IOManager (V3 Output Contract) --- # - if self.io: - try: - # Note: IOManager save_json doesn't directly support indent, saves minified - self.io.save_json( - self.final_whitelist, - f'final_whitelist', # Name for IOManager path construction - section='models', # Save under models// - # suffix=f'_{self.run_id}.json' # Suffix is auto-added by IOManager if needed - ) - logging.info(f"Saved final feature whitelist ({len(self.final_whitelist)} features) via IOManager to models/{self.run_id}/final_whitelist.json") - except Exception as e: - logging.error(f"Failed to save final feature whitelist using IOManager: {e}", exc_info=True) - else: - logging.warning("IOManager not available, attempting manual save of final_whitelist.json") - # Fallback to original manual save if IOManager is not present - whitelist_save_path = os.path.join(self.current_run_models_dir, f'final_whitelist_{self.run_id}.json') - try: - with open(whitelist_save_path, 'w') as f: - json.dump(self.final_whitelist, f, indent=4) - logging.info(f"Saved final feature whitelist ({len(self.final_whitelist)} features) manually to {whitelist_save_path}") - except Exception as e: - logging.error(f"Manual save of final feature whitelist failed: {e}", exc_info=True) - # --- End Save Update --- # - - # --- MODIFIED: Prune the SCALED data splits using the determined whitelist --- # - if self.X_train_scaled is None or self.X_val_scaled is None or self.X_test_scaled is None: - logging.error("Scaled data splits not available for pruning.") - sys.exit(1) - - logging.info(f"Pruning SCALED feature sets using final whitelist ({len(self.final_whitelist)} features): {self.final_whitelist}") - self.X_train_pruned = self.feature_engineer.prune_features(self.X_train_scaled, self.final_whitelist) - self.X_val_pruned = self.feature_engineer.prune_features(self.X_val_scaled, self.final_whitelist) - self.X_test_pruned = self.feature_engineer.prune_features(self.X_test_scaled, self.final_whitelist) - # --- End Modification --- # - - logging.info(f"Feature shapes after pruning scaled data: Train={self.X_train_pruned.shape}, Val={self.X_val_pruned.shape}, Test={self.X_test_pruned.shape}") - - # Verification and empty checks remain the same, using X_*_pruned - if not (self.X_train_pruned.columns.equals(self.X_val_pruned.columns) and - self.X_train_pruned.columns.equals(self.X_test_pruned.columns)): - logging.error("Column mismatch between pruned data splits. 
Check pruning logic.") - sys.exit(1) - - if self.X_train_pruned.empty or self.X_val_pruned.empty or self.X_test_pruned.empty: - logging.error("One or more feature splits are empty after pruning. Exiting.") - sys.exit(1) - - def scale_features(self): - """Scales features using StandardScaler fitted on the training set.""" - logging.info("--- Stage: Scaling Features ---") - # --- Input remains X_*_raw --- # - if self.X_train_raw is None or self.X_val_raw is None or self.X_test_raw is None: - logging.error("Raw feature sets (X_train_raw, etc.) not available for scaling.") - sys.exit(1) - # --- End Input --- # - - # Scaler saving path remains the same - # scaler_path = os.path.join(self.current_run_models_dir, f'feature_scaler_{self.run_id}.joblib') - - # Ensure we only scale numeric columns from the RAW training data - numeric_cols = self.X_train_raw.select_dtypes(include=np.number).columns - if len(numeric_cols) < self.X_train_raw.shape[1]: - non_numeric_cols = self.X_train_raw.select_dtypes(exclude=np.number).columns - logging.warning(f"Non-numeric columns detected in raw features: {non_numeric_cols.tolist()}. These will not be scaled.") - - if not numeric_cols.empty: - # Check if scaler was loaded previously (when loading GRU - this logic needs adjustment) - # If loading GRU, the scaler should have been loaded *before* this step in execute() - if self.scaler is None: - # This path is taken when train_gru=True OR if loading GRU failed to load scaler (which now errors earlier) - logging.info("Fitting StandardScaler on raw training data (numeric columns only)...") - self.scaler = StandardScaler() - self.scaler.fit(self.X_train_raw[numeric_cols]) - - # Save the fitted scaler - scaler_save_path = os.path.join(self.current_run_models_dir, f'feature_scaler_{self.run_id}.joblib') - try: - joblib.dump(self.scaler, scaler_save_path) - logging.info(f"Feature scaler saved to {scaler_save_path}") - except Exception as e: - logging.error(f"Failed to save feature scaler: {e}") - else: - # This path is taken if a scaler was successfully loaded when loading a GRU model - logging.info("Using pre-loaded scaler for feature scaling.") - - # Apply scaling to all splits (numeric columns only) - # Create copies to store scaled data - self.X_train_scaled = self.X_train_raw.copy() - self.X_val_scaled = self.X_val_raw.copy() - self.X_test_scaled = self.X_test_raw.copy() - - self.X_train_scaled[numeric_cols] = self.scaler.transform(self.X_train_raw[numeric_cols]) - self.X_val_scaled[numeric_cols] = self.scaler.transform(self.X_val_raw[numeric_cols]) - self.X_test_scaled[numeric_cols] = self.scaler.transform(self.X_test_raw[numeric_cols]) - logging.info("Features scaled successfully.") - else: - logging.warning("No numeric columns found to scale. Skipping scaling step.") - # If no numeric columns, the scaled data is the same as the raw data - self.X_train_scaled = self.X_train_raw - self.X_val_scaled = self.X_val_raw - self.X_test_scaled = self.X_test_raw - - # --- Remove assignment to X_*_pruned --- # - # Scaled data is now stored in X_*_scaled, pruning happens next. - - def run_baseline_checks(self): - """Runs baseline Logistic Regression check on selected, scaled validation data.""" - logging.info("--- Stage: Baseline Checks (Logistic Regression) ---") - - # Skip if ternary - if self.use_ternary: - logging.warning("Using ternary labels. 
Skipping binary Logistic Regression baseline check.") - return - - # --- MODIFIED: Input is now X_*_pruned (which is selected AND scaled) --- # - if self.X_train_pruned is None or self.y_train is None or \ - self.X_val_pruned is None or self.y_val is None: - logging.error("Pruned/Scaled features or targets not available for baseline check. Skipping.") - return - # --- End Modification --- # - - horizon = self.config['gru'].get('prediction_horizon', 5) - # Get the correct binary direction label column name - target_dir_col = f'direction_label_{horizon}' - - if target_dir_col not in self.y_train.columns or target_dir_col not in self.y_val.columns: - logging.error(f"Target direction column '{target_dir_col}' not found in y_train/y_val. Skipping baseline.") - return - - y_train_dir = self.y_train[target_dir_col] - y_val_dir = self.y_val[target_dir_col] - - # --- Use BaselineChecker --- # - try: - # Run the baseline check using the checker - baseline_report = self.baseline_checker.run_logistic_baseline( - X_train_pruned=self.X_train_pruned, - y_train_dir=y_train_dir, - X_val_pruned=self.X_val_pruned, - y_val_dir=y_val_dir - ) - - # --- Save Baseline Report (V3 Output Contract) --- # - if self.io: - try: - self.io.save_json( - baseline_report, - "baseline1_report", # As per revisions.txt - section='results', - use_txt=True # Save as .txt - ) - logging.info("Saved baseline1_report.txt") - except Exception as e: - logging.error(f"Failed to save baseline1_report using IOManager: {e}") - else: - logging.warning("IOManager not available, skipping saving of baseline1_report.txt") - # --- End Save --- # - - # --- Success Criteria Check (V3) --- # - ci_lower_bound = baseline_report.get("ci_lower_bound") - required_ci_lb = 0.52 # From revisions.txt - - if ci_lower_bound is None or np.isnan(ci_lower_bound): - logging.error("Baseline check FAILED: Could not determine CI lower bound. Aborting.") - print(f"\n{'*'*80}\nBASELINE CHECK FAILED: CI lower bound is NaN.\nAborting pipeline.\n{'*'*80}\n") - sys.exit("Baseline CI lower bound calculation failed.") - elif ci_lower_bound < required_ci_lb: - error_msg = f"BASELINE CHECK FAILED: Logistic Regression 95% CI lower bound ({ci_lower_bound:.3f}) is below {required_ci_lb} threshold." - logging.error(error_msg) - print(f"\n{'*'*80}\n{error_msg}\nConsider revising features or data.\nAborting pipeline.\n{'*'*80}\n") - sys.exit(f"Baseline edge too low (< {required_ci_lb} CI lower). Aborting pipeline.") - else: - success_msg = f"Baseline check passed! 
Logistic hit-rate 95%-CI lower bound: {ci_lower_bound:.3f} (>= {required_ci_lb})" - logging.info(success_msg) - print(f"\n{'='*80}\n{success_msg}\nProceeding with pipeline.\n{'='*80}\n") - # --- End Success Criteria Check --- # - - except Exception as e: - logging.error(f"An error occurred during baseline checks: {e}", exc_info=True) - # Decide if this should halt the pipeline - # For now, log the error and continue, but the CI check might have failed earlier - - # --- Original baseline logic removed --- # - - def create_sequences(self): - """Creates sequences for GRU input using selected, scaled features.""" - logging.info("--- Stage: Creating Sequences ---") - # --- MODIFIED: Input is now X_*_pruned (which is selected AND scaled) --- # - if self.X_train_pruned is None or self.y_train is None or \ - self.X_val_pruned is None or self.y_val is None or \ - self.X_test_pruned is None or self.y_test is None: - logging.error("Selected/Scaled features or targets not available for sequence creation.") - sys.exit(1) - # --- End Modification --- # - - lookback = self.config['gru'].get('lookback', 60) - horizon = self.config['gru'].get('prediction_horizon', 5) - target_ret_col = f'fwd_log_ret_{horizon}' - target_dir_col = f'direction_label3_{horizon}' if self.use_ternary else f'direction_label_{horizon}' - - logging.info(f"Creating sequences with lookback={lookback}") - - # Helper function remains the same, but gets X_*_pruned as input - def _create_sequences_helper(features_pruned_df, targets_df, lookback, ret_col, dir_col): - # Convert DataFrames to numpy arrays for efficiency - features_np = features_pruned_df.values # Input is already pruned+scaled - # ... (rest of helper remains the same) ... - y_ret_np = targets_df[ret_col].values - if targets_df[dir_col].dtype == 'object': - y_dir_np = np.stack(targets_df[dir_col].values) - else: - y_dir_np = targets_df[dir_col].values - - X, y_ret_seq, y_dir_seq = [], [], [] - target_indices = [] - for i in range(lookback, len(features_np)): - X.append(features_np[i-lookback : i]) - y_ret_seq.append(y_ret_np[i]) - y_dir_seq.append(y_dir_np[i]) - target_indices.append(targets_df.index[i]) - if not X: return None, None, None, None - X_np = np.array(X) - y_ret_seq_np = np.array(y_ret_seq) - y_dir_seq_np = np.array(y_dir_seq) - target_indices_pd = pd.Index(target_indices) - return X_np, y_ret_seq_np, y_dir_seq_np, target_indices_pd - - # Create sequences using X_*_pruned (which are now the final scaled+selected features) - self.X_train_seq, y_ret_train_seq, y_dir_train_seq, self.train_indices = _create_sequences_helper( - self.X_train_pruned, self.y_train, lookback, target_ret_col, target_dir_col - ) - self.X_val_seq, y_ret_val_seq, y_dir_val_seq, self.val_indices = _create_sequences_helper( - self.X_val_pruned, self.y_val, lookback, target_ret_col, target_dir_col - ) - self.X_test_seq, y_ret_test_seq, y_dir_test_seq, self.test_indices = _create_sequences_helper( - self.X_test_pruned, self.y_test, lookback, target_ret_col, target_dir_col - ) - - # Checks and target dict creation remain the same - # ... (rest of function) ... - if self.X_train_seq is None or self.X_val_seq is None: - logger.error(f"Sequence creation resulted in empty train or val arrays. Check lookback ({lookback}) vs split sizes. 
Aborting.") - sys.exit(1) - logging.info(f"Sequence shapes created:") - logging.info(f" Train: X={self.X_train_seq.shape}, y_ret={y_ret_train_seq.shape}, y_dir={y_dir_train_seq.shape}") - logging.info(f" Val: X={self.X_val_seq.shape}, y_ret={y_ret_val_seq.shape}, y_dir={y_dir_val_seq.shape}") - logging.info(f" Test: X={self.X_test_seq.shape if self.X_test_seq is not None else 'None'}, ...") # Shortened log - dir_key = "dir3" if self.use_ternary else "dir" - self.y_train_seq_dict = {"ret": y_ret_train_seq, "gauss_params": y_ret_train_seq, dir_key: y_dir_train_seq} - self.y_val_seq_dict = {"ret": y_ret_val_seq, "gauss_params": y_ret_val_seq, dir_key: y_dir_val_seq} - if y_ret_test_seq is not None and y_dir_test_seq is not None: - self.y_test_seq_dict = {"ret": y_ret_test_seq, "gauss_params": y_ret_test_seq, dir_key: y_dir_test_seq} - else: - self.y_test_seq_dict = None - logging.warning("Test sequences or targets could not be created. Backtesting might fail.") - - def train_or_load_gru(self): - """Trains a new GRU model or loads a pre-trained one using GRUModelHandler.""" - logging.info("--- Stage: Training or Loading GRU Model ---") - gru_cfg = self.config['gru'] - train_gru_flag = self.config['control'].get('train_gru', False) - - if train_gru_flag: - logging.info(f"Attempting to train a new GRU model for run {self.run_id}...") - if self.X_train_seq is None or self.y_train_seq_dict is None or \ - self.X_val_seq is None or self.y_val_seq_dict is None: - logging.error("Sequence data (train/val) not available for GRU training. Exiting.") - sys.exit(1) - - # Check if hyperparameter sweep is enabled - sweep_enabled = self.config.get('hyperparameter_tuning', {}).get('gru', {}).get('sweep_enabled', False) - - if sweep_enabled: - logging.info("Hyperparameter sweep enabled. Running Optuna optimization.") - - try: - # Import the GRUHyperTuner - from gru_sac_predictor.src.gru_hyper_tuner import GRUHyperTuner - - # Create fold directory for tuning results - fold_tuning_dir = os.path.join(self.current_run_models_dir, "hypertuning") - os.makedirs(fold_tuning_dir, exist_ok=True) - - # Initialize hyperparameter tuner - tuner = GRUHyperTuner(self.config, fold_tuning_dir) - - # Run optimization - best_params = tuner.optimize( - X_train=self.X_train_seq, - y_train_dict=self.y_train_seq_dict, - X_val=self.X_val_seq, - y_val_dict=self.y_val_seq_dict - ) - - # Train final model with best parameters - model_handler, history = tuner.train_with_best_params( - X_train=self.X_train_seq, - y_train_dict=self.y_train_seq_dict, - X_val=self.X_val_seq, - y_val_dict=self.y_val_seq_dict - ) - - if model_handler is not None and model_handler.model is not None: - self.gru_model = model_handler.model - self.gru_handler = model_handler # Replace handler with tuned one - self.gru_model_run_id_loaded_from = self.run_id - logging.info("Successfully trained GRU model with optimized hyperparameters.") - - # Save best parameters to the main run directory - best_params_path = os.path.join(self.current_run_models_dir, f'best_gru_params_{self.run_id}.json') - with open(best_params_path, 'w') as f: - json.dump(best_params, f, indent=4) - logging.info(f"Saved best hyperparameters to {best_params_path}") - else: - logging.error("Failed to train GRU model with optimized hyperparameters. Falling back to default parameters.") - # Fall back to default training - sweep_enabled = False - - except ImportError: - logging.error("Failed to import GRUHyperTuner. Make sure Optuna is installed. 
Falling back to default parameters.") - sweep_enabled = False - except Exception as e: - logging.error(f"Hyperparameter optimization failed: {str(e)}. Falling back to default parameters.") - sweep_enabled = False - - # If sweep is not enabled or failed, train with default parameters - if not sweep_enabled: - # Get parameters from config - lookback = gru_cfg.get('lookback', 60) - # Get feature count from scaled data (use shape[2] for sequences) - n_features = self.X_train_seq.shape[2] - epochs = gru_cfg.get('epochs', 25) - batch_size = gru_cfg.get('batch_size', 128) - patience = gru_cfg.get('patience', 5) # Use gru patience from config - - # Train the model - self.gru_model, history = self.gru_handler.train( - X_train=self.X_train_seq, - y_train_dict=self.y_train_seq_dict, - X_val=self.X_val_seq, - y_val_dict=self.y_val_seq_dict, - lookback=lookback, - n_features=n_features, - max_epochs=epochs, - batch_size=batch_size, - patience=patience - ) - - if self.gru_model is None: - logging.error("GRU model training failed. Exiting.") - sys.exit(1) - else: - # Save the newly trained model - saved_path = self.gru_handler.save() # Uses run_id from handler - if saved_path: - logging.info(f"Newly trained GRU model saved to {saved_path}") - else: - logging.warning("Failed to save the newly trained GRU model.") - # Set the loaded ID to the current run ID - self.gru_model_run_id_loaded_from = self.run_id - logging.info(f"Using GRU model trained in current run: {self.run_id}") - - # --- V3 Output Contract: Plot Learning Curve --- # - if self.io and history is not None and self.config.get('control', {}).get('generate_plots', True): - # Infer log dir path based on current models dir - log_dir = os.path.dirname(self.current_run_models_dir).replace('/models', '/logs') - csv_log_path = os.path.join(log_dir, 'gru_history.csv') - if os.path.exists(csv_log_path): - logging.info(f"Plotting learning curve from {csv_log_path}...") - try: - history_df = pd.read_csv(csv_log_path) - - # Determine metric keys (handle v2 vs v3 differences if necessary) - loss_key = 'loss' - val_loss_key = 'val_loss' - acc_key = None - val_acc_key = None - if 'dir3_accuracy' in history_df.columns: # V3 specific? - acc_key = 'dir3_accuracy' - val_acc_key = 'val_dir3_accuracy' - elif 'accuracy' in history_df.columns: # V2 or other? 
- acc_key = 'accuracy' - val_acc_key = 'val_accuracy' - - if acc_key is None: - logging.warning("Could not find a suitable accuracy metric in history CSV for plotting.") - n_panes = 1 # Only plot loss - else: - n_panes = 2 # Plot loss and accuracy - - # Get figure settings - fig_dpi = self.config.get('output', {}).get('figure_dpi', 150) - fig_size = self.config.get('output', {}).get('figure_size', [16, 9]) - footer_text = "© GRU-SAC v3" - - plt.style.use('seaborn-v0_8-darkgrid') - # Adjust figsize height based on panes - adjusted_fig_height = fig_size[1] * (n_panes / 3.0) # Rough scaling - fig, axes = plt.subplots(n_panes, 1, figsize=(fig_size[0], adjusted_fig_height), sharex=True) - - if n_panes == 1: - ax_loss = axes # Single axis - else: - ax_loss, ax_acc = axes # Multiple axes - - epochs = history_df['epoch'] + 1 # epochs are 0-indexed in csv - - # Pane 1: Loss (Log Scale) - ax_loss.plot(epochs, history_df[loss_key], label='Training Loss') - ax_loss.plot(epochs, history_df[val_loss_key], label='Validation Loss') - ax_loss.set_yscale('log') - ax_loss.set_ylabel('Loss (Log Scale)') - ax_loss.legend() - ax_loss.set_title('GRU Model Training Progress', fontsize=16) - ax_loss.grid(True, which="both", ls="--", linewidth=0.5) - - # Pane 2: Accuracy (if available) - if n_panes == 2: - ax_acc.plot(epochs, history_df[acc_key], label=f'Training {acc_key}') - ax_acc.plot(epochs, history_df[val_acc_key], label=f'Validation {val_acc_key}') - ax_acc.set_ylabel('Accuracy') - ax_acc.set_xlabel('Epoch') - ax_acc.legend() - ax_acc.grid(True, which="both", ls="--", linewidth=0.5) - else: - # If only loss pane, set xlabel there - ax_loss.set_xlabel('Epoch') - - # Add vertical line for early stopping epoch if available - if hasattr(history, 'epoch') and len(history.epoch) > 0: - # Early stopping epoch is the number of epochs run - early_stop_epoch = len(history.epoch) - if early_stop_epoch < max_epochs: # Only draw if early stopping occurred - for ax in fig.axes: - ax.axvline(x=early_stop_epoch, color='r', linestyle='--', linewidth=1, label=f'Early Stop @ {early_stop_epoch}') - # Add legend entry to the last plot - fig.axes[-1].legend() - - # Add footer - plt.figtext(0.99, 0.01, footer_text, horizontalalignment='right', - verticalalignment='bottom', fontsize=8, color='gray') - - plt.tight_layout(rect=[0, 0.03, 1, 0.97]) # Adjust layout - - # Save figure using IOManager - self.io.save_figure(fig, "gru_learning_curve", section='results') - logging.info("GRU learning curve plot saved.") - plt.close(fig) - - except FileNotFoundError: - logging.warning(f"GRU history file not found at {csv_log_path}. Cannot plot learning curve.") - except Exception as e: - logging.error(f"Failed to plot GRU learning curve: {e}", exc_info=True) - else: - logging.warning(f"GRU history file not found at {csv_log_path}. Cannot plot learning curve.") - elif not self.io: - logging.warning("IOManager not available, skipping GRU learning curve plot.") - # --- End Plot Learning Curve --- # - - else: # Load pre-trained GRU model - load_run_id = gru_cfg.get('model_load_run_id', None) - if not load_run_id: - logging.error("train_gru is False, but no gru.model_load_run_id specified in config. 
Exiting.") - sys.exit(1) - - logging.info(f"Attempting to load pre-trained GRU model from run ID: {load_run_id}") - # Construct the expected path using the base models directory - model_filename = f'gru_model_{load_run_id}.keras' # Assuming .keras extension - model_path = os.path.join(self.base_models_dir_path, f'run_{load_run_id}', model_filename) - - # Load the model using the handler - self.gru_model = self.gru_handler.load(model_path) - - if self.gru_model is None: - logging.error(f"Failed to load GRU model from path: {model_path}. Exiting.") - sys.exit(1) - else: - self.gru_model_run_id_loaded_from = load_run_id - logging.info(f"Successfully loaded GRU model from run: {load_run_id}") - - # --- Try loading associated scaler --- # - scaler_filename = f'feature_scaler_{load_run_id}.joblib' - # Adjust path: load from the specific run ID's model folder - scaler_load_path = os.path.join(self.base_models_dir_path, load_run_id, scaler_filename) - logging.info(f"Attempting to load associated scaler from: {scaler_load_path}") - if os.path.exists(scaler_load_path): - try: - self.scaler = joblib.load(scaler_load_path) - logging.info("Associated feature scaler loaded successfully.") - # --- Re-apply scaling using the loaded scaler --- # - numeric_cols = self.X_train_pruned.select_dtypes(include=np.number).columns - if not numeric_cols.empty: - # Important: Scale the _pruned data before sequence creation if scaler is loaded - logging.info("Re-scaling features using loaded scaler...") - self.X_train_scaled = self.X_train_pruned.copy() - self.X_val_scaled = self.X_val_pruned.copy() - self.X_test_scaled = self.X_test_pruned.copy() - self.X_train_scaled[numeric_cols] = self.scaler.transform(self.X_train_pruned[numeric_cols]) - self.X_val_scaled[numeric_cols] = self.scaler.transform(self.X_val_pruned[numeric_cols]) - self.X_test_scaled[numeric_cols] = self.scaler.transform(self.X_test_pruned[numeric_cols]) - logging.info("Features re-scaled successfully.") - # Need to recreate sequences after re-scaling! - self.create_sequences() - else: - logging.warning("Loaded scaler, but no numeric columns found in pruned data to re-scale.") - except Exception as e: - logging.error(f"Failed to load or apply associated scaler: {e}. Scaling might be inconsistent. Exiting.") - raise RuntimeError(f"Failed to load or apply scaler '{scaler_load_path}'") from e # Step 1-C: Raise error - else: - # --- Raise error if scaler missing (Step 1-C) --- # - logging.error(f"Associated feature scaler not found at {scaler_load_path} for run {load_run_id}. Cannot proceed. Exiting.") - raise RuntimeError(f"Feature scaler '{scaler_filename}' not found for run {load_run_id} at {scaler_load_path}") - # --- End Scaler Loading/Applying --- # - - # Final check: Ensure a GRU model is loaded/trained - if self.gru_model is None: - logging.error("No GRU model is available after train/load step. Exiting.") - sys.exit(1) - - def calibrate_probabilities(self): - """Calibrates GRU output probabilities and runs validation checks for the current fold.""" - logger.info(f"--- Fold {self.current_fold}: Stage: Calibrating Probabilities & GRU Validation ---") - - # Ensure GRU model and fold's validation data are available - if self.gru_model is None or self.X_val_seq is None or self.y_val_seq_dict is None: - logger.error(f"Fold {self.current_fold}: GRU model or validation sequence data not available. 
Skipping calibration.") - # Store None for calibrated probs to prevent downstream errors - self.p_cal_val = None - self.optimal_T = None - self.vector_cal_params = None - return # Skip the rest of the method - - p_cal_val_to_check = None - y_dir_val_to_check = None - calibration_method = self.config.get('calibration', {}).get('method', 'temperature') - is_ternary_check = self.use_ternary # Use cached flag - dir_key = "dir3" if is_ternary_check else "dir" - - # Local variables to store intermediate results needed for validation check - local_p_raw_val = None - local_y_dir_val_temp = None - local_dir3_logits_val = None - local_y_dir3_val_onehot = None - - # --- Define Fold-Specific Paths --- # - # Use IOManager if available, otherwise fallback to manual path construction - fold_models_dir = self.fold_dirs.get('models') - if not fold_models_dir: - logger.warning(f"Fold {self.current_fold}: Fold-specific models directory not found. Saving calibration params to main run dir.") - fold_models_dir = self.current_run_models_dir # Fallback - - # --- Vector Scaling --- # - if calibration_method == 'vector' and is_ternary_check: - if not self.vector_calibrator: - logger.error(f"Fold {self.current_fold}: VectorCalibrator not available. Cannot perform vector calibration.") - return - try: - local_dir3_logits_val = self.gru_handler.predict_logits(self.X_val_seq) - if local_dir3_logits_val is None: raise ValueError("Failed to get logits") - local_y_dir3_val_onehot = self.y_val_seq_dict.get(dir_key) - if local_y_dir3_val_onehot is None: raise ValueError("Missing 'dir3' in y_val_seq_dict") - - # --- Fit on current fold's validation data --- # - logger.info(f"Fold {self.current_fold}: Fitting Vector Scaling parameters...") - self.vector_calibrator.fit(local_dir3_logits_val, local_y_dir3_val_onehot) - self.vector_cal_params = self.vector_calibrator.optimal_params # Store params on self - - if self.vector_cal_params: - p_cal_val_to_check = self.vector_calibrator.calibrate(local_dir3_logits_val) - y_dir_val_to_check = local_y_dir3_val_onehot - # Save params for current fold - params_save_filename = f'calibration_vector_fold_{self.current_fold}.npy' - params_save_path = os.path.join(fold_models_dir, params_save_filename) - self.vector_calibrator.save_params(params_save_path) - logger.info(f"Fold {self.current_fold}: Saved vector calibration params to {params_save_path}") - else: - logger.warning(f"Fold {self.current_fold}: Vector calibration parameters not found after fitting. 
Cannot perform validation.") - self.vector_cal_params = None # Ensure it's None - except Exception as e: - logger.error(f"Fold {self.current_fold}: Error during vector calibration: {e}", exc_info=True) - self.vector_cal_params = None - - # --- Temperature Scaling --- # - elif calibration_method == 'temperature' and not is_ternary_check: - try: - predictions_val = self.gru_handler.predict(self.X_val_seq) - if predictions_val is None or len(predictions_val) < 3: raise ValueError("Failed to get predictions") - local_p_raw_val = predictions_val[2].flatten() # Assuming 3rd output is binary prob - local_y_dir_val_temp = self.y_val_seq_dict.get(dir_key) - if local_y_dir_val_temp is None: raise ValueError("Missing 'dir' in y_val_seq_dict") - if len(local_p_raw_val) != len(local_y_dir_val_temp): raise ValueError("Length mismatch") - - # --- Fit on current fold's validation data --- # - logger.info(f"Fold {self.current_fold}: Fitting Temperature Scaling parameter...") - self.optimal_T = self.calibrator.optimise_temperature(local_p_raw_val, local_y_dir_val_temp) - self.calibrator.optimal_T = self.optimal_T # Update calibrator instance - - if self.optimal_T is not None: - p_cal_val_to_check = self.calibrator.calibrate(local_p_raw_val) - y_dir_val_to_check = local_y_dir_val_temp - # Save temp for current fold - temp_save_filename = f'calibration_temp_fold_{self.current_fold}.npy' - temp_save_path = os.path.join(fold_models_dir, temp_save_filename) - np.save(temp_save_path, self.optimal_T) - logger.info(f"Fold {self.current_fold}: Saved optimal temperature T={self.optimal_T:.4f} to {temp_save_path}") - else: - logger.warning(f"Fold {self.current_fold}: Optimal temperature not found after fitting. Cannot perform validation.") - self.optimal_T = None # Ensure it's None - except Exception as e: - logger.error(f"Fold {self.current_fold}: Error during temperature calibration: {e}", exc_info=True) - self.optimal_T = None - else: # Covers cases where calibration method is wrong or mismatch with ternary state - logger.warning(f"Fold {self.current_fold}: Calibration method '{calibration_method}' or ternary state mismatch ({is_ternary_check}). 
Skipping GRU validation checks.") - self.optimal_T = None - self.vector_cal_params = None - - # --- Optimize Edge Threshold (after potential calibration) --- # - optimize_edge = self.config.get('calibration', {}).get('optimize_edge_threshold', False) - edge_thr_config = self.config.get('calibration', {}).get('edge_threshold', 0.1) # Default/fallback - self.optimized_edge_threshold = None # Reset for the fold - - if optimize_edge: - logger.info(f"Optimizing edge threshold using Youden's J on validation predictions...") - try: - # Prepare y_true for optimization (needs binary 0/1) - if y_dir_val_to_check is None: - raise ValueError("Cannot optimize edge threshold without valid y_dir_val.") - y_true_for_opt = None - p_cal_for_opt = None - if is_ternary_check: - if p_cal_val_to_check is not None: - # Convert ternary to binary: P(up) vs others - p_cal_for_opt = p_cal_val_to_check[:, -1] # P(up) - y_true_for_opt = (np.argmax(y_dir_val_to_check, axis=1) == 2).astype(int) - else: - raise ValueError("Cannot optimize ternary edge threshold without valid calibrated probabilities.") - else: - # Binary case - if p_cal_val_to_check is not None: - p_cal_for_opt = p_cal_val_to_check - y_true_for_opt = (np.asarray(y_dir_val_to_check) > 0.5).astype(int) - else: - raise ValueError("Cannot optimize binary edge threshold without valid calibrated probabilities.") - - # Perform optimization using the dedicated function from metrics - # Note: This assumes Calibrator.optimize_edge_threshold was removed or is not used here - # Ensure _calculate_optimal_edge_threshold is imported - self.optimized_edge_threshold = _calculate_optimal_edge_threshold(y_true_for_opt, p_cal_for_opt) - - if self.optimized_edge_threshold is not None: - logger.info(f"Optimized edge threshold: {self.optimized_edge_threshold:.4f}") - # Save optimized threshold - thresh_file = f"optimized_edge_threshold_fold_{self.current_fold}.txt" - try: - # Ensure fold_results_dir is defined (should be available from context) - # Assuming fold_results_dir is defined in the outer scope - if self.io and 'fold_results_dir' in locals() and fold_results_dir: - self.io.save_json({'optimized_edge_threshold': self.optimized_edge_threshold}, - thresh_file.replace('.txt',''), # Use filename as key for io - base_dir=fold_results_dir, use_txt=True) - logger.info(f"Saved optimized edge threshold to {os.path.join(fold_results_dir, thresh_file)}") - elif self.io: - # Fallback: Save to the main results directory if fold_results_dir isn't available - # Construct the path for logging clarity - fallback_path = os.path.join(self.io.get_section_path('results'), thresh_file) - self.io.save_json({'optimized_edge_threshold': self.optimized_edge_threshold}, - thresh_file.replace('.txt',''), - section='results', use_txt=True) # Fallback save - logger.info(f"Saved optimized edge threshold to main results dir: {fallback_path}") - else: - logger.warning("IOManager not available, cannot save optimized threshold.") - except NameError: # Specifically catch if fold_results_dir is not defined - logger.warning("fold_results_dir not defined. 
Attempting fallback save to main results dir.") - if self.io: - fallback_path = os.path.join(self.io.get_section_path('results'), thresh_file) - self.io.save_json({'optimized_edge_threshold': self.optimized_edge_threshold}, - thresh_file.replace('.txt',''), - section='results', use_txt=True) - logger.info(f"Saved optimized edge threshold to main results dir: {fallback_path}") - else: - logger.warning("IOManager not available, cannot save optimized threshold.") - except Exception as e: - logger.error(f"Failed to save optimized edge threshold: {e}") - else: - logger.warning("Edge threshold optimization failed or was skipped. Using config default.") - self.optimized_edge_threshold = edge_thr_config # Fallback - - except Exception as e: - logger.error(f"Error during edge threshold optimization: {e}", exc_info=True) - self.optimized_edge_threshold = edge_thr_config # Fallback - else: - # If optimization is disabled, store the config threshold for consistent use - self.optimized_edge_threshold = edge_thr_config - logger.info(f"Using edge threshold from config: {self.optimized_edge_threshold}") - # --- End Optimize Edge Threshold --- # - - # --- Perform GRU Validation Checks using the threshold stored in self.optimized_edge_threshold --- # - if p_cal_val_to_check is not None and y_dir_val_to_check is not None: - self._perform_gru_validation_checks( - p_cal_val=p_cal_val_to_check, - y_dir_val=y_dir_val_to_check, - is_ternary=is_ternary_check - ) - # Note: _perform_gru_validation_checks was already modified to use self.optimized_edge_threshold - else: - logger.warning("Could not perform GRU validation checks due to missing calibrated predictions or labels.") - - # --- Helper for GRU Validation Checks (Replaces edge check) --- # - def _perform_gru_validation_checks(self, p_cal_val, y_dir_val, is_ternary): - """ - Performs GRU validation checks: Edge-Filtered Accuracy and Brier Score. - Logs results and raises SystemExit if checks fail. - - Args: - p_cal_val: Calibrated probabilities on validation set. - For binary: (N,) shape, P(up). - For ternary: (N, 3) shape, [P(down), P(flat), P(up)]. - y_dir_val: True direction labels for validation set. - For binary: (N,) shape, 0/1 (potentially soft). - For ternary: (N, 3) shape, one-hot encoded. - is_ternary (bool): Flag indicating if ternary classification is used. 
- """ - logger.info(f"--- Fold {self.current_fold}: Performing GRU Validation Checks --- ") - - # --- Define thresholds (Consider moving to config) --- # - validation_criteria = self.config.get('validation_gates', {}).get('gru', {}) - edge_check_thr = validation_criteria.get('edge_filtered_acc_ci_lower_threshold', 0.55) - brier_check_thr = validation_criteria.get('brier_score_threshold', 0.19) - min_edge_samples = validation_criteria.get('edge_filtered_min_samples', 30) - # --- End Thresholds --- # - - # --- Determine Edge Threshold --- # - calib_config = self.config.get('calibration', {}) - optimize_edge = calib_config.get('optimize_edge_threshold', False) - edge_thr_config = calib_config.get('edge_threshold', 0.1) # Default/fallback - self.fold_edge_threshold = edge_thr_config # Initialize with config value - - if optimize_edge and not is_ternary: - logger.info(f"Fold {self.current_fold}: Optimizing edge threshold using Youden's J...") - # Use binary y_dir_val (potentially hard labels) and p_cal_val - y_true_for_opt = (y_dir_val > 0.5).astype(int) if not np.all((y_dir_val == 0) | (y_dir_val == 1)) else y_dir_val.astype(int) - self.fold_edge_threshold = _calculate_optimal_edge_threshold(y_true_for_opt, p_cal_val) - logger.info(f"Fold {self.current_fold}: Using optimized edge threshold: {self.fold_edge_threshold:.4f}") - # Save the optimized threshold? Optional - could save to fold results - if self.io: - fold_results_dir = self.fold_dirs.get('results') - if fold_results_dir: - self.io.save_json({'optimized_edge_threshold': self.fold_edge_threshold}, - f'optimized_edge_threshold_fold_{self.current_fold}', - base_dir=fold_results_dir, use_txt=True) - else: - logger.warning(f"Fold {self.current_fold}: Could not save optimized edge threshold, results dir missing.") - elif optimize_edge and is_ternary: - logger.warning(f"Fold {self.current_fold}: Edge threshold optimization requested but not supported for ternary. Using config value: {edge_thr_config:.4f}") - else: - logger.info(f"Fold {self.current_fold}: Using fixed edge threshold from config: {edge_thr_config:.4f}") - # --- End Determine Edge Threshold --- # - - # --- Edge-Filtered Accuracy Check --- # - edge_accuracy = np.nan - n_filtered = 0 - ci_lower = np.nan - passed_edge_acc = False # Default to False - - try: # Wrap calculation in try-except - if is_ternary: - # Use P(up) equivalent for binary check compatibility - p_up_equiv = p_cal_val[:, 2] - y_true_binary_equiv = (np.argmax(y_dir_val, axis=1) == 2).astype(int) - logger.info(f"Fold {self.current_fold}: Performing edge-filtered accuracy check on ternary model using P(up) equivalent.") - edge_accuracy, n_filtered = edge_filtered_accuracy( - y_true=y_true_binary_equiv, - p_cal=p_up_equiv, - thr=self.fold_edge_threshold # Use the determined threshold - ) - else: - # Binary case - edge_accuracy, n_filtered = edge_filtered_accuracy( - y_true=y_dir_val, - p_cal=p_cal_val, - thr=self.fold_edge_threshold # Use the determined threshold - ) - - if not np.isnan(edge_accuracy): - if n_filtered < min_edge_samples: - logger.warning(f"Fold {self.current_fold}: Edge Acc Check: Insufficient samples ({n_filtered} < {min_edge_samples}) meeting edge >= {self.fold_edge_threshold:.2f} for reliable CI. 
Check considered FAIL.") - passed_edge_acc = False # Fail if not enough samples - else: - try: - k_correct = int(round(edge_accuracy * n_filtered)) - ci_lower = st.binomtest(k_correct, n_filtered, p=0.5, alternative='greater').proportion_ci(confidence_level=0.95).low - passed_edge_acc = ci_lower >= edge_check_thr - logger.info(f"Fold {self.current_fold}: Edge Acc Check (edge >= {self.fold_edge_threshold:.2f}): Acc={edge_accuracy:.3f} ({k_correct}/{n_filtered}), 95% CI Lower={ci_lower:.3f} >= {edge_check_thr} -> {'Pass' if passed_edge_acc else 'FAIL'}") - except ValueError as binom_err: - logger.error(f"Fold {self.current_fold}: Edge Acc Check: Error calculating binomial test (k={k_correct}, n={n_filtered}): {binom_err}. Check considered FAIL.") - passed_edge_acc = False # Consider error as failure - else: - logger.error(f"Fold {self.current_fold}: Edge Acc Check: Calculation failed (NaN). Check considered FAIL.") - if n_filtered == 0: - logger.error(f" Reason: No validation samples met the edge threshold >= {self.fold_edge_threshold:.2f}") - passed_edge_acc = False # Consider NaN or 0 samples as failure - except Exception as e: - logger.error(f"Fold {self.current_fold}: Edge Acc Check: Unexpected error during calculation: {e}. Check considered FAIL.", exc_info=True) - passed_edge_acc = False # Consider error as failure - # --- End Edge Accuracy Check --- # - - # --- Brier Score Check (Revision 5) --- # - brier_score = np.nan - passed_brier = True # Default to Pass (will be set False if check runs and fails) - if is_ternary: - logger.warning(f"Fold {self.current_fold}: Brier score check currently only implemented for binary classification. Skipping for ternary.") - # Keep passed_brier = True for ternary to avoid blocking pipeline - else: - # Binary case - passed_brier = False # Reset to False for binary case - try: - brier_score = calculate_brier_score(y_true=y_dir_val, p_cal=p_cal_val) - if not np.isnan(brier_score): - passed_brier = brier_score <= brier_check_thr - logger.info(f"Fold {self.current_fold}: Brier Score Check: Score={brier_score:.4f} <= {brier_check_thr} -> {'Pass' if passed_brier else 'FAIL'}") - else: - logger.error(f"Fold {self.current_fold}: Brier Score Check: Calculation failed (NaN). Check considered FAIL.") - # passed_brier remains False - except Exception as e: - logger.error(f"Fold {self.current_fold}: Brier Score Check: Error calculating Brier score: {e}. Check considered FAIL.", exc_info=True) - # passed_brier remains False - # --- End Brier Score Check --- # - - # --- Final Decision --- # - if not passed_edge_acc or not passed_brier: - error_msg = f"FOLD {self.current_fold} GRU VALIDATION FAILED: Edge Acc Pass={passed_edge_acc} (Req CI>={edge_check_thr}), Brier Pass={passed_brier} (Req Score<={brier_check_thr}). Aborting fold." 
- logger.error(error_msg) - # Use sys.exit with a specific message for clarity - sys.exit(f"Fold {self.current_fold}: GRU validation gates failed (Edge Acc / Brier Score).") - else: # Corrected indentation - logger.info(f"Fold {self.current_fold}: GRU validation checks passed (Edge Acc & Brier Score).") # Corrected indentation - # --- End Validation Helper --- # - - def train_or_load_sac(self): - """Trains a new SAC agent offline or loads a pre-trained one for backtesting.""" - logging.info("--- Stage: Training or Loading SAC Agent ---") - train_sac_flag = self.config['control'].get('train_sac', False) - - if train_sac_flag: - if self.gru_model_run_id_loaded_from is None: - logging.error("Cannot run SAC training: GRU model run ID is not set (no model trained or loaded). Aborting.") - sys.exit(1) - - logging.info(f"SAC training is enabled. Instantiating SACTrainer...") - - # --- Determine Edge Threshold for SAC --- # - # Use the threshold determined during calibration (optimized or default) - edge_threshold_for_sac = self.optimized_edge_threshold if self.optimized_edge_threshold is not None else \ - self.config.get('calibration', {}).get('edge_threshold', 0.1) - logger.info(f"Using edge threshold {edge_threshold_for_sac:.4f} for SAC Trainer (heuristic seeding / env info)...") - - # --- Prepare Config for SAC Trainer --- # - # Create a copy of the config to potentially pass modified values - # Note: SACTrainer should ideally accept parameters like edge_threshold directly - # For now, we modify the dict copy passed to its constructor. - sac_trainer_config = self.config.copy() - # Ensure the calibration section reflects the threshold to be used - if 'calibration' not in sac_trainer_config: sac_trainer_config['calibration'] = {} - sac_trainer_config['calibration']['edge_threshold'] = edge_threshold_for_sac - # Also disable rolling calibration in the copy if it was enabled in the main config - if sac_trainer_config.get('calibration', {}).get('rolling_enabled', False): - logger.warning("SAC training enabled AND rolling calibration enabled. Disabling rolling calibration for the SAC training environment to prevent data leakage.") - sac_trainer_config['calibration']['rolling_enabled'] = False - # --- End Prepare Config --- # - - # Instantiate SACTrainer, passing necessary base directories from the main pipeline - # Ensure logs/results dirs exist - base_logs = self.dirs.get('logs') - if not base_logs: - base_logs = os.path.join(project_root, 'logs') - os.makedirs(base_logs, exist_ok=True) - logging.warning(f"Using default base logs dir for SACTrainer: {base_logs}") - - base_results = self.dirs.get('results') - if not base_results: - base_results = os.path.join(project_root, 'results') - os.makedirs(base_results, exist_ok=True) - logging.warning(f"Using default base results dir for SACTrainer: {base_results}") - - self.sac_trainer = SACTrainer( - config=sac_trainer_config, # Pass the potentially modified config - base_models_dir=self.base_models_dir_path, - base_logs_dir=base_logs, - base_results_dir=base_results - ) - - # --- Remove the old Revision 1 block that modified self.sac_trainer.config --- # - # # --- Revision 1: Handle Rolling Calibrator Conflict --- # - # ... (block removed) ... - # # --- End Revision 1 --- # - - # Start the training process - final_agent_path = self.sac_trainer.train(gru_run_id_for_sac=self.gru_model_run_id_loaded_from) - - if final_agent_path: - logger.info(f"SAC training completed. 
Final agent saved at: {final_agent_path}") - # Set the agent path to the newly trained agent for subsequent backtesting - self.sac_agent_load_path = final_agent_path - - # --- V3 Output Contract: Plot SAC Reward Curve --- # - if self.io and self.config.get('control', {}).get('generate_plots', True): - # Path to the rewards CSV logged by SACTrainer - # sac_trainer instance should have the sac_run_id and logs_dir path - sac_log_dir = self.sac_trainer.sac_run_logs_dir - rewards_csv_path = os.path.join(sac_log_dir, 'episode_rewards.csv') - - if os.path.exists(rewards_csv_path): - logging.info(f"Plotting SAC reward curve from {rewards_csv_path}...") - try: - rewards_df = pd.read_csv(rewards_csv_path) - - if not rewards_df.empty and 'episode_reward' in rewards_df.columns and 'total_step' in rewards_df.columns: - # Calculate EMA of reward - rewards_df['reward_ema'] = rewards_df['episode_reward'].ewm(alpha=0.2, adjust=False).mean() - - # Get figure settings - fig_dpi = self.config.get('output', {}).get('figure_dpi', 150) - fig_size = self.config.get('output', {}).get('figure_size', [16, 9]) - footer_text = "© GRU-SAC v3" - - plt.style.use('seaborn-v0_8-darkgrid') - fig, ax1 = plt.subplots(figsize=fig_size) - - color1 = 'tab:blue' - ax1.set_xlabel('Training Steps') - ax1.set_ylabel('Smoothed Episode Reward (EMA 0.2)', color=color1) - ax1.plot(rewards_df['total_step'], rewards_df['reward_ema'], color=color1, label='Reward EMA (0.2)') - ax1.tick_params(axis='y', labelcolor=color1) - ax1.grid(True, linestyle='--', alpha=0.6) - - # --- Placeholder for Action Variance / Checkpoints (Not currently logged) --- - # logging.warning("Action variance and checkpoint steps not currently logged in episode_rewards.csv. Omitting from plot.") - # ax2 = ax1.twinx() # instantiate a second axes that shares the same x-axis - # color2 = 'tab:red' - # ax2.set_ylabel('Action Variance', color=color2) # we already handled the x-label with ax1 - # ax2.plot(steps, action_variance_data, color=color2, linestyle=':', label='Action Variance') - # ax2.tick_params(axis='y', labelcolor=color2) - # Add checkpoint vertical lines: ax1.axvline(x=chkpt_step, color='grey', linestyle='--', linewidth=0.5) - # --- End Placeholder --- - - fig.suptitle('SAC Training Reward Curve', fontsize=16) - # Add footer - plt.figtext(0.99, 0.01, footer_text, horizontalalignment='right', - verticalalignment='bottom', fontsize=8, color='gray') - - plt.tight_layout(rect=[0, 0.03, 1, 0.95]) - - # Save figure using IOManager (save to the main pipeline's results dir) - self.io.save_figure(fig, "sac_reward_plot", section='results') - logging.info("SAC reward curve plot saved.") - plt.close(fig) - else: - logging.warning("Episode rewards CSV is empty or missing required columns ('episode_reward', 'total_step'). Skipping plot.") - except FileNotFoundError: - logging.warning(f"SAC rewards file not found at {rewards_csv_path}. Cannot plot reward curve.") - except Exception as e: - logging.error(f"Failed to plot SAC reward curve: {e}", exc_info=True) - else: - logging.warning(f"SAC rewards file not found at {rewards_csv_path}. Cannot plot reward curve.") - elif not self.io: - logging.warning("IOManager not available, skipping SAC reward curve plot.") - # --- End Plot SAC Reward Curve --- # - - else: - logger.error("SAC training failed. Proceeding without a newly trained agent.") - # Decide whether to fallback to loading or abort? Fallback for now. 
- self.sac_agent_load_path = self._determine_sac_load_path_from_config() - if self.sac_agent_load_path: - logger.warning(f"Falling back to loading SAC agent specified in config: {self.sac_agent_load_path}") - else: - logger.error("SAC training failed and no load path specified in config. Cannot proceed with backtesting.") - # Optionally exit: sys.exit(1) - # For now, allow pipeline to continue, backtester should handle None path - - else: # Load SAC agent based on config for backtesting - logging.info("SAC training is disabled (train_sac=False). Determining agent path to load for backtesting...") - self.sac_agent_load_path = self._determine_sac_load_path_from_config() - if self.sac_agent_load_path: - logger.info(f"SAC agent path for backtesting set to load from: {self.sac_agent_load_path}") - else: - logger.warning("No 'sac_load_run_id' specified in config. Backtester will need to handle using untrained/initial weights.") - - def _determine_sac_load_path_from_config(self) -> str | None: - """Helper to determine the SAC agent load path based on config control flags.""" - load_run_id = self.config['control'].get('sac_load_run_id') - load_step = self.config['control'].get('sac_load_step', 'final') - sac_agent_path = None - if load_run_id: - # Construct path assuming structure like: //agent_.pt - # The sac_train_run_id usually differs from the pipeline run_id - models_base = self.base_models_dir_path # Use the stored base models path - # Assume the SAC trainer saves checkpoints inside its own run folder (e.g., models/sac_train_.../sac_agent_final) - if load_step == 'final': - # SAC trainer saves final model in a folder named 'sac_agent_final' - sac_agent_path = os.path.join(models_base, load_run_id, 'sac_agent_final') - else: - # SAC trainer saves step checkpoints in folder 'sac_agent_step_N' - sac_agent_path = os.path.join(models_base, load_run_id, f'sac_agent_step_{load_step}') - - # Check if the determined path exists - if not os.path.exists(sac_agent_path): - logger.warning(f"Determined SAC load path does not exist: {sac_agent_path}. Will proceed without loading specified agent.") - sac_agent_path = None # Reset path if not found - - return sac_agent_path - - def run_backtest(self): - """Runs the backtest and checks performance criteria for the current fold.""" - logger.info(f"--- Fold {self.current_fold}: Stage: Running Backtest ---") - - # --- Extract original prices for backtest --- # - if self.df_test_original is None or self.df_test_original.empty: - logger.error(f"Fold {self.current_fold}: Original test data (df_test_original) is missing. Cannot run backtest.") - sys.exit(f"Fold {self.current_fold}: Missing original test data for backtest.") - - # Ensure required columns exist - required_cols = ['open', 'high', 'low', 'close', 'volume'] - if not all(col in self.df_test_original.columns for col in required_cols): - missing_cols = [col for col in required_cols if col not in self.df_test_original.columns] - logger.error(f"Fold {self.current_fold}: Original test data missing required price columns: {missing_cols}. 
Cannot run backtest.") - sys.exit(f"Fold {self.current_fold}: Missing required price columns in original test data.") - - original_prices = self.df_test_original[required_cols] - # --- End Price Extraction --- # - - # Run the backtest using the Backtester instance - # Note: Need to handle potential absence of test sequences if fold is too short - if self.X_test_seq is None or self.y_test_seq_dict is None or self.test_indices is None: - logger.warning(f"Fold {self.current_fold}: Test sequences not available (likely due to fold length/lookback). Skipping backtest stage for this fold.") - # Set results to None to indicate skip - self.backtest_results_df = None - self.backtest_metrics = None - self.metrics_log_df = None - return # Skip rest of the backtest stage - - # Pass the appropriate calibrator instance - calibrator_instance = None - vector_calibrator_instance = None - calibration_method = self.config.get('calibration',{}).get('method') - if calibration_method == 'temperature': - calibrator_instance = self.calibrator - if not hasattr(self, 'optimal_T'): # Ensure optimal_T was set - logger.error(f"Fold {self.current_fold}: Temperature calibration selected but optimal_T not found.") - raise SystemExit(f"Fold {self.current_fold}: Missing optimal_T for backtest.") - elif calibration_method == 'vector': - vector_calibrator_instance = self.vector_calibrator - if not hasattr(self, 'vector_cal_params'): # Ensure params were set - logger.error(f"Fold {self.current_fold}: Vector calibration selected but vector_cal_params not found.") - raise SystemExit(f"Fold {self.current_fold}: Missing vector_cal_params for backtest.") - - # Get raw predictions needed for rolling calibration - p_raw_test_for_bt = None - logits_test_for_bt = None - is_ternary = self.config.get('gru', {}).get('use_ternary_output', False) # Need to know if ternary - if self.config.get('calibration', {}).get('rolling_enabled', False): - logger.info(f"Fold {self.current_fold}: Getting raw GRU outputs for rolling calibration...") - if is_ternary: - logits_test_for_bt = self.gru_handler.predict_logits(self.X_test_seq) - if logits_test_for_bt is None: - logger.error(f"Fold {self.current_fold}: Failed to get GRU logits for rolling calibration.") - raise SystemExit(f"Fold {self.current_fold}: Failed GRU logit prediction.") - else: # Corrected indentation - preds_test_raw = self.gru_handler.predict(self.X_test_seq) - if preds_test_raw is None or len(preds_test_raw) < 3: - logger.error(f"Fold {self.current_fold}: Failed to get GRU raw predictions for rolling calibration.") - raise SystemExit(f"Fold {self.current_fold}: Failed GRU raw prediction.") - p_raw_test_for_bt = preds_test_raw[2].flatten() # Assuming index 2 is probabilities - - # Get the edge threshold determined during validation (optimized or fixed) - edge_threshold_for_bt = getattr(self, 'fold_edge_threshold', self.config.get('calibration', {}).get('edge_threshold', 0.1)) - logger.info(f"Fold {self.current_fold}: Using edge threshold {edge_threshold_for_bt:.4f} for backtest execution.") - - try: # Corrected indentation - self.backtest_results_df, self.backtest_metrics, self.metrics_log_df = self.backtester.run_backtest( - sac_agent_load_path=self.sac_agent_load_path, - X_test_seq=self.X_test_seq, - y_test_seq_dict=self.y_test_seq_dict, - test_indices=self.test_indices, - gru_handler=self.gru_handler, - # --- Pass Calibrator instances and initial state --- # - calibrator=calibrator_instance, - vector_calibrator=vector_calibrator_instance, - initial_optimal_T=getattr(self, 
'optimal_T', None), # Pass T if exists - initial_vector_params=getattr(self, 'vector_cal_params', None), # Pass params if exists - fold_edge_threshold=edge_threshold_for_bt, - # --- Pass raw predictions if needed for rolling cal --- # - p_raw_test=p_raw_test_for_bt, - logits_test=logits_test_for_bt, - # --- Pass original prices --- # - original_prices=self.df_test_original, # Pass the DataFrame - is_ternary=self.use_ternary, - fold_num=self.current_fold - ) - except SystemExit as e: # Corrected indentation - # Catch exits from backtester validation/execution - logger.error(f"Fold {self.current_fold}: Backtest aborted: {e}") - raise # Re-raise to stop the fold - except Exception as e: # Corrected indentation - logger.error(f"Fold {self.current_fold}: Unhandled error during backtester.run_backtest: {e}", exc_info=True) - # Treat as failure, ensure metrics are None - self.backtest_results_df = None - self.backtest_metrics = None - self.metrics_log_df = None - # Re-raise or exit? Let the main pipeline catch it. - raise SystemExit(f"Fold {self.current_fold}: Backtest execution failed unexpectedly.") from e - - if self.backtest_results_df is None or self.backtest_metrics is None: - # This case should ideally be caught by exceptions above now - logger.error(f"Fold {self.current_fold}: Backtesting failed to produce results (post-execution check).") - raise SystemExit(f"Fold {self.current_fold}: Backtest failed to produce results.") - else: - logger.info(f"Fold {self.current_fold}: Backtest completed successfully.") - - # --- Backtest Success Criteria Check (Now redundant as checks are inside run_backtest) --- # - # logger.info(f"Fold {self.current_fold}: Checking backtest performance against success criteria...") - # ... (Remove the check block here) ... - # --- End Backtest Check --- # - - def save_results(self): - """Saves backtest results, metrics, and plots using the Backtester instance for the current fold.""" - logger.info(f"--- Fold {self.current_fold}: Stage: Saving Results --- ") - if self.backtest_results_df is None or self.backtest_metrics is None: - logger.warning(f"Fold {self.current_fold}: No backtest results available to save. Skipping.") - return - - # Use IOManager to get fold-specific results directory if possible - results_dir = self.fold_dirs.get('results') - if not results_dir: - logger.warning(f"Fold {self.current_fold}: Results dir for fold not found. Saving results to main run dir.") - results_dir = self.dirs.get('results') # Fallback to main run results - - if not results_dir: - logger.error(f"Fold {self.current_fold}: Could not determine valid results directory. Cannot save backtest results.") - return - - # Pass results to the backtester's save method - self.backtester.save_results( - results_df=self.backtest_results_df, - metrics=self.backtest_metrics, - results_dir=results_dir, # Pass the determined directory - run_id=self.run_id, # Pass overall run_id for context in plots/reports - metrics_log_df=self.metrics_log_df, - fold_num=self.current_fold # Pass fold number for unique filenames - ) - - def evaluate_feature_ab_test(self, feature_name, feature_values): - """ - Performs A/B test for a new candidate feature. 
- - Args: - feature_name (str): Name of the candidate feature - feature_values (pd.Series or np.array): Values of the feature to test - - Returns: - tuple: (passed_gate, improvement, p_value) - whether feature improved accuracy by ≥1% with p<0.05 - """ - logging.info(f"--- A/B Testing Feature: {feature_name} ---") - - if self.X_train_scaled is None or self.y_train is None: - logging.error("Scaled features or targets not available for A/B test. Skipping.") - return False, 0, 1.0 - - horizon = self.config['gru'].get('prediction_horizon', 5) - target_dir_col = f'direction_label_{horizon}' - - if target_dir_col not in self.y_train.columns: - logging.error(f"Target direction column '{target_dir_col}' not found in y_train. Skipping A/B test.") - return False, 0, 1.0 - - y_train_dir = self.y_train[target_dir_col] - - try: - # Split train into teaching and validation sets - X_teach, X_val_subset, y_teach, y_val_subset = train_test_split( - self.X_train_scaled, y_train_dir, test_size=0.2, shuffle=False - ) - - # Baseline model (A) - without the new feature - model_a = LogisticRegression(max_iter=1000, solver="lbfgs", random_state=42) - model_a.fit(X_teach, y_teach) - y_pred_a = model_a.predict(X_val_subset) - accuracy_a = (y_pred_a == y_val_subset).mean() - - # Add the new feature to X_teach and X_val_subset - if len(feature_values) != len(self.X_train_scaled): - logging.error(f"Feature length mismatch: feature has {len(feature_values)} values, but X_train has {len(self.X_train_scaled)} rows") - return False, 0, 1.0 - - # Create copies of data with the new feature added - X_teach_b = X_teach.copy() - X_val_subset_b = X_val_subset.copy() - - # Determine which indices to use from the feature_values - teach_indices = X_teach.index - val_indices = X_val_subset.index - - # Add feature to both datasets - if isinstance(feature_values, pd.Series): - # If it's a Series, align by index - X_teach_b[feature_name] = feature_values.loc[teach_indices] - X_val_subset_b[feature_name] = feature_values.loc[val_indices] - else: - # If it's a numpy array, we need the original indices in the full dataset - # This assumes X_teach and X_val_subset came from contiguous parts of X_train - X_teach_b[feature_name] = feature_values[:len(X_teach)] - X_val_subset_b[feature_name] = feature_values[len(X_teach):len(X_teach)+len(X_val_subset)] - - # Model with new feature (B) - model_b = LogisticRegression(max_iter=1000, solver="lbfgs", random_state=42) - model_b.fit(X_teach_b, y_teach) - y_pred_b = model_b.predict(X_val_subset_b) - accuracy_b = (y_pred_b == y_val_subset).mean() - - # Calculate improvement - improvement = accuracy_b - accuracy_a - - # Calculate statistical significance with two-proportion z-test - n = len(y_val_subset) - count_correct_a = int(accuracy_a * n) - count_correct_b = int(accuracy_b * n) - - # Use proportion_test from statsmodels for the z-test - from statsmodels.stats.proportion import proportions_ztest - - # Format data for the test - count = np.array([count_correct_a, count_correct_b]) - nobs = np.array([n, n]) - - # Perform the test (alternative='larger' tests if B > A) - z_stat, p_value = proportions_ztest(count, nobs, alternative='larger') - - # Determine if the feature passes the gate: B-A ≥ 0.01 and p < 0.05 - passes_gate = improvement >= 0.01 and p_value < 0.05 - - logging.info(f"A/B Test Results for '{feature_name}':") - logging.info(f" Baseline accuracy (A): {accuracy_a:.3f}") - logging.info(f" With new feature (B): {accuracy_b:.3f}") - logging.info(f" Improvement (B-A): {improvement:.3f}") - 
logging.info(f" p-value: {p_value:.5f}") - logging.info(f" Passes gate (B-A ≥ 0.01 and p < 0.05): {passes_gate}") - - return passes_gate, improvement, p_value - - except Exception as e: - logging.error(f"Failed to perform A/B test for feature '{feature_name}': {e}", exc_info=True) - return False, 0, 1.0 - - # --- Wrapper Methods for Notebook Step-by-Step Execution --- - - def load_data(self): - """Wrapper for load_and_preprocess_data for notebook execution.""" - logging.info("--- Notebook Step: Load Data (Calling load_and_preprocess_data) ---") - self.load_and_preprocess_data() - # Store the primary result on self for notebook inspection - self.raw_data = self.df_raw - logging.info(f"Stored raw_data attribute. Shape: {self.raw_data.shape if self.raw_data is not None else 'None'}") - - def prepare_sequences(self): - """Wrapper for the sequence preparation steps for notebook execution.""" - logging.info("--- Notebook Step: Prepare Sequences (Calling internal steps) ---") - # Call the internal steps in the correct order - self.define_labels_and_align() - self.split_data() - self.select_and_prune_features() - self.scale_features() - # self.run_baseline_checks() # Optionally include if desired in this step - self.create_sequences() - logging.info("Finished sequence preparation steps.") - # Store key results on self for notebook inspection (add more as needed) - self.train_sequences = self.X_train_seq - self.val_sequences = self.X_val_seq - self.test_sequences = self.X_test_seq - self.train_targets = self.y_train_seq_dict # Assuming create_sequences stores dict here - self.val_targets = self.y_val_seq_dict - self.test_targets = self.y_test_seq_dict - - def calibrate_predictions(self): - """Wrapper for calibrate_probabilities for notebook execution.""" - logging.info("--- Notebook Step: Calibrate Predictions (Calling calibrate_probabilities) ---") - self.calibrate_probabilities() - # Store results on self for notebook inspection - self.optimal_threshold = self.optimal_T # Keep existing name for compatibility? Or use optimal_T? - self.optimal_calibration_params = self.vector_cal_params if self.use_ternary else self.optimal_T # Unified name - logging.info(f"Stored optimal_calibration_params: {self.optimal_calibration_params}") - - # --- Main Execution Method --- - - def execute(self): - """Runs the full trading pipeline end-to-end.""" - logger.info(f"--- Starting Trading Pipeline: Run ID {self.run_id} ---") - - # 1. Load and Preprocess Data - self.load_and_preprocess_data() - if self.data_processed is None: # Check if data loading failed - logger.error("Data loading failed. Exiting pipeline.") - return - - # 2. Engineer Features - self.engineer_features() - - # 3. Define Labels and Align - self.define_labels_and_align() - if self.data_processed is None: # Check if label generation failed - logger.error("Label generation failed. Exiting pipeline.") - return - - # 4. Split Data - self.split_data() - - # 5. Scale Features - self.scale_features() - - # --- MODIFIED ORDER --- - # 6. Baseline Checks (Now before pruning and sequencing) - self.run_baseline_checks() # Exits if baseline fails - logger.info("Baseline checks passed.") - - # 7. Select/Prune Features (Now before sequencing) - self.select_and_prune_features() - - # 8. Create Sequences (Now after scaling, baseline, pruning) - self.create_sequences() - # --- END MODIFIED ORDER --- - - # 9. Train/Load GRU Model - self.train_or_load_gru() - - # 10. 
Calibrate Probabilities - self.calibrate_probabilities() - if self.gru_model_handler is None or self.gru_model_handler.model is None: - logger.warning("GRU model not available, skipping edge accuracy check.") - elif not hasattr(self, 'p_cal_val'): - logger.warning("Calibrated validation probabilities not found, skipping edge accuracy check.") - else: - # Perform edge accuracy check only if calibration happened and model exists - self._perform_gru_validation_checks( - p_cal_val=self.p_cal_val, - y_dir_val=self.y_dir_val, - is_ternary=self.use_ternary - ) - - # 11. Train/Load SAC Agent - self.train_or_load_sac() - - # 12. Run Backtest - self.run_backtest() - - # 13. Save Results & Final Validation - self.save_results() # Includes final Sharpe/DD checks, exits if failed - - logger.info(f"--- Trading Pipeline Finished: Run ID {self.run_id} ---") - - # --- Walk-Forward Fold Generation --- # - def _generate_walk_forward_folds(self) -> Iterator[Tuple[pd.Timestamp, pd.Timestamp, pd.Timestamp, pd.Timestamp, pd.Timestamp, pd.Timestamp]]: - """ - Generates start and end timestamps for train, validation, and test sets - for each walk-forward fold based on config settings. - Requires self.df_raw to be loaded first to determine the full date range. - """ - wf_config = self.config.get('walk_forward', {}) - if not wf_config.get('enabled', False): - logger.info("Walk-forward validation disabled. Performing single split.") - # Yield a single pseudo-fold covering the entire dataset range - # The split_data method will handle the ratio-based split for this single fold - yield (self.df_raw.index.min(), self.df_raw.index.max(), - None, None, # Placeholder val dates - None, None) # Placeholder test dates - return - - train_days = wf_config.get('train_days', 60) - val_days = wf_config.get('val_days', 14) - test_days = wf_config.get('test_days', 14) - step_days = wf_config.get('step_days', 14) - offset_days = wf_config.get('initial_offset_days', 0) - - if not isinstance(self.df_raw.index, pd.DatetimeIndex): - raise ValueError("Raw data index must be DatetimeIndex for walk-forward validation.") - - full_start_date = self.df_raw.index.min() + timedelta(days=offset_days) - full_end_date = self.df_raw.index.max() - current_start = full_start_date - - logger.info(f"Generating Walk-Forward Folds: Train={train_days}d, Val={val_days}d, Test={test_days}d, Step={step_days}d") - logger.info(f"Full Data Range for Folds: {full_start_date} to {full_end_date}") - - fold_num = 0 - while True: - train_start = current_start - train_end = train_start + timedelta(days=train_days) - val_start = train_end - val_end = val_start + timedelta(days=val_days) - test_start = val_end - test_end = test_start + timedelta(days=test_days) - - # Check if the test period goes beyond the available data - if test_end > full_end_date: - logger.info(f"Stopping fold generation. Next test period ({test_start} to {test_end}) exceeds available data end date ({full_end_date}).") - break - - # Ensure we have at least some data in each period (basic check) - if train_end <= train_start or val_end <= val_start or test_end <= test_start: - logger.warning(f"Fold {fold_num}: Invalid date ranges calculated. 
Skipping.") - # Advance start date and retry - current_start += timedelta(days=step_days) - continue - - logger.info(f" Fold {fold_num}: Train=[{train_start}, {train_end}), Val=[{val_start}, {val_end}), Test=[{test_start}, {test_end})") - yield (train_start, train_end, val_start, val_end, test_start, test_end) - - # Advance the start for the next fold - current_start += timedelta(days=step_days) - fold_num += 1 - - if fold_num == 0: - logger.error("No valid walk-forward folds could be generated. Check data range and WF config.") - - # --- Core Pipeline Steps (Modified for Potential Fold Context) --- # - # Methods like load_and_preprocess_data, engineer_features, etc., - # might need adjustments if they rely heavily on `self` state - # that changes per fold. For now, assume they operate on data passed - # or reset their relevant `self` attributes appropriately. - - # Example modification for split_data: - def split_data(self, df_fold_data: pd.DataFrame, fold_dates: Tuple = None): - """Splits features and targets for a given fold based on dates or ratios.""" - logging.info("--- Stage: Splitting Data for Fold --- ") - # ... (Keep internal logic, but operate on df_fold_data) ... - # ... (Use fold_dates if provided for WF, else use ratios for single split) ... - if fold_dates and fold_dates[2] is not None: # Walk-forward fold - train_start, train_end, val_start, val_end, test_start, test_end = fold_dates - # Select data based on dates - self.X_train_raw = df_fold_data[train_start:train_end].drop(columns=self.target_columns) - self.y_train = df_fold_data.loc[self.X_train_raw.index, self.target_columns] - self.y_dir_train = df_fold_data.loc[self.X_train_raw.index, self.target_dir_col] - # ... similar slicing for val and test ... - self.X_val_raw = df_fold_data[val_start:val_end].drop(columns=self.target_columns) - self.y_val = df_fold_data.loc[self.X_val_raw.index, self.target_columns] - self.X_test_raw = df_fold_data[test_start:test_end].drop(columns=self.target_columns) - self.y_test = df_fold_data.loc[self.X_test_raw.index, self.target_columns] - # Store original data slices too - self.df_train_original = df_fold_data[train_start:train_end] - self.df_val_original = df_fold_data[val_start:val_end] - self.df_test_original = df_fold_data[test_start:test_end] - else: # Single split (WF disabled or first pseudo-fold) - split_cfg = self.config['split_ratios'] # Fallback to ratios - # ... (existing ratio-based split logic using df_fold_data) ... - total_len = len(df_fold_data) - train_end_idx = int(total_len * split_cfg['train']) - val_end_idx = int(total_len * (train_ratio + val_ratio)) - # ... etc ... - # ... (Log split shapes) ... 
+ # --- End Updated call --- # + + # Update self state with the results from the stage function + self.X_train_raw = X_train_raw + self.y_train = y_train + self.X_val_raw = X_val_raw + self.y_val = y_val + self.X_test_raw = X_test_raw + self.y_test = y_test + self.df_train_original = df_train_original + self.df_val_original = df_val_original + self.df_test_original = df_test_original + self.y_dir_train_ordinal = y_dir_train_ordinal + # --- Store split returns and eps --- # + self.fwd_ret_train = fwd_ret_train + self.eps_train = eps_train + self.fwd_ret_val = fwd_ret_val + self.eps_val = eps_val + self.y_dir_val_ordinal = y_dir_val_ordinal # <<< ADDED + # --- End Store --- # + + logger.info(f"Fold {self.current_fold}: Data splitting stage complete.") # --- Baseline moved earlier (operates on raw/engineered fold train data) --- - def run_baseline_checks(self, X_train_fold_raw: pd.DataFrame, y_train_fold_dir: pd.Series): - """Runs baseline Logistic Regression check on fold's raw/engineered training data. - Called *before* scaling and pruning. - """ - logger.info(f"--- Fold {self.current_fold}: Stage: Baseline Checks (Logistic Regression on Raw/Engineered Features) --- ") - - # Skip if ternary - if self.use_ternary: - logger.warning(f"Fold {self.current_fold}: Using ternary labels. Skipping binary Logistic Regression baseline check.") - return - - # Check inputs - if X_train_fold_raw is None or y_train_fold_dir is None: - logger.error(f"Fold {self.current_fold}: Raw training data not available for baseline check. Skipping.") - return - - baseline_gate_cfg = self.config.get('validation_gates', {}).get('baseline', {}) - required_ci_lb = baseline_gate_cfg.get('ci_lower_bound_threshold', 0.52) # Default if not in config - logger.info(f"Fold {self.current_fold}: Baseline check required CI lower bound >= {required_ci_lb}") + def run_baseline_checks(self, fold_num: int): + """Placeholder method to call the baseline check stage function.""" + logger.info(f"--- Fold {fold_num}: Invoking Baseline Check Stage --- ") - # --- Use BaselineChecker --- # + if self.baseline_checker is None: + logger.error(f"Fold {fold_num}: BaselineChecker not initialized. Cannot run baseline checks.") + # Optionally, initialize it here if appropriate, or ensure it's done in __init__ or setup + # For now, we'll rely on it being initialized earlier. 
+ # raise SystemExit(f"Fold {fold_num}: BaselineChecker missing.") + # Re-initializing here if logic allows: + if self.config.get('validation_gates', {}).get('run_baseline_check', False): + logger.info(f"Fold {fold_num}: Initializing BaselineChecker for baseline checks.") + self.baseline_checker = BaselineChecker(self.config) + else: + logger.warning(f"Fold {fold_num}: Baseline checks are disabled in config, but BaselineChecker was not initialized.") + # Decide if this is an error or just a skip condition + return # Skip if checks disabled and checker not ready + + # Retrieve necessary data from state + X_train_raw = self.X_train_raw + y_train_dir = self.y_dir_train_ordinal # Get the specific direction labels from split_data + use_ternary = self.use_ternary # Already stored on self + fold_dirs = self.fold_dirs # Already stored on self + + # Check if baseline check should run (based on config and ternary status) + run_check = self.config.get('validation_gates', {}).get('run_baseline_check', False) + if not run_check: + logger.info(f"Fold {fold_num}: Skipping baseline checks as per configuration.") + return + if use_ternary: + logger.warning(f"Fold {fold_num}: Skipping baseline checks as ternary targets are enabled.") + return + + if X_train_raw is None or y_train_dir is None: + logger.error(f"Fold {fold_num}: Cannot run baseline checks. Missing raw training features or direction labels in state.") + # Decide on error handling: maybe SystemExit or just log and continue depending on strictness + raise SystemExit(f"Fold {fold_num}: Critical data missing for baseline check.") + + # Call the stage function from evaluation.py try: - # Run the baseline check using the checker - baseline_report = self.baseline_checker.run_logistic_baseline( - X_train_fold_raw, # Pass the fold's raw/engineered training features - y_train_fold_dir # Pass the fold's training direction labels + run_baseline_checks_fold( + X_train_scaled=self.X_train_scaled, + X_val_scaled=self.X_val_scaled, + y_train_dir_ordinal=self.y_dir_train_ordinal, + fwd_ret_train=self.fwd_ret_train, + eps_train=self.eps_train, + fwd_ret_val=self.fwd_ret_val, + eps_val=self.eps_val, + baseline_checker=self.baseline_checker, # Pass the initialized instance + config=self.config, + io=self.io, # Pass the IOManager instance + fold_num=fold_num, + fold_dirs=fold_dirs, # Pass fold-specific directories + base_results_dir=self.io.run_results_dir if self.io else '.' # Fallback save location ) - - # --- Save Baseline Report --- # - if self.io: - try: - # Ensure fold-specific directory exists if possible - fold_results_dir = self.fold_dirs.get('results') - if not fold_results_dir: - logger.warning(f"Fold {self.current_fold}: Results dir for fold not found. Saving baseline report to main run dir.") - fold_results_dir = self.dirs.get('results') # Fallback to main results - - if fold_results_dir: # Check again after fallback - self.io.save_json( - baseline_report, - f"baseline_report_fold_{self.current_fold}", - #section='results', # IOManager prepends run_id/results, need relative path within - base_dir=fold_results_dir, # Save directly to fold dir - use_txt=True # Save as .txt - ) - logger.info(f"Fold {self.current_fold}: Saved baseline report.") - else: - logger.warning(f"Fold {self.current_fold}: Could not determine valid results directory. 
Skipping baseline report save.") - except Exception as e: - logger.error(f"Fold {self.current_fold}: Failed to save baseline report: {e}") - else: - logger.warning(f"Fold {self.current_fold}: IOManager not available, skipping saving of baseline report.") - # --- End Save --- # - - # --- Success Criteria Check --- # - ci_lower_bound = baseline_report.get("ci_lower_bound") - - if ci_lower_bound is None or np.isnan(ci_lower_bound): - error_msg = f"FOLD {self.current_fold} BASELINE CHECK FAILED: Could not determine CI lower bound. Aborting fold." - logger.error(error_msg) - sys.exit(f"Fold {self.current_fold}: Baseline CI lower bound calculation failed.") - elif ci_lower_bound < required_ci_lb: - error_msg = f"FOLD {self.current_fold} BASELINE CHECK FAILED: Logistic Regression 95% CI lower bound ({ci_lower_bound:.3f}) is below {required_ci_lb} threshold. Aborting fold." - logger.error(error_msg) - sys.exit(error_msg) - else: - success_msg = f"Fold {self.current_fold}: Baseline check passed! Logistic hit-rate 95%-CI lower bound: {ci_lower_bound:.3f} (>= {required_ci_lb})" - logger.info(success_msg) - # --- End Success Criteria Check --- # - - except SystemExit: - raise # Reraise SystemExit to stop the fold + logger.info(f"Fold {fold_num}: Baseline check stage completed successfully.") + except SystemExit as e: + logger.error(f"Fold {fold_num} failed baseline checks. Raising SystemExit to halt pipeline. Reason: {e}") + raise # Re-raise SystemExit to stop the pipeline except Exception as e: - logger.error(f"Fold {self.current_fold}: An error occurred during baseline checks: {e}. Aborting fold.", exc_info=True) - # Treat other exceptions as fatal for the fold - sys.exit(f"Fold {self.current_fold}: Unhandled exception during baseline check.") + logger.error(f"Fold {fold_num}: An unexpected error occurred during the baseline check stage: {e}", exc_info=True) + # Depending on policy, either raise SystemExit or just log and potentially continue + raise SystemExit(f"Fold {fold_num}: Unhandled exception in baseline check stage.") from e # --- execute method refactored for Walk-Forward --- # def execute(self): @@ -2295,166 +529,254 @@ class TradingPipeline: # 2. Generate Walk-Forward Folds fold_generator = self._generate_walk_forward_folds() - all_fold_metrics = [] # Store metrics from each fold + self.all_fold_metrics = [] # Reset fold metrics list for the run all_successful_sac_agent_paths = [] # Store paths of successfully trained SAC agents per fold fold_count = 0 # 3. 
Loop Through Folds for fold_dates in fold_generator: fold_count += 1 - logger.info(f"=== Processing Fold {fold_count} ===") + self.current_fold = fold_count # Set current fold number + logger.info(f"=== Processing Fold {self.current_fold} ===") + + # --- Fix Start: Handle single split case --- + if fold_dates is None: + # Calculate dates for single split based on ratios + if not self.df_raw.empty and isinstance(self.df_raw.index, pd.DatetimeIndex): + full_start_date = self.df_raw.index.min() + full_end_date = self.df_raw.index.max() + full_duration = full_end_date - full_start_date + + split_ratios = self.config.get('walk_forward', {}).get('split_ratios', {'train': 0.7, 'validation': 0.15}) # Default ratios + train_ratio = split_ratios.get('train', 0.7) + val_ratio = split_ratios.get('validation', 0.15) + test_ratio = 1.0 - train_ratio - val_ratio + + if not (0 < train_ratio < 1 and 0 < val_ratio < 1 and 0 <= test_ratio < 1 and abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9): + logger.error(f"Invalid split_ratios in config: {split_ratios}. Ratios must sum to 1. Exiting.") + raise SystemExit("Invalid split_ratios configuration.") + + train_end_offset = full_start_date + full_duration * train_ratio + val_end_offset = train_end_offset + full_duration * val_ratio + + # Find nearest index points + train_start = full_start_date + # Find the index *before or at* the calculated end time + train_end_idx = self.df_raw.index.get_indexer([train_end_offset], method='ffill')[0] + train_end = self.df_raw.index[train_end_idx] + + val_start_idx = train_end_idx + 1 + if val_start_idx >= len(self.df_raw.index): + logger.error("Not enough data points for validation split based on ratios. Exiting.") + raise SystemExit("Insufficient data for validation split.") + val_start = self.df_raw.index[val_start_idx] + + val_end_idx = self.df_raw.index.get_indexer([val_end_offset], method='ffill')[0] + # Ensure val_end is after val_start + val_end_idx = max(val_end_idx, val_start_idx) + if val_end_idx >= len(self.df_raw.index): # If val_end_offset calculation goes beyond data + val_end = full_end_date + else: + val_end = self.df_raw.index[val_end_idx] + + test_start_idx = val_end_idx + 1 + if test_ratio > 1e-9 and test_start_idx < len(self.df_raw.index): # Only if there's a test set and data left + test_start = self.df_raw.index[test_start_idx] + test_end = full_end_date + else: # No test set or no data left for it + test_start = None + test_end = None + # Adjust val_end to be the absolute end if there's no test set + if test_ratio <= 1e-9: + val_end = full_end_date + logger.info("No test set defined by split_ratios. Validation set extends to end of data.") + else: + logger.warning("Not enough data points remaining for test split. 
Test set will be empty.") + + logger.info(f"Single split calculated: Train=[{train_start}, {train_end}], Val=[{val_start}, {val_end}], Test=[{test_start}, {test_end}]") + # Assign calculated dates to the fold_dates tuple for unpacking + fold_dates = (train_start, train_end, val_start, val_end, test_start, test_end) + else: + logger.error("Cannot calculate single split: Raw data not loaded or has incorrect index type.") + raise SystemExit("Failed to calculate single data split.") + # --- Fix End --- + train_start, train_end, val_start, val_end, test_start, test_end = fold_dates - # Select data for the current fold (Train+Val+Test periods combined for initial processing) - # Handle single split case where val/test dates might be None + # Setup fold-specific directories using IOManager if available + if self.io: + # self.fold_dirs = self.io.setup_fold_dirs(self.current_fold) + self.fold_dirs = self.io.get_fold_dirs(self.current_fold) # Correct method name + else: + self.fold_dirs = {} # Set empty if no IOManager + logger.warning(f"Fold {self.current_fold}: IOManager not available, cannot create fold-specific directories.") + + # Select data for the current fold fold_start_date = train_start - fold_end_date = test_end if test_end is not None else train_end # Use train_end if single split - - # Ensure dates are timezone-aware if df_raw index is + fold_end_date = test_end if test_end is not None else train_end if self.df_raw.index.tz is not None: fold_start_date = fold_start_date.tz_localize(self.df_raw.index.tz) if fold_start_date.tz is None else fold_start_date fold_end_date = fold_end_date.tz_localize(self.df_raw.index.tz) if fold_end_date.tz is None else fold_end_date current_fold_data_raw = self.df_raw[fold_start_date:fold_end_date] if current_fold_data_raw.empty: - logger.warning(f"Fold {fold_count}: No raw data found for range {fold_start_date} to {fold_end_date}. Skipping fold.") + logger.warning(f"Fold {self.current_fold}: No raw data found for range {fold_start_date} to {fold_end_date}. Skipping fold.") continue - logger.info(f"Fold {fold_count}: Raw data range [{current_fold_data_raw.index.min()}, {current_fold_data_raw.index.max()}]") + logger.info(f"Fold {self.current_fold}: Raw data range [{current_fold_data_raw.index.min()}, {current_fold_data_raw.index.max()}]") # --- Run Pipeline Steps within the Fold --- # try: - # a. Engineer Features for the fold's raw data - # Assuming engineer_features operates on the passed df or resets internal state - # We need to manage the dataframe state carefully here. - # Let's pass the data explicitly for clarity. - df_engineered_fold = self.engineer_features(current_fold_data_raw) + # a. Engineer Features + df_engineered_fold = self.engineer_features(current_fold_data_raw) + if df_engineered_fold.empty: + raise SystemExit(f"Fold {self.current_fold}: Feature engineering resulted in empty dataframe.") - # b. Define Labels and Align for the fold - df_labeled_aligned_fold, target_dir_col_fold, target_cols = self.define_labels_and_align(df_engineered_fold) - self.target_dir_col = target_dir_col_fold # Store for use in split - self.target_columns = target_cols + # b. Define Labels and Align + df_labeled_aligned_fold, _, _ = self.define_labels_and_align(df_engineered_fold) + if df_labeled_aligned_fold.empty: + raise SystemExit(f"Fold {self.current_fold}: Label definition resulted in empty dataframe.") - # c. Split data *within* the fold (using dates or ratios) + # c. 
Split data self.split_data(df_labeled_aligned_fold, fold_dates) - # Now self.X_train_raw, self.y_train etc. hold data for *this fold* + # self.X_train_raw, self.y_train etc. now hold data for *this fold* - # d. Baseline Check (Moved earlier - uses fold's raw train data) - logger.info(f"Fold {fold_count}: Running baseline check on raw/engineered features...") - self.run_baseline_checks(self.X_train_raw, self.y_dir_train) - logger.info(f"Fold {fold_count}: Baseline checks passed.") + # d. Scale Features (MUST happen before baseline checks) + self.scale_features() # Updates self.X_*_scaled, self.scaler, raises SystemExit on fail - # e. Select/Prune Features (Moved earlier) - # Selects based on self.X_train_raw, Prunes self.X_train_scaled -> self.X_train_pruned ... - logger.info(f"Fold {fold_count}: Selecting features (raw) & preparing for pruning...") - self.select_and_prune_features() + # e. Baseline Check (NOW uses scaled data) + self.run_baseline_checks(self.current_fold) # Raises SystemExit on fail - # f. Scale Features (Now after baseline & selection) - logger.info(f"Fold {fold_count}: Scaling features...") - self.scale_features() + # f. Select Features (Whitelist determined, uses raw train data internally) + self.select_and_prune_features() # Updates self.final_whitelist, raises SystemExit on fail - # g. Prune Features (Now applied to scaled data using prior selection) - # This step is now effectively done *within* select_and_prune_features - logger.info(f"Fold {fold_count}: Features pruned based on prior selection.") + # g. Prune Features (NOW uses scaled data and final_whitelist) + logger.info(f"--- Calling Stage: Pruning Features for Fold {self.current_fold} --- ") + X_train_pruned, X_val_pruned, X_test_pruned = prune_features_fold( + X_train_scaled=self.X_train_scaled, + X_val_scaled=self.X_val_scaled, + X_test_scaled=self.X_test_scaled, + final_whitelist=self.final_whitelist, + feature_engineer=self.feature_engineer, + fold_num=self.current_fold + ) + # Update self state for pruned data + self.X_train_pruned = X_train_pruned + self.X_val_pruned = X_val_pruned + self.X_test_pruned = X_test_pruned + logger.info(f"Fold {self.current_fold}: Feature pruning stage complete.") - # h. Create Sequences for the fold (Uses pruned, scaled data) - logger.info(f"Fold {fold_count}: Creating sequences...") - self.create_sequences() # Uses self.X_train_pruned etc. -> self.X_train_seq ... + # h. Create Sequences (Uses pruned data) + self.create_sequences() # Uses self.X_*_pruned etc. -> self.X_*_seq ... - # i. Train/Load GRU Model for the fold - # TODO: Add Optuna sweep integration here if enabled - self.train_or_load_gru() # Uses self.X_train_seq etc. + # i. Train/Load GRU Model + self.train_or_load_gru() # Uses self.X_*_seq etc. - # j. Calibrate Probabilities for the fold - # TODO: Add rolling calibration logic here - self.calibrate_probabilities() # Uses self.X_val_seq etc. -> runs validation checks + # j. Calibrate Probabilities & Validate GRU + self.calibrate_probabilities() # Uses self.X_val_seq, runs validation checks, raises SystemExit on fail - # k. Train/Load SAC Agent for the fold + # j.2. 
Perform GRU Validation Checks (New Step) + # This uses the p_cal_val_for_check and y_dir_val_for_check stored by calibrate_probabilities + run_gru_validation_checks_fold( + config=self.config, + current_fold=self.current_fold, + p_cal_val=getattr(self, 'p_cal_val_for_check', None), # Use getattr for safety + y_dir_val=getattr(self, 'y_dir_val_for_check', None), + optimized_edge_threshold=self.optimized_edge_threshold, + use_ternary=self.use_ternary, + io=self.io + ) + # Note: run_gru_validation_checks_fold raises SystemExit on failure + + # k. Train/Load SAC Agent self.train_or_load_sac() # Uses artifacts from GRU step - # l. Run Backtest for the fold (on fold's test set) - # Note: Backtest gate failure no longer causes SystemExit immediately - self.run_backtest() # Uses self.X_test_seq etc. -> runs validation checks + # l. Run Backtest + self.run_backtest() # Uses self.X_test_seq, runs validation checks, raises SystemExit on fail # m. Persist Fold Artefacts & Store Metrics - # Example: self.io.save_fold_results(fold_count, self.backtest_metrics) - logger.info(f"Storing metrics for Fold {fold_count}") - # Store metrics regardless of backtest success/failure for aggregation + logger.info(f"Storing metrics for Fold {self.current_fold}") if self.backtest_metrics is not None: fold_metrics = self.backtest_metrics.copy() - fold_metrics['fold_number'] = fold_count + fold_metrics['fold_number'] = self.current_fold fold_metrics['train_start'] = train_start.isoformat() if train_start else None fold_metrics['train_end'] = train_end.isoformat() if train_end else None fold_metrics['val_start'] = val_start.isoformat() if val_start else None fold_metrics['val_end'] = val_end.isoformat() if val_end else None fold_metrics['test_start'] = test_start.isoformat() if test_start else None fold_metrics['test_end'] = test_end.isoformat() if test_end else None - # Add a status field based on whether metrics seem valid (e.g., Sharpe exists) - # Backtester should ideally return a status or use specific metrics keys - if 'Annualized Sharpe Ratio' in fold_metrics and not pd.isna(fold_metrics['Annualized Sharpe Ratio']): - fold_metrics['status'] = 'success' # Assume success if Sharpe exists + # Determine status based on metrics + if 'Annualized Sharpe Ratio' in fold_metrics and not pd.isna(fold_metrics['Annualized Sharpe Ratio']): + fold_metrics['status'] = 'success' else: - fold_metrics['status'] = 'failed_backtest' # Assume backtest failed if Sharpe is missing/NaN - all_fold_metrics.append(fold_metrics) + fold_metrics['status'] = 'failed_backtest' + self.all_fold_metrics.append(fold_metrics) else: - logger.warning(f"Fold {fold_count}: No backtest metrics generated to store.") - # Still store a failure record for aggregation count - all_fold_metrics.append({'fold_number': fold_count, 'status': 'failed_backtest', 'error': 'No metrics returned'}) + logger.warning(f"Fold {self.current_fold}: No backtest metrics generated to store.") + self.all_fold_metrics.append({'fold_number': self.current_fold, 'status': 'failed_backtest', 'error': 'No metrics returned'}) - # --- Store SAC Agent Path if Trained --- # + # Store SAC Agent Path if Trained Successfully for this fold if self.config.get('control', {}).get('train_sac', False) and self.sac_agent_load_path: - # Check if the path corresponds to a newly trained agent for this fold - # We assume self.sac_agent_load_path holds the path to the agent used/trained in this fold - if os.path.exists(self.sac_agent_load_path): - logger.info(f"Fold {self.current_fold}: Storing SAC agent 
path for aggregation: {self.sac_agent_load_path}") - all_successful_sac_agent_paths.append(self.sac_agent_load_path) - else: - logger.warning(f"Fold {self.current_fold}: Trained SAC agent path {self.sac_agent_load_path} not found after training. Cannot use for aggregation.") - # --- End Store --- # + # Assuming sac_agent_load_path points to the *newly* trained agent if training occurred + if hasattr(self, 'sac_trainer') and self.sac_trainer and self.sac_trainer.last_saved_agent_path == self.sac_agent_load_path: + if os.path.exists(self.sac_agent_load_path): + logger.info(f"Fold {self.current_fold}: Storing successfully trained SAC agent path for aggregation: {self.sac_agent_load_path}") + all_successful_sac_agent_paths.append(self.sac_agent_load_path) + else: + logger.warning(f"Fold {self.current_fold}: SAC training reported success, but path {self.sac_agent_load_path} not found.") + elif self.sac_agent_load_path: # If path exists but wasn't from training this fold + pass # Don't add loaded agents to the aggregation list unless explicitly intended - except SystemExit as e: # Catch exits from validation gates (Baseline, GRU) - logger.error(f"Fold {fold_count} failed validation gate: {e}. Skipping to next fold.") - # Store failure information? For now, just continue. - # Check if it's a Baseline or GRU gate failure based on message? - gate_type = 'gru_gate' if 'GRU validation' in str(e) else 'baseline_gate' - all_fold_metrics.append({'fold_number': fold_count, 'status': f'failed_{gate_type}', 'error': str(e)}) - continue + except SystemExit as e: + logger.error(f"Fold {self.current_fold} processing halted: {e}. Skipping to next fold.") + # Determine gate type for reporting + gate_type = 'unknown_gate' + if 'Baseline CI' in str(e) or 'BASELINE CHECK' in str(e): gate_type = 'baseline_gate' + elif 'GRU validation' in str(e) or 'Edge Acc' in str(e) or 'Brier' in str(e): gate_type = 'gru_gate' + elif 'Backtest failed' in str(e) or 'BACKTEST CHECK' in str(e) or 'Sharpe' in str(e): gate_type = 'backtest_gate' + elif 'split' in str(e).lower(): gate_type = 'split_error' + elif 'selection' in str(e).lower(): gate_type = 'selection_error' + elif 'scaling' in str(e).lower(): gate_type = 'scaling_error' + elif 'pruning' in str(e).lower(): gate_type = 'pruning_error' + elif 'sequence' in str(e).lower(): gate_type = 'sequence_error' + elif 'label' in str(e).lower(): gate_type = 'labeling_error' + elif 'feature eng' in str(e).lower(): gate_type = 'feature_eng_error' + self.all_fold_metrics.append({'fold_number': self.current_fold, 'status': f'failed_{gate_type}', 'error': str(e)}) + continue # Skip to next fold except Exception as e: - logger.error(f"Error processing Fold {fold_count}: {e}. Skipping fold.", exc_info=True) - all_fold_metrics.append({'fold_number': fold_count, 'status': 'error', 'error': str(e)}) - continue # Skip to the next fold on error + logger.error(f"Unexpected error processing Fold {self.current_fold}: {e}. Skipping fold.", exc_info=True) + self.all_fold_metrics.append({'fold_number': self.current_fold, 'status': 'error', 'error': str(e)}) + continue # Skip to next fold # --- End Fold Loop --- # - logger.info(f"=== Finished Processing Fold {fold_count} ===") + logger.info(f"=== Finished Processing Fold {self.current_fold} ===") # If only single split, break after first iteration if not self.config.get('walk_forward', {}).get('enabled', False): + logger.info("Single split processing complete. Exiting fold loop.") break - # 4. 
Aggregate Fold Metrics - release_decision_passed = False # Initialize decision to False - successful_fold_nums = [f.get('fold_number') for f in all_fold_metrics if f.get('status', 'success') == 'success'] - if all_fold_metrics: - self.aggregated_metrics = self.aggregate_fold_metrics(all_fold_metrics) + # 4. Aggregate Fold Metrics & Final Decision + release_decision_passed = False + if self.all_fold_metrics: + self.aggregated_metrics = self.aggregate_fold_metrics(self.all_fold_metrics) logger.info("--- Aggregated Walk-Forward Metrics --- ") - for key, value in self.aggregated_metrics.items(): - logger.info(f" {key}: {value}") - # Save aggregated metrics + # Use json dumps for pretty printing dict/nested dict + logger.info(json.dumps(self.aggregated_metrics, indent=2)) if self.io: self.io.save_json(self.aggregated_metrics, 'aggregated_wf_metrics', section='results') - - # 5. Make Final Release Decision (only if aggregation occurred) - release_decision_passed = self.final_release_decision(self.aggregated_metrics) - else: - logger.warning("No fold metrics to aggregate. Skipping aggregation and final decision.") - self.aggregated_metrics = {} # Ensure it's defined, even if empty - # 6. Log Final Status based on the decision + release_decision_passed = self.final_release_decision(self.aggregated_metrics) + else: + logger.warning("No fold metrics were generated. Skipping aggregation and final decision.") + self.aggregated_metrics = {} # Ensure defined + + # Log Final Status if release_decision_passed: - logger.info("--- Pipeline finished successfully and meets release criteria. ---") + logger.info(f"--- Pipeline Run {self.run_id} finished successfully and meets release criteria. ---") else: - logger.error("--- Pipeline finished but FAILED to meet release criteria. ---") + logger.error(f"--- Pipeline Run {self.run_id} finished but FAILED to meet release criteria. See aggregated metrics and logs. ---") - # 5. Aggregate SAC Agents (if enabled and successful folds exist) + # 5. Aggregate SAC Agents (Optional) if self.config.get('sac_aggregation', {}).get('enabled', False): if all_successful_sac_agent_paths: self.aggregate_sac_agents(all_successful_sac_agent_paths) @@ -2646,115 +968,561 @@ class TradingPipeline: # --- SAC Agent Aggregation --- # def aggregate_sac_agents(self, agent_paths: List[str]): """ - Aggregates SAC agents from a list of saved agent paths. - Currently supports averaging weights. + Aggregates SAC agents by calling the stage function. Args: agent_paths (List[str]): List of paths to the saved SAC agent directories - (e.g., [.../sac_agent_final, ...]). + from successful folds. """ - agg_cfg = self.config.get('sac_aggregation', {}) - # Enabled check is done before calling, but double-check - if not agg_cfg.get('enabled', False): - logger.info("SAC agent aggregation is disabled. Skipping.") + logger.info(f"--- Calling Stage: Aggregating SAC Agents ---") + # Call the stage function from modelling.py + aggregate_sac_agents( + config=self.config, + agent_paths=agent_paths, + current_run_models_dir=self.current_run_models_dir, + io=self.io + ) + logger.info(f"--- SAC Agent Aggregation Stage Call Complete ---") + + def select_and_prune_features(self): + """Performs feature selection for the fold by calling the stage function. + + Note: Pruning is handled in a separate step after scaling. 
+ """ + logger.info(f"--- Calling Stage: Selecting Features for Fold {self.current_fold} --- ") + + # Determine fold-specific models directory path + fold_models_dir = self.fold_dirs.get('models') + fold_results_dir = self.fold_dirs.get('results') # Pass results dir too, though not used by select currently + if not fold_models_dir: + logger.error(f"Fold {self.current_fold}: Cannot select features, fold models directory not set.") + raise SystemExit(f"Fold {self.current_fold}: Missing models directory for feature selection.") + + # Call the stage function to perform selection and save whitelist + # Note: This function raises SystemExit on failure + final_whitelist = select_features_fold( + X_train_raw=self.X_train_raw, + y_dir_train_ordinal=self.y_dir_train_ordinal, # Pass ordinal labels + feature_engineer=self.feature_engineer, + io=self.io, + run_id=self.run_id, + fold_num=self.current_fold, + fold_models_dir=fold_models_dir, + fold_results_dir=fold_results_dir, + main_run_models_dir=self.current_run_models_dir # <<< PASS the main run models dir + ) + + # Store the determined whitelist on self + self.final_whitelist = final_whitelist + + logger.info(f"Fold {self.current_fold}: Feature selection stage complete. Whitelist stored.") + # Pruning logic is removed from here. + + def scale_features(self): + """Scales features for the current fold by calling the stage function.""" + logger.info(f"--- Calling Stage: Scaling Features for Fold {self.current_fold} --- ") + + # Determine fold-specific models directory path + fold_models_dir = self.fold_dirs.get('models') + if not fold_models_dir: + logger.error(f"Fold {self.current_fold}: Cannot scale features, fold models directory not set.") + raise SystemExit(f"Fold {self.current_fold}: Missing models directory for scaling.") + + # Call the stage function + # Note: This function raises SystemExit on failure + X_train_scaled, X_val_scaled, X_test_scaled, used_scaler = scale_features_fold( + X_train_raw=self.X_train_raw, + X_val_raw=self.X_val_raw, + X_test_raw=self.X_test_raw, + run_id=self.run_id, + fold_num=self.current_fold, + fold_models_dir=fold_models_dir, + main_run_models_dir=self.current_run_models_dir, # <<< PASS the main run models dir + preloaded_scaler=self.scaler # Pass preloaded scaler if it exists (from previous fold or loaded model) + ) + + # Update self state + self.X_train_scaled = X_train_scaled + self.X_val_scaled = X_val_scaled + self.X_test_scaled = X_test_scaled + self.scaler = used_scaler # Store the scaler used (might be newly fitted or preloaded) + + logger.info(f"Fold {self.current_fold}: Feature scaling stage complete.") + + def create_sequences(self): + """Creates sequences from pruned features via stage function.""" + logger.info("--- Stage: Creating Sequences ---") + if self.X_train_pruned is None or self.y_train is None or \ + self.X_val_pruned is None or self.y_val is None or \ + self.X_test_pruned is None or self.y_test is None: + logger.error("Pruned training, validation, or test data/targets are missing. Cannot create sequences.") return - - if not agent_paths: - logger.warning("No SAC agent paths provided for aggregation. Skipping.") - return - - method = agg_cfg.get('method', 'average_weights') - if method != 'average_weights': - logger.warning(f"SAC aggregation method '{method}' is not implemented. 
Skipping.") - return - - logger.info(f"Starting SAC agent aggregation using method: {method} from {len(agent_paths)} agents.") - all_state_dicts = [] - loaded_agent_dims = {'state': None, 'action': None} + gru_cfg = self.config.get('gru', {}) + lookback = gru_cfg.get('lookback', 60) + use_ternary = gru_cfg.get('use_ternary', False) + drop_imputed = gru_cfg.get('drop_imputed_sequences', False) - # --- Load Fold Agents and Check Dimensions --- # - for agent_path in agent_paths: - if os.path.exists(agent_path): - logger.info(f" Loading agent from: {agent_path}") - try: - # Initialize a temporary agent to load into - # Get dimensions from metadata if possible, else use fallback - # Load metadata first - meta_path = os.path.join(agent_path, 'agent_metadata.json') - metadata = {} - if os.path.exists(meta_path): - with open(meta_path, 'r') as f: - metadata = json.load(f) - - # Determine state/action dims - state_dim = metadata.get('state_dim', 5) # Use 5 as fallback - action_dim = metadata.get('action_dim', 1) # Use 1 as fallback + # Determine target columns based on ternary setting + dir_key = 'dir3' if use_ternary else 'dir' + target_names = ['mu', dir_key] - # Check for dimension consistency - if loaded_agent_dims['state'] is None: - loaded_agent_dims['state'] = state_dim - loaded_agent_dims['action'] = action_dim - elif (loaded_agent_dims['state'] != state_dim or - loaded_agent_dims['action'] != action_dim): - logger.warning(f" Dimension mismatch! Agent {agent_path} has dims ({state_dim},{action_dim}), expected ({loaded_agent_dims['state']},{loaded_agent_dims['action']}). Skipping this agent.") - continue # Skip this agent + # Check if 'bar_imputed' exists in the dataframes + if 'bar_imputed' not in self.X_train_pruned.columns or \ + 'bar_imputed' not in self.X_val_pruned.columns or \ + 'bar_imputed' not in self.X_test_pruned.columns: + logger.error("'bar_imputed' column not found in pruned data. Cannot create sequences with imputed handling.") + # Decide whether to proceed without it or raise error + # For now, raising an error as it's required by the instructions + raise ValueError("'bar_imputed' column is missing from feature dataframes before sequence creation.") - agent_temp = SACTradingAgent(state_dim=state_dim, action_dim=action_dim, **self.sac_cfg) - loaded_meta_check = agent_temp.load(agent_path) # Load weights and metadata - if not loaded_meta_check: # Check if load method indicated failure - raise RuntimeError(f"Agent load method failed for {agent_path}") - all_state_dicts.append(agent_temp.get_state_dict()) # Get state dict - except Exception as e: - logger.warning(f" Failed to load or get state dict from agent at {agent_path}: {e}") - else: - logger.warning(f" Agent path not found: {agent_path}") - - if not all_state_dicts: - logger.error("Failed to load any valid SAC agents. 
Cannot aggregate.") - return + # Use the stage function for sequence creation + results_train = create_sequences_fold( + X_data=self.X_train_pruned, + y_data=self.y_train, + target_names=target_names, + lookback=lookback, + name="Train", + config=self.config, # Pass config for drop_imputed_sequences + io=self.io # Pass IOManager for artefact saving + ) + results_val = create_sequences_fold( + X_data=self.X_val_pruned, + y_data=self.y_val, + target_names=target_names, + lookback=lookback, + name="Validation", + config=self.config, + io=self.io + ) + results_test = create_sequences_fold( + X_data=self.X_test_pruned, + y_data=self.y_test, + target_names=target_names, + lookback=lookback, + name="Test", + config=self.config, + io=self.io + ) + + # Unpack results and store them + if results_train: + self.X_train_seq, self.y_train_seq_dict, self.train_seq_indices, _ = results_train + else: + logger.error("Failed to create training sequences.") + # Handle error appropriately - perhaps stop the pipeline + return + + if results_val: + self.X_val_seq, self.y_val_seq_dict, self.val_seq_indices, _ = results_val + else: + logger.error("Failed to create validation sequences.") + return - logger.info(f"Successfully loaded {len(all_state_dicts)} consistent SAC agent state dictionaries for aggregation.") + if results_test: + self.X_test_seq, self.y_test_seq_dict, self.test_seq_indices, _ = results_test + else: + logger.error("Failed to create test sequences.") + return + + logger.info("Sequence creation complete for all data splits.") + + def train_or_load_gru(self): + """Trains/loads GRU model for the fold via stage function & handles re-sequencing.""" + logger.info(f"--- Calling Stage: Training/Loading GRU for Fold {self.current_fold} ---") + + # Determine fold-specific models directory path + fold_models_dir = self.fold_dirs.get('models', self.current_run_models_dir) # Fallback to run models dir + if not fold_models_dir: + logger.error(f"Fold {self.current_fold}: Cannot train/load GRU, fold models directory not set.") + raise SystemExit(f"Fold {self.current_fold}: Missing models directory for GRU stage.") + + # Call the stage function + # Expects the stage function to handle re-pruning and re-sequencing internally + # if re-scaling occurs, and return the final sequences. 
+ ( + gru_model, gru_handler, + gru_model_run_id_loaded_from, + scaler_maybe_updated, + # Potentially updated sequences if re-scaling occurred inside stage fn + X_train_seq_new, y_train_seq_dict_new, train_indices_new, + X_val_seq_new, y_val_seq_dict_new, val_indices_new, + X_test_seq_new, y_test_seq_dict_new, test_indices_new + ) = train_or_load_gru_fold( + config=self.config, + run_id=self.run_id, # Pass current run_id (used for saving trained models) + current_fold=self.current_fold, # Pass current fold number + current_run_models_dir=fold_models_dir, # Pass fold-specific dir + base_models_dir_path=self.base_models_dir_path, + gru_handler=self.gru_handler, # Pass the handler instance + # Pass current sequences + X_train_seq=self.X_train_seq, + y_train_seq_dict=self.y_train_seq_dict, + X_val_seq=self.X_val_seq, + y_val_seq_dict=self.y_val_seq_dict, + X_test_seq=self.X_test_seq, # Pass test sequences too + y_test_seq_dict=self.y_test_seq_dict, + # Pass raw data needed for potential re-processing + X_train_raw=self.X_train_raw, + X_val_raw=self.X_val_raw, + X_test_raw=self.X_test_raw, + y_train=self.y_train, + y_val=self.y_val, + y_test=self.y_test, + # Remove pruned data - not expected by the stage function + # X_train_pruned=self.X_train_pruned, # Needed if re-sequencing happens + # X_val_pruned=self.X_val_pruned, + # X_test_pruned=self.X_test_pruned, + scaler=self.scaler, # Pass the current scaler + final_whitelist=self.final_whitelist, # Pass whitelist for potential re-pruning + feature_engineer=self.feature_engineer, # Pass FeatureEngineer instance + io=self.io + ) + + # Update self state with results from the stage function + self.gru_model = gru_model + self.gru_handler = gru_handler # Handler might be updated (e.g., after tuning) + self.gru_model_run_id_loaded_from = gru_model_run_id_loaded_from + self.scaler = scaler_maybe_updated # Scaler might have been loaded + + # Check if stage function returned updated sequences + # (Indicates re-scaling/re-pruning/re-sequencing occurred internally) + if X_train_seq_new is not None: + logger.info(f"Fold {self.current_fold}: GRU loading triggered internal re-processing. Updating sequence data.") + self.X_train_seq = X_train_seq_new + self.y_train_seq_dict = y_train_seq_dict_new + self.train_indices = train_indices_new + self.X_val_seq = X_val_seq_new + self.y_val_seq_dict = y_val_seq_dict_new + self.val_indices = val_indices_new + self.X_test_seq = X_test_seq_new + self.y_test_seq_dict = y_test_seq_dict_new + self.test_indices = test_indices_new + # else: sequences remain as they were before calling the stage function. + + logger.info(f"Fold {self.current_fold}: GRU Training/Loading stage complete. 
Model run ID: {self.gru_model_run_id_loaded_from}") + + def calibrate_probabilities(self): + """Calibrates GRU probs & optimizes edge threshold for the fold via stage function.""" + logger.info(f"--- Calling Stage: Calibrating Probabilities for Fold {self.current_fold} ---") + + # Call the stage function + ( + optimal_T, vector_cal_params, + optimized_edge_threshold, + p_cal_val_for_check, y_dir_val_for_check + ) = calibrate_probabilities_fold( + config=self.config, + current_fold=self.current_fold, + gru_model=self.gru_model, + gru_handler=self.gru_handler, + X_val_seq=self.X_val_seq, + y_val_seq_dict=self.y_val_seq_dict, + use_ternary=self.use_ternary, + calibrator=self.calibrator, + vector_calibrator=self.vector_calibrator, + fold_dirs=self.fold_dirs, + current_run_models_dir=self.current_run_models_dir, + run_id=self.run_id, # <<< PASS run_id + io=self.io + ) + + # Update self state with the results + self.optimal_T = optimal_T + self.vector_cal_params = vector_cal_params + self.optimized_edge_threshold = optimized_edge_threshold + # Store predictions/labels needed for the separate validation check stage + self.p_cal_val_for_check = p_cal_val_for_check + self.y_dir_val_for_check = y_dir_val_for_check + + # REMOVED: Internal call to _perform_gru_validation_checks. + # This check will be performed by a dedicated evaluation stage function later. + + logger.info(f"Fold {self.current_fold}: Probability calibration stage complete. Optimized Edge: {self.optimized_edge_threshold}") + + def train_or_load_sac(self): + """Trains/loads SAC agent for the fold via stage function.""" + logger.info(f"--- Calling Stage: Training/Loading SAC Agent for Fold {self.current_fold} ---") + # --- Corrected Config Lookup --- # + train_sac_flag = self.config.get('sac', {}).get('train_sac', False) # Correct path + # --- End Correction --- # + sac_trainer_instance = None # Initialize as None + + if train_sac_flag: + # --- Instantiate SACTrainer only if training --- # + # --- DEBUG: Check value of gru_model_run_id_loaded_from --- # + logger.info(f"DEBUG: Checking gru_model_run_id_loaded_from: {self.gru_model_run_id_loaded_from}") + # --- END DEBUG --- # + if self.gru_model_run_id_loaded_from is None: + logger.error(f"Fold {self.current_fold}: Cannot instantiate SACTrainer: GRU model run ID is not set. Aborting SAC stage.") + self.sac_agent_load_path = None # Ensure path is None + return # Skip stage + + logger.info(f"Fold {self.current_fold}: SAC training enabled. Instantiating SACTrainer...") + + # Determine Edge Threshold for SAC Trainer config + edge_threshold_for_sac = self.optimized_edge_threshold if self.optimized_edge_threshold is not None else \ + self.config.get('calibration', {}).get('edge_threshold', 0.1) + logger.info(f"Using edge threshold {edge_threshold_for_sac:.4f} for SAC Trainer (heuristic seeding / env info)...") + + # Prepare Config copy for SAC Trainer + sac_trainer_config = self.config.copy() + if 'calibration' not in sac_trainer_config: sac_trainer_config['calibration'] = {} + sac_trainer_config['calibration']['edge_threshold'] = edge_threshold_for_sac + if sac_trainer_config.get('calibration', {}).get('rolling_enabled', False): + logger.warning(f"Fold {self.current_fold}: SAC training enabled AND rolling calib enabled. 
Disabling rolling calib for SAC trainer config copy.") + sac_trainer_config['calibration']['rolling_enabled'] = False + + # Determine base dirs for SACTrainer + base_logs = self.dirs.get('logs') + if not base_logs or not os.path.isdir(base_logs): + # Use project root as fallback - assumes specific structure + project_root_guess = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) # ../.. from src/ + base_logs = os.path.join(project_root_guess, 'logs') + os.makedirs(base_logs, exist_ok=True) + logger.warning(f"Fold {self.current_fold}: Using fallback base logs dir for SACTrainer: {base_logs}") + + base_results = self.dirs.get('results') + if not base_results or not os.path.isdir(base_results): + project_root_guess = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) + base_results = os.path.join(project_root_guess, 'results') + os.makedirs(base_results, exist_ok=True) + logger.warning(f"Fold {self.current_fold}: Using fallback base results dir for SACTrainer: {base_results}") - # --- Average Weights --- # - if method == 'average_weights': try: - avg_state_dict = OrderedDict() - keys = all_state_dicts[0].keys() - num_agents = len(all_state_dicts) - - for key in keys: - summed_tensor = torch.stack([sd[key] for sd in all_state_dicts], dim=0).sum(dim=0) - avg_state_dict[key] = summed_tensor / num_agents - - logger.info("Successfully averaged agent weights.") - - # --- Save Averaged Agent --- # - # Use the dimensions determined during loading - final_state_dim = loaded_agent_dims['state'] - final_action_dim = loaded_agent_dims['action'] - final_agent = SACTradingAgent(state_dim=final_state_dim, action_dim=final_action_dim, **self.sac_cfg) - final_agent.load_state_dict(avg_state_dict) - - save_dir_name = 'sac_agent_aggregated' - aggregated_agent_save_path = os.path.join(self.current_run_models_dir, save_dir_name) - os.makedirs(aggregated_agent_save_path, exist_ok=True) - - # Use the agent's save method - final_agent.save(aggregated_agent_save_path) - logger.info(f"Saved aggregated SAC agent to: {aggregated_agent_save_path}") - - agg_info = { - 'aggregation_method': method, - 'num_agents_aggregated': num_agents, - 'source_agent_paths': agent_paths, # Log the paths used - 'save_path': aggregated_agent_save_path - } - if self.io: - self.io.save_json(agg_info, 'sac_aggregation_info', section='results', use_txt=True) - + # Instantiate SACTrainer - ensure SACTrainer class is imported + from gru_sac_predictor.src.sac_trainer import SACTrainer # Ensure import + sac_trainer_instance = SACTrainer( + config=sac_trainer_config, + base_models_dir=self.base_models_dir_path, + base_logs_dir=base_logs, + base_results_dir=base_results + ) + self.sac_trainer = sac_trainer_instance # Store instance if needed later (e.g., aggregation) + except ImportError: + logger.error(f"Fold {self.current_fold}: Failed to import SACTrainer. 
Cannot instantiate trainer.") + self.sac_agent_load_path = None + return # Skip stage except Exception as e: - logger.error(f"Error during SAC agent weight averaging or saving: {e}", exc_info=True) + logger.error(f"Fold {self.current_fold}: Failed to instantiate SACTrainer: {e}", exc_info=True) + self.sac_agent_load_path = None + return # Skip stage + # --- End SACTrainer Instantiation --- + + # Call the stage function, passing the trainer instance (if created) or None + sac_agent_load_path_result = train_or_load_sac_fold( + config=self.config, # Pass original config to stage function + current_fold=self.current_fold, + gru_model_run_id_loaded_from=self.gru_model_run_id_loaded_from, + base_models_dir_path=self.base_models_dir_path, + sac_trainer=sac_trainer_instance, # Pass the instance or None + io=self.io + ) + + # Store the determined path on self + self.sac_agent_load_path = sac_agent_load_path_result + + logger.info(f"Fold {self.current_fold}: SAC Training/Loading stage completed in pipeline. Agent path set to: {self.sac_agent_load_path}") + + def run_backtest(self): + """Runs the backtest for the fold by calling the stage function.""" + logger.info(f"--- Calling Stage: Running Backtest for Fold {self.current_fold} ---") + + # Reset metrics for the fold + self.backtest_results_df = None + self.backtest_metrics = None + self.metrics_log_df = None + + # --- Gather necessary inputs from self state --- # + # Specifically handle raw predictions needed ONLY if rolling calibration is enabled + p_raw_test_input = None + logits_test_input = None + rolling_cal_enabled = self.config.get('calibration', {}).get('rolling_enabled', False) + + if rolling_cal_enabled: + logger.info(f"Fold {self.current_fold}: Rolling calibration enabled. Attempting to generate raw GRU predictions for test set.") + if self.gru_handler and self.X_test_seq is not None: + try: + if self.use_ternary: + logger.info("Generating raw logits for ternary case...") + logits_test_input = self.gru_handler.predict_logits(self.X_test_seq) + if logits_test_input is None: + logger.error("GRU handler failed to return logits.") + else: + logger.info(f"Generated logits_test with shape: {logits_test_input.shape}") + else: + logger.info("Generating raw probabilities P(up) for binary case...") + # Assuming predict returns tuple: (calibrated_probs, raw_probs_or_none, ...) 
+ # Or adjust based on actual gru_handler.predict signature if it only returns raw probs + preds_tuple = self.gru_handler.predict(self.X_test_seq) # This might need adjustment based on predict signature + # Check if predict returns raw probs directly or in a tuple + if isinstance(preds_tuple, tuple) and len(preds_tuple) >= 3 and preds_tuple[2] is not None: + # Assuming raw P(up) is the 3rd element as per previous logic + p_raw_test_input = preds_tuple[2].flatten() + logger.info(f"Generated p_raw_test with shape: {p_raw_test_input.shape}") + elif isinstance(preds_tuple, np.ndarray): # If predict *only* returns raw P(up) + p_raw_test_input = preds_tuple.flatten() + logger.info(f"Generated p_raw_test (direct return) with shape: {p_raw_test_input.shape}") + else: + # Fallback: Try predict_proba if available and predict doesn't give raw probs + if hasattr(self.gru_handler, 'predict_proba'): + logger.info("Using predict_proba as fallback for raw P(up)...") + p_raw_all_classes = self.gru_handler.predict_proba(self.X_test_seq) # Assumes returns (N, 2) + if p_raw_all_classes is not None and p_raw_all_classes.ndim == 2 and p_raw_all_classes.shape[1] == 2: + p_raw_test_input = p_raw_all_classes[:, 1] # Get P(up) + logger.info(f"Generated p_raw_test (from predict_proba) with shape: {p_raw_test_input.shape}") + else: + logger.error("GRU handler predict_proba did not return expected format.") + else: + logger.error("GRU handler predict method did not return expected raw probabilities, and predict_proba is not available.") + + except Exception as e: + logger.error(f"Error generating raw predictions via GRU handler: {e}", exc_info=True) + # Continue without raw predictions, but log the error + + # Final check if required inputs for rolling cal were obtained + if self.use_ternary and logits_test_input is None: + logger.error(f"Fold {self.current_fold}: Failed to get raw GRU logits needed for rolling calibration. Backtest cannot proceed with rolling cal.") + raise SystemExit(f"Fold {self.current_fold}: Missing raw GRU logits for rolling calibration.") + elif not self.use_ternary and p_raw_test_input is None: + logger.error(f"Fold {self.current_fold}: Failed to get raw GRU probabilities needed for rolling calibration. 
Backtest cannot proceed with rolling cal.") + raise SystemExit(f"Fold {self.current_fold}: Missing raw GRU probabilities P(up) for rolling calibration.") + # --- End raw prediction handling --- # + + try: + results_df, metrics_dict, metrics_log = run_backtest_fold( + config=self.config, + io=self.io, + current_fold=self.current_fold, + fold_dirs=self.fold_dirs, + sac_agent_load_path=self.sac_agent_load_path, + X_test_seq=self.X_test_seq, + y_test_seq_dict=self.y_test_seq_dict, + test_indices=self.test_indices, + df_test_original=self.df_test_original, + gru_handler=self.gru_handler, + calibrator=self.calibrator, + vector_calibrator=self.vector_calibrator, + initial_optimal_T=getattr(self, 'optimal_T', None), + initial_vector_params=getattr(self, 'vector_cal_params', None), + optimized_edge_threshold=self.optimized_edge_threshold, # Use the value stored from calibration + p_raw_test=p_raw_test_input, + logits_test=logits_test_input, + use_ternary=self.use_ternary + ) + + # Store results on self + self.backtest_results_df = results_df + self.backtest_metrics = metrics_dict + self.metrics_log_df = metrics_log + + logger.info(f"Fold {self.current_fold}: Backtest stage completed successfully.") + + except SystemExit as e: + logger.error(f"Fold {self.current_fold} failed backtest validation gates: {e}. Halting fold.") + self.backtest_metrics = None # Ensure metrics are None on failure + raise # Re-raise SystemExit to stop fold processing in execute() + except Exception as e: + logger.error(f"Fold {self.current_fold}: An unexpected error occurred during the backtest stage: {e}", exc_info=True) + self.backtest_metrics = None # Ensure metrics are None on failure + # Raise SystemExit to stop the fold + raise SystemExit(f"Fold {self.current_fold}: Unhandled exception in backtest stage.") from e + + # --- Walk-Forward Fold Generation --- # + def _generate_walk_forward_folds(self) -> Iterator[Tuple[pd.Timestamp, pd.Timestamp, pd.Timestamp, pd.Timestamp, pd.Timestamp, pd.Timestamp]]: + """Generates date ranges for walk-forward validation folds based on config. + + Yields: + tuple: (train_start, train_end, val_start, val_end, test_start, test_end) + Timestamps are timezone-naive initially. + """ + wf_config = self.config.get('walk_forward', {}) + if not wf_config.get('enabled', False): + logger.info("Walk-forward validation disabled. 
Performing single split based on ratios.") + # Yield None to signal single split mode to the caller + yield None + return + + # Ensure data is loaded to get the full date range + if self.df_raw is None or self.df_raw.empty: + logger.error("Cannot generate walk-forward folds: Raw data not loaded yet.") + raise SystemExit("Raw data must be loaded before generating walk-forward folds.") + if not isinstance(self.df_raw.index, pd.DatetimeIndex): + logger.error("Cannot generate walk-forward folds: Raw data index is not DatetimeIndex.") + raise SystemExit("Raw data index must be DatetimeIndex for walk-forward.") + + # Get parameters from config + train_days = wf_config.get('train_days', 365) + val_days = wf_config.get('val_days', 90) + test_days = wf_config.get('test_days', 30) + step_days = wf_config.get('step_days', 30) + initial_offset_days = wf_config.get('initial_offset_days', 0) # Days to skip at the start + + if not all([isinstance(d, int) and d > 0 for d in [train_days, val_days, test_days, step_days]]) or not isinstance(initial_offset_days, int) or initial_offset_days < 0: + logger.error("Invalid walk-forward parameters in config (days must be positive integers, offset non-negative). Exiting.") + raise SystemExit("Invalid walk_forward configuration.") + + train_delta = pd.Timedelta(days=train_days) + val_delta = pd.Timedelta(days=val_days) + test_delta = pd.Timedelta(days=test_days) + step_delta = pd.Timedelta(days=step_days) + offset_delta = pd.Timedelta(days=initial_offset_days) + + full_start_date = self.df_raw.index.min() + offset_delta + full_end_date = self.df_raw.index.max() + + # Calculate the end date of the first training period + first_train_end = full_start_date + train_delta + + current_val_start = first_train_end + fold_count = 0 + + logger.info("Generating Walk-Forward Folds:") + logger.info(f" Train={train_days}d, Val={val_days}d, Test={test_days}d, Step={step_days}d, Offset={initial_offset_days}d") + logger.info(f" Full Data Range Available: [{self.df_raw.index.min()}, {self.df_raw.index.max()}]" ) + logger.info(f" Starting after offset: {full_start_date}") + + while True: + # Define fold boundaries + train_start = current_val_start - train_delta + train_end = current_val_start # Train ends right before validation starts + val_start = current_val_start + val_end = val_start + val_delta + test_start = val_end + test_end = test_start + test_delta + + # Ensure we don't exceed the available data + if test_end > full_end_date: + # Adjust the last test period if it overshoots + test_end = full_end_date + # Optional: Check if the remaining test period is too short + min_test_period = pd.Timedelta(days=min(1, test_days // 2)) # Example minimum + if test_end - test_start < min_test_period: + logger.info(f"Remaining test period too short ({test_end - test_start}). Stopping fold generation.") + break # Stop if the last test period is too short + + # Check if validation period still has data + if val_start >= full_end_date: + logger.info("Validation start date reached end of data. 
Stopping fold generation.") + break + + fold_count += 1 + logger.info(f" Fold {fold_count}: Train=[{train_start}, {train_end}), Val=[{val_start}, {val_end}), Test=[{test_start}, {test_end}]") + yield train_start, train_end, val_start, val_end, test_start, test_end + + # Move to the next validation start + current_val_start += step_delta + + # Break condition if only training/validation is needed (test_days=0) + if test_days == 0 and val_end >= full_end_date: + logger.info("Reached end of data for training/validation folds (test_days=0).") + break + + if fold_count == 0: + logger.error("No valid walk-forward folds generated. Check data range and walk_forward config parameters.") + raise SystemExit("Failed to generate any walk-forward folds.") + + # --- End Walk-Forward Fold Generation --- # # --- Entry Point --- #