# GRU-SAC Predictor v3 Configuration File
# This file parameterizes all major components of the pipeline.

pipeline:
  # Free-text summary of what this configuration file is for.
  description: "Configuration for the GRU-SAC trading predictor pipeline."
  # Define stages to run, primarily for debugging/selective execution.
  # Currently commented out, so no explicit stage list is supplied by this file.
  # stages_to_run: ["data", "features", "gru", "sac", "backtest", "aggregate"] # Example: Run all

# --- Data Loading and Initial Processing ---
data:
  ticker: "BTC-USDT"  # Ticker symbol (adjust based on DataLoader capabilities)
  exchange: "bnbspot"  # Exchange name (adjust based on DataLoader)
  interval: "1min"  # Data interval (e.g., "1min", "5min", "1h")
  start_date: "2024-09-19"  # Start date for data loading (YYYY-MM-DD)
  end_date: "2024-11-03"  # End date for data loading (YYYY-MM-DD)
  # Path to the database directory (relative to project root).
  db_dir: "../data/crypto_market_data"
  # Bar frequency string.
  # NOTE(review): "1T" looks like a pandas offset alias; newer pandas releases
  # deprecate "T" in favour of "min" - confirm against the consumer before upgrading.
  bar_frequency: "1T"
  # Missing-data handling.
  missing:
    strategy: "neutral"  # drop | neutral | ffill | interpolate
    max_gap: 60  # Max consecutive missing bars allowed
    interpolate:
      method: "linear"
      limit: 10

  # Optional volatility-based downsampling.
  volatility_sampling:
    enabled: false
    window: 30  # Window for volatility calculation (e.g., 30 minutes)
    quantile: 0.5  # Quantile threshold for sampling (0.0 to 1.0)

  # Optional: Add parameters for data cleaning if needed
  # e.g., max_nan_fill_gap: 5

# --- Feature Engineering ---
features:
  # --- Indicator windows: parameters for FeatureEngineer.add_base_features ---
  atr_window: 14
  rsi_window: 14
  adx_window: 14
  macd_fast: 12
  macd_slow: 26
  macd_signal: 9
  # Add parameters for other indicators (e.g., Chaikin, SVI, Volatility) if configurable
  # chaikin_ad_window: 10
  # svi_window: 10
  # volatility_window: 14 # e.g., for a rolling std dev feature

  # --- Feature selection: used by FeatureEngineer.select_features ---
  # These might include method (e.g., "correlation", "mutual_info", "lgbm"), thresholds, etc.
  # Example selection method.
  selection_method: "correlation"
  # Example threshold for correlation-based selection.
  correlation_threshold: 0.02
  # Minimum number of features to keep.
  min_features_after_selection: 10

# --- Data Splitting (Walk-Forward or Single Split) ---
walk_forward:
  # Set to false for a single train/val/test split based on the ratios below.
  enabled: true
  # Ratios are used if enabled=false OR for preparing data for the SAC
  # environment (which uses the validation split).
  split_ratios:
    train: 0.6
    validation: 0.2
    # The test ratio is inferred (1.0 - train - validation).

  # Settings used only if enabled=true.
  # Number of walk-forward folds. If <= 1, uses rolling window mode.
  num_folds: 5
  # --- Rolling Window Specific Settings (used if num_folds <= 1) ---
  train_period_days: 25  # Length of the training period per fold
  validation_period_days: 10  # Length of the validation period per fold
  test_period_days: 10  # Length of the test period per fold
  # How many days to slide the window forward for the next fold
  # (Recommendation: >= test_period_days / 2).
  step_days: 7
  # If true, train period grows; otherwise, it slides (Rolling Window only).
  expanding_window: false

  # --- General Walk-Forward Settings ---
  # Optional: Skip days at the beginning before the first fold starts.
  initial_offset_days: 14
  # Optional: Drop training samples overlapping with val/test lookback (in minutes).
  purge_window_minutes: 0
  # Optional: Skip minutes after train/val period ends before starting next period (in minutes).
  embargo_minutes: 0
  # Optional: Reserve days at the very end, excluded from all folds.
  final_holdout_period_days: 0
  # Optional: Minimum total duration (days) required for a generated fold
  # (train_start to test_end).
  min_fold_duration_days: 1
  # --- Gap and Regime Settings ---
  # Split data into chunks if gap > this threshold.
  gap_threshold_minutes: 5
  # Minimum duration (days) for a chunk to be considered for fold generation.
  min_chunk_days: 1
  regime:
    enabled: true
    # e.g., "volatility", "trend_strength", "rsi"
    indicator: "volatility"
    indicator_params:
      # Parameter for the chosen indicator (e.g., rolling window size).
      window: 20
    # Quantiles to define regime boundaries (e.g., [0.33, 0.66] for 3 regimes).
    quantiles: [0.33, 0.66]
    # Minimum % each regime must occupy in train/val/test periods.
    min_regime_representation_pct: 10

  # --- Drift-Triggered Retraining (Informational - Requires external implementation) ---
  # drift:
  #   enable: false
  #   feature_list: ["close", "volume", "rsi"] # Example features to monitor
  #   p_threshold: 0.01 # Example drift detection threshold (e.g., for KS test p-value)

# --- GRU Model Configuration ---
gru:
  # --- Label Definition ---
  train_gru: true
  # Use ternary (Up/Flat/Down) labels? If false, uses binary (Up/Down).
  use_ternary: true
  # Lookahead period for target returns/labels (in units of 'data.interval').
  prediction_horizon: 5
  # 'k' factor for ternary flat label threshold (eps = k * rolling_std(fwd_ret)).
  flat_sigma_multiplier: 0.25
  # Alpha for binary label smoothing (0.0 disables).
  label_smoothing: 0.0
  drop_imputed_sequences: true  # Added based on instructions

  # --- Model Architecture (V3) - Used by GRUModelHandler.build_gru_model_v3 ---
  gru_units: 96  # Number of units in GRU layer
  # Number of units in MultiHeadAttention layer (set to 0 to disable).
  attention_units: 16
  dropout_rate: 0.1  # Dropout rate for GRU and Attention layers
  # Exponent-notation values are written with an explicit mantissa decimal point
  # ("1.0e-4", not "1e-4"): YAML 1.1 loaders such as PyYAML only resolve such a
  # scalar to a float when a "." is present, and otherwise return the string "1e-4".
  learning_rate: 1.0e-4  # Learning rate for Adam optimizer
  l2_reg: 1.0e-4  # L2 regularization factor for Dense layers

  # --- Loss Function Parameters (V3) - Used by GRUModelHandler.build_gru_model_v3 ---
  # Gamma parameter for categorical focal loss (if use_ternary=true).
  focal_gamma: 2.0
  focal_label_smoothing: 0.1  # Label smoothing within focal loss calculation
  huber_delta: 1.0  # Delta parameter for Huber loss (mu/return prediction)
  loss_weight_mu: 0.3  # Weight for the mu/return prediction loss component
  loss_weight_dir3: 1.0  # Weight for the direction prediction loss component

  # --- Training Parameters - Used by GRUModelHandler.train ---
  lookback: 60  # Sequence length (timesteps) for GRU input
  epochs: 25  # Maximum number of training epochs
  batch_size: 128  # Training batch size
  # Early stopping patience (epochs with no improvement in val_loss).
  patience: 5
  # early_stopping_monitor: "val_loss" # Monitor for early stopping (hardcoded in handler)
  # training_shuffle: False # Whether to shuffle training data each epoch (hardcoded False)

  # --- Loading Control - Used by pipeline_stages.modelling.train_or_load_gru_fold ---
  load_gru_model:
    # Set to a specific GRU pipeline run ID to load model/scaler from instead of training.
    run_id: null
    # Optional: Specify fold number (e.g., 1, 2...). If null, handler might load
    # best/last fold based on its internal logic.
    fold_num: null

# --- Hyperparameter Tuning (Optuna/W&B) ---
hyperparameter_tuning:
  gru:
    # Master switch to enable Optuna sweep for GRU.
    sweep_enabled: false
    # If sweep_enabled=true, the sweep parameters below are used:
    study_name: "gru_optimization"
    # "minimize" val_loss or "maximize" val_accuracy.
    direction: "minimize"
    n_trials: 50
    # Number of inner folds for nested cross-validation.
    inner_cv_splits: 3
    pruner: "median"  # e.g., "median", "hyperband"
    sampler: "tpe"  # e.g., "tpe", "random"
    # Bounds in exponent notation carry an explicit mantissa decimal point
    # ("1.0e-5", not "1e-5"): YAML 1.1 loaders such as PyYAML resolve the dot-less
    # form to a string, which would leave the loguniform bounds non-numeric.
    search_space:
      gru_units: { type: "int", low: 32, high: 128, step: 16 }
      attention_units: { type: "int", low: 8, high: 64, step: 8 }
      dropout_rate: { type: "float", low: 0.05, high: 0.3 }
      learning_rate: { type: "loguniform", low: 1.0e-5, high: 1.0e-3 }
      l2_reg: { type: "loguniform", low: 1.0e-5, high: 1.0e-3 }
      loss_weight_mu: { type: "float", low: 0.1, high: 0.9 }
      batch_size: { type: "categorical", choices: [64, 128, 256] }

# --- Probability Calibration ---
calibration:
  method: "vector"
  optimize_edge_threshold: true
  # Initial or fixed threshold if not optimizing.
  edge_threshold: 0.5
  # --- Rolling calibration settings (if method requires) ---
  rolling_window_size: 250
  rolling_min_samples: 50
  rolling_step: 50
  # Number of bins for reliability plot.
  reliability_plot_bins: 10

# --- Soft Actor-Critic (SAC) Agent and Training ---
sac:
  imputed_handling: "hold"  # Added based on instructions
  action_penalty: 0.05  # Added based on instructions
  # --- Agent Hyperparameters - Used by SACTradingAgent.__init__ ---
  gamma: 0.99  # Discount factor
  tau: 0.005  # Target network update rate (polyak averaging)
  # Learning rates carry an explicit mantissa decimal point ("3.0e-4", not "3e-4"):
  # YAML 1.1 loaders such as PyYAML only resolve exponent notation to a float
  # when a "." is present, and otherwise return the string "3e-4".
  actor_lr: 3.0e-4  # Learning rate for the actor network
  critic_lr: 3.0e-4  # Learning rate for the critic networks
  # Optional: LR Decay for actor/critic (if implemented in agent)
  lr_decay_rate: 0.96
  decay_steps: 100000
  # Optional: Ornstein-Uhlenbeck noise parameters (if used)
  ou_noise_stddev: 0.2
  # Initial entropy temperature (used if alpha_auto_tune=false).
  alpha: 0.2
  # Enable automatic tuning of entropy temperature alpha.
  alpha_auto_tune: true
  # Target entropy for alpha tuning; -action_dim is common default
  # (-1.0 for action_dim=1).
  target_entropy: -1.0

  # --- Training Loop Parameters - Used by SACTrainer._training_loop ---
  total_training_steps: 100000  # Total steps for the SAC training loop
  buffer_capacity: 1000000  # Maximum size of the replay buffer
  batch_size: 256  # Batch size for sampling from replay buffer
  # Number of initial steps with random actions before training starts.
  start_steps: 10000
  # Number of steps to collect before first agent update.
  update_after: 1000
  update_every: 50  # Perform agent updates every N steps
  save_freq: 5000  # Save agent checkpoint every N steps
  # Log training metrics (losses, Q-values) to TensorBoard every N steps.
  log_freq: 100
  # Evaluate agent performance every N steps (requires evaluation logic).
  eval_freq: 5000

  # --- Alpha (Entropy Temperature) Annealing - Used by SACTrainer._training_loop ---
  # Step to start annealing alpha (if auto-tune enabled).
  alpha_anneal_start_step: 10000
  alpha_anneal_end_step: 50000  # Step to finish annealing alpha
  initial_alpha: 0.2  # Alpha value before annealing starts
  final_alpha: 0.01  # Target alpha value after annealing finishes

  # --- Prioritized Experience Replay (PER) - Used by SACTrainer / PrioritizedReplayBuffer ---
  # Enable PER? If false, uses standard uniform replay buffer.
  use_per: false
  # PER parameters (used only if use_per=true)
  # Priority exponent (how much prioritization). 0 = uniform.
  per_alpha: 0.6
  # Initial importance sampling exponent (annealed to 1.0).
  per_beta_start: 0.4
  # Steps over which to anneal beta from beta_start to 1.0.
  per_beta_frames: 100000
  # Optional PER Alpha annealing (anneals the priority exponent alpha)
  per_alpha_anneal_enabled: false
  per_alpha_start: 0.6
  per_alpha_end: 0.4
  per_alpha_anneal_steps: 50000

  # --- Oracle Seeding (Potentially deprecated/experimental) ---
  # Percentage of buffer to pre-fill using heuristic policy.
  oracle_seeding_pct: 0.0

  # --- State Normalization - Used by SACTrainer ---
  # Use MeanStdFilter for state normalization?
  use_state_filter: true
  # Fallback state dim if cannot be inferred (e.g., from loaded agent metadata).
  state_dim_fallback: 5
  # Fallback action dim if cannot be inferred.
  action_dim_fallback: 1

  # --- Loading Control - Used by pipeline_stages.modelling.train_or_load_sac_fold ---
  # Master switch: Train SAC agent? If false, attempts to load based on control flags.
  train_sac: true

# --- SAC Agent Aggregation (Post Walk-Forward) ---
sac_aggregation:
  # Aggregate agents from multiple folds?
  enabled: true
  # Currently only "average_weights" is supported.
  method: "average_weights"

# --- Trading Environment Simulation ---
# Parameters passed to TradingEnv and Backtester.
environment:
  # Starting capital for simulation/backtest.
  initial_capital: 10000.0
  # Fractional cost per trade (e.g., 0.0005 = 0.05%).
  transaction_cost: 0.0005
  # --- Reward shaping parameters (used within TradingEnv._calculate_reward) ---
  # Multiplier applied to the raw PnL reward.
  reward_scale: 100.0
  # Penalty factor for action magnitude or changes (0 disables).
  action_penalty_lambda: 0.0
  # Add other env parameters if needed (e.g., position limits, reward clipping)

# --- Baseline Models ---
# Configuration for BaselineChecker.
baselines:
  # Run Logistic Regression baseline? (Requires binary labels)
  run_baseline1: true
  # Run placeholder/second baseline?
  run_baseline2: false
  # --- Parameters for Logistic Regression (Baseline 1) ---
  logistic_regression:
    max_iter: 1000
    solver: "lbfgs"
    random_state: 42
    # Internal split ratio used within baseline check.
    val_subset_split_ratio: 0.2
    # Shuffle for internal split?
    val_subset_shuffle: false
    # Confidence level for binomial test CI.
    ci_confidence_level: 0.95

  # --- Parameters for RandomForestClassifier (Baseline 2) ---
  random_forest:
    n_estimators: 100  # Number of trees
    max_depth: 10  # Maximum depth of trees (None for unlimited)
    min_samples_split: 2  # Minimum samples required to split an internal node
    min_samples_leaf: 1  # Minimum number of samples required to be at a leaf node
    random_state: 42
    n_jobs: -1  # Use all available CPU cores
    # Use the same internal split and CI settings as LogReg for comparison
    val_subset_split_ratio: 0.2
    val_subset_shuffle: false
    ci_confidence_level: 0.95

  # --- Ternary Baselines (run only if gru.use_ternary=true) ---
  # Run Multinomial Logistic Regression?
  run_baseline3: true
  # Run Ternary Random Forest?
  run_baseline4: false

  # --- Parameters for Multinomial Logistic Regression (Baseline 3) ---
  multinomial_logistic_regression:
    max_iter: 1000
    solver: "lbfgs"
    multi_class: "multinomial"  # Explicitly set for clarity
    random_state: 42
    # Use same internal split/CI settings
    val_subset_split_ratio: 0.2
    val_subset_shuffle: false
    ci_confidence_level: 0.95

  # --- Parameters for Ternary RandomForestClassifier (Baseline 4) ---
  ternary_random_forest:
    n_estimators: 100
    max_depth: 10
    min_samples_split: 2
    min_samples_leaf: 1
    random_state: 42
    n_jobs: -1
    # Use same internal split/CI settings
    val_subset_split_ratio: 0.2
    val_subset_shuffle: false
    ci_confidence_level: 0.95

# --- Pipeline Validation Gates ---
# Thresholds checked at different stages to potentially halt the pipeline.
validation_gates:
  # --- Binary Baseline Gates (used if gru.use_ternary=false) ---
  # Master switch for running *any* applicable baseline check.
  run_baseline_check: true
  # Binary LR (Raw) CI LB threshold (internal split).
  baseline1_min_ci_lb: 0.52
  # Binary RF (Raw) CI LB threshold (internal split).
  baseline2_min_ci_lb: 0.54
  # Binary LR (Edge-Filtered) CI LB threshold (validation set).
  baseline1_edge_min_ci_lb: 0.60
  # Binary RF (Edge-Filtered) CI LB threshold (validation set).
  baseline2_edge_min_ci_lb: 0.62

  # --- Ternary Baseline Gates (used if gru.use_ternary=true) ---
  # Ternary LR (Raw) CI LB threshold (internal split, vs 1/3 chance).
  baseline3_min_ci_lb: 0.40
  # Ternary RF (Raw) CI LB threshold (internal split, vs 1/3 chance).
  baseline4_min_ci_lb: 0.42
  # Ternary LR (Edge-Filtered) CI LB threshold (validation set).
  baseline3_edge_min_ci_lb: 0.57
  # Ternary RF (Edge-Filtered) CI LB threshold (validation set).
  baseline4_edge_min_ci_lb: 0.58

  # Checks on GRU validation predictions (after calibration).
  gru_performance:
    enabled: true
    # Minimum accuracy using the optimized/configured edge threshold.
    min_edge_accuracy: 0.60
    # Maximum acceptable Brier score.
    max_brier_score: 0.24

  # Master switch for all backtest performance gates.
  backtest:
    enabled: true
  # Specific performance checks on the backtest results.
  backtest_performance:
    # Enable/disable Sharpe and Max DD checks specifically.
    enabled: true
    # Minimum acceptable annualized Sharpe ratio.
    min_sharpe_ratio: 1.2
    # Maximum acceptable drawdown percentage (positive value).
    max_drawdown_pct: 15.0

# --- Pipeline Control Flags ---
control:
  # Generate and save plots (learning curves, backtest summary, etc.)?
  generate_plots: true

  # --- Loading specific models instead of training/running stages ---
  # Note: train_gru and train_sac flags override these if both are set
  # GRU Loading: see gru.load_gru_model section
  # SAC Loading: Used if sac.train_sac=false
  # Specify SAC Training Run ID (e.g., "sac_train_...") to load for backtesting.
  sac_load_run_id: null
  # "final" or specific step number checkpoint to load.
  sac_load_step: "final"

  # --- Resuming SAC Training (loads agent and potentially buffer state to continue training) ---
  # Specify SAC Training Run ID to resume from.
  sac_resume_run_id: null
  # "final" or step number checkpoint to resume from.
  sac_resume_step: "final"

# --- Logging Configuration ---
logging:
  # Level for console output: DEBUG, INFO, WARNING, ERROR, CRITICAL
  console_level: "INFO"
  # Level for file output: DEBUG, INFO, WARNING, ERROR, CRITICAL
  file_level: "DEBUG"
  # Enable logging to a file?
  log_to_file: true
  # Log file path determined by IOManager: logs/<run_id>/pipeline.log
  # Format string for log records.
  log_format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  # Date format for logs.
  log_date_format: "%Y-%m-%d %H:%M:%S"
  # --- Rotating File Handler settings (if log_to_file=true) ---
  # Max size in bytes (e.g., 10MB) before rotation.
  log_file_max_bytes: 10485760
  # Number of backup log files to keep.
  log_file_backup_count: 5

# --- Output Artifacts Configuration ---
output:
  # Base directories (relative to project root or absolute).
  base_dirs:
    results: "results"
    models: "models"
    logs: "logs"
  # --- Figure generation settings ---
  # DPI for saved figures.
  figure_dpi: 150
  # Default figure size (width, height in inches).
  figure_size: [16, 9]
  # Footer text added to plots.
  figure_footer: "© GRU-SAC v3"
  # Matplotlib style sheet to use.
  plot_style: "seaborn-v0_8-darkgrid"
  # --- Plot-specific settings ---
  # EMA alpha for SAC reward plot smoothing.
  reward_plot_smoothing_alpha: 0.2
  # reliability_plot_bins: 10 # Defined under calibration section

  # --- IOManager settings ---
  # One of: "csv", "parquet", "parquet_if_large"
  dataframe_save_format: "parquet_if_large"
  # Threshold (MB) for using Parquet if format is parquet_if_large.
  dataframe_max_csv_mb: 100

# ... existing code ...