Fixed issues with scaling of SAC state inputs

2025-04-16 18:23:11 -04:00 · 2025-04-16 18:23:11 -04:00 · 843dde88f5
commit 843dde88f5
parent 984a230bcd
18 changed files with 279 additions and 34 deletions
--- a/gru_sac_predictor/pycache/main.cpython-312.pyc
+++ b/gru_sac_predictor/pycache/main.cpython-312.pyc
--- a/gru_sac_predictor/logs/20250416_170503/main_20250416_170503.log
+++ b/gru_sac_predictor/logs/20250416_170503/main_20250416_170503.log
--- a/gru_sac_predictor/logs/20250416_182038/main_20250416_182038.log
+++ b/gru_sac_predictor/logs/20250416_182038/main_20250416_182038.log
--- a/gru_sac_predictor/main.py
+++ b/gru_sac_predictor/main.py
@ -85,15 +85,15 @@ SAC_HIDDEN_SIZE = 64
 SAC_GAMMA = 0.97
 SAC_TAU = 0.02
 # SAC_ALPHA = 0.1 # Removed - Will use automatic tuning
-SAC_ACTOR_LR = 3e-4 # Lowered from 5e-4
-SAC_CRITIC_LR = 5e-4 # Lowered from 8e-4
+SAC_ACTOR_LR = 1.5e-5 # 3e-4 -> 1.5e-4 (halved) -> 1.5e-5 (10x lower again)
+SAC_CRITIC_LR = 2.5e-5 # 5e-4 -> 2.5e-4 (halved) -> 2.5e-5 (10x lower again)
 SAC_BATCH_SIZE = 64
 SAC_BUFFER_MAX_SIZE = 20000
 SAC_MIN_BUFFER_SIZE = 1000
 SAC_UPDATE_INTERVAL = 1
 SAC_TARGET_UPDATE_INTERVAL = 2
 SAC_GRADIENT_CLIP = 1.0
-SAC_REWARD_SCALE = 2.0 # Decreased from 10.0
+SAC_REWARD_SCALE = 1.0 # Decreased from 10.0 -> 2.0 -> 1.0
 SAC_USE_BATCH_NORM = True
 SAC_USE_RESIDUAL = True
 SAC_MODEL_DIR = 'models/simplified_sac' # Default dir within the agent class
--- a/gru_sac_predictor/models/run_20250416_170503/sac_agent/actor.weights.h5
+++ b/gru_sac_predictor/models/run_20250416_170503/sac_agent/actor.weights.h5
--- a/gru_sac_predictor/models/run_20250416_170503/sac_agent/alpha.npy
+++ b/gru_sac_predictor/models/run_20250416_170503/sac_agent/alpha.npy
--- a/gru_sac_predictor/models/run_20250416_170503/sac_agent/critic_1.weights.h5
+++ b/gru_sac_predictor/models/run_20250416_170503/sac_agent/critic_1.weights.h5
--- a/gru_sac_predictor/models/run_20250416_170503/sac_agent/critic_2.weights.h5
+++ b/gru_sac_predictor/models/run_20250416_170503/sac_agent/critic_2.weights.h5
--- a/gru_sac_predictor/models/run_20250416_170503/sac_agent/target_critic_1.weights.h5
+++ b/gru_sac_predictor/models/run_20250416_170503/sac_agent/target_critic_1.weights.h5
--- a/gru_sac_predictor/models/run_20250416_170503/sac_agent/target_critic_2.weights.h5
+++ b/gru_sac_predictor/models/run_20250416_170503/sac_agent/target_critic_2.weights.h5
--- a/gru_sac_predictor/results/20250416_170503/backtest_performance_report_20250416_170503.md
+++ b/gru_sac_predictor/results/20250416_170503/backtest_performance_report_20250416_170503.md
@ -0,0 +1,42 @@
+# GRU+SAC Backtesting Performance Report
+
+Report generated on: 2025-04-16 17:09:06.155365
+Data range: 2025-03-06 15:23:00+00:00 to 2025-03-07 23:57:00+00:00
+Total duration: 1 days 08:34:00
+
+## Strategy Performance Metrics
+
+* **Initial capital:** $10,000.00
+* **Final portfolio value:** $10,417.44
+* **Total return:** 4.17%
+* **Annualized return:** 6338986.83%
+* **Sharpe ratio (annualized):** 10.5985
+* **Sortino ratio (annualized):** 16.0926
+* **Volatility (annualized):** 110.03%
+* **Maximum drawdown:** 6.76%
+* **Total trades:** 1
+
+## Buy and Hold Benchmark
+
+* **Final value (B&H):** $9,658.75
+* **Total return (B&H):** -3.41%
+
+## Position & Prediction Analysis
+
+* **Average absolute position size:** 1.0000
+* **Position sign accuracy vs return:** 50.93%
+* **Prediction sign accuracy vs return:** 48.92%
+* **Prediction RMSE (on returns):** 0.004036
+
+## Correlations
+
+* **Prediction-Return correlation:** -0.0042
+* **Prediction-Position correlation:** 0.0000
+* **Uncertainty-Position Size correlation:** 0.0000
+
+## Notes
+
+* Transaction cost used: 0.0500% per position change value.
+* GRU lookback period: 60 minutes.
+* V6 features + return features used.
+* Uncertainty estimated via MC Dropout standard deviation.
--- a/gru_sac_predictor/results/20250416_170503/backtest_results_20250416_170503.png
+++ b/gru_sac_predictor/results/20250416_170503/backtest_results_20250416_170503.png
--- a/gru_sac_predictor/results/20250416_170503/config_20250416_170503.json
+++ b/gru_sac_predictor/results/20250416_170503/config_20250416_170503.json
@ -0,0 +1,66 @@
+{
+    "run_id": "20250416_170503",
+    "db_dir": "../downloaded_data",
+    "ticker": "BTC-USD",
+    "exchange": "COINBASE",
+    "start_date": "2025-03-01",
+    "end_date": "2025-03-10",
+    "interval": "1min",
+    "model_save_path": "gru_sac_predictor/models/run_20250416_170503",
+    "results_plot_path": "gru_sac_predictor/results/20250416_170503/backtest_results_20250416_170503.png",
+    "report_save_path": "gru_sac_predictor/results/20250416_170503/backtest_performance_report_20250416_170503.md",
+    "train_ratio": 0.6,
+    "validation_ratio": 0.2,
+    "gru_lookback": 60,
+    "gru_prediction_horizon": 1,
+    "gru_epochs": 20,
+    "gru_batch_size": 32,
+    "gru_patience": 10,
+    "gru_lr_factor": 0.5,
+    "gru_return_scale": 0.03,
+    "gru_model_load_run_id": "20250416_142744",
+    "sac_state_dim": 5,
+    "sac_hidden_size": 64,
+    "sac_gamma": 0.97,
+    "sac_tau": 0.02,
+    "sac_actor_lr": 0.00015,
+    "sac_critic_lr": 0.00025,
+    "sac_batch_size": 64,
+    "sac_buffer_max_size": 20000,
+    "sac_min_buffer_size": 1000,
+    "sac_update_interval": 1,
+    "sac_target_update_interval": 2,
+    "sac_gradient_clip": 1.0,
+    "sac_reward_scale": 2.0,
+    "sac_use_batch_norm": true,
+    "sac_use_residual": true,
+    "sac_model_dir": "models/simplified_sac",
+    "sac_epochs": 50,
+    "experience_config": {
+        "initial_experiences": 3000,
+        "experiences_per_batch": 64,
+        "batch_generation_interval": 500,
+        "balance_market_regimes": false,
+        "recency_bias_strength": 0.5,
+        "high_uncertainty_quantile": 0.75,
+        "extreme_return_quantile": 0.1,
+        "min_uncertainty_ratio": 0.2,
+        "min_extreme_return_ratio": 0.1,
+        "use_parallel_generation": false,
+        "precompute_all_gru_outputs": true,
+        "buffer_update_strategy": "fifo",
+        "training_iterations_per_step": 1
+    },
+    "initial_capital": 10000.0,
+    "transaction_cost": 0.0005,
+    "opportunity_cost_penalty_factor": 0.0,
+    "high_return_threshold": 0.002,
+    "action_tolerance": 0.3,
+    "load_existing_system": true,
+    "train_gru_model": false,
+    "train_sac_agent": true,
+    "load_sac_agent": false,
+    "run_backtest": true,
+    "generate_plots": true,
+    "generate_report": true
+}
--- a/gru_sac_predictor/results/20250416_170503/sac_training_history_20250416_170503.png
+++ b/gru_sac_predictor/results/20250416_170503/sac_training_history_20250416_170503.png
--- a/gru_sac_predictor/results/20250416_182038/config_20250416_182038.json
+++ b/gru_sac_predictor/results/20250416_182038/config_20250416_182038.json
@ -0,0 +1,66 @@
+{
+    "run_id": "20250416_182038",
+    "db_dir": "../downloaded_data",
+    "ticker": "BTC-USD",
+    "exchange": "COINBASE",
+    "start_date": "2025-03-01",
+    "end_date": "2025-03-10",
+    "interval": "1min",
+    "model_save_path": "gru_sac_predictor/models/run_20250416_182038",
+    "results_plot_path": "gru_sac_predictor/results/20250416_182038/backtest_results_20250416_182038.png",
+    "report_save_path": "gru_sac_predictor/results/20250416_182038/backtest_performance_report_20250416_182038.md",
+    "train_ratio": 0.6,
+    "validation_ratio": 0.2,
+    "gru_lookback": 60,
+    "gru_prediction_horizon": 1,
+    "gru_epochs": 20,
+    "gru_batch_size": 32,
+    "gru_patience": 10,
+    "gru_lr_factor": 0.5,
+    "gru_return_scale": 0.03,
+    "gru_model_load_run_id": "20250416_142744",
+    "sac_state_dim": 5,
+    "sac_hidden_size": 64,
+    "sac_gamma": 0.97,
+    "sac_tau": 0.02,
+    "sac_actor_lr": 1.5e-05,
+    "sac_critic_lr": 2.5e-05,
+    "sac_batch_size": 64,
+    "sac_buffer_max_size": 20000,
+    "sac_min_buffer_size": 1000,
+    "sac_update_interval": 1,
+    "sac_target_update_interval": 2,
+    "sac_gradient_clip": 1.0,
+    "sac_reward_scale": 1.0,
+    "sac_use_batch_norm": true,
+    "sac_use_residual": true,
+    "sac_model_dir": "models/simplified_sac",
+    "sac_epochs": 50,
+    "experience_config": {
+        "initial_experiences": 3000,
+        "experiences_per_batch": 64,
+        "batch_generation_interval": 500,
+        "balance_market_regimes": false,
+        "recency_bias_strength": 0.5,
+        "high_uncertainty_quantile": 0.75,
+        "extreme_return_quantile": 0.1,
+        "min_uncertainty_ratio": 0.2,
+        "min_extreme_return_ratio": 0.1,
+        "use_parallel_generation": false,
+        "precompute_all_gru_outputs": true,
+        "buffer_update_strategy": "fifo",
+        "training_iterations_per_step": 1
+    },
+    "initial_capital": 10000.0,
+    "transaction_cost": 0.0005,
+    "opportunity_cost_penalty_factor": 0.0,
+    "high_return_threshold": 0.002,
+    "action_tolerance": 0.3,
+    "load_existing_system": true,
+    "train_gru_model": false,
+    "train_sac_agent": true,
+    "load_sac_agent": false,
+    "run_backtest": true,
+    "generate_plots": true,
+    "generate_report": true
+}
--- a/gru_sac_predictor/src/pycache/sac_agent_simplified.cpython-312.pyc
+++ b/gru_sac_predictor/src/pycache/sac_agent_simplified.cpython-312.pyc
--- a/gru_sac_predictor/src/pycache/trading_system.cpython-312.pyc
+++ b/gru_sac_predictor/src/pycache/trading_system.cpython-312.pyc
--- a/gru_sac_predictor/src/trading_system.py
+++ b/gru_sac_predictor/src/trading_system.py
@ -368,11 +368,52 @@ class TradingSystem:
        self.y_scaler = None
        self.last_prediction = None
        self.experiences = [] # Store (state, action, reward, next_state, done)
+        # V7.23: Add storage for scaled feature indices
+        self._momentum_feature_idx = None
+        self._volatility_feature_idx = None
        # Initialize scalers from GRU model if available and loaded
        if self.gru_model and self.gru_model.is_loaded:
            self.feature_scaler = self.gru_model.feature_scaler
            self.y_scaler = self.gru_model.y_scaler
            self._logger.info("Scalers initialized from pre-loaded GRU model.")
+            # V7.23: Attempt to set indices if scaler loaded
+            self._set_feature_indices()
+
+    # V7.23: Helper method to find and store feature indices
+    def _set_feature_indices(self):
+        """Find and store the scaler column indices of 'return_5m' (momentum) and 'volatility_14d'; return True on success, False otherwise."""
+        if self.feature_scaler is None:
+            self._logger.warning("Feature scaler not available. Cannot set feature indices.")
+            self._momentum_feature_idx = None
+            self._volatility_feature_idx = None
+            return False
+
+        if not hasattr(self.feature_scaler, 'feature_names_in_'):
+            # No name-based lookup is possible without 'feature_names_in_'; both branches below only log and return False.
+            if hasattr(self.feature_scaler, 'n_features_in_'):
+                 self._logger.warning("Feature scaler lacks 'feature_names_in_'. Cannot reliably find named features. State scaling may fail.")
+                 # Cannot set indices reliably without names
+                 self._momentum_feature_idx = None
+                 self._volatility_feature_idx = None
+                 return False
+            else:
+                 self._logger.error("Feature scaler lacks both 'feature_names_in_' and 'n_features_in_'. Cannot determine feature indices.")
+                 self._momentum_feature_idx = None
+                 self._volatility_feature_idx = None
+                 return False
+
+        feature_columns = list(self.feature_scaler.feature_names_in_)
+        try:
+            # Use the correct feature names: 'return_5m' and 'volatility_14d'
+            self._momentum_feature_idx = feature_columns.index('return_5m')
+            self._volatility_feature_idx = feature_columns.index('volatility_14d')
+            self._logger.info(f"Successfully set feature indices: momentum_5m={self._momentum_feature_idx}, volatility_14d={self._volatility_feature_idx}")
+            return True
+        except ValueError as e:
+            self._logger.error(f"Could not find 'return_5m' or 'volatility_14d' in scaler's feature columns: {feature_columns}. Error: {e}. State construction will likely fail.")
+            self._momentum_feature_idx = None
+            self._volatility_feature_idx = None
+            return False

    # V7-V6 Update: Extract features and predict return/uncertainty for SAC state
    def _extract_features_and_predict(self, data_df_full, current_idx):
@ -706,13 +747,17 @@ class TradingSystem:
        
        for i in tqdm(range(num_sequences - 1), desc="Generating Experiences"): # Iterate up to second-to-last sequence result
            # --- V7.15 START: Construct 5D state s_t ---
+            # V7.23 Get SCALED momentum/volatility for state t
            pred_return_t = all_pred_returns[i]
            uncertainty_t = all_uncertainties[i]
-            momentum_5_t = all_momentum_5[i]
-            volatility_20_t = all_volatility_20[i]
+            momentum_5_t_scaled = all_momentum_5[i] # Use scaled value
+            volatility_20_t_scaled = all_volatility_20[i] # Use scaled value
+            
            # Calculate z_proxy using position *before* action (current_position)
-            z_proxy_t = current_position * volatility_20_t 
-            state = np.array([pred_return_t, uncertainty_t, z_proxy_t, momentum_5_t, volatility_20_t], dtype=np.float32)
+            # V7.23: Use SCALED volatility for consistency within state.
+            z_proxy_t = current_position * volatility_20_t_scaled 
+            state = np.array([pred_return_t, uncertainty_t, z_proxy_t, momentum_5_t_scaled, volatility_20_t_scaled], dtype=np.float32)
+            
            # Handle potential NaNs/Infs
            if np.any(np.isnan(state)) or np.any(np.isinf(state)):
                 logging.warning(f"NaN/Inf in state at step {i}. Replacing with 0. State: {state}")
@ -731,13 +776,17 @@ class TradingSystem:
                 continue # Skip this experience if action format is wrong
            
            # --- V7.15 START: Construct 5D next_state s_{t+1} ---
+            # V7.23 Get SCALED momentum/volatility for state t+1
            pred_return_t1 = all_pred_returns[i+1]
            uncertainty_t1 = all_uncertainties[i+1]
-            momentum_5_t1 = all_momentum_5[i+1]
-            volatility_20_t1 = all_volatility_20[i+1]
+            momentum_5_t1_scaled = all_momentum_5[i+1] # Use scaled value
+            volatility_20_t1_scaled = all_volatility_20[i+1] # Use scaled value
+            
            # Calculate z_proxy using the *action* taken (action is position for next step)
-            z_proxy_t1 = action * volatility_20_t1 # Use action, not current_position
-            next_state = np.array([pred_return_t1, uncertainty_t1, z_proxy_t1, momentum_5_t1, volatility_20_t1], dtype=np.float32)
+            # V7.23: Use SCALED volatility for consistency
+            z_proxy_t1 = action * volatility_20_t1_scaled # Use action, not current_position
+            next_state = np.array([pred_return_t1, uncertainty_t1, z_proxy_t1, momentum_5_t1_scaled, volatility_20_t1_scaled], dtype=np.float32)
+            
            # Handle potential NaNs/Infs
            if np.any(np.isnan(next_state)) or np.any(np.isinf(next_state)):
                 logging.warning(f"NaN/Inf in next_state at step {i}. Replacing with 0. State: {next_state}")
@ -926,18 +975,36 @@ class TradingSystem:
            if os.path.isdir(gru_path):
                # V7-V6 Update: Instantiate and load CryptoGRUModel
                self.gru_model = CryptoGRUModel()
-                if not self.gru_model.load(gru_path): print("Warning: Failed to load GRU model/scalers.")
-                # Store loaded scalers in TradingSystem as well for convenience?
+                if self.gru_model.load(gru_path):
                    self.feature_scaler = self.gru_model.feature_scaler
                    self.y_scaler = self.gru_model.y_scaler
+                    # V7.23: Set feature indices after loading scaler
+                    if self._set_feature_indices():
+                         gru_ok = True
+                    else:
+                         print("Warning: Failed to set feature indices after loading GRU scaler.")
+                         gru_ok = False # Mark as failed if indices can't be set
+                else:
+                     print("Warning: Failed to load GRU model/scalers.")
            else: print(f"Warning: GRU model directory not found: {gru_path}")
            if os.path.isdir(sac_path):
-                 # State dim is 2: [pred_return, confidence]
-                 self.sac_agent = SimplifiedSACTradingAgent(state_dim=2)
-                 self.sac_agent.load(sac_path)
+                 # V7.23 Use correct state dim when loading
+                 self.sac_agent = SimplifiedSACTradingAgent(state_dim=5)
+                 if self.sac_agent.load(sac_path):
+                     sac_ok = True
+                 else:
+                     print("Warning: Failed to load SAC agent.")
            else: print(f"Warning: SAC agent directory not found: {sac_path}")
-            print(f"Trading system loading {'successful' if models_loaded else 'failed (partially?) '}.")
-        except Exception as e: print(f"An error occurred during loading: {e}")
+            # V7.23 Check if both models loaded successfully
+            models_loaded = gru_ok and sac_ok
+            status = 'successful' if models_loaded else ('partially successful' if gru_ok or sac_ok else 'failed')
+            print(f"Trading system loading {status}.")
+        except Exception as e:
+            print(f"An error occurred during loading: {e}")
+            # Ensure flags reflect failure
+            models_loaded = False
+
+        return models_loaded # Return overall success status

    # V7-V6 Update: Adapt GRU training call for price regression
    def train_gru(self, train_data: pd.DataFrame, val_data: pd.DataFrame, 
@ -989,6 +1056,11 @@ class TradingSystem:
            self.y_scaler = y_scaler # Store fitted scaler
            logging.info(f"Target price scaling complete. Scaler type: {type(y_scaler)}")
            
+            # V7.23: Set feature indices now that scaler is fitted
+            if not self._set_feature_indices():
+                logging.error("Failed to set feature indices after scaling. Cannot proceed with GRU training.")
+                return None # Exit if indices aren't found
+            
        except Exception as e:
             logging.error(f"Error during scaling: {e}", exc_info=True)
             return None # Corrected indentation
@ -1302,30 +1374,29 @@ class ExtendedBacktester:
            # 1. Get state from pre-computed results
            pred_return = all_pred_returns[i]
            uncertainty_sigma = all_uncertainties[i]
-            # V7.13 Get momentum and volatility for the current step
-            momentum_5 = all_momentum_5[i]
-            volatility_20 = all_volatility_20[i]
+            # V7.23 Get SCALED momentum and volatility for the current step
+            momentum_5_scaled = all_momentum_5[i]
+            volatility_20_scaled = all_volatility_20[i]

-            # V7.13 Calculate z-proxy (Position as proxy for risk aversion)
-            # Use position *before* action, scaled by volatility
-            z_proxy = current_position * volatility_20 # Simpler proxy for now
+            # Calculate z-proxy (Position as proxy for risk aversion)
+            # V7.23 Use SCALED volatility for consistency in state
+            z_proxy = current_position * volatility_20_scaled

-            # V7.13 Construct 5D state: [pred_return, uncertainty, z_proxy, momentum_5, volatility_20]
+            # Construct 5D state using SCALED momentum/volatility
            state = np.array([
                pred_return,
                uncertainty_sigma,
                z_proxy,
-                momentum_5,
-                volatility_20
+                momentum_5_scaled,
+                volatility_20_scaled
            ], dtype=np.float32)

-            # V7.13 Handle NaNs/Infs in state (replace with 0 for simplicity)
+            # Handle NaNs/Infs in state
            if np.any(np.isnan(state)) or np.any(np.isinf(state)):
                 feature_logger.warning(f"NaN/Inf detected in state at step {i}. Replacing with 0. State: {state}")
                 state = np.nan_to_num(state, nan=0.0, posinf=0.0, neginf=0.0)

            # 2. Get deterministic action from SAC agent
-            # V7.13: get_action now returns (action, log_prob), unpack needed
            action_tuple = self.trading_system.sac_agent.get_action(state, deterministic=True)
            # V7.14 Ensure unpacking handles potential tuple vs single value return if agent API changes
            if isinstance(action_tuple, (tuple, list)) and len(action_tuple) > 0: