fixed issues with SAC scaling inputs

2025-04-16 18:23:11 -04:00 · 2025-04-16 18:23:11 -04:00 · 843dde88f5
commit 843dde88f5
parent 984a230bcd
18 changed files with 279 additions and 34 deletions
--- a/gru_sac_predictor/pycache/main.cpython-312.pyc
+++ b/gru_sac_predictor/pycache/main.cpython-312.pyc
--- a/gru_sac_predictor/logs/20250416_170503/main_20250416_170503.log
+++ b/gru_sac_predictor/logs/20250416_170503/main_20250416_170503.log
--- a/gru_sac_predictor/logs/20250416_182038/main_20250416_182038.log
+++ b/gru_sac_predictor/logs/20250416_182038/main_20250416_182038.log
--- a/gru_sac_predictor/main.py
+++ b/gru_sac_predictor/main.py
@ -85,15 +85,15 @@ SAC_HIDDEN_SIZE = 64
 SAC_GAMMA = 0.97
 SAC_TAU = 0.02
 # SAC_ALPHA = 0.1 # Removed - Will use automatic tuning
-SAC_ACTOR_LR = 3e-4 # Lowered from 5e-4
+SAC_ACTOR_LR = 1.5e-5 # Halved from 3e-4 to 1.5e-4, then 10x lower again
-SAC_CRITIC_LR = 5e-4 # Lowered from 8e-4
+SAC_CRITIC_LR = 2.5e-5 # Halved from 5e-4 to 2.5e-4, then 10x lower again
 SAC_BATCH_SIZE = 64
 SAC_BUFFER_MAX_SIZE = 20000
 SAC_MIN_BUFFER_SIZE = 1000
 SAC_UPDATE_INTERVAL = 1
 SAC_TARGET_UPDATE_INTERVAL = 2
 SAC_GRADIENT_CLIP = 1.0
-SAC_REWARD_SCALE = 2.0 # Decreased from 10.0
+SAC_REWARD_SCALE = 1.0 # Decreased from 10.0 -> 2.0 -> 1.0
 SAC_USE_BATCH_NORM = True
 SAC_USE_RESIDUAL = True
 SAC_MODEL_DIR = 'models/simplified_sac' # Default dir within the agent class
--- a/gru_sac_predictor/models/run_20250416_170503/sac_agent/actor.weights.h5
+++ b/gru_sac_predictor/models/run_20250416_170503/sac_agent/actor.weights.h5
--- a/gru_sac_predictor/models/run_20250416_170503/sac_agent/alpha.npy
+++ b/gru_sac_predictor/models/run_20250416_170503/sac_agent/alpha.npy
--- a/gru_sac_predictor/models/run_20250416_170503/sac_agent/critic_1.weights.h5
+++ b/gru_sac_predictor/models/run_20250416_170503/sac_agent/critic_1.weights.h5
--- a/gru_sac_predictor/models/run_20250416_170503/sac_agent/critic_2.weights.h5
+++ b/gru_sac_predictor/models/run_20250416_170503/sac_agent/critic_2.weights.h5
--- a/gru_sac_predictor/models/run_20250416_170503/sac_agent/target_critic_1.weights.h5
+++ b/gru_sac_predictor/models/run_20250416_170503/sac_agent/target_critic_1.weights.h5
--- a/gru_sac_predictor/models/run_20250416_170503/sac_agent/target_critic_2.weights.h5
+++ b/gru_sac_predictor/models/run_20250416_170503/sac_agent/target_critic_2.weights.h5
--- a/gru_sac_predictor/results/20250416_170503/backtest_performance_report_20250416_170503.md
+++ b/gru_sac_predictor/results/20250416_170503/backtest_performance_report_20250416_170503.md
@ -0,0 +1,42 @@
 # GRU+SAC Backtesting Performance Report
 Report generated on: 2025-04-16 17:09:06.155365
 Data range: 2025-03-06 15:23:00+00:00 to 2025-03-07 23:57:00+00:00
 Total duration: 1 days 08:34:00
 ## Strategy Performance Metrics
 * **Initial capital:** $10,000.00
 * **Final portfolio value:** $10,417.44
 * **Total return:** 4.17%
 * **Annualized return:** 6338986.83%
 * **Sharpe ratio (annualized):** 10.5985
 * **Sortino ratio (annualized):** 16.0926
 * **Volatility (annualized):** 110.03%
 * **Maximum drawdown:** 6.76%
 * **Total trades:** 1
 ## Buy and Hold Benchmark
 * **Final value (B&H):** $9,658.75
 * **Total return (B&H):** -3.41%
 ## Position & Prediction Analysis
 * **Average absolute position size:** 1.0000
 * **Position sign accuracy vs return:** 50.93%
 * **Prediction sign accuracy vs return:** 48.92%
 * **Prediction RMSE (on returns):** 0.004036
 ## Correlations
 * **Prediction-Return correlation:** -0.0042
 * **Prediction-Position correlation:** 0.0000
 * **Uncertainty-Position Size correlation:** 0.0000
 ## Notes
 * Transaction cost used: 0.0500% per position change value.
 * GRU lookback period: 60 minutes.
 * V6 features + return features used.
 * Uncertainty estimated via MC Dropout standard deviation.
--- a/gru_sac_predictor/results/20250416_170503/backtest_results_20250416_170503.png
+++ b/gru_sac_predictor/results/20250416_170503/backtest_results_20250416_170503.png
--- a/gru_sac_predictor/results/20250416_170503/config_20250416_170503.json
+++ b/gru_sac_predictor/results/20250416_170503/config_20250416_170503.json
@ -0,0 +1,66 @@
 {
    "run_id": "20250416_170503",
    "db_dir": "../downloaded_data",
    "ticker": "BTC-USD",
    "exchange": "COINBASE",
    "start_date": "2025-03-01",
    "end_date": "2025-03-10",
    "interval": "1min",
    "model_save_path": "gru_sac_predictor/models/run_20250416_170503",
    "results_plot_path": "gru_sac_predictor/results/20250416_170503/backtest_results_20250416_170503.png",
    "report_save_path": "gru_sac_predictor/results/20250416_170503/backtest_performance_report_20250416_170503.md",
    "train_ratio": 0.6,
    "validation_ratio": 0.2,
    "gru_lookback": 60,
    "gru_prediction_horizon": 1,
    "gru_epochs": 20,
    "gru_batch_size": 32,
    "gru_patience": 10,
    "gru_lr_factor": 0.5,
    "gru_return_scale": 0.03,
    "gru_model_load_run_id": "20250416_142744",
    "sac_state_dim": 5,
    "sac_hidden_size": 64,
    "sac_gamma": 0.97,
    "sac_tau": 0.02,
    "sac_actor_lr": 0.00015,
    "sac_critic_lr": 0.00025,
    "sac_batch_size": 64,
    "sac_buffer_max_size": 20000,
    "sac_min_buffer_size": 1000,
    "sac_update_interval": 1,
    "sac_target_update_interval": 2,
    "sac_gradient_clip": 1.0,
    "sac_reward_scale": 2.0,
    "sac_use_batch_norm": true,
    "sac_use_residual": true,
    "sac_model_dir": "models/simplified_sac",
    "sac_epochs": 50,
    "experience_config": {
        "initial_experiences": 3000,
        "experiences_per_batch": 64,
        "batch_generation_interval": 500,
        "balance_market_regimes": false,
        "recency_bias_strength": 0.5,
        "high_uncertainty_quantile": 0.75,
        "extreme_return_quantile": 0.1,
        "min_uncertainty_ratio": 0.2,
        "min_extreme_return_ratio": 0.1,
        "use_parallel_generation": false,
        "precompute_all_gru_outputs": true,
        "buffer_update_strategy": "fifo",
        "training_iterations_per_step": 1
    },
    "initial_capital": 10000.0,
    "transaction_cost": 0.0005,
    "opportunity_cost_penalty_factor": 0.0,
    "high_return_threshold": 0.002,
    "action_tolerance": 0.3,
    "load_existing_system": true,
    "train_gru_model": false,
    "train_sac_agent": true,
    "load_sac_agent": false,
    "run_backtest": true,
    "generate_plots": true,
    "generate_report": true
 }
--- a/gru_sac_predictor/results/20250416_170503/sac_training_history_20250416_170503.png
+++ b/gru_sac_predictor/results/20250416_170503/sac_training_history_20250416_170503.png
--- a/gru_sac_predictor/results/20250416_182038/config_20250416_182038.json
+++ b/gru_sac_predictor/results/20250416_182038/config_20250416_182038.json
@ -0,0 +1,66 @@
 {
    "run_id": "20250416_182038",
    "db_dir": "../downloaded_data",
    "ticker": "BTC-USD",
    "exchange": "COINBASE",
    "start_date": "2025-03-01",
    "end_date": "2025-03-10",
    "interval": "1min",
    "model_save_path": "gru_sac_predictor/models/run_20250416_182038",
    "results_plot_path": "gru_sac_predictor/results/20250416_182038/backtest_results_20250416_182038.png",
    "report_save_path": "gru_sac_predictor/results/20250416_182038/backtest_performance_report_20250416_182038.md",
    "train_ratio": 0.6,
    "validation_ratio": 0.2,
    "gru_lookback": 60,
    "gru_prediction_horizon": 1,
    "gru_epochs": 20,
    "gru_batch_size": 32,
    "gru_patience": 10,
    "gru_lr_factor": 0.5,
    "gru_return_scale": 0.03,
    "gru_model_load_run_id": "20250416_142744",
    "sac_state_dim": 5,
    "sac_hidden_size": 64,
    "sac_gamma": 0.97,
    "sac_tau": 0.02,
    "sac_actor_lr": 1.5e-05,
    "sac_critic_lr": 2.5e-05,
    "sac_batch_size": 64,
    "sac_buffer_max_size": 20000,
    "sac_min_buffer_size": 1000,
    "sac_update_interval": 1,
    "sac_target_update_interval": 2,
    "sac_gradient_clip": 1.0,
    "sac_reward_scale": 1.0,
    "sac_use_batch_norm": true,
    "sac_use_residual": true,
    "sac_model_dir": "models/simplified_sac",
    "sac_epochs": 50,
    "experience_config": {
        "initial_experiences": 3000,
        "experiences_per_batch": 64,
        "batch_generation_interval": 500,
        "balance_market_regimes": false,
        "recency_bias_strength": 0.5,
        "high_uncertainty_quantile": 0.75,
        "extreme_return_quantile": 0.1,
        "min_uncertainty_ratio": 0.2,
        "min_extreme_return_ratio": 0.1,
        "use_parallel_generation": false,
        "precompute_all_gru_outputs": true,
        "buffer_update_strategy": "fifo",
        "training_iterations_per_step": 1
    },
    "initial_capital": 10000.0,
    "transaction_cost": 0.0005,
    "opportunity_cost_penalty_factor": 0.0,
    "high_return_threshold": 0.002,
    "action_tolerance": 0.3,
    "load_existing_system": true,
    "train_gru_model": false,
    "train_sac_agent": true,
    "load_sac_agent": false,
    "run_backtest": true,
    "generate_plots": true,
    "generate_report": true
 }
--- a/gru_sac_predictor/src/pycache/sac_agent_simplified.cpython-312.pyc
+++ b/gru_sac_predictor/src/pycache/sac_agent_simplified.cpython-312.pyc
--- a/gru_sac_predictor/src/pycache/trading_system.cpython-312.pyc
+++ b/gru_sac_predictor/src/pycache/trading_system.cpython-312.pyc
--- a/gru_sac_predictor/src/trading_system.py
+++ b/gru_sac_predictor/src/trading_system.py
@ -368,11 +368,52 @@ class TradingSystem:
        self.y_scaler = None
        self.last_prediction = None
        self.experiences = [] # Store (state, action, reward, next_state, done)
        # V7.23: Add storage for scaled feature indices
        self._momentum_feature_idx = None
        self._volatility_feature_idx = None
        # Initialize scalers from GRU model if available and loaded
        if self.gru_model and self.gru_model.is_loaded:
            self.feature_scaler = self.gru_model.feature_scaler
            self.y_scaler = self.gru_model.y_scaler
            self._logger.info("Scalers initialized from pre-loaded GRU model.")
            # V7.23: Attempt to set indices if scaler loaded
            self._set_feature_indices()
    # V7.23: Helper method to find and store feature indices
    def _set_feature_indices(self):
        """Resolve and cache the scaler column positions of 'return_5m' and 'volatility_14d'.

        The lookup relies on the scaler exposing ``feature_names_in_``. On
        success the positions are stored in ``self._momentum_feature_idx`` and
        ``self._volatility_feature_idx``. On any failure (no scaler, no column
        names, or a missing column) both attributes are reset to ``None`` and
        the reason is logged through ``self._logger``.

        Returns:
            bool: True if both indices were found and stored, False otherwise.
        """
        scaler = self.feature_scaler
        resolved = False

        if scaler is None:
            self._logger.warning("Feature scaler not available. Cannot set feature indices.")
        elif hasattr(scaler, 'feature_names_in_'):
            column_names = list(scaler.feature_names_in_)
            try:
                # Expected column names: 'return_5m' (momentum) and 'volatility_14d'.
                self._momentum_feature_idx = column_names.index('return_5m')
                self._volatility_feature_idx = column_names.index('volatility_14d')
                self._logger.info(f"Successfully set feature indices: momentum_5m={self._momentum_feature_idx}, volatility_14d={self._volatility_feature_idx}")
                resolved = True
            except ValueError as lookup_err:
                self._logger.error(f"Could not find 'return_5m' or 'volatility_14d' in scaler's feature columns: {column_names}. Error: {lookup_err}. State construction will likely fail.")
        elif hasattr(scaler, 'n_features_in_'):
            # Scaler reports a feature count but no names, so positions
            # cannot be matched to named features.
            self._logger.warning("Feature scaler lacks 'feature_names_in_'. Cannot reliably find named features. State scaling may fail.")
        else:
            self._logger.error("Feature scaler lacks both 'feature_names_in_' and 'n_features_in_'. Cannot determine feature indices.")

        if not resolved:
            # Single reset point: also clears a momentum index that was
            # stored before the volatility lookup failed.
            self._momentum_feature_idx = None
            self._volatility_feature_idx = None
        return resolved
    # V7-V6 Update: Extract features and predict return/uncertainty for SAC state
    def _extract_features_and_predict(self, data_df_full, current_idx):
@ -706,13 +747,17 @@ class TradingSystem:
        for i in tqdm(range(num_sequences - 1), desc="Generating Experiences"): # Iterate up to second-to-last sequence result
            # --- V7.15 START: Construct 5D state s_t ---
            # V7.23 Get SCALED momentum/volatility for state t
            pred_return_t = all_pred_returns[i]
            uncertainty_t = all_uncertainties[i]
-            momentum_5_t = all_momentum_5[i]
+            momentum_5_t_scaled = all_momentum_5[i] # Use scaled value
-            volatility_20_t = all_volatility_20[i]
+            volatility_20_t_scaled = all_volatility_20[i] # Use scaled value
            # Calculate z_proxy using position *before* action (current_position)
-            z_proxy_t = current_position * volatility_20_t 
+            # V7.23: Use SCALED volatility for consistency within state.
-            state = np.array([pred_return_t, uncertainty_t, z_proxy_t, momentum_5_t, volatility_20_t], dtype=np.float32)
+            z_proxy_t = current_position * volatility_20_t_scaled 
            state = np.array([pred_return_t, uncertainty_t, z_proxy_t, momentum_5_t_scaled, volatility_20_t_scaled], dtype=np.float32)
            # Handle potential NaNs/Infs
            if np.any(np.isnan(state)) or np.any(np.isinf(state)):
                 logging.warning(f"NaN/Inf in state at step {i}. Replacing with 0. State: {state}")
@ -731,13 +776,17 @@ class TradingSystem:
                 continue # Skip this experience if action format is wrong
            # --- V7.15 START: Construct 5D next_state s_{t+1} ---
            # V7.23 Get SCALED momentum/volatility for state t+1
            pred_return_t1 = all_pred_returns[i+1]
            uncertainty_t1 = all_uncertainties[i+1]
-            momentum_5_t1 = all_momentum_5[i+1]
+            momentum_5_t1_scaled = all_momentum_5[i+1] # Use scaled value
-            volatility_20_t1 = all_volatility_20[i+1]
+            volatility_20_t1_scaled = all_volatility_20[i+1] # Use scaled value
            # Calculate z_proxy using the *action* taken (action is position for next step)
-            z_proxy_t1 = action * volatility_20_t1 # Use action, not current_position
+            # V7.23: Use SCALED volatility for consistency
-            next_state = np.array([pred_return_t1, uncertainty_t1, z_proxy_t1, momentum_5_t1, volatility_20_t1], dtype=np.float32)
+            z_proxy_t1 = action * volatility_20_t1_scaled # Use action, not current_position
            next_state = np.array([pred_return_t1, uncertainty_t1, z_proxy_t1, momentum_5_t1_scaled, volatility_20_t1_scaled], dtype=np.float32)
            # Handle potential NaNs/Infs
            if np.any(np.isnan(next_state)) or np.any(np.isinf(next_state)):
                 logging.warning(f"NaN/Inf in next_state at step {i}. Replacing with 0. State: {next_state}")
@ -926,18 +975,36 @@ class TradingSystem:
            if os.path.isdir(gru_path):
                # V7-V6 Update: Instantiate and load CryptoGRUModel
                self.gru_model = CryptoGRUModel()
-                if not self.gru_model.load(gru_path): print("Warning: Failed to load GRU model/scalers.")
+                if self.gru_model.load(gru_path):
                # Store loaded scalers in TradingSystem as well for convenience?
                    self.feature_scaler = self.gru_model.feature_scaler
                    self.y_scaler = self.gru_model.y_scaler
                    # V7.23: Set feature indices after loading scaler
                    if self._set_feature_indices():
                         gru_ok = True
                    else:
                         print("Warning: Failed to set feature indices after loading GRU scaler.")
                         gru_ok = False # Mark as failed if indices can't be set
                else:
                     print("Warning: Failed to load GRU model/scalers.")
            else: print(f"Warning: GRU model directory not found: {gru_path}")
            if os.path.isdir(sac_path):
-                 # State dim is 2: [pred_return, confidence]
+                 # V7.23 Use correct state dim when loading
-                 self.sac_agent = SimplifiedSACTradingAgent(state_dim=2)
+                 self.sac_agent = SimplifiedSACTradingAgent(state_dim=5)
-                 self.sac_agent.load(sac_path)
+                 if self.sac_agent.load(sac_path):
                     sac_ok = True
                 else:
                     print("Warning: Failed to load SAC agent.")
            else: print(f"Warning: SAC agent directory not found: {sac_path}")
-            print(f"Trading system loading {'successful' if models_loaded else 'failed (partially?) '}.")
+            # V7.23 Check if both models loaded successfully
-        except Exception as e: print(f"An error occurred during loading: {e}")
+            models_loaded = gru_ok and sac_ok
            status = 'successful' if models_loaded else ('partially successful' if gru_ok or sac_ok else 'failed')
            print(f"Trading system loading {status}.")
        except Exception as e:
            print(f"An error occurred during loading: {e}")
            # Ensure flags reflect failure
            models_loaded = False
        return models_loaded # Return overall success status
    # V7-V6 Update: Adapt GRU training call for price regression
    def train_gru(self, train_data: pd.DataFrame, val_data: pd.DataFrame, 
@ -989,6 +1056,11 @@ class TradingSystem:
            self.y_scaler = y_scaler # Store fitted scaler
            logging.info(f"Target price scaling complete. Scaler type: {type(y_scaler)}")
            # V7.23: Set feature indices now that scaler is fitted
            if not self._set_feature_indices():
                logging.error("Failed to set feature indices after scaling. Cannot proceed with GRU training.")
                return None # Exit if indices aren't found
        except Exception as e:
             logging.error(f"Error during scaling: {e}", exc_info=True)
             return None # Corrected indentation
@ -1302,30 +1374,29 @@ class ExtendedBacktester:
            # 1. Get state from pre-computed results
            pred_return = all_pred_returns[i]
            uncertainty_sigma = all_uncertainties[i]
-            # V7.13 Get momentum and volatility for the current step
+            # V7.23 Get SCALED momentum and volatility for the current step
-            momentum_5 = all_momentum_5[i]
+            momentum_5_scaled = all_momentum_5[i]
-            volatility_20 = all_volatility_20[i]
+            volatility_20_scaled = all_volatility_20[i]
-            # V7.13 Calculate z-proxy (Position as proxy for risk aversion)
+            # Calculate z-proxy (Position as proxy for risk aversion)
-            # Use position *before* action, scaled by volatility
+            # V7.23 Use SCALED volatility for consistency in state
-            z_proxy = current_position * volatility_20 # Simpler proxy for now
+            z_proxy = current_position * volatility_20_scaled
-            # V7.13 Construct 5D state: [pred_return, uncertainty, z_proxy, momentum_5, volatility_20]
+            # Construct 5D state using SCALED momentum/volatility
            state = np.array([
                pred_return,
                uncertainty_sigma,
                z_proxy,
-                momentum_5,
+                momentum_5_scaled,
-                volatility_20
+                volatility_20_scaled
            ], dtype=np.float32)
-            # V7.13 Handle NaNs/Infs in state (replace with 0 for simplicity)
+            # Handle NaNs/Infs in state
            if np.any(np.isnan(state)) or np.any(np.isinf(state)):
                 feature_logger.warning(f"NaN/Inf detected in state at step {i}. Replacing with 0. State: {state}")
                 state = np.nan_to_num(state, nan=0.0, posinf=0.0, neginf=0.0)
            # 2. Get deterministic action from SAC agent
            # V7.13: get_action now returns (action, log_prob), unpack needed
            action_tuple = self.trading_system.sac_agent.get_action(state, deterministic=True)
            # V7.14 Ensure unpacking handles potential tuple vs single value return if agent API changes
            if isinstance(action_tuple, (tuple, list)) and len(action_tuple) > 0: