fixed issues with SAC scaling inputs

This commit is contained in:
Yasha Sheynin 2025-04-16 18:23:11 -04:00
parent 984a230bcd
commit 843dde88f5
18 changed files with 279 additions and 34 deletions

View File

@ -85,15 +85,15 @@ SAC_HIDDEN_SIZE = 64
SAC_GAMMA = 0.97
SAC_TAU = 0.02
# SAC_ALPHA = 0.1 # Removed - Will use automatic tuning
SAC_ACTOR_LR = 3e-4 # Lowered from 5e-4
SAC_CRITIC_LR = 5e-4 # Lowered from 8e-4
SAC_ACTOR_LR = 1.5e-5 # 3e-4 -> 1.5e-4 (halved) -> 1.5e-5 (10x lower again)
SAC_CRITIC_LR = 2.5e-5 # 5e-4 -> 2.5e-4 (halved) -> 2.5e-5 (10x lower again)
SAC_BATCH_SIZE = 64
SAC_BUFFER_MAX_SIZE = 20000
SAC_MIN_BUFFER_SIZE = 1000
SAC_UPDATE_INTERVAL = 1
SAC_TARGET_UPDATE_INTERVAL = 2
SAC_GRADIENT_CLIP = 1.0
SAC_REWARD_SCALE = 2.0 # Decreased from 10.0
SAC_REWARD_SCALE = 1.0 # Decreased from 10.0 -> 2.0 -> 1.0
SAC_USE_BATCH_NORM = True
SAC_USE_RESIDUAL = True
SAC_MODEL_DIR = 'models/simplified_sac' # Default dir within the agent class

View File

@ -0,0 +1,42 @@
# GRU+SAC Backtesting Performance Report
Report generated on: 2025-04-16 17:09:06.155365
Data range: 2025-03-06 15:23:00+00:00 to 2025-03-07 23:57:00+00:00
Total duration: 1 days 08:34:00
## Strategy Performance Metrics
* **Initial capital:** $10,000.00
* **Final portfolio value:** $10,417.44
* **Total return:** 4.17%
* **Annualized return:** 6338986.83%
* **Sharpe ratio (annualized):** 10.5985
* **Sortino ratio (annualized):** 16.0926
* **Volatility (annualized):** 110.03%
* **Maximum drawdown:** 6.76%
* **Total trades:** 1
## Buy and Hold Benchmark
* **Final value (B&H):** $9,658.75
* **Total return (B&H):** -3.41%
## Position & Prediction Analysis
* **Average absolute position size:** 1.0000
* **Position sign accuracy vs return:** 50.93%
* **Prediction sign accuracy vs return:** 48.92%
* **Prediction RMSE (on returns):** 0.004036
## Correlations
* **Prediction-Return correlation:** -0.0042
* **Prediction-Position correlation:** 0.0000
* **Uncertainty-Position Size correlation:** 0.0000
## Notes
* Transaction cost used: 0.0500% per position change value.
* GRU lookback period: 60 minutes.
* V6 features + return features used.
* Uncertainty estimated via MC Dropout standard deviation.

Binary file not shown.

After

Width:  |  Height:  |  Size: 341 KiB

View File

@ -0,0 +1,66 @@
{
"run_id": "20250416_170503",
"db_dir": "../downloaded_data",
"ticker": "BTC-USD",
"exchange": "COINBASE",
"start_date": "2025-03-01",
"end_date": "2025-03-10",
"interval": "1min",
"model_save_path": "gru_sac_predictor/models/run_20250416_170503",
"results_plot_path": "gru_sac_predictor/results/20250416_170503/backtest_results_20250416_170503.png",
"report_save_path": "gru_sac_predictor/results/20250416_170503/backtest_performance_report_20250416_170503.md",
"train_ratio": 0.6,
"validation_ratio": 0.2,
"gru_lookback": 60,
"gru_prediction_horizon": 1,
"gru_epochs": 20,
"gru_batch_size": 32,
"gru_patience": 10,
"gru_lr_factor": 0.5,
"gru_return_scale": 0.03,
"gru_model_load_run_id": "20250416_142744",
"sac_state_dim": 5,
"sac_hidden_size": 64,
"sac_gamma": 0.97,
"sac_tau": 0.02,
"sac_actor_lr": 0.00015,
"sac_critic_lr": 0.00025,
"sac_batch_size": 64,
"sac_buffer_max_size": 20000,
"sac_min_buffer_size": 1000,
"sac_update_interval": 1,
"sac_target_update_interval": 2,
"sac_gradient_clip": 1.0,
"sac_reward_scale": 2.0,
"sac_use_batch_norm": true,
"sac_use_residual": true,
"sac_model_dir": "models/simplified_sac",
"sac_epochs": 50,
"experience_config": {
"initial_experiences": 3000,
"experiences_per_batch": 64,
"batch_generation_interval": 500,
"balance_market_regimes": false,
"recency_bias_strength": 0.5,
"high_uncertainty_quantile": 0.75,
"extreme_return_quantile": 0.1,
"min_uncertainty_ratio": 0.2,
"min_extreme_return_ratio": 0.1,
"use_parallel_generation": false,
"precompute_all_gru_outputs": true,
"buffer_update_strategy": "fifo",
"training_iterations_per_step": 1
},
"initial_capital": 10000.0,
"transaction_cost": 0.0005,
"opportunity_cost_penalty_factor": 0.0,
"high_return_threshold": 0.002,
"action_tolerance": 0.3,
"load_existing_system": true,
"train_gru_model": false,
"train_sac_agent": true,
"load_sac_agent": false,
"run_backtest": true,
"generate_plots": true,
"generate_report": true
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 63 KiB

View File

@ -0,0 +1,66 @@
{
"run_id": "20250416_182038",
"db_dir": "../downloaded_data",
"ticker": "BTC-USD",
"exchange": "COINBASE",
"start_date": "2025-03-01",
"end_date": "2025-03-10",
"interval": "1min",
"model_save_path": "gru_sac_predictor/models/run_20250416_182038",
"results_plot_path": "gru_sac_predictor/results/20250416_182038/backtest_results_20250416_182038.png",
"report_save_path": "gru_sac_predictor/results/20250416_182038/backtest_performance_report_20250416_182038.md",
"train_ratio": 0.6,
"validation_ratio": 0.2,
"gru_lookback": 60,
"gru_prediction_horizon": 1,
"gru_epochs": 20,
"gru_batch_size": 32,
"gru_patience": 10,
"gru_lr_factor": 0.5,
"gru_return_scale": 0.03,
"gru_model_load_run_id": "20250416_142744",
"sac_state_dim": 5,
"sac_hidden_size": 64,
"sac_gamma": 0.97,
"sac_tau": 0.02,
"sac_actor_lr": 1.5e-05,
"sac_critic_lr": 2.5e-05,
"sac_batch_size": 64,
"sac_buffer_max_size": 20000,
"sac_min_buffer_size": 1000,
"sac_update_interval": 1,
"sac_target_update_interval": 2,
"sac_gradient_clip": 1.0,
"sac_reward_scale": 1.0,
"sac_use_batch_norm": true,
"sac_use_residual": true,
"sac_model_dir": "models/simplified_sac",
"sac_epochs": 50,
"experience_config": {
"initial_experiences": 3000,
"experiences_per_batch": 64,
"batch_generation_interval": 500,
"balance_market_regimes": false,
"recency_bias_strength": 0.5,
"high_uncertainty_quantile": 0.75,
"extreme_return_quantile": 0.1,
"min_uncertainty_ratio": 0.2,
"min_extreme_return_ratio": 0.1,
"use_parallel_generation": false,
"precompute_all_gru_outputs": true,
"buffer_update_strategy": "fifo",
"training_iterations_per_step": 1
},
"initial_capital": 10000.0,
"transaction_cost": 0.0005,
"opportunity_cost_penalty_factor": 0.0,
"high_return_threshold": 0.002,
"action_tolerance": 0.3,
"load_existing_system": true,
"train_gru_model": false,
"train_sac_agent": true,
"load_sac_agent": false,
"run_backtest": true,
"generate_plots": true,
"generate_report": true
}

View File

@ -368,11 +368,52 @@ class TradingSystem:
self.y_scaler = None
self.last_prediction = None
self.experiences = [] # Store (state, action, reward, next_state, done)
# V7.23: Add storage for scaled feature indices
self._momentum_feature_idx = None
self._volatility_feature_idx = None
# Initialize scalers from GRU model if available and loaded
if self.gru_model and self.gru_model.is_loaded:
self.feature_scaler = self.gru_model.feature_scaler
self.y_scaler = self.gru_model.y_scaler
self._logger.info("Scalers initialized from pre-loaded GRU model.")
# V7.23: Attempt to set indices if scaler loaded
self._set_feature_indices()
# V7.23: Helper method to find and store feature indices
def _set_feature_indices(self):
    """Locate the momentum and volatility columns in the feature scaler.

    Looks up 'return_5m' and 'volatility_14d' in the scaler's
    ``feature_names_in_`` and caches their positions on the instance as
    ``_momentum_feature_idx`` and ``_volatility_feature_idx``.

    Returns:
        True when both positions were found and stored. False when the
        scaler is missing, exposes no feature names, or lacks either
        column; both cached indices are None in that case.
    """
    scaler = self.feature_scaler

    # Clear any previously cached positions once, so that every failure
    # path below leaves both indices unset.
    self._momentum_feature_idx = None
    self._volatility_feature_idx = None

    if scaler is None:
        self._logger.warning("Feature scaler not available. Cannot set feature indices.")
        return False

    if not hasattr(scaler, 'feature_names_in_'):
        # Without column names the positions cannot be resolved; the two
        # cases differ only in how severe the log entry is.
        if hasattr(scaler, 'n_features_in_'):
            self._logger.warning("Feature scaler lacks 'feature_names_in_'. Cannot reliably find named features. State scaling may fail.")
        else:
            self._logger.error("Feature scaler lacks both 'feature_names_in_' and 'n_features_in_'. Cannot determine feature indices.")
        return False

    feature_columns = list(scaler.feature_names_in_)
    try:
        # Resolve both positions before committing either one.
        momentum_pos = feature_columns.index('return_5m')
        volatility_pos = feature_columns.index('volatility_14d')
    except ValueError as e:
        self._logger.error(f"Could not find 'return_5m' or 'volatility_14d' in scaler's feature columns: {feature_columns}. Error: {e}. State construction will likely fail.")
        return False

    self._momentum_feature_idx = momentum_pos
    self._volatility_feature_idx = volatility_pos
    self._logger.info(f"Successfully set feature indices: momentum_5m={self._momentum_feature_idx}, volatility_14d={self._volatility_feature_idx}")
    return True
# V7-V6 Update: Extract features and predict return/uncertainty for SAC state
def _extract_features_and_predict(self, data_df_full, current_idx):
@ -706,13 +747,17 @@ class TradingSystem:
for i in tqdm(range(num_sequences - 1), desc="Generating Experiences"): # Iterate up to second-to-last sequence result
# --- V7.15 START: Construct 5D state s_t ---
# V7.23 Get SCALED momentum/volatility for state t
pred_return_t = all_pred_returns[i]
uncertainty_t = all_uncertainties[i]
momentum_5_t = all_momentum_5[i]
volatility_20_t = all_volatility_20[i]
momentum_5_t_scaled = all_momentum_5[i] # Use scaled value
volatility_20_t_scaled = all_volatility_20[i] # Use scaled value
# Calculate z_proxy using position *before* action (current_position)
z_proxy_t = current_position * volatility_20_t
state = np.array([pred_return_t, uncertainty_t, z_proxy_t, momentum_5_t, volatility_20_t], dtype=np.float32)
# V7.23: Use SCALED volatility for consistency within state.
z_proxy_t = current_position * volatility_20_t_scaled
state = np.array([pred_return_t, uncertainty_t, z_proxy_t, momentum_5_t_scaled, volatility_20_t_scaled], dtype=np.float32)
# Handle potential NaNs/Infs
if np.any(np.isnan(state)) or np.any(np.isinf(state)):
logging.warning(f"NaN/Inf in state at step {i}. Replacing with 0. State: {state}")
@ -731,13 +776,17 @@ class TradingSystem:
continue # Skip this experience if action format is wrong
# --- V7.15 START: Construct 5D next_state s_{t+1} ---
# V7.23 Get SCALED momentum/volatility for state t+1
pred_return_t1 = all_pred_returns[i+1]
uncertainty_t1 = all_uncertainties[i+1]
momentum_5_t1 = all_momentum_5[i+1]
volatility_20_t1 = all_volatility_20[i+1]
momentum_5_t1_scaled = all_momentum_5[i+1] # Use scaled value
volatility_20_t1_scaled = all_volatility_20[i+1] # Use scaled value
# Calculate z_proxy using the *action* taken (action is position for next step)
z_proxy_t1 = action * volatility_20_t1 # Use action, not current_position
next_state = np.array([pred_return_t1, uncertainty_t1, z_proxy_t1, momentum_5_t1, volatility_20_t1], dtype=np.float32)
# V7.23: Use SCALED volatility for consistency
z_proxy_t1 = action * volatility_20_t1_scaled # Use action, not current_position
next_state = np.array([pred_return_t1, uncertainty_t1, z_proxy_t1, momentum_5_t1_scaled, volatility_20_t1_scaled], dtype=np.float32)
# Handle potential NaNs/Infs
if np.any(np.isnan(next_state)) or np.any(np.isinf(next_state)):
logging.warning(f"NaN/Inf in next_state at step {i}. Replacing with 0. State: {next_state}")
@ -926,18 +975,36 @@ class TradingSystem:
if os.path.isdir(gru_path):
# V7-V6 Update: Instantiate and load CryptoGRUModel
self.gru_model = CryptoGRUModel()
if not self.gru_model.load(gru_path): print("Warning: Failed to load GRU model/scalers.")
# Store loaded scalers in TradingSystem as well for convenience?
if self.gru_model.load(gru_path):
self.feature_scaler = self.gru_model.feature_scaler
self.y_scaler = self.gru_model.y_scaler
# V7.23: Set feature indices after loading scaler
if self._set_feature_indices():
gru_ok = True
else:
print("Warning: Failed to set feature indices after loading GRU scaler.")
gru_ok = False # Mark as failed if indices can't be set
else:
print("Warning: Failed to load GRU model/scalers.")
else: print(f"Warning: GRU model directory not found: {gru_path}")
if os.path.isdir(sac_path):
# State dim is 2: [pred_return, confidence]
self.sac_agent = SimplifiedSACTradingAgent(state_dim=2)
self.sac_agent.load(sac_path)
# V7.23 Use correct state dim when loading
self.sac_agent = SimplifiedSACTradingAgent(state_dim=5)
if self.sac_agent.load(sac_path):
sac_ok = True
else:
print("Warning: Failed to load SAC agent.")
else: print(f"Warning: SAC agent directory not found: {sac_path}")
print(f"Trading system loading {'successful' if models_loaded else 'failed (partially?) '}.")
except Exception as e: print(f"An error occurred during loading: {e}")
# V7.23 Check if both models loaded successfully
models_loaded = gru_ok and sac_ok
status = 'successful' if models_loaded else ('partially successful' if gru_ok or sac_ok else 'failed')
print(f"Trading system loading {status}.")
except Exception as e:
print(f"An error occurred during loading: {e}")
# Ensure flags reflect failure
models_loaded = False
return models_loaded # Return overall success status
# V7-V6 Update: Adapt GRU training call for price regression
def train_gru(self, train_data: pd.DataFrame, val_data: pd.DataFrame,
@ -989,6 +1056,11 @@ class TradingSystem:
self.y_scaler = y_scaler # Store fitted scaler
logging.info(f"Target price scaling complete. Scaler type: {type(y_scaler)}")
# V7.23: Set feature indices now that scaler is fitted
if not self._set_feature_indices():
logging.error("Failed to set feature indices after scaling. Cannot proceed with GRU training.")
return None # Exit if indices aren't found
except Exception as e:
logging.error(f"Error during scaling: {e}", exc_info=True)
return None # Corrected indentation
@ -1302,30 +1374,29 @@ class ExtendedBacktester:
# 1. Get state from pre-computed results
pred_return = all_pred_returns[i]
uncertainty_sigma = all_uncertainties[i]
# V7.13 Get momentum and volatility for the current step
momentum_5 = all_momentum_5[i]
volatility_20 = all_volatility_20[i]
# V7.23 Get SCALED momentum and volatility for the current step
momentum_5_scaled = all_momentum_5[i]
volatility_20_scaled = all_volatility_20[i]
# V7.13 Calculate z-proxy (Position as proxy for risk aversion)
# Use position *before* action, scaled by volatility
z_proxy = current_position * volatility_20 # Simpler proxy for now
# Calculate z-proxy (Position as proxy for risk aversion)
# V7.23 Use SCALED volatility for consistency in state
z_proxy = current_position * volatility_20_scaled
# V7.13 Construct 5D state: [pred_return, uncertainty, z_proxy, momentum_5, volatility_20]
# Construct 5D state using SCALED momentum/volatility
state = np.array([
pred_return,
uncertainty_sigma,
z_proxy,
momentum_5,
volatility_20
momentum_5_scaled,
volatility_20_scaled
], dtype=np.float32)
# V7.13 Handle NaNs/Infs in state (replace with 0 for simplicity)
# Handle NaNs/Infs in state
if np.any(np.isnan(state)) or np.any(np.isinf(state)):
feature_logger.warning(f"NaN/Inf detected in state at step {i}. Replacing with 0. State: {state}")
state = np.nan_to_num(state, nan=0.0, posinf=0.0, neginf=0.0)
# 2. Get deterministic action from SAC agent
# V7.13: get_action now returns (action, log_prob), unpack needed
action_tuple = self.trading_system.sac_agent.get_action(state, deterministic=True)
# V7.14 Ensure unpacking handles potential tuple vs single value return if agent API changes
if isinstance(action_tuple, (tuple, list)) and len(action_tuple) > 0: