diff --git a/configuration/new_vecm.cfg b/configuration/new_vecm.cfg
new file mode 100644
index 0000000..16562d1
--- /dev/null
+++ b/configuration/new_vecm.cfg
@@ -0,0 +1,44 @@
+{
+    "market_data_loading": {
+        "CRYPTO": {
+            "data_directory": "./data/crypto",
+            "db_table_name": "md_1min_bars",
+            "instrument_id_pfx": "PAIR-",
+        },
+        "EQUITY": {
+            "data_directory": "./data/equity",
+            "db_table_name": "md_1min_bars",
+            "instrument_id_pfx": "STOCK-",
+        }
+    },
+
+    # ====== Funding ======
+    "funding_per_pair": 2000.0,
+
+    # ====== Trading Parameters ======
+    "stat_model_price": "close", # "vwap"
+    "execution_price": {
+        "column": "vwap",
+        "shift": 1,
+    },
+    "dis-equilibrium_open_trshld": 2.0,
+    "dis-equilibrium_close_trshld": 1.0,
+    "training_size": 120,
+    "model_class": "pt_strategy.models.VECMModel",
+    "model_data_policy_class": "pt_strategy.model_data_policy.RollingWindowDataPolicy",
+
+    # ====== Stop Conditions ======
+    "stop_close_conditions": {
+        "profit": 2.0,
+        "loss": -0.5
+    },
+
+    # ====== End of Session Closeout ======
+    "close_outstanding_positions": true,
+    # "close_outstanding_positions": false,
+    "trading_hours": {
+        "timezone": "America/New_York",
+        "begin_session": "7:30:00",
+        "end_session": "18:30:00",
+    }
+}
\ No newline at end of file
diff --git a/lib/pt_strategy/models.py b/lib/pt_strategy/models.py
index ca90392..373f28a 100644
--- a/lib/pt_strategy/models.py
+++ b/lib/pt_strategy/models.py
@@ -15,23 +15,6 @@ class ZScoreOLSModel(PairsTradingModel):
     pair_predict_result_: Optional[pd.DataFrame]
     zscore_df_: Optional[pd.DataFrame]
 
-    def _fit_zscore(self, pair: TradingPair) -> pd.DataFrame:
-        assert self.training_df_ is not None
-        symbol_a_px_series = self.training_df_[pair.colnames()].iloc[:, 0].astype(float)
-        symbol_b_px_series = self.training_df_[pair.colnames()].iloc[:, 1].astype(float)
-
-        symbol_a_px_series, symbol_b_px_series = symbol_a_px_series.align(
-            symbol_b_px_series, axis=0
-        )
-
-        X = sm.add_constant(symbol_b_px_series)
-        self.zscore_model_ = sm.OLS(symbol_a_px_series, X).fit()
-        assert self.zscore_model_ is not None
-        hedge_ratio = self.zscore_model_.params.iloc[1]
-
-        spread = symbol_a_px_series - hedge_ratio * symbol_b_px_series
-        return pd.DataFrame((spread - spread.mean()) / spread.std())
-
     def predict(self, pair: TradingPair) -> Prediction:
         self.training_df_ = pair.market_data_.copy()
 
@@ -50,4 +33,94 @@ class ZScoreOLSModel(PairsTradingModel):
             pair_=pair,
         )
 
-    
\ No newline at end of file
+    def _fit_zscore(self, pair: TradingPair) -> pd.DataFrame:
+        """Z-score the OLS spread between the pair's two price columns.
+
+        Stores the fitted OLS result in ``zscore_model_`` and returns a
+        one-column DataFrame holding ``(spread - mean) / std``.
+        """
+        assert self.training_df_ is not None
+        symbol_a_px_series = self.training_df_[pair.colnames()].iloc[:, 0]
+        symbol_b_px_series = self.training_df_[pair.colnames()].iloc[:, 1]
+
+        symbol_a_px_series, symbol_b_px_series = symbol_a_px_series.align(
+            symbol_b_px_series, axis=0
+        )
+
+        X = sm.add_constant(symbol_b_px_series)
+        self.zscore_model_ = sm.OLS(symbol_a_px_series, X).fit()
+        assert self.zscore_model_ is not None
+        hedge_ratio = self.zscore_model_.params.iloc[1]
+
+        spread = symbol_a_px_series - hedge_ratio * symbol_b_px_series
+        return pd.DataFrame((spread - spread.mean()) / spread.std())
+
+
+class VECMModel(PairsTradingModel):
+    """Pairs-trading model that derives dis-equilibrium from a rank-1 VECM fit."""
+
+    def predict(self, pair: TradingPair) -> Prediction:
+        """Refit the VECM on the pair's market data and score a one-step forecast.
+
+        The forecast prices are projected onto the fitted ``beta`` vector to get
+        the dis-equilibrium, which is then scaled by the training mean and
+        standard deviation computed in ``_fit_VECM``.
+        """
+        self.training_df_ = pair.market_data_.copy()
+        assert self.training_df_ is not None
+        vecm_fit = self._fit_VECM(pair=pair)
+
+        assert vecm_fit is not None
+        predicted_prices = vecm_fit.predict(steps=1)
+
+        # Convert prediction to a DataFrame for readability
+        predicted_df = pd.DataFrame(
+            predicted_prices, columns=pd.Index(pair.colnames()), dtype=float
+        )
+
+        disequilibrium = (predicted_df[pair.colnames()] @ vecm_fit.beta)[0][0]
+        scaled_disequilibrium = (disequilibrium - self.training_mu_) / self.training_std_
+        return Prediction(
+            tstamp_=pair.market_data_.index[-1],
+            disequilibrium_=disequilibrium,
+            scaled_disequilibrium_=scaled_disequilibrium,
+            pair_=pair,
+        )
+
+    def _fit_VECM(self, pair: TradingPair) -> "VECMResults":  # type: ignore
+        """Fit a rank-1 VECM on the pair's columns and cache training statistics.
+
+        Sets ``training_mu_`` / ``training_std_`` and adds the "dis-equilibrium"
+        and "scaled_dis-equilibrium" columns to ``training_df_``.
+
+        Raises:
+            RuntimeError: if the fit exposes no ``beta`` cointegration vector.
+        """
+        # Imported lazily inside the function; the return annotation is quoted
+        # so it does not depend on this name being bound at definition time.
+        from statsmodels.tsa.vector_ar.vecm import VECM, VECMResults
+
+        vecm_df = self.training_df_[pair.colnames()].reset_index(drop=True)
+        vecm_model = VECM(vecm_df, coint_rank=1)
+        vecm_fit = vecm_model.fit()
+
+        assert vecm_fit is not None
+
+        # Without beta every projection below would fail with an opaque error,
+        # so stop here instead of only printing a warning.
+        if getattr(vecm_fit, "beta", None) is None:
+            raise RuntimeError(f"{self}: VECM model failed to converge properly")
+
+        # Project the training prices onto beta once and reuse the result.
+        diseq_series = self.training_df_[pair.colnames()] @ vecm_fit.beta
+        self.training_mu_ = float(diseq_series[0].mean())
+        self.training_std_ = float(diseq_series[0].std())
+
+        self.training_df_["dis-equilibrium"] = diseq_series
+        # Normalize the dis-equilibrium
+        self.training_df_["scaled_dis-equilibrium"] = (
+            diseq_series - self.training_mu_
+        ) / self.training_std_
+
+        return vecm_fit
+
diff --git a/lib/pt_strategy/trading_strategy.py b/lib/pt_strategy/trading_strategy.py
index 82c28f1..6cd76c1 100644
--- a/lib/pt_strategy/trading_strategy.py
+++ b/lib/pt_strategy/trading_strategy.py
@@ -64,8 +64,9 @@ class PtResearchStrategy:
         idx = 0
         while self.pt_mkt_data_.has_next():
             market_data_series = self.pt_mkt_data_.get_next()
+            new_row = pd.DataFrame([market_data_series])
             market_data_df = pd.concat(
-                [market_data_df, market_data_series.to_frame().T], ignore_index=True
+                [market_data_df, new_row], ignore_index=True
             )
             if idx >= training_minutes:
                 break
@@ -76,7 +77,7 @@ class PtResearchStrategy:
 
         while self.pt_mkt_data_.has_next():
             market_data_series = self.pt_mkt_data_.get_next()
-            new_row = market_data_series.to_frame().T
+            new_row = pd.DataFrame([market_data_series])
             market_data_df = pd.concat([market_data_df, new_row], ignore_index=True)
 
             prediction = self.trading_pair_.run(