refactoring progress: VECM, fixes

This commit is contained in:
Oleg Sheynin 2025-07-30 05:08:26 +00:00
parent ed0c0fecb2
commit 566dd9bbdc
3 changed files with 116 additions and 20 deletions

View File

@ -0,0 +1,44 @@
{
"market_data_loading": {
"CRYPTO": {
"data_directory": "./data/crypto",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "PAIR-",
},
"EQUITY": {
"data_directory": "./data/equity",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "STOCK-",
}
},
# ====== Funding ======
"funding_per_pair": 2000.0,
# ====== Trading Parameters ======
"stat_model_price": "close", # "vwap"
"execution_price": {
"column": "vwap",
"shift": 1,
},
"dis-equilibrium_open_trshld": 2.0,
"dis-equilibrium_close_trshld": 1.0,
"training_size": 120,
"model_class": "pt_strategy.models.VECMModel",
"model_data_policy_class": "pt_strategy.model_data_policy.RollingWindowDataPolicy",
# ====== Stop Conditions ======
"stop_close_conditions": {
"profit": 2.0,
"loss": -0.5
},
# ====== End of Session Closeout ======
"close_outstanding_positions": true,
# "close_outstanding_positions": false,
"trading_hours": {
"timezone": "America/New_York",
"begin_session": "7:30:00",
"end_session": "18:30:00",
}
}

View File

@ -15,23 +15,6 @@ class ZScoreOLSModel(PairsTradingModel):
pair_predict_result_: Optional[pd.DataFrame] pair_predict_result_: Optional[pd.DataFrame]
zscore_df_: Optional[pd.DataFrame] zscore_df_: Optional[pd.DataFrame]
def _fit_zscore(self, pair: TradingPair) -> pd.DataFrame:
    """Fit an OLS hedge regression on the training data and z-score the spread.

    The first pair column is regressed on the second one (plus a constant
    term); the fitted result is kept in ``self.zscore_model_``.

    Returns:
        A single-column DataFrame holding the spread
        ``a - hedge_ratio * b`` standardized by its own mean and standard
        deviation.
    """
    assert self.training_df_ is not None

    prices = self.training_df_[pair.colnames()]
    # Prices are cast to float before the two legs are index-aligned.
    leg_a, leg_b = (
        prices.iloc[:, 0]
        .astype(float)
        .align(prices.iloc[:, 1].astype(float), axis=0)
    )

    design = sm.add_constant(leg_b)
    self.zscore_model_ = sm.OLS(leg_a, design).fit()
    assert self.zscore_model_ is not None

    # The second fitted parameter is used as the hedge ratio.
    slope = self.zscore_model_.params.iloc[1]
    residual = leg_a - slope * leg_b
    centered = residual - residual.mean()
    return pd.DataFrame(centered / residual.std())
def predict(self, pair: TradingPair) -> Prediction: def predict(self, pair: TradingPair) -> Prediction:
self.training_df_ = pair.market_data_.copy() self.training_df_ = pair.market_data_.copy()
@ -50,4 +33,72 @@ class ZScoreOLSModel(PairsTradingModel):
pair_=pair, pair_=pair,
) )
def _fit_zscore(self, pair: TradingPair) -> pd.DataFrame:
    """Fit an OLS hedge regression on the training data and z-score the spread.

    The first pair column is regressed on the second one (plus a constant
    term); the fitted result is kept in ``self.zscore_model_``.

    Returns:
        A single-column DataFrame holding the spread
        ``a - hedge_ratio * b`` standardized by its own mean and standard
        deviation.
    """
    assert self.training_df_ is not None

    prices = self.training_df_[pair.colnames()]
    # Index-align the two legs before regressing one on the other.
    leg_a, leg_b = prices.iloc[:, 0].align(prices.iloc[:, 1], axis=0)

    design = sm.add_constant(leg_b)
    self.zscore_model_ = sm.OLS(leg_a, design).fit()
    assert self.zscore_model_ is not None

    # The second fitted parameter is used as the hedge ratio.
    slope = self.zscore_model_.params.iloc[1]
    residual = leg_a - slope * leg_b
    centered = residual - residual.mean()
    return pd.DataFrame(centered / residual.std())
class VECMModel(PairsTradingModel):
    """Pairs-trading model based on a VECM fit with cointegration rank 1.

    Each ``predict`` call refits the VECM on the pair's current market data,
    forecasts one step ahead, and projects the forecast onto the fitted
    cointegrating vector (``beta``) to obtain the dis-equilibrium.
    """

    def predict(self, pair: TradingPair) -> Prediction:
        """Refit on ``pair.market_data_`` and return the next-step prediction.

        Args:
            pair: Trading pair supplying ``market_data_`` and ``colnames()``.

        Returns:
            A ``Prediction`` stamped with the last index value of the market
            data, carrying the raw dis-equilibrium and the value scaled by the
            training mean and standard deviation.

        Raises:
            RuntimeError: If the fitted VECM exposes no ``beta``.
        """
        self.training_df_ = pair.market_data_.copy()
        assert self.training_df_ is not None

        vecm_fit = self._fit_VECM(pair=pair)
        assert vecm_fit is not None

        colnames = pair.colnames()
        predicted_prices = vecm_fit.predict(steps=1)
        # Wrap the forecast in a DataFrame so columns are addressed by name.
        predicted_df = pd.DataFrame(
            predicted_prices, columns=pd.Index(colnames), dtype=float
        )

        # One forecast row times one cointegrating vector -> a single cell.
        disequilibrium = (predicted_df[colnames] @ vecm_fit.beta).iloc[0, 0]
        # NOTE(review): a constant training dis-equilibrium gives
        # training_std_ == 0 and therefore inf/NaN here -- confirm whether
        # callers need a guard.
        scaled_disequilibrium = (
            disequilibrium - self.training_mu_
        ) / self.training_std_

        return Prediction(
            tstamp_=pair.market_data_.index[-1],
            disequilibrium_=disequilibrium,
            scaled_disequilibrium_=scaled_disequilibrium,
            pair_=pair,
        )

    def _fit_VECM(self, pair: TradingPair) -> "VECMResults":  # type: ignore
        """Fit the VECM on ``self.training_df_`` and record training statistics.

        Side effects:
            * sets ``self.training_mu_`` / ``self.training_std_`` to the mean
              and standard deviation of the in-sample dis-equilibrium;
            * adds ``"dis-equilibrium"`` and ``"scaled_dis-equilibrium"``
              columns to ``self.training_df_``.

        Returns:
            The fitted VECM results object.

        Raises:
            RuntimeError: If the fitted model exposes no ``beta``.
        """
        # Imported lazily; the return annotation is a string so it does not
        # need this name at definition time.
        from statsmodels.tsa.vector_ar.vecm import VECM, VECMResults  # noqa: F401

        assert self.training_df_ is not None
        prices = self.training_df_[pair.colnames()]

        # The model is fitted on a positionally indexed copy of the prices.
        vecm_fit = VECM(prices.reset_index(drop=True), coint_rank=1).fit()
        assert vecm_fit is not None

        # Without beta nothing below can be computed, so fail loudly instead
        # of printing and crashing later on the missing attribute.
        if getattr(vecm_fit, "beta", None) is None:
            raise RuntimeError(f"{self}: VECM model failed to converge properly")

        # Project the training prices onto the cointegrating vector once and
        # reuse the resulting series for both statistics and stored columns.
        diseq_series = (prices @ vecm_fit.beta)[0]
        self.training_mu_ = float(diseq_series.mean())
        self.training_std_ = float(diseq_series.std())

        self.training_df_["dis-equilibrium"] = diseq_series
        # Normalize the dis-equilibrium
        self.training_df_["scaled_dis-equilibrium"] = (
            diseq_series - self.training_mu_
        ) / self.training_std_

        return vecm_fit

View File

@ -64,8 +64,9 @@ class PtResearchStrategy:
idx = 0 idx = 0
while self.pt_mkt_data_.has_next(): while self.pt_mkt_data_.has_next():
market_data_series = self.pt_mkt_data_.get_next() market_data_series = self.pt_mkt_data_.get_next()
new_row = pd.DataFrame([market_data_series])
market_data_df = pd.concat( market_data_df = pd.concat(
[market_data_df, market_data_series.to_frame().T], ignore_index=True [market_data_df, new_row], ignore_index=True
) )
if idx >= training_minutes: if idx >= training_minutes:
break break
@ -76,7 +77,7 @@ class PtResearchStrategy:
while self.pt_mkt_data_.has_next(): while self.pt_mkt_data_.has_next():
market_data_series = self.pt_mkt_data_.get_next() market_data_series = self.pt_mkt_data_.get_next()
new_row = market_data_series.to_frame().T new_row = pd.DataFrame([market_data_series])
market_data_df = pd.concat([market_data_df, new_row], ignore_index=True) market_data_df = pd.concat([market_data_df, new_row], ignore_index=True)
prediction = self.trading_pair_.run( prediction = self.trading_pair_.run(