From 31eb9f800c1904ea94e4a1ab66a466fa7f2c5268 Mon Sep 17 00:00:00 2001 From: Oleg Sheynin Date: Tue, 22 Jul 2025 17:25:16 +0000 Subject: [PATCH] bug fix --- configuration/crypto_zscore.cfg | 4 +- lib/pt_trading/fit_method.py | 3 +- lib/pt_trading/results.py | 91 +++++++++++++-------------- lib/pt_trading/rolling_window_fit.py | 17 +++-- lib/pt_trading/vecm_rolling_fit.py | 4 -- lib/pt_trading/z-score_rolling_fit.py | 19 +++--- research/pt_backtest.py | 14 +++-- 7 files changed, 75 insertions(+), 77 deletions(-) diff --git a/configuration/crypto_zscore.cfg b/configuration/crypto_zscore.cfg index e58cf87..a045de8 100644 --- a/configuration/crypto_zscore.cfg +++ b/configuration/crypto_zscore.cfg @@ -2,7 +2,7 @@ "security_type": "CRYPTO", "data_directory": "./data/crypto", "datafiles": [ - "2025*.mktdata.ohlcv.db" + "20250602.mktdata.ohlcv.db" ], "db_table_name": "md_1min_bars", "exchange_id": "BNBSPOT", @@ -27,7 +27,7 @@ # "close_outstanding_positions": false, "trading_hours": { "begin_session": "9:30:00", - "end_session": "21:30:00", + "end_session": "19:00:00", "timezone": "America/New_York" } } \ No newline at end of file diff --git a/lib/pt_trading/fit_method.py b/lib/pt_trading/fit_method.py index 6742f1d..6b3b65a 100644 --- a/lib/pt_trading/fit_method.py +++ b/lib/pt_trading/fit_method.py @@ -12,8 +12,9 @@ NanoPerMin = 1e9 class PairsTradingFitMethod(ABC): TRADES_COLUMNS = [ "time", - "action", "symbol", + "side", + "action", "price", "disequilibrium", "scaled_disequilibrium", diff --git a/lib/pt_trading/results.py b/lib/pt_trading/results.py index 1fc15c3..0928f35 100644 --- a/lib/pt_trading/results.py +++ b/lib/pt_trading/results.py @@ -442,11 +442,13 @@ class BacktestResult: self, pair_nm: str, symbol: str, + side: str, action: str, price: Any, disequilibrium: Optional[float] = None, scaled_disequilibrium: Optional[float] = None, timestamp: Optional[datetime] = None, + status: Optional[str] = None, ) -> None: """Add a trade to the results tracking.""" pair_nm = str(pair_nm) @@ -456,7 +458,15 @@ class BacktestResult: if symbol not in self.trades[pair_nm]: self.trades[pair_nm][symbol] = [] self.trades[pair_nm][symbol].append( - (action, price, disequilibrium, scaled_disequilibrium, timestamp) + {"symbol":symbol, + "side":side, + "action":action, + "price":price, + "disequilibrium":disequilibrium, + "scaled_disequilibrium":scaled_disequilibrium, + "timestamp":timestamp, + "status":status + } ) def add_outstanding_position(self, position: Dict[str, Any]) -> None: @@ -493,6 +503,7 @@ class BacktestResult: print(result) for row in result.itertuples(): + side = row.side action = row.action symbol = row.symbol price = row.price @@ -502,15 +513,17 @@ class BacktestResult: timestamp = getattr(row, "time") else: timestamp = convert_timestamp(row.Index) - + status = row.status self.add_trade( pair_nm=str(row.pair), - action=str(action), symbol=str(symbol), + side=str(side), + action=str(action), price=float(str(price)), disequilibrium=disequilibrium, scaled_disequilibrium=scaled_disequilibrium, timestamp=timestamp, + status=str(status) if status is not None else "?", ) def print_single_day_results(self) -> None: @@ -553,42 +566,22 @@ class BacktestResult: for symbol, trades in symbols.items(): if len(trades) == 0: continue - symbol_return = 0 - symbol_trades = [] - - # Process all trades sequentially for this symbol - for i, trade in enumerate(trades): - # Handle both old and new tuple formats - if len(trade) == 2: # Old format: (action, price) - action, price = trade - disequilibrium = None - scaled_disequilibrium = None - timestamp = None - else: # New format: (action, price, disequilibrium, scaled_disequilibrium, timestamp) - action, price = trade[:2] - disequilibrium = trade[2] if len(trade) > 2 else None - scaled_disequilibrium = trade[3] if len(trade) > 3 else None - timestamp = trade[4] if len(trade) > 4 else None - - symbol_trades.append((action, price, disequilibrium, scaled_disequilibrium, timestamp)) - + symbol_trades = [trade for trade in trades if trade["symbol"] == symbol] + # Calculate returns for all trade combinations for i in range(len(symbol_trades) - 1): - trade1 = symbol_trades[i] - trade2 = symbol_trades[i + 1] - - action1, price1, diseq1, scaled_diseq1, ts1 = trade1 - action2, price2, diseq2, scaled_diseq2, ts2 = trade2 + trade1 = trades[i] + trade2 = trades[i + 1] # Calculate return based on action combination trade_return = 0 - if action1 == "BUY" and action2 == "SELL": + if trade1["side"] == "BUY" and trade2["side"] == "SELL": # Long position - trade_return = (price2 - price1) / price1 * 100 - elif action1 == "SELL" and action2 == "BUY": + trade_return = (trade2["price"] - trade1["price"]) / trade1["price"] * 100 + elif trade1["side"] == "SELL" and trade2["side"] == "BUY": # Short position - trade_return = (price1 - price2) / price1 * 100 + trade_return = (trade1["price"] - trade2["price"]) / trade1["price"] * 100 symbol_return += trade_return @@ -596,13 +589,13 @@ class BacktestResult: pair_trades.append( ( symbol, - action1, - price1, - action2, - price2, + trade1["side"], + trade1["price"], + trade2["side"], + trade2["price"], trade_return, - scaled_diseq1, - scaled_diseq2, + trade1["scaled_disequilibrium"], + trade2["scaled_disequilibrium"], i + 1, # Trade sequence number ) ) @@ -614,24 +607,28 @@ class BacktestResult: print(f" {pair}:") for ( symbol, - action1, - price1, - action2, - price2, + trade1["side"], + trade1["price"], + trade2["side"], + trade2["price"], trade_return, - scaled_diseq1, - scaled_diseq2, + trade1["scaled_disequilibrium"], + trade2["scaled_disequilibrium"], trade_num, ) in pair_trades: disequil_info = "" if ( - scaled_diseq1 is not None - and scaled_diseq2 is not None + trade1["scaled_disequilibrium"] is not None + and trade2["scaled_disequilibrium"] is not None ): - disequil_info = f" | Open Dis-eq: {scaled_diseq1:.2f}, Close Dis-eq: {scaled_diseq2:.2f}" + disequil_info = f" | Open Dis-eq: {trade1["scaled_disequilibrium"]:.2f}," + f" Close Dis-eq: {trade2["scaled_disequilibrium"]:.2f}" print( - f" {symbol} (Trade #{trade_num}): {action1} @ ${price1:.2f}, {action2} @ ${price2:.2f}, Return: {trade_return:.2f}%{disequil_info}" + f" {symbol} (Trade #{trade_num}):" + f" {trade1["side"]} @ ${trade1["price"]:.2f}," + f" {trade2["side"]} @ ${trade2["price"]:.2f}," + f" Return: {trade_return:.2f}%{disequil_info}" ) print(f" Pair Total Return: {pair_return:.2f}%") day_return += pair_return diff --git a/lib/pt_trading/rolling_window_fit.py b/lib/pt_trading/rolling_window_fit.py index 6b71332..93205fa 100644 --- a/lib/pt_trading/rolling_window_fit.py +++ b/lib/pt_trading/rolling_window_fit.py @@ -35,8 +35,9 @@ class RollingFit(PairsTradingFitMethod): # Initialize trades DataFrame with proper dtypes to avoid concatenation warnings pair.user_data_["trades"] = pd.DataFrame(columns=self.TRADES_COLUMNS).astype({ "time": "datetime64[ns]", - "action": "string", "symbol": "string", + "side": "string", + "action": "string", "price": "float64", "disequilibrium": "float64", "scaled_disequilibrium": "float64", @@ -136,7 +137,7 @@ class RollingFit(PairsTradingFitMethod): close_position_trades = self._get_close_trades( pair=pair, row=pred_row, - close_threshold=close_threshold, + close_threshold=close_threshold ) if close_position_trades is not None: close_position_trades["status"] = PairState.CLOSE_POSITION.name @@ -197,8 +198,9 @@ class RollingFit(PairsTradingFitMethod): trd_signal_tuples = [ ( open_tstamp, - open_side_a, pair.symbol_a_, + open_side_a, + "OPEN", open_px_a, open_disequilibrium, open_scaled_disequilibrium, @@ -206,8 +208,9 @@ class RollingFit(PairsTradingFitMethod): ), ( open_tstamp, - open_side_b, pair.symbol_b_, + open_side_b, + "OPEN", open_px_b, open_disequilibrium, open_scaled_disequilibrium, @@ -248,8 +251,9 @@ class RollingFit(PairsTradingFitMethod): trd_signal_tuples = [ ( close_tstamp, - close_side_a, pair.symbol_a_, + close_side_a, + "CLOSE", close_px_a, close_disequilibrium, close_scaled_disequilibrium, @@ -257,8 +261,9 @@ class RollingFit(PairsTradingFitMethod): ), ( close_tstamp, - close_side_b, pair.symbol_b_, + close_side_b, + "CLOSE", close_px_b, close_disequilibrium, close_scaled_disequilibrium, diff --git a/lib/pt_trading/vecm_rolling_fit.py b/lib/pt_trading/vecm_rolling_fit.py index d4cc64c..79d92af 100644 --- a/lib/pt_trading/vecm_rolling_fit.py +++ b/lib/pt_trading/vecm_rolling_fit.py @@ -94,10 +94,6 @@ class VECMRollingFit(RollingFit): def __init__(self) -> None: super().__init__() - def run_pair( - self, pair: TradingPair, bt_result: BacktestResult - ) -> Optional[pd.DataFrame]: - return super().run_pair(pair, bt_result) def create_trading_pair( self, config: Dict, market_data: pd.DataFrame, symbol_a: str, symbol_b: str, price_column: str ) -> TradingPair: diff --git a/lib/pt_trading/z-score_rolling_fit.py b/lib/pt_trading/z-score_rolling_fit.py index 3c68c1d..1f4726d 100644 --- a/lib/pt_trading/z-score_rolling_fit.py +++ b/lib/pt_trading/z-score_rolling_fit.py @@ -20,19 +20,18 @@ class ZScoreTradingPair(TradingPair): def _fit_zscore(self) -> None: assert self.training_df_ is not None - a = self.training_df_[self.colnames()].iloc[:, 0] - b = self.training_df_[self.colnames()].iloc[:, 1] + symbol_a_px_series = self.training_df_[self.colnames()].iloc[:, 0] + symbol_b_px_series = self.training_df_[self.colnames()].iloc[:, 1] - a,b = a.align(b, axis=0) + symbol_a_px_series,symbol_b_px_series = symbol_a_px_series.align(symbol_b_px_series, axis=0) - - X = sm.add_constant(b) - self.zscore_model_ = sm.OLS(a, X).fit() + X = sm.add_constant(symbol_b_px_series) + self.zscore_model_ = sm.OLS(symbol_a_px_series, X).fit() assert self.zscore_model_ is not None hedge_ratio = self.zscore_model_.params.iloc[1] # Calculate spread and Z-score - spread = a - hedge_ratio * b + spread = symbol_a_px_series - hedge_ratio * symbol_b_px_series self.zscore_df_ = (spread - spread.mean()) / spread.std() def predict(self) -> pd.DataFrame: @@ -55,17 +54,13 @@ class ZScoreTradingPair(TradingPair): self.pair_predict_result_ = pd.concat([self.pair_predict_result_, predicted_df], ignore_index=True) # Reset index to ensure proper indexing self.pair_predict_result_ = self.pair_predict_result_.reset_index(drop=True) - return self.pair_predict_result_ + return self.pair_predict_result_.dropna() class ZScoreRollingFit(RollingFit): def __init__(self) -> None: super().__init__() - def run_pair( - self, pair: TradingPair, bt_result: BacktestResult - ) -> Optional[pd.DataFrame]: - return super().run_pair(pair, bt_result) def create_trading_pair( self, config: Dict, market_data: pd.DataFrame, symbol_a: str, symbol_b: str, price_column: str ) -> TradingPair: diff --git a/research/pt_backtest.py b/research/pt_backtest.py index bd40924..b7cd69a 100644 --- a/research/pt_backtest.py +++ b/research/pt_backtest.py @@ -72,13 +72,17 @@ def run_backtest( bt_result: BacktestResult = BacktestResult(config=config) pairs_trades = [] - for pair in create_pairs(datafile=datafile, fit_method=fit_method, price_column=price_column, config=config, instruments=instruments): - single_pair_trades = fit_method.run_pair( - pair=pair, bt_result=bt_result - ) + for pair in create_pairs( + datafile=datafile, + fit_method=fit_method, + price_column=price_column, + config=config, + instruments=instruments, + ): + single_pair_trades = fit_method.run_pair(pair=pair, bt_result=bt_result) if single_pair_trades is not None and len(single_pair_trades) > 0: pairs_trades.append(single_pair_trades) - print(f"pairs_trades: {pairs_trades}") + print(f"pairs_trades:\n{pairs_trades}") # Check if result_list has any data before concatenating if len(pairs_trades) == 0: print("No trading signals found for any pairs")