diff --git a/src/backtest_configs.py b/src/backtest_configs.py
new file mode 100644
index 0000000..704d6a4
--- /dev/null
+++ b/src/backtest_configs.py
@@ -0,0 +1,95 @@
+from typing import Any, Dict, List, Optional
+
+
+# ------------------------ Configuration ------------------------
+# Default configuration
+CRYPTO_CONFIG: Dict = {
+    "security_type": "CRYPTO",
+    # --- Data retrieval
+    "data_directory": "./data/crypto",
+    "datafiles": [
+        # "20250519.mktdata.ohlcv.db",
+        # "20250520.mktdata.ohlcv.db",
+        # "20250521.mktdata.ohlcv.db",
+        # "20250522.mktdata.ohlcv.db",
+        # "20250523.mktdata.ohlcv.db",
+        # "20250524.mktdata.ohlcv.db",
+        "20250525.mktdata.ohlcv.db",
+    ],
+    "db_table_name": "bnbspot_ohlcv_1min",
+    # ----- Instruments
+    "exchange_id": "BNBSPOT",
+    "instrument_id_pfx": "PAIR-",
+    "instruments": [
+        "BTC-USDT",
+        "BCH-USDT",
+        "ETH-USDT",
+        "LTC-USDT",
+        "XRP-USDT",
+        "ADA-USDT",
+        "SOL-USDT",
+        "DOT-USDT",
+    ],
+    "trading_hours": {
+        "begin_session": "00:00:00",
+        "end_session": "23:59:00",
+        "timezone": "UTC",
+    },
+    # ----- Model Settings
+    "price_column": "close",
+    "min_required_points": 30,
+    "zero_threshold": 1e-10,
+
+    "dis-equilibrium_open_trshld": 2.0,
+    "dis-equilibrium_close_trshld": 0.5,
+
+    # "training_minutes": 120,
+    "training_minutes": 60,
+    # ----- Validation
+    "funding_per_pair": 2000.0,  # USD
+}
+
+# ========================== EQUITIES
+EQT_CONFIG: Dict = {
+    # --- Data retrieval
+    "security_type": "EQUITY",
+    "data_directory": "./data/equity",
+    "datafiles": [
+        # "20250508.alpaca_sim_md.db",
+        # "20250509.alpaca_sim_md.db",
+        "20250512.alpaca_sim_md.db",
+        # "20250513.alpaca_sim_md.db",
+        # "20250514.alpaca_sim_md.db",
+        # "20250515.alpaca_sim_md.db",
+        # "20250516.alpaca_sim_md.db",
+        # "20250519.alpaca_sim_md.db",
+        # "20250520.alpaca_sim_md.db"
+    ],
+    "db_table_name": "md_1min_bars",
+    # ----- Instruments
+    "exchange_id": "ALPACA",
+    "instrument_id_pfx": "STOCK-",
+    "instruments": [
+        "COIN",
+        "GBTC",
+        "HOOD",
+        "MSTR",
+        "PYPL",
+    ],
+    "trading_hours": {
+        "begin_session": "9:30:00",
+        "end_session": "16:00:00",
+        "timezone": "America/New_York",
+    },
+    # ----- Model Settings
+    "price_column": "close",
+    "min_required_points": 30,
+    "zero_threshold": 1e-10,
+    "dis-equilibrium_open_trshld": 2.0,
+    "dis-equilibrium_close_trshld": 0.5,
+    "training_minutes": 120,
+    # ----- Validation
+    "funding_per_pair": 2000.0,
+}
+
+
diff --git a/src/pt_backtest.py b/src/pt_backtest.py
index c1167e7..a2257bc 100644
--- a/src/pt_backtest.py
+++ b/src/pt_backtest.py
@@ -8,6 +8,7 @@ import numpy as np
 # ============= statsmodels ===================
 from statsmodels.tsa.vector_ar.vecm import VECM
 
+from backtest_configs import CRYPTO_CONFIG
 from tools.data_loader import load_market_data, transform_dataframe
 from tools.trading_pair import TradingPair
 from results import BacktestResult
@@ -16,90 +17,11 @@ NanoPerMin = 1e9
 UNSET_FLOAT: float = sys.float_info.max
 UNSET_INT: int = sys.maxsize
 
-# ------------------------ Configuration ------------------------
-# Default configuration
-CRYPTO_CONFIG: Dict = {
-    "security_type": "CRYPTO",
-    # --- Data retrieval
-    "data_directory": "./data/crypto",
-    "datafiles": [
-        "20250519.mktdata.ohlcv.db",
-        # "20250519.mktdata.ohlcv.db",
-    ],
-    "db_table_name": "bnbspot_ohlcv_1min",
-    # ----- Instruments
-    "exchange_id": "BNBSPOT",
-    "instrument_id_pfx": "PAIR-",
-    "instruments": [
-        "BTC-USDT",
-        # "ETH-USDT",
-        "LTC-USDT",
-    ],
-    "trading_hours": {
-        "begin_session": "00:00:00",
-        "end_session": "23:59:00",
-        "timezone": "UTC",
-    },
-    # ----- Model Settings
-    "price_column": "close",
-    "min_required_points": 30,
-    "zero_threshold": 1e-10,
-    "disequilibrium_open_trshld": 2,
-    "disequilibrium_close_trshld": 0.5,
+# # ==========================================================================
 
-    "training_minutes": 120,
-    # ----- Validation
-    "funding_per_pair": 2000.0,  # USD
-}
 
-# ========================== EQUITIES
-EQT_CONFIG: Dict = {
-    # --- Data retrieval
-    "security_type": "EQUITY",
-    "data_directory": "./data/equity",
-    "datafiles": [
-        "20250508.alpaca_sim_md.db",
-        # "20250509.alpaca_sim_md.db",
-        # "20250512.alpaca_sim_md.db",
-        # "20250513.alpaca_sim_md.db",
-        # "20250514.alpaca_sim_md.db",
-        # "20250515.alpaca_sim_md.db",
-        # "20250516.alpaca_sim_md.db",
-        # "20250519.alpaca_sim_md.db",
-        # "20250520.alpaca_sim_md.db"
-    ],
-    "db_table_name": "md_1min_bars",
-    # ----- Instruments
-    "exchange_id": "ALPACA",
-    "instrument_id_pfx": "STOCK-",
-    "instruments": [
-        "COIN",
-        "GBTC",
-        "HOOD",
-        "MSTR",
-        "PYPL",
-    ],
-    "trading_hours": {
-        "begin_session": "9:30:00",
-        "end_session": "16:00:00",
-        "timezone": "America/New_York",
-    },
-    # ----- Model Settings
-    "price_column": "close",
-    "min_required_points": 30,
-    "zero_threshold": 1e-10,
-    "disequilibrium_open_trshld": 2.0,
-    "disequilibrium_close_trshld": 0.5,
-    "training_minutes": 120,
-    # ----- Validation
-    "funding_per_pair": 2000.0,
-}
-
-
-# ==========================================================================
-
-# CONFIG = CRYPTO_CONFIG
-CONFIG = EQT_CONFIG
+CONFIG = CRYPTO_CONFIG
+# CONFIG = EQT_CONFIG
 
 BacktestResults = BacktestResult(config=CONFIG)
 
@@ -146,8 +68,8 @@ def create_trading_signals(pair: TradingPair) -> pd.DataFrame:
     open_row_index = None
     initial_abs_term = None
 
-    open_threshold = CONFIG["disequilibrium_open_trshld"]
-    close_threshold = CONFIG["disequilibrium_close_trshld"]
+    open_threshold = CONFIG["dis-equilibrium_open_trshld"]
+    close_threshold = CONFIG["dis-equilibrium_close_trshld"]
 
     for row_idx in range(len(pair_result_df)):
         curr_disequilibrium = pair_result_df["scaled_disequilibrium"][row_idx]
@@ -299,7 +221,10 @@ def run_single_pair(
         market_data=market_data, training_minutes=CONFIG["training_minutes"]
     )
     try:
-        pair.train_pair()
+        is_cointegrated = pair.train_pair()
+        if not is_cointegrated:
+            print(f"{pair} IS NOT COINTEGRATED")
+            return None
     except Exception as e:
         print(f"{pair}: Training failed: {str(e)}")
         return None
@@ -387,8 +312,6 @@ if __name__ == "__main__":
     # BacktestResults.print_results_summary(all_results)
     BacktestResults.calculate_returns(all_results)
 
-    # Print grand totals
     BacktestResults.print_grand_totals()
-
     BacktestResults.print_outstanding_positions()
 
diff --git a/src/results.py b/src/results.py
index 7398eb7..5d55450 100644
--- a/src/results.py
+++ b/src/results.py
@@ -300,8 +300,8 @@ class BacktestResult:
                     "current_abs_term": current_scaled_disequilibrium,
                     "current_disequilibrium": current_disequilibrium,
                     "current_scaled_disequilibrium": current_scaled_disequilibrium,
-                    "closing_threshold": initial_abs_term / self.config["disequilibrium_close_trshld"],
-                    "disequilibrium_ratio": current_scaled_disequilibrium / (initial_abs_term / self.config["disequilibrium_close_trshld"]),
+                    "closing_threshold": initial_abs_term / self.config["dis-equilibrium_close_trshld"],
+                    "disequilibrium_ratio": current_scaled_disequilibrium / (initial_abs_term / self.config["dis-equilibrium_close_trshld"]),
                 }
             )
 
diff --git a/src/tools/data_loader.py b/src/tools/data_loader.py
index da89693..b40bcb2 100644
--- a/src/tools/data_loader.py
+++ b/src/tools/data_loader.py
@@ -118,23 +118,8 @@ def transform_dataframe(df: pd.DataFrame, price_column: str):
     return result_df
 
 
-# def get_datasets(df: pd.DataFrame, training_minutes: int, pair: TradingPair) -> Tuple[pd.DataFrame, pd.DataFrame]:
-#     # Training dataset
-#     colname_a, colname_b = pair.colnames()
-#     df = df[["tstamp", colname_a, colname_b]]
-#     df = df.dropna()
-#     training_df = df.iloc[:training_minutes - 1, :].copy()
-#     training_df.reset_index(drop=True).dropna().reset_index(drop=True)
+# if __name__ == "__main__":
+#     df1 = load_sqlite_to_dataframe(sys.argv[1], table_name="md_1min_bars")
 
-#     # Testing dataset
-#     testing_df = df.iloc[training_minutes:, :].copy()
-#     testing_df.reset_index(drop=True).dropna().reset_index(drop=True)
-
-#     return (training_df, testing_df)
-
-
-if __name__ == "__main__":
-    df1 = load_sqlite_to_dataframe(sys.argv[1], table_name="md_1min_bars")
-
-    print(df1)
+#     print(df1)
 
diff --git a/src/tools/trading_pair.py b/src/tools/trading_pair.py
index 80b3b7a..2c749e2 100644
--- a/src/tools/trading_pair.py
+++ b/src/tools/trading_pair.py
@@ -51,28 +51,54 @@ class TradingPair:
             print(f"{self}: VECM model failed to converge properly")
 
         self.vecm_fit_ = vecm_fit
+        # print(f"{self}: beta={self.vecm_fit_.beta} alpha={self.vecm_fit_.alpha}" )
+        # print(f"{self}: {self.vecm_fit_.summary()}")
 
+    def check_cointegration(self) -> bool:
+        """Run the Johansen trace test on the training window of both legs.
+
+        Returns:
+            True when the trace statistic for the "no cointegrating relation"
+            hypothesis (``lr1[0]``) exceeds the critical value stored in
+            column 1 of the first ``cvt`` row (the 95% level in statsmodels).
+        """
+        # Kept as a local import: the module-level imports of this file are
+        # outside the hunk and cannot be edited from here.
+        from statsmodels.tsa.vector_ar.vecm import coint_johansen
+
+        df = self.training_df_[self.colnames()].reset_index(drop=True)
+        result = coint_johansen(df, det_order=0, k_ar_diff=1)
+        # print(f"{self}: lr1={result.lr1[0]} cvt={result.cvt[0, 1]}.")
+        # bool() turns the numpy.bool_ comparison result into a plain bool so
+        # the returned value matches the annotation.
+        is_cointegrated = bool(result.lr1[0] > result.cvt[0, 1])
 
-    def train_pair(self):
+        return is_cointegrated
+
+    def train_pair(self) -> bool:
+        """Fit the pair model if the two legs are cointegrated.
+
+        Returns:
+            False, without fitting anything, when the cointegration check
+            fails; True once the VECM is fitted and the dis-equilibrium
+            statistics and columns have been stored.
+        """
+        if not self.check_cointegration():
+            return False
+
+        print(f"*****\n**************** {self} IS COINTEGRATED ****************\n*****")
         self.fit_VECM()
         diseq_series = self.training_df_[self.colnames()] @ self.vecm_fit_.beta
 
         self.training_mu_ = diseq_series.mean().iloc[0]
         self.training_std_ = diseq_series.std().iloc[0]
 
-        self.training_df_["disequilibrium"] = self.training_df_[self.colnames()] @ self.vecm_fit_.beta
-        # Normalize the disequilibrium
-        self.training_df_["scaled_disequilibrium"] = (
+        self.training_df_["dis-equilibrium"] = self.training_df_[self.colnames()] @ self.vecm_fit_.beta
+        # Normalize the dis-equilibrium
+        self.training_df_["scaled_dis-equilibrium"] = (
             diseq_series - self.training_mu_
         ) / self.training_std_
-
-    # def mu(self) -> float:
-    #     assert self.training_mu_ is not None
-    #     return self.training_mu_
-
-    # def std(self) -> float:
-    #     assert self.training_std_ is not None
-    #     return self.training_std_
+        return True
 
     def __repr__(self) ->str:
         return f"{self.symbol_a_} & {self.symbol_b_}"