This commit is contained in:
Oleg Sheynin 2025-05-29 02:47:38 -04:00
parent 06884d72b7
commit 91623db4b7
5 changed files with 132 additions and 119 deletions

95
src/backtest_configs.py Normal file
View File

@ -0,0 +1,95 @@
from typing import Any, Dict, List, Optional
# ------------------------ Configuration ------------------------
# Default configuration: settings for the crypto backtest.
# Every key is read by name elsewhere in the project, so key spelling
# (including the "dis-equilibrium_*_trshld" names) must stay exactly as is.
CRYPTO_CONFIG: Dict[str, Any] = {
    "security_type": "CRYPTO",
    # --- Data retrieval
    "data_directory": "./data/crypto",
    # Only un-commented entries are part of the list; the commented ones
    # are kept so a day can be switched on or off quickly.
    "datafiles": [
        # "20250519.mktdata.ohlcv.db",
        # "20250520.mktdata.ohlcv.db",
        # "20250521.mktdata.ohlcv.db",
        # "20250522.mktdata.ohlcv.db",
        # "20250523.mktdata.ohlcv.db",
        # "20250524.mktdata.ohlcv.db",
        "20250525.mktdata.ohlcv.db",
    ],
    "db_table_name": "bnbspot_ohlcv_1min",
    # ----- Instruments
    "exchange_id": "BNBSPOT",
    "instrument_id_pfx": "PAIR-",
    "instruments": [
        "BTC-USDT",
        "BCH-USDT",
        "ETH-USDT",
        "LTC-USDT",
        "XRP-USDT",
        "ADA-USDT",
        "SOL-USDT",
        "DOT-USDT",
    ],
    # Session boundaries are plain "H:MM:SS"-style strings plus a timezone
    # name; how they are parsed is decided by the consumer of this dict.
    "trading_hours": {
        "begin_session": "00:00:00",
        "end_session": "23:59:00",
        "timezone": "UTC",
    },
    # ----- Model Settings
    "price_column": "close",
    "min_required_points": 30,
    "zero_threshold": 1e-10,
    # Open / close thresholds applied to the dis-equilibrium signal.
    "dis-equilibrium_open_trshld": 2.0,
    "dis-equilibrium_close_trshld": 0.5,
    # Previously tried value, kept for reference:
    # "training_minutes": 120,
    "training_minutes": 60,
    # ----- Validation
    "funding_per_pair": 2000.0,  # USD
}
# ========================== EQUITIES
# Settings for the equities backtest. It carries the same set of keys as
# the crypto configuration, so either dict can be selected as the active one.
EQT_CONFIG: Dict[str, Any] = {
    # --- Data retrieval
    "security_type": "EQUITY",
    "data_directory": "./data/equity",
    # Only un-commented entries are part of the list; the commented ones
    # are kept so a day can be switched on or off quickly.
    "datafiles": [
        # "20250508.alpaca_sim_md.db",
        # "20250509.alpaca_sim_md.db",
        "20250512.alpaca_sim_md.db",
        # "20250513.alpaca_sim_md.db",
        # "20250514.alpaca_sim_md.db",
        # "20250515.alpaca_sim_md.db",
        # "20250516.alpaca_sim_md.db",
        # "20250519.alpaca_sim_md.db",
        # "20250520.alpaca_sim_md.db"
    ],
    "db_table_name": "md_1min_bars",
    # ----- Instruments
    "exchange_id": "ALPACA",
    "instrument_id_pfx": "STOCK-",
    "instruments": [
        "COIN",
        "GBTC",
        "HOOD",
        "MSTR",
        "PYPL",
    ],
    # NOTE(review): "9:30:00" is not zero-padded, unlike the crypto
    # session strings -- confirm the consumer's time parser accepts both.
    "trading_hours": {
        "begin_session": "9:30:00",
        "end_session": "16:00:00",
        "timezone": "America/New_York",
    },
    # ----- Model Settings
    "price_column": "close",
    "min_required_points": 30,
    "zero_threshold": 1e-10,
    # Open / close thresholds applied to the dis-equilibrium signal.
    "dis-equilibrium_open_trshld": 2.0,
    "dis-equilibrium_close_trshld": 0.5,
    "training_minutes": 120,
    # ----- Validation
    "funding_per_pair": 2000.0,
}

View File

@ -8,6 +8,7 @@ import numpy as np
# ============= statsmodels =================== # ============= statsmodels ===================
from statsmodels.tsa.vector_ar.vecm import VECM from statsmodels.tsa.vector_ar.vecm import VECM
from backtest_configs import CRYPTO_CONFIG
from tools.data_loader import load_market_data, transform_dataframe from tools.data_loader import load_market_data, transform_dataframe
from tools.trading_pair import TradingPair from tools.trading_pair import TradingPair
from results import BacktestResult from results import BacktestResult
@ -16,90 +17,11 @@ NanoPerMin = 1e9
UNSET_FLOAT: float = sys.float_info.max UNSET_FLOAT: float = sys.float_info.max
UNSET_INT: int = sys.maxsize UNSET_INT: int = sys.maxsize
# ------------------------ Configuration ------------------------
# Default configuration
CRYPTO_CONFIG: Dict = {
"security_type": "CRYPTO",
# --- Data retrieval
"data_directory": "./data/crypto",
"datafiles": [
"20250519.mktdata.ohlcv.db",
# "20250519.mktdata.ohlcv.db",
],
"db_table_name": "bnbspot_ohlcv_1min",
# ----- Instruments
"exchange_id": "BNBSPOT",
"instrument_id_pfx": "PAIR-",
"instruments": [
"BTC-USDT",
# "ETH-USDT",
"LTC-USDT",
],
"trading_hours": {
"begin_session": "00:00:00",
"end_session": "23:59:00",
"timezone": "UTC",
},
# ----- Model Settings
"price_column": "close",
"min_required_points": 30,
"zero_threshold": 1e-10,
"disequilibrium_open_trshld": 2, # # ==========================================================================
"disequilibrium_close_trshld": 0.5,
"training_minutes": 120, CONFIG = CRYPTO_CONFIG
# ----- Validation # CONFIG = EQT_CONFIG
"funding_per_pair": 2000.0, # USD
}
# ========================== EQUITIES
EQT_CONFIG: Dict = {
# --- Data retrieval
"security_type": "EQUITY",
"data_directory": "./data/equity",
"datafiles": [
"20250508.alpaca_sim_md.db",
# "20250509.alpaca_sim_md.db",
# "20250512.alpaca_sim_md.db",
# "20250513.alpaca_sim_md.db",
# "20250514.alpaca_sim_md.db",
# "20250515.alpaca_sim_md.db",
# "20250516.alpaca_sim_md.db",
# "20250519.alpaca_sim_md.db",
# "20250520.alpaca_sim_md.db"
],
"db_table_name": "md_1min_bars",
# ----- Instruments
"exchange_id": "ALPACA",
"instrument_id_pfx": "STOCK-",
"instruments": [
"COIN",
"GBTC",
"HOOD",
"MSTR",
"PYPL",
],
"trading_hours": {
"begin_session": "9:30:00",
"end_session": "16:00:00",
"timezone": "America/New_York",
},
# ----- Model Settings
"price_column": "close",
"min_required_points": 30,
"zero_threshold": 1e-10,
"disequilibrium_open_trshld": 2.0,
"disequilibrium_close_trshld": 0.5,
"training_minutes": 120,
# ----- Validation
"funding_per_pair": 2000.0,
}
# ==========================================================================
# CONFIG = CRYPTO_CONFIG
CONFIG = EQT_CONFIG
BacktestResults = BacktestResult(config=CONFIG) BacktestResults = BacktestResult(config=CONFIG)
@ -146,8 +68,8 @@ def create_trading_signals(pair: TradingPair) -> pd.DataFrame:
open_row_index = None open_row_index = None
initial_abs_term = None initial_abs_term = None
open_threshold = CONFIG["disequilibrium_open_trshld"] open_threshold = CONFIG["dis-equilibrium_open_trshld"]
close_threshold = CONFIG["disequilibrium_close_trshld"] close_threshold = CONFIG["dis-equilibrium_close_trshld"]
for row_idx in range(len(pair_result_df)): for row_idx in range(len(pair_result_df)):
curr_disequilibrium = pair_result_df["scaled_disequilibrium"][row_idx] curr_disequilibrium = pair_result_df["scaled_disequilibrium"][row_idx]
@ -299,7 +221,10 @@ def run_single_pair(
market_data=market_data, training_minutes=CONFIG["training_minutes"] market_data=market_data, training_minutes=CONFIG["training_minutes"]
) )
try: try:
pair.train_pair() is_cointegrated = pair.train_pair()
if not is_cointegrated:
print(f"{pair} IS NOT COINTEGRATED")
return None
except Exception as e: except Exception as e:
print(f"{pair}: Training failed: {str(e)}") print(f"{pair}: Training failed: {str(e)}")
return None return None
@ -387,8 +312,6 @@ if __name__ == "__main__":
# BacktestResults.print_results_summary(all_results) # BacktestResults.print_results_summary(all_results)
BacktestResults.calculate_returns(all_results) BacktestResults.calculate_returns(all_results)
# Print grand totals # Print grand totals
BacktestResults.print_grand_totals() BacktestResults.print_grand_totals()
BacktestResults.print_outstanding_positions() BacktestResults.print_outstanding_positions()

View File

@ -300,8 +300,8 @@ class BacktestResult:
"current_abs_term": current_scaled_disequilibrium, "current_abs_term": current_scaled_disequilibrium,
"current_disequilibrium": current_disequilibrium, "current_disequilibrium": current_disequilibrium,
"current_scaled_disequilibrium": current_scaled_disequilibrium, "current_scaled_disequilibrium": current_scaled_disequilibrium,
"closing_threshold": initial_abs_term / self.config["disequilibrium_close_trshld"], "closing_threshold": initial_abs_term / self.config["dis-equilibrium_close_trshld"],
"disequilibrium_ratio": current_scaled_disequilibrium / (initial_abs_term / self.config["disequilibrium_close_trshld"]), "disequilibrium_ratio": current_scaled_disequilibrium / (initial_abs_term / self.config["dis-equilibrium_close_trshld"]),
} }
) )

View File

@ -118,23 +118,8 @@ def transform_dataframe(df: pd.DataFrame, price_column: str):
return result_df return result_df
# def get_datasets(df: pd.DataFrame, training_minutes: int, pair: TradingPair) -> Tuple[pd.DataFrame, pd.DataFrame]:
# # Training dataset
# colname_a, colname_b = pair.colnames()
# df = df[["tstamp", colname_a, colname_b]]
# df = df.dropna()
# training_df = df.iloc[:training_minutes - 1, :].copy() # if __name__ == "__main__":
# training_df.reset_index(drop=True).dropna().reset_index(drop=True) # df1 = load_sqlite_to_dataframe(sys.argv[1], table_name="md_1min_bars")
# # Testing dataset # print(df1)
# testing_df = df.iloc[training_minutes:, :].copy()
# testing_df.reset_index(drop=True).dropna().reset_index(drop=True)
# return (training_df, testing_df)
if __name__ == "__main__":
df1 = load_sqlite_to_dataframe(sys.argv[1], table_name="md_1min_bars")
print(df1)

View File

@ -51,28 +51,38 @@ class TradingPair:
print(f"{self}: VECM model failed to converge properly") print(f"{self}: VECM model failed to converge properly")
self.vecm_fit_ = vecm_fit self.vecm_fit_ = vecm_fit
# print(f"{self}: beta={self.vecm_fit_.beta} alpha={self.vecm_fit_.alpha}" )
# print(f"{self}: {self.vecm_fit_.summary()}")
pass
def check_cointegration(self):
from statsmodels.tsa.vector_ar.vecm import coint_johansen
df = self.training_df_[self.colnames()].reset_index(drop=True)
result = coint_johansen(df, det_order=0, k_ar_diff=1)
# print(f"{self}: lr1={result.lr1[0]} cvt={result.cvt[0, 1]}.")
is_cointegrated = result.lr1[0] > result.cvt[0, 1]
def train_pair(self): return is_cointegrated
def train_pair(self) -> bool:
is_cointegrated = self.check_cointegration()
if not is_cointegrated:
return False
pass
print(f"*****\n**************** {self} IS COINTEGRATED ****************\n*****")
self.fit_VECM() self.fit_VECM()
diseq_series = self.training_df_[self.colnames()] @ self.vecm_fit_.beta diseq_series = self.training_df_[self.colnames()] @ self.vecm_fit_.beta
self.training_mu_ = diseq_series.mean().iloc[0] self.training_mu_ = diseq_series.mean().iloc[0]
self.training_std_ = diseq_series.std().iloc[0] self.training_std_ = diseq_series.std().iloc[0]
self.training_df_["disequilibrium"] = self.training_df_[self.colnames()] @ self.vecm_fit_.beta self.training_df_["dis-equilibrium"] = self.training_df_[self.colnames()] @ self.vecm_fit_.beta
# Normalize the disequilibrium # Normalize the dis-equilibrium
self.training_df_["scaled_disequilibrium"] = ( self.training_df_["scaled_dis-equilibrium"] = (
diseq_series - self.training_mu_ diseq_series - self.training_mu_
) / self.training_std_ ) / self.training_std_
return True
# def mu(self) -> float:
# assert self.training_mu_ is not None
# return self.training_mu_
# def std(self) -> float:
# assert self.training_std_ is not None
# return self.training_std_
def __repr__(self) ->str: def __repr__(self) ->str:
return f"{self.symbol_a_} & {self.symbol_b_}" return f"{self.symbol_a_} & {self.symbol_b_}"