added window size optimization classes

This commit is contained in:
Oleg Sheynin 2025-07-31 18:53:42 +00:00
parent 0af334bdf9
commit 8e6ac39674
8 changed files with 283 additions and 62 deletions

48
configuration/ols-opt.cfg Normal file
View File

@ -0,0 +1,48 @@
{
"market_data_loading": {
"CRYPTO": {
"data_directory": "./data/crypto",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "PAIR-",
},
"EQUITY": {
"data_directory": "./data/equity",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "STOCK-",
}
},
# ====== Funding ======
"funding_per_pair": 2000.0,
# ====== Trading Parameters ======
"stat_model_price": "close",
"execution_price": {
"column": "vwap",
"shift": 1,
},
"dis-equilibrium_open_trshld": 1.75,
"dis-equilibrium_close_trshld": 0.9,
"model_class": "pt_strategy.models.OLSModel",
# "training_size": 120,
# "model_data_policy_class": "pt_strategy.model_data_policy.RollingWindowDataPolicy",
# "model_data_policy_class": "pt_strategy.model_data_policy.ADFOptimizedWndDataPolicy",
"model_data_policy_class": "pt_strategy.model_data_policy.JohansenOptdWndDataPolicy",
"min_training_size": 60,
"max_training_size": 150,
# ====== Stop Conditions ======
"stop_close_conditions": {
"profit": 2.0,
"loss": -0.5
},
# ====== End of Session Closeout ======
"close_outstanding_positions": true,
# "close_outstanding_positions": false,
"trading_hours": {
"timezone": "America/New_York",
"begin_session": "7:30:00",
"end_session": "18:30:00",
}
}

View File

@ -20,11 +20,15 @@
"column": "vwap",
"shift": 1,
},
"dis-equilibrium_open_trshld": 2.0,
"dis-equilibrium_close_trshld": 0.5,
"training_size": 120,
"dis-equilibrium_open_trshld": 1.75,
"dis-equilibrium_close_trshld": 0.9,
"model_class": "pt_strategy.models.OLSModel",
"training_size": 120,
"model_data_policy_class": "pt_strategy.model_data_policy.RollingWindowDataPolicy",
# "model_data_policy_class": "pt_strategy.model_data_policy.ADFOptimizedWndDataPolicy",
# "min_training_size": 60,
# "max_training_size": 150,
# ====== Stop Conditions ======
"stop_close_conditions": {

View File

@ -21,11 +21,16 @@
"column": "vwap",
"shift": 1,
},
"dis-equilibrium_open_trshld": 2.0,
"dis-equilibrium_open_trshld": 1.75,
"dis-equilibrium_close_trshld": 1.0,
"training_size": 120,
"model_class": "pt_strategy.models.VECMModel",
"model_data_policy_class": "pt_strategy.model_data_policy.ExpandingWindowDataPolicy",
# "training_size": 120,
# "model_data_policy_class": "pt_strategy.model_data_policy.RollingWindowDataPolicy",
"model_data_policy_class": "pt_strategy.model_data_policy.ADFOptimizedWndDataPolicy",
"min_training_size": 60,
"max_training_size": 150,
# ====== Stop Conditions ======
"stop_close_conditions": {

View File

@ -21,11 +21,15 @@
"column": "vwap",
"shift": 1,
},
"dis-equilibrium_open_trshld": 2.0,
"dis-equilibrium_open_trshld": 1.75,
"dis-equilibrium_close_trshld": 1.0,
"training_size": 120,
"model_class": "pt_strategy.models.VECMModel",
"training_size": 120,
"model_data_policy_class": "pt_strategy.model_data_policy.RollingWindowDataPolicy",
# "model_data_policy_class": "pt_strategy.model_data_policy.ADFOptimizedWndDataPolicy",
# "min_training_size": 60,
# "max_training_size": 150,
# ====== Stop Conditions ======
"stop_close_conditions": {

View File

@ -1,64 +1,235 @@
from __future__ import annotations
import copy
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, Optional, cast, Generator, List
from typing import Any, Dict, cast
import numpy as np
import pandas as pd
@dataclass
class DataParams:
class DataWindowParams:
training_size: int
training_start_index: int
class ModelDataPolicy(ABC):
config_: Dict[str, Any]
current_data_params_: DataParams
current_data_params_: DataWindowParams
count_: int
def __init__(self, config: Dict[str, Any]):
self.config_ = config
self.current_data_params_ = DataParams(
self.current_data_params_ = DataWindowParams(
training_size=config.get("training_size", 120),
training_start_index=0,
)
self.count_ = 0
@abstractmethod
def advance(self) -> DataParams:
def advance(self) -> DataWindowParams:
self.count_ += 1
print(self.count_, end='\r')
print(self.count_, end="\r")
return self.current_data_params_
@staticmethod
def create(config: Dict[str, Any]) -> ModelDataPolicy:
def create(config: Dict[str, Any], *args: Any, **kwargs: Any) -> ModelDataPolicy:
import importlib
model_data_policy_class_name = config.get("model_data_policy_class", None)
assert model_data_policy_class_name is not None
module_name, class_name = model_data_policy_class_name.rsplit(".", 1)
module = importlib.import_module(module_name)
model_training_data_policy_object = getattr(module, class_name)(config=config)
model_training_data_policy_object = getattr(module, class_name)(
config=config, *args, **kwargs
)
return cast(ModelDataPolicy, model_training_data_policy_object)
class RollingWindowDataPolicy(ModelDataPolicy):
def __init__(self, config: Dict[str, Any]):
def __init__(self, config: Dict[str, Any], *args: Any, **kwargs: Any):
super().__init__(config)
self.count_ = 1
def advance(self) -> DataParams:
def advance(self) -> DataWindowParams:
super().advance()
self.current_data_params_.training_start_index += 1
return self.current_data_params_
class ExpandingWindowDataPolicy(ModelDataPolicy):
def __init__(self, config: Dict[str, Any]):
def __init__(self, config: Dict[str, Any], *args: Any, **kwargs: Any):
super().__init__(config)
def advance(self) -> DataParams:
def advance(self) -> DataWindowParams:
super().advance()
self.current_data_params_.training_size += 1
return self.current_data_params_
class OptimizedWndDataPolicy(ModelDataPolicy, ABC):
    """Base class for policies that re-pick the training window on every step.

    The constructor validates the configuration, keeps the market data and the
    trading pair, and caches the pair's two price columns as NumPy arrays.
    Concrete subclasses supply :meth:`optimize_window_size`, which chooses a
    window ending at ``end_index_``.
    """

    mkt_data_df_: pd.DataFrame
    pair_: TradingPair  # type: ignore
    min_training_size_: int
    max_training_size_: int
    # Right edge used by subclasses when slicing candidate windows.
    end_index_: int
    prices_a_: np.ndarray
    prices_b_: np.ndarray

    def __init__(self, config: Dict[str, Any], *args: Any, **kwargs: Any):
        """Validate inputs and cache the price series of the pair.

        Args:
            config: Strategy configuration; must contain ``min_training_size``
                and ``max_training_size`` with min strictly below max.
            *args: Accepted for signature compatibility; not used here.
            **kwargs: Must provide ``mkt_data`` (the market data frame) and
                ``pair`` (the trading pair whose columns are read).

        Raises:
            AssertionError: If a required keyword argument or config key is
                missing, or if ``min_training_size >= max_training_size``.
        """
        super().__init__(config)

        market_data = kwargs.get("mkt_data")
        trading_pair = kwargs.get("pair")
        assert (
            market_data is not None and trading_pair is not None
        ), "mkt_data and/or pair must be provided"
        assert (
            "min_training_size" in config and "max_training_size" in config
        ), "min_training_size and max_training_size must be provided"

        self.min_training_size_ = cast(int, config.get("min_training_size"))
        self.max_training_size_ = cast(int, config.get("max_training_size"))
        assert self.min_training_size_ < self.max_training_size_

        # Imported here rather than at module level; only needed for the cast.
        from pt_strategy.trading_pair import TradingPair

        self.mkt_data_df_ = cast(pd.DataFrame, market_data)
        self.pair_ = cast(TradingPair, trading_pair)

        # The first candidate windows end max_training_size_ rows after the
        # initial start index.
        first_start = self.current_data_params_.training_start_index
        self.end_index_ = first_start + self.max_training_size_

        name_a, name_b = self.pair_.colnames()
        self.prices_a_ = np.array(self.mkt_data_df_[name_a])
        self.prices_b_ = np.array(self.mkt_data_df_[name_b])

    def advance(self) -> DataWindowParams:
        """Select the window for the current step, then move the edge forward.

        Returns:
            The window parameters produced by :meth:`optimize_window_size`,
            which also become ``current_data_params_``.
        """
        super().advance()
        chosen = self.optimize_window_size()
        self.current_data_params_ = chosen
        # The next call evaluates windows ending one row later.
        self.end_index_ += 1
        return self.current_data_params_

    @abstractmethod
    def optimize_window_size(self) -> DataWindowParams:
        """Return the window parameters chosen for the current ``end_index_``."""
        ...
class EGOptimizedWndDataPolicy(OptimizedWndDataPolicy):
    """Window policy driven by the Engle-Granger cointegration test.

    *** VERY SLOW ***: a full cointegration test is run for every candidate
    window size on every step.
    """

    def __init__(self, config: Dict[str, Any], *args: Any, **kwargs: Any):
        """Forward all arguments to :class:`OptimizedWndDataPolicy`."""
        super().__init__(config, *args, **kwargs)

    def optimize_window_size(self) -> DataWindowParams:
        """Pick the window ending at ``end_index_`` with the lowest p-value.

        Returns:
            A copy of the current window parameters, updated to the candidate
            whose Engle-Granger p-value is smallest. If no candidate yields a
            p-value below 1.0 the copy is returned unchanged.
        """
        # Imported once per call instead of once per candidate size.
        from statsmodels.tsa.stattools import coint  # type: ignore

        best_pvalue = 1.0
        result = copy.copy(self.current_data_params_)
        # NOTE(review): range() stops before max_training_size_, so the
        # largest configured size is never evaluated - confirm this is intended.
        for trn_size in range(self.min_training_size_, self.max_training_size_):
            start_index = self.end_index_ - trn_size
            series_a = self.prices_a_[start_index : self.end_index_]
            series_b = self.prices_b_[start_index : self.end_index_]
            # coint() returns (t-statistic, p-value, critical values).
            eg_pvalue = float(coint(series_a, series_b)[1])
            # Strict "<" keeps the first (smallest) size on ties and never
            # selects a NaN p-value.
            if eg_pvalue < best_pvalue:
                best_pvalue = eg_pvalue
                result.training_size = trn_size
                result.training_start_index = start_index
        return result
class ADFOptimizedWndDataPolicy(OptimizedWndDataPolicy):
    """Window policy driven by an Augmented Dickey-Fuller test on OLS residuals."""

    def __init__(self, config: Dict[str, Any], *args: Any, **kwargs: Any):
        """Forward all arguments to :class:`OptimizedWndDataPolicy`."""
        super().__init__(config, *args, **kwargs)

    def optimize_window_size(self) -> DataWindowParams:
        """Pick the window ending at ``end_index_`` with the lowest ADF p-value.

        For each candidate size, price series A is regressed on price series B
        (with a constant added) and the ADF test is run on the residuals.

        Returns:
            A copy of the current window parameters, updated to the candidate
            with the smallest p-value. If no candidate yields a p-value below
            1.0 the copy is returned unchanged.
        """
        from statsmodels.regression.linear_model import OLS
        from statsmodels.tools.tools import add_constant
        from statsmodels.tsa.stattools import adfuller

        chosen = copy.copy(self.current_data_params_)
        lowest_pvalue = 1.0
        window_end = self.end_index_

        for window_len in range(self.min_training_size_, self.max_training_size_):
            window_start = window_end - window_len
            y = self.prices_a_[window_start:window_end]
            x = self.prices_b_[window_start:window_end]

            # OLS regression with intercept: y = a + b*x + e
            design = add_constant(x)
            fitted = OLS(y, design).fit()
            residuals = y - fitted.predict(design)

            # A failing ADF test (e.g. on a constant series) is treated as
            # "no evidence": p-value 1.0 can never beat the current best.
            try:
                pvalue = float(adfuller(residuals, maxlag=1, regression="c")[1])
            except Exception:
                pvalue = 1.0

            if pvalue < lowest_pvalue:
                lowest_pvalue = pvalue
                chosen.training_size = window_len
                chosen.training_start_index = window_start

        return chosen
class JohansenOptdWndDataPolicy(OptimizedWndDataPolicy):
    """Window policy driven by the Johansen cointegration trace statistic."""

    def __init__(self, config: Dict[str, Any], *args: Any, **kwargs: Any):
        """Forward all arguments to :class:`OptimizedWndDataPolicy`."""
        super().__init__(config, *args, **kwargs)

    def optimize_window_size(self) -> DataWindowParams:
        """Pick the window ending at ``end_index_`` with the largest trace statistic.

        Returns:
            A copy of the current window parameters, updated to the best
            candidate. If the test failed for every candidate, a warning is
            printed and the copy is returned unchanged.
        """
        from statsmodels.tsa.vector_ar.vecm import coint_johansen

        best_stat = -np.inf
        best_trn_size = 0
        best_start_index = -1

        for trn_size in range(self.min_training_size_, self.max_training_size_):
            start_index = self.end_index_ - trn_size
            series_a = self.prices_a_[start_index : self.end_index_]
            series_b = self.prices_b_[start_index : self.end_index_]

            try:
                # Two-column matrix expected by the Johansen test.
                data = np.column_stack([series_a, series_b])
                # det_order=0 includes a constant term; k_ar_diff=1 uses one
                # lagged difference.
                res = coint_johansen(data, det_order=0, k_ar_diff=1)
                # First trace statistic: null hypothesis of rank 0.
                trace_stat = res.lr1[0]
            except Exception:
                # Best-effort: a window on which the test fails is skipped.
                continue

            # NOTE(review): the statistic is only ranked across windows; it is
            # never compared against a critical value, so the winner is not
            # guaranteed to reject the no-cointegration null.
            if trace_stat > best_stat:
                best_stat = trace_stat
                best_trn_size = trn_size
                best_start_index = start_index

        result = copy.copy(self.current_data_params_)
        if best_trn_size > 0:
            result.training_size = best_trn_size
            result.training_start_index = best_start_index
        else:
            print("*** WARNING: No valid cointegration window found.")
        return result

View File

@ -439,9 +439,12 @@ class PairResearchResult:
summary = self.get_return_summary()
print(f"\n====== PAIR RESEARCH GRAND TOTALS ======")
print('---')
print(f"Total Return: {summary['total_return']:+.2f}%")
print('---')
print(f"Total Days Traded: {summary['total_days']}")
print(f"Total Pair Trades: {summary['total_pairs']}")
print(f"Total Open-Close Actions: {summary['total_pairs']}")
print(f"Total Trades: 4 * {summary['total_pairs']} = {4 * summary['total_pairs']}")
if summary['total_days'] > 0:
print(f"Average Daily Return: {summary['average_daily_return']:+.2f}%")
@ -469,8 +472,8 @@ class PairResearchResult:
self.calculate_returns()
self.print_returns_by_day()
self.print_outstanding_positions()
self.print_grand_totals()
self._print_additional_metrics()
self.print_grand_totals()
def _print_additional_metrics(self) -> None:
"""Print additional performance metrics."""

View File

@ -3,11 +3,10 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from datetime import datetime
from enum import Enum
from typing import Any, Dict, Optional, Type, cast, Generator, List
from typing import Any, Dict, Generator, List, Optional, Type, cast
import pandas as pd
from pt_strategy.model_data_policy import DataParams
from pt_strategy.model_data_policy import DataWindowParams
class PairState(Enum):
@ -26,7 +25,6 @@ class TradingPair:
stat_model_price_: str
model_: PairsTradingModel # type: ignore[assignment]
model_tdp_: ModelDataPolicy # type: ignore[assignment]
user_data_: Dict[str, Any]
@ -44,7 +42,6 @@ class TradingPair:
self.symbol_a_ = instruments[0]["symbol"]
self.symbol_b_ = instruments[1]["symbol"]
self.model_ = PairsTradingModel.create(config)
self.model_tdp_ = ModelDataPolicy.create(config)
self.stat_model_price_ = config["stat_model_price"]
self.user_data_ = {
"state": PairState.INITIAL,
@ -159,12 +156,9 @@ class TradingPair:
})
def run(self, market_data: pd.DataFrame, data_params: DataParams) -> Prediction: # type: ignore[assignment]
def run(self, market_data: pd.DataFrame, data_params: DataWindowParams) -> Prediction: # type: ignore[assignment]
self.market_data_ = market_data[data_params.training_start_index:data_params.training_start_index + data_params.training_size]
return self.model_.predict(pair=self)
while self.model_tdp_.has_next_training_data():
training_data = self.model_tdp_.get_next_training_data()

View File

@ -1,22 +1,12 @@
from __future__ import annotations
import os
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Dict, Generator, List, Optional, Type, cast
from typing import Any, Dict, List, Optional
import pandas as pd
from pt_strategy.model_data_policy import ModelDataPolicy
from pt_strategy.pt_market_data import PtMarketData
from pt_strategy.pt_model import Prediction
from pt_strategy.results import (
PairResearchResult,
create_result_database,
store_config_in_database,
)
from pt_strategy.trading_pair import PairState, TradingPair
from tools.filetools import resolve_datafiles
from tools.instruments import get_instruments
class PtResearchStrategy:
@ -41,7 +31,6 @@ class PtResearchStrategy:
self.config_ = config
self.trades_ = []
self.trading_pair_ = TradingPair(config=config, instruments=instruments)
self.model_data_policy_ = ModelDataPolicy.create(config)
self.predictions_ = pd.DataFrame()
import copy
@ -54,6 +43,9 @@ class PtResearchStrategy:
config=config_copy, md_class=ResearchMarketData
)
self.pt_mkt_data_.load()
self.model_data_policy_ = ModelDataPolicy.create(
config, mkt_data=self.pt_mkt_data_.market_data_df_, pair=self.trading_pair_
)
def outstanding_positions(self) -> List[Dict[str, Any]]:
return list(self.trading_pair_.user_data_.get("outstanding_positions", []))
@ -67,9 +59,7 @@ class PtResearchStrategy:
while self.pt_mkt_data_.has_next():
market_data_series = self.pt_mkt_data_.get_next()
new_row = pd.DataFrame([market_data_series])
market_data_df = pd.concat(
[market_data_df, new_row], ignore_index=True
)
market_data_df = pd.concat([market_data_df, new_row], ignore_index=True)
if idx >= training_minutes:
break
idx += 1
@ -85,7 +75,9 @@ class PtResearchStrategy:
prediction = self.trading_pair_.run(
market_data_df, self.model_data_policy_.advance()
)
self.predictions_ = pd.concat([self.predictions_, prediction.to_df()], ignore_index=True)
self.predictions_ = pd.concat(
[self.predictions_, prediction.to_df()], ignore_index=True
)
assert prediction is not None
trades = self._create_trades(