Compare commits

..

No commits in common. "c2f701e3a2c0cd123bc1415d8a4bea89c7024cff" and "98a15d301a4bd843d49296e58f37a76109efce03" have entirely different histories.

15 changed files with 554 additions and 1383 deletions

View File

@ -43,7 +43,7 @@ Each configuration dictionary specifies:
- `db_table_name`: The name of the table within the SQLite database.
- `instruments`: A list of symbols to consider for forming trading pairs.
- `trading_hours`: Defines the session start and end times, crucial for equity markets.
- `stat_model_price`: The column in the data to be used as the price (e.g., "close").
- `price_column`: The column in the data to be used as the price (e.g., "close").
- `dis-equilibrium_open_trshld`: The threshold (in standard deviations) of the dis-equilibrium for opening a trade.
- `dis-equilibrium_close_trshld`: The threshold (in standard deviations) of the dis-equilibrium for closing an open trade.
- `training_minutes`: The length of the rolling window (in minutes) used to train the model (e.g., calculate cointegration, mean, and standard deviation of the dis-equilibrium).

View File

@ -14,11 +14,8 @@
# ====== Funding ======
"funding_per_pair": 2000.0,
# ====== Trading Parameters ======
# "stat_model_price": "close",
"stat_model_price": "vwap",
"price_column": "close",
"execution_price": {
"column": "vwap",
"shift": 1,

View File

@ -15,7 +15,7 @@
# ====== Funding ======
"funding_per_pair": 2000.0,
# ====== Trading Parameters ======
"stat_model_price": "close",
"price_column": "close",
"execution_price": {
"column": "vwap",
"shift": 1,

View File

@ -43,10 +43,6 @@ class PairsTradingFitMethod(ABC):
@abstractmethod
def create_trading_pair(
self,
config: Dict,
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
self, config: Dict, market_data: pd.DataFrame, symbol_a: str, symbol_b: str, price_column: str
) -> TradingPair: ...

View File

@ -431,7 +431,7 @@ class BacktestResult:
f" Close Dis-eq: {trd['open_scaled_disequilibrium']:.2f}"
print(
f" {trd['open_time'].time()}-{trd['close_time'].time()} {trd['symbol']}: "
f" {trd['open_time'].time()} {trd['symbol']}: "
f" {trd['open_side']} @ ${trd['open_price']:.2f},"
f" {trd["close_side"]} @ ${trd["close_price"]:.2f},"
f" Return: {trd['symbol_return']:.2f}%{disequil_info}"
@ -552,7 +552,7 @@ class BacktestResult:
last_row = pair_result_df.loc[last_row_index]
last_tstamp = last_row["tstamp"]
colname_a, colname_b = pair.exec_prices_colnames()
colname_a, colname_b = pair.colnames()
last_px_a = last_row[colname_a]
last_px_b = last_row[colname_b]

View File

@ -146,13 +146,8 @@ class RollingFit(PairsTradingFitMethod):
print(f"{pair}: *** Position is NOT CLOSED. ***")
# outstanding positions
if config["close_outstanding_positions"]:
close_position_row = pd.Series(pair.market_data_.iloc[-2])
close_position_row["disequilibrium"] = 0.0
close_position_row["scaled_disequilibrium"] = 0.0
close_position_row["signed_scaled_disequilibrium"] = 0.0
close_position_trades = self._get_close_trades(
pair=pair, row=close_position_row, close_threshold=close_threshold
pair=pair, row=pred_row, close_threshold=close_threshold
)
if close_position_trades is not None:
close_position_trades["status"] = PairState.CLOSE_POSITION.name
@ -176,10 +171,9 @@ class RollingFit(PairsTradingFitMethod):
def _get_open_trades(
self, pair: TradingPair, row: pd.Series, open_threshold: float
) -> Optional[pd.DataFrame]:
colname_a, colname_b = pair.exec_prices_colnames()
colname_a, colname_b = pair.colnames()
open_row = row
open_tstamp = open_row["tstamp"]
open_disequilibrium = open_row["disequilibrium"]
open_scaled_disequilibrium = open_row["scaled_disequilibrium"]
@ -258,7 +252,7 @@ class RollingFit(PairsTradingFitMethod):
def _get_close_trades(
self, pair: TradingPair, row: pd.Series, close_threshold: float
) -> Optional[pd.DataFrame]:
colname_a, colname_b = pair.exec_prices_colnames()
colname_a, colname_b = pair.colnames()
close_row = row
close_tstamp = close_row["tstamp"]

View File

@ -73,7 +73,7 @@ class TradingPair(ABC):
market_data_: pd.DataFrame
symbol_a_: str
symbol_b_: str
stat_model_price_: str
price_column_: str
training_mu_: float
training_std_: float
@ -91,17 +91,17 @@ class TradingPair(ABC):
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
price_column: str,
):
self.symbol_a_ = symbol_a
self.symbol_b_ = symbol_b
self.stat_model_price_ = config["stat_model_price"]
self.price_column_ = price_column
self.set_market_data(market_data)
self.user_data_ = {}
self.predicted_df_ = None
self.config_ = config
self._set_market_data(market_data)
def _set_market_data(self, market_data: pd.DataFrame) -> None:
def set_market_data(self, market_data: pd.DataFrame) -> None:
self.market_data_ = pd.DataFrame(
self._transform_dataframe(market_data)[["tstamp"] + self.colnames()]
)
@ -109,22 +109,6 @@ class TradingPair(ABC):
self.market_data_ = self.market_data_.dropna().reset_index(drop=True)
self.market_data_["tstamp"] = pd.to_datetime(self.market_data_["tstamp"])
self.market_data_ = self.market_data_.sort_values("tstamp")
self._set_execution_price_data()
pass
def _set_execution_price_data(self) -> None:
if "execution_price" not in self.config_:
self.market_data_[f"exec_price_{self.symbol_a_}"] = self.market_data_[f"{self.stat_model_price_}_{self.symbol_a_}"]
self.market_data_[f"exec_price_{self.symbol_b_}"] = self.market_data_[f"{self.stat_model_price_}_{self.symbol_b_}"]
return
execution_price_column = self.config_["execution_price"]["column"]
execution_price_shift = self.config_["execution_price"]["shift"]
self.market_data_[f"exec_price_{self.symbol_a_}"] = self.market_data_[f"{self.stat_model_price_}_{self.symbol_a_}"].shift(-execution_price_shift)
self.market_data_[f"exec_price_{self.symbol_b_}"] = self.market_data_[f"{self.stat_model_price_}_{self.symbol_b_}"].shift(-execution_price_shift)
self.market_data_ = self.market_data_.dropna().reset_index(drop=True)
def get_begin_index(self) -> int:
if "trading_hours" not in self.config_:
@ -155,7 +139,7 @@ class TradingPair(ABC):
def _transform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
# Select only the columns we need
df_selected: pd.DataFrame = pd.DataFrame(
df[["tstamp", "symbol", self.stat_model_price_]]
df[["tstamp", "symbol", self.price_column_]]
)
# Start with unique timestamps
@ -173,13 +157,13 @@ class TradingPair(ABC):
)
# Create column name like "close_COIN"
new_price_column = f"{self.stat_model_price_}_{symbol}"
new_price_column = f"{self.price_column_}_{symbol}"
# Create temporary dataframe with timestamp and price
temp_df = pd.DataFrame(
{
"tstamp": df_symbol["tstamp"],
new_price_column: df_symbol[self.stat_model_price_],
new_price_column: df_symbol[self.price_column_],
}
)
@ -217,14 +201,8 @@ class TradingPair(ABC):
def colnames(self) -> List[str]:
return [
f"{self.stat_model_price_}_{self.symbol_a_}",
f"{self.stat_model_price_}_{self.symbol_b_}",
]
def exec_prices_colnames(self) -> List[str]:
return [
f"exec_price_{self.symbol_a_}",
f"exec_price_{self.symbol_b_}",
f"{self.price_column_}_{self.symbol_a_}",
f"{self.price_column_}_{self.symbol_b_}",
]
def add_trades(self, trades: pd.DataFrame) -> None:
@ -353,7 +331,7 @@ class TradingPair(ABC):
instrument_open_price = instrument_open_trades["price"].iloc[0]
sign = -1 if instrument_open_trades["side"].iloc[0] == "SELL" else 1
instrument_price = predicted_row[f"{self.stat_model_price_}_{symbol}"]
instrument_price = predicted_row[f"{self.price_column_}_{symbol}"]
instrument_return = (
sign
* (instrument_price - instrument_open_price)

View File

@ -7,20 +7,12 @@ from pt_trading.trading_pair import TradingPair
from statsmodels.tsa.vector_ar.vecm import VECM, VECMResults
NanoPerMin = 1e9
class VECMTradingPair(TradingPair):
vecm_fit_: Optional[VECMResults]
pair_predict_result_: Optional[pd.DataFrame]
def __init__(
self,
config: Dict[str, Any],
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
):
super().__init__(config, market_data, symbol_a, symbol_b)
def __init__(self, config: Dict[str, Any], market_data: pd.DataFrame, symbol_a: str, symbol_b: str, price_column: str):
super().__init__(config, market_data, symbol_a, symbol_b, price_column)
self.vecm_fit_ = None
self.pair_predict_result_ = None
@ -87,17 +79,15 @@ class VECMTradingPair(TradingPair):
predicted_df["disequilibrium"] - self.training_mu_
) / self.training_std_
predicted_df["scaled_disequilibrium"] = abs(
predicted_df["signed_scaled_disequilibrium"]
predicted_df["scaled_disequilibrium"] = (
abs(predicted_df["signed_scaled_disequilibrium"])
)
predicted_df = predicted_df.reset_index(drop=True)
if self.pair_predict_result_ is None:
self.pair_predict_result_ = predicted_df
else:
self.pair_predict_result_ = pd.concat(
[self.pair_predict_result_, predicted_df], ignore_index=True
)
self.pair_predict_result_ = pd.concat([self.pair_predict_result_, predicted_df], ignore_index=True)
# Reset index to ensure proper indexing
self.pair_predict_result_ = self.pair_predict_result_.reset_index(drop=True)
return self.pair_predict_result_
@ -108,15 +98,12 @@ class VECMRollingFit(RollingFit):
super().__init__()
def create_trading_pair(
self,
config: Dict,
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
self, config: Dict, market_data: pd.DataFrame, symbol_a: str, symbol_b: str, price_column: str
) -> TradingPair:
return VECMTradingPair(
config=config,
market_data=market_data,
symbol_a=symbol_a,
symbol_b=symbol_b,
price_column=price_column
)

View File

@ -7,21 +7,13 @@ from pt_trading.trading_pair import TradingPair
import statsmodels.api as sm
NanoPerMin = 1e9
class ZScoreTradingPair(TradingPair):
zscore_model_: Optional[sm.regression.linear_model.RegressionResultsWrapper]
pair_predict_result_: Optional[pd.DataFrame]
zscore_df_: Optional[pd.DataFrame]
def __init__(
self,
config: Dict[str, Any],
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
):
super().__init__(config, market_data, symbol_a, symbol_b)
def __init__(self, config: Dict[str, Any], market_data: pd.DataFrame, symbol_a: str, symbol_b: str, price_column: str):
super().__init__(config, market_data, symbol_a, symbol_b, price_column)
self.zscore_model_ = None
self.pair_predict_result_ = None
self.zscore_df_ = None
@ -31,9 +23,7 @@ class ZScoreTradingPair(TradingPair):
symbol_a_px_series = self.training_df_[self.colnames()].iloc[:, 0]
symbol_b_px_series = self.training_df_[self.colnames()].iloc[:, 1]
symbol_a_px_series, symbol_b_px_series = symbol_a_px_series.align(
symbol_b_px_series, axis=0
)
symbol_a_px_series,symbol_b_px_series = symbol_a_px_series.align(symbol_b_px_series, axis=0)
X = sm.add_constant(symbol_b_px_series)
self.zscore_model_ = sm.OLS(symbol_a_px_series, X).fit()
@ -62,9 +52,7 @@ class ZScoreTradingPair(TradingPair):
if self.pair_predict_result_ is None:
self.pair_predict_result_ = predicted_df
else:
self.pair_predict_result_ = pd.concat(
[self.pair_predict_result_, predicted_df], ignore_index=True
)
self.pair_predict_result_ = pd.concat([self.pair_predict_result_, predicted_df], ignore_index=True)
# Reset index to ensure proper indexing
self.pair_predict_result_ = self.pair_predict_result_.reset_index(drop=True)
return self.pair_predict_result_.dropna()
@ -75,11 +63,12 @@ class ZScoreRollingFit(RollingFit):
super().__init__()
def create_trading_pair(
self, config: Dict, market_data: pd.DataFrame, symbol_a: str, symbol_b: str
self, config: Dict, market_data: pd.DataFrame, symbol_a: str, symbol_b: str, price_column: str
) -> TradingPair:
return ZScoreTradingPair(
config=config,
market_data=market_data,
symbol_a=symbol_a,
symbol_b=symbol_b,
price_column=price_column
)

View File

@ -28,14 +28,13 @@ def load_sqlite_to_dataframe(db_path:str, query:str) -> pd.DataFrame:
conn.close()
def convert_time_to_UTC(value: str, timezone: str, extra_minutes: int = 0) -> str:
def convert_time_to_UTC(value: str, timezone: str) -> str:
from zoneinfo import ZoneInfo
from datetime import datetime, timedelta
from datetime import datetime
# Parse it to naive datetime object
local_dt = datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
local_dt = local_dt + timedelta(minutes=extra_minutes)
zinfo = ZoneInfo(timezone)
result: datetime = local_dt.replace(tzinfo=zinfo).astimezone(ZoneInfo("UTC"))
@ -86,7 +85,7 @@ def load_market_data(
f"{date_str} {trading_hours['begin_session']}", trading_hours["timezone"]
)
end_time = convert_time_to_UTC(
f"{date_str} {trading_hours['end_session']}", trading_hours["timezone"], extra_minutes=2 # to get execution price
f"{date_str} {trading_hours['end_session']}", trading_hours["timezone"]
)
# Perform boolean selection

View File

@ -85,7 +85,7 @@ def main() -> None:
# )
# Process each data file
stat_model_price = config["stat_model_price"]
price_column = config["price_column"]
print(f"\n====== Processing {os.path.basename(datafile)} ======")
@ -105,7 +105,7 @@ def main() -> None:
# Process data for this file
try:
cointegration_data: pd.DataFrame = pd.DataFrame()
for pair in create_pairs(datafile, stat_model_price, config, instruments):
for pair in create_pairs(datafile, price_column, config, instruments):
cointegration_data = pd.concat([cointegration_data, pair.cointegration_check()])
pd.set_option('display.width', 400)

File diff suppressed because one or more lines are too long

View File

@ -69,6 +69,7 @@ def get_instruments(args: argparse.Namespace, config: Dict) -> List[Dict[str, st
def run_backtest(
config: Dict,
datafiles: List[str],
price_column: str,
fit_method: PairsTradingFitMethod,
instruments: List[Dict[str, str]],
) -> BacktestResult:
@ -89,6 +90,7 @@ def run_backtest(
pairs = create_pairs(
datafiles=datafiles,
fit_method=fit_method,
price_column=price_column,
config=config,
instruments=instruments,
)
@ -154,6 +156,7 @@ def main() -> None:
all_results: Dict[str, Dict[str, Any]] = {}
is_config_stored = False
# Process each data file
price_column = config["price_column"]
for day in sorted(days):
md_datafiles = [datafile for md_day, datafile in datafiles if md_day == day]
@ -180,6 +183,7 @@ def main() -> None:
bt_results = run_backtest(
config=config,
datafiles=md_datafiles,
price_column=price_column,
fit_method=fit_method,
instruments=instruments,
)

View File

@ -48,6 +48,7 @@ def resolve_datafiles(config: Dict, cli_datafiles: Optional[str] = None) -> List
def create_pairs(
datafiles: List[str],
fit_method: PairsTradingFitMethod,
price_column: str,
config: Dict,
instruments: List[Dict[str, str]],
) -> List:
@ -84,6 +85,7 @@ def create_pairs(
market_data=market_data_df,
symbol_a=symbol_a,
symbol_b=symbol_b,
price_column=price_column,
)
pairs.append(pair)
return pairs

View File

@ -23,6 +23,7 @@ from pt_trading.trading_pair import TradingPair
def run_strategy(
config: Dict,
datafile: str,
price_column: str,
fit_method: PairsTradingFitMethod,
instruments: List[str],
) -> BacktestResult:
@ -55,6 +56,7 @@ def run_strategy(
market_data=market_data_df,
symbol_a=instruments[a_index],
symbol_b=instruments[b_index],
price_column=price_column,
)
pairs.append(pair)
return pairs
@ -159,6 +161,7 @@ def main() -> None:
)
# Process each data file
price_column = config["price_column"]
for datafile in datafiles:
print(f"\n====== Processing {os.path.basename(datafile)} ======")
@ -184,6 +187,7 @@ def main() -> None:
bt_results = run_strategy(
config=config,
datafile=datafile,
price_column=price_column,
fit_method=fit_method,
instruments=instruments,
)