Compare commits

3 commits: 3f4d174eb3 ... 91623db4b7

| Author | SHA1 | Date |
|---|---|---|
| | 91623db4b7 | |
| | 06884d72b7 | |
| | 0ceb2f2eba | |
src/backtest_configs.py (new file, 95 lines)

@@ -0,0 +1,95 @@
from typing import Any, Dict, List, Optional


# ------------------------ Configuration ------------------------
# Default configuration
CRYPTO_CONFIG: Dict = {
    "security_type": "CRYPTO",
    # --- Data retrieval
    "data_directory": "./data/crypto",
    "datafiles": [
        # "20250519.mktdata.ohlcv.db",
        # "20250520.mktdata.ohlcv.db",
        # "20250521.mktdata.ohlcv.db",
        # "20250522.mktdata.ohlcv.db",
        # "20250523.mktdata.ohlcv.db",
        # "20250524.mktdata.ohlcv.db",
        "20250525.mktdata.ohlcv.db",
    ],
    "db_table_name": "bnbspot_ohlcv_1min",
    # ----- Instruments
    "exchange_id": "BNBSPOT",
    "instrument_id_pfx": "PAIR-",
    "instruments": [
        "BTC-USDT",
        "BCH-USDT",
        "ETH-USDT",
        "LTC-USDT",
        "XRP-USDT",
        "ADA-USDT",
        "SOL-USDT",
        "DOT-USDT",
    ],
    "trading_hours": {
        "begin_session": "00:00:00",
        "end_session": "23:59:00",
        "timezone": "UTC",
    },
    # ----- Model Settings
    "price_column": "close",
    "min_required_points": 30,
    "zero_threshold": 1e-10,

    "dis-equilibrium_open_trshld": 2.0,
    "dis-equilibrium_close_trshld": 0.5,

    # "training_minutes": 120,
    "training_minutes": 60,
    # ----- Validation
    "funding_per_pair": 2000.0,  # USD
}


# ========================== EQUITIES
EQT_CONFIG: Dict = {
    # --- Data retrieval
    "security_type": "EQUITY",
    "data_directory": "./data/equity",
    "datafiles": [
        # "20250508.alpaca_sim_md.db",
        # "20250509.alpaca_sim_md.db",
        "20250512.alpaca_sim_md.db",
        # "20250513.alpaca_sim_md.db",
        # "20250514.alpaca_sim_md.db",
        # "20250515.alpaca_sim_md.db",
        # "20250516.alpaca_sim_md.db",
        # "20250519.alpaca_sim_md.db",
        # "20250520.alpaca_sim_md.db"
    ],
    "db_table_name": "md_1min_bars",
    # ----- Instruments
    "exchange_id": "ALPACA",
    "instrument_id_pfx": "STOCK-",
    "instruments": [
        "COIN",
        "GBTC",
        "HOOD",
        "MSTR",
        "PYPL",
    ],
    "trading_hours": {
        "begin_session": "9:30:00",
        "end_session": "16:00:00",
        "timezone": "America/New_York",
    },
    # ----- Model Settings
    "price_column": "close",
    "min_required_points": 30,
    "zero_threshold": 1e-10,
    "dis-equilibrium_open_trshld": 2.0,
    "dis-equilibrium_close_trshld": 0.5,
    "training_minutes": 120,
    # ----- Validation
    "funding_per_pair": 2000.0,
}
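For orientation, a minimal sketch of how these dictionaries appear to be consumed: the `CONFIG = CRYPTO_CONFIG` switch and the `funding_per_pair / 2` split per leg both show up later in this compare, while the extra import and the final print are illustrative only.

```python
from backtest_configs import CRYPTO_CONFIG, EQT_CONFIG  # EQT_CONFIG import added for illustration

CONFIG = CRYPTO_CONFIG   # the backtest selects one config at module level
# CONFIG = EQT_CONFIG

funding_per_leg = CONFIG["funding_per_pair"] / 2   # each leg of a pair gets half the funding
print(CONFIG["exchange_id"], CONFIG["training_minutes"], funding_per_leg)
```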
@@ -1,8 +1,6 @@
-import datetime
 import sys
-import json
 
-from typing import Any, Dict, List, Tuple, Optional
+from typing import Any, Dict, List, Optional
 
 import pandas as pd
 import numpy as np
@ -10,260 +8,58 @@ import numpy as np
|
|||||||
# ============= statsmodels ===================
|
# ============= statsmodels ===================
|
||||||
from statsmodels.tsa.vector_ar.vecm import VECM
|
from statsmodels.tsa.vector_ar.vecm import VECM
|
||||||
|
|
||||||
|
from backtest_configs import CRYPTO_CONFIG
|
||||||
|
from tools.data_loader import load_market_data, transform_dataframe
|
||||||
|
from tools.trading_pair import TradingPair
|
||||||
|
from results import BacktestResult
|
||||||
|
|
||||||
NanoPerMin = 1e9
|
NanoPerMin = 1e9
|
||||||
UNSET_FLOAT: float = sys.float_info.max
|
UNSET_FLOAT: float = sys.float_info.max
|
||||||
UNSET_INT: int = sys.maxsize
|
UNSET_INT: int = sys.maxsize
|
||||||
|
|
||||||
# ------------------------ Configuration ------------------------
|
|
||||||
# Default configuration
|
|
||||||
CRYPTO_CONFIG: Dict = {
|
|
||||||
# --- Data retrieval
|
|
||||||
"data_directory": "./data/crypto",
|
|
||||||
"datafiles": [
|
|
||||||
"20250519.mktdata.ohlcv.db",
|
|
||||||
],
|
|
||||||
"db_table_name": "bnbspot_ohlcv_1min",
|
|
||||||
|
|
||||||
# ----- Instruments
|
|
||||||
"exchange_id": "BNBSPOT",
|
|
||||||
"instrument_id_pfx": "PAIR-",
|
|
||||||
|
|
||||||
"instruments": [
|
# # ==========================================================================
|
||||||
"BTC-USDT",
|
|
||||||
"ETH-USDT",
|
|
||||||
"LTC-USDT",
|
|
||||||
],
|
|
||||||
|
|
||||||
"trading_hours": {
|
|
||||||
"begin_session": "00:00:00",
|
|
||||||
"end_session": "23:59:00",
|
|
||||||
"timezone": "UTC"
|
|
||||||
},
|
|
||||||
|
|
||||||
# ----- Model Settings
|
CONFIG = CRYPTO_CONFIG
|
||||||
"price_column": "close",
|
# CONFIG = EQT_CONFIG
|
||||||
"min_required_points": 30,
|
|
||||||
"zero_threshold": 1e-10,
|
|
||||||
"equilibrium_threshold_open": 5.0,
|
|
||||||
"equilibrium_threshold_close": 1.0,
|
|
||||||
"training_minutes": 120,
|
|
||||||
|
|
||||||
# ----- Validation
|
BacktestResults = BacktestResult(config=CONFIG)
|
||||||
"funding_per_pair": 2000.0, # USD
|
|
||||||
}
|
|
||||||
# ========================== EQUITIES
|
|
||||||
EQT_CONFIG: Dict = {
|
|
||||||
# --- Data retrieval
|
|
||||||
"data_directory": "./data/equity",
|
|
||||||
"datafiles": [
|
|
||||||
"20250508.alpaca_sim_md.db",
|
|
||||||
# "20250509.alpaca_sim_md.db",
|
|
||||||
# "20250512.alpaca_sim_md.db",
|
|
||||||
# "20250513.alpaca_sim_md.db",
|
|
||||||
# "20250514.alpaca_sim_md.db",
|
|
||||||
# "20250515.alpaca_sim_md.db",
|
|
||||||
# "20250516.alpaca_sim_md.db",
|
|
||||||
# "20250519.alpaca_sim_md.db",
|
|
||||||
# "20250520.alpaca_sim_md.db"
|
|
||||||
],
|
|
||||||
"db_table_name": "md_1min_bars",
|
|
||||||
|
|
||||||
# ----- Instruments
|
|
||||||
"exchange_id": "ALPACA",
|
|
||||||
"instrument_id_pfx": "STOCK-",
|
|
||||||
|
|
||||||
"instruments": [
|
|
||||||
"COIN",
|
|
||||||
"GBTC",
|
|
||||||
"HOOD",
|
|
||||||
"MSTR",
|
|
||||||
"PYPL",
|
|
||||||
],
|
|
||||||
|
|
||||||
"trading_hours": {
|
|
||||||
"begin_session": "9:30:00",
|
|
||||||
"end_session": "16:00:00",
|
|
||||||
"timezone": "America/New_York"
|
|
||||||
},
|
|
||||||
|
|
||||||
# ----- Model Settings
|
|
||||||
"price_column": "close",
|
|
||||||
"min_required_points": 30,
|
|
||||||
"zero_threshold": 1e-10,
|
|
||||||
"equilibrium_threshold_open": 5.0,
|
|
||||||
"equilibrium_threshold_close": 1.0,
|
|
||||||
"training_minutes": 120,
|
|
||||||
|
|
||||||
# ----- Validation
|
|
||||||
"funding_per_pair": 2000.0,
|
|
||||||
}
|
|
||||||
|
|
||||||
# ==========================================================================
|
|
||||||
CONFIG = EQT_CONFIG
|
|
||||||
TRADES = {}
|
|
||||||
TOTAL_UNREALIZED_PNL = 0.0 # Global variable to track total unrealized PnL
|
|
||||||
TOTAL_REALIZED_PNL = 0.0 # Global variable to track total realized PnL
|
|
||||||
OUTSTANDING_POSITIONS = [] # Global list to track outstanding positions with share quantities
|
|
||||||
|
|
||||||
class TradingPair:
|
|
||||||
symbol_a_: str
|
|
||||||
symbol_b_: str
|
|
||||||
price_column_: str
|
|
||||||
|
|
||||||
def __init__(self, symbol_a: str, symbol_b: str, price_column: str):
|
|
||||||
self.symbol_a_ = symbol_a
|
|
||||||
self.symbol_b_ = symbol_b
|
|
||||||
self.price_column_ = price_column
|
|
||||||
|
|
||||||
def colnames(self) -> List[str]:
|
|
||||||
return [f"{self.price_column_}_{self.symbol_a_}", f"{self.price_column_}_{self.symbol_b_}"]
|
|
||||||
|
|
||||||
def __repr__(self) ->str:
|
|
||||||
return f"{self.symbol_a_} & {self.symbol_b_}"
|
|
||||||
|
|
||||||
def convert_time_to_UTC(value: str, timezone: str):
|
|
||||||
|
|
||||||
from zoneinfo import ZoneInfo
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
# Parse it to naive datetime object
|
|
||||||
local_dt = datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
|
|
||||||
|
|
||||||
zinfo = ZoneInfo(timezone)
|
|
||||||
result = local_dt.replace(tzinfo=zinfo)
|
|
||||||
|
|
||||||
result = result.astimezone(ZoneInfo('UTC'))
|
|
||||||
result = result.strftime('%Y-%m-%d %H:%M:%S')
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
pass
|
def create_trading_signals(pair: TradingPair) -> pd.DataFrame:
|
||||||
|
|
||||||
def load_market_data(datafile: str, config: Dict) -> pd.DataFrame:
|
|
||||||
from tools.data_loader import load_sqlite_to_dataframe
|
|
||||||
|
|
||||||
instrument_ids = ["\"" + config["instrument_id_pfx"] + instrument + "\"" for instrument in config["instruments"]]
|
|
||||||
exchange_id = config["exchange_id"]
|
|
||||||
|
|
||||||
query = "select tstamp"
|
|
||||||
query += ", tstamp_ns as time_ns"
|
|
||||||
query += ", substr(instrument_id, 7) as symbol"
|
|
||||||
query += ", open"
|
|
||||||
query += ", high"
|
|
||||||
query += ", low"
|
|
||||||
query += ", close"
|
|
||||||
query += ", volume"
|
|
||||||
query += ", num_trades"
|
|
||||||
query += ", vwap"
|
|
||||||
|
|
||||||
query += f" from {config['db_table_name']}"
|
|
||||||
query += f" where exchange_id ='{exchange_id}'"
|
|
||||||
query += f" and instrument_id in ({','.join(instrument_ids)})"
|
|
||||||
|
|
||||||
df = load_sqlite_to_dataframe(db_path=datafile, query=query)
|
|
||||||
|
|
||||||
# Trading Hours
|
|
||||||
date_str = df["tstamp"][0][0:10]
|
|
||||||
trading_hours = CONFIG['trading_hours']
|
|
||||||
start_time = f"{date_str} {trading_hours['begin_session']}"
|
|
||||||
end_time = f"{date_str} {trading_hours['end_session']}"
|
|
||||||
|
|
||||||
start_time = convert_time_to_UTC(start_time, trading_hours["timezone"])
|
|
||||||
end_time = convert_time_to_UTC(end_time, trading_hours["timezone"])
|
|
||||||
|
|
||||||
# Perform boolean selection
|
|
||||||
df = df[(df["tstamp"] >= start_time) & (df["tstamp"] <= end_time)]
|
|
||||||
df["tstamp"] = pd.to_datetime(df["tstamp"])
|
|
||||||
|
|
||||||
return df
|
|
||||||
|
|
||||||
def transform_dataframe(df: pd.DataFrame, price_column: str):
|
|
||||||
# Select only the columns we need
|
|
||||||
df_selected = df[["tstamp", "symbol", price_column]]
|
|
||||||
|
|
||||||
# Start with unique timestamps
|
|
||||||
result_df: pd.DataFrame = pd.DataFrame(df_selected["tstamp"]).drop_duplicates().reset_index(drop=True)
|
|
||||||
|
|
||||||
# For each unique symbol, add a corresponding close price column
|
|
||||||
for symbol in df_selected["symbol"].unique():
|
|
||||||
# Filter rows for this symbol
|
|
||||||
df_symbol = df_selected[df_selected["symbol"] == symbol].reset_index(drop=True)
|
|
||||||
|
|
||||||
# Create column name like "close-COIN"
|
|
||||||
new_price_column = f"{price_column}_{symbol}"
|
|
||||||
|
|
||||||
# Create temporary dataframe with timestamp and price
|
|
||||||
temp_df = pd.DataFrame({
|
|
||||||
"tstamp": df_symbol["tstamp"],
|
|
||||||
new_price_column: df_symbol[price_column]
|
|
||||||
})
|
|
||||||
|
|
||||||
# Join with our result dataframe
|
|
||||||
result_df = pd.merge(result_df, temp_df, on="tstamp", how="left")
|
|
||||||
result_df = result_df.reset_index(drop=True) # do not dropna() since irrelevant symbol would affect dataset
|
|
||||||
|
|
||||||
return result_df
|
|
||||||
|
|
||||||
def get_datasets(df: pd.DataFrame, training_minutes: int, pair: TradingPair) -> Tuple[pd.DataFrame, pd.DataFrame]:
|
|
||||||
# Training dataset
|
|
||||||
colname_a, colname_b = pair.colnames()
|
|
||||||
df = df[["tstamp", colname_a, colname_b]]
|
|
||||||
df = df.dropna()
|
|
||||||
|
|
||||||
training_df = df.iloc[:training_minutes - 1, :].copy()
|
|
||||||
training_df.reset_index(drop=True).dropna().reset_index(drop=True)
|
|
||||||
|
|
||||||
# Testing dataset
|
|
||||||
testing_df = df.iloc[training_minutes:, :].copy()
|
|
||||||
testing_df.reset_index(drop=True).dropna().reset_index(drop=True)
|
|
||||||
|
|
||||||
return (training_df, testing_df)
|
|
||||||
|
|
||||||
def fit_VECM(training_pair_df, pair: TradingPair):
|
|
||||||
vecm_model = VECM(training_pair_df[pair.colnames()].reset_index(drop=True), coint_rank=1)
|
|
||||||
vecm_fit = vecm_model.fit()
|
|
||||||
|
|
||||||
# Check if the model converged properly
|
|
||||||
if not hasattr(vecm_fit, "beta") or vecm_fit.beta is None:
|
|
||||||
print(f"{pair}: VECM model failed to converge properly")
|
|
||||||
|
|
||||||
return vecm_fit
|
|
||||||
|
|
||||||
def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.DataFrame:
|
|
||||||
result_columns = [
|
result_columns = [
|
||||||
"time",
|
"time",
|
||||||
"action",
|
"action",
|
||||||
"symbol",
|
"symbol",
|
||||||
"price",
|
"price",
|
||||||
"equilibrium",
|
"disequilibrium",
|
||||||
|
"scaled_disequilibrium",
|
||||||
"pair",
|
"pair",
|
||||||
]
|
]
|
||||||
|
|
||||||
next_values = vecm_fit.predict(steps=len(testing_pair_df))
|
testing_pair_df = pair.testing_df_
|
||||||
|
next_values = pair.vecm_fit_.predict(steps=len(testing_pair_df))
|
||||||
colname_a, colname_b = pair.colnames()
|
colname_a, colname_b = pair.colnames()
|
||||||
|
|
||||||
# Convert prediction to a DataFrame for readability
|
# Convert prediction to a DataFrame for readability
|
||||||
predicted_df = pd.DataFrame(next_values, columns=[colname_a, colname_b])
|
predicted_df = pd.DataFrame(next_values, columns=[colname_a, colname_b])
|
||||||
|
|
||||||
beta = vecm_fit.beta
|
beta = pair.vecm_fit_.beta
|
||||||
|
|
||||||
predicted_df["equilibrium_term"] = (
|
|
||||||
beta[0] * predicted_df[colname_a]
|
|
||||||
+ beta[1] * predicted_df[colname_b]
|
|
||||||
)
|
|
||||||
|
|
||||||
pair_result_df = pd.merge(
|
pair_result_df = pd.merge(
|
||||||
testing_pair_df.reset_index(drop=True), predicted_df, left_index=True, right_index=True, suffixes=('', '_pred')
|
testing_pair_df.reset_index(drop=True),
|
||||||
|
predicted_df,
|
||||||
|
left_index=True,
|
||||||
|
right_index=True,
|
||||||
|
suffixes=("", "_pred"),
|
||||||
).dropna()
|
).dropna()
|
||||||
|
|
||||||
pair_result_df["equilibrium"] = (
|
pair_result_df["disequilibrium"] = pair_result_df[pair.colnames()] @ beta
|
||||||
beta[0] * pair_result_df[colname_a]
|
|
||||||
+ beta[1] * pair_result_df[colname_b]
|
pair_result_df["scaled_disequilibrium"] = abs(
|
||||||
)
|
pair_result_df["disequilibrium"] - pair.training_mu_
|
||||||
|
) / pair.training_std_
|
||||||
|
|
||||||
pair_result_df["abs_equilibrium"] = np.abs(pair_result_df["equilibrium"])
|
|
||||||
pair_result_df["scaled_equilibrium"] = pair_result_df["abs_equilibrium"] / (abs(beta[0]) * pair_result_df[colname_a] + abs(beta[1]) * pair_result_df[colname_b])
|
|
||||||
|
|
||||||
# Reset index to ensure proper indexing
|
# Reset index to ensure proper indexing
|
||||||
pair_result_df = pair_result_df.reset_index()
|
pair_result_df = pair_result_df.reset_index()
|
||||||
@@ -272,26 +68,24 @@ def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.DataFrame:
     open_row_index = None
     initial_abs_term = None
 
+    open_threshold = CONFIG["dis-equilibrium_open_trshld"]
+    close_threshold = CONFIG["dis-equilibrium_close_trshld"]
     for row_idx in range(len(pair_result_df)):
-        current_abs_term = pair_result_df["abs_equilibrium"][row_idx]
+        curr_disequilibrium = pair_result_df["scaled_disequilibrium"][row_idx]
 
-        # Check if current row has sufficient equilibrium (not near-zero)
-        if current_abs_term >= CONFIG["equilibrium_threshold_open"]:
+        # Check if current row has sufficient disequilibrium (not near-zero)
+        if curr_disequilibrium >= open_threshold:
             open_row_index = row_idx
-            initial_abs_term = current_abs_term
+            initial_abs_term = curr_disequilibrium
             break
 
-    # If no row with sufficient equilibrium found, skip this pair
+    # If no row with sufficient disequilibrium found, skip this pair
     if open_row_index is None:
-        print(f"{pair}: Insufficient divergence in testing dataset. Skipping.")
+        print(f"{pair}: Insufficient disequilibrium in testing dataset. Skipping.")
         return pd.DataFrame()
 
     # Look for close signal starting from the open position
-    trading_signals_df = (
-        pair_result_df["abs_equilibrium"][open_row_index:]
-        # < initial_abs_term / CONFIG["equilibrium_threshold_close"]
-        < CONFIG["equilibrium_threshold_close"]
-    )
+    trading_signals_df = (pair_result_df["scaled_disequilibrium"][open_row_index:] < close_threshold)
 
     # Adjust indices to account for the offset from open_row_index
     close_row_index = None
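The hunk above swaps the old raw `abs_equilibrium` test for thresholds on the scaled disequilibrium. A minimal sketch of that rule, assuming `beta`, `training_mu_` and `training_std_` come from the fitted VECM as in `tools/trading_pair.py`; the helper function name and argument layout here are illustrative.

```python
import pandas as pd

def scaled_disequilibrium(prices: pd.DataFrame, beta, mu: float, std: float) -> pd.Series:
    """|beta' * p_t - mu| / std, the quantity compared against the open/close thresholds."""
    raw = (prices @ beta).squeeze()   # disequilibrium: beta[0]*p_a + beta[1]*p_b per row
    return (raw - mu).abs() / std

# A position opens at the first bar where this value >= CONFIG["dis-equilibrium_open_trshld"]
# (2.0 above) and closes once it falls below CONFIG["dis-equilibrium_close_trshld"] (0.5).
```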
@@ -302,7 +96,8 @@ def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.DataFrame:
 
     open_row = pair_result_df.loc[open_row_index]
     open_tstamp = open_row["tstamp"]
-    open_eqlbrm = open_row["equilibrium"]
+    open_disequilibrium = open_row["disequilibrium"]
+    open_scaled_disequilibrium = open_row["scaled_disequilibrium"]
     open_px_a = open_row[f"{colname_a}"]
     open_px_b = open_row[f"{colname_b}"]
 
@ -323,70 +118,23 @@ def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.D
|
|||||||
|
|
||||||
# If no close signal found, print position and unrealized PnL
|
# If no close signal found, print position and unrealized PnL
|
||||||
if close_row_index is None:
|
if close_row_index is None:
|
||||||
global TOTAL_UNREALIZED_PNL, OUTSTANDING_POSITIONS
|
|
||||||
|
|
||||||
last_row_index = len(pair_result_df) - 1
|
last_row_index = len(pair_result_df) - 1
|
||||||
last_row = pair_result_df.loc[last_row_index]
|
|
||||||
last_tstamp = last_row["tstamp"]
|
|
||||||
last_px_a = last_row[f"{colname_a}"]
|
|
||||||
last_px_b = last_row[f"{colname_b}"]
|
|
||||||
|
|
||||||
# Calculate share quantities based on $1000 funding per pair
|
# Use the new method from BacktestResult to handle outstanding positions
|
||||||
# Split $1000 equally between the two positions ($500 each)
|
BacktestResults.handle_outstanding_position(
|
||||||
funding_per_position = CONFIG["funding_per_pair"] / 2
|
pair=pair,
|
||||||
shares_a = funding_per_position / open_px_a
|
pair_result_df=pair_result_df,
|
||||||
shares_b = funding_per_position / open_px_b
|
last_row_index=last_row_index,
|
||||||
|
open_side_a=open_side_a,
|
||||||
# Calculate unrealized PnL for each position
|
open_side_b=open_side_b,
|
||||||
if open_side_a == "BUY":
|
open_px_a=open_px_a,
|
||||||
unrealized_pnl_a = (last_px_a - open_px_a) / open_px_a * 100
|
open_px_b=open_px_b,
|
||||||
unrealized_dollar_a = shares_a * (last_px_a - open_px_a)
|
open_tstamp=open_tstamp,
|
||||||
else: # SELL
|
initial_abs_term=initial_abs_term,
|
||||||
unrealized_pnl_a = (open_px_a - last_px_a) / open_px_a * 100
|
colname_a=colname_a,
|
||||||
unrealized_dollar_a = shares_a * (open_px_a - last_px_a)
|
colname_b=colname_b
|
||||||
|
)
|
||||||
if open_side_b == "BUY":
|
|
||||||
unrealized_pnl_b = (last_px_b - open_px_b) / open_px_b * 100
|
|
||||||
unrealized_dollar_b = shares_b * (last_px_b - open_px_b)
|
|
||||||
else: # SELL
|
|
||||||
unrealized_pnl_b = (open_px_b - last_px_b) / open_px_b * 100
|
|
||||||
unrealized_dollar_b = shares_b * (open_px_b - last_px_b)
|
|
||||||
|
|
||||||
total_unrealized_pnl = unrealized_pnl_a + unrealized_pnl_b
|
|
||||||
total_unrealized_dollar = unrealized_dollar_a + unrealized_dollar_b
|
|
||||||
|
|
||||||
# Add to global total
|
|
||||||
TOTAL_UNREALIZED_PNL += total_unrealized_pnl
|
|
||||||
|
|
||||||
# Store outstanding positions
|
|
||||||
OUTSTANDING_POSITIONS.append({
|
|
||||||
'pair': str(pair),
|
|
||||||
'symbol_a': pair.symbol_a_,
|
|
||||||
'symbol_b': pair.symbol_b_,
|
|
||||||
'side_a': open_side_a,
|
|
||||||
'side_b': open_side_b,
|
|
||||||
'shares_a': shares_a,
|
|
||||||
'shares_b': shares_b,
|
|
||||||
'open_px_a': open_px_a,
|
|
||||||
'open_px_b': open_px_b,
|
|
||||||
'current_px_a': last_px_a,
|
|
||||||
'current_px_b': last_px_b,
|
|
||||||
'unrealized_dollar_a': unrealized_dollar_a,
|
|
||||||
'unrealized_dollar_b': unrealized_dollar_b,
|
|
||||||
'total_unrealized_dollar': total_unrealized_dollar,
|
|
||||||
'open_time': open_tstamp,
|
|
||||||
'last_time': last_tstamp,
|
|
||||||
'initial_abs_term': initial_abs_term,
|
|
||||||
'current_abs_term': pair_result_df.loc[last_row_index, "abs_equilibrium"],
|
|
||||||
'closing_threshold': initial_abs_term / CONFIG["equilibrium_threshold_close"],
|
|
||||||
'equilibrium_ratio': pair_result_df.loc[last_row_index, "abs_equilibrium"] / (initial_abs_term / CONFIG["equilibrium_threshold_close"])
|
|
||||||
})
|
|
||||||
|
|
||||||
print(f"{pair}: NO CLOSE SIGNAL FOUND - Position held until end of session")
|
|
||||||
print(f" Open: {open_tstamp} | Last: {last_tstamp}")
|
|
||||||
print(f" {pair.symbol_a_}: {open_side_a} {shares_a:.2f} shares @ ${open_px_a:.2f} -> ${last_px_a:.2f} | Unrealized: ${unrealized_dollar_a:.2f} ({unrealized_pnl_a:.2f}%)")
|
|
||||||
print(f" {pair.symbol_b_}: {open_side_b} {shares_b:.2f} shares @ ${open_px_b:.2f} -> ${last_px_b:.2f} | Unrealized: ${unrealized_dollar_b:.2f} ({unrealized_pnl_b:.2f}%)")
|
|
||||||
print(f" Total Unrealized: ${total_unrealized_dollar:.2f} ({total_unrealized_pnl:.2f}%)")
|
|
||||||
|
|
||||||
# Return only open trades (no close trades)
|
# Return only open trades (no close trades)
|
||||||
trd_signal_tuples = [
|
trd_signal_tuples = [
|
||||||
@@ -395,7 +143,8 @@ def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.DataFrame:
             open_side_a,
             pair.symbol_a_,
             open_px_a,
-            open_eqlbrm,
+            open_disequilibrium,
+            open_scaled_disequilibrium,
             pair,
         ),
         (
@@ -403,7 +152,8 @@ def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.DataFrame:
             open_side_b,
             pair.symbol_b_,
             open_px_b,
-            open_eqlbrm,
+            open_disequilibrium,
+            open_scaled_disequilibrium,
             pair,
         ),
     ]
@@ -411,7 +161,8 @@ def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.DataFrame:
     # Close signal found - create complete trade
     close_row = pair_result_df.loc[close_row_index]
     close_tstamp = close_row["tstamp"]
-    close_eqlbrm = close_row["equilibrium"]
+    close_disequilibrium = close_row["disequilibrium"]
+    close_scaled_disequilibrium = close_row["scaled_disequilibrium"]
     close_px_a = close_row[f"{colname_a}"]
     close_px_b = close_row[f"{colname_b}"]
 
@@ -423,7 +174,8 @@ def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.DataFrame:
             open_side_a,
             pair.symbol_a_,
             open_px_a,
-            open_eqlbrm,
+            open_disequilibrium,
+            open_scaled_disequilibrium,
             pair,
         ),
         (
@@ -431,7 +183,8 @@ def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.DataFrame:
             open_side_b,
             pair.symbol_b_,
             open_px_b,
-            open_eqlbrm,
+            open_disequilibrium,
+            open_scaled_disequilibrium,
             pair,
         ),
         (
@@ -439,7 +192,8 @@ def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.DataFrame:
             close_side_a,
             pair.symbol_a_,
             close_px_a,
-            close_eqlbrm,
+            close_disequilibrium,
+            close_scaled_disequilibrium,
             pair,
         ),
         (
@@ -447,7 +201,8 @@ def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.DataFrame:
             close_side_b,
             pair.symbol_b_,
             close_px_b,
-            close_eqlbrm,
+            close_disequilibrium,
+            close_scaled_disequilibrium,
             pair,
         ),
     ]
@ -458,45 +213,25 @@ def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.D
|
|||||||
columns=result_columns,
|
columns=result_columns,
|
||||||
)
|
)
|
||||||
|
|
||||||
def run_single_pair(market_data: pd.DataFrame, price_column:str, pair: TradingPair) -> Optional[pd.DataFrame]:
|
|
||||||
colname_a = f"{price_column}_{pair.symbol_a_}"
|
|
||||||
colname_b = f"{price_column}_{pair.symbol_b_}"
|
|
||||||
training_pair_df, testing_pair_df = get_datasets(df=market_data, training_minutes=CONFIG["training_minutes"], pair=pair)
|
|
||||||
|
|
||||||
# Check if we have enough data points for a meaningful analysis
|
def run_single_pair(
|
||||||
min_required_points = CONFIG[
|
pair: TradingPair, market_data: pd.DataFrame, price_column: str
|
||||||
"min_required_points"
|
) -> Optional[pd.DataFrame]:
|
||||||
] # Minimum number of points for a reasonable VECM model
|
pair.get_datasets(
|
||||||
if len(training_pair_df) < min_required_points:
|
market_data=market_data, training_minutes=CONFIG["training_minutes"]
|
||||||
print(
|
)
|
||||||
f"{pair}: Not enough data points for analysis. Found {len(training_pair_df)}, need at least {min_required_points}"
|
|
||||||
)
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Check for non-finite values
|
|
||||||
if not np.isfinite(training_pair_df).all().all():
|
|
||||||
print(f"{pair}: Data contains non-finite values (NaN or inf)")
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Fit the VECM
|
|
||||||
try:
|
try:
|
||||||
vecm_fit = fit_VECM(training_pair_df, pair=pair)
|
is_cointegrated = pair.train_pair()
|
||||||
|
if not is_cointegrated:
|
||||||
|
print(f"{pair} IS NOT COINTEGRATED")
|
||||||
|
return None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"{pair}: VECM fitting failed: {str(e)}")
|
print(f"{pair}: Training failed: {str(e)}")
|
||||||
return None
|
|
||||||
|
|
||||||
# Add safeguard against division by zero
|
|
||||||
if (
|
|
||||||
abs(vecm_fit.beta[1]) < CONFIG["zero_threshold"]
|
|
||||||
): # Small threshold to avoid division by very small numbers
|
|
||||||
print(f"{pair}: Skipping due to near-zero beta[1] value: {vecm_fit.beta[1]}")
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
pair_trades = create_trading_signals(
|
pair_trades = create_trading_signals(
|
||||||
vecm_fit=vecm_fit,
|
pair=pair,
|
||||||
testing_pair_df=testing_pair_df,
|
|
||||||
pair=pair,
|
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"{pair}: Prediction failed: {str(e)}")
|
print(f"{pair}: Prediction failed: {str(e)}")
|
||||||
@ -504,137 +239,28 @@ def run_single_pair(market_data: pd.DataFrame, price_column:str, pair: TradingPa
|
|||||||
|
|
||||||
return pair_trades
|
return pair_trades
|
||||||
|
|
||||||
def add_trade(pair_nm, symbol, action, price):
|
|
||||||
pair_nm = str(pair_nm)
|
|
||||||
|
|
||||||
|
def run_pairs(config: Dict, market_data_df: pd.DataFrame, price_column: str) -> None:
|
||||||
|
|
||||||
if pair_nm not in TRADES:
|
def _create_pairs(config: Dict) -> List[TradingPair]:
|
||||||
TRADES[pair_nm] = {symbol: []}
|
instruments = config["instruments"]
|
||||||
if symbol not in TRADES[pair_nm]:
|
all_indexes = range(len(instruments))
|
||||||
TRADES[pair_nm][symbol] = []
|
unique_index_pairs = [(i, j) for i in all_indexes for j in all_indexes if i < j]
|
||||||
TRADES[pair_nm][symbol].append((action, price))
|
pairs = []
|
||||||
|
for a_index, b_index in unique_index_pairs:
|
||||||
|
symbol_a = instruments[a_index]
|
||||||
|
symbol_b = instruments[b_index]
|
||||||
|
pair = TradingPair(symbol_a, symbol_b, price_column)
|
||||||
|
pairs.append(pair)
|
||||||
|
return pairs
|
||||||
|
|
||||||
def collect_single_day_results(result):
|
|
||||||
if result is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
print("\n -------------- Suggested Trades ")
|
|
||||||
print(result)
|
|
||||||
|
|
||||||
for row in result.itertuples():
|
|
||||||
action = row.action
|
|
||||||
symbol = row.symbol
|
|
||||||
price = row.price
|
|
||||||
add_trade(pair_nm=row.pair, action=action, symbol=symbol, price=price)
|
|
||||||
|
|
||||||
def print_single_day_results(result):
|
|
||||||
for pair, symbols in TRADES.items():
|
|
||||||
print(f"\n--- {pair} ---")
|
|
||||||
for symbol, trades in symbols.items():
|
|
||||||
for side, price in trades:
|
|
||||||
print(f"{symbol} {side} at ${price}")
|
|
||||||
|
|
||||||
def print_results_suummary(all_results):
|
|
||||||
# Summary of all processed files
|
|
||||||
print("\n====== Summary of All Processed Files ======")
|
|
||||||
for filename, data in all_results.items():
|
|
||||||
trade_count = sum(
|
|
||||||
len(trades)
|
|
||||||
for symbol_trades in data["trades"].values()
|
|
||||||
for trades in symbol_trades.values()
|
|
||||||
)
|
|
||||||
print(f"{filename}: {trade_count} trades")
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_returns(all_results: Dict):
|
|
||||||
global TOTAL_REALIZED_PNL
|
|
||||||
print("\n====== Returns By Day and Pair ======")
|
|
||||||
|
|
||||||
for filename, data in all_results.items():
|
|
||||||
day_return = 0
|
|
||||||
print(f"\n--- {filename} ---")
|
|
||||||
|
|
||||||
# Process each pair
|
|
||||||
for pair, symbols in data["trades"].items():
|
|
||||||
pair_return = 0
|
|
||||||
pair_trades = []
|
|
||||||
|
|
||||||
# Calculate individual symbol returns in the pair
|
|
||||||
for symbol, trades in symbols.items():
|
|
||||||
if len(trades) >= 2: # Need at least entry and exit
|
|
||||||
# Get entry and exit trades
|
|
||||||
entry_action, entry_price = trades[0]
|
|
||||||
exit_action, exit_price = trades[1]
|
|
||||||
|
|
||||||
# Calculate return based on action
|
|
||||||
symbol_return = 0
|
|
||||||
if entry_action == "BUY" and exit_action == "SELL":
|
|
||||||
# Long position
|
|
||||||
symbol_return = (exit_price - entry_price) / entry_price * 100
|
|
||||||
elif entry_action == "SELL" and exit_action == "BUY":
|
|
||||||
# Short position
|
|
||||||
symbol_return = (entry_price - exit_price) / entry_price * 100
|
|
||||||
|
|
||||||
pair_trades.append(
|
|
||||||
(
|
|
||||||
symbol,
|
|
||||||
entry_action,
|
|
||||||
entry_price,
|
|
||||||
exit_action,
|
|
||||||
exit_price,
|
|
||||||
symbol_return,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
pair_return += symbol_return
|
|
||||||
|
|
||||||
# Print pair returns
|
|
||||||
if pair_trades:
|
|
||||||
print(f" {pair}:")
|
|
||||||
for (
|
|
||||||
symbol,
|
|
||||||
entry_action,
|
|
||||||
entry_price,
|
|
||||||
exit_action,
|
|
||||||
exit_price,
|
|
||||||
symbol_return,
|
|
||||||
) in pair_trades:
|
|
||||||
print(
|
|
||||||
f" {symbol}: {entry_action} @ ${entry_price:.2f}, {exit_action} @ ${exit_price:.2f}, Return: {symbol_return:.2f}%"
|
|
||||||
)
|
|
||||||
print(f" Pair Total Return: {pair_return:.2f}%")
|
|
||||||
day_return += pair_return
|
|
||||||
|
|
||||||
# Print day total return and add to global realized PnL
|
|
||||||
if day_return != 0:
|
|
||||||
print(f" Day Total Return: {day_return:.2f}%")
|
|
||||||
TOTAL_REALIZED_PNL += day_return
|
|
||||||
|
|
||||||
def run_pairs(summaries_df: pd.DataFrame, price_column: str) -> None:
|
|
||||||
|
|
||||||
result_df = transform_dataframe(df=summaries_df, price_column=price_column)
|
|
||||||
|
|
||||||
stock_price_columns = [
|
|
||||||
column
|
|
||||||
for column in result_df.columns
|
|
||||||
if column.startswith(f"{price_column}_")
|
|
||||||
]
|
|
||||||
|
|
||||||
# Find the starting indices for A and B
|
|
||||||
all_indexes = range(len(stock_price_columns))
|
|
||||||
unique_index_pairs = [(i, j) for i in all_indexes for j in all_indexes if i < j]
|
|
||||||
|
|
||||||
pairs_trades = []
|
pairs_trades = []
|
||||||
for a_index, b_index in unique_index_pairs:
|
for pair in _create_pairs(config):
|
||||||
# Get the actual variable names
|
single_pair_trades = run_single_pair(
|
||||||
colname_a = stock_price_columns[a_index]
|
market_data=market_data_df, price_column=price_column, pair=pair
|
||||||
colname_b = stock_price_columns[b_index]
|
)
|
||||||
|
if single_pair_trades is not None and len(single_pair_trades) > 0:
|
||||||
symbol_a = colname_a[len(f"{price_column}-") :]
|
|
||||||
symbol_b = colname_b[len(f"{price_column}-") :]
|
|
||||||
pair = TradingPair(symbol_a, symbol_b, price_column)
|
|
||||||
|
|
||||||
single_pair_trades = run_single_pair(market_data=result_df, price_column=price_column, pair=pair)
|
|
||||||
if len(single_pair_trades) > 0:
|
|
||||||
pairs_trades.append(single_pair_trades)
|
pairs_trades.append(single_pair_trades)
|
||||||
# Check if result_list has any data before concatenating
|
# Check if result_list has any data before concatenating
|
||||||
if len(pairs_trades) == 0:
|
if len(pairs_trades) == 0:
|
||||||
@ -645,48 +271,15 @@ def run_pairs(summaries_df: pd.DataFrame, price_column: str) -> None:
|
|||||||
result["time"] = pd.to_datetime(result["time"])
|
result["time"] = pd.to_datetime(result["time"])
|
||||||
result = result.set_index("time").sort_index()
|
result = result.set_index("time").sort_index()
|
||||||
|
|
||||||
collect_single_day_results(result)
|
BacktestResults.collect_single_day_results(result)
|
||||||
# print_single_day_results(result)
|
# BacktestResults.print_single_day_results()
|
||||||
|
|
||||||
def print_outstanding_positions():
|
|
||||||
"""Print all outstanding positions with share quantities and unrealized PnL"""
|
|
||||||
if not OUTSTANDING_POSITIONS:
|
|
||||||
print("\n====== NO OUTSTANDING POSITIONS ======")
|
|
||||||
return
|
|
||||||
|
|
||||||
print(f"\n====== OUTSTANDING POSITIONS ======")
|
|
||||||
print(f"{'Pair':<15} {'Symbol':<6} {'Side':<4} {'Shares':<10} {'Open $':<8} {'Current $':<10} {'Unrealized $':<12} {'%':<8} {'Close Eq':<10}")
|
|
||||||
print("-" * 105)
|
|
||||||
|
|
||||||
total_unrealized_dollar = 0.0
|
|
||||||
|
|
||||||
for pos in OUTSTANDING_POSITIONS:
|
|
||||||
# Print position A
|
|
||||||
print(f"{pos['pair']:<15} {pos['symbol_a']:<6} {pos['side_a']:<4} {pos['shares_a']:<10.2f} {pos['open_px_a']:<8.2f} {pos['current_px_a']:<10.2f} {pos['unrealized_dollar_a']:<12.2f} {pos['unrealized_dollar_a']/500*100:<8.2f} {'':<10}")
|
|
||||||
|
|
||||||
# Print position B
|
|
||||||
print(f"{'':<15} {pos['symbol_b']:<6} {pos['side_b']:<4} {pos['shares_b']:<10.2f} {pos['open_px_b']:<8.2f} {pos['current_px_b']:<10.2f} {pos['unrealized_dollar_b']:<12.2f} {pos['unrealized_dollar_b']/500*100:<8.2f} {'':<10}")
|
|
||||||
|
|
||||||
# Print pair totals with equilibrium info
|
|
||||||
equilibrium_status = "CLOSE" if pos['current_abs_term'] < pos['closing_threshold'] else f"{pos['equilibrium_ratio']:.2f}x"
|
|
||||||
print(f"{'':<15} {'PAIR':<6} {'TOT':<4} {'':<10} {'':<8} {'':<10} {pos['total_unrealized_dollar']:<12.2f} {pos['total_unrealized_dollar']/1000*100:<8.2f} {equilibrium_status:<10}")
|
|
||||||
|
|
||||||
# Print equilibrium details
|
|
||||||
print(f"{'':<15} {'EQ':<6} {'INFO':<4} {'':<10} {'':<8} {'':<10} {'Curr:':<6}{pos['current_abs_term']:<6.4f} {'Thresh:':<7}{pos['closing_threshold']:<6.4f} {'':<10}")
|
|
||||||
print("-" * 105)
|
|
||||||
|
|
||||||
total_unrealized_dollar += pos['total_unrealized_dollar']
|
|
||||||
|
|
||||||
print(f"{'TOTAL OUTSTANDING':<80} ${total_unrealized_dollar:<12.2f}")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# Initialize a dictionary to store all trade results
|
# Initialize a dictionary to store all trade results
|
||||||
all_results = {}
|
all_results: Dict[str, Dict[str, Any]] = {}
|
||||||
|
|
||||||
# Initialize global PnL tracking variables
|
# Initialize global PnL tracking variables
|
||||||
TOTAL_REALIZED_PNL = 0.0
|
|
||||||
TOTAL_UNREALIZED_PNL = 0.0
|
|
||||||
OUTSTANDING_POSITIONS = []
|
|
||||||
|
|
||||||
# Process each data file
|
# Process each data file
|
||||||
price_column = CONFIG["price_column"]
|
price_column = CONFIG["price_column"]
|
||||||
@@ -694,39 +287,31 @@ if __name__ == "__main__":
         print(f"\n====== Processing {datafile} ======")
 
         # Clear the TRADES global dictionary and reset unrealized PnL for the new file
-        TRADES.clear()
-        TOTAL_UNREALIZED_PNL = 0.0
-        TOTAL_REALIZED_PNL = 0.0
+        BacktestResults.clear_trades()
 
         # Process data for this file
         try:
-            run_pairs(
-                summaries_df=load_market_data(f'{CONFIG["data_directory"]}/{datafile}', config=CONFIG),
-                price_column=price_column
+            market_data_df = load_market_data(
+                f'{CONFIG["data_directory"]}/{datafile}', config=CONFIG
             )
+            market_data_df = transform_dataframe(
+                df=market_data_df, price_column=price_column
+            )
+            run_pairs(config=CONFIG, market_data_df=market_data_df, price_column=price_column)
 
             # Store results with file name as key
             filename = datafile.split("/")[-1]
-            all_results[filename] = {"trades": TRADES.copy()}
+            all_results[filename] = {"trades": BacktestResults.trades.copy()}
 
             print(f"Successfully processed {filename}")
 
-            # Print total unrealized PnL for this file
-            if TOTAL_UNREALIZED_PNL != 0:
-                print(f"\n====== TOTAL UNREALIZED PnL for {filename}: {TOTAL_UNREALIZED_PNL:.2f}% ======")
-            else:
-                print(f"\n====== No unrealized positions for {filename} ======")
+            # No longer printing unrealized PnL since we removed that functionality
 
         except Exception as e:
             print(f"Error processing {datafile}: {str(e)}")
 
-    # print_results_suummary(all_results)
-    calculate_returns(all_results)
+    # BacktestResults.print_results_summary(all_results)
+    BacktestResults.calculate_returns(all_results)
 
     # Print grand totals
-    print(f"\n====== GRAND TOTALS ACROSS ALL PAIRS ======")
-    print(f"Total Realized PnL: {TOTAL_REALIZED_PNL:.2f}%")
-    print(f"Total Unrealized PnL: {TOTAL_UNREALIZED_PNL:.2f}%")
-    print(f"Combined Total PnL: {TOTAL_REALIZED_PNL + TOTAL_UNREALIZED_PNL:.2f}%")
-
-    print_outstanding_positions()
+    BacktestResults.print_grand_totals()
+    BacktestResults.print_outstanding_positions()
src/results.py (new file, 316 lines)

@@ -0,0 +1,316 @@
from typing import Any, Dict, List

import pandas as pd


class BacktestResult:
    """
    Class to handle backtest results, trades tracking, PnL calculations, and reporting.
    """

    def __init__(self, config: Dict[str, Any]):
        self.config = config
        self.trades: Dict[str, Dict[str, Any]] = {}
        self.total_realized_pnl = 0.0
        self.outstanding_positions: List[Dict[str, Any]] = []

    def add_trade(self, pair_nm, symbol, action, price):
        """Add a trade to the results tracking."""
        pair_nm = str(pair_nm)

        if pair_nm not in self.trades:
            self.trades[pair_nm] = {symbol: []}
        if symbol not in self.trades[pair_nm]:
            self.trades[pair_nm][symbol] = []
        self.trades[pair_nm][symbol].append((action, price))

    def add_outstanding_position(self, position: Dict[str, Any]):
        """Add an outstanding position to tracking."""
        self.outstanding_positions.append(position)

    def add_realized_pnl(self, realized_pnl: float):
        """Add realized PnL to the total."""
        self.total_realized_pnl += realized_pnl

    def get_total_realized_pnl(self) -> float:
        """Get total realized PnL."""
        return self.total_realized_pnl

    def get_outstanding_positions(self) -> List[Dict[str, Any]]:
        """Get all outstanding positions."""
        return self.outstanding_positions

    def get_trades(self) -> Dict[str, Dict[str, Any]]:
        """Get all trades."""
        return self.trades

    def clear_trades(self):
        """Clear all trades (used when processing new files)."""
        self.trades.clear()

    def collect_single_day_results(self, result):
        """Collect and process single day trading results."""
        if result is None:
            return

        print("\n -------------- Suggested Trades ")
        print(result)

        for row in result.itertuples():
            action = row.action
            symbol = row.symbol
            price = row.price
            self.add_trade(
                pair_nm=row.pair, action=action, symbol=symbol, price=price
            )

    def print_single_day_results(self):
        """Print single day results summary."""
        for pair, symbols in self.trades.items():
            print(f"\n--- {pair} ---")
            for symbol, trades in symbols.items():
                for side, price in trades:
                    print(f"{symbol} {side} at ${price}")

    def print_results_summary(self, all_results):
        """Print summary of all processed files."""
        print("\n====== Summary of All Processed Files ======")
        for filename, data in all_results.items():
            trade_count = sum(
                len(trades)
                for symbol_trades in data["trades"].values()
                for trades in symbol_trades.values()
            )
            print(f"{filename}: {trade_count} trades")

    def calculate_returns(self, all_results: Dict):
        """Calculate and print returns by day and pair."""
        print("\n====== Returns By Day and Pair ======")

        for filename, data in all_results.items():
            day_return = 0
            print(f"\n--- {filename} ---")

            # Process each pair
            for pair, symbols in data["trades"].items():
                pair_return = 0
                pair_trades = []

                # Calculate individual symbol returns in the pair
                for symbol, trades in symbols.items():
                    if len(trades) >= 2:  # Need at least entry and exit
                        # Get entry and exit trades
                        entry_action, entry_price = trades[0]
                        exit_action, exit_price = trades[1]

                        # Calculate return based on action
                        symbol_return = 0
                        if entry_action == "BUY" and exit_action == "SELL":
                            # Long position
                            symbol_return = (exit_price - entry_price) / entry_price * 100
                        elif entry_action == "SELL" and exit_action == "BUY":
                            # Short position
                            symbol_return = (entry_price - exit_price) / entry_price * 100

                        pair_trades.append(
                            (
                                symbol,
                                entry_action,
                                entry_price,
                                exit_action,
                                exit_price,
                                symbol_return,
                            )
                        )
                        pair_return += symbol_return

                # Print pair returns
                if pair_trades:
                    print(f"  {pair}:")
                    for (
                        symbol,
                        entry_action,
                        entry_price,
                        exit_action,
                        exit_price,
                        symbol_return,
                    ) in pair_trades:
                        print(
                            f"    {symbol}: {entry_action} @ ${entry_price:.2f}, {exit_action} @ ${exit_price:.2f}, Return: {symbol_return:.2f}%"
                        )
                    print(f"  Pair Total Return: {pair_return:.2f}%")
                    day_return += pair_return

            # Print day total return and add to global realized PnL
            if day_return != 0:
                print(f"  Day Total Return: {day_return:.2f}%")
                self.add_realized_pnl(day_return)

    def print_outstanding_positions(self):
        """Print all outstanding positions with share quantities and current values."""
        if not self.get_outstanding_positions():
            print("\n====== NO OUTSTANDING POSITIONS ======")
            return

        print(f"\n====== OUTSTANDING POSITIONS ======")
        print(
            f"{'Pair':<15}"
            f" {'Symbol':<10}"
            f" {'Side':<4}"
            f" {'Shares':<10}"
            f" {'Open $':<8}"
            f" {'Current $':<10}"
            f" {'Value $':<12}"
            f" {'Disequilibrium':<15}"
        )
        print("-" * 100)

        total_value = 0.0

        for pos in self.get_outstanding_positions():
            # Print position A
            print(
                f"{pos['pair']:<15}"
                f" {pos['symbol_a']:<10}"
                f" {pos['side_a']:<4}"
                f" {pos['shares_a']:<10.2f}"
                f" {pos['open_px_a']:<8.2f}"
                f" {pos['current_px_a']:<10.2f}"
                f" {pos['current_value_a']:<12.2f}"
                f" {'':<15}"
            )

            # Print position B
            print(
                f"{'':<15}"
                f" {pos['symbol_b']:<10}"
                f" {pos['side_b']:<4}"
                f" {pos['shares_b']:<10.2f}"
                f" {pos['open_px_b']:<8.2f}"
                f" {pos['current_px_b']:<10.2f}"
                f" {pos['current_value_b']:<12.2f}"
                f" {'':<15}"
            )

            # Print pair totals with disequilibrium info
            disequilibrium_status = (
                "CLOSE"
                if pos["current_abs_term"] < pos["closing_threshold"]
                else f"{pos['disequilibrium_ratio']:.2f}x"
            )
            print(
                f"{'':<15}"
                f" {'PAIR TOTAL':<10}"
                f" {'':<4}"
                f" {'':<10}"
                f" {'':<8}"
                f" {'':<10}"
                f" {pos['total_current_value']:<12.2f}"
                f" {disequilibrium_status:<15}"
            )

            # Print disequilibrium details
            print(
                f"{'':<15}"
                f" {'DISEQUIL':<10}"
                f" {'':<4}"
                f" {'':<10}"
                f" {'':<8}"
                f" {'':<10}"
                f" Raw: {pos['current_disequilibrium']:<6.4f}"
                f" Scaled: {pos['current_scaled_disequilibrium']:<6.4f}"
            )

            print(
                f"{'':<15}"
                f" {'THRESHOLD':<10}"
                f" {'':<4}"
                f" {'':<10}"
                f" {'':<8}"
                f" {'':<10}"
                f" Close: {pos['closing_threshold']:<6.4f}"
                f" Ratio: {pos['disequilibrium_ratio']:<6.2f}"
            )
            print("-" * 100)

            total_value += pos["total_current_value"]

        print(f"{'TOTAL OUTSTANDING VALUE':<80} ${total_value:<12.2f}")

    def print_grand_totals(self):
        """Print grand totals across all pairs."""
        print(f"\n====== GRAND TOTALS ACROSS ALL PAIRS ======")
        print(f"Total Realized PnL: {self.get_total_realized_pnl():.2f}%")

    def handle_outstanding_position(self, pair, pair_result_df, last_row_index,
                                    open_side_a, open_side_b, open_px_a, open_px_b,
                                    open_tstamp, initial_abs_term, colname_a, colname_b):
        """
        Handle calculation and tracking of outstanding positions when no close signal is found.

        Args:
            pair: TradingPair object
            pair_result_df: DataFrame with pair results
            last_row_index: Index of the last row in the data
            open_side_a, open_side_b: Trading sides for symbols A and B
            open_px_a, open_px_b: Opening prices for symbols A and B
            open_tstamp: Opening timestamp
            initial_abs_term: Initial absolute disequilibrium term
            colname_a, colname_b: Column names for the price data
        """
        last_row = pair_result_df.loc[last_row_index]
        last_tstamp = last_row["tstamp"]
        last_px_a = last_row[colname_a]
        last_px_b = last_row[colname_b]

        # Calculate share quantities based on funding per pair
        # Split funding equally between the two positions
        funding_per_position = self.config["funding_per_pair"] / 2
        shares_a = funding_per_position / open_px_a
        shares_b = funding_per_position / open_px_b

        # Calculate current position values (shares * current price)
        current_value_a = shares_a * last_px_a
        current_value_b = shares_b * last_px_b
        total_current_value = current_value_a + current_value_b

        # Get disequilibrium information
        current_disequilibrium = last_row["disequilibrium"]
        current_scaled_disequilibrium = last_row["scaled_disequilibrium"]

        # Store outstanding positions
        self.add_outstanding_position(
            {
                "pair": str(pair),
                "symbol_a": pair.symbol_a_,
                "symbol_b": pair.symbol_b_,
                "side_a": open_side_a,
                "side_b": open_side_b,
                "shares_a": shares_a,
                "shares_b": shares_b,
                "open_px_a": open_px_a,
                "open_px_b": open_px_b,
                "current_px_a": last_px_a,
                "current_px_b": last_px_b,
                "current_value_a": current_value_a,
                "current_value_b": current_value_b,
                "total_current_value": total_current_value,
                "open_time": open_tstamp,
                "last_time": last_tstamp,
                "initial_abs_term": initial_abs_term,
                "current_abs_term": current_scaled_disequilibrium,
                "current_disequilibrium": current_disequilibrium,
                "current_scaled_disequilibrium": current_scaled_disequilibrium,
                "closing_threshold": initial_abs_term / self.config["dis-equilibrium_close_trshld"],
                "disequilibrium_ratio": current_scaled_disequilibrium / (initial_abs_term / self.config["dis-equilibrium_close_trshld"]),
            }
        )

        # Print position details
        print(f"{pair}: NO CLOSE SIGNAL FOUND - Position held until end of session")
        print(f"  Open: {open_tstamp} | Last: {last_tstamp}")
        print(f"  {pair.symbol_a_}: {open_side_a} {shares_a:.2f} shares @ ${open_px_a:.2f} -> ${last_px_a:.2f} | Value: ${current_value_a:.2f}")
        print(f"  {pair.symbol_b_}: {open_side_b} {shares_b:.2f} shares @ ${open_px_b:.2f} -> ${last_px_b:.2f} | Value: ${current_value_b:.2f}")
        print(f"  Total Value: ${total_current_value:.2f}")
        print(f"  Disequilibrium: {current_disequilibrium:.4f} | Scaled: {current_scaled_disequilibrium:.4f}")

        return current_value_a, current_value_b, total_current_value
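A short usage sketch of `BacktestResult`, mirroring how the backtest script earlier in this compare wires it up; the pair name, prices, and filename key are made-up examples.

```python
from backtest_configs import CRYPTO_CONFIG
from results import BacktestResult

results = BacktestResult(config=CRYPTO_CONFIG)

# One hypothetical round trip on one leg of a pair (prices are illustrative).
results.add_trade(pair_nm="BTC-USDT & ETH-USDT", symbol="BTC-USDT", action="BUY", price=68000.0)
results.add_trade(pair_nm="BTC-USDT & ETH-USDT", symbol="BTC-USDT", action="SELL", price=68400.0)

all_results = {"20250525.mktdata.ohlcv.db": {"trades": results.trades.copy()}}
results.calculate_returns(all_results)   # prints per-pair and per-day returns
results.print_grand_totals()
```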
@@ -1,8 +1,11 @@
 import sys
 import sqlite3
+from typing import Dict, Tuple
 import pandas as pd
 
+from tools.trading_pair import TradingPair
+
 
 def load_sqlite_to_dataframe(db_path, query):
     try:
         conn = sqlite3.connect(db_path)
@@ -16,10 +19,107 @@ def load_sqlite_to_dataframe(db_path, query):
         print(f"Error: {excpt}")
         raise
     finally:
-        if 'conn' in locals():
+        if "conn" in locals():
             conn.close()
 
-if __name__ == "__main__":
-    df1 = load_sqlite_to_dataframe(sys.argv[1], table_name='md_1min_bars')
-
-    print(df1)
+
+def convert_time_to_UTC(value: str, timezone: str):
+
+    from zoneinfo import ZoneInfo
+    from datetime import datetime
+
+    # Parse it to naive datetime object
+    local_dt = datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
+
+    zinfo = ZoneInfo(timezone)
+    result = local_dt.replace(tzinfo=zinfo)
+
+    result = result.astimezone(ZoneInfo("UTC"))
+    result = result.strftime("%Y-%m-%d %H:%M:%S")
+
+    return result
+
+
+def load_market_data(datafile: str, config: Dict) -> pd.DataFrame:
+    from tools.data_loader import load_sqlite_to_dataframe
+
+    instrument_ids = [
+        '"' + config["instrument_id_pfx"] + instrument + '"'
+        for instrument in config["instruments"]
+    ]
+    security_type = config["security_type"]
+    exchange_id = config["exchange_id"]
+
+    query = "select"
+    if security_type == "CRYPTO":
+        query += " strftime('%Y-%m-%d %H:%M:%S', tstamp/1000000000, 'unixepoch') as tstamp"
+        query += ", tstamp as time_ns"
+    else:
+        query += " tstamp"
+        query += ", tstamp_ns as time_ns"
+
+    query += f", substr(instrument_id, {len(config['instrument_id_pfx']) + 1}) as symbol"
+    query += ", open"
+    query += ", high"
+    query += ", low"
+    query += ", close"
+    query += ", volume"
+    query += ", num_trades"
+    query += ", vwap"
+
+    query += f" from {config['db_table_name']}"
+    query += f" where exchange_id ='{exchange_id}'"
+    query += f" and instrument_id in ({','.join(instrument_ids)})"
+
+    df = load_sqlite_to_dataframe(db_path=datafile, query=query)
+
+    # Trading Hours
+    date_str = df["tstamp"][0][0:10]
+    trading_hours = config["trading_hours"]
+
+    start_time = convert_time_to_UTC(
+        f"{date_str} {trading_hours['begin_session']}", trading_hours["timezone"]
+    )
+    end_time = convert_time_to_UTC(
+        f"{date_str} {trading_hours['end_session']}", trading_hours["timezone"]
+    )
+
+    # Perform boolean selection
+    df = df[(df["tstamp"] >= start_time) & (df["tstamp"] <= end_time)]
+    df["tstamp"] = pd.to_datetime(df["tstamp"])
+
+    return df
+
+
+def transform_dataframe(df: pd.DataFrame, price_column: str):
+    # Select only the columns we need
+    df_selected = df[["tstamp", "symbol", price_column]]
+
+    # Start with unique timestamps
+    result_df: pd.DataFrame = pd.DataFrame(df_selected["tstamp"]).drop_duplicates().reset_index(drop=True)
+
+    # For each unique symbol, add a corresponding close price column
+    for symbol in df_selected["symbol"].unique():
+        # Filter rows for this symbol
+        df_symbol = df_selected[df_selected["symbol"] == symbol].reset_index(drop=True)
+
+        # Create column name like "close-COIN"
+        new_price_column = f"{price_column}_{symbol}"
+
+        # Create temporary dataframe with timestamp and price
+        temp_df = pd.DataFrame({
+            "tstamp": df_symbol["tstamp"],
+            new_price_column: df_symbol[price_column]
+        })
+
+        # Join with our result dataframe
+        result_df = pd.merge(result_df, temp_df, on="tstamp", how="left")
+        result_df = result_df.reset_index(drop=True)  # do not dropna() since irrelevant symbol would affect dataset
+
+    return result_df
+
+
+# if __name__ == "__main__":
+#     df1 = load_sqlite_to_dataframe(sys.argv[1], table_name="md_1min_bars")
+
+#     print(df1)
|||||||
89
src/tools/trading_pair.py
Normal file
89
src/tools/trading_pair.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
|
||||||
|
from typing import List, Optional
|
||||||
|
import pandas as pd
|
||||||
|
from statsmodels.tsa.vector_ar.vecm import VECM
|
||||||
|
|
||||||
|
class TradingPair:
|
||||||
|
symbol_a_: str
|
||||||
|
symbol_b_: str
|
||||||
|
price_column_: str
|
||||||
|
|
||||||
|
training_mu_: Optional[float]
|
||||||
|
training_std_: Optional[float]
|
||||||
|
|
||||||
|
original_df_: Optional[pd.DataFrame]
|
||||||
|
training_df_: Optional[pd.DataFrame]
|
||||||
|
testing_df_: Optional[pd.DataFrame]
|
||||||
|
|
||||||
|
vecm_fit_: Optional[VECM]
|
||||||
|
|
||||||
|
def __init__(self, symbol_a: str, symbol_b: str, price_column: str):
|
||||||
|
self.symbol_a_ = symbol_a
|
||||||
|
self.symbol_b_ = symbol_b
|
||||||
|
self.price_column_ = price_column
|
||||||
|
self.training_mu_ = None
|
||||||
|
self.training_std_ = None
|
||||||
|
self.original_df_ = None
|
||||||
|
self.training_df_ = None
|
||||||
|
self.testing_df_ = None
|
||||||
|
self.vecm_fit_ = None
|
||||||
|
|
||||||
|
def get_datasets(self, market_data: pd.DataFrame, training_minutes: int) -> None:
|
||||||
|
self.original_df_ = market_data[["tstamp"] + self.colnames()]
|
||||||
|
self.training_df_ = market_data.iloc[:training_minutes - 1, :].copy()
|
||||||
|
self.training_df_ = self.training_df_.dropna().reset_index(drop=True)
|
||||||
|
|
||||||
|
self.testing_df_ = market_data.iloc[training_minutes:, :].copy()
|
||||||
|
self.testing_df_ = self.testing_df_.dropna().reset_index(drop=True)
|
||||||
|
|
||||||
|
def colnames(self) -> List[str]:
|
||||||
|
return [f"{self.price_column_}_{self.symbol_a_}", f"{self.price_column_}_{self.symbol_b_}"]
|
||||||
|
|
||||||
|
def fit_VECM(self):
|
||||||
|
vecm_df = self.training_df_[self.colnames()].reset_index(drop=True)
|
||||||
|
vecm_model = VECM(vecm_df, coint_rank=1)
|
||||||
|
vecm_fit = vecm_model.fit()
|
||||||
|
|
||||||
|
# URGENT check beta and alpha
|
||||||
|
|
||||||
|
# Check if the model converged properly
|
||||||
|
if not hasattr(vecm_fit, "beta") or vecm_fit.beta is None:
|
||||||
|
print(f"{self}: VECM model failed to converge properly")
|
||||||
|
|
||||||
|
self.vecm_fit_ = vecm_fit
|
||||||
|
# print(f"{self}: beta={self.vecm_fit_.beta} alpha={self.vecm_fit_.alpha}" )
|
||||||
|
# print(f"{self}: {self.vecm_fit_.summary()}")
|
||||||
|
pass
|
||||||
|
|
||||||
|
def check_cointegration(self):
|
||||||
|
from statsmodels.tsa.vector_ar.vecm import coint_johansen
|
||||||
|
df = self.training_df_[self.colnames()].reset_index(drop=True)
|
||||||
|
result = coint_johansen(df, det_order=0, k_ar_diff=1)
|
||||||
|
# print(f"{self}: lr1={result.lr1[0]} cvt={result.cvt[0, 1]}.")
|
||||||
|
is_cointegrated = result.lr1[0] > result.cvt[0, 1]
|
||||||
|
|
||||||
|
return is_cointegrated
|
||||||
|
|
||||||
|
def train_pair(self) -> bool:
|
||||||
|
is_cointegrated = self.check_cointegration()
|
||||||
|
if not is_cointegrated:
|
||||||
|
return False
|
||||||
|
pass
|
||||||
|
|
||||||
|
print(f"*****\n**************** {self} IS COINTEGRATED ****************\n*****")
|
||||||
|
self.fit_VECM()
|
||||||
|
diseq_series = self.training_df_[self.colnames()] @ self.vecm_fit_.beta
|
||||||
|
self.training_mu_ = diseq_series.mean().iloc[0]
|
||||||
|
self.training_std_ = diseq_series.std().iloc[0]
|
||||||
|
|
||||||
|
self.training_df_["dis-equilibrium"] = self.training_df_[self.colnames()] @ self.vecm_fit_.beta
|
||||||
|
# Normalize the dis-equilibrium
|
||||||
|
self.training_df_["scaled_dis-equilibrium"] = (
|
||||||
|
diseq_series - self.training_mu_
|
||||||
|
) / self.training_std_
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def __repr__(self) ->str:
|
||||||
|
return f"{self.symbol_a_} & {self.symbol_b_}"
|
||||||
|
|
||||||
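A minimal, self-contained sketch of how `TradingPair` is driven; the synthetic DataFrame stands in for the output of `transform_dataframe`, and the symbols and column layout follow the configs above.

```python
import numpy as np
import pandas as pd
from tools.trading_pair import TradingPair

# Synthetic cointegrated series sharing one stochastic trend, for illustration only.
rng = np.random.default_rng(0)
n = 180
common = np.cumsum(rng.normal(size=n))
market_df = pd.DataFrame({
    "tstamp": pd.date_range("2025-05-25", periods=n, freq="min"),
    "close_BTC-USDT": 68000 + 50 * common + rng.normal(scale=5, size=n),
    "close_ETH-USDT": 3400 + 2.5 * common + rng.normal(scale=0.5, size=n),
})

pair = TradingPair("BTC-USDT", "ETH-USDT", price_column="close")
pair.get_datasets(market_data=market_df, training_minutes=60)

# train_pair() runs the Johansen test (trace statistic vs. the 95% critical value)
# and only fits the VECM when the pair is cointegrated.
if pair.train_pair():
    print(pair, pair.training_mu_, pair.training_std_)
```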