commit 2272a31765
parent facf7fb0c6

    cointegration test initial
@@ -5,14 +5,10 @@ from typing import Dict, Optional, cast
 import pandas as pd # type: ignore[import]
 from pt_trading.fit_method import PairState, PairsTradingFitMethod
 from pt_trading.results import BacktestResult
-from pt_trading.trading_pair import TradingPair
+from pt_trading.trading_pair import CointegrationData, TradingPair

 NanoPerMin = 1e9
-
-
-
-

 class SlidingFit(PairsTradingFitMethod):
     def __init__(self) -> None:
         super().__init__()
@@ -37,7 +33,6 @@ class SlidingFit(PairsTradingFitMethod):
             "scaled_disequilibrium": "float64",
             "pair": "object"
         })
-        pair.user_data_["is_cointegrated"] = False

         training_minutes = config["training_minutes"]
         curr_predicted_row_idx = 0
@@ -59,31 +54,10 @@ class SlidingFit(PairsTradingFitMethod):

             try:
                 # ================================ TRAINING ================================
-                is_cointegrated = pair.train_pair()
+                pair.train_pair()
             except Exception as e:
                 raise RuntimeError(f"{pair}: Training failed: {str(e)}") from e

-            if pair.user_data_["is_cointegrated"] != is_cointegrated:
-                pair.user_data_["is_cointegrated"] = is_cointegrated
-                if not is_cointegrated:
-                    if pair.user_data_["state"] == PairState.OPEN:
-                        print(
-                            f"{pair} {curr_training_start_idx} LOST COINTEGRATION. Consider closing positions..."
-                        )
-                    else:
-                        print(
-                            f"{pair} {curr_training_start_idx} IS NOT COINTEGRATED. Moving on"
-                        )
-                else:
-                    print("*" * 80)
-                    print(
-                        f"Pair {pair} ({curr_training_start_idx}) IS COINTEGRATED"
-                    )
-                    print("*" * 80)
-            if not is_cointegrated:
-                curr_training_start_idx += 1
-                continue
-
             try:
                 # ================================ PREDICTION ================================
                 pair.predict()
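Editorial note on the SlidingFit hunks above: train_pair() no longer returns a cointegration flag, so the is_cointegrated bookkeeping in pair.user_data_, the LOST COINTEGRATION warnings, and the skip-window continue are all gone; every window is now trained and predicted unconditionally. A minimal sketch, not part of the commit, of how a caller could recover the old gating with the CointegrationData class introduced below (assuming pair is a TradingPair with a populated training_df_):

    from pt_trading.trading_pair import CointegrationData

    data = CointegrationData(pair)  # runs Johansen + Engle-Granger on pair.training_df_
    if not (data.johansen_is_cointegrated_ or data.eg_is_cointegrated_):
        curr_training_start_idx += 1  # skip this window, as the removed block did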
@@ -18,14 +18,6 @@ class StaticFit(PairsTradingFitMethod):
     ) -> Optional[pd.DataFrame]:  # abstractmethod
         config = pair.config_
         pair.get_datasets(training_minutes=config["training_minutes"])
-        try:
-            is_cointegrated = pair.train_pair()
-            if not is_cointegrated:
-                print(f"{pair} IS NOT COINTEGRATED")
-                return None
-        except Exception as e:
-            print(f"{pair}: Training failed: {str(e)}")
-            return None

         try:
             pair.predict()
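The StaticFit hunk goes further: the deleted try/except removed not only the IS NOT COINTEGRATED early return but the pair.train_pair() call itself, so in the region shown nothing trains the pair before pair.predict() runs; presumably training now happens outside this hunk.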
@@ -1,8 +1,63 @@
+from __future__ import annotations

 from typing import Any, Dict, List, Optional

 import pandas as pd # type:ignore
 from statsmodels.tsa.vector_ar.vecm import VECM, VECMResults # type:ignore

+class CointegrationData:
+    EG_PVALUE_THRESHOLD = 0.05
+
+    tstamp_: pd.Timestamp
+    pair_: str
+    eg_pvalue_: float
+    johansen_lr1_: float
+    johansen_cvt_: float
+    eg_is_cointegrated_: bool
+    johansen_is_cointegrated_: bool
+
+    def __init__(self, pair: TradingPair):
+        training_df = pair.training_df_
+
+        assert training_df is not None
+        from statsmodels.tsa.vector_ar.vecm import coint_johansen
+
+        df = training_df[pair.colnames()].reset_index(drop=True)
+
+        # Run Johansen cointegration test
+        result = coint_johansen(df, det_order=0, k_ar_diff=1)
+        self.johansen_lr1_ = result.lr1[0]
+        self.johansen_cvt_ = result.cvt[0, 1]
+        self.johansen_is_cointegrated_ = self.johansen_lr1_ > self.johansen_cvt_
+
+        # Run Engle-Granger cointegration test
+        from statsmodels.tsa.stattools import coint #type: ignore
+
+        col1, col2 = pair.colnames()
+        assert training_df is not None
+        series1 = training_df[col1].reset_index(drop=True)
+        series2 = training_df[col2].reset_index(drop=True)
+
+        self.eg_pvalue_ = float(coint(series1, series2)[1])
+        self.eg_is_cointegrated_ = bool(self.eg_pvalue_ < self.EG_PVALUE_THRESHOLD)
+
+        self.tstamp_ = training_df.index[-1]
+        self.pair_ = pair.name()
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "tstamp": self.tstamp_,
+            "pair": self.pair_,
+            "eg_pvalue": self.eg_pvalue_,
+            "johansen_lr1": self.johansen_lr1_,
+            "johansen_cvt": self.johansen_cvt_,
+            "eg_is_cointegrated": self.eg_is_cointegrated_,
+            "johansen_is_cointegrated": self.johansen_is_cointegrated_,
+        }
+
+    def __repr__(self) -> str:
+        return f"CointegrationData(tstamp={self.tstamp_}, pair={self.pair_}, eg_pvalue={self.eg_pvalue_}, johansen_lr1={self.johansen_lr1_}, johansen_cvt={self.johansen_cvt_}, eg_is_cointegrated={self.eg_is_cointegrated_}, johansen_is_cointegrated={self.johansen_is_cointegrated_})"
+
+
 class TradingPair:
     market_data_: pd.DataFrame
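For readers unfamiliar with the two statsmodels calls that CointegrationData wraps, here is a self-contained sketch on synthetic data; the series names and noise scales are illustrative, not from the repo. Both tests should flag the constructed pair, since the two series share a single random walk:

    import numpy as np
    import pandas as pd
    from statsmodels.tsa.stattools import coint
    from statsmodels.tsa.vector_ar.vecm import coint_johansen

    rng = np.random.default_rng(0)
    n = 500
    common = np.cumsum(rng.normal(size=n))            # shared stochastic trend
    df = pd.DataFrame({
        "A": common + rng.normal(scale=0.5, size=n),  # two noisy observations of it
        "B": 0.8 * common + rng.normal(scale=0.5, size=n),
    })

    # Johansen: trace statistic vs. the 95% critical value for rank 0,
    # exactly the lr1[0] > cvt[0, 1] comparison used by CointegrationData
    res = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("johansen cointegrated:", bool(res.lr1[0] > res.cvt[0, 1]))

    # Engle-Granger: reject "no cointegration" when the p-value < 0.05
    pvalue = float(coint(df["A"], df["B"])[1])
    print("engle-granger cointegrated:", pvalue < 0.05)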
@@ -148,42 +203,7 @@ class TradingPair:
         # print(f"{self}: {self.vecm_fit_.summary()}")
         pass

-    def check_cointegration_johansen(self) -> bool:
-        assert self.training_df_ is not None
-        from statsmodels.tsa.vector_ar.vecm import coint_johansen
-
-        df = self.training_df_[self.colnames()].reset_index(drop=True)
-        result = coint_johansen(df, det_order=0, k_ar_diff=1)
-        # print(
-        #     f"{self}: lr1={result.lr1[0]} > cvt={result.cvt[0, 1]}? {result.lr1[0] > result.cvt[0, 1]}"
-        # )
-        is_cointegrated: bool = bool(result.lr1[0] > result.cvt[0, 1])
-
-        return is_cointegrated
-
-    def check_cointegration_engle_granger(self) -> bool:
-        from statsmodels.tsa.stattools import coint
-
-        col1, col2 = self.colnames()
-        assert self.training_df_ is not None
-        series1 = self.training_df_[col1].reset_index(drop=True)
-        series2 = self.training_df_[col2].reset_index(drop=True)
-
-        # Run Engle-Granger cointegration test
-        pvalue = coint(series1, series2)[1]
-        # Define cointegration if p-value < 0.05 (i.e., reject null of no cointegration)
-        is_cointegrated: bool = bool(pvalue < 0.05)
-        # print(f"{self}: is_cointegrated={is_cointegrated} pvalue={pvalue}")
-        return is_cointegrated
-
-    def check_cointegration(self) -> bool:
-        is_cointegrated_johansen = self.check_cointegration_johansen()
-        is_cointegrated_engle_granger = self.check_cointegration_engle_granger()
-        result = is_cointegrated_johansen or is_cointegrated_engle_granger
-        return result or True  # TODO: remove this
-
-    def train_pair(self) -> bool:
-        result = self.check_cointegration()
+    def train_pair(self) -> None:
         # print('*' * 80 + '\n' + f"**************** {self} IS COINTEGRATED ****************\n" + '*' * 80)
         self.fit_VECM()
         assert self.training_df_ is not None and self.vecm_fit_ is not None
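Worth noting about the removal above: the old check_cointegration() ended with `return result or True  # TODO: remove this`, i.e. it reported every pair as cointegrated regardless of the tests, so the gating deleted in the fit-method hunks was already a no-op. Recording the real Johansen and Engle-Granger outcomes in CointegrationData replaces that hard-coded pass with measurable results.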
@@ -200,8 +220,6 @@ class TradingPair:
             diseq_series - self.training_mu_
         ) / self.training_std_

-        return result
-
     def add_trades(self, trades: pd.DataFrame) -> None:
         if self.user_data_["trades"] is None or len(self.user_data_["trades"]) == 0:
             # If trades is empty or None, just assign the new trades directly
@@ -286,6 +304,45 @@ class TradingPair:
         self.predicted_df_ = self.predicted_df_.reset_index(drop=True)
         return self.predicted_df_

+    def cointegration_check(self) -> Optional[pd.DataFrame]:
+        print(f"***{self}*** STARTING....")
+        config = self.config_
+
+        curr_training_start_idx = 0
+
+        COINTEGRATION_DATA_COLUMNS = {
+            "tstamp": "datetime64[ns]",
+            "pair": "string",
+            "eg_pvalue": "float64",
+            "johansen_lr1": "float64",
+            "johansen_cvt": "float64",
+            "eg_is_cointegrated": "bool",
+            "johansen_is_cointegrated": "bool",
+        }
+        # Initialize the result DataFrame with proper dtypes to avoid concatenation warnings
+        result: pd.DataFrame = pd.DataFrame(columns=[col for col in COINTEGRATION_DATA_COLUMNS.keys()])  # .astype(COINTEGRATION_DATA_COLUMNS)
+
+        training_minutes = config["training_minutes"]
+        while True:
+            print(curr_training_start_idx, end="\r")
+            self.get_datasets(
+                training_minutes=training_minutes,
+                training_start_index=curr_training_start_idx,
+                testing_size=1,
+            )
+
+            if len(self.training_df_) < training_minutes:
+                print(
+                    f"{self}: current offset={curr_training_start_idx}"
+                    f" * Training data length={len(self.training_df_)} < {training_minutes}"
+                    " * Not enough training data. Completing the job."
+                )
+                break
+            new_row = pd.Series(CointegrationData(self).to_dict())
+            result.loc[len(result)] = new_row
+            curr_training_start_idx += 1
+        return result
+
     def __repr__(self) -> str:
         return self.name()

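A hedged usage sketch for the new method (assuming a constructed TradingPair whose config_ provides training_minutes); it shows the one-row-per-window shape of the returned frame:

    report = pair.cointegration_check()  # one row per sliding training window
    if report is not None and len(report) > 0:
        both = report["eg_is_cointegrated"] & report["johansen_is_cointegrated"]
        print(f"{int(both.sum())}/{len(report)} windows pass both tests")

Note that the `.astype(COINTEGRATION_DATA_COLUMNS)` call is commented out, so the frame is built with default object dtypes; re-enabling it would make the declared column types take effect.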
research/cointegration_test.py (new file, 126 lines)
@@ -0,0 +1,126 @@
+import argparse
+import glob
+import importlib
+import os
+from datetime import date, datetime
+from typing import Any, Dict, List, Optional
+
+import pandas as pd
+
+from tools.config import expand_filename, load_config
+from tools.data_loader import get_available_instruments_from_db, load_market_data
+from pt_trading.results import (
+    BacktestResult,
+    create_result_database,
+    store_config_in_database,
+    store_results_in_database,
+)
+from pt_trading.fit_method import PairsTradingFitMethod
+from pt_trading.trading_pair import TradingPair
+
+from research.research_tools import create_pairs, resolve_datafiles
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Run pairs trading backtest.")
+    parser.add_argument(
+        "--config", type=str, required=True, help="Path to the configuration file."
+    )
+    parser.add_argument(
+        "--datafile",
+        type=str,
+        required=False,
+        help="Market data file to process.",
+    )
+    parser.add_argument(
+        "--instruments",
+        type=str,
+        required=False,
+        help="Comma-separated list of instrument symbols (e.g., COIN,GBTC). If not provided, auto-detects from database.",
+    )
+    args = parser.parse_args()
+
+    config: Dict = load_config(args.config)
+
+    # Resolve data files (CLI takes priority over config)
+    datafile = resolve_datafiles(config, args.datafile)[0]
+
+    if not datafile:
+        print("No data files found to process.")
+        return
+
+    print(f"Found {datafile} data files to process:")
+
+    # # Create result database if needed
+    # if args.result_db.upper() != "NONE":
+    #     args.result_db = expand_filename(args.result_db)
+    #     create_result_database(args.result_db)
+
+    # # Initialize a dictionary to store all trade results
+    # all_results: Dict[str, Dict[str, Any]] = {}
+
+    # # Store configuration in database for reference
+    # if args.result_db.upper() != "NONE":
+    #     # Get list of all instruments for storage
+    #     all_instruments = []
+    #     for datafile in datafiles:
+    #         if args.instruments:
+    #             file_instruments = [
+    #                 inst.strip() for inst in args.instruments.split(",")
+    #             ]
+    #         else:
+    #             file_instruments = get_available_instruments_from_db(datafile, config)
+    #         all_instruments.extend(file_instruments)
+
+    #     # Remove duplicates while preserving order
+    #     unique_instruments = list(dict.fromkeys(all_instruments))
+
+    #     store_config_in_database(
+    #         db_path=args.result_db,
+    #         config_file_path=args.config,
+    #         config=config,
+    #         fit_method_class=fit_method_class_name,
+    #         datafiles=datafiles,
+    #         instruments=unique_instruments,
+    #     )
+
+    # Process each data file
+    price_column = config["price_column"]
+
+    print(f"\n====== Processing {os.path.basename(datafile)} ======")
+
+    # Determine instruments to use
+    if args.instruments:
+        # Use CLI-specified instruments
+        instruments = [inst.strip() for inst in args.instruments.split(",")]
+        print(f"Using CLI-specified instruments: {instruments}")
+    else:
+        # Auto-detect instruments from database
+        instruments = get_available_instruments_from_db(datafile, config)
+        print(f"Auto-detected instruments: {instruments}")
+
+    if not instruments:
+        print(f"No instruments found in {datafile}...")
+        return
+    # Process data for this file
+    try:
+        cointegration_data: pd.DataFrame = pd.DataFrame()
+        for pair in create_pairs(datafile, price_column, config, instruments):
+            cointegration_data = pd.concat([cointegration_data, pair.cointegration_check()])
+
+        pd.set_option('display.width', 400)
+        pd.set_option('display.max_colwidth', None)
+        pd.set_option('display.max_columns', None)
+        with pd.option_context('display.max_rows', None, 'display.max_columns', None):
+            print(f"cointegration_data:\n{cointegration_data}")
+
+    except Exception as err:
+        print(f"Error processing {datafile}: {str(err)}")
+        import traceback
+
+        traceback.print_exc()
+
+
+if __name__ == "__main__":
+    main()
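Assuming the repository root is on PYTHONPATH, the new script would be invoked along the lines of `python -m research.cointegration_test --config <config-file> --instruments COIN,GBTC` (the config path and symbols here are placeholders). Only `--config` is required: without `--instruments` the symbols are auto-detected from the database, and without `--datafile` the files are resolved from the config via resolve_datafiles. (The "Found {datafile} data files" message prints a file name where a count seems intended.)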
File diff suppressed because one or more lines are too long
@ -1,771 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Pairs Trading Visualization Notebook\n",
|
|
||||||
"\n",
|
|
||||||
"This notebook allows you to visualize pairs trading strategies on individual instrument pairs.\n",
|
|
||||||
"You can examine the relationship between two instruments, their dis-equilibrium, and trading signals."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### 🎯 Key Features:\n",
|
|
||||||
"\n",
|
|
||||||
"1. **Interactive Configuration**: \n",
|
|
||||||
" - Easy switching between CRYPTO and EQUITY configurations\n",
|
|
||||||
" - Simple parameter adjustment for thresholds and training periods\n",
|
|
||||||
"\n",
|
|
||||||
"2. **Single Pair Focus**: \n",
|
|
||||||
" - Instead of running multiple pairs, focuses on one pair at a time\n",
|
|
||||||
" - Allows deep analysis of the relationship between two instruments\n",
|
|
||||||
"\n",
|
|
||||||
"3. **Step-by-Step Visualization**:\n",
|
|
||||||
" - **Raw price data**: Individual prices, normalized comparison, and price ratios\n",
|
|
||||||
" - **Training analysis**: Cointegration testing and VECM model fitting\n",
|
|
||||||
" - **Dis-equilibrium visualization**: Both raw and scaled dis-equilibrium with threshold lines\n",
|
|
||||||
" - **Strategy execution**: Trading signal generation and visualization\n",
|
|
||||||
" - **Prediction analysis**: Actual vs predicted prices with trading signals overlaid\n",
|
|
||||||
"\n",
|
|
||||||
"4. **Rich Analytics**:\n",
|
|
||||||
" - Cointegration status and VECM model details\n",
|
|
||||||
" - Statistical summaries for all stages\n",
|
|
||||||
" - Threshold crossing analysis\n",
|
|
||||||
" - Trading signal breakdown\n",
|
|
||||||
"\n",
|
|
||||||
"5. **Interactive Experimentation**:\n",
|
|
||||||
" - Easy parameter modification\n",
|
|
||||||
" - Re-run capabilities for different configurations\n",
|
|
||||||
" - Support for both StaticFitStrategy and SlidingFitStrategy\n",
|
|
||||||
"\n",
|
|
||||||
"### 🚀 How to Use:\n",
|
|
||||||
"\n",
|
|
||||||
"1. **Start Jupyter**:\n",
|
|
||||||
" ```bash\n",
|
|
||||||
" cd src/notebooks\n",
|
|
||||||
" jupyter notebook pairs_trading_visualization.ipynb\n",
|
|
||||||
" ```\n",
|
|
||||||
"\n",
|
|
||||||
"2. **Customize Your Analysis**:\n",
|
|
||||||
" - Change `SYMBOL_A` and `SYMBOL_B` to your desired trading pair\n",
|
|
||||||
" - Switch between `CRYPTO_CONFIG` and `EQT_CONFIG`\n",
|
|
||||||
" - Only **StaticFitStrategy** is supported. \n",
|
|
||||||
" - Adjust thresholds and parameters as needed\n",
|
|
||||||
"\n",
|
|
||||||
"3. **Run and Visualize**:\n",
|
|
||||||
" - Execute cells step by step to see the analysis unfold\n",
|
|
||||||
" - Rich matplotlib visualizations show relationships and signals\n",
|
|
||||||
" - Comprehensive summary at the end\n",
|
|
||||||
"\n",
|
|
||||||
"The notebook provides exactly what you requested - a way to visualize the relationship between two instruments and their scaled dis-equilibrium, with all the stages of your pairs trading strategy clearly displayed and analyzed.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Setup and Imports"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Setup complete!\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"import sys\n",
|
|
||||||
"import os\n",
|
|
||||||
"sys.path.append('..')\n",
|
|
||||||
"\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import matplotlib.pyplot as plt\n",
|
|
||||||
"import seaborn as sns\n",
|
|
||||||
"from typing import Dict, List, Optional\n",
|
|
||||||
"\n",
|
|
||||||
"# Import our modules\n",
|
|
||||||
"from pt_trading.fit_methods import StaticFit, SlidingFit\n",
|
|
||||||
"from tools.data_loader import load_market_data\n",
|
|
||||||
"from pt_trading.trading_pair import TradingPair\n",
|
|
||||||
"from pt_trading.results import BacktestResult\n",
|
|
||||||
"\n",
|
|
||||||
"# Set plotting style\n",
|
|
||||||
"plt.style.use('seaborn-v0_8')\n",
|
|
||||||
"sns.set_palette(\"husl\")\n",
|
|
||||||
"plt.rcParams['figure.figsize'] = (12, 8)\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"Setup complete!\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Configuration"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Using EQUITY configuration\n",
|
|
||||||
"Available instruments: ['COIN', 'GBTC', 'HOOD', 'MSTR', 'PYPL']\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# Configuration - Choose between CRYPTO_CONFIG or EQT_CONFIG\n",
|
|
||||||
"\n",
|
|
||||||
"CRYPTO_CONFIG = {\n",
|
|
||||||
" \"security_type\": \"CRYPTO\",\n",
|
|
||||||
" \"data_directory\": \"../../data/crypto\",\n",
|
|
||||||
" \"datafiles\": [\n",
|
|
||||||
" \"20250519.mktdata.ohlcv.db\",\n",
|
|
||||||
" ],\n",
|
|
||||||
" \"db_table_name\": \"bnbspot_ohlcv_1min\",\n",
|
|
||||||
" \"exchange_id\": \"BNBSPOT\",\n",
|
|
||||||
" \"instrument_id_pfx\": \"PAIR-\",\n",
|
|
||||||
" \"instruments\": [\n",
|
|
||||||
" \"BTC-USDT\",\n",
|
|
||||||
" \"BCH-USDT\",\n",
|
|
||||||
" \"ETH-USDT\",\n",
|
|
||||||
" \"LTC-USDT\",\n",
|
|
||||||
" \"XRP-USDT\",\n",
|
|
||||||
" \"ADA-USDT\",\n",
|
|
||||||
" \"SOL-USDT\",\n",
|
|
||||||
" \"DOT-USDT\",\n",
|
|
||||||
" ],\n",
|
|
||||||
" \"trading_hours\": {\n",
|
|
||||||
" \"begin_session\": \"00:00:00\",\n",
|
|
||||||
" \"end_session\": \"23:59:00\",\n",
|
|
||||||
" \"timezone\": \"UTC\",\n",
|
|
||||||
" },\n",
|
|
||||||
" \"price_column\": \"close\",\n",
|
|
||||||
" \"min_required_points\": 30,\n",
|
|
||||||
" \"zero_threshold\": 1e-10,\n",
|
|
||||||
" \"dis-equilibrium_open_trshld\": 2.0,\n",
|
|
||||||
" \"dis-equilibrium_close_trshld\": 0.5,\n",
|
|
||||||
" \"training_minutes\": 120,\n",
|
|
||||||
" \"funding_per_pair\": 2000.0,\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"EQT_CONFIG = {\n",
|
|
||||||
" \"security_type\": \"EQUITY\",\n",
|
|
||||||
" \"data_directory\": \"../../data/equity\",\n",
|
|
||||||
" \"datafiles\": {\n",
|
|
||||||
" \"0508\": \"20250508.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0509\": \"20250509.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0510\": \"20250510.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0511\": \"20250511.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0512\": \"20250512.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0513\": \"20250513.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0514\": \"20250514.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0515\": \"20250515.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0516\": \"20250516.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0517\": \"20250517.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0518\": \"20250518.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0519\": \"20250519.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0520\": \"20250520.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0521\": \"20250521.alpaca_sim_md.db\",\n",
|
|
||||||
" \"0522\": \"20250522.alpaca_sim_md.db\",\n",
|
|
||||||
" },\n",
|
|
||||||
" \"db_table_name\": \"md_1min_bars\",\n",
|
|
||||||
" \"exchange_id\": \"ALPACA\",\n",
|
|
||||||
" \"instrument_id_pfx\": \"STOCK-\",\n",
|
|
||||||
" \"instruments\": [\n",
|
|
||||||
" \"COIN\",\n",
|
|
||||||
" \"GBTC\",\n",
|
|
||||||
" \"HOOD\",\n",
|
|
||||||
" \"MSTR\",\n",
|
|
||||||
" \"PYPL\",\n",
|
|
||||||
" ],\n",
|
|
||||||
" \"trading_hours\": {\n",
|
|
||||||
" \"begin_session\": \"9:30:00\",\n",
|
|
||||||
" \"end_session\": \"16:00:00\",\n",
|
|
||||||
" \"timezone\": \"America/New_York\",\n",
|
|
||||||
" },\n",
|
|
||||||
" \"price_column\": \"close\",\n",
|
|
||||||
" \"min_required_points\": 30,\n",
|
|
||||||
" \"zero_threshold\": 1e-10,\n",
|
|
||||||
" \"dis-equilibrium_open_trshld\": 2.0,\n",
|
|
||||||
" \"dis-equilibrium_close_trshld\": 1.0, #0.5,\n",
|
|
||||||
" \"training_minutes\": 120,\n",
|
|
||||||
" \"funding_per_pair\": 2000.0,\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"# Choose your configuration\n",
|
|
||||||
"CONFIG = EQT_CONFIG # Change to CRYPTO_CONFIG if you want to use crypto data\n",
|
|
||||||
"\n",
|
|
||||||
"print(f\"Using {CONFIG['security_type']} configuration\")\n",
|
|
||||||
"print(f\"Available instruments: {CONFIG['instruments']}\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Select Trading Pair and Data File"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Selected pair: COIN & GBTC\n",
|
|
||||||
"Data file: 20250509.alpaca_sim_md.db\n",
|
|
||||||
"Strategy: StaticFitStrategy\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# Select your trading pair and strategy\n",
|
|
||||||
"SYMBOL_A = \"COIN\" # Change these to your desired symbols\n",
|
|
||||||
"SYMBOL_B = \"GBTC\"\n",
|
|
||||||
"DATA_FILE = CONFIG[\"datafiles\"][\"0509\"]\n",
|
|
||||||
"\n",
|
|
||||||
"# Choose strategy\n",
|
|
||||||
"FIT_METHOD = StaticFit()\n",
|
|
||||||
"\n",
|
|
||||||
"print(f\"Selected pair: {SYMBOL_A} & {SYMBOL_B}\")\n",
|
|
||||||
"print(f\"Data file: {DATA_FILE}\")\n",
|
|
||||||
"print(f\"Strategy: {type(FIT_METHOD).__name__}\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Load Market Data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Current working directory: /home/oleg/devel/pairs_trading/src/notebooks\n",
|
|
||||||
"Loading data from: ../../data/equity/20250509.alpaca_sim_md.db\n",
|
|
||||||
"Error: Execution failed on sql 'select tstamp, tstamp_ns as time_ns, substr(instrument_id, 7) as symbol, open, high, low, close, volume, num_trades, vwap from md_1min_bars where exchange_id ='ALPACA' and instrument_id in (\"STOCK-COIN\",\"STOCK-GBTC\",\"STOCK-HOOD\",\"STOCK-MSTR\",\"STOCK-PYPL\")': no such table: md_1min_bars\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"ename": "Exception",
|
|
||||||
"evalue": "",
|
|
||||||
"output_type": "error",
|
|
||||||
"traceback": [
|
|
||||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
|
||||||
"\u001b[31mOperationalError\u001b[39m Traceback (most recent call last)",
|
|
||||||
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/python3.12-venv/lib/python3.12/site-packages/pandas/io/sql.py:2664\u001b[39m, in \u001b[36mSQLiteDatabase.execute\u001b[39m\u001b[34m(self, sql, params)\u001b[39m\n\u001b[32m 2663\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m2664\u001b[39m \u001b[43mcur\u001b[49m\u001b[43m.\u001b[49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2665\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m cur\n",
|
|
||||||
"\u001b[31mOperationalError\u001b[39m: no such table: md_1min_bars",
|
|
||||||
"\nThe above exception was the direct cause of the following exception:\n",
|
|
||||||
"\u001b[31mDatabaseError\u001b[39m Traceback (most recent call last)",
|
|
||||||
"\u001b[36mFile \u001b[39m\u001b[32m~/devel/pairs_trading/src/notebooks/../tools/data_loader.py:11\u001b[39m, in \u001b[36mload_sqlite_to_dataframe\u001b[39m\u001b[34m(db_path, query)\u001b[39m\n\u001b[32m 9\u001b[39m conn = sqlite3.connect(db_path)\n\u001b[32m---> \u001b[39m\u001b[32m11\u001b[39m df = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_sql_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 12\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m df\n",
|
|
||||||
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/python3.12-venv/lib/python3.12/site-packages/pandas/io/sql.py:528\u001b[39m, in \u001b[36mread_sql_query\u001b[39m\u001b[34m(sql, con, index_col, coerce_float, params, parse_dates, chunksize, dtype, dtype_backend)\u001b[39m\n\u001b[32m 527\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m pandasSQL_builder(con) \u001b[38;5;28;01mas\u001b[39;00m pandas_sql:\n\u001b[32m--> \u001b[39m\u001b[32m528\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpandas_sql\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_query\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 529\u001b[39m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 530\u001b[39m \u001b[43m \u001b[49m\u001b[43mindex_col\u001b[49m\u001b[43m=\u001b[49m\u001b[43mindex_col\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 531\u001b[39m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m=\u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 532\u001b[39m \u001b[43m \u001b[49m\u001b[43mcoerce_float\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcoerce_float\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 533\u001b[39m \u001b[43m \u001b[49m\u001b[43mparse_dates\u001b[49m\u001b[43m=\u001b[49m\u001b[43mparse_dates\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 534\u001b[39m \u001b[43m \u001b[49m\u001b[43mchunksize\u001b[49m\u001b[43m=\u001b[49m\u001b[43mchunksize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 535\u001b[39m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 536\u001b[39m \u001b[43m \u001b[49m\u001b[43mdtype_backend\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdtype_backend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 537\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
|
||||||
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/python3.12-venv/lib/python3.12/site-packages/pandas/io/sql.py:2728\u001b[39m, in \u001b[36mSQLiteDatabase.read_query\u001b[39m\u001b[34m(self, sql, index_col, coerce_float, parse_dates, params, chunksize, dtype, dtype_backend)\u001b[39m\n\u001b[32m 2717\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mread_query\u001b[39m(\n\u001b[32m 2718\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 2719\u001b[39m sql,\n\u001b[32m (...)\u001b[39m\u001b[32m 2726\u001b[39m dtype_backend: DtypeBackend | Literal[\u001b[33m\"\u001b[39m\u001b[33mnumpy\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[33m\"\u001b[39m\u001b[33mnumpy\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 2727\u001b[39m ) -> DataFrame | Iterator[DataFrame]:\n\u001b[32m-> \u001b[39m\u001b[32m2728\u001b[39m cursor = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2729\u001b[39m columns = [col_desc[\u001b[32m0\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m col_desc \u001b[38;5;129;01min\u001b[39;00m cursor.description]\n",
|
|
||||||
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/python3.12-venv/lib/python3.12/site-packages/pandas/io/sql.py:2676\u001b[39m, in \u001b[36mSQLiteDatabase.execute\u001b[39m\u001b[34m(self, sql, params)\u001b[39m\n\u001b[32m 2675\u001b[39m ex = DatabaseError(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mExecution failed on sql \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msql\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexc\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m2676\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m ex \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mexc\u001b[39;00m\n",
|
|
||||||
"\u001b[31mDatabaseError\u001b[39m: Execution failed on sql 'select tstamp, tstamp_ns as time_ns, substr(instrument_id, 7) as symbol, open, high, low, close, volume, num_trades, vwap from md_1min_bars where exchange_id ='ALPACA' and instrument_id in (\"STOCK-COIN\",\"STOCK-GBTC\",\"STOCK-HOOD\",\"STOCK-MSTR\",\"STOCK-PYPL\")': no such table: md_1min_bars",
|
|
||||||
"\nThe above exception was the direct cause of the following exception:\n",
|
|
||||||
"\u001b[31mException\u001b[39m Traceback (most recent call last)",
|
|
||||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 6\u001b[39m\n\u001b[32m 3\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mCurrent working directory: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mos.getcwd()\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 4\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mLoading data from: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdatafile_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m----> \u001b[39m\u001b[32m6\u001b[39m market_data_df = \u001b[43mload_market_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdatafile_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m=\u001b[49m\u001b[43mCONFIG\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mLoaded \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(market_data_df)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m rows of market data\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 9\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mSymbols in data: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmarket_data_df[\u001b[33m'\u001b[39m\u001b[33msymbol\u001b[39m\u001b[33m'\u001b[39m].unique()\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n",
|
|
||||||
"\u001b[36mFile \u001b[39m\u001b[32m~/devel/pairs_trading/src/notebooks/../tools/data_loader.py:69\u001b[39m, in \u001b[36mload_market_data\u001b[39m\u001b[34m(datafile, config)\u001b[39m\n\u001b[32m 66\u001b[39m query += \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m where exchange_id =\u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexchange_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 67\u001b[39m query += \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m and instrument_id in (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m,\u001b[39m\u001b[33m'\u001b[39m.join(instrument_ids)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m)\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m---> \u001b[39m\u001b[32m69\u001b[39m df = \u001b[43mload_sqlite_to_dataframe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdb_path\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdatafile\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m=\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 71\u001b[39m \u001b[38;5;66;03m# Trading Hours\u001b[39;00m\n\u001b[32m 72\u001b[39m date_str = df[\u001b[33m\"\u001b[39m\u001b[33mtstamp\u001b[39m\u001b[33m\"\u001b[39m][\u001b[32m0\u001b[39m][\u001b[32m0\u001b[39m:\u001b[32m10\u001b[39m]\n",
|
|
||||||
"\u001b[36mFile \u001b[39m\u001b[32m~/devel/pairs_trading/src/notebooks/../tools/data_loader.py:18\u001b[39m, in \u001b[36mload_sqlite_to_dataframe\u001b[39m\u001b[34m(db_path, query)\u001b[39m\n\u001b[32m 16\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m excpt:\n\u001b[32m 17\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexcpt\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m18\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m() \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mexcpt\u001b[39;00m\n\u001b[32m 19\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 20\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mconn\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mlocals\u001b[39m():\n",
|
|
||||||
"\u001b[31mException\u001b[39m: "
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# Load market data\n",
|
|
||||||
"datafile_path = f\"{CONFIG['data_directory']}/{DATA_FILE}\"\n",
|
|
||||||
"print(f\"Current working directory: {os.getcwd()}\")\n",
|
|
||||||
"print(f\"Loading data from: {datafile_path}\")\n",
|
|
||||||
"\n",
|
|
||||||
"market_data_df = load_market_data(datafile_path, config=CONFIG)\n",
|
|
||||||
"\n",
|
|
||||||
"print(f\"Loaded {len(market_data_df)} rows of market data\")\n",
|
|
||||||
"print(f\"Symbols in data: {market_data_df['symbol'].unique()}\")\n",
|
|
||||||
"print(f\"Time range: {market_data_df['tstamp'].min()} to {market_data_df['tstamp'].max()}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Display first few rows\n",
|
|
||||||
"market_data_df.head()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create Trading Pair and Analyze"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Create trading pair\n",
|
|
||||||
"pair = TradingPair(\n",
|
|
||||||
" market_data=market_data_df,\n",
|
|
||||||
" symbol_a=SYMBOL_A,\n",
|
|
||||||
" symbol_b=SYMBOL_B,\n",
|
|
||||||
" price_column=CONFIG[\"price_column\"]\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"print(f\"Created trading pair: {pair}\")\n",
|
|
||||||
"print(f\"Market data shape: {pair.market_data_.shape}\")\n",
|
|
||||||
"print(f\"Column names: {pair.colnames()}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Display first few rows of pair data\n",
|
|
||||||
"pair.market_data_.head()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Split Data into Training and Testing"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Get training and testing datasets\n",
|
|
||||||
"training_minutes = CONFIG[\"training_minutes\"]\n",
|
|
||||||
"pair.get_datasets(training_minutes=training_minutes)\n",
|
|
||||||
"\n",
|
|
||||||
"print(f\"Training data: {len(pair.training_df_)} rows\")\n",
|
|
||||||
"print(f\"Testing data: {len(pair.testing_df_)} rows\")\n",
|
|
||||||
"print(f\"Training period: {pair.training_df_['tstamp'].iloc[0]} to {pair.training_df_['tstamp'].iloc[-1]}\")\n",
|
|
||||||
"print(f\"Testing period: {pair.testing_df_['tstamp'].iloc[0]} to {pair.testing_df_['tstamp'].iloc[-1]}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Check for any missing data\n",
|
|
||||||
"print(f\"Training data null values: {pair.training_df_.isnull().sum().sum()}\")\n",
|
|
||||||
"print(f\"Testing data null values: {pair.testing_df_.isnull().sum().sum()}\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Visualize Raw Price Data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Plot raw price data\n",
|
|
||||||
"fig, axes = plt.subplots(3, 1, figsize=(15, 12))\n",
|
|
||||||
"\n",
|
|
||||||
"# Combined price plot\n",
|
|
||||||
"colname_a, colname_b = pair.colnames()\n",
|
|
||||||
"all_data = pd.concat([pair.training_df_, pair.testing_df_]).reset_index(drop=True)\n",
|
|
||||||
"\n",
|
|
||||||
"# Plot individual prices\n",
|
|
||||||
"axes[0].plot(all_data['tstamp'], all_data[colname_a], label=f'{SYMBOL_A}', alpha=0.8)\n",
|
|
||||||
"axes[0].plot(all_data['tstamp'], all_data[colname_b], label=f'{SYMBOL_B}', alpha=0.8)\n",
|
|
||||||
"axes[0].axvline(x=pair.training_df_['tstamp'].iloc[-1], color='red', linestyle='--', alpha=0.7, label='Train/Test Split')\n",
|
|
||||||
"axes[0].set_title(f'Price Comparison: {SYMBOL_A} vs {SYMBOL_B}')\n",
|
|
||||||
"axes[0].set_ylabel('Price')\n",
|
|
||||||
"axes[0].legend()\n",
|
|
||||||
"axes[0].grid(True)\n",
|
|
||||||
"\n",
|
|
||||||
"# Normalized prices for comparison\n",
|
|
||||||
"norm_a = all_data[colname_a] / all_data[colname_a].iloc[0]\n",
|
|
||||||
"norm_b = all_data[colname_b] / all_data[colname_b].iloc[0]\n",
|
|
||||||
"\n",
|
|
||||||
"axes[1].plot(all_data['tstamp'], norm_a, label=f'{SYMBOL_A} (normalized)', alpha=0.8)\n",
|
|
||||||
"axes[1].plot(all_data['tstamp'], norm_b, label=f'{SYMBOL_B} (normalized)', alpha=0.8)\n",
|
|
||||||
"axes[1].axvline(x=pair.training_df_['tstamp'].iloc[-1], color='red', linestyle='--', alpha=0.7, label='Train/Test Split')\n",
|
|
||||||
"axes[1].set_title('Normalized Price Comparison')\n",
|
|
||||||
"axes[1].set_ylabel('Normalized Price')\n",
|
|
||||||
"axes[1].legend()\n",
|
|
||||||
"axes[1].grid(True)\n",
|
|
||||||
"\n",
|
|
||||||
"# Price ratio\n",
|
|
||||||
"price_ratio = all_data[colname_a] / all_data[colname_b]\n",
|
|
||||||
"axes[2].plot(all_data['tstamp'], price_ratio, label=f'{SYMBOL_A}/{SYMBOL_B} Ratio', color='green', alpha=0.8)\n",
|
|
||||||
"axes[2].axvline(x=pair.training_df_['tstamp'].iloc[-1], color='red', linestyle='--', alpha=0.7, label='Train/Test Split')\n",
|
|
||||||
"axes[2].set_title('Price Ratio')\n",
|
|
||||||
"axes[2].set_ylabel('Ratio')\n",
|
|
||||||
"axes[2].set_xlabel('Time')\n",
|
|
||||||
"axes[2].legend()\n",
|
|
||||||
"axes[2].grid(True)\n",
|
|
||||||
"\n",
|
|
||||||
"plt.tight_layout()\n",
|
|
||||||
"plt.show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train the Pair and Check Cointegration"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Train the pair and check cointegration\n",
|
|
||||||
"try:\n",
|
|
||||||
" is_cointegrated = pair.train_pair()\n",
|
|
||||||
" print(f\"Pair {pair} cointegration status: {is_cointegrated}\")\n",
|
|
||||||
"\n",
|
|
||||||
" if is_cointegrated:\n",
|
|
||||||
" print(f\"VECM Beta coefficients: {pair.vecm_fit_.beta.flatten()}\")\n",
|
|
||||||
" print(f\"Training dis-equilibrium mean: {pair.training_mu_:.6f}\")\n",
|
|
||||||
" print(f\"Training dis-equilibrium std: {pair.training_std_:.6f}\")\n",
|
|
||||||
"\n",
|
|
||||||
" # Display VECM summary\n",
|
|
||||||
" print(\"\\nVECM Model Summary:\")\n",
|
|
||||||
" print(pair.vecm_fit_.summary())\n",
|
|
||||||
" else:\n",
|
|
||||||
" print(\"Pair is not cointegrated. Cannot proceed with strategy.\")\n",
|
|
||||||
"\n",
|
|
||||||
"except Exception as e:\n",
|
|
||||||
" print(f\"Training failed: {str(e)}\")\n",
|
|
||||||
" is_cointegrated = False"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Visualize Training Period Dis-equilibrium"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if is_cointegrated:\n",
|
|
||||||
" # fig, axes = plt.subplots(, 1, figsize=(15, 10))\n",
|
|
||||||
"\n",
|
|
||||||
" # # Raw dis-equilibrium\n",
|
|
||||||
" # axes[0].plot(pair.training_df_['tstamp'], pair.training_df_['dis-equilibrium'],\n",
|
|
||||||
" # color='blue', alpha=0.8, label='Raw Dis-equilibrium')\n",
|
|
||||||
" # axes[0].axhline(y=pair.training_mu_, color='red', linestyle='--', alpha=0.7, label='Mean')\n",
|
|
||||||
" # axes[0].axhline(y=pair.training_mu_ + pair.training_std_, color='orange', linestyle='--', alpha=0.5, label='+1 Std')\n",
|
|
||||||
" # axes[0].axhline(y=pair.training_mu_ - pair.training_std_, color='orange', linestyle='--', alpha=0.5, label='-1 Std')\n",
|
|
||||||
" # axes[0].set_title('Training Period: Raw Dis-equilibrium')\n",
|
|
||||||
" # axes[0].set_ylabel('Dis-equilibrium')\n",
|
|
||||||
" # axes[0].legend()\n",
|
|
||||||
" # axes[0].grid(True)\n",
|
|
||||||
"\n",
|
|
||||||
" # Scaled dis-equilibrium\n",
|
|
||||||
" fig, axes = plt.subplots(1, 1, figsize=(15, 5))\n",
|
|
||||||
" axes.plot(pair.training_df_['tstamp'], pair.training_df_['scaled_dis-equilibrium'],\n",
|
|
||||||
" color='green', alpha=0.8, label='Scaled Dis-equilibrium')\n",
|
|
||||||
" axes.axhline(y=0, color='red', linestyle='--', alpha=0.7, label='Mean (0)')\n",
|
|
||||||
" axes.axhline(y=1, color='orange', linestyle='--', alpha=0.5, label='+1 Std')\n",
|
|
||||||
" axes.axhline(y=-1, color='orange', linestyle='--', alpha=0.5, label='-1 Std')\n",
|
|
||||||
" axes.axhline(y=CONFIG['dis-equilibrium_open_trshld'], color='purple',\n",
|
|
||||||
" linestyle=':', alpha=0.7, label=f\"Open Threshold ({CONFIG['dis-equilibrium_open_trshld']})\")\n",
|
|
||||||
" axes.axhline(y=CONFIG['dis-equilibrium_close_trshld'], color='brown',\n",
|
|
||||||
" linestyle=':', alpha=0.7, label=f\"Close Threshold ({CONFIG['dis-equilibrium_close_trshld']})\")\n",
|
|
||||||
" axes.set_title('Training Period: Scaled Dis-equilibrium')\n",
|
|
||||||
" axes.set_ylabel('Scaled Dis-equilibrium')\n",
|
|
||||||
" axes.set_xlabel('Time')\n",
|
|
||||||
" axes.legend()\n",
|
|
||||||
" axes.grid(True)\n",
|
|
||||||
"\n",
|
|
||||||
" plt.tight_layout()\n",
|
|
||||||
" plt.show()\n",
|
|
||||||
"\n",
|
|
||||||
" # Print statistics\n",
|
|
||||||
" print(f\"Training dis-equilibrium statistics:\")\n",
|
|
||||||
" print(f\" Mean: {pair.training_df_['dis-equilibrium'].mean():.6f}\")\n",
|
|
||||||
" print(f\" Std: {pair.training_df_['dis-equilibrium'].std():.6f}\")\n",
|
|
||||||
" print(f\" Min: {pair.training_df_['dis-equilibrium'].min():.6f}\")\n",
|
|
||||||
" print(f\" Max: {pair.training_df_['dis-equilibrium'].max():.6f}\")\n",
|
|
||||||
"\n",
|
|
||||||
" print(f\"\\nScaled dis-equilibrium statistics:\")\n",
|
|
||||||
" print(f\" Mean: {pair.training_df_['scaled_dis-equilibrium'].mean():.6f}\")\n",
|
|
||||||
" print(f\" Std: {pair.training_df_['scaled_dis-equilibrium'].std():.6f}\")\n",
|
|
||||||
" print(f\" Min: {pair.training_df_['scaled_dis-equilibrium'].min():.6f}\")\n",
|
|
||||||
" print(f\" Max: {pair.training_df_['scaled_dis-equilibrium'].max():.6f}\")\n",
|
|
||||||
"else:\n",
|
|
||||||
" print(\"The pair is not cointegrated\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Generate Predictions and Run Strategy"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if is_cointegrated:\n",
|
|
||||||
" try:\n",
|
|
||||||
" # Generate predictions\n",
|
|
||||||
" pair.predict()\n",
|
|
||||||
" print(f\"Generated predictions for {len(pair.predicted_df_)} rows\")\n",
|
|
||||||
"\n",
|
|
||||||
" # Display prediction data structure\n",
|
|
||||||
" print(f\"Prediction columns: {list(pair.predicted_df_.columns)}\")\n",
|
|
||||||
" print(f\"Prediction period: {pair.predicted_df_['tstamp'].iloc[0]} to {pair.predicted_df_['tstamp'].iloc[-1]}\")\n",
|
|
||||||
"\n",
|
|
||||||
" # Run strategy\n",
|
|
||||||
" bt_result = BacktestResult(config=CONFIG)\n",
|
|
||||||
" pair_trades = FIT_METHOD.run_pair(config=CONFIG, pair=pair, bt_result=bt_result)\n",
|
|
||||||
"\n",
|
|
||||||
" if pair_trades is not None and len(pair_trades) > 0:\n",
|
|
||||||
" print(f\"\\nGenerated {len(pair_trades)} trading signals:\")\n",
|
|
||||||
" print(pair_trades)\n",
|
|
||||||
" else:\n",
|
|
||||||
" print(\"\\nNo trading signals generated\")\n",
|
|
||||||
"\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" print(f\"Prediction/Strategy failed: {str(e)}\")\n",
|
|
||||||
" pair_trades = None"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Visualize Predictions and Dis-equilibrium"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if is_cointegrated and hasattr(pair, 'predicted_df_'):\n",
|
|
||||||
" fig, axes = plt.subplots(4, 1, figsize=(16, 16))\n",
|
|
||||||
"\n",
|
|
||||||
" # Actual vs Predicted Prices\n",
|
|
||||||
" colname_a, colname_b = pair.colnames()\n",
|
|
||||||
"\n",
|
|
||||||
" axes[0].plot(pair.predicted_df_['tstamp'], pair.predicted_df_[colname_a],\n",
|
|
||||||
" label=f'{SYMBOL_A} Actual', alpha=0.8)\n",
|
|
||||||
" axes[0].plot(pair.predicted_df_['tstamp'], pair.predicted_df_[f'{colname_a}_pred'],\n",
|
|
||||||
" label=f'{SYMBOL_A} Predicted', alpha=0.8, linestyle='--')\n",
|
|
||||||
" axes[0].set_title('Actual vs Predicted Prices - Symbol A')\n",
|
|
||||||
" axes[0].set_ylabel('Price')\n",
|
|
||||||
" axes[0].legend()\n",
|
|
||||||
" axes[0].grid(True)\n",
|
|
||||||
"\n",
|
|
||||||
" axes[1].plot(pair.predicted_df_['tstamp'], pair.predicted_df_[colname_b],\n",
|
|
||||||
" label=f'{SYMBOL_B} Actual', alpha=0.8)\n",
|
|
||||||
" axes[1].plot(pair.predicted_df_['tstamp'], pair.predicted_df_[f'{colname_b}_pred'],\n",
|
|
||||||
" label=f'{SYMBOL_B} Predicted', alpha=0.8, linestyle='--')\n",
|
|
||||||
" axes[1].set_title('Actual vs Predicted Prices - Symbol B')\n",
|
|
||||||
" axes[1].set_ylabel('Price')\n",
|
|
||||||
" axes[1].legend()\n",
|
|
||||||
" axes[1].grid(True)\n",
|
|
||||||
"\n",
|
|
||||||
" # Raw dis-equilibrium\n",
|
|
||||||
" axes[2].plot(pair.predicted_df_['tstamp'], pair.predicted_df_['disequilibrium'],\n",
|
|
||||||
" color='blue', alpha=0.8, label='Dis-equilibrium')\n",
|
|
||||||
" axes[2].axhline(y=pair.training_mu_, color='red', linestyle='--', alpha=0.7, label='Training Mean')\n",
|
|
||||||
" axes[2].set_title('Testing Period: Raw Dis-equilibrium')\n",
|
|
||||||
" axes[2].set_ylabel('Dis-equilibrium')\n",
|
|
||||||
" axes[2].legend()\n",
|
|
||||||
" axes[2].grid(True)\n",
|
|
||||||
"\n",
|
|
||||||
" # Scaled dis-equilibrium with trading signals\n",
|
|
||||||
" axes[3].plot(pair.predicted_df_['tstamp'], pair.predicted_df_['scaled_disequilibrium'],\n",
|
|
||||||
" color='green', alpha=0.8, label='Scaled Dis-equilibrium')\n",
|
|
||||||
"\n",
|
|
||||||
" # Add threshold lines\n",
|
|
||||||
" axes[3].axhline(y=CONFIG['dis-equilibrium_open_trshld'], color='purple',\n",
|
|
||||||
" linestyle=':', alpha=0.7, label=f\"Open Threshold ({CONFIG['dis-equilibrium_open_trshld']})\")\n",
|
|
||||||
" axes[3].axhline(y=CONFIG['dis-equilibrium_close_trshld'], color='brown',\n",
|
|
||||||
" linestyle=':', alpha=0.7, label=f\"Close Threshold ({CONFIG['dis-equilibrium_close_trshld']})\")\n",
|
|
||||||
"\n",
|
|
||||||
" # Add trading signals if they exist\n",
|
|
||||||
" if pair_trades is not None and len(pair_trades) > 0:\n",
|
|
||||||
" for _, trade in pair_trades.iterrows():\n",
|
|
||||||
" color = 'red' if 'BUY' in trade['action'] else 'blue'\n",
|
|
||||||
" marker = '^' if 'BUY' in trade['action'] else 'v'\n",
|
|
||||||
" axes[3].scatter(trade['time'], trade['scaled_disequilibrium'],\n",
|
|
||||||
" color=color, marker=marker, s=100, alpha=0.8,\n",
|
|
||||||
" label=f\"{trade['action']} {trade['symbol']}\" if _ < 2 else \"\")\n",
|
|
||||||
"\n",
|
|
    "    axes[3].set_title('Testing Period: Scaled Dis-equilibrium with Trading Signals')\n",
    "    axes[3].set_ylabel('Scaled Dis-equilibrium')\n",
    "    axes[3].set_xlabel('Time')\n",
    "    axes[3].legend()\n",
    "    axes[3].grid(True)\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "    # Print prediction statistics\n",
    "    print(f\"\\nTesting dis-equilibrium statistics:\")\n",
    "    print(f\"  Mean: {pair.predicted_df_['disequilibrium'].mean():.6f}\")\n",
    "    print(f\"  Std: {pair.predicted_df_['disequilibrium'].std():.6f}\")\n",
    "    print(f\"  Min: {pair.predicted_df_['disequilibrium'].min():.6f}\")\n",
    "    print(f\"  Max: {pair.predicted_df_['disequilibrium'].max():.6f}\")\n",
    "\n",
    "    print(f\"\\nTesting scaled dis-equilibrium statistics:\")\n",
    "    print(f\"  Mean: {pair.predicted_df_['scaled_disequilibrium'].mean():.6f}\")\n",
    "    print(f\"  Std: {pair.predicted_df_['scaled_disequilibrium'].std():.6f}\")\n",
    "    print(f\"  Min: {pair.predicted_df_['scaled_disequilibrium'].min():.6f}\")\n",
    "    print(f\"  Max: {pair.predicted_df_['scaled_disequilibrium'].max():.6f}\")\n",
    "\n",
    "    # Count threshold crossings\n",
    "    open_crossings = (pair.predicted_df_['scaled_disequilibrium'] >= CONFIG['dis-equilibrium_open_trshld']).sum()\n",
    "    close_crossings = (pair.predicted_df_['scaled_disequilibrium'] <= CONFIG['dis-equilibrium_close_trshld']).sum()\n",
    "    print(f\"\\nThreshold crossings:\")\n",
    "    print(f\"  Open threshold ({CONFIG['dis-equilibrium_open_trshld']}): {open_crossings} times\")\n",
    "    print(f\"  Close threshold ({CONFIG['dis-equilibrium_close_trshld']}): {close_crossings} times\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Summary and Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"=\" * 60)\n",
    "print(\"PAIRS TRADING ANALYSIS SUMMARY\")\n",
    "print(\"=\" * 60)\n",
    "\n",
    "print(f\"\\nPair: {SYMBOL_A} & {SYMBOL_B}\")\n",
    "print(f\"Strategy: {type(FIT_METHOD).__name__}\")\n",
    "print(f\"Data file: {DATA_FILE}\")\n",
    "print(f\"Training period: {training_minutes} minutes\")\n",
    "\n",
    "print(f\"\\nCointegration Status: {'✓ COINTEGRATED' if is_cointegrated else '✗ NOT COINTEGRATED'}\")\n",
    "\n",
    "if is_cointegrated:\n",
    "    print(f\"\\nVECM Model:\")\n",
    "    print(f\"  Beta coefficients: {pair.vecm_fit_.beta.flatten()}\")\n",
    "    print(f\"  Training mean: {pair.training_mu_:.6f}\")\n",
    "    print(f\"  Training std: {pair.training_std_:.6f}\")\n",
    "\n",
    "    if pair_trades is not None and len(pair_trades) > 0:\n",
    "        print(f\"\\nTrading Signals: {len(pair_trades)} generated\")\n",
    "        unique_times = pair_trades['time'].unique()\n",
    "        print(f\"  Unique trade times: {len(unique_times)}\")\n",
    "\n",
    "        # Group by time to see paired trades\n",
    "        for trade_time in unique_times:\n",
    "            trades_at_time = pair_trades[pair_trades['time'] == trade_time]\n",
    "            print(f\"\\n  Trade at {trade_time}:\")\n",
    "            for _, trade in trades_at_time.iterrows():\n",
    "                print(f\"    {trade['action']} {trade['symbol']} @ ${trade['price']:.2f} (dis-eq: {trade['scaled_disequilibrium']:.2f})\")\n",
    "    else:\n",
    "        print(f\"\\nTrading Signals: None generated\")\n",
    "        print(\"  Possible reasons:\")\n",
    "        print(\"  - Dis-equilibrium never exceeded open threshold\")\n",
    "        print(\"  - Insufficient testing data\")\n",
    "        print(\"  - Strategy-specific conditions not met\")\n",
    "\n",
    "else:\n",
    "    print(\"\\nCannot proceed with trading strategy - pair is not cointegrated\")\n",
    "    print(\"Consider:\")\n",
    "    print(\"  - Trying different symbol pairs\")\n",
    "    print(\"  - Adjusting training period length\")\n",
    "    print(\"  - Using different data timeframe\")\n",
    "\n",
    "print(\"\\n\" + \"=\" * 60)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Interactive Analysis (Optional)\n",
    "\n",
    "You can modify the parameters below and re-run the analysis:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Interactive parameter adjustment\n",
    "print(\"Current parameters:\")\n",
    "print(f\"  Open threshold: {CONFIG['dis-equilibrium_open_trshld']}\")\n",
    "print(f\"  Close threshold: {CONFIG['dis-equilibrium_close_trshld']}\")\n",
    "print(f\"  Training minutes: {CONFIG['training_minutes']}\")\n",
    "\n",
    "# Uncomment and modify these to experiment:\n",
    "# CONFIG['dis-equilibrium_open_trshld'] = 1.5\n",
    "# CONFIG['dis-equilibrium_close_trshld'] = 0.3\n",
    "# CONFIG['training_minutes'] = 180\n",
    "\n",
    "print(\"\\nTo re-run with different parameters:\")\n",
    "print(\"1. Modify the parameters above\")\n",
    "print(\"2. Re-run from the 'Split Data into Training and Testing' cell\")\n",
    "print(\"3. Or try different symbol pairs by changing SYMBOL_A and SYMBOL_B\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python3.12-venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
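One caveat on the notebook's last analysis cell: `(series >= threshold).sum()` counts every bar sitting at or beyond the threshold, not distinct crossing events. A minimal sketch of the difference (synthetic data, pandas assumed):

    import pandas as pd

    s = pd.Series([0.5, 2.1, 2.3, 0.4, 2.2])
    bars_beyond = (s >= 2.0).sum()                                    # 3 bars at/above 2.0
    crossings = ((s >= 2.0) & (s.shift(fill_value=0.0) < 2.0)).sum()  # 2 distinct upward crossings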
@ -7,6 +7,7 @@ from typing import Any, Dict, List, Optional
import pandas as pd

from research.research_tools import create_pairs
from tools.config import expand_filename, load_config
from tools.data_loader import get_available_instruments_from_db, load_market_data
from pt_trading.results import (
@ -70,31 +71,8 @@ def run_backtest(
    """
    bt_result: BacktestResult = BacktestResult(config=config)

    def _create_pairs(config: Dict, instruments: List[str]) -> List[TradingPair]:
        nonlocal datafile
        all_indexes = range(len(instruments))
        unique_index_pairs = [(i, j) for i in all_indexes for j in all_indexes if i < j]
        pairs = []

        # Update config to use the specified instruments
        config_copy = config.copy()
        config_copy["instruments"] = instruments

        market_data_df = load_market_data(datafile, config=config_copy)

        for a_index, b_index in unique_index_pairs:
            pair = TradingPair(
                config=config_copy,
                market_data=market_data_df,
                symbol_a=instruments[a_index],
                symbol_b=instruments[b_index],
                price_column=price_column,
            )
            pairs.append(pair)
        return pairs

    pairs_trades = []
    for pair in _create_pairs(config, instruments):
    for pair in create_pairs(datafile, price_column, config, instruments):
        single_pair_trades = fit_method.run_pair(
            pair=pair, bt_result=bt_result
        )
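The hunk above hoists the nested helper out of run_backtest: the closure captured datafile via nonlocal and price_column from the enclosing scope, whereas the free function in research_tools takes both explicitly, so the call site changes roughly like this:

    # before: inputs captured implicitly from run_backtest's scope
    _create_pairs(config, instruments)
    # after: inputs passed explicitly, making the helper importable and unit-testable
    create_pairs(datafile, price_column, config, instruments)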

research/research_tools.py (new file, +69 lines)
@ -0,0 +1,69 @@
import glob
import os
from typing import Dict, List, Optional


def resolve_datafiles(config: Dict, cli_datafiles: Optional[str] = None) -> List[str]:
    """
    Resolve the list of data files to process.
    CLI datafiles take priority over config datafiles.
    Supports wildcards in config but not in CLI.
    """
    if cli_datafiles:
        # CLI override - comma-separated list, no wildcards
        datafiles = [f.strip() for f in cli_datafiles.split(",")]
        # Make paths absolute relative to data directory
        data_dir = config.get("data_directory", "./data")
        resolved_files = []
        for df in datafiles:
            if not os.path.isabs(df):
                df = os.path.join(data_dir, df)
            resolved_files.append(df)
        return resolved_files

    # Use config datafiles with wildcard support
    config_datafiles = config.get("datafiles", [])
    data_dir = config.get("data_directory", "./data")
    resolved_files = []

    for pattern in config_datafiles:
        if "*" in pattern or "?" in pattern:
            # Handle wildcards
            if not os.path.isabs(pattern):
                pattern = os.path.join(data_dir, pattern)
            matched_files = glob.glob(pattern)
            resolved_files.extend(matched_files)
        else:
            # Handle explicit file path
            if not os.path.isabs(pattern):
                pattern = os.path.join(data_dir, pattern)
            resolved_files.append(pattern)

    return sorted(list(set(resolved_files)))  # Remove duplicates and sort
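A quick usage sketch of the resolution precedence ("data_directory" and "datafiles" are the keys the function actually reads; the file names below are made up):

    from research.research_tools import resolve_datafiles

    config = {"data_directory": "./data", "datafiles": ["ohlcv_*.csv"]}
    resolve_datafiles(config)                  # wildcards expanded under ./data, deduped and sorted
    resolve_datafiles(config, "a.csv, b.csv")  # CLI list overrides config; relative names joined to ./data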


def create_pairs(datafile: str, price_column: str, config: Dict, instruments: List[str]) -> List:
    from tools.data_loader import load_market_data
    from pt_trading.trading_pair import TradingPair

    all_indexes = range(len(instruments))
    unique_index_pairs = [(i, j) for i in all_indexes for j in all_indexes if i < j]
    pairs = []

    # Update config to use the specified instruments
    config_copy = config.copy()
    config_copy["instruments"] = instruments

    market_data_df = load_market_data(datafile, config=config_copy)

    for a_index, b_index in unique_index_pairs:
        pair = TradingPair(
            config=config_copy,
            market_data=market_data_df,
            symbol_a=instruments[a_index],
            symbol_b=instruments[b_index],
            price_column=price_column,
        )
        pairs.append(pair)
    return pairs
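And a sketch of how the extracted helper is driven (hypothetical file name and symbols; run_backtest supplies its own datafile, price_column, config, and instrument list):

    pairs = create_pairs("data/quotes.csv", "close", config, ["AAA", "BBB", "CCC"])
    # one TradingPair per unordered combination: (AAA, BBB), (AAA, CCC), (BBB, CCC)
    assert len(pairs) == 3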