Compare commits


8 Commits

Author                           SHA1        Message          Date
                                 809f46fe36  to discard       2025-11-04 18:02:38 +00:00
                                 413abafe0f  My First Commit  2025-11-04 17:55:08 +00:00
                                 5d46c1e32c  .                2025-10-27 18:46:26 -04:00
                                 889f7ba1c3  .                2025-10-27 18:46:14 -04:00
                                 1515b2d077  .                2025-10-27 18:39:51 -04:00
                                 b4ae3e715d  .                2025-10-27 18:36:26 -04:00
Cryptoval Trading Technologies   6f845d32c6  .                2025-07-25 22:13:49 +00:00
Cryptoval Trading Technologies   a04e8878fb  lg_changes       2025-07-25 22:11:49 +00:00
60 changed files with 24154 additions and 12190 deletions

.envrc

@@ -1 +0,0 @@
source /home/oleg/.pyenv/python3.12-venv/bin/activate

.gitignore

@@ -1,12 +1,11 @@
 # SpecStory explanation file
 __pycache__/
 __OLD__/
-.specstory/
 .history/
 .cursorindexingignore
 data
-####.vscode/
 cvttpy
 # SpecStory explanation file
 .specstory/.what-is-this.md
 results/
+tmp/

.vscode/.env

@@ -1 +0,0 @@
PYTHONPATH=/home/oleg/develop

.vscode/launch.json

@@ -1,133 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
"program": "${file}",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${workspaceFolder}/lib:${workspaceFolder}/.."
},
},
{
"name": "-------- VECM --------",
},
{
"name": "CRYPTO VECM BACKTEST (optimized)",
"type": "debugpy",
"request": "launch",
"python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
"program": "${workspaceFolder}/research/backtest.py",
"args": [
"--config=http://cloud16.cvtt.vpn:6789/apps/pairs_trading/backtest",
"--instruments=CRYPTO:BNBSPOT:PAIR-ADA-USDT,CRYPTO:BNBSPOT:PAIR-SOL-USDT",
"--date_pattern=20250911",
"--result_db=${workspaceFolder}/research/results/crypto/%T.vecm-opt.ADA-SOL.20250605.crypto_results.db",
],
"env": {
"PYTHONPATH": "${workspaceFolder}/..",
"CONFIG_SERVICE": "cloud16.cvtt.vpn:6789",
"MODEL_CONFIG": "vecm-opt"
},
"console": "integratedTerminal"
},
// {
// "name": "EQUITY VECM (rolling)",
// "type": "debugpy",
// "request": "launch",
// "python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
// "program": "${workspaceFolder}/research/backtest.py",
// "args": [
// "--config=${workspaceFolder}/configuration/vecm.cfg",
// "--instruments=COIN:EQUITY:ALPACA,MSTR:EQUITY:ALPACA",
// "--date_pattern=20250605",
// "--result_db=${workspaceFolder}/research/results/equity/%T.vecm.COIN-MSTR.20250605.equity_results.db",
// ],
// "env": {
// "PYTHONPATH": "${workspaceFolder}/lib"
// },
// "console": "integratedTerminal"
// },
// {
// "name": "EQUITY-CRYPTO VECM (rolling)",
// "type": "debugpy",
// "request": "launch",
// "python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
// "program": "${workspaceFolder}/research/backtest.py",
// "args": [
// "--config=${workspaceFolder}/configuration/vecm.cfg",
// "--instruments=COIN:EQUITY:ALPACA,BTC-USDT:CRYPTO:BNBSPOT",
// "--date_pattern=20250605",
// "--result_db=${workspaceFolder}/research/results/intermarket/%T.vecm.COIN-BTC.20250601.equity_results.db",
// ],
// "env": {
// "PYTHONPATH": "${workspaceFolder}/lib"
// },
// "console": "integratedTerminal"
// },
{
"name": "-------- B a t c h e s --------",
},
{
"name": "CRYPTO OLS Batch (rolling)",
"type": "debugpy",
"request": "launch",
"python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
"program": "${workspaceFolder}/research/backtest.py",
"args": [
"--config=${workspaceFolder}/configuration/ols.cfg",
"--instruments=ADA-USDT:CRYPTO:BNBSPOT,SOL-USDT:CRYPTO:BNBSPOT",
"--date_pattern=2025060*",
"--result_db=${workspaceFolder}/research/results/crypto/%T.ols.ADA-SOL.2025060-.crypto_results.db",
],
"env": {
"PYTHONPATH": "${workspaceFolder}/lib"
},
"console": "integratedTerminal"
},
{
"name": "CRYPTO VECM Batch (rolling)",
"type": "debugpy",
"request": "launch",
"python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
"program": "${workspaceFolder}/research/backtest.py",
"args": [
"--config=${workspaceFolder}/configuration/vecm.cfg",
"--instruments=ADA-USDT:CRYPTO:BNBSPOT,SOL-USDT:CRYPTO:BNBSPOT",
"--date_pattern=2025060*",
"--result_db=${workspaceFolder}/research/results/crypto/%T.vecm.ADA-SOL.2025060-.crypto_results.db",
],
"env": {
"PYTHONPATH": "${workspaceFolder}/lib"
},
"console": "integratedTerminal"
},
{
"name": "-------- Viz Test --------",
},
{
"name": "Viz Test",
"type": "debugpy",
"request": "launch",
"python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
"program": "${workspaceFolder}/tests/viz_test.py",
"args": [
"--config=${workspaceFolder}/configuration/ols.cfg",
"--instruments=ADA-USDT:CRYPTO:BNBSPOT,SOL-USDT:CRYPTO:BNBSPOT",
"--date_pattern=20250605",
],
"env": {
"PYTHONPATH": "${workspaceFolder}/lib"
},
"console": "integratedTerminal"
}
]
}
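
Each launch entry above is a thin wrapper around a command-line invocation of research/backtest.py. As a minimal sketch, the "CRYPTO VECM Batch (rolling)" entry corresponds to roughly the following; the interpreter path and checkout location are machine-specific assumptions copied from the config:

```python
# Sketch: run the "CRYPTO VECM Batch (rolling)" launch entry outside VS Code.
# The interpreter path, workspace root, and PYTHONPATH layout are assumptions
# taken from launch.json above; adjust for your machine.
import os
import subprocess

workspace = os.path.expanduser("~/develop/pairs_trading")  # hypothetical checkout
env = dict(os.environ, PYTHONPATH=os.path.join(workspace, "lib"))

subprocess.run(
    [
        "/home/oleg/.pyenv/python3.12-venv/bin/python",
        os.path.join(workspace, "research", "backtest.py"),
        "--config=" + os.path.join(workspace, "configuration", "vecm.cfg"),
        "--instruments=ADA-USDT:CRYPTO:BNBSPOT,SOL-USDT:CRYPTO:BNBSPOT",
        "--date_pattern=2025060*",
    ],
    env=env,
    check=True,
)
```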

(file name not shown)

@@ -1,10 +0,0 @@
{
"folders": [
{
"path": ".."
}
],
"settings": {
"workbench.colorTheme": "Dracula Theme"
}
}

.vscode/settings.json

@@ -1,19 +0,0 @@
{
"python.testing.pytestEnabled": true,
"python.testing.unittestEnabled": false,
"python.testing.pytestArgs": [
"unittests"
],
"python.testing.cwd": "${workspaceFolder}",
"python.testing.autoTestDiscoverOnSaveEnabled": true,
"python.testing.pytestPath": "python3",
"python.analysis.extraPaths": [
"${workspaceFolder}",
"${workspaceFolder}/..",
"${workspaceFolder}/unittests"
],
"python.envFile": "${workspaceFolder}/.env",
"python.testing.debugPort": 3000,
"python.testing.promptToConfigure": false,
"python.defaultInterpreterPath": "/home/oleg/.pyenv/python3.12-venv/bin/python"
}

(file name not shown)

@@ -1 +0,0 @@
0.0.7

(file name not shown)

@@ -1,509 +0,0 @@
from __future__ import annotations
import asyncio
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
from aiohttp import web
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller, coint # type: ignore
from statsmodels.tsa.vector_ar.vecm import coint_johansen # type: ignore
from cvttpy_tools.app import App
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import Config, CvttAppConfig
from cvttpy_tools.logger import Log
from cvttpy_tools.timeutils import NanoPerSec, SecPerHour, current_nanoseconds
from cvttpy_tools.web.rest_client import RESTSender
from cvttpy_tools.web.rest_service import RestService
from cvttpy_trading.trading.exchange_config import ExchangeAccounts
from cvttpy_trading.trading.instrument import ExchangeInstrument
from cvttpy_trading.trading.mkt_data.md_summary import MdTradesAggregate, MdSummary
from pairs_trading.apps.pair_selector.renderer import HtmlRenderer
@dataclass
class InstrumentQuality(NamedObject):
instrument_: ExchangeInstrument
record_count_: int
latest_tstamp_: Optional[pd.Timestamp]
status_: str
reason_: str
@dataclass
class PairStats(NamedObject):
instrument_a_: ExchangeInstrument
instrument_b_: ExchangeInstrument
pvalue_eg_: Optional[float]
pvalue_adf_: Optional[float]
pvalue_j_: Optional[float]
trace_stat_j_: Optional[float]
rank_eg_: int = 0
rank_adf_: int = 0
rank_j_: int = 0
composite_rank_: int = 0
def as_dict(self) -> Dict[str, Any]:
return {
"instrument_a": self.instrument_a_.instrument_id(),
"instrument_b": self.instrument_b_.instrument_id(),
"pvalue_eg": self.pvalue_eg_,
"pvalue_adf": self.pvalue_adf_,
"pvalue_j": self.pvalue_j_,
"trace_stat_j": self.trace_stat_j_,
"rank_eg": self.rank_eg_,
"rank_adf": self.rank_adf_,
"rank_j": self.rank_j_,
"composite_rank": self.composite_rank_,
}
class DataFetcher(NamedObject):
sender_: RESTSender
interval_sec_: int
history_depth_sec_: int
def __init__(
self,
base_url: str,
interval_sec: int,
history_depth_sec: int,
) -> None:
self.sender_ = RESTSender(base_url=base_url)
self.interval_sec_ = interval_sec
self.history_depth_sec_ = history_depth_sec
def fetch(
self, exch_acct: str, inst: ExchangeInstrument
) -> List[MdTradesAggregate]:
rqst_data = {
"exch_acct": exch_acct,
"instrument_id": inst.instrument_id(),
"interval_sec": self.interval_sec_,
"history_depth_sec": self.history_depth_sec_,
}
response = self.sender_.send_post(endpoint="md_summary", post_body=rqst_data)
if response.status_code not in (200, 201):
Log.error(
f"{self.fname()}: error {response.status_code} for {inst.details_short()}: {response.text}"
)
return []
mdsums: List[MdSummary] = MdSummary.from_REST_response(response=response)
return [
mdsum.create_md_trades_aggregate(
exch_acct=exch_acct, exch_inst=inst, interval_sec=self.interval_sec_
)
for mdsum in mdsums
]
class QualityChecker(NamedObject):
interval_sec_: int
def __init__(self, interval_sec: int) -> None:
self.interval_sec_ = interval_sec
def evaluate(
self, inst: ExchangeInstrument, aggr: List[MdTradesAggregate]
) -> InstrumentQuality:
if len(aggr) == 0:
return InstrumentQuality(
instrument_=inst,
record_count_=0,
latest_tstamp_=None,
status_="FAIL",
reason_="no records",
)
aggr_sorted = sorted(aggr, key=lambda a: a.aggr_time_ns_)
latest_ts = pd.to_datetime(aggr_sorted[-1].aggr_time_ns_, unit="ns", utc=True)
now_ts = pd.Timestamp.utcnow()
recency_cutoff = now_ts - pd.Timedelta(seconds=2 * self.interval_sec_)
if latest_ts <= recency_cutoff:
return InstrumentQuality(
instrument_=inst,
record_count_=len(aggr_sorted),
latest_tstamp_=latest_ts,
status_="FAIL",
reason_=f"stale: latest {latest_ts} <= cutoff {recency_cutoff}",
)
gaps_ok, reason = self._check_gaps(aggr_sorted)
status = "PASS" if gaps_ok else "FAIL"
return InstrumentQuality(
instrument_=inst,
record_count_=len(aggr_sorted),
latest_tstamp_=latest_ts,
status_=status,
reason_=reason,
)
def _check_gaps(self, aggr: List[MdTradesAggregate]) -> Tuple[bool, str]:
NUM_TRADES_THRESHOLD = 50
if len(aggr) < 2:
return True, "ok"
interval_ns = self.interval_sec_ * NanoPerSec
for idx in range(1, len(aggr)):
prev = aggr[idx - 1]
curr = aggr[idx]
delta = curr.aggr_time_ns_ - prev.aggr_time_ns_
missing_intervals = int(delta // interval_ns) - 1
if missing_intervals <= 0:
continue
prev_nt = prev.num_trades_
next_nt = curr.num_trades_
estimate = self._approximate_num_trades(prev_nt, next_nt)
if estimate > NUM_TRADES_THRESHOLD:
return False, (
f"gap of {missing_intervals} interval(s), est num_trades={estimate} > {NUM_TRADES_THRESHOLD}"
)
return True, "ok"
@staticmethod
def _approximate_num_trades(prev_nt: Optional[int], next_nt: Optional[int]) -> float:
if prev_nt is None and next_nt is None:
return 0.0
if prev_nt is None:
return float(next_nt)
if next_nt is None:
return float(prev_nt)
return (prev_nt + next_nt) / 2.0
class PairAnalyzer(NamedObject):
price_field_: str
interval_sec_: int
def __init__(self, price_field: str, interval_sec: int) -> None:
self.price_field_ = price_field
self.interval_sec_ = interval_sec
def analyze(
self, series: Dict[ExchangeInstrument, pd.DataFrame]
) -> List[PairStats]:
instruments = list(series.keys())
results: List[PairStats] = []
for i in range(len(instruments)):
for j in range(i + 1, len(instruments)):
inst_a = instruments[i]
inst_b = instruments[j]
df_a = series[inst_a][["tstamp", "price"]].rename(
columns={"price": "price_a"}
)
df_b = series[inst_b][["tstamp", "price"]].rename(
columns={"price": "price_b"}
)
merged = pd.merge(df_a, df_b, on="tstamp", how="inner").sort_values(
"tstamp"
)
stats = self._compute_stats(inst_a, inst_b, merged)
if stats:
results.append(stats)
self._rank(results)
return results
def _compute_stats(
self,
inst_a: ExchangeInstrument,
inst_b: ExchangeInstrument,
merged: pd.DataFrame,
) -> Optional[PairStats]:
if len(merged) < 2:
return None
px_a = merged["price_a"].astype(float)
px_b = merged["price_b"].astype(float)
std_a = float(px_a.std())
std_b = float(px_b.std())
if std_a == 0 or std_b == 0:
return None
z_a = (px_a - float(px_a.mean())) / std_a
z_b = (px_b - float(px_b.mean())) / std_b
p_eg: Optional[float]
p_adf: Optional[float]
p_j: Optional[float]
trace_stat: Optional[float]
try:
p_eg = float(coint(z_a, z_b)[1])
except Exception as exc:
Log.warning(
f"{self.fname()}: EG failed for {inst_a.details_short()}/{inst_b.details_short()}: {exc}"
)
p_eg = None
try:
spread = z_a - z_b
p_adf = float(adfuller(spread, maxlag=1, regression="c")[1])
except Exception as exc:
Log.warning(
f"{self.fname()}: ADF failed for {inst_a.details_short()}/{inst_b.details_short()}: {exc}"
)
p_adf = None
try:
data = np.column_stack([z_a, z_b])
res = coint_johansen(data, det_order=0, k_ar_diff=1)
trace_stat = float(res.lr1[0])
cv10, cv5, cv1 = res.cvt[0]
if trace_stat > cv1:
p_j = 0.01
elif trace_stat > cv5:
p_j = 0.05
elif trace_stat > cv10:
p_j = 0.10
else:
p_j = 1.0
except Exception as exc:
Log.warning(
f"{self.fname()}: Johansen failed for {inst_a.details_short()}/{inst_b.details_short()}: {exc}"
)
p_j = None
trace_stat = None
return PairStats(
instrument_a_=inst_a,
instrument_b_=inst_b,
pvalue_eg_=p_eg,
pvalue_adf_=p_adf,
pvalue_j_=p_j,
trace_stat_j_=trace_stat,
)
def _rank(self, results: List[PairStats]) -> None:
self._assign_ranks(results, key=lambda r: r.pvalue_eg_, attr="rank_eg_")
self._assign_ranks(results, key=lambda r: r.pvalue_adf_, attr="rank_adf_")
self._assign_ranks(results, key=lambda r: r.pvalue_j_, attr="rank_j_")
for res in results:
res.composite_rank_ = res.rank_eg_ + res.rank_adf_ + res.rank_j_
results.sort(key=lambda r: r.composite_rank_)
@staticmethod
def _assign_ranks(results: List[PairStats], key, attr: str) -> None:
values = [key(r) for r in results]
sorted_vals = sorted([v for v in values if v is not None])
for res in results:
val = key(res)
if val is None:
setattr(res, attr, len(sorted_vals) + 1)
continue
rank = 1 + sum(1 for v in sorted_vals if v < val)
setattr(res, attr, rank)
class PairSelectionEngine(NamedObject):
config_: object
instruments_: List[ExchangeInstrument]
price_field_: str
fetcher_: DataFetcher
quality_: QualityChecker
analyzer_: PairAnalyzer
interval_sec_: int
history_depth_sec_: int
data_quality_cache_: List[InstrumentQuality]
pair_results_cache_: List[PairStats]
def __init__(
self,
config: Config,
instruments: List[ExchangeInstrument],
price_field: str,
) -> None:
self.config_ = config
self.instruments_ = instruments
self.price_field_ = price_field
interval_sec = int(config.get_value("interval_sec", 0))
history_depth_sec = int(config.get_value("history_depth_hours", 0)) * SecPerHour
base_url = config.get_value("cvtt_base_url", None)
assert interval_sec > 0, "interval_sec must be > 0"
assert history_depth_sec > 0, "history_depth_sec must be > 0"
assert base_url, "cvtt_base_url must be set"
self.fetcher_ = DataFetcher(
base_url=base_url,
interval_sec=interval_sec,
history_depth_sec=history_depth_sec,
)
self.quality_ = QualityChecker(interval_sec=interval_sec)
self.analyzer_ = PairAnalyzer(
price_field=price_field, interval_sec=interval_sec
)
self.interval_sec_ = interval_sec
self.history_depth_sec_ = history_depth_sec
self.data_quality_cache_ = []
self.pair_results_cache_ = []
async def run_once(self) -> None:
quality_results: List[InstrumentQuality] = []
price_series: Dict[ExchangeInstrument, pd.DataFrame] = {}
for inst in self.instruments_:
exch_acct = inst.user_data_.get("exch_acct") or inst.exchange_id_
aggr = self.fetcher_.fetch(exch_acct=exch_acct, inst=inst)
q = self.quality_.evaluate(inst, aggr)
quality_results.append(q)
if q.status_ != "PASS":
continue
df = self._to_dataframe(aggr, inst)
if len(df) > 0:
price_series[inst] = df
self.data_quality_cache_ = quality_results
self.pair_results_cache_ = self.analyzer_.analyze(price_series)
def _to_dataframe(
self, aggr: List[MdTradesAggregate], inst: ExchangeInstrument
) -> pd.DataFrame:
rows: List[Dict[str, Any]] = []
for item in aggr:
rows.append(
{
"tstamp": pd.to_datetime(item.aggr_time_ns_, unit="ns", utc=True),
"price": self._extract_price(item, inst),
"num_trades": item.num_trades_,
}
)
df = pd.DataFrame(rows)
return df.sort_values("tstamp").reset_index(drop=True)
def _extract_price(
self, aggr: MdTradesAggregate, inst: ExchangeInstrument
) -> float:
price_field = self.price_field_
# MdTradesAggregate inherits hist bar with fields open_, high_, low_, close_, vwap_
field_map = {
"open": aggr.open_,
"high": aggr.high_,
"low": aggr.low_,
"close": aggr.close_,
"vwap": aggr.vwap_,
}
raw = field_map.get(price_field, aggr.close_)
return inst.get_price(raw)
def sleep_seconds_until_next_cycle(self) -> float:
now_ns = current_nanoseconds()
interval_ns = self.interval_sec_ * NanoPerSec
next_boundary = (now_ns // interval_ns + 1) * interval_ns
return max(0.0, (next_boundary - now_ns) / NanoPerSec)
def quality_dicts(self) -> List[Dict[str, Any]]:
res: List[Dict[str, Any]] = []
for q in self.data_quality_cache_:
res.append(
{
"instrument": q.instrument_.instrument_id(),
"record_count": q.record_count_,
"latest_tstamp": (
q.latest_tstamp_.isoformat() if q.latest_tstamp_ else None
),
"status": q.status_,
"reason": q.reason_,
}
)
return res
def pair_dicts(self) -> List[Dict[str, Any]]:
return [p.as_dict() for p in self.pair_results_cache_]
class PairSelector(NamedObject):
instruments_: List[ExchangeInstrument]
engine_: PairSelectionEngine
rest_service_: RestService
def __init__(self) -> None:
App.instance().add_cmdline_arg("--oneshot", action="store_true", default=False)
App.instance().add_call(App.Stage.Config, self._on_config())
App.instance().add_call(App.Stage.Run, self.run())
async def _on_config(self) -> None:
cfg = CvttAppConfig.instance()
self.instruments_ = self._load_instruments(cfg)
price_field = cfg.get_value("model/stat_model_price", "close")
self.engine_ = PairSelectionEngine(
config=cfg,
instruments=self.instruments_,
price_field=price_field,
)
self.rest_service_ = RestService(config_key="/api/REST")
self.rest_service_.add_handler("GET", "/data_quality", self._on_data_quality)
self.rest_service_.add_handler(
"GET", "/pair_selection", self._on_pair_selection
)
def _load_instruments(self, cfg: CvttAppConfig) -> List[ExchangeInstrument]:
instruments_cfg = cfg.get_value("instruments", [])
instruments: List[ExchangeInstrument] = []
assert len(instruments_cfg) >= 2, "at least two instruments required"
for item in instruments_cfg:
if isinstance(item, str):
parts = item.split(":", 1)
if len(parts) != 2:
raise ValueError(f"invalid instrument format: {item}")
exch_acct, instrument_id = parts
elif isinstance(item, dict):
exch_acct = item.get("exch_acct", "")
instrument_id = item.get("instrument_id", "")
if not exch_acct or not instrument_id:
raise ValueError(f"invalid instrument config: {item}")
else:
raise ValueError(f"unsupported instrument entry: {item}")
exch_inst = ExchangeAccounts.instance().get_exchange_instrument(
exch_acct=exch_acct, instrument_id=instrument_id
)
assert (
exch_inst is not None
), f"no ExchangeInstrument for {exch_acct}:{instrument_id}"
exch_inst.user_data_["exch_acct"] = exch_acct
instruments.append(exch_inst)
return instruments
async def run(self) -> None:
oneshot = App.instance().get_argument("oneshot", False)
while True:
await self.engine_.run_once()
if oneshot:
break
sleep_for = self.engine_.sleep_seconds_until_next_cycle()
await asyncio.sleep(sleep_for)
async def _on_data_quality(self, request: web.Request) -> web.Response:
fmt = request.query.get("format", "html").lower()
quality = self.engine_.quality_dicts()
if fmt == "json":
return web.json_response(quality)
return web.Response(
text=HtmlRenderer.render_data_quality(quality), content_type="text/html"
)
async def _on_pair_selection(self, request: web.Request) -> web.Response:
fmt = request.query.get("format", "html").lower()
pairs = self.engine_.pair_dicts()
if fmt == "json":
return web.json_response(pairs)
return web.Response(
text=HtmlRenderer.render_pairs(pairs), content_type="text/html"
)
if __name__ == "__main__":
App()
CvttAppConfig()
PairSelector()
App.instance().run()
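
The selector registers two GET handlers on its RestService, each returning HTML by default and JSON when `?format=json` is passed. A minimal sketch of polling them; the host and port are assumptions (RestService reads them from the "/api/REST" config key):

```python
# Sketch: query the pair selector's REST endpoints. BASE is hypothetical;
# replace it with wherever the service is actually listening.
import json
import urllib.request

BASE = "http://localhost:8080"

for endpoint in ("/data_quality", "/pair_selection"):
    with urllib.request.urlopen(f"{BASE}{endpoint}?format=json") as resp:
        rows = json.load(resp)
    print(f"{endpoint}: {len(rows)} rows")
    for row in rows[:3]:  # peek at the first few entries
        print("  ", row)
```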

(file name not shown)

@@ -1,394 +0,0 @@
```python
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller, coint
from statsmodels.tsa.vector_ar.vecm import coint_johansen  # type: ignore
# ---
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import Config
from cvttpy_tools.logger import Log
from cvttpy_tools.timeutils import NanoPerSec, SecPerHour, current_nanoseconds
from cvttpy_tools.web.rest_client import RESTSender
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
from cvttpy_trading.trading.mkt_data.md_summary import MdTradesAggregate, MdSummary
@dataclass
class InstrumentQuality(NamedObject):
instrument_: ExchangeInstrument
record_count_: int
latest_tstamp_: Optional[pd.Timestamp]
status_: str
reason_: str
@dataclass
class PairStats(NamedObject):
instrument_a_: ExchangeInstrument
instrument_b_: ExchangeInstrument
pvalue_eg_: Optional[float]
pvalue_adf_: Optional[float]
pvalue_j_: Optional[float]
trace_stat_j_: Optional[float]
rank_eg_: int = 0
rank_adf_: int = 0
rank_j_: int = 0
composite_rank_: int = 0
def as_dict(self) -> Dict[str, Any]:
return {
"instrument_a": self.instrument_a_.instrument_id(),
"instrument_b": self.instrument_b_.instrument_id(),
"pvalue_eg": self.pvalue_eg_,
"pvalue_adf": self.pvalue_adf_,
"pvalue_j": self.pvalue_j_,
"trace_stat_j": self.trace_stat_j_,
"rank_eg": self.rank_eg_,
"rank_adf": self.rank_adf_,
"rank_j": self.rank_j_,
"composite_rank": self.composite_rank_,
}
class DataFetcher(NamedObject):
sender_: RESTSender
interval_sec_: int
history_depth_sec_: int
def __init__(
self,
base_url: str,
interval_sec: int,
history_depth_sec: int,
) -> None:
self.sender_ = RESTSender(base_url=base_url)
self.interval_sec_ = interval_sec
self.history_depth_sec_ = history_depth_sec
def fetch(self, exch_acct: str, inst: ExchangeInstrument) -> List[MdTradesAggregate]:
rqst_data = {
"exch_acct": exch_acct,
"instrument_id": inst.instrument_id(),
"interval_sec": self.interval_sec_,
"history_depth_sec": self.history_depth_sec_,
}
response = self.sender_.send_post(endpoint="md_summary", post_body=rqst_data)
if response.status_code not in (200, 201):
Log.error(
f"{self.fname()}: error {response.status_code} for {inst.details_short()}: {response.text}")
return []
mdsums: List[MdSummary] = MdSummary.from_REST_response(response=response)
return [
mdsum.create_md_trades_aggregate(
exch_acct=exch_acct, exch_inst=inst, interval_sec=self.interval_sec_
)
for mdsum in mdsums
]
class QualityChecker(NamedObject):
interval_sec_: int
def __init__(self, interval_sec: int) -> None:
self.interval_sec_ = interval_sec
def evaluate(self, inst: ExchangeInstrument, aggr: List[MdTradesAggregate]) -> InstrumentQuality:
if len(aggr) == 0:
return InstrumentQuality(
instrument_=inst,
record_count_=0,
latest_tstamp_=None,
status_="FAIL",
reason_="no records",
)
aggr_sorted = sorted(aggr, key=lambda a: a.aggr_time_ns_)
latest_ts = pd.to_datetime(aggr_sorted[-1].aggr_time_ns_, unit="ns", utc=True)
now_ts = pd.Timestamp.utcnow()
recency_cutoff = now_ts - pd.Timedelta(seconds=2 * self.interval_sec_)
if latest_ts <= recency_cutoff:
return InstrumentQuality(
instrument_=inst,
record_count_=len(aggr_sorted),
latest_tstamp_=latest_ts,
status_="FAIL",
reason_=f"stale: latest {latest_ts} <= cutoff {recency_cutoff}",
)
gaps_ok, reason = self._check_gaps(aggr_sorted)
status = "PASS" if gaps_ok else "FAIL"
return InstrumentQuality(
instrument_=inst,
record_count_=len(aggr_sorted),
latest_tstamp_=latest_ts,
status_=status,
reason_=reason,
)
def _check_gaps(self, aggr: List[MdTradesAggregate]) -> Tuple[bool, str]:
NUM_TRADES_THRESHOLD = 50
if len(aggr) < 2:
return True, "ok"
interval_ns = self.interval_sec_ * NanoPerSec
for idx in range(1, len(aggr)):
prev = aggr[idx - 1]
curr = aggr[idx]
delta = curr.aggr_time_ns_ - prev.aggr_time_ns_
missing_intervals = int(delta // interval_ns) - 1
if missing_intervals <= 0:
continue
prev_nt = prev.num_trades_
next_nt = curr.num_trades_
estimate = self._approximate_num_trades(prev_nt, next_nt)
if estimate > NUM_TRADES_THRESHOLD:
return False, (
f"gap of {missing_intervals} interval(s), est num_trades={estimate} > {NUM_TRADES_THRESHOLD}"
)
return True, "ok"
@staticmethod
def _approximate_num_trades(prev_nt: Optional[int], next_nt: Optional[int]) -> float:
if prev_nt is None and next_nt is None:
return 0.0
if prev_nt is None:
return float(next_nt)
if next_nt is None:
return float(prev_nt)
return (prev_nt + next_nt) / 2.0
class PairAnalyzer(NamedObject):
price_field_: str
interval_sec_: int
def __init__(self, price_field: str, interval_sec: int) -> None:
self.price_field_ = price_field
self.interval_sec_ = interval_sec
def analyze(self, series: Dict[ExchangeInstrument, pd.DataFrame]) -> List[PairStats]:
instruments = list(series.keys())
results: List[PairStats] = []
for i in range(len(instruments)):
for j in range(i + 1, len(instruments)):
inst_a = instruments[i]
inst_b = instruments[j]
df_a = series[inst_a][["tstamp", "price"]].rename(
columns={"price": "price_a"}
)
df_b = series[inst_b][["tstamp", "price"]].rename(
columns={"price": "price_b"}
)
merged = pd.merge(df_a, df_b, on="tstamp", how="inner").sort_values(
"tstamp"
)
stats = self._compute_stats(inst_a, inst_b, merged)
if stats:
results.append(stats)
self._rank(results)
return results
def _compute_stats(
self,
inst_a: ExchangeInstrument,
inst_b: ExchangeInstrument,
merged: pd.DataFrame,
) -> Optional[PairStats]:
if len(merged) < 2:
return None
px_a = merged["price_a"].astype(float)
px_b = merged["price_b"].astype(float)
std_a = float(px_a.std())
std_b = float(px_b.std())
if std_a == 0 or std_b == 0:
return None
z_a = (px_a - float(px_a.mean())) / std_a
z_b = (px_b - float(px_b.mean())) / std_b
p_eg: Optional[float]
p_adf: Optional[float]
p_j: Optional[float]
trace_stat: Optional[float]
try:
p_eg = float(coint(z_a, z_b)[1])
except Exception as exc:
Log.warning(f"{self.fname()}: EG failed for {inst_a.details_short()}/{inst_b.details_short()}: {exc}")
p_eg = None
try:
spread = z_a - z_b
p_adf = float(adfuller(spread, maxlag=1, regression="c")[1])
except Exception as exc:
Log.warning(f"{self.fname()}: ADF failed for {inst_a.details_short()}/{inst_b.details_short()}: {exc}")
p_adf = None
try:
data = np.column_stack([z_a, z_b])
res = coint_johansen(data, det_order=0, k_ar_diff=1)
trace_stat = float(res.lr1[0])
cv10, cv5, cv1 = res.cvt[0]
if trace_stat > cv1:
p_j = 0.01
elif trace_stat > cv5:
p_j = 0.05
elif trace_stat > cv10:
p_j = 0.10
else:
p_j = 1.0
except Exception as exc:
Log.warning(f"{self.fname()}: Johansen failed for {inst_a.details_short()}/{inst_b.details_short()}: {exc}")
p_j = None
trace_stat = None
return PairStats(
instrument_a_=inst_a,
instrument_b_=inst_b,
pvalue_eg_=p_eg,
pvalue_adf_=p_adf,
pvalue_j_=p_j,
trace_stat_j_=trace_stat,
)
def _rank(self, results: List[PairStats]) -> None:
self._assign_ranks(results, key=lambda r: r.pvalue_eg_, attr="rank_eg_")
self._assign_ranks(results, key=lambda r: r.pvalue_adf_, attr="rank_adf_")
self._assign_ranks(results, key=lambda r: r.pvalue_j_, attr="rank_j_")
for res in results:
res.composite_rank_ = res.rank_eg_ + res.rank_adf_ + res.rank_j_
results.sort(key=lambda r: r.composite_rank_)
@staticmethod
def _assign_ranks(
results: List[PairStats], key, attr: str
) -> None:
values = [key(r) for r in results]
sorted_vals = sorted([v for v in values if v is not None])
for res in results:
val = key(res)
if val is None:
setattr(res, attr, len(sorted_vals) + 1)
continue
rank = 1 + sum(1 for v in sorted_vals if v < val)
setattr(res, attr, rank)
class PairSelectionEngine(NamedObject):
config_: object
instruments_: List[ExchangeInstrument]
price_field_: str
fetcher_: DataFetcher
quality_: QualityChecker
analyzer_: PairAnalyzer
interval_sec_: int
history_depth_sec_: int
data_quality_cache_: List[InstrumentQuality]
pair_results_cache_: List[PairStats]
def __init__(
self,
config: Config,
instruments: List[ExchangeInstrument],
price_field: str,
) -> None:
self.config_ = config
self.instruments_ = instruments
self.price_field_ = price_field
interval_sec = int(config.get_value("interval_sec", 0))
history_depth_sec = int(config.get_value("history_depth_hours", 0)) * SecPerHour
base_url = config.get_value("cvtt_base_url", None)
assert interval_sec > 0, "interval_sec must be > 0"
assert history_depth_sec > 0, "history_depth_sec must be > 0"
assert base_url, "cvtt_base_url must be set"
self.fetcher_ = DataFetcher(
base_url=base_url,
interval_sec=interval_sec,
history_depth_sec=history_depth_sec,
)
self.quality_ = QualityChecker(interval_sec=interval_sec)
self.analyzer_ = PairAnalyzer(price_field=price_field, interval_sec=interval_sec)
self.interval_sec_ = interval_sec
self.history_depth_sec_ = history_depth_sec
self.data_quality_cache_ = []
self.pair_results_cache_ = []
async def run_once(self) -> None:
quality_results: List[InstrumentQuality] = []
price_series: Dict[ExchangeInstrument, pd.DataFrame] = {}
for inst in self.instruments_:
exch_acct = inst.user_data_.get("exch_acct") or inst.exchange_id_
aggr = self.fetcher_.fetch(exch_acct=exch_acct, inst=inst)
q = self.quality_.evaluate(inst, aggr)
quality_results.append(q)
if q.status_ != "PASS":
continue
df = self._to_dataframe(aggr, inst)
if len(df) > 0:
price_series[inst] = df
self.data_quality_cache_ = quality_results
self.pair_results_cache_ = self.analyzer_.analyze(price_series)
def _to_dataframe(self, aggr: List[MdTradesAggregate], inst: ExchangeInstrument) -> pd.DataFrame:
rows: List[Dict[str, Any]] = []
for item in aggr:
rows.append(
{
"tstamp": pd.to_datetime(item.aggr_time_ns_, unit="ns", utc=True),
"price": self._extract_price(item, inst),
"num_trades": item.num_trades_,
}
)
df = pd.DataFrame(rows)
return df.sort_values("tstamp").reset_index(drop=True)
def _extract_price(self, aggr: MdTradesAggregate, inst: ExchangeInstrument) -> float:
price_field = self.price_field_
# MdTradesAggregate inherits hist bar with fields open_, high_, low_, close_, vwap_
field_map = {
"open": aggr.open_,
"high": aggr.high_,
"low": aggr.low_,
"close": aggr.close_,
"vwap": aggr.vwap_,
}
raw = field_map.get(price_field, aggr.close_)
return inst.get_price(raw)
def sleep_seconds_until_next_cycle(self) -> float:
now_ns = current_nanoseconds()
interval_ns = self.interval_sec_ * NanoPerSec
next_boundary = (now_ns // interval_ns + 1) * interval_ns
return max(0.0, (next_boundary - now_ns) / NanoPerSec)
def quality_dicts(self) -> List[Dict[str, Any]]:
res: List[Dict[str, Any]] = []
for q in self.data_quality_cache_:
res.append(
{
"instrument": q.instrument_.instrument_id(),
"record_count": q.record_count_,
"latest_tstamp": q.latest_tstamp_.isoformat() if q.latest_tstamp_ else None,
"status": q.status_,
"reason": q.reason_,
}
)
return res
def pair_dicts(self) -> List[Dict[str, Any]]:
return [p.as_dict() for p in self.pair_results_cache_]
```
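
The statistical core of `_compute_stats` above is plain statsmodels, so it can be exercised standalone. A self-contained sketch of the same three tests (Engle-Granger, ADF on the z-score spread, Johansen trace) on a synthetic cointegrated pair; no cvttpy dependencies:

```python
# Standalone sketch of the three cointegration tests used in _compute_stats,
# run on two noisy observations of the same random walk.
import numpy as np
from statsmodels.tsa.stattools import adfuller, coint
from statsmodels.tsa.vector_ar.vecm import coint_johansen

rng = np.random.default_rng(42)
common = np.cumsum(rng.normal(size=500))           # shared random walk
px_a = common + rng.normal(scale=0.5, size=500)
px_b = common + rng.normal(scale=0.5, size=500)

z_a = (px_a - px_a.mean()) / px_a.std()            # z-score as in the code above
z_b = (px_b - px_b.mean()) / px_b.std()

p_eg = coint(z_a, z_b)[1]                          # Engle-Granger p-value
p_adf = adfuller(z_a - z_b, maxlag=1, regression="c")[1]  # ADF on the spread
res = coint_johansen(np.column_stack([z_a, z_b]), det_order=0, k_ar_diff=1)
trace_stat = res.lr1[0]
cv10, cv5, cv1 = res.cvt[0]                        # 90%/95%/99% critical values

print(f"EG p={p_eg:.4f}  ADF p={p_adf:.4f}  trace={trace_stat:.2f} (cv95={cv5:.2f})")
```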

(file name not shown)

@@ -1,140 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, List
from cvttpy_tools.base import NamedObject
class HtmlRenderer(NamedObject):
def __init__(self) -> None:
pass
@staticmethod
def render_data_quality(quality: List[Dict[str, Any]]) -> str:
rows = "".join(
f"<tr>"
f"<td>{q.get('instrument','')}</td>"
f"<td>{q.get('record_count','')}</td>"
f"<td>{q.get('latest_tstamp','')}</td>"
f"<td>{q.get('status','')}</td>"
f"<td>{q.get('reason','')}</td>"
f"</tr>"
for q in sorted(quality, key=lambda x: str(x.get("instrument", "")))
)
return f"""
<!DOCTYPE html>
<html>
<head>
<meta charset='utf-8'/>
<title>Data Quality</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 20px; }}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #ccc; padding: 8px; text-align: left; }}
th {{ background: #f2f2f2; }}
</style>
</head>
<body>
<h2>Data Quality</h2>
<table>
<thead>
<tr><th>Instrument</th><th>Records</th><th>Latest</th><th>Status</th><th>Reason</th></tr>
</thead>
<tbody>{rows}</tbody>
</table>
</body>
</html>
"""
@staticmethod
def render_pairs(pairs: List[Dict[str, Any]]) -> str:
if not pairs:
body = "<p>No pairs available. Check data quality and try again.</p>"
else:
body_rows = []
for p in pairs:
body_rows.append(
"<tr>"
f"<td>{p.get('instrument_a','')}</td>"
f"<td>{p.get('instrument_b','')}</td>"
f"<td data-value='{p.get('rank_eg',0)}'>{p.get('rank_eg','')}</td>"
f"<td data-value='{p.get('rank_adf',0)}'>{p.get('rank_adf','')}</td>"
f"<td data-value='{p.get('rank_j',0)}'>{p.get('rank_j','')}</td>"
f"<td data-value='{p.get('pvalue_eg','')}'>{p.get('pvalue_eg','')}</td>"
f"<td data-value='{p.get('pvalue_adf','')}'>{p.get('pvalue_adf','')}</td>"
f"<td data-value='{p.get('pvalue_j','')}'>{p.get('pvalue_j','')}</td>"
"</tr>"
)
body = "\n".join(body_rows)
return f"""
<!DOCTYPE html>
<html>
<head>
<meta charset='utf-8'/>
<title>Pair Selection</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 20px; }}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #ccc; padding: 8px; text-align: left; }}
th.sortable {{ cursor: pointer; background: #f2f2f2; }}
</style>
</head>
<body>
<h2>Pair Selection</h2>
<table id="pairs-table">
<thead>
<tr>
<th>Instrument A</th>
<th>Instrument B</th>
<th class="sortable" data-type="num">Rank-EG</th>
<th class="sortable" data-type="num">Rank-ADF</th>
<th class="sortable" data-type="num">Rank-J</th>
<th>EG p-value</th>
<th>ADF p-value</th>
<th>Johansen pseudo p</th>
</tr>
</thead>
<tbody>
{body}
</tbody>
</table>
<script>
(function() {{
const table = document.getElementById('pairs-table');
if (!table) return;
const getValue = (cell) => {{
const val = cell.getAttribute('data-value');
const num = parseFloat(val);
return isNaN(num) ? val : num;
}};
const toggleSort = (index, isNumeric) => {{
const tbody = table.querySelector('tbody');
const rows = Array.from(tbody.querySelectorAll('tr'));
const th = table.querySelectorAll('th')[index];
const dir = th.getAttribute('data-dir') === 'asc' ? 'desc' : 'asc';
th.setAttribute('data-dir', dir);
rows.sort((a, b) => {{
const va = getValue(a.children[index]);
const vb = getValue(b.children[index]);
if (isNumeric && !isNaN(va) && !isNaN(vb)) {{
return dir === 'asc' ? va - vb : vb - va;
}}
return dir === 'asc'
? String(va).localeCompare(String(vb))
: String(vb).localeCompare(String(va));
}});
tbody.innerHTML = '';
rows.forEach(r => tbody.appendChild(r));
}};
table.querySelectorAll('th.sortable').forEach((th, idx) => {{
th.addEventListener('click', () => toggleSort(idx, th.dataset.type === 'num'));
}});
}})();
</script>
</body>
</html>
"""

(file name not shown)

@@ -1,169 +0,0 @@
from __future__ import annotations
import asyncio
from typing import Callable, Coroutine, Dict, List
import aiohttp.web as web
from cvttpy_tools.app import App
from cvttpy_tools.config import Config
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import CvttAppConfig
from cvttpy_tools.logger import Log
from cvttpy_tools.settings.cvtt_types import BookIdT
from cvttpy_tools.web.rest_service import RestService
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
from cvttpy_trading.trading.mkt_data.md_summary import MdTradesAggregate
from cvttpy_trading.trading.exchange_config import ExchangeAccounts
# ---
from pairs_trading.lib.live.mkt_data_client import CvttRestMktDataClient
'''
config http://cloud16.cvtt.vpn/apps/pairs_trading
'''
HistMdCbT = Callable[[List[MdTradesAggregate]], Coroutine]
UpdateMdCbT = Callable[[MdTradesAggregate], Coroutine]
class PairTrader(NamedObject):
config_: CvttAppConfig
instruments_: List[ExchangeInstrument]
book_id_: BookIdT
live_strategy_: "PtLiveStrategy" #type: ignore
ti_sender_: "TradingInstructionsSender" #type: ignore
pricer_client_: CvttRestMktDataClient
rest_service_: RestService
latest_history_: Dict[ExchangeInstrument, List[MdTradesAggregate]]
def __init__(self) -> None:
self.instruments_ = []
self.latest_history_ = {}
App.instance().add_cmdline_arg(
"--instrument_A",
type=str,
required=True,
help=(
" Instrument A in pair (e.g., COINBASE_AT:PAIR-BTC-USD)"
),
)
App.instance().add_cmdline_arg(
"--instrument_B",
type=str,
required=True,
help=(
" Instrument B in pair (e.g., COINBASE_AT:PAIR-ETH-USD)"
),
)
App.instance().add_cmdline_arg(
"--book_id",
type=str,
required=True,
help="Book ID"
)
App.instance().add_call(App.Stage.Config, self._on_config())
App.instance().add_call(App.Stage.Run, self.run())
async def _on_config(self) -> None:
self.config_ = CvttAppConfig.instance()
self.book_id_ = App.instance().get_argument(name="book_id")
# ------- PARSE INSTRUMENTS -------
instr_list: List[str] = []
instr_str = App.instance().get_argument("instrument_A", "")
assert instr_str != "", "Missing instrument A"
instr_list.append(instr_str)
instr_str = App.instance().get_argument("instrument_B", "")
assert instr_str != "", "Missing instrument B"
instr_list.append(instr_str)
for instr in instr_list:
instr_parts = instr.split(":")
if len(instr_parts) != 2:
raise ValueError(f"Invalid pair format: {instr}")
exch_acct = instr_parts[0]
instrument_id = instr_parts[1]
exch_inst = ExchangeAccounts.instance().get_exchange_instrument(exch_acct=exch_acct, instrument_id=instrument_id)
assert exch_inst is not None, f"No ExchangeInstrument for {instr}"
exch_inst.user_data_["exch_acct"] = exch_acct
self.instruments_.append(exch_inst)
Log.info(f"{self.fname()} Instruments: {self.instruments_[0].details_short()} <==> {self.instruments_[1].details_short()}")
# ------- CREATE STRATEGY -------
from pairs_trading.lib.pt_strategy.live.live_strategy import PtLiveStrategy
strategy_config = CvttAppConfig.instance() #self.config_.get_subconfig("strategy_config", Config({}))
self.live_strategy_ = PtLiveStrategy(
config=strategy_config,
pairs_trader=self,
)
Log.info(f"{self.fname()} Strategy created: {self.live_strategy_}")
model_name = self.config_.get_value("model/name", "?model/name?")
self.config_.set_value("strategy_id", f"{self.live_strategy_.__class__.__name__}:{model_name}")
# # ------- CREATE PRICER CLIENT -------
self.pricer_client_ = CvttRestMktDataClient(config=self.config_)
Log.info(f"{self.fname()} MD client created: {self.pricer_client_}")
# ------- CREATE TRADER CLIENT -------
from pairs_trading.lib.live.ti_sender import TradingInstructionsSender
self.ti_sender_ = TradingInstructionsSender(config=self.config_, pairs_trader=self)
Log.info(f"{self.fname()} TI sender created: {self.ti_sender_}")
# # ------- CREATE REST SERVER -------
self.rest_service_ = RestService(
config_key=f"/api/REST"
)
# --- Strategy Handlers
self.rest_service_.add_handler(
method="POST",
url="/api/strategy",
handler=self._on_api_request,
)
async def subscribe_md(self) -> None:
from functools import partial
for exch_inst in self.instruments_:
exch_acct = exch_inst.user_data_.get("exch_acct", "?exch_acct?")
instrument_id = exch_inst.instrument_id()
await self.pricer_client_.add_subscription(
exch_acct=exch_acct,
instrument_id=instrument_id,
interval_sec=self.live_strategy_.interval_sec(),
history_depth_sec=self.live_strategy_.history_depth_sec(),
callback=partial(self._on_md_summary, exch_inst=exch_inst)
)
async def _on_md_summary(self, history: List[MdTradesAggregate], exch_inst: ExchangeInstrument) -> None:
Log.info(f"{self.fname()}: got {exch_inst.details_short()} data")
self.latest_history_[exch_inst] = history
if len(self.latest_history_) == 2:
from itertools import chain
all_aggrs = sorted(list(chain.from_iterable(self.latest_history_.values())), key=lambda X: X.aggr_time_ns_)
await self.live_strategy_.on_mkt_data_hist_snapshot(hist_aggr=all_aggrs)
self.latest_history_ = {}
async def _on_api_request(self, request: web.Request) -> web.Response:
# TODO choose pair
# TODO confirm chosen pair (after selection is implemented)
return web.Response() # TODO API request handler implementation
async def run(self) -> None:
Log.info(f"{self.fname()} ...")
while True:
await asyncio.sleep(0.1)
pass
if __name__ == "__main__":
App()
CvttAppConfig()
PairTrader()
App.instance().run()

(file name not shown)

@@ -1,186 +0,0 @@
#!/usr/bin/env bash
# ---------------- Settings
repo=git@cloud21.cvtt.vpn:/works/git/cvtt2/research/pairs_trading.git
dist_root=/home/cvttdist/software/cvtt2
dist_user=cvttdist
dist_host="cloud21.cvtt.vpn"
dist_ssh_port="22"
dist_locations="cloud21.cvtt.vpn:22 hs01.cvtt.vpn:22"
version_file="VERSION"
prj=pairs_trading
brnch=master
interactive=N
# ---------------- Settings
# ---------------- cmdline
usage() {
echo "Usage: $0 [-b <branch (master)> -i (interactive)"
exit 1
}
while getopts "b:i" opt; do
case ${opt} in
b )
brnch=$OPTARG
;;
i )
interactive=Y
;;
\? )
echo "Invalid option: -$OPTARG" >&2
usage
;;
: )
echo "Option -$OPTARG requires an argument." >&2
usage
;;
esac
done
# ---------------- cmdline
confirm() {
if [ "${interactive}" == "Y" ]; then
echo "--------------------------------"
echo -n "Press <Enter> to continue" && read
fi
}
if [ "${interactive}" == "Y" ]; then
echo -n "Enter project [${prj}]: "
read project
if [ "${project}" == "" ]
then
project=${prj}
fi
else
project=${prj}
fi
# repo=${git_repo_arr[${project}]}
if [ -z ${repo} ]; then
echo "ERROR: Project repository for ${project} not found"
exit -1
fi
echo "Project repo: ${repo}"
if [ "${interactive}" == "Y" ]; then
echo -n "Enter branch to build release from [${brnch}]: "
read branch
if [ "${branch}" == "" ]
then
branch=${brnch}
fi
else
branch=${brnch}
fi
tmp_dir=$(mktemp -d)
function cleanup {
cd ${HOME}
rm -rf ${tmp_dir}
}
trap cleanup EXIT
prj_dir="${tmp_dir}/${prj}"
cmd_arr=()
Cmd="git clone ${repo} ${prj_dir}"
cmd_arr+=("${Cmd}")
Cmd="cd ${prj_dir}"
cmd_arr+=("${Cmd}")
if [ "${interactive}" == "Y" ]; then
echo "------------------------------------"
echo "The following commands will execute:"
echo "------------------------------------"
for cmd in "${cmd_arr[@]}"
do
echo ${cmd}
done
fi
confirm
for cmd in "${cmd_arr[@]}"
do
echo ${cmd} && eval ${cmd}
done
Cmd="git checkout ${branch}"
echo ${Cmd} && eval ${Cmd}
if [ "${?}" != "0" ]; then
echo "ERROR: Branch ${branch} is not found"
cd ${HOME} && rm -rf ${tmp_dir}
exit -1
fi
release_version=$(cat ${version_file} | awk -F',' '{print $1}')
whats_new=$(cat ${version_file} | awk -F',' '{print $2}')
echo "--------------------------------"
echo "Version file: ${version_file}"
echo "Release version: ${release_version}"
confirm
version_tag="v${release_version}"
if [ "$(git tag -l "${version_tag}")" != "" ]; then
version_tag="${version_tag}.$(date +%Y%m%d_%H%M)"
fi
version_comment="'${version_tag} ${project} ${branch} $(date +%Y-%m-%d)\n${whats_new}'"
cmd_arr=()
Cmd="git tag -a ${version_tag} -m ${version_comment}"
cmd_arr+=("${Cmd}")
Cmd="git push origin --tags"
cmd_arr+=("${Cmd}")
Cmd="rm -rf .git"
cmd_arr+=("${Cmd}")
SourceLoc=../${project}
dist_path="${dist_root}/${project}/${release_version}"
for dist_loc in ${dist_locations}; do
dhp=(${dist_loc//:/ })
dist_host=${dhp[0]}
dist_port=${dhp[1]}
Cmd="rsync -avzh"
Cmd="${Cmd} --rsync-path=\"mkdir -p ${dist_path}"
Cmd="${Cmd} && rsync\" -e \"ssh -p ${dist_ssh_port}\""
Cmd="${Cmd} $SourceLoc ${dist_user}@${dist_host}:${dist_path}/"
cmd_arr+=("${Cmd}")
done
if [ "${interactive}" == "Y" ]; then
echo "------------------------------------"
echo "The following commands will execute:"
echo "------------------------------------"
for cmd in "${cmd_arr[@]}"
do
echo ${cmd}
done
fi
confirm
for cmd in "${cmd_arr[@]}"
do
pwd && echo ${cmd} && eval ${cmd}
done
echo "$0 Done ${project} ${release_version}"

(file name not shown)

@@ -1,47 +0,0 @@
{
"market_data_loading": {
"CRYPTO": {
"data_directory": "./data/crypto",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "PAIR-",
},
"EQUITY": {
"data_directory": "./data/equity",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "STOCK-",
}
},
# ====== Funding ======
"funding_per_pair": 2000.0,
# ====== Trading Parameters ======
"stat_model_price": "close",
"execution_price": {
"column": "vwap",
"shift": 1,
},
"dis-equilibrium_open_trshld": 1.75,
"dis-equilibrium_close_trshld": 0.9,
"model_class": "pairs_trading.lib.pt_strategy.models.OLSModel",
# "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.EGOptimizedWndDataPolicy",
# "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.ADFOptimizedWndDataPolicy",
"model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.JohansenOptdWndDataPolicy",
"min_training_size": 60,
"max_training_size": 150,
# ====== Stop Conditions ======
"stop_close_conditions": {
"profit": 2.0,
"loss": -0.5
}
# ====== End of Session Closeout ======
"close_outstanding_positions": true,
# "close_outstanding_positions": false,
"trading_hours": {
"timezone": "America/New_York",
"begin_session": "7:30:00",
"end_session": "18:30:00",
}
}

(file name not shown)

@@ -1,47 +0,0 @@
{
"market_data_loading": {
"CRYPTO": {
"data_directory": "./data/crypto",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "PAIR-",
},
"EQUITY": {
"data_directory": "./data/equity",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "STOCK-",
}
},
# ====== Funding ======
"funding_per_pair": 2000.0,
# ====== Trading Parameters ======
"stat_model_price": "close",
"execution_price": {
"column": "vwap",
"shift": 1,
},
"dis-equilibrium_open_trshld": 1.75,
"dis-equilibrium_close_trshld": 0.9,
"model_class": "pairs_trading.lib.pt_strategy.models.OLSModel",
"training_size": 120,
"model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.RollingWindowDataPolicy",
# "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.OptimizedWindowDataPolicy",
# "min_training_size": 60,
# "max_training_size": 150,
# ====== Stop Conditions ======
"stop_close_conditions": {
"profit": 2.0,
"loss": -0.5
}
# ====== End of Session Closeout ======
"close_outstanding_positions": true,
# "close_outstanding_positions": false,
"trading_hours": {
"timezone": "America/New_York",
"begin_session": "7:30:00",
"end_session": "18:30:00",
}
}

(file name not shown)

@@ -1,46 +0,0 @@
{
"refdata": {
"assets": @inc=http://@env{CONFIG_SERVICE}/refdata/assets
, "instruments": @inc=http://@env{CONFIG_SERVICE}/refdata/instruments
, "exchange_instruments": @inc=http://@env{CONFIG_SERVICE}/refdata/exchange_instruments
, "dynamic_instrument_exchanges": ["ALPACA"]
, "exchanges": @inc=http://@env{CONFIG_SERVICE}/refdata/exchanges
},
"market_data_loading": {
"CRYPTO": {
"data_directory": "./data/crypto",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "PAIR-",
},
"EQUITY": {
"data_directory": "./data/equity",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "STOCK-",
}
},
# ====== Funding ======
"funding_per_pair": 2000.0,
# ====== Model =======
"model": @inc=http://@env{CONFIG_SERVICE}/apps/common/models/@env{MODEL_CONFIG}
# ====== Trading =======
"execution_price": {
"column": "vwap",
"shift": 1,
},
# ====== Stop Conditions ======
"stop_close_conditions": {
"profit": 2.0,
"loss": -0.5
}
# ====== End of Session Closeout ======
"close_outstanding_positions": true,
# "close_outstanding_positions": false,
"trading_hours": {
"timezone": "America/New_York",
"begin_session": "7:30:00",
"end_session": "18:30:00",
}
}
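
The `@env{...}` placeholders above are presumably expanded from the process environment; the launch.json shown earlier sets CONFIG_SERVICE and MODEL_CONFIG before starting the app. A minimal sketch of that presumed substitution step (how the real cvttpy `Config` loader handles `@inc=`/`@env{}` is an assumption here):

```python
# Sketch of the presumed @env{NAME} expansion in the config dialect above.
import os
import re

def expand_env(text: str) -> str:
    # Replace each @env{NAME} with that environment variable's value;
    # fail loudly on unset variables rather than silently inlining "".
    def repl(match: re.Match) -> str:
        name = match.group(1)
        value = os.environ.get(name)
        if value is None:
            raise KeyError(f"config references unset environment variable {name}")
        return value
    return re.sub(r"@env\{(\w+)\}", repl, text)

os.environ.setdefault("CONFIG_SERVICE", "cloud16.cvtt.vpn:6789")
print(expand_env('"assets": @inc=http://@env{CONFIG_SERVICE}/refdata/assets'))
```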

configuration/equity.cfg

@@ -0,0 +1,27 @@
{
"security_type": "EQUITY",
"data_directory": "./data/equity",
"datafiles": [
"20250618.mktdata.ohlcv.db",
],
"db_table_name": "md_1min_bars",
"exchange_id": "ALPACA",
"instrument_id_pfx": "STOCK-",
"trading_hours": {
"begin_session": "9:30:00",
"end_session": "16:00:00",
"timezone": "America/New_York"
},
"price_column": "close",
"min_required_points": 30,
"zero_threshold": 1e-10,
"dis-equilibrium_open_trshld": 2.0,
"dis-equilibrium_close_trshld": 1.0,
"training_minutes": 120,
"funding_per_pair": 2000.0,
# "fit_method_class": "pt_trading.sliding_fit.SlidingFit",
"fit_method_class": "pt_trading.static_fit.StaticFit",
"exclude_instruments": ["CAN"],
"close_outstanding_positions": false
}

(file name not shown)

@@ -0,0 +1,26 @@
{
"security_type": "EQUITY",
"data_directory": "./data/equity",
"datafiles": [
"20250602.mktdata.ohlcv.db",
],
"db_table_name": "md_1min_bars",
"exchange_id": "ALPACA",
"instrument_id_pfx": "STOCK-",
"trading_hours": {
"begin_session": "9:30:00",
"end_session": "16:00:00",
"timezone": "America/New_York"
},
"price_column": "close",
"min_required_points": 30,
"zero_threshold": 1e-10,
"dis-equilibrium_open_trshld": 2.0,
"dis-equilibrium_close_trshld": 1.0,
"training_minutes": 120,
"funding_per_pair": 2000.0,
"fit_method_class": "pt_trading.fit_methods.StaticFit",
"exclude_instruments": ["CAN"]
}
# "fit_method_class": "pt_trading.fit_methods.SlidingFit",
# "fit_method_class": "pt_trading.fit_methods.StaticFit",

(file name not shown)

@@ -1,21 +0,0 @@
{
"strategy_config": @inc=file:///home/oleg/develop/pairs_trading/configuration/vecm-opt.cfg
"pricer_config": {
"pricer_url": "ws://localhost:12346/ws",
"history_depth_sec": 86400 #"60*60*24", # use simpleeval
"interval_sec": 60
},
"ti_config": {
"cvtt_base_url": "http://localhost:23456"
"book_id": "XXXXXXXXX",
"strategy_id": "XXXXXXXXX",
"ti_endpoint": {
"method": "POST",
"url": "/trading_instructions"
},
"health_check_endpoint": {
"method": "GET",
"url": "/ping"
}
}
}
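
Given the health_check_endpoint above, a liveness probe against the trading-instructions service is a single GET on cvtt_base_url + /ping. A minimal sketch, assuming the service is reachable at the configured address:

```python
# Sketch: probe the configured health_check_endpoint (GET /ping).
import urllib.request

def is_alive(base_url: str = "http://localhost:23456") -> bool:
    try:
        with urllib.request.urlopen(base_url + "/ping", timeout=2.0) as resp:
            return resp.status == 200
    except OSError:
        return False

print("TI service alive:", is_alive())
```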

(file name not shown)

@@ -1,56 +0,0 @@
{
# "refdata": {
# "assets": @inc=http://@env{CONFIG_SERVICE}/refdata/assets
# , "instruments": @inc=http://@env{CONFIG_SERVICE}/refdata/instruments
# , "exchange_instruments": @inc=http://@env{CONFIG_SERVICE}/refdata/exchange_instruments
# , "dynamic_instrument_exchanges": ["ALPACA"]
# , "exchanges": @inc=http://@env{CONFIG_SERVICE}/refdata/exchanges
# },
# "market_data_loading": {
# "CRYPTO": {
# "data_directory": "./data/crypto",
# "db_table_name": "md_1min_bars",
# "instrument_id_pfx": "PAIR-",
# },
# "EQUITY": {
# "data_directory": "./data/equity",
# "db_table_name": "md_1min_bars",
# "instrument_id_pfx": "STOCK-",
# }
# },
# # ====== Funding ======
# "funding_per_pair": 2000.0,
# ====== Trading Parameters ======
"stat_model_price": "close", # "vwap"
"execution_price": {
"column": "vwap",
"shift": 1,
},
"dis-equilibrium_open_trshld": 1.75,
"dis-equilibrium_close_trshld": 1.0,
"model_class": "pairs_trading.lib.pt_strategy.models.VECMModel",
# "training_size": 120,
# "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.RollingWindowDataPolicy",
"model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.ADFOptimizedWndDataPolicy",
"min_training_size": 60,
"max_training_size": 150,
# # ====== Stop Conditions ======
# "stop_close_conditions": {
# "profit": 2.0,
# "loss": -0.5
# }
# # ====== End of Session Closeout ======
# "close_outstanding_positions": true,
# # "close_outstanding_positions": false,
# "trading_hours": {
# "timezone": "America/New_York",
# "begin_session": "7:30:00",
# "end_session": "18:30:00",
# }
}

(file name not shown)

@@ -21,15 +21,10 @@
     "column": "vwap",
     "shift": 1,
   },
-  "dis-equilibrium_open_trshld": 1.75,
+  "dis-equilibrium_open_trshld": 2.0,
   "dis-equilibrium_close_trshld": 1.0,
-  "model_class": "pairs_trading.lib.pt_strategy.models.VECMModel",
-  "training_size": 120,
-  "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.RollingWindowDataPolicy",
-  # "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.OptimizedWindowDataPolicy",
-  # "min_training_size": 60,
-  # "max_training_size": 150,
-
+  "training_minutes": 120,
+  "fit_method_class": "pt_trading.vecm_rolling_fit.VECMRollingFit",

   # ====== Stop Conditions ======
   "stop_close_conditions": {
@@ -42,7 +37,7 @@
   # "close_outstanding_positions": false,
   "trading_hours": {
     "timezone": "America/New_York",
-    "begin_session": "7:30:00",
+    "begin_session": "9:30:00",
     "end_session": "18:30:00",
   }
 }

(file name not shown)

@@ -22,9 +22,8 @@
   },
   "dis-equilibrium_open_trshld": 2.0,
   "dis-equilibrium_close_trshld": 0.5,
-  "training_size": 120,
-  "model_class": "pairs_trading.lib.pt_strategy.models.OLSModel",
-  "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.ExpandingWindowDataPolicy",
+  "training_minutes": 120,
+  "fit_method_class": "pt_trading.z-score_rolling_fit.ZScoreRollingFit",

   # ====== Stop Conditions ======
   "stop_close_conditions": {
@@ -37,7 +36,7 @@
   # "close_outstanding_positions": false,
   "trading_hours": {
     "timezone": "America/New_York",
-    "begin_session": "7:30:00",
+    "begin_session": "9:30:00",
     "end_session": "18:30:00",
   }
 }

lg_notes.md

@@ -0,0 +1,115 @@
07.11.2025
pairs_trading/configuration <---- directory for config
equity_lg.cfg <-------- copy of equity.cfg
How to run a Program: TRIANGLEsquare ----> triangle EQUITY backtest
Results are in > results (timestamp table for all runs)
table "...timestamp... .pt_backtest_results.equity.db"
open the database with sqlite3:
> sqlite3 '/home/coder/results/20250721_175750.pt_backtest_results.equity.db'
sqlite> .databases
main: /home/coder/results/20250717_180122.pt_backtest_results.equity.db r/w
sqlite> .tables
config outstanding_positions pt_bt_results
sqlite> PRAGMA table_info('pt_bt_results');
0|date|DATE|0||0
1|pair|TEXT|0||0
2|symbol|TEXT|0||0
3|open_time|DATETIME|0||0
4|open_side|TEXT|0||0
5|open_price|REAL|0||0
6|open_quantity|INTEGER|0||0
7|open_disequilibrium|REAL|0||0
8|close_time|DATETIME|0||0
9|close_side|TEXT|0||0
10|close_price|REAL|0||0
11|close_quantity|INTEGER|0||0
12|close_disequilibrium|REAL|0||0
13|symbol_return|REAL|0||0
14|pair_return|REAL|0||0
select count(*) as cnt from pt_bt_results;
8
select * from pt_bt_results;
select
date, close_time, pair, symbol, symbol_return, pair_return
from pt_bt_results ;
select date, sum(symbol_return) as daily_return
from pt_bt_results where date = '2025-06-18' group by date;
.quit
sqlite3 '/home/coder/results/20250717_172435.pt_backtest_results.equity.db'
sqlite> select date, sum(symbol_return) as daily_return
from pt_bt_results group by date;
2025-06-02|1.29845390060828
...
2025-06-18|-43.5084977104115 <========== ????? ==========>
2025-06-20|11.8605547517183
select
date, close_time, pair, symbol, symbol_return, pair_return
from pt_bt_results ;
select date, close_time, pair, symbol, symbol_return, pair_return
from pt_bt_results where date = '2025-06-18';
./scripts/load_equity_pair_intraday.sh -A NVDA -B QQQ -d 20250701 -T ./intraday_md
To inspect exactly which sources, formats, and processing steps are used, open the script with:
head -n 50 ./scripts/load_equity_pair_intraday.sh
✓ Data file found: /home/coder/pairs_trading/data/crypto/20250605.mktdata.ohlcv.db
sqlite3 '/home/coder/results/20250722_201930.pt_backtest_results.crypto.db'
sqlite3 '/home/coder/results/xxxxxxxx_yyyyyy.pt_backtest_results.pseudo.db'
=== At your terminal, run these commands:
sqlite3 '/home/coder/results/20250722_201930.pt_backtest_results.crypto.db'
=== Then inside the SQLite prompt:
.mode csv
.headers on
.output results_20250722.csv
SELECT * FROM pt_bt_results;
.output stdout
.quit
cd /home/coder/
# === mode csv formats output as CSV
# === headers on includes column names
# === output my_table.csv directs output to that file
# === Run your SELECT query, then revert output
# === Open my_table.csv in Excel directly
# ======== Using scp (Secure Copy)
# === On your local machine, open a terminal and run:
scp cvtt@953f6e8df266:/home/coder/results_20250722.csv ~/Downloads/
# ===== convert CSV to a pandas DataFrame ======
import pandas as pd
# Replace with the actual path to your CSV file
file_path = '/home/coder/results_20250722.csv'
# Read the CSV file into a DataFrame
df = pd.read_csv(file_path)
# Show the first few rows
print(df.head())
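
The daily-return query run in sqlite above can be reproduced on the exported CSV with pandas; column names follow the PRAGMA table_info('pt_bt_results') listing earlier in these notes:

```python
# Equivalent of: select date, sum(symbol_return) as daily_return
#                from pt_bt_results group by date;
import pandas as pd

df = pd.read_csv('/home/coder/results_20250722.csv')
daily = df.groupby('date')['symbol_return'].sum().rename('daily_return')
print(daily)

# Drill into the suspicious day flagged above:
print(df[df['date'] == '2025-06-18'][['pair', 'symbol', 'symbol_return']])
```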

lib/cvtt/mkt_data.py

@@ -0,0 +1,188 @@
#!/usr/bin/env python3
import argparse
import asyncio
from functools import partial
import json
import logging
import uuid
from dataclasses import dataclass
from typing import Callable, Coroutine, Dict, List, Optional
import websockets
from websockets.asyncio.client import ClientConnection
MessageTypeT = str
SubscriptionIdT = str
MessageT = Dict
UrlT = str
CallbackT = Callable[[MessageTypeT, SubscriptionIdT, MessageT], Coroutine[None, str, None]]
@dataclass
class CvttPricesSubscription:
id_: str
exchange_config_name_: str
instrument_id_: str
interval_sec_: int
history_depth_sec_: int
is_subscribed_: bool
is_historical_: bool
callback_: CallbackT
def __init__(
self,
exchange_config_name: str,
instrument_id: str,
interval_sec: int,
history_depth_sec: int,
callback: CallbackT,
):
self.exchange_config_name_ = exchange_config_name
self.instrument_id_ = instrument_id
self.interval_sec_ = interval_sec
self.history_depth_sec_ = history_depth_sec
self.callback_ = callback
self.id_ = str(uuid.uuid4())
self.is_subscribed_ = False
self.is_historical_ = history_depth_sec > 0
class CvttPricerWebSockClient:
# Class members with type hints
ws_url_: UrlT
websocket_: Optional[ClientConnection]
subscriptions_: Dict[SubscriptionIdT, CvttPricesSubscription]
is_connected_: bool
logger_: logging.Logger
def __init__(self, url: str):
self.ws_url_ = url
self.websocket_ = None
self.is_connected_ = False
self.subscriptions_ = {}
self.logger_ = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
async def subscribe(
self, subscription: CvttPricesSubscription
) -> str: # returns subscription id
if not self.is_connected_:
try:
self.logger_.info(f"Connecting to {self.ws_url_}")
self.websocket_ = await websockets.connect(self.ws_url_)
self.is_connected_ = True
except Exception as e:
self.logger_.error(f"Unable to connect to {self.ws_url_}: {str(e)}")
raise e
subscr_msg = {
"type": "subscr",
"id": subscription.id_,
"subscr_type": "MD_AGGREGATE",
"exchange_config_name": subscription.exchange_config_name_,
"instrument_id": subscription.instrument_id_,
"interval_sec": subscription.interval_sec_,
}
if subscription.is_historical_:
subscr_msg["history_depth_sec"] = subscription.history_depth_sec_
assert self.websocket_ is not None
await self.websocket_.send(json.dumps(subscr_msg))
response = await self.websocket_.recv()
response_data = json.loads(response)
if not await self.handle_subscription_response(subscription, response_data):
await self.websocket_.close()
self.is_connected_ = False
raise Exception(f"Subscription failed: {str(response)}")
self.subscriptions_[subscription.id_] = subscription
return subscription.id_
async def handle_subscription_response(
self, subscription: CvttPricesSubscription, response: dict
) -> bool:
if response.get("type") != "subscr" or response.get("id") != subscription.id_:
return False
if response.get("status") == "success":
self.logger_.info(f"Subscription successful: {json.dumps(response)}")
return True
elif response.get("status") == "error":
self.logger_.error(f"Subscription failed: {response.get('reason')}")
return False
return False
async def run(self) -> None:
assert self.websocket_
try:
while self.is_connected_:
try:
message = await self.websocket_.recv()
message_str = (
message.decode("utf-8")
if isinstance(message, bytes)
else message
)
await self.process_message(json.loads(message_str))
except websockets.ConnectionClosed:
self.logger_.warning("Connection closed")
self.is_connected_ = False
break
except Exception as e:
self.logger_.error(f"Error occurred: {str(e)}")
self.is_connected_ = False
await asyncio.sleep(5) # Wait before reconnecting
async def process_message(self, message: Dict) -> None:
message_type = message.get("type")
if message_type in ["md_aggregate", "historical_md_aggregate"]:
subscription_id = message.get("subscr_id")
if subscription_id not in self.subscriptions_:
self.logger_.warning(f"Unknown subscription id: {subscription_id}")
return
subscription = self.subscriptions_[subscription_id]
await subscription.callback_(message_type, subscription_id, message)
else:
self.logger_.warning(f"Unknown message type: {message.get('type')}")
async def main() -> None:
async def on_message(message_type: MessageTypeT, subscr_id: SubscriptionIdT, message: Dict, instrument_id: str) -> None:
print(f"{message_type=} {subscr_id=} {instrument_id}")
if message_type == "md_aggregate":
            aggr = message.get("md_aggregate", {})
            print(f"[{aggr.get('tstmp', '')[:19]}] *** RLTM *** {message}")
elif message_type == "historical_md_aggregate":
for aggr in message.get("historical_data", []):
print(f"[{aggr['tstmp'][:19]}] *** HIST *** {aggr}")
else:
print(f"Unknown message type: {message_type}")
pricer_client = CvttPricerWebSockClient(
"ws://localhost:12346/ws"
)
await pricer_client.subscribe(CvttPricesSubscription(
exchange_config_name="COINBASE_AT",
instrument_id="PAIR-BTC-USD",
interval_sec=60,
history_depth_sec=60*60*24,
callback=partial(on_message, instrument_id="PAIR-BTC-USD")
))
await pricer_client.subscribe(CvttPricesSubscription(
exchange_config_name="COINBASE_AT",
instrument_id="PAIR-ETH-USD",
interval_sec=60,
history_depth_sec=60*60*24,
callback=partial(on_message, instrument_id="PAIR-ETH-USD")
))
await pricer_client.run()
if __name__ == "__main__":
asyncio.run(main())
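Note: run() simply returns once the connection drops, so nothing in the file above reconnects by itself. A minimal reconnect wrapper, sketched against the same public API (run_with_reconnect is not part of the file; the 5-second backoff mirrors the sleep already used in run()):
async def run_with_reconnect(url: UrlT, subscriptions: List[CvttPricesSubscription]) -> None:
    while True:
        client = CvttPricerWebSockClient(url)
        try:
            for sub in subscriptions:
                sub.is_subscribed_ = False  # reset before re-subscribing
                await client.subscribe(sub)
            await client.run()  # returns when the connection closes
        except Exception as exc:
            logging.getLogger(__name__).error(f"Reconnect loop error: {exc}")
        await asyncio.sleep(5)  # back off before trying again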

View File

@ -1,277 +0,0 @@
from __future__ import annotations
import asyncio
from typing import Dict, Any, List, Optional, Set
import requests
from cvttpy_tools.base import NamedObject
from cvttpy_tools.logger import Log
from cvttpy_tools.config import Config
from cvttpy_tools.timer import Timer
from cvttpy_tools.timeutils import NanosT, current_seconds
from cvttpy_tools.settings.cvtt_types import InstrumentIdT, IntervalSecT
from cvttpy_tools.web.rest_client import RESTSender
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
from cvttpy_trading.trading.accounting.exch_account import ExchangeAccountNameT
from cvttpy_trading.trading.mkt_data.md_summary import MdTradesAggregate, MdSummary, MdSummaryCallbackT
from cvttpy_trading.trading.exchange_config import ExchangeAccounts
# ---
# class MdSummary(HistMdBar):
# def __init__(
# self,
# ts_ns: int,
# open: float,
# high: float,
# low: float,
# close: float,
# volume: float,
# vwap: float,
# num_trades: int,
# ):
# super().__init__(ts=ts_ns)
# self.open_ = open
# self.high_ = high
# self.low_ = low
# self.close_ = close
# self.volume_ = volume
# self.vwap_ = vwap
# self.num_trades_ = num_trades
# @classmethod
# def from_REST_response(cls, response: requests.Response) -> List[MdSummary]:
# res: List[MdSummary] = []
# jresp = response.json()
# hist_data = jresp.get("historical_data", [])
# for hd in hist_data:
# res.append(
# MdSummary(
# ts_ns=hd["time_ns"],
# open=hd["open"],
# high=hd["high"],
# low=hd["low"],
# close=hd["close"],
# volume=hd["volume"],
# vwap=hd["vwap"],
# num_trades=hd["num_trades"],
# )
# )
# return res
# def create_md_trades_aggregate(
# self,
# exch_acct: ExchangeAccountNameT,
# exch_inst: ExchangeInstrument,
# interval_sec: IntervalSecT,
# ) -> MdTradesAggregate:
# res = MdTradesAggregate(
# exch_acct=exch_acct,
# exch_inst=exch_inst,
# interval_ns=interval_sec * NanoPerSec,
# )
# res.set(mdbar=self)
# return res
# MdSummaryCallbackT = Callable[[List[MdTradesAggregate]], Coroutine]
class MdSummaryCollector(NamedObject):
sender_: RESTSender
exch_acct_: ExchangeAccountNameT
exch_inst_: ExchangeInstrument
interval_sec_: IntervalSecT
history_depth_sec_: IntervalSecT
history_: List[MdTradesAggregate]
callbacks_: List[MdSummaryCallbackT]
timer_: Optional[Timer]
def __init__(
self,
sender: RESTSender,
exch_acct: ExchangeAccountNameT,
instrument_id: InstrumentIdT,
interval_sec: IntervalSecT,
history_depth_sec: IntervalSecT,
) -> None:
self.sender_ = sender
self.exch_acct_ = exch_acct
exch_inst = ExchangeAccounts.instance().get_exchange_instrument(
exch_acct=exch_acct, instrument_id=instrument_id
)
assert exch_inst is not None, f"Unable to find Exchange instrument for {exch_acct}/{instrument_id}"
self.exch_inst_ = exch_inst
self.interval_sec_ = interval_sec
self.history_depth_sec_ = history_depth_sec
self.history_ = []
self.callbacks_ = []
self.timer_ = None
def add_callback(self, cb: MdSummaryCallbackT) -> None:
self.callbacks_.append(cb)
def __hash__(self):
return hash(
(
self.exch_acct_,
self.exch_inst_.instrument_id(),
self.interval_sec_,
self.history_depth_sec_,
)
)
def rqst_data(self) -> Dict[str, Any]:
return {
"exch_acct": self.exch_acct_,
"instrument_id": self.exch_inst_.instrument_id(),
"interval_sec": self.interval_sec_,
"history_depth_sec": self.history_depth_sec_,
}
def get_history(self) -> List[MdSummary]:
response: requests.Response = self.sender_.send_post(
endpoint="md_summary", post_body=self.rqst_data()
)
if response.status_code not in (200, 201):
Log.error(
f"{self.fname()}: Received error: {response.status_code} - {response.text}"
)
return []
return MdSummary.from_REST_response(response=response)
def get_last(self) -> Optional[MdSummary]:
Log.info(f"{self.fname()}: for {self.exch_inst_.details_short()}")
rqst_data = self.rqst_data()
rqst_data["history_depth_sec"] = self.interval_sec_ * 2
response: requests.Response = self.sender_.send_post(
endpoint="md_summary", post_body=rqst_data
)
if response.status_code not in (200, 201):
Log.error(
f"{self.fname()}: Received error: {response.status_code} - {response.text}"
)
return None
res = MdSummary.from_REST_response(response=response)
Log.info(f"DEBUG *** {self.exch_inst_.base_asset_id_}: {res[-1].tstamp_}")
return None if len(res) == 0 else res[-1]
def is_empty(self) -> bool:
return len(self.history_) == 0
async def start(self) -> None:
if self.timer_:
Log.error(f"{self.fname()}: Timer is already started")
return
mdsum_hist = self.get_history()
self.history_ = [
mdsum.create_md_trades_aggregate(
exch_acct=self.exch_acct_,
exch_inst=self.exch_inst_,
interval_sec=self.interval_sec_,
)
for mdsum in mdsum_hist
]
await self.run_callbacks()
self.set_timer()
def set_timer(self):
if self.timer_:
self.timer_.cancel()
start_in = self.next_load_time() - current_seconds()
self.timer_ = Timer(
start_in_sec=start_in,
func=self._load_new,
)
Log.info(f"{self.fname()} Timer for {self.exch_inst_.details_short()} is set to run in {start_in} sec")
def next_load_time(self) -> NanosT:
ALLOW_LAG_SEC = 1
curr_sec = int(current_seconds())
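        # e.g., with interval_sec_=60: 18:04:37 floors to 18:04:00, so the next
        # load is scheduled for 18:05:01 (one second of allowed lag)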
return (curr_sec - curr_sec % self.interval_sec_) + self.interval_sec_ + ALLOW_LAG_SEC
async def _load_new(self) -> None:
last: Optional[MdSummary] = self.get_last()
if not last:
Log.warning(f"{self.fname()}: did not get last update")
elif not self.is_empty() and last.ts_ns_ <= self.history_[-1].aggr_time_ns_:
Log.info(
f"{self.fname()}: Received {last}. Already Have: {self.history_[-1]}"
)
else:
self.history_.append(last.create_md_trades_aggregate(exch_acct=self.exch_acct_, exch_inst=self.exch_inst_, interval_sec=self.interval_sec_))
await self.run_callbacks()
self.set_timer()
async def run_callbacks(self) -> None:
        for cb in self.callbacks_:
            await cb(self.history_)
def stop(self) -> None:
if self.timer_:
self.timer_.cancel()
self.timer_ = None
class CvttRestMktDataClient(NamedObject):
config_: Config
sender_: RESTSender
collectors_: Set[MdSummaryCollector]
def __init__(self, config: Config) -> None:
self.config_ = config
base_url = self.config_.get_value("cvtt_base_url", default="")
assert base_url
self.sender_ = RESTSender(base_url=base_url)
self.collectors_ = set()
async def add_subscription(
self,
exch_acct: ExchangeAccountNameT,
instrument_id: InstrumentIdT,
interval_sec: IntervalSecT,
history_depth_sec: IntervalSecT,
callback: MdSummaryCallbackT,
) -> None:
mdsc = MdSummaryCollector(
sender=self.sender_,
exch_acct=exch_acct,
instrument_id=instrument_id,
interval_sec=interval_sec,
history_depth_sec=history_depth_sec,
)
mdsc.add_callback(callback)
self.collectors_.add(mdsc)
await mdsc.start()
if __name__ == "__main__":
config = Config(json_src={"cvtt_base_url": "http://cvtt-tester-01.cvtt.vpn:23456"})
# config = Config(json_src={"cvtt_base_url": "http://dev-server-02.cvtt.vpn:23456"})
    async def _callback(history: List[MdTradesAggregate]) -> None:
Log.info(
f"MdSummary Hist Length is {len(history)}. Last summary: {history[-1] if len(history) > 0 else '[]'}"
)
async def __run() -> None:
Log.info("Starting...")
cvtt_client = CvttRestMktDataClient(config)
await cvtt_client.add_subscription(
exch_acct="COINBASE_AT",
instrument_id="PAIR-BTC-USD",
interval_sec=60,
history_depth_sec=24 * 3600,
            callback=_callback,
)
while True:
await asyncio.sleep(5)
asyncio.run(__run())

View File

@ -1,60 +0,0 @@
from __future__ import annotations
from typing import Dict
import time
import requests
from cvttpy_tools.base import NamedObject
class RESTSender(NamedObject):
session_: requests.Session
base_url_: str
def __init__(self, base_url: str) -> None:
self.base_url_ = base_url
self.session_ = requests.Session()
def is_ready(self) -> bool:
"""Checks if the server is up and responding"""
url = f"{self.base_url_}/ping"
try:
response = self.session_.get(url)
response.raise_for_status()
return True
except requests.exceptions.RequestException:
return False
def send_post(self, endpoint: str, post_body: Dict) -> requests.Response:
while not self.is_ready():
print("Waiting for FrontGateway to start...")
time.sleep(5)
url = f"{self.base_url_}/{endpoint}"
try:
return self.session_.request(
method="POST",
url=url,
json=post_body,
headers={"Content-Type": "application/json"},
)
except requests.exceptions.RequestException as excpt:
raise ConnectionError(
f"Failed to send status={excpt.response.status_code} {excpt.response.text}" # type: ignore
) from excpt
def send_get(self, endpoint: str) -> requests.Response:
while not self.is_ready():
print("Waiting for FrontGateway to start...")
time.sleep(5)
url = f"{self.base_url_}/{endpoint}"
try:
return self.session_.request(method="GET", url=url)
except requests.exceptions.RequestException as excpt:
raise ConnectionError(
f"Failed to send status={excpt.response.status_code} {excpt.response.text}" # type: ignore
) from excpt
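A hypothetical usage sketch for RESTSender against the md_summary endpoint (the request body mirrors MdSummaryCollector.rqst_data above; the host and field values are illustrative, taken from examples elsewhere in this change):
sender = RESTSender(base_url="http://cvtt-tester-01.cvtt.vpn:23456")
response = sender.send_post(
    endpoint="md_summary",
    post_body={
        "exch_acct": "COINBASE_AT",
        "instrument_id": "PAIR-BTC-USD",
        "interval_sec": 60,
        "history_depth_sec": 24 * 3600,
    },
)
print(response.status_code, response.text[:200])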

View File

@ -1,50 +0,0 @@
from enum import Enum
import requests
# import aiohttp
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import Config
from cvttpy_tools.logger import Log
from cvttpy_tools.web.rest_client import RESTSender
# ---
from cvttpy_trading.trading.trading_instructions import TradingInstructions
# ---
from pairs_trading.apps.pair_trader import PairTrader
class TradingInstructionsSender(NamedObject):
config_: Config
sender_: RESTSender
pairs_trader_: PairTrader
class TradingInstType(str, Enum):
TARGET_POSITION = "TARGET_POSITION"
DIRECT_ORDER = "DIRECT_ORDER"
MARKET_MAKING = "MARKET_MAKING"
NONE = "NONE"
def __init__(self, config: Config, pairs_trader: PairTrader) -> None:
self.config_ = config
base_url = self.config_.get_value("cvtt_base_url", default="")
assert base_url
self.sender_ = RESTSender(base_url=base_url)
self.pairs_trader_ = pairs_trader
self.book_id_ = self.pairs_trader_.book_id_
assert self.book_id_, "book_id is required"
self.strategy_id_ = config.get_value("strategy_id", "")
assert self.strategy_id_, "strategy_id is required"
async def send_trading_instructions(self, ti: TradingInstructions) -> None:
Log.info(f"{self.fname()}: sending {ti=}")
response: requests.Response = self.sender_.send_post(
endpoint="trading_instructions", post_body=ti.to_dict()
)
if response.status_code not in (200, 201):
Log.error(
f"{self.fname()}: Received error: {response.status_code} - {response.text}"
)

View File

@ -1,351 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional
import pandas as pd
# ---
from cvttpy_tools.base import NamedObject
from cvttpy_tools.app import App
from cvttpy_tools.config import Config
from cvttpy_tools.settings.cvtt_types import IntervalSecT
from cvttpy_tools.timeutils import NanosT, SecPerHour, current_nanoseconds, NanoPerSec, format_nanos_utc
from cvttpy_tools.logger import Log
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
from cvttpy_trading.trading.mkt_data.md_summary import MdTradesAggregate
from cvttpy_trading.trading.trading_instructions import TradingInstructions
from cvttpy_trading.trading.trading_instructions import TargetPositionSignal
# ---
from pairs_trading.lib.pt_strategy.model_data_policy import ModelDataPolicy
from pairs_trading.lib.pt_strategy.pt_model import Prediction
from pairs_trading.lib.pt_strategy.trading_pair import LiveTradingPair
from pairs_trading.apps.pair_trader import PairTrader
from pairs_trading.lib.pt_strategy.pt_market_data import LiveMarketData
class PtLiveStrategy(NamedObject):
config_: Config
instruments_: List[ExchangeInstrument]
interval_sec_: IntervalSecT
history_depth_sec_: IntervalSecT
open_threshold_: float
close_threshold_: float
trading_pair_: LiveTradingPair
model_data_policy_: ModelDataPolicy
pairs_trader_: PairTrader
# for presentation: history of prediction values and trading signals
predictions_df_: pd.DataFrame
trading_signals_df_: pd.DataFrame
allowed_md_lag_sec_: int
def __init__(
self,
config: Config,
pairs_trader: PairTrader,
):
self.config_ = config
self.pairs_trader_ = pairs_trader
self.trading_pair_ = LiveTradingPair(
config=config,
instruments=self.pairs_trader_.instruments_,
)
self.model_data_policy_ = ModelDataPolicy.create(
self.config_,
is_real_time=True,
pair=self.trading_pair_,
)
assert (
self.model_data_policy_ is not None
), f"{self.fname()}: Unable to create ModelDataPolicy"
self.predictions_df_ = pd.DataFrame()
self.trading_signals_df_ = pd.DataFrame()
self.instruments_ = self.pairs_trader_.instruments_
App.instance().add_call(
stage=App.Stage.Config, func=self._on_config(), can_run_now=True
)
async def _on_config(self) -> None:
self.interval_sec_ = self.config_.get_value("interval_sec", 0)
assert self.interval_sec_ > 0, "interval_sec cannot be 0"
self.history_depth_sec_ = (
self.config_.get_value("history_depth_hours", 0) * SecPerHour
)
assert self.history_depth_sec_ > 0, "history_depth_hours cannot be 0"
self.allowed_md_lag_sec_ = self.config_.get_value("allowed_md_lag_sec", 3)
self.open_threshold_ = self.config_.get_value(
"model/disequilibrium/open_trshld", 0.0
)
self.close_threshold_ = self.config_.get_value(
"model/disequilibrium/close_trshld", 0.0
)
assert (
self.open_threshold_ > 0
), "disequilibrium/open_trshld must be greater than 0"
assert (
self.close_threshold_ > 0
), "disequilibrium/close_trshld must be greater than 0"
await self.pairs_trader_.subscribe_md()
def __repr__(self) -> str:
return f"{self.classname()}: trading_pair={self.trading_pair_}, mdp={self.model_data_policy_.__class__.__name__}, "
async def on_mkt_data_hist_snapshot(
self, hist_aggr: List[MdTradesAggregate]
) -> None:
if not self._is_md_actual(hist_aggr=hist_aggr):
return
market_data_df: pd.DataFrame = self._create_md_df(hist_aggr=hist_aggr)
if len(market_data_df) == 0:
Log.warning(f"{self.fname()} Unable to create market data df")
return
self.trading_pair_.market_data_ = market_data_df
Log.info(f"{self.fname()}: Running prediction for pair: {self.trading_pair_}")
prediction = self.trading_pair_.run(
market_data_df, self.model_data_policy_.advance()
)
self.predictions_df_ = pd.concat(
[self.predictions_df_, prediction.to_df()], ignore_index=True
)
trading_instructions: List[TradingInstructions] = (
self._create_trading_instructions(
prediction=prediction, last_row=market_data_df.iloc[-1]
)
)
        if trading_instructions:
await self._send_trading_instructions(trading_instructions)
def _is_md_actual(self, hist_aggr: List[MdTradesAggregate]) -> bool:
if len(hist_aggr) == 0:
Log.warning(f"{self.fname()} list of aggregates IS EMPTY")
return False
curr_ns = current_nanoseconds()
# MAYBE check market data length
# at 18:05:01 we should see data for 18:04:00
lag_sec = (curr_ns - hist_aggr[-1].aggr_time_ns_) / NanoPerSec - self.interval_sec()
if lag_sec > self.allowed_md_lag_sec_:
Log.warning(
f"{self.fname()} {hist_aggr[-1].exch_inst_.details_short()}"
f" Lagging {int(lag_sec)} > {self.allowed_md_lag_sec_} seconds:"
f"\n{len(hist_aggr)} records"
f"\n{hist_aggr[-1].exch_inst_.base_asset_id_}: {hist_aggr[-1].tstamp()}"
f"\n{hist_aggr[-2].exch_inst_.base_asset_id_}: {hist_aggr[-2].tstamp()}"
)
return False
else:
Log.info(
f"{self.fname()} {hist_aggr[-1].exch_inst_.details_short()}"
f" Lag {int(lag_sec)} <= {self.allowed_md_lag_sec_} seconds"
f"\n{len(hist_aggr)} records"
f"\n{hist_aggr[-1].exch_inst_.base_asset_id_}: {hist_aggr[-1].tstamp()}"
f"\n{hist_aggr[-2].exch_inst_.base_asset_id_}: {hist_aggr[-2].tstamp()}"
)
return True
def _create_md_df(self, hist_aggr: List[MdTradesAggregate]) -> pd.DataFrame:
"""
tstamp time_ns symbol open high low close volume num_trades vwap
0 2025-09-10 11:30:00 1757503800000000000 ADA-USDT 0.8750 0.8750 0.8743 0.8743 50710.500 0 0.874489
1 2025-09-10 11:30:00 1757503800000000000 SOL-USDT 219.9700 219.9800 219.6600 219.7000 2648.582 0 219.787847
2 2025-09-10 11:31:00 1757503860000000000 SOL-USDT 219.7000 219.7300 219.6200 219.6200 1134.886 0 219.663460
3 2025-09-10 11:31:00 1757503860000000000 ADA-USDT 0.8743 0.8745 0.8741 0.8741 10696.400 0 0.874234
4 2025-09-10 11:32:00 1757503920000000000 ADA-USDT 0.8742 0.8742 0.8739 0.8740 18546.900 0 0.874037
"""
rows: List[Dict[str, Any]] = []
for aggr in hist_aggr:
exch_inst = aggr.exch_inst_
rows.append(
{
# convert nanoseconds → tz-aware pandas timestamp
"tstamp": pd.to_datetime(aggr.aggr_time_ns_, unit="ns", utc=True),
"time_ns": aggr.aggr_time_ns_,
"symbol": exch_inst.instrument_id().split("-", 1)[1],
"exchange_id": exch_inst.exchange_id_,
"instrument_id": exch_inst.instrument_id(),
"open": exch_inst.get_price(aggr.open_),
"high": exch_inst.get_price(aggr.high_),
"low": exch_inst.get_price(aggr.low_),
"close": exch_inst.get_price(aggr.close_),
"volume": exch_inst.get_quantity(aggr.volume_),
"num_trades": aggr.num_trades_,
"vwap": exch_inst.get_price(aggr.vwap_),
}
)
source_md_df = pd.DataFrame(
rows,
columns=[
"tstamp",
"time_ns",
"symbol",
"exchange_id",
"instrument_id",
"open",
"high",
"low",
"close",
"volume",
"num_trades",
"vwap",
],
)
# automatic sorting
source_md_df.sort_values(
by=["time_ns", "symbol"],
ascending=True,
inplace=True,
kind="mergesort", # stable sort
)
source_md_df.reset_index(drop=True, inplace=True)
pt_mkt_data = LiveMarketData(config=self.config_, instruments=self.instruments_)
pt_mkt_data.origin_mkt_data_df_ = source_md_df
pt_mkt_data.set_market_data()
return pt_mkt_data.market_data_df_
def interval_sec(self) -> IntervalSecT:
return self.interval_sec_
def history_depth_sec(self) -> IntervalSecT:
return self.history_depth_sec_
async def _send_trading_instructions(
self, trading_instructions: List[TradingInstructions]
) -> None:
for ti in trading_instructions:
Log.info(f"{self.fname()} Sending trading instructions {ti}")
await self.pairs_trader_.ti_sender_.send_trading_instructions(ti)
def _create_trading_instructions(
self, prediction: Prediction, last_row: pd.Series
) -> List[TradingInstructions]:
trd_instructions: List[TradingInstructions] = []
pair = self.trading_pair_
scaled_disequilibrium = prediction.scaled_disequilibrium_
abs_scaled_disequilibrium = abs(scaled_disequilibrium)
if abs_scaled_disequilibrium >= self.open_threshold_:
trd_instructions = self._create_open_trade_instructions(
pair, row=last_row, prediction=prediction
)
elif abs_scaled_disequilibrium <= self.close_threshold_ or pair.to_stop_close_conditions(predicted_row=last_row):
trd_instructions = self._create_close_trade_instructions(
pair, row=last_row # , prediction=prediction
)
return trd_instructions
def _strength(self, scaled_disequilibrium: float) -> float:
# TODO PtLiveStrategy._strength()
return 1.0
def _create_open_trade_instructions(
self, pair: LiveTradingPair, row: pd.Series, prediction: Prediction
) -> List[TradingInstructions]:
diseqlbrm = prediction.disequilibrium_
scaled_disequilibrium = prediction.scaled_disequilibrium_
if diseqlbrm > 0:
side_a = -1
side_b = 1
else:
side_a = 1
side_b = -1
ti_a: Optional[TradingInstructions] = TradingInstructions(
book=self.pairs_trader_.book_id_,
strategy_id=self.__class__.__name__,
ti_type=TradingInstructions.Type.TARGET_POSITION,
issued_ts_ns=current_nanoseconds(),
data=TargetPositionSignal(
strength=side_a * self._strength(scaled_disequilibrium),
exchange_id=pair.get_instrument_a().exchange_id_,
base_asset=pair.get_instrument_a().base_asset_id_,
quote_asset=pair.get_instrument_a().quote_asset_id_,
user_data={}
),
)
if not ti_a:
return []
ti_b: Optional[TradingInstructions] = TradingInstructions(
book=self.pairs_trader_.book_id_,
strategy_id=self.__class__.__name__,
ti_type=TradingInstructions.Type.TARGET_POSITION,
issued_ts_ns=current_nanoseconds(),
data=TargetPositionSignal(
strength=side_b * self._strength(scaled_disequilibrium),
exchange_id=pair.get_instrument_b().exchange_id_,
base_asset=pair.get_instrument_b().base_asset_id_,
quote_asset=pair.get_instrument_b().quote_asset_id_,
user_data={}
),
)
if not ti_b:
return []
return [ti_a, ti_b]
def _create_close_trade_instructions(
self, pair: LiveTradingPair, row: pd.Series
) -> List[TradingInstructions]:
ti_a: Optional[TradingInstructions] = TradingInstructions(
book=self.pairs_trader_.book_id_,
strategy_id=self.__class__.__name__,
ti_type=TradingInstructions.Type.TARGET_POSITION,
issued_ts_ns=current_nanoseconds(),
data=TargetPositionSignal(
strength=0,
exchange_id=pair.get_instrument_a().exchange_id_,
base_asset=pair.get_instrument_a().base_asset_id_,
quote_asset=pair.get_instrument_a().quote_asset_id_,
user_data={}
),
)
if not ti_a:
return []
ti_b: Optional[TradingInstructions] = TradingInstructions(
book=self.pairs_trader_.book_id_,
strategy_id=self.__class__.__name__,
ti_type=TradingInstructions.Type.TARGET_POSITION,
issued_ts_ns=current_nanoseconds(),
data=TargetPositionSignal(
strength=0,
exchange_id=pair.get_instrument_b().exchange_id_,
base_asset=pair.get_instrument_b().base_asset_id_,
quote_asset=pair.get_instrument_b().quote_asset_id_,
user_data={}
),
)
if not ti_b:
return []
return [ti_a, ti_b]
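For reference, a hypothetical config fragment exercising the keys read in _on_config above (values are illustrative; 3 is the code's own default for allowed_md_lag_sec):
config = Config(json_src={
    "interval_sec": 60,
    "history_depth_hours": 24,
    "allowed_md_lag_sec": 3,
    "model": {
        "disequilibrium": {"open_trshld": 2.0, "close_trshld": 0.5},
    },
})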

View File

@ -1,253 +0,0 @@
from __future__ import annotations
import copy
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, Optional, cast
import numpy as np
import pandas as pd
from cvttpy_tools.config import Config
@dataclass
class DataWindowParams:
training_size_: int
training_start_index_: int
class ModelDataPolicy(ABC):
config_: Config
current_data_params_: DataWindowParams
count_: int
is_real_time_: bool
def __init__(self, config: Config, *args: Any, **kwargs: Any):
self.config_ = config
self.current_data_params_ = DataWindowParams(
training_size_=config.get_value("model/training_size", 120),
training_start_index_=0,
)
self.count_ = 0
self.is_real_time_ = kwargs.get("is_real_time", False)
@abstractmethod
def advance(self, mkt_data_df: Optional[pd.DataFrame] = None) -> DataWindowParams:
self.count_ += 1
if not self.is_real_time_:
print(self.count_, end="\r")
return self.current_data_params_
@staticmethod
def create(config: Config, *args: Any, **kwargs: Any) -> ModelDataPolicy:
import importlib
model_data_policy_class_name = config.get_value("model/model_data_policy_class", None)
assert model_data_policy_class_name is not None
module_name, class_name = model_data_policy_class_name.rsplit(".", 1)
module = importlib.import_module(module_name)
model_training_data_policy_object = getattr(module, class_name)(
config=config, *args, **kwargs
)
return cast(ModelDataPolicy, model_training_data_policy_object)
class RollingWindowDataPolicy(ModelDataPolicy):
def __init__(self, config: Config, *args: Any, **kwargs: Any):
super().__init__(config, *args, **kwargs)
self.count_ = 1
def advance(self, mkt_data_df: Optional[pd.DataFrame] = None) -> DataWindowParams:
super().advance(mkt_data_df)
if self.is_real_time_:
self.current_data_params_.training_start_index_ = 0
            if mkt_data_df is not None and len(mkt_data_df) > self.current_data_params_.training_size_:
                self.current_data_params_.training_start_index_ = -self.current_data_params_.training_size_
else:
self.current_data_params_.training_start_index_ += 1
return self.current_data_params_
class OptimizedWndDataPolicy(ModelDataPolicy, ABC):
mkt_data_df_: pd.DataFrame
pair_: TradingPair # type: ignore
min_training_size_: int
max_training_size_: int
end_index_: int
prices_a_: np.ndarray
prices_b_: np.ndarray
def __init__(self, config: Config, *args: Any, **kwargs: Any):
super().__init__(config, *args, **kwargs)
assert (
kwargs.get("pair") is not None
), "pair must be provided"
assert (config.key_exists("model/max_training_size") and config.key_exists("model/min_training_size")
), "min_training_size and max_training_size must be provided"
self.min_training_size_ = cast(int, config.get_value("model/min_training_size"))
self.max_training_size_ = cast(int, config.get_value("model/max_training_size"))
from pairs_trading.lib.pt_strategy.trading_pair import TradingPair
self.pair_ = cast(TradingPair, kwargs.get("pair"))
if "mkt_data" in kwargs:
self.mkt_data_df_ = cast(pd.DataFrame, kwargs.get("mkt_data"))
col_a, col_b = self.pair_.colnames()
self.prices_a_ = np.array(self.mkt_data_df_[col_a])
self.prices_b_ = np.array(self.mkt_data_df_[col_b])
assert self.min_training_size_ < self.max_training_size_
def advance(self, mkt_data_df: Optional[pd.DataFrame] = None) -> DataWindowParams:
super().advance(mkt_data_df)
if mkt_data_df is not None:
self.mkt_data_df_ = mkt_data_df
if self.is_real_time_:
self.end_index_ = len(self.mkt_data_df_) - 1
else:
self.end_index_ = self.current_data_params_.training_start_index_ + self.max_training_size_
if self.end_index_ > len(self.mkt_data_df_) - 1:
self.end_index_ = len(self.mkt_data_df_) - 1
self.current_data_params_.training_start_index_ = self.end_index_ - self.max_training_size_
if self.current_data_params_.training_start_index_ < 0:
self.current_data_params_.training_start_index_ = 0
col_a, col_b = self.pair_.colnames()
self.prices_a_ = np.array(self.mkt_data_df_[col_a])
self.prices_b_ = np.array(self.mkt_data_df_[col_b])
self.current_data_params_ = self.optimize_window_size()
return self.current_data_params_
@abstractmethod
def optimize_window_size(self) -> DataWindowParams:
...
class EGOptimizedWndDataPolicy(OptimizedWndDataPolicy):
'''
# Engle-Granger cointegration test
*** VERY SLOW ***
'''
def __init__(self, config: Config, *args: Any, **kwargs: Any):
super().__init__(config, *args, **kwargs)
def optimize_window_size(self) -> DataWindowParams:
# Run Engle-Granger cointegration test
last_pvalue = 1.0
result = copy.copy(self.current_data_params_)
for trn_size in range(self.min_training_size_, self.max_training_size_):
if self.end_index_ - trn_size < 0:
break
from statsmodels.tsa.stattools import coint # type: ignore
start_index = self.end_index_ - trn_size
series_a = self.prices_a_[start_index : self.end_index_]
series_b = self.prices_b_[start_index : self.end_index_]
eg_pvalue = float(coint(series_a, series_b)[1])
if eg_pvalue < last_pvalue:
last_pvalue = eg_pvalue
result.training_size_ = trn_size
result.training_start_index_ = start_index
# print(
# f"*** DEBUG *** end_index={self.end_index_}, best_trn_size={self.current_data_params_.training_size}, {last_pvalue=}"
# )
return result
class ADFOptimizedWndDataPolicy(OptimizedWndDataPolicy):
# Augmented Dickey-Fuller test
def __init__(self, config: Config, *args: Any, **kwargs: Any):
super().__init__(config, *args, **kwargs)
def optimize_window_size(self) -> DataWindowParams:
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant
from statsmodels.tsa.stattools import adfuller
last_pvalue = 1.0
result = copy.copy(self.current_data_params_)
for trn_size in range(self.min_training_size_, self.max_training_size_):
if self.end_index_ - trn_size < 0:
break
start_index = self.end_index_ - trn_size
y = self.prices_a_[start_index : self.end_index_]
x = self.prices_b_[start_index : self.end_index_]
# Add constant to x for intercept
x_with_const = add_constant(x)
# OLS regression: y = a + b*x + e
model = OLS(y, x_with_const).fit()
residuals = y - model.predict(x_with_const)
# ADF test on residuals
try:
adf_result = adfuller(residuals, maxlag=1, regression="c")
adf_pvalue = float(adf_result[1])
            except Exception:
                # Handle edge cases (e.g., constant or near-constant residual series)
                adf_pvalue = 1.0
if adf_pvalue < last_pvalue:
last_pvalue = adf_pvalue
result.training_size_ = trn_size
result.training_start_index_ = start_index
# print(
# f"*** DEBUG *** end_index={self.end_index_},"
# f" best_trn_size={self.current_data_params_.training_size},"
# f" {last_pvalue=}"
# )
return result
class JohansenOptdWndDataPolicy(OptimizedWndDataPolicy):
# Johansen test
def __init__(self, config: Config, *args: Any, **kwargs: Any):
super().__init__(config, *args, **kwargs)
def optimize_window_size(self) -> DataWindowParams:
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import numpy as np
best_stat = -np.inf
best_trn_size = 0
best_start_index = -1
result = copy.copy(self.current_data_params_)
for trn_size in range(self.min_training_size_, self.max_training_size_):
if self.end_index_ - trn_size < 0:
break
start_index = self.end_index_ - trn_size
series_a = self.prices_a_[start_index:self.end_index_]
series_b = self.prices_b_[start_index:self.end_index_]
# Combine into 2D matrix for Johansen test
try:
data = np.column_stack([series_a, series_b])
# Johansen test: det_order=0 (no deterministic trend), k_ar_diff=1 (lag)
res = coint_johansen(data, det_order=0, k_ar_diff=1)
# Trace statistic for cointegration rank 1
trace_stat = res.lr1[0] # test stat for rank=0 vs >=1
critical_value = res.cvt[0, 1] # 5% critical value
if trace_stat > best_stat:
best_stat = trace_stat
best_trn_size = trn_size
best_start_index = start_index
except Exception:
continue
if best_trn_size > 0:
result.training_size_ = best_trn_size
result.training_start_index_ = best_start_index
else:
print("*** WARNING: No valid cointegration window found.")
# print(
# f"*** DEBUG *** end_index={self.end_index_}, best_trn_size={best_trn_size}, trace_stat={best_stat}"
# )
return result
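A hypothetical snippet showing how ModelDataPolicy.create is driven by config (the class path and keys come from the code above; the concrete values, trading_pair, and market_data_df are illustrative):
config = Config(json_src={
    "model": {
        "model_data_policy_class":
            "pairs_trading.lib.pt_strategy.model_data_policy.ADFOptimizedWndDataPolicy",
        "training_size": 120,
        "min_training_size": 60,
        "max_training_size": 240,
    }
})
mdp = ModelDataPolicy.create(config, pair=trading_pair, mkt_data=market_data_df)
params = mdp.advance()  # DataWindowParams with the optimized window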

View File

@ -1,104 +0,0 @@
from __future__ import annotations
from typing import Optional
import pandas as pd
import statsmodels.api as sm
from pairs_trading.lib.pt_strategy.pt_model import PairsTradingModel, Prediction
from pairs_trading.lib.pt_strategy.trading_pair import TradingPair
class OLSModel(PairsTradingModel):
model_: Optional[sm.regression.linear_model.RegressionResultsWrapper]
pair_predict_result_: Optional[pd.DataFrame]
zscore_df_: Optional[pd.DataFrame]
def predict(self, pair: TradingPair) -> Prediction:
self.training_df_ = pair.market_data_.copy()
zscore_df = self._fit_zscore(pair=pair)
assert zscore_df is not None
# zscore is both disequilibrium and scaled_disequilibrium
self.training_df_["dis-equilibrium"] = zscore_df[0]
self.training_df_["scaled_dis-equilibrium"] = zscore_df[0]
return Prediction(
tstamp=pair.market_data_.iloc[-1]["tstamp"],
disequilibrium=self.training_df_["dis-equilibrium"].iloc[-1],
scaled_disequilibrium=self.training_df_["scaled_dis-equilibrium"].iloc[-1],
)
def _fit_zscore(self, pair: TradingPair) -> pd.DataFrame:
assert self.training_df_ is not None
symbol_a_px_series = self.training_df_[pair.colnames()].iloc[:, 0]
symbol_b_px_series = self.training_df_[pair.colnames()].iloc[:, 1]
symbol_a_px_series, symbol_b_px_series = symbol_a_px_series.align(
symbol_b_px_series, axis=0
)
X = sm.add_constant(symbol_b_px_series)
self.model_ = sm.OLS(symbol_a_px_series, X).fit()
assert self.model_ is not None
# alternate way would be to use models residuals (will give identical results)
# alpha, beta = self.model_.params
# spread = symbol_a_px_series - (alpha + beta * symbol_b_px_series)
spread = self.model_.resid
return pd.DataFrame((spread - spread.mean()) / spread.std())
class VECMModel(PairsTradingModel):
def predict(self, pair: TradingPair) -> Prediction:
self.training_df_ = pair.market_data_.copy()
assert self.training_df_ is not None
vecm_fit = self._fit_VECM(pair=pair)
assert vecm_fit is not None
predicted_prices = vecm_fit.predict(steps=1)
# Convert prediction to a DataFrame for readability
predicted_df = pd.DataFrame(
predicted_prices, columns=pd.Index(pair.colnames()), dtype=float
)
disequilibrium = (predicted_df[pair.colnames()] @ vecm_fit.beta)[0][0]
scaled_disequilibrium = (disequilibrium - self.training_mu_) / self.training_std_
return Prediction(
tstamp=pair.market_data_.iloc[-1]["tstamp"],
disequilibrium=disequilibrium,
scaled_disequilibrium=scaled_disequilibrium,
)
def _fit_VECM(self, pair: TradingPair) -> VECMResults: # type: ignore
from statsmodels.tsa.vector_ar.vecm import VECM, VECMResults
vecm_df = self.training_df_[pair.colnames()].reset_index(drop=True)
vecm_model = VECM(vecm_df, coint_rank=1)
vecm_fit = vecm_model.fit()
assert vecm_fit is not None
# Check if the model converged properly
if not hasattr(vecm_fit, "beta") or vecm_fit.beta is None:
print(f"{self}: VECM model failed to converge properly")
diseq_series = self.training_df_[pair.colnames()] @ vecm_fit.beta
# print(diseq_series.shape)
self.training_mu_ = float(diseq_series[0].mean())
self.training_std_ = float(diseq_series[0].std())
self.training_df_["dis-equilibrium"] = (
self.training_df_[pair.colnames()] @ vecm_fit.beta
)
# Normalize the dis-equilibrium
self.training_df_["scaled_dis-equilibrium"] = (
diseq_series - self.training_mu_
) / self.training_std_
return vecm_fit
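A toy illustration of the scaling step above, with made-up numbers (the real diseq_series is the beta-weighted combination of the two price series):
import numpy as np
diseq_series = np.array([0.10, 0.12, 0.08, 0.11])  # training-window disequilibrium
mu, std = diseq_series.mean(), diseq_series.std()
scaled = (0.15 - mu) / std  # a predicted disequilibrium vs. the training distribution
print(round(float(scaled), 2))  # ~3.2 standard deviations from the mean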

View File

@ -1,28 +0,0 @@
from __future__ import annotations
from typing import Any, Dict
import pandas as pd
class Prediction:
tstamp_: pd.Timestamp
disequilibrium_: float
scaled_disequilibrium_: float
def __init__(self, tstamp: pd.Timestamp, disequilibrium: float, scaled_disequilibrium: float):
self.tstamp_ = tstamp
self.disequilibrium_ = disequilibrium
self.scaled_disequilibrium_ = scaled_disequilibrium
def to_dict(self) -> Dict[str, Any]:
return {
"tstamp": self.tstamp_,
"disequilibrium": self.disequilibrium_,
"signed_scaled_disequilibrium": self.scaled_disequilibrium_,
"scaled_disequilibrium": abs(self.scaled_disequilibrium_),
# "pair": self.pair_,
}
def to_df(self) -> pd.DataFrame:
return pd.DataFrame([self.to_dict()])

View File

@ -1,223 +0,0 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
import pandas as pd
# ---
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import Config
from cvttpy_tools.settings.cvtt_types import JsonDictT
# ---
from cvttpy_trading.trading.mkt_data.md_summary import MdTradesAggregate
from cvttpy_trading.trading.instrument import ExchangeInstrument
# ---
from pairs_trading.lib.tools.data_loader import load_market_data
class PtMarketData(NamedObject, ABC):
config_: Config
origin_mkt_data_df_: pd.DataFrame
market_data_df_: pd.DataFrame
stat_model_price_: str
instruments_: List[ExchangeInstrument]
symbol_a_: str
symbol_b_: str
def __init__(self, config: Config, instruments: List[ExchangeInstrument]):
self.config_ = config
self.origin_mkt_data_df_ = pd.DataFrame()
self.market_data_df_ = pd.DataFrame()
self.stat_model_price_ = self.config_.get_value("model/stat_model_price")
self.instruments_ = instruments
assert len(self.instruments_) > 0, "No instruments found in config"
self.symbol_a_ = self.instruments_[0].instrument_id().split("-", 1)[1]
self.symbol_b_ = self.instruments_[1].instrument_id().split("-", 1)[1]
@abstractmethod
def md_columns(self) -> List[str]: ...
@abstractmethod
def rename_columns(self, symbol_df: pd.DataFrame) -> pd.DataFrame: ...
@abstractmethod
def tranform_df_target_colnames(self) -> List[str]: ...
def set_market_data(self) -> None:
self.market_data_df_ = pd.DataFrame(
self._transform_dataframe(self.origin_mkt_data_df_)[
["tstamp"] + self.tranform_df_target_colnames()
]
)
self.market_data_df_ = self.market_data_df_.dropna().reset_index(drop=True)
self.market_data_df_["tstamp"] = pd.to_datetime(self.market_data_df_["tstamp"])
self.market_data_df_ = self.market_data_df_.sort_values("tstamp")
def colnames(self) -> List[str]:
return [
f"{self.stat_model_price_}_{self.symbol_a_}",
f"{self.stat_model_price_}_{self.symbol_b_}",
]
def _transform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
df_selected: pd.DataFrame = pd.DataFrame(df[self.md_columns()])
result_df = (
pd.DataFrame(df_selected["tstamp"]).drop_duplicates().reset_index(drop=True)
)
# For each unique symbol, add a corresponding stat_model_price column
symbols = df_selected["symbol"].unique()
for symbol in symbols:
# Filter rows for this symbol
df_symbol = df_selected[df_selected["symbol"] == symbol].reset_index(
drop=True
)
            # Create column name like "close_COIN"
temp_df: pd.DataFrame = self.rename_columns(df_symbol)
# Join with our result dataframe
result_df = pd.merge(result_df, temp_df, on="tstamp", how="left")
result_df = result_df.reset_index(
drop=True
) # do not dropna() since irrelevant symbol would affect dataset
return result_df.dropna()
class ResearchMarketData(PtMarketData):
current_index_: int
is_execution_price_: bool
def __init__(self, config: Config, instruments: List[ExchangeInstrument]):
super().__init__(config, instruments)
self.current_index_ = 0
self.is_execution_price_ = self.config_.key_exists("execution_price")
if self.is_execution_price_:
self.execution_price_column_ = self.config_.get_value("execution_price")["column"]
self.execution_price_shift_ = self.config_.get_value("execution_price")["shift"]
else:
self.execution_price_column_ = None
self.execution_price_shift_ = 0
def has_next(self) -> bool:
return self.current_index_ < len(self.market_data_df_)
def get_next(self) -> pd.Series:
result = self.market_data_df_.iloc[self.current_index_]
self.current_index_ += 1
return result
def load(self) -> None:
datafiles: List[str] = self.config_.get_value("datafiles", [])
assert len(datafiles) > 0, "No datafiles found in config"
extra_minutes: int = self.execution_price_shift_
for datafile in datafiles:
md_df = load_market_data(
datafile=datafile,
instruments=self.instruments_,
db_table_name=self.config_.get_value("market_data_loading")[
self.instruments_[0].user_data_.get("instrument_type", "?instrument_type?")
]["db_table_name"],
trading_hours=self.config_.get_value("trading_hours"),
extra_minutes=extra_minutes,
)
self.origin_mkt_data_df_ = pd.concat([self.origin_mkt_data_df_, md_df])
self.origin_mkt_data_df_ = self.origin_mkt_data_df_.sort_values(by="tstamp")
self.origin_mkt_data_df_ = self.origin_mkt_data_df_.dropna().reset_index(
drop=True
)
self.set_market_data()
self._set_execution_price_data()
def _set_execution_price_data(self) -> None:
if not self.is_execution_price_:
return
if not self.config_.key_exists("execution_price"):
self.market_data_df_[f"exec_price_{self.symbol_a_}"] = self.market_data_df_[
f"{self.stat_model_price_}_{self.symbol_a_}"
]
self.market_data_df_[f"exec_price_{self.symbol_b_}"] = self.market_data_df_[
f"{self.stat_model_price_}_{self.symbol_b_}"
]
return
execution_price_column = self.config_.get_value("execution_price")["column"]
execution_price_shift = self.config_.get_value("execution_price")["shift"]
self.market_data_df_[f"exec_price_{self.symbol_a_}"] = self.market_data_df_[
f"{execution_price_column}_{self.symbol_a_}"
].shift(-execution_price_shift)
self.market_data_df_[f"exec_price_{self.symbol_b_}"] = self.market_data_df_[
f"{execution_price_column}_{self.symbol_b_}"
].shift(-execution_price_shift)
self.market_data_df_ = self.market_data_df_.dropna().reset_index(drop=True)
def md_columns(self) -> List[str]:
# @abstractmethod
if self.is_execution_price_:
return ["tstamp", "symbol", self.stat_model_price_, self.execution_price_column_]
else:
return ["tstamp", "symbol", self.stat_model_price_]
def rename_columns(self, selected_symbol_df: pd.DataFrame) -> pd.DataFrame:
# @abstractmethod
symbol = selected_symbol_df.iloc[0]["symbol"]
new_price_column = f"{self.stat_model_price_}_{symbol}"
if self.is_execution_price_:
new_execution_price_column = f"{self.execution_price_column_}_{symbol}"
# Create temporary dataframe with timestamp and price
temp_df = pd.DataFrame(
{
"tstamp": selected_symbol_df["tstamp"],
new_price_column: selected_symbol_df[self.stat_model_price_],
new_execution_price_column: selected_symbol_df[self.execution_price_column_],
}
)
else:
temp_df = pd.DataFrame(
{
"tstamp": selected_symbol_df["tstamp"],
new_price_column: selected_symbol_df[self.stat_model_price_],
}
)
return temp_df
def tranform_df_target_colnames(self):
# @abstractmethod
return self.colnames() + self.orig_exec_prices_colnames()
def orig_exec_prices_colnames(self) -> List[str]:
return [
f"{self.execution_price_column_}_{self.symbol_a_}",
f"{self.execution_price_column_}_{self.symbol_b_}",
] if self.is_execution_price_ else []
class LiveMarketData(PtMarketData):
def __init__(self, config: Config, instruments: List[ExchangeInstrument]):
super().__init__(config, instruments)
def md_columns(self) -> List[str]:
# @abstractmethod
return ["tstamp", "symbol", self.stat_model_price_]
def rename_columns(self, selected_symbol_df: pd.DataFrame) -> pd.DataFrame:
# @abstractmethod
symbol = selected_symbol_df.iloc[0]["symbol"]
new_price_column = f"{self.stat_model_price_}_{symbol}"
temp_df = pd.DataFrame(
{
"tstamp": selected_symbol_df["tstamp"],
new_price_column: selected_symbol_df[self.stat_model_price_],
}
)
return temp_df
def tranform_df_target_colnames(self):
# @abstractmethod
return self.colnames()
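The _transform_dataframe step above is essentially a long-to-wide reshape; a toy pandas equivalent for intuition (the real code merges per-symbol frames instead, so gaps in one symbol do not silently drop rows for the other):
import pandas as pd
long_df = pd.DataFrame({
    "tstamp": ["11:30", "11:30", "11:31", "11:31"],
    "symbol": ["ADA-USDT", "SOL-USDT", "ADA-USDT", "SOL-USDT"],
    "close": [0.8743, 219.70, 0.8741, 219.62],
})
wide = long_df.pivot(index="tstamp", columns="symbol", values="close")
wide.columns = [f"close_{s}" for s in wide.columns]  # e.g. close_ADA-USDT
print(wide.reset_index())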

View File

@ -1,30 +0,0 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict, cast
# ---
from cvttpy_tools.config import Config
# ---
from pairs_trading.lib.pt_strategy.prediction import Prediction
from pairs_trading.lib.pt_strategy.trading_pair import TradingPair
class PairsTradingModel(ABC):
@abstractmethod
def predict(self, pair: TradingPair) -> Prediction: # type: ignore[assignment]
...
@staticmethod
def create(config: Config) -> PairsTradingModel:
import importlib
model_class_name = config.get_value("model/model_class", None)
assert model_class_name is not None
module_name, class_name = model_class_name.rsplit(".", 1)
module = importlib.import_module(module_name)
model_object = getattr(module, class_name)()
return cast(PairsTradingModel, model_object)

View File

@ -1,305 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional, Tuple
import pandas as pd
# ---
from cvttpy_tools.config import Config
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
# ---
from pairs_trading.lib.pt_strategy.model_data_policy import ModelDataPolicy
from pairs_trading.lib.pt_strategy.pt_market_data import ResearchMarketData
from pairs_trading.lib.pt_strategy.pt_model import Prediction
from pairs_trading.lib.pt_strategy.trading_pair import PairState, TradingPair, ResearchTradingPair
class PtResearchStrategy:
config_: Config
trading_pair_: ResearchTradingPair
model_data_policy_: ModelDataPolicy
pt_mkt_data_: ResearchMarketData
trades_: List[pd.DataFrame]
predictions_df_: pd.DataFrame
def __init__(
self,
config: Config,
instruments: List[ExchangeInstrument]
):
self.config_ = config
self.trades_ = []
self.trading_pair_ = ResearchTradingPair(config=config, instruments=instruments)
self.predictions_df_ = pd.DataFrame()
import copy
# modified config must be passed to PtMarketData
config_copy = copy.deepcopy(config)
config_copy.set_value("instruments", instruments)
self.pt_mkt_data_ = ResearchMarketData(config=config_copy, instruments=instruments)
self.pt_mkt_data_.load()
self.model_data_policy_ = ModelDataPolicy.create(
config_copy, mkt_data=self.pt_mkt_data_.market_data_df_, pair=self.trading_pair_
)
def outstanding_positions(self) -> List[Dict[str, Any]]:
return list(self.trading_pair_.user_data_.get("outstanding_positions", []))
def run(self) -> None:
training_minutes = self.config_.get_value("training_minutes", 120)
market_data_series: pd.Series
market_data_df = pd.DataFrame()
idx = 0
while self.pt_mkt_data_.has_next():
market_data_series = self.pt_mkt_data_.get_next()
new_row = pd.DataFrame([market_data_series])
market_data_df = pd.concat([market_data_df, new_row], ignore_index=True)
if idx >= training_minutes:
break
idx += 1
assert idx >= training_minutes, "Not enough training data"
while self.pt_mkt_data_.has_next():
market_data_series = self.pt_mkt_data_.get_next()
new_row = pd.DataFrame([market_data_series])
market_data_df = pd.concat([market_data_df, new_row], ignore_index=True)
prediction = self.trading_pair_.run(
market_data_df, self.model_data_policy_.advance(mkt_data_df=market_data_df)
)
self.predictions_df_ = pd.concat(
[self.predictions_df_, prediction.to_df()], ignore_index=True
)
assert prediction is not None
trades = self._create_trades(
prediction=prediction, last_row=market_data_df.iloc[-1]
)
if trades is not None:
self.trades_.append(trades)
trades = self._handle_outstanding_positions()
if trades is not None:
self.trades_.append(trades)
def _create_trades(
self, prediction: Prediction, last_row: pd.Series
) -> Optional[pd.DataFrame]:
pair = self.trading_pair_
trades = None
open_threshold = self.config_.get_value("model/disequilibrium/open_trshld")
close_threshold = self.config_.get_value("model/disequilibrium/close_trshld")
scaled_disequilibrium = prediction.scaled_disequilibrium_
abs_scaled_disequilibrium = abs(scaled_disequilibrium)
if pair.user_data_["state"] in [
PairState.INITIAL,
PairState.CLOSE,
PairState.CLOSE_POSITION,
PairState.CLOSE_STOP_LOSS,
PairState.CLOSE_STOP_PROFIT,
]:
if abs_scaled_disequilibrium >= open_threshold:
trades = self._create_open_trades(
pair, row=last_row, prediction=prediction
)
if trades is not None:
trades["status"] = PairState.OPEN.name
print(f"OPEN TRADES:\n{trades}")
pair.user_data_["state"] = PairState.OPEN
pair.on_open_trades(trades)
elif pair.user_data_["state"] == PairState.OPEN:
if abs_scaled_disequilibrium <= close_threshold:
trades = self._create_close_trades(
pair, row=last_row, prediction=prediction
)
if trades is not None:
trades["status"] = PairState.CLOSE.name
print(f"CLOSE TRADES:\n{trades}")
pair.user_data_["state"] = PairState.CLOSE
pair.on_close_trades(trades)
elif pair.to_stop_close_conditions(predicted_row=last_row):
trades = self._create_close_trades(pair, row=last_row)
if trades is not None:
trades["status"] = pair.user_data_["stop_close_state"].name
print(f"STOP CLOSE TRADES:\n{trades}")
pair.user_data_["state"] = pair.user_data_["stop_close_state"]
pair.on_close_trades(trades)
return trades
def _handle_outstanding_positions(self) -> Optional[pd.DataFrame]:
trades = None
pair = self.trading_pair_
# Outstanding positions
if pair.user_data_["state"] == PairState.OPEN:
print(f"{pair}: *** Position is NOT CLOSED. ***")
# outstanding positions
if self.config_.get_value("close_outstanding_positions", False):
close_position_row = pd.Series(pair.market_data_.iloc[-2])
# close_position_row["disequilibrium"] = 0.0
# close_position_row["scaled_disequilibrium"] = 0.0
# close_position_row["signed_scaled_disequilibrium"] = 0.0
trades = self._create_close_trades(
pair=pair, row=close_position_row, prediction=None
)
if trades is not None:
trades["status"] = PairState.CLOSE_POSITION.name
print(f"CLOSE_POSITION TRADES:\n{trades}")
pair.user_data_["state"] = PairState.CLOSE_POSITION
pair.on_close_trades(trades)
else:
pair.add_outstanding_position(
symbol=pair.symbol_a(),
open_side=pair.user_data_["open_side_a"],
open_px=pair.user_data_["open_px_a"],
open_tstamp=pair.user_data_["open_tstamp"],
last_mkt_data_row=pair.market_data_.iloc[-1],
)
pair.add_outstanding_position(
symbol=pair.symbol_b(),
open_side=pair.user_data_["open_side_b"],
open_px=pair.user_data_["open_px_b"],
open_tstamp=pair.user_data_["open_tstamp"],
last_mkt_data_row=pair.market_data_.iloc[-1],
)
return trades
def _trades_df(self) -> pd.DataFrame:
types = {
"time": "datetime64[ns]",
"action": "string",
"symbol": "string",
"side": "string",
"price": "float64",
"disequilibrium": "float64",
"scaled_disequilibrium": "float64",
"signed_scaled_disequilibrium": "float64",
# "pair": "object",
}
columns = list(types.keys())
return pd.DataFrame(columns=columns).astype(types)
def _create_open_trades(
self, pair: ResearchTradingPair, row: pd.Series, prediction: Prediction
) -> Optional[pd.DataFrame]:
colname_a, colname_b = pair.exec_prices_colnames()
tstamp = row["tstamp"]
diseqlbrm = prediction.disequilibrium_
scaled_disequilibrium = prediction.scaled_disequilibrium_
px_a = row[f"{colname_a}"]
px_b = row[f"{colname_b}"]
# creating the trades
df = self._trades_df()
print(f"OPEN_TRADES: {row["tstamp"]} {scaled_disequilibrium=}")
if diseqlbrm > 0:
side_a = "SELL"
side_b = "BUY"
else:
side_a = "BUY"
side_b = "SELL"
# save closing sides
pair.user_data_["open_side_a"] = side_a # used in oustanding positions
pair.user_data_["open_side_b"] = side_b
pair.user_data_["open_px_a"] = px_a
pair.user_data_["open_px_b"] = px_b
pair.user_data_["open_tstamp"] = tstamp
pair.user_data_["close_side_a"] = side_b # used for closing trades
pair.user_data_["close_side_b"] = side_a
# create opening trades
df.loc[len(df)] = {
"time": tstamp,
"symbol": pair.symbol_a(),
"side": side_a,
"action": "OPEN",
"price": px_a,
"disequilibrium": diseqlbrm,
"signed_scaled_disequilibrium": scaled_disequilibrium,
"scaled_disequilibrium": abs(scaled_disequilibrium),
# "pair": pair,
}
df.loc[len(df)] = {
"time": tstamp,
"symbol": pair.symbol_b(),
"side": side_b,
"action": "OPEN",
"price": px_b,
"disequilibrium": diseqlbrm,
"scaled_disequilibrium": abs(scaled_disequilibrium),
"signed_scaled_disequilibrium": scaled_disequilibrium,
# "pair": pair,
}
return df
def _create_close_trades(
self, pair: ResearchTradingPair, row: pd.Series, prediction: Optional[Prediction] = None
) -> Optional[pd.DataFrame]:
colname_a, colname_b = pair.exec_prices_colnames()
tstamp = row["tstamp"]
if prediction is not None:
diseqlbrm = prediction.disequilibrium_
signed_scaled_disequilibrium = prediction.scaled_disequilibrium_
scaled_disequilibrium = abs(prediction.scaled_disequilibrium_)
else:
diseqlbrm = 0.0
signed_scaled_disequilibrium = 0.0
scaled_disequilibrium = 0.0
px_a = row[f"{colname_a}"]
px_b = row[f"{colname_b}"]
# creating the trades
df = self._trades_df()
# create opening trades
df.loc[len(df)] = {
"time": tstamp,
"symbol": pair.symbol_a(),
"side": pair.user_data_["close_side_a"],
"action": "CLOSE",
"price": px_a,
"disequilibrium": diseqlbrm,
"scaled_disequilibrium": scaled_disequilibrium,
"signed_scaled_disequilibrium": signed_scaled_disequilibrium,
# "pair": pair,
}
df.loc[len(df)] = {
"time": tstamp,
"symbol": pair.symbol_b(),
"side": pair.user_data_["close_side_b"],
"action": "CLOSE",
"price": px_b,
"disequilibrium": diseqlbrm,
"scaled_disequilibrium": scaled_disequilibrium,
"signed_scaled_disequilibrium": signed_scaled_disequilibrium,
# "pair": pair,
}
del pair.user_data_["close_side_a"]
del pair.user_data_["close_side_b"]
del pair.user_data_["open_tstamp"]
del pair.user_data_["open_px_a"]
del pair.user_data_["open_px_b"]
del pair.user_data_["open_side_a"]
del pair.user_data_["open_side_b"]
return df
def day_trades(self) -> pd.DataFrame:
return pd.concat(self.trades_, ignore_index=True)

View File

@ -1,527 +0,0 @@
import os
import sqlite3
from datetime import date, datetime
from typing import Any, Dict, List, Optional, Tuple
import pandas as pd
# ---
from cvttpy_tools.config import Config
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
# ---
from pairs_trading.lib.pt_strategy.trading_pair import TradingPair
# Recommended replacement adapters and converters for Python 3.12+
# From: https://docs.python.org/3/library/sqlite3.html#sqlite3-adapter-converter-recipes
def adapt_date_iso(val: date) -> str:
"""Adapt datetime.date to ISO 8601 date."""
return val.isoformat()
def adapt_datetime_iso(val: datetime) -> str:
"""Adapt datetime.datetime to timezone-naive ISO 8601 date."""
return val.isoformat()
def convert_date(val: bytes) -> date:
"""Convert ISO 8601 date to datetime.date object."""
return datetime.fromisoformat(val.decode()).date()
def convert_datetime(val: bytes) -> datetime:
"""Convert ISO 8601 datetime to datetime.datetime object."""
return datetime.fromisoformat(val.decode())
# Register the adapters and converters
sqlite3.register_adapter(date, adapt_date_iso)
sqlite3.register_adapter(datetime, adapt_datetime_iso)
sqlite3.register_converter("date", convert_date)
sqlite3.register_converter("datetime", convert_datetime)
def create_result_database(db_path: str) -> None:
"""
Create the SQLite database and required tables if they don't exist.
"""
try:
# Create directory if it doesn't exist
db_dir = os.path.dirname(db_path)
if db_dir and not os.path.exists(db_dir):
os.makedirs(db_dir, exist_ok=True)
print(f"Created directory: {db_dir}")
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Create the pt_bt_results table for completed trades
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS pt_bt_results (
date DATE,
pair TEXT,
symbol TEXT,
open_time DATETIME,
open_side TEXT,
open_price REAL,
open_quantity INTEGER,
open_disequilibrium REAL,
close_time DATETIME,
close_side TEXT,
close_price REAL,
close_quantity INTEGER,
close_disequilibrium REAL,
symbol_return REAL,
pair_return REAL,
close_condition TEXT
)
"""
)
cursor.execute("DELETE FROM pt_bt_results;")
# Create the outstanding_positions table for open positions
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS outstanding_positions (
date DATE,
pair TEXT,
symbol TEXT,
position_quantity REAL,
last_price REAL,
unrealized_return REAL,
open_price REAL,
open_side TEXT
)
"""
)
cursor.execute("DELETE FROM outstanding_positions;")
# Create the config table for storing configuration JSON for reference
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS config (
id INTEGER PRIMARY KEY AUTOINCREMENT,
run_timestamp DATETIME,
config_file_path TEXT,
config_json TEXT,
datafiles TEXT,
instruments TEXT
)
"""
)
cursor.execute("DELETE FROM config;")
conn.commit()
conn.close()
except Exception as e:
print(f"Error creating result database: {str(e)}")
raise
def store_config_in_database(
db_path: str,
config_file_path: str,
config: Config,
datafiles: List[Tuple[str, str]],
instruments: List[ExchangeInstrument],
) -> None:
"""
Store configuration information in the database for reference.
"""
import json
if db_path.upper() == "NONE":
return
try:
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Convert config to JSON string
config_json = json.dumps(config.data(), indent=2, default=str)
# Convert lists to comma-separated strings for storage
datafiles_str = ", ".join([f"{datafile}" for _, datafile in datafiles])
instruments_str = ", ".join(
[
inst.details_short()
for inst in instruments
]
)
# Insert configuration record
cursor.execute(
"""
INSERT INTO config (
run_timestamp, config_file_path, config_json, datafiles, instruments
) VALUES (?, ?, ?, ?, ?)
""",
(
datetime.now(),
config_file_path,
config_json,
datafiles_str,
instruments_str,
),
)
conn.commit()
conn.close()
print(f"Configuration stored in database")
except Exception as e:
print(f"Error storing configuration in database: {str(e)}")
import traceback
traceback.print_exc()
def convert_timestamp(timestamp: Any) -> Optional[datetime]:
"""Convert pandas Timestamp to Python datetime object for SQLite compatibility."""
if timestamp is None:
return None
if isinstance(timestamp, pd.Timestamp):
return timestamp.to_pydatetime()
elif isinstance(timestamp, datetime):
return timestamp
elif isinstance(timestamp, date):
return datetime.combine(timestamp, datetime.min.time())
elif isinstance(timestamp, str):
return datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
elif isinstance(timestamp, int):
return datetime.fromtimestamp(timestamp)
else:
raise ValueError(f"Unsupported timestamp type: {type(timestamp)}")
DayT = str
TradeT = Dict[str, Any]
OutstandingPositionT = Dict[str, Any]
class PairResearchResult:
"""
Class to handle pair research results for a single pair across multiple days.
Simplified version of BacktestResult focused on single pair analysis.
"""
trades_: Dict[DayT, pd.DataFrame]
outstanding_positions_: Dict[DayT, List[OutstandingPositionT]]
symbol_roundtrip_trades_: Dict[str, List[Dict[str, Any]]]
config_: Config
def __init__(self, config: Config) -> None:
self.config_ = config
self.trades_ = {}
self.outstanding_positions_ = {}
self.total_realized_pnl = 0.0
self.symbol_roundtrip_trades_ = {}
def add_day_results(self, day: DayT, trades: pd.DataFrame, outstanding_positions: List[Dict[str, Any]]) -> None:
assert isinstance(trades, pd.DataFrame)
self.trades_[day] = trades
self.outstanding_positions_[day] = outstanding_positions
def outstanding_positions(self) -> List[OutstandingPositionT]:
"""Get all outstanding positions across all days as a flat list."""
res: List[Dict[str, Any]] = []
for day in self.outstanding_positions_.keys():
res.extend(self.outstanding_positions_[day])
return res
def calculate_returns(self) -> None:
"""Calculate and store total returns for the single pair across all days."""
self.extract_roundtrip_trades()
self.total_realized_pnl = 0.0
for day, day_trades in self.symbol_roundtrip_trades_.items():
for trade in day_trades:
self.total_realized_pnl += trade['symbol_return']
def extract_roundtrip_trades(self) -> None:
"""
Extract round-trip trades by day, grouping open/close pairs for each symbol.
Returns a dictionary with day as key and list of completed round-trip trades.
"""
def _symbol_return(trade1_side: str, trade1_px: float, trade2_side: str, trade2_px: float) -> float:
if trade1_side == "BUY" and trade2_side == "SELL":
return (trade2_px - trade1_px) / trade1_px * 100
elif trade1_side == "SELL" and trade2_side == "BUY":
return (trade1_px - trade2_px) / trade1_px * 100
else:
return 0
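        # Worked example of _symbol_return (prices hypothetical): a leg bought at
        # 100 and sold at 105 returns (105 - 100) / 100 * 100 = +5.0; a leg sold
        # short at 100 and bought back at 98 returns (100 - 98) / 100 * 100 = +2.0.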
# Process each day separately
for day, day_trades in self.trades_.items():
# Sort trades by timestamp for the day
sorted_trades = day_trades #sorted(day_trades, key=lambda x: x["timestamp"] if x["timestamp"] else pd.Timestamp.min)
day_roundtrips = []
# Process trades in groups of 4 (open A, open B, close A, close B)
for idx in range(0, len(sorted_trades), 4):
if idx + 3 >= len(sorted_trades):
break
trade_a_1 = sorted_trades.iloc[idx] # Open A
trade_b_1 = sorted_trades.iloc[idx + 1] # Open B
trade_a_2 = sorted_trades.iloc[idx + 2] # Close A
trade_b_2 = sorted_trades.iloc[idx + 3] # Close B
# Validate trade sequence
if not (trade_a_1["action"] == "OPEN" and trade_a_2["action"] == "CLOSE"):
continue
if not (trade_b_1["action"] == "OPEN" and trade_b_2["action"] == "CLOSE"):
continue
# Calculate individual symbol returns
symbol_a_return = _symbol_return(
trade_a_1["side"], trade_a_1["price"],
trade_a_2["side"], trade_a_2["price"]
)
symbol_b_return = _symbol_return(
trade_b_1["side"], trade_b_1["price"],
trade_b_2["side"], trade_b_2["price"]
)
pair_return = symbol_a_return + symbol_b_return
# Create round-trip records for both symbols
funding_per_position = self.config_.get_value("funding_per_pair", 10000) / 2
# Symbol A round-trip
day_roundtrips.append({
"symbol": trade_a_1["symbol"],
"open_side": trade_a_1["side"],
"open_price": trade_a_1["price"],
"open_time": trade_a_1["time"],
"close_side": trade_a_2["side"],
"close_price": trade_a_2["price"],
"close_time": trade_a_2["time"],
"symbol_return": symbol_a_return,
"pair_return": pair_return,
"shares": funding_per_position / trade_a_1["price"],
"close_condition": trade_a_2.get("status", "UNKNOWN"),
"open_disequilibrium": trade_a_1.get("disequilibrium"),
"close_disequilibrium": trade_a_2.get("disequilibrium"),
})
# Symbol B round-trip
day_roundtrips.append({
"symbol": trade_b_1["symbol"],
"open_side": trade_b_1["side"],
"open_price": trade_b_1["price"],
"open_time": trade_b_1["time"],
"close_side": trade_b_2["side"],
"close_price": trade_b_2["price"],
"close_time": trade_b_2["time"],
"symbol_return": symbol_b_return,
"pair_return": pair_return,
"shares": funding_per_position / trade_b_1["price"],
"close_condition": trade_b_2.get("status", "UNKNOWN"),
"open_disequilibrium": trade_b_1.get("disequilibrium"),
"close_disequilibrium": trade_b_2.get("disequilibrium"),
})
if day_roundtrips:
self.symbol_roundtrip_trades_[day] = day_roundtrips
def print_returns_by_day(self) -> None:
"""
Print detailed return information for each day, grouped by day.
Shows individual symbol round-trips and daily totals.
"""
print("\n====== PAIR RESEARCH RETURNS BY DAY ======")
total_return_all_days = 0.0
for day, day_trades in sorted(self.symbol_roundtrip_trades_.items()):
print(f"\n--- {day} ---")
day_total_return = 0.0
pair_returns = []
# Group trades by pair (every 2 trades form a pair)
for idx in range(0, len(day_trades), 2):
if idx + 1 < len(day_trades):
trade_a = day_trades[idx]
trade_b = day_trades[idx + 1]
# Print individual symbol results
print(f" {trade_a['open_time'].time()}-{trade_a['close_time'].time()}")
print(f" {trade_a['symbol']}: {trade_a['open_side']} @ ${trade_a['open_price']:.2f}"
f"{trade_a['close_side']} @ ${trade_a['close_price']:.2f} | "
f"Return: {trade_a['symbol_return']:+.2f}% | Shares: {trade_a['shares']:.2f}")
print(f" {trade_b['symbol']}: {trade_b['open_side']} @ ${trade_b['open_price']:.2f}"
f"{trade_b['close_side']} @ ${trade_b['close_price']:.2f} | "
f"Return: {trade_b['symbol_return']:+.2f}% | Shares: {trade_b['shares']:.2f}")
# Show disequilibrium info if available
if trade_a.get('open_disequilibrium') is not None:
print(f" Disequilibrium: Open: {trade_a['open_disequilibrium']:.4f}, "
f"Close: {trade_a['close_disequilibrium']:.4f}")
pair_return = trade_a['pair_return']
print(f" Pair Return: {pair_return:+.2f}% | Close Condition: {trade_a['close_condition']}")
print()
pair_returns.append(pair_return)
day_total_return += pair_return
print(f" Day Total Return: {day_total_return:+.2f}% ({len(pair_returns)} pairs)")
total_return_all_days += day_total_return
print(f"\n====== TOTAL RETURN ACROSS ALL DAYS ======")
print(f"Total Return: {total_return_all_days:+.2f}%")
print(f"Total Days: {len(self.symbol_roundtrip_trades_)}")
if len(self.symbol_roundtrip_trades_) > 0:
print(f"Average Daily Return: {total_return_all_days / len(self.symbol_roundtrip_trades_):+.2f}%")
def get_return_summary(self) -> Dict[str, Any]:
"""
Get a summary of returns across all days.
Returns a dictionary with key metrics.
"""
if len(self.symbol_roundtrip_trades_) == 0:
return {
"total_return": 0.0,
"total_days": 0,
"total_pairs": 0,
"average_daily_return": 0.0,
"best_day": None,
"worst_day": None,
"daily_returns": {}
}
daily_returns = {}
total_return = 0.0
total_pairs = 0
for day, day_trades in self.symbol_roundtrip_trades_.items():
day_return = 0.0
day_pairs = len(day_trades) // 2 # Each pair has 2 symbol trades
for trade in day_trades:
day_return += trade['symbol_return']
daily_returns[day] = {
"return": day_return,
"pairs": day_pairs
}
total_return += day_return
total_pairs += day_pairs
best_day = max(daily_returns.items(), key=lambda x: x[1]["return"]) if daily_returns else None
worst_day = min(daily_returns.items(), key=lambda x: x[1]["return"]) if daily_returns else None
return {
"total_return": total_return,
"total_days": len(self.symbol_roundtrip_trades_),
"total_pairs": total_pairs,
"average_daily_return": total_return / len(self.symbol_roundtrip_trades_) if self.symbol_roundtrip_trades_ else 0.0,
"best_day": best_day,
"worst_day": worst_day,
"daily_returns": daily_returns
}
def print_grand_totals(self) -> None:
"""Print grand totals for the single pair analysis."""
summary = self.get_return_summary()
print(f"\n====== PAIR RESEARCH GRAND TOTALS ======")
print('---')
print(f"Total Return: {summary['total_return']:+.2f}%")
print('---')
print(f"Total Days Traded: {summary['total_days']}")
print(f"Total Open-Close Actions: {summary['total_pairs']}")
print(f"Total Trades: 4 * {summary['total_pairs']} = {4 * summary['total_pairs']}")
if summary['total_days'] > 0:
print(f"Average Daily Return: {summary['average_daily_return']:+.2f}%")
if summary['best_day']:
best_day, best_data = summary['best_day']
print(f"Best Day: {best_day} ({best_data['return']:+.2f}%)")
if summary['worst_day']:
worst_day, worst_data = summary['worst_day']
print(f"Worst Day: {worst_day} ({worst_data['return']:+.2f}%)")
# Update the total_realized_pnl for backward compatibility
self.total_realized_pnl = summary['total_return']
def analyze_pair_performance(self) -> None:
"""
Main method to perform comprehensive pair research analysis.
Extracts round-trip trades, calculates returns, groups by day, and prints results.
"""
print(f"\n{'='*60}")
print(f"PAIR RESEARCH PERFORMANCE ANALYSIS")
print(f"{'='*60}")
self.calculate_returns()
self.print_returns_by_day()
self.print_outstanding_positions()
self._print_additional_metrics()
self.print_grand_totals()
def _print_additional_metrics(self) -> None:
"""Print additional performance metrics."""
summary = self.get_return_summary()
if summary['total_days'] == 0:
return
print(f"\n====== ADDITIONAL METRICS ======")
# Calculate win rate
winning_days = sum(1 for day_data in summary['daily_returns'].values() if day_data['return'] > 0)
win_rate = (winning_days / summary['total_days']) * 100
print(f"Winning Days: {winning_days}/{summary['total_days']} ({win_rate:.1f}%)")
# Calculate average trade return
if summary['total_pairs'] > 0:
# Each pair has 2 symbol trades, so total symbol trades = total_pairs * 2
total_symbol_trades = summary['total_pairs'] * 2
avg_symbol_return = summary['total_return'] / total_symbol_trades
print(f"Average Symbol Return: {avg_symbol_return:+.2f}%")
            avg_pair_return = summary['total_return'] / summary['total_pairs']  # each pair's two symbol returns already sum to its pair return
            print(f"Average Pair Return: {avg_pair_return:+.2f}%")
# Show daily return distribution
daily_returns_list = [data['return'] for data in summary['daily_returns'].values()]
if daily_returns_list:
print(f"Daily Return Range: {min(daily_returns_list):+.2f}% to {max(daily_returns_list):+.2f}%")
def print_outstanding_positions(self) -> None:
"""Print outstanding positions for the single pair."""
all_positions: List[OutstandingPositionT] = self.outstanding_positions()
if not all_positions:
print("\n====== NO OUTSTANDING POSITIONS ======")
return
print(f"\n====== OUTSTANDING POSITIONS ======")
print(f"{'Symbol':<10} {'Side':<4} {'Shares':<10} {'Open $':<8} {'Current $':<10} {'Value $':<12}")
print("-" * 70)
total_value = 0.0
for pos in all_positions:
current_value = pos.get("last_value", 0.0)
print(f"{pos['symbol']:<10} {pos['open_side']:<4} {pos['shares']:<10.2f} "
f"{pos['open_px']:<8.2f} {pos['last_px']:<10.2f} {current_value:<12.2f}")
total_value += current_value
print("-" * 70)
print(f"{'TOTAL VALUE':<60} ${total_value:<12.2f}")
def get_total_realized_pnl(self) -> float:
"""Get total realized PnL."""
return self.total_realized_pnl

View File

@ -1,226 +0,0 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List
import pandas as pd
# ---
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import Config
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
# ---
from pairs_trading.lib.pt_strategy.model_data_policy import DataWindowParams
from pairs_trading.lib.pt_strategy.prediction import Prediction
class PairState(Enum):
INITIAL = 1
OPEN = 2
CLOSE = 3
CLOSE_POSITION = 4
CLOSE_STOP_LOSS = 5
CLOSE_STOP_PROFIT = 6
class TradingPair(NamedObject, ABC):
config_: Config
model_: Any # "PairsTradingModel"
market_data_: pd.DataFrame
user_data_: Dict[str, Any]
stat_model_price_: str
instruments_: List[ExchangeInstrument]
def __init__(
self,
config: Config,
instruments: List[ExchangeInstrument],
):
from pairs_trading.lib.pt_strategy.pt_model import PairsTradingModel
self.config_ = config
self.model_ = PairsTradingModel.create(config)
self.user_data_ = {}
self.instruments_ = instruments
self.instruments_[0].user_data_["symbol"] = instruments[0].instrument_id().split("-", 1)[1]
self.instruments_[1].user_data_["symbol"] = instruments[1].instrument_id().split("-", 1)[1]
self.stat_model_price_ = config.get_value("model/stat_model_price")
def run(self, market_data: pd.DataFrame, data_params: DataWindowParams) -> Prediction: # type: ignore[assignment]
self.market_data_ = market_data[
data_params.training_start_index_ : data_params.training_start_index_ + data_params.training_size_
]
return self.model_.predict(pair=self)
def colnames(self) -> List[str]:
return [
f"{self.stat_model_price_}_{self.symbol_a()}",
f"{self.stat_model_price_}_{self.symbol_b()}",
]
def symbol_a(self) -> str:
return self.get_instrument_a().user_data_["symbol"]
def symbol_b(self) -> str:
return self.get_instrument_b().user_data_["symbol"]
def get_instrument_a(self) -> ExchangeInstrument:
return self.instruments_[0]
def get_instrument_b(self) -> ExchangeInstrument:
return self.instruments_[1]
def __repr__(self) -> str:
return (
f"{self.__class__.__name__}:"
f" symbol_a={self.symbol_a()},"
f" symbol_b={self.symbol_b()},"
f" model={self.model_.__class__.__name__}"
)
class ResearchTradingPair(TradingPair):
def __init__(
self,
config: Config,
instruments: List[ExchangeInstrument],
):
assert len(instruments) == 2, "Trading pair must have exactly 2 instruments"
super().__init__(config=config, instruments=instruments)
self.user_data_ = {
"state": PairState.INITIAL,
}
def is_closed(self) -> bool:
return self.user_data_["state"] in [
PairState.CLOSE,
PairState.CLOSE_POSITION,
PairState.CLOSE_STOP_LOSS,
PairState.CLOSE_STOP_PROFIT,
]
def is_open(self) -> bool:
return not self.is_closed()
def exec_prices_colnames(self) -> List[str]:
return [
f"exec_price_{self.symbol_a()}",
f"exec_price_{self.symbol_b()}",
]
def to_stop_close_conditions(self, predicted_row: pd.Series) -> bool:
config = self.config_
if (
not config.key_exists("stop_close_conditions")
or config.get_value("stop_close_conditions") is None
):
return False
if "profit" in config.get_value("stop_close_conditions"):
current_return = self._current_return(predicted_row)
#
# print(f"time={predicted_row['tstamp']} current_return={current_return}")
#
if current_return >= config.get_value("stop_close_conditions")["profit"]:
print(f"STOP PROFIT: {current_return}")
self.user_data_["stop_close_state"] = PairState.CLOSE_STOP_PROFIT
return True
if "loss" in config.get_value("stop_close_conditions"):
if current_return <= config.get_value("stop_close_conditions")["loss"]:
print(f"STOP LOSS: {current_return}")
self.user_data_["stop_close_state"] = PairState.CLOSE_STOP_LOSS
return True
return False
def _current_return(self, predicted_row: pd.Series) -> float:
if "open_trades" in self.user_data_:
open_trades = self.user_data_["open_trades"]
if len(open_trades) == 0:
return 0.0
def _single_instrument_return(symbol: str) -> float:
instrument_open_trades = open_trades[open_trades["symbol"] == symbol]
instrument_open_price = instrument_open_trades["price"].iloc[0]
sign = -1 if instrument_open_trades["side"].iloc[0] == "SELL" else 1
instrument_price = predicted_row[f"{self.stat_model_price_}_{symbol}"]
instrument_return = (
sign
* (instrument_price - instrument_open_price)
/ instrument_open_price
)
return float(instrument_return) * 100.0
instrument_a_return = _single_instrument_return(self.symbol_a())
instrument_b_return = _single_instrument_return(self.symbol_b())
return instrument_a_return + instrument_b_return
return 0.0
def on_open_trades(self, trades: pd.DataFrame) -> None:
if "close_trades" in self.user_data_:
del self.user_data_["close_trades"]
self.user_data_["open_trades"] = trades
def on_close_trades(self, trades: pd.DataFrame) -> None:
del self.user_data_["open_trades"]
self.user_data_["close_trades"] = trades
def add_outstanding_position(
self,
symbol: str,
open_side: str,
open_px: float,
open_tstamp: datetime,
last_mkt_data_row: pd.Series,
) -> None:
assert symbol in [
self.symbol_a(),
self.symbol_b(),
], "Symbol must be one of the pair's symbols"
assert open_side in ["BUY", "SELL"], "Open side must be either BUY or SELL"
assert open_px > 0, "Open price must be greater than 0"
assert open_tstamp is not None, "Open timestamp must be provided"
assert last_mkt_data_row is not None, "Last market data row must be provided"
exec_prices_col_a, exec_prices_col_b = self.exec_prices_colnames()
if symbol == self.symbol_a():
last_px = last_mkt_data_row[exec_prices_col_a]
else:
last_px = last_mkt_data_row[exec_prices_col_b]
funding_per_position = self.config_.get_value("funding_per_pair") / 2
shares = funding_per_position / open_px
if open_side == "SELL":
shares = -shares
if "outstanding_positions" not in self.user_data_:
self.user_data_["outstanding_positions"] = []
self.user_data_["outstanding_positions"].append(
{
"symbol": symbol,
"open_side": open_side,
"open_px": open_px,
"shares": shares,
"open_tstamp": open_tstamp,
"last_px": last_px,
"last_tstamp": last_mkt_data_row["tstamp"],
"last_value": last_px * shares,
}
)
class LiveTradingPair(TradingPair):
def __init__(self, config: Config, instruments: List[ExchangeInstrument]):
super().__init__(config, instruments)
def to_stop_close_conditions(self, predicted_row: pd.Series) -> bool:
# TODO LiveTradingPair.to_stop_close_conditions()
return False

View File

@ -0,0 +1,52 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from enum import Enum
from typing import Dict, Optional, cast
import pandas as pd
from pt_trading.results import BacktestResult
from pt_trading.trading_pair import TradingPair
NanoPerMin = 1e9
class PairsTradingFitMethod(ABC):
TRADES_COLUMNS = [
"time",
"symbol",
"side",
"action",
"price",
"disequilibrium",
"scaled_disequilibrium",
"signed_scaled_disequilibrium",
"pair",
]
@staticmethod
def create(config: Dict) -> PairsTradingFitMethod:
import importlib
fit_method_class_name = config.get("fit_method_class", None)
assert fit_method_class_name is not None
module_name, class_name = fit_method_class_name.rsplit(".", 1)
module = importlib.import_module(module_name)
fit_method = getattr(module, class_name)()
return cast(PairsTradingFitMethod, fit_method)
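    # Illustrative use of the factory (not in the original file; the module path
    # and class name are hypothetical -- any concrete subclass importable on
    # PYTHONPATH works):
    #   fit = PairsTradingFitMethod.create(
    #       {"fit_method_class": "my_strategies.vecm_fit.VECMRollingFit"}
    #   )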
@abstractmethod
def run_pair(
self, pair: TradingPair, bt_result: BacktestResult
) -> Optional[pd.DataFrame]: ...
@abstractmethod
def reset(self) -> None: ...
@abstractmethod
def create_trading_pair(
self,
config: Dict,
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
) -> TradingPair: ...

743
lib/pt_trading/results.py Normal file
View File

@ -0,0 +1,743 @@
import os
import sqlite3
from datetime import date, datetime
from typing import Any, Dict, List, Optional, Tuple
import pandas as pd
from pt_trading.trading_pair import TradingPair
# Recommended replacement adapters and converters for Python 3.12+
# From: https://docs.python.org/3/library/sqlite3.html#sqlite3-adapter-converter-recipes
def adapt_date_iso(val: date) -> str:
"""Adapt datetime.date to ISO 8601 date."""
return val.isoformat()
def adapt_datetime_iso(val: datetime) -> str:
"""Adapt datetime.datetime to timezone-naive ISO 8601 date."""
return val.isoformat()
def convert_date(val: bytes) -> date:
"""Convert ISO 8601 date to datetime.date object."""
return datetime.fromisoformat(val.decode()).date()
def convert_datetime(val: bytes) -> datetime:
"""Convert ISO 8601 datetime to datetime.datetime object."""
return datetime.fromisoformat(val.decode())
# Register the adapters and converters
sqlite3.register_adapter(date, adapt_date_iso)
sqlite3.register_adapter(datetime, adapt_datetime_iso)
sqlite3.register_converter("date", convert_date)
sqlite3.register_converter("datetime", convert_datetime)
def create_result_database(db_path: str) -> None:
"""
Create the SQLite database and required tables if they don't exist.
"""
try:
# Create directory if it doesn't exist
db_dir = os.path.dirname(db_path)
if db_dir and not os.path.exists(db_dir):
os.makedirs(db_dir, exist_ok=True)
print(f"Created directory: {db_dir}")
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Create the pt_bt_results table for completed trades
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS pt_bt_results (
date DATE,
pair TEXT,
symbol TEXT,
open_time DATETIME,
open_side TEXT,
open_price REAL,
open_quantity INTEGER,
open_disequilibrium REAL,
close_time DATETIME,
close_side TEXT,
close_price REAL,
close_quantity INTEGER,
close_disequilibrium REAL,
symbol_return REAL,
pair_return REAL,
close_condition TEXT
)
"""
)
cursor.execute("DELETE FROM pt_bt_results;")
# Create the outstanding_positions table for open positions
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS outstanding_positions (
date DATE,
pair TEXT,
symbol TEXT,
position_quantity REAL,
last_price REAL,
unrealized_return REAL,
open_price REAL,
open_side TEXT
)
"""
)
cursor.execute("DELETE FROM outstanding_positions;")
# Create the config table for storing configuration JSON for reference
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS config (
id INTEGER PRIMARY KEY AUTOINCREMENT,
run_timestamp DATETIME,
config_file_path TEXT,
config_json TEXT,
fit_method_class TEXT,
datafiles TEXT,
instruments TEXT
)
"""
)
cursor.execute("DELETE FROM config;")
conn.commit()
conn.close()
except Exception as e:
print(f"Error creating result database: {str(e)}")
raise
def store_config_in_database(
db_path: str,
config_file_path: str,
config: Dict,
fit_method_class: str,
datafiles: List[Tuple[str, str]],
instruments: List[Dict[str, str]],
) -> None:
"""
Store configuration information in the database for reference.
"""
import json
if db_path.upper() == "NONE":
return
try:
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Convert config to JSON string
config_json = json.dumps(config, indent=2, default=str)
# Convert lists to comma-separated strings for storage
datafiles_str = ", ".join([f"{datafile}" for _, datafile in datafiles])
instruments_str = ", ".join(
[
f"{inst['symbol']}:{inst['instrument_type']}:{inst['exchange_id']}"
for inst in instruments
]
)
# Insert configuration record
cursor.execute(
"""
INSERT INTO config (
run_timestamp, config_file_path, config_json, fit_method_class, datafiles, instruments
) VALUES (?, ?, ?, ?, ?, ?)
""",
(
datetime.now(),
config_file_path,
config_json,
fit_method_class,
datafiles_str,
instruments_str,
),
)
conn.commit()
conn.close()
print(f"Configuration stored in database")
except Exception as e:
print(f"Error storing configuration in database: {str(e)}")
import traceback
traceback.print_exc()
def convert_timestamp(timestamp: Any) -> Optional[datetime]:
"""Convert pandas Timestamp to Python datetime object for SQLite compatibility."""
if timestamp is None:
return None
if isinstance(timestamp, pd.Timestamp):
return timestamp.to_pydatetime()
elif isinstance(timestamp, datetime):
return timestamp
elif isinstance(timestamp, date):
return datetime.combine(timestamp, datetime.min.time())
elif isinstance(timestamp, str):
return datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
elif isinstance(timestamp, int):
return datetime.fromtimestamp(timestamp)
else:
raise ValueError(f"Unsupported timestamp type: {type(timestamp)}")
class BacktestResult:
"""
Class to handle backtest results, trades tracking, PnL calculations, and reporting.
"""
def __init__(self, config: Dict[str, Any]):
self.config = config
self.trades: Dict[str, Dict[str, Any]] = {}
self.total_realized_pnl = 0.0
self.outstanding_positions: List[Dict[str, Any]] = []
self.pairs_trades_: Dict[str, List[Dict[str, Any]]] = {}
def add_trade(
self,
pair_nm: str,
symbol: str,
side: str,
action: str,
price: Any,
disequilibrium: Optional[float] = None,
scaled_disequilibrium: Optional[float] = None,
timestamp: Optional[datetime] = None,
status: Optional[str] = None,
) -> None:
"""Add a trade to the results tracking."""
pair_nm = str(pair_nm)
if pair_nm not in self.trades:
self.trades[pair_nm] = {symbol: []}
if symbol not in self.trades[pair_nm]:
self.trades[pair_nm][symbol] = []
self.trades[pair_nm][symbol].append(
{
"symbol": symbol,
"side": side,
"action": action,
"price": price,
"disequilibrium": disequilibrium,
"scaled_disequilibrium": scaled_disequilibrium,
"timestamp": timestamp,
"status": status,
}
)
def add_outstanding_position(self, position: Dict[str, Any]) -> None:
"""Add an outstanding position to tracking."""
self.outstanding_positions.append(position)
def add_realized_pnl(self, realized_pnl: float) -> None:
"""Add realized PnL to the total."""
self.total_realized_pnl += realized_pnl
def get_total_realized_pnl(self) -> float:
"""Get total realized PnL."""
return self.total_realized_pnl
def get_outstanding_positions(self) -> List[Dict[str, Any]]:
"""Get all outstanding positions."""
return self.outstanding_positions
def get_trades(self) -> Dict[str, Dict[str, Any]]:
"""Get all trades."""
return self.trades
def clear_trades(self) -> None:
"""Clear all trades (used when processing new files)."""
self.trades.clear()
def collect_single_day_results(self, pairs_trades: List[pd.DataFrame]) -> None:
"""Collect and process single day trading results."""
result = pd.concat(pairs_trades, ignore_index=True)
result["time"] = pd.to_datetime(result["time"])
result = result.set_index("time").sort_index()
print("\n -------------- Suggested Trades ")
print(result)
for row in result.itertuples():
side = row.side
action = row.action
symbol = row.symbol
price = row.price
disequilibrium = getattr(row, "disequilibrium", None)
scaled_disequilibrium = getattr(row, "scaled_disequilibrium", None)
if hasattr(row, "time"):
timestamp = getattr(row, "time")
else:
timestamp = convert_timestamp(row.Index)
status = row.status
self.add_trade(
pair_nm=str(row.pair),
symbol=str(symbol),
side=str(side),
action=str(action),
price=float(str(price)),
disequilibrium=disequilibrium,
scaled_disequilibrium=scaled_disequilibrium,
timestamp=timestamp,
status=str(status) if status is not None else "?",
)
def print_single_day_results(self) -> None:
"""Print single day results summary."""
for pair, symbols in self.trades.items():
print(f"\n--- {pair} ---")
for symbol, trades in symbols.items():
for trade_data in trades:
if len(trade_data) >= 2:
side, price = trade_data[:2]
print(f"{symbol} {side} at ${price}")
def print_results_summary(self, all_results: Dict[str, Dict[str, Any]]) -> None:
"""Print summary of all processed files."""
print("\n====== Summary of All Processed Files ======")
for filename, data in all_results.items():
trade_count = sum(
len(trades)
for symbol_trades in data["trades"].values()
for trades in symbol_trades.values()
)
print(f"{filename}: {trade_count} trades")
def calculate_returns(self, all_results: Dict[str, Dict[str, Any]]) -> None:
"""Calculate and print returns by day and pair."""
def _symbol_return(trade1_side: str, trade1_px: float, trade2_side: str, trade2_px: float) -> float:
if trade1_side == "BUY" and trade2_side == "SELL":
return (trade2_px - trade1_px) / trade1_px * 100
elif trade1_side == "SELL" and trade2_side == "BUY":
return (trade1_px - trade2_px) / trade1_px * 100
else:
return 0
print("\n====== Returns By Day and Pair ======")
        for filename, data in all_results.items():
            trades = []  # reset per file so earlier days' trades are not double-counted
pairs = list(data["trades"].keys())
for pair in pairs:
self.pairs_trades_[pair] = []
trades_dict = data["trades"][pair]
for symbol in trades_dict.keys():
trades.extend(trades_dict[symbol])
trades = sorted(trades, key=lambda x: (x["timestamp"], x["symbol"]))
print(f"\n--- {filename} ---")
self.outstanding_positions = data["outstanding_positions"]
day_return = 0.0
for idx in range(0, len(trades), 4):
symbol_a = trades[idx]["symbol"]
trade_a_1 = trades[idx]
trade_a_2 = trades[idx + 2]
symbol_b = trades[idx + 1]["symbol"]
trade_b_1 = trades[idx + 1]
trade_b_2 = trades[idx + 3]
assert (
trade_a_1["timestamp"] < trade_a_2["timestamp"]
), f"Trade 1: {trade_a_1['timestamp']} is not less than Trade 2: {trade_a_2['timestamp']}"
assert (
trade_a_1["action"] == "OPEN" and trade_a_2["action"] == "CLOSE"
), f"Trade 1: {trade_a_1['action']} and Trade 2: {trade_a_2['action']} are the same"
# Calculate return based on action combination
symbol_a_return = _symbol_return(trade_a_1["side"], trade_a_1["price"], trade_a_2["side"], trade_a_2["price"])
symbol_b_return = _symbol_return(trade_b_1["side"], trade_b_1["price"], trade_b_2["side"], trade_b_2["price"])
pair_return = symbol_a_return + symbol_b_return
self.pairs_trades_[pair].append(
{
"symbol": symbol_a,
"open_side": trade_a_1["side"],
"open_action": trade_a_1["action"],
"open_price": trade_a_1["price"],
"close_side": trade_a_2["side"],
"close_action": trade_a_2["action"],
"close_price": trade_a_2["price"],
"symbol_return": symbol_a_return,
"open_disequilibrium": trade_a_1["disequilibrium"],
"open_scaled_disequilibrium": trade_a_1["scaled_disequilibrium"],
"close_disequilibrium": trade_a_2["disequilibrium"],
"close_scaled_disequilibrium": trade_a_2["scaled_disequilibrium"],
"open_time": trade_a_1["timestamp"],
"close_time": trade_a_2["timestamp"],
"shares": self.config["funding_per_pair"] / 2 / trade_a_1["price"],
"is_completed": True,
"close_condition": trade_a_2["status"],
"pair_return": pair_return
}
)
self.pairs_trades_[pair].append(
{
"symbol": symbol_b,
"open_side": trade_b_1["side"],
"open_action": trade_b_1["action"],
"open_price": trade_b_1["price"],
"close_side": trade_b_2["side"],
"close_action": trade_b_2["action"],
"close_price": trade_b_2["price"],
"symbol_return": symbol_b_return,
"open_disequilibrium": trade_b_1["disequilibrium"],
"open_scaled_disequilibrium": trade_b_1["scaled_disequilibrium"],
"close_disequilibrium": trade_b_2["disequilibrium"],
"close_scaled_disequilibrium": trade_b_2["scaled_disequilibrium"],
"open_time": trade_b_1["timestamp"],
"close_time": trade_b_2["timestamp"],
"shares": self.config["funding_per_pair"] / 2 / trade_b_1["price"],
"is_completed": True,
"close_condition": trade_b_2["status"],
"pair_return": pair_return
}
)
# Print pair returns with disequilibrium information
day_return = 0.0
if pair in self.pairs_trades_:
print(f"{pair}:")
pair_return = 0.0
for trd in self.pairs_trades_[pair]:
disequil_info = ""
                    if (
                        trd["open_scaled_disequilibrium"] is not None
                        and trd["close_scaled_disequilibrium"] is not None
                    ):
disequil_info = (
f' | Open Dis-eq: {trd["open_scaled_disequilibrium"]:.2f},'
f' Close Dis-eq: {trd["close_scaled_disequilibrium"]:.2f}'
)
print(
f' {trd["open_time"].time()}-{trd["close_time"].time()} {trd["symbol"]}: '
f' {trd["open_side"]} @ ${trd["open_price"]:.2f},'
f' {trd["close_side"]} @ ${trd["close_price"]:.2f},'
f' Return: {trd["symbol_return"]:.2f}%{disequil_info}'
)
pair_return += trd["symbol_return"]
print(f" Pair Total Return: {pair_return:.2f}%")
day_return += pair_return
# Print day total return and add to global realized PnL
if day_return != 0:
print(f" Day Total Return: {day_return:.2f}%")
self.add_realized_pnl(day_return)
def print_outstanding_positions(self) -> None:
"""Print all outstanding positions with share quantities and current values."""
if not self.get_outstanding_positions():
print("\n====== NO OUTSTANDING POSITIONS ======")
return
print(f"\n====== OUTSTANDING POSITIONS ======")
print(
f"{'Pair':<15}"
f" {'Symbol':<10}"
f" {'Side':<4}"
f" {'Shares':<10}"
f" {'Open $':<8}"
f" {'Current $':<10}"
f" {'Value $':<12}"
f" {'Disequilibrium':<15}"
)
print("-" * 100)
total_value = 0.0
for pos in self.get_outstanding_positions():
# Print position A
print(
f"{pos['pair']:<15}"
f" {pos['symbol_a']:<10}"
f" {pos['side_a']:<4}"
f" {pos['shares_a']:<10.2f}"
f" {pos['open_px_a']:<8.2f}"
f" {pos['current_px_a']:<10.2f}"
f" {pos['current_value_a']:<12.2f}"
f" {'':<15}"
)
# Print position B
print(
f"{'':<15}"
f" {pos['symbol_b']:<10}"
f" {pos['side_b']:<4}"
f" {pos['shares_b']:<10.2f}"
f" {pos['open_px_b']:<8.2f}"
f" {pos['current_px_b']:<10.2f}"
f" {pos['current_value_b']:<12.2f}"
)
# Print pair totals with disequilibrium info
print(
f"{'':<15}"
f" {'PAIR TOTAL':<10}"
f" {'':<4}"
f" {'':<10}"
f" {'':<8}"
f" {'':<10}"
f" {pos['total_current_value']:<12.2f}"
)
# Print disequilibrium details
print(
f"{'':<15}"
f" {'DISEQUIL':<10}"
f" {'':<4}"
f" {'':<10}"
f" {'':<8}"
f" {'':<10}"
f" Raw: {pos['current_disequilibrium']:<6.4f}"
f" Scaled: {pos['current_scaled_disequilibrium']:<6.4f}"
)
print("-" * 100)
total_value += pos["total_current_value"]
print(f"{'TOTAL OUTSTANDING VALUE':<80} ${total_value:<12.2f}")
def print_grand_totals(self) -> None:
"""Print grand totals across all pairs."""
print(f"\n====== GRAND TOTALS ACROSS ALL PAIRS ======")
print(f"Total Realized PnL: {self.get_total_realized_pnl():.2f}%")
def handle_outstanding_position(
self,
pair: TradingPair,
pair_result_df: pd.DataFrame,
last_row_index: int,
open_side_a: str,
open_side_b: str,
open_px_a: float,
open_px_b: float,
open_tstamp: datetime,
) -> Tuple[float, float, float]:
"""
Handle calculation and tracking of outstanding positions when no close signal is found.
Args:
pair: TradingPair object
pair_result_df: DataFrame with pair results
last_row_index: Index of the last row in the data
open_side_a, open_side_b: Trading sides for symbols A and B
open_px_a, open_px_b: Opening prices for symbols A and B
open_tstamp: Opening timestamp
"""
if pair_result_df is None or pair_result_df.empty:
return 0, 0, 0
last_row = pair_result_df.loc[last_row_index]
last_tstamp = last_row["tstamp"]
colname_a, colname_b = pair.exec_prices_colnames()
last_px_a = last_row[colname_a]
last_px_b = last_row[colname_b]
# Calculate share quantities based on funding per pair
# Split funding equally between the two positions
funding_per_position = self.config["funding_per_pair"] / 2
shares_a = funding_per_position / open_px_a
shares_b = funding_per_position / open_px_b
# Calculate current position values (shares * current price)
current_value_a = shares_a * last_px_a * (-1 if open_side_a == "SELL" else 1)
current_value_b = shares_b * last_px_b * (-1 if open_side_b == "SELL" else 1)
total_current_value = current_value_a + current_value_b
# Get disequilibrium information
current_disequilibrium = last_row["disequilibrium"]
current_scaled_disequilibrium = last_row["scaled_disequilibrium"]
# Store outstanding positions
self.add_outstanding_position(
{
"pair": str(pair),
"symbol_a": pair.symbol_a_,
"symbol_b": pair.symbol_b_,
"side_a": open_side_a,
"side_b": open_side_b,
"shares_a": shares_a,
"shares_b": shares_b,
"open_px_a": open_px_a,
"open_px_b": open_px_b,
"current_px_a": last_px_a,
"current_px_b": last_px_b,
"current_value_a": current_value_a,
"current_value_b": current_value_b,
"total_current_value": total_current_value,
"open_time": open_tstamp,
"last_time": last_tstamp,
"current_abs_term": current_scaled_disequilibrium,
"current_disequilibrium": current_disequilibrium,
"current_scaled_disequilibrium": current_scaled_disequilibrium,
}
)
# Print position details
print(f"{pair}: NO CLOSE SIGNAL FOUND - Position held until end of session")
print(f" Open: {open_tstamp} | Last: {last_tstamp}")
print(
f" {pair.symbol_a_}: {open_side_a} {shares_a:.2f} shares @ ${open_px_a:.2f} -> ${last_px_a:.2f} | Value: ${current_value_a:.2f}"
)
print(
f" {pair.symbol_b_}: {open_side_b} {shares_b:.2f} shares @ ${open_px_b:.2f} -> ${last_px_b:.2f} | Value: ${current_value_b:.2f}"
)
print(f" Total Value: ${total_current_value:.2f}")
print(
f" Disequilibrium: {current_disequilibrium:.4f} | Scaled: {current_scaled_disequilibrium:.4f}"
)
return current_value_a, current_value_b, total_current_value
def store_results_in_database(
self, db_path: str, day: str
) -> None:
"""
Store backtest results in the SQLite database.
"""
if db_path.upper() == "NONE":
return
try:
# Extract date from datafile name (assuming format like 20250528.mktdata.ohlcv.db)
date_str = day
# Convert to proper date format
try:
date_obj = datetime.strptime(date_str, "%Y%m%d").date()
except ValueError:
# If date parsing fails, use current date
date_obj = datetime.now().date()
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Process each trade from bt_result
trades = self.get_trades()
for pair_name, _ in trades.items():
# Second pass: insert completed trade records into database
for trade_pair in sorted(self.pairs_trades_[pair_name], key=lambda x: x["open_time"]):
# Only store completed trades in pt_bt_results table
cursor.execute(
"""
INSERT INTO pt_bt_results (
date, pair, symbol, open_time, open_side, open_price,
open_quantity, open_disequilibrium, close_time, close_side,
close_price, close_quantity, close_disequilibrium,
symbol_return, pair_return, close_condition
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
date_obj,
pair_name,
trade_pair["symbol"],
trade_pair["open_time"],
trade_pair["open_side"],
trade_pair["open_price"],
trade_pair["shares"],
trade_pair["open_scaled_disequilibrium"],
trade_pair["close_time"],
trade_pair["close_side"],
trade_pair["close_price"],
trade_pair["shares"],
trade_pair["close_scaled_disequilibrium"],
trade_pair["symbol_return"],
trade_pair["pair_return"],
trade_pair["close_condition"]
),
)
# Store outstanding positions in separate table
outstanding_positions = self.get_outstanding_positions()
for pos in outstanding_positions:
# Calculate position quantity (negative for SELL positions)
position_qty_a = (
pos["shares_a"] if pos["side_a"] == "BUY" else -pos["shares_a"]
)
position_qty_b = (
pos["shares_b"] if pos["side_b"] == "BUY" else -pos["shares_b"]
)
# Calculate unrealized returns
# For symbol A: (current_price - open_price) / open_price * 100 * position_direction
unrealized_return_a = (
(pos["current_px_a"] - pos["open_px_a"]) / pos["open_px_a"] * 100
) * (1 if pos["side_a"] == "BUY" else -1)
unrealized_return_b = (
(pos["current_px_b"] - pos["open_px_b"]) / pos["open_px_b"] * 100
) * (1 if pos["side_b"] == "BUY" else -1)
# Store outstanding position for symbol A
cursor.execute(
"""
INSERT INTO outstanding_positions (
date, pair, symbol, position_quantity, last_price, unrealized_return, open_price, open_side
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
(
date_obj,
pos["pair"],
pos["symbol_a"],
position_qty_a,
pos["current_px_a"],
unrealized_return_a,
pos["open_px_a"],
pos["side_a"],
),
)
# Store outstanding position for symbol B
cursor.execute(
"""
INSERT INTO outstanding_positions (
date, pair, symbol, position_quantity, last_price, unrealized_return, open_price, open_side
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
(
date_obj,
pos["pair"],
pos["symbol_b"],
position_qty_b,
pos["current_px_b"],
unrealized_return_b,
pos["open_px_b"],
pos["side_b"],
),
)
conn.commit()
conn.close()
except Exception as e:
print(f"Error storing results in database: {str(e)}")
import traceback
traceback.print_exc()

View File

@ -0,0 +1,317 @@
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Dict, Optional, cast
import pandas as pd # type: ignore[import]
from pt_trading.fit_method import PairsTradingFitMethod
from pt_trading.results import BacktestResult
from pt_trading.trading_pair import PairState, TradingPair
from statsmodels.tsa.vector_ar.vecm import VECM, VECMResults
NanoPerMin = 1e9
class RollingFit(PairsTradingFitMethod):
"""
N O T E:
=========
    - This class remains abstract.
    - The following method must be implemented by a subclass:
- create_trading_pair()
=========
"""
def __init__(self) -> None:
super().__init__()
def run_pair(
self, pair: TradingPair, bt_result: BacktestResult
) -> Optional[pd.DataFrame]:
print(f"***{pair}*** STARTING....")
config = pair.config_
curr_training_start_idx = pair.get_begin_index()
end_index = pair.get_end_index()
pair.user_data_["state"] = PairState.INITIAL
# Initialize trades DataFrame with proper dtypes to avoid concatenation warnings
pair.user_data_["trades"] = pd.DataFrame(columns=self.TRADES_COLUMNS).astype(
{
"time": "datetime64[ns]",
"symbol": "string",
"side": "string",
"action": "string",
"price": "float64",
"disequilibrium": "float64",
"scaled_disequilibrium": "float64",
"pair": "object",
}
)
training_minutes = config["training_minutes"]
curr_predicted_row_idx = 0
while True:
print(curr_training_start_idx, end="\r")
pair.get_datasets(
training_minutes=training_minutes,
training_start_index=curr_training_start_idx,
testing_size=1,
)
if len(pair.training_df_) < training_minutes:
print(
f"{pair}: current offset={curr_training_start_idx}"
f" * Training data length={len(pair.training_df_)} < {training_minutes}"
" * Not enough training data. Completing the job."
)
break
try:
# ================================ PREDICTION ================================
self.pair_predict_result_ = pair.predict()
except Exception as e:
raise RuntimeError(
f"{pair}: TrainingPrediction failed: {str(e)}"
) from e
curr_training_start_idx += 1
if curr_training_start_idx > end_index:
break
curr_predicted_row_idx += 1
self._create_trading_signals(pair, config, bt_result)
print(f"***{pair}*** FINISHED *** Num Trades:{len(pair.user_data_['trades'])}")
return pair.get_trades()
def _create_trading_signals(
self, pair: TradingPair, config: Dict, bt_result: BacktestResult
) -> None:
predicted_df = self.pair_predict_result_
assert predicted_df is not None
open_threshold = config["dis-equilibrium_open_trshld"]
close_threshold = config["dis-equilibrium_close_trshld"]
for curr_predicted_row_idx in range(len(predicted_df)):
pred_row = predicted_df.iloc[curr_predicted_row_idx]
scaled_disequilibrium = pred_row["scaled_disequilibrium"]
if pair.user_data_["state"] in [
PairState.INITIAL,
PairState.CLOSE,
PairState.CLOSE_POSITION,
PairState.CLOSE_STOP_LOSS,
PairState.CLOSE_STOP_PROFIT,
]:
if scaled_disequilibrium >= open_threshold:
open_trades = self._get_open_trades(
pair, row=pred_row, open_threshold=open_threshold
)
if open_trades is not None:
open_trades["status"] = PairState.OPEN.name
print(f"OPEN TRADES:\n{open_trades}")
pair.add_trades(open_trades)
pair.user_data_["state"] = PairState.OPEN
pair.on_open_trades(open_trades)
elif pair.user_data_["state"] == PairState.OPEN:
if scaled_disequilibrium <= close_threshold:
close_trades = self._get_close_trades(
pair, row=pred_row, close_threshold=close_threshold
)
if close_trades is not None:
close_trades["status"] = PairState.CLOSE.name
print(f"CLOSE TRADES:\n{close_trades}")
pair.add_trades(close_trades)
pair.user_data_["state"] = PairState.CLOSE
pair.on_close_trades(close_trades)
elif pair.to_stop_close_conditions(predicted_row=pred_row):
close_trades = self._get_close_trades(
pair, row=pred_row, close_threshold=close_threshold
)
if close_trades is not None:
close_trades["status"] = pair.user_data_[
"stop_close_state"
].name
print(f"STOP CLOSE TRADES:\n{close_trades}")
pair.add_trades(close_trades)
pair.user_data_["state"] = pair.user_data_["stop_close_state"]
pair.on_close_trades(close_trades)
# Outstanding positions
if pair.user_data_["state"] == PairState.OPEN:
print(f"{pair}: *** Position is NOT CLOSED. ***")
# outstanding positions
if config["close_outstanding_positions"]:
close_position_row = pd.Series(pair.market_data_.iloc[-2])
close_position_row["disequilibrium"] = 0.0
close_position_row["scaled_disequilibrium"] = 0.0
close_position_row["signed_scaled_disequilibrium"] = 0.0
close_position_trades = self._get_close_trades(
pair=pair, row=close_position_row, close_threshold=close_threshold
)
if close_position_trades is not None:
close_position_trades["status"] = PairState.CLOSE_POSITION.name
print(f"CLOSE_POSITION TRADES:\n{close_position_trades}")
pair.add_trades(close_position_trades)
pair.user_data_["state"] = PairState.CLOSE_POSITION
pair.on_close_trades(close_position_trades)
else:
if predicted_df is not None:
bt_result.handle_outstanding_position(
pair=pair,
pair_result_df=predicted_df,
last_row_index=0,
open_side_a=pair.user_data_["open_side_a"],
open_side_b=pair.user_data_["open_side_b"],
open_px_a=pair.user_data_["open_px_a"],
open_px_b=pair.user_data_["open_px_b"],
open_tstamp=pair.user_data_["open_tstamp"],
)
def _get_open_trades(
self, pair: TradingPair, row: pd.Series, open_threshold: float
) -> Optional[pd.DataFrame]:
colname_a, colname_b = pair.exec_prices_colnames()
open_row = row
open_tstamp = open_row["tstamp"]
open_disequilibrium = open_row["disequilibrium"]
open_scaled_disequilibrium = open_row["scaled_disequilibrium"]
signed_scaled_disequilibrium = open_row["signed_scaled_disequilibrium"]
        open_px_a = open_row[colname_a]
        open_px_b = open_row[colname_b]
# creating the trades
        print(f"OPEN_TRADES: {open_tstamp} open_scaled_disequilibrium={open_scaled_disequilibrium}")
if open_disequilibrium > 0:
open_side_a = "SELL"
open_side_b = "BUY"
close_side_a = "BUY"
close_side_b = "SELL"
else:
open_side_a = "BUY"
open_side_b = "SELL"
close_side_a = "SELL"
close_side_b = "BUY"
# save closing sides
pair.user_data_["open_side_a"] = open_side_a
pair.user_data_["open_side_b"] = open_side_b
pair.user_data_["open_px_a"] = open_px_a
pair.user_data_["open_px_b"] = open_px_b
pair.user_data_["open_tstamp"] = open_tstamp
pair.user_data_["close_side_a"] = close_side_a
pair.user_data_["close_side_b"] = close_side_b
# create opening trades
trd_signal_tuples = [
(
open_tstamp,
pair.symbol_a_,
open_side_a,
"OPEN",
open_px_a,
open_disequilibrium,
open_scaled_disequilibrium,
signed_scaled_disequilibrium,
pair,
),
(
open_tstamp,
pair.symbol_b_,
open_side_b,
"OPEN",
open_px_b,
open_disequilibrium,
open_scaled_disequilibrium,
signed_scaled_disequilibrium,
pair,
),
]
# Create DataFrame with explicit dtypes to avoid concatenation warnings
df = pd.DataFrame(trd_signal_tuples, columns=self.TRADES_COLUMNS)
# Ensure consistent dtypes
return df.astype(
{
"time": "datetime64[ns]",
"action": "string",
"symbol": "string",
"price": "float64",
"disequilibrium": "float64",
"scaled_disequilibrium": "float64",
"signed_scaled_disequilibrium": "float64",
"pair": "object",
}
)
def _get_close_trades(
self, pair: TradingPair, row: pd.Series, close_threshold: float
) -> Optional[pd.DataFrame]:
colname_a, colname_b = pair.exec_prices_colnames()
close_row = row
close_tstamp = close_row["tstamp"]
close_disequilibrium = close_row["disequilibrium"]
close_scaled_disequilibrium = close_row["scaled_disequilibrium"]
signed_scaled_disequilibrium = close_row["signed_scaled_disequilibrium"]
        close_px_a = close_row[colname_a]
        close_px_b = close_row[colname_b]
close_side_a = pair.user_data_["close_side_a"]
close_side_b = pair.user_data_["close_side_b"]
trd_signal_tuples = [
(
close_tstamp,
pair.symbol_a_,
close_side_a,
"CLOSE",
close_px_a,
close_disequilibrium,
close_scaled_disequilibrium,
signed_scaled_disequilibrium,
pair,
),
(
close_tstamp,
pair.symbol_b_,
close_side_b,
"CLOSE",
close_px_b,
close_disequilibrium,
close_scaled_disequilibrium,
signed_scaled_disequilibrium,
pair,
),
]
# Add tuples to data frame with explicit dtypes to avoid concatenation warnings
df = pd.DataFrame(
trd_signal_tuples,
columns=self.TRADES_COLUMNS,
)
# Ensure consistent dtypes
return df.astype(
{
"time": "datetime64[ns]",
"action": "string",
"symbol": "string",
"price": "float64",
"disequilibrium": "float64",
"scaled_disequilibrium": "float64",
"signed_scaled_disequilibrium": "float64",
"pair": "object",
}
)
    def reset(self) -> None:
        # Nothing to reset: run_pair() re-derives its training start index per call.
        pass

View File

@ -0,0 +1,380 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Dict, List, Optional
import pandas as pd # type:ignore
class PairState(Enum):
INITIAL = 1
OPEN = 2
CLOSE = 3
CLOSE_POSITION = 4
CLOSE_STOP_LOSS = 5
CLOSE_STOP_PROFIT = 6
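# State lifecycle (as used by the fit method's signal loop): INITIAL -> OPEN ->
# one of CLOSE / CLOSE_POSITION / CLOSE_STOP_LOSS / CLOSE_STOP_PROFIT; any
# CLOSE_* state re-arms the pair for the next OPEN signal.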
class CointegrationData:
EG_PVALUE_THRESHOLD = 0.05
tstamp_: pd.Timestamp
pair_: str
eg_pvalue_: float
johansen_lr1_: float
johansen_cvt_: float
eg_is_cointegrated_: bool
johansen_is_cointegrated_: bool
def __init__(self, pair: TradingPair):
training_df = pair.training_df_
assert training_df is not None
from statsmodels.tsa.vector_ar.vecm import coint_johansen
df = training_df[pair.colnames()].reset_index(drop=True)
# Run Johansen cointegration test
result = coint_johansen(df, det_order=0, k_ar_diff=1)
self.johansen_lr1_ = result.lr1[0]
self.johansen_cvt_ = result.cvt[0, 1]
self.johansen_is_cointegrated_ = self.johansen_lr1_ > self.johansen_cvt_
# Run Engle-Granger cointegration test
from statsmodels.tsa.stattools import coint # type: ignore
col1, col2 = pair.colnames()
assert training_df is not None
series1 = training_df[col1].reset_index(drop=True)
series2 = training_df[col2].reset_index(drop=True)
self.eg_pvalue_ = float(coint(series1, series2)[1])
self.eg_is_cointegrated_ = bool(self.eg_pvalue_ < self.EG_PVALUE_THRESHOLD)
self.tstamp_ = training_df.index[-1]
self.pair_ = pair.name()
def to_dict(self) -> Dict[str, Any]:
return {
"tstamp": self.tstamp_,
"pair": self.pair_,
"eg_pvalue": self.eg_pvalue_,
"johansen_lr1": self.johansen_lr1_,
"johansen_cvt": self.johansen_cvt_,
"eg_is_cointegrated": self.eg_is_cointegrated_,
"johansen_is_cointegrated": self.johansen_is_cointegrated_,
}
def __repr__(self) -> str:
return f"CointegrationData(tstamp={self.tstamp_}, pair={self.pair_}, eg_pvalue={self.eg_pvalue_}, johansen_lr1={self.johansen_lr1_}, johansen_cvt={self.johansen_cvt_}, eg_is_cointegrated={self.eg_is_cointegrated_}, johansen_is_cointegrated={self.johansen_is_cointegrated_})"
class TradingPair(ABC):
market_data_: pd.DataFrame
symbol_a_: str
symbol_b_: str
stat_model_price_: str
training_mu_: float
training_std_: float
training_df_: pd.DataFrame
testing_df_: pd.DataFrame
user_data_: Dict[str, Any]
# predicted_df_: Optional[pd.DataFrame]
def __init__(
self,
config: Dict[str, Any],
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
):
self.symbol_a_ = symbol_a
self.symbol_b_ = symbol_b
self.stat_model_price_ = config["stat_model_price"]
self.user_data_ = {}
self.predicted_df_ = None
self.config_ = config
self._set_market_data(market_data)
def _set_market_data(self, market_data: pd.DataFrame) -> None:
self.market_data_ = pd.DataFrame(
self._transform_dataframe(market_data)[["tstamp"] + self.colnames()]
)
self.market_data_ = self.market_data_.dropna().reset_index(drop=True)
self.market_data_["tstamp"] = pd.to_datetime(self.market_data_["tstamp"])
self.market_data_ = self.market_data_.sort_values("tstamp")
self._set_execution_price_data()
def _set_execution_price_data(self) -> None:
if "execution_price" not in self.config_:
self.market_data_[f"exec_price_{self.symbol_a_}"] = self.market_data_[f"{self.stat_model_price_}_{self.symbol_a_}"]
self.market_data_[f"exec_price_{self.symbol_b_}"] = self.market_data_[f"{self.stat_model_price_}_{self.symbol_b_}"]
return
        execution_price_column = self.config_["execution_price"]["column"]  # currently unused: the shift below keys off stat_model_price_
execution_price_shift = self.config_["execution_price"]["shift"]
self.market_data_[f"exec_price_{self.symbol_a_}"] = self.market_data_[f"{self.stat_model_price_}_{self.symbol_a_}"].shift(-execution_price_shift)
self.market_data_[f"exec_price_{self.symbol_b_}"] = self.market_data_[f"{self.stat_model_price_}_{self.symbol_b_}"].shift(-execution_price_shift)
self.market_data_ = self.market_data_.dropna().reset_index(drop=True)
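    # Illustrative config (values hypothetical):
    #   {"execution_price": {"column": "close", "shift": 1}}
    # shift(-1) pulls bar t+1's price onto row t, so a signal on bar t is assumed
    # to fill at the next bar; trailing rows without a next bar are dropped.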
def get_begin_index(self) -> int:
if "trading_hours" not in self.config_:
return 0
assert "timezone" in self.config_["trading_hours"]
assert "begin_session" in self.config_["trading_hours"]
start_time = (
pd.to_datetime(self.config_["trading_hours"]["begin_session"])
.tz_localize(self.config_["trading_hours"]["timezone"])
.time()
)
mask = self.market_data_["tstamp"].dt.time >= start_time
return int(self.market_data_.index[mask].min())
def get_end_index(self) -> int:
if "trading_hours" not in self.config_:
return 0
assert "timezone" in self.config_["trading_hours"]
assert "end_session" in self.config_["trading_hours"]
end_time = (
pd.to_datetime(self.config_["trading_hours"]["end_session"])
.tz_localize(self.config_["trading_hours"]["timezone"])
.time()
)
mask = self.market_data_["tstamp"].dt.time <= end_time
return int(self.market_data_.index[mask].max())
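    # Illustrative config (values hypothetical):
    #   {"trading_hours": {"timezone": "America/New_York",
    #                      "begin_session": "09:30", "end_session": "16:00"}}
    # get_begin_index()/get_end_index() then bound signal generation to that session.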
def _transform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
# Select only the columns we need
df_selected: pd.DataFrame = pd.DataFrame(
df[["tstamp", "symbol", self.stat_model_price_]]
)
# Start with unique timestamps
result_df: pd.DataFrame = (
pd.DataFrame(df_selected["tstamp"]).drop_duplicates().reset_index(drop=True)
)
# For each unique symbol, add a corresponding close price column
symbols = df_selected["symbol"].unique()
for symbol in symbols:
# Filter rows for this symbol
df_symbol = df_selected[df_selected["symbol"] == symbol].reset_index(
drop=True
)
            # Create column name like "close_COIN"
new_price_column = f"{self.stat_model_price_}_{symbol}"
# Create temporary dataframe with timestamp and price
temp_df = pd.DataFrame(
{
"tstamp": df_symbol["tstamp"],
new_price_column: df_symbol[self.stat_model_price_],
}
)
# Join with our result dataframe
result_df = pd.merge(result_df, temp_df, on="tstamp", how="left")
        result_df = result_df.reset_index(
            drop=True
        )  # dropna() is deferred to the end so an irrelevant symbol's gaps are handled once
        return result_df.dropna()
def get_datasets(
self,
training_minutes: int,
training_start_index: int = 0,
testing_size: Optional[int] = None,
) -> None:
        testing_start_index = training_start_index + training_minutes
        self.training_df_ = self.market_data_.iloc[
            training_start_index:testing_start_index, :
        ].copy()
        assert self.training_df_ is not None
        self.training_df_ = self.training_df_.dropna().reset_index(drop=True)
if testing_size is None:
self.testing_df_ = self.market_data_.iloc[testing_start_index:, :].copy()
else:
self.testing_df_ = self.market_data_.iloc[
testing_start_index : testing_start_index + testing_size, :
].copy()
assert self.testing_df_ is not None
self.testing_df_ = self.testing_df_.dropna().reset_index(drop=True)
def colnames(self) -> List[str]:
return [
f"{self.stat_model_price_}_{self.symbol_a_}",
f"{self.stat_model_price_}_{self.symbol_b_}",
]
def exec_prices_colnames(self) -> List[str]:
return [
f"exec_price_{self.symbol_a_}",
f"exec_price_{self.symbol_b_}",
]
def add_trades(self, trades: pd.DataFrame) -> None:
if self.user_data_["trades"] is None or len(self.user_data_["trades"]) == 0:
# If trades is empty or None, just assign the new trades directly
self.user_data_["trades"] = trades.copy()
else:
# Ensure both DataFrames have the same columns and dtypes before concatenation
existing_trades = self.user_data_["trades"]
# If existing trades is empty, just assign the new trades
if len(existing_trades) == 0:
self.user_data_["trades"] = trades.copy()
else:
# Ensure both DataFrames have the same columns
if set(existing_trades.columns) != set(trades.columns):
# Add missing columns to trades with appropriate default values
for col in existing_trades.columns:
if col not in trades.columns:
if col == "time":
trades[col] = pd.Timestamp.now()
elif col in ["action", "symbol"]:
trades[col] = ""
elif col in [
"price",
"disequilibrium",
"scaled_disequilibrium",
]:
trades[col] = 0.0
elif col == "pair":
trades[col] = None
else:
trades[col] = None
# Concatenate with explicit dtypes to avoid warnings
self.user_data_["trades"] = pd.concat(
[existing_trades, trades], ignore_index=True, copy=False
)
    def get_trades(self) -> pd.DataFrame:
        trades = self.user_data_.get("trades")
        return trades if trades is not None else pd.DataFrame()
def cointegration_check(self) -> Optional[pd.DataFrame]:
print(f"***{self}*** STARTING....")
config = self.config_
curr_training_start_idx = 0
COINTEGRATION_DATA_COLUMNS = {
"tstamp": "datetime64[ns]",
"pair": "string",
"eg_pvalue": "float64",
"johansen_lr1": "float64",
"johansen_cvt": "float64",
"eg_is_cointegrated": "bool",
"johansen_is_cointegrated": "bool",
}
        # Initialize the cointegration-results DataFrame with the expected
        # columns (COINTEGRATION_DATA_COLUMNS documents the intended dtypes)
        result: pd.DataFrame = pd.DataFrame(
            columns=list(COINTEGRATION_DATA_COLUMNS.keys())
        )
training_minutes = config["training_minutes"]
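        # Walk-forward scan: advance the training window one row at a time,
        # recording Engle-Granger and Johansen statistics for each window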
while True:
print(curr_training_start_idx, end="\r")
self.get_datasets(
training_minutes=training_minutes,
training_start_index=curr_training_start_idx,
testing_size=1,
)
if len(self.training_df_) < training_minutes:
print(
f"{self}: current offset={curr_training_start_idx}"
f" * Training data length={len(self.training_df_)} < {training_minutes}"
" * Not enough training data. Completing the job."
)
break
new_row = pd.Series(CointegrationData(self).to_dict())
result.loc[len(result)] = new_row
curr_training_start_idx += 1
return result
def to_stop_close_conditions(self, predicted_row: pd.Series) -> bool:
config = self.config_
if (
"stop_close_conditions" not in config
or config["stop_close_conditions"] is None
):
return False
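        # Thresholds are percent returns, matching _current_return(); e.g.
        # (illustrative) stop_close_conditions: { profit: 0.5, loss: -0.5 }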
if "profit" in config["stop_close_conditions"]:
current_return = self._current_return(predicted_row)
#
# print(f"time={predicted_row['tstamp']} current_return={current_return}")
#
if current_return >= config["stop_close_conditions"]["profit"]:
print(f"STOP PROFIT: {current_return}")
self.user_data_["stop_close_state"] = PairState.CLOSE_STOP_PROFIT
return True
if "loss" in config["stop_close_conditions"]:
if current_return <= config["stop_close_conditions"]["loss"]:
print(f"STOP LOSS: {current_return}")
self.user_data_["stop_close_state"] = PairState.CLOSE_STOP_LOSS
return True
return False
    def on_open_trades(self, trades: pd.DataFrame) -> None:
        self.user_data_.pop("close_trades", None)
        self.user_data_["open_trades"] = trades
    def on_close_trades(self, trades: pd.DataFrame) -> None:
        self.user_data_.pop("open_trades", None)
        self.user_data_["close_trades"] = trades
def _current_return(self, predicted_row: pd.Series) -> float:
if "open_trades" in self.user_data_:
open_trades = self.user_data_["open_trades"]
if len(open_trades) == 0:
return 0.0
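            # Per-leg percent return: long legs gain when the price rises;
            # SELL legs flip the sign so they gain when the price falls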
def _single_instrument_return(symbol: str) -> float:
instrument_open_trades = open_trades[open_trades["symbol"] == symbol]
instrument_open_price = instrument_open_trades["price"].iloc[0]
sign = -1 if instrument_open_trades["side"].iloc[0] == "SELL" else 1
instrument_price = predicted_row[f"{self.stat_model_price_}_{symbol}"]
instrument_return = (
sign
* (instrument_price - instrument_open_price)
/ instrument_open_price
)
return float(instrument_return) * 100.0
instrument_a_return = _single_instrument_return(self.symbol_a_)
instrument_b_return = _single_instrument_return(self.symbol_b_)
return instrument_a_return + instrument_b_return
return 0.0
def __repr__(self) -> str:
return self.name()
def name(self) -> str:
return f"{self.symbol_a_} & {self.symbol_b_}"
@abstractmethod
def predict(self) -> pd.DataFrame: ...
# @abstractmethod
# def predicted_df(self) -> Optional[pd.DataFrame]: ...

View File

@ -0,0 +1,193 @@
# Original script moved to vecm_rolling_fit_01.py
# 09.09.25 Added a GARCH model for predicting spread volatility.
#
# GARCH(1,1) rule of thumb:
#   alpha + beta ≈ 1  -> strong volatility clustering / persistence.
#   alpha + beta much lower -> volatility mean-reverts quickly.
#   alpha + beta > 1  -> model is unstable / non-stationary (bad).
#
# The strategy combines:
#   - the VECM disequilibrium (mean-reversion signal), and
#   - the GARCH volatility forecast (risk measure),
# entering trades only when the VECM disequilibrium z-score exceeds its
# threshold and the GARCH-forecasted volatility is not too high (this avoids
# noise-driven signals). The result is a volatility-adjusted pairs trading
# strategy that is more robust than plain VECM.
#
# The pair_predict_result_ DataFrame now includes:
#   disequilibrium, scaled_disequilibrium (z-scores),
#   garch_alpha, garch_beta, garch_persistence (alpha + beta),
#   garch_vol_forecast (1-step volatility forecast), and
#   high_volatility: 1 if persistence > 0.95 or vol_forecast > 2
#   (rule of thumb for an unstable / risky regime), 0 for a stable regime.
# VECM/GARCH
# vecm_rolling_fit.py:
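# A minimal entry-filter sketch (illustrative only; `open_trshld` and
# `open_pair_position` are assumed names, not part of this module):
#
#   row = pair_predict_result_.iloc[-1]
#   if row["scaled_disequilibrium"] > open_trshld and row["high_volatility"] == 0:
#       open_pair_position(row)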
from typing import Any, Dict, Optional
import numpy as np
import pandas as pd
from pt_trading.results import BacktestResult
from pt_trading.rolling_window_fit import RollingFit
from pt_trading.trading_pair import TradingPair
from statsmodels.tsa.vector_ar.vecm import VECM, VECMResults
from arch import arch_model
NanoPerMin = 1e9
class VECMTradingPair(TradingPair):
vecm_fit_: Optional[VECMResults]
pair_predict_result_: Optional[pd.DataFrame]
def __init__(
self,
config: Dict[str, Any],
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
):
super().__init__(config, market_data, symbol_a, symbol_b)
self.vecm_fit_ = None
self.pair_predict_result_ = None
self.garch_fit_ = None
self.sigma_spread_forecast_ = None
self.garch_alpha_ = None
self.garch_beta_ = None
self.garch_persistence_ = None
self.high_volatility_flag_ = None
def _train_pair(self) -> None:
self._fit_VECM()
assert self.vecm_fit_ is not None
diseq_series = self.training_df_[self.colnames()] @ self.vecm_fit_.beta
self.training_mu_ = float(diseq_series[0].mean())
self.training_std_ = float(diseq_series[0].std())
self.training_df_["disequilibrium"] = diseq_series
self.training_df_["scaled_disequilibrium"] = (
diseq_series - self.training_mu_
) / self.training_std_
def _fit_VECM(self) -> None:
assert self.training_df_ is not None
vecm_df = self.training_df_[self.colnames()].reset_index(drop=True)
vecm_model = VECM(vecm_df, coint_rank=1)
vecm_fit = vecm_model.fit()
self.vecm_fit_ = vecm_fit
# Error Correction Term (spread)
ect_series = (vecm_df @ vecm_fit.beta).iloc[:, 0]
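        # beta is the cointegrating vector: ect_t = beta' @ p_t should be
        # stationary, so GARCH is fitted to its first differences below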
# Difference the spread for stationarity
dz = ect_series.diff().dropna()
if len(dz) < 30:
print("Not enough data for GARCH fitting.")
return
# Rescale if variance too small
if dz.std() < 0.1:
dz = dz * 1000
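            # GARCH alpha/beta (and their sum, the persistence) are
            # scale-invariant; rescaling only conditions the optimizer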
# print("Scale check:", dz.std())
try:
garch = arch_model(dz, vol="GARCH", p=1, q=1, mean="Zero", dist="normal")
garch_fit = garch.fit(disp="off")
self.garch_fit_ = garch_fit
# Extract parameters
params = garch_fit.params
self.garch_alpha_ = params.get("alpha[1]", np.nan)
self.garch_beta_ = params.get("beta[1]", np.nan)
self.garch_persistence_ = self.garch_alpha_ + self.garch_beta_
# print (f"GARCH α: {self.garch_alpha_:.4f}, β: {self.garch_beta_:.4f}, "
# f"α+β (persistence): {self.garch_persistence_:.4f}")
# One-step-ahead volatility forecast
forecast = garch_fit.forecast(horizon=1)
sigma_next = np.sqrt(forecast.variance.iloc[-1, 0])
self.sigma_spread_forecast_ = float(sigma_next)
# print("GARCH sigma forecast:", self.sigma_spread_forecast_)
# Rule of thumb: persistence close to 1 or large volatility forecast
self.high_volatility_flag_ = int(
(self.garch_persistence_ is not None and self.garch_persistence_ > 0.95)
or (self.sigma_spread_forecast_ is not None and self.sigma_spread_forecast_ > 2)
)
except Exception as e:
print(f"GARCH fit failed: {e}")
self.garch_fit_ = None
self.sigma_spread_forecast_ = None
self.high_volatility_flag_ = None
def predict(self) -> pd.DataFrame:
self._train_pair()
assert self.testing_df_ is not None
assert self.vecm_fit_ is not None
# VECM predictions
predicted_prices = self.vecm_fit_.predict(steps=len(self.testing_df_))
predicted_df = pd.merge(
self.testing_df_.reset_index(drop=True),
pd.DataFrame(predicted_prices, columns=pd.Index(self.colnames()), dtype=float),
left_index=True,
right_index=True,
suffixes=("", "_pred"),
).dropna()
# Disequilibrium and z-scores
predicted_df["disequilibrium"] = (
predicted_df[self.colnames()] @ self.vecm_fit_.beta
)
predicted_df["signed_scaled_disequilibrium"] = (
predicted_df["disequilibrium"] - self.training_mu_
) / self.training_std_
predicted_df["scaled_disequilibrium"] = abs(
predicted_df["signed_scaled_disequilibrium"]
)
# Add GARCH parameters + volatility forecast
predicted_df["garch_alpha"] = self.garch_alpha_
predicted_df["garch_beta"] = self.garch_beta_
predicted_df["garch_persistence"] = self.garch_persistence_
predicted_df["garch_vol_forecast"] = self.sigma_spread_forecast_
predicted_df["high_volatility"] = self.high_volatility_flag_
# Save results
if self.pair_predict_result_ is None:
self.pair_predict_result_ = predicted_df
else:
self.pair_predict_result_ = pd.concat(
[self.pair_predict_result_, predicted_df], ignore_index=True
)
return self.pair_predict_result_
class VECMRollingFit(RollingFit):
def __init__(self) -> None:
super().__init__()
def create_trading_pair(
self,
config: Dict,
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
) -> TradingPair:
return VECMTradingPair(
config=config,
market_data=market_data,
            symbol_a=symbol_a,
            symbol_b=symbol_b,
)

View File

@ -0,0 +1,124 @@
from typing import Any, Dict, Optional
import pandas as pd
import statsmodels.api as sm
from pt_trading.rolling_window_fit import RollingFit
from pt_trading.trading_pair import TradingPair
NanoPerMin = 1e9
class ZScoreTradingPair(TradingPair):
"""TradingPair implementation that fits a hedge ratio with OLS and
computes a standardized spread (z-score).
The class stores training spread mean/std and hedge ratio so the model
can be applied to testing data consistently.
"""
zscore_model_: Optional[sm.regression.linear_model.RegressionResultsWrapper]
pair_predict_result_: Optional[pd.DataFrame]
zscore_df_: Optional[pd.Series]
hedge_ratio_: Optional[float]
spread_mean_: Optional[float]
spread_std_: Optional[float]
def __init__(
self,
config: Dict[str, Any],
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
):
super().__init__(config, market_data, symbol_a, symbol_b)
self.zscore_model_ = None
self.pair_predict_result_ = None
self.zscore_df_ = None
self.hedge_ratio_ = None
self.spread_mean_ = None
self.spread_std_ = None
def _fit_zscore(self) -> None:
"""Fit OLS on the training window and compute training z-score."""
assert self.training_df_ is not None
# Extract price series for the two symbols from the training frame.
px_df = self.training_df_[self.colnames()]
symbol_a_px = px_df.iloc[:, 0]
symbol_b_px = px_df.iloc[:, 1]
# Align indexes and fit OLS: symbol_a ~ const + symbol_b
symbol_a_px, symbol_b_px = symbol_a_px.align(symbol_b_px, join="inner")
X = sm.add_constant(symbol_b_px)
self.zscore_model_ = sm.OLS(symbol_a_px, X).fit()
# Hedge ratio is the slope on symbol_b
params = self.zscore_model_.params
self.hedge_ratio_ = float(params.iloc[1]) if len(params) > 1 else 0.0
# Training spread and its standardized z-score
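        #   spread_t = a_t - hedge_ratio * b_t,   z_t = (spread_t - mean) / std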
spread = symbol_a_px - self.hedge_ratio_ * symbol_b_px
self.spread_mean_ = float(spread.mean())
        spread_std = float(spread.std(ddof=0))
        self.spread_std_ = spread_std if spread_std != 0 else 1.0
self.zscore_df_ = (spread - self.spread_mean_) / self.spread_std_
def predict(self) -> pd.DataFrame:
"""Apply fitted hedge ratio to the testing frame and return a
dataframe with canonical columns:
- disequilibrium: signed z-score
- scaled_disequilibrium: absolute z-score
- signed_scaled_disequilibrium: same as disequilibrium (keeps sign)
"""
# Fit on training window
self._fit_zscore()
assert self.zscore_df_ is not None
assert self.hedge_ratio_ is not None
assert self.spread_mean_ is not None and self.spread_std_ is not None
# Keep training columns for inspection
self.training_df_["disequilibrium"] = self.zscore_df_
self.training_df_["scaled_disequilibrium"] = self.zscore_df_.abs()
# Apply model to testing frame
assert self.testing_df_ is not None
test_df = self.testing_df_.copy()
px_test = test_df[self.colnames()]
a_test = px_test.iloc[:, 0]
b_test = px_test.iloc[:, 1]
a_test, b_test = a_test.align(b_test, join="inner")
# Compute test spread and standardize using training mean/std
test_spread = a_test - self.hedge_ratio_ * b_test
test_zscore = (test_spread - self.spread_mean_) / self.spread_std_
# Attach canonical columns
# Align back to test_df index if needed
test_zscore = test_zscore.reindex(test_df.index)
test_df["disequilibrium"] = test_zscore
test_df["signed_scaled_disequilibrium"] = test_zscore
test_df["scaled_disequilibrium"] = test_zscore.abs()
# Reset index and accumulate results across windows
test_df = test_df.reset_index(drop=True)
if self.pair_predict_result_ is None:
self.pair_predict_result_ = test_df
else:
self.pair_predict_result_ = pd.concat(
[self.pair_predict_result_, test_df], ignore_index=True
)
self.pair_predict_result_ = self.pair_predict_result_.reset_index(drop=True)
return self.pair_predict_result_.dropna()
class ZScoreRollingFit(RollingFit):
def __init__(self) -> None:
super().__init__()
def create_trading_pair(
self, config: Dict, market_data: pd.DataFrame, symbol_a: str, symbol_b: str
) -> TradingPair:
return ZScoreTradingPair(
config=config, market_data=market_data, symbol_a=symbol_a, symbol_b=symbol_b
)

View File

@ -1,12 +1,12 @@
 import hjson
 from typing import Dict
 from datetime import datetime
-# ---
-from cvttpy_tools.config import Config
-def load_config(config_path: str) -> Config:
-    return Config(json_src=f"file://{config_path}")
+def load_config(config_path: str) -> Dict:
+    with open(config_path, "r") as f:
+        config = hjson.load(f)
+    return dict(config)
 def expand_filename(filename: str) -> str:

View File

@ -1,10 +1,9 @@
 from __future__ import annotations
 import sqlite3
-from typing import Any, Dict, List, Tuple, cast
+from typing import Dict, List, cast
 import pandas as pd
-from cvttpy_trading.trading.instrument import ExchangeInstrument
 def load_sqlite_to_dataframe(db_path:str, query:str) -> pd.DataFrame:
     df: pd.DataFrame = pd.DataFrame()
@ -46,17 +45,19 @@ def convert_time_to_UTC(value: str, timezone: str, extra_minutes: int = 0) -> st
 def load_market_data(
     datafile: str,
-    instruments: List[ExchangeInstrument],
+    instruments: List[Dict[str, str]],
     db_table_name: str,
     trading_hours: Dict = {},
     extra_minutes: int = 0,
 ) -> pd.DataFrame:
-    inst_ids = ['"' + exch_inst.instrument_id() + '"' for exch_inst in instruments]
-    instrument_ids = list(set(inst_ids))
+    insts = [
+        '"' + instrument["instrument_id_pfx"] + instrument["symbol"] + '"'
+        for instrument in instruments
+    ]
+    instrument_ids = list(set(insts))
     exchange_ids = list(
-        set(['"' + instrument.exchange_id() + '"' for instrument in instruments])
+        set(['"' + instrument["exchange_id"] + '"' for instrument in instruments])
     )
     query = "select"

View File

@ -1,37 +0,0 @@
import os
import glob
from typing import Dict, List, Tuple
# ---
from cvttpy_tools.config import Config
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
DayT = str
DataFileNameT = str
def resolve_datafiles(
config: Config, date_pattern: str, instruments: List[ExchangeInstrument]
) -> List[Tuple[DayT, DataFileNameT]]:
resolved_files: List[Tuple[DayT, DataFileNameT]] = []
for exch_inst in instruments:
pattern = date_pattern
inst_type = exch_inst.user_data_.get("instrument_type", "?instrument_type?")
data_dir = config.get_value(f"market_data_loading/{inst_type}/data_directory")
if "*" in pattern or "?" in pattern:
# Handle wildcards
if not os.path.isabs(pattern):
pattern = os.path.join(data_dir, f"{pattern}.mktdata.ohlcv.db")
matched_files = glob.glob(pattern)
for matched_file in matched_files:
import re
match = re.search(r"(\d{8})\.mktdata\.ohlcv\.db$", matched_file)
assert match is not None
day = match.group(1)
resolved_files.append((day, matched_file))
else:
# Handle explicit file path
if not os.path.isabs(pattern):
pattern = os.path.join(data_dir, f"{pattern}.mktdata.ohlcv.db")
resolved_files.append((date_pattern, pattern))
return sorted(list(set(resolved_files))) # Remove duplicates and sort

View File

@ -1,79 +0,0 @@
from pairs_trading.lib.pt_strategy.research_strategy import PtResearchStrategy
def visualize_prices(strategy: PtResearchStrategy, trading_date: str) -> None:
# Plot raw price data
import matplotlib.pyplot as plt
# Set plotting style
import seaborn as sns
pair = strategy.trading_pair_
SYMBOL_A = pair.symbol_a()
SYMBOL_B = pair.symbol_b()
TRD_DATE = f"{trading_date[0:4]}-{trading_date[4:6]}-{trading_date[6:8]}"
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (15, 10)
# Get column names for the trading pair
colname_a, colname_b = pair.colnames()
price_data = strategy.pt_mkt_data_.market_data_df_.copy()
# Create separate subplots for better visibility
fig_price, price_axes = plt.subplots(2, 1, figsize=(18, 10))
# Plot SYMBOL_A
price_axes[0].plot(price_data['tstamp'], price_data[colname_a], alpha=0.7,
label=f'{SYMBOL_A}', linewidth=1, color='blue')
price_axes[0].set_title(f'{SYMBOL_A} Price Data ({TRD_DATE})')
price_axes[0].set_ylabel(f'{SYMBOL_A} Price')
price_axes[0].legend()
price_axes[0].grid(True)
# Plot SYMBOL_B
price_axes[1].plot(price_data['tstamp'], price_data[colname_b], alpha=0.7,
label=f'{SYMBOL_B}', linewidth=1, color='red')
price_axes[1].set_title(f'{SYMBOL_B} Price Data ({TRD_DATE})')
price_axes[1].set_ylabel(f'{SYMBOL_B} Price')
price_axes[1].set_xlabel('Time')
price_axes[1].legend()
price_axes[1].grid(True)
plt.tight_layout()
plt.show()
# Plot individual prices
fig, axes = plt.subplots(2, 1, figsize=(18, 12))
# Normalized prices for comparison
norm_a = price_data[colname_a] / price_data[colname_a].iloc[0]
norm_b = price_data[colname_b] / price_data[colname_b].iloc[0]
axes[0].plot(price_data['tstamp'], norm_a, label=f'{SYMBOL_A} (normalized)', alpha=0.8, linewidth=1)
axes[0].plot(price_data['tstamp'], norm_b, label=f'{SYMBOL_B} (normalized)', alpha=0.8, linewidth=1)
axes[0].set_title(f'Normalized Price Comparison (Base = 1.0) ({TRD_DATE})')
axes[0].set_ylabel('Normalized Price')
axes[0].legend()
axes[0].grid(True)
# Price ratio
price_ratio = price_data[colname_a] / price_data[colname_b]
axes[1].plot(price_data['tstamp'], price_ratio, label=f'{SYMBOL_A}/{SYMBOL_B} Ratio', color='green', alpha=0.8, linewidth=1)
axes[1].set_title(f'Price Ratio Px({SYMBOL_A})/Px({SYMBOL_B}) ({TRD_DATE})')
axes[1].set_ylabel('Ratio')
axes[1].set_xlabel('Time')
axes[1].legend()
axes[1].grid(True)
plt.tight_layout()
plt.show()
# Print basic statistics
print(f"\nPrice Statistics:")
print(f" {SYMBOL_A}: Mean=${price_data[colname_a].mean():.2f}, Std=${price_data[colname_a].std():.2f}")
print(f" {SYMBOL_B}: Mean=${price_data[colname_b].mean():.2f}, Std=${price_data[colname_b].std():.2f}")
print(f" Price Ratio: Mean={price_ratio.mean():.2f}, Std={price_ratio.std():.2f}")
print(f" Correlation: {price_data[colname_a].corr(price_data[colname_b]):.4f}")

View File

@ -1,502 +0,0 @@
from __future__ import annotations
from pairs_trading.lib.pt_strategy.results import (PairResearchResult)
from pairs_trading.lib.pt_strategy.research_strategy import PtResearchStrategy
def visualize_trades(strategy: PtResearchStrategy, results: PairResearchResult, trading_date: str) -> None:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
from IPython.display import HTML
from plotly.subplots import make_subplots
pair = strategy.trading_pair_
trades = results.trades_[trading_date].copy()
origin_mkt_data_df = strategy.pt_mkt_data_.origin_mkt_data_df_
mkt_data_df = strategy.pt_mkt_data_.market_data_df_
TRD_DATE = f"{trading_date[0:4]}-{trading_date[4:6]}-{trading_date[6:8]}"
SYMBOL_A = pair.symbol_a()
SYMBOL_B = pair.symbol_b()
print(f"\nCreated trading pair: {pair}")
print(f"Market data shape: {pair.market_data_.shape}")
print(f"Column names: {pair.colnames()}")
# Configure plotly for offline mode
pyo.init_notebook_mode(connected=True)
# Strategy-specific interactive visualization
assert strategy.config_ is not None
print("=== SLIDING FIT INTERACTIVE VISUALIZATION ===")
print("Note: Rolling Fit strategy visualization with interactive plotly charts")
# Create consistent timeline - superset of timestamps from both dataframes
all_timestamps = sorted(set(mkt_data_df['tstamp']))
# Create a unified timeline dataframe for consistent plotting
timeline_df = pd.DataFrame({'tstamp': all_timestamps})
# Merge with predicted data to get dis-equilibrium values
timeline_df = timeline_df.merge(strategy.predictions_df_[['tstamp', 'disequilibrium', 'scaled_disequilibrium', 'signed_scaled_disequilibrium']],
on='tstamp', how='left')
# Get Symbol_A and Symbol_B market data
colname_a, colname_b = pair.colnames()
symbol_a_data = mkt_data_df[['tstamp', colname_a]].copy()
symbol_b_data = mkt_data_df[['tstamp', colname_b]].copy()
norm_a = symbol_a_data[colname_a] / symbol_a_data[colname_a].iloc[0]
norm_b = symbol_b_data[colname_b] / symbol_b_data[colname_b].iloc[0]
print(f"Using consistent timeline with {len(timeline_df)} timestamps")
print(f"Timeline range: {timeline_df['tstamp'].min()} to {timeline_df['tstamp'].max()}")
# Create subplots with price charts at bottom
fig = make_subplots(
rows=4, cols=1,
row_heights=[0.3, 0.4, 0.15, 0.15],
subplot_titles=[
f'Dis-equilibrium with Trading Thresholds ({TRD_DATE})',
f'Normalized Price Comparison with BUY/SELL Signals - {SYMBOL_A}&{SYMBOL_B} ({TRD_DATE})',
f'{SYMBOL_A} Market Data with Trading Signals ({TRD_DATE})',
f'{SYMBOL_B} Market Data with Trading Signals ({TRD_DATE})',
],
vertical_spacing=0.06,
specs=[[{"secondary_y": False}],
[{"secondary_y": False}],
[{"secondary_y": False}],
[{"secondary_y": False}]]
)
# 1. Scaled dis-equilibrium with thresholds - using consistent timeline
fig.add_trace(
go.Scatter(
x=timeline_df['tstamp'],
y=timeline_df['scaled_disequilibrium'],
name='Absolute Scaled Dis-equilibrium',
line=dict(color='green', width=2),
opacity=0.8
),
row=1, col=1
)
fig.add_trace(
go.Scatter(
x=timeline_df['tstamp'],
y=timeline_df['signed_scaled_disequilibrium'],
name='Scaled Dis-equilibrium',
line=dict(color='darkmagenta', width=2),
opacity=0.8
),
row=1, col=1
)
# Add threshold lines to first subplot
fig.add_shape(
type="line",
x0=timeline_df['tstamp'].min(),
x1=timeline_df['tstamp'].max(),
y0=strategy.config_.get_value('model/disequilibrium/open_trshld'),
y1=strategy.config_.get_value('model/disequilibrium/open_trshld'),
line=dict(color="purple", width=2, dash="dot"),
opacity=0.7,
row=1, col=1
)
fig.add_shape(
type="line",
x0=timeline_df['tstamp'].min(),
x1=timeline_df['tstamp'].max(),
y0=-strategy.config_.get_value('model/disequilibrium/open_trshld'),
y1=-strategy.config_.get_value('model/disequilibrium/open_trshld'),
line=dict(color="purple", width=2, dash="dot"),
opacity=0.7,
row=1, col=1
)
fig.add_shape(
type="line",
x0=timeline_df['tstamp'].min(),
x1=timeline_df['tstamp'].max(),
y0=strategy.config_.get_value('model/disequilibrium/close_trshld'),
y1=strategy.config_.get_value('model/disequilibrium/close_trshld'),
line=dict(color="brown", width=2, dash="dot"),
opacity=0.7,
row=1, col=1
)
fig.add_shape(
type="line",
x0=timeline_df['tstamp'].min(),
x1=timeline_df['tstamp'].max(),
y0=-strategy.config_.get_value('model/disequilibrium/close_trshld'),
y1=-strategy.config_.get_value('model/disequilibrium/close_trshld'),
line=dict(color="brown", width=2, dash="dot"),
opacity=0.7,
row=1, col=1
)
fig.add_shape(
type="line",
x0=timeline_df['tstamp'].min(),
x1=timeline_df['tstamp'].max(),
y0=0,
y1=0,
line=dict(color="black", width=1, dash="solid"),
opacity=0.5,
row=1, col=1
)
# Add normalized price lines
fig.add_trace(
go.Scatter(
x=mkt_data_df['tstamp'],
y=norm_a,
name=f'{SYMBOL_A} (Normalized)',
line=dict(color='blue', width=2),
opacity=0.8
),
row=2, col=1
)
fig.add_trace(
go.Scatter(
x=mkt_data_df['tstamp'],
y=norm_b,
name=f'{SYMBOL_B} (Normalized)',
line=dict(color='orange', width=2),
opacity=0.8,
),
row=2, col=1
)
# Add BUY and SELL signals if available
if trades is not None and len(trades) > 0:
# Define signal groups to avoid legend repetition
signal_groups = {}
# Process all trades and group by signal type (ignore OPEN/CLOSE status)
for _, trade in trades.iterrows():
symbol = trade['symbol']
side = trade['side']
# status = trade['status']
action = trade['action']
# Create signal group key (without status to combine OPEN/CLOSE)
signal_key = f"{symbol} {side} {action}"
# Find normalized price for this trade
trade_time = trade['time']
if symbol == SYMBOL_A:
closest_idx = mkt_data_df['tstamp'].searchsorted(trade_time)
if closest_idx < len(norm_a):
norm_price = norm_a.iloc[closest_idx]
else:
norm_price = norm_a.iloc[-1]
else: # SYMBOL_B
closest_idx = mkt_data_df['tstamp'].searchsorted(trade_time)
if closest_idx < len(norm_b):
norm_price = norm_b.iloc[closest_idx]
else:
norm_price = norm_b.iloc[-1]
# Initialize group if not exists
if signal_key not in signal_groups:
signal_groups[signal_key] = {
'times': [],
'prices': [],
'actual_prices': [],
'symbol': symbol,
'side': side,
# 'status': status,
'action': trade['action']
}
# Add to group
signal_groups[signal_key]['times'].append(trade_time)
signal_groups[signal_key]['prices'].append(norm_price)
signal_groups[signal_key]['actual_prices'].append(trade['price'])
# Add each signal group as a single trace
for signal_key, group_data in signal_groups.items():
symbol = group_data['symbol']
side = group_data['side']
# status = group_data['status']
# Determine marker properties (same for all OPEN/CLOSE of same side)
is_close: bool = (group_data['action'] == "CLOSE")
if 'BUY' in side:
marker_color = 'green'
marker_symbol = 'triangle-up'
marker_size = 14
else: # SELL
marker_color = 'red'
marker_symbol = 'triangle-down'
marker_size = 14
# Create hover text for each point in the group
hover_texts = []
for i, (time, norm_price, actual_price) in enumerate(zip(group_data['times'],
group_data['prices'],
group_data['actual_prices'])):
# Find the corresponding trade to get the status for hover text
trade_info = trades[(trades['time'] == time) &
(trades['symbol'] == symbol) &
(trades['side'] == side)]
if len(trade_info) > 0:
action = trade_info.iloc[0]['action']
hover_texts.append(f'<b>{signal_key} {action}</b><br>' +
f'Time: {time}<br>' +
f'Normalized Price: {norm_price:.4f}<br>' +
f'Actual Price: ${actual_price:.2f}')
else:
hover_texts.append(f'<b>{signal_key}</b><br>' +
f'Time: {time}<br>' +
f'Normalized Price: {norm_price:.4f}<br>' +
f'Actual Price: ${actual_price:.2f}')
fig.add_trace(
go.Scatter(
x=group_data['times'],
y=group_data['prices'],
mode='markers',
name=signal_key,
marker=dict(
color=marker_color,
size=marker_size,
symbol=marker_symbol,
line=dict(width=2, color='black') if is_close else None
),
showlegend=True,
hovertemplate='%{text}<extra></extra>',
text=hover_texts
),
row=2, col=1
)
# -----------------------------
fig.add_trace(
go.Scatter(
x=symbol_a_data['tstamp'],
y=symbol_a_data[colname_a],
name=f'{SYMBOL_A} Price',
line=dict(color='blue', width=2),
opacity=0.8
),
row=3, col=1
)
# Filter trades for Symbol_A
symbol_a_trades = trades[trades['symbol'] == SYMBOL_A]
print(f"\nSymbol_A trades:\n{symbol_a_trades}")
if len(symbol_a_trades) > 0:
# Separate trades by action and status for different colors
buy_open_trades = symbol_a_trades[(symbol_a_trades['side'].str.contains('BUY', na=False)) &
(symbol_a_trades['action'].str.contains('OPEN', na=False))]
buy_close_trades = symbol_a_trades[(symbol_a_trades['side'].str.contains('BUY', na=False)) &
(symbol_a_trades['action'].str.contains('CLOSE', na=False))]
sell_open_trades = symbol_a_trades[(symbol_a_trades['side'].str.contains('SELL', na=False)) &
(symbol_a_trades['action'].str.contains('OPEN', na=False))]
sell_close_trades = symbol_a_trades[(symbol_a_trades['side'].str.contains('SELL', na=False)) &
(symbol_a_trades['action'].str.contains('CLOSE', na=False))]
# Add BUY OPEN signals
if len(buy_open_trades) > 0:
fig.add_trace(
go.Scatter(
x=buy_open_trades['time'],
y=buy_open_trades['price'],
mode='markers',
name=f'{SYMBOL_A} BUY OPEN',
marker=dict(color='green', size=12, symbol='triangle-up'),
showlegend=True
),
row=3, col=1
)
# Add BUY CLOSE signals
if len(buy_close_trades) > 0:
fig.add_trace(
go.Scatter(
x=buy_close_trades['time'],
y=buy_close_trades['price'],
mode='markers',
name=f'{SYMBOL_A} BUY CLOSE',
marker=dict(color='green', size=12, symbol='triangle-up'),
line=dict(width=2, color='black'),
showlegend=True
),
row=3, col=1
)
# Add SELL OPEN signals
if len(sell_open_trades) > 0:
fig.add_trace(
go.Scatter(
x=sell_open_trades['time'],
y=sell_open_trades['price'],
mode='markers',
name=f'{SYMBOL_A} SELL OPEN',
marker=dict(color='red', size=12, symbol='triangle-down'),
showlegend=True
),
row=3, col=1
)
# Add SELL CLOSE signals
if len(sell_close_trades) > 0:
fig.add_trace(
go.Scatter(
x=sell_close_trades['time'],
y=sell_close_trades['price'],
mode='markers',
name=f'{SYMBOL_A} SELL CLOSE',
marker=dict(color='red', size=12, symbol='triangle-down'),
line=dict(width=2, color='black'),
showlegend=True
),
row=3, col=1
)
# 4. Symbol_B Market Data with Trading Signals
fig.add_trace(
go.Scatter(
x=symbol_b_data['tstamp'],
y=symbol_b_data[colname_b],
name=f'{SYMBOL_B} Price',
line=dict(color='orange', width=2),
opacity=0.8
),
row=4, col=1
)
# Add trading signals for Symbol_B if available
symbol_b_trades = trades[trades['symbol'] == SYMBOL_B]
print(f"\nSymbol_B trades:\n{symbol_b_trades}")
if len(symbol_b_trades) > 0:
# Separate trades by action and status for different colors
buy_open_trades = symbol_b_trades[(symbol_b_trades['side'].str.contains('BUY', na=False)) &
(symbol_b_trades['action'].str.startswith('OPEN', na=False))]
buy_close_trades = symbol_b_trades[(symbol_b_trades['side'].str.contains('BUY', na=False)) &
(symbol_b_trades['action'].str.startswith('CLOSE', na=False))]
sell_open_trades = symbol_b_trades[(symbol_b_trades['side'].str.contains('SELL', na=False)) &
(symbol_b_trades['action'].str.contains('OPEN', na=False))]
sell_close_trades = symbol_b_trades[(symbol_b_trades['side'].str.contains('SELL', na=False)) &
(symbol_b_trades['action'].str.contains('CLOSE', na=False))]
# Add BUY OPEN signals
if len(buy_open_trades) > 0:
fig.add_trace(
go.Scatter(
x=buy_open_trades['time'],
y=buy_open_trades['price'],
mode='markers',
name=f'{SYMBOL_B} BUY OPEN',
marker=dict(color='darkgreen', size=12, symbol='triangle-up'),
showlegend=True
),
row=4, col=1
)
# Add BUY CLOSE signals
if len(buy_close_trades) > 0:
fig.add_trace(
go.Scatter(
x=buy_close_trades['time'],
y=buy_close_trades['price'],
mode='markers',
name=f'{SYMBOL_B} BUY CLOSE',
marker=dict(color='green', size=12, symbol='triangle-up'),
line=dict(width=2, color='black'),
showlegend=True
),
row=4, col=1
)
# Add SELL OPEN signals
if len(sell_open_trades) > 0:
fig.add_trace(
go.Scatter(
x=sell_open_trades['time'],
y=sell_open_trades['price'],
mode='markers',
name=f'{SYMBOL_B} SELL OPEN',
marker=dict(color='red', size=12, symbol='triangle-down'),
showlegend=True
),
row=4, col=1
)
# Add SELL CLOSE signals
if len(sell_close_trades) > 0:
fig.add_trace(
go.Scatter(
x=sell_close_trades['time'],
y=sell_close_trades['price'],
mode='markers',
name=f'{SYMBOL_B} SELL CLOSE',
marker=dict(color='red', size=12, symbol='triangle-down'),
line=dict(width=2, color='black'),
showlegend=True
),
row=4, col=1
)
# Update layout
fig.update_layout(
height=1600,
title_text=f"Strategy Analysis - {SYMBOL_A} & {SYMBOL_B} ({TRD_DATE})",
showlegend=True,
template="plotly_white",
plot_bgcolor='lightgray',
)
# Update y-axis labels
fig.update_yaxes(title_text="Scaled Dis-equilibrium", row=1, col=1)
fig.update_yaxes(title_text=f"{SYMBOL_A} Price ($)", row=2, col=1)
fig.update_yaxes(title_text=f"{SYMBOL_B} Price ($)", row=3, col=1)
fig.update_yaxes(title_text="Normalized Price (Base = 1.0)", row=4, col=1)
# Update x-axis labels and ensure consistent time range
time_range = [timeline_df['tstamp'].min(), timeline_df['tstamp'].max()]
fig.update_xaxes(range=time_range, row=1, col=1)
fig.update_xaxes(range=time_range, row=2, col=1)
fig.update_xaxes(range=time_range, row=3, col=1)
fig.update_xaxes(title_text="Time", range=time_range, row=4, col=1)
# Display using plotly offline mode
# pyo.iplot(fig)
fig.show()
else:
print("No interactive visualization data available - strategy may not have run successfully")
print(f"\nChart shows:")
print(f"- {SYMBOL_A} and {SYMBOL_B} prices normalized to start at 1.0")
print(f"- BUY signals shown as green triangles pointing up")
print(f"- SELL signals shown as orange triangles pointing down")
print(f"- All BUY signals per symbol grouped together, all SELL signals per symbol grouped together")
print(f"- Hover over markers to see individual trade details (OPEN/CLOSE status)")
if trades is not None and len(trades) > 0:
print(f"- Total signals displayed: {len(trades)}")
print(f"- {SYMBOL_A} signals: {len(trades[trades['symbol'] == SYMBOL_A])}")
print(f"- {SYMBOL_B} signals: {len(trades[trades['symbol'] == SYMBOL_B])}")
else:
print("- No trading signals to display")

169
lib/utils/db_inspector.py Normal file
View File

@ -0,0 +1,169 @@
#!/usr/bin/env python3
"""
Database inspector utility for pairs trading results database.
Provides functionality to view all tables and their contents.
"""
import sqlite3
import sys
import json
import os
from typing import List, Dict, Any
def list_tables(db_path: str) -> List[str]:
"""List all tables in the database."""
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute("""
SELECT name FROM sqlite_master
WHERE type='table'
ORDER BY name
""")
tables = [row[0] for row in cursor.fetchall()]
conn.close()
return tables
def view_table_schema(db_path: str, table_name: str) -> None:
"""View the schema of a specific table."""
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute(f"PRAGMA table_info({table_name})")
columns = cursor.fetchall()
print(f"\nTable: {table_name}")
print("-" * 50)
print("Column Name".ljust(20) + "Type".ljust(15) + "Not Null".ljust(10) + "Default")
print("-" * 50)
for col in columns:
cid, name, type_, not_null, default_value, pk = col
print(f"{name}".ljust(20) + f"{type_}".ljust(15) + f"{bool(not_null)}".ljust(10) + f"{default_value or ''}")
conn.close()
def view_config_table(db_path: str, limit: int = 10) -> None:
"""View entries from the config table."""
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute(f"""
SELECT id, run_timestamp, config_file_path, fit_method_class,
datafiles, instruments, config_json
FROM config
ORDER BY run_timestamp DESC
LIMIT {limit}
""")
rows = cursor.fetchall()
if not rows:
print("No configuration entries found.")
return
print(f"\nMost recent {len(rows)} configuration entries:")
print("=" * 80)
for row in rows:
id, run_timestamp, config_file_path, fit_method_class, datafiles, instruments, config_json = row
print(f"ID: {id} | {run_timestamp}")
print(f"Config: {config_file_path} | Strategy: {fit_method_class}")
print(f"Files: {datafiles}")
print(f"Instruments: {instruments}")
print("-" * 80)
conn.close()
def view_results_summary(db_path: str) -> None:
"""View summary of trading results."""
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Get results summary
cursor.execute("""
SELECT date, COUNT(*) as trade_count,
ROUND(SUM(symbol_return), 2) as total_return
FROM pt_bt_results
GROUP BY date
ORDER BY date DESC
""")
results = cursor.fetchall()
if not results:
print("No trading results found.")
return
print(f"\nTrading Results Summary:")
print("-" * 50)
print("Date".ljust(15) + "Trades".ljust(10) + "Total Return %")
print("-" * 50)
for date, trade_count, total_return in results:
print(f"{date}".ljust(15) + f"{trade_count}".ljust(10) + f"{total_return}")
# Get outstanding positions summary
cursor.execute("""
SELECT COUNT(*) as position_count,
ROUND(SUM(unrealized_return), 2) as total_unrealized
FROM outstanding_positions
""")
outstanding = cursor.fetchone()
if outstanding and outstanding[0] > 0:
print(f"\nOutstanding Positions: {outstanding[0]} positions")
print(f"Total Unrealized Return: {outstanding[1]}%")
conn.close()
def main() -> None:
if len(sys.argv) < 2:
print("Usage: python db_inspector.py <database_path> [command]")
print("Commands:")
print(" tables - List all tables")
print(" schema - Show schema for all tables")
print(" config - View configuration entries")
print(" results - View trading results summary")
print(" all - Show everything (default)")
print("\nExample: python db_inspector.py results/equity.db config")
sys.exit(1)
db_path = sys.argv[1]
command = sys.argv[2] if len(sys.argv) > 2 else "all"
if not os.path.exists(db_path):
print(f"Database file not found: {db_path}")
sys.exit(1)
try:
if command in ["tables", "all"]:
tables = list_tables(db_path)
print(f"Tables in database: {', '.join(tables)}")
if command in ["schema", "all"]:
tables = list_tables(db_path)
for table in tables:
view_table_schema(db_path, table)
if command in ["config", "all"]:
if "config" in list_tables(db_path):
view_config_table(db_path)
else:
print("Config table not found.")
if command in ["results", "all"]:
if "pt_bt_results" in list_tables(db_path):
view_results_summary(db_path)
else:
print("Results table not found.")
except Exception as e:
print(f"Error inspecting database: {str(e)}")
import traceback
traceback.print_exc()
if __name__ == "__main__":
main()

66
pyproject.toml Normal file
View File

@ -0,0 +1,66 @@
[build-system]
requires = ["setuptools>=45", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "pairs-trading"
version = "0.1.0"
description = "Pairs Trading Backtesting Framework"
requires-python = ">=3.8"
[tool.black]
line-length = 88
target-version = ['py38']
include = '\.pyi?$'
extend-exclude = '''
/(
# directories
\.eggs
| \.git
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| build
| dist
)/
'''
[tool.flake8]
max-line-length = 88
extend-ignore = ["E203", "W503"]
exclude = [
".git",
"__pycache__",
"build",
"dist",
".venv",
".mypy_cache",
".tox"
]
[tool.mypy]
python_version = "3.8"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
disallow_incomplete_defs = true
check_untyped_defs = true
disallow_untyped_decorators = true
no_implicit_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
warn_unreachable = true
strict_equality = true
[[tool.mypy.overrides]]
module = [
"numpy.*",
"pandas.*",
"matplotlib.*",
"seaborn.*",
"scipy.*",
"sklearn.*"
]
ignore_missing_imports = true

24
pyrightconfig.json Normal file
View File

@ -0,0 +1,24 @@
{
"include": [
"lib"
],
"exclude": [
"**/node_modules",
"**/__pycache__",
"**/.*",
"results",
"data"
],
"ignore": [],
"defineConstant": {},
"typeCheckingMode": "basic",
"useLibraryCodeForTypes": true,
"autoImportCompletions": true,
"autoSearchPaths": true,
"extraPaths": [
"lib"
],
"stubPath": "./typings",
"venvPath": ".",
"venv": "python3.12-venv"
}

View File

@ -61,7 +61,7 @@ protobuf>=3.12.4
 psutil>=5.9.0
 ptyprocess>=0.7.0
 pycurl>=7.44.1
-pyelftools>=0.27
+# pyelftools>=0.27
 Pygments>=2.11.2
 pyparsing>=2.4.7
 pyrsistent>=0.18.1
@ -69,7 +69,7 @@ python-debian>=0.1.43 #+ubuntu1.1
 python-dotenv>=0.19.2
 python-magic>=0.4.24
 python-xlib>=0.29
-pyxdg>=0.27
+# pyxdg>=0.27
 PyYAML>=6.0
 reportlab>=3.6.8
 requests>=2.25.1
@ -78,119 +78,117 @@ scipy<1.13.0
 seaborn>=0.13.2
 SecretStorage>=3.3.1
 setproctitle>=1.2.2
-simpleeval>=1.0.3
 six>=1.16.0
 soupsieve>=2.3.1
 ssh-import-id>=5.11
 statsmodels>=0.14.4
-texttable>=1.6.4
+# texttable>=1.6.4
 tldextract>=3.1.2
 tomli>=1.2.2
 ######## typed-ast>=1.4.3
-types-aiofiles>=0.1
+# types-aiofiles>=0.1
-types-annoy>=1.17
+# types-annoy>=1.17
-types-appdirs>=1.4
+# types-appdirs>=1.4
-types-atomicwrites>=1.4
+# types-atomicwrites>=1.4
-types-aws-xray-sdk>=2.8
+# types-aws-xray-sdk>=2.8
-types-babel>=2.9
+# types-babel>=2.9
-types-backports-abc>=0.5
+# types-backports-abc>=0.5
-types-backports.ssl-match-hostname>=3.7
+# types-backports.ssl-match-hostname>=3.7
-types-beautifulsoup4>=4.10
+# types-beautifulsoup4>=4.10
-types-bleach>=4.1
+# types-bleach>=4.1
-types-boto>=2.49
+# types-boto>=2.49
-types-braintree>=4.11
+# types-braintree>=4.11
-types-cachetools>=4.2
+# types-cachetools>=4.2
-types-caldav>=0.8
+# types-caldav>=0.8
-types-certifi>=2020.4
+# types-certifi>=2020.4
-types-characteristic>=14.3
+# types-characteristic>=14.3
-types-chardet>=4.0
+# types-chardet>=4.0
-types-click>=7.1
+# types-click>=7.1
-types-click-spinner>=0.1
+# types-click-spinner>=0.1
-types-colorama>=0.4
+# types-colorama>=0.4
-types-commonmark>=0.9
+# types-commonmark>=0.9
-types-contextvars>=0.1
+# types-contextvars>=0.1
-types-croniter>=1.0
+# types-croniter>=1.0
-types-cryptography>=3.3
+# types-cryptography>=3.3
-types-dataclasses>=0.1
+# types-dataclasses>=0.1
-types-dateparser>=1.0
+# types-dateparser>=1.0
-types-DateTimeRange>=0.1
+# types-DateTimeRange>=0.1
-types-decorator>=0.1
+# types-decorator>=0.1
-types-Deprecated>=1.2
+# types-Deprecated>=1.2
-types-docopt>=0.6
+# types-docopt>=0.6
-types-docutils>=0.17
+# types-docutils>=0.17
-types-editdistance>=0.5
+# types-editdistance>=0.5
-types-emoji>=1.2
+# types-emoji>=1.2
-types-entrypoints>=0.3
+# types-entrypoints>=0.3
-types-enum34>=1.1
+# types-enum34>=1.1
-types-filelock>=3.2
+# types-filelock>=3.2
-types-first>=2.0
+# types-first>=2.0
-types-Flask>=1.1
+# types-Flask>=1.1
-types-freezegun>=1.1
+# types-freezegun>=1.1
-types-frozendict>=0.1
+# types-frozendict>=0.1
-types-futures>=3.3
+# types-futures>=3.3
-types-html5lib>=1.1
+# types-html5lib>=1.1
-types-httplib2>=0.19
+# types-httplib2>=0.19
-types-humanfriendly>=9.2
+# types-humanfriendly>=9.2
-types-ipaddress>=1.0
+# types-ipaddress>=1.0
-types-itsdangerous>=1.1
+# types-itsdangerous>=1.1
-types-JACK-Client>=0.1
+# types-JACK-Client>=0.1
-types-Jinja2>=2.11
+# types-Jinja2>=2.11
-types-jmespath>=0.10
+# types-jmespath>=0.10
-types-jsonschema>=3.2
+# types-jsonschema>=3.2
-types-Markdown>=3.3
+# types-Markdown>=3.3
-types-MarkupSafe>=1.1
+# types-MarkupSafe>=1.1
-types-mock>=4.0
+# types-mock>=4.0
-types-mypy-extensions>=0.4
+# types-mypy-extensions>=0.4
-types-mysqlclient>=2.0
+# types-mysqlclient>=2.0
-types-oauthlib>=3.1
+# types-oauthlib>=3.1
-types-orjson>=3.6
+# types-orjson>=3.6
-types-paramiko>=2.7
+# types-paramiko>=2.7
-types-Pillow>=8.3
+# types-Pillow>=8.3
-types-polib>=1.1
+# types-polib>=1.1
-types-prettytable>=2.1
+# types-prettytable>=2.1
-types-protobuf>=3.17
+# types-protobuf>=3.17
-types-psutil>=5.8
+# types-psutil>=5.8
-types-psycopg2>=2.9
+# types-psycopg2>=2.9
-types-pyaudio>=0.2
+# types-pyaudio>=0.2
-types-pycurl>=0.1
+# types-pycurl>=0.1
-types-pyfarmhash>=0.2
+# types-pyfarmhash>=0.2
-types-Pygments>=2.9
+# types-Pygments>=2.9
-types-PyMySQL>=1.0
+# types-PyMySQL>=1.0
-types-pyOpenSSL>=20.0
+# types-pyOpenSSL>=20.0
-types-pyRFC3339>=0.1
+# types-pyRFC3339>=0.1
-types-pysftp>=0.2
+# types-pysftp>=0.2
-types-pytest-lazy-fixture>=0.6
+# types-pytest-lazy-fixture>=0.6
-types-python-dateutil>=2.8
+# types-python-dateutil>=2.8
-types-python-gflags>=3.1
+# types-python-gflags>=3.1
-types-python-nmap>=0.6
+# types-python-nmap>=0.6
-types-python-slugify>=5.0
+# types-python-slugify>=5.0
-types-pytz>=2021.1
+# types-pytz>=2021.1
-types-pyvmomi>=7.0
+# types-pyvmomi>=7.0
-types-PyYAML>=5.4
+# types-PyYAML>=5.4
-types-redis>=3.5
+# types-redis>=3.5
-types-requests>=2.25
+# types-requests>=2.25
-types-retry>=0.9
+# types-retry>=0.9
-types-seaborn>0.13.2
-types-selenium>=3.141
+# types-selenium>=3.141
-types-Send2Trash>=1.8
+# types-Send2Trash>=1.8
-types-setuptools>=57.4
+# types-setuptools>=57.4
-types-simplejson>=3.17
+# types-simplejson>=3.17
-types-singledispatch>=3.7
+# types-singledispatch>=3.7
-types-six>=1.16
+# types-six>=1.16
-types-slumber>=0.7
+# types-slumber>=0.7
-types-stripe>=2.59
+# types-stripe>=2.59
-types-tabulate>=0.8
+# types-tabulate>=0.8
-types-termcolor>=1.1
+# types-termcolor>=1.1
-types-toml>=0.10
+# types-toml>=0.10
-types-toposort>=1.6
+# types-toposort>=1.6
-types-ttkthemes>=3.2
+# types-ttkthemes>=3.2
-types-typed-ast>=1.4
+# types-typed-ast>=1.4
-types-tzlocal>=0.1
+# types-tzlocal>=0.1
-types-ujson>=0.1
+# types-ujson>=0.1
-types-vobject>=0.9
+# types-vobject>=0.9
-types-waitress>=0.1
+# types-waitress>=0.1
-types-Werkzeug>=1.0
+#types-Werkzeug>=1.0
-types-xxhash>=2.0
+#types-xxhash>=2.0
 typing-extensions>=3.10.0.2
 Unidecode>=1.3.3
 urllib3>=1.26.5

View File

@ -1,139 +0,0 @@
from __future__ import annotations
import os
from typing import Any, Dict, List, Tuple
# ---
from cvttpy_tools.app import App
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import CvttAppConfig
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
from cvttpy_trading.settings.instruments import Instruments
# ---
from pairs_trading.lib.pt_strategy.results import (
PairResearchResult,
create_result_database,
store_config_in_database,
)
from pairs_trading.lib.pt_strategy.research_strategy import PtResearchStrategy
from pairs_trading.lib.tools.filetools import resolve_datafiles
InstrumentTypeT = str
class Runner(NamedObject):
def __init__(self):
App()
CvttAppConfig()
# App.instance().add_cmdline_arg(
# "--config", type=str, required=True, help="Path to the configuration file."
# )
App.instance().add_cmdline_arg(
"--date_pattern",
type=str,
required=True,
help="Date YYYYMMDD, allows * and ? wildcards",
)
App.instance().add_cmdline_arg(
"--instruments",
type=str,
required=True,
help="Comma-separated list of instrument symbols (e.g., COIN:EQUITY,GBTC:CRYPTO)",
)
App.instance().add_cmdline_arg(
"--result_db",
type=str,
required=True,
help="Path to SQLite database for storing results. Use 'NONE' to disable database output.",
)
App.instance().add_call(stage=App.Stage.Config, func=self._on_config())
App.instance().add_call(stage=App.Stage.Run, func=self.run())
async def _on_config(self) -> None:
# Resolve data files (CLI takes priority over config)
instruments: List[ExchangeInstrument] = self._get_instruments()
datafiles = resolve_datafiles(
config=CvttAppConfig.instance(),
date_pattern=App.instance().get_argument("date_pattern"),
instruments=instruments,
)
days = list(set([day for day, _ in datafiles]))
print(f"Found {len(datafiles)} data files to process:")
for df in datafiles:
print(f" - {df}")
# Create result database if needed
if App.instance().get_argument("result_db").upper() != "NONE":
create_result_database(App.instance().get_argument("result_db"))
# Initialize a dictionary to store all trade results
all_results: Dict[str, Dict[str, Any]] = {}
is_config_stored = False
# Process each data file
results = PairResearchResult(config=CvttAppConfig.instance())
for day in sorted(days):
md_datafiles = [datafile for md_day, datafile in datafiles if md_day == day]
if not all([os.path.exists(datafile) for datafile in md_datafiles]):
print(f"WARNING: insufficient data files: {md_datafiles}")
exit(1)
print(f"\n====== Processing {day} ======")
if not is_config_stored:
store_config_in_database(
db_path=App.instance().get_argument("result_db"),
config_file_path=App.instance().get_argument("config"),
config=CvttAppConfig.instance(),
datafiles=datafiles,
instruments=instruments,
)
is_config_stored = True
CvttAppConfig.instance().set_value("datafiles", md_datafiles)
pt_strategy = PtResearchStrategy(
config=CvttAppConfig.instance(),
instruments=instruments,
)
pt_strategy.run()
results.add_day_results(
day=day,
trades=pt_strategy.day_trades(),
outstanding_positions=pt_strategy.outstanding_positions(),
)
results.analyze_pair_performance()
def _get_instruments(self) -> List[ExchangeInstrument]:
res: List[ExchangeInstrument] = []
for inst in App.instance().get_argument("instruments").split(","):
instrument_type = inst.split(":")[0]
exchange_id = inst.split(":")[1]
instrument_id = inst.split(":")[2]
exch_inst: ExchangeInstrument = Instruments.instance().get_exch_inst(
exch_id=exchange_id, inst_id=instrument_id, src=f"{self.fname()}"
)
exch_inst.user_data_["instrument_type"] = instrument_type
res.append(exch_inst)
return res
async def run(self) -> None:
if App.instance().get_argument("result_db").upper() != "NONE":
print(
f'\nResults stored in database: {App.instance().get_argument("result_db")}'
)
else:
print("No results to display.")
if __name__ == "__main__":
Runner()
App.instance().run()

View File

@ -0,0 +1,127 @@
import argparse
import glob
import importlib
import os
from datetime import date, datetime
from typing import Any, Dict, List, Optional
import pandas as pd
from tools.config import expand_filename, load_config
from tools.data_loader import get_available_instruments_from_db
from pt_trading.results import (
BacktestResult,
create_result_database,
store_config_in_database,
store_results_in_database,
)
from pt_trading.fit_method import PairsTradingFitMethod
from pt_trading.trading_pair import TradingPair
from research.research_tools import create_pairs, resolve_datafiles
def main() -> None:
parser = argparse.ArgumentParser(description="Run pairs trading backtest.")
parser.add_argument(
"--config", type=str, required=True, help="Path to the configuration file."
)
parser.add_argument(
"--datafile",
type=str,
required=False,
help="Market data file to process.",
)
parser.add_argument(
"--instruments",
type=str,
required=False,
help = "Comma-separated list of instrument symbols (e.g., COIN,GBTC). If not provided, auto-detects from database.",
)
args = parser.parse_args()
config: Dict = load_config(args.config)
# Resolve data files (CLI takes priority over config)
    datafiles = resolve_datafiles(config, args.datafile)
    if not datafiles:
        print("No data files found to process.")
        return
    datafile = datafiles[0]
    print(f"Processing data file: {datafile}")
# # Create result database if needed
# if args.result_db.upper() != "NONE":
# args.result_db = expand_filename(args.result_db)
# create_result_database(args.result_db)
# # Initialize a dictionary to store all trade results
# all_results: Dict[str, Dict[str, Any]] = {}
# # Store configuration in database for reference
# if args.result_db.upper() != "NONE":
# # Get list of all instruments for storage
# all_instruments = []
# for datafile in datafiles:
# if args.instruments:
# file_instruments = [
# inst.strip() for inst in args.instruments.split(",")
# ]
# else:
# file_instruments = get_available_instruments_from_db(datafile, config)
# all_instruments.extend(file_instruments)
# # Remove duplicates while preserving order
# unique_instruments = list(dict.fromkeys(all_instruments))
# store_config_in_database(
# db_path=args.result_db,
# config_file_path=args.config,
# config=config,
# fit_method_class=fit_method_class_name,
# datafiles=datafiles,
# instruments=unique_instruments,
# )
# Process each data file
stat_model_price = config["stat_model_price"]
print(f"\n====== Processing {os.path.basename(datafile)} ======")
# Determine instruments to use
if args.instruments:
# Use CLI-specified instruments
instruments = [inst.strip() for inst in args.instruments.split(",")]
print(f"Using CLI-specified instruments: {instruments}")
else:
# Auto-detect instruments from database
instruments = get_available_instruments_from_db(datafile, config)
print(f"Auto-detected instruments: {instruments}")
if not instruments:
print(f"No instruments found in {datafile}...")
return
# Process data for this file
try:
cointegration_data: pd.DataFrame = pd.DataFrame()
for pair in create_pairs(datafile, stat_model_price, config, instruments):
cointegration_data = pd.concat([cointegration_data, pair.cointegration_check()])
pd.set_option('display.width', 400)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
print(f"cointegration_data:\n{cointegration_data}")
except Exception as err:
print(f"Error processing {datafile}: {str(err)}")
import traceback
traceback.print_exc()
if __name__ == "__main__":
main()

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,16 @@
{
"cells": [],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

232
research/pt_backtest.py Normal file
View File

@ -0,0 +1,232 @@
import argparse
import glob
import os
import re
from typing import Any, Dict, List, Tuple
import pandas as pd
from research.research_tools import create_pairs
from tools.config import expand_filename, load_config
from pt_trading.results import (
BacktestResult,
create_result_database,
store_config_in_database,
)
from pt_trading.fit_method import PairsTradingFitMethod
from pt_trading.trading_pair import TradingPair
DayT = str
DataFileNameT = str
def resolve_datafiles(
config: Dict, date_pattern: str, instruments: List[Dict[str, str]]
) -> List[Tuple[DayT, DataFileNameT]]:
resolved_files: List[Tuple[DayT, DataFileNameT]] = []
for inst in instruments:
pattern = date_pattern
inst_type = inst["instrument_type"]
data_dir = config["market_data_loading"][inst_type]["data_directory"]
if "*" in pattern or "?" in pattern:
# Handle wildcards
if not os.path.isabs(pattern):
pattern = os.path.join(data_dir, f"{pattern}.mktdata.ohlcv.db")
matched_files = glob.glob(pattern)
for matched_file in matched_files:
match = re.search(r"(\d{8})\.mktdata\.ohlcv\.db$", matched_file)
assert match is not None
day = match.group(1)
resolved_files.append((day, matched_file))
else:
# Handle explicit file path
if not os.path.isabs(pattern):
pattern = os.path.join(data_dir, f"{pattern}.mktdata.ohlcv.db")
resolved_files.append((date_pattern, pattern))
return sorted(list(set(resolved_files))) # Remove duplicates and sort
def get_instruments(args: argparse.Namespace, config: Dict) -> List[Dict[str, str]]:
instruments = [
{
"symbol": inst.split(":")[0],
"instrument_type": inst.split(":")[1],
"exchange_id": inst.split(":")[2],
"instrument_id_pfx": config["market_data_loading"][inst.split(":")[1]][
"instrument_id_pfx"
],
"db_table_name": config["market_data_loading"][inst.split(":")[1]][
"db_table_name"
],
}
for inst in args.instruments.split(",")
]
return instruments
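# Example (illustrative): the CLI string "COIN:EQUITY:ALPACA" parses to
#   {"symbol": "COIN", "instrument_type": "EQUITY", "exchange_id": "ALPACA",
#    "instrument_id_pfx": ..., "db_table_name": ...}
# with the last two fields taken from config["market_data_loading"]["EQUITY"].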
def run_backtest(
config: Dict,
datafiles: List[str],
fit_method: PairsTradingFitMethod,
instruments: List[Dict[str, str]],
) -> BacktestResult:
"""
Run backtest for all pairs using the specified instruments.
"""
bt_result: BacktestResult = BacktestResult(config=config)
# if len(datafiles) < 2:
# print(f"WARNING: insufficient data files: {datafiles}")
# return bt_result
if not all([os.path.exists(datafile) for datafile in datafiles]):
print(f"WARNING: data file {datafiles} does not exist")
return bt_result
pairs_trades = []
pairs = create_pairs(
datafiles=datafiles,
fit_method=fit_method,
config=config,
instruments=instruments,
)
for pair in pairs:
single_pair_trades = fit_method.run_pair(pair=pair, bt_result=bt_result)
if single_pair_trades is not None and len(single_pair_trades) > 0:
pairs_trades.append(single_pair_trades)
print(f"pairs_trades:\n{pairs_trades}")
# Check if result_list has any data before concatenating
if len(pairs_trades) == 0:
print("No trading signals found for any pairs")
return bt_result
bt_result.collect_single_day_results(pairs_trades)
return bt_result
def main() -> None:
parser = argparse.ArgumentParser(description="Run pairs trading backtest.")
parser.add_argument(
"--config", type=str, required=True, help="Path to the configuration file."
)
parser.add_argument(
"--date_pattern",
type=str,
required=True,
help="Date YYYYMMDD, allows * and ? wildcards",
)
parser.add_argument(
"--instruments",
type=str,
required=True,
help="Comma-separated list of instrument symbols (e.g., COIN:EQUITY,GBTC:CRYPTO)",
)
parser.add_argument(
"--result_db",
type=str,
required=True,
help="Path to SQLite database for storing results. Use 'NONE' to disable database output.",
)
args = parser.parse_args()
config: Dict = load_config(args.config)
# Dynamically instantiate fit method class
fit_method = PairsTradingFitMethod.create(config)
# Resolve data files (CLI takes priority over config)
instruments = get_instruments(args, config)
datafiles = resolve_datafiles(config, args.date_pattern, instruments)
    days = {day for day, _ in datafiles}
print(f"Found {len(datafiles)} data files to process:")
for df in datafiles:
print(f" - {df}")
# Create result database if needed
if args.result_db.upper() != "NONE":
args.result_db = expand_filename(args.result_db)
create_result_database(args.result_db)
# Initialize a dictionary to store all trade results
all_results: Dict[str, Dict[str, Any]] = {}
is_config_stored = False
# Process each data file
for day in sorted(days):
md_datafiles = [datafile for md_day, datafile in datafiles if md_day == day]
if not all([os.path.exists(datafile) for datafile in md_datafiles]):
print(f"WARNING: insufficient data files: {md_datafiles}")
continue
print(f"\n====== Processing {day} ======")
        if not is_config_stored and args.result_db.upper() != "NONE":
            store_config_in_database(
                db_path=args.result_db,
                config_file_path=args.config,
                config=config,
                fit_method_class=config["fit_method_class"],
                datafiles=datafiles,
                instruments=instruments,
            )
            is_config_stored = True
# Process data for this file
try:
fit_method.reset()
bt_results = run_backtest(
config=config,
datafiles=md_datafiles,
fit_method=fit_method,
instruments=instruments,
)
if bt_results.trades is None or len(bt_results.trades) == 0:
print(f"No trades found for {day}")
continue
            # Store results keyed by day (the YYYYMMDD string itself)
            all_results[day] = {
                "trades": bt_results.trades.copy(),
                "outstanding_positions": bt_results.outstanding_positions.copy(),
            }
            # Store results in database
            if args.result_db.upper() != "NONE":
                bt_results.calculate_returns(
                    {
                        day: {
                            "trades": bt_results.trades.copy(),
                            "outstanding_positions": bt_results.outstanding_positions.copy(),
                        }
                    }
                )
                bt_results.store_results_in_database(db_path=args.result_db, day=day)
            print(f"Successfully processed {day}")
except Exception as err:
print(f"Error processing {day}: {str(err)}")
import traceback
traceback.print_exc()
# Calculate and print results using a new BacktestResult instance for aggregation
if all_results:
aggregate_bt_results = BacktestResult(config=config)
aggregate_bt_results.calculate_returns(all_results)
aggregate_bt_results.print_grand_totals()
aggregate_bt_results.print_outstanding_positions()
if args.result_db.upper() != "NONE":
print(f"\nResults stored in database: {args.result_db}")
else:
print("No results to display.")
if __name__ == "__main__":
main()

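The driver above groups the (day, datafile) tuples returned by resolve_datafiles by day, then backtests each day's files as one batch. A minimal sketch of that grouping, assuming hypothetical paths and dates (none of these files are real):

# Illustrative sketch: how (day, datafile) tuples group into per-day batches.
datafiles = [
    ("20250605", "/data/EQUITY/20250605.mktdata.ohlcv.db"),
    ("20250605", "/data/CRYPTO/20250605.mktdata.ohlcv.db"),
    ("20250606", "/data/EQUITY/20250606.mktdata.ohlcv.db"),
]
days = sorted({day for day, _ in datafiles})
for day in days:
    md_datafiles = [f for d, f in datafiles if d == day]
    print(day, md_datafiles)
# 20250605 ['/data/EQUITY/20250605.mktdata.ohlcv.db', '/data/CRYPTO/20250605.mktdata.ohlcv.db']
# 20250606 ['/data/EQUITY/20250606.mktdata.ohlcv.db']
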
View File

@ -0,0 +1,93 @@
import glob
import os
from typing import Dict, List, Optional
import pandas as pd
from pt_trading.fit_method import PairsTradingFitMethod
def resolve_datafiles(config: Dict, cli_datafiles: Optional[str] = None) -> List[str]:
"""
Resolve the list of data files to process.
CLI datafiles take priority over config datafiles.
Supports wildcards in config but not in CLI.
"""
if cli_datafiles:
# CLI override - comma-separated list, no wildcards
datafiles = [f.strip() for f in cli_datafiles.split(",")]
# Make paths absolute relative to data directory
data_dir = config.get("data_directory", "./data")
resolved_files = []
for df in datafiles:
if not os.path.isabs(df):
df = os.path.join(data_dir, df)
resolved_files.append(df)
return resolved_files
# Use config datafiles with wildcard support
config_datafiles = config.get("datafiles", [])
data_dir = config.get("data_directory", "./data")
resolved_files = []
for pattern in config_datafiles:
if "*" in pattern or "?" in pattern:
# Handle wildcards
if not os.path.isabs(pattern):
pattern = os.path.join(data_dir, pattern)
matched_files = glob.glob(pattern)
resolved_files.extend(matched_files)
else:
# Handle explicit file path
if not os.path.isabs(pattern):
pattern = os.path.join(data_dir, pattern)
resolved_files.append(pattern)
return sorted(list(set(resolved_files))) # Remove duplicates and sort
def create_pairs(
datafiles: List[str],
fit_method: PairsTradingFitMethod,
config: Dict,
instruments: List[Dict[str, str]],
) -> List:
    from tools.data_loader import load_market_data
all_indexes = range(len(instruments))
unique_index_pairs = [(i, j) for i in all_indexes for j in all_indexes if i < j]
pairs = []
# Update config to use the specified instruments
config_copy = config.copy()
config_copy["instruments"] = instruments
market_data_df = pd.DataFrame()
    # Load extra minutes of data to cover any execution-price shift
    extra_minutes = 0
    if "execution_price" in config_copy:
        extra_minutes = config_copy["execution_price"]["shift"]
    for datafile in datafiles:
        md_df = load_market_data(
            datafile=datafile,
            instruments=instruments,
            db_table_name=config_copy["market_data_loading"][
                instruments[0]["instrument_type"]
            ]["db_table_name"],
            trading_hours=config_copy["trading_hours"],
            extra_minutes=extra_minutes,
        )
        market_data_df = pd.concat([market_data_df, md_df])
    # Every requested symbol must be present before pairs can be formed
    if market_data_df.empty or len(set(market_data_df["symbol"])) != len(instruments):
        print(f"WARNING: insufficient data in files: {datafiles}")
        return []
for a_index, b_index in unique_index_pairs:
        symbol_a = instruments[a_index]["symbol"]
        symbol_b = instruments[b_index]["symbol"]
pair = fit_method.create_trading_pair(
config=config_copy,
market_data=market_data_df,
symbol_a=symbol_a,
symbol_b=symbol_b,
)
pairs.append(pair)
return pairs

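create_pairs enumerates every unordered instrument pair with the i < j double loop before fitting each one. A small self-contained sketch (placeholder symbols, not real instruments) showing the enumeration matches itertools.combinations:

from itertools import combinations

instruments = ["AAA", "BBB", "CCC"]  # placeholder symbols
all_indexes = range(len(instruments))
unique_index_pairs = [(i, j) for i in all_indexes for j in all_indexes if i < j]
assert unique_index_pairs == list(combinations(all_indexes, 2))
print(unique_index_pairs)  # [(0, 1), (0, 2), (1, 2)]
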
221
strategy/pair_strategy.py Normal file
View File

@ -0,0 +1,221 @@
import argparse
import importlib
import os
from typing import Any, Dict, List

import pandas as pd

from research.research_tools import resolve_datafiles
from tools.config import load_config
from tools.data_loader import get_available_instruments_from_db, load_market_data
from pt_trading.results import (
    BacktestResult,
    create_result_database,
    store_config_in_database,
    store_results_in_database,
)
from pt_trading.fit_method import PairsTradingFitMethod
from pt_trading.trading_pair import TradingPair
def run_strategy(
config: Dict,
datafile: str,
fit_method: PairsTradingFitMethod,
instruments: List[str],
) -> BacktestResult:
"""
Run backtest for all pairs using the specified instruments.
"""
bt_result: BacktestResult = BacktestResult(config=config)
    def _create_pairs(config: Dict, instruments: List[str]) -> List[TradingPair]:
        # datafile is read from the enclosing run_strategy scope
all_indexes = range(len(instruments))
unique_index_pairs = [(i, j) for i in all_indexes for j in all_indexes if i < j]
pairs = []
# Update config to use the specified instruments
config_copy = config.copy()
config_copy["instruments"] = instruments
market_data_df = load_market_data(
datafile=datafile,
exchange_id=config_copy["exchange_id"],
instruments=config_copy["instruments"],
instrument_id_pfx=config_copy["instrument_id_pfx"],
db_table_name=config_copy["db_table_name"],
trading_hours=config_copy["trading_hours"],
)
for a_index, b_index in unique_index_pairs:
pair = fit_method.create_trading_pair(
market_data=market_data_df,
symbol_a=instruments[a_index],
symbol_b=instruments[b_index],
)
pairs.append(pair)
return pairs
pairs_trades = []
for pair in _create_pairs(config, instruments):
single_pair_trades = fit_method.run_pair(
pair=pair, config=config, bt_result=bt_result
)
if single_pair_trades is not None and len(single_pair_trades) > 0:
pairs_trades.append(single_pair_trades)
    # Nothing to concatenate if no pair produced trades
if len(pairs_trades) == 0:
print("No trading signals found for any pairs")
return bt_result
result = pd.concat(pairs_trades, ignore_index=True)
result["time"] = pd.to_datetime(result["time"])
result = result.set_index("time").sort_index()
bt_result.collect_single_day_results(result)
return bt_result
def main() -> None:
parser = argparse.ArgumentParser(description="Run pairs trading backtest.")
parser.add_argument(
"--config", type=str, required=True, help="Path to the configuration file."
)
parser.add_argument(
"--datafiles",
type=str,
required=False,
help="Comma-separated list of data files (overrides config). No wildcards supported.",
)
parser.add_argument(
"--instruments",
type=str,
required=False,
help="Comma-separated list of instrument symbols (e.g., COIN,GBTC). If not provided, auto-detects from database.",
)
parser.add_argument(
"--result_db",
type=str,
required=True,
help="Path to SQLite database for storing results. Use 'NONE' to disable database output.",
)
args = parser.parse_args()
config: Dict = load_config(args.config)
# Dynamically instantiate fit method class
fit_method_class_name = config.get("fit_method_class", None)
assert fit_method_class_name is not None
module_name, class_name = fit_method_class_name.rsplit(".", 1)
module = importlib.import_module(module_name)
fit_method = getattr(module, class_name)()
# Resolve data files (CLI takes priority over config)
datafiles = resolve_datafiles(config, args.datafiles)
if not datafiles:
print("No data files found to process.")
return
print(f"Found {len(datafiles)} data files to process:")
for df in datafiles:
print(f" - {df}")
# Create result database if needed
if args.result_db.upper() != "NONE":
create_result_database(args.result_db)
# Initialize a dictionary to store all trade results
all_results: Dict[str, Dict[str, Any]] = {}
# Store configuration in database for reference
if args.result_db.upper() != "NONE":
# Get list of all instruments for storage
all_instruments = []
for datafile in datafiles:
if args.instruments:
file_instruments = [
inst.strip() for inst in args.instruments.split(",")
]
else:
file_instruments = get_available_instruments_from_db(datafile, config)
all_instruments.extend(file_instruments)
# Remove duplicates while preserving order
unique_instruments = list(dict.fromkeys(all_instruments))
store_config_in_database(
db_path=args.result_db,
config_file_path=args.config,
config=config,
fit_method_class=fit_method_class_name,
datafiles=datafiles,
instruments=unique_instruments,
)
# Process each data file
for datafile in datafiles:
print(f"\n====== Processing {os.path.basename(datafile)} ======")
# Determine instruments to use
if args.instruments:
# Use CLI-specified instruments
instruments = [inst.strip() for inst in args.instruments.split(",")]
print(f"Using CLI-specified instruments: {instruments}")
else:
# Auto-detect instruments from database
instruments = get_available_instruments_from_db(datafile, config)
print(f"Auto-detected instruments: {instruments}")
if not instruments:
print(f"No instruments found for {datafile}, skipping...")
continue
# Process data for this file
try:
fit_method.reset()
bt_results = run_strategy(
config=config,
datafile=datafile,
fit_method=fit_method,
instruments=instruments,
)
# Store results with file name as key
filename = os.path.basename(datafile)
all_results[filename] = {"trades": bt_results.trades.copy()}
# Store results in database
if args.result_db.upper() != "NONE":
store_results_in_database(args.result_db, datafile, bt_results)
print(f"Successfully processed {filename}")
except Exception as err:
print(f"Error processing {datafile}: {str(err)}")
import traceback
traceback.print_exc()
# Calculate and print results using a new BacktestResult instance for aggregation
if all_results:
aggregate_bt_results = BacktestResult(config=config)
aggregate_bt_results.calculate_returns(all_results)
aggregate_bt_results.print_grand_totals()
aggregate_bt_results.print_outstanding_positions()
if args.result_db.upper() != "NONE":
print(f"\nResults stored in database: {args.result_db}")
else:
print("No results to display.")
if __name__ == "__main__":
    main()
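
Both entry points finish the same way: a mapping of run key to {"trades": DataFrame, ...} is handed to a fresh BacktestResult for aggregation. A sketch of that contract with a stand-in aggregation; the pnl column and the summing logic here are illustrative assumptions, not the pt_trading implementation:

import pandas as pd

# Hypothetical shape of all_results; column names are illustrative only.
all_results = {
    "20250605": {"trades": pd.DataFrame({"symbol": ["COIN", "MSTR"], "pnl": [12.5, -3.1]})},
    "20250606": {"trades": pd.DataFrame({"symbol": ["COIN"], "pnl": [4.2]})},
}

# Aggregate across runs the way a grand-total report would:
grand_total = sum(day["trades"]["pnl"].sum() for day in all_results.values())
print(f"grand total pnl: {grand_total:.2f}")  # grand total pnl: 13.60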