Compare commits


8 Commits

Author                           SHA1        Message          Date
                                 809f46fe36  to discard       2025-11-04 18:02:38 +00:00
                                 413abafe0f  My First Commit  2025-11-04 17:55:08 +00:00
                                 5d46c1e32c  .                2025-10-27 18:46:26 -04:00
                                 889f7ba1c3  .                2025-10-27 18:46:14 -04:00
                                 1515b2d077  .                2025-10-27 18:39:51 -04:00
                                 b4ae3e715d  .                2025-10-27 18:36:26 -04:00
Cryptoval Trading Technologies   6f845d32c6  .                2025-07-25 22:13:49 +00:00
Cryptoval Trading Technologies   a04e8878fb  lg_changes       2025-07-25 22:11:49 +00:00
60 changed files with 24154 additions and 12190 deletions

.envrc

@@ -1 +0,0 @@
source /home/oleg/.pyenv/python3.12-venv/bin/activate

.gitignore

@@ -1,12 +1,11 @@
 # SpecStory explanation file
 __pycache__/
 __OLD__/
-.specstory/
 .history/
 .cursorindexingignore
 data
-####.vscode/
 cvttpy
 # SpecStory explanation file
 .specstory/.what-is-this.md
 results/
+tmp/

.vscode/.env

@@ -1 +0,0 @@
PYTHONPATH=/home/oleg/develop

.vscode/launch.json

@@ -1,133 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
"program": "${file}",
"console": "integratedTerminal",
"env": {
"PYTHONPATH": "${workspaceFolder}/lib:${workspaceFolder}/.."
},
},
{
"name": "-------- VECM --------",
},
{
"name": "CRYPTO VECM BACKTEST (optimized)",
"type": "debugpy",
"request": "launch",
"python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
"program": "${workspaceFolder}/research/backtest.py",
"args": [
"--config=http://cloud16.cvtt.vpn:6789/apps/pairs_trading/backtest",
"--instruments=CRYPTO:BNBSPOT:PAIR-ADA-USDT,CRYPTO:BNBSPOT:PAIR-SOL-USDT",
"--date_pattern=20250911",
"--result_db=${workspaceFolder}/research/results/crypto/%T.vecm-opt.ADA-SOL.20250605.crypto_results.db",
],
"env": {
"PYTHONPATH": "${workspaceFolder}/..",
"CONFIG_SERVICE": "cloud16.cvtt.vpn:6789",
"MODEL_CONFIG": "vecm-opt"
},
"console": "integratedTerminal"
},
// {
// "name": "EQUITY VECM (rolling)",
// "type": "debugpy",
// "request": "launch",
// "python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
// "program": "${workspaceFolder}/research/backtest.py",
// "args": [
// "--config=${workspaceFolder}/configuration/vecm.cfg",
// "--instruments=COIN:EQUITY:ALPACA,MSTR:EQUITY:ALPACA",
// "--date_pattern=20250605",
// "--result_db=${workspaceFolder}/research/results/equity/%T.vecm.COIN-MSTR.20250605.equity_results.db",
// ],
// "env": {
// "PYTHONPATH": "${workspaceFolder}/lib"
// },
// "console": "integratedTerminal"
// },
// {
// "name": "EQUITY-CRYPTO VECM (rolling)",
// "type": "debugpy",
// "request": "launch",
// "python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
// "program": "${workspaceFolder}/research/backtest.py",
// "args": [
// "--config=${workspaceFolder}/configuration/vecm.cfg",
// "--instruments=COIN:EQUITY:ALPACA,BTC-USDT:CRYPTO:BNBSPOT",
// "--date_pattern=20250605",
// "--result_db=${workspaceFolder}/research/results/intermarket/%T.vecm.COIN-BTC.20250601.equity_results.db",
// ],
// "env": {
// "PYTHONPATH": "${workspaceFolder}/lib"
// },
// "console": "integratedTerminal"
// },
{
"name": "-------- B a t c h e s --------",
},
{
"name": "CRYPTO OLS Batch (rolling)",
"type": "debugpy",
"request": "launch",
"python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
"program": "${workspaceFolder}/research/backtest.py",
"args": [
"--config=${workspaceFolder}/configuration/ols.cfg",
"--instruments=ADA-USDT:CRYPTO:BNBSPOT,SOL-USDT:CRYPTO:BNBSPOT",
"--date_pattern=2025060*",
"--result_db=${workspaceFolder}/research/results/crypto/%T.ols.ADA-SOL.2025060-.crypto_results.db",
],
"env": {
"PYTHONPATH": "${workspaceFolder}/lib"
},
"console": "integratedTerminal"
},
{
"name": "CRYPTO VECM Batch (rolling)",
"type": "debugpy",
"request": "launch",
"python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
"program": "${workspaceFolder}/research/backtest.py",
"args": [
"--config=${workspaceFolder}/configuration/vecm.cfg",
"--instruments=ADA-USDT:CRYPTO:BNBSPOT,SOL-USDT:CRYPTO:BNBSPOT",
"--date_pattern=2025060*",
"--result_db=${workspaceFolder}/research/results/crypto/%T.vecm.ADA-SOL.2025060-.crypto_results.db",
],
"env": {
"PYTHONPATH": "${workspaceFolder}/lib"
},
"console": "integratedTerminal"
},
{
"name": "-------- Viz Test --------",
},
{
"name": "Viz Test",
"type": "debugpy",
"request": "launch",
"python": "/home/oleg/.pyenv/python3.12-venv/bin/python",
"program": "${workspaceFolder}/tests/viz_test.py",
"args": [
"--config=${workspaceFolder}/configuration/ols.cfg",
"--instruments=ADA-USDT:CRYPTO:BNBSPOT,SOL-USDT:CRYPTO:BNBSPOT",
"--date_pattern=20250605",
],
"env": {
"PYTHONPATH": "${workspaceFolder}/lib"
},
"console": "integratedTerminal"
}
]
}
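
Each launch entry above is a thin wrapper around a command-line invocation of research/backtest.py. As a minimal sketch, the "CRYPTO VECM Batch (rolling)" entry corresponds to roughly the following; the interpreter path and checkout location are machine-specific assumptions copied from the config:

```python
# Sketch: run the "CRYPTO VECM Batch (rolling)" launch entry outside VS Code.
# The interpreter path, workspace root, and PYTHONPATH layout are assumptions
# taken from launch.json above; adjust for your machine.
import os
import subprocess

workspace = os.path.expanduser("~/develop/pairs_trading")  # hypothetical checkout
env = dict(os.environ, PYTHONPATH=os.path.join(workspace, "lib"))

subprocess.run(
    [
        "/home/oleg/.pyenv/python3.12-venv/bin/python",
        os.path.join(workspace, "research", "backtest.py"),
        "--config=" + os.path.join(workspace, "configuration", "vecm.cfg"),
        "--instruments=ADA-USDT:CRYPTO:BNBSPOT,SOL-USDT:CRYPTO:BNBSPOT",
        "--date_pattern=2025060*",
    ],
    env=env,
    check=True,
)
```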

(file name not shown)

@@ -1,10 +0,0 @@
{
"folders": [
{
"path": ".."
}
],
"settings": {
"workbench.colorTheme": "Dracula Theme"
}
}

.vscode/settings.json

@@ -1,19 +0,0 @@
{
"python.testing.pytestEnabled": true,
"python.testing.unittestEnabled": false,
"python.testing.pytestArgs": [
"unittests"
],
"python.testing.cwd": "${workspaceFolder}",
"python.testing.autoTestDiscoverOnSaveEnabled": true,
"python.testing.pytestPath": "python3",
"python.analysis.extraPaths": [
"${workspaceFolder}",
"${workspaceFolder}/..",
"${workspaceFolder}/unittests"
],
"python.envFile": "${workspaceFolder}/.env",
"python.testing.debugPort": 3000,
"python.testing.promptToConfigure": false,
"python.defaultInterpreterPath": "/home/oleg/.pyenv/python3.12-venv/bin/python"
}

(file name not shown)

@@ -1 +0,0 @@
0.0.7

(file name not shown)

@@ -1,509 +0,0 @@
from __future__ import annotations
import asyncio
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
from aiohttp import web
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller, coint # type: ignore
from statsmodels.tsa.vector_ar.vecm import coint_johansen # type: ignore
from cvttpy_tools.app import App
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import Config, CvttAppConfig
from cvttpy_tools.logger import Log
from cvttpy_tools.timeutils import NanoPerSec, SecPerHour, current_nanoseconds
from cvttpy_tools.web.rest_client import RESTSender
from cvttpy_tools.web.rest_service import RestService
from cvttpy_trading.trading.exchange_config import ExchangeAccounts
from cvttpy_trading.trading.instrument import ExchangeInstrument
from cvttpy_trading.trading.mkt_data.md_summary import MdTradesAggregate, MdSummary
from pairs_trading.apps.pair_selector.renderer import HtmlRenderer
@dataclass
class InstrumentQuality(NamedObject):
instrument_: ExchangeInstrument
record_count_: int
latest_tstamp_: Optional[pd.Timestamp]
status_: str
reason_: str
@dataclass
class PairStats(NamedObject):
instrument_a_: ExchangeInstrument
instrument_b_: ExchangeInstrument
pvalue_eg_: Optional[float]
pvalue_adf_: Optional[float]
pvalue_j_: Optional[float]
trace_stat_j_: Optional[float]
rank_eg_: int = 0
rank_adf_: int = 0
rank_j_: int = 0
composite_rank_: int = 0
def as_dict(self) -> Dict[str, Any]:
return {
"instrument_a": self.instrument_a_.instrument_id(),
"instrument_b": self.instrument_b_.instrument_id(),
"pvalue_eg": self.pvalue_eg_,
"pvalue_adf": self.pvalue_adf_,
"pvalue_j": self.pvalue_j_,
"trace_stat_j": self.trace_stat_j_,
"rank_eg": self.rank_eg_,
"rank_adf": self.rank_adf_,
"rank_j": self.rank_j_,
"composite_rank": self.composite_rank_,
}
class DataFetcher(NamedObject):
sender_: RESTSender
interval_sec_: int
history_depth_sec_: int
def __init__(
self,
base_url: str,
interval_sec: int,
history_depth_sec: int,
) -> None:
self.sender_ = RESTSender(base_url=base_url)
self.interval_sec_ = interval_sec
self.history_depth_sec_ = history_depth_sec
def fetch(
self, exch_acct: str, inst: ExchangeInstrument
) -> List[MdTradesAggregate]:
rqst_data = {
"exch_acct": exch_acct,
"instrument_id": inst.instrument_id(),
"interval_sec": self.interval_sec_,
"history_depth_sec": self.history_depth_sec_,
}
response = self.sender_.send_post(endpoint="md_summary", post_body=rqst_data)
if response.status_code not in (200, 201):
Log.error(
f"{self.fname()}: error {response.status_code} for {inst.details_short()}: {response.text}"
)
return []
mdsums: List[MdSummary] = MdSummary.from_REST_response(response=response)
return [
mdsum.create_md_trades_aggregate(
exch_acct=exch_acct, exch_inst=inst, interval_sec=self.interval_sec_
)
for mdsum in mdsums
]
class QualityChecker(NamedObject):
interval_sec_: int
def __init__(self, interval_sec: int) -> None:
self.interval_sec_ = interval_sec
def evaluate(
self, inst: ExchangeInstrument, aggr: List[MdTradesAggregate]
) -> InstrumentQuality:
if len(aggr) == 0:
return InstrumentQuality(
instrument_=inst,
record_count_=0,
latest_tstamp_=None,
status_="FAIL",
reason_="no records",
)
aggr_sorted = sorted(aggr, key=lambda a: a.aggr_time_ns_)
latest_ts = pd.to_datetime(aggr_sorted[-1].aggr_time_ns_, unit="ns", utc=True)
now_ts = pd.Timestamp.utcnow()
recency_cutoff = now_ts - pd.Timedelta(seconds=2 * self.interval_sec_)
if latest_ts <= recency_cutoff:
return InstrumentQuality(
instrument_=inst,
record_count_=len(aggr_sorted),
latest_tstamp_=latest_ts,
status_="FAIL",
reason_=f"stale: latest {latest_ts} <= cutoff {recency_cutoff}",
)
gaps_ok, reason = self._check_gaps(aggr_sorted)
status = "PASS" if gaps_ok else "FAIL"
return InstrumentQuality(
instrument_=inst,
record_count_=len(aggr_sorted),
latest_tstamp_=latest_ts,
status_=status,
reason_=reason,
)
def _check_gaps(self, aggr: List[MdTradesAggregate]) -> Tuple[bool, str]:
NUM_TRADES_THRESHOLD = 50
if len(aggr) < 2:
return True, "ok"
interval_ns = self.interval_sec_ * NanoPerSec
for idx in range(1, len(aggr)):
prev = aggr[idx - 1]
curr = aggr[idx]
delta = curr.aggr_time_ns_ - prev.aggr_time_ns_
missing_intervals = int(delta // interval_ns) - 1
if missing_intervals <= 0:
continue
prev_nt = prev.num_trades_
next_nt = curr.num_trades_
estimate = self._approximate_num_trades(prev_nt, next_nt)
if estimate > NUM_TRADES_THRESHOLD:
return False, (
f"gap of {missing_intervals} interval(s), est num_trades={estimate} > {NUM_TRADES_THRESHOLD}"
)
return True, "ok"
@staticmethod
def _approximate_num_trades(prev_nt: Optional[int], next_nt: Optional[int]) -> float:
if prev_nt is None and next_nt is None:
return 0.0
if prev_nt is None:
return float(next_nt)
if next_nt is None:
return float(prev_nt)
return (prev_nt + next_nt) / 2.0
class PairAnalyzer(NamedObject):
price_field_: str
interval_sec_: int
def __init__(self, price_field: str, interval_sec: int) -> None:
self.price_field_ = price_field
self.interval_sec_ = interval_sec
def analyze(
self, series: Dict[ExchangeInstrument, pd.DataFrame]
) -> List[PairStats]:
instruments = list(series.keys())
results: List[PairStats] = []
for i in range(len(instruments)):
for j in range(i + 1, len(instruments)):
inst_a = instruments[i]
inst_b = instruments[j]
df_a = series[inst_a][["tstamp", "price"]].rename(
columns={"price": "price_a"}
)
df_b = series[inst_b][["tstamp", "price"]].rename(
columns={"price": "price_b"}
)
merged = pd.merge(df_a, df_b, on="tstamp", how="inner").sort_values(
"tstamp"
)
stats = self._compute_stats(inst_a, inst_b, merged)
if stats:
results.append(stats)
self._rank(results)
return results
def _compute_stats(
self,
inst_a: ExchangeInstrument,
inst_b: ExchangeInstrument,
merged: pd.DataFrame,
) -> Optional[PairStats]:
if len(merged) < 2:
return None
px_a = merged["price_a"].astype(float)
px_b = merged["price_b"].astype(float)
std_a = float(px_a.std())
std_b = float(px_b.std())
if std_a == 0 or std_b == 0:
return None
z_a = (px_a - float(px_a.mean())) / std_a
z_b = (px_b - float(px_b.mean())) / std_b
p_eg: Optional[float]
p_adf: Optional[float]
p_j: Optional[float]
trace_stat: Optional[float]
try:
p_eg = float(coint(z_a, z_b)[1])
except Exception as exc:
Log.warning(
f"{self.fname()}: EG failed for {inst_a.details_short()}/{inst_b.details_short()}: {exc}"
)
p_eg = None
try:
spread = z_a - z_b
p_adf = float(adfuller(spread, maxlag=1, regression="c")[1])
except Exception as exc:
Log.warning(
f"{self.fname()}: ADF failed for {inst_a.details_short()}/{inst_b.details_short()}: {exc}"
)
p_adf = None
try:
data = np.column_stack([z_a, z_b])
res = coint_johansen(data, det_order=0, k_ar_diff=1)
trace_stat = float(res.lr1[0])
cv10, cv5, cv1 = res.cvt[0]
if trace_stat > cv1:
p_j = 0.01
elif trace_stat > cv5:
p_j = 0.05
elif trace_stat > cv10:
p_j = 0.10
else:
p_j = 1.0
except Exception as exc:
Log.warning(
f"{self.fname()}: Johansen failed for {inst_a.details_short()}/{inst_b.details_short()}: {exc}"
)
p_j = None
trace_stat = None
return PairStats(
instrument_a_=inst_a,
instrument_b_=inst_b,
pvalue_eg_=p_eg,
pvalue_adf_=p_adf,
pvalue_j_=p_j,
trace_stat_j_=trace_stat,
)
def _rank(self, results: List[PairStats]) -> None:
self._assign_ranks(results, key=lambda r: r.pvalue_eg_, attr="rank_eg_")
self._assign_ranks(results, key=lambda r: r.pvalue_adf_, attr="rank_adf_")
self._assign_ranks(results, key=lambda r: r.pvalue_j_, attr="rank_j_")
for res in results:
res.composite_rank_ = res.rank_eg_ + res.rank_adf_ + res.rank_j_
results.sort(key=lambda r: r.composite_rank_)
@staticmethod
def _assign_ranks(results: List[PairStats], key, attr: str) -> None:
values = [key(r) for r in results]
sorted_vals = sorted([v for v in values if v is not None])
for res in results:
val = key(res)
if val is None:
setattr(res, attr, len(sorted_vals) + 1)
continue
rank = 1 + sum(1 for v in sorted_vals if v < val)
setattr(res, attr, rank)
class PairSelectionEngine(NamedObject):
config_: object
instruments_: List[ExchangeInstrument]
price_field_: str
fetcher_: DataFetcher
quality_: QualityChecker
analyzer_: PairAnalyzer
interval_sec_: int
history_depth_sec_: int
data_quality_cache_: List[InstrumentQuality]
pair_results_cache_: List[PairStats]
def __init__(
self,
config: Config,
instruments: List[ExchangeInstrument],
price_field: str,
) -> None:
self.config_ = config
self.instruments_ = instruments
self.price_field_ = price_field
interval_sec = int(config.get_value("interval_sec", 0))
history_depth_sec = int(config.get_value("history_depth_hours", 0)) * SecPerHour
base_url = config.get_value("cvtt_base_url", None)
assert interval_sec > 0, "interval_sec must be > 0"
assert history_depth_sec > 0, "history_depth_sec must be > 0"
assert base_url, "cvtt_base_url must be set"
self.fetcher_ = DataFetcher(
base_url=base_url,
interval_sec=interval_sec,
history_depth_sec=history_depth_sec,
)
self.quality_ = QualityChecker(interval_sec=interval_sec)
self.analyzer_ = PairAnalyzer(
price_field=price_field, interval_sec=interval_sec
)
self.interval_sec_ = interval_sec
self.history_depth_sec_ = history_depth_sec
self.data_quality_cache_ = []
self.pair_results_cache_ = []
async def run_once(self) -> None:
quality_results: List[InstrumentQuality] = []
price_series: Dict[ExchangeInstrument, pd.DataFrame] = {}
for inst in self.instruments_:
exch_acct = inst.user_data_.get("exch_acct") or inst.exchange_id_
aggr = self.fetcher_.fetch(exch_acct=exch_acct, inst=inst)
q = self.quality_.evaluate(inst, aggr)
quality_results.append(q)
if q.status_ != "PASS":
continue
df = self._to_dataframe(aggr, inst)
if len(df) > 0:
price_series[inst] = df
self.data_quality_cache_ = quality_results
self.pair_results_cache_ = self.analyzer_.analyze(price_series)
def _to_dataframe(
self, aggr: List[MdTradesAggregate], inst: ExchangeInstrument
) -> pd.DataFrame:
rows: List[Dict[str, Any]] = []
for item in aggr:
rows.append(
{
"tstamp": pd.to_datetime(item.aggr_time_ns_, unit="ns", utc=True),
"price": self._extract_price(item, inst),
"num_trades": item.num_trades_,
}
)
df = pd.DataFrame(rows)
return df.sort_values("tstamp").reset_index(drop=True)
def _extract_price(
self, aggr: MdTradesAggregate, inst: ExchangeInstrument
) -> float:
price_field = self.price_field_
# MdTradesAggregate inherits hist bar with fields open_, high_, low_, close_, vwap_
field_map = {
"open": aggr.open_,
"high": aggr.high_,
"low": aggr.low_,
"close": aggr.close_,
"vwap": aggr.vwap_,
}
raw = field_map.get(price_field, aggr.close_)
return inst.get_price(raw)
def sleep_seconds_until_next_cycle(self) -> float:
now_ns = current_nanoseconds()
interval_ns = self.interval_sec_ * NanoPerSec
next_boundary = (now_ns // interval_ns + 1) * interval_ns
return max(0.0, (next_boundary - now_ns) / NanoPerSec)
def quality_dicts(self) -> List[Dict[str, Any]]:
res: List[Dict[str, Any]] = []
for q in self.data_quality_cache_:
res.append(
{
"instrument": q.instrument_.instrument_id(),
"record_count": q.record_count_,
"latest_tstamp": (
q.latest_tstamp_.isoformat() if q.latest_tstamp_ else None
),
"status": q.status_,
"reason": q.reason_,
}
)
return res
def pair_dicts(self) -> List[Dict[str, Any]]:
return [p.as_dict() for p in self.pair_results_cache_]
class PairSelector(NamedObject):
instruments_: List[ExchangeInstrument]
engine_: PairSelectionEngine
rest_service_: RestService
def __init__(self) -> None:
App.instance().add_cmdline_arg("--oneshot", action="store_true", default=False)
App.instance().add_call(App.Stage.Config, self._on_config())
App.instance().add_call(App.Stage.Run, self.run())
async def _on_config(self) -> None:
cfg = CvttAppConfig.instance()
self.instruments_ = self._load_instruments(cfg)
price_field = cfg.get_value("model/stat_model_price", "close")
self.engine_ = PairSelectionEngine(
config=cfg,
instruments=self.instruments_,
price_field=price_field,
)
self.rest_service_ = RestService(config_key="/api/REST")
self.rest_service_.add_handler("GET", "/data_quality", self._on_data_quality)
self.rest_service_.add_handler(
"GET", "/pair_selection", self._on_pair_selection
)
def _load_instruments(self, cfg: CvttAppConfig) -> List[ExchangeInstrument]:
instruments_cfg = cfg.get_value("instruments", [])
instruments: List[ExchangeInstrument] = []
assert len(instruments_cfg) >= 2, "at least two instruments required"
for item in instruments_cfg:
if isinstance(item, str):
parts = item.split(":", 1)
if len(parts) != 2:
raise ValueError(f"invalid instrument format: {item}")
exch_acct, instrument_id = parts
elif isinstance(item, dict):
exch_acct = item.get("exch_acct", "")
instrument_id = item.get("instrument_id", "")
if not exch_acct or not instrument_id:
raise ValueError(f"invalid instrument config: {item}")
else:
raise ValueError(f"unsupported instrument entry: {item}")
exch_inst = ExchangeAccounts.instance().get_exchange_instrument(
exch_acct=exch_acct, instrument_id=instrument_id
)
assert (
exch_inst is not None
), f"no ExchangeInstrument for {exch_acct}:{instrument_id}"
exch_inst.user_data_["exch_acct"] = exch_acct
instruments.append(exch_inst)
return instruments
async def run(self) -> None:
oneshot = App.instance().get_argument("oneshot", False)
while True:
await self.engine_.run_once()
if oneshot:
break
sleep_for = self.engine_.sleep_seconds_until_next_cycle()
await asyncio.sleep(sleep_for)
async def _on_data_quality(self, request: web.Request) -> web.Response:
fmt = request.query.get("format", "html").lower()
quality = self.engine_.quality_dicts()
if fmt == "json":
return web.json_response(quality)
return web.Response(
text=HtmlRenderer.render_data_quality(quality), content_type="text/html"
)
async def _on_pair_selection(self, request: web.Request) -> web.Response:
fmt = request.query.get("format", "html").lower()
pairs = self.engine_.pair_dicts()
if fmt == "json":
return web.json_response(pairs)
return web.Response(
text=HtmlRenderer.render_pairs(pairs), content_type="text/html"
)
if __name__ == "__main__":
App()
CvttAppConfig()
PairSelector()
App.instance().run()
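
The selector registers two GET handlers on its RestService, each returning HTML by default and JSON when `?format=json` is passed. A minimal sketch of polling them; the host and port are assumptions (RestService reads them from the "/api/REST" config key):

```python
# Sketch: query the pair selector's REST endpoints. BASE is hypothetical;
# replace it with wherever the service is actually listening.
import json
import urllib.request

BASE = "http://localhost:8080"

for endpoint in ("/data_quality", "/pair_selection"):
    with urllib.request.urlopen(f"{BASE}{endpoint}?format=json") as resp:
        rows = json.load(resp)
    print(f"{endpoint}: {len(rows)} rows")
    for row in rows[:3]:  # peek at the first few entries
        print("  ", row)
```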

(file name not shown)

@@ -1,394 +0,0 @@
```python
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller, coint
from statsmodels.tsa.vector_ar.vecm import coint_johansen  # type: ignore
# ---
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import Config
from cvttpy_tools.logger import Log
from cvttpy_tools.timeutils import NanoPerSec, SecPerHour, current_nanoseconds
from cvttpy_tools.web.rest_client import RESTSender
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
from cvttpy_trading.trading.mkt_data.md_summary import MdTradesAggregate, MdSummary
@dataclass
class InstrumentQuality(NamedObject):
instrument_: ExchangeInstrument
record_count_: int
latest_tstamp_: Optional[pd.Timestamp]
status_: str
reason_: str
@dataclass
class PairStats(NamedObject):
instrument_a_: ExchangeInstrument
instrument_b_: ExchangeInstrument
pvalue_eg_: Optional[float]
pvalue_adf_: Optional[float]
pvalue_j_: Optional[float]
trace_stat_j_: Optional[float]
rank_eg_: int = 0
rank_adf_: int = 0
rank_j_: int = 0
composite_rank_: int = 0
def as_dict(self) -> Dict[str, Any]:
return {
"instrument_a": self.instrument_a_.instrument_id(),
"instrument_b": self.instrument_b_.instrument_id(),
"pvalue_eg": self.pvalue_eg_,
"pvalue_adf": self.pvalue_adf_,
"pvalue_j": self.pvalue_j_,
"trace_stat_j": self.trace_stat_j_,
"rank_eg": self.rank_eg_,
"rank_adf": self.rank_adf_,
"rank_j": self.rank_j_,
"composite_rank": self.composite_rank_,
}
class DataFetcher(NamedObject):
sender_: RESTSender
interval_sec_: int
history_depth_sec_: int
def __init__(
self,
base_url: str,
interval_sec: int,
history_depth_sec: int,
) -> None:
self.sender_ = RESTSender(base_url=base_url)
self.interval_sec_ = interval_sec
self.history_depth_sec_ = history_depth_sec
def fetch(self, exch_acct: str, inst: ExchangeInstrument) -> List[MdTradesAggregate]:
rqst_data = {
"exch_acct": exch_acct,
"instrument_id": inst.instrument_id(),
"interval_sec": self.interval_sec_,
"history_depth_sec": self.history_depth_sec_,
}
response = self.sender_.send_post(endpoint="md_summary", post_body=rqst_data)
if response.status_code not in (200, 201):
Log.error(
f"{self.fname()}: error {response.status_code} for {inst.details_short()}: {response.text}")
return []
mdsums: List[MdSummary] = MdSummary.from_REST_response(response=response)
return [
mdsum.create_md_trades_aggregate(
exch_acct=exch_acct, exch_inst=inst, interval_sec=self.interval_sec_
)
for mdsum in mdsums
]
class QualityChecker(NamedObject):
interval_sec_: int
def __init__(self, interval_sec: int) -> None:
self.interval_sec_ = interval_sec
def evaluate(self, inst: ExchangeInstrument, aggr: List[MdTradesAggregate]) -> InstrumentQuality:
if len(aggr) == 0:
return InstrumentQuality(
instrument_=inst,
record_count_=0,
latest_tstamp_=None,
status_="FAIL",
reason_="no records",
)
aggr_sorted = sorted(aggr, key=lambda a: a.aggr_time_ns_)
latest_ts = pd.to_datetime(aggr_sorted[-1].aggr_time_ns_, unit="ns", utc=True)
now_ts = pd.Timestamp.utcnow()
recency_cutoff = now_ts - pd.Timedelta(seconds=2 * self.interval_sec_)
if latest_ts <= recency_cutoff:
return InstrumentQuality(
instrument_=inst,
record_count_=len(aggr_sorted),
latest_tstamp_=latest_ts,
status_="FAIL",
reason_=f"stale: latest {latest_ts} <= cutoff {recency_cutoff}",
)
gaps_ok, reason = self._check_gaps(aggr_sorted)
status = "PASS" if gaps_ok else "FAIL"
return InstrumentQuality(
instrument_=inst,
record_count_=len(aggr_sorted),
latest_tstamp_=latest_ts,
status_=status,
reason_=reason,
)
def _check_gaps(self, aggr: List[MdTradesAggregate]) -> Tuple[bool, str]:
NUM_TRADES_THRESHOLD = 50
if len(aggr) < 2:
return True, "ok"
interval_ns = self.interval_sec_ * NanoPerSec
for idx in range(1, len(aggr)):
prev = aggr[idx - 1]
curr = aggr[idx]
delta = curr.aggr_time_ns_ - prev.aggr_time_ns_
missing_intervals = int(delta // interval_ns) - 1
if missing_intervals <= 0:
continue
prev_nt = prev.num_trades_
next_nt = curr.num_trades_
estimate = self._approximate_num_trades(prev_nt, next_nt)
if estimate > NUM_TRADES_THRESHOLD:
return False, (
f"gap of {missing_intervals} interval(s), est num_trades={estimate} > {NUM_TRADES_THRESHOLD}"
)
return True, "ok"
@staticmethod
def _approximate_num_trades(prev_nt: Optional[int], next_nt: Optional[int]) -> float:
if prev_nt is None and next_nt is None:
return 0.0
if prev_nt is None:
return float(next_nt)
if next_nt is None:
return float(prev_nt)
return (prev_nt + next_nt) / 2.0
class PairAnalyzer(NamedObject):
price_field_: str
interval_sec_: int
def __init__(self, price_field: str, interval_sec: int) -> None:
self.price_field_ = price_field
self.interval_sec_ = interval_sec
def analyze(self, series: Dict[ExchangeInstrument, pd.DataFrame]) -> List[PairStats]:
instruments = list(series.keys())
results: List[PairStats] = []
for i in range(len(instruments)):
for j in range(i + 1, len(instruments)):
inst_a = instruments[i]
inst_b = instruments[j]
df_a = series[inst_a][["tstamp", "price"]].rename(
columns={"price": "price_a"}
)
df_b = series[inst_b][["tstamp", "price"]].rename(
columns={"price": "price_b"}
)
merged = pd.merge(df_a, df_b, on="tstamp", how="inner").sort_values(
"tstamp"
)
stats = self._compute_stats(inst_a, inst_b, merged)
if stats:
results.append(stats)
self._rank(results)
return results
def _compute_stats(
self,
inst_a: ExchangeInstrument,
inst_b: ExchangeInstrument,
merged: pd.DataFrame,
) -> Optional[PairStats]:
if len(merged) < 2:
return None
px_a = merged["price_a"].astype(float)
px_b = merged["price_b"].astype(float)
std_a = float(px_a.std())
std_b = float(px_b.std())
if std_a == 0 or std_b == 0:
return None
z_a = (px_a - float(px_a.mean())) / std_a
z_b = (px_b - float(px_b.mean())) / std_b
p_eg: Optional[float]
p_adf: Optional[float]
p_j: Optional[float]
trace_stat: Optional[float]
try:
p_eg = float(coint(z_a, z_b)[1])
except Exception as exc:
Log.warning(f"{self.fname()}: EG failed for {inst_a.details_short()}/{inst_b.details_short()}: {exc}")
p_eg = None
try:
spread = z_a - z_b
p_adf = float(adfuller(spread, maxlag=1, regression="c")[1])
except Exception as exc:
Log.warning(f"{self.fname()}: ADF failed for {inst_a.details_short()}/{inst_b.details_short()}: {exc}")
p_adf = None
try:
data = np.column_stack([z_a, z_b])
res = coint_johansen(data, det_order=0, k_ar_diff=1)
trace_stat = float(res.lr1[0])
cv10, cv5, cv1 = res.cvt[0]
if trace_stat > cv1:
p_j = 0.01
elif trace_stat > cv5:
p_j = 0.05
elif trace_stat > cv10:
p_j = 0.10
else:
p_j = 1.0
except Exception as exc:
Log.warning(f"{self.fname()}: Johansen failed for {inst_a.details_short()}/{inst_b.details_short()}: {exc}")
p_j = None
trace_stat = None
return PairStats(
instrument_a_=inst_a,
instrument_b_=inst_b,
pvalue_eg_=p_eg,
pvalue_adf_=p_adf,
pvalue_j_=p_j,
trace_stat_j_=trace_stat,
)
def _rank(self, results: List[PairStats]) -> None:
self._assign_ranks(results, key=lambda r: r.pvalue_eg_, attr="rank_eg_")
self._assign_ranks(results, key=lambda r: r.pvalue_adf_, attr="rank_adf_")
self._assign_ranks(results, key=lambda r: r.pvalue_j_, attr="rank_j_")
for res in results:
res.composite_rank_ = res.rank_eg_ + res.rank_adf_ + res.rank_j_
results.sort(key=lambda r: r.composite_rank_)
@staticmethod
def _assign_ranks(
results: List[PairStats], key, attr: str
) -> None:
values = [key(r) for r in results]
sorted_vals = sorted([v for v in values if v is not None])
for res in results:
val = key(res)
if val is None:
setattr(res, attr, len(sorted_vals) + 1)
continue
rank = 1 + sum(1 for v in sorted_vals if v < val)
setattr(res, attr, rank)
class PairSelectionEngine(NamedObject):
config_: object
instruments_: List[ExchangeInstrument]
price_field_: str
fetcher_: DataFetcher
quality_: QualityChecker
analyzer_: PairAnalyzer
interval_sec_: int
history_depth_sec_: int
data_quality_cache_: List[InstrumentQuality]
pair_results_cache_: List[PairStats]
def __init__(
self,
config: Config,
instruments: List[ExchangeInstrument],
price_field: str,
) -> None:
self.config_ = config
self.instruments_ = instruments
self.price_field_ = price_field
interval_sec = int(config.get_value("interval_sec", 0))
history_depth_sec = int(config.get_value("history_depth_hours", 0)) * SecPerHour
base_url = config.get_value("cvtt_base_url", None)
assert interval_sec > 0, "interval_sec must be > 0"
assert history_depth_sec > 0, "history_depth_sec must be > 0"
assert base_url, "cvtt_base_url must be set"
self.fetcher_ = DataFetcher(
base_url=base_url,
interval_sec=interval_sec,
history_depth_sec=history_depth_sec,
)
self.quality_ = QualityChecker(interval_sec=interval_sec)
self.analyzer_ = PairAnalyzer(price_field=price_field, interval_sec=interval_sec)
self.interval_sec_ = interval_sec
self.history_depth_sec_ = history_depth_sec
self.data_quality_cache_ = []
self.pair_results_cache_ = []
async def run_once(self) -> None:
quality_results: List[InstrumentQuality] = []
price_series: Dict[ExchangeInstrument, pd.DataFrame] = {}
for inst in self.instruments_:
exch_acct = inst.user_data_.get("exch_acct") or inst.exchange_id_
aggr = self.fetcher_.fetch(exch_acct=exch_acct, inst=inst)
q = self.quality_.evaluate(inst, aggr)
quality_results.append(q)
if q.status_ != "PASS":
continue
df = self._to_dataframe(aggr, inst)
if len(df) > 0:
price_series[inst] = df
self.data_quality_cache_ = quality_results
self.pair_results_cache_ = self.analyzer_.analyze(price_series)
def _to_dataframe(self, aggr: List[MdTradesAggregate], inst: ExchangeInstrument) -> pd.DataFrame:
rows: List[Dict[str, Any]] = []
for item in aggr:
rows.append(
{
"tstamp": pd.to_datetime(item.aggr_time_ns_, unit="ns", utc=True),
"price": self._extract_price(item, inst),
"num_trades": item.num_trades_,
}
)
df = pd.DataFrame(rows)
return df.sort_values("tstamp").reset_index(drop=True)
def _extract_price(self, aggr: MdTradesAggregate, inst: ExchangeInstrument) -> float:
price_field = self.price_field_
# MdTradesAggregate inherits hist bar with fields open_, high_, low_, close_, vwap_
field_map = {
"open": aggr.open_,
"high": aggr.high_,
"low": aggr.low_,
"close": aggr.close_,
"vwap": aggr.vwap_,
}
raw = field_map.get(price_field, aggr.close_)
return inst.get_price(raw)
def sleep_seconds_until_next_cycle(self) -> float:
now_ns = current_nanoseconds()
interval_ns = self.interval_sec_ * NanoPerSec
next_boundary = (now_ns // interval_ns + 1) * interval_ns
return max(0.0, (next_boundary - now_ns) / NanoPerSec)
def quality_dicts(self) -> List[Dict[str, Any]]:
res: List[Dict[str, Any]] = []
for q in self.data_quality_cache_:
res.append(
{
"instrument": q.instrument_.instrument_id(),
"record_count": q.record_count_,
"latest_tstamp": q.latest_tstamp_.isoformat() if q.latest_tstamp_ else None,
"status": q.status_,
"reason": q.reason_,
}
)
return res
def pair_dicts(self) -> List[Dict[str, Any]]:
return [p.as_dict() for p in self.pair_results_cache_]
```
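
The statistical core of `_compute_stats` above is plain statsmodels, so it can be exercised standalone. A self-contained sketch of the same three tests (Engle-Granger, ADF on the z-score spread, Johansen trace) on a synthetic cointegrated pair; no cvttpy dependencies:

```python
# Standalone sketch of the three cointegration tests used in _compute_stats,
# run on two noisy observations of the same random walk.
import numpy as np
from statsmodels.tsa.stattools import adfuller, coint
from statsmodels.tsa.vector_ar.vecm import coint_johansen

rng = np.random.default_rng(42)
common = np.cumsum(rng.normal(size=500))           # shared random walk
px_a = common + rng.normal(scale=0.5, size=500)
px_b = common + rng.normal(scale=0.5, size=500)

z_a = (px_a - px_a.mean()) / px_a.std()            # z-score as in the code above
z_b = (px_b - px_b.mean()) / px_b.std()

p_eg = coint(z_a, z_b)[1]                          # Engle-Granger p-value
p_adf = adfuller(z_a - z_b, maxlag=1, regression="c")[1]  # ADF on the spread
res = coint_johansen(np.column_stack([z_a, z_b]), det_order=0, k_ar_diff=1)
trace_stat = res.lr1[0]
cv10, cv5, cv1 = res.cvt[0]                        # 90%/95%/99% critical values

print(f"EG p={p_eg:.4f}  ADF p={p_adf:.4f}  trace={trace_stat:.2f} (cv95={cv5:.2f})")
```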

(file name not shown)

@@ -1,140 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, List
from cvttpy_tools.base import NamedObject
class HtmlRenderer(NamedObject):
def __init__(self) -> None:
pass
@staticmethod
def render_data_quality(quality: List[Dict[str, Any]]) -> str:
rows = "".join(
f"<tr>"
f"<td>{q.get('instrument','')}</td>"
f"<td>{q.get('record_count','')}</td>"
f"<td>{q.get('latest_tstamp','')}</td>"
f"<td>{q.get('status','')}</td>"
f"<td>{q.get('reason','')}</td>"
f"</tr>"
for q in sorted(quality, key=lambda x: str(x.get("instrument", "")))
)
return f"""
<!DOCTYPE html>
<html>
<head>
<meta charset='utf-8'/>
<title>Data Quality</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 20px; }}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #ccc; padding: 8px; text-align: left; }}
th {{ background: #f2f2f2; }}
</style>
</head>
<body>
<h2>Data Quality</h2>
<table>
<thead>
<tr><th>Instrument</th><th>Records</th><th>Latest</th><th>Status</th><th>Reason</th></tr>
</thead>
<tbody>{rows}</tbody>
</table>
</body>
</html>
"""
@staticmethod
def render_pairs(pairs: List[Dict[str, Any]]) -> str:
if not pairs:
body = "<p>No pairs available. Check data quality and try again.</p>"
else:
body_rows = []
for p in pairs:
body_rows.append(
"<tr>"
f"<td>{p.get('instrument_a','')}</td>"
f"<td>{p.get('instrument_b','')}</td>"
f"<td data-value='{p.get('rank_eg',0)}'>{p.get('rank_eg','')}</td>"
f"<td data-value='{p.get('rank_adf',0)}'>{p.get('rank_adf','')}</td>"
f"<td data-value='{p.get('rank_j',0)}'>{p.get('rank_j','')}</td>"
f"<td data-value='{p.get('pvalue_eg','')}'>{p.get('pvalue_eg','')}</td>"
f"<td data-value='{p.get('pvalue_adf','')}'>{p.get('pvalue_adf','')}</td>"
f"<td data-value='{p.get('pvalue_j','')}'>{p.get('pvalue_j','')}</td>"
"</tr>"
)
body = "\n".join(body_rows)
return f"""
<!DOCTYPE html>
<html>
<head>
<meta charset='utf-8'/>
<title>Pair Selection</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 20px; }}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #ccc; padding: 8px; text-align: left; }}
th.sortable {{ cursor: pointer; background: #f2f2f2; }}
</style>
</head>
<body>
<h2>Pair Selection</h2>
<table id="pairs-table">
<thead>
<tr>
<th>Instrument A</th>
<th>Instrument B</th>
<th class="sortable" data-type="num">Rank-EG</th>
<th class="sortable" data-type="num">Rank-ADF</th>
<th class="sortable" data-type="num">Rank-J</th>
<th>EG p-value</th>
<th>ADF p-value</th>
<th>Johansen pseudo p</th>
</tr>
</thead>
<tbody>
{body}
</tbody>
</table>
<script>
(function() {{
const table = document.getElementById('pairs-table');
if (!table) return;
const getValue = (cell) => {{
const val = cell.getAttribute('data-value');
const num = parseFloat(val);
return isNaN(num) ? val : num;
}};
const toggleSort = (index, isNumeric) => {{
const tbody = table.querySelector('tbody');
const rows = Array.from(tbody.querySelectorAll('tr'));
const th = table.querySelectorAll('th')[index];
const dir = th.getAttribute('data-dir') === 'asc' ? 'desc' : 'asc';
th.setAttribute('data-dir', dir);
rows.sort((a, b) => {{
const va = getValue(a.children[index]);
const vb = getValue(b.children[index]);
if (isNumeric && !isNaN(va) && !isNaN(vb)) {{
return dir === 'asc' ? va - vb : vb - va;
}}
return dir === 'asc'
? String(va).localeCompare(String(vb))
: String(vb).localeCompare(String(va));
}});
tbody.innerHTML = '';
rows.forEach(r => tbody.appendChild(r));
}};
table.querySelectorAll('th.sortable').forEach((th, idx) => {{
th.addEventListener('click', () => toggleSort(idx, th.dataset.type === 'num'));
}});
}})();
</script>
</body>
</html>
"""

(file name not shown)

@@ -1,169 +0,0 @@
from __future__ import annotations
import asyncio
from typing import Callable, Coroutine, Dict, List
import aiohttp.web as web
from cvttpy_tools.app import App
from cvttpy_tools.config import Config
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import CvttAppConfig
from cvttpy_tools.logger import Log
from cvttpy_tools.settings.cvtt_types import BookIdT
from cvttpy_tools.web.rest_service import RestService
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
from cvttpy_trading.trading.mkt_data.md_summary import MdTradesAggregate
from cvttpy_trading.trading.exchange_config import ExchangeAccounts
# ---
from pairs_trading.lib.live.mkt_data_client import CvttRestMktDataClient
'''
config http://cloud16.cvtt.vpn/apps/pairs_trading
'''
HistMdCbT = Callable[[List[MdTradesAggregate]], Coroutine]
UpdateMdCbT = Callable[[MdTradesAggregate], Coroutine]
class PairTrader(NamedObject):
config_: CvttAppConfig
instruments_: List[ExchangeInstrument]
book_id_: BookIdT
live_strategy_: "PtLiveStrategy" #type: ignore
ti_sender_: "TradingInstructionsSender" #type: ignore
pricer_client_: CvttRestMktDataClient
rest_service_: RestService
latest_history_: Dict[ExchangeInstrument, List[MdTradesAggregate]]
def __init__(self) -> None:
self.instruments_ = []
self.latest_history_ = {}
App.instance().add_cmdline_arg(
"--instrument_A",
type=str,
required=True,
help=(
" Instrument A in pair (e.g., COINBASE_AT:PAIR-BTC-USD)"
),
)
App.instance().add_cmdline_arg(
"--instrument_B",
type=str,
required=True,
help=(
" Instrument B in pair (e.g., COINBASE_AT:PAIR-ETH-USD)"
),
)
App.instance().add_cmdline_arg(
"--book_id",
type=str,
required=True,
help="Book ID"
)
App.instance().add_call(App.Stage.Config, self._on_config())
App.instance().add_call(App.Stage.Run, self.run())
async def _on_config(self) -> None:
self.config_ = CvttAppConfig.instance()
self.book_id_ = App.instance().get_argument(name="book_id")
# ------- PARSE INSTRUMENTS -------
instr_list: List[str] = []
instr_str = App.instance().get_argument("instrument_A", "")
assert instr_str != "", "Missing instrument A"
instr_list.append(instr_str)
instr_str = App.instance().get_argument("instrument_B", "")
assert instr_str != "", "Missing instrument B"
instr_list.append(instr_str)
for instr in instr_list:
instr_parts = instr.split(":")
if len(instr_parts) != 2:
raise ValueError(f"Invalid pair format: {instr}")
exch_acct = instr_parts[0]
instrument_id = instr_parts[1]
exch_inst = ExchangeAccounts.instance().get_exchange_instrument(exch_acct=exch_acct, instrument_id=instrument_id)
assert exch_inst is not None, f"No ExchangeInstrument for {instr}"
exch_inst.user_data_["exch_acct"] = exch_acct
self.instruments_.append(exch_inst)
Log.info(f"{self.fname()} Instruments: {self.instruments_[0].details_short()} <==> {self.instruments_[1].details_short()}")
# ------- CREATE STRATEGY -------
from pairs_trading.lib.pt_strategy.live.live_strategy import PtLiveStrategy
strategy_config = CvttAppConfig.instance() #self.config_.get_subconfig("strategy_config", Config({}))
self.live_strategy_ = PtLiveStrategy(
config=strategy_config,
pairs_trader=self,
)
Log.info(f"{self.fname()} Strategy created: {self.live_strategy_}")
model_name = self.config_.get_value("model/name", "?model/name?")
self.config_.set_value("strategy_id", f"{self.live_strategy_.__class__.__name__}:{model_name}")
# # ------- CREATE PRICER CLIENT -------
self.pricer_client_ = CvttRestMktDataClient(config=self.config_)
Log.info(f"{self.fname()} MD client created: {self.pricer_client_}")
# ------- CREATE TRADER CLIENT -------
from pairs_trading.lib.live.ti_sender import TradingInstructionsSender
self.ti_sender_ = TradingInstructionsSender(config=self.config_, pairs_trader=self)
Log.info(f"{self.fname()} TI sender created: {self.ti_sender_}")
# # ------- CREATE REST SERVER -------
self.rest_service_ = RestService(
config_key=f"/api/REST"
)
# --- Strategy Handlers
self.rest_service_.add_handler(
method="POST",
url="/api/strategy",
handler=self._on_api_request,
)
async def subscribe_md(self) -> None:
from functools import partial
for exch_inst in self.instruments_:
exch_acct = exch_inst.user_data_.get("exch_acct", "?exch_acct?")
instrument_id = exch_inst.instrument_id()
await self.pricer_client_.add_subscription(
exch_acct=exch_acct,
instrument_id=instrument_id,
interval_sec=self.live_strategy_.interval_sec(),
history_depth_sec=self.live_strategy_.history_depth_sec(),
callback=partial(self._on_md_summary, exch_inst=exch_inst)
)
async def _on_md_summary(self, history: List[MdTradesAggregate], exch_inst: ExchangeInstrument) -> None:
Log.info(f"{self.fname()}: got {exch_inst.details_short()} data")
self.latest_history_[exch_inst] = history
if len(self.latest_history_) == 2:
from itertools import chain
all_aggrs = sorted(list(chain.from_iterable(self.latest_history_.values())), key=lambda X: X.aggr_time_ns_)
await self.live_strategy_.on_mkt_data_hist_snapshot(hist_aggr=all_aggrs)
self.latest_history_ = {}
async def _on_api_request(self, request: web.Request) -> web.Response:
# TODO choose pair
# TODO confirm chosen pair (after selection is implemented)
return web.Response() # TODO API request handler implementation
async def run(self) -> None:
Log.info(f"{self.fname()} ...")
while True:
await asyncio.sleep(0.1)
pass
if __name__ == "__main__":
App()
CvttAppConfig()
PairTrader()
App.instance().run()

(file name not shown)

@@ -1,186 +0,0 @@
#!/usr/bin/env bash
# ---------------- Settings
repo=git@cloud21.cvtt.vpn:/works/git/cvtt2/research/pairs_trading.git
dist_root=/home/cvttdist/software/cvtt2
dist_user=cvttdist
dist_host="cloud21.cvtt.vpn"
dist_ssh_port="22"
dist_locations="cloud21.cvtt.vpn:22 hs01.cvtt.vpn:22"
version_file="VERSION"
prj=pairs_trading
brnch=master
interactive=N
# ---------------- Settings
# ---------------- cmdline
usage() {
echo "Usage: $0 [-b <branch (master)> -i (interactive)"
exit 1
}
while getopts "b:i" opt; do
case ${opt} in
b )
brnch=$OPTARG
;;
i )
interactive=Y
;;
\? )
echo "Invalid option: -$OPTARG" >&2
usage
;;
: )
echo "Option -$OPTARG requires an argument." >&2
usage
;;
esac
done
# ---------------- cmdline
confirm() {
if [ "${interactive}" == "Y" ]; then
echo "--------------------------------"
echo -n "Press <Enter> to continue" && read
fi
}
if [ "${interactive}" == "Y" ]; then
echo -n "Enter project [${prj}]: "
read project
if [ "${project}" == "" ]
then
project=${prj}
fi
else
project=${prj}
fi
# repo=${git_repo_arr[${project}]}
if [ -z ${repo} ]; then
echo "ERROR: Project repository for ${project} not found"
exit -1
fi
echo "Project repo: ${repo}"
if [ "${interactive}" == "Y" ]; then
echo -n "Enter branch to build release from [${brnch}]: "
read branch
if [ "${branch}" == "" ]
then
branch=${brnch}
fi
else
branch=${brnch}
fi
tmp_dir=$(mktemp -d)
function cleanup {
cd ${HOME}
rm -rf ${tmp_dir}
}
trap cleanup EXIT
prj_dir="${tmp_dir}/${prj}"
cmd_arr=()
Cmd="git clone ${repo} ${prj_dir}"
cmd_arr+=("${Cmd}")
Cmd="cd ${prj_dir}"
cmd_arr+=("${Cmd}")
if [ "${interactive}" == "Y" ]; then
echo "------------------------------------"
echo "The following commands will execute:"
echo "------------------------------------"
for cmd in "${cmd_arr[@]}"
do
echo ${cmd}
done
fi
confirm
for cmd in "${cmd_arr[@]}"
do
echo ${cmd} && eval ${cmd}
done
Cmd="git checkout ${branch}"
echo ${Cmd} && eval ${Cmd}
if [ "${?}" != "0" ]; then
echo "ERROR: Branch ${branch} is not found"
cd ${HOME} && rm -rf ${tmp_dir}
exit -1
fi
release_version=$(cat ${version_file} | awk -F',' '{print $1}')
whats_new=$(cat ${version_file} | awk -F',' '{print $2}')
echo "--------------------------------"
echo "Version file: ${version_file}"
echo "Release version: ${release_version}"
confirm
version_tag="v${release_version}"
if [ "$(git tag -l "${version_tag}")" != "" ]; then
version_tag="${version_tag}.$(date +%Y%m%d_%H%M)"
fi
version_comment="'${version_tag} ${project} ${branch} $(date +%Y-%m-%d)\n${whats_new}'"
cmd_arr=()
Cmd="git tag -a ${version_tag} -m ${version_comment}"
cmd_arr+=("${Cmd}")
Cmd="git push origin --tags"
cmd_arr+=("${Cmd}")
Cmd="rm -rf .git"
cmd_arr+=("${Cmd}")
SourceLoc=../${project}
dist_path="${dist_root}/${project}/${release_version}"
for dist_loc in ${dist_locations}; do
dhp=(${dist_loc//:/ })
dist_host=${dhp[0]}
dist_port=${dhp[1]}
Cmd="rsync -avzh"
Cmd="${Cmd} --rsync-path=\"mkdir -p ${dist_path}"
Cmd="${Cmd} && rsync\" -e \"ssh -p ${dist_ssh_port}\""
Cmd="${Cmd} $SourceLoc ${dist_user}@${dist_host}:${dist_path}/"
cmd_arr+=("${Cmd}")
done
if [ "${interactive}" == "Y" ]; then
echo "------------------------------------"
echo "The following commands will execute:"
echo "------------------------------------"
for cmd in "${cmd_arr[@]}"
do
echo ${cmd}
done
fi
confirm
for cmd in "${cmd_arr[@]}"
do
pwd && echo ${cmd} && eval ${cmd}
done
echo "$0 Done ${project} ${release_version}"

(file name not shown)

@@ -1,47 +0,0 @@
{
"market_data_loading": {
"CRYPTO": {
"data_directory": "./data/crypto",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "PAIR-",
},
"EQUITY": {
"data_directory": "./data/equity",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "STOCK-",
}
},
# ====== Funding ======
"funding_per_pair": 2000.0,
# ====== Trading Parameters ======
"stat_model_price": "close",
"execution_price": {
"column": "vwap",
"shift": 1,
},
"dis-equilibrium_open_trshld": 1.75,
"dis-equilibrium_close_trshld": 0.9,
"model_class": "pairs_trading.lib.pt_strategy.models.OLSModel",
# "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.EGOptimizedWndDataPolicy",
# "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.ADFOptimizedWndDataPolicy",
"model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.JohansenOptdWndDataPolicy",
"min_training_size": 60,
"max_training_size": 150,
# ====== Stop Conditions ======
"stop_close_conditions": {
"profit": 2.0,
"loss": -0.5
}
# ====== End of Session Closeout ======
"close_outstanding_positions": true,
# "close_outstanding_positions": false,
"trading_hours": {
"timezone": "America/New_York",
"begin_session": "7:30:00",
"end_session": "18:30:00",
}
}

(file name not shown)

@@ -1,47 +0,0 @@
{
"market_data_loading": {
"CRYPTO": {
"data_directory": "./data/crypto",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "PAIR-",
},
"EQUITY": {
"data_directory": "./data/equity",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "STOCK-",
}
},
# ====== Funding ======
"funding_per_pair": 2000.0,
# ====== Trading Parameters ======
"stat_model_price": "close",
"execution_price": {
"column": "vwap",
"shift": 1,
},
"dis-equilibrium_open_trshld": 1.75,
"dis-equilibrium_close_trshld": 0.9,
"model_class": "pairs_trading.lib.pt_strategy.models.OLSModel",
"training_size": 120,
"model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.RollingWindowDataPolicy",
# "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.OptimizedWindowDataPolicy",
# "min_training_size": 60,
# "max_training_size": 150,
# ====== Stop Conditions ======
"stop_close_conditions": {
"profit": 2.0,
"loss": -0.5
}
# ====== End of Session Closeout ======
"close_outstanding_positions": true,
# "close_outstanding_positions": false,
"trading_hours": {
"timezone": "America/New_York",
"begin_session": "7:30:00",
"end_session": "18:30:00",
}
}

(file name not shown)

@@ -1,46 +0,0 @@
{
"refdata": {
"assets": @inc=http://@env{CONFIG_SERVICE}/refdata/assets
, "instruments": @inc=http://@env{CONFIG_SERVICE}/refdata/instruments
, "exchange_instruments": @inc=http://@env{CONFIG_SERVICE}/refdata/exchange_instruments
, "dynamic_instrument_exchanges": ["ALPACA"]
, "exchanges": @inc=http://@env{CONFIG_SERVICE}/refdata/exchanges
},
"market_data_loading": {
"CRYPTO": {
"data_directory": "./data/crypto",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "PAIR-",
},
"EQUITY": {
"data_directory": "./data/equity",
"db_table_name": "md_1min_bars",
"instrument_id_pfx": "STOCK-",
}
},
# ====== Funding ======
"funding_per_pair": 2000.0,
# ====== Model =======
"model": @inc=http://@env{CONFIG_SERVICE}/apps/common/models/@env{MODEL_CONFIG}
# ====== Trading =======
"execution_price": {
"column": "vwap",
"shift": 1,
},
# ====== Stop Conditions ======
"stop_close_conditions": {
"profit": 2.0,
"loss": -0.5
}
# ====== End of Session Closeout ======
"close_outstanding_positions": true,
# "close_outstanding_positions": false,
"trading_hours": {
"timezone": "America/New_York",
"begin_session": "7:30:00",
"end_session": "18:30:00",
}
}
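
The `@env{...}` placeholders above are presumably expanded from the process environment; the launch.json shown earlier sets CONFIG_SERVICE and MODEL_CONFIG before starting the app. A minimal sketch of that presumed substitution step (how the real cvttpy `Config` loader handles `@inc=`/`@env{}` is an assumption here):

```python
# Sketch of the presumed @env{NAME} expansion in the config dialect above.
import os
import re

def expand_env(text: str) -> str:
    # Replace each @env{NAME} with that environment variable's value;
    # fail loudly on unset variables rather than silently inlining "".
    def repl(match: re.Match) -> str:
        name = match.group(1)
        value = os.environ.get(name)
        if value is None:
            raise KeyError(f"config references unset environment variable {name}")
        return value
    return re.sub(r"@env\{(\w+)\}", repl, text)

os.environ.setdefault("CONFIG_SERVICE", "cloud16.cvtt.vpn:6789")
print(expand_env('"assets": @inc=http://@env{CONFIG_SERVICE}/refdata/assets'))
```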

configuration/equity.cfg

@@ -0,0 +1,27 @@
{
"security_type": "EQUITY",
"data_directory": "./data/equity",
"datafiles": [
"20250618.mktdata.ohlcv.db",
],
"db_table_name": "md_1min_bars",
"exchange_id": "ALPACA",
"instrument_id_pfx": "STOCK-",
"trading_hours": {
"begin_session": "9:30:00",
"end_session": "16:00:00",
"timezone": "America/New_York"
},
"price_column": "close",
"min_required_points": 30,
"zero_threshold": 1e-10,
"dis-equilibrium_open_trshld": 2.0,
"dis-equilibrium_close_trshld": 1.0,
"training_minutes": 120,
"funding_per_pair": 2000.0,
# "fit_method_class": "pt_trading.sliding_fit.SlidingFit",
"fit_method_class": "pt_trading.static_fit.StaticFit",
"exclude_instruments": ["CAN"],
"close_outstanding_positions": false
}

(file name not shown)

@@ -0,0 +1,26 @@
{
"security_type": "EQUITY",
"data_directory": "./data/equity",
"datafiles": [
"20250602.mktdata.ohlcv.db",
],
"db_table_name": "md_1min_bars",
"exchange_id": "ALPACA",
"instrument_id_pfx": "STOCK-",
"trading_hours": {
"begin_session": "9:30:00",
"end_session": "16:00:00",
"timezone": "America/New_York"
},
"price_column": "close",
"min_required_points": 30,
"zero_threshold": 1e-10,
"dis-equilibrium_open_trshld": 2.0,
"dis-equilibrium_close_trshld": 1.0,
"training_minutes": 120,
"funding_per_pair": 2000.0,
"fit_method_class": "pt_trading.fit_methods.StaticFit",
"exclude_instruments": ["CAN"]
}
# "fit_method_class": "pt_trading.fit_methods.SlidingFit",
# "fit_method_class": "pt_trading.fit_methods.StaticFit",

(file name not shown)

@@ -1,21 +0,0 @@
{
"strategy_config": @inc=file:///home/oleg/develop/pairs_trading/configuration/vecm-opt.cfg
"pricer_config": {
"pricer_url": "ws://localhost:12346/ws",
"history_depth_sec": 86400 #"60*60*24", # use simpleeval
"interval_sec": 60
},
"ti_config": {
"cvtt_base_url": "http://localhost:23456"
"book_id": "XXXXXXXXX",
"strategy_id": "XXXXXXXXX",
"ti_endpoint": {
"method": "POST",
"url": "/trading_instructions"
},
"health_check_endpoint": {
"method": "GET",
"url": "/ping"
}
}
}
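
Given the health_check_endpoint above, a liveness probe against the trading-instructions service is a single GET on cvtt_base_url + /ping. A minimal sketch, assuming the service is reachable at the configured address:

```python
# Sketch: probe the configured health_check_endpoint (GET /ping).
import urllib.request

def is_alive(base_url: str = "http://localhost:23456") -> bool:
    try:
        with urllib.request.urlopen(base_url + "/ping", timeout=2.0) as resp:
            return resp.status == 200
    except OSError:
        return False

print("TI service alive:", is_alive())
```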

(file name not shown)

@@ -1,56 +0,0 @@
{
# "refdata": {
# "assets": @inc=http://@env{CONFIG_SERVICE}/refdata/assets
# , "instruments": @inc=http://@env{CONFIG_SERVICE}/refdata/instruments
# , "exchange_instruments": @inc=http://@env{CONFIG_SERVICE}/refdata/exchange_instruments
# , "dynamic_instrument_exchanges": ["ALPACA"]
# , "exchanges": @inc=http://@env{CONFIG_SERVICE}/refdata/exchanges
# },
# "market_data_loading": {
# "CRYPTO": {
# "data_directory": "./data/crypto",
# "db_table_name": "md_1min_bars",
# "instrument_id_pfx": "PAIR-",
# },
# "EQUITY": {
# "data_directory": "./data/equity",
# "db_table_name": "md_1min_bars",
# "instrument_id_pfx": "STOCK-",
# }
# },
# # ====== Funding ======
# "funding_per_pair": 2000.0,
# ====== Trading Parameters ======
"stat_model_price": "close", # "vwap"
"execution_price": {
"column": "vwap",
"shift": 1,
},
"dis-equilibrium_open_trshld": 1.75,
"dis-equilibrium_close_trshld": 1.0,
"model_class": "pairs_trading.lib.pt_strategy.models.VECMModel",
# "training_size": 120,
# "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.RollingWindowDataPolicy",
"model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.ADFOptimizedWndDataPolicy",
"min_training_size": 60,
"max_training_size": 150,
# # ====== Stop Conditions ======
# "stop_close_conditions": {
# "profit": 2.0,
# "loss": -0.5
# }
# # ====== End of Session Closeout ======
# "close_outstanding_positions": true,
# # "close_outstanding_positions": false,
# "trading_hours": {
# "timezone": "America/New_York",
# "begin_session": "7:30:00",
# "end_session": "18:30:00",
# }
}

(file name not shown)

@@ -21,15 +21,10 @@
     "column": "vwap",
     "shift": 1,
   },
-  "dis-equilibrium_open_trshld": 1.75,
+  "dis-equilibrium_open_trshld": 2.0,
   "dis-equilibrium_close_trshld": 1.0,
-  "model_class": "pairs_trading.lib.pt_strategy.models.VECMModel",
-  "training_size": 120,
-  "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.RollingWindowDataPolicy",
-  # "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.OptimizedWindowDataPolicy",
-  # "min_training_size": 60,
-  # "max_training_size": 150,
-
+  "training_minutes": 120,
+  "fit_method_class": "pt_trading.vecm_rolling_fit.VECMRollingFit",

   # ====== Stop Conditions ======
   "stop_close_conditions": {
@@ -42,7 +37,7 @@
   # "close_outstanding_positions": false,
   "trading_hours": {
     "timezone": "America/New_York",
-    "begin_session": "7:30:00",
+    "begin_session": "9:30:00",
     "end_session": "18:30:00",
   }
 }

(file name not shown)

@@ -22,9 +22,8 @@
   },
   "dis-equilibrium_open_trshld": 2.0,
   "dis-equilibrium_close_trshld": 0.5,
-  "training_size": 120,
-  "model_class": "pairs_trading.lib.pt_strategy.models.OLSModel",
-  "model_data_policy_class": "pairs_trading.lib.pt_strategy.model_data_policy.ExpandingWindowDataPolicy",
+  "training_minutes": 120,
+  "fit_method_class": "pt_trading.z-score_rolling_fit.ZScoreRollingFit",

   # ====== Stop Conditions ======
   "stop_close_conditions": {
@@ -37,7 +36,7 @@
   # "close_outstanding_positions": false,
   "trading_hours": {
     "timezone": "America/New_York",
-    "begin_session": "7:30:00",
+    "begin_session": "9:30:00",
     "end_session": "18:30:00",
   }
 }

lg_notes.md

@@ -0,0 +1,115 @@
07.11.2025
pairs_trading/configuration <---- directory for config
equity_lg.cfg <-------- copy of equity.cfg
How to run a Program: TRIANGLEsquare ----> triangle EQUITY backtest
Results are in > results (timestamp table for all runs)
table "...timestamp... .pt_backtest_results.equity.db"
open the database with sqlite3:
> sqlite3 '/home/coder/results/20250721_175750.pt_backtest_results.equity.db'
sqlite> .databases
main: /home/coder/results/20250717_180122.pt_backtest_results.equity.db r/w
sqlite> .tables
config outstanding_positions pt_bt_results
sqlite> PRAGMA table_info('pt_bt_results');
0|date|DATE|0||0
1|pair|TEXT|0||0
2|symbol|TEXT|0||0
3|open_time|DATETIME|0||0
4|open_side|TEXT|0||0
5|open_price|REAL|0||0
6|open_quantity|INTEGER|0||0
7|open_disequilibrium|REAL|0||0
8|close_time|DATETIME|0||0
9|close_side|TEXT|0||0
10|close_price|REAL|0||0
11|close_quantity|INTEGER|0||0
12|close_disequilibrium|REAL|0||0
13|symbol_return|REAL|0||0
14|pair_return|REAL|0||0
select count(*) as cnt from pt_bt_results;
8
select * from pt_bt_results;
select
date, close_time, pair, symbol, symbol_return, pair_return
from pt_bt_results ;
select date, sum(symbol_return) as daily_return
from pt_bt_results where date = '2025-06-18' group by date;
.quit
sqlite3 '/home/coder/results/20250717_172435.pt_backtest_results.equity.db'
sqlite> select date, sum(symbol_return) as daily_return
from pt_bt_results group by date;
2025-06-02|1.29845390060828
...
2025-06-18|-43.5084977104115 <========== ????? ==========>
2025-06-20|11.8605547517183
select
date, close_time, pair, symbol, symbol_return, pair_return
from pt_bt_results ;
select date, close_time, pair, symbol, symbol_return, pair_return
from pt_bt_results where date = '2025-06-18';
./scripts/load_equity_pair_intraday.sh -A NVDA -B QQQ -d 20250701 -T ./intraday_md
To inspect exactly which sources, formats, and processing steps are used, open the script with:
head -n 50 ./scripts/load_equity_pair_intraday.sh
✓ Data file found: /home/coder/pairs_trading/data/crypto/20250605.mktdata.ohlcv.db
sqlite3 '/home/coder/results/20250722_201930.pt_backtest_results.crypto.db'
sqlite3 '/home/coder/results/xxxxxxxx_yyyyyy.pt_backtest_results.pseudo.db'
=== At your terminal, run these commands:
sqlite3 '/home/coder/results/20250722_201930.pt_backtest_results.crypto.db'
=== Then inside the SQLite prompt:
.mode csv
.headers on
.output results_20250722.csv
SELECT * FROM pt_bt_results;
.output stdout
.quit
cd /home/coder/
# === mode csv formats output as CSV
# === headers on includes column names
# === output my_table.csv directs output to that file
# === Run your SELECT query, then revert output
# === Open my_table.csv in Excel directly
# ======== Using scp (Secure Copy)
# === On your local machine, open a terminal and run:
scp cvtt@953f6e8df266:/home/coder/results_20250722.csv ~/Downloads/
# ===== convert CSV to a pandas DataFrame ======
import pandas as pd
# Replace with the actual path to your CSV file
file_path = '/home/coder/results_20250722.csv'
# Read the CSV file into a DataFrame
df = pd.read_csv(file_path)
# Show the first few rows
print(df.head())
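
The daily-return query run in sqlite above can be reproduced on the exported CSV with pandas; column names follow the PRAGMA table_info('pt_bt_results') listing earlier in these notes:

```python
# Equivalent of: select date, sum(symbol_return) as daily_return
#                from pt_bt_results group by date;
import pandas as pd

df = pd.read_csv('/home/coder/results_20250722.csv')
daily = df.groupby('date')['symbol_return'].sum().rename('daily_return')
print(daily)

# Drill into the suspicious day flagged above:
print(df[df['date'] == '2025-06-18'][['pair', 'symbol', 'symbol_return']])
```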

lib/cvtt/mkt_data.py

@@ -0,0 +1,188 @@
#!/usr/bin/env python3
import argparse
import asyncio
from functools import partial
import json
import logging
import uuid
from dataclasses import dataclass
from typing import Callable, Coroutine, Dict, List, Optional
import websockets
from websockets.asyncio.client import ClientConnection
MessageTypeT = str
SubscriptionIdT = str
MessageT = Dict
UrlT = str
CallbackT = Callable[[MessageTypeT, SubscriptionIdT, MessageT], Coroutine[None, str, None]]
@dataclass
class CvttPricesSubscription:
id_: str
exchange_config_name_: str
instrument_id_: str
interval_sec_: int
history_depth_sec_: int
is_subscribed_: bool
is_historical_: bool
callback_: CallbackT
def __init__(
self,
exchange_config_name: str,
instrument_id: str,
interval_sec: int,
history_depth_sec: int,
callback: CallbackT,
):
self.exchange_config_name_ = exchange_config_name
self.instrument_id_ = instrument_id
self.interval_sec_ = interval_sec
self.history_depth_sec_ = history_depth_sec
self.callback_ = callback
self.id_ = str(uuid.uuid4())
self.is_subscribed_ = False
self.is_historical_ = history_depth_sec > 0
class CvttPricerWebSockClient:
# Class members with type hints
ws_url_: UrlT
websocket_: Optional[ClientConnection]
subscriptions_: Dict[SubscriptionIdT, CvttPricesSubscription]
is_connected_: bool
logger_: logging.Logger
def __init__(self, url: str):
self.ws_url_ = url
self.websocket_ = None
self.is_connected_ = False
self.subscriptions_ = {}
self.logger_ = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
async def subscribe(
self, subscription: CvttPricesSubscription
) -> str: # returns subscription id
if not self.is_connected_:
try:
self.logger_.info(f"Connecting to {self.ws_url_}")
self.websocket_ = await websockets.connect(self.ws_url_)
self.is_connected_ = True
except Exception as e:
self.logger_.error(f"Unable to connect to {self.ws_url_}: {str(e)}")
raise e
subscr_msg = {
"type": "subscr",
"id": subscription.id_,
"subscr_type": "MD_AGGREGATE",
"exchange_config_name": subscription.exchange_config_name_,
"instrument_id": subscription.instrument_id_,
"interval_sec": subscription.interval_sec_,
}
if subscription.is_historical_:
subscr_msg["history_depth_sec"] = subscription.history_depth_sec_
assert self.websocket_ is not None
await self.websocket_.send(json.dumps(subscr_msg))
response = await self.websocket_.recv()
response_data = json.loads(response)
if not await self.handle_subscription_response(subscription, response_data):
await self.websocket_.close()
self.is_connected_ = False
raise Exception(f"Subscription failed: {str(response)}")
self.subscriptions_[subscription.id_] = subscription
return subscription.id_
async def handle_subscription_response(
self, subscription: CvttPricesSubscription, response: dict
) -> bool:
if response.get("type") != "subscr" or response.get("id") != subscription.id_:
return False
if response.get("status") == "success":
self.logger_.info(f"Subscription successful: {json.dumps(response)}")
return True
elif response.get("status") == "error":
self.logger_.error(f"Subscription failed: {response.get('reason')}")
return False
return False
async def run(self) -> None:
assert self.websocket_
try:
while self.is_connected_:
try:
message = await self.websocket_.recv()
message_str = (
message.decode("utf-8")
if isinstance(message, bytes)
else message
)
await self.process_message(json.loads(message_str))
except websockets.ConnectionClosed:
self.logger_.warning("Connection closed")
self.is_connected_ = False
break
except Exception as e:
self.logger_.error(f"Error occurred: {str(e)}")
self.is_connected_ = False
await asyncio.sleep(5) # Wait before reconnecting
async def process_message(self, message: Dict) -> None:
message_type = message.get("type")
if message_type in ["md_aggregate", "historical_md_aggregate"]:
subscription_id = message.get("subscr_id")
if subscription_id not in self.subscriptions_:
self.logger_.warning(f"Unknown subscription id: {subscription_id}")
return
subscription = self.subscriptions_[subscription_id]
await subscription.callback_(message_type, subscription_id, message)
else:
self.logger_.warning(f"Unknown message type: {message.get('type')}")
async def main() -> None:
async def on_message(message_type: MessageTypeT, subscr_id: SubscriptionIdT, message: Dict, instrument_id: str) -> None:
print(f"{message_type=} {subscr_id=} {instrument_id}")
if message_type == "md_aggregate":
            aggr = message.get("md_aggregate", {})
            print(f"[{aggr.get('tstmp', '')[:19]}] *** RLTM *** {message}")
elif message_type == "historical_md_aggregate":
for aggr in message.get("historical_data", []):
print(f"[{aggr['tstmp'][:19]}] *** HIST *** {aggr}")
else:
print(f"Unknown message type: {message_type}")
pricer_client = CvttPricerWebSockClient(
"ws://localhost:12346/ws"
)
await pricer_client.subscribe(CvttPricesSubscription(
exchange_config_name="COINBASE_AT",
instrument_id="PAIR-BTC-USD",
interval_sec=60,
history_depth_sec=60*60*24,
callback=partial(on_message, instrument_id="PAIR-BTC-USD")
))
await pricer_client.subscribe(CvttPricesSubscription(
exchange_config_name="COINBASE_AT",
instrument_id="PAIR-ETH-USD",
interval_sec=60,
history_depth_sec=60*60*24,
callback=partial(on_message, instrument_id="PAIR-ETH-USD")
))
await pricer_client.run()
if __name__ == "__main__":
asyncio.run(main())
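Note: run() simply returns once the connection drops, so nothing in the file above reconnects by itself. A minimal reconnect wrapper, sketched against the same public API (run_with_reconnect is not part of the file; the 5-second backoff mirrors the sleep already used in run()):
async def run_with_reconnect(url: UrlT, subscriptions: List[CvttPricesSubscription]) -> None:
    while True:
        client = CvttPricerWebSockClient(url)
        try:
            for sub in subscriptions:
                sub.is_subscribed_ = False  # reset before re-subscribing
                await client.subscribe(sub)
            await client.run()  # returns when the connection closes
        except Exception as exc:
            logging.getLogger(__name__).error(f"Reconnect loop error: {exc}")
        await asyncio.sleep(5)  # back off before trying again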

View File

@ -1,277 +0,0 @@
from __future__ import annotations
import asyncio
from typing import Dict, Any, List, Optional, Set
import requests
from cvttpy_tools.base import NamedObject
from cvttpy_tools.logger import Log
from cvttpy_tools.config import Config
from cvttpy_tools.timer import Timer
from cvttpy_tools.timeutils import NanosT, current_seconds
from cvttpy_tools.settings.cvtt_types import InstrumentIdT, IntervalSecT
from cvttpy_tools.web.rest_client import RESTSender
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
from cvttpy_trading.trading.accounting.exch_account import ExchangeAccountNameT
from cvttpy_trading.trading.mkt_data.md_summary import MdTradesAggregate, MdSummary, MdSummaryCallbackT
from cvttpy_trading.trading.exchange_config import ExchangeAccounts
# ---
# class MdSummary(HistMdBar):
# def __init__(
# self,
# ts_ns: int,
# open: float,
# high: float,
# low: float,
# close: float,
# volume: float,
# vwap: float,
# num_trades: int,
# ):
# super().__init__(ts=ts_ns)
# self.open_ = open
# self.high_ = high
# self.low_ = low
# self.close_ = close
# self.volume_ = volume
# self.vwap_ = vwap
# self.num_trades_ = num_trades
# @classmethod
# def from_REST_response(cls, response: requests.Response) -> List[MdSummary]:
# res: List[MdSummary] = []
# jresp = response.json()
# hist_data = jresp.get("historical_data", [])
# for hd in hist_data:
# res.append(
# MdSummary(
# ts_ns=hd["time_ns"],
# open=hd["open"],
# high=hd["high"],
# low=hd["low"],
# close=hd["close"],
# volume=hd["volume"],
# vwap=hd["vwap"],
# num_trades=hd["num_trades"],
# )
# )
# return res
# def create_md_trades_aggregate(
# self,
# exch_acct: ExchangeAccountNameT,
# exch_inst: ExchangeInstrument,
# interval_sec: IntervalSecT,
# ) -> MdTradesAggregate:
# res = MdTradesAggregate(
# exch_acct=exch_acct,
# exch_inst=exch_inst,
# interval_ns=interval_sec * NanoPerSec,
# )
# res.set(mdbar=self)
# return res
# MdSummaryCallbackT = Callable[[List[MdTradesAggregate]], Coroutine]
class MdSummaryCollector(NamedObject):
sender_: RESTSender
exch_acct_: ExchangeAccountNameT
exch_inst_: ExchangeInstrument
interval_sec_: IntervalSecT
history_depth_sec_: IntervalSecT
history_: List[MdTradesAggregate]
callbacks_: List[MdSummaryCallbackT]
timer_: Optional[Timer]
def __init__(
self,
sender: RESTSender,
exch_acct: ExchangeAccountNameT,
instrument_id: InstrumentIdT,
interval_sec: IntervalSecT,
history_depth_sec: IntervalSecT,
) -> None:
self.sender_ = sender
self.exch_acct_ = exch_acct
exch_inst = ExchangeAccounts.instance().get_exchange_instrument(
exch_acct=exch_acct, instrument_id=instrument_id
)
assert exch_inst is not None, f"Unable to find Exchange instrument for {exch_acct}/{instrument_id}"
self.exch_inst_ = exch_inst
self.interval_sec_ = interval_sec
self.history_depth_sec_ = history_depth_sec
self.history_ = []
self.callbacks_ = []
self.timer_ = None
def add_callback(self, cb: MdSummaryCallbackT) -> None:
self.callbacks_.append(cb)
def __hash__(self):
return hash(
(
self.exch_acct_,
self.exch_inst_.instrument_id(),
self.interval_sec_,
self.history_depth_sec_,
)
)
def rqst_data(self) -> Dict[str, Any]:
return {
"exch_acct": self.exch_acct_,
"instrument_id": self.exch_inst_.instrument_id(),
"interval_sec": self.interval_sec_,
"history_depth_sec": self.history_depth_sec_,
}
def get_history(self) -> List[MdSummary]:
response: requests.Response = self.sender_.send_post(
endpoint="md_summary", post_body=self.rqst_data()
)
if response.status_code not in (200, 201):
Log.error(
f"{self.fname()}: Received error: {response.status_code} - {response.text}"
)
return []
return MdSummary.from_REST_response(response=response)
def get_last(self) -> Optional[MdSummary]:
Log.info(f"{self.fname()}: for {self.exch_inst_.details_short()}")
rqst_data = self.rqst_data()
rqst_data["history_depth_sec"] = self.interval_sec_ * 2
response: requests.Response = self.sender_.send_post(
endpoint="md_summary", post_body=rqst_data
)
if response.status_code not in (200, 201):
Log.error(
f"{self.fname()}: Received error: {response.status_code} - {response.text}"
)
return None
res = MdSummary.from_REST_response(response=response)
Log.info(f"DEBUG *** {self.exch_inst_.base_asset_id_}: {res[-1].tstamp_}")
return None if len(res) == 0 else res[-1]
def is_empty(self) -> bool:
return len(self.history_) == 0
async def start(self) -> None:
if self.timer_:
Log.error(f"{self.fname()}: Timer is already started")
return
mdsum_hist = self.get_history()
self.history_ = [
mdsum.create_md_trades_aggregate(
exch_acct=self.exch_acct_,
exch_inst=self.exch_inst_,
interval_sec=self.interval_sec_,
)
for mdsum in mdsum_hist
]
await self.run_callbacks()
self.set_timer()
def set_timer(self):
if self.timer_:
self.timer_.cancel()
start_in = self.next_load_time() - current_seconds()
self.timer_ = Timer(
start_in_sec=start_in,
func=self._load_new,
)
Log.info(f"{self.fname()} Timer for {self.exch_inst_.details_short()} is set to run in {start_in} sec")
def next_load_time(self) -> NanosT:
ALLOW_LAG_SEC = 1
curr_sec = int(current_seconds())
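        # e.g., with interval_sec_=60: 18:04:37 floors to 18:04:00, so the next
        # load is scheduled for 18:05:01 (one second of allowed lag)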
return (curr_sec - curr_sec % self.interval_sec_) + self.interval_sec_ + ALLOW_LAG_SEC
async def _load_new(self) -> None:
last: Optional[MdSummary] = self.get_last()
if not last:
Log.warning(f"{self.fname()}: did not get last update")
elif not self.is_empty() and last.ts_ns_ <= self.history_[-1].aggr_time_ns_:
Log.info(
f"{self.fname()}: Received {last}. Already Have: {self.history_[-1]}"
)
else:
self.history_.append(last.create_md_trades_aggregate(exch_acct=self.exch_acct_, exch_inst=self.exch_inst_, interval_sec=self.interval_sec_))
await self.run_callbacks()
self.set_timer()
async def run_callbacks(self) -> None:
        for cb in self.callbacks_:
            await cb(self.history_)
def stop(self) -> None:
if self.timer_:
self.timer_.cancel()
self.timer_ = None
class CvttRestMktDataClient(NamedObject):
config_: Config
sender_: RESTSender
collectors_: Set[MdSummaryCollector]
def __init__(self, config: Config) -> None:
self.config_ = config
base_url = self.config_.get_value("cvtt_base_url", default="")
assert base_url
self.sender_ = RESTSender(base_url=base_url)
self.collectors_ = set()
async def add_subscription(
self,
exch_acct: ExchangeAccountNameT,
instrument_id: InstrumentIdT,
interval_sec: IntervalSecT,
history_depth_sec: IntervalSecT,
callback: MdSummaryCallbackT,
) -> None:
mdsc = MdSummaryCollector(
sender=self.sender_,
exch_acct=exch_acct,
instrument_id=instrument_id,
interval_sec=interval_sec,
history_depth_sec=history_depth_sec,
)
mdsc.add_callback(callback)
self.collectors_.add(mdsc)
await mdsc.start()
if __name__ == "__main__":
config = Config(json_src={"cvtt_base_url": "http://cvtt-tester-01.cvtt.vpn:23456"})
# config = Config(json_src={"cvtt_base_url": "http://dev-server-02.cvtt.vpn:23456"})
    async def _callback(history: List[MdTradesAggregate]) -> None:
Log.info(
f"MdSummary Hist Length is {len(history)}. Last summary: {history[-1] if len(history) > 0 else '[]'}"
)
async def __run() -> None:
Log.info("Starting...")
cvtt_client = CvttRestMktDataClient(config)
await cvtt_client.add_subscription(
exch_acct="COINBASE_AT",
instrument_id="PAIR-BTC-USD",
interval_sec=60,
history_depth_sec=24 * 3600,
            callback=_callback,
)
while True:
await asyncio.sleep(5)
asyncio.run(__run())

View File

@ -1,60 +0,0 @@
from __future__ import annotations
from typing import Dict
import time
import requests
from cvttpy_tools.base import NamedObject
class RESTSender(NamedObject):
session_: requests.Session
base_url_: str
def __init__(self, base_url: str) -> None:
self.base_url_ = base_url
self.session_ = requests.Session()
def is_ready(self) -> bool:
"""Checks if the server is up and responding"""
url = f"{self.base_url_}/ping"
try:
response = self.session_.get(url)
response.raise_for_status()
return True
except requests.exceptions.RequestException:
return False
def send_post(self, endpoint: str, post_body: Dict) -> requests.Response:
while not self.is_ready():
print("Waiting for FrontGateway to start...")
time.sleep(5)
url = f"{self.base_url_}/{endpoint}"
try:
return self.session_.request(
method="POST",
url=url,
json=post_body,
headers={"Content-Type": "application/json"},
)
except requests.exceptions.RequestException as excpt:
raise ConnectionError(
f"Failed to send status={excpt.response.status_code} {excpt.response.text}" # type: ignore
) from excpt
def send_get(self, endpoint: str) -> requests.Response:
while not self.is_ready():
print("Waiting for FrontGateway to start...")
time.sleep(5)
url = f"{self.base_url_}/{endpoint}"
try:
return self.session_.request(method="GET", url=url)
except requests.exceptions.RequestException as excpt:
raise ConnectionError(
f"Failed to send status={excpt.response.status_code} {excpt.response.text}" # type: ignore
) from excpt
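A hypothetical usage sketch for RESTSender against the md_summary endpoint (the request body mirrors MdSummaryCollector.rqst_data above; the host and field values are illustrative, taken from examples elsewhere in this change):
sender = RESTSender(base_url="http://cvtt-tester-01.cvtt.vpn:23456")
response = sender.send_post(
    endpoint="md_summary",
    post_body={
        "exch_acct": "COINBASE_AT",
        "instrument_id": "PAIR-BTC-USD",
        "interval_sec": 60,
        "history_depth_sec": 24 * 3600,
    },
)
print(response.status_code, response.text[:200])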

View File

@ -1,50 +0,0 @@
from enum import Enum
import requests
# import aiohttp
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import Config
from cvttpy_tools.logger import Log
from cvttpy_tools.web.rest_client import RESTSender
# ---
from cvttpy_trading.trading.trading_instructions import TradingInstructions
# ---
from pairs_trading.apps.pair_trader import PairTrader
class TradingInstructionsSender(NamedObject):
config_: Config
sender_: RESTSender
pairs_trader_: PairTrader
class TradingInstType(str, Enum):
TARGET_POSITION = "TARGET_POSITION"
DIRECT_ORDER = "DIRECT_ORDER"
MARKET_MAKING = "MARKET_MAKING"
NONE = "NONE"
def __init__(self, config: Config, pairs_trader: PairTrader) -> None:
self.config_ = config
base_url = self.config_.get_value("cvtt_base_url", default="")
assert base_url
self.sender_ = RESTSender(base_url=base_url)
self.pairs_trader_ = pairs_trader
self.book_id_ = self.pairs_trader_.book_id_
assert self.book_id_, "book_id is required"
self.strategy_id_ = config.get_value("strategy_id", "")
assert self.strategy_id_, "strategy_id is required"
async def send_trading_instructions(self, ti: TradingInstructions) -> None:
Log.info(f"{self.fname()}: sending {ti=}")
response: requests.Response = self.sender_.send_post(
endpoint="trading_instructions", post_body=ti.to_dict()
)
if response.status_code not in (200, 201):
Log.error(
f"{self.fname()}: Received error: {response.status_code} - {response.text}"
)

View File

@ -1,351 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional
import pandas as pd
# ---
from cvttpy_tools.base import NamedObject
from cvttpy_tools.app import App
from cvttpy_tools.config import Config
from cvttpy_tools.settings.cvtt_types import IntervalSecT
from cvttpy_tools.timeutils import NanosT, SecPerHour, current_nanoseconds, NanoPerSec, format_nanos_utc
from cvttpy_tools.logger import Log
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
from cvttpy_trading.trading.mkt_data.md_summary import MdTradesAggregate
from cvttpy_trading.trading.trading_instructions import TradingInstructions
from cvttpy_trading.trading.trading_instructions import TargetPositionSignal
# ---
from pairs_trading.lib.pt_strategy.model_data_policy import ModelDataPolicy
from pairs_trading.lib.pt_strategy.pt_model import Prediction
from pairs_trading.lib.pt_strategy.trading_pair import LiveTradingPair
from pairs_trading.apps.pair_trader import PairTrader
from pairs_trading.lib.pt_strategy.pt_market_data import LiveMarketData
class PtLiveStrategy(NamedObject):
config_: Config
instruments_: List[ExchangeInstrument]
interval_sec_: IntervalSecT
history_depth_sec_: IntervalSecT
open_threshold_: float
close_threshold_: float
trading_pair_: LiveTradingPair
model_data_policy_: ModelDataPolicy
pairs_trader_: PairTrader
# for presentation: history of prediction values and trading signals
predictions_df_: pd.DataFrame
trading_signals_df_: pd.DataFrame
allowed_md_lag_sec_: int
def __init__(
self,
config: Config,
pairs_trader: PairTrader,
):
self.config_ = config
self.pairs_trader_ = pairs_trader
self.trading_pair_ = LiveTradingPair(
config=config,
instruments=self.pairs_trader_.instruments_,
)
self.model_data_policy_ = ModelDataPolicy.create(
self.config_,
is_real_time=True,
pair=self.trading_pair_,
)
assert (
self.model_data_policy_ is not None
), f"{self.fname()}: Unable to create ModelDataPolicy"
self.predictions_df_ = pd.DataFrame()
self.trading_signals_df_ = pd.DataFrame()
self.instruments_ = self.pairs_trader_.instruments_
App.instance().add_call(
stage=App.Stage.Config, func=self._on_config(), can_run_now=True
)
async def _on_config(self) -> None:
self.interval_sec_ = self.config_.get_value("interval_sec", 0)
assert self.interval_sec_ > 0, "interval_sec cannot be 0"
self.history_depth_sec_ = (
self.config_.get_value("history_depth_hours", 0) * SecPerHour
)
assert self.history_depth_sec_ > 0, "history_depth_hours cannot be 0"
self.allowed_md_lag_sec_ = self.config_.get_value("allowed_md_lag_sec", 3)
self.open_threshold_ = self.config_.get_value(
"model/disequilibrium/open_trshld", 0.0
)
self.close_threshold_ = self.config_.get_value(
"model/disequilibrium/close_trshld", 0.0
)
assert (
self.open_threshold_ > 0
), "disequilibrium/open_trshld must be greater than 0"
assert (
self.close_threshold_ > 0
), "disequilibrium/close_trshld must be greater than 0"
await self.pairs_trader_.subscribe_md()
def __repr__(self) -> str:
return f"{self.classname()}: trading_pair={self.trading_pair_}, mdp={self.model_data_policy_.__class__.__name__}, "
async def on_mkt_data_hist_snapshot(
self, hist_aggr: List[MdTradesAggregate]
) -> None:
if not self._is_md_actual(hist_aggr=hist_aggr):
return
market_data_df: pd.DataFrame = self._create_md_df(hist_aggr=hist_aggr)
if len(market_data_df) == 0:
Log.warning(f"{self.fname()} Unable to create market data df")
return
self.trading_pair_.market_data_ = market_data_df
Log.info(f"{self.fname()}: Running prediction for pair: {self.trading_pair_}")
prediction = self.trading_pair_.run(
market_data_df, self.model_data_policy_.advance()
)
self.predictions_df_ = pd.concat(
[self.predictions_df_, prediction.to_df()], ignore_index=True
)
trading_instructions: List[TradingInstructions] = (
self._create_trading_instructions(
prediction=prediction, last_row=market_data_df.iloc[-1]
)
)
        if trading_instructions:
await self._send_trading_instructions(trading_instructions)
def _is_md_actual(self, hist_aggr: List[MdTradesAggregate]) -> bool:
if len(hist_aggr) == 0:
Log.warning(f"{self.fname()} list of aggregates IS EMPTY")
return False
curr_ns = current_nanoseconds()
# MAYBE check market data length
# at 18:05:01 we should see data for 18:04:00
lag_sec = (curr_ns - hist_aggr[-1].aggr_time_ns_) / NanoPerSec - self.interval_sec()
if lag_sec > self.allowed_md_lag_sec_:
Log.warning(
f"{self.fname()} {hist_aggr[-1].exch_inst_.details_short()}"
f" Lagging {int(lag_sec)} > {self.allowed_md_lag_sec_} seconds:"
f"\n{len(hist_aggr)} records"
f"\n{hist_aggr[-1].exch_inst_.base_asset_id_}: {hist_aggr[-1].tstamp()}"
f"\n{hist_aggr[-2].exch_inst_.base_asset_id_}: {hist_aggr[-2].tstamp()}"
)
return False
else:
Log.info(
f"{self.fname()} {hist_aggr[-1].exch_inst_.details_short()}"
f" Lag {int(lag_sec)} <= {self.allowed_md_lag_sec_} seconds"
f"\n{len(hist_aggr)} records"
f"\n{hist_aggr[-1].exch_inst_.base_asset_id_}: {hist_aggr[-1].tstamp()}"
f"\n{hist_aggr[-2].exch_inst_.base_asset_id_}: {hist_aggr[-2].tstamp()}"
)
return True
def _create_md_df(self, hist_aggr: List[MdTradesAggregate]) -> pd.DataFrame:
"""
tstamp time_ns symbol open high low close volume num_trades vwap
0 2025-09-10 11:30:00 1757503800000000000 ADA-USDT 0.8750 0.8750 0.8743 0.8743 50710.500 0 0.874489
1 2025-09-10 11:30:00 1757503800000000000 SOL-USDT 219.9700 219.9800 219.6600 219.7000 2648.582 0 219.787847
2 2025-09-10 11:31:00 1757503860000000000 SOL-USDT 219.7000 219.7300 219.6200 219.6200 1134.886 0 219.663460
3 2025-09-10 11:31:00 1757503860000000000 ADA-USDT 0.8743 0.8745 0.8741 0.8741 10696.400 0 0.874234
4 2025-09-10 11:32:00 1757503920000000000 ADA-USDT 0.8742 0.8742 0.8739 0.8740 18546.900 0 0.874037
"""
rows: List[Dict[str, Any]] = []
for aggr in hist_aggr:
exch_inst = aggr.exch_inst_
rows.append(
{
# convert nanoseconds → tz-aware pandas timestamp
"tstamp": pd.to_datetime(aggr.aggr_time_ns_, unit="ns", utc=True),
"time_ns": aggr.aggr_time_ns_,
"symbol": exch_inst.instrument_id().split("-", 1)[1],
"exchange_id": exch_inst.exchange_id_,
"instrument_id": exch_inst.instrument_id(),
"open": exch_inst.get_price(aggr.open_),
"high": exch_inst.get_price(aggr.high_),
"low": exch_inst.get_price(aggr.low_),
"close": exch_inst.get_price(aggr.close_),
"volume": exch_inst.get_quantity(aggr.volume_),
"num_trades": aggr.num_trades_,
"vwap": exch_inst.get_price(aggr.vwap_),
}
)
source_md_df = pd.DataFrame(
rows,
columns=[
"tstamp",
"time_ns",
"symbol",
"exchange_id",
"instrument_id",
"open",
"high",
"low",
"close",
"volume",
"num_trades",
"vwap",
],
)
# automatic sorting
source_md_df.sort_values(
by=["time_ns", "symbol"],
ascending=True,
inplace=True,
kind="mergesort", # stable sort
)
source_md_df.reset_index(drop=True, inplace=True)
pt_mkt_data = LiveMarketData(config=self.config_, instruments=self.instruments_)
pt_mkt_data.origin_mkt_data_df_ = source_md_df
pt_mkt_data.set_market_data()
return pt_mkt_data.market_data_df_
def interval_sec(self) -> IntervalSecT:
return self.interval_sec_
def history_depth_sec(self) -> IntervalSecT:
return self.history_depth_sec_
async def _send_trading_instructions(
self, trading_instructions: List[TradingInstructions]
) -> None:
for ti in trading_instructions:
Log.info(f"{self.fname()} Sending trading instructions {ti}")
await self.pairs_trader_.ti_sender_.send_trading_instructions(ti)
def _create_trading_instructions(
self, prediction: Prediction, last_row: pd.Series
) -> List[TradingInstructions]:
trd_instructions: List[TradingInstructions] = []
pair = self.trading_pair_
scaled_disequilibrium = prediction.scaled_disequilibrium_
abs_scaled_disequilibrium = abs(scaled_disequilibrium)
if abs_scaled_disequilibrium >= self.open_threshold_:
trd_instructions = self._create_open_trade_instructions(
pair, row=last_row, prediction=prediction
)
elif abs_scaled_disequilibrium <= self.close_threshold_ or pair.to_stop_close_conditions(predicted_row=last_row):
trd_instructions = self._create_close_trade_instructions(
pair, row=last_row # , prediction=prediction
)
return trd_instructions
def _strength(self, scaled_disequilibrium: float) -> float:
# TODO PtLiveStrategy._strength()
return 1.0
def _create_open_trade_instructions(
self, pair: LiveTradingPair, row: pd.Series, prediction: Prediction
) -> List[TradingInstructions]:
diseqlbrm = prediction.disequilibrium_
scaled_disequilibrium = prediction.scaled_disequilibrium_
if diseqlbrm > 0:
side_a = -1
side_b = 1
else:
side_a = 1
side_b = -1
ti_a: Optional[TradingInstructions] = TradingInstructions(
book=self.pairs_trader_.book_id_,
strategy_id=self.__class__.__name__,
ti_type=TradingInstructions.Type.TARGET_POSITION,
issued_ts_ns=current_nanoseconds(),
data=TargetPositionSignal(
strength=side_a * self._strength(scaled_disequilibrium),
exchange_id=pair.get_instrument_a().exchange_id_,
base_asset=pair.get_instrument_a().base_asset_id_,
quote_asset=pair.get_instrument_a().quote_asset_id_,
user_data={}
),
)
if not ti_a:
return []
ti_b: Optional[TradingInstructions] = TradingInstructions(
book=self.pairs_trader_.book_id_,
strategy_id=self.__class__.__name__,
ti_type=TradingInstructions.Type.TARGET_POSITION,
issued_ts_ns=current_nanoseconds(),
data=TargetPositionSignal(
strength=side_b * self._strength(scaled_disequilibrium),
exchange_id=pair.get_instrument_b().exchange_id_,
base_asset=pair.get_instrument_b().base_asset_id_,
quote_asset=pair.get_instrument_b().quote_asset_id_,
user_data={}
),
)
if not ti_b:
return []
return [ti_a, ti_b]
def _create_close_trade_instructions(
self, pair: LiveTradingPair, row: pd.Series
) -> List[TradingInstructions]:
ti_a: Optional[TradingInstructions] = TradingInstructions(
book=self.pairs_trader_.book_id_,
strategy_id=self.__class__.__name__,
ti_type=TradingInstructions.Type.TARGET_POSITION,
issued_ts_ns=current_nanoseconds(),
data=TargetPositionSignal(
strength=0,
exchange_id=pair.get_instrument_a().exchange_id_,
base_asset=pair.get_instrument_a().base_asset_id_,
quote_asset=pair.get_instrument_a().quote_asset_id_,
user_data={}
),
)
if not ti_a:
return []
ti_b: Optional[TradingInstructions] = TradingInstructions(
book=self.pairs_trader_.book_id_,
strategy_id=self.__class__.__name__,
ti_type=TradingInstructions.Type.TARGET_POSITION,
issued_ts_ns=current_nanoseconds(),
data=TargetPositionSignal(
strength=0,
exchange_id=pair.get_instrument_b().exchange_id_,
base_asset=pair.get_instrument_b().base_asset_id_,
quote_asset=pair.get_instrument_b().quote_asset_id_,
user_data={}
),
)
if not ti_b:
return []
return [ti_a, ti_b]
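For reference, a hypothetical config fragment exercising the keys read in _on_config above (values are illustrative; 3 is the code's own default for allowed_md_lag_sec):
config = Config(json_src={
    "interval_sec": 60,
    "history_depth_hours": 24,
    "allowed_md_lag_sec": 3,
    "model": {
        "disequilibrium": {"open_trshld": 2.0, "close_trshld": 0.5},
    },
})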

View File

@ -1,253 +0,0 @@
from __future__ import annotations
import copy
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, Optional, cast
import numpy as np
import pandas as pd
from cvttpy_tools.config import Config
@dataclass
class DataWindowParams:
training_size_: int
training_start_index_: int
class ModelDataPolicy(ABC):
config_: Config
current_data_params_: DataWindowParams
count_: int
is_real_time_: bool
def __init__(self, config: Config, *args: Any, **kwargs: Any):
self.config_ = config
self.current_data_params_ = DataWindowParams(
training_size_=config.get_value("model/training_size", 120),
training_start_index_=0,
)
self.count_ = 0
self.is_real_time_ = kwargs.get("is_real_time", False)
@abstractmethod
def advance(self, mkt_data_df: Optional[pd.DataFrame] = None) -> DataWindowParams:
self.count_ += 1
if not self.is_real_time_:
print(self.count_, end="\r")
return self.current_data_params_
@staticmethod
def create(config: Config, *args: Any, **kwargs: Any) -> ModelDataPolicy:
import importlib
model_data_policy_class_name = config.get_value("model/model_data_policy_class", None)
assert model_data_policy_class_name is not None
module_name, class_name = model_data_policy_class_name.rsplit(".", 1)
module = importlib.import_module(module_name)
model_training_data_policy_object = getattr(module, class_name)(
config=config, *args, **kwargs
)
return cast(ModelDataPolicy, model_training_data_policy_object)
class RollingWindowDataPolicy(ModelDataPolicy):
def __init__(self, config: Config, *args: Any, **kwargs: Any):
super().__init__(config, *args, **kwargs)
self.count_ = 1
def advance(self, mkt_data_df: Optional[pd.DataFrame] = None) -> DataWindowParams:
super().advance(mkt_data_df)
if self.is_real_time_:
self.current_data_params_.training_start_index_ = 0
            if mkt_data_df is not None and len(mkt_data_df) > self.current_data_params_.training_size_:
                self.current_data_params_.training_start_index_ = -self.current_data_params_.training_size_
else:
self.current_data_params_.training_start_index_ += 1
return self.current_data_params_
class OptimizedWndDataPolicy(ModelDataPolicy, ABC):
mkt_data_df_: pd.DataFrame
pair_: TradingPair # type: ignore
min_training_size_: int
max_training_size_: int
end_index_: int
prices_a_: np.ndarray
prices_b_: np.ndarray
def __init__(self, config: Config, *args: Any, **kwargs: Any):
super().__init__(config, *args, **kwargs)
assert (
kwargs.get("pair") is not None
), "pair must be provided"
assert (config.key_exists("model/max_training_size") and config.key_exists("model/min_training_size")
), "min_training_size and max_training_size must be provided"
self.min_training_size_ = cast(int, config.get_value("model/min_training_size"))
self.max_training_size_ = cast(int, config.get_value("model/max_training_size"))
from pairs_trading.lib.pt_strategy.trading_pair import TradingPair
self.pair_ = cast(TradingPair, kwargs.get("pair"))
if "mkt_data" in kwargs:
self.mkt_data_df_ = cast(pd.DataFrame, kwargs.get("mkt_data"))
col_a, col_b = self.pair_.colnames()
self.prices_a_ = np.array(self.mkt_data_df_[col_a])
self.prices_b_ = np.array(self.mkt_data_df_[col_b])
assert self.min_training_size_ < self.max_training_size_
def advance(self, mkt_data_df: Optional[pd.DataFrame] = None) -> DataWindowParams:
super().advance(mkt_data_df)
if mkt_data_df is not None:
self.mkt_data_df_ = mkt_data_df
if self.is_real_time_:
self.end_index_ = len(self.mkt_data_df_) - 1
else:
self.end_index_ = self.current_data_params_.training_start_index_ + self.max_training_size_
if self.end_index_ > len(self.mkt_data_df_) - 1:
self.end_index_ = len(self.mkt_data_df_) - 1
self.current_data_params_.training_start_index_ = self.end_index_ - self.max_training_size_
if self.current_data_params_.training_start_index_ < 0:
self.current_data_params_.training_start_index_ = 0
col_a, col_b = self.pair_.colnames()
self.prices_a_ = np.array(self.mkt_data_df_[col_a])
self.prices_b_ = np.array(self.mkt_data_df_[col_b])
self.current_data_params_ = self.optimize_window_size()
return self.current_data_params_
@abstractmethod
def optimize_window_size(self) -> DataWindowParams:
...
class EGOptimizedWndDataPolicy(OptimizedWndDataPolicy):
'''
# Engle-Granger cointegration test
*** VERY SLOW ***
'''
def __init__(self, config: Config, *args: Any, **kwargs: Any):
super().__init__(config, *args, **kwargs)
def optimize_window_size(self) -> DataWindowParams:
# Run Engle-Granger cointegration test
last_pvalue = 1.0
result = copy.copy(self.current_data_params_)
for trn_size in range(self.min_training_size_, self.max_training_size_):
if self.end_index_ - trn_size < 0:
break
from statsmodels.tsa.stattools import coint # type: ignore
start_index = self.end_index_ - trn_size
series_a = self.prices_a_[start_index : self.end_index_]
series_b = self.prices_b_[start_index : self.end_index_]
eg_pvalue = float(coint(series_a, series_b)[1])
if eg_pvalue < last_pvalue:
last_pvalue = eg_pvalue
result.training_size_ = trn_size
result.training_start_index_ = start_index
# print(
# f"*** DEBUG *** end_index={self.end_index_}, best_trn_size={self.current_data_params_.training_size}, {last_pvalue=}"
# )
return result
class ADFOptimizedWndDataPolicy(OptimizedWndDataPolicy):
# Augmented Dickey-Fuller test
def __init__(self, config: Config, *args: Any, **kwargs: Any):
super().__init__(config, *args, **kwargs)
def optimize_window_size(self) -> DataWindowParams:
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant
from statsmodels.tsa.stattools import adfuller
last_pvalue = 1.0
result = copy.copy(self.current_data_params_)
for trn_size in range(self.min_training_size_, self.max_training_size_):
if self.end_index_ - trn_size < 0:
break
start_index = self.end_index_ - trn_size
y = self.prices_a_[start_index : self.end_index_]
x = self.prices_b_[start_index : self.end_index_]
# Add constant to x for intercept
x_with_const = add_constant(x)
# OLS regression: y = a + b*x + e
model = OLS(y, x_with_const).fit()
residuals = y - model.predict(x_with_const)
# ADF test on residuals
try:
adf_result = adfuller(residuals, maxlag=1, regression="c")
adf_pvalue = float(adf_result[1])
            except Exception:
                # Handle edge cases (e.g., constant or near-constant residual series)
                adf_pvalue = 1.0
if adf_pvalue < last_pvalue:
last_pvalue = adf_pvalue
result.training_size_ = trn_size
result.training_start_index_ = start_index
# print(
# f"*** DEBUG *** end_index={self.end_index_},"
# f" best_trn_size={self.current_data_params_.training_size},"
# f" {last_pvalue=}"
# )
return result
class JohansenOptdWndDataPolicy(OptimizedWndDataPolicy):
# Johansen test
def __init__(self, config: Config, *args: Any, **kwargs: Any):
super().__init__(config, *args, **kwargs)
def optimize_window_size(self) -> DataWindowParams:
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import numpy as np
best_stat = -np.inf
best_trn_size = 0
best_start_index = -1
result = copy.copy(self.current_data_params_)
for trn_size in range(self.min_training_size_, self.max_training_size_):
if self.end_index_ - trn_size < 0:
break
start_index = self.end_index_ - trn_size
series_a = self.prices_a_[start_index:self.end_index_]
series_b = self.prices_b_[start_index:self.end_index_]
# Combine into 2D matrix for Johansen test
try:
data = np.column_stack([series_a, series_b])
# Johansen test: det_order=0 (no deterministic trend), k_ar_diff=1 (lag)
res = coint_johansen(data, det_order=0, k_ar_diff=1)
# Trace statistic for cointegration rank 1
trace_stat = res.lr1[0] # test stat for rank=0 vs >=1
critical_value = res.cvt[0, 1] # 5% critical value
if trace_stat > best_stat:
best_stat = trace_stat
best_trn_size = trn_size
best_start_index = start_index
except Exception:
continue
if best_trn_size > 0:
result.training_size_ = best_trn_size
result.training_start_index_ = best_start_index
else:
print("*** WARNING: No valid cointegration window found.")
# print(
# f"*** DEBUG *** end_index={self.end_index_}, best_trn_size={best_trn_size}, trace_stat={best_stat}"
# )
return result
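A hypothetical snippet showing how ModelDataPolicy.create is driven by config (the class path and keys come from the code above; the concrete values, trading_pair, and market_data_df are illustrative):
config = Config(json_src={
    "model": {
        "model_data_policy_class":
            "pairs_trading.lib.pt_strategy.model_data_policy.ADFOptimizedWndDataPolicy",
        "training_size": 120,
        "min_training_size": 60,
        "max_training_size": 240,
    }
})
mdp = ModelDataPolicy.create(config, pair=trading_pair, mkt_data=market_data_df)
params = mdp.advance()  # DataWindowParams with the optimized window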

View File

@ -1,104 +0,0 @@
from __future__ import annotations
from typing import Optional
import pandas as pd
import statsmodels.api as sm
from pairs_trading.lib.pt_strategy.pt_model import PairsTradingModel, Prediction
from pairs_trading.lib.pt_strategy.trading_pair import TradingPair
class OLSModel(PairsTradingModel):
model_: Optional[sm.regression.linear_model.RegressionResultsWrapper]
pair_predict_result_: Optional[pd.DataFrame]
zscore_df_: Optional[pd.DataFrame]
def predict(self, pair: TradingPair) -> Prediction:
self.training_df_ = pair.market_data_.copy()
zscore_df = self._fit_zscore(pair=pair)
assert zscore_df is not None
# zscore is both disequilibrium and scaled_disequilibrium
self.training_df_["dis-equilibrium"] = zscore_df[0]
self.training_df_["scaled_dis-equilibrium"] = zscore_df[0]
return Prediction(
tstamp=pair.market_data_.iloc[-1]["tstamp"],
disequilibrium=self.training_df_["dis-equilibrium"].iloc[-1],
scaled_disequilibrium=self.training_df_["scaled_dis-equilibrium"].iloc[-1],
)
def _fit_zscore(self, pair: TradingPair) -> pd.DataFrame:
assert self.training_df_ is not None
symbol_a_px_series = self.training_df_[pair.colnames()].iloc[:, 0]
symbol_b_px_series = self.training_df_[pair.colnames()].iloc[:, 1]
symbol_a_px_series, symbol_b_px_series = symbol_a_px_series.align(
symbol_b_px_series, axis=0
)
X = sm.add_constant(symbol_b_px_series)
self.model_ = sm.OLS(symbol_a_px_series, X).fit()
assert self.model_ is not None
# alternate way would be to use models residuals (will give identical results)
# alpha, beta = self.model_.params
# spread = symbol_a_px_series - (alpha + beta * symbol_b_px_series)
spread = self.model_.resid
return pd.DataFrame((spread - spread.mean()) / spread.std())
class VECMModel(PairsTradingModel):
def predict(self, pair: TradingPair) -> Prediction:
self.training_df_ = pair.market_data_.copy()
assert self.training_df_ is not None
vecm_fit = self._fit_VECM(pair=pair)
assert vecm_fit is not None
predicted_prices = vecm_fit.predict(steps=1)
# Convert prediction to a DataFrame for readability
predicted_df = pd.DataFrame(
predicted_prices, columns=pd.Index(pair.colnames()), dtype=float
)
disequilibrium = (predicted_df[pair.colnames()] @ vecm_fit.beta)[0][0]
scaled_disequilibrium = (disequilibrium - self.training_mu_) / self.training_std_
return Prediction(
tstamp=pair.market_data_.iloc[-1]["tstamp"],
disequilibrium=disequilibrium,
scaled_disequilibrium=scaled_disequilibrium,
)
def _fit_VECM(self, pair: TradingPair) -> VECMResults: # type: ignore
from statsmodels.tsa.vector_ar.vecm import VECM, VECMResults
vecm_df = self.training_df_[pair.colnames()].reset_index(drop=True)
vecm_model = VECM(vecm_df, coint_rank=1)
vecm_fit = vecm_model.fit()
assert vecm_fit is not None
# Check if the model converged properly
if not hasattr(vecm_fit, "beta") or vecm_fit.beta is None:
print(f"{self}: VECM model failed to converge properly")
diseq_series = self.training_df_[pair.colnames()] @ vecm_fit.beta
# print(diseq_series.shape)
self.training_mu_ = float(diseq_series[0].mean())
self.training_std_ = float(diseq_series[0].std())
self.training_df_["dis-equilibrium"] = (
self.training_df_[pair.colnames()] @ vecm_fit.beta
)
# Normalize the dis-equilibrium
self.training_df_["scaled_dis-equilibrium"] = (
diseq_series - self.training_mu_
) / self.training_std_
return vecm_fit
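A toy illustration of the scaling step above, with made-up numbers (the real diseq_series is the beta-weighted combination of the two price series):
import numpy as np
diseq_series = np.array([0.10, 0.12, 0.08, 0.11])  # training-window disequilibrium
mu, std = diseq_series.mean(), diseq_series.std()
scaled = (0.15 - mu) / std  # a predicted disequilibrium vs. the training distribution
print(round(float(scaled), 2))  # ~3.2 standard deviations from the mean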

View File

@ -1,28 +0,0 @@
from __future__ import annotations
from typing import Any, Dict
import pandas as pd
class Prediction:
tstamp_: pd.Timestamp
disequilibrium_: float
scaled_disequilibrium_: float
def __init__(self, tstamp: pd.Timestamp, disequilibrium: float, scaled_disequilibrium: float):
self.tstamp_ = tstamp
self.disequilibrium_ = disequilibrium
self.scaled_disequilibrium_ = scaled_disequilibrium
def to_dict(self) -> Dict[str, Any]:
return {
"tstamp": self.tstamp_,
"disequilibrium": self.disequilibrium_,
"signed_scaled_disequilibrium": self.scaled_disequilibrium_,
"scaled_disequilibrium": abs(self.scaled_disequilibrium_),
# "pair": self.pair_,
}
def to_df(self) -> pd.DataFrame:
return pd.DataFrame([self.to_dict()])

View File

@ -1,223 +0,0 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
import pandas as pd
# ---
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import Config
from cvttpy_tools.settings.cvtt_types import JsonDictT
# ---
from cvttpy_trading.trading.mkt_data.md_summary import MdTradesAggregate
from cvttpy_trading.trading.instrument import ExchangeInstrument
# ---
from pairs_trading.lib.tools.data_loader import load_market_data
class PtMarketData(NamedObject, ABC):
config_: Config
origin_mkt_data_df_: pd.DataFrame
market_data_df_: pd.DataFrame
stat_model_price_: str
instruments_: List[ExchangeInstrument]
symbol_a_: str
symbol_b_: str
def __init__(self, config: Config, instruments: List[ExchangeInstrument]):
self.config_ = config
self.origin_mkt_data_df_ = pd.DataFrame()
self.market_data_df_ = pd.DataFrame()
self.stat_model_price_ = self.config_.get_value("model/stat_model_price")
self.instruments_ = instruments
assert len(self.instruments_) > 0, "No instruments found in config"
self.symbol_a_ = self.instruments_[0].instrument_id().split("-", 1)[1]
self.symbol_b_ = self.instruments_[1].instrument_id().split("-", 1)[1]
@abstractmethod
def md_columns(self) -> List[str]: ...
@abstractmethod
def rename_columns(self, symbol_df: pd.DataFrame) -> pd.DataFrame: ...
@abstractmethod
def tranform_df_target_colnames(self) -> List[str]: ...
def set_market_data(self) -> None:
self.market_data_df_ = pd.DataFrame(
self._transform_dataframe(self.origin_mkt_data_df_)[
["tstamp"] + self.tranform_df_target_colnames()
]
)
self.market_data_df_ = self.market_data_df_.dropna().reset_index(drop=True)
self.market_data_df_["tstamp"] = pd.to_datetime(self.market_data_df_["tstamp"])
self.market_data_df_ = self.market_data_df_.sort_values("tstamp")
def colnames(self) -> List[str]:
return [
f"{self.stat_model_price_}_{self.symbol_a_}",
f"{self.stat_model_price_}_{self.symbol_b_}",
]
def _transform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
df_selected: pd.DataFrame = pd.DataFrame(df[self.md_columns()])
result_df = (
pd.DataFrame(df_selected["tstamp"]).drop_duplicates().reset_index(drop=True)
)
# For each unique symbol, add a corresponding stat_model_price column
symbols = df_selected["symbol"].unique()
for symbol in symbols:
# Filter rows for this symbol
df_symbol = df_selected[df_selected["symbol"] == symbol].reset_index(
drop=True
)
            # Create column name like "close_COIN"
temp_df: pd.DataFrame = self.rename_columns(df_symbol)
# Join with our result dataframe
result_df = pd.merge(result_df, temp_df, on="tstamp", how="left")
result_df = result_df.reset_index(
drop=True
) # do not dropna() since irrelevant symbol would affect dataset
return result_df.dropna()
class ResearchMarketData(PtMarketData):
current_index_: int
is_execution_price_: bool
def __init__(self, config: Config, instruments: List[ExchangeInstrument]):
super().__init__(config, instruments)
self.current_index_ = 0
self.is_execution_price_ = self.config_.key_exists("execution_price")
if self.is_execution_price_:
self.execution_price_column_ = self.config_.get_value("execution_price")["column"]
self.execution_price_shift_ = self.config_.get_value("execution_price")["shift"]
else:
self.execution_price_column_ = None
self.execution_price_shift_ = 0
def has_next(self) -> bool:
return self.current_index_ < len(self.market_data_df_)
def get_next(self) -> pd.Series:
result = self.market_data_df_.iloc[self.current_index_]
self.current_index_ += 1
return result
def load(self) -> None:
datafiles: List[str] = self.config_.get_value("datafiles", [])
assert len(datafiles) > 0, "No datafiles found in config"
extra_minutes: int = self.execution_price_shift_
for datafile in datafiles:
md_df = load_market_data(
datafile=datafile,
instruments=self.instruments_,
db_table_name=self.config_.get_value("market_data_loading")[
self.instruments_[0].user_data_.get("instrument_type", "?instrument_type?")
]["db_table_name"],
trading_hours=self.config_.get_value("trading_hours"),
extra_minutes=extra_minutes,
)
self.origin_mkt_data_df_ = pd.concat([self.origin_mkt_data_df_, md_df])
self.origin_mkt_data_df_ = self.origin_mkt_data_df_.sort_values(by="tstamp")
self.origin_mkt_data_df_ = self.origin_mkt_data_df_.dropna().reset_index(
drop=True
)
self.set_market_data()
self._set_execution_price_data()
def _set_execution_price_data(self) -> None:
if not self.is_execution_price_:
return
if not self.config_.key_exists("execution_price"):
self.market_data_df_[f"exec_price_{self.symbol_a_}"] = self.market_data_df_[
f"{self.stat_model_price_}_{self.symbol_a_}"
]
self.market_data_df_[f"exec_price_{self.symbol_b_}"] = self.market_data_df_[
f"{self.stat_model_price_}_{self.symbol_b_}"
]
return
execution_price_column = self.config_.get_value("execution_price")["column"]
execution_price_shift = self.config_.get_value("execution_price")["shift"]
self.market_data_df_[f"exec_price_{self.symbol_a_}"] = self.market_data_df_[
f"{execution_price_column}_{self.symbol_a_}"
].shift(-execution_price_shift)
self.market_data_df_[f"exec_price_{self.symbol_b_}"] = self.market_data_df_[
f"{execution_price_column}_{self.symbol_b_}"
].shift(-execution_price_shift)
self.market_data_df_ = self.market_data_df_.dropna().reset_index(drop=True)
def md_columns(self) -> List[str]:
# @abstractmethod
if self.is_execution_price_:
return ["tstamp", "symbol", self.stat_model_price_, self.execution_price_column_]
else:
return ["tstamp", "symbol", self.stat_model_price_]
def rename_columns(self, selected_symbol_df: pd.DataFrame) -> pd.DataFrame:
# @abstractmethod
symbol = selected_symbol_df.iloc[0]["symbol"]
new_price_column = f"{self.stat_model_price_}_{symbol}"
if self.is_execution_price_:
new_execution_price_column = f"{self.execution_price_column_}_{symbol}"
# Create temporary dataframe with timestamp and price
temp_df = pd.DataFrame(
{
"tstamp": selected_symbol_df["tstamp"],
new_price_column: selected_symbol_df[self.stat_model_price_],
new_execution_price_column: selected_symbol_df[self.execution_price_column_],
}
)
else:
temp_df = pd.DataFrame(
{
"tstamp": selected_symbol_df["tstamp"],
new_price_column: selected_symbol_df[self.stat_model_price_],
}
)
return temp_df
def tranform_df_target_colnames(self):
# @abstractmethod
return self.colnames() + self.orig_exec_prices_colnames()
def orig_exec_prices_colnames(self) -> List[str]:
return [
f"{self.execution_price_column_}_{self.symbol_a_}",
f"{self.execution_price_column_}_{self.symbol_b_}",
] if self.is_execution_price_ else []
class LiveMarketData(PtMarketData):
def __init__(self, config: Config, instruments: List[ExchangeInstrument]):
super().__init__(config, instruments)
def md_columns(self) -> List[str]:
# @abstractmethod
return ["tstamp", "symbol", self.stat_model_price_]
def rename_columns(self, selected_symbol_df: pd.DataFrame) -> pd.DataFrame:
# @abstractmethod
symbol = selected_symbol_df.iloc[0]["symbol"]
new_price_column = f"{self.stat_model_price_}_{symbol}"
temp_df = pd.DataFrame(
{
"tstamp": selected_symbol_df["tstamp"],
new_price_column: selected_symbol_df[self.stat_model_price_],
}
)
return temp_df
def tranform_df_target_colnames(self):
# @abstractmethod
return self.colnames()
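The _transform_dataframe step above is essentially a long-to-wide reshape; a toy pandas equivalent for intuition (the real code merges per-symbol frames instead, so gaps in one symbol do not silently drop rows for the other):
import pandas as pd
long_df = pd.DataFrame({
    "tstamp": ["11:30", "11:30", "11:31", "11:31"],
    "symbol": ["ADA-USDT", "SOL-USDT", "ADA-USDT", "SOL-USDT"],
    "close": [0.8743, 219.70, 0.8741, 219.62],
})
wide = long_df.pivot(index="tstamp", columns="symbol", values="close")
wide.columns = [f"close_{s}" for s in wide.columns]  # e.g. close_ADA-USDT
print(wide.reset_index())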

View File

@ -1,30 +0,0 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict, cast
# ---
from cvttpy_tools.config import Config
# ---
from pairs_trading.lib.pt_strategy.prediction import Prediction
from pairs_trading.lib.pt_strategy.trading_pair import TradingPair
class PairsTradingModel(ABC):
@abstractmethod
def predict(self, pair: TradingPair) -> Prediction: # type: ignore[assignment]
...
@staticmethod
def create(config: Config) -> PairsTradingModel:
import importlib
model_class_name = config.get_value("model/model_class", None)
assert model_class_name is not None
module_name, class_name = model_class_name.rsplit(".", 1)
module = importlib.import_module(module_name)
model_object = getattr(module, class_name)()
return cast(PairsTradingModel, model_object)

View File

@ -1,305 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional, Tuple
import pandas as pd
# ---
from cvttpy_tools.config import Config
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
# ---
from pairs_trading.lib.pt_strategy.model_data_policy import ModelDataPolicy
from pairs_trading.lib.pt_strategy.pt_market_data import ResearchMarketData
from pairs_trading.lib.pt_strategy.pt_model import Prediction
from pairs_trading.lib.pt_strategy.trading_pair import PairState, TradingPair, ResearchTradingPair
class PtResearchStrategy:
config_: Config
trading_pair_: ResearchTradingPair
model_data_policy_: ModelDataPolicy
pt_mkt_data_: ResearchMarketData
trades_: List[pd.DataFrame]
predictions_df_: pd.DataFrame
def __init__(
self,
config: Config,
instruments: List[ExchangeInstrument]
):
self.config_ = config
self.trades_ = []
self.trading_pair_ = ResearchTradingPair(config=config, instruments=instruments)
self.predictions_df_ = pd.DataFrame()
import copy
# modified config must be passed to PtMarketData
config_copy = copy.deepcopy(config)
config_copy.set_value("instruments", instruments)
self.pt_mkt_data_ = ResearchMarketData(config=config_copy, instruments=instruments)
self.pt_mkt_data_.load()
self.model_data_policy_ = ModelDataPolicy.create(
config_copy, mkt_data=self.pt_mkt_data_.market_data_df_, pair=self.trading_pair_
)
def outstanding_positions(self) -> List[Dict[str, Any]]:
return list(self.trading_pair_.user_data_.get("outstanding_positions", []))
def run(self) -> None:
training_minutes = self.config_.get_value("training_minutes", 120)
market_data_series: pd.Series
market_data_df = pd.DataFrame()
idx = 0
while self.pt_mkt_data_.has_next():
market_data_series = self.pt_mkt_data_.get_next()
new_row = pd.DataFrame([market_data_series])
market_data_df = pd.concat([market_data_df, new_row], ignore_index=True)
if idx >= training_minutes:
break
idx += 1
assert idx >= training_minutes, "Not enough training data"
while self.pt_mkt_data_.has_next():
market_data_series = self.pt_mkt_data_.get_next()
new_row = pd.DataFrame([market_data_series])
market_data_df = pd.concat([market_data_df, new_row], ignore_index=True)
prediction = self.trading_pair_.run(
market_data_df, self.model_data_policy_.advance(mkt_data_df=market_data_df)
)
self.predictions_df_ = pd.concat(
[self.predictions_df_, prediction.to_df()], ignore_index=True
)
assert prediction is not None
trades = self._create_trades(
prediction=prediction, last_row=market_data_df.iloc[-1]
)
if trades is not None:
self.trades_.append(trades)
trades = self._handle_outstanding_positions()
if trades is not None:
self.trades_.append(trades)
def _create_trades(
self, prediction: Prediction, last_row: pd.Series
) -> Optional[pd.DataFrame]:
pair = self.trading_pair_
trades = None
open_threshold = self.config_.get_value("model/disequilibrium/open_trshld")
close_threshold = self.config_.get_value("model/disequilibrium/close_trshld")
scaled_disequilibrium = prediction.scaled_disequilibrium_
abs_scaled_disequilibrium = abs(scaled_disequilibrium)
if pair.user_data_["state"] in [
PairState.INITIAL,
PairState.CLOSE,
PairState.CLOSE_POSITION,
PairState.CLOSE_STOP_LOSS,
PairState.CLOSE_STOP_PROFIT,
]:
if abs_scaled_disequilibrium >= open_threshold:
trades = self._create_open_trades(
pair, row=last_row, prediction=prediction
)
if trades is not None:
trades["status"] = PairState.OPEN.name
print(f"OPEN TRADES:\n{trades}")
pair.user_data_["state"] = PairState.OPEN
pair.on_open_trades(trades)
elif pair.user_data_["state"] == PairState.OPEN:
if abs_scaled_disequilibrium <= close_threshold:
trades = self._create_close_trades(
pair, row=last_row, prediction=prediction
)
if trades is not None:
trades["status"] = PairState.CLOSE.name
print(f"CLOSE TRADES:\n{trades}")
pair.user_data_["state"] = PairState.CLOSE
pair.on_close_trades(trades)
elif pair.to_stop_close_conditions(predicted_row=last_row):
trades = self._create_close_trades(pair, row=last_row)
if trades is not None:
trades["status"] = pair.user_data_["stop_close_state"].name
print(f"STOP CLOSE TRADES:\n{trades}")
pair.user_data_["state"] = pair.user_data_["stop_close_state"]
pair.on_close_trades(trades)
return trades
def _handle_outstanding_positions(self) -> Optional[pd.DataFrame]:
trades = None
pair = self.trading_pair_
# Outstanding positions
if pair.user_data_["state"] == PairState.OPEN:
print(f"{pair}: *** Position is NOT CLOSED. ***")
# outstanding positions
if self.config_.get_value("close_outstanding_positions", False):
close_position_row = pd.Series(pair.market_data_.iloc[-2])
# close_position_row["disequilibrium"] = 0.0
# close_position_row["scaled_disequilibrium"] = 0.0
# close_position_row["signed_scaled_disequilibrium"] = 0.0
trades = self._create_close_trades(
pair=pair, row=close_position_row, prediction=None
)
if trades is not None:
trades["status"] = PairState.CLOSE_POSITION.name
print(f"CLOSE_POSITION TRADES:\n{trades}")
pair.user_data_["state"] = PairState.CLOSE_POSITION
pair.on_close_trades(trades)
else:
pair.add_outstanding_position(
symbol=pair.symbol_a(),
open_side=pair.user_data_["open_side_a"],
open_px=pair.user_data_["open_px_a"],
open_tstamp=pair.user_data_["open_tstamp"],
last_mkt_data_row=pair.market_data_.iloc[-1],
)
pair.add_outstanding_position(
symbol=pair.symbol_b(),
open_side=pair.user_data_["open_side_b"],
open_px=pair.user_data_["open_px_b"],
open_tstamp=pair.user_data_["open_tstamp"],
last_mkt_data_row=pair.market_data_.iloc[-1],
)
return trades
def _trades_df(self) -> pd.DataFrame:
types = {
"time": "datetime64[ns]",
"action": "string",
"symbol": "string",
"side": "string",
"price": "float64",
"disequilibrium": "float64",
"scaled_disequilibrium": "float64",
"signed_scaled_disequilibrium": "float64",
# "pair": "object",
}
columns = list(types.keys())
return pd.DataFrame(columns=columns).astype(types)
def _create_open_trades(
self, pair: ResearchTradingPair, row: pd.Series, prediction: Prediction
) -> Optional[pd.DataFrame]:
colname_a, colname_b = pair.exec_prices_colnames()
tstamp = row["tstamp"]
diseqlbrm = prediction.disequilibrium_
scaled_disequilibrium = prediction.scaled_disequilibrium_
px_a = row[f"{colname_a}"]
px_b = row[f"{colname_b}"]
# creating the trades
df = self._trades_df()
print(f"OPEN_TRADES: {row["tstamp"]} {scaled_disequilibrium=}")
if diseqlbrm > 0:
side_a = "SELL"
side_b = "BUY"
else:
side_a = "BUY"
side_b = "SELL"
# save closing sides
pair.user_data_["open_side_a"] = side_a # used in oustanding positions
pair.user_data_["open_side_b"] = side_b
pair.user_data_["open_px_a"] = px_a
pair.user_data_["open_px_b"] = px_b
pair.user_data_["open_tstamp"] = tstamp
pair.user_data_["close_side_a"] = side_b # used for closing trades
pair.user_data_["close_side_b"] = side_a
# create opening trades
df.loc[len(df)] = {
"time": tstamp,
"symbol": pair.symbol_a(),
"side": side_a,
"action": "OPEN",
"price": px_a,
"disequilibrium": diseqlbrm,
"signed_scaled_disequilibrium": scaled_disequilibrium,
"scaled_disequilibrium": abs(scaled_disequilibrium),
# "pair": pair,
}
df.loc[len(df)] = {
"time": tstamp,
"symbol": pair.symbol_b(),
"side": side_b,
"action": "OPEN",
"price": px_b,
"disequilibrium": diseqlbrm,
"scaled_disequilibrium": abs(scaled_disequilibrium),
"signed_scaled_disequilibrium": scaled_disequilibrium,
# "pair": pair,
}
return df
def _create_close_trades(
self, pair: ResearchTradingPair, row: pd.Series, prediction: Optional[Prediction] = None
) -> Optional[pd.DataFrame]:
colname_a, colname_b = pair.exec_prices_colnames()
tstamp = row["tstamp"]
if prediction is not None:
diseqlbrm = prediction.disequilibrium_
signed_scaled_disequilibrium = prediction.scaled_disequilibrium_
scaled_disequilibrium = abs(prediction.scaled_disequilibrium_)
else:
diseqlbrm = 0.0
signed_scaled_disequilibrium = 0.0
scaled_disequilibrium = 0.0
px_a = row[f"{colname_a}"]
px_b = row[f"{colname_b}"]
# creating the trades
df = self._trades_df()
# create opening trades
df.loc[len(df)] = {
"time": tstamp,
"symbol": pair.symbol_a(),
"side": pair.user_data_["close_side_a"],
"action": "CLOSE",
"price": px_a,
"disequilibrium": diseqlbrm,
"scaled_disequilibrium": scaled_disequilibrium,
"signed_scaled_disequilibrium": signed_scaled_disequilibrium,
# "pair": pair,
}
df.loc[len(df)] = {
"time": tstamp,
"symbol": pair.symbol_b(),
"side": pair.user_data_["close_side_b"],
"action": "CLOSE",
"price": px_b,
"disequilibrium": diseqlbrm,
"scaled_disequilibrium": scaled_disequilibrium,
"signed_scaled_disequilibrium": signed_scaled_disequilibrium,
# "pair": pair,
}
del pair.user_data_["close_side_a"]
del pair.user_data_["close_side_b"]
del pair.user_data_["open_tstamp"]
del pair.user_data_["open_px_a"]
del pair.user_data_["open_px_b"]
del pair.user_data_["open_side_a"]
del pair.user_data_["open_side_b"]
return df
def day_trades(self) -> pd.DataFrame:
return pd.concat(self.trades_, ignore_index=True)

View File

@ -1,527 +0,0 @@
import os
import sqlite3
from datetime import date, datetime
from typing import Any, Dict, List, Optional, Tuple
import pandas as pd
# ---
from cvttpy_tools.config import Config
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
# ---
from pairs_trading.lib.pt_strategy.trading_pair import TradingPair
# Recommended replacement adapters and converters for Python 3.12+
# From: https://docs.python.org/3/library/sqlite3.html#sqlite3-adapter-converter-recipes
def adapt_date_iso(val: date) -> str:
"""Adapt datetime.date to ISO 8601 date."""
return val.isoformat()
def adapt_datetime_iso(val: datetime) -> str:
"""Adapt datetime.datetime to timezone-naive ISO 8601 date."""
return val.isoformat()
def convert_date(val: bytes) -> date:
"""Convert ISO 8601 date to datetime.date object."""
return datetime.fromisoformat(val.decode()).date()
def convert_datetime(val: bytes) -> datetime:
"""Convert ISO 8601 datetime to datetime.datetime object."""
return datetime.fromisoformat(val.decode())
# Register the adapters and converters
sqlite3.register_adapter(date, adapt_date_iso)
sqlite3.register_adapter(datetime, adapt_datetime_iso)
sqlite3.register_converter("date", convert_date)
sqlite3.register_converter("datetime", convert_datetime)
def create_result_database(db_path: str) -> None:
"""
Create the SQLite database and required tables if they don't exist.
"""
try:
# Create directory if it doesn't exist
db_dir = os.path.dirname(db_path)
if db_dir and not os.path.exists(db_dir):
os.makedirs(db_dir, exist_ok=True)
print(f"Created directory: {db_dir}")
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Create the pt_bt_results table for completed trades
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS pt_bt_results (
date DATE,
pair TEXT,
symbol TEXT,
open_time DATETIME,
open_side TEXT,
open_price REAL,
open_quantity INTEGER,
open_disequilibrium REAL,
close_time DATETIME,
close_side TEXT,
close_price REAL,
close_quantity INTEGER,
close_disequilibrium REAL,
symbol_return REAL,
pair_return REAL,
close_condition TEXT
)
"""
)
cursor.execute("DELETE FROM pt_bt_results;")
# Create the outstanding_positions table for open positions
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS outstanding_positions (
date DATE,
pair TEXT,
symbol TEXT,
position_quantity REAL,
last_price REAL,
unrealized_return REAL,
open_price REAL,
open_side TEXT
)
"""
)
cursor.execute("DELETE FROM outstanding_positions;")
# Create the config table for storing configuration JSON for reference
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS config (
id INTEGER PRIMARY KEY AUTOINCREMENT,
run_timestamp DATETIME,
config_file_path TEXT,
config_json TEXT,
datafiles TEXT,
instruments TEXT
)
"""
)
cursor.execute("DELETE FROM config;")
conn.commit()
conn.close()
except Exception as e:
print(f"Error creating result database: {str(e)}")
raise
def store_config_in_database(
db_path: str,
config_file_path: str,
config: Config,
datafiles: List[Tuple[str, str]],
instruments: List[ExchangeInstrument],
) -> None:
"""
Store configuration information in the database for reference.
"""
import json
if db_path.upper() == "NONE":
return
try:
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Convert config to JSON string
config_json = json.dumps(config.data(), indent=2, default=str)
# Convert lists to comma-separated strings for storage
datafiles_str = ", ".join([f"{datafile}" for _, datafile in datafiles])
instruments_str = ", ".join(
[
inst.details_short()
for inst in instruments
]
)
# Insert configuration record
cursor.execute(
"""
INSERT INTO config (
run_timestamp, config_file_path, config_json, datafiles, instruments
) VALUES (?, ?, ?, ?, ?)
""",
(
datetime.now(),
config_file_path,
config_json,
datafiles_str,
instruments_str,
),
)
conn.commit()
conn.close()
print(f"Configuration stored in database")
except Exception as e:
print(f"Error storing configuration in database: {str(e)}")
import traceback
traceback.print_exc()
def convert_timestamp(timestamp: Any) -> Optional[datetime]:
"""Convert pandas Timestamp to Python datetime object for SQLite compatibility."""
if timestamp is None:
return None
if isinstance(timestamp, pd.Timestamp):
return timestamp.to_pydatetime()
elif isinstance(timestamp, datetime):
return timestamp
elif isinstance(timestamp, date):
return datetime.combine(timestamp, datetime.min.time())
elif isinstance(timestamp, str):
return datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
elif isinstance(timestamp, int):
return datetime.fromtimestamp(timestamp)
else:
raise ValueError(f"Unsupported timestamp type: {type(timestamp)}")
DayT = str
TradeT = Dict[str, Any]
OutstandingPositionT = Dict[str, Any]
class PairResearchResult:
"""
Class to handle pair research results for a single pair across multiple days.
Simplified version of BacktestResult focused on single pair analysis.
"""
trades_: Dict[DayT, pd.DataFrame]
outstanding_positions_: Dict[DayT, List[OutstandingPositionT]]
symbol_roundtrip_trades_: Dict[str, List[Dict[str, Any]]]
config_: Config
def __init__(self, config: Config) -> None:
self.config_ = config
self.trades_ = {}
self.outstanding_positions_ = {}
self.total_realized_pnl = 0.0
self.symbol_roundtrip_trades_ = {}
def add_day_results(self, day: DayT, trades: pd.DataFrame, outstanding_positions: List[Dict[str, Any]]) -> None:
assert isinstance(trades, pd.DataFrame)
self.trades_[day] = trades
self.outstanding_positions_[day] = outstanding_positions
def outstanding_positions(self) -> List[OutstandingPositionT]:
"""Get all outstanding positions across all days as a flat list."""
res: List[Dict[str, Any]] = []
for day in self.outstanding_positions_.keys():
res.extend(self.outstanding_positions_[day])
return res
def calculate_returns(self) -> None:
"""Calculate and store total returns for the single pair across all days."""
self.extract_roundtrip_trades()
self.total_realized_pnl = 0.0
for day, day_trades in self.symbol_roundtrip_trades_.items():
for trade in day_trades:
self.total_realized_pnl += trade['symbol_return']
def extract_roundtrip_trades(self) -> None:
"""
Extract round-trip trades by day, grouping open/close pairs for each symbol.
Returns a dictionary with day as key and list of completed round-trip trades.
"""
def _symbol_return(trade1_side: str, trade1_px: float, trade2_side: str, trade2_px: float) -> float:
if trade1_side == "BUY" and trade2_side == "SELL":
return (trade2_px - trade1_px) / trade1_px * 100
elif trade1_side == "SELL" and trade2_side == "BUY":
return (trade1_px - trade2_px) / trade1_px * 100
else:
return 0
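        # Worked example of _symbol_return (prices hypothetical): a leg bought at
        # 100 and sold at 105 returns (105 - 100) / 100 * 100 = +5.0; a leg sold
        # short at 100 and bought back at 98 returns (100 - 98) / 100 * 100 = +2.0.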
# Process each day separately
for day, day_trades in self.trades_.items():
# Sort trades by timestamp for the day
sorted_trades = day_trades #sorted(day_trades, key=lambda x: x["timestamp"] if x["timestamp"] else pd.Timestamp.min)
day_roundtrips = []
# Process trades in groups of 4 (open A, open B, close A, close B)
for idx in range(0, len(sorted_trades), 4):
if idx + 3 >= len(sorted_trades):
break
trade_a_1 = sorted_trades.iloc[idx] # Open A
trade_b_1 = sorted_trades.iloc[idx + 1] # Open B
trade_a_2 = sorted_trades.iloc[idx + 2] # Close A
trade_b_2 = sorted_trades.iloc[idx + 3] # Close B
# Validate trade sequence
if not (trade_a_1["action"] == "OPEN" and trade_a_2["action"] == "CLOSE"):
continue
if not (trade_b_1["action"] == "OPEN" and trade_b_2["action"] == "CLOSE"):
continue
# Calculate individual symbol returns
symbol_a_return = _symbol_return(
trade_a_1["side"], trade_a_1["price"],
trade_a_2["side"], trade_a_2["price"]
)
symbol_b_return = _symbol_return(
trade_b_1["side"], trade_b_1["price"],
trade_b_2["side"], trade_b_2["price"]
)
pair_return = symbol_a_return + symbol_b_return
# Create round-trip records for both symbols
funding_per_position = self.config_.get_value("funding_per_pair", 10000) / 2
# Symbol A round-trip
day_roundtrips.append({
"symbol": trade_a_1["symbol"],
"open_side": trade_a_1["side"],
"open_price": trade_a_1["price"],
"open_time": trade_a_1["time"],
"close_side": trade_a_2["side"],
"close_price": trade_a_2["price"],
"close_time": trade_a_2["time"],
"symbol_return": symbol_a_return,
"pair_return": pair_return,
"shares": funding_per_position / trade_a_1["price"],
"close_condition": trade_a_2.get("status", "UNKNOWN"),
"open_disequilibrium": trade_a_1.get("disequilibrium"),
"close_disequilibrium": trade_a_2.get("disequilibrium"),
})
# Symbol B round-trip
day_roundtrips.append({
"symbol": trade_b_1["symbol"],
"open_side": trade_b_1["side"],
"open_price": trade_b_1["price"],
"open_time": trade_b_1["time"],
"close_side": trade_b_2["side"],
"close_price": trade_b_2["price"],
"close_time": trade_b_2["time"],
"symbol_return": symbol_b_return,
"pair_return": pair_return,
"shares": funding_per_position / trade_b_1["price"],
"close_condition": trade_b_2.get("status", "UNKNOWN"),
"open_disequilibrium": trade_b_1.get("disequilibrium"),
"close_disequilibrium": trade_b_2.get("disequilibrium"),
})
if day_roundtrips:
self.symbol_roundtrip_trades_[day] = day_roundtrips
def print_returns_by_day(self) -> None:
"""
Print detailed return information for each day, grouped by day.
Shows individual symbol round-trips and daily totals.
"""
print("\n====== PAIR RESEARCH RETURNS BY DAY ======")
total_return_all_days = 0.0
for day, day_trades in sorted(self.symbol_roundtrip_trades_.items()):
print(f"\n--- {day} ---")
day_total_return = 0.0
pair_returns = []
# Group trades by pair (every 2 trades form a pair)
for idx in range(0, len(day_trades), 2):
if idx + 1 < len(day_trades):
trade_a = day_trades[idx]
trade_b = day_trades[idx + 1]
# Print individual symbol results
print(f" {trade_a['open_time'].time()}-{trade_a['close_time'].time()}")
print(f" {trade_a['symbol']}: {trade_a['open_side']} @ ${trade_a['open_price']:.2f}"
f"{trade_a['close_side']} @ ${trade_a['close_price']:.2f} | "
f"Return: {trade_a['symbol_return']:+.2f}% | Shares: {trade_a['shares']:.2f}")
print(f" {trade_b['symbol']}: {trade_b['open_side']} @ ${trade_b['open_price']:.2f}"
f"{trade_b['close_side']} @ ${trade_b['close_price']:.2f} | "
f"Return: {trade_b['symbol_return']:+.2f}% | Shares: {trade_b['shares']:.2f}")
# Show disequilibrium info if available
if trade_a.get('open_disequilibrium') is not None:
print(f" Disequilibrium: Open: {trade_a['open_disequilibrium']:.4f}, "
f"Close: {trade_a['close_disequilibrium']:.4f}")
pair_return = trade_a['pair_return']
print(f" Pair Return: {pair_return:+.2f}% | Close Condition: {trade_a['close_condition']}")
print()
pair_returns.append(pair_return)
day_total_return += pair_return
print(f" Day Total Return: {day_total_return:+.2f}% ({len(pair_returns)} pairs)")
total_return_all_days += day_total_return
print(f"\n====== TOTAL RETURN ACROSS ALL DAYS ======")
print(f"Total Return: {total_return_all_days:+.2f}%")
print(f"Total Days: {len(self.symbol_roundtrip_trades_)}")
if len(self.symbol_roundtrip_trades_) > 0:
print(f"Average Daily Return: {total_return_all_days / len(self.symbol_roundtrip_trades_):+.2f}%")
def get_return_summary(self) -> Dict[str, Any]:
"""
Get a summary of returns across all days.
Returns a dictionary with key metrics.
"""
if len(self.symbol_roundtrip_trades_) == 0:
return {
"total_return": 0.0,
"total_days": 0,
"total_pairs": 0,
"average_daily_return": 0.0,
"best_day": None,
"worst_day": None,
"daily_returns": {}
}
daily_returns = {}
total_return = 0.0
total_pairs = 0
for day, day_trades in self.symbol_roundtrip_trades_.items():
day_return = 0.0
day_pairs = len(day_trades) // 2 # Each pair has 2 symbol trades
for trade in day_trades:
day_return += trade['symbol_return']
daily_returns[day] = {
"return": day_return,
"pairs": day_pairs
}
total_return += day_return
total_pairs += day_pairs
best_day = max(daily_returns.items(), key=lambda x: x[1]["return"]) if daily_returns else None
worst_day = min(daily_returns.items(), key=lambda x: x[1]["return"]) if daily_returns else None
return {
"total_return": total_return,
"total_days": len(self.symbol_roundtrip_trades_),
"total_pairs": total_pairs,
"average_daily_return": total_return / len(self.symbol_roundtrip_trades_) if self.symbol_roundtrip_trades_ else 0.0,
"best_day": best_day,
"worst_day": worst_day,
"daily_returns": daily_returns
}
def print_grand_totals(self) -> None:
"""Print grand totals for the single pair analysis."""
summary = self.get_return_summary()
print(f"\n====== PAIR RESEARCH GRAND TOTALS ======")
print('---')
print(f"Total Return: {summary['total_return']:+.2f}%")
print('---')
print(f"Total Days Traded: {summary['total_days']}")
print(f"Total Open-Close Actions: {summary['total_pairs']}")
print(f"Total Trades: 4 * {summary['total_pairs']} = {4 * summary['total_pairs']}")
if summary['total_days'] > 0:
print(f"Average Daily Return: {summary['average_daily_return']:+.2f}%")
if summary['best_day']:
best_day, best_data = summary['best_day']
print(f"Best Day: {best_day} ({best_data['return']:+.2f}%)")
if summary['worst_day']:
worst_day, worst_data = summary['worst_day']
print(f"Worst Day: {worst_day} ({worst_data['return']:+.2f}%)")
# Update the total_realized_pnl for backward compatibility
self.total_realized_pnl = summary['total_return']
def analyze_pair_performance(self) -> None:
"""
Main method to perform comprehensive pair research analysis.
Extracts round-trip trades, calculates returns, groups by day, and prints results.
"""
print(f"\n{'='*60}")
print(f"PAIR RESEARCH PERFORMANCE ANALYSIS")
print(f"{'='*60}")
self.calculate_returns()
self.print_returns_by_day()
self.print_outstanding_positions()
self._print_additional_metrics()
self.print_grand_totals()
def _print_additional_metrics(self) -> None:
"""Print additional performance metrics."""
summary = self.get_return_summary()
if summary['total_days'] == 0:
return
print(f"\n====== ADDITIONAL METRICS ======")
# Calculate win rate
winning_days = sum(1 for day_data in summary['daily_returns'].values() if day_data['return'] > 0)
win_rate = (winning_days / summary['total_days']) * 100
print(f"Winning Days: {winning_days}/{summary['total_days']} ({win_rate:.1f}%)")
# Calculate average trade return
if summary['total_pairs'] > 0:
# Each pair has 2 symbol trades, so total symbol trades = total_pairs * 2
total_symbol_trades = summary['total_pairs'] * 2
avg_symbol_return = summary['total_return'] / total_symbol_trades
print(f"Average Symbol Return: {avg_symbol_return:+.2f}%")
            avg_pair_return = summary['total_return'] / summary['total_pairs']  # each pair's two symbol returns already sum to its pair return
            print(f"Average Pair Return: {avg_pair_return:+.2f}%")
# Show daily return distribution
daily_returns_list = [data['return'] for data in summary['daily_returns'].values()]
if daily_returns_list:
print(f"Daily Return Range: {min(daily_returns_list):+.2f}% to {max(daily_returns_list):+.2f}%")
def print_outstanding_positions(self) -> None:
"""Print outstanding positions for the single pair."""
all_positions: List[OutstandingPositionT] = self.outstanding_positions()
if not all_positions:
print("\n====== NO OUTSTANDING POSITIONS ======")
return
print(f"\n====== OUTSTANDING POSITIONS ======")
print(f"{'Symbol':<10} {'Side':<4} {'Shares':<10} {'Open $':<8} {'Current $':<10} {'Value $':<12}")
print("-" * 70)
total_value = 0.0
for pos in all_positions:
current_value = pos.get("last_value", 0.0)
print(f"{pos['symbol']:<10} {pos['open_side']:<4} {pos['shares']:<10.2f} "
f"{pos['open_px']:<8.2f} {pos['last_px']:<10.2f} {current_value:<12.2f}")
total_value += current_value
print("-" * 70)
print(f"{'TOTAL VALUE':<60} ${total_value:<12.2f}")
def get_total_realized_pnl(self) -> float:
"""Get total realized PnL."""
return self.total_realized_pnl

View File

@ -1,226 +0,0 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List
import pandas as pd
# ---
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import Config
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
# ---
from pairs_trading.lib.pt_strategy.model_data_policy import DataWindowParams
from pairs_trading.lib.pt_strategy.prediction import Prediction
class PairState(Enum):
INITIAL = 1
OPEN = 2
CLOSE = 3
CLOSE_POSITION = 4
CLOSE_STOP_LOSS = 5
CLOSE_STOP_PROFIT = 6
class TradingPair(NamedObject, ABC):
config_: Config
model_: Any # "PairsTradingModel"
market_data_: pd.DataFrame
user_data_: Dict[str, Any]
stat_model_price_: str
instruments_: List[ExchangeInstrument]
def __init__(
self,
config: Config,
instruments: List[ExchangeInstrument],
):
from pairs_trading.lib.pt_strategy.pt_model import PairsTradingModel
self.config_ = config
self.model_ = PairsTradingModel.create(config)
self.user_data_ = {}
self.instruments_ = instruments
self.instruments_[0].user_data_["symbol"] = instruments[0].instrument_id().split("-", 1)[1]
self.instruments_[1].user_data_["symbol"] = instruments[1].instrument_id().split("-", 1)[1]
self.stat_model_price_ = config.get_value("model/stat_model_price")
def run(self, market_data: pd.DataFrame, data_params: DataWindowParams) -> Prediction: # type: ignore[assignment]
self.market_data_ = market_data[
data_params.training_start_index_ : data_params.training_start_index_ + data_params.training_size_
]
return self.model_.predict(pair=self)
def colnames(self) -> List[str]:
return [
f"{self.stat_model_price_}_{self.symbol_a()}",
f"{self.stat_model_price_}_{self.symbol_b()}",
]
def symbol_a(self) -> str:
return self.get_instrument_a().user_data_["symbol"]
def symbol_b(self) -> str:
return self.get_instrument_b().user_data_["symbol"]
def get_instrument_a(self) -> ExchangeInstrument:
return self.instruments_[0]
def get_instrument_b(self) -> ExchangeInstrument:
return self.instruments_[1]
def __repr__(self) -> str:
return (
f"{self.__class__.__name__}:"
f" symbol_a={self.symbol_a()},"
f" symbol_b={self.symbol_b()},"
f" model={self.model_.__class__.__name__}"
)
class ResearchTradingPair(TradingPair):
def __init__(
self,
config: Config,
instruments: List[ExchangeInstrument],
):
assert len(instruments) == 2, "Trading pair must have exactly 2 instruments"
super().__init__(config=config, instruments=instruments)
self.user_data_ = {
"state": PairState.INITIAL,
}
def is_closed(self) -> bool:
return self.user_data_["state"] in [
PairState.CLOSE,
PairState.CLOSE_POSITION,
PairState.CLOSE_STOP_LOSS,
PairState.CLOSE_STOP_PROFIT,
]
def is_open(self) -> bool:
return not self.is_closed()
def exec_prices_colnames(self) -> List[str]:
return [
f"exec_price_{self.symbol_a()}",
f"exec_price_{self.symbol_b()}",
]
def to_stop_close_conditions(self, predicted_row: pd.Series) -> bool:
config = self.config_
if (
not config.key_exists("stop_close_conditions")
or config.get_value("stop_close_conditions") is None
):
return False
if "profit" in config.get_value("stop_close_conditions"):
current_return = self._current_return(predicted_row)
#
# print(f"time={predicted_row['tstamp']} current_return={current_return}")
#
if current_return >= config.get_value("stop_close_conditions")["profit"]:
print(f"STOP PROFIT: {current_return}")
self.user_data_["stop_close_state"] = PairState.CLOSE_STOP_PROFIT
return True
if "loss" in config.get_value("stop_close_conditions"):
if current_return <= config.get_value("stop_close_conditions")["loss"]:
print(f"STOP LOSS: {current_return}")
self.user_data_["stop_close_state"] = PairState.CLOSE_STOP_LOSS
return True
return False
def _current_return(self, predicted_row: pd.Series) -> float:
if "open_trades" in self.user_data_:
open_trades = self.user_data_["open_trades"]
if len(open_trades) == 0:
return 0.0
def _single_instrument_return(symbol: str) -> float:
instrument_open_trades = open_trades[open_trades["symbol"] == symbol]
instrument_open_price = instrument_open_trades["price"].iloc[0]
sign = -1 if instrument_open_trades["side"].iloc[0] == "SELL" else 1
instrument_price = predicted_row[f"{self.stat_model_price_}_{symbol}"]
instrument_return = (
sign
* (instrument_price - instrument_open_price)
/ instrument_open_price
)
return float(instrument_return) * 100.0
instrument_a_return = _single_instrument_return(self.symbol_a())
instrument_b_return = _single_instrument_return(self.symbol_b())
return instrument_a_return + instrument_b_return
return 0.0
def on_open_trades(self, trades: pd.DataFrame) -> None:
if "close_trades" in self.user_data_:
del self.user_data_["close_trades"]
self.user_data_["open_trades"] = trades
def on_close_trades(self, trades: pd.DataFrame) -> None:
del self.user_data_["open_trades"]
self.user_data_["close_trades"] = trades
def add_outstanding_position(
self,
symbol: str,
open_side: str,
open_px: float,
open_tstamp: datetime,
last_mkt_data_row: pd.Series,
) -> None:
assert symbol in [
self.symbol_a(),
self.symbol_b(),
], "Symbol must be one of the pair's symbols"
assert open_side in ["BUY", "SELL"], "Open side must be either BUY or SELL"
assert open_px > 0, "Open price must be greater than 0"
assert open_tstamp is not None, "Open timestamp must be provided"
assert last_mkt_data_row is not None, "Last market data row must be provided"
exec_prices_col_a, exec_prices_col_b = self.exec_prices_colnames()
if symbol == self.symbol_a():
last_px = last_mkt_data_row[exec_prices_col_a]
else:
last_px = last_mkt_data_row[exec_prices_col_b]
funding_per_position = self.config_.get_value("funding_per_pair") / 2
shares = funding_per_position / open_px
if open_side == "SELL":
shares = -shares
if "outstanding_positions" not in self.user_data_:
self.user_data_["outstanding_positions"] = []
self.user_data_["outstanding_positions"].append(
{
"symbol": symbol,
"open_side": open_side,
"open_px": open_px,
"shares": shares,
"open_tstamp": open_tstamp,
"last_px": last_px,
"last_tstamp": last_mkt_data_row["tstamp"],
"last_value": last_px * shares,
}
)
class LiveTradingPair(TradingPair):
def __init__(self, config: Config, instruments: List[ExchangeInstrument]):
super().__init__(config, instruments)
def to_stop_close_conditions(self, predicted_row: pd.Series) -> bool:
# TODO LiveTradingPair.to_stop_close_conditions()
return False

View File

@ -0,0 +1,52 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from enum import Enum
from typing import Dict, Optional, cast
import pandas as pd
from pt_trading.results import BacktestResult
from pt_trading.trading_pair import TradingPair
NanoPerMin = 1e9
class PairsTradingFitMethod(ABC):
TRADES_COLUMNS = [
"time",
"symbol",
"side",
"action",
"price",
"disequilibrium",
"scaled_disequilibrium",
"signed_scaled_disequilibrium",
"pair",
]
@staticmethod
def create(config: Dict) -> PairsTradingFitMethod:
import importlib
fit_method_class_name = config.get("fit_method_class", None)
assert fit_method_class_name is not None
module_name, class_name = fit_method_class_name.rsplit(".", 1)
module = importlib.import_module(module_name)
fit_method = getattr(module, class_name)()
return cast(PairsTradingFitMethod, fit_method)
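    # Illustrative use of the factory (not in the original file; the module path
    # and class name are hypothetical -- any concrete subclass importable on
    # PYTHONPATH works):
    #   fit = PairsTradingFitMethod.create(
    #       {"fit_method_class": "my_strategies.vecm_fit.VECMRollingFit"}
    #   )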
@abstractmethod
def run_pair(
self, pair: TradingPair, bt_result: BacktestResult
) -> Optional[pd.DataFrame]: ...
@abstractmethod
def reset(self) -> None: ...
@abstractmethod
def create_trading_pair(
self,
config: Dict,
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
) -> TradingPair: ...

743
lib/pt_trading/results.py Normal file
View File

@ -0,0 +1,743 @@
import os
import sqlite3
from datetime import date, datetime
from typing import Any, Dict, List, Optional, Tuple
import pandas as pd
from pt_trading.trading_pair import TradingPair
# Recommended replacement adapters and converters for Python 3.12+
# From: https://docs.python.org/3/library/sqlite3.html#sqlite3-adapter-converter-recipes
def adapt_date_iso(val: date) -> str:
"""Adapt datetime.date to ISO 8601 date."""
return val.isoformat()
def adapt_datetime_iso(val: datetime) -> str:
"""Adapt datetime.datetime to timezone-naive ISO 8601 date."""
return val.isoformat()
def convert_date(val: bytes) -> date:
"""Convert ISO 8601 date to datetime.date object."""
return datetime.fromisoformat(val.decode()).date()
def convert_datetime(val: bytes) -> datetime:
"""Convert ISO 8601 datetime to datetime.datetime object."""
return datetime.fromisoformat(val.decode())
# Register the adapters and converters
sqlite3.register_adapter(date, adapt_date_iso)
sqlite3.register_adapter(datetime, adapt_datetime_iso)
sqlite3.register_converter("date", convert_date)
sqlite3.register_converter("datetime", convert_datetime)
def create_result_database(db_path: str) -> None:
"""
Create the SQLite database and required tables if they don't exist.
"""
try:
# Create directory if it doesn't exist
db_dir = os.path.dirname(db_path)
if db_dir and not os.path.exists(db_dir):
os.makedirs(db_dir, exist_ok=True)
print(f"Created directory: {db_dir}")
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Create the pt_bt_results table for completed trades
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS pt_bt_results (
date DATE,
pair TEXT,
symbol TEXT,
open_time DATETIME,
open_side TEXT,
open_price REAL,
open_quantity INTEGER,
open_disequilibrium REAL,
close_time DATETIME,
close_side TEXT,
close_price REAL,
close_quantity INTEGER,
close_disequilibrium REAL,
symbol_return REAL,
pair_return REAL,
close_condition TEXT
)
"""
)
cursor.execute("DELETE FROM pt_bt_results;")
# Create the outstanding_positions table for open positions
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS outstanding_positions (
date DATE,
pair TEXT,
symbol TEXT,
position_quantity REAL,
last_price REAL,
unrealized_return REAL,
open_price REAL,
open_side TEXT
)
"""
)
cursor.execute("DELETE FROM outstanding_positions;")
# Create the config table for storing configuration JSON for reference
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS config (
id INTEGER PRIMARY KEY AUTOINCREMENT,
run_timestamp DATETIME,
config_file_path TEXT,
config_json TEXT,
fit_method_class TEXT,
datafiles TEXT,
instruments TEXT
)
"""
)
cursor.execute("DELETE FROM config;")
conn.commit()
conn.close()
except Exception as e:
print(f"Error creating result database: {str(e)}")
raise
def store_config_in_database(
db_path: str,
config_file_path: str,
config: Dict,
fit_method_class: str,
datafiles: List[Tuple[str, str]],
instruments: List[Dict[str, str]],
) -> None:
"""
Store configuration information in the database for reference.
"""
import json
if db_path.upper() == "NONE":
return
try:
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Convert config to JSON string
config_json = json.dumps(config, indent=2, default=str)
# Convert lists to comma-separated strings for storage
datafiles_str = ", ".join([f"{datafile}" for _, datafile in datafiles])
instruments_str = ", ".join(
[
f"{inst['symbol']}:{inst['instrument_type']}:{inst['exchange_id']}"
for inst in instruments
]
)
# Insert configuration record
cursor.execute(
"""
INSERT INTO config (
run_timestamp, config_file_path, config_json, fit_method_class, datafiles, instruments
) VALUES (?, ?, ?, ?, ?, ?)
""",
(
datetime.now(),
config_file_path,
config_json,
fit_method_class,
datafiles_str,
instruments_str,
),
)
conn.commit()
conn.close()
print(f"Configuration stored in database")
except Exception as e:
print(f"Error storing configuration in database: {str(e)}")
import traceback
traceback.print_exc()
def convert_timestamp(timestamp: Any) -> Optional[datetime]:
"""Convert pandas Timestamp to Python datetime object for SQLite compatibility."""
if timestamp is None:
return None
if isinstance(timestamp, pd.Timestamp):
return timestamp.to_pydatetime()
elif isinstance(timestamp, datetime):
return timestamp
elif isinstance(timestamp, date):
return datetime.combine(timestamp, datetime.min.time())
elif isinstance(timestamp, str):
return datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
elif isinstance(timestamp, int):
return datetime.fromtimestamp(timestamp)
else:
raise ValueError(f"Unsupported timestamp type: {type(timestamp)}")
class BacktestResult:
"""
Class to handle backtest results, trades tracking, PnL calculations, and reporting.
"""
def __init__(self, config: Dict[str, Any]):
self.config = config
self.trades: Dict[str, Dict[str, Any]] = {}
self.total_realized_pnl = 0.0
self.outstanding_positions: List[Dict[str, Any]] = []
self.pairs_trades_: Dict[str, List[Dict[str, Any]]] = {}
def add_trade(
self,
pair_nm: str,
symbol: str,
side: str,
action: str,
price: Any,
disequilibrium: Optional[float] = None,
scaled_disequilibrium: Optional[float] = None,
timestamp: Optional[datetime] = None,
status: Optional[str] = None,
) -> None:
"""Add a trade to the results tracking."""
pair_nm = str(pair_nm)
if pair_nm not in self.trades:
self.trades[pair_nm] = {symbol: []}
if symbol not in self.trades[pair_nm]:
self.trades[pair_nm][symbol] = []
self.trades[pair_nm][symbol].append(
{
"symbol": symbol,
"side": side,
"action": action,
"price": price,
"disequilibrium": disequilibrium,
"scaled_disequilibrium": scaled_disequilibrium,
"timestamp": timestamp,
"status": status,
}
)
def add_outstanding_position(self, position: Dict[str, Any]) -> None:
"""Add an outstanding position to tracking."""
self.outstanding_positions.append(position)
def add_realized_pnl(self, realized_pnl: float) -> None:
"""Add realized PnL to the total."""
self.total_realized_pnl += realized_pnl
def get_total_realized_pnl(self) -> float:
"""Get total realized PnL."""
return self.total_realized_pnl
def get_outstanding_positions(self) -> List[Dict[str, Any]]:
"""Get all outstanding positions."""
return self.outstanding_positions
def get_trades(self) -> Dict[str, Dict[str, Any]]:
"""Get all trades."""
return self.trades
def clear_trades(self) -> None:
"""Clear all trades (used when processing new files)."""
self.trades.clear()
def collect_single_day_results(self, pairs_trades: List[pd.DataFrame]) -> None:
"""Collect and process single day trading results."""
result = pd.concat(pairs_trades, ignore_index=True)
result["time"] = pd.to_datetime(result["time"])
result = result.set_index("time").sort_index()
print("\n -------------- Suggested Trades ")
print(result)
for row in result.itertuples():
side = row.side
action = row.action
symbol = row.symbol
price = row.price
disequilibrium = getattr(row, "disequilibrium", None)
scaled_disequilibrium = getattr(row, "scaled_disequilibrium", None)
if hasattr(row, "time"):
timestamp = getattr(row, "time")
else:
timestamp = convert_timestamp(row.Index)
status = row.status
self.add_trade(
pair_nm=str(row.pair),
symbol=str(symbol),
side=str(side),
action=str(action),
price=float(str(price)),
disequilibrium=disequilibrium,
scaled_disequilibrium=scaled_disequilibrium,
timestamp=timestamp,
status=str(status) if status is not None else "?",
)
def print_single_day_results(self) -> None:
"""Print single day results summary."""
for pair, symbols in self.trades.items():
print(f"\n--- {pair} ---")
for symbol, trades in symbols.items():
for trade_data in trades:
if len(trade_data) >= 2:
side, price = trade_data[:2]
print(f"{symbol} {side} at ${price}")
def print_results_summary(self, all_results: Dict[str, Dict[str, Any]]) -> None:
"""Print summary of all processed files."""
print("\n====== Summary of All Processed Files ======")
for filename, data in all_results.items():
trade_count = sum(
len(trades)
for symbol_trades in data["trades"].values()
for trades in symbol_trades.values()
)
print(f"{filename}: {trade_count} trades")
def calculate_returns(self, all_results: Dict[str, Dict[str, Any]]) -> None:
"""Calculate and print returns by day and pair."""
def _symbol_return(trade1_side: str, trade1_px: float, trade2_side: str, trade2_px: float) -> float:
if trade1_side == "BUY" and trade2_side == "SELL":
return (trade2_px - trade1_px) / trade1_px * 100
elif trade1_side == "SELL" and trade2_side == "BUY":
return (trade1_px - trade2_px) / trade1_px * 100
else:
return 0
print("\n====== Returns By Day and Pair ======")
        for filename, data in all_results.items():
            trades = []  # reset per file so earlier days' trades are not double-counted
pairs = list(data["trades"].keys())
for pair in pairs:
self.pairs_trades_[pair] = []
trades_dict = data["trades"][pair]
for symbol in trades_dict.keys():
trades.extend(trades_dict[symbol])
trades = sorted(trades, key=lambda x: (x["timestamp"], x["symbol"]))
print(f"\n--- {filename} ---")
self.outstanding_positions = data["outstanding_positions"]
day_return = 0.0
for idx in range(0, len(trades), 4):
symbol_a = trades[idx]["symbol"]
trade_a_1 = trades[idx]
trade_a_2 = trades[idx + 2]
symbol_b = trades[idx + 1]["symbol"]
trade_b_1 = trades[idx + 1]
trade_b_2 = trades[idx + 3]
assert (
trade_a_1["timestamp"] < trade_a_2["timestamp"]
), f"Trade 1: {trade_a_1['timestamp']} is not less than Trade 2: {trade_a_2['timestamp']}"
assert (
trade_a_1["action"] == "OPEN" and trade_a_2["action"] == "CLOSE"
), f"Trade 1: {trade_a_1['action']} and Trade 2: {trade_a_2['action']} are the same"
# Calculate return based on action combination
symbol_a_return = _symbol_return(trade_a_1["side"], trade_a_1["price"], trade_a_2["side"], trade_a_2["price"])
symbol_b_return = _symbol_return(trade_b_1["side"], trade_b_1["price"], trade_b_2["side"], trade_b_2["price"])
pair_return = symbol_a_return + symbol_b_return
self.pairs_trades_[pair].append(
{
"symbol": symbol_a,
"open_side": trade_a_1["side"],
"open_action": trade_a_1["action"],
"open_price": trade_a_1["price"],
"close_side": trade_a_2["side"],
"close_action": trade_a_2["action"],
"close_price": trade_a_2["price"],
"symbol_return": symbol_a_return,
"open_disequilibrium": trade_a_1["disequilibrium"],
"open_scaled_disequilibrium": trade_a_1["scaled_disequilibrium"],
"close_disequilibrium": trade_a_2["disequilibrium"],
"close_scaled_disequilibrium": trade_a_2["scaled_disequilibrium"],
"open_time": trade_a_1["timestamp"],
"close_time": trade_a_2["timestamp"],
"shares": self.config["funding_per_pair"] / 2 / trade_a_1["price"],
"is_completed": True,
"close_condition": trade_a_2["status"],
"pair_return": pair_return
}
)
self.pairs_trades_[pair].append(
{
"symbol": symbol_b,
"open_side": trade_b_1["side"],
"open_action": trade_b_1["action"],
"open_price": trade_b_1["price"],
"close_side": trade_b_2["side"],
"close_action": trade_b_2["action"],
"close_price": trade_b_2["price"],
"symbol_return": symbol_b_return,
"open_disequilibrium": trade_b_1["disequilibrium"],
"open_scaled_disequilibrium": trade_b_1["scaled_disequilibrium"],
"close_disequilibrium": trade_b_2["disequilibrium"],
"close_scaled_disequilibrium": trade_b_2["scaled_disequilibrium"],
"open_time": trade_b_1["timestamp"],
"close_time": trade_b_2["timestamp"],
"shares": self.config["funding_per_pair"] / 2 / trade_b_1["price"],
"is_completed": True,
"close_condition": trade_b_2["status"],
"pair_return": pair_return
}
)
# Print pair returns with disequilibrium information
day_return = 0.0
if pair in self.pairs_trades_:
print(f"{pair}:")
pair_return = 0.0
for trd in self.pairs_trades_[pair]:
disequil_info = ""
                    if (
                        trd["open_scaled_disequilibrium"] is not None
                        and trd["close_scaled_disequilibrium"] is not None
                    ):
disequil_info = (
f' | Open Dis-eq: {trd["open_scaled_disequilibrium"]:.2f},'
f' Close Dis-eq: {trd["close_scaled_disequilibrium"]:.2f}'
)
print(
f' {trd["open_time"].time()}-{trd["close_time"].time()} {trd["symbol"]}: '
f' {trd["open_side"]} @ ${trd["open_price"]:.2f},'
f' {trd["close_side"]} @ ${trd["close_price"]:.2f},'
f' Return: {trd["symbol_return"]:.2f}%{disequil_info}'
)
pair_return += trd["symbol_return"]
print(f" Pair Total Return: {pair_return:.2f}%")
day_return += pair_return
# Print day total return and add to global realized PnL
if day_return != 0:
print(f" Day Total Return: {day_return:.2f}%")
self.add_realized_pnl(day_return)
def print_outstanding_positions(self) -> None:
"""Print all outstanding positions with share quantities and current values."""
if not self.get_outstanding_positions():
print("\n====== NO OUTSTANDING POSITIONS ======")
return
print(f"\n====== OUTSTANDING POSITIONS ======")
print(
f"{'Pair':<15}"
f" {'Symbol':<10}"
f" {'Side':<4}"
f" {'Shares':<10}"
f" {'Open $':<8}"
f" {'Current $':<10}"
f" {'Value $':<12}"
f" {'Disequilibrium':<15}"
)
print("-" * 100)
total_value = 0.0
for pos in self.get_outstanding_positions():
# Print position A
print(
f"{pos['pair']:<15}"
f" {pos['symbol_a']:<10}"
f" {pos['side_a']:<4}"
f" {pos['shares_a']:<10.2f}"
f" {pos['open_px_a']:<8.2f}"
f" {pos['current_px_a']:<10.2f}"
f" {pos['current_value_a']:<12.2f}"
f" {'':<15}"
)
# Print position B
print(
f"{'':<15}"
f" {pos['symbol_b']:<10}"
f" {pos['side_b']:<4}"
f" {pos['shares_b']:<10.2f}"
f" {pos['open_px_b']:<8.2f}"
f" {pos['current_px_b']:<10.2f}"
f" {pos['current_value_b']:<12.2f}"
)
# Print pair totals with disequilibrium info
print(
f"{'':<15}"
f" {'PAIR TOTAL':<10}"
f" {'':<4}"
f" {'':<10}"
f" {'':<8}"
f" {'':<10}"
f" {pos['total_current_value']:<12.2f}"
)
# Print disequilibrium details
print(
f"{'':<15}"
f" {'DISEQUIL':<10}"
f" {'':<4}"
f" {'':<10}"
f" {'':<8}"
f" {'':<10}"
f" Raw: {pos['current_disequilibrium']:<6.4f}"
f" Scaled: {pos['current_scaled_disequilibrium']:<6.4f}"
)
print("-" * 100)
total_value += pos["total_current_value"]
print(f"{'TOTAL OUTSTANDING VALUE':<80} ${total_value:<12.2f}")
def print_grand_totals(self) -> None:
"""Print grand totals across all pairs."""
print(f"\n====== GRAND TOTALS ACROSS ALL PAIRS ======")
print(f"Total Realized PnL: {self.get_total_realized_pnl():.2f}%")
def handle_outstanding_position(
self,
pair: TradingPair,
pair_result_df: pd.DataFrame,
last_row_index: int,
open_side_a: str,
open_side_b: str,
open_px_a: float,
open_px_b: float,
open_tstamp: datetime,
) -> Tuple[float, float, float]:
"""
Handle calculation and tracking of outstanding positions when no close signal is found.
Args:
pair: TradingPair object
pair_result_df: DataFrame with pair results
last_row_index: Index of the last row in the data
open_side_a, open_side_b: Trading sides for symbols A and B
open_px_a, open_px_b: Opening prices for symbols A and B
open_tstamp: Opening timestamp
"""
if pair_result_df is None or pair_result_df.empty:
return 0, 0, 0
last_row = pair_result_df.loc[last_row_index]
last_tstamp = last_row["tstamp"]
colname_a, colname_b = pair.exec_prices_colnames()
last_px_a = last_row[colname_a]
last_px_b = last_row[colname_b]
# Calculate share quantities based on funding per pair
# Split funding equally between the two positions
funding_per_position = self.config["funding_per_pair"] / 2
shares_a = funding_per_position / open_px_a
shares_b = funding_per_position / open_px_b
# Calculate current position values (shares * current price)
current_value_a = shares_a * last_px_a * (-1 if open_side_a == "SELL" else 1)
current_value_b = shares_b * last_px_b * (-1 if open_side_b == "SELL" else 1)
total_current_value = current_value_a + current_value_b
# Get disequilibrium information
current_disequilibrium = last_row["disequilibrium"]
current_scaled_disequilibrium = last_row["scaled_disequilibrium"]
# Store outstanding positions
self.add_outstanding_position(
{
"pair": str(pair),
"symbol_a": pair.symbol_a_,
"symbol_b": pair.symbol_b_,
"side_a": open_side_a,
"side_b": open_side_b,
"shares_a": shares_a,
"shares_b": shares_b,
"open_px_a": open_px_a,
"open_px_b": open_px_b,
"current_px_a": last_px_a,
"current_px_b": last_px_b,
"current_value_a": current_value_a,
"current_value_b": current_value_b,
"total_current_value": total_current_value,
"open_time": open_tstamp,
"last_time": last_tstamp,
"current_abs_term": current_scaled_disequilibrium,
"current_disequilibrium": current_disequilibrium,
"current_scaled_disequilibrium": current_scaled_disequilibrium,
}
)
# Print position details
print(f"{pair}: NO CLOSE SIGNAL FOUND - Position held until end of session")
print(f" Open: {open_tstamp} | Last: {last_tstamp}")
print(
f" {pair.symbol_a_}: {open_side_a} {shares_a:.2f} shares @ ${open_px_a:.2f} -> ${last_px_a:.2f} | Value: ${current_value_a:.2f}"
)
print(
f" {pair.symbol_b_}: {open_side_b} {shares_b:.2f} shares @ ${open_px_b:.2f} -> ${last_px_b:.2f} | Value: ${current_value_b:.2f}"
)
print(f" Total Value: ${total_current_value:.2f}")
print(
f" Disequilibrium: {current_disequilibrium:.4f} | Scaled: {current_scaled_disequilibrium:.4f}"
)
return current_value_a, current_value_b, total_current_value
def store_results_in_database(
self, db_path: str, day: str
) -> None:
"""
Store backtest results in the SQLite database.
"""
if db_path.upper() == "NONE":
return
try:
# Extract date from datafile name (assuming format like 20250528.mktdata.ohlcv.db)
date_str = day
# Convert to proper date format
try:
date_obj = datetime.strptime(date_str, "%Y%m%d").date()
except ValueError:
# If date parsing fails, use current date
date_obj = datetime.now().date()
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Process each trade from bt_result
trades = self.get_trades()
for pair_name, _ in trades.items():
# Second pass: insert completed trade records into database
for trade_pair in sorted(self.pairs_trades_[pair_name], key=lambda x: x["open_time"]):
# Only store completed trades in pt_bt_results table
cursor.execute(
"""
INSERT INTO pt_bt_results (
date, pair, symbol, open_time, open_side, open_price,
open_quantity, open_disequilibrium, close_time, close_side,
close_price, close_quantity, close_disequilibrium,
symbol_return, pair_return, close_condition
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
date_obj,
pair_name,
trade_pair["symbol"],
trade_pair["open_time"],
trade_pair["open_side"],
trade_pair["open_price"],
trade_pair["shares"],
trade_pair["open_scaled_disequilibrium"],
trade_pair["close_time"],
trade_pair["close_side"],
trade_pair["close_price"],
trade_pair["shares"],
trade_pair["close_scaled_disequilibrium"],
trade_pair["symbol_return"],
trade_pair["pair_return"],
trade_pair["close_condition"]
),
)
# Store outstanding positions in separate table
outstanding_positions = self.get_outstanding_positions()
for pos in outstanding_positions:
# Calculate position quantity (negative for SELL positions)
position_qty_a = (
pos["shares_a"] if pos["side_a"] == "BUY" else -pos["shares_a"]
)
position_qty_b = (
pos["shares_b"] if pos["side_b"] == "BUY" else -pos["shares_b"]
)
# Calculate unrealized returns
# For symbol A: (current_price - open_price) / open_price * 100 * position_direction
unrealized_return_a = (
(pos["current_px_a"] - pos["open_px_a"]) / pos["open_px_a"] * 100
) * (1 if pos["side_a"] == "BUY" else -1)
unrealized_return_b = (
(pos["current_px_b"] - pos["open_px_b"]) / pos["open_px_b"] * 100
) * (1 if pos["side_b"] == "BUY" else -1)
# Store outstanding position for symbol A
cursor.execute(
"""
INSERT INTO outstanding_positions (
date, pair, symbol, position_quantity, last_price, unrealized_return, open_price, open_side
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
(
date_obj,
pos["pair"],
pos["symbol_a"],
position_qty_a,
pos["current_px_a"],
unrealized_return_a,
pos["open_px_a"],
pos["side_a"],
),
)
# Store outstanding position for symbol B
cursor.execute(
"""
INSERT INTO outstanding_positions (
date, pair, symbol, position_quantity, last_price, unrealized_return, open_price, open_side
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
(
date_obj,
pos["pair"],
pos["symbol_b"],
position_qty_b,
pos["current_px_b"],
unrealized_return_b,
pos["open_px_b"],
pos["side_b"],
),
)
conn.commit()
conn.close()
except Exception as e:
print(f"Error storing results in database: {str(e)}")
import traceback
traceback.print_exc()

View File

@ -0,0 +1,317 @@
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Dict, Optional, cast
import pandas as pd # type: ignore[import]
from pt_trading.fit_method import PairsTradingFitMethod
from pt_trading.results import BacktestResult
from pt_trading.trading_pair import PairState, TradingPair
from statsmodels.tsa.vector_ar.vecm import VECM, VECMResults
NanoPerMin = 1e9
class RollingFit(PairsTradingFitMethod):
"""
N O T E:
=========
    - This class remains abstract.
    - The following method must be implemented by a subclass:
- create_trading_pair()
=========
"""
def __init__(self) -> None:
super().__init__()
def run_pair(
self, pair: TradingPair, bt_result: BacktestResult
) -> Optional[pd.DataFrame]:
print(f"***{pair}*** STARTING....")
config = pair.config_
curr_training_start_idx = pair.get_begin_index()
end_index = pair.get_end_index()
pair.user_data_["state"] = PairState.INITIAL
# Initialize trades DataFrame with proper dtypes to avoid concatenation warnings
pair.user_data_["trades"] = pd.DataFrame(columns=self.TRADES_COLUMNS).astype(
{
"time": "datetime64[ns]",
"symbol": "string",
"side": "string",
"action": "string",
"price": "float64",
"disequilibrium": "float64",
"scaled_disequilibrium": "float64",
"pair": "object",
}
)
training_minutes = config["training_minutes"]
curr_predicted_row_idx = 0
while True:
print(curr_training_start_idx, end="\r")
pair.get_datasets(
training_minutes=training_minutes,
training_start_index=curr_training_start_idx,
testing_size=1,
)
if len(pair.training_df_) < training_minutes:
print(
f"{pair}: current offset={curr_training_start_idx}"
f" * Training data length={len(pair.training_df_)} < {training_minutes}"
" * Not enough training data. Completing the job."
)
break
try:
# ================================ PREDICTION ================================
self.pair_predict_result_ = pair.predict()
except Exception as e:
raise RuntimeError(
f"{pair}: TrainingPrediction failed: {str(e)}"
) from e
curr_training_start_idx += 1
if curr_training_start_idx > end_index:
break
curr_predicted_row_idx += 1
self._create_trading_signals(pair, config, bt_result)
print(f"***{pair}*** FINISHED *** Num Trades:{len(pair.user_data_['trades'])}")
return pair.get_trades()
def _create_trading_signals(
self, pair: TradingPair, config: Dict, bt_result: BacktestResult
) -> None:
predicted_df = self.pair_predict_result_
assert predicted_df is not None
open_threshold = config["dis-equilibrium_open_trshld"]
close_threshold = config["dis-equilibrium_close_trshld"]
for curr_predicted_row_idx in range(len(predicted_df)):
pred_row = predicted_df.iloc[curr_predicted_row_idx]
scaled_disequilibrium = pred_row["scaled_disequilibrium"]
if pair.user_data_["state"] in [
PairState.INITIAL,
PairState.CLOSE,
PairState.CLOSE_POSITION,
PairState.CLOSE_STOP_LOSS,
PairState.CLOSE_STOP_PROFIT,
]:
if scaled_disequilibrium >= open_threshold:
open_trades = self._get_open_trades(
pair, row=pred_row, open_threshold=open_threshold
)
if open_trades is not None:
open_trades["status"] = PairState.OPEN.name
print(f"OPEN TRADES:\n{open_trades}")
pair.add_trades(open_trades)
pair.user_data_["state"] = PairState.OPEN
pair.on_open_trades(open_trades)
elif pair.user_data_["state"] == PairState.OPEN:
if scaled_disequilibrium <= close_threshold:
close_trades = self._get_close_trades(
pair, row=pred_row, close_threshold=close_threshold
)
if close_trades is not None:
close_trades["status"] = PairState.CLOSE.name
print(f"CLOSE TRADES:\n{close_trades}")
pair.add_trades(close_trades)
pair.user_data_["state"] = PairState.CLOSE
pair.on_close_trades(close_trades)
elif pair.to_stop_close_conditions(predicted_row=pred_row):
close_trades = self._get_close_trades(
pair, row=pred_row, close_threshold=close_threshold
)
if close_trades is not None:
close_trades["status"] = pair.user_data_[
"stop_close_state"
].name
print(f"STOP CLOSE TRADES:\n{close_trades}")
pair.add_trades(close_trades)
pair.user_data_["state"] = pair.user_data_["stop_close_state"]
pair.on_close_trades(close_trades)
# Outstanding positions
if pair.user_data_["state"] == PairState.OPEN:
print(f"{pair}: *** Position is NOT CLOSED. ***")
# outstanding positions
if config["close_outstanding_positions"]:
close_position_row = pd.Series(pair.market_data_.iloc[-2])
close_position_row["disequilibrium"] = 0.0
close_position_row["scaled_disequilibrium"] = 0.0
close_position_row["signed_scaled_disequilibrium"] = 0.0
close_position_trades = self._get_close_trades(
pair=pair, row=close_position_row, close_threshold=close_threshold
)
if close_position_trades is not None:
close_position_trades["status"] = PairState.CLOSE_POSITION.name
print(f"CLOSE_POSITION TRADES:\n{close_position_trades}")
pair.add_trades(close_position_trades)
pair.user_data_["state"] = PairState.CLOSE_POSITION
pair.on_close_trades(close_position_trades)
else:
if predicted_df is not None:
bt_result.handle_outstanding_position(
pair=pair,
pair_result_df=predicted_df,
last_row_index=0,
open_side_a=pair.user_data_["open_side_a"],
open_side_b=pair.user_data_["open_side_b"],
open_px_a=pair.user_data_["open_px_a"],
open_px_b=pair.user_data_["open_px_b"],
open_tstamp=pair.user_data_["open_tstamp"],
)
def _get_open_trades(
self, pair: TradingPair, row: pd.Series, open_threshold: float
) -> Optional[pd.DataFrame]:
colname_a, colname_b = pair.exec_prices_colnames()
open_row = row
open_tstamp = open_row["tstamp"]
open_disequilibrium = open_row["disequilibrium"]
open_scaled_disequilibrium = open_row["scaled_disequilibrium"]
signed_scaled_disequilibrium = open_row["signed_scaled_disequilibrium"]
        open_px_a = open_row[colname_a]
        open_px_b = open_row[colname_b]
# creating the trades
        print(f"OPEN_TRADES: {open_tstamp} open_scaled_disequilibrium={open_scaled_disequilibrium}")
if open_disequilibrium > 0:
open_side_a = "SELL"
open_side_b = "BUY"
close_side_a = "BUY"
close_side_b = "SELL"
else:
open_side_a = "BUY"
open_side_b = "SELL"
close_side_a = "SELL"
close_side_b = "BUY"
# save closing sides
pair.user_data_["open_side_a"] = open_side_a
pair.user_data_["open_side_b"] = open_side_b
pair.user_data_["open_px_a"] = open_px_a
pair.user_data_["open_px_b"] = open_px_b
pair.user_data_["open_tstamp"] = open_tstamp
pair.user_data_["close_side_a"] = close_side_a
pair.user_data_["close_side_b"] = close_side_b
# create opening trades
trd_signal_tuples = [
(
open_tstamp,
pair.symbol_a_,
open_side_a,
"OPEN",
open_px_a,
open_disequilibrium,
open_scaled_disequilibrium,
signed_scaled_disequilibrium,
pair,
),
(
open_tstamp,
pair.symbol_b_,
open_side_b,
"OPEN",
open_px_b,
open_disequilibrium,
open_scaled_disequilibrium,
signed_scaled_disequilibrium,
pair,
),
]
# Create DataFrame with explicit dtypes to avoid concatenation warnings
df = pd.DataFrame(trd_signal_tuples, columns=self.TRADES_COLUMNS)
# Ensure consistent dtypes
return df.astype(
{
"time": "datetime64[ns]",
"action": "string",
"symbol": "string",
"price": "float64",
"disequilibrium": "float64",
"scaled_disequilibrium": "float64",
"signed_scaled_disequilibrium": "float64",
"pair": "object",
}
)
def _get_close_trades(
self, pair: TradingPair, row: pd.Series, close_threshold: float
) -> Optional[pd.DataFrame]:
colname_a, colname_b = pair.exec_prices_colnames()
close_row = row
close_tstamp = close_row["tstamp"]
close_disequilibrium = close_row["disequilibrium"]
close_scaled_disequilibrium = close_row["scaled_disequilibrium"]
signed_scaled_disequilibrium = close_row["signed_scaled_disequilibrium"]
        close_px_a = close_row[colname_a]
        close_px_b = close_row[colname_b]
close_side_a = pair.user_data_["close_side_a"]
close_side_b = pair.user_data_["close_side_b"]
trd_signal_tuples = [
(
close_tstamp,
pair.symbol_a_,
close_side_a,
"CLOSE",
close_px_a,
close_disequilibrium,
close_scaled_disequilibrium,
signed_scaled_disequilibrium,
pair,
),
(
close_tstamp,
pair.symbol_b_,
close_side_b,
"CLOSE",
close_px_b,
close_disequilibrium,
close_scaled_disequilibrium,
signed_scaled_disequilibrium,
pair,
),
]
# Add tuples to data frame with explicit dtypes to avoid concatenation warnings
df = pd.DataFrame(
trd_signal_tuples,
columns=self.TRADES_COLUMNS,
)
# Ensure consistent dtypes
return df.astype(
{
"time": "datetime64[ns]",
"action": "string",
"symbol": "string",
"price": "float64",
"disequilibrium": "float64",
"scaled_disequilibrium": "float64",
"signed_scaled_disequilibrium": "float64",
"pair": "object",
}
)
    def reset(self) -> None:
        # Nothing to reset: run_pair() re-derives its training start index per call.
        pass

View File

@ -0,0 +1,380 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Dict, List, Optional
import pandas as pd # type:ignore
class PairState(Enum):
INITIAL = 1
OPEN = 2
CLOSE = 3
CLOSE_POSITION = 4
CLOSE_STOP_LOSS = 5
CLOSE_STOP_PROFIT = 6
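# State lifecycle (as used by the fit method's signal loop): INITIAL -> OPEN ->
# one of CLOSE / CLOSE_POSITION / CLOSE_STOP_LOSS / CLOSE_STOP_PROFIT; any
# CLOSE_* state re-arms the pair for the next OPEN signal.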
class CointegrationData:
EG_PVALUE_THRESHOLD = 0.05
tstamp_: pd.Timestamp
pair_: str
eg_pvalue_: float
johansen_lr1_: float
johansen_cvt_: float
eg_is_cointegrated_: bool
johansen_is_cointegrated_: bool
def __init__(self, pair: TradingPair):
training_df = pair.training_df_
assert training_df is not None
from statsmodels.tsa.vector_ar.vecm import coint_johansen
df = training_df[pair.colnames()].reset_index(drop=True)
# Run Johansen cointegration test
result = coint_johansen(df, det_order=0, k_ar_diff=1)
self.johansen_lr1_ = result.lr1[0]
self.johansen_cvt_ = result.cvt[0, 1]
self.johansen_is_cointegrated_ = self.johansen_lr1_ > self.johansen_cvt_
# Run Engle-Granger cointegration test
from statsmodels.tsa.stattools import coint # type: ignore
col1, col2 = pair.colnames()
assert training_df is not None
series1 = training_df[col1].reset_index(drop=True)
series2 = training_df[col2].reset_index(drop=True)
self.eg_pvalue_ = float(coint(series1, series2)[1])
self.eg_is_cointegrated_ = bool(self.eg_pvalue_ < self.EG_PVALUE_THRESHOLD)
self.tstamp_ = training_df.index[-1]
self.pair_ = pair.name()
def to_dict(self) -> Dict[str, Any]:
return {
"tstamp": self.tstamp_,
"pair": self.pair_,
"eg_pvalue": self.eg_pvalue_,
"johansen_lr1": self.johansen_lr1_,
"johansen_cvt": self.johansen_cvt_,
"eg_is_cointegrated": self.eg_is_cointegrated_,
"johansen_is_cointegrated": self.johansen_is_cointegrated_,
}
def __repr__(self) -> str:
return f"CointegrationData(tstamp={self.tstamp_}, pair={self.pair_}, eg_pvalue={self.eg_pvalue_}, johansen_lr1={self.johansen_lr1_}, johansen_cvt={self.johansen_cvt_}, eg_is_cointegrated={self.eg_is_cointegrated_}, johansen_is_cointegrated={self.johansen_is_cointegrated_})"
class TradingPair(ABC):
market_data_: pd.DataFrame
symbol_a_: str
symbol_b_: str
stat_model_price_: str
training_mu_: float
training_std_: float
training_df_: pd.DataFrame
testing_df_: pd.DataFrame
user_data_: Dict[str, Any]
# predicted_df_: Optional[pd.DataFrame]
def __init__(
self,
config: Dict[str, Any],
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
):
self.symbol_a_ = symbol_a
self.symbol_b_ = symbol_b
self.stat_model_price_ = config["stat_model_price"]
self.user_data_ = {}
self.predicted_df_ = None
self.config_ = config
self._set_market_data(market_data)
def _set_market_data(self, market_data: pd.DataFrame) -> None:
self.market_data_ = pd.DataFrame(
self._transform_dataframe(market_data)[["tstamp"] + self.colnames()]
)
self.market_data_ = self.market_data_.dropna().reset_index(drop=True)
self.market_data_["tstamp"] = pd.to_datetime(self.market_data_["tstamp"])
self.market_data_ = self.market_data_.sort_values("tstamp")
self._set_execution_price_data()
def _set_execution_price_data(self) -> None:
if "execution_price" not in self.config_:
self.market_data_[f"exec_price_{self.symbol_a_}"] = self.market_data_[f"{self.stat_model_price_}_{self.symbol_a_}"]
self.market_data_[f"exec_price_{self.symbol_b_}"] = self.market_data_[f"{self.stat_model_price_}_{self.symbol_b_}"]
return
        execution_price_column = self.config_["execution_price"]["column"]  # currently unused: the shift below keys off stat_model_price_
execution_price_shift = self.config_["execution_price"]["shift"]
self.market_data_[f"exec_price_{self.symbol_a_}"] = self.market_data_[f"{self.stat_model_price_}_{self.symbol_a_}"].shift(-execution_price_shift)
self.market_data_[f"exec_price_{self.symbol_b_}"] = self.market_data_[f"{self.stat_model_price_}_{self.symbol_b_}"].shift(-execution_price_shift)
self.market_data_ = self.market_data_.dropna().reset_index(drop=True)
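    # Illustrative config (values hypothetical):
    #   {"execution_price": {"column": "close", "shift": 1}}
    # shift(-1) pulls bar t+1's price onto row t, so a signal on bar t is assumed
    # to fill at the next bar; trailing rows without a next bar are dropped.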
def get_begin_index(self) -> int:
if "trading_hours" not in self.config_:
return 0
assert "timezone" in self.config_["trading_hours"]
assert "begin_session" in self.config_["trading_hours"]
start_time = (
pd.to_datetime(self.config_["trading_hours"]["begin_session"])
.tz_localize(self.config_["trading_hours"]["timezone"])
.time()
)
mask = self.market_data_["tstamp"].dt.time >= start_time
return int(self.market_data_.index[mask].min())
def get_end_index(self) -> int:
if "trading_hours" not in self.config_:
return 0
assert "timezone" in self.config_["trading_hours"]
assert "end_session" in self.config_["trading_hours"]
end_time = (
pd.to_datetime(self.config_["trading_hours"]["end_session"])
.tz_localize(self.config_["trading_hours"]["timezone"])
.time()
)
mask = self.market_data_["tstamp"].dt.time <= end_time
return int(self.market_data_.index[mask].max())
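    # Illustrative config (values hypothetical):
    #   {"trading_hours": {"timezone": "America/New_York",
    #                      "begin_session": "09:30", "end_session": "16:00"}}
    # get_begin_index()/get_end_index() then bound signal generation to that session.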
def _transform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
# Select only the columns we need
df_selected: pd.DataFrame = pd.DataFrame(
df[["tstamp", "symbol", self.stat_model_price_]]
)
# Start with unique timestamps
result_df: pd.DataFrame = (
pd.DataFrame(df_selected["tstamp"]).drop_duplicates().reset_index(drop=True)
)
# For each unique symbol, add a corresponding close price column
symbols = df_selected["symbol"].unique()
for symbol in symbols:
# Filter rows for this symbol
df_symbol = df_selected[df_selected["symbol"] == symbol].reset_index(
drop=True
)
            # Create column name like "close_COIN"
new_price_column = f"{self.stat_model_price_}_{symbol}"
# Create temporary dataframe with timestamp and price
temp_df = pd.DataFrame(
{
"tstamp": df_symbol["tstamp"],
new_price_column: df_symbol[self.stat_model_price_],
}
)
# Join with our result dataframe
result_df = pd.merge(result_df, temp_df, on="tstamp", how="left")
        result_df = result_df.reset_index(
            drop=True
        )  # dropna() is deferred to the end so an irrelevant symbol's gaps are handled once
        return result_df.dropna()
def get_datasets(
self,
training_minutes: int,
training_start_index: int = 0,
testing_size: Optional[int] = None,
) -> None:
        testing_start_index = training_start_index + training_minutes
        self.training_df_ = self.market_data_.iloc[
            training_start_index:testing_start_index, :
        ].copy()
        assert self.training_df_ is not None
        self.training_df_ = self.training_df_.dropna().reset_index(drop=True)
if testing_size is None:
self.testing_df_ = self.market_data_.iloc[testing_start_index:, :].copy()
else:
self.testing_df_ = self.market_data_.iloc[
testing_start_index : testing_start_index + testing_size, :
].copy()
assert self.testing_df_ is not None
self.testing_df_ = self.testing_df_.dropna().reset_index(drop=True)
def colnames(self) -> List[str]:
return [
f"{self.stat_model_price_}_{self.symbol_a_}",
f"{self.stat_model_price_}_{self.symbol_b_}",
]
def exec_prices_colnames(self) -> List[str]:
return [
f"exec_price_{self.symbol_a_}",
f"exec_price_{self.symbol_b_}",
]
def add_trades(self, trades: pd.DataFrame) -> None:
if self.user_data_["trades"] is None or len(self.user_data_["trades"]) == 0:
# If trades is empty or None, just assign the new trades directly
self.user_data_["trades"] = trades.copy()
else:
# Ensure both DataFrames have the same columns and dtypes before concatenation
existing_trades = self.user_data_["trades"]
# If existing trades is empty, just assign the new trades
if len(existing_trades) == 0:
self.user_data_["trades"] = trades.copy()
else:
# Ensure both DataFrames have the same columns
if set(existing_trades.columns) != set(trades.columns):
# Add missing columns to trades with appropriate default values
for col in existing_trades.columns:
if col not in trades.columns:
if col == "time":
trades[col] = pd.Timestamp.now()
elif col in ["action", "symbol"]:
trades[col] = ""
elif col in [
"price",
"disequilibrium",
"scaled_disequilibrium",
]:
trades[col] = 0.0
elif col == "pair":
trades[col] = None
else:
trades[col] = None
# Concatenate with explicit dtypes to avoid warnings
self.user_data_["trades"] = pd.concat(
[existing_trades, trades], ignore_index=True, copy=False
)
    def get_trades(self) -> pd.DataFrame:
        trades = self.user_data_.get("trades")
        return trades if trades is not None else pd.DataFrame()
def cointegration_check(self) -> Optional[pd.DataFrame]:
print(f"***{self}*** STARTING....")
config = self.config_
curr_training_start_idx = 0
COINTEGRATION_DATA_COLUMNS = {
"tstamp": "datetime64[ns]",
"pair": "string",
"eg_pvalue": "float64",
"johansen_lr1": "float64",
"johansen_cvt": "float64",
"eg_is_cointegrated": "bool",
"johansen_is_cointegrated": "bool",
}
        # Initialize the cointegration-results DataFrame with the expected
        # columns (COINTEGRATION_DATA_COLUMNS documents the intended dtypes)
        result: pd.DataFrame = pd.DataFrame(
            columns=list(COINTEGRATION_DATA_COLUMNS.keys())
        )
training_minutes = config["training_minutes"]
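        # Walk-forward scan: advance the training window one row at a time,
        # recording Engle-Granger and Johansen statistics for each window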
while True:
print(curr_training_start_idx, end="\r")
self.get_datasets(
training_minutes=training_minutes,
training_start_index=curr_training_start_idx,
testing_size=1,
)
if len(self.training_df_) < training_minutes:
print(
f"{self}: current offset={curr_training_start_idx}"
f" * Training data length={len(self.training_df_)} < {training_minutes}"
" * Not enough training data. Completing the job."
)
break
new_row = pd.Series(CointegrationData(self).to_dict())
result.loc[len(result)] = new_row
curr_training_start_idx += 1
return result
def to_stop_close_conditions(self, predicted_row: pd.Series) -> bool:
config = self.config_
if (
"stop_close_conditions" not in config
or config["stop_close_conditions"] is None
):
return False
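        # Thresholds are percent returns, matching _current_return(); e.g.
        # (illustrative) stop_close_conditions: { profit: 0.5, loss: -0.5 }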
if "profit" in config["stop_close_conditions"]:
current_return = self._current_return(predicted_row)
#
# print(f"time={predicted_row['tstamp']} current_return={current_return}")
#
if current_return >= config["stop_close_conditions"]["profit"]:
print(f"STOP PROFIT: {current_return}")
self.user_data_["stop_close_state"] = PairState.CLOSE_STOP_PROFIT
return True
if "loss" in config["stop_close_conditions"]:
if current_return <= config["stop_close_conditions"]["loss"]:
print(f"STOP LOSS: {current_return}")
self.user_data_["stop_close_state"] = PairState.CLOSE_STOP_LOSS
return True
return False
    def on_open_trades(self, trades: pd.DataFrame) -> None:
        self.user_data_.pop("close_trades", None)
        self.user_data_["open_trades"] = trades
    def on_close_trades(self, trades: pd.DataFrame) -> None:
        self.user_data_.pop("open_trades", None)
        self.user_data_["close_trades"] = trades
def _current_return(self, predicted_row: pd.Series) -> float:
if "open_trades" in self.user_data_:
open_trades = self.user_data_["open_trades"]
if len(open_trades) == 0:
return 0.0
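            # Per-leg percent return: long legs gain when the price rises;
            # SELL legs flip the sign so they gain when the price falls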
def _single_instrument_return(symbol: str) -> float:
instrument_open_trades = open_trades[open_trades["symbol"] == symbol]
instrument_open_price = instrument_open_trades["price"].iloc[0]
sign = -1 if instrument_open_trades["side"].iloc[0] == "SELL" else 1
instrument_price = predicted_row[f"{self.stat_model_price_}_{symbol}"]
instrument_return = (
sign
* (instrument_price - instrument_open_price)
/ instrument_open_price
)
return float(instrument_return) * 100.0
instrument_a_return = _single_instrument_return(self.symbol_a_)
instrument_b_return = _single_instrument_return(self.symbol_b_)
return instrument_a_return + instrument_b_return
return 0.0
def __repr__(self) -> str:
return self.name()
def name(self) -> str:
return f"{self.symbol_a_} & {self.symbol_b_}"
@abstractmethod
def predict(self) -> pd.DataFrame: ...
# @abstractmethod
# def predicted_df(self) -> Optional[pd.DataFrame]: ...

View File

@ -0,0 +1,193 @@
# Original script moved to vecm_rolling_fit_01.py
# 09.09.25 Added a GARCH model for predicting spread volatility.
#
# GARCH(1,1) rule of thumb:
#   alpha + beta ≈ 1  -> strong volatility clustering / persistence.
#   alpha + beta much lower -> volatility mean-reverts quickly.
#   alpha + beta > 1  -> model is unstable / non-stationary (bad).
#
# The strategy combines:
#   - the VECM disequilibrium (mean-reversion signal), and
#   - the GARCH volatility forecast (risk measure),
# entering trades only when the VECM disequilibrium z-score exceeds its
# threshold and the GARCH-forecasted volatility is not too high (this avoids
# noise-driven signals). The result is a volatility-adjusted pairs trading
# strategy that is more robust than plain VECM.
#
# The pair_predict_result_ DataFrame now includes:
#   disequilibrium, scaled_disequilibrium (z-scores),
#   garch_alpha, garch_beta, garch_persistence (alpha + beta),
#   garch_vol_forecast (1-step volatility forecast), and
#   high_volatility: 1 if persistence > 0.95 or vol_forecast > 2
#   (rule of thumb for an unstable / risky regime), 0 for a stable regime.
# VECM/GARCH
# vecm_rolling_fit.py:
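# A minimal entry-filter sketch (illustrative only; `open_trshld` and
# `open_pair_position` are assumed names, not part of this module):
#
#   row = pair_predict_result_.iloc[-1]
#   if row["scaled_disequilibrium"] > open_trshld and row["high_volatility"] == 0:
#       open_pair_position(row)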
from typing import Any, Dict, Optional
import numpy as np
import pandas as pd
from pt_trading.results import BacktestResult
from pt_trading.rolling_window_fit import RollingFit
from pt_trading.trading_pair import TradingPair
from statsmodels.tsa.vector_ar.vecm import VECM, VECMResults
from arch import arch_model
NanoPerMin = 1e9
class VECMTradingPair(TradingPair):
vecm_fit_: Optional[VECMResults]
pair_predict_result_: Optional[pd.DataFrame]
def __init__(
self,
config: Dict[str, Any],
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
):
super().__init__(config, market_data, symbol_a, symbol_b)
self.vecm_fit_ = None
self.pair_predict_result_ = None
self.garch_fit_ = None
self.sigma_spread_forecast_ = None
self.garch_alpha_ = None
self.garch_beta_ = None
self.garch_persistence_ = None
self.high_volatility_flag_ = None
def _train_pair(self) -> None:
self._fit_VECM()
assert self.vecm_fit_ is not None
diseq_series = self.training_df_[self.colnames()] @ self.vecm_fit_.beta
self.training_mu_ = float(diseq_series[0].mean())
self.training_std_ = float(diseq_series[0].std())
self.training_df_["disequilibrium"] = diseq_series
self.training_df_["scaled_disequilibrium"] = (
diseq_series - self.training_mu_
) / self.training_std_
def _fit_VECM(self) -> None:
assert self.training_df_ is not None
vecm_df = self.training_df_[self.colnames()].reset_index(drop=True)
vecm_model = VECM(vecm_df, coint_rank=1)
vecm_fit = vecm_model.fit()
self.vecm_fit_ = vecm_fit
# Error Correction Term (spread)
ect_series = (vecm_df @ vecm_fit.beta).iloc[:, 0]
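        # beta is the cointegrating vector: ect_t = beta' @ p_t should be
        # stationary, so GARCH is fitted to its first differences below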
# Difference the spread for stationarity
dz = ect_series.diff().dropna()
if len(dz) < 30:
print("Not enough data for GARCH fitting.")
return
# Rescale if variance too small
if dz.std() < 0.1:
dz = dz * 1000
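            # GARCH alpha/beta (and their sum, the persistence) are
            # scale-invariant; rescaling only conditions the optimizer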
# print("Scale check:", dz.std())
try:
garch = arch_model(dz, vol="GARCH", p=1, q=1, mean="Zero", dist="normal")
garch_fit = garch.fit(disp="off")
self.garch_fit_ = garch_fit
# Extract parameters
params = garch_fit.params
self.garch_alpha_ = params.get("alpha[1]", np.nan)
self.garch_beta_ = params.get("beta[1]", np.nan)
self.garch_persistence_ = self.garch_alpha_ + self.garch_beta_
# print (f"GARCH α: {self.garch_alpha_:.4f}, β: {self.garch_beta_:.4f}, "
# f"α+β (persistence): {self.garch_persistence_:.4f}")
# One-step-ahead volatility forecast
forecast = garch_fit.forecast(horizon=1)
sigma_next = np.sqrt(forecast.variance.iloc[-1, 0])
self.sigma_spread_forecast_ = float(sigma_next)
# print("GARCH sigma forecast:", self.sigma_spread_forecast_)
# Rule of thumb: persistence close to 1 or large volatility forecast
self.high_volatility_flag_ = int(
(self.garch_persistence_ is not None and self.garch_persistence_ > 0.95)
or (self.sigma_spread_forecast_ is not None and self.sigma_spread_forecast_ > 2)
)
except Exception as e:
print(f"GARCH fit failed: {e}")
self.garch_fit_ = None
self.sigma_spread_forecast_ = None
self.high_volatility_flag_ = None
def predict(self) -> pd.DataFrame:
self._train_pair()
assert self.testing_df_ is not None
assert self.vecm_fit_ is not None
# VECM predictions
predicted_prices = self.vecm_fit_.predict(steps=len(self.testing_df_))
predicted_df = pd.merge(
self.testing_df_.reset_index(drop=True),
pd.DataFrame(predicted_prices, columns=pd.Index(self.colnames()), dtype=float),
left_index=True,
right_index=True,
suffixes=("", "_pred"),
).dropna()
# Disequilibrium and z-scores
predicted_df["disequilibrium"] = (
predicted_df[self.colnames()] @ self.vecm_fit_.beta
)
predicted_df["signed_scaled_disequilibrium"] = (
predicted_df["disequilibrium"] - self.training_mu_
) / self.training_std_
predicted_df["scaled_disequilibrium"] = abs(
predicted_df["signed_scaled_disequilibrium"]
)
# Add GARCH parameters + volatility forecast
predicted_df["garch_alpha"] = self.garch_alpha_
predicted_df["garch_beta"] = self.garch_beta_
predicted_df["garch_persistence"] = self.garch_persistence_
predicted_df["garch_vol_forecast"] = self.sigma_spread_forecast_
predicted_df["high_volatility"] = self.high_volatility_flag_
# Save results
if self.pair_predict_result_ is None:
self.pair_predict_result_ = predicted_df
else:
self.pair_predict_result_ = pd.concat(
[self.pair_predict_result_, predicted_df], ignore_index=True
)
return self.pair_predict_result_
class VECMRollingFit(RollingFit):
def __init__(self) -> None:
super().__init__()
def create_trading_pair(
self,
config: Dict,
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
) -> TradingPair:
return VECMTradingPair(
config=config,
market_data=market_data,
            symbol_a=symbol_a,
            symbol_b=symbol_b,
)

View File

@ -0,0 +1,124 @@
from typing import Any, Dict, Optional
import pandas as pd
import statsmodels.api as sm
from pt_trading.rolling_window_fit import RollingFit
from pt_trading.trading_pair import TradingPair
NanoPerMin = 1e9
class ZScoreTradingPair(TradingPair):
"""TradingPair implementation that fits a hedge ratio with OLS and
computes a standardized spread (z-score).
The class stores training spread mean/std and hedge ratio so the model
can be applied to testing data consistently.
"""
zscore_model_: Optional[sm.regression.linear_model.RegressionResultsWrapper]
pair_predict_result_: Optional[pd.DataFrame]
zscore_df_: Optional[pd.Series]
hedge_ratio_: Optional[float]
spread_mean_: Optional[float]
spread_std_: Optional[float]
def __init__(
self,
config: Dict[str, Any],
market_data: pd.DataFrame,
symbol_a: str,
symbol_b: str,
):
super().__init__(config, market_data, symbol_a, symbol_b)
self.zscore_model_ = None
self.pair_predict_result_ = None
self.zscore_df_ = None
self.hedge_ratio_ = None
self.spread_mean_ = None
self.spread_std_ = None
def _fit_zscore(self) -> None:
"""Fit OLS on the training window and compute training z-score."""
assert self.training_df_ is not None
# Extract price series for the two symbols from the training frame.
px_df = self.training_df_[self.colnames()]
symbol_a_px = px_df.iloc[:, 0]
symbol_b_px = px_df.iloc[:, 1]
# Align indexes and fit OLS: symbol_a ~ const + symbol_b
symbol_a_px, symbol_b_px = symbol_a_px.align(symbol_b_px, join="inner")
X = sm.add_constant(symbol_b_px)
self.zscore_model_ = sm.OLS(symbol_a_px, X).fit()
# Hedge ratio is the slope on symbol_b
params = self.zscore_model_.params
self.hedge_ratio_ = float(params.iloc[1]) if len(params) > 1 else 0.0
# Training spread and its standardized z-score
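        #   spread_t = a_t - hedge_ratio * b_t,   z_t = (spread_t - mean) / std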
spread = symbol_a_px - self.hedge_ratio_ * symbol_b_px
self.spread_mean_ = float(spread.mean())
        spread_std = float(spread.std(ddof=0))
        self.spread_std_ = spread_std if spread_std != 0 else 1.0
self.zscore_df_ = (spread - self.spread_mean_) / self.spread_std_
def predict(self) -> pd.DataFrame:
"""Apply fitted hedge ratio to the testing frame and return a
dataframe with canonical columns:
- disequilibrium: signed z-score
- scaled_disequilibrium: absolute z-score
- signed_scaled_disequilibrium: same as disequilibrium (keeps sign)
"""
# Fit on training window
self._fit_zscore()
assert self.zscore_df_ is not None
assert self.hedge_ratio_ is not None
assert self.spread_mean_ is not None and self.spread_std_ is not None
# Keep training columns for inspection
self.training_df_["disequilibrium"] = self.zscore_df_
self.training_df_["scaled_disequilibrium"] = self.zscore_df_.abs()
# Apply model to testing frame
assert self.testing_df_ is not None
test_df = self.testing_df_.copy()
px_test = test_df[self.colnames()]
a_test = px_test.iloc[:, 0]
b_test = px_test.iloc[:, 1]
a_test, b_test = a_test.align(b_test, join="inner")
# Compute test spread and standardize using training mean/std
test_spread = a_test - self.hedge_ratio_ * b_test
test_zscore = (test_spread - self.spread_mean_) / self.spread_std_
# Attach canonical columns
# Align back to test_df index if needed
test_zscore = test_zscore.reindex(test_df.index)
test_df["disequilibrium"] = test_zscore
test_df["signed_scaled_disequilibrium"] = test_zscore
test_df["scaled_disequilibrium"] = test_zscore.abs()
# Reset index and accumulate results across windows
test_df = test_df.reset_index(drop=True)
if self.pair_predict_result_ is None:
self.pair_predict_result_ = test_df
else:
self.pair_predict_result_ = pd.concat(
[self.pair_predict_result_, test_df], ignore_index=True
)
self.pair_predict_result_ = self.pair_predict_result_.reset_index(drop=True)
return self.pair_predict_result_.dropna()
class ZScoreRollingFit(RollingFit):
def __init__(self) -> None:
super().__init__()
def create_trading_pair(
self, config: Dict, market_data: pd.DataFrame, symbol_a: str, symbol_b: str
) -> TradingPair:
return ZScoreTradingPair(
config=config, market_data=market_data, symbol_a=symbol_a, symbol_b=symbol_b
)

View File

@ -1,12 +1,12 @@
 import hjson
 from typing import Dict
 from datetime import datetime
-# ---
-from cvttpy_tools.config import Config
-def load_config(config_path: str) -> Config:
-    return Config(json_src=f"file://{config_path}")
+def load_config(config_path: str) -> Dict:
+    with open(config_path, "r") as f:
+        config = hjson.load(f)
+    return dict(config)
 def expand_filename(filename: str) -> str:

View File

@ -1,10 +1,9 @@
 from __future__ import annotations
 import sqlite3
-from typing import Any, Dict, List, Tuple, cast
+from typing import Dict, List, cast
 import pandas as pd
-from cvttpy_trading.trading.instrument import ExchangeInstrument
 def load_sqlite_to_dataframe(db_path:str, query:str) -> pd.DataFrame:
     df: pd.DataFrame = pd.DataFrame()
@ -46,17 +45,19 @@ def convert_time_to_UTC(value: str, timezone: str, extra_minutes: int = 0) -> st
 def load_market_data(
     datafile: str,
-    instruments: List[ExchangeInstrument],
+    instruments: List[Dict[str, str]],
     db_table_name: str,
     trading_hours: Dict = {},
     extra_minutes: int = 0,
 ) -> pd.DataFrame:
-    inst_ids = ['"' + exch_inst.instrument_id() + '"' for exch_inst in instruments]
-    instrument_ids = list(set(inst_ids))
+    insts = [
+        '"' + instrument["instrument_id_pfx"] + instrument["symbol"] + '"'
+        for instrument in instruments
+    ]
+    instrument_ids = list(set(insts))
     exchange_ids = list(
-        set(['"' + instrument.exchange_id() + '"' for instrument in instruments])
+        set(['"' + instrument["exchange_id"] + '"' for instrument in instruments])
     )
     query = "select"

View File

@ -1,37 +0,0 @@
import os
import glob
from typing import Dict, List, Tuple
# ---
from cvttpy_tools.config import Config
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
DayT = str
DataFileNameT = str
def resolve_datafiles(
config: Config, date_pattern: str, instruments: List[ExchangeInstrument]
) -> List[Tuple[DayT, DataFileNameT]]:
resolved_files: List[Tuple[DayT, DataFileNameT]] = []
for exch_inst in instruments:
pattern = date_pattern
inst_type = exch_inst.user_data_.get("instrument_type", "?instrument_type?")
data_dir = config.get_value(f"market_data_loading/{inst_type}/data_directory")
if "*" in pattern or "?" in pattern:
# Handle wildcards
if not os.path.isabs(pattern):
pattern = os.path.join(data_dir, f"{pattern}.mktdata.ohlcv.db")
matched_files = glob.glob(pattern)
for matched_file in matched_files:
import re
match = re.search(r"(\d{8})\.mktdata\.ohlcv\.db$", matched_file)
assert match is not None
day = match.group(1)
resolved_files.append((day, matched_file))
else:
# Handle explicit file path
if not os.path.isabs(pattern):
pattern = os.path.join(data_dir, f"{pattern}.mktdata.ohlcv.db")
resolved_files.append((date_pattern, pattern))
return sorted(list(set(resolved_files))) # Remove duplicates and sort

View File

@ -1,79 +0,0 @@
from pairs_trading.lib.pt_strategy.research_strategy import PtResearchStrategy
def visualize_prices(strategy: PtResearchStrategy, trading_date: str) -> None:
# Plot raw price data
import matplotlib.pyplot as plt
# Set plotting style
import seaborn as sns
pair = strategy.trading_pair_
SYMBOL_A = pair.symbol_a()
SYMBOL_B = pair.symbol_b()
TRD_DATE = f"{trading_date[0:4]}-{trading_date[4:6]}-{trading_date[6:8]}"
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (15, 10)
# Get column names for the trading pair
colname_a, colname_b = pair.colnames()
price_data = strategy.pt_mkt_data_.market_data_df_.copy()
# Create separate subplots for better visibility
fig_price, price_axes = plt.subplots(2, 1, figsize=(18, 10))
# Plot SYMBOL_A
price_axes[0].plot(price_data['tstamp'], price_data[colname_a], alpha=0.7,
label=f'{SYMBOL_A}', linewidth=1, color='blue')
price_axes[0].set_title(f'{SYMBOL_A} Price Data ({TRD_DATE})')
price_axes[0].set_ylabel(f'{SYMBOL_A} Price')
price_axes[0].legend()
price_axes[0].grid(True)
# Plot SYMBOL_B
price_axes[1].plot(price_data['tstamp'], price_data[colname_b], alpha=0.7,
label=f'{SYMBOL_B}', linewidth=1, color='red')
price_axes[1].set_title(f'{SYMBOL_B} Price Data ({TRD_DATE})')
price_axes[1].set_ylabel(f'{SYMBOL_B} Price')
price_axes[1].set_xlabel('Time')
price_axes[1].legend()
price_axes[1].grid(True)
plt.tight_layout()
plt.show()
# Plot individual prices
fig, axes = plt.subplots(2, 1, figsize=(18, 12))
# Normalized prices for comparison
norm_a = price_data[colname_a] / price_data[colname_a].iloc[0]
norm_b = price_data[colname_b] / price_data[colname_b].iloc[0]
axes[0].plot(price_data['tstamp'], norm_a, label=f'{SYMBOL_A} (normalized)', alpha=0.8, linewidth=1)
axes[0].plot(price_data['tstamp'], norm_b, label=f'{SYMBOL_B} (normalized)', alpha=0.8, linewidth=1)
axes[0].set_title(f'Normalized Price Comparison (Base = 1.0) ({TRD_DATE})')
axes[0].set_ylabel('Normalized Price')
axes[0].legend()
axes[0].grid(True)
# Price ratio
price_ratio = price_data[colname_a] / price_data[colname_b]
axes[1].plot(price_data['tstamp'], price_ratio, label=f'{SYMBOL_A}/{SYMBOL_B} Ratio', color='green', alpha=0.8, linewidth=1)
axes[1].set_title(f'Price Ratio Px({SYMBOL_A})/Px({SYMBOL_B}) ({TRD_DATE})')
axes[1].set_ylabel('Ratio')
axes[1].set_xlabel('Time')
axes[1].legend()
axes[1].grid(True)
plt.tight_layout()
plt.show()
# Print basic statistics
print(f"\nPrice Statistics:")
print(f" {SYMBOL_A}: Mean=${price_data[colname_a].mean():.2f}, Std=${price_data[colname_a].std():.2f}")
print(f" {SYMBOL_B}: Mean=${price_data[colname_b].mean():.2f}, Std=${price_data[colname_b].std():.2f}")
print(f" Price Ratio: Mean={price_ratio.mean():.2f}, Std={price_ratio.std():.2f}")
print(f" Correlation: {price_data[colname_a].corr(price_data[colname_b]):.4f}")

View File

@ -1,502 +0,0 @@
from __future__ import annotations
from pairs_trading.lib.pt_strategy.results import (PairResearchResult)
from pairs_trading.lib.pt_strategy.research_strategy import PtResearchStrategy
def visualize_trades(strategy: PtResearchStrategy, results: PairResearchResult, trading_date: str) -> None:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
from IPython.display import HTML
from plotly.subplots import make_subplots
pair = strategy.trading_pair_
trades = results.trades_[trading_date].copy()
origin_mkt_data_df = strategy.pt_mkt_data_.origin_mkt_data_df_
mkt_data_df = strategy.pt_mkt_data_.market_data_df_
TRD_DATE = f"{trading_date[0:4]}-{trading_date[4:6]}-{trading_date[6:8]}"
SYMBOL_A = pair.symbol_a()
SYMBOL_B = pair.symbol_b()
print(f"\nCreated trading pair: {pair}")
print(f"Market data shape: {pair.market_data_.shape}")
print(f"Column names: {pair.colnames()}")
# Configure plotly for offline mode
pyo.init_notebook_mode(connected=True)
# Strategy-specific interactive visualization
assert strategy.config_ is not None
print("=== SLIDING FIT INTERACTIVE VISUALIZATION ===")
print("Note: Rolling Fit strategy visualization with interactive plotly charts")
# Create consistent timeline - superset of timestamps from both dataframes
all_timestamps = sorted(set(mkt_data_df['tstamp']))
# Create a unified timeline dataframe for consistent plotting
timeline_df = pd.DataFrame({'tstamp': all_timestamps})
# Merge with predicted data to get dis-equilibrium values
timeline_df = timeline_df.merge(strategy.predictions_df_[['tstamp', 'disequilibrium', 'scaled_disequilibrium', 'signed_scaled_disequilibrium']],
on='tstamp', how='left')
# Get Symbol_A and Symbol_B market data
colname_a, colname_b = pair.colnames()
symbol_a_data = mkt_data_df[['tstamp', colname_a]].copy()
symbol_b_data = mkt_data_df[['tstamp', colname_b]].copy()
norm_a = symbol_a_data[colname_a] / symbol_a_data[colname_a].iloc[0]
norm_b = symbol_b_data[colname_b] / symbol_b_data[colname_b].iloc[0]
print(f"Using consistent timeline with {len(timeline_df)} timestamps")
print(f"Timeline range: {timeline_df['tstamp'].min()} to {timeline_df['tstamp'].max()}")
# Create subplots with price charts at bottom
fig = make_subplots(
rows=4, cols=1,
row_heights=[0.3, 0.4, 0.15, 0.15],
subplot_titles=[
f'Dis-equilibrium with Trading Thresholds ({TRD_DATE})',
f'Normalized Price Comparison with BUY/SELL Signals - {SYMBOL_A}&{SYMBOL_B} ({TRD_DATE})',
f'{SYMBOL_A} Market Data with Trading Signals ({TRD_DATE})',
f'{SYMBOL_B} Market Data with Trading Signals ({TRD_DATE})',
],
vertical_spacing=0.06,
specs=[[{"secondary_y": False}],
[{"secondary_y": False}],
[{"secondary_y": False}],
[{"secondary_y": False}]]
)
# 1. Scaled dis-equilibrium with thresholds - using consistent timeline
fig.add_trace(
go.Scatter(
x=timeline_df['tstamp'],
y=timeline_df['scaled_disequilibrium'],
name='Absolute Scaled Dis-equilibrium',
line=dict(color='green', width=2),
opacity=0.8
),
row=1, col=1
)
fig.add_trace(
go.Scatter(
x=timeline_df['tstamp'],
y=timeline_df['signed_scaled_disequilibrium'],
name='Scaled Dis-equilibrium',
line=dict(color='darkmagenta', width=2),
opacity=0.8
),
row=1, col=1
)
# Add threshold lines to first subplot
fig.add_shape(
type="line",
x0=timeline_df['tstamp'].min(),
x1=timeline_df['tstamp'].max(),
y0=strategy.config_.get_value('model/disequilibrium/open_trshld'),
y1=strategy.config_.get_value('model/disequilibrium/open_trshld'),
line=dict(color="purple", width=2, dash="dot"),
opacity=0.7,
row=1, col=1
)
fig.add_shape(
type="line",
x0=timeline_df['tstamp'].min(),
x1=timeline_df['tstamp'].max(),
y0=-strategy.config_.get_value('model/disequilibrium/open_trshld'),
y1=-strategy.config_.get_value('model/disequilibrium/open_trshld'),
line=dict(color="purple", width=2, dash="dot"),
opacity=0.7,
row=1, col=1
)
fig.add_shape(
type="line",
x0=timeline_df['tstamp'].min(),
x1=timeline_df['tstamp'].max(),
y0=strategy.config_.get_value('model/disequilibrium/close_trshld'),
y1=strategy.config_.get_value('model/disequilibrium/close_trshld'),
line=dict(color="brown", width=2, dash="dot"),
opacity=0.7,
row=1, col=1
)
fig.add_shape(
type="line",
x0=timeline_df['tstamp'].min(),
x1=timeline_df['tstamp'].max(),
y0=-strategy.config_.get_value('model/disequilibrium/close_trshld'),
y1=-strategy.config_.get_value('model/disequilibrium/close_trshld'),
line=dict(color="brown", width=2, dash="dot"),
opacity=0.7,
row=1, col=1
)
fig.add_shape(
type="line",
x0=timeline_df['tstamp'].min(),
x1=timeline_df['tstamp'].max(),
y0=0,
y1=0,
line=dict(color="black", width=1, dash="solid"),
opacity=0.5,
row=1, col=1
)
# Add normalized price lines
fig.add_trace(
go.Scatter(
x=mkt_data_df['tstamp'],
y=norm_a,
name=f'{SYMBOL_A} (Normalized)',
line=dict(color='blue', width=2),
opacity=0.8
),
row=2, col=1
)
fig.add_trace(
go.Scatter(
x=mkt_data_df['tstamp'],
y=norm_b,
name=f'{SYMBOL_B} (Normalized)',
line=dict(color='orange', width=2),
opacity=0.8,
),
row=2, col=1
)
# Add BUY and SELL signals if available
if trades is not None and len(trades) > 0:
# Define signal groups to avoid legend repetition
signal_groups = {}
# Process all trades and group by signal type (ignore OPEN/CLOSE status)
for _, trade in trades.iterrows():
symbol = trade['symbol']
side = trade['side']
# status = trade['status']
action = trade['action']
# Create signal group key (without status to combine OPEN/CLOSE)
signal_key = f"{symbol} {side} {action}"
# Find normalized price for this trade
trade_time = trade['time']
if symbol == SYMBOL_A:
closest_idx = mkt_data_df['tstamp'].searchsorted(trade_time)
if closest_idx < len(norm_a):
norm_price = norm_a.iloc[closest_idx]
else:
norm_price = norm_a.iloc[-1]
else: # SYMBOL_B
closest_idx = mkt_data_df['tstamp'].searchsorted(trade_time)
if closest_idx < len(norm_b):
norm_price = norm_b.iloc[closest_idx]
else:
norm_price = norm_b.iloc[-1]
# Initialize group if not exists
if signal_key not in signal_groups:
signal_groups[signal_key] = {
'times': [],
'prices': [],
'actual_prices': [],
'symbol': symbol,
'side': side,
# 'status': status,
'action': trade['action']
}
# Add to group
signal_groups[signal_key]['times'].append(trade_time)
signal_groups[signal_key]['prices'].append(norm_price)
signal_groups[signal_key]['actual_prices'].append(trade['price'])
# Add each signal group as a single trace
for signal_key, group_data in signal_groups.items():
symbol = group_data['symbol']
side = group_data['side']
# status = group_data['status']
# Determine marker properties (same for all OPEN/CLOSE of same side)
is_close: bool = (group_data['action'] == "CLOSE")
if 'BUY' in side:
marker_color = 'green'
marker_symbol = 'triangle-up'
marker_size = 14
else: # SELL
marker_color = 'red'
marker_symbol = 'triangle-down'
marker_size = 14
# Create hover text for each point in the group
hover_texts = []
for i, (time, norm_price, actual_price) in enumerate(zip(group_data['times'],
group_data['prices'],
group_data['actual_prices'])):
# Find the corresponding trade to get the status for hover text
trade_info = trades[(trades['time'] == time) &
(trades['symbol'] == symbol) &
(trades['side'] == side)]
if len(trade_info) > 0:
action = trade_info.iloc[0]['action']
hover_texts.append(f'<b>{signal_key} {action}</b><br>' +
f'Time: {time}<br>' +
f'Normalized Price: {norm_price:.4f}<br>' +
f'Actual Price: ${actual_price:.2f}')
else:
hover_texts.append(f'<b>{signal_key}</b><br>' +
f'Time: {time}<br>' +
f'Normalized Price: {norm_price:.4f}<br>' +
f'Actual Price: ${actual_price:.2f}')
fig.add_trace(
go.Scatter(
x=group_data['times'],
y=group_data['prices'],
mode='markers',
name=signal_key,
marker=dict(
color=marker_color,
size=marker_size,
symbol=marker_symbol,
line=dict(width=2, color='black') if is_close else None
),
showlegend=True,
hovertemplate='%{text}<extra></extra>',
text=hover_texts
),
row=2, col=1
)
# -----------------------------
fig.add_trace(
go.Scatter(
x=symbol_a_data['tstamp'],
y=symbol_a_data[colname_a],
name=f'{SYMBOL_A} Price',
line=dict(color='blue', width=2),
opacity=0.8
),
row=3, col=1
)
# Filter trades for Symbol_A
symbol_a_trades = trades[trades['symbol'] == SYMBOL_A]
print(f"\nSymbol_A trades:\n{symbol_a_trades}")
if len(symbol_a_trades) > 0:
# Separate trades by action and status for different colors
buy_open_trades = symbol_a_trades[(symbol_a_trades['side'].str.contains('BUY', na=False)) &
(symbol_a_trades['action'].str.contains('OPEN', na=False))]
buy_close_trades = symbol_a_trades[(symbol_a_trades['side'].str.contains('BUY', na=False)) &
(symbol_a_trades['action'].str.contains('CLOSE', na=False))]
sell_open_trades = symbol_a_trades[(symbol_a_trades['side'].str.contains('SELL', na=False)) &
(symbol_a_trades['action'].str.contains('OPEN', na=False))]
sell_close_trades = symbol_a_trades[(symbol_a_trades['side'].str.contains('SELL', na=False)) &
(symbol_a_trades['action'].str.contains('CLOSE', na=False))]
# Add BUY OPEN signals
if len(buy_open_trades) > 0:
fig.add_trace(
go.Scatter(
x=buy_open_trades['time'],
y=buy_open_trades['price'],
mode='markers',
name=f'{SYMBOL_A} BUY OPEN',
marker=dict(color='green', size=12, symbol='triangle-up'),
showlegend=True
),
row=3, col=1
)
# Add BUY CLOSE signals
if len(buy_close_trades) > 0:
fig.add_trace(
go.Scatter(
x=buy_close_trades['time'],
y=buy_close_trades['price'],
mode='markers',
name=f'{SYMBOL_A} BUY CLOSE',
marker=dict(color='green', size=12, symbol='triangle-up'),
line=dict(width=2, color='black'),
showlegend=True
),
row=3, col=1
)
# Add SELL OPEN signals
if len(sell_open_trades) > 0:
fig.add_trace(
go.Scatter(
x=sell_open_trades['time'],
y=sell_open_trades['price'],
mode='markers',
name=f'{SYMBOL_A} SELL OPEN',
marker=dict(color='red', size=12, symbol='triangle-down'),
showlegend=True
),
row=3, col=1
)
# Add SELL CLOSE signals
if len(sell_close_trades) > 0:
fig.add_trace(
go.Scatter(
x=sell_close_trades['time'],
y=sell_close_trades['price'],
mode='markers',
name=f'{SYMBOL_A} SELL CLOSE',
marker=dict(color='red', size=12, symbol='triangle-down'),
line=dict(width=2, color='black'),
showlegend=True
),
row=3, col=1
)
# 4. Symbol_B Market Data with Trading Signals
fig.add_trace(
go.Scatter(
x=symbol_b_data['tstamp'],
y=symbol_b_data[colname_b],
name=f'{SYMBOL_B} Price',
line=dict(color='orange', width=2),
opacity=0.8
),
row=4, col=1
)
# Add trading signals for Symbol_B if available
symbol_b_trades = trades[trades['symbol'] == SYMBOL_B]
print(f"\nSymbol_B trades:\n{symbol_b_trades}")
if len(symbol_b_trades) > 0:
# Separate trades by action and status for different colors
buy_open_trades = symbol_b_trades[(symbol_b_trades['side'].str.contains('BUY', na=False)) &
(symbol_b_trades['action'].str.startswith('OPEN', na=False))]
buy_close_trades = symbol_b_trades[(symbol_b_trades['side'].str.contains('BUY', na=False)) &
(symbol_b_trades['action'].str.startswith('CLOSE', na=False))]
sell_open_trades = symbol_b_trades[(symbol_b_trades['side'].str.contains('SELL', na=False)) &
(symbol_b_trades['action'].str.contains('OPEN', na=False))]
sell_close_trades = symbol_b_trades[(symbol_b_trades['side'].str.contains('SELL', na=False)) &
(symbol_b_trades['action'].str.contains('CLOSE', na=False))]
# Add BUY OPEN signals
if len(buy_open_trades) > 0:
fig.add_trace(
go.Scatter(
x=buy_open_trades['time'],
y=buy_open_trades['price'],
mode='markers',
name=f'{SYMBOL_B} BUY OPEN',
marker=dict(color='darkgreen', size=12, symbol='triangle-up'),
showlegend=True
),
row=4, col=1
)
# Add BUY CLOSE signals
if len(buy_close_trades) > 0:
fig.add_trace(
go.Scatter(
x=buy_close_trades['time'],
y=buy_close_trades['price'],
mode='markers',
name=f'{SYMBOL_B} BUY CLOSE',
marker=dict(color='green', size=12, symbol='triangle-up'),
line=dict(width=2, color='black'),
showlegend=True
),
row=4, col=1
)
# Add SELL OPEN signals
if len(sell_open_trades) > 0:
fig.add_trace(
go.Scatter(
x=sell_open_trades['time'],
y=sell_open_trades['price'],
mode='markers',
name=f'{SYMBOL_B} SELL OPEN',
marker=dict(color='red', size=12, symbol='triangle-down'),
showlegend=True
),
row=4, col=1
)
# Add SELL CLOSE signals
if len(sell_close_trades) > 0:
fig.add_trace(
go.Scatter(
x=sell_close_trades['time'],
y=sell_close_trades['price'],
mode='markers',
name=f'{SYMBOL_B} SELL CLOSE',
marker=dict(color='red', size=12, symbol='triangle-down'),
line=dict(width=2, color='black'),
showlegend=True
),
row=4, col=1
)
# Update layout
fig.update_layout(
height=1600,
title_text=f"Strategy Analysis - {SYMBOL_A} & {SYMBOL_B} ({TRD_DATE})",
showlegend=True,
template="plotly_white",
plot_bgcolor='lightgray',
)
# Update y-axis labels
fig.update_yaxes(title_text="Scaled Dis-equilibrium", row=1, col=1)
fig.update_yaxes(title_text=f"{SYMBOL_A} Price ($)", row=2, col=1)
fig.update_yaxes(title_text=f"{SYMBOL_B} Price ($)", row=3, col=1)
fig.update_yaxes(title_text="Normalized Price (Base = 1.0)", row=4, col=1)
# Update x-axis labels and ensure consistent time range
time_range = [timeline_df['tstamp'].min(), timeline_df['tstamp'].max()]
fig.update_xaxes(range=time_range, row=1, col=1)
fig.update_xaxes(range=time_range, row=2, col=1)
fig.update_xaxes(range=time_range, row=3, col=1)
fig.update_xaxes(title_text="Time", range=time_range, row=4, col=1)
# Display using plotly offline mode
# pyo.iplot(fig)
fig.show()
else:
print("No interactive visualization data available - strategy may not have run successfully")
print(f"\nChart shows:")
print(f"- {SYMBOL_A} and {SYMBOL_B} prices normalized to start at 1.0")
print(f"- BUY signals shown as green triangles pointing up")
print(f"- SELL signals shown as orange triangles pointing down")
print(f"- All BUY signals per symbol grouped together, all SELL signals per symbol grouped together")
print(f"- Hover over markers to see individual trade details (OPEN/CLOSE status)")
if trades is not None and len(trades) > 0:
print(f"- Total signals displayed: {len(trades)}")
print(f"- {SYMBOL_A} signals: {len(trades[trades['symbol'] == SYMBOL_A])}")
print(f"- {SYMBOL_B} signals: {len(trades[trades['symbol'] == SYMBOL_B])}")
else:
print("- No trading signals to display")

169
lib/utils/db_inspector.py Normal file
View File

@ -0,0 +1,169 @@
#!/usr/bin/env python3
"""
Database inspector utility for pairs trading results database.
Provides functionality to view all tables and their contents.
"""
import sqlite3
import sys
import json
import os
from typing import List, Dict, Any
def list_tables(db_path: str) -> List[str]:
"""List all tables in the database."""
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute("""
SELECT name FROM sqlite_master
WHERE type='table'
ORDER BY name
""")
tables = [row[0] for row in cursor.fetchall()]
conn.close()
return tables
def view_table_schema(db_path: str, table_name: str) -> None:
"""View the schema of a specific table."""
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute(f"PRAGMA table_info({table_name})")
columns = cursor.fetchall()
print(f"\nTable: {table_name}")
print("-" * 50)
print("Column Name".ljust(20) + "Type".ljust(15) + "Not Null".ljust(10) + "Default")
print("-" * 50)
for col in columns:
cid, name, type_, not_null, default_value, pk = col
print(f"{name}".ljust(20) + f"{type_}".ljust(15) + f"{bool(not_null)}".ljust(10) + f"{default_value or ''}")
conn.close()
def view_config_table(db_path: str, limit: int = 10) -> None:
"""View entries from the config table."""
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute(f"""
SELECT id, run_timestamp, config_file_path, fit_method_class,
datafiles, instruments, config_json
FROM config
ORDER BY run_timestamp DESC
LIMIT {limit}
""")
rows = cursor.fetchall()
if not rows:
print("No configuration entries found.")
return
print(f"\nMost recent {len(rows)} configuration entries:")
print("=" * 80)
for row in rows:
id, run_timestamp, config_file_path, fit_method_class, datafiles, instruments, config_json = row
print(f"ID: {id} | {run_timestamp}")
print(f"Config: {config_file_path} | Strategy: {fit_method_class}")
print(f"Files: {datafiles}")
print(f"Instruments: {instruments}")
print("-" * 80)
conn.close()
def view_results_summary(db_path: str) -> None:
"""View summary of trading results."""
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Get results summary
cursor.execute("""
SELECT date, COUNT(*) as trade_count,
ROUND(SUM(symbol_return), 2) as total_return
FROM pt_bt_results
GROUP BY date
ORDER BY date DESC
""")
results = cursor.fetchall()
if not results:
print("No trading results found.")
return
print(f"\nTrading Results Summary:")
print("-" * 50)
print("Date".ljust(15) + "Trades".ljust(10) + "Total Return %")
print("-" * 50)
for date, trade_count, total_return in results:
print(f"{date}".ljust(15) + f"{trade_count}".ljust(10) + f"{total_return}")
# Get outstanding positions summary
cursor.execute("""
SELECT COUNT(*) as position_count,
ROUND(SUM(unrealized_return), 2) as total_unrealized
FROM outstanding_positions
""")
outstanding = cursor.fetchone()
if outstanding and outstanding[0] > 0:
print(f"\nOutstanding Positions: {outstanding[0]} positions")
print(f"Total Unrealized Return: {outstanding[1]}%")
conn.close()
def main() -> None:
if len(sys.argv) < 2:
print("Usage: python db_inspector.py <database_path> [command]")
print("Commands:")
print(" tables - List all tables")
print(" schema - Show schema for all tables")
print(" config - View configuration entries")
print(" results - View trading results summary")
print(" all - Show everything (default)")
print("\nExample: python db_inspector.py results/equity.db config")
sys.exit(1)
db_path = sys.argv[1]
command = sys.argv[2] if len(sys.argv) > 2 else "all"
if not os.path.exists(db_path):
print(f"Database file not found: {db_path}")
sys.exit(1)
try:
if command in ["tables", "all"]:
tables = list_tables(db_path)
print(f"Tables in database: {', '.join(tables)}")
if command in ["schema", "all"]:
tables = list_tables(db_path)
for table in tables:
view_table_schema(db_path, table)
if command in ["config", "all"]:
if "config" in list_tables(db_path):
view_config_table(db_path)
else:
print("Config table not found.")
if command in ["results", "all"]:
if "pt_bt_results" in list_tables(db_path):
view_results_summary(db_path)
else:
print("Results table not found.")
except Exception as e:
print(f"Error inspecting database: {str(e)}")
import traceback
traceback.print_exc()
if __name__ == "__main__":
main()

66
pyproject.toml Normal file
View File

@ -0,0 +1,66 @@
[build-system]
requires = ["setuptools>=45", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "pairs-trading"
version = "0.1.0"
description = "Pairs Trading Backtesting Framework"
requires-python = ">=3.8"
[tool.black]
line-length = 88
target-version = ['py38']
include = '\.pyi?$'
extend-exclude = '''
/(
# directories
\.eggs
| \.git
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| build
| dist
)/
'''
[tool.flake8]
max-line-length = 88
extend-ignore = ["E203", "W503"]
exclude = [
".git",
"__pycache__",
"build",
"dist",
".venv",
".mypy_cache",
".tox"
]
[tool.mypy]
python_version = "3.8"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
disallow_incomplete_defs = true
check_untyped_defs = true
disallow_untyped_decorators = true
no_implicit_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
warn_unreachable = true
strict_equality = true
[[tool.mypy.overrides]]
module = [
"numpy.*",
"pandas.*",
"matplotlib.*",
"seaborn.*",
"scipy.*",
"sklearn.*"
]
ignore_missing_imports = true

24
pyrightconfig.json Normal file
View File

@ -0,0 +1,24 @@
{
"include": [
"lib"
],
"exclude": [
"**/node_modules",
"**/__pycache__",
"**/.*",
"results",
"data"
],
"ignore": [],
"defineConstant": {},
"typeCheckingMode": "basic",
"useLibraryCodeForTypes": true,
"autoImportCompletions": true,
"autoSearchPaths": true,
"extraPaths": [
"lib"
],
"stubPath": "./typings",
"venvPath": ".",
"venv": "python3.12-venv"
}

View File

@ -61,7 +61,7 @@ protobuf>=3.12.4
 psutil>=5.9.0
 ptyprocess>=0.7.0
 pycurl>=7.44.1
-pyelftools>=0.27
+# pyelftools>=0.27
 Pygments>=2.11.2
 pyparsing>=2.4.7
 pyrsistent>=0.18.1
@ -69,7 +69,7 @@ python-debian>=0.1.43 #+ubuntu1.1
 python-dotenv>=0.19.2
 python-magic>=0.4.24
 python-xlib>=0.29
-pyxdg>=0.27
+# pyxdg>=0.27
 PyYAML>=6.0
 reportlab>=3.6.8
 requests>=2.25.1
@ -78,119 +78,117 @@ scipy<1.13.0
 seaborn>=0.13.2
 SecretStorage>=3.3.1
 setproctitle>=1.2.2
-simpleeval>=1.0.3
 six>=1.16.0
 soupsieve>=2.3.1
 ssh-import-id>=5.11
 statsmodels>=0.14.4
-texttable>=1.6.4
+# texttable>=1.6.4
 tldextract>=3.1.2
 tomli>=1.2.2
 ######## typed-ast>=1.4.3
-types-aiofiles>=0.1
+# types-aiofiles>=0.1
-types-annoy>=1.17
+# types-annoy>=1.17
-types-appdirs>=1.4
+# types-appdirs>=1.4
-types-atomicwrites>=1.4
+# types-atomicwrites>=1.4
-types-aws-xray-sdk>=2.8
+# types-aws-xray-sdk>=2.8
-types-babel>=2.9
+# types-babel>=2.9
-types-backports-abc>=0.5
+# types-backports-abc>=0.5
-types-backports.ssl-match-hostname>=3.7
+# types-backports.ssl-match-hostname>=3.7
-types-beautifulsoup4>=4.10
+# types-beautifulsoup4>=4.10
-types-bleach>=4.1
+# types-bleach>=4.1
-types-boto>=2.49
+# types-boto>=2.49
-types-braintree>=4.11
+# types-braintree>=4.11
-types-cachetools>=4.2
+# types-cachetools>=4.2
-types-caldav>=0.8
+# types-caldav>=0.8
-types-certifi>=2020.4
+# types-certifi>=2020.4
-types-characteristic>=14.3
+# types-characteristic>=14.3
-types-chardet>=4.0
+# types-chardet>=4.0
-types-click>=7.1
+# types-click>=7.1
-types-click-spinner>=0.1
+# types-click-spinner>=0.1
-types-colorama>=0.4
+# types-colorama>=0.4
-types-commonmark>=0.9
+# types-commonmark>=0.9
-types-contextvars>=0.1
+# types-contextvars>=0.1
-types-croniter>=1.0
+# types-croniter>=1.0
-types-cryptography>=3.3
+# types-cryptography>=3.3
-types-dataclasses>=0.1
+# types-dataclasses>=0.1
-types-dateparser>=1.0
+# types-dateparser>=1.0
-types-DateTimeRange>=0.1
+# types-DateTimeRange>=0.1
-types-decorator>=0.1
+# types-decorator>=0.1
-types-Deprecated>=1.2
+# types-Deprecated>=1.2
-types-docopt>=0.6
+# types-docopt>=0.6
-types-docutils>=0.17
+# types-docutils>=0.17
-types-editdistance>=0.5
+# types-editdistance>=0.5
-types-emoji>=1.2
+# types-emoji>=1.2
-types-entrypoints>=0.3
+# types-entrypoints>=0.3
-types-enum34>=1.1
+# types-enum34>=1.1
-types-filelock>=3.2
+# types-filelock>=3.2
-types-first>=2.0
+# types-first>=2.0
-types-Flask>=1.1
+# types-Flask>=1.1
-types-freezegun>=1.1
+# types-freezegun>=1.1
-types-frozendict>=0.1
+# types-frozendict>=0.1
-types-futures>=3.3
+# types-futures>=3.3
-types-html5lib>=1.1
+# types-html5lib>=1.1
-types-httplib2>=0.19
+# types-httplib2>=0.19
-types-humanfriendly>=9.2
+# types-humanfriendly>=9.2
-types-ipaddress>=1.0
+# types-ipaddress>=1.0
-types-itsdangerous>=1.1
+# types-itsdangerous>=1.1
-types-JACK-Client>=0.1
+# types-JACK-Client>=0.1
-types-Jinja2>=2.11
+# types-Jinja2>=2.11
-types-jmespath>=0.10
+# types-jmespath>=0.10
-types-jsonschema>=3.2
+# types-jsonschema>=3.2
-types-Markdown>=3.3
+# types-Markdown>=3.3
-types-MarkupSafe>=1.1
+# types-MarkupSafe>=1.1
-types-mock>=4.0
+# types-mock>=4.0
-types-mypy-extensions>=0.4
+# types-mypy-extensions>=0.4
-types-mysqlclient>=2.0
+# types-mysqlclient>=2.0
-types-oauthlib>=3.1
+# types-oauthlib>=3.1
-types-orjson>=3.6
+# types-orjson>=3.6
-types-paramiko>=2.7
+# types-paramiko>=2.7
-types-Pillow>=8.3
+# types-Pillow>=8.3
-types-polib>=1.1
+# types-polib>=1.1
-types-prettytable>=2.1
+# types-prettytable>=2.1
-types-protobuf>=3.17
+# types-protobuf>=3.17
-types-psutil>=5.8
+# types-psutil>=5.8
-types-psycopg2>=2.9
+# types-psycopg2>=2.9
-types-pyaudio>=0.2
+# types-pyaudio>=0.2
-types-pycurl>=0.1
+# types-pycurl>=0.1
-types-pyfarmhash>=0.2
+# types-pyfarmhash>=0.2
-types-Pygments>=2.9
+# types-Pygments>=2.9
-types-PyMySQL>=1.0
+# types-PyMySQL>=1.0
-types-pyOpenSSL>=20.0
+# types-pyOpenSSL>=20.0
-types-pyRFC3339>=0.1
+# types-pyRFC3339>=0.1
-types-pysftp>=0.2
+# types-pysftp>=0.2
-types-pytest-lazy-fixture>=0.6
+# types-pytest-lazy-fixture>=0.6
-types-python-dateutil>=2.8
+# types-python-dateutil>=2.8
-types-python-gflags>=3.1
+# types-python-gflags>=3.1
-types-python-nmap>=0.6
+# types-python-nmap>=0.6
-types-python-slugify>=5.0
+# types-python-slugify>=5.0
-types-pytz>=2021.1
+# types-pytz>=2021.1
-types-pyvmomi>=7.0
+# types-pyvmomi>=7.0
-types-PyYAML>=5.4
+# types-PyYAML>=5.4
-types-redis>=3.5
+# types-redis>=3.5
-types-requests>=2.25
+# types-requests>=2.25
-types-retry>=0.9
+# types-retry>=0.9
-types-seaborn>0.13.2
-types-selenium>=3.141
+# types-selenium>=3.141
-types-Send2Trash>=1.8
+# types-Send2Trash>=1.8
-types-setuptools>=57.4
+# types-setuptools>=57.4
-types-simplejson>=3.17
+# types-simplejson>=3.17
-types-singledispatch>=3.7
+# types-singledispatch>=3.7
-types-six>=1.16
+# types-six>=1.16
-types-slumber>=0.7
+# types-slumber>=0.7
-types-stripe>=2.59
+# types-stripe>=2.59
-types-tabulate>=0.8
+# types-tabulate>=0.8
-types-termcolor>=1.1
+# types-termcolor>=1.1
-types-toml>=0.10
+# types-toml>=0.10
-types-toposort>=1.6
+# types-toposort>=1.6
-types-ttkthemes>=3.2
+# types-ttkthemes>=3.2
-types-typed-ast>=1.4
+# types-typed-ast>=1.4
-types-tzlocal>=0.1
+# types-tzlocal>=0.1
-types-ujson>=0.1
+# types-ujson>=0.1
-types-vobject>=0.9
+# types-vobject>=0.9
-types-waitress>=0.1
+# types-waitress>=0.1
-types-Werkzeug>=1.0
+#types-Werkzeug>=1.0
-types-xxhash>=2.0
+#types-xxhash>=2.0
 typing-extensions>=3.10.0.2
 Unidecode>=1.3.3
 urllib3>=1.26.5

View File

@ -1,139 +0,0 @@
from __future__ import annotations
import os
from typing import Any, Dict, List, Tuple
# ---
from cvttpy_tools.app import App
from cvttpy_tools.base import NamedObject
from cvttpy_tools.config import CvttAppConfig
# ---
from cvttpy_trading.trading.instrument import ExchangeInstrument
from cvttpy_trading.settings.instruments import Instruments
# ---
from pairs_trading.lib.pt_strategy.results import (
PairResearchResult,
create_result_database,
store_config_in_database,
)
from pairs_trading.lib.pt_strategy.research_strategy import PtResearchStrategy
from pairs_trading.lib.tools.filetools import resolve_datafiles
InstrumentTypeT = str
class Runner(NamedObject):
def __init__(self):
App()
CvttAppConfig()
# App.instance().add_cmdline_arg(
# "--config", type=str, required=True, help="Path to the configuration file."
# )
App.instance().add_cmdline_arg(
"--date_pattern",
type=str,
required=True,
help="Date YYYYMMDD, allows * and ? wildcards",
)
App.instance().add_cmdline_arg(
"--instruments",
type=str,
required=True,
help="Comma-separated list of instrument symbols (e.g., COIN:EQUITY,GBTC:CRYPTO)",
)
App.instance().add_cmdline_arg(
"--result_db",
type=str,
required=True,
help="Path to SQLite database for storing results. Use 'NONE' to disable database output.",
)
App.instance().add_call(stage=App.Stage.Config, func=self._on_config())
App.instance().add_call(stage=App.Stage.Run, func=self.run())
async def _on_config(self) -> None:
# Resolve data files (CLI takes priority over config)
instruments: List[ExchangeInstrument] = self._get_instruments()
datafiles = resolve_datafiles(
config=CvttAppConfig.instance(),
date_pattern=App.instance().get_argument("date_pattern"),
instruments=instruments,
)
days = list(set([day for day, _ in datafiles]))
print(f"Found {len(datafiles)} data files to process:")
for df in datafiles:
print(f" - {df}")
# Create result database if needed
if App.instance().get_argument("result_db").upper() != "NONE":
create_result_database(App.instance().get_argument("result_db"))
# Initialize a dictionary to store all trade results
all_results: Dict[str, Dict[str, Any]] = {}
is_config_stored = False
# Process each data file
results = PairResearchResult(config=CvttAppConfig.instance())
for day in sorted(days):
md_datafiles = [datafile for md_day, datafile in datafiles if md_day == day]
if not all([os.path.exists(datafile) for datafile in md_datafiles]):
print(f"WARNING: insufficient data files: {md_datafiles}")
exit(1)
print(f"\n====== Processing {day} ======")
if not is_config_stored:
store_config_in_database(
db_path=App.instance().get_argument("result_db"),
config_file_path=App.instance().get_argument("config"),
config=CvttAppConfig.instance(),
datafiles=datafiles,
instruments=instruments,
)
is_config_stored = True
CvttAppConfig.instance().set_value("datafiles", md_datafiles)
pt_strategy = PtResearchStrategy(
config=CvttAppConfig.instance(),
instruments=instruments,
)
pt_strategy.run()
results.add_day_results(
day=day,
trades=pt_strategy.day_trades(),
outstanding_positions=pt_strategy.outstanding_positions(),
)
results.analyze_pair_performance()
def _get_instruments(self) -> List[ExchangeInstrument]:
res: List[ExchangeInstrument] = []
for inst in App.instance().get_argument("instruments").split(","):
instrument_type = inst.split(":")[0]
exchange_id = inst.split(":")[1]
instrument_id = inst.split(":")[2]
exch_inst: ExchangeInstrument = Instruments.instance().get_exch_inst(
exch_id=exchange_id, inst_id=instrument_id, src=f"{self.fname()}"
)
exch_inst.user_data_["instrument_type"] = instrument_type
res.append(exch_inst)
return res
async def run(self) -> None:
if App.instance().get_argument("result_db").upper() != "NONE":
print(
f'\nResults stored in database: {App.instance().get_argument("result_db")}'
)
else:
print("No results to display.")
if __name__ == "__main__":
Runner()
App.instance().run()

View File

@ -0,0 +1,127 @@
import argparse
import glob
import importlib
import os
from datetime import date, datetime
from typing import Any, Dict, List, Optional
import pandas as pd
from tools.config import expand_filename, load_config
from tools.data_loader import get_available_instruments_from_db
from pt_trading.results import (
BacktestResult,
create_result_database,
store_config_in_database,
store_results_in_database,
)
from pt_trading.fit_method import PairsTradingFitMethod
from pt_trading.trading_pair import TradingPair
from research.research_tools import create_pairs, resolve_datafiles
def main() -> None:
parser = argparse.ArgumentParser(description="Run pairs trading backtest.")
parser.add_argument(
"--config", type=str, required=True, help="Path to the configuration file."
)
parser.add_argument(
"--datafile",
type=str,
required=False,
help="Market data file to process.",
)
parser.add_argument(
"--instruments",
type=str,
required=False,
help = "Comma-separated list of instrument symbols (e.g., COIN,GBTC). If not provided, auto-detects from database.",
)
args = parser.parse_args()
config: Dict = load_config(args.config)
# Resolve data files (CLI takes priority over config)
    datafiles = resolve_datafiles(config, args.datafile)
    if not datafiles:
        print("No data files found to process.")
        return
    datafile = datafiles[0]
    print(f"Processing data file: {datafile}")
# # Create result database if needed
# if args.result_db.upper() != "NONE":
# args.result_db = expand_filename(args.result_db)
# create_result_database(args.result_db)
# # Initialize a dictionary to store all trade results
# all_results: Dict[str, Dict[str, Any]] = {}
# # Store configuration in database for reference
# if args.result_db.upper() != "NONE":
# # Get list of all instruments for storage
# all_instruments = []
# for datafile in datafiles:
# if args.instruments:
# file_instruments = [
# inst.strip() for inst in args.instruments.split(",")
# ]
# else:
# file_instruments = get_available_instruments_from_db(datafile, config)
# all_instruments.extend(file_instruments)
# # Remove duplicates while preserving order
# unique_instruments = list(dict.fromkeys(all_instruments))
# store_config_in_database(
# db_path=args.result_db,
# config_file_path=args.config,
# config=config,
# fit_method_class=fit_method_class_name,
# datafiles=datafiles,
# instruments=unique_instruments,
# )
# Process each data file
stat_model_price = config["stat_model_price"]
print(f"\n====== Processing {os.path.basename(datafile)} ======")
# Determine instruments to use
if args.instruments:
# Use CLI-specified instruments
instruments = [inst.strip() for inst in args.instruments.split(",")]
print(f"Using CLI-specified instruments: {instruments}")
else:
# Auto-detect instruments from database
instruments = get_available_instruments_from_db(datafile, config)
print(f"Auto-detected instruments: {instruments}")
if not instruments:
print(f"No instruments found in {datafile}...")
return
# Process data for this file
try:
cointegration_data: pd.DataFrame = pd.DataFrame()
for pair in create_pairs(datafile, stat_model_price, config, instruments):
cointegration_data = pd.concat([cointegration_data, pair.cointegration_check()])
pd.set_option('display.width', 400)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
print(f"cointegration_data:\n{cointegration_data}")
except Exception as err:
print(f"Error processing {datafile}: {str(err)}")
import traceback
traceback.print_exc()
if __name__ == "__main__":
main()

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,16 @@
{
"cells": [],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

232
research/pt_backtest.py Normal file
View File

@ -0,0 +1,232 @@
import argparse
import glob
import os
import re
from typing import Any, Dict, List, Tuple
import pandas as pd
from research.research_tools import create_pairs
from tools.config import expand_filename, load_config
from pt_trading.results import (
BacktestResult,
create_result_database,
store_config_in_database,
)
from pt_trading.fit_method import PairsTradingFitMethod
from pt_trading.trading_pair import TradingPair
DayT = str
DataFileNameT = str
def resolve_datafiles(
config: Dict, date_pattern: str, instruments: List[Dict[str, str]]
) -> List[Tuple[DayT, DataFileNameT]]:
resolved_files: List[Tuple[DayT, DataFileNameT]] = []
for inst in instruments:
pattern = date_pattern
inst_type = inst["instrument_type"]
data_dir = config["market_data_loading"][inst_type]["data_directory"]
if "*" in pattern or "?" in pattern:
# Handle wildcards
if not os.path.isabs(pattern):
pattern = os.path.join(data_dir, f"{pattern}.mktdata.ohlcv.db")
matched_files = glob.glob(pattern)
for matched_file in matched_files:
match = re.search(r"(\d{8})\.mktdata\.ohlcv\.db$", matched_file)
assert match is not None
day = match.group(1)
resolved_files.append((day, matched_file))
else:
# Handle explicit file path
if not os.path.isabs(pattern):
pattern = os.path.join(data_dir, f"{pattern}.mktdata.ohlcv.db")
resolved_files.append((date_pattern, pattern))
return sorted(list(set(resolved_files))) # Remove duplicates and sort
def get_instruments(args: argparse.Namespace, config: Dict) -> List[Dict[str, str]]:
instruments = [
{
"symbol": inst.split(":")[0],
"instrument_type": inst.split(":")[1],
"exchange_id": inst.split(":")[2],
"instrument_id_pfx": config["market_data_loading"][inst.split(":")[1]][
"instrument_id_pfx"
],
"db_table_name": config["market_data_loading"][inst.split(":")[1]][
"db_table_name"
],
}
for inst in args.instruments.split(",")
]
return instruments
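# Example (illustrative): the CLI string "COIN:EQUITY:ALPACA" parses to
#   {"symbol": "COIN", "instrument_type": "EQUITY", "exchange_id": "ALPACA",
#    "instrument_id_pfx": ..., "db_table_name": ...}
# with the last two fields taken from config["market_data_loading"]["EQUITY"].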
def run_backtest(
config: Dict,
datafiles: List[str],
fit_method: PairsTradingFitMethod,
instruments: List[Dict[str, str]],
) -> BacktestResult:
"""
Run backtest for all pairs using the specified instruments.
"""
bt_result: BacktestResult = BacktestResult(config=config)
# if len(datafiles) < 2:
# print(f"WARNING: insufficient data files: {datafiles}")
# return bt_result
if not all([os.path.exists(datafile) for datafile in datafiles]):
print(f"WARNING: data file {datafiles} does not exist")
return bt_result
pairs_trades = []
pairs = create_pairs(
datafiles=datafiles,
fit_method=fit_method,
config=config,
instruments=instruments,
)
for pair in pairs:
single_pair_trades = fit_method.run_pair(pair=pair, bt_result=bt_result)
if single_pair_trades is not None and len(single_pair_trades) > 0:
pairs_trades.append(single_pair_trades)
print(f"pairs_trades:\n{pairs_trades}")
# Check if result_list has any data before concatenating
if len(pairs_trades) == 0:
print("No trading signals found for any pairs")
return bt_result
bt_result.collect_single_day_results(pairs_trades)
return bt_result
def main() -> None:
parser = argparse.ArgumentParser(description="Run pairs trading backtest.")
parser.add_argument(
"--config", type=str, required=True, help="Path to the configuration file."
)
parser.add_argument(
"--date_pattern",
type=str,
required=True,
help="Date YYYYMMDD, allows * and ? wildcards",
)
parser.add_argument(
"--instruments",
type=str,
required=True,
help="Comma-separated list of instrument symbols (e.g., COIN:EQUITY,GBTC:CRYPTO)",
)
parser.add_argument(
"--result_db",
type=str,
required=True,
help="Path to SQLite database for storing results. Use 'NONE' to disable database output.",
)
args = parser.parse_args()
config: Dict = load_config(args.config)
# Dynamically instantiate fit method class
fit_method = PairsTradingFitMethod.create(config)
# Resolve data files (CLI takes priority over config)
instruments = get_instruments(args, config)
datafiles = resolve_datafiles(config, args.date_pattern, instruments)
    days = {day for day, _ in datafiles}
print(f"Found {len(datafiles)} data files to process:")
for df in datafiles:
print(f" - {df}")
# Create result database if needed
if args.result_db.upper() != "NONE":
args.result_db = expand_filename(args.result_db)
create_result_database(args.result_db)
# Initialize a dictionary to store all trade results
all_results: Dict[str, Dict[str, Any]] = {}
is_config_stored = False
# Process each data file
for day in sorted(days):
md_datafiles = [datafile for md_day, datafile in datafiles if md_day == day]
if not all([os.path.exists(datafile) for datafile in md_datafiles]):
print(f"WARNING: insufficient data files: {md_datafiles}")
continue
print(f"\n====== Processing {day} ======")
        if not is_config_stored and args.result_db.upper() != "NONE":
            store_config_in_database(
                db_path=args.result_db,
                config_file_path=args.config,
                config=config,
                fit_method_class=config["fit_method_class"],
                datafiles=datafiles,
                instruments=instruments,
            )
            is_config_stored = True
# Process data for this file
try:
fit_method.reset()
bt_results = run_backtest(
config=config,
datafiles=md_datafiles,
fit_method=fit_method,
instruments=instruments,
)
if bt_results.trades is None or len(bt_results.trades) == 0:
print(f"No trades found for {day}")
continue
            # Store results keyed by day (the YYYYMMDD string itself)
            all_results[day] = {
                "trades": bt_results.trades.copy(),
                "outstanding_positions": bt_results.outstanding_positions.copy(),
            }
            # Store results in database
            if args.result_db.upper() != "NONE":
                bt_results.calculate_returns(
                    {
                        day: {
                            "trades": bt_results.trades.copy(),
                            "outstanding_positions": bt_results.outstanding_positions.copy(),
                        }
                    }
                )
                bt_results.store_results_in_database(db_path=args.result_db, day=day)
            print(f"Successfully processed {day}")
except Exception as err:
print(f"Error processing {day}: {str(err)}")
import traceback
traceback.print_exc()
# Calculate and print results using a new BacktestResult instance for aggregation
if all_results:
aggregate_bt_results = BacktestResult(config=config)
aggregate_bt_results.calculate_returns(all_results)
aggregate_bt_results.print_grand_totals()
aggregate_bt_results.print_outstanding_positions()
if args.result_db.upper() != "NONE":
print(f"\nResults stored in database: {args.result_db}")
else:
print("No results to display.")
if __name__ == "__main__":
main()

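The driver above groups the (day, datafile) tuples returned by resolve_datafiles by day, then backtests each day's files as one batch. A minimal sketch of that grouping, assuming hypothetical paths and dates (none of these files are real):

# Illustrative sketch: how (day, datafile) tuples group into per-day batches.
datafiles = [
    ("20250605", "/data/EQUITY/20250605.mktdata.ohlcv.db"),
    ("20250605", "/data/CRYPTO/20250605.mktdata.ohlcv.db"),
    ("20250606", "/data/EQUITY/20250606.mktdata.ohlcv.db"),
]
days = sorted({day for day, _ in datafiles})
for day in days:
    md_datafiles = [f for d, f in datafiles if d == day]
    print(day, md_datafiles)
# 20250605 ['/data/EQUITY/20250605.mktdata.ohlcv.db', '/data/CRYPTO/20250605.mktdata.ohlcv.db']
# 20250606 ['/data/EQUITY/20250606.mktdata.ohlcv.db']
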
View File

@ -0,0 +1,93 @@
import glob
import os
from typing import Dict, List, Optional
import pandas as pd
from pt_trading.fit_method import PairsTradingFitMethod
def resolve_datafiles(config: Dict, cli_datafiles: Optional[str] = None) -> List[str]:
"""
Resolve the list of data files to process.
CLI datafiles take priority over config datafiles.
Supports wildcards in config but not in CLI.
"""
if cli_datafiles:
# CLI override - comma-separated list, no wildcards
datafiles = [f.strip() for f in cli_datafiles.split(",")]
# Make paths absolute relative to data directory
data_dir = config.get("data_directory", "./data")
resolved_files = []
for df in datafiles:
if not os.path.isabs(df):
df = os.path.join(data_dir, df)
resolved_files.append(df)
return resolved_files
# Use config datafiles with wildcard support
config_datafiles = config.get("datafiles", [])
data_dir = config.get("data_directory", "./data")
resolved_files = []
for pattern in config_datafiles:
if "*" in pattern or "?" in pattern:
# Handle wildcards
if not os.path.isabs(pattern):
pattern = os.path.join(data_dir, pattern)
matched_files = glob.glob(pattern)
resolved_files.extend(matched_files)
else:
# Handle explicit file path
if not os.path.isabs(pattern):
pattern = os.path.join(data_dir, pattern)
resolved_files.append(pattern)
return sorted(list(set(resolved_files))) # Remove duplicates and sort
def create_pairs(
datafiles: List[str],
fit_method: PairsTradingFitMethod,
config: Dict,
instruments: List[Dict[str, str]],
) -> List:
    from tools.data_loader import load_market_data
all_indexes = range(len(instruments))
unique_index_pairs = [(i, j) for i in all_indexes for j in all_indexes if i < j]
pairs = []
# Update config to use the specified instruments
config_copy = config.copy()
config_copy["instruments"] = instruments
market_data_df = pd.DataFrame()
    # Load extra minutes of data to cover any execution-price shift
    extra_minutes = 0
    if "execution_price" in config_copy:
        extra_minutes = config_copy["execution_price"]["shift"]
    for datafile in datafiles:
        md_df = load_market_data(
            datafile=datafile,
            instruments=instruments,
            db_table_name=config_copy["market_data_loading"][
                instruments[0]["instrument_type"]
            ]["db_table_name"],
            trading_hours=config_copy["trading_hours"],
            extra_minutes=extra_minutes,
        )
        market_data_df = pd.concat([market_data_df, md_df])
    # Every requested symbol must be present before pairs can be formed
    if market_data_df.empty or len(set(market_data_df["symbol"])) != len(instruments):
        print(f"WARNING: insufficient data in files: {datafiles}")
        return []
for a_index, b_index in unique_index_pairs:
        symbol_a = instruments[a_index]["symbol"]
        symbol_b = instruments[b_index]["symbol"]
pair = fit_method.create_trading_pair(
config=config_copy,
market_data=market_data_df,
symbol_a=symbol_a,
symbol_b=symbol_b,
)
pairs.append(pair)
return pairs

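create_pairs enumerates every unordered instrument pair with the i < j double loop before fitting each one. A small self-contained sketch (placeholder symbols, not real instruments) showing the enumeration matches itertools.combinations:

from itertools import combinations

instruments = ["AAA", "BBB", "CCC"]  # placeholder symbols
all_indexes = range(len(instruments))
unique_index_pairs = [(i, j) for i in all_indexes for j in all_indexes if i < j]
assert unique_index_pairs == list(combinations(all_indexes, 2))
print(unique_index_pairs)  # [(0, 1), (0, 2), (1, 2)]
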
221
strategy/pair_strategy.py Normal file
View File

@ -0,0 +1,221 @@
import argparse
import importlib
import os
from typing import Any, Dict, List

import pandas as pd

from research.research_tools import resolve_datafiles
from tools.config import load_config
from tools.data_loader import get_available_instruments_from_db, load_market_data
from pt_trading.results import (
    BacktestResult,
    create_result_database,
    store_config_in_database,
    store_results_in_database,
)
from pt_trading.fit_method import PairsTradingFitMethod
from pt_trading.trading_pair import TradingPair
def run_strategy(
config: Dict,
datafile: str,
fit_method: PairsTradingFitMethod,
instruments: List[str],
) -> BacktestResult:
"""
Run backtest for all pairs using the specified instruments.
"""
bt_result: BacktestResult = BacktestResult(config=config)
    def _create_pairs(config: Dict, instruments: List[str]) -> List[TradingPair]:
        # datafile is read from the enclosing run_strategy scope
all_indexes = range(len(instruments))
unique_index_pairs = [(i, j) for i in all_indexes for j in all_indexes if i < j]
pairs = []
# Update config to use the specified instruments
config_copy = config.copy()
config_copy["instruments"] = instruments
market_data_df = load_market_data(
datafile=datafile,
exchange_id=config_copy["exchange_id"],
instruments=config_copy["instruments"],
instrument_id_pfx=config_copy["instrument_id_pfx"],
db_table_name=config_copy["db_table_name"],
trading_hours=config_copy["trading_hours"],
)
for a_index, b_index in unique_index_pairs:
pair = fit_method.create_trading_pair(
market_data=market_data_df,
symbol_a=instruments[a_index],
symbol_b=instruments[b_index],
)
pairs.append(pair)
return pairs
pairs_trades = []
for pair in _create_pairs(config, instruments):
single_pair_trades = fit_method.run_pair(
pair=pair, config=config, bt_result=bt_result
)
if single_pair_trades is not None and len(single_pair_trades) > 0:
pairs_trades.append(single_pair_trades)
    # Nothing to concatenate if no pair produced trades
if len(pairs_trades) == 0:
print("No trading signals found for any pairs")
return bt_result
result = pd.concat(pairs_trades, ignore_index=True)
result["time"] = pd.to_datetime(result["time"])
result = result.set_index("time").sort_index()
bt_result.collect_single_day_results(result)
return bt_result
def main() -> None:
parser = argparse.ArgumentParser(description="Run pairs trading backtest.")
parser.add_argument(
"--config", type=str, required=True, help="Path to the configuration file."
)
parser.add_argument(
"--datafiles",
type=str,
required=False,
help="Comma-separated list of data files (overrides config). No wildcards supported.",
)
parser.add_argument(
"--instruments",
type=str,
required=False,
help="Comma-separated list of instrument symbols (e.g., COIN,GBTC). If not provided, auto-detects from database.",
)
parser.add_argument(
"--result_db",
type=str,
required=True,
help="Path to SQLite database for storing results. Use 'NONE' to disable database output.",
)
args = parser.parse_args()
config: Dict = load_config(args.config)
# Dynamically instantiate fit method class
fit_method_class_name = config.get("fit_method_class", None)
assert fit_method_class_name is not None
module_name, class_name = fit_method_class_name.rsplit(".", 1)
module = importlib.import_module(module_name)
fit_method = getattr(module, class_name)()
# Resolve data files (CLI takes priority over config)
datafiles = resolve_datafiles(config, args.datafiles)
if not datafiles:
print("No data files found to process.")
return
print(f"Found {len(datafiles)} data files to process:")
for df in datafiles:
print(f" - {df}")
# Create result database if needed
if args.result_db.upper() != "NONE":
create_result_database(args.result_db)
# Initialize a dictionary to store all trade results
all_results: Dict[str, Dict[str, Any]] = {}
# Store configuration in database for reference
if args.result_db.upper() != "NONE":
# Get list of all instruments for storage
all_instruments = []
for datafile in datafiles:
if args.instruments:
file_instruments = [
inst.strip() for inst in args.instruments.split(",")
]
else:
file_instruments = get_available_instruments_from_db(datafile, config)
all_instruments.extend(file_instruments)
# Remove duplicates while preserving order
unique_instruments = list(dict.fromkeys(all_instruments))
store_config_in_database(
db_path=args.result_db,
config_file_path=args.config,
config=config,
fit_method_class=fit_method_class_name,
datafiles=datafiles,
instruments=unique_instruments,
)
# Process each data file
for datafile in datafiles:
print(f"\n====== Processing {os.path.basename(datafile)} ======")
# Determine instruments to use
if args.instruments:
# Use CLI-specified instruments
instruments = [inst.strip() for inst in args.instruments.split(",")]
print(f"Using CLI-specified instruments: {instruments}")
else:
# Auto-detect instruments from database
instruments = get_available_instruments_from_db(datafile, config)
print(f"Auto-detected instruments: {instruments}")
if not instruments:
print(f"No instruments found for {datafile}, skipping...")
continue
# Process data for this file
try:
fit_method.reset()
bt_results = run_strategy(
config=config,
datafile=datafile,
fit_method=fit_method,
instruments=instruments,
)
# Store results with file name as key
filename = os.path.basename(datafile)
all_results[filename] = {"trades": bt_results.trades.copy()}
# Store results in database
if args.result_db.upper() != "NONE":
store_results_in_database(args.result_db, datafile, bt_results)
print(f"Successfully processed {filename}")
except Exception as err:
print(f"Error processing {datafile}: {str(err)}")
import traceback
traceback.print_exc()
# Calculate and print results using a new BacktestResult instance for aggregation
if all_results:
aggregate_bt_results = BacktestResult(config=config)
aggregate_bt_results.calculate_returns(all_results)
aggregate_bt_results.print_grand_totals()
aggregate_bt_results.print_outstanding_positions()
if args.result_db.upper() != "NONE":
print(f"\nResults stored in database: {args.result_db}")
else:
print("No results to display.")
if __name__ == "__main__":
    main()
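
Both entry points finish the same way: a mapping of run key to {"trades": DataFrame, ...} is handed to a fresh BacktestResult for aggregation. A sketch of that contract with a stand-in aggregation; the pnl column and the summing logic here are illustrative assumptions, not the pt_trading implementation:

import pandas as pd

# Hypothetical shape of all_results; column names are illustrative only.
all_results = {
    "20250605": {"trades": pd.DataFrame({"symbol": ["COIN", "MSTR"], "pnl": [12.5, -3.1]})},
    "20250606": {"trades": pd.DataFrame({"symbol": ["COIN"], "pnl": [4.2]})},
}

# Aggregate across runs the way a grand-total report would:
grand_total = sum(day["trades"]["pnl"].sum() for day in all_results.values())
print(f"grand total pnl: {grand_total:.2f}")  # grand total pnl: 13.60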