pairs_trading/src/pt_backtest.py

import datetime
import sys
import json

from typing import Any, Dict, List, Tuple, Optional

import pandas as pd
import numpy as np

# ============= statsmodels ===================
from statsmodels.tsa.vector_ar.vecm import VECM

NanoPerMin = 1e9
UNSET_FLOAT: float = sys.float_info.max
UNSET_INT: int = sys.maxsize

# ------------------------ Configuration ------------------------
# Default configuration
CRYPTO_CONFIG: Dict = {
    # --- Data retrieval
    "data_directory": "./data/crypto",
    "datafiles": [
        "20250519.mktdata.ohlcv.db",
    ],
    "db_table_name": "bnbspot_ohlcv_1min",

    # ----- Instruments
    "exchange_id": "BNBSPOT",
    "instrument_id_pfx": "PAIR-",

    "instruments": [
        "BTC-USDT",
        "ETH-USDT",
        "LTC-USDT",
    ],

    "trading_hours": {
        "begin_session": "00:00:00",
        "end_session": "23:59:00",
        "timezone": "UTC"
    },

    # ----- Model Settings
    "price_column": "close",
    "min_required_points": 30,
    "zero_threshold": 1e-10,
    "equilibrium_threshold_open": 5.0,
    "equilibrium_threshold_close": 1.0,
    "training_minutes": 120,

    # ----- Validation
    "funding_per_pair": 2000.0,  # USD
}
# ========================== EQUITIES
EQT_CONFIG: Dict = {
    # --- Data retrieval
    "data_directory": "./data/equity",
    "datafiles": [
        "20250508.alpaca_sim_md.db",
        # "20250509.alpaca_sim_md.db",
        # "20250512.alpaca_sim_md.db",
        # "20250513.alpaca_sim_md.db",
        # "20250514.alpaca_sim_md.db",
        # "20250515.alpaca_sim_md.db",
        # "20250516.alpaca_sim_md.db",
        # "20250519.alpaca_sim_md.db",
        # "20250520.alpaca_sim_md.db"
    ],
    "db_table_name": "md_1min_bars",

     # ----- Instruments
   "exchange_id": "ALPACA",
    "instrument_id_pfx": "STOCK-",

    "instruments": [
        "COIN",
        "GBTC",
        "HOOD",
        "MSTR",
        "PYPL",
    ],

    "trading_hours": {
        "begin_session": "9:30:00",
        "end_session": "16:00:00",
        "timezone": "America/New_York"
    },

    # ----- Model Settings
    "price_column": "close",
    "min_required_points": 30,
    "zero_threshold": 1e-10,
    "equilibrium_threshold_open": 5.0,
    "equilibrium_threshold_close": 1.0,
    "training_minutes": 120,

    # ----- Validation
    "funding_per_pair": 2000.0,
}

# ==========================================================================
CONFIG = EQT_CONFIG
TRADES = {}
TOTAL_UNREALIZED_PNL = 0.0  # Global variable to track total unrealized PnL
TOTAL_REALIZED_PNL = 0.0    # Global variable to track total realized PnL
OUTSTANDING_POSITIONS = []  # Global list to track outstanding positions with share quantities

class TradingPair:
    symbol_a_: str
    symbol_b_: str
    price_column_: str

    def __init__(self, symbol_a: str, symbol_b: str, price_column: str):
        self.symbol_a_ = symbol_a
        self.symbol_b_ = symbol_b
        self.price_column_ = price_column

    def colnames(self) -> List[str]:
        return [f"{self.price_column_}_{self.symbol_a_}", f"{self.price_column_}_{self.symbol_b_}"]

    def __repr__(self) ->str:
        return f"{self.symbol_a_} & {self.symbol_b_}"

def convert_time_to_UTC(value: str, timezone: str):

    from zoneinfo import ZoneInfo
    from datetime import datetime

    # Parse it to naive datetime object
    local_dt = datetime.strptime(value, '%Y-%m-%d %H:%M:%S')

    zinfo = ZoneInfo(timezone)
    result = local_dt.replace(tzinfo=zinfo)

    result = result.astimezone(ZoneInfo('UTC'))
    result = result.strftime('%Y-%m-%d %H:%M:%S')

    return result


    pass

def load_market_data(datafile: str, config: Dict) -> pd.DataFrame:
    from tools.data_loader import load_sqlite_to_dataframe

    instrument_ids = ["\"" + config["instrument_id_pfx"] + instrument + "\"" for instrument in config["instruments"]]
    exchange_id = config["exchange_id"]

    query = "select tstamp"
    query += ", tstamp_ns as time_ns"
    query += ", substr(instrument_id, 7) as symbol"
    query += ", open"
    query += ", high"
    query += ", low"
    query += ", close"
    query += ", volume"
    query += ", num_trades"
    query += ", vwap"

    query += f" from {config['db_table_name']}"
    query += f" where exchange_id ='{exchange_id}'"
    query += f" and instrument_id in ({','.join(instrument_ids)})"

    df = load_sqlite_to_dataframe(db_path=datafile, query=query)

    # Trading Hours
    date_str = df["tstamp"][0][0:10]
    trading_hours = CONFIG['trading_hours']
    start_time = f"{date_str} {trading_hours['begin_session']}"
    end_time = f"{date_str} {trading_hours['end_session']}"

    start_time = convert_time_to_UTC(start_time, trading_hours["timezone"])
    end_time = convert_time_to_UTC(end_time, trading_hours["timezone"])

    # Perform boolean selection
    df = df[(df["tstamp"] >= start_time) & (df["tstamp"] <= end_time)]
    df["tstamp"] = pd.to_datetime(df["tstamp"])

    return df

def transform_dataframe(df: pd.DataFrame, price_column: str):
    # Select only the columns we need
    df_selected = df[["tstamp", "symbol", price_column]]

    # Start with unique timestamps
    result_df: pd.DataFrame = pd.DataFrame(df_selected["tstamp"]).drop_duplicates().reset_index(drop=True)

    # For each unique symbol, add a corresponding close price column
    for symbol in df_selected["symbol"].unique():
        # Filter rows for this symbol
        df_symbol = df_selected[df_selected["symbol"] == symbol].reset_index(drop=True)

        # Create column name like "close-COIN"
        new_price_column = f"{price_column}_{symbol}"

        # Create temporary dataframe with timestamp and price
        temp_df = pd.DataFrame({
            "tstamp": df_symbol["tstamp"],
            new_price_column: df_symbol[price_column]
        })

        # Join with our result dataframe
        result_df = pd.merge(result_df, temp_df, on="tstamp", how="left")
        result_df = result_df.reset_index(drop=True) # do not dropna() since irrelevant symbol would affect dataset

    return result_df

def get_datasets(df: pd.DataFrame, training_minutes: int, pair: TradingPair) -> Tuple[pd.DataFrame, pd.DataFrame]:
    # Training dataset
    colname_a, colname_b = pair.colnames()
    df = df[["tstamp", colname_a, colname_b]]
    df = df.dropna()

    training_df = df.iloc[:training_minutes - 1, :].copy()
    training_df.reset_index(drop=True).dropna().reset_index(drop=True)

    # Testing dataset
    testing_df = df.iloc[training_minutes:, :].copy()
    testing_df.reset_index(drop=True).dropna().reset_index(drop=True)

    return (training_df, testing_df)

def fit_VECM(training_pair_df, pair: TradingPair):
    vecm_model = VECM(training_pair_df[pair.colnames()].reset_index(drop=True), coint_rank=1)
    vecm_fit = vecm_model.fit()

    # Check if the model converged properly
    if not hasattr(vecm_fit, "beta") or vecm_fit.beta is None:
        print(f"{pair}: VECM model failed to converge properly")

    return vecm_fit

def create_trading_signals(vecm_fit, testing_pair_df, pair: TradingPair) -> pd.DataFrame:
    result_columns = [
        "time",
        "action",
        "symbol",
        "price",
        "equilibrium",
        "pair",
    ]

    next_values = vecm_fit.predict(steps=len(testing_pair_df))
    colname_a, colname_b = pair.colnames()

    # Convert prediction to a DataFrame for readability
    predicted_df = pd.DataFrame(next_values, columns=[colname_a, colname_b])

    beta = vecm_fit.beta

    predicted_df["equilibrium_term"] = (
        beta[0] * predicted_df[colname_a]
        + beta[1] * predicted_df[colname_b]
    )

    pair_result_df = pd.merge(
        testing_pair_df.reset_index(drop=True), predicted_df, left_index=True, right_index=True, suffixes=('', '_pred')
    ).dropna()

    pair_result_df["equilibrium"] = (
        beta[0] * pair_result_df[colname_a]
        + beta[1] * pair_result_df[colname_b]
    )

    pair_result_df["abs_equilibrium"] = np.abs(pair_result_df["equilibrium"])
    pair_result_df["scaled_equilibrium"] = pair_result_df["abs_equilibrium"] / (abs(beta[0]) * pair_result_df[colname_a] + abs(beta[1]) * pair_result_df[colname_b])

    # Reset index to ensure proper indexing
    pair_result_df = pair_result_df.reset_index()

    # Iterate through the testing dataset to find the first trading opportunity
    open_row_index = None
    initial_abs_term = None

    for row_idx in range(len(pair_result_df)):
        current_abs_term = pair_result_df["abs_equilibrium"][row_idx]

        # Check if current row has sufficient equilibrium (not near-zero)
        if current_abs_term >= CONFIG["equilibrium_threshold_open"]:
            open_row_index = row_idx
            initial_abs_term = current_abs_term
            break

    # If no row with sufficient equilibrium found, skip this pair
    if open_row_index is None:
        print(f"{pair}: Insufficient divergence in testing dataset. Skipping.")
        return pd.DataFrame()

    # Look for close signal starting from the open position
    trading_signals_df = (
        pair_result_df["abs_equilibrium"][open_row_index:]
        # < initial_abs_term / CONFIG["equilibrium_threshold_close"]
        < CONFIG["equilibrium_threshold_close"]
    )

    # Adjust indices to account for the offset from open_row_index
    close_row_index = None
    for idx, value in trading_signals_df.items():
        if value:
            close_row_index = idx
            break

    open_row = pair_result_df.loc[open_row_index]
    open_tstamp = open_row["tstamp"]
    open_eqlbrm = open_row["equilibrium"]
    open_px_a = open_row[f"{colname_a}"]
    open_px_b = open_row[f"{colname_b}"]

    abs_beta = abs(beta[1])
    pred_px_b = pair_result_df.loc[open_row_index][f"{colname_b}_pred"]
    pred_px_a = pair_result_df.loc[open_row_index][f"{colname_a}_pred"]

    if pred_px_b * abs_beta - pred_px_a > 0:
        open_side_a = "BUY"
        open_side_b = "SELL"
        close_side_a = "SELL"
        close_side_b = "BUY"
    else:
        open_side_b = "BUY"
        open_side_a = "SELL"
        close_side_b = "SELL"
        close_side_a = "BUY"

    # If no close signal found, print position and unrealized PnL
    if close_row_index is None:
        global TOTAL_UNREALIZED_PNL, OUTSTANDING_POSITIONS

        last_row_index = len(pair_result_df) - 1
        last_row = pair_result_df.loc[last_row_index]
        last_tstamp = last_row["tstamp"]
        last_px_a = last_row[f"{colname_a}"]
        last_px_b = last_row[f"{colname_b}"]

        # Calculate share quantities based on $1000 funding per pair
        # Split $1000 equally between the two positions ($500 each)
        funding_per_position = CONFIG["funding_per_pair"] / 2
        shares_a = funding_per_position / open_px_a
        shares_b = funding_per_position / open_px_b

        # Calculate unrealized PnL for each position
        if open_side_a == "BUY":
            unrealized_pnl_a = (last_px_a - open_px_a) / open_px_a * 100
            unrealized_dollar_a = shares_a * (last_px_a - open_px_a)
        else:  # SELL
            unrealized_pnl_a = (open_px_a - last_px_a) / open_px_a * 100
            unrealized_dollar_a = shares_a * (open_px_a - last_px_a)

        if open_side_b == "BUY":
            unrealized_pnl_b = (last_px_b - open_px_b) / open_px_b * 100
            unrealized_dollar_b = shares_b * (last_px_b - open_px_b)
        else:  # SELL
            unrealized_pnl_b = (open_px_b - last_px_b) / open_px_b * 100
            unrealized_dollar_b = shares_b * (open_px_b - last_px_b)

        total_unrealized_pnl = unrealized_pnl_a + unrealized_pnl_b
        total_unrealized_dollar = unrealized_dollar_a + unrealized_dollar_b

        # Add to global total
        TOTAL_UNREALIZED_PNL += total_unrealized_pnl

        # Store outstanding positions
        OUTSTANDING_POSITIONS.append({
            'pair': str(pair),
            'symbol_a': pair.symbol_a_,
            'symbol_b': pair.symbol_b_,
            'side_a': open_side_a,
            'side_b': open_side_b,
            'shares_a': shares_a,
            'shares_b': shares_b,
            'open_px_a': open_px_a,
            'open_px_b': open_px_b,
            'current_px_a': last_px_a,
            'current_px_b': last_px_b,
            'unrealized_dollar_a': unrealized_dollar_a,
            'unrealized_dollar_b': unrealized_dollar_b,
            'total_unrealized_dollar': total_unrealized_dollar,
            'open_time': open_tstamp,
            'last_time': last_tstamp,
            'initial_abs_term': initial_abs_term,
            'current_abs_term': pair_result_df.loc[last_row_index, "abs_equilibrium"],
            'closing_threshold': initial_abs_term / CONFIG["equilibrium_threshold_close"],
            'equilibrium_ratio': pair_result_df.loc[last_row_index, "abs_equilibrium"] / (initial_abs_term / CONFIG["equilibrium_threshold_close"])
        })

        print(f"{pair}: NO CLOSE SIGNAL FOUND - Position held until end of session")
        print(f"  Open: {open_tstamp} | Last: {last_tstamp}")
        print(f"  {pair.symbol_a_}: {open_side_a} {shares_a:.2f} shares @ ${open_px_a:.2f} -> ${last_px_a:.2f} | Unrealized: ${unrealized_dollar_a:.2f} ({unrealized_pnl_a:.2f}%)")
        print(f"  {pair.symbol_b_}: {open_side_b} {shares_b:.2f} shares @ ${open_px_b:.2f} -> ${last_px_b:.2f} | Unrealized: ${unrealized_dollar_b:.2f} ({unrealized_pnl_b:.2f}%)")
        print(f"  Total Unrealized: ${total_unrealized_dollar:.2f} ({total_unrealized_pnl:.2f}%)")

        # Return only open trades (no close trades)
        trd_signal_tuples = [
            (
                open_tstamp,
                open_side_a,
                pair.symbol_a_,
                open_px_a,
                open_eqlbrm,
                pair,
            ),
            (
                open_tstamp,
                open_side_b,
                pair.symbol_b_,
                open_px_b,
                open_eqlbrm,
                pair,
            ),
        ]
    else:
        # Close signal found - create complete trade
        close_row = pair_result_df.loc[close_row_index]
        close_tstamp = close_row["tstamp"]
        close_eqlbrm = close_row["equilibrium"]
        close_px_a = close_row[f"{colname_a}"]
        close_px_b = close_row[f"{colname_b}"]

        print(f"{pair}: Close signal found at index {close_row_index}")

        trd_signal_tuples = [
            (
                open_tstamp,
                open_side_a,
                pair.symbol_a_,
                open_px_a,
                open_eqlbrm,
                pair,
            ),
            (
                open_tstamp,
                open_side_b,
                pair.symbol_b_,
                open_px_b,
                open_eqlbrm,
                pair,
            ),
            (
                close_tstamp,
                close_side_a,
                pair.symbol_a_,
                close_px_a,
                close_eqlbrm,
                pair,
            ),
            (
                close_tstamp,
                close_side_b,
                pair.symbol_b_,
                close_px_b,
                close_eqlbrm,
                pair,
            ),
        ]

    # Add tuples to data frame
    return pd.DataFrame(
        trd_signal_tuples,
        columns=result_columns,
    )

def run_single_pair(market_data: pd.DataFrame, price_column:str, pair: TradingPair) -> Optional[pd.DataFrame]:
    colname_a = f"{price_column}_{pair.symbol_a_}"
    colname_b = f"{price_column}_{pair.symbol_b_}"
    training_pair_df, testing_pair_df = get_datasets(df=market_data, training_minutes=CONFIG["training_minutes"], pair=pair)

    # Check if we have enough data points for a meaningful analysis
    min_required_points = CONFIG[
        "min_required_points"
    ]  # Minimum number of points for a reasonable VECM model
    if len(training_pair_df) < min_required_points:
        print(
            f"{pair}: Not enough data points for analysis. Found {len(training_pair_df)}, need at least {min_required_points}"
        )
        return None

    # Check for non-finite values
    if not np.isfinite(training_pair_df).all().all():
        print(f"{pair}: Data contains non-finite values (NaN or inf)")
        return None

    # Fit the VECM
    try:
        vecm_fit = fit_VECM(training_pair_df, pair=pair)
    except Exception as e:
        print(f"{pair}: VECM fitting failed: {str(e)}")
        return None

    # Add safeguard against division by zero
    if (
        abs(vecm_fit.beta[1]) < CONFIG["zero_threshold"]
    ):  # Small threshold to avoid division by very small numbers
        print(f"{pair}: Skipping due to near-zero beta[1] value: {vecm_fit.beta[1]}")
        return None

    try:
        pair_trades = create_trading_signals(
                vecm_fit=vecm_fit,
                testing_pair_df=testing_pair_df,
                pair=pair,
        )
    except Exception as e:
        print(f"{pair}: Prediction failed: {str(e)}")
        return None

    return pair_trades

def add_trade(pair_nm, symbol, action, price):
    pair_nm = str(pair_nm)


    if pair_nm not in TRADES:
        TRADES[pair_nm] = {symbol: []}
    if symbol not in TRADES[pair_nm]:
        TRADES[pair_nm][symbol] = []
    TRADES[pair_nm][symbol].append((action, price))

def collect_single_day_results(result):
    if result is None:
        return

    print("\n --------------  Suggested Trades ")
    print(result)

    for row in result.itertuples():
        action = row.action
        symbol = row.symbol
        price = row.price
        add_trade(pair_nm=row.pair, action=action, symbol=symbol, price=price)

def print_single_day_results(result):
    for pair, symbols in TRADES.items():
        print(f"\n--- {pair} ---")
        for symbol, trades in symbols.items():
            for side, price in trades:
                print(f"{symbol} {side} at ${price}")

def print_results_suummary(all_results):
    # Summary of all processed files
    print("\n====== Summary of All Processed Files ======")
    for filename, data in all_results.items():
        trade_count = sum(
            len(trades)
            for symbol_trades in data["trades"].values()
            for trades in symbol_trades.values()
        )
        print(f"{filename}: {trade_count} trades")


def calculate_returns(all_results: Dict):
    global TOTAL_REALIZED_PNL
    print("\n====== Returns By Day and Pair ======")

    for filename, data in all_results.items():
        day_return = 0
        print(f"\n--- {filename} ---")

        # Process each pair
        for pair, symbols in data["trades"].items():
            pair_return = 0
            pair_trades = []

            # Calculate individual symbol returns in the pair
            for symbol, trades in symbols.items():
                if len(trades) >= 2:  # Need at least entry and exit
                    # Get entry and exit trades
                    entry_action, entry_price = trades[0]
                    exit_action, exit_price = trades[1]

                    # Calculate return based on action
                    symbol_return = 0
                    if entry_action == "BUY" and exit_action == "SELL":
                        # Long position
                        symbol_return = (exit_price - entry_price) / entry_price * 100
                    elif entry_action == "SELL" and exit_action == "BUY":
                        # Short position
                        symbol_return = (entry_price - exit_price) / entry_price * 100

                    pair_trades.append(
                        (
                            symbol,
                            entry_action,
                            entry_price,
                            exit_action,
                            exit_price,
                            symbol_return,
                        )
                    )
                    pair_return += symbol_return

            # Print pair returns
            if pair_trades:
                print(f"  {pair}:")
                for (
                    symbol,
                    entry_action,
                    entry_price,
                    exit_action,
                    exit_price,
                    symbol_return,
                ) in pair_trades:
                    print(
                        f"    {symbol}: {entry_action} @ ${entry_price:.2f}, {exit_action} @ ${exit_price:.2f}, Return: {symbol_return:.2f}%"
                    )
                print(f"    Pair Total Return: {pair_return:.2f}%")
                day_return += pair_return

        # Print day total return and add to global realized PnL
        if day_return != 0:
            print(f"  Day Total Return: {day_return:.2f}%")
            TOTAL_REALIZED_PNL += day_return

def run_pairs(summaries_df: pd.DataFrame, price_column: str) -> None:

    result_df = transform_dataframe(df=summaries_df, price_column=price_column)

    stock_price_columns = [
        column
        for column in result_df.columns
        if column.startswith(f"{price_column}_")
    ]

    # Find the starting indices for A and B
    all_indexes = range(len(stock_price_columns))
    unique_index_pairs = [(i, j) for i in all_indexes for j in all_indexes if i < j]

    pairs_trades = []
    for a_index, b_index in unique_index_pairs:
        # Get the actual variable names
        colname_a = stock_price_columns[a_index]
        colname_b = stock_price_columns[b_index]

        symbol_a = colname_a[len(f"{price_column}-") :]
        symbol_b = colname_b[len(f"{price_column}-") :]
        pair = TradingPair(symbol_a, symbol_b, price_column)

        single_pair_trades = run_single_pair(market_data=result_df, price_column=price_column, pair=pair)
        if len(single_pair_trades) > 0:
            pairs_trades.append(single_pair_trades)
    # Check if result_list has any data before concatenating
    if len(pairs_trades) == 0:
        print("No trading signals found for any pairs")
        return None

    result = pd.concat(pairs_trades, ignore_index=True)
    result["time"] = pd.to_datetime(result["time"])
    result = result.set_index("time").sort_index()

    collect_single_day_results(result)
    # print_single_day_results(result)

def print_outstanding_positions():
    """Print all outstanding positions with share quantities and unrealized PnL"""
    if not OUTSTANDING_POSITIONS:
        print("\n====== NO OUTSTANDING POSITIONS ======")
        return

    print(f"\n====== OUTSTANDING POSITIONS ======")
    print(f"{'Pair':<15} {'Symbol':<6} {'Side':<4} {'Shares':<10} {'Open $':<8} {'Current $':<10} {'Unrealized $':<12} {'%':<8} {'Close Eq':<10}")
    print("-" * 105)

    total_unrealized_dollar = 0.0

    for pos in OUTSTANDING_POSITIONS:
        # Print position A
        print(f"{pos['pair']:<15} {pos['symbol_a']:<6} {pos['side_a']:<4} {pos['shares_a']:<10.2f} {pos['open_px_a']:<8.2f} {pos['current_px_a']:<10.2f} {pos['unrealized_dollar_a']:<12.2f} {pos['unrealized_dollar_a']/500*100:<8.2f} {'':<10}")

        # Print position B
        print(f"{'':<15} {pos['symbol_b']:<6} {pos['side_b']:<4} {pos['shares_b']:<10.2f} {pos['open_px_b']:<8.2f} {pos['current_px_b']:<10.2f} {pos['unrealized_dollar_b']:<12.2f} {pos['unrealized_dollar_b']/500*100:<8.2f} {'':<10}")

        # Print pair totals with equilibrium info
        equilibrium_status = "CLOSE" if pos['current_abs_term'] < pos['closing_threshold'] else f"{pos['equilibrium_ratio']:.2f}x"
        print(f"{'':<15} {'PAIR':<6} {'TOT':<4} {'':<10} {'':<8} {'':<10} {pos['total_unrealized_dollar']:<12.2f} {pos['total_unrealized_dollar']/1000*100:<8.2f} {equilibrium_status:<10}")

        # Print equilibrium details
        print(f"{'':<15} {'EQ':<6} {'INFO':<4} {'':<10} {'':<8} {'':<10} {'Curr:':<6}{pos['current_abs_term']:<6.4f} {'Thresh:':<7}{pos['closing_threshold']:<6.4f} {'':<10}")
        print("-" * 105)

        total_unrealized_dollar += pos['total_unrealized_dollar']

    print(f"{'TOTAL OUTSTANDING':<80} ${total_unrealized_dollar:<12.2f}")

if __name__ == "__main__":
    # Initialize a dictionary to store all trade results
    all_results = {}

    # Initialize global PnL tracking variables
    TOTAL_REALIZED_PNL = 0.0
    TOTAL_UNREALIZED_PNL = 0.0
    OUTSTANDING_POSITIONS = []

    # Process each data file
    price_column = CONFIG["price_column"]
    for datafile in CONFIG["datafiles"]:
        print(f"\n====== Processing {datafile} ======")

        # Clear the TRADES global dictionary and reset unrealized PnL for the new file
        TRADES.clear()
        TOTAL_UNREALIZED_PNL = 0.0
        TOTAL_REALIZED_PNL = 0.0

        # Process data for this file
        try:
            run_pairs(
                summaries_df=load_market_data(f'{CONFIG["data_directory"]}/{datafile}', config=CONFIG),
                price_column=price_column
            )

            # Store results with file name as key
            filename = datafile.split("/")[-1]
            all_results[filename] = {"trades": TRADES.copy()}

            print(f"Successfully processed {filename}")

            # Print total unrealized PnL for this file
            if TOTAL_UNREALIZED_PNL != 0:
                print(f"\n====== TOTAL UNREALIZED PnL for {filename}: {TOTAL_UNREALIZED_PNL:.2f}% ======")
            else:
                print(f"\n====== No unrealized positions for {filename} ======")

        except Exception as e:
            print(f"Error processing {datafile}: {str(e)}")

    # print_results_suummary(all_results)
    calculate_returns(all_results)

    # Print grand totals
    print(f"\n====== GRAND TOTALS ACROSS ALL PAIRS ======")
    print(f"Total Realized PnL: {TOTAL_REALIZED_PNL:.2f}%")
    print(f"Total Unrealized PnL: {TOTAL_UNREALIZED_PNL:.2f}%")
    print(f"Combined Total PnL: {TOTAL_REALIZED_PNL + TOTAL_UNREALIZED_PNL:.2f}%")

    print_outstanding_positions()