pairs_trading/lib/pt_strategy/pt_market_data.py
Oleg Sheynin 7ab09669b4 progress
2025-08-02 01:49:09 +00:00

230 lines
9.3 KiB
Python

from __future__ import annotations
from typing import Any, Dict, List, Optional
import pandas as pd
from cvttpy_base.settings.cvtt_types import JsonDictT
from tools.data_loader import load_market_data
from pt_strategy.trading_pair import TradingPair
class PtMarketData():
config_: Dict[str, Any]
origin_mkt_data_df_: pd.DataFrame
market_data_df_: pd.DataFrame
def __init__(self, config: Dict[str, Any]):
self.config_ = config
self.origin_mkt_data_df_ = pd.DataFrame()
self.market_data_df_ = pd.DataFrame()
class ResearchMarketData(PtMarketData):
current_index_: int
is_execution_price_: bool
def __init__(self, config: Dict[str, Any]):
super().__init__(config)
self.current_index_ = 0
self.is_execution_price_ = "execution_price" in self.config_
if self.is_execution_price_:
self.execution_price_column_ = self.config_["execution_price"]["column"]
self.execution_price_shift_ = self.config_["execution_price"]["shift"]
else:
self.execution_price_column_ = None
self.execution_price_shift_ = 0
def has_next(self) -> bool:
return self.current_index_ < len(self.market_data_df_)
def get_next(self) -> pd.Series:
result = self.market_data_df_.iloc[self.current_index_]
self.current_index_ += 1
return result
def load(self) -> None:
datafiles: List[str] = self.config_.get("datafiles", [])
instruments: List[Dict[str, str]] = self.config_.get("instruments", [])
assert len(instruments) > 0, "No instruments found in config"
assert len(datafiles) > 0, "No datafiles found in config"
self.symbol_a_ = instruments[0]["symbol"]
self.symbol_b_ = instruments[1]["symbol"]
self.stat_model_price_ = self.config_["stat_model_price"]
extra_minutes: int
extra_minutes = self.execution_price_shift_
for datafile in datafiles:
md_df = load_market_data(
datafile=datafile,
instruments=instruments,
db_table_name=self.config_["market_data_loading"][instruments[0]["instrument_type"]]["db_table_name"],
trading_hours=self.config_["trading_hours"],
extra_minutes=extra_minutes,
)
self.origin_mkt_data_df_ = pd.concat([self.origin_mkt_data_df_, md_df])
self.origin_mkt_data_df_ = self.origin_mkt_data_df_.sort_values(by="tstamp")
self.origin_mkt_data_df_ = self.origin_mkt_data_df_.dropna().reset_index(drop=True)
self._set_market_data()
def _set_market_data(self, ) -> None:
if self.is_execution_price_:
self.market_data_df_ = pd.DataFrame(
self._transform_dataframe(self.origin_mkt_data_df_)[["tstamp"] + self.colnames() + self.orig_exec_prices_colnames()]
)
else:
self.market_data_df_ = pd.DataFrame(
self._transform_dataframe(self.origin_mkt_data_df_)[["tstamp"] + self.colnames()]
)
self.market_data_df_ = self.market_data_df_.dropna().reset_index(drop=True)
self.market_data_df_["tstamp"] = pd.to_datetime(self.market_data_df_["tstamp"])
self.market_data_df_ = self.market_data_df_.sort_values("tstamp")
self._set_execution_price_data()
def _transform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
df_selected: pd.DataFrame
if self.is_execution_price_:
execution_price_column = self.config_["execution_price"]["column"]
df_selected = pd.DataFrame(
df[["tstamp", "symbol", self.stat_model_price_, execution_price_column]]
)
else:
df_selected = pd.DataFrame(
df[["tstamp", "symbol", self.stat_model_price_]]
)
result_df = pd.DataFrame(df_selected["tstamp"]).drop_duplicates().reset_index(drop=True)
# For each unique symbol, add a corresponding stat_model_price column
symbols = df_selected["symbol"].unique()
for symbol in symbols:
# Filter rows for this symbol
df_symbol = df_selected[df_selected["symbol"] == symbol].reset_index(
drop=True
)
# Create column name like "close-COIN"
new_price_column = f"{self.stat_model_price_}_{symbol}"
if self.is_execution_price_:
new_execution_price_column = f"{self.execution_price_column_}_{symbol}"
# Create temporary dataframe with timestamp and price
temp_df = pd.DataFrame(
{
"tstamp": df_symbol["tstamp"],
new_price_column: df_symbol[self.stat_model_price_],
new_execution_price_column: df_symbol[execution_price_column],
}
)
else:
temp_df = pd.DataFrame(
{
"tstamp": df_symbol["tstamp"],
new_price_column: df_symbol[self.stat_model_price_],
}
)
# Join with our result dataframe
result_df = pd.merge(result_df, temp_df, on="tstamp", how="left")
result_df = result_df.reset_index(
drop=True
) # do not dropna() since irrelevant symbol would affect dataset
return result_df.dropna()
def _set_execution_price_data(self) -> None:
if "execution_price" not in self.config_:
self.market_data_df_[f"exec_price_{self.symbol_a_}"] = self.market_data_df_[f"{self.stat_model_price_}_{self.symbol_a_}"]
self.market_data_df_[f"exec_price_{self.symbol_b_}"] = self.market_data_df_[f"{self.stat_model_price_}_{self.symbol_b_}"]
return
execution_price_column = self.config_["execution_price"]["column"]
execution_price_shift = self.config_["execution_price"]["shift"]
self.market_data_df_[f"exec_price_{self.symbol_a_}"] = self.market_data_df_[f"{execution_price_column}_{self.symbol_a_}"].shift(-execution_price_shift)
self.market_data_df_[f"exec_price_{self.symbol_b_}"] = self.market_data_df_[f"{execution_price_column}_{self.symbol_b_}"].shift(-execution_price_shift)
self.market_data_df_ = self.market_data_df_.dropna().reset_index(drop=True)
def colnames(self) -> List[str]:
return [
f"{self.stat_model_price_}_{self.symbol_a_}",
f"{self.stat_model_price_}_{self.symbol_b_}",
]
def orig_exec_prices_colnames(self) -> List[str]:
return [
f"{self.execution_price_column_}_{self.symbol_a_}",
f"{self.execution_price_column_}_{self.symbol_b_}",
]
def exec_prices_colnames(self) -> List[str]:
return [
f"exec_price_{self.symbol_a_}",
f"exec_price_{self.symbol_b_}",
]
class RealTimeMarketData(PtMarketData):
def __init__(self, config: Dict[str, Any], *args: Any, **kwargs: Any):
super().__init__(config, *args, **kwargs)
async def on_mkt_data_hist_snapshot(self, snapshot: JsonDictT) -> None:
# URGENT
# create origin_mkt_data_df_ from snapshot
# verify that the data for both instruments are present
# transform it to market_data_df_ tstamp, close_symbolA, close_symbolB
'''
# from cvttpy/exchanges/binance/spot/mkt_data.py
values = {
"time_ns": time_ns,
"tstamp": format_nanos_utc(time_ns),
"exchange_id": exch_inst.exchange_id_,
"instrument_id": exch_inst.instrument_id(),
"interval_ns": interval_sec * 1_000_000_000,
"open": float(kline[1]),
"high": float(kline[2]),
"low": float(kline[3]),
"close": float(kline[4]),
"volume": float(kline[5]),
"num_trades": kline[8],
"vwap": float(kline[7]) / float(kline[5]) if float(kline[5]) > 0 else 0.0 # Calculate VWAP
}
'''
pass
async def on_mkt_data_update(self, update: JsonDictT) -> Optional[pd.DataFrame]:
# URGENT
# make sure update has both instruments
# create DataFrame tmp1 from update
# transform tmp1 into temp. datframe tmp2
# add tmp1 to origin_mkt_data_df_
# add tmp2 to market_data_df_
# return market_data_df_
'''
class MdTradesAggregate(NamedObject):
def to_dict(self) -> Dict[str, Any]:
return {
"time_ns": self.time_ns_,
"tstamp": format_nanos_utc(self.time_ns_),
"exchange_id": self.exch_inst_.exchange_id_,
"instrument_id": self.exch_inst_.instrument_id(),
"interval_ns": self.interval_ns_,
"open": self.exch_inst_.get_price(self.open_),
"high": self.exch_inst_.get_price(self.high_),
"low": self.exch_inst_.get_price(self.low_),
"close": self.exch_inst_.get_price(self.close_),
"volume": self.exch_inst_.get_quantity(self.volume_),
"vwap": self.exch_inst_.get_price(self.vwap_),
"num_trades": self.exch_inst_.get_quantity(self.num_trades_),
}
'''
return pd.DataFrame()