import sys
import sqlite3
from typing import Dict, Tuple

import pandas as pd

from tools.trading_pair import TradingPair


def load_sqlite_to_dataframe(db_path, query):
    """Run ``query`` against the SQLite database at ``db_path``.

    Args:
        db_path: Path of the SQLite database file to open.
        query: SQL text to execute.

    Returns:
        A pandas DataFrame holding the query result.

    Raises:
        sqlite3.Error: Reported on stdout, then re-raised unchanged.
        Exception: Any other failure is reported on stdout, then re-raised
            unchanged.
    """
    # Bound before the try so the ``finally`` clause can tell whether
    # connect() succeeded without probing ``locals()``.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        return pd.read_sql_query(query, conn)
    except sqlite3.Error as excpt:
        print(f"SQLite error: {excpt}")
        raise
    except Exception as excpt:
        # The message must use the name bound by *this* clause; referring to
        # the name of the previous clause raised a NameError here and hid the
        # real failure.
        print(f"Error: {excpt}")
        raise
    finally:
        if conn is not None:
            conn.close()


def convert_time_to_UTC(value: str, timezone: str) -> str:
    """Convert a zone-less local timestamp string to the same instant in UTC.

    Args:
        value: Timestamp formatted as ``%Y-%m-%d %H:%M:%S``.
        timezone: Zone key accepted by ``zoneinfo.ZoneInfo`` in which
            ``value`` is expressed.

    Returns:
        The UTC timestamp, formatted as ``%Y-%m-%d %H:%M:%S``.

    Raises:
        ValueError: If ``value`` does not match the expected format.
    """
    from zoneinfo import ZoneInfo
    from datetime import datetime

    timestamp_format = "%Y-%m-%d %H:%M:%S"

    # Parse as naive, then attach the source zone before converting.
    local_dt = datetime.strptime(value, timestamp_format)
    local_dt = local_dt.replace(tzinfo=ZoneInfo(timezone))
    return local_dt.astimezone(ZoneInfo("UTC")).strftime(timestamp_format)


def load_market_data(datafile: str, config: Dict) -> pd.DataFrame:
    """Load bar data for the configured instruments, limited to the session.

    Args:
        datafile: Path of the SQLite database file.
        config: Settings mapping. Keys read here: ``instrument_id_pfx``,
            ``instruments``, ``security_type``, ``exchange_id``,
            ``db_table_name`` and ``trading_hours`` (itself a mapping with
            ``begin_session``, ``end_session`` and ``timezone``).

    Returns:
        A DataFrame with columns ``tstamp`` (converted to datetime),
        ``time_ns``, ``symbol``, ``open``, ``high``, ``low``, ``close``,
        ``volume``, ``num_trades`` and ``vwap``, holding only rows whose
        timestamp lies within the configured session (bounds inclusive).
        When the query matches no rows, the empty frame is returned.
    """
    # NOTE(review): this re-imports the loader from tools.data_loader and
    # shadows the function of the same name defined in this module -- confirm
    # whether this file is tools/data_loader.py itself.
    from tools.data_loader import load_sqlite_to_dataframe

    instrument_id_pfx = config["instrument_id_pfx"]
    # NOTE(review): the ids are wrapped in double quotes; SQLite only treats
    # those as string literals through its legacy fallback -- confirm this is
    # intended before changing the quoting.
    instrument_ids = [
        '"' + instrument_id_pfx + instrument + '"'
        for instrument in config["instruments"]
    ]
    security_type = config["security_type"]
    exchange_id = config["exchange_id"]

    if security_type == "CRYPTO":
        # tstamp is divided by 1e9 and read as a unix epoch to get a
        # formatted string; the raw value is kept as time_ns.
        time_columns = (
            "strftime('%Y-%m-%d %H:%M:%S', tstamp/1000000000, 'unixepoch') as tstamp"
            ", tstamp as time_ns"
        )
    else:
        time_columns = "tstamp, tstamp_ns as time_ns"

    query = (
        f"select {time_columns}"
        # substr() is 1-based: start right after the instrument id prefix.
        f", substr(instrument_id, {len(instrument_id_pfx) + 1}) as symbol"
        ", open, high, low, close, volume, num_trades, vwap"
        f" from {config['db_table_name']}"
        f" where exchange_id ='{exchange_id}'"
        f" and instrument_id in ({','.join(instrument_ids)})"
    )

    df = load_sqlite_to_dataframe(db_path=datafile, query=query)

    if df.empty:
        # No first row to take the session date from; hand back the empty
        # frame instead of failing on the lookup below.
        df["tstamp"] = pd.to_datetime(df["tstamp"])
        return df

    # Trading hours: the session date is taken from the first row only.
    # NOTE(review): presumably a data file covers a single trading day --
    # confirm against the callers.
    date_str = df["tstamp"].iloc[0][0:10]
    trading_hours = config["trading_hours"]
    session_tz = trading_hours["timezone"]
    start_time = convert_time_to_UTC(
        f"{date_str} {trading_hours['begin_session']}", session_tz
    )
    end_time = convert_time_to_UTC(
        f"{date_str} {trading_hours['end_session']}", session_tz
    )

    # The timestamps are compared as strings here, before the datetime
    # conversion below.
    in_session = (df["tstamp"] >= start_time) & (df["tstamp"] <= end_time)
    # Copy so the column assignment below writes to an independent frame
    # rather than to a slice of the query result.
    df = df[in_session].copy()
    df["tstamp"] = pd.to_datetime(df["tstamp"])

    return df


def transform_dataframe(df: pd.DataFrame, price_column: str) -> pd.DataFrame:
    """Reshape long per-symbol rows into one price column per symbol.

    Args:
        df: Frame containing at least ``tstamp``, ``symbol`` and
            ``price_column``.
        price_column: Name of the column whose values are spread out.

    Returns:
        A frame with one row per unique ``tstamp`` (in order of first
        appearance) and, for every symbol, a column named
        ``"<price_column>_<symbol>"``. Timestamps at which a symbol has no
        row hold NaN in that symbol's column.
    """
    # Select only the columns we need
    df_selected = df[["tstamp", "symbol", price_column]]

    # Start with unique timestamps
    result_df: pd.DataFrame = (
        pd.DataFrame(df_selected["tstamp"]).drop_duplicates().reset_index(drop=True)
    )

    # For each unique symbol, add a corresponding price column
    for symbol in df_selected["symbol"].unique():
        # Column name looks like "close_COIN"
        new_price_column = f"{price_column}_{symbol}"

        # Timestamp and price of this symbol only, price renamed per symbol
        is_symbol = df_selected["symbol"] == symbol
        symbol_prices = df_selected.loc[is_symbol, ["tstamp", price_column]].rename(
            columns={price_column: new_price_column}
        )

        # Left join keeps every timestamp even when this symbol has no row
        result_df = pd.merge(result_df, symbol_prices, on="tstamp", how="left")

    # do not dropna() since irrelevant symbol would affect dataset
    return result_df.reset_index(drop=True)


# if __name__ == "__main__":
#     df1 = load_sqlite_to_dataframe(sys.argv[1], query="select * from md_1min_bars")

#     print(df1)