126 lines
3.7 KiB
Python
126 lines
3.7 KiB
Python
import sys
|
|
import sqlite3
|
|
from typing import Dict, Tuple
|
|
import pandas as pd
|
|
|
|
from tools.trading_pair import TradingPair
|
|
|
|
|
|
def load_sqlite_to_dataframe(db_path: str, query: str) -> pd.DataFrame:
    """Run ``query`` against the SQLite database at ``db_path``.

    Args:
        db_path: Path handed to ``sqlite3.connect``.
        query: SQL text handed to ``pandas.read_sql_query``.

    Returns:
        The query result as a DataFrame.

    Raises:
        sqlite3.Error: Raised by the SQLite driver; printed, then re-raised.
        Exception: Any other failure (pandas wraps query errors in its own
            ``DatabaseError``); printed, then re-raised unchanged.
    """
    # Sentinel so the ``finally`` clause knows whether a connection was opened.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        return pd.read_sql_query(query, conn)
    except sqlite3.Error as excpt:
        print(f"SQLite error: {excpt}")
        raise
    except Exception as excpt:
        # The handler must print the exception it actually caught; referencing
        # an unbound name here would replace the real error with a NameError.
        print(f"Error: {excpt}")
        raise
    finally:
        if conn is not None:
            conn.close()
|
|
|
|
|
|
def convert_time_to_UTC(value: str, timezone: str):
    """Re-express a local wall-clock timestamp string in UTC.

    Args:
        value: Timestamp formatted as ``YYYY-MM-DD HH:MM:SS``.
        timezone: Zone name accepted by ``zoneinfo.ZoneInfo``; ``value`` is
            interpreted as wall-clock time in this zone.

    Returns:
        The same instant in UTC, formatted as ``YYYY-MM-DD HH:MM:SS``.
    """
    from datetime import datetime
    from zoneinfo import ZoneInfo

    stamp_format = "%Y-%m-%d %H:%M:%S"

    # The parsed value carries no zone; attach the caller's zone, then convert.
    naive_local = datetime.strptime(value, stamp_format)
    aware_local = naive_local.replace(tzinfo=ZoneInfo(timezone))
    aware_utc = aware_local.astimezone(ZoneInfo("UTC"))

    return aware_utc.strftime(stamp_format)
|
|
|
|
|
|
def load_market_data(datafile: str, config: Dict) -> pd.DataFrame:
    """Load bars for the configured instruments, restricted to the trading session.

    Args:
        datafile: Path of the SQLite database to read.
        config: Settings read by this function:
            ``instrument_id_pfx``, ``instruments``, ``security_type``,
            ``exchange_id``, ``db_table_name`` and ``trading_hours`` (with
            ``begin_session``, ``end_session`` and ``timezone``).

    Returns:
        A DataFrame with columns ``tstamp``, ``time_ns``, ``symbol``, ``open``,
        ``high``, ``low``, ``close``, ``volume``, ``num_trades`` and ``vwap``.
        Only rows inside the session window are kept and ``tstamp`` is
        converted with ``pd.to_datetime``. If the query matches no rows, the
        empty frame is returned.
    """
    from tools.data_loader import load_sqlite_to_dataframe

    prefix = config["instrument_id_pfx"]
    # NOTE(review): the ids are wrapped in double quotes, which SQLite only
    # treats as string literals as a fallback — confirm this is intended.
    instrument_ids = ['"' + prefix + instrument + '"' for instrument in config["instruments"]]
    security_type = config["security_type"]
    exchange_id = config["exchange_id"]

    if security_type == "CRYPTO":
        # For CRYPTO the stored tstamp is divided by 1e9 and formatted as text;
        # the raw value is kept as time_ns.
        time_columns = [
            "strftime('%Y-%m-%d %H:%M:%S', tstamp/1000000000, 'unixepoch') as tstamp",
            "tstamp as time_ns",
        ]
    else:
        time_columns = ["tstamp", "tstamp_ns as time_ns"]

    columns = time_columns + [
        # substr() is 1-based, so prefix length + 1 is the first symbol character.
        f"substr(instrument_id, {len(prefix) + 1}) as symbol",
        "open",
        "high",
        "low",
        "close",
        "volume",
        "num_trades",
        "vwap",
    ]

    # NOTE(review): values are interpolated directly into the SQL text, so
    # ``config`` must come from a trusted source.
    query = (
        "select " + ", ".join(columns)
        + f" from {config['db_table_name']}"
        + f" where exchange_id ='{exchange_id}'"
        + f" and instrument_id in ({','.join(instrument_ids)})"
    )

    df = load_sqlite_to_dataframe(db_path=datafile, query=query)

    if df.empty:
        # No rows means there is no first timestamp to derive the session date
        # from; return the empty frame rather than failing on the lookup below.
        df["tstamp"] = pd.to_datetime(df["tstamp"])
        return df

    # Trading hours: the session date is taken from the first returned row
    # (the query has no ORDER BY), and every row is filtered against that
    # single day's window.
    date_str = df["tstamp"].iloc[0][0:10]
    trading_hours = config["trading_hours"]

    start_time = convert_time_to_UTC(
        f"{date_str} {trading_hours['begin_session']}", trading_hours["timezone"]
    )
    end_time = convert_time_to_UTC(
        f"{date_str} {trading_hours['end_session']}", trading_hours["timezone"]
    )

    # Timestamps are still strings here, so this is a string comparison.
    in_session = (df["tstamp"] >= start_time) & (df["tstamp"] <= end_time)

    # Copy the selection so the column assignment below writes to an
    # independent frame instead of a slice of the original.
    df = df.loc[in_session].copy()
    df["tstamp"] = pd.to_datetime(df["tstamp"])

    return df
|
|
|
|
|
|
def transform_dataframe(df: pd.DataFrame, price_column: str):
    """Reshape long-format rows into one price column per symbol.

    Args:
        df: Frame containing at least ``tstamp``, ``symbol`` and
            ``price_column``.
        price_column: Name of the column whose values are spread out.

    Returns:
        A frame that starts from the unique ``tstamp`` values and gains one
        ``"<price_column>_<symbol>"`` column (for example ``close_COIN``) per
        distinct symbol. Timestamps a symbol has no row for are left as NaN.
    """
    narrowed = df[["tstamp", "symbol", price_column]]

    # Seed the result with the distinct timestamps only.
    wide = narrowed[["tstamp"]].drop_duplicates().reset_index(drop=True)

    for sym in narrowed["symbol"].unique():
        # This symbol's rows, with the price column renamed to its per-symbol name.
        per_symbol = (
            narrowed.loc[narrowed["symbol"] == sym, ["tstamp", price_column]]
            .rename(columns={price_column: f"{price_column}_{sym}"})
            .reset_index(drop=True)
        )

        # Left join keeps every timestamp; rows are deliberately not dropped on
        # NaN, since an irrelevant symbol would otherwise shrink the dataset.
        wide = wide.merge(per_symbol, on="tstamp", how="left").reset_index(drop=True)

    return wide
|
|
|
|
|
|
# if __name__ == "__main__":
|
|
# df1 = load_sqlite_to_dataframe(sys.argv[1], query="select * from md_1min_bars")
|
|
|
|
# print(df1)
|