ops/research/prepare_eqt_simdata.sh
2025-01-13 20:04:52 -05:00

245 lines
4.8 KiB
Bash
Executable File

#!/bin/bash
#######################################
# Print the command-line synopsis to stdout and terminate the script.
# Arguments: none
# Returns:   never returns; exits the shell with status 1
#######################################
usage() {
  printf '%s\n' "Usage: $0 -S <symbols> -d <YYYYMMDD Date> [-O <output dir (./) >]"
  exit 1
}
# --------------------- Settings
# Host the per-symbol source archives are pulled from with rsync.
SourceHost="cloud21.cvtt.vpn"
# Archive root on that host; files are looked up beneath it as
# <year>/<first letter of symbol>/<symbol>/.
SourceRootDir="/opt/store/cvtt/md_archive/equity/alpaca_md"
# --------------------- Settings
#######################################
# Ask the market-calendar service whether XNYS has an open time on a date.
# Arguments: $1 - date in any format `date -d` accepts (e.g. YYYYMMDD)
# Outputs:   nothing on stdout; `date` may report a parse error on stderr
# Returns:   0 when the service reports a non-null open_time for the date,
#            1 otherwise (empty/unparsable date, failed request, no entry)
#######################################
is_business_day() {
  local dt=${1}
  local day calendar_url url open_time

  # An empty argument would make `date -d ""` silently resolve to today.
  [ -n "${dt}" ] || return 1
  # Stop before the network call when the date cannot be parsed; otherwise
  # the query would be sent with an empty start/end range.
  day=$(date -d "${dt}" +"%Y-%m-%d") || return 1

  calendar_url="http://cloud23.cvtt.vpn:8000/api/v1/markets/hours?mic=XNYS"
  url="${calendar_url}&start=${day}&end=${day}"

  # `// empty` drops null open_time values, which jq would otherwise print as
  # the non-empty text "null" and make the day look like a trading day.
  open_time=$(curl -s "${url}" | jq '.[] | .open_time // empty')
  [ -n "${open_time}" ]
}
export -f is_business_day
# Read the command-line flags:
#   -d <date>     -> Date
#   -S <symbols>  -> SymList
#   -O <dir>      -> OutputDir
# The leading ':' in the optstring selects silent error reporting, so unknown
# flags arrive as '?' and missing arguments as ':'; both print a message on
# stderr and then call usage.
while getopts ":d:S:O:" opt; do
  case "${opt}" in
    d) Date=${OPTARG} ;;
    S) SymList=${OPTARG} ;;
    O) OutputDir=${OPTARG} ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      usage
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      usage
      ;;
  esac
done
# Validate the parsed options before any work is done. Each failure prints a
# diagnostic on stderr (matching the option-parsing errors) and then calls
# usage.
# Expansions are quoted so values containing whitespace or glob characters
# are tested and passed on as a single word.
if [[ -z "${SymList}" ]]; then
  echo "symbols are not specified" >&2
  usage
fi
if [[ -z "${Date}" ]]; then
  echo "date is not specified" >&2
  usage
fi

# The output directory is optional and defaults to the current directory.
OutputDir=${OutputDir:-.}
if ! mkdir -p -- "${OutputDir}"; then
  echo "cannot create output directory ${OutputDir}" >&2
  exit 1
fi

if ! is_business_day "${Date}"; then
  echo "${Date} is not business day" >&2
  usage
fi
# Break the comma-separated -S value into the Symbols array. Scoping IFS to
# the read command leaves the shell-wide IFS untouched, so nothing has to be
# saved and restored.
IFS=',' read -ra Symbols <<< "${SymList}"
echo "Date=${Date} Symbols=${Symbols[*]} OutputDir=${OutputDir}"
# Fetch the gzipped 1-minute-bar and quote/trade databases of every requested
# symbol from SourceHost into OutputDir, then decompress them.
# The source host is logged from SourceHost, the variable the rsync command
# actually uses.
echo "Getting data from ${SourceHost} ..."
year=$(date -d "${Date}" +"%Y")

# Base names of the archives rsync delivered; only these are decompressed.
fetched=()
for sym in "${Symbols[@]}"; do
  # Archive layout: <root>/<year>/<first letter of symbol>/<symbol>/
  capital=${sym:0:1}
  SourceDir="${SourceRootDir}/${year}/${capital}/${sym}"
  SourceHbarFile="${SourceDir}/${Date}.${sym}.alpaca_1m_bars.db.gz"
  SourceQatFile="${SourceDir}/${Date}.${sym}.alpaca_qat.db.gz"

  for src_file in "${SourceHbarFile}" "${SourceQatFile}"; do
    # Build the command as an array so no argument is re-parsed by the shell.
    rsync_cmd=(rsync -ahv "${SourceHost}:${src_file}" "${OutputDir}/")
    echo "${rsync_cmd[*]}"
    if "${rsync_cmd[@]}"; then
      fetched+=("${src_file##*/}")
    else
      # Keep going: the remaining files may still be available.
      echo "WARNING: failed to fetch ${src_file} from ${SourceHost}" >&2
    fi
  done
done

# Decompress only what was fetched above. A bare *.db.gz glob would also
# unpack unrelated archives that happen to live in OutputDir, which defaults
# to the current directory.
if (( ${#fetched[@]} > 0 )); then
  echo "(cd ${OutputDir} && gunzip ${fetched[*]})"
  (cd "${OutputDir}" && gunzip -- "${fetched[@]}")
fi
# Create the combined simulation database and its three tables: md_trades,
# md_quotes and md_1min_bars.
# All statements use IF NOT EXISTS, so running this against an existing result
# file leaves its tables and indexes in place.
# The heredoc delimiters are quoted because the SQL contains nothing for the
# shell to expand.
ResultDbFile="${OutputDir}/${Date}.alpaca_sim_md.db"
echo "Creating Result Database File ${ResultDbFile}"

echo "Creating table md_trades ..."
sqlite3 "${ResultDbFile}" <<'EOF'
CREATE TABLE IF NOT EXISTS md_trades (
tstamp text,
tstamp_ns integer,
exchange_id text,
instrument_id text,
exch text,
px real,
qty real,
trade_id text,
condition text,
tape text
);
CREATE UNIQUE INDEX IF NOT EXISTS md_trades_uidx
ON md_trades(tstamp_ns, exchange_id, instrument_id);
EOF

echo "Creating table md_quotes ..."
sqlite3 "${ResultDbFile}" <<'EOF'
CREATE TABLE IF NOT EXISTS md_quotes (
tstamp text,
tstamp_ns integer,
exchange_id text,
instrument_id text,
bid_exch text,
bid_px real,
bid_qty real,
ask_exch text,
ask_px real,
ask_qty real
);
CREATE UNIQUE INDEX IF NOT EXISTS md_quotes_uidx
ON md_quotes(tstamp_ns, exchange_id, instrument_id);
EOF

# Unlike the two tables above, bars are keyed on the text tstamp column
# rather than tstamp_ns.
echo "Creating table md_1min_bars ..."
sqlite3 "${ResultDbFile}" <<'EOF'
CREATE TABLE IF NOT EXISTS md_1min_bars (
tstamp text,
tstamp_ns integer,
exchange_id text,
instrument_id text,
open real,
high real,
low real,
close real,
volume real,
vwap real,
num_trades integer
);
CREATE UNIQUE INDEX IF NOT EXISTS md_1min_bars_uidx
ON md_1min_bars(tstamp, exchange_id, instrument_id);
EOF
# set -f # not to expand *
# Copy every symbol's trades, quotes and 1-minute bars from its decompressed
# source database into the combined result database, then delete the source
# files.
# INSERT OR IGNORE skips rows that collide with the unique indexes, so loading
# the same data twice does not duplicate rows.
sq="'"
for sym in "${Symbols[@]}"; do
  src_hbar_db="${OutputDir}/${Date}.${sym}.alpaca_1m_bars.db"
  src_qat_db="${OutputDir}/${Date}.${sym}.alpaca_qat.db"
  # Double any single quote so the path is a valid SQL string literal.
  qat_sql=${src_qat_db//${sq}/${sq}${sq}}
  hbar_sql=${src_hbar_db//${sq}/${sq}${sq}}

  # Check for the file first: ATTACH on a missing path would create an empty
  # database and the SELECT would then fail with "no such table".
  if [[ -f "${src_qat_db}" ]]; then
    echo "Loading md_trades and md_quotes from ${src_qat_db} ..."
    sqlite3 "${ResultDbFile}" <<EOF
ATTACH '${qat_sql}' AS source_db;
BEGIN;
INSERT OR IGNORE INTO md_trades SELECT
tstamp,
tstamp_ns,
exchange_id,
instrument_id,
exch,
px,
qty,
trade_id,
condition,
tape
FROM source_db.md_trades;
COMMIT;
BEGIN;
INSERT OR IGNORE INTO md_quotes SELECT
tstamp,
tstamp_ns,
exchange_id,
instrument_id,
bid_exch,
bid_px,
bid_qty,
ask_exch,
ask_px,
ask_qty
FROM source_db.md_quotes;
COMMIT;
DETACH source_db;
EOF
  else
    echo "WARNING: ${src_qat_db} not found, skipping md_trades/md_quotes for ${sym}" >&2
  fi

  if [[ -f "${src_hbar_db}" ]]; then
    echo "Loading md_1min_bars from ${src_hbar_db} ..."
    sqlite3 "${ResultDbFile}" <<EOF
ATTACH '${hbar_sql}' AS source_db;
BEGIN;
INSERT OR IGNORE INTO md_1min_bars SELECT
tstamp,
tstamp_ns,
exchange_id,
instrument_id,
open,
high,
low,
close,
volume,
vwap,
num_trades
FROM source_db.md_1min_bars;
COMMIT;
DETACH source_db;
EOF
  else
    echo "WARNING: ${src_hbar_db} not found, skipping md_1min_bars for ${sym}" >&2
  fi

  # -f: one or both files may be absent when a download failed earlier.
  echo "rm ${src_hbar_db} ${src_qat_db}"
  rm -f -- "${src_hbar_db}" "${src_qat_db}"
done
# Compress the combined database, upload it to both archive hosts and remove
# the local copy.
# The local archive is deleted only after every upload succeeded; at this
# point the per-symbol source files are already gone, so it is the only copy.
echo "gzip ${ResultDbFile}"
if ! gzip "${ResultDbFile}"; then
  echo "ERROR: gzip ${ResultDbFile} failed" >&2
  exit 1
fi
archive="${ResultDbFile}.gz"

upload_failed=0
for dest in \
  "cvtt@hs01.cvtt.vpn:/works/cvtt/md_archive/equity/alpaca_md/sim/" \
  "cvtt@cloud21.cvtt.vpn:/opt/store/cvtt/md_archive/equity/alpaca_md/sim/"; do
  echo "rsync -ahvv ${archive} ${dest}"
  # Try every destination even if an earlier one failed.
  if ! rsync -ahvv "${archive}" "${dest}"; then
    echo "ERROR: upload of ${archive} to ${dest} failed" >&2
    upload_failed=1
  fi
done

if (( upload_failed )); then
  echo "ERROR: not all uploads succeeded, keeping ${archive}" >&2
  exit 1
fi

echo "rm ${archive}"
rm -- "${archive}"
echo Done "$0" "$@"