ops/__DEPRECATED__/sim_prepare/prepare_crypto_simdata.sh

279 lines
6.5 KiB
Bash
Executable File

#!/bin/bash
# --------------------- Settings
# SOURCE_HOST=cloud21.cvtt.vpn
# SOURCE_ROOT_DIR=/opt/store/cvtt/md_archive/crypto
# DB_SOURCES=cloud28,cloud29
# OUTPUT_DIR=/tmp
# DATE=20250516
# RSYNC_TARGETS="cvtt@hs01.cvtt.vpn:/works/cvtt/md_archive/crypto/sim/ cvtt@cloud21.cvtt.vpn:/opt/store/cvtt/md_archive/crypto/sim/"
# --------------------- Settings
if [ -z ${DATE} ] ; then
DATE=$(date -d 'yesterday' +'%Y%m%d')
fi
if [ -z ${OUTPUT_DIR} ] ; then
OUTPUT_DIR=.
fi
echo "DATE=${DATE} SOURCE_HOST=${SOURCE_HOST}"
mkdir -p ${OUTPUT_DIR}
year=$(date -d ${DATE} +"%Y")
month=$(date -d ${DATE} +"%m")
if [ -z "${DB_SOURCES}" ]; then
echo "DB_SOURCES is empty"
exit
fi
IFS=',' read -r -a db_source_hosts <<< "${DB_SOURCES}"
SourceFile="${DATE}.mktdata.db.gz"
SelectedSourceHost=""
SelectedSourceFilePath=""
SelectedSourceSize=0
for db_source_host in "${db_source_hosts[@]}"; do
SourceDir="${SOURCE_ROOT_DIR}/${db_source_host}/${year}/${month}"
CandidatePath="${SourceDir}/${SourceFile}"
remote_stat_cmd="if [ -f '${CandidatePath}' ]; then stat -c %s '${CandidatePath}'; else exit 1; fi"
CandidateSize=$(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${SOURCE_HOST} "${remote_stat_cmd}" 2>/dev/null)
if [ $? -eq 0 ] && [ -n "${CandidateSize}" ]; then
echo "Found ${SOURCE_HOST}:${CandidatePath} (${CandidateSize} bytes)"
if [ -z "${SelectedSourceHost}" ] || [ "${CandidateSize}" -gt "${SelectedSourceSize}" ]; then
SelectedSourceHost=${db_source_host}
SelectedSourceFilePath=${CandidatePath}
SelectedSourceSize=${CandidateSize}
fi
fi
done
if [ -z "${SelectedSourceHost}" ]; then
echo "File ${SourceFile} NOT FOUND on any DB_SOURCES host"
exit
fi
echo "Using source ${SelectedSourceHost} with ${SelectedSourceFilePath} (${SelectedSourceSize} bytes)"
Cmd="/usr/bin/rsync -ahv"
Cmd+=" --mkpath"
Cmd+=" -e 'ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'"
Cmd+=" ${SOURCE_HOST}:${SelectedSourceFilePath}"
Cmd+=" $OUTPUT_DIR/"
echo ${Cmd}
eval ${Cmd}
if [ ! -f ${OUTPUT_DIR}/${SourceFile} ] ; then
echo "File ${OUTPUT_DIR}/${SourceFile} NOT FOUND"
exit
fi
Cmd="(cd ${OUTPUT_DIR} && gunzip -f *.db.gz)"
echo ${Cmd}
eval ${Cmd}
SourceDbFile="${OUTPUT_DIR}/${DATE}.mktdata.db"
ResultDbFile="${OUTPUT_DIR}/${DATE}.crypto_sim_md.db"
echo "SourceDbFile=${SourceDbFile}"
echo "Creating Result Database File ${ResultDbFile}"
cleanup() {
rm ${SourceDbFile}
}
trap cleanup EXIT
echo "Creating table md_trades ..."
sqlite3 ${ResultDbFile} <<EOF
.echo ON
CREATE TABLE IF NOT EXISTS md_trades (
tstamp text,
tstamp_ns integer,
exchange_id text,
instrument_id text,
exch text,
px real,
qty real,
trade_id text,
condition text,
tape text
);
CREATE UNIQUE INDEX IF NOT EXISTS md_trades_uidx
ON md_trades(tstamp_ns, exchange_id, instrument_id);
EOF
echo "Creating table md_quotes ..."
sqlite3 ${ResultDbFile} <<EOF
.echo ON
CREATE TABLE IF NOT EXISTS md_quotes (
tstamp text,
tstamp_ns integer,
exchange_id text,
instrument_id text,
bid_exch text,
bid_px real,
bid_qty real,
ask_exch text,
ask_px real,
ask_qty real
);
CREATE UNIQUE INDEX IF NOT EXISTS md_quotes_uidx
ON md_quotes(tstamp_ns, exchange_id, instrument_id);
EOF
echo "Creating table md_1min_bars ..."
sqlite3 ${ResultDbFile} <<EOF
.echo ON
CREATE TABLE IF NOT EXISTS md_1min_bars (
tstamp text,
tstamp_ns integer,
exchange_id text,
instrument_id text,
open real,
high real,
low real,
close real,
volume real,
vwap real,
num_trades integer
);
CREATE UNIQUE INDEX IF NOT EXISTS md_1min_bars_uidx
ON md_1min_bars(tstamp, exchange_id, instrument_id);
EOF
echo "Loading md_trades ..."
sqlite3 ${ResultDbFile} <<EOF
.echo ON
ATTACH '${SourceDbFile}' AS source_db;
BEGIN;
INSERT OR IGNORE INTO md_trades SELECT
datetime(exchange_ts_ns / 1000000000, 'unixepoch') || '.' || printf('%06d', (exchange_ts_ns % 1000000000) / 1000) as tstamp,
time as tstamp_ns,
exchange_id,
instrument_id,
"" as exch,
price as px,
quantity as qty,
"" as trade_id,
taker_side as condition,
"" as tape
from source_db.bnbspot_md_trades;
COMMIT;
BEGIN;
INSERT OR IGNORE INTO md_trades
SELECT
datetime(exchange_ts_ns / 1000000000, 'unixepoch') || '.' || printf('%06d', (exchange_ts_ns % 1000000000) / 1000) as tstamp,
time as tstamp_ns,
exchange_id,
instrument_id,
"" as exch,
price as px,
quantity as qty,
"" as trade_id,
taker_side as condition,
"" as tape
from source_db.coinbase_md_trades;
COMMIT;
DETACH source_db;
EOF
echo "Loading md_quotes ..."
sqlite3 ${ResultDbFile} <<EOF
.echo ON
ATTACH '${SourceDbFile}' AS source_db;
BEGIN;
INSERT OR IGNORE INTO md_quotes SELECT
datetime(exchange_ts_ns / 1000000000, 'unixepoch') || '.' || printf('%06d', (exchange_ts_ns % 1000000000) / 1000) as tstamp,
time as tstamp_ns,
exchange_id,
instrument_id,
exchange_id as bid_exch,
bid_price as bid_px,
bid_quantity as bid_qty,
exchange_id as ask_exch,
ask_price as ask_px,
ask_quantity as ask_qty
from bnbspot_md_booktops;
COMMIT;
BEGIN;
INSERT OR IGNORE INTO md_quotes SELECT
datetime(exchange_ts_ns / 1000000000, 'unixepoch') || '.' || printf('%06d', (exchange_ts_ns % 1000000000) / 1000) as tstamp,
time as tstamp_ns,
exchange_id,
instrument_id,
exchange_id as bid_exch,
bid_price as bid_px,
bid_quantity as bid_qty,
exchange_id as ask_exch,
ask_price as ask_px,
ask_quantity as ask_qty
from coinbase_md_booktops;
COMMIT;
DETACH source_db;
EOF
### --- REPLACE 0 with num_trades ---
echo "Loading md_1min_bars ..."
sqlite3 ${ResultDbFile} <<EOF
.echo ON
ATTACH '${SourceDbFile}' AS source_db;
BEGIN;
INSERT OR IGNORE INTO md_1min_bars SELECT
datetime(tstamp / 1000000000, 'unixepoch') || '.' || printf('%06d', (tstamp % 1000000000) / 1000) as tsatmp,
tstamp as tstamp_ns,
exchange_id,
instrument_id,
open,
high,
low,
close,
volume,
vwap,
0 as num_trades
from bnbspot_ohlcv_1min;
COMMIT;
BEGIN;
INSERT OR IGNORE INTO md_1min_bars SELECT
datetime(tstamp / 1000000000, 'unixepoch') || '.' || printf('%06d', (tstamp % 1000000000) / 1000) as tstamp,
tstamp as tstamp_ns,
exchange_id,
instrument_id,
open,
high,
low,
close,
volume,
vwap,
0 as num_trades
from coinbase_ohlcv_1min;
COMMIT;
DETACH source_db;
EOF
Cmd="gzip ${ResultDbFile}"
echo ${Cmd}
eval ${Cmd}
for tgt in ${RSYNC_TARGETS} ; do
tgt="${tgt}/${year}/${month}/"
Cmd="/usr/bin/rsync -ahv"
Cmd+=" --mkpath"
Cmd+=" -e 'ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'"
Cmd+=" ${ResultDbFile}.gz ${tgt}"
echo ${Cmd}
eval ${Cmd}
done
echo Done $0 ${*}