pairs_trading_research/scripts/load_equity_pair_daily.sh
2025-07-20 18:05:13 -04:00

146 lines
3.4 KiB
Bash
Executable File

#!/usr/bin/env bash
# Usage: ./scripts/load_equity_pair_daily.sh -A GS -B DIA -f 20241201 -t 20250131 -T ./daily_md
# Print the command-line synopsis on stdout and terminate with status 1.
usage() {
  printf '%s\n' "Usage: $0 -A <symbolA> -B <symbolB> -f <from_date (YYYYMMDD)> -t <to_date (YYYYMMDD)> -T <target_directory>"
  exit 1
}
# ---------------- cmdline
# Collect the five option values into symbolA, symbolB, from_date, to_date
# and target_dir.
#
# The leading ':' in the option string puts getopts into silent mode. Only in
# that mode does it report a missing argument as ':' and an unknown option as
# '?' with the offending letter in OPTARG, which the two diagnostic arms below
# depend on.
while getopts ":A:B:f:t:T:h" opt; do
  case "${opt}" in
    h)
      usage
      ;;
    A)
      symbolA=$OPTARG
      ;;
    B)
      symbolB=$OPTARG
      ;;
    f)
      from_date=$OPTARG
      ;;
    t)
      to_date=$OPTARG
      ;;
    T)
      target_dir=$OPTARG
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      usage
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      usage
      ;;
  esac
done
# ---------------- cmdline
# All five options are mandatory. The expansions are quoted (and defaulted to
# empty) so values containing whitespace cannot break the test itself.
if [[ -z "${symbolA:-}" || -z "${symbolB:-}" || -z "${from_date:-}" \
  || -z "${to_date:-}" || -z "${target_dir:-}" ]]; then
  usage
fi

# The dates are later compared numerically and sliced by character position,
# so anything other than eight digits is rejected up front.
if [[ ! "${from_date}" =~ ^[0-9]{8}$ || ! "${to_date}" =~ ^[0-9]{8}$ ]]; then
  echo "Dates must be given as YYYYMMDD (got from='${from_date}', to='${to_date}')" >&2
  usage
fi
# rsync source under which the per-year / per-initial / per-symbol archives
# of 1-minute bars are fetched.
SourceRoot=cvtt@hs01.cvtt.vpn:/mnt/usb1/md_archive/equity/alpaca_md

# Create the target directory before resolving it: realpath cannot resolve a
# path whose intermediate components do not exist yet.
mkdir -p -- "${target_dir}" || exit 1
target_root=$(realpath -- "${target_dir}") || exit 1
TargetFile=${target_root}/${from_date}-${to_date}.${symbolA}-${symbolB}.daily.db

# All downloads and intermediate databases live in a private scratch
# directory, which becomes the working directory for the rest of the script.
temp_dir=$(mktemp -d) || exit 1
pushd "${temp_dir}" || exit 1
# Leave the scratch directory and delete it.
# Globals: temp_dir (read)
clean_temp() {
  # A failing popd must never prevent the removal below.
  popd || true
  # Quoted so a path containing whitespace is removed as a single argument;
  # skipped entirely when temp_dir was never set.
  if [[ -n "${temp_dir:-}" ]]; then
    rm -rf -- "${temp_dir}"
  fi
}
# Run clean_temp whenever the script exits, then announce the download phase.
trap 'clean_temp' EXIT
printf '%s\n' " ------ Downloading..."
# Fetch the gzipped 1-minute bar database of one symbol for every calendar
# day from from_date to to_date (inclusive) into temp_dir.
# Globals:   SourceRoot, from_date, to_date, temp_dir (all read)
# Arguments: $1 - ticker symbol
# Outputs:   progress lines on stdout; "Missing: <date>" for every day whose
#            rsync fails
# Returns:   non-zero only if the date cannot be advanced
load_symbol() {
  local symbol=$1
  local current_date=$from_date
  local year symb_initial remote_path
  local -a rsync_cmd

  while [[ "$current_date" -le "$to_date" ]]; do
    year=${current_date:0:4}
    symb_initial=${symbol:0:1}
    remote_path="${SourceRoot}/${year}/${symb_initial}/${symbol}/${current_date}.${symbol}.alpaca_1m_bars.db.gz"
    echo "Fetching: $remote_path"

    # The command is kept in an array and executed directly, never through
    # eval, so whitespace or shell metacharacters in the symbol or in the
    # paths cannot alter it.
    rsync_cmd=(rsync -avz "$remote_path" "${temp_dir}/")
    echo "${rsync_cmd[*]}"
    "${rsync_cmd[@]}" || echo "Missing: ${current_date}"

    # Advance by one calendar day; stop rather than loop on a bad date.
    current_date=$(date -d "$current_date +1 day" +"%Y%m%d") || return 1
  done
}
# Download both legs of the pair into the scratch directory (the current
# working directory), then unpack whatever arrived.
load_symbol "${symbolA}"
load_symbol "${symbolB}"

# Without this guard an unmatched glob would hand the literal string '*.gz'
# to gunzip when no archive could be fetched.
if compgen -G '*.gz' >/dev/null; then
  gunzip -- *.gz
else
  echo "No archives were downloaded for ${symbolA} / ${symbolB}" >&2
fi
ls -l "${temp_dir}"
# Create the output table and its unique (date, symbol) index in the target
# database unless they already exist. The unique index is what allows the
# later INSERT OR REPLACE to overwrite a previously stored day for a symbol
# instead of adding a duplicate row.
sqlite3 "$TargetFile" <<EOF
CREATE TABLE IF NOT EXISTS md_daily_bars (
date TEXT,
symbol TEXT,
open REAL,
high REAL,
low REAL,
close REAL,
volume REAL,
vwap REAL,
num_trades INTEGER
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_md_daily_bars_date_symbol
ON md_daily_bars (date, symbol);
EOF
# From here on any failing command, unset variable or failing pipeline stage
# aborts the script.
set -euo pipefail
# With nullglob the loop at the bottom is simply skipped when no bar file
# is present.
shopt -s nullglob

# The session boundaries used below are New York local times.
export TZ=America/New_York

# Reduce one <YYYYMMDD>.<symbol>.alpaca_1m_bars.db file to a daily row of
# md_daily_bars.
# Globals:   TargetFile (read)
# Arguments: $1 - bar database, relative to the current directory
# Outputs:   one "Processing: ..." line on stdout
process_source_file() {
  local source_file=$1
  local file_name=${source_file##*/}
  local symbol file_date offset_raw hours sqlite_offset

  # Strip the date prefix and the fixed suffix instead of taking the second
  # dot-separated field, so symbols that contain a dot (e.g. BRK.B) survive.
  symbol=${file_name#*.}
  symbol=${symbol%.alpaca_1m_bars.db}
  echo "Processing: $source_file (symbol=$symbol)"

  # Whole-hour distance between New York and UTC on the file's date; the
  # value changes with daylight saving time. New York is always behind UTC,
  # hence the fixed minus sign.
  file_date=${file_name:0:8}
  offset_raw=$(date -d "$file_date" '+%z')
  hours=$((10#${offset_raw:1:2}))
  sqlite_offset="-${hours} hours"

  # Keep only bars inside 09:30-16:00 after shifting tstamp by the offset.
  # NOTE(review): this presumes tstamp is stored in UTC - confirm.
  sqlite3 "$source_file" <<EOF
delete from md_1min_bars
where
time(tstamp, '${sqlite_offset}') < time('09:30:00')
or time(tstamp, '${sqlite_offset}') > time('16:00:00');
EOF

  # INSERT ... SELECT assigns values by position, not by alias, so the target
  # columns are listed explicitly and the SELECT follows the same order
  # (vwap before num_trades). GROUP BY makes an empty bar table produce no
  # row at all instead of a row of NULLs.
  sqlite3 "$TargetFile" <<EOF
ATTACH DATABASE '$source_file' AS tmp;
INSERT OR REPLACE INTO md_daily_bars
(date, symbol, open, high, low, close, volume, vwap, num_trades)
SELECT
date(tstamp) AS date,
'$symbol' AS symbol,
(SELECT open FROM tmp.md_1min_bars ORDER BY tstamp ASC LIMIT 1) AS open,
MAX(high) AS high,
MIN(low) AS low,
(SELECT close FROM tmp.md_1min_bars ORDER BY tstamp DESC LIMIT 1) AS close,
SUM(volume) AS volume,
SUM(vwap * volume) / SUM(volume) AS vwap,
SUM(num_trades) AS num_trades
FROM tmp.md_1min_bars
GROUP BY date(tstamp);
EOF
}

for source_file in *.alpaca_1m_bars.db; do
  [[ -f "$source_file" ]] || continue
  process_source_file "$source_file"
done
# Print every stored daily bar as a final check. The path is quoted so a
# target directory containing whitespace still reaches sqlite3 as one argument.
sqlite3 "$TargetFile" <<EOF
select * from md_daily_bars;
EOF