#!/usr/bin/env bash
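#
# Downloads the per-day 1-minute bar databases of two equity symbols with
# rsync and aggregates the 09:30-16:00 New York session of each day into one
# row per symbol in a SQLite file under the target directory.
#
# Usage: ./scripts/load_equity_pair_daily.sh -A GS -B DIA -f 20241201 -t 20250131 -T ./daily_md
#
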
#######################################
# Print the command-line synopsis and terminate the script.
# Arguments: none
# Outputs:   one "Usage: ..." line on stdout
# Returns:   never returns; the shell exits with status 1
#######################################
usage() {
  printf 'Usage: %s -A <symbolA> -B <symbolB> -f <from_date (YYYYMMDD)> -t <to_date (YYYYMMDD)> -T <target_directory>\n' "$0"
  exit 1
}

# ---------------- cmdline
#######################################
# Parse the command-line flags.
# Globals:   symbolA, symbolB, from_date, to_date, target_dir (written)
# Arguments: the script's positional parameters
# Outputs:   a diagnostic on stderr for an unknown flag or a missing value
# Returns:   0 when all flags were consumed; calls usage (which exits) for
#            -h, an unknown flag, or a flag without its value
#######################################
parse_cmdline() {
  local opt
  local OPTIND=1

  # The leading ':' puts getopts in silent mode. Only then is OPTARG set to
  # the offending flag for '?' and is ':' reported for a missing value;
  # without it the ':' arm can never match and OPTARG is empty in '?'.
  while getopts ":A:B:f:t:T:h" opt; do
    case "${opt}" in
      h)
        usage
        ;;
      A)
        symbolA=$OPTARG
        ;;
      B)
        symbolB=$OPTARG
        ;;
      f)
        from_date=$OPTARG
        ;;
      t)
        to_date=$OPTARG
        ;;
      T)
        target_dir=$OPTARG
        ;;
      \?)
        echo "Invalid option: -$OPTARG" >&2
        usage
        ;;
      :)
        echo "Option -$OPTARG requires an argument." >&2
        usage
        ;;
    esac
  done
}

parse_cmdline "$@"
# ---------------- cmdline

# All five options are mandatory: show the synopsis and stop when any of them
# is missing. [[ ]] with quoted expansions keeps the emptiness test well-formed
# for unset values and for values that contain whitespace.
if [[ -z "${symbolA:-}" || -z "${symbolB:-}" || -z "${from_date:-}" \
  || -z "${to_date:-}" || -z "${target_dir:-}" ]]; then
  usage
fi

# rsync location under which the per-symbol archives are looked up.
SourceRoot=cvtt@hs01.cvtt.vpn:/mnt/usb1/md_archive/equity/alpaca_md

# Create the target directory BEFORE resolving it: realpath fails on a path
# whose parent directories do not exist yet, which would leave an empty prefix
# and place the database directly under "/".
mkdir -p "${target_dir}" || exit 1
target_dir_abs=$(realpath "${target_dir}") || exit 1

# The absolute form is needed because the script works from inside a scratch
# directory from here on, so a relative target path would no longer resolve.
TargetFile="${target_dir_abs}/${from_date}-${to_date}.${symbolA}-${symbolB}.daily.db"

# Scratch directory that receives the downloaded files; the rest of the script
# runs with it as the current directory.
temp_dir=$(mktemp -d) || exit 1
pushd "${temp_dir}" || exit 1

#######################################
# Leave the scratch directory and delete it.
# Globals:   temp_dir (read)
# Arguments: none
# Outputs:   whatever popd prints (the directory stack)
#######################################
clean_temp() {
  # Deliberately tolerate a popd failure (e.g. an empty directory stack) so
  # the removal below still happens when the shell runs with errexit enabled.
  popd || true
  # ':?' aborts instead of expanding to nothing should temp_dir be empty.
  rm -rf -- "${temp_dir:?}"
}

# Run clean_temp whenever the shell exits, whatever the exit path.
trap clean_temp EXIT

echo " ------ Downloading..."

#######################################
# Fetch the daily archive of one symbol for every calendar day from
# from_date to to_date (inclusive) into the scratch directory.
# Globals:   SourceRoot, from_date, to_date, temp_dir (read)
# Arguments: $1 - ticker symbol
# Outputs:   progress lines on stdout, plus "Missing: <date>" for each day
#            rsync could not fetch
# Returns:   0 after the last day; 1 if the next date cannot be computed
#######################################
load_symbol() {
  local symbol=$1
  local symb_initial=${symbol:0:1}
  local current_date=$from_date
  local year remote_path next_date
  local -a cmd

  while [[ "$current_date" -le "$to_date" ]]; do
    year=${current_date:0:4}
    remote_path="${SourceRoot}/${year}/${symb_initial}/${symbol}/${current_date}.${symbol}.alpaca_1m_bars.db.gz"

    echo "Fetching: $remote_path"
    # Build the command as an array and run it directly: no eval, so nothing
    # in the symbol or the dates is ever re-parsed by the shell.
    cmd=(rsync -avz "$remote_path" "${temp_dir}/")
    echo "${cmd[*]}"
    "${cmd[@]}" || echo "Missing: ${current_date}"

    # Increment date. Stop on failure: an empty date would compare as 0 and
    # keep the loop running forever.
    next_date=$(date -d "$current_date +1 day" +"%Y%m%d") || {
      echo "Cannot compute the day after ${current_date}" >&2
      return 1
    }
    current_date=$next_date
  done
}

load_symbol "${symbolA}"
load_symbol "${symbolB}"

# Decompress whatever was fetched. Without a match the glob stays literal, so
# check the first element before handing the list to gunzip.
archives=(*.gz)
if [[ -e "${archives[0]}" ]]; then
  gunzip -- "${archives[@]}"
else
  echo "No archives were downloaded for ${symbolA} / ${symbolB}" >&2
fi
ls -l "${temp_dir}"

# Create the output table if it does not exist yet. The unique index on
# (date, symbol) lets a later INSERT OR REPLACE overwrite the row of a day that
# is loaded again. The delimiter is quoted because the SQL needs no shell
# expansion; a failure here aborts, since nothing can be stored without it.
sqlite3 "$TargetFile" <<'EOF' || exit 1
CREATE TABLE IF NOT EXISTS md_daily_bars (
    date TEXT,
    symbol TEXT,
    open REAL,
    high REAL,
    low REAL,
    close REAL,
    volume REAL,
    vwap REAL,
    num_trades INTEGER
);

CREATE UNIQUE INDEX IF NOT EXISTS idx_md_daily_bars_date_symbol
ON md_daily_bars (date, symbol);
EOF

# From here on any failing command aborts the script, and a glob without a
# match expands to nothing so the loop below is simply skipped.
set -euo pipefail
shopt -s nullglob

# The session filter below works in New York local time.
export TZ=America/New_York

#######################################
# Extract the symbol from "<YYYYMMDD>.<SYMBOL>.alpaca_1m_bars.db".
# Strips the known prefix and suffix instead of taking the second dot-separated
# field, so a symbol that itself contains a dot (e.g. BRK.B) stays intact.
# Arguments: $1 - file name, with or without a directory part
# Outputs:   the symbol on stdout
#######################################
bar_file_symbol() {
  local name=${1##*/}
  name=${name#*.}
  printf '%s\n' "${name%.alpaca_1m_bars.db}"
}

#######################################
# Build the SQLite time modifier ("-5 hours", "-4 hours", ...) that matches
# the New York UTC offset on the given day. Only whole hours are used.
# Arguments: $1 - date as YYYYMMDD
# Outputs:   the modifier on stdout
# Returns:   non-zero when date cannot parse the argument
#######################################
ny_offset_modifier() {
  local raw
  raw=$(TZ=America/New_York date -d "$1" '+%z') || return 1
  # 10# forces base 10 so that "08"/"09" are not read as invalid octal.
  printf '%s%d hours\n' "${raw:0:1}" "$((10#${raw:1:2}))"
}

for source_file in *.alpaca_1m_bars.db; do
  [[ -f "$source_file" ]] || continue

  symbol=$(bar_file_symbol "$source_file")

  echo "Processing: $source_file (symbol=$symbol)"
  file_date=${source_file:0:8}
  sqlite_offset=$(ny_offset_modifier "$file_date")

  # Keep only the bars whose shifted time of day lies within 09:30-16:00.
  # NOTE(review): this presumes tstamp is stored in UTC - confirm.
  sqlite3 "$source_file" <<EOF
delete from md_1min_bars
where
    time(tstamp, '${sqlite_offset}') < time('09:30:00')
    or time(tstamp, '${sqlite_offset}') > time('16:00:00');
EOF

  # The explicit column list matters: the SELECT yields num_trades before
  # vwap while the table declares vwap before num_trades, so a positional
  # insert would store each value in the other's column.
  # GROUP BY gives no output row for an empty source table, instead of one
  # row made of NULLs.
  sqlite3 "$TargetFile" <<EOF
ATTACH DATABASE '$source_file' AS tmp;

INSERT OR REPLACE INTO md_daily_bars
    (date, symbol, open, high, low, close, volume, num_trades, vwap)
SELECT
    date(tstamp) AS date,
    '$symbol' AS symbol,
    (SELECT open FROM tmp.md_1min_bars ORDER BY tstamp ASC LIMIT 1) AS open,
    MAX(high) AS high,
    MIN(low) AS low,
    (SELECT close FROM tmp.md_1min_bars ORDER BY tstamp DESC LIMIT 1) AS close,
    SUM(volume) AS volume,
    SUM(num_trades) AS num_trades,
    SUM(vwap * volume) / SUM(volume) AS vwap
FROM tmp.md_1min_bars
GROUP BY date(tstamp);
EOF

done

# Print every stored daily bar so the result can be checked by eye.
sqlite3 "$TargetFile" <<'EOF'
select * from md_daily_bars;
EOF