ops/research/load_eqty_md.sh

181 lines
3.5 KiB
Bash
Executable File

#!/usr/bin/env bash
# Print the one-line command synopsis to stdout and terminate the script.
# A trailing '*' inside a description marks the default value.
# Arguments: none
# Exits:     always, with status 1
usage() {
  printf 'Usage: %s' "$0"
  printf ' [-h <host (homestore*/cloud21)>]'
  printf ' [-d <YYYYMMDD> (yesterday*)]'
  printf ' [-s <stocks comma separated>]'
  printf ' [-t <target directory>]'
  printf ' [-u (print usage)]'
  printf '\n'
  exit 1
}
# Pick the binary used for every `-d` date computation below.
# On macOS the script calls `gdate` (presumably GNU date from coreutils —
# confirm it is installed); everywhere else it uses plain `date`.
case "$(uname)" in
  Darwin)
    # macOS
    date='gdate'
    ;;
  *)
    date='date'
    ;;
esac
# ------------------ Settings
# Defaults; the -d, -h, -s and -t command-line options override
# md_date, host, stocks and TargetDir respectively.
md_date=
host="homestore"
stocks="COIN,GBTC,SQ"
Table="md_1min_bars"
TargetDir=/opt/jupyter_gpu/data/eqty_md
# NOTE(review): this runs before option parsing, so it always creates the
# default directory, even when -t later points somewhere else.
mkdir -p "${TargetDir}"
# ------------------ Settings
# Check whether a value is a member of a list of allowed values.
# Arguments: $1   - value to look up
#            $2.. - allowed values
# Returns:   0 if $1 equals one of the allowed values (exact string match),
#            1 otherwise (including when no allowed values are given)
is_valid() {
  local inst=${1-}
  # Drop the candidate so that "$@" holds only the allowed values.
  (( $# > 0 )) && shift
  # Keep the loop variable local so a caller's global is not overwritten.
  local valid_inst
  for valid_inst in "$@"; do
    if [[ "${inst}" == "${valid_inst}" ]]; then
      return 0
    fi
  done
  return 1
}
# Parse the command line. The leading ':' in the option string selects
# getopts' silent mode, so unknown flags and missing arguments are reported
# by the '\?' and ':' arms below instead of by getopts itself.
while getopts ":h:d:s:t:u" opt; do
  case "${opt}" in
    h) host="${OPTARG}" ;;
    d) md_date="${OPTARG}" ;;
    s) stocks="${OPTARG}" ;;
    t) TargetDir="${OPTARG}" ;;
    u) usage ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      usage
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      usage
      ;;
  esac
done
# Fall back to the previous day when no -d option was given.
if [[ -z "${md_date}" ]]; then
  md_date="yesterday"
fi
# Log the effective request.
echo "${md_date} ${host}"
# Only hosts with a connection profile further down are accepted.
valid_hosts=('homestore' 'cloud21')
if ! is_valid "${host}" "${valid_hosts[@]}"; then
  echo "Host '${host}' is not valid" >&2
  usage
fi
# Prepare the output database: <TargetDir>/<YYYYMMDD>.eqty.mktdata.ohlcv.db
# The directory may have been changed with -t, so make sure it exists.
mkdir -p "${TargetDir}"
TargetFile=$("${date}" -d "${md_date}" "+%Y%m%d.eqty.mktdata.ohlcv.db")
# An unparsable date yields an empty name; stop rather than operate on the
# directory itself.
if [[ -z "${TargetFile}" ]]; then
  echo "Cannot interpret date '${md_date}'" >&2
  exit 1
fi
TargetFilePath="${TargetDir}/${TargetFile}"
# Keep a timestamped copy of any previous result before starting afresh.
if [[ -e "${TargetFilePath}" ]]; then
  mv "${TargetFilePath}" "${TargetFilePath}.saved.$("${date}" '+%Y%m%d_%H%M%S')"
fi
touch "${TargetFilePath}"
# ---- temp dir
# Scratch directory for the downloaded archives; removed on any exit path.
tmp_dir=$(mktemp -d) || {
  echo "Failed to create a temporary directory" >&2
  exit 1
}
# Remove the scratch directory. Registered as the EXIT trap below.
function cleanup {
  # Step out first, in case the current directory is inside the scratch dir.
  cd "${HOME:-/}" || cd /
  # ':?' aborts instead of running rm -rf with an empty path.
  rm -rf -- "${tmp_dir:?}"
}
trap cleanup EXIT
# ---- temp dir
# Translate the logical host name into its connection details: the machine
# to contact, the remote account, and the root of the archive tree there.
case "${host}" in
  cloud21)
    SourceHost=cloud21.cvtt.vpn
    SourceUser=cvtt
    SourceRootDir="/opt/store/cvtt/md_archive/equity/alpaca_md"
    ;;
  homestore)
    SourceHost=homestore.cvtt.vpn
    SourceUser=oleg
    SourceRootDir="/works/cvtt/md_archive/equity/alpaca_md"
    ;;
  *)
    usage
    ;;
esac
# Split the comma-separated list into the Stocks array; IFS is changed for
# this one read only.
IFS=',' read -ra Stocks <<< "${stocks}"
echo "${SourceHost} ${SourceUser} ${SourceRootDir} ${Stocks[@]}"
# Fetch one compressed per-stock database from the source host and append
# its contents to the target database.
# Globals:   SourceUser, SourceHost (read) - remote account and machine
#            tmp_dir (read)                - local scratch directory
#            SourceFileZip, SourceFile (read) - archive name and the name it
#                                            has once decompressed
#            TargetFilePath (read)         - database receiving the dump
# Arguments: $1 - path of the archive on the remote host
# Returns:   0 when the remote file is missing (the stock is skipped),
#            1 when decompression fails,
#            otherwise the status of the final sqlite3 import
function download_file {
  local SourceFilePath=${1}
  local zip_path="${tmp_dir}/${SourceFileZip}"
  local db_file="${tmp_dir}/${SourceFile}"
  # The command is kept in an array so no argument is re-parsed by the shell.
  local -a rsync_cmd=(rsync -ahv)

  # Show overall transfer progress only when attached to a terminal.
  if tty -s; then
    rsync_cmd+=(--info=progress2)
  fi
  rsync_cmd+=("${SourceUser}@${SourceHost}:${SourceFilePath}" "${tmp_dir}/")
  echo "${rsync_cmd[*]}"
  "${rsync_cmd[@]}"

  # rsync's own status is not inspected; the presence of the file decides.
  if [[ ! -f "${zip_path}" ]]; then
    echo "File ${SourceUser}@${SourceHost}:${SourceFilePath} is missing. Skipped."
    return 0
  fi
  ls -lh "${tmp_dir}"

  echo "gunzip ${zip_path}"
  if ! gunzip "${zip_path}"; then
    echo "Failed to decompress ${zip_path}. Skipped." >&2
    return 1
  fi

  # --- T E M P - for files older than 20240813
  # Recreate the index as a unique one; IF EXISTS keeps files that have no
  # such index from raising an error.
  sqlite3 "${db_file}" <<'EOF'
DROP INDEX IF EXISTS md_1min_bars_idx;
CREATE UNIQUE INDEX IF NOT EXISTS md_1min_bars_idx on md_1min_bars(tstamp, exchange_id, instrument_id);
EOF
  # --- T E M P

  # Replay the per-stock database into the target database.
  echo "sqlite3 ${db_file} \".dump\" | sqlite3 ${TargetFilePath}"
  sqlite3 "${db_file}" ".dump" | sqlite3 "${TargetFilePath}"
}
echo "Loading files"
# The date is the same for every stock, so resolve it once up front.
md_stamp=$("${date}" -d "${md_date}" "+%Y%m%d")
if [[ -z "${md_stamp}" ]]; then
  echo "Cannot interpret date '${md_date}'" >&2
  exit 1
fi
md_year="${md_stamp:0:4}"
for stock in "${Stocks[@]}"; do
  # Remote layout: <root>/<first letter>/<stock>/<year>/<YYYYMMDD>.<stock>.1min.db.gz
  StockLetter="${stock:0:1}"
  # SourceFile and SourceFileZip are read by download_file as globals.
  SourceFile="${md_stamp}.${stock}.1min.db"
  SourceFileZip="${SourceFile}.gz"
  SourceFilePath="${SourceRootDir}/${StockLetter}/${stock}/${md_year}/${SourceFileZip}"
  echo "${SourceFilePath}"
  download_file "${SourceFilePath}"
done
# Restrict the result to its owner, then show it.
chmod 600 "${TargetFilePath}"
ls -lh "${TargetFilePath}"
# mkdir -p /tmp/aaa
# cp -r ${tmp_dir}/* /tmp/aaa/