docker_dev/alpaca_md_day/alpaca_md_day.sh
2024-07-22 18:40:23 -04:00

144 lines
3.3 KiB
Bash
Executable File

#!/bin/bash
# --- Settings
# Values marked "export" are read by run_proc / is_business_day, which are
# exported functions; run_proc is executed by parallel in child shells, where
# only exported variables are visible.
export PYTHONPATH=/
export Python=python3.10
export Config=http://cloud16.cvtt.vpn:6789/apps/minimal_md
export PyScript=/cvttpy/exchanges/alpaca/hist_md/hist_md_bars.py
export OutputDir=/app/data/alpaca_md # Local
export LogDir=/logs
DEFAULT_NUM_JOBS=10
# Must be exported: run_proc passes it to sleep inside the child shell, and an
# unexported value would leave sleep without an operand there.
export SLEEP_SEC_AFTER_PROC=2
DEFAULT_SLICE_SIZE=500
# --- Settings

# Make sure the output and log locations exist before any job writes to them.
mkdir -p "${OutputDir}"
mkdir -p "${LogDir}"
#######################################
# Print the command-line synopsis and terminate the script.
# Only the arguments the script actually reads are listed: $1 (date, optional)
# and $2 (number of parallel jobs, optional).
# Globals:   DEFAULT_NUM_JOBS (read; shown as the default job count)
# Arguments: none
# Outputs:   synopsis on stderr
# Returns:   does not return; exits with status 1
#######################################
usage() {
  echo "Usage: $0 [<date (YYYY-MM-DD)>] [<num-jobs> (default ${DEFAULT_NUM_JOBS:-10})]" >&2
  exit 1
}
# Log the invocation, then capture the two positional arguments
# (both may be empty; defaults are applied further down).
printf 'CommandLine: %s\n' "${*}"
Start="${1}"
NumJobs="${2}"
# Market-hours endpoint for MIC XNYS; is_business_day appends the
# start/end query parameters to it.
CalendarURL='http://cloud16.cvtt.vpn:8000/api/v1/markets/hours?mic=XNYS'
export CalendarURL
#######################################
# Report whether the market calendar lists an opening time for a date.
# Globals:   CalendarURL (read)
# Arguments: $1 - date, sent as both the start and the end of the query
# Returns:   0 when the response contains a non-null open_time;
#            1 otherwise, including when the request yields no output
#######################################
is_business_day() {
  local dt=${1}
  local open_time
  # "-r" with "// empty" keeps a JSON null from being printed as the
  # non-empty text "null", which would otherwise count as an opening time.
  open_time=$(curl -s "${CalendarURL}&start=${dt}&end=${dt}" \
    | jq -r '.[] | .open_time // empty')
  [[ -n "${open_time}" ]]
}
export -f is_business_day
# --- Resolve the date range and job count
# Upper bound on how many days the search below may step back. Without it the
# loop never ends when is_business_day keeps failing (for example when the
# calendar request returns nothing).
MAX_LOOKBACK_DAYS=${MAX_LOOKBACK_DAYS:-10}

requested_date=${Start}
if [[ -z "${requested_date}" ]]; then
  echo "start is not specified, getting yesterday..."
  Start=$(date -d "yesterday" "+%Y-%m-%d")
  echo "Start=${Start}"
elif ! Start=$(date -d "${requested_date}" "+%Y-%m-%d" 2>/dev/null); then
  # An unparsable date would otherwise turn into an empty Start below.
  echo "Invalid date: ${requested_date}" >&2
  usage
fi

# Step back one day at a time until the calendar reports a business day.
lookback=0
until is_business_day "${Start}"; do
  echo "${Start} is not business day in US"
  if (( ++lookback > MAX_LOOKBACK_DAYS )); then
    echo "No business day found within ${MAX_LOOKBACK_DAYS} days; check ${CalendarURL}" >&2
    exit 1
  fi
  Start=$(date -d "${Start} - 1 day" "+%Y-%m-%d")
done

End=$(date -d "${Start} + 1 day" "+%Y-%m-%d")
# Exclude today
if [[ "${End}" == "$(date '+%Y-%m-%d')" ]]; then
  End=$(date -d 'yesterday' '+%Y-%m-%d')
fi

# Fall back to the default job count when none was given.
NumJobs=${NumJobs:-${DEFAULT_NUM_JOBS}}
echo "Start=${Start} End=${End} NumJobs=${NumJobs}"
#######################################
# Run the Python download script for a single instrument.
# Exported so that parallel can invoke it in child shells; every global it
# reads must therefore come from the environment.
# Globals:   Python, PyScript, Config, OutputDir, LogDir (read)
#            SLEEP_SEC_AFTER_PROC (read; 2 is used when it is not set)
# Arguments: $1 - instrument name (prefixed with "ALPACA:" for the script)
#            $2 - start date; its first four characters name the output
#                 subdirectory
#            $3 - end date; --end is omitted when this is empty
# Outputs:   a progress line and the assembled command line on stdout
# Returns:   exit status of the Python command
#######################################
run_proc() {
  local inst=${1}
  local start=${2}
  local end=${3}
  local rc=0
  local -a cmd

  echo "Running for ${inst}"

  # Build the command as an array so it runs without eval and no argument is
  # re-parsed by the shell.
  cmd=(
    "${Python}" "${PyScript}"
    "--config=${Config}"
    "--output_dir=${OutputDir}/${start:0:4}"
    "--instruments=ALPACA:${inst}"
    "--start=${start}"
  )
  if [[ -n "${end}" ]]; then
    cmd+=("--end=${end}")
  fi
  cmd+=("--log_file=${LogDir}/${inst}.log" "--log_level=WARNING")

  echo "${cmd[*]}"
  "${cmd[@]}" || rc=$?

  # Pause between jobs; the default covers a child shell that did not inherit
  # the setting.
  sleep "${SLEEP_SEC_AFTER_PROC:-2}"
  return "${rc}"
}
export -f run_proc
# --- Fetch the instrument list
# API credentials are taken from the ALPACA_SANDBOX entry of ~/.creds (JSON).
key=$(jq -r '.["ALPACA_SANDBOX"] | .api_key' ~/.creds)
secret=$(jq -r '.["ALPACA_SANDBOX"] | .secret_key' ~/.creds)

# Build one "STOCK-<symbol>" entry per active asset whose class is us_equity
# and whose exchange is not OTC. The pipeline is run directly instead of
# through eval, so credential contents are never re-parsed as shell code, and
# the result is read line by line instead of being word-split.
# NOTE(review): the credentials are still passed on curl's command line and
# are visible in the process list; consider supplying the headers via a curl
# config file if that matters here.
Instruments=()
mapfile -t Instruments < <(
  curl -s --request GET \
    --url 'https://paper-api.alpaca.markets/v2/assets?status=active' \
    --header "APCA-API-KEY-ID: ${key}" \
    --header "APCA-API-SECRET-KEY: ${secret}" \
    --header 'accept: application/json' \
    | jq -r '.[] | select(.class == "us_equity" and .exchange != "OTC") | .symbol' \
    | sed 's/^/STOCK-/'
)

# An empty list means nothing will be downloaded; say so instead of
# continuing silently.
if (( ${#Instruments[@]} == 0 )); then
  echo "WARNING: the asset request returned no instruments" >&2
fi
# Hand the instruments to parallel in fixed-size batches rather than all at
# once (10K symbols parallel cannot handle).
slice_size=${DEFAULT_SLICE_SIZE}
total=${#Instruments[@]}
offset=0
while (( offset < total )); do
  batch=("${Instruments[@]:offset:slice_size}")
  # Each batch entry replaces {} and becomes the first argument of run_proc.
  parallel -j ${NumJobs} run_proc {} ${Start} ${End} ::: "${batch[@]}"
  (( offset += slice_size ))
done
#######################################
# gzip every regular file whose name ends in "db" below a directory.
# File names are read NUL-delimited from find, so paths containing spaces or
# glob characters are handled as single files.
# Arguments: $1 - directory to scan
# Outputs:   one "Compressing <file>" line per file on stdout
# Returns:   0 when every file was compressed; 1 when the directory does not
#            exist or any gzip call failed
#######################################
compress_db_files() {
  local dir=${1}
  local file
  local rc=0

  if [[ ! -d "${dir}" ]]; then
    echo "Cannot compress: '${dir}' is not a directory" >&2
    return 1
  fi

  while IFS= read -r -d '' file; do
    echo "Compressing ${file}"
    # stdin is redirected so gzip cannot consume the file list feeding the loop.
    gzip -- "${file}" </dev/null || rc=1
  done < <(find "${dir}" -type f -name '*db' -print0)

  return "${rc}"
}

echo "Compressing"
compress_db_files "${OutputDir}"
# --- Copy the data directory to each archive destination
Source=/app/data/
Targets=(
  "oleg@homestore.cvtt.vpn:/works/cvtt/md_archive/equity"
  "cvtt@cloud21.cvtt.vpn:/opt/store/cvtt/md_archive/equity"
)

sync_failed=0
for tgt in "${Targets[@]}"; do
  # The command is kept as an array so it is executed as-is, without eval.
  sync_cmd=(/usr/bin/rsync -ahv "${Source}" "${tgt}")
  echo "${sync_cmd[*]}"
  if ! "${sync_cmd[@]}"; then
    # Keep going so that one unreachable destination does not block the other.
    echo "ERROR: rsync to ${tgt} failed" >&2
    sync_failed=1
  fi
done

# Leave a non-zero status behind when any destination could not be synced.
[[ "${sync_failed}" -eq 0 ]]