From 792b07c4bcbcd713bf247b7bc0fdb993c5c62d91 Mon Sep 17 00:00:00 2001
From: Cryptoval Trading Technologies
Date: Sun, 19 Oct 2025 19:56:47 +0000
Subject: [PATCH] md_checklist uses scripts instead of direct ssh

---
Review notes (text here is not part of the commit message):
- Hunk sizes and the diffstat reflect the revised script bodies; the blob ids
  on the "index" lines still refer to the earlier bodies.

 healthcheck/md/crypto_missing_dates.sh     |  37 +++++
 healthcheck/md/crypto_sim_missing_dates.sh |  32 ++++
 healthcheck/md/equity_missing_dates.sh     |  45 ++++++
 healthcheck/md/equity_sim_missing_dates.sh |  37 +++++
 healthcheck/md/get_retrofit_dates.sh       | 170 +++++++++++++++++++++
 healthcheck/md/md_checklist.sh             |  34 ++---
 6 files changed, 334 insertions(+), 21 deletions(-)
 create mode 100755 healthcheck/md/crypto_missing_dates.sh
 create mode 100755 healthcheck/md/crypto_sim_missing_dates.sh
 create mode 100755 healthcheck/md/equity_missing_dates.sh
 create mode 100755 healthcheck/md/equity_sim_missing_dates.sh
 create mode 100755 healthcheck/md/get_retrofit_dates.sh

diff --git a/healthcheck/md/crypto_missing_dates.sh b/healthcheck/md/crypto_missing_dates.sh
new file mode 100755
index 0000000..60cf446
--- /dev/null
+++ b/healthcheck/md/crypto_missing_dates.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# Prints each date (YYYYMMDD) among the last BACKDAYS_COUNT days for which no
+# file matching FILE_GLOB is found under
+# /works/cvtt/md_archive/crypto/SOURCE_HOST/YYYY/MM on MD_HOST. The first
+# output line echoes the helper call being made.
+#
+# Usage: crypto_missing_dates.sh SOURCE_HOST
+# Env:   MD_HOST (default cloud21.cvtt.vpn), FILE_GLOB (default
+#        *.mktdata.db.gz), BACKDAYS_COUNT (default 30)
+
+SourceHost=${1:-}
+if [[ -z "${SourceHost}" ]]; then
+    echo "Usage: ${0} SOURCE_HOST" >&2
+    exit 1
+fi
+
+function_file=$(realpath "$(dirname "$0")")/get_retrofit_dates.sh
+source "${function_file}"
+
+# Quoted: the pattern must reach get_retrofit_dates verbatim.
+DEFAULT_FGLOB='*.mktdata.db.gz'
+
+Source=cvtt@${MD_HOST:-cloud21.cvtt.vpn}
+FileGlob=${FILE_GLOB:-${DEFAULT_FGLOB}}
+BackDaysCount=${BACKDAYS_COUNT:-30}
+
+# The call is kept in an array: running an unquoted flat string would expand
+# ${FileGlob} against the current directory and corrupt the argument list.
+Cmd=(get_retrofit_dates "${Source}" "/works/cvtt/md_archive/crypto/${SourceHost}" "${FileGlob}" "${BackDaysCount}")
+echo "${Cmd[*]}"
+
+DATES=$("${Cmd[@]}")
+
+for dt in ${DATES}; do
+    echo "${dt}"
+done
diff --git a/healthcheck/md/crypto_sim_missing_dates.sh b/healthcheck/md/crypto_sim_missing_dates.sh
new file mode 100755
index 0000000..ce26a3e
--- /dev/null
+++ b/healthcheck/md/crypto_sim_missing_dates.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Prints each date (YYYYMMDD) among the last BACKDAYS_COUNT days for which no
+# file matching FILE_GLOB is found under
+# /works/cvtt/md_archive/crypto/sim/YYYY/MM on MD_HOST. The first output line
+# echoes the helper call being made.
+#
+# Env: MD_HOST (default cloud21.cvtt.vpn), FILE_GLOB (default
+#      *.crypto_sim_md.db.gz), BACKDAYS_COUNT (default 30)
+#
+# Related command (presumably regenerates one date - confirm):
+# DAILY_SIM_CRYPTO_DATE=20250725 /usr/bin/docker compose -f /works/docker/daily_mktdata/docker-compose.yml up daily_sim_crypto
+
+source "$(realpath "$(dirname "$0")")/get_retrofit_dates.sh"
+
+# Quoted: the pattern must reach get_retrofit_dates verbatim.
+DEFAULT_FGLOB='*.crypto_sim_md.db.gz'
+
+Source=cvtt@${MD_HOST:-cloud21.cvtt.vpn}
+FileGlob=${FILE_GLOB:-${DEFAULT_FGLOB}}
+BackDaysCount=${BACKDAYS_COUNT:-30}
+
+# The call is kept in an array: running an unquoted flat string would expand
+# ${FileGlob} against the current directory and corrupt the argument list.
+Cmd=(get_retrofit_dates "${Source}" /works/cvtt/md_archive/crypto/sim "${FileGlob}" "${BackDaysCount}")
+echo "${Cmd[*]}"
+
+DATES=$("${Cmd[@]}")
+
+for dt in ${DATES}; do
+    echo "${dt}"
+done
diff --git a/healthcheck/md/equity_missing_dates.sh b/healthcheck/md/equity_missing_dates.sh
new file mode 100755
index 0000000..79cf9f2
--- /dev/null
+++ b/healthcheck/md/equity_missing_dates.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Prints each weekday (YYYY-MM-DD) among the last BACKDAYS_COUNT days for which
+# no file matching FILE_GLOB is found under
+# /works/cvtt/md_archive/equity/alpaca_md/YYYY/N/NVDA on MD_HOST. Every helper
+# call is echoed before its results.
+#
+# Env: MD_HOST (default cloud21.cvtt.vpn), FILE_GLOB (default
+#      *.alpaca_1m_bars.db.gz), BACKDAYS_COUNT (default 50)
+#
+# Related command (presumably regenerates one date - confirm):
+# EQUITY_HBAR_DATE=2025-07-23 /usr/bin/docker compose -f /works/docker/daily_mktdata/docker-compose.yml up daily_equity_hbar
+
+function_file=$(realpath "$(dirname "$0")")/get_retrofit_dates.sh
+source "${function_file}"
+
+# Quoted: the pattern must reach the remote shell verbatim.
+DEFAULT_FGLOB='*.alpaca_1m_bars.db.gz'
+
+Source=cvtt@${MD_HOST:-cloud21.cvtt.vpn}
+FileGlob=${FILE_GLOB:-${DEFAULT_FGLOB}}
+BackDaysCount=${BACKDAYS_COUNT:-50}
+
+# The symbol directory sits below a per-year directory, so visit every year the
+# look-back window touches rather than a hard-coded one. A BACKDAYS_COUNT that
+# date cannot use collapses the range to one year; the helper then rejects it.
+LastYear=$(date -d "1 day ago" +%Y 2>/dev/null) || LastYear=$(date +%Y)
+FirstYear=$(date -d "${BackDaysCount} day ago" +%Y 2>/dev/null) || FirstYear=${LastYear}
+(( FirstYear <= LastYear )) || FirstYear=${LastYear}
+
+for (( yr = FirstYear; yr <= LastYear; yr++ )); do
+    # The call is kept in an array: running an unquoted flat string would
+    # expand ${FileGlob} against the current directory.
+    Cmd=(get_equity_retrofit_dates "${Source}" "/works/cvtt/md_archive/equity/alpaca_md/${yr}/N/NVDA" "${FileGlob}" "${BackDaysCount}")
+    echo "${Cmd[*]}"
+
+    DATES=$("${Cmd[@]}")
+
+    for dt in ${DATES}; do
+        # Each year directory can only vouch for its own dates.
+        if [[ ${dt} == "${yr}-"* ]]; then
+            echo "${dt}"
+        fi
+    done
+done
diff --git a/healthcheck/md/equity_sim_missing_dates.sh b/healthcheck/md/equity_sim_missing_dates.sh
new file mode 100755
index 0000000..b573c1e
--- /dev/null
+++ b/healthcheck/md/equity_sim_missing_dates.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# Prints each weekday (YYYY-MM-DD) among the last BACKDAYS_COUNT days for which
+# no file matching FILE_GLOB is found under
+# /works/cvtt/md_archive/equity/alpaca_md/sim/YYYY/MM on MD_HOST. The first
+# output line echoes the helper call being made.
+#
+# Env: MD_HOST (default cloud21.cvtt.vpn), FILE_GLOB (default
+#      *.alpaca_sim_md.db.gz), BACKDAYS_COUNT (default 50)
+#
+# Related command (presumably regenerates one date - confirm):
+# DAILY_SIM_EQUITY_DATE=20250723 /usr/bin/docker compose -f /works/docker/daily_mktdata/docker-compose.yml up daily_sim_equity
+
+source "$(realpath "$(dirname "$0")")/get_retrofit_dates.sh"
+
+# Quoted: the pattern must reach get_retrofit_dates verbatim.
+DEFAULT_FGLOB='*.alpaca_sim_md.db.gz'
+
+Source=cvtt@${MD_HOST:-cloud21.cvtt.vpn}
+FileGlob=${FILE_GLOB:-${DEFAULT_FGLOB}}
+BackDaysCount=${BACKDAYS_COUNT:-50}
+
+# The call is kept in an array: running an unquoted flat string would expand
+# ${FileGlob} against the current directory and corrupt the argument list.
+Cmd=(get_retrofit_dates "${Source}" /works/cvtt/md_archive/equity/alpaca_md/sim "${FileGlob}" "${BackDaysCount}")
+echo "${Cmd[*]}"
+
+DATES=$("${Cmd[@]}")
+
+for dt in ${DATES}; do
+    # Skip Saturdays (6) and Sundays (7).
+    weekday=$(date -d "$dt" +%u)
+    (( weekday >= 6 )) && continue
+
+    fdt=$(date -d "$dt" +%Y-%m-%d 2>/dev/null)
+    echo "${fdt}"
+done
diff --git a/healthcheck/md/get_retrofit_dates.sh b/healthcheck/md/get_retrofit_dates.sh
new file mode 100755
index 0000000..cf3452b
--- /dev/null
+++ b/healthcheck/md/get_retrofit_dates.sh
@@ -0,0 +1,170 @@
+#!/bin/bash
+#
+# Helpers for finding dates whose market-data archive files are missing on a
+# remote host. Meant to be sourced; needs bash 4+ (associative arrays), GNU
+# date and ssh access to the remote host.
+
+# get_retrofit_dates HOST ROOT_DIR FILENAME_GLOB DAY_COUNT
+#
+# Lists ROOT_DIR/YYYY/MM on HOST over ssh for every month touched by the last
+# DAY_COUNT days (counting back from yesterday) and prints, oldest first and
+# one per line, each date (YYYYMMDD) for which no entry both matches
+# FILENAME_GLOB and starts with that date.
+#
+# Returns 1 on invalid arguments or when GNU date arithmetic is unavailable,
+# 0 otherwise. A listing that fails (missing directory, unreachable host) is
+# treated as empty, so its dates are printed as missing; an ssh-level failure
+# is additionally reported on stderr.
+get_retrofit_dates() {
+    local host=${1:-}
+    local root_dir=${2:-}
+    local filename_glob=${3:-}
+    local day_count=${4:-}
+
+    if [[ -z "$host" || -z "$root_dir" || -z "$filename_glob" || -z "$day_count" ]]; then
+        echo "usage: get_retrofit_dates HOST ROOT_DIR FILENAME_GLOB DAY_COUNT" >&2
+        return 1
+    fi
+
+    # 10# forces base 10 so zero-padded counts such as 08 are not parsed as octal.
+    if ! [[ "$day_count" =~ ^[0-9]+$ ]] || (( 10#$day_count <= 0 )); then
+        echo "get_retrofit_dates: DAY_COUNT must be a positive integer" >&2
+        return 1
+    fi
+    day_count=$(( 10#$day_count ))
+
+    local -a target_dates=()
+    declare -A month_dirs=()
+
+    if ! date -d "1 day ago" +%Y%m%d >/dev/null 2>&1; then
+        echo "get_retrofit_dates: requires GNU date arithmetic" >&2
+        return 1
+    fi
+
+    local offset date
+    for (( offset = 1; offset <= day_count; offset++ )); do
+        date=$(date -d "$offset day ago" +%Y%m%d)
+        target_dates+=("$date")
+        month_dirs["${date:0:4}/${date:4:2}"]=1
+    done
+
+    declare -A existing_dates=()
+
+    local dir remote_path remote_cmd remote_output entry ssh_rc
+    for dir in "${!month_dirs[@]}"; do
+        remote_path="$root_dir/$dir"
+        printf -v remote_cmd "cd %q 2>/dev/null && LC_ALL=C ls -1" "$remote_path"
+        ssh_rc=0
+        remote_output=$(ssh "$host" "$remote_cmd" 2>/dev/null) || ssh_rc=$?
+        # ssh exits with 255 for its own errors; anything else is the remote status.
+        if (( ssh_rc == 255 )); then
+            echo "get_retrofit_dates: ssh to $host failed; dates under $remote_path are reported as missing" >&2
+        fi
+
+        if [[ -n "$remote_output" ]]; then
+            while IFS= read -r entry; do
+                [[ -z "$entry" ]] && continue
+                [[ $entry == $filename_glob ]] || continue
+                if [[ "$entry" =~ ^([0-9]{8}) ]]; then
+                    existing_dates["${BASH_REMATCH[1]}"]=1
+                fi
+            done <<<"$remote_output"
+        fi
+    done
+
+    local -a missing_dates=()
+    local idx dt
+    # target_dates runs newest to oldest; walk it backwards for oldest first.
+    for (( idx = ${#target_dates[@]} - 1; idx >= 0; idx-- )); do
+        dt=${target_dates[$idx]}
+        [[ -n "${existing_dates[$dt]:-}" ]] || missing_dates+=("$dt")
+    done
+
+    # printf with no arguments would still emit one empty line.
+    if (( ${#missing_dates[@]} > 0 )); then
+        printf '%s\n' "${missing_dates[@]}"
+    fi
+}
+
+# get_equity_retrofit_dates HOST ROOT_DIR FILENAME_GLOB DAY_COUNT
+#
+# Lists the entries of ROOT_DIR on HOST that match FILENAME_GLOB (expanded by
+# the remote shell) and prints, oldest first and one per line, each weekday
+# among the last DAY_COUNT days (counting back from yesterday) for which no
+# listed entry starts with that date as YYYYMMDD. Output dates are formatted
+# YYYY-MM-DD.
+#
+# Returns 1 on invalid arguments, when GNU date arithmetic is unavailable or
+# when a date cannot be reformatted; 0 otherwise. A listing that fails is
+# treated as empty; an ssh-level failure is additionally reported on stderr.
+get_equity_retrofit_dates() {
+    local host=${1:-}
+    local root_dir=${2:-}
+    local filename_glob=${3:-}
+    local day_count=${4:-}
+
+    if [[ -z "$host" || -z "$root_dir" || -z "$filename_glob" || -z "$day_count" ]]; then
+        echo "usage: get_equity_retrofit_dates HOST ROOT_DIR FILENAME_GLOB DAY_COUNT" >&2
+        return 1
+    fi
+
+    # 10# forces base 10 so zero-padded counts such as 08 are not parsed as octal.
+    if ! [[ "$day_count" =~ ^[0-9]+$ ]] || (( 10#$day_count <= 0 )); then
+        echo "get_equity_retrofit_dates: DAY_COUNT must be a positive integer" >&2
+        return 1
+    fi
+    day_count=$(( 10#$day_count ))
+
+    if ! date -d "1 day ago" +%Y%m%d >/dev/null 2>&1; then
+        echo "get_equity_retrofit_dates: requires GNU date arithmetic" >&2
+        return 1
+    fi
+
+    local -a target_dates=()
+    local offset date weekday
+    for (( offset = 1; offset <= day_count; offset++ )); do
+        date=$(date -d "$offset day ago" +%Y%m%d)
+        weekday=$(date -d "$date" +%u)
+        # %u is 1 (Monday) .. 7 (Sunday): skip weekends.
+        (( weekday >= 6 )) && continue
+        target_dates+=("$date")
+    done
+
+    declare -A existing_dates=()
+
+    local remote_cmd remote_output entry ssh_rc=0
+    # FILENAME_GLOB is deliberately left unquoted so the remote shell expands it.
+    printf -v remote_cmd "cd %q 2>/dev/null && LC_ALL=C ls -1 %s" "$root_dir" "$filename_glob"
+    remote_output=$(ssh "$host" "$remote_cmd" 2>/dev/null) || ssh_rc=$?
+    # ssh exits with 255 for its own errors; anything else is the remote status.
+    if (( ssh_rc == 255 )); then
+        echo "get_equity_retrofit_dates: ssh to $host failed; all dates are reported as missing" >&2
+    fi
+
+    if [[ -n "$remote_output" ]]; then
+        while IFS= read -r entry; do
+            [[ -z "$entry" ]] && continue
+            if [[ "$entry" =~ ^([0-9]{8}) ]]; then
+                existing_dates["${BASH_REMATCH[1]}"]=1
+            fi
+        done <<<"$remote_output"
+    fi
+
+    local -a missing_dates=()
+    local idx dt tmp
+    # target_dates runs newest to oldest; walk it backwards for oldest first.
+    for (( idx = ${#target_dates[@]} - 1; idx >= 0; idx-- )); do
+        dt=${target_dates[$idx]}
+        [[ -n "${existing_dates[$dt]:-}" ]] && continue
+        if ! tmp=$(date -d "$dt" +%Y-%m-%d 2>/dev/null); then
+            echo "invalid date \"$dt\"" >&2
+            return 1
+        fi
+        missing_dates+=("$tmp")
+    done
+
+    # printf with no arguments would still emit one empty line.
+    if (( ${#missing_dates[@]} > 0 )); then
+        printf '%s\n' "${missing_dates[@]}"
+    fi
+}
diff --git a/healthcheck/md/md_checklist.sh b/healthcheck/md/md_checklist.sh
index 02f5091..30b7a93 100755
--- a/healthcheck/md/md_checklist.sh
+++ b/healthcheck/md/md_checklist.sh
@@ -19,6 +19,7 @@ Sender=${RootDir}/ops/utils/send_mmost.sh
 # ----- For DEBUGGING
 # Sender=cat
 # StatusChannel=
+SDir=$(realpath "$(dirname "$0")")
 
 run_crypto_checklist() {
     # wrap Markdown
@@ -30,12 +31,12 @@ run_crypto_checklist() {
 
     declare -A Commands
     Commands=(
-        ["hs01:cloud29"]="ssh cvtt@hs01.cvtt.vpn ls -l /works/cvtt/md_archive/crypto/cloud29/${yr}/${mn} | tail -5"
-        ["hs01:cloud28"]="ssh cvtt@hs01.cvtt.vpn ls -l /works/cvtt/md_archive/crypto/cloud28/${yr}/${mn} | tail -5"
-        ["hs01:sim"]="ssh cvtt@hs01.cvtt.vpn ls -l /works/cvtt/md_archive/crypto/sim/${yr}/${mn} | tail -5"
-        ["cloud21:cloud29"]="ssh cvtt@cloud21.cvtt.vpn ls -l /works/cvtt/md_archive/crypto/cloud29/${yr}/${mn} | tail -5"
-        ["cloud21:cloud28"]="ssh cvtt@cloud21.cvtt.vpn ls -l /works/cvtt/md_archive/crypto/cloud28/${yr}/${mn} | tail -5"
-        ["cloud21:sim"]="ssh cvtt@cloud21.cvtt.vpn ls -l /works/cvtt/md_archive/crypto/sim/${yr}/${mn} | tail -5"
+        ["crypto:missing-dates:hs01:cloud28"]="MD_HOST=hs01.cvtt.vpn ${SDir}/crypto_missing_dates.sh cloud28"
+        ["crypto:missing-dates:hs01:cloud29"]="MD_HOST=hs01.cvtt.vpn ${SDir}/crypto_missing_dates.sh cloud29"
+        ["crypto:missing-dates:hs01:sim"]="MD_HOST=hs01.cvtt.vpn ${SDir}/crypto_sim_missing_dates.sh"
+        ["crypto:missing-dates:cloud21:cloud28"]="${SDir}/crypto_missing_dates.sh cloud28"
+        ["crypto:missing-dates:cloud21:cloud29"]="${SDir}/crypto_missing_dates.sh cloud29"
+        ["crypto:missing-dates:cloud21:sim"]="${SDir}/crypto_sim_missing_dates.sh"
     )
 
     for name in $(printf "%s\n" "${!Commands[@]}" | sort); do
@@ -58,22 +59,13 @@ run_eqt_checklist() {
     yr=$(date -d 'yesterday' '+%Y')
     mn=$(date -d 'yesterday' '+%m')
 
-    CheckSymbols="A/AAPL N/NVDA M/META"
     declare -A Commands
-    Commands["hs01"]=""
-    for sym in ${CheckSymbols}; do
-        Commands["hs01"]+="ssh cvtt@hs01.cvtt.vpn ls -l /works/cvtt/md_archive/equity/alpaca_md/${yr}/${sym} | tail -3; "
-    done
-    Commands["hs01:sim"]="ssh cvtt@hs01.cvtt.vpn ls -l /works/cvtt/md_archive/equity/alpaca_md/sim/${yr}/${mn} | tail -5"
-
-    Commands["cloud21"]=""
-    for sym in ${CheckSymbols}; do
-        Commands["cloud21"]+="ssh cvtt@cloud21.cvtt.vpn ls -l /works/cvtt/md_archive/equity/alpaca_md/${yr}/${sym} | tail -3; "
-    done
-    Commands["cloud21:sim"]="ssh cvtt@cloud21.cvtt.vpn ls -l /works/cvtt/md_archive/equity/alpaca_md/sim/${yr}/${mn} | tail -5"
-
-    Commands["cvtt-dev-server"]="ssh cvtt@cvtt-dev-server.cvtt.vpn 'ls -ltr /works/data/equity/*db* | tail -10'"
-
+    Commands=(
+        ["equity:missing-dates:cloud21"]="${SDir}/equity_missing_dates.sh"
+        ["equity:missing-dates:cloud21:sim"]="${SDir}/equity_sim_missing_dates.sh"
+        ["equity:missing-dates:hs01"]="MD_HOST=hs01.cvtt.vpn ${SDir}/equity_missing_dates.sh"
+        ["equity:missing-dates:hs01:sim"]="MD_HOST=hs01.cvtt.vpn ${SDir}/equity_sim_missing_dates.sh"
+    )
     for name in $(printf "%s\n" "${!Commands[@]}" | sort); do
         echo "------- ${name}"