feat: add production deploy health check script

Monitors porwoll.de and blogwoman.de for stale builds caused by
Plesk Git's silent deploy failures (code pulled but build never ran).

- Compares source file timestamps vs .next/BUILD_ID mtime
- HTTP health checks via public URLs
- --alert flag for CMS monitoring API integration
- Runs as cron job every 30 minutes on sv-payload

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Martin Porwoll 2026-02-17 11:42:45 +00:00
parent 063dae411c
commit 4d1456fe98
2 changed files with 170 additions and 0 deletions

View file

@ -0,0 +1,169 @@
#!/bin/bash
# check-production-deploys.sh
#
# Checks if production frontend builds are fresh (not stale).
# Detects the Plesk Git silent failure: code pulled but build never ran.
#
# Usage:
# ./scripts/check-production-deploys.sh # Check all sites
# ./scripts/check-production-deploys.sh --alert # Check + send alert via CMS API on failure
#
# Exit codes:
# 0 = all builds fresh
# 1 = at least one problem detected (stale build, SSH failure, missing project path, or HTTP status other than 200/304)
# Strict mode: abort on errors, on unset variables and on failing pipeline stages.
set -euo pipefail

# Maximum number of minutes the code may be newer than the build.
STALE_THRESHOLD_MINUTES=30

# Human-readable problem descriptions, one entry per detected problem.
ERRORS=()

# Alerting is opt-in; only the first argument is inspected.
case "${1:-}" in
  --alert) ALERT_MODE=true ;;
  *) ALERT_MODE=false ;;
esac

# Sites to check, keyed by site name: ssh-host|project-path|public-url
declare -A SITES
SITES["porwoll.de"]="hetzner2|/var/www/vhosts/porwoll.de/httpdocs|https://porwoll.de"
SITES["blogwoman.de"]="hetzner1|/var/www/vhosts/blogwoman.de/httpdocs|https://blogwoman.de"
#######################################
# Check one production site for a stale or missing build and for HTTP
# availability, printing a short report to stdout.
# Globals:
#   STALE_THRESHOLD_MINUTES (read) - allowed code-vs-build drift in minutes
#   ERRORS (appended)              - one entry per problem found
# Arguments:
#   $1 - site name (used in messages)
#   $2 - SSH host alias
#   $3 - project path on the remote host
#   $4 - public base URL (a trailing "/" is appended for the request)
# Returns:
#   0 always; problems are reported through ERRORS.
#######################################
check_site() {
  local site_name="$1"
  local ssh_host="$2"
  local project_path="$3"
  local public_url="$4"

  echo "Checking ${site_name} (${ssh_host})..."

  # Collect code and build timestamps in a single SSH round trip.
  local result
  result=$(ssh -o ConnectTimeout=10 "${ssh_host}" "
    cd '${project_path}' 2>/dev/null || { echo 'ERROR:path_not_found'; exit 0; }
    # Use package.json mtime as proxy for last code update
    # (avoids git dubious ownership issues on Plesk)
    PKG_TS=\$(stat -c %Y package.json 2>/dev/null || echo 0)
    # Newest mtime among the TypeScript sources. NUL-delimited so unusual
    # file names are passed to stat as single arguments.
    SRC_TS=\$(find src/ \( -name '*.tsx' -o -name '*.ts' \) -print0 2>/dev/null | xargs -0 -r stat -c %Y 2>/dev/null | sort -rn | head -1)
    SRC_TS=\${SRC_TS:-0}
    # The newer of the two is the code timestamp
    CODE_TS=\$((PKG_TS > SRC_TS ? PKG_TS : SRC_TS))
    # BUILD_ID mtime (epoch), 0 when the file is missing
    BUILD_TS=\$(stat -c %Y .next/BUILD_ID 2>/dev/null || echo 0)
    # Last successful deploy marker
    DEPLOY_FILE=''
    if [ -f .last-successful-deploy ]; then
      DEPLOY_FILE=\$(cat .last-successful-deploy)
    fi
    # Current build ID
    BUILD_ID=\$(cat .next/BUILD_ID 2>/dev/null || echo 'unknown')
    echo \"CODE_TS:\${CODE_TS}|BUILD_TS:\${BUILD_TS}|DEPLOY_FILE:\${DEPLOY_FILE}|BUILD_ID:\${BUILD_ID}\"
  " 2>/dev/null) || {
    echo " SSH connection failed"
    ERRORS+=("${site_name}: SSH connection failed")
    return
  }

  if [[ "$result" == "ERROR:path_not_found" ]]; then
    echo " Project path not found"
    ERRORS+=("${site_name}: project path not found")
    return
  fi

  # HTTP check via public URL (separate from SSH).
  # curl already prints "000" through -w when the request fails, so the
  # fallback must replace the captured value instead of appending to it.
  local http_code
  http_code=$(curl -so /dev/null -w '%{http_code}' --max-time 10 "${public_url}/" 2>/dev/null) \
    || http_code="000"

  # Parse the pipe-separated report; every field has a safe default so a
  # malformed report cannot break the arithmetic below.
  local code_ts=0 build_ts=0 deploy_file="" build_id="unknown"
  local re_deploy=$'DEPLOY_FILE:([^|\n]*)'
  local re_build_id=$'BUILD_ID:([^|\n]+)'
  if [[ "$result" =~ CODE_TS:([0-9]+) ]]; then code_ts="${BASH_REMATCH[1]}"; fi
  if [[ "$result" =~ BUILD_TS:([0-9]+) ]]; then build_ts="${BASH_REMATCH[1]}"; fi
  if [[ "$result" =~ $re_deploy ]]; then deploy_file="${BASH_REMATCH[1]}"; fi
  if [[ "$result" =~ $re_build_id ]]; then build_id="${BASH_REMATCH[1]}"; fi

  local now
  now=$(date +%s)
  local code_age_min=$(( (now - code_ts) / 60 ))
  local build_age_min=$(( (now - build_ts) / 60 ))
  local drift_min=$(( (code_ts - build_ts) / 60 ))

  echo " Code updated: $(date -d "@${code_ts}" '+%Y-%m-%d %H:%M' 2>/dev/null || echo 'unknown') (${code_age_min}m ago)"
  echo " Build: $(date -d "@${build_ts}" '+%Y-%m-%d %H:%M' 2>/dev/null || echo 'unknown') (${build_age_min}m ago)"
  echo " Build ID: ${build_id}"
  echo " Last deploy: ${deploy_file:-never}"
  echo " HTTP status: ${http_code}"

  # Check: does a build exist, and is it fresh?
  # A build timestamp of 0 means .next/BUILD_ID was missing or the report
  # could not be parsed; report that explicitly instead of as drift.
  if (( build_ts == 0 )); then
    echo " NO BUILD: .next/BUILD_ID missing or unreadable"
    ERRORS+=("${site_name}: no build found (.next/BUILD_ID missing or unreadable)")
  elif (( drift_min > STALE_THRESHOLD_MINUTES )); then
    echo " STALE BUILD: code is ${drift_min}m newer than build"
    ERRORS+=("${site_name}: stale build — code ${drift_min}m ahead of build (threshold: ${STALE_THRESHOLD_MINUTES}m)")
  else
    echo " Build freshness: OK (drift: ${drift_min}m)"
  fi

  # Check: is the site responding?
  if [[ "$http_code" != "200" && "$http_code" != "304" ]]; then
    echo " SITE DOWN: HTTP ${http_code}"
    ERRORS+=("${site_name}: HTTP ${http_code}")
  fi
  echo ""
}
#######################################
# Send an alert message to the CMS monitoring API.
# Globals (all read, all optional):
#   PAYLOAD_URL      - CMS base URL (default: https://cms.c2sgmbh.de)
#   CRON_SECRET      - bearer token for the API
#   PAYLOAD_ENV_FILE - .env file to read CRON_SECRET from when the variable
#                      is unset (default: /home/payload/payload-cms/.env)
# Arguments:
#   $1 - alert message text
# Outputs:
#   Progress and warning lines, plus the API response body, on stdout.
# Returns:
#   0 always; delivery is best-effort and failures are only reported.
#######################################
send_alert() {
  local message="$1"
  local cms_url="${PAYLOAD_URL:-https://cms.c2sgmbh.de}"
  local cron_secret="${CRON_SECRET:-}"
  local env_file="${PAYLOAD_ENV_FILE:-/home/payload/payload-cms/.env}"

  # Fall back to the .env file. It is read in pure bash so that a missing key
  # cannot fail a pipeline (fatal under `set -euo pipefail`) and so that
  # values containing '=' are kept whole.
  if [[ -z "$cron_secret" && -r "$env_file" ]]; then
    local line
    while IFS= read -r line || [[ -n "$line" ]]; do
      line="${line%$'\r'}"
      if [[ "$line" == CRON_SECRET=* ]]; then
        cron_secret="${line#CRON_SECRET=}"
        break
      fi
    done < "$env_file"
    # Strip one pair of surrounding quotes, as commonly used in .env files.
    if [[ "$cron_secret" == \"*\" || "$cron_secret" == \'*\' ]]; then
      cron_secret="${cron_secret:1:${#cron_secret}-2}"
    fi
  fi

  if [[ -z "$cron_secret" ]]; then
    echo "WARNING: No CRON_SECRET available, cannot send alert via API"
    return
  fi

  # Encode the message as a JSON string; without a valid encoding the request
  # body would be malformed, so give up instead of sending it.
  local json_message
  json_message=$(printf '%s' "$message" \
    | python3 -c 'import sys,json; print(json.dumps(sys.stdin.read().strip()))') || {
    echo "WARNING: Could not JSON-encode alert message, alert not sent"
    return
  }

  echo "Sending alert via CMS API..."
  # --fail makes HTTP 4xx/5xx responses count as a failed call.
  curl -s --fail -X POST "${cms_url}/api/monitoring/alert" \
    -H "Authorization: Bearer ${cron_secret}" \
    -H "Content-Type: application/json" \
    -d "{\"source\":\"deploy-monitor\",\"severity\":\"error\",\"message\":${json_message}}" \
    --max-time 10 2>/dev/null || echo "Alert API call failed"
}
# Main: check every configured site, print a summary, optionally send an
# alert, and exit 0 when no problem was recorded or 1 otherwise.
echo "=== Production Deploy Health Check ==="
echo "Threshold: ${STALE_THRESHOLD_MINUTES} minutes"
echo ""

for site_name in "${!SITES[@]}"; do
  # Each SITES value is "ssh-host|project-path|public-url".
  IFS='|' read -r ssh_host project_path public_url <<< "${SITES[$site_name]}"
  check_site "$site_name" "$ssh_host" "$project_path" "$public_url"
done

if [[ ${#ERRORS[@]} -gt 0 ]]; then
  echo "=== PROBLEMS DETECTED ==="
  for err in "${ERRORS[@]}"; do
    echo " - $err"
  done

  if [[ "$ALERT_MODE" == true ]]; then
    # Join with real newlines. "\n" inside double quotes is a literal
    # backslash-n, which would reach the alert as visible "\n" text.
    alert_msg="Production Deploy Check Failed:"
    for err in "${ERRORS[@]}"; do
      alert_msg+=$'\n'"- ${err}"
    done
    send_alert "$alert_msg"
  fi
  exit 1
fi

echo "=== ALL OK ==="
exit 0

View file

@ -81,6 +81,7 @@ IGNORE_FILES=(
'\.test\.ts$'
'db-direct\.sh$' # Uses get_password() function for secure password input
'setup-tenants/setup\.sh$' # Uses environment variables, not hardcoded secrets
'check-production-deploys\.sh$' # References env var name CRON_SECRET, not actual secret
)
# Pfade die ignoriert werden sollen