#!/bin/bash
# check-production-deploys.sh
#
# Checks if production frontend builds are fresh (not stale).
# Detects the Plesk Git silent failure: code pulled but build never ran.
#
# Usage:
#   ./scripts/check-production-deploys.sh           # Check all sites
#   ./scripts/check-production-deploys.sh --alert   # Check + send alert via CMS API on failure
#
# Environment (only read in --alert mode):
#   PAYLOAD_URL   Base URL of the CMS (default: https://cms.c2sgmbh.de)
#   CRON_SECRET   Bearer token for the alert endpoint; when unset it is read
#                 from /home/payload/payload-cms/.env
#
# Exit codes:
#   0 = all builds fresh and all sites responding
#   1 = at least one problem detected (stale or missing build, SSH failure,
#       project path not found, or HTTP status other than 200/304)

set -euo pipefail

readonly STALE_THRESHOLD_MINUTES=30
ALERT_MODE=false
ERRORS=()

# Sites to check: name -> ssh-host|project-path|public-url
declare -A SITES=(
  ["porwoll.de"]="hetzner2|/var/www/vhosts/porwoll.de/httpdocs|https://porwoll.de"
  ["blogwoman.de"]="hetzner1|/var/www/vhosts/blogwoman.de/httpdocs|https://blogwoman.de"
)

#######################################
# Inspect one site: compare the newest code mtime against the .next/BUILD_ID
# mtime over SSH, and probe the public URL over HTTP.
# Globals:
#   STALE_THRESHOLD_MINUTES (read), ERRORS (appended on every problem found)
# Arguments:
#   $1 - site name (used in messages)
#   $2 - SSH host
#   $3 - project path on the remote host
#   $4 - public base URL
# Outputs:
#   Human-readable report on stdout.
# Returns:
#   0 always; problems are recorded in ERRORS instead of the return status.
#######################################
check_site() {
  local site_name="$1"
  local ssh_host="$2"
  local project_path="$3"
  local public_url="$4"

  echo "Checking ${site_name} (${ssh_host})..."

  # Collect code timestamp, BUILD_ID mtime, deploy marker and build id in a
  # single SSH round trip. The remote script prints one pipe-separated line.
  local result
  result=$(ssh -o ConnectTimeout=10 "${ssh_host}" "
    cd '${project_path}' 2>/dev/null || { echo 'ERROR:path_not_found'; exit 0; }

    # Use package.json mtime as proxy for last code update
    # (avoids git dubious ownership issues on Plesk)
    PKG_TS=\$(stat -c %Y package.json 2>/dev/null || echo 0)

    # Also check the most recently modified source file.
    # -exec ... + keeps file names containing whitespace intact.
    SRC_TS=\$(find src/ \\( -name '*.tsx' -o -name '*.ts' \\) -exec stat -c %Y {} + 2>/dev/null | sort -rn | head -1)
    SRC_TS=\${SRC_TS:-0}

    # Use the newer of the two as the code timestamp
    CODE_TS=\$((PKG_TS > SRC_TS ? PKG_TS : SRC_TS))

    # BUILD_ID mtime (epoch); 0 when the file is missing or unreadable
    BUILD_TS=\$(stat -c %Y .next/BUILD_ID 2>/dev/null || echo 0)

    # Last successful deploy marker
    DEPLOY_FILE=''
    if [ -f .last-successful-deploy ]; then
      DEPLOY_FILE=\$(cat .last-successful-deploy)
    fi

    # Current build ID
    BUILD_ID=\$(cat .next/BUILD_ID 2>/dev/null || echo 'unknown')

    echo \"CODE_TS:\${CODE_TS}|BUILD_TS:\${BUILD_TS}|DEPLOY_FILE:\${DEPLOY_FILE}|BUILD_ID:\${BUILD_ID}\"
  " 2>/dev/null) || {
    echo " SSH connection failed"
    ERRORS+=("${site_name}: SSH connection failed")
    return
  }

  if [[ "$result" == "ERROR:path_not_found" ]]; then
    echo " Project path not found"
    ERRORS+=("${site_name}: project path not found")
    return
  fi

  # HTTP check via public URL (separate from SSH).
  # curl already prints "000" through -w when the transfer fails, so a
  # "|| echo 000" fallback would produce "000000"; branch on the exit status.
  local http_code
  if ! http_code=$(curl -so /dev/null -w '%{http_code}' --max-time 10 "${public_url}/" 2>/dev/null); then
    http_code="000"
  fi

  # Parse the pipe-separated result line
  local code_ts build_ts deploy_file build_id
  code_ts=$(grep -oP 'CODE_TS:\K[0-9]+' <<< "$result" || echo 0)
  build_ts=$(grep -oP 'BUILD_TS:\K[0-9]+' <<< "$result" || echo 0)
  deploy_file=$(grep -oP 'DEPLOY_FILE:\K[^\|]*' <<< "$result" || echo "")
  build_id=$(grep -oP 'BUILD_ID:\K[^\|]+' <<< "$result" || echo "unknown")

  local now
  now=$(date +%s)
  local code_age_min=$(( (now - code_ts) / 60 ))
  local build_age_min=$(( (now - build_ts) / 60 ))
  local drift_min=$(( (code_ts - build_ts) / 60 ))

  echo " Code updated: $(date -d "@${code_ts}" '+%Y-%m-%d %H:%M' 2>/dev/null || echo 'unknown') (${code_age_min}m ago)"
  echo " Build: $(date -d "@${build_ts}" '+%Y-%m-%d %H:%M' 2>/dev/null || echo 'unknown') (${build_age_min}m ago)"
  echo " Build ID: ${build_id}"
  echo " Last deploy: ${deploy_file:-never}"
  echo " HTTP status: ${http_code}"

  # Check: is the build stale?
  if (( build_ts == 0 )); then
    # No BUILD_ID at all: report it as such rather than as an epoch-sized drift.
    echo " NO BUILD: .next/BUILD_ID missing or unreadable"
    ERRORS+=("${site_name}: no build found (.next/BUILD_ID missing or unreadable)")
  elif (( drift_min > STALE_THRESHOLD_MINUTES )); then
    echo " STALE BUILD: code is ${drift_min}m newer than build"
    ERRORS+=("${site_name}: stale build — code ${drift_min}m ahead of build (threshold: ${STALE_THRESHOLD_MINUTES}m)")
  else
    echo " Build freshness: OK (drift: ${drift_min}m)"
  fi

  # Check: is the site responding?
  if [[ "$http_code" != "200" && "$http_code" != "304" ]]; then
    echo " SITE DOWN: HTTP ${http_code}"
    ERRORS+=("${site_name}: HTTP ${http_code}")
  fi

  echo ""
}

#######################################
# POST an error alert to the CMS monitoring endpoint.
# Globals:
#   PAYLOAD_URL (read, optional), CRON_SECRET (read, optional)
# Arguments:
#   $1 - alert message (may contain newlines)
# Outputs:
#   Progress and warning messages, plus the API response body, on stdout.
# Returns:
#   0 always; alerting is best-effort and must not change the exit code.
#######################################
send_alert() {
  local message="$1"
  local cms_url="${PAYLOAD_URL:-https://cms.c2sgmbh.de}"
  local cron_secret="${CRON_SECRET:-}"
  local env_file="/home/payload/payload-cms/.env"

  # Try loading from .env.
  # -f2- keeps secrets that contain '=' (e.g. base64 padding) intact, and the
  # explicit fallback stops set -e/pipefail from aborting when the key is absent.
  if [[ -z "$cron_secret" && -f "$env_file" ]]; then
    cron_secret=$(grep -m1 '^CRON_SECRET=' "$env_file" 2>/dev/null | cut -d= -f2-) || cron_secret=""
  fi

  if [[ -z "$cron_secret" ]]; then
    echo "WARNING: No CRON_SECRET available, cannot send alert via API"
    return
  fi

  # JSON-encode the message; skip the request rather than POST invalid JSON.
  local json_message
  json_message=$(printf '%s' "$message" \
    | python3 -c 'import sys,json; print(json.dumps(sys.stdin.read().strip()))' 2>/dev/null) || {
    echo "WARNING: could not JSON-encode alert message (python3 failed), alert not sent"
    return
  }

  echo "Sending alert via CMS API..."
  # --fail makes HTTP 4xx/5xx responses count as a failed call.
  curl -s --fail -X POST "${cms_url}/api/monitoring/alert" \
    -H "Authorization: Bearer ${cron_secret}" \
    -H "Content-Type: application/json" \
    -d "{\"source\":\"deploy-monitor\",\"severity\":\"error\",\"message\":${json_message}}" \
    --max-time 10 2>/dev/null || echo "Alert API call failed"
}

#######################################
# Entry point: check every configured site, print a summary, optionally alert.
# Globals:
#   SITES (read), ERRORS (read), ALERT_MODE (written), STALE_THRESHOLD_MINUTES (read)
# Arguments:
#   $1 - optional "--alert" to send an alert via the CMS API on failure
# Returns:
#   Exits 0 when no problems were recorded, 1 otherwise.
#######################################
main() {
  if [[ "${1:-}" == "--alert" ]]; then
    ALERT_MODE=true
  fi

  echo "=== Production Deploy Health Check ==="
  echo "Threshold: ${STALE_THRESHOLD_MINUTES} minutes"
  echo ""

  # Associative-array key order is unspecified; sort for a stable report.
  local -a site_names
  mapfile -t site_names < <(printf '%s\n' "${!SITES[@]}" | LC_ALL=C sort)

  local site_name ssh_host project_path public_url
  for site_name in "${site_names[@]}"; do
    IFS='|' read -r ssh_host project_path public_url <<< "${SITES[$site_name]}"
    check_site "$site_name" "$ssh_host" "$project_path" "$public_url"
  done

  if [[ ${#ERRORS[@]} -eq 0 ]]; then
    echo "=== ALL OK ==="
    exit 0
  fi

  echo "=== PROBLEMS DETECTED ==="
  local err
  for err in "${ERRORS[@]}"; do
    echo " - $err"
  done

  if [[ "$ALERT_MODE" == true ]]; then
    # $'\n' yields real newlines; "\n" in double quotes would stay a literal
    # backslash-n and end up verbatim in the alert text.
    local alert_msg="Production Deploy Check Failed:"$'\n'
    for err in "${ERRORS[@]}"; do
      alert_msg+="- ${err}"$'\n'
    done
    send_alert "$alert_msg"
  fi

  exit 1
}

main "$@"