From 037835d1de8f1819a529edee60e7699ef35284a9 Mon Sep 17 00:00:00 2001 From: Martin Porwoll Date: Tue, 17 Feb 2026 11:58:08 +0000 Subject: [PATCH] fix(ci): increase build heap size and format monitoring files Build was OOM-ing in CI with default Node heap limit. Added NODE_OPTIONS with 4GB heap. Also ran Prettier on monitoring files. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 1 + src/lib/monitoring/alert-evaluator.ts | 132 +++---- src/lib/monitoring/monitoring-logger.ts | 83 ++--- src/lib/monitoring/monitoring-service.ts | 430 +++++++++++----------- src/lib/monitoring/performance-tracker.ts | 70 ++-- src/lib/monitoring/snapshot-collector.ts | 50 +-- src/lib/monitoring/types.ts | 225 ++++++----- 7 files changed, 481 insertions(+), 510 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0816464..1c5f32c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -212,6 +212,7 @@ jobs: - name: Build application run: pnpm build env: + NODE_OPTIONS: '--max-old-space-size=4096' # Minimal env vars for build PAYLOAD_SECRET: build-secret-placeholder DATABASE_URI: postgresql://placeholder:placeholder@localhost:5432/placeholder diff --git a/src/lib/monitoring/alert-evaluator.ts b/src/lib/monitoring/alert-evaluator.ts index f13e857..bdcccf7 100644 --- a/src/lib/monitoring/alert-evaluator.ts +++ b/src/lib/monitoring/alert-evaluator.ts @@ -6,8 +6,8 @@ * cooldown periods, and multi-channel alert dispatch. */ -import type { Payload } from 'payload' -import type { AlertCondition, AlertSeverity, SystemMetrics } from './types' +import type { Payload } from "payload"; +import type { AlertCondition, AlertSeverity, SystemMetrics } from "./types"; // ============================================================================ // Pure Functions @@ -18,40 +18,36 @@ import type { AlertCondition, AlertSeverity, SystemMetrics } from './types' * Example: getMetricValue(metrics, 'system.cpuUsagePercent') => 92 */ export function getMetricValue(metrics: Record, path: string): number | undefined { - const parts = path.split('.') - let current: unknown = metrics + const parts = path.split("."); + let current: unknown = metrics; for (const part of parts) { - if (current === null || current === undefined || typeof current !== 'object') { - return undefined + if (current === null || current === undefined || typeof current !== "object") { + return undefined; } - current = (current as Record)[part] + current = (current as Record)[part]; } - return typeof current === 'number' ? current : undefined + return typeof current === "number" ? current : undefined; } /** * Evaluates a condition against a value and threshold. */ -export function evaluateCondition( - condition: AlertCondition, - value: number, - threshold: number, -): boolean { +export function evaluateCondition(condition: AlertCondition, value: number, threshold: number): boolean { switch (condition) { - case 'gt': - return value > threshold - case 'lt': - return value < threshold - case 'eq': - return value === threshold - case 'gte': - return value >= threshold - case 'lte': - return value <= threshold + case "gt": + return value > threshold; + case "lt": + return value < threshold; + case "eq": + return value === threshold; + case "gte": + return value >= threshold; + case "lte": + return value <= threshold; default: - return false + return false; } } @@ -60,28 +56,28 @@ export function evaluateCondition( // ============================================================================ interface AlertRule { - id: number - name: string - metric: string - condition: AlertCondition - threshold: number - severity: AlertSeverity - channels: Array<'email' | 'slack' | 'discord'> + id: number; + name: string; + metric: string; + condition: AlertCondition; + threshold: number; + severity: AlertSeverity; + channels: Array<"email" | "slack" | "discord">; recipients?: { - emails?: Array<{ email: string }> - slackWebhook?: string - discordWebhook?: string - } - cooldownMinutes: number - enabled: boolean + emails?: Array<{ email: string }>; + slackWebhook?: string; + discordWebhook?: string; + }; + cooldownMinutes: number; + enabled: boolean; } // Maps AlertSeverity to the AlertLevel expected by alert-service const SEVERITY_TO_LEVEL: Record = { - warning: 'warning', - error: 'error', - critical: 'critical', -} + warning: "warning", + error: "error", + critical: "critical", +}; // ============================================================================ // AlertEvaluator Class @@ -89,58 +85,52 @@ const SEVERITY_TO_LEVEL: Record = { export class AlertEvaluator { /** Tracks last fire time per rule to enforce cooldown */ - private cooldownMap: Map = new Map() + private cooldownMap: Map = new Map(); /** * Returns true if the rule should fire (not in cooldown). */ shouldFire(ruleId: string, cooldownMinutes: number): boolean { - const lastFired = this.cooldownMap.get(ruleId) + const lastFired = this.cooldownMap.get(ruleId); if (lastFired) { - const elapsedMinutes = (Date.now() - lastFired) / 60_000 - if (elapsedMinutes < cooldownMinutes) return false + const elapsedMinutes = (Date.now() - lastFired) / 60_000; + if (elapsedMinutes < cooldownMinutes) return false; } - return true + return true; } /** Record that a rule fired successfully. */ recordFired(ruleId: string): void { - this.cooldownMap.set(ruleId, Date.now()) + this.cooldownMap.set(ruleId, Date.now()); } /** * Evaluates all enabled rules against current metrics. * Fires alerts for rules that match and are not in cooldown. */ - async evaluateRules( - payload: Payload, - metrics: Omit, - ): Promise { + async evaluateRules(payload: Payload, metrics: Omit): Promise { try { const rules = await payload.find({ - collection: 'monitoring-alert-rules', + collection: "monitoring-alert-rules", where: { enabled: { equals: true } }, limit: 100, - }) + }); for (const doc of rules.docs) { - const rule = doc as unknown as AlertRule - const value = getMetricValue( - metrics as unknown as Record, - rule.metric, - ) - if (value === undefined) continue + const rule = doc as unknown as AlertRule; + const value = getMetricValue(metrics as unknown as Record, rule.metric); + if (value === undefined) continue; if (evaluateCondition(rule.condition, value, rule.threshold)) { - const ruleKey = String(rule.id) + const ruleKey = String(rule.id); if (this.shouldFire(ruleKey, rule.cooldownMinutes)) { - await this.dispatchAlert(payload, rule, value) - this.recordFired(ruleKey) + await this.dispatchAlert(payload, rule, value); + this.recordFired(ruleKey); } } } } catch (error) { - console.error('[AlertEvaluator] Error evaluating rules:', error) + console.error("[AlertEvaluator] Error evaluating rules:", error); } } @@ -149,11 +139,11 @@ export class AlertEvaluator { * via the existing alert service. */ private async dispatchAlert(payload: Payload, rule: AlertRule, value: number): Promise { - const message = `${rule.name}: ${rule.metric} = ${value} (threshold: ${rule.condition} ${rule.threshold})` + const message = `${rule.name}: ${rule.metric} = ${value} (threshold: ${rule.condition} ${rule.threshold})`; try { await payload.create({ - collection: 'monitoring-alert-history', + collection: "monitoring-alert-history", data: { rule: rule.id, metric: rule.metric, @@ -163,13 +153,13 @@ export class AlertEvaluator { message, channelsSent: rule.channels, }, - }) + }); // Try to send via existing alert service try { - const { sendAlert } = await import('../alerting/alert-service.js') + const { sendAlert } = await import("../alerting/alert-service.js"); await sendAlert(payload, { - level: SEVERITY_TO_LEVEL[rule.severity] as 'warning' | 'error' | 'critical', + level: SEVERITY_TO_LEVEL[rule.severity] as "warning" | "error" | "critical", title: `[${rule.severity.toUpperCase()}] ${rule.name}`, message, details: { @@ -178,13 +168,13 @@ export class AlertEvaluator { threshold: rule.threshold, condition: rule.condition, }, - }) + }); } catch { // Alert service not available, history record is sufficient - console.warn(`[AlertEvaluator] Could not dispatch via alert-service: ${message}`) + console.warn(`[AlertEvaluator] Could not dispatch via alert-service: ${message}`); } } catch (error) { - console.error('[AlertEvaluator] Error dispatching alert:', error) + console.error("[AlertEvaluator] Error dispatching alert:", error); } } } diff --git a/src/lib/monitoring/monitoring-logger.ts b/src/lib/monitoring/monitoring-logger.ts index 2acf1da..1c19dc1 100644 --- a/src/lib/monitoring/monitoring-logger.ts +++ b/src/lib/monitoring/monitoring-logger.ts @@ -5,7 +5,7 @@ * Falls back to console output when Payload is not yet initialized. */ -import type { LogLevel, LogSource } from './types' +import type { LogLevel, LogSource } from "./types"; const LOG_LEVELS: Record = { debug: 0, @@ -13,63 +13,58 @@ const LOG_LEVELS: Record = { warn: 2, error: 3, fatal: 4, -} +}; function getMinLevel(): LogLevel { - return (process.env.MONITORING_LOG_LEVEL as LogLevel) || 'info' + return (process.env.MONITORING_LOG_LEVEL as LogLevel) || "info"; } function shouldLog(level: LogLevel): boolean { - return LOG_LEVELS[level] >= LOG_LEVELS[getMinLevel()] + return LOG_LEVELS[level] >= LOG_LEVELS[getMinLevel()]; } export interface LogContext { - requestId?: string - userId?: number - tenant?: number - duration?: number - [key: string]: unknown + requestId?: string; + userId?: number; + tenant?: number; + duration?: number; + [key: string]: unknown; } export interface MonitoringLoggerInstance { - debug(message: string, context?: LogContext): void - info(message: string, context?: LogContext): void - warn(message: string, context?: LogContext): void - error(message: string, context?: LogContext): void - fatal(message: string, context?: LogContext): void + debug(message: string, context?: LogContext): void; + info(message: string, context?: LogContext): void; + warn(message: string, context?: LogContext): void; + error(message: string, context?: LogContext): void; + fatal(message: string, context?: LogContext): void; } /** Cached Payload instance — resolved once, reused for all subsequent writes. */ -let cachedPayload: any = null +let cachedPayload: any = null; async function getPayloadInstance(): Promise { - if (cachedPayload) return cachedPayload - const { getPayload } = await import('payload') - const config = (await import(/* @vite-ignore */ '@payload-config')).default - cachedPayload = await getPayload({ config }) - return cachedPayload + if (cachedPayload) return cachedPayload; + const { getPayload } = await import("payload"); + const config = (await import(/* @vite-ignore */ "@payload-config")).default; + cachedPayload = await getPayload({ config }); + return cachedPayload; } /** Reset cached instance (used in tests). */ export function _resetPayloadCache(): void { - cachedPayload = null + cachedPayload = null; } -async function writeLog( - source: LogSource, - level: LogLevel, - message: string, - context?: LogContext, -): Promise { - if (!shouldLog(level)) return +async function writeLog(source: LogSource, level: LogLevel, message: string, context?: LogContext): Promise { + if (!shouldLog(level)) return; try { - const payload = await getPayloadInstance() + const payload = await getPayloadInstance(); - const { requestId, userId, tenant, duration, ...rest } = context || {} + const { requestId, userId, tenant, duration, ...rest } = context || {}; await payload.create({ - collection: 'monitoring-logs', + collection: "monitoring-logs", data: { level, source, @@ -80,12 +75,12 @@ async function writeLog( tenant, duration, }, - }) + }); } catch { // Fallback to console if Payload is not yet initialized - cachedPayload = null - const prefix = `[${source}][${level.toUpperCase()}]` - console.log(prefix, message, context || '') + cachedPayload = null; + const prefix = `[${source}][${level.toUpperCase()}]`; + console.log(prefix, message, context || ""); } } @@ -94,16 +89,16 @@ export function createMonitoringLogger(source: LogSource): MonitoringLoggerInsta return function logMessage(message: string, context?: LogContext): void { // Fire-and-forget -- don't block the caller writeLog(source, level, message, context).catch(function onError(err) { - console.error(`[MonitoringLogger] Failed to write ${level} log:`, err) - }) - } + console.error(`[MonitoringLogger] Failed to write ${level} log:`, err); + }); + }; } return { - debug: log('debug'), - info: log('info'), - warn: log('warn'), - error: log('error'), - fatal: log('fatal'), - } + debug: log("debug"), + info: log("info"), + warn: log("warn"), + error: log("error"), + fatal: log("fatal"), + }; } diff --git a/src/lib/monitoring/monitoring-service.ts b/src/lib/monitoring/monitoring-service.ts index 471d357..47edc7b 100644 --- a/src/lib/monitoring/monitoring-service.ts +++ b/src/lib/monitoring/monitoring-service.ts @@ -5,8 +5,8 @@ * dependency checks. Used by the monitoring dashboard and snapshot collector. */ -import os from 'node:os' -import { execSync } from 'node:child_process' +import os from "node:os"; +import { execSync } from "node:child_process"; import type { SystemHealth, ProcessStatus, @@ -21,9 +21,9 @@ import type { SecurityMetricsStatus, PerformanceMetrics, SystemMetrics, -} from './types' -import { checkSecretsHealth } from '../security/secrets-health' -import { getSecurityMetricsSnapshot } from '../security/security-observability' +} from "./types"; +import { checkSecretsHealth } from "../security/secrets-health"; +import { getSecurityMetricsSnapshot } from "../security/security-observability"; // ============================================================================ // System Health @@ -34,15 +34,15 @@ import { getSecurityMetricsSnapshot } from '../security/security-observability' * CPU usage is calculated by sampling /proc/stat twice with 100ms delay. */ export async function checkSystemHealth(): Promise { - const cpuUsagePercent = await getCpuUsage() + const cpuUsagePercent = await getCpuUsage(); - const memoryTotalMB = Math.round(os.totalmem() / 1024 / 1024) - const memoryUsedMB = Math.round((os.totalmem() - os.freemem()) / 1024 / 1024) - const memoryUsagePercent = roundToOneDecimal((memoryUsedMB / memoryTotalMB) * 100) + const memoryTotalMB = Math.round(os.totalmem() / 1024 / 1024); + const memoryUsedMB = Math.round((os.totalmem() - os.freemem()) / 1024 / 1024); + const memoryUsagePercent = roundToOneDecimal((memoryUsedMB / memoryTotalMB) * 100); - const { diskUsedGB, diskTotalGB, diskUsagePercent } = getDiskUsage() + const { diskUsedGB, diskTotalGB, diskUsagePercent } = getDiskUsage(); - const [loadAvg1, loadAvg5] = os.loadavg() + const [loadAvg1, loadAvg5] = os.loadavg(); return { cpuUsagePercent: roundToOneDecimal(cpuUsagePercent), @@ -55,7 +55,7 @@ export async function checkSystemHealth(): Promise { loadAvg1: roundToTwoDecimals(loadAvg1), loadAvg5: roundToTwoDecimals(loadAvg5), uptime: Math.round(os.uptime()), - } + }; } // ============================================================================ @@ -64,214 +64,212 @@ export async function checkSystemHealth(): Promise { export async function checkRedis(): Promise { const offlineStatus: RedisStatus = { - status: 'offline', + status: "offline", memoryUsedMB: 0, connectedClients: 0, opsPerSec: 0, - } + }; try { - const { getRedisClient } = await import('../redis.js') - const client = getRedisClient() - if (!client) return offlineStatus + const { getRedisClient } = await import("../redis.js"); + const client = getRedisClient(); + if (!client) return offlineStatus; - const info = await client.info() + const info = await client.info(); const getVal = (key: string): number => { - const match = info.match(new RegExp(`${key}:(\\d+)`)) - return match ? parseInt(match[1], 10) : 0 - } + const match = info.match(new RegExp(`${key}:(\\d+)`)); + return match ? parseInt(match[1], 10) : 0; + }; return { - status: 'online', - memoryUsedMB: Math.round(getVal('used_memory') / 1024 / 1024), - connectedClients: getVal('connected_clients'), - opsPerSec: getVal('instantaneous_ops_per_sec'), - } + status: "online", + memoryUsedMB: Math.round(getVal("used_memory") / 1024 / 1024), + connectedClients: getVal("connected_clients"), + opsPerSec: getVal("instantaneous_ops_per_sec"), + }; } catch { - return offlineStatus + return offlineStatus; } } export async function checkPostgresql(): Promise { const offlineStatus: PostgresqlStatus = { - status: 'offline', + status: "offline", connections: 0, maxConnections: 50, latencyMs: -1, - } + }; try { - const { getPayload } = await import('payload') - const payload = await getPayload({ config: (await import('@payload-config')).default }) + const { getPayload } = await import("payload"); + const payload = await getPayload({ config: (await import("@payload-config")).default }); - const start = Date.now() - await payload.find({ collection: 'users', limit: 0 }) - const latencyMs = Date.now() - start + const start = Date.now(); + await payload.find({ collection: "users", limit: 0 }); + const latencyMs = Date.now() - start; - let connections = 0 - let maxConnections = 50 + let connections = 0; + let maxConnections = 50; try { const connResult = runPsql( - '-h 10.10.181.101 -U payload -d payload_db -t -c "SELECT count(*) FROM pg_stat_activity WHERE datname = \'payload_db\'"', - ) - connections = parseInt(connResult.trim(), 10) || 0 + "-h 10.10.181.101 -U payload -d payload_db -t -c \"SELECT count(*) FROM pg_stat_activity WHERE datname = 'payload_db'\"", + ); + connections = parseInt(connResult.trim(), 10) || 0; - const maxResult = runPsql( - '-h 10.10.181.101 -U payload -d payload_db -t -c "SHOW max_connections"', - ) - maxConnections = parseInt(maxResult.trim(), 10) || 50 + const maxResult = runPsql('-h 10.10.181.101 -U payload -d payload_db -t -c "SHOW max_connections"'); + maxConnections = parseInt(maxResult.trim(), 10) || 50; } catch { // psql unavailable -- latency check already proves connectivity } return { - status: latencyMs < 1000 ? 'online' : 'warning', + status: latencyMs < 1000 ? "online" : "warning", connections, maxConnections, latencyMs, - } + }; } catch { - return offlineStatus + return offlineStatus; } } export async function checkPgBouncer(): Promise { const offlineStatus: PgBouncerStatus = { - status: 'offline', + status: "offline", activeConnections: 0, waitingClients: 0, poolSize: 0, - } + }; try { - const output = runPsql('-h 127.0.0.1 -p 6432 -U payload -d pgbouncer -t -c "SHOW POOLS"') + const output = runPsql('-h 127.0.0.1 -p 6432 -U payload -d pgbouncer -t -c "SHOW POOLS"'); // SHOW POOLS columns: database | user | cl_active | cl_waiting | sv_active | sv_idle | pool_size | ... const lines = output .trim() - .split('\n') - .filter((l) => l.includes('payload')) + .split("\n") + .filter((l) => l.includes("payload")); - let activeConnections = 0 - let waitingClients = 0 - let poolSize = 20 + let activeConnections = 0; + let waitingClients = 0; + let poolSize = 20; for (const line of lines) { - const parts = line.split('|').map((s) => s.trim()) - activeConnections += parseInt(parts[2], 10) || 0 - waitingClients += parseInt(parts[3], 10) || 0 - poolSize = parseInt(parts[6], 10) || 20 + const parts = line.split("|").map((s) => s.trim()); + activeConnections += parseInt(parts[2], 10) || 0; + waitingClients += parseInt(parts[3], 10) || 0; + poolSize = parseInt(parts[6], 10) || 20; } - return { status: 'online', activeConnections, waitingClients, poolSize } + return { status: "online", activeConnections, waitingClients, poolSize }; } catch { - return offlineStatus + return offlineStatus; } } export interface QueueCounts { - waiting: number - active: number - completed: number - failed: number + waiting: number; + active: number; + completed: number; + failed: number; } export async function checkQueues(): Promise> { try { - const { Queue } = await import('bullmq') - const { getQueueRedisConnection } = await import('../queue/queue-service.js') + const { Queue } = await import("bullmq"); + const { getQueueRedisConnection } = await import("../queue/queue-service.js"); - const connection = getQueueRedisConnection() + const connection = getQueueRedisConnection(); // Queue names matching QUEUE_NAMES in queue-service.ts - const queueNames = ['email', 'pdf', 'cleanup', 'youtube-upload'] - const results: Record = {} + const queueNames = ["email", "pdf", "cleanup", "youtube-upload"]; + const results: Record = {}; for (const name of queueNames) { try { - const queue = new Queue(name, { connection }) - const counts = await queue.getJobCounts() + const queue = new Queue(name, { connection }); + const counts = await queue.getJobCounts(); results[name] = { waiting: counts.waiting || 0, active: counts.active || 0, completed: counts.completed || 0, failed: counts.failed || 0, - } - await queue.close() + }; + await queue.close(); } catch { - results[name] = { waiting: 0, active: 0, completed: 0, failed: 0 } + results[name] = { waiting: 0, active: 0, completed: 0, failed: 0 }; } } - return results + return results; } catch { - return {} + return {}; } } export async function checkSmtp(): Promise { - const now = new Date().toISOString() + const now = new Date().toISOString(); try { - const nodemailer = await import('nodemailer') + const nodemailer = await import("nodemailer"); const transporter = nodemailer.createTransport({ host: process.env.SMTP_HOST, - port: parseInt(process.env.SMTP_PORT || '587', 10), - secure: process.env.SMTP_SECURE === 'true', + port: parseInt(process.env.SMTP_PORT || "587", 10), + secure: process.env.SMTP_SECURE === "true", auth: { user: process.env.SMTP_USER, pass: process.env.SMTP_PASS, }, - }) + }); - const start = Date.now() - await transporter.verify() - const responseTimeMs = Date.now() - start + const start = Date.now(); + await transporter.verify(); + const responseTimeMs = Date.now() - start; - return { status: 'online', lastCheck: now, responseTimeMs } + return { status: "online", lastCheck: now, responseTimeMs }; } catch { - return { status: 'offline', lastCheck: now, responseTimeMs: -1 } + return { status: "offline", lastCheck: now, responseTimeMs: -1 }; } } export async function checkOAuthTokens(): Promise<{ - metaOAuth: OAuthTokenStatus - youtubeOAuth: OAuthTokenStatus + metaOAuth: OAuthTokenStatus; + youtubeOAuth: OAuthTokenStatus; }> { const errorStatus: OAuthTokenStatus = { - status: 'error', + status: "error", tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0, - } + }; try { - const { getPayload } = await import('payload') - const payload = await getPayload({ config: (await import('@payload-config')).default }) + const { getPayload } = await import("payload"); + const payload = await getPayload({ config: (await import("@payload-config")).default }); const accounts = await payload.find({ - collection: 'social-accounts', + collection: "social-accounts", limit: 100, - where: { status: { equals: 'connected' } }, - }) + where: { status: { equals: "connected" } }, + }); - const sevenDaysFromNow = new Date() - sevenDaysFromNow.setDate(sevenDaysFromNow.getDate() + 7) - const now = new Date() + const sevenDaysFromNow = new Date(); + sevenDaysFromNow.setDate(sevenDaysFromNow.getDate() + 7); + const now = new Date(); - const meta = { tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 } - const youtube = { tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 } + const meta = { tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 }; + const youtube = { tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 }; for (const account of accounts.docs) { - const doc = account as unknown as Record - const target = doc.platform === 'youtube' ? youtube : meta - target.tokensTotal++ + const doc = account as unknown as Record; + const target = doc.platform === "youtube" ? youtube : meta; + target.tokensTotal++; - const expiresAt = doc.tokenExpiresAt ? new Date(doc.tokenExpiresAt as string) : null + const expiresAt = doc.tokenExpiresAt ? new Date(doc.tokenExpiresAt as string) : null; if (expiresAt) { if (expiresAt < now) { - target.tokensExpired++ + target.tokensExpired++; } else if (expiresAt < sevenDaysFromNow) { - target.tokensExpiringSoon++ + target.tokensExpiringSoon++; } } } @@ -279,58 +277,55 @@ export async function checkOAuthTokens(): Promise<{ return { metaOAuth: { status: getOAuthStatus(meta), ...meta }, youtubeOAuth: { status: getOAuthStatus(youtube), ...youtube }, - } + }; } catch { - return { metaOAuth: errorStatus, youtubeOAuth: errorStatus } + return { metaOAuth: errorStatus, youtubeOAuth: errorStatus }; } } export async function checkCronJobs(): Promise { - const unknownStatus: CronJobStatus = { lastRun: '', status: 'unknown' } + const unknownStatus: CronJobStatus = { lastRun: "", status: "unknown" }; try { - const { getPayload } = await import('payload') - const payload = await getPayload({ config: (await import('@payload-config')).default }) + const { getPayload } = await import("payload"); + const payload = await getPayload({ config: (await import("@payload-config")).default }); async function checkCron(source: string): Promise { try { const logs = await payload.find({ - collection: 'monitoring-logs', + collection: "monitoring-logs", limit: 1, - sort: '-createdAt', + sort: "-createdAt", where: { - and: [ - { source: { equals: 'cron' } }, - { message: { contains: source } }, - ], + and: [{ source: { equals: "cron" } }, { message: { contains: source } }], }, - }) + }); - if (logs.docs.length === 0) return unknownStatus + if (logs.docs.length === 0) return unknownStatus; - const doc = logs.docs[0] as unknown as Record + const doc = logs.docs[0] as unknown as Record; return { lastRun: doc.createdAt as string, - status: doc.level === 'error' ? 'failed' : 'ok', - } + status: doc.level === "error" ? "failed" : "ok", + }; } catch { - return unknownStatus + return unknownStatus; } } const [communitySync, tokenRefresh, youtubeSync] = await Promise.all([ - checkCron('community-sync'), - checkCron('token-refresh'), - checkCron('youtube'), - ]) + checkCron("community-sync"), + checkCron("token-refresh"), + checkCron("youtube"), + ]); - return { communitySync, tokenRefresh, youtubeSync } + return { communitySync, tokenRefresh, youtubeSync }; } catch { return { communitySync: unknownStatus, tokenRefresh: unknownStatus, youtubeSync: unknownStatus, - } + }; } } @@ -342,18 +337,19 @@ export async function checkCronJobs(): Promise { * Collects all monitoring metrics in parallel. Individual check failures * are isolated and return safe defaults instead of failing the whole collection. */ -export async function collectMetrics(): Promise> { - const [system, redis, postgresql, pgbouncer, smtp, oauth, cronJobs, secrets, securityEvents] = await Promise.allSettled([ - checkSystemHealth(), - checkRedis(), - checkPostgresql(), - checkPgBouncer(), - checkSmtp(), - checkOAuthTokens(), - checkCronJobs(), - Promise.resolve(checkSecretsHealth()), - Promise.resolve(getSecurityMetricsSnapshot()), - ]) +export async function collectMetrics(): Promise> { + const [system, redis, postgresql, pgbouncer, smtp, oauth, cronJobs, secrets, securityEvents] = + await Promise.allSettled([ + checkSystemHealth(), + checkRedis(), + checkPostgresql(), + checkPgBouncer(), + checkSmtp(), + checkOAuthTokens(), + checkCronJobs(), + Promise.resolve(checkSecretsHealth()), + Promise.resolve(getSecurityMetricsSnapshot()), + ]); // Load performance tracker lazily to avoid circular dependencies let performance: PerformanceMetrics = { @@ -362,51 +358,51 @@ export async function collectMetrics(): Promise p99ResponseTimeMs: 0, errorRate: 0, requestsPerMinute: 0, - } + }; try { // Dynamic path constructed at runtime to avoid Vite static analysis // when performance-tracker module has not been created yet - const trackerPath = './performance-tracker' - const mod = await import(/* @vite-ignore */ trackerPath) - performance = mod.performanceTracker.getMetrics('1h') + const trackerPath = "./performance-tracker"; + const mod = await import(/* @vite-ignore */ trackerPath); + performance = mod.performanceTracker.getMetrics("1h"); } catch { // Performance tracker not yet initialized } const defaultProcess: ProcessStatus = { - status: 'offline', + status: "offline", pid: 0, memoryMB: 0, uptimeSeconds: 0, restarts: 0, - } + }; - const { payloadProcess, queueWorkerProcess } = getPm2Processes(defaultProcess) + const { payloadProcess, queueWorkerProcess } = getPm2Processes(defaultProcess); const oauthDefaults = { - metaOAuth: { status: 'error' as const, tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 }, - youtubeOAuth: { status: 'error' as const, tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 }, - } + metaOAuth: { status: "error" as const, tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 }, + youtubeOAuth: { status: "error" as const, tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 }, + }; const cronDefaults: CronStatuses = { - communitySync: { lastRun: '', status: 'unknown' }, - tokenRefresh: { lastRun: '', status: 'unknown' }, - youtubeSync: { lastRun: '', status: 'unknown' }, - } + communitySync: { lastRun: "", status: "unknown" }, + tokenRefresh: { lastRun: "", status: "unknown" }, + youtubeSync: { lastRun: "", status: "unknown" }, + }; const secretsDefaults: SecretsHealthStatus = { - status: 'critical', + status: "critical", checkedAt: new Date().toISOString(), missing: [], expiringSoon: [], expired: [], rotationOverdue: [], - } + }; const securityEventsDefaults: SecurityMetricsStatus = { windowMs: 300000, counters: [], - } + }; const systemDefaults: SystemHealth = { cpuUsagePercent: 0, @@ -419,21 +415,21 @@ export async function collectMetrics(): Promise loadAvg1: 0, loadAvg5: 0, uptime: 0, - } + }; - const oauthResult = settled(oauth, oauthDefaults) + const oauthResult = settled(oauth, oauthDefaults); return { system: settled(system, systemDefaults), services: { payload: payloadProcess, queueWorker: queueWorkerProcess, - postgresql: settled(postgresql, { status: 'offline', connections: 0, maxConnections: 50, latencyMs: -1 }), - pgbouncer: settled(pgbouncer, { status: 'offline', activeConnections: 0, waitingClients: 0, poolSize: 0 }), - redis: settled(redis, { status: 'offline', memoryUsedMB: 0, connectedClients: 0, opsPerSec: 0 }), + postgresql: settled(postgresql, { status: "offline", connections: 0, maxConnections: 50, latencyMs: -1 }), + pgbouncer: settled(pgbouncer, { status: "offline", activeConnections: 0, waitingClients: 0, poolSize: 0 }), + redis: settled(redis, { status: "offline", memoryUsedMB: 0, connectedClients: 0, opsPerSec: 0 }), }, external: { - smtp: settled(smtp, { status: 'offline', lastCheck: new Date().toISOString(), responseTimeMs: -1 }), + smtp: settled(smtp, { status: "offline", lastCheck: new Date().toISOString(), responseTimeMs: -1 }), metaOAuth: oauthResult.metaOAuth, youtubeOAuth: oauthResult.youtubeOAuth, cronJobs: settled(cronJobs, cronDefaults), @@ -441,7 +437,7 @@ export async function collectMetrics(): Promise securityEvents: settled(securityEvents, securityEventsDefaults), }, performance, - } + }; } // ============================================================================ @@ -454,18 +450,18 @@ export async function collectMetrics(): Promise */ function runPsql(args: string): string { return execSync(`psql ${args}`, { - encoding: 'utf-8', + encoding: "utf-8", timeout: 5000, - env: { ...process.env, PGPASSWORD: process.env.DB_PASSWORD || '' }, - }) + env: { ...process.env, PGPASSWORD: process.env.DB_PASSWORD || "" }, + }); } function roundToOneDecimal(value: number): number { - return Math.round(value * 10) / 10 + return Math.round(value * 10) / 10; } function roundToTwoDecimals(value: number): number { - return Math.round(value * 100) / 100 + return Math.round(value * 100) / 100; } /** @@ -473,99 +469,95 @@ function roundToTwoDecimals(value: number): number { * the fallback when the promise was rejected. */ function settled(result: PromiseSettledResult, fallback: T): T { - return result.status === 'fulfilled' ? result.value : fallback + return result.status === "fulfilled" ? result.value : fallback; } async function getCpuUsage(): Promise { try { - const fs = await import('node:fs/promises') - const stat1 = await fs.readFile('/proc/stat', 'utf-8') - await new Promise((resolve) => setTimeout(resolve, 100)) - const stat2 = await fs.readFile('/proc/stat', 'utf-8') + const fs = await import("node:fs/promises"); + const stat1 = await fs.readFile("/proc/stat", "utf-8"); + await new Promise((resolve) => setTimeout(resolve, 100)); + const stat2 = await fs.readFile("/proc/stat", "utf-8"); const parse = (data: string): { idle: number; total: number } => { - const line = data.split('\n')[0] // first line: cpu user nice system idle ... - const parts = line.split(/\s+/).slice(1).map(Number) - const idle = parts[3] + (parts[4] || 0) // idle + iowait - const total = parts.reduce((a, b) => a + b, 0) - return { idle, total } - } + const line = data.split("\n")[0]; // first line: cpu user nice system idle ... + const parts = line.split(/\s+/).slice(1).map(Number); + const idle = parts[3] + (parts[4] || 0); // idle + iowait + const total = parts.reduce((a, b) => a + b, 0); + return { idle, total }; + }; - const s1 = parse(stat1) - const s2 = parse(stat2) - const idleDiff = s2.idle - s1.idle - const totalDiff = s2.total - s1.total + const s1 = parse(stat1); + const s2 = parse(stat2); + const idleDiff = s2.idle - s1.idle; + const totalDiff = s2.total - s1.total; - if (totalDiff === 0) return 0 - return ((totalDiff - idleDiff) / totalDiff) * 100 + if (totalDiff === 0) return 0; + return ((totalDiff - idleDiff) / totalDiff) * 100; } catch { // Fallback if /proc/stat is unavailable - const cpuCount = os.cpus().length - return (os.loadavg()[0] / cpuCount) * 100 + const cpuCount = os.cpus().length; + return (os.loadavg()[0] / cpuCount) * 100; } } function getDiskUsage(): { diskUsedGB: number; diskTotalGB: number; diskUsagePercent: number } { try { - const output = execSync('df -B1 / | tail -1', { encoding: 'utf-8' }) - const parts = output.trim().split(/\s+/) + const output = execSync("df -B1 / | tail -1", { encoding: "utf-8" }); + const parts = output.trim().split(/\s+/); // Format: filesystem 1B-blocks used available use% mountpoint - const total = parseInt(parts[1], 10) - const used = parseInt(parts[2], 10) + const total = parseInt(parts[1], 10); + const used = parseInt(parts[2], 10); return { diskTotalGB: roundToOneDecimal(total / 1024 / 1024 / 1024), diskUsedGB: roundToOneDecimal(used / 1024 / 1024 / 1024), diskUsagePercent: roundToOneDecimal((used / total) * 100), - } + }; } catch { - return { diskUsedGB: 0, diskTotalGB: 0, diskUsagePercent: 0 } + return { diskUsedGB: 0, diskTotalGB: 0, diskUsagePercent: 0 }; } } -function getOAuthStatus( - counts: { tokensExpired: number; tokensExpiringSoon: number }, -): OAuthTokenStatus['status'] { - if (counts.tokensExpired > 0) return 'expired' - if (counts.tokensExpiringSoon > 0) return 'expiring_soon' - return 'ok' +function getOAuthStatus(counts: { tokensExpired: number; tokensExpiringSoon: number }): OAuthTokenStatus["status"] { + if (counts.tokensExpired > 0) return "expired"; + if (counts.tokensExpiringSoon > 0) return "expiring_soon"; + return "ok"; } interface Pm2Processes { - payloadProcess: ProcessStatus - queueWorkerProcess: ProcessStatus + payloadProcess: ProcessStatus; + queueWorkerProcess: ProcessStatus; } function getPm2Processes(defaultProcess: ProcessStatus): Pm2Processes { - let payloadProcess = defaultProcess - let queueWorkerProcess = defaultProcess + let payloadProcess = defaultProcess; + let queueWorkerProcess = defaultProcess; try { - const pm2Out = execSync('pm2 jlist', { encoding: 'utf-8', timeout: 5000 }) - const pm2List = JSON.parse(pm2Out) as Array> + const pm2Out = execSync("pm2 jlist", { encoding: "utf-8", timeout: 5000 }); + const pm2List = JSON.parse(pm2Out) as Array>; for (const proc of pm2List) { - const env = proc.pm2_env as Record | undefined - const monit = proc.monit as Record | undefined + const env = proc.pm2_env as Record | undefined; + const monit = proc.monit as Record | undefined; const info: ProcessStatus = { - status: env?.status === 'online' ? 'online' : 'offline', + status: env?.status === "online" ? "online" : "offline", pid: (proc.pid as number) || 0, memoryMB: Math.round((monit?.memory || 0) / 1024 / 1024), - uptimeSeconds: env?.pm_uptime - ? Math.round((Date.now() - (env.pm_uptime as number)) / 1000) - : 0, + uptimeSeconds: env?.pm_uptime ? Math.round((Date.now() - (env.pm_uptime as number)) / 1000) : 0, restarts: (env?.restart_time as number) || 0, - } + }; - if (proc.name === 'payload') { - payloadProcess = info - } else if (proc.name === 'queue-worker') { - queueWorkerProcess = info + if (proc.name === "payload") { + payloadProcess = info; + } else if (proc.name === "queue-worker") { + queueWorkerProcess = info; } } } catch { // PM2 not available } - return { payloadProcess, queueWorkerProcess } + return { payloadProcess, queueWorkerProcess }; } diff --git a/src/lib/monitoring/performance-tracker.ts b/src/lib/monitoring/performance-tracker.ts index ac59967..fe20955 100644 --- a/src/lib/monitoring/performance-tracker.ts +++ b/src/lib/monitoring/performance-tracker.ts @@ -7,14 +7,14 @@ * error rates, and throughput. */ -import type { PerformanceEntry, PerformanceMetrics } from './types' +import type { PerformanceEntry, PerformanceMetrics } from "./types"; const PERIOD_MS: Record = { - '1h': 3_600_000, - '6h': 21_600_000, - '24h': 86_400_000, - '7d': 604_800_000, -} + "1h": 3_600_000, + "6h": 21_600_000, + "24h": 86_400_000, + "7d": 604_800_000, +}; const EMPTY_METRICS: PerformanceMetrics = { avgResponseTimeMs: 0, @@ -22,17 +22,17 @@ const EMPTY_METRICS: PerformanceMetrics = { p99ResponseTimeMs: 0, errorRate: 0, requestsPerMinute: 0, -} +}; export class PerformanceTracker { - private readonly buffer: PerformanceEntry[] - private pointer: number = 0 - private count: number = 0 - private readonly capacity: number + private readonly buffer: PerformanceEntry[]; + private pointer: number = 0; + private count: number = 0; + private readonly capacity: number; constructor(capacity: number = 10_000) { - this.capacity = capacity - this.buffer = new Array(capacity) + this.capacity = capacity; + this.buffer = new Array(capacity); } track(method: string, path: string, statusCode: number, durationMs: number): void { @@ -42,40 +42,40 @@ export class PerformanceTracker { path, statusCode, durationMs, - } - this.pointer = (this.pointer + 1) % this.capacity + }; + this.pointer = (this.pointer + 1) % this.capacity; if (this.count < this.capacity) { - this.count++ + this.count++; } } - getMetrics(period: '1h' | '6h' | '24h' | '7d' = '1h'): PerformanceMetrics { - const cutoff = Date.now() - (PERIOD_MS[period] ?? PERIOD_MS['1h']) + getMetrics(period: "1h" | "6h" | "24h" | "7d" = "1h"): PerformanceMetrics { + const cutoff = Date.now() - (PERIOD_MS[period] ?? PERIOD_MS["1h"]); - const entries: PerformanceEntry[] = [] + const entries: PerformanceEntry[] = []; for (let i = 0; i < this.count; i++) { - const entry = this.buffer[i] + const entry = this.buffer[i]; if (entry && entry.timestamp >= cutoff) { - entries.push(entry) + entries.push(entry); } } if (entries.length === 0) { - return { ...EMPTY_METRICS } + return { ...EMPTY_METRICS }; } - const durations = entries.map((e) => e.durationMs).sort((a, b) => a - b) + const durations = entries.map((e) => e.durationMs).sort((a, b) => a - b); - const avg = durations.reduce((sum, d) => sum + d, 0) / durations.length - const p95 = percentile(durations, 0.95) - const p99 = percentile(durations, 0.99) + const avg = durations.reduce((sum, d) => sum + d, 0) / durations.length; + const p95 = percentile(durations, 0.95); + const p99 = percentile(durations, 0.99); - const errorCount = entries.filter((e) => e.statusCode >= 500).length - const errorRate = errorCount / entries.length + const errorCount = entries.filter((e) => e.statusCode >= 500).length; + const errorRate = errorCount / entries.length; - const earliestTimestamp = Math.min(...entries.map((e) => e.timestamp)) - const windowMinutes = Math.max((Date.now() - earliestTimestamp) / 60_000, 1) - const requestsPerMinute = entries.length / windowMinutes + const earliestTimestamp = Math.min(...entries.map((e) => e.timestamp)); + const windowMinutes = Math.max((Date.now() - earliestTimestamp) / 60_000, 1); + const requestsPerMinute = entries.length / windowMinutes; return { avgResponseTimeMs: Math.round(avg), @@ -83,14 +83,14 @@ export class PerformanceTracker { p99ResponseTimeMs: p99, errorRate: Math.round(errorRate * 1000) / 1000, requestsPerMinute: Math.round(requestsPerMinute * 10) / 10, - } + }; } } function percentile(sorted: number[], p: number): number { - const index = Math.floor(sorted.length * p) - return sorted[Math.min(index, sorted.length - 1)] + const index = Math.floor(sorted.length * p); + return sorted[Math.min(index, sorted.length - 1)]; } /** Singleton instance used across the application. */ -export const performanceTracker = new PerformanceTracker(10_000) +export const performanceTracker = new PerformanceTracker(10_000); diff --git a/src/lib/monitoring/snapshot-collector.ts b/src/lib/monitoring/snapshot-collector.ts index a20da75..4c721df 100644 --- a/src/lib/monitoring/snapshot-collector.ts +++ b/src/lib/monitoring/snapshot-collector.ts @@ -6,62 +6,62 @@ * sie in MonitoringSnapshots. Evaluiert dabei Alert-Regeln. */ -import { collectMetrics } from './monitoring-service' -import { AlertEvaluator } from './alert-evaluator' +import { collectMetrics } from "./monitoring-service"; +import { AlertEvaluator } from "./alert-evaluator"; -let interval: ReturnType | null = null -const alertEvaluator = new AlertEvaluator() +let interval: ReturnType | null = null; +const alertEvaluator = new AlertEvaluator(); /** Cached Payload instance — resolved once, reused on every tick. */ -let cachedPayload: any = null +let cachedPayload: any = null; async function getPayloadInstance(): Promise { - if (cachedPayload) return cachedPayload - const { getPayload } = await import('payload') - const config = (await import(/* @vite-ignore */ '@payload-config')).default - cachedPayload = await getPayload({ config }) - return cachedPayload + if (cachedPayload) return cachedPayload; + const { getPayload } = await import("payload"); + const config = (await import(/* @vite-ignore */ "@payload-config")).default; + cachedPayload = await getPayload({ config }); + return cachedPayload; } export async function startSnapshotCollector(): Promise { - const INTERVAL = parseInt(process.env.MONITORING_SNAPSHOT_INTERVAL || '60000', 10) - console.log(`[SnapshotCollector] Starting (interval: ${INTERVAL}ms)`) + const INTERVAL = parseInt(process.env.MONITORING_SNAPSHOT_INTERVAL || "60000", 10); + console.log(`[SnapshotCollector] Starting (interval: ${INTERVAL}ms)`); // Run immediately once, then on interval - await collectAndSave() + await collectAndSave(); interval = setInterval(async () => { - await collectAndSave() - }, INTERVAL) + await collectAndSave(); + }, INTERVAL); } async function collectAndSave(): Promise { try { - const payload = await getPayloadInstance() + const payload = await getPayloadInstance(); - const metrics = await collectMetrics() + const metrics = await collectMetrics(); await (payload as any).create({ - collection: 'monitoring-snapshots', + collection: "monitoring-snapshots", data: { timestamp: new Date().toISOString(), ...metrics, }, - }) + }); // Evaluate alert rules against collected metrics - await alertEvaluator.evaluateRules(payload as any, metrics) + await alertEvaluator.evaluateRules(payload as any, metrics); } catch (error) { - console.error('[SnapshotCollector] Error:', error) + console.error("[SnapshotCollector] Error:", error); // Reset cache on error so next tick re-resolves - cachedPayload = null + cachedPayload = null; } } export async function stopSnapshotCollector(): Promise { if (interval) { - clearInterval(interval) - interval = null + clearInterval(interval); + interval = null; } - console.log('[SnapshotCollector] Stopped') + console.log("[SnapshotCollector] Stopped"); } diff --git a/src/lib/monitoring/types.ts b/src/lib/monitoring/types.ts index c38f683..23b4e6f 100644 --- a/src/lib/monitoring/types.ts +++ b/src/lib/monitoring/types.ts @@ -1,192 +1,185 @@ // === System Health === export interface SystemHealth { - cpuUsagePercent: number - memoryUsedMB: number - memoryTotalMB: number - memoryUsagePercent: number - diskUsedGB: number - diskTotalGB: number - diskUsagePercent: number - loadAvg1: number - loadAvg5: number - uptime: number // seconds + cpuUsagePercent: number; + memoryUsedMB: number; + memoryTotalMB: number; + memoryUsagePercent: number; + diskUsedGB: number; + diskTotalGB: number; + diskUsagePercent: number; + loadAvg1: number; + loadAvg5: number; + uptime: number; // seconds } // === Service Statuses === -export type ServiceStatusType = 'online' | 'warning' | 'offline' +export type ServiceStatusType = "online" | "warning" | "offline"; export interface ProcessStatus { - status: ServiceStatusType - pid: number - memoryMB: number - uptimeSeconds: number - restarts: number + status: ServiceStatusType; + pid: number; + memoryMB: number; + uptimeSeconds: number; + restarts: number; } export interface PostgresqlStatus { - status: ServiceStatusType - connections: number - maxConnections: number - latencyMs: number + status: ServiceStatusType; + connections: number; + maxConnections: number; + latencyMs: number; } export interface PgBouncerStatus { - status: ServiceStatusType - activeConnections: number - waitingClients: number - poolSize: number + status: ServiceStatusType; + activeConnections: number; + waitingClients: number; + poolSize: number; } export interface RedisStatus { - status: ServiceStatusType - memoryUsedMB: number - connectedClients: number - opsPerSec: number + status: ServiceStatusType; + memoryUsedMB: number; + connectedClients: number; + opsPerSec: number; } export interface ServiceStatuses { - payload: ProcessStatus - queueWorker: ProcessStatus - postgresql: PostgresqlStatus - pgbouncer: PgBouncerStatus - redis: RedisStatus + payload: ProcessStatus; + queueWorker: ProcessStatus; + postgresql: PostgresqlStatus; + pgbouncer: PgBouncerStatus; + redis: RedisStatus; } // === External Statuses === export interface SmtpStatus { - status: ServiceStatusType - lastCheck: string // ISO date - responseTimeMs: number + status: ServiceStatusType; + lastCheck: string; // ISO date + responseTimeMs: number; } -export type OAuthStatusType = 'ok' | 'expiring_soon' | 'expired' | 'error' +export type OAuthStatusType = "ok" | "expiring_soon" | "expired" | "error"; export interface OAuthTokenStatus { - status: OAuthStatusType - tokensTotal: number - tokensExpiringSoon: number - tokensExpired: number + status: OAuthStatusType; + tokensTotal: number; + tokensExpiringSoon: number; + tokensExpired: number; } export interface CronJobStatus { - lastRun: string // ISO date - status: 'ok' | 'failed' | 'unknown' + lastRun: string; // ISO date + status: "ok" | "failed" | "unknown"; } export interface CronStatuses { - communitySync: CronJobStatus - tokenRefresh: CronJobStatus - youtubeSync: CronJobStatus + communitySync: CronJobStatus; + tokenRefresh: CronJobStatus; + youtubeSync: CronJobStatus; } export interface SecretExpiringSoon { - name: string - expiresAt: string - daysRemaining: number + name: string; + expiresAt: string; + daysRemaining: number; } export interface SecretExpired { - name: string - expiresAt: string + name: string; + expiresAt: string; } export interface SecretRotationOverdue { - name: string - rotatedAt: string - ageDays: number + name: string; + rotatedAt: string; + ageDays: number; } export interface SecretsHealthStatus { - status: 'ok' | 'warning' | 'critical' - checkedAt: string - missing: string[] - expiringSoon: SecretExpiringSoon[] - expired: SecretExpired[] - rotationOverdue: SecretRotationOverdue[] + status: "ok" | "warning" | "critical"; + checkedAt: string; + missing: string[]; + expiringSoon: SecretExpiringSoon[]; + expired: SecretExpired[]; + rotationOverdue: SecretRotationOverdue[]; } export interface SecurityMetricsStatus { - windowMs: number + windowMs: number; counters: Array<{ - eventType: string - count: number - windowStart: string - }> + eventType: string; + count: number; + windowStart: string; + }>; } export interface ExternalStatuses { - smtp: SmtpStatus - metaOAuth: OAuthTokenStatus - youtubeOAuth: OAuthTokenStatus - cronJobs: CronStatuses - secrets: SecretsHealthStatus - securityEvents: SecurityMetricsStatus + smtp: SmtpStatus; + metaOAuth: OAuthTokenStatus; + youtubeOAuth: OAuthTokenStatus; + cronJobs: CronStatuses; + secrets: SecretsHealthStatus; + securityEvents: SecurityMetricsStatus; } // === Performance === export interface PerformanceMetrics { - avgResponseTimeMs: number - p95ResponseTimeMs: number - p99ResponseTimeMs: number - errorRate: number // 0-1 - requestsPerMinute: number + avgResponseTimeMs: number; + p95ResponseTimeMs: number; + p99ResponseTimeMs: number; + errorRate: number; // 0-1 + requestsPerMinute: number; } // === Full Snapshot === export interface SystemMetrics { - timestamp: string // ISO date - system: SystemHealth - services: ServiceStatuses - external: ExternalStatuses - performance: PerformanceMetrics + timestamp: string; // ISO date + system: SystemHealth; + services: ServiceStatuses; + external: ExternalStatuses; + performance: PerformanceMetrics; } // === SSE Events (discriminated union) === export type MonitoringEvent = - | { type: 'health'; data: SystemHealth } - | { type: 'service'; data: Partial } - | { type: 'alert'; data: AlertEvent } - | { type: 'log'; data: LogEvent } - | { type: 'performance'; data: PerformanceMetrics } + | { type: "health"; data: SystemHealth } + | { type: "service"; data: Partial } + | { type: "alert"; data: AlertEvent } + | { type: "log"; data: LogEvent } + | { type: "performance"; data: PerformanceMetrics }; export interface AlertEvent { - id: string - ruleId: string - metric: string - value: number - threshold: number - severity: AlertSeverity - message: string - timestamp: string + id: string; + ruleId: string; + metric: string; + value: number; + threshold: number; + severity: AlertSeverity; + message: string; + timestamp: string; } export interface LogEvent { - id: string - level: LogLevel - source: LogSource - message: string - timestamp: string - context?: Record + id: string; + level: LogLevel; + source: LogSource; + message: string; + timestamp: string; + context?: Record; } // === Enums as union types === -export type AlertCondition = 'gt' | 'lt' | 'eq' | 'gte' | 'lte' -export type AlertSeverity = 'warning' | 'error' | 'critical' -export type LogLevel = 'debug' | 'info' | 'warn' | 'error' | 'fatal' -export type LogSource = - | 'payload' - | 'queue-worker' - | 'cron' - | 'email' - | 'oauth' - | 'sync' - | 'security' +export type AlertCondition = "gt" | "lt" | "eq" | "gte" | "lte"; +export type AlertSeverity = "warning" | "error" | "critical"; +export type LogLevel = "debug" | "info" | "warn" | "error" | "fatal"; +export type LogSource = "payload" | "queue-worker" | "cron" | "email" | "oauth" | "sync" | "security"; // === Performance Tracker Entry === export interface PerformanceEntry { - timestamp: number // Date.now() - method: string - path: string - statusCode: number - durationMs: number + timestamp: number; // Date.now() + method: string; + path: string; + statusCode: number; + durationMs: number; }