fix(ci): increase build heap size and format monitoring files

The build was running out of memory (OOM) in CI under Node's default heap
limit. Set NODE_OPTIONS=--max-old-space-size=4096 (4 GB heap) on the build
step. Also ran Prettier on the monitoring files (formatting only).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Martin Porwoll 2026-02-17 11:58:08 +00:00
parent 884d33c0ae
commit 037835d1de
7 changed files with 481 additions and 510 deletions

View file

@ -212,6 +212,7 @@ jobs:
- name: Build application
run: pnpm build
env:
NODE_OPTIONS: '--max-old-space-size=4096'
# Minimal env vars for build
PAYLOAD_SECRET: build-secret-placeholder
DATABASE_URI: postgresql://placeholder:placeholder@localhost:5432/placeholder

View file

@ -6,8 +6,8 @@
* cooldown periods, and multi-channel alert dispatch.
*/
import type { Payload } from 'payload'
import type { AlertCondition, AlertSeverity, SystemMetrics } from './types'
import type { Payload } from "payload";
import type { AlertCondition, AlertSeverity, SystemMetrics } from "./types";
// ============================================================================
// Pure Functions
@ -18,40 +18,36 @@ import type { AlertCondition, AlertSeverity, SystemMetrics } from './types'
* Example: getMetricValue(metrics, 'system.cpuUsagePercent') => 92
*/
export function getMetricValue(metrics: Record<string, unknown>, path: string): number | undefined {
  // Resolve a dot-separated path (e.g. "system.cpuUsagePercent") one segment at a time.
  let node: unknown = metrics;
  for (const segment of path.split(".")) {
    // A null/undefined/primitive value in the middle of the path means the path does not exist.
    if (node === null || typeof node !== "object") {
      return undefined;
    }
    // Follow own properties only, so a rule cannot read values off the prototype chain.
    if (!Object.prototype.hasOwnProperty.call(node, segment)) {
      return undefined;
    }
    node = (node as Record<string, unknown>)[segment];
  }
  // Only numeric leaves count as metric values; anything else is reported as "not found".
  return typeof node === "number" ? node : undefined;
}
/**
 * Evaluates a condition against a value and threshold.
 *
 * @param condition - Comparison operator: "gt", "lt", "eq", "gte" or "lte".
 * @param value - Observed metric value (left-hand side of the comparison).
 * @param threshold - Configured limit (right-hand side of the comparison).
 * @returns The result of the comparison; `false` for an unrecognised operator.
 */
export function evaluateCondition(condition: AlertCondition, value: number, threshold: number): boolean {
  switch (condition) {
    case "gt":
      return value > threshold;
    case "lt":
      return value < threshold;
    case "eq":
      return value === threshold;
    case "gte":
      return value >= threshold;
    case "lte":
      return value <= threshold;
    default:
      // Unknown operators never match, so a malformed rule cannot fire an alert.
      return false;
  }
}
@ -60,28 +56,28 @@ export function evaluateCondition(
// ============================================================================
/**
 * Shape that `evaluateRules` casts `monitoring-alert-rules` documents to.
 */
interface AlertRule {
  id: number;
  name: string;
  /** Dot-separated path into the metrics object, resolved with `getMetricValue`. */
  metric: string;
  /** Operator passed to `evaluateCondition` together with `threshold`. */
  condition: AlertCondition;
  threshold: number;
  severity: AlertSeverity;
  channels: Array<"email" | "slack" | "discord">;
  recipients?: {
    emails?: Array<{ email: string }>;
    slackWebhook?: string;
    discordWebhook?: string;
  };
  /** Minimum number of minutes between two firings of this rule. */
  cooldownMinutes: number;
  enabled: boolean;
}
// Maps AlertSeverity to the AlertLevel expected by alert-service
const SEVERITY_TO_LEVEL: Record<AlertSeverity, string> = {
  warning: "warning",
  error: "error",
  critical: "critical",
};
// ============================================================================
// AlertEvaluator Class
@ -89,58 +85,52 @@ const SEVERITY_TO_LEVEL: Record<AlertSeverity, string> = {
export class AlertEvaluator {
/** Tracks last fire time (epoch milliseconds from `Date.now()`) per rule key to enforce cooldown */
private cooldownMap: Map<string, number> = new Map();
/**
 * Returns true if the rule should fire (not in cooldown).
 *
 * @param ruleId - Key under which `recordFired` stored the last fire time.
 * @param cooldownMinutes - Minimum number of minutes between two firings.
 */
shouldFire(ruleId: string, cooldownMinutes: number): boolean {
  const lastFired = this.cooldownMap.get(ruleId);
  // Explicit undefined check: "never fired" must not be confused with a falsy timestamp.
  if (lastFired === undefined) return true;

  const elapsedMinutes = (Date.now() - lastFired) / 60_000;
  // Written as a negated "<" so a missing (NaN) cooldown still lets the rule fire.
  return !(elapsedMinutes < cooldownMinutes);
}
/**
 * Records the current time as the last fire time of a rule.
 * `evaluateRules` calls this after `dispatchAlert` has resolved.
 */
recordFired(ruleId: string): void {
  this.cooldownMap.set(ruleId, Date.now());
}
/**
 * Evaluates all enabled rules against current metrics.
 * Fires alerts for rules that match and are not in cooldown.
 *
 * Errors are logged to the console and never propagate to the caller.
 *
 * @param payload - Payload instance used to load rules and dispatch alerts.
 * @param metrics - Metrics snapshot that rule metric paths are resolved against.
 */
async evaluateRules(payload: Payload, metrics: Omit<SystemMetrics, "timestamp">): Promise<void> {
  try {
    // Only the first 100 enabled rules are loaded.
    const rules = await payload.find({
      collection: "monitoring-alert-rules",
      where: { enabled: { equals: true } },
      limit: 100,
    });

    for (const doc of rules.docs) {
      const rule = doc as unknown as AlertRule;
      const value = getMetricValue(metrics as unknown as Record<string, unknown>, rule.metric);
      // Skip rules whose metric path does not resolve to a number.
      if (value === undefined) continue;

      if (evaluateCondition(rule.condition, value, rule.threshold)) {
        const ruleKey = String(rule.id);
        if (this.shouldFire(ruleKey, rule.cooldownMinutes)) {
          await this.dispatchAlert(payload, rule, value);
          this.recordFired(ruleKey);
        }
      }
    }
  } catch (error) {
    console.error("[AlertEvaluator] Error evaluating rules:", error);
  }
}
@ -149,11 +139,11 @@ export class AlertEvaluator {
* via the existing alert service.
*/
private async dispatchAlert(payload: Payload, rule: AlertRule, value: number): Promise<void> {
const message = `${rule.name}: ${rule.metric} = ${value} (threshold: ${rule.condition} ${rule.threshold})`
const message = `${rule.name}: ${rule.metric} = ${value} (threshold: ${rule.condition} ${rule.threshold})`;
try {
await payload.create({
collection: 'monitoring-alert-history',
collection: "monitoring-alert-history",
data: {
rule: rule.id,
metric: rule.metric,
@ -163,13 +153,13 @@ export class AlertEvaluator {
message,
channelsSent: rule.channels,
},
})
});
// Try to send via existing alert service
try {
const { sendAlert } = await import('../alerting/alert-service.js')
const { sendAlert } = await import("../alerting/alert-service.js");
await sendAlert(payload, {
level: SEVERITY_TO_LEVEL[rule.severity] as 'warning' | 'error' | 'critical',
level: SEVERITY_TO_LEVEL[rule.severity] as "warning" | "error" | "critical",
title: `[${rule.severity.toUpperCase()}] ${rule.name}`,
message,
details: {
@ -178,13 +168,13 @@ export class AlertEvaluator {
threshold: rule.threshold,
condition: rule.condition,
},
})
});
} catch {
// Alert service not available, history record is sufficient
console.warn(`[AlertEvaluator] Could not dispatch via alert-service: ${message}`)
console.warn(`[AlertEvaluator] Could not dispatch via alert-service: ${message}`);
}
} catch (error) {
console.error('[AlertEvaluator] Error dispatching alert:', error)
console.error("[AlertEvaluator] Error dispatching alert:", error);
}
}
}

View file

@ -5,7 +5,7 @@
* Falls back to console output when Payload is not yet initialized.
*/
import type { LogLevel, LogSource } from './types'
import type { LogLevel, LogSource } from "./types";
const LOG_LEVELS: Record<LogLevel, number> = {
debug: 0,
@ -13,63 +13,58 @@ const LOG_LEVELS: Record<LogLevel, number> = {
warn: 2,
error: 3,
fatal: 4,
}
};
/**
 * Resolves the minimum level to persist from MONITORING_LOG_LEVEL.
 *
 * Falls back to "info" when the variable is unset or is not a key of LOG_LEVELS.
 * An unknown level would otherwise index LOG_LEVELS to `undefined`, making every
 * `>=` comparison in `shouldLog` false and silently dropping all logs.
 */
function getMinLevel(): LogLevel {
  const configured = process.env.MONITORING_LOG_LEVEL;
  if (configured && Object.prototype.hasOwnProperty.call(LOG_LEVELS, configured)) {
    return configured as LogLevel;
  }
  return "info";
}
/** Returns true when `level` is at or above the configured minimum level. */
function shouldLog(level: LogLevel): boolean {
  return LOG_LEVELS[level] >= LOG_LEVELS[getMinLevel()];
}
/**
 * Optional structured context attached to a log entry.
 * `writeLog` pulls the four named fields out and collects all remaining keys separately.
 */
export interface LogContext {
  requestId?: string;
  userId?: number;
  tenant?: number;
  duration?: number;
  [key: string]: unknown;
}
/** Logger returned by `createMonitoringLogger`: one method per log level. */
export interface MonitoringLoggerInstance {
  debug(message: string, context?: LogContext): void;
  info(message: string, context?: LogContext): void;
  warn(message: string, context?: LogContext): void;
  error(message: string, context?: LogContext): void;
  fatal(message: string, context?: LogContext): void;
}
/** Cached Payload instance — resolved once, reused for all subsequent writes. */
let cachedPayload: any = null;

/**
 * Returns the cached Payload instance, creating it on first use.
 * `payload` and `@payload-config` are imported dynamically, so they are only loaded
 * when the first log entry is actually written.
 */
async function getPayloadInstance(): Promise<any> {
  if (cachedPayload) return cachedPayload;
  const { getPayload } = await import("payload");
  const config = (await import(/* @vite-ignore */ "@payload-config")).default;
  cachedPayload = await getPayload({ config });
  return cachedPayload;
}
/** Reset cached instance (used in tests). The next write resolves Payload again. */
export function _resetPayloadCache(): void {
  cachedPayload = null;
}
async function writeLog(
source: LogSource,
level: LogLevel,
message: string,
context?: LogContext,
): Promise<void> {
if (!shouldLog(level)) return
async function writeLog(source: LogSource, level: LogLevel, message: string, context?: LogContext): Promise<void> {
if (!shouldLog(level)) return;
try {
const payload = await getPayloadInstance()
const payload = await getPayloadInstance();
const { requestId, userId, tenant, duration, ...rest } = context || {}
const { requestId, userId, tenant, duration, ...rest } = context || {};
await payload.create({
collection: 'monitoring-logs',
collection: "monitoring-logs",
data: {
level,
source,
@ -80,12 +75,12 @@ async function writeLog(
tenant,
duration,
},
})
});
} catch {
// Fallback to console if Payload is not yet initialized
cachedPayload = null
const prefix = `[${source}][${level.toUpperCase()}]`
console.log(prefix, message, context || '')
cachedPayload = null;
const prefix = `[${source}][${level.toUpperCase()}]`;
console.log(prefix, message, context || "");
}
}
@ -94,16 +89,16 @@ export function createMonitoringLogger(source: LogSource): MonitoringLoggerInsta
return function logMessage(message: string, context?: LogContext): void {
// Fire-and-forget -- don't block the caller
writeLog(source, level, message, context).catch(function onError(err) {
console.error(`[MonitoringLogger] Failed to write ${level} log:`, err)
})
}
console.error(`[MonitoringLogger] Failed to write ${level} log:`, err);
});
};
}
return {
debug: log('debug'),
info: log('info'),
warn: log('warn'),
error: log('error'),
fatal: log('fatal'),
}
debug: log("debug"),
info: log("info"),
warn: log("warn"),
error: log("error"),
fatal: log("fatal"),
};
}

View file

@ -5,8 +5,8 @@
* dependency checks. Used by the monitoring dashboard and snapshot collector.
*/
import os from 'node:os'
import { execSync } from 'node:child_process'
import os from "node:os";
import { execSync } from "node:child_process";
import type {
SystemHealth,
ProcessStatus,
@ -21,9 +21,9 @@ import type {
SecurityMetricsStatus,
PerformanceMetrics,
SystemMetrics,
} from './types'
import { checkSecretsHealth } from '../security/secrets-health'
import { getSecurityMetricsSnapshot } from '../security/security-observability'
} from "./types";
import { checkSecretsHealth } from "../security/secrets-health";
import { getSecurityMetricsSnapshot } from "../security/security-observability";
// ============================================================================
// System Health
@ -34,15 +34,15 @@ import { getSecurityMetricsSnapshot } from '../security/security-observability'
* CPU usage is calculated by sampling /proc/stat twice with 100ms delay.
*/
export async function checkSystemHealth(): Promise<SystemHealth> {
const cpuUsagePercent = await getCpuUsage()
const cpuUsagePercent = await getCpuUsage();
const memoryTotalMB = Math.round(os.totalmem() / 1024 / 1024)
const memoryUsedMB = Math.round((os.totalmem() - os.freemem()) / 1024 / 1024)
const memoryUsagePercent = roundToOneDecimal((memoryUsedMB / memoryTotalMB) * 100)
const memoryTotalMB = Math.round(os.totalmem() / 1024 / 1024);
const memoryUsedMB = Math.round((os.totalmem() - os.freemem()) / 1024 / 1024);
const memoryUsagePercent = roundToOneDecimal((memoryUsedMB / memoryTotalMB) * 100);
const { diskUsedGB, diskTotalGB, diskUsagePercent } = getDiskUsage()
const { diskUsedGB, diskTotalGB, diskUsagePercent } = getDiskUsage();
const [loadAvg1, loadAvg5] = os.loadavg()
const [loadAvg1, loadAvg5] = os.loadavg();
return {
cpuUsagePercent: roundToOneDecimal(cpuUsagePercent),
@ -55,7 +55,7 @@ export async function checkSystemHealth(): Promise<SystemHealth> {
loadAvg1: roundToTwoDecimals(loadAvg1),
loadAvg5: roundToTwoDecimals(loadAvg5),
uptime: Math.round(os.uptime()),
}
};
}
// ============================================================================
@ -64,214 +64,212 @@ export async function checkSystemHealth(): Promise<SystemHealth> {
/**
 * Reports Redis status from the client's INFO output.
 *
 * @returns "online" with memory, client and ops figures, or the all-zero "offline"
 *          status when no client is available or any step throws.
 */
export async function checkRedis(): Promise<RedisStatus> {
  const offlineStatus: RedisStatus = {
    status: "offline",
    memoryUsedMB: 0,
    connectedClients: 0,
    opsPerSec: 0,
  };

  try {
    const { getRedisClient } = await import("../redis.js");
    const client = getRedisClient();
    if (!client) return offlineStatus;

    const info = await client.info();

    // INFO is a list of "key:value" lines. Anchoring to the start of a line prevents a
    // key from matching as the suffix of a longer key name.
    const getVal = (key: string): number => {
      const match = info.match(new RegExp(`^${key}:(\\d+)`, "m"));
      return match ? parseInt(match[1], 10) : 0;
    };

    return {
      status: "online",
      memoryUsedMB: Math.round(getVal("used_memory") / 1024 / 1024),
      connectedClients: getVal("connected_clients"),
      opsPerSec: getVal("instantaneous_ops_per_sec"),
    };
  } catch {
    return offlineStatus;
  }
}
/**
 * Reports PostgreSQL status.
 *
 * Latency is the duration of a zero-row `users` query through Payload. Connection
 * counts come from `psql` and are best-effort: when `psql` fails they stay at
 * 0 / 50 and the status is still derived from the latency.
 *
 * @returns "online" below 1000 ms latency, "warning" at or above it, and the
 *          "offline" status when Payload or the query throws.
 */
export async function checkPostgresql(): Promise<PostgresqlStatus> {
  const offlineStatus: PostgresqlStatus = {
    status: "offline",
    connections: 0,
    maxConnections: 50,
    latencyMs: -1,
  };

  try {
    const { getPayload } = await import("payload");
    const payload = await getPayload({ config: (await import("@payload-config")).default });

    const start = Date.now();
    await payload.find({ collection: "users", limit: 0 });
    const latencyMs = Date.now() - start;

    let connections = 0;
    let maxConnections = 50;

    try {
      // NOTE(review): host, user and database are hard-coded here rather than derived
      // from DATABASE_URI — confirm this matches every deployment.
      const connResult = runPsql(
        "-h 10.10.181.101 -U payload -d payload_db -t -c \"SELECT count(*) FROM pg_stat_activity WHERE datname = 'payload_db'\"",
      );
      connections = parseInt(connResult.trim(), 10) || 0;

      const maxResult = runPsql('-h 10.10.181.101 -U payload -d payload_db -t -c "SHOW max_connections"');
      maxConnections = parseInt(maxResult.trim(), 10) || 50;
    } catch {
      // psql unavailable -- latency check already proves connectivity
    }

    return {
      status: latencyMs < 1000 ? "online" : "warning",
      connections,
      maxConnections,
      latencyMs,
    };
  } catch {
    return offlineStatus;
  }
}
/**
 * Reports PgBouncer status by running `SHOW POOLS` on the admin console (127.0.0.1:6432).
 *
 * Active and waiting client counts are summed over every output row containing
 * "payload"; `poolSize` is taken from the last such row.
 *
 * @returns The aggregated "online" status, or the all-zero "offline" status when psql fails.
 */
export async function checkPgBouncer(): Promise<PgBouncerStatus> {
  const offlineStatus: PgBouncerStatus = {
    status: "offline",
    activeConnections: 0,
    waitingClients: 0,
    poolSize: 0,
  };

  try {
    const output = runPsql('-h 127.0.0.1 -p 6432 -U payload -d pgbouncer -t -c "SHOW POOLS"');

    // SHOW POOLS columns: database | user | cl_active | cl_waiting | sv_active | sv_idle | pool_size | ...
    // NOTE(review): the column order differs between PgBouncer versions, and pool_size may
    // only be reported by SHOW DATABASES — verify index 6 against the deployed version.
    const lines = output
      .trim()
      .split("\n")
      .filter((l) => l.includes("payload"));

    let activeConnections = 0;
    let waitingClients = 0;
    let poolSize = 20;

    for (const line of lines) {
      const parts = line.split("|").map((s) => s.trim());
      activeConnections += parseInt(parts[2], 10) || 0;
      waitingClients += parseInt(parts[3], 10) || 0;
      poolSize = parseInt(parts[6], 10) || 20;
    }

    return { status: "online", activeConnections, waitingClients, poolSize };
  } catch {
    return offlineStatus;
  }
}
/** Job counts per state for a single queue, as reported by `checkQueues`. */
export interface QueueCounts {
  waiting: number;
  active: number;
  completed: number;
  failed: number;
}
/**
 * Collects job counts for the known BullMQ queues.
 *
 * A queue whose counts cannot be read is reported with all-zero counts. If BullMQ or
 * the queue service cannot be loaded at all, an empty object is returned.
 *
 * @returns Map of queue name to its job counts.
 */
export async function checkQueues(): Promise<Record<string, QueueCounts>> {
  try {
    const { Queue } = await import("bullmq");
    const { getQueueRedisConnection } = await import("../queue/queue-service.js");

    const connection = getQueueRedisConnection();
    // Queue names matching QUEUE_NAMES in queue-service.ts
    const queueNames = ["email", "pdf", "cleanup", "youtube-upload"];
    const results: Record<string, QueueCounts> = {};

    for (const name of queueNames) {
      let queue: InstanceType<typeof Queue> | undefined;
      try {
        queue = new Queue(name, { connection });
        const counts = await queue.getJobCounts();
        results[name] = {
          waiting: counts.waiting || 0,
          active: counts.active || 0,
          completed: counts.completed || 0,
          failed: counts.failed || 0,
        };
      } catch {
        results[name] = { waiting: 0, active: 0, completed: 0, failed: 0 };
      } finally {
        // Close even when getJobCounts throws so the queue is never left open.
        // A failing close must not discard counts that were already read.
        await queue?.close().catch(() => undefined);
      }
    }

    return results;
  } catch {
    return {};
  }
}
/**
 * Verifies the SMTP configuration from the SMTP_* environment variables
 * (port defaults to 587) with nodemailer's `verify()`.
 *
 * @returns "online" with the verification time in ms, or "offline" with
 *          `responseTimeMs: -1` when verification throws. `lastCheck` is the
 *          timestamp taken when the check started.
 */
export async function checkSmtp(): Promise<SmtpStatus> {
  const now = new Date().toISOString();

  try {
    const nodemailer = await import("nodemailer");
    const transporter = nodemailer.createTransport({
      host: process.env.SMTP_HOST,
      port: parseInt(process.env.SMTP_PORT || "587", 10),
      secure: process.env.SMTP_SECURE === "true",
      auth: {
        user: process.env.SMTP_USER,
        pass: process.env.SMTP_PASS,
      },
    });

    const start = Date.now();
    await transporter.verify();
    const responseTimeMs = Date.now() - start;

    return { status: "online", lastCheck: now, responseTimeMs };
  } catch {
    return { status: "offline", lastCheck: now, responseTimeMs: -1 };
  }
}
export async function checkOAuthTokens(): Promise<{
metaOAuth: OAuthTokenStatus
youtubeOAuth: OAuthTokenStatus
metaOAuth: OAuthTokenStatus;
youtubeOAuth: OAuthTokenStatus;
}> {
const errorStatus: OAuthTokenStatus = {
status: 'error',
status: "error",
tokensTotal: 0,
tokensExpiringSoon: 0,
tokensExpired: 0,
}
};
try {
const { getPayload } = await import('payload')
const payload = await getPayload({ config: (await import('@payload-config')).default })
const { getPayload } = await import("payload");
const payload = await getPayload({ config: (await import("@payload-config")).default });
const accounts = await payload.find({
collection: 'social-accounts',
collection: "social-accounts",
limit: 100,
where: { status: { equals: 'connected' } },
})
where: { status: { equals: "connected" } },
});
const sevenDaysFromNow = new Date()
sevenDaysFromNow.setDate(sevenDaysFromNow.getDate() + 7)
const now = new Date()
const sevenDaysFromNow = new Date();
sevenDaysFromNow.setDate(sevenDaysFromNow.getDate() + 7);
const now = new Date();
const meta = { tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 }
const youtube = { tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 }
const meta = { tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 };
const youtube = { tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 };
for (const account of accounts.docs) {
const doc = account as unknown as Record<string, unknown>
const target = doc.platform === 'youtube' ? youtube : meta
target.tokensTotal++
const doc = account as unknown as Record<string, unknown>;
const target = doc.platform === "youtube" ? youtube : meta;
target.tokensTotal++;
const expiresAt = doc.tokenExpiresAt ? new Date(doc.tokenExpiresAt as string) : null
const expiresAt = doc.tokenExpiresAt ? new Date(doc.tokenExpiresAt as string) : null;
if (expiresAt) {
if (expiresAt < now) {
target.tokensExpired++
target.tokensExpired++;
} else if (expiresAt < sevenDaysFromNow) {
target.tokensExpiringSoon++
target.tokensExpiringSoon++;
}
}
}
@ -279,58 +277,55 @@ export async function checkOAuthTokens(): Promise<{
return {
metaOAuth: { status: getOAuthStatus(meta), ...meta },
youtubeOAuth: { status: getOAuthStatus(youtube), ...youtube },
}
};
} catch {
return { metaOAuth: errorStatus, youtubeOAuth: errorStatus }
return { metaOAuth: errorStatus, youtubeOAuth: errorStatus };
}
}
/**
 * Derives cron job status from the most recent `monitoring-logs` entry with source
 * "cron" whose message contains the job's keyword.
 *
 * @returns Per-job status: "ok" or "failed" from the latest matching log, or "unknown"
 *          (empty `lastRun`) when no log exists or the lookup throws.
 */
export async function checkCronJobs(): Promise<CronStatuses> {
  const unknownStatus: CronJobStatus = { lastRun: "", status: "unknown" };

  try {
    const { getPayload } = await import("payload");
    const payload = await getPayload({ config: (await import("@payload-config")).default });

    async function checkCron(source: string): Promise<CronJobStatus> {
      try {
        const logs = await payload.find({
          collection: "monitoring-logs",
          limit: 1,
          sort: "-createdAt",
          where: {
            and: [{ source: { equals: "cron" } }, { message: { contains: source } }],
          },
        });

        if (logs.docs.length === 0) return unknownStatus;

        const doc = logs.docs[0] as unknown as Record<string, unknown>;
        return {
          lastRun: doc.createdAt as string,
          // NOTE(review): only "error" counts as failed; a "fatal" entry is reported
          // as "ok" — confirm that is intended.
          status: doc.level === "error" ? "failed" : "ok",
        };
      } catch {
        return unknownStatus;
      }
    }

    const [communitySync, tokenRefresh, youtubeSync] = await Promise.all([
      checkCron("community-sync"),
      checkCron("token-refresh"),
      checkCron("youtube"),
    ]);

    return { communitySync, tokenRefresh, youtubeSync };
  } catch {
    return {
      communitySync: unknownStatus,
      tokenRefresh: unknownStatus,
      youtubeSync: unknownStatus,
    };
  }
}
@ -342,8 +337,9 @@ export async function checkCronJobs(): Promise<CronStatuses> {
* Collects all monitoring metrics in parallel. Individual check failures
* are isolated and return safe defaults instead of failing the whole collection.
*/
export async function collectMetrics(): Promise<Omit<SystemMetrics, 'timestamp'>> {
const [system, redis, postgresql, pgbouncer, smtp, oauth, cronJobs, secrets, securityEvents] = await Promise.allSettled([
export async function collectMetrics(): Promise<Omit<SystemMetrics, "timestamp">> {
const [system, redis, postgresql, pgbouncer, smtp, oauth, cronJobs, secrets, securityEvents] =
await Promise.allSettled([
checkSystemHealth(),
checkRedis(),
checkPostgresql(),
@ -353,7 +349,7 @@ export async function collectMetrics(): Promise<Omit<SystemMetrics, 'timestamp'>
checkCronJobs(),
Promise.resolve(checkSecretsHealth()),
Promise.resolve(getSecurityMetricsSnapshot()),
])
]);
// Load performance tracker lazily to avoid circular dependencies
let performance: PerformanceMetrics = {
@ -362,51 +358,51 @@ export async function collectMetrics(): Promise<Omit<SystemMetrics, 'timestamp'>
p99ResponseTimeMs: 0,
errorRate: 0,
requestsPerMinute: 0,
}
};
try {
// Dynamic path constructed at runtime to avoid Vite static analysis
// when performance-tracker module has not been created yet
const trackerPath = './performance-tracker'
const mod = await import(/* @vite-ignore */ trackerPath)
performance = mod.performanceTracker.getMetrics('1h')
const trackerPath = "./performance-tracker";
const mod = await import(/* @vite-ignore */ trackerPath);
performance = mod.performanceTracker.getMetrics("1h");
} catch {
// Performance tracker not yet initialized
}
const defaultProcess: ProcessStatus = {
status: 'offline',
status: "offline",
pid: 0,
memoryMB: 0,
uptimeSeconds: 0,
restarts: 0,
}
};
const { payloadProcess, queueWorkerProcess } = getPm2Processes(defaultProcess)
const { payloadProcess, queueWorkerProcess } = getPm2Processes(defaultProcess);
const oauthDefaults = {
metaOAuth: { status: 'error' as const, tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 },
youtubeOAuth: { status: 'error' as const, tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 },
}
metaOAuth: { status: "error" as const, tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 },
youtubeOAuth: { status: "error" as const, tokensTotal: 0, tokensExpiringSoon: 0, tokensExpired: 0 },
};
const cronDefaults: CronStatuses = {
communitySync: { lastRun: '', status: 'unknown' },
tokenRefresh: { lastRun: '', status: 'unknown' },
youtubeSync: { lastRun: '', status: 'unknown' },
}
communitySync: { lastRun: "", status: "unknown" },
tokenRefresh: { lastRun: "", status: "unknown" },
youtubeSync: { lastRun: "", status: "unknown" },
};
const secretsDefaults: SecretsHealthStatus = {
status: 'critical',
status: "critical",
checkedAt: new Date().toISOString(),
missing: [],
expiringSoon: [],
expired: [],
rotationOverdue: [],
}
};
const securityEventsDefaults: SecurityMetricsStatus = {
windowMs: 300000,
counters: [],
}
};
const systemDefaults: SystemHealth = {
cpuUsagePercent: 0,
@ -419,21 +415,21 @@ export async function collectMetrics(): Promise<Omit<SystemMetrics, 'timestamp'>
loadAvg1: 0,
loadAvg5: 0,
uptime: 0,
}
};
const oauthResult = settled(oauth, oauthDefaults)
const oauthResult = settled(oauth, oauthDefaults);
return {
system: settled(system, systemDefaults),
services: {
payload: payloadProcess,
queueWorker: queueWorkerProcess,
postgresql: settled(postgresql, { status: 'offline', connections: 0, maxConnections: 50, latencyMs: -1 }),
pgbouncer: settled(pgbouncer, { status: 'offline', activeConnections: 0, waitingClients: 0, poolSize: 0 }),
redis: settled(redis, { status: 'offline', memoryUsedMB: 0, connectedClients: 0, opsPerSec: 0 }),
postgresql: settled(postgresql, { status: "offline", connections: 0, maxConnections: 50, latencyMs: -1 }),
pgbouncer: settled(pgbouncer, { status: "offline", activeConnections: 0, waitingClients: 0, poolSize: 0 }),
redis: settled(redis, { status: "offline", memoryUsedMB: 0, connectedClients: 0, opsPerSec: 0 }),
},
external: {
smtp: settled(smtp, { status: 'offline', lastCheck: new Date().toISOString(), responseTimeMs: -1 }),
smtp: settled(smtp, { status: "offline", lastCheck: new Date().toISOString(), responseTimeMs: -1 }),
metaOAuth: oauthResult.metaOAuth,
youtubeOAuth: oauthResult.youtubeOAuth,
cronJobs: settled(cronJobs, cronDefaults),
@ -441,7 +437,7 @@ export async function collectMetrics(): Promise<Omit<SystemMetrics, 'timestamp'>
securityEvents: settled(securityEvents, securityEventsDefaults),
},
performance,
}
};
}
// ============================================================================
@ -454,18 +450,18 @@ export async function collectMetrics(): Promise<Omit<SystemMetrics, 'timestamp'>
*/
function runPsql(args: string): string {
  // Runs `psql <args>` synchronously and returns its stdout. Throws when psql exits
  // non-zero or exceeds the 5 second timeout; the password comes from DB_PASSWORD
  // via PGPASSWORD.
  // `args` is interpolated into a shell command line unescaped, so only pass constant,
  // trusted strings (as the callers in this file do).
  return execSync(`psql ${args}`, {
    encoding: "utf-8",
    timeout: 5000,
    env: { ...process.env, PGPASSWORD: process.env.DB_PASSWORD || "" },
  });
}
/** Rounds `value` to one decimal place. */
function roundToOneDecimal(value: number): number {
  return Math.round(value * 10) / 10;
}
/** Rounds `value` to two decimal places. */
function roundToTwoDecimals(value: number): number {
  return Math.round(value * 100) / 100;
}
/**
@ -473,99 +469,95 @@ function roundToTwoDecimals(value: number): number {
* the fallback when the promise was rejected.
*/
function settled<T>(result: PromiseSettledResult<T>, fallback: T): T {
  // Unwraps a Promise.allSettled entry: the value when fulfilled, the fallback when rejected.
  return result.status === "fulfilled" ? result.value : fallback;
}
/**
 * Measures CPU usage as a percentage in the range 0–100.
 *
 * Reads the aggregate "cpu" line of /proc/stat twice, 100 ms apart, and reports the
 * non-idle share of the elapsed time. When /proc/stat is missing or unparseable,
 * falls back to the 1-minute load average divided by the CPU count.
 */
async function getCpuUsage(): Promise<number> {
  // Keeps the result inside 0–100; a non-finite value is reported as 0.
  const clampPercent = (value: number): number =>
    Number.isFinite(value) ? Math.min(100, Math.max(0, value)) : 0;

  try {
    const fs = await import("node:fs/promises");
    const stat1 = await fs.readFile("/proc/stat", "utf-8");
    await new Promise((resolve) => setTimeout(resolve, 100));
    const stat2 = await fs.readFile("/proc/stat", "utf-8");

    const parse = (data: string): { idle: number; total: number } => {
      const line = data.split("\n")[0]; // first line: cpu user nice system idle ...
      const parts = line.split(/\s+/).slice(1).map(Number);
      const idle = parts[3] + (parts[4] || 0); // idle + iowait
      const total = parts.reduce((a, b) => a + b, 0);
      return { idle, total };
    };

    const s1 = parse(stat1);
    const s2 = parse(stat2);
    const idleDiff = s2.idle - s1.idle;
    const totalDiff = s2.total - s1.total;

    // A malformed first line yields NaN; use the load-average fallback instead of returning it.
    if (!Number.isFinite(idleDiff) || !Number.isFinite(totalDiff)) {
      throw new Error("Unparseable /proc/stat");
    }

    if (totalDiff === 0) return 0;
    return clampPercent(((totalDiff - idleDiff) / totalDiff) * 100);
  } catch {
    // Fallback if /proc/stat is unavailable. Load can exceed the CPU count, hence the clamp.
    const cpuCount = os.cpus().length;
    return clampPercent((os.loadavg()[0] / cpuCount) * 100);
  }
}
/**
 * Reports usage of the root filesystem by parsing the last line of `df -B1 /`.
 *
 * @returns Used and total size in GiB plus the used percentage. All three
 *   values are 0 when `df` fails, times out, or prints something that cannot
 *   be parsed into a positive total.
 */
function getDiskUsage(): { diskUsedGB: number; diskTotalGB: number; diskUsagePercent: number } {
  const unavailable = { diskUsedGB: 0, diskTotalGB: 0, diskUsagePercent: 0 };
  const bytesPerGB = 1024 * 1024 * 1024;

  try {
    // Bounded like the pm2 probe so a hung mount cannot stall metric collection.
    const output = execSync("df -B1 / | tail -1", { encoding: "utf-8", timeout: 5000 });
    const parts = output.trim().split(/\s+/);
    // Format: filesystem 1B-blocks used available use% mountpoint
    const total = Number.parseInt(parts[1], 10);
    const used = Number.parseInt(parts[2], 10);

    // Unparseable columns or a zero-sized filesystem would otherwise yield NaN/Infinity.
    if (!Number.isFinite(total) || !Number.isFinite(used) || total <= 0) {
      return unavailable;
    }

    return {
      diskTotalGB: roundToOneDecimal(total / bytesPerGB),
      diskUsedGB: roundToOneDecimal(used / bytesPerGB),
      diskUsagePercent: roundToOneDecimal((used / total) * 100),
    };
  } catch {
    return unavailable;
  }
}
/**
 * Derives the aggregate OAuth status from token counts.
 *
 * Expired tokens take precedence over tokens that are merely expiring soon.
 *
 * @param counts Number of expired tokens and of tokens expiring soon.
 * @returns `"expired"`, `"expiring_soon"` or `"ok"`; this function never returns `"error"`.
 */
function getOAuthStatus(counts: { tokensExpired: number; tokensExpiringSoon: number }): OAuthTokenStatus["status"] {
  if (counts.tokensExpired > 0) return "expired";
  if (counts.tokensExpiringSoon > 0) return "expiring_soon";
  return "ok";
}
/** Status of the two PM2-managed processes looked up by `getPm2Processes`. */
interface Pm2Processes {
  /** Process registered in PM2 under the name `payload`. */
  payloadProcess: ProcessStatus;
  /** Process registered in PM2 under the name `queue-worker`. */
  queueWorkerProcess: ProcessStatus;
}
/**
 * Looks up the `payload` and `queue-worker` processes in the output of `pm2 jlist`.
 *
 * This is best-effort: if PM2 is missing, times out (5 s) or prints something
 * that is not a JSON array, both entries keep `defaultProcess`.
 *
 * @param defaultProcess Status reported for a process PM2 does not list.
 * @returns The status of both processes.
 */
function getPm2Processes(defaultProcess: ProcessStatus): Pm2Processes {
  let payloadProcess = defaultProcess;
  let queueWorkerProcess = defaultProcess;

  // The list is untrusted JSON, so numeric fields are validated rather than cast.
  const asNumber = (value: unknown): number =>
    typeof value === "number" && Number.isFinite(value) ? value : 0;

  try {
    const pm2Out = execSync("pm2 jlist", {
      encoding: "utf-8",
      timeout: 5000,
      // Keep "pm2: not found" and similar noise out of the parent's stderr on every poll.
      stdio: ["ignore", "pipe", "ignore"],
    });
    const pm2List: unknown = JSON.parse(pm2Out);
    if (!Array.isArray(pm2List)) {
      return { payloadProcess, queueWorkerProcess };
    }

    for (const proc of pm2List as Array<Record<string, unknown> | null>) {
      if (proc === null || typeof proc !== "object") continue;

      const env = proc.pm2_env as Record<string, unknown> | undefined;
      const monit = proc.monit as Record<string, unknown> | undefined;
      const startedAt = asNumber(env?.pm_uptime); // read as an epoch-ms start time

      const info: ProcessStatus = {
        status: env?.status === "online" ? "online" : "offline",
        pid: asNumber(proc.pid),
        memoryMB: Math.round(asNumber(monit?.memory) / 1024 / 1024),
        uptimeSeconds: startedAt > 0 ? Math.max(0, Math.round((Date.now() - startedAt) / 1000)) : 0,
        restarts: asNumber(env?.restart_time),
      };

      if (proc.name === "payload") {
        payloadProcess = info;
      } else if (proc.name === "queue-worker") {
        queueWorkerProcess = info;
      }
    }
  } catch {
    // PM2 not available
  }

  return { payloadProcess, queueWorkerProcess };
}

View file

@ -7,14 +7,14 @@
* error rates, and throughput.
*/
import type { PerformanceEntry, PerformanceMetrics } from './types'
import type { PerformanceEntry, PerformanceMetrics } from "./types";
/** Length in milliseconds of each look-back period accepted by `PerformanceTracker.getMetrics`. */
const PERIOD_MS: Record<string, number> = {
  "1h": 3_600_000,
  "6h": 21_600_000,
  "24h": 86_400_000,
  "7d": 604_800_000,
};
const EMPTY_METRICS: PerformanceMetrics = {
avgResponseTimeMs: 0,
@ -22,17 +22,17 @@ const EMPTY_METRICS: PerformanceMetrics = {
p99ResponseTimeMs: 0,
errorRate: 0,
requestsPerMinute: 0,
}
};
export class PerformanceTracker {
/** Pre-allocated storage for tracked entries; holds at most `capacity` of them. */
private readonly buffer: PerformanceEntry[];
/** Write position; `track` advances it modulo `capacity` after each entry. */
private pointer: number = 0;
/** Number of entries recorded so far, capped at `capacity`. */
private count: number = 0;
/** Size of `buffer`, fixed at construction. */
private readonly capacity: number;
/**
 * @param capacity Maximum number of entries kept; must be a positive integer.
 * @throws RangeError If `capacity` is not a positive integer. A capacity of 0
 *   would otherwise turn the write pointer into NaN through `% 0`.
 */
constructor(capacity: number = 10_000) {
  if (!Number.isInteger(capacity) || capacity < 1) {
    throw new RangeError(`PerformanceTracker capacity must be a positive integer, got ${capacity}`);
  }
  this.capacity = capacity;
  this.buffer = new Array<PerformanceEntry>(capacity);
}
track(method: string, path: string, statusCode: number, durationMs: number): void {
@ -42,40 +42,40 @@ export class PerformanceTracker {
path,
statusCode,
durationMs,
}
this.pointer = (this.pointer + 1) % this.capacity
};
this.pointer = (this.pointer + 1) % this.capacity;
if (this.count < this.capacity) {
this.count++
this.count++;
}
}
getMetrics(period: '1h' | '6h' | '24h' | '7d' = '1h'): PerformanceMetrics {
const cutoff = Date.now() - (PERIOD_MS[period] ?? PERIOD_MS['1h'])
getMetrics(period: "1h" | "6h" | "24h" | "7d" = "1h"): PerformanceMetrics {
const cutoff = Date.now() - (PERIOD_MS[period] ?? PERIOD_MS["1h"]);
const entries: PerformanceEntry[] = []
const entries: PerformanceEntry[] = [];
for (let i = 0; i < this.count; i++) {
const entry = this.buffer[i]
const entry = this.buffer[i];
if (entry && entry.timestamp >= cutoff) {
entries.push(entry)
entries.push(entry);
}
}
if (entries.length === 0) {
return { ...EMPTY_METRICS }
return { ...EMPTY_METRICS };
}
const durations = entries.map((e) => e.durationMs).sort((a, b) => a - b)
const durations = entries.map((e) => e.durationMs).sort((a, b) => a - b);
const avg = durations.reduce((sum, d) => sum + d, 0) / durations.length
const p95 = percentile(durations, 0.95)
const p99 = percentile(durations, 0.99)
const avg = durations.reduce((sum, d) => sum + d, 0) / durations.length;
const p95 = percentile(durations, 0.95);
const p99 = percentile(durations, 0.99);
const errorCount = entries.filter((e) => e.statusCode >= 500).length
const errorRate = errorCount / entries.length
const errorCount = entries.filter((e) => e.statusCode >= 500).length;
const errorRate = errorCount / entries.length;
const earliestTimestamp = Math.min(...entries.map((e) => e.timestamp))
const windowMinutes = Math.max((Date.now() - earliestTimestamp) / 60_000, 1)
const requestsPerMinute = entries.length / windowMinutes
const earliestTimestamp = Math.min(...entries.map((e) => e.timestamp));
const windowMinutes = Math.max((Date.now() - earliestTimestamp) / 60_000, 1);
const requestsPerMinute = entries.length / windowMinutes;
return {
avgResponseTimeMs: Math.round(avg),
@ -83,14 +83,14 @@ export class PerformanceTracker {
p99ResponseTimeMs: p99,
errorRate: Math.round(errorRate * 1000) / 1000,
requestsPerMinute: Math.round(requestsPerMinute * 10) / 10,
}
};
}
}
/**
 * Picks the value at fraction `p` of an ascending-sorted list.
 *
 * The index is `floor(length * p)`, clamped to the valid range, so `p = 1`
 * returns the largest value and a negative `p` returns the smallest.
 *
 * @param sorted Values sorted in ascending order.
 * @param p Fraction between 0 and 1 (e.g. 0.95 for the 95th percentile).
 * @returns The selected value, or 0 when `sorted` is empty.
 */
function percentile(sorted: number[], p: number): number {
  if (sorted.length === 0) return 0;
  const index = Math.floor(sorted.length * p);
  const clamped = Math.min(Math.max(index, 0), sorted.length - 1);
  return sorted[clamped];
}
/** Singleton instance used across the application; keeps up to 10,000 entries. */
export const performanceTracker = new PerformanceTracker(10_000);

View file

@ -6,62 +6,62 @@
* sie in MonitoringSnapshots. Evaluiert dabei Alert-Regeln.
*/
import { collectMetrics } from './monitoring-service'
import { AlertEvaluator } from './alert-evaluator'
import { collectMetrics } from "./monitoring-service";
import { AlertEvaluator } from "./alert-evaluator";
/** Handle of the running collection timer, or `null` while the collector is stopped. */
let interval: ReturnType<typeof setInterval> | null = null;
/** Evaluates alert rules against each collected set of metrics. */
const alertEvaluator = new AlertEvaluator();

/** Cached Payload instance — resolved once, reused on every tick. */
let cachedPayload: any = null;
/**
 * Returns the shared Payload instance, creating it on first use.
 *
 * `payload` and `@payload-config` are imported dynamically at call time. The
 * result is stored in `cachedPayload`; `collectAndSave` clears that cache
 * after a failure, so the next call resolves a fresh instance.
 */
async function getPayloadInstance(): Promise<any> {
  if (cachedPayload) return cachedPayload;

  const { getPayload } = await import("payload");
  const config = (await import(/* @vite-ignore */ "@payload-config")).default;
  cachedPayload = await getPayload({ config });
  return cachedPayload;
}
/**
 * Starts periodic snapshot collection.
 *
 * Collects once immediately, then repeats every
 * `MONITORING_SNAPSHOT_INTERVAL` milliseconds (default 60000). Values that are
 * not a positive integer within the timer range fall back to the default,
 * because `setInterval` would otherwise coerce them to a 1 ms delay. Calling
 * this again replaces the running timer instead of adding a second one.
 */
export async function startSnapshotCollector(): Promise<void> {
  const DEFAULT_INTERVAL_MS = 60_000;
  const MAX_TIMER_DELAY_MS = 2_147_483_647; // larger delays are treated as 1 ms by Node timers

  const parsed = Number.parseInt(process.env.MONITORING_SNAPSHOT_INTERVAL || "60000", 10);
  const isUsable = Number.isFinite(parsed) && parsed > 0 && parsed <= MAX_TIMER_DELAY_MS;
  const INTERVAL = isUsable ? parsed : DEFAULT_INTERVAL_MS;
  if (!isUsable) {
    console.warn(
      `[SnapshotCollector] Ignoring invalid MONITORING_SNAPSHOT_INTERVAL, using ${DEFAULT_INTERVAL_MS}ms`,
    );
  }

  console.log(`[SnapshotCollector] Starting (interval: ${INTERVAL}ms)`);

  // Run immediately once, then on interval
  await collectAndSave();

  // Drop any timer left by an earlier start so ticks are never duplicated.
  if (interval) {
    clearInterval(interval);
  }
  interval = setInterval(() => {
    // collectAndSave handles its own errors, so the promise can be left unawaited.
    void collectAndSave();
  }, INTERVAL);
}
/**
 * Runs one collection tick: gathers metrics, stores them in the
 * `monitoring-snapshots` collection, then evaluates alert rules against them.
 *
 * Never throws. Any failure is logged and the cached Payload instance is
 * dropped so the next tick resolves a fresh one.
 */
async function collectAndSave(): Promise<void> {
  try {
    const payload = await getPayloadInstance();
    const metrics = await collectMetrics();

    await payload.create({
      collection: "monitoring-snapshots",
      data: {
        // NOTE(review): if the collected metrics carry their own `timestamp`,
        // the spread below overrides this value — confirm that is intended.
        timestamp: new Date().toISOString(),
        ...metrics,
      },
    });

    // Evaluate alert rules against collected metrics
    await alertEvaluator.evaluateRules(payload, metrics);
  } catch (error) {
    console.error("[SnapshotCollector] Error:", error);
    // Reset cache on error so next tick re-resolves
    cachedPayload = null;
  }
}
/**
 * Stops periodic snapshot collection.
 *
 * Clears the timer if one is running and logs the stop either way, so it is
 * safe to call when the collector was never started.
 */
export async function stopSnapshotCollector(): Promise<void> {
  if (interval) {
    clearInterval(interval);
    interval = null;
  }
  console.log("[SnapshotCollector] Stopped");
}

View file

@ -1,192 +1,185 @@
// === System Health ===

/** Host-level CPU, memory, disk, load and uptime figures. */
export interface SystemHealth {
  cpuUsagePercent: number;
  memoryUsedMB: number;
  memoryTotalMB: number;
  memoryUsagePercent: number;
  diskUsedGB: number;
  diskTotalGB: number;
  diskUsagePercent: number;
  loadAvg1: number;
  loadAvg5: number;
  uptime: number; // seconds
}
// === Service Statuses ===

/** Availability state shared by all service status records. */
export type ServiceStatusType = "online" | "warning" | "offline";

/** Status of a single managed process. */
export interface ProcessStatus {
  status: ServiceStatusType;
  pid: number;
  memoryMB: number;
  uptimeSeconds: number;
  restarts: number;
}

/** Status figures for PostgreSQL. */
export interface PostgresqlStatus {
  status: ServiceStatusType;
  connections: number;
  maxConnections: number;
  latencyMs: number;
}

/** Status figures for PgBouncer. */
export interface PgBouncerStatus {
  status: ServiceStatusType;
  activeConnections: number;
  waitingClients: number;
  poolSize: number;
}

/** Status figures for Redis. */
export interface RedisStatus {
  status: ServiceStatusType;
  memoryUsedMB: number;
  connectedClients: number;
  opsPerSec: number;
}

/** Status of every internal service in one record. */
export interface ServiceStatuses {
  payload: ProcessStatus;
  queueWorker: ProcessStatus;
  postgresql: PostgresqlStatus;
  pgbouncer: PgBouncerStatus;
  redis: RedisStatus;
}
// === External Statuses ===

/** Result of the most recent SMTP check. */
export interface SmtpStatus {
  status: ServiceStatusType;
  lastCheck: string; // ISO date
  responseTimeMs: number;
}

/** Aggregate state of a set of OAuth tokens. */
export type OAuthStatusType = "ok" | "expiring_soon" | "expired" | "error";

/** Token counts for one OAuth provider together with the derived overall status. */
export interface OAuthTokenStatus {
  status: OAuthStatusType;
  tokensTotal: number;
  tokensExpiringSoon: number;
  tokensExpired: number;
}
/** Outcome of the most recent run of one cron job. */
export interface CronJobStatus {
  lastRun: string; // ISO date
  status: "ok" | "failed" | "unknown";
}

/** Most recent outcome of each tracked cron job. */
export interface CronStatuses {
  communitySync: CronJobStatus;
  tokenRefresh: CronJobStatus;
  youtubeSync: CronJobStatus;
}
/** A secret that has not expired yet but will soon. */
export interface SecretExpiringSoon {
  name: string;
  expiresAt: string;
  daysRemaining: number;
}

/** A secret whose expiry date has passed. */
export interface SecretExpired {
  name: string;
  expiresAt: string;
}

/** A secret that is overdue for rotation. */
export interface SecretRotationOverdue {
  name: string;
  rotatedAt: string;
  ageDays: number;
}

/** Result of a secrets health check: overall status plus the secrets behind it. */
export interface SecretsHealthStatus {
  status: "ok" | "warning" | "critical";
  checkedAt: string;
  missing: string[];
  expiringSoon: SecretExpiringSoon[];
  expired: SecretExpired[];
  rotationOverdue: SecretRotationOverdue[];
}
/** Security event counters, one per event type, together with the window length. */
export interface SecurityMetricsStatus {
  windowMs: number;
  counters: Array<{
    eventType: string;
    count: number;
    windowStart: string;
  }>;
}
/** Status of every external dependency and background concern in one record. */
export interface ExternalStatuses {
  smtp: SmtpStatus;
  metaOAuth: OAuthTokenStatus;
  youtubeOAuth: OAuthTokenStatus;
  cronJobs: CronStatuses;
  secrets: SecretsHealthStatus;
  securityEvents: SecurityMetricsStatus;
}
// === Performance ===

/** Aggregated response-time, error-rate and throughput figures. */
export interface PerformanceMetrics {
  avgResponseTimeMs: number;
  p95ResponseTimeMs: number;
  p99ResponseTimeMs: number;
  errorRate: number; // 0-1
  requestsPerMinute: number;
}
// === Full Snapshot ===

/** One complete monitoring snapshot: system, services, external dependencies and performance. */
export interface SystemMetrics {
  timestamp: string; // ISO date
  system: SystemHealth;
  services: ServiceStatuses;
  external: ExternalStatuses;
  performance: PerformanceMetrics;
}
// === SSE Events (discriminated union) ===

/** Event payloads; the `type` tag determines the shape of `data`. */
export type MonitoringEvent =
  | { type: "health"; data: SystemHealth }
  | { type: "service"; data: Partial<ServiceStatuses> }
  | { type: "alert"; data: AlertEvent }
  | { type: "log"; data: LogEvent }
  | { type: "performance"; data: PerformanceMetrics };

/** An alert event: the rule, the metric value, the threshold and the severity. */
export interface AlertEvent {
  id: string;
  ruleId: string;
  metric: string;
  value: number;
  threshold: number;
  severity: AlertSeverity;
  message: string;
  timestamp: string;
}

/** A single log entry with an optional structured context. */
export interface LogEvent {
  id: string;
  level: LogLevel;
  source: LogSource;
  message: string;
  timestamp: string;
  context?: Record<string, unknown>;
}
// === Enums as union types ===

/** Comparison operator of an alert rule. */
export type AlertCondition = "gt" | "lt" | "eq" | "gte" | "lte";
/** Severity level of an alert. */
export type AlertSeverity = "warning" | "error" | "critical";
/** Level of a log entry. */
export type LogLevel = "debug" | "info" | "warn" | "error" | "fatal";
/** Source that produced a log entry. */
export type LogSource = "payload" | "queue-worker" | "cron" | "email" | "oauth" | "sync" | "security";
// === Performance Tracker Entry ===

/** One tracked request as stored by the performance tracker. */
export interface PerformanceEntry {
  timestamp: number; // Date.now()
  method: string;
  path: string;
  statusCode: number;
  durationMs: number;
}