From d61d1e2f98a715d521a753ddf719d9ca4740aa29 Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 28 Jun 2026 00:39:48 +0000 Subject: [PATCH] fix: keep sentinel health serving during backlog alerts --- scripts/src/hwlab-node-web-sentinel-service.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/src/hwlab-node-web-sentinel-service.ts b/scripts/src/hwlab-node-web-sentinel-service.ts index c2ab84fb..386a7e92 100644 --- a/scripts/src/hwlab-node-web-sentinel-service.ts +++ b/scripts/src/hwlab-node-web-sentinel-service.ts @@ -282,7 +282,10 @@ export function startWebProbeSentinelHttpService(service: WebProbeSentinelServic async function sentinelFetch(service: WebProbeSentinelService, request: Request): Promise { const url = new URL(request.url); const pathname = normalizedSentinelRequestPath(service, url.pathname); - if (request.method === "GET" && pathname === "/api/health") return jsonResponse(service.health(), service.health().ok === true ? 200 : 503); + if (request.method === "GET" && pathname === "/api/health") { + const health = service.health(); + return jsonResponse(health, health.serving === false ? 503 : 200); + } if (request.method === "GET" && pathname === "/api/status") return jsonResponse(service.status()); if (request.method === "GET" && pathname === "/api/overview") return jsonResponse(service.overview()); if (request.method === "GET" && pathname === "/api/runs") return jsonResponse(service.dashboardRuns(url)); @@ -415,8 +418,9 @@ function serviceHealth(config: WebProbeSentinelServiceConfig, db: Database, sche const heartbeatAt = stringOrNull(scheduler.schedulerHeartbeatAt) ?? stringOrNull(readMetadata(db, "scheduler.heartbeat")?.at); const heartbeatAgeSeconds = heartbeatAt === null ? null : Math.max(0, Math.round((Date.now() - Date.parse(heartbeatAt)) / 1000)); const planned = plannedRunBacklog(config, db); + const schedulerServing = scheduler.schedulerLastError === null && heartbeatAgeSeconds !== null && heartbeatAgeSeconds <= config.schedulerHeartbeatStaleSeconds; checks.scheduler = { - ok: scheduler.schedulerLastError === null && heartbeatAgeSeconds !== null && heartbeatAgeSeconds <= config.schedulerHeartbeatStaleSeconds && !planned.stale, + ok: schedulerServing && !planned.stale, enabled: scheduler.schedulerEnabled === true, active: scheduler.schedulerTimerActive === true, heartbeatAt, @@ -438,7 +442,8 @@ function serviceHealth(config: WebProbeSentinelServiceConfig, db: Database, sche command: `bun scripts/cli.ts web-probe observe analyze --node ${config.node} --lane ${config.lane} --state-dir `, }; const ok = Object.values(checks).every((check) => check.ok === true); - return { ok, status: ok ? "healthy" : "degraded", node: config.node, lane: config.lane, sentinelId: config.sentinelId, checks, valuesRedacted: true }; + const serving = checks.config.ok === true && checks.pvc.ok === true && checks.sqlite.ok === true && schedulerServing; + return { ok, serving, status: ok ? "healthy" : serving ? "degraded" : "unavailable", node: config.node, lane: config.lane, sentinelId: config.sentinelId, checks, valuesRedacted: true }; } function checkWritable(stateRoot: string): Record {