Merge pull request #1183 from pikasTech/fix/1181-sentinel-health-serving
fix: keep sentinel health serving during backlog alerts
This commit is contained in:
@@ -282,7 +282,10 @@ export function startWebProbeSentinelHttpService(service: WebProbeSentinelServic
 async function sentinelFetch(service: WebProbeSentinelService, request: Request): Promise<Response> {
   const url = new URL(request.url);
   const pathname = normalizedSentinelRequestPath(service, url.pathname);
-  if (request.method === "GET" && pathname === "/api/health") return jsonResponse(service.health(), service.health().ok === true ? 200 : 503);
+  if (request.method === "GET" && pathname === "/api/health") {
+    const health = service.health();
+    return jsonResponse(health, health.serving === false ? 503 : 200);
+  }
   if (request.method === "GET" && pathname === "/api/status") return jsonResponse(service.status());
   if (request.method === "GET" && pathname === "/api/overview") return jsonResponse(service.overview());
   if (request.method === "GET" && pathname === "/api/runs") return jsonResponse(service.dashboardRuns(url));
@@ -415,8 +418,9 @@ function serviceHealth(config: WebProbeSentinelServiceConfig, db: Database, sche
   const heartbeatAt = stringOrNull(scheduler.schedulerHeartbeatAt) ?? stringOrNull(readMetadata(db, "scheduler.heartbeat")?.at);
   const heartbeatAgeSeconds = heartbeatAt === null ? null : Math.max(0, Math.round((Date.now() - Date.parse(heartbeatAt)) / 1000));
   const planned = plannedRunBacklog(config, db);
+  const schedulerServing = scheduler.schedulerLastError === null && heartbeatAgeSeconds !== null && heartbeatAgeSeconds <= config.schedulerHeartbeatStaleSeconds;
   checks.scheduler = {
-    ok: scheduler.schedulerLastError === null && heartbeatAgeSeconds !== null && heartbeatAgeSeconds <= config.schedulerHeartbeatStaleSeconds && !planned.stale,
+    ok: schedulerServing && !planned.stale,
     enabled: scheduler.schedulerEnabled === true,
     active: scheduler.schedulerTimerActive === true,
     heartbeatAt,
@@ -438,7 +442,8 @@ function serviceHealth(config: WebProbeSentinelServiceConfig, db: Database, sche
     command: `bun scripts/cli.ts web-probe observe analyze --node ${config.node} --lane ${config.lane} --state-dir <stateDir>`,
   };
   const ok = Object.values(checks).every((check) => check.ok === true);
-  return { ok, status: ok ? "healthy" : "degraded", node: config.node, lane: config.lane, sentinelId: config.sentinelId, checks, valuesRedacted: true };
+  const serving = checks.config.ok === true && checks.pvc.ok === true && checks.sqlite.ok === true && schedulerServing;
+  return { ok, serving, status: ok ? "healthy" : serving ? "degraded" : "unavailable", node: config.node, lane: config.lane, sentinelId: config.sentinelId, checks, valuesRedacted: true };
 }
 
 function checkWritable(stateRoot: string): Record<string, unknown> {
Reference in New Issue
Block a user