fix: enforce sentinel scheduler hard timeout

This commit is contained in:
Codex
2026-06-27 08:18:50 +00:00
parent ae95719d45
commit 25e2189219
+20 -3
View File
@@ -188,9 +188,11 @@ async function triggerSentinel(options: SchedulerOptions, schedule: SentinelSche
"--timeout-seconds",
String(schedule.timeoutSeconds),
];
const hardTimeoutMs = schedulerHardTimeoutMs(schedule);
const result = await runCommandObserved(command, repoRoot, {
timeoutMs: Math.max(60, schedule.timeoutSeconds + 90) * 1000,
timeoutMs: hardTimeoutMs,
heartbeatMs: 30_000,
killAfterMs: 3_000,
maxCaptureChars: 24_000,
env: { ...process.env, NO_COLOR: "1" },
});
@@ -215,7 +217,7 @@ async function triggerSentinel(options: SchedulerOptions, schedule: SentinelSche
latestRunIdAfter: after.latestRunId,
status,
stdoutTail: tail(result.stdout, 900),
stderrTail: tail(result.stderr, 900),
stderrTail: tail(result.timedOut ? `${result.stderr}\nscheduler hard timeout after ${Math.round(hardTimeoutMs / 1000)}s` : result.stderr, 900),
};
}
@@ -258,6 +260,8 @@ function installSystemd(options: SchedulerOptions): void {
const servicePath = `/etc/systemd/system/${unit}.service`;
const timerPath = `/etc/systemd/system/${unit}.timer`;
const sentinelArg = options.sentinelId === null ? "" : ` --sentinel ${options.sentinelId}`;
const timeoutArg = options.timeoutSeconds === null ? "" : ` --timeout-seconds ${options.timeoutSeconds}`;
const serviceTimeoutSeconds = systemdServiceTimeoutSeconds(options);
const service = `[Unit]
Description=UniDesk web-probe sentinel host cadence scheduler for ${options.node}/${options.lane}
Wants=network-online.target
@@ -265,12 +269,15 @@ After=network-online.target
[Service]
Type=oneshot
TimeoutStartSec=${serviceTimeoutSeconds}s
TimeoutStopSec=15s
KillMode=control-group
Environment=HOME=/root
Environment=PATH=${SYSTEMD_PATH}
Environment=NO_PROXY=${SYSTEMD_NO_PROXY}
Environment=no_proxy=${SYSTEMD_NO_PROXY}
WorkingDirectory=${repoRoot}
ExecStart=${BUN_EXECUTABLE} ${join(repoRoot, "scripts", "web-probe-sentinel-scheduler.ts")} run --node ${options.node} --lane ${options.lane}${sentinelArg} --stale-multiplier ${options.staleMultiplier}
ExecStart=${BUN_EXECUTABLE} ${join(repoRoot, "scripts", "web-probe-sentinel-scheduler.ts")} run --node ${options.node} --lane ${options.lane}${sentinelArg} --stale-multiplier ${options.staleMultiplier}${timeoutArg}
`;
const timer = `[Unit]
Description=Run UniDesk web-probe sentinel host cadence scheduler for ${options.node}/${options.lane}
@@ -301,6 +308,16 @@ WantedBy=timers.target
if (results.some((result) => result.exitCode !== 0)) process.exitCode = 2;
}
/**
 * Hard wall-clock limit, in milliseconds, for one sentinel command run.
 *
 * The schedule's `timeoutSeconds` is used as-is, except that it is never
 * allowed to drop below a 60-second floor.
 *
 * @param schedule - Schedule whose `timeoutSeconds` drives the limit.
 * @returns The limit in milliseconds (always >= 60_000 for finite input).
 */
function schedulerHardTimeoutMs(schedule: SentinelSchedule): number {
  const floorSeconds = 60;
  const effectiveSeconds = Math.max(floorSeconds, schedule.timeoutSeconds);
  // Convert seconds to milliseconds.
  return effectiveSeconds * 1000;
}
/**
 * Computes the number of seconds interpolated into `TimeoutStartSec=` of the
 * generated systemd service unit.
 *
 * The result is the largest per-sentinel timeout among the schedules selected
 * by `options` (each floored at 60 seconds), plus a margin of
 * `max(30, ceil(fetchTimeoutMs / 1000) + 15)` seconds.
 * NOTE(review): the margin presumably covers scheduler work outside the
 * sentinel command itself (e.g. fetches) — confirm against the `run` path.
 *
 * @param options - Scheduler options; used to resolve the schedules and to
 *   read `fetchTimeoutMs`.
 * @returns Service start timeout in seconds; at least 90 even when no
 *   schedule matches.
 */
function systemdServiceTimeoutSeconds(options: SchedulerOptions): number {
  const schedules = sentinelSchedules(specFor(options), options);
  // The 60s floor is passed as an explicit argument because Math.max() with
  // no arguments returns -Infinity; spreading an empty schedule list alone
  // would otherwise emit "TimeoutStartSec=-Infinitys" into the unit file.
  const maxTimeout = Math.max(60, ...schedules.map((schedule) => schedule.timeoutSeconds));
  const marginSeconds = Math.max(30, Math.ceil(options.fetchTimeoutMs / 1000) + 15);
  return maxTimeout + marginSeconds;
}
function statusSystemd(options: SchedulerOptions): void {
const unit = systemdUnitName(options);
const results = [