diff --git a/.agents/skills/unidesk-monitor/SKILL.md b/.agents/skills/unidesk-monitor/SKILL.md
index c0469241..ede94a77 100644
--- a/.agents/skills/unidesk-monitor/SKILL.md
+++ b/.agents/skills/unidesk-monitor/SKILL.md
@@ -28,9 +28,13 @@ bun scripts/cli.ts web-probe sentinel validate --node D601 --lane v03 --sentinel
 bun scripts/cli.ts web-probe sentinel dashboard verify --node D601 --lane v03 --sentinel <id>
 bun scripts/cli.ts web-probe sentinel dashboard screenshot --node D601 --lane v03 --sentinel <id>
 bun scripts/cli.ts web-probe sentinel report --node D601 --lane v03 --sentinel <id> --latest --view summary
+bun scripts/web-probe-sentinel-scheduler.ts status-systemd --node D601 --lane v03
+bun scripts/web-probe-sentinel-scheduler.ts install-systemd --node D601 --lane v03 --confirm
 bun scripts/web-probe-sentinel-scheduler.ts run --node D601 --lane v03 --sentinel <id> --stale-multiplier 1 --dry-run
 ```
 
+`status-systemd` and `install-systemd` without `--sentinel` cover every enabled sentinel from the YAML registry and should expose missing per-sentinel timers; add `--sentinel <id>` only for a targeted single-sentinel operation.
+
 For long Workbench/user-path evidence, use the normal Web probe surface:
 
 ```bash
diff --git a/.agents/skills/unidesk-monitor/references/full.md b/.agents/skills/unidesk-monitor/references/full.md
index ddaa76df..d003328c 100644
--- a/.agents/skills/unidesk-monitor/references/full.md
+++ b/.agents/skills/unidesk-monitor/references/full.md
@@ -27,6 +27,16 @@ Freshness-only check:
 bun scripts/web-probe-sentinel-scheduler.ts run --node D601 --lane v03 --sentinel <id> --stale-multiplier 1 --dry-run
 ```
 
+Host timer installation/status:
+
+```bash
+bun scripts/web-probe-sentinel-scheduler.ts status-systemd --node D601 --lane v03
+bun scripts/web-probe-sentinel-scheduler.ts install-systemd --node D601 --lane v03 --confirm
+bun scripts/web-probe-sentinel-scheduler.ts status-systemd --node D601 --lane v03 --sentinel <id>
+```
+
+Without `--sentinel`, `status-systemd` and `install-systemd` enumerate every enabled sentinel from the YAML registry and manage independent per-sentinel timers. Use this when a sentinel's latest run is stale: a missing timer is a runtime defect even if `run --dry-run` can enumerate the sentinel and mark it due.
+
 Dashboard render and screenshot verification:
 
 ```bash
diff --git a/scripts/web-probe-sentinel-scheduler.ts b/scripts/web-probe-sentinel-scheduler.ts
index acc441c8..01ed3417 100644
--- a/scripts/web-probe-sentinel-scheduler.ts
+++ b/scripts/web-probe-sentinel-scheduler.ts
@@ -55,6 +55,15 @@ interface TriggerResult {
   readonly stderrTail: string;
 }
 
+interface SystemdUnitPlan {
+  readonly sentinelId: string;
+  readonly unit: string;
+  readonly servicePath: string;
+  readonly timerPath: string;
+  readonly service: string;
+  readonly timer: string;
+}
+
 const DEFAULT_STALE_MULTIPLIER = 1;
 const DEFAULT_FETCH_TIMEOUT_MS = 15_000;
 const HOST_SCHEDULER_INTERVAL_SECONDS = 120;
@@ -260,14 +269,48 @@ async function readOverview(schedule: SentinelSchedule, timeoutMs: number): Prom
 }
 
 function installSystemd(options: SchedulerOptions): void {
-  const unit = systemdUnitName(options);
+  const plans = systemdUnitPlans(options);
+  const legacyAggregate = options.sentinelId === null ? legacyAggregateSystemdPaths(options) : null;
+  if (!options.confirm || options.dryRun) {
+    console.log(JSON.stringify({
+      ok: true,
+      mode: "dry-run",
+      unitCount: plans.length,
+      units: plans,
+      legacyAggregate,
+      valuesRedacted: true,
+    }, null, 2));
+    return;
+  }
+
+  for (const plan of plans) {
+    writeFileSync(plan.servicePath, plan.service, "utf8");
+    writeFileSync(plan.timerPath, plan.timer, "utf8");
+  }
+
+  const cleanupResults = cleanupLegacyAggregateTimer(legacyAggregate);
+  const results = [
+    ...cleanupResults,
+    runCommand(["systemctl", "daemon-reload"], "/"),
+    ...plans.map((plan) => runCommand(["systemctl", "enable", "--now", `${plan.unit}.timer`], "/")),
+  ];
+  printSystemdResult({ units: plans.map(unitSummary), legacyAggregate, results });
+  if (results.some((result) => result.exitCode !== 0)) process.exitCode = 2;
+}
+
+function systemdUnitPlans(options: SchedulerOptions): SystemdUnitPlan[] {
+  const schedules = systemdSchedules(options);
+  return schedules.map((schedule) => systemdUnitPlan(options, schedule));
+}
+
+function systemdUnitPlan(options: SchedulerOptions, schedule: SentinelSchedule): SystemdUnitPlan {
+  const unit = systemdUnitName(options, schedule.sentinelId);
   const servicePath = `/etc/systemd/system/${unit}.service`;
   const timerPath = `/etc/systemd/system/${unit}.timer`;
-  const sentinelArg = options.sentinelId === null ? "" : ` --sentinel ${options.sentinelId}`;
   const timeoutArg = options.timeoutSeconds === null ? "" : ` --timeout-seconds ${options.timeoutSeconds}`;
-  const serviceTimeoutSeconds = systemdServiceTimeoutSeconds(options);
+  const serviceTimeoutSeconds = systemdServiceTimeoutSeconds(schedule, options);
   const service = `[Unit]
-Description=UniDesk web-probe sentinel host cadence scheduler for ${options.node}/${options.lane}
+Description=UniDesk web-probe sentinel host cadence scheduler for ${options.node}/${options.lane}/${schedule.sentinelId}
 Wants=network-online.target
 After=network-online.target
 
@@ -281,10 +324,10 @@ Environment=PATH=${SYSTEMD_PATH}
 Environment=NO_PROXY=${SYSTEMD_NO_PROXY}
 Environment=no_proxy=${SYSTEMD_NO_PROXY}
 WorkingDirectory=${repoRoot}
-ExecStart=${BUN_EXECUTABLE} ${join(repoRoot, "scripts", "web-probe-sentinel-scheduler.ts")} run --node ${options.node} --lane ${options.lane}${sentinelArg} --stale-multiplier ${options.staleMultiplier}${timeoutArg}
+ExecStart=${BUN_EXECUTABLE} ${join(repoRoot, "scripts", "web-probe-sentinel-scheduler.ts")} run --node ${options.node} --lane ${options.lane} --sentinel ${schedule.sentinelId} --stale-multiplier ${options.staleMultiplier}${timeoutArg}
 `;
   const timer = `[Unit]
-Description=Run UniDesk web-probe sentinel host cadence scheduler for ${options.node}/${options.lane}
+Description=Run UniDesk web-probe sentinel host cadence scheduler for ${options.node}/${options.lane}/${schedule.sentinelId}
 
 [Timer]
 OnBootSec=${HOST_SCHEDULER_INTERVAL_SECONDS}s
@@ -296,50 +339,95 @@ Unit=${unit}.service
 [Install]
 WantedBy=timers.target
 `;
-
-  if (!options.confirm || options.dryRun) {
-    console.log(JSON.stringify({ ok: true, mode: "dry-run", servicePath, timerPath, service, timer, valuesRedacted: true }, null, 2));
-    return;
-  }
-  writeFileSync(servicePath, service, "utf8");
-  writeFileSync(timerPath, timer, "utf8");
-  const commands = [
-    ["systemctl", "daemon-reload"],
-    ["systemctl", "enable", "--now", `${unit}.timer`],
-  ];
-  const results = commands.map((command) => runCommand(command, "/"));
-  printSystemdResult(unit, servicePath, timerPath, results);
-  if (results.some((result) => result.exitCode !== 0)) process.exitCode = 2;
+  return { sentinelId: schedule.sentinelId, unit, servicePath, timerPath, service, timer };
 }
 
 function schedulerHardTimeoutMs(schedule: SentinelSchedule): number {
   return Math.max(60, schedule.timeoutSeconds) * 1000;
 }
 
-function systemdServiceTimeoutSeconds(options: SchedulerOptions): number {
-  const schedules = sentinelSchedules(specFor(options), options);
-  const maxTimeout = Math.max(...schedules.map((schedule) => Math.max(60, schedule.timeoutSeconds)));
-  return maxTimeout + Math.max(30, Math.ceil(options.fetchTimeoutMs / 1000) + 15);
+function systemdServiceTimeoutSeconds(schedule: SentinelSchedule, options: SchedulerOptions): number {
+  return Math.max(60, schedule.timeoutSeconds) + Math.max(30, Math.ceil(options.fetchTimeoutMs / 1000) + 15);
 }
 
 function statusSystemd(options: SchedulerOptions): void {
-  const unit = systemdUnitName(options);
-  const results = [
-    runCommand(["systemctl", "is-enabled", `${unit}.timer`], "/"),
-    runCommand(["systemctl", "is-active", `${unit}.timer`], "/"),
-    runCommand(["systemctl", "show", `${unit}.timer`, "--property=NextElapseUSecRealtime", "--property=LastTriggerUSec"], "/"),
-  ];
-  printSystemdResult(unit, `/etc/systemd/system/${unit}.service`, `/etc/systemd/system/${unit}.timer`, results);
-  if (results[1]?.exitCode !== 0) process.exitCode = 2;
+  const plans = systemdUnitPlans(options);
+  const results = plans.flatMap((plan) => [
+    runCommand(["systemctl", "is-enabled", `${plan.unit}.timer`], "/"),
+    runCommand(["systemctl", "is-active", `${plan.unit}.timer`], "/"),
+    runCommand(["systemctl", "show", `${plan.unit}.timer`, "--property=NextElapseUSecRealtime", "--property=LastTriggerUSec"], "/"),
+  ]);
+  printSystemdResult({
+    units: plans.map(unitSummary),
+    legacyAggregate: options.sentinelId === null ? legacyAggregateSystemdPaths(options) : null,
+    results,
+  });
+  if (results.some((result) => result.exitCode !== 0)) process.exitCode = 2;
 }
 
-function printSystemdResult(unit: string, servicePath: string, timerPath: string, results: readonly CommandResult[]): void {
-  console.log(JSON.stringify({
-    ok: results.every((result) => result.exitCode === 0),
+function systemdSchedules(options: SchedulerOptions): SentinelSchedule[] {
+  const schedules = sentinelSchedules(specFor(options), options);
+  const selected = options.sentinelId === null ? schedules.filter((schedule) => schedule.enabled) : schedules;
+  if (selected.length === 0) throw new Error(`no enabled sentinels found for ${options.node}/${options.lane}`);
+  return selected;
+}
+
+function legacyAggregateSystemdPaths(options: SchedulerOptions): { unit: string; servicePath: string; timerPath: string } {
+  const unit = systemdUnitName(options, null);
+  return {
     unit,
-    servicePath,
-    timerPath,
-    results: results.map(compactCommand),
+    servicePath: `/etc/systemd/system/${unit}.service`,
+    timerPath: `/etc/systemd/system/${unit}.timer`,
+  };
+}
+
+function cleanupLegacyAggregateTimer(legacyAggregate: { unit: string; servicePath: string; timerPath: string } | null): CommandResult[] {
+  if (legacyAggregate === null) return [];
+  if (!existsSync(legacyAggregate.servicePath) && !existsSync(legacyAggregate.timerPath)) return [];
+  const results = [runCommand(["systemctl", "disable", "--now", `${legacyAggregate.unit}.timer`], "/")];
+  for (const path of [legacyAggregate.servicePath, legacyAggregate.timerPath]) {
+    if (!existsSync(path)) continue;
+    try {
+      unlinkSync(path);
+    } catch (error) {
+      results.push(syntheticCommandResult(["unlink", path], error));
+    }
+  }
+  return results;
+}
+
+function syntheticCommandResult(command: string[], error: unknown): CommandResult {
+  return {
+    command,
+    cwd: "/",
+    exitCode: 1,
+    stdout: "",
+    stderr: error instanceof Error ? error.message : String(error),
+    signal: null,
+    timedOut: false,
+  };
+}
+
+function unitSummary(plan: SystemdUnitPlan): Record<string, string> {
+  return {
+    sentinelId: plan.sentinelId,
+    unit: plan.unit,
+    servicePath: plan.servicePath,
+    timerPath: plan.timerPath,
+  };
+}
+
+function printSystemdResult(payload: {
+  readonly units: readonly Record<string, string>[];
+  readonly legacyAggregate: { unit: string; servicePath: string; timerPath: string } | null;
+  readonly results: readonly CommandResult[];
+}): void {
+  console.log(JSON.stringify({
+    ok: payload.results.every((result) => result.exitCode === 0),
+    unitCount: payload.units.length,
+    units: payload.units,
+    legacyAggregate: payload.legacyAggregate,
+    results: payload.results.map(compactCommand),
     valuesRedacted: true,
   }, null, 2));
 }
@@ -464,8 +552,8 @@ function parseArgs(argv: readonly string[]): SchedulerOptions {
 function printUsage(): void {
   console.log(`Usage:
   bun scripts/web-probe-sentinel-scheduler.ts run [--node D601] [--lane v03] [--sentinel ID] [--dry-run] [--force]
-  bun scripts/web-probe-sentinel-scheduler.ts install-systemd --node D601 --lane v03 --confirm
-  bun scripts/web-probe-sentinel-scheduler.ts status-systemd --node D601 --lane v03
+  bun scripts/web-probe-sentinel-scheduler.ts install-systemd --node D601 --lane v03 [--sentinel ID] --confirm
+  bun scripts/web-probe-sentinel-scheduler.ts status-systemd --node D601 --lane v03 [--sentinel ID]
 `);
 }
 
@@ -576,8 +664,8 @@ function tail(value: string, maxChars: number): string {
   return value.length <= maxChars ? value : value.slice(-maxChars);
 }
 
-function systemdUnitName(options: SchedulerOptions): string {
-  const sentinel = options.sentinelId === null ? "" : `-${safeSegment(options.sentinelId)}`;
+function systemdUnitName(options: SchedulerOptions, sentinelId: string | null = options.sentinelId): string {
+  const sentinel = sentinelId === null ? "" : `-${safeSegment(sentinelId)}`;
   return `unidesk-web-probe-sentinel-scheduler-${safeSegment(options.node)}-${safeSegment(options.lane)}${sentinel}`;
 }
 