fix: install timers for all web sentinels
This commit is contained in:
@@ -55,6 +55,15 @@ interface TriggerResult {
|
||||
readonly stderrTail: string;
|
||||
}
|
||||
|
||||
/**
 * Everything needed to install the systemd service/timer pair for one
 * sentinel. Produced by `systemdUnitPlan` and consumed by `installSystemd`
 * and `statusSystemd`.
 */
interface SystemdUnitPlan {
  /** Id of the sentinel schedule this plan was generated for. */
  readonly sentinelId: string;
  /** Unit base name, without the `.service` / `.timer` suffix. */
  readonly unit: string;
  /** Path the service unit file is written to. */
  readonly servicePath: string;
  /** Path the timer unit file is written to. */
  readonly timerPath: string;
  /** Full text of the service unit file. */
  readonly service: string;
  /** Full text of the timer unit file. */
  readonly timer: string;
}
|
||||
|
||||
// NOTE(review): the code that applies the two DEFAULT_* values is outside the
// visible excerpt; the descriptions below are inferred from the names only.

/** Presumably the stale multiplier used when none is supplied. */
const DEFAULT_STALE_MULTIPLIER = 1;
/** Presumably the fetch timeout, in milliseconds, used when none is supplied. */
const DEFAULT_FETCH_TIMEOUT_MS = 15_000;
/** Seconds interpolated into the generated timer unit's `OnBootSec=` setting. */
const HOST_SCHEDULER_INTERVAL_SECONDS = 120;
|
||||
@@ -260,14 +269,48 @@ async function readOverview(schedule: SentinelSchedule, timeoutMs: number): Prom
|
||||
}
|
||||
|
||||
/**
 * Installs one systemd service/timer pair per planned sentinel.
 *
 * Without `options.confirm`, or with `options.dryRun`, nothing is written or
 * executed: the planned units are printed as JSON and the function returns.
 * Otherwise every unit file is written, the aggregate (no sentinel suffix)
 * unit is passed to `cleanupLegacyAggregateTimer`, systemd is reloaded and
 * each planned timer is enabled with `--now`.
 *
 * Sets `process.exitCode` to 2 when any collected command result has a
 * non-zero exit code.
 *
 * @param options Scheduler options; reads `confirm`, `dryRun` and `sentinelId`.
 */
function installSystemd(options: SchedulerOptions): void {
  const plans = systemdUnitPlans(options);
  // The aggregate unit is only considered when no single sentinel was requested.
  const legacyAggregate = options.sentinelId === null ? legacyAggregateSystemdPaths(options) : null;

  if (!options.confirm || options.dryRun) {
    console.log(JSON.stringify({
      ok: true,
      mode: "dry-run",
      unitCount: plans.length,
      units: plans,
      legacyAggregate,
      valuesRedacted: true,
    }, null, 2));
    return;
  }

  for (const plan of plans) {
    writeFileSync(plan.servicePath, plan.service, "utf8");
    writeFileSync(plan.timerPath, plan.timer, "utf8");
  }

  // Order matters: cleanup first, then daemon-reload, then enable each timer.
  const cleanupResults = cleanupLegacyAggregateTimer(legacyAggregate);
  const results = [
    ...cleanupResults,
    runCommand(["systemctl", "daemon-reload"], "/"),
    ...plans.map((plan) => runCommand(["systemctl", "enable", "--now", `${plan.unit}.timer`], "/")),
  ];
  printSystemdResult({ units: plans.map(unitSummary), legacyAggregate, results });
  if (results.some((result) => result.exitCode !== 0)) process.exitCode = 2;
}
|
||||
|
||||
/**
 * Builds one unit plan for every schedule returned by `systemdSchedules`.
 *
 * @param options Scheduler options forwarded to both helpers.
 * @returns The plans, in the order the schedules were returned.
 */
function systemdUnitPlans(options: SchedulerOptions): SystemdUnitPlan[] {
  return systemdSchedules(options).map((schedule) => systemdUnitPlan(options, schedule));
}
|
||||
|
||||
function systemdUnitPlan(options: SchedulerOptions, schedule: SentinelSchedule): SystemdUnitPlan {
|
||||
const unit = systemdUnitName(options, schedule.sentinelId);
|
||||
const servicePath = `/etc/systemd/system/${unit}.service`;
|
||||
const timerPath = `/etc/systemd/system/${unit}.timer`;
|
||||
const sentinelArg = options.sentinelId === null ? "" : ` --sentinel ${options.sentinelId}`;
|
||||
const timeoutArg = options.timeoutSeconds === null ? "" : ` --timeout-seconds ${options.timeoutSeconds}`;
|
||||
const serviceTimeoutSeconds = systemdServiceTimeoutSeconds(options);
|
||||
const serviceTimeoutSeconds = systemdServiceTimeoutSeconds(schedule, options);
|
||||
const service = `[Unit]
|
||||
Description=UniDesk web-probe sentinel host cadence scheduler for ${options.node}/${options.lane}
|
||||
Description=UniDesk web-probe sentinel host cadence scheduler for ${options.node}/${options.lane}/${schedule.sentinelId}
|
||||
Wants=network-online.target
|
||||
After=network-online.target
|
||||
|
||||
@@ -281,10 +324,10 @@ Environment=PATH=${SYSTEMD_PATH}
|
||||
Environment=NO_PROXY=${SYSTEMD_NO_PROXY}
|
||||
Environment=no_proxy=${SYSTEMD_NO_PROXY}
|
||||
WorkingDirectory=${repoRoot}
|
||||
ExecStart=${BUN_EXECUTABLE} ${join(repoRoot, "scripts", "web-probe-sentinel-scheduler.ts")} run --node ${options.node} --lane ${options.lane}${sentinelArg} --stale-multiplier ${options.staleMultiplier}${timeoutArg}
|
||||
ExecStart=${BUN_EXECUTABLE} ${join(repoRoot, "scripts", "web-probe-sentinel-scheduler.ts")} run --node ${options.node} --lane ${options.lane} --sentinel ${schedule.sentinelId} --stale-multiplier ${options.staleMultiplier}${timeoutArg}
|
||||
`;
|
||||
const timer = `[Unit]
|
||||
Description=Run UniDesk web-probe sentinel host cadence scheduler for ${options.node}/${options.lane}
|
||||
Description=Run UniDesk web-probe sentinel host cadence scheduler for ${options.node}/${options.lane}/${schedule.sentinelId}
|
||||
|
||||
[Timer]
|
||||
OnBootSec=${HOST_SCHEDULER_INTERVAL_SECONDS}s
|
||||
@@ -296,50 +339,95 @@ Unit=${unit}.service
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
`;
|
||||
|
||||
if (!options.confirm || options.dryRun) {
|
||||
console.log(JSON.stringify({ ok: true, mode: "dry-run", servicePath, timerPath, service, timer, valuesRedacted: true }, null, 2));
|
||||
return;
|
||||
}
|
||||
writeFileSync(servicePath, service, "utf8");
|
||||
writeFileSync(timerPath, timer, "utf8");
|
||||
const commands = [
|
||||
["systemctl", "daemon-reload"],
|
||||
["systemctl", "enable", "--now", `${unit}.timer`],
|
||||
];
|
||||
const results = commands.map((command) => runCommand(command, "/"));
|
||||
printSystemdResult(unit, servicePath, timerPath, results);
|
||||
if (results.some((result) => result.exitCode !== 0)) process.exitCode = 2;
|
||||
return { sentinelId: schedule.sentinelId, unit, servicePath, timerPath, service, timer };
|
||||
}
|
||||
|
||||
/**
 * Converts a schedule's `timeoutSeconds` to milliseconds, with a floor of
 * 60 seconds.
 *
 * @param schedule Schedule whose `timeoutSeconds` is read.
 * @returns The timeout in milliseconds, never below 60 000.
 */
function schedulerHardTimeoutMs(schedule: SentinelSchedule): number {
  const seconds = Math.max(60, schedule.timeoutSeconds);
  return seconds * 1000;
}
|
||||
|
||||
/**
 * Computes a timeout, in seconds, for a single sentinel's service.
 *
 * The result is the schedule's own timeout (at least 60 s) plus a margin:
 * the fetch timeout rounded up to whole seconds plus 15 s, but never less
 * than 30 s.
 *
 * @param schedule Schedule whose `timeoutSeconds` is read.
 * @param options Scheduler options; reads `fetchTimeoutMs`.
 * @returns Timeout in seconds.
 */
function systemdServiceTimeoutSeconds(schedule: SentinelSchedule, options: SchedulerOptions): number {
  const runSeconds = Math.max(60, schedule.timeoutSeconds);
  const marginSeconds = Math.max(30, Math.ceil(options.fetchTimeoutMs / 1000) + 15);
  return runSeconds + marginSeconds;
}
|
||||
|
||||
/**
 * Reports the systemd state of every planned sentinel timer.
 *
 * For each plan it runs `systemctl is-enabled`, `systemctl is-active` and
 * `systemctl show` (properties `NextElapseUSecRealtime` and
 * `LastTriggerUSec`) against `<unit>.timer`, prints the combined result and
 * sets `process.exitCode` to 2 when any of those commands has a non-zero
 * exit code.
 *
 * @param options Scheduler options; `sentinelId === null` additionally
 *   includes the aggregate unit's paths in the printed payload.
 */
function statusSystemd(options: SchedulerOptions): void {
  const plans = systemdUnitPlans(options);
  const results = plans.flatMap((plan) => {
    const timer = `${plan.unit}.timer`;
    return [
      runCommand(["systemctl", "is-enabled", timer], "/"),
      runCommand(["systemctl", "is-active", timer], "/"),
      runCommand(["systemctl", "show", timer, "--property=NextElapseUSecRealtime", "--property=LastTriggerUSec"], "/"),
    ];
  });
  printSystemdResult({
    units: plans.map(unitSummary),
    legacyAggregate: options.sentinelId === null ? legacyAggregateSystemdPaths(options) : null,
    results,
  });
  if (results.some((result) => result.exitCode !== 0)) process.exitCode = 2;
}
|
||||
|
||||
function printSystemdResult(unit: string, servicePath: string, timerPath: string, results: readonly CommandResult[]): void {
|
||||
console.log(JSON.stringify({
|
||||
ok: results.every((result) => result.exitCode === 0),
|
||||
/**
 * Selects the schedules that should get their own systemd units.
 *
 * When no sentinel was requested (`options.sentinelId === null`) only
 * schedules with `enabled` set are kept; otherwise the list returned by
 * `sentinelSchedules` is used as-is.
 *
 * @param options Scheduler options; reads `sentinelId`, `node` and `lane`.
 * @returns The selected schedules (never empty).
 * @throws Error when the selection is empty.
 */
function systemdSchedules(options: SchedulerOptions): SentinelSchedule[] {
  const schedules = sentinelSchedules(specFor(options), options);
  const selected = options.sentinelId === null
    ? schedules.filter((schedule) => schedule.enabled)
    : schedules;
  if (selected.length === 0) {
    throw new Error(`no enabled sentinels found for ${options.node}/${options.lane}`);
  }
  return selected;
}
|
||||
|
||||
/**
 * Returns the unit name and file paths of the aggregate unit, i.e. the unit
 * named without a sentinel suffix (`systemdUnitName(options, null)`).
 *
 * @param options Scheduler options forwarded to `systemdUnitName`.
 * @returns The unit base name plus its `.service` and `.timer` paths under
 *   `/etc/systemd/system`.
 */
function legacyAggregateSystemdPaths(options: SchedulerOptions): { unit: string; servicePath: string; timerPath: string } {
  const unit = systemdUnitName(options, null);
  return {
    unit,
    servicePath: `/etc/systemd/system/${unit}.service`,
    timerPath: `/etc/systemd/system/${unit}.timer`,
  };
}
|
||||
|
||||
/**
 * Disables and removes the aggregate timer/service unit, if it is present.
 *
 * Nothing is done (and an empty list is returned) when `legacyAggregate` is
 * `null` or when neither unit file exists. Otherwise the timer is disabled
 * with `systemctl disable --now` and each existing unit file is unlinked.
 * A failed unlink does not throw; it is recorded as a synthetic command
 * result instead.
 *
 * @param legacyAggregate Unit name and paths of the aggregate unit, or `null`.
 * @returns The `systemctl disable` result followed by one synthetic result
 *   per failed unlink.
 */
function cleanupLegacyAggregateTimer(legacyAggregate: { unit: string; servicePath: string; timerPath: string } | null): CommandResult[] {
  if (legacyAggregate === null) return [];

  const { unit, servicePath, timerPath } = legacyAggregate;
  if (!existsSync(servicePath) && !existsSync(timerPath)) return [];

  const results: CommandResult[] = [runCommand(["systemctl", "disable", "--now", `${unit}.timer`], "/")];
  for (const path of [servicePath, timerPath]) {
    if (!existsSync(path)) continue;
    try {
      unlinkSync(path);
    } catch (error) {
      // Report the failure alongside the command results rather than aborting.
      results.push(syntheticCommandResult(["unlink", path], error));
    }
  }
  return results;
}
|
||||
|
||||
/**
 * Builds a failed `CommandResult` for an operation that was not run through
 * `runCommand`, so its failure can be reported with real command results.
 *
 * @param command Command-like description of the operation.
 * @param error The caught value; an `Error` contributes its `message`,
 *   anything else is stringified.
 * @returns A result with `exitCode` 1, empty `stdout` and the error text as
 *   `stderr`.
 */
function syntheticCommandResult(command: string[], error: unknown): CommandResult {
  const stderr = error instanceof Error ? error.message : String(error);
  return {
    command,
    cwd: "/",
    exitCode: 1,
    stdout: "",
    stderr,
    signal: null,
    timedOut: false,
  };
}
|
||||
|
||||
/**
 * Reduces a unit plan to its identifying fields, leaving out the `service`
 * and `timer` file contents.
 *
 * @param plan Plan to summarise.
 * @returns An object with `sentinelId`, `unit`, `servicePath`, `timerPath`.
 */
function unitSummary(plan: SystemdUnitPlan): Record<string, unknown> {
  const { sentinelId, unit, servicePath, timerPath } = plan;
  return { sentinelId, unit, servicePath, timerPath };
}
|
||||
|
||||
/**
 * Prints a JSON report (2-space indented) for a systemd operation.
 *
 * `ok` is true only when every result has exit code 0 (so it is also true
 * for an empty result list). Each result is passed through `compactCommand`
 * before printing.
 *
 * @param payload Unit summaries, the aggregate unit's paths (or `null`) and
 *   the command results to report.
 */
function printSystemdResult(payload: {
  readonly units: readonly Record<string, unknown>[];
  readonly legacyAggregate: { unit: string; servicePath: string; timerPath: string } | null;
  readonly results: readonly CommandResult[];
}): void {
  const { units, legacyAggregate, results } = payload;
  const report = {
    ok: results.every((result) => result.exitCode === 0),
    unitCount: units.length,
    units,
    legacyAggregate,
    results: results.map(compactCommand),
    valuesRedacted: true,
  };
  console.log(JSON.stringify(report, null, 2));
}
|
||||
@@ -464,8 +552,8 @@ function parseArgs(argv: readonly string[]): SchedulerOptions {
|
||||
/**
 * Prints the command-line usage summary for the `run`, `install-systemd`
 * and `status-systemd` subcommands.
 */
function printUsage(): void {
  // NOTE(review): leading indentation of the usage lines inside the template
  // literal is not recoverable from this excerpt — confirm against the
  // checked-in file before applying.
  console.log(`Usage:
bun scripts/web-probe-sentinel-scheduler.ts run [--node D601] [--lane v03] [--sentinel ID] [--dry-run] [--force]
bun scripts/web-probe-sentinel-scheduler.ts install-systemd --node D601 --lane v03 [--sentinel ID] --confirm
bun scripts/web-probe-sentinel-scheduler.ts status-systemd --node D601 --lane v03 [--sentinel ID]
`);
}
|
||||
|
||||
@@ -576,8 +664,8 @@ function tail(value: string, maxChars: number): string {
|
||||
return value.length <= maxChars ? value : value.slice(-maxChars);
|
||||
}
|
||||
|
||||
/**
 * Builds the systemd unit base name for a node/lane and, optionally, one
 * sentinel.
 *
 * @param options Scheduler options; reads `node`, `lane` and (for the
 *   default of `sentinelId`) `sentinelId`.
 * @param sentinelId Sentinel to name the unit after. Defaults to
 *   `options.sentinelId`; pass `null` explicitly to get the aggregate name
 *   without a sentinel suffix.
 * @returns The unit name without a `.service` / `.timer` suffix; every
 *   variable segment is passed through `safeSegment`.
 */
function systemdUnitName(options: SchedulerOptions, sentinelId: string | null = options.sentinelId): string {
  const sentinelSuffix = sentinelId === null ? "" : `-${safeSegment(sentinelId)}`;
  return `unidesk-web-probe-sentinel-scheduler-${safeSegment(options.node)}-${safeSegment(options.lane)}${sentinelSuffix}`;
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user