From 9ffb781c8bb9c5012e01fdbfd0409eb6bb034168 Mon Sep 17 00:00:00 2001 From: Codex Date: Sat, 27 Jun 2026 07:28:31 +0000 Subject: [PATCH] fix: restore web sentinel cadence and monitor hover --- .../PJ2026-01060508-web-probe-sentinel.md | 8 +- .../monitor-web.css | 65 +- .../monitor-web.js | 94 ++- scripts/src/hwlab-node-web-sentinel-cicd.ts | 69 +++ scripts/web-probe-sentinel-scheduler.ts | 554 ++++++++++++++++++ 5 files changed, 775 insertions(+), 15 deletions(-) create mode 100644 scripts/web-probe-sentinel-scheduler.ts diff --git a/project-management/PJ2026-01/specs/PJ2026-01060508-web-probe-sentinel.md b/project-management/PJ2026-01/specs/PJ2026-01060508-web-probe-sentinel.md index 52cafef9..f7ddf42d 100644 --- a/project-management/PJ2026-01/specs/PJ2026-01060508-web-probe-sentinel.md +++ b/project-management/PJ2026-01/specs/PJ2026-01060508-web-probe-sentinel.md @@ -23,7 +23,7 @@ | Dashboard 实现引用版本 | draft-2026-06-26-p8-web-probe-sentinel-recovery | | 多实例实现引用版本 | draft-2026-06-26-p9-multi-web-probe-sentinel | | Monitor Web 聚合实现引用版本 | draft-2026-06-26-p10-monitor-web-aggregation | -| Monitor Web 观察面板治理实现引用版本 | draft-2026-06-27-p11-monitor-web-observability-dashboard | +| Monitor Web 观察面板治理实现引用版本 | draft-2026-06-27-p11-monitor-web-observability-dashboard; draft-2026-06-27-p12-cadence-scheduler-monitor-web | | 需求规格模板 | [ISO/IEC/IEEE 29148 需求规格模板](../../templates/iso-iec-ieee-29148-requirements-spec-template.md) | | 上级规格 | [PJ2026-010605 运维监控](PJ2026-010605-observability-monitoring.md) | | 关联规格 | [PJ2026-010401 Web工作台](PJ2026-010401-web-workbench.md)、[PJ2026-0104010803 Workbench唯一投影](PJ2026-0104010803-workbench-unique-projection.md)、[PJ2026-010403 API契约](PJ2026-010403-api-contract.md)、[PJ2026-010601 发布流水](PJ2026-010601-controlled-release.md)、[PJ2026-010602 源码同步](PJ2026-010602-source-sync.md)、[PJ2026-010603 YAML运维](PJ2026-010603-yaml-first-ops.md)、[PJ2026-010604 公开入口](PJ2026-010604-public-entry.md)、[PJ2026-01060505 
Workbench性能](PJ2026-01060505-workbench-performance.md) | @@ -639,6 +639,10 @@ P10 dashboard 受控验收必须沉淀为 CLI 入口。`web-probe sentinel dashb cadence freshness 必须成为 `monitor-web` 的一等状态。每个 sentinel 应显示 YAML expected cadence、scheduler heartbeat age、latest run age、latest analyzed report age、active run、planned/next run 和 stale 倍数。cadence stale 默认是非阻塞告警;只有 scheduler 停摆、run/report 长时间不产生、submit/command 失败、采样样本缺失、或 Code Agent 多轮业务链路不可继续时,才升级为 blocker。面板不得把 timing warning、terminal-boundary elapsed correction 或单纯超时预算告警伪装成业务 blocker。 +P12 runner-served-bridge 形态下,sentinel runner Pod 可以只承载 API、PVC/SQLite index、health、metrics 和 dashboard 静态资源;若 Pod 内没有完整 repo 配置、`trans`、Chromium 或 observe 依赖,不得让 Pod 自行 SSH/回调宿主机触发巡检。周期巡检必须由受控宿主控制面调度器读取同一 YAML registry、scenario/workflow cadence、publicExposure 和 targetValidation timeout,按 stale 窗口触发现有 `web-probe sentinel validate --quick-verify --confirm --wait` 路径。该调度器只负责 due 判断、互斥锁、timeout、命令执行和 JSONL 事件日志,不实现第二套采样、analyze、finding 分类或 report 写入。 + +宿主控制面调度器必须能被 systemd timer 或等价受控入口周期调用,默认 tick 间隔不得替代 YAML cadence;每次 tick 必须输出 sentinel id、cadence、latest run age、due、trigger status、latest run id 和下一步 drill-down。触发失败要区分业务 finding、命令 submit/control 失败、overview/API 不可达、lock-held 和 timeout;业务 finding 已产生新 run 时不得把 scheduler 本身标为 infra blocker。`monitor-web` 应继续把 stale run 作为非阻塞告警展示,但 run/report 持续不更新或 submit/control 失败必须能在面板和 CLI 中直接看到根因。 + `monitor-web` 前端必须使用 Vue 3 + TypeScript + Vite,并与 HWLAB Cloud Web/Sub2API 运维图表的组件化方式对齐:typed API client、format composable、auto refresh composable、chart component、timeline component、run table、detail tabs、finding groups、loading/empty/error 状态和深链路由。图表库不是前置结论;可选 Chart.js、ECharts 或原生 SVG/canvas,但 SPEC/PR 必须说明包体、构建耗时、交互能力和维护成本取舍。 Vue `monitor-web` 的 CI/CD 必须和架构一起交付。YAML 必须声明 source、build context、Node/Bun/Vite 构建环境、env image、dependency cache、registry image、GitOps path、Argo Application、Service、publicExposure、runner discovery selector 和 screenshot 验收命令。CI 读源码必须优先走 node/lane 声明的 git mirror read URL;触发 PipelineRun 前做受控 
pre-sync,GitOps promotion 后做受控 post-flush,并在 status/closeout 中输出 `pendingFlush`、`githubInSync`、source commit、GitOps revision 和 PipelineRun 名称。 @@ -666,3 +670,5 @@ P9 多实例巡检与账号切换链路执行 issue 为 [#1017](https://github.c P10 monitor-web 聚合执行 issue 为 [#1056](https://github.com/pikasTech/unidesk/issues/1056)。P10 closeout 必须回写:SPEC P10 引用、runner/web 职责拆分状态、Vue+TS monitor-web 迁移边界或短修边界、root 与至少一个 route prefix 的 browser render 证据、`web-probe sentinel dashboard verify|screenshot` 证据、publicExposure 和 runtime provenance、单哨兵 API 兼容性、未完成 monitor-web 架构项是否拆出后续 issue,以及 `unidesk-monitor` skill 是否记录当前操作面。 P11 monitor-web 观察面板治理执行 issue 为 [#1112](https://github.com/pikasTech/unidesk/issues/1112)。P11 的第一阶段必须先完成本 SPEC 收敛;SPEC 未合并前不得推进 Vue `monitor-web` 实现、CI/CD、GitOps、publicExposure 或部署代码。P11 实现 PR closeout 必须回写:SPEC P11 引用、Vue monitor-web 源码和 CI/CD 文件头部追溯、趋势曲线和运行时间线截图、固定视口三栏 overflow 摘要、cadence freshness 状态、env reuse/buildServices 证据、git mirror pre-sync/post-flush 证据、PipelineRun/Argo/GitOps/source alignment、root 与至少一个 sentinel detail 远程截图 localPath/SHA,以及超过两分钟 CI/CD 耗时是否已先从 env reuse/git mirror 方向优化。 + +P12 cadence 调度和 monitor-web 交互修复执行 issue 为 [#1123](https://github.com/pikasTech/unidesk/issues/1123)。P12 closeout 必须回写:SPEC P12 引用、两个 10m cadence sentinel 的 stale 证据、宿主控制面调度器 due 判断和触发记录、auth sentinel Argo/source alignment、趋势曲线 hover 数值和时间截图/DOM 证据、三栏 sticky header 遮盖复测、远程 PNG localPath/SHA、systemd timer 状态、以及两个目标 sentinel 最新 run 已刷新到当前窗口的证据。 diff --git a/scripts/assets/web-probe-sentinel-monitor-web/monitor-web.css b/scripts/assets/web-probe-sentinel-monitor-web/monitor-web.css index 4e3bf213..56f509f3 100644 --- a/scripts/assets/web-probe-sentinel-monitor-web/monitor-web.css +++ b/scripts/assets/web-probe-sentinel-monitor-web/monitor-web.css @@ -1,3 +1,4 @@ +/* SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-27-p12-cadence-scheduler-monitor-web. 
*/ :root { color-scheme: light; --bg: #f5f7f8; @@ -320,7 +321,7 @@ select { border: 1px solid var(--line); border-radius: 8px; background: linear-gradient(180deg, #ffffff 0%, #f7faf9 100%); - overflow: hidden; + overflow: visible; } .trend-chart { @@ -377,6 +378,47 @@ select { fill: var(--amber); } +.trend-dot-hit { + cursor: default; + outline: none; +} + +.trend-dot-hit:focus-visible .trend-hit-area { + stroke: var(--blue); + stroke-width: 1.5; +} + +.trend-hit-area { + fill: transparent; + stroke: transparent; +} + +.trend-tooltip { + position: absolute; + z-index: 8; + display: grid; + width: min(218px, calc(100% - 16px)); + max-width: calc(100% - 16px); + gap: 3px; + transform: translate(-50%, -100%); + border: 1px solid var(--line-strong); + border-radius: 8px; + background: #ffffff; + box-shadow: 0 12px 28px rgba(32, 51, 48, 0.16); + color: var(--muted); + padding: 9px 10px; + pointer-events: none; + font-size: 12px; + line-height: 1.35; +} + +.trend-tooltip strong { + overflow: hidden; + color: var(--text); + text-overflow: ellipsis; + white-space: nowrap; +} + .trend-legend { display: flex; flex-wrap: wrap; @@ -513,6 +555,8 @@ select { } .pane { + position: relative; + isolation: isolate; display: flex; min-width: 0; min-height: 0; @@ -523,16 +567,27 @@ select { .pane-header { position: sticky; - top: 0; - z-index: 2; + top: -12px; + z-index: 12; display: flex; align-items: start; justify-content: space-between; gap: 10px; margin: -12px -12px 10px; - padding: 12px; + padding: 12px 12px 13px; border-bottom: 1px solid var(--line); - background: rgba(255, 255, 255, 0.96); + background: var(--panel); + box-shadow: 0 10px 16px rgba(32, 51, 48, 0.08); +} + +.pane-header::before { + position: absolute; + right: 0; + left: 0; + top: -18px; + height: 18px; + background: var(--panel); + content: ""; } .pane-header h2 { diff --git a/scripts/assets/web-probe-sentinel-monitor-web/monitor-web.js b/scripts/assets/web-probe-sentinel-monitor-web/monitor-web.js index 
7a589aec..977dad57 100644 --- a/scripts/assets/web-probe-sentinel-monitor-web/monitor-web.js +++ b/scripts/assets/web-probe-sentinel-monitor-web/monitor-web.js @@ -1,3 +1,5 @@ +// SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-27-p12-cadence-scheduler-monitor-web. +// Responsibility: Vue monitor-web runtime for sentinel trend, timeline, detail and finding observability. import { createApp, computed, onMounted, ref } from "./vendor/vue.esm-browser.prod.js"; const bootstrap = readBootstrap(); @@ -17,6 +19,7 @@ createApp({ const autoRefresh = ref(true); const refreshSeconds = ref(30); const lastLoadedAt = ref(""); + const hoveredTrendDot = ref(null); let lastAutoRefreshAt = 0; const sentinels = computed(() => { @@ -43,14 +46,34 @@ createApp({ warning: trendPolyline((run) => warningCount(run)), total: trendPolyline((run) => findingCount(run)), })); - const trendDots = computed(() => trendRows.value.map((run, index) => ({ - id: run.id || String(index), - x: trendX(index, trendRows.value.length), - redY: trendY(redCount(run)), - warningY: trendY(warningCount(run)), - severity: severityClass(run), - title: `${shortId(run.id)} ${formatDate(run.updatedAt || run.createdAt)}`, - }))); + const trendDots = computed(() => trendRows.value.map((run, index) => { + const red = redCount(run); + const warning = warningCount(run); + const total = findingCount(run); + const x = trendX(index, trendRows.value.length); + const redY = trendY(red); + const warningY = trendY(warning); + const rawTime = run.updatedAt || run.createdAt || ""; + return { + id: run.id || String(index), + runId: run.id || "", + x, + redY, + warningY, + tooltipLeft: `${clamp((x / 720) * 100, 16, 84)}%`, + tooltipTop: `${clamp(((Math.min(redY, warningY) + 18) / 142) * 100, 24, 76)}%`, + red, + warning, + total, + status: run.status || "-", + severity: severityClass(run), + rawTime, + timeLabel: formatDate(rawTime), + absoluteTime: formatAbsoluteDate(rawTime), + reportSha: shortHash(run.reportJsonSha256 || 
run.report_json_sha256 || run.reportSha256 || ""), + title: `${shortId(run.id)} ${formatAbsoluteDate(rawTime)} 红色 ${red} 警告 ${warning} 总量 ${total}`, + }; + })); const timelineRuns = computed(() => runs.value.slice(0, 16)); const rootCauseFindings = computed(() => { const rows = findings.value.filter((item) => item.rootCause || item.nextAction || ["red", "warning"].includes(severityClass(item))); @@ -142,6 +165,14 @@ createApp({ return Math.round(126 - (Number(value || 0) / trendMax.value) * 102); } + function showTrendTooltip(dot) { + hoveredTrendDot.value = dot; + } + + function hideTrendTooltip() { + hoveredTrendDot.value = null; + } + onMounted(() => { void loadAll(); window.setInterval(() => { @@ -165,6 +196,7 @@ createApp({ autoRefresh, refreshSeconds, lastLoadedAt, + hoveredTrendDot, sentinels, currentStatus, latestRun, @@ -182,11 +214,14 @@ createApp({ selectRun, refreshNow, currentHref, + showTrendTooltip, + hideTrendTooltip, redCount, warningCount, findingCount, severityClass, formatDate, + formatAbsoluteDate, formatDuration, shortId, rootCauseText, @@ -258,7 +293,19 @@ createApp({ - + + {{ dot.title }} @@ -267,6 +314,18 @@ createApp({ +
+ {{ shortId(hoveredTrendDot.runId) }} + {{ hoveredTrendDot.absoluteTime }} + 状态 {{ hoveredTrendDot.status }} + 红色 {{ hoveredTrendDot.red }} / 警告 {{ hoveredTrendDot.warning }} / 总量 {{ hoveredTrendDot.total }} + report {{ hoveredTrendDot.reportSha }} +
暂无运行数据
@@ -515,6 +574,13 @@ function formatDate(value) { return date.toISOString().slice(5, 16).replace("T", " "); } +function formatAbsoluteDate(value) { + if (!value) return "-"; + const date = new Date(value); + if (Number.isNaN(date.getTime())) return String(value); + return `${date.toISOString().slice(0, 19).replace("T", " ")} UTC`; +} + function formatDuration(seconds) { const value = Math.max(0, Number(seconds || 0)); if (value < 90) return `${Math.round(value)}s`; @@ -528,6 +594,16 @@ function shortId(value) { return text.length > 18 ? `${text.slice(0, 10)}...${text.slice(-6)}` : text || "-"; } +function shortHash(value) { + const text = String(value || ""); + if (text.length === 0) return ""; + return text.length > 12 ? text.slice(0, 12) : text; +} + +function clamp(value, min, max) { + return Math.max(min, Math.min(max, value)); +} + function rootCauseText(item) { return item?.rootCause || item?.evidenceSummary || item?.summary || "尚未记录根因,等待下一次 OTel/报告归因。"; } diff --git a/scripts/src/hwlab-node-web-sentinel-cicd.ts b/scripts/src/hwlab-node-web-sentinel-cicd.ts index 63492199..b5b1223d 100644 --- a/scripts/src/hwlab-node-web-sentinel-cicd.ts +++ b/scripts/src/hwlab-node-web-sentinel-cicd.ts @@ -3,6 +3,7 @@ // SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-26-p9-multi-web-probe-sentinel. // SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-26-p10-monitor-web-aggregation. // SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-27-p11-monitor-web-observability-dashboard. +// SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-27-p12-cadence-scheduler-monitor-web. // Responsibility: YAML-first CI/CD, image, GitOps and Argo command plan for the web-probe sentinel. 
import { createHash, randomUUID } from "node:crypto"; import { existsSync, readFileSync } from "node:fs"; @@ -1999,6 +2000,24 @@ for (let attempt = 1; attempt <= maxNavigationAttempts; attempt += 1) { await page.waitForTimeout(750 * attempt); } +await page.evaluate(() => { + const detailPane = document.querySelector(".workspace-grid .pane-detail"); + if (detailPane instanceof HTMLElement) detailPane.scrollTop = Math.min(96, Math.max(0, detailPane.scrollHeight - detailPane.clientHeight)); +}).catch(() => {}); +await page.waitForTimeout(150); + +const trendHoverPoint = await page.evaluate(() => { + const target = document.querySelector(".trend-dot-hit .trend-dot-red") || document.querySelector(".trend-dot-hit .trend-dot-warning"); + if (!(target instanceof SVGElement)) return null; + const rect = target.getBoundingClientRect(); + if (rect.width <= 0 || rect.height <= 0) return null; + return { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }; +}).catch(() => null); +if (trendHoverPoint) { + await page.mouse.move(trendHoverPoint.x, trendHoverPoint.y); + await page.waitForTimeout(250); +} + if (captureScreenshot && screenshotPath) { await page.screenshot({ path: screenshotPath, fullPage, animations: "disabled" }).catch((error) => { pageErrors.push({ message: "screenshot failed: " + String(error?.message || error).slice(0, 400) }); @@ -2012,9 +2031,12 @@ const dom = await page.evaluate(() => { const shell = document.querySelector("[data-monitor-shell='true']"); const error = document.querySelector("#monitor-web-error"); const trend = document.querySelector("[data-monitor-trend-curve]"); + const trendTooltip = document.querySelector("[data-monitor-trend-tooltip='true']"); const timeline = document.querySelector("[data-monitor-timeline='true']"); const workspace = document.querySelector("[data-monitor-independent-scroll='true']"); const panes = Array.from(document.querySelectorAll(".workspace-grid .pane")); + const detailPane = 
document.querySelector(".workspace-grid .pane-detail"); + const detailHeader = document.querySelector("#monitor-web-root > div > section.workspace-grid > main > div.pane-header"); const doc = document.documentElement; const body = document.body; const viewport = { width: window.innerWidth, height: window.innerHeight }; @@ -2063,6 +2085,8 @@ const dom = await page.evaluate(() => { runRows: document.querySelectorAll(".run-list .run-row").length, findingItems: document.querySelectorAll(".finding-list .finding-card").length, trendCurve: Boolean(trend), + trendDotCount: document.querySelectorAll(".trend-dot-hit").length, + trendTooltip: tooltipSummary(trendTooltip), trendPanelText: text("#trend-heading"), timelineItems: document.querySelectorAll(".timeline-list .timeline-item").length, timelineVisible: Boolean(timeline), @@ -2089,6 +2113,7 @@ const dom = await page.evaluate(() => { const style = window.getComputedStyle(pane); return style.overflowY === "auto" || style.overflowY === "scroll"; }), + stickyHeader: stickyHeaderSummary(detailPane, detailHeader), }, layout: { viewport, @@ -2098,6 +2123,46 @@ const dom = await page.evaluate(() => { overflow, }, }; + + function tooltipSummary(element) { + const body = String(element?.textContent || "").replace(/\s+/g, " ").trim(); + return { + visible: Boolean(element && body.length > 0), + text: body.slice(0, 240), + hasValues: /红色\s+\d+/u.test(body) && /警告\s+\d+/u.test(body) && /总量\s+\d+/u.test(body), + hasTime: /UTC/u.test(body) || /\d{4}-\d{2}-\d{2}/u.test(body), + }; + } + + function stickyHeaderSummary(pane, header) { + if (!(pane instanceof HTMLElement) || !(header instanceof HTMLElement)) { + return { present: false, coversScroll: false, backgroundOpaque: false, detailScrollTop: null }; + } + const rect = header.getBoundingClientRect(); + const style = window.getComputedStyle(header); + const sampleX = Math.round(rect.left + Math.min(32, Math.max(2, rect.width / 2))); + const sampleY = Math.round(rect.top + Math.min(12, 
Math.max(2, rect.height / 2))); + const topElement = document.elementFromPoint(sampleX, sampleY); + return { + present: true, + detailScrollTop: pane.scrollTop, + headerTop: Math.round(rect.top), + headerBottom: Math.round(rect.bottom), + zIndex: style.zIndex, + backgroundColor: style.backgroundColor, + coversScroll: Boolean(topElement && header.contains(topElement)), + backgroundOpaque: backgroundIsOpaque(style.backgroundColor), + topElementClass: String(topElement?.className || "").slice(0, 80), + }; + } + + function backgroundIsOpaque(value) { + const rgba = /rgba?\(([^)]+)\)/u.exec(value); + if (rgba === null) return value.length > 0 && value !== "transparent"; + const parts = rgba[1].split(",").map((part) => part.trim()); + if (parts.length < 4) return true; + return Number(parts[3]) >= 0.99; + } }); const consoleErrors = consoleMessages.filter((item) => item.type === "error"); @@ -2109,8 +2174,12 @@ const ok = !navigationError && dom.ready === true && dom.errorVisible !== true && dom.trendCurve === true + && (dom.trendDotCount === 0 || (dom.trendTooltip?.visible === true && dom.trendTooltip?.hasValues === true && dom.trendTooltip?.hasTime === true)) && dom.timelineVisible === true && dom.scrollModel?.independentScroll === true + && dom.scrollModel?.stickyHeader?.present === true + && dom.scrollModel?.stickyHeader?.coversScroll === true + && dom.scrollModel?.stickyHeader?.backgroundOpaque === true && dom.layout?.horizontalOverflow !== true && pageErrors.length === 0; diff --git a/scripts/web-probe-sentinel-scheduler.ts b/scripts/web-probe-sentinel-scheduler.ts new file mode 100644 index 00000000..0df6577c --- /dev/null +++ b/scripts/web-probe-sentinel-scheduler.ts @@ -0,0 +1,554 @@ +#!/usr/bin/env bun +// SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-27-p12-cadence-scheduler-monitor-web. +// Responsibility: Host-side cadence scheduler for YAML-first web-probe sentinels; it triggers the existing validate quick-verify path when runs become stale. 
+import { existsSync, mkdirSync, openSync, closeSync, statSync, unlinkSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { repoRoot, rootPath } from "./src/config"; +import { runCommand, runCommandObserved, type CommandResult } from "./src/command"; +import { hwlabDefaultRuntimeTarget, hwlabRuntimeLaneSpecForNode, isHwlabRuntimeLane } from "./src/hwlab-node-lanes"; +import { readConfigRefTarget, resolveWebProbeSentinel, webProbeSentinelRegistryRows } from "./src/hwlab-node-web-sentinel-resolver"; + +type SchedulerAction = "run" | "install-systemd" | "status-systemd"; + +interface SchedulerOptions { + readonly action: SchedulerAction; + readonly node: string; + readonly lane: string; + readonly sentinelId: string | null; + readonly dryRun: boolean; + readonly force: boolean; + readonly confirm: boolean; + readonly staleMultiplier: number; + readonly timeoutSeconds: number | null; + readonly fetchTimeoutMs: number; +} + +interface SentinelSchedule { + readonly sentinelId: string; + readonly enabled: boolean; + readonly publicBaseUrl: string; + readonly cadenceSeconds: number; + readonly timeoutSeconds: number; + readonly scenarioIds: readonly string[]; +} + +interface OverviewSnapshot { + readonly ok: boolean; + readonly latestRunId: string | null; + readonly latestRunAt: string | null; + readonly latestRunAgeSeconds: number | null; + readonly schedulerHeartbeatAt: string | null; + readonly schedulerHeartbeatAgeSeconds: number | null; + readonly error: string | null; +} + +interface TriggerResult { + readonly attempted: boolean; + readonly exitCode: number | null; + readonly timedOut: boolean; + readonly durationMs: number | null; + readonly recorded: boolean; + readonly latestRunIdBefore: string | null; + readonly latestRunIdAfter: string | null; + readonly status: string; + readonly stdoutTail: string; + readonly stderrTail: string; +} + +const DEFAULT_STALE_MULTIPLIER = 1; +const DEFAULT_FETCH_TIMEOUT_MS = 15_000; +const 
HOST_SCHEDULER_INTERVAL_SECONDS = 120; +const STATE_DIR = rootPath(".state", "web-probe-sentinel-scheduler"); + +await main().catch((error) => { + const message = error instanceof Error ? error.stack || error.message : String(error); + console.error(message); + process.exit(1); +}); + +async function main(): Promise { + const options = parseArgs(process.argv.slice(2)); + if (options.action === "install-systemd") { + installSystemd(options); + return; + } + if (options.action === "status-systemd") { + statusSystemd(options); + return; + } + await runScheduler(options); +} + +async function runScheduler(options: SchedulerOptions): Promise { + const spec = specFor(options); + const schedules = sentinelSchedules(spec, options); + const rows: Record[] = []; + let infraFailure = false; + + for (const schedule of schedules) { + if (!schedule.enabled) { + rows.push(rowFor(schedule, null, false, "disabled", null)); + continue; + } + const before = await readOverview(schedule, options.fetchTimeoutMs); + const latestAge = before.latestRunAgeSeconds; + const dueThresholdSeconds = Math.max(1, Math.round(schedule.cadenceSeconds * options.staleMultiplier)); + const due = options.force || latestAge === null || latestAge >= dueThresholdSeconds; + let trigger: TriggerResult | null = null; + if (due && !options.dryRun) { + const lock = acquireLock(options, schedule.sentinelId, schedule.timeoutSeconds); + if (lock.acquired) { + try { + trigger = await triggerSentinel(options, schedule, before); + infraFailure = infraFailure || trigger.status === "infra-failed" || trigger.status === "timeout"; + } finally { + releaseLock(lock.path); + } + } else { + trigger = { + attempted: false, + exitCode: null, + timedOut: false, + durationMs: null, + recorded: false, + latestRunIdBefore: before.latestRunId, + latestRunIdAfter: before.latestRunId, + status: `lock-held:${lock.reason}`, + stdoutTail: "", + stderrTail: "", + }; + } + } + const status = due ? options.dryRun ? 
"due-dry-run" : trigger?.status ?? "due" : "fresh"; + const row = rowFor(schedule, before, due, status, trigger); + rows.push(row); + appendEvent({ at: new Date().toISOString(), node: options.node, lane: options.lane, ...row, valuesRedacted: true }); + } + + printRows(rows); + if (infraFailure) process.exitCode = 2; +} + +function specFor(options: SchedulerOptions) { + if (!isHwlabRuntimeLane(options.lane)) throw new Error(`unknown lane ${options.lane}`); + return hwlabRuntimeLaneSpecForNode(options.lane, options.node); +} + +function sentinelSchedules(spec: ReturnType, options: SchedulerOptions): SentinelSchedule[] { + const registry = webProbeSentinelRegistryRows(spec); + const selectedRows = options.sentinelId === null + ? registry + : registry.filter((row) => row.id === options.sentinelId); + if (selectedRows.length === 0) { + const ids = registry.map((row) => row.id).join(", "); + throw new Error(`unknown sentinel ${options.sentinelId ?? "-"}; available: ${ids}`); + } + return selectedRows.map((row) => { + const sentinel = resolveWebProbeSentinel(spec, row.id); + const publicExposure = record(readConfigRefTarget(sentinel.configRefs.publicExposure), sentinel.configRefs.publicExposure); + const runtime = record(readConfigRefTarget(sentinel.configRefs.runtime), sentinel.configRefs.runtime); + const cicd = record(readConfigRefTarget(sentinel.configRefs.cicd), sentinel.configRefs.cicd); + const scenarios = scenarioRows(readConfigRefTarget(sentinel.configRefs.scenarios)); + const enabledScenarios = scenarios.filter((scenario) => scenario.enabled !== false); + const scenarioCadences = enabledScenarios + .map((scenario) => typeof scenario.cadence === "string" ? 
parseDurationSeconds(scenario.cadence) : null) + .filter((value): value is number => value !== null && value > 0); + const runtimeInterval = numberAt(runtime, "scheduler.intervalMs"); + const yamlTimeout = numberAtNullable(cicd, "targetValidation.maxSeconds"); + return { + sentinelId: sentinel.id, + enabled: row.enabled && sentinel.enabled && enabledScenarios.length > 0, + publicBaseUrl: stringAt(publicExposure, "publicBaseUrl").replace(/\/+$/u, ""), + cadenceSeconds: Math.min(...(scenarioCadences.length > 0 ? scenarioCadences : [Math.max(1, Math.round(runtimeInterval / 1000))])), + timeoutSeconds: options.timeoutSeconds ?? yamlTimeout ?? 300, + scenarioIds: enabledScenarios.map((scenario) => String(scenario.id || sentinel.id)), + }; + }); +} + +async function triggerSentinel(options: SchedulerOptions, schedule: SentinelSchedule, before: OverviewSnapshot): Promise { + const command = [ + "bun", + "scripts/cli.ts", + "web-probe", + "sentinel", + "validate", + "--node", + options.node, + "--lane", + options.lane, + "--sentinel", + schedule.sentinelId, + "--quick-verify", + "--confirm", + "--wait", + "--timeout-seconds", + String(schedule.timeoutSeconds), + ]; + const result = await runCommandObserved(command, repoRoot, { + timeoutMs: Math.max(60, schedule.timeoutSeconds + 90) * 1000, + heartbeatMs: 30_000, + maxCaptureChars: 24_000, + env: { ...process.env, NO_COLOR: "1" }, + }); + const after = await readOverview(schedule, options.fetchTimeoutMs); + const recorded = after.ok && ( + before.latestRunId === null + || after.latestRunId !== before.latestRunId + || (after.latestRunAt !== null && after.latestRunAt !== before.latestRunAt) + ); + const status = result.timedOut + ? "timeout" + : recorded + ? result.exitCode === 0 ? "recorded" : "recorded-with-findings" + : result.exitCode === 0 ? "completed-no-new-run" : "infra-failed"; + return { + attempted: true, + exitCode: result.exitCode, + timedOut: result.timedOut, + durationMs: result.durationMs ?? 
null, + recorded, + latestRunIdBefore: before.latestRunId, + latestRunIdAfter: after.latestRunId, + status, + stdoutTail: tail(result.stdout, 900), + stderrTail: tail(result.stderr, 900), + }; +} + +async function readOverview(schedule: SentinelSchedule, timeoutMs: number): Promise { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), timeoutMs); + try { + const response = await fetch(`${schedule.publicBaseUrl}/api/overview`, { cache: "no-store", signal: controller.signal }); + if (!response.ok) throw new Error(`HTTP ${response.status}`); + const overview = record(await response.json(), `${schedule.publicBaseUrl}/api/overview`); + const latestRun = isRecord(overview.latestRun) ? overview.latestRun : {}; + const freshness = isRecord(overview.freshness) ? overview.freshness : {}; + const latestRunAt = stringAtNullable(latestRun, "updatedAt") ?? stringAtNullable(latestRun, "createdAt"); + return { + ok: true, + latestRunId: stringAtNullable(latestRun, "id"), + latestRunAt, + latestRunAgeSeconds: numberAtNullable(freshness, "latestRunAgeSeconds") ?? ageSeconds(latestRunAt), + schedulerHeartbeatAt: stringAtNullable(overview, "scheduler.heartbeatAt") ?? stringAtNullable(freshness, "schedulerHeartbeatAt"), + schedulerHeartbeatAgeSeconds: numberAtNullable(freshness, "schedulerHeartbeatAgeSeconds"), + error: null, + }; + } catch (error) { + return { + ok: false, + latestRunId: null, + latestRunAt: null, + latestRunAgeSeconds: null, + schedulerHeartbeatAt: null, + schedulerHeartbeatAgeSeconds: null, + error: error instanceof Error ? 
error.message : String(error), + }; + } finally { + clearTimeout(timer); + } +} + +function installSystemd(options: SchedulerOptions): void { + const unit = systemdUnitName(options); + const servicePath = `/etc/systemd/system/${unit}.service`; + const timerPath = `/etc/systemd/system/${unit}.timer`; + const service = `[Unit] +Description=UniDesk web-probe sentinel host cadence scheduler for ${options.node}/${options.lane} +Wants=network-online.target +After=network-online.target + +[Service] +Type=oneshot +WorkingDirectory=${repoRoot} +ExecStart=/usr/bin/env bun ${join(repoRoot, "scripts", "web-probe-sentinel-scheduler.ts")} run --node ${options.node} --lane ${options.lane} --stale-multiplier ${options.staleMultiplier} +`; + const timer = `[Unit] +Description=Run UniDesk web-probe sentinel host cadence scheduler for ${options.node}/${options.lane} + +[Timer] +OnBootSec=${HOST_SCHEDULER_INTERVAL_SECONDS}s +OnUnitActiveSec=${HOST_SCHEDULER_INTERVAL_SECONDS}s +AccuracySec=15s +Persistent=true +Unit=${unit}.service + +[Install] +WantedBy=timers.target +`; + + if (!options.confirm || options.dryRun) { + console.log(JSON.stringify({ ok: true, mode: "dry-run", servicePath, timerPath, service, timer, valuesRedacted: true }, null, 2)); + return; + } + writeFileSync(servicePath, service, "utf8"); + writeFileSync(timerPath, timer, "utf8"); + const commands = [ + ["systemctl", "daemon-reload"], + ["systemctl", "enable", "--now", `${unit}.timer`], + ]; + const results = commands.map((command) => runCommand(command, "/")); + printSystemdResult(unit, servicePath, timerPath, results); + if (results.some((result) => result.exitCode !== 0)) process.exitCode = 2; +} + +function statusSystemd(options: SchedulerOptions): void { + const unit = systemdUnitName(options); + const results = [ + runCommand(["systemctl", "is-enabled", `${unit}.timer`], "/"), + runCommand(["systemctl", "is-active", `${unit}.timer`], "/"), + runCommand(["systemctl", "show", `${unit}.timer`, 
"--property=NextElapseUSecRealtime", "--property=LastTriggerUSec"], "/"), + ]; + printSystemdResult(unit, `/etc/systemd/system/${unit}.service`, `/etc/systemd/system/${unit}.timer`, results); + if (results[1]?.exitCode !== 0) process.exitCode = 2; /* sets process exit code 2 when the second entry of results is missing or did not exit 0 */ +} + /** Prints a JSON summary of systemd command results to stdout, indented by two spaces. The ok field is true only when every result has exitCode 0; each result is reduced with compactCommand. @param unit unit name echoed in the output. @param servicePath service file path echoed in the output. @param timerPath timer file path echoed in the output. @param results command results to summarize. */ +function printSystemdResult(unit: string, servicePath: string, timerPath: string, results: readonly CommandResult[]): void { + console.log(JSON.stringify({ + ok: results.every((result) => result.exitCode === 0), + unit, + servicePath, + timerPath, + results: results.map(compactCommand), + valuesRedacted: true, + }, null, 2)); +} + /** Builds the report row object for one sentinel schedule. Cadence and ages are rendered with formatSeconds; an age is shown as a dash when overview is null or the age is null or undefined. latestRunId prefers trigger.latestRunIdAfter, then overview.latestRunId, then null. The trigger field is null when no trigger result is supplied, otherwise a copy of selected trigger fields. @param schedule schedule being reported. @param overview overview snapshot, or null when none is available. @param due value copied into the due field. @param status value copied into the status field. @param trigger trigger result, or null. @returns the row object, always carrying valuesRedacted true. */ +function rowFor(schedule: SentinelSchedule, overview: OverviewSnapshot | null, due: boolean, status: string, trigger: TriggerResult | null): Record { + return { + sentinelId: schedule.sentinelId, + enabled: schedule.enabled, + cadence: formatSeconds(schedule.cadenceSeconds), + latestAge: overview?.latestRunAgeSeconds === null || overview?.latestRunAgeSeconds === undefined ? "-" : formatSeconds(overview.latestRunAgeSeconds), + heartbeatAge: overview?.schedulerHeartbeatAgeSeconds === null || overview?.schedulerHeartbeatAgeSeconds === undefined ? "-" : formatSeconds(overview.schedulerHeartbeatAgeSeconds), + due, + status, + latestRunId: trigger?.latestRunIdAfter ?? overview?.latestRunId ?? null, + scenarios: schedule.scenarioIds.join(","), + overviewOk: overview?.ok ?? null, + overviewError: overview?.error ?? null, + trigger: trigger === null ? null : { + attempted: trigger.attempted, + exitCode: trigger.exitCode, + timedOut: trigger.timedOut, + durationMs: trigger.durationMs, + recorded: trigger.recorded, + latestRunIdBefore: trigger.latestRunIdBefore, + latestRunIdAfter: trigger.latestRunIdAfter, + }, + valuesRedacted: true, + }; +} + /** Prints rows to stdout as a text table under a fixed header line. Each column is padded to the longer of its header and its widest cell. A missing cell prints as empty text, except a missing latestRunId which prints as a dash. @param rows row objects; presumably those built by rowFor, whose keys match the columns read here (verify against the caller). */ +function printRows(rows: readonly Record[]): void { + const headers = ["SENTINEL", "CADENCE", "LATEST_AGE", "DUE", "STATUS", "LATEST_RUN"]; + const body = rows.map((row) => [ + String(row.sentinelId ?? ""), + String(row.cadence ?? 
""), + String(row.latestAge ?? ""), + String(row.due ?? ""), + String(row.status ?? ""), + String(row.latestRunId ?? "-"), + ]); + const widths = headers.map((header, index) => Math.max(header.length, ...body.map((line) => line[index].length))); + console.log(headers.map((header, index) => header.padEnd(widths[index])).join(" ")); + for (const line of body) console.log(line.map((value, index) => value.padEnd(widths[index])).join(" ")); +} + /** Tries to create an exclusive lock file for one sentinel in the locks directory inside STATE_DIR. The file name joins the safeSegment forms of options.node, options.lane and sentinelId. An existing lock whose mtime is older than the stale threshold is deleted before the attempt. @param options supplies node and lane for the file name. @param sentinelId sentinel the lock belongs to. @param timeoutSeconds timeout in seconds used to size the stale threshold. @returns acquired true with the lock path, or acquired false with the path and the error message when open, write or close throws. NOTE(review): existsSync, statSync and unlinkSync run outside the try block, so an error there propagates to the caller instead of becoming acquired false. NOTE(review): if writeFileSync throws after openSync succeeded, closeSync is skipped and the new lock file stays on disk until it ages out; consider try and finally with cleanup. */ +function acquireLock(options: SchedulerOptions, sentinelId: string, timeoutSeconds: number): { acquired: true; path: string } | { acquired: false; path: string; reason: string } { + const lockDir = join(STATE_DIR, "locks"); + mkdirSync(lockDir, { recursive: true }); + const lockPath = join(lockDir, `${safeSegment(options.node)}-${safeSegment(options.lane)}-${safeSegment(sentinelId)}.lock`); + const maxLockAgeMs = Math.max(3_600_000, (timeoutSeconds + 300) * 1000); /* stale threshold: one hour, or timeoutSeconds plus 300 seconds when that is longer */ + if (existsSync(lockPath)) { + const ageMs = Date.now() - statSync(lockPath).mtimeMs; + if (ageMs > maxLockAgeMs) unlinkSync(lockPath); + } + try { + const fd = openSync(lockPath, "wx"); /* wx flag: creation fails when the path already exists */ + writeFileSync(fd, JSON.stringify({ pid: process.pid, at: new Date().toISOString(), sentinelId, valuesRedacted: true })); + closeSync(fd); + return { acquired: true, path: lockPath }; + } catch (error) { + const reason = error instanceof Error ? error.message : String(error); + return { acquired: false, path: lockPath, reason }; + } +} + /** Deletes the lock file at lockPath. Errors from unlinkSync are swallowed on purpose. @param lockPath lock file path to delete. */ +function releaseLock(lockPath: string): void { + try { + unlinkSync(lockPath); + } catch { + // Best-effort cleanup; stale locks are aged out on the next tick.
+ } +} + /** Appends event as one JSON line to the file run-YYYYMMDD.jsonl inside STATE_DIR, creating the directory when it is missing. The date part is taken from toISOString and is therefore UTC. @param event object serialized with JSON.stringify. */ +function appendEvent(event: Record): void { + mkdirSync(STATE_DIR, { recursive: true }); + const date = new Date().toISOString().slice(0, 10).replaceAll("-", ""); + const path = join(STATE_DIR, `run-${date}.jsonl`); + writeFileSync(path, `${JSON.stringify(event)}\n`, { flag: "a" }); +} + /** Parses command line arguments into SchedulerOptions. A leading run, install-systemd or status-systemd token selects the action; without one the action is run. node and lane start from hwlabDefaultRuntimeTarget(). The help flags print the usage text and end the process with exit code 0. @param argv arguments to parse; the array is copied and not mutated. @returns the collected options. @throws Error on an unknown option, a flag that is missing its value, or a numeric flag that fails validation. */ +function parseArgs(argv: readonly string[]): SchedulerOptions { + const defaults = hwlabDefaultRuntimeTarget(); + let action: SchedulerAction = "run"; + let node = defaults.node; + let lane = defaults.lane; + let sentinelId: string | null = null; + let dryRun = false; + let force = false; + let confirm = false; + let staleMultiplier = DEFAULT_STALE_MULTIPLIER; + let timeoutSeconds: number | null = null; + let fetchTimeoutMs = DEFAULT_FETCH_TIMEOUT_MS; + + const args = [...argv]; + if (args[0] === "run" || args[0] === "install-systemd" || args[0] === "status-systemd") { + action = args.shift() as SchedulerAction; + } + while (args.length > 0) { + const arg = args.shift(); + if (arg === undefined) break; + if (arg === "--node") node = requireValue(arg, args); + else if (arg === "--lane") lane = requireValue(arg, args); + else if (arg === "--sentinel") sentinelId = requireValue(arg, args); + else if (arg === "--dry-run") dryRun = true; + else if (arg === "--force") force = true; + else if (arg === "--confirm") confirm = true; + else if (arg === "--stale-multiplier") staleMultiplier = positiveNumber(requireValue(arg, args), arg); + else if (arg === "--timeout-seconds") timeoutSeconds = positiveInteger(requireValue(arg, args), arg); + else if (arg === "--fetch-timeout-ms") fetchTimeoutMs = positiveInteger(requireValue(arg, args), arg); + else if (arg === "-h" || arg === "--help") { + printUsage(); + process.exit(0); + } else { + throw new Error(`unknown option ${arg}`); + } + } + return { action, node, lane, sentinelId, dryRun, force, confirm, staleMultiplier, timeoutSeconds, fetchTimeoutMs }; +} + /** Prints the usage text to stdout. NOTE(review): parseArgs also accepts --stale-multiplier, --timeout-seconds and --fetch-timeout-ms, which this text does not list. */ +function printUsage(): void { + console.log(`Usage: + bun 
scripts/web-probe-sentinel-scheduler.ts run [--node D601] [--lane v03] [--sentinel ID] [--dry-run] [--force] + bun scripts/web-probe-sentinel-scheduler.ts install-systemd --node D601 --lane v03 --confirm + bun scripts/web-probe-sentinel-scheduler.ts status-systemd --node D601 --lane v03 +`); +} + /** Removes and returns the next entry of args as the value of flag. Mutates args. @param flag option name used in the error message. @param args remaining arguments. @throws Error when no entry is left or the entry is an empty string. */ +function requireValue(flag: string, args: string[]): string { + const value = args.shift(); + if (value === undefined || value.length === 0) throw new Error(`${flag} requires a value`); + return value; +} + /** Converts value with Number() and returns it when it is an integer greater than zero. @param value raw text. @param flag option name used in the error message. @throws Error otherwise. NOTE(review): Number() also accepts spellings such as 1e3 or 0x10. */ +function positiveInteger(value: string, flag: string): number { + const parsed = Number(value); + if (!Number.isInteger(parsed) || parsed <= 0) throw new Error(`${flag} must be a positive integer`); + return parsed; +} + /** Converts value with Number() and returns it when it is finite and greater than zero. @param value raw text. @param flag option name used in the error message. @throws Error otherwise. */ +function positiveNumber(value: string, flag: string): number { + const parsed = Number(value); + if (!Number.isFinite(parsed) || parsed <= 0) throw new Error(`${flag} must be a positive number`); + return parsed; +} + /** Normalizes a scenario config value into a list of objects. An array is returned element by element; an object with an array scenarios property yields those elements; an object with an object workflow property yields that workflow alone; any other object yields itself as the only row. @param value parsed config value. @throws Error when value is neither an array nor an object, or when a listed element is not an object. */ +function scenarioRows(value: unknown): Record[] { + if (Array.isArray(value)) return value.map((item) => record(item, "scenario")); + if (!isRecord(value)) throw new Error("scenario configRef must point to a YAML object or array"); + if (Array.isArray(value.scenarios)) return value.scenarios.map((item) => record(item, "scenario")); + if (isRecord(value.workflow)) return [value.workflow]; + return [value]; +} + /** Parses text made of digits followed by ms, s, m or h into seconds, ignoring surrounding whitespace. Millisecond values are rounded up to whole seconds with a minimum of 1. @param value duration text. @returns the duration in seconds, or null when the text does not match. */ +function parseDurationSeconds(value: string): number | null { + const match = /^(\d+)(ms|s|m|h)$/u.exec(value.trim()); + if (match === null) return null; + const amount = Number(match[1]); + const unit = match[2]; + if (unit === "ms") return Math.max(1, Math.ceil(amount / 1000)); + if (unit === "s") return amount; + if (unit === "m") return amount * 60; + if (unit === "h") return amount * 3600; + return null; +} + /** Renders a duration in seconds as a rounded short label: seconds below 90, minutes below 7200, hours below 172800, days otherwise. @param seconds duration to render. */ +function formatSeconds(seconds: number): string { + if (seconds < 90) return `${Math.round(seconds)}s`; + if (seconds < 7200) return `${Math.round(seconds / 60)}m`; + if (seconds < 172800) return `${Math.round(seconds 
/ 3600)}h`; + return `${Math.round(seconds / 86400)}d`; +} + /** Returns how many seconds ago the timestamp was relative to Date.now(), rounded and never negative. @param value text accepted by Date.parse, or null. @returns the age in seconds, or null when value is null or cannot be parsed. */ +function ageSeconds(value: string | null): number | null { + if (value === null) return null; + const parsed = Date.parse(value); + if (!Number.isFinite(parsed)) return null; + return Math.max(0, Math.round((Date.now() - parsed) / 1000)); +} + /** Reads the dotted path from value through valueAtPath and returns it when it is a non-empty string. @param value object to read from. @param path dot-separated property path, also used in the error message. @throws Error otherwise. */ +function stringAt(value: unknown, path: string): string { + const found = valueAtPath(value, path); + if (typeof found !== "string" || found.length === 0) throw new Error(`${path} must be a non-empty string`); + return found; +} + /** Reads the dotted path from value through valueAtPath. @returns the value when it is a non-empty string, otherwise null. */ +function stringAtNullable(value: unknown, path: string): string | null { + const found = valueAtPath(value, path); + return typeof found === "string" && found.length > 0 ? found : null; +} + /** Reads the dotted path from value through valueAtPath and returns it when it is a finite number. @param value object to read from. @param path dot-separated property path, also used in the error message. @throws Error otherwise. */ +function numberAt(value: unknown, path: string): number { + const found = valueAtPath(value, path); + if (typeof found !== "number" || !Number.isFinite(found)) throw new Error(`${path} must be a number`); + return found; +} + /** Reads the dotted path from value through valueAtPath. @returns the value when it is a finite number, otherwise null. */ +function numberAtNullable(value: unknown, path: string): number | null { + const found = valueAtPath(value, path); + return typeof found === "number" && Number.isFinite(found) ? found : null; +} + /** Walks value along the dot-separated path, one property per segment. @returns the value found, or undefined as soon as a step would be taken on something isRecord rejects. */ +function valueAtPath(value: unknown, path: string): unknown { + let current = value; + for (const segment of path.split(".")) { + if (!isRecord(current)) return undefined; + current = current[segment]; + } + return current; +} + /** Returns value unchanged when isRecord accepts it. @param value candidate object. @param label name used in the error message. @throws Error otherwise. */ +function record(value: unknown, label: string): Record { + if (!isRecord(value)) throw new Error(`${label} must be an object`); + return value; +} + /** Type guard: true for a non-null object that is not an array. */ +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + /** Reduces a command result to a summary object: the command parts joined with spaces, exit code, timeout flag, duration (null when absent) and the last 900 characters of stdout and stderr. @param result command result to summarize. */ +function compactCommand(result: CommandResult): Record { + return { + command: result.command.join(" "), + exitCode: result.exitCode, + timedOut: result.timedOut, + durationMs: result.durationMs ?? 
null, + stdoutTail: tail(result.stdout, 900), + stderrTail: tail(result.stderr, 900), + }; +} + /** Returns value unchanged when it has at most maxChars characters, otherwise its last maxChars characters. @param value text to shorten. @param maxChars number of trailing characters to keep. NOTE(review): with maxChars 0 and a non-empty value, slice(-0) returns the whole string rather than empty text. */ +function tail(value: string, maxChars: number): string { + return value.length <= maxChars ? value : value.slice(-maxChars); +} + /** Builds the systemd unit base name from the safeSegment forms of options.node and options.lane behind a fixed prefix. @param options supplies node and lane. */ +function systemdUnitName(options: SchedulerOptions): string { + return `unidesk-web-probe-sentinel-scheduler-${safeSegment(options.node)}-${safeSegment(options.lane)}`; +} + /** Lowercases value, replaces each run of characters outside a-z, 0-9, dot, underscore and hyphen with one hyphen, strips leading and trailing hyphens, and falls back to the word default when nothing is left. @param value raw text. */ +function safeSegment(value: string): string { + return value.toLowerCase().replace(/[^a-z0-9._-]+/gu, "-").replace(/^-+|-+$/gu, "") || "default"; +}