From bdec05729dd017d0ca03b19f8ba90047f51773bf Mon Sep 17 00:00:00 2001 From: Lyon <88232613+pikasTech@users.noreply.github.com> Date: Fri, 26 Jun 2026 14:04:37 +0800 Subject: [PATCH] fix: restore web sentinel recovery diagnostics (#980) Co-authored-by: Codex --- config/platform-db/postgres-pk01.yaml | 2 +- .../PJ2026-01060508-web-probe-sentinel.md | 75 ++++- .../dashboard.css | 2 +- .../web-probe-sentinel-dashboard/dashboard.js | 258 ++++++++++++------ scripts/src/hwlab-node-web-probe-summary.ts | 5 +- scripts/src/hwlab-node-web-sentinel-cicd.ts | 105 ++++++- ...wlab-node-web-sentinel-dashboard-assets.ts | 120 ++++---- .../src/hwlab-node-web-sentinel-service.ts | 3 +- 8 files changed, 409 insertions(+), 161 deletions(-) diff --git a/config/platform-db/postgres-pk01.yaml b/config/platform-db/postgres-pk01.yaml index cd3c3ba4..d83e8c9e 100644 --- a/config/platform-db/postgres-pk01.yaml +++ b/config/platform-db/postgres-pk01.yaml @@ -94,7 +94,7 @@ postgres: cidr: 202.98.13.68/32 purpose: platform-infra-sub2api-active tuning: - maxConnections: 50 + maxConnections: 160 sharedBuffers: 512MB effectiveCacheSize: 2GB workMem: 8MB diff --git a/project-management/PJ2026-01/specs/PJ2026-01060508-web-probe-sentinel.md b/project-management/PJ2026-01/specs/PJ2026-01060508-web-probe-sentinel.md index d701510a..d7f2d539 100644 --- a/project-management/PJ2026-01/specs/PJ2026-01060508-web-probe-sentinel.md +++ b/project-management/PJ2026-01/specs/PJ2026-01060508-web-probe-sentinel.md @@ -20,7 +20,7 @@ | 层级 | L3 子课题 | | 状态 | 已生效 | | 实现引用版本 | draft-2026-06-25-p0-web-probe-sentinel | -| Dashboard 实现引用版本 | draft-2026-06-26-p7-web-probe-sentinel-dashboard | +| Dashboard 实现引用版本 | draft-2026-06-26-p8-web-probe-sentinel-recovery | | 需求规格模板 | [ISO/IEC/IEEE 29148 需求规格模板](../../templates/iso-iec-ieee-29148-requirements-spec-template.md) | | 上级规格 | [PJ2026-010605 运维监控](PJ2026-010605-observability-monitoring.md) | | 关联规格 | [PJ2026-010401 Web工作台](PJ2026-010401-web-workbench.md)、[PJ2026-0104010803 Workbench唯一投影](PJ2026-0104010803-workbench-unique-projection.md)、[PJ2026-010403 API契约](PJ2026-010403-api-contract.md)、[PJ2026-010601 发布流水](PJ2026-010601-controlled-release.md)、[PJ2026-010602 源码同步](PJ2026-010602-source-sync.md)、[PJ2026-010603 YAML运维](PJ2026-010603-yaml-first-ops.md)、[PJ2026-010604 公开入口](PJ2026-010604-public-entry.md)、[PJ2026-01060505 Workbench性能](PJ2026-01060505-workbench-performance.md) | @@ -355,6 +355,69 @@ sequenceDiagram Trace drill-down 必须保持两层阅读:第一层是多 turn 摘要,第二层是选中 turn/trace/sample 后的文字版 trace-frame。`Final Response` 在第二层固定成块展示;空内容显示 `(空内容)`,有内容按 redaction 策略展示摘要、字节数或允许展示的正文。 +### 5.11 P8 quick verify 控制链路图 + +```mermaid +sequenceDiagram + participant Val as sentinel validate + participant Sen as Sentinel service + participant Obs as observe runner + participant Ctrl as control queue + participant Web as HWLAB Workbench + participant Ana as observe analyze + participant Rep as report/dashboard + + Val->>Sen: /api/health through k3s Service DNS + Val->>Obs: observe start + Val->>Ctrl: command newSession + Ctrl->>Web: create Workbench session + Val->>Ctrl: command selectProvider/sendPrompt + Ctrl->>Web: submit business turn + Val->>Ana: observe analyze existing artifact + Ana->>Rep: report SHA, findings, turn-summary, trace-frame +``` + +Quick verify 的通过条件必须覆盖控制命令、业务 turn 和 analyze/report 三段。`monitor.pikapython.com` root/CSS/JS 200 只证明公开 dashboard 外壳可读,不得抵消 `newSession`、`sendPrompt`、trace rows 或 final response 缺失。 + +### 5.12 P8 故障分类数据流图 + +```mermaid +flowchart LR + Validate[sentinel validate] --> Shell[service/public dashboard health] + Validate --> Control[observe command health] + Validate --> Business[business turn health] + Validate --> Runtime[runtime/browser health] + Shell --> Result[validation result] + Control --> Result + Business --> Result + Runtime --> Result + Control --> NoTurn[quick-verify-no-business-turn] + Business --> Trace[turn-summary and trace-frame] + Runtime --> Timeout[timeout/readiness/session/api subtype] +``` + +分类必须先按 service/public-dashboard、control command、business turn、runtime/browser 分仓,再给可行动下一步。`browser-timeout` 不得默认归为浏览器安装或 Playwright 环境问题;它至少要被解释为页面导航、auth/login、Workbench readiness、session create、message submit、trace projection 或浏览器环境中的一个可复核子类。 + +### 5.13 P8 中文运维视图时序图 + +```mermaid +sequenceDiagram + participant U as 用户 + participant D as 中文运维页面 + participant API as dashboard API + participant CLI as CLI drill-down + + U->>D: 打开 monitor.pikapython.com + D->>API: overview, runs, findings + API-->>D: 服务健康、公开入口、最近业务验证、阻塞分类 + U->>D: 选择 blocked run/finding + D-->>U: 中文解释、证据、CLI 对照命令、下一步动作 + U->>CLI: sentinel report / observe collect + CLI-->>U: 同一 run/observer/report SHA 的文字 trace 证据 +``` + +中文运维页面必须默认展示中文状态、中文说明和中文下一步,同时保留原始 run id、observer id、finding code、report SHA 和 CLI 命令,便于 issue/PR 证据对照。 + ## 6. 原子需求 ### 6.1 OPS-SENTINEL-REQ-001 非分叉 wrapper 边界 @@ -411,6 +474,10 @@ Web哨兵的 `sentinel report` 和 dashboard 必须按 YAML report views 渐进 Dashboard report API 必须提供 bounded、redacted、可分页的 view contract。`/api/overview` 提供 health、scheduler、maintenance、latest run、severity counts 和 freshness;`/api/runs` 提供 scenario/status/severity/time/search 过滤与 cursor 分页;`/api/runs/:id` 提供 run detail、report refs、artifact refs 和 summary counts;`/api/findings` 提供 severity/code/scenario/window 聚合;`/api/runs/:id/views` 提供 summary、turn-summary、findings 和 trace-frame 的只读渲染。所有响应都必须能追溯到 run id、observer id、stateDir 和 report SHA。 +P8 起,quick verify 如果没有产生 sendPrompt 业务 turn、有效 session、trace rows 或 final response,必须记录独立 red blocker `quick-verify-no-business-turn`。该 blocker 属于 quick verify 控制层事实,不得由 dashboard 前端临时推断;dashboard 和 `sentinel report --view findings` 只能展示已记录的同一 finding。 + +P8 恢复判定必须把 Workbench 业务失败继续 drill-down 到运行面依赖。当 trace-frame 或 Final Response 暴露 `hwlab-cloud-api request handling failed`、PostgreSQL `53300`、`too many clients already` 或等价 DB 连接槽耗尽证据时,quick verify 不能收口为前端展示缺陷;必须检查 PK01/PostgreSQL `max_connections`、各服务连接池、CrashLoop 探针风暴和当前 `pg_stat_activity`,并通过 YAML source of truth 收敛容量或池化参数。 + ### 6.5 OPS-SENTINEL-REQ-005 CI/CD、GitOps 和 maintenance | 编号 | 短名 | 主责模块 | 关联模块 | @@ -427,6 +494,8 @@ HWLAB runtime 发布 Pipeline 应在 Argo sync 前调用当前哨兵 `maintenanc 哨兵服务不可用、首次安装未完成或配置未就绪时,CI/CD 必须结构化失败并输出缺失项、恢复建议和可重试命令;不得自动回退到原纯客户端 CLI、裸 Playwright、私有 API、read-side repair、reload 循环或 session repair 形成第二执行路径。人工排障可以显式运行原 `web-probe observe start/status/command/collect/analyze`,但不能被 targetValidation 当作自动通过证据。 +`sentinel validate --quick-verify --confirm --wait`、maintenance stop quick verify 和 control-plane targetValidation 的确认等待总耗时超过 120s 时,必须输出 warning,并在 quick verify run 摘要中记录可见警告。warning 文案必须指向严重超时和下一步调查方向:env-reuse、git mirror、source build path、运行路径和当前 wait 阶段;不得通过调大 timeout、减少业务轮次或 fallback 到第二执行路径来消除红灯。 + ### 6.6 OPS-SENTINEL-REQ-006 dsflash-go 十轮 canary | 编号 | 短名 | 主责模块 | 关联模块 | @@ -489,6 +558,8 @@ Dashboard 前端架构应借鉴 Sub2API 监控面板的分层方式:typed API Dashboard 自动刷新只能读取 bounded API,不得发送 `observe command`、不启动新采样、不重新 analyze、不保存额外截图。页面 hidden、loading 或上一次请求未完成时应暂停或跳过刷新,避免监控 UI 自身制造额外压力。 +P8 中文运维页面必须以中文为默认用户可见语言:主标题、状态、筛选、运行历史、finding 分组、run detail、trace-frame、Final Response、空态、错误态、自动刷新和下一步动作均使用中文。原始英文 code、status 枚举、CLI 命令和 report SHA 可作为机器对照保留,但不得要求用户阅读英文 finding 才能判断 HWLAB 是否可用、卡在哪一层、下一步运行什么命令。 + ## 7. 过程控制 Web哨兵架构执行 issue 为 [#883](https://github.com/pikasTech/unidesk/issues/883)。阶段跟踪 issue 为 P0 [#885](https://github.com/pikasTech/unidesk/issues/885)、P1 [#886](https://github.com/pikasTech/unidesk/issues/886)、P2 [#887](https://github.com/pikasTech/unidesk/issues/887)、P3 [#888](https://github.com/pikasTech/unidesk/issues/888)、P4 [#889](https://github.com/pikasTech/unidesk/issues/889)、P5 [#890](https://github.com/pikasTech/unidesk/issues/890) 和 P6 [#891](https://github.com/pikasTech/unidesk/issues/891)。 @@ -498,3 +569,5 @@ P0 未完成前,不得推进 CLI wrapper、服务实现、YAML schema、CI/CD Dashboard 增强执行 issue 为 [#935](https://github.com/pikasTech/unidesk/issues/935)。P7 阶段跟踪 issue 为 P0 [#938](https://github.com/pikasTech/unidesk/issues/938)、P1 [#940](https://github.com/pikasTech/unidesk/issues/940)、P2 [#941](https://github.com/pikasTech/unidesk/issues/941)、P3 [#939](https://github.com/pikasTech/unidesk/issues/939)、P4 [#943](https://github.com/pikasTech/unidesk/issues/943)、P5 [#942](https://github.com/pikasTech/unidesk/issues/942) 和 P6 [#944](https://github.com/pikasTech/unidesk/issues/944)。P7 实现 PR closeout 必须回写 dashboard API contract、frontend 分层、trace-frame 对照、redaction、`monitor.pikapython.com` 验证和哨兵独立 CI/CD 状态。 P7 P6 收口必须区分 public dashboard validation 与 targetValidation quick verify:`monitor.pikapython.com` root/CSS/JS 200 只证明公开入口和静态 dashboard 资源可用;quick verify 的 `observe command`、采样样本、analyze report 和 red finding 仍是业务恢复判定的一部分。若 quick verify 因 `observe-command-newSession-failed`、`no-samples` 或等价采样器控制失败而 blocked,P6 issue 必须保持未关闭或显式拆出后续 blocker,不得只凭 public dashboard 200 关闭。 + +P8 哨兵恢复执行 issue 为 [#971](https://github.com/pikasTech/unidesk/issues/971)。P8 closeout 必须回写:SPEC P8 引用、120s warning 证据、`quick-verify-no-business-turn` 或等价业务触达证据、`browser-timeout` 分类修正、中文运维页面验证、`monitor.pikapython.com` 公网入口验证、k3s 内部 Service DNS quick verify 路径、D601/v03 用户入口 smoke 结果,以及仍未解除的真实业务 blocker 是否已单独拆出。 diff --git a/scripts/assets/web-probe-sentinel-dashboard/dashboard.css b/scripts/assets/web-probe-sentinel-dashboard/dashboard.css index 34957b55..573aba95 100644 --- a/scripts/assets/web-probe-sentinel-dashboard/dashboard.css +++ b/scripts/assets/web-probe-sentinel-dashboard/dashboard.css @@ -1,4 +1,4 @@ -/* SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-26-p7-web-probe-sentinel-dashboard. */ +/* SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-26-p8-web-probe-sentinel-recovery. */ /* Responsibility: Responsive visual foundation for the web-probe sentinel dashboard. */ :root { color-scheme: light; diff --git a/scripts/assets/web-probe-sentinel-dashboard/dashboard.js b/scripts/assets/web-probe-sentinel-dashboard/dashboard.js index e9ba12c4..9a427625 100644 --- a/scripts/assets/web-probe-sentinel-dashboard/dashboard.js +++ b/scripts/assets/web-probe-sentinel-dashboard/dashboard.js @@ -1,4 +1,4 @@ -// SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-26-p7-web-probe-sentinel-dashboard. +// SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-26-p8-web-probe-sentinel-recovery. // Responsibility: Browser-side API client, formatting, auto refresh, and base dashboard rendering. /** @@ -258,7 +258,7 @@ async function selectRun(runId) { syncLocationQuery(); renderRuns(); refs.detailSubtitle.textContent = runId; - refs.detailContent.innerHTML = '
Loading
'; + refs.detailContent.innerHTML = '
加载中
'; try { const [detail, views] = await Promise.all([ dashboardApi.runDetail(runId), @@ -292,14 +292,14 @@ function renderDashboard() { function renderOverview() { const overview = state.overview || {}; const status = overview.status || (overview.ok ? "healthy" : "degraded"); - refs.statusPill.textContent = status; + refs.statusPill.textContent = displayStatus(status); refs.statusPill.className = `status-pill ${statusClass(status)}`; - refs.overall.textContent = status; + refs.overall.textContent = displayStatus(status); refs.origin.textContent = overview.publicOrigin || root.dataset.publicOrigin || "-"; const latest = overview.latestRun || null; refs.latestRun.textContent = latest?.runId || latest?.id || "-"; - refs.latestAge.textContent = latest?.updatedAt ? `${formatRelative(latest.updatedAt)} updated` : "-"; + refs.latestAge.textContent = latest?.updatedAt ? `${formatRelative(latest.updatedAt)} 更新` : "-"; const severityCounts = overview.severityCounts || {}; const totalFindings = Object.values(severityCounts).reduce((sum, value) => sum + Number(value || 0), 0); @@ -308,31 +308,31 @@ function renderOverview() { const scheduler = overview.scheduler || {}; const heartbeat = scheduler.heartbeat || {}; - refs.scheduler.textContent = heartbeat.at ? formatRelative(heartbeat.at) : "unknown"; + refs.scheduler.textContent = heartbeat.at ? formatRelative(heartbeat.at) : "未知"; const maxSeconds = overview.targetValidation?.maxSeconds ?? 120; - refs.budget.textContent = `targetValidation ${maxSeconds}s`; + refs.budget.textContent = `targetValidation 预算 ${maxSeconds}s`; const checks = overview.health?.checks || {}; - renderCheckChip(refs.checkConfig, "config", checks.config?.ok, checks.config?.status); - renderCheckChip(refs.checkPvc, "pvc", checks.pvc?.ok, checks.pvc?.stateRoot); - renderCheckChip(refs.checkAnalyzer, "analyzer", checks.analyzer?.ok, "observe analyze"); - renderCheckChip(refs.checkPublic, "public", Boolean(overview.publicOrigin), overview.publicOrigin || "-"); + renderCheckChip(refs.checkConfig, "配置", checks.config?.ok, checks.config?.status); + renderCheckChip(refs.checkPvc, "PVC", checks.pvc?.ok, checks.pvc?.stateRoot); + renderCheckChip(refs.checkAnalyzer, "分析器", checks.analyzer?.ok, "observe analyze"); + renderCheckChip(refs.checkPublic, "公开入口", Boolean(overview.publicOrigin), overview.publicOrigin || "-"); const maintenance = overview.maintenance || {}; - renderCheckChip(refs.checkMaintenance, "maintenance", maintenance.active !== true, maintenance.active ? "active" : "inactive"); + renderCheckChip(refs.checkMaintenance, "维护窗口", maintenance.active !== true, maintenance.active ? "生效中" : "未生效"); } function renderTimeline() { - refs.timelineCount.textContent = `${state.runs.length} recent`; + refs.timelineCount.textContent = `最近 ${formatNumber(state.runs.length)} 次`; if (state.runs.length === 0) { - refs.timeline.innerHTML = '
No timeline
'; + refs.timeline.innerHTML = '
暂无时间线
'; return; } refs.timeline.innerHTML = state.runs.slice(0, 20).map((run) => { const runId = run.runId || run.id || "-"; - const title = `${run.status || "-"} · ${run.findingCount ?? 0} findings · ${run.updatedAt ? formatRelative(run.updatedAt) : "-"}`; + const title = `${displayStatus(run.status)} · ${run.findingCount ?? 0} 个发现 · ${run.updatedAt ? formatRelative(run.updatedAt) : "-"}`; return ``; }).join(""); for (const node of refs.timeline.querySelectorAll("[data-run-id]")) { @@ -341,9 +341,9 @@ function renderTimeline() { } function renderRuns() { - refs.runsCount.textContent = `${state.runs.length} visible`; + refs.runsCount.textContent = `${formatNumber(state.runs.length)} 条可见`; if (state.runs.length === 0) { - refs.runsBody.innerHTML = 'No runs'; + refs.runsBody.innerHTML = '暂无运行'; return; } refs.runsBody.innerHTML = state.runs.map((run) => { @@ -351,10 +351,10 @@ function renderRuns() { const selected = state.selectedRunId === runId ? " selected-row" : ""; return `
${escapeHtml(runId)}
${escapeHtml(run.observerId || "-")} - ${escapeHtml(run.status || "-")} + ${escapeHtml(displayStatus(run.status))} ${escapeHtml(run.scenarioId || "-")} - ${escapeHtml(String(run.findingCount ?? 0))}${run.maxSeverity ? ` ${escapeHtml(run.maxSeverity)}` : ""} - ${escapeHtml(run.updatedAt ? formatRelative(run.updatedAt) : "-")}${escapeHtml(run.maintenance ? "maintenance" : "")} + ${escapeHtml(String(run.findingCount ?? 0))}${run.maxSeverity ? ` ${escapeHtml(displaySeverity(run.maxSeverity))}` : ""} + ${escapeHtml(run.updatedAt ? formatRelative(run.updatedAt) : "-")}${escapeHtml(run.maintenance ? "维护窗口" : "")} `; }).join(""); for (const row of refs.runsBody.querySelectorAll("tr[data-run-id]")) { @@ -364,28 +364,30 @@ function renderRuns() { function renderFindings() { renderFindingAggregation(); - refs.findingsCount.textContent = `${state.findings.length} groups`; + refs.findingsCount.textContent = `${formatNumber(state.findings.length)} 组`; if (state.findings.length === 0) { - refs.findingsList.innerHTML = '
No findings
'; + refs.findingsList.innerHTML = '
暂无发现项
'; return; } refs.findingsList.innerHTML = state.findings.map((item) => { const code = item.code || item.findingId || "finding"; const latestRunId = item.latestRunId || "-"; const hasLatestRun = latestRunId !== "-"; + const codeLabel = displayFindingCode(code); return `
- ${escapeHtml(code)} - ${escapeHtml(item.severity || "unknown")} + ${escapeHtml(codeLabel)}${codeLabel === code ? "" : ` ${escapeHtml(code)}`} + ${escapeHtml(displaySeverity(item.severity))}
-
+
-
count=${escapeHtml(String(item.count ?? 0))} · runs=${escapeHtml(String(item.runCount ?? 0))} · latest=${escapeHtml(item.latestAt ? formatRelative(item.latestAt) : "-")}
+
次数=${escapeHtml(String(item.count ?? 0))} · 运行=${escapeHtml(String(item.runCount ?? 0))} · 最近=${escapeHtml(item.latestAt ? formatRelative(item.latestAt) : "-")}
run=${escapeHtml(latestRunId)} report=${escapeHtml(item.latestReportJsonSha256 || "-")}
-
${escapeHtml(shortText(item.summary || "", 180))}
- +
${escapeHtml(shortText(displayFindingSummary(code, item.summary || ""), 180))}
+
${escapeHtml(findingNextAction(code))}
+
`; }).join(""); for (const button of refs.findingsList.querySelectorAll("[data-open-finding-run]")) { @@ -398,17 +400,17 @@ function renderFindings() { function renderFindingAggregation() { if (state.findings.length === 0) { - refs.findingAggregation.innerHTML = '
No finding aggregation
'; + refs.findingAggregation.innerHTML = '
暂无发现聚合
'; return; } const severityEntries = aggregateFindings((item) => item.severity || "unknown").slice(0, 5); const codeEntries = aggregateFindings((item) => item.code || item.findingId || "unknown").slice(0, 5); const scenarioEntries = aggregateFindings((item) => item.scenarioId || "unknown").slice(0, 5); refs.findingAggregation.innerHTML = [ - aggregationGroup("Severity", severityEntries, "severity"), - aggregationGroup("Code", codeEntries, "code"), - aggregationGroup("Scenario", scenarioEntries, "scenario"), - `
Window
`, + aggregationGroup("严重级别", severityEntries.map((item) => ({ ...item, label: displaySeverity(item.key) })), "severity"), + aggregationGroup("代码", codeEntries.map((item) => ({ ...item, label: displayFindingCode(item.key) })), "code"), + aggregationGroup("场景", scenarioEntries, "scenario"), + `
窗口
`, ].join(""); for (const button of refs.findingAggregation.querySelectorAll("[data-finding-filter]")) { button.addEventListener("click", () => applyFindingFilter(button.dataset.findingFilter, button.dataset.filterValue || "")); @@ -418,7 +420,7 @@ function renderFindingAggregation() { function aggregationGroup(label, entries, filterKey) { const chips = entries.length === 0 ? '-' - : entries.map((entry) => ``).join(""); + : entries.map((entry) => ``).join(""); return `
${escapeHtml(label)}${chips}
`; } @@ -445,8 +447,8 @@ function applyFindingFilter(key, value) { function renderDetail() { if (!state.selectedRunId || !state.runDetail) { - refs.detailSubtitle.textContent = "No run selected"; - refs.detailContent.innerHTML = '
Select a run
'; + refs.detailSubtitle.textContent = "未选择运行"; + refs.detailContent.innerHTML = '
请选择一条运行
'; return; } const detail = state.runDetail; @@ -457,29 +459,29 @@ function renderDetail() { const turnSummaryView = selectedView(state.runViews, "turn-summary"); refs.detailSubtitle.textContent = run.runId || state.selectedRunId; refs.detailContent.innerHTML = [ - detailBlock("Traceability", [ + detailBlock("追溯信息", [ ["run", run.runId || "-"], ["observer", run.observerId || "-"], ["stateDir", run.stateDir || "-"], ["report", run.reportJsonSha256 || "-"], ]), - detailBlock("Summary", [ - ["status", run.status || "-"], + detailBlock("摘要", [ + ["status", displayStatus(run.status)], ["scenario", run.scenarioId || "-"], ["findings", String(run.findingCount ?? 0)], ["artifacts", String(run.artifactCount ?? 0)], ["updated", run.updatedAt || "-"], ["views", Array.isArray(detail.viewsAvailable) ? detail.viewsAvailable.join(", ") : "-"], ]), - detailBlock("Report Summary", safeSummaryRows(detail.summary), "detail-block-wide"), + detailBlock("报告摘要", safeSummaryRows(detail.summary), "detail-block-wide"), detailFindings(findings), detailArtifacts(artifacts), detailCommands(commands), detailTurnSummary(turnSummaryView), detailTraceReader(state.runViews, commands), detailEvidence(detail, artifacts), - detailBlock("Redaction", [ - ["values", detail.valuesRedacted === true ? "redacted" : "-"], + detailBlock("脱敏", [ + ["values", detail.valuesRedacted === true ? "已脱敏" : "-"], ["prompt", detail.redaction?.prompt || "-"], ["assistant", detail.redaction?.assistantFinal || "-"], ]), @@ -495,16 +497,16 @@ function detailBlock(title, rows, className = "") { } function detailFindings(findings) { - if (findings.length === 0) return detailBlock("Run Findings", [["status", "none"]], "detail-block-wide"); - return `
Run Findings + if (findings.length === 0) return detailBlock("运行发现项", [["status", "无"]], "detail-block-wide"); + return `
运行发现项
- + ${findings.map((item) => ` - + - + `).join("")}
SeverityCodeCountSummaryReport
严重级别代码次数摘要报告
${escapeHtml(item.severity || "-")}${escapeHtml(displaySeverity(item.severity))} ${escapeHtml(item.finding_id || item.findingId || "-")} ${escapeHtml(String(item.count ?? 0))}${escapeHtml(shortText(item.summary || "", 220))}${escapeHtml(shortText(displayFindingSummary(item.finding_id || item.findingId || "", item.summary || ""), 220))} ${escapeHtml(shortText(item.report_json_sha256 || item.reportJsonSha256 || "-", 24))}
@@ -514,7 +516,7 @@ function detailFindings(findings) { function detailArtifacts(artifacts) { const screenshot = artifacts.screenshot || {}; - return detailBlock("Artifacts", [ + return detailBlock("产物", [ ["artifactCount", String(artifacts.artifactCount ?? "-")], ["reportJsonSha256", artifacts.reportJsonSha256 || "-"], ["screenshotPath", screenshot.path || "-"], @@ -529,17 +531,17 @@ function detailCommands(commands) { ["turn-summary", commands.turnSummary || "-"], ["trace-frame", commands.traceFrame || "-"], ]; - return `
CLI Commands + return `
CLI 对照命令
${rows.map(([label, command]) => `
${escapeHtml(label)}${escapeHtml(command)}
`).join("")}
`; } function detailTurnSummary(view) { - if (!view) return detailBlock("Turn Summary - Layer 1", [["status", "not indexed"]], "detail-block-wide"); - if (view.ok === false) return detailBlock("Turn Summary - Layer 1", [["status", view.error || "unavailable"]], "detail-block-wide"); + if (!view) return detailBlock("多轮摘要 - 第一层", [["status", "未索引"]], "detail-block-wide"); + if (view.ok === false) return detailBlock("多轮摘要 - 第一层", [["status", view.error || "不可用"]], "detail-block-wide"); const text = redactDisplayText(view.renderedText || ""); - const note = `${formatNumber(view.renderedTextBytes || text.length)} bytes${view.truncated ? " truncated" : ""}`; - return `
Turn Summary - Layer 1 + const note = `${formatNumber(view.renderedTextBytes || text.length)} bytes${view.truncated ? " 已截断" : ""}`; + return `
多轮摘要 - 第一层
${escapeHtml(note)}
${escapeHtml(text || "-")}
`; @@ -550,25 +552,25 @@ function detailTraceReader(response, commands) { const traceView = selectedView(response, "trace-frame"); const choices = traceChoices(turnView?.renderedText || "", traceView?.renderedText || ""); const selectedIndex = Math.min(state.selectedTraceChoiceIndex, Math.max(0, choices.length - 1)); - const selected = choices[selectedIndex] || { label: "stored trace-frame", meta: "current run", key: "stored" }; + const selected = choices[selectedIndex] || { label: "已保存 trace-frame", meta: "当前运行", key: "stored" }; const traceText = traceView?.ok === false ? "" : redactDisplayText(traceView?.renderedText || ""); const traceNote = traceView - ? `${formatNumber(traceView.renderedTextBytes || traceText.length)} bytes${traceView.truncated ? " truncated" : ""}` - : "trace-frame view not indexed"; - return `
Trace Frame - Layer 2 + ? `${formatNumber(traceView.renderedTextBytes || traceText.length)} bytes${traceView.truncated ? " 已截断" : ""}` + : "trace-frame 未索引"; + return `
Trace Frame - 第二层
-
-
Turn / trace / sample choices
+
+
选择 turn / trace / sample
${choices.map((choice, index) => ``).join("")}
-
-
Selected: ${escapeHtml(selected.label)} · ${escapeHtml(traceNote)}
-
${escapeHtml(traceText || (traceView?.ok === false ? traceView.error || "trace-frame unavailable" : "trace-frame view not indexed"))}
+
+
已选择: ${escapeHtml(selected.label)} · ${escapeHtml(traceNote)}
+
${escapeHtml(traceText || (traceView?.ok === false ? traceView.error || "trace-frame 不可用" : "trace-frame 未索引"))}
${finalResponseBlock(traceView)} -
source=${escapeHtml(commands.traceFrame || "-")} · analyzer findings do not rewrite this text
+
source=${escapeHtml(commands.traceFrame || "-")} · analyzer finding 不改写此文字证据
`; @@ -576,17 +578,17 @@ function detailTraceReader(response, commands) { function finalResponseBlock(traceView) { if (!traceView) { - return `
Final Response
trace-frame view not indexed
`; + return `
Final Response
trace-frame 未索引
`; } if (traceView.ok === false) { - return `
Final Response
${escapeHtml(traceView.error || "trace-frame unavailable")}
`; + return `
Final Response
${escapeHtml(traceView.error || "trace-frame 不可用")}
`; } const block = traceView.finalResponse || {}; const text = block.empty === true ? "(空内容)" : redactDisplayText(block.text || ""); const bytes = Number(block.byteCount || 0); return `
Final Response -
${block.empty === true ? "empty" : "available"} · ${formatNumber(bytes)} bytes · values redacted
+
${block.empty === true ? "空内容" : "有内容"} · ${formatNumber(bytes)} bytes · 已脱敏
${escapeHtml(text || "(空内容)")}
`; } @@ -602,19 +604,19 @@ function traceChoices(turnSummaryText, traceFrameText) { .map((line) => line.trim()) .filter((line) => /(trace|sample|total=|final response)/iu.test(line)) .slice(0, 12); - if (lines.length === 0) return [{ label: "stored trace-frame", meta: "current run", key: "stored" }]; + if (lines.length === 0) return [{ label: "已保存 trace-frame", meta: "当前运行", key: "stored" }]; return lines.map((line, index) => { const trace = line.match(/trace(?:Id)?[=: ]+([A-Za-z0-9_.:-]+)/u)?.[1] || line.match(/\btrc_[A-Za-z0-9_.:-]+/u)?.[0] || null; const sample = line.match(/sample(?:Seq)?[=: ]+([0-9]+)/u)?.[1] || null; const turn = line.match(/turn[=: #]+([0-9A-Za-z_.:-]+)/iu)?.[1] || null; - const meta = [turn ? `turn ${turn}` : null, trace ? `trace ${trace}` : null, sample ? `sample ${sample}` : null].filter(Boolean).join(" · ") || `line ${index + 1}`; + const meta = [turn ? `turn ${turn}` : null, trace ? `trace ${trace}` : null, sample ? `sample ${sample}` : null].filter(Boolean).join(" · ") || `第 ${index + 1} 行`; return { label: shortText(redactDisplayText(line), 120), meta, key: `${trace || "line"}-${sample || index}` }; }); } function detailEvidence(detail, artifacts) { const traceability = detail.traceability || {}; - return detailBlock("Evidence", [ + return detailBlock("证据", [ ["source", traceability.source || "-"], ["stateRoot", traceability.stateRoot || "-"], ["stateDir", traceability.stateDir || "-"], @@ -625,7 +627,7 @@ function detailEvidence(detail, artifacts) { } function selectedView(response, viewName) { - if (response?.ok === false) return { ok: false, error: response.error || "unavailable", view: viewName }; + if (response?.ok === false) return { ok: false, error: response.error || "不可用", view: viewName }; const views = Array.isArray(response?.views) ? response.views : []; return views.find((item) => item.view === viewName) || null; } @@ -645,7 +647,7 @@ function renderLoading(show) { } function renderCheckChip(element, label, ok, detail) { - const status = ok ? "ok" : "blocked"; + const status = ok ? "正常" : "阻塞"; element.textContent = `${label} ${status}${detail ? ` · ${shortText(detail, 34)}` : ""}`; element.className = `check-chip ${ok ? "check-ok" : "check-blocked"}`; } @@ -772,7 +774,7 @@ function safeSummaryRows(summary) { if (rows.length >= 8) break; rows.push([key, safeDisplayValue(value)]); } - return rows.length === 0 ? [["status", "not indexed"]] : rows; + return rows.length === 0 ? [["status", "未索引"]] : rows; } function safeDisplayKey(key) { @@ -782,25 +784,25 @@ function safeDisplayKey(key) { function safeDisplayValue(value) { if (value === null || typeof value === "undefined") return "-"; if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return String(value); - if (Array.isArray(value)) return `${value.length} items`; + if (Array.isArray(value)) return `${value.length} 项`; const entries = Object.entries(value).filter(([key]) => safeDisplayKey(key)).slice(0, 4); - if (entries.length === 0) return "redacted object"; + if (entries.length === 0) return "已脱敏对象"; return entries.map(([key, item]) => `${key}=${primitiveValue(item)}`).join(", "); } function primitiveValue(value) { if (value === null || typeof value === "undefined") return "-"; if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return shortText(String(value), 80); - if (Array.isArray(value)) return `${value.length} items`; - return "object"; + if (Array.isArray(value)) return `${value.length} 项`; + return "对象"; } function redactDisplayText(value) { return String(value || "") .split("\n") .map((line) => { - if (/(prompt|cookie|token|authorization|provider|payload|api[_-]?key|password|secret|stdout|stderr)/iu.test(line)) return "[redacted]"; - if (/assistant/iu.test(line) && line.length > 180) return "[assistant text redacted]"; + if (/(prompt|cookie|token|authorization|provider|payload|api[_-]?key|password|secret|stdout|stderr)/iu.test(line)) return "[已脱敏]"; + if (/assistant/iu.test(line) && line.length > 180) return "[assistant 正文已脱敏]"; return shortText(line, 360); }) .join("\n"); @@ -827,14 +829,97 @@ function severityClass(value) { return "severity-unknown"; } +function displayStatus(value) { + const normalized = String(value || "idle").toLowerCase(); + const labels = { + healthy: "健康", + observed: "已观察", + analyzed: "已分析", + pass: "通过", + ok: "正常", + warning: "警告", + degraded: "降级", + planned: "已计划", + running: "运行中", + queued: "排队中", + blocked: "阻塞", + failed: "失败", + error: "错误", + timeout: "超时", + interrupted: "已中断", + idle: "空闲", + }; + return labels[normalized] || String(value || "-"); +} + +function displaySeverity(value) { + const normalized = String(value || "unknown").toLowerCase(); + const labels = { + red: "红色", + critical: "严重", + error: "错误", + warning: "警告", + amber: "警告", + info: "信息", + unknown: "未知", + }; + return labels[normalized] || String(value || "-"); +} + +function displayFindingCode(code) { + const normalized = String(code || "").toLowerCase(); + const labels = { + "quick-verify-no-business-turn": "quick verify 未触达业务 turn", + "observer-command-failed": "观察器控制命令失败", + "runtime-requestfailed": "运行时请求失败", + "runtime-console-alerts": "运行时控制台告警", + "browser-console-or-page-errors": "浏览器控制台或页面错误", + "browser-timeout": "页面 readiness 超时", + "target-page-readiness-timeout": "页面 readiness 超时", + "network-or-api-fetch-bug": "网络或 API 请求异常", + }; + return labels[normalized] || String(code || "finding"); +} + +function displayFindingSummary(code, summary) { + const normalized = String(code || "").toLowerCase(); + const summaries = { + "quick-verify-no-business-turn": "quick verify 没有形成 sendPrompt、session、trace rows 或 Final Response;不能把公开 dashboard 200 当作 HWLAB 恢复证据。", + "observer-command-failed": "observe control command 失败,需要查看 observer timeline 和 failed command 文件确认是 readiness、session API、超时还是 runner shutdown。", + "runtime-requestfailed": "页面运行时存在请求失败,需要按路径聚合确认是 asset/provenance 噪声、public origin、auth/session 还是 Workbench API。", + "runtime-console-alerts": "页面控制台出现告警,需要结合 run detail 的 network/console 证据判断是否影响业务 turn。", + "browser-console-or-page-errors": "浏览器页面报错,需要先看 finalUrl、readiness、session create 和 API 证据,再判断是否是前端缺陷。", + "browser-timeout": "页面或 Workbench readiness 超时,不应默认判断为浏览器安装环境问题。", + "target-page-readiness-timeout": "页面或 Workbench readiness 超时,不应默认判断为浏览器安装环境问题。", + }; + const translated = summaries[normalized]; + if (!translated) return String(summary || ""); + const source = String(summary || "").trim(); + return source && !source.includes(translated) ? `${translated} 原始摘要: ${source}` : translated; +} + +function findingNextAction(code) { + const normalized = String(code || "").toLowerCase(); + const actions = { + "quick-verify-no-business-turn": "下一步: 打开该 run 的 turn-summary/trace-frame,并用 CLI 对照命令确认没有业务 turn。", + "observer-command-failed": "下一步: 查看 observe collect timeline 和 failed command 文件,定位失败阶段。", + "runtime-requestfailed": "下一步: 按请求路径聚合失败,区分网络、auth/session、API 或静态资源问题。", + "runtime-console-alerts": "下一步: 结合 console 样本与业务 trace 判断是否为阻塞级。", + "browser-console-or-page-errors": "下一步: 先查 Workbench readiness 和 session create,再决定是否修前端。", + "browser-timeout": "下一步: 查 finalUrl、loading、session create、network requestfailed 和 reportPath,不先改浏览器安装。", + "target-page-readiness-timeout": "下一步: 查 finalUrl、loading、session create、network requestfailed 和 reportPath,不先改浏览器安装。", + }; + return actions[normalized] || "下一步: 打开最近运行详情,并用 CLI 对照命令复核同一 run/observer/report。"; +} + function formatSeveritySummary(counts) { const entries = Object.entries(counts || {}).filter(([, value]) => Number(value || 0) > 0); - if (entries.length === 0) return "none"; - return entries.map(([key, value]) => `${key} ${formatNumber(Number(value || 0))}`).join(" · "); + if (entries.length === 0) return "无"; + return entries.map(([key, value]) => `${displaySeverity(key)} ${formatNumber(Number(value || 0))}`).join(" · "); } function formatNumber(value) { - return new Intl.NumberFormat("en-US").format(Number(value || 0)); + return new Intl.NumberFormat("zh-CN").format(Number(value || 0)); } function shortText(value, limit) { @@ -846,12 +931,13 @@ function formatRelative(iso) { const ms = Date.parse(String(iso || "")); if (!Number.isFinite(ms)) return "-"; const seconds = Math.max(0, Math.floor((Date.now() - ms) / 1000)); - if (seconds < 60) return `${seconds}s ago`; + if (seconds < 5) return "刚刚"; + if (seconds < 60) return `${seconds} 秒前`; const minutes = Math.floor(seconds / 60); - if (minutes < 60) return `${minutes}m ago`; + if (minutes < 60) return `${minutes} 分钟前`; const hours = Math.floor(minutes / 60); - if (hours < 24) return `${hours}h ago`; - return `${Math.floor(hours / 24)}d ago`; + if (hours < 24) return `${hours} 小时前`; + return `${Math.floor(hours / 24)} 天前`; } function escapeHtml(value) { diff --git a/scripts/src/hwlab-node-web-probe-summary.ts b/scripts/src/hwlab-node-web-probe-summary.ts index 5cb7dd0e..89efbc18 100644 --- a/scripts/src/hwlab-node-web-probe-summary.ts +++ b/scripts/src/hwlab-node-web-probe-summary.ts @@ -1,4 +1,5 @@ // SPEC: PJ2026-01060505 Workbench Performance draft-2026-06-17-p0. +// SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-26-p8-web-probe-sentinel-recovery. // Responsibility: Redacted web-probe summaries and issue-ready compaction helpers. function record(value: unknown): Record { @@ -392,7 +393,8 @@ function webProbeRunFailureKind(degradedReason: string | null, promptValidation: if (degradedReason === "agent-terminal-timeout") return "agent-runtime-timeout"; if (/trace-fetch|api|fetch|http|network/iu.test(degradedReason)) return "network-or-api-fetch-bug"; if (/auth|login|credential/iu.test(degradedReason)) return "target-auth-bug"; - if (/browser|timeout|playwright|chromium/iu.test(degradedReason)) return "browser-environment-bug"; + if (/browser-timeout|selector-timeout|readiness|session|composer|navigation|load-jitter|timeout/iu.test(degradedReason)) return "target-page-readiness-timeout"; + if (/browser|playwright|chromium/iu.test(degradedReason)) return "browser-environment-bug"; const failures = Array.isArray(promptValidation.failures) ? promptValidation.failures.join(" ") : ""; if (/final-response|agent-message|markdown|completed/iu.test(failures)) return "unmet-expectation"; return "user-facing-web-bug"; @@ -424,6 +426,7 @@ function webProbeRunNextAction( if (failureKind === "network-or-api-fetch-bug") return "Inspect trace/session API fetch fields in reportPath and retry after checking target API availability."; if (failureKind === "agent-runtime-timeout") return "Inspect summary.traceId/sessionId and the corresponding Code Agent/AgentRun runtime; the browser probe submitted the prompt and observed trace events, but no terminal agent response arrived before the wait boundary."; if (failureKind === "target-auth-bug") return "Inspect credential sourceRef/fingerprint and /auth/login status; do not print secrets."; + if (failureKind === "target-page-readiness-timeout") return "Inspect finalUrl, readiness/session-create state, Workbench loading indicators, network request failures, and session API evidence in reportPath before changing browser launcher settings."; if (failureKind === "browser-environment-bug") return "Inspect browser launcher and Playwright availability on the target workspace."; if (promptSubmitted && !traceRequested) return "Rerun with trace sampling if trace evidence is required."; return degradedReason === null ? null : "Inspect reportPath for full redacted details, then rerun the same node/lane entry."; diff --git a/scripts/src/hwlab-node-web-sentinel-cicd.ts b/scripts/src/hwlab-node-web-sentinel-cicd.ts index cb27095d..c5ffa702 100644 --- a/scripts/src/hwlab-node-web-sentinel-cicd.ts +++ b/scripts/src/hwlab-node-web-sentinel-cicd.ts @@ -1,4 +1,5 @@ // SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-25-p0-web-probe-sentinel. +// SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-26-p8-web-probe-sentinel-recovery. // Responsibility: YAML-first CI/CD, image, GitOps and Argo command plan for the web-probe sentinel. import { createHash, randomUUID } from "node:crypto"; import { existsSync, readFileSync } from "node:fs"; @@ -1353,10 +1354,28 @@ function sentinelPayloadFromLogs(logsTail: string): Record { return {}; } -function sentinelElapsedWarnings(value: unknown): string[] { +function sentinelElapsedWarnings(value: unknown, subject = "sentinel confirmed operation"): string[] { const elapsedMs = typeof value === "number" && Number.isFinite(value) ? value : null; if (elapsedMs === null || elapsedMs <= 120_000) return []; - return [`sentinel confirmed operation exceeded 120s (${Math.round(elapsedMs / 1000)}s); investigate env-reuse/git mirror/source build path before treating this as normal.`]; + return [`${subject} exceeded 120s (${Math.round(elapsedMs / 1000)}s); treat this as a severe timeout and investigate env-reuse/git mirror/source build path plus the current wait stage before retrying.`]; +} + +function mergeWarnings(...items: readonly (readonly unknown[] | unknown)[]): string[] { + const warnings: string[] = []; + for (const item of items) { + const values = Array.isArray(item) ? item : [item]; + for (const value of values) { + if (value === undefined || value === null || value === "") continue; + const warning = text(value).trim(); + if (warning.length > 0 && warning !== "-" && !warnings.includes(warning)) warnings.push(warning); + } + } + return warnings; +} + +function withWarnings(payload: Record, warnings: readonly unknown[]): Record { + const merged = mergeWarnings(payload.warnings, warnings); + return merged.length === 0 ? payload : { ...payload, warnings: merged, valuesRedacted: true }; } function sentinelProgressEvent(event: string, payload: Record): void { @@ -1469,6 +1488,7 @@ function runSentinelMaintenance(state: SentinelCicdState, options: Extract): RenderedCliResult { const command = "web-probe sentinel validate"; + const startedAt = Date.now(); const initialHealth = callSentinelService(state, "GET", "/api/health", null, options.timeoutSeconds); let quickVerify: Record | null = null; if (options.quickVerify) { @@ -1508,6 +1528,9 @@ function runSentinelValidate(state: SentinelCicdState, options: Extract { + const startedAt = Date.now(); + const elapsedMs = () => Date.now() - startedAt; + const elapsedWarnings = () => sentinelElapsedWarnings(elapsedMs(), "quick verify confirm-wait"); const scenarioId = stringAt(state.cicd, "targetValidation.scenarioId"); const maxSeconds = numberAt(state.cicd, "targetValidation.maxSeconds"); const scenario = findScenario(state, scenarioId); @@ -1595,6 +1621,7 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou steps.push({ phase: "observe-start", ok: started.ok, result: started.result }); const observerId = observerIdFromText(String(record(started.result).stdoutPreview ?? "")); if (!started.ok || observerId === null) { + const findings = quickVerifyControlFindings("observe-start-failed", 0, null, null); return recordQuickVerify(state, { ok: false, runId, @@ -1602,8 +1629,12 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou reason, status: "blocked", observerId, + elapsedMs: elapsedMs(), steps, failure: "observe-start-failed", + findingCount: findings.length, + findings, + warnings: elapsedWarnings(), valuesRedacted: true, }); } @@ -1621,7 +1652,8 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou promptIndex, steps, failure: "quick-verify-timeout-over-120s", - warnings: ["quick verify exceeded the configured 120s targetValidation budget; investigate env-reuse/git mirror/source build path before retrying."], + elapsedMs: elapsedMs(), + warnings: mergeWarnings("quick verify exceeded the configured 120s targetValidation budget; investigate env-reuse/git mirror/source build path before retrying.", elapsedWarnings()), promptSource: prompts.summary, })); } @@ -1642,6 +1674,8 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou promptIndex, steps, failure: `observe-command-${type}-failed`, + elapsedMs: elapsedMs(), + warnings: elapsedWarnings(), promptSource: prompts.summary, })); } @@ -1658,7 +1692,8 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou steps, failure: text(waitResult.failure ?? "observe-turn-terminal-wait-failed"), promptSource: prompts.summary, - warnings: Array.isArray(waitResult.warnings) ? waitResult.warnings : [], + elapsedMs: elapsedMs(), + warnings: mergeWarnings(Array.isArray(waitResult.warnings) ? waitResult.warnings : [], elapsedWarnings()), })); } } @@ -1670,7 +1705,11 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou const artifactSummary = indexEntry === null ? { ok: false, reason: "observe-index-entry-missing", observerId, valuesRedacted: true } : readAnalysisSummaryFromWorkspace(state, indexEntry.stateDir, remainingSeconds(deadline, 30)); const turnSummary = collectObserveView(state, observerId, "turn-summary", null, remainingSeconds(deadline, 30)); const traceFrame = collectObserveView(state, observerId, "trace-frame", promptIndex > 0 ? promptIndex : null, remainingSeconds(deadline, 30)); - const ok = analysis.ok && record(artifactSummary).ok === true; + const controlFindings = quickVerifyControlFindings(null, promptIndex, turnSummary, traceFrame); + const artifactSummaryRecord = record(artifactSummary); + const artifactFindings = Array.isArray(artifactSummaryRecord.findings) ? artifactSummaryRecord.findings.map(record) : []; + const findings = mergeFindingRecords(artifactFindings, controlFindings); + const ok = analysis.ok && record(artifactSummary).ok === true && controlFindings.length === 0; return recordQuickVerify(state, { ok, runId, @@ -1678,10 +1717,12 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou reason, status: ok ? "analyzed" : "blocked", observerId, + elapsedMs: elapsedMs(), stateDir: indexEntry?.stateDir ?? null, reportJsonSha256: stringAtNullable(artifactSummary, "reportJsonSha256"), - findingCount: numberAtNullable(artifactSummary, "findingCount") ?? 0, + findingCount: findings.length, artifactCount: numberAtNullable(artifactSummary, "artifactCount") ?? 0, + failure: controlFindings.length > 0 ? "quick-verify-no-business-turn" : null, promptSource: prompts.summary, steps, analysis: artifactSummary, @@ -1690,9 +1731,10 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou "turn-summary": { renderedText: typeof turnSummary.renderedText === "string" ? turnSummary.renderedText : null, ok: turnSummary.ok }, "trace-frame": { renderedText: typeof traceFrame.renderedText === "string" ? traceFrame.renderedText : null, ok: traceFrame.ok }, }, - findings: Array.isArray(record(artifactSummary).findings) ? record(artifactSummary).findings : [], + findings, screenshot: record(artifactSummary).screenshot, publicOrigin: stringAt(state.publicExposure, "publicBaseUrl"), + warnings: elapsedWarnings(), valuesRedacted: true, }); } @@ -1706,6 +1748,7 @@ function finalizeQuickVerifyFailure(state: SentinelCicdState, input: { readonly steps: readonly Record[]; readonly failure: string; readonly promptSource?: Record; + readonly elapsedMs?: number; readonly warnings?: readonly unknown[]; }): Record { const cleanupSteps: Record[] = []; @@ -1741,6 +1784,10 @@ function finalizeQuickVerifyFailure(state: SentinelCicdState, input: { : readAnalysisSummaryFromWorkspace(state, indexEntry.stateDir, 30); const turnSummary = collectObserveView(state, input.observerId, "turn-summary", null, 30); const traceFrame = collectObserveView(state, input.observerId, "trace-frame", input.promptIndex > 0 ? input.promptIndex : null, 30); + const controlFindings = quickVerifyControlFindings(input.failure, input.promptIndex, turnSummary, traceFrame); + const artifactSummaryRecord = record(artifactSummary); + const artifactFindings = Array.isArray(artifactSummaryRecord.findings) ? artifactSummaryRecord.findings.map(record) : []; + const findings = mergeFindingRecords(artifactFindings, controlFindings); return { ok: false, runId: input.runId, @@ -1748,9 +1795,10 @@ function finalizeQuickVerifyFailure(state: SentinelCicdState, input: { reason: input.reason, status: "blocked", observerId: input.observerId, + elapsedMs: input.elapsedMs ?? null, stateDir: indexEntry?.stateDir ?? null, reportJsonSha256: stringAtNullable(artifactSummary, "reportJsonSha256"), - findingCount: numberAtNullable(artifactSummary, "findingCount") ?? 0, + findingCount: findings.length, artifactCount: numberAtNullable(artifactSummary, "artifactCount") ?? 0, failure: input.failure, promptSource: input.promptSource, @@ -1761,10 +1809,10 @@ function finalizeQuickVerifyFailure(state: SentinelCicdState, input: { "turn-summary": { renderedText: typeof turnSummary.renderedText === "string" ? turnSummary.renderedText : null, ok: turnSummary.ok }, "trace-frame": { renderedText: typeof traceFrame.renderedText === "string" ? traceFrame.renderedText : null, ok: traceFrame.ok }, }, - findings: Array.isArray(record(artifactSummary).findings) ? record(artifactSummary).findings : [], + findings, screenshot: record(artifactSummary).screenshot, publicOrigin: stringAt(state.publicExposure, "publicBaseUrl"), - warnings: Array.isArray(input.warnings) ? input.warnings.map(text) : [], + warnings: mergeWarnings(Array.isArray(input.warnings) ? input.warnings : [], sentinelElapsedWarnings(input.elapsedMs ?? null, "quick verify confirm-wait")), valuesRedacted: true, }; } @@ -1782,6 +1830,9 @@ function recordQuickVerify(state: SentinelCicdState, payload: Record[], extra: readonly Record[]): Record[] { + const merged: Record[] = []; + const seen = new Set(); + for (const item of [...primary, ...extra]) { + const id = stringAtNullable(item, "id") ?? stringAtNullable(item, "kind") ?? stringAtNullable(item, "code") ?? stringAtNullable(item, "finding_id") ?? "finding"; + const severity = stringAtNullable(item, "severity") ?? stringAtNullable(item, "level") ?? "unknown"; + const key = `${id}\0${severity}`; + if (seen.has(key)) continue; + seen.add(key); + merged.push(item); + } + return merged; +} + +function quickVerifyControlFindings(failure: string | null, promptIndex: number, turnSummary: Record | null, traceFrame: Record | null): Record[] { + const rendered = [ + typeof turnSummary?.renderedText === "string" ? turnSummary.renderedText : "", + typeof traceFrame?.renderedText === "string" ? traceFrame.renderedText : "", + ].join("\n"); + const noPrompt = promptIndex <= 0 || /无\s*sendPrompt|no\s+sendPrompt/iu.test(rendered); + const noTrace = /无\s*trace\s*rows|no\s+trace\s+rows|traceId=-|routeSession=-|activeSession=-/iu.test(rendered); + const emptyFinal = /Final Response[\s\S]*\(空内容\)/iu.test(rendered); + if (!noPrompt && !noTrace && !emptyFinal && failure !== "observe-start-failed") return []; + return [{ + id: "quick-verify-no-business-turn", + severity: "red", + count: 1, + summary: "quick verify did not reach a durable business turn/session/trace rows/final response; public dashboard health cannot be treated as HWLAB recovery.", + failure: failure ?? null, + promptIndex, + valuesRedacted: true, + }]; +} + function compactCommandWithTail(result: CommandResult): CompactCommandResult & { stdoutTail: string; stderrTail: string } { return { ...compactCommand(result), diff --git a/scripts/src/hwlab-node-web-sentinel-dashboard-assets.ts b/scripts/src/hwlab-node-web-sentinel-dashboard-assets.ts index 4942bbf0..4b8c2b92 100644 --- a/scripts/src/hwlab-node-web-sentinel-dashboard-assets.ts +++ b/scripts/src/hwlab-node-web-sentinel-dashboard-assets.ts @@ -1,4 +1,4 @@ -// SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-26-p7-web-probe-sentinel-dashboard. +// SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-26-p8-web-probe-sentinel-recovery. // Responsibility: Static dashboard shell and asset serving for the web-probe sentinel frontend. import { readFileSync } from "node:fs"; import { rootPath } from "./config"; @@ -11,7 +11,7 @@ interface DashboardShellConfig { } const DASHBOARD_ASSET_ROOT = "scripts/assets/web-probe-sentinel-dashboard"; -const DASHBOARD_CONTRACT_VERSION = "draft-2026-06-26-p7-web-probe-sentinel-dashboard"; +const DASHBOARD_CONTRACT_VERSION = "draft-2026-06-26-p8-web-probe-sentinel-recovery"; export function renderWebProbeSentinelDashboardHtml(config: DashboardShellConfig): string { const publicOrigin = stringOrNull(config.publicExposure.publicBaseUrl) ?? ""; @@ -20,7 +20,7 @@ export function renderWebProbeSentinelDashboardHtml(config: DashboardShellConfig - HWLAB Web Probe Sentinel + HWLAB Web哨兵 @@ -33,58 +33,58 @@ export function renderWebProbeSentinelDashboardHtml(config: DashboardShellConfig data-config-ready="${config.plan.ok ? "true" : "false"}" data-contract-version="${DASHBOARD_CONTRACT_VERSION}" > -
+
-

HWLAB Web Probe Sentinel

+

HWLAB Web哨兵

${escapeHtml(config.node)} / ${escapeHtml(config.lane)}

-
- idle +
+ 空闲 - -
- + -
+
- Overall + 总体状态 - -
- Latest run + 最近运行 - -
- Findings + 发现项 0 -
- Scheduler + 调度器 - -
-
+
config - pvc - analyzer - @@ -94,7 +94,7 @@ export function renderWebProbeSentinelDashboardHtml(config: DashboardShellConfig
-

Run Timeline

+

运行时间线

-
@@ -103,34 +103,34 @@ export function renderWebProbeSentinelDashboardHtml(config: DashboardShellConfig
-

Runs

+

运行历史

-
- +
- - - - - + + + + + @@ -170,38 +170,38 @@ export function renderWebProbeSentinelDashboardHtml(config: DashboardShellConfig
-

Findings

+

发现分析

-
- +
@@ -210,8 +210,8 @@ export function renderWebProbeSentinelDashboardHtml(config: DashboardShellConfig diff --git a/scripts/src/hwlab-node-web-sentinel-service.ts b/scripts/src/hwlab-node-web-sentinel-service.ts index ca4d794a..f87610a3 100644 --- a/scripts/src/hwlab-node-web-sentinel-service.ts +++ b/scripts/src/hwlab-node-web-sentinel-service.ts @@ -1,5 +1,6 @@ // SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-25-p0-web-probe-sentinel. // SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-26-p7-web-probe-sentinel-dashboard. +// SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-26-p8-web-probe-sentinel-recovery. // Responsibility: Persistent HTTP wrapper service for web-probe observe scheduling, index, health, metrics, maintenance, and dashboard. import { Buffer } from "node:buffer"; import { createHash, randomUUID } from "node:crypto"; @@ -11,7 +12,7 @@ import { renderWebProbeSentinelDashboardHtml, webProbeSentinelDashboardAssetResp import { webProbeSentinelConfigPlan, type WebProbeSentinelConfigPlan } from "./hwlab-node-web-sentinel-config"; import type { HwlabRuntimeLaneSpec } from "./hwlab-node-lanes"; -const DASHBOARD_CONTRACT_VERSION = "draft-2026-06-26-p7-web-probe-sentinel-dashboard"; +const DASHBOARD_CONTRACT_VERSION = "draft-2026-06-26-p8-web-probe-sentinel-recovery"; const DASHBOARD_MAX_TEXT_BYTES = 16_000; export interface WebProbeSentinelServiceConfig {
RunStatusScenarioFindingsUpdated运行状态场景发现项更新时间