From f89e88133c39d50a25f5d61e8a8fc8e417f75fcd Mon Sep 17 00:00:00 2001 From: Codex Date: Thu, 2 Jul 2026 08:25:36 +0000 Subject: [PATCH] fix web sentinel dashboard verify fallback --- docs/reference/observability.md | 2 + scripts/src/hwlab-node-web-sentinel-p5.ts | 255 +++++++++++++++++++++- 2 files changed, 255 insertions(+), 2 deletions(-) diff --git a/docs/reference/observability.md b/docs/reference/observability.md index a107dba6..7c30a063 100644 --- a/docs/reference/observability.md +++ b/docs/reference/observability.md @@ -33,6 +33,8 @@ Web/Workbench trace、Web 哨兵和 `web-probe observe` 的人工判定入口以 Web 哨兵 dashboard/API 展示问题的第一事实源是 sentinel runner 的 `/api/overview`、`/api/runs`、`/api/runs/{id}`、`/api/findings` 和 `web-probe sentinel dashboard verify|screenshot` 远程浏览器证据。OTel/Tempo 查询不到 `hwlab-web-probe-sentinel` service span 或具体 `sentinel-run-*` id 时,只能说明当前 instrumentation 或保留窗口没有覆盖这条 dashboard/API 路径;不得因此把 UI/API 口径问题判为已追穿,也不得阻塞已由 API/DOM 证据定位的修复。需要继续追 runner 内部链路时,应把缺少 Web 哨兵 span 作为 instrumentation 问题登记到对应治理 issue。 +`web-probe sentinel dashboard verify` 必须区分页面渲染证据和 API 可达性证据。远程浏览器已经拿到 HTML shell、`data-monitor-ready=true`、目标 run/曲线 DOM 和内存/request 图表时,页面二次 `fetch("/api/*")` 遇到瞬时 `ERR_NETWORK_CHANGED`、abort 或同类浏览器网络抖动,不得直接归类为 dashboard render failed;应优先保留浏览器 DOM 证据,并用受控 runner service API 读取 `/api/overview`、`/api/runs` 和 `/api/runs/{id}` 作为显式 `service-fallback` 证据。fallback 只能修正 verifier 证据来源和合同判断,不能遮盖真实页面 shell 未 ready、图表未渲染、sentinelId/route 不匹配、目标 run 不一致、页面 error、horizontal overflow 或 service API 本身失败。 + Web 哨兵 findings 可见性要同时核对 runner API 和已有 observe artifact。若某个 run 的公开 `/api/report?view=findings&run=` 只显示 WBC-003,但 `web-probe sentinel report --run --view findings --raw` 能从 `analysis/report.json` 读出 red/amber analyzer findings,根因是索引或 artifact 可见性遮盖,不是业务没有产生 warning/error。此时应回填或重建这条既有 run 的 report index,并保留原有 report views;不要通过启动新的哨兵 run 来解释旧记录。 Web 哨兵 check code 是排障和验收合同,必须保持语义单一且确定。一个 code/id 只能对应一种可执行处置路径;如果一个 finding 可能同时表示“没有采集到业务轮次”“目标轮次缺 traceId”“trace rows/projection 缺失”“Final Response 为空但仍在运行/取消”“Final Response 为空且已失败/终止”等多种状态,就必须拆成多个固定 code/id。不得用同一个 code 的动态标题、动态 summary 或 rootCause 文案来承载多种互斥语义;动态字段只能补充证据,不能改变 code 的语义。 diff --git a/scripts/src/hwlab-node-web-sentinel-p5.ts b/scripts/src/hwlab-node-web-sentinel-p5.ts index 722b240d..d4bec958 100644 --- a/scripts/src/hwlab-node-web-sentinel-p5.ts +++ b/scripts/src/hwlab-node-web-sentinel-p5.ts @@ -681,7 +681,9 @@ export function probeSentinelDashboardBrowser(state: SentinelCicdState, options: const result = job.result; const transport = record(job.transport); const remote = record(transport.remote); - const page = parseDashboardBrowserPayload(typeof remote.stdoutTail === "string" ? remote.stdoutTail : ""); + const browserPage = parseDashboardBrowserPayload(typeof remote.stdoutTail === "string" ? remote.stdoutTail : ""); + const serviceApi = dashboardPageNeedsServiceApiFallback(browserPage) ? probeSentinelDashboardServiceApi(state, options, browserPage) : null; + const page = applyDashboardServiceApiFallback(browserPage, serviceApi); const artifacts = Array.isArray(transport.artifacts) ? transport.artifacts.map(record).map(compactDashboardArtifact) : []; const screenshot = artifacts.find((artifact) => typeof artifact.localPath === "string" && String(artifact.localPath).endsWith(".png")) ?? null; const browserOk = page?.ok === true; @@ -699,6 +701,7 @@ export function probeSentinelDashboardBrowser(state: SentinelCicdState, options: route, viewport: options.viewport, page, + serviceApi, screenshot, artifacts, artifactCount: artifacts.length, @@ -1129,6 +1132,241 @@ function parseDashboardBrowserPayload(textValue: string): Record | null): boolean { + if (page === null) return false; + const dom = record(page.dom); + const contract = record(dom.contract); + const api = record(dom.api); + const memorySummary = record(dom.memorySummary); + return contract.apiOverview !== true + || contract.apiRuns !== true + || memorySummary.contractOk !== true + || record(api.overview).ok !== true + || record(api.runs).ok !== true; +} + +function probeSentinelDashboardServiceApi(state: SentinelCicdState, options: Extract, page: Record | null): Record { + const timeoutSeconds = Math.max(5, Math.min(options.commandTimeoutSeconds, 30)); + const overview = callSentinelService(state, "GET", "/api/overview", null, timeoutSeconds); + const runs = callSentinelService(state, "GET", "/api/runs?limit=30&sort=updated", null, timeoutSeconds); + const overviewBody = record(overview.bodyJson); + const runsBody = record(runs.bodyJson); + const runItems = dashboardApiRuns(runsBody); + const latestRun = runItems[0] ?? {}; + const latestRunId = dashboardRunId(latestRun); + const pageMemory = record(record(page?.dom).memorySummary); + const selectedRunId = options.runId + ?? stringAtNullable(pageMemory, "runId") + ?? latestRunId; + const detail = selectedRunId === null ? null : callSentinelService(state, "GET", `/api/runs/${encodeURIComponent(selectedRunId)}`, null, timeoutSeconds); + const detailBody = record(detail?.bodyJson); + const detailRun = record(detailBody.run); + const memory = record(detailBody.memory); + const pageSeries = Array.isArray(memory.pageSeries) ? memory.pageSeries.map(record) : []; + const sampleCount = numberValue(memory.sampleCount); + const expectedMemorySamples = pageSeries.length > 0 || sampleCount > 0; + const overviewMatches = state.sentinelId.length === 0 || stringAtNullable(overviewBody, "sentinelId") === state.sentinelId; + const runsPayloadMatches = state.sentinelId.length === 0 || stringAtNullable(runsBody, "sentinelId") === state.sentinelId; + const runRowsMatch = state.sentinelId.length === 0 || runItems.every((run) => stringAtNullable(run, "sentinelId") === null || stringAtNullable(run, "sentinelId") === state.sentinelId); + const detailOk = detail === null ? false : detail.ok === true && detailBody.ok !== false; + return { + ok: overview.ok === true + && runs.ok === true + && overviewBody.ok !== false + && runsBody.ok !== false + && Array.isArray(runItems) + && overviewMatches + && runsPayloadMatches + && runRowsMatch + && (selectedRunId === null || detailOk), + source: "sentinel-service-api", + overview: { + ok: overview.ok === true && overviewBody.ok !== false, + httpStatus: overview.httpStatus ?? null, + sentinelId: stringAtNullable(overviewBody, "sentinelId"), + latestRunId: dashboardRunId(record(overviewBody.latestRun)), + matches: overviewMatches, + }, + runs: { + ok: runs.ok === true && runsBody.ok !== false && Array.isArray(runItems), + httpStatus: runs.httpStatus ?? null, + sentinelId: stringAtNullable(runsBody, "sentinelId"), + count: runItems.length, + latestRunId, + matches: runsPayloadMatches, + runRowsMatch, + }, + detail: detail === null ? null : { + ok: detailOk, + httpStatus: detail.httpStatus ?? null, + runId: dashboardRunId(detailRun) ?? selectedRunId, + }, + selectedRunId, + latestRun: { + runId: latestRunId, + typeCount: numberValue(latestRun.findingTypeCount ?? latestRun.findingCount ?? latestRun.finding_count), + durationMinutes: numberValue(latestRun.runDurationMinutes ?? latestRun.durationMinutes ?? record(latestRun.timing).durationMinutes), + severityKeys: Object.keys(record(latestRun.severityCounts)).sort(), + }, + memory: { + ok: detailOk && (memory.ok !== false || !expectedMemorySamples), + pageCount: pageSeries.length, + sampleCount, + source: stringAtNullable(memory, "source"), + expectedFromApi: expectedMemorySamples, + }, + valuesRedacted: true, + }; +} + +function applyDashboardServiceApiFallback(page: Record | null, serviceApi: Record | null): Record | null { + if (page === null || serviceApi === null || serviceApi.ok !== true) return page; + const dom = { ...record(page.dom) }; + const contract = { ...record(dom.contract) }; + const api = { ...record(dom.api) }; + const sentinelBoundary = { ...record(dom.sentinelBoundary) }; + const latestRunCounts = { ...record(dom.latestRunCounts) }; + const targetRunCounts = { ...record(dom.targetRunCounts) }; + const requestedRunSelection = { ...record(dom.requestedRunSelection) }; + const memorySummary = { ...record(dom.memorySummary) }; + const serviceOverview = record(serviceApi.overview); + const serviceRuns = record(serviceApi.runs); + const serviceDetail = record(serviceApi.detail); + const serviceLatestRun = record(serviceApi.latestRun); + const serviceMemory = record(serviceApi.memory); + const selectedRunId = stringAtNullable(serviceApi, "selectedRunId"); + const expectedMemory = serviceMemory.expectedFromApi === true; + const memoryRunId = stringAtNullable(memorySummary, "runId"); + const memoryMatchesTarget = selectedRunId === null || memoryRunId === selectedRunId; + const memoryChartPresent = memorySummary.present === true; + const memoryPageCount = numberValue(memorySummary.pageCount); + const apiPageCount = numberValue(serviceMemory.pageCount); + const memoryContractOk = serviceDetail.ok === true + && (expectedMemory !== true || (memoryChartPresent && memoryMatchesTarget && memoryPageCount === apiPageCount)); + + api.overview = { + ...record(api.overview), + ok: true, + httpStatus: serviceOverview.httpStatus ?? 200, + source: "service-fallback", + }; + api.runs = { + ...record(api.runs), + ok: true, + httpStatus: serviceRuns.httpStatus ?? 200, + source: "service-fallback", + }; + api.targetDetail = { + ...record(api.targetDetail), + ok: serviceDetail.ok === true, + httpStatus: serviceDetail.httpStatus ?? null, + source: "service-fallback", + }; + + contract.apiOverview = true; + contract.apiRuns = true; + contract.runCount = Math.max(numberValue(contract.runCount), numberValue(serviceRuns.count)); + contract.latestRunId = stringAtNullable(contract, "latestRunId") ?? stringAtNullable(serviceLatestRun, "runId"); + contract.latestFindingTypeCount = numberValue(contract.latestFindingTypeCount || serviceLatestRun.typeCount); + contract.memoryContract = memoryContractOk; + + const browserOverviewSentinelId = stringAtNullable(sentinelBoundary, "overviewSentinelId"); + const browserRunsSentinelId = stringAtNullable(sentinelBoundary, "runsSentinelId"); + const browserRunRows = numberValue(dom.runRows); + sentinelBoundary.overviewSentinelId = browserOverviewSentinelId ?? stringAtNullable(serviceOverview, "sentinelId"); + sentinelBoundary.runsSentinelId = browserRunsSentinelId ?? stringAtNullable(serviceRuns, "sentinelId"); + sentinelBoundary.overviewMatches = browserOverviewSentinelId === null ? serviceOverview.matches === true : sentinelBoundary.overviewMatches; + sentinelBoundary.runsPayloadMatches = browserRunsSentinelId === null ? serviceRuns.matches === true : sentinelBoundary.runsPayloadMatches; + sentinelBoundary.runRowsMatch = browserRunRows === 0 ? serviceRuns.runRowsMatch === true : sentinelBoundary.runRowsMatch; + + latestRunCounts.runId = stringAtNullable(latestRunCounts, "runId") ?? stringAtNullable(serviceLatestRun, "runId"); + latestRunCounts.typeCount = numberValue(latestRunCounts.typeCount || serviceLatestRun.typeCount); + latestRunCounts.durationMinutes = numberValue(latestRunCounts.durationMinutes || serviceLatestRun.durationMinutes); + latestRunCounts.severityKeys = Array.isArray(latestRunCounts.severityKeys) && latestRunCounts.severityKeys.length > 0 + ? latestRunCounts.severityKeys + : Array.isArray(serviceLatestRun.severityKeys) ? serviceLatestRun.severityKeys : []; + + dom.runRows = Math.max(numberValue(dom.runRows), numberValue(serviceRuns.count)); + targetRunCounts.runId = stringAtNullable(targetRunCounts, "runId") ?? selectedRunId; + targetRunCounts.requestMatched = targetRunCounts.requestMatched !== false; + const requestedRunId = stringAtNullable(requestedRunSelection, "requestedRunId"); + if (requestedRunId !== null && selectedRunId === requestedRunId && memoryMatchesTarget) { + requestedRunSelection.browserReason = requestedRunSelection.reason ?? null; + requestedRunSelection.ok = true; + requestedRunSelection.reason = "service-fallback-memory-match"; + } + + memorySummary.targetRunId = stringAtNullable(memorySummary, "targetRunId") ?? selectedRunId; + memorySummary.matchesTargetRun = memoryMatchesTarget; + memorySummary.apiOk = serviceDetail.ok === true; + memorySummary.apiPageCount = apiPageCount; + memorySummary.apiSampleCount = numberValue(serviceMemory.sampleCount); + memorySummary.apiSource = stringAtNullable(serviceMemory, "source"); + memorySummary.expectedFromApi = expectedMemory; + memorySummary.contractOk = memoryContractOk; + memorySummary.status = serviceDetail.ok !== true + ? "api-unavailable" + : expectedMemory ? memoryContractOk ? "rendered" : "mismatch" : "no-samples"; + memorySummary.source = memorySummary.source ?? "service-fallback"; + + dom.contract = contract; + dom.api = api; + dom.sentinelBoundary = sentinelBoundary; + dom.latestRunCounts = latestRunCounts; + dom.targetRunCounts = targetRunCounts; + dom.requestedRunSelection = requestedRunSelection; + dom.memorySummary = memorySummary; + dom.effectiveApiSource = "service-fallback"; + + const navigationOk = page.navigationError === null || (dom.shell === true && dom.ready === true); + const manualTrigger = record(page.manualTrigger); + const ok = navigationOk + && numberValue(page.httpStatus) >= 200 + && numberValue(page.httpStatus) < 300 + && dom.shell === true + && dom.ready === true + && contract.htmlShell === true + && contract.appReady === true + && contract.apiOverview === true + && contract.apiRuns === true + && sentinelBoundary.datasetMatches === true + && sentinelBoundary.overviewMatches === true + && sentinelBoundary.runsPayloadMatches === true + && sentinelBoundary.runRowsMatch === true + && sentinelBoundary.routePrefixMatches === true + && dom.errorVisible !== true + && requestedRunSelection.ok === true + && manualTrigger.ok === true + && record(dom.chartTiming).ok === true + && memorySummary.contractOk === true + && record(dom.layout).horizontalOverflow !== true + && numberValue(page.pageErrorCount) === 0; + + return { + ...page, + ok, + dom, + effectiveApiSource: "service-fallback", + valuesRedacted: true, + }; +} + +function dashboardApiRuns(payload: Record): Record[] { + const runs = payload.runs; + if (Array.isArray(runs)) return runs.map(record); + const items = payload.items; + if (Array.isArray(items)) return items.map(record); + return []; +} + +function dashboardRunId(value: Record): string | null { + return stringAtNullable(value, "id") ?? stringAtNullable(value, "runId"); +} + +function numberValue(value: unknown): number { + return typeof value === "number" && Number.isFinite(value) ? value : Number.isFinite(Number(value)) ? Number(value) : 0; +} + function dashboardScreenshotName(options: Extract, state: SentinelCicdState): string { const raw = options.name ?? `sentinel-dashboard-${state.spec.nodeId.toLowerCase()}-${state.spec.lane}-${state.sentinelId}.png`; const safe = raw.replace(/[^A-Za-z0-9._-]+/gu, "-").slice(0, 120); @@ -1573,6 +1811,7 @@ function renderDashboardResult(result: Record): string { const chartTiming = record(dom.chartTiming); const memorySummary = record(dom.memorySummary); const requestedRunSelection = record(dom.requestedRunSelection); + const serviceApi = record(result.serviceApi); const manualTrigger = record(page.manualTrigger); const manualTriggerUi = record(dom.manualTriggerUi); const screenshot = record(result.screenshot); @@ -1585,10 +1824,11 @@ function renderDashboardResult(result: Record): string { "", table(["NODE", "LANE", "SENTINEL", "STATUS", "URL"], [[result.node, result.lane, result.sentinelId, result.ok === true ? "pass" : "blocked", result.publicUrl]]), "", - table(["HTTP", "SHELL", "READY", "API_OVERVIEW", "API_RUNS", "RUN_ROWS", "ERRORS", "CONSOLE_ERR", "REQ_FAIL"], [[ + table(["HTTP", "SHELL", "READY", "API_SOURCE", "API_OVERVIEW", "API_RUNS", "RUN_ROWS", "ERRORS", "CONSOLE_ERR", "REQ_FAIL"], [[ page.httpStatus ?? "-", dom.shell, dom.ready, + dom.effectiveApiSource ?? "browser", `${apiOverview.ok ?? "-"}/${apiOverview.httpStatus ?? "-"}`, `${apiRuns.ok ?? "-"}/${apiRuns.httpStatus ?? "-"}`, dom.runRows, @@ -1647,6 +1887,17 @@ function renderDashboardResult(result: Record): string { memorySummary.source ?? memorySummary.apiSource ?? "-", ]]), "", + Object.keys(serviceApi).length === 0 + ? "API_FALLBACK\n-" + : table(["SOURCE", "OK", "RUNS", "LATEST_RUN", "SELECTED_RUN", "MEMORY_EXPECTED"], [[ + serviceApi.source ?? "-", + serviceApi.ok ?? "-", + record(serviceApi.runs).count ?? "-", + record(serviceApi.runs).latestRunId ?? "-", + serviceApi.selectedRunId ?? "-", + record(serviceApi.memory).expectedFromApi ?? "-", + ]]), + "", table(["VIEWPORT", "DOC", "H_OVERFLOW", "OVERFLOW_COUNT"], [[ result.viewport, `${record(layout.documentSize).width ?? "-"}x${record(layout.documentSize).height ?? "-"}`,