From 1c9132e7c63bc42ae3449efd94dcccee662e5022 Mon Sep 17 00:00:00 2001 From: Codex Date: Thu, 2 Jul 2026 07:33:19 +0000 Subject: [PATCH] fix: split quick verify control findings --- .agents/skills/unidesk-monitor/SKILL.md | 2 + .../check-catalog.yaml | 60 +++- docs/reference/observability.md | 2 + .../src/hwlab-node-web-sentinel-p5-observe.ts | 302 ++++++++++++++++-- scripts/src/hwlab-node-web-sentinel-p5.ts | 70 +++- 5 files changed, 403 insertions(+), 33 deletions(-) diff --git a/.agents/skills/unidesk-monitor/SKILL.md b/.agents/skills/unidesk-monitor/SKILL.md index 085943b6..7877dedf 100644 --- a/.agents/skills/unidesk-monitor/SKILL.md +++ b/.agents/skills/unidesk-monitor/SKILL.md @@ -19,6 +19,7 @@ description: UniDesk monitoring and Web sentinel operations. Use when working on - 诊断可用 `curl` 或一次性 `web-probe script` 采证,但重复 dashboard 验证必须沉淀为受控 `web-probe sentinel dashboard verify|screenshot` 或等价入口。 - `web-probe sentinel dashboard screenshot` 必须作为远程浏览器截图入口使用,PNG 默认下载到调用者 `/tmp`;issue/PR 证据引用 `localPath`、`sha256`、HTTP status、DOM 摘要和 overflow 结果。`VERIFIED=true` 只证明 PNG 回传和哈希校验通过,收口前仍必须打开截图或用 DOM 摘要确认不是 Chrome 网络错误页、登录页或空壳页。 - monitor-web 的“监测项”默认必须跟随选中 run;曲线点、运行详情和监测项摘要必须区分类型数与样本数,历史聚合只能作为明确标注的历史口径展示。 +- Web 哨兵 check code 必须语义单一且确定:一个 code/id 只能表达一种处置路径;如果同一 finding 可能表示多种根因或状态,必须拆成多个固定 code/id,而不是用动态标题或摘要在同一 code 下区分。 ## Quick Commands @@ -66,6 +67,7 @@ bun scripts/cli.ts web-probe observe analyze 12. Any new analyzer finding id emitted by quick verify must be registered in the selected check catalog before rollout. A missing catalog entry can make `/api/health` return 503 and leave the new runner pod unhealthy even when the image is otherwise correct. 13. If a dashboard screenshot artifact is small or visually shows `ERR_NETWORK_CHANGED`/browser error chrome while CLI status is otherwise pass, discard it as evidence and rerun after checking the public URL/API status. Treat this as a web-probe evidence-quality issue if repeated; do not close visibility issues from such a screenshot alone. 14. Request-rate curve acceptance uses `/api/runs/{id}.requestRate` plus dashboard screenshot/DOM evidence that the request chart is above the memory chart with aligned time axis. Until `dashboard verify` exposes request-rate-specific fields, do not treat legacy `API_PAGES` / `API_SAMPLES` columns as request curve counts; see `docs/reference/observability.md`. +15. Check code 设计必须先拆语义再实现展示:例如“没有业务轮次”“目标轮次缺 traceId”“trace rows/projection 缺失”“Final Response 为空且仍在运行/取消”“Final Response 为空且已失败/终止”应是不同固定 code,而不是一个 WBC-003 下的动态解释。 ## Architecture Preference diff --git a/config/hwlab-web-probe-sentinel/check-catalog.yaml b/config/hwlab-web-probe-sentinel/check-catalog.yaml index d38e34d5..c8aa07c0 100644 --- a/config/hwlab-web-probe-sentinel/check-catalog.yaml +++ b/config/hwlab-web-probe-sentinel/check-catalog.yaml @@ -38,8 +38,8 @@ sentinel: - code: WBC-003 id: quick-verify-no-business-turn level: error - titleZh: 快速验证没有有效对话 - summaryZh: 快速验证没有看到稳定会话、过程记录或最终回复。 + titleZh: 快速验证没有采集到业务轮次 + summaryZh: 快速验证的 turn-summary 没有任何业务轮次记录。 actionZh: 查看详情后处理。 blocking: true order: 30 @@ -867,3 +867,59 @@ sentinel: actionZh: 先修复性能探针采集可见性,再继续性能结论。 blocking: false order: 1060 + - code: WBC-107 + id: quick-verify-target-turn-missing + level: error + titleZh: 快速验证缺少目标业务轮次 + summaryZh: 快速验证采集到了业务轮次,但没有采集到目标 prompt 对应的轮次。 + actionZh: 检查 promptIndex、command 分段和 turn-summary 采集。 + blocking: true + order: 1070 + - code: WBC-108 + id: quick-verify-target-turn-trace-id-missing + level: error + titleZh: 目标业务轮次缺少 traceId + summaryZh: 目标业务轮次存在,但缺少可定位过程记录的 traceId。 + actionZh: 检查 Workbench/Code Agent 的 traceId 投影和消息绑定。 + blocking: true + order: 1080 + - code: WBC-109 + id: quick-verify-trace-projection-missing + level: error + titleZh: 目标 trace 投影缺失 + summaryZh: 目标业务轮次已有 traceId,但 trace-frame 没有对应 trace rows 或投影。 + actionZh: 检查 trace-frame、Workbench trace read-model 和投影水位。 + blocking: true + order: 1090 + - code: WBC-110 + id: quick-verify-final-response-empty-incomplete + level: error + titleZh: 目标轮次未完成且最终回复为空 + summaryZh: 目标业务轮次仍在运行或已取消标记,Final Response 为空。 + actionZh: 检查同一 traceId 的运行状态、取消事件和 Code Agent 终态。 + blocking: true + order: 1100 + - code: WBC-111 + id: quick-verify-final-response-empty-terminal + level: error + titleZh: 目标轮次已终止但最终回复为空 + summaryZh: 目标业务轮次已进入终态,但 Final Response 为空。 + actionZh: 检查终态事件持久化和最终回复投影。 + blocking: true + order: 1110 + - code: WBC-112 + id: quick-verify-final-response-empty-status-unknown + level: error + titleZh: 目标轮次状态未知且最终回复为空 + summaryZh: 目标业务轮次存在且 Final Response 为空,但状态字段无法判定为运行中或终态。 + actionZh: 补齐 turn-summary 状态投影后重新分类。 + blocking: true + order: 1120 + - code: WBC-113 + id: quick-verify-diagnostics-inconclusive + level: error + titleZh: 快速验证诊断信息不足 + summaryZh: 快速验证不能证明业务轮次完成,但结构化诊断不足以归入具体失败语义。 + actionZh: 先改进 turn-summary/trace-frame 诊断字段,再判断业务恢复。 + blocking: true + order: 1130 diff --git a/docs/reference/observability.md b/docs/reference/observability.md index e41dcd44..a107dba6 100644 --- a/docs/reference/observability.md +++ b/docs/reference/observability.md @@ -35,6 +35,8 @@ Web 哨兵 dashboard/API 展示问题的第一事实源是 sentinel runner 的 ` Web 哨兵 findings 可见性要同时核对 runner API 和已有 observe artifact。若某个 run 的公开 `/api/report?view=findings&run=` 只显示 WBC-003,但 `web-probe sentinel report --run --view findings --raw` 能从 `analysis/report.json` 读出 red/amber analyzer findings,根因是索引或 artifact 可见性遮盖,不是业务没有产生 warning/error。此时应回填或重建这条既有 run 的 report index,并保留原有 report views;不要通过启动新的哨兵 run 来解释旧记录。 +Web 哨兵 check code 是排障和验收合同,必须保持语义单一且确定。一个 code/id 只能对应一种可执行处置路径;如果一个 finding 可能同时表示“没有采集到业务轮次”“目标轮次缺 traceId”“trace rows/projection 缺失”“Final Response 为空但仍在运行/取消”“Final Response 为空且已失败/终止”等多种状态,就必须拆成多个固定 code/id。不得用同一个 code 的动态标题、动态 summary 或 rootCause 文案来承载多种互斥语义;动态字段只能补充证据,不能改变 code 的语义。 + Web 哨兵请求频率曲线的验收事实源是 runner `/api/runs/{id}.requestRate`、已有 observe artifact 中的 request-rate summary,以及 `web-probe sentinel dashboard screenshot` 的远程浏览器证据。阈值、采样间隔、bucket 大小和红黄线只从 YAML/source-of-truth 读取,长期文档只记录字段族与验证入口。验收时应核对 `bucketSeconds`、总请求曲线、页面曲线、API path 曲线、峰值每分钟计数、数据来源和 chart/DOM 是否显示在内存曲线上方并共享时间轴。若 quick-verify 的业务链路失败,但同一 run 的 `requestRate` API 和截图已经有曲线数据,应把请求频率能力验收与业务阻塞分开记录;反之,`requestRate.source=unavailable` 或曲线为空时要继续检查 analyzer compact 输出、artifact summary、索引回填和 report fallback。除非 `dashboard verify` 已显式输出 request-rate 专用字段,不得把旧的 `API_PAGES` / `API_SAMPLES` 列当作请求频率曲线验收结果。 ## Workbench Request Storm And Freeze diff --git a/scripts/src/hwlab-node-web-sentinel-p5-observe.ts b/scripts/src/hwlab-node-web-sentinel-p5-observe.ts index 5bce8d02..a94a7e84 100644 --- a/scripts/src/hwlab-node-web-sentinel-p5-observe.ts +++ b/scripts/src/hwlab-node-web-sentinel-p5-observe.ts @@ -342,6 +342,7 @@ export function runSentinelQuickVerify(state: SentinelCicdState, reason: string, ); const ok = analysisReadable && controlFindings.length === 0 && blockingFindings.length === 0; const businessStatus = quickVerifyBusinessStatus(null, promptIndex, turnSummary, traceFrame, elapsedMs(), maxSeconds); + const primaryControlFailure = quickVerifyPrimaryControlFailure(controlFindings); printQuickVerifyProgress(state, runId, "record-report", ok ? "succeeded" : "blocked", { observerId, reportJsonSha256: stringAtNullable(artifactSummary, "reportJsonSha256"), analysisReadable, findingCount: findings.length, blockingFindingCount: blockingFindings.length, controlFindingCount: controlFindings.length, elapsedMs: elapsedMs() }); return recordQuickVerify(state, { ok, @@ -356,7 +357,7 @@ export function runSentinelQuickVerify(state: SentinelCicdState, reason: string, reportJsonSha256: stringAtNullable(artifactSummary, "reportJsonSha256"), findingCount: findings.length, artifactCount: numberAtNullable(artifactSummary, "artifactCount") ?? 0, - failure: !analysisReadable ? "quick-verify-analysis-missing" : controlFindings.length > 0 ? "quick-verify-no-business-turn" : blockingFindings.length > 0 ? "quick-verify-blocking-findings" : null, + failure: !analysisReadable ? "quick-verify-analysis-missing" : primaryControlFailure ?? (blockingFindings.length > 0 ? "quick-verify-blocking-findings" : null), promptSource: prompts.summary, accountEnv: accountEnv.summary, steps: [...steps, cleanupStep], @@ -481,6 +482,44 @@ function inlinePromptText(item: Record): string | null { return stringAtNullable(item, "text") ?? stringAtNullable(item, "prompt") ?? stringAtNullable(item, "value"); } +export function reclassifyQuickVerifyControlFindings(state: SentinelCicdState, input: { + readonly runId: string | null; + readonly scenarioId: string | null; + readonly observerId: string | null; + readonly failure?: string | null; + readonly timeoutSeconds?: number | null; +}): Record { + if (input.observerId === null || input.observerId.length === 0) { + return { ok: false, reason: "observer-id-missing", runId: input.runId, valuesRedacted: true }; + } + const scenarioId = input.scenarioId ?? stringAt(state.cicd, "targetValidation.scenarioId"); + const scenario = findScenario(state, scenarioId); + if (scenario === null) { + return { ok: false, reason: "scenario-not-found", runId: input.runId, scenarioId, observerId: input.observerId, valuesRedacted: true }; + } + const commandSequence = arrayAt(scenario, "commandSequence").map(record); + const promptIndex = commandSequence.reduce((count, item) => { + const repeat = Math.max(1, typeof item.repeat === "number" && Number.isFinite(item.repeat) ? Math.trunc(item.repeat) : 1); + return stringAtNullable(item, "type") === "sendPrompt" ? count + repeat : count; + }, 0); + const timeoutSeconds = Math.max(5, Math.min(Math.trunc(input.timeoutSeconds ?? 55), 55)); + const turnSummary = collectObserveView(state, input.observerId, "turn-summary", null, timeoutSeconds); + const traceFrame = collectObserveView(state, input.observerId, "trace-frame", promptIndex > 0 ? promptIndex : null, timeoutSeconds); + const findings = quickVerifyControlFindings(input.failure ?? null, promptIndex, turnSummary, traceFrame); + return { + ok: true, + runId: input.runId, + scenarioId, + observerId: input.observerId, + promptIndex, + findingCount: findings.length, + findings, + turnSummary: { ok: turnSummary.ok === true, collectShape: turnSummary.collectShape, valuesRedacted: true }, + traceFrame: { ok: traceFrame.ok === true, collectShape: traceFrame.collectShape, valuesRedacted: true }, + valuesRedacted: true, + }; +} + function finalizeQuickVerifyFailure(state: SentinelCicdState, input: { readonly runId: string; readonly scenarioId: string; @@ -542,6 +581,7 @@ function finalizeQuickVerifyFailure(state: SentinelCicdState, input: { && controlFindings.length === 0 && blockingFindings.length === 0; const businessStatus = quickVerifyBusinessStatus(input.failure, input.promptIndex, turnSummary, traceFrame, input.elapsedMs ?? null, targetValidationSeconds); + const primaryControlFailure = quickVerifyPrimaryControlFailure(controlFindings); return { ok: recoveredWaitFailure, runId: input.runId, @@ -558,7 +598,8 @@ function finalizeQuickVerifyFailure(state: SentinelCicdState, input: { reportJsonSha256: stringAtNullable(artifactSummary, "reportJsonSha256"), findingCount: findings.length, artifactCount: numberAtNullable(artifactSummary, "artifactCount") ?? 0, - failure: recoveredWaitFailure ? null : input.failure, + failure: recoveredWaitFailure ? null : primaryControlFailure ?? input.failure, + runnerFailure: recoveredWaitFailure ? null : input.failure, promptSource: input.promptSource, steps: [...input.steps, ...cleanupSteps], analysis: artifactSummary, @@ -1811,6 +1852,13 @@ function isQuickVerifyBlockingFinding(item: Record): boolean { if (id === "observer-command-failed") return observerCommandFailureBlocks(item); return [ "quick-verify-no-business-turn", + "quick-verify-target-turn-missing", + "quick-verify-target-turn-trace-id-missing", + "quick-verify-trace-projection-missing", + "quick-verify-final-response-empty-incomplete", + "quick-verify-final-response-empty-terminal", + "quick-verify-final-response-empty-status-unknown", + "quick-verify-diagnostics-inconclusive", "quick-verify-analysis-summary-unreadable", "quick-verify-command-sequence-failed", "quick-verify-observer-start-failed", @@ -1836,14 +1884,105 @@ function observerCommandFailureBlocks(item: Record): boolean { }); } +function quickVerifyPrimaryControlFailure(controlFindings: Record[]): string | null { + const first = controlFindings[0]; + return first === undefined ? null : stringAtNullable(first, "id") ?? stringAtNullable(first, "kind") ?? stringAtNullable(first, "code"); +} + +function quickVerifyControlEvidenceSummary(turnDiagnostics: Record, traceDiagnostics: Record, targetStatus: Record | null): string { + const targetStatusText = targetStatus === null ? "-" : stringAtNullable(targetStatus, "status") ?? "-"; + const targetMarks = targetStatus === null ? "-" : stringAtNullable(targetStatus, "marks") ?? "-"; + const targetTrace = targetStatus === null ? "-" : targetStatus.traceIdPresent === true ? "present" : "missing"; + const targetFinalBytes = targetStatus === null ? "-" : String(targetStatus.finalResponseBytes ?? "-"); + const traceBlocker = stringAtNullable(traceDiagnostics, "blocker") ?? "-"; + const traceRowCount = traceDiagnostics.traceRowCount === null || traceDiagnostics.traceRowCount === undefined ? "-" : String(traceDiagnostics.traceRowCount); + return [ + `turn-summary rows=${String(turnDiagnostics.rowCount ?? 0)}`, + `scoped=${String(turnDiagnostics.scopedRowCount ?? 0)}`, + `targetStatus=${targetStatusText}`, + `targetMarks=${targetMarks}`, + `targetTrace=${targetTrace}`, + `targetFinalBytes=${targetFinalBytes}`, + `traceFrameTrace=${traceDiagnostics.traceIdPresent === true ? "present" : "missing"}`, + `traceFrameBlocker=${traceBlocker}`, + `traceRows=${traceRowCount}`, + `traceFinalBytes=${String(traceDiagnostics.finalResponseBytes ?? "-")}`, + ].join(" "); +} + +function quickVerifyScopedStatuses(turnDiagnostics: Record): Record[] { + return Array.isArray(turnDiagnostics.scopedStatuses) ? turnDiagnostics.scopedStatuses.map(record) : []; +} + +function quickVerifyLatestScopedStatus(turnDiagnostics: Record): Record | null { + const rows = quickVerifyScopedStatuses(turnDiagnostics); + return rows.length === 0 ? null : rows[rows.length - 1] ?? null; +} + +function quickVerifyFinalResponseEmpty(targetStatus: Record | null, traceDiagnostics: Record): boolean { + return targetStatus?.finalResponseEmpty === true || traceDiagnostics.finalResponseEmpty === true; +} + +function quickVerifyTargetTracePresent(targetStatus: Record | null, traceDiagnostics: Record): boolean { + return targetStatus?.traceIdPresent === true || traceDiagnostics.traceIdPresent === true; +} + +function quickVerifyTraceProjectionMissing(targetStatus: Record | null, traceDiagnostics: Record): boolean { + if (!quickVerifyTargetTracePresent(targetStatus, traceDiagnostics)) return false; + const blocker = stringAtNullable(traceDiagnostics, "blocker"); + if (blocker === "trace-rows-missing" || blocker === "sample-not-found") return true; + if (traceDiagnostics.collectOk !== true) return true; + const traceRowCount = numberAtNullable(traceDiagnostics, "traceRowCount"); + return traceRowCount !== null && traceRowCount === 0; +} + +function quickVerifyTargetStatusKind(targetStatus: Record | null): "incomplete" | "terminal" | "unknown" { + const status = (targetStatus === null ? "" : stringAtNullable(targetStatus, "status") ?? "").toLowerCase(); + const marks = (targetStatus === null ? "" : stringAtNullable(targetStatus, "marks") ?? "").toLowerCase(); + if (/running|pending|queued|starting|submitting|processing|in[-_ ]?progress|waiting/u.test(status)) return "incomplete"; + if (/cancel/u.test(status) || /cancel/u.test(marks)) return "incomplete"; + if (/completed|succeeded|success|done|failed|failure|error|terminal|command-failed|timeout|stopped|aborted/u.test(status)) return "terminal"; + return "unknown"; +} + +function quickVerifyControlFinding( + id: string, + titleZh: string, + summary: string, + rootCause: string, + nextAction: string, + promptIndex: number, + failure: string | null, + turnDiagnostics: Record, + traceDiagnostics: Record, + targetStatus: Record | null, + display: Record | null, +): Record { + return { + id, + severity: "red", + count: 1, + summary, + displayTitleZh: titleZh, + errorTitleZh: titleZh, + timeoutDisplay: display, + rootCause, + rootCauseStatus: "confirmed", + rootCauseConfidence: "high", + evidenceSummary: quickVerifyControlEvidenceSummary(turnDiagnostics, traceDiagnostics, targetStatus), + nextAction, + turnSummaryDiagnostics: turnDiagnostics, + traceFrameDiagnostics: traceDiagnostics, + failure: failure ?? null, + promptIndex, + valuesRedacted: true, + }; +} + function quickVerifyControlFindings(failure: string | null, promptIndex: number, turnSummary: Record | null, traceFrame: Record | null, display: Record | null = null): Record[] { if (quickVerifyHasDurableBusinessTurn(promptIndex, turnSummary, traceFrame)) return []; const turnDiagnostics = quickVerifyTurnSummaryDiagnostics(promptIndex, turnSummary); const traceDiagnostics = quickVerifyTraceFrameDiagnostics(traceFrame); - const rendered = [ - typeof turnSummary?.renderedText === "string" ? turnSummary.renderedText : "", - typeof traceFrame?.renderedText === "string" ? traceFrame.renderedText : "", - ].join("\n"); const noPromptScenario = promptIndex <= 0; if (noPromptScenario && failure === null) return []; if (noPromptScenario && failure !== null) { @@ -1864,29 +2003,135 @@ function quickVerifyControlFindings(failure: string | null, promptIndex: number, valuesRedacted: true, }]; } - const noTrace = /无\s*sendPrompt|no\s+sendPrompt|无\s*trace\s*rows|no\s+trace\s+rows|traceId=-|routeSession=-|activeSession=-/iu.test(rendered); - const emptyFinal = /Final Response[\s\S]*\(空内容\)/iu.test(rendered); - if (!noTrace && !emptyFinal && failure !== "observe-start-failed") return []; - const displayTitleZh = stringAtNullable(display ?? {}, "titleZh"); - return [{ - id: "quick-verify-no-business-turn", - severity: "red", - count: 1, - summary: displayTitleZh ?? "quick verify did not reach a durable business turn/session/trace rows/final response; public dashboard health cannot be treated as HWLAB recovery.", - displayTitleZh, - errorTitleZh: displayTitleZh, - timeoutDisplay: display, - rootCause: `quick verify could not confirm a durable completed turn: turn-summary scopedRows=${String(turnDiagnostics.scopedRowCount ?? 0)} rowCount=${String(turnDiagnostics.rowCount ?? 0)}, traceFrame traceIdPresent=${traceDiagnostics.traceIdPresent === true} finalResponseEmpty=${traceDiagnostics.finalResponseEmpty === true}.`, - rootCauseStatus: "confirmed", - rootCauseConfidence: "high", - evidenceSummary: `turn-summary rows=${String(turnDiagnostics.rowCount ?? 0)} scoped=${String(turnDiagnostics.scopedRowCount ?? 0)} traceFrameTrace=${traceDiagnostics.traceIdPresent === true ? "present" : "missing"} finalResponseBytes=${String(traceDiagnostics.finalResponseBytes ?? "-")}`, - nextAction: "Inspect the structured turnSummary/traceFrame diagnostics first; if rows exist with completed non-empty final responses, fix sentinel interpretation instead of treating HWLAB Web as blocked.", - turnSummaryDiagnostics: turnDiagnostics, - traceFrameDiagnostics: traceDiagnostics, - failure: failure ?? null, + const rowCount = numberAtNullable(turnDiagnostics, "rowCount") ?? 0; + const scopedRowCount = numberAtNullable(turnDiagnostics, "scopedRowCount") ?? 0; + const targetStatus = quickVerifyLatestScopedStatus(turnDiagnostics); + if (rowCount === 0) { + return [quickVerifyControlFinding( + "quick-verify-no-business-turn", + "快速验证没有采集到业务轮次", + "quick verify collected zero business turn rows; this is not a trace/final-response failure.", + `turn-summary rowCount=0 for promptIndex=${promptIndex}.`, + "Inspect observe command execution and turn-summary collection before checking trace-frame or final response.", + promptIndex, + failure, + turnDiagnostics, + traceDiagnostics, + targetStatus, + display, + )]; + } + if (scopedRowCount === 0) { + return [quickVerifyControlFinding( + "quick-verify-target-turn-missing", + "快速验证缺少目标业务轮次", + "quick verify collected turn rows, but none matched the target prompt index.", + `turn-summary rowCount=${rowCount} but scopedRowCount=0 for promptIndex=${promptIndex}.`, + "Check promptIndex/command segmentation and rerun the bounded turn-summary view for the observer.", + promptIndex, + failure, + turnDiagnostics, + traceDiagnostics, + targetStatus, + display, + )]; + } + + const findings: Record[] = []; + if (!quickVerifyTargetTracePresent(targetStatus, traceDiagnostics)) { + findings.push(quickVerifyControlFinding( + "quick-verify-target-turn-trace-id-missing", + "目标业务轮次缺少 traceId", + "quick verify found the target business turn, but the turn has no traceId.", + `target turn exists for promptIndex=${promptIndex}, but traceIdPresent=false.`, + "Fix Workbench/Code Agent traceId projection for the target turn before inspecting trace rows.", + promptIndex, + failure, + turnDiagnostics, + traceDiagnostics, + targetStatus, + display, + )); + return findings; + } + + if (quickVerifyTraceProjectionMissing(targetStatus, traceDiagnostics)) { + findings.push(quickVerifyControlFinding( + "quick-verify-trace-projection-missing", + "目标 trace 投影缺失", + "quick verify found a target turn traceId, but trace-frame did not contain trace rows/projection for it.", + `target turn has traceId, but trace-frame collectOk=${traceDiagnostics.collectOk === true} blocker=${stringAtNullable(traceDiagnostics, "blocker") ?? "-"} traceRowCount=${String(traceDiagnostics.traceRowCount ?? "-")}.`, + "Inspect trace-frame for the target traceId, then check Workbench trace projection/read-model hydration before treating the run as a business failure.", + promptIndex, + failure, + turnDiagnostics, + traceDiagnostics, + targetStatus, + display, + )); + } + + if (quickVerifyFinalResponseEmpty(targetStatus, traceDiagnostics)) { + const statusKind = quickVerifyTargetStatusKind(targetStatus); + if (statusKind === "incomplete") { + findings.push(quickVerifyControlFinding( + "quick-verify-final-response-empty-incomplete", + "目标轮次未完成且最终回复为空", + "quick verify found the target business turn, but it is still incomplete/cancel-marked and Final Response is empty.", + `target turn status=${stringAtNullable(targetStatus ?? {}, "status") ?? "-"} marks=${stringAtNullable(targetStatus ?? {}, "marks") ?? "-"} finalResponseEmpty=true.`, + "Inspect the same traceId in Workbench and Code Agent runtime; this is an unfinished/cancelled turn, not a zero-dialog case.", + promptIndex, + failure, + turnDiagnostics, + traceDiagnostics, + targetStatus, + display, + )); + } else if (statusKind === "terminal") { + findings.push(quickVerifyControlFinding( + "quick-verify-final-response-empty-terminal", + "目标轮次已终止但最终回复为空", + "quick verify found a terminal target business turn, but Final Response is empty.", + `target turn terminal status=${stringAtNullable(targetStatus ?? {}, "status") ?? "-"} finalResponseEmpty=true.`, + "Inspect terminal event persistence and final-response projection for the target traceId; do not classify this as no dialog.", + promptIndex, + failure, + turnDiagnostics, + traceDiagnostics, + targetStatus, + display, + )); + } else { + findings.push(quickVerifyControlFinding( + "quick-verify-final-response-empty-status-unknown", + "目标轮次状态未知且最终回复为空", + "quick verify found the target business turn and empty Final Response, but the turn status is not classifiable.", + `target turn status=${stringAtNullable(targetStatus ?? {}, "status") ?? "-"} finalResponseEmpty=true.`, + "Fix the turn-summary status projection so quick verify can classify incomplete versus terminal final-response failures.", + promptIndex, + failure, + turnDiagnostics, + traceDiagnostics, + targetStatus, + display, + )); + } + } + + if (findings.length > 0) return findings; + return [quickVerifyControlFinding( + "quick-verify-diagnostics-inconclusive", + "快速验证诊断信息不足", + "quick verify did not prove a durable completed turn, but structured diagnostics did not match any specific failure code.", + `quick verify has rowCount=${rowCount} scopedRowCount=${scopedRowCount}, traceIdPresent=${traceDiagnostics.traceIdPresent === true}, finalResponseEmpty=${traceDiagnostics.finalResponseEmpty === true}.`, + "Improve turn-summary/trace-frame diagnostics before making a business recovery decision.", promptIndex, - valuesRedacted: true, - }]; + failure, + turnDiagnostics, + traceDiagnostics, + targetStatus, + display, + )]; } function enrichObserveStartFailureFinding(finding: Record, result: Record): Record { @@ -2009,6 +2254,7 @@ function quickVerifyTurnSummaryDiagnostics(promptIndex: number, turnSummary: Rec return { round: numberAtNullable(row, "round"), status: stringAtNullable(row, "status"), + marks: stringAtNullable(row, "marks"), traceIdPresent: stringAtNullable(row, "traceId") !== null, finalResponseEmpty: finalResponse.empty === true, finalResponseBytes: numberAtNullable(finalResponse, "textBytes"), diff --git a/scripts/src/hwlab-node-web-sentinel-p5.ts b/scripts/src/hwlab-node-web-sentinel-p5.ts index f1976833..722b240d 100644 --- a/scripts/src/hwlab-node-web-sentinel-p5.ts +++ b/scripts/src/hwlab-node-web-sentinel-p5.ts @@ -8,6 +8,7 @@ import { repoRoot } from "./config"; import { startJob } from "./jobs"; import type { RenderedCliResult } from "./output"; import { runWebProbeRemoteArtifactJob } from "./web-probe-remote-artifact"; +import { readWebProbeSentinelConfigRefTarget } from "./hwlab-node-web-sentinel-config-ref"; import type { SentinelCicdState, WebProbeSentinelOptions } from "./hwlab-node-web-sentinel-cicd"; import { clipTail, @@ -32,7 +33,7 @@ import { text, withWarnings, } from "./hwlab-node-web-sentinel-cicd"; -import { metricNames, runSentinelQuickVerify, sentinelP5Next, serviceUnavailableBlocker, validationBlocker } from "./hwlab-node-web-sentinel-p5-observe"; +import { metricNames, reclassifyQuickVerifyControlFindings, runSentinelQuickVerify, sentinelP5Next, serviceUnavailableBlocker, validationBlocker } from "./hwlab-node-web-sentinel-p5-observe"; const SENTINEL_REPORT_ARTIFACT_READ_TIMEOUT_SECONDS = 55; @@ -264,6 +265,7 @@ function compactSentinelReportRawPayload( const findings = Array.isArray(body.findings) ? body.findings.map(record) : []; const artifactFindings = Array.isArray(artifact.findings) ? artifact.findings.map(record) : []; const visibleFindings = mergeSentinelReportFindings(findings, artifactFindings); + const offlineReclassify = offlineQuickVerifyReclassify(state, run, visibleFindings); const storedFindingCount = numberAtNullable(run, "finding_count") ?? numberAtNullable(run, "findingCount") ?? findings.length; const artifactFindingCount = numberAtNullable(artifact, "findingCount") ?? artifactFindings.length; const visibleFindingCount = Math.max(storedFindingCount, artifactFindingCount, visibleFindings.length); @@ -306,6 +308,7 @@ function compactSentinelReportRawPayload( }, summary: pickFields(record(body.summary), ["reason", "status", "businessStatus", "failure", "valuesRedacted"]), findings: visibleFindings.slice(0, 12).map(compactSentinelReportFinding), + offlineReclassify, artifactSummary: Object.keys(artifact).length === 0 ? null : { ok: artifact.ok === true, reason: artifact.reason ?? null, @@ -333,6 +336,53 @@ function compactSentinelReportRawPayload( }; } +function offlineQuickVerifyReclassify(state: SentinelCicdState, run: Record, findings: readonly Record[]): Record | null { + const hasLegacyQuickVerifyControl = findings.some((item) => sentinelReportFindingIdentityCandidates(item).includes("quick-verify-no-business-turn")); + if (!hasLegacyQuickVerifyControl) return null; + const catalog = sentinelReportCheckCatalogById(state); + const result = reclassifyQuickVerifyControlFindings(state, { + runId: stringAtNullable(run, "id"), + scenarioId: stringAtNullable(run, "scenario_id") ?? stringAtNullable(run, "scenarioId"), + observerId: stringAtNullable(run, "observer_id") ?? stringAtNullable(run, "observerId"), + failure: stringAtNullable(record(run.summary), "failure"), + timeoutSeconds: SENTINEL_REPORT_ARTIFACT_READ_TIMEOUT_SECONDS, + }); + const resultFindings = Array.isArray(result.findings) ? result.findings.map(record) : []; + return { + ...pickFields(result, ["ok", "reason", "runId", "scenarioId", "observerId", "promptIndex", "findingCount", "turnSummary", "traceFrame", "valuesRedacted"]), + source: "offline-existing-observe-artifact", + note: "This is a local CLI reclassification of existing turn-summary/trace-frame artifacts; it does not mutate the runner index.", + findings: resultFindings.slice(0, 8).map((item) => compactSentinelReportFinding(enrichSentinelReportFindingWithCatalog(item, catalog))), + valuesRedacted: true, + }; +} + +function sentinelReportCheckCatalogById(state: SentinelCicdState): Map> { + try { + const reportViews = record(readWebProbeSentinelConfigRefTarget(state.spec, state.configRefs.reportViews)); + const catalogRef = stringAtNullable(reportViews, "checkCatalogRef"); + if (catalogRef === null) return new Map(); + const catalog = record(readWebProbeSentinelConfigRefTarget(state.spec, catalogRef)); + const items = Array.isArray(catalog.items) ? catalog.items.map(record) : Array.isArray(catalog.checks) ? catalog.checks.map(record) : []; + return new Map(items.map((item) => [stringAtNullable(item, "id") ?? "", item]).filter((item): item is [string, Record] => item[0].length > 0)); + } catch { + return new Map(); + } +} + +function enrichSentinelReportFindingWithCatalog(item: Record, catalog: ReadonlyMap>): Record { + const id = stringAtNullable(item, "id") ?? stringAtNullable(item, "kind") ?? stringAtNullable(item, "code"); + const check = id === null ? null : catalog.get(id) ?? null; + if (check === null) return item; + return { + ...item, + check, + checkCode: stringAtNullable(check, "code"), + checkTitleZh: stringAtNullable(check, "titleZh"), + valuesRedacted: true, + }; +} + function mergeSentinelReportFindings(primary: readonly Record[], artifact: readonly Record[]): Record[] { const merged: Record[] = []; const seen = new Set(); @@ -423,8 +473,12 @@ function compactSentinelReportFinding(value: Record): Record, state: Se const run = record(payload.run); const summary = record(payload.summary); const artifactSummary = record(payload.artifactSummary); + const offlineReclassify = record(payload.offlineReclassify); const findings = Array.isArray(payload.findings) ? payload.findings.map(record) : []; + const offlineFindings = Array.isArray(offlineReclassify.findings) ? offlineReclassify.findings.map(record) : []; const reportSha = stringAtNullable(run, "reportJsonSha256") ?? stringAtNullable(artifactSummary, "reportJsonSha256"); const findingCount = run.findingCount ?? findings.length; const analyzerArtifactCount = numberAtNullable(artifactSummary, "artifactCount") ?? numberAtNullable(record(artifactSummary.counts), "artifacts"); @@ -498,26 +554,34 @@ function renderSentinelReportSummary(payload: Record, state: Se "", "Findings", findingRows, + offlineFindings.length === 0 ? "" : "", + offlineFindings.length === 0 ? "" : "Offline Reclassify", + offlineFindings.length === 0 ? "" : offlineFindings.map(formatSentinelReportFindingLine).join("\n"), ].join("\n"); } function renderSentinelReportFindings(payload: Record): string { const run = record(payload.run); const artifactSummary = record(payload.artifactSummary); + const offlineReclassify = record(payload.offlineReclassify); const findings = Array.isArray(payload.findings) ? payload.findings.map(record) : []; + const offlineFindings = Array.isArray(offlineReclassify.findings) ? offlineReclassify.findings.map(record) : []; const reportSha = stringAtNullable(run, "reportJsonSha256") ?? stringAtNullable(artifactSummary, "reportJsonSha256"); return [ "Web Probe Sentinel Findings", "=======================================================", `run=${run.id ?? "-"} report=${reportSha ?? "-"} findings=${run.findingCount ?? findings.length}`, findings.length === 0 ? "-" : findings.map(formatSentinelReportFindingLine).join("\n"), + offlineFindings.length === 0 ? "" : "", + offlineFindings.length === 0 ? "" : "Offline Reclassify", + offlineFindings.length === 0 ? "" : offlineFindings.map(formatSentinelReportFindingLine).join("\n"), ].join("\n"); } function formatSentinelReportFindingLine(item: Record): string { const check = record(item.check); - const code = stringAtNullable(check, "code") ?? stringAtNullable(item, "id") ?? "-"; - const title = stringAtNullable(check, "titleZh") ?? ""; + const code = stringAtNullable(item, "checkCode") ?? stringAtNullable(check, "code") ?? stringAtNullable(item, "id") ?? "-"; + const title = stringAtNullable(item, "checkTitleZh") ?? stringAtNullable(check, "titleZh") ?? ""; const summary = reportText(item.summary, 180) ?? ""; const rootCause = reportText(item.rootCause, 180); const evidence = reportText(item.evidenceSummary, 180);