From 384cb9dfb84c1957d24bba14c270759c40f0c053 Mon Sep 17 00:00:00 2001 From: Lyon <88232613+pikasTech@users.noreply.github.com> Date: Thu, 18 Jun 2026 11:33:57 +0800 Subject: [PATCH] fix: improve web-probe evidence summaries (#466) Co-authored-by: Codex --- .agents/skills/unidesk-webdev/SKILL.md | 10 +- docs/reference/cli.md | 2 +- scripts/src/hwlab-node.ts | 532 ++++++++++++++++++++++++- 3 files changed, 526 insertions(+), 18 deletions(-) diff --git a/.agents/skills/unidesk-webdev/SKILL.md b/.agents/skills/unidesk-webdev/SKILL.md index 8772a706..34ac8e6a 100644 --- a/.agents/skills/unidesk-webdev/SKILL.md +++ b/.agents/skills/unidesk-webdev/SKILL.md @@ -38,9 +38,14 @@ bun scripts/cli.ts hwlab nodes web-probe run --node D601 --lane v03 ```bash bun scripts/cli.ts hwlab nodes web-probe script --node D601 --lane v03 <<'JS' -export default async ({ page, gotoStable, screenshot, fetchJson, safeEvaluate }) => { +export default async ({ page, gotoStable, screenshot, fetchJson, fetchApiMatrix, recordStep, safeEvaluate }) => { await page.route('**/v1/agent/conversations**', route => setTimeout(() => route.continue(), 1000)); await gotoStable('/workbench'); + const apiMatrix = await fetchApiMatrix([ + '/v1/workbench/workspace?projectId=prj_hwpod_workbench', + '/auth/session' + ]); + recordStep('workbench-ready', { url: page.url(), apiMatrixOk: apiMatrix.ok }); return { url: page.url(), workspace: await fetchJson('/v1/workbench/workspace?projectId=prj_hwpod_workbench'), @@ -57,7 +62,8 @@ JS - 先通过 `gotoStable`、`waitWorkbenchReady` 或等价导航进入目标 origin,再用 `fetchJson` 读同源 API;不要在 `about:blank` 上请求 `/v1/...`。 - Workbench 有 SSE/长轮询时不要用 `networkidle` 判断通过;用明确 DOM/API 条件,如 final URL、route conversationId、active tab、message card、trace row 或 workspace `selectedConversationId`。 - Playwright `page.evaluate` 只能传一个可序列化参数;多个值包成对象,或用 `safeEvaluate(fn, { a, b })`。 -- 失败证据至少保留 `failureKind`、`errorMessage`、`scriptSha256`、`runDir`、`lastUrl`、`lastScreenshot`;默认失败截图是 `failure.png`。 +- 脚本中用 
`recordStep(name, data)` 保存关键 DOM/API partial evidence;API 批量探测优先用 `fetchApiMatrix(paths)`,单个 API 失败不应让后续证据丢失。 +- 失败证据至少保留 `failureKind`、`errorMessage`、`scriptSha256`、`runDir`、`lastUrl`、`lastScreenshot`、`probe.summary` 和 `reportPath`/`reportSha256`;默认失败截图是 `failure.png`。 ## Fake-Server Playwright diff --git a/docs/reference/cli.md b/docs/reference/cli.md index efcf6ef6..ecc14012 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -22,7 +22,7 @@ CI/CD、GitOps、rollout、artifact 发布、PR 合并后的 runtime lane 滚动 G14/D601 v03 的 bootstrap admin password 是 HWLAB runtime Secret 生命周期的一部分,必须收敛到 `config/hwlab-node-lanes.yaml` 的 `bootstrapAdmin` 声明与受控 `hwlab nodes secret status|ensure --node <node> --lane v03 --name hwlab-v03-bootstrap-admin` CLI。明文只能存在于 Git 忽略、owner-only 的 `.state/secrets/...` sourceRef 文件;CLI 在本地把明文转换为 HWLAB 兼容 password hash,只向运行面同步 `password-hash`,并在输出中只披露 sourceRef、sourceKey、target Secret/key、presence、byte count、fingerprint、mutation 与后续命令。`secret ensure --force` 只用于明确需要按 YAML sourceRef 重灌 bootstrap admin hash 并重启 Cloud API 的受控恢复场景,默认 ensure 不做强制重灌;不要把人工生成 hash、手工写 k8s Secret 或原生 `kubectl rollout` 沉淀为长期入口。 -`hwlab nodes web-probe run|script --node <node> --lane <lane>` 是 HWLAB Cloud Web 线上 DOM/Playwright 验收的受控入口;CLI 负责从 YAML 解析 workspace、public URL 和 bootstrap admin sourceRef,并只输出 redacted 凭据状态、artifact path/hash、readiness 和失败分类。具体 Web 开发、fake-server Playwright、fixture 脱敏、`web-probe script` helper、截图和 Workbench/Performance 判定口径统一见 `$unidesk-webdev`,本 CLI 参考不再维护第二套操作面。 +`hwlab nodes web-probe run|script --node <node> --lane <lane>` 是 HWLAB Cloud Web 线上 DOM/Playwright 验收的受控入口;CLI 负责从 YAML 解析 workspace、public URL 和 bootstrap admin sourceRef,并只输出 redacted 凭据状态、artifact path/hash、readiness、`probe.summary` 和失败分类。`run --message ...` 未显式设置 trace 参数时会做轻量 trace 采样,`script` helper 可用 `recordStep` / `safeFetchJson` / `fetchApiMatrix` 保留失败前的结构化 partial evidence,完整 redacted 报告通过 `reportPath`/`reportSha256` 展开。具体 Web 开发、fake-server Playwright、fixture 脱敏、`web-probe script` helper、截图和 
Workbench/Performance 判定口径统一见 `$unidesk-webdev`,本 CLI 参考不再维护第二套操作面。 `hwlab nodes control-plane infra plan|status|apply --node D601 --lane v03` 是 D601 HWLAB v03 节点本地 CI/CD 与 git-mirror 前置控制面的 YAML 驱动入口,配置真相源是 `config/hwlab-node-control-plane.yaml`。`plan` 只读展示 YAML target 和将渲染的 control-plane 对象;`status` 只读观察 D601 Tekton、CI namespace、git-mirror、Argo、node-local registry 和 tools image readiness;`apply --dry-run` 只输出 manifest 摘要;`apply --confirm` 只收敛 D601 control-plane bootstrap 对象,不触发 HWLAB runtime rollout,不创建 PK01 DB,也不修改 Caddy/FRP。tools image 的 node-local registry 地址只能作为输出 artifact;输入 base image 必须由 YAML 声明为公开 registry 来源,缺少 output image 时应在 `status.next.blockers` 中体现,而不是把现有 node-local image 当成输入基础镜像。 diff --git a/scripts/src/hwlab-node.ts b/scripts/src/hwlab-node.ts index 12e7a6b3..aa0a314f 100644 --- a/scripts/src/hwlab-node.ts +++ b/scripts/src/hwlab-node.ts @@ -285,14 +285,16 @@ export function hwlabNodeWebProbeHelp(): Record { "bun scripts/cli.ts hwlab nodes web-probe run --node D601 --lane v03 --wait-messages-ms 1000", "bun scripts/cli.ts hwlab nodes web-probe run --node D601 --lane v03 --url https://hwlab.pikapython.com --fresh-session --message 'ping'", "bun scripts/cli.ts hwlab nodes web-probe script --node D601 --lane v03 --script-file .state/probes/workbench.mjs", - "bun scripts/cli.ts hwlab nodes web-probe script --node D601 --lane v03 <<'JS'\nexport default async ({ waitWorkbenchReady, fetchJson, collectText, safeEvaluate, screenshot }) => {\n const ready = await waitWorkbenchReady();\n const workspace = await fetchJson('/v1/workbench/workspace?projectId=prj_hwpod_workbench');\n const workspaceText = await collectText('#workspace');\n const evaluated = await safeEvaluate(({ a, b }) => ({ sum: a + b }), { a: 1, b: 2 });\n await screenshot('workbench.png');\n return { finalUrl: ready.finalUrl, workspaceOk: workspace.ok, workspaceText, evaluated };\n};\nJS", + "bun scripts/cli.ts hwlab nodes web-probe script --node D601 --lane v03 <<'JS'\nexport default 
async ({ waitWorkbenchReady, fetchJson, fetchApiMatrix, recordStep, collectText, safeEvaluate, screenshot }) => {\n const ready = await waitWorkbenchReady();\n const workspace = await fetchJson('/v1/workbench/workspace?projectId=prj_hwpod_workbench');\n const apiMatrix = await fetchApiMatrix(['/v1/workbench/workspace?projectId=prj_hwpod_workbench', '/auth/session']);\n const workspaceText = await collectText('#workspace');\n const evaluated = await safeEvaluate(({ a, b }) => ({ sum: a + b }), { a: 1, b: 2 });\n await screenshot('workbench.png');\n recordStep('workbench-summary', { finalUrl: ready.finalUrl, workspaceOk: workspace.ok, apiMatrixOk: apiMatrix.ok });\n return { finalUrl: ready.finalUrl, workspaceOk: workspace.ok, workspaceText, evaluated };\n};\nJS", ], actions: { run: "Run the repo-owned scripts/web-live-dom-probe.mjs helper.", - script: "Run caller-provided Playwright JS after CLI-managed /auth/login; the script receives authenticated browser/context/page plus fetchJson/collectText/safeEvaluate/waitWorkbenchReady/screenshotOnError/summarizeWorkspace/summarizeConversation helpers and must not handle secrets itself.", + script: "Run caller-provided Playwright JS after CLI-managed /auth/login; the script receives authenticated browser/context/page plus fetchJson/safeFetchJson/fetchApiMatrix/recordStep/collectText/safeEvaluate/waitWorkbenchReady/screenshotOnError/summarizeWorkspace/summarizeConversation helpers and must not handle secrets itself.", }, notes: [ "Prefer --script-file for reusable probes; stdin heredocs remain supported for one-off probes.", + "Issue-ready summary is available under probe.summary; full script report is persisted under probe.reportPath with a SHA-256 fingerprint.", + "Use recordStep(name, data) or fetchApiMatrix(paths) to keep structured partial evidence when a later step fails.", "Playwright page.evaluate accepts one serializable argument; use page.evaluate(({ a, b }) => ..., { a, b }) or safeEvaluate(fn, { a, b }).", 
"Failures include failureKind, errorMessage, scriptSha256, runDir, lastUrl, and lastScreenshot when a screenshot can be captured.", ], @@ -4957,8 +4959,12 @@ function parseNodeWebProbeOptions(args: string[]): NodeWebProbeOptions { const waitAfterSubmitMs = positiveIntegerOption(args, "--wait-after-submit-ms", 1500, 60000); const waitMessagesMs = positiveIntegerOption(args, "--wait-messages-ms", 2500, 60000); const waitAgentTerminalMs = positiveIntegerOption(args, "--wait-agent-terminal-ms", 0, 600000); - const traceSampleCount = positiveIntegerOption(args, "--trace-sample-count", 0, 200); - const traceSampleIntervalMs = positiveIntegerOption(args, "--trace-sample-interval-ms", 0, 60000); + const message = optionValue(args, "--message") ?? null; + const hasMessage = message !== null; + const traceSampleCountDefault = hasMessage ? 2 : 0; + const traceSampleIntervalDefault = hasMessage ? 1500 : 0; + const traceSampleCount = positiveIntegerOption(args, "--trace-sample-count", traceSampleCountDefault, 200); + const traceSampleIntervalMs = positiveIntegerOption(args, "--trace-sample-interval-ms", traceSampleIntervalDefault, 60000); const commandTimeoutAutoSeconds = nodeWebProbeAutoCommandTimeoutSeconds({ timeoutMs, waitAfterSubmitMs, @@ -4967,7 +4973,7 @@ function parseNodeWebProbeOptions(args: string[]): NodeWebProbeOptions { traceSampleCount, traceSampleIntervalMs, freshSession: args.includes("--fresh-session"), - hasMessage: optionValue(args, "--message") !== undefined, + hasMessage, }); const commandTimeoutRaw = optionValue(args, "--command-timeout-seconds"); const commandTimeoutUserProvided = commandTimeoutRaw !== undefined; @@ -4985,7 +4991,7 @@ function parseNodeWebProbeOptions(args: string[]): NodeWebProbeOptions { waitAgentTerminalMs, traceSampleCount, traceSampleIntervalMs, - message: optionValue(args, "--message") ?? null, + message, conversationId: optionValue(args, "--conversation-id") ?? 
null, freshSession: args.includes("--fresh-session"), cancelRunning: !args.includes("--no-cancel-running"), @@ -5060,6 +5066,7 @@ function runNodeWebProbe(options: NodeWebProbeOptions): Record const result = runTransWorkspaceStdinScript(options.node, spec.workspace, script, options.commandTimeoutSeconds); const probe = compactWebProbeResult(parseJsonObject(result.stdout)); const passed = result.exitCode === 0 && probe?.status === "pass"; + const summary = nullableRecord(probe?.summary); const degradedReason = result.timedOut ? "web-probe-command-timeout" : typeof probe?.degradedReason === "string" @@ -5081,6 +5088,8 @@ function runNodeWebProbe(options: NodeWebProbeOptions): Record timedOut: result.timedOut, }, degradedReason, + failureKind: typeof summary?.failureKind === "string" ? summary.failureKind : null, + summary, probe, result: compactCommandResult(result), valuesRedacted: true, @@ -5143,6 +5152,7 @@ function runNodeWebProbeAsync( const report = record(record(poll.status).report); const probe = compactWebProbeResult(Object.keys(report).length > 0 ? report : null); const passed = probe?.status === "pass"; + const summary = nullableRecord(probe?.summary); const degradedReason = poll.timedOut ? "web-probe-command-timeout" : typeof probe?.degradedReason === "string" @@ -5162,6 +5172,8 @@ function runNodeWebProbeAsync( mode: "async", commandTimeout: webProbeCommandTimeoutSummary(options, poll.timedOut), degradedReason, + failureKind: typeof summary?.failureKind === "string" ? summary.failureKind : null, + summary, job: { jobId, startedAt: start.startedAt ?? 
null, @@ -5244,6 +5256,14 @@ function runNodeWebProbeScript( const result = runTransWorkspaceStdinScript(options.node, spec.workspace, script, options.commandTimeoutSeconds); const report = compactWebProbeScriptResult(parseJsonObject(result.stdout)); const passed = result.exitCode === 0 && report?.ok === true; + const summary = nullableRecord(report?.summary); + const degradedReason = typeof summary?.degradedReason === "string" + ? summary.degradedReason + : typeof report?.failureKind === "string" + ? report.failureKind + : result.timedOut + ? "web-probe-command-timeout" + : null; return { ok: passed, status: passed ? "pass" : "blocked", @@ -5254,6 +5274,9 @@ function runNodeWebProbeScript( url: options.url, credential, scriptSource: options.scriptSource, + degradedReason, + failureKind: typeof summary?.failureKind === "string" ? summary.failureKind : typeof report?.failureKind === "string" ? report.failureKind : null, + summary, probe: report, result: compactCommandResultRedacted(result, [material.password ?? 
""]), valuesRedacted: true, @@ -5293,7 +5316,8 @@ import { mkdir, readFile, stat, writeFile } from "node:fs/promises"; import path from "node:path"; import { pathToFileURL } from "node:url"; -const startedAt = new Date().toISOString(); +const startedAtMs = Date.now(); +const startedAt = new Date(startedAtMs).toISOString(); const baseUrl = normalizeBaseUrl(process.env.HWLAB_WEB_BASE_URL); const username = process.env.HWLAB_WEB_USER || "admin"; const password = process.env.HWLAB_WEB_PASS || ""; @@ -5303,6 +5327,7 @@ const timeoutMs = positiveInteger(process.env.UNIDESK_WEB_PROBE_TIMEOUT_MS, 3000 const viewport = parseViewport(process.env.UNIDESK_WEB_PROBE_VIEWPORT || "1440x900"); const artifactRecords = []; const readinessRecords = []; +const stepRecords = []; let browser; let context; let page; @@ -5343,7 +5368,8 @@ try { scriptSha256: userScriptSha256, runDir, auth: publicAuth(auth), - script: { ok: scriptOk, result: safeResult }, + script: { ok: scriptOk, result: safeResult, steps: publicSteps() }, + steps: publicSteps(), failureKind: failure ? failure.failureKind : null, error: failure ? failure.code : null, errorMessage: failure ? sanitize(failure.message) : null, @@ -5370,6 +5396,8 @@ try { scriptSha256: userScriptSha256, runDir, auth: auth === null ? 
null : publicAuth(auth), + script: { ok: false, steps: publicSteps() }, + steps: publicSteps(), failureKind: failure.failureKind, error: failure.code, errorMessage: sanitize(failure.message), @@ -5608,6 +5636,9 @@ function scriptHelpers() { waitWorkbenchReady, waitForReady, fetchJson, + safeFetchJson, + fetchApiMatrix, + recordStep, collectText, safeEvaluate, summarizeWorkspace, @@ -5622,6 +5653,99 @@ function scriptHelpers() { return helpers; } +function recordStep(name, data = {}, options = {}) { + const normalizedName = String(name || "step").replace(/[^A-Za-z0-9_.:-]/gu, "-").slice(0, 80) || "step"; + const item = { + index: stepRecords.length, + name: normalizedName, + ok: data && typeof data === "object" && typeof data.ok === "boolean" ? data.ok : null, + atMs: Date.now() - startedAtMs, + data: sanitize(data), + }; + stepRecords.push(item); + while (stepRecords.length > boundedInteger(options.maxSteps, 80, 1, 200)) stepRecords.shift(); + return deepClonePlain(item); +} + +function publicSteps() { + return stepRecords.slice(-50).map((item) => deepClonePlain(item)); +} + +async function safeFetchJson(target, options = {}) { + try { + return await fetchJson(target, { ...normalizeHelperOptions(options), throwOnError: false }); + } catch (error) { + const failure = classifiedProbeError(error); + return { + ok: false, + path: typeof target === "string" ? urlPath(target) : String(target ?? ""), + status: 0, + statusText: "helper-error", + failureKind: failure.failureKind, + error: sanitize(failure.message), + guidance: failure.guidance, + }; + } +} + +async function fetchApiMatrix(paths, options = {}) { + options = normalizeHelperOptions(options); + const items = normalizeApiMatrixItems(paths); + const rows = []; + for (const item of items) { + const response = await safeFetchJson(item.path, { + headers: item.headers ?? options.headers, + method: item.method ?? options.method, + body: item.body ?? 
undefined, + }); + rows.push(compactApiMatrixResponse(item, response)); + } + const failed = rows.filter((row) => row.ok !== true); + const matrix = { + ok: failed.length === 0, + count: rows.length, + okCount: rows.length - failed.length, + failedCount: failed.length, + items: rows, + }; + if (options.record !== false) recordStep(typeof options.name === "string" ? options.name : "api-matrix", matrix); + return matrix; +} + +function normalizeApiMatrixItems(value) { + const list = Array.isArray(value) ? value : [value]; + return list.slice(0, 20).map((item, index) => { + if (typeof item === "string") return { name: item, path: item, method: "GET" }; + if (item && typeof item === "object") { + const path = typeof item.path === "string" ? item.path : typeof item.url === "string" ? item.url : "/"; + return { + name: typeof item.name === "string" ? item.name : path, + path, + method: typeof item.method === "string" ? item.method : "GET", + headers: item.headers, + body: item.body, + }; + } + return { name: "item-" + index, path: "/", method: "GET" }; + }); +} + +function compactApiMatrixResponse(item, response) { + const body = response && typeof response.body === "object" && response.body !== null ? response.body : null; + return { + name: item.name, + path: response?.path ?? item.path, + method: item.method ?? "GET", + ok: response?.ok === true, + status: response?.status ?? null, + statusText: response?.statusText ?? null, + error: response?.error ?? response?.parseError ?? null, + failureKind: response?.failureKind ?? null, + bodyKeys: body && !Array.isArray(body) ? Object.keys(body).slice(0, 20) : [], + bodyPreview: body ? normalizeTextPreview(JSON.stringify(sanitize(body)), 240) : response?.textPreview ?? 
null, + }; +} + async function safeEvaluate(fn, arg = undefined, options = {}) { options = normalizeHelperOptions(options); if (typeof fn !== "function") { @@ -6651,23 +6775,125 @@ function redactString(text) { } async function emit(payload) { - process.stdout.write(JSON.stringify(sanitize(payload), null, 2) + "\n"); + const enriched = { + ...payload, + steps: publicSteps(), + artifacts: { runDir, items: artifactRecords }, + }; + enriched.summary = scriptIssueSummary(enriched); + const reportFile = artifactPath("web-probe-script-report.json"); + let reportArtifact = null; + try { + await writeFile(reportFile, JSON.stringify(sanitize(enriched), null, 2) + "\n", "utf8"); + const fileStat = await stat(reportFile); + reportArtifact = { + kind: "json", + path: reportFile, + byteCount: fileStat.size, + sha256: await sha256File(reportFile), + }; + artifactRecords.push({ ...reportArtifact }); + } catch (error) { + reportArtifact = { + kind: "json", + path: reportFile, + error: error instanceof Error ? error.message : String(error), + }; + } + const finalPayload = { + ...enriched, + reportPath: reportArtifact && typeof reportArtifact.path === "string" ? reportArtifact.path : null, + reportSha256: reportArtifact && typeof reportArtifact.sha256 === "string" ? reportArtifact.sha256 : null, + artifacts: { runDir, items: artifactRecords }, + }; + finalPayload.summary = scriptIssueSummary(finalPayload); + process.stdout.write(JSON.stringify(sanitize(finalPayload), null, 2) + "\n"); +} + +function scriptIssueSummary(payload) { + const artifacts = Array.isArray(payload?.artifacts?.items) ? payload.artifacts.items : artifactRecords; + const screenshots = artifacts + .filter((item) => item && typeof item === "object" && item.kind === "screenshot") + .slice(-5) + .map((item) => ({ + path: item.path ?? null, + sha256: item.sha256 ?? null, + byteCount: item.byteCount ?? 
null, + })); + const apiMatrix = latestApiMatrixFromSteps(publicSteps()); + const ok = payload?.ok === true; + const degradedReason = ok ? null : payload?.error ?? payload?.failureKind ?? "web-probe-script-failed"; + const failureKind = ok ? null : classifyIssueFailureKind(payload?.failureKind ?? payload?.error ?? degradedReason, payload?.errorMessage); + return { + ok, + status: payload?.status ?? null, + degradedReason, + failureKind, + failedCondition: ok ? null : payload?.errorMessage ?? payload?.error ?? payload?.failureKind ?? "script did not pass", + nextAction: ok ? null : issueNextAction(failureKind, payload), + baseUrl: payload?.baseUrl ?? null, + finalUrl: payload?.finalUrl ?? payload?.lastUrl ?? null, + scriptSha256: payload?.scriptSha256 ?? null, + runDir, + reportPath: payload?.reportPath ?? null, + reportSha256: payload?.reportSha256 ?? null, + lastScreenshot: payload?.lastScreenshot ?? null, + screenshots, + apiMatrix, + stepCount: stepRecords.length, + lastStep: stepRecords.length > 0 ? deepClonePlain(stepRecords[stepRecords.length - 1]) : null, + valuesRedacted: true, + }; +} + +function latestApiMatrixFromSteps(steps) { + for (let index = steps.length - 1; index >= 0; index -= 1) { + const step = steps[index]; + const data = step && typeof step === "object" ? step.data : null; + if (data && typeof data === "object" && Array.isArray(data.items) && typeof data.failedCount === "number") return data; + } + return null; +} + +function classifyIssueFailureKind(kind, message = "") { + const text = String(kind ?? "") + " " + String(message ?? 
""); + if (/api|fetch|network|Failed to fetch|HTTP|status/iu.test(text)) return "network-or-api-fetch-bug"; + if (/script-api-misuse|page\.evaluate|safeEvaluate|Too many arguments/iu.test(text)) return "script-bug"; + if (/auth|login|credential/iu.test(text)) return "target-auth-bug"; + if (/browser|chromium|playwright|executable/iu.test(text)) return "browser-environment-bug"; + if (/assert|expect|ok:false|validation|final-response|message/iu.test(text)) return "unmet-expectation"; + if (/script|ReferenceError|TypeError|SyntaxError/iu.test(text)) return "script-bug"; + return "user-facing-web-bug"; +} + +function issueNextAction(failureKind, payload) { + if (failureKind === "network-or-api-fetch-bug") return "Inspect summary.apiMatrix failed rows and retry the same node/lane after checking API availability."; + if (failureKind === "script-bug") return "Fix the probe script or use safeEvaluate/safeFetchJson/fetchApiMatrix helpers, then rerun the same command."; + if (failureKind === "target-auth-bug") return "Inspect credential sourceRef/fingerprint and target /auth/login state; do not print secrets."; + if (failureKind === "browser-environment-bug") return "Inspect Playwright/browser-launcher availability in the target workspace."; + if (failureKind === "unmet-expectation") return "Inspect summary.failedCondition, DOM/API steps, and screenshots to decide whether the Web behavior or the assertion is stale."; + return payload?.guidance ?? "Inspect reportPath for full redacted details and rerun the same node/lane entry after the root cause is fixed."; } `; } function compactWebProbeScriptResult(report: Record | null): Record | null { if (report === null) return null; + const summary = compactIssueSummary(record(report.summary)); return { ok: report.ok === true, status: typeof report.status === "string" ? report.status : null, + summary, baseUrl: typeof report.baseUrl === "string" ? report.baseUrl : null, finalUrl: typeof report.finalUrl === "string" ? 
report.finalUrl : null, lastUrl: typeof report.lastUrl === "string" ? report.lastUrl : null, scriptSha256: typeof report.scriptSha256 === "string" ? report.scriptSha256 : null, runDir: typeof report.runDir === "string" ? report.runDir : null, + reportPath: typeof report.reportPath === "string" ? report.reportPath : null, + reportSha256: typeof report.reportSha256 === "string" ? report.reportSha256 : null, auth: record(report.auth), - script: record(report.script), + script: compactWebProbeScriptBlock(report.script), + steps: compactWebProbeSteps(report.steps), failureKind: typeof report.failureKind === "string" ? report.failureKind : null, guidance: typeof report.guidance === "string" ? report.guidance : null, lastScreenshot: nullableRecord(report.lastScreenshot), @@ -6679,6 +6905,113 @@ function compactWebProbeScriptResult(report: Record | null): Re }; } +function compactWebProbeScriptBlock(value: unknown): Record { + const script = record(value); + return { + ok: script.ok === true, + result: compactJsonForIssue(script.result), + stepCount: Array.isArray(script.steps) ? script.steps.length : null, + }; +} + +function compactWebProbeSteps(value: unknown): Record[] { + if (!Array.isArray(value)) return []; + return value.slice(-30).map((item) => { + const step = record(item); + return { + index: typeof step.index === "number" ? step.index : null, + name: typeof step.name === "string" ? step.name : null, + ok: typeof step.ok === "boolean" ? step.ok : null, + atMs: typeof step.atMs === "number" ? step.atMs : null, + data: compactStepDataForIssue(step.data), + }; + }); +} + +function compactIssueSummary(value: Record): Record { + if (Object.keys(value).length === 0) return {}; + return { + ok: value.ok === true, + status: typeof value.status === "string" ? value.status : null, + degradedReason: typeof value.degradedReason === "string" ? value.degradedReason : null, + failureKind: typeof value.failureKind === "string" ? 
value.failureKind : null, + failedCondition: typeof value.failedCondition === "string" ? value.failedCondition : null, + nextAction: typeof value.nextAction === "string" ? value.nextAction : null, + finalUrl: typeof value.finalUrl === "string" ? value.finalUrl : null, + scriptSha256: typeof value.scriptSha256 === "string" ? value.scriptSha256 : null, + runDir: typeof value.runDir === "string" ? value.runDir : null, + reportPath: typeof value.reportPath === "string" ? value.reportPath : null, + reportSha256: typeof value.reportSha256 === "string" ? value.reportSha256 : null, + lastScreenshot: nullableRecord(value.lastScreenshot), + screenshots: Array.isArray(value.screenshots) ? value.screenshots.slice(-5).map(nullableRecord).filter((item): item is Record => item !== null) : [], + apiMatrix: compactApiMatrixSummary(value.apiMatrix), + stepCount: typeof value.stepCount === "number" ? value.stepCount : null, + lastStep: compactStepForIssue(value.lastStep), + valuesRedacted: value.valuesRedacted === true, + }; +} + +function compactJsonForIssue(value: unknown, depth = 0): unknown { + if (value === null || value === undefined) return value ?? null; + if (typeof value === "string") return value.replace(/\s+/gu, " ").trim().slice(0, 600); + if (typeof value === "number" || typeof value === "boolean") return value; + if (depth >= 5) return "[max-depth]"; + if (Array.isArray(value)) return value.slice(0, 30).map((item) => compactJsonForIssue(item, depth + 1)); + if (typeof value === "object") { + const out: Record = {}; + for (const [key, nested] of Object.entries(value as Record).slice(0, 40)) { + out[key] = compactJsonForIssue(nested, depth + 1); + } + return out; + } + return String(value).slice(0, 600); +} + +function compactStepForIssue(value: unknown): Record | null { + const step = nullableRecord(value); + if (step === null) return null; + return { + index: typeof step.index === "number" ? step.index : null, + name: typeof step.name === "string" ? 
step.name : null, + ok: typeof step.ok === "boolean" ? step.ok : null, + atMs: typeof step.atMs === "number" ? step.atMs : null, + data: compactStepDataForIssue(step.data), + }; +} + +function compactStepDataForIssue(value: unknown): unknown { + const data = nullableRecord(value); + if (data === null) return compactJsonForIssue(value); + if (Array.isArray(data.items) && typeof data.failedCount === "number") return compactApiMatrixSummary(data); + return compactJsonForIssue(data); +} + +function compactApiMatrixSummary(value: unknown): Record | null { + const matrix = nullableRecord(value); + if (matrix === null) return null; + return { + ok: matrix.ok === true, + count: typeof matrix.count === "number" ? matrix.count : null, + okCount: typeof matrix.okCount === "number" ? matrix.okCount : null, + failedCount: typeof matrix.failedCount === "number" ? matrix.failedCount : null, + items: Array.isArray(matrix.items) + ? matrix.items.slice(0, 12).map((item) => { + const row = record(item); + return { + name: typeof row.name === "string" ? row.name : null, + path: typeof row.path === "string" ? row.path : null, + method: typeof row.method === "string" ? row.method : null, + ok: row.ok === true, + status: typeof row.status === "number" ? row.status : null, + error: typeof row.error === "string" ? row.error : null, + failureKind: typeof row.failureKind === "string" ? row.failureKind : null, + bodyKeys: Array.isArray(row.bodyKeys) ? 
row.bodyKeys.filter((key) => typeof key === "string").slice(0, 12) : [], + }; + }) + : [], + }; +} + function parseSecretOptions(args: string[]): NodeSecretOptions { const [actionRaw] = args; if (actionRaw !== "status" && actionRaw !== "ensure" && actionRaw !== "cleanup-owned-postgres" && actionRaw !== "cleanup-obsolete") { @@ -9522,18 +9855,26 @@ function parseJsonObject(text: string): Record | null { function compactWebProbeResult(report: Record | null): Record | null { if (report === null) return null; const dom = record(report.dom); - const performance = record(report.performance); - const trace = record(report.trace); - const session = record(report.session); + const performance = compactWebProbePerformance(report.performance); + const trace = compactWebProbeTrace(report.trace); + const session = compactWebProbeSession(report.session); + const artifacts = record(report.artifacts); + const promptValidation = compactPromptValidation(report.promptValidation); + const degradedReason = typeof report.degradedReason === "string" ? report.degradedReason : null; + const summary = webProbeRunIssueSummary({ report, session, trace, promptValidation, artifacts, degradedReason }); return { ok: report.ok === true, status: typeof report.status === "string" ? report.status : null, + summary, finalUrl: typeof report.finalUrl === "string" ? report.finalUrl : null, error: typeof report.error === "string" ? report.error : null, - degradedReason: typeof report.degradedReason === "string" ? report.degradedReason : null, + degradedReason, + failureKind: typeof summary.failureKind === "string" ? summary.failureKind : null, + nextAction: typeof summary.nextAction === "string" ? 
summary.nextAction : null, actions: compactWebProbeActions(report.actions), session, trace, + promptValidation, performance, traceSamples: compactWebProbeTraceSamples(report.traceSamples), dom: { @@ -9543,11 +9884,172 @@ function compactWebProbeResult(report: Record | null): Record { + const session = record(value); + const freshSession = record(session.freshSession); + return { + freshSessionRequested: session.freshSessionRequested === true, + freshSessionAligned: typeof session.freshSessionAligned === "boolean" ? session.freshSessionAligned : null, + conversationId: session.conversationId ?? null, + routeConversationId: session.routeConversationId ?? null, + selectedConversationId: session.selectedConversationId ?? null, + sessionId: session.sessionId ?? null, + revision: session.revision ?? null, + messageCount: session.messageCount ?? null, + degradedReason: session.degradedReason ?? null, + freshSession: Object.keys(freshSession).length === 0 ? null : { + settled: freshSession.settled === true, + aligned: freshSession.aligned === true, + reason: freshSession.reason ?? null, + }, + }; +} + +function compactWebProbeTrace(value: unknown): Record { + const trace = record(value); + const latestFetch = record(trace.latestFetch); + return { + requested: trace.requested === true, + sampleCount: trace.sampleCount ?? null, + conversationId: trace.conversationId ?? null, + sessionId: trace.sessionId ?? null, + traceId: trace.traceId ?? null, + finalAgentStatus: trace.finalAgentStatus ?? null, + finalTraceStatus: trace.finalTraceStatus ?? null, + finalDomRowCount: trace.finalDomRowCount ?? null, + firstDomRowVisibleAt: trace.firstDomRowVisibleAt ?? null, + firstDomRowPreview: typeof trace.firstDomRowPreview === "string" ? trace.firstDomRowPreview.slice(0, 240) : null, + firstRestEventVisibleAt: trace.firstRestEventVisibleAt ?? null, + firstRestEventCount: trace.firstRestEventCount ?? 
null, + restTraceOk: trace.restTraceOk === true, + failedFetchCount: trace.failedFetchCount ?? null, + latestFetch: Object.keys(latestFetch).length === 0 ? null : { + ok: latestFetch.ok === true, + httpStatus: latestFetch.httpStatus ?? null, + eventCount: latestFetch.eventCount ?? null, + traceStatus: latestFetch.traceStatus ?? null, + degradedReason: latestFetch.degradedReason ?? null, + attempts: Array.isArray(latestFetch.attempts) ? latestFetch.attempts.length : null, + latestEventPreview: typeof latestFetch.latestEventPreview === "string" ? latestFetch.latestEventPreview.slice(0, 240) : null, + }, + degradedReason: trace.degradedReason ?? null, + }; +} + +function compactWebProbePerformance(value: unknown): Record { + const performance = record(value); + return { + startedAt: performance.startedAt ?? null, + submittedAt: performance.submittedAt ?? null, + firstDomTraceRowAt: performance.firstDomTraceRowAt ?? null, + firstRestTraceEventAt: performance.firstRestTraceEventAt ?? null, + submitToFirstDomTraceRowMs: performance.submitToFirstDomTraceRowMs ?? null, + submitToFirstRestTraceEventMs: performance.submitToFirstRestTraceEventMs ?? null, + totalSampleWindowMs: performance.totalSampleWindowMs ?? null, + }; +} + +function compactPromptValidation(value: unknown): Record { + const prompt = record(value); + const finalResponse = record(prompt.finalResponse); + return { + ok: prompt.ok === true, + applicable: prompt.applicable === true, + reason: typeof prompt.reason === "string" ? prompt.reason : null, + failures: Array.isArray(prompt.failures) ? prompt.failures.filter((item) => typeof item === "string").slice(0, 20) : [], + finalResponse: Object.keys(finalResponse).length === 0 ? null : { + textChars: typeof finalResponse.textChars === "number" ? finalResponse.textChars : null, + textPreview: typeof finalResponse.textPreview === "string" ? 
finalResponse.textPreview.slice(0, 500) : null, + markdown: nullableRecord(finalResponse.markdown), + }, + }; +} + +function webProbeRunIssueSummary(input: { + report: Record; + session: Record; + trace: Record; + promptValidation: Record; + artifacts: Record; + degradedReason: string | null; +}): Record { + const failureKind = webProbeRunFailureKind(input.degradedReason, input.promptValidation); + const failedCondition = webProbeRunFailedCondition(input.degradedReason, input.promptValidation); + const promptSubmitted = record(input.report.safety).promptSubmitted === true; + const traceRequested = input.trace.requested === true || record(input.report.safety).traceSamplingRequested === true; + return { + ok: input.report.status === "pass" && !input.degradedReason, + status: typeof input.report.status === "string" ? input.report.status : null, + degradedReason: input.degradedReason, + failureKind, + failedCondition, + nextAction: webProbeRunNextAction(failureKind, input.degradedReason, input.promptValidation, promptSubmitted, traceRequested), + finalUrl: typeof input.report.finalUrl === "string" ? input.report.finalUrl : null, + conversationId: input.session.conversationId ?? input.trace.conversationId ?? null, + sessionId: input.session.sessionId ?? input.trace.sessionId ?? null, + traceId: input.trace.traceId ?? null, + traceRequested, + traceStatus: input.trace.finalTraceStatus ?? null, + agentStatus: input.trace.finalAgentStatus ?? null, + messageCount: input.session.messageCount ?? record(input.report.dom).messageCount ?? null, + promptValidation: input.promptValidation, + reportPath: typeof input.artifacts.reportPath === "string" ? input.artifacts.reportPath : null, + reportSha256: typeof input.artifacts.reportSha256 === "string" ? input.artifacts.reportSha256 : null, + screenshotPath: typeof input.artifacts.screenshotPath === "string" ? input.artifacts.screenshotPath : null, + screenshotSha256: typeof input.artifacts.screenshotSha256 === "string" ? 
input.artifacts.screenshotSha256 : null, + traceRecommendation: promptSubmitted && !traceRequested + ? "rerun with --trace-sample-count 2 --trace-sample-interval-ms 1500, or rely on the new default for prompt probes" + : null, + valuesRedacted: true, + }; +} + +function webProbeRunFailureKind(degradedReason: string | null, promptValidation: Record): string | null { + if (degradedReason === null) return null; + if (degradedReason === "prompt-validation-failed") return "unmet-expectation"; + if (/trace-fetch|api|fetch|http|network/iu.test(degradedReason)) return "network-or-api-fetch-bug"; + if (/auth|login|credential/iu.test(degradedReason)) return "target-auth-bug"; + if (/browser|timeout|playwright|chromium/iu.test(degradedReason)) return "browser-environment-bug"; + const failures = Array.isArray(promptValidation.failures) ? promptValidation.failures.join(" ") : ""; + if (/final-response|agent-message|markdown|completed/iu.test(failures)) return "unmet-expectation"; + return "user-facing-web-bug"; +} + +function webProbeRunFailedCondition(degradedReason: string | null, promptValidation: Record): string | null { + if (degradedReason === "prompt-validation-failed") { + const failures = Array.isArray(promptValidation.failures) ? promptValidation.failures.filter((item) => typeof item === "string") : []; + return failures.length > 0 ? failures.join(",") : "prompt final response validation failed"; + } + return degradedReason; +} + +function webProbeRunNextAction( + failureKind: string | null, + degradedReason: string | null, + promptValidation: Record, + promptSubmitted: boolean, + traceRequested: boolean, +): string | null { + if (failureKind === null) return null; + if (failureKind === "unmet-expectation") { + const failures = Array.isArray(promptValidation.failures) ? 
promptValidation.failures.join(",") : ""; + if (/agent-not-completed/iu.test(failures)) return "Increase --wait-agent-terminal-ms or inspect the Code Agent terminal state for the conversation/trace ids in summary."; + if (/final-response/iu.test(failures)) return "Inspect Workbench message projection and conversation detail; terminal messages must expose a final response."; + if (/markdown/iu.test(failures)) return "Check whether the prompt expectation still requires markdown structure; stale assertions should be removed instead of preserved."; + return "Inspect promptValidation.failures, screenshots, and reportPath to decide whether the Web behavior or the assertion is stale."; + } + if (failureKind === "network-or-api-fetch-bug") return "Inspect trace/session API fetch fields in reportPath and retry after checking target API availability."; + if (failureKind === "target-auth-bug") return "Inspect credential sourceRef/fingerprint and /auth/login status; do not print secrets."; + if (failureKind === "browser-environment-bug") return "Inspect browser launcher and Playwright availability on the target workspace."; + if (promptSubmitted && !traceRequested) return "Rerun with trace sampling if trace evidence is required."; + return degradedReason === null ? null : "Inspect reportPath for full redacted details, then rerun the same node/lane entry."; +} + function compactWebProbeTraceSamples(value: unknown): Record[] { if (!Array.isArray(value)) return []; return value.map(record).map((sample) => {