diff --git a/.agents/skills/unidesk-webdev/SKILL.md b/.agents/skills/unidesk-webdev/SKILL.md index b126f981..7d0374d4 100644 --- a/.agents/skills/unidesk-webdev/SKILL.md +++ b/.agents/skills/unidesk-webdev/SKILL.md @@ -97,7 +97,7 @@ bun scripts/cli.ts hwlab nodes web-probe observe analyze webobs-xxxx - `web-probe observe start` 默认是被动观测:记录 DOM 摘要、自然页面 request/response/requestfailed、截图和 performance 样本,不主动 fetch Workbench API、不 reload、不切换 session、不拦截路由、不调用 repair helper。任何 `newSession`、`selectProvider`、`sendPrompt`、`goto`、`screenshot`、`mark`、`stop` 都必须通过 `observe command` 显式下发,并进入 `control.jsonl`。 - `web-probe observe` 的 issue evidence 优先记录 observer id、stateDir、report JSON/Markdown SHA、samples/control/network/artifact 计数、routeSessionId、activeSessionId、prompt hash/textBytes、traceId、AgentRun runId/commandId、最终 status 和必要摘要;不要把 prompt 原文、assistant 大段正文、完整 stdout/stderr 或 provider payload 粘贴到 issue。 - 多轮 Workbench 采样必须证明同一个 `sessionId` 连续承载所有轮次;每轮至少记录 prompt hash、traceId、终态、最终回答摘要和性能/产物表。若 Web UI 投影卡住但 Code Agent/AgentRun result 已 terminal,应同时登记“执行终态”和“Workbench 投影未收敛”,不得用 `goto`、reload、切 session 或 result polling 把 UI 失败伪装成通过。 -- `observe analyze` 是离线分析,只读取 artifact JSONL 并写 `analysis/report.md` 与 `analysis/report.json`,不访问 Workbench API、不驱动浏览器。报告里的 `final-response-flicker`、`uncommanded-visible-state-change`、session changed、network 503 等 finding 是排障线索;用于 closeout 时必须结合原始 session/trace/DOM 证据解释,避免把采样噪声直接当作业务结论。 +- `observe analyze` 是离线分析,只读取 artifact JSONL 并写 `analysis/report.md` 与 `analysis/report.json`,不访问 Workbench API、不驱动浏览器。报告必须输出采样点 vs 每个 turn 的总耗时/最近更新时间表、DOM diagnostic/HTTP/console/requestfailed/runtime execution error 分组、page asset provenance segment、同源 API Resource Timing 分位表和超过 5s 的慢路径 finding;页面/API 加载超过 5s 视为不可用级性能红线,必须登记到上游问题,不能靠下游 retry/reload/fallback 掩盖。报告里的 `final-response-flicker`、`uncommanded-visible-state-change`、session changed、network 503 等 finding 是排障线索;用于 closeout 时必须结合原始 session/trace/DOM 证据解释,避免把采样噪声直接当作业务结论。 - 自定义 `web-probe script` 仍运行在 UniDesk 
`trans` 60s 最外层短连接约束内;能在一轮内完成的 P4 验收优先把 `--command-timeout-seconds` 控制在 55 秒以内,并减少无界 selector/network 等待。确需等待更久时,改用 `web-probe run` 的异步 job/status 语义,或把动作拆成“提交/采样/截图/状态读取”多次短 probe。若输出出现 `UNIDESK_SSH_RUNTIME_TIMEOUT` 但同时恢复了 `reportPath`、`reportSha256`、screenshots 或 DOM steps,先按远端报告判断脚本/页面实际状态;最终关闭证据仍优先用一次未触发短连接超时的 bounded rerun。 - issue closeout 优先引用 `web-probe script` 输出的顶层 `issueEvidence` 或 `summary.issueEvidence`;只有需要展开调查时才粘贴 `probe.script.result`、`probe.steps` 或完整 `reportPath`,避免 stdout、summary 和 report 多层重复同一证据。 - stdin heredoc 与 `--script-file` 都按 ES module 加载,脚本必须导出 `export default async ({ page, gotoStable, recordStep, ... }) => { ... }`;不要在模块顶层直接写 `return`。失败为 `Illegal return statement`、`does not provide an export named default` 或 finalUrl 仍是 `about:blank` 且 stepCount=0 时,先按 probe 脚本入口误用处理,不要归因成 Cloud Web 行为失败。 diff --git a/scripts/src/hwlab-node-impl.ts b/scripts/src/hwlab-node-impl.ts index 0f0bf2f3..12a56fb6 100644 --- a/scripts/src/hwlab-node-impl.ts +++ b/scripts/src/hwlab-node-impl.ts @@ -2852,43 +2852,92 @@ function nodeRuntimeGitMirrorRun(scoped: ReturnType \"$manifest_path\"", - scoped.dryRun - ? 
"kubectl create --dry-run=server -f \"$manifest_path\" -o name" - : [ - "kubectl delete job -n \"$namespace\" \"$job\" --ignore-not-found=true >/dev/null", - "kubectl create -f \"$manifest_path\"", - `deadline=$(( $(date +%s) + ${scoped.timeoutSeconds} ))`, - "while :; do", - " status=$(kubectl get job -n \"$namespace\" \"$job\" -o jsonpath='succeeded={.status.succeeded} failed={.status.failed}' 2>/dev/null || true)", - " succeeded=$(printf '%s\\n' \"$status\" | awk '{for (i = 1; i <= NF; i++) { split($i, a, \"=\"); if (a[1] == \"succeeded\") print a[2]; }}')", - " failed=$(printf '%s\\n' \"$status\" | awk '{for (i = 1; i <= NF; i++) { split($i, a, \"=\"); if (a[1] == \"failed\") print a[2]; }}')", - " if [ \"${succeeded:-0}\" = \"1\" ]; then break; fi", - " if [ \"${failed:-0}\" != \"\" ] && [ \"${failed:-0}\" != \"0\" ]; then kubectl logs -n \"$namespace\" \"job/$job\" --tail=200 || true; exit 44; fi", - " if [ \"$(date +%s)\" -ge \"$deadline\" ]; then kubectl get job,pod -n \"$namespace\" -l job-name=\"$job\" -o wide || true; exit 45; fi", - " sleep 2", - "done", - "kubectl logs -n \"$namespace\" \"job/$job\" --tail=200 || true", - ].join("\n"), - ].join("\n"); - const result = runNodeK3sScript(spec, script, scoped.timeoutSeconds); - const partialSuccess = nodeRuntimeGitMirrorFlushPartialSuccess(scoped, mirror, result); - const retryableFailure = !isCommandSuccess(result) - ? nodeRuntimeGitMirrorRetryableFailure(scoped, mirror, result, partialSuccess) - : null; - const actionSucceeded = isCommandSuccess(result); + const retryMaxAttempts = !scoped.dryRun && scoped.action === "flush" ? 
5 : 1; + const attempts: Record[] = []; + let finalAttempt: { + attempt: number; + retryLabel: string; + jobName: string; + manifest: Record; + result: CommandResult; + partialSuccess: Record | null; + retryableFailure: Record | null; + actionSucceeded: boolean; + } | null = null; + + for (let attempt = 1; attempt <= retryMaxAttempts; attempt += 1) { + const retryLabel = `${attempt}/${retryMaxAttempts}`; + const jobName = nodeRuntimeGitMirrorJobName(mirror, scoped.action); + const manifest = nodeRuntimeGitMirrorJobManifest(mirror, scoped.action, jobName); + const manifestB64 = Buffer.from(JSON.stringify(manifest), "utf8").toString("base64"); + const script = [ + "set -eu", + `namespace=${shellQuote(mirror.namespace)}`, + `job=${shellQuote(jobName)}`, + `manifest_b64=${shellQuote(manifestB64)}`, + "manifest_path=\"/tmp/$job.json\"", + "printf '%s' \"$manifest_b64\" | base64 -d > \"$manifest_path\"", + scoped.dryRun + ? "kubectl create --dry-run=server -f \"$manifest_path\" -o name" + : [ + "kubectl delete job -n \"$namespace\" \"$job\" --ignore-not-found=true >/dev/null", + "kubectl create -f \"$manifest_path\"", + `deadline=$(( $(date +%s) + ${scoped.timeoutSeconds} ))`, + "while :; do", + " status=$(kubectl get job -n \"$namespace\" \"$job\" -o jsonpath='succeeded={.status.succeeded} failed={.status.failed}' 2>/dev/null || true)", + " succeeded=$(printf '%s\\n' \"$status\" | awk '{for (i = 1; i <= NF; i++) { split($i, a, \"=\"); if (a[1] == \"succeeded\") print a[2]; }}')", + " failed=$(printf '%s\\n' \"$status\" | awk '{for (i = 1; i <= NF; i++) { split($i, a, \"=\"); if (a[1] == \"failed\") print a[2]; }}')", + " if [ \"${succeeded:-0}\" = \"1\" ]; then break; fi", + " if [ \"${failed:-0}\" != \"\" ] && [ \"${failed:-0}\" != \"0\" ]; then kubectl logs -n \"$namespace\" \"job/$job\" --tail=200 || true; exit 44; fi", + " if [ \"$(date +%s)\" -ge \"$deadline\" ]; then kubectl get job,pod -n \"$namespace\" -l job-name=\"$job\" -o wide || true; exit 45; fi", + " 
sleep 2", + "done", + "kubectl logs -n \"$namespace\" \"job/$job\" --tail=200 || true", + ].join("\n"), + ].join("\n"); + const result = runNodeK3sScript(spec, script, scoped.timeoutSeconds); + const partialSuccess = nodeRuntimeGitMirrorFlushPartialSuccess(scoped, mirror, result); + const actionSucceeded = isCommandSuccess(result); + const retryableFailure = !actionSucceeded + ? nodeRuntimeGitMirrorRetryableFailure(scoped, mirror, result, partialSuccess, attempt, retryMaxAttempts) + : null; + const retryable = retryableFailure?.retryable === true; + const exhausted = retryable && attempt >= retryMaxAttempts; + attempts.push({ + attempt, + retryLabel, + jobName, + ok: actionSucceeded, + exitCode: result.exitCode, + retryable, + exhausted, + partialSuccess: partialSuccess?.partialSuccess ?? null, + degradedReason: partialSuccess !== null + ? "node-runtime-git-mirror-flush-post-push-fetch-failed" + : actionSucceeded ? null : `node-runtime-git-mirror-${scoped.action}-failed`, + result: compactRuntimeCommand(result), + valuesPrinted: false, + }); + finalAttempt = { attempt, retryLabel, jobName, manifest, result, partialSuccess, retryableFailure, actionSucceeded }; + if (actionSucceeded || scoped.dryRun || !retryable || exhausted) break; + const backoffMs = nodeRuntimeGitMirrorRetryDelayMs(attempt); + printNodeRuntimeTriggerProgress(spec, { + stage: `git-mirror-${scoped.action}-retry`, + status: "waiting", + retryLabel, + nextRetryLabel: `${attempt + 1}/${retryMaxAttempts}`, + backoffMs, + jobName, + }); + sleepSync(backoffMs); + } + + if (finalAttempt === null) throw new Error("git-mirror run produced no attempts"); + const { jobName, manifest, result, partialSuccess, retryableFailure, actionSucceeded } = finalAttempt; const status = scoped.dryRun || !actionSucceeded ? 
undefined : nodeRuntimeGitMirrorStatus({ ...scoped, action: "status", dryRun: true, confirm: false }); + const retryExhausted = retryableFailure?.retryable === true && attempts.length >= retryMaxAttempts; return { ok: actionSucceeded && (status === undefined || status.ok === true), command: `hwlab nodes git-mirror ${scoped.action} --node ${scoped.node} --lane ${scoped.lane}`, @@ -2901,6 +2950,14 @@ function nodeRuntimeGitMirrorRun(scoped: ReturnType 1 ? { + policy: "exponential-backoff", + maxAttempts: retryMaxAttempts, + attempts, + exhausted: retryExhausted, + stopped: retryExhausted, + valuesPrinted: false, + } : undefined, retryableFailure: retryableFailure ?? undefined, partialSuccess: partialSuccess?.partialSuccess ?? undefined, recovery: partialSuccess ?? undefined, @@ -2920,27 +2977,45 @@ function nodeRuntimeGitMirrorRetryableFailure( mirror: NodeRuntimeGitMirrorTargetSpec, result: CommandResult, partialSuccess: Record | null, + attempt: number, + retryMaxAttempts: number, ): Record | null { const text = `${result.stdout}\n${result.stderr}`; const retryable = partialSuccess !== null || /kex_exchange_identification|Connection closed by remote host|Could not read from remote repository|ssh.github.com|github.com|fetch-pack|early EOF/iu.test(text); if (!retryable) return null; + const retryLabel = `${attempt}/${retryMaxAttempts}`; + const exhausted = attempt >= retryMaxAttempts; return { retryable: true, - retryMaxAttempts: 5, - retryLabel: "1/5", + retryAttempt: attempt, + retryMaxAttempts, + retryLabel, + retryExhausted: exhausted, + stopped: exhausted, + backoffPolicy: "exponential", + nextRetryDelayMs: exhausted ? null : nodeRuntimeGitMirrorRetryDelayMs(attempt), reason: partialSuccess !== null ? "GitOps push appears to have succeeded, but the post-push fetch/recheck failed. Standard git-mirror stops without host workspace fallback." : "Git mirror job hit a retryable upstream GitHub SSH/fetch failure. 
Standard git-mirror stops without host workspace fallback.", refSources: nodeRuntimeGitMirrorRefSources(scoped, mirror), - next: { - retry: `bun scripts/cli.ts hwlab nodes git-mirror ${scoped.action} --node ${scoped.node} --lane ${scoped.lane} --confirm --wait`, - status: `bun scripts/cli.ts hwlab nodes git-mirror status --node ${scoped.node} --lane ${scoped.lane}`, - }, + next: exhausted + ? { + status: `bun scripts/cli.ts hwlab nodes git-mirror status --node ${scoped.node} --lane ${scoped.lane}`, + stopped: "retry budget exhausted; standard git-mirror stopped instead of silently continuing", + } + : { + retry: `bun scripts/cli.ts hwlab nodes git-mirror ${scoped.action} --node ${scoped.node} --lane ${scoped.lane} --confirm --wait`, + status: `bun scripts/cli.ts hwlab nodes git-mirror status --node ${scoped.node} --lane ${scoped.lane}`, + }, valuesPrinted: false, }; } +function nodeRuntimeGitMirrorRetryDelayMs(attempt: number): number { + return Math.min(15_000, 1000 * (2 ** Math.max(0, attempt - 1))); +} + function nodeRuntimeGitMirrorHostWriteUrl(spec: HwlabRuntimeLaneSpec, mirror: NodeRuntimeGitMirrorTargetSpec): string | null { const result = runNodeK3sArgs(spec, ["kubectl", "-n", mirror.namespace, "get", "service", mirror.serviceWriteName, "-o", "jsonpath={.spec.clusterIP}{\":\"}{.spec.ports[0].port}"], 60); const value = result.stdout.trim(); @@ -3091,6 +3166,8 @@ function compactNodeRuntimeGitMirrorRun(result: Record): Record jobName: result.jobName ?? null, partialSuccess: result.partialSuccess ?? null, fallback: result.fallback ?? null, + retry: result.retry ?? null, + retryableFailure: result.retryableFailure ?? null, degradedReason: result.degradedReason ?? null, statusSummary: Object.keys(status).length > 0 ? compactNodeRuntimeGitMirrorStatus(status) : null, next: result.next ?? 
null, diff --git a/scripts/src/hwlab-node-web-observe-runner-source.ts b/scripts/src/hwlab-node-web-observe-runner-source.ts index 0e24f0bb..2834cc16 100644 --- a/scripts/src/hwlab-node-web-observe-runner-source.ts +++ b/scripts/src/hwlab-node-web-observe-runner-source.ts @@ -52,6 +52,8 @@ let stopping = false; let terminalStatus = "starting"; let lastScreenshotAtMs = 0; let auth = null; +let pageLoadSeq = 0; +let currentPageProvenance = null; try { if (!password) throw new Error("missing HWLAB_WEB_PASS"); @@ -108,6 +110,7 @@ async function writeManifest(extra = {}) { baseUrl, targetPath, pageAuthority: { browser: "chromium", context: "single", pageId, continuityBreaksRecorded: true }, + pageProvenance: compactPageProvenance(currentPageProvenance), sampling: { mode: "passive", sampleIntervalMs, screenshotIntervalMs, maxSamples, observerInitiatedDefault: false, responseBodyReadDefault: false }, commandDirs: dirs, artifacts: files, @@ -128,6 +131,7 @@ async function writeHeartbeat(extra = {}) { pageId, baseUrl, currentUrl: currentPageUrl(), + pageProvenance: compactPageProvenance(currentPageProvenance), sampleSeq, commandSeq, activeCommandId, @@ -287,8 +291,10 @@ async function gotoTarget(rawTarget) { try { const response = await page.goto(target, { waitUntil: "domcontentloaded", timeout: 30000 }); await page.waitForTimeout(1000).catch(() => {}); - attempts.push({ attempt, ok: true, httpStatus: response ? response.status() : null }); - return { beforeUrl, afterUrl: currentPageUrl(), httpStatus: response ? response.status() : null, pageId, attempts }; + const httpStatus = response ? response.status() : null; + const pageProvenance = await refreshPageProvenance("goto", httpStatus); + attempts.push({ attempt, ok: true, httpStatus }); + return { beforeUrl, afterUrl: currentPageUrl(), httpStatus, pageId, pageProvenance: compactPageProvenance(pageProvenance), attempts }; } catch (error) { const message = error instanceof Error ? 
error.message : String(error); attempts.push({ attempt, ok: false, failureKind: navigationFailureKind(message), message: redactErrorMessage(message) }); @@ -301,6 +307,94 @@ async function gotoTarget(rawTarget) { return { beforeUrl, afterUrl: currentPageUrl(), httpStatus: null, pageId, attempts }; } +async function refreshPageProvenance(reason, httpStatus = null) { + if (!page || page.isClosed()) return currentPageProvenance; + const observed = await page.evaluate(() => { + const assetPath = (raw) => { + if (!raw) return null; + try { + const url = new URL(raw, location.href); + const keys = Array.from(url.searchParams.keys()).sort(); + return url.pathname + (keys.length > 0 ? "?keys=" + keys.join(",") : ""); + } catch { + return null; + } + }; + const meta = Array.from(document.querySelectorAll("meta[name], meta[property]")).map((element) => ({ + key: String(element.getAttribute("name") || element.getAttribute("property") || "").slice(0, 120), + content: String(element.getAttribute("content") || "").slice(0, 200), + })).filter((item) => item.key).sort((a, b) => a.key.localeCompare(b.key)); + const navigation = performance.getEntriesByType("navigation")[0] || null; + return { + url: location.href, + path: location.pathname, + title: document.title, + readyState: document.readyState, + timeOrigin: Math.round(performance.timeOrigin || 0), + navigationStartTime: navigation ? 
Math.round(navigation.startTime) : null, + scripts: Array.from(document.scripts).map((element) => assetPath(element.src)).filter(Boolean).sort(), + stylesheets: Array.from(document.querySelectorAll('link[rel~="stylesheet"][href]')).map((element) => assetPath(element.href)).filter(Boolean).sort(), + meta, + }; + }).catch((error) => ({ error: errorSummary(error), url: currentPageUrl(), path: null, scripts: [], stylesheets: [], meta: [] })); + pageLoadSeq += 1; + currentPageProvenance = normalizePageProvenance(observed, { reason, httpStatus, pageLoadSeq }); + await appendJsonl(files.control, eventRecord("page-provenance", { reason, httpStatus, pageProvenance: compactPageProvenance(currentPageProvenance) })); + return currentPageProvenance; +} + +function normalizePageProvenance(value, options = {}) { + const scripts = Array.isArray(value?.scripts) ? value.scripts.map(String).filter(Boolean) : []; + const stylesheets = Array.isArray(value?.stylesheets) ? value.stylesheets.map(String).filter(Boolean) : []; + const meta = Array.isArray(value?.meta) ? value.meta.map((item) => ({ + key: String(item?.key || "").slice(0, 120), + contentHash: sha256Text(String(item?.content || "")), + })).filter((item) => item.key) : []; + const fingerprintInput = JSON.stringify({ scripts, stylesheets, meta }); + return { + pageLoadSeq: options.pageLoadSeq ?? pageLoadSeq, + reason: options.reason || "sample", + observedAt: new Date().toISOString(), + urlPath: safeUrlPath(value?.url || currentPageUrl()), + documentPath: value?.path || null, + titleHash: sha256Text(String(value?.title || "")), + documentReadyState: value?.readyState || null, + timeOrigin: Number.isFinite(Number(value?.timeOrigin)) ? Number(value.timeOrigin) : null, + navigationStartTime: Number.isFinite(Number(value?.navigationStartTime)) ? Number(value.navigationStartTime) : null, + httpStatus: options.httpStatus ?? 
null, + assetFingerprint: sha256Text(fingerprintInput), + scriptCount: scripts.length, + stylesheetCount: stylesheets.length, + metaCount: meta.length, + scripts: scripts.slice(0, 30), + stylesheets: stylesheets.slice(0, 30), + meta: meta.slice(0, 30), + error: value?.error || null, + valuesRedacted: true, + }; +} + +function compactPageProvenance(value) { + if (!value) return null; + return { + pageLoadSeq: value.pageLoadSeq ?? null, + reason: value.reason || null, + observedAt: value.observedAt || null, + urlPath: value.urlPath || null, + documentReadyState: value.documentReadyState || null, + timeOrigin: value.timeOrigin ?? null, + httpStatus: value.httpStatus ?? null, + assetFingerprint: value.assetFingerprint || null, + scriptCount: value.scriptCount ?? 0, + stylesheetCount: value.stylesheetCount ?? 0, + metaCount: value.metaCount ?? 0, + scripts: Array.isArray(value.scripts) ? value.scripts.slice(0, 12) : [], + stylesheets: Array.isArray(value.stylesheets) ? value.stylesheets.slice(0, 12) : [], + error: value.error || null, + valuesRedacted: true, + }; +} + function isRetryableNavigationError(message) { return /net::ERR_NETWORK_CHANGED|net::ERR_ABORTED|net::ERR_CONNECTION_RESET|Navigation timeout/iu.test(String(message || "")); } @@ -683,6 +777,37 @@ async function samplePage(reason) { traceRows, diagnostics, turns, + pageProvenance: { + url: location.href, + path: location.pathname, + title: document.title, + readyState: document.readyState, + timeOrigin: Math.round(performance.timeOrigin || 0), + navigationStartTime: (performance.getEntriesByType("navigation")[0] || null)?.startTime ?? null, + scripts: Array.from(document.scripts).map((element) => { + if (!element.src) return null; + try { + const url = new URL(element.src, location.href); + const keys = Array.from(url.searchParams.keys()).sort(); + return url.pathname + (keys.length > 0 ? 
"?keys=" + keys.join(",") : ""); + } catch { + return null; + } + }).filter(Boolean).sort(), + stylesheets: Array.from(document.querySelectorAll('link[rel~="stylesheet"][href]')).map((element) => { + try { + const url = new URL(element.href, location.href); + const keys = Array.from(url.searchParams.keys()).sort(); + return url.pathname + (keys.length > 0 ? "?keys=" + keys.join(",") : ""); + } catch { + return null; + } + }).filter(Boolean).sort(), + meta: Array.from(document.querySelectorAll("meta[name], meta[property]")).map((element) => ({ + key: String(element.getAttribute("name") || element.getAttribute("property") || "").slice(0, 120), + content: String(element.getAttribute("content") || "").slice(0, 200), + })).filter((item) => item.key).sort((a, b) => a.key.localeCompare(b.key)), + }, performance: performance.getEntriesByType("resource").slice(-30).map((entry) => ({ name: entry.name.split(/[?#]/u)[0].slice(0, 240), initiatorType: entry.initiatorType, startTime: Math.round(entry.startTime), duration: Math.round(entry.duration) })), }; }).catch((error) => ({ error: errorSummary(error), url: currentPageUrl() })); @@ -708,7 +833,9 @@ function digestDom(dom) { const traceRows = Array.isArray(dom.traceRows) ? dom.traceRows.map((item) => ({ ...item, textHash: sha256Text(item.text || ""), textPreview: truncate(item.text || "", 160), textBytes: Buffer.byteLength(item.text || "") })) : []; const diagnostics = Array.isArray(dom.diagnostics) ? dom.diagnostics.map((item) => ({ ...item, textHash: sha256Text(item.text || ""), textPreview: truncate(item.text || "", 260), textBytes: Buffer.byteLength(item.text || "") })) : []; const turns = Array.isArray(dom.turns) ? 
dom.turns.map((item) => ({ ...item, textHash: sha256Text(item.text || ""), textPreview: truncate(item.text || "", 200), textBytes: Buffer.byteLength(item.text || "") })) : []; - return { ...dom, messages, traceRows, diagnostics, turns }; + const pageProvenance = normalizePageProvenance(dom.pageProvenance, { reason: "sample", pageLoadSeq: currentPageProvenance?.pageLoadSeq ?? pageLoadSeq }); + currentPageProvenance = pageProvenance; + return { ...dom, messages, traceRows, diagnostics, turns, pageProvenance: compactPageProvenance(pageProvenance) }; } async function captureScreenshot(reason, imageType = "png") { @@ -792,6 +919,14 @@ function safeUrl(value) { } } +function safeUrlPath(value) { + try { + return new URL(String(value || ""), baseUrl).pathname; + } catch { + return null; + } +} + function normalizeBaseUrl(value) { const raw = value || "http://127.0.0.1:3000"; const url = new URL(raw); @@ -866,9 +1001,11 @@ const heartbeat = await readJson(path.join(stateDir, "heartbeat.json")); await mkdir(analysisDir, { recursive: true, mode: 0o700 }); const transitions = buildTransitions(samples); const sampleMetrics = buildSampleMetrics(samples, control); +const pageProvenance = buildPageProvenanceReport(samples, control, manifest); +const pagePerformance = buildPagePerformanceReport(samples, manifest); const promptNetwork = buildPromptNetworkReport(control, network); const runtimeAlerts = buildRuntimeAlerts(samples, control, network, consoleEvents, errors); -const findings = buildFindings(samples, control, network, errors, sampleMetrics, promptNetwork, runtimeAlerts); +const findings = buildFindings(samples, control, network, errors, sampleMetrics, promptNetwork, runtimeAlerts, pagePerformance, pageProvenance); const commandTimeline = control.filter((item) => item.phase === "completed" || item.phase === "failed").map((item) => ({ ts: item.ts, phase: item.phase, commandId: item.commandId, type: item.type, input: item.input, afterUrl: item.afterUrl })); const report = { 
ok: findings.filter((item) => item.severity === "red").length === 0, @@ -881,6 +1018,8 @@ const report = { commandTimeline, transitions, sampleMetrics, + pageProvenance, + pagePerformance, promptNetwork, runtimeAlerts, findings, @@ -890,7 +1029,7 @@ const report = { await writeFile(reportJsonPath, JSON.stringify(report, null, 2) + "\n", { mode: 0o600 }); await writeFile(reportMdPath, renderMarkdown(report), { mode: 0o600 }); const [jsonMeta, mdMeta] = await Promise.all([fileMeta(reportJsonPath), fileMeta(reportMdPath)]); -console.log(JSON.stringify({ ok: true, command: "web-probe-observe analyze", stateDir, reportJsonPath, reportJsonSha256: jsonMeta.sha256, reportMdPath, reportMdSha256: mdMeta.sha256, counts: report.counts, sampleMetrics: sampleMetrics.summary, promptNetwork: promptNetwork.summary, runtimeAlerts: runtimeAlerts.summary, turnTimingRecentUpdateSawtoothJumps: sampleMetrics.turnTimingRecentUpdateSawtoothJumps.slice(0, 20), turnTimingRecentUpdateLargestSteps: sampleMetrics.turnTimingRecentUpdateLargestSteps.slice(0, 20), findings: findings.slice(0, 20), valuesRedacted: true }, null, 2)); +console.log(JSON.stringify({ ok: true, command: "web-probe-observe analyze", stateDir, reportJsonPath, reportJsonSha256: jsonMeta.sha256, reportMdPath, reportMdSha256: mdMeta.sha256, counts: report.counts, sampleMetrics: sampleMetrics.summary, pageProvenance: pageProvenance.summary, pagePerformance: pagePerformance.summary, promptNetwork: promptNetwork.summary, runtimeAlerts: runtimeAlerts.summary, turnTimingRecentUpdateSawtoothJumps: sampleMetrics.turnTimingRecentUpdateSawtoothJumps.slice(0, 20), turnTimingRecentUpdateLargestSteps: sampleMetrics.turnTimingRecentUpdateLargestSteps.slice(0, 20), pagePerformanceSlowApi: pagePerformance.sameOriginApiByPath.filter((item) => item.overFiveSecondCount > 0).slice(0, 20), findings: findings.slice(0, 20), valuesRedacted: true }, null, 2)); async function readJson(file) { try { return JSON.parse(await readFile(file, "utf8")); } 
catch { return null; } @@ -905,7 +1044,7 @@ async function readJsonl(file) { } catch { return []; } } -function buildFindings(samples, control, network, errors, sampleMetrics, promptNetwork, runtimeAlerts) { +function buildFindings(samples, control, network, errors, sampleMetrics, promptNetwork, runtimeAlerts, pagePerformance, pageProvenance) { const findings = []; const commandTimes = control.filter((item) => item.phase === "completed" || item.phase === "started").map((item) => Date.parse(item.ts)).filter(Number.isFinite); const routeSessions = new Set(samples.map((item) => item.routeSessionId).filter(Boolean)); @@ -947,6 +1086,9 @@ function buildFindings(samples, control, network, errors, sampleMetrics, promptN if ((runtimeAlerts?.summary?.domDiagnosticSampleCount ?? 0) > 0) findings.push({ id: "runtime-dom-diagnostics", severity: "amber", summary: "diagnostic/error/warning-like text was visible in sampled DOM", count: runtimeAlerts.summary.domDiagnosticSampleCount, samples: runtimeAlerts.domDiagnostics.slice(0, 12) }); if ((runtimeAlerts?.summary?.executionErrorCount ?? 0) > 0) findings.push({ id: "runtime-execution-errors", severity: "red", summary: "Workbench rendered execution failure/error rows during observation", count: runtimeAlerts.summary.executionErrorCount, groups: runtimeAlerts.runtimeExecutionErrorsByCode.slice(0, 12) }); if ((runtimeAlerts?.summary?.consoleAlertCount ?? 0) > 0) findings.push({ id: "runtime-console-alerts", severity: "amber", summary: "browser console warning/error entries were captured during observation", count: runtimeAlerts.summary.consoleAlertCount, groups: runtimeAlerts.consoleAlertsByPath.slice(0, 12) }); + const slowApi = Array.isArray(pagePerformance?.sameOriginApiByPath) ? 
pagePerformance.sameOriginApiByPath.filter((item) => item.overFiveSecondCount > 0) : []; + if (slowApi.length > 0) findings.push({ id: "page-performance-slow-same-origin-api", severity: "red", summary: "same-origin API resource timing exceeded 5s usability budget", count: slowApi.length, groups: slowApi.slice(0, 20) }); + if ((pageProvenance?.summary?.segmentCount ?? 0) > 1) findings.push({ id: "page-provenance-segments", severity: "info", summary: "observer crossed page asset provenance segments; interpret runtime findings by segment", segmentCount: pageProvenance.summary.segmentCount, segments: pageProvenance.segments.slice(0, 20) }); const naturalApi = network.filter((item) => item.observerInitiated === false && item.type === "response" && /\/v1\/|\/auth\//u.test(String(item.url || ""))); findings.push({ id: "natural-api-dom-lag-baseline", severity: "info", summary: "natural API responses and DOM samples are available for manual lag correlation", naturalApiResponses: naturalApi.length, sampleCount: samples.length }); if (errors.length > 0) findings.push({ id: "browser-console-or-page-errors", severity: "amber", summary: "pageerror/runner errors were captured", count: errors.length, first: errors.slice(0, 5) }); @@ -954,6 +1096,172 @@ function buildFindings(samples, control, network, errors, sampleMetrics, promptN return findings; } +function buildPageProvenanceReport(samples, control, manifest) { + const groups = new Map(); + for (const sample of samples) { + const provenance = sample?.pageProvenance; + if (!provenance) continue; + const key = provenance.assetFingerprint || "unknown"; + const group = groups.get(key) || { + assetFingerprint: provenance.assetFingerprint || null, + pageLoadSeqs: [], + sampleCount: 0, + firstSeq: sample.seq ?? null, + lastSeq: sample.seq ?? null, + firstAt: sample.ts ?? null, + lastAt: sample.ts ?? null, + urlPaths: [], + scriptCount: provenance.scriptCount ?? null, + stylesheetCount: provenance.stylesheetCount ?? 
/**
 * Aggregates the Resource Timing entries captured in observer samples into
 * per-path latency statistics for same-origin API requests.
 *
 * Only entries whose URL resolves to the manifest origin and whose path looks
 * like an API route (see isApiLikePath) are counted. Paths are normalized with
 * normalizeApiPath so that requests differing only by id share one row.
 *
 * @param samples  Array of sample records; each may carry a performance array,
 *                 ts, seq and pageProvenance.assetFingerprint. Anything that is
 *                 not an array is treated as "no samples".
 * @param manifest Observer manifest; only baseUrl is read.
 * @returns An object with a summary, the per-path rows sorted by slow-sample
 *          count, then p95, then path, and valuesRedacted: true.
 */
function buildPagePerformanceReport(samples, manifest) {
  // Latency budget shared by the per-entry check and the reported summary.
  const budgetMs = 5000;
  const base = manifest?.baseUrl || "http://invalid.local";
  const seen = new Set();
  const groups = new Map();
  for (const sample of Array.isArray(samples) ? samples : []) {
    const entries = Array.isArray(sample?.performance) ? sample.performance : [];
    for (const entry of entries) {
      // Number(null), Number("") and Number(false) are all 0, so a missing
      // duration must be rejected before conversion or it would be counted as
      // a real 0 ms request and pull the percentiles down.
      const rawDuration = entry?.duration;
      const durationMs = typeof rawDuration === "number"
        ? rawDuration
        : (typeof rawDuration === "string" && rawDuration.trim() !== "" ? Number(rawDuration) : NaN);
      if (!Number.isFinite(durationMs) || durationMs < 0) continue;
      const parsed = parsePerformanceUrl(entry?.name, base);
      if (!parsed.sameOrigin || !isApiLikePath(parsed.path)) continue;
      const normalizedPath = normalizeApiPath(parsed.path);
      // The same timing entry can show up in several consecutive samples;
      // count it once.
      const dedupeKey = [parsed.path, entry.initiatorType || "", entry.startTime ?? "", Math.round(durationMs)].join("|");
      if (seen.has(dedupeKey)) continue;
      seen.add(dedupeKey);
      const group = groups.get(normalizedPath) || {
        routeKind: "same-origin-api",
        path: normalizedPath,
        rawPathSamples: [],
        sampleCount: 0,
        durationsMs: [],
        overFiveSecondCount: 0,
        firstAt: sample.ts ?? null,
        lastAt: sample.ts ?? null,
        firstSeq: sample.seq ?? null,
        lastSeq: sample.seq ?? null,
        initiatorTypes: [],
        pageAssetFingerprints: [],
        valuesRedacted: true
      };
      group.sampleCount += 1;
      group.durationsMs.push(durationMs);
      if (durationMs > budgetMs) group.overFiveSecondCount += 1;
      group.lastAt = sample.ts ?? null;
      group.lastSeq = sample.seq ?? null;
      if (parsed.path && !group.rawPathSamples.includes(parsed.path)) group.rawPathSamples.push(parsed.path);
      if (entry.initiatorType && !group.initiatorTypes.includes(entry.initiatorType)) group.initiatorTypes.push(entry.initiatorType);
      const assetFingerprint = sample?.pageProvenance?.assetFingerprint;
      if (assetFingerprint && !group.pageAssetFingerprints.includes(assetFingerprint)) group.pageAssetFingerprints.push(assetFingerprint);
      groups.set(normalizedPath, group);
    }
  }
  const sameOriginApiByPath = Array.from(groups.values()).map((group) => {
    // percentile() requires ascending input; sort a copy so the group is untouched.
    const durations = group.durationsMs.slice().sort((a, b) => a - b);
    return {
      routeKind: group.routeKind,
      path: group.path,
      sampleCount: group.sampleCount,
      p50Ms: percentile(durations, 50),
      p75Ms: percentile(durations, 75),
      p95Ms: percentile(durations, 95),
      maxMs: durations.length > 0 ? durations[durations.length - 1] : null,
      overFiveSecondCount: group.overFiveSecondCount,
      overFiveSecondRatio: group.sampleCount > 0 ? Number((group.overFiveSecondCount / group.sampleCount).toFixed(3)) : 0,
      firstAt: group.firstAt,
      lastAt: group.lastAt,
      firstSeq: group.firstSeq,
      lastSeq: group.lastSeq,
      initiatorTypes: group.initiatorTypes,
      rawPathSamples: group.rawPathSamples.slice(0, 8),
      pageAssetFingerprints: group.pageAssetFingerprints.slice(0, 8),
      valuesRedacted: true
    };
  }).sort((a, b) => (b.overFiveSecondCount - a.overFiveSecondCount) || (Number(b.p95Ms ?? 0) - Number(a.p95Ms ?? 0)) || a.path.localeCompare(b.path));
  const slow = sameOriginApiByPath.filter((item) => item.overFiveSecondCount > 0);
  // Fold instead of Math.max(...values) so the path count never hits the
  // engine's argument-count limit.
  const worstP95Ms = sameOriginApiByPath.length > 0
    ? sameOriginApiByPath.reduce((worst, item) => Math.max(worst, Number(item.p95Ms ?? 0)), 0)
    : null;
  return {
    summary: {
      budgetMs,
      sameOriginApiPathCount: sameOriginApiByPath.length,
      sameOriginApiSampleCount: sameOriginApiByPath.reduce((sum, item) => sum + item.sampleCount, 0),
      slowPathCount: slow.length,
      slowSampleCount: slow.reduce((sum, item) => sum + item.overFiveSecondCount, 0),
      worstP95Ms,
      valuesRedacted: true
    },
    sameOriginApiByPath,
    valuesRedacted: true
  };
}

/**
 * Resolves a Resource Timing entry name against the observer base URL.
 *
 * @param value Entry name (absolute or relative URL).
 * @param base  Base URL used for resolution and for the origin comparison.
 * @returns An object { sameOrigin, path }. Unparseable or missing input yields
 *          { sameOrigin: false, path: "-" }.
 */
function parsePerformanceUrl(value, base) {
  const name = String(value || "");
  // An empty name would resolve to the base URL itself and be attributed to
  // the base path; an entry without a name cannot be attributed to any route.
  if (name === "") return { sameOrigin: false, path: "-" };
  try {
    const url = new URL(name, base);
    const origin = new URL(String(base || "http://invalid.local")).origin;
    return { sameOrigin: url.origin === origin, path: url.pathname };
  } catch {
    return { sameOrigin: false, path: "-" };
  }
}

/**
 * Tells whether a URL path belongs to one of the API prefixes /v1, /auth or
 * /health (exact segment match, so "/v10" does not qualify).
 */
function isApiLikePath(path) {
  return /^\/(?:v1(?:\/|$)|auth(?:\/|$)|health(?:\/|$))/u.test(String(path || ""));
}

/**
 * Collapses id-bearing path segments into placeholders so that requests to the
 * same route are grouped together.
 *
 * Every id pattern must cover a whole path segment; a partial match would only
 * turn one high-cardinality path into another (":id" followed by the leftover
 * characters).
 */
function normalizeApiPath(path) {
  return String(path || "-")
    .replace(/\/v1\/workbench\/sessions\/ses_[^/]+/gu, "/v1/workbench/sessions/:id")
    .replace(/\/v1\/workbench\/turns\/trc_[^/]+/gu, "/v1/workbench/turns/:traceId")
    .replace(/\/v1\/workbench\/traces\/trc_[^/]+/gu, "/v1/workbench/traces/:traceId")
    .replace(/\/v1\/workbench\/sessions\/[0-9a-f-]{12,}(?=\/|$)/giu, "/v1/workbench/sessions/:id")
    .replace(/(\/v1\/[^/]+)\/[0-9a-f-]{16,}(?=\/|$)/giu, "$1/:id");
}

/**
 * Linear-interpolated percentile of an ascending array, rounded to an integer.
 *
 * @param sortedValues    Numbers sorted ascending; the caller is responsible for sorting.
 * @param percentileValue Percentile in the range 0..100; values outside the
 *                        range are clamped to it.
 * @returns The rounded percentile, or null for an empty or non-array input.
 */
function percentile(sortedValues, percentileValue) {
  if (!Array.isArray(sortedValues) || sortedValues.length === 0) return null;
  if (sortedValues.length === 1) return Math.round(sortedValues[0]);
  // Clamp so that the computed rank always addresses an existing element.
  const bounded = Math.min(100, Math.max(0, Number(percentileValue)));
  const rank = (bounded / 100) * (sortedValues.length - 1);
  const lower = Math.floor(rank);
  const upper = Math.ceil(rank);
  if (lower === upper) return Math.round(sortedValues[lower]);
  const weight = rank - lower;
  return Math.round(sortedValues[lower] * (1 - weight) + sortedValues[upper] * weight);
}
/**
 * Projects an observer manifest onto the fields kept in the report.
 * Returns null for a missing manifest; pageProvenance defaults to null.
 */
function compactManifest(value) {
  if (!value) return null;
  const { jobId, stateDir, baseUrl, targetPath, startedAt, status, sampling, pageProvenance, safety } = value;
  return {
    jobId,
    stateDir,
    baseUrl,
    targetPath,
    startedAt,
    status,
    sampling,
    pageProvenance: pageProvenance ?? null,
    safety
  };
}

/**
 * Projects an observer heartbeat onto the fields kept in the report.
 * Returns null for a missing heartbeat; pageProvenance defaults to null.
 */
function compactHeartbeat(value) {
  if (!value) return null;
  const { jobId, pid, status, sampleSeq, commandSeq, currentUrl, pageProvenance, updatedAt, uptimeMs } = value;
  return {
    jobId,
    pid,
    status,
    sampleSeq,
    commandSeq,
    currentUrl,
    pageProvenance: pageProvenance ?? null,
    updatedAt,
    uptimeMs
  };
}
0) + " diagnostics=" + item.diagnosticSamples + " terminal=" + item.terminalSamples + " finalText=" + item.finalTextSamples).join("\n") : "- 无轮次指标。"; + const provenanceLines = Array.isArray(report.pageProvenance?.segments) && report.pageProvenance.segments.length > 0 + ? report.pageProvenance.segments.slice(0, 40).map((item) => "- fingerprint=" + (item.assetFingerprint || "-") + " samples=" + item.sampleCount + " seq=" + (item.firstSeq ?? "-") + ".." + (item.lastSeq ?? "-") + " ts=" + (item.firstAt || "-") + ".." + (item.lastAt || "-") + " scripts=" + (item.scriptCount ?? "-") + " styles=" + (item.stylesheetCount ?? "-") + " urlPaths=" + (Array.isArray(item.urlPaths) ? item.urlPaths.slice(0, 4).join(",") : "-")).join("\n") + : "- 无页面 provenance segment。"; + const performanceLines = Array.isArray(report.pagePerformance?.sameOriginApiByPath) && report.pagePerformance.sameOriginApiByPath.length > 0 + ? report.pagePerformance.sameOriginApiByPath.slice(0, 80).map((item) => "- " + item.path + " samples=" + item.sampleCount + " p50=" + (item.p50Ms ?? "-") + "ms p75=" + (item.p75Ms ?? "-") + "ms p95=" + (item.p95Ms ?? "-") + "ms max=" + (item.maxMs ?? "-") + "ms >5s=" + (item.overFiveSecondCount ?? 0) + " window=" + (item.firstAt || "-") + ".." + (item.lastAt || "-")).join("\n") + : "- 无同源 API Resource Timing 样本。"; const metricLines = Array.isArray(report.sampleMetrics?.timeline) && report.sampleMetrics.timeline.length > 0 ? report.sampleMetrics.timeline.slice(0, 120).map((item) => "- #" + item.seq + " " + item.ts + " prompt=" + item.promptIndex + " totalElapsedSeconds=" + (item.totalElapsedSeconds ?? "-") + " recentUpdateSeconds=" + (item.recentUpdateSeconds ?? "-") + " terminal=" + item.terminalSeen + " finalText=" + item.finalResultTextSeen + " diagnostic=" + item.diagnosticSeen).join("\n") : "- 无采样指标。"; @@ -2066,6 +2380,18 @@ function renderMarkdown(report) { + "- turnTimingRecentUpdateMaxExcessSeconds: " + (metricSummary.turnTimingRecentUpdateMaxExcessSeconds ?? 
0) + "\n" + "- turnTimingRecentUpdateResetCount: " + (metricSummary.turnTimingRecentUpdateResetCount ?? 0) + "\n\n" + "### Rounds\n\n" + roundLines + "\n\n" + + "### Page provenance\n\n" + + "- segmentCount: " + (report.pageProvenance?.summary?.segmentCount ?? 0) + "\n" + + "- controlSegmentCount: " + (report.pageProvenance?.summary?.controlSegmentCount ?? 0) + "\n\n" + + provenanceLines + "\n\n" + + "### Page performance: same-origin API Resource Timing\n\n" + + "- budgetMs: " + (report.pagePerformance?.summary?.budgetMs ?? 5000) + "\n" + + "- sameOriginApiPathCount: " + (report.pagePerformance?.summary?.sameOriginApiPathCount ?? 0) + "\n" + + "- sameOriginApiSampleCount: " + (report.pagePerformance?.summary?.sameOriginApiSampleCount ?? 0) + "\n" + + "- slowPathCount: " + (report.pagePerformance?.summary?.slowPathCount ?? 0) + "\n" + + "- slowSampleCount: " + (report.pagePerformance?.summary?.slowSampleCount ?? 0) + "\n" + + "- worstP95Ms: " + (report.pagePerformance?.summary?.worstP95Ms ?? "-") + "\n\n" + + performanceLines + "\n\n" + "### Prompt network\n\n" + promptNetworkLines + "\n\n" + "### Runtime alerts\n\n" + "- httpErrorCount: " + (alertSummary.httpErrorCount ?? 0) + "\n"