diff --git a/.agents/skills/unidesk-webdev/SKILL.md b/.agents/skills/unidesk-webdev/SKILL.md index a0bdb5ae..368ec98f 100644 --- a/.agents/skills/unidesk-webdev/SKILL.md +++ b/.agents/skills/unidesk-webdev/SKILL.md @@ -47,7 +47,7 @@ web-probe 入口分三类: - `run`:repo-owned 标准 DOM probe,适合固定 P4 验收和已有脚本。 - `script`:受控 Playwright 托管脚本,适合一轮 55 秒内完成的 DOM/API 断言、截图、route/intercept 和边界采样。 -- `observe`:纯客户端长程观测,适合同一 Workbench session 多轮任务、realtime/projection 问题、长时间 trace/DOM/network 采样和无副作用报告生成。 +- `observe`:纯客户端长程观测,适合同一 Workbench session 多轮任务、realtime/projection 问题、长时间 trace/DOM/network 采样和无副作用报告生成。长程 Workbench 观测默认同时打开两个浏览器页面:control 页面只执行显式 `observe command` 用户动作,observer 页面只打开同一个 session 做被动观察,用来抓多用户/多页面下同一 session 的投影差异、历史 trace 丢失、耗时跳变和 loading 差异。 需要 Playwright route/intercept、延迟 API、读取 in-flight DOM 或截图时仍使用受控 `web-probe script`,不要裸写 Playwright: @@ -94,7 +94,7 @@ bun scripts/cli.ts hwlab nodes web-probe observe analyze webobs-xxxx 约束: - `web-probe script` 不运行默认探针,必须通过 stdin heredoc 或 `--script-file ` 提供脚本;只需要 repo-owned 标准 DOM probe 时使用 `web-probe run`。 -- `web-probe observe start` 默认是被动观测:记录 DOM 摘要、自然页面 request/response/requestfailed、截图和 performance 样本,不主动 fetch Workbench API、不 reload、不切换 session、不拦截路由、不调用 repair helper。任何 `newSession`、`selectProvider`、`sendPrompt`、`goto`、`screenshot`、`mark`、`stop` 都必须通过 `observe command` 显式下发,并进入 `control.jsonl`。 +- `web-probe observe start` 默认是被动观测:记录 DOM 摘要、自然页面 request/response/requestfailed、截图和 performance 样本,不主动 fetch Workbench API、不 reload、不切换 session、不拦截路由、不调用 repair helper。长程 Workbench 观测必须保留 control/observer 双页面模型:control 页面执行显式 command,observer 页面只同步到同一 session URL 后被动采样;两页的 `pageRole`、`pageId`、`sampleGroupSeq` 必须进入样本和 analyzer 报表。任何 `newSession`、`selectProvider`、`sendPrompt`、`goto`、`screenshot`、`mark`、`stop` 都必须通过 `observe command` 显式下发,并进入 `control.jsonl`。 - `web-probe observe` 的 issue evidence 优先记录 observer id、stateDir、report JSON/Markdown SHA、samples/control/network/artifact 计数、routeSessionId、activeSessionId、prompt hash/textBytes、traceId、AgentRun runId/commandId、最终 status 和必要摘要;不要把 prompt 原文、assistant 大段正文、完整 stdout/stderr 或 provider payload 粘贴到 issue。 - 多轮 Workbench 采样必须证明同一个 `sessionId` 连续承载所有轮次;每轮至少记录 prompt hash、traceId、终态、最终回答摘要和性能/产物表。若 Web UI 投影卡住但 Code Agent/AgentRun result 已 terminal,应同时登记“执行终态”和“Workbench 投影未收敛”,不得用 `goto`、reload、切 session 或 result polling 把 UI 失败伪装成通过。 - `observe analyze` 是离线分析,只读取 artifact JSONL 并写 `analysis/report.md` 与 `analysis/report.json`,不访问 Workbench API、不驱动浏览器。报告必须输出采样点 vs 每个 turn 的总耗时/最近更新时间表、可见“加载中”的数量/归属/并发 owner/连续出现区间、DOM diagnostic/HTTP/console/requestfailed/runtime execution error 分组、page asset provenance segment、同源 API Resource Timing 分位表和超过 5s 的慢路径 finding;页面/API 加载超过 5s 视为不可用级性能红线,可见“加载中”持续超过 5s 也必须作为真实慢加载证据登记到上游问题。修复必须降低真实请求、投影、渲染或后端路径耗时,禁止为了减少“加载中”出现时间而提前展示未加载完的内容,也不能靠下游 retry/reload/fallback 掩盖。报告里的 `final-response-flicker`、`uncommanded-visible-state-change`、session changed、network 503 等 finding 是排障线索;用于 closeout 时必须结合原始 session/trace/DOM 证据解释,避免把采样噪声直接当作业务结论。 diff --git a/scripts/src/hwlab-node-web-observe-runner-source.ts b/scripts/src/hwlab-node-web-observe-runner-source.ts index 9f6f9b88..eb2aa9ff 100644 --- a/scripts/src/hwlab-node-web-observe-runner-source.ts +++ b/scripts/src/hwlab-node-web-observe-runner-source.ts @@ -23,7 +23,8 @@ const maxSamples = positiveInteger(process.env.UNIDESK_WEB_OBSERVE_MAX_SAMPLES, const viewport = parseViewport(process.env.UNIDESK_WEB_OBSERVE_VIEWPORT || "1440x900"); const playwrightProxy = proxyConfigFromEnv(baseUrl); const chromiumLaunchOptions = chromiumLaunchOptionsForProxy(playwrightProxy); -const pageId = "page-" + randomBytes(4).toString("hex"); +const pageId = "control-" + randomBytes(4).toString("hex"); +const observerPageId = "observer-" + randomBytes(4).toString("hex"); const dirs = { commandsPending: path.join(stateDir, "commands", "pending"), commandsProcessing: path.join(stateDir, "commands", "processing"), @@ -46,6 +47,7 @@ const files = { let browser; let context; let page; +let observerPage; let sampleSeq = 0; let commandSeq = 0; let artifactSeq = 0; @@ -68,8 +70,11 @@ try { context = await browser.newContext({ viewport, ...(playwrightProxy === null ? {} : { proxy: playwrightProxy }) }); auth = await runControlCommand({ id: "startup-login", type: "login", createdAt: startedAt, source: "startup" }, async () => authenticate(context)); page = await context.newPage(); - attachPassiveListeners(page); + attachPassiveListeners(page, "control", pageId); await runControlCommand({ id: "startup-goto", type: "goto", path: targetPath, createdAt: new Date().toISOString(), source: "startup" }, async () => gotoTarget(targetPath)); + observerPage = await context.newPage(); + attachPassiveListeners(observerPage, "observer", observerPageId); + await runControlCommand({ id: "startup-observer-goto", type: "observerGoto", path: targetPath, createdAt: new Date().toISOString(), source: "startup" }, async () => syncObserverPageToControlSession("startup")); terminalStatus = "running"; await writeManifest({ status: "running", auth: publicAuth(auth) }); await writeHeartbeat({ status: "running" }); @@ -112,7 +117,7 @@ async function writeManifest(extra = {}) { baseUrl, targetPath, network: publicNetwork(playwrightProxy), - pageAuthority: { browser: "chromium", context: "single", pageId, continuityBreaksRecorded: true }, + pageAuthority: { browser: "chromium", context: "shared-auth", pageMode: "dual-control-observer", controlPageId: pageId, observerPageId, continuityBreaksRecorded: true }, pageProvenance: compactPageProvenance(currentPageProvenance), sampling: { mode: "passive", sampleIntervalMs, screenshotIntervalMs, maxSamples, observerInitiatedDefault: false, responseBodyReadDefault: false }, commandDirs: dirs, @@ -132,8 +137,10 @@ async function writeHeartbeat(extra = {}) { stateDir, status: terminalStatus, pageId, + observerPageId, baseUrl, currentUrl: currentPageUrl(), + observerUrl: pageUrl(observerPage), pageProvenance: compactPageProvenance(currentPageProvenance), sampleSeq, commandSeq, @@ -145,9 +152,11 @@ async function writeHeartbeat(extra = {}) { await writeFile(files.heartbeat, JSON.stringify(heartbeat, null, 2) + "\n", { mode: 0o600 }); } -function attachPassiveListeners(targetPage) { +function attachPassiveListeners(targetPage, pageRole = "control", targetPageId = pageId) { targetPage.on("request", (request) => { void appendJsonl(files.network, eventRecord("request", { + pageRole, + pageId: targetPageId, observerInitiated: false, commandId: activeCommandId, method: request.method(), @@ -159,6 +168,8 @@ function attachPassiveListeners(targetPage) { targetPage.on("response", (response) => { const request = response.request(); void appendJsonl(files.network, eventRecord("response", { + pageRole, + pageId: targetPageId, observerInitiated: false, commandId: activeCommandId, method: request.method(), @@ -172,6 +183,8 @@ function attachPassiveListeners(targetPage) { }); targetPage.on("requestfailed", (request) => { void appendJsonl(files.network, eventRecord("requestfailed", { + pageRole, + pageId: targetPageId, observerInitiated: false, commandId: activeCommandId, method: request.method(), @@ -181,16 +194,16 @@ function attachPassiveListeners(targetPage) { })); }); targetPage.on("console", (message) => { - void appendJsonl(files.console, eventRecord("console", { type: message.type(), text: truncate(message.text(), 1000), location: message.location() })); + void appendJsonl(files.console, eventRecord("console", { pageRole, pageId: targetPageId, type: message.type(), text: truncate(message.text(), 1000), location: message.location() })); }); targetPage.on("pageerror", (error) => { - void appendJsonl(files.errors, eventRecord("pageerror", { error: errorSummary(error) })); + void appendJsonl(files.errors, eventRecord("pageerror", { pageRole, pageId: targetPageId, error: errorSummary(error) })); }); targetPage.on("crash", () => { - void appendJsonl(files.errors, eventRecord("page-crash", { pageId })); + void appendJsonl(files.errors, eventRecord("page-crash", { pageRole, pageId: targetPageId })); }); targetPage.on("close", () => { - void appendJsonl(files.control, eventRecord("continuity-break", { pageId, reason: "page-closed" })); + void appendJsonl(files.control, eventRecord("continuity-break", { pageRole, pageId: targetPageId, reason: "page-closed" })); }); } @@ -230,11 +243,11 @@ async function processCommand(command) { switch (command.type) { case "login": return authenticate(context); case "preflight": return preflightSummary(); - case "goto": return gotoTarget(command.path || command.url || targetPath); - case "newSession": return createSessionFromUi(); - case "sendPrompt": return sendPrompt(String(command.text || "")); - case "selectProvider": return selectProvider(String(command.provider || command.value || command.text || "")); - case "clickSession": return clickSession(String(command.sessionId || command.value || "")); + case "goto": return withObserverSync(await gotoTarget(command.path || command.url || targetPath), "goto"); + case "newSession": return withObserverSync(await createSessionFromUi(), "newSession"); + case "sendPrompt": return withObserverSync(await sendPrompt(String(command.text || "")), "sendPrompt"); + case "selectProvider": return withObserverSync(await selectProvider(String(command.provider || command.value || command.text || "")), "selectProvider"); + case "clickSession": return withObserverSync(await clickSession(String(command.sessionId || command.value || "")), "clickSession"); case "screenshot": return captureScreenshot(command.reason || "manual", command.imageType || "png"); case "mark": return { mark: truncate(command.label || command.text || "mark", 200), currentUrl: currentPageUrl(), pageId }; case "stop": stopping = true; return { stopping: true, currentUrl: currentPageUrl(), pageId }; @@ -242,6 +255,28 @@ async function processCommand(command) { } } +async function withObserverSync(result, reason) { + return { ...result, observer: await syncObserverPageToControlSession(reason, result?.sessionId ?? null) }; +} + +async function syncObserverPageToControlSession(reason, explicitSessionId = null) { + if (!observerPage || observerPage.isClosed()) return { ok: false, reason, pageRole: "observer", pageId: observerPageId, failureKind: "observer-page-unavailable" }; + const snapshot = await workbenchSessionSnapshot(); + const sessionId = explicitSessionId || snapshot?.activeSessionId || snapshot?.routeSessionId || routeSessionIdFromUrl(currentPageUrl()); + const target = sessionId ? "/workbench/sessions/" + encodeURIComponent(sessionId) : targetPath; + const beforeUrl = pageUrl(observerPage); + const beforeSessionId = routeSessionIdFromUrl(beforeUrl); + if (sessionId && beforeSessionId === sessionId) return { ok: true, reason, changed: false, sessionId, beforeUrl, afterUrl: beforeUrl, pageRole: "observer", pageId: observerPageId }; + let status = null; + let statusText = null; + const response = await observerPage.goto(new URL(target, baseUrl).toString(), { waitUntil: "domcontentloaded", timeout: 45000 }).catch((error) => ({ observerGotoError: errorSummary(error) })); + if (response?.observerGotoError) return { ok: false, reason, changed: false, sessionId: sessionId ?? null, targetPath: target, beforeUrl, afterUrl: pageUrl(observerPage), pageRole: "observer", pageId: observerPageId, error: response.observerGotoError, valuesRedacted: true }; + status = typeof response?.status === "function" ? response.status() : null; + statusText = typeof response?.statusText === "function" ? response.statusText() : null; + await observerPage.waitForTimeout(1000); + return { ok: true, reason, changed: true, sessionId: sessionId ?? null, targetPath: target, beforeUrl, afterUrl: pageUrl(observerPage), pageRole: "observer", pageId: observerPageId, httpStatus: status, statusText, valuesRedacted: true }; +} + async function runControlCommand(command, fn) { activeCommandId = command.id; commandSeq += 1; @@ -883,9 +918,20 @@ async function preflightSummary() { } async function samplePage(reason) { - if (!page || page.isClosed()) return; + const groupSeq = sampleSeq + 1; + if (page && !page.isClosed()) await sampleOnePage(page, { reason, groupSeq, pageRole: "control", targetPageId: pageId }); + if (observerPage && !observerPage.isClosed()) { + await sampleOnePage(observerPage, { reason, groupSeq, pageRole: "observer", targetPageId: observerPageId }).catch((error) => appendJsonl(files.errors, eventRecord("observer-sample-error", { pageRole: "observer", pageId: observerPageId, error: errorSummary(error) }))); + } + if (screenshotIntervalMs > 0 && Date.now() - lastScreenshotAtMs >= screenshotIntervalMs) { + await captureScreenshot("checkpoint", "jpeg").catch((error) => appendJsonl(files.errors, eventRecord("screenshot-error", { pageRole: "control", pageId, error: errorSummary(error) }))); + } + await writeHeartbeat({ status: terminalStatus }); +} + +async function sampleOnePage(targetPage, { reason, groupSeq, pageRole, targetPageId }) { sampleSeq += 1; - const dom = await page.evaluate(() => { + const dom = await targetPage.evaluate(() => { const trim = (value, limit = 500) => String(value || "").replace(/\s+/g, " ").trim().slice(0, limit); const visible = (element) => { const rect = element.getBoundingClientRect(); @@ -1158,24 +1204,22 @@ async function samplePage(reason) { }, performance: performance.getEntriesByType("resource").slice(-80).map(resourceTimingSample), }; - }).catch((error) => ({ error: errorSummary(error), url: currentPageUrl() })); + }).catch((error) => ({ error: errorSummary(error), url: pageUrl(targetPage) })); const sample = { seq: sampleSeq, + sampleGroupSeq: groupSeq, ts: new Date().toISOString(), reason, - pageId, + pageRole, + pageId: targetPageId, commandId: activeCommandId, observerInitiated: false, - ...digestDom(dom), + ...digestDom(dom, pageRole), }; await appendJsonl(files.samples, sample); - if (screenshotIntervalMs > 0 && Date.now() - lastScreenshotAtMs >= screenshotIntervalMs) { - await captureScreenshot("checkpoint", "jpeg").catch((error) => appendJsonl(files.errors, eventRecord("screenshot-error", { error: errorSummary(error) }))); - } - await writeHeartbeat({ status: terminalStatus }); } -function digestDom(dom) { +function digestDom(dom, pageRole = "control") { if (dom && dom.error) return dom; const messages = Array.isArray(dom.messages) ? dom.messages.map((item) => ({ ...item, textHash: sha256Text(item.text || ""), textPreview: truncate(item.text || "", 160), textBytes: Buffer.byteLength(item.text || "") })) : []; const traceRows = Array.isArray(dom.traceRows) ? dom.traceRows.map((item) => ({ ...item, textHash: sha256Text(item.text || ""), textPreview: truncate(item.text || "", 160), textBytes: Buffer.byteLength(item.text || "") })) : []; @@ -1183,7 +1227,7 @@ function digestDom(dom) { const diagnostics = Array.isArray(dom.diagnostics) ? dom.diagnostics.map((item) => ({ ...item, textHash: sha256Text(item.text || ""), textPreview: truncate(item.text || "", 260), textBytes: Buffer.byteLength(item.text || "") })) : []; const turns = Array.isArray(dom.turns) ? dom.turns.map((item) => ({ ...item, textHash: sha256Text(item.text || ""), textPreview: truncate(item.text || "", 200), textBytes: Buffer.byteLength(item.text || "") })) : []; const pageProvenance = normalizePageProvenance(dom.pageProvenance, { reason: "sample", pageLoadSeq: currentPageProvenance?.pageLoadSeq ?? pageLoadSeq }); - currentPageProvenance = pageProvenance; + if (pageRole === "control") currentPageProvenance = pageProvenance; return { ...dom, messages, traceRows, loadings, diagnostics, turns, pageProvenance: compactPageProvenance(pageProvenance) }; } @@ -1204,7 +1248,8 @@ async function captureScreenshot(reason, imageType = "png") { } function eventRecord(type, data) { - return { ts: new Date().toISOString(), type, jobId, pageId, sampleSeq, commandId: activeCommandId, ...sanitize(data) }; + const clean = sanitize(data) || {}; + return { ts: new Date().toISOString(), type, jobId, pageId: clean.pageId ?? pageId, pageRole: clean.pageRole ?? "control", sampleSeq, commandId: activeCommandId, ...clean }; } function controlRecord(command, phase, detail) { @@ -1249,7 +1294,21 @@ async function fileMeta(file) { } function currentPageUrl() { - try { return page && !page.isClosed() ? page.url() : null; } catch { return null; } + return pageUrl(page); +} + +function pageUrl(targetPage) { + try { return targetPage && !targetPage.isClosed() ? targetPage.url() : null; } catch { return null; } +} + +function routeSessionIdFromUrl(value) { + try { + const pathname = new URL(String(value || ""), baseUrl).pathname; + const match = pathname.match(/\/workbench\/sessions\/([^/?#]+)/u); + return match ? decodeURIComponent(match[1] || "") : null; + } catch { + return null; + } } function safeFrameUrl(frame) { @@ -1575,7 +1634,9 @@ function compactSampleForAnalysis(sample) { seq: sample.seq ?? null, ts: sample.ts ?? null, reason: sample.reason ?? null, + sampleGroupSeq: sample.sampleGroupSeq ?? null, pageId: sample.pageId ?? null, + pageRole: sample.pageRole ?? null, commandId: sample.commandId ?? null, observerInitiated: sample.observerInitiated ?? null, url: sample.url ?? null, @@ -1736,6 +1797,8 @@ function buildFindings(samples, control, network, errors, sampleMetrics, promptN if (uncommandedChanges.length > 0) findings.push({ id: "uncommanded-visible-state-change", severity: "amber", summary: "visible message/trace digest changed without a nearby command", count: uncommandedChanges.length, samples: uncommandedChanges.slice(0, 20) }); const finalFlicker = detectFinalFlicker(samples); if (finalFlicker.length > 0) findings.push({ id: "final-response-flicker", severity: "red", summary: "message text digest disappeared or switched to diagnostic-like text after non-empty final text", count: finalFlicker.length, samples: finalFlicker.slice(0, 20) }); + const terminalZeroElapsed = detectTerminalZeroElapsed(samples); + if (terminalZeroElapsed.length > 0) findings.push({ id: "turn-terminal-zero-elapsed", severity: "red", summary: "terminal Code Agent card displayed 耗时 0 秒; terminal duration must come from durable timing projection, not a missing/zero display fallback", count: terminalZeroElapsed.length, samples: terminalZeroElapsed.slice(0, 20) }); const scrollJumps = []; for (let i = 1; i < samples.length; i += 1) { const prevY = Number(samples[i - 1]?.scroll?.y ?? 0); @@ -1766,6 +1829,12 @@ function buildFindings(samples, control, network, errors, sampleMetrics, promptN if ((runtimeAlerts?.summary?.domDiagnosticSampleCount ?? 0) > 0) findings.push({ id: "runtime-dom-diagnostics", severity: "amber", summary: "diagnostic/error/warning-like text was visible in sampled DOM", count: runtimeAlerts.summary.domDiagnosticSampleCount, groupCount: runtimeAlerts.summary.domDiagnosticGroupCount ?? 0, groups: runtimeAlerts.domDiagnosticsByText.slice(0, 12), samples: runtimeAlerts.domDiagnostics.slice(0, 12) }); if ((runtimeAlerts?.summary?.executionErrorCount ?? 0) > 0) findings.push({ id: "runtime-execution-errors", severity: "red", summary: "Workbench rendered execution failure/error rows during observation", count: runtimeAlerts.summary.executionErrorCount, groups: runtimeAlerts.runtimeExecutionErrorsByCode.slice(0, 12) }); if ((runtimeAlerts?.summary?.consoleAlertCount ?? 0) > 0) findings.push({ id: "runtime-console-alerts", severity: "amber", summary: "browser console warning/error entries were captured during observation", count: runtimeAlerts.summary.consoleAlertCount, groups: runtimeAlerts.consoleAlertsByPath.slice(0, 12) }); + const crossPageProjectionDiffs = detectCrossPageProjectionDiffs(samples); + if (crossPageProjectionDiffs.length > 0) findings.push({ id: "cross-page-projection-divergence", severity: "red", summary: "control and observer pages saw different projection state for the same sampled session", count: crossPageProjectionDiffs.length, samples: crossPageProjectionDiffs.slice(0, 20) }); + const traceMessageDuplicates = detectTraceMessageDuplication(samples); + if (traceMessageDuplicates.length > 0) findings.push({ id: "trace-assistant-message-duplicates-final-response", severity: "amber", summary: "trace rendered assistant message rows that duplicate the sealed final response", count: traceMessageDuplicates.length, samples: traceMessageDuplicates.slice(0, 20) }); + const turnTraceMissing = detectTurnTraceIdMissing(samples); + if (turnTraceMissing.length > 0) findings.push({ id: "turn-trace-id-missing", severity: "red", summary: "Code Agent turn/card was visible without a trace id, so historical trace hydration cannot be reliable", count: turnTraceMissing.length, samples: turnTraceMissing.slice(0, 20) }); const slowApi = Array.isArray(pagePerformance?.sameOriginApiByPath) ? pagePerformance.sameOriginApiByPath.filter((item) => item.overFiveSecondCount > 0) : []; if (slowApi.length > 0) findings.push({ id: "page-performance-slow-same-origin-api", severity: "red", summary: "same-origin API resource timing exceeded 5s usability budget", count: slowApi.length, groups: slowApi.slice(0, 20) }); const longLivedStreams = Array.isArray(pagePerformance?.sameOriginApiByPath) ? pagePerformance.sameOriginApiByPath.filter((item) => item.isLongLivedStream) : []; @@ -3519,7 +3588,7 @@ function parseTotalElapsedSeconds(text) { const hours = Number(match[2] || 0); const minutes = Number(match[3] || 0); const seconds = Number(match[4] || 0); - if (days || hours || minutes || seconds) values.push(days * 86400 + hours * 3600 + minutes * 60 + seconds); + if (days || hours || minutes || seconds || /(?:天|小时|分钟|分|秒)/u.test(match[0] || "")) values.push(days * 86400 + hours * 3600 + minutes * 60 + seconds); } return values; } @@ -3582,6 +3651,139 @@ function detectFinalFlicker(samples) { return flickers; } +function detectTerminalZeroElapsed(samples) { + const rows = []; + for (const sample of samples) { + const turns = Array.isArray(sample?.turns) ? sample.turns : []; + const messages = Array.isArray(sample?.messages) ? sample.messages : []; + for (const item of [...turns, ...messages]) { + const text = [item?.durationText, item?.activityText, item?.status, item?.textPreview, item?.text].filter(Boolean).join("\n"); + if (!/(?:Code Agent|运行记录|completed|failed|canceled|blocked)/iu.test(text)) continue; + if (!/(?:completed|failed|canceled|blocked|已完成|失败|取消)/iu.test(text)) continue; + const elapsedValues = parseTotalElapsedSeconds(text); + if (!elapsedValues.includes(0)) continue; + rows.push({ + ...ref(sample), + status: item?.status ?? null, + messageId: item?.messageId ?? null, + traceId: item?.traceId ?? null, + durationText: item?.durationText ?? null, + activityText: item?.activityText ?? null, + textPreview: limitText(item?.textPreview || item?.text || "", 180) + }); + } + } + return rows; +} + +function detectCrossPageProjectionDiffs(samples) { + const groups = new Map(); + for (const sample of samples) { + const key = sample?.sampleGroupSeq ?? sample?.seq; + if (key === null || key === undefined) continue; + const group = groups.get(key) || {}; + if (sample?.pageRole === "control") group.control = sample; + else if (sample?.pageRole === "observer") group.observer = sample; + groups.set(key, group); + } + const rows = []; + for (const [sampleGroupSeq, group] of groups.entries()) { + const control = group.control; + const observer = group.observer; + if (!control || !observer) continue; + const controlTraceIds = visibleTraceIds(control); + const observerTraceIds = visibleTraceIds(observer); + const missingInObserver = [...controlTraceIds].filter((item) => !observerTraceIds.has(item)); + const controlMessages = Array.isArray(control.messages) ? control.messages.length : 0; + const observerMessages = Array.isArray(observer.messages) ? observer.messages.length : 0; + const controlTraceRows = Array.isArray(control.traceRows) ? control.traceRows.length : 0; + const observerTraceRows = Array.isArray(observer.traceRows) ? observer.traceRows.length : 0; + const controlZero = detectTerminalZeroElapsed([control]).length > 0; + const observerZero = detectTerminalZeroElapsed([observer]).length > 0; + const sameSession = (control.routeSessionId || control.activeSessionId || null) && (control.routeSessionId || control.activeSessionId || null) === (observer.routeSessionId || observer.activeSessionId || null); + const divergent = sameSession && (missingInObserver.length > 0 || Math.abs(controlMessages - observerMessages) > 0 || Math.abs(controlTraceRows - observerTraceRows) > 0 || controlZero !== observerZero || digestMessageTexts(control) !== digestMessageTexts(observer)); + if (!divergent) continue; + rows.push({ + sampleGroupSeq, + control: ref(control), + observer: ref(observer), + missingTraceIdsInObserver: missingInObserver.slice(0, 8), + controlMessageCount: controlMessages, + observerMessageCount: observerMessages, + controlTraceRowCount: controlTraceRows, + observerTraceRowCount: observerTraceRows, + terminalZeroElapsedDiff: controlZero !== observerZero + }); + } + return rows; +} + +function detectTraceMessageDuplication(samples) { + const rows = []; + for (const sample of samples) { + const finalText = normalizedText((Array.isArray(sample?.messages) ? sample.messages : []).map((item) => item?.textPreview || item?.text || "").join("\n")); + if (finalText.length < 40) continue; + const traceRows = Array.isArray(sample?.traceRows) ? sample.traceRows : []; + for (const row of traceRows) { + const rowTextRaw = String(row?.textPreview || row?.text || ""); + if (!/(?:助手消息|assistant\s+message|assistant)/iu.test(rowTextRaw)) continue; + const rowText = normalizedText(rowTextRaw); + if (rowText.length < 24) continue; + const overlap = longestSharedSubstringLength(finalText, rowText); + if (overlap < 40 && overlap < Math.min(rowText.length, finalText.length) * 0.55) continue; + rows.push({ ...ref(sample), traceId: row?.traceId ?? null, rowIndex: row?.index ?? null, rowTextPreview: limitText(rowTextRaw, 180) }); + } + } + return rows; +} + +function detectTurnTraceIdMissing(samples) { + const rows = []; + for (const sample of samples) { + for (const item of Array.isArray(sample?.turns) ? sample.turns : []) { + const text = [item?.status, item?.durationText, item?.activityText, item?.textPreview, item?.text].filter(Boolean).join("\n"); + if (!/(?:Code Agent|运行记录|耗时|最近)/iu.test(text)) continue; + if (item?.traceId) continue; + rows.push({ ...ref(sample), status: item?.status ?? null, messageId: item?.messageId ?? null, textPreview: limitText(item?.textPreview || item?.text || "", 180) }); + } + } + return rows; +} + +function visibleTraceIds(sample) { + const ids = new Set(); + for (const group of [sample?.turns, sample?.messages, sample?.traceRows]) { + if (!Array.isArray(group)) continue; + for (const item of group) if (item?.traceId) ids.add(String(item.traceId)); + } + return ids; +} + +function digestMessageTexts(sample) { + return sha256((Array.isArray(sample?.messages) ? sample.messages : []).map((item) => item?.textHash || normalizedText(item?.textPreview || item?.text || "")).join("|")); +} + +function normalizedText(value) { + return String(value || "").replace(/\s+/gu, " ").trim(); +} + +function longestSharedSubstringLength(a, b) { + if (!a || !b) return 0; + const left = a.length <= b.length ? a : b; + const right = a.length <= b.length ? b : a; + const max = Math.min(left.length, 280); + let best = 0; + for (let start = 0; start < max; start += 1) { + for (let end = Math.min(max, start + 160); end > start + best; end -= 1) { + if (right.includes(left.slice(start, end))) { + best = end - start; + break; + } + } + } + return best; +} + function digestSample(sample) { const messages = Array.isArray(sample.messages) ? sample.messages.map((item) => item.textHash || item.textPreview || "").join("|") : ""; const trace = Array.isArray(sample.traceRows) ? sample.traceRows.map((item) => (item.status || "") + ":" + (item.textHash || item.textPreview || "")).join("|") : ""; @@ -3608,7 +3810,7 @@ function sampleRefs(samples, pick) { function ref(sample) { if (!sample) return null; - return { seq: sample.seq ?? null, ts: sample.ts ?? null, url: sample.url ?? null, routeSessionId: sample.routeSessionId ?? null, activeSessionId: sample.activeSessionId ?? null }; + return { seq: sample.seq ?? null, sampleGroupSeq: sample.sampleGroupSeq ?? null, ts: sample.ts ?? null, pageRole: sample.pageRole ?? null, pageId: sample.pageId ?? null, url: sample.url ?? null, routeSessionId: sample.routeSessionId ?? null, activeSessionId: sample.activeSessionId ?? null }; } async function artifactSummary(artifacts) { @@ -3618,12 +3820,12 @@ async function artifactSummary(artifacts) { function compactManifest(value) { if (!value) return null; - return { jobId: value.jobId, stateDir: value.stateDir, baseUrl: value.baseUrl, targetPath: value.targetPath, startedAt: value.startedAt, status: value.status, sampling: value.sampling, pageProvenance: value.pageProvenance ?? null, safety: value.safety }; + return { jobId: value.jobId, stateDir: value.stateDir, baseUrl: value.baseUrl, targetPath: value.targetPath, startedAt: value.startedAt, status: value.status, pageAuthority: value.pageAuthority ?? null, sampling: value.sampling, pageProvenance: value.pageProvenance ?? null, safety: value.safety }; } function compactHeartbeat(value) { if (!value) return null; - return { jobId: value.jobId, pid: value.pid, status: value.status, sampleSeq: value.sampleSeq, commandSeq: value.commandSeq, currentUrl: value.currentUrl, pageProvenance: value.pageProvenance ?? null, updatedAt: value.updatedAt, uptimeMs: value.uptimeMs }; + return { jobId: value.jobId, pid: value.pid, status: value.status, sampleSeq: value.sampleSeq, commandSeq: value.commandSeq, pageId: value.pageId ?? null, observerPageId: value.observerPageId ?? null, currentUrl: value.currentUrl, observerUrl: value.observerUrl ?? null, pageProvenance: value.pageProvenance ?? null, updatedAt: value.updatedAt, uptimeMs: value.uptimeMs }; } function renderTurnTimingTable(sampleMetrics) {