feat: add dual-page web-probe observer analysis (#668)
Co-authored-by: Codex <codex@noreply.local>
This commit is contained in:
@@ -47,7 +47,7 @@ web-probe 入口分三类:
|
||||
|
||||
- `run`:repo-owned 标准 DOM probe,适合固定 P4 验收和已有脚本。
|
||||
- `script`:受控 Playwright 托管脚本,适合一轮 55 秒内完成的 DOM/API 断言、截图、route/intercept 和边界采样。
|
||||
- `observe`:纯客户端长程观测,适合同一 Workbench session 多轮任务、realtime/projection 问题、长时间 trace/DOM/network 采样和无副作用报告生成。
|
||||
- `observe`:纯客户端长程观测,适合同一 Workbench session 多轮任务、realtime/projection 问题、长时间 trace/DOM/network 采样和无副作用报告生成。长程 Workbench 观测默认同时打开两个浏览器页面:control 页面只执行显式 `observe command` 用户动作,observer 页面只打开同一个 session 做被动观察,用来抓多用户/多页面下同一 session 的投影差异、历史 trace 丢失、耗时跳变和 loading 差异。
|
||||
|
||||
需要 Playwright route/intercept、延迟 API、读取 in-flight DOM 或截图时仍使用受控 `web-probe script`,不要裸写 Playwright:
|
||||
|
||||
@@ -94,7 +94,7 @@ bun scripts/cli.ts hwlab nodes web-probe observe analyze webobs-xxxx
|
||||
约束:
|
||||
|
||||
- `web-probe script` 不运行默认探针,必须通过 stdin heredoc 或 `--script-file <path>` 提供脚本;只需要 repo-owned 标准 DOM probe 时使用 `web-probe run`。
|
||||
- `web-probe observe start` 默认是被动观测:记录 DOM 摘要、自然页面 request/response/requestfailed、截图和 performance 样本,不主动 fetch Workbench API、不 reload、不切换 session、不拦截路由、不调用 repair helper。任何 `newSession`、`selectProvider`、`sendPrompt`、`goto`、`screenshot`、`mark`、`stop` 都必须通过 `observe command` 显式下发,并进入 `control.jsonl`。
|
||||
- `web-probe observe start` 默认是被动观测:记录 DOM 摘要、自然页面 request/response/requestfailed、截图和 performance 样本,不主动 fetch Workbench API、不 reload、不切换 session、不拦截路由、不调用 repair helper。长程 Workbench 观测必须保留 control/observer 双页面模型:control 页面执行显式 command,observer 页面只同步到同一 session URL 后被动采样;两页的 `pageRole`、`pageId`、`sampleGroupSeq` 必须进入样本和 analyzer 报表。任何 `newSession`、`selectProvider`、`sendPrompt`、`goto`、`screenshot`、`mark`、`stop` 都必须通过 `observe command` 显式下发,并进入 `control.jsonl`。
|
||||
- `web-probe observe` 的 issue evidence 优先记录 observer id、stateDir、report JSON/Markdown SHA、samples/control/network/artifact 计数、routeSessionId、activeSessionId、prompt hash/textBytes、traceId、AgentRun runId/commandId、最终 status 和必要摘要;不要把 prompt 原文、assistant 大段正文、完整 stdout/stderr 或 provider payload 粘贴到 issue。
|
||||
- 多轮 Workbench 采样必须证明同一个 `sessionId` 连续承载所有轮次;每轮至少记录 prompt hash、traceId、终态、最终回答摘要和性能/产物表。若 Web UI 投影卡住但 Code Agent/AgentRun result 已 terminal,应同时登记“执行终态”和“Workbench 投影未收敛”,不得用 `goto`、reload、切 session 或 result polling 把 UI 失败伪装成通过。
|
||||
- `observe analyze` 是离线分析,只读取 artifact JSONL 并写 `analysis/report.md` 与 `analysis/report.json`,不访问 Workbench API、不驱动浏览器。报告必须输出采样点 vs 每个 turn 的总耗时/最近更新时间表、可见“加载中”的数量/归属/并发 owner/连续出现区间、DOM diagnostic/HTTP/console/requestfailed/runtime execution error 分组、page asset provenance segment、同源 API Resource Timing 分位表和超过 5s 的慢路径 finding;页面/API 加载超过 5s 视为不可用级性能红线,可见“加载中”持续超过 5s 也必须作为真实慢加载证据登记到上游问题。修复必须降低真实请求、投影、渲染或后端路径耗时,禁止为了减少“加载中”出现时间而提前展示未加载完的内容,也不能靠下游 retry/reload/fallback 掩盖。报告里的 `final-response-flicker`、`uncommanded-visible-state-change`、session changed、network 503 等 finding 是排障线索;用于 closeout 时必须结合原始 session/trace/DOM 证据解释,避免把采样噪声直接当作业务结论。
|
||||
|
||||
@@ -23,7 +23,8 @@ const maxSamples = positiveInteger(process.env.UNIDESK_WEB_OBSERVE_MAX_SAMPLES,
|
||||
const viewport = parseViewport(process.env.UNIDESK_WEB_OBSERVE_VIEWPORT || "1440x900");
|
||||
const playwrightProxy = proxyConfigFromEnv(baseUrl);
|
||||
const chromiumLaunchOptions = chromiumLaunchOptionsForProxy(playwrightProxy);
|
||||
const pageId = "page-" + randomBytes(4).toString("hex");
|
||||
const pageId = "control-" + randomBytes(4).toString("hex");
|
||||
const observerPageId = "observer-" + randomBytes(4).toString("hex");
|
||||
const dirs = {
|
||||
commandsPending: path.join(stateDir, "commands", "pending"),
|
||||
commandsProcessing: path.join(stateDir, "commands", "processing"),
|
||||
@@ -46,6 +47,7 @@ const files = {
|
||||
let browser;
|
||||
let context;
|
||||
let page;
|
||||
let observerPage;
|
||||
let sampleSeq = 0;
|
||||
let commandSeq = 0;
|
||||
let artifactSeq = 0;
|
||||
@@ -68,8 +70,11 @@ try {
|
||||
context = await browser.newContext({ viewport, ...(playwrightProxy === null ? {} : { proxy: playwrightProxy }) });
|
||||
auth = await runControlCommand({ id: "startup-login", type: "login", createdAt: startedAt, source: "startup" }, async () => authenticate(context));
|
||||
page = await context.newPage();
|
||||
attachPassiveListeners(page);
|
||||
attachPassiveListeners(page, "control", pageId);
|
||||
await runControlCommand({ id: "startup-goto", type: "goto", path: targetPath, createdAt: new Date().toISOString(), source: "startup" }, async () => gotoTarget(targetPath));
|
||||
observerPage = await context.newPage();
|
||||
attachPassiveListeners(observerPage, "observer", observerPageId);
|
||||
await runControlCommand({ id: "startup-observer-goto", type: "observerGoto", path: targetPath, createdAt: new Date().toISOString(), source: "startup" }, async () => syncObserverPageToControlSession("startup"));
|
||||
terminalStatus = "running";
|
||||
await writeManifest({ status: "running", auth: publicAuth(auth) });
|
||||
await writeHeartbeat({ status: "running" });
|
||||
@@ -112,7 +117,7 @@ async function writeManifest(extra = {}) {
|
||||
baseUrl,
|
||||
targetPath,
|
||||
network: publicNetwork(playwrightProxy),
|
||||
pageAuthority: { browser: "chromium", context: "single", pageId, continuityBreaksRecorded: true },
|
||||
pageAuthority: { browser: "chromium", context: "shared-auth", pageMode: "dual-control-observer", controlPageId: pageId, observerPageId, continuityBreaksRecorded: true },
|
||||
pageProvenance: compactPageProvenance(currentPageProvenance),
|
||||
sampling: { mode: "passive", sampleIntervalMs, screenshotIntervalMs, maxSamples, observerInitiatedDefault: false, responseBodyReadDefault: false },
|
||||
commandDirs: dirs,
|
||||
@@ -132,8 +137,10 @@ async function writeHeartbeat(extra = {}) {
|
||||
stateDir,
|
||||
status: terminalStatus,
|
||||
pageId,
|
||||
observerPageId,
|
||||
baseUrl,
|
||||
currentUrl: currentPageUrl(),
|
||||
observerUrl: pageUrl(observerPage),
|
||||
pageProvenance: compactPageProvenance(currentPageProvenance),
|
||||
sampleSeq,
|
||||
commandSeq,
|
||||
@@ -145,9 +152,11 @@ async function writeHeartbeat(extra = {}) {
|
||||
await writeFile(files.heartbeat, JSON.stringify(heartbeat, null, 2) + "\n", { mode: 0o600 });
|
||||
}
|
||||
|
||||
function attachPassiveListeners(targetPage) {
|
||||
function attachPassiveListeners(targetPage, pageRole = "control", targetPageId = pageId) {
|
||||
targetPage.on("request", (request) => {
|
||||
void appendJsonl(files.network, eventRecord("request", {
|
||||
pageRole,
|
||||
pageId: targetPageId,
|
||||
observerInitiated: false,
|
||||
commandId: activeCommandId,
|
||||
method: request.method(),
|
||||
@@ -159,6 +168,8 @@ function attachPassiveListeners(targetPage) {
|
||||
targetPage.on("response", (response) => {
|
||||
const request = response.request();
|
||||
void appendJsonl(files.network, eventRecord("response", {
|
||||
pageRole,
|
||||
pageId: targetPageId,
|
||||
observerInitiated: false,
|
||||
commandId: activeCommandId,
|
||||
method: request.method(),
|
||||
@@ -172,6 +183,8 @@ function attachPassiveListeners(targetPage) {
|
||||
});
|
||||
targetPage.on("requestfailed", (request) => {
|
||||
void appendJsonl(files.network, eventRecord("requestfailed", {
|
||||
pageRole,
|
||||
pageId: targetPageId,
|
||||
observerInitiated: false,
|
||||
commandId: activeCommandId,
|
||||
method: request.method(),
|
||||
@@ -181,16 +194,16 @@ function attachPassiveListeners(targetPage) {
|
||||
}));
|
||||
});
|
||||
targetPage.on("console", (message) => {
|
||||
void appendJsonl(files.console, eventRecord("console", { type: message.type(), text: truncate(message.text(), 1000), location: message.location() }));
|
||||
void appendJsonl(files.console, eventRecord("console", { pageRole, pageId: targetPageId, type: message.type(), text: truncate(message.text(), 1000), location: message.location() }));
|
||||
});
|
||||
targetPage.on("pageerror", (error) => {
|
||||
void appendJsonl(files.errors, eventRecord("pageerror", { error: errorSummary(error) }));
|
||||
void appendJsonl(files.errors, eventRecord("pageerror", { pageRole, pageId: targetPageId, error: errorSummary(error) }));
|
||||
});
|
||||
targetPage.on("crash", () => {
|
||||
void appendJsonl(files.errors, eventRecord("page-crash", { pageId }));
|
||||
void appendJsonl(files.errors, eventRecord("page-crash", { pageRole, pageId: targetPageId }));
|
||||
});
|
||||
targetPage.on("close", () => {
|
||||
void appendJsonl(files.control, eventRecord("continuity-break", { pageId, reason: "page-closed" }));
|
||||
void appendJsonl(files.control, eventRecord("continuity-break", { pageRole, pageId: targetPageId, reason: "page-closed" }));
|
||||
});
|
||||
}
|
||||
|
||||
@@ -230,11 +243,11 @@ async function processCommand(command) {
|
||||
switch (command.type) {
|
||||
case "login": return authenticate(context);
|
||||
case "preflight": return preflightSummary();
|
||||
case "goto": return gotoTarget(command.path || command.url || targetPath);
|
||||
case "newSession": return createSessionFromUi();
|
||||
case "sendPrompt": return sendPrompt(String(command.text || ""));
|
||||
case "selectProvider": return selectProvider(String(command.provider || command.value || command.text || ""));
|
||||
case "clickSession": return clickSession(String(command.sessionId || command.value || ""));
|
||||
case "goto": return withObserverSync(await gotoTarget(command.path || command.url || targetPath), "goto");
|
||||
case "newSession": return withObserverSync(await createSessionFromUi(), "newSession");
|
||||
case "sendPrompt": return withObserverSync(await sendPrompt(String(command.text || "")), "sendPrompt");
|
||||
case "selectProvider": return withObserverSync(await selectProvider(String(command.provider || command.value || command.text || "")), "selectProvider");
|
||||
case "clickSession": return withObserverSync(await clickSession(String(command.sessionId || command.value || "")), "clickSession");
|
||||
case "screenshot": return captureScreenshot(command.reason || "manual", command.imageType || "png");
|
||||
case "mark": return { mark: truncate(command.label || command.text || "mark", 200), currentUrl: currentPageUrl(), pageId };
|
||||
case "stop": stopping = true; return { stopping: true, currentUrl: currentPageUrl(), pageId };
|
||||
@@ -242,6 +255,28 @@ async function processCommand(command) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Attaches the outcome of an observer-page sync to a control command result.
 *
 * @param {object} result - Result produced by the control-page action; its
 *   `sessionId` (when present) is forwarded as the explicit sync target.
 * @param {string} reason - Label passed through to the observer sync.
 * @returns {Promise<object>} A shallow copy of `result` with an `observer` key
 *   holding whatever `syncObserverPageToControlSession` resolved to.
 */
async function withObserverSync(result, reason) {
  // Copy first, then sync, so the copy reflects `result` as it was on entry.
  const merged = { ...result };
  merged.observer = await syncObserverPageToControlSession(reason, result?.sessionId ?? null);
  return merged;
}
|
||||
|
||||
/**
 * Navigates the observer page to the session the control page is showing.
 *
 * The session id is resolved in this order: `explicitSessionId`, the
 * snapshot's `activeSessionId`, the snapshot's `routeSessionId`, then the
 * session id parsed from the control page URL. Without a session id the
 * observer is sent to `targetPath`.
 *
 * @param {string} reason - Label echoed back in the returned record.
 * @param {string|null} [explicitSessionId=null] - Session id to prefer over
 *   anything derived from the control page.
 * @returns {Promise<object>} A record with `ok`, `reason`, and observer
 *   `pageRole`/`pageId`; navigation attempts additionally carry `changed`,
 *   `sessionId`, URLs, and either `error` or `httpStatus`/`statusText`.
 */
async function syncObserverPageToControlSession(reason, explicitSessionId = null) {
  const observerIdentity = { pageRole: "observer", pageId: observerPageId };
  if (!observerPage || observerPage.isClosed()) {
    return { ok: false, reason, ...observerIdentity, failureKind: "observer-page-unavailable" };
  }

  const snapshot = await workbenchSessionSnapshot();
  const sessionId = explicitSessionId
    || snapshot?.activeSessionId
    || snapshot?.routeSessionId
    || routeSessionIdFromUrl(currentPageUrl());
  const target = sessionId ? "/workbench/sessions/" + encodeURIComponent(sessionId) : targetPath;
  const beforeUrl = pageUrl(observerPage);
  const beforeSessionId = routeSessionIdFromUrl(beforeUrl);

  // Already on the requested session: report success without navigating.
  if (sessionId && beforeSessionId === sessionId) {
    return { ok: true, reason, changed: false, sessionId, beforeUrl, afterUrl: beforeUrl, ...observerIdentity };
  }

  const destination = new URL(target, baseUrl).toString();
  // A rejected navigation is converted into a sentinel object so it can be
  // reported as a failed sync instead of throwing.
  const response = await observerPage
    .goto(destination, { waitUntil: "domcontentloaded", timeout: 45000 })
    .catch((error) => ({ observerGotoError: errorSummary(error) }));

  const navigation = { sessionId: sessionId ?? null, targetPath: target, beforeUrl };
  if (response?.observerGotoError) {
    return {
      ok: false,
      reason,
      changed: false,
      ...navigation,
      afterUrl: pageUrl(observerPage),
      ...observerIdentity,
      error: response.observerGotoError,
      valuesRedacted: true,
    };
  }

  // `goto` may resolve without a response object, hence the typeof guards.
  const httpStatus = typeof response?.status === "function" ? response.status() : null;
  const statusText = typeof response?.statusText === "function" ? response.statusText() : null;
  await observerPage.waitForTimeout(1000);
  return {
    ok: true,
    reason,
    changed: true,
    ...navigation,
    afterUrl: pageUrl(observerPage),
    ...observerIdentity,
    httpStatus,
    statusText,
    valuesRedacted: true,
  };
}
|
||||
|
||||
async function runControlCommand(command, fn) {
|
||||
activeCommandId = command.id;
|
||||
commandSeq += 1;
|
||||
@@ -883,9 +918,20 @@ async function preflightSummary() {
|
||||
}
|
||||
|
||||
async function samplePage(reason) {
|
||||
if (!page || page.isClosed()) return;
|
||||
const groupSeq = sampleSeq + 1;
|
||||
if (page && !page.isClosed()) await sampleOnePage(page, { reason, groupSeq, pageRole: "control", targetPageId: pageId });
|
||||
if (observerPage && !observerPage.isClosed()) {
|
||||
await sampleOnePage(observerPage, { reason, groupSeq, pageRole: "observer", targetPageId: observerPageId }).catch((error) => appendJsonl(files.errors, eventRecord("observer-sample-error", { pageRole: "observer", pageId: observerPageId, error: errorSummary(error) })));
|
||||
}
|
||||
if (screenshotIntervalMs > 0 && Date.now() - lastScreenshotAtMs >= screenshotIntervalMs) {
|
||||
await captureScreenshot("checkpoint", "jpeg").catch((error) => appendJsonl(files.errors, eventRecord("screenshot-error", { pageRole: "control", pageId, error: errorSummary(error) })));
|
||||
}
|
||||
await writeHeartbeat({ status: terminalStatus });
|
||||
}
|
||||
|
||||
async function sampleOnePage(targetPage, { reason, groupSeq, pageRole, targetPageId }) {
|
||||
sampleSeq += 1;
|
||||
const dom = await page.evaluate(() => {
|
||||
const dom = await targetPage.evaluate(() => {
|
||||
const trim = (value, limit = 500) => String(value || "").replace(/\s+/g, " ").trim().slice(0, limit);
|
||||
const visible = (element) => {
|
||||
const rect = element.getBoundingClientRect();
|
||||
@@ -1158,24 +1204,22 @@ async function samplePage(reason) {
|
||||
},
|
||||
performance: performance.getEntriesByType("resource").slice(-80).map(resourceTimingSample),
|
||||
};
|
||||
}).catch((error) => ({ error: errorSummary(error), url: currentPageUrl() }));
|
||||
}).catch((error) => ({ error: errorSummary(error), url: pageUrl(targetPage) }));
|
||||
const sample = {
|
||||
seq: sampleSeq,
|
||||
sampleGroupSeq: groupSeq,
|
||||
ts: new Date().toISOString(),
|
||||
reason,
|
||||
pageId,
|
||||
pageRole,
|
||||
pageId: targetPageId,
|
||||
commandId: activeCommandId,
|
||||
observerInitiated: false,
|
||||
...digestDom(dom),
|
||||
...digestDom(dom, pageRole),
|
||||
};
|
||||
await appendJsonl(files.samples, sample);
|
||||
if (screenshotIntervalMs > 0 && Date.now() - lastScreenshotAtMs >= screenshotIntervalMs) {
|
||||
await captureScreenshot("checkpoint", "jpeg").catch((error) => appendJsonl(files.errors, eventRecord("screenshot-error", { error: errorSummary(error) })));
|
||||
}
|
||||
await writeHeartbeat({ status: terminalStatus });
|
||||
}
|
||||
|
||||
function digestDom(dom) {
|
||||
function digestDom(dom, pageRole = "control") {
|
||||
if (dom && dom.error) return dom;
|
||||
const messages = Array.isArray(dom.messages) ? dom.messages.map((item) => ({ ...item, textHash: sha256Text(item.text || ""), textPreview: truncate(item.text || "", 160), textBytes: Buffer.byteLength(item.text || "") })) : [];
|
||||
const traceRows = Array.isArray(dom.traceRows) ? dom.traceRows.map((item) => ({ ...item, textHash: sha256Text(item.text || ""), textPreview: truncate(item.text || "", 160), textBytes: Buffer.byteLength(item.text || "") })) : [];
|
||||
@@ -1183,7 +1227,7 @@ function digestDom(dom) {
|
||||
const diagnostics = Array.isArray(dom.diagnostics) ? dom.diagnostics.map((item) => ({ ...item, textHash: sha256Text(item.text || ""), textPreview: truncate(item.text || "", 260), textBytes: Buffer.byteLength(item.text || "") })) : [];
|
||||
const turns = Array.isArray(dom.turns) ? dom.turns.map((item) => ({ ...item, textHash: sha256Text(item.text || ""), textPreview: truncate(item.text || "", 200), textBytes: Buffer.byteLength(item.text || "") })) : [];
|
||||
const pageProvenance = normalizePageProvenance(dom.pageProvenance, { reason: "sample", pageLoadSeq: currentPageProvenance?.pageLoadSeq ?? pageLoadSeq });
|
||||
currentPageProvenance = pageProvenance;
|
||||
if (pageRole === "control") currentPageProvenance = pageProvenance;
|
||||
return { ...dom, messages, traceRows, loadings, diagnostics, turns, pageProvenance: compactPageProvenance(pageProvenance) };
|
||||
}
|
||||
|
||||
@@ -1204,7 +1248,8 @@ async function captureScreenshot(reason, imageType = "png") {
|
||||
}
|
||||
|
||||
function eventRecord(type, data) {
|
||||
return { ts: new Date().toISOString(), type, jobId, pageId, sampleSeq, commandId: activeCommandId, ...sanitize(data) };
|
||||
const clean = sanitize(data) || {};
|
||||
return { ts: new Date().toISOString(), type, jobId, pageId: clean.pageId ?? pageId, pageRole: clean.pageRole ?? "control", sampleSeq, commandId: activeCommandId, ...clean };
|
||||
}
|
||||
|
||||
function controlRecord(command, phase, detail) {
|
||||
@@ -1249,7 +1294,21 @@ async function fileMeta(file) {
|
||||
}
|
||||
|
||||
function currentPageUrl() {
|
||||
try { return page && !page.isClosed() ? page.url() : null; } catch { return null; }
|
||||
return pageUrl(page);
|
||||
}
|
||||
|
||||
/**
 * Reads the URL of a page without ever throwing.
 *
 * @param {object|null|undefined} targetPage - Object exposing `isClosed()` and `url()`.
 * @returns {string|null} The page URL, or null when the page is missing,
 *   reports itself closed, or either call throws.
 */
function pageUrl(targetPage) {
  if (!targetPage) return null;
  try {
    if (targetPage.isClosed()) return null;
    return targetPage.url();
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Extracts the session id from a `/workbench/sessions/<id>` URL path.
 *
 * @param {*} value - Absolute or relative URL; relative values are resolved
 *   against `baseUrl`. Falsy values are treated as an empty string.
 * @returns {string|null} The URI-decoded session id, or null when the path
 *   has no session segment or URL parsing/decoding throws.
 */
function routeSessionIdFromUrl(value) {
  try {
    const { pathname } = new URL(String(value || ""), baseUrl);
    const found = pathname.match(/\/workbench\/sessions\/([^/?#]+)/u);
    if (!found) return null;
    return decodeURIComponent(found[1] || "");
  } catch {
    return null;
  }
}
|
||||
|
||||
function safeFrameUrl(frame) {
|
||||
@@ -1575,7 +1634,9 @@ function compactSampleForAnalysis(sample) {
|
||||
seq: sample.seq ?? null,
|
||||
ts: sample.ts ?? null,
|
||||
reason: sample.reason ?? null,
|
||||
sampleGroupSeq: sample.sampleGroupSeq ?? null,
|
||||
pageId: sample.pageId ?? null,
|
||||
pageRole: sample.pageRole ?? null,
|
||||
commandId: sample.commandId ?? null,
|
||||
observerInitiated: sample.observerInitiated ?? null,
|
||||
url: sample.url ?? null,
|
||||
@@ -1736,6 +1797,8 @@ function buildFindings(samples, control, network, errors, sampleMetrics, promptN
|
||||
if (uncommandedChanges.length > 0) findings.push({ id: "uncommanded-visible-state-change", severity: "amber", summary: "visible message/trace digest changed without a nearby command", count: uncommandedChanges.length, samples: uncommandedChanges.slice(0, 20) });
|
||||
const finalFlicker = detectFinalFlicker(samples);
|
||||
if (finalFlicker.length > 0) findings.push({ id: "final-response-flicker", severity: "red", summary: "message text digest disappeared or switched to diagnostic-like text after non-empty final text", count: finalFlicker.length, samples: finalFlicker.slice(0, 20) });
|
||||
const terminalZeroElapsed = detectTerminalZeroElapsed(samples);
|
||||
if (terminalZeroElapsed.length > 0) findings.push({ id: "turn-terminal-zero-elapsed", severity: "red", summary: "terminal Code Agent card displayed 耗时 0 秒; terminal duration must come from durable timing projection, not a missing/zero display fallback", count: terminalZeroElapsed.length, samples: terminalZeroElapsed.slice(0, 20) });
|
||||
const scrollJumps = [];
|
||||
for (let i = 1; i < samples.length; i += 1) {
|
||||
const prevY = Number(samples[i - 1]?.scroll?.y ?? 0);
|
||||
@@ -1766,6 +1829,12 @@ function buildFindings(samples, control, network, errors, sampleMetrics, promptN
|
||||
if ((runtimeAlerts?.summary?.domDiagnosticSampleCount ?? 0) > 0) findings.push({ id: "runtime-dom-diagnostics", severity: "amber", summary: "diagnostic/error/warning-like text was visible in sampled DOM", count: runtimeAlerts.summary.domDiagnosticSampleCount, groupCount: runtimeAlerts.summary.domDiagnosticGroupCount ?? 0, groups: runtimeAlerts.domDiagnosticsByText.slice(0, 12), samples: runtimeAlerts.domDiagnostics.slice(0, 12) });
|
||||
if ((runtimeAlerts?.summary?.executionErrorCount ?? 0) > 0) findings.push({ id: "runtime-execution-errors", severity: "red", summary: "Workbench rendered execution failure/error rows during observation", count: runtimeAlerts.summary.executionErrorCount, groups: runtimeAlerts.runtimeExecutionErrorsByCode.slice(0, 12) });
|
||||
if ((runtimeAlerts?.summary?.consoleAlertCount ?? 0) > 0) findings.push({ id: "runtime-console-alerts", severity: "amber", summary: "browser console warning/error entries were captured during observation", count: runtimeAlerts.summary.consoleAlertCount, groups: runtimeAlerts.consoleAlertsByPath.slice(0, 12) });
|
||||
const crossPageProjectionDiffs = detectCrossPageProjectionDiffs(samples);
|
||||
if (crossPageProjectionDiffs.length > 0) findings.push({ id: "cross-page-projection-divergence", severity: "red", summary: "control and observer pages saw different projection state for the same sampled session", count: crossPageProjectionDiffs.length, samples: crossPageProjectionDiffs.slice(0, 20) });
|
||||
const traceMessageDuplicates = detectTraceMessageDuplication(samples);
|
||||
if (traceMessageDuplicates.length > 0) findings.push({ id: "trace-assistant-message-duplicates-final-response", severity: "amber", summary: "trace rendered assistant message rows that duplicate the sealed final response", count: traceMessageDuplicates.length, samples: traceMessageDuplicates.slice(0, 20) });
|
||||
const turnTraceMissing = detectTurnTraceIdMissing(samples);
|
||||
if (turnTraceMissing.length > 0) findings.push({ id: "turn-trace-id-missing", severity: "red", summary: "Code Agent turn/card was visible without a trace id, so historical trace hydration cannot be reliable", count: turnTraceMissing.length, samples: turnTraceMissing.slice(0, 20) });
|
||||
const slowApi = Array.isArray(pagePerformance?.sameOriginApiByPath) ? pagePerformance.sameOriginApiByPath.filter((item) => item.overFiveSecondCount > 0) : [];
|
||||
if (slowApi.length > 0) findings.push({ id: "page-performance-slow-same-origin-api", severity: "red", summary: "same-origin API resource timing exceeded 5s usability budget", count: slowApi.length, groups: slowApi.slice(0, 20) });
|
||||
const longLivedStreams = Array.isArray(pagePerformance?.sameOriginApiByPath) ? pagePerformance.sameOriginApiByPath.filter((item) => item.isLongLivedStream) : [];
|
||||
@@ -3519,7 +3588,7 @@ function parseTotalElapsedSeconds(text) {
|
||||
const hours = Number(match[2] || 0);
|
||||
const minutes = Number(match[3] || 0);
|
||||
const seconds = Number(match[4] || 0);
|
||||
if (days || hours || minutes || seconds) values.push(days * 86400 + hours * 3600 + minutes * 60 + seconds);
|
||||
if (days || hours || minutes || seconds || /(?:天|小时|分钟|分|秒)/u.test(match[0] || "")) values.push(days * 86400 + hours * 3600 + minutes * 60 + seconds);
|
||||
}
|
||||
return values;
|
||||
}
|
||||
@@ -3582,6 +3651,139 @@ function detectFinalFlicker(samples) {
|
||||
return flickers;
|
||||
}
|
||||
|
||||
/**
 * Finds turn/message entries that look terminal yet report zero elapsed time.
 *
 * An entry qualifies when its combined text fields match both keyword
 * patterns below and `parseTotalElapsedSeconds` yields a 0 for that text.
 *
 * @param {Array<object>} samples - Samples with optional `turns` / `messages` arrays.
 * @returns {Array<object>} One row per qualifying entry: the sample reference
 *   fields plus the entry's status, ids, timing texts, and a 180-char preview.
 */
function detectTerminalZeroElapsed(samples) {
  const agentOrStatusPattern = /(?:Code Agent|运行记录|completed|failed|canceled|blocked)/iu;
  const terminalStatusPattern = /(?:completed|failed|canceled|blocked|已完成|失败|取消)/iu;
  const listOf = (value) => (Array.isArray(value) ? value : []);

  const findings = [];
  for (const sample of samples) {
    // Turns are scanned before messages, matching the order rows are emitted in.
    const candidates = [...listOf(sample?.turns), ...listOf(sample?.messages)];
    for (const entry of candidates) {
      const combined = [entry?.durationText, entry?.activityText, entry?.status, entry?.textPreview, entry?.text]
        .filter(Boolean)
        .join("\n");
      const looksTerminal = agentOrStatusPattern.test(combined) && terminalStatusPattern.test(combined);
      if (!looksTerminal) continue;
      if (!parseTotalElapsedSeconds(combined).includes(0)) continue;
      findings.push({
        ...ref(sample),
        status: entry?.status ?? null,
        messageId: entry?.messageId ?? null,
        traceId: entry?.traceId ?? null,
        durationText: entry?.durationText ?? null,
        activityText: entry?.activityText ?? null,
        textPreview: limitText(entry?.textPreview || entry?.text || "", 180),
      });
    }
  }
  return findings;
}
|
||||
|
||||
/**
 * Reports sample groups where the control and observer pages disagree.
 *
 * Samples are paired by `sampleGroupSeq` (falling back to `seq`). A pair is
 * reported only when both pages resolve to the same non-empty session id
 * (`routeSessionId`, else `activeSessionId`) and at least one of these
 * differs: trace ids visible on control but not on observer, message count,
 * trace-row count, the terminal-zero-elapsed flag, or the message text digest.
 *
 * @param {Array<object>} samples - Samples tagged with `pageRole`.
 * @returns {Array<object>} One row per divergent group with both sample
 *   references, up to 8 missing trace ids, and the compared counts.
 */
function detectCrossPageProjectionDiffs(samples) {
  const pairsByGroup = new Map();
  for (const sample of samples) {
    const groupKey = sample?.sampleGroupSeq ?? sample?.seq;
    if (groupKey === null || groupKey === undefined) continue;
    const pair = pairsByGroup.get(groupKey) || {};
    if (sample?.pageRole === "control") {
      pair.control = sample;
    } else if (sample?.pageRole === "observer") {
      pair.observer = sample;
    }
    pairsByGroup.set(groupKey, pair);
  }

  const countOf = (list) => (Array.isArray(list) ? list.length : 0);
  const sessionOf = (sample) => sample.routeSessionId || sample.activeSessionId || null;

  const divergences = [];
  pairsByGroup.forEach((pair, sampleGroupSeq) => {
    const { control, observer } = pair;
    if (!control || !observer) return;

    const controlTraceIds = visibleTraceIds(control);
    const observerTraceIds = visibleTraceIds(observer);
    const missingInObserver = [...controlTraceIds].filter((traceId) => !observerTraceIds.has(traceId));
    const controlMessages = countOf(control.messages);
    const observerMessages = countOf(observer.messages);
    const controlTraceRows = countOf(control.traceRows);
    const observerTraceRows = countOf(observer.traceRows);
    const controlZero = detectTerminalZeroElapsed([control]).length > 0;
    const observerZero = detectTerminalZeroElapsed([observer]).length > 0;

    const controlSession = sessionOf(control);
    const sameSession = controlSession && controlSession === sessionOf(observer);
    if (!sameSession) return;

    // The digest comparison runs last, only when the cheaper checks all agree.
    const differs = missingInObserver.length > 0
      || controlMessages !== observerMessages
      || controlTraceRows !== observerTraceRows
      || controlZero !== observerZero
      || digestMessageTexts(control) !== digestMessageTexts(observer);
    if (!differs) return;

    divergences.push({
      sampleGroupSeq,
      control: ref(control),
      observer: ref(observer),
      missingTraceIdsInObserver: missingInObserver.slice(0, 8),
      controlMessageCount: controlMessages,
      observerMessageCount: observerMessages,
      controlTraceRowCount: controlTraceRows,
      observerTraceRowCount: observerTraceRows,
      terminalZeroElapsedDiff: controlZero !== observerZero,
    });
  });
  return divergences;
}
|
||||
|
||||
/**
 * Flags assistant-like trace rows whose text repeats the sample's message text.
 *
 * Samples whose joined, whitespace-normalized message text is shorter than 40
 * characters are ignored. A trace row is flagged when it matches the
 * assistant keyword pattern, is at least 24 characters after normalization,
 * and shares a substring with the message text that is at least 40
 * characters long or at least 55% of the shorter of the two texts.
 *
 * @param {Array<object>} samples - Samples with optional `messages` / `traceRows` arrays.
 * @returns {Array<object>} One row per flagged trace row: the sample
 *   reference fields plus the row's trace id, index, and a 180-char preview.
 */
function detectTraceMessageDuplication(samples) {
  const assistantRowPattern = /(?:助手消息|assistant\s+message|assistant)/iu;
  const hits = [];
  for (const sample of samples) {
    const messageList = Array.isArray(sample?.messages) ? sample.messages : [];
    const joinedMessages = messageList.map((entry) => entry?.textPreview || entry?.text || "").join("\n");
    const finalText = normalizedText(joinedMessages);
    if (finalText.length < 40) continue;

    const traceList = Array.isArray(sample?.traceRows) ? sample.traceRows : [];
    for (const row of traceList) {
      const rawRowText = String(row?.textPreview || row?.text || "");
      if (!assistantRowPattern.test(rawRowText)) continue;
      const rowText = normalizedText(rawRowText);
      if (rowText.length < 24) continue;

      const overlap = longestSharedSubstringLength(finalText, rowText);
      const relativeFloor = Math.min(rowText.length, finalText.length) * 0.55;
      const isDuplicate = overlap >= 40 || overlap >= relativeFloor;
      if (!isDuplicate) continue;

      hits.push({
        ...ref(sample),
        traceId: row?.traceId ?? null,
        rowIndex: row?.index ?? null,
        rowTextPreview: limitText(rawRowText, 180),
      });
    }
  }
  return hits;
}
|
||||
|
||||
/**
 * Lists turn entries that match the agent-card keyword pattern but have no trace id.
 *
 * @param {Array<object>} samples - Samples with an optional `turns` array.
 * @returns {Array<object>} One row per matching turn: the sample reference
 *   fields plus the turn's status, message id, and a 180-char preview.
 */
function detectTurnTraceIdMissing(samples) {
  const agentCardPattern = /(?:Code Agent|运行记录|耗时|最近)/iu;
  const missing = [];
  for (const sample of samples) {
    const turnList = Array.isArray(sample?.turns) ? sample.turns : [];
    for (const turn of turnList) {
      const combined = [turn?.status, turn?.durationText, turn?.activityText, turn?.textPreview, turn?.text]
        .filter(Boolean)
        .join("\n");
      const isAgentCard = agentCardPattern.test(combined);
      if (!isAgentCard || turn?.traceId) continue;
      missing.push({
        ...ref(sample),
        status: turn?.status ?? null,
        messageId: turn?.messageId ?? null,
        textPreview: limitText(turn?.textPreview || turn?.text || "", 180),
      });
    }
  }
  return missing;
}
|
||||
|
||||
/**
 * Gathers every truthy `traceId` found on a sample's turns, messages, and trace rows.
 *
 * @param {object|null|undefined} sample - Sample whose `turns`, `messages`,
 *   and `traceRows` are inspected; any of them that is not an array is ignored.
 * @returns {Set<string>} The trace ids, each converted to a string.
 */
function visibleTraceIds(sample) {
  const found = new Set();
  const collections = [sample?.turns, sample?.messages, sample?.traceRows];
  collections
    .filter((collection) => Array.isArray(collection))
    .forEach((collection) => {
      collection.forEach((entry) => {
        const traceId = entry?.traceId;
        if (traceId) found.add(String(traceId));
      });
    });
  return found;
}
|
||||
|
||||
/**
 * Produces a single digest over a sample's messages.
 *
 * Each message contributes its `textHash` when truthy, otherwise the
 * normalized `textPreview`/`text`; contributions are joined with "|" and
 * passed to `sha256`.
 *
 * @param {object|null|undefined} sample - Sample with an optional `messages` array.
 * @returns {*} Whatever `sha256` returns for the joined string.
 */
function digestMessageTexts(sample) {
  const messageList = Array.isArray(sample?.messages) ? sample.messages : [];
  const fingerprints = messageList.map((entry) => {
    if (entry?.textHash) return entry.textHash;
    return normalizedText(entry?.textPreview || entry?.text || "");
  });
  return sha256(fingerprints.join("|"));
}
|
||||
|
||||
/**
 * Converts a value to a string with whitespace collapsed.
 *
 * @param {*} value - Any value; falsy values become the empty string.
 * @returns {string} The text with each whitespace run replaced by one space
 *   and leading/trailing whitespace removed.
 */
function normalizedText(value) {
  const raw = String(value || "");
  const collapsed = raw.replace(/\s+/gu, " ");
  return collapsed.trim();
}
|
||||
|
||||
/**
 * Measures the longest run of characters two strings have in common, within fixed bounds.
 *
 * Candidate substrings are taken from the first 280 characters of the
 * shorter input and are at most 160 characters long; each is looked up in
 * the longer input.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {number} Length of the longest shared substring found, or 0 when
 *   either input is empty/falsy.
 */
function longestSharedSubstringLength(a, b) {
  if (!a || !b) return 0;
  const [shorter, longer] = a.length <= b.length ? [a, b] : [b, a];
  const scanLimit = Math.min(shorter.length, 280);
  let longest = 0;
  let offset = 0;
  while (offset < scanLimit) {
    // Try the widest window first; only windows longer than the current
    // best are worth checking.
    let stop = Math.min(scanLimit, offset + 160);
    while (stop - offset > longest) {
      if (longer.includes(shorter.slice(offset, stop))) {
        longest = stop - offset;
        break;
      }
      stop -= 1;
    }
    offset += 1;
  }
  return longest;
}
|
||||
|
||||
function digestSample(sample) {
|
||||
const messages = Array.isArray(sample.messages) ? sample.messages.map((item) => item.textHash || item.textPreview || "").join("|") : "";
|
||||
const trace = Array.isArray(sample.traceRows) ? sample.traceRows.map((item) => (item.status || "") + ":" + (item.textHash || item.textPreview || "")).join("|") : "";
|
||||
@@ -3608,7 +3810,7 @@ function sampleRefs(samples, pick) {
|
||||
|
||||
function ref(sample) {
|
||||
if (!sample) return null;
|
||||
return { seq: sample.seq ?? null, ts: sample.ts ?? null, url: sample.url ?? null, routeSessionId: sample.routeSessionId ?? null, activeSessionId: sample.activeSessionId ?? null };
|
||||
return { seq: sample.seq ?? null, sampleGroupSeq: sample.sampleGroupSeq ?? null, ts: sample.ts ?? null, pageRole: sample.pageRole ?? null, pageId: sample.pageId ?? null, url: sample.url ?? null, routeSessionId: sample.routeSessionId ?? null, activeSessionId: sample.activeSessionId ?? null };
|
||||
}
|
||||
|
||||
async function artifactSummary(artifacts) {
|
||||
@@ -3618,12 +3820,12 @@ async function artifactSummary(artifacts) {
|
||||
|
||||
function compactManifest(value) {
|
||||
if (!value) return null;
|
||||
return { jobId: value.jobId, stateDir: value.stateDir, baseUrl: value.baseUrl, targetPath: value.targetPath, startedAt: value.startedAt, status: value.status, sampling: value.sampling, pageProvenance: value.pageProvenance ?? null, safety: value.safety };
|
||||
return { jobId: value.jobId, stateDir: value.stateDir, baseUrl: value.baseUrl, targetPath: value.targetPath, startedAt: value.startedAt, status: value.status, pageAuthority: value.pageAuthority ?? null, sampling: value.sampling, pageProvenance: value.pageProvenance ?? null, safety: value.safety };
|
||||
}
|
||||
|
||||
function compactHeartbeat(value) {
|
||||
if (!value) return null;
|
||||
return { jobId: value.jobId, pid: value.pid, status: value.status, sampleSeq: value.sampleSeq, commandSeq: value.commandSeq, currentUrl: value.currentUrl, pageProvenance: value.pageProvenance ?? null, updatedAt: value.updatedAt, uptimeMs: value.uptimeMs };
|
||||
return { jobId: value.jobId, pid: value.pid, status: value.status, sampleSeq: value.sampleSeq, commandSeq: value.commandSeq, pageId: value.pageId ?? null, observerPageId: value.observerPageId ?? null, currentUrl: value.currentUrl, observerUrl: value.observerUrl ?? null, pageProvenance: value.pageProvenance ?? null, updatedAt: value.updatedAt, uptimeMs: value.uptimeMs };
|
||||
}
|
||||
|
||||
function renderTurnTimingTable(sampleMetrics) {
|
||||
|
||||
Reference in New Issue
Block a user