diff --git a/scripts/src/hwlab-node-web-observe-analyzer-source.ts b/scripts/src/hwlab-node-web-observe-analyzer-source.ts index 178b19e1..58baf930 100644 --- a/scripts/src/hwlab-node-web-observe-analyzer-source.ts +++ b/scripts/src/hwlab-node-web-observe-analyzer-source.ts @@ -3039,6 +3039,44 @@ function workbenchTriadMismatchKind(row) { return "unknown"; } +function workbenchTriadRootCauseFromDrilldown(drilldown, summary = {}) { + const groups = Array.isArray(drilldown?.groups) ? drilldown.groups : []; + const hasStaleCompletedRail = groups.some((group) => group?.mismatchKind === "rail-card-status-mismatch" && String(group?.statusTuple || "").includes("rail=completed,card=running,final=false")); + if (hasStaleCompletedRail) return { + rootCause: "workbench_session_rail_status_stale_after_new_running_turn", + rootCauseStatus: "confirmed-from-dom-samples", + rootCauseConfidence: "high", + dominantMismatchKind: "rail-card-status-mismatch", + summary: "Workbench session rail kept the previous completed terminal status while a newer turn card was running and Final Response was absent", + nextAction: "Inspect HWLAB frontend session status authority/reducer, especially workbench-server-state sessionStatusAuthorityFromMessages and SessionRail sessionToSessionTab status input; session rail must derive from the latest active turn/message authority rather than the previous sealed terminal message.", + sourceOfTruth: "latest durable Workbench turn/message projection for the active session", + valuesRedacted: true, + }; + const finalMismatchCount = Number(summary?.cardFinalResponseMismatchCount ?? 0); + const hasFinalMismatch = finalMismatchCount > 0 || groups.some((group) => /final=false/u.test(String(group?.statusTuple || "")) && group?.mismatchKind !== "rail-card-status-mismatch"); + if (hasFinalMismatch) return { + rootCause: "workbench_terminal_final_response_not_sealed", + rootCauseStatus: "confirmed-from-dom-samples", + rootCauseConfidence: "high", + dominantMismatchKind: "completed-card-final-response-absent", + summary: "Workbench terminal turn card did not expose a structured Final Response body", + nextAction: "Inspect HWLAB terminal message/finalResponse projection contract before changing renderer fallback behavior.", + sourceOfTruth: "durable Workbench terminal message projection", + valuesRedacted: true, + }; + const mismatchKinds = Array.isArray(drilldown?.summary?.mismatchKinds) ? drilldown.summary.mismatchKinds : []; + return { + rootCause: "workbench_projection_state_triad_not_sealed", + rootCauseStatus: "confirmed-from-dom-samples", + rootCauseConfidence: "high", + dominantMismatchKind: mismatchKinds[0] ?? "unknown", + summary: "Workbench session rail status, turn card status, and Final Response body presence diverged from the allowed state tuples", + nextAction: "Use drilldown.otelDrilldown.commands for the listed traceIds, then inspect staticSourceHints and add unit tests from unitTestReproHints before changing UI rendering.", + sourceOfTruth: "durable Workbench projection/read model", + valuesRedacted: true, + }; +} + function workbenchTriadTuple(row) { return [ "rail=" + (row?.railStatus ?? "-"), @@ -3082,6 +3120,7 @@ function workbenchTriadStaticSourceHints() { function workbenchTriadUnitTestReproHints() { return [ + "frontend reducer: when a second trace is running in the same session, session rail status must not stay on the previous completed terminal trace", "backend projector: terminal event must produce a single sealed turn tuple consumed by session list, session detail, messages and turn-status APIs", "backend read model: completed rail status must not coexist with running turn card or missing Final Response for the same trace", "frontend server-state merge: stale running/empty snapshots must not overwrite a sealed completed+Final Response turn", @@ -3194,6 +3233,7 @@ function normalizeWorkbenchTriadStatus(status, running = false) { if (running === true) return "running"; if (!value) return null; if (/^(completed|complete|succeeded|success|finished|done|terminal|sealed)$/u.test(value)) return "completed"; + if (/^(failed|failure|error|blocked|timeout|canceled|cancelled|stale|thread-resume-failed|interrupted|expired|idle)$/u.test(value)) return "completed"; if (/^(pending|running|active|busy|admitted|dispatching|executing|streaming|processing|queued|in-progress|creating)$/u.test(value)) return "running"; return null; } @@ -3364,30 +3404,35 @@ function buildFindings(samples, control, network, errors, sampleMetrics, promptN ...(Array.isArray(turnStateTriad.invalidFullTriads) ? turnStateTriad.invalidFullTriads : []), ...(Array.isArray(turnStateTriad.cardFinalResponseMismatches) ? turnStateTriad.cardFinalResponseMismatches : []) ]; - if (Number(turnStateTriadSummary.invalidRowCount ?? 0) > 0) findings.push({ - id: "workbench-turn-state-triad-inconsistent", - severity: "red", - summary: "Workbench session rail status, turn card status, and Final Response body presence diverged from the allowed running/running/absent or completed/completed/present tuples", - count: turnStateTriadSummary.invalidRowCount, - fullTriadCount: turnStateTriadSummary.fullTriadRowCount, - invalidFullTriadCount: turnStateTriadSummary.invalidFullTriadCount, - cardFinalResponseMismatchCount: turnStateTriadSummary.cardFinalResponseMismatchCount, - legacyCollectorMissingCount: turnStateTriadSummary.collectorMissingRowCount, - collectorMissingFields: Array.isArray(turnStateTriadSummary.collectorMissingFields) ? turnStateTriadSummary.collectorMissingFields : [], - allowedTuples: [ - { railStatus: "completed", cardStatus: "completed", finalResponsePresent: true }, - { railStatus: "running", cardStatus: "running", finalResponsePresent: false } - ], - samples: turnStateTriadRows.slice(0, 20), - drilldown: turnStateTriad.drilldown ?? buildWorkbenchTurnStateTriadDrilldown(turnStateTriadRows), - collectorMissingSamples: Array.isArray(turnStateTriad.collectorMissingRows) ? turnStateTriad.collectorMissingRows.slice(0, 10) : [], - sourceOfTruth: "durable Workbench projection/read model; do not repair via DOM fallback or GET-side state mutation", - nextAction: "Use drilldown.otelDrilldown.commands for the listed traceIds, then inspect staticSourceHints and add unit tests from unitTestReproHints before changing UI rendering.", - rootCause: "workbench_projection_state_triad_not_sealed", - rootCauseStatus: "confirmed-from-dom-samples", - rootCauseConfidence: "high", - valuesRedacted: true - }); + if (Number(turnStateTriadSummary.invalidRowCount ?? 0) > 0) { + const drilldown = turnStateTriad.drilldown ?? buildWorkbenchTurnStateTriadDrilldown(turnStateTriadRows); + const rootCause = workbenchTriadRootCauseFromDrilldown(drilldown, turnStateTriadSummary); + findings.push({ + id: "workbench-turn-state-triad-inconsistent", + severity: "red", + summary: rootCause.summary, + count: turnStateTriadSummary.invalidRowCount, + fullTriadCount: turnStateTriadSummary.fullTriadRowCount, + invalidFullTriadCount: turnStateTriadSummary.invalidFullTriadCount, + cardFinalResponseMismatchCount: turnStateTriadSummary.cardFinalResponseMismatchCount, + legacyCollectorMissingCount: turnStateTriadSummary.collectorMissingRowCount, + collectorMissingFields: Array.isArray(turnStateTriadSummary.collectorMissingFields) ? turnStateTriadSummary.collectorMissingFields : [], + dominantMismatchKind: rootCause.dominantMismatchKind, + allowedTuples: [ + { railStatus: "completed", cardStatus: "completed", finalResponsePresent: true }, + { railStatus: "running", cardStatus: "running", finalResponsePresent: false } + ], + samples: turnStateTriadRows.slice(0, 20), + drilldown, + collectorMissingSamples: Array.isArray(turnStateTriad.collectorMissingRows) ? turnStateTriad.collectorMissingRows.slice(0, 10) : [], + sourceOfTruth: rootCause.sourceOfTruth + "; do not repair via DOM fallback or GET-side state mutation", + nextAction: rootCause.nextAction, + rootCause: rootCause.rootCause, + rootCauseStatus: rootCause.rootCauseStatus, + rootCauseConfidence: rootCause.rootCauseConfidence, + valuesRedacted: true + }); + } const promptFailures = Array.isArray(promptNetwork?.rounds) ? promptNetwork.rounds.filter((item) => item.chatPostOk === false && !promptCommandHasAuthoritativeSubmitSideEffect(control, item)) : []; if (promptFailures.length > 0) findings.push({ id: "prompt-chat-submit-failed", severity: "red", summary: "sendPrompt command had no successful /v1/agent/chat or /v1/agent/chat/steer POST response in the sampling window", count: promptFailures.length, rounds: promptFailures.slice(0, 10) }); const promptSteerRounds = Array.isArray(promptNetwork?.rounds) ? promptNetwork.rounds.filter((item) => item.steerUsed === true) : []; diff --git a/scripts/src/hwlab-node/web-observe-analyzer-triad.test.ts b/scripts/src/hwlab-node/web-observe-analyzer-triad.test.ts new file mode 100644 index 00000000..75157480 --- /dev/null +++ b/scripts/src/hwlab-node/web-observe-analyzer-triad.test.ts @@ -0,0 +1,158 @@ +import assert from "node:assert/strict"; +import { mkdtemp, readFile, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { spawnSync } from "node:child_process"; +import { test } from "bun:test"; + +import { nodeWebObserveAnalyzerSource } from "../hwlab-node-web-observe-analyzer-source"; + +const alertThresholds = { + sameOriginApiSlowMs: 60000, + partialApiSlowMs: 60000, + longLivedStreamOpenSlowMs: 60000, + visibleLoadingSlowMs: 60000, + turnTimingSampleSlackSeconds: 60, + turnElapsedSevereTimeoutSeconds: 3600, + domEvaluateTimeoutRedCount: 99, + domEvaluateTimeoutRedWindowMs: 60000, + screenshotTimeoutRedCount: 99, + pageErrorRedCount: 99, + browserProcessSampleIntervalMs: 1000, + browserTotalRssRedMb: 999999, + browserProcessRssRedMb: 999999, + browserRssGrowthRedMb: 999999, + browserRssGrowthWindowMs: 60000, + playwrightResponsivenessRedMs: 60000, + playwrightResponsivenessTimeoutRedCount: 99, + cdpMetricsTimeoutRedCount: 99, + uncommandedStateChangeCommandWindowMs: 1000, + scrollJumpCommandWindowMs: 1000, + scrollJumpFromY: 999999, + scrollJumpToY: 999999, + sessionRailFallbackRatio: 0.5, +}; + +const browserFreezePolicy = { + enabled: true, + blockerWindowMs: 60000, + memory: { + totalRssBlockerMb: 999999, + processRssBlockerMb: 999999, + growthBlockerMb: 999999, + }, + responsiveness: { + latencyBlockerMs: 60000, + eventBlockerCount: 99, + }, + cdp: { + metricsTimeoutBlockerCount: 99, + }, + kill: { + enabled: false, + gracefulSignal: "SIGTERM", + forceSignal: "SIGKILL", + graceMs: 1000, + pollIntervalMs: 100, + exitCode: 124, + }, +}; + +test("observe analyzer classifies stale completed session rail when a newer turn is running", async () => { + const stateDir = await mkdtemp(join(tmpdir(), "unidesk-web-observe-analyzer-")); + const analyzerPath = join(stateDir, "analyze.mjs"); + const samplesPath = join(stateDir, "samples.jsonl"); + await writeFile(analyzerPath, nodeWebObserveAnalyzerSource(), { mode: 0o700 }); + await writeFile(samplesPath, [ + JSON.stringify({ + seq: 1, + ts: "2026-07-01T15:21:24.767Z", + path: "/workbench/sessions/ses_triage", + url: "https://hwlab.example.test/workbench/sessions/ses_triage", + pageRole: "control", + pageId: "control-test", + routeSessionId: "ses_triage", + activeSessionId: "ses_triage", + sessionRail: { + items: [{ + index: 0, + active: true, + status: "completed", + dataStatus: "completed", + running: false, + dataRunning: "false", + sessionId: "ses_triage", + sessionIdPrefix: "ses_triage", + }], + }, + turns: [ + { + role: "agent", + status: "completed", + traceId: "trc_previous_completed", + messageId: "msg_previous_completed_agent", + finalResponsePresent: true, + finalResponseTextBytes: 12, + }, + { + role: "agent", + status: "running", + traceId: "trc_running_new_turn", + messageId: "msg_running_new_turn_agent", + finalResponsePresent: false, + finalResponseTextBytes: 0, + }, + ], + }), + JSON.stringify({ + seq: 2, + ts: "2026-07-01T15:22:49.918Z", + path: "/workbench/sessions/ses_triage", + url: "https://hwlab.example.test/workbench/sessions/ses_triage", + pageRole: "control", + pageId: "control-test", + routeSessionId: "ses_triage", + activeSessionId: "ses_triage", + sessionRail: { + items: [{ + index: 0, + active: true, + status: "canceled", + dataStatus: "canceled", + running: false, + dataRunning: "false", + sessionId: "ses_triage", + sessionIdPrefix: "ses_triage", + }], + }, + turns: [{ + role: "agent", + status: "canceled", + traceId: "trc_canceled_terminal", + messageId: "msg_canceled_terminal_agent", + finalResponsePresent: true, + finalResponseTextBytes: 17, + }], + }), + ].join("\n") + "\n"); + + const result = spawnSync("bun", [analyzerPath, stateDir], { + cwd: join(import.meta.dir, "../../.."), + env: { + ...process.env, + UNIDESK_WEB_OBSERVE_ANALYZE_TAIL_SAMPLES: "0", + UNIDESK_WEB_OBSERVE_ALERT_THRESHOLDS_JSON: JSON.stringify(alertThresholds), + UNIDESK_WEB_OBSERVE_BROWSER_FREEZE_POLICY_JSON: JSON.stringify(browserFreezePolicy), + }, + encoding: "utf8", + }); + assert.equal(result.status, 0, result.stderr || result.stdout); + + const report = JSON.parse(await readFile(join(stateDir, "analysis", "report.json"), "utf8")); + const finding = report.findings.find((item: Record) => item.id === "workbench-turn-state-triad-inconsistent"); + assert.equal(finding?.rootCause, "workbench_session_rail_status_stale_after_new_running_turn"); + assert.equal(finding?.dominantMismatchKind, "rail-card-status-mismatch"); + assert.match(String(finding?.summary), /previous completed terminal status/u); + assert.equal(report.sampleMetrics.workbenchTurnStateTriad.summary.invalidRowCount, 1); + assert.equal(report.sampleMetrics.workbenchTurnStateTriad.summary.cardFinalResponseMismatchCount, 0); +}, 20_000); diff --git a/scripts/src/platform-infra-observability/apply-status-scripts.ts b/scripts/src/platform-infra-observability/apply-status-scripts.ts index 70f54e49..62d19c00 100644 --- a/scripts/src/platform-infra-observability/apply-status-scripts.ts +++ b/scripts/src/platform-infra-observability/apply-status-scripts.ts @@ -89,6 +89,7 @@ export function compactDiagnoseCodeAgentResult(value: unknown): Record