diff --git a/scripts/src/hwlab-node-web-observe-analyzer-source.ts b/scripts/src/hwlab-node-web-observe-analyzer-source.ts index 24eaf951..3599e8d7 100644 --- a/scripts/src/hwlab-node-web-observe-analyzer-source.ts +++ b/scripts/src/hwlab-node-web-observe-analyzer-source.ts @@ -1387,7 +1387,7 @@ function sessionInvariantNavigationWindows(control) { const started = new Map(); const windows = []; for (const row of control || []) { - if (row?.type !== "switchAwayAndBack") continue; + if (row?.type !== "switchAwayAndBack" && row?.type !== "refreshCurrentSession") continue; const commandId = stringOrNull(row.commandId) ?? String(row.seq ?? ""); if (row.phase === "started") { started.set(commandId, row); @@ -1435,6 +1435,35 @@ function sampleInControlledNavigationWindow(sample, windows) { return (windows || []).some((window) => ms >= window.startMs && ms <= window.endMs); } +function sampleRefInControlledNavigationSessionWindow(sample, windows) { + const ms = timestampMs(sample?.ts); + if (!Number.isFinite(ms)) return false; + const routeSessionId = stringOrNull(sample?.routeSessionId); + const activeSessionId = stringOrNull(sample?.activeSessionId); + return (windows || []).some((window) => { + if (ms < window.startMs || ms > window.endMs) return false; + const expected = [window.canarySessionId, window.alternateSessionId].filter(Boolean); + return expected.some((sessionId) => sessionId === routeSessionId || sessionId === activeSessionId); + }); +} + +function isBlankHydrationProjectionSample(sample) { + if (!sample) return false; + const messageCount = Array.isArray(sample.messages) ? sample.messages.length : Number(sample.messageCount ?? 0); + const traceRowCount = Array.isArray(sample.traceRows) ? sample.traceRows.length : Number(sample.traceRowCount ?? 0); + return !stringOrNull(sample.activeSessionId) + && Number(messageCount) === 0 + && Number(traceRowCount) === 0; +} + +function controlledNavigationHydrationCrossPageDiff(row, windows, sampleBySeq) { + if (row?.diffKind !== "projection") return false; + if (!sampleRefInControlledNavigationSessionWindow(row.control, windows) || !sampleRefInControlledNavigationSessionWindow(row.observer, windows)) return false; + const control = sampleBySeq.get(Number(row?.control?.seq)); + const observer = sampleBySeq.get(Number(row?.observer?.seq)); + return isBlankHydrationProjectionSample(control) || isBlankHydrationProjectionSample(observer); +} + function objectValue(value) { return value && typeof value === "object" && !Array.isArray(value) ? value : {}; } @@ -1602,11 +1631,15 @@ function buildFindings(samples, control, network, errors, sampleMetrics, promptN const crossPageProjectionDiffs = crossPageDiffs.filter((item) => item.diffKind !== "trace-visibility"); const crossPageTraceVisibilityDiffs = crossPageDiffs.filter((item) => item.diffKind === "trace-visibility"); const crossPageProjectionBudgetMs = alertThresholds.crossPageProjectionDivergenceRedMs; + const sampleBySeq = new Map(samples.map((item) => [Number(item?.seq), item]).filter(([seq]) => Number.isFinite(seq))); const timedCrossPageProjectionDiffs = annotateCrossPageDiffTiming(crossPageProjectionDiffs); - const persistentCrossPageProjectionDiffs = timedCrossPageProjectionDiffs.filter((item) => Number(item.observedSpanMs ?? 0) > crossPageProjectionBudgetMs); - const transientCrossPageProjectionDiffs = timedCrossPageProjectionDiffs.filter((item) => Number(item.observedSpanMs ?? 0) <= crossPageProjectionBudgetMs); + const controlledNavigationHydrationProjectionDiffs = timedCrossPageProjectionDiffs.filter((item) => controlledNavigationHydrationCrossPageDiff(item, controlledNavigationWindows, sampleBySeq)); + const evaluatedCrossPageProjectionDiffs = timedCrossPageProjectionDiffs.filter((item) => !controlledNavigationHydrationCrossPageDiff(item, controlledNavigationWindows, sampleBySeq)); + const persistentCrossPageProjectionDiffs = evaluatedCrossPageProjectionDiffs.filter((item) => Number(item.observedSpanMs ?? 0) > crossPageProjectionBudgetMs); + const transientCrossPageProjectionDiffs = evaluatedCrossPageProjectionDiffs.filter((item) => Number(item.observedSpanMs ?? 0) <= crossPageProjectionBudgetMs); if (persistentCrossPageProjectionDiffs.length > 0) findings.push({ id: "cross-page-projection-divergence", severity: "red", summary: "control and observer pages saw different projection state for the same sampled session beyond the configured budget", count: persistentCrossPageProjectionDiffs.length, budgetMs: crossPageProjectionBudgetMs, samples: persistentCrossPageProjectionDiffs.slice(0, 20) }); if (transientCrossPageProjectionDiffs.length > 0) findings.push({ id: "cross-page-projection-transient-divergence", severity: "info", summary: "control and observer pages briefly differed near a sampled transition; retained as transient evidence but not treated as persistent projection failure", count: transientCrossPageProjectionDiffs.length, budgetMs: crossPageProjectionBudgetMs, samples: transientCrossPageProjectionDiffs.slice(0, 20) }); + if (controlledNavigationHydrationProjectionDiffs.length > 0) findings.push({ id: "cross-page-projection-controlled-navigation-hydration", severity: "info", summary: "control and observer pages differed while a non-blocking session-invariance navigation command still had an unhydrated blank page; retained as context but not treated as a red projection blocker", count: controlledNavigationHydrationProjectionDiffs.length, budgetMs: crossPageProjectionBudgetMs, samples: controlledNavigationHydrationProjectionDiffs.slice(0, 20) }); if (crossPageTraceVisibilityDiffs.length > 0) findings.push({ id: "cross-page-trace-visibility-divergence", severity: "info", summary: "control and observer pages differed only in visible trace row count; this is local disclosure/hydration visibility, not session/message projection divergence", count: crossPageTraceVisibilityDiffs.length, samples: crossPageTraceVisibilityDiffs.slice(0, 20) }); const traceMessageDuplicates = detectTraceMessageDuplication(samples); if (traceMessageDuplicates.length > 0) findings.push({ id: "trace-assistant-message-duplicates-final-response", severity: "amber", summary: "trace-frame rendered duplicate visible assistant final rows; the fixed Final Response renderer summary block is excluded", count: traceMessageDuplicates.length, finalResponseSummaryBlockCounted: false, traceFrameSource: "traceRows-only", samples: traceMessageDuplicates.slice(0, 20) }); diff --git a/scripts/src/hwlab-node-web-sentinel-cicd.ts b/scripts/src/hwlab-node-web-sentinel-cicd.ts index 4e1dfb0c..b10a83a4 100644 --- a/scripts/src/hwlab-node-web-sentinel-cicd.ts +++ b/scripts/src/hwlab-node-web-sentinel-cicd.ts @@ -1816,9 +1816,10 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou }); } const sampleIntervalMs = numberAt(scenario, "sampleIntervalMs"); - const budgetSeconds = Math.min(timeoutSeconds, maxSeconds); - const elapsedWarnings = () => targetValidationElapsedWarnings(elapsedMs(), "quick verify confirm-wait", budgetSeconds); - const deadline = Date.now() + budgetSeconds * 1000; + const warningBudgetSeconds = maxSeconds; + const hardBudgetSeconds = Math.min(timeoutSeconds, Math.max(maxSeconds, numberAt(scenario, "maxRunSeconds"))); + const elapsedWarnings = () => targetValidationElapsedWarnings(elapsedMs(), "quick verify confirm-wait", warningBudgetSeconds); + const deadline = Date.now() + hardBudgetSeconds * 1000; const runId = `sentinel-run-${Date.now().toString(36)}-${randomUUID().slice(0, 8)}`; const steps: Record[] = []; const startArgs = [ @@ -1851,7 +1852,7 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou valuesRedacted: true, }); } - const startupReady = waitForQuickVerifyObserverStartup(state, observerId, deadline, sampleIntervalMs, budgetSeconds); + const startupReady = waitForQuickVerifyObserverStartup(state, observerId, deadline, sampleIntervalMs, warningBudgetSeconds); steps.push({ phase: "observe-wait-startup-ready", ok: startupReady.ok, result: startupReady }); if (startupReady.ok !== true) { return recordQuickVerify(state, finalizeQuickVerifyFailure(state, { @@ -1883,7 +1884,7 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou steps, failure: "quick-verify-timeout-over-budget", elapsedMs: elapsedMs(), - warnings: mergeWarnings(`quick verify exceeded the configured ${budgetSeconds}s targetValidation budget; investigate Code Agent multi-round continuity before retrying.`, elapsedWarnings()), + warnings: mergeWarnings(`quick verify exceeded the hard ${hardBudgetSeconds}s execution budget after the configured ${warningBudgetSeconds}s targetValidation warning budget.`, elapsedWarnings()), promptSource: prompts.summary, })); } @@ -1920,7 +1921,7 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou })); } if (type === "sendPrompt") { - const waitResult = waitForQuickVerifyPromptTurn(state, observerId, promptIndex, deadline, sampleIntervalMs, budgetSeconds); + const waitResult = waitForQuickVerifyPromptTurn(state, observerId, promptIndex, deadline, sampleIntervalMs, warningBudgetSeconds); steps.push({ phase: "observe-wait-turn-terminal", ok: waitResult.ok, promptIndex, result: waitResult }); if (waitResult.ok !== true) { return recordQuickVerify(state, finalizeQuickVerifyFailure(state, {