fix: trust completed quick verify turn artifacts

This commit is contained in:
Codex
2026-06-28 01:34:11 +00:00
parent 1ce1b560e6
commit ea1b3e9a4a
+38 -5
View File
@@ -2893,23 +2893,30 @@ function finalizeQuickVerifyFailure(state: SentinelCicdState, input: {
: readAnalysisSummaryFromWorkspace(state, indexEntry.stateDir, 30);
const turnSummary = collectObserveView(state, input.observerId, "turn-summary", null, 30);
const traceFrame = collectObserveView(state, input.observerId, "trace-frame", input.promptIndex > 0 ? input.promptIndex : null, 30);
const durableBusinessTurn = quickVerifyHasDurableBusinessTurn(input.promptIndex, turnSummary, traceFrame);
const controlFindings = quickVerifyControlFindings(input.failure, input.promptIndex, turnSummary, traceFrame);
const artifactSummaryRecord = record(artifactSummary);
const artifactFindings = Array.isArray(artifactSummaryRecord.findings) ? artifactSummaryRecord.findings.map(record) : [];
const findings = mergeFindingRecords(artifactFindings, controlFindings);
const blockingFindings = findings.filter(isQuickVerifyBlockingFinding);
const recoveredWaitFailure = durableBusinessTurn
&& isRecoverableQuickVerifyWaitFailure(input.failure)
&& record(artifactSummary).ok === true
&& controlFindings.length === 0
&& blockingFindings.length === 0;
return {
ok: false,
ok: recoveredWaitFailure,
runId: input.runId,
scenarioId: input.scenarioId,
reason: input.reason,
status: "blocked",
status: recoveredWaitFailure ? "analyzed" : "blocked",
observerId: input.observerId,
elapsedMs: input.elapsedMs ?? null,
stateDir: indexEntry?.stateDir ?? null,
reportJsonSha256: stringAtNullable(artifactSummary, "reportJsonSha256"),
findingCount: findings.length,
artifactCount: numberAtNullable(artifactSummary, "artifactCount") ?? 0,
failure: input.failure,
failure: recoveredWaitFailure ? null : input.failure,
promptSource: input.promptSource,
steps: [...input.steps, ...cleanupSteps],
analysis: artifactSummary,
@@ -2924,6 +2931,7 @@ function finalizeQuickVerifyFailure(state: SentinelCicdState, input: {
publicOrigin: stringAt(state.publicExposure, "publicBaseUrl"),
warnings: mergeWarnings(
Array.isArray(input.warnings) ? input.warnings : [],
recoveredWaitFailure ? ["quick verify wait command timed out, but collected turn-summary/trace-frame artifacts show a durable completed business turn; treating the wait timeout as a non-blocking tool finding."] : [],
targetValidationElapsedWarnings(input.elapsedMs ?? null, "quick verify confirm-wait", numberAt(state.cicd, "targetValidation.maxSeconds")),
),
valuesRedacted: true,
@@ -4049,8 +4057,9 @@ function quickVerifyPromptWaitScript(stateDir: string, promptIndex: number, time
" if (!done || !promptTraceId) return { ok: true, round: promptIndex, status: 'command-pending', commandId: prompt.commandId || null, traceId: promptTraceId || null, finalResponseEmpty: true, commandPhase: prompt.phase || null, traceMissing: !promptTraceId, valuesRedacted: true };",
" const segment = segmentFor(samples, prompts, promptIndex - 1);",
" const controlSegment = segment.filter((sample) => sample.pageRole === 'control');",
" const statusSegment = controlSegment.length > 0 ? controlSegment : segment;",
" const traceId = promptTraceId || chooseTraceId(statusSegment, prompt) || chooseTraceId(segment, prompt);",
" const traceId = promptTraceId || chooseTraceId(controlSegment, prompt) || chooseTraceId(segment, prompt);",
" const controlTraceSegment = traceId ? controlSegment.filter((sample) => traceIdsFromSamples([sample]).includes(traceId)) : [];",
" const statusSegment = controlTraceSegment.length > 0 ? controlSegment : segment;",
" const status = statusFor(statusSegment, traceId);",
" const sampleForTrace = traceId ? statusSegment.filter((sample) => traceIdsFromSamples([sample]).includes(traceId)).slice(-1)[0] || segment.filter((sample) => traceIdsFromSamples([sample]).includes(traceId)).slice(-1)[0] || null : null;",
" const lastSample = sampleForTrace || segment.slice(-1)[0] || null;",
@@ -4307,6 +4316,7 @@ function observerCommandFailureBlocks(item: Record<string, unknown>): boolean {
}
function quickVerifyControlFindings(failure: string | null, promptIndex: number, turnSummary: Record<string, unknown> | null, traceFrame: Record<string, unknown> | null): Record<string, unknown>[] {
if (quickVerifyHasDurableBusinessTurn(promptIndex, turnSummary, traceFrame)) return [];
const rendered = [
typeof turnSummary?.renderedText === "string" ? turnSummary.renderedText : "",
typeof traceFrame?.renderedText === "string" ? traceFrame.renderedText : "",
@@ -4341,6 +4351,29 @@ function quickVerifyControlFindings(failure: string | null, promptIndex: number,
}];
}
function quickVerifyHasDurableBusinessTurn(promptIndex: number, turnSummary: Record<string, unknown> | null, traceFrame: Record<string, unknown> | null): boolean {
const rows = Array.isArray(record(turnSummary?.collect).rows) ? record(turnSummary?.collect).rows.map(record) : [];
const scopedRows = promptIndex > 0 ? rows.filter((row) => numberAtNullable(row, "round") === promptIndex) : rows;
if (scopedRows.some((row) => {
const finalResponse = record(row.finalResponse);
return isQuickVerifyTurnSuccessful(stringAtNullable(row, "status"))
&& stringAtNullable(row, "traceId") !== null
&& finalResponse.empty !== true;
})) return true;
const renderedTrace = typeof traceFrame?.renderedText === "string" ? traceFrame.renderedText : "";
if (!renderedTrace) return false;
if (/Final Response\s*\n\s*\(\)/iu.test(renderedTrace)) return false;
return /Code Agent[^\n]*completed|/iu.test(renderedTrace)
&& /Final Response\s*\n\s*\S/iu.test(renderedTrace)
&& !/\s*trace\s*rows|no\s+trace\s+rows|traceId=-|routeSession=-|activeSession=-/iu.test(renderedTrace);
}
function isRecoverableQuickVerifyWaitFailure(failure: string): boolean {
return failure === "quick-verify-wait-chunk-timeout"
|| failure === "quick-verify-timeout-over-budget"
|| failure === "observe-turn-terminal-wait-failed";
}
function compactCommandWithTail(result: CommandResult): CompactCommandResult & { stdoutTail: string; stderrTail: string } {
return {
...compactCommand(result),