From ea1b3e9a4aa1ae963488a3e6a52ab08a9e09bab0 Mon Sep 17 00:00:00 2001
From: Codex
Date: Sun, 28 Jun 2026 01:34:11 +0000
Subject: [PATCH] fix: trust completed quick verify turn artifacts

---
Review notes (this section sits after the "---" separator, so `git am` does
not record it in the commit message).

What the patch does:
 * finalizeQuickVerifyFailure: computes `recoveredWaitFailure`, which is true
   only when (a) quickVerifyHasDurableBusinessTurn() accepts the collected
   turn-summary / trace-frame views, (b) the failure code is one of the three
   listed in isRecoverableQuickVerifyWaitFailure(), (c) the artifact summary
   has `ok === true`, and (d) there are no control findings and no findings
   matching isQuickVerifyBlockingFinding. In that case the result is reported
   as ok / status "analyzed" / failure null and a warning is appended;
   otherwise it stays ok: false / status "blocked" with the original failure.
 * quickVerifyPromptWaitScript: the generated script now picks the trace id
   from the control samples first, and uses the control samples for status
   only when at least one of them carries that trace id; otherwise it falls
   back to the whole segment.
 * quickVerifyControlFindings: returns no findings when
   quickVerifyHasDurableBusinessTurn() is true.
 * New helpers: quickVerifyHasDurableBusinessTurn() and
   isRecoverableQuickVerifyWaitFailure().

Changed during review:
 * quickVerifyHasDurableBusinessTurn: the pattern `轮次完成(总耗时` contained
   an unescaped "(" with no closing ")", which is an unterminated group and a
   SyntaxError for the regex literal. It is now `轮次完成[(（]总耗时`, matching
   a literal ASCII or full-width opening parenthesis.
 * finalizeQuickVerifyFailure: `recoveredWaitFailure` reuses the existing
   `artifactSummaryRecord` instead of calling record(artifactSummary) again.
   NOTE(review): assumes record() is side-effect free - confirm.
 Both edits touch added lines only, so hunk line counts and the diffstat are
 unchanged. The post-image blob id on the "index" line no longer reflects the
 edited content.

Caveats to resolve before applying:
 * This copy of the patch had been flattened (newlines and indentation
   collapsed to single spaces). Line breaks were restored from the hunk
   counts; indentation is a best guess, and whitespace inside string literals
   was left exactly as found. Context lines may therefore not match the
   target file byte-for-byte: apply with `git apply --ignore-whitespace`, or
   regenerate the patch from the commit.
 * Several types appear as a bare `Record` (`Record | null`, `Record[]`).
   This looks like generic type arguments lost in extraction - TODO restore
   them from the target file; they are kept as found here.

 scripts/src/hwlab-node-web-sentinel-cicd.ts | 43 ++++++++++++++++++---
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/scripts/src/hwlab-node-web-sentinel-cicd.ts b/scripts/src/hwlab-node-web-sentinel-cicd.ts
index 74e79d46..0cd1e548 100644
--- a/scripts/src/hwlab-node-web-sentinel-cicd.ts
+++ b/scripts/src/hwlab-node-web-sentinel-cicd.ts
@@ -2893,23 +2893,30 @@ function finalizeQuickVerifyFailure(state: SentinelCicdState, input: {
     : readAnalysisSummaryFromWorkspace(state, indexEntry.stateDir, 30);
   const turnSummary = collectObserveView(state, input.observerId, "turn-summary", null, 30);
   const traceFrame = collectObserveView(state, input.observerId, "trace-frame", input.promptIndex > 0 ? input.promptIndex : null, 30);
+  const durableBusinessTurn = quickVerifyHasDurableBusinessTurn(input.promptIndex, turnSummary, traceFrame);
   const controlFindings = quickVerifyControlFindings(input.failure, input.promptIndex, turnSummary, traceFrame);
   const artifactSummaryRecord = record(artifactSummary);
   const artifactFindings = Array.isArray(artifactSummaryRecord.findings) ? artifactSummaryRecord.findings.map(record) : [];
   const findings = mergeFindingRecords(artifactFindings, controlFindings);
+  const blockingFindings = findings.filter(isQuickVerifyBlockingFinding);
+  const recoveredWaitFailure = durableBusinessTurn
+    && isRecoverableQuickVerifyWaitFailure(input.failure)
+    && artifactSummaryRecord.ok === true
+    && controlFindings.length === 0
+    && blockingFindings.length === 0;
   return {
-    ok: false,
+    ok: recoveredWaitFailure,
     runId: input.runId,
     scenarioId: input.scenarioId,
     reason: input.reason,
-    status: "blocked",
+    status: recoveredWaitFailure ? "analyzed" : "blocked",
     observerId: input.observerId,
     elapsedMs: input.elapsedMs ?? null,
     stateDir: indexEntry?.stateDir ?? null,
     reportJsonSha256: stringAtNullable(artifactSummary, "reportJsonSha256"),
     findingCount: findings.length,
     artifactCount: numberAtNullable(artifactSummary, "artifactCount") ?? 0,
-    failure: input.failure,
+    failure: recoveredWaitFailure ? null : input.failure,
     promptSource: input.promptSource,
     steps: [...input.steps, ...cleanupSteps],
     analysis: artifactSummary,
@@ -2924,6 +2931,7 @@ function finalizeQuickVerifyFailure(state: SentinelCicdState, input: {
     publicOrigin: stringAt(state.publicExposure, "publicBaseUrl"),
     warnings: mergeWarnings(
       Array.isArray(input.warnings) ? input.warnings : [],
+      recoveredWaitFailure ? ["quick verify wait command timed out, but collected turn-summary/trace-frame artifacts show a durable completed business turn; treating the wait timeout as a non-blocking tool finding."] : [],
       targetValidationElapsedWarnings(input.elapsedMs ?? null, "quick verify confirm-wait", numberAt(state.cicd, "targetValidation.maxSeconds")),
     ),
     valuesRedacted: true,
@@ -4049,8 +4057,9 @@ function quickVerifyPromptWaitScript(stateDir: string, promptIndex: number, time
     " if (!done || !promptTraceId) return { ok: true, round: promptIndex, status: 'command-pending', commandId: prompt.commandId || null, traceId: promptTraceId || null, finalResponseEmpty: true, commandPhase: prompt.phase || null, traceMissing: !promptTraceId, valuesRedacted: true };",
     " const segment = segmentFor(samples, prompts, promptIndex - 1);",
     " const controlSegment = segment.filter((sample) => sample.pageRole === 'control');",
-    " const statusSegment = controlSegment.length > 0 ? controlSegment : segment;",
-    " const traceId = promptTraceId || chooseTraceId(statusSegment, prompt) || chooseTraceId(segment, prompt);",
+    " const traceId = promptTraceId || chooseTraceId(controlSegment, prompt) || chooseTraceId(segment, prompt);",
+    " const controlTraceSegment = traceId ? controlSegment.filter((sample) => traceIdsFromSamples([sample]).includes(traceId)) : [];",
+    " const statusSegment = controlTraceSegment.length > 0 ? controlSegment : segment;",
     " const status = statusFor(statusSegment, traceId);",
     " const sampleForTrace = traceId ? statusSegment.filter((sample) => traceIdsFromSamples([sample]).includes(traceId)).slice(-1)[0] || segment.filter((sample) => traceIdsFromSamples([sample]).includes(traceId)).slice(-1)[0] || null : null;",
     " const lastSample = sampleForTrace || segment.slice(-1)[0] || null;",
@@ -4307,6 +4316,7 @@ function observerCommandFailureBlocks(item: Record): boolean {
 }
 
 function quickVerifyControlFindings(failure: string | null, promptIndex: number, turnSummary: Record | null, traceFrame: Record | null): Record[] {
+  if (quickVerifyHasDurableBusinessTurn(promptIndex, turnSummary, traceFrame)) return [];
   const rendered = [
     typeof turnSummary?.renderedText === "string" ? turnSummary.renderedText : "",
     typeof traceFrame?.renderedText === "string" ? traceFrame.renderedText : "",
@@ -4341,6 +4351,29 @@ function quickVerifyControlFindings(failure: string | null, promptIndex: number,
   }];
 }
 
+function quickVerifyHasDurableBusinessTurn(promptIndex: number, turnSummary: Record | null, traceFrame: Record | null): boolean {
+  const rows = Array.isArray(record(turnSummary?.collect).rows) ? record(turnSummary?.collect).rows.map(record) : [];
+  const scopedRows = promptIndex > 0 ? rows.filter((row) => numberAtNullable(row, "round") === promptIndex) : rows;
+  if (scopedRows.some((row) => {
+    const finalResponse = record(row.finalResponse);
+    return isQuickVerifyTurnSuccessful(stringAtNullable(row, "status"))
+      && stringAtNullable(row, "traceId") !== null
+      && finalResponse.empty !== true;
+  })) return true;
+  const renderedTrace = typeof traceFrame?.renderedText === "string" ? traceFrame.renderedText : "";
+  if (!renderedTrace) return false;
+  if (/Final Response\s*\n\s*\(空内容\)/iu.test(renderedTrace)) return false;
+  return /Code Agent[^\n]*completed|轮次完成[(（]总耗时/iu.test(renderedTrace)
+    && /Final Response\s*\n\s*\S/iu.test(renderedTrace)
+    && !/无\s*trace\s*rows|no\s+trace\s+rows|traceId=-|routeSession=-|activeSession=-/iu.test(renderedTrace);
+}
+
+function isRecoverableQuickVerifyWaitFailure(failure: string): boolean {
+  return failure === "quick-verify-wait-chunk-timeout"
+    || failure === "quick-verify-timeout-over-budget"
+    || failure === "observe-turn-terminal-wait-failed";
+}
+
 function compactCommandWithTail(result: CommandResult): CompactCommandResult & { stdoutTail: string; stderrTail: string } {
   return {
     ...compactCommand(result),