Merge pull request #1006 from pikasTech/fix/sentinel-quick-verify-tail
fix: 放宽哨兵 quick verify 阻塞口径
This commit is contained in:
@@ -45,5 +45,5 @@ sentinel:
|
||||
stopCommand: sentinel maintenance stop
|
||||
targetValidation:
|
||||
scenarioId: workbench-dsflash-go-tool-call-10x
|
||||
maxSeconds: 120
|
||||
maxSeconds: 300
|
||||
serviceUnavailablePolicy: structured-failure
|
||||
|
||||
@@ -1306,17 +1306,17 @@ function buildFindings(samples, control, network, errors, sampleMetrics, promptN
|
||||
const finalFlicker = detectFinalFlicker(samples);
|
||||
if (finalFlicker.length > 0) findings.push({ id: "final-response-flicker", severity: "red", summary: "message text digest disappeared or switched to diagnostic-like text after non-empty final text", count: finalFlicker.length, samples: finalFlicker.slice(0, 20) });
|
||||
const terminalZeroElapsed = detectTerminalZeroElapsed(samples);
|
||||
if (terminalZeroElapsed.length > 0) findings.push({ id: "turn-terminal-zero-elapsed", severity: "red", summary: "terminal Code Agent card displayed 耗时 0 秒; terminal duration must come from durable timing projection, not a missing/zero display fallback", count: terminalZeroElapsed.length, samples: terminalZeroElapsed.slice(0, 20) });
|
||||
if (terminalZeroElapsed.length > 0) findings.push({ id: "turn-terminal-zero-elapsed", severity: "amber", summary: "terminal Code Agent card displayed 耗时 0 秒; terminal duration issue is a non-blocking timing alert", count: terminalZeroElapsed.length, samples: terminalZeroElapsed.slice(0, 20) });
|
||||
const cardTiming = sampleMetrics?.codeAgentCardTiming || {};
|
||||
const cardTimingSummary = cardTiming.summary || {};
|
||||
if (Number(cardTimingSummary.missingElapsedCount ?? 0) > 0) findings.push({ id: "code-agent-card-elapsed-missing", severity: "red", summary: "visible Code Agent card did not display total elapsed time; elapsed must be visible for running and terminal cards", count: cardTimingSummary.missingElapsedCount, samples: (cardTiming.missingElapsed || []).slice(0, 20) });
|
||||
if (Number(cardTimingSummary.missingRecentUpdateCount ?? 0) > 0) findings.push({ id: "code-agent-card-running-recent-update-missing", severity: "red", summary: "non-terminal Code Agent card did not display 最近更新", count: cardTimingSummary.missingRecentUpdateCount, samples: (cardTiming.missingRecentUpdate || []).slice(0, 20) });
|
||||
if (Number(cardTimingSummary.missingElapsedCount ?? 0) > 0) findings.push({ id: "code-agent-card-elapsed-missing", severity: "amber", summary: "visible Code Agent card did not display total elapsed time; elapsed visibility is a non-blocking timing alert", count: cardTimingSummary.missingElapsedCount, samples: (cardTiming.missingElapsed || []).slice(0, 20) });
|
||||
if (Number(cardTimingSummary.missingRecentUpdateCount ?? 0) > 0) findings.push({ id: "code-agent-card-running-recent-update-missing", severity: "amber", summary: "non-terminal Code Agent card did not display 最近更新; recent-update visibility is a non-blocking timing alert", count: cardTimingSummary.missingRecentUpdateCount, samples: (cardTiming.missingRecentUpdate || []).slice(0, 20) });
|
||||
const roundCompletion = cardTiming.roundCompletion || {};
|
||||
if (Number(cardTimingSummary.durationUnderreportedCount ?? 0) > 0) {
|
||||
findings.push({
|
||||
id: "code-agent-card-duration-underreported",
|
||||
severity: "red",
|
||||
summary: "completed Code Agent card total elapsed is shorter than trace/final-response duration evidence",
|
||||
severity: "amber",
|
||||
summary: "completed Code Agent card total elapsed is shorter than trace/final-response duration evidence; timing mismatch is a non-blocking alert",
|
||||
count: cardTimingSummary.durationUnderreportedCount,
|
||||
samples: (cardTiming.durationUnderreported || []).slice(0, 20),
|
||||
});
|
||||
@@ -1324,8 +1324,8 @@ function buildFindings(samples, control, network, errors, sampleMetrics, promptN
|
||||
if (Number(cardTimingSummary.durationMismatchCount ?? 0) > 0) {
|
||||
findings.push({
|
||||
id: "code-agent-card-duration-mismatch",
|
||||
severity: "red",
|
||||
summary: "completed Code Agent card total elapsed does not match sealed completion/final-response timing evidence",
|
||||
severity: "amber",
|
||||
summary: "completed Code Agent card total elapsed does not match sealed completion/final-response timing evidence; timing mismatch is a non-blocking alert",
|
||||
count: cardTimingSummary.durationMismatchCount,
|
||||
samples: (cardTiming.durationMismatches || []).slice(0, 20),
|
||||
});
|
||||
@@ -1350,9 +1350,9 @@ function buildFindings(samples, control, network, errors, sampleMetrics, promptN
|
||||
samples: (traceOrder.completionNotLast || []).slice(0, 20),
|
||||
});
|
||||
}
|
||||
if (Number(cardTimingSummary.roundCompletionElapsedMismatchCount ?? 0) > 0) findings.push({ id: "round-completion-elapsed-mismatch", severity: "red", summary: "Trace row 轮次完成(总耗时 ...) does not match the visible Code Agent card total elapsed time within YAML timing slack", count: cardTimingSummary.roundCompletionElapsedMismatchCount, toleranceSeconds: cardTimingSummary.elapsedMismatchToleranceSeconds, samples: (roundCompletion.elapsedMismatches || []).slice(0, 20) });
|
||||
if (Number(cardTimingSummary.roundCompletionElapsedMismatchCount ?? 0) > 0) findings.push({ id: "round-completion-elapsed-mismatch", severity: "amber", summary: "Trace row 轮次完成(总耗时 ...) does not match the visible Code Agent card total elapsed time within YAML timing slack; timing mismatch is a non-blocking alert", count: cardTimingSummary.roundCompletionElapsedMismatchCount, toleranceSeconds: cardTimingSummary.elapsedMismatchToleranceSeconds, samples: (roundCompletion.elapsedMismatches || []).slice(0, 20) });
|
||||
if (Number(cardTimingSummary.roundCompletionFinalResponseMissingCount ?? 0) > 0) findings.push({ id: "round-completion-final-response-missing", severity: "red", summary: "Trace row showed 轮次完成, but no final response was visible in the Code Agent card afterward", count: cardTimingSummary.roundCompletionFinalResponseMissingCount, samples: (roundCompletion.finalResponseMissing || []).slice(0, 20) });
|
||||
if (Number(cardTimingSummary.roundCompletionPostTimingChangeCount ?? 0) > 0) findings.push({ id: "round-completion-post-timing-change", severity: "red", summary: "After 轮次完成, card total elapsed or 最近更新 continued changing; terminal timing should be sealed", count: cardTimingSummary.roundCompletionPostTimingChangeCount, samples: (roundCompletion.postCompletionTimingChanges || []).slice(0, 20) });
|
||||
if (Number(cardTimingSummary.roundCompletionPostTimingChangeCount ?? 0) > 0) findings.push({ id: "round-completion-post-timing-change", severity: "amber", summary: "After 轮次完成, card total elapsed or 最近更新 continued changing; terminal timing alert is non-blocking", count: cardTimingSummary.roundCompletionPostTimingChangeCount, samples: (roundCompletion.postCompletionTimingChanges || []).slice(0, 20) });
|
||||
if (Number(cardTimingSummary.roundCompletionPostRecentUpdateVisibleCount ?? 0) > 0) findings.push({ id: "round-completion-recent-update-still-visible", severity: "info", summary: "最近更新 was still visible after 轮次完成; inspect whether terminal cards should hide activity age or keep it sealed", count: cardTimingSummary.roundCompletionPostRecentUpdateVisibleCount, samples: (roundCompletion.postCompletionRecentUpdateVisible || []).slice(0, 20) });
|
||||
const scrollJumps = [];
|
||||
for (let i = 1; i < samples.length; i += 1) {
|
||||
@@ -1369,15 +1369,15 @@ function buildFindings(samples, control, network, errors, sampleMetrics, promptN
|
||||
const promptSteerRounds = Array.isArray(promptNetwork?.rounds) ? promptNetwork.rounds.filter((item) => item.steerUsed === true) : [];
|
||||
if (promptSteerRounds.length > 0) findings.push({ id: "prompt-routed-to-steer", severity: "amber", summary: "sendPrompt was submitted through /v1/agent/chat/steer; verify the previous turn was truly in-flight and not an unsealed terminal failure", count: promptSteerRounds.length, rounds: promptSteerRounds.slice(0, 10) });
|
||||
const elapsedZeroResets = Array.isArray(sampleMetrics?.turnTimingElapsedZeroResets) ? sampleMetrics.turnTimingElapsedZeroResets : [];
|
||||
if (elapsedZeroResets.length > 0) findings.push({ id: "turn-timing-total-elapsed-zero-reset", severity: "red", summary: "Code Agent total elapsed jumped from a non-zero value back to 0 seconds", count: elapsedZeroResets.length, samples: elapsedZeroResets.slice(0, 20) });
|
||||
if (elapsedZeroResets.length > 0) findings.push({ id: "turn-timing-total-elapsed-zero-reset", severity: "amber", summary: "Code Agent total elapsed jumped from a non-zero value back to 0 seconds; timing reset is a non-blocking alert", count: elapsedZeroResets.length, samples: elapsedZeroResets.slice(0, 20) });
|
||||
const elapsedDecreases = Array.isArray(sampleMetrics?.turnTimingNonMonotonic)
|
||||
? sampleMetrics.turnTimingNonMonotonic.filter((item) => item.metric === "totalElapsedSeconds" && item.anomaly !== "zero-reset")
|
||||
: [];
|
||||
if (elapsedDecreases.length > 0) findings.push({ id: "turn-timing-total-elapsed-decrease", severity: "red", summary: "Code Agent total elapsed decreased between adjacent samples; total elapsed must be monotonic per turn", count: elapsedDecreases.length, samples: elapsedDecreases.slice(0, 20) });
|
||||
if (elapsedDecreases.length > 0) findings.push({ id: "turn-timing-total-elapsed-decrease", severity: "amber", summary: "Code Agent total elapsed decreased between adjacent samples; timing decrease is a non-blocking alert", count: elapsedDecreases.length, samples: elapsedDecreases.slice(0, 20) });
|
||||
const elapsedForwardJumps = Array.isArray(sampleMetrics?.turnTimingTotalElapsedForwardJumps) ? sampleMetrics.turnTimingTotalElapsedForwardJumps : [];
|
||||
if (elapsedForwardJumps.length > 0) findings.push({ id: "turn-timing-total-elapsed-forward-jump", severity: "red", summary: "Code Agent total elapsed jumped forward faster than browser sample interval", count: elapsedForwardJumps.length, samples: elapsedForwardJumps.slice(0, 20) });
|
||||
if (elapsedForwardJumps.length > 0) findings.push({ id: "turn-timing-total-elapsed-forward-jump", severity: "amber", summary: "Code Agent total elapsed jumped forward faster than browser sample interval; timing jump is a non-blocking alert", count: elapsedForwardJumps.length, samples: elapsedForwardJumps.slice(0, 20) });
|
||||
const terminalElapsedGrowth = Array.isArray(sampleMetrics?.turnTimingTerminalElapsedGrowth) ? sampleMetrics.turnTimingTerminalElapsedGrowth : [];
|
||||
if (terminalElapsedGrowth.length > 0) findings.push({ id: "turn-timing-terminal-elapsed-growth", severity: "red", summary: "terminal Code Agent card total elapsed changed after terminal status; completed/failed/canceled timing must be sealed", count: terminalElapsedGrowth.length, samples: terminalElapsedGrowth.slice(0, 20) });
|
||||
if (terminalElapsedGrowth.length > 0) findings.push({ id: "turn-timing-terminal-elapsed-growth", severity: "amber", summary: "terminal Code Agent card total elapsed changed after terminal status; terminal timing alert is non-blocking", count: terminalElapsedGrowth.length, samples: terminalElapsedGrowth.slice(0, 20) });
|
||||
const recentUpdateSawtoothJumps = Array.isArray(sampleMetrics?.turnTimingRecentUpdateSawtoothJumps)
|
||||
? sampleMetrics.turnTimingRecentUpdateSawtoothJumps
|
||||
: Array.isArray(sampleMetrics?.turnTimingNonMonotonic)
|
||||
@@ -1925,7 +1925,7 @@ function buildPromptNetworkReport(control, network) {
|
||||
const failures = events.filter((event) => event.type === "requestfailed");
|
||||
const responseStatuses = responses.map((event) => event.status).filter((status) => status !== null);
|
||||
const submitModes = Array.from(new Set(events.map((event) => event.submitMode).filter(Boolean))).sort();
|
||||
const chatPostOk = responseStatuses.some((status) => status >= 200 && status < 300) && failures.length === 0;
|
||||
const chatPostOk = responseStatuses.some((status) => status >= 200 && status < 300);
|
||||
const failureKind = chatPostOk
|
||||
? null
|
||||
: failures.length > 0
|
||||
|
||||
@@ -1356,6 +1356,13 @@ function sentinelElapsedWarnings(value: unknown, subject = "sentinel confirmed o
|
||||
return [`${subject} exceeded 120s (${Math.round(elapsedMs / 1000)}s); treat this as a severe timeout and investigate env-reuse/git mirror/source build path plus the current wait stage before retrying.`];
|
||||
}
|
||||
|
||||
/**
 * Builds the warning list for a wait that ran longer than its targetValidation budget.
 *
 * @param value - Elapsed time in milliseconds. Anything that is not a finite number
 *   is treated as "no measurement" and yields no warning.
 * @param subject - Label placed at the start of the warning message.
 * @param budgetSeconds - Budget in seconds. It is truncated to a whole number and
 *   clamped to at least 1 second. A NaN budget yields no warning.
 * @returns An empty array when the elapsed time is within budget or the inputs are
 *   unusable; otherwise a single-element array holding the warning text.
 */
function targetValidationElapsedWarnings(value: unknown, subject: string, budgetSeconds: number): string[] {
  const elapsedMs = typeof value === "number" && Number.isFinite(value) ? value : null;
  // NaN would propagate through Math.trunc/Math.max, make every comparison false,
  // and produce a misleading "exceeded configured NaNs" warning.
  if (elapsedMs === null || Number.isNaN(budgetSeconds)) return [];
  const budgetMs = Math.max(1, Math.trunc(budgetSeconds)) * 1000;
  if (elapsedMs <= budgetMs) return [];
  return [`${subject} exceeded configured ${Math.round(budgetMs / 1000)}s targetValidation budget (${Math.round(elapsedMs / 1000)}s); investigate Code Agent multi-round continuity before retrying.`];
}
|
||||
|
||||
function mergeWarnings(...items: readonly (readonly unknown[] | unknown)[]): string[] {
|
||||
const warnings: string[] = [];
|
||||
for (const item of items) {
|
||||
@@ -1525,7 +1532,7 @@ function runSentinelValidate(state: SentinelCicdState, options: Extract<WebProbe
|
||||
const publicExposure = probeSentinelPublicExposure(state, options.timeoutSeconds);
|
||||
const publicDashboard = probeSentinelPublicDashboard(state, options.timeoutSeconds);
|
||||
if (quickVerify !== null) {
|
||||
quickVerify = withWarnings(quickVerify, sentinelElapsedWarnings(Date.now() - startedAt, "sentinel validate quick verify confirm-wait"));
|
||||
quickVerify = withWarnings(quickVerify, targetValidationElapsedWarnings(Date.now() - startedAt, "sentinel validate quick verify confirm-wait", Math.min(options.timeoutSeconds, numberAt(state.cicd, "targetValidation.maxSeconds"))));
|
||||
}
|
||||
const ok = health.ok
|
||||
&& record(health.bodyJson).ok === true
|
||||
@@ -1593,7 +1600,6 @@ function renderAsyncP5Job(state: SentinelCicdState, subcommand: readonly string[
|
||||
function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeoutSeconds: number): Record<string, unknown> {
|
||||
const startedAt = Date.now();
|
||||
const elapsedMs = () => Date.now() - startedAt;
|
||||
const elapsedWarnings = () => sentinelElapsedWarnings(elapsedMs(), "quick verify confirm-wait");
|
||||
const scenarioId = stringAt(state.cicd, "targetValidation.scenarioId");
|
||||
const maxSeconds = numberAt(state.cicd, "targetValidation.maxSeconds");
|
||||
const scenario = findScenario(state, scenarioId);
|
||||
@@ -1601,7 +1607,9 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou
|
||||
const prompts = readPromptSetForScenario(scenario);
|
||||
if (!prompts.ok) return { ok: false, status: "blocked", reason: "prompt-source-unavailable", promptSource: prompts, valuesRedacted: true };
|
||||
const sampleIntervalMs = numberAt(scenario, "sampleIntervalMs");
|
||||
const deadline = Date.now() + Math.min(timeoutSeconds, maxSeconds) * 1000;
|
||||
const budgetSeconds = Math.min(timeoutSeconds, maxSeconds);
|
||||
const elapsedWarnings = () => targetValidationElapsedWarnings(elapsedMs(), "quick verify confirm-wait", budgetSeconds);
|
||||
const deadline = Date.now() + budgetSeconds * 1000;
|
||||
const runId = `sentinel-run-${Date.now().toString(36)}-${randomUUID().slice(0, 8)}`;
|
||||
const steps: Record<string, unknown>[] = [];
|
||||
const startArgs = [
|
||||
@@ -1647,9 +1655,9 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou
|
||||
observerId,
|
||||
promptIndex,
|
||||
steps,
|
||||
failure: "quick-verify-timeout-over-120s",
|
||||
failure: "quick-verify-timeout-over-budget",
|
||||
elapsedMs: elapsedMs(),
|
||||
warnings: mergeWarnings("quick verify exceeded the configured 120s targetValidation budget; investigate env-reuse/git mirror/source build path before retrying.", elapsedWarnings()),
|
||||
warnings: mergeWarnings(`quick verify exceeded the configured ${budgetSeconds}s targetValidation budget; investigate Code Agent multi-round continuity before retrying.`, elapsedWarnings()),
|
||||
promptSource: prompts.summary,
|
||||
}));
|
||||
}
|
||||
@@ -1676,7 +1684,7 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou
|
||||
}));
|
||||
}
|
||||
if (type === "sendPrompt") {
|
||||
const waitResult = waitForQuickVerifyPromptTurn(state, observerId, promptIndex, deadline, sampleIntervalMs);
|
||||
const waitResult = waitForQuickVerifyPromptTurn(state, observerId, promptIndex, deadline, sampleIntervalMs, budgetSeconds);
|
||||
steps.push({ phase: "observe-wait-turn-terminal", ok: waitResult.ok, promptIndex, result: waitResult });
|
||||
if (waitResult.ok !== true) {
|
||||
return recordQuickVerify(state, finalizeQuickVerifyFailure(state, {
|
||||
@@ -2152,7 +2160,7 @@ function readAnalysisSummaryFromWorkspace(state: SentinelCicdState, stateDir: st
|
||||
"walk(stateDir);",
|
||||
"const findings=arr(report?.findings ?? report?.archiveSummary?.redFindings).slice(0,20).map((item)=>{const v=rec(item); return {id:clip(v.id??v.kind??v.code,80),kind:clip(v.kind??v.id??v.code,80),code:clip(v.code??v.kind??v.id,80),severity:clip(v.severity??v.level,32),level:clip(v.level??v.severity,32),count:Number(v.count??v.sampleCount??1),summary:clip(v.summary??v.message,220),message:clip(v.message??v.summary,220)};});",
|
||||
"const slow=arr(report?.pagePerformanceSlowApi ?? report?.archivePagePerformanceSlowApi).slice(0,8).map((item)=>{const v=rec(item); return {path:clip(v.path??v.route,120),sampleCount:v.sampleCount??null,p95Ms:v.p95Ms??null,maxMs:v.maxMs??null,overFiveSecondCount:v.overFiveSecondCount??null};});",
|
||||
"console.log(JSON.stringify({ok:!!report&&report.ok!==false,stateDir,reportJsonPath:reportPath,reportJsonSha256:sha(jsonBuf),reportMdPath,reportMdSha256:sha(read(reportMdPath)),findingCount:Number(report?.findingCount??findings.length),artifactCount,screenshot,findings,counts:rec(report?.counts),analysisWindow:rec(report?.analysisWindow??report?.windows?.recent?.summary),pagePerformanceSlowApi:slow,valuesRedacted:true}));",
|
||||
"console.log(JSON.stringify({ok:!!report,reportOk:!!report&&report.ok!==false,stateDir,reportJsonPath:reportPath,reportJsonSha256:sha(jsonBuf),reportMdPath,reportMdSha256:sha(read(reportMdPath)),findingCount:Number(report?.findingCount??findings.length),artifactCount,screenshot,findings,counts:rec(report?.counts),analysisWindow:rec(report?.analysisWindow??report?.windows?.recent?.summary),pagePerformanceSlowApi:slow,valuesRedacted:true}));",
|
||||
"NODE",
|
||||
].join("\n");
|
||||
const result = runCommand(["trans", `${state.spec.nodeId}:${state.spec.workspace}`, "sh"], repoRoot, { input: script, timeoutMs: Math.min(timeoutSeconds, 60) * 1000 });
|
||||
@@ -2186,7 +2194,7 @@ function runChildCli(args: string[], timeoutSeconds: number, input?: string): Ch
|
||||
};
|
||||
}
|
||||
|
||||
function waitForQuickVerifyPromptTurn(state: SentinelCicdState, observerId: string, promptIndex: number, deadline: number, pollIntervalMs: number): Record<string, unknown> {
|
||||
function waitForQuickVerifyPromptTurn(state: SentinelCicdState, observerId: string, promptIndex: number, deadline: number, pollIntervalMs: number, budgetSeconds: number): Record<string, unknown> {
|
||||
const observations: Record<string, unknown>[] = [];
|
||||
const indexEntry = readLocalObserveIndex(observerId);
|
||||
if (indexEntry === null) {
|
||||
@@ -2198,7 +2206,7 @@ function waitForQuickVerifyPromptTurn(state: SentinelCicdState, observerId: stri
|
||||
valuesRedacted: true,
|
||||
};
|
||||
}
|
||||
const pollSleepMs = Math.max(250, Math.min(5000, Math.trunc(pollIntervalMs)));
|
||||
const pollSleepMs = Math.max(250, Math.min(500, Math.trunc(pollIntervalMs / 2) || 250));
|
||||
while (Date.now() < deadline) {
|
||||
const waitMs = Math.max(1000, Math.min(55_000, deadline - Date.now()));
|
||||
const script = quickVerifyPromptWaitScript(indexEntry.stateDir, promptIndex, waitMs, pollSleepMs);
|
||||
@@ -2235,10 +2243,10 @@ function waitForQuickVerifyPromptTurn(state: SentinelCicdState, observerId: stri
|
||||
}
|
||||
return {
|
||||
ok: false,
|
||||
failure: "quick-verify-timeout-over-120s",
|
||||
failure: "quick-verify-timeout-over-budget",
|
||||
round: promptIndex,
|
||||
observations: observations.slice(-6),
|
||||
warnings: ["quick verify exceeded the configured 120s targetValidation budget while waiting for a submitted turn to become terminal; investigate env-reuse/git mirror/source build path before retrying."],
|
||||
warnings: [`quick verify exceeded the configured ${budgetSeconds}s targetValidation budget while waiting for a submitted turn to become terminal; investigate Code Agent multi-round continuity before retrying.`],
|
||||
valuesRedacted: true,
|
||||
};
|
||||
}
|
||||
@@ -2267,6 +2275,26 @@ function quickVerifyPromptWaitScript(stateDir: string, promptIndex: number, time
|
||||
"const tsMs = (value) => { const ms = Date.parse(String(value || '')); return Number.isFinite(ms) ? ms : null; };",
|
||||
"const readJson = (rel) => { try { return JSON.parse(fs.readFileSync(path.join(dir, rel), 'utf8')); } catch { return null; } };",
|
||||
"const readJsonl = (rel) => { try { return fs.readFileSync(path.join(dir, rel), 'utf8').split(/\\r?\\n/u).filter(Boolean).map((line) => { try { return JSON.parse(line); } catch { return null; } }).filter(Boolean); } catch { return []; } };",
|
||||
"const readJsonlTail = (rel, maxBytes = 2000000) => {",
|
||||
" try {",
|
||||
" const file = path.join(dir, rel);",
|
||||
" const stat = fs.statSync(file);",
|
||||
" const start = Math.max(0, stat.size - maxBytes);",
|
||||
" const length = stat.size - start;",
|
||||
" const fd = fs.openSync(file, 'r');",
|
||||
" try {",
|
||||
" const buffer = Buffer.alloc(length);",
|
||||
" fs.readSync(fd, buffer, 0, length, start);",
|
||||
" const lines = buffer.toString('utf8').split(/\\r?\\n/u);",
|
||||
" if (start > 0) lines.shift();",
|
||||
" return lines.filter(Boolean).map((line) => { try { return JSON.parse(line); } catch { return null; } }).filter(Boolean);",
|
||||
" } finally {",
|
||||
" fs.closeSync(fd);",
|
||||
" }",
|
||||
" } catch {",
|
||||
" return [];",
|
||||
" }",
|
||||
"};",
|
||||
"const readDone = (id) => id ? readJson(path.join('commands', 'done', `${id}.json`)) : null;",
|
||||
"function sessionIdFromUrl(value) { const match = String(value || '').match(/\\/workbench\\/sessions\\/(ses_[A-Za-z0-9_-]+)/u); return match ? match[1] : null; }",
|
||||
"function commandSessionId(item) { const done = readDone(item?.commandId); return item?.sessionId || item?.detail?.sessionId || item?.input?.sessionId || item?.result?.sessionId || done?.result?.sessionId || done?.result?.observer?.sessionId || sessionIdFromUrl(item?.afterUrl) || sessionIdFromUrl(item?.detail?.afterUrl) || sessionIdFromUrl(done?.result?.afterUrl) || null; }",
|
||||
@@ -2300,7 +2328,7 @@ function quickVerifyPromptWaitScript(stateDir: string, promptIndex: number, time
|
||||
"function finalResponseEmpty(items, traceId) { if (!/^(completed|failed|canceled)$/u.test(statusFor(items, traceId))) return true; const entries = (traceId ? traceEntries(items, traceId) : items.flatMap((sample) => entryGroups(sample).map((entry) => ({ ...entry, sample, text: textOf(entry.item) })))).slice().reverse(); for (const entry of entries) { const role = String(entry.item?.role || entry.item?.dataRole || entry.item?.messageRole || '').toLowerCase(); if (entry.group === 'message' && role && !/assistant|agent|system/u.test(role)) continue; const text = cleanFinalResponseText(entry.item?.finalResponse?.text || entry.item?.finalResponse?.preview || entry.text); if (text && !/^Code Agent\\s*耗时/iu.test(text)) return false; } return true; }",
|
||||
"function rowFor() {",
|
||||
" const control = readJsonl('control.jsonl');",
|
||||
" const samples = readJsonl('samples.jsonl');",
|
||||
" const samples = readJsonlTail('samples.jsonl');",
|
||||
" const prompts = promptCommands(control);",
|
||||
" const prompt = prompts[promptIndex - 1] || null;",
|
||||
" if (!prompt) return { ok: true, round: promptIndex, status: null, traceId: null, finalResponseEmpty: true, lastSeq: null, lastTs: null, valuesRedacted: true };",
|
||||
@@ -2529,7 +2557,19 @@ function mergeFindingRecords(primary: readonly Record<string, unknown>[], extra:
|
||||
|
||||
/**
 * Reports whether a finding record counts as blocking for quick verify.
 *
 * A finding is blocking only when both conditions hold:
 * 1. its `severity` (or, when absent, `level`) lower-cases to one of
 *    critical / red / fatal / error / failed / blocked, and
 * 2. its `id` (or, when absent, `kind`, then `code`) lower-cases to one of the
 *    ids in the allowlist below.
 *
 * @param item - Finding record to classify.
 * @returns `true` when the finding has a blocking severity and an allowlisted id.
 */
function isQuickVerifyBlockingFinding(item: Record<string, unknown>): boolean {
  const severity = (stringAtNullable(item, "severity") ?? stringAtNullable(item, "level") ?? "").toLowerCase();
  // Severity alone is not enough: an earlier unconditional return on severity made
  // the id allowlist below unreachable, so every red finding was blocking.
  if (!["critical", "red", "fatal", "error", "failed", "blocked"].includes(severity)) return false;
  const id = (stringAtNullable(item, "id") ?? stringAtNullable(item, "kind") ?? stringAtNullable(item, "code") ?? "").toLowerCase();
  return [
    "quick-verify-no-business-turn",
    "observer-command-failed",
    "prompt-chat-submit-failed",
    "route-active-session-mismatch",
    "final-response-flicker",
    "round-completion-final-response-missing",
    "turn-trace-id-missing",
    "no-samples",
    "jsonl-read-issues",
  ].includes(id);
}
|
||||
|
||||
function quickVerifyControlFindings(failure: string | null, promptIndex: number, turnSummary: Record<string, unknown> | null, traceFrame: Record<string, unknown> | null): Record<string, unknown>[] {
|
||||
|
||||
Reference in New Issue
Block a user