fix: make sentinel quick verify wait for turns (#920)

Co-authored-by: Codex <codex@noreply.local>
This commit is contained in:
Lyon
2026-06-26 01:48:51 +08:00
committed by GitHub
parent edfddd2445
commit 8080c810b9
3 changed files with 261 additions and 35 deletions
+198 -29
View File
@@ -152,6 +152,12 @@ interface CompactCommandResult {
readonly stderrPreview: string;
}
/**
 * Outcome of a single child CLI invocation, as assembled by `runChildCli`.
 */
interface ChildCliResult {
// True only when the child exited with code 0 and did not time out.
readonly ok: boolean;
// Child stdout run through `parseJsonObject`; callers treat `null` as "no usable JSON payload".
readonly parsed: Record<string, unknown> | null;
// Compacted command result extended with stdout/stderr tails.
readonly result: CompactCommandResult & { stdoutTail: string; stderrTail: string };
}
const SPEC_REF = "PJ2026-01060508 Web哨兵 draft-2026-06-25-p0-web-probe-sentinel";
export function runWebProbeSentinelCommand(spec: HwlabRuntimeLaneSpec, options: WebProbeSentinelOptions): RenderedCliResult {
@@ -1272,7 +1278,8 @@ function runSentinelReport(state: SentinelCicdState, options: Extract<WebProbeSe
const report = callSentinelService(state, "GET", `/api/report?${query.toString()}`, null, options.timeoutSeconds);
const body = record(report.bodyJson);
const renderedText = typeof body.renderedText === "string" ? body.renderedText : renderReportResult({ command, node: state.spec.nodeId, lane: state.spec.lane, report, valuesRedacted: true });
return rendered(report.ok && body.ok !== false, command, options.raw ? JSON.stringify(body, null, 2) : renderedText);
const rawPayload = Object.keys(body).length > 0 ? body : report;
return rendered(report.ok && body.ok !== false, command, options.raw ? JSON.stringify(rawPayload, null, 2) : renderedText);
}
function renderAsyncP5Job(state: SentinelCicdState, subcommand: readonly string[], timeoutSeconds: number, releaseId: string | null, reason: string | null, quickVerify: boolean): RenderedCliResult {
@@ -1339,7 +1346,17 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou
const repeat = Math.max(1, typeof item.repeat === "number" && Number.isFinite(item.repeat) ? Math.trunc(item.repeat) : 1);
for (let index = 0; index < repeat; index += 1) {
if (Date.now() >= deadline) {
return recordQuickVerify(state, quickVerifyTimeoutPayload(state, runId, scenarioId, reason, observerId, steps));
return recordQuickVerify(state, finalizeQuickVerifyFailure(state, {
runId,
scenarioId,
reason,
observerId,
promptIndex,
steps,
failure: "quick-verify-timeout-over-120s",
warnings: ["quick verify exceeded the configured 120s targetValidation budget; investigate env-reuse/git mirror/source build path before retrying."],
promptSource: prompts.summary,
}));
}
const args = ["hwlab", "nodes", "web-probe", "observe", "command", observerId, "--node", state.spec.nodeId, "--lane", state.spec.lane, "--type", type, "--wait-ms", "55000", "--command-timeout-seconds", String(remainingSeconds(deadline, 55))];
if (type === "selectProvider") args.push("--provider", stringAt(item, "provider"));
@@ -1350,18 +1367,33 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou
const commandResult = runChildCli(args, remainingSeconds(deadline, 60));
steps.push({ phase: `observe-command-${type}`, ok: commandResult.ok, promptIndex: type === "sendPrompt" ? promptIndex : null, result: commandResult.result });
if (!commandResult.ok) {
return recordQuickVerify(state, {
ok: false,
return recordQuickVerify(state, finalizeQuickVerifyFailure(state, {
runId,
scenarioId,
reason,
status: "blocked",
observerId,
promptIndex,
steps,
failure: `observe-command-${type}-failed`,
promptSource: prompts.summary,
valuesRedacted: true,
});
}));
}
if (type === "sendPrompt") {
const waitResult = waitForQuickVerifyPromptTurn(state, observerId, promptIndex, deadline);
steps.push({ phase: "observe-wait-turn-terminal", ok: waitResult.ok, promptIndex, result: waitResult });
if (waitResult.ok !== true) {
return recordQuickVerify(state, finalizeQuickVerifyFailure(state, {
runId,
scenarioId,
reason,
observerId,
promptIndex,
steps,
failure: text(waitResult.failure ?? "observe-turn-terminal-wait-failed"),
promptSource: prompts.summary,
warnings: Array.isArray(waitResult.warnings) ? waitResult.warnings : [],
}));
}
}
}
}
@@ -1398,17 +1430,63 @@ function runSentinelQuickVerify(state: SentinelCicdState, reason: string, timeou
});
}
function quickVerifyTimeoutPayload(state: SentinelCicdState, runId: string, scenarioId: string, reason: string, observerId: string, steps: readonly Record<string, unknown>[]): Record<string, unknown> {
/**
 * Builds the `ok: false` / `status: "blocked"` quick-verify payload for a failed run and
 * gathers diagnostics for it.
 *
 * Work performed, in order:
 *  1. Force-stops the observer through the child CLI (`observe stop --force`).
 *  2. Runs `observe analyze` for the same observer.
 *  3. Looks up the local observe index entry and, when one exists, reads the analysis
 *     summary from the workspace; otherwise a placeholder summary with reason
 *     `observe-index-entry-missing` is used.
 *  4. Collects the `turn-summary` and `trace-frame` views.
 *
 * The stop/analyze outcomes are appended after the caller-supplied steps in the returned
 * `steps` array. This function takes no deadline: every cleanup call uses its own fixed
 * timeout (30s, 60s, 30s, 30s, 30s).
 *
 * NOTE(review): the returned object literal lists several keys twice (shorthand `runId`,
 * `scenarioId`, `reason`, `observerId`, `steps`, and literal `failure`/`warnings`) ahead of
 * the `input.*` based entries; in an object literal the later entry for a key wins.
 *
 * @param state Sentinel state; `state.spec.nodeId`, `state.spec.lane` and
 *   `state.publicExposure` are read here.
 * @param input Identifiers, prior steps, failure code, and optional prompt source / warnings.
 * @returns A record describing the blocked run, including analysis summary, rendered views,
 *   findings, and warnings (each warning passed through `text`).
 */
function finalizeQuickVerifyFailure(state: SentinelCicdState, input: {
readonly runId: string;
readonly scenarioId: string;
readonly reason: string;
readonly observerId: string;
readonly promptIndex: number;
readonly steps: readonly Record<string, unknown>[];
readonly failure: string;
readonly promptSource?: Record<string, unknown>;
readonly warnings?: readonly unknown[];
}): Record<string, unknown> {
const cleanupSteps: Record<string, unknown>[] = [];
// Stop the observer first; the result is recorded as a cleanup step either way.
const stop = runChildCli([
"hwlab", "nodes", "web-probe", "observe", "stop", input.observerId,
"--node", state.spec.nodeId,
"--lane", state.spec.lane,
"--force",
"--command-timeout-seconds", "55",
], 30);
cleanupSteps.push({ phase: "observe-stop-after-failure", ok: stop.ok, result: stop.result });
// Run the analyzer after the stop; its result is recorded as the second cleanup step.
const analysis = runChildCli([
"hwlab", "nodes", "web-probe", "observe", "analyze", input.observerId,
"--node", state.spec.nodeId,
"--lane", state.spec.lane,
"--command-timeout-seconds", "55",
], 60);
cleanupSteps.push({ phase: "observe-analyze-after-failure", ok: analysis.ok, result: analysis.result });
const indexEntry = readLocalObserveIndex(input.observerId);
// Without an index entry there is no state directory to read a summary from.
const artifactSummary = indexEntry === null
? { ok: false, reason: "observe-index-entry-missing", observerId: input.observerId, valuesRedacted: true }
: readAnalysisSummaryFromWorkspace(state, indexEntry.stateDir, 30);
const turnSummary = collectObserveView(state, input.observerId, "turn-summary", null, 30);
// The trace frame is scoped to the prompt's turn only when promptIndex is positive.
const traceFrame = collectObserveView(state, input.observerId, "trace-frame", input.promptIndex > 0 ? input.promptIndex : null, 30);
return {
ok: false,
runId,
scenarioId,
reason,
runId: input.runId,
scenarioId: input.scenarioId,
reason: input.reason,
status: "blocked",
observerId,
steps,
failure: "quick-verify-timeout-over-120s",
warnings: ["quick verify exceeded the configured 120s targetValidation budget; investigate env-reuse/git mirror/source build path before retrying."],
observerId: input.observerId,
stateDir: indexEntry?.stateDir ?? null,
reportJsonSha256: stringAtNullable(artifactSummary, "reportJsonSha256"),
// Counts fall back to 0 when the summary does not provide them.
findingCount: numberAtNullable(artifactSummary, "findingCount") ?? 0,
artifactCount: numberAtNullable(artifactSummary, "artifactCount") ?? 0,
failure: input.failure,
promptSource: input.promptSource,
steps: [...input.steps, ...cleanupSteps],
analysis: artifactSummary,
views: {
// The summary view is rendered from the caller's steps only (cleanup steps are not passed).
summary: { renderedText: renderQuickVerifySummary({ runId: input.runId, scenarioId: input.scenarioId, observerId: input.observerId, artifactSummary, steps: input.steps, publicOrigin: stringAt(state.publicExposure, "publicBaseUrl") }) },
"turn-summary": { renderedText: typeof turnSummary.renderedText === "string" ? turnSummary.renderedText : null, ok: turnSummary.ok },
"trace-frame": { renderedText: typeof traceFrame.renderedText === "string" ? traceFrame.renderedText : null, ok: traceFrame.ok },
},
findings: Array.isArray(record(artifactSummary).findings) ? record(artifactSummary).findings : [],
screenshot: record(artifactSummary).screenshot,
publicOrigin: stringAt(state.publicExposure, "publicBaseUrl"),
warnings: Array.isArray(input.warnings) ? input.warnings.map(text) : [],
valuesRedacted: true,
};
}
@@ -1455,7 +1533,10 @@ function callSentinelService(state: SentinelCicdState, method: "GET" | "POST", p
"const init={method,headers:{}};",
"if(method!=='GET'&&method!=='HEAD'){init.headers['content-type']='application/json';init.body=body;}",
"let out;",
"try{const res=await fetch(url,init);const text=await res.text();let bodyJson=null;try{bodyJson=JSON.parse(text)}catch{};out={ok:res.ok,httpStatus:res.status,contentType:res.headers.get('content-type'),bodyJson,bodyTextPreview:text.slice(0,12000),bodyBytes:Buffer.byteLength(text),valuesRedacted:true};}",
"function rec(v){return v&&typeof v==='object'&&!Array.isArray(v)?v:{}}",
"function pick(o,keys){const r={};for(const k of keys){if(o&&Object.prototype.hasOwnProperty.call(o,k))r[k]=o[k];}return r}",
"function compactBodyJson(v){const o=rec(v);if(typeof o.renderedText!=='string')return v;return {...pick(o,['ok','view','error','availableViews','valuesRedacted']),run:pick(rec(o.run),['id','scenario_id','status','node','lane','observer_id','state_dir','report_json_sha256','finding_count','artifact_count','maintenance','created_at','updated_at']),summary:pick(rec(o.summary),['reason','status','valuesRedacted']),findings:Array.isArray(o.findings)?o.findings.slice(0,12):[],renderedText:o.renderedText,valuesRedacted:true}}",
"try{const res=await fetch(url,init);const text=await res.text();let bodyJson=null;try{bodyJson=JSON.parse(text)}catch{};out={ok:res.ok,httpStatus:res.status,contentType:res.headers.get('content-type'),bodyJson:compactBodyJson(bodyJson),bodyTextPreview:bodyJson===null?text.slice(0,4000):'',bodyBytes:Buffer.byteLength(text),valuesRedacted:true};}",
"catch(error){out={ok:false,error:error instanceof Error?error.message:String(error),valuesRedacted:true};}",
"console.log(JSON.stringify(out));",
].join("");
@@ -1701,20 +1782,114 @@ function readAnalysisSummaryFromWorkspace(state: SentinelCicdState, stateDir: st
}
/**
 * Collects one observer view (`turn-summary` or `trace-frame`) through the child CLI
 * (`observe collect`) and wraps the outcome in a plain record.
 *
 * NOTE(review): this listing contains two `const args` declarations and two `return`
 * statements. The notes below describe the variant that passes `--raw --compact-raw` and
 * reads the child's parsed JSON payload.
 *
 * In that variant:
 *  - `ok` requires the child to succeed, its stdout to parse (`parsed !== null`), and
 *    neither `payload.ok` nor `collect.ok` to be `false`.
 *  - `renderedText` prefers `collect.renderedText`, then `payload.renderedText`, then the
 *    child's stdout tail/preview.
 *
 * @param state Sentinel state; `state.spec.nodeId` and `state.spec.lane` are read.
 * @param observerId Observer to collect from.
 * @param view View name forwarded as `--view`.
 * @param turn Turn forwarded as `--turn`; omitted when `null`.
 * @param timeoutSeconds Child timeout; the forwarded `--command-timeout-seconds` is
 *   clamped to 5..55.
 * @returns Record with `ok`, `view`, `renderedText`, `result` and `valuesRedacted`, plus
 *   `collect` and `payload` in the JSON-parsing variant.
 */
function collectObserveView(state: SentinelCicdState, observerId: string, view: "turn-summary" | "trace-frame", turn: number | null, timeoutSeconds: number): Record<string, unknown> {
const args = ["hwlab", "nodes", "web-probe", "observe", "collect", observerId, "--node", state.spec.nodeId, "--lane", state.spec.lane, "--view", view, "--command-timeout-seconds", String(Math.max(5, Math.min(timeoutSeconds, 55)))];
const args = ["hwlab", "nodes", "web-probe", "observe", "collect", observerId, "--node", state.spec.nodeId, "--lane", state.spec.lane, "--view", view, "--command-timeout-seconds", String(Math.max(5, Math.min(timeoutSeconds, 55))), "--raw", "--compact-raw"];
if (turn !== null) args.push("--turn", String(turn));
const result = runChildCli(args, timeoutSeconds);
return { ok: result.ok, view, renderedText: String(record(result.result).stdoutTail ?? record(result.result).stdoutPreview ?? ""), result: result.result, valuesRedacted: true };
// Unwrap an optional `data` envelope, then read the nested `collect` object.
const payload = cliDataPayload(result.parsed);
const collect = record(payload.collect);
return {
ok: result.ok && result.parsed !== null && payload.ok !== false && collect.ok !== false,
view,
renderedText: typeof collect.renderedText === "string" ? collect.renderedText : typeof payload.renderedText === "string" ? payload.renderedText : String(record(result.result).stdoutTail ?? record(result.result).stdoutPreview ?? ""),
collect,
payload,
result: result.result,
valuesRedacted: true,
};
}
function runChildCli(args: string[], timeoutSeconds: number, input?: string): { ok: boolean; result: Record<string, unknown> } {
const result = runCommand(["bun", "scripts/cli.ts", ...args], repoRoot, { input, timeoutMs: Math.max(5, timeoutSeconds) * 1000 });
/**
 * Invokes the project CLI (`bun scripts/cli.ts …`) as a child process from `repoRoot`.
 *
 * @param args Arguments appended after `scripts/cli.ts`.
 * @param timeoutSeconds Requested timeout; clamped into the 5..120 second range.
 * @param input Optional value forwarded as the `input` option of `runCommand`.
 * @returns `ok` (exit code 0 and not timed out), the stdout parsed by `parseJsonObject`,
 *   and the command result compacted by `compactCommandWithTail`.
 */
function runChildCli(args: string[], timeoutSeconds: number, input?: string): ChildCliResult {
  const boundedSeconds = Math.max(5, Math.min(timeoutSeconds, 120));
  const child = runCommand(["bun", "scripts/cli.ts", ...args], repoRoot, { input, timeoutMs: boundedSeconds * 1000 });
  const succeeded = child.exitCode === 0 && !child.timedOut;
  return { ok: succeeded, parsed: parseJsonObject(child.stdout), result: compactCommandWithTail(child) };
}
/**
 * Polls the observer's `turn-summary` view until the turn whose `round` equals
 * `promptIndex` reaches a terminal status or the deadline passes.
 *
 * Each poll appends a redacted observation; only the last six are returned.
 *
 * @param state Sentinel state forwarded to `collectObserveView`.
 * @param observerId Observer whose turn summary is polled.
 * @param promptIndex Round number of the submitted prompt to wait for.
 * @param deadline Absolute deadline in epoch milliseconds (compared against `Date.now()`).
 * @returns `{ ok: true, … }` when the turn finished successfully;
 *   `{ ok: false, failure: "observe-turn-terminal-non-success", … }` when it ended in another
 *   terminal status; `{ ok: false, failure: "quick-verify-timeout-over-120s", … }` when the
 *   deadline passed first.
 */
function waitForQuickVerifyPromptTurn(state: SentinelCicdState, observerId: string, promptIndex: number, deadline: number): Record<string, unknown> {
  const observations: Record<string, unknown>[] = [];
  while (Date.now() < deadline) {
    const view = collectObserveView(state, observerId, "turn-summary", null, remainingSeconds(deadline, 20));
    // Read `rows` once so the Array.isArray check applies to the very value that is mapped.
    const rawRows: unknown = record(view.collect).rows;
    const rows = Array.isArray(rawRows) ? rawRows.map((item) => record(item)) : [];
    const row = rows.find((item) => Number(item.round) === promptIndex) ?? null;
    const rawStatus: unknown = row?.status;
    const status = typeof rawStatus === "string" ? rawStatus : null;
    const finalResponse = record(row?.finalResponse);
    observations.push({
      ok: view.ok,
      round: promptIndex,
      status,
      traceId: row?.traceId ?? null,
      finalResponseEmpty: finalResponse.empty === true,
      lastSeq: row?.lastSeq ?? null,
      lastTs: row?.lastTs ?? null,
      valuesRedacted: true,
    });
    // Success must be tested first: successful statuses are also terminal statuses.
    if (isQuickVerifyTurnSuccessful(status)) {
      return {
        ok: true,
        round: promptIndex,
        status,
        traceId: row?.traceId ?? null,
        finalResponseEmpty: finalResponse.empty === true,
        observations: observations.slice(-6),
        valuesRedacted: true,
      };
    }
    if (isQuickVerifyTurnTerminal(status)) {
      return {
        ok: false,
        failure: "observe-turn-terminal-non-success",
        round: promptIndex,
        status,
        traceId: row?.traceId ?? null,
        finalResponseEmpty: finalResponse.empty === true,
        observations: observations.slice(-6),
        valuesRedacted: true,
      };
    }
    // Check the remaining budget BEFORE clamping the sleep to at least one second; once the
    // value has been clamped it can never be <= 0, so the exit check has to come first.
    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0) break;
    const sleepSeconds = Math.min(5, Math.max(1, Math.floor(remainingMs / 1000)));
    runCommand(["sleep", String(sleepSeconds)], repoRoot, { timeoutMs: (sleepSeconds + 1) * 1000 });
  }
  return {
    ok: false,
    failure: "quick-verify-timeout-over-120s",
    round: promptIndex,
    observations: observations.slice(-6),
    warnings: ["quick verify exceeded the configured 120s targetValidation budget while waiting for a submitted turn to become terminal; investigate env-reuse/git mirror/source build path before retrying."],
    valuesRedacted: true,
  };
}
/**
 * Tells whether a turn status, once normalized by `normalizeQuickVerifyStatus`, is one of
 * the successful outcomes: `completed`, `succeeded` or `success`.
 *
 * @param value Raw status text, or `null` when no status is known.
 * @returns `true` for a successful status, `false` otherwise (including `null`).
 */
function isQuickVerifyTurnSuccessful(value: string | null): boolean {
  switch (normalizeQuickVerifyStatus(value)) {
    case "completed":
    case "succeeded":
    case "success":
      return true;
    default:
      return false;
  }
}
/**
 * Tells whether a turn status, once normalized by `normalizeQuickVerifyStatus`, means the
 * turn has stopped progressing, whether it succeeded or not.
 *
 * @param value Raw status text, or `null` when no status is known.
 * @returns `true` when the normalized status is in the terminal list below.
 */
function isQuickVerifyTurnTerminal(value: string | null): boolean {
  const terminalStatuses: readonly string[] = [
    "completed",
    "succeeded",
    "success",
    "failed",
    "error",
    "blocked",
    "timeout",
    "canceled",
    "cancelled",
    "terminal",
  ];
  return terminalStatuses.includes(normalizeQuickVerifyStatus(value));
}
/**
 * Canonicalizes a status string for comparison: `null` becomes the empty string,
 * surrounding whitespace is trimmed, letters are lower-cased, and every underscore is
 * replaced by a hyphen.
 *
 * @param value Raw status text, or `null`.
 * @returns The canonical status text.
 */
function normalizeQuickVerifyStatus(value: string | null): string {
  const raw = String(value ?? "");
  const lowered = raw.trim().toLowerCase();
  return lowered.split("_").join("-");
}
/**
 * Returns the payload of a parsed CLI response: the nested `data` object when
 * `isRecord` accepts it, otherwise the top-level object itself.
 *
 * @param parsed Parsed CLI stdout, or `null`; it is passed through `record` first.
 * @returns The unwrapped payload record.
 */
function cliDataPayload(parsed: Record<string, unknown> | null): Record<string, unknown> {
  const envelope = record(parsed);
  const inner = envelope.data;
  if (isRecord(inner)) {
    return inner;
  }
  return envelope;
}
function findScenario(state: SentinelCicdState, scenarioId: string): Record<string, unknown> | null {
const sentinel = state.spec.observability.webProbe?.sentinel;
if (sentinel === undefined) return null;
@@ -1933,6 +2108,7 @@ function renderValidateResult(result: Record<string, unknown>): string {
const quickVerify = record(result.quickVerify);
const blocker = record(result.blocker);
const next = record(result.next);
const warnings = Array.isArray(quickVerify.warnings) ? quickVerify.warnings : [];
return [
String(result.command),
"",
@@ -1946,6 +2122,8 @@ function renderValidateResult(result: Record<string, unknown>): string {
["quick-verify", Object.keys(quickVerify).length === 0 ? "skipped" : quickVerify.ok, `${quickVerify.runId ?? "-"} ${short(quickVerify.reportJsonSha256)}`],
]),
"",
warnings.length === 0 ? "WARNINGS\n-" : ["WARNINGS", ...warnings.map((item) => `- ${text(item)}`)].join("\n"),
"",
Object.keys(blocker).length === 0 ? "BLOCKER\n-" : table(["CODE", "BLOCKERS"], [[blocker.code, Array.isArray(blocker.blockers) ? blocker.blockers.join(",") : blocker.reason]]),
"",
"NEXT",
@@ -1979,15 +2157,6 @@ function sentinelPipelineRunName(state: SentinelCicdState): string {
return `hwlab-web-probe-sentinel-${commit.slice(0, 12)}`;
}
/**
 * Invokes the project CLI (`bun scripts/cli.ts …`) as a child process from `repoRoot`.
 *
 * @param args Arguments appended after `scripts/cli.ts`.
 * @param timeoutSeconds Requested timeout; clamped into the 5..120 second range so a zero
 *   or negative value cannot produce a non-positive `timeoutMs`.
 * @returns A record with `ok` (exit code 0 and not timed out), `parsed` (stdout run through
 *   `parseJsonObject`) and `result` (output of `compactCommand`).
 */
function runChildCli(args: string[], timeoutSeconds: number): Record<string, unknown> {
  const boundedSeconds = Math.max(5, Math.min(timeoutSeconds, 120));
  const result = runCommand(["bun", "scripts/cli.ts", ...args], repoRoot, { timeoutMs: boundedSeconds * 1000 });
  return {
    // A child that hit the timeout is never reported as successful.
    ok: result.exitCode === 0 && !result.timedOut,
    parsed: parseJsonObject(result.stdout),
    result: compactCommand(result),
  };
}
function renderImageResult(result: Record<string, unknown>): string {
const source = record(result.source);
const image = record(result.image);
+2
View File
@@ -154,6 +154,8 @@ export interface NodeWebProbeObserveOptions {
analyzeArchivePrefix: string | null;
analyzeTailSamples: number | null;
full: boolean;
raw: boolean;
compactRaw: boolean;
stateDir: string | null;
jobId: string | null;
force: boolean;
+61 -6
View File
@@ -185,6 +185,7 @@ export function parseNodeWebProbeObserveOptions(
"--sample-seq",
"--timestamp",
"--turn",
"--compact-raw",
"--archive-prefix",
"--tail-samples",
"--state-dir",
@@ -208,7 +209,7 @@ export function parseNodeWebProbeObserveOptions(
"--workspace-root",
"--workspace-root-ref",
"--root",
]), new Set(["--force", "--full", "--text-stdin"]));
]), new Set(["--force", "--full", "--raw", "--text-stdin"]));
const commandTypeRaw = optionValue(args, "--type") ?? null;
const commandType = commandTypeRaw === null ? null : parseNodeWebProbeObserveCommandType(commandTypeRaw);
const stateDir = optionValue(args, "--state-dir") ?? indexed?.stateDir ?? null;
@@ -306,6 +307,8 @@ export function parseNodeWebProbeObserveOptions(
analyzeArchivePrefix,
analyzeTailSamples,
full: args.includes("--full"),
raw: args.includes("--raw"),
compactRaw: args.includes("--compact-raw"),
stateDir,
jobId,
force: args.includes("--force"),
@@ -1160,7 +1163,8 @@ export function runNodeWebProbeObserveCollect(options: NodeWebProbeObserveOption
].join("\n");
const result = runTransWorkspaceStdinScript(options.node, spec.workspace, script, options.commandTimeoutSeconds);
const collect = parseJsonObject(result.stdout);
return withWebObserveCollectRendered({
const compactRaw = options.raw && options.compactRaw;
const payload = {
ok: result.exitCode === 0 && collect !== null && collect.ok !== false,
status: result.exitCode === 0 && collect !== null ? "collected" : "blocked",
command: webObserveCommandLabel("collect", options),
@@ -1172,11 +1176,62 @@ export function runNodeWebProbeObserveCollect(options: NodeWebProbeObserveOption
requestedFile: options.collectFile,
requestedGrep: options.collectGrep,
degradedReason: collect === null ? "collect-json-parse-failed" : null,
collect,
wrapper: buildWebObserveWrapperForObserveOptions("collect", options, spec.workspace),
result: collect === null ? compactCommandResultWithStdoutTail(result) : compactCommandResult(result),
collect: compactRaw ? compactObserveCollectForRaw(collect) : collect,
wrapper: compactRaw
? { mode: "wrapper-only", action: "collect", node: options.node, lane: options.lane, id: webObserveIdFromOptions(options), stateDir: options.stateDir, valuesRedacted: true }
: buildWebObserveWrapperForObserveOptions("collect", options, spec.workspace),
result: compactRaw ? { exitCode: result.exitCode, timedOut: result.timedOut, stdoutBytes: Buffer.byteLength(result.stdout), stderrBytes: Buffer.byteLength(result.stderr) } : collect === null ? compactCommandResultWithStdoutTail(result) : compactCommandResult(result),
valuesRedacted: true,
});
};
return options.raw ? payload : withWebObserveCollectRendered(payload);
}
/**
 * Reduces a collect payload to a fixed set of fields for compact raw output.
 *
 * When `collect.rows` is an array, each row is reduced to the row fields listed below
 * (missing values become `null`) plus a trimmed `finalResponse` and `valuesRedacted: true`.
 * When `rows` is not an array the `rows` key is omitted entirely.
 *
 * @param collect Parsed collect payload, or `null`.
 * @returns The compacted payload, or `null` when `collect` is `null`.
 */
function compactObserveCollectForRaw(collect: Record<string, unknown> | null): Record<string, unknown> | null {
  if (collect === null) return null;

  const rowFields = [
    "round",
    "commandId",
    "userHash",
    "userBytes",
    "traceId",
    "status",
    "elapsedSeconds",
    "recentUpdateSeconds",
    "marks",
    "firstSeq",
    "lastSeq",
    "lastTs",
  ] as const;
  const responseFields = ["preview", "textHash", "textBytes"] as const;

  const rawRows = collect.rows;
  let rows: Record<string, unknown>[] | undefined;
  if (Array.isArray(rawRows)) {
    rows = rawRows.map((entry: unknown) => {
      const source = observeRecord(entry);
      const response = observeRecord(source.finalResponse);
      const compactRow: Record<string, unknown> = {};
      for (const field of rowFields) {
        compactRow[field] = source[field] ?? null;
      }
      const compactResponse: Record<string, unknown> = {};
      for (const field of responseFields) {
        compactResponse[field] = response[field] ?? null;
      }
      compactResponse.empty = response.empty === true;
      compactRow.finalResponse = compactResponse;
      compactRow.valuesRedacted = true;
      return compactRow;
    });
  }

  // Keys are assigned in the same order as the original literal so serialized output matches.
  const compact: Record<string, unknown> = {
    ok: collect.ok !== false,
    command: collect.command,
    view: collect.view,
    stateDir: collect.stateDir,
    turnCount: collect.turnCount,
  };
  if (rows !== undefined) {
    compact.rows = rows;
  }
  compact.renderedText = typeof collect.renderedText === "string" ? collect.renderedText : undefined;
  compact.sourceFiles = Array.isArray(collect.sourceFiles) ? collect.sourceFiles : undefined;
  compact.blocker = collect.blocker;
  compact.sampleSeq = collect.sampleSeq;
  compact.traceId = collect.traceId;
  compact.finalResponse = collect.finalResponse;
  compact.valuesRedacted = true;
  return compact;
}
/**
 * Coerces an arbitrary value to a plain record for safe property reads.
 *
 * @param value Any value.
 * @returns `value` itself when it is a non-null, non-array object; otherwise a new empty
 *   object.
 */
function observeRecord(value: unknown): Record<string, unknown> {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
export function runNodeWebProbeObserveAnalyze(options: NodeWebProbeObserveOptions, spec: HwlabRuntimeLaneSpec): Record<string, unknown> | RenderedCliResult {