fix: compact codex steer rejection disclosure

This commit is contained in:
Codex
2026-05-23 08:38:46 +00:00
parent 9c3d8809f1
commit 73f68021b4
5 changed files with 145 additions and 14 deletions
+54
View File
@@ -199,6 +199,24 @@ export function runCodeQueueCliSteerContract(): JsonRecord {
assertCondition(String(exhaustedDiagnostics.message || "").includes("The operation was aborted"), "diagnostics should include provider abort message", exhaustedDiagnostics);
assertCondition(nestedRecord(exhaustedDiagnostics, ["operatorGuidance"]).rawProxyEquivalentIsFallback === false, "raw proxy equivalent should be diagnostic, not fallback", exhaustedDiagnostics);
assertCondition(String(nestedRecord(exhausted, ["commands"]).rawProxy || "").includes("microservice proxy code-queue /api/tasks/direct_task/steer"), "failure should still expose raw proxy diagnostic command", exhausted);
assertCondition(nestedRecord(exhausted, ["steer"]).promptOmitted === true, "failed steer should omit prompt by default", exhausted);
assertCondition(!("request" in exhausted), "failed steer should omit request by default", exhausted);
assertCondition(!("upstreamBodyPreview" in exhaustedDiagnostics), "failed steer should omit upstream preview by default", exhaustedDiagnostics);
assertCondition(!String(JSON.stringify(exhausted)).includes("provider-gateway-http-fetch"), "failed steer default output should omit upstream body internals", exhausted);
assertCondition(String(nestedRecord(exhausted, ["commands"]).fullDetails || "").includes("--full"), "failed steer should suggest full disclosure command", exhausted);
assertCondition(String(nestedRecord(exhausted, ["commands"]).rawDetails || "").includes("--raw"), "failed steer should suggest raw disclosure command", exhausted);
const exhaustedFull = codexSteerTaskForTest("direct_task", ["final correction", "--retry-attempts", "1", "--retry-delay-ms", "0", "--full"], () => {
return { ok: false, status: 502, body: abortedTunnelBody };
}) as JsonRecord;
const exhaustedFullDiagnostics = nestedRecord(exhaustedFull, ["diagnostics"]);
assertCondition("request" in exhaustedFull, "--full failed steer should expose request metadata", exhaustedFull);
assertCondition("upstreamBodyPreview" in exhaustedFullDiagnostics, "--full failed steer should expose bounded upstream preview", exhaustedFullDiagnostics);
const exhaustedRaw = codexSteerTaskForTest("direct_task", ["final correction", "--retry-attempts", "1", "--retry-delay-ms", "0", "--raw"], () => {
return { ok: false, status: 502, body: abortedTunnelBody };
}) as JsonRecord;
assertCondition("rawFailure" in exhaustedRaw, "--raw failed steer should expose raw response", exhaustedRaw);
const terminalPrompt = `${"do not leak ".repeat(40)}tail-secret-marker`;
const terminalRejection = codexSteerTaskForTest("completed_task", [terminalPrompt], () => ({
@@ -232,6 +250,8 @@ export function runCodeQueueCliSteerContract(): JsonRecord {
assertCondition(String(terminalCommands.show || "").includes("codex task completed_task"), "terminal rejection should suggest show command", terminalCommands);
assertCondition(String(terminalCommands.read || "").includes("codex read completed_task"), "terminal rejection should suggest read command", terminalCommands);
assertCondition(String(terminalCommands.followUpSubmit || "").includes("codex submit --prompt-file <path> --reference-task-id completed_task"), "terminal rejection should suggest follow-up submit pattern", terminalCommands);
assertCondition(String(terminalCommands.fullDetails || "").includes("codex steer completed_task --prompt-file <path> --full"), "terminal rejection should suggest explicit full disclosure command", terminalCommands);
assertCondition(String(terminalCommands.rawDetails || "").includes("codex steer completed_task --prompt-file <path> --raw"), "terminal rejection should suggest explicit raw disclosure command", terminalCommands);
const terminalJson = JSON.stringify(terminalRejection);
assertCondition(!terminalJson.includes("tail-secret-marker"), "terminal rejection must not echo steer prompt", terminalRejection);
assertCondition(!terminalJson.includes("hidden task prompt"), "terminal rejection must not echo task prompt", terminalRejection);
@@ -239,6 +259,39 @@ export function runCodeQueueCliSteerContract(): JsonRecord {
assertCondition(!("request" in terminalRejection), "terminal rejection should omit request preview", terminalRejection);
assertCondition(!("diagnostics" in terminalRejection), "terminal rejection should omit bulky diagnostics", terminalRejection);
const terminalFull = codexSteerTaskForTest("completed_task", [terminalPrompt, "--full"], () => ({
ok: false,
status: 409,
body: {
ok: false,
error: "task does not have an active steerable turn",
task: {
id: "completed_task",
status: "succeeded",
terminalStatus: "completed",
prompt: `${"hidden task prompt ".repeat(60)}tail`,
output: [{ seq: 1, text: "noisy raw task output" }],
},
},
})) as JsonRecord;
const fullDiagnostics = nestedRecord(terminalFull, ["diagnostics"]);
assertCondition("upstreamBodyPreview" in fullDiagnostics, "--full should expose bounded upstream preview behind diagnostics", fullDiagnostics);
assertCondition(typeof fullDiagnostics.rawProxyEquivalent === "string", "--full should expose raw proxy equivalent", fullDiagnostics);
assertCondition(!("rawFailure" in terminalFull), "--full should not include raw upstream response", terminalFull);
const terminalRaw = codexSteerTaskForTest("completed_task", [terminalPrompt, "--raw"], () => ({
ok: false,
status: 409,
body: {
ok: false,
error: "task does not have an active steerable turn",
task: { id: "completed_task", status: "succeeded", terminalStatus: "completed" },
},
})) as JsonRecord;
const rawFailure = nestedRecord(terminalRaw, ["rawFailure"]);
const rawFailureTask = nestedRecord(rawFailure, ["body", "task"]);
assertCondition(rawFailure.status === 409 && rawFailureTask.status === "succeeded", "--raw should expose raw upstream failure only when requested", rawFailure);
return {
ok: true,
checks: [
@@ -256,6 +309,7 @@ export function runCodeQueueCliSteerContract(): JsonRecord {
"steer failure classification is JSON-consumable",
"retryable tunnel aborts are retried with bounded diagnostics",
"terminal steer rejection is compact and actionable",
"terminal steer rejection full/raw disclosure is explicit",
],
};
}