fix(cli): retry codex steer tunnel aborts

This commit is contained in:
unidesk-code-queue-runner
2026-05-23 06:28:13 +00:00
parent d0c762da06
commit 6c02cb1655
7 changed files with 196 additions and 23 deletions
+53 -7
View File
@@ -147,13 +147,58 @@ export function runCodeQueueCliSteerContract(): JsonRecord {
assertCondition(!successJson.includes("send this"), "successful steer must not echo prompt text", success);
assertCondition(!successJson.includes("promptPreview"), "successful steer must not include promptPreview", success);
assertReason(codexSteerTaskForTest("direct_task", ["p"], () => ({ ok: false, exitCode: 1, stderrTail: "Cannot connect to the Docker daemon" })), "backend-core-unreachable", null);
assertReason(codexSteerTaskForTest("direct_task", ["p"], () => ({ ok: false, status: 404, body: { ok: false, error: "microservice not found: code-queue" } })), "code-queue-microservice-unregistered", 404);
assertReason(codexSteerTaskForTest("direct_task", ["p"], () => ({ ok: false, status: 401, body: { ok: false, error: "unauthorized" } })), "proxy-unauthorized", 401);
assertReason(codexSteerTaskForTest("direct_task", ["p"], () => ({ ok: false, status: 404, body: { ok: false, error: "proxy route not found", path: "/api/microservices/code-queue/proxy/api/tasks/direct_task/steer" } })), "proxy-404", 404);
assertReason(codexSteerTaskForTest("direct_task", ["p"], () => ({ ok: false, status: 404, body: { ok: false, error: "task not found" } })), "steer-endpoint-404", 404);
assertReason(codexSteerTaskForTest("direct_task", ["p"], () => ({ ok: false, status: 409, body: { ok: false, error: "task does not have an active steerable turn" } })), "upstream-runtime-rejected", 409);
assertReason(codexSteerTaskForTest("direct_task", ["p"], () => ({ ok: false, status: 504, body: { ok: false, error: "provider HTTP tunnel timed out or disconnected", stage: "http-tunnel-wait" } })), "stable-proxy-failed", 504);
assertReason(codexSteerTaskForTest("direct_task", ["p", "--no-retry"], () => ({ ok: false, exitCode: 1, stderrTail: "Cannot connect to the Docker daemon" })), "backend-core-unreachable", null);
assertReason(codexSteerTaskForTest("direct_task", ["p", "--no-retry"], () => ({ ok: false, status: 404, body: { ok: false, error: "microservice not found: code-queue" } })), "code-queue-microservice-unregistered", 404);
assertReason(codexSteerTaskForTest("direct_task", ["p", "--no-retry"], () => ({ ok: false, status: 401, body: { ok: false, error: "unauthorized" } })), "proxy-unauthorized", 401);
assertReason(codexSteerTaskForTest("direct_task", ["p", "--no-retry"], () => ({ ok: false, status: 404, body: { ok: false, error: "proxy route not found", path: "/api/microservices/code-queue/proxy/api/tasks/direct_task/steer" } })), "proxy-404", 404);
assertReason(codexSteerTaskForTest("direct_task", ["p", "--no-retry"], () => ({ ok: false, status: 404, body: { ok: false, error: "task not found" } })), "steer-endpoint-404", 404);
assertReason(codexSteerTaskForTest("direct_task", ["p", "--no-retry"], () => ({ ok: false, status: 409, body: { ok: false, error: "task does not have an active steerable turn" } })), "upstream-runtime-rejected", 409);
assertReason(codexSteerTaskForTest("direct_task", ["p", "--no-retry"], () => ({ ok: false, status: 504, body: { ok: false, error: "provider HTTP tunnel timed out or disconnected", stage: "http-tunnel-wait" } })), "stable-proxy-failed", 504);
const abortedTunnelBody = {
ok: false,
error: "provider HTTP tunnel failed",
stage: "provider-gateway-http-fetch",
providerId: "D601",
serviceId: "code-queue",
providerError: "The operation was aborted",
retryable: false,
attempts: [{ attempt: 1, ok: false, durationMs: 30003, timeoutMs: 30000, result: { ok: false, error: "The operation was aborted" } }],
};
let retryCalls = 0;
const retryThenSuccess = codexSteerTaskForTest("direct_task", ["transient correction", "--retry-delay-ms", "0"], () => {
retryCalls += 1;
if (retryCalls === 1) return { ok: false, status: 502, body: abortedTunnelBody };
return {
ok: true,
status: 200,
body: {
ok: true,
task: { id: "direct_task", status: "running", prompt: "hidden" },
queue: { activeTaskIds: ["direct_task"] },
},
};
}) as JsonRecord;
assertCondition(retryCalls === 2, "retryable 502 tunnel abort should be retried once by default", { retryCalls, retryThenSuccess });
assertCondition(nestedRecord(retryThenSuccess, ["steer"]).accepted === true, "retry success should accept steer", retryThenSuccess);
const retrySuccessAttempts = nestedRecord(retryThenSuccess, ["steer"]).attempts;
assertCondition(Array.isArray(retrySuccessAttempts) && retrySuccessAttempts.length === 2, "retry success should expose both attempts", retryThenSuccess);
assertCondition(String(JSON.stringify(retryThenSuccess)).includes("The operation was aborted"), "retry attempts should preserve aborted tunnel evidence", retryThenSuccess);
assertCondition(!String(JSON.stringify(retryThenSuccess)).includes("transient correction"), "retry success must not echo steer prompt", retryThenSuccess);
let exhaustedCalls = 0;
const exhausted = codexSteerTaskForTest("direct_task", ["final correction", "--retry-attempts", "2", "--retry-delay-ms", "0"], () => {
exhaustedCalls += 1;
return { ok: false, status: 502, body: abortedTunnelBody };
}) as JsonRecord;
assertCondition(exhaustedCalls === 2, "retryable 502 tunnel abort should honor retry-attempts", { exhaustedCalls, exhausted });
assertReason(exhausted, "stable-proxy-failed", 502);
const exhaustedDiagnostics = nestedRecord(exhausted, ["diagnostics"]);
const exhaustedAttempts = exhaustedDiagnostics.attempts;
assertCondition(Array.isArray(exhaustedAttempts) && exhaustedAttempts.length === 2, "exhausted retry diagnostics should expose attempts", exhaustedDiagnostics);
assertCondition(String(exhaustedDiagnostics.message || "").includes("The operation was aborted"), "diagnostics should include provider abort message", exhaustedDiagnostics);
assertCondition(nestedRecord(exhaustedDiagnostics, ["operatorGuidance"]).rawProxyEquivalentIsFallback === false, "raw proxy equivalent should be diagnostic, not fallback", exhaustedDiagnostics);
assertCondition(String(nestedRecord(exhausted, ["commands"]).rawProxy || "").includes("microservice proxy code-queue /api/tasks/direct_task/steer"), "failure should still expose raw proxy diagnostic command", exhausted);
return {
ok: true,
@@ -170,6 +215,7 @@ export function runCodeQueueCliSteerContract(): JsonRecord {
"non-dry-run uses stable proxy helper",
"successful steer confirms write without echoing prompt",
"steer failure classification is JSON-consumable",
"retryable tunnel aborts are retried with bounded diagnostics",
],
};
}