diff --git a/src/backend/codex-stdio.ts b/src/backend/codex-stdio.ts index a471394..8c2e224 100644 --- a/src/backend/codex-stdio.ts +++ b/src/backend/codex-stdio.ts @@ -1226,7 +1226,9 @@ function classifyMessageFailureKind(message: string, fallback: FailureKind): Fai if (/invalid[_ -]?prompt/u.test(text) && /invalid function arguments json string|tool_call_id/u.test(text)) return "provider-invalid-tool-call"; if (/invalid function arguments json string/u.test(text)) return "provider-invalid-tool-call"; if (/rate.?limit|too many requests|\b429\b/u.test(text)) return "provider-rate-limited"; - if (/\b401\b|\b403\b|unauthori[sz]ed|forbidden|invalid api key|authentication|auth failed|oauth|access token/u.test(text)) return "provider-auth-failed"; + if (/\b401\b|\b403\b|unauthori[sz]ed|forbidden|invalid api key|api key (?:is )?(?:required|missing)|authentication|auth failed|oauth|access token/u.test(text)) return "provider-auth-failed"; + if (isProviderStreamDisconnectedMessage(text)) return "provider-stream-disconnected"; + if (isProviderHttpErrorMessage(text)) return "provider-http-error"; if (isProviderUnavailableMessage(text)) return "provider-unavailable"; if (/timed out|timeout|idle timeout/u.test(text)) return "backend-timeout"; if (/invalid json|json parse/u.test(text)) return "backend-json-parse-error"; @@ -1237,6 +1239,16 @@ function isProviderCompactUnsupportedMessage(text: string): boolean { return /responses\/compact|\/compact\b/u.test(text) && /\b404\b|not found|unsupported|no route|not implemented/u.test(text); } +function isProviderStreamDisconnectedMessage(text: string): boolean { + return /responsestreamdisconnected|response stream disconnected|stream disconnected before completion|disconnected before completion/u.test(text); +} + +function isProviderHttpErrorMessage(text: string): boolean { + if (/\b(?:http(?:\s+status)?|status(?:\s+code)?|unexpected status|status|code)\s*[:=]?\s*[45]\d\d\b/u.test(text)) return true; + if (/\b[45]\d\d\b/u.test(text) && /http|status|service unavailable|bad gateway|gateway timeout|internal server error|not found|provider|upstream/u.test(text)) return true; + return false; +} + function isProviderUnavailableMessage(text: string): boolean { if (/\b(?:http(?:\s+status)?|status(?:\s+code)?|code)\s*[:=]?\s*5\d\d\b/u.test(text)) return true; if (/\b5\d\d\b/u.test(text) && /service unavailable|bad gateway|gateway timeout|internal server error|provider|upstream|response\s*stream\s*disconnected|responsestreamdisconnected/u.test(text)) return true; diff --git a/src/common/types.ts b/src/common/types.ts index 60a15f6..d96e777 100644 --- a/src/common/types.ts +++ b/src/common/types.ts @@ -19,6 +19,8 @@ export type FailureKind = | "backend-timeout" | "provider-auth-failed" | "provider-rate-limited" + | "provider-stream-disconnected" + | "provider-http-error" | "provider-invalid-tool-call" | "provider-compact-unsupported" | "provider-unavailable" diff --git a/src/selftest/cases/30-codex-stdio.ts b/src/selftest/cases/30-codex-stdio.ts index e2a1aba..8785b37 100644 --- a/src/selftest/cases/30-codex-stdio.ts +++ b/src/selftest/cases/30-codex-stdio.ts @@ -228,9 +228,11 @@ const selfTest: SelfTestCase = async (context) => { await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "provider-429-terminal", expectedStatus: "failed", expectedFailureKind: "provider-rate-limited" }); await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "provider-invalid-tool-call", expectedStatus: "failed", expectedFailureKind: "provider-invalid-tool-call" }); await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "provider-compact-404-terminal", expectedStatus: "failed", expectedFailureKind: "provider-compact-unsupported" }); - await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "provider-503-rpc-error", expectedStatus: "failed", expectedFailureKind: "provider-unavailable" }); - await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "provider-503-terminal", expectedStatus: "failed", expectedFailureKind: "provider-unavailable" }); - await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "provider-503-retry-event", expectedStatus: "failed", expectedFailureKind: "provider-unavailable", expectRetryError: true }); + await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "provider-stream-disconnected-rpc-error", expectedStatus: "failed", expectedFailureKind: "provider-stream-disconnected" }); + await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "provider-503-rpc-error", expectedStatus: "failed", expectedFailureKind: "provider-stream-disconnected" }); + await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "provider-503-terminal", expectedStatus: "failed", expectedFailureKind: "provider-http-error" }); + await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "provider-unavailable-terminal", expectedStatus: "failed", expectedFailureKind: "provider-unavailable" }); + await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "provider-503-retry-event", expectedStatus: "failed", expectedFailureKind: "provider-stream-disconnected", expectRetryError: true }); await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "invalid-json", expectedStatus: "failed", expectedFailureKind: "backend-json-parse-error" }); await runFailureCase({ client, managerUrl: server.baseUrl, context, mode: "missing-terminal", expectedStatus: "failed", expectedFailureKind: "backend-timeout", timeoutMs: 500 }); await runSlowProgressIdleCase({ client, managerUrl: server.baseUrl, context }); @@ -242,7 +244,7 @@ const selfTest: SelfTestCase = async (context) => { await runSessionStorageSubdirCase({ client, managerUrl: server.baseUrl, context }); await runSessionStorageNoSecretLeakCase({ client, managerUrl: server.baseUrl, context }); - return { name: "codex-stdio", tests: ["runner-lease-heartbeat", "runner-lease-conflict-recovery", "codex-stdio-fake-turn", "codex-stdio-k8s-sandbox-override", "codex-stdio-projected-writable-home", "codex-stdio-deepseek-profile-fake-turn", "codex-stdio-dsflash-go-profile-fake-turn", "codex-stdio-dsflash-go-config-metadata", "codex-stdio-minimax-m3-profile-fake-turn", "codex-stdio-deepseek-missing-secret-no-fallback", "codex-stdio-minimax-m3-missing-secret-no-fallback", "codex-stdio-config-model-authoritative", "codex-stdio-explicit-model-forwarded", "codex-stdio-final-agent-message-only", "codex-stdio-web-search-progress", "codex-stdio-stale-thread-resume-failed", "codex-stdio-live-tool-events", "codex-stdio-interrupt-before-turn-start-response", "codex-stdio-hard-timeout-during-tool-progress", "codex-stdio-noisy-reasoning-suppression", "codex-stdio-missing-turn-result", "codex-stdio-provider-auth-failed", "codex-stdio-provider-rate-limited", "codex-stdio-provider-invalid-tool-call", "codex-stdio-provider-compact-unsupported", "codex-stdio-provider-503-rpc-error", "codex-stdio-provider-503-terminal", "codex-stdio-provider-503-retry-event", "codex-stdio-invalid-json", "codex-stdio-timeout", "codex-stdio-idle-timeout-progress-refresh", "codex-stdio-command-failure-keeps-run-open", "codex-stdio-secret-unavailable", "codex-stdio-spawn-failure"] }; + return { name: "codex-stdio", tests: ["runner-lease-heartbeat", "runner-lease-conflict-recovery", "codex-stdio-fake-turn", "codex-stdio-k8s-sandbox-override", "codex-stdio-projected-writable-home", "codex-stdio-deepseek-profile-fake-turn", "codex-stdio-dsflash-go-profile-fake-turn", "codex-stdio-dsflash-go-config-metadata", "codex-stdio-minimax-m3-profile-fake-turn", "codex-stdio-deepseek-missing-secret-no-fallback", "codex-stdio-minimax-m3-missing-secret-no-fallback", "codex-stdio-config-model-authoritative", "codex-stdio-explicit-model-forwarded", "codex-stdio-final-agent-message-only", "codex-stdio-web-search-progress", "codex-stdio-stale-thread-resume-failed", "codex-stdio-live-tool-events", "codex-stdio-interrupt-before-turn-start-response", "codex-stdio-hard-timeout-during-tool-progress", "codex-stdio-noisy-reasoning-suppression", "codex-stdio-missing-turn-result", "codex-stdio-provider-auth-failed", "codex-stdio-provider-rate-limited", "codex-stdio-provider-invalid-tool-call", "codex-stdio-provider-compact-unsupported", "codex-stdio-provider-stream-disconnected", "codex-stdio-provider-503-rpc-error", "codex-stdio-provider-503-terminal", "codex-stdio-provider-unavailable", "codex-stdio-provider-503-retry-event", "codex-stdio-invalid-json", "codex-stdio-timeout", "codex-stdio-idle-timeout-progress-refresh", "codex-stdio-command-failure-keeps-run-open", "codex-stdio-secret-unavailable", "codex-stdio-spawn-failure"] }; } finally { await new Promise((resolve) => server.server.close(() => resolve())); } @@ -362,7 +364,7 @@ async function runFailureDoesNotTerminalRunCase(options: { client: ManagerClient }) as JsonRecord; assert.equal(result.stopped, "idle-timeout", "non one-shot runner should remain alive after a failed command until idle timeout"); assert.equal(result.terminalStatus, "failed"); - assert.equal(result.failureKind, "provider-unavailable"); + assert.equal(result.failureKind, "provider-http-error"); const command = await options.client.get(`/api/v1/runs/${item.runId}/commands/${item.commandId}`) as { state?: string }; assert.equal(command.state, "failed"); const run = await options.client.get(`/api/v1/runs/${item.runId}`) as { status?: string; terminalStatus?: string | null; failureKind?: string | null }; diff --git a/src/selftest/fake-codex-app-server.ts b/src/selftest/fake-codex-app-server.ts index 28be56b..d353f8b 100644 --- a/src/selftest/fake-codex-app-server.ts +++ b/src/selftest/fake-codex-app-server.ts @@ -82,6 +82,10 @@ for await (const line of rl) { respond(message.id, null, { code: -32000, message: "responseStreamDisconnected: HTTP 503 Service Unavailable from provider" }); continue; } + if (mode === "provider-stream-disconnected-rpc-error") { + respond(message.id, null, { code: -32000, message: "stream disconnected before completion: error sending request for url (http://138.2.51.180:8083/responses)" }); + continue; + } if (mode === "provider-401-rpc-error") { respond(message.id, null, { code: -32000, message: "HTTP 401 Unauthorized: invalid api key" }); continue; @@ -122,6 +126,14 @@ for await (const line of rl) { respond(message.id, { turn }); continue; } + if (mode === "provider-unavailable-terminal") { + turnCounter += 1; + const turn = { id: `turn_selftest_${turnCounter}`, status: "failed", error: { message: "provider is temporarily unavailable" } }; + notify("turn/started", { turn: { id: turn.id, status: "running" } }); + notify("turn/completed", { turn }); + respond(message.id, { turn }); + continue; + } if (mode === "provider-429-terminal") { turnCounter += 1; const turn = { id: `turn_selftest_${turnCounter}`, status: "failed", error: { message: "HTTP 429 Too Many Requests: rate limit exceeded" } };