fix: cool down Sub2API large context upstream failures
This commit is contained in:
@@ -147,7 +147,7 @@ bun scripts/cli.ts platform-infra sub2api codex-pool configure-local --confirm
|
||||
- 上游报 capacity/rate-limit/overload/Bad Gateway/Gateway Timeout 后没有切号或频繁先失败再恢复:先确认 `codex-pool validate` 里 `tempUnschedulable.ok=true` 且目标 account `runtimeEnabled=true`、规则数符合 YAML;再看 `validation.gatewayResponses.evidence.failovers` 的 account/upstream status。若 mismatch,跑 `codex-pool sync --confirm`,不要手工 patch Sub2API credentials。
|
||||
- Codex 报 weekly-limit、`less than 10% of your weekly limit left`、`Run /status for a breakdown` 等账号状态/软配额提示并要求切号:把稳定 body 关键词放进 `pool.defaultTempUnschedulable` 的 403 和 429 规则,跑 `codex-pool sync --confirm`,再用 `codex-pool validate` 确认每个 managed account 的 runtime 403/429 rules 都包含这些关键词。Sub2API 临时下线规则按 HTTP status + body keyword 匹配;如果该文案是 HTTP 200 成功内容,需要另提响应分类能力 issue,不能只靠 YAML 冷却规则声明解决。
|
||||
- 上游 503 响应体出现 `model_not_found`、`No available channel for model ...` 或同类稳定模型路由失败文案:把稳定 body 关键词放进 `pool.defaultTempUnschedulable` 的 503 规则,跑 `codex-pool sync --confirm`,再用 `codex-pool validate` 确认目标 account 的 runtime 503 rule 包含这些关键词;不要用 account membership、priority、capacity、loadFactor、WebSocket mode 或 User-Agent 改动掩盖该错误族。
|
||||
- 上游错误反复触发:默认错误冷却按严重程度分层;临时问题可从 10 分钟起步,网关/服务不可用/过载/模型路由类应更长,认证/权限/配额/账号状态类使用最长冷却。`Recovered upstream error ...`、`Bad Gateway`、`Gateway Timeout`、Codex-facing `Upstream request failed`、`Unknown error`、`context deadline exceeded`、`context canceled`、`model_not_found` 和 `No available channel for model` 这类稳定包装文案都应留在 YAML 冷却政策里。具体数值只以 YAML 为准,修改后必须 `codex-pool sync --confirm` 和 `codex-pool validate`。长期判定见 `docs/reference/platform-infra.md`。
|
||||
- 上游错误反复触发:默认错误冷却按严重程度分层;临时问题可从 10 分钟起步,网关/服务不可用/过载/模型路由类应更长,认证/权限/配额/账号状态类使用最长冷却。`Recovered upstream error ...`、`Bad Gateway`、`Gateway Timeout`、Codex-facing `Upstream request failed`、`Unknown error`、`context deadline exceeded`、`context canceled`、`model_not_found`、`No available channel for model`,以及大上下文 HTTP `413` 响应体中的 `openai_error` 这类稳定包装文案都应留在 YAML 冷却政策里。具体数值只以 YAML 为准,修改后必须 `codex-pool sync --confirm` 和 `codex-pool validate`。长期判定见 `docs/reference/platform-infra.md`。
|
||||
- Codex auto compact 后丢上下文:先确认本机 `~/.codex/config.toml` 是否有 `supports_websockets = true` 和 `responses_websockets_v2 = true`,再看 `codex-pool validate` 的 WSv2 candidate 和 Sub2API 日志里的 `transport=responses_websockets_v2`。
|
||||
- Codex smoke 有 reconnect/1013:这是上游并发/可用性问题,和 HTTP-only compact context-loss 分开处理;记录 session/log 证据并关联专项 issue,不要用运行时手工修补覆盖 YAML 容量。
|
||||
|
||||
|
||||
@@ -30,6 +30,10 @@ pool:
|
||||
keywords: [capacity, overloaded, temporarily unavailable, temporary, upstream, bad gateway, upstream request failed, websocket dial, handshake response, recovered upstream error]
|
||||
durationMinutes: 30
|
||||
description: Gateway upstream failures, including recovered upstream error wrappers, should cool down longer.
|
||||
- statusCode: 413
|
||||
keywords: [openai_error, payload too large, request too large, context length, context window, maximum context]
|
||||
durationMinutes: 30
|
||||
description: Large-context upstream failures should cool down the selected account so a larger-context channel can handle the request.
|
||||
- statusCode: 503
|
||||
keywords: [capacity, overloaded, temporarily unavailable, temporary, upstream, recovered upstream error, model_not_found, no available channel for model]
|
||||
durationMinutes: 30
|
||||
@@ -56,6 +60,7 @@ profiles:
|
||||
accountName: unidesk-codex-gptclub
|
||||
configFile: config.toml.gptclub
|
||||
authFile: auth.json.gptclub
|
||||
capacity: 10
|
||||
priority: 100
|
||||
- profile: only
|
||||
accountName: unidesk-codex-only
|
||||
|
||||
@@ -48,6 +48,11 @@ if (parsed.pool?.defaultTempUnschedulable?.enabled === true) {
|
||||
const gateway502Rule = rules.find((rule) => rule.statusCode === 502);
|
||||
const gateway502Keywords = new Set((gateway502Rule?.keywords ?? []).map((keyword) => keyword.toLowerCase()));
|
||||
assertCondition(gateway502Keywords.has("recovered upstream error"), "502 temporary-unschedulable rule must catch recovered upstream error wrappers", gateway502Rule);
|
||||
const largeContext413Rule = rules.find((rule) => rule.statusCode === 413);
|
||||
const largeContext413Keywords = new Set((largeContext413Rule?.keywords ?? []).map((keyword) => keyword.toLowerCase()));
|
||||
for (const keyword of ["openai_error", "context length", "maximum context"]) {
|
||||
assertCondition(largeContext413Keywords.has(keyword), "413 temporary-unschedulable rule must catch large-context upstream failures", { keyword, largeContext413Rule });
|
||||
}
|
||||
const gateway504Rule = rules.find((rule) => rule.statusCode === 504);
|
||||
const gateway504Keywords = new Set((gateway504Rule?.keywords ?? []).map((keyword) => keyword.toLowerCase()));
|
||||
for (const keyword of ["gateway timeout", "unknown error", "context deadline exceeded"]) {
|
||||
@@ -78,6 +83,7 @@ console.log(JSON.stringify({
|
||||
"optional WebSocket mode overrides use supported values",
|
||||
"temporary unschedulable rules are structurally valid when enabled",
|
||||
"generic recovered upstream error wrappers are caught by cooldown rules",
|
||||
"large-context upstream failures are caught by the 413 cooldown rule",
|
||||
"gateway timeout wrappers are caught by the 504 cooldown rule",
|
||||
"Codex weekly-limit prompts are caught by account-state and quota cooldown rules",
|
||||
"upstream model-routing failures are caught by the 503 cooldown rule",
|
||||
|
||||
@@ -31,6 +31,7 @@ const accountState403Rule = rules.find((rule) => rule.error_code === 403);
|
||||
const quota429Rule = rules.find((rule) => rule.error_code === 429);
|
||||
const serviceUnavailable503Rule = rules.find((rule) => rule.error_code === 503);
|
||||
const gatewayTimeout504Rule = rules.find((rule) => rule.error_code === 504);
|
||||
const largeContext413Rule = rules.find((rule) => rule.error_code === 413);
|
||||
for (const keyword of ["weekly limit", "less than 10% of your weekly limit left", "run /status for a breakdown"]) {
|
||||
assertCondition(accountState403Rule?.keywords?.includes(keyword), "403 rendered rule must preserve Codex weekly-limit account-state keyword", { keyword, accountState403Rule });
|
||||
assertCondition(quota429Rule?.keywords?.includes(keyword), "429 rendered rule must preserve Codex weekly-limit quota keyword", { keyword, quota429Rule });
|
||||
@@ -38,6 +39,9 @@ for (const keyword of ["weekly limit", "less than 10% of your weekly limit left"
|
||||
for (const keyword of ["model_not_found", "no available channel for model"]) {
|
||||
assertCondition(serviceUnavailable503Rule?.keywords?.includes(keyword), "503 rendered rule must catch upstream model-routing failures", { keyword, serviceUnavailable503Rule });
|
||||
}
|
||||
for (const keyword of ["openai_error", "context length", "maximum context"]) {
|
||||
assertCondition(largeContext413Rule?.keywords?.includes(keyword), "413 rendered rule must catch large-context upstream failures", { keyword, largeContext413Rule });
|
||||
}
|
||||
for (const keyword of ["gateway timeout", "unknown error", "context deadline exceeded"]) {
|
||||
assertCondition(gatewayTimeout504Rule?.keywords?.includes(keyword), "504 rendered rule must preserve gateway-timeout cooldown keyword", { keyword, gatewayTimeout504Rule });
|
||||
}
|
||||
@@ -56,6 +60,7 @@ console.log(JSON.stringify({
|
||||
"temporary unschedulable policy renders to Sub2API credential field names",
|
||||
"temporary unschedulable rendering follows the input policy without hard-coded policy gates",
|
||||
"Codex weekly-limit prompt keywords render into 403 and 429 cooldown rules",
|
||||
"large-context upstream failures render into the 413 cooldown rule",
|
||||
"upstream model-routing failures render into the 503 cooldown rule",
|
||||
"gateway timeout wrappers render into the 504 cooldown rule",
|
||||
"disabled policies clear runtime rules",
|
||||
|
||||
@@ -682,6 +682,12 @@ export function defaultCodexTempUnschedulablePolicy(): CodexTempUnschedulablePol
|
||||
durationMinutes: 30,
|
||||
description: "Gateway upstream failures, including recovered upstream error wrappers, should cool down longer.",
|
||||
},
|
||||
{
|
||||
statusCode: 413,
|
||||
keywords: ["openai_error", "payload too large", "request too large", "context length", "context window", "maximum context"],
|
||||
durationMinutes: 30,
|
||||
description: "Large-context upstream failures should cool down the selected account so a larger-context channel can handle the request.",
|
||||
},
|
||||
{
|
||||
statusCode: 503,
|
||||
keywords: ["capacity", "overloaded", "temporarily unavailable", "temporary", "upstream", "recovered upstream error", "model_not_found", "no available channel for model"],
|
||||
|
||||
Reference in New Issue
Block a user