diff --git a/.agents/skills/unidesk-sub2api/SKILL.md b/.agents/skills/unidesk-sub2api/SKILL.md index 12c115e1..a7d4c48c 100644 --- a/.agents/skills/unidesk-sub2api/SKILL.md +++ b/.agents/skills/unidesk-sub2api/SKILL.md @@ -129,7 +129,7 @@ bun scripts/cli.ts platform-infra sub2api codex-pool configure-local --confirm - `sub2api status`:Deployment/StatefulSet/Service/Secret 可见,运行镜像与 YAML 一致。 - `sub2api validate`:app、PostgreSQL、Redis 和 service proxy 基础检查通过。 -- `codex-pool validate`:统一 key 的 `GET /v1/models` 成功,owner balance / owner concurrency 已满足 YAML 最小值,capacity、WebSocket v2 和 temporary-unschedulable 运行时状态与 YAML 对齐。 +- `codex-pool validate`:统一 key 的 `GET /v1/models` 成功,并用 `localCodex.responsesSmokeModel` 跑一次小的 `POST /v1/responses` smoke;owner balance / owner concurrency 已满足 YAML 最小值,capacity、WebSocket v2 和 temporary-unschedulable 运行时状态与 YAML 对齐。若 Responses smoke `outcome=succeeded-with-failover`,说明请求已恢复但仍有账号级上游 5xx 需要按 evidence 继续降频或冷却。 - 若 `publicExposure.enabled=true`,确认 FRP path 可用;`expose --confirm` 会用未带 key 的 public `/v1/models` 401 作为网关可达性探针。 如果要证明真实模型请求可用,使用最小 `/v1/responses` 或等价 Codex smoke。不要把 group-level `/v1/models` 成功解释成每个上游 account 都健康。 @@ -143,7 +143,7 @@ bun scripts/cli.ts platform-infra sub2api codex-pool configure-local --confirm - 上游需要 WebSocket v2:只给该 profile 配 `openaiResponsesWebSocketsV2Mode: ctx_pool|passthrough`,跑 `sync --confirm`;把它当 capability candidate,容量仍以 YAML 中的 `capacity` 或默认值为准。 - Codex 启动 WebSocket 回退:用原入口 Codex smoke 复现,再用 bounded Sub2API 日志确认 account;对 WS handshake 5xx 的账号关闭 YAML WSv2 能力后同步,不把临时可用性推断写成调度配置。 - 上游要求 Codex User-Agent:只给该 profile 配 `upstreamUserAgent`,跑 `sync --confirm`。 -- 上游报 capacity/rate-limit/overload 后没有切号:先确认 `codex-pool validate` 里 `tempUnschedulable.ok=true` 且目标 account `runtimeEnabled=true`、规则数符合 YAML;若 mismatch,跑 `codex-pool sync --confirm`,不要手工 patch Sub2API credentials。 +- 上游报 capacity/rate-limit/overload/Bad Gateway 后没有切号或频繁先失败再恢复:先确认 `codex-pool validate` 里 `tempUnschedulable.ok=true` 且目标 account `runtimeEnabled=true`、规则数符合 YAML;再看 `validation.gatewayResponses.evidence.failovers` 的 account/upstream status。若 mismatch,跑 `codex-pool sync --confirm`,不要手工 patch Sub2API credentials。 - Codex auto compact 后丢上下文:先确认本机 `~/.codex/config.toml` 是否有 `supports_websockets = true` 和 `responses_websockets_v2 = true`,再看 `codex-pool validate` 的 WSv2 candidate 和 Sub2API 日志里的 `transport=responses_websockets_v2`。 - Codex smoke 有 reconnect/1013:这是上游并发/可用性问题,和 HTTP-only compact context-loss 分开处理;记录 session/log 证据并关联专项 issue,不要用运行时手补覆盖 YAML 容量。 diff --git a/config/platform-infra/sub2api-codex-pool.yaml b/config/platform-infra/sub2api-codex-pool.yaml index 74ac0ef8..709a17f1 100644 --- a/config/platform-infra/sub2api-codex-pool.yaml +++ b/config/platform-infra/sub2api-codex-pool.yaml @@ -26,7 +26,7 @@ pool: durationMinutes: 5 description: Transient upstream server failures should prefer another account for a short period. - statusCode: 502 - keywords: [capacity, overloaded, temporarily unavailable, temporary, upstream] + keywords: [capacity, overloaded, temporarily unavailable, temporary, upstream, bad gateway, upstream request failed, websocket dial, handshake response] durationMinutes: 5 description: Gateway upstream failures should prefer another account for a short period. - statusCode: 503 @@ -109,3 +109,4 @@ localCodex: wireApi: responses supportsWebSockets: true responsesWebSocketsV2: true + responsesSmokeModel: gpt-5.5 diff --git a/docs/reference/platform-infra.md b/docs/reference/platform-infra.md index 4e0e6d64..8b2305cd 100644 --- a/docs/reference/platform-infra.md +++ b/docs/reference/platform-infra.md @@ -34,7 +34,7 @@ - `profiles.entries[].openaiResponsesWebSocketsV2Mode` is the account-level Responses WebSocket v2 switch for OpenAI-compatible upstreams that require WebSocket transport. Allowed values are `off`, `ctx_pool`, and `passthrough`; omit the field unless that upstream needs it. - `profiles.entries[].upstreamUserAgent` is an optional account-level upstream request User-Agent override. Use it only for upstreams that require a Codex CLI compatible User-Agent; keep the value YAML-controlled and newline-free. - `publicExposure` controls the optional FRP bridge from master server to the G14 ClusterIP service. -- `localCodex` controls how the master server's current `~/.codex` consumer files are backed up and rewritten. Codex consumers using Sub2API must keep `supportsWebSockets` and `responsesWebSocketsV2` enabled so compacted long sessions can continue through the Responses WebSocket v2 response chain instead of falling back to HTTP-only summary context. +- `localCodex` controls how the master server's current `~/.codex` consumer files are backed up and rewritten. Codex consumers using Sub2API must keep `supportsWebSockets` and `responsesWebSocketsV2` enabled so compacted long sessions can continue through the Responses WebSocket v2 response chain instead of falling back to HTTP-only summary context. `localCodex.responsesSmokeModel` is the YAML-declared model used by `codex-pool validate` for the lightweight `POST /v1/responses` smoke. Enable account-level WebSocket v2 only for upstream profiles that have passed a direct Codex WSv2 probe. Treat this as a YAML-declared capability set, not a hard scheduling pin to one profile; `codex-pool validate` must show at least one current `webSocketsV2.schedulableEnabled` account, and runtime smoke remains the availability proof. The same validation reports each managed account's runtime WebSocket v2 mode and whether it matches YAML, so stale `ctx_pool` settings cannot silently keep routing Codex WS sessions to an upstream that closes with `no available account`, WS handshake 5xx, or before `response.completed`. @@ -42,7 +42,7 @@ When Codex startup repeatedly reports WebSocket reconnects or HTTPS fallback, pr Do not encode current availability assumptions in long-term reference prose. If an account needs a higher concurrency than `pool.defaultAccountCapacity`, make that a deliberate YAML override and verify it with `codex-pool validate`; the reference document should describe the rule, not repeat the current numeric value. -Do not enable Sub2API `pool_mode` for UniDesk-managed Codex accounts. `pool_mode` retries the same selected account path, while UniDesk's desired failover behavior is to mark the failing account temporarily unschedulable and let Sub2API choose another account from the group. `codex-pool validate` reports each managed account's temporary-unschedulable runtime alignment and should be used after `codex-pool sync --confirm`. +Do not enable Sub2API `pool_mode` for UniDesk-managed Codex accounts. `pool_mode` retries the same selected account path, while UniDesk's desired failover behavior is to mark the failing account temporarily unschedulable and let Sub2API choose another account from the group. `codex-pool validate` reports each managed account's temporary-unschedulable runtime alignment and should be used after `codex-pool sync --confirm`. Generic 502 bodies such as `Bad Gateway` and Codex-facing `Upstream request failed` must stay in the YAML cooldown policy so an intermittently bad account is cooled down instead of repeatedly adding latency at the next compact or Responses request. The request path is: @@ -69,7 +69,7 @@ Kubernetes readiness is not the same as pool availability: - The Sub2API app, PostgreSQL, and Redis manifests include container-level health probes. These only prove the pods and local dependencies are healthy enough for Kubernetes scheduling. - The FRP client deployment is currently a simple connector deployment and does not itself prove that master-local traffic reaches Sub2API. - No scheduled `CronJob`, `ServiceMonitor`, or `PodMonitor` currently proves the full unified Codex API path. -- `platform-infra sub2api validate` and `platform-infra sub2api codex-pool validate` are on-demand checks. Operational usage is documented in `$unidesk-sub2api`; they are acceptable for deployment closeout, but they are not continuous monitoring. +- `platform-infra sub2api validate` and `platform-infra sub2api codex-pool validate` are on-demand checks. Operational usage is documented in `$unidesk-sub2api`; they are acceptable for deployment closeout, but they are not continuous monitoring. `codex-pool validate` must test both `GET /v1/models` and a small `POST /v1/responses` request, and the Responses smoke should report request id, selected/final account evidence, upstream failover count, and whether the validation succeeded only after failover. When an automatic availability probe is added, it should be YAML-controlled and cover these layers without printing secrets: diff --git a/scripts/platform-infra-sub2api-codex-routing-contract-test.ts b/scripts/platform-infra-sub2api-codex-routing-contract-test.ts index 60011469..ea56e4b2 100644 --- a/scripts/platform-infra-sub2api-codex-routing-contract-test.ts +++ b/scripts/platform-infra-sub2api-codex-routing-contract-test.ts @@ -16,6 +16,7 @@ const parsed = Bun.YAML.parse(readFileSync(configPath, "utf8")) as { }; }; profiles?: { entries?: Array<{ profile?: string; accountName?: string; capacity?: number; openaiResponsesWebSocketsV2Mode?: string | null }> }; + localCodex?: { responsesSmokeModel?: string }; }; const entries = parsed.profiles?.entries ?? []; @@ -40,7 +41,11 @@ if (parsed.pool?.defaultTempUnschedulable?.enabled === true) { assertCondition(rules.every((rule) => Number.isInteger(rule.statusCode) && (rule.statusCode ?? 0) >= 100 && (rule.statusCode ?? 0) <= 599), "temporary unschedulable rules must declare valid HTTP status codes", rules); assertCondition(rules.every((rule) => Array.isArray(rule.keywords) && rule.keywords.length > 0), "temporary unschedulable rules must declare non-empty keywords", rules); assertCondition(rules.every((rule) => Number.isInteger(rule.durationMinutes) && (rule.durationMinutes ?? 0) > 0), "temporary unschedulable rules must declare positive cooldown durations", rules); + const gateway502Rule = rules.find((rule) => rule.statusCode === 502); + const gateway502Keywords = new Set((gateway502Rule?.keywords ?? []).map((keyword) => keyword.toLowerCase())); + assertCondition(gateway502Keywords.has("bad gateway") && gateway502Keywords.has("upstream request failed"), "502 temporary-unschedulable rule must catch generic gateway failures", gateway502Rule); } +assertCondition(typeof parsed.localCodex?.responsesSmokeModel === "string" && parsed.localCodex.responsesSmokeModel.length > 0, "localCodex.responsesSmokeModel must be declared for Responses smoke validation", parsed.localCodex); console.log(JSON.stringify({ ok: true, @@ -49,5 +54,7 @@ console.log(JSON.stringify({ "pool owner concurrency covers the YAML account capacity set", "optional WebSocket mode overrides use supported values", "temporary unschedulable rules are structurally valid when enabled", + "generic 502 gateway failures cool down the selected account", + "Responses smoke model is YAML-declared", ], })); diff --git a/scripts/src/platform-infra-sub2api-codex.ts b/scripts/src/platform-infra-sub2api-codex.ts index 279c623f..844ba9ff 100644 --- a/scripts/src/platform-infra-sub2api-codex.ts +++ b/scripts/src/platform-infra-sub2api-codex.ts @@ -128,6 +128,7 @@ interface CodexPoolLocalCodexConfig { wireApi: string; supportsWebSockets: boolean; responsesWebSocketsV2: boolean; + responsesSmokeModel: string; } interface CodexLocalConsumerTomlOptions { @@ -408,6 +409,7 @@ async function codexPoolConfigureLocal(config: UniDeskConfig, options: ConfirmOp wireApi: pool.localCodex.wireApi, supportsWebSockets: pool.localCodex.supportsWebSockets, responsesWebSocketsV2: pool.localCodex.responsesWebSocketsV2, + responsesSmokeModel: pool.localCodex.responsesSmokeModel, valuesPrinted: false, }, next: { @@ -632,6 +634,7 @@ function defaultCodexPoolConfig(): CodexPoolConfig { wireApi: "responses", supportsWebSockets: true, responsesWebSocketsV2: true, + responsesSmokeModel: "gpt-5.5", }, }; } @@ -666,7 +669,7 @@ export function defaultCodexTempUnschedulablePolicy(): CodexTempUnschedulablePol }, { statusCode: 502, - keywords: ["capacity", "overloaded", "temporarily unavailable", "temporary", "upstream"], + keywords: ["capacity", "overloaded", "temporarily unavailable", "temporary", "upstream", "bad gateway", "upstream request failed", "websocket dial", "handshake response"], durationMinutes: 5, description: "Gateway upstream failures should prefer another account for a short period.", }, @@ -884,10 +887,12 @@ function readLocalCodexConfig(value: unknown, defaults: CodexPoolLocalCodexConfi wireApi: stringValue(value.wireApi) ?? defaults.wireApi, supportsWebSockets: readBooleanConfig(value.supportsWebSockets, "localCodex.supportsWebSockets", defaults.supportsWebSockets), responsesWebSocketsV2: readBooleanConfig(value.responsesWebSocketsV2, "localCodex.responsesWebSocketsV2", defaults.responsesWebSocketsV2), + responsesSmokeModel: stringValue(value.responsesSmokeModel) ?? defaults.responsesSmokeModel, }; if (!/^[A-Za-z0-9._-]+$/u.test(config.backupSuffix)) throw new Error(`${codexPoolConfigPath}.localCodex.backupSuffix has an unsupported format`); validateProxyName(config.providerName, "localCodex.providerName"); validateProxyName(config.wireApi, "localCodex.wireApi"); + validateModelName(config.responsesSmokeModel, "localCodex.responsesSmokeModel"); return config; } @@ -901,6 +906,10 @@ function validateProxyName(value: string, key: string): void { if (!/^[A-Za-z0-9._-]+$/u.test(value)) throw new Error(`${codexPoolConfigPath}.${key} has an unsupported format`); } +function validateModelName(value: string, key: string): void { + if (!/^[A-Za-z0-9._:-]+$/u.test(value)) throw new Error(`${codexPoolConfigPath}.${key} has an unsupported model name`); +} + function validatePublicHostname(value: string, key: string): void { if (value.length > 253 || !/^[A-Za-z0-9.-]+$/u.test(value) || value.startsWith(".") || value.endsWith(".")) { throw new Error(`${codexPoolConfigPath}.${key} has an unsupported hostname format`); @@ -1710,6 +1719,7 @@ POOL_API_KEY_SECRET_KEY = "${pool.apiKeySecretKey}" MIN_OWNER_BALANCE_USD = ${JSON.stringify(pool.minOwnerBalanceUsd)} MIN_OWNER_CONCURRENCY = ${JSON.stringify(pool.minOwnerConcurrency)} POOL_DEFAULT_ACCOUNT_CAPACITY = ${JSON.stringify(pool.defaultAccountCapacity)} +RESPONSES_SMOKE_MODEL = ${JSON.stringify(pool.localCodex.responsesSmokeModel)} EXPECTED_ACCOUNT_CAPACITIES = ${JSON.stringify(desiredAccountCapacityMap(pool))} EXPECTED_ACCOUNT_WS_MODES = json.loads(${JSON.stringify(JSON.stringify(desiredAccountWebSocketsV2ModeMap(pool)))}) EXPECTED_ACCOUNT_TEMP_UNSCHEDULABLE = json.loads(${JSON.stringify(JSON.stringify(desiredAccountTempUnschedulableMap(pool)))}) @@ -2165,6 +2175,117 @@ def validate_gateway(api_key): "valuesPrinted": False, } +def response_output_preview(parsed): + if not isinstance(parsed, dict): + return "" + if isinstance(parsed.get("output_text"), str): + return parsed["output_text"][:240] + output = parsed.get("output") + if not isinstance(output, list): + return "" + parts = [] + for item in output: + if not isinstance(item, dict): + continue + content = item.get("content") + if not isinstance(content, list): + continue + for block in content: + if not isinstance(block, dict): + continue + text_value = block.get("text") + if isinstance(text_value, str) and text_value: + parts.append(text_value) + return "\\n".join(parts)[:240] + +def request_log_evidence(request_id): + proc = kubectl(["-n", NAMESPACE, "logs", "deployment/sub2api", "--since=5m", "--tail=800"]) + stdout = proc.stdout.decode("utf-8", errors="replace") + lines = [line for line in stdout.splitlines() if request_id in line] + failovers = [] + final = None + for line in lines: + json_start = line.find("{") + if json_start < 0: + continue + try: + item = json.loads(line[json_start:]) + except Exception: + continue + if "upstream_failover_switching" in line: + failovers.append({ + "accountId": item.get("account_id"), + "upstreamStatus": item.get("upstream_status"), + "switchCount": item.get("switch_count"), + "maxSwitches": item.get("max_switches"), + }) + if "http request completed" in line: + final = { + "accountId": item.get("account_id"), + "statusCode": item.get("status_code"), + "latencyMs": item.get("latency_ms"), + "path": item.get("path"), + } + return { + "requestId": request_id, + "matchedLogLineCount": len(lines), + "failovers": failovers, + "final": final, + "logsExitCode": proc.returncode, + "logsStderr": text(proc.stderr, 1000), + } + +def validate_gateway_responses(api_key): + request_id = "unidesk-codex-pool-validate-" + str(int(time.time() * 1000)) + payload = { + "model": RESPONSES_SMOKE_MODEL, + "input": "Reply exactly: unidesk-sub2api-validate-ok", + "stream": False, + "store": False, + "max_output_tokens": 32, + } + body = json.dumps(payload, separators=(",", ":")).encode("utf-8") + script = r''' +set -eu +token="$1" +request_id="$2" +tmp="$(mktemp)" +trap 'rm -f "$tmp"' EXIT +cat > "$tmp" +curl -sS -w '\\n__HTTP_CODE__:%{http_code}' -X POST \ + -H "Authorization: Bearer $token" \ + -H 'Content-Type: application/json' \ + -H "X-Request-ID: $request_id" \ + -H "OpenAI-Client-Request-ID: $request_id" \ + --data-binary @"$tmp" \ + http://127.0.0.1:8080/v1/responses +''' + started = time.time() + proc = run([ + "kubectl", "-n", NAMESPACE, "exec", "-i", APP_POD, + "--", "sh", "-c", script, "sh", api_key, request_id, + ], body) + resp = parse_curl_output(proc) + evidence = request_log_evidence(request_id) + parsed = resp.get("json") + failover_count = len(evidence.get("failovers") or []) + return { + "ok": resp.get("ok"), + "degraded": failover_count > 0, + "outcome": "succeeded-with-failover" if resp.get("ok") and failover_count > 0 else ("succeeded" if resp.get("ok") else "failed"), + "httpStatus": resp.get("httpStatus"), + "transportExitCode": resp.get("transportExitCode"), + "method": "POST /v1/responses", + "model": RESPONSES_SMOKE_MODEL, + "requestId": request_id, + "durationMs": int((time.time() - started) * 1000), + "outputTextPreview": response_output_preview(parsed), + "bodyPreview": "" if resp.get("ok") else text(resp.get("body", ""), 800), + "stderr": resp.get("stderr", ""), + "evidence": evidence, + "valuesPrinted": False, + } + def bool_value(value): if isinstance(value, bool): return value @@ -2413,8 +2534,10 @@ def run_sync(): owner_balance = ensure_pool_owner_balance(token, api_key_result["userId"]) owner_concurrency = ensure_pool_owner_concurrency(token, api_key_result["userId"]) gateway = validate_gateway(api_key) + responses_smoke = validate_gateway_responses(api_key) return { - "ok": gateway["ok"] is True and owner_concurrency["ok"] is True and capacity_status["ok"] is True and ws_v2_status["ok"] is True and temp_unschedulable_status["ok"] is True, + "ok": gateway["ok"] is True and responses_smoke["ok"] is True and owner_concurrency["ok"] is True and capacity_status["ok"] is True and ws_v2_status["ok"] is True and temp_unschedulable_status["ok"] is True, + "degraded": bool(responses_smoke.get("degraded")), "mode": "sync", "namespace": NAMESPACE, "serviceDns": SERVICE_DNS, @@ -2447,7 +2570,7 @@ def run_sync(): }, "ownerBalance": owner_balance, "ownerConcurrency": owner_concurrency, - "validation": {"gatewayModels": gateway}, + "validation": {"gatewayModels": gateway, "gatewayResponses": responses_smoke}, } def run_validate(): @@ -2465,8 +2588,10 @@ def run_validate(): ws_v2_status = account_ws_v2_status(token) temp_unschedulable_status = account_temp_unschedulable_status(token) gateway = validate_gateway(api_key) + responses_smoke = validate_gateway_responses(api_key) return { - "ok": gateway["ok"] is True and (owner_concurrency is None or owner_concurrency["ok"] is True) and capacity_status["ok"] is True and ws_v2_status["ok"] is True and temp_unschedulable_status["ok"] is True, + "ok": gateway["ok"] is True and responses_smoke["ok"] is True and (owner_concurrency is None or owner_concurrency["ok"] is True) and capacity_status["ok"] is True and ws_v2_status["ok"] is True and temp_unschedulable_status["ok"] is True, + "degraded": bool(responses_smoke.get("degraded")), "mode": "validate", "namespace": NAMESPACE, "serviceDns": SERVICE_DNS, @@ -2484,7 +2609,7 @@ def run_validate(): "capacity": capacity_status, "webSocketsV2": ws_v2_status, "tempUnschedulable": temp_unschedulable_status, - "validation": {"gatewayModels": gateway}, + "validation": {"gatewayModels": gateway, "gatewayResponses": responses_smoke}, } try: