fix: extend Sub2API compact proxy timeout
This commit is contained in:
@@ -105,6 +105,7 @@ bun scripts/cli.ts platform-infra sub2api codex-pool expose --confirm
|
||||
|
||||
- 由 `publicExposure` YAML 控制。默认公共端是 `publicBaseUrl`,master 本地消费端是 `masterBaseUrl`。
|
||||
- `expose --confirm` 只为 YAML 指定的 `remotePort` 补 master `frps` allow port,并在 G14 创建/更新 `sub2api-frpc`。
|
||||
- master Caddy site 也由 `publicExposure.masterCaddy` 渲染;`responseHeaderTimeoutSeconds` 必须足够覆盖 Codex `/responses/compact` 长请求,避免 Caddy 先返回 504 而 Sub2API 后台实际稍后成功。
|
||||
- 同一个 FRP TCP 入口同时暴露 OpenAI-compatible API 和 Sub2API 管理 UI `/login`。不要另开第二个管理端口,除非 YAML 明确声明新的暴露决策。
|
||||
- Sub2API Kubernetes Service 继续保持 ClusterIP。
|
||||
|
||||
@@ -140,7 +141,8 @@ bun scripts/cli.ts platform-infra sub2api codex-pool configure-local --confirm
|
||||
|
||||
- profile invalid:先修 `~/.codex/config.toml.<profile>` 的 `base_url`、`wire_api`、`model` 或 `auth.json.<profile>` 的 API key;不要在 YAML 中写密钥。
|
||||
- pool key 401:跑 `codex-pool sync --confirm` 重建 Sub2API key 与 k3s Secret 绑定,再跑 `codex-pool validate`。
|
||||
- FRP 不通:先看 `codex-pool expose --confirm` 输出的 `masterFrps`、`sub2api-frpc` 和 public 401 probe;需要低层证据时只用 `trans G14:k3s` 做 bounded 查询。
|
||||
- FRP 不通:先看 `codex-pool expose --confirm` 输出的 `masterFrps`、`masterCaddy`、`sub2api-frpc` 和 public 401 probe;需要低层证据时只用 `trans G14:k3s` 做 bounded 查询。
|
||||
- `/responses/compact` 约 30 秒后返回 504 但 Sub2API 日志稍后记录 `codex.remote_compact.succeeded` 时,优先检查 master Caddy `response_header_timeout` 是否由 YAML `publicExposure.masterCaddy.responseHeaderTimeoutSeconds` 渲染,修正后跑 `codex-pool expose --confirm`;这类边缘代理超时不会触发 Sub2API 账号级临时下线。
|
||||
- default profile 递归:检查 YAML default entry 是否使用 `*.pre-sub2api` 备份文件;必要时恢复备份后重新 `configure-local --confirm`。
|
||||
- 上游需要 WebSocket v2:先做 direct Codex WSv2 probe;通过后才给该 profile 配 `openaiResponsesWebSocketsV2Mode: ctx_pool|passthrough` 并跑 `sync --confirm`;把它当 capability candidate,容量仍以 YAML 中的 `capacity` 或默认值为准。
|
||||
- Codex 启动 WebSocket 回退:用原入口 Codex smoke 复现,再用 bounded Sub2API 日志确认 account;对 WS handshake 4xx/5xx、`openai.websocket_account_select_failed` 或 close-before-`response.completed` 的账号关闭 YAML WSv2 能力后同步。若没有剩余 WSv2-capable account,把 `localCodex.supportsWebSockets` 和 `localCodex.responsesWebSocketsV2` 一起关掉,不把临时可用性推断写成调度配置。
|
||||
|
||||
@@ -116,6 +116,7 @@ publicExposure:
|
||||
configPath: /etc/caddy/Caddyfile
|
||||
serviceName: caddy
|
||||
upstreamBaseUrl: http://127.0.0.1:21880
|
||||
responseHeaderTimeoutSeconds: 180
|
||||
localCodex:
|
||||
backupSuffix: pre-sub2api
|
||||
providerName: OpenAI
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
- `profiles.entries[].openaiResponsesWebSocketsV2Mode` is the account-level Responses WebSocket v2 switch for OpenAI-compatible upstreams that require WebSocket transport. Allowed values are `off`, `ctx_pool`, and `passthrough`; omit the field unless that upstream needs it.
|
||||
- `profiles.entries[].upstreamUserAgent` is an optional account-level upstream request User-Agent override. Use it only for upstreams that require a Codex CLI compatible User-Agent; keep the value YAML-controlled and newline-free.
|
||||
- `publicExposure` controls the optional FRP bridge from master server to the G14 ClusterIP service.
|
||||
- `publicExposure.masterCaddy.responseHeaderTimeoutSeconds` controls the master Caddy `response_header_timeout` for the public Sub2API site. It must be long enough for Codex `/responses/compact` requests. Otherwise Caddy can return a client-visible 504 before Sub2API finishes the upstream compact request; because that timeout happens at the edge proxy, it is not an account-level upstream failure, so Sub2API cannot use it for temporary-unschedulable failover.
|
||||
- `localCodex` controls how the master server's current `~/.codex` consumer files are backed up and rewritten. Keep `supportsWebSockets` and `responsesWebSocketsV2` in the same state, and enable them only when at least one YAML-managed account has a current direct Codex WSv2 smoke that passes. If no upstream profile can sustain Responses WSv2, the honest long-term state is `false/false` so Codex uses HTTP Responses directly instead of repeatedly reconnecting before `response.completed`. `localCodex.responsesSmokeModel` is the YAML-declared model used by `codex-pool validate` for the lightweight `POST /v1/responses` smoke.
|
||||
|
||||
Enable account-level WebSocket v2 only for upstream profiles that have passed a direct Codex WSv2 probe. Treat this as a YAML-declared capability set, not a hard scheduling pin to one profile; if `localCodex` enables WebSocket transport, `codex-pool validate` must show at least one current `webSocketsV2.schedulableEnabled` account, and runtime smoke remains the availability proof. The same validation reports each managed account's runtime WebSocket v2 mode and whether it matches YAML, so stale `ctx_pool` / `passthrough` settings cannot silently keep routing Codex WS sessions to an upstream that closes with `no available account`, WS handshake 5xx/4xx, or before `response.completed`.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { renderCodexLocalConsumerToml } from "./src/platform-infra-sub2api-codex";
|
||||
import { renderCaddySiteBlock, renderCodexLocalConsumerToml } from "./src/platform-infra-sub2api-codex";
|
||||
|
||||
function assertCondition(condition: unknown, message: string, detail: unknown = {}): void {
|
||||
if (!condition) throw new Error(`${message}: ${JSON.stringify(detail)}`);
|
||||
@@ -58,11 +58,19 @@ const disabled = renderCodexLocalConsumerToml(existing, {
|
||||
assertCondition(disabled.includes("supports_websockets = false"), "disabled localCodex policy must render provider WebSocket transport off", disabled);
|
||||
assertCondition(disabled.includes("responses_websockets_v2 = false"), "disabled localCodex policy must render Responses WebSocket v2 off", disabled);
|
||||
|
||||
const caddyBlock = renderCaddySiteBlock("sub2api.example.test", "http://127.0.0.1:21880", 180);
|
||||
|
||||
assertCondition(caddyBlock.includes("sub2api.example.test {"), "Caddy site block must use the configured domain", caddyBlock);
|
||||
assertCondition(caddyBlock.includes("reverse_proxy 127.0.0.1:21880"), "Caddy site block must use the configured local upstream", caddyBlock);
|
||||
assertCondition(caddyBlock.includes("response_header_timeout 180s"), "Caddy response header timeout must allow long Codex compact requests", caddyBlock);
|
||||
assertCondition(!caddyBlock.includes("response_header_timeout 30s"), "Caddy site block must not retain the old 30s compact timeout", caddyBlock);
|
||||
|
||||
console.log(JSON.stringify({
|
||||
ok: true,
|
||||
checks: [
|
||||
"existing Codex TOML is upgraded to the Sub2API WSv2 consumer settings",
|
||||
"fresh Codex TOML creates provider and feature sections with WSv2 enabled",
|
||||
"disabled localCodex WebSocket policy renders both consumer flags off",
|
||||
"Caddy site block uses the YAML-controlled long response-header timeout",
|
||||
],
|
||||
}));
|
||||
|
||||
@@ -18,6 +18,7 @@ const parsed = Bun.YAML.parse(readFileSync(configPath, "utf8")) as {
|
||||
};
|
||||
};
|
||||
profiles?: { entries?: Array<{ profile?: string; accountName?: string; capacity?: number; loadFactor?: number; openaiResponsesWebSocketsV2Mode?: string | null }> };
|
||||
publicExposure?: { masterCaddy?: { responseHeaderTimeoutSeconds?: number } };
|
||||
localCodex?: { supportsWebSockets?: boolean; responsesWebSocketsV2?: boolean; responsesSmokeModel?: string };
|
||||
};
|
||||
|
||||
@@ -39,6 +40,11 @@ assertCondition(entries.every((entry) => typeof entry.profile === "string" && en
|
||||
assertCondition(entries.every((entry) => typeof entry.accountName === "string" && entry.accountName.length > 0), "profile entries must declare account names", entries);
|
||||
assertCondition(entries.every((entry) => entry.capacity === undefined || (Number.isInteger(entry.capacity) && entry.capacity > 0)), "profile capacity overrides must be positive integers when declared", entries);
|
||||
assertCondition(entries.every((entry) => entry.loadFactor === undefined || (Number.isInteger(entry.loadFactor) && entry.loadFactor > 0)), "profile load factor overrides must be positive integers when declared", entries);
|
||||
assertCondition(
|
||||
Number.isInteger(parsed.publicExposure?.masterCaddy?.responseHeaderTimeoutSeconds) && (parsed.publicExposure?.masterCaddy?.responseHeaderTimeoutSeconds ?? 0) >= 180,
|
||||
"Sub2API public Caddy response-header timeout must allow long Codex compact requests",
|
||||
parsed.publicExposure?.masterCaddy,
|
||||
);
|
||||
assertCondition(
|
||||
entries.every((entry) => entry.openaiResponsesWebSocketsV2Mode === undefined || entry.openaiResponsesWebSocketsV2Mode === null || allowedWebSocketModes.has(entry.openaiResponsesWebSocketsV2Mode)),
|
||||
"profile WebSocket mode overrides must use supported values when declared",
|
||||
@@ -59,8 +65,8 @@ if (parsed.pool?.defaultTempUnschedulable?.enabled === true) {
|
||||
const gateway502Rule = rules.find((rule) => rule.statusCode === 502);
|
||||
const gateway502Keywords = new Set((gateway502Rule?.keywords ?? []).map((keyword) => keyword.toLowerCase()));
|
||||
assertCondition(gateway502Keywords.has("recovered upstream error"), "502 temporary-unschedulable rule must catch recovered upstream error wrappers", gateway502Rule);
|
||||
for (const keyword of ["unknown error", "upstream request failed", "context canceled"]) {
|
||||
assertCondition(gateway502Keywords.has(keyword), "502 temporary-unschedulable rule must catch compact gateway wrappers", { keyword, gateway502Rule });
|
||||
for (const keyword of ["unknown error", "upstream request failed", "context deadline exceeded", "context canceled"]) {
|
||||
assertCondition(gateway502Keywords.has(keyword), "502 temporary-unschedulable rule must catch compact gateway timeout wrappers", { keyword, gateway502Rule });
|
||||
}
|
||||
const largeContext413Rule = rules.find((rule) => rule.statusCode === 413);
|
||||
const largeContext413Keywords = new Set((largeContext413Rule?.keywords ?? []).map((keyword) => keyword.toLowerCase()));
|
||||
@@ -74,7 +80,7 @@ if (parsed.pool?.defaultTempUnschedulable?.enabled === true) {
|
||||
}
|
||||
const cloudflare524Rule = rules.find((rule) => rule.statusCode === 524);
|
||||
const cloudflare524Keywords = new Set((cloudflare524Rule?.keywords ?? []).map((keyword) => keyword.toLowerCase()));
|
||||
for (const keyword of ["timeout", "a timeout occurred", "cloudflare", "unknown error", "upstream request failed", "context canceled"]) {
|
||||
for (const keyword of ["timeout", "a timeout occurred", "cloudflare", "upstream request failed", "unknown error", "context canceled", "recovered upstream error"]) {
|
||||
assertCondition(cloudflare524Keywords.has(keyword), "524 temporary-unschedulable rule must catch Cloudflare timeout wrappers", { keyword, cloudflare524Rule });
|
||||
}
|
||||
const accountState403Rule = rules.find((rule) => rule.statusCode === 403);
|
||||
@@ -99,6 +105,7 @@ console.log(JSON.stringify({
|
||||
"routing config is schema-valid without profile-specific test gates",
|
||||
"pool owner concurrency covers the YAML account capacity set",
|
||||
"profile load factor overrides are YAML-controlled positive integers",
|
||||
"public Caddy response-header timeout is long enough for Codex compact",
|
||||
"optional WebSocket mode overrides use supported values",
|
||||
"local Codex WebSocket transport is consistent with YAML-declared WSv2-capable accounts",
|
||||
"temporary unschedulable rules are structurally valid when enabled",
|
||||
|
||||
@@ -44,13 +44,13 @@ for (const keyword of ["model_not_found", "no available channel for model"]) {
|
||||
for (const keyword of ["openai_error", "context length", "maximum context"]) {
|
||||
assertCondition(largeContext413Rule?.keywords?.includes(keyword), "413 rendered rule must catch large-context upstream failures", { keyword, largeContext413Rule });
|
||||
}
|
||||
for (const keyword of ["unknown error", "upstream request failed", "context canceled"]) {
|
||||
assertCondition(gateway502Rule?.keywords?.includes(keyword), "502 rendered rule must catch compact gateway wrappers", { keyword, gateway502Rule });
|
||||
for (const keyword of ["unknown error", "upstream request failed", "context deadline exceeded", "context canceled"]) {
|
||||
assertCondition(gateway502Rule?.keywords?.includes(keyword), "502 rendered rule must catch compact gateway timeout wrappers", { keyword, gateway502Rule });
|
||||
}
|
||||
for (const keyword of ["gateway timeout", "unknown error", "context deadline exceeded"]) {
|
||||
assertCondition(gatewayTimeout504Rule?.keywords?.includes(keyword), "504 rendered rule must preserve gateway-timeout cooldown keyword", { keyword, gatewayTimeout504Rule });
|
||||
}
|
||||
for (const keyword of ["timeout", "a timeout occurred", "cloudflare", "unknown error", "upstream request failed", "context canceled"]) {
|
||||
for (const keyword of ["timeout", "a timeout occurred", "cloudflare", "upstream request failed", "unknown error", "context canceled", "recovered upstream error"]) {
|
||||
assertCondition(cloudflare524Rule?.keywords?.includes(keyword), "524 rendered rule must catch Cloudflare timeout wrappers", { keyword, cloudflare524Rule });
|
||||
}
|
||||
|
||||
|
||||
@@ -124,6 +124,7 @@ interface CodexPoolPublicExposureConfig {
|
||||
configPath: string;
|
||||
serviceName: string;
|
||||
upstreamBaseUrl: string;
|
||||
responseHeaderTimeoutSeconds: number;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -651,6 +652,7 @@ function defaultCodexPoolConfig(): CodexPoolConfig {
|
||||
configPath: "/etc/caddy/Caddyfile",
|
||||
serviceName: "caddy",
|
||||
upstreamBaseUrl: "http://127.0.0.1:21880",
|
||||
responseHeaderTimeoutSeconds: 180,
|
||||
},
|
||||
},
|
||||
localCodex: {
|
||||
@@ -822,6 +824,15 @@ function readAccountLoadFactor(value: unknown, key: string): number {
|
||||
return loadFactor;
|
||||
}
|
||||
|
||||
/**
 * Resolves a Caddy timeout, in whole seconds, from a raw configuration value.
 *
 * @param value - Raw value taken from the parsed config; may be absent.
 * @param key - Dotted config key, used only to build the error message.
 * @param fallback - Returned unchanged when `value` is `undefined` or `null`.
 * @returns `fallback` for an absent value, otherwise the validated integer.
 * @throws Error when the value is present but is not an integer in the
 *   inclusive range 30..900 (including when `numberValue` yields `null`).
 */
function readCaddyTimeoutSeconds(value: unknown, key: string, fallback: number): number {
  // `== null` matches exactly `undefined` and `null`; an absent setting is not an error.
  if (value == null) return fallback;

  // NOTE(review): `numberValue` is defined elsewhere in this file; it presumably
  // returns `null` for non-numeric input — confirm against its definition.
  const parsed = numberValue(value);

  // Accept only whole seconds inside the inclusive 30..900 window.
  if (parsed !== null && Number.isInteger(parsed) && parsed >= 30 && parsed <= 900) {
    return parsed;
  }

  throw new Error(`${codexPoolConfigPath}.${key} must be an integer from 30 to 900`);
}
|
||||
|
||||
function readTempUnschedulablePolicy(value: unknown, key: string, fallback: CodexTempUnschedulablePolicy): CodexTempUnschedulablePolicy {
|
||||
if (value === undefined || value === null) return cloneTempUnschedulablePolicy(fallback);
|
||||
if (!isRecord(value)) throw new Error(`${codexPoolConfigPath}.${key} must be a YAML object`);
|
||||
@@ -916,6 +927,11 @@ function readPublicExposureConfig(value: unknown, defaults: CodexPoolPublicExpos
|
||||
configPath: stringValue(masterCaddyValue.configPath) ?? defaults.masterCaddy.configPath,
|
||||
serviceName: stringValue(masterCaddyValue.serviceName) ?? defaults.masterCaddy.serviceName,
|
||||
upstreamBaseUrl: normalizeBaseUrl(stringValue(masterCaddyValue.upstreamBaseUrl)) ?? defaults.masterCaddy.upstreamBaseUrl,
|
||||
responseHeaderTimeoutSeconds: readCaddyTimeoutSeconds(
|
||||
masterCaddyValue.responseHeaderTimeoutSeconds,
|
||||
"publicExposure.masterCaddy.responseHeaderTimeoutSeconds",
|
||||
defaults.masterCaddy.responseHeaderTimeoutSeconds,
|
||||
),
|
||||
},
|
||||
};
|
||||
validateKubernetesName(config.configMapName, "publicExposure.configMapName", true);
|
||||
@@ -1092,6 +1108,7 @@ function publicExposureSummary(pool: CodexPoolConfig): Record<string, unknown> {
|
||||
configPath: pool.publicExposure.masterCaddy.configPath,
|
||||
serviceName: pool.publicExposure.masterCaddy.serviceName,
|
||||
upstreamBaseUrl: pool.publicExposure.masterCaddy.upstreamBaseUrl,
|
||||
responseHeaderTimeoutSeconds: pool.publicExposure.masterCaddy.responseHeaderTimeoutSeconds,
|
||||
},
|
||||
upstream: {
|
||||
localIP: pool.publicExposure.localIP,
|
||||
@@ -1156,7 +1173,7 @@ async function applyMasterCaddySite(pool: CodexPoolConfig): Promise<Record<strin
|
||||
const path = caddy.configPath;
|
||||
if (!existsSync(path)) return { ok: false, error: "master-caddy-config-missing", path, valuesPrinted: false };
|
||||
const before = readFileSync(path, "utf8");
|
||||
const desiredBlock = renderCaddySiteBlock(caddy.domain, caddy.upstreamBaseUrl);
|
||||
const desiredBlock = renderCaddySiteBlock(caddy.domain, caddy.upstreamBaseUrl, caddy.responseHeaderTimeoutSeconds);
|
||||
const existing = caddySiteBlock(before, caddy.domain);
|
||||
const alreadyConfigured = existing === desiredBlock;
|
||||
let backupPath: string | null = null;
|
||||
@@ -1187,6 +1204,7 @@ async function applyMasterCaddySite(pool: CodexPoolConfig): Promise<Record<strin
|
||||
domain: caddy.domain,
|
||||
upstreamBaseUrl: caddy.upstreamBaseUrl,
|
||||
serviceName: caddy.serviceName,
|
||||
responseHeaderTimeoutSeconds: caddy.responseHeaderTimeoutSeconds,
|
||||
validate: {
|
||||
exitCode: validate.exitCode,
|
||||
stdoutTail: Buffer.from(validate.stdout).toString("utf8").slice(-1000),
|
||||
@@ -1206,7 +1224,7 @@ async function applyMasterCaddySite(pool: CodexPoolConfig): Promise<Record<strin
|
||||
};
|
||||
}
|
||||
|
||||
function renderCaddySiteBlock(domain: string, upstreamBaseUrl: string): string {
|
||||
export function renderCaddySiteBlock(domain: string, upstreamBaseUrl: string, responseHeaderTimeoutSeconds = 180): string {
|
||||
const upstream = new URL(upstreamBaseUrl);
|
||||
const upstreamHost = `${upstream.hostname}${upstream.port ? `:${upstream.port}` : ""}`;
|
||||
return `${domain} {
|
||||
@@ -1216,7 +1234,7 @@ function renderCaddySiteBlock(domain: string, upstreamBaseUrl: string): string {
|
||||
header_up X-Real-IP {remote_host}
|
||||
transport http {
|
||||
dial_timeout 5s
|
||||
response_header_timeout 30s
|
||||
response_header_timeout ${responseHeaderTimeoutSeconds}s
|
||||
}
|
||||
}
|
||||
}`;
|
||||
|
||||
Reference in New Issue
Block a user