151 lines
12 KiB
TypeScript
151 lines
12 KiB
TypeScript
import { readFileSync } from "node:fs";
|
|
import { rootPath } from "./src/config";
|
|
|
|
/**
 * Throws when `condition` is falsy; returns normally otherwise.
 *
 * The error message is `message`, followed by `": "` and `detail` rendered with
 * `JSON.stringify`. Declared as an assertion function so the compiler narrows
 * `condition` for the code that follows a call.
 *
 * @param condition - Value tested for truthiness.
 * @param message - Description of the violated requirement.
 * @param detail - Diagnostic context appended to the message. Defaults to `{}`,
 *   which also applies when `undefined` is passed explicitly.
 * @throws Error when `condition` is falsy.
 */
function assertCondition(condition: unknown, message: string, detail: unknown = {}): asserts condition {
  if (condition) return;

  let renderedDetail: string;
  try {
    renderedDetail = JSON.stringify(detail);
  } catch (error: unknown) {
    // JSON.stringify throws on cyclic structures and BigInt values. Without this
    // fallback that TypeError would replace the assertion message entirely.
    const reason = error instanceof Error ? error.message : String(error);
    renderedDetail = `<unserializable detail: ${reason}>`;
  }
  throw new Error(`${message}: ${renderedDetail}`);
}
|
|
|
|
/** One rule of the pool's default temporary-unschedulable policy as written in the YAML. */
type TempUnschedulableRule = {
  statusCode?: number;
  keywords?: string[];
  durationMinutes?: number;
  description?: string;
};

/** One entry of `profiles.entries`. Every field is optional at the type level. */
type ProfileEntry = {
  profile?: string;
  accountName?: string;
  capacity?: number;
  loadFactor?: number;
  openaiResponsesWebSocketsV2Mode?: string | null;
};

/**
 * Expected shape of the pool YAML. Nothing is validated at parse time, so every
 * field is optional and the runtime values may not match these types.
 */
type CodexPoolConfig = {
  pool?: {
    defaultAccountPriority?: number;
    defaultAccountCapacity?: number;
    defaultAccountLoadFactor?: number;
    minOwnerConcurrency?: number;
    defaultTempUnschedulable?: {
      enabled?: boolean;
      rules?: TempUnschedulableRule[];
    };
  };
  profiles?: { entries?: ProfileEntry[] };
  publicExposure?: { masterCaddy?: { responseHeaderTimeoutSeconds?: number } };
  localCodex?: { supportsWebSockets?: boolean; responsesWebSocketsV2?: boolean; responsesSmokeModel?: string };
};

const configPath = rootPath("config", "platform-infra", "sub2api-codex-pool.yaml");

// A nullish parse result (presumably what an empty or comment-only document
// yields; confirm against Bun's YAML docs) is replaced with `{}` so that later
// property reads on `parsed` cannot throw a TypeError.
const parsed = (Bun.YAML.parse(readFileSync(configPath, "utf8")) ?? {}) as CodexPoolConfig;

// Anything that is not an array is treated as "nothing declared", so the array
// methods used on `entries` and `rules` cannot throw on a malformed document.
const declaredEntries = parsed.profiles?.entries;
const entries: ProfileEntry[] = Array.isArray(declaredEntries) ? declaredEntries : [];
const declaredRules = parsed.pool?.defaultTempUnschedulable?.rules;
const rules: TempUnschedulableRule[] = Array.isArray(declaredRules) ? declaredRules : [];

// Pool-wide defaults fall back to 0 when the YAML omits them.
const defaultPriority = parsed.pool?.defaultAccountPriority ?? 0;
const defaultCapacity = parsed.pool?.defaultAccountCapacity ?? 0;
const defaultLoadFactor = parsed.pool?.defaultAccountLoadFactor ?? 0;

// Sum of per-entry capacities; an entry without its own capacity counts as the pool default.
const desiredCapacity = entries.reduce((total, entry) => total + (entry.capacity ?? defaultCapacity), 0);

// An omitted owner concurrency resolves to the capacity sum computed above.
const explicitMinOwnerConcurrency = parsed.pool?.minOwnerConcurrency;
const resolvedMinOwnerConcurrency = explicitMinOwnerConcurrency ?? desiredCapacity;

const allowedWebSocketModes = new Set(["off", "ctx_pool", "passthrough"]);

// Entries whose WebSocket mode is set to something other than "off"; an omitted, null or empty mode does not count.
const wsEnabledEntries = entries.filter((entry) => entry.openaiResponsesWebSocketsV2Mode && entry.openaiResponsesWebSocketsV2Mode !== "off");

// Either local flag being exactly `true` counts as enabled.
const localWsEnabled = parsed.localCodex?.supportsWebSockets === true || parsed.localCodex?.responsesWebSocketsV2 === true;
|
|
|
|
// Structural checks. Every call throws on a violated requirement, so the order
// below decides which problem is reported first.

/** True when `value` is an integer strictly greater than zero. */
const isPositiveInteger = (value: number | undefined): boolean => Number.isInteger(value) && (value ?? 0) > 0;

assertCondition(entries.length > 0, "Codex pool must declare YAML-managed profile entries", parsed.profiles);

// Pool-wide defaults.
assertCondition(Number.isInteger(defaultPriority) && defaultPriority >= 0, "defaultAccountPriority must be a non-negative integer", parsed.pool);
assertCondition(isPositiveInteger(defaultCapacity), "defaultAccountCapacity must be a positive integer", parsed.pool);
assertCondition(isPositiveInteger(defaultLoadFactor), "defaultAccountLoadFactor must be a positive integer", parsed.pool);

// Per-entry identity fields and optional numeric overrides.
assertCondition(
  entries.every(({ profile }) => typeof profile === "string" && profile.length > 0),
  "profile entries must declare profile names",
  entries,
);
assertCondition(
  entries.every(({ accountName }) => typeof accountName === "string" && accountName.length > 0),
  "profile entries must declare account names",
  entries,
);
assertCondition(
  entries.every(({ capacity }) => capacity === undefined || isPositiveInteger(capacity)),
  "profile capacity overrides must be positive integers when declared",
  entries,
);
assertCondition(
  entries.every(({ loadFactor }) => loadFactor === undefined || isPositiveInteger(loadFactor)),
  "profile load factor overrides must be positive integers when declared",
  entries,
);

// The public Caddy response-header timeout must be an integer of at least 180 seconds.
const responseHeaderTimeoutSeconds = parsed.publicExposure?.masterCaddy?.responseHeaderTimeoutSeconds;
assertCondition(
  Number.isInteger(responseHeaderTimeoutSeconds) && (responseHeaderTimeoutSeconds ?? 0) >= 180,
  "Sub2API public Caddy response-header timeout must allow long Codex compact requests",
  parsed.publicExposure?.masterCaddy,
);

// A WebSocket mode may be omitted or null; anything else must be one of the allowed modes.
assertCondition(
  entries.every((entry) => {
    const mode = entry.openaiResponsesWebSocketsV2Mode;
    return mode == null || allowedWebSocketModes.has(mode);
  }),
  "profile WebSocket mode overrides must use supported values when declared",
  entries,
);

// The two local flags must be strictly equal (both omitted also passes).
assertCondition(
  parsed.localCodex?.supportsWebSockets === parsed.localCodex?.responsesWebSocketsV2,
  "local Codex WebSocket feature flags must be changed together",
  parsed.localCodex,
);

// Local transport and account declarations have to agree in both directions.
if (!localWsEnabled) {
  assertCondition(
    wsEnabledEntries.length === 0,
    "local Codex WebSocket transport disabled means all account WSv2 capability declarations must be off or omitted",
    { localCodex: parsed.localCodex, wsEnabledEntries },
  );
} else {
  assertCondition(
    wsEnabledEntries.length > 0,
    "local Codex WebSocket transport must not be enabled without at least one YAML WSv2-capable account",
    { localCodex: parsed.localCodex, entries },
  );
}

// Owner concurrency: an explicit value must be a positive integer, and the
// resolved value must not be below the summed account capacity.
assertCondition(
  explicitMinOwnerConcurrency === undefined || isPositiveInteger(explicitMinOwnerConcurrency),
  "explicit pool owner concurrency override must be a positive integer when declared",
  { minOwnerConcurrency: explicitMinOwnerConcurrency },
);
assertCondition(
  resolvedMinOwnerConcurrency >= desiredCapacity,
  "pool owner concurrency must auto-resolve or be configured to cover the declared account capacity set",
  {
    minOwnerConcurrency: explicitMinOwnerConcurrency,
    minOwnerConcurrencySource: explicitMinOwnerConcurrency === undefined ? "auto-capacity-sum" : "yaml",
    resolvedMinOwnerConcurrency,
    desiredCapacity,
  },
);
|
|
// Rule checks run only when the temporary-unschedulable policy is explicitly enabled.
if (parsed.pool?.defaultTempUnschedulable?.enabled === true) {
  type CooldownRule = (typeof rules)[number];

  // First rule declared for the given status code, or undefined when there is none.
  const ruleForStatus = (statusCode: number): CooldownRule | undefined =>
    rules.find((candidate) => candidate.statusCode === statusCode);

  // Lower-cased keyword set of a rule; empty when the rule or its keywords are missing.
  const lowerCasedKeywords = (rule: CooldownRule | undefined): Set<string> =>
    new Set((rule?.keywords ?? []).map((text) => text.toLowerCase()));

  // Asserts each required keyword in order; `describe` builds the detail payload for a keyword.
  const requireKeywords = (
    present: Set<string>,
    required: readonly string[],
    message: string,
    describe: (keyword: string) => unknown,
  ): void => {
    for (const keyword of required) {
      assertCondition(present.has(keyword), message, describe(keyword));
    }
  };

  // Shape of the rule list itself.
  assertCondition(rules.length > 0, "enabled temporary unschedulable policy must declare rules", parsed.pool?.defaultTempUnschedulable);
  assertCondition(
    rules.every(({ statusCode }) => Number.isInteger(statusCode) && (statusCode ?? 0) >= 100 && (statusCode ?? 0) <= 599),
    "temporary unschedulable rules must declare valid HTTP status codes",
    rules,
  );
  assertCondition(
    rules.every(({ keywords }) => Array.isArray(keywords) && keywords.length > 0),
    "temporary unschedulable rules must declare non-empty keywords",
    rules,
  );
  assertCondition(
    rules.every(({ durationMinutes }) => Number.isInteger(durationMinutes) && (durationMinutes ?? 0) > 0),
    "temporary unschedulable rules must declare positive cooldown durations",
    rules,
  );

  // 502
  const gateway502Rule = ruleForStatus(502);
  const gateway502Keywords = lowerCasedKeywords(gateway502Rule);
  assertCondition(
    gateway502Keywords.has("recovered upstream error"),
    "502 temporary-unschedulable rule must catch recovered upstream error wrappers",
    gateway502Rule,
  );
  requireKeywords(
    gateway502Keywords,
    ["unknown error", "upstream request failed", "context deadline exceeded", "context canceled"],
    "502 temporary-unschedulable rule must catch compact gateway timeout wrappers",
    (keyword) => ({ keyword, gateway502Rule }),
  );

  // 413
  const largeContext413Rule = ruleForStatus(413);
  const largeContext413Keywords = lowerCasedKeywords(largeContext413Rule);
  requireKeywords(
    largeContext413Keywords,
    ["openai_error", "context length", "maximum context"],
    "413 temporary-unschedulable rule must catch large-context upstream failures",
    (keyword) => ({ keyword, largeContext413Rule }),
  );

  // 504
  const gateway504Rule = ruleForStatus(504);
  const gateway504Keywords = lowerCasedKeywords(gateway504Rule);
  requireKeywords(
    gateway504Keywords,
    ["gateway timeout", "unknown error", "context deadline exceeded"],
    "504 temporary-unschedulable rule must catch gateway timeout wrappers",
    (keyword) => ({ keyword, gateway504Rule }),
  );

  // 524
  const cloudflare524Rule = ruleForStatus(524);
  const cloudflare524Keywords = lowerCasedKeywords(cloudflare524Rule);
  requireKeywords(
    cloudflare524Keywords,
    ["timeout", "a timeout occurred", "cloudflare", "upstream request failed", "unknown error", "context canceled", "recovered upstream error"],
    "524 temporary-unschedulable rule must catch Cloudflare timeout wrappers",
    (keyword) => ({ keyword, cloudflare524Rule }),
  );

  // The remaining keyword sets are all built before any of their assertions run.
  const accountState403Rule = ruleForStatus(403);
  const accountState403Keywords = lowerCasedKeywords(accountState403Rule);
  const clientError400Rule = ruleForStatus(400);
  const clientError400Keywords = lowerCasedKeywords(clientError400Rule);
  const quota429Rule = ruleForStatus(429);
  const quota429Keywords = lowerCasedKeywords(quota429Rule);
  const successBody200Rule = ruleForStatus(200);
  const successBody200Keywords = lowerCasedKeywords(successBody200Rule);
  const serviceUnavailable503Rule = ruleForStatus(503);
  const serviceUnavailable503Keywords = lowerCasedKeywords(serviceUnavailable503Rule);

  const accountStatePhrases = ["weekly limit", "less than 10% of your weekly limit left", "run /status for a breakdown"];
  const successBodyPhrase = "less than 10% of your weekly limit left";

  // 400
  requireKeywords(
    clientError400Keywords,
    ["invalid_encrypted_content", "encrypted content", "could not be verified", "bad_response_status_code", "暂不支持", "可用模型"],
    "400 temporary-unschedulable rule must catch upstream Responses compatibility and model-routing failures",
    (keyword) => ({ keyword, clientError400Rule }),
  );

  // 403 and 429 are checked phrase by phrase, 403 before 429 for each phrase.
  accountStatePhrases.forEach((accountStatePhrase) => {
    assertCondition(
      accountState403Keywords.has(accountStatePhrase),
      "403 temporary-unschedulable rule must catch Codex account-state phrases",
      { accountStatePhrase, accountState403Rule },
    );
    assertCondition(
      quota429Keywords.has(accountStatePhrase),
      "429 temporary-unschedulable rule must catch Codex account-state phrases",
      { accountStatePhrase, quota429Rule },
    );
  });

  // A 200 rule is optional. When present it must carry exactly the one classifier
  // phrase and a description containing "reclassification".
  if (successBody200Rule !== undefined) {
    assertCondition(
      successBody200Keywords.size === 1 && successBody200Keywords.has(successBodyPhrase),
      "200 temporary-unschedulable rule must use one stable success-body classifier phrase",
      successBody200Rule,
    );
    assertCondition(
      /reclassification/u.test(successBody200Rule.description ?? ""),
      "200 temporary-unschedulable rule must be documented as a runtime reclassification requirement",
      successBody200Rule,
    );
  }

  // 503
  requireKeywords(
    serviceUnavailable503Keywords,
    ["model_not_found", "no available channel for model"],
    "503 temporary-unschedulable rule must catch upstream model-routing failures",
    (keyword) => ({ keyword, serviceUnavailable503Rule }),
  );
}
|
|
// The Responses smoke model must be a non-empty string.
const smokeModel = parsed.localCodex?.responsesSmokeModel;
assertCondition(
  typeof smokeModel === "string" && smokeModel.length > 0,
  "localCodex.responsesSmokeModel must be declared for Responses smoke validation",
  parsed.localCodex,
);

// Reached only when no assertion above threw.
// NOTE(review): this list is static, so the rule-specific entries are printed even
// when the temporary-unschedulable policy is not enabled and its rule checks were skipped.
const completedChecks = [
  "routing config is schema-valid without profile-specific test gates",
  "pool owner concurrency auto-resolves or covers the YAML account capacity set",
  "profile load factor overrides are YAML-controlled positive integers",
  "public Caddy response-header timeout is long enough for Codex compact",
  "optional WebSocket mode overrides use supported values",
  "local Codex WebSocket transport is consistent with YAML-declared WSv2-capable accounts",
  "temporary unschedulable rules are structurally valid when enabled",
  "upstream 400 Responses compatibility and model-routing failures are caught by the 400 cooldown rule",
  "generic recovered upstream error wrappers are caught by cooldown rules",
  "large-context upstream failures are caught by the 413 cooldown rule",
  "gateway timeout wrappers are caught by the 504 cooldown rule",
  "Cloudflare timeout wrappers are caught by the 524 cooldown rule",
  "Codex weekly-limit prompts are caught by account-state and quota cooldown rules",
  "upstream model-routing failures are caught by the 503 cooldown rule",
  "Responses smoke model is YAML-declared",
];

// Emit the success summary as a single JSON line on stdout.
console.log(JSON.stringify({ ok: true, checks: completedChecks }));
|