# File-viewer summary (not part of the document): 273 lines, 10 KiB, YAML
# Document header: a version number and the kind identifier of this config.
# NOTE(review): the bare `|` lines and the missing indentation throughout this
# file look like extraction residue rather than authored YAML - confirm against
# the original file and restore the nesting before feeding this to a parser.
version: 1
|
|
kind: platform-infra-sub2api-codex-pool
|
|
|
|
# Identifying metadata for this document: an id, an owner, and a list of
# related issue numbers (presumably issue-tracker ids - confirm).
metadata:
|
|
id: sub2api-codex-pool
|
|
owner: unidesk
|
|
relatedIssues:
|
|
- 339
|
|
- 340
|
|
|
|
# Pool-level settings: group name/description, the API key name, and the
# secret name/key pair that holds the key value.
pool:
|
|
groupName: unidesk-codex-pool
|
|
groupDescription: UniDesk-managed Codex API-key pool for YAML-selected Sub2API clients.
|
|
apiKeyName: unidesk-codex-pool-api-key
|
|
# Secret name and the key inside it (presumably a Kubernetes Secret - confirm).
apiKeySecretName: sub2api-codex-pool-api-key
|
|
apiKeySecretKey: API_KEY
|
|
adminEmailDefault: admin@sub2api.platform-infra.local
|
|
# Minimum owner balance, in USD per the key name.
minOwnerBalanceUsd: 1000
|
|
# Account defaults; individual profile entries elsewhere in this file set
# their own priority / capacity / loadFactor (presumably overriding these -
# verify against the consumer).
defaultAccountPriority: 10
|
|
defaultAccountCapacity: 10
|
|
defaultAccountLoadFactor: 10
|
|
# Default sentinel-protect settings: an enable switch, a consecutive-failure
# count, and retry-delay bounds (seconds) with a backoff multiplier.
# NOTE(review): exact semantics of these knobs live in the consumer - confirm.
defaultSentinelProtect:
|
|
enabled: true
|
|
consecutiveFailures: 3
|
|
initialRetryDelaySeconds: 2
|
|
maxRetryDelaySeconds: 60
|
|
backoffMultiplier: 2
|
|
# Default temporary-unschedulable rules. Each rule carries an HTTP status code,
# a list of keywords, a duration in minutes, and a human-readable description
# of why the account should be cooled down. Every rule below uses a 1-minute
# duration.
defaultTempUnschedulable:
|
|
enabled: true
|
|
rules:
|
|
- statusCode: 400
|
|
keywords: [invalid_encrypted_content, encrypted content, could not be verified, could not be decrypted, bad_response_status_code, model_not_found, no available channel for model, unsupported, not supported, not support, 暂不支持, 可用模型]
|
|
durationMinutes: 1
|
|
description: Stable upstream 400 model-routing or Responses encrypted-content compatibility failures should use another account.
|
|
- statusCode: 401
|
|
keywords: [unauthorized, invalid api key, invalid_api_key, authentication, recovered upstream error]
|
|
durationMinutes: 1
|
|
description: Credential/auth failures should briefly cool down this account and use another account.
|
|
- statusCode: 403
|
|
keywords: [forbidden, access denied, quota, billing, capacity, weekly limit, less than 10% of your weekly limit left, run /status for a breakdown, recovered upstream error]
|
|
durationMinutes: 1
|
|
description: Permission, quota, or account-state failures should briefly cool down this account and use another account.
|
|
- statusCode: 429
|
|
keywords: [capacity, rate limit, rate_limit, quota, weekly limit, less than 10% of your weekly limit left, run /status for a breakdown, too many requests, overloaded, resource_exhausted, recovered upstream error]
|
|
durationMinutes: 1
|
|
description: Capacity and rate-limit responses are often temporary; briefly cool down this account and use another account.
|
|
- statusCode: 500
|
|
keywords: [capacity, overloaded, temporarily unavailable, temporary, upstream, recovered upstream error]
|
|
durationMinutes: 1
|
|
description: Transient upstream server failures should briefly cool down this account and prefer another account.
|
|
- statusCode: 502
|
|
keywords: [capacity, overloaded, temporarily unavailable, temporary, upstream, bad gateway, upstream request failed, unknown error, context deadline exceeded, context canceled, websocket dial, handshake response, recovered upstream error]
|
|
durationMinutes: 1
|
|
description: Gateway upstream failures, including recovered upstream error wrappers, should briefly cool down this account.
|
|
# NOTE(review): this 413 rule sits between 502 and 503 while the other rules
# are in ascending status-code order - if the consumer is order-sensitive,
# confirm the placement is intended.
- statusCode: 413
|
|
keywords: [openai_error, payload too large, request too large, context length, context window, maximum context]
|
|
durationMinutes: 1
|
|
description: Large-context upstream failures should cool down the selected account so a larger-context channel can handle the request.
|
|
- statusCode: 503
|
|
keywords: [capacity, overloaded, temporarily unavailable, temporary, upstream, recovered upstream error, model_not_found, no available channel for model]
|
|
durationMinutes: 1
|
|
description: Service unavailable and upstream model-routing failures should briefly cool down this account.
|
|
- statusCode: 504
|
|
keywords: [gateway timeout, timeout, upstream, upstream request failed, unknown error, context deadline exceeded, context canceled, recovered upstream error]
|
|
durationMinutes: 1
|
|
description: Gateway timeout responses should cool down the selected account so another account can handle the next request.
|
|
- statusCode: 524
|
|
keywords: [timeout, a timeout occurred, cloudflare, gateway timeout, upstream, upstream request failed, unknown error, context deadline exceeded, context canceled, recovered upstream error]
|
|
durationMinutes: 1
|
|
description: Cloudflare 524 timeout responses should cool down the selected account so another account can handle the next request.
|
|
- statusCode: 529
|
|
keywords: [capacity, overloaded, temporarily unavailable, temporary, recovered upstream error]
|
|
durationMinutes: 1
|
|
description: Provider overloaded responses should briefly cool down this account and use another account.
|
|
# Profile entries. Each entry names a profile together with its account name,
# config file and auth file; some entries also set trustUpstream, capacity,
# loadFactor, priority or a WebSockets mode.
profiles:
|
|
entries:
|
|
- profile: HY
|
|
accountName: unidesk-codex-hy
|
|
configFile: config.toml.HY
|
|
authFile: auth.json.HY
|
|
trustUpstream: true
|
|
# Quoted on purpose: a bare `off` is resolved to boolean false by YAML 1.1
# loaders, which would hand the consumer a bool instead of the mode string.
openaiResponsesWebSocketsV2Mode: "off"
|
|
capacity: 10
|
|
loadFactor: 10
|
|
priority: 20
|
|
# Remaining profile entries. In every entry the names follow one pattern:
# accountName is unidesk-codex-<profile>, configFile is config.toml.<profile>
# and authFile is auth.json.<profile>. Entries that omit trustUpstream,
# capacity, loadFactor or priority simply do not set them here.
- profile: thinkai
|
|
accountName: unidesk-codex-thinkai
|
|
configFile: config.toml.thinkai
|
|
authFile: auth.json.thinkai
|
|
- profile: dawclaudecode
|
|
accountName: unidesk-codex-dawclaudecode
|
|
configFile: config.toml.dawclaudecode
|
|
authFile: auth.json.dawclaudecode
|
|
- profile: ranmeng
|
|
accountName: unidesk-codex-ranmeng
|
|
configFile: config.toml.ranmeng
|
|
authFile: auth.json.ranmeng
|
|
capacity: 20
|
|
- profile: gptclub
|
|
accountName: unidesk-codex-gptclub
|
|
configFile: config.toml.gptclub
|
|
authFile: auth.json.gptclub
|
|
trustUpstream: true
|
|
capacity: 10
|
|
priority: 100
|
|
- profile: gptclub-sub2api
|
|
accountName: unidesk-codex-gptclub-sub2api
|
|
configFile: config.toml.gptclub-sub2api
|
|
authFile: auth.json.gptclub-sub2api
|
|
priority: 1
|
|
- profile: only
|
|
accountName: unidesk-codex-only
|
|
configFile: config.toml.only
|
|
authFile: auth.json.only
|
|
trustUpstream: true
|
|
loadFactor: 1
|
|
priority: 110
|
|
- profile: zakuzaku
|
|
accountName: unidesk-codex-zakuzaku
|
|
configFile: config.toml.zakuzaku
|
|
authFile: auth.json.zakuzaku
|
|
priority: 10
|
|
- profile: zakuzaku-x9nt
|
|
accountName: unidesk-codex-zakuzaku-x9nt
|
|
configFile: config.toml.zakuzaku-x9nt
|
|
authFile: auth.json.zakuzaku-x9nt
|
|
priority: 10
|
|
- profile: ai2-hhhl
|
|
accountName: unidesk-codex-ai2-hhhl
|
|
configFile: config.toml.ai2-hhhl
|
|
authFile: auth.json.ai2-hhhl
|
|
- profile: freemodel
|
|
accountName: unidesk-codex-freemodel
|
|
configFile: config.toml.freemodel
|
|
authFile: auth.json.freemodel
|
|
- profile: nova-exellome
|
|
accountName: unidesk-codex-nova-exellome
|
|
configFile: config.toml.nova-exellome
|
|
authFile: auth.json.nova-exellome
|
|
- profile: socap
|
|
accountName: unidesk-codex-socap
|
|
configFile: config.toml.socap
|
|
authFile: auth.json.socap
|
|
- profile: yjxm1221
|
|
accountName: unidesk-codex-yjxm1221
|
|
configFile: config.toml.yjxm1221
|
|
authFile: auth.json.yjxm1221
|
|
# Manually configured accounts. `bindingSources` declares three named sources
# (two of kind proxy, one of kind group); `protected` lists accounts whose
# proxyBinding / groupBinding refer to those sources by name.
manualAccounts:
|
|
bindingSources:
|
|
active-target-egress-proxy:
|
|
enabled: true
|
|
kind: proxy
|
|
provider: target-egress-proxy
|
|
description: Bind a protected manual account to the selected target's YAML-declared egress proxy.
|
|
pk01-local-egress-proxy:
|
|
enabled: true
|
|
kind: proxy
|
|
provider: fixed-http-proxy
|
|
description: Bind a protected manual account to PK01 local provider-gateway egress.
|
|
# Fixed proxy endpoint: plain HTTP on the loopback address.
fixedProxy:
|
|
protocol: http
|
|
host: 127.0.0.1
|
|
port: 18789
|
|
unified-pool-group:
|
|
enabled: true
|
|
kind: group
|
|
provider: pool-group
|
|
description: Attach a protected manual account to the unified Codex pool group.
|
|
protected:
|
|
- accountName: lucianepidgeon@gmail.com
|
|
reason: Manually configured in Sub2API; keep outside UniDesk-managed Codex pool and sentinel control.
|
|
targetIds: [PK01]
|
|
# `source` values below match binding-source names declared above.
proxyBinding:
|
|
enabled: true
|
|
source: pk01-local-egress-proxy
|
|
proxyName: platform-infra-sub2api-pk01-local-egress-proxy
|
|
groupBinding:
|
|
enabled: true
|
|
source: unified-pool-group
|
|
# Public exposure settings: an frpc image with its server address/ports, the
# local service address/port, base URLs, and master-side frps and Caddy
# settings. The first `enabled` flag in this section is false.
# NOTE(review): with indentation lost, it is not certain which parent the
# trailing keys (responseHeaderTimeoutSeconds, edgeRetry, retryMatch) belong
# to - confirm against the original file.
publicExposure:
|
|
enabled: false
|
|
proxyName: platform-infra-sub2api
|
|
configMapName: sub2api-frpc-config
|
|
deploymentName: sub2api-frpc
|
|
frpcImage: fatedier/frpc:v0.68.1
|
|
serverAddr: 74.48.78.17
|
|
serverPort: 7000
|
|
remotePort: 21880
|
|
localIP: sub2api.platform-infra.svc.cluster.local
|
|
localPort: 8080
|
|
publicBaseUrl: https://sub2api.74-48-78-17.nip.io
|
|
# Same loopback port as remotePort above.
masterBaseUrl: http://127.0.0.1:21880
|
|
masterFrps:
|
|
configPath: /opt/hwlab-frp/frps.dev.toml
|
|
containerName: hwlab-frps-dev
|
|
masterCaddy:
|
|
enabled: true
|
|
domain: sub2api.74-48-78-17.nip.io
|
|
configPath: /etc/caddy/Caddyfile
|
|
serviceName: caddy
|
|
upstreamBaseUrl: http://127.0.0.1:21880
|
|
responseHeaderTimeoutSeconds: 600
|
|
# Retry settings: a try duration, a try interval, and a match limited to
# POST requests on /responses/compact.
edgeRetry:
|
|
enabled: true
|
|
tryDurationSeconds: 10
|
|
tryIntervalMilliseconds: 250
|
|
retryMatch:
|
|
methods: [POST]
|
|
paths: [/responses/compact]
|
|
# Local Codex settings: backup suffix, provider name, wire API, context-window
# and auto-compact token limits, WebSocket flags (both false), and the model
# name used for the responses smoke check.
localCodex:
|
|
backupSuffix: pre-sub2api
|
|
providerName: OpenAI
|
|
wireApi: responses
|
|
modelContextWindow: 272000
|
|
# Lower than modelContextWindow above.
modelAutoCompactTokenLimit: 240000
|
|
supportsWebSockets: false
|
|
responsesWebSocketsV2: false
|
|
responsesSmokeModel: gpt-5.5
|
|
# Account sentinel settings: enable switches, a cron schedule and container
# image, names of the resources it uses, the probe model/endpoint, and tuning
# for probing, gateway-failure monitoring, cadence, freezing and pricing.
# NOTE(review): with indentation lost, parentage of several keys (for example
# schedule, userAgent, historyLimit) cannot be read from this view - confirm
# against the original file.
sentinel:
|
|
monitor:
|
|
enabled: true
|
|
actions:
|
|
enabled: true
|
|
# Cron expression with */1 in the minute field, i.e. every minute.
schedule: "*/1 * * * *"
|
|
image: python:3.12-alpine
|
|
sdk:
|
|
# Quoted so the version stays a string.
openaiPythonVersion: "2.41.1"
|
|
# Resource names used by the sentinel (presumably Kubernetes objects, given
# the ConfigMap / CronJob / Role naming - confirm).
serviceAccountName: sub2api-account-sentinel
|
|
configMapName: sub2api-account-sentinel-config
|
|
credentialsSecretName: sub2api-account-sentinel-profiles
|
|
stateConfigMapName: sub2api-account-sentinel-state
|
|
cronJobName: sub2api-account-sentinel
|
|
roleName: sub2api-account-sentinel
|
|
roleBindingName: sub2api-account-sentinel
|
|
model: gpt-5.5
|
|
endpoint: responses
|
|
marker:
|
|
prefix: UDSG_OK
|
|
exact: true
|
|
probe:
|
|
timeoutSeconds: 30
|
|
maxOutputTokens: 16
|
|
transportRetryMinutes: 5
|
|
userAgent: Go-http-client/1.1
|
|
# Gateway failure monitoring: lookback window, log tail size, TTL bounds with
# a backoff multiplier, and the request paths it covers.
gatewayFailureMonitor:
|
|
enabled: true
|
|
lookbackSeconds: 900
|
|
tailLines: 4000
|
|
initialTtlMinutes: 5
|
|
maxTtlMinutes: 30
|
|
backoffMultiplier: 2
|
|
paths:
|
|
- /responses
|
|
- /v1/responses
|
|
- /responses/compact
|
|
- /v1/responses/compact
|
|
# Probe cadence after successes: interval bounds in minutes (separate maxima
# for trusted and untrusted), a backoff multiplier, and a jitter percentage.
cadence:
|
|
successInitialIntervalMinutes: 1
|
|
successMaxIntervalMinutes: 20
|
|
trustedSuccessMaxIntervalMinutes: 20
|
|
untrustedSuccessMaxIntervalMinutes: 2
|
|
successBackoffMultiplier: 2
|
|
jitterPercent: 10
|
|
# Freeze TTL bounds in minutes with a backoff multiplier and jitter percentage.
freeze:
|
|
initialTtlMinutes: 1
|
|
maxTtlMinutes: 10
|
|
backoffMultiplier: 2
|
|
jitterPercent: 10
|
|
# USD price per one million input / output tokens.
pricing:
|
|
usdPer1MInputTokens: 1.25
|
|
usdPer1MOutputTokens: 10
|
|
historyLimit: 200
|