# Viewer header captured with this copy (not part of the config): Files · 214 lines · 8.4 KiB · YAML

# Schema version, document kind, and identifying metadata for this config.
# NOTE(review): nesting indentation appears to have been stripped from this
# copy; id/owner/relatedIssues presumably belong under `metadata` — confirm
# against the original file before relying on the structure.
version: 1
kind: platform-infra-sub2api-codex-pool
metadata:
id: sub2api-codex-pool
owner: unidesk
# Related issue numbers (the tracker they refer to is not shown here).
relatedIssues:
- 339
- 340
# Pool-wide settings: group name/description, the API key name plus the
# secret name and secret key it is stored under, the default admin email,
# and per-account defaults.
# NOTE(review): indentation is flattened in this copy, so parent/child
# relationships below are presumed from key order — confirm.
pool:
groupName: unidesk-codex-pool
groupDescription: UniDesk-managed Codex API-key pool for YAML-selected Sub2API clients.
apiKeyName: unidesk-codex-pool-api-key
apiKeySecretName: sub2api-codex-pool-api-key
apiKeySecretKey: API_KEY
adminEmailDefault: admin@sub2api.platform-infra.local
# Minimum owner balance in USD, per the key name; how it is enforced is not
# visible in this file.
minOwnerBalanceUsd: 1000
# Account defaults for priority, capacity, and load factor (all 10).
defaultAccountPriority: 10
defaultAccountCapacity: 10
defaultAccountLoadFactor: 10
# Default sentinel-protect settings.
# NOTE(review): whether the failure/retry keys below are children of
# defaultSentinelProtect or siblings of it cannot be told from this
# flattened copy — confirm.
defaultSentinelProtect:
enabled: true
consecutiveFailures: 3
# Retry delay bounds in seconds with a multiplier of 2 — presumably an
# exponential backoff from 2s capped at 60s; verify against the consumer.
initialRetryDelaySeconds: 2
maxRetryDelaySeconds: 60
backoffMultiplier: 2
# Default rules for temporarily taking an account out of scheduling.
# Each rule lists a status code, a set of keywords, a duration in minutes
# (1 for every rule here), and a description of the intent.
# NOTE(review): how keywords are matched (substring vs. exact, case
# sensitivity, any vs. all) is decided by the consumer and is not visible
# in this file — confirm before editing keyword lists.
defaultTempUnschedulable:
enabled: true
rules:
# The last two keywords are Chinese: "暂不支持" ("not supported for now")
# and "可用模型" ("available models"). They are runtime match strings and
# must stay as written.
- statusCode: 400
keywords: [invalid_encrypted_content, encrypted content, could not be verified, could not be decrypted, bad_response_status_code, model_not_found, no available channel for model, unsupported, not supported, not support, 暂不支持, 可用模型]
durationMinutes: 1
description: Stable upstream 400 model-routing or Responses encrypted-content compatibility failures should use another account.
- statusCode: 401
keywords: [unauthorized, invalid api key, invalid_api_key, authentication, recovered upstream error]
durationMinutes: 1
description: Credential/auth failures should briefly cool down this account and use another account.
- statusCode: 403
keywords: [forbidden, access denied, quota, billing, capacity, weekly limit, less than 10% of your weekly limit left, run /status for a breakdown, recovered upstream error]
durationMinutes: 1
description: Permission, quota, or account-state failures should briefly cool down this account and use another account.
- statusCode: 429
keywords: [capacity, rate limit, rate_limit, quota, weekly limit, less than 10% of your weekly limit left, run /status for a breakdown, too many requests, overloaded, resource_exhausted, recovered upstream error]
durationMinutes: 1
description: Capacity and rate-limit responses are often temporary; briefly cool down this account and use another account.
- statusCode: 500
keywords: [capacity, overloaded, temporarily unavailable, temporary, upstream, recovered upstream error]
durationMinutes: 1
description: Transient upstream server failures should briefly cool down this account and prefer another account.
- statusCode: 502
keywords: [capacity, overloaded, temporarily unavailable, temporary, upstream, bad gateway, upstream request failed, unknown error, context deadline exceeded, context canceled, websocket dial, handshake response, recovered upstream error]
durationMinutes: 1
description: Gateway upstream failures, including recovered upstream error wrappers, should briefly cool down this account.
# NOTE(review): 413 sits between 502 and 503, out of numeric order with the
# other rules. Presumably harmless if the consumer looks rules up by status
# code — confirm whether rule order matters before reordering.
- statusCode: 413
keywords: [openai_error, payload too large, request too large, context length, context window, maximum context]
durationMinutes: 1
description: Large-context upstream failures should cool down the selected account so a larger-context channel can handle the request.
- statusCode: 503
keywords: [capacity, overloaded, temporarily unavailable, temporary, upstream, recovered upstream error, model_not_found, no available channel for model]
durationMinutes: 1
description: Service unavailable and upstream model-routing failures should briefly cool down this account.
- statusCode: 504
keywords: [gateway timeout, timeout, upstream, upstream request failed, unknown error, context deadline exceeded, context canceled, recovered upstream error]
durationMinutes: 1
description: Gateway timeout responses should cool down the selected account so another account can handle the next request.
- statusCode: 524
keywords: [timeout, a timeout occurred, cloudflare, gateway timeout, upstream, upstream request failed, unknown error, context deadline exceeded, context canceled, recovered upstream error]
durationMinutes: 1
description: Cloudflare 524 timeout responses should cool down the selected account so another account can handle the next request.
- statusCode: 529
keywords: [capacity, overloaded, temporarily unavailable, temporary, recovered upstream error]
durationMinutes: 1
description: Provider overloaded responses should briefly cool down this account and use another account.
# Codex profiles. Each entry names a profile, its account name, and config
# and auth file names that carry the profile name as a suffix. Both entries
# set trustUpstream: true and proxyId: 3.
# NOTE(review): indentation is flattened in this copy; the keys after each
# `- profile:` line presumably belong to that list item — confirm.
profiles:
entries:
- profile: yjxm1221
accountName: unidesk-codex-yjxm1221
configFile: config.toml.yjxm1221
authFile: auth.json.yjxm1221
trustUpstream: true
proxyId: 3
- profile: yjxm1221-pro
accountName: unidesk-codex-yjxm1221-pro
configFile: config.toml.yjxm1221-pro
authFile: auth.json.yjxm1221-pro
# Only this entry sets priority explicitly; the first entry presumably
# falls back to the pool's defaultAccountPriority — confirm.
priority: 50
trustUpstream: true
proxyId: 3
# Manually configured accounts: a set of named binding sources, followed by
# the protected accounts that reference those sources by name.
# NOTE(review): indentation is flattened in this copy; the grouping of keys
# under each named source and under each binding is presumed from key
# order — confirm against the original file.
manualAccounts:
bindingSources:
# Proxy-kind source using the target-egress-proxy provider.
active-target-egress-proxy:
enabled: true
kind: proxy
provider: target-egress-proxy
description: Bind a protected manual account to the selected target's YAML-declared egress proxy.
# Proxy-kind source using a fixed HTTP proxy at 127.0.0.1:18789.
pk01-local-egress-proxy:
enabled: true
kind: proxy
provider: fixed-http-proxy
description: Bind a protected manual account to PK01 local provider-gateway egress.
fixedProxy:
protocol: http
host: 127.0.0.1
port: 18789
# Group-kind source using the pool-group provider.
unified-pool-group:
enabled: true
kind: group
provider: pool-group
description: Attach a protected manual account to the unified Codex pool group.
# Protected accounts. Each `source` value below matches one of the source
# names declared above (pk01-local-egress-proxy, unified-pool-group).
protected:
- accountName: unidesk-codex-gptclub-sub2api
reason: Manually configured in Sub2API as protected; keep outside pool auto-management and sentinel control.
proxyBinding:
enabled: true
source: pk01-local-egress-proxy
proxyName: platform-infra-sub2api-pk01-local-egress-proxy
groupBinding:
enabled: true
source: unified-pool-group
# Public exposure settings: frpc-related object names and image, server
# address and ports, the public and master base URLs, and the masterFrps /
# masterCaddy sections.
# NOTE(review): the first `enabled` below is false while the one after
# masterCaddy is true. With indentation flattened in this copy, which
# section each flag belongs to — and which takes precedence — is presumed
# from position only; confirm.
publicExposure:
enabled: false
proxyName: platform-infra-sub2api
configMapName: sub2api-frpc-config
deploymentName: sub2api-frpc
frpcImage: fatedier/frpc:v0.68.1
serverAddr: 74.48.78.17
serverPort: 7000
# The same port number appears in masterBaseUrl and upstreamBaseUrl below.
remotePort: 21880
localIP: sub2api.platform-infra.svc.cluster.local
localPort: 8080
# The host part matches the masterCaddy `domain` value below.
publicBaseUrl: https://sub2api.74-48-78-17.nip.io
masterBaseUrl: http://127.0.0.1:21880
masterFrps:
configPath: /opt/hwlab-frp/frps.dev.toml
containerName: hwlab-frps-dev
masterCaddy:
enabled: true
domain: sub2api.74-48-78-17.nip.io
configPath: /etc/caddy/Caddyfile
serviceName: caddy
upstreamBaseUrl: http://127.0.0.1:21880
responseHeaderTimeoutSeconds: 600
# Edge retry: a 10-second try duration with a 250 ms try interval, matched
# on POST requests to /responses/compact.
# NOTE(review): whether edgeRetry nests under masterCaddy or directly under
# publicExposure cannot be told from this flattened copy — confirm.
edgeRetry:
enabled: true
tryDurationSeconds: 10
tryIntervalMilliseconds: 250
retryMatch:
methods: [POST]
paths: [/responses/compact]
# Local Codex settings: backup suffix, provider name, wire API, context
# window and auto-compact token limits, WebSocket flags (both false), and
# the model named for the Responses smoke check.
# NOTE(review): indentation is flattened in this copy; all keys below are
# presumed to be children of localCodex — confirm.
localCodex:
backupSuffix: pre-sub2api
providerName: OpenAI
wireApi: responses
# The auto-compact limit (240000) is set below the context window (272000).
modelContextWindow: 272000
modelAutoCompactTokenLimit: 240000
supportsWebSockets: false
responsesWebSocketsV2: false
responsesSmokeModel: gpt-5.5
# Account sentinel settings: monitor/actions toggles, schedule, runtime
# image, SDK version pin, object names (service account, config maps,
# secret, cron job, role, role binding — presumably Kubernetes objects),
# probe model/endpoint, expected marker, and probe parameters.
# NOTE(review): indentation is flattened in this copy; e.g. whether
# `actions` nests under `monitor` or beside it, and which keys belong to
# `probe`, are presumed from key order — confirm.
sentinel:
monitor:
enabled: true
actions:
enabled: true
# Cron-style expression; `*/1` in the minute field means every minute.
schedule: "*/1 * * * *"
image: python:3.12-alpine
sdk:
# Quoted so the version stays a string.
openaiPythonVersion: "2.41.1"
serviceAccountName: sub2api-account-sentinel
configMapName: sub2api-account-sentinel-config
credentialsSecretName: sub2api-account-sentinel-profiles
stateConfigMapName: sub2api-account-sentinel-state
cronJobName: sub2api-account-sentinel
roleName: sub2api-account-sentinel
roleBindingName: sub2api-account-sentinel
model: gpt-5.5
endpoint: responses
# Marker prefix with exact: true — presumably the probe reply must match the
# marker exactly; verify against the sentinel implementation.
marker:
prefix: UDSG_OK
exact: true
# Probe limits: 30-second timeout, 16 max output tokens, 5-minute transport
# retry, and the User-Agent string to use.
probe:
timeoutSeconds: 30
maxOutputTokens: 16
transportRetryMinutes: 5
userAgent: Go-http-client/1.1
# Gateway failure monitor: a 900-second lookback, 4000 tail lines, TTL
# bounds of 5 to 30 minutes with a multiplier of 2, applied to the request
# paths listed below.
# NOTE(review): `tailLines` suggests log tailing, but the log source and how
# the TTL is applied are not visible in this file — confirm with the
# consumer.
gatewayFailureMonitor:
enabled: true
lookbackSeconds: 900
tailLines: 4000
initialTtlMinutes: 5
maxTtlMinutes: 30
backoffMultiplier: 2
# Covers the Responses paths with and without the /v1 prefix, plus their
# /compact variants.
paths:
- /responses
- /v1/responses
- /responses/compact
- /v1/responses/compact
# Cadence after successful probes: interval starts at 1 minute, with a
# multiplier of 2 and 10 percent jitter.
# NOTE(review): three different max-interval keys follow (20, 20, and 2
# minutes). The trusted/untrusted pair presumably keys off the per-profile
# trustUpstream flag — confirm which cap applies when.
cadence:
successInitialIntervalMinutes: 1
successMaxIntervalMinutes: 20
trustedSuccessMaxIntervalMinutes: 20
untrustedSuccessMaxIntervalMinutes: 2
successBackoffMultiplier: 2
jitterPercent: 10
# Freeze TTL bounds: 1 to 10 minutes, multiplier 2, 10 percent jitter.
freeze:
initialTtlMinutes: 1
maxTtlMinutes: 10
backoffMultiplier: 2
jitterPercent: 10
# Token pricing in USD per one million input / output tokens.
pricing:
usdPer1MInputTokens: 1.25
usdPer1MOutputTokens: 10
# NOTE(review): the parent of historyLimit (pricing vs. an enclosing section)
# cannot be determined from this flattened copy — confirm.
historyLimit: 200