diff --git a/.agents/skills/unidesk-sub2api/SKILL.md b/.agents/skills/unidesk-sub2api/SKILL.md index 1b758545..2ae15d03 100644 --- a/.agents/skills/unidesk-sub2api/SKILL.md +++ b/.agents/skills/unidesk-sub2api/SKILL.md @@ -50,6 +50,10 @@ bun scripts/cli.ts platform-infra sub2api validate bun scripts/cli.ts platform-infra sub2api codex-pool plan bun scripts/cli.ts platform-infra sub2api codex-pool sync --confirm bun scripts/cli.ts platform-infra sub2api codex-pool validate +bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-image status +bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-image build --confirm +bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-probe --account unidesk-codex-hy --confirm +bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-report bun scripts/cli.ts platform-infra sub2api codex-pool cleanup-probes --confirm ``` @@ -69,16 +73,22 @@ bun scripts/cli.ts platform-infra sub2api codex-pool cleanup-probes --confirm - `profiles.entries[].tempUnschedulable`: 可选 per-account 临时下线规则覆盖;字段语义以 `docs/reference/platform-infra.md` 为权威。上游 Sub2API 不支持的成功体分类、调度策略或账号冷却行为不要在这里声明。 - `profiles.entries[].openaiResponsesWebSocketsV2Mode`: 需要 Responses WebSocket v2 的上游才设置,值为 `off`、`ctx_pool` 或 `passthrough`。 - `profiles.entries[].upstreamUserAgent`: 少数要求 Codex CLI User-Agent 的上游才设置,不能含换行。 -- `sentinel.monitor.enabled`: 账号级 HTTP 200 成功体哨兵监控开关;开启后 `codex-pool sync --confirm` 会在 `platform-infra` 创建/更新 k8s CronJob、ConfigMap、Secret、ServiceAccount、Role 和 RoleBinding。CronJob 直打 YAML-managed 上游账号的 OpenAI Responses `gpt-5.5`,用确定 marker 判定是否出现维护/故障/广告等非预期成功体,并在独立 state ConfigMap 中记录 token/cost 账本。 -- `sentinel.actions.enabled`: 账号级哨兵冻结/恢复动作开关;默认必须保持 `false`,先监控一段时间,确认 marker 判定正确率、token 消耗和误报率后再改为 `true`。动作关闭时只记录 `would-freeze`,不会调用 Sub2API admin API 改 `schedulable`。 -- `sentinel.probe.maxOutputTokens`: 哨兵 OpenAI Responses 请求的硬输出上限,必须保持小值;不要只靠 prompt 要求模型少输出。哨兵不限制并发和每轮账号数,所有到期账号会在同一轮并发探测。 -- `sentinel.cadence`: 成功信任指数退避配置。当前口径是从 1 
分钟开始,连续成功后退避到最大 20 分钟;任意 marker mismatch 清零成功信任并进入冻结退避。 -- `sentinel.freeze`: 冻结 TTL 指数退避配置。当前口径是初始 10 分钟,失败后 `10m -> 20m -> 40m -> 80m -> 120m`,最大 2 小时;冻结到期后只做恢复 probe,通过才自动恢复,不能仅靠 TTL 到期解封。 +- `sentinel.monitor.enabled`: 账号级 marker 哨兵监控开关;开启后 `codex-pool sync --confirm` 会在 `platform-infra` 创建/更新 k8s CronJob、ConfigMap、Secret、ServiceAccount、Role 和 RoleBinding。CronJob 直打 YAML-managed 上游账号的 OpenAI Responses `gpt-5.5`,用确定 marker 作为唯一健康标准,并在独立 state ConfigMap 中记录 token/cost 账本。 +- `sentinel.actions.enabled`: 账号级哨兵冻结/恢复动作开关;当前 marker-only guard 要求开启。动作关闭时只记录 `would-freeze`,不会调用 Sub2API admin API 改 `schedulable`。动作开启后,只要不满足 marker match,不论是 HTTP 200 私货、4xx/5xx、非 JSON、连接错误还是空输出,都进入同一个冻结/恢复状态机。 +- `sentinel.sdk.openaiPythonVersion`: 哨兵容器使用的 OpenAI Python SDK 固定版本;模型请求必须通过标准 SDK `responses.create`,不要手工拼 `/v1/responses` 请求体或手写响应解析。后续升级 SDK 只改 YAML 并 `sync --confirm`。 +- `sentinel.probe.maxOutputTokens`: 哨兵本地流式 delta 收集上限,必须保持小值;它不作为上游 `max_output_tokens` 字段发送,以保持与 Sub2API WebUI 默认账号连接测试的 Responses SSE 请求形态一致。哨兵不限制并发和每轮账号数,所有到期账号会在同一轮并发探测。 +- `sentinel.probe.userAgent`: 哨兵 direct upstream probe 的默认 User-Agent,通过 OpenAI SDK `extra_headers` 传递;默认贴近 Sub2API `net/http` 账号连接测试形态,个别账号仍可用 `profiles.entries[].upstreamUserAgent` 覆盖。 +- `sentinel.cadence`: 成功信任指数退避配置。当前口径是从 1 分钟开始,连续成功后退避到最大 20 分钟;任意非 marker match 清零成功信任并进入冻结退避。 +- `sentinel.freeze`: 冻结 TTL 指数退避配置。当前口径是初始 2 分钟,失败后 `2m -> 4m -> 8m -> 16m -> 32m -> 64m -> 120m`,最大 2 小时;冻结到期后只做恢复 probe,通过才自动恢复,不能仅靠 TTL 到期解封。 - `sentinel.pricing`: 直打上游时哨兵自己的 token/cost 估算价格。因为 direct upstream probe 不经过 Sub2API 普通用量账本,哨兵必须自己记录全局与 per-account token/cost;这些账本只用于观察,不作为跳过探测的预算门禁。 `sync --confirm` 会登录 Sub2API admin、创建/更新 group、创建/更新 YAML 中的 `unidesk-codex-*` accounts、创建/复用统一 API key Secret,并把 managed account 的 `schedulable=true` 恢复为过程控制基线;它默认不删除 YAML 中缺席的 managed account。只有明确退役上游时才使用 `sync --confirm --prune-removed` 删除缺席且 `extra.unidesk_managed=true` 的 `unidesk-codex-*` account。 -`sync --confirm` 同时会按 YAML 渲染账号级哨兵资源。哨兵默认使用 
`sentinel.monitor.enabled=true` + `sentinel.actions.enabled=false` 的观察模式;如果后续要开启自动冻结/恢复,只改 `sentinel.actions.enabled=true` 后重新 `codex-pool sync --confirm`,不要手工 patch CronJob、Secret 或 Sub2API account。打开动作前必须先用 `codex-pool validate --full` 或 k8s state 证据确认近期 `lastRun` 的 marker mismatch 与实际上游行为一致,且 token/cost 账本可解释、成本可接受。 +`sentinel-image status|build` 管理哨兵 Python 运行环境镜像。镜像由 YAML 的 `sentinel.image` 基础镜像和 `sentinel.sdk.openaiPythonVersion` 派生,发布到 G14 本地 registry `127.0.0.1:5000/platform-infra/sub2api-account-sentinel:<tag>`;`build --confirm` 会先检查 registry tag,存在则快速复用,不存在才在 G14 host 构建并 push。CronJob 启动时只校验 SDK 版本,不在运行时 `pip install`。 + +`sync --confirm` 同时会按 YAML 渲染账号级哨兵资源,并在 monitor 开启时先确保可复用哨兵镜像存在。当前目标是 `sentinel.monitor.enabled=true` + `sentinel.actions.enabled=true` 的 marker-only 自动冻结/恢复;不要手工 patch CronJob、Secret 或 Sub2API account。若怀疑某个账号被误判,先用 `codex-pool sentinel-probe --account <account> --confirm` 立即触发该账号测量;该命令从现有 CronJob 模板派生一次性 Job,复用同一份 Secret、ConfigMap、OpenAI SDK probe、token/cost 账本和冻结/恢复状态机。 + +`sentinel-report` 是只读低噪声报表,不触发 probe、不修改账号。默认输出类似 `ps` 的文本表,展示每个账号的探测次数、最近 marker/HTTP/动作、冻结 TTL、成功退避、下一次 probe 和最近 run 事件;需要机器处理时使用 `sentinel-report --raw`。 `sync --confirm` 和 `validate` 可能超过单次 SSH/runtime 短连接窗口。必须继续使用 `bun scripts/cli.ts platform-infra sub2api codex-pool ...`,由 CLI 在 G14 远端提交作业并短轮询状态;不要改用裸 `trans G14:k3s script` 等一个长连接等待完整结果。若看到 `UNIDESK_SSH_RUNTIME_TIMEOUT`,先按 `docs/reference/platform-infra.md` 的规则处理为控制面可见性问题,修 CLI/job/poll 或重跑受控命令,不要手工 patch Sub2API credentials 或源码。 @@ -167,7 +177,7 @@ bun scripts/cli.ts platform-infra sub2api codex-pool configure-local --confirm - 上游要求 Codex User-Agent:只给该 profile 配 `upstreamUserAgent`,跑 `sync --confirm`。 - 上游报 capacity/rate-limit/overload/Bad Gateway/Gateway Timeout 后没有切号或频繁先失败再恢复:先确认 `codex-pool validate` 里 `tempUnschedulable.ok=true` 且目标 account `runtimeEnabled=true`、规则数符合 YAML;再看 `validation.gatewayResponses.evidence.failovers` 的 account/upstream status。若 mismatch,跑 `codex-pool sync --confirm`;若 runtime 规则已对齐但仍不冻结或不切号,继续修 
Sub2API 自动冻结/failover 能力并复测,不要手工 patch Sub2API credentials,也不要手动禁用、删除或从 YAML 移除问题账号来绕过机制缺陷。 - `codex-pool sync --confirm` 或 `codex-pool validate` 超时:先区分 CLI 传输超时和 Sub2API 运行失败。受控 CLI 应返回远端作业进度和 stdout/stderr tail;如果只是低层 `trans` 60s 超时,不能据此判定 Sub2API failover 不工作。改用或修复 CLI 的远端 job/poll 路径后重跑,并以最终结构化结果作为证据。 -- Codex 报 weekly-limit、`less than 10% of your weekly limit left`、`Run /status for a breakdown` 等账号状态/软配额提示并要求切号:如果上游以 403/429 等错误状态返回,把稳定 body 关键词放进 `pool.defaultTempUnschedulable` 的对应规则,跑 `codex-pool sync --confirm`,再用 `codex-pool validate` 确认每个 managed account 的 runtime 规则包含这些关键词。若该文案是 HTTP 200 成功内容,当前 Sub2API 不支持把它重分类为账号冷却;不要写 YAML 200 规则、不要热补 Sub2API、不要绕过 sync,必要时登记上游能力缺口 issue。 +- Codex 报 weekly-limit、`less than 10% of your weekly limit left`、`Run /status for a breakdown` 等账号状态/软配额提示并要求切号:如果上游以 403/429 等错误状态返回,把稳定 body 关键词放进 `pool.defaultTempUnschedulable` 的对应规则,跑 `codex-pool sync --confirm`,再用 `codex-pool validate` 确认每个 managed account 的 runtime 规则包含这些关键词。若该文案是 HTTP 200 成功内容,不要写 Sub2API 原生 YAML 200 规则、不要热补 Sub2API、不要绕过 sync;启用账号级哨兵时由 marker-only 哨兵按非 marker 响应统一指数冻结。 - 上游 400/503 响应体出现 `invalid_encrypted_content`、`bad_response_status_code`、`invalid_request_error` + 稳定 unsupported-model 文案、unsupported-model、`暂不支持` / `可用模型`、`model_not_found`、`No available channel for model ...` 或同类稳定模型路由 / Responses encrypted-content 兼容性失败:把稳定 body 关键词放进 `pool.defaultTempUnschedulable` 的对应 400/503 规则,跑 `codex-pool sync --confirm`,再用 `codex-pool validate` 确认目标 account 的 runtime rule 包含这些关键词;不要用 account membership、priority、capacity、loadFactor、WebSocket mode 或 User-Agent 改动掩盖该错误族。 - 上游错误反复触发:默认错误冷却按严重程度分层;临时问题可从 10 分钟起步,网关/服务不可用/过载/模型路由类应更长,认证/权限/配额/账号状态/账号兼容类使用最长冷却。`invalid_encrypted_content`、unsupported-model、`Recovered upstream error ...`、`Bad Gateway`、`Gateway Timeout`、Cloudflare `524`、Codex-facing `Upstream request failed`、`Unknown error`、`context deadline exceeded`、`context canceled`、`model_not_found`、`No available channel for model`、大上下文 `413` 和 `openai_error` 
这类稳定包装文案都应留在对应 YAML 冷却政策里,特别是普通 `/responses` 与 compact 链路里上游兼容性错误或 524 可能最终表现为客户端 502/504 + `Unknown error`。具体数值只以 YAML 为准,修改后必须 `codex-pool sync --confirm` 和 `codex-pool validate`。长期判定见 `docs/reference/platform-infra.md`。 - Codex auto compact 后丢上下文:先确认 YAML `localCodex` 是否声明启用 WSv2;若启用,再确认本机 `~/.codex/config.toml` 是否有 `supports_websockets = true` 和 `responses_websockets_v2 = true`,并看 `codex-pool validate` 的 WSv2 candidate 和 Sub2API 日志里的 `transport=responses_websockets_v2`。若 YAML 当前禁用 WSv2,则按 HTTP Responses 稳定性排查,不把旧 WS 口径当成验收要求。 diff --git a/config/platform-infra/sub2api-codex-pool.yaml b/config/platform-infra/sub2api-codex-pool.yaml index f8452e24..0423f3ff 100644 --- a/config/platform-infra/sub2api-codex-pool.yaml +++ b/config/platform-infra/sub2api-codex-pool.yaml @@ -148,11 +148,11 @@ sentinel: monitor: enabled: true actions: - enabled: false - freezeOnMarkerMismatch: true - freezeOnTransportError: false + enabled: true schedule: "*/1 * * * *" image: python:3.12-alpine + sdk: + openaiPythonVersion: "2.41.1" serviceAccountName: sub2api-account-sentinel configMapName: sub2api-account-sentinel-config credentialsSecretName: sub2api-account-sentinel-profiles @@ -165,16 +165,16 @@ sentinel: exact: true probe: timeoutSeconds: 30 - maxResponseBytes: 8192 - maxOutputTokens: 12 + maxOutputTokens: 16 transportRetryMinutes: 5 + userAgent: Go-http-client/1.1 cadence: successInitialIntervalMinutes: 1 successMaxIntervalMinutes: 20 successBackoffMultiplier: 2 jitterPercent: 10 freeze: - initialTtlMinutes: 10 + initialTtlMinutes: 2 maxTtlMinutes: 120 backoffMultiplier: 2 jitterPercent: 10 diff --git a/docs/reference/platform-infra.md b/docs/reference/platform-infra.md index 66cc2256..195d4bea 100644 --- a/docs/reference/platform-infra.md +++ b/docs/reference/platform-infra.md @@ -51,12 +51,32 @@ When Codex startup repeatedly reports WebSocket reconnects or HTTPS fallback, pr Do not encode current availability assumptions in long-term reference prose. 
If an account needs a higher concurrency or load factor than the pool default, make that a deliberate YAML override and verify it with `codex-pool validate`; the reference document should describe the rule, not repeat the current numeric value. -Do not enable Sub2API `pool_mode` for UniDesk-managed Codex accounts. `pool_mode` retries the same selected account path, while UniDesk's desired failover behavior is to mark the failing account temporarily unschedulable and let Sub2API choose another account from the group. `codex-pool validate` reports each managed account's temporary-unschedulable runtime alignment and should be used after `codex-pool sync --confirm`. Generic 502/503/504 bodies such as `Recovered upstream error 502`, `Bad Gateway`, `Gateway Timeout`, Codex-facing `Upstream request failed`, `Unknown error`, context-deadline/canceled wrappers, stable 400 `invalid_encrypted_content` / unsupported-model wrappers, and stable `model_not_found` / "no available channel for model" wrappers must stay in the YAML cooldown policy so an intermittently bad account is cooled down instead of repeatedly adding latency at the next compact or Responses request. The Codex pool default error cooldown is severity-tiered: temporary signals can start at ten minutes, gateway/service/overload/model-routing failures should cool down longer, and credential, permission, quota, account-compatibility, or account-state failures should use the longest cooldown. Exact current values belong in YAML and runtime validation output. +Do not enable Sub2API `pool_mode` for UniDesk-managed Codex accounts. `pool_mode` retries the same selected account path, while UniDesk's desired failover behavior is to mark the failing account temporarily unschedulable and let Sub2API choose another account from the group. `codex-pool validate` reports each managed account's temporary-unschedulable runtime alignment and should be used after `codex-pool sync --confirm`. 
Generic 502/503/504 bodies such as `Recovered upstream error 502`, `Bad Gateway`, `Gateway Timeout`, Codex-facing `Upstream request failed`, `Unknown error`, context-deadline/canceled wrappers, stable 400 `invalid_encrypted_content` / unsupported-model wrappers, and stable `model_not_found` / "no available channel for model" wrappers must stay in the YAML cooldown policy so an intermittently bad account is cooled down instead of repeatedly adding latency at the next compact or Responses request. The Codex pool default error cooldown is severity-tiered: temporary signals should use the shortest cooldown, gateway/service/overload/model-routing failures should cool down longer, and credential, permission, quota, account-compatibility, or account-state failures should use the longest cooldown. Exact current values belong in YAML and runtime validation output. -Sub2API temporary-unschedulable rules require both an HTTP status match and a response-body keyword match in the upstream failure/error path. Do not treat them as a general successful-response content filter. If an upstream returns a quota warning or maintenance prompt as normal HTTP 200 assistant content, do not add a YAML 200 cooldown rule, patch Sub2API in place, fork behavior in UniDesk, or bypass `codex-pool sync` to make the pool pretend that account cooling exists. Record the upstream capability gap in an issue when it matters operationally; until upstream Sub2API supports that behavior and `codex-pool validate` proves it, UniDesk should not implement or rely on it. +Sub2API temporary-unschedulable rules require both an HTTP status match and a response-body keyword match in the upstream failure/error path. Do not treat them as a general successful-response content filter, and do not add a YAML 200 cooldown rule, patch Sub2API in place, fork Sub2API behavior in UniDesk, or bypass `codex-pool sync` to make the native pool pretend that HTTP 200 content cooling exists. 
HTTP 200 private content, maintenance text, quota prompts, ads, and similar semantic failures are handled by the external account-level sentinel when that sentinel is enabled, not by Sub2API native `temp_unschedulable_rules`. If automatic cooling or same-request failover does not happen for an error that the YAML policy declares, treat that as a Sub2API capability or integration defect. The closeout must show the failing account being marked temporarily unschedulable and the next request or same request selecting another schedulable account; a manually disabled, deleted, or pruned account is not valid evidence for this class of fix. +## Sub2API Account Test Semantics + +Sub2API v0.1.136 has a separate management-plane account connection test. The admin WebUI account modal calls `POST /api/v1/admin/accounts/:id/test` with `model_id` and, for the admin account table modal, no OpenAI `mode`; the backend binds this to `AccountTestService.TestAccountConnection`, which normalizes an empty mode to `default`. + +For OpenAI API-key accounts in default mode, the test loads the account by id, applies `account.GetMappedModel(model_id)`, checks `openai_compat.ShouldUseResponsesAPI(account.Extra)`, and then builds an upstream URL from the account base URL with `/v1/responses`. It sends a direct upstream request through `httpUpstream.DoWithTLS` with `Content-Type: application/json` and `Authorization: Bearer <api-key>`. The request body is Responses API SSE, not a non-streaming JSON request: `model` is the mapped model, `input` is one user message whose text is `hi`, `stream` is `true`, and `instructions` is Sub2API's embedded OpenAI default instructions. For API-key accounts it does not set `store: false`, `max_output_tokens`, Codex CLI `User-Agent`, `OpenAI-Beta`, `Originator`, `Version`, `Session_ID`, or `Conversation_ID`; those Codex-like headers appear in other paths such as compact probing, not in the default account test. 
+ +The management test success criterion is transport and stream completion, not semantic content. A non-200 upstream response becomes an SSE error. A 200 response is considered successful when `processOpenAIStream` sees `response.completed` or `response.done`; `response.output_text.delta` chunks are forwarded to the WebUI as display text, while `response.failed`, `error`, or EOF before completion fails the test. Therefore a WebUI "hi" success proves that this direct account can complete a streaming `/v1/responses` request with Sub2API's default payload shape, but it does not prove that a non-streaming Responses request, marker prompt, `max_output_tokens`, `store: false`, Codex header set, compact path, WebSocket path, or normal pool-scheduled gateway request will behave identically. + +This management-plane test is also outside the normal consumer gateway scheduler. It fetches the account by id instead of listing only schedulable accounts, so `status=active` in the modal and a successful account test can coexist with `schedulable=false` in scheduler state. Because the test performs its own outbound `DoWithTLS` call, regular gateway access logs and usage logs may not contain the upstream account id/path/status evidence expected from ordinary `/v1/responses` traffic. When diagnosing account tests, use the management route semantics above or Sub2API source, not gateway access-log absence or an unrelated pool request as proof. + +An external account-level sentinel that wants parity with this WebUI path should reuse the same request shape as far as the standard OpenAI SDK allows: direct account credentials, Responses API, `stream=true`, no `store: false` for API-key accounts, no upstream `max_output_tokens` field, and success parsing based on the streaming events. A local stream delta collection limit is acceptable as a sentinel safety bound, but it should not change the upstream request body. 
The sentinel may replace the user text `hi` with a marker prompt, but it should not introduce extra request fields or Codex/compact headers merely for convenience. If a marker-only sentinel intentionally diverges from the management test shape, the divergence must be documented in probe output so a WebUI success and sentinel failure are not misread as operator error. + +## Account Sentinel Marker Contract + +The UniDesk account-level sentinel uses marker-only health semantics. A probe is healthy only when the upstream response satisfies the configured marker match. Every other result is unhealthy and must enter the same exponential freeze state machine, regardless of whether the immediate response is HTTP 200, 400, 403, 429, 500, 502, 503, 504, a streaming error event, malformed output, empty output, timeout, or any other transport/API failure. HTTP status, upstream error code, body hash, body preview, headers, and SDK exception class are diagnostics only; they must not become additional allow/deny criteria that bypass marker mismatch. + +The sentinel must not maintain separate classifiers for "private content", "maintenance", "quota", "ads", or provider-specific body phrases as health gates. The only recovery condition is a later recovery probe that matches the marker. Freeze TTL expiry only schedules the next recovery probe; it does not restore an account by itself. Repeated non-marker results use exponential freeze backoff, and repeated marker-matching results use the configured success cadence backoff. This contract applies equally to OpenAI Responses `gpt-5.5` direct account probes and manual `codex-pool sentinel-probe --account ... --confirm` measurements. + +Operational observation for this sentinel should use the read-only `codex-pool sentinel-report` table or its `--raw` form. 
It is the canonical low-noise view for per-account probe count, marker result, HTTP/error diagnostics, freeze TTL, success cadence, next probe time, and recent CronJob runs; raw ConfigMap dumps and ad hoc log scraping are fallback diagnostics, not the primary state surface. + The request path is: 1. A client sends an OpenAI-compatible request to the configured consumer base URL, normally `https://sub2api.74-48-78-17.nip.io/v1/...`, with the unified API key. diff --git a/scripts/cli.ts b/scripts/cli.ts index dbfddd4a..791fe08f 100644 --- a/scripts/cli.ts +++ b/scripts/cli.ts @@ -333,6 +333,11 @@ async function main(): Promise { const { runPlatformInfraCommand } = await import("./src/platform-infra"); const result = await runPlatformInfraCommand(readConfig(), args.slice(1)); const ok = (result as { ok?: unknown }).ok !== false; + if (isRenderedCliResult(result)) { + emitText(result.renderedText); + if (!ok) process.exitCode = 1; + return; + } emitJson(commandName, result, ok); if (!ok) process.exitCode = 1; return; diff --git a/scripts/platform-infra-sub2api-codex-sentinel-contract-test.ts b/scripts/platform-infra-sub2api-codex-sentinel-contract-test.ts index 6a2da782..7d116dd3 100644 --- a/scripts/platform-infra-sub2api-codex-sentinel-contract-test.ts +++ b/scripts/platform-infra-sub2api-codex-sentinel-contract-test.ts @@ -3,10 +3,13 @@ import { rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { rootPath } from "./src/config"; +import { codexPoolHelp, defaultCodexTempUnschedulablePolicy } from "./src/platform-infra-sub2api-codex"; import { + codexPoolSentinelRuntimeImage, defaultCodexPoolSentinelConfig, readCodexPoolSentinelConfig, renderCodexPoolSentinelManifest, + sentinelContainerShellCommand, sentinelRunnerPython, } from "./src/platform-infra-sub2api-codex-sentinel"; @@ -15,21 +18,33 @@ function assertCondition(condition: unknown, message: string, detail: unknown = } const configPath = 
rootPath("config", "platform-infra", "sub2api-codex-pool.yaml"); -const parsed = Bun.YAML.parse(readFileSync(configPath, "utf8")) as { sentinel?: unknown }; +const sentinelDockerfilePath = rootPath("src", "components", "platform-infra", "sub2api", "sentinel.Dockerfile"); +const parsed = Bun.YAML.parse(readFileSync(configPath, "utf8")) as { + sentinel?: unknown; + pool?: { defaultTempUnschedulable?: { rules?: Array<{ statusCode?: unknown }> } }; +}; const sentinel = readCodexPoolSentinelConfig(parsed.sentinel, defaultCodexPoolSentinelConfig(), configPath); +const sentinelRuntimeImage = codexPoolSentinelRuntimeImage(sentinel); +const sentinelDockerfile = readFileSync(sentinelDockerfilePath, "utf8"); +const yamlTempUnschedulableRules = parsed.pool?.defaultTempUnschedulable?.rules ?? []; -assertCondition(sentinel.monitor.enabled === true, "sentinel monitor must be enabled for observation-first rollout", sentinel); -assertCondition(sentinel.actions.enabled === false, "sentinel actions must default off until monitoring quality is reviewed", sentinel); -assertCondition(sentinel.actions.freezeOnMarkerMismatch === true, "marker mismatch must be configured as freeze-worthy when actions are enabled", sentinel.actions); -assertCondition(sentinel.actions.freezeOnTransportError === false, "transport errors must not freeze by default", sentinel.actions); +assertCondition(sentinel.monitor.enabled === true, "sentinel monitor must be enabled for marker-only guard rollout", sentinel); +assertCondition(sentinel.actions.enabled === true, "sentinel actions must be enabled so marker-only guard can freeze and recover accounts", sentinel); +assertCondition(!yamlTempUnschedulableRules.some((rule) => rule.statusCode === 200), "native Sub2API temp-unschedulable policy must not classify HTTP 200 bodies; marker-only sentinel owns 200 semantic failures", yamlTempUnschedulableRules); +assertCondition(!defaultCodexTempUnschedulablePolicy().rules.some((rule) => rule.statusCode === 200), "default 
temp-unschedulable policy must not reintroduce HTTP 200 body classifiers", defaultCodexTempUnschedulablePolicy()); +assertCondition(!("freezeOnMarkerMismatch" in sentinel.actions), "sentinel must not keep a marker-specific freeze branch; marker match is the only health standard", sentinel.actions); +assertCondition(!("freezeOnTransportError" in sentinel.actions), "sentinel must not keep a transport-specific freeze branch; non-marker results all use the same freeze state machine", sentinel.actions); assertCondition(sentinel.endpoint === "responses", "v1 sentinel must target OpenAI Responses only", sentinel); assertCondition(sentinel.model === "gpt-5.5", "v1 sentinel must use GPT-5.5", sentinel); -assertCondition(sentinel.probe.maxOutputTokens > 0 && sentinel.probe.maxOutputTokens <= 16, "sentinel maxOutputTokens must be tightly capped", sentinel.probe); +assertCondition(sentinel.probe.maxOutputTokens > 0 && sentinel.probe.maxOutputTokens <= 16, "sentinel local stream capture limit must be tightly capped", sentinel.probe); +assertCondition(!("maxResponseBytes" in sentinel.probe), "sentinel must not use hand-rolled response byte parsing for OpenAI model probes", sentinel.probe); +assertCondition(sentinel.probe.userAgent === "Go-http-client/1.1", "sentinel default User-Agent must match Sub2API net/http account test shape", sentinel.probe); +assertCondition(sentinel.sdk.openaiPythonVersion === "2.41.1", "sentinel must pin the OpenAI Python SDK version in YAML", sentinel.sdk); assertCondition(!("concurrency" in sentinel.probe), "sentinel must not cap probe concurrency; all due accounts are probed concurrently", sentinel.probe); assertCondition(!("maxAccountsPerRun" in sentinel.probe), "sentinel must not cap accounts per run; all due accounts are eligible", sentinel.probe); assertCondition(sentinel.cadence.successInitialIntervalMinutes === 1, "success trust backoff must start at 1 minute", sentinel.cadence); assertCondition(sentinel.cadence.successMaxIntervalMinutes === 
20, "success trust backoff must cap at 20 minutes", sentinel.cadence); -assertCondition(sentinel.freeze.initialTtlMinutes === 10, "freeze backoff must start at 10 minutes", sentinel.freeze); +assertCondition(sentinel.freeze.initialTtlMinutes === 2, "freeze backoff must start at 2 minutes", sentinel.freeze); assertCondition(sentinel.freeze.maxTtlMinutes === 120, "freeze backoff must cap at 2 hours", sentinel.freeze); assertCondition(!("budget" in sentinel), "sentinel must not use token budgets as a probe gate; usage is recorded only", sentinel); @@ -53,11 +68,40 @@ assertCondition(manifest.includes("concurrencyPolicy: Forbid"), "sentinel CronJo assertCondition(manifest.includes("suspend: false"), "monitor.enabled=true must unsuspend the CronJob", manifest); assertCondition(manifest.includes("kind: ServiceAccount") && manifest.includes("kind: Role") && manifest.includes("kind: RoleBinding"), "sentinel manifest must include minimal RBAC", manifest); assertCondition(manifest.includes("sub2api-account-sentinel-state"), "sentinel manifest must reference the state ConfigMap", manifest); -assertCondition(manifest.includes("\"enabled\": false"), "sentinel manifest must preserve actions.enabled=false in config.json", manifest); +assertCondition(manifest.includes("\"enabled\": true"), "sentinel manifest must preserve actions.enabled=true in config.json", manifest); assertCondition(!manifest.includes("sk-test-secret"), "sentinel manifest must not expose upstream credentials as plaintext", manifest); assertCondition(manifest.includes("profiles.json:"), "sentinel credentials Secret must include the profiles payload as Secret data", manifest); assertCondition(manifest.includes("\"budgetMode\": \"record-only\""), "sentinel runner must expose record-only budget/accounting mode", manifest); assertCondition(manifest.includes("max_workers=max(1, len(due))"), "sentinel runner must probe all due accounts concurrently", manifest); +assertCondition(manifest.includes(`image: 
${sentinelRuntimeImage.runtimeImage}`), "sentinel manifest must use the reusable prebuilt runtime image", { image: sentinelRuntimeImage.runtimeImage, manifest }); +assertCondition(!manifest.includes("transport-failed-no-freeze"), "sentinel runner must not exempt transport failures from marker-based freezing", manifest); +const command = sentinelContainerShellCommand(sentinel); +assertCondition(command.includes("openai-python-version-mismatch"), "sentinel command must fail fast when the image SDK version does not match YAML", command); +assertCondition(!command.includes("pip install") && !command.includes("subprocess.check_call"), "sentinel command must not install Python packages at runtime", command); +assertCondition(sentinelDockerfile.includes("ARG OPENAI_PYTHON_VERSION=2.41.1"), "sentinel Dockerfile must make the OpenAI SDK version a build arg with the current default", sentinelDockerfile); +assertCondition(sentinelDockerfile.includes('"openai==${OPENAI_PYTHON_VERSION}"'), "sentinel Dockerfile must preinstall the pinned OpenAI SDK", sentinelDockerfile); + +const help = codexPoolHelp() as { usage?: unknown }; +assertCondition(Array.isArray(help.usage) && help.usage.some((item) => typeof item === "string" && item.includes("sentinel-probe --account")), "codex-pool help must expose manual sentinel-probe by account", help); +assertCondition(Array.isArray(help.usage) && help.usage.some((item) => typeof item === "string" && item.includes("sentinel-image build")), "codex-pool help must expose reusable sentinel image build", help); +assertCondition(Array.isArray(help.usage) && help.usage.some((item) => typeof item === "string" && item.includes("sentinel-report")), "codex-pool help must expose low-noise sentinel-report", help); +assertCondition(typeof (help as { output?: unknown }).output === "string" && String((help as { output?: unknown }).output).includes("ps-like text table"), "codex-pool help must document sentinel-report text table output", help); +const runner = 
sentinelRunnerPython(); +assertCondition(runner.includes("from openai import APIConnectionError, APIStatusError, APITimeoutError, OpenAI"), "sentinel runner must use the standard OpenAI Python SDK", runner); +assertCondition(runner.includes("client.responses.create(") && runner.includes("stream=True"), "sentinel runner must use the SDK Responses streaming create method", runner); +assertCondition(runner.includes("sub2api_style_input(prompt)") && runner.includes("sub2api_style_instructions()"), "sentinel runner must mirror Sub2API WebUI default account test request shape", runner); +assertCondition(runner.includes("extra_headers=headers"), "sentinel runner must pass configured User-Agent through SDK extra_headers", runner); +assertCondition(!runner.includes("store=False"), "sentinel runner must not add store=false to API-key account probes", runner); +assertCondition(!runner.includes("max_output_tokens="), "sentinel runner must not send max_output_tokens upstream for WebUI-compatible probes", runner); +assertCondition(!runner.includes("Originator") && !runner.includes("Session_ID") && !runner.includes("OpenAI-Beta"), "sentinel runner must not add Codex/compact headers to default account probes", runner); +assertCondition(!runner.includes("upstream_responses_url"), "sentinel runner must not hand-roll /v1/responses URLs for model probes", runner); +assertCondition(runner.includes("def error_details("), "sentinel runner must emit structured error diagnostics for failed probes", runner); +assertCondition(runner.includes('"openaiError": openai_error_fields(body)'), "sentinel diagnostics must expose OpenAI error type/code/message fields", runner); +assertCondition(runner.includes('"responseBodyHash": result.get("responseBodyHash")'), "sentinel state must keep response body hashes for diagnostics", runner); +assertCondition(runner.includes('"responseBodyPreview": item.get("responseBodyPreview")'), "sentinel CLI output must include bounded response body previews for 
diagnostics", runner); +assertCondition(runner.includes("SENTINEL_ACCOUNT_NAMES"), "sentinel runner must support forced account probes for CLI manual measurement", runner); +assertCondition(runner.includes('parsed.get("code") not in (None, 0)'), "sentinel admin client must treat Sub2API {code:0,message:success,data} envelopes as successful", runner); +assertCondition(runner.includes("page_size=20&platform=openai&type=apikey&search="), "sentinel admin client must query one target account instead of fetching all accounts into the 64KiB admin response cap", runner); const disabledMonitor = { ...sentinel, @@ -92,13 +136,20 @@ console.log(JSON.stringify({ ok: true, checks: [ "sentinel has independent monitor/actions YAML switches", - "observation-first rollout keeps actions disabled", + "marker-only guard actions are enabled", "v1 scope is OpenAI Responses + GPT-5.5", - "probe max_output_tokens is tightly capped", + "probe local stream capture limit is tightly capped", + "probe uses the standard OpenAI Python SDK streaming Responses API", + "probe mirrors Sub2API WebUI default account test request shape", + "probe passes configured User-Agent through SDK extra_headers", + "OpenAI Python SDK version is YAML-pinned", + "OpenAI Python SDK is preinstalled in a reusable sentinel image", + "manual account probe CLI is exposed", "probe concurrency is not artificially capped", + "marker match is the only health standard", "budget is record-only and does not gate probes", "success trust backoff is 1m to 20m", - "freeze backoff is 10m to 120m", + "freeze backoff is 2m to 120m", "CronJob is k8s-native with Forbid concurrency and minimal RBAC", "monitor switch controls CronJob suspend state", "rendered Secret avoids plaintext upstream credentials", diff --git a/scripts/src/help.ts b/scripts/src/help.ts index b8c2da6d..c6605d87 100644 --- a/scripts/src/help.ts +++ b/scripts/src/help.ts @@ -58,7 +58,7 @@ export function rootHelp(): unknown { { command: "hwlab nodes 
control-plane|git-mirror|secret --node G14 --lane v03", description: "Manage HWLAB node/lane runtime prerequisites for v0.3+ with the node identity passed as data instead of a command family." }, { command: "hwlab g14 monitor-prs | hwlab g14 control-plane status|apply|trigger-current|runtime-migration|cleanup-runs|cleanup-released-pvs | hwlab g14 git-mirror status|apply|sync|flush | hwlab g14 tools-image status|build", description: "Start the legacy G14 PR monitor, run bounded v0.2 Tekton/Argo control-plane, manual PipelineRun trigger, runtime migration, CI workspace retention, manual devops-infra git mirror/relay maintenance, or fixed HWLAB CI tools image actions; long confirmed trigger/sync/flush actions return async jobs by default." }, { command: "agentrun get|describe|events|logs|result|ack|cancel|dispatch|create|apply|steer|send|control-plane|git-mirror", description: "Use AgentRun v0.1 resource primitives with low-noise human output by default; legacy bridge groups remain available for raw compatibility." }, - { command: "platform-infra sub2api plan|apply|status|validate|codex-pool", description: "Deploy Sub2API in G14 platform-infra, manage the YAML-controlled Codex upstream pool, expose the unified API through FRP when needed, and configure master ~/.codex without printing API keys." }, + { command: "platform-infra sub2api plan|apply|status|validate|codex-pool", description: "Deploy Sub2API in G14 platform-infra, manage the YAML-controlled Codex upstream pool, expose the unified API, and inspect marker sentinel state with low-noise reports without printing API keys." }, { command: "hwlab cd audit --env dev | hwlab cd status --env dev | hwlab cd apply --env dev --dry-run", description: "Legacy D601 HWLAB DEV CD wrapper kept for explicit old-path diagnostics; current HWLAB rollout uses G14 GitOps." }, { command: "code-agent-sandbox", description: "Independent Code Agent Sandbox service skeleton for adapter, mode, and credential-boundary diagnostics." 
}, { command: "schedule list|get|runs|run|retry-run|delete", description: "Manage backend-core scheduled tasks and run history; schedule run supports --wait-ms N and retry-run reuses the failed run's schedule." }, @@ -622,6 +622,9 @@ function platformInfraHelpSummary(): unknown { "bun scripts/cli.ts platform-infra sub2api plan", "bun scripts/cli.ts platform-infra sub2api status [--full|--raw]", "bun scripts/cli.ts platform-infra sub2api codex-pool validate", + "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-image status", + "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-probe --account unidesk-codex-hy --confirm", + "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-report", ], description: "Operate G14 platform-infra services such as Sub2API and the YAML-controlled Codex pool.", }; diff --git a/scripts/src/platform-infra-sub2api-codex-sentinel.ts b/scripts/src/platform-infra-sub2api-codex-sentinel.ts index 3845fdae..1bbaaf26 100644 --- a/scripts/src/platform-infra-sub2api-codex-sentinel.ts +++ b/scripts/src/platform-infra-sub2api-codex-sentinel.ts @@ -6,8 +6,6 @@ export interface CodexPoolSentinelConfig { }; actions: { enabled: boolean; - freezeOnMarkerMismatch: boolean; - freezeOnTransportError: boolean; }; schedule: string; image: string; @@ -24,9 +22,12 @@ export interface CodexPoolSentinelConfig { }; probe: { timeoutSeconds: number; - maxResponseBytes: number; maxOutputTokens: number; transportRetryMinutes: number; + userAgent: string; + }; + sdk: { + openaiPythonVersion: string; }; cadence: { successInitialIntervalMinutes: number; @@ -47,6 +48,13 @@ export interface CodexPoolSentinelConfig { historyLimit: number; } +export interface CodexPoolSentinelImageTarget { + baseImage: string; + runtimeImage: string; + repository: string; + tag: string; +} + export interface CodexPoolSentinelProfileSecret { accountName: string; profile: string; @@ -69,8 +77,6 @@ export function defaultCodexPoolSentinelConfig(): 
CodexPoolSentinelConfig { }, actions: { enabled: false, - freezeOnMarkerMismatch: true, - freezeOnTransportError: false, }, schedule: "*/1 * * * *", image: "python:3.12-alpine", @@ -87,9 +93,12 @@ export function defaultCodexPoolSentinelConfig(): CodexPoolSentinelConfig { }, probe: { timeoutSeconds: 30, - maxResponseBytes: 8192, - maxOutputTokens: 12, + maxOutputTokens: 16, transportRetryMinutes: 5, + userAgent: "Go-http-client/1.1", + }, + sdk: { + openaiPythonVersion: "2.41.1", }, cadence: { successInitialIntervalMinutes: 1, @@ -98,7 +107,7 @@ export function defaultCodexPoolSentinelConfig(): CodexPoolSentinelConfig { jitterPercent: 10, }, freeze: { - initialTtlMinutes: 10, + initialTtlMinutes: 2, maxTtlMinutes: 120, backoffMultiplier: 2, jitterPercent: 10, @@ -111,12 +120,28 @@ export function defaultCodexPoolSentinelConfig(): CodexPoolSentinelConfig { }; } +export function codexPoolSentinelRuntimeImage(config: CodexPoolSentinelConfig): CodexPoolSentinelImageTarget { + const baseTag = config.image + .replace(/[^A-Za-z0-9_.-]+/gu, "-") + .replace(/^-+|-+$/gu, "") + .slice(0, 80) || "python"; + const tag = `${baseTag}-openai-${config.sdk.openaiPythonVersion}`; + const repository = "127.0.0.1:5000/platform-infra/sub2api-account-sentinel"; + return { + baseImage: config.image, + runtimeImage: `${repository}:${tag}`, + repository, + tag, + }; +} + export function readCodexPoolSentinelConfig(value: unknown, defaults: CodexPoolSentinelConfig, sourcePath: string): CodexPoolSentinelConfig { if (!isRecord(value)) return defaults; const monitor = isRecord(value.monitor) ? value.monitor : {}; const actions = isRecord(value.actions) ? value.actions : {}; const marker = isRecord(value.marker) ? value.marker : {}; const probe = isRecord(value.probe) ? value.probe : {}; + const sdk = isRecord(value.sdk) ? value.sdk : {}; const cadence = isRecord(value.cadence) ? value.cadence : {}; const freeze = isRecord(value.freeze) ? 
value.freeze : {}; const pricing = isRecord(value.pricing) ? value.pricing : {}; @@ -126,8 +151,6 @@ export function readCodexPoolSentinelConfig(value: unknown, defaults: CodexPoolS }, actions: { enabled: readBoolean(valueAt(actions, "enabled"), `${sourcePath}.sentinel.actions.enabled`, defaults.actions.enabled), - freezeOnMarkerMismatch: readBoolean(valueAt(actions, "freezeOnMarkerMismatch"), `${sourcePath}.sentinel.actions.freezeOnMarkerMismatch`, defaults.actions.freezeOnMarkerMismatch), - freezeOnTransportError: readBoolean(valueAt(actions, "freezeOnTransportError"), `${sourcePath}.sentinel.actions.freezeOnTransportError`, defaults.actions.freezeOnTransportError), }, schedule: readString(valueAt(value, "schedule"), `${sourcePath}.sentinel.schedule`, defaults.schedule), image: readString(valueAt(value, "image"), `${sourcePath}.sentinel.image`, defaults.image), @@ -144,9 +167,12 @@ export function readCodexPoolSentinelConfig(value: unknown, defaults: CodexPoolS }, probe: { timeoutSeconds: readInt(valueAt(probe, "timeoutSeconds"), `${sourcePath}.sentinel.probe.timeoutSeconds`, defaults.probe.timeoutSeconds, 3, 300), - maxResponseBytes: readInt(valueAt(probe, "maxResponseBytes"), `${sourcePath}.sentinel.probe.maxResponseBytes`, defaults.probe.maxResponseBytes, 1024, 1048576), maxOutputTokens: readInt(valueAt(probe, "maxOutputTokens"), `${sourcePath}.sentinel.probe.maxOutputTokens`, defaults.probe.maxOutputTokens, 1, 128), transportRetryMinutes: readInt(valueAt(probe, "transportRetryMinutes"), `${sourcePath}.sentinel.probe.transportRetryMinutes`, defaults.probe.transportRetryMinutes, 1, 120), + userAgent: readUserAgent(valueAt(probe, "userAgent"), `${sourcePath}.sentinel.probe.userAgent`, defaults.probe.userAgent), + }, + sdk: { + openaiPythonVersion: readOpenAiPythonVersion(valueAt(sdk, "openaiPythonVersion"), `${sourcePath}.sentinel.sdk.openaiPythonVersion`, defaults.sdk.openaiPythonVersion), }, cadence: { successInitialIntervalMinutes: readInt(valueAt(cadence, 
"successInitialIntervalMinutes"), `${sourcePath}.sentinel.cadence.successInitialIntervalMinutes`, defaults.cadence.successInitialIntervalMinutes, 1, 1440), @@ -197,6 +223,7 @@ export function codexPoolSentinelSummary(config: CodexPoolSentinelConfig): Recor model: config.model, endpoint: config.endpoint, probe: config.probe, + sdk: config.sdk, cadence: config.cadence, freeze: config.freeze, accounting: { @@ -228,6 +255,7 @@ export function renderCodexPoolSentinelManifest( endpoint: config.endpoint, marker: config.marker, probe: config.probe, + sdk: config.sdk, cadence: config.cadence, freeze: config.freeze, pricing: config.pricing, @@ -237,7 +265,9 @@ export function renderCodexPoolSentinelManifest( }, }; const suspend = config.monitor.enabled ? "false" : "true"; - const activeDeadlineSeconds = Math.max(120, Math.min(3600, config.probe.timeoutSeconds + 120)); + const activeDeadlineSeconds = Math.max(300, Math.min(3600, config.probe.timeoutSeconds + 240)); + const command = sentinelContainerShellCommand(config); + const runtimeImage = codexPoolSentinelRuntimeImage(config).runtimeImage; return `apiVersion: v1 kind: Secret metadata: @@ -339,9 +369,11 @@ spec: restartPolicy: Never containers: - name: sentinel - image: ${config.image} + image: ${runtimeImage} imagePullPolicy: IfNotPresent - command: ["python3", "/opt/sentinel/sentinel.py"] + command: ["sh", "-c"] + args: + - ${JSON.stringify(command)} env: - name: ADMIN_EMAIL valueFrom: @@ -377,6 +409,23 @@ spec: `; } +export function sentinelContainerShellCommand(config: CodexPoolSentinelConfig): string { + return [ + "set -eu", + "python3 - <<'PY'", + "import importlib.metadata", + `expected = ${JSON.stringify(config.sdk.openaiPythonVersion)}`, + "try:", + " current = importlib.metadata.version('openai')", + "except importlib.metadata.PackageNotFoundError:", + " current = None", + "if current != expected:", + " raise SystemExit(f'openai-python-version-mismatch expected={expected} current={current}')", + "PY", + "exec 
python3 /opt/sentinel/sentinel.py", + ].join("\n"); +} + export function sentinelRunnerPython(): string { return String.raw`#!/usr/bin/env python3 import base64 @@ -390,7 +439,8 @@ import time import traceback from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime, timezone, timedelta -from urllib import error, request +from urllib import error, parse, request +from openai import APIConnectionError, APIStatusError, APITimeoutError, OpenAI CONFIG_PATH = "/opt/sentinel/config.json" PROFILES_PATH = "/opt/sentinel-secrets/profiles.json" @@ -547,8 +597,9 @@ def http_json(method, url, headers=None, payload=None, timeout=30, max_bytes=655 parsed = json.loads(text) if text.strip() else None except Exception: parsed = None + app_success = not (isinstance(parsed, dict) and parsed.get("code") not in (None, 0)) return { - "ok": 200 <= resp.status < 300 and not too_large, + "ok": 200 <= resp.status < 300 and not too_large and app_success, "status": resp.status, "json": parsed, "text": text, @@ -599,6 +650,9 @@ def find_token(value): return found return None +def url_quote(value): + return parse.quote(str(value), safe="") + class Sub2ApiAdmin: def __init__(self, config): self.base = config["service"]["baseUrl"].rstrip("/") @@ -647,15 +701,32 @@ class Sub2ApiAdmin: self.accounts_by_name = {item.get("name"): item for item in items if isinstance(item, dict) and isinstance(item.get("name"), str)} return self.accounts_by_name + def account(self, account_name): + if self.accounts_by_name is not None and account_name in self.accounts_by_name: + return self.accounts_by_name[account_name] + data = self.request("GET", "/api/v1/admin/accounts?page=1&page_size=20&platform=openai&type=apikey&search=" + url_quote(account_name)) + items = data if isinstance(data, list) else [] + if isinstance(data, dict): + for key in ("items", "accounts"): + if isinstance(data.get(key), list): + items = data[key] + break + for item in items: + if isinstance(item, dict) 
and item.get("name") == account_name: + if self.accounts_by_name is not None: + self.accounts_by_name[account_name] = item + return item + return None + def set_schedulable(self, account_name, schedulable): - account = self.accounts().get(account_name) + account = self.account(account_name) if not account or account.get("id") is None: raise RuntimeError(f"account {account_name} not found") self.request("POST", f"/api/v1/admin/accounts/{account['id']}/schedulable", {"schedulable": bool(schedulable)}) return {"accountId": account.get("id"), "schedulable": bool(schedulable)} def recover_state(self, account_name): - account = self.accounts().get(account_name) + account = self.account(account_name) if not account or account.get("id") is None: return {"skipped": True, "reason": "account-not-found"} try: @@ -664,11 +735,9 @@ class Sub2ApiAdmin: except Exception as exc: return {"ok": False, "accountId": account.get("id"), "error": str(exc)} -def upstream_responses_url(base_url): +def upstream_base_url(base_url): base = str(base_url).rstrip("/") - if base.endswith("/v1"): - return base + "/responses" - return base + "/v1/responses" + return base if base.endswith("/v1") else base + "/v1" def output_text(parsed): if isinstance(parsed, dict) and isinstance(parsed.get("output_text"), str): @@ -687,6 +756,230 @@ def output_text(parsed): parts.append(block["text"]) return "\n".join(parts) +def model_dump(value): + if hasattr(value, "model_dump"): + return value.model_dump() + if isinstance(value, dict): + return value + return {} + +def body_text(value): + if isinstance(value, bytes): + return value.decode("utf-8", errors="replace") + if isinstance(value, str): + return value + try: + return json.dumps(value, ensure_ascii=False) + except Exception: + return str(value) + +def redact_diagnostic(value): + if isinstance(value, dict): + redacted = {} + for key, item in value.items(): + key_text = str(key) + if any(token in key_text.lower() for token in ("key", "token", "secret", 
"password", "credential", "authorization")): + redacted[key_text] = "[redacted]" + else: + redacted[key_text] = redact_diagnostic(item) + return redacted + if isinstance(value, list): + return [redact_diagnostic(item) for item in value[:20]] + if isinstance(value, str): + return value if len(value) <= 2000 else value[:2000] + "...[truncated]" + if isinstance(value, (int, float, bool)) or value is None: + return value + return str(value) + +def selected_headers(headers): + if headers is None: + return {} + selected = {} + for key in ( + "content-type", + "x-request-id", + "x-ratelimit-limit-requests", + "x-ratelimit-remaining-requests", + "x-ratelimit-reset-requests", + "cf-ray", + "server", + ): + try: + value = headers.get(key) + except Exception: + value = None + if value: + selected[key] = str(value) + return selected + +def openai_error_fields(body): + if not isinstance(body, dict): + return {} + error_obj = body.get("error") + if isinstance(error_obj, dict): + return { + "message": error_obj.get("message"), + "type": error_obj.get("type"), + "param": error_obj.get("param"), + "code": error_obj.get("code"), + } + return { + "message": body.get("message"), + "type": body.get("type"), + "param": body.get("param"), + "code": body.get("code"), + } + +def error_details(kind, status, body=None, message=None, headers=None): + text = body_text(body) + return { + "kind": kind, + "statusCode": status, + "message": str(message) if message else None, + "openaiError": openai_error_fields(body), + "body": redact_diagnostic(body) if isinstance(body, (dict, list)) else None, + "bodyHash": sha(text), + "bodyPreview": preview(text, 1000), + "headers": selected_headers(headers), + } + +def sub2api_style_input(prompt): + return [{ + "role": "user", + "content": [{ + "type": "input_text", + "text": prompt, + }], + }] + +def sub2api_style_instructions(): + return ( + "You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer." 
+ ) + +def event_type(event): + if isinstance(event, dict): + return event.get("type") + return getattr(event, "type", None) + +def event_delta(event): + if isinstance(event, dict): + value = event.get("delta") + return value if isinstance(value, str) else "" + value = getattr(event, "delta", "") + return value if isinstance(value, str) else "" + +def event_error_message(event): + data = model_dump(event) + if isinstance(data, dict): + if isinstance(data.get("error"), dict): + message = data["error"].get("message") + if isinstance(message, str) and message: + return message + if isinstance(data.get("response"), dict) and isinstance(data["response"].get("error"), dict): + message = data["response"]["error"].get("message") + if isinstance(message, str) and message: + return message + return None + +def openai_responses_create(profile, config, marker, prompt): + headers = { + "User-Agent": profile.get("upstreamUserAgent") or config["probe"].get("userAgent") or "Go-http-client/1.1", + "X-Request-ID": "unidesk-account-sentinel-" + hashlib.sha256(marker.encode()).hexdigest()[:16], + } + client = OpenAI( + api_key=profile["apiKey"], + base_url=upstream_base_url(profile["baseUrl"]), + timeout=float(config["probe"]["timeoutSeconds"]), + max_retries=0, + ) + started = time.time() + try: + stream = client.responses.create( + model=config["model"], + input=sub2api_style_input(prompt), + instructions=sub2api_style_instructions(), + stream=True, + extra_headers=headers, + ) + deltas = [] + events = [] + seen_completed = False + max_chars = max(32, int(config["probe"]["maxOutputTokens"]) * 12) + for event in stream: + event_data = model_dump(event) + etype = event_type(event) + events.append({"type": etype, "preview": preview(body_text(event_data), 240)}) + if etype == "response.output_text.delta": + delta = event_delta(event) + if delta: + deltas.append(delta) + if len("".join(deltas)) > max_chars: + break + elif etype in ("response.completed", "response.done"): + seen_completed 
= True + break + elif etype in ("response.failed", "error"): + message = event_error_message(event) or "OpenAI response failed" + raise RuntimeError(message) + out = "".join(deltas) + parsed = {"stream": True, "completed": seen_completed, "events": events[-20:], "output_text": out} + if not seen_completed: + parsed["streamError"] = "stream ended before response.completed" + return { + "ok": seen_completed, + "status": 200 if seen_completed else 0, + "json": parsed, + "outputText": out, + "text": body_text(parsed), + "tooLarge": not seen_completed and len(out) > max_chars, + "durationMs": int((time.time() - started) * 1000), + "sdk": "openai-python", + "requestShape": "sub2api-account-test-streaming-responses", + } + except APIStatusError as exc: + status = getattr(exc, "status_code", 0) or 0 + body = getattr(exc, "body", None) + response = getattr(exc, "response", None) + return { + "ok": False, + "status": status, + "json": body if isinstance(body, dict) else None, + "text": body_text(body or response or ""), + "tooLarge": False, + "durationMs": int((time.time() - started) * 1000), + "error": str(exc), + "errorDetails": error_details("APIStatusError", status, body, str(exc), getattr(response, "headers", None)), + "sdk": "openai-python", + "requestShape": "sub2api-account-test-streaming-responses", + } + except (APITimeoutError, APIConnectionError) as exc: + return { + "ok": False, + "status": 0, + "json": None, + "text": "", + "tooLarge": False, + "durationMs": int((time.time() - started) * 1000), + "error": str(exc), + "errorDetails": error_details(exc.__class__.__name__, 0, None, str(exc), None), + "sdk": "openai-python", + "requestShape": "sub2api-account-test-streaming-responses", + } + except Exception as exc: + return { + "ok": False, + "status": 0, + "json": None, + "text": "", + "tooLarge": False, + "durationMs": int((time.time() - started) * 1000), + "error": str(exc), + "errorDetails": error_details(exc.__class__.__name__, 0, None, str(exc), None), + 
"sdk": "openai-python", + "requestShape": "sub2api-account-test-streaming-responses", + } + def usage_from(parsed, prompt, out, config): usage = parsed.get("usage") if isinstance(parsed, dict) and isinstance(parsed.get("usage"), dict) else {} input_tokens = usage.get("input_tokens") @@ -717,36 +1010,25 @@ def usage_from(parsed, prompt, out, config): def probe_account(profile, config, purpose): marker = config["marker"]["prefix"] + "_" + hashlib.sha256((profile["accountName"] + str(time.time()) + str(random.random())).encode()).hexdigest()[:10] prompt = "Return exactly this marker and no other text: " + marker - payload = { - "model": config["model"], - "input": prompt, - "stream": False, - "store": False, - "max_output_tokens": int(config["probe"]["maxOutputTokens"]), - } - headers = { - "Authorization": "Bearer " + profile["apiKey"], - "Content-Type": "application/json", - "X-Request-ID": "unidesk-account-sentinel-" + hashlib.sha256(marker.encode()).hexdigest()[:16], - } - if profile.get("upstreamUserAgent"): - headers["User-Agent"] = profile["upstreamUserAgent"] - resp = http_json( - "POST", - upstream_responses_url(profile["baseUrl"]), - headers, - payload, - timeout=int(config["probe"]["timeoutSeconds"]), - max_bytes=int(config["probe"]["maxResponseBytes"]), - ) + resp = openai_responses_create(profile, config, marker, prompt) parsed = resp.get("json") - out = output_text(parsed) + out = resp.get("outputText") if isinstance(resp.get("outputText"), str) else output_text(parsed) trimmed = out.strip() marker_matched = trimmed == marker if config["marker"].get("exact", True) else marker in trimmed usage = usage_from(parsed if isinstance(parsed, dict) else {}, prompt, out or resp.get("text", ""), config) http_success = isinstance(resp.get("status"), int) and 200 <= resp.get("status") < 300 - ok = resp["ok"] and marker_matched - mismatch = http_success and not marker_matched + ok = marker_matched + mismatch = not marker_matched + if marker_matched: + failure_kind = 
"none" + elif resp.get("tooLarge"): + failure_kind = "response-too-large" + elif not resp["ok"]: + failure_kind = "transport-or-http-failure" + elif http_success: + failure_kind = "success-body-mismatch" + else: + failure_kind = "unknown-marker-mismatch" return { "accountName": profile["accountName"], "profile": profile.get("profile"), @@ -760,11 +1042,16 @@ def probe_account(profile, config, purpose): "durationMs": resp.get("durationMs"), "outputHash": sha(out), "outputPreview": "" if marker_matched else preview(out or resp.get("text", ""), 160), - "bodyPreviewHash": sha(resp.get("text", "")), + "responseBodyHash": sha(resp.get("text", "")), + "responseBodyPreview": "" if marker_matched else preview(resp.get("text", ""), 1000), "error": resp.get("error"), + "errorDetails": resp.get("errorDetails"), "usage": usage, "mismatch": mismatch, "transportFailure": not resp["ok"], + "failureKind": failure_kind, + "sdk": resp.get("sdk"), + "requestShape": resp.get("requestShape"), } def ledger_for(state, now): @@ -806,6 +1093,28 @@ def choose_due_profiles(profiles, state, config, now): due.sort(key=lambda item: item["dueAt"] or "") return due, {"selected": len(due), "due": len(due), "limit": "all-due", "budgetMode": "record-only", "ledger": ledger} +def forced_account_names(): + raw = os.environ.get("SENTINEL_ACCOUNT_NAMES") or "" + names = [item.strip() for item in raw.split(",") if item.strip()] + return set(names) + +def choose_forced_profiles(profiles, state, config, now, names): + accounts = state.setdefault("accounts", {}) + found = [] + missing = sorted(names) + due = [] + for profile in profiles: + name = profile["accountName"] + if name not in names: + continue + account_state = accounts.setdefault(name, {}) + quarantine = account_state.get("quarantine") + purpose = "manual-recovery" if isinstance(quarantine, dict) and quarantine.get("active") is True else "manual-health" + due.append({"profile": profile, "purpose": purpose, "dueAt": "forced"}) + found.append(name) 
+ missing = sorted(name for name in names if name not in set(found)) + return due, {"selected": len(due), "due": len(due), "limit": "forced-accounts", "budgetMode": "record-only", "ledger": ledger_for(state, now)[1], "requestedAccounts": sorted(names), "missingAccounts": missing} + def next_success_interval(account_state, config): streak = int(account_state.get("successStreak") or 0) previous = int(account_state.get("successIntervalMinutes") or 0) @@ -849,9 +1158,7 @@ def apply_result(result, state, config, now, admin): account_state["lastOkAt"] = iso(now) account_state["lastStatus"] = "ok" else: - should_freeze = bool(result.get("mismatch")) and bool(config["actions"]["freezeOnMarkerMismatch"]) - if result.get("transportFailure") and bool(config["actions"].get("freezeOnTransportError")): - should_freeze = True + should_freeze = result.get("markerMatched") is not True if should_freeze: interval = next_freeze_interval(account_state, config, was_recovery) until = add_minutes(now, interval, int(config["freeze"]["jitterPercent"])) @@ -869,9 +1176,12 @@ def apply_result(result, state, config, now, admin): "applied": applied, "until": iso(until), "intervalMinutes": interval, - "reason": "marker-mismatch" if result.get("mismatch") else "transport-failure", + "reason": "marker-not-matched", + "failureKind": result.get("failureKind"), "markerHash": result.get("markerHash"), "outputHash": result.get("outputHash"), + "responseBodyHash": result.get("responseBodyHash"), + "errorDetails": result.get("errorDetails"), "lastBadAt": iso(now), } account_state["nextProbeAfter"] = iso(until) @@ -881,7 +1191,7 @@ def apply_result(result, state, config, now, admin): else: retry = int(config["probe"]["transportRetryMinutes"]) account_state["nextProbeAfter"] = iso(add_minutes(now, retry, int(config["cadence"]["jitterPercent"]))) - account_state["lastStatus"] = "transport-failed-no-freeze" + account_state["lastStatus"] = "marker-not-matched-no-freeze" account_state["lastFailureAt"] = 
iso(now) account_state["lastProbeAt"] = iso(now) account_state["lastProbe"] = { @@ -890,9 +1200,17 @@ def apply_result(result, state, config, now, admin): "httpStatus": result.get("httpStatus"), "durationMs": result.get("durationMs"), "markerMatched": result.get("markerMatched"), + "transportOk": result.get("transportOk"), "outputHash": result.get("outputHash"), "outputPreview": result.get("outputPreview"), + "responseBodyHash": result.get("responseBodyHash"), + "responseBodyPreview": result.get("responseBodyPreview"), + "error": result.get("error"), + "errorDetails": result.get("errorDetails"), "usage": result.get("usage"), + "failureKind": result.get("failureKind"), + "sdk": result.get("sdk"), + "requestShape": result.get("requestShape"), "action": action, } return action @@ -909,7 +1227,13 @@ def reconcile_active_quarantines(state, config, now, admin): if until is not None and until <= now: continue if quarantine.get("applied") is not True: - actions.append({"accountName": name, "type": "virtual-freeze-not-applied", "ok": True}) + try: + admin.set_schedulable(name, False) + quarantine["applied"] = True + quarantine["appliedAt"] = iso(now) + actions.append({"accountName": name, "type": "apply-pending-freeze", "ok": True}) + except Exception as exc: + actions.append({"accountName": name, "type": "apply-pending-freeze", "ok": False, "error": str(exc)}) continue try: admin.set_schedulable(name, False) @@ -927,10 +1251,14 @@ def main(): state_obj, state = load_state(kube, config) admin = Sub2ApiAdmin(config) reconcile = reconcile_active_quarantines(state, config, now, admin) - due, selection = choose_due_profiles(profiles, state, config, now) + forced_names = forced_account_names() + if forced_names: + due, selection = choose_forced_profiles(profiles, state, config, now, forced_names) + else: + due, selection = choose_due_profiles(profiles, state, config, now) results = [] actions = [] - if config["monitor"]["enabled"] and due: + if (config["monitor"]["enabled"] or 
forced_names) and due: with ThreadPoolExecutor(max_workers=max(1, len(due))) as executor: futures = [executor.submit(probe_account, item["profile"], config, item["purpose"]) for item in due] for future in as_completed(futures): @@ -945,7 +1273,8 @@ def main(): "profileCount": len(profiles), "selected": len(due), "okCount": sum(1 for item in results if item.get("ok") is True), - "mismatchCount": sum(1 for item in results if item.get("mismatch") is True), + "mismatchCount": sum(1 for item in results if item.get("markerMatched") is not True), + "markerMismatchCount": sum(1 for item in results if item.get("markerMatched") is not True), "transportFailureCount": sum(1 for item in results if item.get("transportFailure") is True), "actionsTaken": sum(1 for item in actions if item.get("taken") is True), "selection": selection, @@ -968,7 +1297,13 @@ def main(): "usage": item.get("usage"), "outputHash": item.get("outputHash"), "outputPreview": item.get("outputPreview"), + "responseBodyHash": item.get("responseBodyHash"), + "responseBodyPreview": item.get("responseBodyPreview"), "error": item.get("error"), + "errorDetails": item.get("errorDetails"), + "failureKind": item.get("failureKind"), + "sdk": item.get("sdk"), + "requestShape": item.get("requestShape"), } for item in results], "actions": actions, "valuesPrinted": False, @@ -1030,6 +1365,19 @@ function readMarkerPrefix(value: unknown, key: string, fallback: string): string return text; } +function readUserAgent(value: unknown, key: string, fallback: string): string { + const text = readString(value, key, fallback); + if (/[\r\n]/u.test(text)) throw new Error(`${key} must not contain newlines`); + if (Buffer.byteLength(text, "utf8") > 200) throw new Error(`${key} must be at most 200 bytes`); + return text; +} + +function readOpenAiPythonVersion(value: unknown, key: string, fallback: string): string { + const text = readString(value, key, fallback); + if (!/^[0-9]+[.][0-9]+[.][0-9]+$/u.test(text)) throw new Error(`${key} 
must be a pinned semver version like 2.41.1`); + return text; +} + function readInt(value: unknown, key: string, fallback: number, min: number, max: number): number { if (value === undefined || value === null) return fallback; const parsed = typeof value === "number" ? value : typeof value === "string" && value.trim() ? Number(value) : Number.NaN; diff --git a/scripts/src/platform-infra-sub2api-codex.ts b/scripts/src/platform-infra-sub2api-codex.ts index 7f71a3e4..f12268de 100644 --- a/scripts/src/platform-infra-sub2api-codex.ts +++ b/scripts/src/platform-infra-sub2api-codex.ts @@ -4,8 +4,10 @@ import { homedir } from "node:os"; import { join } from "node:path"; import type { UniDeskConfig } from "./config"; import { rootPath } from "./config"; +import type { RenderedCliResult } from "./output"; import { codexPoolSentinelSummary, + codexPoolSentinelRuntimeImage, defaultCodexPoolSentinelConfig, readCodexPoolSentinelConfig, renderCodexPoolSentinelManifest, @@ -21,6 +23,7 @@ const serviceDns = `${serviceName}.${namespace}.svc.cluster.local:8080`; const fieldManager = "unidesk-platform-infra"; const appSecretName = "sub2api-secrets"; const codexPoolConfigPath = rootPath("config", "platform-infra", "sub2api-codex-pool.yaml"); +const sentinelImageDockerfilePath = rootPath("src", "components", "platform-infra", "sub2api", "sentinel.Dockerfile"); const defaultPoolGroupName = "unidesk-codex-pool"; const defaultPoolApiKeyName = "unidesk-codex-pool-api-key"; const defaultPoolApiKeySecretName = "sub2api-codex-pool-api-key"; @@ -45,6 +48,20 @@ interface ConfirmOptions extends DisclosureOptions { confirm: boolean; } +interface SentinelProbeOptions extends ConfirmOptions { + accounts: string[]; +} + +interface SentinelReportOptions extends DisclosureOptions { + events: number; +} + +interface SentinelImageOptions extends DisclosureOptions { + action: "status" | "build"; + confirm: boolean; + dryRun: boolean; +} + interface CodexProfile { profile: string; accountName: string; @@ 
-162,12 +179,16 @@ interface CodexLocalConsumerTomlOptions { export function codexPoolHelp(): unknown { const pool = readCodexPoolConfig(); return { - command: "platform-infra sub2api codex-pool plan|sync|validate|cleanup-probes|expose|configure-local", - output: "json", + command: "platform-infra sub2api codex-pool plan|sync|validate|sentinel-image|sentinel-probe|sentinel-report|cleanup-probes|expose|configure-local", + output: "json, except sentinel-report defaults to a ps-like text table", usage: [ "bun scripts/cli.ts platform-infra sub2api codex-pool plan", "bun scripts/cli.ts platform-infra sub2api codex-pool sync --confirm [--prune-removed]", "bun scripts/cli.ts platform-infra sub2api codex-pool validate [--full|--raw]", + "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-image status", + "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-image build --confirm", + "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-probe --account unidesk-codex-hy --confirm", + "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-report [--events 20|--full|--raw]", "bun scripts/cli.ts platform-infra sub2api codex-pool cleanup-probes --confirm", "bun scripts/cli.ts platform-infra sub2api codex-pool expose --confirm", "bun scripts/cli.ts platform-infra sub2api codex-pool configure-local --confirm", @@ -190,11 +211,14 @@ export function codexPoolHelp(): unknown { }; } -export async function runCodexPoolCommand(config: UniDeskConfig, args: string[]): Promise> { +export async function runCodexPoolCommand(config: UniDeskConfig, args: string[]): Promise | RenderedCliResult> { const [action = "plan"] = args; if (action === "plan") return codexPoolPlan(parseDisclosureOptions(args.slice(1))); if (action === "sync") return await codexPoolSync(config, parseSyncOptions(args.slice(1))); if (action === "validate") return await codexPoolValidate(config, parseDisclosureOptions(args.slice(1))); + if (action === "sentinel-image") return await 
codexPoolSentinelImage(config, parseSentinelImageOptions(args.slice(1))); + if (action === "sentinel-probe") return await codexPoolSentinelProbe(config, parseSentinelProbeOptions(args.slice(1))); + if (action === "sentinel-report") return await codexPoolSentinelReport(config, parseSentinelReportOptions(args.slice(1))); if (action === "cleanup-probes") return await codexPoolCleanupProbes(config, parseConfirmOptions(args.slice(1))); if (action === "expose") return await codexPoolExpose(config, parseConfirmOptions(args.slice(1))); if (action === "configure-local") return await codexPoolConfigureLocal(config, parseConfirmOptions(args.slice(1))); @@ -218,12 +242,117 @@ function parseConfirmOptions(args: string[]): ConfirmOptions { return { ...disclosure, confirm: args.includes("--confirm") }; } +function parseSentinelImageOptions(args: string[]): SentinelImageOptions { + const [actionRaw = "status", ...rest] = args; + if (actionRaw !== "status" && actionRaw !== "build") throw new Error("sentinel-image usage: status|build [--dry-run|--confirm] [--full|--raw]"); + let confirm = false; + let explicitDryRun = false; + const disclosureArgs: string[] = []; + for (const arg of rest) { + if (arg === "--confirm") { + confirm = true; + continue; + } + if (arg === "--dry-run") { + explicitDryRun = true; + continue; + } + if (arg === "--full" || arg === "--raw") { + disclosureArgs.push(arg); + continue; + } + throw new Error(`unsupported option: ${arg}`); + } + if (confirm && explicitDryRun) throw new Error("sentinel-image accepts only one of --confirm or --dry-run"); + const disclosure = parseDisclosureOptions(disclosureArgs); + return { + ...disclosure, + action: actionRaw, + confirm, + dryRun: actionRaw === "status" ? 
true : explicitDryRun || !confirm, + }; +} + +function parseSentinelProbeOptions(args: string[]): SentinelProbeOptions { + const accounts: string[] = []; + const disclosureArgs: string[] = []; + let confirm = false; + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]!; + if (arg === "--confirm") { + confirm = true; + continue; + } + if (arg === "--full" || arg === "--raw") { + disclosureArgs.push(arg); + continue; + } + if (arg === "--account") { + const value = args[index + 1]; + if (value === undefined || value.startsWith("--")) throw new Error("--account requires an account name"); + accounts.push(...splitAccountNames(value)); + index += 1; + continue; + } + if (arg.startsWith("--account=")) { + accounts.push(...splitAccountNames(arg.slice("--account=".length))); + continue; + } + throw new Error(`unsupported option: ${arg}`); + } + const uniqueAccounts = [...new Set(accounts)]; + if (uniqueAccounts.length === 0) throw new Error("sentinel-probe requires --account "); + for (const account of uniqueAccounts) validateKubernetesName(account, "--account", false); + const disclosure = parseDisclosureOptions(disclosureArgs); + return { ...disclosure, confirm, accounts: uniqueAccounts }; +} + +function parseSentinelReportOptions(args: string[]): SentinelReportOptions { + let events = 20; + let explicitEvents = false; + const disclosureArgs: string[] = []; + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]!; + if (arg === "--full" || arg === "--raw") { + disclosureArgs.push(arg); + continue; + } + if (arg === "--events") { + const value = args[index + 1]; + if (value === undefined || value.startsWith("--")) throw new Error("--events requires a positive integer"); + events = readReportEventLimit(value, "--events"); + explicitEvents = true; + index += 1; + continue; + } + if (arg.startsWith("--events=")) { + events = readReportEventLimit(arg.slice("--events=".length), "--events"); + explicitEvents = true; + 
continue; + } + throw new Error(`unsupported option: ${arg}`); + } + const disclosure = parseDisclosureOptions(disclosureArgs); + if (disclosure.full && !explicitEvents) events = 80; + return { ...disclosure, events }; +} + +function readReportEventLimit(raw: string, option: string): number { + const value = Number(raw); + if (!Number.isInteger(value) || value < 1 || value > 200) throw new Error(`${option} must be an integer from 1 to 200`); + return value; +} + function parseDisclosureOptions(args: string[]): DisclosureOptions { validateOptions(args, new Set(["--full", "--raw"])); const raw = args.includes("--raw"); return { full: raw || args.includes("--full"), raw }; } +function splitAccountNames(value: string): string[] { + return value.split(",").map((item) => item.trim()).filter(Boolean); +} + function validateOptions(args: string[], booleanOptions: Set): void { for (const arg of args) { if (booleanOptions.has(arg)) continue; @@ -284,6 +413,21 @@ async function codexPoolSync(config: UniDeskConfig, options: SyncOptions): Promi }; } + const sentinelImage = pool.sentinel.monitor.enabled + ? await runCodexPoolSentinelImage(config, pool, { action: "build", confirm: true, dryRun: false, full: options.full, raw: false }) + : { ok: true, mode: "skipped-monitor-disabled" }; + if (sentinelImage.ok !== true) { + return { + ok: false, + action: "platform-infra-sub2api-codex-pool-sync", + mode: "blocked-sentinel-image", + sentinelImage, + next: { + image: "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-image build --confirm", + }, + }; + } + const payload = { pruneRemoved: options.pruneRemoved, sentinel: { @@ -347,6 +491,7 @@ async function codexPoolSync(config: UniDeskConfig, options: SyncOptions): Promi profiles: options.full ? profiles.map(redactProfile) : undefined, valuesPrinted: false, }, + sentinelImage, remote: parsed === null ? compactCapture(result, { full: options.full || result.exitCode !== 0 }) : options.full ? 
parsed : codexPoolSyncSummary(parsed), @@ -356,6 +501,50 @@ async function codexPoolSync(config: UniDeskConfig, options: SyncOptions): Promi }; } +async function codexPoolSentinelImage(config: UniDeskConfig, options: SentinelImageOptions): Promise> { + const pool = readCodexPoolConfig(); + return await runCodexPoolSentinelImage(config, pool, options); +} + +async function runCodexPoolSentinelImage(config: UniDeskConfig, pool: CodexPoolConfig, options: SentinelImageOptions): Promise> { + const target = codexPoolSentinelRuntimeImage(pool.sentinel); + if (options.action === "build" && options.dryRun) { + return { + ok: true, + action: "platform-infra-sub2api-codex-pool-sentinel-image", + mode: "dry-run", + image: target, + dockerfile: sentinelImageDockerfilePath, + mutation: false, + next: { + confirm: "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-image build --confirm", + }, + }; + } + const mode: RemoteCodexPoolMode = options.action === "status" ? "sentinel-image-status" : "sentinel-image-build"; + const script = options.action === "status" ? 
sentinelImageStatusScript(pool) : sentinelImageBuildScript(pool); + const result = await runRemoteCodexPoolScript(config, mode, script); + const parsed = parseJsonOutput(result.stdout); + if (options.raw) { + return { + ok: result.exitCode === 0 && boolField(parsed, "ok", false), + action: "platform-infra-sub2api-codex-pool-sentinel-image", + mode: options.action, + image: target, + remote: compactCapture(result, { full: true }), + parsed, + }; + } + return { + ok: result.exitCode === 0 && boolField(parsed, "ok", false), + action: "platform-infra-sub2api-codex-pool-sentinel-image", + mode: options.action, + image: target, + summary: parsed, + remote: compactCapture(result, { full: options.full || result.exitCode !== 0 }), + }; +} + async function codexPoolValidate(config: UniDeskConfig, options: DisclosureOptions): Promise> { const pool = readCodexPoolConfig(); const result = await runRemoteCodexPoolScript(config, "validate", validateScript(pool)); @@ -376,6 +565,77 @@ async function codexPoolValidate(config: UniDeskConfig, options: DisclosureOptio }; } +async function codexPoolSentinelReport(config: UniDeskConfig, options: SentinelReportOptions): Promise | RenderedCliResult> { + const pool = readCodexPoolConfig(); + const result = await capture(config, g14K3sRoute, ["script"], sentinelReportScript(pool, options.events)); + const parsed = parseJsonOutput(result.stdout); + const ok = result.exitCode === 0 && boolField(parsed, "ok", false); + if (options.raw) { + return { + ok, + action: "platform-infra-sub2api-codex-pool-sentinel-report", + remote: compactCapture(result, { full: true }), + report: parsed, + valuesPrinted: false, + }; + } + const text = renderSentinelReport(parsed, { + events: options.events, + full: options.full, + remote: compactCapture(result, { full: result.exitCode !== 0 || parsed === null }), + }); + return renderedCliResult(ok, "platform-infra sub2api codex-pool sentinel-report", text); +} + +async function codexPoolSentinelProbe(config: 
UniDeskConfig, options: SentinelProbeOptions): Promise> { + const pool = readCodexPoolConfig(); + const configuredAccounts = desiredAccountNames(pool); + const missing = options.accounts.filter((account) => !configuredAccounts.includes(account)); + if (missing.length > 0) { + return { + ok: false, + action: "platform-infra-sub2api-codex-pool-sentinel-probe", + error: "account-not-in-yaml", + missing, + configuredAccounts, + valuesPrinted: false, + }; + } + if (!options.confirm) { + return { + ok: true, + action: "platform-infra-sub2api-codex-pool-sentinel-probe", + mode: "dry-run", + target: poolTarget(pool), + accounts: options.accounts, + effect: "Would create one Kubernetes Job from the managed sentinel CronJob and force an immediate marker probe for the requested account(s).", + next: { + confirm: `bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-probe --account ${options.accounts.join(",")} --confirm`, + }, + valuesPrinted: false, + }; + } + const payload = { + accounts: options.accounts, + }; + const result = await runRemoteCodexPoolScript(config, "sentinel-probe", sentinelProbeScript(payload, pool)); + const parsed = parseJsonOutput(result.stdout); + if (options.raw) { + return { + ok: result.exitCode === 0 && boolField(parsed, "ok", false), + action: "platform-infra-sub2api-codex-pool-sentinel-probe", + remote: compactCapture(result, { full: true }), + parsed, + }; + } + return { + ok: result.exitCode === 0 && boolField(parsed, "ok", false), + action: "platform-infra-sub2api-codex-pool-sentinel-probe", + summary: options.full ? 
parsed : compactSentinelProbeResult(parsed), + remote: compactCapture(result, { full: options.full || result.exitCode !== 0 }), + }; +} + async function codexPoolCleanupProbes(config: UniDeskConfig, options: ConfirmOptions): Promise> { if (!options.confirm) { return { @@ -749,12 +1009,6 @@ export function defaultCodexTempUnschedulablePolicy(): CodexTempUnschedulablePol return { enabled: true, rules: [ - { - statusCode: 200, - keywords: ["less than 10% of your weekly limit left"], - durationMinutes: 120, - description: "Success-body account-state prompts require Sub2API 2xx body reclassification before they can cool accounts.", - }, { statusCode: 400, keywords: ["invalid_encrypted_content", "encrypted content", "could not be verified", "could not be decrypted", "bad_response_status_code", "model_not_found", "no available channel for model", "unsupported", "not supported", "not support", "暂不支持", "可用模型"], @@ -1519,6 +1773,200 @@ function compactSentinelStatus(block: unknown): unknown { }; } +function compactSentinelProbeResult(parsed: Record | null): Record | null { + if (parsed === null) return null; + const probe = isRecord(parsed.probe) ? parsed.probe : {}; + const summary = isRecord(probe.summary) ? probe.summary : {}; + const state = isRecord(parsed.sentinelState) ? 
parsed.sentinelState : {}; + return { + ok: parsed.ok, + mode: parsed.mode, + namespace: parsed.namespace, + job: parsed.job, + requestedAccounts: parsed.requestedAccounts, + summary: { + at: summary.at, + monitorEnabled: summary.monitorEnabled, + actionsEnabled: summary.actionsEnabled, + selected: summary.selected, + okCount: summary.okCount, + mismatchCount: summary.mismatchCount, + markerMismatchCount: summary.markerMismatchCount, + transportFailureCount: summary.transportFailureCount, + actionsTaken: summary.actionsTaken, + selection: summary.selection, + }, + results: recordArray(probe.results).map((item) => pickSummaryFields(item, [ + "accountName", + "purpose", + "ok", + "markerMatched", + "httpStatus", + "durationMs", + "usage", + "outputHash", + "outputPreview", + "responseBodyHash", + "responseBodyPreview", + "error", + "errorDetails", + "failureKind", + "sdk", + "requestShape", + ])), + actions: recordArray(probe.actions).map((item) => pickSummaryFields(item, [ + "accountName", + "taken", + "type", + "error", + ])), + sentinelState: { + quarantined: state.quarantined, + recentAccounts: state.recentAccounts, + lastRun: state.lastRun, + }, + valuesPrinted: false, + }; +} + +function renderedCliResult(ok: boolean, command: string, renderedText: string): RenderedCliResult { + return { ok, command, renderedText, contentType: "text/plain" }; +} + +function renderSentinelReport( + parsed: Record | null, + context: { events: number; full: boolean; remote: Record }, +): string { + if (parsed === null) { + return [ + "SUB2API SENTINEL REPORT unavailable", + `remote_exit=${context.remote.exitCode ?? "?"} stdout_bytes=${context.remote.stdoutBytes ?? "?"} stderr_bytes=${context.remote.stderrBytes ?? "?"}`, + stringValue(context.remote.stderrTail) ?? stringValue(context.remote.stdoutTail) ?? "", + ].filter(Boolean).join("\n"); + } + const metadata = isRecord(parsed.metadata) ? parsed.metadata : {}; + const cronJob = isRecord(parsed.cronJob) ? 
parsed.cronJob : {}; + const summary = isRecord(parsed.summary) ? parsed.summary : {}; + const accounts = recordArray(parsed.accounts); + const runs = recordArray(parsed.runs); + const globalLedger = isRecord(parsed.globalLedger) ? parsed.globalLedger : {}; + const lines: string[] = []; + lines.push([ + "SUB2API SENTINEL", + `ok=${parsed.ok === true ? "true" : "false"}`, + `accounts=${summary.accountCount ?? accounts.length}`, + `quarantined=${summary.quarantinedCount ?? "?"}`, + `history=${summary.historyCount ?? runs.length}`, + `window=${formatWindow(summary.historyFrom, summary.historyTo)}`, + ].join(" ")); + lines.push([ + "CRON", + `schedule=${cronJob.schedule ?? "-"}`, + `last=${shortIso(cronJob.lastScheduleTime)}`, + `active=${cronJob.active ?? "-"}`, + `state=${metadata.namespace ?? namespace}/${metadata.stateConfigMapName ?? "-"}`, + `ledger=req:${globalLedger.requestCount ?? 0} tok:${formatNumber(globalLedger.totalTokens)} cost:$${formatCost(globalLedger.estimatedCostUsd)}`, + ].join(" ")); + lines.push(""); + lines.push("ACCOUNTS"); + lines.push(renderTable([ + ["ACCOUNT", "STATE", "Q", "F_MIN", "S_MIN", "PROBES", "LAST", "HTTP", "M", "KIND", "ACTION", "NEXT", "OBS_MIN"], + ...accounts.map((account) => [ + stringValue(account.account) ?? "-", + stringValue(account.status) ?? "-", + account.quarantineActive === true ? "Y" : "-", + textValue(account.freezeIntervalMin), + textValue(account.successIntervalMin), + textValue(account.probeCount), + shortIso(account.lastProbeAt), + textValue(account.lastHttp), + account.lastMarker === true ? "Y" : account.lastMarker === false ? "N" : "-", + shorten(stringValue(account.lastFailureKind) ?? "-", 20), + shorten(stringValue(account.lastAction) ?? 
"-", 16), + shortIso(account.nextProbeAfter), + textValue(account.observedLastToNextMin), + ]), + ])); + if (runs.length > 0 || context.full) { + lines.push(""); + lines.push(`RUNS last=${Math.min(context.events, runs.length)}`); + lines.push(renderTable([ + ["AT", "SEL", "DUE", "OK", "BAD", "TF", "ACT", "REASSERT"], + ...runs.slice(-context.events).map((run) => [ + shortIso(run.at), + textValue(run.selected), + textValue(run.due), + textValue(run.ok), + textValue(run.mismatch), + textValue(run.transportFailures), + textValue(run.actionsTaken), + textValue(run.reasserts), + ]), + ])); + } + lines.push(""); + lines.push("LEGEND Q=quarantined M=marker matched F_MIN=freeze interval S_MIN=success interval OBS_MIN=last probe to next probe minutes TF=transport failures"); + lines.push("Raw: bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-report --raw"); + return lines.join("\n"); +} + +function renderTable(rows: string[][]): string { + if (rows.length === 0) return ""; + const widths: number[] = []; + for (const row of rows) { + row.forEach((cell, index) => { + widths[index] = Math.max(widths[index] ?? 0, displayWidth(cell)); + }); + } + return rows.map((row) => row.map((cell, index) => padRight(cell, widths[index] ?? 0)).join(" ").trimEnd()).join("\n"); +} + +function padRight(value: string, width: number): string { + const pad = width - displayWidth(value); + return pad <= 0 ? value : `${value}${" ".repeat(pad)}`; +} + +function displayWidth(value: string): number { + return [...value].reduce((width, char) => width + (char.charCodeAt(0) > 0x7f ? 2 : 1), 0); +} + +function formatWindow(from: unknown, to: unknown): string { + const left = shortIso(from); + const right = shortIso(to); + return left === "-" && right === "-" ? 
"-" : `${left}..${right}`; +} + +function shortIso(value: unknown): string { + const text = stringValue(value); + if (text === null) return "-"; + return text.replace(/^\d{4}-/u, "").replace(/:00Z$/u, "Z").replace("T", " "); +} + +function textValue(value: unknown): string { + if (value === null || value === undefined || value === "") return "-"; + if (typeof value === "number") return Number.isInteger(value) ? String(value) : String(Math.round(value * 10) / 10); + if (typeof value === "boolean") return value ? "true" : "false"; + return String(value); +} + +function shorten(value: string, maxChars: number): string { + return value.length <= maxChars ? value : `${value.slice(0, Math.max(0, maxChars - 1))}…`; +} + +function formatNumber(value: unknown): string { + const num = numberValue(value); + if (num === null) return "0"; + if (Math.abs(num) >= 1_000_000) return `${(num / 1_000_000).toFixed(1)}M`; + if (Math.abs(num) >= 1_000) return `${(num / 1_000).toFixed(1)}K`; + return String(Math.round(num)); +} + +function formatCost(value: unknown): string { + const num = numberValue(value); + if (num === null) return "0.0000"; + return num.toFixed(4); +} + function codexPoolValidationSummary(parsed: Record | null): Record | null { if (parsed === null) return null; const validation = isRecord(parsed.validation) ? 
parsed.validation : {}; @@ -2289,10 +2737,321 @@ function validateScript(pool: CodexPoolConfig): string { return remotePythonScript("validate", "", pool); } +function sentinelProbeScript(payload: unknown, pool: CodexPoolConfig): string { + const encoded = Buffer.from(JSON.stringify(payload), "utf8").toString("base64"); + return remotePythonScript("sentinel-probe", encoded, pool); +} + +function sentinelReportScript(pool: CodexPoolConfig, events: number): string { + const stateName = pool.sentinel.stateConfigMapName; + const cronJobName = pool.sentinel.cronJobName; + return ` +set -eu +python3 - <<'PY' +import json +import subprocess +from datetime import datetime, timezone + +NAMESPACE = ${JSON.stringify(namespace)} +STATE_NAME = ${JSON.stringify(stateName)} +CRONJOB_NAME = ${JSON.stringify(cronJobName)} +EVENT_LIMIT = ${JSON.stringify(events)} + +def run(cmd): + return subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + +def text(data, limit=2000): + if isinstance(data, bytes): + data = data.decode("utf-8", errors="replace") + return data[-limit:] + +def kube_json(args): + proc = run(["kubectl", *args, "-o", "json"]) + if proc.returncode != 0: + return None, text(proc.stderr) + try: + return json.loads(proc.stdout.decode("utf-8")), None + except Exception as exc: + return None, str(exc) + +def parse_iso(value): + if not isinstance(value, str) or not value: + return None + try: + return datetime.fromisoformat(value.replace("Z", "+00:00")) + except Exception: + return None + +def minutes_between(a, b): + left = parse_iso(a) + right = parse_iso(b) + if left is None or right is None: + return None + return round((right - left).total_seconds() / 60, 1) + +def day_ledgers(state): + ledger = {} + raw = state.get("ledger") + if isinstance(raw, dict): + for item in raw.values(): + if not isinstance(item, dict): + continue + add_ledger(ledger, item) + return ledger + +def add_ledger(target, source): + target["inputTokens"] = target.get("inputTokens", 0) + 
int(source.get("inputTokens") or 0) + target["outputTokens"] = target.get("outputTokens", 0) + int(source.get("outputTokens") or 0) + target["totalTokens"] = target.get("totalTokens", 0) + int(source.get("totalTokens") or 0) + target["estimatedCostUsd"] = target.get("estimatedCostUsd", 0.0) + float(source.get("estimatedCostUsd") or 0) + target["requestCount"] = target.get("requestCount", 0) + int(source.get("requestCount") or 0) + +def account_ledger(account_state): + total = {} + raw = account_state.get("daily") + if isinstance(raw, dict): + for item in raw.values(): + if isinstance(item, dict): + add_ledger(total, item) + return total + +def action_type(probe): + action = probe.get("action") if isinstance(probe, dict) else None + if isinstance(action, dict): + value = action.get("type") + if value: + return value + return "taken" if action.get("taken") is True else "" + return "" + +def error_code(probe): + details = probe.get("errorDetails") if isinstance(probe, dict) else None + if not isinstance(details, dict): + return "" + openai_error = details.get("openaiError") + if isinstance(openai_error, dict): + return openai_error.get("code") or openai_error.get("type") or "" + return details.get("kind") or "" + +def report(): + cronjob, cron_error = kube_json(["-n", NAMESPACE, "get", "cronjob", CRONJOB_NAME]) + state_cm, state_error = kube_json(["-n", NAMESPACE, "get", "configmap", STATE_NAME]) + state = {} + parse_error = None + if isinstance(state_cm, dict): + raw_state = (state_cm.get("data") or {}).get("state.json") + if isinstance(raw_state, str) and raw_state.strip(): + try: + state = json.loads(raw_state) + except Exception as exc: + parse_error = str(exc) + accounts = state.get("accounts") if isinstance(state.get("accounts"), dict) else {} + history = state.get("history") if isinstance(state.get("history"), list) else [] + account_rows = [] + for name, account_state in sorted(accounts.items()): + if not isinstance(account_state, dict): + continue + probe = 
account_state.get("lastProbe") if isinstance(account_state.get("lastProbe"), dict) else {} + quarantine = account_state.get("quarantine") if isinstance(account_state.get("quarantine"), dict) else {} + ledger = account_ledger(account_state) + account_rows.append({ + "account": name, + "status": account_state.get("lastStatus"), + "quarantineActive": quarantine.get("active") is True, + "quarantineApplied": quarantine.get("applied") if isinstance(quarantine, dict) else None, + "freezeIntervalMin": quarantine.get("intervalMinutes") if isinstance(quarantine, dict) else None, + "freezeUntil": quarantine.get("until") if isinstance(quarantine, dict) else None, + "successStreak": account_state.get("successStreak") or 0, + "successIntervalMin": account_state.get("successIntervalMinutes") or 0, + "probeCount": ledger.get("requestCount", 0), + "inputTokens": ledger.get("inputTokens", 0), + "outputTokens": ledger.get("outputTokens", 0), + "totalTokens": ledger.get("totalTokens", 0), + "estimatedCostUsd": round(float(ledger.get("estimatedCostUsd", 0)), 6), + "lastProbeAt": account_state.get("lastProbeAt"), + "lastPurpose": probe.get("purpose"), + "lastHttp": probe.get("httpStatus"), + "lastMarker": probe.get("markerMatched"), + "lastFailureKind": probe.get("failureKind"), + "lastErrorCode": error_code(probe), + "lastAction": action_type(probe), + "nextProbeAfter": account_state.get("nextProbeAfter"), + "observedLastToNextMin": minutes_between(account_state.get("lastProbeAt"), account_state.get("nextProbeAfter")), + "requestShape": probe.get("requestShape"), + }) + run_rows = [] + for item in history: + if not isinstance(item, dict): + continue + selection = item.get("selection") if isinstance(item.get("selection"), dict) else {} + run_rows.append({ + "at": item.get("at"), + "selected": item.get("selected"), + "due": selection.get("due"), + "ok": item.get("okCount"), + "mismatch": item.get("mismatchCount") if item.get("mismatchCount") is not None else 
item.get("markerMismatchCount"), + "transportFailures": item.get("transportFailureCount"), + "actionsTaken": item.get("actionsTaken"), + "reasserts": len(item.get("reconcile") or []), + }) + quarantined = [item for item in account_rows if item.get("quarantineActive") is True] + cron_spec = cronjob.get("spec") if isinstance(cronjob, dict) else {} + cron_status = cronjob.get("status") if isinstance(cronjob, dict) else {} + global_ledger = day_ledgers(state) + result = { + "ok": state_error is None and parse_error is None, + "metadata": { + "namespace": NAMESPACE, + "stateConfigMapName": STATE_NAME, + "cronJobName": CRONJOB_NAME, + "generatedAt": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + "valuesPrinted": False, + }, + "cronJob": { + "exists": isinstance(cronjob, dict), + "schedule": cron_spec.get("schedule") if isinstance(cron_spec, dict) else None, + "suspend": cron_spec.get("suspend") if isinstance(cron_spec, dict) else None, + "lastScheduleTime": cron_status.get("lastScheduleTime") if isinstance(cron_status, dict) else None, + "active": len(cron_status.get("active") or []) if isinstance(cron_status, dict) else None, + "error": cron_error, + }, + "summary": { + "accountCount": len(account_rows), + "quarantinedCount": len(quarantined), + "historyCount": len(history), + "historyFrom": run_rows[0].get("at") if run_rows else None, + "historyTo": run_rows[-1].get("at") if run_rows else None, + "lastRun": state.get("lastRun"), + }, + "globalLedger": global_ledger, + "accounts": account_rows, + "runs": run_rows[-EVENT_LIMIT:], + "errors": { + "state": state_error, + "parse": parse_error, + }, + "valuesPrinted": False, + } + return result + +payload = report() +print(json.dumps(payload, ensure_ascii=False, indent=2)) +raise SystemExit(0 if payload.get("ok") else 1) +PY +`; +} + function cleanupProbesScript(pool: CodexPoolConfig): string { return remotePythonScript("cleanup-probes", "", pool); } +function sentinelImageStatusScript(pool: CodexPoolConfig): 
string { + const target = codexPoolSentinelRuntimeImage(pool.sentinel); + return remoteSentinelImageScript("status", target, pool.sentinel, null); +} + +function sentinelImageBuildScript(pool: CodexPoolConfig): string { + const target = codexPoolSentinelRuntimeImage(pool.sentinel); + const dockerfile = readFileSync(sentinelImageDockerfilePath, "utf8"); + return remoteSentinelImageScript("build", target, pool.sentinel, dockerfile); +} + +function remoteSentinelImageScript(mode: "status" | "build", target: ReturnType, sentinel: CodexPoolSentinelConfig, dockerfile: string | null): string { + const dockerfileB64 = dockerfile === null ? "" : Buffer.from(dockerfile, "utf8").toString("base64"); + return ` +set -eu +mode=${shQuote(mode)} +image=${shQuote(target.runtimeImage)} +repo=${shQuote("platform-infra/sub2api-account-sentinel")} +tag=${shQuote(target.tag)} +base_image=${shQuote(target.baseImage)} +openai_version=${shQuote(sentinel.sdk.openaiPythonVersion)} +work=/tmp/unidesk-sub2api-sentinel-image +mkdir -p "$work" +dockerfile_path="$work/sentinel.Dockerfile" +registry_has_tag=false +if curl -fsS --max-time 10 "http://127.0.0.1:5000/v2/$repo/tags/list" 2>/dev/null | grep -F '"'"$tag"'"' >/dev/null 2>&1; then + registry_has_tag=true +fi +local_id="$(docker image inspect "$image" --format '{{.Id}}' 2>/dev/null || true)" +if [ "$mode" = "status" ]; then + if [ -n "$local_id" ]; then + python_version="$(docker run --rm "$image" python3 --version 2>/dev/null || true)" + openai_runtime_version="$(docker run --rm "$image" python3 -c 'import importlib.metadata; print(importlib.metadata.version("openai"))' 2>/dev/null || true)" + else + python_version= + openai_runtime_version= + fi + python3 - </dev/null 2>&1 || true + local_id="$(docker image inspect "$image" --format '{{.Id}}' 2>/dev/null || true)" + fi + python3 - < "$dockerfile_path" <<'UNIDESK_SENTINEL_DOCKERFILE_B64' +${dockerfileB64} +UNIDESK_SENTINEL_DOCKERFILE_B64 +export 
NO_PROXY=localhost,127.0.0.1,::1,host.docker.internal,74.48.78.17,192.168.0.0/16,10.0.0.0/8,172.16.0.0/12,10.42.0.0/16,10.43.0.0/16,.svc,.svc.cluster.local,.cluster.local,kubernetes,kubernetes.default,kubernetes.default.svc,127.0.0.1:5000,localhost:5000 +export no_proxy=$NO_PROXY +docker build --pull \\ + --build-arg BASE_IMAGE="$base_image" \\ + --build-arg OPENAI_PYTHON_VERSION="$openai_version" \\ + --build-arg HTTP_PROXY= --build-arg HTTPS_PROXY= --build-arg http_proxy= --build-arg https_proxy= \\ + --build-arg NO_PROXY --build-arg no_proxy \\ + -f "$dockerfile_path" \\ + -t "$image" \\ + "$work" +docker run --rm "$image" python3 -c 'import importlib.metadata, sys; expected=sys.argv[1]; actual=importlib.metadata.version("openai"); assert actual == expected, (actual, expected); print("openai", actual)' "$openai_version" +docker push "$image" +digest="$(docker image inspect "$image" --format '{{index .RepoDigests 0}}' 2>/dev/null || true)" +python3 - < total + value, 0); } @@ -2357,7 +3116,7 @@ function desiredAccountTempUnschedulableMap(pool: CodexPoolConfig): Record { +type RemoteCodexPoolMode = "sync" | "validate" | "sentinel-probe" | "sentinel-image-status" | "sentinel-image-build"; + +async function runRemoteCodexPoolScript(config: UniDeskConfig, mode: RemoteCodexPoolMode, script: string): Promise { const jobName = `codex-pool-${mode}-${Date.now().toString(36)}`.slice(0, 63); const startedAtMs = Date.now(); const start = await capture(config, g14K3sRoute, ["script"], remoteJobStartScript(jobName, script)); @@ -4323,11 +5274,17 @@ async function runRemoteCodexPoolScript(config: UniDeskConfig, mode: "sync" | "v stderr: [ latest?.stderr ?? "", `remote codex-pool ${mode} job ${jobName} did not finish within ${remoteJobTimeoutMs}ms`, - `status command: bun scripts/cli.ts platform-infra sub2api codex-pool ${mode === "sync" ? 
"sync --confirm" : "validate"}`, + `status command: ${codexPoolModeCommand(mode)}`, ].filter(Boolean).join("\n"), }; } +function codexPoolModeCommand(mode: RemoteCodexPoolMode): string { + if (mode === "sync") return "bun scripts/cli.ts platform-infra sub2api codex-pool sync --confirm"; + if (mode === "sentinel-probe") return "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-probe --account --confirm"; + return "bun scripts/cli.ts platform-infra sub2api codex-pool validate"; +} + interface RemoteCodexPoolJobStatus { status: "running" | "succeeded" | "failed" | "unknown"; exitCode: number | null; diff --git a/scripts/src/platform-infra.ts b/scripts/src/platform-infra.ts index 738cca6e..b39d3df5 100644 --- a/scripts/src/platform-infra.ts +++ b/scripts/src/platform-infra.ts @@ -3,6 +3,7 @@ import { readFileSync } from "node:fs"; import type { UniDeskConfig } from "./config"; import { rootPath } from "./config"; import { startJob } from "./jobs"; +import type { RenderedCliResult } from "./output"; import { runSshCommandCapture, type SshCaptureResult } from "./ssh"; const g14K3sRoute = "G14:k3s"; @@ -43,6 +44,8 @@ export function platformInfraHelp(): unknown { "bun scripts/cli.ts platform-infra sub2api codex-pool plan", "bun scripts/cli.ts platform-infra sub2api codex-pool sync --confirm", "bun scripts/cli.ts platform-infra sub2api codex-pool validate", + "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-image status", + "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-probe --account unidesk-codex-hy --confirm", ], description: "Operate the G14 k3s internal-only Sub2API deployment in the shared platform-infra namespace. 
This entry creates no Ingress, NodePort, LoadBalancer, hostPort, hostNetwork, ResourceQuota, LimitRange, or CPU/memory resource requests/limits.", target: { @@ -59,13 +62,14 @@ export function platformInfraHelp(): unknown { "bun scripts/cli.ts platform-infra sub2api codex-pool plan", "bun scripts/cli.ts platform-infra sub2api codex-pool sync --confirm", "bun scripts/cli.ts platform-infra sub2api codex-pool validate", + "bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-image status", ], module: "scripts/src/platform-infra-sub2api-codex.ts", }, }; } -export async function runPlatformInfraCommand(config: UniDeskConfig, args: string[]): Promise> { +export async function runPlatformInfraCommand(config: UniDeskConfig, args: string[]): Promise | RenderedCliResult> { const [target, action] = args; if (target !== "sub2api") return unsupported(args); if (action === "plan" || action === undefined) return plan(); diff --git a/src/components/platform-infra/sub2api/sentinel.Dockerfile b/src/components/platform-infra/sub2api/sentinel.Dockerfile new file mode 100644 index 00000000..8a80a7a1 --- /dev/null +++ b/src/components/platform-infra/sub2api/sentinel.Dockerfile @@ -0,0 +1,13 @@ +ARG BASE_IMAGE=python:3.12-alpine +FROM ${BASE_IMAGE} + +ARG OPENAI_PYTHON_VERSION=2.41.1 + +RUN python3 -m pip install --no-cache-dir "openai==${OPENAI_PYTHON_VERSION}" \ + && python3 - <<'PY' +import importlib.metadata +print("openai", importlib.metadata.version("openai")) +PY + +LABEL unidesk.ai/component="platform-infra-sub2api-account-sentinel" +LABEL unidesk.ai/runtime="python-openai-responses"