diff --git a/.agents/skills/unidesk-sub2api/SKILL.md b/.agents/skills/unidesk-sub2api/SKILL.md index db1022a8..1d2f28f8 100644 --- a/.agents/skills/unidesk-sub2api/SKILL.md +++ b/.agents/skills/unidesk-sub2api/SKILL.md @@ -81,6 +81,7 @@ bun scripts/cli.ts platform-infra sub2api codex-pool cleanup-probes --confirm - `profiles.entries`: 从 master `~/.codex/` 选择上游 profile 并映射到 Sub2API account。 - `profiles.entries[].capacity`: 可选 per-account concurrency override;不写则使用 `pool.defaultAccountCapacity`。具体数值只以 `config/platform-infra/sub2api-codex-pool.yaml` 为准,skill 和长期参考只描述规则,不重复写当前值。 - `profiles.entries[].loadFactor`: 可选 per-account Sub2API `load_factor` override;不写则使用 `pool.defaultAccountLoadFactor`。具体数值只以 `config/platform-infra/sub2api-codex-pool.yaml` 为准,修改后必须 `codex-pool sync --confirm` 和 `codex-pool validate`。 +- `profiles.entries[].trustUpstream`: 可选账号级哨兵信任标记;默认 `false`。可信账号使用 `sentinel.cadence.trustedSuccessMaxIntervalMinutes` 作为连续成功后的最大探测退避,不可信账号使用 `sentinel.cadence.untrustedSuccessMaxIntervalMinutes`。它只影响哨兵探测频率和状态可见性,不改变 Sub2API account priority/capacity/loadFactor。 - 除非用户明确要求修改配置,不要仅凭推断改账号 membership、priority、capacity、loadFactor、WebSocket mode 或其他调度策略;先保留 YAML,完成 provenance/runtime evidence 溯源,并把结论写回相关 issue 或 runbook 后再提出变更。 - `profiles.entries[].tempUnschedulable`: 可选 per-account 临时下线规则覆盖;字段语义以 `docs/reference/platform-infra.md` 为权威。上游 Sub2API 不支持的成功体分类、调度策略或账号冷却行为不要在这里声明。 - `profiles.entries[].openaiResponsesWebSocketsV2Mode`: 需要 Responses WebSocket v2 的上游才设置,值为 `off`、`ctx_pool` 或 `passthrough`。 @@ -90,7 +91,7 @@ bun scripts/cli.ts platform-infra sub2api codex-pool cleanup-probes --confirm - `sentinel.sdk.openaiPythonVersion`: 哨兵容器使用的 OpenAI Python SDK 固定版本;模型请求必须通过标准 SDK `responses.create`,不要手工拼 `/v1/responses` 请求体或手写响应解析。后续升级 SDK 只改 YAML 并 `sync --confirm`。 - `sentinel.probe.maxOutputTokens`: 哨兵本地流式 delta 收集上限,必须保持小值;它不作为上游 `max_output_tokens` 字段发送,以保持与 Sub2API WebUI 默认账号连接测试的 Responses SSE 请求形态一致。哨兵不限制并发和每轮账号数,所有到期账号会在同一轮并发探测。 - `sentinel.probe.userAgent`: 哨兵 direct upstream probe 的默认 User-Agent,通过 OpenAI SDK `extra_headers` 传递;默认贴近 Sub2API `net/http` 账号连接测试形态,个别账号仍可用 `profiles.entries[].upstreamUserAgent` 覆盖。 -- `sentinel.cadence`: 成功信任指数退避配置。当前口径是从 1 分钟开始,连续成功后退避到最大 20 分钟;任意非 marker match 清零成功信任并进入冻结退避。 +- `sentinel.cadence`: 成功信任指数退避配置。当前口径是从 1 分钟开始,连续成功后按账号 `trustUpstream` 选择可信/不可信最大退避;任意非 marker match 清零成功信任并进入冻结退避。可信/不可信最大退避数值只写 YAML。 - `sentinel.freeze`: 失败冻结 TTL 指数退避配置。当前口径是初始 1 分钟,失败后 `1m -> 2m -> 4m -> 8m -> 10m`,最大 10 分钟;失败 probe 基本不消耗有效输出 token,因此冻结窗口保持短周期。冻结到期后只做恢复 probe,通过才自动恢复,不能仅靠 TTL 到期解封。 - `sentinel.pricing`: 直打上游时哨兵自己的 token/cost 估算价格。因为 direct upstream probe 不经过 Sub2API 普通用量账本,哨兵必须自己记录全局与 per-account token/cost;这些账本只用于观察,不作为跳过探测的预算门禁。 @@ -114,7 +115,7 @@ Codex 启动时反复出现 WebSocket reconnect、HTTPS fallback、`websocket cl 1. 在 master `~/.codex/` 准备带后缀的上游 profile 文件,例如 `config.toml.` 和 `auth.json.`;禁止覆盖默认 `config.toml` / `auth.json`。 2. 在 `config/platform-infra/sub2api-codex-pool.yaml` 添加 `profiles.entries` 项,指定 `profile`、`accountName`、`configFile`、`authFile`。 -3. 如需要,给该项加 `priority`、`capacity`、`loadFactor`、`tempUnschedulable`、`openaiResponsesWebSocketsV2Mode` 或 `upstreamUserAgent`;capacity/loadFactor 的具体数值只写在 YAML。 +3. 如需要,给该项加 `priority`、`capacity`、`loadFactor`、`trustUpstream`、`tempUnschedulable`、`openaiResponsesWebSocketsV2Mode` 或 `upstreamUserAgent`;capacity/loadFactor/信任退避的具体数值只写在 YAML。 4. 如果新增账号会提高声明 capacity 总和,默认让省略的 `pool.minOwnerConcurrency` 继续按 capacity 总和自动解析;只有 YAML 已经显式写了该 override 时,才同步提高到不低于总 capacity,或删除 override 回到自动解析。 5. 跑 `codex-pool plan`,确认 profile 可读、`base_url` 和 API key 来源有效,且 stdout 未泄露完整 key。 6. 跑 `codex-pool sync --confirm`。 diff --git a/config/platform-infra/sub2api-codex-pool.yaml b/config/platform-infra/sub2api-codex-pool.yaml index d39a9d81..dabbfe86 100644 --- a/config/platform-infra/sub2api-codex-pool.yaml +++ b/config/platform-infra/sub2api-codex-pool.yaml @@ -60,6 +60,7 @@ profiles: accountName: unidesk-codex-hy configFile: config.toml.HY authFile: auth.json.HY + trustUpstream: true openaiResponsesWebSocketsV2Mode: off capacity: 10 loadFactor: 10 @@ -81,12 +82,14 @@ profiles: accountName: unidesk-codex-gptclub configFile: config.toml.gptclub authFile: auth.json.gptclub + trustUpstream: true capacity: 10 priority: 100 - profile: only accountName: unidesk-codex-only configFile: config.toml.only authFile: auth.json.only + trustUpstream: true loadFactor: 1 priority: 110 - profile: zakuzaku @@ -179,6 +182,8 @@ sentinel: cadence: successInitialIntervalMinutes: 1 successMaxIntervalMinutes: 20 + trustedSuccessMaxIntervalMinutes: 20 + untrustedSuccessMaxIntervalMinutes: 2 successBackoffMultiplier: 2 jitterPercent: 10 freeze: diff --git a/scripts/src/platform-infra-sub2api-codex-sentinel.ts b/scripts/src/platform-infra-sub2api-codex-sentinel.ts index aca069e8..fa5de950 100644 --- a/scripts/src/platform-infra-sub2api-codex-sentinel.ts +++ b/scripts/src/platform-infra-sub2api-codex-sentinel.ts @@ -32,6 +32,8 @@ export interface CodexPoolSentinelConfig { cadence: { successInitialIntervalMinutes: number; successMaxIntervalMinutes: number; + trustedSuccessMaxIntervalMinutes: number; + untrustedSuccessMaxIntervalMinutes: number; successBackoffMultiplier: number; jitterPercent: number; }; @@ -61,6 +63,7 @@ export interface CodexPoolSentinelProfileSecret { baseUrl: string; apiKey: string; upstreamUserAgent: string | null; + trustUpstream: boolean; } export interface CodexPoolSentinelManifestOptions { @@ -103,6 +106,8 @@ export function defaultCodexPoolSentinelConfig(): CodexPoolSentinelConfig { cadence: { successInitialIntervalMinutes: 1, successMaxIntervalMinutes: 20, + trustedSuccessMaxIntervalMinutes: 20, + untrustedSuccessMaxIntervalMinutes: 2, successBackoffMultiplier: 2, jitterPercent: 10, }, @@ -145,6 +150,9 @@ export function readCodexPoolSentinelConfig(value: unknown, defaults: CodexPoolS const cadence = isRecord(value.cadence) ? value.cadence : {}; const freeze = isRecord(value.freeze) ? value.freeze : {}; const pricing = isRecord(value.pricing) ? value.pricing : {}; + const legacySuccessMax = readInt(valueAt(cadence, "successMaxIntervalMinutes"), `${sourcePath}.sentinel.cadence.successMaxIntervalMinutes`, defaults.cadence.successMaxIntervalMinutes, 1, 1440); + const trustedSuccessMax = readInt(valueAt(cadence, "trustedSuccessMaxIntervalMinutes"), `${sourcePath}.sentinel.cadence.trustedSuccessMaxIntervalMinutes`, legacySuccessMax, 1, 1440); + const untrustedSuccessMax = readInt(valueAt(cadence, "untrustedSuccessMaxIntervalMinutes"), `${sourcePath}.sentinel.cadence.untrustedSuccessMaxIntervalMinutes`, legacySuccessMax, 1, 1440); const config: CodexPoolSentinelConfig = { monitor: { enabled: readBoolean(valueAt(monitor, "enabled"), `${sourcePath}.sentinel.monitor.enabled`, defaults.monitor.enabled), @@ -176,7 +184,9 @@ export function readCodexPoolSentinelConfig(value: unknown, defaults: CodexPoolS }, cadence: { successInitialIntervalMinutes: readInt(valueAt(cadence, "successInitialIntervalMinutes"), `${sourcePath}.sentinel.cadence.successInitialIntervalMinutes`, defaults.cadence.successInitialIntervalMinutes, 1, 1440), - successMaxIntervalMinutes: readInt(valueAt(cadence, "successMaxIntervalMinutes"), `${sourcePath}.sentinel.cadence.successMaxIntervalMinutes`, defaults.cadence.successMaxIntervalMinutes, 1, 1440), + successMaxIntervalMinutes: legacySuccessMax, + trustedSuccessMaxIntervalMinutes: trustedSuccessMax, + untrustedSuccessMaxIntervalMinutes: untrustedSuccessMax, successBackoffMultiplier: readInt(valueAt(cadence, "successBackoffMultiplier"), `${sourcePath}.sentinel.cadence.successBackoffMultiplier`, defaults.cadence.successBackoffMultiplier, 1, 10), jitterPercent: readInt(valueAt(cadence, "jitterPercent"), `${sourcePath}.sentinel.cadence.jitterPercent`, defaults.cadence.jitterPercent, 0, 50), }, @@ -198,6 +208,12 @@ export function readCodexPoolSentinelConfig(value: unknown, defaults: CodexPoolS if (config.cadence.successMaxIntervalMinutes < config.cadence.successInitialIntervalMinutes) { throw new Error(`${sourcePath}.sentinel.cadence.successMaxIntervalMinutes must be >= successInitialIntervalMinutes`); } + if (config.cadence.trustedSuccessMaxIntervalMinutes < config.cadence.successInitialIntervalMinutes) { + throw new Error(`${sourcePath}.sentinel.cadence.trustedSuccessMaxIntervalMinutes must be >= successInitialIntervalMinutes`); + } + if (config.cadence.untrustedSuccessMaxIntervalMinutes < config.cadence.successInitialIntervalMinutes) { + throw new Error(`${sourcePath}.sentinel.cadence.untrustedSuccessMaxIntervalMinutes must be >= successInitialIntervalMinutes`); + } if (config.freeze.maxTtlMinutes < config.freeze.initialTtlMinutes) { throw new Error(`${sourcePath}.sentinel.freeze.maxTtlMinutes must be >= initialTtlMinutes`); } @@ -1032,6 +1048,7 @@ def probe_account(profile, config, purpose): return { "accountName": profile["accountName"], "profile": profile.get("profile"), + "trustUpstream": profile.get("trustUpstream") is True, "purpose": purpose, "ok": ok, "markerMatched": marker_matched, @@ -1115,11 +1132,17 @@ def choose_forced_profiles(profiles, state, config, now, names): missing = sorted(name for name in names if name not in set(found)) return due, {"selected": len(due), "due": len(due), "limit": "forced-accounts", "budgetMode": "record-only", "ledger": ledger_for(state, now)[1], "requestedAccounts": sorted(names), "missingAccounts": missing} -def next_success_interval(account_state, config): +def success_max_interval(profile, config): + cadence = config["cadence"] + if profile.get("trustUpstream") is True: + return int(cadence.get("trustedSuccessMaxIntervalMinutes") or cadence.get("successMaxIntervalMinutes")) + return int(cadence.get("untrustedSuccessMaxIntervalMinutes") or cadence.get("successMaxIntervalMinutes")) + +def next_success_interval(account_state, config, profile): streak = int(account_state.get("successStreak") or 0) previous = int(account_state.get("successIntervalMinutes") or 0) initial = int(config["cadence"]["successInitialIntervalMinutes"]) - maximum = int(config["cadence"]["successMaxIntervalMinutes"]) + maximum = success_max_interval(profile, config) multiplier = int(config["cadence"]["successBackoffMultiplier"]) return initial if streak <= 0 or previous <= 0 else min(maximum, max(initial, previous * multiplier)) @@ -1133,7 +1156,7 @@ def next_freeze_interval(account_state, config, was_recovery): return min(maximum, max(initial, previous * multiplier)) return initial -def apply_result(result, state, config, now, admin): +def apply_result(result, state, config, now, admin, profile): name = result["accountName"] account_state = state.setdefault("accounts", {}).setdefault(name, {}) add_usage(state, account_state, now, result.get("usage") or {}) @@ -1154,9 +1177,10 @@ def apply_result(result, state, config, now, admin): account_state["qualityGate"] = {**quality_gate, "pending": False, "clearedAt": iso(now)} account_state["successStreak"] = 0 account_state["successIntervalMinutes"] = 0 - interval = next_success_interval(account_state, config) + interval = next_success_interval(account_state, config, profile) account_state["successStreak"] = int(account_state.get("successStreak") or 0) + 1 account_state["successIntervalMinutes"] = interval + account_state["successMaxIntervalMinutes"] = success_max_interval(profile, config) account_state["nextProbeAfter"] = iso(add_minutes(now, interval, int(config["cadence"]["jitterPercent"]))) account_state["lastOkAt"] = iso(now) account_state["lastStatus"] = "ok" @@ -1190,6 +1214,7 @@ def apply_result(result, state, config, now, admin): account_state["nextProbeAfter"] = iso(until) account_state["successStreak"] = 0 account_state["successIntervalMinutes"] = 0 + account_state["successMaxIntervalMinutes"] = success_max_interval(profile, config) account_state["lastStatus"] = "quarantined" else: retry = int(config["probe"]["transportRetryMinutes"]) @@ -1197,9 +1222,12 @@ def apply_result(result, state, config, now, admin): account_state["lastStatus"] = "marker-not-matched-no-freeze" account_state["lastFailureAt"] = iso(now) account_state["lastProbeAt"] = iso(now) + account_state["trustUpstream"] = profile.get("trustUpstream") is True account_state["lastProbe"] = { "ok": result.get("ok"), "purpose": result.get("purpose"), + "trustUpstream": result.get("trustUpstream"), + "successMaxIntervalMinutes": success_max_interval(profile, config), "httpStatus": result.get("httpStatus"), "durationMs": result.get("durationMs"), "markerMatched": result.get("markerMatched"), @@ -1263,11 +1291,12 @@ def main(): actions = [] if (config["monitor"]["enabled"] or forced_names) and due: with ThreadPoolExecutor(max_workers=max(1, len(due))) as executor: - futures = [executor.submit(probe_account, item["profile"], config, item["purpose"]) for item in due] + futures = {executor.submit(probe_account, item["profile"], config, item["purpose"]): item["profile"] for item in due} for future in as_completed(futures): result = future.result() results.append(result) - actions.append({"accountName": result["accountName"], **apply_result(result, state, config, now, admin)}) + profile = futures[future] + actions.append({"accountName": result["accountName"], **apply_result(result, state, config, now, admin, profile)}) history = state.setdefault("history", []) run_summary = { "at": iso(now), @@ -1293,6 +1322,7 @@ def main(): "results": [{ "accountName": item.get("accountName"), "purpose": item.get("purpose"), + "trustUpstream": item.get("trustUpstream"), "ok": item.get("ok"), "markerMatched": item.get("markerMatched"), "httpStatus": item.get("httpStatus"), diff --git a/scripts/src/platform-infra-sub2api-codex.ts b/scripts/src/platform-infra-sub2api-codex.ts index 46289e82..031d39ae 100644 --- a/scripts/src/platform-infra-sub2api-codex.ts +++ b/scripts/src/platform-infra-sub2api-codex.ts @@ -76,6 +76,7 @@ interface CodexProfile { apiKeySource: "auth-json" | "env" | null; openaiResponsesWebSocketsV2Mode: OpenAIResponsesWebSocketsV2Mode | null; upstreamUserAgent: string | null; + trustUpstream: boolean; priority: number; capacity: number; loadFactor: number; @@ -126,6 +127,7 @@ interface CodexPoolProfileConfig { fallbackAuthFile: string | null; openaiResponsesWebSocketsV2Mode: OpenAIResponsesWebSocketsV2Mode | null; upstreamUserAgent: string | null; + trustUpstream: boolean; priority: number; capacity: number | null; loadFactor: number | null; @@ -480,9 +482,11 @@ async function codexPoolSync(config: UniDeskConfig, options: SyncOptions): Promi apiKeyFingerprint: fingerprint(profile.apiKey ?? ""), upstreamUserAgent: profile.upstreamUserAgent, openaiResponsesWebSocketsV2Mode: profile.openaiResponsesWebSocketsV2Mode, + trustUpstream: profile.trustUpstream, }), openaiResponsesWebSocketsV2Mode: profile.openaiResponsesWebSocketsV2Mode, upstreamUserAgent: profile.upstreamUserAgent, + trustUpstream: profile.trustUpstream, priority: profile.priority, capacity: profile.capacity, loadFactor: profile.loadFactor, @@ -830,6 +834,7 @@ function collectCodexProfiles(): CodexProfile[] { apiKeySource: null, openaiResponsesWebSocketsV2Mode: entry.openaiResponsesWebSocketsV2Mode, upstreamUserAgent: entry.upstreamUserAgent, + trustUpstream: entry.trustUpstream, priority: entry.priority, capacity: entry.capacity ?? pool.defaultAccountCapacity, loadFactor: entry.loadFactor ?? pool.defaultAccountLoadFactor, @@ -902,6 +907,7 @@ function discoverCodexProfileConfigs( fallbackAuthFile: null, openaiResponsesWebSocketsV2Mode: null, upstreamUserAgent: null, + trustUpstream: false, priority: defaultPriority, capacity: null, loadFactor: null, @@ -1135,6 +1141,7 @@ function readProfileConfig( if (fallbackAuthFile !== null) validateCodexFileName(fallbackAuthFile, `profiles.entries[${index}].fallbackAuthFile`); const openaiResponsesWebSocketsV2Mode = readOpenAIResponsesWebSocketsV2Mode(entry.openaiResponsesWebSocketsV2Mode, `profiles.entries[${index}].openaiResponsesWebSocketsV2Mode`); const upstreamUserAgent = readUpstreamUserAgent(entry.upstreamUserAgent, `profiles.entries[${index}].upstreamUserAgent`); + const trustUpstream = readTrustUpstream(entry.trustUpstream, `profiles.entries[${index}].trustUpstream`); const priority = readAccountPriority(entry.priority, `profiles.entries[${index}].priority`, defaultPriority); const capacity = entry.capacity === undefined || entry.capacity === null ? null : readAccountCapacity(entry.capacity, `profiles.entries[${index}].capacity`); const loadFactor = entry.loadFactor === undefined || entry.loadFactor === null ? null : readAccountLoadFactor(entry.loadFactor, `profiles.entries[${index}].loadFactor`); @@ -1148,6 +1155,7 @@ function readProfileConfig( fallbackAuthFile, openaiResponsesWebSocketsV2Mode, upstreamUserAgent, + trustUpstream, priority, capacity, loadFactor, @@ -1183,6 +1191,13 @@ function readUpstreamUserAgent(value: unknown, key: string): string | null { return text; } +function readTrustUpstream(value: unknown, key: string): boolean { + if (value === undefined || value === null) return false; + const parsed = booleanValue(value); + if (parsed === null) throw new Error(`${codexPoolConfigPath}.${key} must be a boolean`); + return parsed; +} + function readAccountPriority(value: unknown, key: string, fallback = defaultAccountPriority): number { if (value === undefined || value === null) return fallback; const priority = numberValue(value); @@ -1502,6 +1517,7 @@ function redactProfile(profile: CodexProfile): Record { apiKeySource: profile.apiKeySource, openaiResponsesWebSocketsV2Mode: profile.openaiResponsesWebSocketsV2Mode, upstreamUserAgent: profile.upstreamUserAgent, + trustUpstream: profile.trustUpstream, priority: profile.priority, capacity: profile.capacity, loadFactor: profile.loadFactor, @@ -1523,6 +1539,7 @@ function compactProfile(profile: CodexProfile): Record { provider: profile.provider || null, model: profile.model, priority: profile.priority, + trustUpstream: profile.trustUpstream, capacity: profile.capacity, loadFactor: profile.loadFactor, tempUnschedulableEnabled: profile.tempUnschedulable.enabled && profile.tempUnschedulable.rules.length > 0, @@ -1996,13 +2013,15 @@ function renderSentinelReport( lines.push(""); lines.push("ACCOUNTS"); lines.push(renderTable([ - ["ACCOUNT", "STATE", "Q", "F_MIN", "S_MIN", "PROBES", "LAST", "HTTP", "M", "KIND", "ACTION", "NEXT", "OBS_MIN"], + ["ACCOUNT", "STATE", "Q", "T", "F_MIN", "S_MIN", "S_MAX", "PROBES", "LAST", "HTTP", "M", "KIND", "ACTION", "NEXT", "OBS_MIN"], ...accounts.map((account) => [ stringValue(account.account) ?? "-", stringValue(account.status) ?? "-", account.quarantineActive === true ? "Y" : "-", + account.trustUpstream === true ? "Y" : account.trustUpstream === false ? "N" : "-", textValue(account.freezeIntervalMin), textValue(account.successIntervalMin), + textValue(account.successMaxIntervalMin), textValue(account.probeCount), shortIso(account.lastProbeAt), textValue(account.lastHttp), @@ -2031,7 +2050,7 @@ function renderSentinelReport( ])); } lines.push(""); - lines.push("LEGEND Q=quarantined M=marker matched F_MIN=freeze interval S_MIN=success interval OBS_MIN=last probe to next probe minutes TF=transport failures"); + lines.push("LEGEND Q=quarantined T=trusted upstream M=marker matched F_MIN=freeze interval S_MIN=success interval S_MAX=success max interval OBS_MIN=last probe to next probe minutes TF=transport failures"); lines.push("Raw: bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-report --raw"); return lines.join("\n"); } @@ -2209,6 +2228,7 @@ function sentinelProfileSecrets(profiles: CodexProfile[]): CodexPoolSentinelProf baseUrl: profile.baseUrl, apiKey: profile.apiKey ?? "", upstreamUserAgent: profile.upstreamUserAgent, + trustUpstream: profile.trustUpstream, })); } @@ -2883,6 +2903,7 @@ export function codexPoolSentinelProbeConfigFingerprint(input: { apiKeyFingerprint: string | null; upstreamUserAgent: string | null; openaiResponsesWebSocketsV2Mode: string | null; + trustUpstream: boolean; }): string { return fingerprint(JSON.stringify({ accountName: input.accountName, @@ -2891,6 +2912,7 @@ export function codexPoolSentinelProbeConfigFingerprint(input: { apiKeyFingerprint: input.apiKeyFingerprint, upstreamUserAgent: input.upstreamUserAgent, openaiResponsesWebSocketsV2Mode: input.openaiResponsesWebSocketsV2Mode, + trustUpstream: input.trustUpstream, })); } @@ -3027,8 +3049,10 @@ def report(): "quarantineApplied": quarantine.get("applied") if isinstance(quarantine, dict) else None, "freezeIntervalMin": quarantine.get("intervalMinutes") if isinstance(quarantine, dict) else None, "freezeUntil": quarantine.get("until") if isinstance(quarantine, dict) else None, + "trustUpstream": account_state.get("trustUpstream") if account_state.get("trustUpstream") is not None else probe.get("trustUpstream"), "successStreak": account_state.get("successStreak") or 0, "successIntervalMin": account_state.get("successIntervalMinutes") or 0, + "successMaxIntervalMin": account_state.get("successMaxIntervalMinutes") or probe.get("successMaxIntervalMinutes"), "probeCount": ledger.get("requestCount", 0), "inputTokens": ledger.get("inputTokens", 0), "outputTokens": ledger.get("outputTokens", 0), @@ -3445,6 +3469,8 @@ def sentinel_probe_change_reasons(current, profile): runtime_user_agent = empty_to_none(credentials.get("user_agent")) expected_ws_mode = empty_to_none(profile.get("openaiResponsesWebSocketsV2Mode")) runtime_ws_mode = empty_to_none(extra.get("openai_apikey_responses_websockets_v2_mode")) + expected_trust_upstream = profile.get("trustUpstream") is True + runtime_trust_upstream = extra.get("unidesk_trust_upstream") is True reasons = [] if empty_to_none(extra.get("unidesk_codex_profile")) != profile.get("profile"): reasons.append("profile") @@ -3456,6 +3482,8 @@ def sentinel_probe_change_reasons(current, profile): reasons.append("upstream-user-agent") if runtime_ws_mode != expected_ws_mode: reasons.append("responses-websockets-v2-mode") + if runtime_trust_upstream != expected_trust_upstream: + reasons.append("trust-upstream") return reasons def curl_api(method, path, bearer=None, payload=None): @@ -3669,6 +3697,7 @@ def account_payload(profile, group_id): "openai_responses_mode": "force_responses", "unidesk_codex_profile": profile["profile"], "unidesk_managed": True, + "unidesk_trust_upstream": profile.get("trustUpstream") is True, } ws_mode = profile.get("openaiResponsesWebSocketsV2Mode") if ws_mode: @@ -3715,6 +3744,10 @@ def planned_sentinel_account_results(profiles, existing_accounts): results.append({ "profile": profile["profile"], "accountName": profile["accountName"], + "profileConfig": { + "accountName": profile["accountName"], + "trustUpstream": profile.get("trustUpstream") is True, + }, "sentinelProbeConfigFingerprint": profile.get("sentinelProbeConfigFingerprint"), "sentinelProbeRequired": quality_gate_required, "sentinelChangeReasons": change_reasons if quality_gate_required else [], @@ -3754,6 +3787,10 @@ def ensure_accounts(token, profiles, group_id, prune_removed=False, protected_fr results.append({ "profile": profile["profile"], "accountName": profile["accountName"], + "profileConfig": { + "accountName": profile["accountName"], + "trustUpstream": profile.get("trustUpstream") is True, + }, "accountId": data.get("id") if isinstance(data, dict) else None, "action": action, "baseUrl": profile["baseUrl"], @@ -3765,6 +3802,7 @@ def ensure_accounts(token, profiles, group_id, prune_removed=False, protected_fr "sentinelDefaultFrozen": quality_gate_required, "sentinelFreezeProtected": keep_frozen, "openaiResponsesWebSocketsV2Mode": profile.get("openaiResponsesWebSocketsV2Mode"), + "trustUpstream": profile.get("trustUpstream") is True, "priority": int(profile.get("priority", POOL_DEFAULT_ACCOUNT_PRIORITY) or POOL_DEFAULT_ACCOUNT_PRIORITY), "capacity": int(profile.get("capacity", 5) or 5), "loadFactor": int(profile.get("loadFactor", POOL_DEFAULT_ACCOUNT_LOAD_FACTOR) or POOL_DEFAULT_ACCOUNT_LOAD_FACTOR), @@ -4061,6 +4099,68 @@ def clamp_sentinel_freezes_for_config(state, now): }) return items +def parse_iso_epoch(value): + if not isinstance(value, str) or not value: + return None + try: + return datetime.fromisoformat(value.replace("Z", "+00:00")).timestamp() + except Exception: + return None + +def profile_success_max_interval(profile): + cadence = SENTINEL_CONFIG.get("cadence") if isinstance(SENTINEL_CONFIG.get("cadence"), dict) else {} + legacy = cadence.get("successMaxIntervalMinutes") + if legacy is None: + legacy = cadence.get("trustedSuccessMaxIntervalMinutes") or cadence.get("untrustedSuccessMaxIntervalMinutes") or 1 + if profile.get("trustUpstream") is True: + value = cadence.get("trustedSuccessMaxIntervalMinutes") or legacy + else: + value = cadence.get("untrustedSuccessMaxIntervalMinutes") or legacy + try: + return int(value) + except Exception: + return int(legacy) + +def clamp_sentinel_success_cadence_for_config(state, profiles, now): + accounts_state = state.get("accounts") if isinstance(state.get("accounts"), dict) else {} + profile_map = {item.get("accountName"): item for item in profiles if isinstance(item, dict) and isinstance(item.get("accountName"), str)} + now_epoch = time.time() + items = [] + for name, profile in profile_map.items(): + account_state = accounts_state.get(name) + if not isinstance(account_state, dict): + continue + quarantine = account_state.get("quarantine") + if isinstance(quarantine, dict) and quarantine.get("active") is True: + account_state["trustUpstream"] = profile.get("trustUpstream") is True + account_state["successMaxIntervalMinutes"] = profile_success_max_interval(profile) + continue + try: + interval = int(account_state.get("successIntervalMinutes") or 0) + except Exception: + interval = 0 + next_epoch = parse_iso_epoch(account_state.get("nextProbeAfter")) + max_interval = profile_success_max_interval(profile) + account_state["trustUpstream"] = profile.get("trustUpstream") is True + account_state["successMaxIntervalMinutes"] = max_interval + if interval <= max_interval and (next_epoch is None or next_epoch <= now_epoch + max_interval * 60): + continue + old_next = account_state.get("nextProbeAfter") + account_state["previousSuccessIntervalMinutes"] = interval + account_state["successIntervalMinutes"] = min(interval, max_interval) if interval > 0 else interval + account_state["nextProbeAfter"] = now + account_state["cadenceClampedAt"] = now + account_state["cadenceClampedBy"] = "sync-success-max-interval" + items.append({ + "accountName": name, + "trustUpstream": profile.get("trustUpstream") is True, + "previousSuccessIntervalMinutes": interval, + "maxIntervalMinutes": max_interval, + "previousNextProbeAfter": old_next, + "nextProbeAfter": now, + }) + return items + def update_sentinel_state_configmap(obj, state): state_name = SENTINEL_CONFIG.get("stateConfigMapName") if not state_name: @@ -4101,6 +4201,7 @@ def ensure_sentinel_state_for_sync(account_results, pending_only=False): pending_until = utc_iso(3600) items = [] clamped_items = [] if pending_only else clamp_sentinel_freezes_for_config(state, now) + cadence_clamped_items = [] if pending_only else clamp_sentinel_success_cadence_for_config(state, [item.get("profileConfig") for item in account_results if isinstance(item.get("profileConfig"), dict)], now) changed_count = 0 fingerprint_only_count = 0 for item in account_results: @@ -4133,6 +4234,9 @@ def ensure_sentinel_state_for_sync(account_results, pending_only=False): account_state["nextProbeAfter"] = pending_until if pending_only else now account_state["successStreak"] = 0 account_state["successIntervalMinutes"] = 0 + profile_config = item.get("profileConfig") if isinstance(item.get("profileConfig"), dict) else {} + account_state["trustUpstream"] = profile_config.get("trustUpstream") is True + account_state["successMaxIntervalMinutes"] = profile_success_max_interval(profile_config) account_state["lastStatus"] = "pending-sentinel-quality-gate" account_state["qualityGate"] = { "pending": True, @@ -4142,15 +4246,17 @@ def ensure_sentinel_state_for_sync(account_results, pending_only=False): "pendingOnly": pending_only, } items.append({"accountName": name, "changeReasons": reasons, "nextProbeAfter": pending_until if pending_only else now, "defaultFrozen": True, "pendingOnly": pending_only}) - if changed_count <= 0 and len(clamped_items) <= 0: - return {"ok": True, "skipped": False, "reason": "no-new-or-changed-accounts", "changedCount": 0, "fingerprintOnlyCount": fingerprint_only_count, "clampedCount": 0, "items": [], "valuesPrinted": False} + if changed_count <= 0 and len(clamped_items) <= 0 and len(cadence_clamped_items) <= 0: + return {"ok": True, "skipped": False, "reason": "no-new-or-changed-accounts", "changedCount": 0, "fingerprintOnlyCount": fingerprint_only_count, "clampedCount": 0, "cadenceClampedCount": 0, "items": [], "valuesPrinted": False} update = update_sentinel_state_configmap(state_obj, state) if pending_only and changed_count > 0: reason = "new-or-changed-accounts-pending-quality-gate-prepared" - elif changed_count > 0 and len(clamped_items) > 0: - reason = "new-or-changed-accounts-default-frozen-and-freeze-backoff-clamped" + elif changed_count > 0 and (len(clamped_items) > 0 or len(cadence_clamped_items) > 0): + reason = "new-or-changed-accounts-default-frozen-and-sentinel-cadence-clamped" elif changed_count > 0: reason = "new-or-changed-accounts-default-frozen" + elif len(cadence_clamped_items) > 0: + reason = "success-cadence-clamped-to-current-config" else: reason = "freeze-backoff-clamped-to-current-config" return { @@ -4160,9 +4266,11 @@ def ensure_sentinel_state_for_sync(account_results, pending_only=False): "changedCount": changed_count, "fingerprintOnlyCount": fingerprint_only_count, "clampedCount": len(clamped_items), + "cadenceClampedCount": len(cadence_clamped_items), "pendingOnly": pending_only, "items": items, "clampedItems": clamped_items, + "cadenceClampedItems": cadence_clamped_items, "update": update, "valuesPrinted": False, }