feat: add sub2api sentinel trust cadence

This commit is contained in:
Codex
2026-06-11 13:55:25 +00:00
parent ea92eed148
commit d475cf2a9e
4 changed files with 159 additions and 15 deletions
+3 -2
View File
@@ -81,6 +81,7 @@ bun scripts/cli.ts platform-infra sub2api codex-pool cleanup-probes --confirm
- `profiles.entries`: 从 master `~/.codex/` 选择上游 profile 并映射到 Sub2API account。
- `profiles.entries[].capacity`: 可选 per-account concurrency override;不写则使用 `pool.defaultAccountCapacity`。具体数值只以 `config/platform-infra/sub2api-codex-pool.yaml` 为准,skill 和长期参考只描述规则,不重复写当前值。
- `profiles.entries[].loadFactor`: 可选 per-account Sub2API `load_factor` override;不写则使用 `pool.defaultAccountLoadFactor`。具体数值只以 `config/platform-infra/sub2api-codex-pool.yaml` 为准,修改后必须 `codex-pool sync --confirm` 并 `codex-pool validate`。
- `profiles.entries[].trustUpstream`: 可选账号级哨兵信任标记;默认 `false`。可信账号使用 `sentinel.cadence.trustedSuccessMaxIntervalMinutes` 作为连续成功后的最大探测退避,不可信账号使用 `sentinel.cadence.untrustedSuccessMaxIntervalMinutes`。它只影响哨兵探测频率和状态可见性,不改变 Sub2API account priority/capacity/loadFactor。
- 除非用户明确要求修改配置,不要仅凭推断改账号 membership、priority、capacity、loadFactor、WebSocket mode 或其他调度策略;先保留 YAML,完成 provenance/runtime evidence 溯源,并把结论写回相关 issue 或 runbook 后再提出变更。
- `profiles.entries[].tempUnschedulable`: 可选 per-account 临时下线规则覆盖;字段语义以 `docs/reference/platform-infra.md` 为权威。上游 Sub2API 不支持的成功体分类、调度策略或账号冷却行为不要在这里声明。
- `profiles.entries[].openaiResponsesWebSocketsV2Mode`: 需要 Responses WebSocket v2 的上游才设置,值为 `off`、`ctx_pool` 或 `passthrough`。
@@ -90,7 +91,7 @@ bun scripts/cli.ts platform-infra sub2api codex-pool cleanup-probes --confirm
- `sentinel.sdk.openaiPythonVersion`: 哨兵容器使用的 OpenAI Python SDK 固定版本;模型请求必须通过标准 SDK `responses.create`,不要手工拼 `/v1/responses` 请求体或手写响应解析。后续升级 SDK 只改 YAML 并 `sync --confirm`
- `sentinel.probe.maxOutputTokens`: 哨兵本地流式 delta 收集上限,必须保持小值;它不作为上游 `max_output_tokens` 字段发送,以保持与 Sub2API WebUI 默认账号连接测试的 Responses SSE 请求形态一致。哨兵不限制并发和每轮账号数,所有到期账号会在同一轮并发探测。
- `sentinel.probe.userAgent`: 哨兵 direct upstream probe 的默认 User-Agent,通过 OpenAI SDK `extra_headers` 传递;默认贴近 Sub2API `net/http` 账号连接测试形态,个别账号仍可用 `profiles.entries[].upstreamUserAgent` 覆盖。
- `sentinel.cadence`: 成功信任指数退避配置。当前口径是从 1 分钟开始,连续成功后退避到最大 20 分钟;任意非 marker match 清零成功信任并进入冻结退避。
- `sentinel.cadence`: 成功信任指数退避配置。当前口径是从 1 分钟开始,连续成功后按账号 `trustUpstream` 选择可信/不可信最大退避;任意非 marker match 清零成功信任并进入冻结退避。可信/不可信最大退避数值只写 YAML。
- `sentinel.freeze`: 失败冻结 TTL 指数退避配置。当前口径是初始 1 分钟,失败后 `1m -> 2m -> 4m -> 8m -> 10m`,最大 10 分钟;失败 probe 基本不消耗有效输出 token,因此冻结窗口保持短周期。冻结到期后只做恢复 probe,通过才自动恢复,不能仅靠 TTL 到期解封。
- `sentinel.pricing`: 直打上游时哨兵自己的 token/cost 估算价格。因为 direct upstream probe 不经过 Sub2API 普通用量账本,哨兵必须自己记录全局与 per-account token/cost;这些账本只用于观察,不作为跳过探测的预算门禁。
@@ -114,7 +115,7 @@ Codex 启动时反复出现 WebSocket reconnect、HTTPS fallback、`websocket cl
1. 在 master `~/.codex/` 准备带后缀的上游 profile 文件,例如 `config.toml.<profile>` 和 `auth.json.<profile>`;禁止覆盖默认 `config.toml` / `auth.json`。
2. 在 `config/platform-infra/sub2api-codex-pool.yaml` 添加 `profiles.entries` 项,指定 `profile`、`accountName`、`configFile` 和 `authFile`。
3. 如需要,给该项加 `priority`、`capacity`、`loadFactor`、`tempUnschedulable`、`openaiResponsesWebSocketsV2Mode` 或 `upstreamUserAgent`;capacity/loadFactor 的具体数值只写在 YAML。
3. 如需要,给该项加 `priority`、`capacity`、`loadFactor`、`trustUpstream`、`tempUnschedulable`、`openaiResponsesWebSocketsV2Mode` 或 `upstreamUserAgent`;capacity/loadFactor/信任退避的具体数值只写在 YAML。
4. 如果新增账号会提高声明 capacity 总和,默认让省略的 `pool.minOwnerConcurrency` 继续按 capacity 总和自动解析;只有 YAML 已经显式写了该 override 时,才同步提高到不低于总 capacity,或删除 override 回到自动解析。
5. 运行 `codex-pool plan`,确认 profile 可读、`base_url` 和 API key 来源有效,且 stdout 未泄露完整 key。
6. 运行 `codex-pool sync --confirm`。
@@ -60,6 +60,7 @@ profiles:
accountName: unidesk-codex-hy
configFile: config.toml.HY
authFile: auth.json.HY
trustUpstream: true
openaiResponsesWebSocketsV2Mode: off
capacity: 10
loadFactor: 10
@@ -81,12 +82,14 @@ profiles:
accountName: unidesk-codex-gptclub
configFile: config.toml.gptclub
authFile: auth.json.gptclub
trustUpstream: true
capacity: 10
priority: 100
- profile: only
accountName: unidesk-codex-only
configFile: config.toml.only
authFile: auth.json.only
trustUpstream: true
loadFactor: 1
priority: 110
- profile: zakuzaku
@@ -179,6 +182,8 @@ sentinel:
cadence:
successInitialIntervalMinutes: 1
successMaxIntervalMinutes: 20
trustedSuccessMaxIntervalMinutes: 20
untrustedSuccessMaxIntervalMinutes: 2
successBackoffMultiplier: 2
jitterPercent: 10
freeze:
@@ -32,6 +32,8 @@ export interface CodexPoolSentinelConfig {
cadence: {
successInitialIntervalMinutes: number;
successMaxIntervalMinutes: number;
trustedSuccessMaxIntervalMinutes: number;
untrustedSuccessMaxIntervalMinutes: number;
successBackoffMultiplier: number;
jitterPercent: number;
};
@@ -61,6 +63,7 @@ export interface CodexPoolSentinelProfileSecret {
baseUrl: string;
apiKey: string;
upstreamUserAgent: string | null;
trustUpstream: boolean;
}
export interface CodexPoolSentinelManifestOptions {
@@ -103,6 +106,8 @@ export function defaultCodexPoolSentinelConfig(): CodexPoolSentinelConfig {
cadence: {
successInitialIntervalMinutes: 1,
successMaxIntervalMinutes: 20,
trustedSuccessMaxIntervalMinutes: 20,
untrustedSuccessMaxIntervalMinutes: 2,
successBackoffMultiplier: 2,
jitterPercent: 10,
},
@@ -145,6 +150,9 @@ export function readCodexPoolSentinelConfig(value: unknown, defaults: CodexPoolS
const cadence = isRecord(value.cadence) ? value.cadence : {};
const freeze = isRecord(value.freeze) ? value.freeze : {};
const pricing = isRecord(value.pricing) ? value.pricing : {};
const legacySuccessMax = readInt(valueAt(cadence, "successMaxIntervalMinutes"), `${sourcePath}.sentinel.cadence.successMaxIntervalMinutes`, defaults.cadence.successMaxIntervalMinutes, 1, 1440);
const trustedSuccessMax = readInt(valueAt(cadence, "trustedSuccessMaxIntervalMinutes"), `${sourcePath}.sentinel.cadence.trustedSuccessMaxIntervalMinutes`, legacySuccessMax, 1, 1440);
const untrustedSuccessMax = readInt(valueAt(cadence, "untrustedSuccessMaxIntervalMinutes"), `${sourcePath}.sentinel.cadence.untrustedSuccessMaxIntervalMinutes`, legacySuccessMax, 1, 1440);
const config: CodexPoolSentinelConfig = {
monitor: {
enabled: readBoolean(valueAt(monitor, "enabled"), `${sourcePath}.sentinel.monitor.enabled`, defaults.monitor.enabled),
@@ -176,7 +184,9 @@ export function readCodexPoolSentinelConfig(value: unknown, defaults: CodexPoolS
},
cadence: {
successInitialIntervalMinutes: readInt(valueAt(cadence, "successInitialIntervalMinutes"), `${sourcePath}.sentinel.cadence.successInitialIntervalMinutes`, defaults.cadence.successInitialIntervalMinutes, 1, 1440),
successMaxIntervalMinutes: readInt(valueAt(cadence, "successMaxIntervalMinutes"), `${sourcePath}.sentinel.cadence.successMaxIntervalMinutes`, defaults.cadence.successMaxIntervalMinutes, 1, 1440),
successMaxIntervalMinutes: legacySuccessMax,
trustedSuccessMaxIntervalMinutes: trustedSuccessMax,
untrustedSuccessMaxIntervalMinutes: untrustedSuccessMax,
successBackoffMultiplier: readInt(valueAt(cadence, "successBackoffMultiplier"), `${sourcePath}.sentinel.cadence.successBackoffMultiplier`, defaults.cadence.successBackoffMultiplier, 1, 10),
jitterPercent: readInt(valueAt(cadence, "jitterPercent"), `${sourcePath}.sentinel.cadence.jitterPercent`, defaults.cadence.jitterPercent, 0, 50),
},
@@ -198,6 +208,12 @@ export function readCodexPoolSentinelConfig(value: unknown, defaults: CodexPoolS
if (config.cadence.successMaxIntervalMinutes < config.cadence.successInitialIntervalMinutes) {
throw new Error(`${sourcePath}.sentinel.cadence.successMaxIntervalMinutes must be >= successInitialIntervalMinutes`);
}
if (config.cadence.trustedSuccessMaxIntervalMinutes < config.cadence.successInitialIntervalMinutes) {
throw new Error(`${sourcePath}.sentinel.cadence.trustedSuccessMaxIntervalMinutes must be >= successInitialIntervalMinutes`);
}
if (config.cadence.untrustedSuccessMaxIntervalMinutes < config.cadence.successInitialIntervalMinutes) {
throw new Error(`${sourcePath}.sentinel.cadence.untrustedSuccessMaxIntervalMinutes must be >= successInitialIntervalMinutes`);
}
if (config.freeze.maxTtlMinutes < config.freeze.initialTtlMinutes) {
throw new Error(`${sourcePath}.sentinel.freeze.maxTtlMinutes must be >= initialTtlMinutes`);
}
@@ -1032,6 +1048,7 @@ def probe_account(profile, config, purpose):
return {
"accountName": profile["accountName"],
"profile": profile.get("profile"),
"trustUpstream": profile.get("trustUpstream") is True,
"purpose": purpose,
"ok": ok,
"markerMatched": marker_matched,
@@ -1115,11 +1132,17 @@ def choose_forced_profiles(profiles, state, config, now, names):
missing = sorted(name for name in names if name not in set(found))
return due, {"selected": len(due), "due": len(due), "limit": "forced-accounts", "budgetMode": "record-only", "ledger": ledger_for(state, now)[1], "requestedAccounts": sorted(names), "missingAccounts": missing}
def success_max_interval(profile, config):
    """Return the success back-off ceiling, in minutes, for one profile.

    A profile whose trustUpstream value is exactly True uses
    cadence.trustedSuccessMaxIntervalMinutes; every other profile uses
    cadence.untrustedSuccessMaxIntervalMinutes. When the tier value is missing
    or falsy, the legacy cadence.successMaxIntervalMinutes is used instead.
    """
    cadence = config["cadence"]
    if profile.get("trustUpstream") is True:
        tier_key = "trustedSuccessMaxIntervalMinutes"
    else:
        tier_key = "untrustedSuccessMaxIntervalMinutes"
    return int(cadence.get(tier_key) or cadence.get("successMaxIntervalMinutes"))


def next_success_interval(account_state, config, profile):
    """Compute the next success probe interval, in minutes, for an account.

    The first success of a streak (no streak yet, or no previous interval)
    starts at cadence.successInitialIntervalMinutes. Later successes multiply
    the previous interval by cadence.successBackoffMultiplier, bounded below by
    the initial interval and above by the ceiling for this profile.
    """
    streak = int(account_state.get("successStreak") or 0)
    previous = int(account_state.get("successIntervalMinutes") or 0)
    initial = int(config["cadence"]["successInitialIntervalMinutes"])
    # The ceiling comes only from the per-profile helper. The superseded direct
    # read of cadence.successMaxIntervalMinutes was a dead store and raised
    # KeyError for configs that carry only the tiered keys, so it is gone.
    maximum = success_max_interval(profile, config)
    multiplier = int(config["cadence"]["successBackoffMultiplier"])
    if streak <= 0 or previous <= 0:
        return initial
    return min(maximum, max(initial, previous * multiplier))
@@ -1133,7 +1156,7 @@ def next_freeze_interval(account_state, config, was_recovery):
return min(maximum, max(initial, previous * multiplier))
return initial
def apply_result(result, state, config, now, admin):
def apply_result(result, state, config, now, admin, profile):
name = result["accountName"]
account_state = state.setdefault("accounts", {}).setdefault(name, {})
add_usage(state, account_state, now, result.get("usage") or {})
@@ -1154,9 +1177,10 @@ def apply_result(result, state, config, now, admin):
account_state["qualityGate"] = {**quality_gate, "pending": False, "clearedAt": iso(now)}
account_state["successStreak"] = 0
account_state["successIntervalMinutes"] = 0
interval = next_success_interval(account_state, config)
interval = next_success_interval(account_state, config, profile)
account_state["successStreak"] = int(account_state.get("successStreak") or 0) + 1
account_state["successIntervalMinutes"] = interval
account_state["successMaxIntervalMinutes"] = success_max_interval(profile, config)
account_state["nextProbeAfter"] = iso(add_minutes(now, interval, int(config["cadence"]["jitterPercent"])))
account_state["lastOkAt"] = iso(now)
account_state["lastStatus"] = "ok"
@@ -1190,6 +1214,7 @@ def apply_result(result, state, config, now, admin):
account_state["nextProbeAfter"] = iso(until)
account_state["successStreak"] = 0
account_state["successIntervalMinutes"] = 0
account_state["successMaxIntervalMinutes"] = success_max_interval(profile, config)
account_state["lastStatus"] = "quarantined"
else:
retry = int(config["probe"]["transportRetryMinutes"])
@@ -1197,9 +1222,12 @@ def apply_result(result, state, config, now, admin):
account_state["lastStatus"] = "marker-not-matched-no-freeze"
account_state["lastFailureAt"] = iso(now)
account_state["lastProbeAt"] = iso(now)
account_state["trustUpstream"] = profile.get("trustUpstream") is True
account_state["lastProbe"] = {
"ok": result.get("ok"),
"purpose": result.get("purpose"),
"trustUpstream": result.get("trustUpstream"),
"successMaxIntervalMinutes": success_max_interval(profile, config),
"httpStatus": result.get("httpStatus"),
"durationMs": result.get("durationMs"),
"markerMatched": result.get("markerMatched"),
@@ -1263,11 +1291,12 @@ def main():
actions = []
if (config["monitor"]["enabled"] or forced_names) and due:
with ThreadPoolExecutor(max_workers=max(1, len(due))) as executor:
futures = [executor.submit(probe_account, item["profile"], config, item["purpose"]) for item in due]
futures = {executor.submit(probe_account, item["profile"], config, item["purpose"]): item["profile"] for item in due}
for future in as_completed(futures):
result = future.result()
results.append(result)
actions.append({"accountName": result["accountName"], **apply_result(result, state, config, now, admin)})
profile = futures[future]
actions.append({"accountName": result["accountName"], **apply_result(result, state, config, now, admin, profile)})
history = state.setdefault("history", [])
run_summary = {
"at": iso(now),
@@ -1293,6 +1322,7 @@ def main():
"results": [{
"accountName": item.get("accountName"),
"purpose": item.get("purpose"),
"trustUpstream": item.get("trustUpstream"),
"ok": item.get("ok"),
"markerMatched": item.get("markerMatched"),
"httpStatus": item.get("httpStatus"),
+114 -6
View File
@@ -76,6 +76,7 @@ interface CodexProfile {
apiKeySource: "auth-json" | "env" | null;
openaiResponsesWebSocketsV2Mode: OpenAIResponsesWebSocketsV2Mode | null;
upstreamUserAgent: string | null;
trustUpstream: boolean;
priority: number;
capacity: number;
loadFactor: number;
@@ -126,6 +127,7 @@ interface CodexPoolProfileConfig {
fallbackAuthFile: string | null;
openaiResponsesWebSocketsV2Mode: OpenAIResponsesWebSocketsV2Mode | null;
upstreamUserAgent: string | null;
trustUpstream: boolean;
priority: number;
capacity: number | null;
loadFactor: number | null;
@@ -480,9 +482,11 @@ async function codexPoolSync(config: UniDeskConfig, options: SyncOptions): Promi
apiKeyFingerprint: fingerprint(profile.apiKey ?? ""),
upstreamUserAgent: profile.upstreamUserAgent,
openaiResponsesWebSocketsV2Mode: profile.openaiResponsesWebSocketsV2Mode,
trustUpstream: profile.trustUpstream,
}),
openaiResponsesWebSocketsV2Mode: profile.openaiResponsesWebSocketsV2Mode,
upstreamUserAgent: profile.upstreamUserAgent,
trustUpstream: profile.trustUpstream,
priority: profile.priority,
capacity: profile.capacity,
loadFactor: profile.loadFactor,
@@ -830,6 +834,7 @@ function collectCodexProfiles(): CodexProfile[] {
apiKeySource: null,
openaiResponsesWebSocketsV2Mode: entry.openaiResponsesWebSocketsV2Mode,
upstreamUserAgent: entry.upstreamUserAgent,
trustUpstream: entry.trustUpstream,
priority: entry.priority,
capacity: entry.capacity ?? pool.defaultAccountCapacity,
loadFactor: entry.loadFactor ?? pool.defaultAccountLoadFactor,
@@ -902,6 +907,7 @@ function discoverCodexProfileConfigs(
fallbackAuthFile: null,
openaiResponsesWebSocketsV2Mode: null,
upstreamUserAgent: null,
trustUpstream: false,
priority: defaultPriority,
capacity: null,
loadFactor: null,
@@ -1135,6 +1141,7 @@ function readProfileConfig(
if (fallbackAuthFile !== null) validateCodexFileName(fallbackAuthFile, `profiles.entries[${index}].fallbackAuthFile`);
const openaiResponsesWebSocketsV2Mode = readOpenAIResponsesWebSocketsV2Mode(entry.openaiResponsesWebSocketsV2Mode, `profiles.entries[${index}].openaiResponsesWebSocketsV2Mode`);
const upstreamUserAgent = readUpstreamUserAgent(entry.upstreamUserAgent, `profiles.entries[${index}].upstreamUserAgent`);
const trustUpstream = readTrustUpstream(entry.trustUpstream, `profiles.entries[${index}].trustUpstream`);
const priority = readAccountPriority(entry.priority, `profiles.entries[${index}].priority`, defaultPriority);
const capacity = entry.capacity === undefined || entry.capacity === null ? null : readAccountCapacity(entry.capacity, `profiles.entries[${index}].capacity`);
const loadFactor = entry.loadFactor === undefined || entry.loadFactor === null ? null : readAccountLoadFactor(entry.loadFactor, `profiles.entries[${index}].loadFactor`);
@@ -1148,6 +1155,7 @@ function readProfileConfig(
fallbackAuthFile,
openaiResponsesWebSocketsV2Mode,
upstreamUserAgent,
trustUpstream,
priority,
capacity,
loadFactor,
@@ -1183,6 +1191,13 @@ function readUpstreamUserAgent(value: unknown, key: string): string | null {
return text;
}
/**
 * Reads the optional per-account `trustUpstream` flag from a profile entry.
 *
 * @param value - Raw value taken from the parsed profile entry.
 * @param key - Config path of the field, used only in the error message.
 * @returns `false` when the field is absent (`undefined` or `null`),
 *   otherwise the boolean produced by `booleanValue`.
 * @throws Error when `booleanValue` reports the value as not a boolean
 *   (returns `null`).
 */
function readTrustUpstream(value: unknown, key: string): boolean {
  // An omitted flag means the account is not trusted.
  if (value == null) return false;
  const flag = booleanValue(value);
  if (flag !== null) return flag;
  throw new Error(`${codexPoolConfigPath}.${key} must be a boolean`);
}
function readAccountPriority(value: unknown, key: string, fallback = defaultAccountPriority): number {
if (value === undefined || value === null) return fallback;
const priority = numberValue(value);
@@ -1502,6 +1517,7 @@ function redactProfile(profile: CodexProfile): Record<string, unknown> {
apiKeySource: profile.apiKeySource,
openaiResponsesWebSocketsV2Mode: profile.openaiResponsesWebSocketsV2Mode,
upstreamUserAgent: profile.upstreamUserAgent,
trustUpstream: profile.trustUpstream,
priority: profile.priority,
capacity: profile.capacity,
loadFactor: profile.loadFactor,
@@ -1523,6 +1539,7 @@ function compactProfile(profile: CodexProfile): Record<string, unknown> {
provider: profile.provider || null,
model: profile.model,
priority: profile.priority,
trustUpstream: profile.trustUpstream,
capacity: profile.capacity,
loadFactor: profile.loadFactor,
tempUnschedulableEnabled: profile.tempUnschedulable.enabled && profile.tempUnschedulable.rules.length > 0,
@@ -1996,13 +2013,15 @@ function renderSentinelReport(
lines.push("");
lines.push("ACCOUNTS");
lines.push(renderTable([
["ACCOUNT", "STATE", "Q", "F_MIN", "S_MIN", "PROBES", "LAST", "HTTP", "M", "KIND", "ACTION", "NEXT", "OBS_MIN"],
["ACCOUNT", "STATE", "Q", "T", "F_MIN", "S_MIN", "S_MAX", "PROBES", "LAST", "HTTP", "M", "KIND", "ACTION", "NEXT", "OBS_MIN"],
...accounts.map((account) => [
stringValue(account.account) ?? "-",
stringValue(account.status) ?? "-",
account.quarantineActive === true ? "Y" : "-",
account.trustUpstream === true ? "Y" : account.trustUpstream === false ? "N" : "-",
textValue(account.freezeIntervalMin),
textValue(account.successIntervalMin),
textValue(account.successMaxIntervalMin),
textValue(account.probeCount),
shortIso(account.lastProbeAt),
textValue(account.lastHttp),
@@ -2031,7 +2050,7 @@ function renderSentinelReport(
]));
}
lines.push("");
lines.push("LEGEND Q=quarantined M=marker matched F_MIN=freeze interval S_MIN=success interval OBS_MIN=last probe to next probe minutes TF=transport failures");
lines.push("LEGEND Q=quarantined T=trusted upstream M=marker matched F_MIN=freeze interval S_MIN=success interval S_MAX=success max interval OBS_MIN=last probe to next probe minutes TF=transport failures");
lines.push("Raw: bun scripts/cli.ts platform-infra sub2api codex-pool sentinel-report --raw");
return lines.join("\n");
}
@@ -2209,6 +2228,7 @@ function sentinelProfileSecrets(profiles: CodexProfile[]): CodexPoolSentinelProf
baseUrl: profile.baseUrl,
apiKey: profile.apiKey ?? "",
upstreamUserAgent: profile.upstreamUserAgent,
trustUpstream: profile.trustUpstream,
}));
}
@@ -2883,6 +2903,7 @@ export function codexPoolSentinelProbeConfigFingerprint(input: {
apiKeyFingerprint: string | null;
upstreamUserAgent: string | null;
openaiResponsesWebSocketsV2Mode: string | null;
trustUpstream: boolean;
}): string {
return fingerprint(JSON.stringify({
accountName: input.accountName,
@@ -2891,6 +2912,7 @@ export function codexPoolSentinelProbeConfigFingerprint(input: {
apiKeyFingerprint: input.apiKeyFingerprint,
upstreamUserAgent: input.upstreamUserAgent,
openaiResponsesWebSocketsV2Mode: input.openaiResponsesWebSocketsV2Mode,
trustUpstream: input.trustUpstream,
}));
}
@@ -3027,8 +3049,10 @@ def report():
"quarantineApplied": quarantine.get("applied") if isinstance(quarantine, dict) else None,
"freezeIntervalMin": quarantine.get("intervalMinutes") if isinstance(quarantine, dict) else None,
"freezeUntil": quarantine.get("until") if isinstance(quarantine, dict) else None,
"trustUpstream": account_state.get("trustUpstream") if account_state.get("trustUpstream") is not None else probe.get("trustUpstream"),
"successStreak": account_state.get("successStreak") or 0,
"successIntervalMin": account_state.get("successIntervalMinutes") or 0,
"successMaxIntervalMin": account_state.get("successMaxIntervalMinutes") or probe.get("successMaxIntervalMinutes"),
"probeCount": ledger.get("requestCount", 0),
"inputTokens": ledger.get("inputTokens", 0),
"outputTokens": ledger.get("outputTokens", 0),
@@ -3445,6 +3469,8 @@ def sentinel_probe_change_reasons(current, profile):
runtime_user_agent = empty_to_none(credentials.get("user_agent"))
expected_ws_mode = empty_to_none(profile.get("openaiResponsesWebSocketsV2Mode"))
runtime_ws_mode = empty_to_none(extra.get("openai_apikey_responses_websockets_v2_mode"))
expected_trust_upstream = profile.get("trustUpstream") is True
runtime_trust_upstream = extra.get("unidesk_trust_upstream") is True
reasons = []
if empty_to_none(extra.get("unidesk_codex_profile")) != profile.get("profile"):
reasons.append("profile")
@@ -3456,6 +3482,8 @@ def sentinel_probe_change_reasons(current, profile):
reasons.append("upstream-user-agent")
if runtime_ws_mode != expected_ws_mode:
reasons.append("responses-websockets-v2-mode")
if runtime_trust_upstream != expected_trust_upstream:
reasons.append("trust-upstream")
return reasons
def curl_api(method, path, bearer=None, payload=None):
@@ -3669,6 +3697,7 @@ def account_payload(profile, group_id):
"openai_responses_mode": "force_responses",
"unidesk_codex_profile": profile["profile"],
"unidesk_managed": True,
"unidesk_trust_upstream": profile.get("trustUpstream") is True,
}
ws_mode = profile.get("openaiResponsesWebSocketsV2Mode")
if ws_mode:
@@ -3715,6 +3744,10 @@ def planned_sentinel_account_results(profiles, existing_accounts):
results.append({
"profile": profile["profile"],
"accountName": profile["accountName"],
"profileConfig": {
"accountName": profile["accountName"],
"trustUpstream": profile.get("trustUpstream") is True,
},
"sentinelProbeConfigFingerprint": profile.get("sentinelProbeConfigFingerprint"),
"sentinelProbeRequired": quality_gate_required,
"sentinelChangeReasons": change_reasons if quality_gate_required else [],
@@ -3754,6 +3787,10 @@ def ensure_accounts(token, profiles, group_id, prune_removed=False, protected_fr
results.append({
"profile": profile["profile"],
"accountName": profile["accountName"],
"profileConfig": {
"accountName": profile["accountName"],
"trustUpstream": profile.get("trustUpstream") is True,
},
"accountId": data.get("id") if isinstance(data, dict) else None,
"action": action,
"baseUrl": profile["baseUrl"],
@@ -3765,6 +3802,7 @@ def ensure_accounts(token, profiles, group_id, prune_removed=False, protected_fr
"sentinelDefaultFrozen": quality_gate_required,
"sentinelFreezeProtected": keep_frozen,
"openaiResponsesWebSocketsV2Mode": profile.get("openaiResponsesWebSocketsV2Mode"),
"trustUpstream": profile.get("trustUpstream") is True,
"priority": int(profile.get("priority", POOL_DEFAULT_ACCOUNT_PRIORITY) or POOL_DEFAULT_ACCOUNT_PRIORITY),
"capacity": int(profile.get("capacity", 5) or 5),
"loadFactor": int(profile.get("loadFactor", POOL_DEFAULT_ACCOUNT_LOAD_FACTOR) or POOL_DEFAULT_ACCOUNT_LOAD_FACTOR),
@@ -4061,6 +4099,68 @@ def clamp_sentinel_freezes_for_config(state, now):
})
return items
def parse_iso_epoch(value):
    """Convert an ISO-8601 timestamp string to POSIX epoch seconds.

    Returns a float, or None when value is not a non-empty string or cannot
    be parsed. Every "Z" in the string is rewritten to "+00:00" before parsing,
    because datetime.fromisoformat on Python releases before 3.11 rejects the
    "Z" suffix.
    """
    if not isinstance(value, str) or not value:
        return None
    # Local import keeps this block self-contained.
    from datetime import timezone
    try:
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
        if parsed.tzinfo is None:
            # A naive datetime would be converted with the host's local
            # timezone, making the result machine-dependent. Read it as UTC.
            # NOTE(review): assumes stored sentinel timestamps are UTC - confirm.
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed.timestamp()
    except Exception:
        # Best effort: any parse or conversion failure means "no timestamp".
        return None
def profile_success_max_interval(profile):
    """Return the success back-off ceiling, in minutes, for one profile.

    Reads the cadence section of the module-level SENTINEL_CONFIG. A profile
    whose trustUpstream value is exactly True uses
    trustedSuccessMaxIntervalMinutes; every other profile uses
    untrustedSuccessMaxIntervalMinutes. A missing or unusable tier value falls
    back to the legacy successMaxIntervalMinutes, then to whichever tier value
    is usable, and finally to 1.

    Each candidate is coerced to a positive integer before it is considered,
    so a malformed or non-positive entry is skipped and this function never
    raises on bad cadence values.
    """
    def positive_int(raw):
        # Returns raw as an int greater than zero, or None when it is not one.
        if raw is None:
            return None
        try:
            number = int(raw)
        except (TypeError, ValueError, OverflowError):
            return None
        return number if number > 0 else None

    cadence = SENTINEL_CONFIG.get("cadence")
    if not isinstance(cadence, dict):
        cadence = {}
    trusted = positive_int(cadence.get("trustedSuccessMaxIntervalMinutes"))
    untrusted = positive_int(cadence.get("untrustedSuccessMaxIntervalMinutes"))
    legacy = positive_int(cadence.get("successMaxIntervalMinutes"))
    if legacy is None:
        legacy = trusted or untrusted or 1
    preferred = trusted if profile.get("trustUpstream") is True else untrusted
    return preferred if preferred is not None else legacy
def clamp_sentinel_success_cadence_for_config(state, profiles, now):
    """Pull stored success cadence back inside the currently configured ceiling.

    For every profile that has an entry in state["accounts"], the account is
    stamped with its current trustUpstream flag and successMaxIntervalMinutes.
    Accounts with an active quarantine are stamped only. Any other account
    whose stored success interval exceeds the ceiling, or whose nextProbeAfter
    lies further ahead than the ceiling allows, gets its interval capped and
    its nextProbeAfter reset to now.

    Args:
        state: Sentinel state dict; entries under "accounts" are mutated in place.
        profiles: Iterable of profile dicts carrying accountName and trustUpstream.
            Items that are not dicts or lack a string accountName are ignored.
        now: Value stored verbatim as nextProbeAfter and cadenceClampedAt.
            Presumably an ISO timestamp string - verify against the caller.

    Returns:
        A list with one summary dict per clamped account.
    """
    accounts_state = state.get("accounts") if isinstance(state.get("accounts"), dict) else {}
    profile_map = {item.get("accountName"): item for item in profiles if isinstance(item, dict) and isinstance(item.get("accountName"), str)}
    cadence = SENTINEL_CONFIG.get("cadence") if isinstance(SENTINEL_CONFIG.get("cadence"), dict) else {}
    try:
        # Bounded to 0..50, the same range the config reader accepts.
        jitter_percent = min(50, max(0, int(cadence.get("jitterPercent") or 0)))
    except Exception:
        jitter_percent = 0
    now_epoch = time.time()
    items = []
    for name, profile in profile_map.items():
        account_state = accounts_state.get(name)
        if not isinstance(account_state, dict):
            continue
        trusted = profile.get("trustUpstream") is True
        max_interval = profile_success_max_interval(profile)
        # Always refresh the visibility fields, even for quarantined accounts.
        account_state["trustUpstream"] = trusted
        account_state["successMaxIntervalMinutes"] = max_interval
        quarantine = account_state.get("quarantine")
        if isinstance(quarantine, dict) and quarantine.get("active") is True:
            # Frozen accounts follow the freeze schedule; leave their timing alone.
            continue
        try:
            interval = int(account_state.get("successIntervalMinutes") or 0)
        except Exception:
            interval = 0
        next_epoch = parse_iso_epoch(account_state.get("nextProbeAfter"))
        # Allow for scheduling jitter so an account already at its ceiling is
        # not reported as clamped, and force-probed, just because sync ran
        # shortly after a probe.
        # NOTE(review): assumes the sentinel stretches nextProbeAfter by at most
        # jitterPercent of the interval - confirm against add_minutes.
        latest_allowed = now_epoch + max_interval * 60 * (100 + jitter_percent) / 100
        if interval <= max_interval and (next_epoch is None or next_epoch <= latest_allowed):
            continue
        old_next = account_state.get("nextProbeAfter")
        account_state["previousSuccessIntervalMinutes"] = interval
        account_state["successIntervalMinutes"] = min(interval, max_interval) if interval > 0 else interval
        account_state["nextProbeAfter"] = now
        account_state["cadenceClampedAt"] = now
        account_state["cadenceClampedBy"] = "sync-success-max-interval"
        items.append({
            "accountName": name,
            "trustUpstream": trusted,
            "previousSuccessIntervalMinutes": interval,
            "maxIntervalMinutes": max_interval,
            "previousNextProbeAfter": old_next,
            "nextProbeAfter": now,
        })
    return items
def update_sentinel_state_configmap(obj, state):
state_name = SENTINEL_CONFIG.get("stateConfigMapName")
if not state_name:
@@ -4101,6 +4201,7 @@ def ensure_sentinel_state_for_sync(account_results, pending_only=False):
pending_until = utc_iso(3600)
items = []
clamped_items = [] if pending_only else clamp_sentinel_freezes_for_config(state, now)
cadence_clamped_items = [] if pending_only else clamp_sentinel_success_cadence_for_config(state, [item.get("profileConfig") for item in account_results if isinstance(item.get("profileConfig"), dict)], now)
changed_count = 0
fingerprint_only_count = 0
for item in account_results:
@@ -4133,6 +4234,9 @@ def ensure_sentinel_state_for_sync(account_results, pending_only=False):
account_state["nextProbeAfter"] = pending_until if pending_only else now
account_state["successStreak"] = 0
account_state["successIntervalMinutes"] = 0
profile_config = item.get("profileConfig") if isinstance(item.get("profileConfig"), dict) else {}
account_state["trustUpstream"] = profile_config.get("trustUpstream") is True
account_state["successMaxIntervalMinutes"] = profile_success_max_interval(profile_config)
account_state["lastStatus"] = "pending-sentinel-quality-gate"
account_state["qualityGate"] = {
"pending": True,
@@ -4142,15 +4246,17 @@ def ensure_sentinel_state_for_sync(account_results, pending_only=False):
"pendingOnly": pending_only,
}
items.append({"accountName": name, "changeReasons": reasons, "nextProbeAfter": pending_until if pending_only else now, "defaultFrozen": True, "pendingOnly": pending_only})
if changed_count <= 0 and len(clamped_items) <= 0:
return {"ok": True, "skipped": False, "reason": "no-new-or-changed-accounts", "changedCount": 0, "fingerprintOnlyCount": fingerprint_only_count, "clampedCount": 0, "items": [], "valuesPrinted": False}
if changed_count <= 0 and len(clamped_items) <= 0 and len(cadence_clamped_items) <= 0:
return {"ok": True, "skipped": False, "reason": "no-new-or-changed-accounts", "changedCount": 0, "fingerprintOnlyCount": fingerprint_only_count, "clampedCount": 0, "cadenceClampedCount": 0, "items": [], "valuesPrinted": False}
update = update_sentinel_state_configmap(state_obj, state)
if pending_only and changed_count > 0:
reason = "new-or-changed-accounts-pending-quality-gate-prepared"
elif changed_count > 0 and len(clamped_items) > 0:
reason = "new-or-changed-accounts-default-frozen-and-freeze-backoff-clamped"
elif changed_count > 0 and (len(clamped_items) > 0 or len(cadence_clamped_items) > 0):
reason = "new-or-changed-accounts-default-frozen-and-sentinel-cadence-clamped"
elif changed_count > 0:
reason = "new-or-changed-accounts-default-frozen"
elif len(cadence_clamped_items) > 0:
reason = "success-cadence-clamped-to-current-config"
else:
reason = "freeze-backoff-clamped-to-current-config"
return {
@@ -4160,9 +4266,11 @@ def ensure_sentinel_state_for_sync(account_results, pending_only=False):
"changedCount": changed_count,
"fingerprintOnlyCount": fingerprint_only_count,
"clampedCount": len(clamped_items),
"cadenceClampedCount": len(cadence_clamped_items),
"pendingOnly": pending_only,
"items": items,
"clampedItems": clamped_items,
"cadenceClampedItems": cadence_clamped_items,
"update": update,
"valuesPrinted": False,
}