From e8fe7d1d9b449af3860ab26c95910ef2846ac836 Mon Sep 17 00:00:00 2001 From: Codex Date: Sat, 23 May 2026 08:05:06 +0000 Subject: [PATCH] fix: expose codex submit queue context --- docs/reference/cli.md | 2 +- docs/reference/code-queue-supervision.md | 1 + ...code-queue-submit-summary-contract-test.ts | 107 ++++++++++ scripts/src/check.ts | 5 + scripts/src/code-queue.ts | 188 +++++++++++++++++- 5 files changed, 297 insertions(+), 6 deletions(-) create mode 100644 scripts/code-queue-submit-summary-contract-test.ts diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 7350dac9..2d2d6400 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -45,7 +45,7 @@ CLI 可以从 `master` 快速演进,但必须兼容 `deploy.json` 固定的 CI - `ci install|status|run|publish-backend-core|publish-user-service|run-dev-e2e|logs` 管理 D601 原生 k3s 上的 Tekton CI。`run` 手动创建每 commit 检查和 Code Queue 只读性能门禁;`publish-backend-core` 与 `publish-user-service` 从 pushed Git commit 构建并发布 `127.0.0.1:5000/unidesk/<serviceId>:<tag>` commit-pinned artifacts,输出 `artifactSummary`(含 `serviceId`、`sourceCommit`、`sourceRepo`、`dockerfile`、`imageRef`、`tag`、`digest`、`digestRef`),但不部署生产;`run-dev-e2e` 的 Git 控制 runner、短 launcher、host fetch 边界、临时 smoke namespace 和 no-CD 规则只在 `docs/reference/dev-ci-runner.md` 定义;Tekton CI 通用规则见 `docs/reference/ci.md`。 - `schedule list|get|runs|run|retry-run|delete|upsert-pgdata-backup` 管理 backend-core 定时任务和运行历史。`schedule list`、`schedule get`、`schedule runs --limit N` 和 `schedule runs <scheduleId> --limit N` 是只读观察入口;`schedule run`、`schedule retry-run`、`schedule delete` 和 `schedule upsert-pgdata-backup` 会触发运行或写入配置,生产恢复时必须有明确授权。`schedule runs --limit N` 是全局历史视图,返回 `scope=global` 和 `scheduleId=null`;`schedule runs <scheduleId> --limit N` 是指定 schedule 历史视图,返回 `scope=schedule` 和对应 `scheduleId`。CLI 必须拒绝 `schedule runs 50` 这类纯数字位置参数,并提示使用 `schedule runs --limit 50`,避免把空数组误判成“没有历史 run”。`schedule run <scheduleId> --wait-ms N` 触发同一 schedule,并且即使 wait 超时也必须返回 `newRunId` 和 `observeCommand`;`schedule retry-run <runId>` 只接受 failed run,从原 run 反查 `scheduleId` 后重触发同一 
schedule,并输出 `originalRunId`、`scheduleId`、`newRunId` 和 `observeCommand`。当 backend-core 目标容器缺失或只观察到 verify-only 容器时,schedule/microservice 命令必须以非零退出并返回 `failureKind=target-stack-not-running`、`runnerDisposition=infra-blocked`、`readOnlyCommands` 和 `authorizationRequiredForRecovery`,不得把 Docker 的 `No such container` 当成成功的空历史。 - `codex deploy ` 是旧 Code Queue 兼容部署入口,已禁用以防止维护通道直连 D601 部署 Code Queue;当前 dev 自动化只做 `ci run-dev-e2e` smoke,不提供 Code Queue CD,详细规则见 `docs/reference/codex-deploy.md`。 -- `codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue queueId] [--provider-id id] [--cwd path] [--model model] [--reasoning-effort effort] [--execution-mode mode] [--max-attempts N] [--reference-task-id id] [--dry-run]` 通过 backend-core 私有代理向稳定 `code-queue` 用户服务路径提交任务;prompt 必须且只能来自位置参数、文件或 stdin 之一,`--dry-run` 只返回结构化请求且不实际入队。长 prompt、多行 prompt、含引号/反引号/Markdown 表格/JSON/反斜杠的 prompt 必须优先用 `--prompt-stdin` 或 `--prompt-file`,不要拼进 shell 单个参数;位置参数只适合短单行 smoke prompt。stdin 推荐用 quoted heredoc:`cat <<'PROMPT' | bun scripts/cli.ts codex submit --prompt-stdin --queue --dry-run`,文件路径推荐 `bun scripts/cli.ts codex submit --prompt-file /tmp/code-queue-prompt.md --queue --dry-run`,确认 dry-run 后移除 `--dry-run` 提交同一 payload。dry-run 会额外输出 `routingRecommendation`,包含推荐 route、runner、model、风险信号、prompt 自包含/issue 非唯一来源/prod-secret-DB 禁止/运行态或 release 禁止/证据要求/中等复杂度候选等 guard 状态;同时输出 `policyContract`,固定暴露 GPT-5.5、DeepSeek、MiniMax 的风险分层、并发上限和外部 provider 429 退避处置。该建议只用于指挥官 preflight,不会改写 payload,不改变 runtime admission,也不假设生产 MiniMax 或 DeepSeek 可用。`--dry-run` 必须返回完整 prompt、字符数和 `truncated=false` 用于人工验收;真实提交是写入操作,默认只返回 `accepted=true`、task id、队列、写入保护摘要和后续查看命令,必须标记 `promptOmitted=true` 且不得回显 prompt 或 promptPreview。真实提交会经过本机本地串行化保护和短节流,避免同一指挥端并发 submit 把低内存主机或 `code-queue-mgr` 控制面打抖;返回值会附带低噪声 `submitConcurrencyGuard` 说明本次提交的锁与等待信息。backend-core 默认把提交、队列 CRUD、已读状态、历史摘要和轻量 Trace 读取分流到主 server `code-queue-mgr`,由它写入主 PostgreSQL;D601 scheduler 只轮询并执行已入库任务。 +- `codex submit [prompt] [--prompt-file 
path|--prompt-stdin] [--queue queueId] [--provider-id id] [--cwd path] [--model model] [--reasoning-effort effort] [--execution-mode mode] [--max-attempts N] [--reference-task-id id] [--dry-run]` 通过 backend-core 私有代理向稳定 `code-queue` 用户服务路径提交任务;prompt 必须且只能来自位置参数、文件或 stdin 之一,`--dry-run` 只返回结构化请求且不实际入队。长 prompt、多行 prompt、含引号/反引号/Markdown 表格/JSON/反斜杠的 prompt 必须优先用 `--prompt-stdin` 或 `--prompt-file`,不要拼进 shell 单个参数;位置参数只适合短单行 smoke prompt。stdin 推荐用 quoted heredoc:`cat <<'PROMPT' | bun scripts/cli.ts codex submit --prompt-stdin --queue --dry-run`,文件路径推荐 `bun scripts/cli.ts codex submit --prompt-file /tmp/code-queue-prompt.md --queue --dry-run`,确认 dry-run 后移除 `--dry-run` 提交同一 payload。dry-run 会额外输出 `routingRecommendation`,包含推荐 route、runner、model、风险信号、prompt 自包含/issue 非唯一来源/prod-secret-DB 禁止/运行态或 release 禁止/证据要求/中等复杂度候选等 guard 状态;同时输出 `policyContract`,固定暴露 GPT-5.5、DeepSeek、MiniMax 的风险分层、并发上限和外部 provider 429 退避处置。该建议只用于指挥官 preflight,不会改写 payload,不改变 runtime admission,也不假设生产 MiniMax 或 DeepSeek 可用。`--dry-run` 必须返回完整 prompt、字符数和 `truncated=false` 用于人工验收;真实提交是写入操作,默认只返回 `accepted=true`、task id、队列、写入保护摘要和后续查看命令,必须标记 `promptOmitted=true` 且不得回显 prompt 或 promptPreview。真实提交会经过本机本地串行化保护和短节流,避免同一指挥端并发 submit 把低内存主机或 `code-queue-mgr` 控制面打抖;返回值会附带低噪声 `submitConcurrencyGuard` 说明本次提交的锁与等待信息。真实提交的 `queue` 摘要保持低噪声:`submittedTaskIds`、`queuedTaskIds`、`activeTaskIds` 和 `databaseActiveTaskIds` 是带 `items/count/returned/omitted/truncated/source` 的有界预览对象,`queuedTaskIds.items` 必须包含本次新入队的 queued/retry_wait 任务,`countContext` 与 `counts` 是权威计数;当预览被省略或截断时,`listPreviewPolicy` 必须写明 omitted counts 和 raw 查看命令。backend-core 默认把提交、队列 CRUD、已读状态、历史摘要和轻量 Trace 读取分流到主 server `code-queue-mgr`,由它写入主 PostgreSQL;D601 scheduler 只轮询并执行已入库任务。 - `codex pr-preflight [--remote] [--push-dry-run --push-dry-run-ref refs/heads/probe/] [--pr-create-dry-run --pr-create-dry-run-head ] [--issue N] [--full|--raw]` 通过稳定 `code-queue` proxy 请求 D601 scheduler `/api/runtime-preflight`,用于 PR 型派单 admission。默认输出是紧凑 commander 视图,显式分出 
`schedulerPreflight` 与 `activeRunnerPrCapability`,并附带 `commands` 和 `disclosure`,方便先看 scheduler auth 缺口、再看当前 runner/dev container 的 `gh auth status` 与 `gh pr create --dry-run` 能力;`--full` 或 `--raw` 才展开完整 `preflight`、工具、agent port、Git worktree、GitHub egress、repo/issue/PR 只读探测和观测原文。只报告 `GH_TOKEN`/`GITHUB_TOKEN` 是否存在和来源 key,不打印值。当 auth-broker 配置存在时,`tokenCoverage.source="auth-broker"`、`credentialSource="broker-issued-token"` 且 runner env token 不是成功前提;当仅 env token 存在时,`credentialSource="env-token"` 且 `authBroker.nextAction="use-env-token-until-auth-broker-live"`;两者都缺失时顶层 `ok=false`、`runnerDisposition=infra-blocked`、`degradedReason=auth-broker-needed`,`tokenCoverage.missing` 同时列出 `GH_TOKEN` 与 `GITHUB_TOKEN`,并输出 `authBroker.source="broker/auth-broker-needed"`、`capability.source="missing-token"`。该 `auth-missing` 的 scope 是 `scheduler-runner-env`,不能简化成“当前 active runner/dev container 不能创建 PR”;默认视图必须带 `scopeBoundary` 和 `activeRunnerPrCapability`。GitHub DNS/API 连接失败应归类为 `failureKind=github-transient`、`degradedReason=github-dns-api-transient`,并带 `retryable=true`、`commanderAction=retry-backoff-or-keep-running-if-heartbeat-fresh` 和有界 `githubTransient.failedProbes`;调用方应重试/退避,且在任务 heartbeat/trace 新鲜时继续监督,不把它当成 auth 缺失或 PR 语义失败。`prCapability` 是 runner-facing 合同摘要,必须包含目标分支、token/auth 来源、`systemGhBinaryRequiredForWrites=false`、UniDesk REST `bun scripts/cli.ts gh` 可用性、push dry-run/PR create dry-run 的 `writesRemote=false`、expected PR handoff、真实 PR 创建需要 commander 授权和 `gh pr merge` 的 `unsupported-command` 边界;系统 `gh` binary 缺失只进入 `tools.systemGhBinary`,不得误判为 UniDesk REST `gh` CLI 不可用。`--remote` 在 runner-like 环境里不再依赖本地 `unidesk-backend-core`、`unidesk-database`、`baidu-netdisk-backend` 容器存在;这些缺失只作为本地观测证据。若远程控制面可达,则继续走远程控制面结果;若远程控制面不可达,则结构化返回 `failureKind=control-plane-missing` / `degradedReason=remote-control-plane-unreachable`,而不是把本地 `backend-core-container-missing` 当作最终阻塞。`--pr-create-dry-run` 不 POST GitHub,只证明 runner 内 PR body 生成、`scripts/cli.ts gh pr create --dry-run` 和 branch 
参数形态可用;服务端创建权限仍以 token/auth broker、repo/issue/PR read、push dry-run 和最终授权后的真实 PR 创建结果为准。 - `codex task ` 通过 Code Queue 私有代理按任务 ID 查询结构化审阅摘要;默认只返回任务身份、执行 Provider、工作目录、attempt 计数、原始 prompt、最终 response、最后错误和渐进披露命令,适合指挥官审阅完成未读任务且避免上下文爆炸。`--detail` 仍是有界详细摘要:默认只返回少量 attempt/tool 行、短 prompt/response/stderr/feedback 预览和 omitted/truncated 元数据;需要完整 prompt/response 文本或更多 tool/attempt 细节时再显式加 `--full`、`--tool-limit N`、`--trace` 或 `codex output`。该摘要读取默认由主 server `code-queue-mgr` 从 PostgreSQL 返回,不依赖 D601 `code-queue-read` Service 可用。 - `codex tasks [--view supervisor|full] [--queue id] [--status succeeded|running|queued|failed|canceled|judging|retry_wait[,..]] [--unread|--unread-only] [--limit N] [--before-id id]` 通过同一私有代理输出渐进式披露视图。默认 `supervisor` 是低噪声指挥官视图,只返回 `activeRunning`、`running`、`completedUnread`、`recentCompleted`、`queued`、`activity`、`commanderConcurrency` 和 `executionDiagnostics` 的紧凑行;`activeRunning.count` 是 running+judging 的状态计数,`exact=true` 时来自 queue summary counts,`running.returned` 和 `activeRunning.rowPage.returned` 只是本次返回的紧凑行数。`commanderConcurrency.activeRunnerCount` 是并发策略应使用的 active/running 计数,等于 `activity.effectiveActiveTaskCount`;15 并发策略按 `15 - activeRunnerCount` 计算剩余窗口。`commanderConcurrency.splitBrainDisposition=live-count-as-active` 表示 split-brain 有 fresh heartbeat 证据,应继续监督并计入 active;`interventionRequired=true` 才提示介入。prompt/body 只给短预览和原始字符数,`running`/`completedUnread`/`queued` 默认只返回一个有界小页并通过 section `commands.next` 继续分页,`recentCompleted` 默认限量且不重复 `completedUnread` 未读终态,不嵌入完整 Trace、final response 或全量 overview。`--limit` 在 supervisor 中主要是扫描/分页预算,不是返回几十条肥行的开关;CLI 安全上限是 100,输出会在 `filters.requestedLimit`、`filters.effectiveLimit`、`filters.limitCapped` 和 `disclosure.limitPolicy` 说明显式请求是否被 capped;底层 overview 拉取预算独立显示在 `source.requestedLimit` / `source.effectiveLimit`,所以 `--limit 260` 应显示 requested=260、effective=100、source requested/effective=200,而不是只露出一个含糊的 `limit`。`--unread` 是 `--unread-only` 的别名,必须只保留未读终态;`--status` 必须真实过滤支持的状态,未知参数或未知状态必须结构化失败。需要更详细当前页任务行时显式使用 
`--view full` 或 `--full`,仍受 `--limit` 和 `--before-id` 分页约束。 diff --git a/docs/reference/code-queue-supervision.md b/docs/reference/code-queue-supervision.md index 5ffb11af..a939edcf 100644 --- a/docs/reference/code-queue-supervision.md +++ b/docs/reference/code-queue-supervision.md @@ -92,6 +92,7 @@ HWLAB M3 口径使用同一分级:只读报告、fixture、LOCAL/DRY-RUN 和 d Code Queue 派单模型按成本、可信度和 blast radius 分层:GPT-5.5/Codex 处理高风险和复杂任务,DeepSeek/OpenCode 处理中等复杂度且边界清晰的任务,MiniMax/OpenCode 处理简单、低权限、可复核任务,生产重启、密钥、数据库手工写入和运行中任务控制保留给指挥官或人工。 当前提交合同由 `bun scripts/cli.ts codex submit` 暴露:prompt 必须来自位置参数、`--prompt-file` 或 `--prompt-stdin`;可选字段包括 `--queue/--queue-id`、`--provider-id/--provider`、`--cwd/--workdir`、`--model`、`--reasoning-effort`、`--execution-mode/--mode`、`--max-attempts` 和 `--reference-task-id/--reference/--ref`。长 prompt、多行 prompt、含引号/反引号/Markdown 表格/JSON/反斜杠的 prompt 应使用 `--prompt-stdin` 或 `--prompt-file`,例如 `cat <<'PROMPT' | bun scripts/cli.ts codex submit --prompt-stdin --queue <queueId> --dry-run` 或 `bun scripts/cli.ts codex submit --prompt-file /tmp/code-queue-prompt.md --queue <queueId> --dry-run`;位置参数只适合短单行 smoke prompt。提交前先用 `--dry-run` 检查完整 payload,确认后移除 `--dry-run`。真实提交成功只返回低噪声写入确认、task id、队列和后续查看命令,必须标记 `promptOmitted=true` 且不得回显 prompt;需要复核正文时用返回的 `codex task <taskId>` 渐进展开。这些字段写入任务 payload 后由 `code-queue-mgr` 入 PostgreSQL,核心任务字段包括 `queue_id`、`provider_id`、`execution_mode`、`model`、`cwd`、`prompt/base_prompt`、`reference_task_ids`、`reasoning_effort`、`max_attempts` 和 `task_json`;队列记录至少有 `id/name/created_at/updated_at`。模型治理应优先看任务 payload 和数据库字段,不靠 worker final response 自报。 +真实 `codex submit` 确认输出的 `queue` 是低噪声监督摘要:`queuedTaskIds.items` 必须强制包含本次新建且仍为 queued/retry_wait 的任务 ID;`activeTaskIds` 在主 server 控制面 `activeTaskIds=[]` 但 `counts.running/judging>0` 时必须回退到 PostgreSQL `databaseActiveTaskIds` 或执行诊断中的 active IDs;这些 ID 列表都只能作为带 `count/returned/omitted/truncated/source` 的有界预览,权威并发口径来自 `counts` 和 `countContext`。当预览没有展开所有 ID 时,`listPreviewPolicy` 必须明确说明 omitted counts 和 raw 查看命令,避免指挥侧误判 15-runner 目标。 运行态默认模型仍是 
`gpt-5.5`。`CODE_QUEUE_MODELS` 当前长期合同至少包含 GPT-5.5、GPT-5.4、GPT-5.4 Mini、DeepSeek Chat 和 MiniMax M2.7;`deepseek`/`deepseek-chat` 与 `minimax-m2.7` 会走 OpenCode port,其余模型走 Codex port。只有当执行面 `/health` 或等价配置已经显示 DeepSeek 模型可用、并完成轻量 runner smoke 后,才允许真实提交 `--model deepseek-chat`。 diff --git a/scripts/code-queue-submit-summary-contract-test.ts b/scripts/code-queue-submit-summary-contract-test.ts new file mode 100644 index 00000000..14ce1aae --- /dev/null +++ b/scripts/code-queue-submit-summary-contract-test.ts @@ -0,0 +1,107 @@ +import { compactSubmitSuccessResponseForTest } from "./src/code-queue"; + +type JsonRecord = Record; + +function assertCondition(condition: unknown, message: string, detail: unknown = {}): void { + if (!condition) throw new Error(`${message}: ${JSON.stringify(detail)}`); +} + +function asRecord(value: unknown): JsonRecord { + assertCondition(typeof value === "object" && value !== null && !Array.isArray(value), "expected JSON object", { value }); + return value as JsonRecord; +} + +function asArray(value: unknown): unknown[] { + assertCondition(Array.isArray(value), "expected JSON array", { value }); + return value as unknown[]; +} + +function task(id: string, status: string, queueId = "commander-efficiency"): JsonRecord { + return { + id, + queueId, + status, + prompt: `Focused submit summary contract for ${id}`, + displayPrompt: `Focused submit summary contract for ${id}`, + providerId: "D601", + model: "gpt-5.5", + currentAttempt: status === "queued" ? 
0 : 1, + maxAttempts: 99, + createdAt: "2026-05-23T00:00:00.000Z", + updatedAt: "2026-05-23T00:00:00.000Z", + }; +} + +function manyIds(prefix: string, count: number): string[] { + return Array.from({ length: count }, (_, index) => `${prefix}-${String(index + 1).padStart(2, "0")}`); +} + +export function runCodeQueueSubmitSummaryContract(): JsonRecord { + const submittedId = "codex_submitted_queued"; + const activeIds = manyIds("codex_running", 18); + const response = compactSubmitSuccessResponseForTest({ + tasks: [task(submittedId, "queued")], + queue: { + counts: { running: 18, queued: 5, succeeded: 9 }, + activeTaskIds: [], + queuedTaskIds: { items: [], count: 5, returned: 0, truncated: true }, + databaseActiveTaskIds: activeIds, + databaseActiveTaskCount: activeIds.length, + executionDiagnostics: { + state: "healthy", + databaseActiveTaskIds: activeIds, + databaseActiveTaskCount: activeIds.length, + schedulerActiveRunSlotCount: 0, + schedulerActiveTaskIds: [], + heartbeatFreshTaskIds: activeIds, + }, + }, + }, { ok: true, status: 200 }, { mode: "local-atomic-directory-submit-serialization", acquiredAfterMs: 1, heldMs: 2, throttleMs: 2000 }); + + const data = asRecord(response); + const submitted = asRecord(data.submitted); + const submittedTasks = asArray(submitted.tasks); + const submittedTask = asRecord(submittedTasks[0]); + const queue = asRecord(data.queue); + const queuedTaskIds = asRecord(queue.queuedTaskIds); + const activeTaskIds = asRecord(queue.activeTaskIds); + const databaseActiveTaskIds = asRecord(queue.databaseActiveTaskIds); + const submittedTaskIds = asRecord(queue.submittedTaskIds); + const countContext = asRecord(queue.countContext); + const listPreviewPolicy = asRecord(queue.listPreviewPolicy); + const omittedCounts = asRecord(listPreviewPolicy.omittedCounts); + const responseJson = JSON.stringify(response); + + assertCondition(submittedTask.id === submittedId && submittedTask.status === "queued", "submit response should keep the newly queued 
task", submittedTask); + assertCondition(asArray(submittedTaskIds.items).includes(submittedId), "submittedTaskIds should expose the just-submitted id", submittedTaskIds); + assertCondition(asArray(queuedTaskIds.items).includes(submittedId), "queuedTaskIds preview should force-include the just-submitted queued task", queuedTaskIds); + assertCondition(queuedTaskIds.count === 5 && queuedTaskIds.returned === 1 && queuedTaskIds.omitted === 4, "queuedTaskIds should preserve aggregate queued count without dumping all ids", queuedTaskIds); + assertCondition(String(queuedTaskIds.source || "").includes("submittedTaskIds"), "queuedTaskIds source should explain submitted-task fallback", queuedTaskIds); + assertCondition(String(queuedTaskIds.note || "").includes("count remains authoritative"), "queuedTaskIds should explain aggregate-count fallback", queuedTaskIds); + + assertCondition(asArray(activeTaskIds.items).length === 15, "activeTaskIds preview should stay bounded", activeTaskIds); + assertCondition(activeTaskIds.count === 18 && activeTaskIds.omitted === 3 && activeTaskIds.truncated === true, "activeTaskIds should preserve active count and truncation", activeTaskIds); + assertCondition(String(activeTaskIds.source || "").includes("databaseActiveTaskIds"), "activeTaskIds should fall back to database active ids when upstream activeTaskIds is empty", activeTaskIds); + assertCondition(databaseActiveTaskIds.count === 18 && databaseActiveTaskIds.returned === 15, "databaseActiveTaskIds preview should preserve count context", databaseActiveTaskIds); + assertCondition(countContext.running === 18 && countContext.active === 18 && countContext.databaseActive === 18, "countContext should expose accurate active counts", countContext); + + assertCondition(listPreviewPolicy.bounded === true && listPreviewPolicy.countsAreAuthoritative === true, "list preview policy should document bounded low-noise output", listPreviewPolicy); + assertCondition(listPreviewPolicy.truncated === true && 
omittedCounts.activeTaskIds === 3 && omittedCounts.queuedTaskIds === 4, "list preview policy should disclose omitted counts", listPreviewPolicy); + assertCondition(String(listPreviewPolicy.note || "").includes("Low-noise mutation output omits"), "list preview policy should include a clear truncation note", listPreviewPolicy); + assertCondition(submitted.promptOmitted === true && !responseJson.includes("Focused submit summary contract"), "submit confirmation should not leak prompt text", response); + assertCondition(responseJson.length < 12_000, "submit confirmation should remain low-noise", { chars: responseJson.length }); + + return { + ok: true, + checks: [ + "newly queued submitted task is included in queuedTaskIds preview", + "running count context falls back to database active ids", + "bounded id previews disclose omitted counts", + "submit confirmation omits prompt text and remains low-noise", + ], + }; +} + +if (import.meta.main) { + process.stdout.write(`${JSON.stringify(runCodeQueueSubmitSummaryContract(), null, 2)}\n`); +} diff --git a/scripts/src/check.ts b/scripts/src/check.ts index 3efa2430..b666c1fb 100644 --- a/scripts/src/check.ts +++ b/scripts/src/check.ts @@ -34,6 +34,7 @@ const syntaxFiles = [ "scripts/code-queue-prompt-lint-contract-test.ts", "scripts/code-queue-cli-steer-test.ts", "scripts/code-queue-cli-submit-prompt-contract-test.ts", + "scripts/code-queue-submit-summary-contract-test.ts", "scripts/code-queue-cli-read-terminal-contract-test.ts", "scripts/code-queue-gh-auth-redaction-contract-test.ts", "scripts/microservice-health-output-contract-test.ts", @@ -315,6 +316,8 @@ export function runChecks(config: UniDeskConfig, options: CheckOptions = default fileItem("scripts/code-queue-prompt-lint-contract-test.ts"), fileItem("scripts/code-queue-cli-steer-test.ts"), fileItem("scripts/code-queue-cli-read-terminal-contract-test.ts"), + fileItem("scripts/code-queue-cli-submit-prompt-contract-test.ts"), + 
fileItem("scripts/code-queue-submit-summary-contract-test.ts"), fileItem("scripts/code-queue-submit-routing-contract-test.ts"), fileItem("scripts/code-queue-gh-auth-redaction-contract-test.ts"), fileItem("scripts/code-queue-supervisor-disclosure-contract-test.ts"), @@ -359,6 +362,7 @@ export function runChecks(config: UniDeskConfig, options: CheckOptions = default items.push(commandItem("code-queue:cli-steer-contract", ["bun", "scripts/code-queue-cli-steer-test.ts"], 30_000)); items.push(commandItem("code-queue:read-terminal-contract", ["bun", "scripts/code-queue-cli-read-terminal-contract-test.ts"], 30_000)); items.push(commandItem("code-queue:submit-prompt-contract", ["bun", "scripts/code-queue-cli-submit-prompt-contract-test.ts"], 30_000)); + items.push(commandItem("code-queue:submit-summary-contract", ["bun", "scripts/code-queue-submit-summary-contract-test.ts"], 30_000)); items.push(commandItem("code-queue:submit-routing-contract", ["bun", "scripts/code-queue-submit-routing-contract-test.ts"], 30_000)); items.push(commandItem("code-queue:gh-auth-redaction-contract", ["bun", "scripts/code-queue-gh-auth-redaction-contract-test.ts"], 30_000)); items.push(commandItem("code-queue:supervisor-disclosure-contract", ["bun", "scripts/code-queue-supervisor-disclosure-contract-test.ts"], 30_000)); @@ -392,6 +396,7 @@ export function runChecks(config: UniDeskConfig, options: CheckOptions = default items.push(skippedItem("code-queue:cli-steer-contract", "Code Queue steer CLI contract is opt-in with script checks", "--scripts-typecheck or --full")); items.push(skippedItem("code-queue:read-terminal-contract", "Code Queue terminal read contract is opt-in with script checks", "--scripts-typecheck or --full")); items.push(skippedItem("code-queue:submit-prompt-contract", "Code Queue submit prompt contract is opt-in with script checks", "--scripts-typecheck or --full")); + items.push(skippedItem("code-queue:submit-summary-contract", "Code Queue submit summary contract is opt-in 
with script checks", "--scripts-typecheck or --full")); items.push(skippedItem("code-queue:submit-routing-contract", "Code Queue submit routing contract is opt-in with script checks", "--scripts-typecheck or --full")); items.push(skippedItem("code-queue:gh-auth-redaction-contract", "Code Queue GitHub auth output redaction contract is opt-in with script checks", "--scripts-typecheck or --full")); items.push(skippedItem("code-queue:supervisor-disclosure-contract", "Code Queue supervisor disclosure contract is opt-in with script checks", "--scripts-typecheck or --full")); diff --git a/scripts/src/code-queue.ts b/scripts/src/code-queue.ts index 2f180830..5bd9e3f4 100644 --- a/scripts/src/code-queue.ts +++ b/scripts/src/code-queue.ts @@ -22,6 +22,7 @@ const supervisorBodyPreviewChars = 70; const supervisorRecentBodyPreviewChars = 50; const diagnosticsIdPreviewLimit = 3; const diagnosticsReasonPreviewLimit = 2; +const mutationQueueIdPreviewLimit = 15; const steerPromptPreviewChars = 320; const detailAttemptReturnedLimit = 3; const detailInitialPromptPreviewChars = 1200; @@ -253,6 +254,11 @@ interface CompactTaskMutationResponseOptions { fullPrompt?: boolean; } +interface CompactSubmitQueueConfirmationOptions { + submittedTasks?: Record[]; + idPreviewLimit?: number; +} + interface CodexTasksOptions { queueId: string | undefined; requestedLimit: number; @@ -3658,17 +3664,187 @@ function compactSubmitTaskConfirmation(task: unknown): Record { }; } -function compactSubmitQueueConfirmation(value: unknown): Record | null { +function orderedUniqueStringList(values: string[]): string[] { + const seen = new Set(); + const items: string[] = []; + for (const value of values) { + if (seen.has(value)) continue; + seen.add(value); + items.push(value); + } + return items; +} + +function compactIdPreview(knownIds: string[], totalCount: number, limit: number, source: string, note: string | null = null): Record { + const all = orderedUniqueStringList(knownIds); + const count = Math.max(0, 
totalCount, all.length); + const items = all.slice(0, limit); + const omitted = Math.max(0, count - items.length); + return { + items, + count, + returned: items.length, + omitted, + truncated: omitted > 0 || all.length > items.length, + source, + ...(note === null ? {} : { note }), + }; +} + +function idPreviewInputItems(value: unknown): string[] { + const record = asRecord(value); + return stringList(record?.items ?? value); +} + +function idPreviewInputCount(value: unknown): number { + const record = asRecord(value); + const explicit = asNumber(record?.count, Number.NaN); + if (Number.isFinite(explicit)) return explicit; + return idPreviewInputItems(value).length; +} + +function countForStatus(counts: Record, status: string): number { + return asNumber(counts[status], 0); +} + +function maxFiniteNumber(values: number[]): number { + const finite = values.filter((value) => Number.isFinite(value)); + return finite.length === 0 ? 0 : Math.max(...finite); +} + +function taskIdsForStatuses(tasks: Record[], statuses: Set | null): string[] { + return tasks.flatMap((task) => { + const id = asString(task.id); + if (id.length === 0) return []; + if (statuses !== null && !statuses.has(asString(task.status))) return []; + return [id]; + }); +} + +function previewSource(parts: string[], fallback: string): string { + const unique = orderedUniqueStringList(parts.filter((part) => part.length > 0)); + return unique.length > 0 ? unique.join("+") : fallback; +} + +function compactSubmitQueueConfirmation(value: unknown, options: CompactSubmitQueueConfirmationOptions = {}): Record | null { const record = asRecord(value); if (record === null) return null; + const counts = asRecord(record.counts) ?? {}; + const diagnosticsRecord = asRecord(record.executionDiagnostics) ?? {}; + const submittedTasks = options.submittedTasks ?? []; + const idPreviewLimit = Math.max(1, Math.min(options.idPreviewLimit ?? 
mutationQueueIdPreviewLimit, maxTasksLimit)); + const submittedTaskIds = taskIdsForStatuses(submittedTasks, null); + const submittedQueuedTaskIds = taskIdsForStatuses(submittedTasks, new Set(["queued", "retry_wait"])); + const submittedActiveTaskIds = taskIdsForStatuses(submittedTasks, new Set(["running", "judging"])); + + const upstreamQueuedTaskIds = idPreviewInputItems(record.queuedTaskIds); + const queuedKnownIds = orderedUniqueStringList([...submittedQueuedTaskIds, ...upstreamQueuedTaskIds]); + const queuedStatusCount = countForStatus(counts, "queued") + countForStatus(counts, "retry_wait"); + const queuedCount = Math.max(queuedKnownIds.length, idPreviewInputCount(record.queuedTaskIds), queuedStatusCount); + const queuedPreview = compactIdPreview( + queuedKnownIds, + queuedCount, + idPreviewLimit, + previewSource([ + submittedQueuedTaskIds.length > 0 ? "submittedTaskIds" : "", + upstreamQueuedTaskIds.length > 0 ? "upstreamQueuedTaskIds" : "", + queuedKnownIds.length === 0 && queuedCount > 0 ? "aggregateCountsOnly" : "", + ], "none"), + queuedCount > queuedKnownIds.length ? "Upstream did not enumerate every queued id in this low-noise mutation response; count remains authoritative." 
: null, + ); + + const upstreamActiveTaskIds = idPreviewInputItems(record.activeTaskIds); + const databaseActiveTaskIds = idPreviewInputItems(record.databaseActiveTaskIds); + const diagnosticsDatabaseActiveTaskIds = idPreviewInputItems(diagnosticsRecord.databaseActiveTaskIds); + const diagnosticsHeartbeatTaskIds = idPreviewInputItems(diagnosticsRecord.heartbeatFreshTaskIds); + const activeKnownIds = orderedUniqueStringList([ + ...submittedActiveTaskIds, + ...upstreamActiveTaskIds, + ...databaseActiveTaskIds, + ...diagnosticsDatabaseActiveTaskIds, + ...diagnosticsHeartbeatTaskIds, + ]); + const statusActiveCount = countForStatus(counts, "running") + countForStatus(counts, "judging"); + const databaseActiveTaskCount = maxFiniteNumber([ + databaseActiveTaskIds.length, + diagnosticsDatabaseActiveTaskIds.length, + asNumber(record.databaseActiveTaskCount, Number.NaN), + asNumber(diagnosticsRecord.databaseActiveTaskCount, Number.NaN), + ]); + const activeCount = maxFiniteNumber([ + activeKnownIds.length, + idPreviewInputCount(record.activeTaskIds), + databaseActiveTaskCount, + statusActiveCount, + ]); + const activePreview = compactIdPreview( + activeKnownIds, + activeCount, + idPreviewLimit, + previewSource([ + submittedActiveTaskIds.length > 0 ? "submittedTaskIds" : "", + upstreamActiveTaskIds.length > 0 ? "upstreamActiveTaskIds" : "", + databaseActiveTaskIds.length > 0 ? "databaseActiveTaskIds" : "", + diagnosticsDatabaseActiveTaskIds.length > 0 ? "executionDiagnostics.databaseActiveTaskIds" : "", + diagnosticsHeartbeatTaskIds.length > 0 ? "executionDiagnostics.heartbeatFreshTaskIds" : "", + activeKnownIds.length === 0 && activeCount > 0 ? "aggregateCountsOnly" : "", + ], "none"), + activeCount > activeKnownIds.length ? "Upstream only exposed aggregate active counts for part of the running set; count remains authoritative." 
: null, + ); + const databaseActivePreview = compactIdPreview( + orderedUniqueStringList([...databaseActiveTaskIds, ...diagnosticsDatabaseActiveTaskIds]), + maxFiniteNumber([databaseActiveTaskCount, idPreviewInputCount(record.databaseActiveTaskIds), idPreviewInputCount(diagnosticsRecord.databaseActiveTaskIds)]), + idPreviewLimit, + previewSource([ + databaseActiveTaskIds.length > 0 ? "databaseActiveTaskIds" : "", + diagnosticsDatabaseActiveTaskIds.length > 0 ? "executionDiagnostics.databaseActiveTaskIds" : "", + databaseActiveTaskCount > 0 && databaseActiveTaskIds.length === 0 && diagnosticsDatabaseActiveTaskIds.length === 0 ? "aggregateCountsOnly" : "", + ], "none"), + ); + const submittedPreview = compactIdPreview(submittedTaskIds, submittedTaskIds.length, idPreviewLimit, "response.tasks"); + const omittedCounts = { + activeTaskIds: asNumber(activePreview.omitted, 0), + databaseActiveTaskIds: asNumber(databaseActivePreview.omitted, 0), + queuedTaskIds: asNumber(queuedPreview.omitted, 0), + submittedTaskIds: asNumber(submittedPreview.omitted, 0), + }; + const listsTruncated = Object.values(omittedCounts).some((count) => count > 0) + || activePreview.truncated === true + || databaseActivePreview.truncated === true + || queuedPreview.truncated === true + || submittedPreview.truncated === true; return { total: record.total ?? null, queueCount: record.queueCount ?? null, counts: record.counts ?? null, activeQueueIds: boundedUniqueStringList(record.activeQueueIds, 8), - activeTaskIds: boundedUniqueStringList(record.activeTaskIds ?? record.databaseActiveTaskIds, 8), - queuedTaskIds: boundedUniqueStringList(record.queuedTaskIds, 8), + activeTaskIds: activePreview, + databaseActiveTaskCount, + databaseActiveTaskIds: databaseActivePreview, + queuedTaskIds: queuedPreview, executionDiagnostics: compactQueueExecutionDiagnostics(record.executionDiagnostics), + ...(submittedTasks.length === 0 ? 
{} : { submittedTaskIds: submittedPreview }), + countContext: { + queued: countForStatus(counts, "queued"), + retryWait: countForStatus(counts, "retry_wait"), + running: countForStatus(counts, "running"), + judging: countForStatus(counts, "judging"), + active: statusActiveCount, + databaseActive: databaseActiveTaskCount, + submitted: submittedTaskIds.length, + }, + listPreviewPolicy: { + bounded: true, + idPreviewLimit, + countsAreAuthoritative: true, + truncated: listsTruncated, + omittedCounts, + note: listsTruncated + ? "Low-noise mutation output omits additional task ids from one or more previews; use the raw command for full upstream queue detail." + : "Low-noise mutation output includes all known task ids returned for these previews.", + rawCommand: "bun scripts/cli.ts microservice proxy code-queue /api/tasks/overview?limit=30 --raw --full", + }, + byQueue: Array.isArray(record.byQueue) ? record.byQueue : undefined, }; } @@ -3683,7 +3859,9 @@ function compactSubmitConcurrencyGuard(value: Record): Record, upstream: Record, lock: Record): Record { - const allTasks = asArray(body.tasks).map(compactSubmitTaskConfirmation); + const rawTasks = asArray(body.tasks); + const submittedTasks = rawTasks.map((task) => asRecord(task)).filter((task): task is Record => task !== null); + const allTasks = rawTasks.map(compactSubmitTaskConfirmation); const tasks = allTasks.slice(0, defaultTasksLimit); const allTaskIds = allTasks.map((task) => asString(task.id)).filter(Boolean); const taskIds = allTaskIds.slice(0, defaultTasksLimit); @@ -3710,7 +3888,7 @@ function compactSubmitSuccessResponse(body: Record, upstream: R reason: "codex submit is a write operation; default output confirms persistence and provides drill-down commands without echoing prompt text.", }, }, - queue: compactSubmitQueueConfirmation(body.queue), + queue: compactSubmitQueueConfirmation(body.queue, { submittedTasks }), submitConcurrencyGuard: compactSubmitConcurrencyGuard(lock), commands: { firstTask: 
firstTaskId === null ? null : `bun scripts/cli.ts codex task ${firstTaskId}`,