fix: observe code queue execution plane via tran
This commit is contained in:
@@ -107,7 +107,7 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文
|
||||
- `bun scripts/cli.ts hwlab cd audit --env dev` / `status|preflight|apply --dry-run`:旧 D601 HWLAB DEV CD 指挥侧 wrapper,仅用于显式 legacy 诊断和迁移对照;当前 HWLAB DEV/PROD source/runtime truth 已迁到 G14 `/root/hwlab` 与 G14 k3s/GitOps,规则见 `docs/reference/hwlab.md`。
|
||||
- `bun scripts/cli.ts ci install/status/run/publish-backend-core/publish-user-service/run-dev-e2e/logs`:在 D601 原生 k3s 上安装和运行 Tekton CI,支持每 commit 检查、Code Queue 只读性能门禁、`CI.json` catalog 驱动的 backend-core 与 user-service commit-pinned 镜像发布和手动触发的 `origin/master:deploy.json#environments.dev` 临时 namespace e2e;catalog/producer/consumer 分工见 `docs/reference/cicd-standardization.md`,`run-dev-e2e` 的 Git 控制 runner、短 launcher 和 no-CD 边界见 `docs/reference/dev-ci-runner.md`,Tekton 规则见 `docs/reference/ci.md`。
|
||||
- `bun scripts/cli.ts codex deploy <commitId>`:旧 Code Queue 兼容部署入口已禁用,原因是它会绕过受控部署边界直连 D601 部署 Code Queue;规则见 `docs/reference/codex-deploy.md`。
|
||||
- `bun scripts/cli.ts codex prompt-lint [prompt|--prompt-file path|--prompt-stdin]` / `codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue <id>]` / `codex execution-plane [--full|--raw]` / `codex pr-preflight [--remote]`:`prompt-lint` 在派发/steer 前 dry-run 检查 runner prompt 的 DEV 测试授权分级(`read-only`/`live-read`/`live-mutating`)且不回显 prompt;`submit --dry-run` 同时给出 MiniMax/GPT/人工路由建议、该 lint 结果和 requested/effective execution mode;真实提交成功只返回写入确认、task id、服务级 runnerPermissions 和后续查看命令,不回显 prompt;`execution-plane` 只读比较 D601 原生 k3s 正式 Code Queue 执行面、旧 Compose 残留、commit/digest/worktree/probe drift;`pr-preflight` 只读检查 D601 scheduler/runner 的 GitHub token、egress 和 PR 能力,PR 型派单前必须使用,规则见 `docs/reference/cli.md` 和 `docs/reference/code-queue-supervision.md`。
|
||||
- `bun scripts/cli.ts codex prompt-lint [prompt|--prompt-file path|--prompt-stdin]` / `codex submit [prompt] [--prompt-file path|--prompt-stdin] [--queue <id>]` / `codex execution-plane [--full|--raw]` / `codex pr-preflight [--remote]`:`prompt-lint` 在派发/steer 前 dry-run 检查 runner prompt 的 DEV 测试授权分级(`read-only`/`live-read`/`live-mutating`)且不回显 prompt;`submit --dry-run` 同时给出 MiniMax/GPT/人工路由建议、该 lint 结果和 requested/effective execution mode;真实提交成功只返回写入确认、task id、服务级 runnerPermissions 和后续查看命令,不回显 prompt;`execution-plane` 通过 `tran D601:k3s` 只读比较 D601 原生 k3s 正式 Code Queue 执行面、旧 Compose 残留、commit/digest/worktree/probe drift;`pr-preflight` 只读检查 D601 scheduler/runner 的 GitHub token、egress 和 PR 能力,PR 型派单前必须使用,规则见 `docs/reference/cli.md` 和 `docs/reference/code-queue-supervision.md`。
|
||||
- `bun scripts/cli.ts codex task <taskId>`:按 Code Queue 任务 ID 查询默认审阅摘要,只返回原始 prompt、最终 response、最后错误和渐进披露命令;`codex tasks --view commander` 是 host commander 推荐轮询入口,默认有界显示 active runner 精确计数、queued/retry_wait、terminal-unread、active 风险、分类和 drill-down 命令;`--view supervisor|full`、`codex output` 和大 `--limit` 仍默认有界,完整内容需显式 `--full`/`--full-text`/分页展开;`codex queues [--full] [--limit N] [--page N|--offset N]` 默认分页低噪声输出队列摘要,完整 upstream 只通过 raw command 显式获取。
|
||||
- `bun scripts/cli.ts codex unread [--repo owner/name] [--issue N] [--limit N]`:只读汇总完成未读积压并给出 repo/issue/status/queue 计数和 drill-down/read 命令;批量已读必须显式 `codex unread mark-read ... --confirm`,规则见 `docs/reference/cli.md`。
|
||||
- `bun scripts/cli.ts codex judge <taskId> --attempt <n> [--dry-run]`:按指定 task/attempt 用与队列 worker 相同的上下文构建和 MiniMax judge 调用路径单步复现完成判定;`--dry-run` 只输出 prompt/payload 诊断。
|
||||
|
||||
@@ -280,7 +280,7 @@ replacement runner 只用于方向明显错误、质量不可接受、原 task
|
||||
- `bun scripts/cli.ts codex tasks --view commander --limit N`:host commander 轮询的推荐入口。输出是有界 action map,必须直接显示 `activeRunners.count`、计数来源、split-brain/heartbeat 处置、queued/retry_wait 精确计数、terminal-unread 总数和已省略行数、active 风险数、stale/heartbeat/trace gap、`finalResponse` 已出现但仍非终态的 awaiting terminal/judge、blocker-like final response、HWLAB#7/#99/#116/#164/#317 与 UniDesk#20/#118 命中、任务分类和下一步 drill-down 命令。默认不得输出完整 prompt、完整 final response、raw output、完整 trace 或 raw overview;需要详情只能按 task id 使用 `codex task`、`codex task --trace`、`codex output`、`codex read` 或 `rawOverview` 命令渐进展开。
|
||||
- `bun scripts/cli.ts codex tasks --view supervisor --limit N`:查看默认低噪声监督视图,包括 `activeRunning`、running、完成未读、少量最近完成、queued/runnable、activity、commanderConcurrency、execution diagnostics、任务分类和下一步 drill-down 命令。默认行只保留 task id、队列、短 prompt/body 预览和原始字符数;`--limit` 是扫描/分页预算,不是返回几十条肥行的开关,CLI effective limit 安全上限为 100,输出必须用 `filters.requestedLimit`、`filters.effectiveLimit`、`filters.limitCapped`、`source.requestedLimit` 和 `source.effectiveLimit` 区分用户请求、CLI cap 和 overview 源拉取预算;例如 `--limit 260` 应明确显示 requested=260、effective=100、source=200,`running.returned` 只是低噪声返回行数。`show/detail/trace/output/full/read` 放在 section template 中,避免每条任务重复刷屏,需要更多内容再按 taskId 展开。刚执行 `codex submit` 后也可以先读 submit 返回的 `submitted.taskStates[]`、`queue.countContext`、`queue.activity.effectiveActiveTaskCount` 和 `queue.stateDisclosure`;若某个 id preview 有 `idsUnavailable=true`,不要把它当成空队列,按 `queue.listPreviewPolicy.rawCommand` 或本 supervisor 命令继续查。
|
||||
- `bun scripts/cli.ts codex queues`:默认是 commander-first 队列态势摘要,`--commander` 是显式同义开关。输出前部固定使用 `.data.queues.commander`,先给出 `activeRunnerCount`、`source`、`target=15`、`slotDeficit`、`queuedCount`、`runningTasks`、`heartbeat.fresh`、`heartbeat.risk`、`heartbeat.staleRecoveryCandidates`、active/runnable queue 小页和 drill-down 命令;历史 queue item 列表保留在 `.data.queues.items[]`,但只是分页的次要行。需要完整队列行视图时加 `--full`,但 `--full` 仍默认分页,继续用 `--limit N`、`--page N` 或 `--offset N` 渐进展开。summary 和 full 都使用稳定 JSON path `.data.queues.items[]` 读取队列行,并从 `.data.queues.commander`、`.data.queues.commanderConcurrency`、`.data.queues.activity`、`.data.queues.counts` 与 `.data.queues.executionDiagnostics` 读取全局活跃计数和执行诊断;完整 upstream 只通过输出中的 raw command 显式获取。若 `/api/queues` 没有返回 task row,`runningTasks.items[].name` 会是 `null` 且 `nameSource=not-returned-by-api-queues`,此时按返回的 `codex task <taskId>` 或 supervisor 命令展开,不要假设任务没有名称。
|
||||
- `bun scripts/cli.ts codex execution-plane [--full|--raw]`:只读巡检 D601 原生 k3s `unidesk` namespace 下的正式执行面。该命令强制使用 `KUBECONFIG=/etc/rancher/k3s/k3s.yaml` 并确认 node `d601`,默认低噪声返回 `summary.formalExecutionPlane`、`summary.deploymentDrift`、`summary.deprecatedComposeResidual`、`executionPlane.deployments[]`、`drift.status`、`residual.status` 和 `judgeProbe.behaviorVersion`。它比较三类 Deployment:`code-queue` 必须是 scheduler,`code-queue-read` 必须是 read,`code-queue-write` 必须是 write;同时比较 deployment env/annotation commit、Pod `imageID` digest、宿主 `/home/ubuntu/cq-deploy` HEAD、以及 `/api/judge/probe` 的 `behaviorVersion=code-queue-judge-probe:v1`。任何 commit/digest/worktree/probe 不一致或缺少可比 marker 都必须输出 `deployment-drift`,不能写成 healthy。检测到旧 Docker Compose `code-queue-backend` 或旧 `127.0.0.1:4222` 监听时必须输出 `deprecated-compose-residual`。默认不打印完整 Kubernetes Deployment JSON、环境变量全集、SecretRef 值、judge probe 原始结果或命令 stdout;需要逐项展开时使用 `--full`,需要安全裁剪后的原始观察对象时使用 `--raw`。
|
||||
- `bun scripts/cli.ts codex execution-plane [--full|--raw]`:只读巡检 D601 原生 k3s `unidesk` namespace 下的正式执行面。该命令的 live collector 必须通过 UniDesk `tran`/`ssh` 维护桥访问 `D601:k3s` 和 `D601:/home/ubuntu/cq-deploy`,不得在 master server 本地调用 `kubectl`、读取本地 worktree 或把 master server 的工具缺失误报成 D601 阻塞。该命令强制使用 `KUBECONFIG=/etc/rancher/k3s/k3s.yaml` 并确认 node `d601`,默认低噪声返回 `summary.formalExecutionPlane`、`summary.deploymentDrift`、`summary.deprecatedComposeResidual`、`executionPlane.deployments[]`、`drift.status`、`residual.status` 和 `judgeProbe.behaviorVersion`。它比较三类 Deployment:`code-queue` 必须是 scheduler,`code-queue-read` 必须是 read,`code-queue-write` 必须是 write;同时比较 deployment env/annotation commit、Pod `imageID` digest、宿主 `/home/ubuntu/cq-deploy` HEAD、以及 `/api/judge/probe` 的 `behaviorVersion=code-queue-judge-probe:v1`。任何 commit/digest/worktree/probe 不一致或缺少可比 marker 都必须输出 `deployment-drift`,不能写成 healthy。检测到 D601 上旧 Docker Compose `code-queue-backend` 或旧 `127.0.0.1:4222` 监听时必须输出 `deprecated-compose-residual`。默认不打印完整 Kubernetes Deployment JSON、环境变量全集、SecretRef 值、judge probe 原始结果或命令 stdout;需要逐项展开时使用 `--full`,需要安全裁剪后的原始观察对象时使用 `--raw`。
|
||||
- `bun scripts/cli.ts codex unread --limit N`:查看完成未读审阅积压的默认 triage,按 repo、issue、status 和 queue 汇总,并给出有界最新任务紧凑行;默认行只包含 task id、状态、queue、issues、updatedAt/finishedAt 和一条 `nextStep`,不重复每任务 `show/detail/trace/output/read` 命令,也不输出 raw prompt、final response、trace 或 output。完整 per-task 命令必须显式使用 `codex unread --full`、`codex unread --view full`、`codex unread list` 或单任务 `codex task <taskId>`/`codex read <taskId>` 展开;默认输出必须保留一次性的模板命令和分页命令。
|
||||
- `bun scripts/cli.ts codex unread mark-read --repo owner/name --issue N --limit N --confirm`:批量已读入口,必须显式 `mark-read` 和 `--confirm`,否则结构化失败且不 POST `/read`。
|
||||
- `bun scripts/cli.ts codex tasks --unread --limit N`:兼容查看完成未读审阅积压;`--unread` 与 `--unread-only` 等价,不能被静默忽略。
|
||||
|
||||
@@ -3,6 +3,7 @@ import {
|
||||
runCodeQueueExecutionPlaneForTest,
|
||||
type CodeQueueExecutionPlaneObservation,
|
||||
} from "./src/code-queue-execution-plane";
|
||||
import { readFileSync } from "node:fs";
|
||||
|
||||
type JsonRecord = Record<string, unknown>;
|
||||
|
||||
@@ -175,12 +176,21 @@ async function checkProgressiveDisclosure(): Promise<void> {
|
||||
assertCondition("details" in raw && "rawObservation" in raw, "--raw should include details and raw observation", raw);
|
||||
}
|
||||
|
||||
/**
 * Static source guard for the live execution-plane collector.
 *
 * Reads `./src/code-queue-execution-plane.ts` as text and checks that the
 * collector reaches k3s and the worktree through the `D601` tran routes,
 * and that the former local `kubectl` / local `git -C` call shapes are gone.
 * This is a textual check only; it does not execute the collector.
 */
async function checkLiveCollectorUsesD601TranTransport(): Promise<void> {
  const collectorUrl = new URL("./src/code-queue-execution-plane.ts", import.meta.url);
  const collectorSource = readFileSync(collectorUrl, "utf8");

  // Each row: [snippet to look for, whether it must be present, failure message].
  // Row order matches the order in which the assertions are evaluated.
  const expectations: ReadonlyArray<readonly [string, boolean, string]> = [
    ['["D601:k3s", "kubectl", ...args]', true, "live collector should observe k3s through D601 tran route, not local kubectl"],
    ['`D601:${options.worktreePath}`', true, "worktree observation should run on D601 workspace route"],
    ['runCommand(["kubectl", ...args]', false, "live collector must not call local kubectl directly"],
    ['runCommand(["git", "-C", options.worktreePath', false, "worktree observation must not read local filesystem"],
  ];

  for (const [snippet, mustBePresent, failureMessage] of expectations) {
    assertCondition(collectorSource.includes(snippet) === mustBePresent, failureMessage);
  }
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const checks = [
|
||||
["code-queue:execution-plane-healthy-no-drift", checkHealthyNoDrift],
|
||||
["code-queue:execution-plane-deployment-drift", checkDeploymentDrift],
|
||||
["code-queue:execution-plane-deprecated-compose-residual", checkDeprecatedComposeResidual],
|
||||
["code-queue:execution-plane-progressive-disclosure", checkProgressiveDisclosure],
|
||||
["code-queue:execution-plane-d601-tran-transport", checkLiveCollectorUsesD601TranTransport],
|
||||
] as const;
|
||||
const results = [];
|
||||
for (const [name, check] of checks) {
|
||||
@@ -193,4 +203,3 @@ async function main(): Promise<void> {
|
||||
if (import.meta.main) {
|
||||
await main();
|
||||
}
|
||||
|
||||
|
||||
@@ -248,6 +248,13 @@ function commandProbe(result: CommandResult): ProbeResult {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Invokes the repository's `./scripts/tran` wrapper with the given arguments
 * and converts the raw command result into a `ProbeResult`.
 *
 * @param args - Arguments appended after `./scripts/tran` (route first, then the remote command).
 * @param options - Execution-plane options; only `timeoutMs` is read here, as the default timeout.
 * @param timeoutMs - Timeout forwarded to `runCommand`; defaults to `options.timeoutMs`.
 * @returns The `commandProbe` view of the command result.
 */
function runTran(args: string[], options: ExecutionPlaneOptions, timeoutMs = options.timeoutMs): ProbeResult {
  // The current process environment is forwarded unchanged to the wrapper.
  // NOTE(review): `repoRoot` is presumably the working directory for the command — confirm against runCommand.
  const outcome = runCommand(["./scripts/tran", ...args], repoRoot, { timeoutMs, env: process.env });
  return commandProbe(outcome);
}
|
||||
|
||||
function safeError(probe: ProbeResult): string | null {
|
||||
if (probe.ok) return null;
|
||||
const text = firstLine(probe.stderr) ?? firstLine(probe.stdout);
|
||||
@@ -255,10 +262,7 @@ function safeError(probe: ProbeResult): string | null {
|
||||
}
|
||||
|
||||
/**
 * Runs a `kubectl` command against the D601 k3s cluster through the
 * `D601:k3s` tran route rather than a locally installed `kubectl`.
 *
 * The earlier local invocation (`runCommand(["kubectl", ...])` with a locally
 * injected `KUBECONFIG`) is removed: it returned first and left the tran route
 * unreachable, so observation depended on the tooling of the host running
 * the CLI.
 *
 * @param args - Arguments passed to `kubectl` on the remote side.
 * @param options - Execution-plane options; `timeoutMs` is applied by `runTran`.
 * @returns The probe result of the remote `kubectl` invocation.
 */
function runKubectl(args: string[], options: ExecutionPlaneOptions): ProbeResult {
  // NOTE(review): `options.kubeconfig` is no longer exported into the local env here;
  // presumably the `D601:k3s` route selects the k3s kubeconfig on D601 — confirm.
  return runTran(["D601:k3s", "kubectl", ...args], options);
}
|
||||
|
||||
function collectGuard(options: ExecutionPlaneOptions): { guard: D601K3sGuardClassification; diagnostics: Record<string, unknown> } {
|
||||
@@ -436,7 +440,7 @@ function collectServices(options: ExecutionPlaneOptions): ServiceObservation[] {
|
||||
}
|
||||
|
||||
function collectWorktree(options: ExecutionPlaneOptions): WorktreeObservation {
|
||||
const probe = commandProbe(runCommand(["git", "-C", options.worktreePath, "rev-parse", "HEAD"], repoRoot, { timeoutMs: 5_000 }));
|
||||
const probe = runTran([`D601:${options.worktreePath}`, "argv", "git", "rev-parse", "HEAD"], options, 10_000);
|
||||
return {
|
||||
path: options.worktreePath,
|
||||
ok: probe.ok,
|
||||
@@ -446,7 +450,16 @@ function collectWorktree(options: ExecutionPlaneOptions): WorktreeObservation {
|
||||
}
|
||||
|
||||
function collectResidual(): ResidualObservation {
|
||||
const docker = commandProbe(runCommand(["docker", "ps", "-a", "--filter", "name=code-queue-backend", "--format", "{{.Names}}\t{{.Status}}\t{{.Image}}"], repoRoot, { timeoutMs: 8_000 }));
|
||||
const remoteOptions: ExecutionPlaneOptions = {
|
||||
namespace: expectedNamespace,
|
||||
kubeconfig: d601NativeKubeconfig,
|
||||
worktreePath: expectedWorktreePath,
|
||||
full: false,
|
||||
raw: false,
|
||||
skipProbe: true,
|
||||
timeoutMs: 15_000,
|
||||
};
|
||||
const docker = runTran(["D601", "argv", "docker", "ps", "-a", "--filter", "name=code-queue-backend", "--format", "{{.Names}}\t{{.Status}}\t{{.Image}}"], remoteOptions, 15_000);
|
||||
const containers = docker.ok
|
||||
? lines(docker.stdout).map((line) => {
|
||||
const fields = line.split("\t");
|
||||
@@ -454,7 +467,7 @@ function collectResidual(): ResidualObservation {
|
||||
}).filter((item) => item.name === "code-queue-backend" || item.name.includes("code-queue-backend"))
|
||||
: [];
|
||||
|
||||
const ss = commandProbe(runCommand(["ss", "-H", "-ltnp"], repoRoot, { timeoutMs: 8_000 }));
|
||||
const ss = runTran(["D601", "argv", "ss", "-H", "-ltnp"], remoteOptions, 15_000);
|
||||
const listeners = ss.ok
|
||||
? (lines(ss.stdout)
|
||||
.filter((line) => line.includes(":4222"))
|
||||
|
||||
Reference in New Issue
Block a user