From 790e5df28161dabbef014352c36e3e2f35cdb6ba Mon Sep 17 00:00:00 2001 From: Lyon <88232613+pikasTech@users.noreply.github.com> Date: Thu, 25 Jun 2026 08:44:30 +0800 Subject: [PATCH] feat: add AgentRun cancel lifecycle policy (#859) Co-authored-by: Codex --- .agents/skills/unidesk-code-queue/SKILL.md | 4 +- config/agentrun.yaml | 30 ++++ docs/reference/cli.md | 1 + .../specs/PJ2026-010201-agentrun-core.md | 39 +++++ .../specs/PJ2026-010203-queue-session.md | 15 ++ .../specs/PJ2026-010603-yaml-first-ops.md | 8 +- .../PJ2026-010605-observability-monitoring.md | 6 +- scripts/src/agentrun-lanes.ts | 45 ++++++ scripts/src/agentrun-manifests.ts | 8 ++ scripts/src/agentrun.ts | 135 +++++++++++++++++- 10 files changed, 278 insertions(+), 13 deletions(-) diff --git a/.agents/skills/unidesk-code-queue/SKILL.md b/.agents/skills/unidesk-code-queue/SKILL.md index d8052945..2e93d0c8 100644 --- a/.agents/skills/unidesk-code-queue/SKILL.md +++ b/.agents/skills/unidesk-code-queue/SKILL.md @@ -72,7 +72,7 @@ AgentRun queue 生命周期不是一个单独的 `queue lifecycle` 命令,而 1. 默认总览用 `get tasks --queue commander --limit 20`,只看 task state、queue/lane、run/cmd/rjob/session ref、age 和 attention。 2. 单任务用 `describe task/`,读取 `latestAttempt.runId`、`commandId`、`runnerJobId`、`sessionId/sessionPath` 和少量 `Next:`。 3. Run 级状态用 `events run/` 和 `result run/ --command `,判断 terminalClassification、failureKind、provider interruption、timeoutBudget 和 recoveryActions。 -4. Command 级状态用 `describe command/ --run ` 和 `result command/ --run `,确认 command state、ack、terminal status 和结果摘要;确认为单个 active command 卡住时,用 `cancel command/ --run --reason ` 清理该 command,保留同一个 session 后再用 `send session/` 续跑。 +4. Command 级状态用 `describe command/ --run ` 和 `result command/ --run `,确认 command state、ack、terminal status 和结果摘要;确认为单个 active command 卡住时,先用 `cancel command/ --run --reason --dry-run` 核对 `CancelLifecycle` 的 authority、cascade、runner abort 和 fencing,再去掉 `--dry-run` 清理该 command,保留同一个 session 后再用 `send session/` 续跑。 5. Runner job 只读状态用 `describe runnerjob/ --run `,确认 env image reuse、jobName、namespace、phase、exitCode、retention 和 `valuesPrinted=false`。不要为了这些字段手动调用 `trans G14:k3s kubectl ...`。 6. Runtime runner Job/Pod retention 或 operator 明确要求强杀 runner 时,不属于单个 task/session 资源原语;使用 `bun scripts/cli.ts agentrun control-plane cleanup-runners --node --lane [--force-active] --dry-run|--confirm`。普通 cleanup 只删 inactive selected runner;`--force-active` 会中断 active run/command/session,必须先 dry-run 确认 selection,并且仍应优先于裸 `kubectl delete pod/job`。 7. Session trace/output 只在 `describe task` 或 result 里有实际 `sessionId` 时使用 `logs|ack|send|cancel session/`;`sessionRef=null` 时不要猜 session 命令。用户级 follow-up 一律使用 `send session/`,不要回到旧 `turn/steer` 或 `sessions ...` 兼容路径。 @@ -80,6 +80,8 @@ AgentRun queue 生命周期不是一个单独的 `queue lifecycle` 命令,而 默认视图必须低噪声且不是 JSON envelope,`-o json|yaml` 才输出稳定机器结构,`--raw` 才保留直连 AgentRun REST envelope;命令返回里的下一步应优先是 `bun scripts/cli.ts agentrun ...` 资源原语,不得把人工 k8s 查询作为日常下一步。 +AgentRun cancel 策略由 `config/agentrun.yaml` 的 lane 级 `deployment.runner.cancelLifecycle` 管理;操作 D601、G14 或其他非默认 lane 时必须带 `--node/--lane --dry-run` 先确认 YAML policy,不要依赖全局默认或手动 k8s 强杀来替代资源原语。 + ## HWLAB Code Agent 入口整合 HWLAB Code Agent / CaseRun follow-up 的日常派单也归入 AgentRun 资源原语:新任务用 `create task --aipod Artificer` 或包含 HWLAB gitbundle 的 `apply -f -`;运行中纠偏用 `send session/ --aipod Artificer`。需要验证 HWLAB Web/Cloud API 原入口时,仍按 `$hwlab-code-agent` 使用 G14 `/root/hwlab-v02` 的 `hwlab-cli client agent ...` 拉取同一 trace/result/inspect;不要回到旧 `codex submit/resume/steer`。 diff --git a/config/agentrun.yaml b/config/agentrun.yaml index df9351a4..cbf99d5b 100644 --- a/config/agentrun.yaml +++ b/config/agentrun.yaml @@ -174,6 +174,21 @@ controlPlane: ageBasedCleanup: enabled: false maxAgeHours: 48 + cancelLifecycle: + deliveryMode: manager-epoch + gracefulAbortMs: 15000 + killEscalationMs: 30000 + staleHeartbeatFencingMs: 900000 + lateWriteFencing: + enabled: true + eventStages: + - accepted + - persisted + - delivered + - aborting + - terminalized + - fenced + - late-write-rejected localPostgres: enabled: true serviceName: agentrun-v01-postgres @@ -361,6 +376,21 @@ controlPlane: ageBasedCleanup: enabled: false maxAgeHours: 48 + cancelLifecycle: + deliveryMode: manager-epoch + gracefulAbortMs: 15000 + killEscalationMs: 30000 + staleHeartbeatFencingMs: 900000 + lateWriteFencing: + enabled: true + eventStages: + - accepted + - persisted + - delivered + - aborting + - terminalized + - fenced + - late-write-rejected localPostgres: enabled: false gitMirror: diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 3ab4da6d..9de3fe60 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -118,6 +118,7 @@ PipelineRun 失败或长时间未完成时,先按定点 `control-plane status - `codex deploy ` 是旧 Code Queue 兼容部署入口,已禁用以防止维护通道直连 D601 部署 Code Queue;当前 dev 自动化只做 `ci run-dev-e2e` smoke,不提供 Code Queue CD,详细规则见 `docs/reference/codex-deploy.md`。 - `agentrun get|describe|events|logs|result|ack|cancel|dispatch|create|apply|send` 是当前指挥官新任务和 AgentRun session 控制入口。UniDesk CLI 是 render-only client:客户端保留 k8s 风格命令解析、human 表格、生命周期摘要、下一步命令、分页、`-o json|yaml` 稳定客户端 schema 和错误展示;AgentRun 服务端只提供稳定 RESTful API、鉴权和业务事实,不承载 UniDesk CLI 渲染。日常查看用 `get tasks --queue commander`、`describe task/`、`events run/`、`logs session/`、`result run/ --command `;日常写入用 `create task --aipod Artificer --prompt-stdin`、`apply -f -`、`dispatch task/`、`send session/`、`ack/cancel task|session/`。用户级 CLI 取消 `turn` 和 `steer` 路径;`send session/` 是唯一 session follow-up 写入口,AgentRun 服务端按 durable session/run/command 状态自动决定内部 `steer` 或新 `turn`,dry-run 必须真实返回这个 decision 且不写状态。兼容 group `queue|runs|commands|runner|sessions|aipod-specs` 也走同一 direct HTTP transport,`--raw` 只披露直连 AgentRun REST envelope。 - `agentrun` 资源原语的默认 transport 是直连 AgentRun REST API,配置来源是 UniDesk 自有 YAML `config/agentrun.yaml`。不带 `--node`/`--lane` 时按 YAML 的默认 manager `baseUrl` 访问;显式 `--node --lane ` 时按同一 YAML 选中 runtime lane,经 `lane-k8s-service-proxy` 进入 manager `internalBaseUrl`,并用 manager pod env 中声明的 API key metadata 发起请求;输出只披露 node/lane/namespace/baseUrl/auth env metadata 和 `valuesPrinted=false`,不得打印 key value。该模式用于 D601 `agentrun-v02` 等非默认 lane 的资源原语操作与证据采集,尤其是 `get/describe/events/logs/result`,不替代 `agentrun control-plane ...` 发布或运维控制。鉴权可以复用 `HWLAB_API_KEY` 的环境变量/固定文件发现风格,但不得依赖 HWLAB runtime、HWLAB backend-core、HWLAB frontend 代理或 SSH official CLI;多一层转发会增加故障面,不能作为正式路径。`agentrun control-plane ...` 和 `git-mirror ...` 仍属于 G14 source/runtime 运维控制路径,可以继续使用 UniDesk SSH capture bridge;这些控制面路径不得反向成为 queue/session 资源原语的默认 transport。 +- `agentrun cancel ... --dry-run` 必须显示 `CancelLifecycle` 摘要:transport/authority、YAML lane、cascade scope、runner abort 窗口、cancel epoch 与 late-write fencing。取消策略来自 `config/agentrun.yaml` 的 `controlPlane.lanes..deployment.runner.cancelLifecycle`;字段缺失或 lane 选择错误应暴露为配置错误,不得在 CLI、manifest 或服务里补隐式默认。操作非默认 lane 时先加 `--node --lane --dry-run` 核对 policy,再移除 `--dry-run` 发起真实取消。 - `agentrun control-plane expose --dry-run|--confirm` 按 `config/agentrun.yaml` 维护 AgentRun 公网 HTTPS 入口,模式与 Sub2API 暴露一致:G14 AgentRun runtime 通过 frpc 出到 master `127.0.0.1:`,master Caddy 提供 `https://agentrun.74-48-78-17.nip.io/`。该命令只补 master `frps` allow port 和 Caddy vhost;G14 frpc Deployment/ConfigMap 必须由 AgentRun `deploy/deploy.json` + GitOps render 管理,不能在 UniDesk 侧手写 Kubernetes manifest。 - `codex submit/enqueue`、`codex steer`、`codex resume`、`codex queue create`、`codex queue merge`、`codex move`、旧 Web 提交表单、旧队列管理和旧 workdir 管理是冻结的 legacy Code Queue 写入口。CLI 必须返回 `ok=false`、`frozen=true`、`degradedReason=legacy-code-queue-frozen` 和 AgentRun 替代命令;服务端旧 API 写入口必须返回 410。新任务、session follow-up、events/logs/result、ack 和 cancel 走 AgentRun 资源原语,其中 session follow-up 只用 `agentrun send session/`。 - 旧 Code Queue 只保留历史归档、只读排障和残留任务停止。`codex task/tasks/output/read/unread/queues` 继续通过 backend-core 私有代理读取旧 PostgreSQL 历史;`codex interrupt|cancel ` 只用于停止旧运行面残留任务。旧 `steer-confirm` 只作为历史 trace confirmation 查询,不是新任务控制入口。 diff --git a/project-management/PJ2026-01/specs/PJ2026-010201-agentrun-core.md b/project-management/PJ2026-01/specs/PJ2026-010201-agentrun-core.md index b3d4f752..273c3608 100644 --- a/project-management/PJ2026-01/specs/PJ2026-010201-agentrun-core.md +++ b/project-management/PJ2026-01/specs/PJ2026-010201-agentrun-core.md @@ -38,6 +38,7 @@ AgentRun核心负责把 HWLAB Agent 任务转化为可持久查询、可调度 - Postgres durable store 中 runs、commands、events、runner jobs、sessions、backends、leases、Queue 引用和 migration ledger 的事实持久化。 - command 终态、run 终态、failureKind、result envelope、event 分页和日志/trace 脱敏边界。 - runner job identity、attempt、logPath、pod identity、stale lease recovery 和 runner replacement 的核心执行语义。 +- task、run、command、session 取消请求落到 AgentRun 核心后的 command/run 级取消状态机、cancel epoch、runner abort、terminal `canceled` 和迟到写回 fencing。 ### 2.3 范围外 @@ -57,6 +58,9 @@ AgentRun核心负责把 HWLAB Agent 任务转化为可持久查询、可调度 | command | run 内的一次 turn、steer、interrupt 或 cancel 指令,具备独立状态和终态。 | | event | 单 run 内 append-only、按 `seq` 单调递增的执行事实记录。 | | terminal status | command 或 run 的权威终态,不由 partial output、stdout、transport close 或 idle timeout 推断。 | +| cancel request | 由 Queue、Session、run 或 command 控制入口提交的取消意图,必须持久化 requestId、targetRef、reason、requestedBy 和作用范围。 | +| cancel epoch | AgentRun 用于隔离取消前后写入的单调 fencing token;runner、terminal report 和 late write 必须携带或接受该 epoch 校验。 | +| canceled terminal | 取消成功后的权威终态,区别于 completed、failed、timeout 和 transport close。 | | failureKind | AgentRun 对 schema、tenant policy、Secret、runner、backend、provider、infra 和 cancel 等失败的结构化分类。 | | durable facts | Postgres 中可重启后查询的 run、command、event、runner、job、session、backend、lease 和 migration 事实。 | @@ -84,6 +88,7 @@ AgentRun核心负责把 HWLAB Agent 任务转化为可持久查询、可调度 | PJ2026-01020105 | 控制面恢复 | 本规格 6.6 | manager boot/background reconciler、runner job observation 和 active command 收敛 | Durable事实、Kubernetes Job/Pod | HWLAB 接入、发布Lane、运维监控 | | PJ2026-01020106 | 终态Outbox | 本规格 6.7 | runner terminal fact 的幂等提交、重试和可恢复 artifact | Runner执行、Backend Profile | 控制面恢复、HWLAB 投影 | | PJ2026-01020107 | 清理安全 | 本规格 6.8 | cleanup、Job TTL、runner 上限与 active runner 保护 | 控制面恢复、发布Lane | 平台运维、运行面 GC | +| PJ2026-01020108 | 取消生命周期 | 本规格 6.9 | cancel request、cascade scope、epoch fencing、runner abort 和 canceled terminal | Queue会话、Manager API、Runner执行 | 客户端、运维监控、HWLAB接入 | ### 5.1 控制面恢复目标架构图 @@ -161,6 +166,28 @@ sequenceDiagram | Job completed before terminal commit | Job/Pod phase、outbox/artifact、logs retention | reconciler 恢复 terminal fact 或写不可恢复 blocker | 把 Job completed 单独当 command completed | | cleanup/TTL race | DB terminal state、runnerJob observation、TTL config | terminal durable 后再清理可恢复证据 | 只按 Pod 列表或年龄清理 active runner | +### 5.5 cancel lifecycle 关键时序图 + +```mermaid +sequenceDiagram + participant C as Queue / Session / CLI + participant M as agentrun-mgr authority + participant DB as durable ledger + participant R as runner + participant B as backend/tool/process + + C->>M: cancel targetRef + reason + M->>DB: persist cancelRequest + next cancel epoch + M->>R: deliver cancel epoch for active command/run + R->>B: abort stream/tool/process + R->>DB: terminal report canceled with epoch + M->>DB: seal command/run canceled + R-->>DB: late write with old epoch + DB-->>R: reject fenced write +``` + +cancel lifecycle 必须把“接受取消请求”和“运行器已经中止”分成可观测阶段。用户或上游只能把 `canceled terminal` 作为取消完成事实;`cancel requested`、HTTP 连接关闭、runner pod 消失、timeout/watchdog 或缺少新输出都不能单独代表取消完成。 + ## 6. 原子需求 ### 6.1 AR-CORE-REQ-001 Durable Resource 模型 @@ -244,3 +271,15 @@ terminal 上报必须幂等。同一 `runId + commandId + attemptId + runnerId` AgentRun cleanup、Job TTL 和 runner 上限治理必须以 DB active facts 与 Kubernetes observation 双确认作为判断基础。默认 cleanup 不得杀 active runner;确需强制终止 active runner 时必须使用显式 force 语义,并写入原因、操作者、对象和 observed facts。 Job TTL 和日志/termination metadata 保留窗口不得早于 terminal facts durable commit 所需的最低恢复窗口。runner 上限治理应优先清理 idle、terminal、expired 或不可恢复对象;不得只根据 Pod 列表、Job age、进程内计数或旧 lease 单字段清理 runner。 + +### 6.9 AR-CORE-REQ-009 Cancel lifecycle 与 fencing + +| 编号 | 短名 | 主责模块 | 关联模块 | +| --- | --- | --- | --- | +| AR-CORE-REQ-009 | 取消生命周期 | PJ2026-01020108 取消生命周期 | [队列会话](PJ2026-010203-queue-session.md)、[YAML运维](PJ2026-010603-yaml-first-ops.md)、[运维监控](PJ2026-010605-observability-monitoring.md) | + +AgentRun核心应提供 task/run/command/session 控制入口落到核心执行面后的统一取消生命周期。Manager 接受取消请求时必须持久化 cancel request、targetRef、cascade scope、reason、requestedBy、requestId 和 cancel epoch;重复取消同一目标应幂等返回既有或更高 epoch 的取消事实。 + +取消必须通过 runner 可执行的 abort 信号兑现。runner 收到 cancel epoch 后应中止 provider stream、tool call、后台任务和子进程,并用 grace kill 与强制 kill 或等价机制完成资源释放;中止结果必须以 command/run terminal `canceled`、failureKind 或 cancellation classification 写回 durable store。partial output、transport close、idle timeout、missing terminal watchdog 或 runner pod 消失不得冒充 `completed` 或 `canceled`。 + +Manager 和 durable store 必须用 cancel epoch 对迟到写回做 fencing。取消请求后的旧 epoch event、terminal report、result envelope 或 runner heartbeat 不得覆盖 sealed canceled terminal;被拒绝的迟到写回应产生可查询的 low-noise event、diagnostic 或 span attribute,使 CLI 能判断 cancel 在 accepted、delivered、aborting、terminalized、fenced 或 late-write-rejected 哪个阶段。 diff --git a/project-management/PJ2026-01/specs/PJ2026-010203-queue-session.md b/project-management/PJ2026-01/specs/PJ2026-010203-queue-session.md index df364b19..ae95739b 100644 --- a/project-management/PJ2026-01/specs/PJ2026-010203-queue-session.md +++ b/project-management/PJ2026-01/specs/PJ2026-010203-queue-session.md @@ -37,6 +37,7 @@ - Session API/CLI 的输出、trace、命令流、debug/audit 详情、read 状态和会话控制。 - Queue task 到 Core run/command/runner job/session 的引用关系和 sessionPath 输出。 - `sessions send`、session continuation、unread/default/all 状态和 terminal projection。 +- Queue task、Session、run 和 command 取消入口的用户语义、级联边界、同 session 续跑和 canceled terminal projection。 - 自动 scheduler 的 deferred 边界、future pending scan、capacity selection、runner assignment 和 stale lease recovery 方向。 ### 2.3 范围外 @@ -58,6 +59,7 @@ | sessionPath | Queue task 返回的 Session API 路径,用于读取输出和 trace。 | | read cursor | 按 readerId 记录的已读水位,用于默认列表只显示 running 或 unread session/task。 | | commander | Queue 侧的聚合视图,用于展示队列状态、最新 attempt 和下一步操作摘要。 | +| cancel scope | 用户发起取消时选择的资源边界,包括 task、session、run 或 command。 | | Scheduler | 后续自动调度器,负责 pending scan 和 runner assignment;`v0.1` 第一阶段不作为发布前置。 | ## 4. 系统边界和接口 @@ -81,6 +83,7 @@ | PJ2026-01020302 | Session控制 | 本规格 6.2 | output、trace、send、read、cancel、default/unread/all 视图 | Core events、Runtime session | CLI、客户端、用户 | | PJ2026-01020303 | 分层边界 | 本规格 6.3 | Queue、Session、Core 和 Scheduler 的职责分离 | Queue任务、Session控制 | 发布流水、客户端 | | PJ2026-01020304 | Scheduler边界 | 本规格 6.4 | 自动调度 deferred、future scan/assignment/recovery | Queue pending facts、capacity | Runner job、AgentRun Core | +| PJ2026-01020305 | 取消控制 | 本规格 6.5 | task/session/run/command 取消语义、级联关系和 canceled projection | AgentRun核心取消生命周期 | CLI、客户端、commander | ## 6. 原子需求 @@ -123,3 +126,15 @@ Queue 不维护 OA、notification、GitHub action 或外部协作 sink,不从 队列会话应保留自动 scheduler 的规格边界,但 `v0.1` 第一阶段不得因为 scheduler 未实现而阻塞最小真实闭环。 在 scheduler 启用前,CLI/manual dispatch 必须能启动真实 runner,manager durable facts、runner claim、backend turn、events 和 terminal status 必须真实可用。未来 scheduler 只能通过 manager API 和 Kubernetes runner Job 改变调度状态,不得直接写 Postgres 或直接执行 backend。 + +### 6.5 AR-QUEUE-REQ-005 取消控制语义 + +| 编号 | 短名 | 主责模块 | 关联模块 | +| --- | --- | --- | --- | +| AR-QUEUE-REQ-005 | 取消控制 | PJ2026-01020305 取消控制 | [AgentRun核心](PJ2026-010201-agentrun-core.md)、[客户端](PJ2026-0104-client.md)、[运维监控](PJ2026-010605-observability-monitoring.md) | + +队列会话应把用户可见取消语义表达为明确的资源 scope。`cancel command/` 只取消当前 command/runner job,并保留同一个 session 可继续 `send session/`;`cancel run/` 取消该 run 下 active command 和 runner job,并由 Core terminalize run;`cancel session/` 取消 session 当前 active work 和 session-scoped background work;`cancel task/` 取消当前 task attempt/run,并按 Queue retry/resume 语义更新 task projection。 + +Queue 和 Session 只能提交取消意图、展示取消阶段和投影 canceled terminal,不得从 HTTP 成功、timeout、stdout 停止、runner job 删除或 Web 轮询无新增内容推断取消完成。取消完成事实必须来自 [AgentRun核心](PJ2026-010201-agentrun-core.md) 的 durable command/run terminal、cancel epoch fencing 和 result/event 证据。 + +CLI、commander 和客户端默认输出应展示 cancel scope、targetRef、cascade scope、request id 或等价关联字段、当前阶段和下一步查询命令;完整审计字段通过 machine output 或详细视图获取。取消后的 follow-up 必须显式引用仍可续跑的 `sessionId`,不得回退到旧 Code Queue `resume/steer` 或通过复制历史 prompt 伪造会话连续性。 diff --git a/project-management/PJ2026-01/specs/PJ2026-010603-yaml-first-ops.md b/project-management/PJ2026-01/specs/PJ2026-010603-yaml-first-ops.md index 9aae20ca..e79e5da0 100644 --- a/project-management/PJ2026-01/specs/PJ2026-010603-yaml-first-ops.md +++ b/project-management/PJ2026-01/specs/PJ2026-010603-yaml-first-ops.md @@ -39,6 +39,7 @@ YAML运维负责 HWLAB/UniDesk 自有平台配置的真相源、解析、渲染 - target、lane、node、service、namespace、endpoint、publicExposure 和运行目标解析。 - Secret sourceRef、targetKey、providerCredential、manual binding source 和敏感输出约束。 - Sub2API、Codex pool、AgentRun control-plane、session policy 和平台基础设施配置的受控 CLI 读取、解释、计划和下发。 +- AgentRun lane 的 runner retention、idle timeout、egress proxy 和 cancel lifecycle policy 等运行策略配置读取、解释和下发。 - FRP、Caddy、public URL、public health、Kubernetes Secret 和平台资源渲染所需的配置投递边界。 - 可复用 ops primitive,包括 YAML path 捕获、字段解析、fingerprint、摘要输出、Secret 引用和命令输出约束。 @@ -61,6 +62,7 @@ YAML运维负责 HWLAB/UniDesk 自有平台配置的真相源、解析、渲染 | sourceRef | YAML 中指向密钥来源的声明,输出时只能显示来源标识和摘要,不显示密钥值。 | | targetKey | YAML 中声明运行面 Secret 或配置对象接收某项密钥的 key 名。 | | providerCredential | AgentRun lane 中声明 provider profile 与运行面 Secret 绑定关系的配置项。 | +| cancel lifecycle policy | AgentRun lane 中声明取消投递、runner abort、kill escalation、stale fencing 和事件阶段输出的配置块;具体数值以 YAML 为准。 | | publicExposure | YAML 中描述 FRP、Caddy、domain、TLS、public URL 和 health 目标的公开入口声明。 | | ops primitive | 平台运维 CLI 共享的底层能力,例如字段解析、fingerprint、Secret 引用、摘要输出和 YAML path 捕获。 | | 配置解释输出 | CLI 将 YAML 解析后的默认值、来源和目标以非敏感摘要展示给操作人员的输出。 | @@ -143,9 +145,11 @@ YAML运维应从 publicExposure 和 target 声明渲染 FRP、Caddy、public URL | --- | --- | --- | --- | | OPS-YAML-REQ-005 | 执行策略 | PJ2026-01060305 执行策略 | [Agent编排](PJ2026-0102-agent-orchestration.md)、[用户管理](PJ2026-0105-user-management.md) | -YAML运维应为 AgentRun control-plane default、client sessionPolicy、lane secret providerCredential、workspace 和 execution policy 提供配置读取与解释,使 AgentRun 运维入口不依赖代码内固定 profile、namespace 或执行策略。 +YAML运维应为 AgentRun control-plane default、client sessionPolicy、lane secret providerCredential、workspace、execution policy 和 cancel lifecycle policy 提供配置读取与解释,使 AgentRun 运维入口不依赖代码内固定 profile、namespace、取消超时或执行策略。 -本需求只约束执行策略如何作为平台配置进入运行面。Agent run、command、session 状态机、任务恢复和 provider 业务语义由 Agent编排负责,用户身份和 API key 约束由用户管理负责。 +cancel lifecycle policy 至少应能声明取消信号投递方式、runner graceful abort、kill escalation、stale heartbeat fencing window、late write fencing 和默认事件阶段输出开关。CLI 只校验字段结构、类型、必填项和可渲染性;具体窗口、超时和开关值由 YAML 承载,不在代码或 SPEC 中写成第二真相。 + +本需求只约束执行策略如何作为平台配置进入运行面。Agent run、command、session 状态机、任务恢复、取消语义和 provider 业务语义由 Agent编排负责,用户身份和 API key 约束由用户管理负责。 ### 6.6 OPS-YAML-REQ-006 公共 ops primitive diff --git a/project-management/PJ2026-01/specs/PJ2026-010605-observability-monitoring.md b/project-management/PJ2026-01/specs/PJ2026-010605-observability-monitoring.md index af82f021..a9074781 100644 --- a/project-management/PJ2026-01/specs/PJ2026-010605-observability-monitoring.md +++ b/project-management/PJ2026-01/specs/PJ2026-010605-observability-monitoring.md @@ -41,7 +41,7 @@ - OpenTelemetry Collector、trace backend、span 语义、trace context 传播和 trace 查询入口。 - Web/API/AgentRun/HWPOD/Harness/用户管理等服务的运行面健康、资源状态、公开入口健康和用户可感知性能观测。 - 发布后 runtime readiness、resource usage、error rate、queue depth、target availability 和 alert 状态摘要。 -- AgentRun rolling recovery 的 active runner、stale lease、terminal report retry、reconciler backlog、projection lag 和 Job TTL cleanup 观测。 +- AgentRun rolling recovery 与 cancel lifecycle 的 active runner、stale lease、cancel request、cancel delivery、terminal report retry、reconciler backlog、projection lag、late-write fencing 和 Job TTL cleanup 观测。 - 监控和 trace 数据的受控查询、低噪声摘要、失败归因和敏感输出约束。 ### 2.3 范围外 @@ -251,11 +251,11 @@ Workbench 性能监控只记录低基数指标、阶段耗时、状态分类和 | --- | --- | --- | --- | | OPS-MON-REQ-007 | Rolling恢复 | PJ2026-01060507 Rolling恢复观测 | [AgentRun核心](PJ2026-010201-agentrun-core.md)、[HWLAB接入](PJ2026-010205-hwlab-dispatch.md)、[Workbench唯一投影](PJ2026-0104010803-workbench-unique-projection.md)、[AgentRun发布Lane](PJ2026-01060105-agentrun-v01-release-lane.md) | -运维监控应为 AgentRun/HWLAB rolling recovery 提供可查询的低基数 metrics、trace span 和受控 CLI 摘要。最小观测对象包括 active runner 数、runner job observation phase、stale lease 数、terminal report retry/outbox backlog、manager reconciler backlog、reconciler last success/error、projection lag、projection blocker 数、Job TTL cleanup 数和不可恢复 blocker 数。 +运维监控应为 AgentRun/HWLAB rolling recovery 和 cancel lifecycle 提供可查询的低基数 metrics、trace span 和受控 CLI 摘要。最小观测对象包括 active runner 数、runner job observation phase、stale lease 数、cancel request accepted 数、cancel delivered 数、cancel terminalized 数、late-write-fenced 数、terminal report retry/outbox backlog、manager reconciler backlog、reconciler last success/error、projection lag、projection blocker 数、Job TTL cleanup 数和不可恢复 blocker 数。 rolling recovery 相关 span 应能用 OTel trace id/request id 关联 `sessionId`、`traceId`、`runId`、`commandId`、`runnerJobId` 和 `jobName` 的 redacted attribute。高基数业务 ID 只能作为 trace/span attribute 或 CLI 单次查询参数,不进入 Prometheus label;prompt、assistant 正文、tool 参数、stdout/stderr、Secret、完整 token、完整 DSN 和 provider payload 不得进入默认 metrics、span 或 issue closeout。 -发布/rollout 前后的受控 CLI 摘要应能回答:仍在运行的 runner 数、已恢复控制权数量、terminal report retry 数、projection lag 最大值、不可恢复 blocker 数和最近一次 reconciler 错误。该摘要只用于定位和发布判定,不替代 P6 原入口 rolling/chaos 验收,也不能把可观测性 green 当作业务任务完成。 +发布/rollout 前后的受控 CLI 摘要应能回答:仍在运行的 runner 数、已恢复控制权数量、cancel 当前阶段分布、terminal report retry 数、projection lag 最大值、不可恢复 blocker 数和最近一次 reconciler 错误。该摘要只用于定位和发布判定,不替代 P6 原入口 rolling/chaos 验收,也不能把可观测性 green 当作业务任务完成。 ## 7. 过程控制 diff --git a/scripts/src/agentrun-lanes.ts b/scripts/src/agentrun-lanes.ts index 20a87b0b..d66e12f6 100644 --- a/scripts/src/agentrun-lanes.ts +++ b/scripts/src/agentrun-lanes.ts @@ -1,3 +1,5 @@ +// SPEC: PJ2026-01060305 AgentRun execution policy + PJ2026-01020108 cancel lifecycle draft-2026-06-25-p0. +// Parses AgentRun YAML lane policy, including cancel lifecycle values owned by config/agentrun.yaml. import { rootPath } from "./config"; import { asRecord, @@ -113,6 +115,7 @@ export interface AgentRunLaneSpec { readonly egressProxyUrl: string | null; readonly noProxyExtra: readonly string[]; readonly retention: AgentRunRunnerRetentionSpec; + readonly cancelLifecycle: AgentRunCancelLifecycleSpec; }; readonly localPostgres: { readonly enabled: boolean; @@ -195,6 +198,19 @@ export interface AgentRunRunnerRetentionSpec { }; } +export type AgentRunCancelLifecycleStage = "accepted" | "persisted" | "delivered" | "aborting" | "terminalized" | "fenced" | "late-write-rejected"; + +export interface AgentRunCancelLifecycleSpec { + readonly deliveryMode: "manager-epoch"; + readonly gracefulAbortMs: number; + readonly killEscalationMs: number; + readonly staleHeartbeatFencingMs: number; + readonly lateWriteFencing: { + readonly enabled: boolean; + }; + readonly eventStages: readonly AgentRunCancelLifecycleStage[]; +} + export interface AgentRunLaneTarget { readonly configPath: string; readonly spec: AgentRunLaneSpec; @@ -312,6 +328,7 @@ export function agentRunLaneSummary(spec: AgentRunLaneSpec): Record, path: string): AgentRun egressProxyUrl: optionalStringField(runner, "egressProxyUrl", `${path}.runner`) ?? null, noProxyExtra: optionalStringArrayField(runner, "noProxyExtra", `${path}.runner`), retention: parseRunnerRetention(recordField(runner, "retention", `${path}.runner`), `${path}.runner.retention`), + cancelLifecycle: parseCancelLifecycle(recordField(runner, "cancelLifecycle", `${path}.runner`), `${path}.runner.cancelLifecycle`), }, localPostgres: parseLocalPostgres(localPostgres, `${path}.localPostgres`), }; } +function parseCancelLifecycle(input: Record, path: string): AgentRunCancelLifecycleSpec { + const lateWriteFencing = recordField(input, "lateWriteFencing", path); + return { + deliveryMode: enumField(input, "deliveryMode", path, ["manager-epoch"]), + gracefulAbortMs: positiveIntegerField(input, "gracefulAbortMs", path), + killEscalationMs: positiveIntegerField(input, "killEscalationMs", path), + staleHeartbeatFencingMs: positiveIntegerField(input, "staleHeartbeatFencingMs", path), + lateWriteFencing: { + enabled: booleanField(lateWriteFencing, "enabled", `${path}.lateWriteFencing`), + }, + eventStages: parseCancelLifecycleStages(input.eventStages, `${path}.eventStages`), + }; +} + +function parseCancelLifecycleStages(input: unknown, path: string): readonly AgentRunCancelLifecycleStage[] { + const values: readonly AgentRunCancelLifecycleStage[] = ["accepted", "persisted", "delivered", "aborting", "terminalized", "fenced", "late-write-rejected"]; + if (!Array.isArray(input)) throw new Error(`${path} must be an array`); + if (input.length === 0) throw new Error(`${path} must declare at least one stage`); + const result = input.map((value, index) => { + if (typeof value !== "string" || !values.includes(value as AgentRunCancelLifecycleStage)) throw new Error(`${path}[${index}] must be one of ${values.join(", ")}`); + return value as AgentRunCancelLifecycleStage; + }); + const duplicates = result.filter((value, index) => result.indexOf(value) !== index); + if (duplicates.length > 0) throw new Error(`${path} must not contain duplicate stages: ${[...new Set(duplicates)].join(", ")}`); + return result; +} + function parseRunnerRetention(input: Record, path: string): AgentRunRunnerRetentionSpec { const selectors = recordField(input, "selectors", path); const ageBasedCleanup = recordField(input, "ageBasedCleanup", path); diff --git a/scripts/src/agentrun-manifests.ts b/scripts/src/agentrun-manifests.ts index 266289b6..84edbd58 100644 --- a/scripts/src/agentrun-manifests.ts +++ b/scripts/src/agentrun-manifests.ts @@ -1,3 +1,5 @@ +// SPEC: PJ2026-01060305 AgentRun execution policy + PJ2026-01020108 cancel lifecycle draft-2026-06-25-p0. +// Renders AgentRun YAML lane policy into runtime manager environment. import { createHash } from "node:crypto"; import type { AgentRunLaneSpec } from "./agentrun-lanes"; @@ -449,6 +451,12 @@ function managerEnv(spec: AgentRunLaneSpec, sourceCommit: string, imageRef: stri { name: "AGENTRUN_RUNNER_RETENTION_JOB_NAME_PREFIXES", value: spec.deployment.runner.retention.selectors.jobNamePrefixes.join(",") }, { name: "AGENTRUN_RUNNER_RETENTION_AGE_BASED_CLEANUP_ENABLED", value: String(spec.deployment.runner.retention.ageBasedCleanup.enabled) }, ...(spec.deployment.runner.retention.ageBasedCleanup.maxAgeHours === null ? [] : [{ name: "AGENTRUN_RUNNER_RETENTION_AGE_BASED_MAX_AGE_HOURS", value: String(spec.deployment.runner.retention.ageBasedCleanup.maxAgeHours) }]), + { name: "AGENTRUN_CANCEL_DELIVERY_MODE", value: spec.deployment.runner.cancelLifecycle.deliveryMode }, + { name: "AGENTRUN_CANCEL_GRACEFUL_ABORT_MS", value: String(spec.deployment.runner.cancelLifecycle.gracefulAbortMs) }, + { name: "AGENTRUN_CANCEL_KILL_ESCALATION_MS", value: String(spec.deployment.runner.cancelLifecycle.killEscalationMs) }, + { name: "AGENTRUN_CANCEL_STALE_HEARTBEAT_FENCING_MS", value: String(spec.deployment.runner.cancelLifecycle.staleHeartbeatFencingMs) }, + { name: "AGENTRUN_CANCEL_LATE_WRITE_FENCING_ENABLED", value: String(spec.deployment.runner.cancelLifecycle.lateWriteFencing.enabled) }, + { name: "AGENTRUN_CANCEL_EVENT_STAGES", value: spec.deployment.runner.cancelLifecycle.eventStages.join(",") }, ...(spec.deployment.runner.egressProxyUrl === null ? [] : [{ name: "AGENTRUN_RUNNER_EGRESS_PROXY_URL", value: spec.deployment.runner.egressProxyUrl }]), ...(spec.deployment.runner.noProxyExtra.length === 0 ? [] : [{ name: "AGENTRUN_RUNNER_NO_PROXY_EXTRA", value: spec.deployment.runner.noProxyExtra.join(",") }]), { name: "AGENTRUN_API_KEY", valueFrom: { secretKeyRef: spec.deployment.manager.apiKeySecretRef } }, diff --git a/scripts/src/agentrun.ts b/scripts/src/agentrun.ts index 125d7b0d..9319cbd6 100644 --- a/scripts/src/agentrun.ts +++ b/scripts/src/agentrun.ts @@ -1,3 +1,5 @@ +// SPEC: PJ2026-01020108 cancel lifecycle + PJ2026-01020305 cancel control + PJ2026-01060305 AgentRun execution policy draft-2026-06-25-p0. +// Exposes AgentRun cancel lifecycle policy and dry-run visibility in the UniDesk CLI. import { chmodSync, copyFileSync, existsSync, readFileSync, statSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { spawnSync } from "node:child_process"; @@ -13,6 +15,7 @@ import { agentRunPipelineRunName, agentRunProviderCredentialRefs, resolveAgentRunLaneTarget, + type AgentRunCancelLifecycleSpec, type AgentRunLaneSpec, } from "./agentrun-lanes"; import { @@ -620,7 +623,7 @@ async function resourceCancel(config: UniDeskConfig | null, command: string, act if (options.reason !== null) cancelArgs.push("--reason", options.reason); if (ref.kind === "command") cancelArgs.push("--run-id", options.runId ?? requiredContext("command cancel", "--run ")); if (options.dryRun) { - const result = agentRunResourceCancelDryRunPlan(ref, options, rerunWithoutDryRun(command)); + const result = agentRunResourceCancelDryRunPlan(config, ref, options, rerunWithoutDryRun(command)); return renderMutationSummary(command, result, options, `Planned cancel ${ref.kind}/${shortId(ref.name)}`, [rerunWithoutDryRun(command)]); } const result = ref.kind === "task" @@ -636,21 +639,108 @@ async function resourceCancel(config: UniDeskConfig | null, command: string, act return renderMutationSummary(command, result, options, `${options.dryRun ? "Planned cancel" : "Cancel requested"} ${ref.kind}/${shortId(ref.name)}`, options.dryRun ? [rerunWithoutDryRun(command)] : undefined); } -function agentRunResourceCancelDryRunPlan(ref: AgentRunResourceRef, options: AgentRunResourceOptions, confirmCommand: string): Record { +function agentRunResourceCancelDryRunPlan(config: UniDeskConfig | null, ref: AgentRunResourceRef, options: AgentRunResourceOptions, confirmCommand: string): Record { const body: Record = {}; if (options.reason !== null) body.reason = options.reason; - if (ref.kind === "task") return agentRunDryRunPlan("task-cancel", `/api/v1/queue/tasks/${encodeURIComponent(ref.name)}/cancel`, body, confirmCommand); - if (ref.kind === "session") return agentRunDryRunPlan("session-cancel", `/api/v1/sessions/${encodeURIComponent(ref.name)}/control`, { action: "cancel", ...body }, confirmCommand); - if (ref.kind === "run") return agentRunDryRunPlan("run-cancel", `/api/v1/runs/${encodeURIComponent(ref.name)}/cancel`, body, confirmCommand); + const cancelLifecycle = agentRunCancelLifecycleDryRunDisclosure(config, ref, options); + if (ref.kind === "task") return agentRunDryRunPlan("task-cancel", `/api/v1/queue/tasks/${encodeURIComponent(ref.name)}/cancel`, body, confirmCommand, "POST", { cancelLifecycle }); + if (ref.kind === "session") return agentRunDryRunPlan("session-cancel", `/api/v1/sessions/${encodeURIComponent(ref.name)}/control`, { action: "cancel", ...body }, confirmCommand, "POST", { cancelLifecycle }); + if (ref.kind === "run") return agentRunDryRunPlan("run-cancel", `/api/v1/runs/${encodeURIComponent(ref.name)}/cancel`, body, confirmCommand, "POST", { cancelLifecycle }); if (ref.kind === "command") { const runId = options.runId ?? requiredContext("command cancel", "--run "); return agentRunDryRunPlan("command-cancel", `/api/v1/commands/${encodeURIComponent(ref.name)}/cancel`, body, confirmCommand, "POST", { commandRef: { runId, commandId: ref.name, valuesPrinted: false }, + cancelLifecycle, }); } throw new Error("cancel supports task/, session/, run/, or command/"); } +function agentRunCancelLifecycleDryRunDisclosure(config: UniDeskConfig | null, ref: AgentRunResourceRef, options: AgentRunResourceOptions): Record { + const target = resolveAgentRunCancelPolicyTarget(config, options); + const policy = target?.spec.deployment.runner.cancelLifecycle ?? null; + return { + specRefs: ["PJ2026-01020108", "PJ2026-01020305", "PJ2026-01060305"], + authority: agentRunCancelAuthorityDisclosure(target), + targetRef: { + kind: ref.kind, + name: ref.name, + runId: ref.kind === "command" ? options.runId : options.runId ?? null, + valuesPrinted: false, + }, + cascadeScope: agentRunCancelCascadeScope(ref.kind), + terminalAuthority: "AgentRun Core canceled terminal/result event", + expectedStages: policy?.eventStages ?? [], + runnerAbort: policy === null ? null : agentRunCancelRunnerAbortDisclosure(policy), + fencing: agentRunCancelFencingDisclosure(policy), + verification: { + describe: `bun scripts/cli.ts agentrun describe ${ref.kind}/${ref.name}`, + events: ref.kind === "run" || options.runId !== null ? `bun scripts/cli.ts agentrun events run/${ref.kind === "run" ? ref.name : options.runId} --after-seq 0` : null, + logs: ref.kind === "session" ? `bun scripts/cli.ts agentrun logs session/${ref.name} --tail 100` : null, + result: ref.kind === "command" ? `bun scripts/cli.ts agentrun result command/${ref.name} --run ${options.runId ?? ""}` : null, + valuesPrinted: false, + }, + valuesPrinted: false, + }; +} + +function resolveAgentRunCancelPolicyTarget(config: UniDeskConfig | null, options: AgentRunResourceOptions): { configPath: string; spec: AgentRunLaneSpec; source: "selected-lane" | "default-lane" } | null { + if (activeAgentRunRestTarget !== null) return { configPath: activeAgentRunRestTarget.configPath, spec: activeAgentRunRestTarget.spec, source: "selected-lane" }; + if (config === null) return null; + const { configPath, spec } = resolveAgentRunLaneTarget({ node: options.node, lane: options.lane }); + return { configPath, spec, source: options.node !== null || options.lane !== null ? "selected-lane" : "default-lane" }; +} + +function agentRunCancelAuthorityDisclosure(target: { configPath: string; spec: AgentRunLaneSpec; source: "selected-lane" | "default-lane" } | null): Record { + const laneTarget = activeAgentRunRestTarget !== null; + return { + transport: laneTarget ? "lane-k8s-service-proxy" : "direct-http", + policySource: target?.source ?? "unavailable", + node: target?.spec.nodeId ?? null, + lane: target?.spec.lane ?? null, + namespace: target?.spec.runtime.namespace ?? null, + managerDeployment: target?.spec.runtime.managerDeployment ?? null, + baseUrl: laneTarget ? target?.spec.runtime.internalBaseUrl ?? null : agentRunDirectManagerBaseUrl(), + laneConfigPath: target?.configPath ?? null, + valuesPrinted: false, + }; +} + +function agentRunDirectManagerBaseUrl(): string | null { + try { + return readAgentRunClientConfig().manager.baseUrl; + } catch { + return null; + } +} + +function agentRunCancelRunnerAbortDisclosure(policy: AgentRunCancelLifecycleSpec): Record { + return { + deliveryMode: policy.deliveryMode, + gracefulAbortMs: policy.gracefulAbortMs, + killEscalationMs: policy.killEscalationMs, + valuesPrinted: false, + }; +} + +function agentRunCancelFencingDisclosure(policy: AgentRunCancelLifecycleSpec | null): Record { + if (policy === null) return { cancelEpoch: true, policySource: "unavailable", valuesPrinted: false }; + return { + cancelEpoch: true, + staleHeartbeatFencingMs: policy.staleHeartbeatFencingMs, + lateWriteFencing: policy.lateWriteFencing.enabled, + valuesPrinted: false, + }; +} + +function agentRunCancelCascadeScope(kind: AgentRunResourceKind): string { + if (kind === "task") return "current task attempt -> run -> active command -> runner job"; + if (kind === "session") return "session active work -> active run/command -> session-scoped background work"; + if (kind === "run") return "run active commands -> runner jobs -> run terminal"; + if (kind === "command") return "single command -> current runner job; session remains reusable"; + return "unsupported cancel target"; +} + async function resourceDispatch(config: UniDeskConfig | null, command: string, action: string | undefined, args: string[], options: AgentRunResourceOptions): Promise { const ref = parseResourceRef(action, args, "task"); if (ref.kind !== "task") throw new Error("dispatch supports task/"); @@ -775,16 +865,47 @@ function renderMutationSummary(command: string, raw: Record, op if (id !== null) lines.push(`Name: ${id}`); const decision = stringOrNull(data.decision); const internalCommandType = stringOrNull(data.internalCommandType); - if (data.dryRun !== undefined) lines.push(`DryRun: ${String(data.dryRun)}`); - if (data.mutation !== undefined) lines.push(`Mutation: ${String(data.mutation)}`); + const dryRun = data.dryRun !== undefined ? data.dryRun : raw.dryRun; + const mutation = data.mutation !== undefined ? data.mutation : raw.mutation; + if (dryRun !== undefined) lines.push(`DryRun: ${String(dryRun)}`); + if (mutation !== undefined) lines.push(`Mutation: ${String(mutation)}`); if (decision !== null) lines.push(`Decision: ${decision}`); if (internalCommandType !== null) lines.push(`InternalCommandType: ${internalCommandType}`); + lines.push(...renderCancelLifecycleMutationLines(record(data.cancelLifecycle ?? raw.cancelLifecycle))); const next = record(raw.next ?? data.next); const nextLines = (overrideNextLines ?? Object.values(next).map(String)).filter((line) => line.length > 0).slice(0, 5); if (nextLines.length > 0) lines.push("", "Next:", ...nextLines.map((line) => ` ${line}`)); return renderedCliResult(raw.ok !== false, command, lines.join("\n")); } +function renderCancelLifecycleMutationLines(lifecycle: Record): string[] { + if (Object.keys(lifecycle).length === 0) return []; + const authority = record(lifecycle.authority); + const runnerAbort = record(lifecycle.runnerAbort); + const fencing = record(lifecycle.fencing); + const expectedStages = Array.isArray(lifecycle.expectedStages) ? lifecycle.expectedStages.map(String).filter((value) => value.length > 0) : []; + const node = stringOrNull(authority.node); + const lane = stringOrNull(authority.lane); + const target = node !== null && lane !== null ? `${node}/${lane}` : "-"; + const lines = ["", "CancelLifecycle:"]; + lines.push(` Authority: ${displayValue(authority.transport)} policy=${displayValue(authority.policySource)} lane=${target}`); + const namespace = stringOrNull(authority.namespace); + const deployment = stringOrNull(authority.managerDeployment); + if (namespace !== null || deployment !== null) lines.push(` Runtime: ns=${displayValue(namespace)} manager=${displayValue(deployment)}`); + const cascadeScope = stringOrNull(lifecycle.cascadeScope); + if (cascadeScope !== null) lines.push(` Cascade: ${cascadeScope}`); + if (Object.keys(runnerAbort).length > 0) { + lines.push(` RunnerAbort: mode=${displayValue(runnerAbort.deliveryMode)} gracefulMs=${displayValue(runnerAbort.gracefulAbortMs)} killMs=${displayValue(runnerAbort.killEscalationMs)}`); + } + if (Object.keys(fencing).length > 0) { + lines.push(` Fencing: cancelEpoch=${displayValue(fencing.cancelEpoch)} staleHeartbeatMs=${displayValue(fencing.staleHeartbeatFencingMs)} lateWrite=${displayValue(fencing.lateWriteFencing)}`); + } + if (expectedStages.length > 0) lines.push(` Stages: ${expectedStages.join(", ")}`); + const terminalAuthority = stringOrNull(lifecycle.terminalAuthority); + if (terminalAuthority !== null) lines.push(` Terminal: ${terminalAuthority}`); + return lines; +} + function rerunWithoutDryRun(command: string): string { return `bun scripts/cli.ts ${command.replace(/\s+--dry-run\b/gu, "").trim()}`; }