feat: add AgentRun cancel lifecycle policy (#859)
Co-authored-by: Codex <codex@noreply.local>
This commit is contained in:
@@ -72,7 +72,7 @@ AgentRun queue 生命周期不是一个单独的 `queue lifecycle` 命令,而
|
||||
1. 默认总览用 `get tasks --queue commander --limit 20`,只看 task state、queue/lane、run/cmd/rjob/session ref、age 和 attention。
|
||||
2. 单任务用 `describe task/<taskId>`,读取 `latestAttempt.runId`、`commandId`、`runnerJobId`、`sessionId/sessionPath` 和少量 `Next:`。
|
||||
3. Run 级状态用 `events run/<runId>` 和 `result run/<runId> --command <commandId>`,判断 terminalClassification、failureKind、provider interruption、timeoutBudget 和 recoveryActions。
|
||||
4. Command 级状态用 `describe command/<commandId> --run <runId>` 和 `result command/<commandId> --run <runId>`,确认 command state、ack、terminal status 和结果摘要;确认为单个 active command 卡住时,用 `cancel command/<commandId> --run <runId> --reason <text>` 清理该 command,保留同一个 session 后再用 `send session/<sessionId>` 续跑。
|
||||
4. Command 级状态用 `describe command/<commandId> --run <runId>` 和 `result command/<commandId> --run <runId>`,确认 command state、ack、terminal status 和结果摘要;确认为单个 active command 卡住时,先用 `cancel command/<commandId> --run <runId> --reason <text> --dry-run` 核对 `CancelLifecycle` 的 authority、cascade、runner abort 和 fencing,再去掉 `--dry-run` 清理该 command,保留同一个 session 后再用 `send session/<sessionId>` 续跑。
|
||||
5. Runner job 只读状态用 `describe runnerjob/<runnerJobId> --run <runId>`,确认 env image reuse、jobName、namespace、phase、exitCode、retention 和 `valuesPrinted=false`。不要为了这些字段手动调用 `trans G14:k3s kubectl ...`。
|
||||
6. Runtime runner Job/Pod retention 或 operator 明确要求强杀 runner 时,不属于单个 task/session 资源原语;使用 `bun scripts/cli.ts agentrun control-plane cleanup-runners --node <node> --lane <lane> [--force-active] --dry-run|--confirm`。普通 cleanup 只删 inactive selected runner;`--force-active` 会中断 active run/command/session,必须先 dry-run 确认 selection,并且仍应优先于裸 `kubectl delete pod/job`。
|
||||
7. Session trace/output 只在 `describe task` 或 result 里有实际 `sessionId` 时使用 `logs|ack|send|cancel session/<sessionId>`;`sessionRef=null` 时不要猜 session 命令。用户级 follow-up 一律使用 `send session/<sessionId>`,不要回到旧 `turn/steer` 或 `sessions ...` 兼容路径。
|
||||
@@ -80,6 +80,8 @@ AgentRun queue 生命周期不是一个单独的 `queue lifecycle` 命令,而
|
||||
|
||||
默认视图必须低噪声且不是 JSON envelope,`-o json|yaml` 才输出稳定机器结构,`--raw` 才保留直连 AgentRun REST envelope;命令返回里的下一步应优先是 `bun scripts/cli.ts agentrun ...` 资源原语,不得把人工 k8s 查询作为日常下一步。
|
||||
|
||||
AgentRun cancel 策略由 `config/agentrun.yaml` 的 lane 级 `deployment.runner.cancelLifecycle` 管理;操作 D601、G14 或其他非默认 lane 时必须带 `--node/--lane --dry-run` 先确认 YAML policy,不要依赖全局默认或手动 k8s 强杀来替代资源原语。
|
||||
|
||||
## HWLAB Code Agent 入口整合
|
||||
|
||||
HWLAB Code Agent / CaseRun follow-up 的日常派单也归入 AgentRun 资源原语:新任务用 `create task --aipod Artificer` 或包含 HWLAB gitbundle 的 `apply -f -`;运行中纠偏用 `send session/<sessionId> --aipod Artificer`。需要验证 HWLAB Web/Cloud API 原入口时,仍按 `$hwlab-code-agent` 使用 G14 `/root/hwlab-v02` 的 `hwlab-cli client agent ...` 拉取同一 trace/result/inspect;不要回到旧 `codex submit/resume/steer`。
|
||||
|
||||
@@ -174,6 +174,21 @@ controlPlane:
|
||||
ageBasedCleanup:
|
||||
enabled: false
|
||||
maxAgeHours: 48
|
||||
cancelLifecycle:
|
||||
deliveryMode: manager-epoch
|
||||
gracefulAbortMs: 15000
|
||||
killEscalationMs: 30000
|
||||
staleHeartbeatFencingMs: 900000
|
||||
lateWriteFencing:
|
||||
enabled: true
|
||||
eventStages:
|
||||
- accepted
|
||||
- persisted
|
||||
- delivered
|
||||
- aborting
|
||||
- terminalized
|
||||
- fenced
|
||||
- late-write-rejected
|
||||
localPostgres:
|
||||
enabled: true
|
||||
serviceName: agentrun-v01-postgres
|
||||
@@ -361,6 +376,21 @@ controlPlane:
|
||||
ageBasedCleanup:
|
||||
enabled: false
|
||||
maxAgeHours: 48
|
||||
cancelLifecycle:
|
||||
deliveryMode: manager-epoch
|
||||
gracefulAbortMs: 15000
|
||||
killEscalationMs: 30000
|
||||
staleHeartbeatFencingMs: 900000
|
||||
lateWriteFencing:
|
||||
enabled: true
|
||||
eventStages:
|
||||
- accepted
|
||||
- persisted
|
||||
- delivered
|
||||
- aborting
|
||||
- terminalized
|
||||
- fenced
|
||||
- late-write-rejected
|
||||
localPostgres:
|
||||
enabled: false
|
||||
gitMirror:
|
||||
|
||||
@@ -118,6 +118,7 @@ PipelineRun 失败或长时间未完成时,先按定点 `control-plane status
|
||||
- `codex deploy <commitId>` 是旧 Code Queue 兼容部署入口,已禁用以防止维护通道直连 D601 部署 Code Queue;当前 dev 自动化只做 `ci run-dev-e2e` smoke,不提供 Code Queue CD,详细规则见 `docs/reference/codex-deploy.md`。
|
||||
- `agentrun get|describe|events|logs|result|ack|cancel|dispatch|create|apply|send` 是当前指挥官新任务和 AgentRun session 控制入口。UniDesk CLI 是 render-only client:客户端保留 k8s 风格命令解析、human 表格、生命周期摘要、下一步命令、分页、`-o json|yaml` 稳定客户端 schema 和错误展示;AgentRun 服务端只提供稳定 RESTful API、鉴权和业务事实,不承载 UniDesk CLI 渲染。日常查看用 `get tasks --queue commander`、`describe task/<taskId>`、`events run/<runId>`、`logs session/<sessionId>`、`result run/<runId> --command <commandId>`;日常写入用 `create task --aipod Artificer --prompt-stdin`、`apply -f -`、`dispatch task/<taskId>`、`send session/<sessionId>`、`ack/cancel task|session/<id>`。用户级 CLI 取消 `turn` 和 `steer` 路径;`send session/<sessionId>` 是唯一 session follow-up 写入口,AgentRun 服务端按 durable session/run/command 状态自动决定内部 `steer` 或新 `turn`,dry-run 必须真实返回这个 decision 且不写状态。兼容 group `queue|runs|commands|runner|sessions|aipod-specs` 也走同一 direct HTTP transport,`--raw` 只披露直连 AgentRun REST envelope。
|
||||
- `agentrun` 资源原语的默认 transport 是直连 AgentRun REST API,配置来源是 UniDesk 自有 YAML `config/agentrun.yaml`。不带 `--node`/`--lane` 时按 YAML 的默认 manager `baseUrl` 访问;显式 `--node <node> --lane <lane>` 时按同一 YAML 选中 runtime lane,经 `lane-k8s-service-proxy` 进入 manager `internalBaseUrl`,并用 manager pod env 中声明的 API key metadata 发起请求;输出只披露 node/lane/namespace/baseUrl/auth env metadata 和 `valuesPrinted=false`,不得打印 key value。该模式用于 D601 `agentrun-v02` 等非默认 lane 的资源原语操作与证据采集,尤其是 `get/describe/events/logs/result`,不替代 `agentrun control-plane ...` 发布或运维控制。鉴权可以复用 `HWLAB_API_KEY` 的环境变量/固定文件发现风格,但不得依赖 HWLAB runtime、HWLAB backend-core、HWLAB frontend 代理或 SSH official CLI;多一层转发会增加故障面,不能作为正式路径。`agentrun control-plane ...` 和 `git-mirror ...` 仍属于 G14 source/runtime 运维控制路径,可以继续使用 UniDesk SSH capture bridge;这些控制面路径不得反向成为 queue/session 资源原语的默认 transport。
|
||||
- `agentrun cancel ... --dry-run` 必须显示 `CancelLifecycle` 摘要:transport/authority、YAML lane、cascade scope、runner abort 窗口、cancel epoch 与 late-write fencing。取消策略来自 `config/agentrun.yaml` 的 `controlPlane.lanes.<lane>.deployment.runner.cancelLifecycle`;字段缺失或 lane 选择错误应暴露为配置错误,不得在 CLI、manifest 或服务里补隐式默认。操作非默认 lane 时先加 `--node <node> --lane <lane> --dry-run` 核对 policy,再移除 `--dry-run` 发起真实取消。
|
||||
- `agentrun control-plane expose --dry-run|--confirm` 按 `config/agentrun.yaml` 维护 AgentRun 公网 HTTPS 入口,模式与 Sub2API 暴露一致:G14 AgentRun runtime 通过 frpc 出到 master `127.0.0.1:<remotePort>`,master Caddy 提供 `https://agentrun.74-48-78-17.nip.io/`。该命令只补 master `frps` allow port 和 Caddy vhost;G14 frpc Deployment/ConfigMap 必须由 AgentRun `deploy/deploy.json` + GitOps render 管理,不能在 UniDesk 侧手写 Kubernetes manifest。
|
||||
- `codex submit/enqueue`、`codex steer`、`codex resume`、`codex queue create`、`codex queue merge`、`codex move`、旧 Web 提交表单、旧队列管理和旧 workdir 管理是冻结的 legacy Code Queue 写入口。CLI 必须返回 `ok=false`、`frozen=true`、`degradedReason=legacy-code-queue-frozen` 和 AgentRun 替代命令;服务端旧 API 写入口必须返回 410。新任务、session follow-up、events/logs/result、ack 和 cancel 走 AgentRun 资源原语,其中 session follow-up 只用 `agentrun send session/<sessionId>`。
|
||||
- 旧 Code Queue 只保留历史归档、只读排障和残留任务停止。`codex task/tasks/output/read/unread/queues` 继续通过 backend-core 私有代理读取旧 PostgreSQL 历史;`codex interrupt|cancel <taskId>` 只用于停止旧运行面残留任务。旧 `steer-confirm` 只作为历史 trace confirmation 查询,不是新任务控制入口。
|
||||
|
||||
@@ -38,6 +38,7 @@ AgentRun核心负责把 HWLAB Agent 任务转化为可持久查询、可调度
|
||||
- Postgres durable store 中 runs、commands、events、runner jobs、sessions、backends、leases、Queue 引用和 migration ledger 的事实持久化。
|
||||
- command 终态、run 终态、failureKind、result envelope、event 分页和日志/trace 脱敏边界。
|
||||
- runner job identity、attempt、logPath、pod identity、stale lease recovery 和 runner replacement 的核心执行语义。
|
||||
- task、run、command、session 取消请求落到 AgentRun 核心后的 command/run 级取消状态机、cancel epoch、runner abort、terminal `canceled` 和迟到写回 fencing。
|
||||
|
||||
### 2.3 范围外
|
||||
|
||||
@@ -57,6 +58,9 @@ AgentRun核心负责把 HWLAB Agent 任务转化为可持久查询、可调度
|
||||
| command | run 内的一次 turn、steer、interrupt 或 cancel 指令,具备独立状态和终态。 |
|
||||
| event | 单 run 内 append-only、按 `seq` 单调递增的执行事实记录。 |
|
||||
| terminal status | command 或 run 的权威终态,不由 partial output、stdout、transport close 或 idle timeout 推断。 |
|
||||
| cancel request | 由 Queue、Session、run 或 command 控制入口提交的取消意图,必须持久化 requestId、targetRef、reason、requestedBy 和作用范围。 |
|
||||
| cancel epoch | AgentRun 用于隔离取消前后写入的单调 fencing token;runner、terminal report 和 late write 必须携带或接受该 epoch 校验。 |
|
||||
| canceled terminal | 取消成功后的权威终态,区别于 completed、failed、timeout 和 transport close。 |
|
||||
| failureKind | AgentRun 对 schema、tenant policy、Secret、runner、backend、provider、infra 和 cancel 等失败的结构化分类。 |
|
||||
| durable facts | Postgres 中可重启后查询的 run、command、event、runner、job、session、backend、lease 和 migration 事实。 |
|
||||
|
||||
@@ -84,6 +88,7 @@ AgentRun核心负责把 HWLAB Agent 任务转化为可持久查询、可调度
|
||||
| PJ2026-01020105 | 控制面恢复 | 本规格 6.6 | manager boot/background reconciler、runner job observation 和 active command 收敛 | Durable事实、Kubernetes Job/Pod | HWLAB 接入、发布Lane、运维监控 |
|
||||
| PJ2026-01020106 | 终态Outbox | 本规格 6.7 | runner terminal fact 的幂等提交、重试和可恢复 artifact | Runner执行、Backend Profile | 控制面恢复、HWLAB 投影 |
|
||||
| PJ2026-01020107 | 清理安全 | 本规格 6.8 | cleanup、Job TTL、runner 上限与 active runner 保护 | 控制面恢复、发布Lane | 平台运维、运行面 GC |
|
||||
| PJ2026-01020108 | 取消生命周期 | 本规格 6.9 | cancel request、cascade scope、epoch fencing、runner abort 和 canceled terminal | Queue会话、Manager API、Runner执行 | 客户端、运维监控、HWLAB接入 |
|
||||
|
||||
### 5.1 控制面恢复目标架构图
|
||||
|
||||
@@ -161,6 +166,28 @@ sequenceDiagram
|
||||
| Job completed before terminal commit | Job/Pod phase、outbox/artifact、logs retention | reconciler 恢复 terminal fact 或写不可恢复 blocker | 把 Job completed 单独当 command completed |
|
||||
| cleanup/TTL race | DB terminal state、runnerJob observation、TTL config | terminal durable 后再清理可恢复证据 | 只按 Pod 列表或年龄清理 active runner |
|
||||
|
||||
### 5.5 cancel lifecycle 关键时序图
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as Queue / Session / CLI
|
||||
participant M as agentrun-mgr authority
|
||||
participant DB as durable ledger
|
||||
participant R as runner
|
||||
participant B as backend/tool/process
|
||||
|
||||
C->>M: cancel targetRef + reason
|
||||
M->>DB: persist cancelRequest + next cancel epoch
|
||||
M->>R: deliver cancel epoch for active command/run
|
||||
R->>B: abort stream/tool/process
|
||||
R->>DB: terminal report canceled with epoch
|
||||
M->>DB: seal command/run canceled
|
||||
R-->>DB: late write with old epoch
|
||||
DB-->>R: reject fenced write
|
||||
```
|
||||
|
||||
cancel lifecycle 必须把“接受取消请求”和“运行器已经中止”分成可观测阶段。用户或上游只能把 `canceled terminal` 作为取消完成事实;`cancel requested`、HTTP 连接关闭、runner pod 消失、timeout/watchdog 或缺少新输出都不能单独代表取消完成。
|
||||
|
||||
## 6. 原子需求
|
||||
|
||||
### 6.1 AR-CORE-REQ-001 Durable Resource 模型
|
||||
@@ -244,3 +271,15 @@ terminal 上报必须幂等。同一 `runId + commandId + attemptId + runnerId`
|
||||
AgentRun cleanup、Job TTL 和 runner 上限治理必须以 DB active facts 与 Kubernetes observation 双确认作为判断基础。默认 cleanup 不得杀 active runner;确需强制终止 active runner 时必须使用显式 force 语义,并写入原因、操作者、对象和 observed facts。
|
||||
|
||||
Job TTL 和日志/termination metadata 保留窗口不得早于 terminal facts durable commit 所需的最低恢复窗口。runner 上限治理应优先清理 idle、terminal、expired 或不可恢复对象;不得只根据 Pod 列表、Job age、进程内计数或旧 lease 单字段清理 runner。
|
||||
|
||||
### 6.9 AR-CORE-REQ-009 Cancel lifecycle 与 fencing
|
||||
|
||||
| 编号 | 短名 | 主责模块 | 关联模块 |
|
||||
| --- | --- | --- | --- |
|
||||
| AR-CORE-REQ-009 | 取消生命周期 | PJ2026-01020108 取消生命周期 | [队列会话](PJ2026-010203-queue-session.md)、[YAML运维](PJ2026-010603-yaml-first-ops.md)、[运维监控](PJ2026-010605-observability-monitoring.md) |
|
||||
|
||||
AgentRun核心应提供 task/run/command/session 控制入口落到核心执行面后的统一取消生命周期。Manager 接受取消请求时必须持久化 cancel request、targetRef、cascade scope、reason、requestedBy、requestId 和 cancel epoch;重复取消同一目标应幂等返回既有或更高 epoch 的取消事实。
|
||||
|
||||
取消必须通过 runner 可执行的 abort 信号兑现。runner 收到 cancel epoch 后应中止 provider stream、tool call、后台任务和子进程,并用 grace kill 与强制 kill 或等价机制完成资源释放;中止结果必须以 command/run terminal `canceled`、failureKind 或 cancellation classification 写回 durable store。partial output、transport close、idle timeout、missing terminal watchdog 或 runner pod 消失不得冒充 `completed` 或 `canceled`。
|
||||
|
||||
Manager 和 durable store 必须用 cancel epoch 对迟到写回做 fencing。取消请求后的旧 epoch event、terminal report、result envelope 或 runner heartbeat 不得覆盖 sealed canceled terminal;被拒绝的迟到写回应产生可查询的 low-noise event、diagnostic 或 span attribute,使 CLI 能判断 cancel 在 accepted、persisted、delivered、aborting、terminalized、fenced 或 late-write-rejected 哪个阶段。
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
- Session API/CLI 的输出、trace、命令流、debug/audit 详情、read 状态和会话控制。
|
||||
- Queue task 到 Core run/command/runner job/session 的引用关系和 sessionPath 输出。
|
||||
- `sessions send`、session continuation、unread/default/all 状态和 terminal projection。
|
||||
- Queue task、Session、run 和 command 取消入口的用户语义、级联边界、同 session 续跑和 canceled terminal projection。
|
||||
- 自动 scheduler 的 deferred 边界、future pending scan、capacity selection、runner assignment 和 stale lease recovery 方向。
|
||||
|
||||
### 2.3 范围外
|
||||
@@ -58,6 +59,7 @@
|
||||
| sessionPath | Queue task 返回的 Session API 路径,用于读取输出和 trace。 |
|
||||
| read cursor | 按 readerId 记录的已读水位,用于默认列表只显示 running 或 unread session/task。 |
|
||||
| commander | Queue 侧的聚合视图,用于展示队列状态、最新 attempt 和下一步操作摘要。 |
|
||||
| cancel scope | 用户发起取消时选择的资源边界,包括 task、session、run 或 command。 |
|
||||
| Scheduler | 后续自动调度器,负责 pending scan 和 runner assignment;`v0.1` 第一阶段不作为发布前置。 |
|
||||
|
||||
## 4. 系统边界和接口
|
||||
@@ -81,6 +83,7 @@
|
||||
| PJ2026-01020302 | Session控制 | 本规格 6.2 | output、trace、send、read、cancel、default/unread/all 视图 | Core events、Runtime session | CLI、客户端、用户 |
|
||||
| PJ2026-01020303 | 分层边界 | 本规格 6.3 | Queue、Session、Core 和 Scheduler 的职责分离 | Queue任务、Session控制 | 发布流水、客户端 |
|
||||
| PJ2026-01020304 | Scheduler边界 | 本规格 6.4 | 自动调度 deferred、future scan/assignment/recovery | Queue pending facts、capacity | Runner job、AgentRun Core |
|
||||
| PJ2026-01020305 | 取消控制 | 本规格 6.5 | task/session/run/command 取消语义、级联关系和 canceled projection | AgentRun核心取消生命周期 | CLI、客户端、commander |
|
||||
|
||||
## 6. 原子需求
|
||||
|
||||
@@ -123,3 +126,15 @@ Queue 不维护 OA、notification、GitHub action 或外部协作 sink,不从
|
||||
队列会话应保留自动 scheduler 的规格边界,但 `v0.1` 第一阶段不得因为 scheduler 未实现而阻塞最小真实闭环。
|
||||
|
||||
在 scheduler 启用前,CLI/manual dispatch 必须能启动真实 runner,manager durable facts、runner claim、backend turn、events 和 terminal status 必须真实可用。未来 scheduler 只能通过 manager API 和 Kubernetes runner Job 改变调度状态,不得直接写 Postgres 或直接执行 backend。
|
||||
|
||||
### 6.5 AR-QUEUE-REQ-005 取消控制语义
|
||||
|
||||
| 编号 | 短名 | 主责模块 | 关联模块 |
|
||||
| --- | --- | --- | --- |
|
||||
| AR-QUEUE-REQ-005 | 取消控制 | PJ2026-01020305 取消控制 | [AgentRun核心](PJ2026-010201-agentrun-core.md)、[客户端](PJ2026-0104-client.md)、[运维监控](PJ2026-010605-observability-monitoring.md) |
|
||||
|
||||
队列会话应把用户可见取消语义表达为明确的资源 scope。`cancel command/<commandId>` 只取消当前 command/runner job,并保留同一个 session 可继续 `send session/<sessionId>`;`cancel run/<runId>` 取消该 run 下 active command 和 runner job,并由 Core terminalize run;`cancel session/<sessionId>` 取消 session 当前 active work 和 session-scoped background work;`cancel task/<taskId>` 取消当前 task attempt/run,并按 Queue retry/resume 语义更新 task projection。
|
||||
|
||||
Queue 和 Session 只能提交取消意图、展示取消阶段和投影 canceled terminal,不得从 HTTP 成功、timeout、stdout 停止、runner job 删除或 Web 轮询无新增内容推断取消完成。取消完成事实必须来自 [AgentRun核心](PJ2026-010201-agentrun-core.md) 的 durable command/run terminal、cancel epoch fencing 和 result/event 证据。
|
||||
|
||||
CLI、commander 和客户端默认输出应展示 cancel scope、targetRef、cascade scope、request id 或等价关联字段、当前阶段和下一步查询命令;完整审计字段通过 machine output 或详细视图获取。取消后的 follow-up 必须显式引用仍可续跑的 `sessionId`,不得回退到旧 Code Queue `resume/steer` 或通过复制历史 prompt 伪造会话连续性。
|
||||
|
||||
@@ -39,6 +39,7 @@ YAML运维负责 HWLAB/UniDesk 自有平台配置的真相源、解析、渲染
|
||||
- target、lane、node、service、namespace、endpoint、publicExposure 和运行目标解析。
|
||||
- Secret sourceRef、targetKey、providerCredential、manual binding source 和敏感输出约束。
|
||||
- Sub2API、Codex pool、AgentRun control-plane、session policy 和平台基础设施配置的受控 CLI 读取、解释、计划和下发。
|
||||
- AgentRun lane 的 runner retention、idle timeout、egress proxy 和 cancel lifecycle policy 等运行策略配置读取、解释和下发。
|
||||
- FRP、Caddy、public URL、public health、Kubernetes Secret 和平台资源渲染所需的配置投递边界。
|
||||
- 可复用 ops primitive,包括 YAML path 捕获、字段解析、fingerprint、摘要输出、Secret 引用和命令输出约束。
|
||||
|
||||
@@ -61,6 +62,7 @@ YAML运维负责 HWLAB/UniDesk 自有平台配置的真相源、解析、渲染
|
||||
| sourceRef | YAML 中指向密钥来源的声明,输出时只能显示来源标识和摘要,不显示密钥值。 |
|
||||
| targetKey | YAML 中声明运行面 Secret 或配置对象接收某项密钥的 key 名。 |
|
||||
| providerCredential | AgentRun lane 中声明 provider profile 与运行面 Secret 绑定关系的配置项。 |
|
||||
| cancel lifecycle policy | AgentRun lane 中声明取消投递、runner abort、kill escalation、stale fencing 和事件阶段输出的配置块;具体数值以 YAML 为准。 |
|
||||
| publicExposure | YAML 中描述 FRP、Caddy、domain、TLS、public URL 和 health 目标的公开入口声明。 |
|
||||
| ops primitive | 平台运维 CLI 共享的底层能力,例如字段解析、fingerprint、Secret 引用、摘要输出和 YAML path 捕获。 |
|
||||
| 配置解释输出 | CLI 将 YAML 解析后的默认值、来源和目标以非敏感摘要展示给操作人员的输出。 |
|
||||
@@ -143,9 +145,11 @@ YAML运维应从 publicExposure 和 target 声明渲染 FRP、Caddy、public URL
|
||||
| --- | --- | --- | --- |
|
||||
| OPS-YAML-REQ-005 | 执行策略 | PJ2026-01060305 执行策略 | [Agent编排](PJ2026-0102-agent-orchestration.md)、[用户管理](PJ2026-0105-user-management.md) |
|
||||
|
||||
YAML运维应为 AgentRun control-plane default、client sessionPolicy、lane secret providerCredential、workspace 和 execution policy 提供配置读取与解释,使 AgentRun 运维入口不依赖代码内固定 profile、namespace 或执行策略。
|
||||
YAML运维应为 AgentRun control-plane default、client sessionPolicy、lane secret providerCredential、workspace、execution policy 和 cancel lifecycle policy 提供配置读取与解释,使 AgentRun 运维入口不依赖代码内固定 profile、namespace、取消超时或执行策略。
|
||||
|
||||
本需求只约束执行策略如何作为平台配置进入运行面。Agent run、command、session 状态机、任务恢复和 provider 业务语义由 Agent编排负责,用户身份和 API key 约束由用户管理负责。
|
||||
cancel lifecycle policy 至少应能声明取消信号投递方式、runner graceful abort、kill escalation、stale heartbeat fencing window、late write fencing 和默认事件阶段输出开关。CLI 只校验字段结构、类型、必填项和可渲染性;具体窗口、超时和开关值由 YAML 承载,不在代码或 SPEC 中写成第二真相。
|
||||
|
||||
本需求只约束执行策略如何作为平台配置进入运行面。Agent run、command、session 状态机、任务恢复、取消语义和 provider 业务语义由 Agent编排负责,用户身份和 API key 约束由用户管理负责。
|
||||
|
||||
### 6.6 OPS-YAML-REQ-006 公共 ops primitive
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
- OpenTelemetry Collector、trace backend、span 语义、trace context 传播和 trace 查询入口。
|
||||
- Web/API/AgentRun/HWPOD/Harness/用户管理等服务的运行面健康、资源状态、公开入口健康和用户可感知性能观测。
|
||||
- 发布后 runtime readiness、resource usage、error rate、queue depth、target availability 和 alert 状态摘要。
|
||||
- AgentRun rolling recovery 的 active runner、stale lease、terminal report retry、reconciler backlog、projection lag 和 Job TTL cleanup 观测。
|
||||
- AgentRun rolling recovery 与 cancel lifecycle 的 active runner、stale lease、cancel request、cancel delivery、terminal report retry、reconciler backlog、projection lag、late-write fencing 和 Job TTL cleanup 观测。
|
||||
- 监控和 trace 数据的受控查询、低噪声摘要、失败归因和敏感输出约束。
|
||||
|
||||
### 2.3 范围外
|
||||
@@ -251,11 +251,11 @@ Workbench 性能监控只记录低基数指标、阶段耗时、状态分类和
|
||||
| --- | --- | --- | --- |
|
||||
| OPS-MON-REQ-007 | Rolling恢复 | PJ2026-01060507 Rolling恢复观测 | [AgentRun核心](PJ2026-010201-agentrun-core.md)、[HWLAB接入](PJ2026-010205-hwlab-dispatch.md)、[Workbench唯一投影](PJ2026-0104010803-workbench-unique-projection.md)、[AgentRun发布Lane](PJ2026-01060105-agentrun-v01-release-lane.md) |
|
||||
|
||||
运维监控应为 AgentRun/HWLAB rolling recovery 提供可查询的低基数 metrics、trace span 和受控 CLI 摘要。最小观测对象包括 active runner 数、runner job observation phase、stale lease 数、terminal report retry/outbox backlog、manager reconciler backlog、reconciler last success/error、projection lag、projection blocker 数、Job TTL cleanup 数和不可恢复 blocker 数。
|
||||
运维监控应为 AgentRun/HWLAB rolling recovery 和 cancel lifecycle 提供可查询的低基数 metrics、trace span 和受控 CLI 摘要。最小观测对象包括 active runner 数、runner job observation phase、stale lease 数、cancel request accepted 数、cancel delivered 数、cancel terminalized 数、late-write-fenced 数、terminal report retry/outbox backlog、manager reconciler backlog、reconciler last success/error、projection lag、projection blocker 数、Job TTL cleanup 数和不可恢复 blocker 数。
|
||||
|
||||
rolling recovery 相关 span 应能用 OTel trace id/request id 关联 `sessionId`、`traceId`、`runId`、`commandId`、`runnerJobId` 和 `jobName` 的 redacted attribute。高基数业务 ID 只能作为 trace/span attribute 或 CLI 单次查询参数,不进入 Prometheus label;prompt、assistant 正文、tool 参数、stdout/stderr、Secret、完整 token、完整 DSN 和 provider payload 不得进入默认 metrics、span 或 issue closeout。
|
||||
|
||||
发布/rollout 前后的受控 CLI 摘要应能回答:仍在运行的 runner 数、已恢复控制权数量、terminal report retry 数、projection lag 最大值、不可恢复 blocker 数和最近一次 reconciler 错误。该摘要只用于定位和发布判定,不替代 P6 原入口 rolling/chaos 验收,也不能把可观测性 green 当作业务任务完成。
|
||||
发布/rollout 前后的受控 CLI 摘要应能回答:仍在运行的 runner 数、已恢复控制权数量、cancel 当前阶段分布、terminal report retry 数、projection lag 最大值、不可恢复 blocker 数和最近一次 reconciler 错误。该摘要只用于定位和发布判定,不替代 P6 原入口 rolling/chaos 验收,也不能把可观测性 green 当作业务任务完成。
|
||||
|
||||
## 7. 过程控制
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
// SPEC: PJ2026-01060305 AgentRun execution policy + PJ2026-01020108 cancel lifecycle draft-2026-06-25-p0.
|
||||
// Parses AgentRun YAML lane policy, including cancel lifecycle values owned by config/agentrun.yaml.
|
||||
import { rootPath } from "./config";
|
||||
import {
|
||||
asRecord,
|
||||
@@ -113,6 +115,7 @@ export interface AgentRunLaneSpec {
|
||||
readonly egressProxyUrl: string | null;
|
||||
readonly noProxyExtra: readonly string[];
|
||||
readonly retention: AgentRunRunnerRetentionSpec;
|
||||
readonly cancelLifecycle: AgentRunCancelLifecycleSpec;
|
||||
};
|
||||
readonly localPostgres: {
|
||||
readonly enabled: boolean;
|
||||
@@ -195,6 +198,19 @@ export interface AgentRunRunnerRetentionSpec {
|
||||
};
|
||||
}
|
||||
|
||||
/** Stage names that a lane may list under `cancelLifecycle.eventStages`. */
export type AgentRunCancelLifecycleStage = "accepted" | "persisted" | "delivered" | "aborting" | "terminalized" | "fenced" | "late-write-rejected";
|
||||
|
||||
/**
 * Cancel lifecycle policy of a single AgentRun lane, as read from the lane's
 * `deployment.runner.cancelLifecycle` YAML block.
 *
 * Every field is required; the values themselves are owned by the YAML file.
 */
export interface AgentRunCancelLifecycleSpec {
  /** How cancel requests are delivered; `"manager-epoch"` is the only value the type admits. */
  readonly deliveryMode: "manager-epoch";
  /** Graceful abort window. NOTE(review): unit is presumably milliseconds, per the `Ms` suffix — confirm. */
  readonly gracefulAbortMs: number;
  /** Kill escalation window. NOTE(review): unit is presumably milliseconds, per the `Ms` suffix — confirm. */
  readonly killEscalationMs: number;
  /** Stale heartbeat fencing window. NOTE(review): unit is presumably milliseconds, per the `Ms` suffix — confirm. */
  readonly staleHeartbeatFencingMs: number;
  /** Late-write fencing switch. */
  readonly lateWriteFencing: {
    readonly enabled: boolean;
  };
  /** Cancel stages declared for the lane, in the order written in YAML. */
  readonly eventStages: readonly AgentRunCancelLifecycleStage[];
}
|
||||
|
||||
export interface AgentRunLaneTarget {
|
||||
readonly configPath: string;
|
||||
readonly spec: AgentRunLaneSpec;
|
||||
@@ -312,6 +328,7 @@ export function agentRunLaneSummary(spec: AgentRunLaneSpec): Record<string, unkn
|
||||
egressProxyUrl: spec.deployment.runner.egressProxyUrl,
|
||||
noProxyExtra: spec.deployment.runner.noProxyExtra,
|
||||
retention: spec.deployment.runner.retention,
|
||||
cancelLifecycle: spec.deployment.runner.cancelLifecycle,
|
||||
},
|
||||
localPostgres: spec.deployment.localPostgres,
|
||||
},
|
||||
@@ -551,11 +568,39 @@ function parseDeployment(input: Record<string, unknown>, path: string): AgentRun
|
||||
egressProxyUrl: optionalStringField(runner, "egressProxyUrl", `${path}.runner`) ?? null,
|
||||
noProxyExtra: optionalStringArrayField(runner, "noProxyExtra", `${path}.runner`),
|
||||
retention: parseRunnerRetention(recordField(runner, "retention", `${path}.runner`), `${path}.runner.retention`),
|
||||
cancelLifecycle: parseCancelLifecycle(recordField(runner, "cancelLifecycle", `${path}.runner`), `${path}.runner.cancelLifecycle`),
|
||||
},
|
||||
localPostgres: parseLocalPostgres(localPostgres, `${path}.localPostgres`),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds the typed cancel lifecycle policy from the raw `cancelLifecycle` record of a lane.
 *
 * No default is filled in here: each field is read through a field helper, so a
 * missing or malformed value is reported by that helper.
 * NOTE(review): the helpers are defined outside this view; they presumably throw
 * an Error that names the offending path — confirm.
 *
 * @param input Raw `cancelLifecycle` record taken from the lane's runner section.
 * @param path Dotted config path of `input`, used as the prefix in error messages.
 * @returns The validated cancel lifecycle policy.
 */
function parseCancelLifecycle(input: Record<string, unknown>, path: string): AgentRunCancelLifecycleSpec {
  // The nested record is resolved first so that its absence is reported before any scalar field.
  const lateWriteFencing = recordField(input, "lateWriteFencing", path);
  return {
    deliveryMode: enumField(input, "deliveryMode", path, ["manager-epoch"]),
    gracefulAbortMs: positiveIntegerField(input, "gracefulAbortMs", path),
    killEscalationMs: positiveIntegerField(input, "killEscalationMs", path),
    staleHeartbeatFencingMs: positiveIntegerField(input, "staleHeartbeatFencingMs", path),
    lateWriteFencing: {
      enabled: booleanField(lateWriteFencing, "enabled", `${path}.lateWriteFencing`),
    },
    eventStages: parseCancelLifecycleStages(input.eventStages, `${path}.eventStages`),
  };
}
|
||||
|
||||
/**
 * Validates the `eventStages` list of a cancel lifecycle policy.
 *
 * Checks run in this order: the value must be an array, must not be empty, every
 * entry must be a known stage name, and no stage may appear more than once.
 *
 * @param input Raw `eventStages` value from YAML.
 * @param path Dotted config path of `input`, used as the prefix in error messages.
 * @returns A new array holding the stages in their declared order.
 * @throws Error when any of the checks above fails.
 */
function parseCancelLifecycleStages(input: unknown, path: string): readonly AgentRunCancelLifecycleStage[] {
  const allowed: readonly AgentRunCancelLifecycleStage[] = ["accepted", "persisted", "delivered", "aborting", "terminalized", "fenced", "late-write-rejected"];
  const isStage = (value: unknown): value is AgentRunCancelLifecycleStage =>
    typeof value === "string" && (allowed as readonly string[]).includes(value);

  if (!Array.isArray(input)) throw new Error(`${path} must be an array`);
  if (input.length === 0) throw new Error(`${path} must declare at least one stage`);

  // Every entry is validated before duplicates are looked at, so an unknown
  // stage is always reported ahead of a duplicate one.
  const stages: AgentRunCancelLifecycleStage[] = [];
  for (let index = 0; index < input.length; index += 1) {
    const value: unknown = input[index];
    if (!isStage(value)) throw new Error(`${path}[${index}] must be one of ${allowed.join(", ")}`);
    stages.push(value);
  }

  // A Set keeps insertion order, so repeated stages are listed in the order
  // their second occurrence appears in the input.
  const seen = new Set<AgentRunCancelLifecycleStage>();
  const repeated = new Set<AgentRunCancelLifecycleStage>();
  for (const stage of stages) {
    if (seen.has(stage)) repeated.add(stage);
    else seen.add(stage);
  }
  if (repeated.size > 0) throw new Error(`${path} must not contain duplicate stages: ${[...repeated].join(", ")}`);

  return stages;
}
|
||||
|
||||
function parseRunnerRetention(input: Record<string, unknown>, path: string): AgentRunRunnerRetentionSpec {
|
||||
const selectors = recordField(input, "selectors", path);
|
||||
const ageBasedCleanup = recordField(input, "ageBasedCleanup", path);
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
// SPEC: PJ2026-01060305 AgentRun execution policy + PJ2026-01020108 cancel lifecycle draft-2026-06-25-p0.
|
||||
// Renders AgentRun YAML lane policy into runtime manager environment.
|
||||
import { createHash } from "node:crypto";
|
||||
import type { AgentRunLaneSpec } from "./agentrun-lanes";
|
||||
|
||||
@@ -449,6 +451,12 @@ function managerEnv(spec: AgentRunLaneSpec, sourceCommit: string, imageRef: stri
|
||||
{ name: "AGENTRUN_RUNNER_RETENTION_JOB_NAME_PREFIXES", value: spec.deployment.runner.retention.selectors.jobNamePrefixes.join(",") },
|
||||
{ name: "AGENTRUN_RUNNER_RETENTION_AGE_BASED_CLEANUP_ENABLED", value: String(spec.deployment.runner.retention.ageBasedCleanup.enabled) },
|
||||
...(spec.deployment.runner.retention.ageBasedCleanup.maxAgeHours === null ? [] : [{ name: "AGENTRUN_RUNNER_RETENTION_AGE_BASED_MAX_AGE_HOURS", value: String(spec.deployment.runner.retention.ageBasedCleanup.maxAgeHours) }]),
|
||||
{ name: "AGENTRUN_CANCEL_DELIVERY_MODE", value: spec.deployment.runner.cancelLifecycle.deliveryMode },
|
||||
{ name: "AGENTRUN_CANCEL_GRACEFUL_ABORT_MS", value: String(spec.deployment.runner.cancelLifecycle.gracefulAbortMs) },
|
||||
{ name: "AGENTRUN_CANCEL_KILL_ESCALATION_MS", value: String(spec.deployment.runner.cancelLifecycle.killEscalationMs) },
|
||||
{ name: "AGENTRUN_CANCEL_STALE_HEARTBEAT_FENCING_MS", value: String(spec.deployment.runner.cancelLifecycle.staleHeartbeatFencingMs) },
|
||||
{ name: "AGENTRUN_CANCEL_LATE_WRITE_FENCING_ENABLED", value: String(spec.deployment.runner.cancelLifecycle.lateWriteFencing.enabled) },
|
||||
{ name: "AGENTRUN_CANCEL_EVENT_STAGES", value: spec.deployment.runner.cancelLifecycle.eventStages.join(",") },
|
||||
...(spec.deployment.runner.egressProxyUrl === null ? [] : [{ name: "AGENTRUN_RUNNER_EGRESS_PROXY_URL", value: spec.deployment.runner.egressProxyUrl }]),
|
||||
...(spec.deployment.runner.noProxyExtra.length === 0 ? [] : [{ name: "AGENTRUN_RUNNER_NO_PROXY_EXTRA", value: spec.deployment.runner.noProxyExtra.join(",") }]),
|
||||
{ name: "AGENTRUN_API_KEY", valueFrom: { secretKeyRef: spec.deployment.manager.apiKeySecretRef } },
|
||||
|
||||
+128
-7
@@ -1,3 +1,5 @@
|
||||
// SPEC: PJ2026-01020108 cancel lifecycle + PJ2026-01020305 cancel control + PJ2026-01060305 AgentRun execution policy draft-2026-06-25-p0.
|
||||
// Exposes AgentRun cancel lifecycle policy and dry-run visibility in the UniDesk CLI.
|
||||
import { chmodSync, copyFileSync, existsSync, readFileSync, statSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { spawnSync } from "node:child_process";
|
||||
@@ -13,6 +15,7 @@ import {
|
||||
agentRunPipelineRunName,
|
||||
agentRunProviderCredentialRefs,
|
||||
resolveAgentRunLaneTarget,
|
||||
type AgentRunCancelLifecycleSpec,
|
||||
type AgentRunLaneSpec,
|
||||
} from "./agentrun-lanes";
|
||||
import {
|
||||
@@ -620,7 +623,7 @@ async function resourceCancel(config: UniDeskConfig | null, command: string, act
|
||||
if (options.reason !== null) cancelArgs.push("--reason", options.reason);
|
||||
if (ref.kind === "command") cancelArgs.push("--run-id", options.runId ?? requiredContext("command cancel", "--run <runId>"));
|
||||
if (options.dryRun) {
|
||||
const result = agentRunResourceCancelDryRunPlan(ref, options, rerunWithoutDryRun(command));
|
||||
const result = agentRunResourceCancelDryRunPlan(config, ref, options, rerunWithoutDryRun(command));
|
||||
return renderMutationSummary(command, result, options, `Planned cancel ${ref.kind}/${shortId(ref.name)}`, [rerunWithoutDryRun(command)]);
|
||||
}
|
||||
const result = ref.kind === "task"
|
||||
@@ -636,21 +639,108 @@ async function resourceCancel(config: UniDeskConfig | null, command: string, act
|
||||
return renderMutationSummary(command, result, options, `${options.dryRun ? "Planned cancel" : "Cancel requested"} ${ref.kind}/${shortId(ref.name)}`, options.dryRun ? [rerunWithoutDryRun(command)] : undefined);
|
||||
}
|
||||
|
||||
/**
 * Builds the dry-run plan for `cancel <kind>/<name>` without sending the request.
 *
 * Each supported kind maps to its own cancel endpoint, and every plan carries the
 * same `cancelLifecycle` disclosure. A command plan additionally carries the
 * run/command reference.
 *
 * @param config Loaded UniDesk config, or null; passed on to the disclosure builder.
 * @param ref Resource being cancelled.
 * @param options Parsed CLI options; `reason` and `runId` are read here.
 * @param confirmCommand Command line that repeats the cancel without `--dry-run`.
 * @returns The plan record produced by `agentRunDryRunPlan`.
 * @throws Error when `ref.kind` is not task, session, run or command.
 */
function agentRunResourceCancelDryRunPlan(config: UniDeskConfig | null, ref: AgentRunResourceRef, options: AgentRunResourceOptions, confirmCommand: string): Record<string, unknown> {
  const body: Record<string, unknown> = {};
  if (options.reason !== null) body.reason = options.reason;
  const cancelLifecycle = agentRunCancelLifecycleDryRunDisclosure(config, ref, options);
  const target = encodeURIComponent(ref.name);

  switch (ref.kind) {
    case "task":
      return agentRunDryRunPlan("task-cancel", `/api/v1/queue/tasks/${target}/cancel`, body, confirmCommand, "POST", { cancelLifecycle });
    case "session":
      return agentRunDryRunPlan("session-cancel", `/api/v1/sessions/${target}/control`, { action: "cancel", ...body }, confirmCommand, "POST", { cancelLifecycle });
    case "run":
      return agentRunDryRunPlan("run-cancel", `/api/v1/runs/${target}/cancel`, body, confirmCommand, "POST", { cancelLifecycle });
    case "command": {
      // A command is only addressable inside its run, so the run id is mandatory here.
      const runId = options.runId ?? requiredContext("command cancel", "--run <runId>");
      return agentRunDryRunPlan("command-cancel", `/api/v1/commands/${target}/cancel`, body, confirmCommand, "POST", {
        commandRef: { runId, commandId: ref.name, valuesPrinted: false },
        cancelLifecycle,
      });
    }
    default:
      throw new Error("cancel supports task/<taskId>, session/<sessionId>, run/<runId>, or command/<commandId>");
  }
}
|
||||
|
||||
/**
 * Assembles the `cancelLifecycle` section shown by `cancel ... --dry-run`.
 *
 * The policy is taken from the lane resolved by `resolveAgentRunCancelPolicyTarget`.
 * When no lane can be resolved, `expectedStages` is empty and `runnerAbort` is null.
 *
 * @param config Loaded UniDesk config, or null.
 * @param ref Resource being cancelled.
 * @param options Parsed CLI options; `runId` is read here and the whole object is forwarded to the lane resolver.
 * @returns Disclosure record with authority, target, cascade scope, policy summary and follow-up query commands.
 */
function agentRunCancelLifecycleDryRunDisclosure(config: UniDeskConfig | null, ref: AgentRunResourceRef, options: AgentRunResourceOptions): Record<string, unknown> {
  const target = resolveAgentRunCancelPolicyTarget(config, options);
  const policy = target?.spec.deployment.runner.cancelLifecycle ?? null;
  // `describe command/<id>` and `result command/<id>` both need the owning run.
  const commandRunFlag = ref.kind === "command" ? ` --run ${options.runId ?? "<runId>"}` : "";
  // For a run the target itself is the run to inspect; otherwise fall back to --run.
  const eventsRunId = ref.kind === "run" ? ref.name : options.runId;
  return {
    specRefs: ["PJ2026-01020108", "PJ2026-01020305", "PJ2026-01060305"],
    authority: agentRunCancelAuthorityDisclosure(target),
    targetRef: {
      kind: ref.kind,
      name: ref.name,
      runId: options.runId ?? null,
      valuesPrinted: false,
    },
    cascadeScope: agentRunCancelCascadeScope(ref.kind),
    terminalAuthority: "AgentRun Core canceled terminal/result event",
    expectedStages: policy?.eventStages ?? [],
    runnerAbort: policy === null ? null : agentRunCancelRunnerAbortDisclosure(policy),
    fencing: agentRunCancelFencingDisclosure(policy),
    verification: {
      describe: `bun scripts/cli.ts agentrun describe ${ref.kind}/${ref.name}${commandRunFlag}`,
      events: eventsRunId !== null ? `bun scripts/cli.ts agentrun events run/${eventsRunId} --after-seq 0` : null,
      logs: ref.kind === "session" ? `bun scripts/cli.ts agentrun logs session/${ref.name} --tail 100` : null,
      result: ref.kind === "command" ? `bun scripts/cli.ts agentrun result command/${ref.name}${commandRunFlag}` : null,
      valuesPrinted: false,
    },
    valuesPrinted: false,
  };
}
|
||||
|
||||
/**
 * Picks the lane spec whose cancel policy should be disclosed.
 *
 * Resolution order:
 * 1. the active REST target, when one is set (reported as "selected-lane");
 * 2. `null` when no configuration is loaded;
 * 3. the lane resolved from `options.node` / `options.lane`, reported as
 *    "selected-lane" if either option was given and "default-lane" otherwise.
 *
 * @param config Loaded configuration, or `null`.
 * @param options Parsed CLI options supplying `node` and `lane`.
 * @returns The config path, lane spec and source label, or `null` when nothing can be resolved.
 */
function resolveAgentRunCancelPolicyTarget(config: UniDeskConfig | null, options: AgentRunResourceOptions): { configPath: string; spec: AgentRunLaneSpec; source: "selected-lane" | "default-lane" } | null {
  const active = activeAgentRunRestTarget;
  if (active !== null) {
    return { configPath: active.configPath, spec: active.spec, source: "selected-lane" };
  }
  if (config === null) {
    return null;
  }

  const resolved = resolveAgentRunLaneTarget({ node: options.node, lane: options.lane });
  const explicitlySelected = options.node !== null || options.lane !== null;
  return {
    configPath: resolved.configPath,
    spec: resolved.spec,
    source: explicitlySelected ? "selected-lane" : "default-lane",
  };
}
|
||||
|
||||
/**
 * Describes which manager a cancel request would be sent to.
 *
 * The transport is "lane-k8s-service-proxy" when an active REST target is set
 * and "direct-http" otherwise. With an active target the base URL comes from
 * the lane spec's `runtime.internalBaseUrl`; without one it comes from
 * `agentRunDirectManagerBaseUrl()`. Lane fields fall back to `null`, and
 * `policySource` falls back to "unavailable", when `target` is `null`.
 *
 * @param target Resolved policy target, or `null`.
 * @returns A plain object with transport, policy source, lane identity and base URL.
 */
function agentRunCancelAuthorityDisclosure(target: { configPath: string; spec: AgentRunLaneSpec; source: "selected-lane" | "default-lane" } | null): Record<string, unknown> {
  const viaLane = activeAgentRunRestTarget !== null;
  const spec = target?.spec;

  let baseUrl: unknown;
  if (viaLane) {
    baseUrl = spec?.runtime.internalBaseUrl ?? null;
  } else {
    baseUrl = agentRunDirectManagerBaseUrl();
  }

  return {
    transport: viaLane ? "lane-k8s-service-proxy" : "direct-http",
    policySource: target?.source ?? "unavailable",
    node: spec?.nodeId ?? null,
    lane: spec?.lane ?? null,
    namespace: spec?.runtime.namespace ?? null,
    managerDeployment: spec?.runtime.managerDeployment ?? null,
    baseUrl,
    laneConfigPath: target?.configPath ?? null,
    valuesPrinted: false,
  };
}
|
||||
|
||||
/**
 * Reads the manager base URL from the AgentRun client config.
 *
 * Best effort: any error raised while reading the config is swallowed and
 * reported as `null`, so building a disclosure never fails on this lookup.
 *
 * @returns `manager.baseUrl` from the client config, or `null` when it cannot be read.
 */
function agentRunDirectManagerBaseUrl(): string | null {
  let baseUrl: string | null;
  try {
    baseUrl = readAgentRunClientConfig().manager.baseUrl;
  } catch {
    baseUrl = null;
  }
  return baseUrl;
}
|
||||
|
||||
/**
 * Projects the runner-abort fields of a cancel policy into a disclosure object.
 *
 * @param policy Cancel lifecycle policy to read from.
 * @returns `deliveryMode`, `gracefulAbortMs` and `killEscalationMs` copied from the policy, plus `valuesPrinted: false`.
 */
function agentRunCancelRunnerAbortDisclosure(policy: AgentRunCancelLifecycleSpec): Record<string, unknown> {
  const { deliveryMode, gracefulAbortMs, killEscalationMs } = policy;
  return { deliveryMode, gracefulAbortMs, killEscalationMs, valuesPrinted: false };
}
|
||||
|
||||
/**
 * Builds the fencing section of the cancel disclosure.
 *
 * `cancelEpoch` is always reported as `true`. Without a policy the object only
 * adds `policySource: "unavailable"`; with a policy it adds the stale-heartbeat
 * window and whether late-write fencing is enabled.
 *
 * @param policy Cancel lifecycle policy, or `null` when none could be resolved.
 * @returns The fencing disclosure, always ending with `valuesPrinted: false`.
 */
function agentRunCancelFencingDisclosure(policy: AgentRunCancelLifecycleSpec | null): Record<string, unknown> {
  const disclosure: Record<string, unknown> = { cancelEpoch: true };
  if (policy === null) {
    disclosure.policySource = "unavailable";
  } else {
    disclosure.staleHeartbeatFencingMs = policy.staleHeartbeatFencingMs;
    disclosure.lateWriteFencing = policy.lateWriteFencing.enabled;
  }
  disclosure.valuesPrinted = false;
  return disclosure;
}
|
||||
|
||||
/**
 * Returns the human-readable cascade description for a cancel target kind.
 *
 * @param kind Resource kind being cancelled.
 * @returns The cascade text for task, session, run or command; "unsupported cancel target" for any other kind.
 */
function agentRunCancelCascadeScope(kind: AgentRunResourceKind): string {
  switch (kind) {
    case "task":
      return "current task attempt -> run -> active command -> runner job";
    case "session":
      return "session active work -> active run/command -> session-scoped background work";
    case "run":
      return "run active commands -> runner jobs -> run terminal";
    case "command":
      return "single command -> current runner job; session remains reusable";
    default:
      return "unsupported cancel target";
  }
}
|
||||
|
||||
async function resourceDispatch(config: UniDeskConfig | null, command: string, action: string | undefined, args: string[], options: AgentRunResourceOptions): Promise<RenderedCliResult> {
|
||||
const ref = parseResourceRef(action, args, "task");
|
||||
if (ref.kind !== "task") throw new Error("dispatch supports task/<taskId>");
|
||||
@@ -775,16 +865,47 @@ function renderMutationSummary(command: string, raw: Record<string, unknown>, op
|
||||
if (id !== null) lines.push(`Name: ${id}`);
|
||||
const decision = stringOrNull(data.decision);
|
||||
const internalCommandType = stringOrNull(data.internalCommandType);
|
||||
if (data.dryRun !== undefined) lines.push(`DryRun: ${String(data.dryRun)}`);
|
||||
if (data.mutation !== undefined) lines.push(`Mutation: ${String(data.mutation)}`);
|
||||
const dryRun = data.dryRun !== undefined ? data.dryRun : raw.dryRun;
|
||||
const mutation = data.mutation !== undefined ? data.mutation : raw.mutation;
|
||||
if (dryRun !== undefined) lines.push(`DryRun: ${String(dryRun)}`);
|
||||
if (mutation !== undefined) lines.push(`Mutation: ${String(mutation)}`);
|
||||
if (decision !== null) lines.push(`Decision: ${decision}`);
|
||||
if (internalCommandType !== null) lines.push(`InternalCommandType: ${internalCommandType}`);
|
||||
lines.push(...renderCancelLifecycleMutationLines(record(data.cancelLifecycle ?? raw.cancelLifecycle)));
|
||||
const next = record(raw.next ?? data.next);
|
||||
const nextLines = (overrideNextLines ?? Object.values(next).map(String)).filter((line) => line.length > 0).slice(0, 5);
|
||||
if (nextLines.length > 0) lines.push("", "Next:", ...nextLines.map((line) => ` ${line}`));
|
||||
return renderedCliResult(raw.ok !== false, command, lines.join("\n"));
|
||||
}
|
||||
|
||||
/**
 * Renders the `CancelLifecycle:` section of a mutation summary.
 *
 * An empty lifecycle object yields no lines. Otherwise the section starts with
 * a blank line and the heading, always prints the Authority line, and prints
 * Runtime, Cascade, RunnerAbort, Fencing, Stages and Terminal only when the
 * corresponding data is present.
 *
 * @param lifecycle The `cancelLifecycle` object taken from the response, already coerced to a record.
 * @returns The text lines to append to the summary, in display order.
 */
function renderCancelLifecycleMutationLines(lifecycle: Record<string, unknown>): string[] {
  if (Object.keys(lifecycle).length === 0) return [];

  const out: string[] = ["", "CancelLifecycle:"];

  // Authority: the lane label needs both node and lane, otherwise it shows "-".
  const authority = record(lifecycle.authority);
  const nodeId = stringOrNull(authority.node);
  const laneName = stringOrNull(authority.lane);
  const laneLabel = nodeId === null || laneName === null ? "-" : `${nodeId}/${laneName}`;
  out.push(` Authority: ${displayValue(authority.transport)} policy=${displayValue(authority.policySource)} lane=${laneLabel}`);

  const runtimeNamespace = stringOrNull(authority.namespace);
  const managerDeployment = stringOrNull(authority.managerDeployment);
  if (!(runtimeNamespace === null && managerDeployment === null)) {
    out.push(` Runtime: ns=${displayValue(runtimeNamespace)} manager=${displayValue(managerDeployment)}`);
  }

  const cascade = stringOrNull(lifecycle.cascadeScope);
  if (cascade !== null) {
    out.push(` Cascade: ${cascade}`);
  }

  const abort = record(lifecycle.runnerAbort);
  if (Object.keys(abort).length > 0) {
    out.push(` RunnerAbort: mode=${displayValue(abort.deliveryMode)} gracefulMs=${displayValue(abort.gracefulAbortMs)} killMs=${displayValue(abort.killEscalationMs)}`);
  }

  const fence = record(lifecycle.fencing);
  if (Object.keys(fence).length > 0) {
    out.push(` Fencing: cancelEpoch=${displayValue(fence.cancelEpoch)} staleHeartbeatMs=${displayValue(fence.staleHeartbeatFencingMs)} lateWrite=${displayValue(fence.lateWriteFencing)}`);
  }

  // Stages are stringified and empty entries dropped before joining.
  const stages = Array.isArray(lifecycle.expectedStages)
    ? lifecycle.expectedStages.map(String).filter((stage) => stage.length > 0)
    : [];
  if (stages.length > 0) {
    out.push(` Stages: ${stages.join(", ")}`);
  }

  const terminal = stringOrNull(lifecycle.terminalAuthority);
  if (terminal !== null) {
    out.push(` Terminal: ${terminal}`);
  }

  return out;
}
|
||||
|
||||
/**
 * Turns a dry-run command line into the command to run for real.
 *
 * Strips every standalone `--dry-run` flag (with the whitespace before it),
 * trims the result and prefixes it with `bun scripts/cli.ts`.
 *
 * The flag must be followed by whitespace or the end of the string. A plain
 * `\b` is not enough: `-` counts as a word boundary, so a longer flag such as
 * `--dry-run-report` would be cut down to `-report`.
 *
 * @param command CLI arguments after `bun scripts/cli.ts`.
 * @returns The full command line without the `--dry-run` flag.
 */
function rerunWithoutDryRun(command: string): string {
  const withoutFlag = command.replace(/\s+--dry-run(?=\s|$)/gu, "").trim();
  return `bun scripts/cli.ts ${withoutFlag}`;
}
|
||||
|
||||
Reference in New Issue
Block a user