fix: improve hwlab v02 cli observability

2026-05-31 09:36:50 +00:00
parent ab2369c909
commit a37d2b1374
5 changed files with 283 additions and 8 deletions
@@ -43,7 +43,7 @@ CI/CD、GitOps、rollout、artifact 发布、PR 合并后的 DEV/PROD 滚动、P
 - `artifact-registry plan|render|status|health|install|deploy-backend-core|deploy-service` 管理 D601 host-managed CNCF Distribution registry 的声明、安装、只读检查和 pull-only artifact CD。该 registry 固定为 D601 loopback `127.0.0.1:5000`，由 systemd + Docker Compose 管理，位于 native k3s 故障域外；`deploy-service` 只拉取 CI 已发布的 commit-pinned 镜像、retag/recreate 或导入 native k3s，并做 live commit 验证，不构建 runtime source。`deploy-backend-core` 是 deprecated 兼容名，标准 backend-core prod CD 入口是 `deploy apply --env prod --service backend-core`。长期规则见 `docs/reference/artifact-registry.md`。
 - `commander contract|plan --dry-run|smoke --dry-run|approval request --dry-run|prompt-lint --kind gpt55-pr` 是 host Codex 指挥官直管微服务 skeleton 入口。当前命令返回 `phase=source-contract`、service/API/state/bridge/prompt/trace/#20/#46/ClaudeQQ 审批边界、.state/commander/ 状态模型、dev 无 daemon smoke contract、dry-run 计划和 GPT-5.5 PR prompt 边界辅助 lint，不接 live bridge、不注入 prompt、不发送 ClaudeQQ。`approval request --dry-run` 会生成 200 字以内中文纯文本 ClaudeQQ 审批草案、`notification-path-unavailable` blocker 和授权后唯一可用的 `bun scripts/cli.ts microservice proxy claudeqq /api/push/text --method POST --body-json '<payload>' --raw` 命令；不得提示使用本机 ClaudeQQ skill、powershell 或本地 server。`prompt-lint` 支持 `--prompt-file` 与 `--stdin`，输出 `ok`、`missingClauses`、`riskLevel`、`suggestedPatchSnippet` 且不回显完整 prompt；它是 commander 辅助检查，不是业务 PR 门禁，也不改变 `codex submit` 默认行为。`plan`、`smoke` 与 `approval request` 必须带 `--dry-run`；缺少时返回 `error=dry-run-required`。长期规则见 `docs/reference/host-codex-commander.md`。
 - `hwlab g14 monitor-prs [--once] [--dry-run] [--interval-seconds N] [--max-cycles N] [--timeout-seconds N]` 是当前 HWLAB G14 PR -> CI/CD -> DEV rollout 的一行式入口。普通调用创建 `.state/jobs/` 异步 job 并立刻返回 `job.id`、`statusCommand` 和 stdout/stderr 路径；后台 worker 每轮通过 UniDesk `gh pr list/preflight/merge` 监控 `pikasTech/HWLAB` base=`G14` 的 open PR，ready 时合并，然后通过 UniDesk `ssh G14:k3s` 观察 `hwlab-g14-ci-poll-<short>`、Argo `hwlab-g14-dev` 和 DEV `/health/live`，直到 DEV `Synced/Healthy` 且 Deployment/StatefulSet ready；历史 `Completed` smoke/debug pod 不作为 rollout blocker。每次成功 DEV rollout 后，worker 会定位或创建 #7“指挥简报索引”中的北京日期每日简报 issue，并追加 CI/CD 耗时、CI/CD 关键指标、语义化上线 changelog、自动 diff 摘要、PipelineRun、GitOps revision 和 DEV 验证摘要；关键指标来自 G14 Tekton TaskRun results，固定包含 `lazy build reused: x/y`、reused services、rebuild services 和每个 service 的独立耗时/状态/backend，用于观察 lazy build 机制效果。语义化 changelog 优先从 PR body 的 `## 修改`/`## 变更`/`## Changelog` 等段落提取，diff 摘要只作为文件和统计证据保留，不替代 changelog。也可用 `hwlab g14 record-rollout --pr <number> --source-commit <sha>` 手动补记，手动补记同样会按 PipelineRun 采集 TaskRun 指标。状态指针按用途分离：长期监控只写 `.state/hwlab-g14/latest-monitor-job.json`，`--once` 写 `latest-once-job.json`，`--dry-run` 写 `latest-dry-run-job.json`，`--once --dry-run` 写 `latest-once-dry-run-job.json`，避免一次性收口覆盖持续监控入口。`--once --dry-run` 只做单轮监控和 merge plan，不写 GitHub、不等待 rollout。该命令禁止使用原生 `gh` 或手拼 GitHub 请求；如果 UniDesk `gh` 子命令字段或行为不够，必须先改进 `scripts/src/gh.ts` 后再使用。
- `hwlab g14 control-plane status|apply --lane v02 [--dry-run|--confirm]` 是 HWLAB `v0.2` 加法 lane 的受控 Tekton/Argo 控制面维护入口，只面向 G14 `/root/hwlab-v02`、branch `v0.2`、namespace `hwlab-ci` 和 Argo application `hwlab-g14-v02`；`status` 只读汇总 pipeline、RBAC/ServiceAccount、Argo、当前 commit PipelineRun、最近 PipelineRun 摘要、活跃 PipelineRun 和遗留 v02 CronJob 清理状态，默认只读取必要字段，禁止把完整 PipelineRun spec、Tekton 内联脚本或历史大对象展开到默认输出；`apply` 先在 G14 workspace 快进并执行 render check，再经 `G14:k3s` server-side apply `tekton-v02/rbac.yaml`、`pipeline.yaml`、`argocd/project.yaml` 和 `argocd/application-v02.yaml`，confirmed apply 会删除遗留 v02 CronJob，但不会应用 runtime-v02 workload、Secret 或数据迁移。
+- `hwlab g14 control-plane status|apply --lane v02 [--dry-run|--confirm]` 是 HWLAB `v0.2` 加法 lane 的受控 Tekton/Argo 控制面维护入口，只面向 G14 `/root/hwlab-v02`、branch `v0.2`、namespace `hwlab-ci` 和 Argo application `hwlab-g14-v02`；`status` 只读汇总 pipeline、RBAC/ServiceAccount、Argo、当前 commit PipelineRun、当前 PipelineRun 的 TaskRun 条件摘要、最近 PipelineRun 摘要、活跃 PipelineRun、遗留 v02 CronJob 清理状态，以及 19666/19667 的 Cloud Web 静态资源和 API live 探针。`webAssets` 必须直接给出 `readonly-rpc` 删除、sidebar/workspace/event panel 关键 CSS、`/health/live` 和 API revision；Cloud Web 静态资源变更时允许 `apiRevision` 与 source commit 不同，但不得把这种差异误判成 19666 未发布。默认只读取必要字段，禁止把完整 PipelineRun spec、Tekton 内联脚本、历史大对象或整份 CSS/HTML 展开到默认输出；`apply` 先在 G14 workspace 快进并执行 render check，再经 `G14:k3s` server-side apply `tekton-v02/rbac.yaml`、`pipeline.yaml`、`argocd/project.yaml` 和 `argocd/application-v02.yaml`，confirmed apply 会删除遗留 v02 CronJob，但不会应用 runtime-v02 workload、Secret 或数据迁移。
 - `hwlab g14 control-plane trigger-current --lane v02 [--dry-run|--confirm]` 是 v02 标准手动触发入口：解析当前 `origin/v0.2` full SHA，创建 commit-pinned `hwlab-v02-ci-poll-<short12>` PipelineRun；读 Git 走 `git-mirror-http.devops-infra.svc.cluster.local`，GitOps promotion 写 `git-mirror-write.devops-infra.svc.cluster.local`；confirmed trigger 在删除/创建 PipelineRun 前会先按当前 source commit render 并 server-side apply v02 Tekton RBAC、Pipeline 与 Argo Application，避免 CI/CD 脚本或 runtime-ready 逻辑已合并但集群仍执行旧 Pipeline 定义；同名 PipelineRun 成功或运行中时拒绝重复触发，失败或不存在时才删除旧对象并重新创建。
  创建 PipelineRun 前会读取 `devops-infra` mirror refs，若 `localV02` 未等于当前 source commit，则自动执行一次受控 manual `git-mirror sync` Job 并复核 ref，复核失败时停止触发，避免 Tekton `prepare-source` 已知失败；services 参数只包含 v02 runtime service matrix，`hwlab-cli` 是固定 repo 短连接源码工具，不进入 PipelineRun service build。
  `--dry-run` 只报告是否会 pre-sync，不创建 Job；confirmed trigger 默认创建 `.state/jobs/` 异步 job 并立刻返回 `job.id`、`statusCommand`、stdout/stderr 路径，避免 git mirror pre-sync 或 PipelineRun 创建期间长时间阻塞；`--wait` 路径也必须向 stderr 输出 `hwlab.v02.trigger.progress` JSON 事件，覆盖 `control-plane-refresh`、`git-mirror-pre-sync`、`delete-existing-pipelinerun` 和 `create-pipelinerun`，避免异步 job 长时间只有启动命令而无法判断卡点；默认 JSON 必须对 `manifest_b64`、长脚本和远端 stdout/stderr 做有界摘要，保留长度与 hash，最终 trigger 结果只返回阶段摘要和关键 tail，完整内容通过 job stdout/stderr 文件渐进披露；只有现场同步调试才显式加 `--wait`；旧 `rerun-current` 只作为输入别名保留。
@@ -88,7 +88,7 @@ CI/CD、GitOps、rollout、artifact 发布、PR 合并后的 DEV/PROD 滚动、P
 - `codex interrupt|cancel <taskId>` 通过 Code Queue 私有代理请求中断；running/judging 任务会请求 D601 当前 agent run 停止，queued/retry_wait 任务的取消也必须保持与 WebUI 相同代理路径，返回有界 task 摘要和后续查询命令。任何需要接触 active run 的动作仍属于 D601 执行面。
 - Code Queue 多队列 lane 由 `codex` 命令命名空间管理：`queues [--full|--all] [--limit N] [--page N|--offset N]` 列表、`queue create <queueId>` 创建、`queue merge <sourceQueueId> --into <targetQueueId>` 合并、`move <taskId> --queue <queueId>` 迁移；这些队列管理入口默认由主 server `code-queue-mgr` 直管 PostgreSQL，仍通过稳定 `code-queue` 用户服务代理路径访问。`codex queues` 默认只返回 active/nonempty/unread/runnable queue 摘要、activity、commanderConcurrency、全局 counts 和 execution diagnostics；`--full` 或 `--all` 只切换为完整队列行视图的一页，仍受 `--limit`/`--page`/`--offset` 分页约束，不再默认携带 deprecated full array。summary 和 full 的稳定机读路径都是 `.data.queues.items[]`，全局元数据固定在 `.data.queues.commanderConcurrency`、`.data.queues.activity`、`.data.queues.counts`、`.data.queues.executionDiagnostics`、`.data.queues.activeTaskIds` 和 `.data.queues.queuedTaskIds`；需要完整 upstream 时使用输出中的 raw command。`commanderConcurrency.activeRunnerCount` / `activity.effectiveActiveTaskCount` 是指挥官并发判断的有效活跃数，`schedulerLocalActiveQueueCount`/`activeQueueIds` 只描述本地 scheduler active-run slots，不能覆盖数据库 running 计数或 heartbeat-fresh runner 计数。旧 full 顶层数组语义已作为 deprecated 兼容信息记录，不再作为 `.data.queues` 主形态。同一个 queue 内部串行执行，不同 queue 之间并行执行。迁移只允许尚未被 scheduler claim 的 `queued`/`retry_wait` 任务，必须满足 `startedAt=null`、`currentAttempt=0` 且没有 active thread/turn；已进入 `running`/`judging` 或已有 claim 标记的任务返回 409，不得被 move/merge 回写成 queued。合并会移动可迁移任务归属并自动删除源 queue 记录，只保留合并后的目标 queue；若 source 或 target queue 存在 active/claimed 任务，合并整体返回 409。合并后的目标 queue 按任务原 `queueEnteredAt`/`createdAt` 时间顺序串行，成功迁移 queued/retry_wait 任务后由 D601 scheduler 轮询推进。
 - 所有 `codex` 查询和管理命令必须走与 WebUI 相同的 backend-core 私有代理路径 `/api/microservices/code-queue/proxy/...`；CLI 不得为了提交、移动、中断、取消或队列管理直接调用 D601 内部 Service、数据库、pod curl 或 k3sctl scheduler 子服务。若该路径失败，应先修复 CLI/backend/provider tunnel 链路，而不是绕过控制面。
- `job list [--limit N] [--include-command]` 与 `job status <jobId|latest> [--tail-bytes N]` 查询 `.state/jobs/` 文件系统状态，是异步命令的可观测入口。`job list` 默认只返回最新 50 条摘要；`job status` 默认只返回 stdout/stderr 末尾 12000 字节，并带 `tailPolicy` 与完整日志路径。
+- `job list [--limit N] [--include-command]` 与 `job status <jobId|latest> [--tail-bytes N]` 查询 `.state/jobs/` 文件系统状态，是异步命令的可观测入口。`job list` 默认只返回最新 50 条摘要，并为已知异步工作流返回轻量 `progress.summary` 与后续查询命令；`job status` 默认返回结构化 `progress`、stdout/stderr 末尾 12000 字节、`tailPolicy` 与完整日志路径。已知工作流应从有界日志尾部抽取阶段、关键对象名和下一步命令，避免为了判断当前阶段而手工打开完整 stdout/stderr。
 - `debug health`、`debug dispatch` 与 `debug task` 走真实内部 core、WebSocket、数据库、provider、系统指标、Docker 状态和 Host SSH 维护桥流程，只用于开发调试，不写入 `TEST.md` 的正式验收步骤。
 - `e2e run [--only pattern[,pattern...]] [--skip pattern[,pattern...]]` 使用 publicHost 派生的公开 production frontend/dev frontend/provider ingress URL，并通过 Docker 内网验证 core API、PostgreSQL、provider self-connection、系统指标曲线、Docker 状态快照、provider.upgrade 预检和 Playwright 前端页面，是交付前的自动化 E2E 门禁；CLI 默认输出 check 状态摘要，完整诊断写入 `resultPath`，日常迭代应优先用 `--only` / `--skip` 跑最小必要集合。

@@ -4,7 +4,7 @@ UniDesk 的可观测性优先级高于静默成功。CLI、服务日志、Docker

 ## CLI Logs

-异步 job 的 stdout 和 stderr 位于 `.state/jobs/`。`job list` 默认只返回最新 50 条摘要；`job status` 会返回有限尾部，避免输出爆炸，同时保留完整日志文件路径便于继续排查。实现必须只读取日志尾部字节，不得先把完整 job 日志读入 CLI 内存。
+异步 job 的 stdout 和 stderr 位于 `.state/jobs/`。`job list` 默认只返回最新 50 条摘要，并为已知异步工作流返回轻量 `progress.summary`；`job status` 会返回结构化 `progress` 与有限尾部，避免输出爆炸，同时保留完整日志文件路径便于继续排查。实现必须只读取日志尾部字节，不得先把完整 job 日志读入 CLI 内存；长时命令的阶段、关键对象名和下一步查询命令应优先沉淀到 `progress`，不能要求调用者先阅读完整日志才能知道是否卡在提交、构建、发布或观测阶段。

 ## Service Logs

@@ -80,8 +80,8 @@ export function rootHelp(): unknown {
      { command: "codex steer-confirm <taskId> --steer-id <id> [--raw]", description: "Read-only lookup for a steerId in task trace so deliveryUnconfirmed can be resolved without resending the corrective prompt." },
      { command: "codex interrupt|cancel <taskId>", description: "Request interrupt for a running Code Queue task, or cancel a queued/retry_wait task, through the same private proxy." },
      { command: "codex (queues [--full|--all] | queue create <queueId> | queue merge <sourceQueueId> --into <targetQueueId> | move <taskId> --queue <queueId>)", description: "List low-noise queue summaries by default, including effective activity counts that distinguish scheduler-local queues, DB running tasks, and heartbeat-fresh runners; full queue rows require --full/--all." },
-      { command: "job list [--limit N] [--include-command]", description: "List async jobs from .state/jobs with a bounded default page." },
-      { command: "job status <jobId|latest> [--tail-bytes N]", description: "Show job state with bounded stdout/stderr tails." },
+      { command: "job list [--limit N] [--include-command]", description: "List async jobs from .state/jobs with a bounded default page and progress summaries." },
+      { command: "job status <jobId|latest> [--tail-bytes N]", description: "Show job state with a structured progress summary and bounded stdout/stderr tails." },
      { command: "debug health", description: "Probe internal core, nodes, system/Docker status, frontend, provider ingress, and public boundary." },
      { command: "debug dispatch [providerId] [docker.ps|provider.upgrade|host.ssh|microservice.http|echo] [--wait-ms N]", description: "Submit a real internal-core dispatch request for CLI debugging." },
      { command: "debug task <taskId|latest>", description: "Read a dispatched task record from internal core for CLI debugging." },
@@ -481,7 +481,7 @@ function jobHelp(): unknown {
      "bun scripts/cli.ts job list [--limit N] [--include-command]",
      "bun scripts/cli.ts job status <jobId|latest> [--tail-bytes N]",
    ],
-    description: "Inspect fire-and-forget job state from .state/jobs without streaming unbounded logs.",
+    description: "Inspect fire-and-forget job state from .state/jobs with structured progress summaries and bounded log tails.",
  };
 }

@@ -43,6 +43,8 @@ const V02_SERVICE_IDS = [
  "hwlab-edge-proxy",
  "hwlab-agent-skills",
 ];
+// Endpoints probed by the v02 `webAssets` status section: the web URL (port 19666) is fetched for
+// `/` and `/styles.css`, the API URL (port 19667) is fetched for `/health/live`.
+const V02_CLOUD_WEB_URL = "http://74.48.78.17:19666";
+const V02_CLOUD_API_URL = "http://74.48.78.17:19667";

 export function v02PipelineServiceIds(): string[] {
  return [...V02_SERVICE_IDS];
@@ -712,11 +714,152 @@ function v02ControlPlaneStatusBundle(sourceCommit: string | null | undefined): C
    `section obsoleteCronJobs kubectl get cronjob -n ${shellQuote(CI_NAMESPACE)} ${shellQuote(V02_POLLER)} ${shellQuote(V02_RECONCILER)} --ignore-not-found -o name`,
    `section argo kubectl get application -n ${shellQuote(ARGO_NAMESPACE)} ${shellQuote(V02_APP)} -o 'jsonpath={.spec.source.targetRevision}{"\\n"}{.spec.source.path}{"\\n"}{.status.sync.revision}{"\\n"}{.status.sync.status}{"\\n"}{.status.health.status}{"\\n"}'`,
    `if [ -n "$pipeline_run" ]; then section pipelineRun kubectl get pipelinerun -n ${shellQuote(CI_NAMESPACE)} "$pipeline_run" -o 'jsonpath={.status.conditions[0].status}{"\\n"}{.status.conditions[0].reason}{"\\n"}{.status.conditions[0].message}{"\\n"}'; else section pipelineRun sh -c 'true'; fi`,
+    `if [ -n "$pipeline_run" ]; then section taskRuns kubectl get taskrun -n ${shellQuote(CI_NAMESPACE)} -l "tekton.dev/pipelineRun=$pipeline_run" -o 'jsonpath={range .items[*]}{.metadata.name}{"\\t"}{.status.conditions[0].status}{"\\t"}{.status.conditions[0].reason}{"\\t"}{.status.startTime}{"\\t"}{.status.completionTime}{"\\n"}{end}'; else section taskRuns sh -c 'true'; fi`,
    `section recentPipelineRuns kubectl get pipelinerun -n ${shellQuote(CI_NAMESPACE)} -l hwlab.pikastech.local/gitops-target=v02 -o ${shellQuote(pipelineRunRowsJsonPath())}`,
+    `section webAssets sh -c ${shellQuote(v02WebAssetsProbeScript())}`,
  ].join("\n");
  return g14K3s(["script", "--", script], 60_000);
 }

+/**
+ * Builds the shell script that probes the v02 Cloud Web static assets and the Cloud API.
+ *
+ * The script prints one `key<TAB>value` line per observation:
+ * - `baseUrl` / `apiUrl`: the URLs being probed.
+ * - `htmlOk`, `cssOk`, `apiHealthOk`: exit status of fetching `/`, `/styles.css` and
+ *   `/health/live` (0 means the fetch succeeded; 127 means neither curl nor wget exists).
+ * - `readonlyNote`, `sidebarFitCss`, `workspaceFitCss`, `eventPanelFitCss`: exit status of a
+ *   `grep` over the fetched HTML/CSS (0 means the pattern was found, 1 means it was not).
+ * - `apiRevision`: the `"revision"` string value extracted from the health response, if any.
+ *
+ * Only these short fields are printed; the fetched HTML/CSS bodies are never echoed.
+ *
+ * @returns the script text, intended to be passed to `sh -c`.
+ */
+function v02WebAssetsProbeScript(): string {
+  return [
+    // Keep going after a failed probe so every field is still reported.
+    "set +e",
+    `base=${shellQuote(V02_CLOUD_WEB_URL)}`,
+    `api=${shellQuote(V02_CLOUD_API_URL)}`,
+    // fetch_url prefers curl, falls back to wget, and returns 127 when neither is installed.
+    "fetch_url() {",
+    "  if command -v curl >/dev/null 2>&1; then",
+    "    curl -fsS --connect-timeout 2 --max-time 5 \"$1\"",
+    "  elif command -v wget >/dev/null 2>&1; then",
+    "    wget -q -T 5 -O - \"$1\"",
+    "  else",
+    "    return 127",
+    "  fi",
+    "}",
+    "printf 'baseUrl\\t%s\\n' \"$base\"",
+    "printf 'apiUrl\\t%s\\n' \"$api\"",
+    "html=$(fetch_url \"$base/\" 2>/dev/null)",
+    "html_code=$?",
+    "printf 'htmlOk\\t%s\\n' \"$html_code\"",
+    // Reports grep's exit status: 0 when the readonly-rpc note is still present in the HTML.
+    "printf 'readonlyNote\\t%s\\n' \"$(printf '%s' \"$html\" | grep -Eiq 'readonly-rpc|复核入口'; printf '%s' \"$?\")\"",
+    "css=$(fetch_url \"$base/styles.css\" 2>/dev/null)",
+    "css_code=$?",
+    "printf 'cssOk\\t%s\\n' \"$css_code\"",
+    // Each CSS check looks for one specific `grid-template-rows` declaration in styles.css.
+    "printf 'sidebarFitCss\\t%s\\n' \"$(printf '%s' \"$css\" | grep -Eq 'grid-template-rows:[[:space:]]*auto[[:space:]]+auto[[:space:]]+auto[[:space:]]+auto[[:space:]]+minmax\\(0,[[:space:]]*1fr\\)'; printf '%s' \"$?\")\"",
+    "printf 'workspaceFitCss\\t%s\\n' \"$(printf '%s' \"$css\" | grep -Eq 'grid-template-rows:[[:space:]]*auto[[:space:]]+minmax\\(0,[[:space:]]*1fr\\)'; printf '%s' \"$?\")\"",
+    "printf 'eventPanelFitCss\\t%s\\n' \"$(printf '%s' \"$css\" | grep -Eq 'grid-template-rows:[[:space:]]*auto[[:space:]]+minmax\\(132px,[[:space:]]*1fr\\)'; printf '%s' \"$?\")\"",
+    "health=$(fetch_url \"$api/health/live\" 2>/dev/null)",
+    "health_code=$?",
+    "printf 'apiHealthOk\\t%s\\n' \"$health_code\"",
+    // sed's leading `.*` is greedy, so the last "revision" key on a line wins; head keeps one line.
+    "printf 'apiRevision\\t%s\\n' \"$(printf '%s' \"$health\" | sed -n 's/.*\"revision\"[[:space:]]*:[[:space:]]*\"\\([0-9A-Za-z._-]*\\)\".*/\\1/p' | head -1)\"",
+  ].join("\n");
+}
+
+/**
+ * Converts the tab-separated TaskRun listing of one PipelineRun into a compact summary.
+ *
+ * Each non-blank line is read as `name, status, reason, startTime, completionTime` separated by
+ * tabs. The condition status is bucketed as `True` -> succeeded, `False` -> failed,
+ * `Unknown` -> running, and anything else (including a missing status) -> unknown.
+ *
+ * @param text stdout of the TaskRun query.
+ * @param commandOk whether the query succeeded; when false only the error envelope is returned.
+ * @param pipelineRun name of the PipelineRun the TaskRuns belong to, echoed back.
+ * @param exitCode exit code of the query, reported only in the error envelope.
+ * @param stderr stderr text, trimmed and capped at 2000 characters in the error envelope.
+ * @returns counts, per-TaskRun items and a one-line summary, or the error envelope.
+ */
+function taskRunsCompactFromText(text: string, commandOk: boolean, pipelineRun: string | null, exitCode: number | null, stderr: string): Record<string, unknown> {
+  const counts = { succeeded: 0, failed: 0, running: 0, unknown: 0 };
+  if (!commandOk) {
+    return {
+      ok: false,
+      pipelineRun,
+      exitCode,
+      stderr: stderr.trim().slice(0, 2000),
+      counts,
+      items: [],
+    };
+  }
+  const items: Record<string, unknown>[] = [];
+  for (const rawLine of text.split(/\r?\n/u)) {
+    const row = rawLine.trim();
+    if (row.length === 0) continue;
+    const columns = row.split("\t");
+    const status = columns[1] || null;
+    const startTime = columns[3] ?? "";
+    const completionTime = columns[4] ?? "";
+    items.push({
+      name: columns[0] ?? "",
+      status,
+      reason: columns[2] || null,
+      startTime: startTime || null,
+      completionTime: completionTime || null,
+      durationSeconds: secondsBetween(startTime, completionTime),
+    });
+    // Tally the condition status while the row is at hand.
+    if (status === "True") counts.succeeded += 1;
+    else if (status === "False") counts.failed += 1;
+    else if (status === "Unknown") counts.running += 1;
+    else counts.unknown += 1;
+  }
+  const disclosure = items.length > 0 ? "complete taskrun condition summary" : "no taskruns observed yet";
+  return {
+    ok: true,
+    pipelineRun,
+    counts,
+    items,
+    summary: `taskruns succeeded=${counts.succeeded} failed=${counts.failed} running=${counts.running} unknown=${counts.unknown}`,
+    disclosure,
+  };
+}
+
+/**
+ * Turns the tab-separated output of the `webAssets` probe section into a structured status object.
+ *
+ * Every line with a non-empty first column is read as `key<TAB>value`; a later line with the same
+ * key overwrites an earlier one. Fetch and grep fields hold shell exit statuses, so `"0"` means
+ * success or "pattern found". `readonlyNote` is inverted: that check passes when the value is `"1"`,
+ * i.e. grep did not find the readonly note in the HTML.
+ *
+ * @param text stdout of the probe section.
+ * @param commandOk whether the probe section itself ran successfully.
+ * @param sourceCommit source commit, echoed back and compared with the API revision for `note`.
+ * @param argoSyncRevision Argo sync revision, echoed back (empty values become null).
+ * @param exitCode exit code of the probe section, echoed back.
+ * @param stderr stderr text; reported (trimmed, capped at 2000 characters) only when `commandOk` is false.
+ * @returns overall verdict, summary line, per-check booleans, raw probe exit codes and the API revision.
+ */
+function v02WebAssetsFromText(text: string, commandOk: boolean, sourceCommit: string | null, argoSyncRevision: string | null, exitCode: number | null, stderr: string): Record<string, unknown> {
+  const fields: Record<string, string> = {};
+  for (const line of text.split(/\r?\n/u)) {
+    const tabAt = line.indexOf("\t");
+    const key = tabAt === -1 ? line : line.slice(0, tabAt);
+    if (key.length === 0) continue;
+    fields[key] = tabAt === -1 ? "" : line.slice(tabAt + 1);
+  }
+  const exitedZero = (name: string): boolean => fields[name] === "0";
+  const checks = {
+    htmlOk: exitedZero("htmlOk"),
+    cssOk: exitedZero("cssOk"),
+    readonlyNoteAbsent: fields.readonlyNote === "1",
+    sidebarFitCss: exitedZero("sidebarFitCss"),
+    workspaceFitCss: exitedZero("workspaceFitCss"),
+    eventPanelFitCss: exitedZero("eventPanelFitCss"),
+    apiHealthOk: exitedZero("apiHealthOk"),
+  };
+  const failedChecks = Object.entries(checks)
+    .filter(([, passed]) => !passed)
+    .map(([name]) => name);
+  const allPassed = commandOk && failedChecks.length === 0;
+  const apiRevision = fields.apiRevision || null;
+  const revisionDiffers = Boolean(apiRevision) && Boolean(sourceCommit) && apiRevision !== sourceCommit;
+  let summary = "19666/19667 probes passed";
+  if (!allPassed) {
+    summary = `19666/19667 probe issues: ${failedChecks.join(", ") || "command failed"}`;
+  }
+  return {
+    ok: allPassed,
+    summary,
+    baseUrl: fields.baseUrl || V02_CLOUD_WEB_URL,
+    apiUrl: fields.apiUrl || V02_CLOUD_API_URL,
+    sourceCommit,
+    argoSyncRevision: argoSyncRevision || null,
+    checks,
+    probeExitCodes: {
+      html: numericField(fields.htmlOk),
+      css: numericField(fields.cssOk),
+      readonlyNoteGrep: numericField(fields.readonlyNote),
+      sidebarFitCssGrep: numericField(fields.sidebarFitCss),
+      workspaceFitCssGrep: numericField(fields.workspaceFitCss),
+      eventPanelFitCssGrep: numericField(fields.eventPanelFitCss),
+      apiHealth: numericField(fields.apiHealthOk),
+    },
+    apiRevision,
+    // A differing API revision is expected for static-asset-only releases, so it is a note, not a failure.
+    note: revisionDiffers
+      ? "cloud-api image revision can differ when a change only republishes Cloud Web static assets; use webAssets.checks for 19666 frontend asset readiness."
+      : null,
+    exitCode,
+    stderr: commandOk ? "" : stderr.trim().slice(0, 2000),
+  };
+}
+
+/**
+ * Parses a probe field as a finite number.
+ *
+ * @param value raw field text, or undefined when the probe did not report the field.
+ * @returns the parsed number, or null when the field is missing, blank or not a finite number.
+ */
+function numericField(value: string | undefined): number | null {
+  const isBlank = value === undefined || value.trim().length === 0;
+  if (isBlank) return null;
+  const candidate = Number(value);
+  if (!Number.isFinite(candidate)) return null;
+  return candidate;
+}
+
 function listV02PipelineRunsCompactFromText(text: string, commandOk: boolean, command: string[] | string, exitCode: number | null, stderr: string, limit = 8, nowMs = Date.now()): Record<string, unknown> {
  if (!commandOk) {
    return {
@@ -1184,6 +1327,8 @@ function v02ControlPlaneStatus(sourceCommitInput?: string | null): Record<string
  const obsoleteCronJobs = sections.obsoleteCronJobs;
  const argo = sections.argo;
  const pipelineRunSection = sections.pipelineRun;
+  const taskRunsSection = sections.taskRuns;
+  const webAssetsSection = sections.webAssets;
  const recentPipelineRuns = listV02PipelineRunsCompactFromText(
    sections.recentPipelineRuns?.stdout ?? "",
    shellSectionOk(sections.recentPipelineRuns),
@@ -1237,6 +1382,21 @@ function v02ControlPlaneStatus(sourceCommitInput?: string | null): Record<string
        pipelineRunSection?.exitCode ?? null,
        bundle.stderr,
      ),
+    taskRuns: taskRunsCompactFromText(
+      taskRunsSection?.stdout ?? "",
+      shellSectionOk(taskRunsSection),
+      pipelineRun,
+      taskRunsSection?.exitCode ?? null,
+      bundle.stderr,
+    ),
+    webAssets: v02WebAssetsFromText(
+      webAssetsSection?.stdout ?? "",
+      shellSectionOk(webAssetsSection),
+      sourceCommit,
+      syncRevision,
+      webAssetsSection?.exitCode ?? null,
+      bundle.stderr,
+    ),
    activePipelineRuns,
    recentPipelineRuns,
    query: {
@@ -24,6 +24,19 @@ export interface JobRecord {
  note: string;
 }

+/** Structured progress digest attached to job listings and job status output. */
+export interface JobProgressSummary {
+  // "hwlab-v02-trigger" for the known v02 trigger workflow (or when its progress events were seen); otherwise "generic".
+  kind: "hwlab-v02-trigger" | "generic";
+  // `stage` of the most recent progress event, or null when none was observed.
+  stage: string | null;
+  // `status` of the most recent progress event, or null when none was observed.
+  stageStatus: string | null;
+  // Source commit taken from the last event, falling back to a match in the stdout tail.
+  sourceCommit: string | null;
+  // PipelineRun name taken from the last event, falling back to a match in the stdout tail.
+  pipelineRun: string | null;
+  // true when stdout shows the PipelineRun was created, false when the create stage failed, null when unknown.
+  pipelineCreated: boolean | null;
+  // `at` value of the most recent progress event.
+  lastEventAt: string | null;
+  // Number of progress events found in the inspected stderr tail (not the whole log).
+  eventsObserved: number;
+  // One-line human-readable digest of the fields above.
+  summary: string;
+  // Suggested follow-up CLI command, or null when there is nothing to suggest.
+  nextCommand: string | null;
+}
+
 export interface StartJobOptions {
  runner?: "local" | "docker";
  dockerImage?: string;
@@ -142,6 +155,7 @@ export async function runJob(id: string): Promise<JobRecord> {
 }

 export function jobWithTail(job: JobRecord, maxBytes = 12000): JobRecord & {
+  progress: JobProgressSummary;
  tailPolicy: {
    requestedTailBytes: number;
    stdoutBytes: number;
@@ -155,8 +169,12 @@ export function jobWithTail(job: JobRecord, maxBytes = 12000): JobRecord & {
 } {
  const stdoutBytes = existsSync(job.stdoutFile) ? statSync(job.stdoutFile).size : 0;
  const stderrBytes = existsSync(job.stderrFile) ? statSync(job.stderrFile).size : 0;
+  const progressTailBytes = Math.max(maxBytes, 96_000);
+  const stdoutProgressTail = tailFile(job.stdoutFile, progressTailBytes);
+  const stderrProgressTail = tailFile(job.stderrFile, progressTailBytes);
  return {
    ...job,
+    progress: summarizeJobProgress(job, progressTailBytes, { stdoutTail: stdoutProgressTail, stderrTail: stderrProgressTail }),
    tailPolicy: {
      requestedTailBytes: maxBytes,
      stdoutBytes,
@@ -165,11 +183,107 @@ export function jobWithTail(job: JobRecord, maxBytes = 12000): JobRecord & {
      stderrTruncated: stderrBytes > maxBytes,
      fullLogPaths: { stdoutFile: job.stdoutFile, stderrFile: job.stderrFile },
    },
-    stdoutTail: tailFile(job.stdoutFile, maxBytes),
-    stderrTail: tailFile(job.stderrFile, maxBytes),
+    stdoutTail: tailTextByBytes(stdoutProgressTail, maxBytes),
+    stderrTail: tailTextByBytes(stderrProgressTail, maxBytes),
  };
 }

+/**
+ * Derives a structured progress digest for a job from the tails of its stdout/stderr logs.
+ *
+ * Progress events are JSON lines on stderr whose `event` is `hwlab.v02.trigger.progress`; the last
+ * one in the tail supplies the stage, status, commit, PipelineRun name and timestamp. Commit and
+ * PipelineRun fall back to the first matching JSON-looking field in the stdout tail.
+ *
+ * @param job the job record to summarise.
+ * @param maxBytes tail size to read when `tails` is not supplied (floored, minimum 4096 bytes).
+ * @param tails already-read log tails; when given, no log file is read here.
+ * @returns the progress digest; a generic one when the job is not a known workflow and no tails were supplied.
+ */
+function summarizeJobProgress(job: JobRecord, maxBytes = 96_000, tails?: { stdoutTail: string; stderrTail: string }): JobProgressSummary {
+  const knownWorkflow = job.name === "hwlab_g14_v02_trigger_current";
+  // Unknown workflows without pre-read tails skip log reads entirely (cheap path for listings).
+  if (!knownWorkflow && tails === undefined) return genericJobProgress(job);
+  const progressTailBytes = Math.max(4096, Math.floor(maxBytes));
+  const stderrTail = tails?.stderrTail ?? tailFile(job.stderrFile, progressTailBytes);
+  const stdoutTail = tails?.stdoutTail ?? tailFile(job.stdoutFile, progressTailBytes);
+  const events = parseJsonLineEvents(stderrTail, "hwlab.v02.trigger.progress");
+  const lastEvent = events.at(-1) ?? {};
+  const stage = stringField(lastEvent.stage);
+  const stageStatus = stringField(lastEvent.status);
+  const sourceCommit = stringField(lastEvent.sourceCommit) ?? firstMatch(stdoutTail, /"sourceCommit"\s*:\s*"([0-9a-f]{40})"/iu);
+  const pipelineRun = stringField(lastEvent.pipelineRun) ?? firstMatch(stdoutTail, /"pipelineRun"\s*:\s*"([^"]+)"/u);
+  // A "... created" line in stdout wins; otherwise only an explicit create-stage failure yields false.
+  const pipelineCreated = /pipelinerun\.tekton\.dev\/[^ \n]+ created/u.test(stdoutTail)
+    ? true
+    : stage === "create-pipelinerun" && stageStatus === "failed"
+      ? false
+      : null;
+  const lastEventAt = stringField(lastEvent.at);
+  const kind = events.length > 0 || knownWorkflow ? "hwlab-v02-trigger" : "generic";
+  // Once a PipelineRun name is known, point at the control-plane status; otherwise keep polling a running job.
+  // NOTE(review): this also applies when `kind` is "generic" and the name came only from the stdout regex — confirm intended.
+  const nextCommand = pipelineRun
+    ? `bun scripts/cli.ts hwlab g14 control-plane status --lane v02`
+    : job.status === "running"
+      ? `bun scripts/cli.ts job status ${job.id} --tail-bytes 12000`
+      : null;
+  const summary = kind === "hwlab-v02-trigger"
+    ? [
+        job.status,
+        stage ? `${stage}${stageStatus ? `:${stageStatus}` : ""}` : "stage:unknown",
+        sourceCommit ? `source=${sourceCommit.slice(0, 12)}` : null,
+        pipelineRun ? `pipelineRun=${pipelineRun}` : null,
+        pipelineCreated === true ? "created" : pipelineCreated === false ? "create-failed" : null,
+      ].filter(Boolean).join(" ")
+    : `${job.status}${job.exitCode === null ? "" : ` exit=${job.exitCode}`}`;
+  return {
+    kind,
+    stage,
+    stageStatus,
+    sourceCommit,
+    pipelineRun,
+    pipelineCreated,
+    lastEventAt,
+    eventsObserved: events.length,
+    summary,
+    nextCommand,
+  };
+}
+
+/**
+ * Builds the progress digest for a job with no workflow-specific parsing.
+ *
+ * @param job the job record; only `id`, `status` and `exitCode` are read.
+ * @returns a "generic" digest whose summary is the status plus ` exit=<code>` when an exit code is recorded.
+ */
+function genericJobProgress(job: JobRecord): JobProgressSummary {
+  const exitSuffix = job.exitCode === null ? "" : ` exit=${job.exitCode}`;
+  // Only a running job gets a follow-up command: poll its status again.
+  const followUp = job.status === "running" ? `bun scripts/cli.ts job status ${job.id} --tail-bytes 12000` : null;
+  return {
+    kind: "generic",
+    stage: null,
+    stageStatus: null,
+    sourceCommit: null,
+    pipelineRun: null,
+    pipelineCreated: null,
+    lastEventAt: null,
+    eventsObserved: 0,
+    summary: `${job.status}${exitSuffix}`,
+    nextCommand: followUp,
+  };
+}
+
+/**
+ * Returns the trailing part of `text` that fits within `maxBytes` bytes of UTF-8.
+ *
+ * The cut is moved forward to the next character boundary, so the result never starts with a
+ * partial multi-byte character (which would decode as U+FFFD). The result can therefore be up to
+ * three bytes shorter than `maxBytes`.
+ *
+ * @param text the text to shorten.
+ * @param maxBytes byte budget; floored, and values at or below zero yield an empty string.
+ * @returns `text` unchanged when it already fits, otherwise its tail starting on a character boundary.
+ */
+function tailTextByBytes(text: string, maxBytes: number): string {
+  const safeMaxBytes = Math.max(0, Math.floor(maxBytes));
+  if (safeMaxBytes === 0) return "";
+  const buffer = Buffer.from(text, "utf8");
+  if (buffer.length <= safeMaxBytes) return text;
+  let start = buffer.length - safeMaxBytes;
+  // A raw byte offset can land inside a multi-byte character; skip the orphaned
+  // continuation bytes (0b10xxxxxx) so decoding starts on a character boundary.
+  while (start < buffer.length && ((buffer[start] ?? 0) & 0xc0) === 0x80) start += 1;
+  return buffer.subarray(start).toString("utf8");
+}
+
+/**
+ * Extracts JSON-line events with a given `event` name from log text.
+ *
+ * Lines are trimmed; only those starting with `{` are parsed. A line is kept when it parses to a
+ * non-array object whose `event` property equals `eventName`. Unparseable lines are skipped.
+ *
+ * @param text log text, one candidate event per line.
+ * @param eventName required value of the `event` property.
+ * @returns matching events in the order they appear.
+ */
+function parseJsonLineEvents(text: string, eventName: string): Record<string, unknown>[] {
+  const isNamedEvent = (value: unknown): value is Record<string, unknown> =>
+    typeof value === "object" &&
+    value !== null &&
+    !Array.isArray(value) &&
+    (value as Record<string, unknown>).event === eventName;
+  return text.split(/\r?\n/u).flatMap((line) => {
+    const candidate = line.trim();
+    if (!candidate.startsWith("{")) return [];
+    try {
+      const decoded: unknown = JSON.parse(candidate);
+      return isNamedEvent(decoded) ? [decoded] : [];
+    } catch {
+      // Not JSON (e.g. a log line that merely starts with a brace); the caller still has the raw tail.
+      return [];
+    }
+  });
+}
+
+/**
+ * Normalises an untyped value to a trimmed, non-empty string.
+ *
+ * @param value any value, typically a property of a parsed JSON event.
+ * @returns the trimmed string, or null when the value is not a string or is blank.
+ */
+function stringField(value: unknown): string | null {
+  if (typeof value !== "string") return null;
+  const trimmed = value.trim();
+  return trimmed.length === 0 ? null : trimmed;
+}
+
+/**
+ * Runs `pattern` once against `text` and returns its first capture group.
+ *
+ * @param text text to search.
+ * @param pattern regular expression with at least one capture group.
+ * @returns the first capture group, or null when there is no match or the group is empty or absent.
+ */
+function firstMatch(text: string, pattern: RegExp): string | null {
+  const captured = pattern.exec(text)?.[1];
+  if (typeof captured !== "string" || captured.length === 0) return null;
+  return captured;
+}
+
 export interface JobListOptions {
  limit?: number;
  includeCommand?: boolean;
@@ -182,6 +296,7 @@ export function listJobsSummary(options: JobListOptions = {}): unknown {
    id: job.id,
    name: job.name,
    status: job.status,
+    progress: summarizeJobProgress(job, 32_000),
    runner: job.runner,
    runnerPid: job.runnerPid ?? null,
    runnerContainer: job.runnerContainer ?? null,