diff --git a/.agents/skills/unidesk-cicd/SKILL.md b/.agents/skills/unidesk-cicd/SKILL.md index 0f133380..257d73fb 100644 --- a/.agents/skills/unidesk-cicd/SKILL.md +++ b/.agents/skills/unidesk-cicd/SKILL.md @@ -16,6 +16,7 @@ bun scripts/cli.ts hwlab g14 control-plane trigger-current --lane v02 --confirm bun scripts/cli.ts hwlab g14 git-mirror status --lane v02 bun scripts/cli.ts agentrun control-plane status bun scripts/cli.ts cicd branch-follower status +bun scripts/cli.ts cicd branch-follower debug-step --follower web-probe-sentinel-master --step state-read ``` 按职责读取拆分后的 reference: @@ -42,6 +43,7 @@ bun scripts/cli.ts cicd branch-follower status - node-scoped `trigger-current --wait` 必须把 source sync、pre/post flush、PipelineRun、GitOps/Argo、runtime readiness 和 `/health` closeout 放进同一 120s 端到端预算;超预算时由 CLI 输出阶段分解、Argo target revision、runtime/public 状态和 TaskRun/Pod drill-down,不继续死等,也不要求操作者手动串联多个状态/flush 命令才能完成一次交付。 - 触发或验收 rollout 时必须绑定 lane、source commit、PipelineRun/GitOps revision、runtime ready 和 `/health` 端点验证结果;web-probe/Playwright 结果只能作为单独的 post-deploy 证据。 - CI/CD 状态、日志和事件查询必须减少 trans/SSH 传输:能在目标 NODE/k8s 内解析、聚合、裁剪的内容,必须在目标侧计算成短 JSON/table 摘要后再回传;禁止为了本地解析而把完整 ConfigMap、大对象、长日志或原始 API payload 透传回来。 +- branch-follower 排障必须优先使用 `debug-step` 做单步调试:`state-read`、`status-read`、`decide`、`state-write` 分别定位状态读取、K8s native status、决策和 ConfigMap patch,不要通过反复推小提交触发整条自动跟随大回环来定位同一问题。 - CI/CD 验证、测试和性能度量必须在目标 NODE/k8s 内执行,尤其是 branch-follower、Tekton/Argo、runtime reuse/env reuse、git mirror 和 runtime-ready 相关改动;不要在 master/local host 跑 test 或用本地验证结果替代目标运行面证据。本机只用于源码阅读、编辑和必要静态语法检查,正式收敛结论必须来自目标 NODE 计算出的短摘要。 - 一旦发现 CI/CD CLI 被误用且可能写入错误状态、产生伪证据或绕过目标运行面,必须立刻先把用法改成更符合直觉的公开入口并更新本 skill/reference,再继续验证或交付;不要只靠口头记忆、隐藏 flag、手动约定或后续小心来避免复发。内部 in-cluster 模式必须只由目标 k8s Job/Pod 调用,操作者从本机只能用公开入口提交目标侧 Job 或读取目标侧摘要。 - Secret 只通过 YAML sourceRef/targetKey 和受控 CLI 下发;输出只披露 presence/fingerprint。 diff --git a/.agents/skills/unidesk-cicd/references/branch-follower.md b/.agents/skills/unidesk-cicd/references/branch-follower.md index 5b14fd93..c3a5a6fb 100644 --- a/.agents/skills/unidesk-cicd/references/branch-follower.md +++ b/.agents/skills/unidesk-cicd/references/branch-follower.md @@ -11,12 +11,25 @@ bun scripts/cli.ts cicd branch-follower status bun scripts/cli.ts cicd branch-follower status --live bun scripts/cli.ts cicd branch-follower run-once --all --dry-run bun scripts/cli.ts cicd branch-follower run-once --follower --confirm --wait +bun scripts/cli.ts cicd branch-follower debug-step --follower --step state-read +bun scripts/cli.ts cicd branch-follower debug-step --follower --step status-read +bun scripts/cli.ts cicd branch-follower debug-step --follower --step decide +bun scripts/cli.ts cicd branch-follower debug-step --follower --step state-write --confirm bun scripts/cli.ts cicd branch-follower events --follower bun scripts/cli.ts cicd branch-follower logs --follower ``` `apply --confirm --wait` is the one-command deploy/update entry for the K8s controller. `status` is the default intermediate-state query. `status --live` and local `run-once` submit a bounded K8s reconcile Job; the Job performs all source, Tekton, Argo and runtime reads inside the cluster and may write only the compact state summary. `events` and `logs` are read-only drill-downs for the same Kubernetes-native state. `run-once --confirm --wait` is the manual one-command trigger and closeout path. +`debug-step` is the required single-step troubleshooting entry before changing branch-follower code for repeated CI/CD convergence issues. It runs in a bounded target-side Job when called from the operator host, and uses the same controller modules as the real flow: + +- `state-read`: read only the compact ConfigMap state, value bytes, resourceVersion and `_updatedAt`. +- `status-read`: read native source/Tekton/Argo/runtime status without triggering adapters. +- `decide`: run the decision function in dry-run mode without triggering adapters or writing state. +- `state-write --confirm`: patch the stored follower state back through the normal ConfigMap write helper and report before/after resourceVersion; this is for isolating state write failures, not for normal rollout. + +Do not debug the same state/read/write problem by repeatedly pushing empty or tiny source commits to drive the full automatic follower loop. + ## Source Authority - Follower decisions must not read host source worktrees, target dev directories, `.worktree/*`, local git state, or direct GitHub branch refs. @@ -67,6 +80,8 @@ Default `status` output must show follower id, phase, adapter, source branch + o Stage timing must be queryable through normal CLI output, not only raw JSON. `status` and `run-once` print a bounded `STAGE TIMINGS` table with `total`, `status-read`, git-mirror, Kubernetes Job, PipelineRun, TaskRun, Argo, runtime and closeout rows when available. `followers[].timings` remains available in `--raw`/JSON for machine consumers. +`run-once` also prints a bounded `STATE WRITES` table whenever it writes follower state. The table must include follower id, write status, before/after ConfigMap resourceVersion, whether timing was preserved, exit code and a short message. Missing write evidence is a visibility defect; use `debug-step --step state-write` before any further full-loop validation. + `timings.totalSeconds` is the authoritative end-to-end wall-clock measurement for a triggered run: measure from `timings.startedAt` until `timings.finishedAt`, or until query time while closeout is still running. Do not compute total by summing stage rows, because stage rows can overlap, omit external waiting, or be reported by different native objects. Do not backfill, infer, or migrate old branch-follower state when historical timing, stage timing, or other observability fields are missing or known to be unreliable. Compatibility starts with future state written by the current controller; old missing data must render as `-`/unknown in CLI output instead of being recovered from unrelated native objects. diff --git a/scripts/native/cicd/patch-follower-state.mjs b/scripts/native/cicd/patch-follower-state.mjs index 4fecfe5b..024859f7 100644 --- a/scripts/native/cicd/patch-follower-state.mjs +++ b/scripts/native/cicd/patch-follower-state.mjs @@ -93,6 +93,8 @@ function preserveExistingTiming(state, existing) { ensureConfigMap(); const current = readConfigMap(); +const beforeResourceVersion = stringOrNull(current?.metadata?.resourceVersion); +const beforeUpdatedAt = stringOrNull(current?.data?._updatedAt); const currentText = current?.data?.[followerId]; const existing = typeof currentText === "string" && currentText.length > 0 ? JSON.parse(currentText) : null; const incomingState = JSON.parse(stateJson); @@ -105,4 +107,15 @@ const patch = { }, }; kubectl(["patch", "configmap", configMap, "--type", "merge", "-p", JSON.stringify(patch)]); -process.stdout.write(JSON.stringify({ ok: true, followerId, preservedTiming: state !== incomingState, statusAuthority: "target-node-summary", parsedDownstreamCliOutput: false })); +const updated = readConfigMap(); +process.stdout.write(JSON.stringify({ + ok: true, + followerId, + preservedTiming: state !== incomingState, + beforeResourceVersion, + afterResourceVersion: stringOrNull(updated?.metadata?.resourceVersion), + beforeUpdatedAt, + afterUpdatedAt: stringOrNull(updated?.data?._updatedAt), + statusAuthority: "target-node-summary", + parsedDownstreamCliOutput: false, +})); diff --git a/scripts/native/cicd/read-state-summary.mjs b/scripts/native/cicd/read-state-summary.mjs index b1acbfd4..dcaa931d 100644 --- a/scripts/native/cicd/read-state-summary.mjs +++ b/scripts/native/cicd/read-state-summary.mjs @@ -161,6 +161,13 @@ if (!result.ok) errors.push(result.error); process.stdout.write(JSON.stringify({ ok: result.ok && errors.length === 0, present: result.present, + metadata: result.object === null ? null : { + name: stringOrNull(result.object?.metadata?.name), + namespace: stringOrNull(result.object?.metadata?.namespace), + resourceVersion: stringOrNull(result.object?.metadata?.resourceVersion), + updatedAt: stringOrNull(result.object?.data?._updatedAt), + keyCount: Object.keys(recordOrNull(result.object?.data) || {}).length, + }, stateByFollower, valueBytes, errors, diff --git a/scripts/src/cicd-controller-render.ts b/scripts/src/cicd-controller-render.ts index 1e9deab0..4c52a991 100644 --- a/scripts/src/cicd-controller-render.ts +++ b/scripts/src/cicd-controller-render.ts @@ -4,11 +4,11 @@ import { createHash } from "node:crypto"; import { readFileSync } from "node:fs"; import { rootPath } from "./config"; import { shQuote } from "./platform-infra-ops-library"; -import type { BranchFollowerRegistry, ParsedOptions } from "./cicd-types"; +import type { BranchFollowerDebugStep, BranchFollowerRegistry, ParsedOptions } from "./cicd-types"; const SPEC_REF = "PJ2026-01060703"; -export function renderControllerReconcileJob(registry: BranchFollowerRegistry, options: ParsedOptions, jobName: string, mode: { dryRun: boolean; recordState: boolean }, timeoutSeconds: number): Record { +export function renderControllerReconcileJob(registry: BranchFollowerRegistry, options: ParsedOptions, jobName: string, mode: { dryRun: boolean; wait?: boolean; recordState: boolean }, timeoutSeconds: number): Record { const labels = { ...registry.controller.labels, "app.kubernetes.io/component": "cicd-reconcile-job" }; const commandArgs = [ "bun", @@ -74,6 +74,75 @@ export function renderControllerReconcileJob(registry: BranchFollowerRegistry, o }; } +export function renderControllerDebugJob(registry: BranchFollowerRegistry, options: ParsedOptions, jobName: string, step: BranchFollowerDebugStep, timeoutSeconds: number): Record { + if (options.followerId === null) throw new Error("debug-step target job requires --follower "); + const labels = { ...registry.controller.labels, "app.kubernetes.io/component": "cicd-debug-job" }; + const commandArgs = [ + "bun", + "scripts/cli.ts", + "cicd", + "branch-follower", + "debug-step", + "--follower", + options.followerId, + "--step", + step, + options.confirm ? "--confirm" : "--dry-run", + "--in-cluster", + "--config", + "config/cicd-branch-followers.yaml", + "--timeout-seconds", + String(timeoutSeconds), + "--json", + ]; + return { + apiVersion: "batch/v1", + kind: "Job", + metadata: { name: jobName, namespace: registry.controller.namespace, labels }, + spec: { + backoffLimit: registry.controller.budgets.reconcileJobBackoffLimit, + ttlSecondsAfterFinished: registry.controller.budgets.reconcileJobTtlSeconds, + activeDeadlineSeconds: timeoutSeconds + registry.controller.budgets.reconcileJobDeadlineGraceSeconds, + template: { + metadata: { labels }, + spec: { + restartPolicy: "Never", + serviceAccountName: registry.controller.serviceAccountName, + volumes: [ + { name: "registry", configMap: { name: registry.controller.configMapName, defaultMode: 0o755 } }, + { name: "git-mirror-cache", persistentVolumeClaim: { claimName: registry.controller.source.gitMirrorCachePvcName } }, + { name: "git-ssh", secret: { secretName: registry.controller.source.githubSsh.secretName, defaultMode: 0o400 } }, + { name: "work", emptyDir: {} }, + ], + containers: [ + { + name: "debug", + image: registry.controller.image, + imagePullPolicy: "IfNotPresent", + command: ["/bin/sh", "/etc/unidesk-cicd-branch-follower/controller-one-shot.sh"], + args: commandArgs, + env: [ + { name: "UNIDESK_CONTROLLER_SOURCE_BRANCH", value: registry.controller.source.branch }, + { name: "UNIDESK_CONTROLLER_SOURCE_REPOSITORY", value: registry.controller.source.repository }, + { name: "UNIDESK_CONTROLLER_SOURCE_SNAPSHOT_PREFIX", value: registry.controller.source.sourceSnapshot.stageRefPrefix.replaceAll("{branch}", registry.controller.source.branch) }, + { name: "UNIDESK_CONTROLLER_GITHUB_SSH_PRIVATE_KEY", value: `/git-ssh/${registry.controller.source.githubSsh.privateKeySecretKey}` }, + { name: "UNIDESK_CONTROLLER_GITHUB_PROXY_HOST", value: registry.controller.source.githubSsh.proxyHost }, + { name: "UNIDESK_CONTROLLER_GITHUB_PROXY_PORT", value: String(registry.controller.source.githubSsh.proxyPort) }, + ], + volumeMounts: [ + { name: "registry", mountPath: "/etc/unidesk-cicd-branch-follower", readOnly: true }, + { name: "git-mirror-cache", mountPath: "/cache" }, + { name: "git-ssh", mountPath: "/git-ssh", readOnly: true }, + { name: "work", mountPath: "/work" }, + ], + }, + ], + }, + }, + }, + }; +} + export function waitForJobShell(namespace: string, jobName: string, timeoutSeconds: number): string { return [ `NAMESPACE=${shQuote(namespace)}`, diff --git a/scripts/src/cicd-debug.ts b/scripts/src/cicd-debug.ts new file mode 100644 index 00000000..2219af95 --- /dev/null +++ b/scripts/src/cicd-debug.ts @@ -0,0 +1,309 @@ +// SPEC: PJ2026-01060703 CI/CD branch follower debug steps. +// Responsibility: bounded single-step debugging for branch follower state and decision paths. +import type { CommandResult } from "./command"; +import type { AdapterSummary, BranchFollowerDebugStep, BranchFollowerRegistry, FollowerSpec, FollowerState, K8sStateRead, ParsedOptions } from "./cicd-types"; +import { renderControllerDebugJob, waitForJobShell } from "./cicd-controller-render"; +import { redactText, shQuote } from "./platform-infra-ops-library"; + +type KubeScriptRunner = (registry: BranchFollowerRegistry, options: ParsedOptions, script: string, input: string, timeoutMs: number) => CommandResult; + +export interface CicdDebugDeps { + selectFollowers(registry: BranchFollowerRegistry, options: ParsedOptions, opts: { includeDisabled: boolean }): FollowerSpec[]; + readK8sState(registry: BranchFollowerRegistry, options: ParsedOptions): K8sStateRead; + readAdapterStatus(registry: BranchFollowerRegistry, follower: FollowerSpec, options: ParsedOptions): Promise; + decideAndMaybeTrigger(registry: BranchFollowerRegistry, follower: FollowerSpec, previous: Record, live: AdapterSummary, options: ParsedOptions): Promise; + writeFollowerState(registry: BranchFollowerRegistry, state: FollowerState, options: ParsedOptions): CommandResult; + runKubeScript: KubeScriptRunner; +} + +export async function buildDebugStep(registry: BranchFollowerRegistry, options: ParsedOptions, deps: CicdDebugDeps): Promise> { + const step = options.debugStep ?? "state-read"; + if (options.followerId === null) throw new Error("debug-step requires --follower "); + if (!options.inCluster) return runTargetDebugStepJob(registry, options, step, deps); + + const selected = deps.selectFollowers(registry, options, { includeDisabled: true }); + if (selected.length !== 1) throw new Error("debug-step operates on exactly one follower"); + const follower = selected[0] as FollowerSpec; + const before = deps.readK8sState(registry, options); + const previous = before.stateByFollower[follower.id] ?? {}; + let live: AdapterSummary | null = null; + let decided: FollowerState | null = null; + let write: Record | null = null; + let after: K8sStateRead | null = null; + + if (step === "status-read" || step === "decide") { + live = await deps.readAdapterStatus(registry, follower, options); + } + if (step === "decide") { + decided = await deps.decideAndMaybeTrigger(registry, follower, previous, live as AdapterSummary, debugDecisionOptions(options)); + } + if (step === "state-write") { + const writeInput = stateWriteInput(previous); + if (writeInput === null) { + return { + ok: false, + action: "debug-step", + step, + follower: follower.id, + execution: "k8s-native-in-cluster", + dryRun: !options.confirm, + stateBefore: stateSnapshot(before, follower.id), + stateWrite: { ok: false, skipped: true, reason: "stored-state-missing" }, + parsedDownstreamCliOutput: false, + }; + } + if (options.confirm) { + const result = deps.writeFollowerState(registry, writeInput, options); + write = stateWriteResult(follower.id, result); + after = deps.readK8sState(registry, options); + } else { + write = { ok: true, skipped: true, reason: "dry-run-requires-confirm", input: "stored-state" }; + } + } + + return { + ok: write === null ? before.ok && (live === null || live.ok) : write.ok === true, + action: "debug-step", + step, + follower: follower.id, + execution: "k8s-native-in-cluster", + dryRun: !options.confirm, + stateBefore: stateSnapshot(before, follower.id), + status: live === null ? null : compactAdapterStatus(live), + decision: decided === null ? null : compactFollowerDecision(decided), + stateWrite: write, + stateAfter: after === null ? null : stateSnapshot(after, follower.id), + parsedDownstreamCliOutput: false, + next: debugNext(follower.id), + }; +} + +export function renderDebugStepHuman(payload: Record): string { + const before = asOptionalRecord(payload.stateBefore); + const after = asOptionalRecord(payload.stateAfter); + const write = asOptionalRecord(payload.stateWrite); + const status = asOptionalRecord(payload.status); + const decision = asOptionalRecord(payload.decision); + const target = asOptionalRecord(payload.target); + const next = asOptionalRecord(payload.next); + const rows = [[ + payload.follower ?? "-", + payload.step ?? "-", + payload.execution ?? "-", + payload.dryRun === true ? "true" : "false", + before?.phase ?? "-", + after?.phase ?? decision?.phase ?? status?.phase ?? "-", + shortSha(stringOrNull(before?.observedSha)), + shortSha(stringOrNull(after?.observedSha) ?? stringOrNull(decision?.observedSha) ?? stringOrNull(status?.observedSha)), + ]]; + const writeRows = write === null ? [] : [[ + write.ok === true ? "ok" : "failed", + write.skipped === true ? "skipped" : "executed", + write.input ?? "-", + asOptionalRecord(write.patch)?.beforeResourceVersion ?? "-", + asOptionalRecord(write.patch)?.afterResourceVersion ?? "-", + write.exitCode ?? "-", + write.message ?? write.reason ?? "-", + ]]; + return [ + `CI/CD BRANCH-FOLLOWER DEBUG-STEP (${payload.ok === false ? "failed" : "ok"})`, + "", + table(["FOLLOWER", "STEP", "EXECUTION", "DRY_RUN", "BEFORE", "AFTER", "BEFORE_SHA", "AFTER_SHA"], rows), + target === null ? "" : `\nTARGET JOB\n${table(["JOB", "EXIT", "TIMED_OUT", "PARSED"], [[target.name ?? "-", target.exitCode ?? "-", target.timedOut ?? "-", target.parsed === true ? "yes" : "no"]])}`, + writeRows.length === 0 ? "" : `\nSTATE WRITE\n${table(["STATUS", "MODE", "INPUT", "BEFORE_RV", "AFTER_RV", "EXIT", "MESSAGE"], writeRows)}`, + "", + "NEXT", + `state-read: ${next?.stateRead ?? "-"}`, + `status-read: ${next?.statusRead ?? "-"}`, + `decide: ${next?.decide ?? "-"}`, + `state-write: ${next?.stateWrite ?? "-"}`, + "", + ].filter((line) => line !== "").join("\n"); +} + +function runTargetDebugStepJob(registry: BranchFollowerRegistry, options: ParsedOptions, step: BranchFollowerDebugStep, deps: CicdDebugDeps): Record { + const timeoutSeconds = options.timeoutSeconds ?? registry.controller.budgets.runOnceSeconds; + const jobName = `${registry.controller.deploymentName}-debug-${step}-${Date.now().toString(36)}`.replace(/[^a-z0-9-]+/gu, "-").slice(0, 63); + const manifest = renderControllerDebugJob(registry, options, jobName, step, timeoutSeconds); + const manifestYaml = `${Bun.YAML.stringify(manifest).trim()}\n`; + const script = [ + "set -eu", + "tmp=$(mktemp)", + "base64 -d >\"$tmp\" <<'UNIDESK_CICD_DEBUG_JOB_B64'", + Buffer.from(manifestYaml, "utf8").toString("base64"), + "UNIDESK_CICD_DEBUG_JOB_B64", + `kubectl -n ${shQuote(registry.controller.namespace)} delete job ${shQuote(jobName)} --ignore-not-found=true >/dev/null 2>&1 || true`, + `kubectl apply --server-side --force-conflicts --field-manager=${shQuote(registry.controller.fieldManager)} -f "$tmp" >/dev/null`, + waitForJobShell(registry.controller.namespace, jobName, timeoutSeconds), + ].join("\n"); + const result = deps.runKubeScript(registry, options, script, "", (timeoutSeconds + registry.controller.budgets.reconcileTransportGraceSeconds) * 1000); + const parsed = parseLastJsonObject(result.stdout); + const state = deps.readK8sState(registry, options); + const followerId = options.followerId ?? ""; + return { + ok: result.exitCode === 0 && parsed?.ok !== false, + action: "debug-step", + step, + follower: followerId, + execution: "k8s-native-debug-job", + dryRun: !options.confirm, + stateBefore: asOptionalRecord(parsed?.stateBefore), + status: asOptionalRecord(parsed?.status), + decision: asOptionalRecord(parsed?.decision), + stateWrite: asOptionalRecord(parsed?.stateWrite), + target: { + name: jobName, + namespace: registry.controller.namespace, + exitCode: result.exitCode, + timedOut: result.timedOut, + parsed: parsed !== null, + stdoutTail: redactText(tailText(result.stdout, options.full ? 4000 : 1000)), + stderrTail: redactText(tailText(result.stderr, options.full ? 2000 : 800)), + }, + targetResult: parsed, + stateAfter: asOptionalRecord(parsed?.stateAfter) ?? stateSnapshot(state, followerId), + parsedDownstreamCliOutput: false, + next: debugNext(followerId), + }; +} + +function debugDecisionOptions(options: ParsedOptions): ParsedOptions { + return { ...options, confirm: false, dryRun: true, wait: false, recordState: false }; +} + +function stateWriteInput(previous: Record): FollowerState | null { + if (stringOrNull(previous.id) === null) return null; + if (asOptionalRecord(previous.source) === null || asOptionalRecord(previous.target) === null || asOptionalRecord(previous.timings) === null) return null; + return previous as unknown as FollowerState; +} + +function stateWriteResult(followerId: string, result: CommandResult): Record { + const parsed = parseLastJsonObject(result.stdout); + return { + ok: result.exitCode === 0 && parsed?.ok !== false, + follower: followerId, + exitCode: result.exitCode, + timedOut: result.timedOut, + input: "stored-state", + patch: parsed, + message: result.exitCode === 0 ? "state patch command completed" : redactText(tailText(result.stderr || result.stdout, 500)), + parsedDownstreamCliOutput: false, + }; +} + +function stateSnapshot(read: K8sStateRead, followerId: string): Record { + const state = read.stateByFollower[followerId] ?? {}; + const source = asOptionalRecord(state.source); + const target = asOptionalRecord(state.target); + const timings = asOptionalRecord(state.timings); + return { + present: read.stateConfigMapPresent, + ok: read.ok, + metadata: read.stateMetadata, + valueBytes: read.stateValueBytes[followerId] ?? null, + phase: stringOrNull(state.phase), + observedSha: stringOrNull(source?.observedSha), + targetSha: stringOrNull(target?.targetSha), + lastTriggeredSha: stringOrNull(state.lastTriggeredSha), + lastSucceededSha: stringOrNull(state.lastSucceededSha), + pipelineRun: stringOrNull(state.pipelineRun), + inFlightJob: stringOrNull(state.inFlightJob), + timingStatus: stringOrNull(timings?.totalStatus), + totalSeconds: numberOrNull(timings?.totalSeconds), + startedAt: stringOrNull(timings?.startedAt), + updatedAt: stringOrNull(state.updatedAt), + }; +} + +function compactAdapterStatus(live: AdapterSummary): Record { + return { + ok: live.ok, + phase: live.phase, + observedSha: live.observedSha, + targetSha: live.targetSha, + aligned: live.aligned, + pipelineRun: live.pipelineRun, + inFlightJob: live.inFlightJob, + message: live.message, + }; +} + +function compactFollowerDecision(state: FollowerState): Record { + return { + phase: state.phase, + observedSha: state.source.observedSha, + targetSha: state.target.targetSha, + lastTriggeredSha: state.lastTriggeredSha, + lastSucceededSha: state.lastSucceededSha, + pipelineRun: state.pipelineRun, + inFlightJob: state.inFlightJob, + decision: state.decision, + timings: state.timings, + }; +} + +function debugNext(followerId: string): Record { + return { + stateRead: `bun scripts/cli.ts cicd branch-follower debug-step --follower ${followerId} --step state-read`, + statusRead: `bun scripts/cli.ts cicd branch-follower debug-step --follower ${followerId} --step status-read`, + decide: `bun scripts/cli.ts cicd branch-follower debug-step --follower ${followerId} --step decide`, + stateWrite: `bun scripts/cli.ts cicd branch-follower debug-step --follower ${followerId} --step state-write --confirm`, + }; +} + +function parseLastJsonObject(text: string): Record | null { + const starts: number[] = []; + let offset = 0; + for (const line of text.split(/\r?\n/u)) { + if (line.trimStart().startsWith("{")) starts.push(offset + line.indexOf("{")); + offset += line.length + 1; + } + const end = text.lastIndexOf("}"); + if (end < 0) return null; + for (let index = starts.length - 1; index >= 0; index -= 1) { + const start = starts[index] ?? -1; + if (start < 0 || start >= end) continue; + try { + const parsed = JSON.parse(text.slice(start, end + 1)) as unknown; + const record = asOptionalRecord(parsed); + if (record !== null) return record; + } catch { + // Try the previous JSON-looking line. + } + } + return null; +} + +function asOptionalRecord(value: unknown): Record | null { + return typeof value === "object" && value !== null && !Array.isArray(value) ? value as Record : null; +} + +function stringOrNull(value: unknown): string | null { + return typeof value === "string" && value.length > 0 ? value : null; +} + +function numberOrNull(value: unknown): number | null { + return typeof value === "number" && Number.isFinite(value) ? value : null; +} + +function shortSha(value: string | null): string { + if (value === null) return "-"; + return value.length > 12 ? value.slice(0, 12) : value; +} + +function table(headers: readonly string[], rows: readonly (readonly unknown[])[]): string { + const normalized = rows.map((row) => headers.map((_, index) => cell(row[index]))); + const widths = headers.map((header, index) => Math.max(header.length, ...normalized.map((row) => row[index]?.length ?? 0))); + const format = (row: readonly string[]) => row.map((value, index) => value.padEnd(widths[index] ?? 0)).join(" ").trimEnd(); + return [format(headers), format(headers.map((header) => "-".repeat(header.length))), ...normalized.map(format)].join("\n"); +} + +function cell(value: unknown): string { + if (value === null || value === undefined || value === "") return "-"; + const text = String(value).replace(/\s+/gu, " "); + return text.length > 96 ? `${text.slice(0, 93)}...` : text; +} + +function tailText(text: string, maxChars: number): string { + return text.length <= maxChars ? text : text.slice(text.length - maxChars); +} diff --git a/scripts/src/cicd-types.ts b/scripts/src/cicd-types.ts index 860d1a7a..9fc17aca 100644 --- a/scripts/src/cicd-types.ts +++ b/scripts/src/cicd-types.ts @@ -2,7 +2,8 @@ // Responsibility: type contracts shared by branch follower entry, controller render, and native K8s helpers. export type OutputMode = "human" | "json" | "yaml"; -export type BranchFollowerAction = "help" | "plan" | "apply" | "status" | "run-once" | "cleanup-state" | "events" | "logs"; +export type BranchFollowerAction = "help" | "plan" | "apply" | "status" | "run-once" | "debug-step" | "cleanup-state" | "events" | "logs"; +export type BranchFollowerDebugStep = "state-read" | "status-read" | "decide" | "state-write"; export type BranchFollowerPhase = | "Observed" | "Noop" @@ -29,6 +30,7 @@ export interface ParsedOptions { full: boolean; raw: boolean; recordState: boolean; + debugStep: BranchFollowerDebugStep | null; output: OutputMode; limit: number; tailBytes: number; @@ -317,6 +319,8 @@ export interface FollowerState { export interface K8sStateRead { ok: boolean; stateByFollower: Record>; + stateMetadata: Record | null; + stateValueBytes: Record; stateConfigMapPresent: boolean; deployment: Record | null; lease: Record | null; @@ -327,6 +331,8 @@ export interface K8sStateRead { export interface K8sFollowerStateRead { ok: boolean; stateByFollower: Record>; + metadata: Record | null; + valueBytes: Record; present: boolean; error: string; } diff --git a/scripts/src/cicd.ts b/scripts/src/cicd.ts index d58c12b0..84808db8 100644 --- a/scripts/src/cicd.ts +++ b/scripts/src/cicd.ts @@ -21,10 +21,11 @@ import { sentinelPipelineRunName } from "./hwlab-node-web-sentinel-cicd-shared"; import { transPath } from "./hwlab-node/runtime-common"; import { configRefGraph, resolveConfigRefString } from "./ops/config-refs"; import { renderControllerManifests, renderControllerReconcileJob, waitForJobShell } from "./cicd-controller-render"; +import { buildDebugStep, renderDebugStepHuman } from "./cicd-debug"; import { runNativeHwlabControlPlaneRefresh } from "./cicd-hwlab-refresh"; import { nativeCicdScriptLoadShell, readNativeObjectBundle } from "./cicd-native-bundle"; import { runNativeK8sJob, runNativeTektonPipelineRun } from "./cicd-native"; -import type { AdapterSummary, BranchFollowerPhase, BranchFollowerRegistry, ControllerSpec, FollowerSpec, FollowerState, K8sFollowerStateRead, K8sStateRead, NativeCloseoutWaitResult, NativeK8sJobResult, NativeStatusSpec, NativeWorkloadSpec, OutputMode, ParsedOptions, StageTiming, TriggerResult } from "./cicd-types"; +import type { AdapterSummary, BranchFollowerAction, BranchFollowerDebugStep, BranchFollowerPhase, BranchFollowerRegistry, ControllerSpec, FollowerSpec, FollowerState, K8sFollowerStateRead, K8sStateRead, NativeCloseoutWaitResult, NativeK8sJobResult, NativeStatusSpec, NativeWorkloadSpec, OutputMode, ParsedOptions, StageTiming, TriggerResult } from "./cicd-types"; import { arrayField, asRecord, @@ -44,7 +45,7 @@ const SPEC_VERSION = "draft-2026-07-03-p0-branch-follower"; export function cicdHelp(): unknown { return { - command: "cicd branch-follower plan|apply|status|run-once|cleanup-state|events|logs", + command: "cicd branch-follower plan|apply|status|run-once|debug-step|cleanup-state|events|logs", output: "text by default; use --json, --raw, or -o json|yaml for machine output", usage: [ "bun scripts/cli.ts cicd branch-follower plan", @@ -53,6 +54,8 @@ export function cicdHelp(): unknown { "bun scripts/cli.ts cicd branch-follower status --live", "bun scripts/cli.ts cicd branch-follower run-once --all --dry-run", "bun scripts/cli.ts cicd branch-follower run-once --follower hwlab-jd01-v03 --confirm --wait", + "bun scripts/cli.ts cicd branch-follower debug-step --follower web-probe-sentinel-master --step state-read", + "bun scripts/cli.ts cicd branch-follower debug-step --follower web-probe-sentinel-master --step state-write --confirm", "bun scripts/cli.ts cicd branch-follower cleanup-state --follower web-probe-sentinel-master --confirm", "bun scripts/cli.ts cicd branch-follower events --follower agentrun-jd01-v02", "bun scripts/cli.ts cicd branch-follower logs --follower web-probe-sentinel-master", @@ -67,7 +70,7 @@ export async function runCicdCommand(_config: UniDeskConfig | null, args: string const top = args[0]; if (top === undefined || isHelpToken(top)) return renderMachine("cicd", cicdHelp(), "json"); if (top !== "branch-follower") { - throw new Error("cicd usage: cicd branch-follower plan|apply|status|run-once|cleanup-state|events|logs"); + throw new Error("cicd usage: cicd branch-follower plan|apply|status|run-once|debug-step|cleanup-state|events|logs"); } const options = parseOptions(args.slice(1)); const command = commandLabel(options); @@ -82,6 +85,8 @@ export async function runCicdCommand(_config: UniDeskConfig | null, args: string return renderResult(command, await buildStatus(registry, options), options); case "run-once": return renderResult(command, await runOnce(registry, options), options); + case "debug-step": + return renderResult(command, await buildDebugStep(registry, options, { selectFollowers, readK8sState, readAdapterStatus, decideAndMaybeTrigger, writeFollowerState, runKubeScript }), options); case "cleanup-state": return renderResult(command, cleanupState(registry, options), options); case "events": @@ -97,7 +102,7 @@ function parseOptions(args: string[]): ParsedOptions { if (actionToken === undefined || isHelpToken(actionToken)) { return defaultOptions("help", args.slice(actionToken === undefined ? 0 : 1)); } - if (!["plan", "apply", "status", "run-once", "cleanup-state", "events", "logs"].includes(actionToken)) { + if (!["plan", "apply", "status", "run-once", "debug-step", "cleanup-state", "events", "logs"].includes(actionToken)) { throw new Error(`cicd branch-follower unknown action: ${actionToken}`); } const action = actionToken as BranchFollowerAction; @@ -136,6 +141,8 @@ function parseOptions(args: string[]): ParsedOptions { options.output = "json"; } else if (arg === "--record-state") { options.recordState = true; + } else if (arg === "--step") { + options.debugStep = debugStepOption(valueOption(rest, ++index, arg)); } else if (arg === "-o" || arg === "--output") { const value = valueOption(rest, ++index, arg); if (value !== "json" && value !== "yaml" && value !== "wide" && value !== "text") throw new Error(`${arg} must be json, yaml, wide, or text`); @@ -157,6 +164,7 @@ function parseOptions(args: string[]): ParsedOptions { if (options.confirm && options.dryRun) throw new Error("cicd branch-follower accepts only one of --confirm or --dry-run"); if (options.action === "apply" && !options.confirm) options.dryRun = true; if (options.action === "run-once" && !options.confirm) options.dryRun = true; + if (options.action === "debug-step" && !options.confirm) options.dryRun = true; if (options.action === "cleanup-state" && !options.confirm) options.dryRun = true; if (options.action === "run-once" && options.confirm && !options.all && options.followerId === null) { throw new Error("run-once --confirm requires --all or --follower "); @@ -164,9 +172,17 @@ function parseOptions(args: string[]): ParsedOptions { if (options.action === "cleanup-state" && options.confirm && !options.all && options.followerId === null) { throw new Error("cleanup-state --confirm requires --all or --follower "); } + if (options.action === "debug-step" && options.followerId === null) { + throw new Error("debug-step requires --follower "); + } return options; } +function debugStepOption(value: string): BranchFollowerDebugStep { + if (value === "state-read" || value === "status-read" || value === "decide" || value === "state-write") return value; + throw new Error("--step must be state-read, status-read, decide, or state-write"); +} + function isInClusterRuntime(): boolean { return Boolean(process.env.KUBERNETES_SERVICE_HOST && process.env.KUBERNETES_SERVICE_PORT); } @@ -186,6 +202,7 @@ function defaultOptions(action: BranchFollowerAction, _args: string[]): ParsedOp full: false, raw: false, recordState: false, + debugStep: null, output: "human", limit: 20, tailBytes: 12000, @@ -599,12 +616,15 @@ async function runOnce(registry: BranchFollowerRegistry, options: ParsedOptions) const previous = readK8sState(registry, options); const results: FollowerState[] = []; const stateWriteWarnings: string[] = []; + const stateWrites: Record[] = []; for (const follower of selected) { const oldState = previous.stateByFollower[follower.id] ?? {}; const live = await readAdapterStatus(registry, follower, options); const state = await decideAndMaybeTrigger(registry, follower, oldState, live, options); if (!options.dryRun || options.recordState) { const write = writeFollowerState(registry, state, options); + const writeSummary = stateWriteSummary(follower.id, write); + stateWrites.push(writeSummary); if (write.exitCode !== 0) { const warning = `state write failed for ${follower.id}: ${tailText(write.stderr || write.stdout, 300)}`; state.warnings.push(warning); @@ -622,6 +642,7 @@ async function runOnce(registry: BranchFollowerRegistry, options: ParsedOptions) controller: options.inCluster, registry: registrySummary(registry), followers: results, + stateWrites, warnings: stateWriteWarnings, next: { status: "bun scripts/cli.ts cicd branch-follower status", @@ -1847,6 +1868,8 @@ function readK8sState(registry: BranchFollowerRegistry, options: ParsedOptions): return { ok: errors.length === 0, stateByFollower: stateResult.stateByFollower, + stateMetadata: stateResult.metadata, + stateValueBytes: stateResult.valueBytes, stateConfigMapPresent: stateResult.present, deployment: deploymentResult.value, lease: leaseResult.value, @@ -1876,20 +1899,42 @@ function kubeConfigMapFollowerState(registry: BranchFollowerRegistry, options: P return { ok: false, stateByFollower: {}, present: false, error }; } const parsedStates = asOptionalRecord(parsed.stateByFollower) ?? {}; + const metadata = asOptionalRecord(parsed.metadata); + const parsedValueBytes = asOptionalRecord(parsed.valueBytes) ?? {}; const stateByFollower: Record> = {}; + const valueBytes: Record = {}; for (const follower of registry.followers) { const state = asOptionalRecord(parsedStates[follower.id]); if (state !== null) stateByFollower[follower.id] = state; + const bytes = numberOrNull(parsedValueBytes[follower.id]); + if (bytes !== null) valueBytes[follower.id] = bytes; } const errors = Array.isArray(parsed.errors) ? parsed.errors.map(String).filter((item) => item.length > 0) : []; return { ok: parsed.ok === true && errors.length === 0, stateByFollower, + metadata, + valueBytes, present: parsed.present === true, error: errors.join("; "), }; } +function stateWriteSummary(followerId: string, result: CommandResult): Record { + const parsed = result.exitCode === 0 ? parseJsonObject(result.stdout) : null; + return { + follower: followerId, + ok: result.exitCode === 0 && parsed?.ok !== false, + exitCode: result.exitCode, + timedOut: result.timedOut, + beforeResourceVersion: stringOrNull(parsed?.beforeResourceVersion), + afterResourceVersion: stringOrNull(parsed?.afterResourceVersion), + preservedTiming: parsed?.preservedTiming === true, + message: result.exitCode === 0 ? "state patch command completed" : redactText(tailText(result.stderr || result.stdout, 500)), + parsedDownstreamCliOutput: false, + }; +} + function removeFollowerStateKeys(registry: BranchFollowerRegistry, options: ParsedOptions, ids: string[]): CommandResult { const patch = JSON.stringify({ data: Object.fromEntries(ids.map((id) => [id, null])) }); const script = [ @@ -2665,6 +2710,7 @@ function renderHuman(command: string, payload: Record, options: if (command.endsWith(" apply")) return renderApplyHuman(payload); if (command.endsWith(" status")) return renderStatusHuman(payload, options); if (command.endsWith(" run-once")) return renderRunOnceHuman(payload); + if (command.endsWith(" debug-step")) return renderDebugStepHuman(payload); if (command.endsWith(" cleanup-state")) return renderCleanupStateHuman(payload); if (command.endsWith(" events") || command.endsWith(" logs")) return renderDrillDownHuman(payload); return `${JSON.stringify(payload, null, 2)}\n`; @@ -2770,6 +2816,7 @@ function renderStatusHuman(payload: Record, _options: ParsedOpt function renderRunOnceHuman(payload: Record): string { const followers = arrayRecords(payload.followers); + const stateWrites = arrayRecords(payload.stateWrites); const rows = followers.map((item) => { const source = asOptionalRecord(item.source); const target = asOptionalRecord(item.target); @@ -2785,11 +2832,21 @@ function renderRunOnceHuman(payload: Record): string { }); const next = asOptionalRecord(payload.next); const timingRows = followers.flatMap(timingRowsForFollower).slice(0, 48); + const writeRows = stateWrites.map((item) => [ + item.follower, + item.ok === true ? "ok" : "failed", + item.beforeResourceVersion ?? "-", + item.afterResourceVersion ?? "-", + item.preservedTiming === true ? "yes" : "no", + item.exitCode ?? "-", + item.message ?? "-", + ]); return [ `CI/CD BRANCH-FOLLOWER RUN-ONCE (${payload.ok === false ? "blocked" : payload.dryRun === true ? "dry-run" : "ok"})`, "", table(["FOLLOWER", "PHASE", "OBSERVED", "TARGET", "TRIGGERED", "IN_FLIGHT", "DECISION"], rows), timingRows.length === 0 ? "" : `\nSTAGE TIMINGS\n${table(["FOLLOWER", "STAGE", "STATUS", "SECONDS", "BUDGET", "OBJECT"], timingRows)}`, + writeRows.length === 0 ? "" : `\nSTATE WRITES\n${table(["FOLLOWER", "STATUS", "BEFORE_RV", "AFTER_RV", "PRESERVED", "EXIT", "MESSAGE"], writeRows)}`, "", "NEXT", `status: ${next?.status ?? "-"}`,