From 9c61bf731564c8725862ea74b608934cf9fa3ac9 Mon Sep 17 00:00:00 2001 From: Codex Date: Fri, 3 Jul 2026 18:32:23 +0000 Subject: [PATCH] fix: bound branch follower debug json --- .../references/branch-follower.md | 4 ++ scripts/src/cicd-debug.ts | 44 ++++++++++++++----- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/.agents/skills/unidesk-cicd/references/branch-follower.md b/.agents/skills/unidesk-cicd/references/branch-follower.md index 22c3b8af..3816a66b 100644 --- a/.agents/skills/unidesk-cicd/references/branch-follower.md +++ b/.agents/skills/unidesk-cicd/references/branch-follower.md @@ -36,6 +36,8 @@ When a repeated runtime pitfall or visibility defect is found during branch-foll `debug-step` output must stay bounded in both text and JSON modes. The default machine payload should include step result, compact state/status/decision/write summaries, target Job identity and short error/timing fields only. Full target Job logs, full target JSON and long stdout/stderr tails belong behind explicit drill-down, not in the default `--json` payload. +Bounded JSON means the operator-facing `--json` payload must remain below the YAML-configured stdout limit in normal successful debug cases. Do not duplicate the same evidence as full top-level objects, compact `targetResult`, full `stateAfter` and target stdout tail at the same time; choose one compact representation by default and put full payload/log drill-down behind explicit commands. + `status-read`, `events`, `logs` and debug summaries must expose compact closeout gate details when a follower is not aligned: git-mirror readiness, Tekton PipelineRun condition, Argo sync/health, runtime target sha/readiness and short errors. Repeating only phase/observed/target/message is a visibility defect and must be fixed before further rollout tuning. Stage timing rows must not label optional gates as `not-ready` when they are not part of that follower's closeout contract. For sentinel-like followers without a GitOps branch flush gate, git-mirror source snapshot readiness should render as source-ready/ready, while missing GitOps `githubInSync` remains `-`/not-applicable instead of a failure-looking state. @@ -127,6 +129,8 @@ State ConfigMaps must stay bounded and human-queryable. Store compact summaries, Status readers must compute near the data. When the operator CLI reaches a target node or k8s route through `trans`, the target NODE/k8s side must parse ConfigMap values, Kubernetes objects and log/event lists locally, then return only the bounded follower summary, timing rows, object names, counts and short tails needed by the CLI. Do not transmit complete ConfigMap entries, full API objects or long logs back to the host just so host-side TypeScript can parse and trim them. +Operator transport timing warnings such as `UNIDESK_SSH_TIMING` measure CLI/trans latency, not branch-follower CI/CD stage time or end-to-end convergence time. Do not mix those warnings into `timings.totalSeconds`, stage rows, or performance closeout evidence; when transport cost becomes noisy, reduce round trips by adding a target-side debug/status summary instead of pulling more raw output to the host. + Validation, test and performance evidence for branch-follower changes must also run on the target NODE/k8s runtime, not on the local/master host. For CI/CD changes, use the target node's Tekton/Argo/runtime objects, controlled CLI jobs, and target-side summary scripts as the evidence source; local tests may not be cited as convergence or performance proof. Operator-facing commands must use intuitive target-side verbs instead of internal execution flags. From a local/master host, use `status --live`, `run-once ...`, `events`, or `logs`; these commands create a bounded target-side Job when live state is needed. The internal `--in-cluster` flag is reserved for the Kubernetes Job/Pod command line after the registry, serviceaccount, in-cluster API endpoint and EmptyDir source checkout are mounted. It must not appear in user-facing examples. diff --git a/scripts/src/cicd-debug.ts b/scripts/src/cicd-debug.ts index cc9e87a3..3c647add 100644 --- a/scripts/src/cicd-debug.ts +++ b/scripts/src/cicd-debug.ts @@ -140,28 +140,31 @@ function runTargetDebugStepJob(registry: BranchFollowerRegistry, options: Parsed const parsed = parseLastJsonObject(result.stdout); const state = deps.readK8sState(registry, options); const followerId = options.followerId ?? ""; + const compact = compactTargetDebugResult(parsed); + const ok = result.exitCode === 0 && parsed?.ok !== false; + const includeTargetTail = !ok || parsed === null; + const fallbackStateAfter = stateSnapshot(state, followerId); return { - ok: result.exitCode === 0 && parsed?.ok !== false, + ok, action: "debug-step", step, follower: followerId, execution: "k8s-native-debug-job", dryRun: !options.confirm, - stateBefore: asOptionalRecord(parsed?.stateBefore), - status: asOptionalRecord(parsed?.status), - decision: asOptionalRecord(parsed?.decision), - stateWrite: asOptionalRecord(parsed?.stateWrite), + stateBefore: compact?.stateBefore ?? compactStateLike(asOptionalRecord(parsed?.stateBefore)), + status: compact?.status ?? null, + decision: compact?.decision ?? null, + stateWrite: compact?.stateWrite ?? null, target: { name: jobName, namespace: registry.controller.namespace, exitCode: result.exitCode, timedOut: result.timedOut, parsed: parsed !== null, - stdoutTail: redactText(tailText(result.stdout, 1000)), - stderrTail: redactText(tailText(result.stderr, 800)), + stdoutTail: includeTargetTail ? redactText(tailText(result.stdout, 1000)) : "", + stderrTail: includeTargetTail ? redactText(tailText(result.stderr, 800)) : "", }, - targetResult: compactTargetDebugResult(parsed), - stateAfter: asOptionalRecord(parsed?.stateAfter) ?? stateSnapshot(state, followerId), + stateAfter: compact?.stateAfter ?? compactStateLike(asOptionalRecord(parsed?.stateAfter) ?? fallbackStateAfter), parsedDownstreamCliOutput: false, next: debugNext(followerId), }; @@ -335,7 +338,28 @@ function compactFollowerDecision(state: FollowerState): Record pipelineRun: state.pipelineRun, inFlightJob: state.inFlightJob, decision: state.decision, - timings: state.timings, + timings: compactDebugTimings(state.timings), + }; +} + +function compactDebugTimings(timings: FollowerState["timings"]): Record { + return { + budgetSeconds: timings.budgetSeconds, + totalSeconds: timings.totalSeconds, + totalStatus: timings.totalStatus, + totalSource: timings.totalSource, + sourceCommit: timings.sourceCommit, + startedAt: timings.startedAt, + finishedAt: timings.finishedAt, + overBudget: timings.overBudget, + stages: timings.stages.slice(0, 8).map((stage) => ({ + stage: stage.stage, + status: stage.status, + seconds: stage.seconds, + budgetSeconds: stage.budgetSeconds, + source: stage.source, + object: stage.object, + })), }; }