From 20a61b47e1a662ac699bde2dc054ac74050ea1c1 Mon Sep 17 00:00:00 2001 From: Codex Date: Fri, 3 Jul 2026 19:52:55 +0000 Subject: [PATCH] feat: add follower controller source debug step --- .../references/branch-follower.md | 4 + scripts/src/cicd-branch-follower.ts | 5 +- scripts/src/cicd-debug.ts | 81 ++++++++++++++++++- scripts/src/cicd-types.ts | 2 +- 4 files changed, 88 insertions(+), 4 deletions(-) diff --git a/.agents/skills/unidesk-cicd/references/branch-follower.md b/.agents/skills/unidesk-cicd/references/branch-follower.md index f693db07..31fd30a3 100644 --- a/.agents/skills/unidesk-cicd/references/branch-follower.md +++ b/.agents/skills/unidesk-cicd/references/branch-follower.md @@ -12,6 +12,7 @@ bun scripts/cli.ts cicd branch-follower status --live bun scripts/cli.ts cicd branch-follower run-once --all --dry-run bun scripts/cli.ts cicd branch-follower run-once --follower --confirm --wait bun scripts/cli.ts cicd branch-follower debug-step --follower --step state-read +bun scripts/cli.ts cicd branch-follower debug-step --follower --step controller-source bun scripts/cli.ts cicd branch-follower debug-step --follower --step status-read bun scripts/cli.ts cicd branch-follower debug-step --follower --step decide bun scripts/cli.ts cicd branch-follower debug-step --follower --step state-write --confirm @@ -24,6 +25,7 @@ bun scripts/cli.ts cicd branch-follower logs --follower `debug-step` is the required single-step troubleshooting entry before changing branch-follower code for repeated CI/CD convergence issues. It runs in a bounded target-side Job when called from the operator host, and uses the same controller modules as the real flow: - `state-read`: read only the compact ConfigMap state, value bytes, resourceVersion and `_updatedAt`. +- `controller-source`: read only the current target-side one-shot checkout identity: HEAD, branch, registry sha and key file markers. Use this before attributing a failed/slow self-upgrade run to new controller code. - `status-read`: read native source/Tekton/Argo/runtime status without triggering adapters. - `decide`: run the decision function in dry-run mode without triggering adapters or writing state. - `state-write --confirm`: patch the stored follower state back through the normal ConfigMap write helper and report before/after resourceVersion; this is for isolating state write failures, not for normal rollout. @@ -122,6 +124,8 @@ State writes must preserve same-source total timing at the target side. When a l Controller self-upgrade has a one-loop source boundary: the controller Deployment uses the stable tools image, syncs UniDesk source into the k8s git-mirror cache, then clones `/work/unidesk` each reconcile. A UniDesk source commit that changes branch-follower controller logic can still be triggered by the previous checkout if the loop observes that commit before cloning it for execution. Do not use that self-upgrade source change to validate new controller-state semantics, and do not backfill its missing total timing. First confirm the target Pod checkout contains the fix, then validate future timing/state behavior with a later source change or an explicit target-side `run-once` that starts from a stored state written by the fixed controller. +When self-upgrade timing is unclear, use `debug-step --step controller-source` before pushing another source change. If the checkout identity is not visible, add that single-step visibility first; do not infer controller code version from a slow automatic rollout alone. + If a deterministic Kubernetes Job or PipelineRun is reused and there is no already-stored `timings.startedAt`, the reused object's current wait/check duration is only a stage observation; it must not be promoted to `timings.totalSeconds`. When `run-once --confirm --wait` resumes a source change that is already `ClosingOut`, the CLI may wait for native closeout and report a `closeout` stage duration. That closeout-only wait is not the end-to-end total unless the stored state already contains a valid `timings.startedAt`. diff --git a/scripts/src/cicd-branch-follower.ts b/scripts/src/cicd-branch-follower.ts index 82deea9d..c03620d3 100644 --- a/scripts/src/cicd-branch-follower.ts +++ b/scripts/src/cicd-branch-follower.ts @@ -56,6 +56,7 @@ export function cicdHelp(): unknown { "bun scripts/cli.ts cicd branch-follower status --live", "bun scripts/cli.ts cicd branch-follower run-once --all --dry-run", "bun scripts/cli.ts cicd branch-follower run-once --follower hwlab-jd01-v03 --confirm --wait", + "bun scripts/cli.ts cicd branch-follower debug-step --follower web-probe-sentinel-master --step controller-source", "bun scripts/cli.ts cicd branch-follower debug-step --follower web-probe-sentinel-master --step state-read", "bun scripts/cli.ts cicd branch-follower debug-step --follower web-probe-sentinel-master --step state-write --confirm", "bun scripts/cli.ts cicd branch-follower cleanup-state --follower web-probe-sentinel-master --confirm", @@ -181,8 +182,8 @@ function parseOptions(args: string[]): ParsedOptions { } function debugStepOption(value: string): BranchFollowerDebugStep { - if (value === "state-read" || value === "status-read" || value === "decide" || value === "state-write") return value; - throw new Error("--step must be state-read, status-read, decide, or state-write"); + if (value === "state-read" || value === "controller-source" || value === "status-read" || value === "decide" || value === "state-write") return value; + throw new Error("--step must be state-read, controller-source, status-read, decide, or state-write"); } function isInClusterRuntime(): boolean { diff --git a/scripts/src/cicd-debug.ts b/scripts/src/cicd-debug.ts index b963c63f..ead58f8c 100644 --- a/scripts/src/cicd-debug.ts +++ b/scripts/src/cicd-debug.ts @@ -1,6 +1,9 @@ // SPEC: PJ2026-01060703 CI/CD branch follower debug steps. // Responsibility: bounded single-step debugging for branch follower state and decision paths. -import type { CommandResult } from "./command"; +import { createHash } from "node:crypto"; +import { existsSync, readFileSync } from "node:fs"; +import { runCommand, type CommandResult } from "./command"; +import { repoRoot, rootPath } from "./config"; import type { AdapterSummary, BranchFollowerDebugStep, BranchFollowerRegistry, FollowerSpec, FollowerState, K8sStateRead, ParsedOptions } from "./cicd-types"; import { renderControllerDebugJob, waitForJobShell } from "./cicd-controller-render"; import { redactText, shQuote } from "./platform-infra-ops-library"; @@ -30,7 +33,11 @@ export async function buildDebugStep(registry: BranchFollowerRegistry, options: let decided: FollowerState | null = null; let write: Record | null = null; let after: K8sStateRead | null = null; + let controllerSource: Record | null = null; + if (step === "controller-source") { + controllerSource = controllerSourceSnapshot(registry); + } if (step === "status-read" || step === "decide") { live = await deps.readAdapterStatus(registry, follower, options); } @@ -69,6 +76,7 @@ export async function buildDebugStep(registry: BranchFollowerRegistry, options: execution: "k8s-native-in-cluster", dryRun: !options.confirm, stateBefore: stateSnapshot(before, follower.id), + controllerSource, status: live === null ? null : compactAdapterStatus(live), decision: decided === null ? null : compactFollowerDecision(decided), stateWrite: write, @@ -84,6 +92,7 @@ export function renderDebugStepHuman(payload: Record): string { const write = asOptionalRecord(payload.stateWrite); const status = asOptionalRecord(payload.status); const decision = asOptionalRecord(payload.decision); + const controllerSource = asOptionalRecord(payload.controllerSource); const target = asOptionalRecord(payload.target); const next = asOptionalRecord(payload.next); const rows = [[ @@ -109,11 +118,13 @@ export function renderDebugStepHuman(payload: Record): string { `CI/CD BRANCH-FOLLOWER DEBUG-STEP (${payload.ok === false ? "failed" : "ok"})`, "", table(["FOLLOWER", "STEP", "EXECUTION", "DRY_RUN", "BEFORE", "AFTER", "BEFORE_SHA", "AFTER_SHA"], rows), + controllerSource === null ? "" : `\nCONTROLLER SOURCE\n${table(["HEAD", "BRANCH", "REGISTRY", "REREAD_MARKER", "FILE_SHA"], [[shortSha(stringOrNull(controllerSource.head)), controllerSource.branch ?? "-", shortSha(stringOrNull(controllerSource.registrySha256)), controllerSource.closeoutRereadMarker === true ? "present" : "missing", shortSha(stringOrNull(asOptionalRecord(controllerSource.branchFollowerFile)?.sha256))]])}`, target === null ? "" : `\nTARGET JOB\n${table(["JOB", "EXIT", "TIMED_OUT", "PARSED"], [[target.name ?? "-", target.exitCode ?? "-", target.timedOut ?? "-", target.parsed === true ? "yes" : "no"]])}`, writeRows.length === 0 ? "" : `\nSTATE WRITE\n${table(["STATUS", "MODE", "INPUT", "BEFORE_RV", "AFTER_RV", "EXIT", "MESSAGE"], writeRows)}`, "", "NEXT", `state-read: ${next?.stateRead ?? "-"}`, + `controller-source: ${next?.controllerSource ?? "-"}`, `status-read: ${next?.statusRead ?? "-"}`, `decide: ${next?.decide ?? "-"}`, `state-write: ${next?.stateWrite ?? "-"}`, @@ -152,6 +163,7 @@ function runTargetDebugStepJob(registry: BranchFollowerRegistry, options: Parsed execution: "k8s-native-debug-job", dryRun: !options.confirm, stateBefore: compact?.stateBefore ?? compactStateLike(asOptionalRecord(parsed?.stateBefore)), + controllerSource: compact?.controllerSource ?? asOptionalRecord(parsed?.controllerSource), status: compact?.status ?? null, decision: compact?.decision ?? null, stateWrite: compact?.stateWrite ?? null, @@ -173,6 +185,7 @@ function runTargetDebugStepJob(registry: BranchFollowerRegistry, options: Parsed function compactTargetDebugResult(parsed: Record | null): Record | null { if (parsed === null) return null; const stateBefore = asOptionalRecord(parsed.stateBefore); + const controllerSource = asOptionalRecord(parsed.controllerSource); const status = asOptionalRecord(parsed.status); const decision = asOptionalRecord(parsed.decision); const stateWrite = asOptionalRecord(parsed.stateWrite); @@ -183,6 +196,16 @@ function compactTargetDebugResult(parsed: Record | null): Recor step: stringOrNull(parsed.step), follower: stringOrNull(parsed.follower), stateBefore: compactStateLike(stateBefore), + controllerSource: controllerSource === null ? null : { + ok: controllerSource.ok === true, + repository: stringOrNull(controllerSource.repository), + branch: stringOrNull(controllerSource.branch), + head: stringOrNull(controllerSource.head), + registrySha256: stringOrNull(controllerSource.registrySha256), + closeoutRereadMarker: controllerSource.closeoutRereadMarker === true, + branchFollowerFile: asOptionalRecord(controllerSource.branchFollowerFile), + errors: Array.isArray(controllerSource.errors) ? controllerSource.errors.map(String).slice(0, 5) : [], + }, status: status === null ? null : { ok: status.ok === true, phase: stringOrNull(status.phase), @@ -220,6 +243,61 @@ function compactStateLike(value: Record | null): Record { + const head = commandText(["git", "rev-parse", "HEAD"]); + const branch = commandText(["git", "symbolic-ref", "--short", "HEAD"]); + const file = fileSnapshot("scripts/src/cicd-branch-follower.ts"); + const text = file.text; + const errors = [ + head.ok ? null : `git head: ${head.error}`, + branch.ok ? null : `git branch: ${branch.error}`, + file.ok ? null : `branch follower file: ${file.error}`, + ].filter((item): item is string => item !== null); + return { + ok: errors.length === 0, + repository: registry.controller.source.repository, + branch: branch.value, + head: head.value, + registrySha256: registry.rawSha256, + controllerConfigMap: registry.controller.configMapName, + closeoutRereadMarker: text.includes("post-closeout status re-read") && text.includes("automaticCloseoutAccelerated"), + branchFollowerFile: { + path: file.path, + present: file.present, + bytes: file.bytes, + sha256: file.sha256, + }, + statusAuthority: "target-controller-checkout", + parsedDownstreamCliOutput: false, + errors, + }; +} + +function commandText(command: string[]): { ok: boolean; value: string | null; error: string | null } { + const result = runCommand(command, repoRoot, { timeoutMs: 5_000 }); + const value = result.exitCode === 0 ? result.stdout.trim() : null; + return { + ok: result.exitCode === 0 && value !== null && value.length > 0, + value: value === null || value.length === 0 ? null : value, + error: result.exitCode === 0 ? null : redactText(tailText(result.stderr || result.stdout, 300)), + }; +} + +function fileSnapshot(path: string): { ok: boolean; path: string; present: boolean; bytes: number | null; sha256: string | null; text: string; error: string | null } { + const absolute = rootPath(path); + if (!existsSync(absolute)) return { ok: false, path, present: false, bytes: null, sha256: null, text: "", error: "missing" }; + const text = readFileSync(absolute, "utf8"); + return { + ok: true, + path, + present: true, + bytes: Buffer.byteLength(text), + sha256: createHash("sha256").update(text).digest("hex"), + text, + error: null, + }; +} + function debugDecisionOptions(options: ParsedOptions): ParsedOptions { return { ...options, confirm: false, dryRun: true, wait: false, recordState: false }; } @@ -407,6 +485,7 @@ function compactDebugTimings(timings: FollowerState["timings"]): Record { return { stateRead: `bun scripts/cli.ts cicd branch-follower debug-step --follower ${followerId} --step state-read`, + controllerSource: `bun scripts/cli.ts cicd branch-follower debug-step --follower ${followerId} --step controller-source`, statusRead: `bun scripts/cli.ts cicd branch-follower debug-step --follower ${followerId} --step status-read`, decide: `bun scripts/cli.ts cicd branch-follower debug-step --follower ${followerId} --step decide`, stateWrite: `bun scripts/cli.ts cicd branch-follower debug-step --follower ${followerId} --step state-write --confirm`, diff --git a/scripts/src/cicd-types.ts b/scripts/src/cicd-types.ts index 9fc17aca..314a7d05 100644 --- a/scripts/src/cicd-types.ts +++ b/scripts/src/cicd-types.ts @@ -3,7 +3,7 @@ export type OutputMode = "human" | "json" | "yaml"; export type BranchFollowerAction = "help" | "plan" | "apply" | "status" | "run-once" | "debug-step" | "cleanup-state" | "events" | "logs"; -export type BranchFollowerDebugStep = "state-read" | "status-read" | "decide" | "state-write"; +export type BranchFollowerDebugStep = "state-read" | "controller-source" | "status-read" | "decide" | "state-write"; export type BranchFollowerPhase = | "Observed" | "Noop"