diff --git a/scripts/native/cicd/read-state-summary.mjs b/scripts/native/cicd/read-state-summary.mjs index 63042c9e..b9fb341e 100644 --- a/scripts/native/cicd/read-state-summary.mjs +++ b/scripts/native/cicd/read-state-summary.mjs @@ -205,6 +205,7 @@ function compactCloseout(closeout) { function compactNativePayload(payload) { const value = recordOrNull(payload); if (value === null) return null; + const refreshEvidence = compactRefreshEvidence(recordOrNull(recordOrNull(value.nativeCapabilities)?.controlPlaneRefresh)); return { gitMirror: compactGitMirror(value.gitMirror), reuseConfig: compactReuseConfig(value.reuseConfig), @@ -214,7 +215,8 @@ function compactNativePayload(payload) { planArtifacts: compactPlanArtifacts(value.planArtifacts), argo: compactArgo(value.argo), runtime: compactRuntime(value.runtime), - refreshEvidence: compactRefreshEvidence(recordOrNull(recordOrNull(value.nativeCapabilities)?.controlPlaneRefresh)), + refreshEvidence, + refreshEvidenceReason: refreshEvidence === null ? "no stored control-plane-refresh capability evidence; status refresh job is reported separately at top-level refresh" : null, errors: arrayStrings(value.errors).slice(0, 5), statusAuthority: stringOrNull(value.statusAuthority), parsedDownstreamCliOutput: false, diff --git a/scripts/src/cicd-branch-follower.ts b/scripts/src/cicd-branch-follower.ts index aa78fc56..a318ba29 100644 --- a/scripts/src/cicd-branch-follower.ts +++ b/scripts/src/cicd-branch-follower.ts @@ -638,17 +638,20 @@ async function buildStatus(registry: BranchFollowerRegistry, options: ParsedOpti const stored = k8s.stateByFollower[follower.id] ?? {}; const fallbackStored = refresh === null ? {} : beforeRefreshStateByFollower[follower.id] ?? {}; const live = shouldLive && follower.enabled ? await readAdapterStatus(registry, follower, options) : null; - followers.push(mergeFollowerStatus(registry, follower, stored, live, shouldLive, detailedFollowers, fallbackStored)); + followers.push(mergeFollowerStatus(registry, follower, stored, live, wantsLive, detailedFollowers, fallbackStored)); } return { ok: k8s.ok && followers.every((item) => item.ok !== false), action: "status", - live: shouldLive, + live: wantsLive, + liveMode: shouldLive ? "in-cluster-adapter-status" : refresh !== null ? "operator-status-refresh-job" : "stored-state", + liveRefresh: liveRefreshSummary(wantsLive, shouldLive, refresh), registry: registrySummary(registry), controller: controllerStatusSummary(registry, k8s), followers, refresh, errors: k8s.errors, + warnings: k8s.warnings, next: { apply: "bun scripts/cli.ts cicd branch-follower apply --confirm --wait", liveStatus: "bun scripts/cli.ts cicd branch-follower status --live", @@ -2001,6 +2004,7 @@ function mergeFollowerStatus( message: live?.message ?? stringOrNull(stored.decision) ?? "no controller state yet", timings: detailed ? timings : compactListTimings(timings), performance: timingPerformanceSummary(timings), + timingContext: statusTimingContext(timings, stored, live, liveRequested, fallbackStored), evidence: detailed ? evidence : null, reconcileTimeline: detailed ? reconcileTimeline : null, rawStateDiagnostic: detailed ? asOptionalRecord(stored.rawStateDiagnostic) : null, @@ -2021,6 +2025,7 @@ function mergeFollowerStatus( function readK8sState(registry: BranchFollowerRegistry, options: ParsedOptions): K8sStateRead { const errors: string[] = []; + const warnings: string[] = []; const readTimeoutMs = statusReadTimeoutMs(registry, options); const stateResult = kubeConfigMapFollowerState(registry, options, readTimeoutMs); const namespace = registry.controller.namespace; @@ -2031,7 +2036,7 @@ function readK8sState(registry: BranchFollowerRegistry, options: ParsedOptions): if (!stateResult.ok) errors.push(`state configmap: ${stateResult.error}`); if (!deploymentResult.ok && !isNotFoundText(deploymentResult.error)) errors.push(`deployment: ${deploymentResult.error}`); if (!leaseResult.ok && !isNotFoundText(leaseResult.error)) errors.push(`lease: ${leaseResult.error}`); - if (!podsResult.ok && !isNotFoundText(podsResult.error)) errors.push(`pods: ${podsResult.error}`); + if (!podsResult.ok && !isNotFoundText(podsResult.error)) warnings.push(`pods: ${podsResult.error}`); return { ok: errors.length === 0, stateByFollower: stateResult.stateByFollower, @@ -2042,6 +2047,7 @@ function readK8sState(registry: BranchFollowerRegistry, options: ParsedOptions): lease: leaseResult.value, pods: podsResult.value, errors, + warnings, }; } @@ -2440,7 +2446,11 @@ function runControllerReconcileJob(registry: BranchFollowerRegistry, options: Pa `kubectl apply --server-side --force-conflicts --field-manager=${shQuote(registry.controller.fieldManager)} -f "$tmp" >/dev/null`, mode.wait ? waitForJobShell(registry.controller.namespace, jobName, timeoutSeconds) : "true", ].join("\n"); + const startedMs = Date.now(); + const startedAt = new Date(startedMs).toISOString(); const result = runKubeScript(registry, options, script, "", (timeoutSeconds + registry.controller.budgets.reconcileTransportGraceSeconds) * 1000); + const finishedMs = Date.now(); + const finishedAt = new Date(finishedMs).toISOString(); return { ok: result.exitCode === 0, name: jobName, @@ -2449,6 +2459,9 @@ function runControllerReconcileJob(registry: BranchFollowerRegistry, options: Pa execution: "k8s-native-job", exitCode: result.exitCode, timedOut: result.timedOut, + startedAt, + finishedAt, + elapsedMs: finishedMs >= startedMs ? finishedMs - startedMs : null, message: result.exitCode === 0 ? "reconcile job completed" : redactText(tailText(result.stderr || result.stdout, 800)), stdoutBytes: Buffer.byteLength(result.stdout, "utf8"), stderrBytes: Buffer.byteLength(result.stderr, "utf8"), @@ -2526,6 +2539,8 @@ function controllerStatusSummary(registry: BranchFollowerRegistry, k8s: K8sState availableReplicas: available, replicas, pods: podItems, + podsReadStatus: k8s.pods === null && k8s.warnings.some((item) => item.startsWith("pods: ")) ? "warning" : "ok", + podsReadWarning: k8s.warnings.find((item) => item.startsWith("pods: ")) ?? null, stateConfigMapName: registry.controller.stateConfigMapName, stateConfigMapPresent: k8s.stateConfigMapPresent, leaseName: registry.controller.leaseName, @@ -2534,6 +2549,97 @@ function controllerStatusSummary(registry: BranchFollowerRegistry, k8s: K8sState }; } +function liveRefreshSummary(wantsLive: boolean, shouldLive: boolean, refresh: Record | null): Record { + if (refresh === null) { + return { + requested: wantsLive, + executed: false, + mode: shouldLive ? "in-cluster-adapter-status" : wantsLive ? "not-created" : "not-requested", + includedInStoredTotal: false, + note: wantsLive ? "in-cluster live status does not create an operator refresh job" : "stored state read only", + }; + } + return { + requested: true, + executed: true, + mode: "operator-status-refresh-job", + name: stringOrNull(refresh.name), + namespace: stringOrNull(refresh.namespace), + ok: refresh.ok === true, + exitCode: numberOrNull(refresh.exitCode), + timedOut: refresh.timedOut === true, + startedAt: stringOrNull(refresh.startedAt), + finishedAt: stringOrNull(refresh.finishedAt), + elapsedMs: numberOrNull(refresh.elapsedMs), + stdoutBytes: numberOrNull(refresh.stdoutBytes), + stderrBytes: numberOrNull(refresh.stderrBytes), + includedInStoredTotal: false, + note: "status refresh observes/writes compact state; it is not a triggered run total", + }; +} + +function statusTimingContext( + timings: FollowerState["timings"], + stored: Record, + live: AdapterSummary | null, + liveRequested: boolean, + fallbackStored: Record, +): Record { + const storedPayload = asOptionalRecord(asOptionalRecord(stored.command)?.payload); + const fallbackPayload = asOptionalRecord(asOptionalRecord(fallbackStored.command)?.payload); + const livePayload = asOptionalRecord(live?.payload); + const nativePayload = firstRecord(livePayload, storedPayload, fallbackPayload); + return { + storedTiming: { + totalSeconds: timings.totalSeconds, + totalStatus: timings.totalStatus, + totalSource: timings.totalSource, + sourceCommit: timings.sourceCommit, + startedAt: timings.startedAt, + finishedAt: timings.finishedAt, + rangeComplete: timings.startedAt !== null && timings.finishedAt !== null, + missingRangeReason: timings.startedAt === null || timings.finishedAt === null ? "historical timing range missing; do not infer old startedAt/finishedAt" : null, + }, + liveRefresh: { + requested: liveRequested, + source: liveRequested ? live === null ? "operator-status-refresh-job" : "in-cluster-adapter-status" : "not-requested", + includedInStoredTotal: false, + note: liveRequested ? "live/status refresh is reported separately and must not be added to stored total" : null, + }, + nativeGateTiming: nativeGateTimingSummary(nativePayload, timings), + }; +} + +function nativeGateTimingSummary(payload: Record | null, timings: FollowerState["timings"]): Record { + if (payload === null) { + return { + source: "missing", + includedInStoredTotal: false, + reason: "no native payload in stored or live status", + }; + } + const statusRead = asOptionalRecord(asOptionalRecord(payload.timings)?.statusRead); + const sourceSync = asOptionalRecord(payload.sourceSync); + const tekton = asOptionalRecord(payload.tekton); + const argo = asOptionalRecord(payload.argo); + const runtime = asOptionalRecord(payload.runtime); + const argoStage = timings.stages.find((stage) => stage.stage === "argo"); + return { + source: "native-status-payload", + includedInStoredTotal: false, + statusReadSeconds: secondsFromMsValue(numberOrNull(statusRead?.elapsedMs)), + gitMirrorSyncSeconds: secondsFromMsValue(numberOrNull(sourceSync?.elapsedMs)), + pipelineRunSeconds: numberOrNull(tekton?.durationSeconds), + argoOperationSeconds: numberOrNull(argo?.operationDurationSeconds), + argoOperationStartedAt: stringOrNull(argo?.operationStartedAt), + argoOperationFinishedAt: stringOrNull(argo?.operationFinishedAt), + argoIncludedInStoredTotal: argoStage?.seconds !== null && argoStage?.source === "argocd", + runtimeReady: runtime?.ready === true, + runtimeAligned: runtime?.aligned === true ? true : runtime?.aligned === false ? false : null, + note: "native gate timings are current observations unless their stage explicitly overlaps stored startedAt/finishedAt", + }; +} + function followerNextCommands(follower: FollowerSpec): Record { const next: Record = { status: `bun scripts/cli.ts cicd branch-follower status --follower ${follower.id}`, @@ -2620,6 +2726,17 @@ function numberOrNull(value: unknown): number | null { return typeof value === "number" && Number.isFinite(value) ? value : null; } +function secondsFromMsValue(value: number | null): number | null { + return value === null ? null : Math.round(value / 100) / 10; +} + +function firstRecord(...values: Array | null>): Record | null { + for (const value of values) { + if (value !== null) return value; + } + return null; +} + function commandCompact(result: CommandResult, options: ParsedOptions): Record { return { argv: result.command, diff --git a/scripts/src/cicd-evidence.ts b/scripts/src/cicd-evidence.ts index 3c86c01e..0e08ca5f 100644 --- a/scripts/src/cicd-evidence.ts +++ b/scripts/src/cicd-evidence.ts @@ -44,7 +44,7 @@ export function followerEvidenceSummary(input: { return { pipelineRunRefName: pipelineRefName, pipeline: compactPipelineEvidence(pipeline), - refreshBoundedReason: refresh === null ? "missing-from-live-and-stored-evidence" : null, + refreshBoundedReason: refresh === null ? "missing-control-plane-refresh-capability-evidence; status refresh job is reported separately at top-level refresh/liveRefresh" : null, refresh: refresh === null ? null : { diff --git a/scripts/src/cicd-render.ts b/scripts/src/cicd-render.ts index 2264a4d7..29b1d98b 100644 --- a/scripts/src/cicd-render.ts +++ b/scripts/src/cicd-render.ts @@ -109,7 +109,10 @@ function renderStatusHuman(payload: Record, _options: ParsedOpt }); const next = asOptionalRecord(payload.next); const errors = Array.isArray(payload.errors) ? payload.errors : []; + const warnings = Array.isArray(payload.warnings) ? payload.warnings : []; + const liveRefreshRows = liveRefreshRowsForPayload(payload); const timingRows = followers.flatMap(timingRowsForFollower).slice(0, 48); + const timingContextRows = followers.flatMap(timingContextRowsForFollower).slice(0, 48); const performanceRows = followers.flatMap(performanceRowsForFollower).slice(0, 24); const evidenceRows = followers.flatMap(evidenceRowsForFollower).slice(0, 48); const reconcileRows = followers.flatMap(reconcileRowsForFollower).slice(0, 48); @@ -118,16 +121,19 @@ function renderStatusHuman(payload: Record, _options: ParsedOpt `CI/CD BRANCH-FOLLOWER STATUS (${payload.ok === false ? "degraded" : "ok"})`, "", table( - ["CTRL_NS", "ROUTE", "DEPLOY", "READY", "PODS", "STATE_CM", "LEASE"], - [[controller?.namespace ?? "-", controller?.route ?? "-", controller?.deploymentName ?? "-", `${controller?.availableReplicas ?? 0}/${controller?.replicas ?? 0}`, controller?.pods ?? "-", controller?.stateConfigMapPresent === true ? "present" : "missing", controller?.leaseHolder ?? "-"]], + ["CTRL_NS", "ROUTE", "DEPLOY", "READY", "PODS", "PODS_READ", "STATE_CM", "LEASE"], + [[controller?.namespace ?? "-", controller?.route ?? "-", controller?.deploymentName ?? "-", `${controller?.availableReplicas ?? 0}/${controller?.replicas ?? 0}`, controller?.pods ?? "-", controller?.podsReadStatus ?? "-", controller?.stateConfigMapPresent === true ? "present" : "missing", controller?.leaseHolder ?? "-"]], ), + liveRefreshRows.length === 0 ? "" : `\nLIVE REFRESH\n${table(["MODE", "REQUESTED", "EXECUTED", "JOB", "ELAPSED", "IN_TOTAL"], liveRefreshRows)}`, "", table(["FOLLOWER", "PHASE", "ADAPTER", "OBSERVED", "TARGET", "TRIGGERED", "SUCCEEDED", "IN_FLIGHT", "BUDGET", "MESSAGE"], rows), timingRows.length === 0 ? "" : `\nSTAGE TIMINGS\n${table(["FOLLOWER", "STAGE", "STATUS", "SECONDS", "BUDGET", "OBJECT"], timingRows)}`, + timingContextRows.length === 0 ? "" : `\nTIMING CONTEXT\n${table(["FOLLOWER", "CONTEXT", "SOURCE", "SECONDS", "STARTED", "FINISHED", "IN_TOTAL"], timingContextRows)}`, performanceRows.length === 0 ? "" : `\nSLOW STAGES\n${table(["FOLLOWER", "STAGE", "STATUS", "SECONDS", "SOURCE", "OBJECT"], performanceRows)}`, evidenceRows.length === 0 ? "" : `\nEVIDENCE\n${table(["FOLLOWER", "TYPE", "STATUS", "DETAIL", "OBJECT"], evidenceRows)}`, reconcileRows.length === 0 ? "" : `\nRECONCILE TIMELINE\n${table(["FOLLOWER", "STEP", "STATUS", "SECONDS", "STARTED", "OBJECT"], reconcileRows)}`, rawStateRows.length === 0 ? "" : `\nRAW STATE DIAGNOSTIC\n${table(["FOLLOWER", "STATE_BYTES", "COMMAND", "TIMELINE", "STEPS", "TIMELINE_BYTES", "REASON"], rawStateRows)}`, + warnings.length === 0 ? "" : `\nWARNINGS\n${warnings.map((item) => `- ${item}`).join("\n")}`, errors.length === 0 ? "" : `\nERRORS\n${errors.map((item) => `- ${item}`).join("\n")}`, "", "NEXT", @@ -137,6 +143,19 @@ function renderStatusHuman(payload: Record, _options: ParsedOpt ].filter((line) => line !== "").join("\n"); } +function liveRefreshRowsForPayload(payload: Record): unknown[][] { + const refresh = asOptionalRecord(payload.liveRefresh); + if (refresh === null) return []; + return [[ + stringOrNull(refresh.mode) ?? stringOrNull(payload.liveMode) ?? "-", + refresh.requested === true ? "yes" : "no", + refresh.executed === true ? "yes" : "no", + stringOrNull(refresh.name) ?? "-", + formatMs(numberOrNull(refresh.elapsedMs)), + refresh.includedInStoredTotal === true ? "yes" : "no", + ]]; +} + function renderRunOnceHuman(payload: Record): string { const followers = arrayRecords(payload.followers); const stateWrites = arrayRecords(payload.stateWrites); @@ -246,6 +265,53 @@ function performanceRowsForFollower(item: Record): unknown[][] ]); } +function timingContextRowsForFollower(item: Record): unknown[][] { + const context = asOptionalRecord(item.timingContext); + if (context === null) return []; + const stored = asOptionalRecord(context.storedTiming); + const liveRefresh = asOptionalRecord(context.liveRefresh); + const nativeGate = asOptionalRecord(context.nativeGateTiming); + const rows: unknown[][] = []; + if (stored !== null) { + rows.push([ + item.id, + "stored", + stringOrNull(stored.totalSource) ?? "-", + formatSeconds(numberOrNull(stored.totalSeconds)), + stringOrNull(stored.startedAt) ?? "-", + stringOrNull(stored.finishedAt) ?? "-", + "yes", + ]); + } + if (liveRefresh !== null) { + rows.push([ + item.id, + "live-refresh", + stringOrNull(liveRefresh.source) ?? "-", + "-", + "-", + "-", + liveRefresh.includedInStoredTotal === true ? "yes" : "no", + ]); + } + if (nativeGate !== null) { + const detail = [ + `pipeline=${formatSeconds(numberOrNull(nativeGate.pipelineRunSeconds))}`, + `argo=${formatSeconds(numberOrNull(nativeGate.argoOperationSeconds))}`, + ].join(" "); + rows.push([ + item.id, + "native-gates", + stringOrNull(nativeGate.source) ?? "-", + detail, + stringOrNull(nativeGate.argoOperationStartedAt) ?? "-", + stringOrNull(nativeGate.argoOperationFinishedAt) ?? "-", + nativeGate.argoIncludedInStoredTotal === true ? "argo" : "no", + ]); + } + return rows; +} + function reconcileRowsFromRunOnce(payload: Record, followers: Record[]): unknown[][] { const timeline = asOptionalRecord(payload.reconcileTimeline); if (timeline !== null) return reconcileRowsForTimeline(timeline, null); @@ -345,6 +411,10 @@ function formatSeconds(value: number | null): string { return value === null ? "-" : `${value}s`; } +function formatMs(value: number | null): string { + return value === null ? "-" : `${Math.round(value / 100) / 10}s`; +} + function boolMatch(value: unknown): string { return value === true ? "match" : value === false ? "mismatch" : "-"; } diff --git a/scripts/src/cicd-timings.ts b/scripts/src/cicd-timings.ts index f7c845f6..c1c7def6 100644 --- a/scripts/src/cicd-timings.ts +++ b/scripts/src/cicd-timings.ts @@ -89,7 +89,10 @@ export function compactListTimings(timings: FollowerState["timings"]): Record ({ stage: stage.stage, @@ -114,6 +117,10 @@ export function timingPerformanceSummary(timings: FollowerState["timings"]): Rec return { budgetSeconds: timings.budgetSeconds, totalSeconds: timings.totalSeconds, + totalSource: timings.totalSource, + sourceCommit: timings.sourceCommit, + startedAt: timings.startedAt, + finishedAt: timings.finishedAt, overBudget: timings.overBudget, slowestStage: slowStages[0] ?? null, slowStages, diff --git a/scripts/src/cicd-types.ts b/scripts/src/cicd-types.ts index bb7789d0..ed548b87 100644 --- a/scripts/src/cicd-types.ts +++ b/scripts/src/cicd-types.ts @@ -344,6 +344,7 @@ export interface K8sStateRead { lease: Record | null; pods: Record | null; errors: string[]; + warnings: string[]; } export interface K8sFollowerStateRead {