diff --git a/scripts/native/cicd/branch-follower-gate.mjs b/scripts/native/cicd/branch-follower-gate.mjs index 93e16283..92cc892e 100644 --- a/scripts/native/cicd/branch-follower-gate.mjs +++ b/scripts/native/cicd/branch-follower-gate.mjs @@ -24,6 +24,7 @@ const workloads = parseWorkloads(process.env.WORKLOADS_B64 || ""); const healthUrl = process.env.HEALTH_URL || ""; const slowTaskSeconds = requiredPositiveIntEnv("SLOW_TASK_SECONDS"); const healthTimeoutMs = requiredPositiveIntEnv("HEALTH_TIMEOUT_MS"); +const gateTimeoutMs = optionalPositiveIntEnv("GATE_TIMEOUT_MS") ?? healthTimeoutMs; const errors = []; const branchCommit = rev(`refs/heads/${sourceBranch}`); @@ -45,10 +46,11 @@ if (gate === "reuse-plan") evidence = await reusePlanEvidence(sourceCommit); else if (gate === "ci-taskrun-plan") evidence = await ciTaskRunEvidence(sourceCommit); else if (gate === "cd-rollout-plan") evidence = await cdRolloutEvidence(sourceCommit); else if (gate === "post-deploy-health") evidence = await postDeployHealthEvidence(sourceCommit); +else if (gate === "runtime-closeout") evidence = await runtimeCloseoutEvidence(sourceCommit); else fail(`unsupported gate ${gate}`); const ok = errors.length === 0 && evidence?.ok === true; -console.log(JSON.stringify({ +const payload = { ok, gate, follower, @@ -62,7 +64,9 @@ console.log(JSON.stringify({ statusAuthority: "kubernetes-api-serviceaccount", parsedDownstreamCliOutput: false, bounded: true, -})); +}; +console.log(JSON.stringify(payload)); +if (!ok) process.exit(2); async function reusePlanEvidence(commit) { const reuse = readReuseConfig(commit); @@ -417,6 +421,68 @@ async function postDeployHealthEvidence(commit) { }; } +async function runtimeCloseoutEvidence(commit) { + const startedAt = Date.now(); + const refresh = await argoRefresh(); + const deadline = Date.now() + gateTimeoutMs; + let polls = 0; + let latest = null; + while (Date.now() <= deadline) { + polls += 1; + latest = await runtimeCloseoutStatus(commit); + if (latest.ok === true) break; + if (Date.now() + 2000 > deadline) break; + await delay(2000); + } + return { + ok: latest?.ok === true, + sourceCommit: shortSha(commit), + gitMirror: compactGitMirrorEvidence(gitMirror), + refresh, + closeout: latest, + polls, + elapsedMs: Date.now() - startedAt, + writesState: false, + contract: { + gate: "argo-runtime-only", + excludes: ["git-mirror-flush", "state-write"], + expectation: "Argo refresh and runtime readiness are independently triggerable; git-mirror post-flush is validated by the git-mirror-flush gate.", + }, + }; +} + +async function runtimeCloseoutStatus(commit) { + const pipelineRunName = commit && pipelineRunPrefix ? `${pipelineRunPrefix}-${commit.slice(0, 12)}` : ""; + const pipelineRun = pipelineRunName && tektonNamespace + ? pipelineRunStatus(await getJson(`/apis/tekton.dev/v1/namespaces/${encodeURIComponent(tektonNamespace)}/pipelineruns/${encodeURIComponent(pipelineRunName)}`, false)) + : { present: null, succeeded: null, reason: "tekton-not-configured" }; + const argo = await argoSummary(); + const runtime = await runtimeSummary(commit); + return { + ok: source.snapshotReady === true && pipelineRun.succeeded === true && argo.ready === true && runtime.ready === true && runtime.aligned === true, + source, + pipelineRun, + argo, + runtime, + }; +} + +async function argoRefresh() { + if (!argoNamespace || !argoApplication) return { ok: true, skipped: true, reason: "argo-not-configured" }; + const path = `/apis/argoproj.io/v1alpha1/namespaces/${encodeURIComponent(argoNamespace)}/applications/${encodeURIComponent(argoApplication)}`; + const patched = await patchJson(path, { metadata: { annotations: { "argocd.argoproj.io/refresh": "hard" } } }); + return { + ok: patched !== null, + namespace: argoNamespace, + application: argoApplication, + refresh: "hard", + resourceVersion: str(patched?.metadata?.resourceVersion), + statusAuthority: "kubernetes-api-serviceaccount", + parsedDownstreamCliOutput: false, + valuesRedacted: true, + }; +} + function readReuseConfig(commit) { if (!commit || !sourceStageRef) return { present: false, reason: "source-commit-missing" }; try { @@ -517,6 +583,7 @@ async function argoSummary() { const sync = app?.status?.sync || {}; const health = app?.status?.health || {}; const op = app?.status?.operationState || {}; + const problemWorkloads = await argoProblemWorkloadSummaries(app); return { name: argoApplication, namespace: argoNamespace, @@ -525,10 +592,141 @@ async function argoSummary() { revision: str(sync.revision), operationPhase: str(op.phase), operationMessage: str(op.message), + conditions: compactArgoConditions(app?.status?.conditions), + nonReadyResources: compactArgoNonReadyResources(app?.status?.resources), + syncResultResources: compactArgoSyncResultResources(op?.syncResult?.resources), + problemWorkloads, ready: sync.status === "Synced" && health.status === "Healthy", }; } +async function argoProblemWorkloadSummaries(app) { + const resources = Array.isArray(app?.status?.resources) ? app.status.resources : []; + const syncResources = Array.isArray(app?.status?.operationState?.syncResult?.resources) ? app.status.operationState.syncResult.resources : []; + const names = uniqueStrings([ + ...resources + .filter((item) => item?.kind === "Deployment" && (item.status === "OutOfSync" || (item.health?.status && item.health.status !== "Healthy"))) + .map((item) => str(item.name)) + .filter(Boolean), + ...syncResources + .filter((item) => item?.kind === "Deployment" && problemArgoSyncResource(item)) + .map((item) => str(item.name)) + .filter(Boolean), + ]).slice(0, 8); + const rows = []; + for (const name of names) rows.push(await deploymentProblemSummary(runtimeNamespace || "default", name)); + return rows; +} + +async function deploymentProblemSummary(namespace, name) { + const deployment = await getJson(`/apis/apps/v1/namespaces/${encodeURIComponent(namespace)}/deployments/${encodeURIComponent(name)}`, false); + const selector = deploymentSelector(deployment); + const pods = selector ? await getJson(`/api/v1/namespaces/${encodeURIComponent(namespace)}/pods?labelSelector=${encodeURIComponent(selector)}`, false) : null; + const conditions = Array.isArray(deployment?.status?.conditions) + ? deployment.status.conditions + .filter((item) => item?.status !== "True" || item?.type === "Progressing") + .slice(-4) + .map((item) => ({ + type: str(item?.type), + status: str(item?.status), + reason: str(item?.reason), + message: shortText(str(item?.message) || ""), + })) + : []; + return { + kind: "Deployment", + namespace, + name, + desired: deployment?.spec?.replicas ?? 1, + readyReplicas: deployment?.status?.readyReplicas ?? 0, + updatedReplicas: deployment?.status?.updatedReplicas ?? 0, + unavailableReplicas: deployment?.status?.unavailableReplicas ?? null, + conditions, + pods: compactProblemPods(pods), + }; +} + +function deploymentSelector(deployment) { + const labels = deployment?.spec?.selector?.matchLabels; + if (!labels || typeof labels !== "object" || Array.isArray(labels)) return null; + const pairs = Object.entries(labels) + .filter((entry) => typeof entry[0] === "string" && typeof entry[1] === "string") + .map(([key, value]) => `${key}=${value}`); + return pairs.length === 0 ? null : pairs.join(","); +} + +function compactProblemPods(list) { + const items = Array.isArray(list?.items) ? list.items : []; + return items.slice(0, 6).map((pod) => { + const statuses = Array.isArray(pod?.status?.containerStatuses) ? pod.status.containerStatuses : []; + return { + name: str(pod?.metadata?.name), + phase: str(pod?.status?.phase), + ready: statuses.every((item) => item?.ready === true), + containers: statuses.slice(0, 4).map((item) => ({ + name: str(item?.name), + ready: item?.ready === true, + restartCount: typeof item?.restartCount === "number" ? item.restartCount : null, + waitingReason: str(item?.state?.waiting?.reason), + waitingMessage: shortText(str(item?.state?.waiting?.message) || ""), + terminatedReason: str(item?.lastState?.terminated?.reason) || str(item?.state?.terminated?.reason), + exitCode: typeof item?.lastState?.terminated?.exitCode === "number" ? item.lastState.terminated.exitCode : typeof item?.state?.terminated?.exitCode === "number" ? item.state.terminated.exitCode : null, + })), + }; + }); +} + +function compactArgoConditions(value) { + return Array.isArray(value) + ? value.slice(0, 5).map((item) => ({ + type: str(item?.type), + message: shortText(str(item?.message) || ""), + lastTransitionTime: str(item?.lastTransitionTime), + })) + : []; +} + +function compactArgoNonReadyResources(value) { + return Array.isArray(value) + ? value + .filter((item) => item?.health?.status && item.health.status !== "Healthy") + .slice(0, 5) + .map((item) => ({ + kind: str(item?.kind), + namespace: str(item?.namespace), + name: str(item?.name), + status: str(item?.status), + healthStatus: str(item?.health?.status), + healthMessage: shortText(str(item?.health?.message) || ""), + })) + : []; +} + +function compactArgoSyncResultResources(value) { + return Array.isArray(value) + ? value + .filter(problemArgoSyncResource) + .slice(0, 8) + .map((item) => ({ + group: str(item?.group), + kind: str(item?.kind), + namespace: str(item?.namespace), + name: str(item?.name), + status: str(item?.status), + hookPhase: str(item?.hookPhase), + syncPhase: str(item?.syncPhase), + message: shortText(str(item?.message) || ""), + })) + : []; +} + +function problemArgoSyncResource(item) { + const message = String(item?.message || ""); + return (item?.status && item.status !== "Synced") + || (item?.hookPhase && item.hookPhase !== "Succeeded") + || /fail|error|backoff|forbidden|invalid|denied|exceeded/iu.test(message); +} + async function runtimeSummary(expected) { if (!runtimeNamespace || workloads.length === 0) return { ready: null, aligned: null, reason: "runtime-not-configured" }; const rows = []; @@ -723,6 +921,49 @@ async function getJson(path, required) { }); } +async function patchJson(path, value) { + const host = process.env.KUBERNETES_SERVICE_HOST; + const port = Number(process.env.KUBERNETES_SERVICE_PORT || "443"); + const tokenPath = "/var/run/secrets/kubernetes.io/serviceaccount/token"; + const caPath = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"; + if (!host || !existsSync(tokenPath) || !existsSync(caPath)) fail("kubernetes serviceaccount is unavailable"); + const token = readFileSync(tokenPath, "utf8").trim(); + const ca = readFileSync(caPath); + const payload = JSON.stringify(value); + return await new Promise((resolve, reject) => { + const req = https.request({ + host, + port, + path, + method: "PATCH", + ca, + headers: { + authorization: `Bearer ${token}`, + "content-type": "application/merge-patch+json", + "content-length": Buffer.byteLength(payload), + }, + }, (res) => { + let body = ""; + res.setEncoding("utf8"); + res.on("data", (chunk) => { body += chunk; }); + res.on("end", () => { + if ((res.statusCode || 0) < 200 || (res.statusCode || 0) >= 300) return reject(new Error(shortText(body || `kube api ${res.statusCode}`))); + try { resolve(JSON.parse(body)); } catch (error) { reject(error); } + }); + }); + req.on("error", reject); + req.write(payload); + req.end(); + }).catch((error) => { + errors.push(`${path}: ${shortText(error?.message || String(error))}`); + return null; + }); +} + +function delay(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + async function httpProbe(url) { const client = url.startsWith("https:") ? https : http; const started = Date.now(); @@ -746,6 +987,14 @@ function parseWorkloads(value) { } } +function optionalPositiveIntEnv(name) { + const raw = process.env[name] || ""; + if (!raw) return null; + const value = Number.parseInt(raw, 10); + if (!Number.isInteger(value) || value <= 0) fail(`${name} must be a positive integer`); + return value; +} + function rev(ref) { try { const out = execFileSync("git", [`--git-dir=${repoPath}`, "rev-parse", "--verify", `${ref}^{commit}`], { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] }).trim(); diff --git a/scripts/native/cicd/compact-native-object.mjs b/scripts/native/cicd/compact-native-object.mjs index 16cfe809..8594981b 100644 --- a/scripts/native/cicd/compact-native-object.mjs +++ b/scripts/native/cicd/compact-native-object.mjs @@ -137,6 +137,21 @@ if (key === "pipelineRun") { }; } else if (key === "argoApplication") { const resources = Array.isArray(input?.status?.resources) ? input.status.resources : []; + const syncResultResources = Array.isArray(input?.status?.operationState?.syncResult?.resources) + ? input.status.operationState.syncResult.resources + .filter(problemSyncResource) + .slice(0, 8) + .map((item) => ({ + group: item.group || null, + kind: item.kind || null, + namespace: item.namespace || null, + name: item.name || null, + status: item.status || null, + hookPhase: item.hookPhase || null, + syncPhase: item.syncPhase || null, + message: item.message || null, + })) + : []; const nonReadyResources = resources .filter((item) => item?.health?.status && item.health.status !== "Healthy") .slice(0, 8) @@ -166,6 +181,7 @@ if (key === "pipelineRun") { startedAt: input.status.operationState.startedAt || null, finishedAt: input.status.operationState.finishedAt || null, durationSeconds: durationSeconds(input.status.operationState.startedAt, input.status.operationState.finishedAt), + syncResultResources, } : null, }, @@ -194,3 +210,10 @@ if (key === "pipelineRun") { } console.log(JSON.stringify(output)); + +function problemSyncResource(item) { + const message = String(item?.message || ""); + return (item?.status && item.status !== "Synced") + || (item?.hookPhase && item.hookPhase !== "Succeeded") + || /fail|error|backoff|forbidden|invalid|denied|exceeded/iu.test(message); +} diff --git a/scripts/native/cicd/k8s-job-drilldown.mjs b/scripts/native/cicd/k8s-job-drilldown.mjs index 90c5ae3a..e29c3fb2 100644 --- a/scripts/native/cicd/k8s-job-drilldown.mjs +++ b/scripts/native/cicd/k8s-job-drilldown.mjs @@ -51,10 +51,13 @@ async function main() { const podSummaries = pods.slice(0, Math.max(1, maxContainers)).map(podSummary); const logTargets = selectLogTargets(pods).slice(0, maxContainers); const logs = []; + const gateResults = []; const perContainerBytes = Math.max(1, Math.floor(maxLogBytes / Math.max(1, logTargets.length))); for (const target of logTargets) { const read = await readPodLog(target.podName, target.container, logsTailLines, perContainerBytes); const text = read.tail || ""; + const parsedGateResult = compactGateResult(parseLastJsonObject(text)); + if (parsedGateResult !== null) gateResults.push(parsedGateResult); logs.push({ ok: read.ok, degradedReason: read.degradedReason, @@ -63,16 +66,17 @@ async function main() { container: target.container, lineCount: text.length === 0 ? 0 : text.split(/\r?\n/u).filter((line) => line.length > 0).length, bytes: Buffer.byteLength(text, "utf8"), - tail: text, + tail: compactLogTail(text, parsedGateResult), nodeCicdTiming: lastNodeCicdTiming(text), }); } const logFailures = logs.filter((item) => item.ok === false); + const gateFailures = gateResults.filter((item) => item.ok === false); const status = job.status || {}; const metadata = job.metadata || {}; console.log(JSON.stringify({ - ok: logFailures.length === 0, - degradedReason: logFailures.length === 0 ? null : "log-read-failed", + ok: logFailures.length === 0 && gateFailures.length === 0 && !(status.failed && status.failed > 0), + degradedReason: logFailures.length > 0 ? "log-read-failed" : gateFailures.length > 0 ? "gate-failed" : status.failed && status.failed > 0 ? "job-failed" : null, errors: logFailures.map((item) => ({ pod: item.pod, container: item.container, degradedReason: item.degradedReason, message: item.message })), query: { namespace, jobName, stage: stageName, sourceCommit: sourceCommit || null }, job: { @@ -94,6 +98,7 @@ async function main() { failedState: Boolean(status.failed && status.failed > 0), }, pods: podSummaries, + gateResults, logs, nodeCicdTiming: lastTiming(logs), statusAuthority: useServiceAccount ? "kubernetes-api-serviceaccount" : "target-node-kubectl-raw", @@ -290,6 +295,145 @@ function lastTiming(logs) { return null; } +function parseLastJsonObject(text) { + const lines = text.split(/\r?\n/u).map((line) => line.trim()).filter(Boolean); + for (let index = lines.length - 1; index >= 0; index -= 1) { + const line = lines[index]; + if (!line.startsWith("{") || !line.endsWith("}")) continue; + try { + const parsed = JSON.parse(line); + if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) return parsed; + } catch { + // Keep scanning bounded log lines. + } + } + return null; +} + +function compactGateResult(value) { + if (!value || typeof value !== "object") return null; + const evidence = objectOrEmpty(value.evidence); + const closeout = objectOrEmpty(evidence.closeout); + const argo = objectOrEmpty(closeout.argo); + const runtime = objectOrEmpty(closeout.runtime); + const pipelineRun = objectOrEmpty(closeout.pipelineRun); + return { + ok: value.ok === true, + gate: stringOrNull(value.gate), + follower: stringOrNull(value.follower), + sourceCommit: stringOrNull(evidence.sourceCommit) || shortSha(stringOrNull(closeout.source?.sourceCommit)), + errors: arrayItems(value.errors).slice(0, 4).map((item) => shortText(item)), + polls: integerOrNull(evidence.polls), + elapsedMs: integerOrNull(evidence.elapsedMs), + writesState: booleanOrNull(evidence.writesState), + pipelineRun: { + name: stringOrNull(pipelineRun.name), + succeeded: booleanOrNull(pipelineRun.succeeded), + reason: stringOrNull(pipelineRun.reason), + durationSeconds: integerOrNull(pipelineRun.durationSeconds), + }, + argo: { + syncStatus: stringOrNull(argo.syncStatus), + healthStatus: stringOrNull(argo.healthStatus), + operationPhase: stringOrNull(argo.operationPhase), + operationMessage: shortText(argo.operationMessage), + nonReadyResources: compactNamedResources(argo.nonReadyResources, 4), + syncResultResources: compactNamedResources(argo.syncResultResources, 3), + problemWorkloads: compactProblemWorkloads(argo.problemWorkloads, 3), + }, + runtime: { + namespace: stringOrNull(runtime.namespace), + ready: booleanOrNull(runtime.ready), + aligned: booleanOrNull(runtime.aligned), + workloads: compactRuntimeWorkloads(runtime.workloads, 8), + }, + statusAuthority: stringOrNull(value.statusAuthority), + parsedDownstreamCliOutput: value.parsedDownstreamCliOutput === true, + }; +} + +function compactLogTail(text, parsedGateResult) { + if (!text) return ""; + if (parsedGateResult !== null) { + const lines = text.split(/\r?\n/u).map((line) => line.trim()).filter(Boolean); + const nonJson = lines.filter((line) => !(line.startsWith("{") && line.endsWith("}"))); + return shortText(nonJson.slice(-8).join("\n")); + } + return shortText(text); +} + +function compactNamedResources(value, limit) { + return arrayItems(value).slice(0, limit).map((item) => ({ + kind: stringOrNull(item.kind), + namespace: stringOrNull(item.namespace), + name: stringOrNull(item.name), + status: stringOrNull(item.status), + healthStatus: stringOrNull(item.healthStatus), + hookPhase: stringOrNull(item.hookPhase), + syncPhase: stringOrNull(item.syncPhase), + message: shortText(item.message || item.healthMessage), + })); +} + +function compactRuntimeWorkloads(value, limit) { + return arrayItems(value).slice(0, limit).map((item) => ({ + kind: stringOrNull(item.kind), + name: stringOrNull(item.name), + ready: booleanOrNull(item.ready), + aligned: booleanOrNull(item.aligned), + desired: integerOrNull(item.desired), + readyReplicas: integerOrNull(item.readyReplicas), + updatedReplicas: integerOrNull(item.updatedReplicas), + sourceCommit: stringOrNull(item.sourceCommit), + })); +} + +function compactProblemWorkloads(value, limit) { + return arrayItems(value).filter(isProblemWorkload).slice(0, limit).map((item) => ({ + kind: stringOrNull(item.kind), + namespace: stringOrNull(item.namespace), + name: stringOrNull(item.name), + desired: integerOrNull(item.desired), + readyReplicas: integerOrNull(item.readyReplicas), + updatedReplicas: integerOrNull(item.updatedReplicas), + unavailableReplicas: integerOrNull(item.unavailableReplicas), + conditions: arrayItems(item.conditions).slice(0, 2).map((condition) => ({ + type: stringOrNull(condition.type), + status: stringOrNull(condition.status), + reason: stringOrNull(condition.reason), + message: shortText(condition.message), + })), + pods: arrayItems(item.pods).filter(isProblemPod).slice(0, 2).map((pod) => ({ + name: stringOrNull(pod.name), + phase: stringOrNull(pod.phase), + ready: booleanOrNull(pod.ready), + containers: arrayItems(pod.containers).filter(isProblemContainer).slice(0, 2).map((container) => ({ + name: stringOrNull(container.name), + ready: booleanOrNull(container.ready), + restartCount: integerOrNull(container.restartCount), + waitingReason: stringOrNull(container.waitingReason), + terminatedReason: stringOrNull(container.terminatedReason), + exitCode: integerOrNull(container.exitCode), + })), + })), + })); +} + +function isProblemWorkload(item) { + const desired = integerOrNull(item?.desired) ?? 1; + const readyReplicas = integerOrNull(item?.readyReplicas) ?? 0; + const unavailableReplicas = integerOrNull(item?.unavailableReplicas) ?? 0; + return readyReplicas < desired || unavailableReplicas > 0 || arrayItems(item?.pods).some(isProblemPod); +} + +function isProblemPod(pod) { + return pod?.ready === false || arrayItems(pod?.containers).some(isProblemContainer); +} + +function isProblemContainer(container) { + return container?.ready === false || Boolean(container?.waitingReason) || Boolean(container?.terminatedReason) || integerOrNull(container?.exitCode) !== null; +} + function durationSeconds(start, end) { const s = timestampMs(start); const e = timestampMs(end); @@ -317,7 +461,8 @@ function shortSha(value) { function shortText(value) { if (value === null || value === undefined) return null; const text = String(value).replace(/\s+/gu, " ").trim(); - return text.length <= maxMessageBytes ? text : `${text.slice(0, Math.max(0, maxMessageBytes - 3))}...`; + const limit = Math.min(maxMessageBytes, 300); + return text.length <= limit ? text : `${text.slice(0, Math.max(0, limit - 3))}...`; } function tailBytes(value, maxBytes) { @@ -330,6 +475,18 @@ function arrayItems(value) { return Array.isArray(value) ? value : []; } +function objectOrEmpty(value) { + return value && typeof value === "object" && !Array.isArray(value) ? value : {}; +} + +function stringOrNull(value) { + return typeof value === "string" && value.length > 0 ? value : null; +} + +function booleanOrNull(value) { + return typeof value === "boolean" ? value : null; +} + function integerOrNull(value) { return Number.isInteger(value) ? value : null; } diff --git a/scripts/src/cicd-branch-follower.ts b/scripts/src/cicd-branch-follower.ts index fcb450b8..8fa4537a 100644 --- a/scripts/src/cicd-branch-follower.ts +++ b/scripts/src/cicd-branch-follower.ts @@ -214,8 +214,8 @@ function debugStepOption(value: string): BranchFollowerDebugStep { } function gateOption(value: string): BranchFollowerGate { - if (value === "reuse-plan" || value === "ci-taskrun-plan" || value === "cd-rollout-plan" || value === "post-deploy-health" || value === "control-plane-refresh") return value; - throw new Error("--gate must be reuse-plan, ci-taskrun-plan, cd-rollout-plan, post-deploy-health, or control-plane-refresh"); + if (value === "reuse-plan" || value === "ci-taskrun-plan" || value === "cd-rollout-plan" || value === "post-deploy-health" || value === "control-plane-refresh" || value === "git-mirror-flush" || value === "runtime-closeout") return value; + throw new Error("--gate must be reuse-plan, ci-taskrun-plan, cd-rollout-plan, post-deploy-health, control-plane-refresh, git-mirror-flush, or runtime-closeout"); } function isInClusterRuntime(): boolean { diff --git a/scripts/src/cicd-gates.ts b/scripts/src/cicd-gates.ts index c619c022..32cac1cc 100644 --- a/scripts/src/cicd-gates.ts +++ b/scripts/src/cicd-gates.ts @@ -2,24 +2,50 @@ // Responsibility: submit bounded target-side gate Jobs and return compact evidence. import type { CommandResult } from "./command"; import { resolveAgentRunLaneTarget } from "./agentrun-lanes"; +import { yamlLaneGitMirrorJobManifest } from "./agentrun/secrets"; import { nativeHwlabControlPlaneRefreshJobManifest, runNativeHwlabControlPlaneRefresh } from "./cicd-hwlab-refresh"; import { nativeCicdScriptLoadShell } from "./cicd-native-bundle"; +import { runNativeK8sJob } from "./cicd-native"; import { waitForJobShell } from "./cicd-controller-render"; import type { BranchFollowerRegistry, FollowerSpec, ParsedOptions } from "./cicd-types"; import { hwlabRuntimeLaneSpecForNode } from "./hwlab-node-lanes"; +import { nodeRuntimeGitMirrorJobManifest } from "./hwlab-node/render"; +import { nodeRuntimeGitMirrorTarget } from "./hwlab-node/web-probe"; import { shQuote, redactText } from "./platform-infra-ops-library"; type KubeScriptRunner = (registry: BranchFollowerRegistry, options: ParsedOptions, script: string, input: string, timeoutMs: number) => CommandResult; export async function runBranchFollowerGate(registry: BranchFollowerRegistry, follower: FollowerSpec, options: ParsedOptions, runKubeScript: KubeScriptRunner): Promise> { - if (options.gate === null) throw new Error("gate requires --gate "); + if (options.gate === null) throw new Error("gate requires --gate "); if (options.gate === "control-plane-refresh") { return options.inCluster ? runControlPlaneRefreshGate(registry, follower, options) : runTargetControlPlaneRefreshGateJob(registry, follower, options, runKubeScript); } + if (options.gate === "git-mirror-flush") { + return options.inCluster + ? runGitMirrorFlushGate(registry, follower, options) + : runTargetGitMirrorFlushGateJob(registry, follower, options, runKubeScript); + } + if (options.gate === "runtime-closeout" && !options.confirm) { + const timeoutSeconds = gateTimeoutSeconds(follower, options); + const jobName = `bf-gate-${safeName(follower.id)}-${safeName(options.gate)}-${Date.now().toString(36)}`.slice(0, 63); + return { + ok: true, + action: "gate", + gate: options.gate, + follower: follower.id, + dryRun: true, + sourceCommit: options.sourceCommit, + target: { name: jobName, namespace: registry.controller.namespace, execution: "k8s-native-gate-job" }, + timeoutSeconds, + message: "add --confirm to run the native runtime closeout gate", + writesState: false, + parsedDownstreamCliOutput: false, + }; + } if (options.inCluster) return { ok: false, action: "gate", gate: options.gate, follower: follower.id, degradedReason: "operator-entry-required" }; - const timeoutSeconds = options.timeoutSeconds ?? follower.budgets.statusSeconds; + const timeoutSeconds = gateTimeoutSeconds(follower, options); const jobName = `bf-gate-${safeName(follower.id)}-${safeName(options.gate)}-${Date.now().toString(36)}`.slice(0, 63); const manifest = gateJobManifest(registry, follower, options, jobName, timeoutSeconds); const manifestYaml = `${Bun.YAML.stringify(manifest).trim()}\n`; @@ -56,6 +82,149 @@ export async function runBranchFollowerGate(registry: BranchFollowerRegistry, fo }; } +function gateTimeoutSeconds(follower: FollowerSpec, options: ParsedOptions): number { + return options.timeoutSeconds ?? (options.gate === "runtime-closeout" ? follower.budgets.endToEndSeconds : follower.budgets.statusSeconds); +} + +function runTargetGitMirrorFlushGateJob(registry: BranchFollowerRegistry, follower: FollowerSpec, options: ParsedOptions, runKubeScript: KubeScriptRunner): Record { + const prepared = prepareGitMirrorFlushGate(follower, options); + if (prepared.ok !== true) return prepared; + const timeoutSeconds = options.timeoutSeconds ?? follower.budgets.sourceSyncSeconds; + if (!options.confirm) return gitMirrorFlushDryRun(follower, options, prepared.namespace, prepared.jobName); + const manifestYaml = `${Bun.YAML.stringify(prepared.manifest).trim()}\n`; + const script = [ + "set -eu", + "tmp=$(mktemp)", + "base64 -d >\"$tmp\" <<'UNIDESK_GIT_MIRROR_FLUSH_GATE_JOB'", + Buffer.from(manifestYaml, "utf8").toString("base64"), + "UNIDESK_GIT_MIRROR_FLUSH_GATE_JOB", + `kubectl -n ${shQuote(prepared.namespace)} delete job ${shQuote(prepared.jobName)} --ignore-not-found=true >/dev/null 2>&1 || true`, + `kubectl apply --server-side --force-conflicts --field-manager=${shQuote(registry.controller.fieldManager)} -f "$tmp" >/dev/null`, + waitForJobShell(prepared.namespace, prepared.jobName, timeoutSeconds), + ].join("\n"); + const startedAt = Date.now(); + const command = runKubeScript(registry, options, script, "", (timeoutSeconds + registry.controller.budgets.reconcileTransportGraceSeconds) * 1000); + const parsed = parseLastJsonObject(command.stdout); + const ok = command.exitCode === 0 && parsed !== null && parsed.pendingFlush !== true; + return { + ok, + action: "gate", + gate: options.gate, + follower: follower.id, + dryRun: false, + sourceCommit: options.sourceCommit, + target: { name: prepared.jobName, namespace: prepared.namespace, execution: "k8s-native-git-mirror-flush" }, + result: parsed, + writesState: false, + command: { + exitCode: command.exitCode, + timedOut: command.timedOut, + elapsedMs: Date.now() - startedAt, + parseError: parsed === null ? "stdout-json-parse-failed" : null, + stdoutTail: ok ? "" : redactText(tailText(command.stdout, 1600)), + stderrTail: ok ? "" : redactText(tailText(command.stderr, 1200)), + }, + parsedDownstreamCliOutput: false, + next: { + statusRead: `bun scripts/cli.ts cicd branch-follower debug-step --follower ${follower.id} --step status-read --json`, + job: `bun scripts/cli.ts cicd branch-follower job --follower ${follower.id} --source-commit ${options.sourceCommit} --job git-mirror-flush --json`, + }, + }; +} + +function runGitMirrorFlushGate(registry: BranchFollowerRegistry, follower: FollowerSpec, options: ParsedOptions): Record { + const prepared = prepareGitMirrorFlushGate(follower, options); + if (prepared.ok !== true) return prepared; + const timeoutSeconds = options.timeoutSeconds ?? follower.budgets.sourceSyncSeconds; + if (!options.confirm) return gitMirrorFlushDryRun(follower, options, prepared.namespace, prepared.jobName); + const startedAt = Date.now(); + const result = runNativeK8sJob(prepared.namespace, prepared.jobName, prepared.manifest, timeoutSeconds, "flush", registry.controller.budgets); + const summary = result.summary; + const ok = result.ok && summary?.pendingFlush !== true; + return { + ok, + action: "gate", + gate: options.gate, + follower: follower.id, + dryRun: false, + sourceCommit: options.sourceCommit, + target: { name: prepared.jobName, namespace: prepared.namespace, execution: "k8s-native-git-mirror-flush" }, + result: { + ok: result.ok, + completed: result.completed, + failed: result.failed, + timedOut: result.timedOut, + created: result.created, + reused: result.reused, + polls: result.polls, + elapsedMs: result.elapsedMs, + summary, + conditionReason: result.conditionReason, + conditionMessage: result.conditionMessage, + statusAuthority: result.statusAuthority, + parsedDownstreamCliOutput: false, + }, + writesState: false, + command: { + elapsedMs: Date.now() - startedAt, + timeoutSeconds, + }, + parsedDownstreamCliOutput: false, + next: { + statusRead: `bun scripts/cli.ts cicd branch-follower debug-step --follower ${follower.id} --step status-read --json`, + job: `bun scripts/cli.ts cicd branch-follower job --follower ${follower.id} --source-commit ${options.sourceCommit} --job git-mirror-flush --json`, + }, + }; +} + +function prepareGitMirrorFlushGate(follower: FollowerSpec, options: ParsedOptions): { ok: true; namespace: string; jobName: string; manifest: Record } | Record { + if (options.sourceCommit === null) { + return { + ok: false, + action: "gate", + gate: options.gate, + follower: follower.id, + degradedReason: "source-commit-required", + message: "git-mirror-flush gate requires --source-commit ", + parsedDownstreamCliOutput: false, + }; + } + const jobName = nativeCapabilityJobName(follower.id, "flush", options.sourceCommit); + if (follower.adapter === "hwlab-node-runtime") { + const spec = hwlabRuntimeLaneSpecForNode(follower.target.lane, follower.target.node); + const mirror = nodeRuntimeGitMirrorTarget(spec); + return { ok: true, namespace: mirror.namespace, jobName, manifest: nodeRuntimeGitMirrorJobManifest(mirror, "flush", jobName) }; + } + if (follower.adapter === "agentrun-yaml-lane") { + const { spec } = resolveAgentRunLaneTarget({ node: follower.target.node, lane: follower.target.lane }); + return { ok: true, namespace: spec.gitMirror.namespace, jobName, manifest: yamlLaneGitMirrorJobManifest(spec, "flush", jobName) }; + } + return { + ok: false, + action: "gate", + gate: options.gate, + follower: follower.id, + degradedReason: "unsupported-follower-adapter", + message: "git-mirror-flush gate is only available for followers with a native git-mirror stage", + parsedDownstreamCliOutput: false, + }; +} + +function gitMirrorFlushDryRun(follower: FollowerSpec, options: ParsedOptions, namespace: string, jobName: string): Record { + return { + ok: true, + action: "gate", + gate: options.gate, + follower: follower.id, + dryRun: true, + sourceCommit: options.sourceCommit, + target: { name: jobName, namespace, execution: "k8s-native-git-mirror-flush" }, + message: "add --confirm to run the native git-mirror flush gate", + writesState: false, + parsedDownstreamCliOutput: false, + }; +} + function runTargetControlPlaneRefreshGateJob(registry: BranchFollowerRegistry, follower: FollowerSpec, options: ParsedOptions, runKubeScript: KubeScriptRunner): Record { if (follower.adapter !== "hwlab-node-runtime" || options.sourceCommit === null || !options.confirm) { return runControlPlaneRefreshGate(registry, follower, options); @@ -163,7 +332,8 @@ function runControlPlaneRefreshGate(registry: BranchFollowerRegistry, follower: function gateJobManifest(registry: BranchFollowerRegistry, follower: FollowerSpec, options: ParsedOptions, jobName: string, timeoutSeconds: number): Record { const labels = { ...registry.controller.labels, "app.kubernetes.io/component": "cicd-gate-job" }; const agentrun = follower.adapter === "agentrun-yaml-lane" ? resolveAgentRunLaneTarget({ node: follower.target.node, lane: follower.target.lane }).spec : null; - const gitopsBranch = agentrun?.gitops.branch ?? ""; + const hwlab = follower.adapter === "hwlab-node-runtime" ? hwlabRuntimeLaneSpecForNode(follower.target.lane, follower.target.node) : null; + const gitopsBranch = agentrun?.gitops.branch ?? hwlab?.gitopsBranch ?? ""; const healthUrl = gateHealthUrl(follower); const workloads = (follower.nativeStatus.runtime?.workloads ?? []).map((item) => ({ kind: item.kind, name: item.name, sourceCommit: item.sourceCommit })); const gatePolicy = gatePolicyEnv(follower); @@ -218,6 +388,7 @@ function gateJobManifest(registry: BranchFollowerRegistry, follower: FollowerSpe { name: "HEALTH_URL", value: healthUrl }, { name: "SLOW_TASK_SECONDS", value: String(gatePolicy.slowTaskSeconds) }, { name: "HEALTH_TIMEOUT_MS", value: String(gatePolicy.healthTimeoutMs) }, + { name: "GATE_TIMEOUT_MS", value: String(timeoutSeconds * 1000) }, { name: "UNIDESK_CONTROLLER_GITHUB_SSH_PRIVATE_KEY", value: `/git-ssh/${registry.controller.source.githubSsh.privateKeySecretKey}` }, { name: "UNIDESK_CONTROLLER_GITHUB_PROXY_HOST", value: registry.controller.source.githubSsh.proxyHost }, { name: "UNIDESK_CONTROLLER_GITHUB_PROXY_PORT", value: String(registry.controller.source.githubSsh.proxyPort) }, diff --git a/scripts/src/cicd-help.ts b/scripts/src/cicd-help.ts index 13761a02..27e6e4ad 100644 --- a/scripts/src/cicd-help.ts +++ b/scripts/src/cicd-help.ts @@ -21,6 +21,8 @@ export function buildCicdHelp(configPath: string, spec: string): unknown { "bun scripts/cli.ts cicd branch-follower taskrun --follower hwlab-jd01-v03 --taskrun runtime-ready --logs-tail 120 --json", "bun scripts/cli.ts cicd branch-follower job --follower agentrun-jd01-v02 --source-commit --job image-build --json", "bun scripts/cli.ts cicd branch-follower gate --follower hwlab-jd01-v03 --gate control-plane-refresh --source-commit --confirm --json", + "bun scripts/cli.ts cicd branch-follower gate --follower hwlab-jd01-v03 --gate git-mirror-flush --source-commit --confirm --json", + "bun scripts/cli.ts cicd branch-follower gate --follower hwlab-jd01-v03 --gate runtime-closeout --source-commit --confirm --json", "bun scripts/cli.ts cicd branch-follower gate --follower agentrun-jd01-v02 --gate reuse-plan --source-commit --json", ], config: configPath, diff --git a/scripts/src/cicd-job-runtime-drilldown.ts b/scripts/src/cicd-job-runtime-drilldown.ts index 58d5f4d8..143ea3a5 100644 --- a/scripts/src/cicd-job-runtime-drilldown.ts +++ b/scripts/src/cicd-job-runtime-drilldown.ts @@ -173,6 +173,7 @@ function missingPolicyPayload(action: string, follower: FollowerSpec, registry: } function resolveJobTarget(registry: BranchFollowerRegistry, follower: FollowerSpec, query: string, sourceCommit: string | null): { namespace: string; jobName: string; stage: string } | null { + if (query.startsWith("bf-gate-")) return { namespace: registry.controller.namespace, jobName: query, stage: "controller-gate-job" }; if (!isStageAlias(query)) return { namespace: follower.target.namespace, jobName: query, stage: "explicit-job" }; if (sourceCommit === null) return null; if (query === "git-mirror-sync" || query === "git-mirror-flush") { diff --git a/scripts/src/cicd-native-summary.ts b/scripts/src/cicd-native-summary.ts index 7d0649d8..ba4b1119 100644 --- a/scripts/src/cicd-native-summary.ts +++ b/scripts/src/cicd-native-summary.ts @@ -66,6 +66,7 @@ export function nativeArgoSummary(application: Record | null): const sync = asOptionalRecord(status?.sync); const health = asOptionalRecord(status?.health); const operationState = asOptionalRecord(status?.operationState); + const syncResultResources = Array.isArray(operationState?.syncResultResources) ? operationState.syncResultResources.slice(0, 5) : []; return { name: stringOrNull(metadata?.name), namespace: stringOrNull(metadata?.namespace), @@ -80,6 +81,7 @@ export function nativeArgoSummary(application: Record | null): operationDurationSeconds: numberOrNull(operationState?.durationSeconds), conditions: Array.isArray(status?.conditions) ? status.conditions.slice(0, 5) : [], nonReadyResources: Array.isArray(status?.nonReadyResources) ? status.nonReadyResources.slice(0, 5) : [], + syncResultResources, ready: argoApplicationReady(application), }; } diff --git a/scripts/src/cicd-types.ts b/scripts/src/cicd-types.ts index 95a40a03..02ba0650 100644 --- a/scripts/src/cicd-types.ts +++ b/scripts/src/cicd-types.ts @@ -4,7 +4,7 @@ export type OutputMode = "human" | "json" | "yaml"; export type BranchFollowerAction = "help" | "plan" | "apply" | "status" | "run-once" | "debug-step" | "cleanup-state" | "events" | "logs" | "taskrun" | "job" | "runtime" | "gate"; export type BranchFollowerDebugStep = "state-read" | "controller-source" | "status-read" | "decide" | "state-write"; -export type BranchFollowerGate = "reuse-plan" | "ci-taskrun-plan" | "cd-rollout-plan" | "post-deploy-health" | "control-plane-refresh"; +export type BranchFollowerGate = "reuse-plan" | "ci-taskrun-plan" | "cd-rollout-plan" | "post-deploy-health" | "control-plane-refresh" | "git-mirror-flush" | "runtime-closeout"; export type BranchFollowerPhase = | "Observed" | "Noop" diff --git a/scripts/src/hwlab-node/entry.ts b/scripts/src/hwlab-node/entry.ts index 46c80df4..a27a8b8c 100644 --- a/scripts/src/hwlab-node/entry.ts +++ b/scripts/src/hwlab-node/entry.ts @@ -612,6 +612,9 @@ export async function runNodeDelegatedDomain(config: Config, domain: DelegatedNo const result = nodeRuntimeControlPlanePlan(scoped); return nodeScopedFullOutput(scoped) ? result : withNodeRuntimeControlPlanePlanRendered(result, scoped); } + if (domain === "control-plane" && scoped.action === "allow-endpoint-bridge") { + return runNodeEndpointBridge(scoped); + } if (domain === "control-plane" && scoped.node !== defaultSpec.nodeId) { if (scoped.action === "status") { const result = nodeRuntimeControlPlaneStatus(scoped); @@ -637,9 +640,6 @@ export async function runNodeDelegatedDomain(config: Config, domain: DelegatedNo } return nodeRuntimeUnsupportedAction(scoped); } - if (domain === "control-plane" && scoped.action === "allow-endpoint-bridge") { - return runNodeEndpointBridge(scoped); - } if (domain === "control-plane" && scoped.action === "trigger-current" && scoped.confirm && !scoped.dryRun && !scoped.wait) { return startNodeDelegatedJob(scoped); }