fix(cicd): add branch follower refresh gate

This commit is contained in:
Codex
2026-07-04 11:30:54 +00:00
parent 09d7633f3d
commit 08d5e91c5e
5 changed files with 191 additions and 5 deletions
+2 -2
View File
@@ -213,8 +213,8 @@ function debugStepOption(value: string): BranchFollowerDebugStep {
}
// Validates the raw `--gate` option text and narrows it to a BranchFollowerGate.
//
// value: option text exactly as supplied by the caller.
// Returns the same string when it names a supported gate.
// Throws an Error listing every supported gate name otherwise.
function gateOption(value: string): BranchFollowerGate {
  // All supported gates are matched in one place so that no earlier, narrower
  // check can throw before a newer gate name is considered.
  switch (value) {
    case "reuse-plan":
    case "ci-taskrun-plan":
    case "cd-rollout-plan":
    case "post-deploy-health":
    case "control-plane-refresh":
      return value;
    default:
      throw new Error("--gate must be reuse-plan, ci-taskrun-plan, cd-rollout-plan, post-deploy-health, or control-plane-refresh");
  }
}
function isInClusterRuntime(): boolean {
+183 -1
View File
@@ -2,6 +2,7 @@
// Responsibility: submit bounded target-side gate Jobs and return compact evidence.
import type { CommandResult } from "./command";
import { resolveAgentRunLaneTarget } from "./agentrun-lanes";
import { runNativeHwlabControlPlaneRefresh } from "./cicd-hwlab-refresh";
import { nativeCicdScriptLoadShell } from "./cicd-native-bundle";
import { waitForJobShell } from "./cicd-controller-render";
import type { BranchFollowerRegistry, FollowerSpec, ParsedOptions } from "./cicd-types";
@@ -11,7 +12,12 @@ import { shQuote, redactText } from "./platform-infra-ops-library";
type KubeScriptRunner = (registry: BranchFollowerRegistry, options: ParsedOptions, script: string, input: string, timeoutMs: number) => CommandResult;
export async function runBranchFollowerGate(registry: BranchFollowerRegistry, follower: FollowerSpec, options: ParsedOptions, runKubeScript: KubeScriptRunner): Promise<Record<string, unknown>> {
if (options.gate === null) throw new Error("gate requires --gate <reuse-plan|ci-taskrun-plan|cd-rollout-plan|post-deploy-health>");
if (options.gate === null) throw new Error("gate requires --gate <reuse-plan|ci-taskrun-plan|cd-rollout-plan|post-deploy-health|control-plane-refresh>");
if (options.gate === "control-plane-refresh") {
return options.inCluster
? runControlPlaneRefreshGate(registry, follower, options)
: runTargetControlPlaneRefreshGateJob(registry, follower, options, runKubeScript);
}
if (options.inCluster) return { ok: false, action: "gate", gate: options.gate, follower: follower.id, degradedReason: "operator-entry-required" };
const timeoutSeconds = options.timeoutSeconds ?? follower.budgets.statusSeconds;
const jobName = `bf-gate-${safeName(follower.id)}-${safeName(options.gate)}-${Date.now().toString(36)}`.slice(0, 63);
@@ -50,6 +56,177 @@ export async function runBranchFollowerGate(registry: BranchFollowerRegistry, fo
};
}
// Submits the control-plane-refresh gate as a Kubernetes Job through `runKubeScript`
// and returns compact evidence about the run.
//
// registry:      controller settings (namespace, field manager, budgets).
// follower:      follower whose control plane should be refreshed.
// options:       parsed CLI options; `sourceCommit` and `confirm` select the Job path.
// runKubeScript: transport that executes a shell script and returns its CommandResult.
//
// Returns a gate result record. `ok` is true only when the script exited 0, a JSON
// object was found on stdout, and that object does not carry `ok: false`.
function runTargetControlPlaneRefreshGateJob(registry: BranchFollowerRegistry, follower: FollowerSpec, options: ParsedOptions, runKubeScript: KubeScriptRunner): Record<string, unknown> {
  // Unsupported adapter, missing commit and unconfirmed (dry-run) requests never
  // submit a Job; the in-process gate produces the answer for all three.
  if (follower.adapter !== "hwlab-node-runtime" || options.sourceCommit === null || !options.confirm) {
    return runControlPlaneRefreshGate(registry, follower, options);
  }
  const timeoutSeconds = options.timeoutSeconds ?? follower.budgets.controlPlaneRefreshSeconds;
  const jobName = `bf-gate-${safeName(follower.id)}-control-refresh-${Date.now().toString(36)}`.slice(0, 63);
  const manifest = controllerGateJobManifest(registry, follower, options, jobName, timeoutSeconds);
  const manifestYaml = `${Bun.YAML.stringify(manifest).trim()}\n`;
  const script = [
    "set -eu",
    "tmp=$(mktemp)",
    // Remove the temporary manifest however the script ends (success, `set -e` abort, or exit).
    "trap 'rm -f \"$tmp\"' EXIT",
    // The manifest travels base64-encoded inside a quoted heredoc so the shell never interprets it.
    "base64 -d >\"$tmp\" <<'UNIDESK_CONTROL_PLANE_REFRESH_GATE_JOB'",
    Buffer.from(manifestYaml, "utf8").toString("base64"),
    "UNIDESK_CONTROL_PLANE_REFRESH_GATE_JOB",
    // Best-effort removal of a same-named Job before applying; failures are ignored.
    `kubectl -n ${shQuote(registry.controller.namespace)} delete job ${shQuote(jobName)} --ignore-not-found=true >/dev/null 2>&1 || true`,
    `kubectl apply --server-side --force-conflicts --field-manager=${shQuote(registry.controller.fieldManager)} -f "$tmp" >/dev/null`,
    waitForJobShell(registry.controller.namespace, jobName, timeoutSeconds),
  ].join("\n");
  const startedAt = Date.now();
  // The transport gets the gate timeout plus a grace period so the Job can time out first.
  const command = runKubeScript(registry, options, script, "", (timeoutSeconds + registry.controller.budgets.reconcileTransportGraceSeconds) * 1000);
  const exitedCleanly = command.exitCode === 0;
  // stdout is only parsed when the script succeeded.
  const parsed = exitedCleanly ? parseFirstJsonObject(command.stdout) : null;
  const ok = exitedCleanly && parsed !== null && parsed.ok !== false;
  return {
    ok,
    action: "gate",
    gate: options.gate,
    follower: follower.id,
    target: { name: jobName, namespace: registry.controller.namespace, execution: "k8s-native-gate-job" },
    result: parsed,
    command: {
      exitCode: command.exitCode,
      timedOut: command.timedOut,
      elapsedMs: Date.now() - startedAt,
      // A parse failure is reported only when a parse was attempted; a failed
      // command is already described by exitCode/timedOut and the output tails.
      parseError: exitedCleanly && parsed === null ? "stdout-json-parse-failed" : null,
      // Output tails are attached (redacted) only for failed runs.
      stdoutTail: ok ? "" : redactText(tailText(command.stdout, 1600)),
      stderrTail: ok ? "" : redactText(tailText(command.stderr, 1200)),
    },
    parsedDownstreamCliOutput: false,
  };
}
// Evaluates the control-plane-refresh gate in-process.
//
// Outcomes, in order of precedence:
//   1. follower adapter is not "hwlab-node-runtime" -> refusal "unsupported-follower-adapter"
//   2. no source commit supplied                     -> refusal "source-commit-required"
//   3. `confirm` not set                             -> dry-run record naming the Job that would run
//   4. otherwise                                     -> runs the native refresh and reports its result
//
// registry: controller settings; only the namespace is read here directly.
// follower: follower being gated.
// options:  parsed CLI options (`gate`, `sourceCommit`, `confirm`, `timeoutSeconds`).
// Returns a gate result record; `ok` mirrors the refresh result when the refresh ran.
function runControlPlaneRefreshGate(registry: BranchFollowerRegistry, follower: FollowerSpec, options: ParsedOptions): Record<string, unknown> {
  const execution = "k8s-native-control-plane-refresh";
  // Fields shared by every record this gate emits.
  const identity = { action: "gate", gate: options.gate, follower: follower.id };
  const refuse = (degradedReason: string, message: string): Record<string, unknown> => ({
    ok: false,
    ...identity,
    degradedReason,
    message,
    parsedDownstreamCliOutput: false,
  });

  if (follower.adapter !== "hwlab-node-runtime") {
    return refuse("unsupported-follower-adapter", "control-plane-refresh gate is only available for hwlab-node-runtime followers");
  }
  const sourceCommit = options.sourceCommit;
  if (sourceCommit === null) {
    return refuse("source-commit-required", "control-plane-refresh gate requires --source-commit <sha>");
  }

  // Lane spec, budget and job name are resolved before the confirm check, so a
  // dry run exercises the same lookups as a real run.
  const laneSpec = hwlabRuntimeLaneSpecForNode(follower.target.lane, follower.target.node);
  const timeoutSeconds = options.timeoutSeconds ?? follower.budgets.controlPlaneRefreshSeconds;
  const jobName = nativeCapabilityJobName(follower.id, "control-plane-refresh", sourceCommit);

  if (options.confirm) {
    const startedAt = Date.now();
    const refresh = runNativeHwlabControlPlaneRefresh(registry, follower, laneSpec, sourceCommit, timeoutSeconds, jobName);
    return {
      ok: refresh.result.ok,
      ...identity,
      dryRun: false,
      sourceCommit,
      target: { name: refresh.jobName, namespace: refresh.namespace, execution },
      result: refresh.result,
      command: {
        elapsedMs: Date.now() - startedAt,
        timeoutSeconds,
      },
      parsedDownstreamCliOutput: false,
      // Follow-up commands an operator can run to inspect the outcome.
      next: {
        statusRead: `bun scripts/cli.ts cicd branch-follower debug-step --follower ${follower.id} --step status-read --json`,
        job: `bun scripts/cli.ts cicd branch-follower job --follower ${follower.id} --source-commit ${sourceCommit} --job control-plane-refresh --json`,
      },
    };
  }

  // Without confirmation nothing is executed; only the planned target is reported.
  return {
    ok: true,
    ...identity,
    dryRun: true,
    target: { name: jobName, namespace: registry.controller.namespace, execution },
    sourceCommit,
    message: "add --confirm to run the native control-plane refresh gate",
    parsedDownstreamCliOutput: false,
  };
}
// Builds the batch/v1 Job manifest that re-invokes the control-plane-refresh gate
// with `--confirm --in-cluster` inside the controller image.
//
// registry:       controller settings (namespace, image, service account, budgets, source).
// follower:       follower whose id is passed to `--follower`.
// options:        parsed CLI options; a null `sourceCommit` becomes an empty argument.
// jobName:        metadata.name for the Job.
// timeoutSeconds: forwarded as `--timeout-seconds`; the Job deadline adds the configured grace.
// Returns the manifest as a plain object.
function controllerGateJobManifest(registry: BranchFollowerRegistry, follower: FollowerSpec, options: ParsedOptions, jobName: string, timeoutSeconds: number): Record<string, unknown> {
  const controller = registry.controller;
  const source = controller.source;
  const ssh = source.githubSsh;
  const budgets = controller.budgets;

  // One labels object is attached to both the Job and its pod template.
  const labels = { ...controller.labels, "app.kubernetes.io/component": "cicd-gate-job" };

  // Arguments handed to the one-shot controller script.
  const cliArgs = [
    "bun", "scripts/cli.ts", "cicd", "branch-follower", "gate",
    "--follower", follower.id,
    "--gate", "control-plane-refresh",
    "--source-commit", options.sourceCommit ?? "",
    "--confirm",
    "--in-cluster",
    "--config", "config/cicd-branch-followers.yaml",
    "--timeout-seconds", String(timeoutSeconds),
    "--json",
  ];

  const volumes = [
    { name: "registry", configMap: { name: controller.configMapName, defaultMode: 0o755 } },
    { name: "git-mirror-cache", persistentVolumeClaim: { claimName: source.gitMirrorCachePvcName } },
    { name: "git-ssh", secret: { secretName: ssh.secretName, defaultMode: 0o400 } },
    { name: "work", emptyDir: {} },
  ];

  const volumeMounts = [
    { name: "registry", mountPath: "/etc/unidesk-cicd-branch-follower", readOnly: true },
    { name: "git-mirror-cache", mountPath: "/cache" },
    { name: "git-ssh", mountPath: "/git-ssh", readOnly: true },
    { name: "work", mountPath: "/work" },
  ];

  const env = [
    { name: "UNIDESK_CONTROLLER_SOURCE_BRANCH", value: source.branch },
    { name: "UNIDESK_CONTROLLER_SOURCE_REPOSITORY", value: source.repository },
    // Every "{branch}" placeholder in the configured prefix is replaced by the source branch.
    { name: "UNIDESK_CONTROLLER_SOURCE_SNAPSHOT_PREFIX", value: source.sourceSnapshot.stageRefPrefix.replaceAll("{branch}", source.branch) },
    { name: "UNIDESK_CONTROLLER_GITHUB_SSH_PRIVATE_KEY", value: `/git-ssh/${ssh.privateKeySecretKey}` },
    { name: "UNIDESK_CONTROLLER_GITHUB_PROXY_HOST", value: ssh.proxyHost },
    { name: "UNIDESK_CONTROLLER_GITHUB_PROXY_PORT", value: String(ssh.proxyPort) },
  ];

  const gateContainer = {
    name: "gate",
    image: controller.image,
    imagePullPolicy: "IfNotPresent",
    command: ["/bin/sh", "/etc/unidesk-cicd-branch-follower/controller-one-shot.sh"],
    args: cliArgs,
    env,
    volumeMounts,
  };

  const podSpec = {
    restartPolicy: "Never",
    serviceAccountName: controller.serviceAccountName,
    volumes,
    containers: [gateContainer],
  };

  return {
    apiVersion: "batch/v1",
    kind: "Job",
    metadata: { name: jobName, namespace: controller.namespace, labels },
    spec: {
      backoffLimit: budgets.reconcileJobBackoffLimit,
      ttlSecondsAfterFinished: budgets.reconcileJobTtlSeconds,
      activeDeadlineSeconds: timeoutSeconds + budgets.reconcileJobDeadlineGraceSeconds,
      template: {
        metadata: { labels },
        spec: podSpec,
      },
    },
  };
}
function gateJobManifest(registry: BranchFollowerRegistry, follower: FollowerSpec, options: ParsedOptions, jobName: string, timeoutSeconds: number): Record<string, unknown> {
const labels = { ...registry.controller.labels, "app.kubernetes.io/component": "cicd-gate-job" };
const agentrun = follower.adapter === "agentrun-yaml-lane" ? resolveAgentRunLaneTarget({ node: follower.target.node, lane: follower.target.lane }).spec : null;
@@ -124,6 +301,11 @@ function gateJobManifest(registry: BranchFollowerRegistry, follower: FollowerSpe
};
}
// Derives a Job name of at most 63 characters from a follower id, an action and a commit.
//
// followerId: follower identifier; passed through safeName.
// action:     capability/action label; passed through safeName.
// sha:        source commit; only its first 12 characters are used.
// Returns "<follower>-<action>-<sha12>" with hyphen runs collapsed and no leading
// or trailing hyphen.
function nativeCapabilityJobName(followerId: string, action: string, sha: string): string {
  const prefix = `${safeName(followerId)}-${safeName(action)}`;
  return `${prefix}-${sha.slice(0, 12)}`
    .replace(/-+/gu, "-")
    .replace(/^-/u, "")
    .slice(0, 63)
    // Trim after truncating: the 63-character cut can itself land on a hyphen,
    // and a name ending in "-" is not a valid Kubernetes object name.
    .replace(/-$/u, "");
}
function gateHealthUrl(follower: FollowerSpec): string {
if (follower.adapter === "agentrun-yaml-lane") {
return resolveAgentRunLaneTarget({ node: follower.target.node, lane: follower.target.lane }).spec.runtime.internalBaseUrl;
+1
View File
@@ -20,6 +20,7 @@ export function buildCicdHelp(configPath: string, spec: string): unknown {
"bun scripts/cli.ts cicd branch-follower logs --follower web-probe-sentinel-master",
"bun scripts/cli.ts cicd branch-follower taskrun --follower hwlab-jd01-v03 --taskrun runtime-ready --logs-tail 120 --json",
"bun scripts/cli.ts cicd branch-follower job --follower agentrun-jd01-v02 --source-commit <sha> --job image-build --json",
"bun scripts/cli.ts cicd branch-follower gate --follower hwlab-jd01-v03 --gate control-plane-refresh --source-commit <sha> --confirm --json",
"bun scripts/cli.ts cicd branch-follower gate --follower agentrun-jd01-v02 --gate reuse-plan --source-commit <sha> --json",
],
config: configPath,
+1 -1
View File
@@ -4,7 +4,7 @@
// Names of the supported output renderings.
export type OutputMode = "human" | "json" | "yaml";
// Action names for the branch-follower command (presumably one per CLI sub-command — confirm against the dispatcher).
export type BranchFollowerAction = "help" | "plan" | "apply" | "status" | "run-once" | "debug-step" | "cleanup-state" | "events" | "logs" | "taskrun" | "job" | "runtime" | "gate";
// Step names for the "debug-step" action.
export type BranchFollowerDebugStep = "state-read" | "controller-source" | "status-read" | "decide" | "state-write";
// Gate names for the "gate" action. Declared exactly once: a second alias with
// the same name is a duplicate-identifier compile error.
export type BranchFollowerGate = "reuse-plan" | "ci-taskrun-plan" | "cd-rollout-plan" | "post-deploy-health" | "control-plane-refresh";
export type BranchFollowerPhase =
| "Observed"
| "Noop"