fix: bound node runtime cicd wait closeout

This commit is contained in:
Codex
2026-07-02 04:38:46 +00:00
parent 1b24e994f8
commit ed32eb84b2
12 changed files with 880 additions and 409 deletions
+154 -21
View File
@@ -169,33 +169,34 @@ export function nodeRuntimeGitMirrorGithubTransportSummary(mirror: NodeRuntimeGi
export function nodeRuntimeControlPlaneStatus(scoped: ReturnType<typeof parseNodeScopedDelegatedOptions>): Record<string, unknown> {
const spec = scoped.spec;
const probeTimeoutSeconds = Math.max(1, Math.min(60, scoped.timeoutSeconds));
const sourceCommitOverride = optionValue(scoped.originalArgs, "--source-commit");
const pipelineRunOverride = optionValue(scoped.originalArgs, "--pipeline-run");
const head = sourceCommitOverride === undefined ? resolveNodeRuntimeLaneHead(spec) : null;
const sourceCommit = sourceCommitOverride ?? head?.sourceCommit ?? null;
const pipelineRun = pipelineRunOverride ?? (sourceCommit === null ? null : nodeRuntimePipelineRunName(spec, sourceCommit));
const namespace = runNodeK3sArgs(spec, ["kubectl", "get", "ns", spec.runtimeNamespace, "-o", "name"], 60);
const namespace = runNodeK3sArgs(spec, ["kubectl", "get", "ns", spec.runtimeNamespace, "-o", "name"], probeTimeoutSeconds);
const namespaceExists = namespace.exitCode === 0;
const postgresObjects = namespaceExists
? runNodeK3sArgs(spec, ["kubectl", "-n", spec.runtimeNamespace, "get", "statefulset,svc,pvc", "-o", "name"], 60)
? runNodeK3sArgs(spec, ["kubectl", "-n", spec.runtimeNamespace, "get", "statefulset,svc,pvc", "-o", "name"], probeTimeoutSeconds)
: null;
const localPostgresObjects = postgresObjects === null
? []
: postgresObjects.stdout.split(/\r?\n/u).map((line) => line.trim()).filter((line) => isLocalPostgresObject(line, spec));
const serviceAccount = runNodeK3sArgs(spec, ["kubectl", "-n", "hwlab-ci", "get", "serviceaccount", spec.serviceAccountName, "-o", "name"], 60);
const pipeline = runNodeK3sArgs(spec, ["kubectl", "-n", "hwlab-ci", "get", "pipeline", spec.pipeline, "-o", "name"], 60);
const argo = runNodeK3sArgs(spec, ["kubectl", "-n", "argocd", "get", "application", spec.app, "-o", "jsonpath={.spec.source.repoURL}{\"\\n\"}{.spec.source.targetRevision}{\"\\n\"}{.spec.source.path}{\"\\n\"}{.status.sync.revision}{\"\\n\"}{.status.sync.status}{\"\\n\"}{.status.health.status}{\"\\n\"}"], 60);
const serviceAccount = runNodeK3sArgs(spec, ["kubectl", "-n", "hwlab-ci", "get", "serviceaccount", spec.serviceAccountName, "-o", "name"], probeTimeoutSeconds);
const pipeline = runNodeK3sArgs(spec, ["kubectl", "-n", "hwlab-ci", "get", "pipeline", spec.pipeline, "-o", "name"], probeTimeoutSeconds);
const argo = runNodeK3sArgs(spec, ["kubectl", "-n", "argocd", "get", "application", spec.app, "-o", "jsonpath={.spec.source.repoURL}{\"\\n\"}{.spec.source.targetRevision}{\"\\n\"}{.spec.source.path}{\"\\n\"}{.status.sync.revision}{\"\\n\"}{.status.sync.status}{\"\\n\"}{.status.health.status}{\"\\n\"}"], probeTimeoutSeconds);
const [repoURL = "", targetRevision = "", path = "", syncRevision = "", syncStatus = "", health = ""] = argo.stdout.split(/\r?\n/u);
const pipelineRunProbe = pipelineRun === null ? null : getNodeRuntimePipelineRun(spec, pipelineRun);
const pipelineRunDiagnostics = pipelineRun !== null && pipelineRunProbe?.exists === true && pipelineRunProbe?.status !== "True"
? nodeRuntimePipelineRunDiagnostics(spec, pipelineRun)
: null;
const workloads = namespaceExists
? runNodeK3sArgs(spec, ["kubectl", "-n", spec.runtimeNamespace, "get", "deploy,statefulset,svc,ingress,configmap", "-l", `hwlab.pikastech.local/gitops-target=${spec.lane}`, "-o", "name"], 60)
? runNodeK3sArgs(spec, ["kubectl", "-n", spec.runtimeNamespace, "get", "deploy,statefulset,svc,ingress,configmap", "-l", `hwlab.pikastech.local/gitops-target=${spec.lane}`, "-o", "name"], probeTimeoutSeconds)
: null;
const workloadNames = workloads === null ? [] : workloads.stdout.split(/\r?\n/u).map((line) => line.trim()).filter(Boolean);
const workloadReadinessProbe = namespaceExists
? runNodeK3sArgs(spec, ["kubectl", "-n", spec.runtimeNamespace, "get", "deploy,statefulset", "-l", `hwlab.pikastech.local/gitops-target=${spec.lane}`, "-o", "jsonpath={range .items[*]}{.kind}{\"/\"}{.metadata.name}{\"\\t\"}{.status.readyReplicas}{\"/\"}{.status.replicas}{\"/\"}{.spec.replicas}{\"\\n\"}{end}"], 60)
? runNodeK3sArgs(spec, ["kubectl", "-n", spec.runtimeNamespace, "get", "deploy,statefulset", "-l", `hwlab.pikastech.local/gitops-target=${spec.lane}`, "-o", "jsonpath={range .items[*]}{.kind}{\"/\"}{.metadata.name}{\"\\t\"}{.status.readyReplicas}{\"/\"}{.status.replicas}{\"/\"}{.spec.replicas}{\"\\n\"}{end}"], probeTimeoutSeconds)
: null;
const workloadReadiness = parseNodeRuntimeWorkloadReadiness(workloadReadinessProbe?.stdout ?? "");
const bridge = externalPostgresBridgeStatus(spec, namespaceExists);
@@ -228,7 +229,9 @@ export function nodeRuntimeControlPlaneStatus(scoped: ReturnType<typeof parseNod
const pipelineRunReady = pipelineRunProbe !== null && pipelineRunProbe.status === "True";
const pipelineRunDegradedReason = typeof pipelineRunDiagnostics?.degradedReason === "string"
? pipelineRunDiagnostics.degradedReason
: "pipelinerun-not-succeeded";
: pipelineRunProbe === null || pipelineRunProbe.exists !== true
? "pipelinerun-not-found"
: "pipelinerun-not-succeeded";
const publicReady = publicProbes.ready === true;
const gitMirrorReady = gitMirror.ok === true && gitMirrorCompact.pendingFlush === false && gitMirrorCompact.githubInSync === true;
const gitMirrorDegradedReason = gitMirrorCompact.sourceSnapshotReady === false
@@ -236,6 +239,34 @@ export function nodeRuntimeControlPlaneStatus(scoped: ReturnType<typeof parseNod
: gitMirrorCompact.pendingFlush === true
? "git-mirror-pending-flush"
: "git-mirror-not-in-sync";
const targetGitopsRevision = nodeRuntimeTargetGitopsRevision(gitMirrorCompact);
const argoDegradedReason = nodeRuntimeArgoDegradedReason({
argoCommandOk: argo.exitCode === 0,
repoURL,
expectedRepoURL: spec.argoRepoUrl,
targetRevision,
expectedTargetRevision: spec.gitopsBranch,
path,
expectedPath: spec.runtimePath,
syncRevision,
syncStatus,
health,
targetGitopsRevision,
runtimeReady,
publicReady,
});
const degradedReason = nodeRuntimeStatusDegradedReason({
controlPlaneReady,
pipelineRunReady,
pipelineRunDegradedReason,
gitMirrorReady,
gitMirrorDegradedReason,
argoReady,
argoDegradedReason,
runtimeReady,
runtimeDegradedReason,
publicReady,
});
const fullStatus = {
ok: controlPlaneReady && runtimeReady && argoReady && pipelineRunReady && publicReady && gitMirrorReady,
command: `hwlab nodes control-plane status --node ${scoped.node} --lane ${scoped.lane}`,
@@ -262,6 +293,7 @@ export function nodeRuntimeControlPlaneStatus(scoped: ReturnType<typeof parseNod
targetRevision,
path,
syncRevision,
targetGitopsRevision,
syncStatus,
health,
result: compactRuntimeCommand(argo),
@@ -296,17 +328,7 @@ export function nodeRuntimeControlPlaneStatus(scoped: ReturnType<typeof parseNod
namespace: compactRuntimeCommand(namespace),
postgresObjects: postgresObjects === null ? null : compactRuntimeCommand(postgresObjects),
},
degradedReason: controlPlaneReady
? runtimeReady
? argoReady
? pipelineRunReady
? publicReady
? gitMirrorReady ? undefined : gitMirrorDegradedReason
: "public-probe-not-ready"
: pipelineRunDegradedReason
: "argo-not-synced-healthy"
: runtimeDegradedReason
: "control-plane-not-ready",
degradedReason,
next: {
plan: `bun scripts/cli.ts hwlab nodes control-plane plan --node ${scoped.node} --lane ${scoped.lane}`,
apply: `bun scripts/cli.ts hwlab nodes control-plane apply --node ${scoped.node} --lane ${scoped.lane} --confirm`,
@@ -343,6 +365,62 @@ export function nullableInteger(value: string): number | null {
return Number(value);
}
/**
 * Selects the GitOps revision from a compact git-mirror summary.
 *
 * `localGitops` is preferred; `githubGitops` is only consulted when the local
 * value is not a full 40-character hexadecimal commit id (case-insensitive).
 *
 * @param gitMirrorCompact - Compact git-mirror record; only the `localGitops`
 *   and `githubGitops` properties are read.
 * @returns The first property value that is a 40-hex-character string, or
 *   `null` when neither qualifies.
 */
export function nodeRuntimeTargetGitopsRevision(gitMirrorCompact: Record<string, unknown>): string | null {
  const fullCommitId = /^[0-9a-f]{40}$/iu;
  // Order encodes precedence: the local mirror wins over the GitHub value.
  for (const key of ["localGitops", "githubGitops"]) {
    const candidate = gitMirrorCompact[key];
    if (typeof candidate === "string" && fullCommitId.test(candidate)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Classifies why the Argo application is not considered ready.
 *
 * Checks are applied in priority order:
 * 1. the Argo query itself failed;
 * 2. the application's repo URL, target revision, or path differs from the
 *    expected values;
 * 3. no target GitOps revision is known;
 * 4. Argo's sync revision differs from the target GitOps revision;
 * 5. Argo is at the target revision, in which case the sync/health state and
 *    the runtime/public readiness flags pick between "health progressing" and
 *    "target revision progressing".
 *
 * NOTE(review): every path below returns a string, so `null` is never produced
 * even though the signature allows it; an at-target, Synced, Healthy
 * application yields "argo-target-revision-progressing". Confirm callers only
 * consult this value when Argo is not ready.
 *
 * @param input - Observed Argo fields, their expected counterparts, the target
 *   GitOps revision (or `null`), and runtime/public readiness flags.
 * @returns A degraded-reason identifier string.
 */
export function nodeRuntimeArgoDegradedReason(input: {
  argoCommandOk: boolean;
  repoURL: string;
  expectedRepoURL: string;
  targetRevision: string;
  expectedTargetRevision: string;
  path: string;
  expectedPath: string;
  syncRevision: string;
  syncStatus: string;
  health: string;
  targetGitopsRevision: string | null;
  runtimeReady: boolean;
  publicReady: boolean;
}): string | null {
  const {
    argoCommandOk,
    repoURL,
    expectedRepoURL,
    targetRevision,
    expectedTargetRevision,
    path,
    expectedPath,
    syncRevision,
    syncStatus,
    health,
    targetGitopsRevision,
    runtimeReady,
    publicReady,
  } = input;

  if (!argoCommandOk) {
    return "argo-application-not-readable";
  }

  const specMatchesExpectation =
    repoURL === expectedRepoURL && targetRevision === expectedTargetRevision && path === expectedPath;
  if (!specMatchesExpectation) {
    return "argo-application-spec-drift";
  }

  // Without a known target there is nothing to compare the sync revision to.
  if (targetGitopsRevision === null) {
    return "argo-not-synced-healthy";
  }
  if (syncRevision !== targetGitopsRevision) {
    return "argo-revision-not-observed";
  }

  // From here on Argo has observed the target revision.
  if (syncStatus === "Synced") {
    return health !== "Healthy" ? "argo-health-progressing" : "argo-target-revision-progressing";
  }
  // Not yet Synced: treated as health progression only when the runtime and
  // the public probes are already ready.
  return runtimeReady && publicReady ? "argo-health-progressing" : "argo-target-revision-progressing";
}
/**
 * Picks the single top-level degraded reason for a control-plane status.
 *
 * Readiness flags are examined in a fixed precedence order — control plane,
 * pipeline run, git mirror, Argo, runtime, public probes — and the reason
 * attached to the first flag that is not ready is returned.
 *
 * @param input - Readiness flags and the reason to report for each component.
 *   A `null` Argo reason falls back to "argo-not-synced-healthy".
 * @returns The reason for the first not-ready component, or `undefined` when
 *   every component is ready.
 */
export function nodeRuntimeStatusDegradedReason(input: {
  controlPlaneReady: boolean;
  pipelineRunReady: boolean;
  pipelineRunDegradedReason: string;
  gitMirrorReady: boolean;
  gitMirrorDegradedReason: string;
  argoReady: boolean;
  argoDegradedReason: string | null;
  runtimeReady: boolean;
  runtimeDegradedReason: string;
  publicReady: boolean;
}): string | undefined {
  // Order of this table is the precedence of reported reasons.
  const precedence: Array<[boolean, string]> = [
    [input.controlPlaneReady, "control-plane-not-ready"],
    [input.pipelineRunReady, input.pipelineRunDegradedReason],
    [input.gitMirrorReady, input.gitMirrorDegradedReason],
    [input.argoReady, input.argoDegradedReason ?? "argo-not-synced-healthy"],
    [input.runtimeReady, input.runtimeDegradedReason],
    [input.publicReady, "public-probe-not-ready"],
  ];
  const firstNotReady = precedence.find(([ready]) => !ready);
  return firstNotReady === undefined ? undefined : firstNotReady[1];
}
export function nodeRuntimePublicProbeStatus(spec: HwlabRuntimeLaneSpec): Record<string, unknown> {
const web = publicHttpProbe("web", spec.publicWebUrl);
const apiHealth = publicHttpProbe("apiHealth", joinUrlPath(spec.publicApiUrl, "/health/live"));
@@ -479,6 +557,42 @@ export function compactNodeRuntimeTaskRunDiagnostic(value: unknown): string {
return [left, reason ? `(${webObserveShort(reason, 36)})` : ""].filter(Boolean).join("");
}
/**
 * Builds diagnostic summaries for pending task runs of a pipeline run.
 *
 * At most the first 16 entries of `pendingTaskRuns` are summarized. Each task
 * run is paired with the first pod whose `name` equals the task run's
 * `podName`, or whose `taskRun` equals the task run's name; when no pod
 * matches, pod-derived fields come from an empty record.
 *
 * @param spec - Lane spec forwarded to the command-building helpers.
 * @param pendingTaskRuns - Task-run records to summarize.
 * @param pods - Pod records searched for the pod backing each task run.
 * @returns One summary per processed task run, carrying task-run fields, pod
 *   scheduling fields, containers in the "waiting" and "running" states (init
 *   containers first), and follow-up commands that are `null` when the
 *   corresponding task-run or pod name is missing.
 */
export function nodeRuntimePipelinePendingTaskRunSummaries(
  spec: HwlabRuntimeLaneSpec,
  pendingTaskRuns: Array<Record<string, unknown>>,
  pods: Array<Record<string, unknown>>,
): Array<Record<string, unknown>> {
  const summaries: Array<Record<string, unknown>> = [];
  for (const taskRun of pendingTaskRuns.slice(0, 16)) {
    const taskRunName = stringOrNull(taskRun.name);
    const podName = stringOrNull(taskRun.podName);

    // Match by pod name first, then by the pod's recorded task-run name.
    const matchedPod = pods.find((candidate) => {
      if (candidate.name === podName) return true;
      return taskRunName !== null && candidate.taskRun === taskRunName;
    });
    const pod: Record<string, unknown> = matchedPod ?? {};

    const containers = Array.isArray(pod.containers) ? pod.containers.map(record) : [];
    const initContainers = Array.isArray(pod.initContainers) ? pod.initContainers.map(record) : [];
    // Init containers are listed ahead of regular containers.
    const everyContainer = [...initContainers, ...containers];
    const waitingContainers = everyContainer.filter((container) => container.state === "waiting");
    const runningContainers = everyContainer.filter((container) => container.state === "running");

    const hasTaskRunName = taskRunName !== null;
    const hasPodName = podName !== null;

    summaries.push({
      name: taskRunName,
      taskRun: taskRunName,
      pipelineTask: taskRun.pipelineTask ?? null,
      taskRef: taskRun.taskRef ?? null,
      status: taskRun.status ?? null,
      reason: taskRun.reason ?? null,
      message: diagnosticText(taskRun.message),
      pod: podName,
      podPhase: pod.phase ?? null,
      scheduled: pod.scheduled ?? null,
      scheduledReason: pod.scheduledReason ?? null,
      scheduledMessage: diagnosticText(pod.scheduledMessage),
      waitingContainers,
      runningContainers,
      taskRunCommand: hasTaskRunName
        ? nodeRuntimeK3sCommand(spec, ["get", "taskrun", "-n", HWLAB_CI_NAMESPACE, taskRunName, "-o", "yaml"])
        : null,
      taskRunDescribeCommand: hasTaskRunName
        ? nodeRuntimeK3sCommand(spec, ["describe", "taskrun", "-n", HWLAB_CI_NAMESPACE, taskRunName])
        : null,
      podDescribeCommand: hasPodName
        ? nodeRuntimeK3sCommand(spec, ["describe", "pod", "-n", HWLAB_CI_NAMESPACE, podName])
        : null,
      podLogsCommand: hasPodName ? nodeRuntimePipelineLogsCommand(spec, podName, null) : null,
    });
  }
  return summaries;
}
export function summarizeNodeRuntimeControlPlaneStatus(status: Record<string, unknown>, scoped: ReturnType<typeof parseNodeScopedDelegatedOptions>): Record<string, unknown> {
const pipelineRun = record(status.pipelineRun);
const pipelineRunDiagnostics = record(status.pipelineRunDiagnostics);
@@ -531,6 +645,8 @@ export function summarizeNodeRuntimeControlPlaneStatus(status: Record<string, un
application: argo.application ?? null,
ready: argo.ready === true,
syncRevision: argo.syncRevision ?? null,
targetGitopsRevision: argo.targetGitopsRevision ?? null,
revisionObserved: typeof argo.targetGitopsRevision === "string" && argo.syncRevision === argo.targetGitopsRevision,
syncStatus: argo.syncStatus ?? null,
health: argo.health ?? null,
},
@@ -610,9 +726,17 @@ export function nodeRuntimeStatusNextAction(status: Record<string, unknown>, sco
if (reason === "argo-not-synced-healthy") {
return `bun scripts/cli.ts hwlab nodes control-plane refresh --node ${scoped.node} --lane ${scoped.lane} --confirm`;
}
if (reason === "argo-revision-not-observed" || reason === "argo-target-revision-progressing" || reason === "argo-health-progressing") {
return `${nodeRuntimeStatusCommand(scoped)} --full`;
}
if (reason === "pipelinerun-not-succeeded") {
return `bun scripts/cli.ts hwlab nodes control-plane trigger-current --node ${scoped.node} --lane ${scoped.lane} --confirm`;
}
if (reason === "node-runtime-ci-taskrun-pending") {
const next = record(record(status.pipelineRunDiagnostics).next);
const pendingTaskRun = typeof next.pendingTaskRun === "string" ? next.pendingTaskRun : null;
return pendingTaskRun ?? `${nodeRuntimeStatusCommand(scoped)} --full`;
}
if (reason === "node-runtime-ci-step-publish-failed") {
return `bun scripts/cli.ts platform-infra sub2api status --target ${scoped.node}`;
}
@@ -659,6 +783,7 @@ export function nodeRuntimePipelineRunDiagnostics(spec: HwlabRuntimeLaneSpec, pi
const pendingTaskRuns = taskRuns.filter((item) => item.status !== "True" && item.status !== "False");
const failedTaskRuns = taskRuns.filter((item) => item.status === "False");
const failedTaskRunSummaries = nodeRuntimePipelineFailedTaskRunSummaries(spec, failedTaskRuns, pods);
const pendingTaskRunSummaries = nodeRuntimePipelinePendingTaskRunSummaries(spec, pendingTaskRuns, pods);
const stepPublishFailures = failedTaskRunSummaries.filter((item) => item.container === "step-publish" || item.step === "publish" || item.step === "step-publish");
const unscheduledPods = pods.filter((item) => item.scheduled === false);
const schedulingMessages = unscheduledPods
@@ -690,7 +815,8 @@ export function nodeRuntimePipelineRunDiagnostics(spec: HwlabRuntimeLaneSpec, pi
failedTaskRuns: failedTaskRunSummaries,
stepPublishFailures,
failureSummary,
pendingTaskRuns,
pendingTaskRuns: pendingTaskRunSummaries,
pendingTaskRunCount: pendingTaskRunSummaries.length,
unscheduledPods,
schedulingMessages,
degradedReason: tooManyPods
@@ -723,7 +849,14 @@ export function nodeRuntimePipelineRunDiagnostics(spec: HwlabRuntimeLaneSpec, pi
failedTaskRun: failedTaskRunSummaries[0]?.taskRunCommand ?? null,
status: `bun scripts/cli.ts hwlab nodes control-plane status --node ${spec.nodeId} --lane ${spec.lane} --pipeline-run ${pipelineRun} --full`,
}
: undefined,
: pendingTaskRunSummaries.length > 0
? {
pendingTaskRun: pendingTaskRunSummaries[0]?.taskRunDescribeCommand ?? pendingTaskRunSummaries[0]?.taskRunCommand ?? null,
pendingPod: pendingTaskRunSummaries[0]?.podDescribeCommand ?? null,
pendingPodLogs: pendingTaskRunSummaries[0]?.podLogsCommand ?? null,
status: `bun scripts/cli.ts hwlab nodes control-plane status --node ${spec.nodeId} --lane ${spec.lane} --pipeline-run ${pipelineRun} --full`,
}
: undefined,
};
}