fix: bound node runtime cicd wait closeout
This commit is contained in:
@@ -169,33 +169,34 @@ export function nodeRuntimeGitMirrorGithubTransportSummary(mirror: NodeRuntimeGi
|
||||
|
||||
export function nodeRuntimeControlPlaneStatus(scoped: ReturnType<typeof parseNodeScopedDelegatedOptions>): Record<string, unknown> {
|
||||
const spec = scoped.spec;
|
||||
const probeTimeoutSeconds = Math.max(1, Math.min(60, scoped.timeoutSeconds));
|
||||
const sourceCommitOverride = optionValue(scoped.originalArgs, "--source-commit");
|
||||
const pipelineRunOverride = optionValue(scoped.originalArgs, "--pipeline-run");
|
||||
const head = sourceCommitOverride === undefined ? resolveNodeRuntimeLaneHead(spec) : null;
|
||||
const sourceCommit = sourceCommitOverride ?? head?.sourceCommit ?? null;
|
||||
const pipelineRun = pipelineRunOverride ?? (sourceCommit === null ? null : nodeRuntimePipelineRunName(spec, sourceCommit));
|
||||
const namespace = runNodeK3sArgs(spec, ["kubectl", "get", "ns", spec.runtimeNamespace, "-o", "name"], 60);
|
||||
const namespace = runNodeK3sArgs(spec, ["kubectl", "get", "ns", spec.runtimeNamespace, "-o", "name"], probeTimeoutSeconds);
|
||||
const namespaceExists = namespace.exitCode === 0;
|
||||
const postgresObjects = namespaceExists
|
||||
? runNodeK3sArgs(spec, ["kubectl", "-n", spec.runtimeNamespace, "get", "statefulset,svc,pvc", "-o", "name"], 60)
|
||||
? runNodeK3sArgs(spec, ["kubectl", "-n", spec.runtimeNamespace, "get", "statefulset,svc,pvc", "-o", "name"], probeTimeoutSeconds)
|
||||
: null;
|
||||
const localPostgresObjects = postgresObjects === null
|
||||
? []
|
||||
: postgresObjects.stdout.split(/\r?\n/u).map((line) => line.trim()).filter((line) => isLocalPostgresObject(line, spec));
|
||||
const serviceAccount = runNodeK3sArgs(spec, ["kubectl", "-n", "hwlab-ci", "get", "serviceaccount", spec.serviceAccountName, "-o", "name"], 60);
|
||||
const pipeline = runNodeK3sArgs(spec, ["kubectl", "-n", "hwlab-ci", "get", "pipeline", spec.pipeline, "-o", "name"], 60);
|
||||
const argo = runNodeK3sArgs(spec, ["kubectl", "-n", "argocd", "get", "application", spec.app, "-o", "jsonpath={.spec.source.repoURL}{\"\\n\"}{.spec.source.targetRevision}{\"\\n\"}{.spec.source.path}{\"\\n\"}{.status.sync.revision}{\"\\n\"}{.status.sync.status}{\"\\n\"}{.status.health.status}{\"\\n\"}"], 60);
|
||||
const serviceAccount = runNodeK3sArgs(spec, ["kubectl", "-n", "hwlab-ci", "get", "serviceaccount", spec.serviceAccountName, "-o", "name"], probeTimeoutSeconds);
|
||||
const pipeline = runNodeK3sArgs(spec, ["kubectl", "-n", "hwlab-ci", "get", "pipeline", spec.pipeline, "-o", "name"], probeTimeoutSeconds);
|
||||
const argo = runNodeK3sArgs(spec, ["kubectl", "-n", "argocd", "get", "application", spec.app, "-o", "jsonpath={.spec.source.repoURL}{\"\\n\"}{.spec.source.targetRevision}{\"\\n\"}{.spec.source.path}{\"\\n\"}{.status.sync.revision}{\"\\n\"}{.status.sync.status}{\"\\n\"}{.status.health.status}{\"\\n\"}"], probeTimeoutSeconds);
|
||||
const [repoURL = "", targetRevision = "", path = "", syncRevision = "", syncStatus = "", health = ""] = argo.stdout.split(/\r?\n/u);
|
||||
const pipelineRunProbe = pipelineRun === null ? null : getNodeRuntimePipelineRun(spec, pipelineRun);
|
||||
const pipelineRunDiagnostics = pipelineRun !== null && pipelineRunProbe?.exists === true && pipelineRunProbe?.status !== "True"
|
||||
? nodeRuntimePipelineRunDiagnostics(spec, pipelineRun)
|
||||
: null;
|
||||
const workloads = namespaceExists
|
||||
? runNodeK3sArgs(spec, ["kubectl", "-n", spec.runtimeNamespace, "get", "deploy,statefulset,svc,ingress,configmap", "-l", `hwlab.pikastech.local/gitops-target=${spec.lane}`, "-o", "name"], 60)
|
||||
? runNodeK3sArgs(spec, ["kubectl", "-n", spec.runtimeNamespace, "get", "deploy,statefulset,svc,ingress,configmap", "-l", `hwlab.pikastech.local/gitops-target=${spec.lane}`, "-o", "name"], probeTimeoutSeconds)
|
||||
: null;
|
||||
const workloadNames = workloads === null ? [] : workloads.stdout.split(/\r?\n/u).map((line) => line.trim()).filter(Boolean);
|
||||
const workloadReadinessProbe = namespaceExists
|
||||
? runNodeK3sArgs(spec, ["kubectl", "-n", spec.runtimeNamespace, "get", "deploy,statefulset", "-l", `hwlab.pikastech.local/gitops-target=${spec.lane}`, "-o", "jsonpath={range .items[*]}{.kind}{\"/\"}{.metadata.name}{\"\\t\"}{.status.readyReplicas}{\"/\"}{.status.replicas}{\"/\"}{.spec.replicas}{\"\\n\"}{end}"], 60)
|
||||
? runNodeK3sArgs(spec, ["kubectl", "-n", spec.runtimeNamespace, "get", "deploy,statefulset", "-l", `hwlab.pikastech.local/gitops-target=${spec.lane}`, "-o", "jsonpath={range .items[*]}{.kind}{\"/\"}{.metadata.name}{\"\\t\"}{.status.readyReplicas}{\"/\"}{.status.replicas}{\"/\"}{.spec.replicas}{\"\\n\"}{end}"], probeTimeoutSeconds)
|
||||
: null;
|
||||
const workloadReadiness = parseNodeRuntimeWorkloadReadiness(workloadReadinessProbe?.stdout ?? "");
|
||||
const bridge = externalPostgresBridgeStatus(spec, namespaceExists);
|
||||
@@ -228,7 +229,9 @@ export function nodeRuntimeControlPlaneStatus(scoped: ReturnType<typeof parseNod
|
||||
const pipelineRunReady = pipelineRunProbe !== null && pipelineRunProbe.status === "True";
|
||||
const pipelineRunDegradedReason = typeof pipelineRunDiagnostics?.degradedReason === "string"
|
||||
? pipelineRunDiagnostics.degradedReason
|
||||
: "pipelinerun-not-succeeded";
|
||||
: pipelineRunProbe === null || pipelineRunProbe.exists !== true
|
||||
? "pipelinerun-not-found"
|
||||
: "pipelinerun-not-succeeded";
|
||||
const publicReady = publicProbes.ready === true;
|
||||
const gitMirrorReady = gitMirror.ok === true && gitMirrorCompact.pendingFlush === false && gitMirrorCompact.githubInSync === true;
|
||||
const gitMirrorDegradedReason = gitMirrorCompact.sourceSnapshotReady === false
|
||||
@@ -236,6 +239,34 @@ export function nodeRuntimeControlPlaneStatus(scoped: ReturnType<typeof parseNod
|
||||
: gitMirrorCompact.pendingFlush === true
|
||||
? "git-mirror-pending-flush"
|
||||
: "git-mirror-not-in-sync";
|
||||
const targetGitopsRevision = nodeRuntimeTargetGitopsRevision(gitMirrorCompact);
|
||||
const argoDegradedReason = nodeRuntimeArgoDegradedReason({
|
||||
argoCommandOk: argo.exitCode === 0,
|
||||
repoURL,
|
||||
expectedRepoURL: spec.argoRepoUrl,
|
||||
targetRevision,
|
||||
expectedTargetRevision: spec.gitopsBranch,
|
||||
path,
|
||||
expectedPath: spec.runtimePath,
|
||||
syncRevision,
|
||||
syncStatus,
|
||||
health,
|
||||
targetGitopsRevision,
|
||||
runtimeReady,
|
||||
publicReady,
|
||||
});
|
||||
const degradedReason = nodeRuntimeStatusDegradedReason({
|
||||
controlPlaneReady,
|
||||
pipelineRunReady,
|
||||
pipelineRunDegradedReason,
|
||||
gitMirrorReady,
|
||||
gitMirrorDegradedReason,
|
||||
argoReady,
|
||||
argoDegradedReason,
|
||||
runtimeReady,
|
||||
runtimeDegradedReason,
|
||||
publicReady,
|
||||
});
|
||||
const fullStatus = {
|
||||
ok: controlPlaneReady && runtimeReady && argoReady && pipelineRunReady && publicReady && gitMirrorReady,
|
||||
command: `hwlab nodes control-plane status --node ${scoped.node} --lane ${scoped.lane}`,
|
||||
@@ -262,6 +293,7 @@ export function nodeRuntimeControlPlaneStatus(scoped: ReturnType<typeof parseNod
|
||||
targetRevision,
|
||||
path,
|
||||
syncRevision,
|
||||
targetGitopsRevision,
|
||||
syncStatus,
|
||||
health,
|
||||
result: compactRuntimeCommand(argo),
|
||||
@@ -296,17 +328,7 @@ export function nodeRuntimeControlPlaneStatus(scoped: ReturnType<typeof parseNod
|
||||
namespace: compactRuntimeCommand(namespace),
|
||||
postgresObjects: postgresObjects === null ? null : compactRuntimeCommand(postgresObjects),
|
||||
},
|
||||
degradedReason: controlPlaneReady
|
||||
? runtimeReady
|
||||
? argoReady
|
||||
? pipelineRunReady
|
||||
? publicReady
|
||||
? gitMirrorReady ? undefined : gitMirrorDegradedReason
|
||||
: "public-probe-not-ready"
|
||||
: pipelineRunDegradedReason
|
||||
: "argo-not-synced-healthy"
|
||||
: runtimeDegradedReason
|
||||
: "control-plane-not-ready",
|
||||
degradedReason,
|
||||
next: {
|
||||
plan: `bun scripts/cli.ts hwlab nodes control-plane plan --node ${scoped.node} --lane ${scoped.lane}`,
|
||||
apply: `bun scripts/cli.ts hwlab nodes control-plane apply --node ${scoped.node} --lane ${scoped.lane} --confirm`,
|
||||
@@ -343,6 +365,62 @@ export function nullableInteger(value: string): number | null {
|
||||
return Number(value);
|
||||
}
|
||||
|
||||
/**
 * Selects the GitOps revision that Argo is expected to have observed.
 *
 * `localGitops` is preferred and `githubGitops` is the fallback. A candidate is
 * accepted only when it is a string of exactly 40 hexadecimal characters
 * (matched case-insensitively); the accepted value is returned unchanged.
 *
 * @param gitMirrorCompact - Compact git-mirror summary; only the `localGitops`
 *   and `githubGitops` properties are read.
 * @returns The first acceptable revision, or `null` when neither property qualifies.
 */
export function nodeRuntimeTargetGitopsRevision(gitMirrorCompact: Record<string, unknown>): string | null {
  // No `g`/`y` flag, so `test` carries no state between calls.
  const fortyHexChars = /^[0-9a-f]{40}$/iu;
  const isFullRevision = (value: unknown): value is string => typeof value === "string" && fortyHexChars.test(value);

  if (isFullRevision(gitMirrorCompact.localGitops)) return gitMirrorCompact.localGitops;
  if (isFullRevision(gitMirrorCompact.githubGitops)) return gitMirrorCompact.githubGitops;
  return null;
}
|
||||
|
||||
/**
 * Classifies why the Argo application is not considered ready.
 *
 * Checks run in this order, first match wins:
 * 1. the Argo probe command failed -> `"argo-application-not-readable"`;
 * 2. repo URL, target revision or path differ from the expected values ->
 *    `"argo-application-spec-drift"`;
 * 3. no target GitOps revision is known -> `"argo-not-synced-healthy"`;
 * 4. the sync revision differs from the target -> `"argo-revision-not-observed"`;
 * 5. at the target revision: `"argo-health-progressing"` when either the sync
 *    status is `"Synced"` but health is not `"Healthy"`, or the sync status is
 *    not `"Synced"` while runtime and public probes are both ready;
 *    otherwise `"argo-target-revision-progressing"`.
 *
 * The function does not itself decide readiness: it always returns a string,
 * including for a Synced/Healthy application at the target revision. `null`
 * stays in the return type for interface compatibility with callers.
 *
 * @param input - Observed and expected Argo application fields plus readiness flags.
 * @returns A degraded-reason code.
 */
export function nodeRuntimeArgoDegradedReason(input: {
  argoCommandOk: boolean;
  repoURL: string;
  expectedRepoURL: string;
  targetRevision: string;
  expectedTargetRevision: string;
  path: string;
  expectedPath: string;
  syncRevision: string;
  syncStatus: string;
  health: string;
  targetGitopsRevision: string | null;
  runtimeReady: boolean;
  publicReady: boolean;
}): string | null {
  if (!input.argoCommandOk) return "argo-application-not-readable";

  const specMatchesExpectation = input.repoURL === input.expectedRepoURL
    && input.targetRevision === input.expectedTargetRevision
    && input.path === input.expectedPath;
  if (!specMatchesExpectation) return "argo-application-spec-drift";

  // Without a known target there is nothing to compare the sync revision against.
  if (input.targetGitopsRevision === null) return "argo-not-synced-healthy";
  if (input.syncRevision !== input.targetGitopsRevision) return "argo-revision-not-observed";

  // From here on Argo reports the target revision.
  if (input.syncStatus === "Synced") {
    return input.health !== "Healthy" ? "argo-health-progressing" : "argo-target-revision-progressing";
  }
  return input.runtimeReady && input.publicReady ? "argo-health-progressing" : "argo-target-revision-progressing";
}
|
||||
|
||||
/**
 * Reduces the individual readiness flags to a single degraded-reason code.
 *
 * The first component that is not ready determines the result, checked in this
 * order: control plane, pipeline run, git mirror, Argo, runtime, public probes.
 * When Argo is not ready and no specific Argo reason was supplied, the generic
 * `"argo-not-synced-healthy"` code is used.
 *
 * @param input - Readiness flags and the per-component reason codes.
 * @returns The reason code of the first non-ready component, or `undefined`
 *   when every component is ready.
 */
export function nodeRuntimeStatusDegradedReason(input: {
  controlPlaneReady: boolean;
  pipelineRunReady: boolean;
  pipelineRunDegradedReason: string;
  gitMirrorReady: boolean;
  gitMirrorDegradedReason: string;
  argoReady: boolean;
  argoDegradedReason: string | null;
  runtimeReady: boolean;
  runtimeDegradedReason: string;
  publicReady: boolean;
}): string | undefined {
  if (!input.controlPlaneReady) return "control-plane-not-ready";
  if (!input.pipelineRunReady) return input.pipelineRunDegradedReason;
  if (!input.gitMirrorReady) return input.gitMirrorDegradedReason;
  if (!input.argoReady) return input.argoDegradedReason ?? "argo-not-synced-healthy";
  if (!input.runtimeReady) return input.runtimeDegradedReason;
  if (!input.publicReady) return "public-probe-not-ready";
  return undefined;
}
|
||||
|
||||
export function nodeRuntimePublicProbeStatus(spec: HwlabRuntimeLaneSpec): Record<string, unknown> {
|
||||
const web = publicHttpProbe("web", spec.publicWebUrl);
|
||||
const apiHealth = publicHttpProbe("apiHealth", joinUrlPath(spec.publicApiUrl, "/health/live"));
|
||||
@@ -479,6 +557,42 @@ export function compactNodeRuntimeTaskRunDiagnostic(value: unknown): string {
|
||||
return [left, reason ? `(${webObserveShort(reason, 36)})` : ""].filter(Boolean).join("");
|
||||
}
|
||||
|
||||
/**
 * Builds diagnostic summaries for pending task runs, joined with their pods.
 *
 * At most the first 16 entries of `pendingTaskRuns` are summarized. Each task
 * run is paired with the first pod whose `name` equals the task run's
 * `podName`, or whose `taskRun` equals the task run's `name`; when no pod
 * matches, pod-derived fields fall back to `null` / empty lists.
 *
 * @param spec - Lane spec forwarded to the command builders.
 * @param pendingTaskRuns - Task-run records; reads `name`, `podName`,
 *   `pipelineTask`, `taskRef`, `status`, `reason` and `message`.
 * @param pods - Pod records; reads `name`, `taskRun`, `phase`, `scheduled`,
 *   `scheduledReason`, `scheduledMessage`, `containers` and `initContainers`.
 * @returns One summary per considered task run, including follow-up commands
 *   (each `null` when the task-run or pod name it needs is unavailable).
 */
export function nodeRuntimePipelinePendingTaskRunSummaries(
  spec: HwlabRuntimeLaneSpec,
  pendingTaskRuns: Array<Record<string, unknown>>,
  pods: Array<Record<string, unknown>>,
): Array<Record<string, unknown>> {
  return pendingTaskRuns.slice(0, 16).map((taskRun) => {
    const taskRunName = stringOrNull(taskRun.name);
    const podName = stringOrNull(taskRun.podName);
    // Both comparisons are guarded so a missing name on the task run can never
    // pair it with a pod record whose corresponding field is itself null.
    const pod: Record<string, unknown> = pods.find((item) =>
      (podName !== null && item.name === podName) || (taskRunName !== null && item.taskRun === taskRunName),
    ) ?? {};
    const containers = Array.isArray(pod.containers) ? pod.containers.map(record) : [];
    const initContainers = Array.isArray(pod.initContainers) ? pod.initContainers.map(record) : [];
    // Init containers are listed ahead of regular containers.
    const allContainers = [...initContainers, ...containers];
    const waitingContainers = allContainers.filter((container) => container.state === "waiting");
    const runningContainers = allContainers.filter((container) => container.state === "running");
    return {
      name: taskRunName,
      taskRun: taskRunName,
      pipelineTask: taskRun.pipelineTask ?? null,
      taskRef: taskRun.taskRef ?? null,
      status: taskRun.status ?? null,
      reason: taskRun.reason ?? null,
      message: diagnosticText(taskRun.message),
      pod: podName,
      podPhase: pod.phase ?? null,
      scheduled: pod.scheduled ?? null,
      scheduledReason: pod.scheduledReason ?? null,
      scheduledMessage: diagnosticText(pod.scheduledMessage),
      waitingContainers,
      runningContainers,
      taskRunCommand: taskRunName === null ? null : nodeRuntimeK3sCommand(spec, ["get", "taskrun", "-n", HWLAB_CI_NAMESPACE, taskRunName, "-o", "yaml"]),
      taskRunDescribeCommand: taskRunName === null ? null : nodeRuntimeK3sCommand(spec, ["describe", "taskrun", "-n", HWLAB_CI_NAMESPACE, taskRunName]),
      podDescribeCommand: podName === null ? null : nodeRuntimeK3sCommand(spec, ["describe", "pod", "-n", HWLAB_CI_NAMESPACE, podName]),
      podLogsCommand: podName === null ? null : nodeRuntimePipelineLogsCommand(spec, podName, null),
    };
  });
}
|
||||
|
||||
export function summarizeNodeRuntimeControlPlaneStatus(status: Record<string, unknown>, scoped: ReturnType<typeof parseNodeScopedDelegatedOptions>): Record<string, unknown> {
|
||||
const pipelineRun = record(status.pipelineRun);
|
||||
const pipelineRunDiagnostics = record(status.pipelineRunDiagnostics);
|
||||
@@ -531,6 +645,8 @@ export function summarizeNodeRuntimeControlPlaneStatus(status: Record<string, un
|
||||
application: argo.application ?? null,
|
||||
ready: argo.ready === true,
|
||||
syncRevision: argo.syncRevision ?? null,
|
||||
targetGitopsRevision: argo.targetGitopsRevision ?? null,
|
||||
revisionObserved: typeof argo.targetGitopsRevision === "string" && argo.syncRevision === argo.targetGitopsRevision,
|
||||
syncStatus: argo.syncStatus ?? null,
|
||||
health: argo.health ?? null,
|
||||
},
|
||||
@@ -610,9 +726,17 @@ export function nodeRuntimeStatusNextAction(status: Record<string, unknown>, sco
|
||||
if (reason === "argo-not-synced-healthy") {
|
||||
return `bun scripts/cli.ts hwlab nodes control-plane refresh --node ${scoped.node} --lane ${scoped.lane} --confirm`;
|
||||
}
|
||||
if (reason === "argo-revision-not-observed" || reason === "argo-target-revision-progressing" || reason === "argo-health-progressing") {
|
||||
return `${nodeRuntimeStatusCommand(scoped)} --full`;
|
||||
}
|
||||
if (reason === "pipelinerun-not-succeeded") {
|
||||
return `bun scripts/cli.ts hwlab nodes control-plane trigger-current --node ${scoped.node} --lane ${scoped.lane} --confirm`;
|
||||
}
|
||||
if (reason === "node-runtime-ci-taskrun-pending") {
|
||||
const next = record(record(status.pipelineRunDiagnostics).next);
|
||||
const pendingTaskRun = typeof next.pendingTaskRun === "string" ? next.pendingTaskRun : null;
|
||||
return pendingTaskRun ?? `${nodeRuntimeStatusCommand(scoped)} --full`;
|
||||
}
|
||||
if (reason === "node-runtime-ci-step-publish-failed") {
|
||||
return `bun scripts/cli.ts platform-infra sub2api status --target ${scoped.node}`;
|
||||
}
|
||||
@@ -659,6 +783,7 @@ export function nodeRuntimePipelineRunDiagnostics(spec: HwlabRuntimeLaneSpec, pi
|
||||
const pendingTaskRuns = taskRuns.filter((item) => item.status !== "True" && item.status !== "False");
|
||||
const failedTaskRuns = taskRuns.filter((item) => item.status === "False");
|
||||
const failedTaskRunSummaries = nodeRuntimePipelineFailedTaskRunSummaries(spec, failedTaskRuns, pods);
|
||||
const pendingTaskRunSummaries = nodeRuntimePipelinePendingTaskRunSummaries(spec, pendingTaskRuns, pods);
|
||||
const stepPublishFailures = failedTaskRunSummaries.filter((item) => item.container === "step-publish" || item.step === "publish" || item.step === "step-publish");
|
||||
const unscheduledPods = pods.filter((item) => item.scheduled === false);
|
||||
const schedulingMessages = unscheduledPods
|
||||
@@ -690,7 +815,8 @@ export function nodeRuntimePipelineRunDiagnostics(spec: HwlabRuntimeLaneSpec, pi
|
||||
failedTaskRuns: failedTaskRunSummaries,
|
||||
stepPublishFailures,
|
||||
failureSummary,
|
||||
pendingTaskRuns,
|
||||
pendingTaskRuns: pendingTaskRunSummaries,
|
||||
pendingTaskRunCount: pendingTaskRunSummaries.length,
|
||||
unscheduledPods,
|
||||
schedulingMessages,
|
||||
degradedReason: tooManyPods
|
||||
@@ -723,7 +849,14 @@ export function nodeRuntimePipelineRunDiagnostics(spec: HwlabRuntimeLaneSpec, pi
|
||||
failedTaskRun: failedTaskRunSummaries[0]?.taskRunCommand ?? null,
|
||||
status: `bun scripts/cli.ts hwlab nodes control-plane status --node ${spec.nodeId} --lane ${spec.lane} --pipeline-run ${pipelineRun} --full`,
|
||||
}
|
||||
: undefined,
|
||||
: pendingTaskRunSummaries.length > 0
|
||||
? {
|
||||
pendingTaskRun: pendingTaskRunSummaries[0]?.taskRunDescribeCommand ?? pendingTaskRunSummaries[0]?.taskRunCommand ?? null,
|
||||
pendingPod: pendingTaskRunSummaries[0]?.podDescribeCommand ?? null,
|
||||
pendingPodLogs: pendingTaskRunSummaries[0]?.podLogsCommand ?? null,
|
||||
status: `bun scripts/cli.ts hwlab nodes control-plane status --node ${spec.nodeId} --lane ${spec.lane} --pipeline-run ${pipelineRun} --full`,
|
||||
}
|
||||
: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user