Merge pull request #495 from pikasTech/fix/d601-k3s-max-pods-491

fix: D601 k3s pod capacity YAML-first
This commit is contained in:
Lyon
2026-06-19 22:55:25 +08:00
committed by GitHub
4 changed files with 478 additions and 16 deletions
+36
View File
@@ -4,6 +4,7 @@ metadata:
owner: unidesk
relatedIssues:
- 290
- 491
- 1119
imagePolicy:
requireReproducibleBuildSource: true
@@ -16,6 +17,41 @@ nodes:
D601:
route: D601
kubeRoute: D601:k3s
k3s:
serviceName: k3s
dropInPath: /etc/systemd/system/k3s.service.d/20-unidesk-node-config.conf
nodeStatusName: d601
execStartPre:
- - -/usr/bin/umount
- /Docker/host
serverArgs:
- server
- --disable
- traefik
- --disable
- servicelb
- --disable
- metrics-server
- --node-name
- D601
- --node-label
- unidesk.ai/node-id=D601
- --node-label
- unidesk.ai/provider-id=D601
- --tls-san
- 127.0.0.1
- --tls-san
- host.docker.internal
- --write-kubeconfig-mode
- "644"
- --kubelet-arg
- image-gc-high-threshold=95
- --kubelet-arg
- image-gc-low-threshold=90
- --kubelet-arg
- max-pods=500
kubelet:
maxPods: 500
registry:
endpoint: 127.0.0.1:5000
egressProxy:
+1 -1
View File
@@ -24,7 +24,7 @@ G14/D601 v03 的 bootstrap admin password 是 HWLAB runtime Secret 生命周期
`hwlab nodes web-probe run|script --node <node> --lane <lane>` 是 HWLAB Cloud Web 线上 DOM/Playwright 验收的受控入口;CLI 负责从 YAML 解析 workspace、public URL 和 bootstrap admin sourceRef,并只输出 redacted 凭据状态、artifact path/hash、readiness、`probe.summary` 和失败分类。`run` 使用 repo-owned 标准 DOM probe`script` 不运行默认探针,必须通过 stdin heredoc 或 `--script-file <path>` 提供调用者脚本。`run --message ...` 未显式设置 trace 参数时会做轻量 trace 采样,`script` helper 可用 `recordStep` / `safeFetchJson` / `fetchApiMatrix` 保留失败前的结构化 partial evidence,完整 redacted 报告通过 `reportPath`/`reportSha256` 展开。具体 Web 开发、fake-server Playwright、fixture 脱敏、`web-probe script` helper、截图和 Workbench/Performance 判定口径统一见 `$unidesk-webdev`,本 CLI 参考不再维护第二套操作面。
`hwlab nodes control-plane infra plan|status|apply --node D601 --lane v03` 是 D601 HWLAB v03 节点本地 CI/CD 与 git-mirror 前置控制面的 YAML 驱动入口,配置真相源是 `config/hwlab-node-control-plane.yaml``plan` 只读展示 YAML target 和将渲染的 control-plane 对象;`status` 只读观察 D601 Tekton、CI namespace、git-mirror、Argo、node-local registry 和 tools image readiness`apply --dry-run` 只输出 manifest 摘要;`apply --confirm` 收敛 D601 control-plane bootstrap 对象,不触发 HWLAB runtime rollout,不创建 PK01 DB,也不修改 Caddy/FRP。tools image 的 node-local registry 地址只能作为输出 artifact输入 base image 必须由 YAML 声明为公开 registry 来源,缺少 output image 时应在 `status.next.blockers` 中体现,而不是把现有 node-local image 当成输入基础镜像。
`hwlab nodes control-plane infra plan|status|apply --node D601 --lane v03` 是 D601 HWLAB v03 节点本地 k3s、CI/CD 与 git-mirror 前置控制面的 YAML 驱动入口,配置真相源是 `config/hwlab-node-control-plane.yaml`;`plan` 只读展示 YAML target、host k3s node config 摘要和将渲染的 control-plane 对象;`status` 只读观察 k3s systemd drop-in 与 node `capacity/allocatable.pods`,以及 D601 Tekton、CI namespace、git-mirror、Argo、node-local registry 和 tools image readiness;`apply --dry-run` 只输出 manifest 与 host config 摘要;`apply --confirm` 按 YAML 收敛 D601 host k3s drop-in 和 control-plane bootstrap 对象,只有 host k3s 配置或 live pod capacity 未收敛时才重启 k3s;不触发 HWLAB runtime rollout,不创建 PK01 DB,也不修改 Caddy/FRP。D601 host 侧 k3s pre-start 修正也必须写成 YAML `execStartPre` argv,不做手工 systemd 热改;当 kube API 已不可用时,`apply` 可用同一 YAML 渲染出的 host 脚本经 node-local tools image/Docker fallback 恢复 systemd drop-in,输出仍只给对象名、SHA、exit code 和摘要。k3s pod capacity 等可调数值只以 YAML 为准,长期参考不复制具体数值;tools image 的 node-local registry 地址只能作为输出 artifact;输入 base image 必须由 YAML 声明为公开 registry 来源,缺少 output image 时应在 `status.next.blockers` 中体现,而不是把现有 node-local image 当成输入基础镜像。
`hwlab nodes git-mirror status|sync|flush --node <node> --lane <lane>` 是 node-scoped runtime lane 的 Git mirror 维护入口。`status``githubSource` / `githubGitops` 来自本地 mirror cache 的 `refs/mirror-stage/...`,不是实时 GitHub API;输出中的 `refSources.githubFieldsAreMirrorStageCache=true``refSources.cacheRefresh` 给出这一来源和刷新命令。`sync --confirm --wait` 的 k3s Job 遇到 GitHub SSH transient 时,应通过目标 workspace fallback 拉取 GitHub source/gitops 并写回 node-local mirror,输出只披露 commit、mirror write URL 和 fallback 状态。`flush --confirm --wait` 如果已经把 GitOps ref push 到 GitHub,但 post-push fetch/recheck 因 transient SSH 失败而无法刷新 mirror-stage,会标记 `partialSuccess=push-succeeded-fetch-failed`;CLI 应自动执行一次受控 sync 刷新 mirror-stage,若恢复后 `pendingFlush=false``githubInSync=true`,结果应为 `ok=true` 并输出 `partialSuccessRecovered` / `postPushRecovery`,否则才保留 `degradedReason=node-runtime-git-mirror-flush-post-push-fetch-failed` 和下一步 `sync --confirm --wait`。不要把这种 partial success 解读为需要连续盲目 flush。`hwlab nodes control-plane trigger-current --node <node> --lane <lane> --confirm --wait` 会在 source sync 后自动执行必要的 pre-flush,在 PipelineRun terminal 后自动执行必要的 post-flushprogress 事件必须显式输出 `git-mirror-pre-flush` / `git-mirror-post-flush` 的 executed/skipped、jobName、local/github source、local/github GitOps、`pendingFlush``githubInSync`,且已恢复的 partial success 不能让顶层 trigger-current false-fail。`control-plane status` 仍是只读入口,只暴露 compact `gitMirror` 摘要和下一步 flush 命令,不隐式执行写操作。
+1 -1
View File
@@ -59,7 +59,7 @@ export function rootHelp(): unknown {
{ command: "gh preflight|auth|issue|pr", description: "Run safe GitHub issue and PR CRUD/lifecycle operations through REST with body-file update replace/append, issue/comment apply_patch body patching, comment delete, token diagnostics, PR closeout preflight, hard delete unsupported, and guarded PR merge." },
{ command: "git github-push-fallback [--repo owner/name] [--branch branch] [--host-name host-or-ip] [--confirm]", description: "Plan or execute a one-shot GitHub push through ssh.github.com:443 without editing remotes; use only for reviewed DNS/port-22 push fallback." },
{ command: "commander contract|plan --dry-run|smoke --dry-run|approval request --dry-run", description: "Host Codex commander skeleton contract, no-daemon smoke plan, and dry-run approval preview without live bridges or message sends." },
{ command: "hwlab nodes control-plane|git-mirror|secret|test-accounts|web-probe --node <node> --lane <lane>", description: "Manage HWLAB node/lane runtime prerequisites, including D601 YAML-declared infra/tools-image/Argo bootstrap, redacted test-account preparation, Web DOM probe credential injection, and G14 v0.3+ runtime lanes, with the node identity passed as data." },
{ command: "hwlab nodes control-plane|git-mirror|secret|test-accounts|web-probe --node <node> --lane <lane>", description: "Manage HWLAB node/lane runtime prerequisites, including D601 YAML-declared k3s infra/tools-image/Argo bootstrap, redacted test-account preparation, Web DOM probe credential injection, and G14 v0.3+ runtime lanes, with the node identity passed as data." },
{ command: "hwlab g14 monitor-prs | hwlab g14 control-plane status|apply|trigger-current|runtime-migration|cleanup-runs|cleanup-released-pvs | hwlab g14 git-mirror status|apply|sync|flush | hwlab g14 tools-image status|build", description: "Start the legacy G14 PR monitor, run bounded v0.2 Tekton/Argo control-plane, manual PipelineRun trigger, runtime migration, CI workspace retention, manual devops-infra git mirror/relay maintenance, or fixed HWLAB CI tools image actions; long confirmed trigger/sync/flush actions return async jobs by default." },
{ command: "agentrun get|describe|events|logs|result|ack|cancel|dispatch|create|apply|send|control-plane|git-mirror", description: "Use AgentRun v0.1 resource primitives with low-noise human output by default; session follow-up uses send only and the server decides internal steer vs turn." },
{ command: "platform-infra sub2api|langbot|n8n|wechat-archive ...", description: "Deploy platform-infra services such as Sub2API, LangBot and n8n, manage YAML-controlled public FRP/Caddy exposure and WeChat archive workflows, and inspect status/logs without printing secrets." },
+440 -14
View File
@@ -50,10 +50,20 @@ interface ControlPlaneNodeSpec {
id: string;
route: string;
kubeRoute: string;
k3s: ControlPlaneK3sNodeSpec | null;
registry: { endpoint: string };
egressProxy: ControlPlaneEgressProxySpec | null;
}
/**
 * Host-level k3s configuration for one node, parsed from the optional
 * `nodes.<id>.k3s` section of the control-plane YAML.
 */
interface ControlPlaneK3sNodeSpec {
// systemd unit name of the k3s service; the parser restricts it to [A-Za-z0-9_.@-].
serviceName: string;
// Absolute path of the managed drop-in; must be under /etc/systemd/system/ and end in .conf.
dropInPath: string;
// Node name passed to `kubectl get node` when reading live pod capacity/allocatable.
nodeStatusName: string;
// Commands rendered as `ExecStartPre=` lines, one argv array per command.
execStartPre: readonly (readonly string[])[];
// Arguments appended after /usr/local/bin/k3s in `ExecStart=`; the first entry must be "server".
serverArgs: readonly string[];
// Desired pod capacity; status compares it with the node's live capacity and allocatable pods.
kubelet: { maxPods: number };
}
interface DockerfileInlineSpec {
filename: string;
lines: readonly string[];
@@ -180,7 +190,7 @@ export function hwlabNodeControlPlaneInfraHelp(): Record<string, unknown> {
ok: true,
command: "hwlab nodes control-plane infra",
configPath: HWLAB_NODE_CONTROL_PLANE_CONFIG_PATH,
description: "Plan/status/apply YAML-controlled HWLAB node-local CI/CD and git-mirror control-plane prerequisites. Cross-node PK01/Caddy/FRP/runtime rollout remains explicit semi-automatic CLI work.",
description: "Plan/status/apply YAML-controlled HWLAB node-local k3s, CI/CD and git-mirror control-plane prerequisites. Cross-node PK01/Caddy/FRP/runtime rollout remains explicit semi-automatic CLI work.",
usage: [
"bun scripts/cli.ts hwlab nodes control-plane infra plan --node D601 --lane v03",
"bun scripts/cli.ts hwlab nodes control-plane infra status --node D601 --lane v03",
@@ -210,6 +220,7 @@ function infraPlan(_config: ControlPlaneConfig, node: ControlPlaneNodeSpec, targ
mutation: false,
target: planSummary(node, target),
expected: expectedSummary(node, target),
hostConfig: k3sNodeConfigPlan(node),
imagePolicy: _config.imagePolicy,
g14Consistency: {
laneVocabulary: ["sourceBranch", "gitopsBranch", "catalogPath", "runtime.path", "runtime.namespace", "tekton.pipeline", "pipelineRunPrefix", "argo.application"],
@@ -229,7 +240,7 @@ function infraPlan(_config: ControlPlaneConfig, node: ControlPlaneNodeSpec, targ
}
function infraStatus(_config: ControlPlaneConfig, node: ControlPlaneNodeSpec, target: ControlPlaneTargetSpec, options: InfraOptions): Record<string, unknown> {
const script = statusScript(target, node.registry.endpoint, target.tekton.toolsImage.output);
const script = statusScript(node, target);
const result = runTransK3s(node.kubeRoute, script, options.timeoutSeconds);
const parsed = parseRemoteJson(result.stdout);
const status = typeof parsed === "object" && parsed !== null ? parsed as Record<string, unknown> : { parseError: "remote status did not return a JSON object", stdoutPreview: result.stdout.slice(0, 1000) };
@@ -240,7 +251,13 @@ function infraStatus(_config: ControlPlaneConfig, node: ControlPlaneNodeSpec, ta
const tekton = record(components.tekton);
const ciNamespace = record(components.ciNamespace);
const registry = record(components.registry);
const k3sNodeConfig = record(components.k3sNodeConfig);
const k3sNodeConfigReady = node.k3s === null
|| (boolField(k3sNodeConfig, "dropInMatches")
&& numberValue(k3sNodeConfig.liveCapacityPods) === node.k3s.kubelet.maxPods
&& numberValue(k3sNodeConfig.liveAllocatablePods) === node.k3s.kubelet.maxPods);
const ok = result.exitCode === 0
&& k3sNodeConfigReady
&& boolField(tekton, "installed")
&& boolField(ciNamespace, "exists")
&& boolField(gitMirror, "namespaceExists")
@@ -267,6 +284,7 @@ function infraStatus(_config: ControlPlaneConfig, node: ControlPlaneNodeSpec, ta
status,
readiness: {
ok,
k3sNodeConfigReady,
tektonInstalled: boolField(tekton, "installed"),
ciNamespaceExists: boolField(ciNamespace, "exists"),
gitMirrorNamespaceExists: boolField(gitMirror, "namespaceExists"),
@@ -286,7 +304,7 @@ function infraStatus(_config: ControlPlaneConfig, node: ControlPlaneNodeSpec, ta
toolsImageReady: boolField(registry, "toolsImageReady"),
},
result: compactCommandResult(result),
next: ok ? { runtimePreparation: `bun scripts/cli.ts hwlab nodes control-plane plan --node ${node.id} --lane ${target.lane}` } : statusNext(node, target, registry, gitMirror, argo, ciNamespace),
next: ok ? { runtimePreparation: `bun scripts/cli.ts hwlab nodes control-plane plan --node ${node.id} --lane ${target.lane}` } : statusNext(node, target, registry, gitMirror, argo, ciNamespace, k3sNodeConfig),
};
}
@@ -306,6 +324,7 @@ function infraApply(_config: ControlPlaneConfig, node: ControlPlaneNodeSpec, tar
mode: "dry-run",
mutation: false,
expected: expectedSummary(node, target),
hostConfig: k3sNodeConfigPlan(node),
preflight: {
registryReady: imageStatus.registryReady,
toolsImageReady: imageStatus.toolsImageReady,
@@ -318,7 +337,7 @@ function infraApply(_config: ControlPlaneConfig, node: ControlPlaneNodeSpec, tar
next: applyNext(node, target, imageStatus),
};
}
const script = applyScript(yaml);
const script = applyScript(yaml, node, target);
const result = runTransK3s(node.kubeRoute, script, options.timeoutSeconds);
const parsed = parseRemoteJson(result.stdout);
return {
@@ -430,7 +449,7 @@ function toolsImageBuild(node: ControlPlaneNodeSpec, target: ControlPlaneTargetS
}
function argoCommandStatus(node: ControlPlaneNodeSpec, target: ControlPlaneTargetSpec, options: ArgoOptions): Record<string, unknown> {
const result = runTransK3s(node.kubeRoute, statusScript(target, node.registry.endpoint, target.tekton.toolsImage.output), options.timeoutSeconds);
const result = runTransK3s(node.kubeRoute, statusScript(node, target), options.timeoutSeconds);
const parsed = parseRemoteJson(result.stdout);
const status = typeof parsed === "object" && parsed !== null ? parsed as Record<string, unknown> : {};
const argo = record(record(status.components).argo);
@@ -733,15 +752,67 @@ function isNodeLocalImage(image: string): boolean {
/**
 * Builds the typed spec for `nodes.<id>` from its raw YAML record.
 *
 * `registry` is required; the `egressProxy` and `k3s` sections are optional
 * and are represented as `null` when the raw record does not define them.
 * Validation errors are reported with the dotted config path of the field.
 */
function nodeSpec(id: string, raw: Record<string, unknown>): ControlPlaneNodeSpec {
  const basePath = `nodes.${id}`;
  const registryRecord = asRecord(raw.registry, `${basePath}.registry`);

  let egressProxy: ControlPlaneEgressProxySpec | null = null;
  if (raw.egressProxy !== undefined) {
    const egressPath = `${basePath}.egressProxy`;
    egressProxy = egressProxySpec(asRecord(raw.egressProxy, egressPath), egressPath);
  }

  let k3s: ControlPlaneK3sNodeSpec | null = null;
  if (raw.k3s !== undefined) {
    const k3sPath = `${basePath}.k3s`;
    k3s = k3sNodeSpec(asRecord(raw.k3s, k3sPath), k3sPath);
  }

  const route = stringField(raw, "route", basePath);
  const kubeRoute = stringField(raw, "kubeRoute", basePath);
  const endpoint = stringField(registryRecord, "endpoint", `${basePath}.registry`);
  return { id, route, kubeRoute, k3s, registry: { endpoint }, egressProxy };
}
/**
 * Validates the raw `k3s` section of a node entry and returns the typed spec.
 *
 * @param raw  Raw YAML record of the `k3s` section.
 * @param path Dotted config path used as the prefix of every error message.
 * @returns The validated k3s host configuration.
 * @throws Error when the unit name, drop-in path, node name, pre-start
 *   commands or server arguments are malformed, or when the kubelet
 *   `max-pods` arguments do not agree with `kubelet.maxPods`.
 */
function k3sNodeSpec(raw: Record<string, unknown>, path: string): ControlPlaneK3sNodeSpec {
  const kubelet = asRecord(raw.kubelet, `${path}.kubelet`);
  const serviceName = stringField(raw, "serviceName", path);
  if (!/^[A-Za-z0-9_.@-]+$/u.test(serviceName)) throw new Error(`${path}.serviceName has an unsupported systemd unit name`);
  const dropInPath = stringField(raw, "dropInPath", path);
  if (!dropInPath.startsWith("/etc/systemd/system/") || !dropInPath.endsWith(".conf") || dropInPath.includes("..")) {
    throw new Error(`${path}.dropInPath must be an absolute /etc/systemd/system/*.conf path`);
  }
  const nodeStatusName = stringField(raw, "nodeStatusName", path);
  if (!/^[A-Za-z0-9_.-]+$/u.test(nodeStatusName)) throw new Error(`${path}.nodeStatusName has an unsupported Kubernetes node name`);
  const execStartPre = execStartPreField(raw.execStartPre, `${path}.execStartPre`);
  const serverArgs = stringArrayField(raw, "serverArgs", path);
  if (serverArgs.length === 0 || serverArgs[0] !== "server") throw new Error(`${path}.serverArgs must start with k3s server`);
  for (const [index, arg] of serverArgs.entries()) {
    if (arg.includes("\n") || arg.includes("\r") || arg.length === 0) throw new Error(`${path}.serverArgs[${index}] must be a single non-empty argv token`);
  }
  const maxPods = positiveConfigIntegerField(kubelet, "maxPods", `${path}.kubelet`);
  const expectedMaxPodsArg = `max-pods=${maxPods}`;
  const inlinePrefix = "--kubelet-arg=";
  let hasExpectedMaxPodsArg = false;
  for (const [index, arg] of serverArgs.entries()) {
    // A kubelet argument may be written as two tokens or as one `--kubelet-arg=<value>` token.
    let kubeletArg: string | undefined;
    if (arg === "--kubelet-arg") kubeletArg = serverArgs[index + 1];
    else if (arg.startsWith(inlinePrefix)) kubeletArg = arg.slice(inlinePrefix.length);
    if (kubeletArg === undefined || !kubeletArg.startsWith("max-pods=")) continue;
    // A second max-pods value that disagrees with kubelet.maxPods would make the
    // rendered unit ambiguous; the live capacity could then never match the
    // desired value and every apply would request another k3s restart.
    if (kubeletArg !== expectedMaxPodsArg) {
      throw new Error(`${path}.serverArgs has conflicting kubelet arg ${kubeletArg}; it must match ${path}.kubelet.maxPods (${expectedMaxPodsArg})`);
    }
    // Only the two-token form satisfies the "must include" requirement, as before.
    if (arg === "--kubelet-arg") hasExpectedMaxPodsArg = true;
  }
  if (!hasExpectedMaxPodsArg) throw new Error(`${path}.serverArgs must include --kubelet-arg ${expectedMaxPodsArg}`);
  return {
    serviceName,
    dropInPath,
    nodeStatusName,
    execStartPre,
    serverArgs,
    kubelet: { maxPods },
  };
}
/**
 * Parses the optional `execStartPre` list: an array of argv arrays.
 *
 * Every token must be a non-empty single-line string, and the first token of
 * each command must be an absolute executable path without `..`, optionally
 * prefixed with `-`.
 *
 * @param raw  Raw YAML value; `undefined` yields an empty list.
 * @param path Dotted config path used in error messages.
 * @throws Error describing the first offending element.
 */
function execStartPreField(raw: unknown, path: string): readonly (readonly string[])[] {
  if (raw === undefined) return [];
  if (!Array.isArray(raw)) throw new Error(`${path} must be an array of argv arrays`);

  const parseCommand = (item: unknown, itemPath: string): string[] => {
    if (!Array.isArray(item)) throw new Error(`${itemPath} must be an argv array`);
    const argv: string[] = [];
    for (let tokenIndex = 0; tokenIndex < item.length; tokenIndex += 1) {
      const token: unknown = item[tokenIndex];
      const tokenPath = `${itemPath}[${tokenIndex}]`;
      if (typeof token !== "string") throw new Error(`${tokenPath} must be a string`);
      const multiline = token.includes("\n") || token.includes("\r");
      if (token.length === 0 || multiline) throw new Error(`${tokenPath} must be a single non-empty argv token`);
      argv.push(token);
    }
    if (argv.length === 0) throw new Error(`${itemPath} must not be empty`);
    // Strip the optional leading `-` before checking the executable path itself.
    const head = argv[0];
    const executable = head.startsWith("-") ? head.slice(1) : head;
    const absolute = executable.startsWith("/");
    if (!absolute || executable.includes("..")) {
      throw new Error(`${itemPath}[0] must be an absolute executable path, optionally prefixed with -`);
    }
    return argv;
  };

  return raw.map((item, index) => parseCommand(item, `${path}[${index}]`));
}
function egressProxySpec(raw: Record<string, unknown>, path: string): ControlPlaneEgressProxySpec {
const mode = stringField(raw, "mode", path);
if (mode !== "k8s-service-cluster-ip") throw new Error(`${path}.mode must be k8s-service-cluster-ip`);
@@ -1298,6 +1369,7 @@ function planSummary(node: ControlPlaneNodeSpec, target: ControlPlaneTargetSpec)
enabled: target.enabled,
ciNamespace: target.ciNamespace,
runtimeNamespace: target.runtimeNamespace,
k3sNodeConfig: k3sNodeConfigPlan(node),
registry: node.registry.endpoint,
egressProxy: node.egressProxy,
sourceBranch: target.source.branch,
@@ -1328,6 +1400,7 @@ function expectedSummary(node: ControlPlaneNodeSpec, target: ControlPlaneTargetS
runtimePath: target.gitops.path,
runtimeNamespace: target.runtimeNamespace,
namespace: target.ciNamespace,
k3sNodeConfig: k3sNodeConfigPlan(node),
gitMirror: {
namespace: target.gitMirror.namespace,
readUrl: target.gitMirror.readUrl,
@@ -1366,10 +1439,43 @@ function expectedSummary(node: ControlPlaneNodeSpec, target: ControlPlaneTargetS
};
}
function statusScript(target: ControlPlaneTargetSpec, registryEndpoint: string, toolsImage: string): string {
/**
 * Summarises the desired host k3s configuration for plan/dry-run output.
 *
 * Returns `{ managed: false }` when the node has no `k3s` section. Otherwise
 * it reports identifying fields, the desired pod capacity, a hash of the
 * rendered drop-in and argument counts, not the drop-in text itself.
 */
function k3sNodeConfigPlan(node: ControlPlaneNodeSpec): Record<string, unknown> {
  const spec = node.k3s;
  if (spec === null) return { managed: false };

  const { serviceName, dropInPath, nodeStatusName, execStartPre, serverArgs, kubelet } = spec;
  const renderedDropIn = k3sDropInContent(spec);
  return {
    managed: true,
    serviceName,
    dropInPath,
    nodeStatusName,
    desiredMaxPods: kubelet.maxPods,
    dropInSha256: sha256Short(renderedDropIn),
    execStartPreCount: execStartPre.length,
    serverArgCount: serverArgs.length,
  };
}
/**
 * Renders the systemd drop-in text for the managed k3s service.
 *
 * Layout: a provenance comment, the `[Service]` header, one `ExecStartPre=`
 * line per configured pre-start command, an empty `ExecStart=` assignment
 * followed by the `ExecStart=` line that runs /usr/local/bin/k3s with the
 * configured server arguments, and a trailing newline. Every argv token is
 * passed through `systemdExecArg`.
 */
function k3sDropInContent(spec: ControlPlaneK3sNodeSpec): string {
  const renderCommand = (argv: readonly string[]): string => argv.map(systemdExecArg).join(" ");

  const lines: string[] = [
    "# Managed by UniDesk. Source: config/hwlab-node-control-plane.yaml nodes.<node>.k3s",
    "[Service]",
  ];
  for (const command of spec.execStartPre) {
    lines.push(`ExecStartPre=${renderCommand(command)}`);
  }
  lines.push("ExecStart=");
  lines.push(`ExecStart=${renderCommand(["/usr/local/bin/k3s", ...spec.serverArgs])}`);
  return `${lines.join("\n")}\n`;
}
/**
 * Encodes one argv token for use on a systemd `Exec*=` command line.
 *
 * Unit files are not parsed by a shell, so systemd rules apply:
 * - `%` introduces a specifier, so a literal percent sign is written `%%`;
 * - `$` introduces environment variable substitution, so a literal dollar
 *   sign is written `$$`;
 * - inside double quotes only `\\` and `\"` need C-style escaping; a backtick
 *   has no special meaning and must not be backslash-escaped.
 *
 * Tokens made only of the safe characters are returned unquoted (with `%`
 * doubled); every other token, including the empty string, is double-quoted.
 *
 * @param value A single argv token without line breaks.
 * @returns The token encoded for a systemd command line.
 */
function systemdExecArg(value: string): string {
  const percentEscaped = value.replace(/%/gu, () => "%%");
  if (/^[A-Za-z0-9_@%+=:,./-]+$/u.test(value)) return percentEscaped;
  // Function replacers keep `$` in the replacement text literal.
  const escaped = percentEscaped
    .replace(/\\/gu, () => "\\\\")
    .replace(/"/gu, () => "\\\"")
    .replace(/\$/gu, () => "$$");
  return `"${escaped}"`;
}
function statusScript(nodeSpec: ControlPlaneNodeSpec, target: ControlPlaneTargetSpec): string {
const requiredCrds = shellJsonArray(target.argo.install.requiredCrds);
const argoDeployments = shellJsonArray(target.argo.install.expectedDeployments);
const argoStatefulSets = shellJsonArray(target.argo.install.expectedStatefulSets);
const k3s = nodeSpec.k3s;
const k3sDropIn = k3s === null ? "" : k3sDropInContent(k3s);
return `
set +e
node=${shQuote(target.node)}
@@ -1388,11 +1494,17 @@ service_account=${shQuote(target.tekton.serviceAccountName)}
argo_ns=${shQuote(target.argo.namespace)}
argo_project=${shQuote(target.argo.projectName)}
argo_app=${shQuote(target.argo.applicationName)}
registry=${shQuote(registryEndpoint)}
tools_image=${shQuote(toolsImage)}
registry=${shQuote(nodeSpec.registry.endpoint)}
tools_image=${shQuote(target.tekton.toolsImage.output)}
required_crds_json=${shQuote(requiredCrds)}
argo_deployments_json=${shQuote(argoDeployments)}
argo_statefulsets_json=${shQuote(argoStatefulSets)}
k3s_managed=${k3s === null ? "false" : "true"}
k3s_service=${shQuote(k3s?.serviceName ?? "")}
k3s_dropin=${shQuote(k3s?.dropInPath ?? "")}
k3s_node=${shQuote(k3s?.nodeStatusName ?? "")}
k3s_desired_max_pods=${shQuote(String(k3s?.kubelet.maxPods ?? ""))}
k3s_expected_sha=${shQuote(k3s === null ? "" : sha256Short(k3sDropIn))}
exists_ns() { kubectl get ns "$1" >/dev/null 2>&1 && printf true || printf false; }
exists_res() { kubectl -n "$1" get "$2" "$3" >/dev/null 2>&1 && printf true || printf false; }
deploy_ready() { desired=$(kubectl -n "$1" get deploy "$2" -o 'jsonpath={.spec.replicas}' 2>/dev/null || true); ready=$(kubectl -n "$1" get deploy "$2" -o 'jsonpath={.status.readyReplicas}' 2>/dev/null || true); [ -n "$desired" ] && [ "$desired" -gt 0 ] 2>/dev/null && [ "\${ready:-0}" = "$desired" ] && printf true || printf false; }
@@ -1407,6 +1519,64 @@ tools_image_ready=false
if [ "$tools_repo" != "$tools_repo_tag" ] && command -v curl >/dev/null 2>&1; then curl -fsS --max-time 5 "http://$registry/v2/$tools_repo/manifests/$tools_tag" >/tmp/hwlab-tools-image.out 2>/tmp/hwlab-tools-image.err && tools_image_ready=true; fi
cache_host_path_ready=false
if [ -n "$cache_host_path" ] && kubectl -n "$gitmirror_ns" exec deploy/"$read_deploy" -- sh -lc 'test -d /cache' >/dev/null 2>&1; then cache_host_path_ready=true; fi
k3s_fragment=$(python3 - "$k3s_managed" "$k3s_service" "$k3s_dropin" "$k3s_node" "$k3s_desired_max_pods" "$k3s_expected_sha" <<'PY'
import hashlib, json, re, subprocess, sys
managed = sys.argv[1] == "true"
service, dropin, node_name, desired_raw, expected_sha = sys.argv[2:7]
def run(args):
return subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
def to_int(value):
try:
return int(value)
except Exception:
return None
if not managed:
print(json.dumps({"managed": False, "ready": True}))
raise SystemExit(0)
desired = to_int(desired_raw)
node_json = run(["kubectl", "get", "node", node_name, "-o", "json"])
capacity = None
allocatable = None
node_ready = False
if node_json.returncode == 0:
data = json.loads(node_json.stdout)
capacity = to_int(data.get("status", {}).get("capacity", {}).get("pods"))
allocatable = to_int(data.get("status", {}).get("allocatable", {}).get("pods"))
for condition in data.get("status", {}).get("conditions", []):
if condition.get("type") == "Ready":
node_ready = condition.get("status") == "True"
unit = run(["systemctl", "cat", service])
unit_text = unit.stdout if unit.returncode == 0 else ""
dropin_read = run(["cat", dropin])
dropin_exists = dropin_read.returncode == 0
dropin_text = dropin_read.stdout if dropin_exists else ""
dropin_sha = "sha256:" + hashlib.sha256(dropin_text.encode()).hexdigest() if dropin_exists else None
matches = re.findall(r"max-pods=([0-9]+)", unit_text + "\\n" + dropin_text)
configured = to_int(matches[-1]) if matches else None
dropin_matches = dropin_sha == expected_sha
ready = dropin_matches and capacity == desired and allocatable == desired
source = "managed-dropin" if dropin_matches else ("systemd-or-config" if configured is not None else "kubelet-default")
print(json.dumps({
"managed": True,
"ready": ready,
"serviceName": service,
"dropInPath": dropin,
"dropInExists": dropin_exists,
"dropInSha256": dropin_sha,
"expectedDropInSha256": expected_sha,
"dropInMatches": dropin_matches,
"configuredMaxPods": configured,
"desiredMaxPods": desired,
"liveNodeName": node_name,
"liveCapacityPods": capacity,
"liveAllocatablePods": allocatable,
"nodeReady": node_ready,
"restartRequired": not ready,
"source": source,
"unitReadable": unit.returncode == 0,
}))
PY
)
python3 - "$required_crds_json" "$argo_deployments_json" "$argo_statefulsets_json" <<'PY' >/tmp/hwlab-node-status-fragments.json
import json, subprocess, sys
required_crds=json.loads(sys.argv[1])
@@ -1432,27 +1602,274 @@ print(json.dumps({"crds": crds, "deployments": deploy, "statefulSets": sts, "crd
PY
argo_fragment=$(cat /tmp/hwlab-node-status-fragments.json 2>/dev/null || printf '{}')
cat <<JSON
{"observedAt":"$(date -u +%Y-%m-%dT%H:%M:%SZ)","node":"$node","lane":"$lane","components":{"tekton":{"installed":$(kubectl get crd pipelines.tekton.dev pipelineruns.tekton.dev >/dev/null 2>&1 && printf true || printf false),"controllerReady":$(deploy_ready tekton-pipelines tekton-pipelines-controller),"webhookReady":$(deploy_ready tekton-pipelines tekton-pipelines-webhook)},"ciNamespace":{"name":"$ci_ns","exists":$(exists_ns "$ci_ns"),"serviceAccountExists":$(exists_res "$ci_ns" serviceaccount "$service_account"),"pipelineExists":$(exists_res "$ci_ns" pipeline "$pipeline")},"gitMirror":{"namespace":"$gitmirror_ns","namespaceExists":$(exists_ns "$gitmirror_ns"),"readDeploymentReady":$(deploy_ready "$gitmirror_ns" "$read_deploy"),"writeDeploymentReady":$(deploy_ready "$gitmirror_ns" "$write_deploy"),"readServiceExists":$(exists_res "$gitmirror_ns" service "$read_svc"),"writeServiceExists":$(exists_res "$gitmirror_ns" service "$write_svc"),"readEndpointsReady":$(endpoint_ready "$gitmirror_ns" "$read_svc"),"writeEndpointsReady":$(endpoint_ready "$gitmirror_ns" "$write_svc"),"cachePvcExists":$(exists_res "$gitmirror_ns" pvc "$cache_pvc"),"cacheHostPath":"$cache_host_path","cacheHostPathReady":$cache_host_path_ready,"summary":{"localSource":null,"githubSource":null,"localGitops":null,"githubGitops":null,"pendingFlush":null,"flushNeeded":null,"githubInSync":null}},"argo":{"namespace":"$argo_ns","namespaceExists":$(exists_ns "$argo_ns"),"installed":$(kubectl get crd applications.argoproj.io appprojects.argoproj.io >/dev/null 2>&1 && printf true || printf false),"projectExists":$(kubectl -n "$argo_ns" get appproject "$argo_project" >/dev/null 2>&1 && printf true || printf false),"applicationExists":$(kubectl -n "$argo_ns" get application "$argo_app" >/dev/null 2>&1 && printf true || printf 
false),"install":$argo_fragment},"registry":{"endpoint":"$registry","ready":$registry_ready,"toolsImage":"$tools_image","toolsImageReady":$tools_image_ready},"runtimeNamespace":{"name":"$runtime_ns","exists":$(exists_ns "$runtime_ns")}}}
{"observedAt":"$(date -u +%Y-%m-%dT%H:%M:%SZ)","node":"$node","lane":"$lane","components":{"k3sNodeConfig":$k3s_fragment,"tekton":{"installed":$(kubectl get crd pipelines.tekton.dev pipelineruns.tekton.dev >/dev/null 2>&1 && printf true || printf false),"controllerReady":$(deploy_ready tekton-pipelines tekton-pipelines-controller),"webhookReady":$(deploy_ready tekton-pipelines tekton-pipelines-webhook)},"ciNamespace":{"name":"$ci_ns","exists":$(exists_ns "$ci_ns"),"serviceAccountExists":$(exists_res "$ci_ns" serviceaccount "$service_account"),"pipelineExists":$(exists_res "$ci_ns" pipeline "$pipeline")},"gitMirror":{"namespace":"$gitmirror_ns","namespaceExists":$(exists_ns "$gitmirror_ns"),"readDeploymentReady":$(deploy_ready "$gitmirror_ns" "$read_deploy"),"writeDeploymentReady":$(deploy_ready "$gitmirror_ns" "$write_deploy"),"readServiceExists":$(exists_res "$gitmirror_ns" service "$read_svc"),"writeServiceExists":$(exists_res "$gitmirror_ns" service "$write_svc"),"readEndpointsReady":$(endpoint_ready "$gitmirror_ns" "$read_svc"),"writeEndpointsReady":$(endpoint_ready "$gitmirror_ns" "$write_svc"),"cachePvcExists":$(exists_res "$gitmirror_ns" pvc "$cache_pvc"),"cacheHostPath":"$cache_host_path","cacheHostPathReady":$cache_host_path_ready,"summary":{"localSource":null,"githubSource":null,"localGitops":null,"githubGitops":null,"pendingFlush":null,"flushNeeded":null,"githubInSync":null}},"argo":{"namespace":"$argo_ns","namespaceExists":$(exists_ns "$argo_ns"),"installed":$(kubectl get crd applications.argoproj.io appprojects.argoproj.io >/dev/null 2>&1 && printf true || printf false),"projectExists":$(kubectl -n "$argo_ns" get appproject "$argo_project" >/dev/null 2>&1 && printf true || printf false),"applicationExists":$(kubectl -n "$argo_ns" get application "$argo_app" >/dev/null 2>&1 && printf true || printf 
false),"install":$argo_fragment},"registry":{"endpoint":"$registry","ready":$registry_ready,"toolsImage":"$tools_image","toolsImageReady":$tools_image_ready},"runtimeNamespace":{"name":"$runtime_ns","exists":$(exists_ns "$runtime_ns")}}}
JSON
`;
}
function applyScript(yaml: string): string {
function applyScript(yaml: string, node: ControlPlaneNodeSpec, target: ControlPlaneTargetSpec): string {
const encoded = Buffer.from(yaml, "utf8").toString("base64");
return `
set +e
manifest=$(mktemp /tmp/hwlab-node-infra.XXXXXX.yaml)
printf %s ${shQuote(encoded)} | base64 -d >"$manifest"
kubectl apply --server-side --field-manager=unidesk-hwlab-node-control-plane -f "$manifest" >/tmp/hwlab-node-infra-apply.out 2>/tmp/hwlab-node-infra-apply.err
rc=$?
python3 - "$rc" <<'PY'
kubectl_rc=$?
${k3sApplyScriptFragment(node.k3s, target)}
python3 - "$kubectl_rc" "$k3s_report_file" <<'PY'
import json, pathlib, sys
k3s_report = {}
try:
k3s_report = json.loads(pathlib.Path(sys.argv[2]).read_text(errors='replace'))
except Exception as exc:
k3s_report = {"managed": None, "ok": False, "parseError": str(exc)}
out=pathlib.Path('/tmp/hwlab-node-infra-apply.out').read_text(errors='replace') if pathlib.Path('/tmp/hwlab-node-infra-apply.out').exists() else ''
err=pathlib.Path('/tmp/hwlab-node-infra-apply.err').read_text(errors='replace') if pathlib.Path('/tmp/hwlab-node-infra-apply.err').exists() else ''
print(json.dumps({'applyExitCode': int(sys.argv[1]), 'stdoutPreview': out[-2000:], 'stderrPreview': err[-2000:], 'runtimeRolloutTriggered': False, 'pk01Touched': False}, ensure_ascii=False))
print(json.dumps({'k3sNodeConfig': k3s_report, 'kubernetesApply': {'applyExitCode': int(sys.argv[1]), 'stdoutPreview': out[-2000:], 'stderrPreview': err[-2000:], 'runtimeRolloutTriggered': False, 'pk01Touched': False}}, ensure_ascii=False))
PY
rm -f "$manifest"
exit "$rc"
if [ "$kubectl_rc" != 0 ]; then exit "$kubectl_rc"; fi
exit "$k3s_rc"
`;
}
function k3sApplyScriptFragment(spec: ControlPlaneK3sNodeSpec | null, target: ControlPlaneTargetSpec): string {
if (spec === null) {
return `
k3s_report_file=$(mktemp /tmp/hwlab-node-k3s.XXXXXX.json)
printf '{"managed":false,"ok":true,"mutation":false}\\n' >"$k3s_report_file"
k3s_rc=0
`;
}
const content = k3sDropInContent(spec);
const encoded = Buffer.from(content, "utf8").toString("base64");
return `
k3s_report_file=$(mktemp /tmp/hwlab-node-k3s.XXXXXX.json)
k3s_service=${shQuote(spec.serviceName)}
k3s_dropin=${shQuote(spec.dropInPath)}
k3s_node=${shQuote(spec.nodeStatusName)}
k3s_namespace=${shQuote(target.ciNamespace)}
k3s_image=${shQuote(target.tekton.toolsImage.output)}
k3s_desired_max_pods=${shQuote(String(spec.kubelet.maxPods))}
k3s_expected_sha=${shQuote(sha256Short(content))}
k3s_before_capacity=$(kubectl get node "$k3s_node" -o 'jsonpath={.status.capacity.pods}' 2>/dev/null || true)
k3s_before_allocatable=$(kubectl get node "$k3s_node" -o 'jsonpath={.status.allocatable.pods}' 2>/dev/null || true)
capacity_restart=false
if [ "$k3s_before_capacity" != "$k3s_desired_max_pods" ] || [ "$k3s_before_allocatable" != "$k3s_desired_max_pods" ]; then capacity_restart=true; fi
k3s_current_dropin_sha=
if [ -f "$k3s_dropin" ]; then k3s_current_dropin_sha=$(sha256sum "$k3s_dropin" | awk '{print "sha256:"$1}'); fi
if [ "$k3s_current_dropin_sha" = "$k3s_expected_sha" ] && [ "$capacity_restart" = false ]; then
python3 - "$k3s_current_dropin_sha" "$k3s_expected_sha" "$k3s_service" "$k3s_dropin" "$k3s_node" "$k3s_desired_max_pods" "$k3s_before_capacity" "$k3s_before_allocatable" <<'PY' >"$k3s_report_file"
import json, sys
dropin_sha, expected_sha, service, dropin, node_name, desired, before_capacity, before_allocatable = sys.argv[1:9]
print(json.dumps({
"managed": True,
"ok": True,
"mutation": False,
"applyMode": "noop",
"completionPending": False,
"serviceName": service,
"dropInPath": dropin,
"dropInSha256": dropin_sha,
"expectedDropInSha256": expected_sha,
"dropInMatches": dropin_sha == expected_sha,
"nodeName": node_name,
"desiredMaxPods": int(desired),
"beforeCapacityPods": int(before_capacity) if before_capacity.isdigit() else None,
"beforeAllocatablePods": int(before_allocatable) if before_allocatable.isdigit() else None,
}, ensure_ascii=False))
PY
k3s_rc=0
else
k3s_job="hwlab-node-k3s-config-$(date +%s)"
k3s_job_manifest=$(mktemp /tmp/hwlab-node-k3s-job.XXXXXX.json)
k3s_host_script=$(mktemp /tmp/hwlab-node-k3s-host.XXXXXX.sh)
k3s_job_apply_stdout=/tmp/hwlab-node-k3s-job-apply.out
k3s_job_apply_stderr=/tmp/hwlab-node-k3s-job-apply.err
k3s_docker_stdout=/tmp/hwlab-node-k3s-docker.out
k3s_docker_stderr=/tmp/hwlab-node-k3s-docker.err
k3s_host_report="/tmp/$k3s_job-report.json"
rm -f "$k3s_host_report"
python3 - "$k3s_job_manifest" "$k3s_host_script" "$k3s_job" "$k3s_namespace" "$k3s_image" "$k3s_dropin" ${shQuote(encoded)} "$k3s_service" "$k3s_desired_max_pods" "$k3s_expected_sha" "$capacity_restart" "$k3s_host_report" <<'PY'
import json, os, shlex, sys
manifest_path, host_script_path, job, namespace, image, dropin, encoded, service, desired, expected_sha, capacity_restart, report_path = sys.argv[1:13]
script = f"""#!/bin/sh
set -eu
expected=/tmp/unidesk-k3s-dropin.conf
printf %s {shlex.quote(encoded)} | base64 -d > "$expected"
host_dropin=/host{shlex.quote(dropin)}
host_report=/host{shlex.quote(report_path)}
mkdir -p "$(dirname "$host_dropin")"
before_sha=
if [ -f "$host_dropin" ]; then before_sha=$(sha256sum "$host_dropin" | awk '{{print "sha256:"$1}}'); fi
changed=false
if ! cmp -s "$expected" "$host_dropin" 2>/dev/null; then
cp "$expected" "$host_dropin"
chown 0:0 "$host_dropin" 2>/dev/null || true
chmod 0644 "$host_dropin"
changed=true
fi
nsenter_path=$(command -v nsenter || true)
host_systemctl() {{
if command -v chroot >/dev/null 2>&1 && [ -x /host/usr/bin/systemctl ]; then
chroot /host /usr/bin/systemctl "$@"
return $?
fi
if [ -n "$nsenter_path" ]; then
"$nsenter_path" -t 1 -m -u -i -n -p -- /usr/bin/systemctl "$@"
return $?
fi
return 127
}}
daemon_reload_rc=0
restart_rc=0
restarted=false
if command -v chroot >/dev/null 2>&1 || [ -n "$nsenter_path" ]; then
host_systemctl daemon-reload || daemon_reload_rc=$?
if [ "$changed" = true ] || [ {shlex.quote(capacity_restart)} = true ]; then
restarted=true
host_systemctl restart {shlex.quote(service)} || restart_rc=$?
fi
else
daemon_reload_rc=127
restart_rc=127
fi
after_sha=
if [ -f "$host_dropin" ]; then after_sha=$(sha256sum "$host_dropin" | awk '{{print "sha256:"$1}}'); fi
service_active=unknown
if command -v chroot >/dev/null 2>&1 || [ -n "$nsenter_path" ]; then service_active=$(host_systemctl is-active {shlex.quote(service)} 2>/dev/null || true); fi
python3 - "$changed" "$restarted" "$daemon_reload_rc" "$restart_rc" "$before_sha" "$after_sha" "$service_active" "$nsenter_path" <<'REPORT' >"$host_report"
import json, sys
changed, restarted = sys.argv[1] == "true", sys.argv[2] == "true"
daemon_reload_rc, restart_rc = int(sys.argv[3] or "0"), int(sys.argv[4] or "0")
print(json.dumps({{
"jobChanged": changed,
"jobRestarted": restarted,
"daemonReloadExitCode": daemon_reload_rc,
"restartExitCode": restart_rc,
"beforeDropInSha256": sys.argv[5] or None,
"dropInSha256": sys.argv[6] or None,
"expectedDropInSha256": {json.dumps(expected_sha)},
"dropInMatches": sys.argv[6] == {json.dumps(expected_sha)},
"serviceActiveText": sys.argv[7] or None,
"nsenterPresent": bool(sys.argv[8]),
}}))
REPORT
chmod 0644 "$host_report" 2>/dev/null || true
cat "$host_report"
"""
with open(host_script_path, "w", encoding="utf-8") as handle:
handle.write(script)
os.chmod(host_script_path, 0o755)
manifest = {
"apiVersion": "batch/v1",
"kind": "Job",
"metadata": {"name": job, "namespace": namespace, "labels": {"app.kubernetes.io/part-of": "hwlab-node-control-plane", "unidesk.ai/operation": "k3s-node-config"}},
"spec": {
"backoffLimit": 0,
"ttlSecondsAfterFinished": 300,
"template": {
"metadata": {"labels": {"app.kubernetes.io/part-of": "hwlab-node-control-plane", "unidesk.ai/operation": "k3s-node-config"}},
"spec": {
"restartPolicy": "Never",
"hostPID": True,
"hostNetwork": True,
"containers": [{
"name": "apply-k3s-node-config",
"image": image,
"imagePullPolicy": "IfNotPresent",
"securityContext": {"privileged": True},
"command": ["/bin/sh", "-lc", script],
"volumeMounts": [{"name": "host-root", "mountPath": "/host"}],
}],
"volumes": [{"name": "host-root", "hostPath": {"path": "/", "type": "Directory"}}],
},
},
},
}
with open(manifest_path, "w", encoding="utf-8") as handle:
json.dump(manifest, handle)
PY
k3s_render_rc=$?
if [ "$k3s_render_rc" != 0 ]; then
python3 - "$k3s_render_rc" "$k3s_expected_sha" "$k3s_service" "$k3s_dropin" "$k3s_node" "$k3s_desired_max_pods" <<'PY' >"$k3s_report_file"
import json, sys
render_rc = int(sys.argv[1] or "1")
expected_sha, service, dropin, node_name, desired = sys.argv[2:7]
print(json.dumps({
"managed": True,
"ok": False,
"mutation": False,
"renderExitCode": render_rc,
"serviceName": service,
"dropInPath": dropin,
"expectedDropInSha256": expected_sha,
"nodeName": node_name,
"desiredMaxPods": int(desired),
}, ensure_ascii=False))
PY
k3s_rc=$k3s_render_rc
else
kubectl apply -f "$k3s_job_manifest" >"$k3s_job_apply_stdout" 2>"$k3s_job_apply_stderr"
k3s_job_apply_rc=$?
k3s_apply_mode=kubernetes-job
k3s_docker_rc=127
if [ "$k3s_job_apply_rc" != 0 ] && command -v docker >/dev/null 2>&1; then
k3s_apply_mode=docker-host-fallback
docker run --rm --privileged --pid=host --network=host -v /:/host --entrypoint /bin/sh "$k3s_image" "/host$k3s_host_script" >"$k3s_docker_stdout" 2>"$k3s_docker_stderr"
k3s_docker_rc=$?
fi
k3s_submit_rc=$k3s_job_apply_rc
if [ "$k3s_job_apply_rc" != 0 ] && [ "$k3s_docker_rc" = 0 ]; then k3s_submit_rc=0; fi
python3 - "$k3s_submit_rc" "$k3s_job_apply_rc" "$k3s_docker_rc" "$k3s_apply_mode" "$k3s_before_capacity" "$k3s_before_allocatable" "$k3s_expected_sha" "$k3s_service" "$k3s_dropin" "$k3s_node" "$k3s_desired_max_pods" "$k3s_job" "$k3s_namespace" "$k3s_host_report" "$k3s_job_apply_stdout" "$k3s_job_apply_stderr" "$k3s_docker_stdout" "$k3s_docker_stderr" <<'PY' >"$k3s_report_file"
import json, pathlib, sys
submit_rc, job_apply_rc, docker_rc = [int(value or "0") for value in sys.argv[1:4]]
apply_mode = sys.argv[4]
before_capacity, before_allocatable = sys.argv[5:7]
expected_sha, service, dropin, node_name, desired, job_name, namespace, host_report = sys.argv[7:15]
def read(path):
return pathlib.Path(path).read_text(errors='replace') if pathlib.Path(path).exists() else ''
try:
host_report_data = json.loads(read(host_report) or "{}")
except Exception:
host_report_data = {}
apply_ok = submit_rc == 0
print(json.dumps({
"managed": True,
"ok": apply_ok,
"mutation": apply_ok,
"completionPending": apply_ok and apply_mode == "kubernetes-job",
"applyMode": apply_mode,
"jobName": job_name,
"namespace": namespace,
"jobApplyExitCode": job_apply_rc,
"dockerFallbackExitCode": docker_rc,
"serviceName": service,
"dropInPath": dropin,
"dropInSha256": host_report_data.get("dropInSha256"),
"expectedDropInSha256": expected_sha,
"dropInMatches": host_report_data.get("dropInSha256") == expected_sha if host_report_data else None,
"daemonReloadExitCode": host_report_data.get("daemonReloadExitCode"),
"restartExitCode": host_report_data.get("restartExitCode"),
"serviceActive": host_report_data.get("serviceActiveText") == "active" if host_report_data else None,
"nodeName": node_name,
"desiredMaxPods": int(desired),
"beforeCapacityPods": int(before_capacity) if before_capacity.isdigit() else None,
"beforeAllocatablePods": int(before_allocatable) if before_allocatable.isdigit() else None,
"hostReportPath": host_report,
"statusCommand": f"bun scripts/cli.ts hwlab nodes control-plane infra status --node {node_name.upper()} --lane ${target.lane}",
"jobCompletionCommand": f"kubectl -n {namespace} wait --for=condition=complete job/{job_name} --timeout=120s",
"jobLogsCommand": f"kubectl -n {namespace} logs job/{job_name} --tail=120",
"jobApplyStdoutPreview": read(sys.argv[15])[-1000:],
"jobApplyStderrPreview": read(sys.argv[16])[-1000:],
"dockerStdoutPreview": read(sys.argv[17])[-1000:],
"dockerStderrPreview": read(sys.argv[18])[-1000:],
}, ensure_ascii=False))
PY
k3s_rc=$k3s_submit_rc
fi
rm -f "$k3s_job_manifest" "$k3s_host_script"
fi
`;
}
@@ -1499,6 +1916,7 @@ function statusNext(
gitMirror: Record<string, unknown>,
argo: Record<string, unknown>,
ciNamespace: Record<string, unknown>,
k3sNodeConfig: Record<string, unknown>,
): Record<string, unknown> {
const bootstrapMissing = !boolField(ciNamespace, "exists")
|| !boolField(gitMirror, "namespaceExists")
@@ -1506,6 +1924,7 @@ function statusNext(
|| !boolField(gitMirror, "writeServiceExists")
|| (!boolField(gitMirror, "cachePvcExists") && !boolField(gitMirror, "cacheHostPathReady"));
const blockers: string[] = [];
if (node.k3s !== null && !boolField(k3sNodeConfig, "ready")) blockers.push("k3s-node-config-not-applied");
if (!boolField(registry, "ready")) blockers.push("node-local-registry-not-ready");
if (!boolField(registry, "toolsImageReady")) blockers.push("tools-image-missing");
if (bootstrapMissing) blockers.push("control-plane-bootstrap-missing");
@@ -1530,6 +1949,9 @@ function statusNext(
if (!boolField(argo, "installed")) {
next.installArgo = "准备受控 D601 Argo CD 安装入口后再进入 runtime rollout。";
}
if (node.k3s !== null && !boolField(k3sNodeConfig, "ready")) {
next.applyK3sNodeConfig = `bun scripts/cli.ts hwlab nodes control-plane infra apply --node ${node.id} --lane ${target.lane} --confirm`;
}
if (bootstrapMissing) next.applyBootstrap = `bun scripts/cli.ts hwlab nodes control-plane infra apply --node ${node.id} --lane ${target.lane} --confirm`;
else next.reapplyBootstrap = `bun scripts/cli.ts hwlab nodes control-plane infra apply --node ${node.id} --lane ${target.lane} --confirm`;
return next;
@@ -1900,6 +2322,10 @@ function boolField(obj: Record<string, unknown>, key: string): boolean {
return obj[key] === true;
}
/**
 * Narrows an arbitrary value to a finite number.
 *
 * @param value - Any value, typically a field read from loosely typed data.
 * @returns `value` itself when it is a primitive number that is neither
 *   `NaN` nor ±`Infinity`; otherwise `null`.
 */
function numberValue(value: unknown): number | null {
  // Reject every non-number first so the finite check only sees numbers.
  if (typeof value !== "number") {
    return null;
  }
  // NaN and the infinities are numbers by type but not usable quantities.
  if (!Number.isFinite(value)) {
    return null;
  }
  return value;
}
function requiredOption(args: string[], name: string): string {
const index = args.indexOf(name);
if (index === -1) throw new Error(`${name} is required`);