# Patch summary (commentary before the first file header; patch tools skip it).
#
# Adds a `control-plane runtime-image` sub-command for node lanes:
#   * config/hwlab-node-lanes.yaml: one lane gains `baseImageSource: node:20-bookworm-slim`.
#   * hwlabNodeHelp: two new examples (`runtime-image status`, `runtime-image preload`).
#   * runNodeDelegatedDomain: `control-plane` + action `runtime-image` is routed to
#     nodeRuntimeBaseImageCommand before the existing `plan` branch.
#   * parseNodeScopedDelegatedOptions: returns `runtimeImageAction`, the second positional
#     argument when the action is `runtime-image` and that argument does not start with
#     `--`; otherwise null.
#   * runtime-image command body: a missing or unsupported sub-action returns an
#     `ok: false` result; `status` is read-only; `preload` and `build` both require
#     --dry-run or --confirm and both run ensureNodeBaseImage (effectiveAction "preload").
#   * nodeRuntimeBaseImageStatus: runs a host script that checks the registry tag list,
#     the local target image and the local source image; `ok` requires command success,
#     a configured source and the tag being present in the registry.
#   * ensureNodeBaseImage: when the source image is not present locally it is pulled with
#     up to `pull_retries` attempts; the result gains pullRetries, sourcePresentBefore,
#     pulledSource and pullAttempts.
#
# Reconstruction notes:
#   * This patch was recovered from a whitespace-collapsed copy. Line breaks follow the
#     hunk counts; indentation (including inside context lines) is a best guess, so
#     applying it may need whitespace-tolerant matching.
#   * The copy had generic type arguments stripped (bare `Record`, `ReturnType`) and is
#     missing the text between the `-444,7 +452,7` hunk header and the first added line
#     of the runtime-image command body (the rest of that hunk, the next hunk header and
#     the added function's signature line). Those spots are left exactly as found.
#   * Leading whitespace inside the shell-script string literals was collapsed to a
#     single space in the copy and is kept that way.
#
# Changes made to added lines while restoring the patch (hunk line counts unchanged):
#   * nodeRuntimeBaseImageStatus: `grep -q` -> `grep -qF`, so the tag is matched as a
#     fixed string rather than a regular expression.
#   * ensureNodeBaseImage pull loop: sleep only when another attempt will follow.
#   * nodeRuntimeBaseImageStatus return type written as `Record<string, unknown>`
#     (bare `Record` does not compile; presumably the original annotation - confirm).
#
# NOTE(review): `mutation` in the preload result is true whenever the confirmed run
# succeeds, even if the image was already present and nothing was seeded - confirm
# whether it should follow `preload.action === "seed"` instead.
# NOTE(review): a failed confirmed preload still reports `next.triggerCurrent` - confirm
# that this is the intended follow-up on failure.

diff --git a/config/hwlab-node-lanes.yaml b/config/hwlab-node-lanes.yaml
index eff44e4b..328304f5 100644
--- a/config/hwlab-node-lanes.yaml
+++ b/config/hwlab-node-lanes.yaml
@@ -93,6 +93,7 @@ lanes:
     argoApplicationFile: application-v03.yaml
     registryPrefix: 127.0.0.1:5000/hwlab
     baseImage: 127.0.0.1:5000/hwlab/hwlab-node20-base:20-bookworm-slim
+    baseImageSource: node:20-bookworm-slim
     serviceIds:
       - hwlab-cloud-api
       - hwlab-user-billing
diff --git a/scripts/src/hwlab-node.ts b/scripts/src/hwlab-node.ts
index 8971435d..44226acf 100644
--- a/scripts/src/hwlab-node.ts
+++ b/scripts/src/hwlab-node.ts
@@ -168,6 +168,8 @@ export function hwlabNodeHelp(): Record {
       "bun scripts/cli.ts hwlab nodes control-plane status --node G14 --lane v03",
       "bun scripts/cli.ts hwlab nodes control-plane apply --node G14 --lane v03 --dry-run",
       "bun scripts/cli.ts hwlab nodes control-plane refresh --node G14 --lane v03 --confirm",
+      "bun scripts/cli.ts hwlab nodes control-plane runtime-image status --node G14 --lane v03",
+      "bun scripts/cli.ts hwlab nodes control-plane runtime-image preload --node G14 --lane v03 --confirm",
       "bun scripts/cli.ts hwlab nodes control-plane sync --node D601 --lane v03 --confirm",
       "bun scripts/cli.ts hwlab nodes control-plane public-exposure --node D601 --lane v03 --dry-run",
       "bun scripts/cli.ts hwlab nodes control-plane public-exposure --node D601 --lane v03 --confirm",
@@ -207,6 +209,9 @@ async function runNodeDelegatedDomain(config: Config, domain: DelegatedNodeDomai
       spec: scoped.spec,
     });
   }
+  if (domain === "control-plane" && scoped.action === "runtime-image") {
+    return nodeRuntimeBaseImageCommand(scoped);
+  }
   if (domain === "control-plane" && scoped.action === "plan") {
     return nodeRuntimeControlPlanePlan(scoped);
   }
@@ -243,6 +248,7 @@ async function runNodeDelegatedDomain(config: Config, domain: DelegatedNodeDomai
 function parseNodeScopedDelegatedOptions(domain: DelegatedNodeDomain, args: string[]): {
   domain: DelegatedNodeDomain;
   action: string;
+  runtimeImageAction: string | null;
   node: string;
   lane: HwlabRuntimeLane;
   confirm: boolean;
@@ -256,6 +262,7 @@ function parseNodeScopedDelegatedOptions(domain: DelegatedNodeDomain, args: stri
 } {
   const [actionRaw] = args;
   if (typeof actionRaw !== "string" || actionRaw.startsWith("--")) throw new Error(`${domain} usage: ${domain} ACTION --node NODE --lane vNN [--dry-run|--confirm]`);
+  const runtimeImageAction = actionRaw === "runtime-image" && typeof args[1] === "string" && !args[1].startsWith("--") ? args[1] : null;
   const node = requiredOption(args, "--node");
   assertNodeId(node);
   const laneRaw = requiredOption(args, "--lane");
@@ -267,6 +274,7 @@ function parseNodeScopedDelegatedOptions(domain: DelegatedNodeDomain, args: stri
   return {
     domain,
     action: actionRaw,
+    runtimeImageAction,
     node,
     lane: laneRaw,
     confirm,
@@ -444,7 +452,7 @@ function nodeRuntimeUnsupportedAction(scoped: ReturnType): Record {
+  const action = scoped.runtimeImageAction;
+  if (action === null) {
+    return {
+      ok: false,
+      command: `hwlab nodes control-plane runtime-image --node ${scoped.node} --lane ${scoped.lane}`,
+      node: scoped.node,
+      lane: scoped.lane,
+      mutation: false,
+      degradedReason: "node-runtime-image-action-missing",
+      message: "runtime-image requires one of: status, preload, build",
+      expected: nodeRuntimeExpected(scoped.spec),
+    };
+  }
+  if (action !== "status" && action !== "preload" && action !== "build") {
+    return {
+      ok: false,
+      command: `hwlab nodes control-plane runtime-image ${action} --node ${scoped.node} --lane ${scoped.lane}`,
+      node: scoped.node,
+      lane: scoped.lane,
+      mutation: false,
+      degradedReason: "unsupported-node-runtime-image-action",
+      message: "runtime-image currently supports status/preload/build",
+      expected: nodeRuntimeExpected(scoped.spec),
+    };
+  }
+  const statusBefore = nodeRuntimeBaseImageStatus(scoped.spec, scoped.timeoutSeconds);
+  if (action === "status") {
+    return {
+      ok: statusBefore.ok,
+      command: `hwlab nodes control-plane runtime-image status --node ${scoped.node} --lane ${scoped.lane}`,
+      node: scoped.node,
+      lane: scoped.lane,
+      mode: "status",
+      mutation: false,
+      status: statusBefore,
+      degradedReason: statusBefore.ok ? undefined : "node-runtime-base-image-not-ready",
+      next: statusBefore.ok ? undefined : {
+        preload: `bun scripts/cli.ts hwlab nodes control-plane runtime-image preload --node ${scoped.node} --lane ${scoped.lane} --confirm`,
+      },
+    };
+  }
+  if (!scoped.confirm && !scoped.dryRun) throw new Error("control-plane runtime-image preload/build requires --dry-run or --confirm");
+  const preload = ensureNodeBaseImage(scoped.spec, scoped.dryRun, scoped.timeoutSeconds);
+  const statusAfter = nodeRuntimeBaseImageStatus(scoped.spec, scoped.timeoutSeconds);
+  return {
+    ok: preload !== null && preload.ok === true && (scoped.dryRun || statusAfter.ok === true),
+    command: `hwlab nodes control-plane runtime-image ${action} --node ${scoped.node} --lane ${scoped.lane}`,
+    node: scoped.node,
+    lane: scoped.lane,
+    mode: scoped.dryRun ? "dry-run" : "confirmed-preload",
+    requestedAction: action,
+    effectiveAction: "preload",
+    mutation: !scoped.dryRun && preload !== null && preload.ok === true,
+    statusBefore,
+    preload,
+    statusAfter,
+    degradedReason: preload === null
+      ? "node-runtime-base-image-source-missing"
+      : preload.ok === true && (scoped.dryRun || statusAfter.ok === true)
+        ? undefined
+        : "node-runtime-base-image-seed-failed",
+    next: scoped.dryRun
+      ? { preload: `bun scripts/cli.ts hwlab nodes control-plane runtime-image preload --node ${scoped.node} --lane ${scoped.lane} --confirm` }
+      : { triggerCurrent: `bun scripts/cli.ts hwlab nodes control-plane trigger-current --node ${scoped.node} --lane ${scoped.lane} --confirm --rerun` },
+  };
+}
+
 function nodeRuntimeMigration(scoped: ReturnType): Record {
   const spec = scoped.spec;
   if (scoped.allowLiveDbRead && scoped.confirm) throw new Error("control-plane runtime-migration accepts --allow-live-db-read only with dry-run/source-check mode, not --confirm");
@@ -2948,6 +3024,48 @@ function syncNodeExternalPostgresSecrets(spec: HwlabRuntimeLaneSpec, dryRun: boo
   };
 }
 
+function nodeRuntimeBaseImageStatus(spec: HwlabRuntimeLaneSpec, timeoutSeconds: number): Record<string, unknown> {
+  const source = spec.baseImageSource ?? "";
+  const script = [
+    "set -eu",
+    `target=${shellQuote(spec.baseImage)}`,
+    `source=${shellQuote(source)}`,
+    "repo_tag=${target#*/}",
+    "repo=${repo_tag%:*}",
+    "tag=${repo_tag##*:}",
+    "if [ \"$repo\" = \"$repo_tag\" ]; then tag=latest; fi",
+    "registry_url=\"http://127.0.0.1:5000/v2/$repo/tags/list\"",
+    "registry_present=false",
+    "if curl -fsS \"$registry_url\" 2>/dev/null | grep -qF '\"'\"$tag\"'\"'; then registry_present=true; fi",
+    "target_present=false",
+    "if docker image inspect \"$target\" >/dev/null 2>&1; then target_present=true; fi",
+    "source_present=false",
+    "if [ -n \"$source\" ] && docker image inspect \"$source\" >/dev/null 2>&1; then source_present=true; fi",
+    "printf 'target\\t%s\\n' \"$target\"",
+    "printf 'source\\t%s\\n' \"$source\"",
+    "printf 'sourceConfigured\\t%s\\n' \"$([ -n \"$source\" ] && printf true || printf false)\"",
+    "printf 'registryUrl\\t%s\\n' \"$registry_url\"",
+    "printf 'registryTagPresent\\t%s\\n' \"$registry_present\"",
+    "printf 'targetImagePresent\\t%s\\n' \"$target_present\"",
+    "printf 'sourceImagePresent\\t%s\\n' \"$source_present\"",
+  ].join("\n");
+  const result = runNodeHostScript(spec, script, Math.min(timeoutSeconds, 300));
+  const fields = keyValueLinesFromText(statusText(result));
+  const sourceConfigured = fields.sourceConfigured === "true";
+  const registryTagPresent = fields.registryTagPresent === "true";
+  return {
+    ok: isCommandSuccess(result) && sourceConfigured && registryTagPresent,
+    target: fields.target ?? spec.baseImage,
+    source: fields.source || null,
+    sourceConfigured,
+    registryUrl: fields.registryUrl ?? null,
+    registryTagPresent,
+    targetImagePresent: fields.targetImagePresent === "true",
+    sourceImagePresent: fields.sourceImagePresent === "true",
+    result: compactRuntimeCommand(result),
+  };
+}
+
 function ensureNodeBaseImage(spec: HwlabRuntimeLaneSpec, dryRun: boolean, timeoutSeconds: number): Record | null {
   if (spec.baseImageSource === undefined) return null;
   const script = [
@@ -2955,6 +3073,7 @@ function ensureNodeBaseImage(spec: HwlabRuntimeLaneSpec, dryRun: boolean, timeou
     `target=${shellQuote(spec.baseImage)}`,
     `source=${shellQuote(spec.baseImageSource)}`,
     `dry_run=${shellQuote(dryRun ? "true" : "false")}`,
+    `pull_retries=${shellQuote(String(Math.max(1, spec.downloadProfile.docker.pullRetries)))}`,
     "repo_tag=${target#*/}",
     "repo=${repo_tag%:*}",
     "tag=${repo_tag##*:}",
@@ -2966,6 +3085,19 @@ function ensureNodeBaseImage(spec: HwlabRuntimeLaneSpec, dryRun: boolean, timeou
     "if [ \"$present\" = false ]; then",
     " action=seed",
     " if [ \"$dry_run\" = false ]; then",
+    " source_present_before=true",
+    " if ! docker image inspect \"$source\" >/dev/null 2>&1; then",
+    " source_present_before=false",
+    " attempt=1",
+    " pulled_source=false",
+    " while [ \"$attempt\" -le \"$pull_retries\" ]; do",
+    " pull_attempts=$attempt",
+    " if docker pull \"$source\" >/tmp/hwlab-node-base-image-pull.out 2>&1; then pulled_source=true; break; fi",
+    " attempt=$((attempt + 1))",
+    " if [ \"$attempt\" -le \"$pull_retries\" ]; then sleep 2; fi",
+    " done",
+    " if [ \"$pulled_source\" != true ]; then cat /tmp/hwlab-node-base-image-pull.out >&2 2>/dev/null || true; fi",
+    " fi",
     " docker image inspect \"$source\" >/dev/null",
     " docker tag \"$source\" \"$target\"",
     " docker push \"$target\" >/tmp/hwlab-node-base-image-push.out",
@@ -2979,6 +3111,10 @@ function ensureNodeBaseImage(spec: HwlabRuntimeLaneSpec, dryRun: boolean, timeou
     "printf 'presentBefore\\t%s\\n' \"$present\"",
     "printf 'presentAfter\\t%s\\n' \"$after\"",
     "printf 'action\\t%s\\n' \"$action\"",
+    "printf 'pullRetries\\t%s\\n' \"$pull_retries\"",
+    "printf 'sourcePresentBefore\\t%s\\n' \"${source_present_before:-unknown}\"",
+    "printf 'pulledSource\\t%s\\n' \"${pulled_source:-false}\"",
+    "printf 'pullAttempts\\t%s\\n' \"${pull_attempts:-0}\"",
     "if [ \"$dry_run\" = true ] || [ \"$after\" = true ]; then exit 0; fi",
     "exit 1",
   ].join("\n");
@@ -2992,6 +3128,10 @@ function ensureNodeBaseImage(spec: HwlabRuntimeLaneSpec, dryRun: boolean, timeou
     presentBefore: fields.presentBefore === "true",
     presentAfter: fields.presentAfter === "true",
     action: fields.action ?? null,
+    pullRetries: numericField(fields.pullRetries),
+    sourcePresentBefore: fields.sourcePresentBefore ?? null,
+    pulledSource: fields.pulledSource === "true",
+    pullAttempts: numericField(fields.pullAttempts),
     result: compactRuntimeCommand(result),
   };
 }