From 296398625112bd7ea1a441c3cfc6f5ae3accc988 Mon Sep 17 00:00:00 2001 From: Lyon <88232613+pikasTech@users.noreply.github.com> Date: Thu, 25 Jun 2026 23:39:31 +0800 Subject: [PATCH] feat: publish web probe sentinel from cicd (#902) Co-authored-by: Codex --- .../cicd.d601-v03.yaml | 15 + scripts/src/hwlab-node-impl.ts | 2 +- scripts/src/hwlab-node-web-sentinel-cicd.ts | 628 +++++++++++++++++- 3 files changed, 628 insertions(+), 17 deletions(-) diff --git a/config/hwlab-web-probe-sentinel/cicd.d601-v03.yaml b/config/hwlab-web-probe-sentinel/cicd.d601-v03.yaml index f95cacf0..6c8be427 100644 --- a/config/hwlab-web-probe-sentinel/cicd.d601-v03.yaml +++ b/config/hwlab-web-probe-sentinel/cicd.d601-v03.yaml @@ -10,9 +10,24 @@ sentinel: source: repository: pikasTech/unidesk branch: master + gitSshUrl: ssh://git@ssh.github.com:443/pikasTech/unidesk.git gitMirrorReadUrl: http://git-mirror-http.devops-infra.svc.cluster.local:8080/pikasTech/unidesk.git buildContext: . entrypoint: scripts/web-probe-sentinel-service.ts + checkoutPaths: + - scripts + - config + - package.json + - bun.lock + - bun.lockb + builder: + namespace: devops-infra + sourceMode: sparse-git-checkout + jobPrefix: web-probe-sentinel-publish + gitSshSecretName: git-mirror-github-ssh + dockerSocketPath: /var/run/docker.sock + activeDeadlineSeconds: 900 + ttlSecondsAfterFinished: 3600 gitopsPath: deploy/gitops/node/d601/web-probe-sentinel argo: namespace: argocd diff --git a/scripts/src/hwlab-node-impl.ts b/scripts/src/hwlab-node-impl.ts index ab550f18..80bf094d 100644 --- a/scripts/src/hwlab-node-impl.ts +++ b/scripts/src/hwlab-node-impl.ts @@ -7489,7 +7489,7 @@ function parseNodeWebProbeSentinelOptions(args: string[]): NodeWebProbeSentinelO const confirm = args.includes("--confirm"); const dryRun = args.includes("--dry-run"); if (confirm && dryRun) throw new Error("web-probe sentinel accepts only one of --confirm or --dry-run"); - const timeoutSeconds = positiveIntegerOption(args, "--timeout-seconds", 60, 3600); + const timeoutSeconds = positiveIntegerOption(args, "--timeout-seconds", 900, 3600); let sentinel: WebProbeSentinelOptions; if (sentinelActionRaw === "plan" || sentinelActionRaw === "status") { sentinel = { kind: "config", action: sentinelActionRaw, node, lane, dryRun }; diff --git a/scripts/src/hwlab-node-web-sentinel-cicd.ts b/scripts/src/hwlab-node-web-sentinel-cicd.ts index 6f41dd36..5c2003b5 100644 --- a/scripts/src/hwlab-node-web-sentinel-cicd.ts +++ b/scripts/src/hwlab-node-web-sentinel-cicd.ts @@ -4,6 +4,7 @@ import { createHash } from "node:crypto"; import { existsSync, readFileSync } from "node:fs"; import { repoRoot, rootPath } from "./config"; import { runCommand, type CommandResult } from "./command"; +import { startJob } from "./jobs"; import { webProbeSentinelConfigPlan, withWebProbeSentinelConfigRendered } from "./hwlab-node-web-sentinel-config"; import type { HwlabRuntimeLaneSpec } from "./hwlab-node-lanes"; import type { RenderedCliResult } from "./output"; @@ -77,6 +78,26 @@ interface SentinelImagePlan { readonly dockerfilePreview: string; } +interface SentinelObservedStatus { + readonly sourceMirror: Record; + readonly registry: Record; + readonly gitMirror: Record; + readonly argo: Record; + readonly runtime: Record; +} + +interface SentinelRemoteJobResult { + readonly ok: boolean; + readonly phase: string; + readonly jobName: string; + readonly payload: Record; + readonly polls?: number; + readonly elapsedMs?: number; + readonly create?: Record; + readonly probe?: Record; + readonly valuesRedacted: true; +} + interface CompactCommandResult { readonly exitCode: number | null; readonly timedOut: boolean; @@ -97,11 +118,14 @@ export function runWebProbeSentinelCommand(spec: HwlabRuntimeLaneSpec, options: function runSentinelImage(state: SentinelCicdState, options: Extract): RenderedCliResult { const command = `hwlab nodes web-probe sentinel image ${options.action}`; + if (options.action === "build" && options.confirm) { + if (!options.wait) return renderAsyncSentinelJob(state, "image", "build", options.timeoutSeconds); + return runSentinelImageBuildConfirmed(state, options); + } const registry = options.action === "status" ? probeImageRegistry(state, options.timeoutSeconds) : null; - const mutationBlocked = options.confirm ? confirmBlocked("image build", state) : null; const registryReady = options.action !== "status" || record(registry?.probe).present === true; const result = { - ok: state.configReady && state.sourceHead.ok && registryReady && mutationBlocked === null, + ok: state.configReady && state.sourceHead.ok && registryReady, command, node: state.spec.nodeId, lane: state.spec.lane, @@ -111,10 +135,11 @@ function runSentinelImage(state: SentinelCicdState, options: Extract): RenderedCliResult { const command = `hwlab nodes web-probe sentinel control-plane ${options.action}`; const mutationAction = options.action === "apply" || options.action === "trigger-current"; - const mutationBlocked = options.confirm && mutationAction ? confirmBlocked(options.action, state) : null; - const gitMirrorStatus = options.action === "status" ? runChildCli(["hwlab", "nodes", "git-mirror", "status", "--node", state.spec.nodeId, "--lane", state.spec.lane], options.timeoutSeconds) : null; - const nodeControlPlaneStatus = options.action === "status" ? runChildCli(["hwlab", "nodes", "control-plane", "status", "--node", state.spec.nodeId, "--lane", state.spec.lane], options.timeoutSeconds) : null; - const observedReady = options.action !== "status" || (record(gitMirrorStatus).ok === true && record(nodeControlPlaneStatus).ok === true); + if (options.confirm && mutationAction) { + if (!options.wait) return renderAsyncSentinelJob(state, "control-plane", options.action, options.timeoutSeconds); + return runSentinelControlPlaneConfirmed(state, options); + } + const observed = options.action === "status" ? collectSentinelObservedStatus(state, options.timeoutSeconds) : null; + const observedReady = options.action !== "status" || sentinelObservedReady(record(observed)); const pipelineRun = sentinelPipelineRunName(state); const result = { - ok: state.configReady && state.sourceHead.ok && observedReady && mutationBlocked === null, + ok: state.configReady && state.sourceHead.ok && observedReady, command, node: state.spec.nodeId, lane: state.spec.lane, @@ -166,11 +193,8 @@ function runSentinelControlPlane(state: SentinelCicdState, options: Extract): RenderedCliResult { + const command = "hwlab nodes web-probe sentinel image build"; + const publish = runSentinelPublishJob(state, false, options.timeoutSeconds); + const registry = probeImageRegistry(state, options.timeoutSeconds); + const registryReady = record(registry.probe).present === true; + const result = { + ok: state.configReady && state.sourceHead.ok && publish.ok === true && registryReady, + command, + node: state.spec.nodeId, + lane: state.spec.lane, + mode: "confirm-wait", + mutation: true, + specRef: SPEC_REF, + source: state.sourceHead, + image: state.image, + registry, + publish, + warnings: sentinelElapsedWarnings(record(publish).elapsedMs), + blocker: null, + next: { + status: `bun scripts/cli.ts hwlab nodes web-probe sentinel image status --node ${state.spec.nodeId} --lane ${state.spec.lane}`, + controlPlaneTrigger: `bun scripts/cli.ts hwlab nodes web-probe sentinel control-plane trigger-current --node ${state.spec.nodeId} --lane ${state.spec.lane} --confirm`, + }, + valuesRedacted: true, + }; + return rendered(result.ok, command, renderImageResult(result)); +} + +function runSentinelControlPlaneConfirmed(state: SentinelCicdState, options: Extract): RenderedCliResult { + const command = `hwlab nodes web-probe sentinel control-plane ${options.action}`; + const applyOnly = options.action === "apply"; + const publish = applyOnly ? null : runSentinelPublishJob(state, true, options.timeoutSeconds); + const flush = !applyOnly && record(publish).ok === true + ? runChildCli(["hwlab", "nodes", "git-mirror", "flush", "--node", state.spec.nodeId, "--lane", state.spec.lane, "--confirm", "--wait"], options.timeoutSeconds) + : null; + const argoApply = applySentinelArgoApplication(state, options.timeoutSeconds); + const observed = waitForSentinelObservedStatus(state, options.timeoutSeconds); + const ok = state.configReady + && state.sourceHead.ok + && (applyOnly || record(publish).ok === true) + && (applyOnly || record(flush).ok === true) + && record(argoApply).ok === true + && sentinelObservedReady(observed); + const result = { + ok, + command, + node: state.spec.nodeId, + lane: state.spec.lane, + mode: "confirm-wait", + mutation: true, + specRef: SPEC_REF, + source: state.sourceHead, + image: state.image, + pipelineRun: sentinelPipelineRunName(state), + gitops: { + path: stringAt(state.cicd, "gitopsPath"), + targetRevision: stringAt(state.cicd, "argo.targetRevision"), + manifestObjects: state.manifests.length, + manifestSha256: state.manifestSha256, + }, + argo: { + namespace: stringAt(state.cicd, "argo.namespace"), + projectName: stringAt(state.cicd, "argo.projectName"), + applicationName: stringAt(state.cicd, "argo.applicationName"), + }, + validation: { + scenarioId: stringAt(state.cicd, "targetValidation.scenarioId"), + maxSeconds: numberAt(state.cicd, "targetValidation.maxSeconds"), + automaticSecondPath: false, + }, + manifests: { + objects: manifestObjectSummary(state.manifests), + sha256: state.manifestSha256, + }, + publish, + flush, + argoApply, + observed, + warnings: Array.from(new Set([ + ...sentinelElapsedWarnings(record(publish).elapsedMs), + ...sentinelElapsedWarnings(record(flush).result === undefined ? null : record(record(flush).result).durationMs), + ])), + blocker: null, + next: controlPlaneNext(state, options.action), + valuesRedacted: true, + }; + return rendered(ok, command, renderControlPlaneResult(result)); +} + +function renderAsyncSentinelJob(state: SentinelCicdState, domain: "image" | "control-plane", action: string, timeoutSeconds: number): RenderedCliResult { + const args = domain === "image" + ? ["hwlab", "nodes", "web-probe", "sentinel", "image", action, "--node", state.spec.nodeId, "--lane", state.spec.lane, "--confirm", "--wait", "--timeout-seconds", String(timeoutSeconds)] + : ["hwlab", "nodes", "web-probe", "sentinel", "control-plane", action, "--node", state.spec.nodeId, "--lane", state.spec.lane, "--confirm", "--wait", "--timeout-seconds", String(timeoutSeconds)]; + const job = startJob(`hwlab_nodes_${state.spec.lane}_web_probe_sentinel_${domain}_${action}`, ["bun", "scripts/cli.ts", ...args], `Run HWLAB ${state.spec.lane} web-probe sentinel ${domain} ${action} for node ${state.spec.nodeId}`); + const command = `hwlab nodes web-probe sentinel ${domain} ${action}`; + const result = { + ok: true, + command, + node: state.spec.nodeId, + lane: state.spec.lane, + mode: "async-job", + mutation: true, + reason: "confirmed sentinel publish/build can exceed the short interactive window; use job status for bounded progress.", + job, + next: { + status: `bun scripts/cli.ts job status ${job.id} --tail-bytes 12000`, + wait: ["bun", "scripts/cli.ts", ...args].join(" "), + }, + valuesRedacted: true, + }; + return rendered(true, command, renderAsyncJobResult(result)); +} + +function collectSentinelObservedStatus(state: SentinelCicdState, timeoutSeconds: number): SentinelObservedStatus { + return { + sourceMirror: probeSourceMirror(state, timeoutSeconds), + registry: probeImageRegistry(state, timeoutSeconds), + gitMirror: runChildCli(["hwlab", "nodes", "git-mirror", "status", "--node", state.spec.nodeId, "--lane", state.spec.lane], timeoutSeconds), + argo: probeArgoApplication(state, timeoutSeconds), + runtime: probeRuntimeObjects(state, timeoutSeconds), + }; +} + +function waitForSentinelObservedStatus(state: SentinelCicdState, timeoutSeconds: number): SentinelObservedStatus { + const startedAt = Date.now(); + const timeoutMs = Math.max(30_000, Math.min(timeoutSeconds * 1000, 900_000)); + let observed = collectSentinelObservedStatus(state, timeoutSeconds); + while (!sentinelObservedReady(observed) && Date.now() - startedAt < timeoutMs) { + runCommand(["sleep", "5"], repoRoot, { timeoutMs: 6_000 }); + observed = collectSentinelObservedStatus(state, timeoutSeconds); + } + return observed; +} + +function sentinelObservedReady(value: Record | SentinelObservedStatus): boolean { + const observed = record(value); + return record(observed.sourceMirror).ok === true + && record(record(observed.registry).probe).present === true + && record(observed.gitMirror).ok === true + && record(observed.argo).ok === true + && record(observed.runtime).ok === true; +} + +function probeSourceMirror(state: SentinelCicdState, timeoutSeconds: number): Record { + const sourceMode = stringAt(state.cicd, "builder.sourceMode"); + if (sourceMode === "sparse-git-checkout") { + return { + ok: state.sourceHead.ok, + probe: { + mode: sourceMode, + commit: state.sourceHead.commit, + expectedCommit: state.sourceHead.commit, + persistentMirrorPresent: false, + source: "commit-pinned sparse checkout declared in config/hwlab-web-probe-sentinel/cicd.d601-v03.yaml#sentinel.cicd.source.checkoutPaths", + valuesRedacted: true, + }, + result: { exitCode: 0, timedOut: false, stdoutBytes: 0, stderrBytes: 0, stdoutPreview: "sourceMode=sparse-git-checkout", stderrPreview: "" }, + }; + } + const namespace = stringAt(state.cicd, "builder.namespace"); + const repository = stringAt(state.cicd, "source.repository"); + const branch = stringAt(state.cicd, "source.branch"); + const expectedCommit = state.sourceHead.commit; + const script = [ + "set +e", + `repo_path=${shellQuote(`/cache/${repository}.git`)}`, + `branch=${shellQuote(branch)}`, + `expected=${shellQuote(expectedCommit ?? "")}`, + "commit=$(kubectl -n " + shellQuote(namespace) + " exec deploy/git-mirror-http -- sh -lc \"git --git-dir=\\\"$repo_path\\\" rev-parse \\\"refs/heads/$branch\\\" 2>/dev/null\" 2>/dev/null)", + "rc=$?", + "node - \"$rc\" \"$commit\" \"$expected\" \"$repo_path\" \"$branch\" <<'NODE'", + "const [rc, commit, expected, repoPath, branch] = process.argv.slice(2);", + "const present = Number(rc) === 0 && /^[0-9a-f]{40}$/i.test(commit || '');", + "console.log(JSON.stringify({ ok: present && (!expected || commit === expected), present, commit: present ? commit : null, expectedCommit: expected || null, branch, repoPath, valuesRedacted: true }));", + "NODE", + ].join("\n"); + const result = runCommand(["trans", stringAt(state.controlPlaneNode, "kubeRoute"), "sh", "--", script], repoRoot, { timeoutMs: Math.min(timeoutSeconds, 60) * 1000 }); + return { ok: result.exitCode === 0 && parseJsonObject(result.stdout)?.ok === true, probe: parseJsonObject(result.stdout), result: compactCommand(result) }; +} + +function probeArgoApplication(state: SentinelCicdState, timeoutSeconds: number): Record { + const namespace = stringAt(state.cicd, "argo.namespace"); + const applicationName = stringAt(state.cicd, "argo.applicationName"); + const result = runCommand(["trans", stringAt(state.controlPlaneNode, "kubeRoute"), "kubectl", "-n", namespace, "get", "application", applicationName, "-o", "json"], repoRoot, { timeoutMs: Math.min(timeoutSeconds, 60) * 1000 }); + const app = parseJsonObject(result.stdout); + const status = record(app?.status); + const sync = record(status.sync); + const health = record(status.health); + const ok = result.exitCode === 0 && sync.status === "Synced" && health.status === "Healthy"; + return { + ok, + present: result.exitCode === 0, + syncStatus: sync.status ?? null, + healthStatus: health.status ?? null, + revision: sync.revision ?? null, + result: compactCommand(result), + }; +} + +function probeRuntimeObjects(state: SentinelCicdState, timeoutSeconds: number): Record { + const namespace = stringAt(state.runtime, "namespace"); + const deploymentName = stringAt(state.runtime, "deploymentName"); + const serviceName = stringAt(state.runtime, "serviceName"); + const pvcName = stringAt(state.runtime, "pvcName"); + const configMapName = `${deploymentName}-config`; + const serviceAccountName = stringAt(state.runtime, "serviceAccountName"); + const script = [ + "set +e", + `namespace=${shellQuote(namespace)}`, + `deployment=${shellQuote(deploymentName)}`, + `service=${shellQuote(serviceName)}`, + `pvc=${shellQuote(pvcName)}`, + `configmap=${shellQuote(configMapName)}`, + `serviceaccount=${shellQuote(serviceAccountName)}`, + "tmp=$(mktemp -d)", + "kubectl -n \"$namespace\" get deploy \"$deployment\" -o json >\"$tmp/deploy.json\" 2>/dev/null; echo $? >\"$tmp/deploy.rc\"", + "kubectl -n \"$namespace\" get svc \"$service\" -o json >\"$tmp/svc.json\" 2>/dev/null; echo $? >\"$tmp/svc.rc\"", + "kubectl -n \"$namespace\" get pvc \"$pvc\" -o json >\"$tmp/pvc.json\" 2>/dev/null; echo $? >\"$tmp/pvc.rc\"", + "kubectl -n \"$namespace\" get cm \"$configmap\" -o json >\"$tmp/cm.json\" 2>/dev/null; echo $? >\"$tmp/cm.rc\"", + "kubectl -n \"$namespace\" get sa \"$serviceaccount\" -o json >\"$tmp/sa.json\" 2>/dev/null; echo $? >\"$tmp/sa.rc\"", + "node - \"$tmp\" <<'NODE'", + "const fs = require('node:fs');", + "const dir = process.argv[2];", + "function rc(name){ return Number(fs.readFileSync(`${dir}/${name}.rc`, 'utf8').trim()); }", + "function json(name){ try { return JSON.parse(fs.readFileSync(`${dir}/${name}.json`, 'utf8')); } catch { return null; } }", + "const dep = json('deploy');", + "const deploymentPresent = rc('deploy') === 0;", + "const desired = Number(dep?.spec?.replicas ?? 0);", + "const ready = Number(dep?.status?.readyReplicas ?? 0);", + "const updated = Number(dep?.status?.updatedReplicas ?? 0);", + "const image = dep?.spec?.template?.spec?.containers?.[0]?.image ?? null;", + "const payload = {", + " deployment: { present: deploymentPresent, desiredReplicas: desired, readyReplicas: ready, updatedReplicas: updated, image },", + " service: { present: rc('svc') === 0 },", + " pvc: { present: rc('pvc') === 0, phase: json('pvc')?.status?.phase ?? null },", + " configMap: { present: rc('cm') === 0 },", + " serviceAccount: { present: rc('sa') === 0 },", + " valuesRedacted: true", + "};", + "payload.ok = payload.deployment.present && ready >= Math.max(1, desired) && updated >= Math.max(1, desired) && payload.service.present && payload.pvc.present && payload.pvc.phase === 'Bound' && payload.configMap.present && payload.serviceAccount.present;", + "console.log(JSON.stringify(payload));", + "NODE", + ].join("\n"); + const result = runCommand(["trans", stringAt(state.controlPlaneNode, "kubeRoute"), "sh", "--", script], repoRoot, { timeoutMs: Math.min(timeoutSeconds, 60) * 1000 }); + const probe = parseJsonObject(result.stdout); + return { ok: result.exitCode === 0 && probe?.ok === true, probe, result: compactCommand(result) }; +} + +function runSentinelPublishJob(state: SentinelCicdState, publishGitops: boolean, timeoutSeconds: number): SentinelRemoteJobResult { + const jobName = `${stringAt(state.cicd, "builder.jobPrefix")}-${Date.now().toString(36)}`.replace(/[^a-z0-9-]/giu, "-").toLowerCase().slice(0, 63); + const manifest = sentinelPublishJobManifest(state, jobName, publishGitops); + const namespace = stringAt(state.cicd, "builder.namespace"); + sentinelProgressEvent("sentinel.publish.progress", { phase: "create-job", status: "submitting", jobName, publishGitops, sourceCommit: state.sourceHead.commit, node: state.spec.nodeId, lane: state.spec.lane }); + const created = runCommand(["trans", stringAt(state.controlPlaneNode, "kubeRoute"), "sh", "--", createK8sJobScript(namespace, manifest)], repoRoot, { timeoutMs: Math.min(timeoutSeconds, 60) * 1000 }); + if (created.exitCode !== 0) { + sentinelProgressEvent("sentinel.publish.progress", { phase: "create-job", status: "failed", jobName, publishGitops, node: state.spec.nodeId, lane: state.spec.lane }); + return { ok: false, phase: "create-job", jobName, payload: { ok: false, status: "create-failed", valuesRedacted: true }, create: compactCommand(created), valuesRedacted: true }; + } + sentinelProgressEvent("sentinel.publish.progress", { phase: "create-job", status: "succeeded", jobName, publishGitops, node: state.spec.nodeId, lane: state.spec.lane }); + const startedAt = Date.now(); + const timeoutMs = Math.max(30_000, Math.min(timeoutSeconds * 1000, 900_000)); + let polls = 0; + let lastProbe: Record = {}; + while (Date.now() - startedAt < timeoutMs) { + polls += 1; + const probeCapture = runCommand(["trans", stringAt(state.controlPlaneNode, "kubeRoute"), "sh", "--", probeK8sJobScript(namespace, jobName)], repoRoot, { timeoutMs: Math.min(timeoutSeconds, 60) * 1000 }); + const probe = parseJsonObject(probeCapture.stdout) ?? {}; + lastProbe = { ...probe, capture: compactCommand(probeCapture) }; + const payload = sentinelPayloadFromLogs(String(probe.logsTail ?? "")); + sentinelProgressEvent("sentinel.publish.progress", { + phase: "remote-job", + status: probe.succeeded === true ? "succeeded" : probe.failed === true ? "failed" : "running", + jobName, + publishGitops, + polls, + elapsedMs: Date.now() - startedAt, + pod: probe.pod ?? null, + sourceCommit: state.sourceHead.commit, + node: state.spec.nodeId, + lane: state.spec.lane, + }); + if (probe.succeeded === true) { + const ok = payload.ok === true; + return { ok, phase: "job-succeeded", jobName, payload: Object.keys(payload).length === 0 ? { ok: false, status: "result-missing", valuesRedacted: true } : payload, polls, elapsedMs: Date.now() - startedAt, probe: lastProbe, valuesRedacted: true }; + } + if (probe.failed === true) { + return { ok: false, phase: "job-failed", jobName, payload: Object.keys(payload).length === 0 ? { ok: false, status: "failed", valuesRedacted: true } : payload, polls, elapsedMs: Date.now() - startedAt, probe: lastProbe, valuesRedacted: true }; + } + if (Date.now() - startedAt > 120_000) sentinelProgressEvent("sentinel.publish.warning", { warning: "remote job exceeded 120s; investigate env-reuse/git mirror/source build path", jobName, elapsedMs: Date.now() - startedAt, node: state.spec.nodeId, lane: state.spec.lane }); + runCommand(["sleep", "5"], repoRoot, { timeoutMs: 6_000 }); + } + return { ok: false, phase: "job-timeout", jobName, payload: { ok: false, status: "timeout", valuesRedacted: true }, polls, elapsedMs: Date.now() - startedAt, probe: lastProbe, valuesRedacted: true }; +} + +function sentinelPublishJobManifest(state: SentinelCicdState, jobName: string, publishGitops: boolean): Record { + const namespace = stringAt(state.cicd, "builder.namespace"); + const labels = { + "app.kubernetes.io/name": "web-probe-sentinel-publish", + "app.kubernetes.io/part-of": "hwlab-web-probe-sentinel", + "unidesk.ai/spec-ref": "PJ2026-01060508", + "unidesk.ai/node": state.spec.nodeId, + "unidesk.ai/lane": state.spec.lane, + }; + return { + apiVersion: "batch/v1", + kind: "Job", + metadata: { name: jobName, namespace, labels }, + spec: { + backoffLimit: 0, + activeDeadlineSeconds: numberAt(state.cicd, "builder.activeDeadlineSeconds"), + ttlSecondsAfterFinished: numberAt(state.cicd, "builder.ttlSecondsAfterFinished"), + template: { + metadata: { labels }, + spec: { + restartPolicy: "Never", + volumes: [ + { name: "cache", hostPath: { path: stringAt(state.controlPlaneTarget, "gitMirror.cacheHostPath"), type: "DirectoryOrCreate" } }, + { name: "git-ssh", secret: { secretName: stringAt(state.cicd, "builder.gitSshSecretName"), defaultMode: 256 } }, + { name: "docker-sock", hostPath: { path: stringAt(state.cicd, "builder.dockerSocketPath"), type: "Socket" } }, + ], + containers: [{ + name: "publish", + image: state.image.baseImage, + imagePullPolicy: "IfNotPresent", + command: ["/bin/sh", "-ec", sentinelPublishShell(state, jobName, publishGitops)], + volumeMounts: [ + { name: "cache", mountPath: "/cache" }, + { name: "git-ssh", mountPath: "/git-ssh", readOnly: true }, + { name: "docker-sock", mountPath: stringAt(state.cicd, "builder.dockerSocketPath") }, + ], + }], + }, + }, + }, + }; +} + +function sentinelPublishShell(state: SentinelCicdState, jobName: string, publishGitops: boolean): string { + const gitopsFiles = publishGitops ? sentinelGitopsFiles(state) : []; + const filesB64 = Buffer.from(JSON.stringify(gitopsFiles.map((file) => ({ + path: file.path, + contentBase64: Buffer.from(file.content, "utf8").toString("base64"), + }))), "utf8").toString("base64"); + const checkoutPathsB64 = Buffer.from(JSON.stringify(arrayAt(state.cicd, "source.checkoutPaths").map((item) => { + if (typeof item !== "string" || item.length === 0 || item.startsWith("/") || item.includes("..")) throw new Error("source.checkoutPaths must contain safe relative paths"); + return item; + })), "utf8").toString("base64"); + const dockerfileB64 = Buffer.from(state.image.dockerfilePreview, "utf8").toString("base64"); + return [ + "set -eu", + `job_name=${shellQuote(jobName)}`, + `source_repository=${shellQuote(stringAt(state.cicd, "source.repository"))}`, + `source_branch=${shellQuote(stringAt(state.cicd, "source.branch"))}`, + `source_git_url=${shellQuote(stringAt(state.cicd, "source.gitSshUrl"))}`, + `source_commit=${shellQuote(state.sourceHead.commit ?? "")}`, + `checkout_paths_b64=${shellQuote(checkoutPathsB64)}`, + `image_ref=${shellQuote(state.image.ref)}`, + `image_repository=${shellQuote(state.image.repository)}`, + `dockerfile_b64=${shellQuote(dockerfileB64)}`, + `gitops_repository=${shellQuote(stringAt(state.controlPlaneTarget, "source.repository"))}`, + `gitops_branch=${shellQuote(stringAt(state.cicd, "argo.targetRevision"))}`, + `files_b64=${shellQuote(filesB64)}`, + "started_ms=$(node -e 'console.log(Date.now())')", + "emit_failed() { code=$?; if [ \"$code\" -ne 0 ]; then node - \"$code\" \"$job_name\" <<'NODE'\nconst [code, jobName] = process.argv.slice(2); console.log(JSON.stringify({ ok:false, status:'failed', exitCode:Number(code), jobName, valuesRedacted:true }));\nNODE\nfi; exit \"$code\"; }", + "trap emit_failed EXIT", + "mkdir -p /root/.ssh", + "cp /git-ssh/ssh-privatekey /root/.ssh/id_rsa", + "chmod 0400 /root/.ssh/id_rsa", + "export GIT_SSH_COMMAND='ssh -i /root/.ssh/id_rsa -o IdentitiesOnly=yes -o BatchMode=yes -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/root/.ssh/known_hosts -o ConnectTimeout=15 -o ServerAliveInterval=5 -o ServerAliveCountMax=1'", + "worktree=\"/tmp/$job_name/source\"", + "rm -rf \"/tmp/$job_name\"", + "mkdir -p \"/tmp/$job_name\"", + "git init \"$worktree\"", + "cd \"$worktree\"", + "git remote add origin \"$source_git_url\"", + "git config core.sparseCheckout true", + "git config remote.origin.promisor true", + "git config remote.origin.partialclonefilter blob:none", + "CHECKOUT_PATHS_B64=\"$checkout_paths_b64\" node <<'NODE'", + "const fs = require('node:fs');", + "const paths = JSON.parse(Buffer.from(process.env.CHECKOUT_PATHS_B64 || '', 'base64').toString('utf8'));", + "fs.mkdirSync('.git/info', { recursive: true });", + "fs.writeFileSync('.git/info/sparse-checkout', paths.map((item) => item.endsWith('/') ? item : item + (item.includes('.') ? '' : '/')).join('\\n') + '\\n');", + "NODE", + "git fetch --depth=1 --filter=blob:none origin \"+refs/heads/$source_branch:refs/remotes/origin/$source_branch\"", + "git checkout --detach \"$source_commit\"", + "mirror_commit=$(git rev-parse HEAD)", + "test \"$mirror_commit\" = \"$source_commit\"", + "DOCKERFILE_B64=\"$dockerfile_b64\" node <<'NODE'", + "const fs = require('node:fs');", + "fs.writeFileSync('Dockerfile.web-probe-sentinel', Buffer.from(process.env.DOCKERFILE_B64 || '', 'base64'));", + "NODE", + "docker build -f Dockerfile.web-probe-sentinel -t \"$image_ref\" .", + "docker push \"$image_ref\" > /tmp/web-probe-sentinel-docker-push.log 2>&1", + "cat /tmp/web-probe-sentinel-docker-push.log", + "tag=${image_ref##*:}", + "repo_no_tag=${image_ref%:*}", + "registry_path=${repo_no_tag#127.0.0.1:5000/}", + "digest=$(awk '/digest: sha256:/ {print $3; exit}' /tmp/web-probe-sentinel-docker-push.log)", + "if [ -z \"$digest\" ]; then digest=$(curl -fsSI --max-time 10 \"http://127.0.0.1:5000/v2/$registry_path/manifests/$tag\" 2>/dev/null | awk 'BEGIN{IGNORECASE=1} /^docker-content-digest:/ {gsub(/\\r/,\"\",$2); print $2; exit}'); fi", + "test -n \"$digest\"", + "digest_ref=\"$repo_no_tag@$digest\"", + "gitops_commit=''", + "changed=false", + "file_count=0", + "if [ \"$files_b64\" != \"W10=\" ]; then", + " gitops_cache=\"/cache/${gitops_repository}.git\"", + " gitops_worktree=\"/tmp/$job_name/gitops\"", + " git clone --no-checkout \"$gitops_cache\" \"$gitops_worktree\"", + " cd \"$gitops_worktree\"", + " git fetch origin \"$gitops_branch\" || true", + " if git rev-parse --verify \"refs/remotes/origin/$gitops_branch^{commit}\" >/dev/null 2>&1; then git checkout -B \"$gitops_branch\" \"refs/remotes/origin/$gitops_branch\"; else git checkout --orphan \"$gitops_branch\"; git rm -rf . >/dev/null 2>&1 || true; fi", + " FILES_B64=\"$files_b64\" IMAGE_REF=\"$image_ref\" DIGEST_REF=\"$digest_ref\" node <<'NODE'", + "const fs = require('node:fs');", + "const path = require('node:path');", + "const files = JSON.parse(Buffer.from(process.env.FILES_B64 || '', 'base64').toString('utf8'));", + "for (const file of files) {", + " const target = path.resolve(process.cwd(), file.path);", + " if (!target.startsWith(process.cwd() + path.sep)) throw new Error(`refuse path outside workspace: ${file.path}`);", + " fs.mkdirSync(path.dirname(target), { recursive: true });", + " const text = Buffer.from(file.contentBase64, 'base64').toString('utf8').split(process.env.IMAGE_REF).join(process.env.DIGEST_REF);", + " fs.writeFileSync(target, text);", + "}", + "console.error(JSON.stringify({event:'web-probe-sentinel-gitops-files', fileCount: files.length, valuesRedacted:true}));", + "NODE", + " git add .", + " file_count=$(git diff --cached --name-only | wc -l | tr -d ' ')", + " if git diff --quiet --cached; then changed=false; else changed=true; git -c user.email=web-probe-sentinel@unidesk.local -c user.name='UniDesk Web Probe Sentinel' commit -m \"deploy: render web-probe sentinel ${source_commit}\"; fi", + " git push origin \"HEAD:refs/heads/$gitops_branch\"", + " gitops_commit=$(git rev-parse HEAD)", + "fi", + "finished_ms=$(node -e 'console.log(Date.now())')", + "node - \"$job_name\" \"$source_commit\" \"$mirror_commit\" \"$image_ref\" \"$digest_ref\" \"$gitops_commit\" \"$changed\" \"$file_count\" \"$started_ms\" \"$finished_ms\" <<'NODE'", + "const [jobName, sourceCommit, mirrorCommit, imageRef, digestRef, gitopsCommit, changed, fileCount, startedMs, finishedMs] = process.argv.slice(2);", + "console.log(JSON.stringify({ ok:true, status:'succeeded', jobName, sourceCommit, mirrorCommit, imageRef, digestRef, gitopsCommit: gitopsCommit || null, changed: changed === 'true', fileCount: Number(fileCount || 0), elapsedMs: Number(finishedMs) - Number(startedMs), valuesRedacted:true }));", + "NODE", + "trap - EXIT", + ].join("\n"); +} + +function sentinelGitopsFiles(state: SentinelCicdState): readonly { path: string; content: string }[] { + const runtimeManifests = state.manifests.filter((item) => item.kind !== "Application"); + return [{ + path: `${stringAt(state.cicd, "gitopsPath")}/web-probe-sentinel.yaml`, + content: `${runtimeManifests.map((item) => Bun.YAML.stringify(item).trim()).join("\n---\n")}\n`, + }]; +} + +function applySentinelArgoApplication(state: SentinelCicdState, timeoutSeconds: number): Record { + const app = state.manifests.find((item) => item.kind === "Application"); + if (app === undefined) return { ok: false, reason: "application-manifest-missing", valuesRedacted: true }; + const yaml = `${Bun.YAML.stringify(app).trim()}\n`; + const script = [ + "set -eu", + "tmp=$(mktemp)", + `cat >"$tmp" <<'YAML'\n${yaml}YAML`, + "kubectl apply -f \"$tmp\"", + ].join("\n"); + const result = runCommand(["trans", stringAt(state.controlPlaneNode, "kubeRoute"), "sh", "--", script], repoRoot, { timeoutMs: Math.min(timeoutSeconds, 60) * 1000 }); + return { ok: result.exitCode === 0, result: compactCommand(result), valuesRedacted: true }; +} + +function createK8sJobScript(namespace: string, manifest: Record): string { + const yaml = `${Bun.YAML.stringify(manifest).trim()}\n`; + return [ + "set -eu", + `kubectl -n ${shellQuote(namespace)} delete job ${shellQuote(stringAt(manifest, "metadata.name"))} --ignore-not-found=true >/dev/null 2>&1 || true`, + "tmp=$(mktemp)", + `cat >"$tmp" <<'YAML'\n${yaml}YAML`, + "kubectl apply -f \"$tmp\"", + ].join("\n"); +} + +function probeK8sJobScript(namespace: string, jobName: string): string { + return [ + "set +e", + `namespace=${shellQuote(namespace)}`, + `job=${shellQuote(jobName)}`, + "succeeded=$(kubectl -n \"$namespace\" get job \"$job\" -o jsonpath='{.status.succeeded}' 2>/dev/null)", + "failed=$(kubectl -n \"$namespace\" get job \"$job\" -o jsonpath='{.status.failed}' 2>/dev/null)", + "pod=$(kubectl -n \"$namespace\" get pod -l job-name=\"$job\" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)", + "logs_tail=''", + "if [ -n \"$pod\" ]; then logs_tail=$(kubectl -n \"$namespace\" logs \"$pod\" --tail=120 2>/dev/null | tail -c 12000 | base64 | tr -d '\\n'); fi", + "node - \"$succeeded\" \"$failed\" \"$pod\" \"$logs_tail\" <<'NODE'", + "const [succeeded, failed, pod, logsB64] = process.argv.slice(2);", + "console.log(JSON.stringify({ succeeded: Number(succeeded || 0) > 0, failed: Number(failed || 0) > 0, pod: pod || null, logsTail: Buffer.from(logsB64 || '', 'base64').toString('utf8'), valuesRedacted: true }));", + "NODE", + ].join("\n"); +} + +function sentinelPayloadFromLogs(logsTail: string): Record { + const lines = logsTail.split(/\r?\n/u).map((line) => line.trim()).filter(Boolean); + for (let index = lines.length - 1; index >= 0; index -= 1) { + const line = lines[index]; + if (!line.startsWith("{") || !line.endsWith("}")) continue; + const parsed = parseJsonObject(line); + if (parsed !== null && (parsed.ok === true || parsed.ok === false)) return parsed; + } + return {}; +} + +function sentinelElapsedWarnings(value: unknown): string[] { + const elapsedMs = typeof value === "number" && Number.isFinite(value) ? value : null; + if (elapsedMs === null || elapsedMs <= 120_000) return []; + return [`sentinel confirmed operation exceeded 120s (${Math.round(elapsedMs / 1000)}s); investigate env-reuse/git mirror/source build path before treating this as normal.`]; +} + +function sentinelProgressEvent(event: string, payload: Record): void { + console.error(JSON.stringify({ event, at: new Date().toISOString(), ...payload, valuesRedacted: true })); +} + function confirmBlocked(action: string, state: SentinelCicdState): Record { return { code: "sentinel-cicd-confirm-requires-remote-publish-job", @@ -449,8 +985,10 @@ function renderImageResult(result: Record): string { const source = record(result.source); const image = record(result.image); const registry = record(result.registry); + const publish = record(result.publish); const blocker = record(result.blocker); const next = record(result.next); + const warnings = Array.isArray(result.warnings) ? result.warnings : []; return [ String(result.command), "", @@ -462,11 +1000,17 @@ function renderImageResult(result: Record): string { "", Object.keys(registry).length === 0 ? "REGISTRY\n-" : table(["PROBED", "PRESENT", "DIGEST"], [[record(registry.probe).url ?? "-", record(registry.probe).present ?? "-", short(record(registry.probe).digest)]]), "", + Object.keys(publish).length === 0 ? "PUBLISH\n-" : table(["OK", "PHASE", "JOB", "DIGEST", "GITOPS"], [[publish.ok, publish.phase, publish.jobName, short(record(publish.payload).digestRef), short(record(publish.payload).gitopsCommit)]]), + "", + warnings.length === 0 ? "WARNINGS\n-" : ["WARNINGS", ...warnings.map((item) => `- ${text(item)}`)].join("\n"), + "", Object.keys(blocker).length === 0 ? "BLOCKER\n-" : table(["CODE", "REASON"], [[blocker.code, blocker.reason]]), "", "NEXT", ` status: ${next.status ?? "-"}`, ` dry-run: ${next.dryRun ?? "-"}`, + ` confirm: ${next.confirm ?? "-"}`, + ` trigger: ${next.controlPlaneTrigger ?? "-"}`, ` control-plane: ${next.controlPlanePlan ?? "-"}`, "", "DISCLOSURE", @@ -481,8 +1025,12 @@ function renderControlPlaneResult(result: Record): string { const argo = record(result.argo); const validation = record(result.validation); const observed = record(result.observed); + const publish = record(result.publish); + const flush = record(result.flush); + const argoApply = record(result.argoApply); const blocker = record(result.blocker); const next = record(result.next); + const warnings = Array.isArray(result.warnings) ? result.warnings : []; return [ String(result.command), "", @@ -496,6 +1044,14 @@ function renderControlPlaneResult(result: Record): string { "", renderObservedStatus(observed), "", + Object.keys(publish).length === 0 ? "PUBLISH\n-" : table(["OK", "PHASE", "JOB", "DIGEST", "GITOPS"], [[publish.ok, publish.phase, publish.jobName, short(record(publish.payload).digestRef), short(record(publish.payload).gitopsCommit)]]), + "", + Object.keys(flush).length === 0 ? "FLUSH\n-" : table(["OK", "EXIT", "TIMED_OUT", "PREVIEW"], [[flush.ok, record(flush.result).exitCode, record(flush.result).timedOut, record(flush.result).stdoutPreview]]), + "", + Object.keys(argoApply).length === 0 ? "ARGO_APPLY\n-" : table(["OK", "EXIT", "PREVIEW"], [[argoApply.ok, record(argoApply.result).exitCode, record(argoApply.result).stdoutPreview]]), + "", + warnings.length === 0 ? "WARNINGS\n-" : ["WARNINGS", ...warnings.map((item) => `- ${text(item)}`)].join("\n"), + "", Object.keys(blocker).length === 0 ? "BLOCKER\n-" : table(["CODE", "REASON"], [[blocker.code, blocker.reason]]), "", "NEXT", @@ -512,18 +1068,52 @@ function renderControlPlaneResult(result: Record): string { function renderObservedStatus(observed: Record): string { const rows = [ + observedStatusRow("source", observed.sourceMirror), + observedStatusRow("registry", observed.registry), observedStatusRow("git-mirror", observed.gitMirror), - observedStatusRow("control-plane", observed.nodeControlPlane), + observedStatusRow("argo", observed.argo), + observedStatusRow("runtime", observed.runtime), ].filter((row) => row !== null); if (rows.length === 0) return "OBSERVED\n-"; - return table(["CHECK", "OK", "EXIT", "TIMED_OUT", "STDOUT_BYTES", "PREVIEW"], rows); + return table(["CHECK", "OK", "DETAIL", "EXIT", "TIMED_OUT", "PREVIEW"], rows); } function observedStatusRow(name: string, value: unknown): unknown[] | null { const item = record(value); if (Object.keys(item).length === 0) return null; const result = record(item.result); - return [name, item.ok, result.exitCode, result.timedOut, result.stdoutBytes, result.stdoutPreview]; + return [name, item.ok, observedDetail(name, item), result.exitCode, result.timedOut, result.stdoutPreview]; +} + +function observedDetail(name: string, item: Record): string { + if (name === "source") return `${record(item.probe).mode ?? "mirror"} ${short(record(item.probe).commit)}/${short(record(item.probe).expectedCommit)}`; + if (name === "registry") return `${record(item.probe).present === true ? "present" : "missing"} ${short(record(item.probe).digest)}`; + if (name === "argo") return `${item.syncStatus ?? "-"} ${item.healthStatus ?? "-"} ${short(item.revision)}`; + if (name === "runtime") { + const probe = record(item.probe); + const deployment = record(probe.deployment); + return `ready=${deployment.readyReplicas ?? "-"} image=${short(deployment.image)}`; + } + return "-"; +} + +function renderAsyncJobResult(result: Record): string { + const job = record(result.job); + const next = record(result.next); + return [ + String(result.command), + "", + table(["NODE", "LANE", "MODE", "MUTATION", "JOB"], [[result.node, result.lane, result.mode, result.mutation, job.id]]), + "", + table(["STATUS", "NAME", "CREATED"], [[job.status, job.name, job.createdAt]]), + "", + "NEXT", + ` status: ${next.status ?? "-"}`, + ` wait: ${next.wait ?? "-"}`, + "", + "DISCLOSURE", + " confirmed operation is delegated to UniDesk job status to keep interactive calls bounded.", + ].join("\n"); } function rendered(ok: boolean, command: string, text: string): RenderedCliResult { @@ -593,6 +1183,12 @@ function numberAt(value: unknown, path: string): number { return found; } +function arrayAt(value: unknown, path: string): unknown[] { + const found = valueAtPath(value, path); + if (!Array.isArray(found)) throw new Error(`${path} must be an array`); + return found; +} + function recordTarget(value: unknown, label: string): Record { if (!isRecord(value)) throw new Error(`${label} must resolve to an object`); return value;