diff --git a/scripts/native/hwlab/runtime-gitops-observability.mjs b/scripts/native/hwlab/runtime-gitops-observability.mjs new file mode 100644 index 00000000..fce3644a --- /dev/null +++ b/scripts/native/hwlab/runtime-gitops-observability.mjs @@ -0,0 +1,44 @@ +// Native helper injected into HWLAB runtime GitOps postprocess/verify scripts. +// It intentionally avoids imports so it can also run inside `node -` heredocs. +(function installRuntimeGitopsObservability(globalObject) { + const prometheusOperatorKinds = new Set(["ServiceMonitor", "PrometheusRule", "PodMonitor", "Probe"]); + + function isObject(value) { + return value !== null && typeof value === "object" && !Array.isArray(value); + } + + function isPrometheusOperatorResource(item) { + return isObject(item) + && typeof item.apiVersion === "string" + && item.apiVersion.startsWith("monitoring.coreos.com/") + && prometheusOperatorKinds.has(String(item.kind)); + } + + function prometheusOperatorDisabled(overlay) { + return isObject(overlay?.observability) && overlay.observability.prometheusOperator === false; + } + + function stripPrometheusOperatorResources(doc, overlay) { + if (!prometheusOperatorDisabled(overlay)) return { docs: [doc], changed: false }; + if (isObject(doc) && doc.kind === "List" && Array.isArray(doc.items)) { + const items = doc.items.filter((item) => !isPrometheusOperatorResource(item)); + return { docs: items.length > 0 ? [{ ...doc, items }] : [], changed: items.length !== doc.items.length }; + } + return isPrometheusOperatorResource(doc) ? { docs: [], changed: true } : { docs: [doc], changed: false }; + } + + function prometheusOperatorResourceRef(item, file) { + return { + file, + kind: item && item.kind, + name: item && item.metadata && item.metadata.name, + container: null, + }; + } + + globalObject.unideskRuntimeGitopsObservability = { + isPrometheusOperatorResource, + stripPrometheusOperatorResources, + prometheusOperatorResourceRef, + }; +})(globalThis); diff --git a/scripts/src/hwlab-node/git-mirror.ts b/scripts/src/hwlab-node/git-mirror.ts index c7bd53e0..18767102 100644 --- a/scripts/src/hwlab-node/git-mirror.ts +++ b/scripts/src/hwlab-node/git-mirror.ts @@ -661,6 +661,8 @@ export function withNodeRuntimeControlPlaneStatusRendered(result: Record [ + webObserveText(item.kind), + webObserveText(item.namespace), + webObserveShort(webObserveText(item.name), 44), + webObserveText(item.status), + webObserveShort(webObserveText(item.message), 120), + ]), + ), + "", + "ARGO_OUT_OF_SYNC", + webObserveTable( + ["KIND", "NAMESPACE", "NAME", "SYNC", "HEALTH"], + argoProblemResources.length === 0 + ? [["-", "-", "-", "-", "-"]] + : argoProblemResources.slice(0, 8).map((item) => [ + webObserveText(item.kind), + webObserveText(item.namespace), + webObserveShort(webObserveText(item.name), 44), + webObserveText(item.status), + webObserveText(item.healthStatus), + ]), + ), + "", + "ARGO_CONDITIONS", + webObserveTable( + ["TYPE", "MESSAGE", "TIME"], + argoConditions.length === 0 + ? [["-", "-", "-"]] + : argoConditions.slice(0, 6).map((item) => [ + webObserveText(item.type), + webObserveShort(webObserveText(item.message), 120), + webObserveText(item.lastTransitionTime), + ]), + ), + "", + "ARGO_EVENTS", + webObserveTable( + ["TYPE", "REASON", "COUNT", "TIME", "MESSAGE"], + argoEvents.length === 0 + ? [["-", "-", "-", "-", "-"]] + : argoEvents.slice(0, 6).map((item) => [ + webObserveText(item.type), + webObserveText(item.reason), + webObserveText(item.count), + webObserveText(item.lastTimestamp), + webObserveShort(webObserveText(item.message), 120), + ]), + ), + "", "RUNTIME_WORKLOADS", webObserveTable(["REF", "READY", "READY_REPLICAS", "CURRENT", "DESIRED"], workloadRows), ...(workloadReadiness.length > workloadRows.length ? [` ... ${workloadReadiness.length - workloadRows.length} more workloads omitted; use --raw for complete JSON.`] : []), @@ -823,3 +894,9 @@ export function withNodeRuntimeControlPlaneStatusFullRendered(result: Record[] { + return Array.isArray(value) + ? value.filter((item): item is Record => typeof item === "object" && item !== null && !Array.isArray(item)) + : []; +} diff --git a/scripts/src/hwlab-node/render.ts b/scripts/src/hwlab-node/render.ts index d396ead2..836ec0bb 100644 --- a/scripts/src/hwlab-node/render.ts +++ b/scripts/src/hwlab-node/render.ts @@ -40,6 +40,8 @@ import { externalPostgresBridgeStatus, externalPostgresSecretStatus, getNodeRunt import { webObserveShort, webObserveText } from "./web-probe-observe"; import { hwlabRuntimeActiveExternalPostgres } from "../hwlab-node-lanes"; +const runtimeGitopsObservabilityNativeScript = readFileSync(rootPath("scripts/native/hwlab/runtime-gitops-observability.mjs"), "utf8").trimEnd(); + export function nodeRuntimeGitMirrorJobName(mirror: NodeRuntimeGitMirrorTargetSpec, action: "sync" | "flush"): string { const prefix = action === "sync" ? mirror.syncJobPrefix : mirror.flushJobPrefix; return `${prefix}-${Date.now().toString(36)}`.slice(0, 63); @@ -226,6 +228,9 @@ export function nodeRuntimeControlPlaneStatus(scoped: ReturnType { + const resourceTemplate = `{{range .status.resources}}{{.kind}}{{"\\t"}}{{.namespace}}{{"\\t"}}{{.name}}{{"\\t"}}{{.status}}{{"\\t"}}{{with .health}}{{.status}}{{"\\t"}}{{printf "%.500s" .message}}{{else}}{{"\\t"}}{{end}}{{"\\n"}}{{end}}`; + const operationTemplate = `{{with .status.operationState}}{{.phase}}{{"\\t"}}{{printf "%.500s" .message}}{{"\\t"}}{{.startedAt}}{{"\\t"}}{{.finishedAt}}{{"\\t"}}{{with .syncResult}}{{.revision}}{{"\\t"}}{{with .source}}{{.repoURL}}{{end}}{{end}}{{"\\n"}}{{with .syncResult}}{{range .resources}}{{.group}}{{"\\t"}}{{.kind}}{{"\\t"}}{{.namespace}}{{"\\t"}}{{.name}}{{"\\t"}}{{.status}}{{"\\t"}}{{printf "%.500s" .message}}{{"\\t"}}{{.hookPhase}}{{"\\t"}}{{.syncPhase}}{{"\\n"}}{{end}}{{end}}{{end}}`; + const conditionTemplate = `{{range .status.conditions}}{{.type}}{{"\\t"}}{{printf "%.500s" .message}}{{"\\t"}}{{.lastTransitionTime}}{{"\\n"}}{{end}}`; + const eventTemplate = `{{range .items}}{{.type}}{{"\\t"}}{{.reason}}{{"\\t"}}{{printf "%.500s" .message}}{{"\\t"}}{{.count}}{{"\\t"}}{{.lastTimestamp}}{{"\\n"}}{{end}}`; + const resourceResult = runNodeK3sArgs(spec, ["kubectl", "-n", "argocd", "get", "application", spec.app, "-o", `go-template=${resourceTemplate}`], timeoutSeconds); + const operationResult = runNodeK3sArgs(spec, ["kubectl", "-n", "argocd", "get", "application", spec.app, "-o", `go-template=${operationTemplate}`], timeoutSeconds); + const conditionResult = runNodeK3sArgs(spec, ["kubectl", "-n", "argocd", "get", "application", spec.app, "-o", `go-template=${conditionTemplate}`], timeoutSeconds); + const eventsResult = runNodeK3sArgs(spec, ["kubectl", "-n", "argocd", "get", "events", "--field-selector", `involvedObject.name=${spec.app}`, "--sort-by=.lastTimestamp", "-o", `go-template=${eventTemplate}`], timeoutSeconds); + const resources = argoResourceRows(resourceResult.stdout); + const problemResources = resources.filter((item) => { + const sync = typeof item.status === "string" ? item.status : null; + const health = typeof item.healthStatus === "string" ? item.healthStatus : null; + return (sync !== null && sync !== "Synced") || (health !== null && health !== "Healthy"); + }).slice(0, 12); + const operation = argoOperationRows(operationResult.stdout); + const conditions = argoConditionRows(conditionResult.stdout).slice(-8); + const events = argoEventRows(eventsResult.stdout).slice(-8); + return { + ok: resourceResult.exitCode === 0 && operationResult.exitCode === 0 && conditionResult.exitCode === 0, + application: spec.app, + resourceCount: resources.length, + problemResourceCount: problemResources.length, + problemResources, + operationState: operation.state, + operationResources: operation.resources.slice(0, 12), + conditions, + events, + result: compactRuntimeCommand(operationResult), + resourcesResult: compactRuntimeCommand(resourceResult), + conditionsResult: compactRuntimeCommand(conditionResult), + eventsResult: compactRuntimeCommand(eventsResult), + valuesPrinted: false, + }; +} + +function compactArgoDiagnostics(diagnostics: Record): Record | null { + if (Object.keys(diagnostics).length === 0) return null; + return { + ok: diagnostics.ok === true, + problemResourceCount: diagnostics.problemResourceCount ?? null, + problemResources: Array.isArray(diagnostics.problemResources) ? diagnostics.problemResources.slice(0, 8) : [], + operationState: diagnostics.operationState ?? null, + operationResources: Array.isArray(diagnostics.operationResources) ? diagnostics.operationResources.slice(0, 8) : [], + conditions: Array.isArray(diagnostics.conditions) ? diagnostics.conditions.slice(0, 6) : [], + events: Array.isArray(diagnostics.events) ? diagnostics.events.slice(0, 6) : [], + valuesPrinted: false, + }; +} + +function shortDiagnosticText(value: unknown): string | null { + if (typeof value !== "string" || value.length === 0) return null; + return webObserveShort(value.replace(/\s+/gu, " ").trim(), 500); +} + +function argoResourceRows(text: string): Record[] { + return text.split(/\r?\n/u).map((line) => { + const [kind = "", namespace = "", name = "", status = "", healthStatus = "", healthMessage = ""] = line.split("\t"); + if (kind.length === 0 && name.length === 0) return null; + return { + kind: kind || null, + namespace: namespace || null, + name: name || null, + status: status || null, + healthStatus: healthStatus || null, + healthMessage: shortDiagnosticText(healthMessage), + }; + }).filter((item): item is Record => item !== null); +} + +function argoOperationRows(text: string): { state: Record; resources: Record[] } { + const lines = text.split(/\r?\n/u).filter((line) => line.length > 0); + const [phase = "", message = "", startedAt = "", finishedAt = "", syncResultRevision = "", syncResultSource = ""] = (lines.shift() ?? "").split("\t"); + return { + state: { + phase: phase || null, + message: shortDiagnosticText(message), + startedAt: startedAt || null, + finishedAt: finishedAt || null, + syncResultRevision: syncResultRevision || null, + syncResultSource: syncResultSource || null, + }, + resources: lines.map((line) => { + const [group = "", kind = "", namespace = "", name = "", status = "", resourceMessage = "", hookPhase = "", syncPhase = ""] = line.split("\t"); + return { + group: group || null, + kind: kind || null, + namespace: namespace || null, + name: name || null, + status: status || null, + message: shortDiagnosticText(resourceMessage), + hookPhase: hookPhase || null, + syncPhase: syncPhase || null, + }; + }).filter((item) => item.status !== "Synced" || item.message !== null || item.hookPhase !== null), + }; +} + +function argoConditionRows(text: string): Record[] { + return text.split(/\r?\n/u).map((line) => { + const [type = "", message = "", lastTransitionTime = ""] = line.split("\t"); + if (type.length === 0) return null; + return { type, message: shortDiagnosticText(message), lastTransitionTime: lastTransitionTime || null }; + }).filter((item): item is Record => item !== null); +} + +function argoEventRows(text: string): Record[] { + return text.split(/\r?\n/u).map((line) => { + const [type = "", reason = "", message = "", count = "", lastTimestamp = ""] = line.split("\t"); + if (type.length === 0 && reason.length === 0) return null; + return { type: type || null, reason: reason || null, message: shortDiagnosticText(message), count: numericField(count), lastTimestamp: lastTimestamp || null }; + }).filter((item): item is Record => item !== null); +} + export function nodeRuntimeStatusNextAction(status: Record, scoped: ReturnType): string { const reason = typeof status.degradedReason === "string" ? status.degradedReason : null; if (reason === null) return `${nodeRuntimeStatusCommand(scoped)} --full`; @@ -1289,6 +1410,7 @@ export function nodeRuntimePipelinePostprocessScript(): string[] { "const vm = require('node:vm');", "const renderDir = process.argv[2];", "const overlay = JSON.parse(Buffer.from(process.argv[3], 'base64').toString('utf8'));", + `const runtimeGitopsObservabilityNativeScript = ${JSON.stringify(runtimeGitopsObservabilityNativeScript)};`, "const pipelinePath = path.join(renderDir, overlay.tektonDir, 'pipeline.yaml');", "let text = fs.readFileSync(pipelinePath, 'utf8');", "let YAML = null;", @@ -1453,6 +1575,8 @@ export function nodeRuntimePipelinePostprocessScript(): string[] { "const crypto = require('crypto');", "const YAML = require('yaml');", "const overlay = ${runtimeOverlay};", + "${runtimeGitopsObservabilityNativeScript}", + "const observabilityNative = globalThis.unideskRuntimeGitopsObservability;", "const runtimePath = String(overlay.runtimePath || '');", "const renderDir = String(overlay.runtimeRenderDir || '');", "const legacyRuntimePath = runtimePath ? path.posix.join(path.posix.dirname(path.posix.dirname(runtimePath)), path.posix.basename(runtimePath)) : '';", @@ -1675,8 +1799,14 @@ export function nodeRuntimePipelinePostprocessScript(): string[] { " if (path.basename(file) === 'kustomization.yaml') continue;", " const docs = readYamlDocuments(file);", " let changed = false;", + " const nextDocs = [];", " for (const doc of docs) {", - " for (const item of listItems(doc).filter(Boolean)) {", + " const stripped = observabilityNative.stripPrometheusOperatorResources(doc, overlay);", + " changed = stripped.changed || changed;", + " observabilityChanged = observabilityChanged || stripped.changed;", + " for (const nextDoc of stripped.docs) {", + " nextDocs.push(nextDoc);", + " for (const item of listItems(nextDoc).filter(Boolean)) {", " if (!isObject(item)) continue;", " if (overlay.observability && overlay.observability.prometheusOperator === false) {", " const metadataChanged = stripMonitoringMetadata(item.metadata);", @@ -1702,8 +1832,12 @@ export function nodeRuntimePipelinePostprocessScript(): string[] { " codeAgentRuntimeChanged = codeAgentRuntimeChanged || envChanged.codeAgentRuntimeChanged;", " cloudWebRuntimeChanged = cloudWebRuntimeChanged || envChanged.cloudWebRuntimeChanged;", " }", + " }", + " }", + " if (changed) {", + " if (nextDocs.length === 0) fs.rmSync(file, { force: true });", + " else writeYamlDocuments(file, nextDocs);", " }", - " if (changed) writeYamlDocuments(file, docs);", " }", " return { observabilityChanged, startupProbeChanged, imageRewriteChanged, gitReadUrlChanged, publicEndpointChanged, dbSslModeChanged, codeAgentRuntimeChanged, cloudWebRuntimeChanged };", "}", @@ -1712,7 +1846,12 @@ export function nodeRuntimePipelinePostprocessScript(): string[] { " if (!fs.existsSync(file)) return false;", " const doc = readYaml(file) || {};", " const resources = Array.isArray(doc.resources) ? doc.resources : [];", - " const next = resources.filter((item) => !(overlay.observability && overlay.observability.prometheusOperator === false && item === 'observability.yaml'));", + " const next = resources.filter((item) => {", + " if (!(overlay.observability && overlay.observability.prometheusOperator === false)) return true;", + " const resource = String(item);", + " if (resource === 'observability.yaml') return false;", + " return !(/\\.ya?ml$/u.test(resource) && !fs.existsSync(path.join(runtimePath, resource)));", + " });", " let changed = false;", " if (next.length !== resources.length) { doc.resources = next; writeYaml(file, doc); changed = true; }", " const observabilityFile = path.join(runtimePath, 'observability.yaml');", @@ -1877,8 +2016,8 @@ export function nodeRuntimePipelinePostprocessScript(): string[] { " console.error(JSON.stringify({ event: 'unidesk-public-exposure-postprocess', ok: true, applied: true, changed, filePath: file, hostname: exposure.hostname, serverAddr: exposure.serverAddr, serverPort: exposure.serverPort, webProxy: exposure.webProxy.name, apiProxy: exposure.apiProxy.name, extraProxyCount: Array.isArray(exposure.extraProxies) ? exposure.extraProxies.length : 0, configSha256: tomlSha256 }));", " return { configured: true, changed, foundConfigMap, foundDeployment };", "}", - "const kustomizationChanged = patchKustomization();", "const runtimeWorkloadsChanged = patchRuntimeWorkloads();", + "const kustomizationChanged = patchKustomization();", "const externalPostgresChanged = patchExternalPostgres();", "const healthContractChanged = patchHealthContract();", "const publicExposureChanged = patchPublicExposure();", @@ -1904,6 +2043,8 @@ export function nodeRuntimePipelinePostprocessScript(): string[] { "const crypto = require('crypto');", "const YAML = require('yaml');", "const overlay = ${runtimeOverlay};", + "${runtimeGitopsObservabilityNativeScript}", + "const observabilityNative = globalThis.unideskRuntimeGitopsObservability;", "const runtimePath = String(overlay.runtimePath || '');", "function fail(reason, extra = {}) {", " console.error(JSON.stringify({ event: 'unidesk-runtime-gitops-verify', ok: false, reason, runtimePath, ...extra }));", @@ -1960,6 +2101,7 @@ export function nodeRuntimePipelinePostprocessScript(): string[] { "}", "function workloadRef(item, file, container) { return { file, kind: item && item.kind, name: item && item.metadata && item.metadata.name, container: container && container.name }; }", "function workloadChecks() {", + " const monitoringResources = [];", " const metricsRefs = [];", " const missingStartupProbes = [];", " const publicRuntimeImages = [];", @@ -1987,6 +2129,7 @@ export function nodeRuntimePipelinePostprocessScript(): string[] { " if (path.basename(file) === 'kustomization.yaml') continue;", " for (const doc of readYamlDocuments(file)) {", " for (const item of listItems(doc).filter(Boolean)) {", + " if (observabilityNative.isPrometheusOperatorResource(item)) monitoringResources.push(observabilityNative.prometheusOperatorResourceRef(item, file));", " const podSpec = podSpecFor(item);", " if (!isObject(podSpec)) continue;", " for (const container of Array.isArray(podSpec.containers) ? podSpec.containers : []) {", @@ -2026,7 +2169,7 @@ export function nodeRuntimePipelinePostprocessScript(): string[] { " }", " }", " }", - " return { metricsRefs, missingStartupProbes, publicRuntimeImages, staleGitReadUrls, wrongPublicEndpoints, wrongDbSslModes, wrongCodeAgentRuntimeEnvs, wrongCloudWebRuntimeEnvs };", + " return { monitoringResources, metricsRefs, missingStartupProbes, publicRuntimeImages, staleGitReadUrls, wrongPublicEndpoints, wrongDbSslModes, wrongCodeAgentRuntimeEnvs, wrongCloudWebRuntimeEnvs };", "}", "const checks = [];", "const workloadCheck = workloadChecks();", @@ -2035,6 +2178,7 @@ export function nodeRuntimePipelinePostprocessScript(): string[] { " if (!fs.existsSync(kustomizationPath)) fail('kustomization-missing');", " const resources = readYaml(kustomizationPath).resources || [];", " if (resources.includes('observability.yaml')) fail('observability-resource-still-rendered', { file: kustomizationPath });", + " if (workloadCheck.monitoringResources.length > 0) fail('prometheus-operator-resource-still-rendered', { refs: workloadCheck.monitoringResources.slice(0, 12), count: workloadCheck.monitoringResources.length });", " if (workloadCheck.metricsRefs.length > 0) fail('observability-sidecar-still-rendered', { refs: workloadCheck.metricsRefs.slice(0, 12), count: workloadCheck.metricsRefs.length });", " checks.push('observability-disabled');", "}",