fix: expose hwlab argo closeout diagnostics

This commit is contained in:
Codex
2026-07-04 10:26:32 +00:00
parent 419ed4519b
commit 519abbdce1
2 changed files with 230 additions and 6 deletions
+152 -5
View File
@@ -226,6 +226,9 @@ export function nodeRuntimeControlPlaneStatus(scoped: ReturnType<typeof parseNod
? codeAgentRuntimeDegradedReason ?? "code-agent-runtime-not-ready"
: "runtime-not-ready";
const argoReady = argo.exitCode === 0 && repoURL === spec.argoRepoUrl && targetRevision === spec.gitopsBranch && path === spec.runtimePath && syncStatus === "Synced" && health === "Healthy";
const argoDiagnostics = argo.exitCode === 0 && !argoReady
? nodeRuntimeArgoDiagnostics(spec, probeTimeoutSeconds)
: null;
const pipelineRunReady = pipelineRunProbe !== null && pipelineRunProbe.status === "True";
const pipelineRunDegradedReason = typeof pipelineRunDiagnostics?.degradedReason === "string"
? pipelineRunDiagnostics.degradedReason
@@ -296,6 +299,7 @@ export function nodeRuntimeControlPlaneStatus(scoped: ReturnType<typeof parseNod
targetGitopsRevision,
syncStatus,
health,
diagnostics: argoDiagnostics,
result: compactRuntimeCommand(argo),
},
pipelineRun: pipelineRunProbe,
@@ -649,6 +653,7 @@ export function summarizeNodeRuntimeControlPlaneStatus(status: Record<string, un
revisionObserved: typeof argo.targetGitopsRevision === "string" && argo.syncRevision === argo.targetGitopsRevision,
syncStatus: argo.syncStatus ?? null,
health: argo.health ?? null,
diagnostics: compactArgoDiagnostics(record(argo.diagnostics)),
},
runtime: {
namespace: runtime.namespace ?? null,
@@ -716,6 +721,120 @@ export function summarizeNodeRuntimeControlPlaneStatus(status: Record<string, un
};
}
/**
 * Collects diagnostics for the Argo CD Application named `spec.app`.
 *
 * Four kubectl commands are issued through `runNodeK3sArgs` against the
 * `argocd` namespace, each rendering tab-separated rows with a go-template:
 * per-resource sync/health, the last operation state plus its sync-result
 * resources, Application conditions, and events whose involved object name
 * equals the application name.
 *
 * @param spec Runtime lane spec; `spec.app` is used as the Application name.
 * @param timeoutSeconds Timeout forwarded unchanged to every command.
 * @returns A record with the parsed rows, capped to small slices, plus the
 *   compacted command results. `ok` reflects the three Application queries
 *   only; the events query does not influence it.
 */
function nodeRuntimeArgoDiagnostics(spec: HwlabRuntimeLaneSpec, timeoutSeconds: number): Record<string, unknown> {
  // Each template emits one row per item; messages are passed through printf "%.500s".
  const resourceTemplate = `{{range .status.resources}}{{.kind}}{{"\\t"}}{{.namespace}}{{"\\t"}}{{.name}}{{"\\t"}}{{.status}}{{"\\t"}}{{with .health}}{{.status}}{{"\\t"}}{{printf "%.500s" .message}}{{else}}{{"\\t"}}{{end}}{{"\\n"}}{{end}}`;
  const operationTemplate = `{{with .status.operationState}}{{.phase}}{{"\\t"}}{{printf "%.500s" .message}}{{"\\t"}}{{.startedAt}}{{"\\t"}}{{.finishedAt}}{{"\\t"}}{{with .syncResult}}{{.revision}}{{"\\t"}}{{with .source}}{{.repoURL}}{{end}}{{end}}{{"\\n"}}{{with .syncResult}}{{range .resources}}{{.group}}{{"\\t"}}{{.kind}}{{"\\t"}}{{.namespace}}{{"\\t"}}{{.name}}{{"\\t"}}{{.status}}{{"\\t"}}{{printf "%.500s" .message}}{{"\\t"}}{{.hookPhase}}{{"\\t"}}{{.syncPhase}}{{"\\n"}}{{end}}{{end}}{{end}}`;
  const conditionTemplate = `{{range .status.conditions}}{{.type}}{{"\\t"}}{{printf "%.500s" .message}}{{"\\t"}}{{.lastTransitionTime}}{{"\\n"}}{{end}}`;
  const eventTemplate = `{{range .items}}{{.type}}{{"\\t"}}{{.reason}}{{"\\t"}}{{printf "%.500s" .message}}{{"\\t"}}{{.count}}{{"\\t"}}{{.lastTimestamp}}{{"\\n"}}{{end}}`;

  // The three Application queries share one kubectl invocation and differ only in the template.
  const queryApplication = (template: string) =>
    runNodeK3sArgs(spec, ["kubectl", "-n", "argocd", "get", "application", spec.app, "-o", `go-template=${template}`], timeoutSeconds);

  const resourceResult = queryApplication(resourceTemplate);
  const operationResult = queryApplication(operationTemplate);
  const conditionResult = queryApplication(conditionTemplate);
  const eventsResult = runNodeK3sArgs(
    spec,
    ["kubectl", "-n", "argocd", "get", "events", "--field-selector", `involvedObject.name=${spec.app}`, "--sort-by=.lastTimestamp", "-o", `go-template=${eventTemplate}`],
    timeoutSeconds,
  );

  const resources = argoResourceRows(resourceResult.stdout);
  // A resource is a problem when it reports a sync status other than "Synced"
  // or a health status other than "Healthy"; absent values are not flagged.
  const problemResources = resources
    .filter((row) => {
      const outOfSync = typeof row.status === "string" && row.status !== "Synced";
      const unhealthy = typeof row.healthStatus === "string" && row.healthStatus !== "Healthy";
      return outOfSync || unhealthy;
    })
    .slice(0, 12);
  const operation = argoOperationRows(operationResult.stdout);
  // Keep only the trailing rows; events were requested sorted by lastTimestamp.
  const conditions = argoConditionRows(conditionResult.stdout).slice(-8);
  const events = argoEventRows(eventsResult.stdout).slice(-8);

  const applicationQueriesSucceeded = [resourceResult, operationResult, conditionResult].every((probe) => probe.exitCode === 0);

  return {
    ok: applicationQueriesSucceeded,
    application: spec.app,
    resourceCount: resources.length,
    problemResourceCount: problemResources.length,
    problemResources,
    operationState: operation.state,
    operationResources: operation.resources.slice(0, 12),
    conditions,
    events,
    result: compactRuntimeCommand(operationResult),
    resourcesResult: compactRuntimeCommand(resourceResult),
    conditionsResult: compactRuntimeCommand(conditionResult),
    eventsResult: compactRuntimeCommand(eventsResult),
    valuesPrinted: false,
  };
}
/**
 * Reduces an Argo diagnostics record to the fields shown in the status summary.
 *
 * `nodeRuntimeArgoDiagnostics` keeps the *tail* of `conditions` and `events`
 * (`slice(-8)`, with events requested sorted ascending by `lastTimestamp`), so
 * the compact view trims from the same end. Taking the head here would discard
 * the most recent entries, which are the ones a closeout diagnosis needs.
 *
 * @param diagnostics Diagnostics record; an empty record means "no diagnostics".
 * @returns `null` for an empty record, otherwise a trimmed copy in which
 *   non-array list fields are replaced by empty arrays.
 */
function compactArgoDiagnostics(diagnostics: Record<string, unknown>): Record<string, unknown> | null {
  if (Object.keys(diagnostics).length === 0) return null;
  return {
    ok: diagnostics.ok === true,
    problemResourceCount: diagnostics.problemResourceCount ?? null,
    problemResources: Array.isArray(diagnostics.problemResources) ? diagnostics.problemResources.slice(0, 8) : [],
    operationState: diagnostics.operationState ?? null,
    operationResources: Array.isArray(diagnostics.operationResources) ? diagnostics.operationResources.slice(0, 8) : [],
    // Trailing entries: same end of the list that the producer preserved.
    conditions: Array.isArray(diagnostics.conditions) ? diagnostics.conditions.slice(-6) : [],
    events: Array.isArray(diagnostics.events) ? diagnostics.events.slice(-6) : [],
    valuesPrinted: false,
  };
}
/**
 * Normalizes a diagnostic message for display.
 *
 * Non-string and empty values yield `null`. Otherwise every whitespace run is
 * collapsed to a single space, the text is trimmed, and the result is handed
 * to `webObserveShort` with a limit of 500 (presumably a length cap; the
 * helper is defined elsewhere).
 */
function shortDiagnosticText(value: unknown): string | null {
  if (typeof value === "string" && value.length > 0) {
    const singleLine = value.replace(/\s+/gu, " ").trim();
    return webObserveShort(singleLine, 500);
  }
  return null;
}
/**
 * Parses tab-separated resource rows into records.
 *
 * Expected column order: kind, namespace, name, sync status, health status,
 * health message. Missing or empty columns become `null`. A line is skipped
 * only when both kind and name are empty.
 */
function argoResourceRows(text: string): Record<string, unknown>[] {
  const parsed: Record<string, unknown>[] = [];
  for (const line of text.split(/\r?\n/u)) {
    const cells = line.split("\t");
    const kind = cells[0] ?? "";
    const namespace = cells[1] ?? "";
    const name = cells[2] ?? "";
    const status = cells[3] ?? "";
    const healthStatus = cells[4] ?? "";
    const healthMessage = cells[5] ?? "";
    // Blank separator lines carry neither a kind nor a name.
    if (kind.length === 0 && name.length === 0) continue;
    parsed.push({
      kind: kind || null,
      namespace: namespace || null,
      name: name || null,
      status: status || null,
      healthStatus: healthStatus || null,
      healthMessage: shortDiagnosticText(healthMessage),
    });
  }
  return parsed;
}
/**
 * Parses the operation-state output: the first non-empty line describes the
 * operation (phase, message, startedAt, finishedAt, sync-result revision,
 * sync-result source), and every following non-empty line describes one
 * sync-result resource (group, kind, namespace, name, status, message,
 * hookPhase, syncPhase).
 *
 * Resource rows are kept unless they are "Synced" with neither a message nor
 * a hook phase. Empty input yields an all-`null` state and no resources.
 *
 * NOTE(review): if kubectl renders absent keys as a placeholder such as
 * `<no value>`, `hookPhase` would never be `null` and the filter below would
 * keep every row — confirm against real command output.
 */
function argoOperationRows(text: string): { state: Record<string, unknown>; resources: Record<string, unknown>[] } {
  const nonEmptyLines = text.split(/\r?\n/u).filter((line) => line.length > 0);
  const header = (nonEmptyLines[0] ?? "").split("\t");
  const phase = header[0] ?? "";
  const message = header[1] ?? "";
  const startedAt = header[2] ?? "";
  const finishedAt = header[3] ?? "";
  const syncResultRevision = header[4] ?? "";
  const syncResultSource = header[5] ?? "";

  const resources: Record<string, unknown>[] = [];
  for (const line of nonEmptyLines.slice(1)) {
    const cells = line.split("\t");
    const status = (cells[4] ?? "") || null;
    const resourceMessage = shortDiagnosticText(cells[5] ?? "");
    const hookPhase = (cells[6] ?? "") || null;
    // Drop rows that synced cleanly and carry nothing worth reporting.
    if (status === "Synced" && resourceMessage === null && hookPhase === null) continue;
    resources.push({
      group: (cells[0] ?? "") || null,
      kind: (cells[1] ?? "") || null,
      namespace: (cells[2] ?? "") || null,
      name: (cells[3] ?? "") || null,
      status,
      message: resourceMessage,
      hookPhase,
      syncPhase: (cells[7] ?? "") || null,
    });
  }

  return {
    state: {
      phase: phase || null,
      message: shortDiagnosticText(message),
      startedAt: startedAt || null,
      finishedAt: finishedAt || null,
      syncResultRevision: syncResultRevision || null,
      syncResultSource: syncResultSource || null,
    },
    resources,
  };
}
/**
 * Parses tab-separated condition rows (type, message, lastTransitionTime).
 * Lines with an empty type are skipped; an empty transition time becomes `null`.
 */
function argoConditionRows(text: string): Record<string, unknown>[] {
  const parsed: Record<string, unknown>[] = [];
  for (const line of text.split(/\r?\n/u)) {
    const cells = line.split("\t");
    const type = cells[0] ?? "";
    if (type.length === 0) continue;
    const message = cells[1] ?? "";
    const lastTransitionTime = cells[2] ?? "";
    parsed.push({
      type,
      message: shortDiagnosticText(message),
      lastTransitionTime: lastTransitionTime || null,
    });
  }
  return parsed;
}
/**
 * Parses tab-separated event rows (type, reason, message, count,
 * lastTimestamp). A line is skipped only when both type and reason are empty.
 * The count column is converted by `numericField`, which is defined elsewhere.
 */
function argoEventRows(text: string): Record<string, unknown>[] {
  const parsed: Record<string, unknown>[] = [];
  for (const line of text.split(/\r?\n/u)) {
    const cells = line.split("\t");
    const type = cells[0] ?? "";
    const reason = cells[1] ?? "";
    if (type.length === 0 && reason.length === 0) continue;
    const message = cells[2] ?? "";
    const count = cells[3] ?? "";
    const lastTimestamp = cells[4] ?? "";
    parsed.push({
      type: type || null,
      reason: reason || null,
      message: shortDiagnosticText(message),
      count: numericField(count),
      lastTimestamp: lastTimestamp || null,
    });
  }
  return parsed;
}
export function nodeRuntimeStatusNextAction(status: Record<string, unknown>, scoped: ReturnType<typeof parseNodeScopedDelegatedOptions>): string {
const reason = typeof status.degradedReason === "string" ? status.degradedReason : null;
if (reason === null) return `${nodeRuntimeStatusCommand(scoped)} --full`;
@@ -1521,6 +1640,15 @@ export function nodeRuntimePipelinePostprocessScript(): string[] {
" }",
" return changed;",
"}",
"function isPrometheusOperatorResource(item) { return item && item.apiVersion && String(item.apiVersion).startsWith('monitoring.coreos.com/') && ['ServiceMonitor', 'PrometheusRule', 'PodMonitor', 'Probe'].includes(item.kind); }",
"function stripPrometheusOperatorResources(doc) {",
" if (!(overlay.observability && overlay.observability.prometheusOperator === false)) return { docs: [doc], changed: false };",
" if (doc && doc.kind === 'List' && Array.isArray(doc.items)) {",
" const items = doc.items.filter((item) => !isPrometheusOperatorResource(item));",
" return { docs: items.length > 0 ? [{ ...doc, items }] : [], changed: items.length !== doc.items.length };",
" }",
" return isPrometheusOperatorResource(doc) ? { docs: [], changed: true } : { docs: [doc], changed: false };",
"}",
"function containerHasVolumeMount(container, name) { return isObject(container) && Array.isArray(container.volumeMounts) && container.volumeMounts.some((mount) => mount && mount.name === name); }",
"function removeMetricsSidecar(podSpec) {",
" if (!isObject(podSpec)) return false;",
@@ -1675,8 +1803,14 @@ export function nodeRuntimePipelinePostprocessScript(): string[] {
" if (path.basename(file) === 'kustomization.yaml') continue;",
" const docs = readYamlDocuments(file);",
" let changed = false;",
" const nextDocs = [];",
" for (const doc of docs) {",
" for (const item of listItems(doc).filter(Boolean)) {",
" const stripped = stripPrometheusOperatorResources(doc);",
" changed = stripped.changed || changed;",
" observabilityChanged = observabilityChanged || stripped.changed;",
" for (const nextDoc of stripped.docs) {",
" nextDocs.push(nextDoc);",
" for (const item of listItems(nextDoc).filter(Boolean)) {",
" if (!isObject(item)) continue;",
" if (overlay.observability && overlay.observability.prometheusOperator === false) {",
" const metadataChanged = stripMonitoringMetadata(item.metadata);",
@@ -1702,8 +1836,12 @@ export function nodeRuntimePipelinePostprocessScript(): string[] {
" codeAgentRuntimeChanged = codeAgentRuntimeChanged || envChanged.codeAgentRuntimeChanged;",
" cloudWebRuntimeChanged = cloudWebRuntimeChanged || envChanged.cloudWebRuntimeChanged;",
" }",
" }",
" }",
" if (changed) {",
" if (nextDocs.length === 0) fs.rmSync(file, { force: true });",
" else writeYamlDocuments(file, nextDocs);",
" }",
" if (changed) writeYamlDocuments(file, docs);",
" }",
" return { observabilityChanged, startupProbeChanged, imageRewriteChanged, gitReadUrlChanged, publicEndpointChanged, dbSslModeChanged, codeAgentRuntimeChanged, cloudWebRuntimeChanged };",
"}",
@@ -1712,7 +1850,12 @@ export function nodeRuntimePipelinePostprocessScript(): string[] {
" if (!fs.existsSync(file)) return false;",
" const doc = readYaml(file) || {};",
" const resources = Array.isArray(doc.resources) ? doc.resources : [];",
" const next = resources.filter((item) => !(overlay.observability && overlay.observability.prometheusOperator === false && item === 'observability.yaml'));",
" const next = resources.filter((item) => {",
" if (!(overlay.observability && overlay.observability.prometheusOperator === false)) return true;",
" const resource = String(item);",
" if (resource === 'observability.yaml') return false;",
" return !(/\\.ya?ml$/u.test(resource) && !fs.existsSync(path.join(runtimePath, resource)));",
" });",
" let changed = false;",
" if (next.length !== resources.length) { doc.resources = next; writeYaml(file, doc); changed = true; }",
" const observabilityFile = path.join(runtimePath, 'observability.yaml');",
@@ -1877,8 +2020,8 @@ export function nodeRuntimePipelinePostprocessScript(): string[] {
" console.error(JSON.stringify({ event: 'unidesk-public-exposure-postprocess', ok: true, applied: true, changed, filePath: file, hostname: exposure.hostname, serverAddr: exposure.serverAddr, serverPort: exposure.serverPort, webProxy: exposure.webProxy.name, apiProxy: exposure.apiProxy.name, extraProxyCount: Array.isArray(exposure.extraProxies) ? exposure.extraProxies.length : 0, configSha256: tomlSha256 }));",
" return { configured: true, changed, foundConfigMap, foundDeployment };",
"}",
"const kustomizationChanged = patchKustomization();",
"const runtimeWorkloadsChanged = patchRuntimeWorkloads();",
"const kustomizationChanged = patchKustomization();",
"const externalPostgresChanged = patchExternalPostgres();",
"const healthContractChanged = patchHealthContract();",
"const publicExposureChanged = patchPublicExposure();",
@@ -1958,8 +2101,10 @@ export function nodeRuntimePipelinePostprocessScript(): string[] {
" if (!exposure || !Array.isArray(exposure.extraProxies)) return [];",
" return exposure.extraProxies.filter((proxy) => proxy && proxy.cloudWebEnvName && proxy.publicBaseUrl).map((proxy) => ({ name: String(proxy.cloudWebEnvName), value: String(proxy.publicBaseUrl) }));",
"}",
"function isPrometheusOperatorResource(item) { return item && item.apiVersion && String(item.apiVersion).startsWith('monitoring.coreos.com/') && ['ServiceMonitor', 'PrometheusRule', 'PodMonitor', 'Probe'].includes(item.kind); }",
"function workloadRef(item, file, container) { return { file, kind: item && item.kind, name: item && item.metadata && item.metadata.name, container: container && container.name }; }",
"function workloadChecks() {",
" const monitoringResources = [];",
" const metricsRefs = [];",
" const missingStartupProbes = [];",
" const publicRuntimeImages = [];",
@@ -1987,6 +2132,7 @@ export function nodeRuntimePipelinePostprocessScript(): string[] {
" if (path.basename(file) === 'kustomization.yaml') continue;",
" for (const doc of readYamlDocuments(file)) {",
" for (const item of listItems(doc).filter(Boolean)) {",
" if (isPrometheusOperatorResource(item)) monitoringResources.push(workloadRef(item, file, null));",
" const podSpec = podSpecFor(item);",
" if (!isObject(podSpec)) continue;",
" for (const container of Array.isArray(podSpec.containers) ? podSpec.containers : []) {",
@@ -2026,7 +2172,7 @@ export function nodeRuntimePipelinePostprocessScript(): string[] {
" }",
" }",
" }",
" return { metricsRefs, missingStartupProbes, publicRuntimeImages, staleGitReadUrls, wrongPublicEndpoints, wrongDbSslModes, wrongCodeAgentRuntimeEnvs, wrongCloudWebRuntimeEnvs };",
" return { monitoringResources, metricsRefs, missingStartupProbes, publicRuntimeImages, staleGitReadUrls, wrongPublicEndpoints, wrongDbSslModes, wrongCodeAgentRuntimeEnvs, wrongCloudWebRuntimeEnvs };",
"}",
"const checks = [];",
"const workloadCheck = workloadChecks();",
@@ -2035,6 +2181,7 @@ export function nodeRuntimePipelinePostprocessScript(): string[] {
" if (!fs.existsSync(kustomizationPath)) fail('kustomization-missing');",
" const resources = readYaml(kustomizationPath).resources || [];",
" if (resources.includes('observability.yaml')) fail('observability-resource-still-rendered', { file: kustomizationPath });",
" if (workloadCheck.monitoringResources.length > 0) fail('prometheus-operator-resource-still-rendered', { refs: workloadCheck.monitoringResources.slice(0, 12), count: workloadCheck.monitoringResources.length });",
" if (workloadCheck.metricsRefs.length > 0) fail('observability-sidecar-still-rendered', { refs: workloadCheck.metricsRefs.slice(0, 12), count: workloadCheck.metricsRefs.length });",
" checks.push('observability-disabled');",
"}",