feat: surface g14 observability resource snapshot

This commit is contained in:
Codex
2026-06-05 02:38:00 +00:00
parent 64c936bf0c
commit 73a4fb57ee
4 changed files with 31 additions and 4 deletions
+17 -1
View File
@@ -1,4 +1,4 @@
import { activeV02PipelineRuns, g14ObservabilityQueryAssertion, gitMirrorFlushJobManifest, gitMirrorStatusSummary, gitMirrorSyncJobManifest, gitMirrorV02SyncRequirement, hwlabG14Help, hwlabG14MonitorStateFileName, parseGitMirrorStatusRefs, parsePipelineTaskRunMetrics, parseV02TriggerSnapshot, rolloutRecordBody, semanticChangelogBullets, summarizeV02CdStatus, v02CloseoutVerdict, v02CommitAlignment, v02ControlPlaneRefreshScriptHash, v02ControlPlaneRenderScript, v02ExistingPipelineRunReuseDecision, v02FalseGreenGuard, v02GitMirrorPreSyncWaitMs, v02LatestOnlyTargetValidation, v02PipelineServiceIds, v02PrAutomationCommentBody, v02ReusableGitMirrorPreSyncMarker, v02ReusableRefreshMarker, v02TaskRunPerformanceSummary } from "./src/hwlab-g14";
import { activeV02PipelineRuns, g14ObservabilityQueryAssertion, gitMirrorFlushJobManifest, gitMirrorStatusSummary, gitMirrorSyncJobManifest, gitMirrorV02SyncRequirement, hwlabG14Help, hwlabG14MonitorStateFileName, parseGitMirrorStatusRefs, parseK8sCpuMillicores, parseK8sMemoryMiB, parsePipelineTaskRunMetrics, parseV02TriggerSnapshot, rolloutRecordBody, semanticChangelogBullets, summarizeV02CdStatus, v02CloseoutVerdict, v02CommitAlignment, v02ControlPlaneRefreshScriptHash, v02ControlPlaneRenderScript, v02ExistingPipelineRunReuseDecision, v02FalseGreenGuard, v02GitMirrorPreSyncWaitMs, v02LatestOnlyTargetValidation, v02PipelineServiceIds, v02PrAutomationCommentBody, v02ReusableGitMirrorPreSyncMarker, v02ReusableRefreshMarker, v02TaskRunPerformanceSummary } from "./src/hwlab-g14";
import { runCommand } from "./src/command";
function assertCondition(condition: unknown, message: string, detail: unknown = {}): void {
@@ -114,6 +114,20 @@ assertCondition(
"observability CLI must fail visibly on unsupported options instead of silently ignoring friction-prone flags",
unsupportedObservabilityJson,
);
// CPU quantity fixtures for the observability resource snapshot. They cover the
// three spellings exercised here: an "n"-suffixed value, an "m"-suffixed value,
// and a bare number. Each fixture is parsed once so the observed values can be
// attached as failure detail instead of being lost when the assertion fails.
const parsedCpuMillicoreSamples = {
  nanoSuffix: parseK8sCpuMillicores("46095136n"),
  milliSuffix: parseK8sCpuMillicores("47m"),
  bareNumber: parseK8sCpuMillicores("1"),
};
assertCondition(
  parsedCpuMillicoreSamples.nanoSuffix !== null
    // The "n" fixture maps to a fractional millicore value, so compare within a tolerance.
    && Math.abs((parsedCpuMillicoreSamples.nanoSuffix ?? 0) - 46.095136) < 0.000001
    && parsedCpuMillicoreSamples.milliSuffix === 47
    && parsedCpuMillicoreSamples.bareNumber === 1000,
  "observability resource snapshot must convert metrics.k8s.io CPU quantities to millicores",
  parsedCpuMillicoreSamples,
);
// Memory quantity fixtures for the observability resource snapshot. They cover
// a "Ki"-suffixed value, a "Mi"-suffixed value, and a bare number (1048576 is
// expected to come back as exactly 1 MiB). Each fixture is parsed once so the
// observed values can be attached as failure detail when the assertion fails.
const parsedMemoryMiBSamples = {
  kiSuffix: parseK8sMemoryMiB("99860Ki"),
  miSuffix: parseK8sMemoryMiB("97Mi"),
  bareNumber: parseK8sMemoryMiB("1048576"),
};
assertCondition(
  parsedMemoryMiBSamples.kiSuffix !== null
    // 99860 / 1024 is fractional, so compare within a tolerance rather than exactly.
    && Math.abs((parsedMemoryMiBSamples.kiSuffix ?? 0) - 97.51953125) < 0.000001
    && parsedMemoryMiBSamples.miSuffix === 97
    && parsedMemoryMiBSamples.bareNumber === 1,
  "observability resource snapshot must convert metrics.k8s.io memory quantities to MiB",
  parsedMemoryMiBSamples,
);
const v02CommentBody = v02PrAutomationCommentBody({
pr: {
@@ -741,6 +755,8 @@ console.log(JSON.stringify({
"observability help exposes assertion, target, boundary, and closeout entrypoints",
"observability query assertions report count and terminal value pass/fail",
"observability CLI rejects unsupported options with visible JSON errors",
"observability resource snapshot converts metrics.k8s.io CPU quantities to millicores",
"observability resource snapshot converts metrics.k8s.io memory quantities to MiB",
"git mirror sync is a manual devops-infra Job, not a CronJob",
"git mirror flush is a manual devops-infra Job, not a CronJob",
"trigger-current can decide whether v0.2 git mirror pre-sync is required",
+9
View File
@@ -5681,6 +5681,7 @@ function closeoutAdvice(summary: Record<string, unknown>): string[] {
if (summary.scrapeReachable !== "pass") advice.push("scrapeReachable failed -> check ServiceMonitor labels, metrics sidecar port name, and Prometheus target discovery");
if (summary.sidecarServing !== "pass") advice.push("sidecarServing failed -> check hwlab-metrics sidecar readiness, restartCount, and metrics script/container logs");
if (summary.businessHealthProbe !== "pass") advice.push("businessHealthProbe failed -> up=1 but health_probe=0 usually means sidecar can be scraped but cannot reach the business health endpoint");
if (summary.resourceSnapshot !== "pass") advice.push("resourceSnapshot failed -> check metrics.k8s.io APIService and metrics-server availability on G14 k3s");
if (summary.namespaceControlPlaneBoundary !== "pass") advice.push("namespaceControlPlaneBoundary failed -> remove Prometheus/Alertmanager from workload namespace; shared control plane belongs in devops-infra");
if (summary.publicMetricsExposure !== "pass") advice.push("publicMetricsExposure failed -> public /metrics returned Prometheus text; remove FRP/edge exposure or add an authenticated internal-only route");
return advice;
@@ -5697,6 +5698,7 @@ function runG14ObservabilityCloseout(options: G14ObservabilityOptions): Record<s
const platformReady = record(status.crds).ok === true && record(status.operator).ok === true && record(status.prometheus).ok === true && record(status.query).ok === true;
const namespaceBoundaryOk = record(record(boundary.namespaceBoundary)).ok === true && record(record(boundary.infraControlPlane)).ok === true;
const publicDenied = record(record(boundary.publicMetricsExposure)).ok === true;
const resourceOk = record(record(targets.resourceSnapshot)).ok === true;
const summary = {
platformReady: passFail(platformReady),
workloadMonitorCount: numericValue(record(status.workloadMonitors).count) ?? numericValue(record(targets.monitors).count) ?? 0,
@@ -5705,6 +5707,7 @@ function runG14ObservabilityCloseout(options: G14ObservabilityOptions): Record<s
sidecarServing: passFail(queryOk("sidecarServing") && sidecarsOk),
businessHealthProbe: passFail(queryOk("businessHealthProbe")),
sidecarReady: passFail(sidecarsOk),
resourceSnapshot: passFail(resourceOk),
namespaceControlPlaneBoundary: passFail(namespaceBoundaryOk),
publicMetricsExposure: passFail(publicDenied),
publicMetricsExposureState: publicDenied ? "denied" : "exposed-or-unknown",
@@ -5739,12 +5742,18 @@ function runG14ObservabilityCloseout(options: G14ObservabilityOptions): Record<s
readySidecarCount: record(targets.sidecars).readyCount ?? null,
healthProbeDuration: record(record(targets.levelSummary).healthProbeDuration),
scrapeDuration: record(record(targets.levelSummary).scrapeDuration),
resourceSnapshot: record(targets.resourceSnapshot),
resourceUsage: record(record(targets.levelSummary).resourceUsage),
services: arrayRecords(record(targets.levelSummary).services).map((service) => ({
serviceId: service.serviceId ?? null,
scrapeReachable: service.scrapeReachable ?? null,
sidecarServing: service.sidecarServing ?? null,
businessHealthProbe: service.businessHealthProbe ?? null,
statusCode: service.statusCode ?? null,
totalCpuMillicores: service.totalCpuMillicores ?? null,
totalMemoryMiB: service.totalMemoryMiB ?? null,
businessCpuMillicores: service.businessCpuMillicores ?? null,
businessMemoryMiB: service.businessMemoryMiB ?? null,
})),
},
boundary: {