feat: add G14 observability control CLI

This commit is contained in:
Codex
2026-06-05 00:20:06 +00:00
parent 384fd613ff
commit 9b85ee7345
2 changed files with 524 additions and 5 deletions
+6 -3
View File
@@ -37,13 +37,16 @@ The shared Prometheus stack may discover application monitors across namespaces
Monitoring infrastructure must be declared as Git-backed desired state and applied through a controlled UniDesk or G14 GitOps path. A temporary `kubectl apply` may be used only as a `$dad-dev` P2 experiment; it must be followed by a durable source change and GitOps/CLI validation.
Recommended durable shape:
Current durable control surface:
- A dedicated G14 infrastructure desired-state path for `devops-infra` observability resources.
- A dedicated Argo CD Application or an equivalent UniDesk-controlled apply surface for that path.
- `bun scripts/cli.ts hwlab g14 observability status` reads the G14 monitoring state through the controlled `G14:k3s` route and reports CRDs, Prometheus Operator readiness, Prometheus readiness, selected workload monitors and a bounded `up` query.
- `bun scripts/cli.ts hwlab g14 observability apply --dry-run|--confirm` is the standard write path for the shared stack. It installs Prometheus Operator `v0.91.0`, Prometheus `v3.12.0`, Prometheus RBAC, the `devops-infra` Prometheus instance and the internal query Service.
- `bun scripts/cli.ts hwlab g14 observability query --promql <expr>` is the controlled query path. It uses the Kubernetes API server service proxy to reach the internal ClusterIP Service and must not expose Prometheus through FRP, NodePort or LoadBalancer.
- Cluster-scoped CRDs and ClusterRole/ClusterRoleBinding resources owned by the infrastructure path, not by a HWLAB lane Application whose destination is only `hwlab-v02`.
- Runtime workloads in `devops-infra` labeled with `app.kubernetes.io/part-of=devops-infra` and component labels such as `observability`, `prometheus`, `operator` or `query`.
Future GitOps work may move the same desired state behind a dedicated G14 infrastructure Argo CD Application. Until that exists, the UniDesk CLI source is the stable, audited entry point for this desired state, and direct native `kubectl` remains only an implementation detail inside that CLI.
Do not attach Prometheus Operator CRDs, Prometheus Deployments, Grafana or Alertmanager to `hwlab-g14-v02`. That Argo Application is scoped to the HWLAB v0.2 runtime namespace and must remain a lane-specific application rollout controller.
## Security
+518 -2
View File
@@ -38,6 +38,14 @@ const GIT_MIRROR_NAMESPACE = "devops-infra";
const GIT_MIRROR_MANIFEST_FIELD_MANAGER = "unidesk-hwlab-git-mirror";
const GIT_MIRROR_SYNC_JOB_PREFIX = "git-mirror-hwlab-sync-manual";
const GIT_MIRROR_LEGACY_CRONJOB = "git-mirror-hwlab-sync";
// Namespace that hosts the shared G14 observability stack.
const G14_OBSERVABILITY_NAMESPACE = "devops-infra";
// Field manager name passed to `kubectl apply --server-side` for observability resources.
const G14_OBSERVABILITY_FIELD_MANAGER = "unidesk-g14-observability";
// Pinned Prometheus Operator release; it selects the bundle asset downloaded during apply.
const G14_PROMETHEUS_OPERATOR_VERSION = "v0.91.0";
// Prometheus version written to `spec.version` of the Prometheus custom resource.
const G14_PROMETHEUS_VERSION = "v3.12.0";
// Name of the Prometheus custom resource; also the value of the `prometheus=<name>` pod selector.
const G14_PROMETHEUS_NAME = "g14-shared";
// ClusterIP Service that the status and query commands reach through the API server service proxy.
const G14_PROMETHEUS_SERVICE = "prometheus-g14-shared";
// Name shared by the Prometheus ServiceAccount, ClusterRole and ClusterRoleBinding.
const G14_PROMETHEUS_SERVICE_ACCOUNT = "g14-observability-prometheus";
// Upstream release bundle URL, derived from the pinned operator version.
const G14_PROMETHEUS_OPERATOR_RELEASE_ASSET = `https://github.com/prometheus-operator/prometheus-operator/releases/download/${G14_PROMETHEUS_OPERATOR_VERSION}/bundle.yaml`;
const V02_SERVICE_IDS = [
"hwlab-cloud-api",
"hwlab-cloud-web",
@@ -131,6 +139,15 @@ interface G14GitMirrorOptions {
timeoutSeconds: number;
}
/** Parsed options for `hwlab g14 observability <action>`. */
interface G14ObservabilityOptions {
/** Sub-command to run. */
action: "status" | "apply" | "query";
/** Always true for `status` and `query`; for `apply` it is true unless `--confirm` was given. */
dryRun: boolean;
/** True when `--confirm` was passed on the command line. */
confirm: boolean;
/** True when `--wait` was passed on the command line. */
wait: boolean;
/** Value of `--timeout-seconds`; the parser defaults it to 240 for `apply` and 120 otherwise. */
timeoutSeconds: number;
/** Single-line PromQL expression of at most 500 characters; defaults to `up{namespace="hwlab-v02"}`. */
query: string;
}
interface G14SecretOptions {
action: "status" | "ensure";
lane: "v02";
@@ -370,6 +387,27 @@ function parseGitMirrorOptions(args: string[]): G14GitMirrorOptions {
};
}
/**
 * Turns the arguments that follow `hwlab g14 observability` into typed options.
 *
 * The first argument selects the action. `--confirm` and `--dry-run` are mutually
 * exclusive, and only a confirmed `apply` is ever a non-dry-run. The PromQL
 * expression comes from `--promql`, then `--query`, then a built-in default.
 *
 * @param args Raw CLI arguments, starting with the action name.
 * @returns The validated option set.
 * @throws Error when the action is unknown, both mode flags are given, or the
 *   expression is longer than 500 characters or spans several lines.
 */
function parseObservabilityOptions(args: string[]): G14ObservabilityOptions {
  const action = args[0];
  if (!(action === "status" || action === "apply" || action === "query")) {
    throw new Error("observability usage: status|apply|query [--promql <expr>] [--dry-run|--confirm]");
  }

  const wantsConfirm = args.includes("--confirm");
  const wantsDryRun = args.includes("--dry-run");
  if (wantsConfirm && wantsDryRun) {
    throw new Error("observability accepts only one of --confirm or --dry-run");
  }

  const promql = optionValue(args, "--promql") ?? optionValue(args, "--query") ?? 'up{namespace="hwlab-v02"}';
  if (promql.length > 500) {
    throw new Error("--promql is limited to 500 characters");
  }
  if (/[\r\n]/.test(promql)) {
    throw new Error("--promql must be a single-line expression");
  }

  // Read-only actions are always dry; apply mutates only when explicitly confirmed.
  const isApply = action === "apply";
  const dryRun = !isApply || wantsDryRun || !wantsConfirm;
  const defaultTimeoutSeconds = isApply ? 240 : 120;

  return {
    action,
    confirm: wantsConfirm,
    wait: args.includes("--wait"),
    dryRun,
    timeoutSeconds: positiveIntegerOption(args, "--timeout-seconds", defaultTimeoutSeconds, 900),
    query: promql,
  };
}
function parseSecretOptions(args: string[]): G14SecretOptions {
const [actionRaw] = args;
if (actionRaw !== "status" && actionRaw !== "ensure") {
@@ -4121,6 +4159,473 @@ function runG14GitMirror(options: G14GitMirrorOptions): Record<string, unknown>
return runGitMirrorSync(options);
}
/**
 * Builds the label set attached to every resource of the shared observability stack.
 *
 * @param component Value stored under the `g14.pikastech.local/observability-component` label.
 * @returns A fresh label map with the part-of, component and observability-component labels.
 */
function observabilityLabels(component: string): Record<string, string> {
  const labels: Record<string, string> = {};
  labels["app.kubernetes.io/part-of"] = "devops-infra";
  labels["app.kubernetes.io/component"] = "observability";
  labels["g14.pikastech.local/observability-component"] = component;
  return labels;
}
/**
 * Returns the label that marks a namespace for monitor discovery.
 *
 * @returns A fresh single-entry label map on every call.
 */
function observabilityNamespaceLabel(): Record<string, string> {
  const discoveryKey = "g14.pikastech.local/observability-discovery";
  return { [discoveryKey]: "enabled" };
}
/**
 * Builds the desired-state manifest for the shared Prometheus stack as a Kubernetes `List`.
 *
 * Items, in order: the observability namespace, the v0.2 runtime namespace (discovery label
 * only), the Prometheus ServiceAccount, its ClusterRole and ClusterRoleBinding, the
 * Prometheus custom resource, and the ClusterIP query Service.
 *
 * @returns A plain object that can be serialised to JSON and handed to `kubectl apply`.
 */
function g14PrometheusManifest(): Record<string, unknown> {
  // Every monitor kind uses the same pair of selectors: opted-in namespaces, opted-in objects.
  const discoveryNamespaces = { matchLabels: observabilityNamespaceLabel() };
  const optedInMonitors = { matchLabels: { "hwlab.pikastech.local/monitoring": "enabled" } };

  const infraNamespace = {
    apiVersion: "v1",
    kind: "Namespace",
    metadata: {
      name: G14_OBSERVABILITY_NAMESPACE,
      labels: Object.assign({}, observabilityNamespaceLabel(), observabilityLabels("namespace")),
    },
  };

  const workloadNamespace = {
    apiVersion: "v1",
    kind: "Namespace",
    metadata: {
      name: V02_RUNTIME_NAMESPACE,
      labels: observabilityNamespaceLabel(),
    },
  };

  const serviceAccount = {
    apiVersion: "v1",
    kind: "ServiceAccount",
    metadata: {
      name: G14_PROMETHEUS_SERVICE_ACCOUNT,
      namespace: G14_OBSERVABILITY_NAMESPACE,
      labels: observabilityLabels("prometheus"),
    },
  };

  // Read-only access to the objects Prometheus uses for target discovery and scraping.
  const clusterRole = {
    apiVersion: "rbac.authorization.k8s.io/v1",
    kind: "ClusterRole",
    metadata: {
      name: G14_PROMETHEUS_SERVICE_ACCOUNT,
      labels: observabilityLabels("prometheus"),
    },
    rules: [
      {
        apiGroups: [""],
        resources: ["nodes", "nodes/metrics", "services", "endpoints", "pods"],
        verbs: ["get", "list", "watch"],
      },
      {
        apiGroups: ["discovery.k8s.io"],
        resources: ["endpointslices"],
        verbs: ["get", "list", "watch"],
      },
      {
        apiGroups: ["networking.k8s.io"],
        resources: ["ingresses"],
        verbs: ["get", "list", "watch"],
      },
      {
        nonResourceURLs: ["/metrics"],
        verbs: ["get"],
      },
    ],
  };

  const clusterRoleBinding = {
    apiVersion: "rbac.authorization.k8s.io/v1",
    kind: "ClusterRoleBinding",
    metadata: {
      name: G14_PROMETHEUS_SERVICE_ACCOUNT,
      labels: observabilityLabels("prometheus"),
    },
    roleRef: {
      apiGroup: "rbac.authorization.k8s.io",
      kind: "ClusterRole",
      name: G14_PROMETHEUS_SERVICE_ACCOUNT,
    },
    subjects: [{
      kind: "ServiceAccount",
      name: G14_PROMETHEUS_SERVICE_ACCOUNT,
      namespace: G14_OBSERVABILITY_NAMESPACE,
    }],
  };

  const prometheusSpec = {
    replicas: 1,
    version: G14_PROMETHEUS_VERSION,
    serviceAccountName: G14_PROMETHEUS_SERVICE_ACCOUNT,
    scrapeInterval: "30s",
    evaluationInterval: "30s",
    retention: "7d",
    resources: {
      requests: { cpu: "100m", memory: "256Mi" },
      limits: { cpu: "500m", memory: "1Gi" },
    },
    storage: {
      volumeClaimTemplate: {
        spec: {
          storageClassName: "local-path",
          accessModes: ["ReadWriteOnce"],
          resources: { requests: { storage: "10Gi" } },
        },
      },
    },
    serviceMonitorSelector: optedInMonitors,
    serviceMonitorNamespaceSelector: discoveryNamespaces,
    podMonitorSelector: optedInMonitors,
    podMonitorNamespaceSelector: discoveryNamespaces,
    ruleSelector: optedInMonitors,
    ruleNamespaceSelector: discoveryNamespaces,
    probeSelector: optedInMonitors,
    probeNamespaceSelector: discoveryNamespaces,
  };

  const prometheus = {
    apiVersion: "monitoring.coreos.com/v1",
    kind: "Prometheus",
    metadata: {
      name: G14_PROMETHEUS_NAME,
      namespace: G14_OBSERVABILITY_NAMESPACE,
      labels: observabilityLabels("prometheus"),
    },
    spec: prometheusSpec,
  };

  // Internal-only query endpoint; it selects pods by the `prometheus=<name>` label.
  const queryService = {
    apiVersion: "v1",
    kind: "Service",
    metadata: {
      name: G14_PROMETHEUS_SERVICE,
      namespace: G14_OBSERVABILITY_NAMESPACE,
      labels: observabilityLabels("query"),
    },
    spec: {
      type: "ClusterIP",
      selector: {
        prometheus: G14_PROMETHEUS_NAME,
      },
      ports: [{
        name: "web",
        port: 9090,
        targetPort: "web",
      }],
    },
  };

  return {
    apiVersion: "v1",
    kind: "List",
    items: [
      infraNamespace,
      workloadNamespace,
      serviceAccount,
      clusterRole,
      clusterRoleBinding,
      prometheus,
      queryService,
    ],
  };
}
/**
 * Decodes the stdout of one shell section as a JSON object.
 *
 * @param section The captured section, or `undefined` when the section is missing.
 * @returns The decoded value passed through `record`, or `{}` when stdout is empty or
 *   decoding fails. Failures are swallowed on purpose so a broken section does not abort
 *   the caller.
 */
function parseSectionJson(section: ShellSection | undefined): Record<string, unknown> {
  const payload = String(section?.stdout ?? "").trim();
  if (!payload) {
    return {};
  }
  try {
    const decoded: unknown = JSON.parse(payload);
    return record(decoded);
  } catch {
    return {};
  }
}
/**
 * Decodes the stdout of one shell section and returns the entries of its `items` array.
 *
 * @param section The captured section, or `undefined` when the section is missing.
 * @returns Each entry of `items` passed through `record`; an empty array when the payload
 *   has no `items` array (for example a single object rather than a list).
 */
function parseSectionJsonArray(section: ShellSection | undefined): Record<string, unknown>[] {
  const { items } = parseSectionJson(section);
  if (!Array.isArray(items)) {
    return [];
  }
  return items.map((entry) => record(entry));
}
/**
 * Looks up the status of a condition by type.
 *
 * Only the first entry whose `type` matches is considered.
 *
 * @param items Condition objects to search.
 * @param type Condition type to look for.
 * @returns The matching entry's `status` when it is a string; `null` when there is no
 *   match or the status is not a string.
 */
function conditionStatus(items: Record<string, unknown>[], type: string): string | null {
  const match = items.find((candidate) => candidate.type === type);
  const status = match?.status;
  return typeof status === "string" ? status : null;
}
/**
 * Reports whether a Deployment object has reached its desired replica count.
 *
 * @param deployment Deployment JSON as returned by `kubectl get deploy -o json`.
 * @returns True when both `status.readyReplicas` and `status.availableReplicas` are at
 *   least `spec.replicas`. Missing replicas default to 1; missing counts default to 0.
 */
function deploymentReady(deployment: Record<string, unknown>): boolean {
  const wanted = numericValue(record(deployment.spec).replicas) ?? 1;
  const observed = record(deployment.status);
  const counts = [
    numericValue(observed.readyReplicas) ?? 0,
    numericValue(observed.availableReplicas) ?? 0,
  ];
  return counts.every((count) => count >= wanted);
}
/**
 * Reports whether a Prometheus custom resource is serving.
 *
 * Only the `Available` condition is consulted. `Reconciled` is deliberately ignored: it
 * says the operator has synced the spec, which can be true while no Prometheus pod is
 * running yet, so it cannot stand in for readiness.
 *
 * @param prometheus Prometheus JSON as returned by `kubectl get prometheus -o json`.
 * @returns True when `status.conditions` contains `Available` with status `"True"`.
 */
function prometheusReady(prometheus: Record<string, unknown>): boolean {
  // Read the value once so the array check and the mapping operate on the same reference.
  const rawConditions = record(prometheus.status).conditions;
  const conditions = Array.isArray(rawConditions)
    ? rawConditions.map((item) => record(item))
    : [];
  return conditionStatus(conditions, "Available") === "True";
}
/**
 * Collects a read-only status report for the shared G14 observability stack.
 *
 * A single shell script is sent through `g14K3s`; it runs one `kubectl` read per section
 * and wraps each in begin/end markers so the sections can be parsed independently.
 *
 * @returns A report object. The top-level `ok` requires the command to succeed, every
 *   required CRD to be present, the operator Deployment to be ready and the Prometheus
 *   custom resource to exist. Prometheus readiness and the `up` query are reported in
 *   their own sub-sections and do not affect the top-level `ok`.
 */
function g14ObservabilityStatus(): Record<string, unknown> {
const startedAtMs = Date.now();
// API server service-proxy path for an instant `up` query against the internal Service.
const queryPath = `/api/v1/namespaces/${G14_OBSERVABILITY_NAMESPACE}/services/http:${G14_PROMETHEUS_SERVICE}:9090/proxy/api/v1/query?query=${encodeURIComponent("up")}`;
// CRDs that must all be present for the stack to count as installed.
const crds = [
"servicemonitors.monitoring.coreos.com",
"podmonitors.monitoring.coreos.com",
"prometheusrules.monitoring.coreos.com",
"prometheuses.monitoring.coreos.com",
"alertmanagers.monitoring.coreos.com",
];
const script = [
// `set +e` keeps the script going so every section is attempted even after a failure.
"set +e",
// section <name> <command...>: runs the command between markers and records its exit code.
"section() {",
" name=\"$1\"",
" shift",
" printf '__UNIDESK_SECTION_BEGIN__ %s\\n' \"$name\"",
" \"$@\"",
" code=$?",
" printf '\\n__UNIDESK_SECTION_END__ %s exit=%s\\n' \"$name\" \"$code\"",
"}",
`section namespace kubectl get namespace ${shellQuote(G14_OBSERVABILITY_NAMESPACE)} -o json`,
`section discoveryNamespace kubectl get namespace ${shellQuote(V02_RUNTIME_NAMESPACE)} -o json`,
`section crds kubectl get crd ${crds.map(shellQuote).join(" ")} -o json`,
`section operator kubectl get deploy -n ${shellQuote(G14_OBSERVABILITY_NAMESPACE)} prometheus-operator -o json`,
`section operatorPods kubectl get pods -n ${shellQuote(G14_OBSERVABILITY_NAMESPACE)} -l app.kubernetes.io/name=prometheus-operator -o json`,
`section prometheus kubectl get prometheus -n ${shellQuote(G14_OBSERVABILITY_NAMESPACE)} ${shellQuote(G14_PROMETHEUS_NAME)} -o json`,
`section prometheusPods kubectl get pods -n ${shellQuote(G14_OBSERVABILITY_NAMESPACE)} -l prometheus=${shellQuote(G14_PROMETHEUS_NAME)} -o json`,
`section prometheusService kubectl get service -n ${shellQuote(G14_OBSERVABILITY_NAMESPACE)} ${shellQuote(G14_PROMETHEUS_SERVICE)} -o json`,
`section workloadMonitors kubectl get servicemonitor,prometheusrule -n ${shellQuote(V02_RUNTIME_NAMESPACE)} -l hwlab.pikastech.local/monitoring=enabled -o json`,
`section query kubectl get --raw ${shellQuote(queryPath)}`,
].join("\n");
// NOTE(review): the second argument is presumably a timeout in milliseconds — confirm against g14K3s.
const bundle = g14K3s(["script", "--", script], 120_000);
const sections = parseShellSections(statusText(bundle));
// Each section is decoded independently; a missing or unparsable section yields {} or [].
const namespace = parseSectionJson(sections.namespace);
const discoveryNamespace = parseSectionJson(sections.discoveryNamespace);
const crdItems = parseSectionJsonArray(sections.crds);
const crdNames = crdItems.map((item) => String(record(item.metadata).name ?? "")).filter(Boolean);
const operator = parseSectionJson(sections.operator);
const operatorPods = parseSectionJsonArray(sections.operatorPods);
const prometheus = parseSectionJson(sections.prometheus);
const prometheusPods = parseSectionJsonArray(sections.prometheusPods);
const prometheusService = parseSectionJson(sections.prometheusService);
const workloadMonitorItems = parseSectionJsonArray(sections.workloadMonitors);
const query = parseSectionJson(sections.query);
const requiredCrdsPresent = crds.every((name) => crdNames.includes(name));
const namespaceLabel = stringOrNull(record(record(namespace.metadata).labels)["g14.pikastech.local/observability-discovery"]);
const workloadNamespaceLabel = stringOrNull(record(record(discoveryNamespace.metadata).labels)["g14.pikastech.local/observability-discovery"]);
// An empty object means the resource was not returned, so it is treated as absent.
const operatorIsReady = Object.keys(operator).length > 0 && deploymentReady(operator);
const prometheusExists = Object.keys(prometheus).length > 0;
// Ready when the custom resource reports it, or when any pod has all of its containers ready.
const prometheusIsReady = prometheusExists && (
prometheusReady(prometheus)
|| prometheusPods.some((pod) => {
const statuses = Array.isArray(record(pod.status).containerStatuses) ? record(pod.status).containerStatuses.map((item) => record(item)) : [];
return statuses.length > 0 && statuses.every((status) => status.ready === true);
})
);
// The query counts as successful only when kubectl exited 0 and Prometheus answered "success".
const queryOk = sections.query?.exitCode === 0 && query.status === "success";
return {
ok: isCommandSuccess(bundle) && requiredCrdsPresent && operatorIsReady && prometheusExists,
command: "hwlab g14 observability status",
namespace: G14_OBSERVABILITY_NAMESPACE,
mode: "status",
elapsedMs: Date.now() - startedAtMs,
versions: {
prometheusOperator: G14_PROMETHEUS_OPERATOR_VERSION,
prometheus: G14_PROMETHEUS_VERSION,
operatorBundle: G14_PROMETHEUS_OPERATOR_RELEASE_ASSET,
},
discovery: {
namespaceLabel,
workloadNamespace: V02_RUNTIME_NAMESPACE,
workloadNamespaceLabel,
selectorLabel: "hwlab.pikastech.local/monitoring=enabled",
},
crds: {
ok: requiredCrdsPresent,
required: crds,
present: crdNames,
missing: crds.filter((name) => !crdNames.includes(name)),
sectionOk: shellSectionOk(sections.crds),
},
operator: {
ok: operatorIsReady,
deployment: stringOrNull(record(operator.metadata).name),
desiredReplicas: numericValue(record(operator.spec).replicas) ?? 1,
readyReplicas: numericValue(record(operator.status).readyReplicas) ?? 0,
availableReplicas: numericValue(record(operator.status).availableReplicas) ?? 0,
pods: operatorPods.map((pod) => ({
name: stringOrNull(record(pod.metadata).name),
phase: stringOrNull(record(pod.status).phase),
})),
sectionOk: shellSectionOk(sections.operator),
},
prometheus: {
ok: prometheusExists && prometheusIsReady,
name: G14_PROMETHEUS_NAME,
service: G14_PROMETHEUS_SERVICE,
serviceExists: Object.keys(prometheusService).length > 0,
ready: prometheusIsReady,
conditions: Array.isArray(record(prometheus.status).conditions) ? record(prometheus.status).conditions : [],
pods: prometheusPods.map((pod) => ({
name: stringOrNull(record(pod.metadata).name),
phase: stringOrNull(record(pod.status).phase),
// null when the pod reports no containerStatuses array at all.
ready: Array.isArray(record(pod.status).containerStatuses)
? record(pod.status).containerStatuses.map((item) => record(item)).every((status) => status.ready === true)
: null,
})),
sectionOk: shellSectionOk(sections.prometheus),
},
workloadMonitors: {
ok: shellSectionOk(sections.workloadMonitors),
namespace: V02_RUNTIME_NAMESPACE,
count: workloadMonitorItems.length,
items: workloadMonitorItems.map((item) => ({
kind: item.kind ?? null,
name: stringOrNull(record(item.metadata).name),
})),
stderr: shellSectionOk(sections.workloadMonitors) ? "" : commandErrorSummary(bundle),
},
query: {
ok: queryOk,
promql: "up",
serviceProxyPath: queryPath,
resultType: nested(query, ["data", "resultType"]) ?? null,
resultCount: Array.isArray(nested(query, ["data", "result"])) ? (nested(query, ["data", "result"]) as unknown[]).length : null,
status: query.status ?? null,
sectionOk: shellSectionOk(sections.query),
stderr: shellSectionOk(sections.query) ? "" : commandErrorSummary(bundle),
},
result: compactCommandResult(bundle),
// Suggest a query once the stack is installed; otherwise point at the apply command.
next: requiredCrdsPresent && operatorIsReady && prometheusExists
? { query: 'bun scripts/cli.ts hwlab g14 observability query --promql \'up{namespace="hwlab-v02"}\'' }
: { apply: "bun scripts/cli.ts hwlab g14 observability apply --confirm" },
};
}
/**
 * Renders the shell script that installs (or dry-runs) the shared observability stack.
 *
 * Script steps, in order:
 *  1. decode the base64 stack manifest into a temp directory that is removed on exit;
 *  2. default the proxy environment variables and download the operator release bundle;
 *  3. run `kubectl kustomize` to place the bundle in the observability namespace and
 *     check that the namespace appears in the rendered output;
 *  4. server-side apply the namespace and the operator bundle;
 *  5. on a confirmed apply, wait for the monitoring CRDs and the operator rollout;
 *  6. apply the stack manifest, or on dry-run client-validate it without the Prometheus
 *     custom resource, because its CRD may not be installed yet;
 *  7. on a confirmed apply, wait best-effort for Prometheus and list the resources;
 *  8. print a one-line `observability_apply=ok ...` summary.
 *
 * @param options Parsed options; `dryRun` and `timeoutSeconds` are read.
 * @param manifestB64 Base64-encoded JSON of the stack manifest.
 * @returns The script text, one command per line.
 */
function g14ObservabilityApplyScript(options: G14ObservabilityOptions, manifestB64: string): string {
  const dryRunArg = options.dryRun ? "--dry-run=server" : "";
  const stackDryRunCommand = options.dryRun
    ? [
      "core_stack_path=\"$tmpdir/g14-prometheus-core-stack.json\"",
      "node - \"$stack_path\" \"$core_stack_path\" <<'NODE'",
      "const fs = require('node:fs');",
      "const input = process.argv[2];",
      "const output = process.argv[3];",
      "const stack = JSON.parse(fs.readFileSync(input, 'utf8'));",
      "stack.items = (stack.items || []).filter((item) => item.kind !== 'Prometheus');",
      "fs.writeFileSync(output, JSON.stringify(stack));",
      "NODE",
      "kubectl apply --dry-run=client --validate=false -f \"$core_stack_path\"",
      "echo prometheus_cr_dry_run=skipped_until_monitoring_crds_are_installed",
    ].join("\n")
    : `kubectl apply --server-side --force-conflicts --field-manager=${shellQuote(G14_OBSERVABILITY_FIELD_MANAGER)} -f "$stack_path"`;
  const preStackWaitCommands = options.dryRun
    ? "echo observability_wait=skipped_dry_run"
    : [
      "kubectl wait --for=condition=Established --timeout=120s crd/servicemonitors.monitoring.coreos.com crd/podmonitors.monitoring.coreos.com crd/prometheusrules.monitoring.coreos.com crd/prometheuses.monitoring.coreos.com",
      `kubectl -n ${shellQuote(G14_OBSERVABILITY_NAMESPACE)} rollout status deploy/prometheus-operator --timeout=${options.timeoutSeconds}s`,
    ].join("\n");
  // `|| true` keeps these best-effort: a slow Prometheus must not fail the whole apply.
  const postStackWaitCommands = options.dryRun
    ? "echo prometheus_wait=skipped_dry_run"
    : [
      `kubectl -n ${shellQuote(G14_OBSERVABILITY_NAMESPACE)} wait --for=condition=Available --timeout=${options.timeoutSeconds}s prometheus/${G14_PROMETHEUS_NAME} || true`,
      `kubectl -n ${shellQuote(G14_OBSERVABILITY_NAMESPACE)} get deploy,pod,svc,prometheus -l app.kubernetes.io/component=observability -o wide || true`,
      `kubectl -n ${shellQuote(G14_OBSERVABILITY_NAMESPACE)} get pod -l prometheus=${shellQuote(G14_PROMETHEUS_NAME)} -o wide || true`,
    ].join("\n");
  return [
    "set -eu",
    `namespace=${shellQuote(G14_OBSERVABILITY_NAMESPACE)}`,
    `bundle_url=${shellQuote(G14_PROMETHEUS_OPERATOR_RELEASE_ASSET)}`,
    `operator_version=${shellQuote(G14_PROMETHEUS_OPERATOR_VERSION)}`,
    `prometheus_version=${shellQuote(G14_PROMETHEUS_VERSION)}`,
    `stack_b64=${shellQuote(manifestB64)}`,
    "tmpdir=$(mktemp -d /tmp/g14-observability-XXXXXX)",
    "cleanup() { rm -rf \"$tmpdir\"; }",
    "trap cleanup EXIT",
    "bundle_path=\"$tmpdir/operator-bundle.yaml\"",
    "operator_path=\"$tmpdir/operator-rendered.yaml\"",
    "stack_path=\"$tmpdir/g14-prometheus-stack.json\"",
    "printf '%s' \"$stack_b64\" | base64 -d > \"$stack_path\"",
    // Proxy defaults apply only when the variables are not already set in the environment.
    "export HTTP_PROXY=${HTTP_PROXY:-http://127.0.0.1:10808}",
    "export HTTPS_PROXY=${HTTPS_PROXY:-http://127.0.0.1:10808}",
    "export http_proxy=$HTTP_PROXY",
    "export https_proxy=$HTTPS_PROXY",
    "export NO_PROXY=${NO_PROXY:-localhost,127.0.0.1,::1,10.0.0.0/8,10.42.0.0/16,10.43.0.0/16,.svc,.svc.cluster.local,.cluster.local,kubernetes,kubernetes.default,kubernetes.default.svc}",
    "export no_proxy=$NO_PROXY",
    "curl -fsSL --connect-timeout 20 --retry 3 --retry-delay 2 -o \"$bundle_path\" \"$bundle_url\"",
    "cat > \"$tmpdir/kustomization.yaml\" <<'YAML'",
    "apiVersion: kustomize.config.k8s.io/v1beta1",
    "kind: Kustomization",
    `namespace: ${G14_OBSERVABILITY_NAMESPACE}`,
    "resources:",
    "- operator-bundle.yaml",
    "YAML",
    "kubectl kustomize \"$tmpdir\" > \"$operator_path\"",
    // Sanity check derived from the same constant as the kustomization, so the two cannot drift.
    `grep -q ${shellQuote(`namespace: ${G14_OBSERVABILITY_NAMESPACE}`)} "$operator_path"`,
    `kubectl create namespace ${shellQuote(G14_OBSERVABILITY_NAMESPACE)} --dry-run=client -o yaml | kubectl apply --server-side --force-conflicts --field-manager=${shellQuote(G14_OBSERVABILITY_FIELD_MANAGER)} ${dryRunArg} -f -`,
    `kubectl apply --server-side --force-conflicts --field-manager=${shellQuote(G14_OBSERVABILITY_FIELD_MANAGER)} ${dryRunArg} -f "$operator_path"`,
    preStackWaitCommands,
    stackDryRunCommand,
    postStackWaitCommands,
    `printf 'observability_apply=ok namespace=%s operator=%s prometheus=%s dryRun=%s\\n' "$namespace" "$operator_version" "$prometheus_version" ${shellQuote(String(options.dryRun))}`,
  ].join("\n");
}
/**
 * Applies (or dry-runs) the shared observability stack through the G14 k3s route.
 *
 * @param options Parsed options; `dryRun` selects the mode and `timeoutSeconds` bounds
 *   each in-script wait.
 * @returns A report with the command outcome. `manifest` is included only on dry-run;
 *   `status` is collected only after a successful confirmed apply.
 */
function runG14ObservabilityApply(options: G14ObservabilityOptions): Record<string, unknown> {
  const startedAtMs = Date.now();
  const manifest = g14PrometheusManifest();
  const manifestB64 = Buffer.from(JSON.stringify(manifest), "utf8").toString("base64");
  const script = g14ObservabilityApplyScript(options, manifestB64);
  // A confirmed apply can wait up to 120s for the CRDs, then up to `timeoutSeconds` for the
  // operator rollout and up to `timeoutSeconds` again for Prometheus availability. The outer
  // budget must cover the sum, or the best-effort Prometheus wait is cut off and the whole
  // apply is reported as failed. Dry-run skips every wait.
  // NOTE(review): assumes the second g14K3s argument is a timeout in milliseconds — confirm.
  const scriptWaitSeconds = options.dryRun
    ? options.timeoutSeconds
    : options.timeoutSeconds * 2 + 120;
  const result = g14K3s(["script", "--", script], scriptWaitSeconds * 1000 + 90_000);
  const ok = isCommandSuccess(result);
  return {
    ok,
    command: "hwlab g14 observability apply",
    mode: options.dryRun ? "dry-run" : "confirmed-apply",
    namespace: G14_OBSERVABILITY_NAMESPACE,
    versions: {
      prometheusOperator: G14_PROMETHEUS_OPERATOR_VERSION,
      prometheus: G14_PROMETHEUS_VERSION,
      operatorBundle: G14_PROMETHEUS_OPERATOR_RELEASE_ASSET,
    },
    manifest: options.dryRun ? manifest : undefined,
    elapsedMs: Date.now() - startedAtMs,
    result: compactCommandResult(result),
    status: ok && !options.dryRun ? g14ObservabilityStatus() : undefined,
    next: options.dryRun
      ? { apply: "bun scripts/cli.ts hwlab g14 observability apply --confirm" }
      : { status: "bun scripts/cli.ts hwlab g14 observability status", query: 'bun scripts/cli.ts hwlab g14 observability query --promql \'up{namespace="hwlab-v02"}\'' },
  };
}
/**
 * Runs one instant PromQL query against the internal Prometheus Service.
 *
 * The request goes through `kubectl get --raw` on the API server service-proxy path.
 *
 * @param options Parsed options; `query` and `timeoutSeconds` are read.
 * @returns A report whose `ok` requires both a successful command and a Prometheus
 *   `status` of `"success"`. `raw` carries the tail of the output only when it could not
 *   be decoded as a JSON object.
 */
function runG14ObservabilityQuery(options: G14ObservabilityOptions): Record<string, unknown> {
  const encodedQuery = encodeURIComponent(options.query);
  const serviceProxyPath = `/api/v1/namespaces/${G14_OBSERVABILITY_NAMESPACE}/services/http:${G14_PROMETHEUS_SERVICE}:9090/proxy/api/v1/query?query=${encodedQuery}`;
  const result = g14K3s(["kubectl", "get", "--raw", serviceProxyPath], options.timeoutSeconds * 1000);

  // Undecodable output is reported through `raw` rather than thrown.
  let parsed: Record<string, unknown>;
  try {
    parsed = record(JSON.parse(statusText(result)) as unknown);
  } catch {
    parsed = {};
  }

  const undecoded = Object.keys(parsed).length === 0;
  const rows = nested(parsed, ["data", "result"]);

  return {
    ok: isCommandSuccess(result) && parsed.status === "success",
    command: "hwlab g14 observability query",
    namespace: G14_OBSERVABILITY_NAMESPACE,
    service: G14_PROMETHEUS_SERVICE,
    promql: options.query,
    serviceProxyPath,
    status: parsed.status ?? null,
    resultType: nested(parsed, ["data", "resultType"]) ?? null,
    resultCount: Array.isArray(rows) ? (rows as unknown[]).length : null,
    data: parsed.data ?? null,
    raw: undecoded ? tailText(statusText(result), 4000) : undefined,
    commandResult: compactCommandResult(result),
  };
}
/**
 * Dispatches an observability sub-command to its handler.
 *
 * @param options Parsed options; `action` selects the handler.
 * @returns The report produced by the status, query or apply handler.
 */
function runG14Observability(options: G14ObservabilityOptions): Record<string, unknown> {
  switch (options.action) {
    case "status":
      return g14ObservabilityStatus();
    case "query":
      return runG14ObservabilityQuery(options);
    default:
      return runG14ObservabilityApply(options);
  }
}
function startAsyncHwlabG14Job(name: string, command: string[], note: string): Record<string, unknown> {
const job = startJob(name, command, note);
const statusCommand = `bun scripts/cli.ts job status ${job.id} --tail-bytes 12000`;
@@ -5658,11 +6163,15 @@ export function hwlabG14Help(): Record<string, unknown> {
"bun scripts/cli.ts hwlab g14 git-mirror flush --confirm",
"bun scripts/cli.ts hwlab g14 git-mirror sync --confirm --wait",
"bun scripts/cli.ts hwlab g14 git-mirror flush --confirm --wait",
"bun scripts/cli.ts hwlab g14 observability status",
"bun scripts/cli.ts hwlab g14 observability apply --dry-run",
"bun scripts/cli.ts hwlab g14 observability apply --confirm",
"bun scripts/cli.ts hwlab g14 observability query --promql 'up{namespace=\"hwlab-v02\"}'",
"bun scripts/cli.ts hwlab g14 tools-image status --name ci-node-tools --tag node22-alpine-bun-v1",
"bun scripts/cli.ts hwlab g14 tools-image build --name ci-node-tools --tag node22-alpine-bun-v1 --confirm",
"bun scripts/cli.ts job status <jobId> --tail-bytes 30000",
],
description: "G14 HWLAB PR monitor, DEV rollout command, bounded v0.2 control-plane bootstrap/cleanup/runtime-migration helper, v0.2 runtime SecretRef bootstrap, devops-infra git mirror maintenance, and controlled CI tools image build/status entry. The public monitor starts a fire-and-forget job. Default monitor lane is base=G14; --lane v02 monitors base=v0.2 PRs, waits for GitHub preflight/CI readiness, automatically merges ready PRs without waiting for other active v0.2 PipelineRuns, triggers v0.2 CD with latest-only GitOps writeback, flushes the git mirror when needed, and posts deduplicated PR comments for pending, blocked/conflict, success, superseded, failure, or timeout states. confirmed control-plane trigger-current and git-mirror sync/flush also return async jobs by default, with --wait reserved for explicit synchronous debugging. control-plane status/closeout/apply/cleanup-runs/cleanup-released-pvs/runtime-migration uses UniDesk G14:k3s routes for v0.2 Tekton/Argo control resources, runtime migration, historical PipelineRun/source-commit closeout verdicts, GitOps mirror flush state, and completed CI workspace retention only. secret status/ensure is the standard v0.2 runtime SecretRef bootstrap path; it never reads or prints secret values. git-mirror status/apply/sync/flush is the manual devops-infra mirror/relay control path and does not install a CronJob.",
description: "G14 HWLAB PR monitor, DEV rollout command, bounded v0.2 control-plane bootstrap/cleanup/runtime-migration helper, v0.2 runtime SecretRef bootstrap, devops-infra git mirror and observability maintenance, and controlled CI tools image build/status entry. The public monitor starts a fire-and-forget job. Default monitor lane is base=G14; --lane v02 monitors base=v0.2 PRs, waits for GitHub preflight/CI readiness, automatically merges ready PRs without waiting for other active v0.2 PipelineRuns, triggers v0.2 CD with latest-only GitOps writeback, flushes the git mirror when needed, and posts deduplicated PR comments for pending, blocked/conflict, success, superseded, failure, or timeout states. confirmed control-plane trigger-current and git-mirror sync/flush also return async jobs by default, with --wait reserved for explicit synchronous debugging. control-plane status/closeout/apply/cleanup-runs/cleanup-released-pvs/runtime-migration uses UniDesk G14:k3s routes for v0.2 Tekton/Argo control resources, runtime migration, historical PipelineRun/source-commit closeout verdicts, GitOps mirror flush state, and completed CI workspace retention only. secret status/ensure is the standard v0.2 runtime SecretRef bootstrap path; it never reads or prints secret values. git-mirror status/apply/sync/flush is the manual devops-infra mirror/relay control path and does not install a CronJob. observability status/apply/query owns the shared Prometheus Operator and Prometheus instance in devops-infra, while HWLAB lane manifests own only ServiceMonitor and PrometheusRule objects.",
defaults: {
repo: HWLAB_REPO,
base: G14_SOURCE_BRANCH,
@@ -5675,6 +6184,9 @@ export function hwlabG14Help(): Record<string, unknown> {
devApplication: DEV_APP,
v02Application: V02_APP,
briefIndexIssue: G14_BRIEF_INDEX_ISSUE,
observabilityNamespace: G14_OBSERVABILITY_NAMESPACE,
prometheusOperatorVersion: G14_PROMETHEUS_OPERATOR_VERSION,
prometheusVersion: G14_PROMETHEUS_VERSION,
},
stateFiles: {
monitor: ".state/hwlab-g14/latest-monitor-job.json",
@@ -5780,8 +6292,12 @@ export async function runHwlabG14Command(_config: Config, args: string[]): Promi
}
return runG14GitMirror(options);
}
if (action === "observability") {
const options = parseObservabilityOptions(args.slice(1));
return runG14Observability(options);
}
if (action !== "monitor-prs") {
return { ok: false, command: `hwlab g14 ${action ?? ""}`.trim(), degradedReason: "unsupported-command", message: "supported commands: hwlab g14 monitor-prs, hwlab g14 record-rollout, hwlab g14 control-plane, hwlab g14 secret, hwlab g14 git-mirror, hwlab g14 tools-image" };
return { ok: false, command: `hwlab g14 ${action ?? ""}`.trim(), degradedReason: "unsupported-command", message: "supported commands: hwlab g14 monitor-prs, hwlab g14 record-rollout, hwlab g14 control-plane, hwlab g14 secret, hwlab g14 git-mirror, hwlab g14 observability, hwlab g14 tools-image" };
}
const options = parseOptions(args.slice(1));
if (options.worker) return runMonitorWorker(options);