feat: add Workbench observability rule summaries
This commit is contained in:
@@ -169,6 +169,7 @@ lanes:
|
||||
workbench:
|
||||
enabled: true
|
||||
summaryPath: /v1/web-performance/summary
|
||||
lowSampleThreshold: 5
|
||||
metricPrefixes:
|
||||
- hwlab_workbench_
|
||||
- hwlab_webui_
|
||||
@@ -180,6 +181,68 @@ lanes:
|
||||
backendLabelDenylist:
|
||||
- unknown
|
||||
maxUnknownEventLines: 0
|
||||
recordingRules:
|
||||
- id: workbench_submit_first_visible_p95
|
||||
metric: hwlab:workbench_submit_first_visible:p95_seconds
|
||||
sourceMetric: hwlab_workbench_journey_duration_seconds
|
||||
quantile: 0.95
|
||||
window: 5m
|
||||
minSamples: 5
|
||||
groupBy: [namespace, gitops_target, journey, route, backend, transport, entry, outcome]
|
||||
matchLabels:
|
||||
journey: submit_to_first_visible
|
||||
- id: workbench_backend_event_visible_p95
|
||||
metric: hwlab:workbench_backend_event_visible:p95_seconds
|
||||
sourceMetric: hwlab_workbench_backend_event_visible_latency_seconds
|
||||
quantile: 0.95
|
||||
window: 5m
|
||||
minSamples: 5
|
||||
groupBy: [namespace, gitops_target, event_type, backend, transport, outcome]
|
||||
- id: workbench_session_switch_p95
|
||||
metric: hwlab:workbench_session_switch:p95_seconds
|
||||
sourceMetric: hwlab_workbench_journey_duration_seconds
|
||||
quantile: 0.95
|
||||
window: 5m
|
||||
minSamples: 5
|
||||
groupBy: [namespace, gitops_target, journey, route, target_state, cache, source, outcome]
|
||||
matchLabels:
|
||||
journey: session_switch_first_visible|session_switch_full_load
|
||||
- id: workbench_open_p95
|
||||
metric: hwlab:workbench_open:p95_seconds
|
||||
sourceMetric: hwlab_workbench_journey_duration_seconds
|
||||
quantile: 0.95
|
||||
window: 5m
|
||||
minSamples: 5
|
||||
groupBy: [namespace, gitops_target, journey, route, cache, auth_state, outcome]
|
||||
matchLabels:
|
||||
journey: workbench_open_first_visible|workbench_open_full_load
|
||||
warningAlerts:
|
||||
- id: HWLABWorkbenchSubmitFirstVisibleSlow
|
||||
ruleId: workbench_submit_first_visible_p95
|
||||
severity: warning
|
||||
thresholdSeconds: 15
|
||||
minSamples: 5
|
||||
for: 10m
|
||||
matchLabels:
|
||||
journey: submit_to_first_visible
|
||||
- id: HWLABWorkbenchBackendEventVisibleSlow
|
||||
ruleId: workbench_backend_event_visible_p95
|
||||
severity: warning
|
||||
thresholdSeconds: 10
|
||||
minSamples: 5
|
||||
for: 10m
|
||||
- id: HWLABWorkbenchSessionSwitchSlow
|
||||
ruleId: workbench_session_switch_p95
|
||||
severity: warning
|
||||
thresholdSeconds: 8
|
||||
minSamples: 5
|
||||
for: 10m
|
||||
- id: HWLABWorkbenchOpenSlow
|
||||
ruleId: workbench_open_p95
|
||||
severity: warning
|
||||
thresholdSeconds: 13
|
||||
minSamples: 5
|
||||
for: 10m
|
||||
runtimeImageRewrites:
|
||||
- source: fatedier/frpc:v0.68.1
|
||||
target: 127.0.0.1:5000/hwlab/frpc:v0.68.1
|
||||
|
||||
@@ -87,6 +87,8 @@ export interface HwlabRuntimeObservabilitySpec {
|
||||
readonly prometheusOperator: boolean;
|
||||
readonly metricsEndpoint?: HwlabRuntimeObservabilityMetricsEndpointSpec;
|
||||
readonly workbench?: HwlabRuntimeObservabilityWorkbenchSpec;
|
||||
readonly recordingRules: readonly HwlabRuntimeObservabilityRecordingRuleSpec[];
|
||||
readonly warningAlerts: readonly HwlabRuntimeObservabilityWarningAlertSpec[];
|
||||
}
|
||||
|
||||
export interface HwlabRuntimeObservabilityMetricsEndpointSpec {
|
||||
@@ -102,12 +104,34 @@ export interface HwlabRuntimeObservabilityMetricsEndpointSpec {
|
||||
/**
 * Workbench-specific observability settings, as produced by
 * `observabilityWorkbenchConfig` from the raw configuration record.
 */
export interface HwlabRuntimeObservabilityWorkbenchSpec {
  /** Whether the Workbench observability section is switched on. */
  readonly enabled: boolean;
  /** Path of the summary endpoint (for example `/v1/web-performance/summary`). */
  readonly summaryPath: string;
  /** Sample-count threshold used to classify a series as low-sample. */
  readonly lowSampleThreshold: number;
  /** Metric name prefixes of interest (for example `hwlab_workbench_`). */
  readonly metricPrefixes: ReadonlyArray<string>;
  /** Series names that are expected to be present in the scraped metrics. */
  readonly requiredSeries: ReadonlyArray<string>;
  /** Values of the `backend` label that are treated as denied. */
  readonly backendLabelDenylist: ReadonlyArray<string>;
  /** Upper bound on tolerated event lines carrying a denied backend label. */
  readonly maxUnknownEventLines: number;
}
|
||||
|
||||
/**
 * Declarative description of one quantile recording rule derived from a
 * histogram metric.
 */
export interface HwlabRuntimeObservabilityRecordingRuleSpec {
  /** Identifier that warning alerts refer to through their `ruleId`. */
  readonly id: string;
  /** Name of the recorded (output) metric. */
  readonly metric: string;
  /** Base name of the source histogram (without `_bucket` / `_count`). */
  readonly sourceMetric: string;
  /** Quantile to compute; the config parser requires `0 < quantile < 1`. */
  readonly quantile: number;
  /** Range-selector window, for example `5m`. */
  readonly window: string;
  /** Minimum number of samples in the window for the value to be trusted. */
  readonly minSamples: number;
  /** Labels the aggregation is grouped by. */
  readonly groupBy: ReadonlyArray<string>;
  /** Label matchers applied to the source metric; a value containing `|` is matched as a regex. */
  readonly matchLabels: Record<string, string>;
}
|
||||
|
||||
/**
 * Declarative description of one warning-level alert that fires when a
 * recorded latency quantile exceeds a threshold.
 */
export interface HwlabRuntimeObservabilityWarningAlertSpec {
  /** Alert identifier. */
  readonly id: string;
  /** `id` of the recording rule whose metric this alert evaluates. */
  readonly ruleId: string;
  /** Only the literal `"warning"` is accepted by the config parser. */
  readonly severity: "warning";
  /** Threshold, in seconds, above which the recorded value counts as slow. */
  readonly thresholdSeconds: number;
  /** Minimum sample count required before the alert condition is considered. */
  readonly minSamples: number;
  /** Duration string from the `for` config key, for example `10m`. */
  readonly for: string;
  /** Extra label matchers applied on top of the recording rule's own matchers. */
  readonly matchLabels: Record<string, string>;
}
|
||||
|
||||
export interface HwlabRuntimeImageRewriteSpec {
|
||||
readonly source: string;
|
||||
readonly target: string;
|
||||
@@ -295,6 +319,12 @@ function nonNegativeIntegerField(obj: Record<string, unknown>, key: string, path
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
 * Reads `obj[key]` and returns it when it is a finite number strictly
 * greater than zero.
 *
 * @param obj - Record to read from.
 * @param key - Property name to read.
 * @param path - Configuration path of `obj`, used as the error-message prefix.
 * @returns The validated number.
 * @throws Error when the value is missing, not a number, NaN, infinite, or `<= 0`.
 */
function positiveNumberField(obj: Record<string, unknown>, key: string, path: string): number {
  const candidate = obj[key];
  if (typeof candidate === "number" && Number.isFinite(candidate) && candidate > 0) {
    return candidate;
  }
  throw new Error(`${path}.${key} must be a positive number`);
}
|
||||
|
||||
/**
 * Validates that `value` is a record whose values are themselves records and
 * returns its `[key, record]` pairs.
 *
 * NOTE(review): despite the name, no sorting happens here; pairs come back in
 * the order `Object.entries` yields them. Confirm whether callers rely on that.
 *
 * @param value - Untrusted value expected to be a record of records.
 * @param path - Configuration path of `value`, used to build nested error paths.
 * @returns The validated entries.
 */
function sortedRecordEntries(value: unknown, path: string): Array<[string, Record<string, unknown>]> {
  const validated: Array<[string, Record<string, unknown>]> = [];
  for (const [name, child] of Object.entries(asRecord(value, path))) {
    validated.push([name, asRecord(child, `${path}.${name}`)]);
  }
  return validated;
}
|
||||
@@ -584,10 +614,18 @@ function publicExposureConfig(value: unknown, path: string): HwlabRuntimePublicE
|
||||
|
||||
/**
 * Parses and validates the observability section of the configuration.
 *
 * Besides per-field validation done by the helper parsers, this enforces two
 * cross-field invariants:
 * - recording rule ids are unique, because alerts look rules up by id and a
 *   duplicate would make that reference ambiguous;
 * - every warning alert's `ruleId` names an existing recording rule.
 *
 * @param value - Raw (untrusted) observability config value.
 * @param path - Configuration path of `value`, used as the error-message prefix.
 * @returns The validated observability spec.
 * @throws Error when any field is invalid, a recording rule id is duplicated,
 *   or an alert references an unknown recording rule.
 */
function observabilityConfig(value: unknown, path: string): HwlabRuntimeObservabilitySpec {
  const raw = asRecord(value, path);
  const recordingRules = observabilityRecordingRulesConfig(raw.recordingRules, `${path}.recordingRules`);
  const warningAlerts = observabilityWarningAlertsConfig(raw.warningAlerts, `${path}.warningAlerts`);

  // Ids act as lookup keys for alerts, so reject duplicates instead of
  // letting one definition silently shadow another.
  const recordingRuleIds = new Set<string>();
  for (const rule of recordingRules) {
    if (recordingRuleIds.has(rule.id)) throw new Error(`${path}.recordingRules.${rule.id}.id must be unique`);
    recordingRuleIds.add(rule.id);
  }

  for (const alert of warningAlerts) {
    if (!recordingRuleIds.has(alert.ruleId)) throw new Error(`${path}.warningAlerts.${alert.id}.ruleId must reference a recordingRules id`);
  }

  return {
    prometheusOperator: booleanField(raw, "prometheusOperator", path),
    metricsEndpoint: observabilityMetricsEndpointConfig(raw.metricsEndpoint, `${path}.metricsEndpoint`),
    workbench: observabilityWorkbenchConfig(raw.workbench, `${path}.workbench`),
    recordingRules,
    warningAlerts,
  };
}
|
||||
|
||||
@@ -617,6 +655,7 @@ function observabilityWorkbenchConfig(value: unknown, path: string): HwlabRuntim
|
||||
return {
|
||||
enabled: booleanField(raw, "enabled", path),
|
||||
summaryPath: stringField(raw, "summaryPath", path),
|
||||
lowSampleThreshold: numberField(raw, "lowSampleThreshold", path),
|
||||
metricPrefixes: stringArrayField(raw, "metricPrefixes", path),
|
||||
requiredSeries: stringArrayField(raw, "requiredSeries", path),
|
||||
backendLabelDenylist: stringArrayField(raw, "backendLabelDenylist", path),
|
||||
@@ -624,6 +663,47 @@ function observabilityWorkbenchConfig(value: unknown, path: string): HwlabRuntim
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Parses the optional `recordingRules` array of the observability config.
 *
 * `window` ends up verbatim inside a PromQL range selector, so it is checked
 * against the Prometheus duration syntax here rather than failing later at
 * rule-evaluation time. `minSamples` is a sample count and therefore must be
 * a non-negative integer.
 *
 * @param value - Raw value of the `recordingRules` key; `undefined` yields `[]`.
 * @param path - Configuration path of `value`, used as the error-message prefix.
 * @returns The validated recording rule specs, in input order.
 * @throws Error when `value` is not an array or any rule field is invalid.
 */
function observabilityRecordingRulesConfig(value: unknown, path: string): HwlabRuntimeObservabilityRecordingRuleSpec[] {
  if (value === undefined) return [];
  if (!Array.isArray(value)) throw new Error(`${path} must be an array`);
  // One or more <integer><unit> groups, e.g. "5m", "1h30m", "500ms".
  const durationPattern = /^(?:\d+(?:ms|[smhdwy]))+$/u;
  return value.map((item, index) => {
    const itemPath = `${path}[${index}]`;
    const raw = asRecord(item, itemPath);
    const quantile = positiveNumberField(raw, "quantile", itemPath);
    // positiveNumberField already guarantees quantile > 0.
    if (quantile >= 1) throw new Error(`${itemPath}.quantile must be less than 1`);
    const id = stringField(raw, "id", itemPath);
    const metric = stringField(raw, "metric", itemPath);
    const sourceMetric = stringField(raw, "sourceMetric", itemPath);
    const window = stringField(raw, "window", itemPath);
    if (!durationPattern.test(window)) throw new Error(`${itemPath}.window must be a Prometheus duration such as 5m`);
    const minSamples = numberField(raw, "minSamples", itemPath);
    if (!Number.isInteger(minSamples) || minSamples < 0) throw new Error(`${itemPath}.minSamples must be a non-negative integer`);
    return {
      id,
      metric,
      sourceMetric,
      quantile,
      window,
      minSamples,
      groupBy: stringArrayField(raw, "groupBy", itemPath),
      matchLabels: optionalStringRecord(raw.matchLabels, `${itemPath}.matchLabels`),
    };
  });
}
|
||||
|
||||
/**
 * Parses the optional `warningAlerts` array of the observability config.
 *
 * Only `severity: warning` is accepted. `for` must follow the Prometheus
 * duration syntax and `minSamples`, being a sample count, must be a
 * non-negative integer; both are checked here so a bad value is reported with
 * its config path instead of surfacing later in a generated rule.
 *
 * @param value - Raw value of the `warningAlerts` key; `undefined` yields `[]`.
 * @param path - Configuration path of `value`, used as the error-message prefix.
 * @returns The validated warning alert specs, in input order.
 * @throws Error when `value` is not an array or any alert field is invalid.
 */
function observabilityWarningAlertsConfig(value: unknown, path: string): HwlabRuntimeObservabilityWarningAlertSpec[] {
  if (value === undefined) return [];
  if (!Array.isArray(value)) throw new Error(`${path} must be an array`);
  // One or more <integer><unit> groups, e.g. "10m", "1h30m", "500ms".
  const durationPattern = /^(?:\d+(?:ms|[smhdwy]))+$/u;
  return value.map((item, index) => {
    const itemPath = `${path}[${index}]`;
    const raw = asRecord(item, itemPath);
    const severity = stringField(raw, "severity", itemPath);
    if (severity !== "warning") throw new Error(`${itemPath}.severity must be warning`);
    const id = stringField(raw, "id", itemPath);
    const ruleId = stringField(raw, "ruleId", itemPath);
    const thresholdSeconds = positiveNumberField(raw, "thresholdSeconds", itemPath);
    const minSamples = numberField(raw, "minSamples", itemPath);
    if (!Number.isInteger(minSamples) || minSamples < 0) throw new Error(`${itemPath}.minSamples must be a non-negative integer`);
    const holdFor = stringField(raw, "for", itemPath);
    if (!durationPattern.test(holdFor)) throw new Error(`${itemPath}.for must be a Prometheus duration such as 10m`);
    return {
      id,
      ruleId,
      severity,
      thresholdSeconds,
      minSamples,
      for: holdFor,
      matchLabels: optionalStringRecord(raw.matchLabels, `${itemPath}.matchLabels`),
    };
  });
}
|
||||
|
||||
function runtimeImageRewritesConfig(value: unknown, path: string): HwlabRuntimeImageRewriteSpec[] {
|
||||
if (value === undefined) return [];
|
||||
if (!Array.isArray(value)) throw new Error(`${path} must be an array`);
|
||||
|
||||
+118
-1
@@ -9,7 +9,7 @@ import { startJob } from "./jobs";
|
||||
import { classifySshTcpPoolFailure } from "./ssh";
|
||||
import { runHwlabG14Command } from "./hwlab-g14";
|
||||
import { HWLAB_NODE_CONTROL_PLANE_CONFIG_PATH, hwlabNodeControlPlaneInfraHelp, runHwlabNodeControlPlaneInfra } from "./hwlab-node-control-plane";
|
||||
import { hwlabRuntimeLaneConfigPath, hwlabRuntimeLaneSpec, hwlabRuntimeLaneSpecForNode, isHwlabRuntimeLane, type HwlabRuntimeLane, type HwlabRuntimeLaneSpec, type HwlabRuntimeObservabilitySpec, type HwlabRuntimePublicExposureSpec } from "./hwlab-node-lanes";
|
||||
import { hwlabRuntimeLaneConfigPath, hwlabRuntimeLaneSpec, hwlabRuntimeLaneSpecForNode, isHwlabRuntimeLane, type HwlabRuntimeLane, type HwlabRuntimeLaneSpec, type HwlabRuntimeObservabilityRecordingRuleSpec, type HwlabRuntimeObservabilitySpec, type HwlabRuntimeObservabilityWarningAlertSpec, type HwlabRuntimePublicExposureSpec } from "./hwlab-node-lanes";
|
||||
|
||||
type SecretAction = "status" | "ensure" | "cleanup-owned-postgres" | "cleanup-obsolete";
|
||||
type SecretPreset = "openfga" | "master-server-admin-api-key" | "bootstrap-admin" | "code-agent-provider" | "cloud-api-db" | "owned-postgres-cleanup" | "obsolete-secret-cleanup";
|
||||
@@ -697,6 +697,8 @@ function nodeObservabilityWorkbenchSummary(options: NodeObservabilityOptions, ex
|
||||
podName,
|
||||
containerName: serviceStatus.containerName,
|
||||
},
|
||||
recordingRules: nodeObservabilityRecordingRuleSummaries(options.spec.observability),
|
||||
warningAlerts: nodeObservabilityWarningAlertSummaries(options.spec.observability),
|
||||
metrics,
|
||||
probe: metricsProbe === null ? null : {
|
||||
ok: metricsProbe.ok,
|
||||
@@ -719,6 +721,8 @@ function summarizeNodeObservabilityStatus(status: Record<string, unknown>): Reco
|
||||
const publicRawMetrics = record(status.publicRawMetrics);
|
||||
const workbenchSummary = record(status.workbenchSummary);
|
||||
const metrics = record(workbenchSummary.metrics);
|
||||
const recordingRules = Array.isArray(workbenchSummary.recordingRules) ? workbenchSummary.recordingRules : [];
|
||||
const warningAlerts = Array.isArray(workbenchSummary.warningAlerts) ? workbenchSummary.warningAlerts : [];
|
||||
return {
|
||||
ok: status.ok === true,
|
||||
command: status.command,
|
||||
@@ -744,8 +748,11 @@ function summarizeNodeObservabilityStatus(status: Record<string, unknown>): Reco
|
||||
ok: workbenchSummary.ok === true,
|
||||
httpStatus: metrics.httpStatus ?? null,
|
||||
bodyBytes: metrics.bodyBytes ?? null,
|
||||
recordingRuleCount: recordingRules.length,
|
||||
warningAlertCount: warningAlerts.length,
|
||||
seriesByPrefix: metrics.seriesByPrefix ?? {},
|
||||
missingSeries: metrics.missingSeries ?? [],
|
||||
topSlowDimensions: metrics.topSlowDimensions ?? [],
|
||||
deniedBackendEventLineCount: metrics.deniedBackendEventLineCount ?? null,
|
||||
maxDeniedBackendEventLines: metrics.maxDeniedBackendEventLines ?? null,
|
||||
},
|
||||
@@ -771,6 +778,8 @@ function nodeObservabilityRenderPlan(observability: HwlabRuntimeObservabilitySpe
|
||||
prometheusOperator: observability.prometheusOperator,
|
||||
metricsEndpoint: nodeObservabilityEndpointSummary(observability),
|
||||
workbench: observability.workbench ?? null,
|
||||
recordingRules: nodeObservabilityRecordingRuleSummaries(observability),
|
||||
warningAlerts: nodeObservabilityWarningAlertSummaries(observability),
|
||||
clusterResources: observability.prometheusOperator
|
||||
? {
|
||||
source: "HWLAB GitOps rendered manifests",
|
||||
@@ -785,6 +794,68 @@ function nodeObservabilityRenderPlan(observability: HwlabRuntimeObservabilitySpe
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds one summary object per configured recording rule: the rule's own
 * fields plus the derived quantile expression, the sample-count expression,
 * and the `>= minSamples` guard built from that sample count.
 *
 * @param observability - Validated observability spec.
 * @returns Summaries in the same order as `observability.recordingRules`.
 */
function nodeObservabilityRecordingRuleSummaries(observability: HwlabRuntimeObservabilitySpec): Array<Record<string, unknown>> {
  const summaries: Array<Record<string, unknown>> = [];
  for (const rule of observability.recordingRules) {
    // The guard is the sample-count expression with a threshold appended,
    // so the expression is built once and reused.
    const sampleCount = nodeObservabilitySampleCountExpression(rule, rule.matchLabels);
    summaries.push({
      ...rule,
      expression: nodeObservabilityRecordingRuleExpression(rule),
      sampleCountExpression: sampleCount,
      lowSampleGuardExpression: `${sampleCount} >= ${rule.minSamples}`,
    });
  }
  return summaries;
}
|
||||
|
||||
/**
 * Builds one summary object per configured warning alert: the alert's own
 * fields plus the alert expression, the name of the recorded metric it
 * evaluates, and its low-sample guard expression.
 *
 * When an alert's `ruleId` does not resolve to a recording rule, the three
 * derived fields are `null`.
 *
 * @param observability - Validated observability spec.
 * @returns Summaries in the same order as `observability.warningAlerts`.
 */
function nodeObservabilityWarningAlertSummaries(observability: HwlabRuntimeObservabilitySpec): Array<Record<string, unknown>> {
  const rulesById = new Map(observability.recordingRules.map((candidate) => [candidate.id, candidate]));
  return observability.warningAlerts.map((alert) => {
    const target = rulesById.get(alert.ruleId);
    if (target === undefined) {
      return { ...alert, expression: null, recordingRule: null, lowSampleGuardExpression: null };
    }
    // Alert matchers take precedence over the rule's matchers on key clashes.
    const guardLabels = { ...target.matchLabels, ...alert.matchLabels };
    const guardedCount = nodeObservabilitySampleCountExpression(target, guardLabels);
    return {
      ...alert,
      expression: nodeObservabilityWarningAlertExpression(target, alert),
      recordingRule: target.metric,
      lowSampleGuardExpression: `${guardedCount} >= ${alert.minSamples}`,
    };
  });
}
|
||||
|
||||
/**
 * Renders the PromQL expression for a recording rule:
 * `histogram_quantile(q, sum by (<groupBy>, le) (rate(<source>_bucket{...}[window])))`.
 *
 * `le` is appended to the grouping labels (de-duplicated) because
 * `histogram_quantile` needs the bucket boundary label to survive aggregation.
 *
 * @param rule - Recording rule spec.
 * @returns The PromQL expression string.
 */
function nodeObservabilityRecordingRuleExpression(rule: HwlabRuntimeObservabilityRecordingRuleSpec): string {
  const quantileLiteral = formatPrometheusNumber(rule.quantile);
  const groupingLabels = uniqueStrings([...rule.groupBy, "le"]).join(", ");
  const bucketSelector = prometheusMetricSelector(`${rule.sourceMetric}_bucket`, rule.matchLabels);
  const bucketRate = `rate(${bucketSelector}[${rule.window}])`;
  return `histogram_quantile(${quantileLiteral}, sum by (${groupingLabels}) (${bucketRate}))`;
}
|
||||
|
||||
/**
 * Renders the PromQL expression counting observations of the rule's source
 * histogram over the rule's window:
 * `sum by (<groupBy>) (increase(<source>_count{...}[window]))`.
 *
 * @param rule - Recording rule supplying source metric, grouping and window.
 * @param matchLabels - Label matchers to apply to the `_count` series.
 * @returns The PromQL expression string.
 */
function nodeObservabilitySampleCountExpression(rule: HwlabRuntimeObservabilityRecordingRuleSpec, matchLabels: Record<string, string>): string {
  const groupingLabels = uniqueStrings([...rule.groupBy]).join(", ");
  const countSelector = prometheusMetricSelector(`${rule.sourceMetric}_count`, matchLabels);
  const windowedIncrease = `increase(${countSelector}[${rule.window}])`;
  return `sum by (${groupingLabels}) (${windowedIncrease})`;
}
|
||||
|
||||
/**
 * Renders the PromQL alert expression for a warning alert:
 * `(<recorded metric>{...} > threshold) and on (<groupBy>) (<sample count> >= minSamples)`.
 *
 * The right-hand side suppresses the alert when too few samples were observed
 * in the rule's window.
 *
 * The `on (...)` modifier is always emitted, even for an empty `groupBy`.
 * The sample guard is aggregated with `sum by (<groupBy>)`, so it carries
 * exactly those labels; without `on`, `and` would match on every label of the
 * recorded series and any extra label there would stop the two sides from
 * pairing. `on ()` is valid PromQL and matches regardless of labels.
 *
 * @param rule - Recording rule the alert evaluates.
 * @param alert - Warning alert spec; its matchers override the rule's on key clashes.
 * @returns The PromQL expression string.
 */
function nodeObservabilityWarningAlertExpression(rule: HwlabRuntimeObservabilityRecordingRuleSpec, alert: HwlabRuntimeObservabilityWarningAlertSpec): string {
  const sampleLabels = { ...rule.matchLabels, ...alert.matchLabels };
  const sampleGuard = `${nodeObservabilitySampleCountExpression(rule, sampleLabels)} >= ${alert.minSamples}`;
  const matchOn = uniqueStrings([...rule.groupBy]).join(", ");
  const slowCondition = `${prometheusMetricSelector(rule.metric, alert.matchLabels)} > ${formatPrometheusNumber(alert.thresholdSeconds)}`;
  return `(${slowCondition}) and on (${matchOn}) (${sampleGuard})`;
}
|
||||
|
||||
/**
 * Renders a PromQL instant-vector selector: the metric name followed by a
 * `{...}` matcher list, or the bare metric name when there are no labels.
 *
 * @param metric - Metric name.
 * @param labels - Label matchers; may be empty.
 * @returns The selector string.
 */
function prometheusMetricSelector(metric: string, labels: Record<string, string>): string {
  // prometheusLabelSelector yields "" for an empty label set, so plain
  // concatenation already produces the bare metric name in that case.
  const matcherList = prometheusLabelSelector(labels);
  return `${metric}${matcherList}`;
}
|
||||
|
||||
/**
 * Renders a `{key="value",...}` PromQL matcher list from a label record.
 *
 * A value containing `|` is emitted with the regex matcher `=~`; any other
 * value uses the equality matcher `=`. Values are escaped for use inside a
 * double-quoted string. Matchers appear in the record's own key order.
 *
 * @param labels - Label matchers; may be empty.
 * @returns The matcher list including braces, or `""` when `labels` is empty.
 */
function prometheusLabelSelector(labels: Record<string, string>): string {
  const matchers: string[] = [];
  for (const [labelName, labelValue] of Object.entries(labels)) {
    const operator = labelValue.includes("|") ? "=~" : "=";
    matchers.push(`${labelName}${operator}"${escapePrometheusLabelValue(labelValue)}"`);
  }
  if (matchers.length === 0) {
    return "";
  }
  return `{${matchers.join(",")}}`;
}
|
||||
|
||||
/**
 * Escapes a label value for embedding inside a double-quoted PromQL string.
 *
 * Backslashes and double quotes are escaped, and raw line breaks are turned
 * into `\n` / `\r` escape sequences: a literal newline inside a quoted PromQL
 * string would otherwise leave the string unterminated.
 *
 * @param value - Raw label value.
 * @returns The escaped value, without surrounding quotes.
 */
function escapePrometheusLabelValue(value: string): string {
  return value
    // Backslashes first, so the escapes added below are not doubled.
    .replace(/\\/gu, "\\\\")
    .replace(/"/gu, "\\\"")
    .replace(/\n/gu, "\\n")
    .replace(/\r/gu, "\\r");
}
|
||||
|
||||
/**
 * Formats a JavaScript number as a PromQL float literal.
 *
 * Finite values use JavaScript's default string conversion. Non-finite values
 * are mapped to the spellings PromQL understands (`NaN`, `+Inf`, `-Inf`),
 * because JavaScript's `"Infinity"` is not a PromQL number.
 *
 * @param value - Number to format.
 * @returns The PromQL literal.
 */
function formatPrometheusNumber(value: number): string {
  if (Number.isNaN(value)) return "NaN";
  if (value === Number.POSITIVE_INFINITY) return "+Inf";
  if (value === Number.NEGATIVE_INFINITY) return "-Inf";
  return String(value);
}
|
||||
|
||||
/**
 * Returns the distinct strings of `values`, keeping the position of each
 * string's first occurrence.
 *
 * @param values - Input strings; not modified.
 * @returns A new array without duplicates.
 */
function uniqueStrings(values: readonly string[]): string[] {
  const seen = new Set<string>();
  const distinct: string[] = [];
  for (const entry of values) {
    if (seen.has(entry)) continue;
    seen.add(entry);
    distinct.push(entry);
  }
  return distinct;
}
|
||||
|
||||
function nodeObservabilityApplyPlan(observability: HwlabRuntimeObservabilitySpec): Record<string, unknown> {
|
||||
const endpoint = observability.metricsEndpoint;
|
||||
if (endpoint === undefined) {
|
||||
@@ -903,6 +974,12 @@ function nodeObservabilityMetricsProbe(options: NodeObservabilityOptions, podNam
|
||||
requiredSeries: workbench?.requiredSeries ?? [],
|
||||
backendLabelDenylist: workbench?.backendLabelDenylist ?? [],
|
||||
maxDeniedBackendEventLines: workbench?.maxUnknownEventLines ?? 0,
|
||||
lowSampleThreshold: workbench?.lowSampleThreshold ?? 0,
|
||||
histogramMetrics: [
|
||||
{ id: "workbench_journey", metric: "hwlab_workbench_journey_duration_seconds", kind: "workbench_journey", dimensionLabels: ["namespace", "gitops_target", "journey", "route", "backend", "transport", "target_state", "cache", "source", "entry", "outcome"] },
|
||||
{ id: "workbench_event_phase", metric: "hwlab_workbench_event_phase_duration_seconds", kind: "workbench_event_phase", dimensionLabels: ["namespace", "gitops_target", "phase", "event_type", "backend", "transport", "outcome"] },
|
||||
{ id: "workbench_backend_event_visible", metric: "hwlab_workbench_backend_event_visible_latency_seconds", kind: "workbench_backend_event_visible", dimensionLabels: ["namespace", "gitops_target", "event_type", "backend", "transport", "outcome"] },
|
||||
],
|
||||
}), "utf8").toString("base64");
|
||||
const source = [
|
||||
"const http = require('node:http');",
|
||||
@@ -914,6 +991,42 @@ function nodeObservabilityMetricsProbe(options: NodeObservabilityOptions, podNam
|
||||
"function lineHasLabel(line, name, value) { return line.includes(`${name}=\\\"${String(value).replace(/\\\\/g, '\\\\\\\\').replace(/\\\"/g, '\\\\\\\"')}\\\"`); }",
|
||||
"function isBackendEventMetric(name) { return name.startsWith('hwlab_workbench_event_') || name.startsWith('hwlab_workbench_backend_event_'); }",
|
||||
"function compactLines(lines) { return lines.slice(0, 16).map((line) => line.length > 320 ? `${line.slice(0, 317)}...` : line); }",
|
||||
"function parseLabels(raw) { const labels = {}; for (const part of String(raw || '').split(',')) { const index = part.indexOf('='); if (index <= 0) continue; const key = part.slice(0, index); let value = part.slice(index + 1); if (value.startsWith('\\\"') && value.endsWith('\\\"')) value = value.slice(1, -1); labels[key] = value.replace(/\\\\\\\"/g, '\\\"').replace(/\\\\\\\\/g, '\\\\'); } return labels; }",
|
||||
"function labelKey(metric, labels) { const copy = { ...labels }; delete copy.le; return `${metric}|${Object.keys(copy).sort().map((key) => `${key}=${copy[key]}`).join('|')}`; }",
|
||||
"function pickLabels(labels, names) { return Object.fromEntries(names.map((name) => [name, labels[name] || 'unknown'])); }",
|
||||
"function quantile(row, q) { const count = Number(row.count || 0); if (count <= 0) return 0; const rank = Math.max(1, Math.ceil(count * q)); const buckets = row.buckets.filter((item) => item.le !== '+Inf').sort((left, right) => Number(left.le) - Number(right.le)); for (const bucket of buckets) { if (bucket.value >= rank) return Number(bucket.le); } return buckets.length ? Number(buckets[buckets.length - 1].le) : 0; }",
|
||||
"function summarizeHistograms(sampleLines) {",
|
||||
" const metricConfig = new Map(config.histogramMetrics.map((item) => [item.metric, item]));",
|
||||
" const series = new Map();",
|
||||
" for (const line of sampleLines) {",
|
||||
" const match = /^([A-Za-z_:][A-Za-z0-9_:]*?)_(bucket|sum|count)\\{([^}]*)\\}\\s+([0-9.eE+-]+)/.exec(line);",
|
||||
" if (!match || !metricConfig.has(match[1])) continue;",
|
||||
" const metric = match[1];",
|
||||
" const suffix = match[2];",
|
||||
" const labels = parseLabels(match[3]);",
|
||||
" const value = Number(match[4]);",
|
||||
" if (!Number.isFinite(value)) continue;",
|
||||
" const key = labelKey(metric, labels);",
|
||||
" const row = series.get(key) || { metric, labels: { ...labels }, buckets: [], sum: 0, count: 0 };",
|
||||
" if (suffix === 'bucket') row.buckets.push({ le: labels.le || '+Inf', value });",
|
||||
" if (suffix === 'sum') row.sum = value;",
|
||||
" if (suffix === 'count') row.count = value;",
|
||||
" series.set(key, row);",
|
||||
" }",
|
||||
" const lowSampleThreshold = Number(config.lowSampleThreshold || 0);",
|
||||
" const groups = Object.fromEntries(config.histogramMetrics.map((item) => [item.id, []]));",
|
||||
" const rows = [];",
|
||||
" for (const row of series.values()) {",
|
||||
" const cfg = metricConfig.get(row.metric);",
|
||||
" const sampleCount = Number(row.count || 0);",
|
||||
" const output = { kind: cfg.kind, metric: row.metric, sampleCount, average: sampleCount > 0 ? Number((row.sum / sampleCount).toFixed(4)) : 0, p50: quantile(row, 0.5), p75: quantile(row, 0.75), p95: quantile(row, 0.95), lowSample: sampleCount > 0 && sampleCount < lowSampleThreshold, sampleState: sampleCount <= 0 ? 'empty' : sampleCount < lowSampleThreshold ? 'low-sample' : 'ok', dimensions: pickLabels(row.labels, cfg.dimensionLabels) };",
|
||||
" groups[cfg.id].push(output);",
|
||||
" rows.push(output);",
|
||||
" }",
|
||||
" for (const key of Object.keys(groups)) groups[key].sort((left, right) => right.p95 - left.p95 || right.sampleCount - left.sampleCount);",
|
||||
" rows.sort((left, right) => right.p95 - left.p95 || right.sampleCount - left.sampleCount);",
|
||||
" return { groups: Object.fromEntries(Object.entries(groups).map(([key, value]) => [key, value.slice(0, 24)])), topSlowDimensions: rows.slice(0, 12) };",
|
||||
"}",
|
||||
"function summarize(text, statusCode) {",
|
||||
" const lines = text.split(/\\r?\\n/).map((line) => line.trim()).filter(Boolean);",
|
||||
" const sampleLines = lines.filter((line) => !line.startsWith('#'));",
|
||||
@@ -921,6 +1034,7 @@ function nodeObservabilityMetricsProbe(options: NodeObservabilityOptions, podNam
|
||||
" const requiredSeries = config.requiredSeries.map((name) => { const matching = sampleLines.filter((line) => lineMatchesMetric(line, name)); return { name, present: matching.length > 0, sampleCount: matching.length }; });",
|
||||
" const missingSeries = requiredSeries.filter((series) => !series.present).map((series) => series.name);",
|
||||
" const deniedBackendEventLines = sampleLines.filter((line) => { const name = metricNameFromLine(line) || ''; if (!isBackendEventMetric(name)) return false; return config.backendLabelDenylist.some((label) => lineHasLabel(line, 'backend', label)); });",
|
||||
" const histogramSummary = summarizeHistograms(sampleLines);",
|
||||
" const ready = statusCode >= 200 && statusCode < 300 && missingSeries.length === 0 && deniedBackendEventLines.length <= config.maxDeniedBackendEventLines;",
|
||||
" return {",
|
||||
" ready,",
|
||||
@@ -933,6 +1047,9 @@ function nodeObservabilityMetricsProbe(options: NodeObservabilityOptions, podNam
|
||||
" requiredSeries,",
|
||||
" missingSeries,",
|
||||
" backendLabelDenylist: config.backendLabelDenylist,",
|
||||
" lowSampleThreshold: config.lowSampleThreshold,",
|
||||
" workbenchHistograms: histogramSummary.groups,",
|
||||
" topSlowDimensions: histogramSummary.topSlowDimensions,",
|
||||
" deniedBackendEventLineCount: deniedBackendEventLines.length,",
|
||||
" maxDeniedBackendEventLines: config.maxDeniedBackendEventLines,",
|
||||
" deniedBackendEventLines: compactLines(deniedBackendEventLines),",
|
||||
|
||||
Reference in New Issue
Block a user