fix: compact HWLAB runtime pipeline diagnostics
This commit is contained in:
@@ -215,6 +215,10 @@ interface CommandJsonResult {
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
parsed: unknown | null;
|
||||
durationMs?: number;
|
||||
timedOut?: boolean;
|
||||
stdoutBytes?: number;
|
||||
stderrBytes?: number;
|
||||
}
|
||||
|
||||
interface RemoteAsyncCommandSpec {
|
||||
@@ -646,7 +650,9 @@ function shellQuote(value: string): string {
|
||||
}
|
||||
|
||||
function commandJson(command: string[], timeoutMs = 60_000): CommandJsonResult {
|
||||
const startedAtMs = Date.now();
|
||||
const result = runCommand(command, repoRoot, { timeoutMs });
|
||||
const durationMs = Date.now() - startedAtMs;
|
||||
let parsed: unknown | null = null;
|
||||
if (result.stdout.trim().length > 0) {
|
||||
try {
|
||||
@@ -662,11 +668,17 @@ function commandJson(command: string[], timeoutMs = 60_000): CommandJsonResult {
|
||||
stdout: result.stdout,
|
||||
stderr: result.stderr,
|
||||
parsed,
|
||||
durationMs,
|
||||
timedOut: result.timedOut,
|
||||
stdoutBytes: Buffer.byteLength(result.stdout, "utf8"),
|
||||
stderrBytes: Buffer.byteLength(result.stderr, "utf8"),
|
||||
};
|
||||
}
|
||||
|
||||
function commandJsonWithInput(command: string[], input: string, timeoutMs = 60_000): CommandJsonResult {
|
||||
const startedAtMs = Date.now();
|
||||
const result = runCommand(command, repoRoot, { timeoutMs, input });
|
||||
const durationMs = Date.now() - startedAtMs;
|
||||
let parsed: unknown | null = null;
|
||||
if (result.stdout.trim().length > 0) {
|
||||
try {
|
||||
@@ -682,6 +694,10 @@ function commandJsonWithInput(command: string[], input: string, timeoutMs = 60_0
|
||||
stdout: result.stdout,
|
||||
stderr: result.stderr,
|
||||
parsed,
|
||||
durationMs,
|
||||
timedOut: result.timedOut,
|
||||
stdoutBytes: Buffer.byteLength(result.stdout, "utf8"),
|
||||
stderrBytes: Buffer.byteLength(result.stderr, "utf8"),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1439,6 +1455,161 @@ function pipelineRunCompactFromText(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds the shell snippet that collects compact Tekton TaskRun diagnostics.
 *
 * The snippet disables errexit (`set +e`) and feeds an inline Node.js program
 * to `node` through a quoted heredoc. That program:
 *
 * - reads `CI_NAMESPACE` (default `hwlab-ci`), `PIPELINE_RUN`,
 *   `TASKRUN_LOG_TAIL_LINES` (default 60, clamped to 20..200) and
 *   `TASKRUN_LOG_MAX_CHARS` (default 4000, clamped to 1000..20000) from the
 *   environment;
 * - prints a `skipped` JSON object and exits 0 when `PIPELINE_RUN` is empty;
 * - otherwise lists TaskRuns labelled `tekton.dev/pipelineRun=<PIPELINE_RUN>`
 *   via `kubectl`, summarises them, and gathers redacted log tails for at most
 *   three failed TaskRuns;
 * - prints exactly one JSON object on stdout.
 *
 * The numeric limits are parsed by the embedded `boundedInt` helper so that a
 * non-numeric environment value falls back to the default instead of turning
 * into `NaN` (which would produce `kubectl logs --tail=NaN` and disable the
 * character cap), and fractional values are truncated to integers.
 *
 * @returns The script text, one statement per line, joined with `\n`.
 */
function runtimeLaneTaskRunDiagnosticsScript(): string {
  return [
    "set +e",
    "node <<'NODE'",
    "const cp = require('node:child_process');",
    "const namespace = process.env.CI_NAMESPACE || 'hwlab-ci';",
    "const pipelineRun = process.env.PIPELINE_RUN || '';",
    // Empty/unset values use the fallback; NaN also uses the fallback; the result is an integer clamped to [min, max].
    "function boundedInt(value, fallback, min, max) { const parsed = Math.trunc(Number(value || fallback)); return Math.max(min, Math.min(Number.isNaN(parsed) ? fallback : parsed, max)); }",
    "const tailLines = boundedInt(process.env.TASKRUN_LOG_TAIL_LINES, 60, 20, 200);",
    "const maxLogChars = boundedInt(process.env.TASKRUN_LOG_MAX_CHARS, 4000, 1000, 20000);",
    // Synchronous kubectl wrapper; spawn errors are folded into stderr/timedOut rather than thrown.
    "function kubectl(args, timeoutMs, maxBuffer) {",
    "  const out = cp.spawnSync('kubectl', args, { encoding: 'utf8', timeout: timeoutMs, maxBuffer });",
    "  const err = out.error;",
    "  return { exitCode: out.status, stdout: out.stdout || '', stderr: out.stderr || (err ? String(err.message || err) : ''), timedOut: Boolean(err && err.code === 'ETIMEDOUT') };",
    "}",
    "function parseJson(text) { try { return JSON.parse(text); } catch { return null; } }",
    "function oneLine(value, limit) { return String(value || '').replace(/[\\r\\n\\t]+/g, ' ').replace(/\\s+/g, ' ').trim().slice(0, limit); }",
    "function timestampMs(value) { const parsed = Date.parse(String(value || '')); return Number.isFinite(parsed) ? parsed : null; }",
    "function durationSeconds(start, end) { const s = timestampMs(start); const e = timestampMs(end); return s === null || e === null || e < s ? null : Math.round((e - s) / 1000); }",
    "function condition(item) { const conditions = Array.isArray(item && item.status && item.status.conditions) ? item.status.conditions : []; return conditions.find((entry) => entry && entry.type === 'Succeeded') || conditions[0] || {}; }",
    // Masks bearer tokens and `key: value` / `key=value` pairs whose key looks like a credential.
    "function redact(text) {",
    "  return String(text || '')",
    "    .replace(/Bearer\\s+[A-Za-z0-9._~+\\/-]+=*/giu, 'Bearer <redacted>')",
    "    .replace(/((?:api[_-]?key|authorization|token|password|secret|database_url|dsn)\\s*[:=]\\s*)\\S+/giu, '$1<redacted>');",
    "}",
    // Keeps the last `tailLines` lines, then the last `maxLogChars` characters, dropping a leading partial line.
    "function trimTail(text) {",
    "  const redacted = redact(text);",
    "  let tail = redacted.split(/\\r?\\n/).slice(-tailLines).join('\\n').trimEnd();",
    "  if (tail.length > maxLogChars) {",
    "    tail = tail.slice(tail.length - maxLogChars);",
    "    const lineBreak = tail.indexOf('\\n');",
    "    if (lineBreak >= 0) tail = tail.slice(lineBreak + 1);",
    "  }",
    "  return tail;",
    "}",
    "function stateSummary(state) {",
    "  if (!state || typeof state !== 'object') return { state: null, reason: null, exitCode: null };",
    "  const key = Object.keys(state)[0] || null;",
    "  const value = key ? state[key] || {} : {};",
    "  return { state: key, reason: value.reason || null, exitCode: typeof value.exitCode === 'number' ? value.exitCode : null };",
    "}",
    // Reports only containers that are not plainly running (or that have restarted), capped at 12 entries.
    "function podContainerStates(pod) {",
    "  const status = (pod && pod.status) || {};",
    "  const containers = [...(status.initContainerStatuses || []), ...(status.containerStatuses || [])];",
    "  return containers.map((item) => {",
    "    const summary = stateSummary(item.state);",
    "    return { name: item.name || null, ready: item.ready === true, restartCount: item.restartCount || 0, state: summary.state, reason: summary.reason, exitCode: summary.exitCode };",
    "  }).filter((item) => item.state !== 'running' || item.restartCount > 0).slice(0, 12);",
    "}",
    "function findPodName(taskRunName) {",
    "  const out = kubectl(['-n', namespace, 'get', 'pod', '-l', 'tekton.dev/taskRun=' + taskRunName, '-o', 'jsonpath={.items[0].metadata.name}'], 10000, 65536);",
    "  return out.exitCode === 0 && out.stdout.trim() ? out.stdout.trim() : null;",
    "}",
    "function collectLogTail(taskRun) {",
    "  const podName = taskRun.podName || findPodName(taskRun.name);",
    "  if (!podName) return { taskRun: taskRun.name, pipelineTask: taskRun.pipelineTask, podName: null, ok: false, reason: 'pod-not-found', logTail: '' };",
    "  const podResult = kubectl(['-n', namespace, 'get', 'pod', podName, '-o', 'json'], 10000, 262144);",
    "  const pod = parseJson(podResult.stdout);",
    "  const logResult = kubectl(['-n', namespace, 'logs', podName, '--all-containers=true', '--tail=' + String(tailLines), '--prefix=true'], 20000, Math.max(262144, maxLogChars * 8));",
    "  const raw = logResult.stdout || logResult.stderr;",
    "  return {",
    "    taskRun: taskRun.name,",
    "    pipelineTask: taskRun.pipelineTask,",
    "    podName,",
    "    ok: logResult.exitCode === 0,",
    "    exitCode: logResult.exitCode,",
    "    timedOut: logResult.timedOut,",
    "    containers: podContainerStates(pod),",
    "    logTail: trimTail(raw),",
    "  };",
    "}",
    // No pipeline run name: emit a well-formed "skipped" payload instead of querying the cluster.
    "if (!pipelineRun) {",
    "  console.log(JSON.stringify({ ok: true, pipelineRun: null, namespace, skipped: true, reason: 'pipeline-run-empty', total: 0, items: [], failedTaskRuns: [], activeTaskRuns: [], logTails: [], valuesPrinted: false }));",
    "  process.exit(0);",
    "}",
    "const taskRunResult = kubectl(['-n', namespace, 'get', 'taskrun', '-l', 'tekton.dev/pipelineRun=' + pipelineRun, '-o', 'json'], 25000, 2 * 1024 * 1024);",
    "const taskData = parseJson(taskRunResult.stdout);",
    "const rawItems = Array.isArray(taskData && taskData.items) ? taskData.items : [];",
    "const items = rawItems.map((item) => {",
    "  const metadata = item.metadata || {};",
    "  const labels = metadata.labels || {};",
    "  const status = item.status || {};",
    "  const c = condition(item);",
    "  return {",
    "    name: metadata.name || null,",
    "    pipelineTask: labels['tekton.dev/pipelineTask'] || null,",
    "    status: c.status || null,",
    "    reason: c.reason || null,",
    "    message: oneLine(c.message, 360) || null,",
    "    startTime: status.startTime || null,",
    "    completionTime: status.completionTime || null,",
    "    durationSeconds: durationSeconds(status.startTime, status.completionTime),",
    "    podName: status.podName || null,",
    "  };",
    "}).filter((item) => item.name).sort((left, right) => (timestampMs(left.startTime) || 0) - (timestampMs(right.startTime) || 0) || String(left.name).localeCompare(String(right.name)));",
    "const failedTaskRuns = items.filter((item) => item.status === 'False' || /fail|error|timeout/i.test(String(item.reason || '')));",
    "const activeTaskRuns = items.filter((item) => item.status !== 'True' && item.status !== 'False');",
    "const succeededCount = items.filter((item) => item.status === 'True').length;",
    // Log collection is the expensive part, so it is limited to the first three failed TaskRuns.
    "const logTails = failedTaskRuns.slice(0, 3).map(collectLogTail);",
    "console.log(JSON.stringify({",
    "  ok: taskRunResult.exitCode === 0,",
    "  pipelineRun,",
    "  namespace,",
    "  total: items.length,",
    "  succeededCount,",
    "  failedCount: failedTaskRuns.length,",
    "  activeCount: activeTaskRuns.length,",
    "  failedTaskRuns: failedTaskRuns.slice(0, 8),",
    "  activeTaskRuns: activeTaskRuns.slice(0, 8),",
    "  recentTaskRuns: items.slice(-8),",
    "  logTails,",
    "  query: { taskRunsExitCode: taskRunResult.exitCode, taskRunsTimedOut: taskRunResult.timedOut, taskRunsStderr: oneLine(taskRunResult.stderr, 500) || null },",
    "  bounded: { maxFailedTaskRuns: 8, maxLogTaskRuns: 3, tailLines, maxLogChars, redacted: true },",
    "  valuesPrinted: false,",
    "}));",
    "NODE",
  ].join("\n");
}
|
||||
|
||||
/**
 * Converts the captured output of the `taskRunDiagnostics` shell section into
 * a plain diagnostics record.
 *
 * Three outcomes are possible:
 * - the section produced no stdout: an empty summary is returned whose `ok`
 *   mirrors `shellSectionOk(section)`;
 * - stdout parses as JSON: the parsed record is returned, with `pipelineRun`
 *   defaulting to the argument when the payload has none, and the section
 *   exit code attached;
 * - parsing (or normalising the parsed value) throws: a degraded record is
 *   returned carrying bounded excerpts of the raw stdout and stderr.
 *
 * @param pipelineRun Name of the pipeline run the diagnostics were requested for.
 * @param section Captured shell section, or `undefined` when it is missing.
 * @param stderr Stderr text to excerpt (at most 1000 characters after trimming).
 * @returns A diagnostics record; this function does not throw on bad JSON.
 */
function runtimeLaneTaskRunDiagnosticsFromText(
  pipelineRun: string,
  section: ShellSection | undefined,
  stderr: string,
): Record<string, unknown> {
  const payload = String(section?.stdout ?? "").trim();
  const sectionExitCode = section?.exitCode ?? null;
  const stderrExcerpt = (): string => stderr.trim().slice(0, 1000);

  // Nothing was printed: report an empty summary rather than a parse failure.
  if (payload === "") {
    return {
      ok: shellSectionOk(section),
      pipelineRun,
      total: 0,
      failedTaskRuns: [],
      activeTaskRuns: [],
      logTails: [],
      sectionExitCode,
      stderr: stderrExcerpt(),
    };
  }

  try {
    const parsedRecord = record(JSON.parse(payload) as unknown);
    const resolvedPipelineRun = stringOrNull(parsedRecord.pipelineRun) ?? pipelineRun;
    return { ...parsedRecord, pipelineRun: resolvedPipelineRun, sectionExitCode };
  } catch {
    // Keep a bounded slice of the unparsable output so the failure can be diagnosed.
    return {
      ok: false,
      pipelineRun,
      sectionExitCode,
      raw: payload.slice(0, 2000),
      stderr: stderrExcerpt(),
      degradedReason: "taskrun-diagnostics-json-parse-failed",
    };
  }
}
|
||||
|
||||
function timestampMs(value: unknown): number | null {
|
||||
if (typeof value !== "string" || value.trim().length === 0) return null;
|
||||
const parsed = Date.parse(value);
|
||||
@@ -3917,6 +4088,7 @@ function runtimeLaneControlPlaneStatusBundle(spec: HwlabRuntimeLaneSpec, target:
|
||||
`section controlPlane sh -lc ${shellQuote(controlPlaneProbe)}`,
|
||||
`section argo kubectl get application -n ${shellQuote(ARGO_NAMESPACE)} ${shellQuote(spec.app)} -o 'jsonpath={.spec.source.targetRevision}{"\\n"}{.spec.source.path}{"\\n"}{.status.sync.revision}{"\\n"}{.status.sync.status}{"\\n"}{.status.health.status}{"\\n"}'`,
|
||||
`section pipelineRun kubectl get pipelinerun -n ${shellQuote(CI_NAMESPACE)} "$pipeline_run" -o 'jsonpath={.status.conditions[0].status}{"\\n"}{.status.conditions[0].reason}{"\\n"}{.status.conditions[0].message}{"\\n"}'`,
|
||||
`section taskRunDiagnostics env PIPELINE_RUN="$pipeline_run" CI_NAMESPACE=${shellQuote(CI_NAMESPACE)} TASKRUN_LOG_TAIL_LINES=60 TASKRUN_LOG_MAX_CHARS=4000 sh -lc ${shellQuote(runtimeLaneTaskRunDiagnosticsScript())}`,
|
||||
`section runtimeWorkloads kubectl get deploy,statefulset,svc,ingress,configmap -n ${shellQuote(spec.runtimeNamespace)} -l hwlab.pikastech.local/gitops-target=${shellQuote(spec.lane)} -o name`,
|
||||
`section publicProbes sh -lc ${shellQuote(runtimeLanePublicProbeScript(spec))}`,
|
||||
[
|
||||
@@ -3983,6 +4155,7 @@ function runtimeLaneControlPlaneStatus(spec: HwlabRuntimeLaneSpec, target: V02Co
|
||||
sections.pipelineRun?.exitCode ?? null,
|
||||
bundle.stderr,
|
||||
);
|
||||
const taskRuns = pipelineRun === null ? null : runtimeLaneTaskRunDiagnosticsFromText(pipelineRun, sections.taskRunDiagnostics, bundle.stderr);
|
||||
const runtimeWorkloadNames = String(sections.runtimeWorkloads?.stdout ?? "").split(/\r?\n/u).map((line) => line.trim()).filter(Boolean);
|
||||
const publicProbeFields = keyValueLinesFromText(sections.publicProbes?.stdout ?? "");
|
||||
const publicProbesOk = shellSectionOk(sections.publicProbes)
|
||||
@@ -4047,6 +4220,7 @@ function runtimeLaneControlPlaneStatus(spec: HwlabRuntimeLaneSpec, target: V02Co
|
||||
exitCode: sections.argo?.exitCode ?? null,
|
||||
},
|
||||
pipelineRun: pipelineRunInfo,
|
||||
taskRuns,
|
||||
runtimeWorkloads: {
|
||||
ok: shellSectionOk(sections.runtimeWorkloads),
|
||||
namespace: spec.runtimeNamespace,
|
||||
@@ -4071,6 +4245,10 @@ function runtimeLaneControlPlaneStatus(spec: HwlabRuntimeLaneSpec, target: V02Co
|
||||
query: {
|
||||
ok: isCommandSuccess(bundle),
|
||||
exitCode: bundle.exitCode,
|
||||
timedOut: bundle.timedOut ?? false,
|
||||
durationMs: bundle.durationMs ?? null,
|
||||
stdoutBytes: bundle.stdoutBytes ?? Buffer.byteLength(bundle.stdout, "utf8"),
|
||||
stderrBytes: bundle.stderrBytes ?? Buffer.byteLength(bundle.stderr, "utf8"),
|
||||
stderr: bundle.stderr.trim().slice(0, 2000),
|
||||
},
|
||||
next: {
|
||||
|
||||
Reference in New Issue
Block a user