/**
 * Builds the shell script that collects bounded Tekton TaskRun diagnostics.
 *
 * The returned text is a POSIX shell snippet (`set +e` followed by a quoted
 * `node` heredoc). The embedded Node program:
 *
 * - reads `CI_NAMESPACE` (default `hwlab-ci`), `PIPELINE_RUN`,
 *   `TASKRUN_LOG_TAIL_LINES` (default 60, clamped to 20..200) and
 *   `TASKRUN_LOG_MAX_CHARS` (default 4000, clamped to 1000..20000) from the
 *   environment;
 * - lists TaskRuns labelled `tekton.dev/pipelineRun=<PIPELINE_RUN>` via
 *   `kubectl`, summarises them, and fetches redacted log tails for at most
 *   three failed TaskRuns;
 * - prints exactly one JSON document on stdout.
 *
 * When `PIPELINE_RUN` is empty the program prints a `skipped` document and
 * exits 0 without calling `kubectl`.
 *
 * @returns The script text, lines joined with `\n` (no trailing newline).
 */
function runtimeLaneTaskRunDiagnosticsScript(): string {
  return [
    "set +e",
    "node <<'NODE'",
    "const cp = require('node:child_process');",
    "const namespace = process.env.CI_NAMESPACE || 'hwlab-ci';",
    "const pipelineRun = process.env.PIPELINE_RUN || '';",
    // A non-numeric env value must not leak NaN into `--tail=`, `slice(-n)` or
    // `maxBuffer`; fall back to the default and keep the result an integer.
    "function boundedInt(raw, fallback, min, max) {",
    "  const parsed = Math.trunc(Number(raw || fallback));",
    "  return Math.max(min, Math.min(Number.isNaN(parsed) ? fallback : parsed, max));",
    "}",
    "const tailLines = boundedInt(process.env.TASKRUN_LOG_TAIL_LINES, 60, 20, 200);",
    "const maxLogChars = boundedInt(process.env.TASKRUN_LOG_MAX_CHARS, 4000, 1000, 20000);",
    "function kubectl(args, timeoutMs, maxBuffer) {",
    "  const out = cp.spawnSync('kubectl', args, { encoding: 'utf8', timeout: timeoutMs, maxBuffer });",
    "  const err = out.error;",
    "  return { exitCode: out.status, stdout: out.stdout || '', stderr: out.stderr || (err ? String(err.message || err) : ''), timedOut: Boolean(err && err.code === 'ETIMEDOUT') };",
    "}",
    "function parseJson(text) { try { return JSON.parse(text); } catch { return null; } }",
    "function oneLine(value, limit) { return String(value || '').replace(/[\\r\\n\\t]+/g, ' ').replace(/\\s+/g, ' ').trim().slice(0, limit); }",
    "function timestampMs(value) { const parsed = Date.parse(String(value || '')); return Number.isFinite(parsed) ? parsed : null; }",
    "function durationSeconds(start, end) { const s = timestampMs(start); const e = timestampMs(end); return s === null || e === null || e < s ? null : Math.round((e - s) / 1000); }",
    "function condition(item) { const conditions = Array.isArray(item && item.status && item.status.conditions) ? item.status.conditions : []; return conditions.find((entry) => entry && entry.type === 'Succeeded') || conditions[0] || {}; }",
    // Redaction runs before any trimming so a secret can never survive by
    // straddling the tail boundary.
    "function redact(text) {",
    "  return String(text || '')",
    "    .replace(/Bearer\\s+[A-Za-z0-9._~+\\/-]+=*/giu, 'Bearer ')",
    "    .replace(/((?:api[_-]?key|authorization|token|password|secret|database_url|dsn)\\s*[:=]\\s*)\\S+/giu, '$1');",
    "}",
    "function trimTail(text) {",
    "  const redacted = redact(text);",
    "  let tail = redacted.split(/\\r?\\n/).slice(-tailLines).join('\\n').trimEnd();",
    "  if (tail.length > maxLogChars) {",
    "    tail = tail.slice(tail.length - maxLogChars);",
    "    const lineBreak = tail.indexOf('\\n');",
    "    if (lineBreak >= 0) tail = tail.slice(lineBreak + 1);",
    "  }",
    "  return tail;",
    "}",
    "function stateSummary(state) {",
    "  if (!state || typeof state !== 'object') return { state: null, reason: null, exitCode: null };",
    "  const key = Object.keys(state)[0] || null;",
    "  const value = key ? state[key] || {} : {};",
    "  return { state: key, reason: value.reason || null, exitCode: typeof value.exitCode === 'number' ? value.exitCode : null };",
    "}",
    "function podContainerStates(pod) {",
    "  const status = (pod && pod.status) || {};",
    "  const containers = [...(status.initContainerStatuses || []), ...(status.containerStatuses || [])];",
    "  return containers.map((item) => {",
    "    const summary = stateSummary(item.state);",
    "    return { name: item.name || null, ready: item.ready === true, restartCount: item.restartCount || 0, state: summary.state, reason: summary.reason, exitCode: summary.exitCode };",
    "  }).filter((item) => item.state !== 'running' || item.restartCount > 0).slice(0, 12);",
    "}",
    "function findPodName(taskRunName) {",
    "  const out = kubectl(['-n', namespace, 'get', 'pod', '-l', 'tekton.dev/taskRun=' + taskRunName, '-o', 'jsonpath={.items[0].metadata.name}'], 10000, 65536);",
    "  return out.exitCode === 0 && out.stdout.trim() ? out.stdout.trim() : null;",
    "}",
    "function collectLogTail(taskRun) {",
    "  const podName = taskRun.podName || findPodName(taskRun.name);",
    "  if (!podName) return { taskRun: taskRun.name, pipelineTask: taskRun.pipelineTask, podName: null, ok: false, reason: 'pod-not-found', logTail: '' };",
    "  const podResult = kubectl(['-n', namespace, 'get', 'pod', podName, '-o', 'json'], 10000, 262144);",
    "  const pod = parseJson(podResult.stdout);",
    "  const logResult = kubectl(['-n', namespace, 'logs', podName, '--all-containers=true', '--tail=' + String(tailLines), '--prefix=true'], 20000, Math.max(262144, maxLogChars * 8));",
    "  const raw = logResult.stdout || logResult.stderr;",
    "  return {",
    "    taskRun: taskRun.name,",
    "    pipelineTask: taskRun.pipelineTask,",
    "    podName,",
    "    ok: logResult.exitCode === 0,",
    "    exitCode: logResult.exitCode,",
    "    timedOut: logResult.timedOut,",
    "    containers: podContainerStates(pod),",
    "    logTail: trimTail(raw),",
    "  };",
    "}",
    "if (!pipelineRun) {",
    "  console.log(JSON.stringify({ ok: true, pipelineRun: null, namespace, skipped: true, reason: 'pipeline-run-empty', total: 0, items: [], failedTaskRuns: [], activeTaskRuns: [], logTails: [], valuesPrinted: false }));",
    "  process.exit(0);",
    "}",
    "const taskRunResult = kubectl(['-n', namespace, 'get', 'taskrun', '-l', 'tekton.dev/pipelineRun=' + pipelineRun, '-o', 'json'], 25000, 2 * 1024 * 1024);",
    "const taskData = parseJson(taskRunResult.stdout);",
    "const rawItems = Array.isArray(taskData && taskData.items) ? taskData.items : [];",
    "const items = rawItems.map((item) => {",
    "  const metadata = item.metadata || {};",
    "  const labels = metadata.labels || {};",
    "  const status = item.status || {};",
    "  const c = condition(item);",
    "  return {",
    "    name: metadata.name || null,",
    "    pipelineTask: labels['tekton.dev/pipelineTask'] || null,",
    "    status: c.status || null,",
    "    reason: c.reason || null,",
    "    message: oneLine(c.message, 360) || null,",
    "    startTime: status.startTime || null,",
    "    completionTime: status.completionTime || null,",
    "    durationSeconds: durationSeconds(status.startTime, status.completionTime),",
    "    podName: status.podName || null,",
    "  };",
    "}).filter((item) => item.name).sort((left, right) => (timestampMs(left.startTime) || 0) - (timestampMs(right.startTime) || 0) || String(left.name).localeCompare(String(right.name)));",
    "const failedTaskRuns = items.filter((item) => item.status === 'False' || /fail|error|timeout/i.test(String(item.reason || '')));",
    "const activeTaskRuns = items.filter((item) => item.status !== 'True' && item.status !== 'False');",
    "const succeededCount = items.filter((item) => item.status === 'True').length;",
    // Log collection is the expensive part (two kubectl calls per TaskRun), so
    // it is capped at three failed TaskRuns.
    "const logTails = failedTaskRuns.slice(0, 3).map(collectLogTail);",
    "console.log(JSON.stringify({",
    "  ok: taskRunResult.exitCode === 0,",
    "  pipelineRun,",
    "  namespace,",
    "  total: items.length,",
    "  succeededCount,",
    "  failedCount: failedTaskRuns.length,",
    "  activeCount: activeTaskRuns.length,",
    "  failedTaskRuns: failedTaskRuns.slice(0, 8),",
    "  activeTaskRuns: activeTaskRuns.slice(0, 8),",
    "  recentTaskRuns: items.slice(-8),",
    "  logTails,",
    "  query: { taskRunsExitCode: taskRunResult.exitCode, taskRunsTimedOut: taskRunResult.timedOut, taskRunsStderr: oneLine(taskRunResult.stderr, 500) || null },",
    "  bounded: { maxFailedTaskRuns: 8, maxLogTaskRuns: 3, tailLines, maxLogChars, redacted: true },",
    "  valuesPrinted: false,",
    "}));",
    "NODE",
  ].join("\n");
}
/**
 * Converts the `taskRunDiagnostics` shell section into a diagnostics record.
 *
 * Three outcomes are possible:
 * - the section printed nothing: returns an empty summary whose `ok` mirrors
 *   `shellSectionOk(section)`;
 * - the section printed JSON: returns the parsed fields, with `pipelineRun`
 *   falling back to the argument when the payload has no string value for it;
 * - the output could not be processed as JSON: returns `ok: false` with a
 *   truncated copy of the raw output and
 *   `degradedReason: "taskrun-diagnostics-json-parse-failed"`.
 *
 * @param pipelineRun Name of the PipelineRun the diagnostics were requested for.
 * @param section Captured shell section, or `undefined` if it was not produced.
 * @param stderr Stderr text attached (truncated to 1000 chars) to degraded results.
 * @returns A plain record; `sectionExitCode` is always present.
 */
function runtimeLaneTaskRunDiagnosticsFromText(
  pipelineRun: string,
  section: ShellSection | undefined,
  stderr: string,
): Record<string, unknown> {
  const sectionExitCode = section?.exitCode ?? null;
  const stderrExcerpt = stderr.trim().slice(0, 1000);
  const raw = String(section?.stdout ?? "").trim();

  if (raw.length === 0) {
    return {
      ok: shellSectionOk(section),
      pipelineRun,
      total: 0,
      failedTaskRuns: [],
      activeTaskRuns: [],
      logTails: [],
      sectionExitCode,
      stderr: stderrExcerpt,
    };
  }

  try {
    // NOTE(review): `record` presumably coerces non-object JSON to an empty
    // object — confirm against its definition.
    const data = record(JSON.parse(raw) as unknown);
    return {
      ...data,
      pipelineRun: stringOrNull(data.pipelineRun) ?? pipelineRun,
      sectionExitCode,
    };
  } catch {
    // Keep a bounded copy of the unparseable output so the failure can be
    // diagnosed from the status payload alone.
    return {
      ok: false,
      pipelineRun,
      sectionExitCode,
      raw: raw.slice(0, 2000),
      stderr: stderrExcerpt,
      degradedReason: "taskrun-diagnostics-json-parse-failed",
    };
  }
}
${shellQuote(spec.runtimeNamespace)} -l hwlab.pikastech.local/gitops-target=${shellQuote(spec.lane)} -o name`, `section publicProbes sh -lc ${shellQuote(runtimeLanePublicProbeScript(spec))}`, [ @@ -3983,6 +4155,7 @@ function runtimeLaneControlPlaneStatus(spec: HwlabRuntimeLaneSpec, target: V02Co sections.pipelineRun?.exitCode ?? null, bundle.stderr, ); + const taskRuns = pipelineRun === null ? null : runtimeLaneTaskRunDiagnosticsFromText(pipelineRun, sections.taskRunDiagnostics, bundle.stderr); const runtimeWorkloadNames = String(sections.runtimeWorkloads?.stdout ?? "").split(/\r?\n/u).map((line) => line.trim()).filter(Boolean); const publicProbeFields = keyValueLinesFromText(sections.publicProbes?.stdout ?? ""); const publicProbesOk = shellSectionOk(sections.publicProbes) @@ -4047,6 +4220,7 @@ function runtimeLaneControlPlaneStatus(spec: HwlabRuntimeLaneSpec, target: V02Co exitCode: sections.argo?.exitCode ?? null, }, pipelineRun: pipelineRunInfo, + taskRuns, runtimeWorkloads: { ok: shellSectionOk(sections.runtimeWorkloads), namespace: spec.runtimeNamespace, @@ -4071,6 +4245,10 @@ function runtimeLaneControlPlaneStatus(spec: HwlabRuntimeLaneSpec, target: V02Co query: { ok: isCommandSuccess(bundle), exitCode: bundle.exitCode, + timedOut: bundle.timedOut ?? false, + durationMs: bundle.durationMs ?? null, + stdoutBytes: bundle.stdoutBytes ?? Buffer.byteLength(bundle.stdout, "utf8"), + stderrBytes: bundle.stderrBytes ?? Buffer.byteLength(bundle.stderr, "utf8"), stderr: bundle.stderr.trim().slice(0, 2000), }, next: {