Merge pull request #427 from pikasTech/fix/issue-426-g14-status-compact

fix: compact HWLAB runtime pipeline diagnostics
This commit is contained in:
Lyon
2026-06-15 20:39:47 +08:00
committed by GitHub
+178
View File
@@ -215,6 +215,10 @@ interface CommandJsonResult {
stdout: string;
stderr: string;
parsed: unknown | null;
durationMs?: number;
timedOut?: boolean;
stdoutBytes?: number;
stderrBytes?: number;
}
interface RemoteAsyncCommandSpec {
@@ -646,7 +650,9 @@ function shellQuote(value: string): string {
}
function commandJson(command: string[], timeoutMs = 60_000): CommandJsonResult {
const startedAtMs = Date.now();
const result = runCommand(command, repoRoot, { timeoutMs });
const durationMs = Date.now() - startedAtMs;
let parsed: unknown | null = null;
if (result.stdout.trim().length > 0) {
try {
@@ -662,11 +668,17 @@ function commandJson(command: string[], timeoutMs = 60_000): CommandJsonResult {
stdout: result.stdout,
stderr: result.stderr,
parsed,
durationMs,
timedOut: result.timedOut,
stdoutBytes: Buffer.byteLength(result.stdout, "utf8"),
stderrBytes: Buffer.byteLength(result.stderr, "utf8"),
};
}
function commandJsonWithInput(command: string[], input: string, timeoutMs = 60_000): CommandJsonResult {
const startedAtMs = Date.now();
const result = runCommand(command, repoRoot, { timeoutMs, input });
const durationMs = Date.now() - startedAtMs;
let parsed: unknown | null = null;
if (result.stdout.trim().length > 0) {
try {
@@ -682,6 +694,10 @@ function commandJsonWithInput(command: string[], input: string, timeoutMs = 60_0
stdout: result.stdout,
stderr: result.stderr,
parsed,
durationMs,
timedOut: result.timedOut,
stdoutBytes: Buffer.byteLength(result.stdout, "utf8"),
stderrBytes: Buffer.byteLength(result.stderr, "utf8"),
};
}
@@ -1439,6 +1455,161 @@ function pipelineRunCompactFromText(
};
}
/**
 * Builds the shell snippet that collects compact TaskRun diagnostics for one
 * Tekton PipelineRun.
 *
 * The snippet disables errexit (`set +e`) and feeds an inline Node.js program
 * to `node` through a quoted heredoc, so the shell performs no expansion inside
 * the program text. The program is configured purely through environment
 * variables:
 *
 * - `CI_NAMESPACE`: namespace to query (defaults to `hwlab-ci`).
 * - `PIPELINE_RUN`: PipelineRun name; when empty the program prints a
 *   `skipped` JSON document and exits 0 without calling kubectl.
 * - `TASKRUN_LOG_TAIL_LINES`: log lines kept per TaskRun (default 60,
 *   clamped to 20..200).
 * - `TASKRUN_LOG_MAX_CHARS`: characters kept per log tail (default 4000,
 *   clamped to 1000..20000).
 *
 * Non-numeric limit values fall back to the default instead of turning into
 * `NaN` (which previously produced `--tail=NaN`, an unbounded tail slice and an
 * invalid `maxBuffer`), and fractional values are truncated to integers.
 *
 * The program prints exactly one JSON document on stdout containing TaskRun
 * counts, at most 8 failed / 8 active / 8 most recent TaskRuns, and redacted
 * log tails for at most the first 3 failed TaskRuns.
 *
 * @returns The script text, lines joined with `\n`.
 */
function runtimeLaneTaskRunDiagnosticsScript(): string {
  return [
    "set +e",
    "node <<'NODE'",
    "const cp = require('node:child_process');",
    "const namespace = process.env.CI_NAMESPACE || 'hwlab-ci';",
    "const pipelineRun = process.env.PIPELINE_RUN || '';",
    // Parse an integer limit from the environment: NaN falls back to the default,
    // the value is clamped to [min, max] and truncated so it is always a usable integer.
    "function boundedInt(raw, fallback, min, max) {",
    " const parsed = Number(raw || fallback);",
    " const value = Number.isNaN(parsed) ? fallback : parsed;",
    " return Math.trunc(Math.max(min, Math.min(value, max)));",
    "}",
    "const tailLines = boundedInt(process.env.TASKRUN_LOG_TAIL_LINES, 60, 20, 200);",
    "const maxLogChars = boundedInt(process.env.TASKRUN_LOG_MAX_CHARS, 4000, 1000, 20000);",
    // Synchronous kubectl wrapper: never throws for process failures, reports them in the result.
    "function kubectl(args, timeoutMs, maxBuffer) {",
    " const out = cp.spawnSync('kubectl', args, { encoding: 'utf8', timeout: timeoutMs, maxBuffer });",
    " const err = out.error;",
    " return { exitCode: out.status, stdout: out.stdout || '', stderr: out.stderr || (err ? String(err.message || err) : ''), timedOut: Boolean(err && err.code === 'ETIMEDOUT') };",
    "}",
    "function parseJson(text) { try { return JSON.parse(text); } catch { return null; } }",
    "function oneLine(value, limit) { return String(value || '').replace(/[\\r\\n\\t]+/g, ' ').replace(/\\s+/g, ' ').trim().slice(0, limit); }",
    "function timestampMs(value) { const parsed = Date.parse(String(value || '')); return Number.isFinite(parsed) ? parsed : null; }",
    "function durationSeconds(start, end) { const s = timestampMs(start); const e = timestampMs(end); return s === null || e === null || e < s ? null : Math.round((e - s) / 1000); }",
    // Prefer the 'Succeeded' condition; otherwise use the first condition, or an empty object.
    "function condition(item) { const conditions = Array.isArray(item && item.status && item.status.conditions) ? item.status.conditions : []; return conditions.find((entry) => entry && entry.type === 'Succeeded') || conditions[0] || {}; }",
    // Mask bearer tokens and `key: value` / `key=value` pairs whose key looks like a credential.
    "function redact(text) {",
    " return String(text || '')",
    " .replace(/Bearer\\s+[A-Za-z0-9._~+\\/-]+=*/giu, 'Bearer <redacted>')",
    " .replace(/((?:api[_-]?key|authorization|token|password|secret|database_url|dsn)\\s*[:=]\\s*)\\S+/giu, '$1<redacted>');",
    "}",
    // Keep the last `tailLines` lines, then at most `maxLogChars` characters, dropping a cut-off first line.
    "function trimTail(text) {",
    " const redacted = redact(text);",
    " let tail = redacted.split(/\\r?\\n/).slice(-tailLines).join('\\n').trimEnd();",
    " if (tail.length > maxLogChars) {",
    " tail = tail.slice(tail.length - maxLogChars);",
    " const lineBreak = tail.indexOf('\\n');",
    " if (lineBreak >= 0) tail = tail.slice(lineBreak + 1);",
    " }",
    " return tail;",
    "}",
    "function stateSummary(state) {",
    " if (!state || typeof state !== 'object') return { state: null, reason: null, exitCode: null };",
    " const key = Object.keys(state)[0] || null;",
    " const value = key ? state[key] || {} : {};",
    " return { state: key, reason: value.reason || null, exitCode: typeof value.exitCode === 'number' ? value.exitCode : null };",
    "}",
    // Report only containers that are not plainly running (or that restarted), capped at 12 entries.
    "function podContainerStates(pod) {",
    " const status = (pod && pod.status) || {};",
    " const containers = [...(status.initContainerStatuses || []), ...(status.containerStatuses || [])];",
    " return containers.map((item) => {",
    " const summary = stateSummary(item.state);",
    " return { name: item.name || null, ready: item.ready === true, restartCount: item.restartCount || 0, state: summary.state, reason: summary.reason, exitCode: summary.exitCode };",
    " }).filter((item) => item.state !== 'running' || item.restartCount > 0).slice(0, 12);",
    "}",
    "function findPodName(taskRunName) {",
    " const out = kubectl(['-n', namespace, 'get', 'pod', '-l', 'tekton.dev/taskRun=' + taskRunName, '-o', 'jsonpath={.items[0].metadata.name}'], 10000, 65536);",
    " return out.exitCode === 0 && out.stdout.trim() ? out.stdout.trim() : null;",
    "}",
    "function collectLogTail(taskRun) {",
    " const podName = taskRun.podName || findPodName(taskRun.name);",
    " if (!podName) return { taskRun: taskRun.name, pipelineTask: taskRun.pipelineTask, podName: null, ok: false, reason: 'pod-not-found', logTail: '' };",
    " const podResult = kubectl(['-n', namespace, 'get', 'pod', podName, '-o', 'json'], 10000, 262144);",
    " const pod = parseJson(podResult.stdout);",
    " const logResult = kubectl(['-n', namespace, 'logs', podName, '--all-containers=true', '--tail=' + String(tailLines), '--prefix=true'], 20000, Math.max(262144, maxLogChars * 8));",
    " const raw = logResult.stdout || logResult.stderr;",
    " return {",
    " taskRun: taskRun.name,",
    " pipelineTask: taskRun.pipelineTask,",
    " podName,",
    " ok: logResult.exitCode === 0,",
    " exitCode: logResult.exitCode,",
    " timedOut: logResult.timedOut,",
    " containers: podContainerStates(pod),",
    " logTail: trimTail(raw),",
    " };",
    "}",
    // Without a PipelineRun name there is nothing to query: emit a well-formed "skipped" document.
    "if (!pipelineRun) {",
    " console.log(JSON.stringify({ ok: true, pipelineRun: null, namespace, skipped: true, reason: 'pipeline-run-empty', total: 0, items: [], failedTaskRuns: [], activeTaskRuns: [], logTails: [], valuesPrinted: false }));",
    " process.exit(0);",
    "}",
    "const taskRunResult = kubectl(['-n', namespace, 'get', 'taskrun', '-l', 'tekton.dev/pipelineRun=' + pipelineRun, '-o', 'json'], 25000, 2 * 1024 * 1024);",
    "const taskData = parseJson(taskRunResult.stdout);",
    "const rawItems = Array.isArray(taskData && taskData.items) ? taskData.items : [];",
    "const items = rawItems.map((item) => {",
    " const metadata = item.metadata || {};",
    " const labels = metadata.labels || {};",
    " const status = item.status || {};",
    " const c = condition(item);",
    " return {",
    " name: metadata.name || null,",
    " pipelineTask: labels['tekton.dev/pipelineTask'] || null,",
    " status: c.status || null,",
    " reason: c.reason || null,",
    " message: oneLine(c.message, 360) || null,",
    " startTime: status.startTime || null,",
    " completionTime: status.completionTime || null,",
    " durationSeconds: durationSeconds(status.startTime, status.completionTime),",
    " podName: status.podName || null,",
    " };",
    "}).filter((item) => item.name).sort((left, right) => (timestampMs(left.startTime) || 0) - (timestampMs(right.startTime) || 0) || String(left.name).localeCompare(String(right.name)));",
    "const failedTaskRuns = items.filter((item) => item.status === 'False' || /fail|error|timeout/i.test(String(item.reason || '')));",
    "const activeTaskRuns = items.filter((item) => item.status !== 'True' && item.status !== 'False');",
    "const succeededCount = items.filter((item) => item.status === 'True').length;",
    // Logs are fetched only for the first 3 failed TaskRuns to keep the probe bounded.
    "const logTails = failedTaskRuns.slice(0, 3).map(collectLogTail);",
    "console.log(JSON.stringify({",
    " ok: taskRunResult.exitCode === 0,",
    " pipelineRun,",
    " namespace,",
    " total: items.length,",
    " succeededCount,",
    " failedCount: failedTaskRuns.length,",
    " activeCount: activeTaskRuns.length,",
    " failedTaskRuns: failedTaskRuns.slice(0, 8),",
    " activeTaskRuns: activeTaskRuns.slice(0, 8),",
    " recentTaskRuns: items.slice(-8),",
    " logTails,",
    " query: { taskRunsExitCode: taskRunResult.exitCode, taskRunsTimedOut: taskRunResult.timedOut, taskRunsStderr: oneLine(taskRunResult.stderr, 500) || null },",
    " bounded: { maxFailedTaskRuns: 8, maxLogTaskRuns: 3, tailLines, maxLogChars, redacted: true },",
    " valuesPrinted: false,",
    "}));",
    "NODE",
  ].join("\n");
}
/**
 * Converts the stdout of the `taskRunDiagnostics` shell section into a plain
 * diagnostics record.
 *
 * Three outcomes are possible:
 * - stdout is blank: an empty summary whose `ok` mirrors `shellSectionOk(section)`;
 * - stdout parses as JSON: the parsed fields, with `pipelineRun` defaulting to
 *   the argument when the payload has none, plus `sectionExitCode`;
 * - stdout cannot be processed: a degraded record carrying bounded excerpts of
 *   the raw output and of `stderr`.
 *
 * @param pipelineRun Name of the PipelineRun the section was asked about.
 * @param section Captured section result, or `undefined` when it is missing.
 * @param stderr Stderr text to excerpt (first 1000 trimmed characters) in the
 *   empty-output and degraded records.
 * @returns A JSON-serializable diagnostics record.
 */
function runtimeLaneTaskRunDiagnosticsFromText(
  pipelineRun: string,
  section: ShellSection | undefined,
  stderr: string,
): Record<string, unknown> {
  const sectionExitCode = section?.exitCode ?? null;
  const stderrExcerpt = (): string => stderr.trim().slice(0, 1000);
  const payloadText = String(section?.stdout ?? "").trim();

  if (payloadText.length > 0) {
    try {
      // NOTE(review): `record` presumably coerces the parsed value to a plain object — confirm.
      const payload = record(JSON.parse(payloadText) as unknown);
      const reportedPipelineRun = stringOrNull(payload.pipelineRun);
      return {
        ...payload,
        pipelineRun: reportedPipelineRun ?? pipelineRun,
        sectionExitCode,
      };
    } catch {
      // Anything thrown while parsing or normalizing yields the degraded record.
      return {
        ok: false,
        pipelineRun,
        sectionExitCode,
        raw: payloadText.slice(0, 2000),
        stderr: stderrExcerpt(),
        degradedReason: "taskrun-diagnostics-json-parse-failed",
      };
    }
  }

  // The section printed nothing: report an empty summary and let the section status decide `ok`.
  return {
    ok: shellSectionOk(section),
    pipelineRun,
    total: 0,
    failedTaskRuns: [],
    activeTaskRuns: [],
    logTails: [],
    sectionExitCode,
    stderr: stderrExcerpt(),
  };
}
function timestampMs(value: unknown): number | null {
if (typeof value !== "string" || value.trim().length === 0) return null;
const parsed = Date.parse(value);
@@ -3917,6 +4088,7 @@ function runtimeLaneControlPlaneStatusBundle(spec: HwlabRuntimeLaneSpec, target:
`section controlPlane sh -lc ${shellQuote(controlPlaneProbe)}`,
`section argo kubectl get application -n ${shellQuote(ARGO_NAMESPACE)} ${shellQuote(spec.app)} -o 'jsonpath={.spec.source.targetRevision}{"\\n"}{.spec.source.path}{"\\n"}{.status.sync.revision}{"\\n"}{.status.sync.status}{"\\n"}{.status.health.status}{"\\n"}'`,
`section pipelineRun kubectl get pipelinerun -n ${shellQuote(CI_NAMESPACE)} "$pipeline_run" -o 'jsonpath={.status.conditions[0].status}{"\\n"}{.status.conditions[0].reason}{"\\n"}{.status.conditions[0].message}{"\\n"}'`,
`section taskRunDiagnostics env PIPELINE_RUN="$pipeline_run" CI_NAMESPACE=${shellQuote(CI_NAMESPACE)} TASKRUN_LOG_TAIL_LINES=60 TASKRUN_LOG_MAX_CHARS=4000 sh -lc ${shellQuote(runtimeLaneTaskRunDiagnosticsScript())}`,
`section runtimeWorkloads kubectl get deploy,statefulset,svc,ingress,configmap -n ${shellQuote(spec.runtimeNamespace)} -l hwlab.pikastech.local/gitops-target=${shellQuote(spec.lane)} -o name`,
`section publicProbes sh -lc ${shellQuote(runtimeLanePublicProbeScript(spec))}`,
[
@@ -3983,6 +4155,7 @@ function runtimeLaneControlPlaneStatus(spec: HwlabRuntimeLaneSpec, target: V02Co
sections.pipelineRun?.exitCode ?? null,
bundle.stderr,
);
const taskRuns = pipelineRun === null ? null : runtimeLaneTaskRunDiagnosticsFromText(pipelineRun, sections.taskRunDiagnostics, bundle.stderr);
const runtimeWorkloadNames = String(sections.runtimeWorkloads?.stdout ?? "").split(/\r?\n/u).map((line) => line.trim()).filter(Boolean);
const publicProbeFields = keyValueLinesFromText(sections.publicProbes?.stdout ?? "");
const publicProbesOk = shellSectionOk(sections.publicProbes)
@@ -4047,6 +4220,7 @@ function runtimeLaneControlPlaneStatus(spec: HwlabRuntimeLaneSpec, target: V02Co
exitCode: sections.argo?.exitCode ?? null,
},
pipelineRun: pipelineRunInfo,
taskRuns,
runtimeWorkloads: {
ok: shellSectionOk(sections.runtimeWorkloads),
namespace: spec.runtimeNamespace,
@@ -4071,6 +4245,10 @@ function runtimeLaneControlPlaneStatus(spec: HwlabRuntimeLaneSpec, target: V02Co
query: {
ok: isCommandSuccess(bundle),
exitCode: bundle.exitCode,
timedOut: bundle.timedOut ?? false,
durationMs: bundle.durationMs ?? null,
stdoutBytes: bundle.stdoutBytes ?? Buffer.byteLength(bundle.stdout, "utf8"),
stderrBytes: bundle.stderrBytes ?? Buffer.byteLength(bundle.stderr, "utf8"),
stderr: bundle.stderr.trim().slice(0, 2000),
},
next: {