fix(sentinel): harden post-deploy report and ssh summaries

This commit is contained in:
Codex
2026-07-01 08:16:53 +00:00
parent 6906037ba4
commit 6b73dcd0c1
6 changed files with 273 additions and 14 deletions
@@ -325,10 +325,14 @@ export function runSentinelQuickVerify(state: SentinelCicdState, reason: string,
cleanupFindings,
);
const blockingFindings = findings.filter(isQuickVerifyBlockingFinding);
const analysisWarnings = analysis.ok ? [] : ["quick verify analyze command returned non-zero but a readable analysis artifact was produced; targetValidation is using artifact severity plus control blockers."];
const ok = record(artifactSummary).ok === true && controlFindings.length === 0 && blockingFindings.length === 0;
const analysisReadable = stringAtNullable(artifactSummary, "reportJsonSha256") !== null;
const analysisWarnings = mergeWarnings(
analysis.ok ? [] : ["quick verify analyze command returned non-zero but a readable analysis artifact was produced; targetValidation is using artifact severity plus control blockers."],
analysisReadable && artifactSummaryRecord.ok !== true ? ["quick verify analysis report contains non-blocking findings; targetValidation keeps them in report but does not fail the CronJob without red/blocking findings."] : [],
);
const ok = analysisReadable && controlFindings.length === 0 && blockingFindings.length === 0;
const businessStatus = quickVerifyBusinessStatus(null, promptIndex, turnSummary, traceFrame, elapsedMs(), maxSeconds);
printQuickVerifyProgress(state, runId, "record-report", ok ? "succeeded" : "blocked", { observerId, reportJsonSha256: stringAtNullable(artifactSummary, "reportJsonSha256"), findingCount: findings.length, blockingFindingCount: blockingFindings.length, controlFindingCount: controlFindings.length, elapsedMs: elapsedMs() });
printQuickVerifyProgress(state, runId, "record-report", ok ? "succeeded" : "blocked", { observerId, reportJsonSha256: stringAtNullable(artifactSummary, "reportJsonSha256"), analysisReadable, findingCount: findings.length, blockingFindingCount: blockingFindings.length, controlFindingCount: controlFindings.length, elapsedMs: elapsedMs() });
return recordQuickVerify(state, {
ok,
runId,
@@ -342,7 +346,7 @@ export function runSentinelQuickVerify(state: SentinelCicdState, reason: string,
reportJsonSha256: stringAtNullable(artifactSummary, "reportJsonSha256"),
findingCount: findings.length,
artifactCount: numberAtNullable(artifactSummary, "artifactCount") ?? 0,
failure: controlFindings.length > 0 ? "quick-verify-no-business-turn" : blockingFindings.length > 0 ? "quick-verify-blocking-findings" : null,
failure: !analysisReadable ? "quick-verify-analysis-missing" : controlFindings.length > 0 ? "quick-verify-no-business-turn" : blockingFindings.length > 0 ? "quick-verify-blocking-findings" : null,
promptSource: prompts.summary,
accountEnv: accountEnv.summary,
steps: [...steps, cleanupStep],
@@ -611,7 +615,9 @@ function callSentinelService(state: SentinelCicdState, method: "GET" | "POST", p
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
result = runCommand(["trans", stringAt(state.controlPlaneNode, "kubeRoute"), "sh", "--", script], repoRoot, { timeoutMs: attemptTimeoutSeconds * 1000 });
parsed = parseJsonObject(result.stdout);
attempts.push({ attempt, ...compactCommand(result), parsedOk: parsed !== null, valuesRedacted: true });
const recovered = parsed === null ? recoverTruncatedSshStdoutJson(result) : null;
if (recovered !== null) parsed = recovered.parsed;
attempts.push({ attempt, ...compactCommand(result), parsedOk: parsed !== null, parsedFromDump: recovered !== null, dumpPath: recovered?.dumpPath ?? null, valuesRedacted: true });
if (result.exitCode === 0) break;
}
return {
@@ -631,6 +637,29 @@ function callSentinelService(state: SentinelCicdState, method: "GET" | "POST", p
};
}
/**
 * Best-effort recovery of a JSON object from the local stdout dump file that a
 * `UNIDESK_SSH_STDOUT_TRUNCATED` marker on stderr points at.
 *
 * Used when the command's stdout itself could not be parsed as a JSON object.
 *
 * @param result Finished command whose `stderr` may contain the truncation marker.
 * @returns The parsed object together with the dump path it was read from, or `null`
 *          when no marker was found, the dump cannot be read, or it is not a JSON object.
 */
function recoverTruncatedSshStdoutJson(result: CommandResult): { parsed: Record<string, unknown>; dumpPath: string } | null {
  const dumpPath = sshStdoutDumpPathFromStderr(result.stderr);
  if (dumpPath === null || !existsSync(dumpPath)) return null;
  let dumpText: string;
  try {
    dumpText = readFileSync(dumpPath, "utf8");
  } catch {
    // existsSync is also true for directories and the file may disappear or be
    // unreadable by the time it is opened; recovery is optional, so report
    // "nothing recovered" instead of aborting the surrounding service call.
    return null;
  }
  const parsed = parseJsonObject(dumpText);
  return parsed === null ? null : { parsed, dumpPath };
}
/**
 * Finds the stdout dump path announced by a `UNIDESK_SSH_STDOUT_TRUNCATED` marker line.
 *
 * Every line of `value` is trimmed; a line that starts with the marker prefix is
 * expected to carry a JSON payload after the prefix. The first payload from which
 * `stringAtNullable(..., "dumpPath")` yields a non-null value wins.
 *
 * @param value Captured stderr text of the command.
 * @returns The announced dump path, or `null` when no usable marker line exists.
 */
function sshStdoutDumpPathFromStderr(value: string): string | null {
  const prefix = "UNIDESK_SSH_STDOUT_TRUNCATED ";
  for (const rawLine of value.split(/\r?\n/u)) {
    const line = rawLine.trim();
    if (!line.startsWith(prefix)) continue;
    try {
      const payload = JSON.parse(line.slice(prefix.length)) as unknown;
      const dumpPath = stringAtNullable(record(payload), "dumpPath");
      if (dumpPath !== null) return dumpPath;
    } catch {
      // A marker line with a cut-off or otherwise malformed payload must not hide
      // a later well-formed marker; keep scanning, exactly as for a payload that
      // simply lacks dumpPath.
      continue;
    }
  }
  return null;
}
function compactSentinelServiceBodyJson(value: Record<string, unknown> | null): unknown {
if (value === null || typeof value.renderedText !== "string") return value;
return {
+68 -2
View File
@@ -1,6 +1,7 @@
// SPEC: PJ2026-01060508 Web哨兵 draft-2026-06-27-p11-monitor-web-observability-dashboard.
// SPEC: PJ2026-01060508 Web哨兵 draft-2026-07-01-p15-cadence-otel.
// Responsibility: P5 web-probe sentinel service validation, maintenance, report and dashboard commands.
import { existsSync, readFileSync } from "node:fs";
import type { CommandResult } from "./command";
import { runCommand } from "./command";
import { repoRoot } from "./config";
@@ -14,6 +15,7 @@ import {
compactSentinelServiceBodyJson,
mergeWarnings,
numberAt,
numberAtNullable,
parseJsonObject,
pickFields,
record,
@@ -229,13 +231,18 @@ export function runSentinelReport(state: SentinelCicdState, options: Extract<Web
if (options.sampleSeq !== null) query.set("sampleSeq", String(options.sampleSeq));
const report = callSentinelService(state, "GET", `/api/report?${query.toString()}`, null, options.timeoutSeconds);
const body = record(report.bodyJson);
const renderedText = typeof body.renderedText === "string" ? body.renderedText : renderReportResult({ command, node: state.spec.nodeId, lane: state.spec.lane, report, valuesRedacted: true });
const rawPayload = Object.keys(body).length > 0 ? body : report;
if (options.full) return rendered(report.ok && body.ok !== false, command, JSON.stringify(rawPayload, null, 2));
if (options.raw) {
const artifactSummary = readSentinelReportArtifactSummary(state, body, Math.min(options.timeoutSeconds, 55));
return rendered(report.ok && body.ok !== false, command, JSON.stringify(compactSentinelReportRawPayload(state, body, report, artifactSummary), null, 2));
}
if (options.view === "summary") {
const artifactSummary = readSentinelReportArtifactSummary(state, body, Math.min(options.timeoutSeconds, 55));
const payload = compactSentinelReportRawPayload(state, body, report, artifactSummary);
return rendered(report.ok && body.ok !== false, command, renderSentinelReportSummary(payload, state));
}
const renderedText = typeof body.renderedText === "string" ? body.renderedText : renderReportResult({ command, node: state.spec.nodeId, lane: state.spec.lane, report, valuesRedacted: true });
return rendered(report.ok && body.ok !== false, command, renderedText);
}
@@ -374,6 +381,40 @@ function isSafeSentinelReportStateDir(value: string): boolean {
return value.startsWith(".state/web-observe/") && !value.includes("\0") && !value.includes("..") && !value.startsWith("/");
}
/**
 * Renders the plain-text "summary" view of a compacted sentinel report payload.
 *
 * The output is a fixed header, one line each for run identity, status, report/artifact/finding
 * figures and the public origin, followed by a table of at most the first 12 findings
 * (or "-" when there are none).
 *
 * @param payload Compacted report payload; `run`, `summary`, `artifactSummary` and `findings` are read.
 * @param state   Sentinel state; only `publicExposure.publicBaseUrl` is read here.
 * @returns The multi-line summary text joined with "\n".
 */
function renderSentinelReportSummary(payload: Record<string, unknown>, state: SentinelCicdState): string {
  const run = record(payload.run);
  const summary = record(payload.summary);
  const artifactSummary = record(payload.artifactSummary);
  const findings = Array.isArray(payload.findings) ? payload.findings.map(record) : [];

  const reportSha = stringAtNullable(run, "reportJsonSha256") ?? stringAtNullable(artifactSummary, "reportJsonSha256");
  const findingCount = run.findingCount ?? findings.length;

  // The run's own artifact count wins unless it is missing, or it is zero while
  // the analyzer summary reports a count of its own.
  const analyzerArtifactCount = numberAtNullable(artifactSummary, "artifactCount") ?? numberAtNullable(record(artifactSummary.counts), "artifacts");
  const runArtifactCount = numberAtNullable(run, "artifactCount");
  const preferAnalyzerCount = runArtifactCount === null || (runArtifactCount === 0 && analyzerArtifactCount !== null);
  const artifactCount = preferAnalyzerCount ? (analyzerArtifactCount ?? "-") : runArtifactCount;

  const publicOrigin = stringAtNullable(state.publicExposure, "publicBaseUrl") ?? "-";

  let findingRows = "-";
  if (findings.length !== 0) {
    const shownFindings = findings.slice(0, 12);
    findingRows = table(
      ["ID", "SEVERITY", "COUNT", "SUMMARY"],
      shownFindings.map((item) => [
        stringAtNullable(item, "id") ?? "-",
        stringAtNullable(item, "severity") ?? "-",
        item.count ?? "-",
        reportText(item.summary, 120) ?? "-",
      ]),
    );
  }

  const identityLine = `run=${run.id ?? "-"} scenario=${run.scenarioId ?? "-"} observer=${run.observerId ?? "-"}`;
  const statusLine = `status=${run.status ?? summary.status ?? "-"} reason=${summary.reason ?? "-"} failure=${summary.failure ?? "-"}`;
  const figuresLine = `report=${reportSha ?? "-"} artifacts=${artifactCount} findings=${findingCount}`;

  return [
    "Web Probe Sentinel Quick Verify",
    "=======================================================",
    identityLine,
    statusLine,
    figuresLine,
    `publicOrigin=${publicOrigin}`,
    "",
    "Findings",
    findingRows,
  ].join("\n");
}
function compactRootCauseSignals(value: unknown): Record<string, unknown> | null {
const item = record(value);
const keys = [
@@ -960,7 +1001,9 @@ function callSentinelService(state: SentinelCicdState, method: "GET" | "POST", p
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
result = runCommand(["trans", stringAt(state.controlPlaneNode, "kubeRoute"), "sh", "--", script], repoRoot, { timeoutMs: attemptTimeoutSeconds * 1000 });
parsed = parseJsonObject(result.stdout);
attempts.push({ attempt, ...compactCommand(result), parsedOk: parsed !== null, valuesRedacted: true });
const recovered = parsed === null ? recoverTruncatedSshStdoutJson(result) : null;
if (recovered !== null) parsed = recovered.parsed;
attempts.push({ attempt, ...compactCommand(result), parsedOk: parsed !== null, parsedFromDump: recovered !== null, dumpPath: recovered?.dumpPath ?? null, valuesRedacted: true });
if (result.exitCode === 0) break;
}
const compactBodyJson = compactSentinelServiceBodyJson(parsed);
@@ -981,6 +1024,29 @@ function callSentinelService(state: SentinelCicdState, method: "GET" | "POST", p
};
}
/**
 * Best-effort recovery of a JSON object from the local stdout dump file that a
 * `UNIDESK_SSH_STDOUT_TRUNCATED` marker on stderr points at.
 *
 * Used when the command's stdout itself could not be parsed as a JSON object.
 *
 * @param result Finished command whose `stderr` may contain the truncation marker.
 * @returns The parsed object together with the dump path it was read from, or `null`
 *          when no marker was found, the dump cannot be read, or it is not a JSON object.
 */
function recoverTruncatedSshStdoutJson(result: CommandResult): { parsed: Record<string, unknown>; dumpPath: string } | null {
  const dumpPath = sshStdoutDumpPathFromStderr(result.stderr);
  if (dumpPath === null || !existsSync(dumpPath)) return null;
  let dumpText: string;
  try {
    dumpText = readFileSync(dumpPath, "utf8");
  } catch {
    // existsSync is also true for directories and the file may disappear or be
    // unreadable by the time it is opened; recovery is optional, so report
    // "nothing recovered" instead of aborting the surrounding service call.
    return null;
  }
  const parsed = parseJsonObject(dumpText);
  return parsed === null ? null : { parsed, dumpPath };
}
/**
 * Finds the stdout dump path announced by a `UNIDESK_SSH_STDOUT_TRUNCATED` marker line.
 *
 * Every line of `value` is trimmed; a line that starts with the marker prefix is
 * expected to carry a JSON payload after the prefix. The first payload from which
 * `stringAtNullable(..., "dumpPath")` yields a non-null value wins.
 *
 * @param value Captured stderr text of the command.
 * @returns The announced dump path, or `null` when no usable marker line exists.
 */
function sshStdoutDumpPathFromStderr(value: string): string | null {
  const prefix = "UNIDESK_SSH_STDOUT_TRUNCATED ";
  for (const rawLine of value.split(/\r?\n/u)) {
    const line = rawLine.trim();
    if (!line.startsWith(prefix)) continue;
    try {
      const payload = JSON.parse(line.slice(prefix.length)) as unknown;
      const dumpPath = stringAtNullable(record(payload), "dumpPath");
      if (dumpPath !== null) return dumpPath;
    } catch {
      // A marker line with a cut-off or otherwise malformed payload must not hide
      // a later well-formed marker; keep scanning, exactly as for a payload that
      // simply lacks dumpPath.
      continue;
    }
  }
  return null;
}
function callSentinelServiceDirect(method: "GET" | "POST", pathWithQuery: string, body: Record<string, unknown> | null, timeoutSeconds: number, url: string): Record<string, unknown> {
const bodyB64 = Buffer.from(body === null ? "" : JSON.stringify(body), "utf8").toString("base64");
const fetchScript = [
+16
View File
@@ -9,7 +9,9 @@ import {
createSshStderrForwarder,
createSshStdoutForwarder,
formatSshStdoutTruncationHint,
formatSshTruncationCompletionSummary,
parseSshInvocation,
sshTruncationCompletionSummary,
sshCaptureBackendPlan,
sshStderrStreamMaxBytes,
sshStdoutStreamMaxBytes,
@@ -214,6 +216,20 @@ describe("ssh stdout bounded streaming", () => {
expect(hint).toContain("\"forwardedBytes\":5");
const payload = JSON.parse(hint!.slice("UNIDESK_SSH_STDOUT_TRUNCATED ".length)) as { dumpPath: string };
expect(readFileSync(payload.dumpPath, "utf8")).toBe("abcdefghijkl");
const summary = sshTruncationCompletionSummary({
invocation,
transport: "frontend-websocket",
exitCode: 0,
timedOut: false,
startedAtMs: Date.now() - 1234,
stdout: forwarder.summary(),
stderr: null,
});
const formattedSummary = formatSshTruncationCompletionSummary(summary);
expect(formattedSummary).toContain("UNIDESK_SSH_TRUNCATION_SUMMARY");
expect(formattedSummary).toContain("\"exitCode\":0");
expect(formattedSummary).toContain("\"commandOmitted\":true");
expect(formattedSummary).toContain("\"dumpPath\"");
rmSync(payload.dumpPath, { force: true });
});
+98 -7
View File
@@ -175,6 +175,33 @@ export interface SshStdoutTruncationHint {
note: string;
}
/**
 * Truncation details for a single output stream, as returned by
 * `SshStreamForwarder.summary()` once that stream has produced a truncation hint.
 */
export interface SshStreamTruncationSummary {
/** Which output stream this record describes. */
stream: "stdout" | "stderr";
/** Byte threshold copied from the truncation hint. */
thresholdBytes: number;
/** Observed byte count copied from the truncation hint. */
observedBytesAtTruncation: number;
/** Forwarded byte count copied from the truncation hint. */
forwardedBytes: number;
/** Path of the dump file for this stream, or null when none is available. */
dumpPath: string | null;
/** Dump error text, or null when none was recorded. */
dumpError: string | null;
}
/**
 * Completion record built by `sshTruncationCompletionSummary` when at least one stream
 * was truncated; `formatSshTruncationCompletionSummary` serializes it as JSON after the
 * `UNIDESK_SSH_TRUNCATION_SUMMARY` marker.
 */
export interface SshTruncationCompletionSummary {
/** Fixed discriminator for this record type. */
code: "ssh-truncation-summary";
/** Fixed severity. */
level: "info";
providerId: string;
/** Raw route string of the invocation. */
route: string;
transport: "backend-core-broker" | "frontend-websocket";
invocationKind: SshInvocationKind;
exitCode: number;
timedOut: boolean;
/** Milliseconds between the supplied start time and summary creation, never negative. */
elapsedMs: number;
/** `elapsedMs` expressed in seconds, rounded to one decimal place. */
elapsedSeconds: number;
/** Always true; this record has no field carrying the command text. */
commandOmitted: true;
/** Stdout truncation details, or null when stdout has no truncation summary. */
stdout: SshStreamTruncationSummary | null;
/** Stderr truncation details, or null when stderr has no truncation summary. */
stderr: SshStreamTruncationSummary | null;
/** Human-readable explanation of the record. */
message: string;
/** Suggested follow-up for whoever reads the record. */
action: string;
}
export type SshTcpPoolFailureKind =
| "provider-data-channel-closed"
| "provider-data-channel-missing"
@@ -2718,6 +2745,40 @@ export function formatSshStdoutTruncationHint(hint: SshStdoutTruncationHint): st
return `${marker} ${JSON.stringify(hint)}\n`;
}
/**
 * Builds the completion record for a finished SSH command whose output was truncated.
 *
 * @param options Invocation, transport, exit status, start timestamp (ms since epoch) and the
 *                per-stream truncation summaries.
 * @returns `null` when neither stream has a truncation summary; otherwise a record carrying the
 *          exit status, elapsed time and the per-stream summaries.
 */
export function sshTruncationCompletionSummary(options: {
  invocation: ParsedSshInvocation;
  transport: SshTruncationCompletionSummary["transport"];
  exitCode: number;
  timedOut: boolean;
  startedAtMs: number;
  stdout: SshStreamTruncationSummary | null;
  stderr: SshStreamTruncationSummary | null;
}): SshTruncationCompletionSummary | null {
  const { invocation, transport, exitCode, timedOut, startedAtMs, stdout, stderr } = options;

  const nothingTruncated = stdout === null && stderr === null;
  if (nothingTruncated) return null;

  // Clamp at zero so a start timestamp in the future never yields a negative duration.
  const elapsedMs = Math.max(0, Date.now() - startedAtMs);
  // Tenths of a second: round to 100 ms steps, then scale down.
  const elapsedSeconds = Math.round(elapsedMs / 100) / 10;

  const summary: SshTruncationCompletionSummary = {
    code: "ssh-truncation-summary",
    level: "info",
    providerId: safeProviderId(invocation.providerId),
    route: invocation.route.raw,
    transport,
    invocationKind: invocation.parsed.invocationKind,
    exitCode,
    timedOut,
    elapsedMs,
    elapsedSeconds,
    commandOmitted: true,
    stdout,
    stderr,
    message: "SSH output was truncated, but the command has finished; use this completion summary for closeout status and inspect dumpPath only when full output is needed.",
    action: "Use exitCode/timedOut plus dumpPath from this summary instead of manually inferring success from a long truncated stream.",
  };
  return summary;
}
/**
 * Serializes a truncation completion summary as one marker line.
 *
 * @param summary Summary to serialize, or `null` when there is nothing to report.
 * @returns An empty string for `null`; otherwise `UNIDESK_SSH_TRUNCATION_SUMMARY`, a space,
 *          the JSON encoding of the summary and a trailing newline.
 */
export function formatSshTruncationCompletionSummary(summary: SshTruncationCompletionSummary | null): string {
  if (summary === null) {
    return "";
  }
  const marker = "UNIDESK_SSH_TRUNCATION_SUMMARY";
  return marker + " " + JSON.stringify(summary) + "\n";
}
export function classifySshTcpPoolFailure(text: string): SshTcpPoolFailureKind | null {
const normalized = text.toLowerCase();
if (normalized.includes("ssh tcp data channel closed")) return "provider-data-channel-closed";
@@ -2783,12 +2844,17 @@ function sshStreamDumpPath(invocation: ParsedSshInvocation, stream: SshStdoutTru
return join(policy.dumpDir, `${timestamp}-${process.pid}-${suffix}-${slug}.${stream}.bin`);
}
/**
 * Handle returned by `createSshStdoutForwarder` / `createSshStderrForwarder`.
 */
export interface SshStreamForwarder {
/** Accepts one output chunk; yields a formatted truncation hint line, or null when there is none to emit. */
write: (chunk: Buffer) => string | null;
/** Truncation details for this stream, or null when no truncation hint has been produced. */
summary: () => SshStreamTruncationSummary | null;
}
export function createSshStdoutForwarder(options: {
invocation: ParsedSshInvocation;
transport: SshStdoutTruncationHint["transport"];
maxBytes?: number;
stdout?: NodeJS.WritableStream;
}): { write: (chunk: Buffer) => string | null } {
}): SshStreamForwarder {
return createSshStreamForwarder({
invocation: options.invocation,
transport: options.transport,
@@ -2803,7 +2869,7 @@ export function createSshStderrForwarder(options: {
transport: SshStdoutTruncationHint["transport"];
maxBytes?: number;
stderr?: NodeJS.WritableStream;
}): { write: (chunk: Buffer) => string | null } {
}): SshStreamForwarder {
return createSshStreamForwarder({
invocation: options.invocation,
transport: options.transport,
@@ -2819,12 +2885,13 @@ function createSshStreamForwarder(options: {
stream: SshStdoutTruncationHint["stream"];
maxBytes: number;
target: NodeJS.WritableStream;
}): { write: (chunk: Buffer) => string | null } {
}): SshStreamForwarder {
let observedBytes = 0;
let forwardedBytes = 0;
let truncated = false;
let dumpPath: string | null = null;
let dumpError: string | null = null;
let lastHint: SshStdoutTruncationHint | null = null;
const bufferedChunks: Buffer[] = [];
const appendDump = (chunk: Buffer): void => {
@@ -2862,7 +2929,7 @@ function createSshStreamForwarder(options: {
forwardedBytes += remaining;
}
appendDump(chunk);
return formatSshStdoutTruncationHint(sshStdoutTruncationHint({
lastHint = sshStdoutTruncationHint({
invocation: options.invocation,
transport: options.transport,
stream: options.stream,
@@ -2871,12 +2938,24 @@ function createSshStreamForwarder(options: {
forwardedBytes,
dumpPath,
dumpError,
}));
});
return formatSshStdoutTruncationHint(lastHint);
}
appendDump(chunk);
return null;
},
summary(): SshStreamTruncationSummary | null {
if (lastHint === null) return null;
return {
stream: lastHint.stream,
thresholdBytes: lastHint.thresholdBytes,
observedBytesAtTruncation: lastHint.observedBytesAtTruncation,
forwardedBytes: lastHint.forwardedBytes,
dumpPath,
dumpError,
};
},
};
}
@@ -3827,14 +3906,16 @@ export async function runSsh(config: UniDeskConfig, providerId: string, args: st
const text = Buffer.isBuffer(chunk) ? chunk.toString("utf8") : chunk;
stderrTail = (stderrTail + text).slice(-16_384);
};
let stdoutForwarder: SshStreamForwarder | null = null;
let stderrForwarder: SshStreamForwarder | null = null;
if (parsed.remoteCommand === null) {
child.stdout.pipe(process.stdout);
} else {
const stdoutForwarder = createSshStdoutForwarder({
stdoutForwarder = createSshStdoutForwarder({
invocation,
transport: "backend-core-broker",
});
const stderrForwarder = createSshStderrForwarder({
stderrForwarder = createSshStderrForwarder({
invocation,
transport: "backend-core-broker",
});
@@ -3923,6 +4004,16 @@ export async function runSsh(config: UniDeskConfig, providerId: string, args: st
startedAtMs,
}));
if (timingHint) process.stderr.write(timingHint);
const truncationSummary = formatSshTruncationCompletionSummary(sshTruncationCompletionSummary({
invocation,
transport: "backend-core-broker",
exitCode,
timedOut,
startedAtMs,
stdout: stdoutForwarder?.summary() ?? null,
stderr: stderrForwarder?.summary() ?? null,
}));
if (truncationSummary) process.stderr.write(truncationSummary);
resolve(exitCode);
};
child.on("error", (error) => {
+56
View File
@@ -130,6 +130,10 @@ export function runWebProbeRemoteArtifactJob(options: WebProbeRemoteArtifactJobO
}
}
const stop = pollFailure !== null && manifest === null
? stopRemoteJob(options.route, remoteDir)
: { payload: { attempted: false, reason: "not-needed", remoteDir, valuesRedacted: true }, result: null };
if (stop.result !== null) commandResults.push(stop.result);
const cleanup = cleanupRemoteDir(options.route, remoteDir, keepRemote);
if (cleanup.result !== null) commandResults.push(cleanup.result);
const manifestExitCode = manifest?.exitCode ?? null;
@@ -164,6 +168,7 @@ export function runWebProbeRemoteArtifactJob(options: WebProbeRemoteArtifactJobO
submitStdoutBytes: Buffer.byteLength(submit.stdout, "utf8"),
submitStderrBytes: Buffer.byteLength(submit.stderr, "utf8"),
},
stop: stop.payload,
valuesRedacted: true,
};
return {
@@ -177,6 +182,25 @@ export function runWebProbeRemoteArtifactJob(options: WebProbeRemoteArtifactJobO
};
}
/**
 * Runs the remote stop script for the job in `remoteDir` and reports the outcome.
 *
 * @param route     Route argument passed to the `trans` command.
 * @param remoteDir Remote job directory handed to `remoteArtifactStopScript`.
 * @returns `payload`: a summary with the script's `status`/`pid` lines, exit code and the last
 *          1000 characters of each output stream; `result`: the raw command result.
 */
function stopRemoteJob(route: string, remoteDir: string): { payload: Record<string, unknown>; result: CommandResult | null } {
  const stopCommand = [transPath(), route, "sh", "--", remoteArtifactStopScript(remoteDir)];
  const result = runCommand(stopCommand, repoRoot, { timeoutMs: 30_000 });

  // The stop script prints tab-separated "key<TAB>value" lines on stdout.
  const fields = parseTabStatus(result.stdout);
  const stoppedCleanly = result.exitCode === 0;

  const payload: Record<string, unknown> = {
    attempted: true,
    ok: stoppedCleanly,
    remoteDir,
    status: fields.status ?? null,
    pid: fields.pid ?? null,
    exitCode: result.exitCode,
    stdoutTail: result.stdout.slice(-1000),
    stderrTail: result.stderr.slice(-1000),
    valuesRedacted: true,
  };
  return { payload, result };
}
function pollRemoteManifest(
route: string,
remoteDir: string,
@@ -300,6 +324,27 @@ function remoteArtifactStatusScript(remoteDir: string): string {
].join("\n");
}
/**
 * Builds the POSIX `sh` script that stops the remote job recorded in `<remoteDir>/pid`.
 *
 * The script prints tab-separated `status` / `remote_dir` / `pid` lines. It exits 0 for
 * `no-pid`, `invalid-pid`, `not-running` and `stopped`, and exits 1 with `still-running`
 * when the process survives both the TERM and the KILL pass.
 *
 * @param remoteDir Remote job directory; it is shell-quoted before being embedded.
 * @returns The script text, one statement per line.
 */
function remoteArtifactStopScript(remoteDir: string): string {
  return [
    "set +e",
    `remote_dir=${shellQuote(remoteDir)}`,
    'pid_file="$remote_dir/pid"',
    'if [ ! -f "$pid_file" ]; then printf "status\\tno-pid\\nremote_dir\\t%s\\n" "$remote_dir"; exit 0; fi',
    'pid="$(cat "$pid_file" 2>/dev/null | tr -cd "0-9")"',
    'if [ -z "$pid" ]; then printf "status\\tinvalid-pid\\nremote_dir\\t%s\\n" "$remote_dir"; exit 0; fi',
    'children_of() { command -v pgrep >/dev/null 2>&1 && pgrep -P "$1" 2>/dev/null || true; }',
    // Shell variables are global in POSIX sh, so a variable holding the target pid would be
    // overwritten by the recursive call and the final kill would hit the last descendant
    // instead of the requested process. Positional parameters are restored when a function
    // returns, so "$1" still names this call's own target after the loop.
    'term_tree() { for _child in $(children_of "$1"); do term_tree "$_child"; done; kill -TERM "$1" 2>/dev/null || true; }',
    'kill_tree() { for _child in $(children_of "$1"); do kill_tree "$_child"; done; kill -KILL "$1" 2>/dev/null || true; }',
    'if ! kill -0 "$pid" >/dev/null 2>&1; then printf "status\\tnot-running\\nremote_dir\\t%s\\npid\\t%s\\n" "$remote_dir" "$pid"; exit 0; fi',
    'term_tree "$pid"',
    "sleep 1",
    'if kill -0 "$pid" >/dev/null 2>&1; then kill_tree "$pid"; sleep 1; fi',
    'if kill -0 "$pid" >/dev/null 2>&1; then printf "status\\tstill-running\\nremote_dir\\t%s\\npid\\t%s\\n" "$remote_dir" "$pid"; exit 1; fi',
    'printf "status\\tstopped\\nremote_dir\\t%s\\npid\\t%s\\n" "$remote_dir" "$pid"',
    "exit 0",
  ].join("\n");
}
function remoteArtifactRunnerScript(remoteDir: string, runId: string, stdoutTailBytes: number): string {
return [
"set -eu",
@@ -434,6 +479,17 @@ function parseJsonObject(text: string): Record<string, unknown> {
}
}
/**
 * Parses "key<TAB>value" lines into a dictionary.
 *
 * Trailing whitespace is stripped from every line and empty lines are skipped. The key is
 * the text before the first tab and the value is everything after it (further tabs are
 * kept); a line without a tab maps its key to "". Lines with an empty key are ignored,
 * and a repeated key keeps its last value.
 *
 * @param text Raw multi-line text, LF or CRLF separated.
 * @returns Map of keys to their string values.
 */
function parseTabStatus(text: string): Record<string, string> {
  const fields: Record<string, string> = {};
  text.split(/\r?\n/u).forEach((rawLine) => {
    const line = rawLine.trimEnd();
    if (line.length === 0) return;
    const tabAt = line.indexOf("\t");
    const key = tabAt === -1 ? line : line.slice(0, tabAt);
    if (key.length === 0) return;
    fields[key] = tabAt === -1 ? "" : line.slice(tabAt + 1);
  });
  return fields;
}
/**
 * Narrows an arbitrary value to a finite number.
 *
 * @param value Any value.
 * @returns `value` when it is a number that is neither NaN nor infinite; otherwise `null`.
 */
function numberOrNull(value: unknown): number | null {
  if (typeof value !== "number") {
    return null;
  }
  return Number.isFinite(value) ? value : null;
}