diff --git a/docs/reference/observability.md b/docs/reference/observability.md index 7c30a063..94839e6d 100644 --- a/docs/reference/observability.md +++ b/docs/reference/observability.md @@ -81,6 +81,8 @@ OA Event Flow 的高频 trace 统计不得把每个 `trace-stats-updated` 投影 当一个 UniDesk CLI 子流程调用另一个 UniDesk CLI 并使用 `--raw`、`--full` 或机器消费输出时,必须识别全局 stdout guard 返回的 `outputTruncated=true` / `data.dump.path` 包装,并跟随 dump 文件读取真实 JSON payload;不能把 bounded wrapper 当成业务 payload。若内部调用不能安全跟随 dump,应把命令改成更窄的显式 view、id-specific drill-down 或 compact schema,而不是提高全局 dump 阈值。人工 closeout 仍引用有界摘要、hash、run id 和 drill-down 命令,不把 `/tmp/unidesk-cli-output` 当作长期证据源。 +branch-follower 的 gate 与 TaskRun drill-down 默认输出必须能直接作为 target-side 性能证据使用,不能稳定触发 stdout dump。CI gate 应返回有界的 PipelineRun 总耗时、慢 TaskRun 对象名、阶段 timeline 和性能摘要;成功 TaskRun drill-down 默认只返回容器状态、日志字节数和 `node-cicd-timing` 摘要,不把成功日志正文塞进 JSON。失败、等待或显式 full/raw drill-down 才展开日志 tail。 + 全局 stdout guard 不能只返回 dump 元数据。对已知高频长输出命令,bounded wrapper 的 `summary` 必须保留可直接 closeout 的命令特定字段,同时把完整 payload 留在 dump/raw drill-down 中。例如 `debug dispatch ... 
provider.upgrade` 超阈值时应保留 dispatch task、wait task、plan host root、当前/目标 gateway 版本、scheduler 结果和最终 promoted container 的 `version`、`restartPolicy`、`pidMode`、`heartbeatTimestamp`;`provider triage --full` 超阈值时应保留 `decision`、`scope`、`retryable`、failed/degraded/healthy scopes、signal counts、recommended cross-checks 和问题信号预览。新增会稳定超阈值的诊断命令时,优先补命令特定 compact summary,而不是扩大全局 stdout 阈值。 本地或远端 `AGENTS.md`、`CLAUDE.md`、`SKILL.md` 或同类 agent 入口文档超过 `10 KiB`、超过 YAML dump 阈值,或被 CLI/SSH/trans 读取时触发自动 dump,不能只把 dump 文件路径当成继续工作的正常入口。该现象表示入口文档已经过长,必须按 `docs-spec` 把入口文件拆成短索引:只保留 P0 规则摘要、关键命令入口和指向权威文档的链接;具体流程、背景、判定标准和长篇约束迁入对应职责文件。`SKILL.md` 拆到 `references/` 后禁止再堆成 `references/full.md`、`all.md`、`guide.md` 或其他变相超级 Markdown;必须按职责、生命周期和读取场景拆分成多个可选择的 reference,并在 `SKILL.md` 写清“何时读取哪个文件”。拆分后入口文档、skill 和长期参考必须互相交叉引用,避免同一规则在多个位置重复展开或产生第二真相。 diff --git a/scripts/native/cicd/branch-follower-gate.mjs b/scripts/native/cicd/branch-follower-gate.mjs index f0ebaee2..7739bbf8 100644 --- a/scripts/native/cicd/branch-follower-gate.mjs +++ b/scripts/native/cicd/branch-follower-gate.mjs @@ -87,6 +87,7 @@ async function ciTaskRunEvidence(commit) { name: pipelineRef, taskCount: Array.isArray(pipeline?.spec?.tasks) ? pipeline.spec.tasks.length : null, tasks: Array.isArray(pipeline?.spec?.tasks) ? pipeline.spec.tasks.slice(0, 12).map((task) => ({ name: str(task?.name), runAfter: Array.isArray(task?.runAfter) ? task.runAfter.slice(0, 6) : [] })) : [], + tasksTruncated: Array.isArray(pipeline?.spec?.tasks) ? 
pipeline.spec.tasks.length > 12 : false, }, taskRuns: taskSummary, }; @@ -264,22 +265,73 @@ function taskRunsSummary(list) { taskName: str(item?.metadata?.labels?.["tekton.dev/pipelineTask"]) || str(item?.spec?.taskRef?.name), status: str(condition.status) || "Unknown", reason: str(condition.reason), + startTime: str(item?.status?.startTime), + completionTime: str(item?.status?.completionTime), durationSeconds: durationSeconds(item?.status?.startTime, item?.status?.completionTime), }; }); const failed = rows.filter((item) => item.status === "False"); const active = rows.filter((item) => item.status !== "True" && item.status !== "False"); - const slow = rows.filter((item) => typeof item.durationSeconds === "number" && item.durationSeconds >= slowTaskSeconds); + const slow = rows + .filter((item) => typeof item.durationSeconds === "number" && item.durationSeconds >= slowTaskSeconds) + .sort((left, right) => (right.durationSeconds || 0) - (left.durationSeconds || 0)); + const timeline = rows.slice().sort(compareTaskRunRows); + const performance = taskRunPerformance(rows); return { count: rows.length, slowThresholdSeconds: slowTaskSeconds, failedCount: failed.length, activeCount: active.length, slowCount: slow.length, - failedItems: failed.slice(0, 5), - activeItems: active.slice(0, 5), - slowItems: slow.slice(0, 5), - items: rows.slice(0, 12), + failedItems: failed.slice(0, 6).map(compactNamedTaskRunRow), + activeItems: active.slice(0, 6).map(compactNamedTaskRunRow), + slowItems: slow.slice(0, 6).map(compactNamedTaskRunRow), + timeline: timeline.slice(0, 16).map((item) => compactTimelineTaskRunRow(item, performance.firstStart)), + timelineTruncated: timeline.length > 16, + performance, + }; +} + +function taskRunPerformance(rows) { + const starts = rows.map((item) => Date.parse(item.startTime || "")).filter((value) => Number.isFinite(value)); + const finishes = rows.map((item) => Date.parse(item.completionTime || "")).filter((value) => Number.isFinite(value)); + const 
/**
 * Comparator that orders TaskRun summary rows for the timeline view.
 *
 * Ordering rules, in priority order:
 *   1. Rows with a parseable `startTime` come before rows without one.
 *   2. Two started rows are ordered by ascending start time.
 *   3. Ties (same start time, or both rows not started) fall back to a
 *      `localeCompare` of `name`, so the result is deterministic.
 *
 * @param {{ name?: string, startTime?: string }} left - First row.
 * @param {{ name?: string, startTime?: string }} right - Second row.
 * @returns {number} Negative if `left` sorts first, positive if `right`
 *   sorts first, 0 if the rows are equivalent.
 */
function compareTaskRunRows(left, right) {
  const leftTime = Date.parse(left.startTime || "");
  const rightTime = Date.parse(right.startTime || "");
  const leftStarted = Number.isFinite(leftTime);
  const rightStarted = Number.isFinite(rightTime);
  // Exactly one row has started: the started row sorts first.
  if (leftStarted !== rightStarted) return leftStarted ? -1 : 1;
  // Both started at different instants: chronological order.
  if (leftStarted && leftTime !== rightTime) return leftTime - rightTime;
  // Identical start times (or neither started): tie-break by name. Returning
  // a fixed -1 here would make compare(a, b) and compare(b, a) both negative,
  // which is an inconsistent comparator for Array.prototype.sort.
  return String(left.name || "").localeCompare(String(right.name || ""));
}
durationSeconds(firstStart, item.startTime) : null, + durationSeconds: item.durationSeconds, }; } diff --git a/scripts/native/cicd/taskrun-drilldown.mjs b/scripts/native/cicd/taskrun-drilldown.mjs index bc3f45a1..2227b857 100644 --- a/scripts/native/cicd/taskrun-drilldown.mjs +++ b/scripts/native/cicd/taskrun-drilldown.mjs @@ -60,6 +60,7 @@ async function main() { const taskRunSteps = stepRows(status); const containers = mergeContainerRows(taskRunSteps, podStatuses).slice(0, maxContainers); const logContainers = selectLogContainers(containers); + const includeLogTails = process.env.INCLUDE_LOG_TAILS === "true" || condition?.status !== "True"; const logs = []; const perContainerBytes = Math.max(1, Math.floor(maxLogBytes / Math.max(1, logContainers.length))); for (const container of logContainers) { @@ -75,7 +76,7 @@ async function main() { container: name, lineCount: text.length === 0 ? 0 : text.split(/\r?\n/u).filter((line) => line.length > 0).length, bytes: Buffer.byteLength(text, "utf8"), - tail: text, + ...(includeLogTails || read.ok === false ? { tail: text } : {}), nodeCicdTiming: lastNodeCicdTiming(text), }); }