fix: bound codex supervisor diagnostics

This commit is contained in:
Codex
2026-05-22 15:18:14 +00:00
parent 2ed9e79d67
commit 03c98c83ef
4 changed files with 119 additions and 24 deletions
+2 -2
View File
@@ -186,9 +186,9 @@ bun scripts/code-queue-pr-preflight-example.ts --repo pikasTech/unidesk --base m
- 当默认审阅摘要不足时,再逐级使用 `bun scripts/cli.ts codex task <taskId> --detail`、`bun scripts/cli.ts codex task <taskId> --trace --limit N`、`codex output`。
- 当 master 控制面状态和 D601 scheduler 状态看起来分裂时,使用 `docs/reference/observability.md` 中的活性规则判断。
默认 supervisor 视图必须保持低噪声。每个任务应带 `commands.show`、`commands.detail`、`commands.trace`、`commands.output`、`commands.full`、`commands.read`,让下一步渐进披露动作明确;默认不得嵌入完整 queue 列表、完整 final response、raw output 页或完整 trace 行。`recentCompleted` 必须默认限量,且不得重复 `completedUnread` 里的未读终态,避免完成历史把当前 running、阻塞和未读审阅挤出视野;需要完整当前页时显式使用 `--view full`。`commands.read` 只是在人工审阅后的建议命令,listing 命令绝不能自动执行。
默认 supervisor 视图必须保持低噪声。每个任务应带 `commands.show`、`commands.detail`、`commands.trace`、`commands.output`、`commands.full`、`commands.read`,让下一步渐进披露动作明确;默认不得嵌入完整 queue 列表、完整 final response、raw output 页或完整 trace 行。`recentCompleted` 必须默认限量,且不得重复 `completedUnread` 里的未读终态,避免完成历史把当前 running、阻塞和未读审阅挤出视野;需要完整当前页时显式使用 `--view full`。`executionDiagnostics` 只能展示有界 task-id/reason 预览、总数、截断标记和 omitted counts;需要全量诊断时使用输出中的 raw command。`commands.read` 只是在人工审阅后的建议命令,listing 命令绝不能自动执行。
这条规则直接服务 HWLAB #131:指挥官要优先看到真实业务推进、部署修复、阻塞和需要人工审阅的未读结果,Gate/报告/审查/诊断类任务只能作为折叠的分类信号存在,不能在默认输出中用长 prompt/body 抢占上下文。
这条规则直接服务 HWLAB #132:指挥官要优先看到真实业务推进、部署修复、阻塞和需要人工审阅的未读结果,Gate/报告/审查/诊断类任务只能作为折叠的分类信号存在,不能在默认输出中用长 prompt/body 抢占上下文。
完成未读任务的审阅也必须遵循渐进披露。指挥官默认只拉取原始 prompt 和最终 response,用它判断任务是否声称完成、是否有明显越界、是否缺少验收证据;不要默认拉完整 trace、全量 tool summary 或 raw output。只有当 final response 与目标不一致、证据不足、远端 commit 无法验证、任务疑似造假、或需要追溯失败原因时,才继续展开 `--detail`、分页 `--trace`、或按 seq 读取 `codex output`。这条规则的目标是降低上下文压力,同时保留通过多步查询拿到完整证据的能力。
@@ -176,7 +176,6 @@ function remoteControlPlaneResult(overrides: Partial<JsonRecord> = {}): JsonReco
prCreateDryRun: null,
limitations: [],
risks: [],
runnerDisposition: "ready",
recoveryHint: "Runner PR workflow has env-token coverage for the scheduler.",
commands: {
local: "bun scripts/cli.ts gh auth status --repo pikasTech/unidesk",
@@ -300,11 +299,11 @@ async function main(): Promise<void> {
database: { port: 15432, containerPort: 5432 },
providerIngress: { port: 18082, containerPort: 8081 },
},
database: { user: "unidesk", password: "unidesk_dev_password", name: "unidesk", volume: "unidesk_pgdata_10gb", volumeSize: "15GB" },
database: { user: "unidesk", password: "<redacted-test-password>", name: "unidesk", volume: "unidesk_pgdata_10gb", volumeSize: "15GB" },
providerGateway: {
id: "main-server",
name: "Main Server Provider",
token: "unidesk-dev-token-change-me",
token: "<redacted-test-token>",
labels: { host: "main-server", role: "self-provider", docker: true },
heartbeatIntervalMs: 15000,
reconnectBaseMs: 1000,
@@ -316,7 +315,7 @@ async function main(): Promise<void> {
microservices: [],
paths: { stateDir: ".state", logsDir: "logs", docsReferenceDir: "docs/reference" },
sshForwarding: { mode: "ws", keyDir: "/root/.ssh", host: "main-server", port: 22, user: "root" },
auth: { username: "admin", password: "Liang6516.", sessionSecret: "secret", sessionTtlSeconds: 86400 },
auth: { username: "admin", password: "<redacted-test-password>", sessionSecret: "<redacted-test-session-secret>", sessionTtlSeconds: 86400 },
},
coreFetch: (path) => {
observedLocalPath = path;
@@ -20,6 +20,10 @@ function longText(marker: string, repeat: number): string {
return Array.from({ length: repeat }, (_, index) => `${marker}-${index} #132 Gate report diagnostic review evidence direct workbench fix`).join("\n");
}
/**
 * Builds `count` sequential fixture ids of the form `<prefix>-NNN`.
 *
 * Numbering starts at 1 and each ordinal is left-padded with zeros to at
 * least three digits (wider ordinals are kept in full).
 *
 * @param prefix Text placed before the dash in every id.
 * @param count Number of ids to generate; values below 1 yield an empty list.
 * @returns The generated ids in ascending ordinal order.
 */
function manyIds(prefix: string, count: number): string[] {
  const ids: string[] = [];
  for (let ordinal = 1; ordinal <= count; ordinal += 1) {
    const suffix = `${ordinal}`.padStart(3, "0");
    ids.push(prefix + "-" + suffix);
  }
  return ids;
}
function task(id: string, status: string, updatedAt: string, readAt: string | null = null): JsonRecord {
return {
id,
@@ -79,9 +83,30 @@ function fixtureResponse(path: string): JsonRecord {
ok: true,
queue: {
executionDiagnostics: {
state: "split-brain",
splitBrain: true,
effectiveLiveness: "live",
splitBrainLive: true,
recommendedAction: "continue-supervision",
livenessSummary: longText("split-brain-live-summary", 45),
databaseActiveTaskCount: 80,
databaseActiveTaskIds: manyIds("db-active", 80),
schedulerActiveRunSlotCount: 30,
schedulerActiveTaskIds: manyIds("scheduler-active", 30),
activeHeartbeatCount: 80,
activeHeartbeatTaskIds: manyIds("active-heartbeat", 80),
heartbeatFreshTaskIds: manyIds("fresh-heartbeat", 80),
heartbeatExpiredTaskIds: [],
heartbeatMissingTaskIds: [],
staleRecoveryCandidateTaskIds: [],
heartbeatRiskTaskIds: [],
traceGapTaskIds: manyIds("trace-gap", 60),
traceGapNotStaleTaskIds: manyIds("trace-gap-fresh", 40),
reasons: Array.from({ length: 24 }, (_, index) => longText(`diagnostic-reason-${index + 1}`, 10)),
oaPublisher: {
pendingTaskIds: manyIds("oa-pending", 80),
lastError: longText("oa-publisher-error", 60),
},
},
},
pagination: {
@@ -131,11 +156,20 @@ export function runCodeQueueSupervisorDisclosureContract(): JsonRecord {
const fullItem = asRecord(asArray(asRecord(asRecord(full).tasks).items)[0]);
const completedUnread = asRecord(supervisorView.completedUnread);
const fullTasks = asRecord(asRecord(full).tasks);
const diagnostics = asRecord(supervisorView.executionDiagnostics);
const listBudget = asRecord(diagnostics.listBudget);
const omittedCounts = asRecord(listBudget.omittedCounts);
assertCondition(supervisorBody.length < fullBody.length * 0.55, "supervisor output should be materially smaller than full output", { supervisorChars: supervisorBody.length, fullChars: fullBody.length });
assertCondition(supervisorBody.length < 70_000, "supervisor output should remain bounded even with large diagnostics", { supervisorChars: supervisorBody.length });
assertCondition(recentItems.length === 5, "recentCompleted should be capped below --limit by default", { returned: recentItems.length });
assertCondition(asArray(completedUnread.items).length === 7, "completedUnread should keep unread terminal tasks separate from recentCompleted", completedUnread);
assertCondition(recentItems.every((item) => asRecord(item).unreadTerminal === false), "recentCompleted should not duplicate unread terminal tasks", { recentItems });
assertCondition(asArray(diagnostics.databaseActiveTaskIds).length === 12, "diagnostic task id lists should be capped", diagnostics);
assertCondition(omittedCounts.databaseActiveTaskIds === 68, "diagnostic omitted counts should preserve full visibility metadata", omittedCounts);
assertCondition(asArray(diagnostics.reasons).length === 6, "diagnostic reasons should be capped", diagnostics);
assertCondition(diagnostics.livenessSummaryTruncated === true, "long diagnostic liveness summary should be previewed", diagnostics);
assertCondition(listBudget.truncated === true && typeof listBudget.rawCommand === "string", "diagnostic list budget should disclose raw command", listBudget);
assertCondition(asArray(runningItem.issueRefs).includes("#132"), "supervisor row should expose issue refs for triage", runningItem);
assertCondition(Number(prompt.chars) > String(prompt.text ?? "").length && prompt.truncated === true, "supervisor prompt must be a short preview with original char count", prompt);
assertCondition(Number(lastMessage.chars) > String(lastMessage.text ?? "").length && lastMessage.truncated === true, "supervisor body must be a short preview with original char count", lastMessage);
@@ -150,6 +184,7 @@ export function runCodeQueueSupervisorDisclosureContract(): JsonRecord {
checks: [
"supervisor output materially smaller than full",
"recentCompleted capped",
"split-brain diagnostics capped",
"prompt/body previews bounded",
"drill-down commands preserved",
"full view remains detailed",
+79 -18
View File
@@ -16,6 +16,8 @@ const supervisorRecentCompletedLimit = 5;
const supervisorPromptPreviewChars = 160;
const supervisorBodyPreviewChars = 180;
const supervisorRecentBodyPreviewChars = 80;
// Default cap on how many task ids each execution-diagnostics id list keeps in its preview.
const diagnosticsIdPreviewLimit = 12;
// Cap on how many diagnostic reasons are kept in the execution-diagnostics preview.
const diagnosticsReasonPreviewLimit = 6;
const steerPromptPreviewChars = 320;
const minimaxSubmitModel = "minimax-m2.7";
const deepseekSubmitModel = "deepseek-chat";
@@ -1013,42 +1015,101 @@ function recommendedActionFromDiagnostics(record: Record<string, unknown>): stri
return "none";
}
/**
 * Produces a bounded preview of a string list: entries are de-duplicated,
 * sorted with the default string ordering, and cut to the first `limit` items.
 *
 * @param value Raw value handed to `stringList` to obtain the entries.
 * @param limit Maximum number of entries kept in the preview.
 * @returns `items` (the preview), `count` (unique entries before cutting),
 *   `omitted` (unique entries left out) and `truncated` (whether any were left out).
 */
function boundedUniqueStringList(value: unknown, limit = diagnosticsIdPreviewLimit): { items: string[]; count: number; omitted: number; truncated: boolean } {
  const uniqueSorted = Array.from(new Set(stringList(value)));
  uniqueSorted.sort();
  const preview = uniqueSorted.slice(0, limit);
  const total = uniqueSorted.length;
  // The preview is a slice of the full list, so this difference is never negative.
  const omitted = total - preview.length;
  return { items: preview, count: total, omitted, truncated: omitted > 0 };
}
/**
 * Collapses a value into a single-line preview of bounded length.
 *
 * The value is converted with `asString`, every whitespace run is replaced by
 * one space, and the result is trimmed. If it is longer than `maxChars` UTF-16
 * code units, only a leading preview is returned.
 *
 * @param value Raw value to preview.
 * @param maxChars Maximum preview length in UTF-16 code units. Negative values
 *   are treated as 0 and fractional values are rounded down.
 * @returns `text` (the preview, or `null` when the normalized text is empty),
 *   `chars` (normalized length), `truncated`, and `omittedChars` (code units
 *   left out of the preview).
 */
function boundedInlineString(value: unknown, maxChars: number): { text: string | null; chars: number; truncated: boolean; omittedChars: number } {
  const text = asString(value).replace(/\s+/gu, " ").trim();
  // Clamp the budget: a negative value would make slice() count from the end of
  // the string, and a fractional one would produce a fractional omitted count.
  const budget = Math.max(0, Math.floor(maxChars));
  const truncated = text.length > budget;
  let end = budget;
  if (truncated && end > 0) {
    // Never cut between the two halves of a surrogate pair; drop the whole
    // pair instead so the preview does not end in a lone surrogate.
    const last = text.charCodeAt(end - 1);
    const next = text.charCodeAt(end);
    const lastIsHighSurrogate = last >= 0xd800 && last <= 0xdbff;
    const nextIsLowSurrogate = next >= 0xdc00 && next <= 0xdfff;
    if (lastIsHighSurrogate && nextIsLowSurrogate) end -= 1;
  }
  return {
    text: text.length === 0 ? null : truncated ? text.slice(0, end) : text,
    chars: text.length,
    truncated,
    omittedChars: truncated ? text.length - end : 0,
  };
}
/**
 * Builds the compact execution-diagnostics summary from a raw diagnostics value.
 *
 * Returns `null` when `asRecord(value)` yields `null`. Otherwise the task-id lists are
 * reduced to bounded previews via `boundedUniqueStringList`, the reasons and liveness
 * summary are shortened via `boundedInlineString`, and a `listBudget` entry reports the
 * limits used, the per-field omitted counts and a raw command string.
 *
 * NOTE(review): this span comes from a rendered commit diff, so removed and added lines
 * appear next to each other without +/- markers (for example the two `Array.from(new Set([`
 * openers and the repeated object keys) — confirm against the real file before editing.
 */
function compactExecutionDiagnostics(value: unknown): Record<string, unknown> | null {
const record = asRecord(value);
if (record === null) return null;
// Union of every id list that counts as a heartbeat risk, de-duplicated and sorted.
const heartbeatRiskTaskIds = Array.from(new Set([
const fullHeartbeatRiskTaskIds = Array.from(new Set([
...stringList(record.heartbeatRiskTaskIds),
...stringList(record.heartbeatExpiredTaskIds),
...stringList(record.heartbeatMissingTaskIds),
...stringList(record.staleRecoveryCandidateTaskIds),
])).sort();
// Bounded previews (default id limit) for each task-id list.
const databaseActiveTaskIds = boundedUniqueStringList(record.databaseActiveTaskIds);
const schedulerActiveTaskIds = boundedUniqueStringList(record.schedulerActiveTaskIds);
const activeHeartbeatTaskIds = boundedUniqueStringList(record.activeHeartbeatTaskIds);
const heartbeatFreshTaskIds = boundedUniqueStringList(record.heartbeatFreshTaskIds);
const heartbeatExpiredTaskIds = boundedUniqueStringList(record.heartbeatExpiredTaskIds);
const heartbeatMissingTaskIds = boundedUniqueStringList(record.heartbeatMissingTaskIds);
const staleRecoveryCandidateTaskIds = boundedUniqueStringList(record.staleRecoveryCandidateTaskIds);
const heartbeatRiskTaskIds = boundedUniqueStringList(fullHeartbeatRiskTaskIds);
const traceGapTaskIds = boundedUniqueStringList(record.traceGapTaskIds);
const traceGapNotStaleTaskIds = boundedUniqueStringList(record.traceGapNotStaleTaskIds);
const allReasons = stringList(record.reasons);
// Keep only the first few reasons, each shortened to 240 characters; reasons that normalize to empty are dropped.
const reasons = allReasons.slice(0, diagnosticsReasonPreviewLimit).map((reason) => boundedInlineString(reason, 240).text).filter((reason): reason is string => reason !== null);
const livenessSummary = boundedInlineString(record.livenessSummary, 420);
// How much each field lost to the preview limits.
const omittedCounts = {
databaseActiveTaskIds: databaseActiveTaskIds.omitted,
schedulerActiveTaskIds: schedulerActiveTaskIds.omitted,
activeHeartbeatTaskIds: activeHeartbeatTaskIds.omitted,
heartbeatFreshTaskIds: heartbeatFreshTaskIds.omitted,
heartbeatExpiredTaskIds: heartbeatExpiredTaskIds.omitted,
heartbeatMissingTaskIds: heartbeatMissingTaskIds.omitted,
staleRecoveryCandidateTaskIds: staleRecoveryCandidateTaskIds.omitted,
heartbeatRiskTaskIds: heartbeatRiskTaskIds.omitted,
traceGapTaskIds: traceGapTaskIds.omitted,
traceGapNotStaleTaskIds: traceGapNotStaleTaskIds.omitted,
reasons: Math.max(0, allReasons.length - reasons.length),
livenessSummaryChars: livenessSummary.omittedChars,
};
return {
state: record.state ?? record.health ?? null,
degraded: record.degraded ?? null,
splitBrain: record.splitBrain ?? null,
splitBrainLive: splitBrainLiveFromDiagnostics(record),
effectiveLiveness: effectiveLivenessFromDiagnostics({ ...record, heartbeatRiskTaskIds }),
recommendedAction: recommendedActionFromDiagnostics({ ...record, heartbeatRiskTaskIds }),
livenessSummary: record.livenessSummary ?? null,
// The liveness and recommended-action helpers receive the full risk id list, not the bounded preview.
effectiveLiveness: effectiveLivenessFromDiagnostics({ ...record, heartbeatRiskTaskIds: fullHeartbeatRiskTaskIds }),
recommendedAction: recommendedActionFromDiagnostics({ ...record, heartbeatRiskTaskIds: fullHeartbeatRiskTaskIds }),
livenessSummary: livenessSummary.text,
livenessSummaryChars: livenessSummary.chars,
livenessSummaryTruncated: livenessSummary.truncated,
executionStateSource: record.executionStateSource ?? null,
controlPlane: record.controlPlane ?? null,
databaseActiveTaskCount: record.databaseActiveTaskCount ?? null,
databaseActiveTaskIds: record.databaseActiveTaskIds ?? [],
controlPlane: boundedInlineString(record.controlPlane, 120).text,
// Reported counts win; the unique-id count is only a fallback when the record has none.
databaseActiveTaskCount: record.databaseActiveTaskCount ?? databaseActiveTaskIds.count,
databaseActiveTaskIds: databaseActiveTaskIds.items,
schedulerActiveRunSlotCount: record.schedulerActiveRunSlotCount ?? null,
schedulerActiveTaskIds: record.schedulerActiveTaskIds ?? [],
activeHeartbeatTaskIds: record.activeHeartbeatTaskIds ?? [],
heartbeatFreshTaskIds: record.heartbeatFreshTaskIds ?? [],
heartbeatExpiredTaskIds: record.heartbeatExpiredTaskIds ?? [],
heartbeatMissingTaskIds: record.heartbeatMissingTaskIds ?? [],
staleRecoveryCandidateTaskIds: record.staleRecoveryCandidateTaskIds ?? [],
heartbeatRiskTaskIds,
traceGapTaskIds: record.traceGapTaskIds ?? [],
traceGapNotStaleTaskIds: record.traceGapNotStaleTaskIds ?? [],
schedulerActiveTaskIds: schedulerActiveTaskIds.items,
activeHeartbeatCount: record.activeHeartbeatCount ?? activeHeartbeatTaskIds.count,
activeHeartbeatTaskIds: activeHeartbeatTaskIds.items,
heartbeatFreshTaskIds: heartbeatFreshTaskIds.items,
heartbeatExpiredTaskIds: heartbeatExpiredTaskIds.items,
heartbeatMissingTaskIds: heartbeatMissingTaskIds.items,
staleRecoveryCandidateTaskIds: staleRecoveryCandidateTaskIds.items,
heartbeatRiskTaskIds: heartbeatRiskTaskIds.items,
traceGapTaskIds: traceGapTaskIds.items,
traceGapNotStaleTaskIds: traceGapNotStaleTaskIds.items,
lastSchedulerHeartbeatAt: record.lastSchedulerHeartbeatAt ?? null,
lastObservedAgentEventAt: record.lastObservedAgentEventAt ?? null,
lastPersistedTraceAt: record.lastPersistedTraceAt ?? null,
oaPublisher: record.oaPublisher ?? null,
reasons: record.reasons ?? [],
// oaPublisher is passed through previewJson with depth, array, key and string-length limits.
oaPublisher: previewJson(record.oaPublisher ?? null, { maxDepth: 3, maxArrayItems: 4, maxObjectKeys: 12, maxStringLength: 240 }),
reasons,
// Discloses the limits applied, whether anything was omitted, and a command string for the raw data.
listBudget: {
idPreviewLimit: diagnosticsIdPreviewLimit,
reasonPreviewLimit: diagnosticsReasonPreviewLimit,
truncated: Object.values(omittedCounts).some((count) => count > 0),
omittedCounts,
rawCommand: "bun scripts/cli.ts microservice proxy code-queue /api/tasks/overview?limit=30 --raw --full",
},
};
}