fix: bound codex supervisor diagnostics
This commit is contained in:
@@ -186,9 +186,9 @@ bun scripts/code-queue-pr-preflight-example.ts --repo pikasTech/unidesk --base m
|
||||
- 当默认审阅摘要不足时,再逐级使用 `bun scripts/cli.ts codex task <taskId> --detail`、`bun scripts/cli.ts codex task <taskId> --trace --limit N` 或 `codex output`。
|
||||
- 当 master 控制面状态和 D601 scheduler 状态看起来分裂时,使用 `docs/reference/observability.md` 中的活性规则判断。
|
||||
|
||||
默认 supervisor 视图必须保持低噪声。每个任务应带 `commands.show`、`commands.detail`、`commands.trace`、`commands.output`、`commands.full` 和 `commands.read`,让下一步渐进披露动作明确;默认不得嵌入完整 queue 列表、完整 final response、raw output 页或完整 trace 行。`recentCompleted` 必须默认限量,且不得重复 `completedUnread` 里的未读终态,避免完成历史把当前 running、阻塞和未读审阅挤出视野;需要完整当前页时显式使用 `--view full`。`commands.read` 只是在人工审阅后的建议命令,listing 命令绝不能自动执行。
|
||||
默认 supervisor 视图必须保持低噪声。每个任务应带 `commands.show`、`commands.detail`、`commands.trace`、`commands.output`、`commands.full` 和 `commands.read`,让下一步渐进披露动作明确;默认不得嵌入完整 queue 列表、完整 final response、raw output 页或完整 trace 行。`recentCompleted` 必须默认限量,且不得重复 `completedUnread` 里的未读终态,避免完成历史把当前 running、阻塞和未读审阅挤出视野;需要完整当前页时显式使用 `--view full`。`executionDiagnostics` 只能展示有界 task-id/reason 预览、总数、截断标记和 omitted counts;需要全量诊断时使用输出中的 raw command。`commands.read` 只是在人工审阅后的建议命令,listing 命令绝不能自动执行。
|
||||
|
||||
这条规则直接服务 HWLAB #131:指挥官要优先看到真实业务推进、部署修复、阻塞和需要人工审阅的未读结果,Gate/报告/审查/诊断类任务只能作为折叠的分类信号存在,不能在默认输出中用长 prompt/body 抢占上下文。
|
||||
这条规则直接服务 HWLAB #132:指挥官要优先看到真实业务推进、部署修复、阻塞和需要人工审阅的未读结果,Gate/报告/审查/诊断类任务只能作为折叠的分类信号存在,不能在默认输出中用长 prompt/body 抢占上下文。
|
||||
|
||||
完成未读任务的审阅也必须遵循渐进披露。指挥官默认只拉取原始 prompt 和最终 response,用它判断任务是否声称完成、是否有明显越界、是否缺少验收证据;不要默认拉完整 trace、全量 tool summary 或 raw output。只有当 final response 与目标不一致、证据不足、远端 commit 无法验证、任务疑似造假、或需要追溯失败原因时,才继续展开 `--detail`、分页 `--trace`、或按 seq 读取 `codex output`。这条规则的目标是降低上下文压力,同时保留通过多步查询拿到完整证据的能力。
|
||||
|
||||
|
||||
@@ -176,7 +176,6 @@ function remoteControlPlaneResult(overrides: Partial<JsonRecord> = {}): JsonReco
|
||||
prCreateDryRun: null,
|
||||
limitations: [],
|
||||
risks: [],
|
||||
runnerDisposition: "ready",
|
||||
recoveryHint: "Runner PR workflow has env-token coverage for the scheduler.",
|
||||
commands: {
|
||||
local: "bun scripts/cli.ts gh auth status --repo pikasTech/unidesk",
|
||||
@@ -300,11 +299,11 @@ async function main(): Promise<void> {
|
||||
database: { port: 15432, containerPort: 5432 },
|
||||
providerIngress: { port: 18082, containerPort: 8081 },
|
||||
},
|
||||
database: { user: "unidesk", password: "unidesk_dev_password", name: "unidesk", volume: "unidesk_pgdata_10gb", volumeSize: "15GB" },
|
||||
database: { user: "unidesk", password: "<redacted-test-password>", name: "unidesk", volume: "unidesk_pgdata_10gb", volumeSize: "15GB" },
|
||||
providerGateway: {
|
||||
id: "main-server",
|
||||
name: "Main Server Provider",
|
||||
token: "unidesk-dev-token-change-me",
|
||||
token: "<redacted-test-token>",
|
||||
labels: { host: "main-server", role: "self-provider", docker: true },
|
||||
heartbeatIntervalMs: 15000,
|
||||
reconnectBaseMs: 1000,
|
||||
@@ -316,7 +315,7 @@ async function main(): Promise<void> {
|
||||
microservices: [],
|
||||
paths: { stateDir: ".state", logsDir: "logs", docsReferenceDir: "docs/reference" },
|
||||
sshForwarding: { mode: "ws", keyDir: "/root/.ssh", host: "main-server", port: 22, user: "root" },
|
||||
auth: { username: "admin", password: "Liang6516.", sessionSecret: "secret", sessionTtlSeconds: 86400 },
|
||||
auth: { username: "admin", password: "<redacted-test-password>", sessionSecret: "<redacted-test-session-secret>", sessionTtlSeconds: 86400 },
|
||||
},
|
||||
coreFetch: (path) => {
|
||||
observedLocalPath = path;
|
||||
|
||||
@@ -20,6 +20,10 @@ function longText(marker: string, repeat: number): string {
|
||||
return Array.from({ length: repeat }, (_, index) => `${marker}-${index} #132 Gate report diagnostic review evidence direct workbench fix`).join("\n");
|
||||
}
|
||||
|
||||
/**
 * Builds a list of synthetic fixture ids of the form `<prefix>-NNN`.
 *
 * The numeric suffix is 1-based and left-padded with zeros to at least three
 * digits, so ids sort lexicographically in generation order up to 999.
 *
 * @param prefix - Text placed before the dash and the numeric suffix.
 * @param count - Number of ids to generate.
 * @returns The generated ids, in ascending suffix order.
 */
function manyIds(prefix: string, count: number): string[] {
  return Array.from({ length: count }, (_, index) => `${prefix}-${String(index + 1).padStart(3, "0")}`);
}
|
||||
|
||||
function task(id: string, status: string, updatedAt: string, readAt: string | null = null): JsonRecord {
|
||||
return {
|
||||
id,
|
||||
@@ -79,9 +83,30 @@ function fixtureResponse(path: string): JsonRecord {
|
||||
ok: true,
|
||||
queue: {
|
||||
executionDiagnostics: {
|
||||
state: "split-brain",
|
||||
splitBrain: true,
|
||||
effectiveLiveness: "live",
|
||||
splitBrainLive: true,
|
||||
recommendedAction: "continue-supervision",
|
||||
livenessSummary: longText("split-brain-live-summary", 45),
|
||||
databaseActiveTaskCount: 80,
|
||||
databaseActiveTaskIds: manyIds("db-active", 80),
|
||||
schedulerActiveRunSlotCount: 30,
|
||||
schedulerActiveTaskIds: manyIds("scheduler-active", 30),
|
||||
activeHeartbeatCount: 80,
|
||||
activeHeartbeatTaskIds: manyIds("active-heartbeat", 80),
|
||||
heartbeatFreshTaskIds: manyIds("fresh-heartbeat", 80),
|
||||
heartbeatExpiredTaskIds: [],
|
||||
heartbeatMissingTaskIds: [],
|
||||
staleRecoveryCandidateTaskIds: [],
|
||||
heartbeatRiskTaskIds: [],
|
||||
traceGapTaskIds: manyIds("trace-gap", 60),
|
||||
traceGapNotStaleTaskIds: manyIds("trace-gap-fresh", 40),
|
||||
reasons: Array.from({ length: 24 }, (_, index) => longText(`diagnostic-reason-${index + 1}`, 10)),
|
||||
oaPublisher: {
|
||||
pendingTaskIds: manyIds("oa-pending", 80),
|
||||
lastError: longText("oa-publisher-error", 60),
|
||||
},
|
||||
},
|
||||
},
|
||||
pagination: {
|
||||
@@ -131,11 +156,20 @@ export function runCodeQueueSupervisorDisclosureContract(): JsonRecord {
|
||||
const fullItem = asRecord(asArray(asRecord(asRecord(full).tasks).items)[0]);
|
||||
const completedUnread = asRecord(supervisorView.completedUnread);
|
||||
const fullTasks = asRecord(asRecord(full).tasks);
|
||||
const diagnostics = asRecord(supervisorView.executionDiagnostics);
|
||||
const listBudget = asRecord(diagnostics.listBudget);
|
||||
const omittedCounts = asRecord(listBudget.omittedCounts);
|
||||
|
||||
assertCondition(supervisorBody.length < fullBody.length * 0.55, "supervisor output should be materially smaller than full output", { supervisorChars: supervisorBody.length, fullChars: fullBody.length });
|
||||
assertCondition(supervisorBody.length < 70_000, "supervisor output should remain bounded even with large diagnostics", { supervisorChars: supervisorBody.length });
|
||||
assertCondition(recentItems.length === 5, "recentCompleted should be capped below --limit by default", { returned: recentItems.length });
|
||||
assertCondition(asArray(completedUnread.items).length === 7, "completedUnread should keep unread terminal tasks separate from recentCompleted", completedUnread);
|
||||
assertCondition(recentItems.every((item) => asRecord(item).unreadTerminal === false), "recentCompleted should not duplicate unread terminal tasks", { recentItems });
|
||||
assertCondition(asArray(diagnostics.databaseActiveTaskIds).length === 12, "diagnostic task id lists should be capped", diagnostics);
|
||||
assertCondition(omittedCounts.databaseActiveTaskIds === 68, "diagnostic omitted counts should preserve full visibility metadata", omittedCounts);
|
||||
assertCondition(asArray(diagnostics.reasons).length === 6, "diagnostic reasons should be capped", diagnostics);
|
||||
assertCondition(diagnostics.livenessSummaryTruncated === true, "long diagnostic liveness summary should be previewed", diagnostics);
|
||||
assertCondition(listBudget.truncated === true && typeof listBudget.rawCommand === "string", "diagnostic list budget should disclose raw command", listBudget);
|
||||
assertCondition(asArray(runningItem.issueRefs).includes("#132"), "supervisor row should expose issue refs for triage", runningItem);
|
||||
assertCondition(Number(prompt.chars) > String(prompt.text ?? "").length && prompt.truncated === true, "supervisor prompt must be a short preview with original char count", prompt);
|
||||
assertCondition(Number(lastMessage.chars) > String(lastMessage.text ?? "").length && lastMessage.truncated === true, "supervisor body must be a short preview with original char count", lastMessage);
|
||||
@@ -150,6 +184,7 @@ export function runCodeQueueSupervisorDisclosureContract(): JsonRecord {
|
||||
checks: [
|
||||
"supervisor output materially smaller than full",
|
||||
"recentCompleted capped",
|
||||
"split-brain diagnostics capped",
|
||||
"prompt/body previews bounded",
|
||||
"drill-down commands preserved",
|
||||
"full view remains detailed",
|
||||
|
||||
+79
-18
@@ -16,6 +16,8 @@ const supervisorRecentCompletedLimit = 5;
|
||||
const supervisorPromptPreviewChars = 160;
|
||||
const supervisorBodyPreviewChars = 180;
|
||||
const supervisorRecentBodyPreviewChars = 80;
|
||||
const diagnosticsIdPreviewLimit = 12;
|
||||
const diagnosticsReasonPreviewLimit = 6;
|
||||
const steerPromptPreviewChars = 320;
|
||||
const minimaxSubmitModel = "minimax-m2.7";
|
||||
const deepseekSubmitModel = "deepseek-chat";
|
||||
@@ -1013,42 +1015,101 @@ function recommendedActionFromDiagnostics(record: Record<string, unknown>): stri
|
||||
return "none";
|
||||
}
|
||||
|
||||
/**
 * Produces a bounded, de-duplicated, sorted preview of a list of strings
 * together with the metadata needed to report what was left out.
 *
 * The input is passed through `stringList` (defined elsewhere in this file;
 * presumably it keeps only string entries — confirm against its definition),
 * then de-duplicated and sorted with the default string ordering before the
 * preview is cut, so the preview is stable regardless of input order.
 *
 * @param value - Candidate list; any shape accepted by `stringList`.
 * @param limit - Maximum number of items to keep in the preview. Defaults to
 *   `diagnosticsIdPreviewLimit`. Negative values are treated as 0 and
 *   fractional values are rounded down.
 * @returns `items` (the preview), `count` (unique entries before cutting),
 *   `omitted` (unique entries not included in the preview) and `truncated`
 *   (whether anything was omitted).
 */
function boundedUniqueStringList(value: unknown, limit = diagnosticsIdPreviewLimit): { items: string[]; count: number; omitted: number; truncated: boolean } {
  const unique = [...new Set(stringList(value))].sort();
  // A negative limit would otherwise reach slice() as an end-relative index
  // and return almost the whole list instead of a bounded preview.
  const cap = Math.max(0, Math.floor(limit));
  const items = unique.slice(0, cap);
  const omitted = Math.max(0, unique.length - items.length);
  return {
    items,
    count: unique.length,
    omitted,
    truncated: omitted > 0,
  };
}
|
||||
|
||||
/**
 * Collapses a value into a single-line preview string of bounded length.
 *
 * The value is converted with `asString` (defined elsewhere in this file;
 * presumably yields an empty string for non-string input — confirm), every
 * run of whitespace is replaced by one space, and the result is trimmed.
 *
 * @param value - Value to preview.
 * @param maxChars - Maximum preview length in UTF-16 code units. Negative
 *   values are treated as 0 and fractional values are rounded down.
 * @returns `text` (the preview, or `null` when the normalized text is empty),
 *   `chars` (length of the normalized text before cutting), `truncated`
 *   (whether the preview is shorter than the normalized text) and
 *   `omittedChars` (how many code units were left out of the preview).
 */
function boundedInlineString(value: unknown, maxChars: number): { text: string | null; chars: number; truncated: boolean; omittedChars: number } {
  const text = asString(value).replace(/\s+/gu, " ").trim();
  if (text.length === 0) {
    return { text: null, chars: 0, truncated: false, omittedChars: 0 };
  }

  // Clamp so a negative budget cannot reach slice() as an end-relative index.
  const budget = Math.max(0, Math.floor(maxChars));
  if (!(text.length > budget)) {
    return { text, chars: text.length, truncated: false, omittedChars: 0 };
  }

  // Do not cut between the two halves of a surrogate pair: a lone high
  // surrogate at the end of the preview renders as a garbled character.
  let cut = budget;
  if (cut > 0) {
    const before = text.charCodeAt(cut - 1);
    const after = text.charCodeAt(cut);
    const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) cut -= 1;
  }

  const preview = text.slice(0, cut);
  return {
    text: preview,
    chars: text.length,
    truncated: true,
    // Report what was actually dropped, which may exceed length - maxChars
    // by one when the cut was moved off a surrogate pair.
    omittedChars: text.length - preview.length,
  };
}
|
||||
|
||||
function compactExecutionDiagnostics(value: unknown): Record<string, unknown> | null {
|
||||
const record = asRecord(value);
|
||||
if (record === null) return null;
|
||||
const heartbeatRiskTaskIds = Array.from(new Set([
|
||||
const fullHeartbeatRiskTaskIds = Array.from(new Set([
|
||||
...stringList(record.heartbeatRiskTaskIds),
|
||||
...stringList(record.heartbeatExpiredTaskIds),
|
||||
...stringList(record.heartbeatMissingTaskIds),
|
||||
...stringList(record.staleRecoveryCandidateTaskIds),
|
||||
])).sort();
|
||||
const databaseActiveTaskIds = boundedUniqueStringList(record.databaseActiveTaskIds);
|
||||
const schedulerActiveTaskIds = boundedUniqueStringList(record.schedulerActiveTaskIds);
|
||||
const activeHeartbeatTaskIds = boundedUniqueStringList(record.activeHeartbeatTaskIds);
|
||||
const heartbeatFreshTaskIds = boundedUniqueStringList(record.heartbeatFreshTaskIds);
|
||||
const heartbeatExpiredTaskIds = boundedUniqueStringList(record.heartbeatExpiredTaskIds);
|
||||
const heartbeatMissingTaskIds = boundedUniqueStringList(record.heartbeatMissingTaskIds);
|
||||
const staleRecoveryCandidateTaskIds = boundedUniqueStringList(record.staleRecoveryCandidateTaskIds);
|
||||
const heartbeatRiskTaskIds = boundedUniqueStringList(fullHeartbeatRiskTaskIds);
|
||||
const traceGapTaskIds = boundedUniqueStringList(record.traceGapTaskIds);
|
||||
const traceGapNotStaleTaskIds = boundedUniqueStringList(record.traceGapNotStaleTaskIds);
|
||||
const allReasons = stringList(record.reasons);
|
||||
const reasons = allReasons.slice(0, diagnosticsReasonPreviewLimit).map((reason) => boundedInlineString(reason, 240).text).filter((reason): reason is string => reason !== null);
|
||||
const livenessSummary = boundedInlineString(record.livenessSummary, 420);
|
||||
const omittedCounts = {
|
||||
databaseActiveTaskIds: databaseActiveTaskIds.omitted,
|
||||
schedulerActiveTaskIds: schedulerActiveTaskIds.omitted,
|
||||
activeHeartbeatTaskIds: activeHeartbeatTaskIds.omitted,
|
||||
heartbeatFreshTaskIds: heartbeatFreshTaskIds.omitted,
|
||||
heartbeatExpiredTaskIds: heartbeatExpiredTaskIds.omitted,
|
||||
heartbeatMissingTaskIds: heartbeatMissingTaskIds.omitted,
|
||||
staleRecoveryCandidateTaskIds: staleRecoveryCandidateTaskIds.omitted,
|
||||
heartbeatRiskTaskIds: heartbeatRiskTaskIds.omitted,
|
||||
traceGapTaskIds: traceGapTaskIds.omitted,
|
||||
traceGapNotStaleTaskIds: traceGapNotStaleTaskIds.omitted,
|
||||
reasons: Math.max(0, allReasons.length - reasons.length),
|
||||
livenessSummaryChars: livenessSummary.omittedChars,
|
||||
};
|
||||
return {
|
||||
state: record.state ?? record.health ?? null,
|
||||
degraded: record.degraded ?? null,
|
||||
splitBrain: record.splitBrain ?? null,
|
||||
splitBrainLive: splitBrainLiveFromDiagnostics(record),
|
||||
effectiveLiveness: effectiveLivenessFromDiagnostics({ ...record, heartbeatRiskTaskIds }),
|
||||
recommendedAction: recommendedActionFromDiagnostics({ ...record, heartbeatRiskTaskIds }),
|
||||
livenessSummary: record.livenessSummary ?? null,
|
||||
effectiveLiveness: effectiveLivenessFromDiagnostics({ ...record, heartbeatRiskTaskIds: fullHeartbeatRiskTaskIds }),
|
||||
recommendedAction: recommendedActionFromDiagnostics({ ...record, heartbeatRiskTaskIds: fullHeartbeatRiskTaskIds }),
|
||||
livenessSummary: livenessSummary.text,
|
||||
livenessSummaryChars: livenessSummary.chars,
|
||||
livenessSummaryTruncated: livenessSummary.truncated,
|
||||
executionStateSource: record.executionStateSource ?? null,
|
||||
controlPlane: record.controlPlane ?? null,
|
||||
databaseActiveTaskCount: record.databaseActiveTaskCount ?? null,
|
||||
databaseActiveTaskIds: record.databaseActiveTaskIds ?? [],
|
||||
controlPlane: boundedInlineString(record.controlPlane, 120).text,
|
||||
databaseActiveTaskCount: record.databaseActiveTaskCount ?? databaseActiveTaskIds.count,
|
||||
databaseActiveTaskIds: databaseActiveTaskIds.items,
|
||||
schedulerActiveRunSlotCount: record.schedulerActiveRunSlotCount ?? null,
|
||||
schedulerActiveTaskIds: record.schedulerActiveTaskIds ?? [],
|
||||
activeHeartbeatTaskIds: record.activeHeartbeatTaskIds ?? [],
|
||||
heartbeatFreshTaskIds: record.heartbeatFreshTaskIds ?? [],
|
||||
heartbeatExpiredTaskIds: record.heartbeatExpiredTaskIds ?? [],
|
||||
heartbeatMissingTaskIds: record.heartbeatMissingTaskIds ?? [],
|
||||
staleRecoveryCandidateTaskIds: record.staleRecoveryCandidateTaskIds ?? [],
|
||||
heartbeatRiskTaskIds,
|
||||
traceGapTaskIds: record.traceGapTaskIds ?? [],
|
||||
traceGapNotStaleTaskIds: record.traceGapNotStaleTaskIds ?? [],
|
||||
schedulerActiveTaskIds: schedulerActiveTaskIds.items,
|
||||
activeHeartbeatCount: record.activeHeartbeatCount ?? activeHeartbeatTaskIds.count,
|
||||
activeHeartbeatTaskIds: activeHeartbeatTaskIds.items,
|
||||
heartbeatFreshTaskIds: heartbeatFreshTaskIds.items,
|
||||
heartbeatExpiredTaskIds: heartbeatExpiredTaskIds.items,
|
||||
heartbeatMissingTaskIds: heartbeatMissingTaskIds.items,
|
||||
staleRecoveryCandidateTaskIds: staleRecoveryCandidateTaskIds.items,
|
||||
heartbeatRiskTaskIds: heartbeatRiskTaskIds.items,
|
||||
traceGapTaskIds: traceGapTaskIds.items,
|
||||
traceGapNotStaleTaskIds: traceGapNotStaleTaskIds.items,
|
||||
lastSchedulerHeartbeatAt: record.lastSchedulerHeartbeatAt ?? null,
|
||||
lastObservedAgentEventAt: record.lastObservedAgentEventAt ?? null,
|
||||
lastPersistedTraceAt: record.lastPersistedTraceAt ?? null,
|
||||
oaPublisher: record.oaPublisher ?? null,
|
||||
reasons: record.reasons ?? [],
|
||||
oaPublisher: previewJson(record.oaPublisher ?? null, { maxDepth: 3, maxArrayItems: 4, maxObjectKeys: 12, maxStringLength: 240 }),
|
||||
reasons,
|
||||
listBudget: {
|
||||
idPreviewLimit: diagnosticsIdPreviewLimit,
|
||||
reasonPreviewLimit: diagnosticsReasonPreviewLimit,
|
||||
truncated: Object.values(omittedCounts).some((count) => count > 0),
|
||||
omittedCounts,
|
||||
rawCommand: "bun scripts/cli.ts microservice proxy code-queue /api/tasks/overview?limit=30 --raw --full",
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user