From 216209ca95f11c8fdebe512ede425aa946c883cd Mon Sep 17 00:00:00 2001 From: Lyon <88232613+pikasTech@users.noreply.github.com> Date: Fri, 12 Jun 2026 01:20:02 +0800 Subject: [PATCH] fix: return recovery action descriptors (#174) Co-authored-by: AgentRun Codex --- scripts/src/cli.ts | 14 +++-- src/common/types.ts | 2 +- src/mgr/diagnosis.ts | 37 +++++++++---- src/mgr/kubernetes-runner-job.ts | 24 +++++++-- src/mgr/provider-profiles.ts | 46 +++++++++++------ src/mgr/queue-dispatch.ts | 26 +++++++--- src/mgr/result.ts | 46 ++++++++++++----- src/mgr/server.ts | 61 ++++++++++++++++++---- src/mgr/tool-credentials.ts | 19 +++++-- src/selftest/cases/30-codex-stdio.ts | 34 +++++++++++- src/selftest/cases/55-timeout-liveness.ts | 63 +++++++++++++++++++++-- 11 files changed, 298 insertions(+), 74 deletions(-) diff --git a/scripts/src/cli.ts b/scripts/src/cli.ts index de297a8..0ee980d 100644 --- a/scripts/src/cli.ts +++ b/scripts/src/cli.ts @@ -228,12 +228,16 @@ function summarizeLivenessRecord(record: JsonRecord | null): JsonRecord | null { const lastActivity = jsonRecordValue(record.lastActivity ?? record.lastCommandActivity); const timeoutBudget = jsonRecordValue(record.timeoutBudget); const transportDisconnect = jsonRecordValue(record.transportDisconnect); + const retryInterruption = jsonRecordValue(record.retryInterruption); + const lease = jsonRecordValue(record.lease); return { ...withoutFullRecordBytes(compactRecord(record, { keys: ["phase", "active", "runStatus", "commandId", "commandType", "commandState", "lastSeq", "lastEventAt", "lastEventAgeMs"] })), terminalClassification: summarizeTerminalClassificationIfPresent({ terminalClassification: record.terminalClassification ?? null }), lastActivity: lastActivity ? withoutFullRecordBytes(compactRecord(lastActivity, { keys: ["sourceSeq", "eventId", "activityKind", "type", "status", "toolName", "itemId", "ageMs", "summary"] })) : null, - timeoutBudget: timeoutBudget ? withoutFullRecordBytes(compactRecord(timeoutBudget, { keys: ["state", "timeoutMs", "elapsedMs", "remainingMs", "startedAt", "source"] })) : null, + timeoutBudget: timeoutBudget ? withoutFullRecordBytes(compactRecord(timeoutBudget, { keys: ["state", "timeoutKind", "timeoutMs", "elapsedMs", "idleElapsedMs", "remainingMs", "startedAt", "idleStartedAt", "lastActivityAt", "lastActivitySeq", "source"] })) : null, + lease: lease ? withoutFullRecordBytes(compactRecord(lease, { keys: ["claimedBy", "leaseExpiresAt", "leaseExpired", "leaseRemainingMs"] })) : null, transportDisconnect: transportDisconnect ? withoutFullRecordBytes(compactRecord(transportDisconnect, { keys: ["sourceSeq", "eventId", "activityKind", "type", "status", "ageMs", "summary"] })) : null, + retryInterruption: retryInterruption ? withoutFullRecordBytes(compactRecord(retryInterruption, { keys: ["sourceSeq", "eventId", "activityKind", "type", "status", "ageMs", "summary"] })) : null, recoveryActions: summarizeRecoveryActions(record.recoveryActions), valuesPrinted: false, }; @@ -990,23 +994,23 @@ function summarizeSupervisorRecord(record: JsonRecord | null): JsonRecord | null const terminalClassification = jsonRecordValue(record.terminalClassification); const diagnosis = jsonRecordValue(record.diagnosis); return { - ...withoutFullRecordBytes(compactRecord(record, { keys: ["phase", "active", "status", "terminalStatus", "failureKind", "runId", "commandId", "lastSeq"] })), + ...withoutFullRecordBytes(compactRecord(record, { keys: ["phase", "active", "status", "terminalStatus", "failureKind", "runId", "commandId", "lastSeq", "lastEventAt", "lastEventAgeMs", "leaseRemainingMs", "leaseExpired"] })), diagnosis: diagnosis ? summarizeDiagnosisRecord(diagnosis) : null, terminalClassification: terminalClassification ? summarizeTerminalClassification(terminalClassification) : null, lastActivity: lastActivity ? withoutFullRecordBytes(compactRecord(lastActivity, { keys: ["sourceSeq", "eventId", "activityKind", "type", "status", "toolName", "itemId", "ageMs", "summary"] })) : null, - timeoutBudget: timeoutBudget ? withoutFullRecordBytes(compactRecord(timeoutBudget, { keys: ["state", "timeoutMs", "elapsedMs", "remainingMs", "startedAt", "source"] })) : null, + timeoutBudget: timeoutBudget ? withoutFullRecordBytes(compactRecord(timeoutBudget, { keys: ["state", "timeoutKind", "timeoutMs", "elapsedMs", "idleElapsedMs", "remainingMs", "startedAt", "idleStartedAt", "lastActivityAt", "lastActivitySeq", "source"] })) : null, recoveryActions: summarizeRecoveryActions(record.recoveryActions), valuesPrinted: false, }; } function summarizeTerminalClassification(record: JsonRecord): JsonRecord { - return withoutFullRecordBytes(compactRecord(record, { keys: ["category", "confidence", "providerEvidence", "providerInterruption", "providerInterruptionKnown", "providerInterruptionReason", "hardTimeout", "transportDisconnectObserved", "transportDisconnectSeq", "reason"] })); + return withoutFullRecordBytes(compactRecord(record, { keys: ["category", "confidence", "providerEvidence", "providerInterruption", "providerInterruptionKnown", "providerInterruptionReason", "retryInterruptionObserved", "retryInterruptionSeq", "retryInterruptionKind", "hardTimeout", "idleTimeout", "timeoutKind", "timeoutState", "transportDisconnectObserved", "transportDisconnectSeq", "reason"] })); } function summarizeRecoveryActions(value: JsonValue | undefined): JsonValue[] { if (!Array.isArray(value)) return []; - return value.slice(0, 5).map((item) => withoutFullRecordBytes(compactRecord(jsonRecordValue(item), { keys: ["action", "reason", "runId", "commandId", "sessionId", "afterSeq", "hint"] }))); + return value.slice(0, 5).map((item) => withoutFullRecordBytes(compactRecord(jsonRecordValue(item), { keys: ["action", "operation", "resourceKind", "resourceName", "reason", "reasonHint", "reasonRequired", "inputKind", "runId", "commandId", "runnerJobId", "sessionId", "afterSeq", "limit", "failureMessage"] }))); } function withoutFullRecordBytes(record: JsonRecord): JsonRecord { diff --git a/src/common/types.ts b/src/common/types.ts index 94a4436..8d32e69 100644 --- a/src/common/types.ts +++ b/src/common/types.ts @@ -465,7 +465,7 @@ export interface QueueDispatchResult extends JsonRecord { envImage: JsonRecord | null; workReady: JsonRecord | null; latestAttempt: QueueAttemptRef; - pollCommands: JsonRecord; + pollActions: JsonRecord[]; } export interface BackendEvent { diff --git a/src/mgr/diagnosis.ts b/src/mgr/diagnosis.ts index 6706ad2..660ff28 100644 --- a/src/mgr/diagnosis.ts +++ b/src/mgr/diagnosis.ts @@ -86,9 +86,9 @@ export function runnerJobDiagnosis(job: RunnerJobRecord, events: RunEvent[] = [] namespace: job.namespace, logPath: stringValue(recordAt(job.result, "runner")?.logPath), nextActions: [ - { action: "inspect-run", command: `./scripts/agentrun runs show ${job.runId}`, valuesPrinted: false }, - { action: "inspect-command", command: `./scripts/agentrun commands show ${job.commandId} --run-id ${job.runId}`, valuesPrinted: false }, - { action: "poll-events", command: `./scripts/agentrun runs events ${job.runId} --after-seq 0 --limit 100 --tail-summary`, valuesPrinted: false }, + recoveryDescriptor({ action: "inspect-run", operation: "describe", resourceKind: "run", resourceName: job.runId, runId: job.runId }), + recoveryDescriptor({ action: "inspect-command", operation: "describe", resourceKind: "command", resourceName: job.commandId, runId: job.runId, commandId: job.commandId }), + recoveryDescriptor({ action: "poll-events", operation: "events", resourceKind: "run", resourceName: job.runId, runId: job.runId, commandId: job.commandId, afterSeq: 0, limit: 100 }), ], valuesPrinted: false, }; @@ -167,16 +167,35 @@ function evidenceLevel(category: string, providerEvidence: string, runnerLost: b function recoveryActionsForDiagnosis(input: { run: RunRecord; command: CommandRecord | null; latestJob: RunnerJobRecord | null; session: JsonRecord; runnerLost: boolean; staleClaimed: boolean; terminalCommandOpenRun: boolean; failureKind: string | null; lastSeq: number }): JsonRecord[] { const actions: JsonRecord[] = []; - if (input.latestJob) actions.push({ action: "inspect-runner-job", runnerJobId: input.latestJob.id, command: `./scripts/agentrun runner job-status ${input.latestJob.id} --run-id ${input.run.id}`, valuesPrinted: false }); - if (input.command) actions.push({ action: "inspect-command", commandId: input.command.id, command: `./scripts/agentrun commands result ${input.command.id} --run-id ${input.run.id}`, valuesPrinted: false }); - actions.push({ action: "poll-events", runId: input.run.id, afterSeq: input.lastSeq, command: `./scripts/agentrun runs events ${input.run.id} --after-seq ${input.lastSeq} --limit 100 --tail-summary`, valuesPrinted: false }); + if (input.latestJob) actions.push(recoveryDescriptor({ action: "inspect-runner-job", operation: "describe", resourceKind: "runnerjob", resourceName: input.latestJob.id, runId: input.run.id, commandId: input.command?.id ?? input.latestJob.commandId, runnerJobId: input.latestJob.id })); + if (input.command) actions.push(recoveryDescriptor({ action: "inspect-command", operation: "result", resourceKind: "command", resourceName: input.command.id, runId: input.run.id, commandId: input.command.id })); + actions.push(recoveryDescriptor({ action: "poll-events", operation: "events", resourceKind: "run", resourceName: input.run.id, runId: input.run.id, commandId: input.command?.id ?? null, afterSeq: input.lastSeq, limit: 100 })); const sessionId = stringValue(input.session.sessionId); - if (sessionId) actions.push({ action: "continue-session", sessionId, command: `./scripts/agentrun sessions send ${sessionId} --prompt-stdin`, valuesPrinted: false }); - else actions.push({ action: "session-unavailable", reason: "sessionRef=null", hint: "当前 run 没有 sessionRef,管理者只能从 run/events/command/runner-job 读取 trace 后重新提交;这表示该任务不可同 session 续跑。", valuesPrinted: false }); - if (input.runnerLost || input.staleClaimed || input.terminalCommandOpenRun) actions.push({ action: "refresh-queue-or-resubmit", reason: input.failureKind ?? "stale-runner-state", hint: "先用 queue refresh/show 对齐 attempt;有 sessionId 时继续同一 session,没有 sessionId 才重新派发。", valuesPrinted: false }); + if (sessionId) actions.push(recoveryDescriptor({ action: "continue-session", operation: "send", resourceKind: "session", resourceName: sessionId, runId: input.run.id, commandId: input.command?.id ?? null, sessionId, inputKind: "prompt" })); + else actions.push({ action: "session-unavailable", operation: "operator-decision", resourceKind: "run", resourceName: input.run.id, runId: input.run.id, commandId: input.command?.id ?? null, reason: "sessionRef=null", reasonHint: "当前 run 没有 sessionRef,管理者只能从 run/events/command/runner-job 读取 trace 后重新提交;这表示该任务不可同 session 续跑。", valuesPrinted: false }); + if (input.runnerLost || input.staleClaimed || input.terminalCommandOpenRun) actions.push(recoveryDescriptor({ action: "refresh-queue-or-resubmit", operation: "operator-decision", resourceKind: sessionId ? "session" : "run", resourceName: sessionId ?? input.run.id, runId: input.run.id, commandId: input.command?.id ?? null, sessionId, reason: input.failureKind ?? "stale-runner-state", reasonHint: "先用 queue refresh/show 对齐 attempt;有 sessionId 时继续同一 session,没有 sessionId 才重新派发。" })); return actions.slice(0, 6); } +function recoveryDescriptor(input: { action: string; operation: string; resourceKind: string; resourceName: string; runId?: string | null; commandId?: string | null; runnerJobId?: string | null; sessionId?: string | null; afterSeq?: number | null; limit?: number | null; reason?: string | null; reasonHint?: string | null; inputKind?: string | null }): JsonRecord { + return { + action: input.action, + operation: input.operation, + resourceKind: input.resourceKind, + resourceName: input.resourceName, + runId: input.runId ?? null, + commandId: input.commandId ?? null, + runnerJobId: input.runnerJobId ?? null, + sessionId: input.sessionId ?? null, + ...(input.afterSeq !== undefined ? { afterSeq: input.afterSeq } : {}), + ...(input.limit !== undefined ? { limit: input.limit } : {}), + ...(input.reason ? { reason: input.reason } : {}), + ...(input.reasonHint ? { reasonHint: input.reasonHint } : {}), + ...(input.inputKind ? { inputKind: input.inputKind } : {}), + valuesPrinted: false, + }; +} + function runnerJobReference(job: RunnerJobRecord, events: RunEvent[]): JsonRecord { const observation = runnerJobObservation(job, events); const terminalStatus = stringValue(observation.terminalStatus); diff --git a/src/mgr/kubernetes-runner-job.ts b/src/mgr/kubernetes-runner-job.ts index c02901d..61e3144 100644 --- a/src/mgr/kubernetes-runner-job.ts +++ b/src/mgr/kubernetes-runner-job.ts @@ -213,11 +213,11 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore; retention: { ttlSecondsAfterFinished: render.ttlSecondsAfterFinished, }, - pollCommands: { - run: `./scripts/agentrun runs show ${run.id} --manager-url ${managerUrl}`, - command: `./scripts/agentrun commands show ${commandId} --run-id ${run.id} --manager-url ${managerUrl}`, - events: `./scripts/agentrun runs events ${run.id} --manager-url ${managerUrl} --after-seq 0 --limit 100`, - }, + pollActions: [ + runnerJobActionDescriptor({ action: "inspect-run", operation: "describe", resourceKind: "run", resourceName: run.id, runId: run.id }), + runnerJobActionDescriptor({ action: "inspect-command", operation: "describe", resourceKind: "command", resourceName: commandId, runId: run.id, commandId }), + runnerJobActionDescriptor({ action: "poll-events", operation: "events", resourceKind: "run", resourceName: run.id, runId: run.id, commandId, afterSeq: 0, limit: 100 }), + ], warnings: render.warnings, kubernetes: { created: true, @@ -459,6 +459,20 @@ function stringField(record: JsonRecord, key: string): string { return value.trim(); } +function runnerJobActionDescriptor(input: { action: string; operation: string; resourceKind: string; resourceName: string; runId?: string | null; commandId?: string | null; afterSeq?: number | null; limit?: number | null }): JsonRecord { + return { + action: input.action, + operation: input.operation, + resourceKind: input.resourceKind, + resourceName: input.resourceName, + runId: input.runId ?? null, + commandId: input.commandId ?? null, + ...(input.afterSeq !== undefined ? { afterSeq: input.afterSeq } : {}), + ...(input.limit !== undefined ? { limit: input.limit } : {}), + valuesPrinted: false, + }; +} + function optionalString(value: unknown): string | undefined { return typeof value === "string" && value.trim().length > 0 ? value.trim() : undefined; } diff --git a/src/mgr/provider-profiles.ts b/src/mgr/provider-profiles.ts index becaa0b..0312c61 100644 --- a/src/mgr/provider-profiles.ts +++ b/src/mgr/provider-profiles.ts @@ -126,12 +126,12 @@ export async function removeProviderProfile(profileValue: string, options: Provi configHashSuffix: hashDataKey(data, "config.toml") ?? stringPath(annotations, [`${credentialAnnotationPrefix}-config-hash-suffix`]), updatedAt: new Date().toISOString(), valuesPrinted: false, - pollCommands: { - list: "./scripts/agentrun provider-profiles list", - show: `./scripts/agentrun provider-profiles show ${profile}`, - setKey: `./scripts/agentrun provider-profiles set-key ${profile} --key-stdin`, - setConfig: `./scripts/agentrun provider-profiles set-config ${profile} --config-stdin`, - }, + pollActions: [ + providerActionDescriptor({ action: "list-provider-profiles", operation: "list", resourceKind: "provider-profile", resourceName: "*" }), + providerActionDescriptor({ action: "inspect-provider-profile", operation: "describe", resourceKind: "provider-profile", resourceName: profile, profile }), + providerActionDescriptor({ action: "set-provider-key", operation: "set-key", resourceKind: "provider-profile", resourceName: profile, profile, inputKind: "credential" }), + providerActionDescriptor({ action: "set-provider-config", operation: "set-config", resourceKind: "provider-profile", resourceName: profile, profile, inputKind: "config" }), + ], }; } @@ -184,12 +184,12 @@ export async function setProviderProfileConfig(profileValue: string, body: unkno configTomlPrinted: false, credentialValuesPrinted: false, valuesPrinted: false, - pollCommands: { - config: `./scripts/agentrun provider-profiles config ${profile}`, - show: `./scripts/agentrun provider-profiles show ${profile}`, - setKey: `./scripts/agentrun provider-profiles set-key ${profile} --key-stdin`, - validate: `./scripts/agentrun provider-profiles validate ${profile} --wait --timeout-ms 120000`, - }, + pollActions: [ + providerActionDescriptor({ action: "inspect-provider-config", operation: "config", resourceKind: "provider-profile", resourceName: profile, profile }), + providerActionDescriptor({ action: "inspect-provider-profile", operation: "describe", resourceKind: "provider-profile", resourceName: profile, profile }), + providerActionDescriptor({ action: "set-provider-key", operation: "set-key", resourceKind: "provider-profile", resourceName: profile, profile, inputKind: "credential" }), + providerActionDescriptor({ action: "validate-provider-profile", operation: "validate", resourceKind: "provider-profile", resourceName: profile, profile, wait: true, timeoutMs: 120_000 }), + ], }; } @@ -241,10 +241,24 @@ export async function setProviderProfileCredential(profileValue: string, body: u delegatedBy, requiresExternalBridgeUpdate: profileUsesMoonBridge(profile, renderedConfig.configToml), valuesPrinted: false, - pollCommands: { - show: `./scripts/agentrun provider-profiles show ${profile}`, - validate: `./scripts/agentrun provider-profiles validate ${profile} --wait --timeout-ms 120000`, - }, + pollActions: [ + providerActionDescriptor({ action: "inspect-provider-profile", operation: "describe", resourceKind: "provider-profile", resourceName: profile, profile }), + providerActionDescriptor({ action: "validate-provider-profile", operation: "validate", resourceKind: "provider-profile", resourceName: profile, profile, wait: true, timeoutMs: 120_000 }), + ], + }; +} + +function providerActionDescriptor(input: { action: string; operation: string; resourceKind: string; resourceName: string; profile?: string | null; inputKind?: string | null; wait?: boolean; timeoutMs?: number | null }): JsonRecord { + return { + action: input.action, + operation: input.operation, + resourceKind: input.resourceKind, + resourceName: input.resourceName, + profile: input.profile ?? null, + ...(input.inputKind ? { inputKind: input.inputKind } : {}), + ...(input.wait === true ? { wait: true } : {}), + ...(input.timeoutMs !== undefined ? { timeoutMs: input.timeoutMs } : {}), + valuesPrinted: false, }; } diff --git a/src/mgr/queue-dispatch.ts b/src/mgr/queue-dispatch.ts index 79d605b..baefd1b 100644 --- a/src/mgr/queue-dispatch.ts +++ b/src/mgr/queue-dispatch.ts @@ -52,12 +52,26 @@ export async function dispatchQueueTask(options: DispatchQueueTaskOptions): Prom envImage: jsonRecordOrNull(runnerJob.envImage), workReady: jsonRecordOrNull(runnerJob.workReady), latestAttempt, - pollCommands: { - queue: `./scripts/agentrun queue show ${task.id}`, - run: `./scripts/agentrun runs show ${run.id}`, - command: `./scripts/agentrun commands show ${command.id} --run-id ${run.id}`, - events: `./scripts/agentrun runs events ${run.id} --after-seq 0 --limit 100`, - }, + pollActions: [ + dispatchActionDescriptor({ action: "inspect-task", operation: "describe", resourceKind: "task", resourceName: task.id }), + dispatchActionDescriptor({ action: "inspect-run", operation: "describe", resourceKind: "run", resourceName: run.id, runId: run.id }), + dispatchActionDescriptor({ action: "inspect-command", operation: "describe", resourceKind: "command", resourceName: command.id, runId: run.id, commandId: command.id }), + dispatchActionDescriptor({ action: "poll-events", operation: "events", resourceKind: "run", resourceName: run.id, runId: run.id, commandId: command.id, afterSeq: 0, limit: 100 }), + ], + }; +} + +function dispatchActionDescriptor(input: { action: string; operation: string; resourceKind: string; resourceName: string; runId?: string | null; commandId?: string | null; afterSeq?: number | null; limit?: number | null }): JsonRecord { + return { + action: input.action, + operation: input.operation, + resourceKind: input.resourceKind, + resourceName: input.resourceName, + runId: input.runId ?? null, + commandId: input.commandId ?? null, + ...(input.afterSeq !== undefined ? { afterSeq: input.afterSeq } : {}), + ...(input.limit !== undefined ? { limit: input.limit } : {}), + valuesPrinted: false, }; } diff --git a/src/mgr/result.ts b/src/mgr/result.ts index 3fee284..b90d2ae 100644 --- a/src/mgr/result.ts +++ b/src/mgr/result.ts @@ -442,39 +442,57 @@ function ageMs(value: string, nowMs: number): number | null { function recoveryActions(input: { run: RunRecord; command: CommandRecord | null; afterSeq: number; active: boolean; terminal: TerminalStatus | null; failureKind: FailureKind | null; failureMessage: string | null; needsContinuation: boolean; finalResponseAuthority: string }): JsonRecord[] { const { run, command, afterSeq, active, terminal, failureKind, failureMessage, needsContinuation, finalResponseAuthority } = input; const sessionId = run.sessionRef?.sessionId ?? null; - const traceCommand = sessionId ? `./scripts/agentrun sessions trace ${sessionId} --after-seq ${afterSeq} --limit 100 --run-id ${run.id}` : `./scripts/agentrun runs events ${run.id} --after-seq ${afterSeq} --limit 100 --summary`; - const outputCommand = sessionId ? `./scripts/agentrun sessions output ${sessionId} --after-seq ${afterSeq} --limit 100 --run-id ${run.id}` : null; const actions: JsonRecord[] = [ - { action: "poll-trace", runId: run.id, commandId: command?.id ?? null, afterSeq, command: traceCommand, valuesPrinted: false }, + recoveryDescriptor({ action: "poll-trace", operation: "events", resourceKind: "run", resourceName: run.id, runId: run.id, commandId: command?.id ?? null, sessionId, afterSeq, limit: 100 }), ]; - if (outputCommand) actions.push({ action: "poll-output", runId: run.id, commandId: command?.id ?? null, afterSeq, command: outputCommand, valuesPrinted: false }); + if (sessionId) actions.push(recoveryDescriptor({ action: "poll-output", operation: "logs", resourceKind: "session", resourceName: sessionId, runId: run.id, commandId: command?.id ?? null, sessionId, afterSeq, limit: 100 })); if (active) { - if (sessionId) actions.push({ action: "send-session", sessionId, runId: run.id, commandId: command?.id ?? null, command: `./scripts/agentrun sessions send ${sessionId} --prompt-stdin`, hint: "manager 会按当前 session 状态自动决定内部 steer 或新 turn", valuesPrinted: false }); - if (command) actions.push({ action: "cancel-command", runId: run.id, commandId: command.id, command: `./scripts/agentrun commands cancel ${command.id} --reason `, valuesPrinted: false }); - else actions.push({ action: "cancel-run", runId: run.id, command: `./scripts/agentrun runs cancel ${run.id} --reason `, valuesPrinted: false }); + if (sessionId) actions.push(recoveryDescriptor({ action: "send-session", operation: "send", resourceKind: "session", resourceName: sessionId, runId: run.id, commandId: command?.id ?? null, sessionId, inputKind: "prompt", reasonHint: "manager 会按当前 session 状态自动决定内部 steer 或新 turn" })); + if (command) actions.push(recoveryDescriptor({ action: "cancel-command", operation: "cancel", resourceKind: "command", resourceName: command.id, runId: run.id, commandId: command.id, sessionId, reasonRequired: true, reasonHint: "operator supplied cancel reason" })); + else actions.push(recoveryDescriptor({ action: "cancel-run", operation: "cancel", resourceKind: "run", resourceName: run.id, runId: run.id, sessionId, reasonRequired: true, reasonHint: "operator supplied cancel reason" })); return actions; } if (needsContinuation && sessionId) { - if (command) actions.push({ action: "inspect-result", runId: run.id, commandId: command.id, command: `./scripts/agentrun commands result ${command.id} --run-id ${run.id}`, valuesPrinted: false }); - actions.push({ action: "continue-session", reason: `final-response-${finalResponseAuthority}`, sessionId, command: `./scripts/agentrun sessions send ${sessionId} --prompt-stdin`, hint: "命令已 terminal completed,但没有 authoritative final response;管理者应先读 trace/output,再用同一 session 发送后续 prompt。", valuesPrinted: false }); + if (command) actions.push(recoveryDescriptor({ action: "inspect-result", operation: "result", resourceKind: "command", resourceName: command.id, runId: run.id, commandId: command.id, sessionId })); + actions.push(recoveryDescriptor({ action: "continue-session", operation: "send", resourceKind: "session", resourceName: sessionId, runId: run.id, commandId: command?.id ?? null, sessionId, reason: `final-response-${finalResponseAuthority}`, inputKind: "prompt", reasonHint: "命令已 terminal completed,但没有 authoritative final response;管理者应先读 trace/output,再用同一 session 发送后续 prompt。" })); return actions; } if (terminal === "failed" || terminal === "blocked" || terminal === "cancelled") { - if (command) actions.push({ action: "inspect-result", runId: run.id, commandId: command.id, command: `./scripts/agentrun commands result ${command.id} --run-id ${run.id}`, valuesPrinted: false }); - if (sessionId) actions.push({ action: "continue-session", sessionId, command: `./scripts/agentrun sessions send ${sessionId} --prompt-stdin`, valuesPrinted: false }); - if (failureKind === "backend-timeout") actions.push({ action: "split-task", reason: "backend-timeout", hint: "先由管理者读取 trace/result,总结下一步,再把后续 prompt 发到同一 session;必要时把大 patch / 长工具链拆成更短 turn。", failureMessage: failureMessage ? boundedTextSummary(failureMessage, { limitChars: 200 }).text as string : null, valuesPrinted: false }); - else actions.push({ action: "retry-or-split", reason: failureKind ?? "terminal", hint: "先读 trace/output 的 detail id,再决定继续同 session、重跑或拆分", valuesPrinted: false }); + if (command) actions.push(recoveryDescriptor({ action: "inspect-result", operation: "result", resourceKind: "command", resourceName: command.id, runId: run.id, commandId: command.id, sessionId })); + if (sessionId) actions.push(recoveryDescriptor({ action: "continue-session", operation: "send", resourceKind: "session", resourceName: sessionId, runId: run.id, commandId: command?.id ?? null, sessionId, inputKind: "prompt" })); + if (failureKind === "backend-timeout") actions.push(recoveryDescriptor({ action: "split-task", operation: "operator-decision", resourceKind: sessionId ? "session" : "run", resourceName: sessionId ?? run.id, runId: run.id, commandId: command?.id ?? null, sessionId, reason: "backend-timeout", reasonHint: "先由管理者读取 trace/result,总结下一步,再把后续 prompt 发到同一 session;必要时把大 patch / 长工具链拆成更短 turn。", failureMessage: failureMessage ? boundedTextSummary(failureMessage, { limitChars: 200 }).text as string : null })); + else actions.push(recoveryDescriptor({ action: "retry-or-split", operation: "operator-decision", resourceKind: sessionId ? "session" : "run", resourceName: sessionId ?? run.id, runId: run.id, commandId: command?.id ?? null, sessionId, reason: failureKind ?? "terminal", reasonHint: "先读 trace/output 的 detail id,再决定继续同 session、重跑或拆分" })); } return actions; } +function recoveryDescriptor(input: { action: string; operation: string; resourceKind: string; resourceName: string; runId?: string | null; commandId?: string | null; sessionId?: string | null; afterSeq?: number | null; limit?: number | null; reason?: string | null; reasonHint?: string | null; reasonRequired?: boolean; inputKind?: string | null; failureMessage?: string | null }): JsonRecord { + return { + action: input.action, + operation: input.operation, + resourceKind: input.resourceKind, + resourceName: input.resourceName, + runId: input.runId ?? null, + commandId: input.commandId ?? null, + sessionId: input.sessionId ?? null, + ...(input.afterSeq !== undefined ? { afterSeq: input.afterSeq } : {}), + ...(input.limit !== undefined ? { limit: input.limit } : {}), + ...(input.reason ? { reason: input.reason } : {}), + ...(input.reasonHint ? { reasonHint: input.reasonHint } : {}), + ...(input.reasonRequired === true ? { reasonRequired: true } : {}), + ...(input.inputKind ? { inputKind: input.inputKind } : {}), + ...(input.failureMessage ? { failureMessage: input.failureMessage } : {}), + valuesPrinted: false, + }; +} + function finalResponseAuthority(reply: AssistantReplySummary): "authoritative" | "fallback" | "missing" { if (reply.replyAuthority || reply.final) return "authoritative"; return reply.text.length > 0 ? "fallback" : "missing"; } function completionEvidenceSummary(input: { terminal: TerminalStatus | null; terminalSource: string; reply: AssistantReplySummary; responseAuthority: string; needsContinuation: boolean; sessionId: string | null }): JsonRecord { - const recommendedAction = input.needsContinuation && input.sessionId ? `./scripts/agentrun sessions send ${input.sessionId} --prompt-stdin` : null; + const recommendedAction = input.needsContinuation && input.sessionId ? recoveryDescriptor({ action: "continue-session", operation: "send", resourceKind: "session", resourceName: input.sessionId, sessionId: input.sessionId, inputKind: "prompt", reason: `final-response-${input.responseAuthority}` }) : null; return { terminalStatus: input.terminal, terminalSource: input.terminalSource, diff --git a/src/mgr/server.ts b/src/mgr/server.ts index 595de59..2bd30fc 100644 --- a/src/mgr/server.ts +++ b/src/mgr/server.ts @@ -165,6 +165,7 @@ async function queueTaskSupervisor(store: AgentRunStore, task: JsonRecord): Prom const lastActivity = asJsonRecord(liveness?.lastActivity ?? liveness?.lastCommandActivity); const timeoutBudget = asJsonRecord(liveness?.timeoutBudget); const terminalClassification = asJsonRecord(result.terminalClassification ?? liveness?.terminalClassification); + const lease = asJsonRecord(liveness?.lease); return { runId: stringJsonValue(result.runId), commandId: stringJsonValue(result.commandId), @@ -176,8 +177,13 @@ async function queueTaskSupervisor(store: AgentRunStore, task: JsonRecord): Prom phase: stringJsonValue(liveness?.phase), active: liveness?.active === true, lastSeq: numberJsonValue(liveness?.lastSeq ?? result.lastSeq), + lastEventAt: stringJsonValue(liveness?.lastEventAt), + lastEventAgeMs: numberJsonValue(liveness?.lastEventAgeMs), lastActivity: lastActivity ? compactActivity(lastActivity) : null, timeoutBudget: timeoutBudget ? compactTimeoutBudget(timeoutBudget) : null, + lease: lease ? compactLease(lease) : null, + leaseRemainingMs: numberJsonValue(lease?.leaseRemainingMs), + leaseExpired: lease?.leaseExpired === true, recoveryActions: compactRecoveryActions(liveness?.recoveryActions), valuesPrinted: false, }; @@ -220,6 +226,16 @@ function compactTimeoutBudget(budget: JsonRecord): JsonRecord { }; } +function compactLease(lease: JsonRecord): JsonRecord { + return { + claimedBy: stringJsonValue(lease.claimedBy), + leaseExpiresAt: stringJsonValue(lease.leaseExpiresAt), + leaseExpired: lease.leaseExpired === true, + leaseRemainingMs: numberJsonValue(lease.leaseRemainingMs), + valuesPrinted: false, + }; +} + function compactTerminalClassification(record: JsonRecord): JsonRecord { return { category: stringJsonValue(record.category), @@ -248,13 +264,20 @@ function compactRecoveryActions(value: JsonValue | undefined): JsonValue[] { if (!action) return { action: "unknown", valuesPrinted: false }; return { action: stringJsonValue(action.action), + operation: stringJsonValue(action.operation), + resourceKind: stringJsonValue(action.resourceKind), + resourceName: stringJsonValue(action.resourceName), reason: stringJsonValue(action.reason), + reasonHint: boundedJsonString(action.reasonHint, 220), + reasonRequired: action.reasonRequired === true, + inputKind: stringJsonValue(action.inputKind), runId: stringJsonValue(action.runId), commandId: stringJsonValue(action.commandId), + runnerJobId: stringJsonValue(action.runnerJobId), sessionId: stringJsonValue(action.sessionId), afterSeq: numberJsonValue(action.afterSeq), - command: boundedJsonString(action.command, 220), - hint: boundedJsonString(action.hint, 220), + limit: numberJsonValue(action.limit), + failureMessage: boundedJsonString(action.failureMessage, 220), valuesPrinted: false, }; }); @@ -710,7 +733,7 @@ function sessionSendPlan(sessionId: string, decision: "steer" | "turn", active: activeBefore: active ? activeBeforeSummary(active) : null, request, ...(runBody ? { run: { bodyBytes: jsonByteLength(runBody), sessionRef: summarizeSendSessionRef(runBody), valuesPrinted: false } } : {}), - next: { confirm: `./scripts/agentrun sessions send ${sessionId} --prompt-stdin`, note: "Remove --dry-run to perform the mutation. Manager will decide internal steer vs turn from durable session state." }, + next: { confirm: managerActionDescriptor({ action: "send-session", operation: "send", resourceKind: "session", resourceName: sessionId, sessionId, inputKind: "prompt" }), note: "Remove --dry-run to perform the mutation. Manager will decide internal steer vs turn from durable session state." }, valuesPrinted: false, }; } @@ -727,13 +750,31 @@ function sessionSendResponse(input: { sessionId: string; decision: "steer" | "tu command: input.command as unknown as JsonRecord, runnerJob: input.runnerJob, activeBefore: input.activeBefore ? activeBeforeSummary(input.activeBefore) : null, - pollCommands: { - show: `./scripts/agentrun sessions show ${input.sessionId} --reader-id cli`, - trace: `./scripts/agentrun sessions trace ${input.sessionId} --after-seq 0 --limit 100`, - output: `./scripts/agentrun sessions output ${input.sessionId} --after-seq 0 --limit 100`, - read: `./scripts/agentrun sessions read ${input.sessionId} --reader-id cli`, - cancel: `./scripts/agentrun sessions cancel ${input.sessionId}`, - }, + pollActions: [ + managerActionDescriptor({ action: "inspect-session", operation: "describe", resourceKind: "session", resourceName: input.sessionId, sessionId: input.sessionId, readerId: "cli" }), + managerActionDescriptor({ action: "poll-trace", operation: "events", resourceKind: "run", resourceName: input.run.id, runId: input.run.id, commandId: input.command.id, sessionId: input.sessionId, afterSeq: 0, limit: 100 }), + managerActionDescriptor({ action: "poll-output", operation: "logs", resourceKind: "session", resourceName: input.sessionId, runId: input.run.id, commandId: input.command.id, sessionId: input.sessionId, afterSeq: 0, limit: 100 }), + managerActionDescriptor({ action: "read-session", operation: "read", resourceKind: "session", resourceName: input.sessionId, sessionId: input.sessionId, readerId: "cli" }), + managerActionDescriptor({ action: "cancel-session", operation: "cancel", resourceKind: "session", resourceName: input.sessionId, sessionId: input.sessionId, reasonRequired: true }), + ], + valuesPrinted: false, + }; +} + +function managerActionDescriptor(input: { action: string; operation: string; resourceKind: string; resourceName: string; runId?: string | null; commandId?: string | null; sessionId?: string | null; afterSeq?: number | null; limit?: number | null; readerId?: string | null; reasonRequired?: boolean; inputKind?: string | null }): JsonRecord { + return { + action: input.action, + operation: input.operation, + resourceKind: input.resourceKind, + resourceName: input.resourceName, + runId: input.runId ?? null, + commandId: input.commandId ?? null, + sessionId: input.sessionId ?? null, + ...(input.afterSeq !== undefined ? { afterSeq: input.afterSeq } : {}), + ...(input.limit !== undefined ? { limit: input.limit } : {}), + ...(input.readerId ? { readerId: input.readerId } : {}), + ...(input.reasonRequired === true ? { reasonRequired: true } : {}), + ...(input.inputKind ? { inputKind: input.inputKind } : {}), valuesPrinted: false, }; } diff --git a/src/mgr/tool-credentials.ts b/src/mgr/tool-credentials.ts index dc4131b..bc8e8e9 100644 --- a/src/mgr/tool-credentials.ts +++ b/src/mgr/tool-credentials.ts @@ -92,10 +92,21 @@ export async function setGithubSshToolCredential(body: unknown, options: ToolCre updatedAt: stringPath(applied, ["metadata", "annotations", `${annotationPrefix}-updated-at`]) ?? updatedAt, credentialValuesPrinted: false, valuesPrinted: false, - pollCommands: { - show: "./scripts/agentrun tool-credentials show github-ssh", - list: "./scripts/agentrun tool-credentials list", - }, + pollActions: [ + toolCredentialActionDescriptor({ action: "inspect-tool-credential", operation: "describe", resourceKind: "tool-credential", resourceName: spec.name, tool: spec.tool }), + toolCredentialActionDescriptor({ action: "list-tool-credentials", operation: "list", resourceKind: "tool-credential", resourceName: "*", tool: spec.tool }), + ], + }; +} + +function toolCredentialActionDescriptor(input: { action: string; operation: string; resourceKind: string; resourceName: string; tool?: string | null }): JsonRecord { + return { + action: input.action, + operation: input.operation, + resourceKind: input.resourceKind, + resourceName: input.resourceName, + tool: input.tool ?? null, + valuesPrinted: false, }; } diff --git a/src/selftest/cases/30-codex-stdio.ts b/src/selftest/cases/30-codex-stdio.ts index 1d33b14..135c78a 100644 --- a/src/selftest/cases/30-codex-stdio.ts +++ b/src/selftest/cases/30-codex-stdio.ts @@ -422,7 +422,39 @@ async function runFailureCase(options: { client: ManagerClient; managerUrl: stri } const command = await options.client.get(`/api/v1/runs/${item.runId}/commands/${item.commandId}`) as { state?: string }; assert.equal(command.state, "failed", options.mode); - assertNoSecretLeak(events); + const envelope = await options.client.get(`/api/v1/runs/${item.runId}/commands/${item.commandId}/result`) as JsonRecord; + if (options.mode === "provider-503-terminal") { + const classification = envelope.terminalClassification as JsonRecord; + const liveness = envelope.liveness as JsonRecord; + const timeoutBudget = liveness.timeoutBudget as JsonRecord; + assert.equal(classification.category, "provider-failed"); + assert.equal(classification.providerEvidence, "failure-kind"); + assert.equal(classification.providerInterruptionKnown, true); + assert.equal(classification.failureKind, "provider-http-error"); + assert.equal(liveness.phase, "terminal"); + assert.equal(typeof liveness.lastEventAgeMs, "number"); + assert.equal(timeoutBudget.timeoutKind, "idle"); + assert.equal(typeof timeoutBudget.idleElapsedMs, "number"); + assertRecoveryActionDescriptors(liveness.recoveryActions); + } + assertNoSecretLeak({ events, envelope }); +} + +function assertRecoveryActionDescriptors(value: unknown): void { + assert.ok(Array.isArray(value), "recoveryActions must be an array"); + const text = JSON.stringify(value); + assert.equal(text.includes("./scripts/agentrun sessions"), false, "server recoveryActions must not expose old sessions CLI paths"); + assert.equal(text.includes("./scripts/agentrun commands"), false, "server recoveryActions must not expose old commands CLI paths"); + assert.equal(text.includes("bun scripts/cli.ts agentrun"), false, "server recoveryActions must not hardcode render-only client commands"); + for (const item of value) { + const action = item as JsonRecord; + assert.equal(Object.prototype.hasOwnProperty.call(action, "command"), false, "recovery action must be a descriptor, not a rendered command string"); + assert.equal(typeof action.action, "string"); + assert.equal(typeof action.operation, "string"); + assert.equal(typeof action.resourceKind, "string"); + assert.equal(typeof action.resourceName, "string"); + assert.equal(action.valuesPrinted, false); + } } function eventPayload(event: { payload: unknown }): JsonRecord { diff --git a/src/selftest/cases/55-timeout-liveness.ts b/src/selftest/cases/55-timeout-liveness.ts index 4825808..af585f5 100644 --- a/src/selftest/cases/55-timeout-liveness.ts +++ b/src/selftest/cases/55-timeout-liveness.ts @@ -29,6 +29,25 @@ const selfTest: SelfTestCase = async (context: SelfTestContext) => { assert.equal(assistantLive.phase, "waiting-model-output"); assert.equal(((assistantLive.lastActivity as JsonRecord).activityKind), "assistant-progress"); + const retry = await createActiveRun(client, context, "timeout-liveness-provider-retry", 120_000); + await client.post(`/api/v1/runs/${retry.runId}/events`, { type: "error", payload: { commandId: retry.commandId, failureKind: "provider-stream-disconnected", willRetry: true, message: "provider stream disconnected; retrying" } }); + const retryResult = await commandResult(client, retry); + const retryLive = retryResult.liveness as JsonRecord; + const retryClassification = retryResult.terminalClassification as JsonRecord; + assert.equal(retryLive.active, true); + assert.equal(retryClassification.category, "active-retry-interruption"); + assert.equal(retryClassification.providerEvidence, "retry-event"); + assert.equal(retryClassification.retryInterruptionObserved, true); + assert.equal(((retryLive.timeoutBudget as JsonRecord).state), "within-budget"); + assert.equal(typeof retryLive.lastEventAgeMs, "number"); + const retryCancelAction = (retryLive.recoveryActions as JsonRecord[]).find((action) => action.action === "cancel-command") as JsonRecord; + assert.equal(retryCancelAction.operation, "cancel"); + assert.equal(retryCancelAction.resourceKind, "command"); + assert.equal(retryCancelAction.resourceName, retry.commandId); + assert.equal(retryCancelAction.runId, retry.runId); + assert.equal(retryCancelAction.commandId, retry.commandId); + assertRecoveryActionDescriptors(retryLive.recoveryActions); + const inactive = await createActiveRun(client, context, "timeout-liveness-inactive", 40); await sleep(36); const inactiveLive = (await commandResult(client, inactive)).liveness as JsonRecord; @@ -72,7 +91,21 @@ const selfTest: SelfTestCase = async (context: SelfTestContext) => { assert.equal((noSessionLive.transportDisconnect as JsonRecord).sourceSeq, 4); assert.equal((noSessionLive.recoveryActions as JsonRecord[]).some((action) => action.action === "continue-session"), false, "sessionId=null must not suggest session-only continuation"); assert.equal((noSessionLive.recoveryActions as JsonRecord[]).some((action) => action.action === "poll-output"), false, "sessionId=null must not suggest session output path"); - assert.ok((noSessionLive.recoveryActions as JsonRecord[]).some((action) => action.action === "poll-trace" && String(action.command).includes("runs events"))); + assert.ok((noSessionLive.recoveryActions as JsonRecord[]).some((action) => action.action === "poll-trace" && action.operation === "events" && action.resourceKind === "run" && action.resourceName === noSession.runId)); + assertRecoveryActionDescriptors(noSessionLive.recoveryActions); + + const manualCancel = await createActiveRun(client, context, "timeout-liveness-manual-command-cancel", 120_000); + await client.post(`/api/v1/commands/${manualCancel.commandId}/cancel`, { reason: "self-test manual command cancel" }); + const manualCancelResult = await commandResult(client, manualCancel); + const manualCancelLive = manualCancelResult.liveness as JsonRecord; + const manualCancelClassification = manualCancelResult.terminalClassification as JsonRecord; + assert.equal(manualCancelResult.terminalStatus, "cancelled"); + assert.equal(manualCancelResult.failureKind, "cancelled"); + assert.equal(manualCancelLive.phase, "terminal"); + assert.equal(manualCancelClassification.category, "cancelled"); + assert.equal(manualCancelClassification.reason, "terminal status or failureKind is cancelled"); + assert.ok((manualCancelLive.recoveryActions as JsonRecord[]).some((action) => action.action === "inspect-result" && action.operation === "result" && action.resourceKind === "command" && action.resourceName === manualCancel.commandId)); + assertRecoveryActionDescriptors(manualCancelLive.recoveryActions); const stale = await createActiveRun(client, context, "timeout-liveness-stale-claimed", 120_000, { session: false, leaseMs: 1 }); await store.saveRunnerJob({ @@ -98,12 +131,14 @@ const selfTest: SelfTestCase = async (context: SelfTestContext) => { assert.equal(staleDiagnosis.runnerLost, true); assert.equal(((staleDiagnosis.runnerJob as JsonRecord).phase), "created"); assert.equal(((staleDiagnosis.session as JsonRecord).sessionRefNull), true); + assertRecoveryActionDescriptors(staleDiagnosis.recoveryActions); assert.ok(terminal.sessionId, "terminal fixture must have a session id"); const terminalSessionId = terminal.sessionId; const session = await client.get(`/api/v1/sessions/${terminalSessionId}?readerId=timeout-liveness`) as JsonRecord; assert.equal(((session.liveness as JsonRecord).phase), "terminal"); assert.ok(Array.isArray(((session.supervisor as JsonRecord).recoveryActions)), "session show must keep terminal recovery actions"); + assertRecoveryActionDescriptors((session.supervisor as JsonRecord).recoveryActions); const task = await client.post("/api/v1/queue/tasks", queueTask(context, terminalSessionId, 50)) as JsonRecord; store.updateQueueTaskAttempt(String(task.id), { @@ -117,16 +152,21 @@ const selfTest: SelfTestCase = async (context: SelfTestContext) => { assert.equal((((commanderItem.supervisor as JsonRecord).diagnosis as JsonRecord).category), "execution-idle-timeout"); assert.equal((((commanderItem.supervisor as JsonRecord).timeoutBudget as JsonRecord).state), "timed-out"); assert.equal((((commanderItem.supervisor as JsonRecord).timeoutBudget as JsonRecord).timeoutKind), "idle"); + assert.equal(typeof ((commanderItem.supervisor as JsonRecord).lastEventAgeMs), "number"); + assert.equal(typeof ((commanderItem.supervisor as JsonRecord).leaseRemainingMs), "number"); const commanderSummary = summarizeQueueCommanderSnapshot(commander, { limit: 5 }); const summaryItem = ((commanderSummary.items as JsonRecord[]) ?? []).find((item) => item.id === task.id) as JsonRecord; assert.equal(((summaryItem.supervisor as JsonRecord).phase), "terminal"); assert.equal((((summaryItem.supervisor as JsonRecord).terminalClassification as JsonRecord).category), "execution-idle-timeout"); assert.equal((((summaryItem.supervisor as JsonRecord).terminalClassification as JsonRecord).providerEvidence), "insufficient"); + assert.equal(typeof ((summaryItem.supervisor as JsonRecord).lastEventAgeMs), "number"); + assert.equal(typeof (((summaryItem.supervisor as JsonRecord).timeoutBudget as JsonRecord).idleElapsedMs), "number"); + assertRecoveryActionDescriptors((summaryItem.supervisor as JsonRecord).recoveryActions); assert.equal(JSON.stringify(commanderSummary).includes("hwpod workspace apply-patch"), false, "commander summary must stay compact and avoid dumping command bodies"); assert.equal(JSON.stringify(summaryItem).includes("fullRecordBytes"), false, "commander item must not add bookkeeping noise"); - assertNoSecretLeak({ toolResult, assistantLive, inactiveLive, terminalResult, noSessionResult, staleResult, session, commanderSummary }); + assertNoSecretLeak({ toolResult, assistantLive, retryResult, inactiveLive, terminalResult, noSessionResult, manualCancelResult, staleResult, session, commanderSummary }); - return { name: "timeout-liveness", tests: ["tool-in-flight-liveness", "assistant-progress-liveness", "stdio-inactive-timeout-budget", "terminal-timeout-recovery", "no-session-drilldown", "terminal-classification", "queue-commander-supervisor", "diagnosis-visibility", "stale-claimed-runner-lost"] }; + return { name: "timeout-liveness", tests: ["tool-in-flight-liveness", "assistant-progress-liveness", "active-provider-retry-summary", "stdio-inactive-timeout-budget", "terminal-timeout-recovery", "no-session-drilldown", "manual-command-cancel-summary", "terminal-classification", "queue-commander-supervisor", "diagnosis-visibility", "stale-claimed-runner-lost"] }; } finally { await new Promise((resolve) => server.server.close(() => resolve())); } @@ -188,4 +228,21 @@ function executionPolicy(timeoutMs: number, codexHome: string): JsonRecord { }; } +function assertRecoveryActionDescriptors(value: unknown): void { + assert.ok(Array.isArray(value), "recoveryActions must be an array"); + const text = JSON.stringify(value); + assert.equal(text.includes("./scripts/agentrun sessions"), false, "server recoveryActions must not expose old sessions CLI paths"); + assert.equal(text.includes("./scripts/agentrun commands"), false, "server recoveryActions must not expose old commands CLI paths"); + assert.equal(text.includes("bun scripts/cli.ts agentrun"), false, "server recoveryActions must not hardcode render-only client commands"); + for (const item of value) { + const action = item as JsonRecord; + assert.equal(Object.prototype.hasOwnProperty.call(action, "command"), false, "recovery action must be a descriptor, not a rendered command string"); + assert.equal(typeof action.action, "string"); + assert.equal(typeof action.operation, "string"); + assert.equal(typeof action.resourceKind, "string"); + assert.equal(typeof action.resourceName, "string"); + assert.equal(action.valuesPrinted, false); + } +} + export default selfTest;