diff --git a/scripts/src/cli.ts b/scripts/src/cli.ts index 14b6c43..faaca16 100644 --- a/scripts/src/cli.ts +++ b/scripts/src/cli.ts @@ -96,7 +96,7 @@ async function dispatch(args: ParsedArgs): Promise { if (group === "queue" && command === "dispatch" && id) return dispatchQueueTask(args, id); if (group === "queue" && command === "refresh" && id) return refreshQueueTask(args, id); if (group === "runs" && command === "create") return client(args).post("/api/v1/runs", await jsonFile(args)); - if (group === "runs" && command === "show" && id) return client(args).get(`/api/v1/runs/${encodeURIComponent(id)}`); + if (group === "runs" && command === "show" && id) return showRun(args, id); if (group === "runs" && command === "events" && id) return runEvents(args, id); if (group === "runs" && command === "result" && id) { const commandId = optionalFlag(args, "command-id"); @@ -113,7 +113,7 @@ async function dispatch(args: ParsedArgs): Promise { if (group === "commands" && command === "show" && id) { const runId = flag(args, "run-id", ""); if (!runId) throw new AgentRunError("schema-invalid", "commands show requires --run-id", { httpStatus: 2 }); - return client(args).get(`/api/v1/runs/${encodeURIComponent(runId)}/commands/${encodeURIComponent(id)}`); + return showCommand(args, runId, id); } if (group === "commands" && command === "result" && id) { const runId = flag(args, "run-id", ""); @@ -159,6 +159,102 @@ async function listRunnerJobs(args: ParsedArgs): Promise { return client(args).get(`/api/v1/runs/${encodeURIComponent(runId)}/runner-jobs${commandId ? `?commandId=${encodeURIComponent(commandId)}` : ""}`); } +async function showRun(args: ParsedArgs, runId: string): Promise { + const run = await client(args).get(`/api/v1/runs/${encodeURIComponent(runId)}`); + if (wantsExpandedOutput(args)) return run; + const commandId = optionalFlag(args, "command-id"); + const result = await client(args).get(`/api/v1/runs/${encodeURIComponent(runId)}/result${commandId ? `?commandId=${encodeURIComponent(commandId)}` : ""}`); + return summarizeRunShowResult(run, result, runId, commandId); +} + +async function showCommand(args: ParsedArgs, runId: string, commandId: string): Promise { + const command = await client(args).get(`/api/v1/runs/${encodeURIComponent(runId)}/commands/${encodeURIComponent(commandId)}`); + if (wantsExpandedOutput(args)) return command; + const result = await client(args).get(`/api/v1/runs/${encodeURIComponent(runId)}/commands/${encodeURIComponent(commandId)}/result`); + return summarizeCommandShowResult(command, result, runId, commandId); +} + +function summarizeRunShowResult(run: JsonValue, result: JsonValue, runId: string, commandId: string | null): JsonRecord { + const resultRecord = jsonRecordValue(result); + const resolvedCommandId = commandId ?? stringValue(resultRecord?.commandId); + const sessionId = stringValue(jsonRecordValue(resultRecord?.sessionRef)?.sessionId); + const lastSeq = numberValue(resultRecord?.lastSeq) ?? 0; + return { + action: "runs-show-summary", + run: summarizeRunRecord(jsonRecordValue(run)), + result: summarizeResultEnvelope(resultRecord), + terminalClassification: summarizeTerminalClassificationIfPresent(resultRecord), + liveness: summarizeLivenessRecord(jsonRecordValue(resultRecord?.liveness)), + finalResponse: summarizeFinalResponseRecord(jsonRecordValue(resultRecord?.finalResponse)), + fullResponseBytes: jsonByteLength({ run, result }), + valuesPrinted: false, + drillDownCommands: runCommandDrillDown(runId, resolvedCommandId, sessionId, lastSeq), + }; +} + +function summarizeCommandShowResult(command: JsonValue, result: JsonValue, runId: string, commandId: string): JsonRecord { + const resultRecord = jsonRecordValue(result); + const sessionId = stringValue(jsonRecordValue(resultRecord?.sessionRef)?.sessionId); + const lastSeq = numberValue(resultRecord?.lastSeq) ?? 0; + return { + action: "commands-show-summary", + command: summarizeCommandRecord(jsonRecordValue(command)), + result: summarizeResultEnvelope(resultRecord), + terminalClassification: summarizeTerminalClassificationIfPresent(resultRecord), + liveness: summarizeLivenessRecord(jsonRecordValue(resultRecord?.liveness)), + finalResponse: summarizeFinalResponseRecord(jsonRecordValue(resultRecord?.finalResponse)), + fullResponseBytes: jsonByteLength({ command, result }), + valuesPrinted: false, + drillDownCommands: runCommandDrillDown(runId, commandId, sessionId, lastSeq), + }; +} + +function summarizeResultEnvelope(record: JsonRecord | null): JsonRecord | null { + if (!record) return null; + return withoutFullRecordBytes(compactRecord(record, { + keys: ["runId", "commandId", "attemptId", "runnerId", "jobName", "namespace", "status", "runStatus", "commandState", "terminalStatus", "terminalSource", "completed", "failureKind", "failureMessage", "lastSeq", "eventCount", "scopedEventCount", "scopedLastSeq", "runnerJobCount"], + })); +} + +function summarizeTerminalClassificationIfPresent(record: JsonRecord | null): JsonRecord | null { + const liveness = jsonRecordValue(record?.liveness); + const classification = jsonRecordValue(record?.terminalClassification) ?? jsonRecordValue(liveness?.terminalClassification); + return classification ? summarizeTerminalClassification(classification) : null; +} + +function summarizeLivenessRecord(record: JsonRecord | null): JsonRecord | null { + if (!record) return null; + const lastActivity = jsonRecordValue(record.lastActivity ?? record.lastCommandActivity); + const timeoutBudget = jsonRecordValue(record.timeoutBudget); + const transportDisconnect = jsonRecordValue(record.transportDisconnect); + return { + ...withoutFullRecordBytes(compactRecord(record, { keys: ["phase", "active", "runStatus", "commandId", "commandType", "commandState", "lastSeq", "lastEventAt", "lastEventAgeMs"] })), + terminalClassification: summarizeTerminalClassificationIfPresent({ terminalClassification: record.terminalClassification ?? null }), + lastActivity: lastActivity ? withoutFullRecordBytes(compactRecord(lastActivity, { keys: ["sourceSeq", "eventId", "activityKind", "type", "status", "toolName", "itemId", "ageMs", "summary"] })) : null, + timeoutBudget: timeoutBudget ? withoutFullRecordBytes(compactRecord(timeoutBudget, { keys: ["state", "timeoutMs", "elapsedMs", "remainingMs", "startedAt", "source"] })) : null, + transportDisconnect: transportDisconnect ? withoutFullRecordBytes(compactRecord(transportDisconnect, { keys: ["sourceSeq", "eventId", "activityKind", "type", "status", "ageMs", "summary"] })) : null, + recoveryActions: summarizeRecoveryActions(record.recoveryActions), + valuesPrinted: false, + }; +} + +function summarizeFinalResponseRecord(record: JsonRecord | null): JsonRecord | null { + if (!record) return null; + return withoutFullRecordBytes(compactRecord(record, { keys: ["seq", "source", "final", "replyAuthority", "textTruncated", "outputTruncated", "text"] })); +} + +function runCommandDrillDown(runId: string, commandId: string | null, sessionId: string | null, lastSeq: number): JsonRecord { + return { + run: `./scripts/agentrun runs show ${runId}`, + runFull: `./scripts/agentrun runs show ${runId} --full`, + result: `./scripts/agentrun runs result ${runId}${commandId ? ` --command-id ${commandId}` : ""}`, + events: `./scripts/agentrun runs events ${runId} --after-seq ${lastSeq} --limit 100 --tail-summary`, + ...(commandId ? { command: `./scripts/agentrun commands show ${commandId} --run-id ${runId}`, commandResult: `./scripts/agentrun commands result ${commandId} --run-id ${runId}` } : {}), + ...(sessionId ? { trace: `./scripts/agentrun sessions trace ${sessionId} --after-seq ${lastSeq} --limit 100 --run-id ${runId}`, output: `./scripts/agentrun sessions output ${sessionId} --after-seq ${lastSeq} --limit 100 --run-id ${runId}` } : {}), + valuesPrinted: false, + }; +} + async function runEvents(args: ParsedArgs, runId: string): Promise { const afterSeq = integerFlag(args, "after-seq", 0, { min: 0 }); const limit = integerFlag(args, "limit", 100, { min: 1, max: 500 }); @@ -593,7 +689,11 @@ export function summarizeQueueTaskListResult(result: JsonValue, options: QueueSu export function summarizeQueueTaskShowResult(result: JsonValue, taskId: string): JsonRecord { const record = jsonRecordValue(result); if (!record) throw new AgentRunError("schema-invalid", "queue show response must be an object", { httpStatus: 2 }); - const sessionId = stringValue(jsonRecordValue(record.sessionRef)?.sessionId) ?? stringValue(jsonRecordValue(record.latestAttempt)?.sessionId); + const latestAttempt = jsonRecordValue(record.latestAttempt); + const runId = stringValue(latestAttempt?.runId); + const commandId = stringValue(latestAttempt?.commandId); + const sessionId = stringValue(jsonRecordValue(record.sessionRef)?.sessionId) ?? stringValue(latestAttempt?.sessionId); + const afterSeq = numberValue(jsonRecordValue(record.supervisor)?.lastSeq) ?? 0; return { action: "queue-show-summary", task: summarizeQueueTaskWithAttempt(record, taskId), @@ -605,6 +705,8 @@ export function summarizeQueueTaskShowResult(result: JsonValue, taskId: string): valuesPrinted: false, pollCommands: { full: `./scripts/agentrun queue show ${taskId} --full`, + ...(runId ? { run: `./scripts/agentrun runs show ${runId}`, events: `./scripts/agentrun runs events ${runId} --after-seq ${afterSeq} --limit 100 --tail-summary` } : {}), + ...(runId && commandId ? { command: `./scripts/agentrun commands show ${commandId} --run-id ${runId}` } : {}), ...(sessionId ? { trace: `./scripts/agentrun sessions trace ${sessionId} --after-seq 0 --limit 100`, output: `./scripts/agentrun sessions output ${sessionId} --after-seq 0 --limit 100` } : {}), }, }; @@ -633,6 +735,9 @@ export function summarizeQueueCommanderSnapshot(result: JsonValue, options: Queu full: "./scripts/agentrun queue commander --reader-id cli --full", raw: "./scripts/agentrun queue commander --reader-id cli --raw", item: "./scripts/agentrun queue show ", + run: "./scripts/agentrun runs show ", + events: "./scripts/agentrun runs events --after-seq --limit 100 --tail-summary", + command: "./scripts/agentrun commands show --run-id ", trace: "./scripts/agentrun sessions trace --after-seq 0 --limit 100", output: "./scripts/agentrun sessions output --after-seq 0 --limit 100", }, @@ -795,8 +900,10 @@ function summarizeSupervisorRecord(record: JsonRecord | null): JsonRecord | null if (!record) return null; const lastActivity = jsonRecordValue(record.lastActivity); const timeoutBudget = jsonRecordValue(record.timeoutBudget); + const terminalClassification = jsonRecordValue(record.terminalClassification); return { ...withoutFullRecordBytes(compactRecord(record, { keys: ["phase", "active", "status", "terminalStatus", "failureKind", "runId", "commandId", "lastSeq"] })), + terminalClassification: terminalClassification ? summarizeTerminalClassification(terminalClassification) : null, lastActivity: lastActivity ? withoutFullRecordBytes(compactRecord(lastActivity, { keys: ["sourceSeq", "eventId", "activityKind", "type", "status", "toolName", "itemId", "ageMs", "summary"] })) : null, timeoutBudget: timeoutBudget ? withoutFullRecordBytes(compactRecord(timeoutBudget, { keys: ["state", "timeoutMs", "elapsedMs", "remainingMs", "startedAt", "source"] })) : null, recoveryActions: summarizeRecoveryActions(record.recoveryActions), @@ -804,6 +911,10 @@ function summarizeSupervisorRecord(record: JsonRecord | null): JsonRecord | null }; } +function summarizeTerminalClassification(record: JsonRecord): JsonRecord { + return withoutFullRecordBytes(compactRecord(record, { keys: ["category", "confidence", "providerEvidence", "providerInterruption", "providerInterruptionKnown", "providerInterruptionReason", "hardTimeout", "transportDisconnectObserved", "transportDisconnectSeq", "reason"] })); +} + function summarizeRecoveryActions(value: JsonValue | undefined): JsonValue[] { if (!Array.isArray(value)) return []; return value.slice(0, 5).map((item) => withoutFullRecordBytes(compactRecord(jsonRecordValue(item), { keys: ["action", "reason", "runId", "commandId", "sessionId", "afterSeq", "hint"] }))); @@ -821,7 +932,7 @@ function summarizeRunRecord(record: JsonRecord | null): JsonRecord | null { function summarizeCommandRecord(record: JsonRecord | null): JsonRecord | null { if (!record) return null; - return compactRecord(record, { keys: ["id", "runId", "seq", "type", "state", "createdAt", "updatedAt", "acknowledgedAt"] }); + return compactRecord(record, { keys: ["id", "runId", "seq", "type", "state", "terminalStatus", "failureKind", "failureMessage", "createdAt", "updatedAt", "acknowledgedAt"] }); } function summarizeRunnerJobRecord(record: JsonRecord | null): JsonRecord | null { diff --git a/src/mgr/result.ts b/src/mgr/result.ts index 0fda440..41b5543 100644 --- a/src/mgr/result.ts +++ b/src/mgr/result.ts @@ -25,6 +25,17 @@ interface AssistantReplySummary { outputTruncated: boolean; } +interface TerminalClassificationInput { + terminal: TerminalStatus | null; + terminalSource: string; + failureKind: FailureKind | null; + failureMessage: string | null; + timeoutBudget: JsonRecord; + transportDisconnect: RunEvent | null; + lastActivity: JsonRecord | null; + command: CommandRecord | null; +} + export async function buildRunResult(store: AgentRunStore, runId: string, commandId?: string): Promise { const run = await store.getRun(runId); const command = await selectCommand(store, runId, commandId); @@ -44,6 +55,7 @@ export async function buildRunResult(store: AgentRunStore, runId: string, comman const reply = assistantReply(scopedEvents); const blocker = terminal === "blocked" || terminal === "failed" ? { failureKind, message: failureMessage, details: failureDetails } : null; const liveness = livenessSnapshot(run, command, events, scopedEvents, terminal, failureKind, failureMessage); + const terminalClassification = terminalClassificationSummary({ terminal, terminalSource, failureKind, failureMessage, liveness }); const steerDelivery = command?.type === "steer" ? steerDeliverySummary(events, command.id) : null; return { runId: run.id, @@ -75,6 +87,7 @@ export async function buildRunResult(store: AgentRunStore, runId: string, comman failureKind, failureMessage, failureDetails, + terminalClassification, blocker, liveness, ...(steerDelivery ? { steerDelivery } : {}), @@ -103,6 +116,7 @@ function livenessSnapshot(run: RunRecord, command: CommandRecord | null, events: const transportDisconnect = latestTransportDisconnect(scopedEvents); const lastActivity = livenessActivitySummary(lastCommandActivity, nowMs); const timeoutBudget = timeoutBudgetSummary(run, command, terminal, failureKind, nowMs); + const terminalClassification = terminalClassificationFromEvidence({ terminal, terminalSource: "liveness", failureKind, failureMessage, timeoutBudget, transportDisconnect, lastActivity, command }); const phase = livenessPhase({ active, command, lastVisibleActivity, leaseExpired: lease.leaseExpired, transportDisconnect, timeoutBudget, lastActivity }); const afterSeq = lastEvent?.seq ?? 0; return { @@ -119,6 +133,7 @@ function livenessSnapshot(run: RunRecord, command: CommandRecord | null, events: lastActivity, lastCommandActivity: lastActivity, timeoutBudget, + terminalClassification, lease, transportDisconnect: transportDisconnect ? livenessActivitySummary(transportDisconnect, nowMs) : null, recoveryActions: recoveryActions({ run, command, afterSeq, active, terminal, failureKind, failureMessage }), @@ -146,6 +161,103 @@ function livenessPhase(input: { active: boolean; command: CommandRecord | null; return "waiting-model"; } +function terminalClassificationSummary(input: { terminal: TerminalStatus | null; terminalSource: string; failureKind: FailureKind | null; failureMessage: string | null; liveness: JsonRecord }): JsonRecord { + const livenessClassification = jsonRecordValue(input.liveness.terminalClassification); + return { + ...(livenessClassification ?? {}), + terminalStatus: input.terminal, + terminalSource: input.terminalSource, + failureKind: input.failureKind, + failureMessage: input.failureMessage ? boundedTextSummary(input.failureMessage, { limitChars: 240 }).text as string : null, + valuesPrinted: false, + }; +} + +function terminalClassificationFromEvidence(input: TerminalClassificationInput): JsonRecord { + const timeoutState = stringJsonValue(input.timeoutBudget.state); + const hardTimeout = input.failureKind === "backend-timeout" || timeoutState === "timed-out"; + const providerKind = providerFailureCategory(input.failureKind); + const cancelled = input.terminal === "cancelled" || input.failureKind === "cancelled"; + const taskFailure = input.terminal === "failed" && input.failureKind !== null && !hardTimeout && !providerKind && !infrastructureFailureKind(input.failureKind); + let category = "unknown"; + let confidence = "low"; + let providerEvidence = "not-applicable"; + let reason = "terminal state is not yet available"; + + if (input.terminal === "completed") { + category = "completed"; + confidence = "high"; + reason = "command completed successfully"; + } else if (cancelled) { + category = "cancelled"; + confidence = "high"; + reason = "terminal status or failureKind is cancelled"; + } else if (providerKind) { + category = providerKind; + confidence = "high"; + providerEvidence = "failure-kind"; + reason = `failureKind ${input.failureKind} is provider-specific`; + } else if (hardTimeout && input.transportDisconnect) { + category = "execution-hard-timeout"; + confidence = "medium"; + providerEvidence = "observed-transport-disconnect"; + reason = "hard timeout is terminal and a backend transport/app-server close event was observed, but existing events do not prove the model provider caused it"; + } else if (hardTimeout) { + category = "execution-hard-timeout"; + confidence = "high"; + providerEvidence = "insufficient"; + reason = "hard timeout is terminal; no provider-specific failure event was recorded"; + } else if (input.terminal === "blocked") { + category = "blocked"; + confidence = "high"; + reason = `terminal status is blocked${input.failureKind ? ` with failureKind ${input.failureKind}` : ""}`; + } else if (taskFailure) { + category = "task-failed"; + confidence = "medium"; + reason = `terminal failure is not timeout, cancellation, provider-specific, or infrastructure-classified${input.failureKind ? `; failureKind=${input.failureKind}` : ""}`; + } else if (input.terminal === "failed" && infrastructureFailureKind(input.failureKind)) { + category = "infrastructure-failed"; + confidence = "medium"; + reason = `failureKind ${input.failureKind} is infrastructure/backend classified`; + } + + return { + category, + confidence, + providerEvidence, + providerInterruption: providerEvidence === "failure-kind" || providerEvidence === "observed-transport-disconnect" ? providerEvidence : "not-established", + providerInterruptionKnown: providerEvidence === "failure-kind", + providerInterruptionReason: providerEvidence === "failure-kind" + ? "provider-specific failureKind is authoritative" + : providerEvidence === "observed-transport-disconnect" + ? "transport disconnect was observed, but current events cannot distinguish provider outage from runner/backend shutdown during timeout" + : providerEvidence === "insufficient" + ? "no provider-specific error or disconnect evidence was recorded" + : null, + hardTimeout, + timeoutState, + transportDisconnectObserved: Boolean(input.transportDisconnect), + transportDisconnectSeq: input.transportDisconnect?.seq ?? null, + lastActivityKind: stringJsonValue(input.lastActivity?.activityKind), + lastActivitySeq: numberJsonValue(input.lastActivity?.sourceSeq), + commandId: input.command?.id ?? null, + reason, + valuesPrinted: false, + }; +} + +function providerFailureCategory(failureKind: FailureKind | null): string | null { + if (!failureKind) return null; + if (failureKind === "provider-stream-disconnected") return "provider-interrupted"; + if (failureKind.startsWith("provider-")) return "provider-failed"; + return null; +} + +function infrastructureFailureKind(failureKind: FailureKind | null): boolean { + if (!failureKind) return false; + return failureKind.startsWith("backend-") || failureKind === "runner-lease-conflict" || failureKind === "infra-failed" || failureKind === "thread-resume-failed"; +} + function timeoutBudgetSummary(run: RunRecord, command: CommandRecord | null, terminal: TerminalStatus | null, failureKind: FailureKind | null, nowMs: number): JsonRecord { const timeoutMs = typeof run.executionPolicy.timeoutMs === "number" && Number.isFinite(run.executionPolicy.timeoutMs) && run.executionPolicy.timeoutMs > 0 ? Math.trunc(run.executionPolicy.timeoutMs) : null; const startedAt = command?.acknowledgedAt ?? command?.createdAt ?? run.updatedAt ?? run.createdAt; @@ -295,6 +407,14 @@ function numberJsonValue(value: JsonValue | undefined): number | null { return typeof value === "number" && Number.isFinite(value) ? value : null; } +function stringJsonValue(value: JsonValue | undefined): string | null { + return typeof value === "string" && value.length > 0 ? value : null; +} + +function jsonRecordValue(value: unknown): JsonRecord | null { + return typeof value === "object" && value !== null && !Array.isArray(value) ? value as JsonRecord : null; +} + function steerDeliverySummary(events: RunEvent[], commandId: string): JsonRecord { const related = events.filter((event) => event.payload.commandId === commandId); const completed = latestPhaseEvent(related, "turn/steer:completed"); diff --git a/src/mgr/server.ts b/src/mgr/server.ts index 578fef8..99b1c88 100644 --- a/src/mgr/server.ts +++ b/src/mgr/server.ts @@ -144,12 +144,14 @@ async function queueTaskSupervisor(store: AgentRunStore, task: JsonRecord): Prom const liveness = asJsonRecord(result.liveness); const lastActivity = asJsonRecord(liveness?.lastActivity ?? liveness?.lastCommandActivity); const timeoutBudget = asJsonRecord(liveness?.timeoutBudget); + const terminalClassification = asJsonRecord(result.terminalClassification ?? liveness?.terminalClassification); return { runId: stringJsonValue(result.runId), commandId: stringJsonValue(result.commandId), status: stringJsonValue(result.status), terminalStatus: stringJsonValue(result.terminalStatus), failureKind: stringJsonValue(result.failureKind), + terminalClassification: terminalClassification ? compactTerminalClassification(terminalClassification) : null, phase: stringJsonValue(liveness?.phase), active: liveness?.active === true, lastSeq: numberJsonValue(liveness?.lastSeq ?? result.lastSeq), @@ -190,6 +192,22 @@ function compactTimeoutBudget(budget: JsonRecord): JsonRecord { }; } +function compactTerminalClassification(record: JsonRecord): JsonRecord { + return { + category: stringJsonValue(record.category), + confidence: stringJsonValue(record.confidence), + providerEvidence: stringJsonValue(record.providerEvidence), + providerInterruption: stringJsonValue(record.providerInterruption), + providerInterruptionKnown: record.providerInterruptionKnown === true, + providerInterruptionReason: boundedJsonString(record.providerInterruptionReason, 240), + hardTimeout: record.hardTimeout === true, + transportDisconnectObserved: record.transportDisconnectObserved === true, + transportDisconnectSeq: numberJsonValue(record.transportDisconnectSeq), + reason: boundedJsonString(record.reason, 240), + valuesPrinted: false, + }; +} + function compactRecoveryActions(value: JsonValue | undefined): JsonValue[] { if (!Array.isArray(value)) return []; return value.slice(0, 5).map((item) => { diff --git a/src/selftest/cases/55-timeout-liveness.ts b/src/selftest/cases/55-timeout-liveness.ts index fea079e..16eff73 100644 --- a/src/selftest/cases/55-timeout-liveness.ts +++ b/src/selftest/cases/55-timeout-liveness.ts @@ -44,18 +44,40 @@ const selfTest: SelfTestCase = async (context: SelfTestContext) => { assert.equal(terminalResult.terminalStatus, "failed"); assert.equal(terminalLive.phase, "terminal"); assert.equal(((terminalLive.timeoutBudget as JsonRecord).state), "timed-out"); + assert.equal(((terminalResult.terminalClassification as JsonRecord).category), "execution-hard-timeout"); + assert.equal(((terminalResult.terminalClassification as JsonRecord).providerEvidence), "insufficient"); + assert.equal(((terminalLive.terminalClassification as JsonRecord).providerInterruptionKnown), false); assert.ok((terminalLive.recoveryActions as JsonRecord[]).some((action) => action.action === "resume-session")); assert.ok((terminalLive.recoveryActions as JsonRecord[]).some((action) => action.action === "split-task")); - const session = await client.get(`/api/v1/sessions/${terminal.sessionId}?readerId=timeout-liveness`) as JsonRecord; + const noSession = await createActiveRun(client, context, "timeout-liveness-no-session", 50, { session: false }); + await client.post(`/api/v1/runs/${noSession.runId}/events`, { type: "backend_status", payload: { commandId: noSession.commandId, phase: "codex-app-server-closed", message: "stdio closed before terminal result" } }); + await client.post(`/api/v1/runs/${noSession.runId}/events`, { type: "terminal_status", payload: { commandId: noSession.commandId, terminalStatus: "failed", failureKind: "backend-timeout", message: "codex stdio turn hard timed out after 50ms" } }); + await client.patch(`/api/v1/commands/${noSession.commandId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn hard timed out after 50ms" }); + await client.patch(`/api/v1/runs/${noSession.runId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn hard timed out after 50ms" }); + const noSessionResult = await commandResult(client, noSession); + const noSessionLive = noSessionResult.liveness as JsonRecord; + const noSessionClassification = noSessionResult.terminalClassification as JsonRecord; + assert.equal(noSessionClassification.category, "execution-hard-timeout"); + assert.equal(noSessionClassification.providerEvidence, "observed-transport-disconnect"); + assert.equal(noSessionClassification.providerInterruptionKnown, false); + assert.match(String(noSessionClassification.providerInterruptionReason), /cannot distinguish provider outage/u); + assert.equal((noSessionLive.transportDisconnect as JsonRecord).sourceSeq, 4); + assert.equal((noSessionLive.recoveryActions as JsonRecord[]).some((action) => action.action === "resume-session"), false, "sessionId=null must not suggest session-only resume"); + assert.equal((noSessionLive.recoveryActions as JsonRecord[]).some((action) => action.action === "poll-output"), false, "sessionId=null must not suggest session output path"); + assert.ok((noSessionLive.recoveryActions as JsonRecord[]).some((action) => action.action === "poll-trace" && String(action.command).includes("runs events"))); + + assert.ok(terminal.sessionId, "terminal fixture must have a session id"); + const terminalSessionId = terminal.sessionId; + const session = await client.get(`/api/v1/sessions/${terminalSessionId}?readerId=timeout-liveness`) as JsonRecord; assert.equal(((session.liveness as JsonRecord).phase), "terminal"); assert.ok(Array.isArray(((session.supervisor as JsonRecord).recoveryActions)), "session show must keep terminal recovery actions"); - const task = await client.post("/api/v1/queue/tasks", queueTask(context, terminal.sessionId, 50)) as JsonRecord; + const task = await client.post("/api/v1/queue/tasks", queueTask(context, terminalSessionId, 50)) as JsonRecord; store.updateQueueTaskAttempt(String(task.id), { state: "running", - latestAttempt: { attemptId: "attempt_timeout_liveness", state: "running", runId: terminal.runId, commandId: terminal.commandId, runnerJobId: null, sessionId: terminal.sessionId, sessionPath: `/api/v1/sessions/${terminal.sessionId}` }, - sessionPath: `/api/v1/sessions/${terminal.sessionId}`, + latestAttempt: { attemptId: "attempt_timeout_liveness", state: "running", runId: terminal.runId, commandId: terminal.commandId, runnerJobId: null, sessionId: terminalSessionId, sessionPath: `/api/v1/sessions/${terminalSessionId}` }, + sessionPath: `/api/v1/sessions/${terminalSessionId}`, }); const commander = await client.get("/api/v1/queue/commander?queue=timeout-liveness&readerId=timeout-liveness") as JsonRecord; const commanderItem = ((commander.items as JsonRecord[]) ?? []).find((item) => item.id === task.id) as JsonRecord; @@ -64,35 +86,37 @@ const selfTest: SelfTestCase = async (context: SelfTestContext) => { const commanderSummary = summarizeQueueCommanderSnapshot(commander, { limit: 5 }); const summaryItem = ((commanderSummary.items as JsonRecord[]) ?? []).find((item) => item.id === task.id) as JsonRecord; assert.equal(((summaryItem.supervisor as JsonRecord).phase), "terminal"); + assert.equal((((summaryItem.supervisor as JsonRecord).terminalClassification as JsonRecord).category), "execution-hard-timeout"); + assert.equal((((summaryItem.supervisor as JsonRecord).terminalClassification as JsonRecord).providerEvidence), "insufficient"); assert.equal(JSON.stringify(commanderSummary).includes("hwpod workspace apply-patch"), false, "commander summary must stay compact and avoid dumping command bodies"); assert.equal(JSON.stringify(summaryItem).includes("fullRecordBytes"), false, "commander item must not add bookkeeping noise"); - assertNoSecretLeak({ toolResult, assistantLive, inactiveLive, terminalResult, session, commanderSummary }); + assertNoSecretLeak({ toolResult, assistantLive, inactiveLive, terminalResult, noSessionResult, session, commanderSummary }); - return { name: "timeout-liveness", tests: ["tool-in-flight-liveness", "assistant-progress-liveness", "stdio-inactive-timeout-budget", "terminal-timeout-recovery", "queue-commander-supervisor"] }; + return { name: "timeout-liveness", tests: ["tool-in-flight-liveness", "assistant-progress-liveness", "stdio-inactive-timeout-budget", "terminal-timeout-recovery", "no-session-drilldown", "terminal-classification", "queue-commander-supervisor"] }; } finally { await new Promise((resolve) => server.server.close(() => resolve())); } }; -async function createActiveRun(client: ManagerClient, context: SelfTestContext, sessionSuffix: string, timeoutMs: number): Promise<{ runId: string; commandId: string; sessionId: string }> { +async function createActiveRun(client: ManagerClient, context: SelfTestContext, sessionSuffix: string, timeoutMs: number, options: { session?: boolean } = {}): Promise<{ runId: string; commandId: string; sessionId: string | null }> { const sessionId = `selftest-${sessionSuffix}`; - const run = await client.post("/api/v1/runs", runBody(context, sessionId, timeoutMs)) as JsonRecord; + const run = await client.post("/api/v1/runs", runBody(context, options.session === false ? null : sessionId, timeoutMs)) as JsonRecord; const command = await client.post(`/api/v1/runs/${run.id}/commands`, { type: "turn", payload: { prompt: sessionSuffix }, idempotencyKey: sessionSuffix }) as JsonRecord; await client.post(`/api/v1/runs/${run.id}/claim`, { runnerId: `runner_${sessionSuffix}`, leaseMs: 60_000 }); await client.post(`/api/v1/commands/${command.id}/ack`, {}); - return { runId: String(run.id), commandId: String(command.id), sessionId }; + return { runId: String(run.id), commandId: String(command.id), sessionId: options.session === false ? null : sessionId }; } async function commandResult(client: ManagerClient, item: { runId: string; commandId: string }): Promise { return await client.get(`/api/v1/runs/${item.runId}/commands/${item.commandId}/result`) as JsonRecord; } -function runBody(context: SelfTestContext, sessionId: string, timeoutMs: number): JsonRecord { +function runBody(context: SelfTestContext, sessionId: string | null, timeoutMs: number): JsonRecord { return { tenantId: "unidesk", projectId: "pikasTech/agentrun", workspaceRef: { kind: "host-path", path: context.workspace }, - sessionRef: { sessionId, conversationId: sessionId }, + sessionRef: sessionId ? { sessionId, conversationId: sessionId } : null, providerId: "G14", backendProfile: "codex", executionPolicy: executionPolicy(timeoutMs, context.codexHome),