From f49695a821caabe7710cde2dedbc2589cc12ad10 Mon Sep 17 00:00:00 2001 From: AgentRun Codex Date: Thu, 11 Jun 2026 12:52:46 +0800 Subject: [PATCH] fix: use idle timeout for codex turns --- src/backend/codex-stdio.ts | 14 ----- src/common/aipod-specs.ts | 15 +++-- src/mgr/diagnosis.ts | 6 +- src/mgr/result.ts | 62 +++++++++++++------ src/mgr/server.ts | 9 +++ src/runner/run-once.ts | 5 +- src/selftest/cases/30-codex-stdio.ts | 22 +++---- .../cases/50-hwlab-manual-dispatch.ts | 7 ++- src/selftest/cases/55-timeout-liveness.ts | 24 ++++--- src/selftest/cases/76-aipod-spec.ts | 3 + src/selftest/fake-codex-app-server.ts | 20 +++--- 11 files changed, 115 insertions(+), 72 deletions(-) diff --git a/src/backend/codex-stdio.ts b/src/backend/codex-stdio.ts index 1275f17..e7cca31 100644 --- a/src/backend/codex-stdio.ts +++ b/src/backend/codex-stdio.ts @@ -483,15 +483,7 @@ async function runCodexStdioTurnWithSession(options: CodexStdioTurnOptions, sess }; options.abortSignal?.addEventListener("abort", abortTurn, { once: true }); const turnIdleTimeoutMs = positiveTimeout(options.timeoutMs); - let hardTimeout: NodeJS.Timeout | null = null; let idleTimeout: NodeJS.Timeout | null = null; - hardTimeout = setTimeout(() => { - if (terminal) return; - terminal = { status: "failed", failureKind: "backend-timeout", message: `codex stdio turn hard timed out after ${turnIdleTimeoutMs}ms` }; - emitEvent({ type: "error", payload: { failureKind: terminal.failureKind, message: terminal.message, phase: "turn:hard-timeout" } }); - beginInterruptAndStop("hard timeout", "turn:hard-timeout"); - terminalResolve(); - }, turnIdleTimeoutMs); const refreshTurnActivity = (): void => { if (terminal) return; if (idleTimeout) clearTimeout(idleTimeout); @@ -508,11 +500,6 @@ async function runCodexStdioTurnWithSession(options: CodexStdioTurnOptions, sess clearTimeout(idleTimeout); idleTimeout = null; }; - const stopTurnHardTimeout = (): void => { - if (!hardTimeout) return; - clearTimeout(hardTimeout); - hardTimeout = null; - }; refreshTurnActivity(); const stopNotifications = session.addNotificationHandler((message) => { refreshTurnActivity(); @@ -611,7 +598,6 @@ async function runCodexStdioTurnWithSession(options: CodexStdioTurnOptions, sess stopNotifications(); options.abortSignal?.removeEventListener("abort", abortTurn); stopTurnIdleTimeout(); - stopTurnHardTimeout(); } if (!terminal) terminal = { status: "failed", failureKind: "backend-response-invalid", message: "codex app-server finished without terminal status" }; const pendingInterrupt: Promise | null = interruptInFlight as Promise | null; diff --git a/src/common/aipod-specs.ts b/src/common/aipod-specs.ts index 72c007c..bc63be8 100644 --- a/src/common/aipod-specs.ts +++ b/src/common/aipod-specs.ts @@ -119,7 +119,7 @@ export function renderAipodSpec(record: AipodSpecRecord, input: RenderAipodInput const payload = mergeRecords(spec.payloadDefaults, input.payload); if (typeof input.prompt === "string" && input.prompt.trim().length > 0) payload.prompt = input.prompt; applyModelPayload(payload, spec.model); - const sessionRef = renderSessionRef(spec.sessionRef ?? null, input); + const sessionRef = renderSessionRef(spec.sessionRef ?? null, input, record, payload); const queueTask = validateCreateQueueTask({ tenantId: input.tenantId ?? spec.tenantId ?? "unidesk", projectId: input.projectId ?? spec.projectId ?? "default", @@ -212,10 +212,17 @@ function validateAipodProviderCredential(profile: BackendProfile, policy: Execut } } -function renderSessionRef(base: SessionRef | null, input: RenderAipodInput): SessionRef | null { +function renderSessionRef(base: SessionRef | null, input: RenderAipodInput, record: AipodSpecRecord, payload: JsonRecord): SessionRef | null { if (input.sessionRef !== undefined) return input.sessionRef; - if (!input.sessionId) return base; - return { ...(base ?? {}), sessionId: input.sessionId }; + const sessionId = input.sessionId ?? base?.sessionId ?? defaultAipodSessionId(record, input, payload); + return { ...(base ?? {}), sessionId, conversationId: base?.conversationId ?? sessionId }; +} + +function defaultAipodSessionId(record: AipodSpecRecord, input: RenderAipodInput, payload: JsonRecord): string { + const seed = input.idempotencyKey ?? `${record.name}:${stableHash({ prompt: input.prompt ?? null, payload })}`; + const suffix = stableHash({ aipod: record.name, seed }).slice(0, 24); + const name = fileSafeAipodName(record.name).toLowerCase().replace(/[^a-z0-9]+/gu, "_").replace(/^_+|_+$/gu, "") || "aipod"; + return `sess_${name}_${suffix}`; } function validateWorkspaceRef(record: JsonRecord): WorkspaceRef { diff --git a/src/mgr/diagnosis.ts b/src/mgr/diagnosis.ts index 355a22c..5cf08fb 100644 --- a/src/mgr/diagnosis.ts +++ b/src/mgr/diagnosis.ts @@ -59,7 +59,7 @@ export function runDiagnosis(input: RunDiagnosisInput): JsonRecord { } : null, runnerJob, session, - timeoutBudget: timeoutBudget ? compactRecord(timeoutBudget, ["state", "timeoutMs", "elapsedMs", "remainingMs", "startedAt", "source"]) : null, + timeoutBudget: timeoutBudget ? compactRecord(timeoutBudget, ["state", "timeoutKind", "timeoutMs", "elapsedMs", "idleElapsedMs", "remainingMs", "startedAt", "idleStartedAt", "lastActivityAt", "lastActivitySeq", "commandElapsedMs", "runElapsedMs", "source"]) : null, recoveryActions, valuesPrinted: false, }; @@ -116,8 +116,8 @@ function recoveryActionsForDiagnosis(input: { run: RunRecord; command: CommandRe actions.push({ action: "poll-events", runId: input.run.id, afterSeq: input.lastSeq, command: `./scripts/agentrun runs events ${input.run.id} --after-seq ${input.lastSeq} --limit 100 --tail-summary`, valuesPrinted: false }); const sessionId = stringValue(input.session.sessionId); if (sessionId) actions.push({ action: "resume-session", sessionId, command: `./scripts/agentrun sessions turn ${sessionId} --prompt-stdin`, valuesPrinted: false }); - else actions.push({ action: "session-unavailable", reason: "sessionRef=null", hint: "当前 run 没有 sessionRef,只能从 run/events/command/runner-job 继续 drill-down 或重新提交任务。", valuesPrinted: false }); - if (input.runnerLost || input.staleClaimed || input.terminalCommandOpenRun) actions.push({ action: "refresh-queue-or-resubmit", reason: input.failureKind ?? "stale-runner-state", hint: "先用 queue refresh/show 对齐 attempt,再按任务边界决定重新派发或拆分续跑。", valuesPrinted: false }); + else actions.push({ action: "session-unavailable", reason: "sessionRef=null", hint: "当前 run 没有 sessionRef,管理者只能从 run/events/command/runner-job 读取 trace 后重新提交;这表示该任务不可同 session 续跑。", valuesPrinted: false }); + if (input.runnerLost || input.staleClaimed || input.terminalCommandOpenRun) actions.push({ action: "refresh-queue-or-resubmit", reason: input.failureKind ?? "stale-runner-state", hint: "先用 queue refresh/show 对齐 attempt;有 sessionId 时继续同一 session,没有 sessionId 才重新派发。", valuesPrinted: false }); return actions.slice(0, 6); } diff --git a/src/mgr/result.ts b/src/mgr/result.ts index d7e2fa0..7d8ac61 100644 --- a/src/mgr/result.ts +++ b/src/mgr/result.ts @@ -118,7 +118,7 @@ function livenessSnapshot(run: RunRecord, command: CommandRecord | null, events: const lease = leaseSummary(run, nowMs); const transportDisconnect = latestTransportDisconnect(scopedEvents); const lastActivity = livenessActivitySummary(lastCommandActivity, nowMs); - const timeoutBudget = timeoutBudgetSummary(run, command, terminal, failureKind, nowMs); + const timeoutBudget = timeoutBudgetSummary(run, command, terminal, failureKind, nowMs, lastActivity); const terminalClassification = terminalClassificationFromEvidence({ terminal, terminalSource: "liveness", failureKind, failureMessage, timeoutBudget, transportDisconnect, lastActivity, command }); const phase = livenessPhase({ active, command, lastVisibleActivity, leaseExpired: lease.leaseExpired, transportDisconnect, timeoutBudget, lastActivity }); const afterSeq = lastEvent?.seq ?? 0; @@ -178,10 +178,13 @@ function terminalClassificationSummary(input: { terminal: TerminalStatus | null; function terminalClassificationFromEvidence(input: TerminalClassificationInput): JsonRecord { const timeoutState = stringJsonValue(input.timeoutBudget.state); - const hardTimeout = input.failureKind === "backend-timeout" || timeoutState === "timed-out"; + const timeoutFailure = input.failureKind === "backend-timeout" || timeoutState === "timed-out"; + const timeoutKind = stringJsonValue(input.timeoutBudget.timeoutKind) ?? (input.timeoutBudget.hardTimeout === true ? "hard" : "idle"); + const idleTimeout = timeoutFailure && timeoutKind === "idle"; + const hardTimeout = timeoutFailure && timeoutKind === "hard"; const providerKind = providerFailureCategory(input.failureKind); const cancelled = input.terminal === "cancelled" || input.failureKind === "cancelled"; - const taskFailure = input.terminal === "failed" && input.failureKind !== null && !hardTimeout && !providerKind && !infrastructureFailureKind(input.failureKind); + const taskFailure = input.terminal === "failed" && input.failureKind !== null && !timeoutFailure && !providerKind && !infrastructureFailureKind(input.failureKind); let category = "unknown"; let confidence = "low"; let providerEvidence = "not-applicable"; @@ -200,16 +203,18 @@ function terminalClassificationFromEvidence(input: TerminalClassificationInput): confidence = "high"; providerEvidence = "failure-kind"; reason = `failureKind ${input.failureKind} is provider-specific`; - } else if (hardTimeout && input.transportDisconnect) { - category = "execution-hard-timeout"; + } else if (timeoutFailure && input.transportDisconnect) { + category = idleTimeout ? "execution-idle-timeout" : "execution-hard-timeout"; confidence = "medium"; providerEvidence = "observed-transport-disconnect"; - reason = "hard timeout is terminal and a backend transport/app-server close event was observed, but existing events do not prove the model provider caused it"; - } else if (hardTimeout) { - category = "execution-hard-timeout"; + reason = idleTimeout + ? "idle timeout is terminal and a backend transport/app-server close event was observed, but existing events do not prove the model provider caused it" + : "hard timeout is terminal and a backend transport/app-server close event was observed, but existing events do not prove the model provider caused it"; + } else if (timeoutFailure) { + category = idleTimeout ? "execution-idle-timeout" : "execution-hard-timeout"; confidence = "high"; providerEvidence = "insufficient"; - reason = "hard timeout is terminal; no provider-specific failure event was recorded"; + reason = idleTimeout ? "idle timeout is terminal; no provider-specific failure event was recorded" : "hard timeout is terminal; no provider-specific failure event was recorded"; } else if (input.terminal === "blocked") { category = "blocked"; confidence = "high"; @@ -238,6 +243,9 @@ function terminalClassificationFromEvidence(input: TerminalClassificationInput): ? "no provider-specific error or disconnect evidence was recorded" : null, hardTimeout, + idleTimeout, + timeoutFailure, + timeoutKind, timeoutState, transportDisconnectObserved: Boolean(input.transportDisconnect), transportDisconnectSeq: input.transportDisconnect?.seq ?? null, @@ -261,29 +269,47 @@ function infrastructureFailureKind(failureKind: FailureKind | null): boolean { return failureKind.startsWith("backend-") || failureKind === "runner-lease-conflict" || failureKind === "infra-failed" || failureKind === "thread-resume-failed"; } -function timeoutBudgetSummary(run: RunRecord, command: CommandRecord | null, terminal: TerminalStatus | null, failureKind: FailureKind | null, nowMs: number): JsonRecord { +function timeoutBudgetSummary(run: RunRecord, command: CommandRecord | null, terminal: TerminalStatus | null, failureKind: FailureKind | null, nowMs: number, lastActivity: JsonRecord | null): JsonRecord { const timeoutMs = typeof run.executionPolicy.timeoutMs === "number" && Number.isFinite(run.executionPolicy.timeoutMs) && run.executionPolicy.timeoutMs > 0 ? Math.trunc(run.executionPolicy.timeoutMs) : null; - const startedAt = command?.acknowledgedAt ?? command?.createdAt ?? run.updatedAt ?? run.createdAt; - const startedMs = Date.parse(startedAt); - const elapsedMs = timeoutMs !== null && Number.isFinite(startedMs) ? Math.max(0, nowMs - startedMs) : null; + const commandStartedAt = command?.acknowledgedAt ?? command?.createdAt ?? null; + const fallbackStartedAt = commandStartedAt ?? run.updatedAt ?? run.createdAt; + const fallbackStartedMs = Date.parse(fallbackStartedAt); + const lastActivityAt = stringJsonValue(lastActivity?.createdAt); + const lastActivityMs = lastActivityAt ? Date.parse(lastActivityAt) : NaN; + const idleStartedAt = Number.isFinite(lastActivityMs) ? lastActivityAt : fallbackStartedAt; + const idleStartedMs = Number.isFinite(lastActivityMs) ? lastActivityMs : fallbackStartedMs; + const elapsedMs = timeoutMs !== null && Number.isFinite(idleStartedMs) ? Math.max(0, nowMs - idleStartedMs) : null; const remainingMs = timeoutMs !== null && elapsedMs !== null ? Math.max(0, timeoutMs - elapsedMs) : null; const approachingThresholdMs = timeoutMs === null ? null : Math.min(120_000, Math.max(10_000, Math.floor(timeoutMs / 10))); + const commandStartedMs = commandStartedAt ? Date.parse(commandStartedAt) : NaN; + const commandElapsedMs = Number.isFinite(commandStartedMs) ? Math.max(0, nowMs - commandStartedMs) : null; + const runStartedMs = Date.parse(run.createdAt); + const runElapsedMs = Number.isFinite(runStartedMs) ? Math.max(0, nowMs - runStartedMs) : null; let state = "unknown"; if (timeoutMs !== null && elapsedMs !== null) { if (terminal !== null) state = failureKind === "backend-timeout" ? "timed-out" : "terminal"; else if (remainingMs === 0) state = "overdue"; - else if (approachingThresholdMs !== null && remainingMs !== null && remainingMs <= approachingThresholdMs) state = "approaching-hard-timeout"; + else if (approachingThresholdMs !== null && remainingMs !== null && remainingMs <= approachingThresholdMs) state = "approaching-idle-timeout"; else state = "within-budget"; } return { timeoutMs, - source: "executionPolicy.timeoutMs", - startedAt, + source: "executionPolicy.timeoutMs idle activity", + timeoutKind: "idle", + startedAt: idleStartedAt, + idleStartedAt, + lastActivityAt: Number.isFinite(lastActivityMs) ? lastActivityAt : null, + lastActivitySeq: numberJsonValue(lastActivity?.sourceSeq ?? lastActivity?.seq), elapsedMs, + idleElapsedMs: elapsedMs, remainingMs, approachingThresholdMs, state, - hardTimeout: true, + hardTimeout: false, + commandStartedAt, + commandElapsedMs, + runStartedAt: run.createdAt, + runElapsedMs, valuesPrinted: false, }; } @@ -400,7 +426,7 @@ function recoveryActions(input: { run: RunRecord; command: CommandRecord | null; if (terminal === "failed" || terminal === "blocked" || terminal === "cancelled") { if (command) actions.push({ action: "inspect-result", runId: run.id, commandId: command.id, command: `./scripts/agentrun commands result ${command.id} --run-id ${run.id}`, valuesPrinted: false }); if (sessionId) actions.push({ action: "resume-session", sessionId, command: `./scripts/agentrun sessions turn ${sessionId} --prompt-stdin`, valuesPrinted: false }); - if (failureKind === "backend-timeout") actions.push({ action: "split-task", reason: "backend-timeout", hint: "把大 patch / 长工具链拆成更短 turn 后用同一 session 续跑", failureMessage: failureMessage ? boundedTextSummary(failureMessage, { limitChars: 200 }).text as string : null, valuesPrinted: false }); + if (failureKind === "backend-timeout") actions.push({ action: "split-task", reason: "backend-timeout", hint: "先由管理者读取 trace/result,总结下一步,再把后续 prompt 发到同一 session;必要时把大 patch / 长工具链拆成更短 turn。", failureMessage: failureMessage ? boundedTextSummary(failureMessage, { limitChars: 200 }).text as string : null, valuesPrinted: false }); else actions.push({ action: "retry-or-split", reason: failureKind ?? "terminal", hint: "先读 trace/output 的 detail id,再决定 steer、重跑或拆分", valuesPrinted: false }); } return actions; diff --git a/src/mgr/server.ts b/src/mgr/server.ts index de8588e..fd6dc46 100644 --- a/src/mgr/server.ts +++ b/src/mgr/server.ts @@ -184,10 +184,17 @@ function compactActivity(activity: JsonRecord): JsonRecord { function compactTimeoutBudget(budget: JsonRecord): JsonRecord { return { state: stringJsonValue(budget.state), + timeoutKind: stringJsonValue(budget.timeoutKind), timeoutMs: numberJsonValue(budget.timeoutMs), elapsedMs: numberJsonValue(budget.elapsedMs), + idleElapsedMs: numberJsonValue(budget.idleElapsedMs), remainingMs: numberJsonValue(budget.remainingMs), startedAt: stringJsonValue(budget.startedAt), + idleStartedAt: stringJsonValue(budget.idleStartedAt), + lastActivityAt: stringJsonValue(budget.lastActivityAt), + lastActivitySeq: numberJsonValue(budget.lastActivitySeq), + commandElapsedMs: numberJsonValue(budget.commandElapsedMs), + runElapsedMs: numberJsonValue(budget.runElapsedMs), source: stringJsonValue(budget.source), valuesPrinted: false, }; @@ -202,6 +209,8 @@ function compactTerminalClassification(record: JsonRecord): JsonRecord { providerInterruptionKnown: record.providerInterruptionKnown === true, providerInterruptionReason: boundedJsonString(record.providerInterruptionReason, 240), hardTimeout: record.hardTimeout === true, + idleTimeout: record.idleTimeout === true, + timeoutKind: stringJsonValue(record.timeoutKind), transportDisconnectObserved: record.transportDisconnectObserved === true, transportDisconnectSeq: numberJsonValue(record.transportDisconnectSeq), reason: boundedJsonString(record.reason, 240), diff --git a/src/runner/run-once.ts b/src/runner/run-once.ts index 9c46cfb..8922951 100644 --- a/src/runner/run-once.ts +++ b/src/runner/run-once.ts @@ -188,7 +188,10 @@ function resourceMaterializationEnv(env: NodeJS.ProcessEnv, runId: string, attem function resourceEnvForMaterialized(env: NodeJS.ProcessEnv, materialized: Awaited>): NodeJS.ProcessEnv | undefined { if (!materialized) return undefined; let next: NodeJS.ProcessEnv | undefined; - if (materialized.binPath) next = prependPath(env, materialized.binPath); + if (materialized.binPath) { + const withPath = prependPath(env, materialized.binPath); + next = { ...withPath, AGENTRUN_RESOURCE_BIN_PATH: withPath.AGENTRUN_RESOURCE_BIN_PATH ?? materialized.binPath }; + } if (materialized.skillsDir) { const base = next ?? { ...env }; const previous = base.AGENTRUN_SKILLS_DIRS; diff --git a/src/selftest/cases/30-codex-stdio.ts b/src/selftest/cases/30-codex-stdio.ts index b968f27..1d33b14 100644 --- a/src/selftest/cases/30-codex-stdio.ts +++ b/src/selftest/cases/30-codex-stdio.ts @@ -182,7 +182,7 @@ const selfTest: SelfTestCase = async (context) => { assert.equal(liveResult.terminalStatus, "completed", "slow live tool event turn should complete"); await runInterruptBeforeTurnStartResponseCase({ client, managerUrl: server.baseUrl, context }); - await runHardTimeoutDuringToolProgressCase({ client, managerUrl: server.baseUrl, context }); + await runToolProgressRefreshesIdleTimeoutCase({ client, managerUrl: server.baseUrl, context }); const noisy = await createRunWithCommand(client, context, "hello noisy reasoning", "selftest-noisy-reasoning-events", 15_000); const noisyResult = await runOnce({ managerUrl: server.baseUrl, runId: noisy.runId, codexCommand: context.fakeCodexCommand, codexArgs: context.fakeCodexArgs, codexHome: context.codexHome, env: { CODEX_HOME: context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "noisy-reasoning-events" }, oneShot: true }) as JsonRecord; @@ -245,7 +245,7 @@ const selfTest: SelfTestCase = async (context) => { await runSessionStorageSubdirCase({ client, managerUrl: server.baseUrl, context }); await runSessionStorageNoSecretLeakCase({ client, managerUrl: server.baseUrl, context }); - return { name: "codex-stdio", tests: ["runner-lease-heartbeat", "runner-lease-conflict-recovery", "codex-stdio-fake-turn", "codex-stdio-k8s-sandbox-override", "codex-stdio-projected-writable-home", "codex-stdio-deepseek-profile-fake-turn", "codex-stdio-dsflash-go-profile-fake-turn", "codex-stdio-dsflash-go-config-metadata", "codex-stdio-minimax-m3-profile-fake-turn", "codex-stdio-deepseek-missing-secret-no-fallback", "codex-stdio-minimax-m3-missing-secret-no-fallback", "codex-stdio-config-model-authoritative", "codex-stdio-explicit-model-forwarded", "codex-stdio-final-agent-message-only", "codex-stdio-web-search-progress", "codex-stdio-stale-thread-resume-failed", "codex-stdio-live-tool-events", "codex-stdio-interrupt-before-turn-start-response", "codex-stdio-hard-timeout-during-tool-progress", "codex-stdio-noisy-reasoning-suppression", "codex-stdio-missing-turn-result", "codex-stdio-provider-auth-failed", "codex-stdio-provider-rate-limited", "codex-stdio-provider-invalid-tool-call", "codex-stdio-provider-compact-unsupported", "codex-stdio-provider-stream-disconnected", "codex-stdio-provider-503-rpc-error", "codex-stdio-provider-503-terminal", "codex-stdio-provider-unavailable", "codex-stdio-provider-503-retry-event", "codex-stdio-provider-refused-retry-recovered", "codex-stdio-invalid-json", "codex-stdio-timeout", "codex-stdio-idle-timeout-progress-refresh", "codex-stdio-command-failure-keeps-run-open", "codex-stdio-secret-unavailable", "codex-stdio-spawn-failure"] }; + return { name: "codex-stdio", tests: ["runner-lease-heartbeat", "runner-lease-conflict-recovery", "codex-stdio-fake-turn", "codex-stdio-k8s-sandbox-override", "codex-stdio-projected-writable-home", "codex-stdio-deepseek-profile-fake-turn", "codex-stdio-dsflash-go-profile-fake-turn", "codex-stdio-dsflash-go-config-metadata", "codex-stdio-minimax-m3-profile-fake-turn", "codex-stdio-deepseek-missing-secret-no-fallback", "codex-stdio-minimax-m3-missing-secret-no-fallback", "codex-stdio-config-model-authoritative", "codex-stdio-explicit-model-forwarded", "codex-stdio-final-agent-message-only", "codex-stdio-web-search-progress", "codex-stdio-stale-thread-resume-failed", "codex-stdio-live-tool-events", "codex-stdio-interrupt-before-turn-start-response", "codex-stdio-tool-progress-refreshes-idle-timeout", "codex-stdio-noisy-reasoning-suppression", "codex-stdio-missing-turn-result", "codex-stdio-provider-auth-failed", "codex-stdio-provider-rate-limited", "codex-stdio-provider-invalid-tool-call", "codex-stdio-provider-compact-unsupported", "codex-stdio-provider-stream-disconnected", "codex-stdio-provider-503-rpc-error", "codex-stdio-provider-503-terminal", "codex-stdio-provider-unavailable", "codex-stdio-provider-503-retry-event", "codex-stdio-provider-refused-retry-recovered", "codex-stdio-invalid-json", "codex-stdio-timeout", "codex-stdio-idle-timeout-progress-refresh", "codex-stdio-command-failure-keeps-run-open", "codex-stdio-secret-unavailable", "codex-stdio-spawn-failure"] }; } finally { await new Promise((resolve) => server.server.close(() => resolve())); } @@ -350,26 +350,24 @@ async function runInterruptBeforeTurnStartResponseCase(options: { client: Manage assertNoSecretLeak({ result, events }); } -async function runHardTimeoutDuringToolProgressCase(options: { client: ManagerClient; managerUrl: string; context: SelfTestContext }): Promise { - const item = await createRunWithCommand(options.client, options.context, "hard timeout during tool progress", "selftest-hard-timeout-tool-progress", 120); +async function runToolProgressRefreshesIdleTimeoutCase(options: { client: ManagerClient; managerUrl: string; context: SelfTestContext }): Promise { + const item = await createRunWithCommand(options.client, options.context, "tool progress refreshes idle timeout", "selftest-tool-progress-refreshes-idle", 120); const result = await runOnce({ managerUrl: options.managerUrl, runId: item.runId, codexCommand: options.context.fakeCodexCommand, codexArgs: options.context.fakeCodexArgs, codexHome: options.context.codexHome, - env: { CODEX_HOME: options.context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "hard-timeout-tool-progress" }, + env: { CODEX_HOME: options.context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "tool-progress-refreshes-idle" }, oneShot: true, }) as JsonRecord; - assert.equal(result.terminalStatus, "failed", "hard timeout should fail even while the tool keeps producing progress"); - assert.equal(result.failureKind, "backend-timeout"); + assert.equal(result.terminalStatus, "completed", "tool progress should refresh idle timeout until terminal completion"); + assert.equal(result.failureKind, null); const events = await options.client.get(`/api/v1/runs/${item.runId}/events?afterSeq=0&limit=100`) as { items?: Array<{ type: string; payload: unknown }> }; - assert.ok(events.items?.some((event) => event.type === "command_output" && String(eventPayload(event).text ?? "").includes("progress")), "progress output should be visible before hard timeout"); - assert.ok(events.items?.some((event) => event.type === "error" && eventPayload(event).phase === "turn:hard-timeout"), "hard timeout should be recorded as an error event"); - assert.ok(events.items?.some((event) => event.type === "backend_status" && eventPayload(event).phase === "turn-interrupt-requested"), "timeout should request backend interrupt before process teardown"); - assert.ok(events.items?.some((event) => event.type === "backend_status" && eventPayload(event).phase === "turn/interrupt:completed"), "timeout interrupt result should be visible"); + assert.ok(events.items?.some((event) => event.type === "command_output" && String(eventPayload(event).text ?? "").includes("progress 3")), "progress output should stay visible while idle timeout is refreshed"); + assert.equal(events.items?.some((event) => event.type === "error" && eventPayload(event).failureKind === "backend-timeout"), false, "progressing tool output must not fail on wall-clock elapsed time"); const command = await options.client.get(`/api/v1/runs/${item.runId}/commands/${item.commandId}`) as { state?: string }; - assert.equal(command.state, "failed", "hard timed out command should be failed"); + assert.equal(command.state, "completed", "command should complete after progress-delayed terminal status"); assertNoSecretLeak({ result, events }); } diff --git a/src/selftest/cases/50-hwlab-manual-dispatch.ts b/src/selftest/cases/50-hwlab-manual-dispatch.ts index 50e37d6..dddbae1 100644 --- a/src/selftest/cases/50-hwlab-manual-dispatch.ts +++ b/src/selftest/cases/50-hwlab-manual-dispatch.ts @@ -97,7 +97,7 @@ console.log(JSON.stringify({ apiVersion: manifest.apiVersion, kind: manifest.kin const resultBundleTargets = (((resultEnvelope.resourceBundleRef as JsonRecord).bundles as JsonRecord).items as JsonRecord[]).map((item) => item.targetPath); assert.deepEqual(resultBundleTargets, ["tools", ".agents/skills"]); const materialized = ((resultEnvelope.resourceBundleRef as JsonRecord).materialized as JsonRecord); - assert.deepEqual(((materialized.tools as JsonRecord).names), ["hwpod"]); + assert.deepEqual(((materialized.tools as JsonRecord).names), ["apply_patch", "hwpod", "tran", "trans"]); assert.equal(((materialized.tools as JsonRecord).installed), true); assert.deepEqual(((materialized.skillDirs as JsonRecord).names), ["dad-dev", "hwpod-cli", "hwpod-ctl"]); const requiredSkillItems = ((materialized.requiredSkills as JsonRecord).items as JsonRecord[]); @@ -275,6 +275,9 @@ async function createLocalGitBundle(context: SelfTestContext, repoName = "bundle await mkdir(path.join(repo, "tools"), { recursive: true }); await mkdir(path.join(repo, "tools", "src"), { recursive: true }); await writeFile(path.join(repo, "tools", "hwpod"), "#!/usr/bin/env sh\nexec bun \"$(dirname \"$0\")/hwpod-cli.ts\" \"$@\"\n", "utf8"); + await writeFile(path.join(repo, "tools", "tran"), "#!/usr/bin/env sh\necho tran-selftest\n", "utf8"); + await writeFile(path.join(repo, "tools", "trans"), "#!/usr/bin/env sh\necho trans-selftest\n", "utf8"); + await writeFile(path.join(repo, "tools", "apply_patch"), "#!/usr/bin/env sh\necho apply-patch-selftest\n", "utf8"); await writeFile(path.join(repo, "tools", "hwpod-cli.ts"), "import { hwpodSelftestName } from './src/hwpod-harness-lib.ts';\nconsole.log(JSON.stringify({ ok: true, cli: hwpodSelftestName(), argv: process.argv.slice(2) }));\n", "utf8"); await writeFile(path.join(repo, "tools", "src", "hwpod-harness-lib.ts"), "export function hwpodSelftestName() { return 'hwpod-selftest'; }\n", "utf8"); await writeFile(path.join(repo, "tools", "hwpod-node.test.ts"), "console.log('test-only source file without shebang');\n", "utf8"); @@ -315,7 +318,7 @@ async function createLocalGitBundle(context: SelfTestContext, repoName = "bundle "Use hwpod-ctl for HWPOD runtime inspection and control-plane state.", ].join("\n"), "utf8"); await writeFile(path.join(repo, "skills", "hwpod-ctl", "scripts", "hwpod-ctl.mjs"), "console.log(JSON.stringify({ ok: true, cli: 'hwpod-ctl-skill-selftest' }));\n", "utf8"); - await execFile("git", ["add", "README.md", "tools/hwpod", "tools/hwpod-cli.ts", "tools/src/hwpod-harness-lib.ts", "tools/hwpod-node.test.ts", "internal/agent/prompts/hwlab-v02-runtime.md", "skills/dad-dev/SKILL.md", "skills/hwpod-cli/SKILL.md", "skills/hwpod-cli/scripts/hwpod-cli.mjs", "skills/hwpod-ctl/SKILL.md", "skills/hwpod-ctl/scripts/hwpod-ctl.mjs"], { cwd: repo }); + await execFile("git", ["add", "README.md", "tools/hwpod", "tools/tran", "tools/trans", "tools/apply_patch", "tools/hwpod-cli.ts", "tools/src/hwpod-harness-lib.ts", "tools/hwpod-node.test.ts", "internal/agent/prompts/hwlab-v02-runtime.md", "skills/dad-dev/SKILL.md", "skills/hwpod-cli/SKILL.md", "skills/hwpod-cli/scripts/hwpod-cli.mjs", "skills/hwpod-ctl/SKILL.md", "skills/hwpod-ctl/scripts/hwpod-ctl.mjs"], { cwd: repo }); await execFile("git", ["-c", "user.email=selftest@example.invalid", "-c", "user.name=AgentRun SelfTest", "commit", "-m", "bundle selftest"], { cwd: repo }); const { stdout } = await execFile("git", ["rev-parse", "HEAD"], { cwd: repo }); return { repoUrl: repo, commitId: stdout.trim(), requiredSkills: [{ name: "dad-dev" }] }; diff --git a/src/selftest/cases/55-timeout-liveness.ts b/src/selftest/cases/55-timeout-liveness.ts index 9608b13..aaa993e 100644 --- a/src/selftest/cases/55-timeout-liveness.ts +++ b/src/selftest/cases/55-timeout-liveness.ts @@ -33,18 +33,21 @@ const selfTest: SelfTestCase = async (context: SelfTestContext) => { await sleep(36); const inactiveLive = (await commandResult(client, inactive)).liveness as JsonRecord; assert.equal(inactiveLive.phase, "runner-stdio-inactive"); - assert.ok(["approaching-hard-timeout", "overdue"].includes(String((inactiveLive.timeoutBudget as JsonRecord).state))); + assert.ok(["approaching-idle-timeout", "overdue"].includes(String((inactiveLive.timeoutBudget as JsonRecord).state))); + assert.equal(((inactiveLive.timeoutBudget as JsonRecord).timeoutKind), "idle"); const terminal = await createActiveRun(client, context, "timeout-liveness-terminal", 50); - await client.post(`/api/v1/runs/${terminal.runId}/events`, { type: "error", payload: { commandId: terminal.commandId, failureKind: "backend-timeout", phase: "turn:hard-timeout", message: "codex stdio turn hard timed out after 50ms" } }); - await client.patch(`/api/v1/commands/${terminal.commandId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn hard timed out after 50ms" }); - await client.patch(`/api/v1/runs/${terminal.runId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn hard timed out after 50ms" }); + await client.post(`/api/v1/runs/${terminal.runId}/events`, { type: "error", payload: { commandId: terminal.commandId, failureKind: "backend-timeout", phase: "turn:idle-timeout", message: "codex stdio turn idle timed out after 50ms without activity" } }); + await client.patch(`/api/v1/commands/${terminal.commandId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn idle timed out after 50ms without activity" }); + await client.patch(`/api/v1/runs/${terminal.runId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn idle timed out after 50ms without activity" }); const terminalResult = await commandResult(client, terminal); const terminalLive = terminalResult.liveness as JsonRecord; assert.equal(terminalResult.terminalStatus, "failed"); assert.equal(terminalLive.phase, "terminal"); assert.equal(((terminalLive.timeoutBudget as JsonRecord).state), "timed-out"); - assert.equal(((terminalResult.terminalClassification as JsonRecord).category), "execution-hard-timeout"); + assert.equal(((terminalLive.timeoutBudget as JsonRecord).timeoutKind), "idle"); + assert.equal(((terminalResult.terminalClassification as JsonRecord).category), "execution-idle-timeout"); + assert.equal(((terminalResult.terminalClassification as JsonRecord).idleTimeout), true); assert.equal(((terminalResult.terminalClassification as JsonRecord).providerEvidence), "insufficient"); assert.equal(((terminalLive.terminalClassification as JsonRecord).providerInterruptionKnown), false); assert.ok((terminalLive.recoveryActions as JsonRecord[]).some((action) => action.action === "resume-session")); @@ -52,13 +55,13 @@ const selfTest: SelfTestCase = async (context: SelfTestContext) => { const noSession = await createActiveRun(client, context, "timeout-liveness-no-session", 50, { session: false }); await client.post(`/api/v1/runs/${noSession.runId}/events`, { type: "backend_status", payload: { commandId: noSession.commandId, phase: "codex-app-server-closed", message: "stdio closed before terminal result" } }); - await client.post(`/api/v1/runs/${noSession.runId}/events`, { type: "terminal_status", payload: { commandId: noSession.commandId, terminalStatus: "failed", failureKind: "backend-timeout", message: "codex stdio turn hard timed out after 50ms" } }); - await client.patch(`/api/v1/commands/${noSession.commandId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn hard timed out after 50ms" }); + await client.post(`/api/v1/runs/${noSession.runId}/events`, { type: "terminal_status", payload: { commandId: noSession.commandId, terminalStatus: "failed", failureKind: "backend-timeout", message: "codex stdio turn idle timed out after 50ms without activity" } }); + await client.patch(`/api/v1/commands/${noSession.commandId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn idle timed out after 50ms without activity" }); const noSessionResult = await commandResult(client, noSession); const noSessionLive = noSessionResult.liveness as JsonRecord; const noSessionClassification = noSessionResult.terminalClassification as JsonRecord; const noSessionDiagnosis = noSessionResult.diagnosis as JsonRecord; - assert.equal(noSessionClassification.category, "execution-hard-timeout"); + assert.equal(noSessionClassification.category, "execution-idle-timeout"); assert.equal(noSessionClassification.providerEvidence, "observed-transport-disconnect"); assert.equal(noSessionClassification.providerInterruptionKnown, false); assert.equal(noSessionDiagnosis.category, "terminal-command-open-run"); @@ -111,12 +114,13 @@ const selfTest: SelfTestCase = async (context: SelfTestContext) => { const commander = await client.get("/api/v1/queue/commander?queue=timeout-liveness&readerId=timeout-liveness") as JsonRecord; const commanderItem = ((commander.items as JsonRecord[]) ?? []).find((item) => item.id === task.id) as JsonRecord; assert.equal(((commanderItem.supervisor as JsonRecord).phase), "terminal"); - assert.equal((((commanderItem.supervisor as JsonRecord).diagnosis as JsonRecord).category), "execution-hard-timeout"); + assert.equal((((commanderItem.supervisor as JsonRecord).diagnosis as JsonRecord).category), "execution-idle-timeout"); assert.equal((((commanderItem.supervisor as JsonRecord).timeoutBudget as JsonRecord).state), "timed-out"); + assert.equal((((commanderItem.supervisor as JsonRecord).timeoutBudget as JsonRecord).timeoutKind), "idle"); const commanderSummary = summarizeQueueCommanderSnapshot(commander, { limit: 5 }); const summaryItem = ((commanderSummary.items as JsonRecord[]) ?? []).find((item) => item.id === task.id) as JsonRecord; assert.equal(((summaryItem.supervisor as JsonRecord).phase), "terminal"); - assert.equal((((summaryItem.supervisor as JsonRecord).terminalClassification as JsonRecord).category), "execution-hard-timeout"); + assert.equal((((summaryItem.supervisor as JsonRecord).terminalClassification as JsonRecord).category), "execution-idle-timeout"); assert.equal((((summaryItem.supervisor as JsonRecord).terminalClassification as JsonRecord).providerEvidence), "insufficient"); assert.equal(JSON.stringify(commanderSummary).includes("hwpod workspace apply-patch"), false, "commander summary must stay compact and avoid dumping command bodies"); assert.equal(JSON.stringify(summaryItem).includes("fullRecordBytes"), false, "commander item must not add bookkeeping noise"); diff --git a/src/selftest/cases/76-aipod-spec.ts b/src/selftest/cases/76-aipod-spec.ts index ebb8680..66488a2 100644 --- a/src/selftest/cases/76-aipod-spec.ts +++ b/src/selftest/cases/76-aipod-spec.ts @@ -38,6 +38,9 @@ const selfTest: SelfTestCase = async (context) => { assert.equal(task.backendProfile, "sub2api"); assert.equal(task.providerId, "G14"); assert.equal(task.idempotencyKey, "selftest-aipod-artificer"); + const sessionRef = task.sessionRef as JsonRecord; + assert.match(String(sessionRef.sessionId), /^sess_artificer_[a-f0-9]{24}$/u, "Artificer queue task should default to a resumable session"); + assert.equal(sessionRef.conversationId, sessionRef.sessionId, "default Artificer conversation should match the generated session"); const taskImageRef = ((task.metadata as JsonRecord).aipodImageRef as JsonRecord); assert.equal(taskImageRef.kind, "env-image-dockerfile"); assert.equal(taskImageRef.valuesPrinted, false); diff --git a/src/selftest/fake-codex-app-server.ts b/src/selftest/fake-codex-app-server.ts index 2b10ff6..24771ea 100644 --- a/src/selftest/fake-codex-app-server.ts +++ b/src/selftest/fake-codex-app-server.ts @@ -262,17 +262,21 @@ for await (const line of rl) { activeSteerTurn = { id: turn.id, completed: false, timer: setTimeout(() => undefined, 60_000) }; continue; } - if (mode === "hard-timeout-tool-progress") { + if (mode === "tool-progress-refreshes-idle") { turnCounter += 1; - const turn = { id: `turn_selftest_${turnCounter}`, status: "running" }; - notify("turn/started", { turn }); - notify("item/started", { item: { id: "tool_hard_timeout", type: "commandExecution", command: "hwpod cmd long-running", status: "running", processId: process.pid } }); + const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" }; + notify("turn/started", { turn: { id: turn.id, status: "running" } }); + notify("item/started", { item: { id: "tool_idle_refresh", type: "commandExecution", command: "hwpod cmd long-running", status: "running", processId: process.pid } }); respond(message.id, { turn }); - activeSteerTurn = { id: turn.id, completed: false, timer: null }; let ticks = 0; - activeSteerTurn.timer = setInterval(() => { + const timer = setInterval(() => { ticks += 1; - notify("item/commandExecution/outputDelta", { itemId: "tool_hard_timeout", delta: `progress ${ticks}\n` }); + notify("item/commandExecution/outputDelta", { itemId: "tool_idle_refresh", delta: `progress ${ticks}\n` }); + if (ticks >= 4) { + clearInterval(timer); + notify("item/completed", { item: { id: "tool_idle_refresh", type: "commandExecution", command: "hwpod cmd long-running", status: "completed" } }); + notify("turn/completed", { turn }); + } }, 25); continue; } @@ -329,7 +333,7 @@ for await (const line of rl) { continue; } if (message.method === "turn/interrupt") { - if ((mode !== "tool-hangs-before-turn-start-response" && mode !== "hard-timeout-tool-progress" && mode !== "steer-waits") || !activeSteerTurn) { + if ((mode !== "tool-hangs-before-turn-start-response" && mode !== "steer-waits") || !activeSteerTurn) { respond(message.id, null, { code: -32000, message: "no active fake turn for interrupt" }); continue; }