fix: use idle timeout for codex turns
This commit is contained in:
@@ -483,15 +483,7 @@ async function runCodexStdioTurnWithSession(options: CodexStdioTurnOptions, sess
|
||||
};
|
||||
options.abortSignal?.addEventListener("abort", abortTurn, { once: true });
|
||||
const turnIdleTimeoutMs = positiveTimeout(options.timeoutMs);
|
||||
let hardTimeout: NodeJS.Timeout | null = null;
|
||||
let idleTimeout: NodeJS.Timeout | null = null;
|
||||
hardTimeout = setTimeout(() => {
|
||||
if (terminal) return;
|
||||
terminal = { status: "failed", failureKind: "backend-timeout", message: `codex stdio turn hard timed out after ${turnIdleTimeoutMs}ms` };
|
||||
emitEvent({ type: "error", payload: { failureKind: terminal.failureKind, message: terminal.message, phase: "turn:hard-timeout" } });
|
||||
beginInterruptAndStop("hard timeout", "turn:hard-timeout");
|
||||
terminalResolve();
|
||||
}, turnIdleTimeoutMs);
|
||||
const refreshTurnActivity = (): void => {
|
||||
if (terminal) return;
|
||||
if (idleTimeout) clearTimeout(idleTimeout);
|
||||
@@ -508,11 +500,6 @@ async function runCodexStdioTurnWithSession(options: CodexStdioTurnOptions, sess
|
||||
clearTimeout(idleTimeout);
|
||||
idleTimeout = null;
|
||||
};
|
||||
const stopTurnHardTimeout = (): void => {
|
||||
if (!hardTimeout) return;
|
||||
clearTimeout(hardTimeout);
|
||||
hardTimeout = null;
|
||||
};
|
||||
refreshTurnActivity();
|
||||
const stopNotifications = session.addNotificationHandler((message) => {
|
||||
refreshTurnActivity();
|
||||
@@ -611,7 +598,6 @@ async function runCodexStdioTurnWithSession(options: CodexStdioTurnOptions, sess
|
||||
stopNotifications();
|
||||
options.abortSignal?.removeEventListener("abort", abortTurn);
|
||||
stopTurnIdleTimeout();
|
||||
stopTurnHardTimeout();
|
||||
}
|
||||
if (!terminal) terminal = { status: "failed", failureKind: "backend-response-invalid", message: "codex app-server finished without terminal status" };
|
||||
const pendingInterrupt: Promise<void> | null = interruptInFlight as Promise<void> | null;
|
||||
|
||||
@@ -119,7 +119,7 @@ export function renderAipodSpec(record: AipodSpecRecord, input: RenderAipodInput
|
||||
const payload = mergeRecords(spec.payloadDefaults, input.payload);
|
||||
if (typeof input.prompt === "string" && input.prompt.trim().length > 0) payload.prompt = input.prompt;
|
||||
applyModelPayload(payload, spec.model);
|
||||
const sessionRef = renderSessionRef(spec.sessionRef ?? null, input);
|
||||
const sessionRef = renderSessionRef(spec.sessionRef ?? null, input, record, payload);
|
||||
const queueTask = validateCreateQueueTask({
|
||||
tenantId: input.tenantId ?? spec.tenantId ?? "unidesk",
|
||||
projectId: input.projectId ?? spec.projectId ?? "default",
|
||||
@@ -212,10 +212,17 @@ function validateAipodProviderCredential(profile: BackendProfile, policy: Execut
|
||||
}
|
||||
}
|
||||
|
||||
function renderSessionRef(base: SessionRef | null, input: RenderAipodInput): SessionRef | null {
|
||||
function renderSessionRef(base: SessionRef | null, input: RenderAipodInput, record: AipodSpecRecord, payload: JsonRecord): SessionRef | null {
|
||||
if (input.sessionRef !== undefined) return input.sessionRef;
|
||||
if (!input.sessionId) return base;
|
||||
return { ...(base ?? {}), sessionId: input.sessionId };
|
||||
const sessionId = input.sessionId ?? base?.sessionId ?? defaultAipodSessionId(record, input, payload);
|
||||
return { ...(base ?? {}), sessionId, conversationId: base?.conversationId ?? sessionId };
|
||||
}
|
||||
|
||||
/**
 * Builds the fallback session id used when neither the render input nor the
 * spec's base session ref carries one.
 *
 * The seed is the caller's idempotency key when present; otherwise it is the
 * record name joined with a hash of the prompt and the merged payload. The id
 * has the shape `sess_<sanitized record name>_<first 24 chars of the seed hash>`.
 *
 * @param record  Aipod spec record; only `record.name` is read.
 * @param input   Render input; `idempotencyKey` and `prompt` are read.
 * @param payload Merged payload, hashed only when no idempotency key is given.
 * @returns The derived session id string.
 */
function defaultAipodSessionId(record: AipodSpecRecord, input: RenderAipodInput, payload: JsonRecord): string {
  // Evaluated lazily so the prompt/payload hash is only computed when no idempotency key exists.
  const contentSeed = (): string => `${record.name}:${stableHash({ prompt: input.prompt ?? null, payload })}`;
  const seed = input.idempotencyKey ?? contentSeed();
  const digest = stableHash({ aipod: record.name, seed });
  // Reduce the name to lowercase [a-z0-9_] with no leading or trailing underscores.
  const slug = fileSafeAipodName(record.name)
    .toLowerCase()
    .replace(/[^a-z0-9]+/gu, "_")
    .replace(/^_+|_+$/gu, "");
  // An empty slug (name had no usable characters) falls back to the literal "aipod".
  return `sess_${slug || "aipod"}_${digest.slice(0, 24)}`;
}
|
||||
|
||||
function validateWorkspaceRef(record: JsonRecord): WorkspaceRef {
|
||||
|
||||
@@ -59,7 +59,7 @@ export function runDiagnosis(input: RunDiagnosisInput): JsonRecord {
|
||||
} : null,
|
||||
runnerJob,
|
||||
session,
|
||||
timeoutBudget: timeoutBudget ? compactRecord(timeoutBudget, ["state", "timeoutMs", "elapsedMs", "remainingMs", "startedAt", "source"]) : null,
|
||||
timeoutBudget: timeoutBudget ? compactRecord(timeoutBudget, ["state", "timeoutKind", "timeoutMs", "elapsedMs", "idleElapsedMs", "remainingMs", "startedAt", "idleStartedAt", "lastActivityAt", "lastActivitySeq", "commandElapsedMs", "runElapsedMs", "source"]) : null,
|
||||
recoveryActions,
|
||||
valuesPrinted: false,
|
||||
};
|
||||
@@ -116,8 +116,8 @@ function recoveryActionsForDiagnosis(input: { run: RunRecord; command: CommandRe
|
||||
actions.push({ action: "poll-events", runId: input.run.id, afterSeq: input.lastSeq, command: `./scripts/agentrun runs events ${input.run.id} --after-seq ${input.lastSeq} --limit 100 --tail-summary`, valuesPrinted: false });
|
||||
const sessionId = stringValue(input.session.sessionId);
|
||||
if (sessionId) actions.push({ action: "resume-session", sessionId, command: `./scripts/agentrun sessions turn ${sessionId} --prompt-stdin`, valuesPrinted: false });
|
||||
else actions.push({ action: "session-unavailable", reason: "sessionRef=null", hint: "当前 run 没有 sessionRef,只能从 run/events/command/runner-job 继续 drill-down 或重新提交任务。", valuesPrinted: false });
|
||||
if (input.runnerLost || input.staleClaimed || input.terminalCommandOpenRun) actions.push({ action: "refresh-queue-or-resubmit", reason: input.failureKind ?? "stale-runner-state", hint: "先用 queue refresh/show 对齐 attempt,再按任务边界决定重新派发或拆分续跑。", valuesPrinted: false });
|
||||
else actions.push({ action: "session-unavailable", reason: "sessionRef=null", hint: "当前 run 没有 sessionRef,管理者只能从 run/events/command/runner-job 读取 trace 后重新提交;这表示该任务不可同 session 续跑。", valuesPrinted: false });
|
||||
if (input.runnerLost || input.staleClaimed || input.terminalCommandOpenRun) actions.push({ action: "refresh-queue-or-resubmit", reason: input.failureKind ?? "stale-runner-state", hint: "先用 queue refresh/show 对齐 attempt;有 sessionId 时继续同一 session,没有 sessionId 才重新派发。", valuesPrinted: false });
|
||||
return actions.slice(0, 6);
|
||||
}
|
||||
|
||||
|
||||
+44
-18
@@ -118,7 +118,7 @@ function livenessSnapshot(run: RunRecord, command: CommandRecord | null, events:
|
||||
const lease = leaseSummary(run, nowMs);
|
||||
const transportDisconnect = latestTransportDisconnect(scopedEvents);
|
||||
const lastActivity = livenessActivitySummary(lastCommandActivity, nowMs);
|
||||
const timeoutBudget = timeoutBudgetSummary(run, command, terminal, failureKind, nowMs);
|
||||
const timeoutBudget = timeoutBudgetSummary(run, command, terminal, failureKind, nowMs, lastActivity);
|
||||
const terminalClassification = terminalClassificationFromEvidence({ terminal, terminalSource: "liveness", failureKind, failureMessage, timeoutBudget, transportDisconnect, lastActivity, command });
|
||||
const phase = livenessPhase({ active, command, lastVisibleActivity, leaseExpired: lease.leaseExpired, transportDisconnect, timeoutBudget, lastActivity });
|
||||
const afterSeq = lastEvent?.seq ?? 0;
|
||||
@@ -178,10 +178,13 @@ function terminalClassificationSummary(input: { terminal: TerminalStatus | null;
|
||||
|
||||
function terminalClassificationFromEvidence(input: TerminalClassificationInput): JsonRecord {
|
||||
const timeoutState = stringJsonValue(input.timeoutBudget.state);
|
||||
const hardTimeout = input.failureKind === "backend-timeout" || timeoutState === "timed-out";
|
||||
const timeoutFailure = input.failureKind === "backend-timeout" || timeoutState === "timed-out";
|
||||
const timeoutKind = stringJsonValue(input.timeoutBudget.timeoutKind) ?? (input.timeoutBudget.hardTimeout === true ? "hard" : "idle");
|
||||
const idleTimeout = timeoutFailure && timeoutKind === "idle";
|
||||
const hardTimeout = timeoutFailure && timeoutKind === "hard";
|
||||
const providerKind = providerFailureCategory(input.failureKind);
|
||||
const cancelled = input.terminal === "cancelled" || input.failureKind === "cancelled";
|
||||
const taskFailure = input.terminal === "failed" && input.failureKind !== null && !hardTimeout && !providerKind && !infrastructureFailureKind(input.failureKind);
|
||||
const taskFailure = input.terminal === "failed" && input.failureKind !== null && !timeoutFailure && !providerKind && !infrastructureFailureKind(input.failureKind);
|
||||
let category = "unknown";
|
||||
let confidence = "low";
|
||||
let providerEvidence = "not-applicable";
|
||||
@@ -200,16 +203,18 @@ function terminalClassificationFromEvidence(input: TerminalClassificationInput):
|
||||
confidence = "high";
|
||||
providerEvidence = "failure-kind";
|
||||
reason = `failureKind ${input.failureKind} is provider-specific`;
|
||||
} else if (hardTimeout && input.transportDisconnect) {
|
||||
category = "execution-hard-timeout";
|
||||
} else if (timeoutFailure && input.transportDisconnect) {
|
||||
category = idleTimeout ? "execution-idle-timeout" : "execution-hard-timeout";
|
||||
confidence = "medium";
|
||||
providerEvidence = "observed-transport-disconnect";
|
||||
reason = "hard timeout is terminal and a backend transport/app-server close event was observed, but existing events do not prove the model provider caused it";
|
||||
} else if (hardTimeout) {
|
||||
category = "execution-hard-timeout";
|
||||
reason = idleTimeout
|
||||
? "idle timeout is terminal and a backend transport/app-server close event was observed, but existing events do not prove the model provider caused it"
|
||||
: "hard timeout is terminal and a backend transport/app-server close event was observed, but existing events do not prove the model provider caused it";
|
||||
} else if (timeoutFailure) {
|
||||
category = idleTimeout ? "execution-idle-timeout" : "execution-hard-timeout";
|
||||
confidence = "high";
|
||||
providerEvidence = "insufficient";
|
||||
reason = "hard timeout is terminal; no provider-specific failure event was recorded";
|
||||
reason = idleTimeout ? "idle timeout is terminal; no provider-specific failure event was recorded" : "hard timeout is terminal; no provider-specific failure event was recorded";
|
||||
} else if (input.terminal === "blocked") {
|
||||
category = "blocked";
|
||||
confidence = "high";
|
||||
@@ -238,6 +243,9 @@ function terminalClassificationFromEvidence(input: TerminalClassificationInput):
|
||||
? "no provider-specific error or disconnect evidence was recorded"
|
||||
: null,
|
||||
hardTimeout,
|
||||
idleTimeout,
|
||||
timeoutFailure,
|
||||
timeoutKind,
|
||||
timeoutState,
|
||||
transportDisconnectObserved: Boolean(input.transportDisconnect),
|
||||
transportDisconnectSeq: input.transportDisconnect?.seq ?? null,
|
||||
@@ -261,29 +269,47 @@ function infrastructureFailureKind(failureKind: FailureKind | null): boolean {
|
||||
return failureKind.startsWith("backend-") || failureKind === "runner-lease-conflict" || failureKind === "infra-failed" || failureKind === "thread-resume-failed";
|
||||
}
|
||||
|
||||
function timeoutBudgetSummary(run: RunRecord, command: CommandRecord | null, terminal: TerminalStatus | null, failureKind: FailureKind | null, nowMs: number): JsonRecord {
|
||||
function timeoutBudgetSummary(run: RunRecord, command: CommandRecord | null, terminal: TerminalStatus | null, failureKind: FailureKind | null, nowMs: number, lastActivity: JsonRecord | null): JsonRecord {
|
||||
const timeoutMs = typeof run.executionPolicy.timeoutMs === "number" && Number.isFinite(run.executionPolicy.timeoutMs) && run.executionPolicy.timeoutMs > 0 ? Math.trunc(run.executionPolicy.timeoutMs) : null;
|
||||
const startedAt = command?.acknowledgedAt ?? command?.createdAt ?? run.updatedAt ?? run.createdAt;
|
||||
const startedMs = Date.parse(startedAt);
|
||||
const elapsedMs = timeoutMs !== null && Number.isFinite(startedMs) ? Math.max(0, nowMs - startedMs) : null;
|
||||
const commandStartedAt = command?.acknowledgedAt ?? command?.createdAt ?? null;
|
||||
const fallbackStartedAt = commandStartedAt ?? run.updatedAt ?? run.createdAt;
|
||||
const fallbackStartedMs = Date.parse(fallbackStartedAt);
|
||||
const lastActivityAt = stringJsonValue(lastActivity?.createdAt);
|
||||
const lastActivityMs = lastActivityAt ? Date.parse(lastActivityAt) : NaN;
|
||||
const idleStartedAt = Number.isFinite(lastActivityMs) ? lastActivityAt : fallbackStartedAt;
|
||||
const idleStartedMs = Number.isFinite(lastActivityMs) ? lastActivityMs : fallbackStartedMs;
|
||||
const elapsedMs = timeoutMs !== null && Number.isFinite(idleStartedMs) ? Math.max(0, nowMs - idleStartedMs) : null;
|
||||
const remainingMs = timeoutMs !== null && elapsedMs !== null ? Math.max(0, timeoutMs - elapsedMs) : null;
|
||||
const approachingThresholdMs = timeoutMs === null ? null : Math.min(120_000, Math.max(10_000, Math.floor(timeoutMs / 10)));
|
||||
const commandStartedMs = commandStartedAt ? Date.parse(commandStartedAt) : NaN;
|
||||
const commandElapsedMs = Number.isFinite(commandStartedMs) ? Math.max(0, nowMs - commandStartedMs) : null;
|
||||
const runStartedMs = Date.parse(run.createdAt);
|
||||
const runElapsedMs = Number.isFinite(runStartedMs) ? Math.max(0, nowMs - runStartedMs) : null;
|
||||
let state = "unknown";
|
||||
if (timeoutMs !== null && elapsedMs !== null) {
|
||||
if (terminal !== null) state = failureKind === "backend-timeout" ? "timed-out" : "terminal";
|
||||
else if (remainingMs === 0) state = "overdue";
|
||||
else if (approachingThresholdMs !== null && remainingMs !== null && remainingMs <= approachingThresholdMs) state = "approaching-hard-timeout";
|
||||
else if (approachingThresholdMs !== null && remainingMs !== null && remainingMs <= approachingThresholdMs) state = "approaching-idle-timeout";
|
||||
else state = "within-budget";
|
||||
}
|
||||
return {
|
||||
timeoutMs,
|
||||
source: "executionPolicy.timeoutMs",
|
||||
startedAt,
|
||||
source: "executionPolicy.timeoutMs idle activity",
|
||||
timeoutKind: "idle",
|
||||
startedAt: idleStartedAt,
|
||||
idleStartedAt,
|
||||
lastActivityAt: Number.isFinite(lastActivityMs) ? lastActivityAt : null,
|
||||
lastActivitySeq: numberJsonValue(lastActivity?.sourceSeq ?? lastActivity?.seq),
|
||||
elapsedMs,
|
||||
idleElapsedMs: elapsedMs,
|
||||
remainingMs,
|
||||
approachingThresholdMs,
|
||||
state,
|
||||
hardTimeout: true,
|
||||
hardTimeout: false,
|
||||
commandStartedAt,
|
||||
commandElapsedMs,
|
||||
runStartedAt: run.createdAt,
|
||||
runElapsedMs,
|
||||
valuesPrinted: false,
|
||||
};
|
||||
}
|
||||
@@ -400,7 +426,7 @@ function recoveryActions(input: { run: RunRecord; command: CommandRecord | null;
|
||||
if (terminal === "failed" || terminal === "blocked" || terminal === "cancelled") {
|
||||
if (command) actions.push({ action: "inspect-result", runId: run.id, commandId: command.id, command: `./scripts/agentrun commands result ${command.id} --run-id ${run.id}`, valuesPrinted: false });
|
||||
if (sessionId) actions.push({ action: "resume-session", sessionId, command: `./scripts/agentrun sessions turn ${sessionId} --prompt-stdin`, valuesPrinted: false });
|
||||
if (failureKind === "backend-timeout") actions.push({ action: "split-task", reason: "backend-timeout", hint: "把大 patch / 长工具链拆成更短 turn 后用同一 session 续跑", failureMessage: failureMessage ? boundedTextSummary(failureMessage, { limitChars: 200 }).text as string : null, valuesPrinted: false });
|
||||
if (failureKind === "backend-timeout") actions.push({ action: "split-task", reason: "backend-timeout", hint: "先由管理者读取 trace/result,总结下一步,再把后续 prompt 发到同一 session;必要时把大 patch / 长工具链拆成更短 turn。", failureMessage: failureMessage ? boundedTextSummary(failureMessage, { limitChars: 200 }).text as string : null, valuesPrinted: false });
|
||||
else actions.push({ action: "retry-or-split", reason: failureKind ?? "terminal", hint: "先读 trace/output 的 detail id,再决定 steer、重跑或拆分", valuesPrinted: false });
|
||||
}
|
||||
return actions;
|
||||
|
||||
@@ -184,10 +184,17 @@ function compactActivity(activity: JsonRecord): JsonRecord {
|
||||
function compactTimeoutBudget(budget: JsonRecord): JsonRecord {
|
||||
return {
|
||||
state: stringJsonValue(budget.state),
|
||||
timeoutKind: stringJsonValue(budget.timeoutKind),
|
||||
timeoutMs: numberJsonValue(budget.timeoutMs),
|
||||
elapsedMs: numberJsonValue(budget.elapsedMs),
|
||||
idleElapsedMs: numberJsonValue(budget.idleElapsedMs),
|
||||
remainingMs: numberJsonValue(budget.remainingMs),
|
||||
startedAt: stringJsonValue(budget.startedAt),
|
||||
idleStartedAt: stringJsonValue(budget.idleStartedAt),
|
||||
lastActivityAt: stringJsonValue(budget.lastActivityAt),
|
||||
lastActivitySeq: numberJsonValue(budget.lastActivitySeq),
|
||||
commandElapsedMs: numberJsonValue(budget.commandElapsedMs),
|
||||
runElapsedMs: numberJsonValue(budget.runElapsedMs),
|
||||
source: stringJsonValue(budget.source),
|
||||
valuesPrinted: false,
|
||||
};
|
||||
@@ -202,6 +209,8 @@ function compactTerminalClassification(record: JsonRecord): JsonRecord {
|
||||
providerInterruptionKnown: record.providerInterruptionKnown === true,
|
||||
providerInterruptionReason: boundedJsonString(record.providerInterruptionReason, 240),
|
||||
hardTimeout: record.hardTimeout === true,
|
||||
idleTimeout: record.idleTimeout === true,
|
||||
timeoutKind: stringJsonValue(record.timeoutKind),
|
||||
transportDisconnectObserved: record.transportDisconnectObserved === true,
|
||||
transportDisconnectSeq: numberJsonValue(record.transportDisconnectSeq),
|
||||
reason: boundedJsonString(record.reason, 240),
|
||||
|
||||
@@ -188,7 +188,10 @@ function resourceMaterializationEnv(env: NodeJS.ProcessEnv, runId: string, attem
|
||||
function resourceEnvForMaterialized(env: NodeJS.ProcessEnv, materialized: Awaited<ReturnType<typeof materializeResourceBundle>>): NodeJS.ProcessEnv | undefined {
|
||||
if (!materialized) return undefined;
|
||||
let next: NodeJS.ProcessEnv | undefined;
|
||||
if (materialized.binPath) next = prependPath(env, materialized.binPath);
|
||||
if (materialized.binPath) {
|
||||
const withPath = prependPath(env, materialized.binPath);
|
||||
next = { ...withPath, AGENTRUN_RESOURCE_BIN_PATH: withPath.AGENTRUN_RESOURCE_BIN_PATH ?? materialized.binPath };
|
||||
}
|
||||
if (materialized.skillsDir) {
|
||||
const base = next ?? { ...env };
|
||||
const previous = base.AGENTRUN_SKILLS_DIRS;
|
||||
|
||||
@@ -182,7 +182,7 @@ const selfTest: SelfTestCase = async (context) => {
|
||||
assert.equal(liveResult.terminalStatus, "completed", "slow live tool event turn should complete");
|
||||
|
||||
await runInterruptBeforeTurnStartResponseCase({ client, managerUrl: server.baseUrl, context });
|
||||
await runHardTimeoutDuringToolProgressCase({ client, managerUrl: server.baseUrl, context });
|
||||
await runToolProgressRefreshesIdleTimeoutCase({ client, managerUrl: server.baseUrl, context });
|
||||
|
||||
const noisy = await createRunWithCommand(client, context, "hello noisy reasoning", "selftest-noisy-reasoning-events", 15_000);
|
||||
const noisyResult = await runOnce({ managerUrl: server.baseUrl, runId: noisy.runId, codexCommand: context.fakeCodexCommand, codexArgs: context.fakeCodexArgs, codexHome: context.codexHome, env: { CODEX_HOME: context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "noisy-reasoning-events" }, oneShot: true }) as JsonRecord;
|
||||
@@ -245,7 +245,7 @@ const selfTest: SelfTestCase = async (context) => {
|
||||
await runSessionStorageSubdirCase({ client, managerUrl: server.baseUrl, context });
|
||||
await runSessionStorageNoSecretLeakCase({ client, managerUrl: server.baseUrl, context });
|
||||
|
||||
return { name: "codex-stdio", tests: ["runner-lease-heartbeat", "runner-lease-conflict-recovery", "codex-stdio-fake-turn", "codex-stdio-k8s-sandbox-override", "codex-stdio-projected-writable-home", "codex-stdio-deepseek-profile-fake-turn", "codex-stdio-dsflash-go-profile-fake-turn", "codex-stdio-dsflash-go-config-metadata", "codex-stdio-minimax-m3-profile-fake-turn", "codex-stdio-deepseek-missing-secret-no-fallback", "codex-stdio-minimax-m3-missing-secret-no-fallback", "codex-stdio-config-model-authoritative", "codex-stdio-explicit-model-forwarded", "codex-stdio-final-agent-message-only", "codex-stdio-web-search-progress", "codex-stdio-stale-thread-resume-failed", "codex-stdio-live-tool-events", "codex-stdio-interrupt-before-turn-start-response", "codex-stdio-hard-timeout-during-tool-progress", "codex-stdio-noisy-reasoning-suppression", "codex-stdio-missing-turn-result", "codex-stdio-provider-auth-failed", "codex-stdio-provider-rate-limited", "codex-stdio-provider-invalid-tool-call", "codex-stdio-provider-compact-unsupported", "codex-stdio-provider-stream-disconnected", "codex-stdio-provider-503-rpc-error", "codex-stdio-provider-503-terminal", "codex-stdio-provider-unavailable", "codex-stdio-provider-503-retry-event", "codex-stdio-provider-refused-retry-recovered", "codex-stdio-invalid-json", "codex-stdio-timeout", "codex-stdio-idle-timeout-progress-refresh", "codex-stdio-command-failure-keeps-run-open", "codex-stdio-secret-unavailable", "codex-stdio-spawn-failure"] };
|
||||
return { name: "codex-stdio", tests: ["runner-lease-heartbeat", "runner-lease-conflict-recovery", "codex-stdio-fake-turn", "codex-stdio-k8s-sandbox-override", "codex-stdio-projected-writable-home", "codex-stdio-deepseek-profile-fake-turn", "codex-stdio-dsflash-go-profile-fake-turn", "codex-stdio-dsflash-go-config-metadata", "codex-stdio-minimax-m3-profile-fake-turn", "codex-stdio-deepseek-missing-secret-no-fallback", "codex-stdio-minimax-m3-missing-secret-no-fallback", "codex-stdio-config-model-authoritative", "codex-stdio-explicit-model-forwarded", "codex-stdio-final-agent-message-only", "codex-stdio-web-search-progress", "codex-stdio-stale-thread-resume-failed", "codex-stdio-live-tool-events", "codex-stdio-interrupt-before-turn-start-response", "codex-stdio-tool-progress-refreshes-idle-timeout", "codex-stdio-noisy-reasoning-suppression", "codex-stdio-missing-turn-result", "codex-stdio-provider-auth-failed", "codex-stdio-provider-rate-limited", "codex-stdio-provider-invalid-tool-call", "codex-stdio-provider-compact-unsupported", "codex-stdio-provider-stream-disconnected", "codex-stdio-provider-503-rpc-error", "codex-stdio-provider-503-terminal", "codex-stdio-provider-unavailable", "codex-stdio-provider-503-retry-event", "codex-stdio-provider-refused-retry-recovered", "codex-stdio-invalid-json", "codex-stdio-timeout", "codex-stdio-idle-timeout-progress-refresh", "codex-stdio-command-failure-keeps-run-open", "codex-stdio-secret-unavailable", "codex-stdio-spawn-failure"] };
|
||||
} finally {
|
||||
await new Promise<void>((resolve) => server.server.close(() => resolve()));
|
||||
}
|
||||
@@ -350,26 +350,24 @@ async function runInterruptBeforeTurnStartResponseCase(options: { client: Manage
|
||||
assertNoSecretLeak({ result, events });
|
||||
}
|
||||
|
||||
async function runHardTimeoutDuringToolProgressCase(options: { client: ManagerClient; managerUrl: string; context: SelfTestContext }): Promise<void> {
|
||||
const item = await createRunWithCommand(options.client, options.context, "hard timeout during tool progress", "selftest-hard-timeout-tool-progress", 120);
|
||||
async function runToolProgressRefreshesIdleTimeoutCase(options: { client: ManagerClient; managerUrl: string; context: SelfTestContext }): Promise<void> {
|
||||
const item = await createRunWithCommand(options.client, options.context, "tool progress refreshes idle timeout", "selftest-tool-progress-refreshes-idle", 120);
|
||||
const result = await runOnce({
|
||||
managerUrl: options.managerUrl,
|
||||
runId: item.runId,
|
||||
codexCommand: options.context.fakeCodexCommand,
|
||||
codexArgs: options.context.fakeCodexArgs,
|
||||
codexHome: options.context.codexHome,
|
||||
env: { CODEX_HOME: options.context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "hard-timeout-tool-progress" },
|
||||
env: { CODEX_HOME: options.context.codexHome, AGENTRUN_FAKE_CODEX_MODE: "tool-progress-refreshes-idle" },
|
||||
oneShot: true,
|
||||
}) as JsonRecord;
|
||||
assert.equal(result.terminalStatus, "failed", "hard timeout should fail even while the tool keeps producing progress");
|
||||
assert.equal(result.failureKind, "backend-timeout");
|
||||
assert.equal(result.terminalStatus, "completed", "tool progress should refresh idle timeout until terminal completion");
|
||||
assert.equal(result.failureKind, null);
|
||||
const events = await options.client.get(`/api/v1/runs/${item.runId}/events?afterSeq=0&limit=100`) as { items?: Array<{ type: string; payload: unknown }> };
|
||||
assert.ok(events.items?.some((event) => event.type === "command_output" && String(eventPayload(event).text ?? "").includes("progress")), "progress output should be visible before hard timeout");
|
||||
assert.ok(events.items?.some((event) => event.type === "error" && eventPayload(event).phase === "turn:hard-timeout"), "hard timeout should be recorded as an error event");
|
||||
assert.ok(events.items?.some((event) => event.type === "backend_status" && eventPayload(event).phase === "turn-interrupt-requested"), "timeout should request backend interrupt before process teardown");
|
||||
assert.ok(events.items?.some((event) => event.type === "backend_status" && eventPayload(event).phase === "turn/interrupt:completed"), "timeout interrupt result should be visible");
|
||||
assert.ok(events.items?.some((event) => event.type === "command_output" && String(eventPayload(event).text ?? "").includes("progress 3")), "progress output should stay visible while idle timeout is refreshed");
|
||||
assert.equal(events.items?.some((event) => event.type === "error" && eventPayload(event).failureKind === "backend-timeout"), false, "progressing tool output must not fail on wall-clock elapsed time");
|
||||
const command = await options.client.get(`/api/v1/runs/${item.runId}/commands/${item.commandId}`) as { state?: string };
|
||||
assert.equal(command.state, "failed", "hard timed out command should be failed");
|
||||
assert.equal(command.state, "completed", "command should complete after progress-delayed terminal status");
|
||||
assertNoSecretLeak({ result, events });
|
||||
}
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ console.log(JSON.stringify({ apiVersion: manifest.apiVersion, kind: manifest.kin
|
||||
const resultBundleTargets = (((resultEnvelope.resourceBundleRef as JsonRecord).bundles as JsonRecord).items as JsonRecord[]).map((item) => item.targetPath);
|
||||
assert.deepEqual(resultBundleTargets, ["tools", ".agents/skills"]);
|
||||
const materialized = ((resultEnvelope.resourceBundleRef as JsonRecord).materialized as JsonRecord);
|
||||
assert.deepEqual(((materialized.tools as JsonRecord).names), ["hwpod"]);
|
||||
assert.deepEqual(((materialized.tools as JsonRecord).names), ["apply_patch", "hwpod", "tran", "trans"]);
|
||||
assert.equal(((materialized.tools as JsonRecord).installed), true);
|
||||
assert.deepEqual(((materialized.skillDirs as JsonRecord).names), ["dad-dev", "hwpod-cli", "hwpod-ctl"]);
|
||||
const requiredSkillItems = ((materialized.requiredSkills as JsonRecord).items as JsonRecord[]);
|
||||
@@ -275,6 +275,9 @@ async function createLocalGitBundle(context: SelfTestContext, repoName = "bundle
|
||||
await mkdir(path.join(repo, "tools"), { recursive: true });
|
||||
await mkdir(path.join(repo, "tools", "src"), { recursive: true });
|
||||
await writeFile(path.join(repo, "tools", "hwpod"), "#!/usr/bin/env sh\nexec bun \"$(dirname \"$0\")/hwpod-cli.ts\" \"$@\"\n", "utf8");
|
||||
await writeFile(path.join(repo, "tools", "tran"), "#!/usr/bin/env sh\necho tran-selftest\n", "utf8");
|
||||
await writeFile(path.join(repo, "tools", "trans"), "#!/usr/bin/env sh\necho trans-selftest\n", "utf8");
|
||||
await writeFile(path.join(repo, "tools", "apply_patch"), "#!/usr/bin/env sh\necho apply-patch-selftest\n", "utf8");
|
||||
await writeFile(path.join(repo, "tools", "hwpod-cli.ts"), "import { hwpodSelftestName } from './src/hwpod-harness-lib.ts';\nconsole.log(JSON.stringify({ ok: true, cli: hwpodSelftestName(), argv: process.argv.slice(2) }));\n", "utf8");
|
||||
await writeFile(path.join(repo, "tools", "src", "hwpod-harness-lib.ts"), "export function hwpodSelftestName() { return 'hwpod-selftest'; }\n", "utf8");
|
||||
await writeFile(path.join(repo, "tools", "hwpod-node.test.ts"), "console.log('test-only source file without shebang');\n", "utf8");
|
||||
@@ -315,7 +318,7 @@ async function createLocalGitBundle(context: SelfTestContext, repoName = "bundle
|
||||
"Use hwpod-ctl for HWPOD runtime inspection and control-plane state.",
|
||||
].join("\n"), "utf8");
|
||||
await writeFile(path.join(repo, "skills", "hwpod-ctl", "scripts", "hwpod-ctl.mjs"), "console.log(JSON.stringify({ ok: true, cli: 'hwpod-ctl-skill-selftest' }));\n", "utf8");
|
||||
await execFile("git", ["add", "README.md", "tools/hwpod", "tools/hwpod-cli.ts", "tools/src/hwpod-harness-lib.ts", "tools/hwpod-node.test.ts", "internal/agent/prompts/hwlab-v02-runtime.md", "skills/dad-dev/SKILL.md", "skills/hwpod-cli/SKILL.md", "skills/hwpod-cli/scripts/hwpod-cli.mjs", "skills/hwpod-ctl/SKILL.md", "skills/hwpod-ctl/scripts/hwpod-ctl.mjs"], { cwd: repo });
|
||||
await execFile("git", ["add", "README.md", "tools/hwpod", "tools/tran", "tools/trans", "tools/apply_patch", "tools/hwpod-cli.ts", "tools/src/hwpod-harness-lib.ts", "tools/hwpod-node.test.ts", "internal/agent/prompts/hwlab-v02-runtime.md", "skills/dad-dev/SKILL.md", "skills/hwpod-cli/SKILL.md", "skills/hwpod-cli/scripts/hwpod-cli.mjs", "skills/hwpod-ctl/SKILL.md", "skills/hwpod-ctl/scripts/hwpod-ctl.mjs"], { cwd: repo });
|
||||
await execFile("git", ["-c", "user.email=selftest@example.invalid", "-c", "user.name=AgentRun SelfTest", "commit", "-m", "bundle selftest"], { cwd: repo });
|
||||
const { stdout } = await execFile("git", ["rev-parse", "HEAD"], { cwd: repo });
|
||||
return { repoUrl: repo, commitId: stdout.trim(), requiredSkills: [{ name: "dad-dev" }] };
|
||||
|
||||
@@ -33,18 +33,21 @@ const selfTest: SelfTestCase = async (context: SelfTestContext) => {
|
||||
await sleep(36);
|
||||
const inactiveLive = (await commandResult(client, inactive)).liveness as JsonRecord;
|
||||
assert.equal(inactiveLive.phase, "runner-stdio-inactive");
|
||||
assert.ok(["approaching-hard-timeout", "overdue"].includes(String((inactiveLive.timeoutBudget as JsonRecord).state)));
|
||||
assert.ok(["approaching-idle-timeout", "overdue"].includes(String((inactiveLive.timeoutBudget as JsonRecord).state)));
|
||||
assert.equal(((inactiveLive.timeoutBudget as JsonRecord).timeoutKind), "idle");
|
||||
|
||||
const terminal = await createActiveRun(client, context, "timeout-liveness-terminal", 50);
|
||||
await client.post(`/api/v1/runs/${terminal.runId}/events`, { type: "error", payload: { commandId: terminal.commandId, failureKind: "backend-timeout", phase: "turn:hard-timeout", message: "codex stdio turn hard timed out after 50ms" } });
|
||||
await client.patch(`/api/v1/commands/${terminal.commandId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn hard timed out after 50ms" });
|
||||
await client.patch(`/api/v1/runs/${terminal.runId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn hard timed out after 50ms" });
|
||||
await client.post(`/api/v1/runs/${terminal.runId}/events`, { type: "error", payload: { commandId: terminal.commandId, failureKind: "backend-timeout", phase: "turn:idle-timeout", message: "codex stdio turn idle timed out after 50ms without activity" } });
|
||||
await client.patch(`/api/v1/commands/${terminal.commandId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn idle timed out after 50ms without activity" });
|
||||
await client.patch(`/api/v1/runs/${terminal.runId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn idle timed out after 50ms without activity" });
|
||||
const terminalResult = await commandResult(client, terminal);
|
||||
const terminalLive = terminalResult.liveness as JsonRecord;
|
||||
assert.equal(terminalResult.terminalStatus, "failed");
|
||||
assert.equal(terminalLive.phase, "terminal");
|
||||
assert.equal(((terminalLive.timeoutBudget as JsonRecord).state), "timed-out");
|
||||
assert.equal(((terminalResult.terminalClassification as JsonRecord).category), "execution-hard-timeout");
|
||||
assert.equal(((terminalLive.timeoutBudget as JsonRecord).timeoutKind), "idle");
|
||||
assert.equal(((terminalResult.terminalClassification as JsonRecord).category), "execution-idle-timeout");
|
||||
assert.equal(((terminalResult.terminalClassification as JsonRecord).idleTimeout), true);
|
||||
assert.equal(((terminalResult.terminalClassification as JsonRecord).providerEvidence), "insufficient");
|
||||
assert.equal(((terminalLive.terminalClassification as JsonRecord).providerInterruptionKnown), false);
|
||||
assert.ok((terminalLive.recoveryActions as JsonRecord[]).some((action) => action.action === "resume-session"));
|
||||
@@ -52,13 +55,13 @@ const selfTest: SelfTestCase = async (context: SelfTestContext) => {
|
||||
|
||||
const noSession = await createActiveRun(client, context, "timeout-liveness-no-session", 50, { session: false });
|
||||
await client.post(`/api/v1/runs/${noSession.runId}/events`, { type: "backend_status", payload: { commandId: noSession.commandId, phase: "codex-app-server-closed", message: "stdio closed before terminal result" } });
|
||||
await client.post(`/api/v1/runs/${noSession.runId}/events`, { type: "terminal_status", payload: { commandId: noSession.commandId, terminalStatus: "failed", failureKind: "backend-timeout", message: "codex stdio turn hard timed out after 50ms" } });
|
||||
await client.patch(`/api/v1/commands/${noSession.commandId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn hard timed out after 50ms" });
|
||||
await client.post(`/api/v1/runs/${noSession.runId}/events`, { type: "terminal_status", payload: { commandId: noSession.commandId, terminalStatus: "failed", failureKind: "backend-timeout", message: "codex stdio turn idle timed out after 50ms without activity" } });
|
||||
await client.patch(`/api/v1/commands/${noSession.commandId}/status`, { terminalStatus: "failed", failureKind: "backend-timeout", failureMessage: "codex stdio turn idle timed out after 50ms without activity" });
|
||||
const noSessionResult = await commandResult(client, noSession);
|
||||
const noSessionLive = noSessionResult.liveness as JsonRecord;
|
||||
const noSessionClassification = noSessionResult.terminalClassification as JsonRecord;
|
||||
const noSessionDiagnosis = noSessionResult.diagnosis as JsonRecord;
|
||||
assert.equal(noSessionClassification.category, "execution-hard-timeout");
|
||||
assert.equal(noSessionClassification.category, "execution-idle-timeout");
|
||||
assert.equal(noSessionClassification.providerEvidence, "observed-transport-disconnect");
|
||||
assert.equal(noSessionClassification.providerInterruptionKnown, false);
|
||||
assert.equal(noSessionDiagnosis.category, "terminal-command-open-run");
|
||||
@@ -111,12 +114,13 @@ const selfTest: SelfTestCase = async (context: SelfTestContext) => {
|
||||
const commander = await client.get("/api/v1/queue/commander?queue=timeout-liveness&readerId=timeout-liveness") as JsonRecord;
|
||||
const commanderItem = ((commander.items as JsonRecord[]) ?? []).find((item) => item.id === task.id) as JsonRecord;
|
||||
assert.equal(((commanderItem.supervisor as JsonRecord).phase), "terminal");
|
||||
assert.equal((((commanderItem.supervisor as JsonRecord).diagnosis as JsonRecord).category), "execution-hard-timeout");
|
||||
assert.equal((((commanderItem.supervisor as JsonRecord).diagnosis as JsonRecord).category), "execution-idle-timeout");
|
||||
assert.equal((((commanderItem.supervisor as JsonRecord).timeoutBudget as JsonRecord).state), "timed-out");
|
||||
assert.equal((((commanderItem.supervisor as JsonRecord).timeoutBudget as JsonRecord).timeoutKind), "idle");
|
||||
const commanderSummary = summarizeQueueCommanderSnapshot(commander, { limit: 5 });
|
||||
const summaryItem = ((commanderSummary.items as JsonRecord[]) ?? []).find((item) => item.id === task.id) as JsonRecord;
|
||||
assert.equal(((summaryItem.supervisor as JsonRecord).phase), "terminal");
|
||||
assert.equal((((summaryItem.supervisor as JsonRecord).terminalClassification as JsonRecord).category), "execution-hard-timeout");
|
||||
assert.equal((((summaryItem.supervisor as JsonRecord).terminalClassification as JsonRecord).category), "execution-idle-timeout");
|
||||
assert.equal((((summaryItem.supervisor as JsonRecord).terminalClassification as JsonRecord).providerEvidence), "insufficient");
|
||||
assert.equal(JSON.stringify(commanderSummary).includes("hwpod workspace apply-patch"), false, "commander summary must stay compact and avoid dumping command bodies");
|
||||
assert.equal(JSON.stringify(summaryItem).includes("fullRecordBytes"), false, "commander item must not add bookkeeping noise");
|
||||
|
||||
@@ -38,6 +38,9 @@ const selfTest: SelfTestCase = async (context) => {
|
||||
assert.equal(task.backendProfile, "sub2api");
|
||||
assert.equal(task.providerId, "G14");
|
||||
assert.equal(task.idempotencyKey, "selftest-aipod-artificer");
|
||||
const sessionRef = task.sessionRef as JsonRecord;
|
||||
assert.match(String(sessionRef.sessionId), /^sess_artificer_[a-f0-9]{24}$/u, "Artificer queue task should default to a resumable session");
|
||||
assert.equal(sessionRef.conversationId, sessionRef.sessionId, "default Artificer conversation should match the generated session");
|
||||
const taskImageRef = ((task.metadata as JsonRecord).aipodImageRef as JsonRecord);
|
||||
assert.equal(taskImageRef.kind, "env-image-dockerfile");
|
||||
assert.equal(taskImageRef.valuesPrinted, false);
|
||||
|
||||
@@ -262,17 +262,21 @@ for await (const line of rl) {
|
||||
activeSteerTurn = { id: turn.id, completed: false, timer: setTimeout(() => undefined, 60_000) };
|
||||
continue;
|
||||
}
|
||||
if (mode === "hard-timeout-tool-progress") {
|
||||
if (mode === "tool-progress-refreshes-idle") {
|
||||
turnCounter += 1;
|
||||
const turn = { id: `turn_selftest_${turnCounter}`, status: "running" };
|
||||
notify("turn/started", { turn });
|
||||
notify("item/started", { item: { id: "tool_hard_timeout", type: "commandExecution", command: "hwpod cmd long-running", status: "running", processId: process.pid } });
|
||||
const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };
|
||||
notify("turn/started", { turn: { id: turn.id, status: "running" } });
|
||||
notify("item/started", { item: { id: "tool_idle_refresh", type: "commandExecution", command: "hwpod cmd long-running", status: "running", processId: process.pid } });
|
||||
respond(message.id, { turn });
|
||||
activeSteerTurn = { id: turn.id, completed: false, timer: null };
|
||||
let ticks = 0;
|
||||
activeSteerTurn.timer = setInterval(() => {
|
||||
const timer = setInterval(() => {
|
||||
ticks += 1;
|
||||
notify("item/commandExecution/outputDelta", { itemId: "tool_hard_timeout", delta: `progress ${ticks}\n` });
|
||||
notify("item/commandExecution/outputDelta", { itemId: "tool_idle_refresh", delta: `progress ${ticks}\n` });
|
||||
if (ticks >= 4) {
|
||||
clearInterval(timer);
|
||||
notify("item/completed", { item: { id: "tool_idle_refresh", type: "commandExecution", command: "hwpod cmd long-running", status: "completed" } });
|
||||
notify("turn/completed", { turn });
|
||||
}
|
||||
}, 25);
|
||||
continue;
|
||||
}
|
||||
@@ -329,7 +333,7 @@ for await (const line of rl) {
|
||||
continue;
|
||||
}
|
||||
if (message.method === "turn/interrupt") {
|
||||
if ((mode !== "tool-hangs-before-turn-start-response" && mode !== "hard-timeout-tool-progress" && mode !== "steer-waits") || !activeSteerTurn) {
|
||||
if ((mode !== "tool-hangs-before-turn-start-response" && mode !== "steer-waits") || !activeSteerTurn) {
|
||||
respond(message.id, null, { code: -32000, message: "no active fake turn for interrupt" });
|
||||
continue;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user