// Repository viewer header (not part of the program): revision 28c329e434
// "# Conflicts: # src/selftest/cases/30-codex-stdio.ts" - 352 lines, 18 KiB, TypeScript
import * as readline from "node:readline";
|
|
import { appendFileSync } from "node:fs";
|
|
|
|
// Line reader over stdin. With crlfDelay set to Infinity, "\r" followed by "\n"
// is always treated as one line break.
const rl = readline.createInterface({ input: process.stdin, crlfDelay: Infinity });

// Scenario selector. Defaults to "success", which matches none of the special
// mode checks below and therefore takes the default path of each handler.
const mode = process.env.AGENTRUN_FAKE_CODEX_MODE ?? "success";

// When a start file is configured, append this process's pid as one line.
if (process.env.AGENTRUN_FAKE_CODEX_START_FILE) appendFileSync(process.env.AGENTRUN_FAKE_CODEX_START_FILE, `${process.pid}\n`);

// Monotonic counters used to build `thread_selftest_<n>` / `turn_selftest_<n>` ids.
let threadCounter = 0;
let turnCounter = 0;

// Whether the most recent thread/start or thread/resume params had an own "model" key.
let observedThreadModel = false;

// The turn that turn/steer and turn/interrupt act on, if any. `timer` holds either
// a setTimeout or a setInterval handle depending on the mode that created the turn.
let activeSteerTurn: { id: string; completed: boolean; timer: NodeJS.Timeout | null } | null = null;
|
|
|
|
// Main request loop of the fake app server (it identifies itself as
// "fake-codex-app-server"). Each non-empty stdin line is parsed as one JSON
// message and dispatched on `method`; replies and notifications are written to
// stdout as newline-delimited JSON via respond()/notify().
//
// `mode` selects the scripted scenario. Within every branch the relative order
// of notify()/respond() calls is the wire trace the client observes, so the
// statements must not be reordered.
//
// NOTE: JSON.parse is not guarded; a malformed input line throws out of the loop.
for await (const line of rl) {
  const trimmed = String(line).trim();
  if (trimmed.length === 0) continue;
  const message = JSON.parse(trimmed) as { id?: number; method?: string; params?: Record<string, unknown> };

  // ---- initialize -------------------------------------------------------
  if (message.method === "initialize") {
    if (mode === "invalid-json") {
      // Write a truncated JSON object (no closing brace) and exit with status 0.
      process.stdout.write('{"token":"test-token-material"\n');
      process.exit(0);
    }
    respond(message.id, { serverInfo: { name: "fake-codex-app-server", version: "self-test" } });
    continue;
  }

  // ---- thread/start -----------------------------------------------------
  if (message.method === "thread/start") {
    // Remember whether a "model" key was present; turn/start consults this later.
    observedThreadModel = Object.hasOwn(message.params ?? {}, "model");
    if (mode === "require-danger-sandbox" && message.params?.sandbox !== "danger-full-access") {
      respond(message.id, null, { code: -32000, message: `thread/start expected danger-full-access sandbox, got ${String(message.params?.sandbox ?? "missing")}` });
      continue;
    }
    if (mode === "reject-unexpected-model" && observedThreadModel) {
      respond(message.id, null, { code: -32000, message: "thread/start unexpectedly included model" });
      continue;
    }
    if (mode === "require-explicit-model" && message.params?.model !== "gpt-5.5") {
      respond(message.id, null, { code: -32000, message: "thread/start did not include expected model" });
      continue;
    }
    threadCounter += 1;
    const thread = { id: `thread_selftest_${threadCounter}` };
    // The notification is emitted before the response.
    notify("thread/started", { thread });
    respond(message.id, { thread });
    continue;
  }

  // ---- thread/resume ----------------------------------------------------
  if (message.method === "thread/resume") {
    observedThreadModel = Object.hasOwn(message.params ?? {}, "model");
    if (mode === "require-danger-sandbox" && message.params?.sandbox !== "danger-full-access") {
      respond(message.id, null, { code: -32000, message: `thread/resume expected danger-full-access sandbox, got ${String(message.params?.sandbox ?? "missing")}` });
      continue;
    }
    if (mode === "resume-no-rollout") {
      respond(message.id, null, { code: -32000, message: `no rollout found for thread id ${String(message.params?.threadId ?? "unknown")}` });
      continue;
    }
    if (mode === "reject-unexpected-model" && observedThreadModel) {
      respond(message.id, null, { code: -32000, message: "thread/resume unexpectedly included model" });
      continue;
    }
    if (mode === "require-explicit-model" && message.params?.model !== "gpt-5.5") {
      respond(message.id, null, { code: -32000, message: "thread/resume did not include expected model" });
      continue;
    }
    // Echo the requested thread id back; fall back to a fixed id when none was given.
    const thread = { id: String(message.params?.threadId ?? "thread_selftest_resumed") };
    notify("thread/started", { thread });
    respond(message.id, { thread });
    continue;
  }

  // ---- turn/start -------------------------------------------------------
  if (message.method === "turn/start") {
    // Optionally record the received thread id and input as one JSON line.
    if (process.env.AGENTRUN_FAKE_CODEX_TURN_INPUT_FILE) appendFileSync(process.env.AGENTRUN_FAKE_CODEX_TURN_INPUT_FILE, `${JSON.stringify({ threadId: message.params?.threadId ?? null, input: message.params?.input ?? null })}\n`);

    // Model-presence checks (also considers what the last thread request carried).
    if (mode === "reject-unexpected-model" && (observedThreadModel || Object.hasOwn(message.params ?? {}, "model"))) {
      respond(message.id, null, { code: -32000, message: "turn/start unexpectedly included model" });
      continue;
    }
    if (mode === "require-explicit-model" && message.params?.model !== "gpt-5.5") {
      respond(message.id, null, { code: -32000, message: "turn/start did not include expected model" });
      continue;
    }

    // Responds with an empty result object (no `turn` field) and emits no notifications.
    if (mode === "missing-turn-result") {
      respond(message.id, {});
      continue;
    }

    // RPC-level error replies; no turn is created and turnCounter is not advanced.
    if (mode === "provider-503-rpc-error") {
      respond(message.id, null, { code: -32000, message: "responseStreamDisconnected: HTTP 503 Service Unavailable from provider" });
      continue;
    }
    if (mode === "provider-stream-disconnected-rpc-error") {
      respond(message.id, null, { code: -32000, message: "stream disconnected before completion: error sending request for url (http://138.2.51.180:8083/responses)" });
      continue;
    }
    if (mode === "provider-401-rpc-error") {
      respond(message.id, null, { code: -32000, message: "HTTP 401 Unauthorized: invalid api key" });
      continue;
    }

    // Starts a running turn and never emits turn/completed.
    if (mode === "missing-terminal") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "running" };
      notify("turn/started", { turn });
      respond(message.id, { turn });
      continue;
    }

    // A command item starts and completes, but the turn itself never completes.
    if (mode === "tool-completes-without-terminal") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "running" };
      notify("turn/started", { turn });
      notify("item/started", { item: { id: "tool_idle_after_tool", type: "commandExecution", command: "echo idle-after-tool" } });
      notify("item/completed", { item: { id: "tool_idle_after_tool", type: "commandExecution", command: "echo idle-after-tool", status: "completed", exitCode: 0 } });
      respond(message.id, { turn });
      continue;
    }

    // Responds immediately with a running turn, then emits a delta after 40 ms
    // and the final item plus turn/completed after 90 ms.
    if (mode === "slow-progress-before-terminal") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };
      notify("turn/started", { turn: { id: turn.id, status: "running" } });
      respond(message.id, { turn: { id: turn.id, status: "running" } });
      setTimeout(() => notify("item/agentMessage/delta", { itemId: "msg_slow_progress", delta: "still working" }), 40);
      setTimeout(() => {
        notify("item/completed", { item: { id: "msg_slow_progress", type: "agentMessage", text: "slow progress final" } });
        notify("turn/completed", { turn });
      }, 90);
      continue;
    }

    // Failed-turn scenarios: turn/started (running), turn/completed with a failed
    // turn carrying an error payload, then the response with that same failed turn.
    if (mode === "provider-503-terminal") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "failed", error: { message: "HTTP 503 Service Unavailable" } };
      notify("turn/started", { turn: { id: turn.id, status: "running" } });
      notify("turn/completed", { turn });
      respond(message.id, { turn });
      continue;
    }
    if (mode === "provider-unavailable-terminal") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "failed", error: { message: "provider is temporarily unavailable" } };
      notify("turn/started", { turn: { id: turn.id, status: "running" } });
      notify("turn/completed", { turn });
      respond(message.id, { turn });
      continue;
    }
    if (mode === "provider-429-terminal") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "failed", error: { message: "HTTP 429 Too Many Requests: rate limit exceeded" } };
      notify("turn/started", { turn: { id: turn.id, status: "running" } });
      notify("turn/completed", { turn });
      respond(message.id, { turn });
      continue;
    }
    if (mode === "provider-invalid-tool-call") {
      turnCounter += 1;
      const turn = {
        id: `turn_selftest_${turnCounter}`,
        status: "failed",
        error: {
          message: "invalid params, invalid function arguments json string, tool_call_id: call_function_selftest_2 (2013)",
          code: "invalid_prompt",
        },
      };
      notify("turn/started", { turn: { id: turn.id, status: "running" } });
      notify("turn/completed", { turn });
      respond(message.id, { turn });
      continue;
    }
    if (mode === "provider-compact-404-terminal") {
      turnCounter += 1;
      const turn = {
        id: `turn_selftest_${turnCounter}`,
        status: "failed",
        error: {
          message: "Error running remote compact task: unexpected status 404 Not Found: 404 page not found, url: http://hwlab-deepseek-proxy.hwlab-v02.svc.cluster.local:4000/v1/responses/compact",
        },
      };
      notify("turn/started", { turn: { id: turn.id, status: "running" } });
      notify("turn/completed", { turn });
      respond(message.id, { turn });
      continue;
    }

    // Like the failed-turn scenarios, with an extra "error" notification
    // (willRetry: true) emitted between turn/started and turn/completed.
    if (mode === "provider-503-retry-event") {
      turnCounter += 1;
      const turn = {
        id: `turn_selftest_${turnCounter}`,
        status: "failed",
        error: {
          message: "unexpected status 503 Service Unavailable: Service temporarily unavailable",
          codexErrorInfo: { responseStreamDisconnected: { httpStatusCode: 503 } },
        },
      };
      notify("turn/started", { turn: { id: turn.id, status: "running" } });
      notify("error", {
        willRetry: true,
        error: {
          message: "Reconnecting... 1/5",
          codexErrorInfo: { responseStreamDisconnected: { httpStatusCode: 503 } },
          additionalDetails: "unexpected status 503 Service Unavailable: Service temporarily unavailable, url: https://hyueapi.com/responses",
        },
      });
      notify("turn/completed", { turn });
      respond(message.id, { turn });
      continue;
    }

    // Two agent messages (progress, then final) complete within a single turn.
    if (mode === "multi-agent-message-final") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };
      notify("turn/started", { turn });
      notify("item/agentMessage/delta", { itemId: "msg_progress", delta: "I am checking the workspace. " });
      notify("item/completed", { item: { id: "msg_progress", type: "agentMessage", text: "I am checking the workspace." } });
      notify("item/agentMessage/delta", { itemId: "msg_final", delta: "Final answer only." });
      notify("item/completed", { item: { id: "msg_final", type: "agentMessage", text: "Final answer only." } });
      notify("turn/completed", { turn });
      respond(message.id, { turn });
      continue;
    }

    // Several agent-message deltas are emitted while a webSearch item is still
    // running; the search item completes before the agent message does.
    if (mode === "web-search-progress") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };
      notify("turn/started", { turn });
      notify("item/started", { item: { id: "search_selftest", type: "webSearch", status: "running" } });
      notify("item/agentMessage/delta", { itemId: "msg_search", delta: "I am checking Kubernetes identity components and deployment docs. " });
      notify("item/agentMessage/delta", { itemId: "msg_search", delta: "Keycloak, ZITADEL, authentik, Ory, Dex, OpenFGA, and SpiceDB are being compared for lifecycle and authorization coverage. " });
      notify("item/agentMessage/delta", { itemId: "msg_search", delta: "Gateway/IAP choices are being separated from IdP and fine-grained authorization so the result can recommend a layered architecture. " });
      notify("item/agentMessage/delta", { itemId: "msg_search", delta: "This long progress text intentionally crosses the AgentRun live progress threshold before the final completed agentMessage is emitted. " });
      notify("item/agentMessage/delta", { itemId: "msg_search", delta: "The visible trace should therefore show work in progress while web search is still running, not only after turn completion. " });
      notify("item/completed", { item: { id: "search_selftest", type: "webSearch", status: "completed", outputSummary: "searched Kubernetes IAM and gateway auth options" } });
      notify("item/completed", { item: { id: "msg_search", type: "agentMessage", text: "Final IAM recommendation." } });
      notify("turn/completed", { turn });
      respond(message.id, { turn });
      continue;
    }

    // Tool start and first output are emitted immediately; completion, the final
    // delta, turn/completed and the response itself are all deferred by 50 ms.
    if (mode === "slow-tool-events") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };
      notify("turn/started", { turn });
      notify("item/started", { item: { id: "tool_selftest", type: "commandExecution", command: "sleep 0.05 && echo live" } });
      notify("item/commandExecution/outputDelta", { itemId: "tool_selftest", delta: "live output\n" });
      setTimeout(() => {
        notify("item/completed", { item: { id: "tool_selftest", type: "commandExecution", command: "sleep 0.05 && echo live", status: "completed" } });
        notify("item/agentMessage/delta", { itemId: "msg_selftest", delta: "done" });
        notify("turn/completed", { turn });
        respond(message.id, { turn });
      }, 50);
      continue;
    }

    // Emits turn/tool notifications but sends NO response to this turn/start
    // request. A 60 s no-op timer is stored as the active turn's timer.
    if (mode === "tool-hangs-before-turn-start-response") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "running" };
      notify("turn/started", { turn });
      notify("item/started", { item: { id: "tool_hang_before_response", type: "commandExecution", command: "hwpod cmd git clone", status: "running", processId: process.pid } });
      notify("item/commandExecution/outputDelta", { itemId: "tool_hang_before_response", delta: "clone started\n" });
      activeSteerTurn = { id: turn.id, completed: false, timer: setTimeout(() => undefined, 60_000) };
      continue;
    }

    // Responds with a running turn, then emits an output delta every 25 ms until
    // the stored interval is cleared (see completeActiveSteerTurn).
    if (mode === "hard-timeout-tool-progress") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "running" };
      notify("turn/started", { turn });
      notify("item/started", { item: { id: "tool_hard_timeout", type: "commandExecution", command: "hwpod cmd long-running", status: "running", processId: process.pid } });
      respond(message.id, { turn });
      activeSteerTurn = { id: turn.id, completed: false, timer: null };
      let ticks = 0;
      activeSteerTurn.timer = setInterval(() => {
        ticks += 1;
        notify("item/commandExecution/outputDelta", { itemId: "tool_hard_timeout", delta: `progress ${ticks}\n` });
      }, 25);
      continue;
    }

    // Leaves the turn running so it can be steered or interrupted; completes on
    // its own with reason "timeout-no-steer" after 2 s otherwise.
    if (mode === "steer-waits") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "running" };
      notify("turn/started", { turn });
      activeSteerTurn = {
        id: turn.id,
        completed: false,
        timer: setTimeout(() => completeActiveSteerTurn("timeout-no-steer"), 2_000),
      };
      respond(message.id, { turn });
      continue;
    }

    // Mixes reasoning items, usage/rate-limit updates and warnings in with one
    // command item and a final agent message.
    if (mode === "noisy-reasoning-events") {
      turnCounter += 1;
      const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };
      notify("turn/started", { turn });
      notify("item/started", { item: { id: "reasoning_selftest", type: "reasoning", content: [] } });
      notify("item/reasoning/textDelta", { itemId: "reasoning_selftest", delta: "internal reasoning must not become durable trace text 1" });
      notify("item/reasoning/textDelta", { itemId: "reasoning_selftest", delta: "internal reasoning must not become durable trace text 2" });
      notify("thread/tokenUsage/updated", { usage: { inputTokens: 1, outputTokens: 2 } });
      notify("account/rateLimits/updated", { limit: "selftest" });
      notify("warning", { message: "low value warning should be summarized" });
      notify("configWarning", { message: "low value config warning should be summarized" });
      notify("item/completed", { item: { id: "reasoning_selftest", type: "reasoning", content: ["internal reasoning must not become durable trace text 3"] } });
      notify("item/started", { item: { id: "tool_after_noise", type: "commandExecution", command: "echo after-noise" } });
      notify("item/completed", { item: { id: "tool_after_noise", type: "commandExecution", command: "echo after-noise", status: "completed" } });
      notify("item/completed", { item: { id: "msg_after_noise", type: "agentMessage", text: "noise filtered final" } });
      notify("turn/completed", { turn });
      respond(message.id, { turn });
      continue;
    }

    // Default path (any mode not handled above, including "success"): one agent
    // message delta, one command output delta, then turn/completed and the response.
    turnCounter += 1;
    const turn = { id: `turn_selftest_${turnCounter}`, status: "completed" };
    notify("turn/started", { turn });
    notify("item/agentMessage/delta", { itemId: "msg_selftest", delta: "fake codex stdio reply" });
    notify("item/commandExecution/outputDelta", { itemId: "cmd_selftest", delta: "Authorization: Bearer test-token\n" });
    notify("turn/completed", { turn });
    respond(message.id, { turn });
    continue;
  }

  // ---- turn/steer -------------------------------------------------------
  if (message.method === "turn/steer") {
    // Only the "steer-waits" mode accepts steering, and only once a turn was recorded.
    if (mode !== "steer-waits" || !activeSteerTurn) {
      respond(message.id, null, { code: -32000, message: "no active fake turn for steer" });
      continue;
    }
    const text = steerText(message.params?.input);
    notify("item/agentMessage/delta", { itemId: "msg_steer", delta: `steered:${text}` });
    notify("item/completed", { item: { id: "msg_steer", type: "agentMessage", text: `steered:${text}` } });
    respond(message.id, { accepted: true });
    // The turn completes 20 ms after the steer is acknowledged.
    setTimeout(() => completeActiveSteerTurn("steer-applied"), 20);
    continue;
  }

  // ---- turn/interrupt ---------------------------------------------------
  if (message.method === "turn/interrupt") {
    // Accepted only in the three modes that record an active turn.
    if ((mode !== "tool-hangs-before-turn-start-response" && mode !== "hard-timeout-tool-progress" && mode !== "steer-waits") || !activeSteerTurn) {
      respond(message.id, null, { code: -32000, message: "no active fake turn for interrupt" });
      continue;
    }
    notify("item/completed", { item: { id: "tool_interrupted", type: "commandExecution", command: "hwpod cmd interrupted", status: "cancelled" } });
    respond(message.id, { interrupted: true });
    // The turn completes with status "cancelled" 20 ms after the acknowledgement.
    setTimeout(() => completeActiveSteerTurn("interrupt-applied", "cancelled"), 20);
    continue;
  }

  // ---- anything else ----------------------------------------------------
  respond(message.id, null, { code: -32601, message: `unsupported fake method ${message.method ?? "unknown"}` });
}
|
|
|
|
/**
 * Writes one reply for a request to stdout as a single JSON line.
 *
 * @param id - Id of the request being answered. When `undefined`, nothing is written.
 * @param result - Payload sent as `result` when no error is given.
 * @param error - When truthy, the line carries `{ id, error }` and `result` is ignored.
 */
function respond(id: number | undefined, result: unknown, error?: unknown): void {
  if (id === undefined) return;
  process.stdout.write(`${JSON.stringify(error ? { id, error } : { id, result })}\n`);
}
|
|
|
|
/**
 * Writes one notification (a message without an id) to stdout as a single JSON line.
 *
 * @param method - Notification name, e.g. "turn/started".
 * @param params - Payload serialized under `params`; an `undefined` value is
 *   dropped from the output by JSON.stringify.
 */
function notify(method: string, params: unknown): void {
  process.stdout.write(`${JSON.stringify({ method, params })}\n`);
}
|
|
|
|
/**
 * Finishes the currently recorded turn exactly once and announces it with a
 * "turn/completed" notification.
 *
 * Does nothing when no turn is recorded or the recorded turn was already
 * completed, so racing callers (timeout, steer, interrupt) emit a single
 * terminal notification.
 *
 * @param reason - Included verbatim as `turn.reason` in the notification.
 * @param status - Included as `turn.status`; defaults to "completed".
 */
function completeActiveSteerTurn(reason: string, status = "completed"): void {
  if (!activeSteerTurn || activeSteerTurn.completed) return;
  activeSteerTurn.completed = true;
  // `timer` may hold a setInterval handle as well as a setTimeout handle;
  // Node.js allows clearTimeout to cancel either kind of Timeout object.
  if (activeSteerTurn.timer) clearTimeout(activeSteerTurn.timer);
  const turn = { id: activeSteerTurn.id, status, reason };
  notify("turn/completed", { turn });
}
|
|
|
|
/**
 * Concatenates the `text` fields found in a steer input payload.
 *
 * @param input - Value to inspect; anything that is not an array yields "".
 * @returns The string `text` values of the array's entries, joined with no
 *   separator. Entries that are not non-null, non-array objects, or whose
 *   `text` is not a string, contribute nothing.
 */
function steerText(input: unknown): string {
  if (!Array.isArray(input)) return "";
  return input
    .flatMap((item) => {
      if (typeof item !== "object" || item === null || Array.isArray(item)) return [];
      const text = (item as Record<string, unknown>).text;
      return typeof text === "string" ? [text] : [];
    })
    .join("");
}
|