Files
pikasTech-unidesk/scripts/provider-runner-triage-contract-test.ts
T
2026-06-03 01:44:46 +00:00

120 lines
7.2 KiB
TypeScript

import { buildProviderTriageResult, providerTriageRecommendedCrossChecks, type ProviderTriageSignal } from "./src/provider-triage";
import { codexTaskQuery } from "./src/code-queue";
import { classifyRunnerError } from "../src/components/microservices/code-queue/src/runner-error-classifier";
/** Loosely typed string-keyed object used here for classifier output, CLI payloads, and the returned report. */
type JsonRecord = Record<string, unknown>;
/**
 * Fails the contract test when `condition` is falsy.
 *
 * @param condition - Value checked for truthiness.
 * @param message - Description of the violated expectation; leads the error text.
 * @param detail - Diagnostic payload appended to the error text as JSON.
 * @throws Error whose message is `message`, a colon, and the serialized `detail`.
 */
function assertCondition(condition: unknown, message: string, detail: unknown = {}): void {
  if (condition) return;
  let renderedDetail: string;
  try {
    renderedDetail = JSON.stringify(detail);
  } catch {
    // JSON.stringify throws on circular references and BigInt values. Falling
    // back to a placeholder keeps the assertion message visible instead of
    // replacing it with an unrelated serialization TypeError.
    renderedDetail = "[unserializable detail]";
  }
  throw new Error(`${message}: ${renderedDetail}`);
}
/**
 * Builds a triage signal fixture with a fixed observation timestamp.
 *
 * @param id - Identifier of the signal.
 * @param scope - Scope value copied onto the signal.
 * @param status - Status value copied onto the signal.
 * @param independentPath - Flag copied onto the signal; defaults to `true`.
 * @returns Signal whose `summary` is `id:scope:status`.
 */
function signal(
  id: string,
  scope: ProviderTriageSignal["scope"],
  status: ProviderTriageSignal["status"],
  independentPath = true,
): ProviderTriageSignal {
  const observedAt = "2026-05-21T00:00:00.000Z";
  const summary = `${id}:${scope}:${status}`;
  const fixture: ProviderTriageSignal = { id, scope, status, independentPath, observedAt, summary };
  return fixture;
}
/**
 * Classifies `message` for provider "D601" and verifies the classification contract.
 *
 * Checks, in order: the expected scope, the expected disposition,
 * `globalBlocker === false`, `retryable === true`, and that the recommended
 * triage command mentions "provider triage D601".
 *
 * @param message - Runner error text passed to `classifyRunnerError`.
 * @param expectedScope - Scope the classification must report.
 * @param expectedDisposition - Disposition the classification must report.
 * @returns The classification, viewed as a generic record.
 * @throws Error from `assertCondition` on the first expectation that does not hold.
 */
function assertScope(message: string, expectedScope: string, expectedDisposition: string): JsonRecord {
  const classified = classifyRunnerError(message, "D601") as unknown as JsonRecord;

  // Ordered table of lazy checks: each predicate runs only after the previous
  // expectation has passed, so the first violation is the one reported.
  const expectations: ReadonlyArray<readonly [() => boolean, string]> = [
    [() => classified.scope === expectedScope, `runner error should classify as ${expectedScope}`],
    [() => classified.disposition === expectedDisposition, `runner error disposition should be ${expectedDisposition}`],
    [() => classified.globalBlocker === false, "single runner error classification must not be global blocker"],
    [() => classified.retryable === true, "single runner error classification should remain retryable"],
    [
      () => String(classified.recommendedTriageCommand ?? "").includes("provider triage D601"),
      "classification should include triage command",
    ],
  ];

  for (const [holds, failure] of expectations) {
    assertCondition(holds(), failure, classified);
  }
  return classified;
}
/**
 * Runs the provider/runner triage contract checks and reports what was verified.
 *
 * The checks cover, in order: classification of representative runner error
 * messages, the extra evidence attached to a 429 classification, the triage
 * result built from a single failed non-independent signal, the recommended
 * cross-check commands, the triage result for an external-provider failure,
 * and the classification carried through `codexTaskQuery(..., ["--detail"])`.
 *
 * @returns `{ ok: true, checks }`, where `checks` lists the verified contract statements.
 * @throws Error from the first assertion that does not hold.
 */
export function runProviderRunnerTriageContract(): JsonRecord {
  // Representative runner error messages and the scope/disposition each must map to.
  assertScope("provider is not online: D601", "runner-local", "runner-local-observation-gap");
  assertScope("provider-gateway http tunnel failed waiting for request", "provider-gateway", "provider-degraded");
  assertScope("artifact registry /v2 manifest HEAD failed on 127.0.0.1:5000", "registry", "service-degraded");
  assertScope("kubectl get pods failed: k3s api unavailable", "k3s", "service-degraded");
  assertScope("code-queue scheduler active run heartbeat is stale", "scheduler", "retryable-transient");
  assertScope("unexpected runner process exited with code 1", "unknown", "retryable-transient");

  // A 429 must additionally expose explicit rate-limit evidence and a backoff hint.
  const rateLimit = assertScope("exceeded retry limit, last status: 429 Too Many Requests, request id: 21zqfw7apcg", "external-provider", "retryable-transient");
  assertCondition(rateLimit.externalProvider429 === true, "OpenAI/model 429 should be explicit externalProvider429 evidence", rateLimit);
  assertCondition(rateLimit.failureKind === "external-provider-rate-limit", "429 should use a stable rate-limit failure kind", rateLimit);
  assertCondition((rateLimit.backoffHint as JsonRecord | undefined)?.strategy === "exponential-jitter", "429 classification should expose jittered backoff hint", rateLimit);

  // One failed, non-independent observation alongside healthy signals must not
  // be escalated beyond a retryable observation gap.
  const singlePath = classifyRunnerError("provider is not online: D601", "D601");
  const result = buildProviderTriageResult("D601", [
    signal("observed-runner-error", singlePath.scope, "failed", false),
    signal("backend-core-node", "provider-gateway", "ok"),
    signal("host-ssh-probe", "ssh", "ok"),
    signal("code-queue-health", "scheduler", "ok"),
  ], "2026-05-21T00:00:00.000Z");
  assertCondition(result.blockingDisposition === "runner-local-observation-gap", "single path provider offline must stay observation gap", result);
  assertCondition(result.decision === "retryable-transient", "single path provider offline should be retryable transient", result);
  assertCondition(result.retryable === true, "single path provider offline should be retryable", result);
  assertCondition(result.contract.singlePathProviderOfflineIsGlobalBlocker === false, "triage contract should reject single-path global blocker", result);
  assertCondition(result.recommendedCrossChecks.includes("trans D601 argv true"), "provider triage result must recommend argv Host SSH cross-check", result.recommendedCrossChecks);

  // The standalone cross-check list must keep both probes.
  const crossChecks = providerTriageRecommendedCrossChecks("D601");
  assertCondition(crossChecks.includes("trans D601 argv true"), "provider triage recommendedCrossChecks must keep ssh argv true", crossChecks);
  assertCondition(crossChecks.includes("bun scripts/cli.ts debug dispatch D601 host.ssh --wait-ms 15000"), "provider triage recommendedCrossChecks must keep host.ssh dispatch probe", crossChecks);

  // An external-provider failure must stay in the backoff disposition and remain retryable.
  const rateLimitTriage = buildProviderTriageResult("D601", [
    signal("observed-runner-429", "external-provider", "failed", false),
    signal("code-queue-health", "scheduler", "ok"),
  ], "2026-05-21T00:00:00.000Z");
  assertCondition(rateLimitTriage.blockingDisposition === "external-provider-backoff", "external provider 429 should stay in backoff disposition", rateLimitTriage);
  assertCondition(rateLimitTriage.decision === "retryable-transient", "external provider 429 should remain retryable transient", rateLimitTriage);
  assertCondition(rateLimitTriage.retryable === true, "external provider 429 should be retryable", rateLimitTriage);

  // Query the task detail through a callback that checks the requested path and
  // returns a canned response carrying the single-path classification.
  const cliSummary = codexTaskQuery("codex_runner_triage_fixture", ["--detail"], (path) => {
    assertCondition(path.includes("/api/microservices/code-queue/proxy/api/tasks/codex_runner_triage_fixture/summary"), "task summary should use stable proxy path", { path });
    return {
      ok: true,
      upstream: { ok: true, status: 200 },
      body: {
        ok: true,
        summary: {
          id: "codex_runner_triage_fixture",
          status: "failed",
          providerId: "D601",
          attempts: [{
            index: 1,
            mode: "initial",
            terminalStatus: "failed",
            runnerErrorClassification: singlePath,
            stderrTail: "provider is not online: D601",
          }],
        },
      },
    };
  }) as JsonRecord;

  // Walk the CLI output with optional chaining so an unexpected shape fails the
  // assertions below (with the full payload attached) rather than raising a
  // bare TypeError on a missing intermediate property.
  const summary = cliSummary.summary as JsonRecord | undefined;
  const attempts = summary?.attempts as JsonRecord | undefined;
  const attemptRecords = attempts?.attemptRecords as JsonRecord[] | undefined;
  const compactClassification = attemptRecords?.[0]?.runnerErrorClassification as JsonRecord | undefined;
  assertCondition(compactClassification?.scope === "runner-local", "CLI compact task detail should preserve runnerErrorClassification", cliSummary);
  assertCondition(compactClassification?.globalBlocker === false, "CLI compact classification should preserve non-global-blocker contract", cliSummary);

  return {
    ok: true,
    checks: [
      "runner error classifier separates runner-local/provider-gateway/registry/k3s/scheduler/unknown",
      "each single runner error classification has globalBlocker=false",
      "provider triage keeps single provider is not online as retryable-transient, not global-blocker",
      "provider triage recommendedCrossChecks keeps host.ssh dispatch and ssh argv true probes",
      "external OpenAI/model provider 429 is explicit retryable backoff evidence, not Code Queue infra outage",
      "codex task --detail preserves runnerErrorClassification in compact attempt output",
    ],
  };
}
// When `import.meta.main` is set, run the contract and print the report as
// two-space-indented JSON followed by a newline.
if (import.meta.main) {
  const report = runProviderRunnerTriageContract();
  const rendered = JSON.stringify(report, null, 2);
  process.stdout.write(`${rendered}\n`);
}