From 9b164c1781c3fe919b356c2f08a197a326e5a3bc Mon Sep 17 00:00:00 2001 From: Codex Date: Sat, 23 May 2026 19:56:01 +0000 Subject: [PATCH] Fix Code Queue runner skills delivery --- docs/reference/ci.md | 6 +- .../code-queue-cicd-dry-run-contract-test.ts | 12 + .../code-queue-runner-skills-contract-test.ts | 12 +- scripts/src/ci.ts | 218 +++++++++++++++++- scripts/src/code-queue.ts | 3 + scripts/src/microservices.ts | 4 + .../microservices/code-queue/src/index.ts | 75 ++++++ .../code-queue/src/runner-error-classifier.ts | 4 + .../code-queue/src/skill-availability.ts | 76 ++++++ 9 files changed, 402 insertions(+), 8 deletions(-) diff --git a/docs/reference/ci.md b/docs/reference/ci.md index 0b10d1c8..32193d30 100644 --- a/docs/reference/ci.md +++ b/docs/reference/ci.md @@ -127,6 +127,8 @@ The CI user-service artifact task must follow these rules: The same command also has a read-only preflight mode: `bun scripts/cli.ts ci publish-user-service --service --commit --dry-run`. That mode may be called from the main server or through remote frontend passthrough, and it must return `runnerDisposition`, `missingChannels`, `missingControlChannels`, `channels`, `controlChannels`, `registry`, `artifactSummary`, `controlledPublish`, `boundary` and `next` without creating a PipelineRun or pushing an image. `missingChannels` is the detailed probe list, while `missingControlChannels` is the runner-facing domain list using only `backend-core`, `database`, `provider` and `registry`. `controlledPublish` must point at the real producer boundary: D601, namespace `unidesk-ci`, PipelineRun `unidesk-user-service-artifact-publish`, and the non-dry-run `ci publish-user-service` command shape. If backend-core, database, provider or registry channels are missing, the result must be structured `infra-blocked`, not a bare container lookup failure. +`ci publish-user-service` accepts `--transport auto|tekton|direct-docker`. 
`auto` keeps the Tekton path for normal user-service producers, but selects `direct-docker` for repo-owned Code Queue artifacts so runner skills delivery can publish a commit-pinned image without local `unidesk-database`, backend-core dispatch, or provider control-channel availability. The direct-Docker path checks out the requested UniDesk commit into a temporary worktree, builds `127.0.0.1:5000/unidesk/code-queue:<commit>` with the warmed `unidesk-code-queue:d601` base image, verifies labels and digest, pushes to the D601 loopback registry, and returns the same `artifactSummary` fields. If the warmed base image or registry is unavailable, the command must return a structured infra-blocked report. It must not run `deploy apply`, trigger a rollout, restart Code Queue, mutate active tasks, or touch production. + `ci publish-backend-core --commit <commit> --dry-run` is the equivalent backend-core preflight. It must stay read-only and report `targetCommit`, `sourceRepo`, `ciRunner`, `registryTarget`, `wouldBuildOnD601`, `blockedScopes` and `recommendedAction`, plus the same control-channel diagnostics as user-service preflight. It must also expose `sourceAuth` for the D601 GitHub SSH deploy identity and provider-gateway egress proxy, `artifactRequirements` for the required labels and digest header, and `devApplyPath` for the standard next hop: publish artifact, verify `artifactSummary.digest` / `artifactSummary.digestRef` and labels, then run `deploy apply --env dev --service backend-core --commit <commit>` as pull-only CD. The dry-run must not export source, create a Tekton PipelineRun, compile Rust, build or push an image, call `deploy apply`, restart services, or suggest production backend-core apply as the default next step. 
Publish a Baidu Netdisk artifact: @@ -158,10 +160,10 @@ Publish k3s-managed service artifacts: ```bash bun scripts/cli.ts ci publish-user-service --service mdtodo --commit <commit> --wait-ms 1200000 bun scripts/cli.ts ci publish-user-service --service claudeqq --commit <commit> --wait-ms 1200000 -bun scripts/cli.ts ci publish-user-service --service code-queue --commit <commit> --wait-ms 1200000 +bun scripts/cli.ts ci publish-user-service --service code-queue --commit <commit> --wait-ms 1200000 --transport auto ``` -MDTODO and ClaudeQQ artifacts are consumed first by dev CD and then by production CD through the D601 registry artifact consumer. Code Queue artifacts are consumed only by the dev artifact consumer; CI publication does not enable production Code Queue deployment. +MDTODO and ClaudeQQ artifacts are consumed first by dev CD and then by production CD through the D601 registry artifact consumer. Code Queue artifacts are consumed only by the dev artifact consumer; CI publication does not enable production Code Queue deployment or a runtime rollout. 
## Dev Namespace E2E diff --git a/scripts/code-queue-cicd-dry-run-contract-test.ts b/scripts/code-queue-cicd-dry-run-contract-test.ts index 0b8360e9..acba36e0 100644 --- a/scripts/code-queue-cicd-dry-run-contract-test.ts +++ b/scripts/code-queue-cicd-dry-run-contract-test.ts @@ -1,4 +1,5 @@ import { spawnSync } from "node:child_process"; +import { readFileSync } from "node:fs"; type JsonRecord = Record; @@ -153,6 +154,16 @@ assertCondition(prodArtifactDryRun.requiresSupervisorApproval === true, "artifac assertCondition(asRecord(prodArtifactDryRun.selfBootstrapGuard, "prod artifact selfBootstrapGuard").selfBootstrapBlocked === true, "artifact-registry prod code-queue should expose self-bootstrap guard", prodArtifactDryRun); assertCondition(JSON.stringify(prodArtifactDryRun).includes("production artifact deploy") && JSON.stringify(prodArtifactDryRun).includes("active task"), "artifact-registry prod code-queue should explain prod deploy and active-task boundaries", prodArtifactDryRun); +const ciSource = readFileSync("scripts/src/ci.ts", "utf8"); +assertCondition(ciSource.includes('type CiPublishTransport = "auto" | "tekton" | "direct-docker"'), "ci publish-user-service should expose an explicit transport selector"); +assertCondition(ciSource.includes('options.transport === "direct-docker"'), "ci publish-user-service should support direct-docker artifact publish"); +assertCondition(ciSource.includes('options.transport === "auto" && options.serviceId === "code-queue"'), "auto transport should select direct-docker for Code Queue artifacts"); +assertCondition(ciSource.includes("dependsOnLocalUnideskDatabase: false"), "direct-docker publish must not depend on local unidesk-database dispatch"); +assertCondition(ciSource.includes("CODE_QUEUE_BASE_IMAGE=unidesk-code-queue:d601"), "direct-docker Code Queue publish must use the warmed D601 base image"); +assertCondition(ciSource.includes("code-queue-base-image-missing"), "direct-docker Code Queue publish should fail fast when 
the warmed base image is missing"); +assertCondition(ciSource.includes("no deploy apply, no rollout, no Code Queue restart, no active task mutation"), "direct-docker publish boundary should forbid runtime mutation"); +assertCondition(ciSource.includes("repo-owned Docker artifact publish without backend-core/database dispatch"), "ci help should describe the repo-owned direct-docker delivery path"); + process.stdout.write(`${JSON.stringify({ ok: true, checks: [ @@ -161,5 +172,6 @@ process.stdout.write(`${JSON.stringify({ "prod Code Queue plan is unsupported and exposes no runtime deploy target", "prod Code Queue boundary forbids self-deploy, prod mutation, interrupt and cancel actions", "artifact-registry dev dry-run is non-mutating while prod remains unsupported", + "ci publish-user-service exposes direct-docker Code Queue artifact publish without local database dispatch", ], }, null, 2)}\n`); diff --git a/scripts/code-queue-runner-skills-contract-test.ts b/scripts/code-queue-runner-skills-contract-test.ts index 04d61cfc..70122335 100644 --- a/scripts/code-queue-runner-skills-contract-test.ts +++ b/scripts/code-queue-runner-skills-contract-test.ts @@ -72,6 +72,8 @@ assertCondition(typeof available.resolvedPath === "string" && available.resolved assertCondition(asRecord(available.resolution, "available.resolution").passesToRunnerEnv === true, "skill report must expose runner env path resolution", available.resolution); assertCondition(Array.isArray(available.requiredSkills) && available.requiredSkills.includes("docs-spec"), "skill report must expose requiredSkills"); assertCondition(Array.isArray(available.missingSkills), "skill report must expose missingSkills"); +assertCondition(asRecord(available.version, "available.version").selectedFingerprint !== undefined, "skill report must expose selected skills fingerprint", available.version); +assertCondition(asRecord(available.version, "available.version").sourceLatestMtime !== undefined, "skill report must expose source 
skills mtime", available.version); assertCondition(available.valuesPrinted === false, "skill report must declare valuesPrinted=false"); assertCondition(asRecord(available.pathSpelling, "pathSpelling").forbiddenPathMustNotBeUsed === true, "skill report must flag misspelled path risk without spreading the literal path"); assertCondition(!JSON.stringify(available).includes(forbiddenPathLiteral), "skill report must not propagate misspelled path literal"); @@ -173,6 +175,8 @@ assertCondition(syncDryRun.expected.target === "/root/.agents/skills", "skills s assertCondition(syncDryRun.expected.env === "UNIDESK_SKILLS_PATH" && syncDryRun.expected.envValue === "/root/.agents/skills", "skills sync must expose env contract", syncDryRun.expected); assertCondition(syncDryRun.counts.requiredSkills === 2, "skills sync must expose required skill count", syncDryRun.counts); assertCondition(syncDryRun.counts.targetSkills === 0 && syncDryRun.counts.missingTargetSkills === 2, "skills sync must expose target counts and missing count", syncDryRun.counts); +assertCondition(asRecord(syncDryRun.version, "syncDryRun.version").sourceFingerprint !== undefined, "skills sync must expose source fingerprint", syncDryRun.version); +assertCondition(asRecord(syncDryRun.version, "syncDryRun.version").targetLatestMtime !== undefined, "skills sync must expose target mtime", syncDryRun.version); assertCondition(syncDryRun.missing.targetSkills.includes("docs-spec") && syncDryRun.missing.targetSkills.includes("cli-spec"), "skills sync must expose missing target skills", syncDryRun.missing); assertCondition(syncDryRun.blocker === "unapproved-target", "arbitrary target paths must be blocked before silent copying", syncDryRun); assertCondition(syncDryRun.plannedActions.copy === false && syncDryRun.plannedActions.copyFromArbitraryPath === false, "skills sync dry-run must not plan arbitrary copy", syncDryRun.plannedActions); @@ -202,6 +206,9 @@ 
assertCondition(runtimePreflight.includes("collectSkillSyncPreflight"), "runtime assertCondition(runtimePreflight.includes("skills.runnerUsable && ports.codex.ok"), "runtime preflight ok must depend on runner usable skills without blocking on host rollout contract drift"); assertCondition(indexSource.includes("skills.runnerUsable === true"), "dev-ready must gate on structured runner usable skills"); assertCondition(indexSource.includes("resolvedRunnerSkillsPath"), "runtime must pass resolved skills path to code agents"); +assertCondition(indexSource.includes("runnerSkillsBlocker"), "scheduler must check skills before starting code agents"); +assertCondition(indexSource.includes("task_blocked_by_runner_skills"), "scheduler must emit structured runner skills blockers"); +assertCondition(indexSource.includes("runnerDisposition: \"infra-blocked\""), "runner skills blocker must classify infra-blocked"); assertCondition(indexSource.includes("collectSkillsSyncPreflight"), "runtime index must expose skills sync preflight"); assertCondition(indexSource.includes("/api/skills-sync"), "runtime must expose a dry-run skills sync endpoint"); assertCondition(indexSource.includes("pass dryRun=1"), "skills sync endpoint must reject non-dry-run calls"); @@ -393,8 +400,9 @@ process.stdout.write(`${JSON.stringify({ ok: true, checks: [ "production Code Queue mounts /home/ubuntu/.agents/skills read-only at /root/.agents/skills", - "skill availability report exposes source, target, requiredSkills, missingSkills, degraded/blocker and valuesPrinted=false", - "skills sync dry-run reports source, target, counts, missing skills, permission failures, instructions and no-copy actions", + "skill availability report exposes source, target, requiredSkills, missingSkills, version fingerprint/mtime, degraded/blocker and valuesPrinted=false", + "skills sync dry-run reports source, target, counts, version fingerprint/mtime, missing skills, permission failures, instructions and no-copy actions", + 
"scheduler blocks runner startup with structured infra-blocked output when required skills are unavailable", "runtime-preflight, dev-ready, health and PR preflight use the same structured skill and sync reports", "default health/preflight summaries expose bounded skills lifecycle evidence and --full expansion", "misspelled skills paths are rejected with forbidden-skills-path-configured before generic missing/unapproved path blockers", diff --git a/scripts/src/ci.ts b/scripts/src/ci.ts index b196cac3..a205e8c0 100644 --- a/scripts/src/ci.ts +++ b/scripts/src/ci.ts @@ -1,6 +1,7 @@ import { randomUUID } from "node:crypto"; -import { existsSync, readFileSync } from "node:fs"; -import { posix as posixPath } from "node:path"; +import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join, posix as posixPath } from "node:path"; import { blockedCatalogArtifactIds, catalogSummary, findCiCatalogArtifact, loadCiCatalog, supportedSourceBuildArtifactIds, type CiCatalogArtifact, type CiSourceBuildCatalogArtifact, type CiUpstreamImageCatalogArtifact } from "./ci-catalog"; import { runCommand } from "./command"; import { type UniDeskConfig, repoRoot, rootPath } from "./config"; @@ -63,8 +64,11 @@ interface CiPublishUserServiceArtifactOptions { dockerfile: string; imageRepository: string; dryRun: boolean; + transport: CiPublishTransport; } +type CiPublishTransport = "auto" | "tekton" | "direct-docker"; + interface CiDevE2EOptions { repoUrl: string; desiredRef: string; @@ -232,6 +236,12 @@ function boolFlag(args: string[], name: string): boolean { return args.includes(name); } +function publishTransportOption(value: string | null): CiPublishTransport { + if (value === null || value === "auto") return "auto"; + if (value === "tekton" || value === "direct-docker") return value; + throw new Error("ci publish-user-service --transport must be one of: auto, tekton, direct-docker"); +} + function isHelpArg(value: string | 
undefined): boolean { return value === "help" || value === "--help" || value === "-h"; } @@ -1478,6 +1488,195 @@ function missingArtifactSummaryFields(artifact: ArtifactSummary): string[] { return missing; } +function dockerArtifactDigest(repository: string, imageRef: string): string | null { + const inspect = runCommand(["docker", "image", "inspect", imageRef, "--format", "{{range .RepoDigests}}{{println .}}{{end}}"], repoRoot, { timeoutMs: 30_000 }); + if (inspect.exitCode !== 0) return null; + for (const line of inspect.stdout.split(/\r?\n/u).map((item) => item.trim()).filter(Boolean)) { + const [repo, digest] = line.split("@"); + if (repo === repository && /^sha256:[0-9a-f]{64}$/u.test(digest ?? "")) return digest ?? null; + } + return null; +} + +function registryManifestDigest(repository: string, tag: string): string | null { + const registry = "127.0.0.1:5000"; + if (!repository.startsWith(`${registry}/`)) return null; + const repositoryPath = repository.slice(`${registry}/`.length); + if (repositoryPath.length === 0 || repositoryPath.includes("..") || tag.length === 0) return null; + const result = runCommand([ + "curl", + "-fsSI", + "-H", + "Accept: application/vnd.docker.distribution.manifest.v2+json", + `http://${registry}/v2/${repositoryPath}/manifests/${tag}`, + ], repoRoot, { timeoutMs: 30_000 }); + if (result.exitCode !== 0) return null; + const match = /^Docker-Content-Digest:\s*(sha256:[0-9a-f]{64})\s*$/imu.exec(result.stdout); + return match?.[1] ?? null; +} + +function assertCommandOk(result: ReturnType, label: string): void { + if (result.exitCode === 0 && !result.timedOut) return; + throw new Error(`${label} failed: ${result.stderr.slice(-2000) || result.stdout.slice(-2000) || `exitCode=${result.exitCode}`}`); +} + +function buildContextForService(serviceId: string, dockerfile: string): string { + return serviceId === "claudeqq" ? 
posixPath.dirname(dockerfile) : "."; +} + +function directDockerSourceGitMode(repoUrl: string): "local-unidesk-worktree" | "git-archive" { + return repoUrl === "https://github.com/pikasTech/unidesk" ? "local-unidesk-worktree" : "git-archive"; +} + +async function prepareDirectDockerUserServiceSource(options: CiPublishUserServiceArtifactOptions): Promise<{ path: string; cleanup: () => void; summary: Record }> { + const tempRoot = mkdtempSync(join(tmpdir(), `unidesk-ci-${options.serviceId}-${options.commit.slice(0, 8)}-`)); + const sourcePath = join(tempRoot, "source"); + const gitMode = directDockerSourceGitMode(options.repoUrl); + if (gitMode !== "local-unidesk-worktree") { + rmSync(tempRoot, { recursive: true, force: true }); + throw new Error("direct-docker publish currently supports only UniDesk repo-owned source-build artifacts; use --transport tekton for external repositories"); + } + const resolved = runCommand(["git", "rev-parse", "--verify", `${options.commit}^{commit}`], repoRoot, { timeoutMs: 30_000 }); + assertCommandOk(resolved, "resolve source commit"); + if (resolved.stdout.trim() !== options.commit) { + rmSync(tempRoot, { recursive: true, force: true }); + throw new Error(`direct-docker source commit mismatch: resolved ${resolved.stdout.trim()} expected ${options.commit}`); + } + const dockerfileExists = runCommand(["git", "cat-file", "-e", `${options.commit}:${options.dockerfile}`], repoRoot, { timeoutMs: 30_000 }); + assertCommandOk(dockerfileExists, "verify source dockerfile"); + const worktree = runCommand(["git", "worktree", "add", "--detach", sourcePath, options.commit], repoRoot, { timeoutMs: 120_000 }); + if (worktree.exitCode !== 0) { + rmSync(tempRoot, { recursive: true, force: true }); + throw new Error(`prepare source worktree failed: ${worktree.stderr || worktree.stdout}`); + } + return { + path: sourcePath, + cleanup: () => { + const removed = runCommand(["git", "worktree", "remove", "--force", sourcePath], repoRoot, { timeoutMs: 60_000 
}); + if (removed.exitCode !== 0) rmSync(tempRoot, { recursive: true, force: true }); + else rmSync(tempRoot, { recursive: true, force: true }); + }, + summary: { + ok: true, + mode: gitMode, + providerId: "local", + repoUrl: options.repoUrl, + commit: options.commit, + serviceId: options.serviceId, + dockerfile: options.dockerfile, + sourceHostPath: sourcePath, + valuesPrinted: false, + }, + }; +} + +async function publishUserServiceArtifactDirectDocker(options: CiPublishUserServiceArtifactOptions, context: ArtifactSummaryContext): Promise> { + const source = await prepareDirectDockerUserServiceSource(options); + const planned = artifactSummaryDefaults(context); + const localImage = `${options.imageRepository}:${options.commit}`; + const buildContext = buildContextForService(options.serviceId, options.dockerfile); + const baseArgs = options.serviceId === "code-queue" && runCommand(["docker", "image", "inspect", "unidesk-code-queue:d601"], repoRoot, { timeoutMs: 30_000 }).exitCode === 0 + ? 
["--build-arg", "CODE_QUEUE_BASE_IMAGE=unidesk-code-queue:d601"] + : []; + try { + if (options.serviceId === "code-queue" && baseArgs.length === 0) { + return { + ok: false, + runnerDisposition: "infra-blocked", + failureClassification: "ci-runner-not-ready", + failureKind: "code-queue-base-image-missing", + serviceId: options.serviceId, + commit: options.commit, + artifactSummary: planned, + source: source.summary, + artifact: planned.imageRef, + transport: "direct-docker", + controlledPublish: { + environment: "DEV-local-artifact", + command: `bun scripts/cli.ts ci publish-user-service --service ${options.serviceId} --commit ${options.commit} --transport direct-docker`, + noRollout: true, + noRuntimeMutation: true, + }, + boundary: "direct-docker CI artifact publish builds and pushes a commit-pinned image only; no deploy apply, no rollout, no Code Queue restart, no active task mutation", + next: [ + "Restore or warm local unidesk-code-queue:d601 base image, then rerun the same direct-docker publish command.", + ], + }; + } + const build = runCommand([ + "docker", + "build", + "--label", + `unidesk.ai/service-id=${options.serviceId}`, + "--label", + `unidesk.ai/source-repo=${options.repoUrl}`, + "--label", + `unidesk.ai/source-commit=${options.commit}`, + "--label", + `unidesk.ai/dockerfile=${options.dockerfile}`, + ...baseArgs, + "-t", + localImage, + "-t", + planned.imageRef, + "-f", + options.dockerfile, + buildContext, + ], source.path, { timeoutMs: Math.max(options.waitMs, 20 * 60_000) }); + assertCommandOk(build, "direct docker build"); + const inspectLabels = runCommand(["docker", "image", "inspect", planned.imageRef, "--format", "{{ index .Config.Labels \"unidesk.ai/service-id\" }} {{ index .Config.Labels \"unidesk.ai/source-commit\" }}"], repoRoot, { timeoutMs: 30_000 }); + assertCommandOk(inspectLabels, "inspect built image labels"); + if (!inspectLabels.stdout.includes(`${options.serviceId} ${options.commit}`)) throw new Error(`direct docker image 
labels did not match ${options.serviceId}/${options.commit}`); + const registryCheck = runCommand(["curl", "-fsS", "http://127.0.0.1:5000/v2/"], repoRoot, { timeoutMs: 15_000 }); + assertCommandOk(registryCheck, "D601 loopback artifact registry health"); + const push = runCommand(["docker", "push", planned.imageRef], repoRoot, { timeoutMs: Math.max(options.waitMs, 10 * 60_000) }); + assertCommandOk(push, "direct docker push"); + const pull = runCommand(["docker", "pull", planned.imageRef], repoRoot, { timeoutMs: 120_000 }); + assertCommandOk(pull, "direct docker pull verification"); + const digest = dockerArtifactDigest(planned.repository, planned.imageRef) ?? registryManifestDigest(planned.repository, planned.tag); + const artifact: ArtifactSummary = { + ...planned, + digest, + digestRef: digest === null ? null : `${planned.repository}@${digest}`, + }; + assertArtifactSummaryComplete(artifact, "direct-docker"); + return { + ok: true, + transport: "direct-docker", + pipelineRun: null, + namespace: null, + repoUrl: options.repoUrl, + commit: options.commit, + serviceId: options.serviceId, + source: source.summary, + artifact: artifact.imageRef, + artifactSummary: artifact, + controlledPublish: { + environment: "DEV-local-artifact", + command: `bun scripts/cli.ts ci publish-user-service --service ${options.serviceId} --commit ${options.commit} --transport direct-docker`, + noRollout: true, + noRuntimeMutation: true, + dependsOnLocalUnideskDatabase: false, + }, + boundary: "direct-docker CI artifact publish builds and pushes a commit-pinned image only; no deploy apply, no rollout, no Code Queue restart, no active task mutation", + wait: { + ok: true, + dispatchOk: null, + dispatchStatus: null, + dispatchExitCode: null, + stdoutTail: push.stdout.slice(-6000), + stderrTail: push.stderr.slice(-6000), + }, + condition: null, + next: [ + "use artifactSummary.imageRef or artifactSummary.digestRef as later dev artifact consumer input", + ], + }; + } finally { + 
source.cleanup(); + } +} + function assertArtifactSummaryComplete(artifact: ArtifactSummary, pipelineRun: string): void { const missing = missingArtifactSummaryFields(artifact); if (missing.length > 0) { @@ -1862,13 +2061,16 @@ async function publishUserServiceArtifact(config: UniDeskConfig, options: CiPubl environment: "D601", namespace: "unidesk-ci", pipeline: "unidesk-user-service-artifact-publish", - command: `bun scripts/cli.ts ci publish-user-service --service ${options.serviceId} --commit ${options.commit} --wait-ms 1200000`, + command: `bun scripts/cli.ts ci publish-user-service --service ${options.serviceId} --commit ${options.commit} --wait-ms 1200000 --transport ${options.transport}`, requiresReadyControlChannels: publishPreflightControlChannelOrder, }, boundary: preflight.boundary, next: preflight.next, }; } + if (options.transport === "direct-docker" || (options.transport === "auto" && options.serviceId === "code-queue" && options.repoUrl === "https://github.com/pikasTech/unidesk")) { + return publishUserServiceArtifactDirectDocker(options, summaryContext); + } const source = options.serviceId === "claudeqq" ? 
await prepareClaudeqqArtifactSource(config, options) : await prepareUserServiceArtifactSource(config, options); @@ -1948,6 +2150,7 @@ export async function runCiPublishUserServiceDryRunPreflight( imageRepository: artifact.image.repository, sourceHostPath: userServiceArtifactSourceHostPath(serviceId, commit), dryRun: true, + transport: publishTransportOption(stringOption(args, "--transport")), }; const preflight = await publishUserServicePreflight(config, options, plannedArtifact, transport); const plannedRepoFetchUrl = repoSshUrl(options.repoUrl); @@ -1992,7 +2195,7 @@ export async function runCiPublishUserServiceDryRunPreflight( environment: "D601", namespace: "unidesk-ci", pipeline: "unidesk-user-service-artifact-publish", - command: `bun scripts/cli.ts ci publish-user-service --service ${options.serviceId} --commit ${options.commit} --wait-ms 1200000`, + command: `bun scripts/cli.ts ci publish-user-service --service ${options.serviceId} --commit ${options.commit} --wait-ms 1200000 --transport ${options.transport}`, requiresReadyControlChannels: publishPreflightControlChannelOrder, }, boundary: preflight.boundary, @@ -2422,6 +2625,11 @@ export function ciHelp(): Record { userServiceArtifact: { producer: "D601 CI", command: "bun scripts/cli.ts ci publish-user-service --service --commit ", + transports: { + auto: "uses direct-docker for repo-owned code-queue artifacts and Tekton for the remaining services", + tekton: "D601 Tekton PipelineRun through backend-core/provider control plane", + directDocker: "repo-owned Docker artifact publish without backend-core/database dispatch; no deploy apply, rollout, restart, or active-task mutation", + }, supportedServices: supportedSourceBuildArtifactIds().filter((serviceId) => serviceId !== "backend-core"), blockedServices: blockedCatalogArtifactIds(), registry: "127.0.0.1:5000/unidesk/:", @@ -2486,6 +2694,7 @@ export async function runCiCommand(config: UniDeskConfig, args: string[]): Promi const commit = 
requireFullCommit(stringOption(args, "--commit") ?? stringOption(args, "--revision")); const waitMs = numberOption(args, "--wait-ms", 0); const dryRun = boolFlag(args, "--dry-run"); + const transport = publishTransportOption(stringOption(args, "--transport")); if (stringOption(args, "--repo") !== null || stringOption(args, "--repo-url") !== null) { throw new Error("ci publish-user-service reads source repo from CI.json; edit CI.json instead of using --repo"); } @@ -2512,6 +2721,7 @@ export async function runCiCommand(config: UniDeskConfig, args: string[]): Promi imageRepository: artifact.image.repository, sourceHostPath: userServiceArtifactSourceHostPath(serviceId, commit), dryRun, + transport, }); } if (action === "run-dev-e2e") { diff --git a/scripts/src/code-queue.ts b/scripts/src/code-queue.ts index bf733131..0dcfeab2 100644 --- a/scripts/src/code-queue.ts +++ b/scripts/src/code-queue.ts @@ -6207,6 +6207,7 @@ function compactSkillsStatus(value: unknown): Record | null { degradedReason: record.degradedReason ?? record.blocker ?? null, readonly: record.readonly ?? false, skillCount: record.skillCount ?? 0, + version: record.version ?? null, sourceSkillCount: record.sourceSkillCount ?? null, targetSkillCount: record.targetSkillCount ?? null, sourceMissingSkills: Array.isArray(record.sourceMissingSkills) ? record.sourceMissingSkills.map(String) : [], @@ -6241,6 +6242,7 @@ function compactSkillPathReport(value: unknown): Record | null symlink: record.symlink ?? false, realPath: record.realPath ?? null, skillCount: record.skillCount ?? 0, + version: record.version ?? null, requiredSkills: Array.isArray(record.requiredSkills) ? record.requiredSkills.map(String) : [], missingSkills: Array.isArray(record.missingSkills) ? record.missingSkills.map(String) : [], error: record.error ?? null, @@ -6265,6 +6267,7 @@ function compactSkillsSyncStatus(value: unknown, full = false): Record | nul degradedReason: skills.degradedReason ?? skills.blocker ?? 
null, readonly: skills.readonly ?? false, skillCount: skills.skillCount ?? 0, + version: skills.version ?? null, sourceSkillCount: skills.sourceSkillCount ?? null, targetSkillCount: skills.targetSkillCount ?? null, sourceMissingSkills: Array.isArray(skills.sourceMissingSkills) ? skills.sourceMissingSkills.map(String) : [], @@ -676,6 +677,7 @@ function compactSkillSync(value: unknown): Record | null { "symlink", "realPath", "skillCount", + "version", "requiredSkills", "missingSkills", "error", @@ -692,6 +694,7 @@ function compactSkillSync(value: unknown): Record | null { "symlink", "realPath", "skillCount", + "version", "requiredSkills", "missingSkills", "error", @@ -708,6 +711,7 @@ function compactSkillSync(value: unknown): Record | null { source, target, counts: sync.counts ?? null, + version: sync.version ?? null, missing: sync.missing ?? null, permissionFailureCount: permissionFailures.length, permissionFailures: permissionFailures.slice(0, 4), diff --git a/src/components/microservices/code-queue/src/index.ts b/src/components/microservices/code-queue/src/index.ts index 4f974e3d..d4c97c81 100644 --- a/src/components/microservices/code-queue/src/index.ts +++ b/src/components/microservices/code-queue/src/index.ts @@ -2481,6 +2481,43 @@ function resolvedRunnerSkillsPath(): string { return currentSkillAvailability().resolution.runnerEnvValue; } +function runnerSkillsBlocker(): Record | null { + const skills = currentSkillAvailability(); + if (skills.runnerUsable) return null; + const pathSpelling = { + expectedTarget: skills.pathSpelling.expectedTarget, + forbiddenPathChecked: skills.pathSpelling.forbiddenPathChecked, + forbiddenPathExists: skills.pathSpelling.forbiddenPathExists, + forbiddenPathConfigured: skills.pathSpelling.forbiddenPathConfigured, + forbiddenPathRoles: skills.pathSpelling.forbiddenPathRoles, + forbiddenPathMustNotBeUsed: skills.pathSpelling.forbiddenPathMustNotBeUsed, + }; + return { + ok: false, + runnerDisposition: "infra-blocked", + 
failureKind: skills.blocker ?? "runner-skills-unavailable", + degradedReason: skills.degradedReason ?? skills.blocker ?? "runner-skills-unavailable", + message: "Code Queue runner skills are unavailable; refusing to start a code agent without the controlled skills projection.", + checkedAt: skills.checkedAt, + source: skills.source, + target: skills.target, + resolvedPath: skills.resolvedPath, + resolvedPathSource: skills.resolvedPathSource, + requiredSkills: skills.requiredSkills, + missingSkills: skills.missingSkills, + counts: { + sourceSkills: skills.sourceSkillCount, + targetSkills: skills.targetSkillCount, + missingSourceSkills: skills.sourceMissingSkills.length, + missingTargetSkills: skills.targetMissingSkills.length, + }, + version: skills.version, + pathSpelling, + repairHint: skills.repairHint, + valuesPrinted: false, + }; +} + function collectDevReady(): JsonValue { const now = Date.now(); if (devReadyCache !== null && now - devReadyCache.checkedAtMs < 30_000) return devReadyCache.value; @@ -4046,6 +4083,44 @@ function failTaskForFallbackRetryLimit(task: QueueTask, judge: JudgeResult | nul async function runTask(task: QueueTask): Promise { const claimQueueId = queueIdOf(task); logger("info", "task_processor_start", { taskId: task.id, queueId: claimQueueId, providerId: task.providerId, executionMode: task.executionMode, cwd: task.cwd, maxAttempts: task.maxAttempts, model: task.model, agentPort: codeAgentPortForModel(task.model), promptPreview: safePreview(task.prompt, 240) }); + const skillsBlocker = runnerSkillsBlocker(); + if (skillsBlocker !== null) { + const finishedAt = nowIso(); + const message = String(skillsBlocker.message); + task.status = "failed"; + task.startedAt ??= finishedAt; + task.finishedAt = finishedAt; + task.updatedAt = finishedAt; + task.activeTurnId = null; + task.lastError = message; + appendOutput(task, "error", `${message}\n${JSON.stringify(skillsBlocker)}\n`, "runner-skills/blocker"); + task.attempts.push({ + index: 
task.attempts.length + 1, + mode: task.nextMode ?? "initial", + startedAt: finishedAt, + finishedAt, + providerId: task.providerId, + executionMode: task.executionMode, + terminalStatus: "failed", + transportClosedBeforeTerminal: false, + appServerExitCode: null, + appServerSignal: null, + error: message, + events: [], + finalResponse: "", + finalResponsePreview: "", + finalResponseChars: 0, + judge: null, + judgeAt: null, + judgeSeq: null, + stderrTail: "", + runnerErrorClassification: skillsBlocker as JsonValue, + }); + persistTaskState(task); + logger("error", "task_blocked_by_runner_skills", { taskId: task.id, queueId: claimQueueId, skills: skillsBlocker as JsonValue }); + void notifyTaskTerminal(task); + return; + } if ( task.status === "retry_wait" && task.lastJudge?.source === "fallback" diff --git a/src/components/microservices/code-queue/src/runner-error-classifier.ts b/src/components/microservices/code-queue/src/runner-error-classifier.ts index 1fd451c8..9e864142 100644 --- a/src/components/microservices/code-queue/src/runner-error-classifier.ts +++ b/src/components/microservices/code-queue/src/runner-error-classifier.ts @@ -101,6 +101,10 @@ export function classifyRunnerError(message: string, providerId?: string | null) const schedulerEvidence = collectEvidence(normalized, [ /code[- ]queue scheduler/gu, /\bscheduler\b/gu, + /runner skills?/gu, + /skills projection/gu, + /required[- ]target[- ]skills[- ]missing/gu, + /forbidden[- ]skills[- ]path[- ]configured/gu, /active run/gu, /runtime-preflight/gu, /database claim/gu, diff --git a/src/components/microservices/code-queue/src/skill-availability.ts b/src/components/microservices/code-queue/src/skill-availability.ts index 66c4b18a..5a9e0d44 100644 --- a/src/components/microservices/code-queue/src/skill-availability.ts +++ b/src/components/microservices/code-queue/src/skill-availability.ts @@ -1,4 +1,5 @@ import { accessSync, constants, existsSync, lstatSync, readFileSync, readdirSync, realpathSync, 
statSync } from "node:fs"; +import { createHash } from "node:crypto"; import { resolve } from "node:path"; import { spawnSync } from "node:child_process"; import type { JsonValue } from "./types"; @@ -70,6 +71,16 @@ export interface SkillAvailabilityReport { forbiddenPathMustNotBeUsed: true; }; expectedMount: string; + version: { + selectedFingerprint: string | null; + selectedLatestMtime: string | null; + sourceFingerprint: string | null; + targetFingerprint: string | null; + sourceLatestMtime: string | null; + targetLatestMtime: string | null; + sourceSampledSkillNames: string[]; + targetSampledSkillNames: string[]; + }; repairHint: string | null; error: string | null; valuesPrinted: false; @@ -87,6 +98,12 @@ export interface SkillSyncPathReport { symlink: boolean; realPath: string | null; skillCount: number; + version: { + fingerprint: string | null; + latestMtime: string | null; + latestMtimeMs: number | null; + sampledSkillNames: string[]; + }; requiredSkills: string[]; missingSkills: string[]; skills: Array<{ name: string; present: boolean; skillMdPresent: boolean; path: string }>; @@ -139,6 +156,14 @@ export interface SkillSyncPreflightReport { missingSourceSkills: number; missingTargetSkills: number; }; + version: { + sourceFingerprint: string | null; + targetFingerprint: string | null; + sourceLatestMtime: string | null; + targetLatestMtime: string | null; + sourceSampledSkillNames: string[]; + targetSampledSkillNames: string[]; + }; missing: { sourceSkills: string[]; targetSkills: string[]; @@ -246,6 +271,38 @@ function skillStatus(target: string, requiredSkills: string[]): SkillAvailabilit }); } +function skillSetVersion(path: string, readable: boolean): SkillSyncPathReport["version"] { + if (!readable) return { fingerprint: null, latestMtime: null, latestMtimeMs: null, sampledSkillNames: [] }; + try { + const entries = readdirSync(path, { withFileTypes: true }) + .filter((entry) => entry.isDirectory()) + .map((entry) => { + const skillPath = 
resolve(path, entry.name); + const skillMdPath = resolve(skillPath, "SKILL.md"); + const skillStat = existsSync(skillMdPath) ? statSync(skillMdPath) : statSync(skillPath); + return { + name: entry.name, + mtimeMs: Math.floor(skillStat.mtimeMs), + skillMd: existsSync(skillMdPath), + }; + }) + .sort((left, right) => left.name.localeCompare(right.name)); + const latestMtimeMs = entries.reduce((latest, entry) => latest === null || entry.mtimeMs > latest ? entry.mtimeMs : latest, null); + const fingerprint = createHash("sha256") + .update(JSON.stringify(entries)) + .digest("hex") + .slice(0, 16); + return { + fingerprint: entries.length === 0 ? null : fingerprint, + latestMtime: latestMtimeMs === null ? null : new Date(latestMtimeMs).toISOString(), + latestMtimeMs, + sampledSkillNames: entries.map((entry) => entry.name).slice(0, 12), + }; + } catch { + return { fingerprint: null, latestMtime: null, latestMtimeMs: null, sampledSkillNames: [] }; + } +} + function accessProbe(path: string, mode: number, operation: string): { ok: boolean; failure: { path: string; operation: string; error: string } | null } { try { accessSync(path, mode); @@ -332,6 +389,7 @@ function collectSyncPathReport(path: string, approved: boolean, requiredSkills: symlink, realPath, skillCount, + version: skillSetVersion(path, readable), requiredSkills, missingSkills, skills, @@ -479,6 +537,16 @@ export function collectSkillAvailability(options: SkillAvailabilityOptions): Ski forbiddenPathMustNotBeUsed: true, }, expectedMount: `${defaultSource} mounted read-only to ${expectedTarget}`, + version: { + selectedFingerprint: selectedReport.version.fingerprint, + selectedLatestMtime: selectedReport.version.latestMtime, + sourceFingerprint: sourceProbe.version.fingerprint, + targetFingerprint: targetProbe.version.fingerprint, + sourceLatestMtime: sourceProbe.version.latestMtime, + targetLatestMtime: targetProbe.version.latestMtime, + sourceSampledSkillNames: sourceProbe.version.sampledSkillNames, + 
targetSampledSkillNames: targetProbe.version.sampledSkillNames, + }, repairHint: contractOk ? null : runnerUsable @@ -555,6 +623,14 @@ export function collectSkillSyncPreflight(options: SkillSyncPreflightOptions = { missingSourceSkills: source.report.missingSkills.length, missingTargetSkills: target.report.missingSkills.length, }, + version: { + sourceFingerprint: source.report.version.fingerprint, + targetFingerprint: target.report.version.fingerprint, + sourceLatestMtime: source.report.version.latestMtime, + targetLatestMtime: target.report.version.latestMtime, + sourceSampledSkillNames: source.report.version.sampledSkillNames, + targetSampledSkillNames: target.report.version.sampledSkillNames, + }, missing: { sourceSkills: source.report.missingSkills, targetSkills: target.report.missingSkills,