From 021a9eef01cd2c36a643ca614d3754d2d429cacc Mon Sep 17 00:00:00 2001 From: Codex Date: Wed, 20 May 2026 21:49:50 +0000 Subject: [PATCH] feat: add artifact publish preflight --- docs/reference/artifact-registry.md | 2 + docs/reference/ci.md | 2 + docs/reference/cicd-standardization.md | 2 + docs/reference/cli.md | 2 +- docs/reference/code-queue-supervision.md | 2 + docs/reference/deploy.md | 2 + ...sh-user-service-preflight-contract-test.ts | 101 +++++ scripts/src/artifact-registry.ts | 66 ++- scripts/src/ci.ts | 379 ++++++++++++++++-- scripts/src/remote.ts | 123 +++++- 10 files changed, 636 insertions(+), 45 deletions(-) create mode 100644 scripts/ci-publish-user-service-preflight-contract-test.ts diff --git a/docs/reference/artifact-registry.md b/docs/reference/artifact-registry.md index e8eb34b0..fdbee57f 100644 --- a/docs/reference/artifact-registry.md +++ b/docs/reference/artifact-registry.md @@ -126,6 +126,8 @@ bun scripts/cli.ts ssh D601 argv bash -lc '' `status` 表示只读查询是否成功;未安装时仍可 `ok=true` 并报告 `installed=false`。`health` 表示 registry 是否已按期望运行;未安装或不健康时返回 `ok=false`。两个只读命令都应输出 `decision`、`retryable`、`healthyScopes`、`failedScopes` 和 `runtimeApiHealthy`,方便上层 provider triage 判断局部退化范围。 +这两个只读命令也可以通过远端 frontend 透传调用,适合 runner 或指挥官在不登录主 server 本机 Docker 环境时做预检。若 backend-core、database、provider-dispatch 或 provider-host-ssh 缺失,CLI 必须返回结构化 `infra-blocked` 和缺失通道,而不是让调用方只看到 Docker 的 `No such container`。 + registry health 的 `decision=service-degraded` 不等同于 D601 全局离线。特别是当 systemd unit inactive 或 unit hash drift,但 Docker container running、loopback listener 正常、`/v2/` 返回 200 时,runtime registry API 仍可用;这种状态应作为 registry 服务治理问题处理,不能覆盖 provider heartbeat、Host SSH、k3sctl-adapter、Code Queue scheduler 或业务 API 的健康证据。 ## Manual Maintenance diff --git a/docs/reference/ci.md b/docs/reference/ci.md index 70a7c139..71fc93f8 100644 --- a/docs/reference/ci.md +++ b/docs/reference/ci.md @@ -124,6 +124,8 @@ The CI user-service artifact task must follow these rules: - For D601 direct services, 
`findjob` and `pipeline` have reviewed dev/prod D601 Compose artifact consumers, `met-nonlinear` is dry-run only until the long-running service image contract matches the published artifact, and `k3sctl-adapter` is supervisor-only because it is the native k3s control bridge outside the k3s failure domain. - ClaudeQQ source comes from `https://gitee.com/lyon1998/agent_skills`; the producer exports the `claudeqq/` subtree and overlays the UniDesk Dockerfile plus API adapter from `src/components/microservices/claudeqq/` before building. Runtime topology and deploy intent still live in manifests and `deploy.json`, not in `CI.json`. +The same command also has a read-only preflight mode: `bun scripts/cli.ts ci publish-user-service --service <service-id> --commit <full-commit-sha> --dry-run`. That mode may be called from the main server or through remote frontend passthrough, and it must return `runnerDisposition`, `missingChannels`, `channels`, `registry`, `artifactSummary`, `boundary` and `next` without creating a PipelineRun or pushing an image. If backend-core, database or provider channels are missing, the result must be a structured `runnerDisposition=infra-blocked` response that names the absent channels in `missingChannels`, not a bare container lookup failure. + Publish a Baidu Netdisk artifact: ```bash diff --git a/docs/reference/cicd-standardization.md b/docs/reference/cicd-standardization.md index 36301b89..f1275dca 100644 --- a/docs/reference/cicd-standardization.md +++ b/docs/reference/cicd-standardization.md @@ -187,3 +187,5 @@ backend-core and D601 `code-queue` remain restricted to dev image validation in ## Validation Boundary This precheck uses lightweight parsing and dry-run evidence only. It intentionally does not run full `check`, e2e, Playwright, or other broad browser/runtime test suites on the master server because those are outside the precheck scope and may exceed master-server resource limits. 
`backend-core` and D601 `code-queue` production validation are also out of scope; backend-core dev rollout can be attempted only through the existing D601 dev path, and a provider-offline result is an infrastructure blocker rather than permission to validate production. + +The structured read-only preflight entrypoints are `artifact-registry status|health` and `ci publish-user-service --dry-run`. Remote runners may call them through the frontend passthrough path, and the result must classify missing backend-core, database or provider channels as `runnerDisposition=infra-blocked` with explicit missing channel names. Those cases are infrastructure blockers, not business failures and not a license to retry a real publish. diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 25ae92d1..1de87e85 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -146,7 +146,7 @@ bun scripts/cli.ts ssh D601 glob --root /home/ubuntu/pikapython --pattern '**/*- `--main-server-ip` 是一个全局前缀,必须放在需要透传的命令同一次调用中,例如 `bun scripts/cli.ts --main-server-ip 74.48.78.17 debug health`。默认传输是公网 frontend:本地 CLI 读取本仓库 `config.json` 中的 frontend 登录账号密码,登录 `http://:/` 获取 HttpOnly session cookie,然后通过 frontend 的 `/api/*` 同源代理访问 backend-core 内网 API;因此计算节点只需要能访问公网 frontend,不需要主 server SSH key,也不需要打开 backend-core REST API 或 PostgreSQL 端口。 -默认 frontend 传输支持 `debug health`、`debug dispatch`、`debug task`、`microservice list/status/health/diagnostics/tunnel-self-test/proxy`、`decision upload/list/show/health`、`decision requirement list/upsert`、`decision diary import/list/months/show/edit/upsert`、`codex task `、`codex tasks`、`codex output `、`codex judge --attempt N` 和 `ssh `。运行中纠偏 `codex steer` 属于 active run write control,应在主 server 本机 CLI 或显式 SSH 传输上执行,避免公网 frontend 透传限制 stdin/body 审计语义。其中 `ssh` 的 remote frontend 传输使用 `host.ssh` dispatch 执行有界远端命令,适合 `ssh D601 hostname` 和 `ssh D601 skills` 这类自测;交互式登录 shell 仍应在主 server 本机 CLI 使用,或显式切换到旧 SSH 传输后在主 server 上执行。frontend 远程透传不会流式转发本地 stdin,因此 `ssh py < 
script.py`、`ssh apply-patch < patch.diff` 这类 stdin-backed helper 必须在主 server 本机运行,或显式切换到 `--main-server-transport ssh`。若确实需要旧行为,可使用 `--main-server-key ` 或 `--main-server-transport ssh`,这时 CLI 会通过 SSH 登录主 server 的 `--main-server-root` 目录执行同一个 `bun scripts/cli.ts `。 +默认 frontend 传输支持 `debug health`、`debug dispatch`、`debug task`、`artifact-registry status|health`、`ci publish-user-service --dry-run`、`microservice list/status/health/diagnostics/tunnel-self-test/proxy`、`decision upload/list/show/health`、`decision requirement list/upsert`、`decision diary import/list/months/show/edit/upsert`、`codex task `、`codex tasks`、`codex output `、`codex judge --attempt N` 和 `ssh `。运行中纠偏 `codex steer` 属于 active run write control,应在主 server 本机 CLI 或显式 SSH 传输上执行,避免公网 frontend 透传限制 stdin/body 审计语义。其中 `ssh` 的 remote frontend 传输使用 `host.ssh` dispatch 执行有界远端命令,适合 `ssh D601 hostname` 和 `ssh D601 skills` 这类自测;交互式登录 shell 仍应在主 server 本机 CLI 使用,或显式切换到旧 SSH 传输后在主 server 上执行。frontend 远程透传不会流式转发本地 stdin,因此 `ssh py < script.py`、`ssh apply-patch < patch.diff` 这类 stdin-backed helper 必须在主 server 本机运行,或显式切换到 `--main-server-transport ssh`。当 backend-core、database、provider-dispatch 或 provider-host-ssh 缺失时,这些 read-only 预检必须返回结构化 `runnerDisposition=infra-blocked` 和缺失通道列表,而不是裸 `No such container`。若确实需要旧行为,可使用 `--main-server-key ` 或 `--main-server-transport ssh`,这时 CLI 会通过 SSH 登录主 server 的 `--main-server-root` 目录执行同一个 `bun scripts/cli.ts `。 计算节点可以用该入口测试自身的远程升级闭环,而不需要在计算节点公开 core REST API 或 database。标准顺序是:先运行 `bun scripts/cli.ts --main-server-ip 74.48.78.17 debug health` 确认主 server 看到当前 Provider 在线,且该 Provider labels 中 `unideskCapabilities` 包含 `host.ssh`、`hostSshConfigured=true`、`hostSshKeyPresent=true`;再运行 `bun scripts/cli.ts --main-server-ip 74.48.78.17 debug dispatch provider.upgrade --mode schedule --wait-ms 15000` 触发真实 `provider.upgrade`;随后再次运行 `debug health` 确认节点重新上线;最后运行 `bun scripts/cli.ts --main-server-ip 74.48.78.17 debug dispatch host.ssh --wait-ms 15000` 和 `bun scripts/cli.ts --main-server-ip 
74.48.78.17 ssh hostname` 验证 SSH 透传能力。provider-gateway 新部署或升级后没有完成这组 remote CLI 自测,不能视为交付完成。 diff --git a/docs/reference/code-queue-supervision.md b/docs/reference/code-queue-supervision.md index 346ade75..7f7ac683 100644 --- a/docs/reference/code-queue-supervision.md +++ b/docs/reference/code-queue-supervision.md @@ -247,6 +247,8 @@ Code Queue task 不是只要 push 代码就算完成。 如果业务任务发现缺少工具或凭证路径,指挥官应把它拆成独立 infra task,而不是埋在业务任务 prompt 中。业务任务在 bridge 存在时应继续推进。 +Artifact publish preflight 也属于基础设施问题的只读分类范畴:`artifact-registry status|health` 和 `ci publish-user-service --dry-run` 返回 `runnerDisposition=infra-blocked` 时,通常说明 backend-core/database/provider 通道缺失,而不是用户服务本身的业务错误。此时应先恢复控制通道,再决定是否重试,不要把裸 `No such container` 当成可直接回归的业务失败。 + ## 指挥边界 指挥官可以: diff --git a/docs/reference/deploy.md b/docs/reference/deploy.md index 5464d627..e92106cb 100644 --- a/docs/reference/deploy.md +++ b/docs/reference/deploy.md @@ -235,6 +235,8 @@ D601 默认 `kubectl` context 可能指向 Docker Desktop、kind 或其他本地 Continuous integration is intentionally separate from this deploy reconciler. D601 k3s hosts Tekton CI resources described in `docs/reference/ci.md`; PipelineRuns may clone, check, run read-only performance gates, create temporary CI-owned namespaces for dev manifest smoke e2e, or publish commit-pinned backend-core/user-service image artifacts to the D601 artifact registry. They must not call `deploy apply`, `codex deploy`, `kubectl rollout restart` for production services, mutate `deploy.json`, or write production namespaces. +Artifact publish preflight is part of CI, not deploy: `artifact-registry status|health` and `ci publish-user-service --dry-run` are the supported read-only checks for registry reachability and user-service publish readiness. These commands must not depend on a coincidentally present local `unidesk-database` container, and when backend-core/database/provider channels are missing they should return structured `infra-blocked` instead of a raw container error. 
+ The Code Queue performance gate may create a temporary `code-queue-ci-read` service and read the main PostgreSQL through the existing `d601-tcp-egress-gateway`. Because it runs with `CODE_QUEUE_SERVICE_ROLE=read`, scheduler/backfill/notification disabled and EmptyDir state, it is not deployment truth and does not need a temporary database for the current read-only checks. ## Version Stamping And Verification diff --git a/scripts/ci-publish-user-service-preflight-contract-test.ts b/scripts/ci-publish-user-service-preflight-contract-test.ts new file mode 100644 index 00000000..d0d5a402 --- /dev/null +++ b/scripts/ci-publish-user-service-preflight-contract-test.ts @@ -0,0 +1,101 @@ +import { readConfig } from "./src/config"; +import { runCiPublishUserServiceDryRunPreflight, type PublishPreflightTransport } from "./src/ci"; + +type JsonRecord = Record; + +function assertCondition(condition: unknown, message: string, detail: unknown = {}): void { + if (!condition) throw new Error(`${message}: ${JSON.stringify(detail)}`); +} + +function asRecord(value: unknown, label: string): JsonRecord { + assertCondition(typeof value === "object" && value !== null && !Array.isArray(value), `${label} must be an object`, { value }); + return value as JsonRecord; +} + +const commit = "0123456789abcdef0123456789abcdef01234567"; +const config = readConfig(); + +const infraBlockedTransport: PublishPreflightTransport = { + coreFetch: async (path) => ({ + ok: false, + status: 503, + body: { + ok: false, + failureKind: "target-stack-not-running", + runnerDisposition: "infra-blocked", + degradedReason: "backend-core-container-missing", + message: `backend-core unavailable for ${path}`, + }, + }), + dispatchHostSsh: async (command, waitMs, remoteTimeoutMs) => ({ + ok: false, + taskId: null, + status: "infra-blocked", + stdout: "", + stderr: `backend-core bridge unavailable while dispatching readonly SSH task (${waitMs}/${remoteTimeoutMs})`, + exitCode: null, + raw: { + ok: false, + 
failureKind: "target-stack-not-running", + runnerDisposition: "infra-blocked", + command, + }, + }), + commandCwd: "/workspace/unidesk", + artifactRegistryCommand: (probe) => [process.execPath, "scripts/cli.ts", "ssh", probe.providerId, "argv", "bash", "-lc", probe.script], +}; + +async function main(): Promise { + const result = await runCiPublishUserServiceDryRunPreflight(config, [ + "publish-user-service", + "--service", + "frontend", + "--commit", + commit, + "--dry-run", + ], infraBlockedTransport); + + const record = asRecord(result, "preflight"); + const source = asRecord(record.source, "source"); + const channels = Array.isArray(record.channels) ? record.channels.map((item) => asRecord(item, "channel")) : []; + const registry = asRecord(record.registry, "registry"); + const backendCore = asRecord(channels.find((item) => item.channel === "backend-core-api")?.detail, "backendCore detail"); + const backendCoreTransport = asRecord(backendCore.detail, "backendCore transport payload"); + const backendCoreBody = asRecord(backendCoreTransport.body, "backendCore body payload"); + const providerDispatch = asRecord(channels.find((item) => item.channel === "provider-dispatch")?.detail, "providerDispatch detail"); + const missingChannels = Array.isArray(record.missingChannels) ? 
record.missingChannels as string[] : []; + + assertCondition(record.ok === false, "infra-blocked preflight should fail", record); + assertCondition(record.mode === "dry-run-preflight", "dry-run preflight mode should be reported", record); + assertCondition(record.runnerDisposition === "infra-blocked", "runnerDisposition should be infra-blocked", record); + assertCondition(Array.isArray(record.missingChannels), "missingChannels should be an array", record); + assertCondition(missingChannels.includes("backend-core-api"), "backend-core-api should be missing", record); + assertCondition(missingChannels.includes("database"), "database should be missing", record); + assertCondition(missingChannels.includes("provider-dispatch"), "provider-dispatch should be missing", record); + assertCondition(missingChannels.includes("provider-host-ssh"), "provider-host-ssh should be missing", record); + assertCondition(missingChannels.includes("artifact-registry"), "artifact-registry should be missing", record); + assertCondition(!JSON.stringify(record).includes("No such container: unidesk-database"), "raw container error should not leak", record); + assertCondition(backendCoreBody.failureKind === "target-stack-not-running", "backend-core detail should classify target-stack-not-running", backendCoreBody); + assertCondition(providerDispatch.status === "infra-blocked", "provider dispatch should be infra-blocked", providerDispatch); + assertCondition(registry.ok === false, "registry channel should fail without backend-core bridge", registry); + assertCondition(Array.isArray(channels) && channels.length >= 5, "expected five channel probes", channels); + assertCondition(source.mode === "planned-only", "source should remain planned-only", source); + assertCondition(source.repoFetchUrl === "git@github.com:pikasTech/unidesk.git", "source repo should use CI catalog ssh form", source); + assertCondition(asRecord(record.artifactSummary, "artifactSummary").imageRef === 
`127.0.0.1:5000/unidesk/frontend:${commit}`, "artifact ref should remain commit-pinned", record.artifactSummary); + assertCondition(String(record.boundary ?? "").includes("read-only"), "boundary should state preflight is read-only", record); + + process.stdout.write(`${JSON.stringify({ + ok: true, + checks: [ + "dry-run preflight returns infra-blocked when backend-core/database/provider channels are absent", + "missing channel list names the absent channels", + "artifact summary remains commit-pinned and read-only", + ], + missingChannels: record.missingChannels, + registry, + }, null, 2)}\n`); +} + +if (import.meta.main) { + await main(); +} diff --git a/scripts/src/artifact-registry.ts b/scripts/src/artifact-registry.ts index 4de6e4a6..4a4f8899 100644 --- a/scripts/src/artifact-registry.ts +++ b/scripts/src/artifact-registry.ts @@ -5,10 +5,10 @@ import { runCommand, type CommandResult } from "./command"; import { readConfig, type UniDeskConfig, repoRoot, rootPath } from "./config"; import { startJob } from "./jobs"; -type ArtifactRegistryAction = "plan" | "render" | "status" | "health" | "install" | "deploy-backend-core" | "deploy-service"; +export type ArtifactRegistryAction = "plan" | "render" | "status" | "health" | "install" | "deploy-backend-core" | "deploy-service"; type ArtifactDeployEnvironment = "prod" | "dev"; -interface ArtifactRegistryOptions { +export interface ArtifactRegistryOptions { environment: ArtifactDeployEnvironment | null; providerId: string; host: string; @@ -49,6 +49,15 @@ interface RenderedBundle { }; } +export interface ArtifactRegistryReadonlyProbe { + action: "status" | "health"; + providerId: string; + script: string; + timeoutMs: number; + healthMode: boolean; + options: ArtifactRegistryOptions; +} + const defaultOptions: ArtifactRegistryOptions = { environment: null, providerId: "D601", @@ -758,7 +767,7 @@ function environmentValue(value: string, option: string): ArtifactDeployEnvironm throw new Error(`${option} must be one of: 
prod, dev`); } -function parseOptions(args: string[]): ArtifactRegistryOptions { +export function parseArtifactRegistryOptions(args: string[]): ArtifactRegistryOptions { const options = { ...defaultOptions }; for (let index = 0; index < args.length; index += 1) { const arg = args[index]; @@ -1103,7 +1112,7 @@ function registryHealthDecision(checks: Record, commandOk: bool }; } -function commandTail(result: CommandResult): Record { +export function artifactRegistryCommandTail(result: CommandResult): Record { return { command: result.command.length > 7 ? [...result.command.slice(0, 7), ""] : result.command, exitCode: result.exitCode, @@ -1114,6 +1123,10 @@ function commandTail(result: CommandResult): Record { }; } +function commandTail(result: CommandResult): Record { + return artifactRegistryCommandTail(result); +} + function artifactConsumerSpec(serviceId: string, environment: ArtifactDeployEnvironment | null): ArtifactConsumerSpec | null { const key = environment === null || environment === "prod" ? 
serviceId : `${environment}:${serviceId}`; const explicit = artifactConsumerSpecs[key]; @@ -1329,12 +1342,53 @@ function runReadonlyStatus(options: ArtifactRegistryOptions, healthMode: boolean image: options.image, paths: bundle.paths, }, - command: commandTail(result), + command: artifactRegistryCommandTail(result), }; } return statusFromValues(options, parseKeyValueOutput(result.stdout), result, healthMode); } +export function buildArtifactRegistryReadonlyProbe(action: "status" | "health", options: ArtifactRegistryOptions): ArtifactRegistryReadonlyProbe { + const bundle = renderBundle(options); + const healthMode = action === "health"; + return { + action, + providerId: options.providerId, + script: statusScript(options, bundle), + timeoutMs: options.timeoutMs, + healthMode, + options, + }; +} + +export function artifactRegistryReadonlyResultFromCommand( + probe: ArtifactRegistryReadonlyProbe, + command: CommandResult, +): Record { + if (command.exitCode !== 0 || command.timedOut) { + const bundle = renderBundle(probe.options); + return { + ok: false, + readonly: true, + installed: false, + healthy: false, + decision: "retryable-transient", + retryable: true, + healthyScopes: [], + failedScopes: ["provider-ssh-command"], + runtimeApiHealthy: false, + checks: {}, + expected: { + endpoint: `http://${probe.options.host}:${probe.options.port}`, + image: probe.options.image, + paths: bundle.paths, + }, + command: artifactRegistryCommandTail(command), + }; + } + return statusFromValues(probe.options, parseKeyValueOutput(command.stdout), command, probe.healthMode); +} + function remoteWriteFileCommand(item: RenderedFile): string { const encoded = Buffer.from(item.content, "utf8").toString("base64"); const rootOwned = item.path.startsWith("/etc/"); @@ -2540,7 +2594,7 @@ export async function runArtifactRegistryCommand(args: string[]): Promise unknown | Promise; + dispatchHostSsh: (command: string, waitMs: number, remoteTimeoutMs: number) => Promise; + commandCwd: string; 
+ artifactRegistryCommand: (probe: ArtifactRegistryReadonlyProbe) => string[]; +} + interface PipelineRunCondition { ok: boolean | null; status: string; @@ -262,6 +296,42 @@ function blockedReason(artifact: CiSourceBuildCatalogArtifact): string { return artifact.blockedReason; } +function userServicePublishBoundaryBlock( + config: UniDeskConfig, + serviceId: string, + commit: string, + artifact: CiSourceBuildCatalogArtifact, +): Record | null { + const configService = config.microservices.find((item) => item.id === serviceId); + if (configService === undefined) return null; + const isD601K3sService = configService.providerId === d601ProviderId + && configService.development.providerId === d601ProviderId + && configService.deployment.mode === "k3sctl-managed"; + const isD601DirectService = configService.providerId === d601ProviderId + && configService.development.providerId === d601ProviderId + && configService.deployment.mode === "unidesk-direct"; + const isMainServerDirectService = configService.providerId === "main-server" + && configService.development.providerId === "main-server" + && configService.deployment.mode === "unidesk-direct"; + const isMainServerInternalSidecar = configService.providerId === "main-server" + && configService.development.providerId === "main-server" + && configService.deployment.mode === "internal-sidecar"; + if (isD601K3sService || isD601DirectService || isMainServerDirectService || isMainServerInternalSidecar) return null; + return { + ok: false, + status: "blocked", + error: "blocked", + serviceId, + commit, + reason: `config.json marks ${serviceId} as ${configService.providerId}/${configService.deployment.mode}, which is outside the reviewed CI artifact producer boundary`, + catalogArtifact: artifact, + configService: { + providerId: configService.providerId, + deploymentMode: configService.deployment.mode, + }, + }; +} + function chunks(value: string, size: number): string[] { const result: string[] = []; for (let index = 0; index 
< value.length; index += size) { @@ -282,6 +352,100 @@ function coreBody(response: unknown): Record | null { return asRecord(asRecord(response)?.body); } +function responseOk(response: unknown): boolean { + if (typeof response !== "object" || response === null) return false; + const record = response as Record; + if ("ok" in record && record.ok === false) return false; + const body = asRecord(record.body); + if (body !== null && "ok" in body && body.ok === false) return false; + return true; +} + +function channelProbe( + channel: PublishPreflightChannelProbe["channel"], + ok: boolean, + requiredFor: string, + detail: unknown, +): PublishPreflightChannelProbe { + return { channel, ok, requiredFor, detail }; +} + +function backendCoreUnavailable(value: unknown): boolean { + const record = asRecord(value); + if (record?.runnerDisposition === "infra-blocked") return true; + if (record?.failureKind === "target-stack-not-running") return true; + const text = JSON.stringify(value) ?? ""; + return text.includes("No such container: unidesk-backend-core") + || text.includes("No such container: unidesk-database"); +} + +function dispatchPreflightFailure(command: string, result: DispatchResult): DispatchResult { + return { + ok: false, + taskId: result.taskId, + status: result.status, + stdout: result.stdout.slice(-4000), + stderr: result.stderr.slice(-4000), + exitCode: result.exitCode, + raw: { + command, + taskId: result.taskId, + status: result.status, + exitCode: result.exitCode, + stderrTail: result.stderr.slice(-1200), + stdoutTail: result.stdout.slice(-1200), + raw: result.raw, + }, + }; +} + +function commandResultFromDispatch(command: string[], cwd: string, result: DispatchResult) { + return { + command, + cwd, + exitCode: result.exitCode, + stdout: result.stdout, + stderr: result.stderr, + signal: null, + timedOut: result.status === "timeout", + }; +} + +async function dispatchReadonlySsh(command: string, waitMs: number, remoteTimeoutMs: number): Promise { + try { 
+ const result = await dispatchSsh(command, waitMs, remoteTimeoutMs); + if (!result.ok && backendCoreUnavailable(result.raw)) { + return { + ...result, + status: "infra-blocked", + stderr: "backend-core bridge unavailable while dispatching readonly SSH task", + }; + } + return result; + } catch (error) { + return { + ok: false, + taskId: null, + status: null, + stdout: "", + stderr: error instanceof Error ? error.message : String(error), + exitCode: null, + raw: error instanceof Error ? { name: error.name, message: error.message, stack: error.stack ?? null } : String(error), + }; + } +} + +function artifactRegistryProbeCommand(probe: ArtifactRegistryReadonlyProbe): string[] { + return [process.execPath, "scripts/cli.ts", "ssh", probe.providerId, "argv", "bash", "-lc", probe.script]; +} + +const localPublishPreflightTransport: PublishPreflightTransport = { + coreFetch: (path, init) => coreInternalFetch(path, init), + dispatchHostSsh: dispatchReadonlySsh, + commandCwd: repoRoot, + artifactRegistryCommand: artifactRegistryProbeCommand, +}; + function positiveManifestNumber(value: unknown, fallback: number, path: string): number { if (value === undefined || value === null) return fallback; if (typeof value !== "number" || !Number.isInteger(value) || value <= 0) throw new Error(`${path} must be a positive integer`); @@ -1117,6 +1281,92 @@ function assertArtifactSummaryComplete(artifact: ArtifactSummary, pipelineRun: s } } +async function publishUserServicePreflight( + _config: UniDeskConfig, + options: CiPublishUserServiceArtifactOptions, + plannedArtifact: ArtifactSummary, + transport: PublishPreflightTransport, +): Promise { + const providerId = d601ProviderId; + const channels: PublishPreflightChannelProbe[] = []; + const overview = await transport.coreFetch("/api/overview", { maxResponseBytes: 500_000 }); + const overviewBody = coreBody(overview); + const backendCoreOk = responseOk(overview) && overviewBody?.dbReady === true; + 
channels.push(channelProbe("backend-core-api", backendCoreOk, "dispatch API, provider catalog, task polling, and database-backed CI state", { + ok: responseOk(overview), + dbReady: overviewBody?.dbReady ?? null, + runnerDisposition: asRecord(overview)?.runnerDisposition ?? null, + failureKind: asRecord(overview)?.failureKind ?? null, + detail: backendCoreUnavailable(overview) ? overview : { + status: asRecord(overview)?.status ?? null, + body: overviewBody, + }, + })); + channels.push(channelProbe("database", backendCoreOk, "backend-core task dispatch, provider state, Tekton task polling, and source identity lookup", { + dbReady: overviewBody?.dbReady ?? false, + observedThrough: "backend-core /api/overview", + })); + + const probeScript = [ + "set -euo pipefail", + "printf 'provider_host_ssh=ok\\n'", + "command -v bash >/dev/null", + "command -v docker >/dev/null", + "command -v kubectl >/dev/null", + "test -S /var/run/docker.sock || test -S /run/docker.sock || true", + ].join("\n"); + const sshProbe = await transport.dispatchHostSsh(probeScript, 30_000, 15_000); + channels.push(channelProbe("provider-dispatch", sshProbe.taskId !== null || sshProbe.ok, "backend-core /api/dispatch can create D601 host.ssh tasks", { + taskId: sshProbe.taskId, + status: sshProbe.status, + ok: sshProbe.taskId !== null || sshProbe.ok, + exitCode: sshProbe.exitCode, + stderrTail: sshProbe.stderr.slice(-1200), + })); + channels.push(channelProbe("provider-host-ssh", sshProbe.ok, "D601 source export, registry checks, kubectl/Tekton submission, and artifact summary reads", { + taskId: sshProbe.taskId, + status: sshProbe.status, + exitCode: sshProbe.exitCode, + stdoutTail: sshProbe.stdout.slice(-1200), + stderrTail: sshProbe.stderr.slice(-1200), + raw: sshProbe.ok ? 
undefined : dispatchPreflightFailure("host.ssh readonly probe", sshProbe).raw, + })); + + const registryOptions = parseArtifactRegistryOptions(["--provider-id", providerId]); + const registryProbe = buildArtifactRegistryReadonlyProbe("health", registryOptions); + const registryDispatch = await transport.dispatchHostSsh(registryProbe.script, Math.max(registryProbe.timeoutMs, 30_000), registryProbe.timeoutMs); + const registryCommand = commandResultFromDispatch(transport.artifactRegistryCommand(registryProbe), transport.commandCwd, registryDispatch); + const registry = artifactRegistryReadonlyResultFromCommand(registryProbe, registryCommand); + const registryRecord = asRecord(registry); + const registryOk = registryRecord?.ok === true || registryRecord?.runtimeApiHealthy === true; + channels.push(channelProbe("artifact-registry", registryOk, "commit-pinned image push and later CD manifest checks", registry)); + + const missingChannels = channels.filter((item) => !item.ok).map((item) => item.channel); + const ready = missingChannels.length === 0; + return { + ok: ready, + runnerDisposition: ready ? "ready" : "infra-blocked", + serviceId: options.serviceId, + commit: options.commit, + providerId, + supportedArtifactPublish: true, + missingChannels, + channels, + registry, + next: ready + ? 
[ + `bun scripts/cli.ts ci publish-user-service --service ${options.serviceId} --commit ${options.commit} --wait-ms 1200000`, + `later CD must consume ${plannedArtifact.imageRef}; CI itself must not deploy production`, + ] + : [ + "Run from the main-server CLI or use remote frontend transport against a healthy frontend/backend-core path.", + "Restore backend-core/database/provider-gateway/Host SSH connectivity before retrying artifact publication.", + "Use bun scripts/cli.ts artifact-registry health --provider-id D601 to recheck registry reachability after the control bridge is restored.", + ], + boundary: "preflight is read-only: no D601 source export, no Tekton PipelineRun, no image push, no deploy apply, no service restart", + }; +} + async function readArtifactSummaryFromPipelineRun(name: string, context: ArtifactSummaryContext): Promise { const result = await runRemoteKubectlRaw([ "set -euo pipefail", @@ -1274,17 +1524,23 @@ async function publishUserServiceArtifact(config: UniDeskConfig, options: CiPubl const plannedArtifact = artifactSummaryDefaults(summaryContext); const plannedRepoFetchUrl = repoSshUrl(options.repoUrl); if (options.dryRun) { + const preflight = await publishUserServicePreflight(config, options, plannedArtifact, localPublishPreflightTransport); return { - ok: true, - mode: "dry-run", + ok: preflight.ok, + mode: "dry-run-preflight", + runnerDisposition: preflight.runnerDisposition, pipeline: "unidesk-user-service-artifact-publish", namespace: "unidesk-ci", repoUrl: options.repoUrl, commit: options.commit, serviceId: options.serviceId, + supportedArtifactPublish: preflight.supportedArtifactPublish, + missingChannels: preflight.missingChannels, + channels: preflight.channels, + registry: preflight.registry, sourceHostPath: options.sourceHostPath, source: { - ok: true, + ok: preflight.ok, mode: "planned-only", providerId: d601ProviderId, repoUrl: options.repoUrl, @@ -1299,10 +1555,8 @@ async function publishUserServiceArtifact(config: 
UniDeskConfig, options: CiPubl }, artifact: plannedArtifact.imageRef, artifactSummary: plannedArtifact, - boundary: "dry-run only; no D601 source export, no Tekton submission, no production mutation", - next: [ - `bun scripts/cli.ts ci publish-user-service --service ${options.serviceId} --commit ${options.commit} --wait-ms 1200000`, - ], + boundary: preflight.boundary, + next: preflight.next, }; } const source = options.serviceId === "claudeqq" @@ -1343,6 +1597,85 @@ async function publishUserServiceArtifact(config: UniDeskConfig, options: CiPubl }; } +export async function runCiPublishUserServiceDryRunPreflight( + config: UniDeskConfig, + args: string[], + transport: PublishPreflightTransport, +): Promise> { + const serviceId = requireServiceId(stringOption(args, "--service") ?? stringOption(args, "--service-id")); + const commit = requireFullCommit(stringOption(args, "--commit") ?? stringOption(args, "--revision")); + if (!args.includes("--dry-run")) throw new Error("publish-user-service preflight requires --dry-run"); + if (stringOption(args, "--repo") !== null || stringOption(args, "--repo-url") !== null) { + throw new Error("ci publish-user-service reads source repo from CI.json; edit CI.json instead of using --repo"); + } + const artifact = resolveCatalogArtifact(serviceId); + if (artifact.kind === "source-build" && artifact.serviceId === "backend-core") { + throw new Error("backend-core uses ci publish-backend-core; publish-user-service is for registered user services"); + } + if (artifact.kind === "upstream-image") { + return blockedArtifactResult(artifact, commit, artifact.blockedReason); + } + if (artifact.status === "blocked") { + return blockedArtifactResult(artifact, commit, blockedReason(artifact)); + } + const dockerfile = requireRepoRelativePath(artifact.source.dockerfile, `CI.json.artifacts.${serviceId}.source.dockerfile`); + const boundaryBlock = userServicePublishBoundaryBlock(config, serviceId, commit, artifact); + if (boundaryBlock !== null) 
return boundaryBlock; + const summaryContext: ArtifactSummaryContext = { + serviceId, + dockerfile, + commit, + repoUrl: artifact.source.repo, + imageRepository: artifact.image.repository, + }; + const plannedArtifact = artifactSummaryDefaults(summaryContext); + const options: CiPublishUserServiceArtifactOptions = { + repoUrl: artifact.source.repo, + commit, + waitMs: numberOption(args, "--wait-ms", 0), + serviceId, + dockerfile, + imageRepository: artifact.image.repository, + sourceHostPath: userServiceArtifactSourceHostPath(serviceId, commit), + dryRun: true, + }; + const preflight = await publishUserServicePreflight(config, options, plannedArtifact, transport); + const plannedRepoFetchUrl = repoSshUrl(options.repoUrl); + return { + ok: preflight.ok, + mode: "dry-run-preflight", + runnerDisposition: preflight.runnerDisposition, + pipeline: "unidesk-user-service-artifact-publish", + namespace: "unidesk-ci", + repoUrl: options.repoUrl, + commit: options.commit, + serviceId: options.serviceId, + supportedArtifactPublish: preflight.supportedArtifactPublish, + missingChannels: preflight.missingChannels, + channels: preflight.channels, + registry: preflight.registry, + sourceHostPath: options.sourceHostPath, + source: { + ok: preflight.ok, + mode: "planned-only", + providerId: d601ProviderId, + repoUrl: options.repoUrl, + repoFetchUrl: plannedRepoFetchUrl, + repoProbeUrl: repoConnectivityProbeUrl(plannedRepoFetchUrl), + commit: options.commit, + serviceId: options.serviceId, + dockerfile: options.dockerfile, + imageRepository: options.imageRepository, + sourceHostPath: options.sourceHostPath, + ...(options.serviceId === "claudeqq" ? 
{ overlay: "UniDesk claudeqq Dockerfile and unidesk-adapter.cjs are injected before Tekton build" } : {}), + }, + artifact: plannedArtifact.imageRef, + artifactSummary: plannedArtifact, + boundary: preflight.boundary, + next: preflight.next, + }; +} + async function runRemoteDevE2ELauncher(options: CiDevE2EOptions): Promise { const scriptTimeoutMs = Math.max(options.scriptTimeoutMs, options.waitMs, 60_000); const remoteTimeoutMs = 45_000; @@ -1727,36 +2060,8 @@ export async function runCiCommand(config: UniDeskConfig, args: string[]): Promi } const repoUrl = artifact.source.repo; const dockerfile = requireRepoRelativePath(artifact.source.dockerfile, `CI.json.artifacts.${serviceId}.source.dockerfile`); - const configService = config.microservices.find((item) => item.id === serviceId); - if (configService !== undefined) { - const isD601K3sService = configService.providerId === d601ProviderId - && configService.development.providerId === d601ProviderId - && configService.deployment.mode === "k3sctl-managed"; - const isD601DirectService = configService.providerId === d601ProviderId - && configService.development.providerId === d601ProviderId - && configService.deployment.mode === "unidesk-direct"; - const isMainServerDirectService = configService.providerId === "main-server" - && configService.development.providerId === "main-server" - && configService.deployment.mode === "unidesk-direct"; - const isMainServerInternalSidecar = configService.providerId === "main-server" - && configService.development.providerId === "main-server" - && configService.deployment.mode === "internal-sidecar"; - if (!isD601K3sService && !isD601DirectService && !isMainServerDirectService && !isMainServerInternalSidecar) { - return { - ok: false, - status: "blocked", - error: "blocked", - serviceId, - commit, - reason: `config.json marks ${serviceId} as ${configService.providerId}/${configService.deployment.mode}, which is outside the reviewed CI artifact producer boundary`, - catalogArtifact: 
artifact, - configService: { - providerId: configService.providerId, - deploymentMode: configService.deployment.mode, - }, - }; - } - } + const boundaryBlock = userServicePublishBoundaryBlock(config, serviceId, commit, artifact); + if (boundaryBlock !== null) return boundaryBlock; return publishUserServiceArtifact(config, { repoUrl, commit, diff --git a/scripts/src/remote.ts b/scripts/src/remote.ts index f319808d..d553818d 100644 --- a/scripts/src/remote.ts +++ b/scripts/src/remote.ts @@ -6,6 +6,12 @@ import { parseNetworkPerfOptions, runNetworkPerf } from "./network-perf"; import { isSshSkillDiscoveryArgs, parseSshArgs } from "./ssh"; import { codexJudgeQueryAsync, codexOutputQueryAsync, codexTaskQueryAsync, codexTasksQueryAsync } from "./code-queue"; import { runDecisionCenterCommandAsync } from "./decision-center"; +import { + artifactRegistryReadonlyResultFromCommand, + buildArtifactRegistryReadonlyProbe, + parseArtifactRegistryOptions, +} from "./artifact-registry"; +import { runCiPublishUserServiceDryRunPreflight } from "./ci"; export interface RemoteCliOptions { host: string | null; @@ -517,6 +523,113 @@ async function remoteMicroservice(session: FrontendSession, args: string[]): Pro throw new Error("remote microservice command must be: microservice list | status | health | diagnostics | tunnel-self-test | proxy "); } +function commandResultFromFrontendTask(command: string[], task: { status?: string; result?: Record } | undefined) { + const result = task?.result ?? {}; + const stdout = typeof result.stdout === "string" ? result.stdout : ""; + const stderr = typeof result.stderr === "string" ? result.stderr : ""; + return { + command, + cwd: ".", + exitCode: typeof result.exitCode === "number" ? 
result.exitCode : null, + stdout, + stderr, + signal: null, + timedOut: task?.status !== "succeeded" && stderr.includes("timeout"), + }; +} + +async function dispatchHostSshJson( + session: FrontendSession, + providerId: string, + command: string, + timeoutMs: number, + waitMs = Math.max(timeoutMs + 5000, 20_000), +): Promise<{ dispatch: FetchJsonResult; wait: unknown; task?: { status?: string; result?: Record }; taskId: string | null }> { + const dispatch = await frontendJson(session, "/api/dispatch", { + method: "POST", + body: JSON.stringify({ + providerId, + command: "host.ssh", + payload: { source: "cli-remote-artifact-registry", mode: "exec", command, timeoutMs }, + }), + }); + const taskId = (dispatch as { body?: { taskId?: string } }).body?.taskId ?? ""; + const wait = taskId.length > 0 ? await waitForFrontendTask(session, taskId, waitMs) : null; + const task = (wait as { task?: { status?: string; result?: Record } } | null)?.task; + return { dispatch, wait, task, taskId: taskId.length > 0 ? taskId : null }; +} + +async function remoteArtifactRegistry(session: FrontendSession, args: string[]): Promise { + const action = args[1] ?? 
"status"; + if (action !== "status" && action !== "health") { + throw new Error("remote frontend transport supports artifact-registry status|health only; use main-server SSH for mutating install/deploy commands"); + } + const options = parseArtifactRegistryOptions(args.slice(2)); + const probe = buildArtifactRegistryReadonlyProbe(action, options); + const dispatched = await dispatchHostSshJson(session, probe.providerId, probe.script, probe.timeoutMs); + const command = ["frontend", "/api/dispatch", probe.providerId, "host.ssh", action]; + const result = commandResultFromFrontendTask(command, dispatched.task); + const registryResult = artifactRegistryReadonlyResultFromCommand(probe, result); + return { + transport: "frontend", + readonly: true, + dispatch: dispatched.dispatch, + wait: dispatched.wait, + result: dispatched.taskId === null + ? { + ok: false, + readonly: true, + installed: false, + healthy: false, + decision: "infra-blocked", + retryable: true, + runnerDisposition: "infra-blocked", + healthyScopes: [], + failedScopes: ["backend-core-api"], + runtimeApiHealthy: false, + channels: [ + { channel: "backend-core-api", ok: false, requiredFor: "frontend /api/dispatch backend-core session creation", detail: dispatched.dispatch }, + { channel: "provider-dispatch", ok: false, requiredFor: "host.ssh task creation", detail: dispatched.dispatch }, + ], + registry: registryResult, + } + : registryResult, + }; +} + +async function remoteCi(session: FrontendSession, config: UniDeskConfig, args: string[]): Promise { + const action = args[1] ?? 
"status"; + if (action !== "publish-user-service" || !args.includes("--dry-run")) { + throw new Error("remote frontend transport supports only ci publish-user-service --dry-run preflight; real CI publication must run from the controlled main-server CLI after preflight is ready"); + } + return { + transport: "frontend", + readonly: true, + result: await runCiPublishUserServiceDryRunPreflight(config, args.slice(1), { + coreFetch: (path, init) => frontendJson(session, path, init === undefined ? undefined : { + method: init.method, + body: init.body === undefined ? undefined : JSON.stringify(init.body), + }, 12_000, init?.maxResponseBytes ?? 500_000), + dispatchHostSsh: async (command, waitMs, remoteTimeoutMs) => { + const dispatched = await dispatchHostSshJson(session, "D601", command, remoteTimeoutMs, waitMs); + const task = dispatched.task; + const result = task?.result ?? {}; + return { + ok: task?.status === "succeeded" && (typeof result.exitCode !== "number" || result.exitCode === 0) && dispatched.taskId !== null, + taskId: dispatched.taskId, + status: task?.status ?? null, + stdout: typeof result.stdout === "string" ? result.stdout : "", + stderr: typeof result.stderr === "string" ? result.stderr : "", + exitCode: typeof result.exitCode === "number" ? result.exitCode : null, + raw: task ?? dispatched.wait ?? dispatched.dispatch, + }; + }, + commandCwd: ".", + artifactRegistryCommand: (probe) => ["frontend", "/api/dispatch", probe.providerId, "host.ssh", probe.action], + }), + }; +} + async function remoteCodeQueue(session: FrontendSession, args: string[]): Promise { const action = args[1] ?? 
"task"; if (action !== "task" && action !== "summary" && action !== "show" && action !== "tasks" && action !== "overview" && action !== "output" && action !== "judge") { @@ -610,7 +723,7 @@ async function runRemoteCliOverFrontend(options: RemoteCliOptions, config: UniDe emitRemoteJson(name, { transport: "frontend", baseUrl: session.baseUrl, - commands: ["debug health", "debug dispatch", "debug task", "ssh ", "ssh skills", "microservice list", "microservice status ", "microservice health ", "microservice diagnostics ", "microservice tunnel-self-test ", "microservice proxy ", "decision upload ", "decision list", "decision show ", "codex task ", "codex tasks", "codex judge --attempt N", "network perf"], + commands: ["debug health", "debug dispatch", "debug task", "ssh ", "ssh skills", "artifact-registry status|health", "ci publish-user-service --dry-run", "microservice list", "microservice status ", "microservice health ", "microservice diagnostics ", "microservice tunnel-self-test ", "microservice proxy ", "decision upload ", "decision list", "decision show ", "codex task ", "codex tasks", "codex judge --attempt N", "network perf"], }); return 0; } @@ -630,6 +743,14 @@ async function runRemoteCliOverFrontend(options: RemoteCliOptions, config: UniDe emitRemoteJson(name, await remoteMicroservice(session, args)); return 0; } + if (top === "artifact-registry") { + emitRemoteJson(name, await remoteArtifactRegistry(session, args)); + return 0; + } + if (top === "ci") { + emitRemoteJson(name, await remoteCi(session, config, args)); + return 0; + } if (top === "decision" || top === "decision-center") { const fetcher = (path: string, init?: { method?: string; body?: unknown }): Promise => { const requestInit = init === undefined