fix: add backend-core publish preflight

This commit is contained in:
Codex
2026-05-21 09:39:25 +00:00
parent 1a722412cd
commit 43ce0ee051
6 changed files with 449 additions and 63 deletions
+5 -2
View File
@@ -126,6 +126,8 @@ The CI user-service artifact task must follow these rules:
The same command also has a read-only preflight mode: `bun scripts/cli.ts ci publish-user-service --service <id> --commit <full-sha> --dry-run`. That mode may be called from the main server or through remote frontend passthrough, and it must return `runnerDisposition`, `missingChannels`, `missingControlChannels`, `channels`, `controlChannels`, `registry`, `artifactSummary`, `controlledPublish`, `boundary` and `next` without creating a PipelineRun or pushing an image. `missingChannels` is the detailed probe list, while `missingControlChannels` is the runner-facing domain list using only `backend-core`, `database`, `provider` and `registry`. `controlledPublish` must point at the real producer boundary: D601, namespace `unidesk-ci`, PipelineRun `unidesk-user-service-artifact-publish`, and the non-dry-run `ci publish-user-service` command shape. If backend-core, database, provider or registry channels are missing, the result must be structured `infra-blocked`, not a bare container lookup failure. The same command also has a read-only preflight mode: `bun scripts/cli.ts ci publish-user-service --service <id> --commit <full-sha> --dry-run`. That mode may be called from the main server or through remote frontend passthrough, and it must return `runnerDisposition`, `missingChannels`, `missingControlChannels`, `channels`, `controlChannels`, `registry`, `artifactSummary`, `controlledPublish`, `boundary` and `next` without creating a PipelineRun or pushing an image. `missingChannels` is the detailed probe list, while `missingControlChannels` is the runner-facing domain list using only `backend-core`, `database`, `provider` and `registry`. `controlledPublish` must point at the real producer boundary: D601, namespace `unidesk-ci`, PipelineRun `unidesk-user-service-artifact-publish`, and the non-dry-run `ci publish-user-service` command shape. 
If backend-core, database, provider or registry channels are missing, the result must be structured `infra-blocked`, not a bare container lookup failure.
`ci publish-backend-core --commit <full-sha> --dry-run` is the equivalent backend-core preflight. It must stay read-only and report `targetCommit`, `sourceRepo`, `ciRunner`, `registryTarget`, `wouldBuildOnD601`, `blockedScopes` and `recommendedAction`, plus the same control-channel diagnostics as user-service preflight. It must also expose `sourceAuth` for the D601 GitHub SSH deploy identity and provider-gateway egress proxy, `artifactRequirements` for the required labels and digest header, and `devApplyPath` for the standard next hop: publish artifact, verify `artifactSummary.digest` / `artifactSummary.digestRef` and labels, then run `deploy apply --env dev --service backend-core --commit <full-sha>` as pull-only CD. The dry-run must not export source, create a Tekton PipelineRun, compile Rust, build or push an image, call `deploy apply`, restart services, or suggest production backend-core apply as the default next step.
Publish a Baidu Netdisk artifact: Publish a Baidu Netdisk artifact:
```bash ```bash
@@ -198,13 +200,14 @@ Run CI manually for a commit:
bun scripts/cli.ts ci run --revision <commit> bun scripts/cli.ts ci run --revision <commit>
``` ```
Publish a backend-core artifact for production CD: Preflight and publish a backend-core artifact for dev CD:
```bash ```bash
bun scripts/cli.ts ci publish-backend-core --commit <full-sha> --dry-run
bun scripts/cli.ts ci publish-backend-core --commit <full-sha> --wait-ms 1200000 bun scripts/cli.ts ci publish-backend-core --commit <full-sha> --wait-ms 1200000
``` ```
This command creates the `unidesk-backend-core-artifact-publish` Tekton PipelineRun. It is a CI producer action only: it may build and push `127.0.0.1:5000/unidesk/backend-core:<commit>`, but it must not recreate dev or prod runtime containers. Dev deployment is triggered separately with `deploy apply --env dev --service backend-core --commit <full-sha>`; production deployment is triggered separately with `deploy apply --env prod --service backend-core --commit <full-sha>`. The dry-run is the read-only gate. The publish command creates the `unidesk-backend-core-artifact-publish` Tekton PipelineRun. It is a CI producer action only: it may build and push `127.0.0.1:5000/unidesk/backend-core:<commit>`, but it must not recreate dev or prod runtime containers. Dev deployment is triggered separately with `deploy apply --env dev --service backend-core --commit <full-sha>` after digest and label verification.
Publish a user-service artifact: Publish a user-service artifact:
+10 -8
View File
@@ -95,14 +95,16 @@ Use this sequence for backend-core Rust and frontend dev work:
2. Run local non-Rust checks on the master server, for example `bun scripts/cli.ts check --files --scripts-typecheck --compose --logs`. 2. Run local non-Rust checks on the master server, for example `bun scripts/cli.ts check --files --scripts-typecheck --compose --logs`.
3. Commit and push the code to `origin master`; `deploy apply --env dev` cannot deploy unpushed local changes. 3. Commit and push the code to `origin master`; `deploy apply --env dev` cannot deploy unpushed local changes.
4. Update `deploy.json` `environments.dev.services` so `backend-core` and `frontend` point at the pushed commit, then commit and push that manifest update. 4. Update `deploy.json` `environments.dev.services` so `backend-core` and `frontend` point at the pushed commit, then commit and push that manifest update.
5. Publish the artifact first: `bun scripts/cli.ts ci publish-backend-core --commit <full-sha> --wait-ms 1200000` for backend-core, or `bun scripts/cli.ts ci publish-user-service --service <frontend|decision-center|mdtodo|claudeqq|code-queue> --commit <full-sha> --wait-ms 1200000` for user services. 5. Preflight backend-core publication: `bun scripts/cli.ts ci publish-backend-core --commit <full-sha> --dry-run`. The result must have no `blockedScopes`, `wouldBuildOnD601=true`, D601 `unidesk-ci` Tekton runner metadata, D601 registry target `127.0.0.1:5000/unidesk/backend-core`, required labels for service id/source repo/source commit/Dockerfile, and `recommendedAction` pointing to the real publish command.
6. Run `bun scripts/cli.ts deploy apply --env dev --service backend-core --dry-run` and confirm `artifactConsumer.noRuntimeSourceBuild=true`, `build.willCompile=false`, the registry image is `127.0.0.1:5000/unidesk/backend-core:<commit>`, and the target is `unidesk-dev/backend-core-dev`. 6. Publish the artifact first: `bun scripts/cli.ts ci publish-backend-core --commit <full-sha> --wait-ms 1200000` for backend-core, or `bun scripts/cli.ts ci publish-user-service --service <frontend|decision-center|mdtodo|claudeqq|code-queue> --commit <full-sha> --wait-ms 1200000` for user services.
7. Run `bun scripts/cli.ts deploy apply --env dev --service backend-core` and observe the returned job with `bun scripts/cli.ts job status <jobId> --tail-bytes 30000`. 7. Verify the publish output contains non-empty `artifactSummary.digest` and `artifactSummary.digestRef`, and that the pushed image labels match `backend-core`, the source repo, source commit and Dockerfile. This verification can use the publish output and the D601 registry manifest HEAD; it must not rebuild.
8. Run `bun scripts/cli.ts deploy apply --env dev --service <frontend|decision-center|mdtodo|claudeqq|code-queue>` and observe the job the same way; this must consume the registry artifact and verify live deploy metadata through the service health path. 8. Run `bun scripts/cli.ts deploy apply --env dev --service backend-core --dry-run` and confirm `artifactConsumer.noRuntimeSourceBuild=true`, `build.willCompile=false`, the registry image is `127.0.0.1:5000/unidesk/backend-core:<commit>`, and the target is `unidesk-dev/backend-core-dev`.
9. If the dev service catalog changes, deploy the pushed `k3sctl-adapter` commit through the controlled local manifest exception, then verify `/api/control-plane` lists `k3s/dev/unidesk-dev-core.k3s.json`. 9. Run `bun scripts/cli.ts deploy apply --env dev --service backend-core` and observe the returned job with `bun scripts/cli.ts job status <jobId> --tail-bytes 30000`.
10. Rebuild or verify `dev-frontend-proxy` on the main server with `bun scripts/cli.ts server rebuild dev-frontend-proxy` when the proxy config or port changes. 10. Run `bun scripts/cli.ts deploy apply --env dev --service <frontend|decision-center|mdtodo|claudeqq|code-queue>` and observe the job the same way; this must consume the registry artifact and verify live deploy metadata through the service health path.
11. Manually test `http://74.48.78.17:18083/` and the dev health endpoints. 11. If the dev service catalog changes, deploy the pushed `k3sctl-adapter` commit through the controlled local manifest exception, then verify `/api/control-plane` lists `k3s/dev/unidesk-dev-core.k3s.json`.
12. Run D601 CI for the commit and the dev smoke runner: `bun scripts/cli.ts ci run --revision <commit> --wait-ms <ms>` and `bun scripts/cli.ts ci run-dev-e2e --wait-ms <ms>`. When Code Queue behavior changes, update the `code-queue` entry in `environments.dev.services` to the pushed commit before running dev artifact validation or the temporary dev smoke. 12. Rebuild or verify `dev-frontend-proxy` on the main server with `bun scripts/cli.ts server rebuild dev-frontend-proxy` when the proxy config or port changes.
13. Manually test `http://74.48.78.17:18083/` and the dev health endpoints.
14. Run D601 CI for the commit and the dev smoke runner: `bun scripts/cli.ts ci run --revision <commit> --wait-ms <ms>` and `bun scripts/cli.ts ci run-dev-e2e --wait-ms <ms>`. When Code Queue behavior changes, update the `code-queue` entry in `environments.dev.services` to the pushed commit before running dev artifact validation or the temporary dev smoke.
## Validation Commands ## Validation Commands
@@ -0,0 +1,131 @@
import {
artifactRegistryReadonlyResultFromCommand,
buildArtifactRegistryReadonlyProbe,
parseArtifactRegistryOptions,
} from "./src/artifact-registry";
import type { CommandResult } from "./src/command";
type JsonRecord = Record<string, unknown>;
/**
 * Throws when `condition` is falsy so a failed contract check aborts the script.
 *
 * The error text is `<message>: <detail serialized as JSON>`. When `detail` cannot be
 * serialized (circular reference, BigInt, ...) a best-effort placeholder is used so the
 * serialization failure never replaces the assertion message itself.
 *
 * @param condition - Value tested for truthiness.
 * @param message - Description of the violated expectation.
 * @param detail - Extra context appended to the error; defaults to `{}`.
 * @throws Error when `condition` is falsy.
 */
function assertCondition(condition: unknown, message: string, detail: unknown = {}): void {
  if (condition) return;
  let serialized: string;
  try {
    // JSON.stringify yields undefined for functions/symbols; fall back to String().
    serialized = JSON.stringify(detail) ?? String(detail);
  } catch {
    serialized = `[unserializable detail: ${Object.prototype.toString.call(detail)}]`;
  }
  throw new Error(`${message}: ${serialized}`);
}
/**
 * Treats `value` as a JSON-style record after checking that it is a non-null,
 * non-array object; otherwise the assertion error names `label`.
 */
function asRecord(value: unknown, label: string): JsonRecord {
  const isPlainObject = typeof value === "object" && value !== null && !Array.isArray(value);
  assertCondition(isPlainObject, `${label} must be an object`, { value });
  return value as JsonRecord;
}
/**
 * Requires `value` to be an array (the assertion error names `label`) and returns
 * a new array with every element converted through `String`.
 */
function asStringArray(value: unknown, label: string): string[] {
  assertCondition(Array.isArray(value), `${label} must be an array`, { value });
  const items = value as unknown[];
  return items.map((item) => String(item));
}
/**
 * Builds a `CommandResult` fixture: a successful, empty host.ssh health dispatch
 * with any fields in `overrides` replacing the defaults.
 */
function command(overrides: Partial<CommandResult>): CommandResult {
  const defaults: CommandResult = {
    command: ["frontend", "/api/dispatch", "D601", "host.ssh", "health"],
    cwd: ".",
    exitCode: 0,
    stdout: "",
    stderr: "",
    signal: null,
    timedOut: false,
  };
  return { ...defaults, ...overrides };
}
// Contract script: checks how read-only artifact-registry probe results are classified
// for three simulated command failures and one healthy run, then prints a JSON summary.

// Build the read-only "health" probe for provider D601 from CLI-style options.
const options = parseArtifactRegistryOptions(["--provider-id", "D601"]);
const probe = buildArtifactRegistryReadonlyProbe("health", options);
// The generated script must stay below 4000 bytes; the assertion message ties this bound
// to the provider-gateway host.ssh command length limit.
assertCondition(Buffer.byteLength(probe.script, "utf8") < 4000, "readonly registry probe script must fit provider-gateway host.ssh command length limit", {
bytes: Buffer.byteLength(probe.script, "utf8"),
remoteCommandShape: probe.remoteCommandShape,
});
// Case 1: the provider reports that it does not declare the host.ssh capability.
const missing = asRecord(artifactRegistryReadonlyResultFromCommand(probe, command({
exitCode: 1,
stderr: "provider does not declare host.ssh capability: D601\n",
})), "missing provider ssh result");
assertCondition(missing.ok === false, "missing provider ssh command should fail", missing);
assertCondition(missing.failureClassification === "provider-ssh-command-missing", "missing provider ssh command classification mismatch", missing);
assertCondition(asStringArray(missing.failedScopes, "missing.failedScopes").includes("provider-ssh-command"), "missing provider ssh command scope should be reported", missing);
assertCondition(typeof missing.recommendedAction === "string" && missing.recommendedAction.length > 0, "missing provider ssh command should include recommendedAction", missing);
assertCondition(typeof missing.remoteCommandShape === "string" && missing.remoteCommandShape.includes("bash -lc"), "missing provider ssh command should include remoteCommandShape", missing);
// Case 2: the host.ssh helper rejects the command as too long.
const commandShape = asRecord(artifactRegistryReadonlyResultFromCommand(probe, command({
exitCode: 1,
stderr: "Error: host SSH command is too long: 4039 bytes\n",
})), "command shape result");
assertCondition(commandShape.ok === false, "oversized host.ssh command should fail", commandShape);
assertCondition(commandShape.failureClassification === "ssh-helper-command-shape-incompatible", "oversized host.ssh command classification mismatch", commandShape);
assertCondition(asStringArray(commandShape.failedScopes, "commandShape.failedScopes").includes("ssh-helper-command-shape"), "oversized host.ssh command scope should be reported", commandShape);
// Case 3: the remote task timed out (no exit code, timedOut flag set).
const timeout = asRecord(artifactRegistryReadonlyResultFromCommand(probe, command({
exitCode: null,
stderr: "host.ssh task task_contract did not finish within 30000ms\n",
timedOut: true,
})), "timeout result");
assertCondition(timeout.ok === false, "remote timeout should fail", timeout);
assertCondition(timeout.failureClassification === "remote-command-timeout", "remote timeout classification mismatch", timeout);
assertCondition(asStringArray(timeout.failedScopes, "timeout.failedScopes").includes("remote-command-timeout"), "remote timeout scope should be reported", timeout);
assertCondition(timeout.retryable === true, "remote timeout should be retryable", timeout);
// Case 4: key=value probe output describing a fully healthy registry; lines are joined
// with "\n" and the trailing empty entry produces a final newline.
const successStdout = [
"readonly=true",
"unit_path=/etc/systemd/system/unidesk-artifact-registry.service",
"compose_path=/home/ubuntu/.unidesk/artifact-registry/compose.yml",
"config_path=/home/ubuntu/.unidesk/artifact-registry/config.yml",
"storage_path=/home/ubuntu/.unidesk/registry-storage",
"unit_exists=true",
"compose_exists=true",
"config_exists=true",
"storage_exists=true",
"systemctl_available=true",
"unit_active=active",
"unit_enabled=enabled",
"docker_available=true",
"container_running=true",
"container_status=running",
"container_image=registry:2.8.3",
"container_restart_policy=unless-stopped",
"listener_count=1",
"bad_listener_count=0",
"loopback_only=true",
"curl_available=true",
"v2_http_code=200",
"config_hash=contract-config",
"compose_hash=contract-compose",
"unit_hash=contract-unit",
"expected_config_hash=contract-config",
"expected_compose_hash=contract-compose",
"expected_unit_hash=contract-unit",
"config_hash_matches=true",
"compose_hash_matches=true",
"unit_hash_matches=true",
"image_matches=true",
"",
].join("\n");
const success = asRecord(artifactRegistryReadonlyResultFromCommand(probe, command({
stdout: successStdout,
})), "success result");
assertCondition(success.ok === true, "healthy registry command should pass", success);
assertCondition(success.failureClassification === null, "healthy registry should not have a failure classification", success);
assertCondition(asStringArray(success.failedScopes, "success.failedScopes").length === 0, "healthy registry should not have failed scopes", success);
assertCondition(success.recommendedAction === "none", "healthy registry recommendedAction should be none", success);
assertCondition(success.remoteCommandShape === probe.remoteCommandShape, "healthy registry should echo remote command shape", success);
// All assertions passed: emit a pretty-printed JSON summary followed by a newline.
process.stdout.write(`${JSON.stringify({
ok: true,
checks: [
"provider-ssh-command missing is classified distinctly",
"oversized host.ssh command shape is classified distinctly",
"remote host.ssh timeout is classified distinctly",
"successful registry readonly probe has no failed scopes",
],
classifications: {
missing: missing.failureClassification,
commandShape: commandShape.failureClassification,
timeout: timeout.failureClassification,
success: success.failureClassification,
},
}, null, 2)}\n`);
@@ -0,0 +1,152 @@
import { readConfig } from "./src/config";
import { runCiPublishBackendCoreDryRunPreflight, type PublishPreflightTransport } from "./src/ci";
type JsonRecord = Record<string, unknown>;
/**
 * Throws when `condition` is falsy so a failed contract check aborts the script.
 *
 * The error text is `<message>: <detail serialized as JSON>`. When `detail` cannot be
 * serialized (circular reference, BigInt, ...) a best-effort placeholder is used so the
 * serialization failure never replaces the assertion message itself.
 *
 * @param condition - Value tested for truthiness.
 * @param message - Description of the violated expectation.
 * @param detail - Extra context appended to the error; defaults to `{}`.
 * @throws Error when `condition` is falsy.
 */
function assertCondition(condition: unknown, message: string, detail: unknown = {}): void {
  if (condition) return;
  let serialized: string;
  try {
    // JSON.stringify yields undefined for functions/symbols; fall back to String().
    serialized = JSON.stringify(detail) ?? String(detail);
  } catch {
    serialized = `[unserializable detail: ${Object.prototype.toString.call(detail)}]`;
  }
  throw new Error(`${message}: ${serialized}`);
}
/**
 * Treats `value` as a JSON-style record after checking that it is a non-null,
 * non-array object; otherwise the assertion error names `label`.
 */
function asRecord(value: unknown, label: string): JsonRecord {
  const isPlainObject = typeof value === "object" && value !== null && !Array.isArray(value);
  assertCondition(isPlainObject, `${label} must be an object`, { value });
  return value as JsonRecord;
}
// Placeholder 40-character hexadecimal commit id used as the preflight target.
const commit = "0123456789abcdef0123456789abcdef01234567";
const config = readConfig();
// Mock transport in which every channel answers as ready, so the preflight under test
// should report no blocked scopes.
const readyTransport: PublishPreflightTransport = {
kind: "remote-frontend",
remoteHost: "https://example.invalid",
// Core fetches always succeed with HTTP 200 and dbReady=true.
coreFetch: async () => ({
ok: true,
status: 200,
body: {
ok: true,
dbReady: true,
},
}),
// host.ssh dispatches are answered by substring match on the command text; the first
// matching branch wins, and anything else gets a generic success reply.
dispatchHostSsh: async (command) => {
// Commands mentioning the unidesk-ci namespace lookup get key=value output with every
// runner prerequisite present (presumably the D601 CI runner probe).
if (command.includes("kubectl get namespace unidesk-ci")) {
return {
ok: true,
taskId: "task-ci-runner",
status: "succeeded",
stdout: [
"kubectl=ok",
"docker=ok",
"docker_socket=true",
"namespace=true",
"tekton_pipeline=true",
"tekton_task=true",
"service_account=true",
"pvc=true",
"source_parent_directory=true",
].join("\n"),
stderr: "",
exitCode: 0,
raw: {},
};
}
// Commands containing "v2/" get key=value output describing a healthy registry
// (presumably the artifact registry read-only probe).
if (command.includes("v2/")) {
return {
ok: true,
taskId: "task-registry",
status: "succeeded",
stdout: [
"systemctl_available=true",
"docker_available=true",
"curl_available=true",
"unit_exists=true",
"unit_active=active",
"unit_enabled=enabled",
"compose_exists=true",
"config_exists=true",
"storage_exists=true",
"container_running=true",
"container_status=running",
"container_image=registry:2.8.3",
"container_restart_policy=always",
"listener_count=1",
"bad_listener_count=0",
"loopback_only=true",
"v2_http_code=200",
"image_matches=true",
"config_hash_matches=true",
"compose_hash_matches=true",
"unit_hash_matches=true",
].join("\n"),
stderr: "",
exitCode: 0,
raw: {},
};
}
// Fallback reply for any other host.ssh command.
return {
ok: true,
taskId: "task-host-ssh",
status: "succeeded",
stdout: "provider_host_ssh=ok\n",
stderr: "",
exitCode: 0,
raw: {},
};
},
commandCwd: "/workspace/unidesk",
// Returns a synthetic argv built from the probe's action and remote command shape.
artifactRegistryCommand: (probe) => ["mock", probe.action, probe.remoteCommandShape],
};
/**
 * Runs the backend-core publish dry-run preflight against the all-ready mock transport
 * and asserts the shape and values of the returned record. Throws on the first failed
 * assertion; on success writes a JSON summary of the checks to stdout.
 */
async function main(): Promise<void> {
const result = await runCiPublishBackendCoreDryRunPreflight(config, [
"publish-backend-core",
"--commit",
commit,
"--dry-run",
], readyTransport);
// Each nested section must be a plain object; asRecord throws otherwise.
const record = asRecord(result, "backend-core preflight");
const source = asRecord(record.source, "source");
const sourceAuth = asRecord(record.sourceAuth, "sourceAuth");
const d601Ci = asRecord(record.d601Ci, "d601Ci");
const artifactSummary = asRecord(record.artifactSummary, "artifactSummary");
const artifactRequirements = asRecord(artifactRequirements_placeholder_guard(record), "artifactRequirements");
const requiredLabels = asRecord(artifactRequirements.requiredLabels, "requiredLabels");
const devApplyPath = asRecord(record.devApplyPath, "devApplyPath");
const controlledPublish = asRecord(record.controlledPublish, "controlledPublish");
const blockedScopes = Array.isArray(record.blockedScopes) ? record.blockedScopes as string[] : [];
assertCondition(record.ok === true, "ready backend-core preflight should pass", record);
assertCondition(record.mode === "dry-run-preflight", "backend-core dry-run must be preflight mode", record);
assertCondition(record.targetCommit === commit, "target commit should be surfaced", record);
assertCondition(record.sourceRepo === "https://github.com/pikasTech/unidesk", "source repo should come from CI.json", record);
assertCondition(record.registryTarget === "127.0.0.1:5000/unidesk/backend-core", "registry target should be D601 backend-core repository", record);
assertCondition(record.wouldBuildOnD601 === true, "preflight should state real publish would build on D601", record);
assertCondition(record.dryRunBuildStarted === false, "dry-run must not start a build", record);
assertCondition(blockedScopes.length === 0, "ready preflight should have no blocked scopes", record);
assertCondition(source.repoFetchUrl === "git@github.com:pikasTech/unidesk.git", "source auth should use GitHub SSH fetch URL", source);
assertCondition(sourceAuth.egressProxy === "http://127.0.0.1:18789", "source auth should name provider egress proxy", sourceAuth);
assertCondition(d601Ci.wouldBuildOnD601 === true, "D601 CI runner preflight should expose D601 build boundary", d601Ci);
assertCondition(artifactSummary.imageRef === `127.0.0.1:5000/unidesk/backend-core:${commit}`, "artifact should be commit-pinned", artifactSummary);
assertCondition(artifactSummary.digest === null, "dry-run must not fake digest", artifactSummary);
assertCondition(requiredLabels["unidesk.ai/service-id"] === "backend-core", "service id label should be required", requiredLabels);
assertCondition(requiredLabels["unidesk.ai/source-commit"] === commit, "source commit label should be required", requiredLabels);
assertCondition(devApplyPath.pullOnly === true, "dev apply path should be pull-only", devApplyPath);
assertCondition(String(devApplyPath.apply ?? "").includes("deploy apply --env dev --service backend-core"), "dev apply command should be surfaced", devApplyPath);
assertCondition(!String(devApplyPath.apply ?? "").includes("--env prod"), "dev apply path must not point to prod", devApplyPath);
assertCondition(controlledPublish.environment === "D601", "controlled publish should name D601", controlledPublish);
assertCondition(String(record.boundary ?? "").includes("no Rust compile"), "boundary should explicitly forbid Rust compile during dry-run", record);
assertCondition(String(record.recommendedAction ?? "").includes("ci publish-backend-core"), "recommended action should name real publish command", record);
process.stdout.write(`${JSON.stringify({
ok: true,
checks: [
"backend-core dry-run exposes target commit, source repo, D601 runner and registry target",
"backend-core dry-run reports wouldBuildOnD601 without starting a build",
"artifact labels and digest requirements are visible without faking a digest",
"standard path is publish artifact, verify labels/digest, then dev pull-only apply",
],
}, null, 2)}\n`);
}
// Run the contract only when import.meta.main is truthy, i.e. (under Bun) when this file
// is the entry point rather than an imported module.
if (import.meta.main) {
await main();
}
+105 -51
View File
@@ -56,8 +56,17 @@ export interface ArtifactRegistryReadonlyProbe {
timeoutMs: number; timeoutMs: number;
healthMode: boolean; healthMode: boolean;
options: ArtifactRegistryOptions; options: ArtifactRegistryOptions;
remoteCommandShape: string;
} }
/**
 * Failure classes attached to read-only artifact-registry status/health results.
 * Functions in this module use `null` alongside this union to mean that no failure
 * was classified.
 */
export type ArtifactRegistryFailureClassification =
// Command output pointed at missing local backend-core/database containers or bridge.
| "local-docker-required"
// Provider host.ssh dispatch/capability unavailable; also the fallback class.
| "provider-ssh-command-missing"
// The host.ssh helper could not run the probe command in its current shape.
| "ssh-helper-command-shape-incompatible"
// The remote command timed out.
| "remote-command-timeout"
// The probe ran but found no install markers for the registry.
| "registry-not-installed"
// The probe ran, found install markers, but some health scope failed.
| "registry-unhealthy";
const defaultOptions: ArtifactRegistryOptions = { const defaultOptions: ArtifactRegistryOptions = {
environment: null, environment: null,
providerId: "D601", providerId: "D601",
@@ -1033,10 +1042,6 @@ container=${shellQuote(options.containerName)}
expected_image=${shellQuote(options.image)} expected_image=${shellQuote(options.image)}
port=${options.port} port=${options.port}
kv readonly true kv readonly true
kv unit_path "$unit"
kv compose_path "$compose"
kv config_path "$config"
kv storage_path "$storage"
kv unit_exists "$(bool_file "$unit")" kv unit_exists "$(bool_file "$unit")"
kv compose_exists "$(bool_file "$compose")" kv compose_exists "$(bool_file "$compose")"
kv config_exists "$(bool_file "$config")" kv config_exists "$(bool_file "$config")"
@@ -1092,9 +1097,6 @@ unit_hash="$(hash_file "$unit")"
kv config_hash "$config_hash" kv config_hash "$config_hash"
kv compose_hash "$compose_hash" kv compose_hash "$compose_hash"
kv unit_hash "$unit_hash" kv unit_hash "$unit_hash"
kv expected_config_hash ${shellQuote(hashes[bundle.paths.config] ?? "")}
kv expected_compose_hash ${shellQuote(hashes[bundle.paths.compose] ?? "")}
kv expected_unit_hash ${shellQuote(hashes[bundle.paths.unit] ?? "")}
kv config_hash_matches "$([ -n "$config_hash" ] && [ "$config_hash" = ${shellQuote(hashes[bundle.paths.config] ?? "")} ] && printf true || printf false)" kv config_hash_matches "$([ -n "$config_hash" ] && [ "$config_hash" = ${shellQuote(hashes[bundle.paths.config] ?? "")} ] && printf true || printf false)"
kv compose_hash_matches "$([ -n "$compose_hash" ] && [ "$compose_hash" = ${shellQuote(hashes[bundle.paths.compose] ?? "")} ] && printf true || printf false)" kv compose_hash_matches "$([ -n "$compose_hash" ] && [ "$compose_hash" = ${shellQuote(hashes[bundle.paths.compose] ?? "")} ] && printf true || printf false)"
kv unit_hash_matches "$([ -n "$unit_hash" ] && [ "$unit_hash" = ${shellQuote(hashes[bundle.paths.unit] ?? "")} ] && printf true || printf false)" kv unit_hash_matches "$([ -n "$unit_hash" ] && [ "$unit_hash" = ${shellQuote(hashes[bundle.paths.unit] ?? "")} ] && printf true || printf false)"
@@ -1115,6 +1117,56 @@ function asBool(value: string | undefined): boolean {
return value === "true"; return value === "true";
} }
/**
 * Maps a failure classification to the operator guidance string reported with it.
 * `null` (no failure) maps to `"none"`; any classification without a dedicated
 * message falls through to the generic "restore the registry" guidance.
 */
function registryRecommendedAction(classification: ArtifactRegistryFailureClassification | null): string {
  switch (classification) {
    case null:
      return "none";
    case "local-docker-required":
      return "run the read-only check through --main-server-ip <host> or from a main-server CLI with backend-core available";
    case "provider-ssh-command-missing":
      return "restore D601 provider-gateway host.ssh dispatch/capability before retrying artifact registry health";
    case "ssh-helper-command-shape-incompatible":
      return "upgrade the CLI/control-plane host.ssh helper shape so it can run bash -lc readonly probes";
    case "remote-command-timeout":
      return "rerun artifact-registry health and inspect D601 host.ssh latency if the timeout repeats";
    case "registry-not-installed":
      return "install the D601 artifact registry through the controlled artifact-registry install path, then rerun health";
    default:
      return "restore the D601 artifact registry service/container/API until registry health passes";
  }
}
/**
 * Describes, as a single line, how the read-only probe is dispatched: provider id,
 * exec mode, a `bash -lc` argv with a placeholder for the script, and the timeout.
 * The script body itself is not included, only a named placeholder.
 */
function readonlyRemoteCommandShape(action: "status" | "health", options: ArtifactRegistryOptions): string {
  const scriptPlaceholder = `<artifact-registry-${action}-readonly-script>`;
  const parts = [
    "host.ssh",
    `provider=${options.providerId}`,
    "mode=exec",
    "argv=bash",
    "-lc",
    scriptPlaceholder,
    `timeoutMs=${options.timeoutMs}`,
  ];
  return parts.join(" ");
}
/**
 * Classifies a failed probe command from its timeout flag, exit code and the
 * lower-cased combination of stderr and stdout. Checks run in priority order:
 * timeout, missing local control-plane containers, provider host.ssh unavailable,
 * helper/command-shape problems; anything unrecognized is reported as
 * `provider-ssh-command-missing`.
 */
function classifyProviderSshCommandFailure(command: CommandResult): ArtifactRegistryFailureClassification {
  const text = `${command.stderr}\n${command.stdout}`.toLowerCase();
  const mentionsAny = (needles: readonly string[]): boolean => needles.some((needle) => text.includes(needle));

  if (command.timedOut || mentionsAny(["timed out", "timeout"])) {
    return "remote-command-timeout";
  }

  const localDockerHints = [
    "no such container: unidesk-backend-core",
    "no such container: unidesk-database",
    "backend-core bridge unavailable",
  ];
  if (mentionsAny(localDockerHints)) {
    return "local-docker-required";
  }

  const providerMissingHints = [
    "provider does not declare host.ssh",
    "provider is offline",
    "host.ssh capability",
  ];
  if (mentionsAny(providerMissingHints)) {
    return "provider-ssh-command-missing";
  }

  const helperShapeHints = [
    "remote frontend transport does not support",
    "ssh helper",
    "unsupported command",
    "bad substitution",
    "command is too long",
  ];
  // Exit codes 126/127 are grouped with the "command not found"/"exec format error" hints.
  const shellCouldNotRun = command.exitCode === 126 || command.exitCode === 127;
  if (mentionsAny(helperShapeHints) || shellCouldNotRun || mentionsAny(["command not found", "exec format error"])) {
    return "ssh-helper-command-shape-incompatible";
  }

  return "provider-ssh-command-missing";
}
/**
 * Lists the failed scopes for a command-level failure. `provider-ssh-command` is
 * always first; timeout, local-docker and command-shape classifications each add
 * one more specific scope.
 */
function providerSshCommandFailureScopes(classification: ArtifactRegistryFailureClassification): string[] {
  const baseScope = "provider-ssh-command";
  switch (classification) {
    case "remote-command-timeout":
      return [baseScope, "remote-command-timeout"];
    case "local-docker-required":
      return [baseScope, "local-docker-control-plane"];
    case "ssh-helper-command-shape-incompatible":
      return [baseScope, "ssh-helper-command-shape"];
    default:
      return [baseScope];
  }
}
/**
 * Classifies a probe that ran but reported failed scopes. Returns `null` when no
 * scope failed; otherwise `registry-unhealthy` if any install marker (unit, compose,
 * config, storage, running container) is strictly `true`, else `registry-not-installed`.
 */
function registryHealthFailureClassification(checks: Record<string, boolean>, failedScopes: string[]): ArtifactRegistryFailureClassification | null {
  if (failedScopes.length === 0) {
    return null;
  }
  const installMarkers = ["unitExists", "composeExists", "configExists", "storageExists", "containerRunning"];
  const anyMarkerPresent = installMarkers.some((marker) => checks[marker] === true);
  if (anyMarkerPresent) {
    return "registry-unhealthy";
  }
  return "registry-not-installed";
}
function registryHealthDecision(checks: Record<string, boolean>, commandOk: boolean): Record<string, unknown> { function registryHealthDecision(checks: Record<string, boolean>, commandOk: boolean): Record<string, unknown> {
const scopeChecks: Array<[string, boolean]> = [ const scopeChecks: Array<[string, boolean]> = [
["systemd", checks.systemctlAvailable === true && checks.unitExists === true && checks.unitActive === true], ["systemd", checks.systemctlAvailable === true && checks.unitExists === true && checks.unitActive === true],
@@ -1129,11 +1181,14 @@ function registryHealthDecision(checks: Record<string, boolean>, commandOk: bool
const healthyScopes = scopeChecks.filter(([, ok]) => ok).map(([scope]) => scope); const healthyScopes = scopeChecks.filter(([, ok]) => ok).map(([scope]) => scope);
const failedScopes = scopeChecks.filter(([, ok]) => !ok).map(([scope]) => scope); const failedScopes = scopeChecks.filter(([, ok]) => !ok).map(([scope]) => scope);
const runtimeApiHealthy = checks.containerRunning === true && checks.loopbackOnly === true && checks.v2Ok === true; const runtimeApiHealthy = checks.containerRunning === true && checks.loopbackOnly === true && checks.v2Ok === true;
const failureClassification = commandOk ? registryHealthFailureClassification(checks, failedScopes) : "provider-ssh-command-missing";
return { return {
decision: commandOk && failedScopes.length === 0 ? "healthy" : commandOk || runtimeApiHealthy ? "service-degraded" : "retryable-transient", decision: commandOk && failedScopes.length === 0 ? "healthy" : commandOk || runtimeApiHealthy ? "service-degraded" : "retryable-transient",
retryable: true, retryable: true,
failureClassification,
healthyScopes, healthyScopes,
failedScopes, failedScopes,
recommendedAction: registryRecommendedAction(failureClassification),
runtimeApiHealthy, runtimeApiHealthy,
}; };
} }
@@ -1273,8 +1328,40 @@ function runRemoteScript(options: ArtifactRegistryOptions, script: string, timeo
return runCommand(command, repoRoot, { timeoutMs }); return runCommand(command, repoRoot, { timeoutMs });
} }
/**
 * Builds the structured result for a read-only probe whose command itself failed
 * (as opposed to a probe that ran and reported an unhealthy registry).
 *
 * The result is always `ok: false` with empty `checks`. The decision is
 * `infra-blocked` for `local-docker-required` and `retryable-transient` otherwise;
 * only `ssh-helper-command-shape-incompatible` is marked non-retryable.
 *
 * @param options - Registry options used for the expected endpoint/image/paths.
 * @param command - The failed command result being classified.
 * @param action - Which read-only probe was attempted.
 */
function readonlyCommandFailureResult(
  options: ArtifactRegistryOptions,
  command: CommandResult,
  action: "status" | "health",
): Record<string, unknown> {
  const bundle = renderBundle(options);
  const failureClassification = classifyProviderSshCommandFailure(command);
  const blockedOnLocalDocker = failureClassification === "local-docker-required";
  const shapeIncompatible = failureClassification === "ssh-helper-command-shape-incompatible";

  const result: Record<string, unknown> = {
    ok: false,
    readonly: true,
    installed: false,
    healthy: false,
    decision: blockedOnLocalDocker ? "infra-blocked" : "retryable-transient",
    retryable: !shapeIncompatible,
    failureClassification,
    recommendedAction: registryRecommendedAction(failureClassification),
    remoteCommandShape: readonlyRemoteCommandShape(action, options),
    healthyScopes: [],
    failedScopes: providerSshCommandFailureScopes(failureClassification),
    runtimeApiHealthy: false,
    checks: {},
    expected: {
      endpoint: `http://${options.host}:${options.port}`,
      image: options.image,
      paths: bundle.paths,
    },
    command: artifactRegistryCommandTail(command),
  };
  return result;
}
function statusFromValues(options: ArtifactRegistryOptions, values: Record<string, string>, command: CommandResult, healthMode: boolean): Record<string, unknown> { function statusFromValues(options: ArtifactRegistryOptions, values: Record<string, string>, command: CommandResult, healthMode: boolean): Record<string, unknown> {
const commandOk = command.exitCode === 0 && !command.timedOut; const commandOk = command.exitCode === 0 && !command.timedOut;
const bundle = renderBundle(options);
const hashes = Object.fromEntries(bundle.files.map((item) => [item.path, item.sha256]));
const checks = { const checks = {
systemctlAvailable: asBool(values.systemctl_available), systemctlAvailable: asBool(values.systemctl_available),
dockerAvailable: asBool(values.docker_available), dockerAvailable: asBool(values.docker_available),
@@ -1316,12 +1403,13 @@ function statusFromValues(options: ArtifactRegistryOptions, values: Record<strin
installed, installed,
healthy, healthy,
...decision, ...decision,
remoteCommandShape: readonlyRemoteCommandShape(healthMode ? "health" : "status", options),
checks, checks,
observed: { observed: {
unit: { path: values.unit_path, active: values.unit_active, enabled: values.unit_enabled }, unit: { path: bundle.paths.unit, active: values.unit_active, enabled: values.unit_enabled },
compose: { path: values.compose_path, sha256: values.compose_hash }, compose: { path: bundle.paths.compose, sha256: values.compose_hash ?? null },
config: { path: values.config_path, sha256: values.config_hash }, config: { path: bundle.paths.config, sha256: values.config_hash ?? null },
storage: { path: values.storage_path }, storage: { path: bundle.paths.storage },
container: { container: {
name: options.containerName, name: options.containerName,
running: values.container_running, running: values.container_running,
@@ -1337,9 +1425,9 @@ function statusFromValues(options: ArtifactRegistryOptions, values: Record<strin
registryApi: { v2HttpCode: values.v2_http_code }, registryApi: { v2HttpCode: values.v2_http_code },
}, },
expected: { expected: {
unitHash: values.expected_unit_hash, unitHash: hashes[bundle.paths.unit] ?? "",
composeHash: values.expected_compose_hash, composeHash: hashes[bundle.paths.compose] ?? "",
configHash: values.expected_config_hash, configHash: hashes[bundle.paths.config] ?? "",
image: options.image, image: options.image,
endpoint: `http://${options.host}:${options.port}`, endpoint: `http://${options.host}:${options.port}`,
}, },
@@ -1352,24 +1440,7 @@ function runReadonlyStatus(options: ArtifactRegistryOptions, healthMode: boolean
const script = statusScript(options, bundle); const script = statusScript(options, bundle);
const result = runRemoteScript(options, script); const result = runRemoteScript(options, script);
if (result.exitCode !== 0 || result.timedOut) { if (result.exitCode !== 0 || result.timedOut) {
return { return readonlyCommandFailureResult(options, result, healthMode ? "health" : "status");
ok: false,
readonly: true,
installed: false,
healthy: false,
decision: "retryable-transient",
retryable: true,
healthyScopes: [],
failedScopes: ["provider-ssh-command"],
runtimeApiHealthy: false,
checks: {},
expected: {
endpoint: `http://${options.host}:${options.port}`,
image: options.image,
paths: bundle.paths,
},
command: artifactRegistryCommandTail(result),
};
} }
return statusFromValues(options, parseKeyValueOutput(result.stdout), result, healthMode); return statusFromValues(options, parseKeyValueOutput(result.stdout), result, healthMode);
} }
@@ -1384,6 +1455,7 @@ export function buildArtifactRegistryReadonlyProbe(action: "status" | "health",
timeoutMs: options.timeoutMs, timeoutMs: options.timeoutMs,
healthMode, healthMode,
options, options,
remoteCommandShape: readonlyRemoteCommandShape(action, options),
}; };
} }
@@ -1392,25 +1464,7 @@ export function artifactRegistryReadonlyResultFromCommand(
command: CommandResult, command: CommandResult,
): Record<string, unknown> { ): Record<string, unknown> {
if (command.exitCode !== 0 || command.timedOut) { if (command.exitCode !== 0 || command.timedOut) {
const bundle = renderBundle(probe.options); return readonlyCommandFailureResult(probe.options, command, probe.action);
return {
ok: false,
readonly: true,
installed: false,
healthy: false,
decision: "retryable-transient",
retryable: true,
healthyScopes: [],
failedScopes: ["provider-ssh-command"],
runtimeApiHealthy: false,
checks: {},
expected: {
endpoint: `http://${probe.options.host}:${probe.options.port}`,
image: probe.options.image,
paths: bundle.paths,
},
command: artifactRegistryCommandTail(command),
};
} }
return statusFromValues(probe.options, parseKeyValueOutput(command.stdout), command, probe.healthMode); return statusFromValues(probe.options, parseKeyValueOutput(command.stdout), command, probe.healthMode);
} }
+46 -2
View File
@@ -87,7 +87,7 @@ interface DispatchResult {
raw: unknown; raw: unknown;
} }
type PublishPreflightFailureClassification = "auth-missing" | "remote-proxy-missing" | "provider-unreachable" | "local-docker-required"; type PublishPreflightFailureClassification = "auth-missing" | "remote-proxy-missing" | "provider-unreachable" | "local-docker-required" | "registry-not-installed" | "registry-unhealthy" | "remote-command-timeout" | "ssh-helper-command-shape-incompatible";
type PublishPreflightControlChannel = "backend-core" | "database" | "provider" | "registry"; type PublishPreflightControlChannel = "backend-core" | "database" | "provider" | "registry";
type PublishPreflightDetailedChannel = "backend-core-api" | "provider-dispatch" | "provider-host-ssh" | "database" | "artifact-registry"; type PublishPreflightDetailedChannel = "backend-core-api" | "provider-dispatch" | "provider-host-ssh" | "database" | "artifact-registry";
@@ -120,6 +120,8 @@ interface PublishPreflight {
channels: PublishPreflightChannelProbe[]; channels: PublishPreflightChannelProbe[];
registry: unknown; registry: unknown;
controlPlane: Record<string, unknown>; controlPlane: Record<string, unknown>;
recommendedAction: string;
remoteCommandShape: string;
next: string[]; next: string[];
boundary: string; boundary: string;
} }
@@ -435,10 +437,36 @@ function classifyPublishPreflightFailure(
if (kind !== "local-docker" && !responseOk(overview)) return "remote-proxy-missing"; if (kind !== "local-docker" && !responseOk(overview)) return "remote-proxy-missing";
if (!sshProbe.ok) return "provider-unreachable"; if (!sshProbe.ok) return "provider-unreachable";
const registryRecord = asRecord(registry); const registryRecord = asRecord(registry);
if (registryRecord?.ok === false) return "provider-unreachable"; if (registryRecord?.failureClassification === "local-docker-required") return "local-docker-required";
if (registryRecord?.failureClassification === "provider-ssh-command-missing") return "provider-unreachable";
if (registryRecord?.failureClassification === "ssh-helper-command-shape-incompatible") return "ssh-helper-command-shape-incompatible";
if (registryRecord?.failureClassification === "remote-command-timeout") return "remote-command-timeout";
if (registryRecord?.failureClassification === "registry-not-installed") return "registry-not-installed";
if (registryRecord?.failureClassification === "registry-unhealthy") return "registry-unhealthy";
if (registryRecord?.ok === false) return kind === "local-docker" ? "provider-unreachable" : "registry-unhealthy";
return null; return null;
} }
/**
 * Chooses the recommended-action text for a publish preflight result.
 *
 * Resolution order:
 *  1. `"none"` when there is no failure classification and no missing control channel.
 *  2. The registry record's own `recommendedAction` (as normalised by `asString`)
 *     when it is non-empty and not `"none"`.
 *  3. A fixed message for the given failure classification.
 *  4. A registry-specific message when `"registry"` is among the missing control channels.
 *  5. A generic message listing the missing control channels (`"unknown"` if the list is empty).
 *
 * @param failureClassification - Classified preflight failure, or `null` when none was determined.
 * @param registry - Registry probe result; only its `recommendedAction` field is read.
 * @param missingControlChannels - Control channels that failed their probe.
 * @returns The recommended action text.
 */
function recommendedPublishPreflightAction(
  failureClassification: PublishPreflightFailureClassification | null,
  registry: unknown,
  missingControlChannels: PublishPreflightControlChannel[],
): string {
  const nothingMissing = missingControlChannels.length === 0;
  if (failureClassification === null && nothingMissing) {
    return "none";
  }

  // A concrete recommendation from the registry probe wins over the generic texts below.
  const registryAdvice = asString(asRecord(registry)?.recommendedAction);
  const registryAdviceUsable = !(registryAdvice.length === 0 || registryAdvice === "none");
  if (registryAdviceUsable) {
    return registryAdvice;
  }

  switch (failureClassification) {
    case "local-docker-required":
      return "rerun this read-only preflight through --main-server-ip <host> or from a main-server CLI with backend-core available";
    case "auth-missing":
      return "restore frontend control-plane authentication before rerunning the dry-run preflight";
    case "remote-proxy-missing":
      return "restore frontend to backend-core proxy reachability before rerunning the dry-run preflight";
    case "provider-unreachable":
      return "restore D601 provider-gateway host.ssh reachability before rerunning the dry-run preflight";
    case "ssh-helper-command-shape-incompatible":
      return "upgrade or shorten the host.ssh readonly command shape before rerunning the dry-run preflight";
    case "remote-command-timeout":
      return "rerun artifact-registry health and inspect D601 host.ssh latency if the timeout repeats";
    case "registry-not-installed":
      return "install the D601 artifact registry through the controlled artifact-registry install path, then rerun health";
    case "registry-unhealthy":
      return "restore the D601 artifact registry service/container/API until artifact-registry health passes";
    default:
      // No classification (null): fall through to the channel-based messages.
      break;
  }

  if (missingControlChannels.includes("registry")) {
    return "restore or install the D601 artifact registry until artifact-registry health passes";
  }
  const listedChannels = missingControlChannels.join(", ");
  return `restore missing control channel(s): ${listedChannels || "unknown"}`;
}
function publishPreflightControlPlane( function publishPreflightControlPlane(
transport: PublishPreflightTransport, transport: PublishPreflightTransport,
failureClassification: PublishPreflightFailureClassification | null, failureClassification: PublishPreflightFailureClassification | null,
@@ -462,6 +490,13 @@ function publishPreflightControlPlane(
}; };
} }
/**
 * Lists the failed scopes to report alongside a publish preflight result.
 *
 * - A passing preflight has no failed scopes.
 * - Otherwise the registry record's `failedScopes` are used (stringified) when
 *   that array is present and non-empty.
 * - Otherwise the preflight's `missingChannels` are reported.
 *
 * @param preflight - The computed publish preflight.
 * @returns A new array of failed scope names; empty only when the preflight is ok
 *   or when neither source names a failure.
 */
function publishPreflightFailedScopes(preflight: PublishPreflight): string[] {
  if (preflight.ok) return [];

  const registryScopes = asRecord(preflight.registry)?.failedScopes;
  // An empty failedScopes array means the registry itself reported nothing
  // failed. The preflight can still be blocked by another channel, so an empty
  // array must not mask missingChannels (Array.isArray([]) is true).
  if (Array.isArray(registryScopes) && registryScopes.length > 0) {
    return registryScopes.map((scope) => String(scope));
  }

  // Return a copy so callers cannot mutate the preflight's own channel list.
  return [...preflight.missingChannels];
}
function dispatchPreflightFailure(command: string, result: DispatchResult): DispatchResult { function dispatchPreflightFailure(command: string, result: DispatchResult): DispatchResult {
return { return {
ok: false, ok: false,
@@ -1430,6 +1465,7 @@ async function publishUserServicePreflight(
const missingControlChannels = controlChannels.filter((item) => !item.ok).map((item) => item.channel); const missingControlChannels = controlChannels.filter((item) => !item.ok).map((item) => item.channel);
const ready = missingChannels.length === 0; const ready = missingChannels.length === 0;
const failureClassification = ready ? null : classifyPublishPreflightFailure(transport, overview, sshProbe, registry); const failureClassification = ready ? null : classifyPublishPreflightFailure(transport, overview, sshProbe, registry);
const recommendedAction = recommendedPublishPreflightAction(failureClassification, registry, missingControlChannels);
return { return {
ok: ready, ok: ready,
runnerDisposition: ready ? "ready" : "infra-blocked", runnerDisposition: ready ? "ready" : "infra-blocked",
@@ -1444,6 +1480,8 @@ async function publishUserServicePreflight(
channels, channels,
registry, registry,
controlPlane: publishPreflightControlPlane(transport, failureClassification), controlPlane: publishPreflightControlPlane(transport, failureClassification),
recommendedAction,
remoteCommandShape: registryProbe.remoteCommandShape,
next: ready next: ready
? [ ? [
`bun scripts/cli.ts ci publish-user-service --service ${options.serviceId} --commit ${options.commit} --wait-ms 1200000`, `bun scripts/cli.ts ci publish-user-service --service ${options.serviceId} --commit ${options.commit} --wait-ms 1200000`,
@@ -1634,6 +1672,9 @@ async function publishUserServiceArtifact(config: UniDeskConfig, options: CiPubl
controlChannels: preflight.controlChannels, controlChannels: preflight.controlChannels,
channels: preflight.channels, channels: preflight.channels,
failureClassification: preflight.failureClassification, failureClassification: preflight.failureClassification,
failedScopes: publishPreflightFailedScopes(preflight),
recommendedAction: preflight.recommendedAction,
remoteCommandShape: preflight.remoteCommandShape,
controlPlane: preflight.controlPlane, controlPlane: preflight.controlPlane,
registry: preflight.registry, registry: preflight.registry,
sourceHostPath: options.sourceHostPath, sourceHostPath: options.sourceHostPath,
@@ -1761,6 +1802,9 @@ export async function runCiPublishUserServiceDryRunPreflight(
controlChannels: preflight.controlChannels, controlChannels: preflight.controlChannels,
channels: preflight.channels, channels: preflight.channels,
failureClassification: preflight.failureClassification, failureClassification: preflight.failureClassification,
failedScopes: publishPreflightFailedScopes(preflight),
recommendedAction: preflight.recommendedAction,
remoteCommandShape: preflight.remoteCommandShape,
controlPlane: preflight.controlPlane, controlPlane: preflight.controlPlane,
registry: preflight.registry, registry: preflight.registry,
sourceHostPath: options.sourceHostPath, sourceHostPath: options.sourceHostPath,