diff --git a/docs/reference/cli.md b/docs/reference/cli.md index b38b0830..be04259d 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -30,7 +30,7 @@ G14/D601 v03 的 bootstrap admin password 是 HWLAB runtime Secret 生命周期 `hwlab nodes control-plane infra plan|status|apply --node D601 --lane v03` 是 D601 HWLAB v03 节点本地 CI/CD 与 git-mirror 前置控制面的 YAML 驱动入口,配置真相源是 `config/hwlab-node-control-plane.yaml`。`plan` 只读展示 YAML target 和将渲染的 control-plane 对象;`status` 只读观察 D601 Tekton、CI namespace、git-mirror、Argo、node-local registry 和 tools image readiness;`apply --dry-run` 只输出 manifest 摘要;`apply --confirm` 只收敛 D601 control-plane bootstrap 对象,不触发 HWLAB runtime rollout,不创建 PK01 DB,也不修改 Caddy/FRP。tools image 的 node-local registry 地址只能作为输出 artifact;输入 base image 必须由 YAML 声明为公开 registry 来源,缺少 output image 时应在 `status.next.blockers` 中体现,而不是把现有 node-local image 当成输入基础镜像。 -`hwlab nodes git-mirror status|sync|flush --node --lane ` 是 node-scoped runtime lane 的 Git mirror 维护入口。`status` 的 `githubSource` / `githubGitops` 来自本地 mirror cache 的 `refs/mirror-stage/...`,不是实时 GitHub API;输出中的 `refSources.githubFieldsAreMirrorStageCache=true` 和 `refSources.cacheRefresh` 给出这一来源和刷新命令。`flush --confirm --wait` 如果已经把 GitOps ref push 到 GitHub,但 post-push fetch/recheck 因 transient SSH 失败而无法刷新 mirror-stage,会返回 `partialSuccess=push-succeeded-fetch-failed`、`degradedReason=node-runtime-git-mirror-flush-post-push-fetch-failed`,并把下一步指向 `sync --confirm --wait` 后再查 `status`;不要把这种 partial success 解读为需要连续盲目 flush。 +`hwlab nodes git-mirror status|sync|flush --node --lane ` 是 node-scoped runtime lane 的 Git mirror 维护入口。`status` 的 `githubSource` / `githubGitops` 来自本地 mirror cache 的 `refs/mirror-stage/...`,不是实时 GitHub API;输出中的 `refSources.githubFieldsAreMirrorStageCache=true` 和 `refSources.cacheRefresh` 给出这一来源和刷新命令。`flush --confirm --wait` 如果已经把 GitOps ref push 到 GitHub,但 post-push fetch/recheck 因 transient SSH 失败而无法刷新 mirror-stage,会返回 `partialSuccess=push-succeeded-fetch-failed`、`degradedReason=node-runtime-git-mirror-flush-post-push-fetch-failed`,并把下一步指向 `sync --confirm --wait` 后再查 `status`;不要把这种 partial success 解读为需要连续盲目 flush。`hwlab nodes control-plane trigger-current --node --lane --confirm --wait` 会在 source sync 后自动执行必要的 pre-flush,在 PipelineRun terminal 后自动执行必要的 post-flush;progress 事件必须显式输出 `git-mirror-pre-flush` / `git-mirror-post-flush` 的 executed/skipped、jobName、local/github source、local/github GitOps、`pendingFlush` 和 `githubInSync`。`control-plane status` 仍是只读入口,只暴露 compact `gitMirror` 摘要和下一步 flush 命令,不隐式执行写操作。 `hwlab nodes control-plane infra tools-image status|build|logs --node D601 --lane v03` 是 D601 tools image 的受控入口。Dockerfile 必须由 `config/hwlab-node-control-plane.yaml` 的 `tekton.toolsImage.dockerfileInline` 声明,输入镜像必须列在 `publicBaseImages`,构建参数和网络模式也来自 YAML;confirmed build 只在 D601 后台异步构建并推送到 node-local registry,返回 status/logs 轮询命令。`hwlab nodes control-plane infra argo status|apply|logs --node D601 --lane v03` 是 D601 Argo CD 的声明式安装入口。Argo 版本、官方 manifest URL、镜像 rewrite/preload、field manager、imagePullPolicy、CRD 列表、期望 Deployment/StatefulSet 以及生成的 AppProject/Application 都必须来自同一个 YAML;`argo apply --confirm` 只执行可重复 server-side apply 和后台轮询,不把原生 `kubectl apply`、手工 Argo CLI 或临时 manifest 作为正式安装路径。 diff --git a/scripts/src/hwlab-node.ts b/scripts/src/hwlab-node.ts index 63ca6630..8b1659d2 100644 --- a/scripts/src/hwlab-node.ts +++ b/scripts/src/hwlab-node.ts @@ -2028,14 +2028,22 @@ function nodeRuntimeTriggerCurrent(scoped: ReturnType, sourceCommit: string): Record { +function nodeRuntimeEnsureGitMirrorSourceCurrent(scoped: ReturnType, sourceCommit: string, pipelineRun: string): Record { + const full = nodeScopedFullOutput(scoped); const before = nodeRuntimeGitMirrorStatus({ ...scoped, action: "status", dryRun: true, confirm: false }); - const beforeSummary = record(before.summary); - if (before.ok === true && beforeSummary.localSource === sourceCommit) { - return { ok: true, mode: "already-current", sourceCommit, before }; + const beforeSummary = compactNodeRuntimeGitMirrorStatus(before); + if (before.ok === true && beforeSummary.localSource === sourceCommit && beforeSummary.githubSource === sourceCommit) { + const flush = nodeRuntimeEnsureGitMirrorFlushed(scoped, "pre", sourceCommit, pipelineRun, before); + return { + ok: flush.ok === true, + mode: "already-current", + sourceCommit, + beforeSummary, + before: full ? before : undefined, + flush, + degradedReason: flush.ok === true ? undefined : "node-runtime-git-mirror-pre-flush-failed", + }; } const sync = nodeRuntimeGitMirrorRun({ ...scoped, domain: "git-mirror", action: "sync", confirm: true, dryRun: false, wait: true }); const after = record(sync.status); - const afterSummary = record(after.summary); - const ok = sync.ok === true && afterSummary.localSource === sourceCommit; + const afterSummary = Object.keys(after).length > 0 ? compactNodeRuntimeGitMirrorStatus(after) : {}; + const sourceOk = sync.ok === true && afterSummary.localSource === sourceCommit && afterSummary.githubSource === sourceCommit; + const flush = sourceOk ? nodeRuntimeEnsureGitMirrorFlushed(scoped, "pre", sourceCommit, pipelineRun, after) : null; + const ok = sourceOk && (flush === null || flush.ok === true); return { ok, mode: "synced-before-trigger", sourceCommit, - before, - sync, - after: sync.status ?? null, - degradedReason: ok ? undefined : "node-runtime-git-mirror-local-source-not-current-after-sync", + beforeSummary, + before: full ? before : undefined, + sync: full ? sync : compactNodeRuntimeGitMirrorRun(sync), + afterSummary: Object.keys(afterSummary).length > 0 ? afterSummary : null, + after: full ? sync.status ?? null : undefined, + flush, + degradedReason: ok + ? undefined + : sourceOk + ? "node-runtime-git-mirror-pre-flush-failed" + : "node-runtime-git-mirror-local-source-not-current-after-sync", + }; +} + +function nodeRuntimeEnsureGitMirrorFlushed( + scoped: ReturnType, + phase: "pre" | "post", + sourceCommit: string, + pipelineRun: string | null, + statusInput: Record | null = null, +): Record { + const stage = `git-mirror-${phase}-flush`; + const full = nodeScopedFullOutput(scoped); + const before = statusInput ?? nodeRuntimeGitMirrorStatus({ ...scoped, action: "status", dryRun: true, confirm: false }); + const beforeSummary = compactNodeRuntimeGitMirrorStatus(before); + if (before.ok !== true) { + printNodeRuntimeTriggerProgress(scoped.spec, { stage, status: "failed", sourceCommit, pipelineRun, reason: "git-mirror-status-failed", ...beforeSummary }); + return { + ok: false, + phase, + mode: "status-failed", + executed: false, + before: full ? before : undefined, + beforeSummary, + degradedReason: `node-runtime-git-mirror-${phase}-status-failed`, + next: { status: `bun scripts/cli.ts hwlab nodes git-mirror status --node ${scoped.node} --lane ${scoped.lane}` }, + }; + } + const flushNeeded = nodeRuntimeGitMirrorNeedsFlush(before); + if (!flushNeeded) { + printNodeRuntimeTriggerProgress(scoped.spec, { stage, status: "skipped", sourceCommit, pipelineRun, flushNeeded: false, ...beforeSummary }); + return { + ok: true, + phase, + mode: "already-flushed", + executed: false, + before: full ? before : undefined, + beforeSummary, + after: full ? before : undefined, + afterSummary: beforeSummary, + }; + } + printNodeRuntimeTriggerProgress(scoped.spec, { stage, status: "started", sourceCommit, pipelineRun, flushNeeded: true, ...beforeSummary }); + const flush = nodeRuntimeGitMirrorRun({ ...scoped, domain: "git-mirror", action: "flush", confirm: true, dryRun: false, wait: true }); + const after = record(flush.status); + const afterSummary = Object.keys(after).length > 0 ? compactNodeRuntimeGitMirrorStatus(after) : {}; + const ok = flush.ok === true && Object.keys(after).length > 0 && !nodeRuntimeGitMirrorNeedsFlush(after); + printNodeRuntimeTriggerProgress(scoped.spec, { + stage, + status: ok ? "succeeded" : "failed", + sourceCommit, + pipelineRun, + flushNeeded: true, + jobName: flush.jobName ?? null, + ...afterSummary, + }); + return { + ok, + phase, + mode: "flushed", + executed: true, + before: full ? before : undefined, + beforeSummary, + flush: full ? flush : compactNodeRuntimeGitMirrorRun(flush), + jobName: flush.jobName ?? null, + after: full ? (Object.keys(after).length > 0 ? after : null) : undefined, + afterSummary: Object.keys(afterSummary).length > 0 ? afterSummary : null, + degradedReason: ok ? undefined : `node-runtime-git-mirror-${phase}-flush-failed`, + next: ok ? undefined : flush.next ?? { flush: `bun scripts/cli.ts hwlab nodes git-mirror flush --node ${scoped.node} --lane ${scoped.lane} --confirm --wait` }, + }; +} + +function nodeRuntimeGitMirrorNeedsFlush(status: Record): boolean { + const summary = record(status.summary); + const localGitops = typeof summary.localGitops === "string" ? summary.localGitops : null; + const githubGitops = typeof summary.githubGitops === "string" ? summary.githubGitops : null; + return summary.pendingFlush === true + || summary.flushNeeded === true + || summary.githubInSync === false + || (localGitops !== null && githubGitops !== null && localGitops !== githubGitops); +} + +function nodeScopedFullOutput(scoped: ReturnType): boolean { + return scoped.originalArgs.includes("--full") || scoped.originalArgs.includes("--raw"); +} + +function compactNodeRuntimeGitMirrorObservation(status: Record): Record { + return { + ok: status.ok === true, + mode: status.mode ?? null, + mutation: status.mutation === true, + summary: compactNodeRuntimeGitMirrorStatus(status), + degradedReason: status.degradedReason ?? null, + next: status.next ?? null, + }; +} + +function compactNodeRuntimeGitMirrorRun(result: Record): Record { + const status = record(result.status); + return { + ok: result.ok === true, + action: result.action ?? null, + mode: result.mode ?? null, + mutation: result.mutation === true, + jobName: result.jobName ?? null, + partialSuccess: result.partialSuccess ?? null, + degradedReason: result.degradedReason ?? null, + statusSummary: Object.keys(status).length > 0 ? compactNodeRuntimeGitMirrorStatus(status) : null, + next: result.next ?? null, + }; +} + +function compactNodeRuntimeGitMirrorStatus(status: Record): Record { + const summary = record(status.summary); + return { + ok: status.ok === true, + localSource: summary.localSource ?? null, + githubSource: summary.githubSource ?? null, + localGitops: summary.localGitops ?? null, + githubGitops: summary.githubGitops ?? null, + pendingFlush: summary.pendingFlush === true, + flushNeeded: summary.flushNeeded === true, + githubInSync: summary.githubInSync === true, }; } @@ -2375,14 +2543,17 @@ function nodeRuntimeControlPlaneStatus(scoped: ReturnType 0 && workloadReadiness.every((item) => item.ready); const runtimeReady = namespaceExists && localPostgresObjects.length === 0 && workloadsReady && (spec.externalPostgres === undefined || (bridge.ready && secrets.ready)); const argoReady = argo.exitCode === 0 && repoURL === spec.argoRepoUrl && targetRevision === spec.gitopsBranch && path === spec.runtimePath && syncStatus === "Synced" && health === "Healthy"; const pipelineRunReady = pipelineRunProbe !== null && pipelineRunProbe.status === "True"; const publicReady = publicProbes.ready === true; + const gitMirrorReady = gitMirror.ok === true && gitMirrorCompact.pendingFlush === false && gitMirrorCompact.githubInSync === true; const fullStatus = { - ok: controlPlaneReady && runtimeReady && argoReady && pipelineRunReady && publicReady, + ok: controlPlaneReady && runtimeReady && argoReady && pipelineRunReady && publicReady && gitMirrorReady, command: `hwlab nodes control-plane status --node ${scoped.node} --lane ${scoped.lane}`, mode: "node-scoped-runtime-status", mutation: false, @@ -2428,6 +2599,11 @@ function nodeRuntimeControlPlaneStatus(scoped: ReturnType, const argo = record(status.argo); const runtime = record(status.runtime); const publicProbes = record(status.publicProbes); + const gitMirror = record(status.gitMirror); + const gitMirrorCompact = record(gitMirror.compact); const workloadReadiness = Array.isArray(runtime.workloadReadiness) ? runtime.workloadReadiness.map(record) : []; const readyWorkloads = workloadReadiness.filter((item) => item.ready === true).length; const workloadCount = typeof runtime.workloadCount === "number" ? runtime.workloadCount : workloadReadiness.length; @@ -2554,11 +2734,22 @@ function summarizeNodeRuntimeControlPlaneStatus(status: Record, web: { url: webProbe.url ?? null, ok: webProbe.ok === true, httpStatus: webProbe.httpStatus ?? null }, apiHealth: { url: apiProbe.url ?? null, ok: apiProbe.ok === true, httpStatus: apiProbe.httpStatus ?? null }, }, + gitMirror: { + ready: gitMirror.ready === true, + localSource: gitMirrorCompact.localSource ?? null, + githubSource: gitMirrorCompact.githubSource ?? null, + localGitops: gitMirrorCompact.localGitops ?? null, + githubGitops: gitMirrorCompact.githubGitops ?? null, + pendingFlush: gitMirrorCompact.pendingFlush === true, + flushNeeded: gitMirrorCompact.flushNeeded === true, + githubInSync: gitMirrorCompact.githubInSync === true, + }, nextAction: nodeRuntimeStatusNextAction(status, scoped), next: { full: `${nodeRuntimeStatusCommand(scoped)} --full`, plan: `bun scripts/cli.ts hwlab nodes control-plane plan --node ${scoped.node} --lane ${scoped.lane}`, triggerCurrent: `bun scripts/cli.ts hwlab nodes control-plane trigger-current --node ${scoped.node} --lane ${scoped.lane} --confirm`, + gitMirrorFlush: `bun scripts/cli.ts hwlab nodes git-mirror flush --node ${scoped.node} --lane ${scoped.lane} --confirm --wait`, webProbe: `bun scripts/cli.ts hwlab nodes web-probe run --node ${scoped.node} --lane ${scoped.lane}`, }, }; @@ -2580,6 +2771,9 @@ function nodeRuntimeStatusNextAction(status: Record, scoped: Re if (reason === "public-probe-not-ready") { return `bun scripts/cli.ts hwlab nodes web-probe run --node ${scoped.node} --lane ${scoped.lane}`; } + if (reason === "git-mirror-pending-flush") { + return `bun scripts/cli.ts hwlab nodes git-mirror flush --node ${scoped.node} --lane ${scoped.lane} --confirm --wait`; + } return `${nodeRuntimeStatusCommand(scoped)} --full`; } @@ -3990,6 +4184,43 @@ function createNodeRuntimePipelineRun(spec: HwlabRuntimeLaneSpec, sourceCommit: return runNodeK3sScript(spec, script, timeoutSeconds); } +function waitForNodeRuntimePipelineRunTerminal(spec: HwlabRuntimeLaneSpec, pipelineRun: string, timeoutSeconds: number): Record { + const startedAt = Date.now(); + const deadline = startedAt + timeoutSeconds * 1000; + let polls = 0; + let last: Record = { exists: false, name: pipelineRun }; + printNodeRuntimeTriggerProgress(spec, { stage: "pipelinerun-wait", status: "started", pipelineRun, timeoutSeconds }); + while (Date.now() <= deadline) { + polls += 1; + last = getNodeRuntimePipelineRun(spec, pipelineRun); + const status = typeof last.status === "string" ? last.status : null; + const reason = typeof last.reason === "string" ? last.reason : null; + printNodeRuntimeTriggerProgress(spec, { stage: "pipelinerun-wait", status: "poll", pipelineRun, pipelineStatus: status, reason, polls, elapsedMs: Date.now() - startedAt }); + if (status === "True" || status === "False") { + const ok = status === "True"; + printNodeRuntimeTriggerProgress(spec, { stage: "pipelinerun-wait", status: ok ? "succeeded" : "failed", pipelineRun, pipelineStatus: status, reason, polls, elapsedMs: Date.now() - startedAt }); + return { + ok, + status: ok ? "succeeded" : "failed", + pipelineRun: last, + polls, + elapsedMs: Date.now() - startedAt, + degradedReason: ok ? undefined : "node-runtime-pipelinerun-failed", + }; + } + sleepSync(Math.min(10_000, Math.max(1000, deadline - Date.now()))); + } + printNodeRuntimeTriggerProgress(spec, { stage: "pipelinerun-wait", status: "timeout", pipelineRun, polls, elapsedMs: Date.now() - startedAt }); + return { + ok: false, + status: "timeout", + pipelineRun: last, + polls, + elapsedMs: Date.now() - startedAt, + degradedReason: "node-runtime-pipelinerun-wait-timeout", + }; +} + function printNodeRuntimeTriggerProgress(spec: HwlabRuntimeLaneSpec, data: Record = {}): void { process.stderr.write(`${JSON.stringify({ event: "hwlab.runtime-lane.trigger.progress", at: new Date().toISOString(), lane: spec.lane, node: spec.nodeId, ...data })}\n`); } @@ -4009,6 +4240,11 @@ function getNodeRuntimePipelineRun(spec: HwlabRuntimeLaneSpec, pipelineRun: stri }; } +function sleepSync(ms: number): void { + const buffer = new SharedArrayBuffer(4); + Atomics.wait(new Int32Array(buffer), 0, 0, Math.max(0, ms)); +} + function syncNodeExternalPostgresSecrets(spec: HwlabRuntimeLaneSpec, dryRun: boolean, timeoutSeconds: number): Record | null { const pg = spec.externalPostgres; if (pg === undefined) return null;