From a376ce5d9d4fde7453d8d361462be34d3ab51a48 Mon Sep 17 00:00:00 2001 From: Codex Date: Fri, 26 Jun 2026 11:40:37 +0000 Subject: [PATCH] fix: pass no-cache mode to ci benchmark --- .agents/skills/unidesk-cicd/references/full.md | 2 +- docs/reference/cli.md | 2 +- scripts/src/hwlab-node-control-plane.ts | 10 +++++++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.agents/skills/unidesk-cicd/references/full.md b/.agents/skills/unidesk-cicd/references/full.md index afc07ecb..aa642c0b 100644 --- a/.agents/skills/unidesk-cicd/references/full.md +++ b/.agents/skills/unidesk-cicd/references/full.md @@ -101,7 +101,7 @@ bun scripts/cli.ts hwlab nodes control-plane sync --node D601 --lane v03 --confi 从 `config/hwlab-node-control-plane.yaml` 渲染 D601 HWLAB v03 的节点本地 CI/CD、git-mirror、Tekton、runtime dependency image preload 和 Argo 前置对象。confirmed apply 只做 control-plane bootstrap,不触发 runtime rollout,不创建 PK01 DB,也不修改 Caddy/FRP。node-local registry 镜像只能作为 tools image 或 runtime dependency 的输出 artifact;输入 base/pull image 必须是 YAML 中声明的公开 registry 来源,缺失 output image 时通过 `status.next.blockers` 或 `runtime-image status` 暴露。D601 Argo CD 安装也必须由 YAML 声明:官方 manifest URL、版本、镜像 rewrite/preload、CRD、期望 workload 和 AppProject/Application 都来自 YAML,不能使用手工 kubectl/argo CLI 作为正式安装路径。 -`ci-build-benchmark` 是 HWLAB v0.3 k3s CI/CD 全量无缓存构建出网测速入口。profile、独立 catalog path 模板、cache policy、必须输出的 timing 阶段和失败族来自 `config/hwlab-node-control-plane.yaml`;实际 service set、git mirror URL、Pipeline、ServiceAccount、registry prefix 和 base image 仍以 `config/hwlab-node-lanes.yaml` 为准。confirmed benchmark 只创建唯一 PipelineRun 并返回 status/logs 轮询命令;通过证据必须包含每个 `build-` TaskRun,PipelineRun 成功但缺少 build task 要按 `cache-hit-forbidden` 处理。 +`ci-build-benchmark` 是 HWLAB v0.3 k3s CI/CD 全量无缓存构建出网测速入口。profile、独立 catalog path 模板、cache policy、必须输出的 timing 阶段和失败族来自 `config/hwlab-node-control-plane.yaml`;实际 service set、git mirror URL、Pipeline、ServiceAccount、registry prefix 和 base image 仍以 `config/hwlab-node-lanes.yaml` 为准。`forbidBuildkitCache=true` 时会向 PipelineRun 传 `build-cache-mode=disabled`。confirmed benchmark 只创建唯一 PipelineRun 并返回 status/logs 轮询命令;通过证据必须包含每个 `build-` TaskRun,PipelineRun 成功但缺少 build task 要按 `cache-hit-forbidden` 处理。 `hwlab nodes control-plane status` 默认返回 compact commander summary,只保留 source commit、PipelineRun、Argo、runtime readiness、public probe 和 next action;完整 expected YAML/render target、kubectl result tail、Secret/sourceRef 详情和 probe 原始结果只在 `--full` 或 `--raw` 下展开。 diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 5628742e..a76b4a7c 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -32,7 +32,7 @@ G14/D601 v03 的 bootstrap admin password 是 HWLAB runtime Secret 生命周期 `hwlab nodes control-plane infra plan|status|apply --node D601 --lane v03` 是 D601 HWLAB v03 节点本地 k3s、CI/CD 与 git-mirror 前置控制面的 YAML 驱动入口,配置真相源是 `config/hwlab-node-control-plane.yaml`。`plan` 只读展示 YAML target、host k3s node config 摘要和将渲染的 control-plane 对象;`status` 只读观察 k3s systemd drop-in 与 node `capacity/allocatable.pods`、D601 Tekton、CI namespace、git-mirror、Argo、node-local registry 和 tools image readiness;`apply --dry-run` 只输出 manifest 与 host config 摘要;`apply --confirm` 按 YAML 收敛 D601 host k3s drop-in 和 control-plane bootstrap 对象,只有 host k3s 配置或 live pod capacity 未收敛时才重启 k3s,不触发 HWLAB runtime rollout,不创建 PK01 DB,也不修改 Caddy/FRP。D601 host 侧 k3s pre-start 修正也必须写成 YAML `execStartPre` argv,不做手工 systemd 热改;当 kube API 已不可用时,`apply` 可用同一 YAML 渲染出的 host 脚本经 node-local tools image/Docker fallback 恢复 systemd drop-in,输出仍只给对象名、SHA、exit code 和摘要。k3s pod capacity 等可调数值只以 YAML 为准,长期参考不复制具体数值;tools image 的 node-local registry 地址只能作为输出 artifact,输入 base image 必须由 YAML 声明为公开 registry 来源,缺少 output image 时应在 `status.next.blockers` 中体现,而不是把现有 node-local image 当成输入基础镜像。 -`hwlab nodes control-plane infra ci-build-benchmark --node D601 --lane v03 --profile --confirm` 是 HWLAB v0.3 k3s CI/CD 全量无缓存构建出网测速入口,profile、cache policy、独立 catalog path 模板、PipelineRun prefix、必须输出的 timing 阶段和失败族都来自 `config/hwlab-node-control-plane.yaml`。confirmed benchmark 只创建一次唯一 PipelineRun,使用 node-lane YAML 中的实际 HWLAB v0.3 service set、git mirror read/write URL、registry prefix、base image 和 Tekton pipeline;status/logs 通过短连接轮询 PipelineRun/TaskRun 摘要和有界日志。成功的 benchmark 必须出现每个 `build-` TaskRun;如果 PipelineRun 成功但缺少任一 service build task,CLI 必须把该 service 报为 `cache-hit-forbidden`,不能把 catalog/env reuse 当作 #1010 这类性能验收的通过证据。 +`hwlab nodes control-plane infra ci-build-benchmark --node D601 --lane v03 --profile --confirm` 是 HWLAB v0.3 k3s CI/CD 全量无缓存构建出网测速入口,profile、cache policy、独立 catalog path 模板、PipelineRun prefix、必须输出的 timing 阶段和失败族都来自 `config/hwlab-node-control-plane.yaml`。confirmed benchmark 只创建一次唯一 PipelineRun,使用 node-lane YAML 中的实际 HWLAB v0.3 service set、git mirror read/write URL、registry prefix、base image 和 Tekton pipeline;`forbidBuildkitCache=true` 时会向 PipelineRun 传 `build-cache-mode=disabled`。status/logs 通过短连接轮询 PipelineRun/TaskRun 摘要和有界日志。成功的 benchmark 必须出现每个 `build-` TaskRun;如果 PipelineRun 成功但缺少任一 service build task,CLI 必须把该 service 报为 `cache-hit-forbidden`,不能把 catalog/env reuse 当作 #1010 这类性能验收的通过证据。 `hwlab nodes git-mirror status|sync|flush --node --lane ` 是 node-scoped runtime lane 的 Git mirror 维护入口。`status` 的 `githubSource` / `githubGitops` 来自本地 mirror cache 的 `refs/mirror-stage/...`,不是实时 GitHub API;输出中的 `refSources.githubFieldsAreMirrorStageCache=true` 和 `refSources.cacheRefresh` 给出这一来源和刷新命令。`sync --confirm --wait` 的 k3s Job 遇到 GitHub SSH transient 时,应通过目标 workspace fallback 拉取 GitHub source/gitops 并写回 node-local mirror,输出只披露 commit、mirror write URL 和 fallback 状态。`flush --confirm --wait` 如果已经把 GitOps ref push 到 GitHub,但 post-push fetch/recheck 因 transient SSH 失败而无法刷新 mirror-stage,会标记 `partialSuccess=push-succeeded-fetch-failed`;CLI 应自动执行一次受控 sync 刷新 mirror-stage,若恢复后 `pendingFlush=false` 且 `githubInSync=true`,结果应为 `ok=true` 并输出 `partialSuccessRecovered` / `postPushRecovery`,否则才保留 `degradedReason=node-runtime-git-mirror-flush-post-push-fetch-failed` 和下一步 `sync --confirm --wait`。不要把这种 partial success 解读为需要连续盲目 flush。`hwlab nodes control-plane trigger-current --node --lane --confirm --wait` 会在 source sync 后自动执行必要的 pre-flush,在 PipelineRun terminal 后自动执行必要的 post-flush;progress 事件必须显式输出 `git-mirror-pre-flush` / `git-mirror-post-flush` 的 executed/skipped、jobName、local/github source、local/github GitOps、`pendingFlush` 和 `githubInSync`,且已恢复的 partial success 不能让顶层 trigger-current false-fail。`control-plane status` 仍是只读入口,只暴露 compact `gitMirror` 摘要和下一步 flush 命令,不隐式执行写操作。 diff --git a/scripts/src/hwlab-node-control-plane.ts b/scripts/src/hwlab-node-control-plane.ts index 599c9a1e..b9c27a2c 100644 --- a/scripts/src/hwlab-node-control-plane.ts +++ b/scripts/src/hwlab-node-control-plane.ts @@ -688,6 +688,10 @@ function ciBuildBenchmarkCatalogPath(profile: CiBuildBenchmarkProfileSpec, pipel return profile.catalogPathTemplate.replace(/\{profile\}/gu, profile.profile).replace(/\{pipelineRun\}/gu, pipelineRun); } +function ciBuildBenchmarkBuildCacheMode(profile: CiBuildBenchmarkProfileSpec): "disabled" | "registry" { + return profile.cachePolicy.forbidBuildkitCache ? "disabled" : "registry"; +} + function ciBuildBenchmarkDefinitionSummary(runtime: HwlabRuntimeLaneSpec, target: ControlPlaneTargetSpec, profile: CiBuildBenchmarkProfileSpec): Record { return { targetId: target.id, @@ -705,6 +709,7 @@ function ciBuildBenchmarkDefinitionSummary(runtime: HwlabRuntimeLaneSpec, target baseImage: runtime.baseImage, services: runtime.serviceIds, imageTagMode: profile.imageTagMode, + buildCacheMode: ciBuildBenchmarkBuildCacheMode(profile), cachePolicy: profile.cachePolicy, requiredTimings: profile.requiredTimings, failureFamilies: profile.failureFamilies, @@ -742,6 +747,7 @@ function ciBuildBenchmarkPipelineRunManifest( "hwlab.pikastech.local/download-profile": runtime.downloadProfileId, "unidesk.ai/issue": "pikasTech/unidesk#1010", "unidesk.ai/cache-policy": JSON.stringify(profile.cachePolicy), + "unidesk.ai/build-cache-mode": ciBuildBenchmarkBuildCacheMode(profile), "unidesk.ai/catalog-path": catalogPath, "unidesk.ai/runtime-lane-config-ref": profile.runtimeLaneConfigRef, "unidesk.ai/required-timings": profile.requiredTimings.join(","), @@ -772,6 +778,7 @@ function ciBuildBenchmarkPipelineRunManifest( { name: "registry-prefix", value: runtime.registryPrefix }, { name: "services", value: runtime.serviceIds.join(",") }, { name: "base-image", value: runtime.baseImage }, + { name: "build-cache-mode", value: ciBuildBenchmarkBuildCacheMode(profile) }, ], workspaces: [ { name: "source", volumeClaimTemplate: { spec: { accessModes: ["ReadWriteOnce"], resources: { requests: { storage: "8Gi" } } } } }, @@ -811,6 +818,7 @@ function renderCiBuildBenchmarkResult(result: Record): Rendered ["pipeline", renderCell(benchmark.pipeline)], ["catalogPath", renderCell(start.catalogPath ?? plan.catalogPath ?? pipelineRun.catalogPath)], ["services", String((Array.isArray(benchmark.services) ? benchmark.services : []).length)], + ["buildCacheMode", renderCell(benchmark.buildCacheMode)], ["cachePolicy", JSON.stringify(benchmark.cachePolicy ?? {})], ["requiredTimings", Array.isArray(benchmark.requiredTimings) ? benchmark.requiredTimings.join(",") : "-"], ]), @@ -3444,7 +3452,7 @@ function ciBuildBenchmarkPolicyOk(job: Record, cachePolicy: CiB function ciBuildBenchmarkLogHasBuildkitCache(job: Record): boolean { const logTail = typeof job.logTail === "string" ? job.logTail : ""; - return /buildkitCacheRef|--import-cache|--export-cache|writing cache image manifest/iu.test(logTail); + return /"buildkitCacheRef"\s*:\s*"[^"]+"|--import-cache|--export-cache|writing cache image manifest/iu.test(logTail); } function ciBuildBenchmarkLogHasReuse(job: Record): boolean {