diff --git a/.agents/skills/unidesk-cicd/SKILL.md b/.agents/skills/unidesk-cicd/SKILL.md index 257d73fb..d1ace571 100644 --- a/.agents/skills/unidesk-cicd/SKILL.md +++ b/.agents/skills/unidesk-cicd/SKILL.md @@ -44,7 +44,9 @@ bun scripts/cli.ts cicd branch-follower debug-step --follower web-probe-sentinel - 触发或验收 rollout 时必须绑定 lane、source commit、PipelineRun/GitOps revision、runtime ready 和 `/health` 端点验证结果;web-probe/Playwright 结果只能作为单独的 post-deploy 证据。 - CI/CD 状态、日志和事件查询必须减少 trans/SSH 传输:能在目标 NODE/k8s 内解析、聚合、裁剪的内容,必须在目标侧计算成短 JSON/table 摘要后再回传;禁止为了本地解析而把完整 ConfigMap、大对象、长日志或原始 API payload 透传回来。 - branch-follower 排障必须优先使用 `debug-step` 做单步调试:`state-read`、`status-read`、`decide`、`state-write` 分别定位状态读取、K8s native status、决策和 ConfigMap patch,不要通过反复推小提交触发整条自动跟随大回环来定位同一问题。 +- CI/CD 排障中再次踩到已经暴露过的运行面坑、工具误用、镜像假设或状态可见性缺口时,必须先把长期规则写入本 skill/reference,再继续单步调试;branch-follower 必须先让相关 `debug-step` 单步全部通过,再做 `run-once`、自动 loop 或小 PR/小提交联调。 - CI/CD 验证、测试和性能度量必须在目标 NODE/k8s 内执行,尤其是 branch-follower、Tekton/Argo、runtime reuse/env reuse、git mirror 和 runtime-ready 相关改动;不要在 master/local host 跑 test 或用本地验证结果替代目标运行面证据。本机只用于源码阅读、编辑和必要静态语法检查,正式收敛结论必须来自目标 NODE 计算出的短摘要。 +- in-cluster/controller/native helper 不能假设镜像内存在 `kubectl` binary。目标 Pod/Job 内读取或写入 Kubernetes 对象必须走 serviceaccount token + Kubernetes HTTPS API 或已封装的 native helper;`kubectl` 只允许在 operator 侧受控 CLI/trans 边界作为 transport/debug 包装,不得进入正式 controller 状态读写链路。 - 一旦发现 CI/CD CLI 被误用且可能写入错误状态、产生伪证据或绕过目标运行面,必须立刻先把用法改成更符合直觉的公开入口并更新本 skill/reference,再继续验证或交付;不要只靠口头记忆、隐藏 flag、手动约定或后续小心来避免复发。内部 in-cluster 模式必须只由目标 k8s Job/Pod 调用,操作者从本机只能用公开入口提交目标侧 Job 或读取目标侧摘要。 - Secret 只通过 YAML sourceRef/targetKey 和受控 CLI 下发;输出只披露 presence/fingerprint。 - 长命令用异步 job 或短轮询;不要长时间挂住 trans/ssh。 diff --git a/.agents/skills/unidesk-cicd/references/branch-follower.md b/.agents/skills/unidesk-cicd/references/branch-follower.md index c3a5a6fb..161b2fe1 100644 --- a/.agents/skills/unidesk-cicd/references/branch-follower.md +++ 
b/.agents/skills/unidesk-cicd/references/branch-follower.md @@ -30,6 +30,8 @@ bun scripts/cli.ts cicd branch-follower logs --follower Do not debug the same state/read/write problem by repeatedly pushing empty or tiny source commits to drive the full automatic follower loop. +When a repeated runtime pitfall or visibility defect is found during branch-follower work, update this reference or the skill entry first, then continue with the narrow debug step. Do not proceed to `run-once`, controller loop observation, automatic follower validation, or source-commit-driven integration until the relevant `state-read`, `status-read`, `decide`, and `state-write` debug steps pass for the affected follower. + ## Source Authority - Follower decisions must not read host source worktrees, target dev directories, `.worktree/*`, local git state, or direct GitHub branch refs. @@ -107,6 +109,8 @@ Status and decision inputs are Kubernetes-native: The branch follower must not parse downstream CLI stdout/stderr, `kubectl` human tables, `argo` text, `tkn` text, or curl output to infer observed sha, target sha, readiness or closeout. `kubectl -o json` may be used inside the controller/Job as a structured Kubernetes API transport only. +In-cluster controller and native helper scripts must not require a `kubectl` binary in the image; this rule takes precedence over the `kubectl -o json` allowance above for any code that runs inside a target Pod/Job. Native helpers that read or write ConfigMaps, Jobs, PipelineRuns, Argo Applications, Pods or logs must use the serviceaccount token and Kubernetes HTTPS API directly, or a shared native helper that does the same. A missing `kubectl` binary is a product defect in the helper, not a node problem. Operator-side `kubectl` through the controlled CLI/trans boundary remains acceptable only as a transport/debug wrapper. + The controller automatic loop submits trigger work without a blocking wait; later loops close out via the native state objects above. 
Failed state must not dedupe a source commit forever: retries may reuse deterministic native objects for the same source commit, and a new compact observation should be able to move the follower back into triggering or closeout. State ConfigMaps must stay bounded and human-queryable. Store compact summaries, stage refs, conditions, short messages, and drill-down object names; do not store full API payloads or long log dumps. Cleanup is an explicit operator operation for stale/broken state and must not be required for normal convergence. diff --git a/scripts/native/cicd/patch-configmap-data.mjs b/scripts/native/cicd/patch-configmap-data.mjs new file mode 100644 index 00000000..0c37d5e6 --- /dev/null +++ b/scripts/native/cicd/patch-configmap-data.mjs @@ -0,0 +1,50 @@ +import { readFileSync } from "node:fs"; +import https from "node:https"; + +const namespace = process.env.NAMESPACE || ""; +const configMap = process.env.CONFIGMAP || ""; +const patch = JSON.parse(Buffer.from(process.env.PATCH_B64 || "", "base64").toString("utf8")); +const host = process.env.KUBERNETES_SERVICE_HOST; +const port = Number(process.env.KUBERNETES_SERVICE_PORT || "443"); +const token = readFileSync("/var/run/secrets/kubernetes.io/serviceaccount/token", "utf8").trim(); +const ca = readFileSync("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"); + +function request(method, path, body, contentType = "application/json") { + return new Promise((resolve, reject) => { + const headers = { authorization: `Bearer ${token}` }; + const payload = body === undefined ? null : typeof body === "string" ? 
body : JSON.stringify(body); + if (payload !== null) { + headers["content-type"] = contentType; + headers["content-length"] = Buffer.byteLength(payload); + } + const req = https.request({ host, port, path, method, ca, headers }, (res) => { + let text = ""; + res.setEncoding("utf8"); + res.on("data", (chunk) => { text += chunk; }); + res.on("end", () => resolve({ status: res.statusCode || 0, text })); + }); + req.on("error", reject); + if (payload !== null) req.write(payload); + req.end(); + }); +} + +const path = `/api/v1/namespaces/${encodeURIComponent(namespace)}/configmaps/${encodeURIComponent(configMap)}`; +const before = await request("GET", path); +if (before.status === 404) { + process.stdout.write(JSON.stringify({ ok: true, present: false, patched: false, reason: "state-configmap-not-found", parsedDownstreamCliOutput: false })); + process.exit(0); +} +if (before.status < 200 || before.status >= 300) throw new Error(before.text || `kube api GET configmap status ${before.status}`); +const beforeObject = JSON.parse(before.text); +const result = await request("PATCH", path, patch, "application/merge-patch+json"); +if (result.status < 200 || result.status >= 300) throw new Error(result.text || `kube api PATCH configmap status ${result.status}`); +const afterObject = JSON.parse(result.text); +process.stdout.write(JSON.stringify({ + ok: true, + present: true, + patched: true, + beforeResourceVersion: beforeObject?.metadata?.resourceVersion || null, + afterResourceVersion: afterObject?.metadata?.resourceVersion || null, + parsedDownstreamCliOutput: false, +})); diff --git a/scripts/native/cicd/patch-follower-state.mjs b/scripts/native/cicd/patch-follower-state.mjs index 024859f7..ae5e84d6 100644 --- a/scripts/native/cicd/patch-follower-state.mjs +++ b/scripts/native/cicd/patch-follower-state.mjs @@ -1,38 +1,54 @@ -import { execFileSync } from "node:child_process"; +import { readFileSync } from "node:fs"; +import https from "node:https"; const namespace = 
process.env.NAMESPACE || ""; const configMap = process.env.CONFIGMAP || ""; const followerId = process.env.FOLLOWER_ID || ""; const specRef = process.env.SPEC_REF || ""; const stateJson = Buffer.from(process.env.STATE_B64 || "", "base64").toString("utf8"); +const host = process.env.KUBERNETES_SERVICE_HOST; +const port = Number(process.env.KUBERNETES_SERVICE_PORT || "443"); +const token = readFileSync("/var/run/secrets/kubernetes.io/serviceaccount/token", "utf8").trim(); +const ca = readFileSync("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"); -function kubectl(args, input) { - return execFileSync("kubectl", ["-n", namespace, ...args], { - input, - encoding: "utf8", - stdio: ["pipe", "pipe", "pipe"], +function request(method, path, body, contentType = "application/json") { + return new Promise((resolve, reject) => { + const headers = { authorization: `Bearer ${token}` }; + const payload = body === undefined ? null : typeof body === "string" ? body : JSON.stringify(body); + if (payload !== null) { + headers["content-type"] = contentType; + headers["content-length"] = Buffer.byteLength(payload); + } + const req = https.request({ host, port, path, method, ca, headers }, (res) => { + let text = ""; + res.setEncoding("utf8"); + res.on("data", (chunk) => { text += chunk; }); + res.on("end", () => resolve({ status: res.statusCode || 0, text })); + }); + req.on("error", reject); + if (payload !== null) req.write(payload); + req.end(); }); } -function readConfigMap() { - try { - return JSON.parse(kubectl(["get", "configmap", configMap, "-o", "json"])); - } catch (error) { - const stderr = String(error?.stderr || error?.message || ""); - if (/not found/i.test(stderr)) return null; - throw error; - } +async function readConfigMap() { + const result = await request("GET", `/api/v1/namespaces/${encodeURIComponent(namespace)}/configmaps/${encodeURIComponent(configMap)}`); + if (result.status === 404) return null; + if (result.status < 200 || result.status >= 300) throw 
new Error(result.text || `kube api GET configmap status ${result.status}`); + return JSON.parse(result.text); } -function ensureConfigMap() { - if (readConfigMap() !== null) return; +async function ensureConfigMap() { + if (await readConfigMap() !== null) return; const object = { apiVersion: "v1", kind: "ConfigMap", metadata: { name: configMap, namespace }, data: { _createdAt: new Date().toISOString(), _specRef: specRef }, }; - kubectl(["apply", "-f", "-"], JSON.stringify(object)); + const result = await request("POST", `/api/v1/namespaces/${encodeURIComponent(namespace)}/configmaps`, object); + if (result.status === 409) return; + if (result.status < 200 || result.status >= 300) throw new Error(result.text || `kube api POST configmap status ${result.status}`); } function stringOrNull(value) { @@ -91,8 +107,8 @@ function preserveExistingTiming(state, existing) { }; } -ensureConfigMap(); -const current = readConfigMap(); +await ensureConfigMap(); +const current = await readConfigMap(); const beforeResourceVersion = stringOrNull(current?.metadata?.resourceVersion); const beforeUpdatedAt = stringOrNull(current?.data?._updatedAt); const currentText = current?.data?.[followerId]; @@ -106,8 +122,9 @@ const patch = { _specRef: specRef, }, }; -kubectl(["patch", "configmap", configMap, "--type", "merge", "-p", JSON.stringify(patch)]); -const updated = readConfigMap(); +const patchResult = await request("PATCH", `/api/v1/namespaces/${encodeURIComponent(namespace)}/configmaps/${encodeURIComponent(configMap)}`, patch, "application/merge-patch+json"); +if (patchResult.status < 200 || patchResult.status >= 300) throw new Error(patchResult.text || `kube api PATCH configmap status ${patchResult.status}`); +const updated = await readConfigMap(); process.stdout.write(JSON.stringify({ ok: true, followerId, diff --git a/scripts/native/cicd/read-state-summary.mjs b/scripts/native/cicd/read-state-summary.mjs index dcaa931d..d9cc0ec4 100644 --- 
a/scripts/native/cicd/read-state-summary.mjs +++ b/scripts/native/cicd/read-state-summary.mjs @@ -1,9 +1,14 @@ -import { execFileSync } from "node:child_process"; +import { readFileSync } from "node:fs"; +import https from "node:https"; const namespace = process.env.NAMESPACE || ""; const configMap = process.env.CONFIGMAP || ""; const followerIds = parseFollowerIds(process.env.FOLLOWERS_JSON || "[]"); const maxTimingStages = Number(process.env.MAX_TIMING_STAGES || "24"); +const host = process.env.KUBERNETES_SERVICE_HOST; +const port = Number(process.env.KUBERNETES_SERVICE_PORT || "443"); +const token = readFileSync("/var/run/secrets/kubernetes.io/serviceaccount/token", "utf8").trim(); +const ca = readFileSync("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"); function parseFollowerIds(text) { try { @@ -14,18 +19,34 @@ function parseFollowerIds(text) { } } -function kubectlConfigMap() { - try { - const stdout = execFileSync("kubectl", ["-n", namespace, "get", "configmap", configMap, "-o", "json"], { - encoding: "utf8", - maxBuffer: 16 * 1024 * 1024, - stdio: ["ignore", "pipe", "pipe"], +function request(method, path, body, contentType = "application/json") { + return new Promise((resolve, reject) => { + const headers = { authorization: `Bearer ${token}` }; + const payload = body === undefined ? null : typeof body === "string" ? 
body : JSON.stringify(body); + if (payload !== null) { + headers["content-type"] = contentType; + headers["content-length"] = Buffer.byteLength(payload); + } + const req = https.request({ host, port, path, method, ca, headers }, (res) => { + let text = ""; + res.setEncoding("utf8"); + res.on("data", (chunk) => { text += chunk; }); + res.on("end", () => resolve({ status: res.statusCode || 0, text })); }); - return { ok: true, present: true, object: JSON.parse(stdout), error: "" }; + req.on("error", reject); + if (payload !== null) req.write(payload); + req.end(); + }); +} + +async function readConfigMap() { + try { + const result = await request("GET", `/api/v1/namespaces/${encodeURIComponent(namespace)}/configmaps/${encodeURIComponent(configMap)}`); + if (result.status === 404) return { ok: true, present: false, object: null, error: result.text }; + if (result.status < 200 || result.status >= 300) return { ok: false, present: false, object: null, error: result.text || `kube api GET configmap status ${result.status}` }; + return { ok: true, present: true, object: JSON.parse(result.text), error: "" }; } catch (error) { - const stderr = String(error?.stderr || error?.message || ""); - if (/not found/i.test(stderr)) return { ok: true, present: false, object: null, error: stderr }; - return { ok: false, present: false, object: null, error: stderr || "kubectl configmap read failed" }; + return { ok: false, present: false, object: null, error: error?.message || String(error) }; } } @@ -139,7 +160,7 @@ function numberOrNull(value) { return typeof value === "number" && Number.isFinite(value) ? 
value : null; } -const result = kubectlConfigMap(); +const result = await readConfigMap(); const errors = []; const stateByFollower = {}; const valueBytes = {}; diff --git a/scripts/src/cicd.ts b/scripts/src/cicd.ts index 84808db8..6e50d283 100644 --- a/scripts/src/cicd.ts +++ b/scripts/src/cicd.ts @@ -1857,8 +1857,9 @@ function mergeFollowerStatus( function readK8sState(registry: BranchFollowerRegistry, options: ParsedOptions): K8sStateRead { const errors: string[] = []; const stateResult = kubeConfigMapFollowerState(registry, options); - const deploymentResult = kubeJson(registry, options, `kubectl -n ${shQuote(registry.controller.namespace)} get deploy ${shQuote(registry.controller.deploymentName)} -o json`, 10_000); - const leaseResult = kubeJson(registry, options, `kubectl -n ${shQuote(registry.controller.namespace)} get lease ${shQuote(registry.controller.leaseName)} -o json`, 10_000); + const namespace = registry.controller.namespace; + const deploymentResult = kubeJson(registry, options, `kubectl -n ${shQuote(namespace)} get deploy ${shQuote(registry.controller.deploymentName)} -o json`, 10_000, `/apis/apps/v1/namespaces/${encodeURIComponent(namespace)}/deployments/${encodeURIComponent(registry.controller.deploymentName)}`); + const leaseResult = kubeJson(registry, options, `kubectl -n ${shQuote(namespace)} get lease ${shQuote(registry.controller.leaseName)} -o json`, 10_000, `/apis/coordination.k8s.io/v1/namespaces/${encodeURIComponent(namespace)}/leases/${encodeURIComponent(registry.controller.leaseName)}`); const podSelector = labelSelector(registry.controller.labels); const podsResult = kubePodList(registry, options, podSelector); if (!stateResult.ok) errors.push(`state configmap: ${stateResult.error}`); @@ -1937,31 +1938,54 @@ function stateWriteSummary(followerId: string, result: CommandResult): Record [id, null])) }); - const script = [ - "set -eu", - "tmpdir=$(mktemp -d)", - "cleanup() { rm -rf \"$tmpdir\"; }", - "trap cleanup EXIT INT TERM", - 
`NAMESPACE=${shQuote(registry.controller.namespace)}`, - `CONFIGMAP=${shQuote(registry.controller.stateConfigMapName)}`, - `PATCH=${shQuote(patch)}`, - "export NAMESPACE CONFIGMAP PATCH", - "if ! kubectl -n \"$NAMESPACE\" get configmap \"$CONFIGMAP\" >/dev/null 2>\"$tmpdir/error\"; then", - " if grep -qi 'not found' \"$tmpdir/error\"; then", - " printf '{\"ok\":true,\"present\":false,\"patched\":false,\"reason\":\"state-configmap-not-found\",\"parsedDownstreamCliOutput\":false}'", - " exit 0", - " fi", - " cat \"$tmpdir/error\" >&2", - " exit 1", - "fi", - "kubectl -n \"$NAMESPACE\" patch configmap \"$CONFIGMAP\" --type merge -p \"$PATCH\" >/dev/null", - "printf '{\"ok\":true,\"present\":true,\"patched\":true,\"parsedDownstreamCliOutput\":false}'", - ].join("\n"); + const script = options.inCluster + ? [ + "set -eu", + "tmpdir=$(mktemp -d)", + "cleanup() { rm -rf \"$tmpdir\"; }", + "trap cleanup EXIT INT TERM", + nativeCicdScriptLoadShell(["patch-configmap-data.mjs"]), + `NAMESPACE=${shQuote(registry.controller.namespace)}`, + `CONFIGMAP=${shQuote(registry.controller.stateConfigMapName)}`, + `PATCH_B64=${shQuote(Buffer.from(patch, "utf8").toString("base64"))}`, + "export NAMESPACE CONFIGMAP PATCH_B64", + "node \"$tmpdir/patch-configmap-data.mjs\"", + ].join("\n") + : [ + "set -eu", + "tmpdir=$(mktemp -d)", + "cleanup() { rm -rf \"$tmpdir\"; }", + "trap cleanup EXIT INT TERM", + `NAMESPACE=${shQuote(registry.controller.namespace)}`, + `CONFIGMAP=${shQuote(registry.controller.stateConfigMapName)}`, + `PATCH=${shQuote(patch)}`, + "export NAMESPACE CONFIGMAP PATCH", + "if ! 
kubectl -n \"$NAMESPACE\" get configmap \"$CONFIGMAP\" >/dev/null 2>\"$tmpdir/error\"; then", + " if grep -qi 'not found' \"$tmpdir/error\"; then", + " printf '{\"ok\":true,\"present\":false,\"patched\":false,\"reason\":\"state-configmap-not-found\",\"parsedDownstreamCliOutput\":false}'", + " exit 0", + " fi", + " cat \"$tmpdir/error\" >&2", + " exit 1", + "fi", + "kubectl -n \"$NAMESPACE\" patch configmap \"$CONFIGMAP\" --type merge -p \"$PATCH\" >/dev/null", + "printf '{\"ok\":true,\"present\":true,\"patched\":true,\"parsedDownstreamCliOutput\":false}'", + ].join("\n"); return runKubeScript(registry, options, script, "", 10_000); } -function kubeJson(registry: BranchFollowerRegistry, options: ParsedOptions, command: string, timeoutMs: number): { ok: boolean; value: Record | null; error: string } { - const result = runKubeScript(registry, options, `set -eu\n${command}`, "", timeoutMs); +function kubeJson(registry: BranchFollowerRegistry, options: ParsedOptions, command: string, timeoutMs: number, inClusterPath?: string): { ok: boolean; value: Record | null; error: string } { + const script = options.inCluster && inClusterPath !== undefined + ? [ + "set -eu", + "tmpdir=$(mktemp -d)", + "cleanup() { rm -rf \"$tmpdir\"; }", + "trap cleanup EXIT INT TERM", + nativeCicdScriptLoadShell(["kube-get.mjs"]), + `node "$tmpdir/kube-get.mjs" ${shQuote(inClusterPath)}`, + ].join("\n") + : `set -eu\n${command}`; + const result = runKubeScript(registry, options, script, "", timeoutMs); const value = result.exitCode === 0 ? 
parseJsonObject(result.stdout) : null; return { ok: result.exitCode === 0 && value !== null, @@ -1972,12 +1996,21 @@ function kubeJson(registry: BranchFollowerRegistry, options: ParsedOptions, comm function kubePodList(registry: BranchFollowerRegistry, options: ParsedOptions, selector: string): { ok: boolean; value: Record | null; error: string } { const command = `kubectl -n ${shQuote(registry.controller.namespace)} get pods -l ${shQuote(selector)} -o name`; - const result = runKubeScript(registry, options, `set -eu\n${command}`, "", 10_000); - const names = result.stdout - .split(/\r?\n/u) - .map((line) => line.trim()) - .filter((line) => line.length > 0) - .map((line) => line.replace(/^pod\//u, "")); + const script = options.inCluster + ? [ + "set -eu", + "tmpdir=$(mktemp -d)", + "cleanup() { rm -rf \"$tmpdir\"; }", + "trap cleanup EXIT INT TERM", + nativeCicdScriptLoadShell(["kube-get.mjs"]), + `node "$tmpdir/kube-get.mjs" ${shQuote(`/api/v1/namespaces/${encodeURIComponent(registry.controller.namespace)}/pods?labelSelector=${encodeURIComponent(selector)}`)}`, + ].join("\n") + : `set -eu\n${command}`; + const result = runKubeScript(registry, options, script, "", 10_000); + const parsed = options.inCluster && result.exitCode === 0 ? parseJsonObject(result.stdout) : null; + const names = options.inCluster + ? arrayRecords(parsed?.items).map((item) => stringOrNull(asOptionalRecord(item.metadata)?.name)).filter((name): name is string => name !== null) + : result.stdout.split(/\r?\n/u).map((line) => line.trim()).filter((line) => line.length > 0).map((line) => line.replace(/^pod\//u, "")); return { ok: result.exitCode === 0, value: result.exitCode === 0 ? { items: names.map((name) => ({ metadata: { name } })) } : null,