From 678dc427d608a62d0a00875f9fe54260febb6f7b Mon Sep 17 00:00:00 2001 From: Codex Date: Fri, 26 Jun 2026 16:34:23 +0000 Subject: [PATCH] feat: add real dependency proxy benchmark --- .../egress-proxy-benchmarks.yaml | 66 +++ ...-01060310-real-k3s-deps-proxy-benchmark.md | 61 +++ scripts/src/platform-infra-egress-proxy.ts | 2 +- .../src/platform-infra-k3s-build-benchmark.ts | 463 ++++++++++++++++-- 4 files changed, 562 insertions(+), 30 deletions(-) create mode 100644 project-management/PJ2026-01/specs/PJ2026-01060310-real-k3s-deps-proxy-benchmark.md diff --git a/config/platform-infra/egress-proxy-benchmarks.yaml b/config/platform-infra/egress-proxy-benchmarks.yaml index d7551a24..80e75a89 100644 --- a/config/platform-infra/egress-proxy-benchmarks.yaml +++ b/config/platform-infra/egress-proxy-benchmarks.yaml @@ -5,8 +5,74 @@ metadata: owner: unidesk relatedIssues: - 1032 + - 1048 profiles: + real-deps-500m: + enabled: true + workload: k3s-real-deps + description: Real k3s dependency egress benchmark; kubelet pulls remote alpine/node/golang images, then pod stages run apk, npm and go downloads through the YAML-declared proxy. + image: docker.io/library/alpine:3.20 + imagePullPolicy: Always + targetOverrides: {} + payloadMiB: 500 + timeoutSeconds: 3600 + ttlSecondsAfterFinished: 3600 + noMirror: + apt: not-used + npmRegistry: https://registry.npmjs.org/ + pipIndexUrl: not-used + registryMirror: forbidden + aptPackages: [] + dependencyDownload: + enabled: false + url: https://example.invalid/not-used + chunks: 0 + expectedMiB: 0 + realDeps: + minProxyMiB: 500 + imagePullPolicy: Always + apk: + image: docker.io/library/alpine:3.20 + packages: + - build-base + - cmake + - git + - go + - nodejs + - npm + - cargo + - python3-dev + - openssl-dev + - libffi-dev + expectedMiB: 80 + npm: + image: docker.io/library/node:22-bookworm + registry: https://registry.npmjs.org/ + packages: + "@swc/core": latest + "@types/node": latest + eslint: latest + next: latest + react: latest + react-dom: latest + rollup: latest + typescript: latest + vite: latest + webpack: latest + expectedMiB: 120 + go: + image: docker.io/library/golang:1.24-bookworm + goProxy: https://proxy.golang.org,direct + modules: + - github.com/aws/aws-sdk-go-v2/config@v1.29.14 + - github.com/gin-gonic/gin@v1.10.1 + - github.com/prometheus/client_golang@v1.20.5 + - go.opentelemetry.io/otel@v1.34.0 + - google.golang.org/grpc@v1.69.4 + - k8s.io/client-go@v0.32.4 + expectedMiB: 180 + no-mirror-600m: enabled: true workload: k3s-build diff --git a/project-management/PJ2026-01/specs/PJ2026-01060310-real-k3s-deps-proxy-benchmark.md b/project-management/PJ2026-01/specs/PJ2026-01060310-real-k3s-deps-proxy-benchmark.md new file mode 100644 index 00000000..454d1634 --- /dev/null +++ b/project-management/PJ2026-01/specs/PJ2026-01060310-real-k3s-deps-proxy-benchmark.md @@ -0,0 +1,61 @@ +# PJ2026-01060310 Real K3s Dependency Proxy Benchmark + +## Scope + +This SPEC covers pikasTech/unidesk#1048. It supersedes synthetic Cloudflare download evidence for proxy acceleration decisions and adds a real k3s dependency benchmark profile named `real-deps-500m`. + +The benchmark must prove the target k3s cluster can use the platform-infra egress proxy for real dependency acquisition. It has four required stages: + +- Kubernetes image pull: kubelet/containerd must directly pull remote `alpine`, `node`, and `golang` images with `imagePullPolicy: Always`. +- Pod `apk add`: the Alpine stage must fetch packages from upstream apk repositories through proxy environment variables. +- Pod `npm install`: the Node stage must install packages from `https://registry.npmjs.org/` through the proxy. +- Pod `go mod download`: the Go stage must download modules through `GOPROXY=https://proxy.golang.org,direct` and the proxy. + +If the Kubernetes image pull stage fails, the benchmark result is not an application dependency failure; it is an image-pull proxy failure in the k3s/containerd path and must be fixed there. + +## Architecture + +`platform-infra egress-proxy k3s-build-benchmark` remains the single coordinator. It reads targets from `config/platform-infra/sub2api.yaml`, reads profiles from `config/platform-infra/egress-proxy-benchmarks.yaml`, renders one Job per target, and uses `trans sh -- ...` as the bounded control path. + +The `real-deps-500m` profile renders a multi-stage Kubernetes Job: + +- `initContainer/apk-add`: image `docker.io/library/alpine:3.20`. +- `initContainer/npm-install`: image `docker.io/library/node:22-bookworm`. +- `initContainer/go-download`: image `docker.io/library/golang:1.24-bookworm`. +- `container/summary`: emits a bounded result marker after all init containers finish. + +All three init containers receive the YAML-declared `sub2api-egress-proxy` service URL through `HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, and lowercase variants. The image pull itself happens before Pod process execution; therefore image pull proxy evidence must come from the k3s/containerd path and proxyserver-side traffic sampling, not from in-container env alone. + +## Observability + +The source of truth for traffic is `platform-infra egress-proxy traffic --target --sample-seconds N`. Benchmark status may include this traffic sample. The final evidence table must include proxyserver window bytes/rate/cumulative bytes, top client, and top destination. + +For image pull traffic, the observed proxy client may be the node/k3s/containerd path rather than the benchmark Pod IP. For `apk`, `npm`, and `go` stages, the observed proxy client should correspond to the benchmark Pod network path. This distinction must be preserved in issue evidence. + +Status output must classify failures into at least: + +- `image-pull`: kubelet/containerd cannot pull remote images. +- `apk-download`: Pod started but apk fetch/install failed. +- `npm-download`: Pod started but npm install failed. +- `go-download`: Pod started but Go module download failed. +- `none`: all stages succeeded. + +## Boundaries + +This benchmark must not: + +- use Cloudflare speed-test downloads as acceptance evidence; +- install Node or Go only as a substitute for Kubernetes pulling `node`/`golang` images; +- rewrite apk/npm/go sources to regional mirrors; +- use HWLAB source repositories, Tekton, Argo, git-mirror, or previous build caches; +- hide image pull failures behind local image overrides. + +`payloadMiB: 500` in the `real-deps-500m` profile means the minimum proxyserver-observed traffic required for acceptance. The Pod result marker may report apk/npm/go workspace sizes, but those sizes do not replace proxyserver traffic evidence because image pull bytes are outside the Pod filesystem. + +## Acceptance + +- `bun scripts/cli.ts platform-infra egress-proxy k3s-build-benchmark --targets D601,D518 --profile real-deps-500m --dry-run` prints both node plans and the remote image set. +- `--confirm` creates one unique Job per node and returns immediately. +- `status --traffic-sample-seconds 15` reports Job state, `image-pull`/`apk`/`npm`/`go` failure family when applicable, and proxyserver traffic columns. +- D601 and D518 both have final rows with target, profile, job, state, duration, apk MiB, npm MiB, go MiB, proxy traffic window/rate/cumulative, top client, top destination, and failure family. +- Acceptance requires at least 500 MiB of proxyserver-observed traffic per successful node run. If a node cannot reach that point because image pull fails, the issue remains open until the k3s/containerd image pull proxy path is fixed or a blocker is explicitly documented. diff --git a/scripts/src/platform-infra-egress-proxy.ts b/scripts/src/platform-infra-egress-proxy.ts index fd2a47a9..83357aaa 100644 --- a/scripts/src/platform-infra-egress-proxy.ts +++ b/scripts/src/platform-infra-egress-proxy.ts @@ -150,7 +150,7 @@ function platformTrafficSpec(options: TrafficOptions): EgressProxyTrafficSpec { function parseEgressProxyOptions(args: string[]): EgressProxyOptions { const actionRaw = args[0] ?? "benchmark"; if (!isEgressProxyAction(actionRaw)) { - throw new Error("platform-infra egress-proxy usage: benchmark|benchmark-status|benchmark-logs|traffic|k3s-build-benchmark --target D601|D518 [--profile no-mirror|no-mirror-600m] [--sample-seconds N]"); + throw new Error("platform-infra egress-proxy usage: benchmark|benchmark-status|benchmark-logs|traffic|k3s-build-benchmark --target D601|D518 [--profile no-mirror|no-mirror-600m|real-deps-500m] [--sample-seconds N]"); } const action = actionRaw; const rest = args.slice(1); diff --git a/scripts/src/platform-infra-k3s-build-benchmark.ts b/scripts/src/platform-infra-k3s-build-benchmark.ts index 17bb9ee1..b7ff593e 100644 --- a/scripts/src/platform-infra-k3s-build-benchmark.ts +++ b/scripts/src/platform-infra-k3s-build-benchmark.ts @@ -12,8 +12,9 @@ import { resolveTarget } from "./platform-infra/manifest"; const BENCHMARK_CONFIG_PATH = "config/platform-infra/egress-proxy-benchmarks.yaml"; const BENCHMARK_APP = "unidesk-k3s-build-benchmark"; -type K3sBuildAction = "start" | "status" | "logs"; +type K3sBuildAction = "start" | "status" | "logs" | "cleanup"; type ImagePullPolicy = "Always" | "IfNotPresent" | "Never"; +type K3sBuildWorkload = "k3s-build" | "k3s-real-deps"; interface K3sBuildBenchmarkOptions { action: K3sBuildAction; @@ -29,7 +30,7 @@ interface K3sBuildBenchmarkOptions { interface K3sBuildBenchmarkProfile { id: string; enabled: boolean; - workload: "k3s-build"; + workload: K3sBuildWorkload; description: string; image: string; imagePullPolicy: ImagePullPolicy; @@ -50,6 +51,29 @@ interface K3sBuildBenchmarkProfile { chunks: number; expectedMiB: number; }; + realDeps?: K3sRealDepsSpec; +} + +interface K3sRealDepsSpec { + minProxyMiB: number; + imagePullPolicy: ImagePullPolicy; + apk: { + image: string; + packages: readonly string[]; + expectedMiB: number; + }; + npm: { + image: string; + registry: string; + packages: Record; + expectedMiB: number; + }; + go: { + image: string; + goProxy: string; + modules: readonly string[]; + expectedMiB: number; + }; } interface K3sBuildBenchmarkTargetOverride { @@ -86,6 +110,10 @@ interface TargetStatus { outputMiB: number | null; downloadMiB: number | null; payloadMiB: number | null; + apkMiB: number | null; + npmMiB: number | null; + goMiB: number | null; + realDepsMiB: number | null; failureFamily: string; logTail: string; traffic?: TrafficSummary; @@ -107,15 +135,16 @@ export function runK3sBuildBenchmarkCommand(args: string[]): RenderedCliResult { const plans = resolvePlans(config, options); if (options.action === "start" && options.dryRun) return renderDryRun(plans, options); if (options.action === "start") return startBenchmarks(plans, options); + if (options.action === "cleanup") return cleanupBenchmarks(plans, options); return statusBenchmarks(plans, options); } function parseK3sBuildBenchmarkOptions(args: string[]): K3sBuildBenchmarkOptions { const first = args[0]; - const action: K3sBuildAction = first === "status" || first === "logs" ? first : "start"; + const action: K3sBuildAction = first === "status" || first === "logs" || first === "cleanup" ? first : "start"; const rest = action === "start" ? args : args.slice(1); if (first === "--help" || first === "-h" || first === "help") { - throw new Error("platform-infra egress-proxy k3s-build-benchmark usage: k3s-build-benchmark [status|logs] --targets D601,D518 --profile no-mirror-600m [--dry-run|--confirm]"); + throw new Error("platform-infra egress-proxy k3s-build-benchmark usage: k3s-build-benchmark [status|logs|cleanup] --targets D601,D518 --profile real-deps-500m [--dry-run|--confirm]"); } const confirm = rest.includes("--confirm"); const explicitDryRun = rest.includes("--dry-run"); @@ -152,6 +181,7 @@ function resolvePlans(config: K3sBuildBenchmarkConfig, options: K3sBuildBenchmar if (profile === undefined) return { ok: false, targetId, blocker: "profile-missing", detail: `${BENCHMARK_CONFIG_PATH}.profiles.${options.profile} is missing` }; if (!profile.enabled) return { ok: false, targetId, profile, blocker: "profile-disabled", detail: `${BENCHMARK_CONFIG_PATH}.profiles.${profile.id}.enabled=false` }; if (profile.payloadMiB < 500) return { ok: false, targetId, profile, blocker: "payload-too-small", detail: `${BENCHMARK_CONFIG_PATH}.profiles.${profile.id}.payloadMiB must be >= 500` }; + if (profile.workload === "k3s-real-deps" && profile.realDeps === undefined) return { ok: false, targetId, profile, blocker: "real-deps-missing", detail: `${BENCHMARK_CONFIG_PATH}.profiles.${profile.id}.realDeps is missing` }; try { const target = resolveTarget(sub2api, targetId); if (target.egressProxy === null || !target.egressProxy.enabled) return { ok: false, targetId: target.id, target, profile, blocker: "egress-proxy-disabled", detail: `config/platform-infra/sub2api.yaml target ${target.id} has no enabled egressProxy` }; @@ -240,6 +270,10 @@ function statusBenchmarks(plans: readonly TargetPlan[], options: K3sBuildBenchma status.durationSeconds === null ? "-" : `${status.durationSeconds}s`, status.outputMiB === null ? "-" : `${status.outputMiB}MiB`, status.downloadMiB === null ? "-" : `${status.downloadMiB}MiB`, + status.apkMiB === null ? "-" : `${status.apkMiB}MiB`, + status.npmMiB === null ? "-" : `${status.npmMiB}MiB`, + status.goMiB === null ? "-" : `${status.goMiB}MiB`, + status.realDepsMiB === null ? "-" : `${status.realDepsMiB}MiB`, status.traffic === undefined ? "-" : bytes(status.traffic.windowBytes), status.traffic === undefined ? "-" : rate(status.traffic.rateBps), status.traffic === undefined ? "-" : bytes(status.traffic.processTotalBytes), @@ -257,7 +291,7 @@ function statusBenchmarks(plans: readonly TargetPlan[], options: K3sBuildBenchma renderedText: [ "PLATFORM-INFRA K3S BUILD BENCHMARK STATUS", "", - ...table(["TARGET", "PROFILE", "STATE", "JOB", "DURATION", "OUTPUT", "DOWNLOAD", "TRAFFIC_WINDOW", "TRAFFIC_RATE", "PROXY_CUM", "TOP_CLIENT", "TOP_DEST", "FAILURE"], rows), + ...table(["TARGET", "PROFILE", "STATE", "JOB", "DURATION", "OUTPUT", "DOWNLOAD", "APK", "NPM", "GO", "REAL_DEPS", "TRAFFIC_WINDOW", "TRAFFIC_RATE", "PROXY_CUM", "TOP_CLIENT", "TOP_DEST", "FAILURE"], rows), ...logSections, "", "NEXT", @@ -269,6 +303,38 @@ function statusBenchmarks(plans: readonly TargetPlan[], options: K3sBuildBenchma }; } +function cleanupBenchmarks(plans: readonly TargetPlan[], options: K3sBuildBenchmarkOptions): RenderedCliResult { + const rows = plans.map((plan) => { + if (!plan.ok || plan.target === undefined || plan.profile === undefined) { + return { targetId: plan.targetId, profile: options.profile, state: "blocked", deleted: "-", detail: plan.detail ?? plan.blocker ?? "blocked" }; + } + if (!options.confirm) return { targetId: plan.targetId, profile: plan.profile.id, state: "dry-run", deleted: "-", detail: "pass --confirm to delete matching benchmark Jobs" }; + const result = runTrans(plan.target.route, cleanupScript(plan.target, plan.profile), options.timeoutSeconds); + const parsed = parseJson(result.stdout); + const data = record(parsed); + return { + targetId: plan.targetId, + profile: plan.profile.id, + state: result.exitCode === 0 && data.ok !== false ? "deleted" : "failed", + deleted: text(data.deleted, "-"), + detail: text(data.detail, result.stderr.slice(-1000) || result.stdout.slice(-1000)), + }; + }); + return { + ok: rows.every((row) => row.state === "deleted" || row.state === "dry-run"), + command: "platform-infra egress-proxy k3s-build-benchmark cleanup", + contentType: "text/plain", + renderedText: [ + "PLATFORM-INFRA K3S BUILD BENCHMARK CLEANUP", + "", + ...table(["TARGET", "PROFILE", "STATE", "DELETED", "DETAIL"], rows.map((row) => [row.targetId, row.profile, row.state, row.deleted, row.detail])), + "", + "NEXT", + ` bun scripts/cli.ts platform-infra egress-proxy k3s-build-benchmark --targets ${rows.map((row) => row.targetId).join(",")} --profile ${options.profile} --confirm`, + ].join("\n"), + }; +} + function renderDryRun(plans: readonly TargetPlan[], options: K3sBuildBenchmarkOptions): RenderedCliResult { const rows = plans.map((plan) => [ plan.targetId, @@ -277,11 +343,11 @@ function renderDryRun(plans: readonly TargetPlan[], options: K3sBuildBenchmarkOp plan.target?.route ?? "-", plan.target?.namespace ?? "-", plan.target?.egressProxy?.serviceName ?? "-", - plan.target !== undefined && plan.profile !== undefined ? effectiveImage(plan.target, plan.profile).image : "-", - plan.target !== undefined && plan.profile !== undefined ? effectiveImage(plan.target, plan.profile).imagePullPolicy : "-", + plan.profile !== undefined ? dryRunImages(plan.profile) : "-", + plan.profile !== undefined ? dryRunPullPolicy(plan.profile) : "-", plan.profile === undefined ? "-" : `${plan.profile.payloadMiB}MiB`, - plan.profile === undefined ? "-" : `${plan.profile.dependencyDownload.expectedMiB}MiB`, - plan.detail ?? "no-mirror emptyDir unique-job", + plan.profile === undefined ? "-" : dependencySummary(plan.profile), + plan.detail ?? dryRunDetail(plan.profile), ]); return { ok: plans.every((plan) => plan.ok), @@ -290,7 +356,7 @@ function renderDryRun(plans: readonly TargetPlan[], options: K3sBuildBenchmarkOp renderedText: [ "PLATFORM-INFRA K3S BUILD BENCHMARK DRY-RUN", "", - ...table(["TARGET", "PROFILE", "STATUS", "ROUTE", "NAMESPACE", "PROXY", "IMAGE", "PULL", "PAYLOAD", "DOWNLOAD", "DETAIL"], rows), + ...table(["TARGET", "PROFILE", "STATUS", "ROUTE", "NAMESPACE", "PROXY", "IMAGES", "PULL", "MIN_PROXY", "DEPS", "DETAIL"], rows), "", "NEXT", ` bun scripts/cli.ts platform-infra egress-proxy k3s-build-benchmark --targets ${plans.map((plan) => plan.targetId).join(",")} --profile ${options.profile} --confirm`, @@ -300,7 +366,32 @@ function renderDryRun(plans: readonly TargetPlan[], options: K3sBuildBenchmarkOp }; } +function dryRunImages(profile: K3sBuildBenchmarkProfile): string { + if (profile.workload === "k3s-real-deps" && profile.realDeps !== undefined) { + return [profile.realDeps.apk.image, profile.realDeps.npm.image, profile.realDeps.go.image].join(","); + } + return profile.image; +} + +function dryRunPullPolicy(profile: K3sBuildBenchmarkProfile): string { + if (profile.workload === "k3s-real-deps" && profile.realDeps !== undefined) return profile.realDeps.imagePullPolicy; + return profile.imagePullPolicy; +} + +function dependencySummary(profile: K3sBuildBenchmarkProfile): string { + if (profile.workload === "k3s-real-deps" && profile.realDeps !== undefined) { + return `apk~${profile.realDeps.apk.expectedMiB} npm~${profile.realDeps.npm.expectedMiB} go~${profile.realDeps.go.expectedMiB}`; + } + return `${profile.dependencyDownload.expectedMiB}MiB`; +} + +function dryRunDetail(profile: K3sBuildBenchmarkProfile | undefined): string { + if (profile?.workload === "k3s-real-deps") return "kubelet-image-pull + apk/npm/go through proxy"; + return "no-mirror emptyDir unique-job"; +} + function benchmarkJobManifest(target: Sub2ApiTargetConfig, profile: K3sBuildBenchmarkProfile, runId: string, jobName: string): Record { + if (profile.workload === "k3s-real-deps") return realDepsJobManifest(target, profile, runId, jobName); const proxy = target.egressProxy; if (proxy === null) throw new Error(`target ${target.id} has no egressProxy`); const proxyUrl = `http://${proxy.serviceName}.${target.namespace}.svc.cluster.local:${proxy.listenPort}`; @@ -365,6 +456,197 @@ function benchmarkJobManifest(target: Sub2ApiTargetConfig, profile: K3sBuildBenc }; } +function realDepsJobManifest(target: Sub2ApiTargetConfig, profile: K3sBuildBenchmarkProfile, runId: string, jobName: string): Record { + const proxy = target.egressProxy; + if (proxy === null) throw new Error(`target ${target.id} has no egressProxy`); + const realDeps = requireRealDeps(profile); + const proxyUrl = `http://${proxy.serviceName}.${target.namespace}.svc.cluster.local:${proxy.listenPort}`; + const noProxy = proxy.noProxy.join(","); + const labels = benchmarkLabels(target, profile, runId); + const commonEnv = [ + { name: "HTTP_PROXY", value: proxyUrl }, + { name: "HTTPS_PROXY", value: proxyUrl }, + { name: "ALL_PROXY", value: proxyUrl }, + { name: "http_proxy", value: proxyUrl }, + { name: "https_proxy", value: proxyUrl }, + { name: "all_proxy", value: proxyUrl }, + { name: "NO_PROXY", value: noProxy }, + { name: "no_proxy", value: noProxy }, + { name: "BENCHMARK_TARGET", value: target.id }, + { name: "BENCHMARK_PROFILE", value: profile.id }, + { name: "BENCHMARK_RUN_ID", value: runId }, + { name: "MIN_PROXY_MIB", value: String(realDeps.minProxyMiB) }, + ]; + return { + apiVersion: "batch/v1", + kind: "Job", + metadata: { + name: jobName, + namespace: target.namespace, + labels, + annotations: { + "unidesk.ai/workload": profile.workload, + "unidesk.ai/min-proxy-mib": String(realDeps.minProxyMiB), + "unidesk.ai/image-pull-mode": "kubelet-containerd", + "unidesk.ai/remote-images": [realDeps.apk.image, realDeps.npm.image, realDeps.go.image].join(","), + }, + }, + spec: { + backoffLimit: 0, + activeDeadlineSeconds: profile.timeoutSeconds, + ttlSecondsAfterFinished: profile.ttlSecondsAfterFinished, + template: { + metadata: { labels }, + spec: { + restartPolicy: "Never", + initContainers: [ + { + name: "apk-add", + image: realDeps.apk.image, + imagePullPolicy: realDeps.imagePullPolicy, + command: ["/bin/sh", "-lc"], + args: [realDepsApkScript()], + env: [ + ...commonEnv, + { name: "APK_PACKAGES", value: realDeps.apk.packages.join(" ") }, + { name: "APK_IMAGE", value: realDeps.apk.image }, + ], + volumeMounts: [{ name: "work", mountPath: "/work" }], + }, + { + name: "npm-install", + image: realDeps.npm.image, + imagePullPolicy: realDeps.imagePullPolicy, + command: ["/bin/sh", "-lc"], + args: [realDepsNpmScript(realDeps)], + env: [ + ...commonEnv, + { name: "NPM_CONFIG_REGISTRY", value: realDeps.npm.registry }, + { name: "NPM_IMAGE", value: realDeps.npm.image }, + ], + volumeMounts: [{ name: "work", mountPath: "/work" }], + }, + { + name: "go-download", + image: realDeps.go.image, + imagePullPolicy: realDeps.imagePullPolicy, + command: ["/bin/sh", "-lc"], + args: [realDepsGoScript()], + env: [ + ...commonEnv, + { name: "GOPROXY", value: realDeps.go.goProxy }, + { name: "GO_MODULES", value: realDeps.go.modules.join(" ") }, + { name: "GO_IMAGE", value: realDeps.go.image }, + ], + volumeMounts: [{ name: "work", mountPath: "/work" }], + }, + ], + containers: [{ + name: "summary", + image: realDeps.apk.image, + imagePullPolicy: "IfNotPresent", + command: ["/bin/sh", "-lc"], + args: [realDepsSummaryScript(realDeps)], + env: commonEnv, + volumeMounts: [{ name: "work", mountPath: "/work" }], + }], + volumes: [{ name: "work", emptyDir: { sizeLimit: "6Gi" } }], + }, + }, + }, + }; +} + +function requireRealDeps(profile: K3sBuildBenchmarkProfile): K3sRealDepsSpec { + if (profile.realDeps === undefined) throw new Error(`profiles.${profile.id}.realDeps is required for workload=${profile.workload}`); + return profile.realDeps; +} + +function realDepsApkScript(): string { + return `set -eu +mkdir -p /work/stages +started_epoch="$(date +%s)" +started_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)" +printf 'UNIDESK_K3S_REAL_DEPS_EVENT stage=apk target=%s profile=%s run=%s image=%s packages="%s"\\n' "$BENCHMARK_TARGET" "$BENCHMARK_PROFILE" "$BENCHMARK_RUN_ID" "$APK_IMAGE" "$APK_PACKAGES" +if grep -R -E 'npmmirror|daocloud|aliyun|tuna|ustc|huaweicloud' /etc/apk/repositories >/tmp/apk-mirror-check.out 2>/dev/null; then + cat /tmp/apk-mirror-check.out >&2 + echo "unexpected apk mirror in base image" >&2 + exit 42 +fi +apk update +apk add --no-cache $APK_PACKAGES +apk_mib="$(du -sk /usr/bin /usr/lib /usr/include /usr/local 2>/dev/null | awk '{s+=$1} END {printf "%d", int((s+1023)/1024)}')" +{ + printf 'realDepsStartedEpoch=%s\\n' "$started_epoch" + printf 'realDepsStartedAt=%s\\n' "$started_at" + printf 'apkMiB=%s\\n' "$apk_mib" +} > /work/stages/apk.env +printf 'UNIDESK_K3S_REAL_DEPS_STAGE {"stage":"apk","ok":true,"image":"%s","installedMiB":%s}\\n' "$APK_IMAGE" "$apk_mib" +`; +} + +function realDepsNpmScript(realDeps: K3sRealDepsSpec): string { + const packageJson = JSON.stringify({ private: true, dependencies: realDeps.npm.packages }, null, 2); + return `set -eu +mkdir -p /work/stages /work/npm/project /work/npm/cache +cd /work/npm/project +cat > package.json <<'JSON' +${packageJson} +JSON +printf 'UNIDESK_K3S_REAL_DEPS_EVENT stage=npm target=%s profile=%s run=%s image=%s registry=%s\\n' "$BENCHMARK_TARGET" "$BENCHMARK_PROFILE" "$BENCHMARK_RUN_ID" "$NPM_IMAGE" "$NPM_CONFIG_REGISTRY" +npm install --ignore-scripts --no-audit --no-fund --cache /work/npm/cache --registry "$NPM_CONFIG_REGISTRY" +npm_mib="$(du -sk /work/npm/cache /work/npm/project/node_modules 2>/dev/null | awk '{s+=$1} END {printf "%d", int((s+1023)/1024)}')" +printf 'npmMiB=%s\\n' "$npm_mib" > /work/stages/npm.env +printf 'UNIDESK_K3S_REAL_DEPS_STAGE {"stage":"npm","ok":true,"image":"%s","installedMiB":%s}\\n' "$NPM_IMAGE" "$npm_mib" +`; +} + +function realDepsGoScript(): string { + return `set -eu +mkdir -p /work/stages /work/go/module /work/go/gopath /work/go/gomodcache +cd /work/go/module +if ! command -v go >/dev/null 2>&1 && [ -x /usr/local/go/bin/go ]; then + export PATH="/usr/local/go/bin:\${PATH:-/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin}" +fi +if ! command -v go >/dev/null 2>&1; then + echo "go-runtime-missing PATH=\${PATH:-}" >&2 + exit 127 +fi +go version +go mod init unidesk.local/proxy-benchmark +printf 'UNIDESK_K3S_REAL_DEPS_EVENT stage=go target=%s profile=%s run=%s image=%s goproxy=%s modules="%s"\\n' "$BENCHMARK_TARGET" "$BENCHMARK_PROFILE" "$BENCHMARK_RUN_ID" "$GO_IMAGE" "$GOPROXY" "$GO_MODULES" +export GOPATH=/work/go/gopath +export GOMODCACHE=/work/go/gomodcache +for module in $GO_MODULES; do + go get "$module" +done +go mod download -x all +go_mib="$(du -sk /work/go/gomodcache /work/go/gopath/pkg 2>/dev/null | awk '{s+=$1} END {printf "%d", int((s+1023)/1024)}')" +printf 'goMiB=%s\\n' "$go_mib" > /work/stages/go.env +printf 'UNIDESK_K3S_REAL_DEPS_STAGE {"stage":"go","ok":true,"image":"%s","downloadedMiB":%s}\\n' "$GO_IMAGE" "$go_mib" +`; +} + +function realDepsSummaryScript(realDeps: K3sRealDepsSpec): string { + return `set -eu +apkMiB=0 +npmMiB=0 +goMiB=0 +realDepsStartedEpoch="$(date +%s)" +realDepsStartedAt="$(date -u +%Y-%m-%dT%H:%M:%SZ)" +[ -f /work/stages/apk.env ] && . /work/stages/apk.env +[ -f /work/stages/npm.env ] && . /work/stages/npm.env +[ -f /work/stages/go.env ] && . /work/stages/go.env +completed_epoch="$(date +%s)" +completed_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)" +real_deps_mib=$((apkMiB + npmMiB + goMiB)) +duration=$((completed_epoch - realDepsStartedEpoch)) +pod_ip="$(hostname -i 2>/dev/null | awk '{print $1}')" +printf 'UNIDESK_K3S_REAL_DEPS_RESULT {"ok":true,"target":"%s","profile":"%s","runId":"%s","startedAt":"%s","completedAt":"%s","durationSeconds":%s,"podIp":"%s","apkMiB":%s,"npmMiB":%s,"goMiB":%s,"realDepsMiB":%s,"minProxyMiB":%s,"imagePullMode":"kubelet-containerd","apkImage":"%s","npmImage":"%s","goImage":"%s"}\\n' \\ + "$BENCHMARK_TARGET" "$BENCHMARK_PROFILE" "$BENCHMARK_RUN_ID" "$realDepsStartedAt" "$completed_at" "$duration" "$pod_ip" "$apkMiB" "$npmMiB" "$goMiB" "$real_deps_mib" "$MIN_PROXY_MIB" ${shQuote(realDeps.apk.image)} ${shQuote(realDeps.npm.image)} ${shQuote(realDeps.go.image)} +`; +} + function workloadScript(profile: K3sBuildBenchmarkProfile): string { return `set -eu started_epoch="$(date +%s)" @@ -530,6 +812,24 @@ printf '{"ok":true,"jobName":"%s","namespace":"%s","target":"%s","runId":"%s","p `; } +function cleanupScript(target: Sub2ApiTargetConfig, profile: K3sBuildBenchmarkProfile): string { + const selector = `app.kubernetes.io/name=${BENCHMARK_APP},unidesk.ai/benchmark-profile=${profile.id},unidesk.ai/runtime-node=${target.id.toLowerCase()}`; + return ` +set -eu +namespace=${shQuote(target.namespace)} +selector=${shQuote(selector)} +before="$(kubectl -n "$namespace" get jobs -l "$selector" --no-headers 2>/dev/null | wc -l | tr -d ' ')" +kubectl -n "$namespace" delete jobs -l "$selector" --ignore-not-found >/tmp/k3s-build-benchmark-cleanup.out 2>/tmp/k3s-build-benchmark-cleanup.err +after="$(kubectl -n "$namespace" get jobs -l "$selector" --no-headers 2>/dev/null | wc -l | tr -d ' ')" +deleted=$((before - after)) +python3 - "$before" "$after" "$deleted" <<'PY' +import json, sys +before, after, deleted = sys.argv[1:4] +print(json.dumps({"ok": True, "before": int(before), "after": int(after), "deleted": int(deleted), "detail": "matching jobs deleted"}, ensure_ascii=False)) +PY +`; +} + function statusScript(target: Sub2ApiTargetConfig, profile: K3sBuildBenchmarkProfile, tailLines: number): string { const selector = `app.kubernetes.io/name=${BENCHMARK_APP},unidesk.ai/benchmark-profile=${profile.id},unidesk.ai/runtime-node=${target.id.toLowerCase()}`; return ` @@ -561,26 +861,55 @@ pods = json.loads(pods_result.stdout or "{}").get("items", []) if pods_result.re pods.sort(key=lambda item: item.get("metadata", {}).get("creationTimestamp", "")) pod_name = pods[-1].get("metadata", {}).get("name") if pods else None waiting_reasons = [] +container_names = [] if pods: - for container_status in (pods[-1].get("status", {}) or {}).get("containerStatuses", []) or []: - waiting = ((container_status.get("state") or {}).get("waiting") or {}) + pod_status = pods[-1].get("status", {}) or {} + status_groups = [] + status_groups.extend((pod_status.get("initContainerStatuses") or [])) + status_groups.extend((pod_status.get("containerStatuses") or [])) + for container_status in status_groups: + container_name = container_status.get("name") or "container" + container_names.append(container_name) + image_name = container_status.get("image") or "-" + state_record = container_status.get("state") or {} + waiting = (state_record.get("waiting") or {}) if waiting: reason = waiting.get("reason") or "waiting" message = waiting.get("message") or "" - waiting_reasons.append((reason + " " + message).strip()) -logs = "" -if pod_name: - logs_result = kubectl(["logs", pod_name, "--tail", str(tail_lines)]) - logs = logs_result.stdout if logs_result.returncode == 0 else logs_result.stderr -full_logs = "" -if pod_name: - full_result = kubectl(["logs", pod_name, "--tail", "800"]) - full_logs = full_result.stdout if full_result.returncode == 0 else "" + waiting_reasons.append((container_name + " " + image_name + " " + reason + " " + message).strip()) + terminated = (state_record.get("terminated") or {}) + if terminated and int(terminated.get("exitCode") or 0) != 0: + reason = terminated.get("reason") or "terminated" + message = terminated.get("message") or "" + waiting_reasons.append(f"{container_name} {image_name} terminated exit={terminated.get('exitCode')} reason={reason} {message}".strip()) + +def collect_logs(lines): + if not pod_name: + return "" + chunks = [] + seen = set() + for name in container_names: + if name in seen: + continue + seen.add(name) + log_result = kubectl(["logs", pod_name, "-c", name, "--tail", str(lines)]) + text = log_result.stdout if log_result.returncode == 0 else log_result.stderr + if text: + chunks.append(f"[{name}]\\n{text}") + return "\\n".join(chunks) + +logs = collect_logs(tail_lines) +full_logs = collect_logs(800) match = None for line in reversed(full_logs.splitlines()): - if line.startswith("UNIDESK_K3S_BUILD_BENCHMARK_RESULT "): + marker = None + if "UNIDESK_K3S_BUILD_BENCHMARK_RESULT " in line: + marker = "UNIDESK_K3S_BUILD_BENCHMARK_RESULT " + elif "UNIDESK_K3S_REAL_DEPS_RESULT " in line: + marker = "UNIDESK_K3S_REAL_DEPS_RESULT " + if marker is not None: try: - match = json.loads(line.split(" ", 1)[1]) + match = json.loads(line.split(marker, 1)[1]) except Exception: match = None break @@ -604,6 +933,12 @@ elif "ImagePullBackOff" in tail_text or "ErrImagePull" in tail_text: failure_family = "image-pull" elif "apt-get" in tail_text and ("Failed" in tail_text or "Unable to" in tail_text): failure_family = "apt-download" +elif "apk-add" in tail_text and ("ERROR:" in tail_text or "temporary error" in tail_text or "Permission denied" in tail_text): + failure_family = "apk-download" +elif "npm-install" in tail_text and ("npm ERR!" in tail_text or "EAI_AGAIN" in tail_text or "ETIMEDOUT" in tail_text): + failure_family = "npm-download" +elif "go-download" in tail_text and ("terminated exit=" in tail_text or "i/o timeout" in tail_text or "connection refused" in tail_text or "no such host" in tail_text or "proxyconnect tcp" in tail_text or "TLS handshake timeout" in tail_text or "go-runtime-missing" in tail_text): + failure_family = "go-download" elif "curl:" in tail_text or "urllib.error.HTTPError" in tail_text or "urllib.error.URLError" in tail_text: failure_family = "dependency-download" elif "payload-too-small" in tail_text: @@ -666,6 +1001,29 @@ function trafficSpec(target: Sub2ApiTargetConfig): EgressProxyTrafficSpec { } function normalizeStatus(plan: TargetPlan, parsed: unknown, result: CommandResult): TargetStatus { + if (typeof parsed !== "object" || parsed === null) { + const state = result.exitCode === 0 ? "transport-unparseable" : "transport-failed"; + return { + ok: false, + targetId: plan.targetId, + profile: plan.profile?.id ?? "-", + state, + jobName: "-", + runId: "-", + startedAt: "-", + completedAt: "-", + durationSeconds: null, + outputMiB: null, + downloadMiB: null, + payloadMiB: plan.profile?.payloadMiB ?? null, + apkMiB: null, + npmMiB: null, + goMiB: null, + realDepsMiB: null, + failureFamily: result.timedOut ? "transport-timeout" : state, + logTail: (result.stderr || result.stdout).slice(-4000), + }; + } const data = typeof parsed === "object" && parsed !== null ? parsed as Record : {}; const jobResult = record(data.result); const state = text(data.state, result.exitCode === 0 ? "unknown" : "failed"); @@ -683,6 +1041,10 @@ function normalizeStatus(plan: TargetPlan, parsed: unknown, result: CommandResul outputMiB: nullableNumber(jobResult.outputMiB), downloadMiB: nullableNumber(jobResult.downloadMiB), payloadMiB: nullableNumber(jobResult.payloadMiB), + apkMiB: nullableNumber(jobResult.apkMiB), + npmMiB: nullableNumber(jobResult.npmMiB), + goMiB: nullableNumber(jobResult.goMiB), + realDepsMiB: nullableNumber(jobResult.realDepsMiB), failureFamily: text(data.failureFamily, data.ok === true ? "none" : state === "running" || state === "pending" ? "in-progress" : text(data.reason, "unknown")), logTail: text(data.logTail, result.stderr.slice(-2000)), }; @@ -703,6 +1065,10 @@ function blockedStatus(plan: TargetPlan, profile: string): TargetStatus { outputMiB: null, downloadMiB: null, payloadMiB: plan.profile?.payloadMiB ?? null, + apkMiB: null, + npmMiB: null, + goMiB: null, + realDepsMiB: null, failureFamily: plan.blocker ?? "blocked", logTail: plan.detail ?? "", }; @@ -734,9 +1100,8 @@ function readK3sBuildBenchmarkConfig(): K3sBuildBenchmarkConfig { function profileSpec(id: string, raw: Record): K3sBuildBenchmarkProfile { const workload = stringField(raw, "workload", `profiles.${id}`); - if (workload !== "k3s-build") throw new Error(`profiles.${id}.workload must be k3s-build`); - const imagePullPolicy = stringField(raw, "imagePullPolicy", `profiles.${id}`); - if (imagePullPolicy !== "Always" && imagePullPolicy !== "IfNotPresent" && imagePullPolicy !== "Never") throw new Error(`profiles.${id}.imagePullPolicy must be Always, IfNotPresent, or Never`); + if (workload !== "k3s-build" && workload !== "k3s-real-deps") throw new Error(`profiles.${id}.workload must be k3s-build or k3s-real-deps`); + const imagePullPolicy = imagePullPolicyField(raw, "imagePullPolicy", `profiles.${id}`); const noMirror = asRecord(raw.noMirror, `profiles.${id}.noMirror`); const registryMirror = stringField(noMirror, "registryMirror", `profiles.${id}.noMirror`); if (registryMirror !== "forbidden") throw new Error(`profiles.${id}.noMirror.registryMirror must be forbidden`); @@ -765,6 +1130,34 @@ function profileSpec(id: string, raw: Record): K3sBuildBenchmar chunks: integerField(dependencyDownload, "chunks", `profiles.${id}.dependencyDownload`), expectedMiB: integerField(dependencyDownload, "expectedMiB", `profiles.${id}.dependencyDownload`), }, + realDeps: raw.realDeps === undefined ? undefined : realDepsSpec(asRecord(raw.realDeps, `profiles.${id}.realDeps`), `profiles.${id}.realDeps`), + }; +} + +function realDepsSpec(raw: Record, path: string): K3sRealDepsSpec { + const apk = asRecord(raw.apk, `${path}.apk`); + const npm = asRecord(raw.npm, `${path}.npm`); + const go = asRecord(raw.go, `${path}.go`); + return { + minProxyMiB: integerField(raw, "minProxyMiB", path), + imagePullPolicy: imagePullPolicyField(raw, "imagePullPolicy", path), + apk: { + image: stringField(apk, "image", `${path}.apk`), + packages: stringArrayField(apk, "packages", `${path}.apk`), + expectedMiB: integerField(apk, "expectedMiB", `${path}.apk`), + }, + npm: { + image: stringField(npm, "image", `${path}.npm`), + registry: stringField(npm, "registry", `${path}.npm`), + packages: stringRecordField(npm, "packages", `${path}.npm`), + expectedMiB: integerField(npm, "expectedMiB", `${path}.npm`), + }, + go: { + image: stringField(go, "image", `${path}.go`), + goProxy: stringField(go, "goProxy", `${path}.go`), + modules: stringArrayField(go, "modules", `${path}.go`), + expectedMiB: integerField(go, "expectedMiB", `${path}.go`), + }, }; } @@ -776,9 +1169,7 @@ function targetOverridesField(raw: Record, path: string): Recor const override: K3sBuildBenchmarkTargetOverride = {}; if (recordValue.image !== undefined) override.image = stringField(recordValue, "image", `${path}.targetOverrides.${targetId}`); if (recordValue.imagePullPolicy !== undefined) { - const pullPolicy = stringField(recordValue, "imagePullPolicy", `${path}.targetOverrides.${targetId}`); - if (pullPolicy !== "Always" && pullPolicy !== "IfNotPresent" && pullPolicy !== "Never") throw new Error(`${path}.targetOverrides.${targetId}.imagePullPolicy must be Always, IfNotPresent, or Never`); - override.imagePullPolicy = pullPolicy; + override.imagePullPolicy = imagePullPolicyField(recordValue, "imagePullPolicy", `${path}.targetOverrides.${targetId}`); } return [targetId, override]; })); @@ -856,6 +1247,20 @@ function stringArrayField(obj: Record, key: string, path: strin return [...value] as string[]; } +function stringRecordField(obj: Record, key: string, path: string): Record { + const value = obj[key]; + if (typeof value !== "object" || value === null || Array.isArray(value)) throw new Error(`${path}.${key} must be an object`); + const entries = Object.entries(value as Record); + if (entries.some(([name, item]) => name.length === 0 || typeof item !== "string" || item.length === 0)) throw new Error(`${path}.${key} must contain string values`); + return Object.fromEntries(entries) as Record; +} + +function imagePullPolicyField(obj: Record, key: string, path: string): ImagePullPolicy { + const value = stringField(obj, key, path); + if (value !== "Always" && value !== "IfNotPresent" && value !== "Never") throw new Error(`${path}.${key} must be Always, IfNotPresent, or Never`); + return value; +} + function table(headers: string[], rows: string[][]): string[] { const widths = headers.map((header, index) => Math.max(header.length, ...rows.map((row) => row[index]?.length ?? 0))); const render = (row: string[]) => row.map((cell, index) => cell.padEnd(widths[index] ?? cell.length)).join(" ").trimEnd();