fix: decouple k3s benchmark bootstrap image

This commit is contained in:
Codex
2026-06-26 13:35:04 +00:00
parent 0547a6e95d
commit 4489e34c7f
3 changed files with 169 additions and 46 deletions
@@ -11,8 +11,12 @@ profiles:
enabled: true
workload: k3s-build
description: Generic k3s build benchmark with no registry/source mirror and at least 600 MiB output payload.
image: docker.io/library/debian:bookworm
imagePullPolicy: Always
image: docker.io/library/python:3.12-alpine
imagePullPolicy: IfNotPresent
targetOverrides:
D601:
image: 127.0.0.1:5000/platform-infra/sub2api-account-sentinel:python-3.12-alpine-openai-2.41.1
imagePullPolicy: IfNotPresent
payloadMiB: 600
timeoutSeconds: 3600
ttlSecondsAfterFinished: 3600
@@ -21,14 +25,7 @@ profiles:
npmRegistry: https://registry.npmjs.org/
pipIndexUrl: https://pypi.org/simple
registryMirror: forbidden
aptPackages:
- build-essential
- ca-certificates
- curl
- git
- make
- pkg-config
- xz-utils
aptPackages: []
dependencyDownload:
enabled: true
url: https://speed.cloudflare.com/__down?bytes=67108864
@@ -6,12 +6,16 @@ This SPEC replaces the earlier HWLAB full-CI dependency for pikasTech/unidesk#10
The first profile is `no-mirror-600m` in `config/platform-infra/egress-proxy-benchmarks.yaml`. It runs a fresh k3s Job per node, uses an emptyDir workspace, forbids benchmark reuse, keeps registry/source mirror settings out of the workload, and produces at least 600 MiB of build output. The profile also downloads an explicit no-mirror dependency payload so proxyserver traffic can be observed during the run.
The benchmark separates bootstrap image availability from the measured workload. A profile may define target-scoped `targetOverrides.<target>.image` and `targetOverrides.<target>.imagePullPolicy` for a node that cannot pull the default bootstrap image directly, because the image must be pulled before the pod's proxy environment variables exist. These overrides must be generic platform-infra images, not HWLAB application images, and the measured workload must still follow the same no-mirror download rule and the minimum output payload of 500 MiB.
## Architecture
`platform-infra egress-proxy k3s-build-benchmark` is the coordinator. It reads platform-infra targets from `config/platform-infra/sub2api.yaml`, reads workload profiles from `config/platform-infra/egress-proxy-benchmarks.yaml`, renders one Kubernetes Job per target, and uses `trans <target.route> sh -- ...` as the short control path.
The workload Job runs in the target platform-infra namespace and receives proxy environment variables that point at the YAML-declared `sub2api-egress-proxy` service. The existing `platform-infra egress-proxy traffic` sampler remains the proxyserver-side observability source. Benchmark status may include a bounded traffic sample, but raw proxy credentials and Secret values are never printed.
The portable workload path uses Python from the bootstrap image to download the declared dependency payload and generate the build artifact. It does not rewrite apt, apk, npm, pip, or registry sources. Debian/apt bootstrap images remain supported for future profiles, but apt is not required for the default cross-node benchmark.
## Data Model
Stable benchmark records contain:
@@ -20,6 +24,7 @@ Stable benchmark records contain:
- `profile`: benchmark profile id, initially `no-mirror-600m`.
- `runId` and `jobName`: k3s Job identity.
- `image`, `payloadMiB`, `dependencyDownload.expectedMiB`, `noMirror`: profile facts from YAML.
- `targetOverrides`: optional target-specific bootstrap image facts; these are not measured dependency mirrors.
- `state`: `pending`, `running`, `succeeded`, `failed`, or `missing`.
- `startedAt`, `completedAt`, `durationSeconds`: Job timing.
- `outputMiB`, `downloadMiB`: workload evidence parsed from Job logs.
@@ -39,7 +44,7 @@ Stable benchmark records contain:
This benchmark is not a HWLAB application CI result and must not be reported as PipelineRun timing. It measures a generic k3s build workload under the node-local platform-infra egress proxy. It is valid for cross-node proxy path and build egress performance comparison because every target runs the same YAML profile and the same Job template.
The workload must stay no-mirror: do not rewrite apt sources to regional mirrors, do not use npm mirror registries, do not configure Docker registry mirrors, and do not reuse a previous Job, PVC, or dependency cache. The configured profile must keep output payload at or above 500 MiB.
The workload must stay no-mirror: do not rewrite apt/apk sources to regional mirrors, do not use npm mirror registries, do not configure Docker registry mirrors, and do not reuse a previous Job, PVC, or dependency cache. The configured profile must keep output payload at or above 500 MiB.
## Acceptance
+156 -35
View File
@@ -13,6 +13,7 @@ const BENCHMARK_CONFIG_PATH = "config/platform-infra/egress-proxy-benchmarks.yam
const BENCHMARK_APP = "unidesk-k3s-build-benchmark";
type K3sBuildAction = "start" | "status" | "logs";
/** Pull policy values accepted for the benchmark Job container image (profile default or per-target override). */
type ImagePullPolicy = "Always" | "IfNotPresent" | "Never";
interface K3sBuildBenchmarkOptions {
action: K3sBuildAction;
@@ -31,7 +32,8 @@ interface K3sBuildBenchmarkProfile {
workload: "k3s-build";
description: string;
image: string;
imagePullPolicy: "Always" | "IfNotPresent" | "Never";
imagePullPolicy: ImagePullPolicy;
targetOverrides: Record<string, K3sBuildBenchmarkTargetOverride>;
payloadMiB: number;
timeoutSeconds: number;
ttlSecondsAfterFinished: number;
@@ -50,6 +52,11 @@ interface K3sBuildBenchmarkProfile {
};
}
/**
 * Per-target bootstrap image settings declared under `targetOverrides` in a benchmark profile.
 * Both fields are optional; `effectiveImage` falls back to the profile-level value for any field left unset.
 */
interface K3sBuildBenchmarkTargetOverride {
/** Container image to use for this target instead of the profile's `image`. */
image?: string;
/** Pull policy to use for this target instead of the profile's `imagePullPolicy`. */
imagePullPolicy?: ImagePullPolicy;
}
interface K3sBuildBenchmarkConfig {
version: number;
kind: string;
@@ -270,6 +277,8 @@ function renderDryRun(plans: readonly TargetPlan[], options: K3sBuildBenchmarkOp
plan.target?.route ?? "-",
plan.target?.namespace ?? "-",
plan.target?.egressProxy?.serviceName ?? "-",
plan.target !== undefined && plan.profile !== undefined ? effectiveImage(plan.target, plan.profile).image : "-",
plan.target !== undefined && plan.profile !== undefined ? effectiveImage(plan.target, plan.profile).imagePullPolicy : "-",
plan.profile === undefined ? "-" : `${plan.profile.payloadMiB}MiB`,
plan.profile === undefined ? "-" : `${plan.profile.dependencyDownload.expectedMiB}MiB`,
plan.detail ?? "no-mirror emptyDir unique-job",
@@ -281,7 +290,7 @@ function renderDryRun(plans: readonly TargetPlan[], options: K3sBuildBenchmarkOp
renderedText: [
"PLATFORM-INFRA K3S BUILD BENCHMARK DRY-RUN",
"",
...table(["TARGET", "PROFILE", "STATUS", "ROUTE", "NAMESPACE", "PROXY", "PAYLOAD", "DOWNLOAD", "DETAIL"], rows),
...table(["TARGET", "PROFILE", "STATUS", "ROUTE", "NAMESPACE", "PROXY", "IMAGE", "PULL", "PAYLOAD", "DOWNLOAD", "DETAIL"], rows),
"",
"NEXT",
` bun scripts/cli.ts platform-infra egress-proxy k3s-build-benchmark --targets ${plans.map((plan) => plan.targetId).join(",")} --profile ${options.profile} --confirm`,
@@ -296,6 +305,7 @@ function benchmarkJobManifest(target: Sub2ApiTargetConfig, profile: K3sBuildBenc
if (proxy === null) throw new Error(`target ${target.id} has no egressProxy`);
const proxyUrl = `http://${proxy.serviceName}.${target.namespace}.svc.cluster.local:${proxy.listenPort}`;
const noProxy = proxy.noProxy.join(",");
const image = effectiveImage(target, profile);
return {
apiVersion: "batch/v1",
kind: "Job",
@@ -307,6 +317,8 @@ function benchmarkJobManifest(target: Sub2ApiTargetConfig, profile: K3sBuildBenc
"unidesk.ai/no-mirror": JSON.stringify(profile.noMirror),
"unidesk.ai/payload-mib": String(profile.payloadMiB),
"unidesk.ai/dependency-download-mib": String(profile.dependencyDownload.expectedMiB),
"unidesk.ai/bootstrap-image": image.image,
"unidesk.ai/bootstrap-image-pull-policy": image.imagePullPolicy,
},
},
spec: {
@@ -319,8 +331,8 @@ function benchmarkJobManifest(target: Sub2ApiTargetConfig, profile: K3sBuildBenc
restartPolicy: "Never",
containers: [{
name: "build",
image: profile.image,
imagePullPolicy: profile.imagePullPolicy,
image: image.image,
imagePullPolicy: image.imagePullPolicy,
command: ["/bin/sh", "-lc"],
args: [workloadScript(profile)],
env: [
@@ -357,20 +369,23 @@ function workloadScript(profile: K3sBuildBenchmarkProfile): string {
return `set -eu
started_epoch="$(date +%s)"
started_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
export BENCHMARK_STARTED_EPOCH="$started_epoch"
export BENCHMARK_STARTED_AT="$started_at"
work=/work/k3s-build-benchmark
download_dir="$work/download"
build_dir="$work/build"
output_dir="$work/output"
mkdir -p "$download_dir" "$build_dir" "$output_dir"
printf 'UNIDESK_K3S_BUILD_BENCHMARK_EVENT target=%s profile=%s run=%s payloadMiB=%s expectedDownloadMiB=%s noMirror=true\\n' "$BENCHMARK_TARGET" "$BENCHMARK_PROFILE" "$BENCHMARK_RUN_ID" "$PAYLOAD_MIB" "$DOWNLOAD_EXPECTED_MIB"
if grep -R -E 'npmmirror|daocloud|aliyun|tuna|ustc' /etc/apt/sources.list /etc/apt/sources.list.d >/tmp/mirror-check.out 2>/dev/null; then
cat /tmp/mirror-check.out >&2
echo "unexpected apt mirror in base image" >&2
exit 42
fi
apt-get -o Acquire::http::No-Cache=true -o Acquire::https::No-Cache=true update
apt-get -o Acquire::http::No-Cache=true -o Acquire::https::No-Cache=true install -y --no-install-recommends $APT_PACKAGES
cat > "$build_dir/bench.c" <<'C'
if command -v apt-get >/dev/null 2>&1 && [ -n "$APT_PACKAGES" ]; then
if grep -R -E 'npmmirror|daocloud|aliyun|tuna|ustc' /etc/apt/sources.list /etc/apt/sources.list.d >/tmp/mirror-check.out 2>/dev/null; then
cat /tmp/mirror-check.out >&2
echo "unexpected apt mirror in base image" >&2
exit 42
fi
apt-get -o Acquire::http::No-Cache=true -o Acquire::https::No-Cache=true update
apt-get -o Acquire::http::No-Cache=true -o Acquire::https::No-Cache=true install -y --no-install-recommends $APT_PACKAGES
cat > "$build_dir/bench.c" <<'C'
#include <stdint.h>
#include <stdio.h>
int main(void) {
@@ -383,31 +398,112 @@ int main(void) {
return 0;
}
C
cc -O2 "$build_dir/bench.c" -o "$build_dir/bench"
"$build_dir/bench" > "$output_dir/compile-result.txt"
if [ "${profile.dependencyDownload.enabled ? "1" : "0"}" = "1" ]; then
i=1
while [ "$i" -le "$DOWNLOAD_CHUNKS" ]; do
curl -fL --retry 2 --connect-timeout 15 --max-time 240 "$DOWNLOAD_URL" -o "$download_dir/chunk-$i.bin"
i=$((i + 1))
done
cc -O2 "$build_dir/bench.c" -o "$build_dir/bench"
"$build_dir/bench" > "$output_dir/compile-result.txt"
fi
download_mib="$(du -sm "$download_dir" | awk '{print $1}')"
rm -rf "$download_dir"
dd if=/dev/zero of="$output_dir/payload.bin" bs=1M count="$PAYLOAD_MIB" status=none
sha256sum "$output_dir/payload.bin" > "$output_dir/payload.sha256"
output_mib="$(du -sm "$output_dir" | awk '{print $1}')"
if [ "$output_mib" -lt 500 ]; then
echo "payload-too-small outputMiB=$output_mib" >&2
exit 43
pybin=""
if command -v python3 >/dev/null 2>&1; then pybin=python3; elif command -v python >/dev/null 2>&1; then pybin=python; fi
if [ -z "$pybin" ]; then
echo "python-runtime-missing" >&2
exit 44
fi
completed_epoch="$(date +%s)"
completed_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
duration_seconds=$((completed_epoch - started_epoch))
printf 'UNIDESK_K3S_BUILD_BENCHMARK_RESULT {"ok":true,"target":"%s","profile":"%s","runId":"%s","startedAt":"%s","completedAt":"%s","durationSeconds":%s,"payloadMiB":%s,"downloadMiB":%s,"downloadExpectedMiB":%s,"outputMiB":%s,"noMirror":true,"aptMirror":"system-default","npmRegistry":"%s","pipIndexUrl":"%s"}\\n' "$BENCHMARK_TARGET" "$BENCHMARK_PROFILE" "$BENCHMARK_RUN_ID" "$started_at" "$completed_at" "$duration_seconds" "$PAYLOAD_MIB" "$download_mib" "$DOWNLOAD_EXPECTED_MIB" "$output_mib" "$NPM_CONFIG_REGISTRY" "$PIP_INDEX_URL"
"$pybin" - <<'PY'
import hashlib
import json
import math
import os
import pathlib
import shutil
import sys
import time
import urllib.request
started_epoch = int(os.environ.get("BENCHMARK_STARTED_EPOCH", "0") or "0") or int(time.time())
started_at = os.environ.get("BENCHMARK_STARTED_AT") or time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(started_epoch))
payload_mib = int(os.environ["PAYLOAD_MIB"])
download_chunks = int(os.environ["DOWNLOAD_CHUNKS"])
download_expected_mib = int(os.environ["DOWNLOAD_EXPECTED_MIB"])
download_url = os.environ["DOWNLOAD_URL"]
download_enabled = "${profile.dependencyDownload.enabled ? "1" : "0"}" == "1"
download_dir = pathlib.Path("/work/k3s-build-benchmark/download")
build_dir = pathlib.Path("/work/k3s-build-benchmark/build")
output_dir = pathlib.Path("/work/k3s-build-benchmark/output")
download_dir.mkdir(parents=True, exist_ok=True)
build_dir.mkdir(parents=True, exist_ok=True)
output_dir.mkdir(parents=True, exist_ok=True)
def mib_from_bytes(value):
return int(math.ceil(value / 1048576))
download_bytes = 0
if download_enabled:
for index in range(1, download_chunks + 1):
destination = download_dir / f"chunk-{index}.bin"
request = urllib.request.Request(download_url, headers={"User-Agent": "curl/8.5.0", "Accept": "*/*"})
with urllib.request.urlopen(request, timeout=240) as response, destination.open("wb") as handle:
while True:
block = response.read(1024 * 1024)
if not block:
break
handle.write(block)
download_bytes += len(block)
print(f"UNIDESK_K3S_BUILD_BENCHMARK_DOWNLOAD chunk={index} bytes={destination.stat().st_size}", flush=True)
source_file = build_dir / "portable_build_source.py"
source_file.write_text("VALUE = 'unidesk-k3s-build-benchmark'\\n", encoding="utf-8")
__import__("py_compile").compile(str(source_file), cfile=str(build_dir / "portable_build_source.pyc"), doraise=True)
payload = output_dir / "payload.bin"
digest = hashlib.sha256()
seed = hashlib.sha256(f"{os.environ['BENCHMARK_TARGET']}:{os.environ['BENCHMARK_RUN_ID']}".encode("utf-8")).digest()
block = (seed * ((1024 * 1024 // len(seed)) + 1))[:1024 * 1024]
with payload.open("wb") as handle:
for index in range(payload_mib):
digest.update(block)
handle.write(block)
if index % 64 == 0:
handle.flush()
(output_dir / "payload.sha256").write_text(digest.hexdigest() + " payload.bin\\n", encoding="utf-8")
shutil.rmtree(download_dir, ignore_errors=True)
output_bytes = sum(path.stat().st_size for path in output_dir.rglob("*") if path.is_file())
output_mib = mib_from_bytes(output_bytes)
download_mib = mib_from_bytes(download_bytes)
if output_mib < 500:
print(f"payload-too-small outputMiB={output_mib}", file=sys.stderr)
sys.exit(43)
completed_epoch = int(time.time())
result = {
"ok": True,
"target": os.environ["BENCHMARK_TARGET"],
"profile": os.environ["BENCHMARK_PROFILE"],
"runId": os.environ["BENCHMARK_RUN_ID"],
"startedAt": started_at,
"completedAt": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(completed_epoch)),
"durationSeconds": completed_epoch - started_epoch,
"payloadMiB": payload_mib,
"downloadMiB": download_mib,
"downloadExpectedMiB": download_expected_mib,
"outputMiB": output_mib,
"noMirror": True,
"packageManager": "none" if not os.environ.get("APT_PACKAGES") else "apt",
"aptMirror": "not-used" if not os.environ.get("APT_PACKAGES") else "system-default",
"npmRegistry": os.environ["NPM_CONFIG_REGISTRY"],
"pipIndexUrl": os.environ["PIP_INDEX_URL"],
}
print("UNIDESK_K3S_BUILD_BENCHMARK_RESULT " + json.dumps(result, sort_keys=True), flush=True)
PY
`;
}
/**
 * Resolves the bootstrap image and pull policy for a target's benchmark Job.
 *
 * A matching entry in `profile.targetOverrides` takes precedence over the
 * profile-level defaults. Each field falls back independently, so an override
 * that only sets `image` still inherits the profile's `imagePullPolicy`.
 *
 * Override keys are matched against `target.id` in this order:
 *   1. the id exactly as given,
 *   2. the lower-cased id,
 *   3. the upper-cased id,
 *   4. the first override key that equals the id when case is ignored.
 * Step 4 covers mixed-case keys (e.g. key `Edge-Node`, id `edge-node`), which
 * the first three probes cannot reach and which would otherwise be ignored
 * silently, leaving the target on the default image.
 *
 * @param target - Target whose `id` selects the override entry.
 * @param profile - Benchmark profile providing defaults and `targetOverrides`.
 * @returns The image and pull policy to render into the Job manifest.
 */
function effectiveImage(target: Sub2ApiTargetConfig, profile: K3sBuildBenchmarkProfile): { image: string; imagePullPolicy: ImagePullPolicy } {
  const overrides = profile.targetOverrides;
  const id = target.id;
  const foldedId = id.toLowerCase();
  const override =
    overrides[id] ??
    overrides[foldedId] ??
    overrides[id.toUpperCase()] ??
    // Last resort: case-insensitive scan so mixed-case YAML keys still apply.
    Object.entries(overrides).find(([key]) => key.toLowerCase() === foldedId)?.[1];
  return {
    image: override?.image ?? profile.image,
    imagePullPolicy: override?.imagePullPolicy ?? profile.imagePullPolicy,
  };
}
function benchmarkLabels(target: Sub2ApiTargetConfig, profile: K3sBuildBenchmarkProfile, runId: string): Record<string, string> {
return {
"app.kubernetes.io/name": BENCHMARK_APP,
@@ -464,6 +560,14 @@ pods_result = kubectl(["get", "pods", "-l", "job-name=" + job_name, "-o", "json"
pods = json.loads(pods_result.stdout or "{}").get("items", []) if pods_result.returncode == 0 else []
pods.sort(key=lambda item: item.get("metadata", {}).get("creationTimestamp", ""))
pod_name = pods[-1].get("metadata", {}).get("name") if pods else None
waiting_reasons = []
if pods:
for container_status in (pods[-1].get("status", {}) or {}).get("containerStatuses", []) or []:
waiting = ((container_status.get("state") or {}).get("waiting") or {})
if waiting:
reason = waiting.get("reason") or "waiting"
message = waiting.get("message") or ""
waiting_reasons.append((reason + " " + message).strip())
logs = ""
if pod_name:
logs_result = kubectl(["logs", pod_name, "--tail", str(tail_lines)])
@@ -493,14 +597,14 @@ elif active:
else:
state = "pending"
failure_family = "none" if state == "succeeded" else ("in-progress" if state in ("running", "pending") else "unknown")
tail_text = (full_logs or logs)[-4000:]
tail_text = "\\n".join(waiting_reasons + [(full_logs or logs)[-4000:]])
if state == "missing":
failure_family = "job-missing"
elif "ImagePullBackOff" in tail_text or "ErrImagePull" in tail_text:
failure_family = "image-pull"
elif "apt-get" in tail_text and ("Failed" in tail_text or "Unable to" in tail_text):
failure_family = "apt-download"
elif "curl:" in tail_text:
elif "curl:" in tail_text or "urllib.error.HTTPError" in tail_text or "urllib.error.URLError" in tail_text:
failure_family = "dependency-download"
elif "payload-too-small" in tail_text:
failure_family = "payload-too-small"
@@ -516,7 +620,7 @@ payload = {
"completedAt": status.get("completionTime") or (match or {}).get("completedAt"),
"result": match,
"failureFamily": failure_family,
"logTail": logs[-4000:],
"logTail": "\\n".join(waiting_reasons + [logs])[-4000:],
}
print(json.dumps(payload, ensure_ascii=False))
PY
@@ -644,6 +748,7 @@ function profileSpec(id: string, raw: Record<string, unknown>): K3sBuildBenchmar
description: stringField(raw, "description", `profiles.${id}`),
image: stringField(raw, "image", `profiles.${id}`),
imagePullPolicy,
targetOverrides: targetOverridesField(raw, `profiles.${id}`),
payloadMiB: integerField(raw, "payloadMiB", `profiles.${id}`),
timeoutSeconds: integerField(raw, "timeoutSeconds", `profiles.${id}`),
ttlSecondsAfterFinished: integerField(raw, "ttlSecondsAfterFinished", `profiles.${id}`),
@@ -663,6 +768,22 @@ function profileSpec(id: string, raw: Record<string, unknown>): K3sBuildBenchmar
};
}
/**
 * Parses the optional `targetOverrides` mapping of a benchmark profile.
 *
 * @param raw - Raw profile record read from the benchmark YAML.
 * @param path - Path of the profile, used as the prefix in validation messages.
 * @returns A record keyed by the target ids as written in the config; empty
 *   when the profile declares no `targetOverrides`.
 * @throws Error when an override's `imagePullPolicy` is not one of
 *   `Always`, `IfNotPresent`, or `Never`. `asRecord` and `stringField` are
 *   presumably responsible for rejecting malformed shapes — confirm against
 *   their definitions.
 */
function targetOverridesField(raw: Record<string, unknown>, path: string): Record<string, K3sBuildBenchmarkTargetOverride> {
  const declared = raw.targetOverrides;
  if (declared === undefined) return {};

  const basePath = `${path}.targetOverrides`;
  const pairs: Array<[string, K3sBuildBenchmarkTargetOverride]> = [];
  for (const [targetId, value] of Object.entries(asRecord(declared, basePath))) {
    const entryPath = `${basePath}.${targetId}`;
    const fields = asRecord(value, entryPath);
    const parsed: K3sBuildBenchmarkTargetOverride = {};

    if (fields.image !== undefined) {
      parsed.image = stringField(fields, "image", entryPath);
    }

    if (fields.imagePullPolicy !== undefined) {
      const policy = stringField(fields, "imagePullPolicy", entryPath);
      switch (policy) {
        case "Always":
        case "IfNotPresent":
        case "Never":
          parsed.imagePullPolicy = policy;
          break;
        default:
          throw new Error(`${entryPath}.imagePullPolicy must be Always, IfNotPresent, or Never`);
      }
    }

    pairs.push([targetId, parsed]);
  }
  // Object.fromEntries keeps every key (including unusual ones) as an own property.
  return Object.fromEntries(pairs);
}
/**
 * Runs a shell script through the `trans` control path for the given route.
 *
 * Invokes `/root/.local/bin/trans <route> sh -- <script>` via `runCommand`,
 * with the directory returned by `rootPath()` as its second argument.
 *
 * @param route - Route identifier handed to `trans`.
 * @param script - Script text passed after `sh --`.
 * @param timeoutSeconds - Timeout in seconds; converted to milliseconds for `runCommand`.
 * @returns The result produced by `runCommand`.
 */
function runTrans(route: string, script: string, timeoutSeconds: number): CommandResult {
  const argv = ["/root/.local/bin/trans", route, "sh", "--", script];
  const workingDirectory = rootPath();
  const timeoutMs = timeoutSeconds * 1000;
  return runCommand(argv, workingDirectory, { timeoutMs });
}