diff --git a/AGENTS.md b/AGENTS.md index a75cfbd9..3c52831d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -2,6 +2,11 @@ UniDesk 是一个以主 server 为统一入口的分布式工作平台;本文件是项目顶级索引,也承担 `scripts/cli.ts` 的 CLI 使用说明入口。 +## P0 最高优先级:自有配置 YAML 优先规则 + +- P0: UniDesk 自有配置一律优先使用 YAML(`.yaml`/`.yml`),包括 `config/` 下的运行面、平台基础设施、节点/lane、部署参数和可调版本配置;除非外部工具硬性要求 JSON/TOML/ENV 等格式,禁止新增 JSON 作为 UniDesk 自有配置真相。 +- P0: 需要代码读取的 YAML 配置必须显式校验 schema、字段类型和必填项;禁止静默 fallback、宽松猜测或把配置藏进脚本常量,后续版本、镜像、namespace、endpoint 等可调项必须从 YAML 配置进入受控 CLI。 + ## P0 最高优先级:CaseRun 无服务与单步调试规则 - P0: CaseRun、case registry 产物整理、trace 语义化、harness 诊断、短连接 CLI 和本地/目标 host 上可直接运行的 runner 调试,默认是无服务工作流;只要不需要变更 cloud-api、web、gateway、GitOps、k3s runtime 或其他常驻服务,就必须直接无服务运行和验证,禁止为了运行 CaseRun 触发 CI/CD、rollout 或服务发布。 diff --git a/config/platform-infra/sub2api.yaml b/config/platform-infra/sub2api.yaml new file mode 100644 index 00000000..5cab6974 --- /dev/null +++ b/config/platform-infra/sub2api.yaml @@ -0,0 +1,4 @@ +image: + repository: weishaw/sub2api + tag: 0.1.135 + pullPolicy: IfNotPresent diff --git a/scripts/cli.ts b/scripts/cli.ts index fa10d227..d947c2e9 100644 --- a/scripts/cli.ts +++ b/scripts/cli.ts @@ -29,6 +29,7 @@ import { runHwlabG14Command } from "./src/hwlab-g14"; import { runHwlabNodeCommand } from "./src/hwlab-node"; import { runGcCommand } from "./src/gc"; import { runAgentRunCommand } from "./src/agentrun"; +import { runPlatformInfraCommand } from "./src/platform-infra"; const remoteOptions = extractRemoteCliOptions(process.argv.slice(2)); const args = remoteOptions.args; @@ -320,6 +321,14 @@ async function main(): Promise { return; } + if (top === "platform-infra") { + const result = await runPlatformInfraCommand(readConfig(), args.slice(1)); + const ok = (result as { ok?: unknown }).ok !== false; + emitJson(commandName, result, ok); + if (!ok) process.exitCode = 1; + return; + } + const config = readConfig(); const autoRemoteCiPublishPlan = autoRemoteCiPublishUserServiceDryRunPlan(config, args); if 
(autoRemoteCiPublishPlan.enabled && autoRemoteCiPublishPlan.host !== null) { diff --git a/scripts/src/help.ts b/scripts/src/help.ts index f6485b38..1d13616c 100644 --- a/scripts/src/help.ts +++ b/scripts/src/help.ts @@ -4,6 +4,7 @@ import { hwlabHelp } from "./hwlab-cd"; import { hwlabG14Help } from "./hwlab-g14"; import { hwlabNodeHelp } from "./hwlab-node"; import { agentRunHelp } from "./agentrun"; +import { platformInfraHelp } from "./platform-infra"; export function rootHelp(): unknown { return { @@ -62,6 +63,7 @@ export function rootHelp(): unknown { { command: "hwlab nodes control-plane|git-mirror|secret --node G14 --lane v03", description: "Manage HWLAB node/lane runtime prerequisites for v0.3+ with the node identity passed as data instead of a command family." }, { command: "hwlab g14 monitor-prs | hwlab g14 control-plane status|apply|trigger-current|runtime-migration|cleanup-runs|cleanup-released-pvs | hwlab g14 git-mirror status|apply|sync|flush | hwlab g14 tools-image status|build", description: "Start the legacy G14 PR monitor, run bounded v0.2 Tekton/Argo control-plane, manual PipelineRun trigger, runtime migration, CI workspace retention, manual devops-infra git mirror/relay maintenance, or fixed HWLAB CI tools image actions; long confirmed trigger/sync/flush actions return async jobs by default." }, { command: "agentrun v01 queue|sessions|control-plane|git-mirror", description: "Use AgentRun v0.1 Queue and Sessions as the active commander entry through the official G14 CLI bridge, plus bounded Tekton/Argo and git-mirror operations." }, + { command: "platform-infra sub2api plan|apply|status|validate", description: "Deploy and validate the G14 k3s internal-only Sub2API service in the platform-infra namespace through the controlled UniDesk CLI." 
}, { command: "hwlab cd audit --env dev | hwlab cd status --env dev | hwlab cd apply --env dev --dry-run", description: "Legacy D601 HWLAB DEV CD wrapper kept for explicit old-path diagnostics; current HWLAB rollout uses G14 GitOps." }, { command: "code-agent-sandbox", description: "Independent Code Agent Sandbox service skeleton for adapter, mode, and credential-boundary diagnostics." }, { command: "schedule list|get|runs|run|retry-run|delete", description: "Manage backend-core scheduled tasks and run history; schedule run supports --wait-ms N and retry-run reuses the failed run's schedule." }, @@ -615,6 +617,7 @@ export function staticNamespaceHelp(args: string[]): unknown | null { if (top === "auth-broker") return authBrokerHelp(); if (top === "gh") return ghHelp(); if (top === "agentrun") return agentRunHelp(); + if (top === "platform-infra") return platformInfraHelp(); if (top === "hwlab" && (sub === "node" || sub === "nodes")) return hwlabNodeHelp(); if (top === "hwlab" && sub === "g14") return hwlabG14Help(); if (top === "hwlab") return hwlabHelp(); diff --git a/scripts/src/platform-infra.ts b/scripts/src/platform-infra.ts new file mode 100644 index 00000000..4fccd08c --- /dev/null +++ b/scripts/src/platform-infra.ts @@ -0,0 +1,787 @@ +import { readFileSync } from "node:fs"; +import type { UniDeskConfig } from "./config"; +import { rootPath } from "./config"; +import { startJob } from "./jobs"; +import { runSshCommandCapture, type SshCaptureResult } from "./ssh"; + +const g14K3sRoute = "G14:k3s"; +const namespace = "platform-infra"; +const serviceName = "sub2api"; +const fieldManager = "unidesk-platform-infra"; +const manifestPath = rootPath("src", "components", "platform-infra", "sub2api", "sub2api.k8s.yaml"); +const configPath = rootPath("config", "platform-infra", "sub2api.yaml"); +const secretName = "sub2api-secrets"; +const requiredSecretKeys = ["POSTGRES_PASSWORD", "ADMIN_PASSWORD", "JWT_SECRET", "TOTP_ENCRYPTION_KEY"] as const; + +interface 
Sub2ApiConfig { + image: { + repository: string; + tag: string; + pullPolicy: "Always" | "IfNotPresent" | "Never"; + }; +} + +export function platformInfraHelp(): unknown { + return { + command: "platform-infra sub2api plan|apply|status|validate", + output: "json", + usage: [ + "bun scripts/cli.ts platform-infra sub2api plan", + "bun scripts/cli.ts platform-infra sub2api apply --dry-run", + "bun scripts/cli.ts platform-infra sub2api apply --confirm", + "bun scripts/cli.ts platform-infra sub2api status [--full|--raw]", + "bun scripts/cli.ts platform-infra sub2api validate [--full|--raw]", + ], + description: "Operate the G14 k3s internal-only Sub2API deployment in the shared platform-infra namespace. This entry creates no Ingress, NodePort, LoadBalancer, hostPort, hostNetwork, ResourceQuota, LimitRange, or CPU/memory resource requests/limits.", + target: { + route: g14K3sRoute, + namespace, + service: serviceName, + serviceDns: `${serviceName}.${namespace}.svc.cluster.local:8080`, + exposure: "k3s-cluster-internal-only", + resourceLimits: "unset-by-policy", + versionConfigPath: configPath, + }, + }; +} + +export async function runPlatformInfraCommand(config: UniDeskConfig, args: string[]): Promise> { + const [target, action] = args; + if (target !== "sub2api") return unsupported(args); + if (action === "plan" || action === undefined) return plan(); + if (action === "apply") return await apply(config, parseApplyOptions(args.slice(2))); + if (action === "status") return await status(config, parseDisclosureOptions(args.slice(2))); + if (action === "validate") return await validate(config, parseDisclosureOptions(args.slice(2))); + return unsupported(args); +} + +interface ApplyOptions { + dryRun: boolean; + confirm: boolean; + wait: boolean; +} + +interface DisclosureOptions { + full: boolean; + raw: boolean; +} + +interface PolicyCheck { + name: string; + ok: boolean; + detail: string; +} + +function unsupported(args: string[]): Record { + return { + ok: false, + 
error: "unsupported-platform-infra-command", + args, + help: platformInfraHelp(), + }; +} + +function parseApplyOptions(args: string[]): ApplyOptions { + validateOptions(args, new Set(["--dry-run", "--confirm", "--wait"])); + if (args.includes("--dry-run") && args.includes("--confirm")) throw new Error("apply accepts only one of --dry-run or --confirm"); + return { + dryRun: args.includes("--dry-run") || !args.includes("--confirm"), + confirm: args.includes("--confirm"), + wait: args.includes("--wait"), + }; +} + +function parseDisclosureOptions(args: string[]): DisclosureOptions { + validateOptions(args, new Set(["--full", "--raw"])); + const raw = args.includes("--raw"); + return { full: raw || args.includes("--full"), raw }; +} + +function validateOptions(args: string[], booleanOptions: Set): void { + for (const arg of args) { + if (booleanOptions.has(arg)) continue; + throw new Error(`unsupported option: ${arg}`); + } +} + +function readSub2ApiConfig(): Sub2ApiConfig { + const parsed = Bun.YAML.parse(readFileSync(configPath, "utf8")) as unknown; + if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) throw new Error(`${configPath} must contain a YAML object`); + const image = (parsed as { image?: unknown }).image; + if (typeof image !== "object" || image === null || Array.isArray(image)) throw new Error(`${configPath}.image must be an object`); + const record = image as Record; + const repository = stringField(record, "repository", "image"); + const tag = stringField(record, "tag", "image"); + const pullPolicy = stringField(record, "pullPolicy", "image"); + if (pullPolicy !== "Always" && pullPolicy !== "IfNotPresent" && pullPolicy !== "Never") throw new Error(`${configPath}.image.pullPolicy must be Always, IfNotPresent, or Never`); + if (!/^[a-z0-9._/-]+(?::[0-9]+)?$/u.test(repository)) throw new Error(`${configPath}.image.repository has an unsupported format`); + if (!/^[A-Za-z0-9._-]+$/u.test(tag)) throw new 
Error(`${configPath}.image.tag has an unsupported format`); + return { image: { repository, tag, pullPolicy } }; +} + +function stringField(obj: Record, key: string, path: string): string { + const value = obj[key]; + if (typeof value !== "string" || value.length === 0) throw new Error(`${configPath}.${path}.${key} must be a non-empty string`); + return value; +} + +function imageRef(sub2api: Sub2ApiConfig): string { + return `${sub2api.image.repository}:${sub2api.image.tag}`; +} + +function manifest(): string { + const sub2api = readSub2ApiConfig(); + const template = readFileSync(manifestPath, "utf8"); + return template + .replaceAll("__SUB2API_IMAGE__", imageRef(sub2api)) + .replaceAll("__SUB2API_IMAGE_PULL_POLICY__", sub2api.image.pullPolicy); +} + +function plan(): Record { + const sub2api = readSub2ApiConfig(); + const yaml = manifest(); + const policy = policyChecks(yaml); + return { + ok: policy.every((check) => check.ok), + action: "platform-infra-sub2api-plan", + target: { + route: g14K3sRoute, + namespace, + manifestPath, + configPath, + fieldManager, + serviceDns: `${serviceName}.${namespace}.svc.cluster.local:8080`, + }, + config: { + image: imageRef(sub2api), + pullPolicy: sub2api.image.pullPolicy, + }, + decision: { + owner: "UniDesk", + namespace, + reason: "Sub2API is an internal shared platform utility for G14 k3s workloads, so it belongs with platform infrastructure rather than a user workload namespace.", + exposure: "ClusterIP only; no public ingress or node-level exposure.", + resourcePolicy: "No Kubernetes CPU/memory requests or limits, matching issue #220.", + imageVersionControl: "Sub2API image repository/tag/pullPolicy are controlled by config/platform-infra/sub2api.yaml in the UniDesk repository.", + dataStores: ["PostgreSQL 18", "Redis 8"], + appPoolCaps: { + databaseMaxOpenConns: 10, + databaseMaxIdleConns: 2, + redisPoolSize: 32, + redisMinIdleConns: 2, + }, + }, + policy, + next: { + dryRun: "bun scripts/cli.ts platform-infra sub2api 
apply --dry-run", + apply: "bun scripts/cli.ts platform-infra sub2api apply --confirm", + status: "bun scripts/cli.ts platform-infra sub2api status", + validate: "bun scripts/cli.ts platform-infra sub2api validate", + }, + }; +} + +async function apply(config: UniDeskConfig, options: ApplyOptions): Promise> { + const yaml = manifest(); + const policy = policyChecks(yaml); + if (!policy.every((check) => check.ok)) { + return { + ok: false, + action: "platform-infra-sub2api-apply", + mode: "policy-blocked", + policy, + }; + } + if (options.confirm && !options.wait) { + const job = startJob( + "platform_infra_sub2api_apply", + ["bun", "scripts/cli.ts", "platform-infra", "sub2api", "apply", "--confirm", "--wait"], + "Apply G14 k3s platform-infra Sub2API manifests through the controlled UniDesk CLI", + ); + return { + ok: true, + action: "platform-infra-sub2api-apply", + mode: "async-job", + job, + statusCommand: `bun scripts/cli.ts job status ${job.id} --tail-bytes 12000`, + next: { + status: `bun scripts/cli.ts job status ${job.id} --tail-bytes 12000`, + rollout: "bun scripts/cli.ts platform-infra sub2api status", + validate: "bun scripts/cli.ts platform-infra sub2api validate", + }, + }; + } + if (options.dryRun) { + const result = await capture(config, g14K3sRoute, ["script"], dryRunScript(yaml)); + const parsed = parseJsonOutput(result.stdout); + return { + ok: result.exitCode === 0 && boolField(parsed, "ok", false), + action: "platform-infra-sub2api-apply", + mode: "dry-run", + policy, + remote: parsed ?? compactCapture(result, { full: true }), + }; + } + const result = await capture(config, g14K3sRoute, ["script"], applyScript(yaml)); + const parsed = parseJsonOutput(result.stdout); + return { + ok: result.exitCode === 0 && boolField(parsed, "ok", false), + action: "platform-infra-sub2api-apply", + mode: "confirmed", + policy, + remote: parsed ?? 
compactCapture(result, { full: true }), + next: { + status: "bun scripts/cli.ts platform-infra sub2api status", + validate: "bun scripts/cli.ts platform-infra sub2api validate", + }, + }; +} + +async function status(config: UniDeskConfig, options: DisclosureOptions): Promise> { + const result = await capture(config, g14K3sRoute, ["script"], statusScript(imageRef(readSub2ApiConfig()))); + const parsed = parseJsonOutput(result.stdout); + if (options.raw) { + return { + ok: result.exitCode === 0 && boolField(parsed, "ok", false), + action: "platform-infra-sub2api-status", + remote: compactCapture(result, { full: true }), + parsed, + }; + } + return { + ok: result.exitCode === 0 && boolField(parsed, "ok", false), + action: "platform-infra-sub2api-status", + summary: parsed, + remote: compactCapture(result, { full: options.full || result.exitCode !== 0 }), + }; +} + +async function validate(config: UniDeskConfig, options: DisclosureOptions): Promise> { + const result = await capture(config, g14K3sRoute, ["script"], validateScript()); + const parsed = parseJsonOutput(result.stdout); + if (options.raw) { + return { + ok: result.exitCode === 0 && boolField(parsed, "ok", false), + action: "platform-infra-sub2api-validate", + remote: compactCapture(result, { full: true }), + parsed, + }; + } + return { + ok: result.exitCode === 0 && boolField(parsed, "ok", false), + action: "platform-infra-sub2api-validate", + summary: parsed, + remote: compactCapture(result, { full: options.full || result.exitCode !== 0 }), + }; +} + +function policyChecks(yaml: string): PolicyCheck[] { + return [ + { + name: "no-ingress", + ok: !/^\s*kind:\s*Ingress\s*$/mu.test(yaml), + detail: "Sub2API must not be exposed through Kubernetes Ingress.", + }, + { + name: "no-nodeport-or-loadbalancer", + ok: !/^\s*type:\s*(NodePort|LoadBalancer)\s*$/mu.test(yaml), + detail: "Services must remain ClusterIP/internal-only.", + }, + { + name: "no-host-network", + ok: !/^\s*hostNetwork:\s*true\s*$/mu.test(yaml), + 
detail: "Pods must not join the host network.", + }, + { + name: "no-host-port", + ok: !/^\s*hostPort:\s*[0-9]+\s*$/mu.test(yaml), + detail: "Pods must not expose host ports.", + }, + { + name: "no-cpu-memory-resources", + ok: !/^\s*(cpu|memory):\s*/mu.test(yaml), + detail: "Issue #220 requires no Kubernetes CPU/memory requests or limits.", + }, + { + name: "no-resource-quota-or-limit-range", + ok: !/^\s*kind:\s*(ResourceQuota|LimitRange)\s*$/mu.test(yaml), + detail: "The platform-infra namespace must not receive quota/default limit objects for this deployment.", + }, + { + name: "expected-namespace", + ok: new RegExp(`^\\s*name:\\s*${namespace}\\s*$`, "mu").test(yaml), + detail: `Manifest declares namespace ${namespace}.`, + }, + ]; +} + +function dryRunScript(yaml: string): string { + const encoded = Buffer.from(yaml, "utf8").toString("base64"); + return ` +set -u +tmp="$(mktemp -d)" +trap 'rm -rf "$tmp"' EXIT +manifest="$tmp/sub2api.k8s.yaml" +printf '%s' '${encoded}' | base64 -d > "$manifest" +client_out="$tmp/client.out" +client_err="$tmp/client.err" +server_out="$tmp/server.out" +server_err="$tmp/server.err" +kubectl apply --dry-run=client -f "$manifest" >"$client_out" 2>"$client_err" +client_rc=$? +if kubectl get namespace ${namespace} >/dev/null 2>&1; then + namespace_exists=true + kubectl apply --server-side --dry-run=server --field-manager=${fieldManager} -f "$manifest" >"$server_out" 2>"$server_err" + server_rc=$? 
+else + namespace_exists=false + server_rc=0 + printf '%s\\n' 'server dry-run skipped because namespace does not exist yet; first apply creates it before namespaced resources' >"$server_out" + : >"$server_err" +fi +python3 - "$client_rc" "$server_rc" "$namespace_exists" "$client_out" "$client_err" "$server_out" "$server_err" <<'PY' +import json +import sys +client_rc = int(sys.argv[1]) +server_rc = int(sys.argv[2]) +namespace_exists = sys.argv[3] == "true" +paths = sys.argv[4:] +def text(path): + try: + return open(path, encoding="utf-8").read() + except FileNotFoundError: + return "" +payload = { + "ok": client_rc == 0 and server_rc == 0, + "namespace": "${namespace}", + "namespaceExistsBeforeDryRun": namespace_exists, + "clientDryRun": { + "exitCode": client_rc, + "stdout": text(paths[0])[-4000:], + "stderr": text(paths[1])[-4000:], + }, + "serverDryRun": { + "exitCode": server_rc, + "disposition": "executed" if namespace_exists else "skipped-namespace-missing", + "stdout": text(paths[2])[-4000:], + "stderr": text(paths[3])[-4000:], + }, +} +print(json.dumps(payload, ensure_ascii=False, indent=2)) +sys.exit(0 if payload["ok"] else 1) +PY +`; +} + +function applyScript(yaml: string): string { + const encoded = Buffer.from(yaml, "utf8").toString("base64"); + return ` +set -u +tmp="$(mktemp -d)" +trap 'rm -rf "$tmp"' EXIT +manifest="$tmp/sub2api.k8s.yaml" +printf '%s' '${encoded}' | base64 -d > "$manifest" +ns_out="$tmp/ns.out" +ns_err="$tmp/ns.err" +secret_out="$tmp/secret.out" +secret_err="$tmp/secret.err" +apply_out="$tmp/apply.out" +apply_err="$tmp/apply.err" +kubectl create namespace ${namespace} --dry-run=client -o yaml | kubectl apply --server-side --force-conflicts --field-manager=${fieldManager} -f - >"$ns_out" 2>"$ns_err" +ns_rc=$? 
+secret_action="unknown" +secret_rc=0 +if [ "$ns_rc" -eq 0 ]; then + if kubectl -n ${namespace} get secret ${secretName} >/dev/null 2>&1; then + secret_action="kept-existing" + : >"$secret_out" + : >"$secret_err" + else + rand_hex() { + bytes="$1" + if command -v openssl >/dev/null 2>&1; then + openssl rand -hex "$bytes" + else + dd if=/dev/urandom bs="$bytes" count=1 2>/dev/null | od -An -tx1 | tr -d ' \\n' + fi + } + kubectl -n ${namespace} create secret generic ${secretName} \\ + --from-literal=POSTGRES_PASSWORD="$(rand_hex 32)" \\ + --from-literal=ADMIN_PASSWORD="$(rand_hex 16)" \\ + --from-literal=JWT_SECRET="$(rand_hex 32)" \\ + --from-literal=TOTP_ENCRYPTION_KEY="$(rand_hex 32)" \\ + --dry-run=client -o yaml | kubectl apply --server-side --force-conflicts --field-manager=${fieldManager} -f - >"$secret_out" 2>"$secret_err" + secret_rc=$? + secret_action="created" + fi +fi +apply_rc=1 +if [ "$ns_rc" -eq 0 ] && [ "$secret_rc" -eq 0 ]; then + kubectl apply --server-side --force-conflicts --field-manager=${fieldManager} -f "$manifest" >"$apply_out" 2>"$apply_err" + apply_rc=$? 
+else + : >"$apply_out" + printf '%s\\n' 'skipped because namespace or secret step failed' >"$apply_err" +fi +python3 - "$ns_rc" "$secret_rc" "$apply_rc" "$secret_action" "$ns_out" "$ns_err" "$secret_out" "$secret_err" "$apply_out" "$apply_err" <<'PY' +import json +import sys +ns_rc = int(sys.argv[1]) +secret_rc = int(sys.argv[2]) +apply_rc = int(sys.argv[3]) +secret_action = sys.argv[4] +paths = sys.argv[5:] +def text(path): + try: + return open(path, encoding="utf-8").read() + except FileNotFoundError: + return "" +payload = { + "ok": ns_rc == 0 and secret_rc == 0 and apply_rc == 0, + "namespace": "${namespace}", + "secret": { + "name": "${secretName}", + "action": secret_action, + "requiredKeys": ${JSON.stringify(requiredSecretKeys)}, + "valuesPrinted": False, + }, + "steps": { + "namespace": {"exitCode": ns_rc, "stdout": text(paths[0])[-4000:], "stderr": text(paths[1])[-4000:]}, + "secret": {"exitCode": secret_rc, "stdout": text(paths[2])[-4000:], "stderr": text(paths[3])[-4000:]}, + "apply": {"exitCode": apply_rc, "stdout": text(paths[4])[-8000:], "stderr": text(paths[5])[-4000:]}, + }, +} +print(json.dumps(payload, ensure_ascii=False, indent=2)) +sys.exit(0 if payload["ok"] else 1) +PY +`; +} + +function statusScript(expectedImage: string): string { + return ` +set -u +tmp="$(mktemp -d)" +trap 'rm -rf "$tmp"' EXIT +capture_json() { + name="$1" + shift + "$@" -o json >"$tmp/$name.json" 2>"$tmp/$name.err" + rc=$? 
+ printf '%s' "$rc" >"$tmp/$name.rc" +} +capture_json ns kubectl get namespace ${namespace} +capture_json deployments kubectl -n ${namespace} get deployments -l app.kubernetes.io/part-of=platform-infra +capture_json statefulsets kubectl -n ${namespace} get statefulsets -l app.kubernetes.io/part-of=platform-infra +capture_json pods kubectl -n ${namespace} get pods -l app.kubernetes.io/part-of=platform-infra +capture_json services kubectl -n ${namespace} get services -l app.kubernetes.io/part-of=platform-infra +capture_json pvc kubectl -n ${namespace} get pvc -l app.kubernetes.io/part-of=platform-infra +capture_json secrets kubectl -n ${namespace} get secret ${secretName} +capture_json ingresses kubectl -n ${namespace} get ingress +capture_json quotas kubectl -n ${namespace} get resourcequota +capture_json limitranges kubectl -n ${namespace} get limitrange +python3 - "$tmp" <<'PY' +import json +import os +import sys + +tmp = sys.argv[1] + +def rc(name): + try: + return int(open(os.path.join(tmp, f"{name}.rc"), encoding="utf-8").read() or "1") + except FileNotFoundError: + return 1 + +def load(name): + path = os.path.join(tmp, f"{name}.json") + if not os.path.exists(path): + return None + try: + return json.load(open(path, encoding="utf-8")) + except json.JSONDecodeError: + return None + +def items(name): + data = load(name) + if not isinstance(data, dict): + return [] + return data.get("items") or [] + +def deployment_summary(item): + spec = item.get("spec") or {} + status = item.get("status") or {} + desired = spec.get("replicas", 1) + available = status.get("availableReplicas", 0) + init_containers = ((spec.get("template") or {}).get("spec") or {}).get("initContainers", []) + containers = ((spec.get("template") or {}).get("spec") or {}).get("containers", []) + return { + "name": item["metadata"]["name"], + "desired": desired, + "readyReplicas": status.get("readyReplicas", 0), + "availableReplicas": available, + "updatedReplicas": status.get("updatedReplicas", 0), + 
"ready": available >= desired, + "images": [c.get("image") for c in containers], + "initImages": [c.get("image") for c in init_containers], + } + +def statefulset_summary(item): + spec = item.get("spec") or {} + status = item.get("status") or {} + desired = spec.get("replicas", 1) + ready = status.get("readyReplicas", 0) + return { + "name": item["metadata"]["name"], + "desired": desired, + "readyReplicas": ready, + "currentReplicas": status.get("currentReplicas", 0), + "updatedReplicas": status.get("updatedReplicas", 0), + "ready": ready >= desired, + "images": [c.get("image") for c in ((spec.get("template") or {}).get("spec") or {}).get("containers", [])], + } + +def pod_summary(item): + status = item.get("status") or {} + container_statuses = status.get("containerStatuses") or [] + return { + "name": item["metadata"]["name"], + "phase": status.get("phase"), + "ready": all((cs.get("ready") is True) for cs in container_statuses) if container_statuses else False, + "restarts": sum(int(cs.get("restartCount") or 0) for cs in container_statuses), + "nodeName": (item.get("spec") or {}).get("nodeName"), + "containers": [ + { + "name": cs.get("name"), + "ready": cs.get("ready"), + "restartCount": cs.get("restartCount"), + "image": cs.get("image"), + "state": list((cs.get("state") or {}).keys()), + } + for cs in container_statuses + ], + } + +def service_summary(item): + spec = item.get("spec") or {} + return { + "name": item["metadata"]["name"], + "type": spec.get("type", "ClusterIP"), + "clusterIP": spec.get("clusterIP"), + "ports": [ + { + "name": p.get("name"), + "port": p.get("port"), + "targetPort": p.get("targetPort"), + "nodePort": p.get("nodePort"), + } + for p in spec.get("ports", []) + ], + } + +def pvc_summary(item): + spec = item.get("spec") or {} + status = item.get("status") or {} + req = (spec.get("resources") or {}).get("requests") or {} + return { + "name": item["metadata"]["name"], + "phase": status.get("phase"), + "storageClassName": 
spec.get("storageClassName"), + "requestedStorage": req.get("storage"), + } + +def resource_findings(kind, collection): + findings = [] + for item in collection: + spec = item.get("spec") or {} + template_spec = ((spec.get("template") or {}).get("spec") or {}) + if template_spec.get("hostNetwork") is True: + findings.append({"kind": kind, "name": item["metadata"]["name"], "field": "hostNetwork"}) + all_containers = [(container, "containers") for container in template_spec.get("containers", [])] + [(container, "initContainers") for container in template_spec.get("initContainers", [])] + for container, container_group in all_containers: + resources = container.get("resources") or {} + if resources.get("requests"): + findings.append({"kind": kind, "name": item["metadata"]["name"], "container": container.get("name"), "containerGroup": container_group, "field": "resources.requests"}) + if resources.get("limits"): + findings.append({"kind": kind, "name": item["metadata"]["name"], "container": container.get("name"), "containerGroup": container_group, "field": "resources.limits"}) + for port in container.get("ports", []): + if "hostPort" in port: + findings.append({"kind": kind, "name": item["metadata"]["name"], "container": container.get("name"), "containerGroup": container_group, "field": "hostPort", "value": port.get("hostPort")}) + return findings + +deployments = items("deployments") +statefulsets = items("statefulsets") +services = items("services") +pods = items("pods") +pvcs = items("pvc") +secret = load("secrets") +secret_keys = sorted(((secret or {}).get("data") or {}).keys()) +missing_secret_keys = [key for key in ${JSON.stringify(requiredSecretKeys)} if key not in secret_keys] +service_violations = [] +for svc in services: + spec = svc.get("spec") or {} + if spec.get("type", "ClusterIP") != "ClusterIP": + service_violations.append({"name": svc["metadata"]["name"], "type": spec.get("type")}) + for port in spec.get("ports", []): + if "nodePort" in port: + 
service_violations.append({"name": svc["metadata"]["name"], "nodePort": port.get("nodePort")}) +resource_violations = resource_findings("Deployment", deployments) + resource_findings("StatefulSet", statefulsets) +expected_image = "${expectedImage}" +sub2api_deployment = next((deployment_summary(item) for item in deployments if item["metadata"]["name"] == "${serviceName}"), None) +image_aligned = sub2api_deployment is not None and expected_image in sub2api_deployment.get("images", []) +boundary = { + "internalOnly": len(service_violations) == 0 and len(items("ingresses")) == 0, + "serviceViolations": service_violations, + "ingressCount": len(items("ingresses")), + "resourceQuotaCount": len(items("quotas")), + "limitRangeCount": len(items("limitranges")), + "resourceViolations": resource_violations, +} +workload_ready = all(d["ready"] for d in map(deployment_summary, deployments)) and all(s["ready"] for s in map(statefulset_summary, statefulsets)) +payload = { + "ok": rc("ns") == 0 and workload_ready and image_aligned and boundary["internalOnly"] and len(resource_violations) == 0 and boundary["resourceQuotaCount"] == 0 and boundary["limitRangeCount"] == 0 and len(missing_secret_keys) == 0, + "namespace": "${namespace}", + "namespaceExists": rc("ns") == 0, + "deployments": [deployment_summary(item) for item in deployments], + "statefulsets": [statefulset_summary(item) for item in statefulsets], + "pods": [pod_summary(item) for item in pods], + "services": [service_summary(item) for item in services], + "pvcs": [pvc_summary(item) for item in pvcs], + "secret": { + "name": "${secretName}", + "exists": rc("secrets") == 0, + "requiredKeys": ${JSON.stringify(requiredSecretKeys)}, + "missingKeys": missing_secret_keys, + "valuesPrinted": False, + }, + "imageControl": { + "desiredImage": expected_image, + "configPath": "config/platform-infra/sub2api.yaml", + "aligned": image_aligned, + "runningImages": sub2api_deployment.get("images", []) if sub2api_deployment else [], + }, + 
"boundary": boundary, + "serviceDns": "${serviceName}.${namespace}.svc.cluster.local:8080", + "next": { + "apply": "bun scripts/cli.ts platform-infra sub2api apply --confirm", + "validate": "bun scripts/cli.ts platform-infra sub2api validate", + }, +} +print(json.dumps(payload, ensure_ascii=False, indent=2)) +sys.exit(0 if payload["ok"] else 1) +PY +`; +} + +function validateScript(): string { + return ` +set -u +tmp="$(mktemp -d)" +trap 'rm -rf "$tmp"' EXIT +kubectl get --raw /api/v1/namespaces/${namespace}/services/${serviceName}:8080/proxy/health >"$tmp/health.body" 2>"$tmp/health.err" +health_rc=$? +kubectl get --raw /api/v1/namespaces/${namespace}/services/${serviceName}:8080/proxy/ >"$tmp/root.body" 2>"$tmp/root.err" +root_rc=$? +pg_pod="$(kubectl -n ${namespace} get pod -l app.kubernetes.io/name=sub2api-postgres -o jsonpath='{.items[0].metadata.name}' 2>"$tmp/pg-pod.err")" +redis_pod="$(kubectl -n ${namespace} get pod -l app.kubernetes.io/name=sub2api-redis -o jsonpath='{.items[0].metadata.name}' 2>"$tmp/redis-pod.err")" +if [ -n "$pg_pod" ]; then + kubectl -n ${namespace} exec "$pg_pod" -- pg_isready -U sub2api -d sub2api -h 127.0.0.1 >"$tmp/pg.out" 2>"$tmp/pg.err" + pg_rc=$? +else + pg_rc=1 + printf '%s\\n' 'sub2api postgres pod not found' >"$tmp/pg.err" +fi +if [ -n "$redis_pod" ]; then + kubectl -n ${namespace} exec "$redis_pod" -- redis-cli ping >"$tmp/redis.out" 2>"$tmp/redis.err" + redis_rc=$? 
+else + redis_rc=1 + printf '%s\\n' 'sub2api redis pod not found' >"$tmp/redis.err" +fi +python3 - "$tmp" "$health_rc" "$root_rc" "$pg_rc" "$redis_rc" <<'PY' +import json +import os +import sys + +tmp = sys.argv[1] +health_rc, root_rc, pg_rc, redis_rc = [int(value) for value in sys.argv[2:]] + +def text(name, limit=4000): + path = os.path.join(tmp, name) + try: + data = open(path, encoding="utf-8", errors="replace").read() + except FileNotFoundError: + return "" + return data[-limit:] + +health_body = text("health.body", 2000) +root_body = text("root.body", 2000) +payload = { + "ok": health_rc == 0 and root_rc == 0 and pg_rc == 0 and redis_rc == 0, + "namespace": "${namespace}", + "serviceDns": "${serviceName}.${namespace}.svc.cluster.local:8080", + "checks": { + "sub2apiHealthViaKubernetesServiceProxy": { + "exitCode": health_rc, + "method": "kubectl get --raw /api/v1/namespaces/${namespace}/services/${serviceName}:8080/proxy/health", + "bodyPreview": health_body, + "stderr": text("health.err", 2000), + }, + "sub2apiRootViaKubernetesServiceProxy": { + "exitCode": root_rc, + "method": "kubectl get --raw /api/v1/namespaces/${namespace}/services/${serviceName}:8080/proxy/", + "bodyBytes": len(root_body.encode("utf-8")), + "bodyPreview": root_body[:400], + "stderr": text("root.err", 2000), + }, + "postgresPgIsReady": { + "exitCode": pg_rc, + "stdout": text("pg.out", 2000), + "stderr": text("pg.err", 2000), + }, + "redisPing": { + "exitCode": redis_rc, + "stdout": text("redis.out", 2000), + "stderr": text("redis.err", 2000), + }, + }, +} +print(json.dumps(payload, ensure_ascii=False, indent=2)) +sys.exit(0 if payload["ok"] else 1) +PY +`; +} + +async function capture(config: UniDeskConfig, target: string, args: string[], input?: string): Promise { + return await runSshCommandCapture(config, target, args, input); +} + +function parseJsonOutput(stdout: string): Record | null { + const trimmed = stdout.trim(); + if (trimmed.length === 0) return null; + const start = 
/**
 * Parses `candidate` as JSON and returns it only when it is a plain object
 * (not `null`, not an array); any parse failure yields `null`.
 */
function tryParseJsonObject(candidate: string): Record<string, unknown> | null {
  try {
    const parsed: unknown = JSON.parse(candidate);
    return typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)
      ? (parsed as Record<string, unknown>)
      : null;
  } catch {
    return null;
  }
}

/**
 * Extracts the JSON object embedded in command output.
 *
 * The candidate always ends at the last `}`. It starts at the first `{`; when
 * that slice does not parse (for example a banner or warning line containing
 * a stray brace precedes the payload), each later `{` that opens a line is
 * tried in turn.
 *
 * @param stdout - Raw captured output, possibly surrounded by other text.
 * @returns The parsed object, or `null` when no JSON object can be recovered.
 */
function parseJsonOutput(stdout: string): Record<string, unknown> | null {
  const trimmed = stdout.trim();
  if (trimmed.length === 0) return null;

  const end = trimmed.lastIndexOf("}");
  let start = trimmed.indexOf("{");
  while (start !== -1 && start < end) {
    const parsed = tryParseJsonObject(trimmed.slice(start, end + 1));
    if (parsed !== null) return parsed;
    // Only braces at the start of a line are retried, which keeps the number
    // of attempts small even for deeply nested pretty-printed JSON.
    const nextLineBrace = trimmed.indexOf("\n{", start);
    start = nextLineBrace === -1 ? -1 : nextLineBrace + 1;
  }
  return null;
}

/**
 * Reads a boolean field from a parsed JSON object.
 *
 * @param value - Parsed object, or `null` when parsing failed.
 * @param key - Field name to read.
 * @param defaultValue - Returned when `value` is `null` or the field is not a boolean.
 * @returns The field value when it is a boolean, otherwise `defaultValue`.
 */
function boolField(value: Record<string, unknown> | null, key: string, defaultValue: boolean): boolean {
  const field = value?.[key];
  return typeof field === "boolean" ? field : defaultValue;
}

/**
 * Summarises an SSH capture for inclusion in CLI output.
 *
 * Byte counts are always reported. The output tails (last 8000 characters of
 * stdout, last 4000 of stderr) are included only when `options.full` is set
 * or the command exited non-zero; otherwise they are empty strings.
 *
 * @param result - Captured exit code, stdout and stderr.
 * @param options - `full: true` forces the tails to be included on success.
 * @returns A plain object with `exitCode`, `stdoutBytes`, `stderrBytes`,
 *   `stdoutTail` and `stderrTail`.
 */
function compactCapture(result: SshCaptureResult, options: { full?: boolean } = {}): Record<string, unknown> {
  const includeTails = (options.full ?? false) || result.exitCode !== 0;
  return {
    exitCode: result.exitCode,
    stdoutBytes: Buffer.byteLength(result.stdout, "utf8"),
    stderrBytes: Buffer.byteLength(result.stderr, "utf8"),
    stdoutTail: includeTails ? result.stdout.slice(-8000) : "",
    stderrTail: includeTails ? result.stderr.slice(-4000) : "",
  };
}
# Sub2API stack for the platform-infra namespace.
#
# Resources, in order: Namespace, ConfigMap, two PersistentVolumeClaims,
# three ClusterIP Services, the PostgreSQL StatefulSet, the Redis Deployment
# and the Sub2API Deployment. No Ingress or NodePort is declared in this file.
#
# The Secret `sub2api-secrets` is referenced below but not defined in this file.
apiVersion: v1
kind: Namespace
metadata:
  name: platform-infra
  labels:
    app.kubernetes.io/name: platform-infra
    app.kubernetes.io/managed-by: unidesk
    unidesk.ai/runtime-node: G14
---
# Non-secret application settings; loaded wholesale into the sub2api container
# through `envFrom`. All values are strings because ConfigMap data must be.
apiVersion: v1
kind: ConfigMap
metadata:
  name: sub2api-config
  namespace: platform-infra
  labels:
    app.kubernetes.io/name: sub2api
    app.kubernetes.io/part-of: platform-infra
    app.kubernetes.io/managed-by: unidesk
data:
  AUTO_SETUP: "true"
  SERVER_HOST: "0.0.0.0"
  SERVER_PORT: "8080"
  SERVER_MODE: "release"
  RUN_MODE: "standard"
  # Database host/port match the sub2api-postgres Service declared below.
  DATABASE_HOST: "sub2api-postgres"
  DATABASE_PORT: "5432"
  DATABASE_USER: "sub2api"
  DATABASE_DBNAME: "sub2api"
  DATABASE_SSLMODE: "disable"
  DATABASE_MAX_OPEN_CONNS: "10"
  DATABASE_MAX_IDLE_CONNS: "2"
  DATABASE_CONN_MAX_LIFETIME_MINUTES: "30"
  DATABASE_CONN_MAX_IDLE_TIME_MINUTES: "5"
  # Redis host/port match the sub2api-redis Service declared below.
  REDIS_HOST: "sub2api-redis"
  REDIS_PORT: "6379"
  REDIS_PASSWORD: ""
  REDIS_DB: "0"
  REDIS_POOL_SIZE: "32"
  REDIS_MIN_IDLE_CONNS: "2"
  REDIS_ENABLE_TLS: "false"
  ADMIN_EMAIL: "admin@sub2api.platform-infra.local"
  JWT_EXPIRE_HOUR: "24"
  TZ: "Asia/Shanghai"
  SECURITY_URL_ALLOWLIST_ENABLED: "false"
  SECURITY_URL_ALLOWLIST_ALLOW_INSECURE_HTTP: "false"
  SECURITY_URL_ALLOWLIST_ALLOW_PRIVATE_HOSTS: "false"
  SECURITY_URL_ALLOWLIST_UPSTREAM_HOSTS: ""
  UPDATE_PROXY_URL: ""
  GATEWAY_OPENAI_RESPONSE_HEADER_TIMEOUT: "0"
  GATEWAY_OPENAI_HTTP2_ENABLED: "true"
  GATEWAY_OPENAI_HTTP2_ALLOW_PROXY_FALLBACK_TO_HTTP1: "true"
  GATEWAY_OPENAI_HTTP2_FALLBACK_ERROR_THRESHOLD: "2"
  GATEWAY_OPENAI_HTTP2_FALLBACK_WINDOW_SECONDS: "60"
  GATEWAY_OPENAI_HTTP2_FALLBACK_TTL_SECONDS: "600"
  GATEWAY_IMAGE_STREAM_DATA_INTERVAL_TIMEOUT: "900"
  GATEWAY_IMAGE_STREAM_KEEPALIVE_INTERVAL: "10"
  GATEWAY_IMAGE_CONCURRENCY_ENABLED: "false"
  GATEWAY_IMAGE_CONCURRENCY_MAX_CONCURRENT_REQUESTS: "0"
  GATEWAY_IMAGE_CONCURRENCY_OVERFLOW_MODE: "reject"
  GATEWAY_IMAGE_CONCURRENCY_WAIT_TIMEOUT_SECONDS: "30"
  GATEWAY_IMAGE_CONCURRENCY_MAX_WAITING_REQUESTS: "100"
---
# Application data volume, mounted at /app/data in the sub2api container.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: sub2api-data
  namespace: platform-infra
  labels:
    app.kubernetes.io/name: sub2api
    app.kubernetes.io/component: app
    app.kubernetes.io/part-of: platform-infra
    app.kubernetes.io/managed-by: unidesk
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: local-path
  resources:
    requests:
      storage: 2Gi
---
# Redis persistence volume, mounted at /data in the redis container.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: sub2api-redis-data
  namespace: platform-infra
  labels:
    app.kubernetes.io/name: sub2api-redis
    app.kubernetes.io/component: redis
    app.kubernetes.io/part-of: platform-infra
    app.kubernetes.io/managed-by: unidesk
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: local-path
  resources:
    requests:
      storage: 2Gi
---
# Also named as the StatefulSet's `serviceName` below.
apiVersion: v1
kind: Service
metadata:
  name: sub2api-postgres
  namespace: platform-infra
  labels:
    app.kubernetes.io/name: sub2api-postgres
    app.kubernetes.io/component: postgres
    app.kubernetes.io/part-of: platform-infra
    app.kubernetes.io/managed-by: unidesk
spec:
  selector:
    app.kubernetes.io/name: sub2api-postgres
    app.kubernetes.io/component: postgres
  ports:
    - name: postgres
      port: 5432
      targetPort: postgres
---
apiVersion: v1
kind: Service
metadata:
  name: sub2api-redis
  namespace: platform-infra
  labels:
    app.kubernetes.io/name: sub2api-redis
    app.kubernetes.io/component: redis
    app.kubernetes.io/part-of: platform-infra
    app.kubernetes.io/managed-by: unidesk
spec:
  selector:
    app.kubernetes.io/name: sub2api-redis
    app.kubernetes.io/component: redis
  ports:
    - name: redis
      port: 6379
      targetPort: redis
---
# Cluster-internal entry point for the application (port 8080 -> container port `http`).
apiVersion: v1
kind: Service
metadata:
  name: sub2api
  namespace: platform-infra
  labels:
    app.kubernetes.io/name: sub2api
    app.kubernetes.io/component: app
    app.kubernetes.io/part-of: platform-infra
    app.kubernetes.io/managed-by: unidesk
spec:
  selector:
    app.kubernetes.io/name: sub2api
    app.kubernetes.io/component: app
  ports:
    - name: http
      port: 8080
      targetPort: http
---
# Single-replica PostgreSQL with its own volume claim template.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: sub2api-postgres
  namespace: platform-infra
  labels:
    app.kubernetes.io/name: sub2api-postgres
    app.kubernetes.io/component: postgres
    app.kubernetes.io/part-of: platform-infra
    app.kubernetes.io/managed-by: unidesk
spec:
  serviceName: sub2api-postgres
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: sub2api-postgres
      app.kubernetes.io/component: postgres
  template:
    metadata:
      labels:
        app.kubernetes.io/name: sub2api-postgres
        app.kubernetes.io/component: postgres
        app.kubernetes.io/part-of: platform-infra
    spec:
      securityContext:
        # NOTE(review): presumably the postgres group id in the alpine image — confirm.
        fsGroup: 70
      containers:
        - name: postgres
          image: postgres:18-alpine
          imagePullPolicy: IfNotPresent
          ports:
            - name: postgres
              containerPort: 5432
          env:
            - name: POSTGRES_USER
              value: sub2api
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: sub2api-secrets
                  key: POSTGRES_PASSWORD
            - name: POSTGRES_DB
              value: sub2api
            # PGDATA is pinned to the same path as the volume mount below.
            - name: PGDATA
              value: /var/lib/postgresql/data
            - name: TZ
              value: Asia/Shanghai
          readinessProbe:
            exec:
              command:
                - sh
                - -c
                - pg_isready -U sub2api -d sub2api -h 127.0.0.1
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 6
          livenessProbe:
            exec:
              command:
                - sh
                - -c
                - pg_isready -U sub2api -d sub2api -h 127.0.0.1
            initialDelaySeconds: 30
            periodSeconds: 20
            timeoutSeconds: 5
            failureThreshold: 6
          volumeMounts:
            - name: postgres-data
              mountPath: /var/lib/postgresql/data
  volumeClaimTemplates:
    - metadata:
        name: postgres-data
        labels:
          app.kubernetes.io/name: sub2api-postgres
          app.kubernetes.io/component: postgres
          app.kubernetes.io/part-of: platform-infra
          app.kubernetes.io/managed-by: unidesk
      spec:
        accessModes:
          - ReadWriteOnce
        storageClassName: local-path
        resources:
          requests:
            storage: 10Gi
---
# Single-replica Redis. `Recreate` stops the old pod before starting the new
# one; the pod mounts a ReadWriteOnce claim.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: sub2api-redis
  namespace: platform-infra
  labels:
    app.kubernetes.io/name: sub2api-redis
    app.kubernetes.io/component: redis
    app.kubernetes.io/part-of: platform-infra
    app.kubernetes.io/managed-by: unidesk
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: sub2api-redis
      app.kubernetes.io/component: redis
  template:
    metadata:
      labels:
        app.kubernetes.io/name: sub2api-redis
        app.kubernetes.io/component: redis
        app.kubernetes.io/part-of: platform-infra
    spec:
      securityContext:
        fsGroup: 999
      containers:
        - name: redis
          image: redis:8-alpine
          imagePullPolicy: IfNotPresent
          # `sh -c` receives the single args string as the command line to run.
          command:
            - sh
            - -c
          args:
            - redis-server --save 60 1 --appendonly yes --appendfsync everysec
          ports:
            - name: redis
              containerPort: 6379
          env:
            - name: TZ
              value: Asia/Shanghai
          readinessProbe:
            exec:
              command:
                - redis-cli
                - ping
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 6
          livenessProbe:
            exec:
              command:
                - redis-cli
                - ping
            initialDelaySeconds: 30
            periodSeconds: 20
            timeoutSeconds: 5
            failureThreshold: 6
          volumeMounts:
            - name: redis-data
              mountPath: /data
      volumes:
        - name: redis-data
          persistentVolumeClaim:
            claimName: sub2api-redis-data
---
# The Sub2API application itself. Init containers block startup until both
# PostgreSQL and Redis answer.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: sub2api
  namespace: platform-infra
  labels:
    app.kubernetes.io/name: sub2api
    app.kubernetes.io/component: app
    app.kubernetes.io/part-of: platform-infra
    app.kubernetes.io/managed-by: unidesk
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: sub2api
      app.kubernetes.io/component: app
  template:
    metadata:
      labels:
        app.kubernetes.io/name: sub2api
        app.kubernetes.io/component: app
        app.kubernetes.io/part-of: platform-infra
    spec:
      securityContext:
        fsGroup: 1000
      initContainers:
        - name: wait-postgres
          image: postgres:18-alpine
          imagePullPolicy: IfNotPresent
          command:
            - sh
            - -c
            - until pg_isready -h sub2api-postgres -U sub2api -d sub2api; do sleep 2; done
        - name: wait-redis
          image: redis:8-alpine
          imagePullPolicy: IfNotPresent
          command:
            - sh
            - -c
            - until redis-cli -h sub2api-redis ping | grep -q PONG; do sleep 2; done
      containers:
        - name: sub2api
          # Placeholder tokens, not valid image values as written.
          # NOTE(review): presumably substituted by the platform-infra CLI from
          # config/platform-infra/sub2api.yaml before apply — confirm.
          image: __SUB2API_IMAGE__
          imagePullPolicy: __SUB2API_IMAGE_PULL_POLICY__
          ports:
            - name: http
              containerPort: 8080
          envFrom:
            - configMapRef:
                name: sub2api-config
          env:
            # Reuses the PostgreSQL password key under the application's variable name.
            - name: DATABASE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: sub2api-secrets
                  key: POSTGRES_PASSWORD
            - name: ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: sub2api-secrets
                  key: ADMIN_PASSWORD
            - name: JWT_SECRET
              valueFrom:
                secretKeyRef:
                  name: sub2api-secrets
                  key: JWT_SECRET
            - name: TOTP_ENCRYPTION_KEY
              valueFrom:
                secretKeyRef:
                  name: sub2api-secrets
                  key: TOTP_ENCRYPTION_KEY
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 6
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 30
            periodSeconds: 20
            timeoutSeconds: 5
            failureThreshold: 6
          # Allows up to 30 failed probes at 10 s intervals before the container is restarted.
          startupProbe:
            httpGet:
              path: /health
              port: http
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 30
          volumeMounts:
            - name: sub2api-data
              mountPath: /app/data
      volumes:
        - name: sub2api-data
          persistentVolumeClaim:
            claimName: sub2api-data