fix: finalize HWLAB v03 platform DB bridge

This commit is contained in:
Codex
2026-06-09 05:43:57 +00:00
parent ac14d8e7c5
commit 480a7a7b37
4 changed files with 156 additions and 43 deletions
+1 -1
View File
@@ -16,7 +16,7 @@ CI/CD、GitOps、rollout、artifact 发布、PR 合并后的 runtime lane 滚动
当现有 CLI 对某个 CI/CD 操作缺字段、缺动作、缺状态或缺权限时,处理顺序是先补 CLI,再执行发布或治理动作。临时低层 route 写操作只允许用于一次性止血,并且必须随后把稳定能力补进 CLI 与本参考文档;不能把手工 `kubectl apply/delete/annotate`、原生 GitHub CLI、手写 REST 请求或 registry shell 脚本沉淀成长期流程。长时观察仍遵守 60 秒短查询和 submit-and-poll 语义,不用单个 `trans`/`tran` 等待完整 PipelineRun 或 Argo rollout 结束。
`hwlab nodes secret cleanup-owned-postgres --node G14 --lane v03 [--dry-run|--confirm]` 是 v03 迁移到 G14 platform PostgreSQL 后清理旧 repo-owned Postgres StatefulSet/Service/ConfigMap/Secret/PVC 的受控入口。迁移后的 `hwlab-cloud-api-v03-db``hwlab-v03-openfga` SecretRef 来自 G14 host platform DB 凭据文件,不再从 lane-local Postgres Secret 派生;v03+ 的 `hwlab nodes secret ensure --name hwlab-cloud-api-v03-db|hwlab-v03-openfga` 旧路径已删除,status 只做 redacted SecretRef 与 `g14-platform-postgres` bridge 观测。平台 DB 运行、SecretRef 轮换边界和 health 验证见 `docs/reference/g14-platform-db.md`
`hwlab nodes control-plane allow-endpoint-bridge --node G14 --lane v03 [--dry-run|--confirm]` 是 v03 platform PostgreSQL bridge 的受控收敛入口,用于让 Argo 跟踪固定 `EndpointSlice/g14-platform-postgres-host`,并清理旧 `Endpoints/g14-platform-postgres` 及其派生的随机 EndpointSlice。`hwlab nodes secret cleanup-owned-postgres --node G14 --lane v03 [--dry-run|--confirm]` 是 v03 迁移到 G14 platform PostgreSQL 后清理旧 repo-owned Postgres StatefulSet/Service/ConfigMap/Secret/PVC 的受控入口。迁移后的 `hwlab-cloud-api-v03-db` 和 `hwlab-v03-openfga` SecretRef 来自 G14 host platform DB 凭据文件,不再从 lane-local Postgres Secret 派生;v03+ 的 `hwlab nodes secret ensure --name hwlab-cloud-api-v03-db|hwlab-v03-openfga` 旧路径已删除,status 只做 redacted SecretRef 与 `g14-platform-postgres` Service、固定 EndpointSlice 和旧 Endpoints 缺席观测。平台 DB 运行、SecretRef 轮换边界和 health 验证见 `docs/reference/g14-platform-db.md`。
`hwlab nodes secret status|ensure --node G14 --lane v03 --name hwlab-v03-code-agent-provider` 是 v03 Code Agent / MoonBridge provider SecretRef 的受控 bootstrap 入口;`ensure` 只从集群内既有 `hwlab-v02/hwlab-v02-code-agent-provider` 复制 `openai-api-key` 和 `opencode-api-key` 到 lane-local Secret,输出仅披露 source/target Secret 名、key presence、decoded byte count、mutation 和后续命令,禁止打印 base64、解码值、完整 API key 或可复用凭据。OpenFGA 和 master admin API key 继续使用同一命名空间下的 `hwlab nodes secret ... --name hwlab-v03-openfga|hwlab-v03-master-server-admin-api-key`。
+13 -3
View File
@@ -48,10 +48,9 @@ PostgreSQL 只监听 G14 host loopback 与 k3s pod 可达的 node gateway 地址
业务 Pod 不直接访问 host IP 字面量,而是访问所在 namespace 内的 `g14-platform-postgres` Service。HWLAB v0.3 的桥接对象在 `hwlab-v03` namespace 中:
- `Service/g14-platform-postgres`
- `Endpoints/g14-platform-postgres`
- `EndpointSlice/g14-platform-postgres-host`
目标地址固定为 `10.42.0.1:5432`。bridge 最低验证标准是 namespace-local `Service/g14-platform-postgres` 存在,并且 `Endpoints``EndpointSlice` 至少一条路径可用;最终仍以 runtime `/health/live``hwlab nodes control-plane status` 的真实连接结果为准,不把某一种端点对象形态做成额外门禁
目标地址固定为 `10.42.0.1:5432`。bridge 最低验证标准是 namespace-local `Service/g14-platform-postgres` 存在、固定 `EndpointSlice/g14-platform-postgres-host` 存在且指向 host PostgreSQL,并且 `Endpoints/g14-platform-postgres` 不存在。不要保留 `Endpoints` 兼容路径或由它派生的随机 EndpointSlice;最终仍以 runtime `/health/live` 和 `hwlab nodes control-plane status` 的真实连接结果为准。
业务 SecretRef 固定使用现有应用 Secret 名称,不迁移为共享明文配置:
@@ -85,6 +84,15 @@ bun scripts/cli.ts hwlab nodes control-plane status --node G14 --lane v03 --pipe
Argo Application `hwlab-node-v03` 应显示 `Synced/Healthy`;runtime workloads 中应只保留 `service/g14-platform-postgres`,不应再出现 `statefulset.apps/hwlab-v03-postgres`、`service/hwlab-v03-postgres` 或 `configmap/hwlab-v03-postgres-init`。
Argo 必须跟踪 `Service/g14-platform-postgres` 和固定 `EndpointSlice/g14-platform-postgres-host`。如果 `argocd-cm` 仍排除或忽略 `Endpoints` / `EndpointSlice`,或者旧 GitOps revision 留下了 `Endpoints/g14-platform-postgres` 与随机派生 EndpointSlice,使用受控入口收敛,不要手工 `kubectl delete`
```bash
bun scripts/cli.ts hwlab nodes control-plane allow-endpoint-bridge --node G14 --lane v03 --dry-run
bun scripts/cli.ts hwlab nodes control-plane allow-endpoint-bridge --node G14 --lane v03 --confirm
```
该入口会移除旧 Argo endpoint 资源排除、重启 Argo application controller、删除旧 `Endpoints/g14-platform-postgres` 和它派生的多余 EndpointSlice,并 hard refresh `hwlab-node-v03`。收敛后的状态必须是 `legacyEndpointsExist=false`、`hostEndpointSliceExists=true`、`extraEndpointSlices=[]`。
## 旧自有 DB 清理
迁移到平台 DB 后,v0.3 自有 PostgreSQL StatefulSet、Service、ConfigMap、Secret 和 PVC 都必须清理。GitOps render 不再生成旧 `postgres.yaml`;如果运行面仍残留旧对象,直接用受控 CLI 清理精确命名资源,不保留兼容路径:
@@ -108,6 +116,8 @@ trans G14:k3s kubectl -n hwlab-v03 get statefulset,svc,endpoints,endpointslice,c
curl -fsS http://74.48.78.17:20667/health/live | jq '{status, ready, environment, db: {ready: .db.ready, connectionResult: .db.connectionResult}, runtime: {ready: .runtime.ready, queryResult: .runtime.connection.queryResult}}'
```
其中 `endpoints/g14-platform-postgres` 应为空,`endpointslice/g14-platform-postgres-host` 应存在且是唯一带 `kubernetes.io/service-name=g14-platform-postgres` label 的 EndpointSlice。
## 备份与恢复
备份脚本固定在 G14 host
@@ -139,7 +149,7 @@ trans G14 script -- '/usr/local/sbin/g14-platform-db-backup'
- `postgresql` systemd service active。
- `ss -ltnp` 只显示 `127.0.0.1:5432` 和 `10.42.0.1:5432` 监听。
- `/usr/local/sbin/g14-platform-db-health` 能列出预期 database。
- `hwlab-v03``g14-platform-postgres` Service 可见,Endpoints 或 EndpointSlice 至少一条 bridge 路径可见
- `hwlab-v03` 中的 `g14-platform-postgres` Service 可见,`EndpointSlice/g14-platform-postgres-host` 是唯一 bridge endpoint,旧 `Endpoints/g14-platform-postgres` 不存在。
- `hwlab-cloud-api` `/health/live` 返回 `status=ok`、`ready=true`、`db.connectionResult=connected`、`runtime.connection.queryResult=durable_readiness_ready`。
- `hwlab nodes control-plane status --node G14 --lane v03` 显示 Argo `Synced/Healthy`,runtime workload 摘要不包含旧自有 Postgres。
+12 -3
View File
@@ -190,7 +190,8 @@ const cloudApiDbStatus = nodeSecretStatusFromTextForTest([
"legacyPostgresSecretExists\tno",
"platformService\tg14-platform-postgres",
"platformServiceExists\tyes",
"platformEndpointsExists\tyes",
"platformEndpointsExists\tno",
"platformEndpointSlice\tg14-platform-postgres-host",
"platformEndpointSliceExists\tyes",
"dbName\thwlab_v03",
"dbUser\thwlab_v03_app",
@@ -207,7 +208,10 @@ assertCondition(
&& record(record(record(cloudApiDbStatus).after).databaseUrl).valueBytes === 172
&& record(record(cloudApiDbStatus).legacyPostgresSecret).exists === false
&& record(record(cloudApiDbStatus).platformService).name === "g14-platform-postgres"
&& record(record(cloudApiDbStatus).platformService).endpointsExist === true
&& record(record(cloudApiDbStatus).platformService).endpointsExist === false
&& record(record(cloudApiDbStatus).platformService).legacyEndpointsAbsent === true
&& record(record(cloudApiDbStatus).platformService).endpointSlice === "g14-platform-postgres-host"
&& record(record(cloudApiDbStatus).platformService).endpointSliceExists === true
&& record(cloudApiDbStatus).dbUser === "hwlab_v03_app"
&& record(cloudApiDbStatus).dbHost === "g14-platform-postgres.hwlab-v03.svc.cluster.local"
&& !JSON.stringify(cloudApiDbStatus).includes("postgres://")
@@ -235,7 +239,8 @@ const openFgaPlatformStatus = nodeSecretStatusFromTextForTest([
"legacyPostgresSecretExists\tno",
"platformService\tg14-platform-postgres",
"platformServiceExists\tyes",
"platformEndpointsExists\tyes",
"platformEndpointsExists\tno",
"platformEndpointSlice\tg14-platform-postgres-host",
"platformEndpointSliceExists\tyes",
"dbName\topenfga_v03",
"dbUser\topenfga_v03_app",
@@ -248,6 +253,10 @@ assertCondition(
record(openFgaPlatformStatus).ok === true
&& record(openFgaPlatformStatus).platformDbMode === true
&& record(record(openFgaPlatformStatus).legacyPostgresSecret).exists === false
&& record(record(openFgaPlatformStatus).platformService).endpointsExist === false
&& record(record(openFgaPlatformStatus).platformService).legacyEndpointsAbsent === true
&& record(record(openFgaPlatformStatus).platformService).endpointSlice === "g14-platform-postgres-host"
&& record(record(openFgaPlatformStatus).platformService).endpointSliceExists === true
&& record(openFgaPlatformStatus).dbName === "openfga_v03"
&& record(openFgaPlatformStatus).dbUser === "openfga_v03_app"
&& !JSON.stringify(openFgaPlatformStatus).includes("postgres://")
+130 -36
View File
@@ -402,7 +402,14 @@ function runNodeEndpointBridge(options: ReturnType<typeof parseNodeScopedDelegat
const beforeIgnored = fields.beforeEndpointsIgnoreUpdates === "yes" || fields.beforeEndpointSliceIgnoreUpdates === "yes";
const afterExcluded = fields.afterEndpointResourcesExcluded === "yes";
const afterIgnored = fields.afterEndpointsIgnoreUpdates === "yes" || fields.afterEndpointSliceIgnoreUpdates === "yes";
const ok = result.exitCode === 0 && !afterExcluded && !afterIgnored;
const beforeExtraEndpointSlices = splitWhitespaceField(fields.beforeExtraEndpointSliceNames);
const afterExtraEndpointSlices = splitWhitespaceField(fields.afterExtraEndpointSliceNames);
const beforeLegacyEndpoints = fields.beforeLegacyEndpointsExists === "yes";
const afterLegacyEndpoints = fields.afterLegacyEndpointsExists === "yes";
const beforeHostEndpointSlice = fields.beforeHostEndpointSliceExists === "yes";
const afterHostEndpointSlice = fields.afterHostEndpointSliceExists === "yes";
const bridgeReady = !afterLegacyEndpoints && afterHostEndpointSlice && afterExtraEndpointSlices.length === 0;
const ok = result.exitCode === 0 && !afterExcluded && !afterIgnored && bridgeReady;
return {
ok: dryRun ? result.exitCode === 0 : ok,
command: "hwlab nodes control-plane allow-endpoint-bridge",
@@ -419,21 +426,32 @@ function runNodeEndpointBridge(options: ReturnType<typeof parseNodeScopedDelegat
endpointResourcesExcluded: beforeExcluded,
endpointsIgnoreUpdates: fields.beforeEndpointsIgnoreUpdates === "yes",
endpointSliceIgnoreUpdates: fields.beforeEndpointSliceIgnoreUpdates === "yes",
legacyEndpointsExist: beforeLegacyEndpoints,
hostEndpointSliceExists: beforeHostEndpointSlice,
extraEndpointSlices: beforeExtraEndpointSlices,
},
after: {
endpointResourcesExcluded: afterExcluded,
endpointsIgnoreUpdates: fields.afterEndpointsIgnoreUpdates === "yes",
endpointSliceIgnoreUpdates: fields.afterEndpointSliceIgnoreUpdates === "yes",
legacyEndpointsExist: afterLegacyEndpoints,
hostEndpointSliceExists: afterHostEndpointSlice,
extraEndpointSlices: afterExtraEndpointSlices,
},
runtimeNamespace: fields.runtimeNamespace || `hwlab-${options.lane}`,
platformService: fields.platformService || "g14-platform-postgres",
hostEndpointSlice: fields.hostEndpointSlice || "g14-platform-postgres-host",
patchExitCode: numericField(fields.patchExitCode),
rolloutRestartExitCode: numericField(fields.rolloutRestartExitCode),
rolloutStatusExitCode: numericField(fields.rolloutStatusExitCode),
deleteLegacyEndpointsExitCode: numericField(fields.deleteLegacyEndpointsExitCode),
deleteExtraEndpointSlicesExitCode: numericField(fields.deleteExtraEndpointSlicesExitCode),
refreshExitCode: numericField(fields.refreshExitCode),
exitCode: result.exitCode,
stderr: result.exitCode === 0 ? "" : result.stderr.trim().slice(0, 2000),
summary: !afterExcluded && !afterIgnored
? "Argo tracks HWLAB external Postgres EndpointSlice resources"
: "Argo still excludes or ignores HWLAB external Postgres EndpointSlice resources",
summary: !afterExcluded && !afterIgnored && bridgeReady
? "Argo tracks HWLAB external Postgres EndpointSlice and no legacy Endpoints remain"
: "Argo endpoint bridge is not in final Service plus EndpointSlice shape",
},
result: compactCommandResult(result),
};
@@ -441,34 +459,59 @@ function runNodeEndpointBridge(options: ReturnType<typeof parseNodeScopedDelegat
function endpointBridgeScript(options: { lane: HwlabRuntimeLane; dryRun: boolean }): string {
const application = `hwlab-node-${options.lane}`;
const runtimeNamespace = `hwlab-${options.lane}`;
return [
"set +e",
"namespace=argocd",
`runtime_namespace=${shellQuote(runtimeNamespace)}`,
"configmap=argocd-cm",
`application=${shellQuote(application)}`,
`dry_run=${shellQuote(options.dryRun ? "true" : "false")}`,
"platform_service=g14-platform-postgres",
"host_endpointslice=g14-platform-postgres-host",
"preset=endpoint-bridge-resource-tracking",
"cm_data() { kubectl -n \"$namespace\" get configmap \"$configmap\" -o \"go-template={{ index .data \\\"$1\\\" }}\" 2>/dev/null || true; }",
"cm_has_key() { kubectl -n \"$namespace\" get configmap \"$configmap\" -o jsonpath=\"{.data.$1}\" >/tmp/hwlab-argocd-cm-key.out 2>/dev/null && [ -s /tmp/hwlab-argocd-cm-key.out ] && printf yes || printf no; }",
"cm_has_key() { value=$(cm_data \"$1\"); [ -n \"$value\" ] && [ \"$value\" != \"<no value>\" ] && printf yes || printf no; }",
"endpoint_resources_excluded() { exclusions=$(cm_data resource.exclusions); printf '%s' \"$exclusions\" | grep -Eq '(^|[[:space:]])(Endpoints|EndpointSlice)([[:space:]]|$)' && printf yes || printf no; }",
"resource_exists() { kubectl -n \"$runtime_namespace\" get \"$1\" \"$2\" >/dev/null 2>&1 && printf yes || printf no; }",
"extra_endpoint_slices() { kubectl -n \"$runtime_namespace\" get endpointslice -l \"kubernetes.io/service-name=$platform_service\" -o name 2>/dev/null | sed \"/\\/$host_endpointslice$/d\" | tr '\\n' ' ' | sed 's/[[:space:]]*$//'; }",
"wait_runtime_bridge_clean() {",
" for _ in $(seq 1 30); do",
" current_legacy=$(resource_exists endpoints \"$platform_service\")",
" current_extra=$(extra_endpoint_slices)",
" current_host=$(resource_exists endpointslice \"$host_endpointslice\")",
" if [ \"$current_legacy\" != yes ] && [ -z \"$current_extra\" ] && [ \"$current_host\" = yes ]; then return 0; fi",
" sleep 2",
" done",
" return 1",
"}",
"before_endpoint_resources_excluded=$(endpoint_resources_excluded)",
"before_endpoints_ignore_updates=$(cm_has_key 'resource\\.customizations\\.ignoreResourceUpdates\\.Endpoints')",
"before_endpoint_slice_ignore_updates=$(cm_has_key 'resource\\.customizations\\.ignoreResourceUpdates\\.discovery\\.k8s\\.io_EndpointSlice')",
"before_endpoints_ignore_updates=$(cm_has_key 'resource.customizations.ignoreResourceUpdates.Endpoints')",
"before_endpoint_slice_ignore_updates=$(cm_has_key 'resource.customizations.ignoreResourceUpdates.discovery.k8s.io_EndpointSlice')",
"before_legacy_endpoints_exists=$(resource_exists endpoints \"$platform_service\")",
"before_host_endpointslice_exists=$(resource_exists endpointslice \"$host_endpointslice\")",
"before_extra_endpoint_slice_names=$(extra_endpoint_slices)",
"needs_argo_update=false",
"if [ \"$before_endpoint_resources_excluded\" = yes ] || [ \"$before_endpoints_ignore_updates\" = yes ] || [ \"$before_endpoint_slice_ignore_updates\" = yes ]; then needs_argo_update=true; fi",
"needs_runtime_cleanup=false",
"if [ \"$before_legacy_endpoints_exists\" = yes ] || [ -n \"$before_extra_endpoint_slice_names\" ]; then needs_runtime_cleanup=true; fi",
"action=observed",
"mutation=false",
"patch_exit=",
"rollout_restart_exit=",
"rollout_status_exit=",
"delete_legacy_endpoints_exit=",
"delete_extra_endpointslices_exit=",
"refresh_exit=",
"needs_update=false",
"if [ \"$before_endpoint_resources_excluded\" = yes ] || [ \"$before_endpoints_ignore_updates\" = yes ] || [ \"$before_endpoint_slice_ignore_updates\" = yes ]; then needs_update=true; fi",
"if [ \"$dry_run\" = true ]; then",
" if [ \"$needs_update\" = true ]; then action=would-remove-old-endpoint-exclusions; else action=kept; fi",
"elif [ \"$needs_update\" = false ]; then",
" action=kept",
" if [ \"$needs_argo_update\" = true ] && [ \"$needs_runtime_cleanup\" = true ]; then action=would-remove-old-endpoint-exclusions-and-legacy-endpoints",
" elif [ \"$needs_argo_update\" = true ]; then action=would-remove-old-endpoint-exclusions",
" elif [ \"$needs_runtime_cleanup\" = true ]; then action=would-remove-legacy-endpoints",
" else action=kept; fi",
"else",
" patch_file=$(mktemp /tmp/hwlab-argocd-endpoint-bridge.XXXXXX.json)",
" python3 - <<'PY' >\"$patch_file\"",
" if [ \"$needs_argo_update\" = true ]; then",
" patch_file=$(mktemp /tmp/hwlab-argocd-endpoint-bridge.XXXXXX.json)",
" python3 - <<'PY' >\"$patch_file\"",
"import json",
"desired = '''### Internal Kubernetes resources excluded to reduce watch volume",
"- apiGroups:",
@@ -526,32 +569,58 @@ function endpointBridgeScript(options: { lane: HwlabRuntimeLane; dryRun: boolean
" }",
"}))",
"PY",
" kubectl -n \"$namespace\" patch configmap \"$configmap\" --type merge --patch-file \"$patch_file\" >/tmp/hwlab-argocd-endpoint-bridge-patch.out 2>/tmp/hwlab-argocd-endpoint-bridge-patch.err",
" patch_exit=$?",
" rm -f \"$patch_file\"",
" if [ \"$patch_exit\" -eq 0 ]; then",
" kubectl -n \"$namespace\" rollout restart statefulset/argocd-application-controller >/tmp/hwlab-argocd-endpoint-bridge-rollout-restart.out 2>/tmp/hwlab-argocd-endpoint-bridge-rollout-restart.err",
" rollout_restart_exit=$?",
" if [ \"$rollout_restart_exit\" -eq 0 ]; then",
" kubectl -n \"$namespace\" rollout status statefulset/argocd-application-controller --timeout=180s >/tmp/hwlab-argocd-endpoint-bridge-rollout-status.out 2>/tmp/hwlab-argocd-endpoint-bridge-rollout-status.err",
" rollout_status_exit=$?",
" kubectl -n \"$namespace\" patch configmap \"$configmap\" --type merge --patch-file \"$patch_file\" >/tmp/hwlab-argocd-endpoint-bridge-patch.out 2>/tmp/hwlab-argocd-endpoint-bridge-patch.err",
" patch_exit=$?",
" rm -f \"$patch_file\"",
" if [ \"$patch_exit\" -eq 0 ]; then",
" kubectl -n \"$namespace\" rollout restart statefulset/argocd-application-controller >/tmp/hwlab-argocd-endpoint-bridge-rollout-restart.out 2>/tmp/hwlab-argocd-endpoint-bridge-rollout-restart.err",
" rollout_restart_exit=$?",
" if [ \"$rollout_restart_exit\" -eq 0 ]; then",
" kubectl -n \"$namespace\" rollout status statefulset/argocd-application-controller --timeout=180s >/tmp/hwlab-argocd-endpoint-bridge-rollout-status.out 2>/tmp/hwlab-argocd-endpoint-bridge-rollout-status.err",
" rollout_status_exit=$?",
" fi",
" fi",
" kubectl -n \"$namespace\" annotate application \"$application\" argocd.argoproj.io/refresh=hard --overwrite >/tmp/hwlab-argocd-endpoint-bridge-refresh.out 2>/tmp/hwlab-argocd-endpoint-bridge-refresh.err",
" refresh_exit=$?",
" if [ \"$rollout_restart_exit\" -ne 0 ]; then action=rollout-restart-failed",
" elif [ \"$rollout_status_exit\" -ne 0 ]; then action=rollout-status-failed",
" elif [ \"$refresh_exit\" -ne 0 ]; then action=refresh-failed",
" else action=removed-old-endpoint-exclusions; mutation=true; fi",
" fi",
" if [ -n \"$patch_exit\" ] && [ \"$patch_exit\" != 0 ]; then action=patch-failed",
" elif [ -n \"$rollout_restart_exit\" ] && [ \"$rollout_restart_exit\" != 0 ]; then action=rollout-restart-failed",
" elif [ -n \"$rollout_status_exit\" ] && [ \"$rollout_status_exit\" != 0 ]; then action=rollout-status-failed",
" else",
" action=patch-failed",
" if [ \"$needs_runtime_cleanup\" = true ]; then",
" kubectl -n \"$runtime_namespace\" delete endpoints \"$platform_service\" --ignore-not-found=true >/tmp/hwlab-platform-postgres-endpoints-delete.out 2>/tmp/hwlab-platform-postgres-endpoints-delete.err",
" delete_legacy_endpoints_exit=$?",
" wait_runtime_bridge_clean",
" remaining_extra=$(extra_endpoint_slices)",
" if [ -n \"$remaining_extra\" ]; then",
" kubectl -n \"$runtime_namespace\" delete $remaining_extra --ignore-not-found=true >/tmp/hwlab-platform-postgres-endpointslices-delete.out 2>/tmp/hwlab-platform-postgres-endpointslices-delete.err",
" delete_extra_endpointslices_exit=$?",
" wait_runtime_bridge_clean",
" fi",
" fi",
" if [ \"$needs_argo_update\" = true ] || [ \"$needs_runtime_cleanup\" = true ]; then",
" kubectl -n \"$namespace\" annotate application \"$application\" argocd.argoproj.io/refresh=hard --overwrite >/tmp/hwlab-argocd-endpoint-bridge-refresh.out 2>/tmp/hwlab-argocd-endpoint-bridge-refresh.err",
" refresh_exit=$?",
" fi",
" if [ -n \"$delete_legacy_endpoints_exit\" ] && [ \"$delete_legacy_endpoints_exit\" != 0 ]; then action=delete-legacy-endpoints-failed",
" elif [ -n \"$delete_extra_endpointslices_exit\" ] && [ \"$delete_extra_endpointslices_exit\" != 0 ]; then action=delete-extra-endpointslices-failed",
" elif [ -n \"$refresh_exit\" ] && [ \"$refresh_exit\" != 0 ]; then action=refresh-failed",
" elif [ \"$needs_argo_update\" = true ] && [ \"$needs_runtime_cleanup\" = true ]; then action=removed-old-endpoint-exclusions-and-legacy-endpoints; mutation=true",
" elif [ \"$needs_argo_update\" = true ]; then action=removed-old-endpoint-exclusions; mutation=true",
" elif [ \"$needs_runtime_cleanup\" = true ]; then action=removed-legacy-endpoints; mutation=true",
" else action=kept; fi",
" fi",
"fi",
"after_endpoint_resources_excluded=$(endpoint_resources_excluded)",
"after_endpoints_ignore_updates=$(cm_has_key 'resource\\.customizations\\.ignoreResourceUpdates\\.Endpoints')",
"after_endpoint_slice_ignore_updates=$(cm_has_key 'resource\\.customizations\\.ignoreResourceUpdates\\.discovery\\.k8s\\.io_EndpointSlice')",
"after_endpoints_ignore_updates=$(cm_has_key 'resource.customizations.ignoreResourceUpdates.Endpoints')",
"after_endpoint_slice_ignore_updates=$(cm_has_key 'resource.customizations.ignoreResourceUpdates.discovery.k8s.io_EndpointSlice')",
"after_legacy_endpoints_exists=$(resource_exists endpoints \"$platform_service\")",
"after_host_endpointslice_exists=$(resource_exists endpointslice \"$host_endpointslice\")",
"after_extra_endpoint_slice_names=$(extra_endpoint_slices)",
"printf 'namespace\\t%s\\n' \"$namespace\"",
"printf 'runtimeNamespace\\t%s\\n' \"$runtime_namespace\"",
"printf 'configMap\\t%s\\n' \"$configmap\"",
"printf 'application\\t%s\\n' \"$application\"",
"printf 'platformService\\t%s\\n' \"$platform_service\"",
"printf 'hostEndpointSlice\\t%s\\n' \"$host_endpointslice\"",
"printf 'preset\\t%s\\n' \"$preset\"",
"printf 'action\\t%s\\n' \"$action\"",
"printf 'dryRun\\t%s\\n' \"$dry_run\"",
@@ -559,17 +628,28 @@ function endpointBridgeScript(options: { lane: HwlabRuntimeLane; dryRun: boolean
"printf 'beforeEndpointResourcesExcluded\\t%s\\n' \"$before_endpoint_resources_excluded\"",
"printf 'beforeEndpointsIgnoreUpdates\\t%s\\n' \"$before_endpoints_ignore_updates\"",
"printf 'beforeEndpointSliceIgnoreUpdates\\t%s\\n' \"$before_endpoint_slice_ignore_updates\"",
"printf 'beforeLegacyEndpointsExists\\t%s\\n' \"$before_legacy_endpoints_exists\"",
"printf 'beforeHostEndpointSliceExists\\t%s\\n' \"$before_host_endpointslice_exists\"",
"printf 'beforeExtraEndpointSliceNames\\t%s\\n' \"$before_extra_endpoint_slice_names\"",
"printf 'afterEndpointResourcesExcluded\\t%s\\n' \"$after_endpoint_resources_excluded\"",
"printf 'afterEndpointsIgnoreUpdates\\t%s\\n' \"$after_endpoints_ignore_updates\"",
"printf 'afterEndpointSliceIgnoreUpdates\\t%s\\n' \"$after_endpoint_slice_ignore_updates\"",
"printf 'afterLegacyEndpointsExists\\t%s\\n' \"$after_legacy_endpoints_exists\"",
"printf 'afterHostEndpointSliceExists\\t%s\\n' \"$after_host_endpointslice_exists\"",
"printf 'afterExtraEndpointSliceNames\\t%s\\n' \"$after_extra_endpoint_slice_names\"",
"printf 'patchExitCode\\t%s\\n' \"$patch_exit\"",
"printf 'rolloutRestartExitCode\\t%s\\n' \"$rollout_restart_exit\"",
"printf 'rolloutStatusExitCode\\t%s\\n' \"$rollout_status_exit\"",
"printf 'deleteLegacyEndpointsExitCode\\t%s\\n' \"$delete_legacy_endpoints_exit\"",
"printf 'deleteExtraEndpointSlicesExitCode\\t%s\\n' \"$delete_extra_endpointslices_exit\"",
"printf 'refreshExitCode\\t%s\\n' \"$refresh_exit\"",
"if [ \"$dry_run\" != true ] && { [ \"$after_endpoint_resources_excluded\" = yes ] || [ \"$after_endpoints_ignore_updates\" = yes ] || [ \"$after_endpoint_slice_ignore_updates\" = yes ]; }; then exit 46; fi",
"if [ \"$dry_run\" != true ] && { [ \"$after_legacy_endpoints_exists\" = yes ] || [ -n \"$after_extra_endpoint_slice_names\" ] || [ \"$after_host_endpointslice_exists\" != yes ]; }; then exit 47; fi",
"if [ -n \"$patch_exit\" ] && [ \"$patch_exit\" != 0 ]; then exit \"$patch_exit\"; fi",
"if [ -n \"$rollout_restart_exit\" ] && [ \"$rollout_restart_exit\" != 0 ]; then exit \"$rollout_restart_exit\"; fi",
"if [ -n \"$rollout_status_exit\" ] && [ \"$rollout_status_exit\" != 0 ]; then exit \"$rollout_status_exit\"; fi",
"if [ -n \"$delete_legacy_endpoints_exit\" ] && [ \"$delete_legacy_endpoints_exit\" != 0 ]; then exit \"$delete_legacy_endpoints_exit\"; fi",
"if [ -n \"$delete_extra_endpointslices_exit\" ] && [ \"$delete_extra_endpointslices_exit\" != 0 ]; then exit \"$delete_extra_endpointslices_exit\"; fi",
"if [ -n \"$refresh_exit\" ] && [ \"$refresh_exit\" != 0 ]; then exit \"$refresh_exit\"; fi",
].join("\n");
}
@@ -689,6 +769,7 @@ function ownedPostgresCleanupScript(options: NodeSecretOptions, spec: RuntimeSec
function platformDbSecretStatusScript(options: NodeSecretOptions, spec: RuntimeSecretSpec): string {
const isOpenFga = options.preset === "openfga";
const platformEndpointSlice = `${spec.platformPostgresService}-host`;
return [
"set +e",
`namespace=${shellQuote(spec.namespace)}`,
@@ -698,6 +779,7 @@ function platformDbSecretStatusScript(options: NodeSecretOptions, spec: RuntimeS
`postgres_password_key=${shellQuote(OPENFGA_POSTGRES_PASSWORD_KEY)}`,
`legacy_postgres_secret=${shellQuote(spec.postgresSecret)}`,
`platform_service=${shellQuote(spec.platformPostgresService)}`,
`platform_endpointslice=${shellQuote(platformEndpointSlice)}`,
`platform_host=${shellQuote(spec.platformPostgresService)}`,
`platform_host_fqdn=${shellQuote(spec.openFgaDbHost)}`,
`db_name=${shellQuote(isOpenFga ? spec.openFgaDbName : spec.cloudApiDbName)}`,
@@ -708,7 +790,7 @@ function platformDbSecretStatusScript(options: NodeSecretOptions, spec: RuntimeS
"dry_run=true",
"secret_exists_flag() { kubectl -n \"$namespace\" get secret \"$1\" >/dev/null 2>&1 && printf yes || printf no; }",
"resource_exists_flag() { kubectl -n \"$namespace\" get \"$1\" \"$2\" >/dev/null 2>&1 && printf yes || printf no; }",
"endpointslice_exists_flag() { kubectl -n \"$namespace\" get endpointslice -l \"kubernetes.io/service-name=$1\" -o name 2>/dev/null | grep -q . && printf yes || printf no; }",
"endpointslice_exists_flag() { kubectl -n \"$namespace\" get endpointslice \"$1\" >/dev/null 2>&1 && printf yes || printf no; }",
"secret_b64_key() { kubectl -n \"$namespace\" get secret \"$1\" -o \"go-template={{ index .data \\\"$2\\\" }}\" 2>/dev/null || true; }",
"decoded_value() { if [ -n \"$1\" ]; then printf '%s' \"$1\" | base64 -d 2>/dev/null || true; fi; }",
"decoded_length() { if [ -n \"$1\" ]; then printf '%s' \"$1\" | base64 -d 2>/dev/null | wc -c | tr -d ' '; else printf '0'; fi; }",
@@ -738,7 +820,7 @@ function platformDbSecretStatusScript(options: NodeSecretOptions, spec: RuntimeS
"pg_password_bytes=$(decoded_length \"$pg_password_b64\")",
"platform_service_exists=$(resource_exists_flag service \"$platform_service\")",
"platform_endpoints_exists=$(resource_exists_flag endpoints \"$platform_service\")",
"platform_endpointslice_exists=$(endpointslice_exists_flag \"$platform_service\")",
"platform_endpointslice_exists=$(endpointslice_exists_flag \"$platform_endpointslice\")",
"uri_matches_expected \"$uri_value\"",
"printf 'namespace\\t%s\\n' \"$namespace\"",
"printf 'secret\\t%s\\n' \"$name\"",
@@ -763,6 +845,7 @@ function platformDbSecretStatusScript(options: NodeSecretOptions, spec: RuntimeS
"printf 'platformService\\t%s\\n' \"$platform_service\"",
"printf 'platformServiceExists\\t%s\\n' \"$platform_service_exists\"",
"printf 'platformEndpointsExists\\t%s\\n' \"$platform_endpoints_exists\"",
"printf 'platformEndpointSlice\\t%s\\n' \"$platform_endpointslice\"",
"printf 'platformEndpointSliceExists\\t%s\\n' \"$platform_endpointslice_exists\"",
"printf 'dbName\\t%s\\n' \"$db_name\"",
"printf 'dbUser\\t%s\\n' \"$db_user\"",
@@ -1357,7 +1440,8 @@ function secretStatusFromText(text: string, commandOk: boolean, exitCode: number
fields.afterDatabaseUrlPresent === "yes" &&
typeof afterUrlBytes === "number" && afterUrlBytes > 0;
const platformBridgeHealthy = fields.platformServiceExists === "yes" &&
(fields.platformEndpointsExists === "yes" || fields.platformEndpointSliceExists === "yes");
fields.platformEndpointsExists !== "yes" &&
fields.platformEndpointSliceExists === "yes";
const uriHealthy = fields.dbHostMatchesPlatform === "yes" &&
fields.dbNameMatchesExpected === "yes" &&
fields.dbUserMatchesExpected === "yes";
@@ -1384,6 +1468,8 @@ function secretStatusFromText(text: string, commandOk: boolean, exitCode: number
name: fields.platformService || spec.platformPostgresService,
exists: fields.platformServiceExists === "yes",
endpointsExist: fields.platformEndpointsExists === "yes",
legacyEndpointsAbsent: fields.platformEndpointsExists !== "yes",
endpointSlice: fields.platformEndpointSlice || `${spec.platformPostgresService}-host`,
endpointSliceExists: fields.platformEndpointSliceExists === "yes",
},
dbName: fields.dbName || spec.cloudApiDbName,
@@ -1459,7 +1545,8 @@ function secretStatusFromText(text: string, commandOk: boolean, exitCode: number
typeof afterAuthnBytes === "number" && afterAuthnBytes > 0 &&
typeof afterUriBytes === "number" && afterUriBytes > 0;
const platformBridgeHealthy = fields.platformServiceExists === "yes" &&
(fields.platformEndpointsExists === "yes" || fields.platformEndpointSliceExists === "yes");
fields.platformEndpointsExists !== "yes" &&
fields.platformEndpointSliceExists === "yes";
const uriHealthy = fields.dbHostMatchesPlatform === "yes" &&
fields.dbNameMatchesExpected === "yes" &&
fields.dbUserMatchesExpected === "yes";
@@ -1487,6 +1574,8 @@ function secretStatusFromText(text: string, commandOk: boolean, exitCode: number
name: fields.platformService || spec.platformPostgresService,
exists: fields.platformServiceExists === "yes",
endpointsExist: fields.platformEndpointsExists === "yes",
legacyEndpointsAbsent: fields.platformEndpointsExists !== "yes",
endpointSlice: fields.platformEndpointSlice || `${spec.platformPostgresService}-host`,
endpointSliceExists: fields.platformEndpointSliceExists === "yes",
},
dbName: fields.dbName || spec.openFgaDbName,
@@ -1625,6 +1714,11 @@ function numericField(value: string | undefined): number | null {
return Number.isFinite(parsed) ? parsed : null;
}
/**
 * Breaks a whitespace-delimited field into its individual tokens.
 *
 * @param value - Raw field text; may be `undefined` or empty when the field was absent.
 * @returns The non-empty tokens in their original order, or an empty array when
 *          the field is missing, empty, or contains only whitespace.
 */
function splitWhitespaceField(value: string | undefined): string[] {
  // Collecting maximal runs of non-whitespace yields the same tokens as
  // splitting on whitespace runs and discarding the empty pieces.
  const tokens = value ? value.match(/\S+/gu) : null;
  return tokens === null ? [] : Array.from(tokens);
}
function compactCommandResult(result: CommandResult): Record<string, unknown> {
return {
command: compactCommand(result.command),