fix: stabilize hwlab node observability and git mirror flush (#590)

Co-authored-by: Codex <codex@noreply.local>
This commit is contained in:
Lyon
2026-06-21 19:06:04 +08:00
committed by GitHub
parent 79df8c49db
commit e6be9cfa2d
2 changed files with 21 additions and 9 deletions
-1
View File
@@ -185,7 +185,6 @@ lanes:
- hwlab_http_
requiredSeries:
- hwlab_workbench_journey_total
- hwlab_workbench_event_phase_duration_seconds_count
- hwlab_workbench_projection_lag_events_bucket
- hwlab_workbench_projection_lag_seconds_bucket
- hwlab_workbench_projection_stuck_traces
+21 -8
View File
@@ -1459,18 +1459,31 @@ function gitMirrorFlushShell(node: ControlPlaneNodeSpec, target: ControlPlaneTar
"push_exit=0",
"fetch_status=skipped",
"fetch_exit=0",
"fetch_attempt=0",
"fetch_max_attempts=5",
"if [ -n \"$local_gitops\" ]; then",
" set +e",
" git --git-dir=\"$repo\" -c remote.origin.mirror=false push origin \"refs/heads/${gitops_branch}:refs/heads/${gitops_branch}\"",
" timeout 240 git --git-dir=\"$repo\" -c remote.origin.mirror=false push origin \"refs/heads/${gitops_branch}:refs/heads/${gitops_branch}\"",
" push_exit=$?",
" set -e",
" if [ \"$push_exit\" = \"0\" ]; then",
" push_status=succeeded",
" set +e",
" git --git-dir=\"$repo\" fetch origin \"+refs/heads/${gitops_branch}:refs/mirror-stage/heads/${gitops_branch}\"",
" fetch_exit=$?",
" set -e",
" if [ \"$fetch_exit\" = \"0\" ]; then fetch_status=succeeded; else fetch_status=failed; fi",
" fetch_retry_delay=1",
" while [ \"$fetch_attempt\" -lt \"$fetch_max_attempts\" ]; do",
" fetch_attempt=$((fetch_attempt + 1))",
" echo \"git-mirror post-push fetch attempt ${fetch_attempt}/${fetch_max_attempts}\" >&2",
" set +e",
" timeout 240 git --git-dir=\"$repo\" fetch origin \"+refs/heads/${gitops_branch}:refs/mirror-stage/heads/${gitops_branch}\"",
" fetch_exit=$?",
" set -e",
" if [ \"$fetch_exit\" = \"0\" ]; then fetch_status=succeeded; break; fi",
" fetch_status=failed",
" if [ \"$fetch_attempt\" -lt \"$fetch_max_attempts\" ]; then",
" echo \"git-mirror post-push fetch retry ${fetch_attempt}/${fetch_max_attempts} failed exit=${fetch_exit}; backoff=${fetch_retry_delay}s\" >&2",
" sleep \"$fetch_retry_delay\"",
" if [ \"$fetch_retry_delay\" -lt 16 ]; then fetch_retry_delay=$((fetch_retry_delay * 2)); fi",
" fi",
" done",
" else",
" push_status=failed",
" fi",
@@ -1491,9 +1504,9 @@ function gitMirrorFlushShell(node: ControlPlaneNodeSpec, target: ControlPlaneTar
" degraded_reason=git-mirror-post-push-fetch-failed",
" exit_code=44",
"fi",
"export repository gitops_branch started_at local_gitops github_gitops pending push_status push_exit fetch_status fetch_exit status partial_success degraded_reason",
"export repository gitops_branch started_at local_gitops github_gitops pending push_status push_exit fetch_status fetch_exit fetch_attempt fetch_max_attempts status partial_success degraded_reason",
"node <<'NODE' | tee /cache/HWLAB.last-flush.json",
"const payload = { event: 'git-mirror-flush', repo: process.env.repository, status: process.env.status || 'failed', partialSuccess: process.env.partial_success || null, degradedReason: process.env.degraded_reason || null, startedAt: process.env.started_at, flushedAt: new Date().toISOString(), gitopsBranch: process.env.gitops_branch, localGitops: process.env.local_gitops || null, githubGitops: process.env.github_gitops || null, pendingFlush: process.env.pending === 'true', stages: { push: process.env.push_status || null, pushExitCode: Number.parseInt(process.env.push_exit || '0', 10), postPushFetch: process.env.fetch_status || null, postPushFetchExitCode: Number.parseInt(process.env.fetch_exit || '0', 10) } };",
"const payload = { event: 'git-mirror-flush', repo: process.env.repository, status: process.env.status || 'failed', partialSuccess: process.env.partial_success || null, degradedReason: process.env.degraded_reason || null, startedAt: process.env.started_at, flushedAt: new Date().toISOString(), gitopsBranch: process.env.gitops_branch, localGitops: process.env.local_gitops || null, githubGitops: process.env.github_gitops || null, pendingFlush: process.env.pending === 'true', stages: { push: process.env.push_status || null, pushExitCode: Number.parseInt(process.env.push_exit || '0', 10), postPushFetch: process.env.fetch_status || null, postPushFetchExitCode: Number.parseInt(process.env.fetch_exit || '0', 10), postPushFetchAttempts: Number.parseInt(process.env.fetch_attempt || '0', 10), postPushFetchMaxAttempts: Number.parseInt(process.env.fetch_max_attempts || '0', 10) } };",
"console.log(JSON.stringify(payload));",
"NODE",
"cat /cache/HWLAB.last-flush.json",