diff --git a/config/hwlab-node-lanes.yaml b/config/hwlab-node-lanes.yaml index 2bb9b970..66cb5269 100644 --- a/config/hwlab-node-lanes.yaml +++ b/config/hwlab-node-lanes.yaml @@ -185,7 +185,6 @@ lanes: - hwlab_http_ requiredSeries: - hwlab_workbench_journey_total - - hwlab_workbench_event_phase_duration_seconds_count - hwlab_workbench_projection_lag_events_bucket - hwlab_workbench_projection_lag_seconds_bucket - hwlab_workbench_projection_stuck_traces diff --git a/scripts/src/hwlab-node-control-plane.ts b/scripts/src/hwlab-node-control-plane.ts index 10b99630..f6e4237a 100644 --- a/scripts/src/hwlab-node-control-plane.ts +++ b/scripts/src/hwlab-node-control-plane.ts @@ -1459,18 +1459,31 @@ function gitMirrorFlushShell(node: ControlPlaneNodeSpec, target: ControlPlaneTar "push_exit=0", "fetch_status=skipped", "fetch_exit=0", + "fetch_attempt=0", + "fetch_max_attempts=5", "if [ -n \"$local_gitops\" ]; then", " set +e", - " git --git-dir=\"$repo\" -c remote.origin.mirror=false push origin \"refs/heads/${gitops_branch}:refs/heads/${gitops_branch}\"", + " timeout 240 git --git-dir=\"$repo\" -c remote.origin.mirror=false push origin \"refs/heads/${gitops_branch}:refs/heads/${gitops_branch}\"", " push_exit=$?", " set -e", " if [ \"$push_exit\" = \"0\" ]; then", " push_status=succeeded", - " set +e", - " git --git-dir=\"$repo\" fetch origin \"+refs/heads/${gitops_branch}:refs/mirror-stage/heads/${gitops_branch}\"", - " fetch_exit=$?", - " set -e", - " if [ \"$fetch_exit\" = \"0\" ]; then fetch_status=succeeded; else fetch_status=failed; fi", + " fetch_retry_delay=1", + " while [ \"$fetch_attempt\" -lt \"$fetch_max_attempts\" ]; do", + " fetch_attempt=$((fetch_attempt + 1))", + " echo \"git-mirror post-push fetch attempt ${fetch_attempt}/${fetch_max_attempts}\" >&2", + " set +e", + " timeout 240 git --git-dir=\"$repo\" fetch origin \"+refs/heads/${gitops_branch}:refs/mirror-stage/heads/${gitops_branch}\"", + " fetch_exit=$?", + " set -e", + " if [ \"$fetch_exit\" = \"0\" ]; then fetch_status=succeeded; break; fi", + " fetch_status=failed", + " if [ \"$fetch_attempt\" -lt \"$fetch_max_attempts\" ]; then", + " echo \"git-mirror post-push fetch retry ${fetch_attempt}/${fetch_max_attempts} failed exit=${fetch_exit}; backoff=${fetch_retry_delay}s\" >&2", + " sleep \"$fetch_retry_delay\"", + " if [ \"$fetch_retry_delay\" -lt 16 ]; then fetch_retry_delay=$((fetch_retry_delay * 2)); fi", + " fi", + " done", " else", " push_status=failed", " fi", @@ -1491,9 +1504,9 @@ function gitMirrorFlushShell(node: ControlPlaneNodeSpec, target: ControlPlaneTar " degraded_reason=git-mirror-post-push-fetch-failed", " exit_code=44", "fi", - "export repository gitops_branch started_at local_gitops github_gitops pending push_status push_exit fetch_status fetch_exit status partial_success degraded_reason", + "export repository gitops_branch started_at local_gitops github_gitops pending push_status push_exit fetch_status fetch_exit fetch_attempt fetch_max_attempts status partial_success degraded_reason", "node <<'NODE' | tee /cache/HWLAB.last-flush.json", - "const payload = { event: 'git-mirror-flush', repo: process.env.repository, status: process.env.status || 'failed', partialSuccess: process.env.partial_success || null, degradedReason: process.env.degraded_reason || null, startedAt: process.env.started_at, flushedAt: new Date().toISOString(), gitopsBranch: process.env.gitops_branch, localGitops: process.env.local_gitops || null, githubGitops: process.env.github_gitops || null, pendingFlush: process.env.pending === 'true', stages: { push: process.env.push_status || null, pushExitCode: Number.parseInt(process.env.push_exit || '0', 10), postPushFetch: process.env.fetch_status || null, postPushFetchExitCode: Number.parseInt(process.env.fetch_exit || '0', 10) } };", + "const payload = { event: 'git-mirror-flush', repo: process.env.repository, status: process.env.status || 'failed', partialSuccess: process.env.partial_success || null, degradedReason: process.env.degraded_reason || null, startedAt: process.env.started_at, flushedAt: new Date().toISOString(), gitopsBranch: process.env.gitops_branch, localGitops: process.env.local_gitops || null, githubGitops: process.env.github_gitops || null, pendingFlush: process.env.pending === 'true', stages: { push: process.env.push_status || null, pushExitCode: Number.parseInt(process.env.push_exit || '0', 10), postPushFetch: process.env.fetch_status || null, postPushFetchExitCode: Number.parseInt(process.env.fetch_exit || '0', 10), postPushFetchAttempts: Number.parseInt(process.env.fetch_attempt || '0', 10), postPushFetchMaxAttempts: Number.parseInt(process.env.fetch_max_attempts || '0', 10) } };", "console.log(JSON.stringify(payload));", "NODE", "cat /cache/HWLAB.last-flush.json",