diff --git a/scripts/native/cicd/controller-one-shot.sh b/scripts/native/cicd/controller-one-shot.sh index c2b3d7e6..59c004f8 100644 --- a/scripts/native/cicd/controller-one-shot.sh +++ b/scripts/native/cicd/controller-one-shot.sh @@ -4,7 +4,7 @@ set -eu cd /work rm -rf /work/unidesk started_at=$(date -Iseconds) -echo "branch-follower one-shot started ${started_at}" +echo "branch-follower one-shot started ${started_at}" >&2 /etc/unidesk-cicd-branch-follower/sync-source.sh \ "${UNIDESK_CONTROLLER_SOURCE_REPOSITORY}" \ @@ -18,4 +18,4 @@ cd /work/unidesk "$@" -echo "branch-follower one-shot finished $(date -Iseconds)" +echo "branch-follower one-shot finished $(date -Iseconds)" >&2 diff --git a/scripts/src/cicd-branch-follower.ts b/scripts/src/cicd-branch-follower.ts index dea32118..6616291c 100644 --- a/scripts/src/cicd-branch-follower.ts +++ b/scripts/src/cicd-branch-follower.ts @@ -937,8 +937,10 @@ async function decideAndMaybeTrigger( if (options.confirm && (phase === "PendingTrigger" || phase === "Superseded" || (phase === "Observed" && observedSha !== null))) { const trigger = await executeTrigger(registry, follower, observedSha, options); triggerCommand = trigger.command; - phase = trigger.ok ? (options.wait ? "ClosingOut" : "Triggering") : "Failed"; - decision = trigger.ok ? `trigger submitted for ${shortSha(observedSha)}` : `trigger failed for ${shortSha(observedSha)}: ${redactText(trigger.message).slice(0, 220)}`; + const triggerStillRunning = trigger.ok && trigger.command.stillRunning === true; + const triggerBudgetAdvisory = trigger.ok && (trigger.command.budgetTimedOut === true || asOptionalRecord(trigger.command.closeout)?.timedOut === true); + phase = trigger.ok ? (triggerStillRunning ? "Triggering" : options.wait ? "ClosingOut" : "Triggering") : "Failed"; + decision = trigger.ok ? (triggerStillRunning || triggerBudgetAdvisory ? redactText(trigger.message).slice(0, 220) : `trigger submitted for ${shortSha(observedSha)}`) : `trigger failed for ${shortSha(observedSha)}: ${redactText(trigger.message).slice(0, 220)}`; inFlightJob = trigger.jobId ?? live.inFlightJob; lastTriggeredSha = observedSha; if (trigger.ok && options.wait && trigger.completed) { @@ -948,7 +950,7 @@ async function decideAndMaybeTrigger( targetSha = observedSha; lastSucceededSha = observedSha; } - if (!trigger.ok) warnings.push(trigger.message); + if (!trigger.ok || triggerStillRunning || triggerBudgetAdvisory) warnings.push(trigger.message); } if (options.confirm && options.wait && phase === "ClosingOut" && observedSha !== null && triggerCommand === undefined) { const closeout = await waitNativeFollowerCloseout(registry, follower, observedSha, options, options.timeoutSeconds ?? follower.budgets.endToEndSeconds); @@ -1329,15 +1331,16 @@ function nativeK8sStageFailure( message: string, startedAt?: number, ): TriggerResult { + const budgetPending = job.timedOut && !job.completed && !job.failed; const detail = [ - message, - job.timedOut ? "timedOut=true" : null, + budgetPending ? `${message}; native Job still running after advisory budget` : message, + job.timedOut ? (budgetPending ? "overBudget=true" : "timedOut=true") : null, job.conditionReason === null ? null : `reason=${job.conditionReason}`, job.conditionMessage === null ? null : `condition=${job.conditionMessage}`, - job.logsTail === null ? null : `logs=${tailText(job.logsTail, 500)}`, + budgetPending || job.logsTail === null ? null : `logs=${tailText(job.logsTail, 500)}`, ].filter((item): item is string => item !== null).join("; "); return { - ok: false, + ok: budgetPending, completed: false, message: detail, jobId: jobName, @@ -1347,9 +1350,12 @@ function nativeK8sStageFailure( phase, jobName, sourceCommit: observedSha, - ok: false, + ok: budgetPending, + completed: false, + stillRunning: budgetPending ? true : undefined, + budgetTimedOut: budgetPending ? true : undefined, startedAt: startedAt === undefined ? null : new Date(startedAt).toISOString(), - finishedAt: new Date().toISOString(), + finishedAt: budgetPending ? null : new Date().toISOString(), elapsedMs: startedAt === undefined ? job.elapsedMs : Date.now() - startedAt, payload, job, @@ -1407,6 +1413,7 @@ function nativeTektonTriggerResult(input: { }): TriggerResult { const pipelineRunCompleted = input.payload.completed === true; const failed = input.payload.failed === true || input.result.exitCode !== 0; + const closeoutHardFailed = nativeCloseoutHardFailed(input.closeout); const stillRunning = input.payload.stillRunning === true || input.payload.timedOutWait === true; const message = failed ? nativeTektonFailureText(input.payload, input.result) @@ -1419,8 +1426,8 @@ function nativeTektonTriggerResult(input: { : stillRunning ? `native PipelineRun ${input.pipelineRun} is still running; query status/events/logs for closeout` : `native PipelineRun ${input.pipelineRun} submitted`; - const ok = !failed && (input.closeout === null || input.closeout.completed === true); - const finishedAt = failed || input.result.timedOut || input.closeout?.completed === true || input.closeout?.timedOut === true + const ok = !failed && !closeoutHardFailed; + const finishedAt = failed || input.result.timedOut || closeoutHardFailed || input.closeout?.completed === true ? new Date().toISOString() : null; return { @@ -1452,6 +1459,10 @@ function nativeTektonTriggerResult(input: { }; } +function nativeCloseoutHardFailed(closeout: NativeCloseoutWaitResult | null): boolean { + return closeout !== null && closeout.ok === false && closeout.timedOut !== true; +} + function nativeTektonFailureText(payload: Record, result: CommandResult): string { return [ stringOrNull(payload.message), @@ -1522,8 +1533,9 @@ async function executeNativeSentinelTrigger(registry: BranchFollowerRegistry, fo : stillRunning ? `native sentinel PipelineRun ${pipelineRun} is still running; query status/events/logs for closeout` : `native sentinel PipelineRun ${pipelineRun} submitted`; - const ok = !failed && (closeout === null || closeout.completed === true); - const finishedAt = failed || result.timedOut || closeout?.completed === true || closeout?.timedOut === true + const closeoutHardFailed = nativeCloseoutHardFailed(closeout); + const ok = !failed && !closeoutHardFailed; + const finishedAt = failed || result.timedOut || closeoutHardFailed || closeout?.completed === true ? new Date().toISOString() : null; return { @@ -2255,6 +2267,7 @@ function compactStateCommand(command: Record | undefined): Reco payload, exitCode: numberOrNull(command.exitCode), timedOut: command.timedOut === true, + budgetTimedOut: command.budgetTimedOut === true ? true : undefined, statusAuthority: stringOrNull(command.statusAuthority), reconcileTimeline: compactReconcileTimeline(command.reconcileTimeline), parsedDownstreamCliOutput: false, @@ -2510,7 +2523,9 @@ function totalTimingFromCommand(command: Record | undefined, ph const exitCode = numberOrNull(command.exitCode); const status = command.ok === false || (exitCode !== null && exitCode !== 0) ? "failed" - : command.timedOut === true || closeout?.timedOut === true + : command.budgetTimedOut === true || closeout?.timedOut === true + ? "over-budget" + : command.timedOut === true ? "timed-out" : closeout?.completed === true || command.completed === true ? "completed" @@ -2648,7 +2663,7 @@ function stageTimingsFromCommand(command: Record | undefined): const gitMirrorFlush = asOptionalRecord(closeout.gitMirrorFlush); const gitMirrorFlushStage = k8sJobTiming("git-mirror-flush", asOptionalRecord(gitMirrorFlush?.result), stringOrNull(gitMirrorFlush?.jobName)); if (gitMirrorFlushStage !== null) stages.push(gitMirrorFlushStage); - const status = closeout.completed === true ? "completed" : closeout.timedOut === true ? "timed-out" : "pending"; + const status = closeout.completed === true ? "completed" : closeout.timedOut === true ? "over-budget" : "pending"; stages.push(stageTiming("closeout", status, secondsFromMs(numberOrNull(closeout.elapsedMs)), null, "k8s-native-closeout", stringOrNull(command.pipelineRun))); } return stages; @@ -2661,7 +2676,7 @@ function k8sJobTiming(stage: string, job: Record | null, object : job.failed === true ? "failed" : job.timedOut === true - ? "timed-out" + ? "over-budget" : "running"; return stageTiming(stage, status, secondsFromMs(numberOrNull(job.elapsedMs)), null, "kubernetes-job", objectOverride ?? stringOrNull(job.jobName)); } diff --git a/scripts/src/cicd-native.ts b/scripts/src/cicd-native.ts index fb27f5c3..acd407be 100644 --- a/scripts/src/cicd-native.ts +++ b/scripts/src/cicd-native.ts @@ -40,11 +40,12 @@ export function runNativeK8sJob(namespace: string, jobName: string, manifest: Re }, }); const parsed = parseJsonObject(result.stdout); + const nativeWaitTimedOut = parsed?.timedOut === true && parsed?.failed !== true; return { ok: result.exitCode === 0 && parsed?.ok === true, completed: parsed?.completed === true, - failed: parsed?.failed === true || result.exitCode !== 0, - timedOut: parsed?.timedOut === true || result.timedOut, + failed: parsed?.failed === true || (result.exitCode !== 0 && !nativeWaitTimedOut), + timedOut: nativeWaitTimedOut || result.timedOut, created: parsed?.created === true, reused: parsed?.reused === true, jobName, @@ -54,7 +55,7 @@ export function runNativeK8sJob(namespace: string, jobName: string, manifest: Re logsTail: stringOrNull(parsed?.logsTail), summary: asOptionalRecord(parsed?.summary), conditionReason: stringOrNull(parsed?.conditionReason), - conditionMessage: stringOrNull(parsed?.conditionMessage) ?? (result.exitCode === 0 ? null : tailText(result.stderr || result.stdout, 500)), + conditionMessage: stringOrNull(parsed?.conditionMessage) ?? (result.exitCode === 0 || nativeWaitTimedOut ? null : tailText(result.stderr || result.stdout, 500)), statusAuthority: "kubernetes-api-serviceaccount", parsedDownstreamCliOutput: false, };