fix(web-probe): bound sentinel control-plane wait

This commit is contained in:
Codex
2026-06-26 13:07:07 +00:00
parent 9730971297
commit 17bf569eb8
5 changed files with 50 additions and 27 deletions
@@ -43,6 +43,8 @@ sentinel:
maintenance:
startCommand: sentinel maintenance start
stopCommand: sentinel maintenance stop
confirmWait:
maxSeconds: 120
targetValidation:
scenarioId: workbench-auth-session-switch-2users
maxSeconds: 300
@@ -43,6 +43,8 @@ sentinel:
maintenance:
startCommand: sentinel maintenance start
stopCommand: sentinel maintenance stop
confirmWait:
maxSeconds: 120
targetValidation:
scenarioId: workbench-dsflash-go-tool-call-10x
maxSeconds: 300
@@ -505,7 +505,7 @@ HWLAB runtime 发布 Pipeline 应在 Argo sync 前调用当前哨兵 `maintenanc
哨兵服务不可用、首次安装未完成或配置未就绪时,CI/CD 必须结构化失败并输出缺失项、恢复建议和可重试命令;不得自动回退到原纯客户端 CLI、裸 Playwright、私有 API、read-side repair、reload 循环或 session repair 形成第二执行路径。人工排障可以显式运行原 `web-probe observe start/status/command/collect/analyze`,但不能被 targetValidation 当作自动通过证据。
`sentinel validate --quick-verify --confirm --wait`、maintenance stop quick verify 和 control-plane targetValidation 的确认等待总耗时超过 120s 时,必须输出 warning,并在 quick verify run 摘要中记录可见警告。计时超限本身只作为非阻塞告警;只有真正影响 Code Agent 多轮业务链路、submit/command 执行、trace/final 可见性或 session 连续性的失败才构成 targetValidation blocker。control-plane publish/build 等非业务等待可通过 YAML 将确认等待预算放宽到 300s;不得通过减少业务轮次、吞掉 submit 失败、fallback 到第二执行路径或读侧 repair 来消除红灯。
`web-probe sentinel control-plane trigger-current --confirm --wait` 只等待 source mirror、publish、flush、publicExposure、Argo 和 runtime observed 收敛;CI/CD confirm-wait 超过 YAML `confirmWait.maxSeconds`(当前 120s)时必须输出 warning,并先优化等待阶段耗时,不得继续把长业务验证塞在部署同步路径里死等。`sentinel validate --quick-verify --confirm --wait`、maintenance stop quick verify 才执行 targetValidation 业务验证;业务 quick verify 可通过 YAML `targetValidation.maxSeconds` 放宽到 300s。计时超限本身只作为非阻塞告警;只有真正影响 Code Agent 多轮业务链路、submit/command 执行、trace/final 可见性或 session 连续性的失败才构成 targetValidation blocker。不得通过减少业务轮次、吞掉 submit 失败、fallback 到第二执行路径或读侧 repair 来消除红灯。
### 6.6 OPS-SENTINEL-REQ-006 dsflash-go 十轮 canary
+43 -25
View File
@@ -261,6 +261,8 @@ function runSentinelControlPlane(state: SentinelCicdState, options: Extract<WebP
validation: {
scenarioId: stringAt(state.cicd, "targetValidation.scenarioId"),
maxSeconds: numberAt(state.cicd, "targetValidation.maxSeconds"),
controlPlaneWaitMaxSeconds: controlPlaneWaitWarningSeconds(state),
quickVerifyMode: "manual-validate",
automaticSecondPath: false,
},
manifests: {
@@ -580,6 +582,7 @@ function runSentinelImageBuildConfirmed(state: SentinelCicdState, options: Extra
const registryReady = record(registry.probe).present === true;
const ok = state.configReady && state.sourceHead.ok && sourceMirrorSync.ok === true && publish.ok === true && registryReady;
const elapsedMs = Date.now() - startedAt;
const cicdWaitWarningSeconds = controlPlaneWaitWarningSeconds(state);
const result = {
ok,
command,
@@ -595,9 +598,9 @@ function runSentinelImageBuildConfirmed(state: SentinelCicdState, options: Extra
publish,
elapsedMs,
warnings: [
...sentinelElapsedWarnings(elapsedMs, "sentinel confirmed operation", numberAt(state.cicd, "targetValidation.maxSeconds")),
...sentinelElapsedWarnings(record(sourceMirrorSync).elapsedMs, "sentinel source mirror sync", numberAt(state.cicd, "targetValidation.maxSeconds")),
...sentinelElapsedWarnings(record(publish).elapsedMs, "sentinel publish", numberAt(state.cicd, "targetValidation.maxSeconds")),
...sentinelCicdElapsedWarnings(elapsedMs, "sentinel image build confirm-wait", cicdWaitWarningSeconds),
...sentinelCicdElapsedWarnings(record(sourceMirrorSync).elapsedMs, "sentinel source mirror sync", cicdWaitWarningSeconds),
...sentinelCicdElapsedWarnings(record(publish).elapsedMs, "sentinel publish", cicdWaitWarningSeconds),
],
blocker: ok
? null
@@ -632,18 +635,8 @@ function runSentinelControlPlaneConfirmed(state: SentinelCicdState, options: Ext
const argoApply = applySentinelArgoApplication(state, options.timeoutSeconds);
const observed = waitForSentinelObservedStatus(state, options.timeoutSeconds);
const observedReady = sentinelObservedReady(observed);
const targetValidation = applyOnly
? null
: observedReady
? runSentinelQuickVerify(state, "control-plane-target-validation", options.timeoutSeconds)
: {
ok: false,
status: "blocked",
scenarioId: stringAt(state.cicd, "targetValidation.scenarioId"),
reason: "runtime-not-ready",
valuesRedacted: true,
};
const targetValidationBlocked = !applyOnly && record(targetValidation).ok !== true;
const targetValidation = null;
const targetValidationBlocked = false;
const ok = state.configReady
&& state.sourceHead.ok
&& (applyOnly || record(sourceMirrorSync).ok === true)
@@ -653,6 +646,7 @@ function runSentinelControlPlaneConfirmed(state: SentinelCicdState, options: Ext
&& record(argoApply).ok === true
&& observedReady;
const elapsedMs = Date.now() - startedAt;
const cicdWaitWarningSeconds = controlPlaneWaitWarningSeconds(state);
const blocker = ok ? null : {
code: record(sourceMirrorSync).ok === false ? "sentinel-source-mirror-sync-failed" : "sentinel-control-plane-not-ready",
reason: record(sourceMirrorSync).ok === false
@@ -684,6 +678,8 @@ function runSentinelControlPlaneConfirmed(state: SentinelCicdState, options: Ext
validation: {
scenarioId: stringAt(state.cicd, "targetValidation.scenarioId"),
maxSeconds: numberAt(state.cicd, "targetValidation.maxSeconds"),
controlPlaneWaitMaxSeconds: cicdWaitWarningSeconds,
quickVerifyMode: applyOnly ? "not-applicable" : "manual-validate",
automaticSecondPath: false,
},
manifests: {
@@ -699,10 +695,11 @@ function runSentinelControlPlaneConfirmed(state: SentinelCicdState, options: Ext
targetValidation,
elapsedMs,
warnings: Array.from(new Set([
...sentinelElapsedWarnings(elapsedMs, "sentinel confirmed operation", numberAt(state.cicd, "targetValidation.maxSeconds")),
...sentinelElapsedWarnings(record(sourceMirrorSync).elapsedMs, "sentinel source mirror sync", numberAt(state.cicd, "targetValidation.maxSeconds")),
...sentinelElapsedWarnings(record(publish).elapsedMs, "sentinel publish", numberAt(state.cicd, "targetValidation.maxSeconds")),
...sentinelElapsedWarnings(record(flush).result === undefined ? null : record(record(flush).result).durationMs, "sentinel git-mirror flush", numberAt(state.cicd, "targetValidation.maxSeconds")),
...sentinelCicdElapsedWarnings(elapsedMs, "sentinel control-plane confirm-wait", cicdWaitWarningSeconds),
...sentinelCicdElapsedWarnings(record(sourceMirrorSync).elapsedMs, "sentinel source mirror sync", cicdWaitWarningSeconds),
...sentinelCicdElapsedWarnings(record(publish).elapsedMs, "sentinel publish", cicdWaitWarningSeconds),
...sentinelCicdElapsedWarnings(record(flush).result === undefined ? null : record(record(flush).result).durationMs, "sentinel git-mirror flush", cicdWaitWarningSeconds),
...targetValidationDeferredWarnings(state, applyOnly, cicdWaitWarningSeconds),
...(Array.isArray(record(targetValidation).warnings) ? record(targetValidation).warnings.map(text) : []),
...(targetValidationBlocked ? ["targetValidation is blocked; top-level STATUS only covers sentinel control-plane rollout. HWLAB business recovery remains pending; rerun quick verify after internal DB switch completes, without public fallback or a second execution path."] : []),
])),
@@ -756,7 +753,7 @@ function collectSentinelObservedStatus(state: SentinelCicdState, timeoutSeconds:
function waitForSentinelObservedStatus(state: SentinelCicdState, timeoutSeconds: number, expectation?: SentinelObservedExpectation): SentinelObservedStatus {
const startedAt = Date.now();
const timeoutMs = Math.max(30_000, Math.min(timeoutSeconds * 1000, 900_000));
const timeoutMs = Math.max(30_000, Math.min(timeoutSeconds * 1000, controlPlaneWaitWarningSeconds(state) * 1000));
let observed = collectSentinelObservedStatus(state, timeoutSeconds, expectation);
while (!sentinelObservedReady(observed) && Date.now() - startedAt < timeoutMs) {
runCommand(["sleep", "5"], repoRoot, { timeoutMs: 6_000 });
@@ -920,8 +917,8 @@ function runSentinelSourceMirrorSyncJob(state: SentinelCicdState, timeoutSeconds
return { ok: false, phase: "create-job", jobName, payload: { ok: false, status: "create-failed", valuesRedacted: true }, create: compactCommand(created), valuesRedacted: true };
}
const startedAt = Date.now();
const timeoutMs = Math.max(30_000, Math.min(timeoutSeconds * 1000, 900_000));
const warningBudgetMs = Math.max(1, Math.trunc(numberAt(state.cicd, "targetValidation.maxSeconds"))) * 1000;
const timeoutMs = Math.max(30_000, Math.min(timeoutSeconds * 1000, controlPlaneWaitWarningSeconds(state) * 1000));
const warningBudgetMs = Math.max(1, Math.trunc(controlPlaneWaitWarningSeconds(state))) * 1000;
let slowWarningSent = false;
let polls = 0;
let lastProbe: Record<string, unknown> = {};
@@ -1163,8 +1160,8 @@ function runSentinelPublishJob(state: SentinelCicdState, publishGitops: boolean,
}
sentinelProgressEvent("sentinel.publish.progress", { phase: "create-job", status: "succeeded", jobName, publishGitops, node: state.spec.nodeId, lane: state.spec.lane });
const startedAt = Date.now();
const timeoutMs = Math.max(30_000, Math.min(timeoutSeconds * 1000, 900_000));
const warningBudgetMs = Math.max(1, Math.trunc(numberAt(state.cicd, "targetValidation.maxSeconds"))) * 1000;
const timeoutMs = Math.max(30_000, Math.min(timeoutSeconds * 1000, controlPlaneWaitWarningSeconds(state) * 1000));
const warningBudgetMs = Math.max(1, Math.trunc(controlPlaneWaitWarningSeconds(state))) * 1000;
let slowWarningSent = false;
let polls = 0;
let lastProbe: Record<string, unknown> = {};
@@ -1419,6 +1416,23 @@ function sentinelElapsedWarnings(value: unknown, subject = "sentinel confirmed o
return [`${subject} exceeded configured ${Math.round(budgetMs / 1000)}s timing budget (${Math.round(elapsedMs / 1000)}s); non-blocking timing alert, investigate wait-stage latency without treating timing alone as HWLAB business blockage.`];
}
/**
 * Looks up the CI/CD confirm-wait budget from the sentinel CI/CD config.
 *
 * @param state Sentinel CI/CD state whose `cicd` section is queried.
 * @returns Whatever `numberAt` yields for the `confirmWait.maxSeconds` path;
 *   callers in this file treat it as a number of seconds.
 */
function controlPlaneWaitWarningSeconds(state: SentinelCicdState): number {
  const confirmWaitSeconds = numberAt(state.cicd, "confirmWait.maxSeconds");
  return confirmWaitSeconds;
}
/**
 * Produces a CI/CD wait-budget warning for a single timed stage.
 *
 * @param value Elapsed time in milliseconds. Anything that is not a finite
 *   number (e.g. `null`, `undefined`, `NaN`) is treated as "not measured" and
 *   yields no warning.
 * @param subject Human-readable stage name placed at the start of the message.
 * @param budgetSeconds Allowed wait in seconds. It is truncated to an integer
 *   and clamped to at least 1 second before comparison.
 * @returns An empty array when the stage stayed within budget (or cannot be
 *   evaluated), otherwise a one-element array holding the warning text.
 */
function sentinelCicdElapsedWarnings(value: unknown, subject: string, budgetSeconds: number): string[] {
  if (typeof value !== "number" || !Number.isFinite(value)) return [];
  // A NaN budget would make every comparison false and emit a misleading
  // "exceeded configured NaNs" message, so treat it as "no budget to exceed".
  if (Number.isNaN(budgetSeconds)) return [];
  const budgetMs = Math.max(1, Math.trunc(budgetSeconds)) * 1000;
  if (value <= budgetMs) return [];
  return [`${subject} exceeded configured ${Math.round(budgetMs / 1000)}s CI/CD wait budget (${Math.round(value / 1000)}s); optimize wait-stage latency before rerunning long confirm-wait operations.`];
}
/**
 * Builds the notice that targetValidation quick verify is not run as part of
 * the control-plane confirm-wait and names the follow-up command to run.
 *
 * @param state Sentinel CI/CD state passed to `sentinelP5Next` to obtain the
 *   follow-up command set.
 * @param applyOnly When true, no notice is produced.
 * @param budgetSeconds CI/CD wait budget in seconds; rounded for display only.
 * @returns An empty array when `applyOnly` is true, otherwise a one-element
 *   array with the deferral message.
 */
function targetValidationDeferredWarnings(state: SentinelCicdState, applyOnly: boolean, budgetSeconds: number): string[] {
  const notices: string[] = [];
  if (!applyOnly) {
    // Only resolve the follow-up commands when a notice is actually emitted.
    const followUp = sentinelP5Next(state);
    const budgetLabel = Math.round(budgetSeconds);
    notices.push(`targetValidation quick verify is deferred from control-plane confirm-wait to keep CI/CD wait under ${budgetLabel}s; run ${followUp.quickVerify}.`);
  }
  return notices;
}
function targetValidationElapsedWarnings(value: unknown, subject: string, budgetSeconds: number): string[] {
const elapsedMs = typeof value === "number" && Number.isFinite(value) ? value : null;
const budgetMs = Math.max(1, Math.trunc(budgetSeconds)) * 1000;
@@ -1473,6 +1487,8 @@ function controlPlaneNext(state: SentinelCicdState, action: WebProbeSentinelCont
status: `bun scripts/cli.ts web-probe sentinel control-plane status --node ${node} --lane ${lane}${suffix}`,
image: `bun scripts/cli.ts web-probe sentinel image status --node ${node} --lane ${lane}${suffix}`,
triggerCurrent: `bun scripts/cli.ts web-probe sentinel control-plane trigger-current --node ${node} --lane ${lane}${suffix} --dry-run`,
validate: `bun scripts/cli.ts web-probe sentinel validate --node ${node} --lane ${lane}${suffix}`,
quickVerify: `bun scripts/cli.ts web-probe sentinel validate --node ${node} --lane ${lane}${suffix} --quick-verify --confirm --wait`,
issue: "https://github.com/pikasTech/unidesk/issues/889",
currentAction: action,
};
@@ -2969,7 +2985,7 @@ function renderControlPlaneResult(result: Record<string, unknown>): string {
"",
table(["GITOPS_PATH", "ARGO_APP", "TARGET_REV", "OBJECTS"], [[gitops.path, argo.applicationName, gitops.targetRevision, gitops.manifestObjects]]),
"",
table(["SCENARIO", "MAX_SECONDS", "SECOND_PATH"], [[validation.scenarioId, validation.maxSeconds, validation.automaticSecondPath]]),
table(["SCENARIO", "MAX_SECONDS", "CI_WAIT", "QVERIFY", "SECOND_PATH"], [[validation.scenarioId, validation.maxSeconds, validation.controlPlaneWaitMaxSeconds ?? "-", validation.quickVerifyMode ?? "-", validation.automaticSecondPath]]),
"",
renderObservedStatus(observed),
"",
@@ -3007,6 +3023,8 @@ function renderControlPlaneResult(result: Record<string, unknown>): string {
` status: ${next.status ?? "-"}`,
` image: ${next.image ?? "-"}`,
` trigger-current: ${next.triggerCurrent ?? "-"}`,
` validate: ${next.validate ?? "-"}`,
` quick-verify: ${next.quickVerify ?? "-"}`,
"",
"DISCLOSURE",
" default view is a bounded CI/CD summary; full manifest content is represented by object counts and sha256.",
@@ -150,6 +150,7 @@ const REQUIRED_TARGET_SHAPES: Record<HwlabRuntimeWebProbeSentinelConfigRefKey, R
"image.envRecipeRef",
"maintenance.startCommand",
"maintenance.stopCommand",
"confirmWait.maxSeconds",
"targetValidation.scenarioId",
"targetValidation.maxSeconds",
"targetValidation.serviceUnavailablePolicy",
@@ -416,7 +417,7 @@ function summarizeTarget(key: HwlabRuntimeWebProbeSentinelConfigRefKey, target:
if (key === "promptSet") return `id=${textAt(target, "id")} provider=${textAt(target, "providerProfile")} prompts=${textAt(target, "promptCount")} markers=${arrayAt(target, "expectedMarkers").slice(0, 12).join(",") || "-"} source=${textAt(target, "promptSourceRef")}:${textAt(target, "promptSourceKey")}`;
if (key === "reportViews") return `default=${textAt(target, "defaultView")} views=${arrayAt(target, "views").length}`;
if (key === "publicExposure") return `enabled=${textAt(target, "enabled")} mode=${textAt(target, "mode")} url=${textAt(target, "publicBaseUrl")}`;
if (key === "cicd") return `gitops=${textAt(target, "gitopsPath")} image=${textAt(target, "image.repository")}:${textAt(target, "image.tagSource")}`;
if (key === "cicd") return `gitops=${textAt(target, "gitopsPath")} image=${textAt(target, "image.repository")}:${textAt(target, "image.tagSource")} confirmWait=${textAt(target, "confirmWait.maxSeconds")} targetValidation=${textAt(target, "targetValidation.maxSeconds")}`;
if (key === "secrets") return `sources=${arrayAt(target, "sources").length} runtimeSecrets=${arrayAt(target, "runtimeSecrets").length}`;
return `keys=${Object.keys(target).length}`;
}