From 17bf569eb8b37e10bba2f2c94c9cc05272bf26d5 Mon Sep 17 00:00:00 2001 From: Codex Date: Fri, 26 Jun 2026 13:07:07 +0000 Subject: [PATCH] fix(web-probe): bound sentinel control-plane wait --- .../cicd.auth-session-switch.d601-v03.yaml | 2 + .../cicd.d601-v03.yaml | 2 + .../PJ2026-01060508-web-probe-sentinel.md | 2 +- scripts/src/hwlab-node-web-sentinel-cicd.ts | 68 ++++++++++++------- scripts/src/hwlab-node-web-sentinel-config.ts | 3 +- 5 files changed, 50 insertions(+), 27 deletions(-) diff --git a/config/hwlab-web-probe-sentinel/cicd.auth-session-switch.d601-v03.yaml b/config/hwlab-web-probe-sentinel/cicd.auth-session-switch.d601-v03.yaml index 05396272..f8654edd 100644 --- a/config/hwlab-web-probe-sentinel/cicd.auth-session-switch.d601-v03.yaml +++ b/config/hwlab-web-probe-sentinel/cicd.auth-session-switch.d601-v03.yaml @@ -43,6 +43,8 @@ sentinel: maintenance: startCommand: sentinel maintenance start stopCommand: sentinel maintenance stop + confirmWait: + maxSeconds: 120 targetValidation: scenarioId: workbench-auth-session-switch-2users maxSeconds: 300 diff --git a/config/hwlab-web-probe-sentinel/cicd.d601-v03.yaml b/config/hwlab-web-probe-sentinel/cicd.d601-v03.yaml index 353bb729..63209e42 100644 --- a/config/hwlab-web-probe-sentinel/cicd.d601-v03.yaml +++ b/config/hwlab-web-probe-sentinel/cicd.d601-v03.yaml @@ -43,6 +43,8 @@ sentinel: maintenance: startCommand: sentinel maintenance start stopCommand: sentinel maintenance stop + confirmWait: + maxSeconds: 120 targetValidation: scenarioId: workbench-dsflash-go-tool-call-10x maxSeconds: 300 diff --git a/project-management/PJ2026-01/specs/PJ2026-01060508-web-probe-sentinel.md b/project-management/PJ2026-01/specs/PJ2026-01060508-web-probe-sentinel.md index c2977f1f..6e8781ab 100644 --- a/project-management/PJ2026-01/specs/PJ2026-01060508-web-probe-sentinel.md +++ b/project-management/PJ2026-01/specs/PJ2026-01060508-web-probe-sentinel.md @@ -505,7 +505,7 @@ HWLAB runtime 发布 Pipeline 应在 Argo sync 前调用当前哨兵 `maintenanc 哨兵服务不可用、首次安装未完成或配置未就绪时,CI/CD 必须结构化失败并输出缺失项、恢复建议和可重试命令;不得自动回退到原纯客户端 CLI、裸 Playwright、私有 API、read-side repair、reload 循环或 session repair 形成第二执行路径。人工排障可以显式运行原 `web-probe observe start/status/command/collect/analyze`,但不能被 targetValidation 当作自动通过证据。 -`sentinel validate --quick-verify --confirm --wait`、maintenance stop quick verify 和 control-plane targetValidation 的确认等待总耗时超过 120s 时,必须输出 warning,并在 quick verify run 摘要中记录可见警告。计时超限本身只作为非阻塞告警;只有真正影响 Code Agent 多轮业务链路、submit/command 执行、trace/final 可见性或 session 连续性的失败才构成 targetValidation blocker。control-plane publish/build 等非业务等待可通过 YAML 将确认等待预算放宽到 300s;不得通过减少业务轮次、吞掉 submit 失败、fallback 到第二执行路径或读侧 repair 来消除红灯。 +`web-probe sentinel control-plane trigger-current --confirm --wait` 只等待 source mirror、publish、flush、publicExposure、Argo 和 runtime observed 收敛;CI/CD confirm-wait 超过 YAML `confirmWait.maxSeconds`(当前 120s)时必须输出 warning,并先优化等待阶段耗时,不得继续把长业务验证塞在部署同步路径里死等。`sentinel validate --quick-verify --confirm --wait` 和 maintenance stop quick verify 才执行 targetValidation 业务验证;业务 quick verify 可通过 YAML `targetValidation.maxSeconds` 放宽到 300s。计时超限本身只作为非阻塞告警;只有真正影响 Code Agent 多轮业务链路、submit/command 执行、trace/final 可见性或 session 连续性的失败才构成 targetValidation blocker。不得通过减少业务轮次、吞掉 submit 失败、fallback 到第二执行路径或读侧 repair 来消除红灯。 ### 6.6 OPS-SENTINEL-REQ-006 dsflash-go 十轮 canary diff --git a/scripts/src/hwlab-node-web-sentinel-cicd.ts b/scripts/src/hwlab-node-web-sentinel-cicd.ts index 2662508f..8c335b82 100644 --- a/scripts/src/hwlab-node-web-sentinel-cicd.ts +++ b/scripts/src/hwlab-node-web-sentinel-cicd.ts @@ -261,6 +261,8 @@ function runSentinelControlPlane(state: SentinelCicdState, options: Extract = {}; @@ -1163,8 +1160,8 @@ function runSentinelPublishJob(state: SentinelCicdState, publishGitops: boolean, } sentinelProgressEvent("sentinel.publish.progress", { phase: "create-job", status: "succeeded", jobName, publishGitops, node: state.spec.nodeId, lane: state.spec.lane }); const startedAt = Date.now(); - const timeoutMs = Math.max(30_000, Math.min(timeoutSeconds * 1000, 900_000)); - const warningBudgetMs = Math.max(1, Math.trunc(numberAt(state.cicd, "targetValidation.maxSeconds"))) * 1000; + const timeoutMs = Math.max(30_000, Math.min(timeoutSeconds * 1000, controlPlaneWaitWarningSeconds(state) * 1000)); + const warningBudgetMs = Math.max(1, Math.trunc(controlPlaneWaitWarningSeconds(state))) * 1000; let slowWarningSent = false; let polls = 0; let lastProbe: Record = {}; @@ -1419,6 +1416,23 @@ function sentinelElapsedWarnings(value: unknown, subject = "sentinel confirmed o return [`${subject} exceeded configured ${Math.round(budgetMs / 1000)}s timing budget (${Math.round(elapsedMs / 1000)}s); non-blocking timing alert, investigate wait-stage latency without treating timing alone as HWLAB business blockage.`]; } +function controlPlaneWaitWarningSeconds(state: SentinelCicdState): number { + return numberAt(state.cicd, "confirmWait.maxSeconds"); +} + +function sentinelCicdElapsedWarnings(value: unknown, subject: string, budgetSeconds: number): string[] { + const elapsedMs = typeof value === "number" && Number.isFinite(value) ? value : null; + const budgetMs = Math.max(1, Math.trunc(budgetSeconds)) * 1000; + if (elapsedMs === null || elapsedMs <= budgetMs) return []; + return [`${subject} exceeded configured ${Math.round(budgetMs / 1000)}s CI/CD wait budget (${Math.round(elapsedMs / 1000)}s); optimize wait-stage latency before rerunning long confirm-wait operations.`]; +} + +function targetValidationDeferredWarnings(state: SentinelCicdState, applyOnly: boolean, budgetSeconds: number): string[] { + if (applyOnly) return []; + const next = sentinelP5Next(state); + return [`targetValidation quick verify is deferred from control-plane confirm-wait to keep CI/CD wait under ${Math.round(budgetSeconds)}s; run ${next.quickVerify}.`]; +} + function targetValidationElapsedWarnings(value: unknown, subject: string, budgetSeconds: number): string[] { const elapsedMs = typeof value === "number" && Number.isFinite(value) ? value : null; const budgetMs = Math.max(1, Math.trunc(budgetSeconds)) * 1000; @@ -1473,6 +1487,8 @@ function controlPlaneNext(state: SentinelCicdState, action: WebProbeSentinelCont status: `bun scripts/cli.ts web-probe sentinel control-plane status --node ${node} --lane ${lane}${suffix}`, image: `bun scripts/cli.ts web-probe sentinel image status --node ${node} --lane ${lane}${suffix}`, triggerCurrent: `bun scripts/cli.ts web-probe sentinel control-plane trigger-current --node ${node} --lane ${lane}${suffix} --dry-run`, + validate: `bun scripts/cli.ts web-probe sentinel validate --node ${node} --lane ${lane}${suffix}`, + quickVerify: `bun scripts/cli.ts web-probe sentinel validate --node ${node} --lane ${lane}${suffix} --quick-verify --confirm --wait`, issue: "https://github.com/pikasTech/unidesk/issues/889", currentAction: action, }; @@ -2969,7 +2985,7 @@ function renderControlPlaneResult(result: Record): string { "", table(["GITOPS_PATH", "ARGO_APP", "TARGET_REV", "OBJECTS"], [[gitops.path, argo.applicationName, gitops.targetRevision, gitops.manifestObjects]]), "", - table(["SCENARIO", "MAX_SECONDS", "SECOND_PATH"], [[validation.scenarioId, validation.maxSeconds, validation.automaticSecondPath]]), + table(["SCENARIO", "MAX_SECONDS", "CI_WAIT", "QVERIFY", "SECOND_PATH"], [[validation.scenarioId, validation.maxSeconds, validation.controlPlaneWaitMaxSeconds ?? "-", validation.quickVerifyMode ?? "-", validation.automaticSecondPath]]), "", renderObservedStatus(observed), "", @@ -3007,6 +3023,8 @@ function renderControlPlaneResult(result: Record): string { ` status: ${next.status ?? "-"}`, ` image: ${next.image ?? "-"}`, ` trigger-current: ${next.triggerCurrent ?? "-"}`, + ` validate: ${next.validate ?? "-"}`, + ` quick-verify: ${next.quickVerify ?? "-"}`, "", "DISCLOSURE", " default view is a bounded CI/CD summary; full manifest content is represented by object counts and sha256.", diff --git a/scripts/src/hwlab-node-web-sentinel-config.ts b/scripts/src/hwlab-node-web-sentinel-config.ts index 5f84bdf1..b3caa6a8 100644 --- a/scripts/src/hwlab-node-web-sentinel-config.ts +++ b/scripts/src/hwlab-node-web-sentinel-config.ts @@ -150,6 +150,7 @@ const REQUIRED_TARGET_SHAPES: Record