From 542cf37935c9a66677451df8469f066f27bd71d1 Mon Sep 17 00:00:00 2001 From: Codex Date: Wed, 17 Jun 2026 15:48:34 +0000 Subject: [PATCH] fix: extend hwlab web probe sampling timeout --- docs/reference/cli.md | 2 +- scripts/src/hwlab-node.ts | 141 +++++++++++++++++++++++++++++++++++--- 2 files changed, 132 insertions(+), 11 deletions(-) diff --git a/docs/reference/cli.md b/docs/reference/cli.md index be04259d..5569b8c3 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -22,7 +22,7 @@ CI/CD、GitOps、rollout、artifact 发布、PR 合并后的 runtime lane 滚动 G14/D601 v03 的 bootstrap admin password 是 HWLAB runtime Secret 生命周期的一部分,必须收敛到 `config/hwlab-node-lanes.yaml` 的 `bootstrapAdmin` 声明与受控 `hwlab nodes secret status|ensure --node --lane v03 --name hwlab-v03-bootstrap-admin` CLI。明文只能存在于 Git 忽略、owner-only 的 `.state/secrets/...` sourceRef 文件;CLI 在本地把明文转换为 HWLAB 兼容 password hash,只向运行面同步 `password-hash`,并在输出中只披露 sourceRef、sourceKey、target Secret/key、presence、byte count、fingerprint、mutation 与后续命令。`secret ensure --force` 只用于明确需要按 YAML sourceRef 重灌 bootstrap admin hash 并重启 Cloud API 的受控恢复场景,默认 ensure 不做强制重灌;不要把人工生成 hash、手工写 k8s Secret 或原生 `kubectl rollout` 沉淀为长期入口。 -`hwlab nodes web-probe run --node --lane [--url ]` 是 HWLAB Cloud Web DOM probe 的受控指挥入口。它从 `config/hwlab-node-lanes.yaml` 解析目标 workspace、public URL 和 bootstrap admin sourceRef,在 UniDesk 指挥侧读取 owner-only 明文后只通过一次性 stdin/env 注入目标 workspace 的 `scripts/web-live-dom-probe.mjs`;stdout 只披露 sourceRef、sourceKey、presence、fingerprint、注入方式、DOM 摘要和 artifact hash,不打印密码。缺少 sourceRef 或 source 文件时应结构化返回 `web_login_secret_missing`,不能回退历史默认密码或要求把 secret 复制到 D601/G14 目标 host。Code Agent Trace 实时性验收使用 `--trace-sample-count ` 和 `--trace-sample-interval-ms ` 透传到目标 helper,输出每次采样的 agent status、trace presence/status、row count、empty label 和最新 row preview,用于证明运行中渐进拉取;这类采样不能由终态截图替代。需要自定义 Playwright route/intercept、in-flight DOM 读取或专用截图时,使用 `hwlab nodes web-probe script --node --lane <<'JS' ... JS`,由 CLI 负责同一 sourceRef 凭据解析、`/auth/login` 建立 `hwlab_session`、已认证 `browser/context/page/baseUrl` 注入和 artifact path/hash 摘要;自定义脚本不得自行读取或打印 Web 登录凭据。`web-probe script` 托管登录先对同源 `/auth/login` 做短重试;仍未拿到 `hwlab_session` 时自动回到当前 Cloud Web 登录表单,以浏览器方式提交同一凭据。`probe.auth` 只输出 method、origin、loginPath、status、attempts、retryCount、fallbackUsed、fallback、retryable、transientObserved、fingerprint、commanderAction 和 redacted errorSummary,不打印密码、cookie 或可复制 session 值。 +`hwlab nodes web-probe run --node --lane [--url ]` 是 HWLAB Cloud Web DOM probe 的受控指挥入口。它从 `config/hwlab-node-lanes.yaml` 解析目标 workspace、public URL 和 bootstrap admin sourceRef,在 UniDesk 指挥侧读取 owner-only 明文后只通过一次性 stdin/env 注入目标 workspace 的 `scripts/web-live-dom-probe.mjs`;stdout 只披露 sourceRef、sourceKey、presence、fingerprint、注入方式、DOM 摘要和 artifact hash,不打印密码。缺少 sourceRef 或 source 文件时应结构化返回 `web_login_secret_missing`,不能回退历史默认密码或要求把 secret 复制到 D601/G14 目标 host。Code Agent Trace 实时性验收使用 `--trace-sample-count ` 和 `--trace-sample-interval-ms ` 透传到目标 helper,输出每次采样的 agent status、trace presence/status、row count、empty label 和最新 row preview,用于证明运行中渐进拉取;这类采样不能由终态截图替代。CLI 默认按 trace 采样窗口、terminal wait 和页面 timeout 自动扩展短连接 `commandTimeout`,显式 `--command-timeout-seconds` 只允许延长该预算,不应把 90-180s 采样压回 60s。需要自定义 Playwright route/intercept、in-flight DOM 读取或专用截图时,使用 `hwlab nodes web-probe script --node --lane <<'JS' ... JS`,由 CLI 负责同一 sourceRef 凭据解析、`/auth/login` 建立 `hwlab_session`、已认证 `browser/context/page/baseUrl` 注入和 artifact path/hash 摘要;自定义脚本不得自行读取或打印 Web 登录凭据。`web-probe script` 托管登录先对同源 `/auth/login` 做短重试;仍未拿到 `hwlab_session` 时自动回到当前 Cloud Web 登录表单,以浏览器方式提交同一凭据。`probe.auth` 只输出 method、origin、loginPath、status、attempts、retryCount、fallbackUsed、fallback、retryable、transientObserved、fingerprint、commanderAction 和 redacted errorSummary,不打印密码、cookie 或可复制 session 值。 `web-probe script` 的默认 `goto('/workbench')` 是稳定导航边界:它会先复用当前 page,失败后有限次切 fresh page 重试,并等待 workbench 基础 DOM(默认 `#workspace` 和 `#command-input`)可见;需要显式控制时使用注入的 `gotoStable(target, { selectors, activeSelector, attempts, readinessTimeoutMs })`、`waitWorkbenchReady({ selectors })`、`waitForReady({ selectors })`、`gotoRaw()` 和 `getPage()`。稳定化失败必须在 `probe.readiness` 中低噪声披露 attempt、阶段、selector、是否观察到 `/v1` API request、API failure 摘要和失败截图 artifact;分类值固定为 `browser-load-jitter`、`selector-timeout`、`api-not-sent`、`api-response-failed`,避免把“页面没准备好/请求未发出”和“后端响应失败”混成同一种 selector timeout。runner 不在用户脚本执行前抢先导航同一 page,保证脚本仍可先安装 `page.route` 或 context route;如重试切换 fresh page,后续脚本应通过 `gotoStable()` 返回值或 `getPage()` 取得当前 page。 diff --git a/scripts/src/hwlab-node.ts b/scripts/src/hwlab-node.ts index 8b1659d2..a07ae40a 100644 --- a/scripts/src/hwlab-node.ts +++ b/scripts/src/hwlab-node.ts @@ -32,6 +32,8 @@ interface NodeWebProbeRunOptions { freshSession: boolean; cancelRunning: boolean; commandTimeoutSeconds: number; + commandTimeoutAutoSeconds: number; + commandTimeoutUserProvided: boolean; } interface NodeWebProbeScriptOptions { @@ -4784,14 +4786,16 @@ function parseNodeWebProbeOptions(args: string[]): NodeWebProbeOptions { } const scriptText = scriptFile === undefined ? readFileSync(0, "utf8") : readFileSync(scriptFile, "utf8"); if (scriptText.trim().length === 0) throw new Error("web-probe script received an empty script"); + const timeoutMs = positiveIntegerOption(args, "--timeout-ms", 30000, 60000); + const commandTimeoutSeconds = positiveIntegerOption(args, "--command-timeout-seconds", Math.max(60, Math.ceil(timeoutMs / 1000) + 30), 3600); return { action: "script", node, lane, url: optionValue(args, "--url") ?? spec.publicWebUrl, - timeoutMs: positiveIntegerOption(args, "--timeout-ms", 30000, 60000), + timeoutMs, viewport: optionValue(args, "--viewport") ?? "1440x900", - commandTimeoutSeconds: positiveIntegerOption(args, "--command-timeout-seconds", 60, 60), + commandTimeoutSeconds, scriptText, scriptSource: { kind: scriptFile === undefined ? "stdin" : "file", @@ -4818,25 +4822,69 @@ function parseNodeWebProbeOptions(args: string[]): NodeWebProbeOptions { "--fresh-session", "--no-cancel-running", ])); + const timeoutMs = positiveIntegerOption(args, "--timeout-ms", 30000, 60000); + const waitAfterSubmitMs = positiveIntegerOption(args, "--wait-after-submit-ms", 1500, 60000); + const waitMessagesMs = positiveIntegerOption(args, "--wait-messages-ms", 2500, 60000); + const waitAgentTerminalMs = positiveIntegerOption(args, "--wait-agent-terminal-ms", 0, 600000); + const traceSampleCount = positiveIntegerOption(args, "--trace-sample-count", 0, 200); + const traceSampleIntervalMs = positiveIntegerOption(args, "--trace-sample-interval-ms", 0, 60000); + const commandTimeoutAutoSeconds = nodeWebProbeAutoCommandTimeoutSeconds({ + timeoutMs, + waitAfterSubmitMs, + waitMessagesMs, + waitAgentTerminalMs, + traceSampleCount, + traceSampleIntervalMs, + freshSession: args.includes("--fresh-session"), + hasMessage: optionValue(args, "--message") !== undefined, + }); + const commandTimeoutRaw = optionValue(args, "--command-timeout-seconds"); + const commandTimeoutUserProvided = commandTimeoutRaw !== undefined; + const commandTimeoutSeconds = commandTimeoutUserProvided + ? Math.max(positiveIntegerOption(args, "--command-timeout-seconds", commandTimeoutAutoSeconds, 3600), commandTimeoutAutoSeconds) + : commandTimeoutAutoSeconds; return { action: "run", node, lane, url: optionValue(args, "--url") ?? spec.publicWebUrl, - timeoutMs: positiveIntegerOption(args, "--timeout-ms", 30000, 60000), - waitAfterSubmitMs: positiveIntegerOption(args, "--wait-after-submit-ms", 1500, 60000), - waitMessagesMs: positiveIntegerOption(args, "--wait-messages-ms", 2500, 60000), - waitAgentTerminalMs: positiveIntegerOption(args, "--wait-agent-terminal-ms", 0, 600000), - traceSampleCount: positiveIntegerOption(args, "--trace-sample-count", 0, 200), - traceSampleIntervalMs: positiveIntegerOption(args, "--trace-sample-interval-ms", 0, 60000), + timeoutMs, + waitAfterSubmitMs, + waitMessagesMs, + waitAgentTerminalMs, + traceSampleCount, + traceSampleIntervalMs, message: optionValue(args, "--message") ?? null, conversationId: optionValue(args, "--conversation-id") ?? null, freshSession: args.includes("--fresh-session"), cancelRunning: !args.includes("--no-cancel-running"), - commandTimeoutSeconds: positiveIntegerOption(args, "--command-timeout-seconds", 60, 60), + commandTimeoutSeconds, + commandTimeoutAutoSeconds, + commandTimeoutUserProvided, }; } +function nodeWebProbeAutoCommandTimeoutSeconds(input: { + timeoutMs: number; + waitAfterSubmitMs: number; + waitMessagesMs: number; + waitAgentTerminalMs: number; + traceSampleCount: number; + traceSampleIntervalMs: number; + freshSession: boolean; + hasMessage: boolean; +}): number { + const traceWindowMs = input.traceSampleCount > 0 + ? Math.max(0, input.traceSampleCount - 1) * input.traceSampleIntervalMs + : 0; + const startupBudgetMs = input.timeoutMs + 30_000; + const freshnessBudgetMs = input.freshSession ? Math.min(input.timeoutMs, 30_000) : 0; + const submitBudgetMs = input.hasMessage ? input.waitAfterSubmitMs + input.waitMessagesMs + 15_000 : 0; + const terminalBudgetMs = input.waitAgentTerminalMs > 0 ? input.waitAgentTerminalMs : 0; + const totalMs = startupBudgetMs + freshnessBudgetMs + submitBudgetMs + terminalBudgetMs + traceWindowMs + 15_000; + return Math.min(3600, Math.max(60, Math.ceil(totalMs / 1000))); +} + function assertKnownOptions(args: string[], valueOptions: Set, flagOptions: Set): void { for (let index = 0; index < args.length; index += 1) { const arg = args[index] ?? ""; @@ -4894,6 +4942,11 @@ function runNodeWebProbe(options: NodeWebProbeOptions): Record const result = runTransWorkspaceStdinScript(options.node, spec.workspace, script, options.commandTimeoutSeconds); const probe = compactWebProbeResult(parseJsonObject(result.stdout)); const passed = result.exitCode === 0 && probe?.status === "pass"; + const degradedReason = result.timedOut + ? "web-probe-command-timeout" + : typeof probe?.degradedReason === "string" + ? probe.degradedReason + : null; return { ok: passed, status: passed ? "pass" : "blocked", @@ -4903,6 +4956,13 @@ function runNodeWebProbe(options: NodeWebProbeOptions): Record workspace: spec.workspace, url: options.url, credential, + commandTimeout: { + seconds: options.commandTimeoutSeconds, + autoSeconds: options.commandTimeoutAutoSeconds, + userProvided: options.commandTimeoutUserProvided, + timedOut: result.timedOut, + }, + degradedReason, probe, result: compactCommandResult(result), valuesRedacted: true, @@ -9212,16 +9272,25 @@ function parseJsonObject(text: string): Record | null { function compactWebProbeResult(report: Record | null): Record | null { if (report === null) return null; const dom = record(report.dom); + const performance = record(report.performance); + const trace = record(report.trace); + const session = record(report.session); return { ok: report.ok === true, status: typeof report.status === "string" ? report.status : null, finalUrl: typeof report.finalUrl === "string" ? report.finalUrl : null, error: typeof report.error === "string" ? report.error : null, - actions: Array.isArray(report.actions) ? report.actions : [], + degradedReason: typeof report.degradedReason === "string" ? report.degradedReason : null, + actions: compactWebProbeActions(report.actions), + session, + trace, + performance, + traceSamples: Array.isArray(report.traceSamples) ? report.traceSamples : [], dom: { authState: typeof dom.authState === "string" ? dom.authState : null, requiredSelectors: record(dom.requiredSelectors), messageCount: typeof dom.messageCount === "number" ? dom.messageCount : null, + sessionRail: record(dom.sessionRail), }, failureDom: record(report.failureDom), artifacts: record(report.artifacts), @@ -9229,6 +9298,58 @@ function compactWebProbeResult(report: Record | null): Record[] { + if (!Array.isArray(value)) return []; + return value.map(record).map((action) => { + const name = typeof action.action === "string" ? action.action : "unknown"; + if (name === "fresh-session") { + const alignment = record(action.alignment); + const check = record(alignment.check); + const repair = record(alignment.repair); + return { + action: name, + settled: action.settled === true, + aligned: action.aligned === true, + reason: alignment.reason ?? check.reason ?? null, + conversationId: check.conversationId ?? null, + routeConversationId: check.routeConversationId ?? null, + selectedConversationId: check.selectedConversationId ?? null, + repaired: Object.keys(repair).length > 0, + repairConversationId: repair.conversationId ?? null, + attempts: Array.isArray(alignment.attempts) ? alignment.attempts.length : null, + }; + } + if (name === "trace-interval-samples") { + return { + action: name, + count: action.count ?? null, + intervalMs: action.intervalMs ?? null, + }; + } + if (name === "submit-prompt") { + return { + action: name, + chars: action.chars ?? null, + submittedAt: action.submittedAt ?? null, + }; + } + if (name === "wait-agent-terminal") { + return { + action: name, + terminal: action.terminal === true, + timeoutMs: action.timeoutMs ?? null, + }; + } + return { + action: name, + skipped: action.skipped ?? null, + ready: action.ready ?? null, + found: action.found ?? null, + timeoutMs: action.timeoutMs ?? null, + }; + }); +} + function compactCommand(command: string[]): string[] { const scriptIndex = command.indexOf("--"); if (scriptIndex >= 0 && scriptIndex + 1 < command.length) return [...command.slice(0, scriptIndex + 1), "