From 437ccff0e418215753bf30f8040157cd9c9f6bea Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 28 Jun 2026 01:18:14 +0000 Subject: [PATCH] fix: recover unresponsive web probe control page --- .../hwlab-node-web-observe-runner-source.ts | 114 +++++++++++++++++- 1 file changed, 108 insertions(+), 6 deletions(-) diff --git a/scripts/src/hwlab-node-web-observe-runner-source.ts b/scripts/src/hwlab-node-web-observe-runner-source.ts index 00f287a5..6e2cabea 100644 --- a/scripts/src/hwlab-node-web-observe-runner-source.ts +++ b/scripts/src/hwlab-node-web-observe-runner-source.ts @@ -1101,11 +1101,12 @@ async function gotoTarget(rawTarget) { async function recreateControlPageForNavigation(reason, attempt) { const before = currentPageUrl(); - if (page && !page.isClosed()) await page.close().catch(() => {}); + if (page && !page.isClosed()) await withHardTimeout(page.close(), 3000, "control page close exceeded 3000ms").catch((error) => appendJsonl(files.errors, eventRecord("control-page-close-timeout", { reason, attempt, error: errorSummary(error), pageRole: "control", pageId, pageEpoch: controlPageEpoch }))); + controlPageEpoch += 1; page = await context.newPage(); attachPassiveListeners(page, "control", pageId); currentPageProvenance = null; - await appendJsonl(files.control, eventRecord("page-recreated", { reason, attempt, beforeUrl: before, afterUrl: currentPageUrl(), pageRole: "control", pageId, valuesRedacted: true })); + await appendJsonl(files.control, eventRecord("page-recreated", { reason, attempt, beforeUrl: before, afterUrl: currentPageUrl(), pageRole: "control", pageId, pageEpoch: controlPageEpoch, valuesRedacted: true })); } async function recreateAuthenticatedContextForNavigation(reason, attempt) { @@ -1619,9 +1620,101 @@ async function workbenchSessionSnapshot(targetPage = page) { }).catch(() => null); } +function controlPageRecoveryTarget(snapshot, beforeUrl) { + const sessionId = snapshot?.routeSessionId || snapshot?.activeSessionId || routeSessionIdFromUrl(beforeUrl); + if (sessionId) return { sessionId, targetPath: "/workbench/sessions/" + encodeURIComponent(sessionId), valuesRedacted: true }; + const path = safeUrlPath(beforeUrl); + if (isWorkbenchPathname(path || "")) return { sessionId: null, targetPath: path, valuesRedacted: true }; + return { sessionId: null, targetPath, valuesRedacted: true }; +} + +async function controlPageLivenessSnapshot(reason, timeoutMs = 1500) { + const started = Date.now(); + return withHardTimeout(workbenchSessionSnapshot(page), timeoutMs, "control page liveness snapshot exceeded " + timeoutMs + "ms") + .then((snapshot) => ({ + ok: snapshot !== null, + reason, + durationMs: Date.now() - started, + snapshot, + pageRole: "control", + pageId, + pageEpoch: controlPageEpoch, + valuesRedacted: true + })) + .catch((error) => ({ + ok: false, + reason, + durationMs: Date.now() - started, + error: errorSummary(error), + pageRole: "control", + pageId, + pageEpoch: controlPageEpoch, + valuesRedacted: true + })); +} + +async function ensureControlPageResponsiveForCommand(reason) { + const beforeUrl = currentPageUrl(); + const liveness = await controlPageLivenessSnapshot(reason + "-preflight", 1500); + if (liveness.ok) return { ok: true, recovered: false, reason, beforeUrl, afterUrl: currentPageUrl(), liveness, pageRole: "control", pageId, pageEpoch: controlPageEpoch, valuesRedacted: true }; + const target = controlPageRecoveryTarget(liveness.snapshot, beforeUrl); + await appendJsonl(files.control, eventRecord("control-page-unresponsive-before-command", { + reason, + beforeUrl, + target, + liveness, + pageRole: "control", + pageId, + pageEpoch: controlPageEpoch, + valuesRedacted: true + })); + await recreateControlPageForNavigation(reason + "-control-page-unresponsive", 1); + let navigation = null; + let hydration = null; + let afterLiveness = null; + try { + navigation = await gotoTarget(target.targetPath); + } catch (error) { + navigation = { ok: false, targetPath: target.targetPath, error: errorSummary(error), valuesRedacted: true }; + } + if (!navigation?.error && target.sessionId) { + hydration = await withHardTimeout( + waitForWorkbenchSessionHydrated(page, target.sessionId, { timeoutMs: 10000 }), + 12000, + "control page recovery hydration exceeded 12000ms" + ).catch((error) => ({ ok: false, error: errorSummary(error), valuesRedacted: true })); + } + afterLiveness = await controlPageLivenessSnapshot(reason + "-post-recovery", 3000); + const ok = !navigation?.error && afterLiveness.ok === true && (!target.sessionId || hydration?.ok === true); + const recovery = { + ok, + recovered: ok, + reason, + beforeUrl, + afterUrl: currentPageUrl(), + target, + liveness, + navigation, + hydration, + afterLiveness, + pageRole: "control", + pageId, + pageEpoch: controlPageEpoch, + valuesRedacted: true + }; + await appendJsonl(files.control, eventRecord(ok ? "control-page-recovered-before-command" : "control-page-recovery-failed-before-command", recovery)); + if (!ok) { + const error = new Error("control page recovery failed before " + reason); + error.details = recovery; + throw error; + } + return recovery; +} + async function sendPrompt(text, options = {}) { if (text.trim().length === 0) throw new Error("sendPrompt requires non-empty text"); const responsePath = options.responsePath || "/v1/agent/chat"; + const controlRecovery = await ensureControlPageResponsiveForCommand("sendPrompt"); const beforeUrl = currentPageUrl(); const beforeEvidence = await promptSideEffectSnapshot(); const primaryEditor = page.locator("#command-input").last(); @@ -1682,7 +1775,9 @@ async function sendPrompt(text, options = {}) { actualAction: composer.action, valuesRedacted: true }, + controlRecovery, pageId, + pageEpoch: controlPageEpoch, valuesRedacted: true }; if (options.throwOnActionMismatch === true) { @@ -1715,7 +1810,9 @@ async function sendPrompt(text, options = {}) { textHash: sha256Text(text), textBytes: Buffer.byteLength(text), chatSubmit: { status: null, statusText: null, urlPath: responsePath, waitError: chatResponse.waitError, sideEffectObserved: true, sideEffect }, - pageId + controlRecovery, + pageId, + pageEpoch: controlPageEpoch }; } const error = new Error("sendPrompt did not observe POST " + responsePath + " response or an authoritative new turn after submit: " + (chatResponse.waitError.message || chatResponse.waitError.name || "timeout")); @@ -1725,7 +1822,9 @@ async function sendPrompt(text, options = {}) { textHash: sha256Text(text), textBytes: Buffer.byteLength(text), chatSubmit: { status: null, statusText: null, urlPath: responsePath, waitError: chatResponse.waitError, sideEffectObserved: false, sideEffect }, + controlRecovery, pageId, + pageEpoch: controlPageEpoch, valuesRedacted: true }; throw error; @@ -1770,7 +1869,9 @@ async function sendPrompt(text, options = {}) { responseParseError: chatPayloadError, valuesRedacted: true }, - pageId + controlRecovery, + pageId, + pageEpoch: controlPageEpoch }; } @@ -1854,7 +1955,7 @@ async function waitForPromptSideEffect(beforeEvidence, timeoutMs) { } async function promptSideEffectSnapshot() { - return page.evaluate(() => { + return withHardTimeout(page.evaluate(() => { const text = document.body?.innerText || ""; const visible = (element) => { const rect = element.getBoundingClientRect(); @@ -1870,7 +1971,8 @@ async function promptSideEffectSnapshot() { textBytes: new TextEncoder().encode(text).length, valuesRedacted: true }; - }).catch(() => ({ runIds: [], traceIds: [], running: false, executionError: false, messageCount: 0, textBytes: 0, valuesRedacted: true })); + }), 3000, "prompt side-effect snapshot exceeded 3000ms") + .catch(() => ({ runIds: [], traceIds: [], running: false, executionError: false, messageCount: 0, textBytes: 0, valuesRedacted: true })); } ${nodeWebObserveRunnerCommandActionsSource()}