diff --git a/scripts/src/hwlab-node-web-observe-runner-source.ts b/scripts/src/hwlab-node-web-observe-runner-source.ts index f0c1abda..00f287a5 100644 --- a/scripts/src/hwlab-node-web-observe-runner-source.ts +++ b/scripts/src/hwlab-node-web-observe-runner-source.ts @@ -73,6 +73,7 @@ let pageLoadSeq = 0; let controlPageEpoch = 0; let observerPageEpoch = 0; let currentPageProvenance = null; +let heartbeatPulseTimer = null; const jsonlRotation = { stamp: compactFileTimestamp(startedAt), files: [] }; try { @@ -81,6 +82,7 @@ try { await rotateExistingJsonlArtifacts(); await writeManifest({ status: "starting" }); await writeHeartbeat({ status: "starting" }); + heartbeatPulseTimer = startHeartbeatPulse(); if (jsonlRotation.files.length > 0) await appendJsonl(files.control, eventRecord("jsonl-rotated", { stamp: jsonlRotation.stamp, archiveDir: path.relative(stateDir, dirs.archive), files: jsonlRotation.files, valuesRedacted: true })); const launcher = await import(pathToFileURL(path.resolve("scripts/src/browser-launcher.mjs")).href); const { chromium } = await launcher.importPlaywright(); @@ -126,6 +128,7 @@ try { await writeManifest({ status: "failed", error: errorSummary(error) }).catch(() => {}); process.exitCode = 2; } finally { + if (heartbeatPulseTimer) clearInterval(heartbeatPulseTimer); if (browser) await browser.close().catch(() => {}); } @@ -221,6 +224,16 @@ async function writeHeartbeat(extra = {}) { await writeFile(files.heartbeat, JSON.stringify(heartbeat, null, 2) + "\n", { mode: 0o600 }); } +function startHeartbeatPulse() { + const timer = setInterval(() => { + if (stopping) return; + void writeHeartbeat({ status: terminalStatus, heartbeatPulse: true }) + .catch((error) => appendJsonl(files.errors, eventRecord("heartbeat-pulse-error", { error: errorSummary(error) }))); + }, 5000); + if (timer && typeof timer.unref === "function") timer.unref(); + return timer; +} + function attachPassiveListeners(targetPage, pageRole = "control", targetPageId = pageId) { targetPage.on("request", (request) => { void appendJsonl(files.network, eventRecord("request", { @@ -3197,19 +3210,24 @@ async function preflightSummary() { async function samplePage(reason, options = {}) { if (options?.refreshObserver !== false) await maybeRefreshObserverPage(reason); const groupSeq = sampleSeq + 1; - if (page && !page.isClosed()) await sampleOnePage(page, { reason, groupSeq, pageRole: "control", targetPageId: pageId, pageEpoch: controlPageEpoch }); + if (page && !page.isClosed()) { + await sampleOnePage(page, { reason, groupSeq, pageRole: "control", targetPageId: pageId, pageEpoch: controlPageEpoch }) + .catch((error) => appendJsonl(files.errors, eventRecord("control-sample-error", { pageRole: "control", pageId, pageEpoch: controlPageEpoch, error: errorSummary(error) }))); + } if (observerPage && !observerPage.isClosed()) { await sampleOnePage(observerPage, { reason, groupSeq, pageRole: "observer", targetPageId: observerPageId, pageEpoch: observerPageEpoch }).catch((error) => appendJsonl(files.errors, eventRecord("observer-sample-error", { pageRole: "observer", pageId: observerPageId, pageEpoch: observerPageEpoch, error: errorSummary(error) }))); } if (options?.screenshot !== false && screenshotIntervalMs > 0 && Date.now() - lastScreenshotAtMs >= screenshotIntervalMs) { - await captureScreenshot("checkpoint", "jpeg").catch((error) => appendJsonl(files.errors, eventRecord("screenshot-error", { pageRole: "control", pageId, error: errorSummary(error) }))); + await withHardTimeout(captureScreenshot("checkpoint", "jpeg"), 15000, "captureScreenshot checkpoint exceeded 15s") + .catch((error) => appendJsonl(files.errors, eventRecord("screenshot-error", { pageRole: "control", pageId, error: errorSummary(error) }))); } await writeHeartbeat({ status: terminalStatus }); } async function sampleOnePage(targetPage, { reason, groupSeq, pageRole, targetPageId, pageEpoch }) { sampleSeq += 1; - const dom = await targetPage.evaluate((input) => { + const evaluateTimeoutMs = Math.max(3000, Math.min(8000, Number(sampleIntervalMs) || 5000)); + const dom = await withHardTimeout(targetPage.evaluate((input) => { const trim = (value, limit = 500) => String(value || "").replace(/\s+/g, " ").trim().slice(0, limit); const visible = (element) => { if (!element) return false; @@ -3788,7 +3806,7 @@ async function sampleOnePage(targetPage, { reason, groupSeq, pageRole, targetPag }, performance: performance.getEntriesByType("resource").slice(-80).map(resourceTimingSample), }; - }, { projectManagement }).catch((error) => ({ error: errorSummary(error), url: pageUrl(targetPage) })); + }, { projectManagement }), evaluateTimeoutMs, "sampleOnePage DOM evaluate exceeded " + evaluateTimeoutMs + "ms").catch((error) => ({ error: errorSummary(error), url: pageUrl(targetPage) })); const sample = { seq: sampleSeq, sampleGroupSeq: groupSeq,