diff --git a/scripts/src/hwlab-node-impl.ts b/scripts/src/hwlab-node-impl.ts index 15ac603b..458a8773 100644 --- a/scripts/src/hwlab-node-impl.ts +++ b/scripts/src/hwlab-node-impl.ts @@ -7222,6 +7222,11 @@ function renderWebObserveStatusTable(value: Record): string { }).slice(-5) : []; const next = record(value.next); + const heartbeatError = record(heartbeat?.error) ?? record(manifest?.error); + const heartbeatAuth = record(heartbeatError?.auth); + const manifestNetwork = record(manifest?.network); + const manifestBrowser = record(manifestNetwork?.browser); + const manifestProxy = record(manifestNetwork?.proxy); const lines = [ `hwlab nodes web-probe observe status (${webObserveText(value.status)})`, "", @@ -7247,6 +7252,17 @@ function renderWebObserveStatusTable(value: Record): string { ]]), "", ] : []), + ...(heartbeatError !== null ? [ + "Observer error:", + webObserveTable(["STATUS", "RETRY", "EXHAUSTED", "BROWSER_PROXY", "MESSAGE"], [[ + webObserveText(heartbeat?.status ?? manifest?.status), + webObserveText(heartbeatAuth?.lastRetryLabel), + webObserveText(heartbeatAuth?.retryExhausted), + webObserveShort(webObserveText(manifestBrowser?.proxyMode ?? manifestProxy?.enabled), 32), + webObserveShort(webObserveText(heartbeatError.message ?? heartbeatAuth?.lastError), 120), + ]]), + "", + ] : []), "Recent samples:", webObserveTable(["SEQ", "TS", "PATH", "ROUTE_SESSION", "ACTIVE_SESSION", "MSG", "TRACE"], samples.length > 0 ? samples.map((sample) => [ webObserveText(sample.seq), @@ -7259,13 +7275,15 @@ function renderWebObserveStatusTable(value: Record): string { ]) : [["-", "-", "-", "-", "-", "-", "-"]]), "", "Recent control:", - webObserveTable(["SEQ", "TS", "PHASE", "TYPE", "COMMAND"], controls.length > 0 ? controls.map((control) => [ + webObserveTable(["SEQ", "TS", "PHASE", "TYPE", "COMMAND", "RETRY", "DETAIL"], controls.length > 0 ? controls.map((control) => [ webObserveText(control.seq), webObserveShort(webObserveText(control.ts), 24), webObserveText(control.phase), webObserveText(control.type), webObserveShort(webObserveText(control.commandId), 28), - ]) : [["-", "-", "-", "-", "-"]]), + webObserveText(control.retry), + webObserveShort(webObserveText(control.detail), 80), + ]) : [["-", "-", "-", "-", "-", "-", "-"]]), "", "Network alerts:", webObserveTable(["TS", "METHOD", "STATUS", "TYPE", "URL_OR_FAILURE"], network.length > 0 ? network.map((item) => [ @@ -7479,6 +7497,7 @@ function runNodeWebProbeObserveAnalyze(options: NodeWebProbeObserveOptions, spec "const slimDomSample = (item) => { const v = objectOrNull(item) || {}; return { seq: v.seq ?? null, ts: v.ts ?? null, source: clip(v.source, 32), diagnosticCode: clip(v.diagnosticCode, 48), traceId: clip(v.traceId, 64), httpStatus: v.httpStatus ?? null, idleSeconds: v.idleSeconds ?? null, waitingFor: clip(v.waitingFor, 48), lastEventLabel: clip(v.lastEventLabel, 80), text: clip(v.text ?? v.preview, 180) }; };", "const slimConsoleGroup = (item) => { const v = objectOrNull(item) || {}; return { count: v.count ?? null, type: clip(v.type, 24), status: v.status ?? null, path: clip(v.path, 96), lastAt: v.lastAt ?? v.firstAt ?? null, firstAt: v.firstAt ?? null, traceIds: Array.isArray(v.traceIds) ? v.traceIds.slice(0, 3).map((x) => clip(x, 64)) : [] }; };", "const slimConsoleSample = (item) => { const v = objectOrNull(item) || {}; return { ts: v.ts ?? null, type: clip(v.type, 24), status: v.status ?? null, path: clip(v.path, 96), traceId: clip(v.traceId, 64), text: clip(v.text, 180) }; };", + "const slimRunnerError = (item) => { const v = objectOrNull(item) || {}; return { ts: v.ts ?? null, type: clip(v.type, 32), commandId: clip(v.commandId, 80), sampleSeq: v.sampleSeq ?? null, message: clip(v.message, 240), retry: clip(v.retry, 24), retryExhausted: v.retryExhausted === true, lastError: clip(v.lastError, 180) }; };", "const slimJump = (item) => { const v = objectOrNull(item) || {}; return { columnLabel: v.columnLabel ?? null, promptIndex: v.promptIndex ?? null, fromSeq: v.fromSeq ?? null, toSeq: v.toSeq ?? null, fromValue: v.fromValue ?? null, toValue: v.toValue ?? null, delta: v.delta ?? null, sampleDeltaSeconds: v.sampleDeltaSeconds ?? null, allowedIncreaseSeconds: v.allowedIncreaseSeconds ?? null, traceId: v.traceId ?? null }; };", "const srcMetrics = objectOrNull(source?.sampleMetrics);", "const metrics = srcMetrics ? {", @@ -7517,6 +7536,7 @@ function runNodeWebProbeObserveAnalyze(options: NodeWebProbeObserveOptions, spec " domDiagnosticSamples: takeHead(source.domDiagnosticSamples, 5).map(slimDomSample),", " consoleAlertGroups: takeHead(source.consoleAlertGroups, 5).map(slimConsoleGroup),", " consoleAlertSamples: takeHead(source.consoleAlertSamples, 5).map(slimConsoleSample),", + " runnerErrors: takeTail(source.runnerErrors, 8).map(slimRunnerError),", " turnTimingRecentUpdateJumps: takeHead(source.turnTimingRecentUpdateJumps, 5).map(slimJump),", " reportJsonPath: source.reportJsonPath || reportJsonPath,", " reportJsonSha256: source.reportJsonSha256 || sha256(reportJsonPath),", @@ -7594,6 +7614,7 @@ function renderWebObserveAnalyzeTable(value: Record): string { const domDiagnosticSamples = Array.isArray(analysis?.domDiagnosticSamples) ? analysis.domDiagnosticSamples.map(record).filter((item): item is Record => item !== null).slice(0, 8) : []; const consoleAlertGroups = Array.isArray(analysis?.consoleAlertGroups) ? analysis.consoleAlertGroups.map(record).filter((item): item is Record => item !== null).slice(0, 8) : []; const consoleAlertSamples = Array.isArray(analysis?.consoleAlertSamples) ? analysis.consoleAlertSamples.map(record).filter((item): item is Record => item !== null).slice(0, 8) : []; + const runnerErrors = Array.isArray(analysis?.runnerErrors) ? analysis.runnerErrors.map(record).filter((item): item is Record => item !== null).slice(-8) : []; const recentUpdateJumps = Array.isArray(analysis?.turnTimingRecentUpdateJumps) ? analysis.turnTimingRecentUpdateJumps.map(record).filter((item): item is Record => item !== null).slice(0, 8) : []; const domDiagnosticRows = domDiagnostics.length > 0 ? domDiagnostics.map((item) => [ @@ -7754,6 +7775,15 @@ function renderWebObserveAnalyzeTable(value: Record): string { webObserveText(promptNetwork?.promptSegments), ]]), "", + "Runner errors:", + webObserveTable(["TS", "TYPE", "RETRY", "EXHAUSTED", "MESSAGE"], runnerErrors.length > 0 ? runnerErrors.map((item) => [ + webObserveShort(webObserveText(item.ts), 24), + webObserveShort(webObserveText(item.type), 18), + webObserveText(item.retry), + webObserveText(item.retryExhausted), + webObserveShort(webObserveText(item.message || item.lastError), 96), + ]) : [["-", "-", "-", "-", "-"]]), + "", "HTTP error groups:", webObserveTable(["COUNT", "METHOD", "STATUS", "PATH", "LAST", "FAILURE"], httpErrorRows.length > 0 ? httpErrorRows : [["-", "-", "-", "-", "-", "-"]]), "", @@ -8406,9 +8436,11 @@ const tailN=Math.min(${tailLines},3); const readJson=(name)=>{try{return JSON.parse(fs.readFileSync(path.join(dir,name),'utf8'))}catch{return null}}; const tailJsonl=(name)=>{try{const file=path.join(dir,name); const st=fs.statSync(file); const maxBytes=Math.min(st.size,8*1024*1024); const fd=fs.openSync(file,'r'); try{const buf=Buffer.alloc(maxBytes); fs.readSync(fd,buf,0,maxBytes,st.size-maxBytes); const lines=buf.toString('utf8').split(/\\r?\\n/).filter(Boolean); if(st.size>maxBytes&&lines.length>0) lines.shift(); return lines.slice(-tailN).map(line=>{try{return JSON.parse(line)}catch{return {parseError:true, rawTail:line.slice(-500)}}});}finally{fs.closeSync(fd)}}catch{return []}}; const short=(value)=>String(value||'').slice(0,160); -const compactManifest=(item)=>item?{jobId:item.jobId,status:item.status,specRef:item.specRef,baseUrl:item.baseUrl,targetPath:item.targetPath,pageAuthority:item.pageAuthority,sampling:item.sampling,safety:item.safety,startedAt:item.startedAt,completedAt:item.completedAt}:null; -const compactHeartbeat=(item)=>item?{ok:item.ok,jobId:item.jobId,pid:item.pid,stateDir:item.stateDir,status:item.status,pageId:item.pageId,baseUrl:item.baseUrl,currentUrl:item.currentUrl,sampleSeq:item.sampleSeq,commandSeq:item.commandSeq,activeCommandId:item.activeCommandId,updatedAt:item.updatedAt,uptimeMs:item.uptimeMs}:null; -const compactControl=(item)=>({ts:item.ts,seq:item.seq,phase:item.phase,type:item.type,commandId:item.commandId}); +const compactManifest=(item)=>item?{jobId:item.jobId,status:item.status,specRef:item.specRef,baseUrl:item.baseUrl,targetPath:item.targetPath,network:item.network,pageAuthority:item.pageAuthority,sampling:item.sampling,safety:item.safety,startedAt:item.startedAt,completedAt:item.completedAt,error:item.error?{message:short(item.error.message),auth:item.error.auth?{lastRetryLabel:item.error.auth.lastRetryLabel||null,retryExhausted:item.error.auth.retryExhausted===true,lastError:short(item.error.auth.lastError||'')}:null}:null}:null; +const compactHeartbeat=(item)=>item?{ok:item.ok,jobId:item.jobId,pid:item.pid,stateDir:item.stateDir,status:item.status,pageId:item.pageId,baseUrl:item.baseUrl,currentUrl:item.currentUrl,sampleSeq:item.sampleSeq,commandSeq:item.commandSeq,activeCommandId:item.activeCommandId,updatedAt:item.updatedAt,uptimeMs:item.uptimeMs,error:item.error?{message:short(item.error.message),auth:item.error.auth?{lastRetryLabel:item.error.auth.lastRetryLabel||null,retryExhausted:item.error.auth.retryExhausted===true,lastError:short(item.error.auth.lastError||'')}:null}:null}:null; +const retryLabel=(detail)=>detail&&detail.auth?detail.auth.lastRetryLabel||'':detail&&detail.result?detail.result.lastRetryLabel||'':detail&&detail.error&&detail.error.auth?detail.error.auth.lastRetryLabel||'':''; +const detailText=(detail)=>detail&&detail.error?short((detail.error.message||'')+(detail.error.auth&&detail.error.auth.lastError?' '+detail.error.auth.lastError:'')):detail&&detail.result?short([detail.result.statusText,detail.result.retryExhausted?'retry-exhausted':''].filter(Boolean).join(' ')):''; +const compactControl=(item)=>({ts:item.ts,seq:item.seq,phase:item.phase,type:item.type,commandId:item.commandId,durationMs:item.detail&&item.detail.durationMs||null,retry:retryLabel(item.detail),detail:detailText(item.detail)}); const compactSample=(item)=>({seq:item.seq,ts:item.ts,path:item.path,routeSessionId:item.routeSessionId||null,activeSessionId:item.activeSessionId||null,messageCount:Array.isArray(item.messages)?item.messages.length:0,traceRowCount:Array.isArray(item.traceRows)?item.traceRows.length:0}); const compactNetwork=(item)=>({ts:item.ts,type:item.type,method:item.method,url:short(item.url),status:item.status||null,failure:item.failure?short(item.failure):null}); const pidText=fs.existsSync(path.join(dir,'pid'))?fs.readFileSync(path.join(dir,'pid'),'utf8').trim():null; diff --git a/scripts/src/hwlab-node-web-observe-runner-source.ts b/scripts/src/hwlab-node-web-observe-runner-source.ts index 678eb98e..3c203b97 100644 --- a/scripts/src/hwlab-node-web-observe-runner-source.ts +++ b/scripts/src/hwlab-node-web-observe-runner-source.ts @@ -262,28 +262,141 @@ async function runControlCommand(command, fn) { async function authenticate(browserContext) { const loginUrl = new URL("/auth/login", baseUrl).toString(); - const response = await browserContext.request.post(loginUrl, { - data: { username, password }, - headers: { accept: "application/json", "content-type": "application/json" }, - timeout: 15000, - }); - const cookies = await browserContext.cookies(baseUrl); - const cookieNames = cookies.map((cookie) => cookie.name).filter((name) => /session|auth|token/iu.test(name)); - return { - ok: response.ok() && cookieNames.length > 0, + const attempts = []; + const maxAttempts = 5; + const initialDelayMs = 250; + const maxDelayMs = 5000; + for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { + const retryDelayMs = attempt < maxAttempts ? Math.min(maxDelayMs, initialDelayMs * (2 ** (attempt - 1))) : 0; + try { + const response = await browserContext.request.post(loginUrl, { + data: { username, password }, + headers: { accept: "application/json", "content-type": "application/json" }, + timeout: 12000, + }); + const cookieState = await readAuthCookieState(browserContext); + const retryable = isRetryableAuthStatus(response.status()); + const item = { + attempt, + retryAttempt: attempt, + retryMaxAttempts: maxAttempts, + retryLabel: attempt + "/" + maxAttempts, + retryDelayMs: retryable && attempt < maxAttempts ? retryDelayMs : 0, + method: "api", + status: response.status(), + statusText: response.statusText(), + retryable, + cookiePresent: cookieState.cookiePresent, + cookieNames: cookieState.cookieNames, + valuesRedacted: true, + }; + attempts.push(item); + if (response.ok() && cookieState.cookiePresent) { + return { + ok: true, + method: "api", + loginPath: new URL(loginUrl).pathname, + status: response.status(), + statusText: response.statusText(), + cookiePresent: true, + cookieNames: cookieState.cookieNames, + attempts, + retryCount: attempt - 1, + retryMaxAttempts: maxAttempts, + lastRetryLabel: attempt + "/" + maxAttempts, + retryExhausted: false, + retryable: false, + valuesRedacted: true, + }; + } + if (!retryable) break; + } catch (error) { + const retryable = isRetryableAuthError(error); + attempts.push({ + attempt, + retryAttempt: attempt, + retryMaxAttempts: maxAttempts, + retryLabel: attempt + "/" + maxAttempts, + retryDelayMs: retryable && attempt < maxAttempts ? retryDelayMs : 0, + method: "api", + status: 0, + statusText: "request-error", + retryable, + error: error && error.message ? truncate(error.message, 500) : truncate(String(error), 500), + cookiePresent: false, + cookieNames: [], + valuesRedacted: true, + }); + if (!retryable) break; + } + if (attempt < maxAttempts && attempts[attempts.length - 1]?.retryable === true) await sleep(retryDelayMs); + } + const cookieState = await readAuthCookieState(browserContext); + const last = attempts[attempts.length - 1] || null; + const retryable = attempts.some((attempt) => attempt && attempt.retryable === true); + const failure = { + ok: false, method: "api", loginPath: new URL(loginUrl).pathname, - status: response.status(), - statusText: response.statusText(), - cookiePresent: cookieNames.length > 0, - cookieNames, + status: typeof last?.status === "number" ? last.status : 0, + statusText: typeof last?.statusText === "string" ? last.statusText : "api-login-failed", + cookiePresent: cookieState.cookiePresent, + cookieNames: cookieState.cookieNames, + attempts, + retryCount: Math.max(0, attempts.length - 1), + retryMaxAttempts: maxAttempts, + lastRetryLabel: last?.retryLabel || null, + retryExhausted: retryable && attempts.length >= maxAttempts, + retryable, + lastError: last?.error || null, valuesRedacted: true, }; + const error = new Error(authFailureMessage(failure)); + error.webProbeAuth = failure; + throw error; } function publicAuth(value) { if (!value) return null; - return { ok: value.ok === true, method: value.method, status: value.status, cookiePresent: value.cookiePresent === true, cookieNames: value.cookieNames || [], valuesRedacted: true }; + return { + ok: value.ok === true, + method: value.method, + status: value.status, + cookiePresent: value.cookiePresent === true, + cookieNames: value.cookieNames || [], + retryCount: value.retryCount ?? null, + retryMaxAttempts: value.retryMaxAttempts ?? null, + lastRetryLabel: value.lastRetryLabel ?? null, + retryExhausted: value.retryExhausted === true, + valuesRedacted: true + }; +} + +async function readAuthCookieState(browserContext) { + const cookies = await browserContext.cookies(baseUrl); + const cookieNames = cookies.map((cookie) => cookie.name).sort(); + return { + cookiePresent: cookieNames.includes("hwlab_session") || cookieNames.some((name) => /session|auth|token/iu.test(name)), + cookieNames: cookieNames.filter((name) => /session|auth|token/iu.test(name)), + }; +} + +function isRetryableAuthStatus(status) { + return status === 0 || status === 408 || status === 409 || status === 425 || status === 429 || status >= 500; +} + +function isRetryableAuthError(error) { + const message = error && error.message ? String(error.message) : String(error || ""); + return /EAI_AGAIN|ETIMEDOUT|ECONNRESET|ECONNREFUSED|ECONNABORTED|socket hang up|ERR_NETWORK_CHANGED|fetch failed|network|timeout/iu.test(message); +} + +function authFailureMessage(failure) { + const last = Array.isArray(failure.attempts) && failure.attempts.length > 0 ? failure.attempts[failure.attempts.length - 1] : null; + const retry = failure.lastRetryLabel ? " retry=" + failure.lastRetryLabel : ""; + const exhausted = failure.retryExhausted ? " exhausted=true" : ""; + const status = last ? " status=" + (last.status ?? "-") + " " + (last.statusText ?? "") : ""; + const error = last?.error ? " error=" + truncate(last.error, 160) : ""; + return ("auth login failed:" + retry + exhausted + status + error).trim(); } function proxyConfigFromEnv(targetBaseUrl) { @@ -1095,7 +1208,9 @@ function sanitize(value) { } function errorSummary(error) { - return { name: error && error.name ? error.name : "Error", message: error && error.message ? truncate(error.message, 1000) : truncate(String(error), 1000), stackTail: error && error.stack ? truncate(error.stack, 2000) : null }; + const summary = { name: error && error.name ? error.name : "Error", message: error && error.message ? truncate(error.message, 1000) : truncate(String(error), 1000), stackTail: error && error.stack ? truncate(error.stack, 2000) : null }; + if (error && error.webProbeAuth) summary.auth = sanitize(error.webProbeAuth); + return summary; } function sleep(ms) { @@ -1133,6 +1248,16 @@ const pageProvenance = buildPageProvenanceReport(samples, control, manifest); const pagePerformance = buildPagePerformanceReport(samples, manifest); const promptNetwork = buildPromptNetworkReport(control, network); const runtimeAlerts = buildRuntimeAlerts(samples, control, network, consoleEvents, errors); +const runnerErrors = errors.slice(-8).map((item) => ({ + ts: item.ts ?? null, + type: item.type ?? null, + commandId: item.commandId ?? null, + sampleSeq: item.sampleSeq ?? null, + message: limitText(item.error?.message ?? item.message ?? "", 240), + retry: item.error?.auth?.lastRetryLabel ?? null, + retryExhausted: item.error?.auth?.retryExhausted === true, + lastError: limitText(item.error?.auth?.lastError ?? "", 160), +})); const findings = buildFindings(samples, control, network, errors, sampleMetrics, promptNetwork, runtimeAlerts, pagePerformance, pageProvenance); if (jsonlReadIssues.length > 0) findings.unshift({ id: "jsonl-read-issues", severity: "red", summary: "observer analyzer hit JSONL read/parse issues", count: jsonlReadIssues.length, issues: jsonlReadIssues.slice(0, 20) }); const recentWindow = buildRecentAnalysisWindow({ samples, control, network, consoleEvents, errors, manifest }); @@ -1152,6 +1277,7 @@ const report = { pagePerformance, promptNetwork, runtimeAlerts, + runnerErrors, findings, windows: { recent: recentWindow }, readIssues: jsonlReadIssues, @@ -1189,6 +1315,7 @@ console.log(JSON.stringify({ pagePerformance: recentWindow.pagePerformance.summary, promptNetwork: recentWindow.promptNetwork.summary, runtimeAlerts: recentWindow.runtimeAlerts.summary, + runnerErrors, httpErrorGroups: recentWindow.runtimeAlerts.networkHttpErrorsByPath.slice(0, 8).map((item) => ({ method: item.method ?? null, status: item.status ?? null,