fix(web-probe): retry observe auth diagnostics (#642)

Co-authored-by: Codex <codex@noreply.local>
This commit is contained in:
Lyon
2026-06-22 09:12:54 +08:00
committed by GitHub
parent 8434d62d07
commit 9abff705ef
2 changed files with 179 additions and 20 deletions
+37 -5
View File
@@ -7222,6 +7222,11 @@ function renderWebObserveStatusTable(value: Record<string, unknown>): string {
}).slice(-5)
: [];
const next = record(value.next);
const heartbeatError = record(heartbeat?.error) ?? record(manifest?.error);
const heartbeatAuth = record(heartbeatError?.auth);
const manifestNetwork = record(manifest?.network);
const manifestBrowser = record(manifestNetwork?.browser);
const manifestProxy = record(manifestNetwork?.proxy);
const lines = [
`hwlab nodes web-probe observe status (${webObserveText(value.status)})`,
"",
@@ -7247,6 +7252,17 @@ function renderWebObserveStatusTable(value: Record<string, unknown>): string {
]]),
"",
] : []),
...(heartbeatError !== null ? [
"Observer error:",
webObserveTable(["STATUS", "RETRY", "EXHAUSTED", "BROWSER_PROXY", "MESSAGE"], [[
webObserveText(heartbeat?.status ?? manifest?.status),
webObserveText(heartbeatAuth?.lastRetryLabel),
webObserveText(heartbeatAuth?.retryExhausted),
webObserveShort(webObserveText(manifestBrowser?.proxyMode ?? manifestProxy?.enabled), 32),
webObserveShort(webObserveText(heartbeatError.message ?? heartbeatAuth?.lastError), 120),
]]),
"",
] : []),
"Recent samples:",
webObserveTable(["SEQ", "TS", "PATH", "ROUTE_SESSION", "ACTIVE_SESSION", "MSG", "TRACE"], samples.length > 0 ? samples.map((sample) => [
webObserveText(sample.seq),
@@ -7259,13 +7275,15 @@ function renderWebObserveStatusTable(value: Record<string, unknown>): string {
]) : [["-", "-", "-", "-", "-", "-", "-"]]),
"",
"Recent control:",
webObserveTable(["SEQ", "TS", "PHASE", "TYPE", "COMMAND"], controls.length > 0 ? controls.map((control) => [
webObserveTable(["SEQ", "TS", "PHASE", "TYPE", "COMMAND", "RETRY", "DETAIL"], controls.length > 0 ? controls.map((control) => [
webObserveText(control.seq),
webObserveShort(webObserveText(control.ts), 24),
webObserveText(control.phase),
webObserveText(control.type),
webObserveShort(webObserveText(control.commandId), 28),
]) : [["-", "-", "-", "-", "-"]]),
webObserveText(control.retry),
webObserveShort(webObserveText(control.detail), 80),
]) : [["-", "-", "-", "-", "-", "-", "-"]]),
"",
"Network alerts:",
webObserveTable(["TS", "METHOD", "STATUS", "TYPE", "URL_OR_FAILURE"], network.length > 0 ? network.map((item) => [
@@ -7479,6 +7497,7 @@ function runNodeWebProbeObserveAnalyze(options: NodeWebProbeObserveOptions, spec
"const slimDomSample = (item) => { const v = objectOrNull(item) || {}; return { seq: v.seq ?? null, ts: v.ts ?? null, source: clip(v.source, 32), diagnosticCode: clip(v.diagnosticCode, 48), traceId: clip(v.traceId, 64), httpStatus: v.httpStatus ?? null, idleSeconds: v.idleSeconds ?? null, waitingFor: clip(v.waitingFor, 48), lastEventLabel: clip(v.lastEventLabel, 80), text: clip(v.text ?? v.preview, 180) }; };",
"const slimConsoleGroup = (item) => { const v = objectOrNull(item) || {}; return { count: v.count ?? null, type: clip(v.type, 24), status: v.status ?? null, path: clip(v.path, 96), lastAt: v.lastAt ?? v.firstAt ?? null, firstAt: v.firstAt ?? null, traceIds: Array.isArray(v.traceIds) ? v.traceIds.slice(0, 3).map((x) => clip(x, 64)) : [] }; };",
"const slimConsoleSample = (item) => { const v = objectOrNull(item) || {}; return { ts: v.ts ?? null, type: clip(v.type, 24), status: v.status ?? null, path: clip(v.path, 96), traceId: clip(v.traceId, 64), text: clip(v.text, 180) }; };",
"const slimRunnerError = (item) => { const v = objectOrNull(item) || {}; return { ts: v.ts ?? null, type: clip(v.type, 32), commandId: clip(v.commandId, 80), sampleSeq: v.sampleSeq ?? null, message: clip(v.message, 240), retry: clip(v.retry, 24), retryExhausted: v.retryExhausted === true, lastError: clip(v.lastError, 180) }; };",
"const slimJump = (item) => { const v = objectOrNull(item) || {}; return { columnLabel: v.columnLabel ?? null, promptIndex: v.promptIndex ?? null, fromSeq: v.fromSeq ?? null, toSeq: v.toSeq ?? null, fromValue: v.fromValue ?? null, toValue: v.toValue ?? null, delta: v.delta ?? null, sampleDeltaSeconds: v.sampleDeltaSeconds ?? null, allowedIncreaseSeconds: v.allowedIncreaseSeconds ?? null, traceId: v.traceId ?? null }; };",
"const srcMetrics = objectOrNull(source?.sampleMetrics);",
"const metrics = srcMetrics ? {",
@@ -7517,6 +7536,7 @@ function runNodeWebProbeObserveAnalyze(options: NodeWebProbeObserveOptions, spec
" domDiagnosticSamples: takeHead(source.domDiagnosticSamples, 5).map(slimDomSample),",
" consoleAlertGroups: takeHead(source.consoleAlertGroups, 5).map(slimConsoleGroup),",
" consoleAlertSamples: takeHead(source.consoleAlertSamples, 5).map(slimConsoleSample),",
" runnerErrors: takeTail(source.runnerErrors, 8).map(slimRunnerError),",
" turnTimingRecentUpdateJumps: takeHead(source.turnTimingRecentUpdateJumps, 5).map(slimJump),",
" reportJsonPath: source.reportJsonPath || reportJsonPath,",
" reportJsonSha256: source.reportJsonSha256 || sha256(reportJsonPath),",
@@ -7594,6 +7614,7 @@ function renderWebObserveAnalyzeTable(value: Record<string, unknown>): string {
const domDiagnosticSamples = Array.isArray(analysis?.domDiagnosticSamples) ? analysis.domDiagnosticSamples.map(record).filter((item): item is Record<string, unknown> => item !== null).slice(0, 8) : [];
const consoleAlertGroups = Array.isArray(analysis?.consoleAlertGroups) ? analysis.consoleAlertGroups.map(record).filter((item): item is Record<string, unknown> => item !== null).slice(0, 8) : [];
const consoleAlertSamples = Array.isArray(analysis?.consoleAlertSamples) ? analysis.consoleAlertSamples.map(record).filter((item): item is Record<string, unknown> => item !== null).slice(0, 8) : [];
const runnerErrors = Array.isArray(analysis?.runnerErrors) ? analysis.runnerErrors.map(record).filter((item): item is Record<string, unknown> => item !== null).slice(-8) : [];
const recentUpdateJumps = Array.isArray(analysis?.turnTimingRecentUpdateJumps) ? analysis.turnTimingRecentUpdateJumps.map(record).filter((item): item is Record<string, unknown> => item !== null).slice(0, 8) : [];
const domDiagnosticRows = domDiagnostics.length > 0
? domDiagnostics.map((item) => [
@@ -7754,6 +7775,15 @@ function renderWebObserveAnalyzeTable(value: Record<string, unknown>): string {
webObserveText(promptNetwork?.promptSegments),
]]),
"",
"Runner errors:",
webObserveTable(["TS", "TYPE", "RETRY", "EXHAUSTED", "MESSAGE"], runnerErrors.length > 0 ? runnerErrors.map((item) => [
webObserveShort(webObserveText(item.ts), 24),
webObserveShort(webObserveText(item.type), 18),
webObserveText(item.retry),
webObserveText(item.retryExhausted),
webObserveShort(webObserveText(item.message || item.lastError), 96),
]) : [["-", "-", "-", "-", "-"]]),
"",
"HTTP error groups:",
webObserveTable(["COUNT", "METHOD", "STATUS", "PATH", "LAST", "FAILURE"], httpErrorRows.length > 0 ? httpErrorRows : [["-", "-", "-", "-", "-", "-"]]),
"",
@@ -8406,9 +8436,11 @@ const tailN=Math.min(${tailLines},3);
const readJson=(name)=>{try{return JSON.parse(fs.readFileSync(path.join(dir,name),'utf8'))}catch{return null}};
const tailJsonl=(name)=>{try{const file=path.join(dir,name); const st=fs.statSync(file); const maxBytes=Math.min(st.size,8*1024*1024); const fd=fs.openSync(file,'r'); try{const buf=Buffer.alloc(maxBytes); fs.readSync(fd,buf,0,maxBytes,st.size-maxBytes); const lines=buf.toString('utf8').split(/\\r?\\n/).filter(Boolean); if(st.size>maxBytes&&lines.length>0) lines.shift(); return lines.slice(-tailN).map(line=>{try{return JSON.parse(line)}catch{return {parseError:true, rawTail:line.slice(-500)}}});}finally{fs.closeSync(fd)}}catch{return []}};
const short=(value)=>String(value||'').slice(0,160);
const compactManifest=(item)=>item?{jobId:item.jobId,status:item.status,specRef:item.specRef,baseUrl:item.baseUrl,targetPath:item.targetPath,pageAuthority:item.pageAuthority,sampling:item.sampling,safety:item.safety,startedAt:item.startedAt,completedAt:item.completedAt}:null;
const compactHeartbeat=(item)=>item?{ok:item.ok,jobId:item.jobId,pid:item.pid,stateDir:item.stateDir,status:item.status,pageId:item.pageId,baseUrl:item.baseUrl,currentUrl:item.currentUrl,sampleSeq:item.sampleSeq,commandSeq:item.commandSeq,activeCommandId:item.activeCommandId,updatedAt:item.updatedAt,uptimeMs:item.uptimeMs}:null;
const compactControl=(item)=>({ts:item.ts,seq:item.seq,phase:item.phase,type:item.type,commandId:item.commandId});
const compactManifest=(item)=>item?{jobId:item.jobId,status:item.status,specRef:item.specRef,baseUrl:item.baseUrl,targetPath:item.targetPath,network:item.network,pageAuthority:item.pageAuthority,sampling:item.sampling,safety:item.safety,startedAt:item.startedAt,completedAt:item.completedAt,error:item.error?{message:short(item.error.message),auth:item.error.auth?{lastRetryLabel:item.error.auth.lastRetryLabel||null,retryExhausted:item.error.auth.retryExhausted===true,lastError:short(item.error.auth.lastError||'')}:null}:null}:null;
const compactHeartbeat=(item)=>item?{ok:item.ok,jobId:item.jobId,pid:item.pid,stateDir:item.stateDir,status:item.status,pageId:item.pageId,baseUrl:item.baseUrl,currentUrl:item.currentUrl,sampleSeq:item.sampleSeq,commandSeq:item.commandSeq,activeCommandId:item.activeCommandId,updatedAt:item.updatedAt,uptimeMs:item.uptimeMs,error:item.error?{message:short(item.error.message),auth:item.error.auth?{lastRetryLabel:item.error.auth.lastRetryLabel||null,retryExhausted:item.error.auth.retryExhausted===true,lastError:short(item.error.auth.lastError||'')}:null}:null}:null;
const retryLabel=(detail)=>detail&&detail.auth?detail.auth.lastRetryLabel||'':detail&&detail.result?detail.result.lastRetryLabel||'':detail&&detail.error&&detail.error.auth?detail.error.auth.lastRetryLabel||'':'';
const detailText=(detail)=>detail&&detail.error?short((detail.error.message||'')+(detail.error.auth&&detail.error.auth.lastError?' '+detail.error.auth.lastError:'')):detail&&detail.result?short([detail.result.statusText,detail.result.retryExhausted?'retry-exhausted':''].filter(Boolean).join(' ')):'';
const compactControl=(item)=>({ts:item.ts,seq:item.seq,phase:item.phase,type:item.type,commandId:item.commandId,durationMs:item.detail&&item.detail.durationMs||null,retry:retryLabel(item.detail),detail:detailText(item.detail)});
const compactSample=(item)=>({seq:item.seq,ts:item.ts,path:item.path,routeSessionId:item.routeSessionId||null,activeSessionId:item.activeSessionId||null,messageCount:Array.isArray(item.messages)?item.messages.length:0,traceRowCount:Array.isArray(item.traceRows)?item.traceRows.length:0});
const compactNetwork=(item)=>({ts:item.ts,type:item.type,method:item.method,url:short(item.url),status:item.status||null,failure:item.failure?short(item.failure):null});
const pidText=fs.existsSync(path.join(dir,'pid'))?fs.readFileSync(path.join(dir,'pid'),'utf8').trim():null;
@@ -262,28 +262,141 @@ async function runControlCommand(command, fn) {
async function authenticate(browserContext) {
const loginUrl = new URL("/auth/login", baseUrl).toString();
const response = await browserContext.request.post(loginUrl, {
data: { username, password },
headers: { accept: "application/json", "content-type": "application/json" },
timeout: 15000,
});
const cookies = await browserContext.cookies(baseUrl);
const cookieNames = cookies.map((cookie) => cookie.name).filter((name) => /session|auth|token/iu.test(name));
return {
ok: response.ok() && cookieNames.length > 0,
const attempts = [];
const maxAttempts = 5;
const initialDelayMs = 250;
const maxDelayMs = 5000;
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
const retryDelayMs = attempt < maxAttempts ? Math.min(maxDelayMs, initialDelayMs * (2 ** (attempt - 1))) : 0;
try {
const response = await browserContext.request.post(loginUrl, {
data: { username, password },
headers: { accept: "application/json", "content-type": "application/json" },
timeout: 12000,
});
const cookieState = await readAuthCookieState(browserContext);
const retryable = isRetryableAuthStatus(response.status());
const item = {
attempt,
retryAttempt: attempt,
retryMaxAttempts: maxAttempts,
retryLabel: attempt + "/" + maxAttempts,
retryDelayMs: retryable && attempt < maxAttempts ? retryDelayMs : 0,
method: "api",
status: response.status(),
statusText: response.statusText(),
retryable,
cookiePresent: cookieState.cookiePresent,
cookieNames: cookieState.cookieNames,
valuesRedacted: true,
};
attempts.push(item);
if (response.ok() && cookieState.cookiePresent) {
return {
ok: true,
method: "api",
loginPath: new URL(loginUrl).pathname,
status: response.status(),
statusText: response.statusText(),
cookiePresent: true,
cookieNames: cookieState.cookieNames,
attempts,
retryCount: attempt - 1,
retryMaxAttempts: maxAttempts,
lastRetryLabel: attempt + "/" + maxAttempts,
retryExhausted: false,
retryable: false,
valuesRedacted: true,
};
}
if (!retryable) break;
} catch (error) {
const retryable = isRetryableAuthError(error);
attempts.push({
attempt,
retryAttempt: attempt,
retryMaxAttempts: maxAttempts,
retryLabel: attempt + "/" + maxAttempts,
retryDelayMs: retryable && attempt < maxAttempts ? retryDelayMs : 0,
method: "api",
status: 0,
statusText: "request-error",
retryable,
error: error && error.message ? truncate(error.message, 500) : truncate(String(error), 500),
cookiePresent: false,
cookieNames: [],
valuesRedacted: true,
});
if (!retryable) break;
}
if (attempt < maxAttempts && attempts[attempts.length - 1]?.retryable === true) await sleep(retryDelayMs);
}
const cookieState = await readAuthCookieState(browserContext);
const last = attempts[attempts.length - 1] || null;
const retryable = attempts.some((attempt) => attempt && attempt.retryable === true);
const failure = {
ok: false,
method: "api",
loginPath: new URL(loginUrl).pathname,
status: response.status(),
statusText: response.statusText(),
cookiePresent: cookieNames.length > 0,
cookieNames,
status: typeof last?.status === "number" ? last.status : 0,
statusText: typeof last?.statusText === "string" ? last.statusText : "api-login-failed",
cookiePresent: cookieState.cookiePresent,
cookieNames: cookieState.cookieNames,
attempts,
retryCount: Math.max(0, attempts.length - 1),
retryMaxAttempts: maxAttempts,
lastRetryLabel: last?.retryLabel || null,
retryExhausted: retryable && attempts.length >= maxAttempts,
retryable,
lastError: last?.error || null,
valuesRedacted: true,
};
const error = new Error(authFailureMessage(failure));
error.webProbeAuth = failure;
throw error;
}
function publicAuth(value) {
if (!value) return null;
return { ok: value.ok === true, method: value.method, status: value.status, cookiePresent: value.cookiePresent === true, cookieNames: value.cookieNames || [], valuesRedacted: true };
return {
ok: value.ok === true,
method: value.method,
status: value.status,
cookiePresent: value.cookiePresent === true,
cookieNames: value.cookieNames || [],
retryCount: value.retryCount ?? null,
retryMaxAttempts: value.retryMaxAttempts ?? null,
lastRetryLabel: value.lastRetryLabel ?? null,
retryExhausted: value.retryExhausted === true,
valuesRedacted: true
};
}
async function readAuthCookieState(browserContext) {
const cookies = await browserContext.cookies(baseUrl);
const cookieNames = cookies.map((cookie) => cookie.name).sort();
return {
cookiePresent: cookieNames.includes("hwlab_session") || cookieNames.some((name) => /session|auth|token/iu.test(name)),
cookieNames: cookieNames.filter((name) => /session|auth|token/iu.test(name)),
};
}
function isRetryableAuthStatus(status) {
return status === 0 || status === 408 || status === 409 || status === 425 || status === 429 || status >= 500;
}
function isRetryableAuthError(error) {
const message = error && error.message ? String(error.message) : String(error || "");
return /EAI_AGAIN|ETIMEDOUT|ECONNRESET|ECONNREFUSED|ECONNABORTED|socket hang up|ERR_NETWORK_CHANGED|fetch failed|network|timeout/iu.test(message);
}
function authFailureMessage(failure) {
const last = Array.isArray(failure.attempts) && failure.attempts.length > 0 ? failure.attempts[failure.attempts.length - 1] : null;
const retry = failure.lastRetryLabel ? " retry=" + failure.lastRetryLabel : "";
const exhausted = failure.retryExhausted ? " exhausted=true" : "";
const status = last ? " status=" + (last.status ?? "-") + " " + (last.statusText ?? "") : "";
const error = last?.error ? " error=" + truncate(last.error, 160) : "";
return ("auth login failed:" + retry + exhausted + status + error).trim();
}
function proxyConfigFromEnv(targetBaseUrl) {
@@ -1095,7 +1208,9 @@ function sanitize(value) {
}
function errorSummary(error) {
return { name: error && error.name ? error.name : "Error", message: error && error.message ? truncate(error.message, 1000) : truncate(String(error), 1000), stackTail: error && error.stack ? truncate(error.stack, 2000) : null };
const summary = { name: error && error.name ? error.name : "Error", message: error && error.message ? truncate(error.message, 1000) : truncate(String(error), 1000), stackTail: error && error.stack ? truncate(error.stack, 2000) : null };
if (error && error.webProbeAuth) summary.auth = sanitize(error.webProbeAuth);
return summary;
}
function sleep(ms) {
@@ -1133,6 +1248,16 @@ const pageProvenance = buildPageProvenanceReport(samples, control, manifest);
const pagePerformance = buildPagePerformanceReport(samples, manifest);
const promptNetwork = buildPromptNetworkReport(control, network);
const runtimeAlerts = buildRuntimeAlerts(samples, control, network, consoleEvents, errors);
const runnerErrors = errors.slice(-8).map((item) => ({
ts: item.ts ?? null,
type: item.type ?? null,
commandId: item.commandId ?? null,
sampleSeq: item.sampleSeq ?? null,
message: limitText(item.error?.message ?? item.message ?? "", 240),
retry: item.error?.auth?.lastRetryLabel ?? null,
retryExhausted: item.error?.auth?.retryExhausted === true,
lastError: limitText(item.error?.auth?.lastError ?? "", 160),
}));
const findings = buildFindings(samples, control, network, errors, sampleMetrics, promptNetwork, runtimeAlerts, pagePerformance, pageProvenance);
if (jsonlReadIssues.length > 0) findings.unshift({ id: "jsonl-read-issues", severity: "red", summary: "observer analyzer hit JSONL read/parse issues", count: jsonlReadIssues.length, issues: jsonlReadIssues.slice(0, 20) });
const recentWindow = buildRecentAnalysisWindow({ samples, control, network, consoleEvents, errors, manifest });
@@ -1152,6 +1277,7 @@ const report = {
pagePerformance,
promptNetwork,
runtimeAlerts,
runnerErrors,
findings,
windows: { recent: recentWindow },
readIssues: jsonlReadIssues,
@@ -1189,6 +1315,7 @@ console.log(JSON.stringify({
pagePerformance: recentWindow.pagePerformance.summary,
promptNetwork: recentWindow.promptNetwork.summary,
runtimeAlerts: recentWindow.runtimeAlerts.summary,
runnerErrors,
httpErrorGroups: recentWindow.runtimeAlerts.networkHttpErrorsByPath.slice(0, 8).map((item) => ({
method: item.method ?? null,
status: item.status ?? null,