fix: add opencode smoke observability probes

2026-06-30 10:44:28 +00:00
parent f9d7d68522
commit 2d1f96650e
8 changed files with 389 additions and 8 deletions
@@ -45,6 +45,7 @@ export function hwlabNodeWebProbeHelp(): Record<string, unknown> {
    examples: [
      "bun scripts/cli.ts web-probe run --node D601 --lane v03 --wait-messages-ms 1000",
      "bun scripts/cli.ts web-probe run --node D601 --lane v03 --fresh-session --message 'ping'",
+      "bun scripts/cli.ts web-probe opencode-smoke --node D601 --lane v03 --message 'hi'",
      "bun scripts/cli.ts web-probe script --node D601 --lane v03 --script-file .state/probes/workbench.mjs",
      "bun scripts/cli.ts web-probe screenshot --node D601 --lane v03 --url https://monitor.pikapython.com --viewport 1440x900",
      "bun scripts/cli.ts web-probe screenshot --node D601 --lane v03 --url https://monitor.pikapython.com --viewport 390x844 --name monitor-mobile.png",
@@ -72,6 +73,7 @@ export function hwlabNodeWebProbeHelp(): Record<string, unknown> {
    ],
    actions: {
      run: "Run the repo-owned scripts/web-live-dom-probe.mjs helper.",
+      "opencode-smoke": "Run the repo-owned OpenCode iframe/direct-host composer smoke and require DOM assistant text plus EventSource update/finish/idle evidence.",
      script: "Run caller-provided Playwright JS after CLI-managed /auth/login; scripts must not handle secrets themselves.",
      screenshot: "Capture a no-auth or public page through the selected node/lane remote browser and download PNG artifacts to the caller /tmp by default.",
      observe: "Start, inspect, control, stop, collect, and analyze a long-running observer that writes JSONL artifacts.",
@@ -80,6 +82,7 @@ export function hwlabNodeWebProbeHelp(): Record<string, unknown> {
    notes: [
      "Default URL, browser proxy mode, observe/analyze thresholds, and project-management command allowlist come from config/hwlab-node-lanes.yaml webProbe.",
      "`web-probe script` is an ad-hoc exploration escape hatch; repeated/high-frequency workflows must become `web-probe observe command` types or repo-owned web-probe commands.",
+      "`web-probe opencode-smoke` is the repo-owned OpenCode smoke; prefer it over repeating one-off OpenCode Playwright snippets.",
      "observe is passive by default; user actions must be explicit observe command entries in control.jsonl.",
      "After observe start, prefer observe status|command|stop|collect|analyze <id> instead of repeating --node/--lane/--state-dir.",
      "collect views render bounded summaries from existing artifacts and do not create a second source of truth.",
@@ -86,8 +86,12 @@ export interface NodeWebProbeScriptOptions {
  browserProxyMode: WebProbeBrowserProxyMode;
  commandTimeoutSeconds: number;
  scriptText: string;
+  commandLabel?: string;
+  suppressAdHocWarning?: boolean;
+  generatedHints?: string[];
+  generatedPreferredCommands?: Record<string, string>;
  scriptSource: {
-    kind: "stdin" | "file";
+    kind: "stdin" | "file" | "generated";
    path: string | null;
    byteCount: number;
    sha256: string;
@@ -469,6 +469,7 @@ export function runNodeWebProbeScript(
  material: BootstrapAdminPasswordMaterial,
  credential: Record<string, unknown>,
 ): Record<string, unknown> {
+  const commandLabel = options.commandLabel ?? `web-probe script --node ${options.node} --lane ${options.lane}`;
  const webProbeProxy = nodeWebProbeHostProxyEnv(spec, options.browserProxyMode);
  const script = nodeWebProbeScriptRemoteShell(options, secretSpec, material.username ?? secretSpec.bootstrapAdminUsername, material.password ?? "", webProbeProxy, spec.webProbe?.playwrightBrowsersPath);
  const result = runTransWorkspaceStdinScript(options.node, spec.workspace, script, options.commandTimeoutSeconds);
@@ -545,7 +546,7 @@ export function runNodeWebProbeScript(
  return renderWebProbeScriptResult({
    ok: passed,
    status: passed ? "pass" : "blocked",
-    command: `web-probe script --node ${options.node} --lane ${options.lane}`,
+    command: commandLabel,
    node: options.node,
    lane: options.lane,
    workspace: spec.workspace,
@@ -579,6 +580,7 @@ export function runNodeWebProbeScript(
 }

 function webProbeScriptGovernanceWarnings(options: NodeWebProbeScriptOptions): Record<string, unknown>[] {
+  if (options.suppressAdHocWarning === true) return [];
  return [{
    code: "web_probe_script_ad_hoc_only",
    severity: "warning",
@@ -590,6 +592,7 @@ function webProbeScriptGovernanceWarnings(options: NodeWebProbeScriptOptions): R
 }

 function webProbeScriptGovernanceHints(options: NodeWebProbeScriptOptions): string[] {
+  if (options.generatedHints !== undefined) return options.generatedHints;
  return [
    "Prefer `web-probe observe start` plus `web-probe observe command` for interactive flows; use `observe collect/analyze` for repeated evidence reads.",
    "If the same script is needed more than once, add or extend a reusable command type in the web-probe observe command surface.",
@@ -598,6 +601,7 @@ function webProbeScriptGovernanceHints(options: NodeWebProbeScriptOptions): stri
 }

 function webProbeScriptPreferredCommands(options: NodeWebProbeScriptOptions): Record<string, string> {
+  if (options.generatedPreferredCommands !== undefined) return options.generatedPreferredCommands;
  return {
    startObserver: `bun scripts/cli.ts web-probe observe start --node ${options.node} --lane ${options.lane} --target-path /projects/mdtodo`,
    mdtodoSummary: "bun scripts/cli.ts web-probe observe collect <observerId> --view project-mdtodo-summary",
@@ -564,7 +564,9 @@ export function runNodeWebProbe(options: NodeWebProbeOptions): Record<string, un
    return {
      ok: false,
      status: "blocked",
-      command: options.action === "observe"
+      command: options.action === "script" && typeof options.commandLabel === "string"
+        ? options.commandLabel
+        : options.action === "observe"
        ? `web-probe observe ${options.observeAction} --node ${options.node} --lane ${options.lane}`
        : `web-probe ${options.action} --node ${options.node} --lane ${options.lane}`,
      node: options.node,
@@ -321,7 +321,7 @@ export function searchScript(observability: ObservabilityConfig, target: Observa
  const proxyPrefix = `/api/v1/namespaces/${target.namespace}/services/http:${observability.traceBackend.serviceName}:http/proxy`;
  const searchProxyPath = `${proxyPrefix}${searchPath}`;
  const grepLiteral = options.grep === null ? "None" : JSON.stringify(options.grep);
-  const effectiveQuery = inferSearchTempoQuery(options);
+  const effectiveQuery = options.query ?? inferSearchTempoQuery(options);
  const queryLiteral = effectiveQuery === null ? "None" : JSON.stringify(effectiveQuery);
  const pathLiteral = options.path === null ? "None" : JSON.stringify(options.path);
  const statusLiteral = options.status === null ? "None" : String(options.status);
@@ -383,6 +383,16 @@ IMPORTANT_ATTRS = [
    "returnedMessages", "totalMessages", "roleSequencePrefix",
    "consecutiveUserPrefix", "adjacentSameRoleCount", "userCount",
    "agentCount",
+    "opencode.proxy.phase", "opencode.proxy.streaming",
+    "opencode.proxy.ticket_accepted", "opencode.proxy.sse.directory_rewrite_enabled",
+    "opencode.proxy.sse.directory_rewrite_from",
+    "opencode.proxy.sse.directory_rewrite_to",
+    "opencode.provider.sse.content_chunks",
+    "opencode.provider.sse.content_chars",
+    "opencode.provider.sse.output_data_lines",
+    "opencode.provider.sse.done_lines",
+    "opencode.provider.sse.json_errors",
+    "opencode.provider.sse.reasoning_only_choices_dropped",
    "http.target", "http.url", "url.path",
    "db.system", "db.operation.name", "db.sql.table", "db.query.arg_count",
    "db.index.expected", "db.pool.max_open", "db.pool.open_connections",
@@ -542,8 +552,57 @@ def compact_span(span, service, resource_attrs, scope_name):
 def grep_matches_text(text):
    return GREP is not None and GREP.lower() in text.lower()

+def parse_grep_key_value():
+    if GREP is None:
+        return None, None
+    match = re.match(r"^([A-Za-z0-9_.-]+)=(.+)$", GREP)
+    if not match:
+        return None, None
+    key = match.group(1)
+    if key.startswith("span."):
+        key = key[5:]
+    raw = match.group(2).strip()
+    if len(raw) >= 2 and ((raw[0] == raw[-1] == '"') or (raw[0] == raw[-1] == "'")):
+        raw = raw[1:-1]
+    lowered = raw.lower()
+    if lowered == "true":
+        return key, True
+    if lowered == "false":
+        return key, False
+    if re.match(r"^-?(?:0|[1-9][0-9]*)$", raw):
+        try:
+            return key, int(raw)
+        except Exception:
+            pass
+    if re.match(r"^-?(?:0|[1-9][0-9]*)[.][0-9]+$", raw):
+        try:
+            return key, float(raw)
+        except Exception:
+            pass
+    return key, raw
+
+def values_equal_for_grep(actual, expected):
+    if actual == expected:
+        return True
+    if isinstance(actual, bool) or isinstance(expected, bool):
+        return str(actual).lower() == str(expected).lower()
+    if isinstance(actual, (int, float)) and isinstance(expected, (int, float)):
+        return float(actual) == float(expected)
+    return str(actual) == str(expected)
+
 def grep_matches_item(item):
-    return GREP is not None and grep_matches_text(json.dumps(item, ensure_ascii=False, sort_keys=True))
+    if GREP is None:  # no grep configured: nothing is treated as a match
+        return False
+    key, expected = parse_grep_key_value()  # (None, None) unless GREP has the key=value form
+    if key is not None:
+        if key == "name" and values_equal_for_grep(item.get("name"), expected):  # name=... compares the item's "name" field
+            return True
+        if key.startswith("resource.") and values_equal_for_grep(item.get(key.replace("resource.", "", 1)), expected):  # resource.X=... reads top-level item field X
+            return True
+        attrs = item.get("attributes", {}) if isinstance(item.get("attributes"), dict) else {}  # non-dict attributes are treated as empty
+        if key in attrs and values_equal_for_grep(attrs.get(key), expected):  # exact match on an attribute of the item
+            return True
+    return grep_matches_text(json.dumps(item, ensure_ascii=False, sort_keys=True))  # fallback: case-insensitive substring search over the item's JSON

 def span_matches_filters(item):
    attrs = item.get("attributes", {}) if isinstance(item.get("attributes"), dict) else {}
@@ -641,7 +700,10 @@ def trace_summary(trace_id, meta, body, rc, stderr):
            for span in raw_spans:
                if not isinstance(span, dict):
                    continue
+                raw_attrs = attrs_to_dict(span.get("attributes"))
                item = compact_span(span, service, resource_attrs, scope_name)
+                match_item = dict(item)
+                match_item["attributes"] = raw_attrs
                attrs = item.get("attributes", {})
                if isinstance(attrs, dict) and isinstance(attrs.get("traceId"), str):
                    business_trace_ids.add(attrs.get("traceId"))
@@ -650,7 +712,7 @@ def trace_summary(trace_id, meta, body, rc, stderr):
                spans.append(item)
                if is_error_span(span, attrs if isinstance(attrs, dict) else {}):
                    error_spans.append(item)
-                if span_matches_filters(item) and (GREP is None or grep_matches_item(item)):
+                if span_matches_filters(match_item) and (GREP is None or grep_matches_item(match_item)):
                    matched_spans.append(item)
    return {
        "traceId": trace_id,
@@ -728,6 +790,8 @@ payload = {
    "tempoQuery": QUERY,
    "pathFilter": PATH_FILTER,
    "statusFilter": STATUS_FILTER,
+    "grepCoverage": None if GREP is None else "raw trace body, span name, status message, route and full span attributes inside scanned candidate traces",
+    "grepQueryInference": "tempo-query-present" if QUERY is not None and GREP is not None else None,
    "businessTraceSearch": BUSINESS_TRACE_GREP,
    "limit": LIMIT,
    "candidateLimit": CANDIDATE_LIMIT,
@@ -39,6 +39,9 @@ export function observabilityHelp(): Record<string, unknown> {
      "bun scripts/cli.ts platform-infra observability validate --target D518 [--full|--raw]",
      "bun scripts/cli.ts platform-infra observability trace --target D518 --trace-id <traceId> [--grep provider-stream-disconnected] [--limit 40] [--full|--raw]",
      "bun scripts/cli.ts platform-infra observability search --target D518 --grep 'no rollout found' [--lookback-minutes 360] [--candidate-limit 80] [--limit 20] [--full|--raw]",
+      "bun scripts/cli.ts platform-infra observability search --target JD01 --grep opencode.proxy.stream.start --lookback-minutes 30 --limit 20",
+      "bun scripts/cli.ts platform-infra observability search --target JD01 --grep opencode.proxy.sse.directory_rewrite_enabled --lookback-minutes 30 --limit 20",
+      "bun scripts/cli.ts platform-infra observability search --target JD01 --grep opencode-provider-proxy --lookback-minutes 30 --limit 20",
      "bun scripts/cli.ts platform-infra observability search --target D518 --path /v1/workbench/sessions --status 502 [--lookback-minutes 120] [--full|--raw]",
      "bun scripts/cli.ts platform-infra observability diagnose-code-agent --target D518 --business-trace-id <trc_...> [--full|--raw]",
      "bun scripts/cli.ts platform-infra observability diagnose-code-agent --target D518 --run-id <run_...> [--command-id <cmd_...>] [--session-id <ses_...>] [--runner-job-id <rjob_...>] [--full|--raw]",
@@ -104,6 +104,8 @@ export async function search(config: UniDeskConfig, options: SearchOptions): Pro
    endAt: new Date(endSeconds * 1000).toISOString(),
    businessTraceId,
    mode: businessTraceId === null ? "candidate-grep" : "business-trace-exact",
+    grepQueryInference: options.query === null && businessTraceId === null && effectiveTempoQuery !== null ? "inferred-from-grep-or-filters" : null,
+    grepCoverage: options.grep === null ? null : "candidate traces are fetched by tempoQuery, then each scanned trace is matched against raw trace body, span name, status message, route and full span attributes",
    candidateLimit: options.candidateLimit,
    limit: options.limit,
  };
@@ -143,6 +145,9 @@ function compactSearchFullResult(value: unknown): Record<string, unknown> {
    tempoQuery: source.tempoQuery ?? null,
    pathFilter: source.pathFilter ?? null,
    statusFilter: source.statusFilter ?? null,
+    matchingActive: source.matchingActive ?? null,
+    grepCoverage: source.grepCoverage ?? null,
+    grepQueryInference: source.grepQueryInference ?? null,
    limit: source.limit ?? null,
    candidateLimit: source.candidateLimit ?? null,
    searchParseOk: source.searchParseOk ?? null,
@@ -196,7 +201,57 @@ export function inferSearchTempoQuery(options: SearchOptions): string | null {
  const filters: string[] = [];
  if (options.path !== null) filters.push(`.http.route = ${JSON.stringify(options.path)}`);
  if (options.status !== null) filters.push(`.http.response.status_code = ${options.status}`);
-  return filters.length > 0 ? `{ ${filters.join(" && ")} }` : null;
+  if (filters.length > 0) return `{ ${filters.join(" && ")} }`;
+  const grep = options.grep?.trim() ?? "";
+  if (!grep) return null;
+  const keyValue = grep.match(/^([A-Za-z0-9_.-]+)=(.+)$/u);
+  if (keyValue !== null) {
+    const key = keyValue[1] ?? "";
+    const value = (keyValue[2] ?? "").trim();
+    const traceQlKey = key === "name" || key.startsWith("resource.") ? key : `.${key.replace(/^span[.]/u, "")}`;
+    if (/^(?:name|resource[.][A-Za-z0-9_.-]+|[.][A-Za-z0-9_.-]+)$/u.test(traceQlKey) && value.length > 0 && value.length <= 200) return `{ ${traceQlKey} = ${traceQlLiteral(value)} }`;
+  }
+  if (grep.startsWith("/") && !grep.includes("\n") && grep.length <= 300) return `{ .http.route = ${JSON.stringify(grep)} }`;
+  if (/^[A-Za-z0-9][A-Za-z0-9_.:-]{0,180}$/u.test(grep)) {
+    if (isKnownTraceAttributeKey(grep)) return `{ .${grep} != nil }`;
+    if (grep.includes(".") || grep.includes(":")) return `{ name = ${JSON.stringify(grep)} }`;
+    if (/^(?:hwlab|agentrun|opencode|platform|code-queue|unidesk)[A-Za-z0-9-]*$/u.test(grep) || /-(?:api|web|proxy|runner|manager|service|mgr|collector|tempo)$/u.test(grep)) return `{ resource.service.name = ${JSON.stringify(grep)} }`;
+  }
+  return null;
+}
+
+const knownTraceAttributeKeys = new Set([ // attribute names that isKnownTraceAttributeKey accepts by exact match
+  "opencode.proxy.phase",
+  "opencode.proxy.streaming",
+  "opencode.proxy.ticket_accepted",
+  "opencode.proxy.sse.directory_rewrite_enabled", // this and the other ".sse." names below also satisfy the prefix checks in isKnownTraceAttributeKey
+  "opencode.proxy.sse.directory_rewrite_from",
+  "opencode.proxy.sse.directory_rewrite_to",
+  "opencode.provider.sse.content_chunks",
+  "opencode.provider.sse.content_chars",
+  "opencode.provider.sse.output_data_lines",
+  "opencode.provider.sse.done_lines",
+  "opencode.provider.sse.json_errors",
+  "opencode.provider.sse.reasoning_only_choices_dropped",
+]);
+
+/**
+ * Reports whether a bare grep token names a trace attribute this module recognizes.
+ *
+ * @param value - Candidate attribute name taken from the grep text.
+ * @returns True when the name is listed in knownTraceAttributeKeys or begins with
+ *   one of the two OpenCode SSE attribute prefixes.
+ */
+function isKnownTraceAttributeKey(value: string): boolean {
+  if (knownTraceAttributeKeys.has(value)) return true;
+  const ssePrefixes = ["opencode.proxy.sse.", "opencode.provider.sse."];
+  return ssePrefixes.some((prefix) => value.startsWith(prefix));
+}
+
+/**
+ * Renders the value side of a key=value grep as a TraceQL literal.
+ *
+ * The value is trimmed and one pair of outer quotes is removed first. Text that
+ * spells true/false (any case) is emitted lower-cased, plain decimal numbers and
+ * the word nil are emitted verbatim, and everything else becomes a JSON-quoted string.
+ *
+ * @param value - Raw text found after the equals sign.
+ * @returns The literal text to place in the generated query.
+ */
+function traceQlLiteral(value: string): string {
+  const bare = stripOuterQuotes(value.trim());
+  const booleanPattern = /^(?:true|false)$/iu;
+  const numberPattern = /^-?(?:0|[1-9][0-9]*)(?:[.][0-9]+)?$/u;
+  if (booleanPattern.test(bare)) {
+    return bare.toLowerCase();
+  }
+  const emitVerbatim = numberPattern.test(bare) || bare === "nil";
+  return emitVerbatim ? bare : JSON.stringify(bare);
+}
+
+/**
+ * Removes one pair of matching outer quotes from a string.
+ *
+ * @param value - Text that may be wrapped in double or single quotes.
+ * @returns The text between the quotes when the first and last characters are the
+ *   same quote character and the string has at least two characters; otherwise
+ *   the input unchanged.
+ */
+function stripOuterQuotes(value: string): string {
+  if (value.length < 2) return value;
+  const wrappedIn = (quote: string): boolean => value.startsWith(quote) && value.endsWith(quote);
+  return wrappedIn("\"") || wrappedIn("'") ? value.slice(1, -1) : value;
 }

 export function businessTraceIdFromSearchText(value: string | null): string | null {
@@ -584,12 +639,20 @@ export function renderSearchTable(input: {
    `  candidateLimit=${textValue(input.query.candidateLimit)} limit=${textValue(input.query.limit)}`,
    `  candidates=${textValue(input.result.candidateTraceCount)} scanned=${textValue(input.result.scannedTraceCount)} matched=${textValue(input.result.matchedTraceCount)} stopped=${textValue(input.result.scanStopped)}`,
  ];
+  if (input.query.grep !== null) {
+    lines.push(`  grepCoverage=${textValue(input.query.grepCoverage)}`);
+    if (input.query.grepQueryInference !== null) lines.push(`  grepQueryInference=${textValue(input.query.grepQueryInference)}`);
+  }
  const firstTraceId = traces.length > 0 ? textValue(traces[0].traceId) : "";
  lines.push("", "Next:");
  if (firstTraceId.length > 0 && firstTraceId !== "-") {
    lines.push(`  bun scripts/cli.ts platform-infra observability trace --target ${input.target.id} --trace-id ${firstTraceId}`);
  }
  lines.push(`  ${buildSearchCommand(input.target, input.options, true)}`);
+  if (input.query.grep !== null && Number(input.result.matchedTraceCount ?? 0) === 0) {
+    lines.push(`  explicit TraceQL: bun scripts/cli.ts platform-infra observability search --target ${input.target.id} --query '{ resource.service.name = "<service>" }' --lookback-minutes ${textValue(input.query.lookbackMinutes)} --candidate-limit ${textValue(input.query.candidateLimit)} --limit ${textValue(input.query.limit)}`);
+    lines.push(`  widen candidates: bun scripts/cli.ts platform-infra observability search --target ${input.target.id} --grep ${JSON.stringify(String(input.query.grep))} --lookback-minutes ${textValue(input.query.lookbackMinutes)} --candidate-limit 300 --limit ${textValue(input.query.limit)}`);
+  }
  lines.push("", "Disclosure:");
  lines.push("  default view is a bounded table; use --full for structured diagnosis JSON or trace --trace-id for one trace.");
  return {