fix: suppress routine ssh timing noise
This commit is contained in:
@@ -133,7 +133,7 @@ core 只允许声明了 `host.ssh` capability 的 provider 使用 `ssh` 透传
|
||||
|
||||
本地 broker 默认等待 provider SSH 会话打开 60000ms,以便在目标节点同时有较多 microservice.http 任务时仍能建立维护会话;需要诊断慢连接时可用 `UNIDESK_SSH_OPEN_TIMEOUT_MS=<ms>` 临时调大,但最小有效值固定为 15000ms,避免把真实离线误判为长时间阻塞。
|
||||
|
||||
ssh-like 远端命令如果出现 `kex_exchange_identification`、`Connection closed by remote host`、provider session timeout 或 exit code 255,CLI 会在原始 stderr 后追加一行 `UNIDESK_SSH_HINT { ... }`。该 JSON 不回显原始远端命令,只包含 `code=ssh-like-command-friction`、`trigger`、`try` 和 `triage`;`try` 固定指向 stdin script 形态,避免把一次 ssh-like 解析/握手摩擦误读成 D601 SSH 整体不可用。每次 `ssh`/`tran` 运行结束还会在 stderr 追加一行 `UNIDESK_SSH_TIMING { ... }`,包含 `elapsedMs`、`elapsedSeconds`、`transport`、`invocationKind` 和 `exitCode`;耗时超过默认 10000ms 时 `level=warning`,提示优先排查 provider/session 延迟、远端命令自身耗时、helper bootstrap 或 `tran`/`apply-patch` 工具层回归。阈值可用 `UNIDESK_SSH_SLOW_WARNING_MS=<ms>` 临时调节,提示同样不回显原始远端命令。
|
||||
ssh-like 远端命令如果出现 `kex_exchange_identification`、`Connection closed by remote host`、provider session timeout 或 exit code 255,CLI 会在原始 stderr 后追加一行 `UNIDESK_SSH_HINT { ... }`。该 JSON 不回显原始远端命令,只包含 `code=ssh-like-command-friction`、`trigger`、`try` 和 `triage`;`try` 固定指向 stdin script 形态,避免把一次 ssh-like 解析/握手摩擦误读成 D601 SSH 整体不可用。`ssh`/`tran` 只有在运行耗时超过默认 10000ms 时才会在 stderr 追加一行 `UNIDESK_SSH_TIMING { ... }`,且 `level=warning`;正常短调用不输出 timing 噪声。warning 包含 `elapsedMs`、`elapsedSeconds`、`transport`、`invocationKind` 和 `exitCode`,提示优先排查 provider/session 延迟、远端命令自身耗时、helper bootstrap 或 `tran`/`apply-patch` 工具层回归。阈值可用 `UNIDESK_SSH_SLOW_WARNING_MS=<ms>` 临时调节,提示同样不回显原始远端命令。
|
||||
|
||||
`ssh <providerId>` 只在当前 operation 需要 helper 时才注入 `/tmp/unidesk-ssh-tools`,普通 `argv`、`script`、`kubectl`、`logs` 等路径不得传输无关工具源码。`apply-patch` 只注入 `apply_patch`;`glob` 只注入 `glob`;`skills`/`skill discover` 只注入 `skill-discover`。`apply_patch` 接受标准 `*** Begin Patch` / `*** End Patch` patch 格式,便于通过 SSH 透传编辑远端仓库文件;远端存在 `perl` 时必须走快速精确匹配路径,避免大文件 hunk 被 sh 模式匹配拖成几十秒,缺少 `perl` 时才退回 sh-only 实现。`glob` 和 `skill-discover` 需要远端 `python3`。注入工具只写 `/tmp/unidesk-ssh-tools`,不修改目标仓库。
|
||||
|
||||
|
||||
+1
-1
@@ -178,7 +178,7 @@ export function sshHelp(): unknown {
|
||||
"Do not put operation names in any colon route segment, including nested k3s namespace/workload/container segments.",
|
||||
"Do not use post-provider shorthand such as `ssh G14 k3s ...`; write `ssh G14:k3s ...` so location and operation stay separated.",
|
||||
"If an ssh-like remote command fails with timeout/kex/exit-255 friction, stderr includes one low-noise UNIDESK_SSH_HINT JSON line with the argv retry command.",
|
||||
"Every ssh/tran runtime writes one UNIDESK_SSH_TIMING JSON line to stderr with elapsedMs/elapsedSeconds; operations over 10s are marked level=warning and should be checked for provider latency, remote command cost, helper bootstrap, or tran/apply-patch optimization before repeating high-frequency work.",
|
||||
"Only slow ssh/tran runtime writes UNIDESK_SSH_TIMING JSON to stderr; operations over 10s are marked level=warning and should be checked for provider latency, remote command cost, helper bootstrap, or tran/apply-patch optimization before repeating high-frequency work. Routine short calls do not emit timing noise.",
|
||||
"The local tran wrapper serializes non-interactive calls per provider/plane before opening provider SSH sessions, so parallel Codex file reads do not stampede the provider session allocator; set UNIDESK_TRAN_SESSION_LOCK=0 only for explicit diagnostics.",
|
||||
"Use -- before a remote command that intentionally starts with a dash.",
|
||||
],
|
||||
|
||||
@@ -968,12 +968,13 @@ async function runRemoteSshWebSocket(
|
||||
restore();
|
||||
const hint = sshFailureHint(invocation.providerId, parsed, code, "");
|
||||
if (hint !== null) process.stderr.write(formatSshFailureHint(hint));
|
||||
process.stderr.write(formatSshRuntimeTimingHint(sshRuntimeTimingHint({
|
||||
const timingHint = formatSshRuntimeTimingHint(sshRuntimeTimingHint({
|
||||
invocation,
|
||||
transport: "frontend-websocket",
|
||||
exitCode: code,
|
||||
startedAtMs,
|
||||
})));
|
||||
}));
|
||||
if (timingHint) process.stderr.write(timingHint);
|
||||
resolve(code);
|
||||
};
|
||||
const onStdinData = (chunk: Buffer): void => {
|
||||
|
||||
+4
-2
@@ -1626,6 +1626,7 @@ export function sshRuntimeTimingHint(options: {
|
||||
}
|
||||
|
||||
/**
 * Serializes a runtime timing hint into a single `UNIDESK_SSH_TIMING` line.
 *
 * @param hint - Timing hint to render; its `slow` flag decides whether anything is produced.
 * @returns An empty string when `hint.slow` is falsy; otherwise the
 *   `UNIDESK_SSH_TIMING` prefix followed by the JSON-serialized hint and a trailing newline.
 */
export function formatSshRuntimeTimingHint(hint: SshRuntimeTimingHint): string {
|
||||
// Non-slow hints render as an empty string instead of a timing line.
if (!hint.slow) return "";
|
||||
return `UNIDESK_SSH_TIMING ${JSON.stringify(hint)}\n`;
|
||||
}
|
||||
|
||||
@@ -1836,12 +1837,13 @@ export async function runSsh(config: UniDeskConfig, providerId: string, args: st
|
||||
restore();
|
||||
const hint = sshFailureHint(invocation.providerId, parsed, exitCode, stderrTail);
|
||||
if (hint !== null) process.stderr.write(formatSshFailureHint(hint));
|
||||
process.stderr.write(formatSshRuntimeTimingHint(sshRuntimeTimingHint({
|
||||
const timingHint = formatSshRuntimeTimingHint(sshRuntimeTimingHint({
|
||||
invocation,
|
||||
transport: "backend-core-broker",
|
||||
exitCode,
|
||||
startedAtMs,
|
||||
})));
|
||||
}));
|
||||
if (timingHint) process.stderr.write(timingHint);
|
||||
resolve(exitCode);
|
||||
};
|
||||
child.on("error", (error) => {
|
||||
|
||||
@@ -338,8 +338,9 @@ export function runSshArgvGuidanceContract(): JsonRecord {
|
||||
finishedAtMs: 5200,
|
||||
thresholdMs: 10_000,
|
||||
});
|
||||
assertCondition(timingInfo.level === "info" && timingInfo.slow === false, "short ssh operation should emit an info timing hint", timingInfo);
|
||||
assertCondition(timingInfo.level === "info" && timingInfo.slow === false, "short ssh operation should stay below the timing warning threshold", timingInfo);
|
||||
assertCondition(timingInfo.elapsedMs === 4200 && timingInfo.elapsedSeconds === 4.2, "timing hint must include elapsed ms and seconds", timingInfo);
|
||||
assertCondition(formatSshRuntimeTimingHint(timingInfo) === "", "short ssh operation must not write routine timing noise to stderr", timingInfo);
|
||||
const slowTiming = sshRuntimeTimingHint({
|
||||
invocation: parseSshInvocation("D601", ["apply-patch"]),
|
||||
transport: "frontend-websocket",
|
||||
@@ -371,7 +372,7 @@ export function runSshArgvGuidanceContract(): JsonRecord {
|
||||
assertCondition(helpText.includes("apply-patch [--allow-loose]") && helpText.includes("low-context update hunks"), "ssh help must document apply-patch loose-context guard", helpText);
|
||||
assertCondition(helpText.includes("ssh D601:k3s:hwlab-dev:hwlab-cloud-api script <<'SCRIPT'"), "ssh help must document k3s script operation", helpText);
|
||||
assertCondition(helpText.includes("UNIDESK_SSH_HINT"), "ssh help must document structured failure hint", helpText);
|
||||
assertCondition(helpText.includes("UNIDESK_SSH_TIMING") && helpText.includes("10s"), "ssh help must document runtime timing hints", helpText);
|
||||
assertCondition(helpText.includes("UNIDESK_SSH_TIMING") && helpText.includes("10s") && helpText.includes("Routine short calls do not emit timing noise"), "ssh help must document slow-only runtime timing hints", helpText);
|
||||
assertCondition(helpText.includes("UNIDESK_TRAN_SESSION_LOCK=0") && helpText.includes("provider session allocator"), "ssh help must document tran provider session serialization", helpText);
|
||||
|
||||
const crossChecks = providerTriageRecommendedCrossChecks("D601");
|
||||
@@ -438,7 +439,7 @@ export function runSshArgvGuidanceContract(): JsonRecord {
|
||||
"k3s route stays location-only while operations fix native kubeconfig and assemble kubectl exec as argv",
|
||||
"top-level remote option parsing preserves command-local -- separators for script -- sed -n style commands",
|
||||
"ssh-like timeout/kex failures emit one structured argv retry hint",
|
||||
"ssh runtime emits one structured timing hint on stderr and marks operations over 10 seconds as warnings",
|
||||
"ssh runtime emits structured timing only for slow operations over 10 seconds",
|
||||
"help text documents stdin script passthrough and UNIDESK_SSH_HINT",
|
||||
"provider triage recommendedCrossChecks keeps ssh D601 argv true",
|
||||
"remote frontend ssh uses the same structured route parser for host, k3s and pod argv routes",
|
||||
|
||||
Reference in New Issue
Block a user