fix: report actual memory without cache

This commit is contained in:
Codex
2026-05-04 14:52:31 +00:00
parent a16fedd9e4
commit 3207b9ecb1
6 changed files with 27 additions and 14 deletions
+1 -1
View File
@@ -54,7 +54,7 @@
## T13 资源节点任务管理器曲线
阅读 `AGENTS.md`(本项目 `AGENTS.md` 同时承担 `SKILL.md` 和 `scripts/cli.ts` 的解释职责),然后用 cli 手动测试以下内容:运行 `bun scripts/cli.ts e2e run`,确认 `provider:system-status` 和 `frontend:system-monitor-visible` passed;再用浏览器登录 frontend,进入左侧 `资源节点` 和顶部 `资源监控` 子标签,确认可以像 Windows 任务管理器一样看到 CPU、Memory、Disk 当前用量和历史曲线,并能执行 `Provider Gateway 升级` 和 `预检升级`。
阅读 `AGENTS.md`(本项目 `AGENTS.md` 同时承担 `SKILL.md` 和 `scripts/cli.ts` 的解释职责),然后用 cli 手动测试以下内容:运行 `bun scripts/cli.ts e2e run`,确认 `provider:system-status` 和 `frontend:system-monitor-visible` passed;再用浏览器登录 frontend,进入左侧 `资源节点` 和顶部 `资源监控` 子标签,确认可以像 Windows 任务管理器一样看到 CPU、Memory、Disk 当前用量和历史曲线,Memory 明确显示为不含 Linux page cache / buffer 的实际内存占用,并能执行 `Provider Gateway 升级` 和 `预检升级`。
## T14 Provider Gateway 远程升级预检
+1 -1
View File
@@ -12,7 +12,7 @@ frontend 应用源码必须使用 TypeScript + React,禁止在 `src/components
## Resource Node Monitor View
资源节点模块必须提供 `资源监控` 子标签,用类似 Windows 任务管理器的性能页展示每个 provider 节点的 CPU、内存和硬盘用量历史曲线。该页面应包含节点切换、当前用量摘要、CPU/Memory/Disk 三条曲线、采样说明和 `Provider Gateway 升级` 控制区;曲线数据来自 backend-core 的 `/api/nodes/system-status`,不得在页面默认展示原始 JSON。
资源节点模块必须提供 `资源监控` 子标签,用类似 Windows 任务管理器的性能页展示每个 provider 节点的 CPU、内存和硬盘用量历史曲线。该页面应包含节点切换、当前用量摘要、CPU/Memory/Disk 三条曲线、采样说明和 `Provider Gateway 升级` 控制区;曲线数据来自 backend-core 的 `/api/nodes/system-status`,不得在页面默认展示原始 JSON。内存曲线必须使用实际内存口径,不把 Linux page cache / buffer 计入占用。
## Resource Node Docker View
+1 -1
View File
@@ -20,7 +20,7 @@ provider-gateway 连接成功后必须周期性上报 Docker daemon 状态,数
## System Status Telemetry
provider-gateway 连接成功后必须周期性上报节点 CPU、内存和硬盘用量。采集来源是节点本地 `/proc/stat`、`/proc/loadavg`、`/proc/meminfo` 和 `df -PB1`;backend-core 将最新快照保存到 `unidesk_node_system_status`,并将历史采样保存到 `unidesk_node_metric_samples` 供 frontend 绘制任务管理器风格曲线。该链路仍然由 provider 主动上报,主 server 不反向探测计算节点。
provider-gateway 连接成功后必须周期性上报节点 CPU、内存和硬盘用量。采集来源是节点本地 `/proc/stat`、`/proc/loadavg`、`/proc/meminfo` 和 `df -PB1`;backend-core 将最新快照保存到 `unidesk_node_system_status`,并将历史采样保存到 `unidesk_node_metric_samples` 供 frontend 绘制任务管理器风格曲线。内存使用量采用实际占用口径:`MemTotal - MemFree - Buffers - Cached - SReclaimable + Shmem`,也就是不把 Linux page cache / buffer 计入占用;上报中同时保留 `cacheBytes` 便于排查。该链路仍然由 provider 主动上报,主 server 不反向探测计算节点。
## Remote Provider Upgrade
+4 -4
View File
@@ -210,13 +210,13 @@ async function serviceChecks(config: UniDeskConfig, urls: PublicUrls, checks: E2
const providerIngress = await fetchProbe(urls.providerIngressHealthUrl);
const overviewBody = (coreOverview as { body?: { ok?: boolean; dbReady?: boolean; onlineNodeCount?: number } }).body;
const nodeList = (coreNodes as { body?: { nodes?: Array<{ providerId?: string; status?: string }> } }).body?.nodes ?? [];
const systemStatuses = (systemStatus as { body?: { systemStatuses?: Array<{ providerId?: string; current?: { cpu?: { percent?: number }; memory?: { percent?: number }; disk?: { percent?: number } }; history?: unknown[] }> } }).body?.systemStatuses ?? [];
const systemStatuses = (systemStatus as { body?: { systemStatuses?: Array<{ providerId?: string; current?: { cpu?: { percent?: number }; memory?: { percent?: number; mode?: string; cacheBytes?: number }; disk?: { percent?: number } }; history?: unknown[] }> } }).body?.systemStatuses ?? [];
const mainSystem = systemStatuses.find((item) => item.providerId === config.providerGateway.id);
const dockerStatuses = (dockerStatus as { body?: { dockerStatuses?: Array<{ providerId?: string; dockerStatus?: { counts?: { containers?: number }; containers?: unknown[] } }> } }).body?.dockerStatuses ?? [];
const mainDocker = dockerStatuses.find((item) => item.providerId === config.providerGateway.id);
addCheck(checks, "core:internal-overview", (coreOverview as { ok?: boolean }).ok === true && overviewBody?.ok === true && overviewBody.dbReady === true && (overviewBody.onlineNodeCount ?? 0) >= 1, coreOverview);
addCheck(checks, "provider:self-node-online", nodeList.some((node) => node.providerId === config.providerGateway.id && node.status === "online"), coreNodes);
addCheck(checks, "provider:system-status", (systemStatus as { ok?: boolean }).ok === true && mainSystem?.current !== undefined && Number.isFinite(mainSystem.current.cpu?.percent) && Number.isFinite(mainSystem.current.memory?.percent) && Number.isFinite(mainSystem.current.disk?.percent) && (mainSystem.history?.length ?? 0) > 0, systemStatusCheckDetail(systemStatus, config.providerGateway.id));
addCheck(checks, "provider:system-status", (systemStatus as { ok?: boolean }).ok === true && mainSystem?.current !== undefined && Number.isFinite(mainSystem.current.cpu?.percent) && Number.isFinite(mainSystem.current.memory?.percent) && mainSystem.current.memory?.mode === "actual_without_cache" && Number.isFinite(mainSystem.current.memory?.cacheBytes) && Number.isFinite(mainSystem.current.disk?.percent) && (mainSystem.history?.length ?? 0) > 0, systemStatusCheckDetail(systemStatus, config.providerGateway.id));
addCheck(checks, "provider:docker-status", (dockerStatus as { ok?: boolean }).ok === true && mainDocker?.dockerStatus !== undefined && ((mainDocker.dockerStatus.counts?.containers ?? 0) > 0 || (mainDocker.dockerStatus.containers?.length ?? 0) > 0), dockerStatusCheckDetail(dockerStatus, config.providerGateway.id));
const upgradeDispatch = dockerCoreJson("/api/dispatch", {
method: "POST",
@@ -285,7 +285,7 @@ async function frontendCheck(config: UniDeskConfig, urls: PublicUrls, checks: E2
await page.waitForSelector('[data-testid="metric-chart-disk"]', { timeout: 10000 });
await page.waitForFunction(() => {
const text = document.body.innerText.toLowerCase();
return text.includes("任务管理器视图") && text.includes("cpu") && text.includes("memory") && text.includes("disk");
return text.includes("任务管理器视图") && text.includes("cpu") && text.includes("memory") && text.includes("disk") && text.includes("不含缓存");
}, undefined, { timeout: 10000 });
const monitorText = await page.locator('[data-testid="node-monitor-page"]').innerText({ timeout: 5000 });
await page.getByTestId("upgrade-plan-button").click();
@@ -303,7 +303,7 @@ async function frontendCheck(config: UniDeskConfig, urls: PublicUrls, checks: E2
addCheck(checks, "frontend:login-provider-visible", bodyText.includes(config.providerGateway.id) && bodyText.includes(config.providerGateway.name) && bodyText.includes("核心在线"), { screenshotPath });
addCheck(checks, "frontend:no-naked-json-before-click", rawBlocksBefore === 0 && !nakedJsonText, { rawBlocksBefore, nakedJsonText });
addCheck(checks, "frontend:raw-json-explicit-button", rawText.includes('"providerId"') && rawText.includes(config.providerGateway.id), { rawTextPreview: rawText.slice(0, 400) });
addCheck(checks, "frontend:system-monitor-visible", monitorText.includes("任务管理器视图") && monitorText.includes("CPU") && monitorText.includes("Memory") && monitorText.includes("Disk"), { monitorTextPreview: monitorText.slice(0, 800) });
addCheck(checks, "frontend:system-monitor-visible", monitorText.includes("任务管理器视图") && monitorText.includes("CPU") && monitorText.includes("Memory") && monitorText.includes("Disk") && monitorText.includes("不含缓存"), { monitorTextPreview: monitorText.slice(0, 800) });
addCheck(checks, "frontend:upgrade-plan-dispatch", upgradeControlText.includes("预检升级 已下发"), { providerId: config.providerGateway.id, upgradeControlPreview: upgradeControlText.slice(0, 500) });
addCheck(checks, "frontend:docker-status-visible", dockerText.toLowerCase().includes("docker desktop 视图") && dockerText.toLowerCase().includes("containers") && dockerText.includes("unidesk_pgdata_10gb") && (dockerText.includes("unidesk-frontend") || dockerText.includes("unidesk-backend-core")), { dockerTextPreview: dockerText.slice(0, 800) });
addCheck(checks, "frontend:no-console-errors", consoleErrors.length === 0, { consoleErrors });
+4 -4
View File
@@ -451,7 +451,7 @@ function NodeMonitorPage({ nodes, systemStatuses, onRaw, refresh }: AnyRecord) {
h("div", { className: "docker-meta" },
h("span", null, `${cpu.cores || 0} CPU cores`),
h("span", null, `load ${asNumber(cpu.load1).toFixed(2)} / ${asNumber(cpu.load5).toFixed(2)} / ${asNumber(cpu.load15).toFixed(2)}`),
h("span", null, `memory ${fmtBytes(memory.usedBytes)} / ${fmtBytes(memory.totalBytes)}`),
h("span", null, `memory actual ${fmtBytes(memory.usedBytes)} / ${fmtBytes(memory.totalBytes)}`),
h("span", null, `disk ${fmtBytes(disk.usedBytes)} / ${fmtBytes(disk.totalBytes)}`),
),
),
@@ -459,12 +459,12 @@ function NodeMonitorPage({ nodes, systemStatuses, onRaw, refresh }: AnyRecord) {
),
h("div", { className: "monitor-chart-grid" },
h(MetricChart, { title: "CPU", metricKey: "cpuPercent", current: cpu.percent, points, detail: `${cpu.cores || 0} cores / load ${asNumber(cpu.load1).toFixed(2)}`, tone: "cpu", testId: "metric-chart-cpu" }),
h(MetricChart, { title: "Memory", metricKey: "memoryPercent", current: memory.percent, points, detail: `${fmtBytes(memory.usedBytes)} used / ${fmtBytes(memory.availableBytes)} free`, tone: "memory", testId: "metric-chart-memory" }),
h(MetricChart, { title: "Memory", metricKey: "memoryPercent", current: memory.percent, points, detail: `${fmtBytes(memory.usedBytes)} actual / ${fmtBytes(memory.cacheBytes)} cache excluded`, tone: "memory", testId: "metric-chart-memory" }),
h(MetricChart, { title: "Disk", metricKey: "diskPercent", current: disk.percent, points, detail: `${disk.path || "/"} mounted ${disk.mount || "--"}`, tone: "disk", testId: "metric-chart-disk" }),
),
h("div", { className: "monitor-summary-grid" },
h(MetricCard, { label: "CPU 当前", value: fmtPercent(cpu.percent), hint: `history ${points.length} samples`, tone: "ok" }),
h(MetricCard, { label: "内存已用", value: fmtBytes(memory.usedBytes), hint: fmtPercent(memory.percent) }),
h(MetricCard, { label: "实际内存", value: fmtBytes(memory.usedBytes), hint: `${fmtPercent(memory.percent)} 不含缓存` }),
h(MetricCard, { label: "硬盘已用", value: fmtBytes(disk.usedBytes), hint: fmtPercent(disk.percent) }),
h(MetricCard, { label: "更新时间", value: fmtDate(active.systemUpdatedAt || current.collectedAt), hint: active.providerId }),
),
@@ -475,7 +475,7 @@ function NodeMonitorPage({ nodes, systemStatuses, onRaw, refresh }: AnyRecord) {
h(Panel, { title: "采样说明", eyebrow: "Retention" },
h("div", { className: "monitor-note-list" },
h("article", null, h("b", null, "CPU"), h("span", null, "从 /proc/stat 计算相邻采样差值,首个采样用 load/cores 近似")),
h("article", null, h("b", null, "Memory"), h("span", null, "使用 MemTotal MemAvailable 计算已用比例")),
h("article", null, h("b", null, "Memory"), h("span", null, "实际内存 = MemTotal - MemFree - Buffers - Cached - SReclaimable + Shmem,不把 page cache / buffer 计入占用")),
h("article", null, h("b", null, "Disk"), h("span", null, "使用 df -PB1 对配置路径采样,默认监控根文件系统")),
),
),
+16 -3
View File
@@ -380,16 +380,29 @@ async function collectSystemStatus(): Promise<SystemStatusSnapshot> {
errors.push({ source: "proc.stat", error: error instanceof Error ? error.message : String(error) });
}
let memory: Record<string, JsonValue> = { totalBytes: 0, usedBytes: 0, availableBytes: 0, percent: 0 };
let memory: Record<string, JsonValue> = { totalBytes: 0, usedBytes: 0, availableBytes: 0, cacheBytes: 0, percent: 0, mode: "actual_without_cache" };
try {
const mem = readMemInfo();
const totalBytes = mem.MemTotal ?? 0;
const availableBytes = mem.MemAvailable ?? mem.MemFree ?? 0;
const usedBytes = Math.max(0, totalBytes - availableBytes);
const freeBytes = mem.MemFree ?? 0;
const buffersBytes = mem.Buffers ?? 0;
const cachedBytes = mem.Cached ?? 0;
const reclaimableBytes = mem.SReclaimable ?? 0;
const shmemBytes = mem.Shmem ?? 0;
const cacheBytes = Math.max(0, buffersBytes + cachedBytes + reclaimableBytes - shmemBytes);
const usedBytes = Math.max(0, totalBytes - freeBytes - cacheBytes);
const availableBytes = mem.MemAvailable ?? Math.max(0, freeBytes + cacheBytes);
memory = {
mode: "actual_without_cache",
totalBytes,
usedBytes,
availableBytes,
freeBytes,
cacheBytes,
buffersBytes,
cachedBytes,
reclaimableBytes,
shmemBytes,
percent: totalBytes > 0 ? clampPercent((usedBytes / totalBytes) * 100) : 0,
};
} catch (error) {