fix: cap postgres connection pools

This commit is contained in:
Codex
2026-05-15 00:18:42 +00:00
parent c930607316
commit c206f13216
10 changed files with 47 additions and 8 deletions
+5
View File
@@ -56,6 +56,7 @@ services:
PROVIDER_PORT: "8081"
DATABASE_URL: "postgres://${UNIDESK_DATABASE_USER}:${UNIDESK_DATABASE_PASSWORD}@database:5432/${UNIDESK_DATABASE_NAME}"
PROVIDER_TOKEN: "${UNIDESK_PROVIDER_TOKEN}"
DATABASE_POOL_MAX: "${UNIDESK_BACKEND_CORE_DATABASE_POOL_MAX:-4}"
HEARTBEAT_TIMEOUT_MS: "${UNIDESK_HEARTBEAT_TIMEOUT_MS}"
TASK_PENDING_TIMEOUT_MS: "${UNIDESK_TASK_PENDING_TIMEOUT_MS:-600000}"
DATABASE_VOLUME_NAME: "${UNIDESK_DATABASE_VOLUME}"
@@ -89,6 +90,7 @@ services:
PORT: "4211"
TODO_NOTE_BACKEND_ONLY: "1"
DATABASE_URL: "postgres://${UNIDESK_DATABASE_USER}:${UNIDESK_DATABASE_PASSWORD}@database:5432/${UNIDESK_DATABASE_NAME}"
PGAPPNAME: "unidesk-todo-note"
TODO_NOTE_LOGS_DIR: "logs"
LOG_FILE: "/var/log/unidesk/${UNIDESK_LOG_DAY}/${UNIDESK_LOG_PREFIX}_todo-note.jsonl"
TODO_NOTE_LOG_PATH: "/var/log/unidesk/${UNIDESK_LOG_DAY}/${UNIDESK_LOG_PREFIX}_todo-note.jsonl"
@@ -127,6 +129,7 @@ services:
HOST: "0.0.0.0"
PORT: "4255"
DATABASE_URL: "postgres://${UNIDESK_DATABASE_USER}:${UNIDESK_DATABASE_PASSWORD}@database:5432/${UNIDESK_DATABASE_NAME}"
DATABASE_POOL_MAX: "${UNIDESK_OA_EVENT_FLOW_DATABASE_POOL_MAX:-2}"
PIPELINE_OA_BRIDGE_BASE_URL: "http://backend-core:8080/api/microservices/pipeline/proxy"
PIPELINE_OA_BRIDGE_INTERVAL_MS: "${UNIDESK_PIPELINE_OA_BRIDGE_INTERVAL_MS:-15000}"
PIPELINE_OA_BRIDGE_RUN_LIMIT: "${UNIDESK_PIPELINE_OA_BRIDGE_RUN_LIMIT:-50}"
@@ -155,6 +158,7 @@ services:
HOST: "0.0.0.0"
PORT: "4233"
DATABASE_URL: "postgres://${UNIDESK_DATABASE_USER}:${UNIDESK_DATABASE_PASSWORD}@database:5432/${UNIDESK_DATABASE_NAME}"
DATABASE_POOL_MAX: "${UNIDESK_PROJECT_MANAGER_DATABASE_POOL_MAX:-1}"
LOG_FILE: "/var/log/unidesk/${UNIDESK_LOG_DAY}/${UNIDESK_LOG_PREFIX}_project-manager.jsonl"
UNIDESK_LOG_RETENTION_BYTES: "${UNIDESK_LOG_RETENTION_BYTES:-1GiB}"
volumes:
@@ -180,6 +184,7 @@ services:
HOST: "0.0.0.0"
PORT: "4244"
DATABASE_URL: "postgres://${UNIDESK_DATABASE_USER}:${UNIDESK_DATABASE_PASSWORD}@database:5432/${UNIDESK_DATABASE_NAME}"
DATABASE_POOL_MAX: "${UNIDESK_BAIDU_NETDISK_DATABASE_POOL_MAX:-2}"
BAIDU_NETDISK_CLIENT_ID: "${UNIDESK_BAIDU_NETDISK_CLIENT_ID:-}"
BAIDU_NETDISK_CLIENT_SECRET: "${UNIDESK_BAIDU_NETDISK_CLIENT_SECRET:-}"
BAIDU_NETDISK_TOKEN_KEY: "${UNIDESK_BAIDU_NETDISK_TOKEN_KEY:-}"
+6
View File
@@ -55,6 +55,12 @@ frontend 的 Docker 上线顺序为:先运行必要的本地校验,例如 `b
Code Queue 已从主 server 迁移到 D601,但仍必须保持明确的 memory/swap 硬上限,默认 `CODE_QUEUE_MAX_ACTIVE_QUEUES=0` 以恢复 queue 间并行,仍保持 `CODE_QUEUE_IN_MEMORY_OUTPUT_RECORDS=10`、`CODE_QUEUE_IN_MEMORY_EVENT_RECORDS=10` 这类小热窗口;任务历史、队列统计、Trace/output 读取和 `/health` 摘要必须优先从 PostgreSQL 直读或聚合,不能为了性能便利在 Bun 进程内缓存全量历史。任何提高 Code Queue 热窗口、日志缓冲、Playwright/Codex 子进程常驻规模或容器上限的变更,或把 `CODE_QUEUE_MAX_ACTIVE_QUEUES` 显式改成正数,都必须在同一任务里说明 D601 资源预算来源,并通过 D601 `docker inspect code-queue-backend`、`docker stats --no-stream code-queue-backend`、`microservice health code-queue` 和对应 E2E 证明未重新引入内存爆炸风险。
## Database Connection Budget
主 PostgreSQL 的内存预算按“少量长驻服务连接池 + 短查询按需连接”设计,不允许每个 Bun 服务沿用默认 8 到 10 个连接。`backend-core` 默认 `DATABASE_POOL_MAX=4`,主 server 上的 `oa-event-flow`、`baidu-netdisk` 默认 `DATABASE_POOL_MAX=2`,`project-manager` 默认 `DATABASE_POOL_MAX=1`,D601 `code-queue` 默认 `CODE_QUEUE_DATABASE_POOL_MAX=2`;如需提高任一连接池上限,必须同时说明并发 SQL 需求、验证 `pg_stat_activity` 中该服务没有长期 idle 堆积,并确认 `max_connections` 仍有足够余量。PostgreSQL 基础配置固定保守值:`shared_buffers=128MB`、`work_mem=4MB`、`maintenance_work_mem=64MB`、`max_connections=50`,避免主 server 低内存环境被空闲 backend 和过大的 per-query 内存预算挤占。
排查 PostgreSQL 内存时以 `docker stats unidesk-database`、`pg_stat_activity` 分组和 `pg_settings` 为准;主机 `ps` 中每个 `postgres` 进程的 RSS 会重复计入共享内存,不能把所有 backend RSS 简单相加当作真实容器占用。所有 UniDesk PostgreSQL 客户端都必须设置可识别的 `application_name`,便于按服务统计连接数、状态和慢查询归属。
## Database Volume
架构要求数据库使用 10 GB named volume;当前实现将 volume 命名为 `unidesk_pgdata_10gb` 以固定生命周期。Docker named volume 默认不强制容量上限;如需硬配额,应在主机存储层或 Docker volume driver 层配置。CLI server 控制只能使用不删除 volume 的 `down` / `up` 流程,禁止使用 `down -v`、`docker volume rm` 或删除 `unidesk_pgdata_10gb`。
+4 -1
View File
@@ -41,6 +41,7 @@ interface RuntimeConfig {
baiduNetdiskInternalUrl: string;
microservices: MicroserviceConfig[];
logFile: string;
databasePoolMax: number;
}
interface MicroserviceConfig {
@@ -204,9 +205,10 @@ const serviceStartedAt = new Date();
const config = readConfig();
const logger = createLogger("backend-core", config.logFile);
const sql = postgres(config.databaseUrl, {
max: 8,
max: config.databasePoolMax,
idle_timeout: 20,
connect_timeout: 10,
connection: { application_name: "unidesk-backend-core" },
});
let dbReady = false;
@@ -363,6 +365,7 @@ function readConfig(): RuntimeConfig {
baiduNetdiskInternalUrl: process.env.BAIDU_NETDISK_INTERNAL_URL || "http://baidu-netdisk:4244",
microservices: readMicroservicesEnv(),
logFile: requiredEnv("LOG_FILE"),
databasePoolMax: Math.max(1, Math.min(16, readOptionalNumberEnv("DATABASE_POOL_MAX", 4))),
};
}
@@ -1,6 +1,6 @@
# UniDesk keeps PostgreSQL mostly stock; runtime logging options are injected by docker-compose.yml.
shared_buffers = '256MB'
work_mem = '16MB'
shared_buffers = '128MB'
work_mem = '4MB'
maintenance_work_mem = '64MB'
max_connections = 100
max_connections = 50
listen_addresses = '*'
@@ -17,6 +17,7 @@ interface RuntimeConfig {
port: number;
databaseUrl: string;
logFile: string;
databasePoolMax: number;
clientId: string;
clientSecret: string;
tokenKey: string;
@@ -113,6 +114,7 @@ function configFromEnv(): RuntimeConfig {
port: Number(process.env.PORT || 4244),
databaseUrl,
logFile: process.env.LOG_FILE || "",
databasePoolMax: Math.max(1, Math.min(8, Number(process.env.DATABASE_POOL_MAX || 2) || 2)),
clientId: process.env.BAIDU_NETDISK_CLIENT_ID || process.env.BAIDU_NETDISK_APP_KEY || "",
clientSecret: process.env.BAIDU_NETDISK_CLIENT_SECRET || process.env.BAIDU_NETDISK_SECRET_KEY || "",
tokenKey: process.env.BAIDU_NETDISK_TOKEN_KEY || "",
@@ -124,7 +126,12 @@ function configFromEnv(): RuntimeConfig {
}
const config = configFromEnv();
const sql = postgres(config.databaseUrl, { max: 8, idle_timeout: 20, connect_timeout: 10 });
const sql = postgres(config.databaseUrl, {
max: config.databasePoolMax,
idle_timeout: 20,
connect_timeout: 10,
connection: { application_name: "unidesk-baidu-netdisk" },
});
const logWriter = config.logFile
? createHourlyJsonlWriter({
baseLogFile: config.logFile,
@@ -28,6 +28,7 @@ services:
CODE_QUEUE_APPROVAL_POLICY: "${CODE_QUEUE_APPROVAL_POLICY:-never}"
CODE_QUEUE_MAX_ATTEMPTS: "${CODE_QUEUE_MAX_ATTEMPTS:-99}"
CODE_QUEUE_MAX_ACTIVE_QUEUES: "${CODE_QUEUE_MAX_ACTIVE_QUEUES:-0}"
CODE_QUEUE_DATABASE_POOL_MAX: "${CODE_QUEUE_DATABASE_POOL_MAX:-2}"
NODE_OPTIONS: "${CODE_QUEUE_NODE_OPTIONS:---max-old-space-size=1024}"
CODE_QUEUE_IN_MEMORY_OUTPUT_RECORDS: "${CODE_QUEUE_IN_MEMORY_OUTPUT_RECORDS:-10}"
CODE_QUEUE_IN_MEMORY_EVENT_RECORDS: "${CODE_QUEUE_IN_MEMORY_EVENT_RECORDS:-10}"
@@ -184,9 +184,10 @@ let persistDirty = false;
let shutdownRequested = false;
let serviceReady = false;
const sql: SqlClient = postgres(config.databaseUrl, {
max: 4,
max: config.databasePoolMax,
idle_timeout: 20,
connect_timeout: 10,
connection: { application_name: "unidesk-code-queue" },
});
let databaseReady = false;
let databaseLastError: string | null = null;
@@ -322,6 +323,7 @@ function readConfig(): RuntimeConfig {
judgeMaxTokens: Math.max(800, Math.min(4000, envNumber("MINIMAX_JUDGE_MAX_TOKENS", 1800))),
turnNoActivityTimeoutMs: Math.max(60_000, Math.min(30 * 60_000, envNumber("CODEX_TURN_NO_ACTIVITY_TIMEOUT_MS", 6 * 60_000))),
databaseUrl: envRequiredString("DATABASE_URL"),
databasePoolMax: Math.max(1, Math.min(8, envNumber("CODE_QUEUE_DATABASE_POOL_MAX", envNumber("DATABASE_POOL_MAX", 2)))),
databaseFlushIntervalMs: Math.max(100, Math.min(10_000, envNumber("CODE_QUEUE_DATABASE_FLUSH_INTERVAL_MS", 1000))),
oaEventFlowBaseUrl: envString("OA_EVENT_FLOW_BASE_URL", "http://oa-event-flow:4255").replace(/\/+$/u, ""),
notifyClaudeQqEnabled: envBool("CODE_QUEUE_NOTIFY_CLAUDEQQ_ENABLED", false),
@@ -64,6 +64,7 @@ export interface RuntimeConfig {
judgeMaxTokens: number;
turnNoActivityTimeoutMs: number;
databaseUrl: string;
databasePoolMax: number;
databaseFlushIntervalMs: number;
oaEventFlowBaseUrl: string;
notifyClaudeQqEnabled: boolean;
@@ -12,6 +12,7 @@ interface RuntimeConfig {
port: number;
databaseUrl: string;
logFile: string;
databasePoolMax: number;
pipelineBridgeBaseUrl: string;
pipelineBridgeIntervalMs: number;
pipelineBridgeRunLimit: number;
@@ -179,6 +180,7 @@ function configFromEnv(): RuntimeConfig {
port: envNumber("PORT", 4255),
databaseUrl: envRequiredString("DATABASE_URL"),
logFile: envString("LOG_FILE", "/var/log/unidesk/oa-event-flow.jsonl"),
databasePoolMax: Math.max(1, Math.min(8, envNumber("DATABASE_POOL_MAX", 2))),
pipelineBridgeBaseUrl: envString("PIPELINE_OA_BRIDGE_BASE_URL", "").replace(/\/+$/u, ""),
pipelineBridgeIntervalMs: envNumber("PIPELINE_OA_BRIDGE_INTERVAL_MS", 15_000),
pipelineBridgeRunLimit: envNumber("PIPELINE_OA_BRIDGE_RUN_LIMIT", 50),
@@ -186,7 +188,12 @@ function configFromEnv(): RuntimeConfig {
}
const config = configFromEnv();
const sql: SqlClient = postgres(config.databaseUrl, { max: 8, idle_timeout: 20, connect_timeout: 10 });
const sql: SqlClient = postgres(config.databaseUrl, {
max: config.databasePoolMax,
idle_timeout: 20,
connect_timeout: 10,
connection: { application_name: "unidesk-oa-event-flow" },
});
const pipelineBridgeState: PipelineBridgeState = {
enabled: config.pipelineBridgeBaseUrl.length > 0,
mode: "snapshot",
@@ -13,6 +13,7 @@ interface RuntimeConfig {
port: number;
databaseUrl: string;
logFile: string;
databasePoolMax: number;
}
interface ProjectRow {
@@ -90,11 +91,17 @@ function configFromEnv(): RuntimeConfig {
port: Number(process.env.PORT || 4233),
databaseUrl,
logFile: process.env.LOG_FILE || "",
databasePoolMax: Math.max(1, Math.min(8, Number(process.env.DATABASE_POOL_MAX || 1) || 1)),
};
}
const config = configFromEnv();
const sql = postgres(config.databaseUrl, { max: 8, idle_timeout: 20, connect_timeout: 10 });
const sql = postgres(config.databaseUrl, {
max: config.databasePoolMax,
idle_timeout: 20,
connect_timeout: 10,
connection: { application_name: "unidesk-project-manager" },
});
const logWriter = config.logFile
? createHourlyJsonlWriter({
baseLogFile: config.logFile,