fix: cap tran runtime and remove local lock

This commit is contained in:
Codex
2026-05-25 23:48:03 +00:00
parent 15f5a49375
commit 8af5aafb9e
6 changed files with 200 additions and 124 deletions
+35 -55
View File
@@ -7,67 +7,47 @@ if [ ! -f "$repo/scripts/cli.ts" ]; then
repo=$(CDPATH= cd -- "$self_dir/.." && pwd)
fi
# Print the top-level tran runtime limit in whole seconds (always 1..60).
#
# Environment (the first non-empty value wins):
#   UNIDESK_TRAN_RUNTIME_TIMEOUT_SECONDS  limit in seconds
#   UNIDESK_TRAN_RUNTIME_TIMEOUT_MS       limit in milliseconds, rounded up
#   UNIDESK_SSH_RUNTIME_TIMEOUT_MS        limit in milliseconds, rounded up
# Outputs: the limit on stdout as a canonical decimal integer. Unset,
#          non-numeric, zero, or greater-than-60 values all resolve to 60.
tran_timeout_seconds() {
  raw=${UNIDESK_TRAN_RUNTIME_TIMEOUT_SECONDS:-}
  raw_unit=s
  if [ -z "$raw" ] && [ -n "${UNIDESK_TRAN_RUNTIME_TIMEOUT_MS:-}" ]; then
    raw=$UNIDESK_TRAN_RUNTIME_TIMEOUT_MS
    raw_unit=ms
  fi
  if [ -z "$raw" ] && [ -n "${UNIDESK_SSH_RUNTIME_TIMEOUT_MS:-}" ]; then
    raw=$UNIDESK_SSH_RUNTIME_TIMEOUT_MS
    raw_unit=ms
  fi

  # Anything that is not a plain run of digits falls back to the 60s default.
  case "$raw" in
    '' | *[!0-9]*)
      raw=60
      raw_unit=s
      ;;
  esac

  # Strip leading zeros before any arithmetic: inside $(( )) a leading 0 means
  # octal, so "09000" would abort and "030000" would silently mean 12288.
  raw=${raw#"${raw%%[!0]*}"}
  raw=${raw:-0}

  if [ "${#raw}" -gt 9 ]; then
    # Ten or more significant digits is far above the cap in either unit;
    # short-circuit so the value never reaches overflow-prone arithmetic.
    raw=60
  elif [ "$raw_unit" = ms ]; then
    # Round milliseconds up to whole seconds.
    raw=$(( (raw + 999) / 1000 ))
  fi

  # Enforce the hard 1..60 second window.
  if [ "$raw" -lt 1 ] || [ "$raw" -gt 60 ]; then
    raw=60
  fi
  printf '%s\n' "$raw"
}
# Top-level runtime guard: unless this process is already the guarded child
# (UNIDESK_TRAN_TIMEOUT_GUARD=1) or timeout(1) is unavailable, re-run this very
# script under timeout(1) and exit with the child's status.
if [ "${UNIDESK_TRAN_TIMEOUT_GUARD:-0}" != "1" ] && command -v timeout >/dev/null 2>&1; then
  timeout_seconds=$(tran_timeout_seconds)
  # Errexit is suspended so a failing child can still be inspected below.
  set +e
  # TERM at the limit, then KILL 2s later if the child is still running.
  UNIDESK_TRAN_TIMEOUT_GUARD=1 timeout -s TERM -k 2s "${timeout_seconds}s" "$0" "$@"
  rc=$?
  set -e
  # 124 is timeout(1)'s own "timed out" status; 137 and 143 are 128+KILL and
  # 128+TERM. Any of them is reported as hitting the runtime limit.
  case "$rc" in
    124 | 137 | 143)
      printf 'UNIDESK_TRAN_TIMEOUT_HINT {"code":"tran-top-level-timeout","level":"warning","timeoutSeconds":%s,"message":"tran exceeded the top-level runtime limit and was disconnected.","action":"Use short query plus poll semantics; do not keep tran open waiting for long CI/CD, trace, logs, or build progress."}\n' "$timeout_seconds" >&2
      ;;
  esac
  exit "$rc"
fi
# Main-server address: the first non-empty value among the IP override, the
# host override, and the dev-container master host.
host=${UNIDESK_MAIN_SERVER_IP:-}
[ -n "$host" ] || host=${UNIDESK_MAIN_SERVER_HOST:-}
[ -n "$host" ] || host=${CODE_QUEUE_DEV_CONTAINER_MASTER_HOST:-}

# runner_env is 1 when any of the runner/cluster marker variables is non-empty.
runner_env=0
if [ -n "${CODE_QUEUE_SERVICE_ROLE:-}${CODE_QUEUE_INSTANCE_ID:-}${KUBERNETES_SERVICE_HOST:-}" ]; then
  runner_env=1
fi
# Print the session-lock scope "<provider>-<plane>" for a tran invocation.
#
# Arguments: $1 - route (text before the first ':' is the provider)
#            $2... - at least one further argument must be present
# Environment: UNIDESK_TRAN_SESSION_LOCK - 0/false/no/off (lower or upper
#              case) disables locking
# Outputs: the scope on stdout
# Returns: 0 when a scope was printed; 1 when no lock applies (too few
#          arguments, locking disabled, empty or option-like route, or an
#          empty provider)
tran_lock_scope() {
  if [ "$#" -lt 2 ]; then
    return 1
  fi

  case "${UNIDESK_TRAN_SESSION_LOCK:-1}" in
    0 | false | FALSE | no | NO | off | OFF)
      return 1
      ;;
  esac

  route=$1
  case "$route" in
    "" | -*)
      return 1
      ;;
  esac

  provider=${route%%:*}
  if [ -z "$provider" ]; then
    return 1
  fi

  # Plane: "win" for routes ending in ":win" or containing ":win/", "k3s" for
  # routes containing ":k3s", otherwise "host".
  case "$route" in
    *:win | *:win/*) plane=win ;;
    *:k3s*) plane=k3s ;;
    *) plane=host ;;
  esac

  printf '%s\n' "${provider}-${plane}"
}
# Acquire the lock for one scope, using a lock directory as the mutex and
# polling once per second until it can be created.
#
# Arguments: $1 - scope; characters outside A-Za-z0-9_.- become '_' in the
#            lock directory name
# Environment:
#   UNIDESK_TRAN_LOCK_DIR              parent directory (default /tmp/unidesk-tran-locks)
#   UNIDESK_TRAN_LOCK_NOTICE_SECONDS   wait before the one-time notice (default 3)
#   UNIDESK_TRAN_LOCK_WARNING_SECONDS  wait before the one-time warning (default 10)
#   UNIDESK_TRAN_LOCK_TIMEOUT_SECONDS  wait before giving up (default 120)
# Outputs: progress and timeout messages on stderr
# Exits:   255 when the timeout is reached without obtaining the lock
# Side effects: on success installs an EXIT trap that removes the lock directory
tran_acquire_lock() {
  scope=$1
  notice_seconds=${UNIDESK_TRAN_LOCK_NOTICE_SECONDS:-3}
  warning_seconds=${UNIDESK_TRAN_LOCK_WARNING_SECONDS:-10}
  timeout_seconds=${UNIDESK_TRAN_LOCK_TIMEOUT_SECONDS:-120}
  lock_root=${UNIDESK_TRAN_LOCK_DIR:-/tmp/unidesk-tran-locks}
  lock_name=$(printf '%s' "$scope" | tr -c 'A-Za-z0-9_.-' '_')
  lock_path=$lock_root/$lock_name.lock

  mkdir -p "$lock_root"
  started=$(date +%s)
  noticed=0
  warned=0

  # Plain mkdir fails while the directory exists, so the first process to
  # create it holds the lock.
  until mkdir "$lock_path" 2>/dev/null; do
    now=$(date +%s)
    waited=$((now - started))

    if [ "$noticed" = 0 ] && [ "$waited" -ge "$notice_seconds" ]; then
      noticed=1
      printf 'tran provider session lock waiting scope=%s waited=%ss; serializing concurrent opens to avoid provider session allocation timeouts\n' "$scope" "$waited" >&2
    fi

    if [ "$warned" = 0 ] && [ "$waited" -ge "$warning_seconds" ]; then
      warned=1
      printf 'tran provider session lock warning scope=%s waited=%ss; high-frequency distributed calls are queued behind another tran, consider batching reads or checking stuck sessions if this repeats\n' "$scope" "$waited" >&2
    fi

    if [ "$waited" -ge "$timeout_seconds" ]; then
      printf 'tran provider session lock timeout scope=%s waited=%ss lock=%s\n' "$scope" "$waited" "$lock_path" >&2
      exit 255
    fi

    sleep 1
  done

  # Release the lock whenever this shell exits; failures are ignored.
  trap 'rmdir "$lock_path" 2>/dev/null || true' EXIT
}
# Take the session lock before going further, but only when tran_lock_scope
# succeeds and prints a scope for these arguments; otherwise no lock is taken.
if scope=$(tran_lock_scope "$@"); then
tran_acquire_lock "$scope"
fi
if [ "$runner_env" = 1 ] && [ -n "$host" ] && [ "${UNIDESK_TRAN_LOCAL:-}" != "1" ]; then
bun "$repo/scripts/cli.ts" --main-server-ip "$host" ssh "$@"
exit $?