diff --git a/.github/workflows/bench-aws-fargate.yml b/.github/workflows/bench-aws-fargate.yml
index 6201da1a..9d423062 100644
--- a/.github/workflows/bench-aws-fargate.yml
+++ b/.github/workflows/bench-aws-fargate.yml
@@ -325,33 +325,55 @@ jobs:
# Wait for task to complete with a custom loop (longer timeout)
echo "Waiting for task to complete..."
- MAX_ATTEMPTS=240 # 240 * 30s = 120 minutes
+ MAX_ATTEMPTS=360 # 360 * 30s = 180 minutes (3 hours)
ATTEMPT=0
+ LOG_GROUP="/ecs/${CLUSTER}"
+ TASK_ID=$(echo "${TASK_ARN}" | awk -F/ '{print $NF}')
+ LOG_STREAM="benchmark/benchmark-container/${TASK_ID}"
+ LAST_LOG_TS=0
while [ $ATTEMPT -lt $MAX_ATTEMPTS ]; do
STATUS=$(aws ecs describe-tasks \
--cluster "${CLUSTER}" \
--tasks "${TASK_ARN}" \
--query 'tasks[0].lastStatus' \
--output text)
-
+
echo "Attempt $((ATTEMPT+1))/${MAX_ATTEMPTS}: Task status is ${STATUS}"
-
+
if [ "${STATUS}" == "STOPPED" ]; then
echo "✅ Task has stopped."
break
fi
-
+
if [ "${STATUS}" == "None" ]; then
echo "❌ Error: Task not found."
exit 1
fi
-
+
+ # Tail recent log events every ~5 attempts (~2.5 minutes)
+ if [ "${STATUS}" == "RUNNING" ] && [ $((ATTEMPT % 5)) -eq 0 ]; then
+ LOG_JSON=$(aws logs get-log-events \
+ --log-group-name "${LOG_GROUP}" \
+ --log-stream-name "${LOG_STREAM}" \
+ --start-time "${LAST_LOG_TS}" \
+ --start-from-head \
+ --limit 50 \
+ --output json 2>/dev/null || echo '{"events":[]}')
+ EVENT_COUNT=$(echo "${LOG_JSON}" | jq '.events | length')
+ if [ "${EVENT_COUNT}" -gt 0 ]; then
+ echo "----- recent logs (last ${EVENT_COUNT} lines) -----"
+ echo "${LOG_JSON}" | jq -r '.events[] | .message'
+ echo "---------------------------------------------------"
+ LAST_LOG_TS=$(echo "${LOG_JSON}" | jq '.events[-1].timestamp + 1')
+ fi
+ fi
+
sleep 30
ATTEMPT=$((ATTEMPT+1))
done
if [ $ATTEMPT -eq $MAX_ATTEMPTS ]; then
- echo "❌ Error: Timeout waiting for task to complete after 120 minutes."
+ echo "❌ Error: Timeout waiting for task to complete after 180 minutes."
# Try to stop the task if it's still running
aws ecs stop-task --cluster "${CLUSTER}" --task "${TASK_ARN}" --reason "Timeout in GitHub Actions" || true
exit 1
@@ -404,11 +426,61 @@ jobs:
LOG_ATTEMPT=$((LOG_ATTEMPT+1))
done
- # Download logs using jq to ensure each event is on a new line
- aws logs get-log-events \
+ # The bench writes the markdown file as a base64 block bracketed by
+ # ===BEGIN_BENCHMARK_RESULTS=== / ===END_BENCHMARK_RESULTS===, and these
+ # markers always sit at the very end of the run. Locate the BEGIN marker
+ # via filter-log-events (server-side, single request) and then download
+ # only the events after that timestamp. This avoids paginating hundreds
+ # of MB of CloudWatch events from the noisy odyssey logs.
+ : > benchmark-results/stdout.txt
+ MARKER_TS=$(aws logs filter-log-events \
--log-group-name "${LOG_GROUP}" \
- --log-stream-name "${LOG_STREAM}" \
- --output json | jq -r '.events[].message' > benchmark-results/stdout.txt
+ --log-stream-names "${LOG_STREAM}" \
+ --filter-pattern '"===BEGIN_BENCHMARK_RESULTS==="' \
+ --output json | jq '.events[0].timestamp // empty')
+
+ if [ -z "${MARKER_TS}" ]; then
+ echo "❌ ===BEGIN_BENCHMARK_RESULTS=== marker not found in log stream"
+ echo "The benchmark scenario likely did not finish — fetching last 200 events for context"
+ aws logs get-log-events \
+ --log-group-name "${LOG_GROUP}" \
+ --log-stream-name "${LOG_STREAM}" \
+ --limit 200 \
+ --output json | jq -r '.events[].message' >> benchmark-results/stdout.txt
+ else
+ echo "Found BEGIN marker at timestamp ${MARKER_TS}"
+ NEXT_TOKEN=""
+ while :; do
+ if [ -z "${NEXT_TOKEN}" ]; then
+ PAGE=$(aws logs get-log-events \
+ --log-group-name "${LOG_GROUP}" \
+ --log-stream-name "${LOG_STREAM}" \
+ --start-time "${MARKER_TS}" \
+ --start-from-head \
+ --output json)
+ else
+ PAGE=$(aws logs get-log-events \
+ --log-group-name "${LOG_GROUP}" \
+ --log-stream-name "${LOG_STREAM}" \
+ --start-time "${MARKER_TS}" \
+ --start-from-head \
+ --next-token "${NEXT_TOKEN}" \
+ --output json)
+ fi
+ EVENTS=$(echo "${PAGE}" | jq '.events | length')
+ echo "${PAGE}" | jq -r '.events[].message' >> benchmark-results/stdout.txt
+ # Stop as soon as the END marker is in the file
+ if grep -q "===END_BENCHMARK_RESULTS===" benchmark-results/stdout.txt; then
+ break
+ fi
+ NEW_TOKEN=$(echo "${PAGE}" | jq -r '.nextForwardToken')
+ if [ "${EVENTS}" -eq 0 ] || [ "${NEW_TOKEN}" = "${NEXT_TOKEN}" ]; then
+ break
+ fi
+ NEXT_TOKEN="${NEW_TOKEN}"
+ done
+ fi
+ echo "Downloaded $(wc -l < benchmark-results/stdout.txt) log lines"
echo "Extracting benchmark results from logs..."
diff --git a/.github/workflows/fetch-fargate-logs.yml b/.github/workflows/fetch-fargate-logs.yml
index 55835eff..31346f2d 100644
--- a/.github/workflows/fetch-fargate-logs.yml
+++ b/.github/workflows/fetch-fargate-logs.yml
@@ -10,11 +10,27 @@ on:
description: 'ECS task ID (the UUID part, e.g. 8f1d948c60c74239998e8be8dee87ea7)'
required: true
type: string
+ filter_pattern:
+ description: 'CloudWatch Logs filter pattern (empty = no filter, e.g. "?ERROR ?pgbench ?Scenario")'
+ required: false
+ default: ''
+ type: string
+ start_minute:
+ description: 'Skip first N minutes after task start (helps avoid setup spam)'
+ required: false
+ default: '0'
+ type: string
+ end_minute:
+ description: 'Stop fetching after N minutes from task start (0 = no limit)'
+ required: false
+ default: '0'
+ type: string
jobs:
fetch-logs:
name: Fetch CloudWatch Logs
runs-on: ubuntu-latest
+ timeout-minutes: 30
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
@@ -23,8 +39,14 @@ jobs:
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ secrets.AWS_REGION }}
- - name: Download full log stream from CloudWatch
+ - name: Download log stream from CloudWatch
+ env:
+ FILTER_PATTERN: ${{ inputs.filter_pattern }}
+ START_MINUTE: ${{ inputs.start_minute }}
+ END_MINUTE: ${{ inputs.end_minute }}
run: |
+ set -euo pipefail
+
CLUSTER="${{ secrets.ECS_CLUSTER_NAME }}"
TASK_ID="${{ inputs.task_id }}"
LOG_GROUP="/ecs/${CLUSTER}"
@@ -36,35 +58,88 @@ jobs:
echo "Fetching ${LOG_GROUP}/${LOG_STREAM}"
+ # Find the timestamp of the first event in the stream so start/end offsets
+ # are interpreted relative to "task start" rather than wall-clock epoch.
+ FIRST_TS=$(aws logs get-log-events \
+ --log-group-name "${LOG_GROUP}" \
+ --log-stream-name "${LOG_STREAM}" \
+ --start-from-head \
+ --limit 1 \
+ --output json | jq '.events[0].timestamp // 0')
+ if [ "${FIRST_TS}" = "0" ]; then
+ echo "❌ Log stream is empty or unreachable"
+ exit 1
+ fi
+ echo "First event timestamp: ${FIRST_TS} ($(date -u -d @$((FIRST_TS / 1000)) +%FT%TZ))"
+
+ START_TS=$((FIRST_TS + START_MINUTE * 60 * 1000))
+ if [ "${END_MINUTE}" -gt 0 ]; then
+ END_TS=$((FIRST_TS + END_MINUTE * 60 * 1000))
+ END_ARG=(--end-time "${END_TS}")
+ echo "Window: [+${START_MINUTE}min, +${END_MINUTE}min] (start=${START_TS}, end=${END_TS})"
+ else
+ END_ARG=()
+ echo "Window: [+${START_MINUTE}min, end] (start=${START_TS})"
+ fi
+
+ # Use filter-log-events when a pattern is supplied — it pre-filters on the
+ # AWS side, so we don't pull half a gigabyte of odyssey spam through the
+ # paginator. Without a pattern, fall back to filter-log-events with no
+ # pattern, which still respects the time window.
NEXT_TOKEN=""
PAGE_NUM=0
+ TOTAL=0
while :; do
- if [ -z "${NEXT_TOKEN}" ]; then
- PAGE=$(aws logs get-log-events \
- --log-group-name "${LOG_GROUP}" \
- --log-stream-name "${LOG_STREAM}" \
- --start-from-head \
- --output json)
+ if [ -n "${NEXT_TOKEN}" ]; then
+ if [ -n "${FILTER_PATTERN}" ]; then
+ PAGE=$(aws logs filter-log-events \
+ --log-group-name "${LOG_GROUP}" \
+ --log-stream-names "${LOG_STREAM}" \
+ --start-time "${START_TS}" \
+ "${END_ARG[@]}" \
+ --filter-pattern "${FILTER_PATTERN}" \
+ --next-token "${NEXT_TOKEN}" \
+ --output json)
+ else
+ PAGE=$(aws logs filter-log-events \
+ --log-group-name "${LOG_GROUP}" \
+ --log-stream-names "${LOG_STREAM}" \
+ --start-time "${START_TS}" \
+ "${END_ARG[@]}" \
+ --next-token "${NEXT_TOKEN}" \
+ --output json)
+ fi
else
- PAGE=$(aws logs get-log-events \
- --log-group-name "${LOG_GROUP}" \
- --log-stream-name "${LOG_STREAM}" \
- --start-from-head \
- --next-token "${NEXT_TOKEN}" \
- --output json)
+ if [ -n "${FILTER_PATTERN}" ]; then
+ PAGE=$(aws logs filter-log-events \
+ --log-group-name "${LOG_GROUP}" \
+ --log-stream-names "${LOG_STREAM}" \
+ --start-time "${START_TS}" \
+ "${END_ARG[@]}" \
+ --filter-pattern "${FILTER_PATTERN}" \
+ --output json)
+ else
+ PAGE=$(aws logs filter-log-events \
+ --log-group-name "${LOG_GROUP}" \
+ --log-stream-names "${LOG_STREAM}" \
+ --start-time "${START_TS}" \
+ "${END_ARG[@]}" \
+ --output json)
+ fi
fi
EVENTS=$(echo "${PAGE}" | jq '.events | length')
echo "${PAGE}" | jq -r '.events[] | "\(.timestamp) \(.message)"' >> "${OUT}"
- NEW_TOKEN=$(echo "${PAGE}" | jq -r '.nextForwardToken')
- PAGE_NUM=$((PAGE_NUM+1))
- echo "page ${PAGE_NUM}: +${EVENTS} events (total $(wc -l < "${OUT}"))"
- if [ "${EVENTS}" -eq 0 ] || [ "${NEW_TOKEN}" = "${NEXT_TOKEN}" ]; then
+ TOTAL=$((TOTAL + EVENTS))
+ PAGE_NUM=$((PAGE_NUM + 1))
+ echo "page ${PAGE_NUM}: +${EVENTS} events (total ${TOTAL})"
+ NEW_TOKEN=$(echo "${PAGE}" | jq -r '.nextToken // empty')
+ if [ -z "${NEW_TOKEN}" ]; then
break
fi
NEXT_TOKEN="${NEW_TOKEN}"
done
- echo "Downloaded $(wc -l < "${OUT}") log lines into ${OUT}"
+ echo "Downloaded ${TOTAL} log lines into ${OUT}"
echo "File size: $(du -h "${OUT}" | cut -f1)"
- name: Upload logs as artifact
diff --git a/documentation/en/src/changelog.md b/documentation/en/src/changelog.md
index bbf79f58..654c1454 100644
--- a/documentation/en/src/changelog.md
+++ b/documentation/en/src/changelog.md
@@ -1,6 +1,24 @@
# Changelog
-### 3.4.1 Apr 3, 2026
+### 3.4.0 Apr 1, 2026
+
+**New Features:**
+
+- **Unix socket listener.** `unix_socket_dir` creates `.s.PGSQL.<port>` socket file. Connect with `psql -h <dir>` or `pgbench -h <dir>`. No TCP overhead on local connections.
+
+- **HBA `local` rule matching.** `local` rules in pg_hba now apply to Unix socket connections. `host`/`hostssl`/`hostnossl` rules apply only to TCP. Previously `local` rules were parsed but ignored.
+
+- **`unix_socket_mode` controls socket file permissions.** New `[general]` setting fixes the permission bits on `.s.PGSQL.<port>` after bind, so the access surface no longer depends on the process umask. Octal string, default `"0600"` (owner only). Set to `"0660"` to grant a Unix group, or `"0666"` to allow any local user. Validated at config load — invalid octal values, setuid/setgid/sticky bits, and overflow into bits above `0o777` are rejected upfront.
+
+- **Pool Coordinator — database-level connection limits.** New `max_db_connections` setting caps total server connections per database across all user pools. When the limit is reached, the coordinator evicts idle connections from users with the largest surplus (respecting `min_guaranteed_pool_size`), then waits for a connection to be returned, and falls back to a reserve pool as last resort. Disabled by default (`max_db_connections = 0`) — zero overhead when not configured. Five new pool-level config fields: `max_db_connections`, `min_connection_lifetime` (eviction age threshold), `reserve_pool_size` (extra slots beyond the limit), `reserve_pool_timeout` (wait before using reserve), `min_guaranteed_pool_size` (per-user eviction protection independent of `min_pool_size`).
+
+- **`SHOW POOL_COORDINATOR` admin command.** Displays per-database coordinator status: configured limits, current connection count, reserve usage, cumulative evictions, reserve acquisitions, and client exhaustion errors.
+
+- **Pool Coordinator Prometheus metrics.** Seven new metrics under `pg_doorman_pool_coordinator{type, database}`: `connections` (current), `reserve_in_use` (current), `max_connections` (configured limit), `reserve_pool_size` (configured reserve), `evictions_total`, `reserve_acquisitions_total`, `exhaustions_total` (client errors from full exhaustion — primary pager signal).
+
+- **Reserve pressure relief.** Idle reserve connections (created under `max_db_connections` pressure) are closed early by the retain cycle once idle longer than `min_connection_lifetime`, returning reserve capacity before the regular `idle_timeout` fires.
+
+- **Runtime log level control via admin `SET` command.** Change log level without restarting the pooler: `SET log_level = 'debug'` for global, `SET log_level = 'warn,pg_doorman::pool::pool_coordinator=debug'` for per-module (RUST_LOG syntax). View current level with `SHOW LOG_LEVEL`. Changes are ephemeral (lost on restart). Zero overhead on the hot path at production log levels — filtering uses lock-free `ArcSwap` instead of `RwLock`.
**Improvements:**
@@ -14,25 +32,14 @@
- **Prepared statement cache eviction log.** Shows truncated query text and current cache size (`size=99/100`) to help diagnose cache sizing issues.
-**New features:**
-
-- **Runtime log level control via admin `SET` command.** Change log level without restarting the pooler: `SET log_level = 'debug'` for global, `SET log_level = 'warn,pg_doorman::pool::pool_coordinator=debug'` for per-module (RUST_LOG syntax). View current level with `SHOW LOG_LEVEL`. Changes are ephemeral (lost on restart). Zero overhead on the hot path at production log levels — filtering uses lock-free `ArcSwap` instead of `RwLock`.
-
**Security:**
- **Removed password hash from logs.** The "unsupported password type" warning no longer includes the password hash value.
-### 3.4.0 Apr 1, 2026
-
-**New Features:**
-
-- **Pool Coordinator — database-level connection limits.** New `max_db_connections` setting caps total server connections per database across all user pools. When the limit is reached, the coordinator evicts idle connections from users with the largest surplus (respecting `min_guaranteed_pool_size`), then waits for a connection to be returned, and falls back to a reserve pool as last resort. Disabled by default (`max_db_connections = 0`) — zero overhead when not configured. Five new pool-level config fields: `max_db_connections`, `min_connection_lifetime` (eviction age threshold), `reserve_pool_size` (extra slots beyond the limit), `reserve_pool_timeout` (wait before using reserve), `min_guaranteed_pool_size` (per-user eviction protection independent of `min_pool_size`).
-
-- **`SHOW POOL_COORDINATOR` admin command.** Displays per-database coordinator status: configured limits, current connection count, reserve usage, cumulative evictions, reserve acquisitions, and client exhaustion errors.
+**Known limitations (Unix socket):**
-- **Pool Coordinator Prometheus metrics.** Seven new metrics under `pg_doorman_pool_coordinator{type, database}`: `connections` (current), `reserve_in_use` (current), `max_connections` (configured limit), `reserve_pool_size` (configured reserve), `evictions_total`, `reserve_acquisitions_total`, `exhaustions_total` (client errors from full exhaustion — primary pager signal).
-
-- **Reserve pressure relief.** Idle reserve connections (created under `max_db_connections` pressure) are closed early by the retain cycle once idle longer than `min_connection_lifetime`, returning reserve capacity before the regular `idle_timeout` fires.
+- Unix listener not handed off during `SIGUSR2` binary upgrade. New process re-creates the socket; connections refused for ~100ms.
+- `only_ssl_connections` does not reject Unix socket connections. Unix sockets do not need TLS for transport security.
### 3.3.5 Mar 31, 2026
diff --git a/pg_doorman.toml b/pg_doorman.toml
index 69c17517..b29fd41e 100644
--- a/pg_doorman.toml
+++ b/pg_doorman.toml
@@ -115,6 +115,15 @@ tcp_user_timeout = 60
# Default: 1048576 (1048576 bytes)
unix_socket_buffer_size = 1048576
+# Directory for Unix socket listener. Creates .s.PGSQL.<port> file. Use psql -h <dir> or pgbench -h <dir> to connect.
+# When set, pg_doorman also accepts connections via Unix socket (.s.PGSQL.<port>).
+# unix_socket_dir = "/var/run/pg_doorman"
+
+# Permission mode applied to the .s.PGSQL.<port> socket file after bind.
+# Octal string. Only the lowest 9 bits (0o777) are honored.
+# Default: "0600"
+unix_socket_mode = "0600"
+
# --------------------------------------------------------------------------
# Connection Limits
# --------------------------------------------------------------------------
diff --git a/pg_doorman.yaml b/pg_doorman.yaml
index 400f019e..0d0b7a2c 100644
--- a/pg_doorman.yaml
+++ b/pg_doorman.yaml
@@ -152,6 +152,15 @@ general:
# Default: "1MB" (1048576 bytes)
unix_socket_buffer_size: "1MB"
+ # Directory for Unix socket listener. Creates .s.PGSQL.<port> file. Use psql -h <dir> or pgbench -h <dir> to connect.
+ # When set, pg_doorman also accepts connections via Unix socket (.s.PGSQL.<port>).
+ # unix_socket_dir: "/var/run/pg_doorman"
+
+ # Permission mode applied to the .s.PGSQL.<port> socket file after bind.
+ # Octal string. Only the lowest 9 bits (0o777) are honored.
+ # Default: "0600"
+ unix_socket_mode: "0600"
+
# --------------------------------------------------------------------------
# Connection Limits
# --------------------------------------------------------------------------
diff --git a/src/app/generate/annotated.rs b/src/app/generate/annotated.rs
index 4250fb0c..4f4bd2bc 100644
--- a/src/app/generate/annotated.rs
+++ b/src/app/generate/annotated.rs
@@ -593,6 +593,18 @@ fn write_general_section(w: &mut ConfigWriter, config: &Config) {
"1048576 bytes",
);
+ write_field_desc(w, fi, "general", "unix_socket_dir");
+ w.comment(
+ fi,
+ "When set, pg_doorman also accepts connections via Unix socket (.s.PGSQL.).",
+ );
+ w.commented_kv(fi, "unix_socket_dir", &w.str_val("/var/run/pg_doorman"));
+ w.blank();
+
+ write_field_comment(w, fi, "general", "unix_socket_mode");
+ w.kv(fi, "unix_socket_mode", &w.str_val(&g.unix_socket_mode));
+ w.blank();
+
// --- Connection Limits ---
w.separator(fi, f.section_title("limits").get(w.russian));
w.blank();
diff --git a/src/app/generate/docs.rs b/src/app/generate/docs.rs
index 12c081c5..407bef3e 100644
--- a/src/app/generate/docs.rs
+++ b/src/app/generate/docs.rs
@@ -237,6 +237,8 @@ fn write_general_fields(out: &mut String, f: &FieldsData) {
"tcp_keepalives_interval",
"tcp_user_timeout",
"unix_socket_buffer_size",
+ "unix_socket_dir",
+ "unix_socket_mode",
"admin_username",
"admin_password",
"prepared_statements",
diff --git a/src/app/generate/fields.yaml b/src/app/generate/fields.yaml
index e534ba40..71986807 100644
--- a/src/app/generate/fields.yaml
+++ b/src/app/generate/fields.yaml
@@ -482,6 +482,27 @@ fields:
doc: "Buffer size for read and write operations when connecting to PostgreSQL via a unix socket."
default: "1048576"
+ unix_socket_dir:
+ config:
+ en: "Directory for Unix socket listener. Creates .s.PGSQL.<port> file. Use psql -h <dir> or pgbench -h <dir> to connect."
+ ru: "Директория для Unix socket. Создаёт файл .s.PGSQL.<port>. Подключение: psql -h <dir> или pgbench -h <dir>."
+ doc: "Directory for Unix domain socket listener. Creates .s.PGSQL.<port> socket file for local client connections."
+ default: "null"
+
+ unix_socket_mode:
+ config:
+ en: |
+ Permission mode applied to the .s.PGSQL.<port> socket file after bind.
+ Octal string. Only the lowest 9 bits (0o777) are honored.
+ ru: |
+ Права доступа на файл .s.PGSQL.<port>, выставляемые сразу после bind.
+ Восьмеричная строка. Учитываются только младшие 9 бит (0o777).
+ doc: |
+ Permission mode applied to the Unix domain socket file `.s.PGSQL.<port>` immediately after `bind()`. Specified as an octal string (e.g. `"0600"`, `"0660"`, `"0666"`). Only the lowest 9 bits are honored — setuid/setgid/sticky bits are rejected.
+
+ The default `"0600"` restricts socket access to the user running pg_doorman. To let other local users connect, set a more permissive mode such as `"0660"` (group access) or `"0666"` (any local user). When loosening the mode, ensure the parent directory permissions allow traversal by the intended group.
+ default: "\"0600\""
+
max_connections:
config:
en: |
diff --git a/src/app/server.rs b/src/app/server.rs
index feff45c6..0610a51b 100644
--- a/src/app/server.rs
+++ b/src/app/server.rs
@@ -17,7 +17,7 @@ use tokio::{runtime::Builder, sync::mpsc};
use crate::app::args::Args;
use crate::config::{get_config, reload_config, Config};
use crate::daemon;
-use crate::messages::configure_tcp_socket;
+use crate::messages::{configure_tcp_socket, configure_unix_socket};
use crate::pool::{retain, ClientServerMap, ConnectionPool};
use crate::prometheus::start_prometheus_server;
use crate::stats::{Collector, Reporter, REPORTER, TOTAL_CONNECTION_COUNTER};
@@ -206,6 +206,34 @@ pub fn run_server(args: Args, config: Config) -> Result<(), Box {
+ let path = format!("{dir}/.s.PGSQL.{}", config.general.port);
+ let mode = crate::config::General::parse_unix_socket_mode(
+ &config.general.unix_socket_mode,
+ )
+ .expect("unix_socket_mode validated at config load");
+ match create_unix_listener(&path, mode) {
+ Ok((listener, ownership)) => {
+ info!("Unix socket listening on {path} (mode={mode:#o})");
+ (Some(listener), Some(ownership))
+ }
+ Err(err) => {
+ error!("{err}");
+ std::process::exit(exitcode::OSERR);
+ }
+ }
+ }
+ None => (None, None),
+ };
+
config.show();
// Tracks which client is connected to which server for query cancellation.
@@ -414,22 +442,17 @@ pub fn run_server(args: Args, config: Config) -> Result<(), Box max_connections {
warn!("[#c{connection_id}] client {addr} rejected: too many clients (current={current_clients}, max={max_connections})");
- match crate::client::client_entrypoint_too_many_clients_already(
+ if let Err(err) = crate::client::client_entrypoint_too_many_clients_already(
socket, client_server_map).await {
- Ok(()) => (),
- Err(err) => {
- error!("[#c{connection_id}] client {addr} disconnected with error: {err}");
- }
+ error!("[#c{connection_id}] client {addr} disconnected with error: {err}");
}
CURRENT_CLIENT_COUNT.fetch_add(-1, Ordering::SeqCst);
- return
+ return;
}
let start = Utc::now().naive_utc();
-
- match crate::client::client_entrypoint(
+ let result = crate::client::client_entrypoint(
socket,
client_server_map,
admin_only,
@@ -437,30 +460,74 @@ pub fn run_server(args: Args, config: Config) -> Result<(), Box {
- if log_client_disconnections
- || log::log_enabled!(log::Level::Debug)
- {
- let session = format_duration(
- &(Utc::now().naive_utc() - start),
- );
- let identity = match &session_info {
- Some(si) => format!("[{}@{} #c{}]", si.username, si.pool_name, si.connection_id),
- None => format!("[#c{connection_id}]"),
- };
- info!("{identity} client disconnected from {addr}, session={session}");
- }
- }
+ .await;
+ log_session_end(
+ result,
+ connection_id,
+ &addr.to_string(),
+ start,
+ log_client_disconnections,
+ );
+ CURRENT_CLIENT_COUNT.fetch_add(-1, Ordering::SeqCst);
+ });
+ }
- Err(err) => {
- // Pre-auth failures: identity unknown, only connection_id available.
- // Post-auth failures already logged with [user@pool #cN] inside entrypoint.
- let session = format_duration(&(Utc::now().naive_utc() - start));
- warn!("[#c{connection_id}] client {addr} disconnected with error: {err}, session={session}");
+ // Unix socket client
+ new_unix = async {
+ if let Some(ref l) = unix_listener {
+ l.accept().await
+ } else {
+ std::future::pending().await
+ }
+ } => {
+ let (socket, _unix_addr) = match new_unix {
+ Ok(pair) => pair,
+ Err(err) => {
+ error!("Failed to accept Unix connection: {err}");
+ continue;
+ }
+ };
+ if admin_only {
+ drop(socket);
+ continue;
+ }
+ configure_unix_socket(&socket);
+ let client_server_map = client_server_map.clone();
+ let config = get_config();
+ let log_client_disconnections = config.general.log_client_disconnections;
+ let max_connections = config.general.max_connections;
+
+ tokio::task::spawn(async move {
+ let connection_id = TOTAL_CONNECTION_COUNTER.fetch_add(1, Ordering::Relaxed) as u64 + 1;
+ let current_clients = CURRENT_CLIENT_COUNT.fetch_add(1, Ordering::SeqCst);
+ if current_clients as u64 > max_connections {
+ warn!("[#c{connection_id}] unix client rejected: too many clients (current={current_clients}, max={max_connections})");
+ if let Err(err) = crate::client::client_entrypoint_too_many_clients_already_unix(
+ socket,
+ connection_id,
+ )
+ .await
+ {
+ warn!("[#c{connection_id}] unix client rejection response failed: {err}");
}
- };
+ CURRENT_CLIENT_COUNT.fetch_add(-1, Ordering::SeqCst);
+ return;
+ }
+ let start = Utc::now().naive_utc();
+ let result = crate::client::client_entrypoint_unix(
+ socket,
+ client_server_map,
+ admin_only,
+ connection_id,
+ )
+ .await;
+ log_session_end(
+ result,
+ connection_id,
+ "unix:",
+ start,
+ log_client_disconnections,
+ );
CURRENT_CLIENT_COUNT.fetch_add(-1, Ordering::SeqCst);
});
}
@@ -471,6 +538,26 @@ pub fn run_server(args: Args, config: Config) -> Result<(), Box {}
+ UnixSocketCleanup::Missing => {}
+ UnixSocketCleanup::Skipped { reason } => {
+ info!(
+ "Leaving Unix socket {} in place: {reason}",
+ ownership.path
+ );
+ }
+ UnixSocketCleanup::Failed { err } => {
+ warn!("Failed to remove Unix socket {}: {err}", ownership.path);
+ }
+ }
+ }
+
info!("Shutting down...");
});
@@ -758,3 +845,487 @@ fn spawn_shutdown_timer(exit_tx: mpsc::Sender<()>, shutdown_timeout: Duration) {
}
});
}
+
+/// Identity of a Unix socket file this process bound to, captured as
+/// `(dev, ino)` plus the original path. Used to decide at shutdown whether
+/// the inode on disk is still ours or has been replaced by a successor
+/// process during a binary upgrade.
+#[cfg(unix)]
+#[derive(Debug, Clone)]
+struct UnixSocketOwnership {
+ path: String,
+ dev: u64,
+ ino: u64,
+}
+
+#[cfg(unix)]
+#[derive(Debug, PartialEq, Eq)]
+enum UnixSocketCleanup {
+ /// The inode matched; the file has been removed.
+ Removed,
+ /// Nothing was on disk at the captured path.
+ Missing,
+ /// A different inode sits at the path — a successor rebound it.
+ Skipped { reason: String },
+ /// Removal was attempted but the syscall returned an error.
+ Failed { err: String },
+}
+
+#[cfg(unix)]
+impl UnixSocketOwnership {
+ /// Stat the path and remember `(dev, ino)` so future cleanup can verify
+ /// the inode has not been replaced.
+ fn capture(path: &str) -> Result {
+ use std::os::unix::fs::MetadataExt;
+ let meta = std::fs::metadata(path)?;
+ Ok(Self {
+ path: path.to_string(),
+ dev: meta.dev(),
+ ino: meta.ino(),
+ })
+ }
+
+ /// Remove the socket file only if the inode on disk still matches the
+ /// one captured at `capture` time.
+ fn cleanup_if_ours(&self) -> UnixSocketCleanup {
+ match Self::inspect(&self.path, self.dev, self.ino) {
+ CleanupDecision::Remove => match std::fs::remove_file(&self.path) {
+ Ok(()) => UnixSocketCleanup::Removed,
+ Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
+ UnixSocketCleanup::Missing
+ }
+ Err(err) => UnixSocketCleanup::Failed {
+ err: err.to_string(),
+ },
+ },
+ CleanupDecision::Missing => UnixSocketCleanup::Missing,
+ CleanupDecision::Skip(reason) => UnixSocketCleanup::Skipped { reason },
+ }
+ }
+
+ /// Pure decision function: given a path and the expected `(dev, ino)`,
+ /// should the caller proceed to unlink the file? Split out so the logic
+ /// can be unit-tested without touching real filesystem state.
+ fn inspect(path: &str, expected_dev: u64, expected_ino: u64) -> CleanupDecision {
+ use std::os::unix::fs::MetadataExt;
+ match std::fs::symlink_metadata(path) {
+ Ok(meta) => {
+ let dev = meta.dev();
+ let ino = meta.ino();
+ if dev == expected_dev && ino == expected_ino {
+ CleanupDecision::Remove
+ } else {
+ CleanupDecision::Skip(format!(
+ "inode changed (expected dev={expected_dev} ino={expected_ino}, found dev={dev} ino={ino}); another process owns the path now"
+ ))
+ }
+ }
+ Err(err) if err.kind() == std::io::ErrorKind::NotFound => CleanupDecision::Missing,
+ Err(err) => CleanupDecision::Skip(format!("stat failed: {err}")),
+ }
+ }
+}
+
+#[cfg(unix)]
+#[derive(Debug, PartialEq, Eq)]
+enum CleanupDecision {
+ Remove,
+ Missing,
+ Skip(String),
+}
+
+/// Log the end of a client session using a shared format string. Both the
+/// TCP and Unix accept branches used to inline the same match on
+/// `Result