Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7e7ee94
Add Unix socket listener with proper HBA support
Apr 5, 2026
86531ee
Add BDD tests, CHANGELOG, fix annotated config comments
Apr 5, 2026
b27392b
Add Unix socket benchmarks to AWS Fargate bench suite
Apr 5, 2026
b9801e1
Fix odyssey unix socket instance startup in bench suite
Apr 6, 2026
8ba673c
Tail Fargate task logs while waiting for completion
Apr 6, 2026
4f3056f
Add unix_socket_mode config option
Apr 6, 2026
c97a50c
Fix clippy warnings in tests
Apr 6, 2026
34e0d9a
Extend Fargate bench timeout to 3 hours and download full log
Apr 6, 2026
7c96ac6
Filter Fargate logs server-side and add time window
Apr 6, 2026
1f9b791
Locate benchmark markers via filter-log-events instead of paginating
Apr 6, 2026
e938b3c
Merge 3.4.1 section into 3.4.0 in changelog
Apr 6, 2026
50d6108
Move Unreleased changelog items into 3.4.0
Apr 6, 2026
5e59254
Cover check_hba is_unix branch with unit tests
Apr 6, 2026
0d3c709
Close chmod race on Unix socket startup via umask guard
Apr 6, 2026
1240615
Refuse to unlink a live Unix socket at startup
Apr 6, 2026
8426c34
Warn when legacy general.hba cannot protect Unix socket clients
Apr 6, 2026
d5327a5
Reply with PostgreSQL error when a Unix client hits max_connections
Apr 6, 2026
f56fb04
Skip Unix socket cleanup when another process already owns the inode
Apr 6, 2026
030fcda
Extract Unix listener startup into a testable function
Apr 6, 2026
c2f587c
Replace is_unix bool with ClientTransport enum
Apr 6, 2026
5efb46e
Drop client_entrypoint copy-paste into one helper
Apr 6, 2026
ed1195f
Share post-disconnect logging between TCP and Unix accept branches
Apr 7, 2026
911c263
Add Unix socket BDD coverage for HBA and TLS interactions
Apr 7, 2026
8a9ee6f
Cover Unix socket md5 authentication in BDD
Apr 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 82 additions & 10 deletions .github/workflows/bench-aws-fargate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -325,33 +325,55 @@ jobs:

# Wait for task to complete with a custom loop (longer timeout)
echo "Waiting for task to complete..."
MAX_ATTEMPTS=240 # 240 * 30s = 120 minutes
MAX_ATTEMPTS=360 # 360 * 30s = 180 minutes (3 hours)
ATTEMPT=0
LOG_GROUP="/ecs/${CLUSTER}"
TASK_ID=$(echo "${TASK_ARN}" | awk -F/ '{print $NF}')
LOG_STREAM="benchmark/benchmark-container/${TASK_ID}"
LAST_LOG_TS=0
while [ $ATTEMPT -lt $MAX_ATTEMPTS ]; do
STATUS=$(aws ecs describe-tasks \
--cluster "${CLUSTER}" \
--tasks "${TASK_ARN}" \
--query 'tasks[0].lastStatus' \
--output text)

echo "Attempt $((ATTEMPT+1))/${MAX_ATTEMPTS}: Task status is ${STATUS}"

if [ "${STATUS}" == "STOPPED" ]; then
echo "✅ Task has stopped."
break
fi

if [ "${STATUS}" == "None" ]; then
echo "❌ Error: Task not found."
exit 1
fi


# Tail recent log events every ~5 attempts (~2.5 minutes)
if [ "${STATUS}" == "RUNNING" ] && [ $((ATTEMPT % 5)) -eq 0 ]; then
LOG_JSON=$(aws logs get-log-events \
--log-group-name "${LOG_GROUP}" \
--log-stream-name "${LOG_STREAM}" \
--start-time "${LAST_LOG_TS}" \
--start-from-head \
--limit 50 \
--output json 2>/dev/null || echo '{"events":[]}')
EVENT_COUNT=$(echo "${LOG_JSON}" | jq '.events | length')
if [ "${EVENT_COUNT}" -gt 0 ]; then
echo "----- recent logs (last ${EVENT_COUNT} lines) -----"
echo "${LOG_JSON}" | jq -r '.events[] | .message'
echo "---------------------------------------------------"
LAST_LOG_TS=$(echo "${LOG_JSON}" | jq '.events[-1].timestamp + 1')
fi
fi

sleep 30
ATTEMPT=$((ATTEMPT+1))
done

if [ $ATTEMPT -eq $MAX_ATTEMPTS ]; then
echo "❌ Error: Timeout waiting for task to complete after 120 minutes."
echo "❌ Error: Timeout waiting for task to complete after 180 minutes."
# Try to stop the task if it's still running
aws ecs stop-task --cluster "${CLUSTER}" --task "${TASK_ARN}" --reason "Timeout in GitHub Actions" || true
exit 1
Expand Down Expand Up @@ -404,11 +426,61 @@ jobs:
LOG_ATTEMPT=$((LOG_ATTEMPT+1))
done

# Download logs using jq to ensure each event is on a new line
aws logs get-log-events \
# The bench writes the markdown file as a base64 block bracketed by
# ===BEGIN_BENCHMARK_RESULTS=== / ===END_BENCHMARK_RESULTS===, and these
# markers always sit at the very end of the run. Locate the BEGIN marker
# via filter-log-events (server-side, single request) and then download
# only the events after that timestamp. This avoids paginating hundreds
# of MB of CloudWatch events from the noisy odyssey logs.
: > benchmark-results/stdout.txt
MARKER_TS=$(aws logs filter-log-events \
--log-group-name "${LOG_GROUP}" \
--log-stream-name "${LOG_STREAM}" \
--output json | jq -r '.events[].message' > benchmark-results/stdout.txt
--log-stream-names "${LOG_STREAM}" \
--filter-pattern '"===BEGIN_BENCHMARK_RESULTS==="' \
--output json | jq '.events[0].timestamp // empty')

if [ -z "${MARKER_TS}" ]; then
echo "❌ ===BEGIN_BENCHMARK_RESULTS=== marker not found in log stream"
echo "The benchmark scenario likely did not finish — fetching last 200 events for context"
aws logs get-log-events \
--log-group-name "${LOG_GROUP}" \
--log-stream-name "${LOG_STREAM}" \
--limit 200 \
--output json | jq -r '.events[].message' >> benchmark-results/stdout.txt
else
echo "Found BEGIN marker at timestamp ${MARKER_TS}"
NEXT_TOKEN=""
while :; do
if [ -z "${NEXT_TOKEN}" ]; then
PAGE=$(aws logs get-log-events \
--log-group-name "${LOG_GROUP}" \
--log-stream-name "${LOG_STREAM}" \
--start-time "${MARKER_TS}" \
--start-from-head \
--output json)
else
PAGE=$(aws logs get-log-events \
--log-group-name "${LOG_GROUP}" \
--log-stream-name "${LOG_STREAM}" \
--start-time "${MARKER_TS}" \
--start-from-head \
--next-token "${NEXT_TOKEN}" \
--output json)
fi
EVENTS=$(echo "${PAGE}" | jq '.events | length')
echo "${PAGE}" | jq -r '.events[].message' >> benchmark-results/stdout.txt
# Stop as soon as the END marker is in the file
if grep -q "===END_BENCHMARK_RESULTS===" benchmark-results/stdout.txt; then
break
fi
NEW_TOKEN=$(echo "${PAGE}" | jq -r '.nextForwardToken')
if [ "${EVENTS}" -eq 0 ] || [ "${NEW_TOKEN}" = "${NEXT_TOKEN}" ]; then
break
fi
NEXT_TOKEN="${NEW_TOKEN}"
done
fi
echo "Downloaded $(wc -l < benchmark-results/stdout.txt) log lines"

echo "Extracting benchmark results from logs..."

Expand Down
111 changes: 93 additions & 18 deletions .github/workflows/fetch-fargate-logs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,27 @@ on:
description: 'ECS task ID (the UUID part, e.g. 8f1d948c60c74239998e8be8dee87ea7)'
required: true
type: string
filter_pattern:
description: 'CloudWatch Logs filter pattern (empty = no filter, e.g. "?ERROR ?pgbench ?Scenario")'
required: false
default: ''
type: string
start_minute:
description: 'Skip first N minutes after task start (helps avoid setup spam)'
required: false
default: '0'
type: string
end_minute:
description: 'Stop fetching after N minutes from task start (0 = no limit)'
required: false
default: '0'
type: string

jobs:
fetch-logs:
name: Fetch CloudWatch Logs
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
Expand All @@ -23,8 +39,14 @@ jobs:
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ secrets.AWS_REGION }}

- name: Download full log stream from CloudWatch
- name: Download log stream from CloudWatch
env:
FILTER_PATTERN: ${{ inputs.filter_pattern }}
START_MINUTE: ${{ inputs.start_minute }}
END_MINUTE: ${{ inputs.end_minute }}
run: |
set -euo pipefail

CLUSTER="${{ secrets.ECS_CLUSTER_NAME }}"
TASK_ID="${{ inputs.task_id }}"
LOG_GROUP="/ecs/${CLUSTER}"
Expand All @@ -36,35 +58,88 @@ jobs:

echo "Fetching ${LOG_GROUP}/${LOG_STREAM}"

# Find the timestamp of the first event in the stream so start/end offsets
# are interpreted relative to "task start" rather than wall-clock epoch.
FIRST_TS=$(aws logs get-log-events \
--log-group-name "${LOG_GROUP}" \
--log-stream-name "${LOG_STREAM}" \
--start-from-head \
--limit 1 \
--output json | jq '.events[0].timestamp // 0')
if [ "${FIRST_TS}" = "0" ]; then
echo "❌ Log stream is empty or unreachable"
exit 1
fi
echo "First event timestamp: ${FIRST_TS} ($(date -u -d @$((FIRST_TS / 1000)) +%FT%TZ))"

START_TS=$((FIRST_TS + START_MINUTE * 60 * 1000))
if [ "${END_MINUTE}" -gt 0 ]; then
END_TS=$((FIRST_TS + END_MINUTE * 60 * 1000))
END_ARG=(--end-time "${END_TS}")
echo "Window: [+${START_MINUTE}min, +${END_MINUTE}min] (start=${START_TS}, end=${END_TS})"
else
END_ARG=()
echo "Window: [+${START_MINUTE}min, end] (start=${START_TS})"
fi

# Use filter-log-events for every page. When a pattern is supplied it is
# applied server-side by AWS, so we don't pull half a gigabyte of odyssey
# spam through the paginator. When no pattern is supplied, filter-log-events
# is still used (with no --filter-pattern) so the start/end time window applies.
NEXT_TOKEN=""
PAGE_NUM=0
TOTAL=0
while :; do
if [ -z "${NEXT_TOKEN}" ]; then
PAGE=$(aws logs get-log-events \
--log-group-name "${LOG_GROUP}" \
--log-stream-name "${LOG_STREAM}" \
--start-from-head \
--output json)
if [ -n "${NEXT_TOKEN}" ]; then
if [ -n "${FILTER_PATTERN}" ]; then
PAGE=$(aws logs filter-log-events \
--log-group-name "${LOG_GROUP}" \
--log-stream-names "${LOG_STREAM}" \
--start-time "${START_TS}" \
"${END_ARG[@]}" \
--filter-pattern "${FILTER_PATTERN}" \
--next-token "${NEXT_TOKEN}" \
--output json)
else
PAGE=$(aws logs filter-log-events \
--log-group-name "${LOG_GROUP}" \
--log-stream-names "${LOG_STREAM}" \
--start-time "${START_TS}" \
"${END_ARG[@]}" \
--next-token "${NEXT_TOKEN}" \
--output json)
fi
else
PAGE=$(aws logs get-log-events \
--log-group-name "${LOG_GROUP}" \
--log-stream-name "${LOG_STREAM}" \
--start-from-head \
--next-token "${NEXT_TOKEN}" \
--output json)
if [ -n "${FILTER_PATTERN}" ]; then
PAGE=$(aws logs filter-log-events \
--log-group-name "${LOG_GROUP}" \
--log-stream-names "${LOG_STREAM}" \
--start-time "${START_TS}" \
"${END_ARG[@]}" \
--filter-pattern "${FILTER_PATTERN}" \
--output json)
else
PAGE=$(aws logs filter-log-events \
--log-group-name "${LOG_GROUP}" \
--log-stream-names "${LOG_STREAM}" \
--start-time "${START_TS}" \
"${END_ARG[@]}" \
--output json)
fi
fi
EVENTS=$(echo "${PAGE}" | jq '.events | length')
echo "${PAGE}" | jq -r '.events[] | "\(.timestamp) \(.message)"' >> "${OUT}"
NEW_TOKEN=$(echo "${PAGE}" | jq -r '.nextForwardToken')
PAGE_NUM=$((PAGE_NUM+1))
echo "page ${PAGE_NUM}: +${EVENTS} events (total $(wc -l < "${OUT}"))"
if [ "${EVENTS}" -eq 0 ] || [ "${NEW_TOKEN}" = "${NEXT_TOKEN}" ]; then
TOTAL=$((TOTAL + EVENTS))
PAGE_NUM=$((PAGE_NUM + 1))
echo "page ${PAGE_NUM}: +${EVENTS} events (total ${TOTAL})"
NEW_TOKEN=$(echo "${PAGE}" | jq -r '.nextToken // empty')
if [ -z "${NEW_TOKEN}" ]; then
break
fi
NEXT_TOKEN="${NEW_TOKEN}"
done

echo "Downloaded $(wc -l < "${OUT}") log lines into ${OUT}"
echo "Downloaded ${TOTAL} log lines into ${OUT}"
echo "File size: $(du -h "${OUT}" | cut -f1)"

- name: Upload logs as artifact
Expand Down
37 changes: 22 additions & 15 deletions documentation/en/src/changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@
# Changelog

### 3.4.1 <small>Apr 3, 2026</small>
### 3.4.0 <small>Apr 1, 2026</small>

**New Features:**

- **Unix socket listener.** `unix_socket_dir` creates `.s.PGSQL.<port>` socket file. Connect with `psql -h <dir>` or `pgbench -h <dir>`. No TCP overhead on local connections.

- **HBA `local` rule matching.** `local` rules in pg_hba now apply to Unix socket connections. `host`/`hostssl`/`hostnossl` rules apply only to TCP. Previously `local` rules were parsed but ignored.

- **`unix_socket_mode` controls socket file permissions.** New `[general]` setting fixes the permission bits on `.s.PGSQL.<port>` after bind, so the access surface no longer depends on the process umask. Octal string, default `"0600"` (owner only). Set to `"0660"` to grant a Unix group, or `"0666"` to allow any local user. Validated at config load — invalid octal values, setuid/setgid/sticky bits, and overflow into bits above `0o777` are rejected upfront.

- **Pool Coordinator — database-level connection limits.** New `max_db_connections` setting caps total server connections per database across all user pools. When the limit is reached, the coordinator evicts idle connections from users with the largest surplus (respecting `min_guaranteed_pool_size`), then waits for a connection to be returned, and falls back to a reserve pool as last resort. Disabled by default (`max_db_connections = 0`) — zero overhead when not configured. Five new pool-level config fields: `max_db_connections`, `min_connection_lifetime` (eviction age threshold), `reserve_pool_size` (extra slots beyond the limit), `reserve_pool_timeout` (wait before using reserve), `min_guaranteed_pool_size` (per-user eviction protection independent of `min_pool_size`).

- **`SHOW POOL_COORDINATOR` admin command.** Displays per-database coordinator status: configured limits, current connection count, reserve usage, cumulative evictions, reserve acquisitions, and client exhaustion errors.

- **Pool Coordinator Prometheus metrics.** Seven new metrics under `pg_doorman_pool_coordinator{type, database}`: `connections` (current), `reserve_in_use` (current), `max_connections` (configured limit), `reserve_pool_size` (configured reserve), `evictions_total`, `reserve_acquisitions_total`, `exhaustions_total` (client errors from full exhaustion — primary pager signal).

- **Reserve pressure relief.** Idle reserve connections (created under `max_db_connections` pressure) are closed early by the retain cycle once idle longer than `min_connection_lifetime`, returning reserve capacity before the regular `idle_timeout` fires.

- **Runtime log level control via admin `SET` command.** Change log level without restarting the pooler: `SET log_level = 'debug'` for global, `SET log_level = 'warn,pg_doorman::pool::pool_coordinator=debug'` for per-module (RUST_LOG syntax). View current level with `SHOW LOG_LEVEL`. Changes are ephemeral (lost on restart). Zero overhead on the hot path at production log levels — filtering uses lock-free `ArcSwap` instead of `RwLock`.

**Improvements:**

Expand All @@ -14,25 +32,14 @@

- **Prepared statement cache eviction log.** Shows truncated query text and current cache size (`size=99/100`) to help diagnose cache sizing issues.

**New features:**

- **Runtime log level control via admin `SET` command.** Change log level without restarting the pooler: `SET log_level = 'debug'` for global, `SET log_level = 'warn,pg_doorman::pool::pool_coordinator=debug'` for per-module (RUST_LOG syntax). View current level with `SHOW LOG_LEVEL`. Changes are ephemeral (lost on restart). Zero overhead on the hot path at production log levels — filtering uses lock-free `ArcSwap` instead of `RwLock`.

**Security:**

- **Removed password hash from logs.** The "unsupported password type" warning no longer includes the password hash value.

### 3.4.0 <small>Apr 1, 2026</small>

**New Features:**

- **Pool Coordinator — database-level connection limits.** New `max_db_connections` setting caps total server connections per database across all user pools. When the limit is reached, the coordinator evicts idle connections from users with the largest surplus (respecting `min_guaranteed_pool_size`), then waits for a connection to be returned, and falls back to a reserve pool as last resort. Disabled by default (`max_db_connections = 0`) — zero overhead when not configured. Five new pool-level config fields: `max_db_connections`, `min_connection_lifetime` (eviction age threshold), `reserve_pool_size` (extra slots beyond the limit), `reserve_pool_timeout` (wait before using reserve), `min_guaranteed_pool_size` (per-user eviction protection independent of `min_pool_size`).

- **`SHOW POOL_COORDINATOR` admin command.** Displays per-database coordinator status: configured limits, current connection count, reserve usage, cumulative evictions, reserve acquisitions, and client exhaustion errors.
**Known limitations (Unix socket):**

- **Pool Coordinator Prometheus metrics.** Seven new metrics under `pg_doorman_pool_coordinator{type, database}`: `connections` (current), `reserve_in_use` (current), `max_connections` (configured limit), `reserve_pool_size` (configured reserve), `evictions_total`, `reserve_acquisitions_total`, `exhaustions_total` (client errors from full exhaustion — primary pager signal).

- **Reserve pressure relief.** Idle reserve connections (created under `max_db_connections` pressure) are closed early by the retain cycle once idle longer than `min_connection_lifetime`, returning reserve capacity before the regular `idle_timeout` fires.
- Unix listener not handed off during `SIGUSR2` binary upgrade. New process re-creates the socket; connections refused for ~100ms.
- `only_ssl_connections` does not reject Unix socket connections. Unix sockets do not need TLS for transport security.

### 3.3.5 <small>Mar 31, 2026</small>

Expand Down
9 changes: 9 additions & 0 deletions pg_doorman.toml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,15 @@ tcp_user_timeout = 60
# Default: 1048576 (1048576 bytes)
unix_socket_buffer_size = 1048576

# Directory for Unix socket listener. Creates .s.PGSQL.<port> file. Use psql -h <dir> or pgbench -h <dir> to connect.
# When set, pg_doorman also accepts connections via Unix socket (.s.PGSQL.<port>).
# unix_socket_dir = "/var/run/pg_doorman"

# Permission mode applied to the .s.PGSQL.<port> socket file after bind.
# Octal string; only permission bits up to 0o777 are accepted — setuid/setgid/sticky
# bits and values above 0o777 are rejected at config load.
# Default: "0600"
unix_socket_mode = "0600"

# --------------------------------------------------------------------------
# Connection Limits
# --------------------------------------------------------------------------
Expand Down
9 changes: 9 additions & 0 deletions pg_doorman.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,15 @@ general:
# Default: "1MB" (1048576 bytes)
unix_socket_buffer_size: "1MB"

# Directory for Unix socket listener. Creates .s.PGSQL.<port> file. Use psql -h <dir> or pgbench -h <dir> to connect.
# When set, pg_doorman also accepts connections via Unix socket (.s.PGSQL.<port>).
# unix_socket_dir: "/var/run/pg_doorman"

# Permission mode applied to the .s.PGSQL.<port> socket file after bind.
# Octal string; only permission bits up to 0o777 are accepted — setuid/setgid/sticky
# bits and values above 0o777 are rejected at config load.
# Default: "0600"
unix_socket_mode: "0600"

# --------------------------------------------------------------------------
# Connection Limits
# --------------------------------------------------------------------------
Expand Down
12 changes: 12 additions & 0 deletions src/app/generate/annotated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,18 @@ fn write_general_section(w: &mut ConfigWriter, config: &Config) {
"1048576 bytes",
);

write_field_desc(w, fi, "general", "unix_socket_dir");
w.comment(
fi,
"When set, pg_doorman also accepts connections via Unix socket (.s.PGSQL.<port>).",
);
w.commented_kv(fi, "unix_socket_dir", &w.str_val("/var/run/pg_doorman"));
w.blank();

write_field_comment(w, fi, "general", "unix_socket_mode");
w.kv(fi, "unix_socket_mode", &w.str_val(&g.unix_socket_mode));
w.blank();

// --- Connection Limits ---
w.separator(fi, f.section_title("limits").get(w.russian));
w.blank();
Expand Down
Loading
Loading