Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
81b05f1
feat(health): add XState connection health monitor with per-URL state
polaz Apr 3, 2026
e0183b3
test(health): comprehensive unit tests for health monitor and handlers
polaz Apr 3, 2026
ec09100
chore(deps): bump all dependencies, yarn 4.13, hono CVE fix
polaz Apr 3, 2026
f9f63b3
fix(health): remove ProxyAgent connect option, clamp headersTimeout, …
polaz Apr 3, 2026
4fca838
fix(health): typed timeout sentinel, keepAlive gap clamp, calendar da…
polaz Apr 3, 2026
f29c6b2
refactor(url): consolidate InstanceRegistry normalization with normal…
polaz Apr 3, 2026
977f300
fix(registry): remove .js extension from import, tighten handler test…
polaz Apr 3, 2026
0a75561
fix(health): align per-URL filter stats with connecting state, normal…
polaz Apr 3, 2026
0fc2a0e
fix(handlers): use plain Error for handler timeout, correct manage_co…
polaz Apr 3, 2026
e30112a
fix(handlers): remove unused InitializationTimeoutError import
polaz Apr 3, 2026
6ac4123
fix(health): bootstrapComplete before refreshCache, correct timeout c…
polaz Apr 3, 2026
2799170
fix(health): use PROBE_MS for fast-path check, include URL in resolve…
polaz Apr 4, 2026
08caa98
fix(health): revert probe to status < 500 for reachability check
polaz Apr 4, 2026
c1039de
fix(fetch): enforce connect timeout for ProxyAgent via proxyTls signal
polaz Apr 4, 2026
2a96d29
fix(fetch): use numeric timeout for ProxyAgent proxyTls, assert getSt…
polaz Apr 4, 2026
f453e8e
fix(health): strict parseTimerMs, requestTls timeout, URL in error me…
polaz Apr 4, 2026
4a963f2
fix(config): strict parseStrictInt for all count env vars, URL in err…
polaz Apr 4, 2026
caab7f4
fix(handlers): add reachable manage_context test, document OAuth fall…
polaz Apr 4, 2026
c9175cd
fix(health): fail open getRestrictedParameters for unknown version, a…
polaz Apr 4, 2026
c078374
fix(config): allow zero retry attempts, strict refreshCache and repor…
polaz Apr 4, 2026
66ae8b6
fix(config): use isSafeInteger, extract nested ternary, pin test asse…
polaz Apr 4, 2026
2c6cef2
fix(config): trim env values before strict parse, pin health gate URL…
polaz Apr 4, 2026
0c68d6b
test(handlers): use findLast for handler lookup, add OAuth reachabili…
polaz Apr 4, 2026
f1c49c1
test(handlers): pin bootstrap timeout reportError to 'timed out' message
polaz Apr 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
940 changes: 940 additions & 0 deletions .yarn/releases/yarn-4.13.0.cjs

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions .yarnrc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ enableGlobalCache: false
enableMirror: false

nodeLinker: node-modules

yarnPath: .yarn/releases/yarn-4.13.0.cjs
7 changes: 4 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
FROM node:22-alpine AS dependencies

# Enable Corepack and prepare Yarn
RUN corepack enable && corepack prepare yarn@4.12.0 --activate
RUN corepack enable && corepack prepare yarn@4.13.0 --activate

# Set working directory
WORKDIR /app

# Copy only package files for better caching
COPY package.json yarn.lock .yarnrc.yml ./
COPY .yarn/releases/ .yarn/releases/

# Install ALL dependencies (including dev) for building
RUN --mount=type=cache,target=/root/.yarn/berry/cache \
Expand All @@ -26,7 +27,7 @@
FROM node:22-alpine AS builder

# Enable Corepack and prepare Yarn (same version)
RUN corepack enable && corepack prepare yarn@4.12.0 --activate
RUN corepack enable && corepack prepare yarn@4.13.0 --activate

# Set working directory
WORKDIR /app
Expand Down Expand Up @@ -54,7 +55,7 @@
FROM node:22-alpine AS production-deps

# Enable Corepack and prepare Yarn (same version)
RUN corepack enable && corepack prepare yarn@4.12.0 --activate
RUN corepack enable && corepack prepare yarn@4.13.0 --activate

# Set working directory
WORKDIR /app
Expand Down Expand Up @@ -102,7 +103,7 @@
# ============================================================================

# Core GitLab connection settings (required)
ENV GITLAB_TOKEN=""

Check warning on line 106 in Dockerfile

View workflow job for this annotation

GitHub Actions / Docker Build

Sensitive data should not be used in the ARG or ENV commands

SecretsUsedInArgOrEnv: Do not use ARG or ENV instructions for sensitive data (ENV "GITLAB_TOKEN") More info: https://docs.docker.com/go/dockerfile/rule/secrets-used-in-arg-or-env/

Check warning on line 106 in Dockerfile

View workflow job for this annotation

GitHub Actions / Build Docker Image

Sensitive data should not be used in the ARG or ENV commands

SecretsUsedInArgOrEnv: Do not use ARG or ENV instructions for sensitive data (ENV "GITLAB_TOKEN") More info: https://docs.docker.com/go/dockerfile/rule/secrets-used-in-arg-or-env/
ENV GITLAB_API_URL=""

# Optional GitLab connection settings
Expand Down
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Advanced GitLab MCP server — 44 tools across 18 entity types with CQRS archite

- **44 tools** across 18 entity types — projects, merge requests, pipelines, work items, wiki, and more
- **CQRS architecture** — `browse_*` for queries, `manage_*` for commands
- **Connection resilience** — Bounded startup, auto-reconnect with exponential backoff, disconnected mode when GitLab is unreachable
- **Multi-instance support** — Connect to multiple GitLab instances with per-instance OAuth and rate limiting
- **Multiple transports** — stdio, SSE, StreamableHTTP
- **OAuth 2.1** — Per-user authentication via Claude Custom Connector
Expand Down Expand Up @@ -80,6 +81,25 @@ docker run -i --rm -e GITLAB_TOKEN=your_token \
ghcr.io/structured-world/gitlab-mcp:latest
```

## Connection Resilience

The server handles GitLab connectivity issues gracefully:

- **Bounded startup** — Server starts within `GITLAB_INIT_TIMEOUT_MS` (default 5s) regardless of GitLab availability
- **Disconnected mode** — When GitLab is unreachable (`disconnected`/`failed` state), only `manage_context` tools are exposed (e.g. `whoami`, `switch_profile`, `set_scope` for diagnostics). During active reconnect (`connecting` state), the full tool list remains available so MCP clients don't lose their tool catalog during brief outages. MCP clients are notified of tool availability changes via `tools/list_changed`
- **Auto-reconnect** — Exponential backoff reconnection (5s → 60s) with ±10% jitter
- **Error classification** — Transient errors (network, 5xx, timeouts) trigger auto-reconnect. Auth/config errors at startup transition to `failed` state (no auto-reconnect). Runtime auth errors from tool calls are forwarded to `HealthMonitor.reportError()` via `classifyError()`; the remaining gap is token-revocation/403 detection (#370)
- **Instance health monitor** — The configured GitLab instance is monitored with its own XState state machine. Multi-instance OAuth URLs currently pass through as reachable.

| Variable | Default | Description |
|----------|---------|-------------|
| `GITLAB_INIT_TIMEOUT_MS` | `5000` | Max time to wait for GitLab during startup |
| `GITLAB_RECONNECT_BASE_DELAY_MS` | `5000` | Initial reconnect delay (doubles each attempt) |
| `GITLAB_RECONNECT_MAX_DELAY_MS` | `60000` | Maximum reconnect delay |
| `GITLAB_HEALTH_CHECK_INTERVAL_MS` | `60000` | Health check interval when connected |
| `GITLAB_FAILURE_THRESHOLD` | `3` | Consecutive transient failures before disconnecting |
| `GITLAB_TOOL_TIMEOUT_MS` | `120000` | Max time for tool/bootstrap execution before timeout |

## Feature Flags

| Flag | Default | Tools Enabled |
Expand Down
20 changes: 20 additions & 0 deletions README.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Advanced GitLab MCP server — __TOOL_COUNT__ tools across __ENTITY_COUNT__ enti

- **__TOOL_COUNT__ tools** across __ENTITY_COUNT__ entity types — projects, merge requests, pipelines, work items, wiki, and more
- **CQRS architecture** — `browse_*` for queries, `manage_*` for commands
- **Connection resilience** — Bounded startup, auto-reconnect with exponential backoff, disconnected mode when GitLab is unreachable
- **Multi-instance support** — Connect to multiple GitLab instances with per-instance OAuth and rate limiting
- **Multiple transports** — stdio, SSE, StreamableHTTP
- **OAuth 2.1** — Per-user authentication via Claude Custom Connector
Expand Down Expand Up @@ -80,6 +81,25 @@ docker run -i --rm -e GITLAB_TOKEN=your_token \
ghcr.io/structured-world/gitlab-mcp:latest
```

## Connection Resilience

The server handles GitLab connectivity issues gracefully:

- **Bounded startup** — Server starts within `GITLAB_INIT_TIMEOUT_MS` (default 5s) regardless of GitLab availability
- **Disconnected mode** — When GitLab is unreachable (`disconnected`/`failed` state), only `manage_context` tools are exposed (e.g. `whoami`, `switch_profile`, `set_scope` for diagnostics). During active reconnect (`connecting` state), the full tool list remains available so MCP clients don't lose their tool catalog during brief outages. MCP clients are notified of tool availability changes via `tools/list_changed`
- **Auto-reconnect** — Exponential backoff reconnection (5s → 60s) with ±10% jitter
- **Error classification** — Transient errors (network, 5xx, timeouts) trigger auto-reconnect. Auth/config errors at startup transition to `failed` state (no auto-reconnect). Runtime auth errors from tool calls are forwarded to `HealthMonitor.reportError()` via `classifyError()`; the remaining gap is token-revocation/403 detection (#370)
- **Instance health monitor** — The configured GitLab instance is monitored with its own XState state machine. Multi-instance OAuth URLs currently pass through as reachable.

| Variable | Default | Description |
|----------|---------|-------------|
| `GITLAB_INIT_TIMEOUT_MS` | `5000` | Max time to wait for GitLab during startup |
| `GITLAB_RECONNECT_BASE_DELAY_MS` | `5000` | Initial reconnect delay (doubles each attempt) |
| `GITLAB_RECONNECT_MAX_DELAY_MS` | `60000` | Maximum reconnect delay |
| `GITLAB_HEALTH_CHECK_INTERVAL_MS` | `60000` | Health check interval when connected |
| `GITLAB_FAILURE_THRESHOLD` | `3` | Consecutive transient failures before disconnecting |
| `GITLAB_TOOL_TIMEOUT_MS` | `120000` | Max time for tool/bootstrap execution before timeout |

## Feature Flags

| Flag | Default | Tools Enabled |
Expand Down
21 changes: 11 additions & 10 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"resolutions": {
"lodash": "4.17.23",
"lodash-es": "4.17.23",
"hono": "4.11.7",
"hono": "4.12.10",
"diff": "4.0.4",
"esbuild": ">=0.25.0"
},
Expand Down Expand Up @@ -406,7 +406,7 @@
],
"license": "Apache-2.0",
"author": "Dmitry Prudnikov <mail@polaz.com>",
"packageManager": "yarn@4.12.0",
"packageManager": "yarn@4.13.0",
"contributors": [
{
"name": "zereight",
Expand Down Expand Up @@ -462,8 +462,8 @@
"build:mcpb": "./scripts/build-mcpb.sh"
},
"dependencies": {
"@clack/prompts": "^1.1.0",
"@modelcontextprotocol/sdk": "^1.28.0",
"@clack/prompts": "^1.2.0",
"@modelcontextprotocol/sdk": "^1.29.0",
"@prisma/client": "^7.6.0",
"express": "^5.2.1",
"graphql": "^16.13.2",
Expand All @@ -473,30 +473,31 @@
"pino": "^10.3.1",
"pino-pretty": "^13.1.3",
"transliteration": "^2.6.1",
"undici": "^7.24.6",
"undici": "^8.0.0",
"xstate": "^5.30.0",
"yaml": "^2.8.3",
"zod": "^4.3.6"
},
"devDependencies": {
"@cloudflare/workers-types": "^4.20260329.1",
"@cloudflare/workers-types": "^4.20260402.1",
"@eslint/js": "^10.0.1",
"@graphql-typed-document-node/core": "^3.2.0",
"@structured-world/vue-privacy": "^1.10.0",
"@types/express": "^5.0.6",
"@types/jest": "^30.0.0",
"@types/node": "^25.5.0",
"@types/picomatch": "^4.0.2",
"@typescript-eslint/eslint-plugin": "^8.57.2",
"@typescript-eslint/parser": "^8.57.2",
"@typescript-eslint/eslint-plugin": "^8.58.0",
"@typescript-eslint/parser": "^8.58.0",
"auto-changelog": "^2.5.0",
"cross-env": "^10.1.0",
"dotenv": "^17.3.1",
"dotenv": "^17.4.0",
"eslint": "^10.1.0",
"eslint-plugin-prettier": "^5.5.5",
"jest": "^30.3.0",
"prettier": "^3.8.1",
"prisma": "^7.6.0",
"ts-jest": "^29.4.6",
"ts-jest": "^29.4.9",
"ts-node": "^10.9.2",
"typescript": "^6.0.2",
"vitepress": "^1.6.4"
Expand Down
131 changes: 78 additions & 53 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -342,97 +342,122 @@ export const SSL_PASSPHRASE = process.env.SSL_PASSPHRASE;
// Values: 'true', 'false', 'loopback', 'linklocal', 'uniquelocal', or specific IPs
export const TRUST_PROXY = process.env.TRUST_PROXY;

// Node.js setTimeout/setInterval accept a delay of at most 2^31-1 ms (~24.8 days).
// A larger value is reset to 1ms (Node only emits a TimeoutOverflowWarning), which
// turns the timer into a tight loop. All timer-backed configs are parsed through
// parseTimerMs below, which enforces this ceiling.
export const MAX_SAFE_TIMEOUT_MS = 2_147_483_647;

/**
 * Parse an environment value as a plain decimal integer, or return `fallback`.
 *
 * Only strings made entirely of ASCII digits (after trimming surrounding
 * whitespace) are accepted, so partial matches such as "120s", exponent
 * notation such as "1e3", signs and decimals are all rejected. Values that are
 * not safe integers are rejected as well.
 *
 * @param envValue - raw environment value; when absent, the string form of
 *   `fallback` is run through the same validation
 * @param fallback - value returned whenever the input is rejected
 * @param allowZero - when true, 0 is a valid value (e.g. retry attempts);
 *   otherwise the smallest accepted value is 1
 * @returns the parsed integer, or `fallback` if the input was rejected
 */
function parseStrictInt(envValue: string | undefined, fallback: number, allowZero = false): number {
  const text = envValue == null ? String(fallback) : envValue.trim();
  const digitsOnly = /^\d+$/;

  if (digitsOnly.test(text)) {
    const value = Number(text);
    const lowerBound = allowZero ? 0 : 1;
    if (Number.isSafeInteger(value) && value >= lowerBound) {
      return value;
    }
  }

  return fallback;
}

/**
 * Parse a millisecond duration from an environment value for use as a timer delay.
 *
 * Validation is delegated to parseStrictInt (zero is not allowed); the result
 * is then capped at MAX_SAFE_TIMEOUT_MS.
 *
 * @param envValue - raw environment value, possibly undefined
 * @param fallback - value used when `envValue` is absent or rejected
 * @returns the parsed delay in milliseconds, never above MAX_SAFE_TIMEOUT_MS
 */
function parseTimerMs(envValue: string | undefined, fallback: number): number {
  return Math.min(parseStrictInt(envValue, fallback), MAX_SAFE_TIMEOUT_MS);
}

// SSE heartbeat interval (in milliseconds)
// Sends `: ping\n\n` comments to keep SSE connections alive through proxies (Cloudflare, Envoy, etc.)
// Default 30s — well under Cloudflare's ~100-125s idle timeout
const parsedHeartbeatMs = parseInt(process.env.GITLAB_SSE_HEARTBEAT_MS ?? '30000', 10);
export const SSE_HEARTBEAT_MS =
Number.isFinite(parsedHeartbeatMs) && parsedHeartbeatMs > 0 ? parsedHeartbeatMs : 30000;
export const SSE_HEARTBEAT_MS = parseTimerMs(process.env.GITLAB_SSE_HEARTBEAT_MS, 30000);

// HTTP server keepalive timeout (in milliseconds)
// Must be higher than any upstream proxy timeout (Cloudflare max is 600s for Enterprise)
// Default 620s ensures the Node.js server doesn't close connections before the proxy does
const parsedKeepAliveTimeout = parseInt(
process.env.GITLAB_HTTP_KEEPALIVE_TIMEOUT_MS ?? '620000',
10,
export const HTTP_KEEPALIVE_TIMEOUT_MS = parseTimerMs(
process.env.GITLAB_HTTP_KEEPALIVE_TIMEOUT_MS,
620000,
);
export const HTTP_KEEPALIVE_TIMEOUT_MS =
Number.isFinite(parsedKeepAliveTimeout) && parsedKeepAliveTimeout > 0
? parsedKeepAliveTimeout
: 620000;

// === Granular API timeout configuration ===
// Each phase of an HTTP request has its own timeout to prevent different types of hangs.

// TCP connect timeout (default: 2s)
const parsedConnectTimeoutMs = parseInt(process.env.GITLAB_API_CONNECT_TIMEOUT_MS ?? '2000', 10);
export const CONNECT_TIMEOUT_MS =
Number.isFinite(parsedConnectTimeoutMs) && parsedConnectTimeoutMs > 0
? parsedConnectTimeoutMs
: 2000;
export const CONNECT_TIMEOUT_MS = parseTimerMs(process.env.GITLAB_API_CONNECT_TIMEOUT_MS, 2000);

// Response headers timeout (default: 10s) — time to first response byte after connect
const parsedHeadersTimeoutMs = parseInt(process.env.GITLAB_API_HEADERS_TIMEOUT_MS ?? '10000', 10);
export const HEADERS_TIMEOUT_MS =
Number.isFinite(parsedHeadersTimeoutMs) && parsedHeadersTimeoutMs > 0
? parsedHeadersTimeoutMs
: 10000;
export const HEADERS_TIMEOUT_MS = parseTimerMs(process.env.GITLAB_API_HEADERS_TIMEOUT_MS, 10000);

// Response body timeout (default: 30s) — time to receive full body after headers
// Larger default for big responses (pipeline logs, large diffs)
const parsedBodyTimeoutMs = parseInt(process.env.GITLAB_API_BODY_TIMEOUT_MS ?? '30000', 10);
export const BODY_TIMEOUT_MS =
Number.isFinite(parsedBodyTimeoutMs) && parsedBodyTimeoutMs > 0 ? parsedBodyTimeoutMs : 30000;
export const BODY_TIMEOUT_MS = parseTimerMs(process.env.GITLAB_API_BODY_TIMEOUT_MS, 30000);

// Tool handler timeout (default: 120s) — total time for entire tool execution including retries
const parsedHandlerTimeoutMs = parseInt(process.env.GITLAB_TOOL_TIMEOUT_MS ?? '120000', 10);
export const HANDLER_TIMEOUT_MS =
Number.isFinite(parsedHandlerTimeoutMs) && parsedHandlerTimeoutMs > 0
? parsedHandlerTimeoutMs
: 120000;
export const HANDLER_TIMEOUT_MS = parseTimerMs(process.env.GITLAB_TOOL_TIMEOUT_MS, 120000);

// === Connection health monitoring ===

// Startup initialization timeout — how long to wait for GitLab during server startup
// If exceeded, server starts in disconnected mode and retries in background
export const INIT_TIMEOUT_MS = parseTimerMs(process.env.GITLAB_INIT_TIMEOUT_MS, 5000);

// Reconnect backoff: base delay (doubles each attempt up to max)
export const RECONNECT_BASE_DELAY_MS = parseTimerMs(
process.env.GITLAB_RECONNECT_BASE_DELAY_MS,
5000,
);

// Reconnect backoff: maximum delay between attempts
export const RECONNECT_MAX_DELAY_MS = parseTimerMs(
process.env.GITLAB_RECONNECT_MAX_DELAY_MS,
60000,
);

// Health check interval when connection is healthy (light ping)
export const HEALTH_CHECK_INTERVAL_MS = parseTimerMs(
process.env.GITLAB_HEALTH_CHECK_INTERVAL_MS,
60000,
);

// Consecutive transient failures before transitioning to DISCONNECTED
export const FAILURE_THRESHOLD = parseStrictInt(process.env.GITLAB_FAILURE_THRESHOLD, 3);

// === Connection pool configuration ===
// Max HTTP connections per GitLab instance (default: 25, up from 10)
const parsedPoolMaxConnections = parseInt(process.env.GITLAB_POOL_MAX_CONNECTIONS ?? '25', 10);
export const POOL_MAX_CONNECTIONS =
Number.isFinite(parsedPoolMaxConnections) && parsedPoolMaxConnections > 0
? parsedPoolMaxConnections
: 25;
export const POOL_MAX_CONNECTIONS = parseStrictInt(process.env.GITLAB_POOL_MAX_CONNECTIONS, 25);

// Retry configuration for idempotent operations (GET/HEAD/OPTIONS requests by default)
// Retries on: timeouts, network errors, 5xx server errors, 429 rate limits
export const API_RETRY_ENABLED = process.env.GITLAB_API_RETRY_ENABLED !== 'false';

const parsedMaxAttempts = parseInt(process.env.GITLAB_API_RETRY_MAX_ATTEMPTS ?? '3', 10);
export const API_RETRY_MAX_ATTEMPTS =
Number.isFinite(parsedMaxAttempts) && parsedMaxAttempts >= 0 ? parsedMaxAttempts : 3;

const parsedBaseDelay = parseInt(process.env.GITLAB_API_RETRY_BASE_DELAY_MS ?? '1000', 10);
export const API_RETRY_BASE_DELAY_MS =
Number.isFinite(parsedBaseDelay) && parsedBaseDelay > 0 ? parsedBaseDelay : 1000;
// allowZero: 0 means "single attempt, no retries" — valid config without toggling RETRY_ENABLED
export const API_RETRY_MAX_ATTEMPTS = parseStrictInt(
process.env.GITLAB_API_RETRY_MAX_ATTEMPTS,
3,
true,
);

const parsedMaxDelay = parseInt(process.env.GITLAB_API_RETRY_MAX_DELAY_MS ?? '4000', 10);
export const API_RETRY_MAX_DELAY_MS =
Number.isFinite(parsedMaxDelay) && parsedMaxDelay > 0 ? parsedMaxDelay : 4000;
export const API_RETRY_BASE_DELAY_MS = parseTimerMs(
process.env.GITLAB_API_RETRY_BASE_DELAY_MS,
1000,
);
export const API_RETRY_MAX_DELAY_MS = parseTimerMs(process.env.GITLAB_API_RETRY_MAX_DELAY_MS, 4000);

// Rate limiting configuration
// Per-IP rate limiting (for anonymous requests) - enabled by default
export const RATE_LIMIT_IP_ENABLED = process.env.RATE_LIMIT_IP_ENABLED !== 'false';
export const RATE_LIMIT_IP_WINDOW_MS = parseInt(process.env.RATE_LIMIT_IP_WINDOW_MS ?? '60000', 10); // 1 minute
export const RATE_LIMIT_IP_MAX_REQUESTS = parseInt(
process.env.RATE_LIMIT_IP_MAX_REQUESTS ?? '100',
10,
export const RATE_LIMIT_IP_WINDOW_MS = parseTimerMs(process.env.RATE_LIMIT_IP_WINDOW_MS, 60000); // 1 minute
export const RATE_LIMIT_IP_MAX_REQUESTS = parseStrictInt(
process.env.RATE_LIMIT_IP_MAX_REQUESTS,
100,
);

// Per-session rate limiting (for authenticated requests) - disabled by default
export const RATE_LIMIT_SESSION_ENABLED = process.env.RATE_LIMIT_SESSION_ENABLED === 'true';
export const RATE_LIMIT_SESSION_WINDOW_MS = parseInt(
process.env.RATE_LIMIT_SESSION_WINDOW_MS ?? '60000',
10,
export const RATE_LIMIT_SESSION_WINDOW_MS = parseTimerMs(
process.env.RATE_LIMIT_SESSION_WINDOW_MS,
60000,
);
export const RATE_LIMIT_SESSION_MAX_REQUESTS = parseInt(
process.env.RATE_LIMIT_SESSION_MAX_REQUESTS ?? '300',
10,
export const RATE_LIMIT_SESSION_MAX_REQUESTS = parseStrictInt(
process.env.RATE_LIMIT_SESSION_MAX_REQUESTS,
300,
);

// Transport mode selection:
Expand Down
Loading
Loading