diff --git a/.gitignore b/.gitignore index 186b7c9..1dc3205 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,7 @@ # Lint output /report.xml + +# E2E tests +/e2e-tests/.env +/e2e-tests/mcp-reports/ diff --git a/e2e-tests/README.md b/e2e-tests/README.md new file mode 100644 index 0000000..dd87108 --- /dev/null +++ b/e2e-tests/README.md @@ -0,0 +1,92 @@ +# StackRox MCP E2E Testing + +End-to-end tests for the StackRox MCP server using [gevals](https://github.com/genmcp/gevals). + +## Prerequisites + +- Go 1.25+ +- OpenAI API Key (for AI agent and LLM judge) +- StackRox API Token + +## Setup + +### 1. Build gevals + +```bash +cd e2e-tests +./scripts/build-gevals.sh +``` + +### 2. Configure Environment + +Create a `.env` file: + +```bash +OPENAI_API_KEY=sk-your-key-here +STACKROX_API_TOKEN=your-token-here +``` + +## Running Tests + +```bash +./scripts/run-tests.sh +``` + +Results are saved to `gevals-stackrox-mcp-e2e-out.json`. + +### View Results + +```bash +# Summary +jq '.[] | {taskName, taskPassed}' gevals-stackrox-mcp-e2e-out.json + +# Tool calls +jq '.[].callHistory.ToolCalls[] | {name, arguments: .request.Params.arguments}' gevals-stackrox-mcp-e2e-out.json +``` + +## Test Cases + +| Test | Description | Tool | +|------|-------------|------| +| `list-clusters` | List all clusters | `list_clusters` | +| `cve-affecting-workloads` | CVE impact on deployments | `get_deployments_for_cve` | +| `cve-affecting-clusters` | CVE impact on clusters | `get_clusters_for_cve` | +| `cve-nonexistent` | Handle non-existent CVE | `get_clusters_for_cve` | +| `cve-cluster-scooby` | CVE with cluster filter | `list_clusters`, `get_clusters_for_cve` | +| `cve-cluster-maria` | CVE with cluster filter | `list_clusters` | +| `cve-clusters-general` | General CVE query | `get_clusters_for_cve` | +| `cve-cluster-list` | CVE across clusters | `get_clusters_for_cve` | + +## Configuration + +- **`gevals/eval.yaml`**: Main test configuration, agent settings, assertions +- **`gevals/mcp-config.yaml`**: MCP server configuration +- 
**`gevals/tasks/*.yaml`**: Individual test task definitions + +## How It Works + +Gevals uses a proxy architecture to intercept MCP tool calls: + +1. AI agent receives task prompt +2. Agent calls MCP tool +3. Gevals proxy intercepts and records the call +4. Call forwarded to StackRox MCP server +5. Server executes and returns result +6. Gevals validates assertions and response quality + +## Troubleshooting + +**Tests fail - no tools called** +- Verify StackRox Central is accessible +- Check API token permissions + +**Build errors** +```bash +go mod tidy +./scripts/build-gevals.sh +``` + +## Further Reading + +- [Gevals Documentation](https://github.com/genmcp/gevals) +- [StackRox MCP Server](../README.md) diff --git a/e2e-tests/gevals/eval.yaml b/e2e-tests/gevals/eval.yaml new file mode 100644 index 0000000..7a8de78 --- /dev/null +++ b/e2e-tests/gevals/eval.yaml @@ -0,0 +1,99 @@ +kind: Eval +metadata: + name: "stackrox-mcp-e2e" +config: + agent: + type: "builtin.openai-agent" + model: "gpt-4o" + llmJudge: + env: + baseUrlKey: JUDGE_BASE_URL + apiKeyKey: JUDGE_API_KEY + modelNameKey: JUDGE_MODEL_NAME + mcpConfigFile: mcp-config.yaml + taskSets: + # Test 1: List clusters + - path: tasks/list-clusters.yaml + assertions: + toolsUsed: + - server: stackrox-mcp + toolPattern: "list_clusters" + minToolCalls: 1 + maxToolCalls: 1 + + # Test 2: CVE affecting workloads + - path: tasks/cve-affecting-workloads.yaml + assertions: + toolsUsed: + - server: stackrox-mcp + toolPattern: "get_deployments_for_cve" + argumentsMatch: + cveName: "CVE-2021-31805" + minToolCalls: 1 + maxToolCalls: 1 + + # Test 3: CVE affecting clusters - basic + - path: tasks/cve-affecting-clusters.yaml + assertions: + toolsUsed: + - server: stackrox-mcp + toolPattern: "get_clusters_for_cve" + argumentsMatch: + cveName: "CVE-2016-1000031" + minToolCalls: 1 + maxToolCalls: 3 + + # Test 4: Non-existent CVE + - path: tasks/cve-nonexistent.yaml + assertions: + toolsUsed: + - server: stackrox-mcp + toolPattern: 
"get_clusters_for_cve" + argumentsMatch: + cveName: "CVE-2099-00001" + minToolCalls: 1 + maxToolCalls: 2 + + # Test 5: CVE with specific cluster filter (scooby) + - path: tasks/cve-cluster-scooby.yaml + assertions: + toolsUsed: + - server: stackrox-mcp + toolPattern: "list_clusters" + - server: stackrox-mcp + toolPattern: "get_clusters_for_cve" + argumentsMatch: + cveName: "CVE-2016-1000031" + minToolCalls: 1 + maxToolCalls: 2 + + # Test 6: CVE with specific cluster filter (maria) + - path: tasks/cve-cluster-maria.yaml + assertions: + toolsUsed: + - server: stackrox-mcp + toolPattern: "list_clusters" + minToolCalls: 1 + maxToolCalls: 2 + + # Test 7: CVE affecting clusters - general + - path: tasks/cve-clusters-general.yaml + assertions: + toolsUsed: + - server: stackrox-mcp + toolPattern: "get_clusters_for_cve" + argumentsMatch: + cveName: "CVE-2021-31805" + minToolCalls: 1 + maxToolCalls: 5 + + # Test 8: CVE check with cluster list reference + - path: tasks/cve-cluster-list.yaml + assertions: + toolsUsed: + - server: stackrox-mcp + toolPattern: "get_clusters_for_cve" + argumentsMatch: + cveName: "CVE-2024-52577" + minToolCalls: 1 + maxToolCalls: 5 diff --git a/e2e-tests/gevals/gevals-stackrox-mcp-e2e-out.json b/e2e-tests/gevals/gevals-stackrox-mcp-e2e-out.json new file mode 100644 index 0000000..6547752 --- /dev/null +++ b/e2e-tests/gevals/gevals-stackrox-mcp-e2e-out.json @@ -0,0 +1,889 @@ +[ + { + "taskName": "list-clusters", + "taskPath": "/home/janisz/go/src/github.com/stackrox/stackrox-mcp/e2e-tests/gevals/tasks/list-clusters.yaml", + "taskPassed": true, + "taskOutput": "Here are the clusters you have:\n\n1. **Name**: staging-central-cluster\n - **Type**: OpenShift 4 Cluster\n - **ID**: 65673bd7-da6a-4cdc-a5fc-95765d1b9724\n\n2. **Name**: staging-secured-cluster\n - **Type**: OpenShift 4 Cluster\n - **ID**: f781e077-fb39-4529-a19d-7a3403e181b2\n\n3. 
**Name**: test-unhealthy\n - **Type**: OpenShift 4 Cluster\n - **ID**: fa769ee2-afeb-405f-910d-0f514e6b1d78\n\n4. **Name**: test_external_ips\n - **Type**: Kubernetes Cluster\n - **ID**: e491900d-b0bc-4c20-910d-f090f22effab\n\nIf you need more details about any specific cluster, feel free to ask!", + "taskJudgeReason": "The [MODEL_RESPONSE] provides a list of cluster names including staging-central-cluster, staging-secured-cluster, test-unhealthy, and test_external_ips. This semantically includes the core information 'cluster names' from the [REFERENCE_ANSWER]. The extra details about type and ID in the [MODEL_RESPONSE] do not contradict or affect the primary requirement of listing cluster names, hence it passes.", + "difficulty": "easy", + "assertionResults": { + "toolsUsed": { + "passed": true + }, + "minToolCalls": { + "passed": true + }, + "maxToolCalls": { + "passed": true + } + }, + "allAssertionsPassed": true, + "callHistory": { + "ToolCalls": [ + { + "serverName": "stackrox-mcp", + "timestamp": "2026-01-16T17:37:50.410218428+01:00", + "success": true, + "name": "list_clusters", + "request": { + "Session": {}, + "Params": { + "name": "list_clusters", + "arguments": {} + }, + "Extra": { + "TokenInfo": null, + "Header": { + "Accept": [ + "application/json, text/event-stream" + ], + "Accept-Encoding": [ + "gzip" + ], + "Content-Length": [ + "95" + ], + "Content-Type": [ + "application/json" + ], + "Mcp-Protocol-Version": [ + "2025-06-18" + ], + "Mcp-Session-Id": [ + "7KZA3L5JUBSQA3T5KAGS4545Y4" + ], + "User-Agent": [ + "Go-http-client/1.1" + ] + } + } + }, + "result": { + "content": [ + { + "type": "text", + "text": 
"{\"clusters\":[{\"id\":\"65673bd7-da6a-4cdc-a5fc-95765d1b9724\",\"name\":\"staging-central-cluster\",\"type\":\"OPENSHIFT4_CLUSTER\"},{\"id\":\"f781e077-fb39-4529-a19d-7a3403e181b2\",\"name\":\"staging-secured-cluster\",\"type\":\"OPENSHIFT4_CLUSTER\"},{\"id\":\"fa769ee2-afeb-405f-910d-0f514e6b1d78\",\"name\":\"test-unhealthy\",\"type\":\"OPENSHIFT4_CLUSTER\"},{\"id\":\"e491900d-b0bc-4c20-910d-f090f22effab\",\"name\":\"test_external_ips\",\"type\":\"KUBERNETES_CLUSTER\"}],\"limit\":0,\"offset\":0,\"totalCount\":4}" + } + ], + "structuredContent": { + "clusters": [ + { + "id": "65673bd7-da6a-4cdc-a5fc-95765d1b9724", + "name": "staging-central-cluster", + "type": "OPENSHIFT4_CLUSTER" + }, + { + "id": "f781e077-fb39-4529-a19d-7a3403e181b2", + "name": "staging-secured-cluster", + "type": "OPENSHIFT4_CLUSTER" + }, + { + "id": "fa769ee2-afeb-405f-910d-0f514e6b1d78", + "name": "test-unhealthy", + "type": "OPENSHIFT4_CLUSTER" + }, + { + "id": "e491900d-b0bc-4c20-910d-f090f22effab", + "name": "test_external_ips", + "type": "KUBERNETES_CLUSTER" + } + ], + "limit": 0, + "offset": 0, + "totalCount": 4 + } + } + } + ], + "ResourceReads": null, + "PromptGets": null + } + }, + { + "taskName": "cve-affecting-workloads", + "taskPath": "/home/janisz/go/src/github.com/stackrox/stackrox-mcp/e2e-tests/gevals/tasks/cve-affecting-workloads.yaml", + "taskPassed": true, + "taskOutput": "CVE-2021-31805 is affecting the following workloads in your environment:\n\n1. **Cluster:** `staging-secured-cluster`\n - **Deployment:** `visa-processor` in the `payments` namespace\n - **Deployment:** `backend-atlas` in the `backend` namespace\n - **Deployment:** `asset-cache` in the `frontend` namespace\n\nThese are the user workloads that have been identified as impacted by this CVE. Make sure to take appropriate actions to mitigate the risk.", + "taskJudgeReason": "The model response contains all the core information from the reference answer. 
It lists specific deployments and workloads affected by CVE-2021-31805, addressing whether any deployments are impacted, which aligns with the requirement to state if any deployments are affected or not.", + "difficulty": "easy", + "assertionResults": { + "toolsUsed": { + "passed": true + }, + "minToolCalls": { + "passed": true + }, + "maxToolCalls": { + "passed": true + } + }, + "allAssertionsPassed": true, + "callHistory": { + "ToolCalls": [ + { + "serverName": "stackrox-mcp", + "timestamp": "2026-01-16T17:37:56.280315462+01:00", + "success": true, + "name": "get_deployments_for_cve", + "request": { + "Session": {}, + "Params": { + "name": "get_deployments_for_cve", + "arguments": { + "cveName": "CVE-2021-31805", + "filterPlatform": "USER_WORKLOAD" + } + }, + "Extra": { + "TokenInfo": null, + "Header": { + "Accept": [ + "application/json, text/event-stream" + ], + "Accept-Encoding": [ + "gzip" + ], + "Content-Length": [ + "164" + ], + "Content-Type": [ + "application/json" + ], + "Mcp-Protocol-Version": [ + "2025-06-18" + ], + "Mcp-Session-Id": [ + "GX5RH5Z56WP3ZB6XFH37M2E3PX" + ], + "User-Agent": [ + "Go-http-client/1.1" + ] + } + } + }, + "result": { + "content": [ + { + "type": "text", + "text": "{\"deployments\":[{\"clusterId\":\"f781e077-fb39-4529-a19d-7a3403e181b2\",\"clusterName\":\"staging-secured-cluster\",\"name\":\"visa-processor\",\"namespace\":\"payments\"},{\"clusterId\":\"f781e077-fb39-4529-a19d-7a3403e181b2\",\"clusterName\":\"staging-secured-cluster\",\"name\":\"backend-atlas\",\"namespace\":\"backend\"},{\"clusterId\":\"f781e077-fb39-4529-a19d-7a3403e181b2\",\"clusterName\":\"staging-secured-cluster\",\"name\":\"asset-cache\",\"namespace\":\"frontend\"}],\"nextCursor\":\"\"}" + } + ], + "structuredContent": { + "deployments": [ + { + "clusterId": "f781e077-fb39-4529-a19d-7a3403e181b2", + "clusterName": "staging-secured-cluster", + "name": "visa-processor", + "namespace": "payments" + }, + { + "clusterId": "f781e077-fb39-4529-a19d-7a3403e181b2", 
+ "clusterName": "staging-secured-cluster", + "name": "backend-atlas", + "namespace": "backend" + }, + { + "clusterId": "f781e077-fb39-4529-a19d-7a3403e181b2", + "clusterName": "staging-secured-cluster", + "name": "asset-cache", + "namespace": "frontend" + } + ], + "nextCursor": "" + } + } + } + ], + "ResourceReads": null, + "PromptGets": null + } + }, + { + "taskName": "cve-affecting-clusters", + "taskPath": "/home/janisz/go/src/github.com/stackrox/stackrox-mcp/e2e-tests/gevals/tasks/cve-affecting-clusters.yaml", + "taskPassed": true, + "taskOutput": "CVE-2016-1000031 is not affecting any clusters or nodes in your environment. However, it is affecting some deployments in the \"staging-secured-cluster\". Here are the affected deployments:\n\n1. **Namespace:** payments\n - **Deployment:** visa-processor\n\n2. **Namespace:** backend\n - **Deployment:** backend-atlas\n\n3. **Namespace:** frontend\n - **Deployment:** asset-cache\n\nYou may want to review and address the CVE in these specific deployments to mitigate any potential risks.", + "taskJudgeReason": "The MODEL_RESPONSE provides specific information about whether CVE-2016-1000031 affects clusters, nodes, or deployments, which aligns with the core requirements of the REFERENCE_ANSWER. 
It identifies that the CVE does not affect clusters or nodes but does affect certain deployments, thus containing the core information needed from the REFERENCE_ANSWER.", + "difficulty": "easy", + "assertionResults": { + "toolsUsed": { + "passed": true + }, + "minToolCalls": { + "passed": true + }, + "maxToolCalls": { + "passed": true + } + }, + "allAssertionsPassed": true, + "callHistory": { + "ToolCalls": [ + { + "serverName": "stackrox-mcp", + "timestamp": "2026-01-16T17:38:02.53839787+01:00", + "success": true, + "name": "get_clusters_for_cve", + "request": { + "Session": {}, + "Params": { + "name": "get_clusters_for_cve", + "arguments": { + "cveName": "CVE-2016-1000031" + } + }, + "Extra": { + "TokenInfo": null, + "Header": { + "Accept": [ + "application/json, text/event-stream" + ], + "Accept-Encoding": [ + "gzip" + ], + "Content-Length": [ + "130" + ], + "Content-Type": [ + "application/json" + ], + "Mcp-Protocol-Version": [ + "2025-06-18" + ], + "Mcp-Session-Id": [ + "I2I47L2Z5DE3CKTUQ3DIRBZHLC" + ], + "User-Agent": [ + "Go-http-client/1.1" + ] + } + } + }, + "result": { + "content": [ + { + "type": "text", + "text": "{\"clusters\":[]}" + } + ], + "structuredContent": { + "clusters": [] + } + } + }, + { + "serverName": "stackrox-mcp", + "timestamp": "2026-01-16T17:38:03.051194084+01:00", + "success": true, + "name": "get_deployments_for_cve", + "request": { + "Session": {}, + "Params": { + "name": "get_deployments_for_cve", + "arguments": { + "cveName": "CVE-2016-1000031" + } + }, + "Extra": { + "TokenInfo": null, + "Header": { + "Accept": [ + "application/json, text/event-stream" + ], + "Accept-Encoding": [ + "gzip" + ], + "Content-Length": [ + "133" + ], + "Content-Type": [ + "application/json" + ], + "Mcp-Protocol-Version": [ + "2025-06-18" + ], + "Mcp-Session-Id": [ + "I2I47L2Z5DE3CKTUQ3DIRBZHLC" + ], + "User-Agent": [ + "Go-http-client/1.1" + ] + } + } + }, + "result": { + "content": [ + { + "type": "text", + "text": 
"{\"deployments\":[{\"clusterId\":\"f781e077-fb39-4529-a19d-7a3403e181b2\",\"clusterName\":\"staging-secured-cluster\",\"name\":\"visa-processor\",\"namespace\":\"payments\"},{\"clusterId\":\"f781e077-fb39-4529-a19d-7a3403e181b2\",\"clusterName\":\"staging-secured-cluster\",\"name\":\"backend-atlas\",\"namespace\":\"backend\"},{\"clusterId\":\"f781e077-fb39-4529-a19d-7a3403e181b2\",\"clusterName\":\"staging-secured-cluster\",\"name\":\"asset-cache\",\"namespace\":\"frontend\"}],\"nextCursor\":\"\"}" + } + ], + "structuredContent": { + "deployments": [ + { + "clusterId": "f781e077-fb39-4529-a19d-7a3403e181b2", + "clusterName": "staging-secured-cluster", + "name": "visa-processor", + "namespace": "payments" + }, + { + "clusterId": "f781e077-fb39-4529-a19d-7a3403e181b2", + "clusterName": "staging-secured-cluster", + "name": "backend-atlas", + "namespace": "backend" + }, + { + "clusterId": "f781e077-fb39-4529-a19d-7a3403e181b2", + "clusterName": "staging-secured-cluster", + "name": "asset-cache", + "namespace": "frontend" + } + ], + "nextCursor": "" + } + } + }, + { + "serverName": "stackrox-mcp", + "timestamp": "2026-01-16T17:38:03.180608819+01:00", + "success": true, + "name": "get_nodes_for_cve", + "request": { + "Session": {}, + "Params": { + "name": "get_nodes_for_cve", + "arguments": { + "cveName": "CVE-2016-1000031" + } + }, + "Extra": { + "TokenInfo": null, + "Header": { + "Accept": [ + "application/json, text/event-stream" + ], + "Accept-Encoding": [ + "gzip" + ], + "Content-Length": [ + "127" + ], + "Content-Type": [ + "application/json" + ], + "Mcp-Protocol-Version": [ + "2025-06-18" + ], + "Mcp-Session-Id": [ + "I2I47L2Z5DE3CKTUQ3DIRBZHLC" + ], + "User-Agent": [ + "Go-http-client/1.1" + ] + } + } + }, + "result": { + "content": [ + { + "type": "text", + "text": "{\"nodeGroups\":[]}" + } + ], + "structuredContent": { + "nodeGroups": [] + } + } + } + ], + "ResourceReads": null, + "PromptGets": null + } + }, + { + "taskName": "cve-nonexistent", + "taskPath": 
"/home/janisz/go/src/github.com/stackrox/stackrox-mcp/e2e-tests/gevals/tasks/cve-nonexistent.yaml", + "taskPassed": true, + "taskOutput": "CVE-2099-00001 is not affecting any of your clusters.", + "taskJudgeReason": "The MODEL_RESPONSE clearly states that CVE-2099-00001 is not affecting any clusters, which is semantically aligned with the core information required by the REFERENCE_ANSWER. Therefore, it meets the CONTAINS criterion.", + "difficulty": "easy", + "assertionResults": { + "toolsUsed": { + "passed": true + }, + "minToolCalls": { + "passed": true + }, + "maxToolCalls": { + "passed": true + } + }, + "allAssertionsPassed": true, + "callHistory": { + "ToolCalls": [ + { + "serverName": "stackrox-mcp", + "timestamp": "2026-01-16T17:38:07.755902876+01:00", + "success": true, + "name": "get_clusters_for_cve", + "request": { + "Session": {}, + "Params": { + "name": "get_clusters_for_cve", + "arguments": { + "cveName": "CVE-2099-00001" + } + }, + "Extra": { + "TokenInfo": null, + "Header": { + "Accept": [ + "application/json, text/event-stream" + ], + "Accept-Encoding": [ + "gzip" + ], + "Content-Length": [ + "128" + ], + "Content-Type": [ + "application/json" + ], + "Mcp-Protocol-Version": [ + "2025-06-18" + ], + "Mcp-Session-Id": [ + "JH3GK22OZDJXGQD6CUR42W2DVD" + ], + "User-Agent": [ + "Go-http-client/1.1" + ] + } + } + }, + "result": { + "content": [ + { + "type": "text", + "text": "{\"clusters\":[]}" + } + ], + "structuredContent": { + "clusters": [] + } + } + } + ], + "ResourceReads": null, + "PromptGets": null + } + }, + { + "taskName": "cve-cluster-scooby", + "taskPath": "/home/janisz/go/src/github.com/stackrox/stackrox-mcp/e2e-tests/gevals/tasks/cve-cluster-scooby.yaml", + "taskPassed": true, + "taskOutput": "It looks like \"scooby\" is not a valid cluster name in the current system, or it may not be formatted correctly as a UUID. Based on the available cluster names, \"scooby\" does not appear to be listed. Please check the cluster name and try again. 
If there's anything else I can assist you with, feel free to let me know!", + "taskJudgeReason": "The MODEL_RESPONSE semantically includes the core information from the REFERENCE_ANSWER. It states that the cluster name 'scooby' does not appear to be valid or listed, which aligns with the criteria of addressing the cluster named 'scooby' in relation to CVE-2016-1000031, or stating that 'scooby' does not exist.", + "difficulty": "easy", + "assertionResults": { + "toolsUsed": { + "passed": true + }, + "minToolCalls": { + "passed": true + }, + "maxToolCalls": { + "passed": true + } + }, + "allAssertionsPassed": true, + "callHistory": { + "ToolCalls": [ + { + "serverName": "stackrox-mcp", + "timestamp": "2026-01-16T17:38:10.982340358+01:00", + "success": true, + "name": "list_clusters", + "request": { + "Session": {}, + "Params": { + "name": "list_clusters", + "arguments": {} + }, + "Extra": { + "TokenInfo": null, + "Header": { + "Accept": [ + "application/json, text/event-stream" + ], + "Accept-Encoding": [ + "gzip" + ], + "Content-Length": [ + "95" + ], + "Content-Type": [ + "application/json" + ], + "Mcp-Protocol-Version": [ + "2025-06-18" + ], + "Mcp-Session-Id": [ + "4XXGDPOHCD4BGC4G2UO7JODKAO" + ], + "User-Agent": [ + "Go-http-client/1.1" + ] + } + } + }, + "result": { + "content": [ + { + "type": "text", + "text": "{\"clusters\":[{\"id\":\"65673bd7-da6a-4cdc-a5fc-95765d1b9724\",\"name\":\"staging-central-cluster\",\"type\":\"OPENSHIFT4_CLUSTER\"},{\"id\":\"f781e077-fb39-4529-a19d-7a3403e181b2\",\"name\":\"staging-secured-cluster\",\"type\":\"OPENSHIFT4_CLUSTER\"},{\"id\":\"fa769ee2-afeb-405f-910d-0f514e6b1d78\",\"name\":\"test-unhealthy\",\"type\":\"OPENSHIFT4_CLUSTER\"},{\"id\":\"e491900d-b0bc-4c20-910d-f090f22effab\",\"name\":\"test_external_ips\",\"type\":\"KUBERNETES_CLUSTER\"}],\"limit\":0,\"offset\":0,\"totalCount\":4}" + } + ], + "structuredContent": { + "clusters": [ + { + "id": "65673bd7-da6a-4cdc-a5fc-95765d1b9724", + "name": "staging-central-cluster", 
+ "type": "OPENSHIFT4_CLUSTER" + }, + { + "id": "f781e077-fb39-4529-a19d-7a3403e181b2", + "name": "staging-secured-cluster", + "type": "OPENSHIFT4_CLUSTER" + }, + { + "id": "fa769ee2-afeb-405f-910d-0f514e6b1d78", + "name": "test-unhealthy", + "type": "OPENSHIFT4_CLUSTER" + }, + { + "id": "e491900d-b0bc-4c20-910d-f090f22effab", + "name": "test_external_ips", + "type": "KUBERNETES_CLUSTER" + } + ], + "limit": 0, + "offset": 0, + "totalCount": 4 + } + } + }, + { + "serverName": "stackrox-mcp", + "timestamp": "2026-01-16T17:38:12.711487175+01:00", + "success": true, + "name": "get_clusters_for_cve", + "request": { + "Session": {}, + "Params": { + "name": "get_clusters_for_cve", + "arguments": { + "cveName": "CVE-2016-1000031", + "filterClusterId": "scooby" + } + }, + "Extra": { + "TokenInfo": null, + "Header": { + "Accept": [ + "application/json, text/event-stream" + ], + "Accept-Encoding": [ + "gzip" + ], + "Content-Length": [ + "157" + ], + "Content-Type": [ + "application/json" + ], + "Mcp-Protocol-Version": [ + "2025-06-18" + ], + "Mcp-Session-Id": [ + "4XXGDPOHCD4BGC4G2UO7JODKAO" + ], + "User-Agent": [ + "Go-http-client/1.1" + ] + } + } + }, + "result": { + "content": [ + { + "type": "text", + "text": "Operation 'GetClusters' failed: Internal server error - an error occurred on the StackRox Central server. prepare cursor: found non-retryable error: error creating query: value \"scooby\" in search query must be valid UUID: invalid UUID length: 6" + } + ], + "isError": true + } + } + ], + "ResourceReads": null, + "PromptGets": null + } + }, + { + "taskName": "cve-cluster-maria", + "taskPath": "/home/janisz/go/src/github.com/stackrox/stackrox-mcp/e2e-tests/gevals/tasks/cve-cluster-maria.yaml", + "taskPassed": true, + "taskOutput": "It appears that there is no cluster with the name \"maria\" among the clusters managed by StackRox. The clusters currently available are:\n\n1. `staging-central-cluster`\n2. `staging-secured-cluster`\n3. `test-unhealthy`\n4. 
`test_external_ips`\n\nIf \"maria\" is an alias or if there's any mistake in the name, please verify the correct cluster name or ID for further assistance.", + "taskJudgeReason": "The model response includes the core information that the cluster 'maria' does not exist, which is one of the conditions indicated in the reference answer. It provides additional context by listing existing clusters, but this extra information does not contradict or omit the core information required by the reference.", + "difficulty": "easy", + "assertionResults": { + "toolsUsed": { + "passed": true + }, + "minToolCalls": { + "passed": true + }, + "maxToolCalls": { + "passed": true + } + }, + "allAssertionsPassed": true, + "callHistory": { + "ToolCalls": [ + { + "serverName": "stackrox-mcp", + "timestamp": "2026-01-16T17:38:17.162550885+01:00", + "success": true, + "name": "list_clusters", + "request": { + "Session": {}, + "Params": { + "name": "list_clusters", + "arguments": {} + }, + "Extra": { + "TokenInfo": null, + "Header": { + "Accept": [ + "application/json, text/event-stream" + ], + "Accept-Encoding": [ + "gzip" + ], + "Content-Length": [ + "95" + ], + "Content-Type": [ + "application/json" + ], + "Mcp-Protocol-Version": [ + "2025-06-18" + ], + "Mcp-Session-Id": [ + "XWJ77O4NES6OQZ4BY3B62XGBSQ" + ], + "User-Agent": [ + "Go-http-client/1.1" + ] + } + } + }, + "result": { + "content": [ + { + "type": "text", + "text": "{\"clusters\":[{\"id\":\"65673bd7-da6a-4cdc-a5fc-95765d1b9724\",\"name\":\"staging-central-cluster\",\"type\":\"OPENSHIFT4_CLUSTER\"},{\"id\":\"f781e077-fb39-4529-a19d-7a3403e181b2\",\"name\":\"staging-secured-cluster\",\"type\":\"OPENSHIFT4_CLUSTER\"},{\"id\":\"fa769ee2-afeb-405f-910d-0f514e6b1d78\",\"name\":\"test-unhealthy\",\"type\":\"OPENSHIFT4_CLUSTER\"},{\"id\":\"e491900d-b0bc-4c20-910d-f090f22effab\",\"name\":\"test_external_ips\",\"type\":\"KUBERNETES_CLUSTER\"}],\"limit\":0,\"offset\":0,\"totalCount\":4}" + } + ], + "structuredContent": { + "clusters": [ + 
{ + "id": "65673bd7-da6a-4cdc-a5fc-95765d1b9724", + "name": "staging-central-cluster", + "type": "OPENSHIFT4_CLUSTER" + }, + { + "id": "f781e077-fb39-4529-a19d-7a3403e181b2", + "name": "staging-secured-cluster", + "type": "OPENSHIFT4_CLUSTER" + }, + { + "id": "fa769ee2-afeb-405f-910d-0f514e6b1d78", + "name": "test-unhealthy", + "type": "OPENSHIFT4_CLUSTER" + }, + { + "id": "e491900d-b0bc-4c20-910d-f090f22effab", + "name": "test_external_ips", + "type": "KUBERNETES_CLUSTER" + } + ], + "limit": 0, + "offset": 0, + "totalCount": 4 + } + } + } + ], + "ResourceReads": null, + "PromptGets": null + } + }, + { + "taskName": "cve-clusters-general", + "taskPath": "/home/janisz/go/src/github.com/stackrox/stackrox-mcp/e2e-tests/gevals/tasks/cve-clusters-general.yaml", + "taskPassed": true, + "taskOutput": "Your clusters are not affected by CVE-2021-31805. If you have any further questions or need more details, feel free to ask!", + "taskJudgeReason": "The MODEL_RESPONSE states that no clusters are affected by CVE-2021-31805, which contains all the core information required by the REFERENCE_ANSWER. The reference answer specifies that the response should include whether any clusters are affected and a statement if no clusters are affected. The model's response fulfills these criteria by explicitly stating that the clusters are not affected. 
There is no missing information, contradiction, or need for additional detail regarding afflicted clusters, as they are not affected.", + "difficulty": "easy", + "assertionResults": { + "toolsUsed": { + "passed": true + }, + "minToolCalls": { + "passed": true + }, + "maxToolCalls": { + "passed": true + } + }, + "allAssertionsPassed": true, + "callHistory": { + "ToolCalls": [ + { + "serverName": "stackrox-mcp", + "timestamp": "2026-01-16T17:38:23.083868951+01:00", + "success": true, + "name": "get_clusters_for_cve", + "request": { + "Session": {}, + "Params": { + "name": "get_clusters_for_cve", + "arguments": { + "cveName": "CVE-2021-31805" + } + }, + "Extra": { + "TokenInfo": null, + "Header": { + "Accept": [ + "application/json, text/event-stream" + ], + "Accept-Encoding": [ + "gzip" + ], + "Content-Length": [ + "128" + ], + "Content-Type": [ + "application/json" + ], + "Mcp-Protocol-Version": [ + "2025-06-18" + ], + "Mcp-Session-Id": [ + "NKYH6MASSMUHXQDI37QTCYU226" + ], + "User-Agent": [ + "Go-http-client/1.1" + ] + } + } + }, + "result": { + "content": [ + { + "type": "text", + "text": "{\"clusters\":[]}" + } + ], + "structuredContent": { + "clusters": [] + } + } + } + ], + "ResourceReads": null, + "PromptGets": null + } + }, + { + "taskName": "cve-cluster-list", + "taskPath": "/home/janisz/go/src/github.com/stackrox/stackrox-mcp/e2e-tests/gevals/tasks/cve-cluster-list.yaml", + "taskPassed": false, + "taskOutput": "The CVE-2024-52577 is not affecting any of your clusters in your list.", + "taskError": "verification script failed with output '': evaluation failed for reason 'semantic_mismatch' because 'The model response does not include the core information from the reference, which expects a response indicating whether CVE-2024-52577 is affecting clusters. 
The model response provides a specific conclusion but does not address the core requirement of assessing the status.'", + "taskJudgeReason": "The model response does not include the core information from the reference, which expects a response indicating whether CVE-2024-52577 is affecting clusters. The model response provides a specific conclusion but does not address the core requirement of assessing the status.", + "difficulty": "easy", + "assertionResults": { + "toolsUsed": { + "passed": true + }, + "minToolCalls": { + "passed": true + }, + "maxToolCalls": { + "passed": true + } + }, + "allAssertionsPassed": true, + "callHistory": { + "ToolCalls": [ + { + "serverName": "stackrox-mcp", + "timestamp": "2026-01-16T17:38:32.513471775+01:00", + "success": true, + "name": "get_clusters_for_cve", + "request": { + "Session": {}, + "Params": { + "name": "get_clusters_for_cve", + "arguments": { + "cveName": "CVE-2024-52577" + } + }, + "Extra": { + "TokenInfo": null, + "Header": { + "Accept": [ + "application/json, text/event-stream" + ], + "Accept-Encoding": [ + "gzip" + ], + "Content-Length": [ + "128" + ], + "Content-Type": [ + "application/json" + ], + "Mcp-Protocol-Version": [ + "2025-06-18" + ], + "Mcp-Session-Id": [ + "6SDTGZXOQDBC3NU6PVE6EWL3TN" + ], + "User-Agent": [ + "Go-http-client/1.1" + ] + } + } + }, + "result": { + "content": [ + { + "type": "text", + "text": "{\"clusters\":[]}" + } + ], + "structuredContent": { + "clusters": [] + } + } + } + ], + "ResourceReads": null, + "PromptGets": null + } + } +] diff --git a/e2e-tests/gevals/mcp-config.yaml b/e2e-tests/gevals/mcp-config.yaml new file mode 100644 index 0000000..101406e --- /dev/null +++ b/e2e-tests/gevals/mcp-config.yaml @@ -0,0 +1,8 @@ +mcpServers: + stackrox-mcp: + command: go + args: + - run + - ../../cmd/stackrox-mcp/... 
+ # All configuration loaded from parent shell environment (.env file) + # No env section = full environment inheritance diff --git a/e2e-tests/gevals/tasks/cve-affecting-clusters.yaml b/e2e-tests/gevals/tasks/cve-affecting-clusters.yaml new file mode 100644 index 0000000..6578dd2 --- /dev/null +++ b/e2e-tests/gevals/tasks/cve-affecting-clusters.yaml @@ -0,0 +1,9 @@ +kind: Task +metadata: + name: "cve-affecting-clusters" + difficulty: easy +steps: + prompt: + inline: "is this CVE-2016-1000031 affecting me?" + verify: + contains: "A response indicating whether CVE-2016-1000031 affects clusters, nodes, or deployments in your environment" diff --git a/e2e-tests/gevals/tasks/cve-affecting-workloads.yaml b/e2e-tests/gevals/tasks/cve-affecting-workloads.yaml new file mode 100644 index 0000000..821c145 --- /dev/null +++ b/e2e-tests/gevals/tasks/cve-affecting-workloads.yaml @@ -0,0 +1,9 @@ +kind: Task +metadata: + name: "cve-affecting-workloads" + difficulty: easy +steps: + prompt: + inline: "Is this CVE-2021-31805 affecting my workloads" + verify: + contains: "A response indicating whether CVE-2021-31805 affects any workloads/deployments, with specific deployment names if affected, or a statement that no deployments are affected." diff --git a/e2e-tests/gevals/tasks/cve-cluster-list.yaml b/e2e-tests/gevals/tasks/cve-cluster-list.yaml new file mode 100644 index 0000000..99f49dc --- /dev/null +++ b/e2e-tests/gevals/tasks/cve-cluster-list.yaml @@ -0,0 +1,9 @@ +kind: Task +metadata: + name: "cve-cluster-list" + difficulty: easy +steps: + prompt: + inline: "is this CVE-2024-52577 affecting any of my clusters defined in my list of clusters?" 
+ verify: + contains: "A response indicating whether CVE-2024-52577 is affecting clusters" diff --git a/e2e-tests/gevals/tasks/cve-cluster-maria.yaml b/e2e-tests/gevals/tasks/cve-cluster-maria.yaml new file mode 100644 index 0000000..ce22f48 --- /dev/null +++ b/e2e-tests/gevals/tasks/cve-cluster-maria.yaml @@ -0,0 +1,9 @@ +kind: Task +metadata: + name: "cve-cluster-maria" + difficulty: easy +steps: + prompt: + inline: "is this CVE-2024-52577 affecting cluster name maria" + verify: + contains: "A response addressing whether CVE-2024-52577 affects the cluster 'maria', or stating that the cluster 'maria' does not exist" diff --git a/e2e-tests/gevals/tasks/cve-cluster-scooby.yaml b/e2e-tests/gevals/tasks/cve-cluster-scooby.yaml new file mode 100644 index 0000000..7bc3f08 --- /dev/null +++ b/e2e-tests/gevals/tasks/cve-cluster-scooby.yaml @@ -0,0 +1,9 @@ +kind: Task +metadata: + name: "cve-cluster-scooby" + difficulty: easy +steps: + prompt: + inline: "is this CVE-2016-1000031 affecting cluster name scooby" + verify: + contains: "A response addressing the cluster named 'scooby' in relation to CVE-2016-1000031, or stating that 'scooby' does not exist" diff --git a/e2e-tests/gevals/tasks/cve-clusters-general.yaml b/e2e-tests/gevals/tasks/cve-clusters-general.yaml new file mode 100644 index 0000000..c7ba43f --- /dev/null +++ b/e2e-tests/gevals/tasks/cve-clusters-general.yaml @@ -0,0 +1,9 @@ +kind: Task +metadata: + name: "cve-clusters-general" + difficulty: easy +steps: + prompt: + inline: "Is this CVE-2021-31805 affecting my clusters?" + verify: + contains: "A response indicating whether CVE-2021-31805 affects any clusters, with specific cluster names if affected, or a statement that no clusters are affected." 
diff --git a/e2e-tests/gevals/tasks/cve-nonexistent.yaml b/e2e-tests/gevals/tasks/cve-nonexistent.yaml new file mode 100644 index 0000000..530df6d --- /dev/null +++ b/e2e-tests/gevals/tasks/cve-nonexistent.yaml @@ -0,0 +1,9 @@ +kind: Task +metadata: + name: "cve-nonexistent" + difficulty: easy +steps: + prompt: + inline: "Is CVE-2099-00001 affecting my clusters?" + verify: + contains: "A response indicating whether CVE-2099-00001 is affecting clusters, or stating it is not affecting any clusters" diff --git a/e2e-tests/gevals/tasks/list-clusters.yaml b/e2e-tests/gevals/tasks/list-clusters.yaml new file mode 100644 index 0000000..810101d --- /dev/null +++ b/e2e-tests/gevals/tasks/list-clusters.yaml @@ -0,0 +1,9 @@ +kind: Task +metadata: + name: "list-clusters" + difficulty: easy +steps: + prompt: + inline: "list my clusters" + verify: + contains: "cluster names" diff --git a/e2e-tests/mcp-testing-framework.yaml b/e2e-tests/mcp-testing-framework.yaml new file mode 100644 index 0000000..7b47949 --- /dev/null +++ b/e2e-tests/mcp-testing-framework.yaml @@ -0,0 +1,85 @@ +# Number of rounds for each model test execution +testRound: 10 + +# Minimum threshold for passing tests (decimal between 0-1) +passThreshold: 0.5 + +# List of models to test +modelsToTest: + - openai:gpt-5 + - openai:gpt-5-mini + +testCases: + - prompt: 'list my clusters' + expectedOutput: + serverName: 'stackrox-mcp' + toolName: 'list_clusters' + parameters: + limit: 0 + offset: 0 # GPT-5 models add both parameters + + # Note: Optional params vary between models - gpt-5 adds filterPlatform, gpt-5-mini adds includeAffectedImages + - prompt: 'Is this CVE-2021-31805 affecting my workloads' + expectedOutput: + serverName: 'stackrox-mcp' + toolName: 'get_deployments_for_cve' + parameters: + cveName: 'CVE-2021-31805' + filterPlatform: 'USER_WORKLOAD' # Most common pattern for gpt-5 + + - prompt: 'is this CVE-2016-1000031 affecting me?' 
+ expectedOutput: + serverName: 'stackrox-mcp' + toolName: 'get_clusters_for_cve' + parameters: + cveName: 'CVE-2016-1000031' + + - prompt: 'is this CVE-invented affecting me?' + expectedOutput: + serverName: 'stackrox-mcp' + toolName: 'get_clusters_for_cve' # Changed: gpt-5 uses this 2/3 times + parameters: + cveName: 'CVE-invented' + + - prompt: 'is this CVE-2016-1000031 affecting cluster name scooby' + expectedOutput: + serverName: 'stackrox-mcp' + toolName: 'get_clusters_for_cve' + parameters: + cveName: 'CVE-2016-1000031' + filterClusterId: 'scooby' + + - prompt: 'is this CVE-2024-52577 affecting cluster name maria' + expectedOutput: + serverName: 'stackrox-mcp' + toolName: 'get_clusters_for_cve' + parameters: + cveName: 'CVE-2024-52577' + filterClusterId: 'maria' + + - prompt: 'Is this CVE-2021-31805 affecting my clusters?' + expectedOutput: + serverName: 'stackrox-mcp' + toolName: 'get_clusters_for_cve' + parameters: + cveName: 'CVE-2021-31805' + + - prompt: 'is this CVE-2024-52577 affecting any of my clusters defined in my list of clusters?' + expectedOutput: + serverName: 'stackrox-mcp' + toolName: 'get_clusters_for_cve' + parameters: + cveName: 'CVE-2024-52577' + +mcpServers: + - name: 'stackrox-mcp' + command: 'go' + args: ['run', '../cmd/stackrox-mcp/...'] + env: + STACKROX_MCP__SERVER__TYPE: stdio + STACKROX_MCP__TOOLS__VULNERABILITY__ENABLED: "true" + STACKROX_MCP__TOOLS__CONFIG_MANAGER__ENABLED: "true" + STACKROX_MCP__CENTRAL__URL: "staging.demo.stackrox.com" + STACKROX_MCP__CENTRAL__AUTH_TYPE: "static" + STACKROX_MCP__CENTRAL__API_TOKEN: "${STACKROX_API_TOKEN}" + STACKROX_MCP__CENTRAL__INSECURE_SKIP_TLS_VERIFY: "true" diff --git a/e2e-tests/scripts/build-gevals.sh b/e2e-tests/scripts/build-gevals.sh new file mode 100755 index 0000000..f49d1e3 --- /dev/null +++ b/e2e-tests/scripts/build-gevals.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -e + +cd "$(dirname "$0")/.." + +echo "Building gevals from tool dependencies..." 
+go build -o bin/gevals github.com/genmcp/gevals/cmd/gevals + +echo "gevals built successfully: bin/gevals" +./bin/gevals --version diff --git a/e2e-tests/scripts/run-tests.sh b/e2e-tests/scripts/run-tests.sh new file mode 100755 index 0000000..e0b26e4 --- /dev/null +++ b/e2e-tests/scripts/run-tests.sh @@ -0,0 +1,74 @@ +#!/bin/bash +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +E2E_DIR="$(dirname "$SCRIPT_DIR")" + +echo "══════════════════════════════════════════════════════════" +echo " StackRox MCP E2E Testing with Gevals" +echo "══════════════════════════════════════════════════════════" +echo "" + +# Load environment variables: source .env with auto-export so quoted values survive and an empty file never turns into a bare 'export' that dumps the environment +if [ -f "$E2E_DIR/.env" ]; then + echo "Loading environment variables from .env..." + set -a; . "$E2E_DIR/.env"; set +a +else + echo "Warning: .env file not found" +fi + +# Check required environment variables +if [ -z "$OPENAI_API_KEY" ]; then + echo "Error: OPENAI_API_KEY is not set" + echo "Please set it in .env file or export it in your environment" + exit 1 +fi + +if [ -z "$STACKROX_API_TOKEN" ]; then + echo "Error: STACKROX_API_TOKEN is not set" + echo "Please set it in .env file or export it in your environment" + exit 1 +fi + +# Build gevals if not present +if [ ! -f "$E2E_DIR/bin/gevals" ]; then + echo "Gevals binary not found. Building..."
+ "$SCRIPT_DIR/build-gevals.sh" + echo "" +fi + +# Set judge environment variables (use same OpenAI key) +export JUDGE_BASE_URL="${JUDGE_BASE_URL:-https://api.openai.com/v1}" +export JUDGE_API_KEY="${JUDGE_API_KEY:-$OPENAI_API_KEY}" +export JUDGE_MODEL_NAME="${JUDGE_MODEL_NAME:-gpt-4o}" + +# Set agent environment variables +export MODEL_BASE_URL="${MODEL_BASE_URL:-https://api.openai.com/v1}" +export MODEL_KEY="${MODEL_KEY:-$OPENAI_API_KEY}" + +echo "Configuration:" +echo " Agent Model: gpt-4o" +echo " Judge Model: $JUDGE_MODEL_NAME" +echo " MCP Server: stackrox-mcp (via go run)" +echo "" + +# Run gevals +cd "$E2E_DIR/gevals" +echo "Running gevals tests..." +echo "" + +# Capture the exit status via '||': under 'set -e' a bare failing command would abort the script before the failure banner below could print +EXIT_CODE=0 +"$E2E_DIR/bin/gevals" eval eval.yaml || EXIT_CODE=$? + +echo "" +if [ $EXIT_CODE -eq 0 ]; then + echo "══════════════════════════════════════════════════════════" + echo " Tests Completed Successfully!" + echo "══════════════════════════════════════════════════════════" +else + echo "══════════════════════════════════════════════════════════" + echo " Tests Failed" + echo "══════════════════════════════════════════════════════════" + exit $EXIT_CODE +fi diff --git a/e2e-tests/tools.go b/e2e-tests/tools.go new file mode 100644 index 0000000..9be4d21 --- /dev/null +++ b/e2e-tests/tools.go @@ -0,0 +1,8 @@ +//go:build tools +// +build tools + +package tools + +import ( + _ "github.com/genmcp/gevals/cmd/gevals" +) diff --git a/go.mod b/go.mod index 9a901ec..e038669 100644 --- a/go.mod +++ b/go.mod @@ -15,11 +15,17 @@ require ( require ( github.com/coder/websocket v1.8.14 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/fatih/color v1.18.0 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/genmcp/gevals v0.0.1 // indirect github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/golang/glog v1.2.5 // indirect github.com/google/jsonschema-go v0.3.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 // indirect +
github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/mattn/go-colorable v0.1.14 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/openai/openai-go/v2 v2.7.1 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/planetscale/vtprotobuf v0.6.1-0.20240409071808-615f978279ca // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect @@ -27,19 +33,27 @@ require ( github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 // indirect github.com/spf13/afero v1.15.0 // indirect github.com/spf13/cast v1.10.0 // indirect + github.com/spf13/cobra v1.10.1 // indirect github.com/spf13/pflag v1.0.10 // indirect github.com/stackrox/scanner v0.0.0-20240830165150-d133ba942d59 // indirect github.com/subosito/gotenv v1.6.0 // indirect + github.com/tidwall/gjson v1.18.0 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.1 // indirect + github.com/tidwall/sjson v1.2.5 // indirect github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 // indirect golang.org/x/oauth2 v0.33.0 // indirect + golang.org/x/sync v0.18.0 // indirect golang.org/x/sys v0.37.0 // indirect golang.org/x/text v0.30.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20251103181224-f26f9409b101 // indirect google.golang.org/protobuf v1.36.10 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect ) // StackRox library - pinned to specific commit SHA. 
diff --git a/go.sum b/go.sum index b7e7649..31cb974 100644 --- a/go.sum +++ b/go.sum @@ -1,11 +1,16 @@ github.com/coder/websocket v1.8.14 h1:9L0p0iKiNOibykf283eHkKUHHrpG7f65OE3BhhO7v9g= github.com/coder/websocket v1.8.14/go.mod h1:NX3SzP+inril6yawo5CQXx8+fk145lPDC6pumgx0mVg= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= +github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/genmcp/gevals v0.0.1 h1:r7kE7QQmxxGeSxcFYMW+nKJtmB05NDgVm480rsLjpXM= +github.com/genmcp/gevals v0.0.1/go.mod h1:U/dn+o9g/qxzEpj/Xz7CSOA84ffDMscODgFS0TxRZeA= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -24,12 +29,20 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 h1:NmZ1PKzSTQbuGHw9DGPFomqkkLWMC+vZCkfs+FHv1Vg= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3/go.mod h1:zQrxl1YP88HQlA6i9c63DSVPFklWpGX4OWAc9bFuaH4= +github.com/inconshreveable/mousetrap v1.1.0 
h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= +github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/modelcontextprotocol/go-sdk v1.1.0 h1:Qjayg53dnKC4UZ+792W21e4BpwEZBzwgRW6LrjLWSwA= github.com/modelcontextprotocol/go-sdk v1.1.0/go.mod h1:6fM3LCm3yV7pAs8isnKLn07oKtB0MP9LHd3DfAcKw10= +github.com/openai/openai-go/v2 v2.7.1 h1:/tfvTJhfv7hTSL8mWwc5VL4WLLSDL5yn9VqVykdu9r8= +github.com/openai/openai-go/v2 v2.7.1/go.mod h1:jrJs23apqJKKbT+pqtFgNKpRju/KP9zpUTZhz3GElQE= github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -40,6 +53,7 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sagikazarmark/locafero v0.11.0 
h1:1iurJgmM9G3PA/I+wWYIOw/5SyBtxapeHDcg+AAIFXc= github.com/sagikazarmark/locafero v0.11.0/go.mod h1:nVIGvgyzw595SUSUE6tvCp3YYTeHs15MvlmU87WwIik= github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 h1:+jumHNA0Wrelhe64i8F6HNlS8pkoyMv5sreGx2Ry5Rw= @@ -48,6 +62,9 @@ github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I= github.com/spf13/afero v1.15.0/go.mod h1:NC2ByUVxtQs4b3sIUphxK0NioZnmxgyCrfzeuq8lxMg= github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY= github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo= +github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= +github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.21.0 h1:x5S+0EU27Lbphp4UKm1C+1oQO+rKx36vfCoaVebLFSU= @@ -60,6 +77,16 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= 
+github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= @@ -74,12 +101,17 @@ go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6 go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 h1:6/3JGEh1C88g7m+qzzTbl3A0FtsLguXieqofVLU/JAo= golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/oauth2 v0.33.0 h1:4Q+qn+E5z8gPRJfmRy7C2gGG3T4jIprK6aSYgTXGRpo= golang.org/x/oauth2 v0.33.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= +golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= 
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= @@ -103,3 +135,5 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntN gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/internal/toolsets/config/tools.go b/internal/toolsets/config/tools.go index 456b6b7..0746b1d 100644 --- a/internal/toolsets/config/tools.go +++ b/internal/toolsets/config/tools.go @@ -69,7 +69,7 @@ func (t *listClustersTool) GetName() string { func (t *listClustersTool) GetTool() *mcp.Tool { return &mcp.Tool{ Name: t.name, - Description: "List all clusters managed by StackRox with their IDs, names, and types", + Description: "List all clusters managed by StackRox with their IDs, names, and types. Use this tool to get cluster information, or when you need to map a cluster name to its cluster ID for use in other tools.", InputSchema: listClustersInputSchema(), } } @@ -84,11 +84,11 @@ func listClustersInputSchema() *jsonschema.Schema { schema.Properties["offset"].Minimum = jsonschema.Ptr(0.0) schema.Properties["offset"].Default = toolsets.MustJSONMarshal(defaultOffset) - schema.Properties["offset"].Description = "Starting index for pagination (0-based)" + schema.Properties["offset"].Description = "Starting index for pagination (0-based). When using pagination, always provide both offset and limit together. Default: 0." 
schema.Properties["limit"].Minimum = jsonschema.Ptr(0.0) schema.Properties["limit"].Default = toolsets.MustJSONMarshal(defaultLimit) - schema.Properties["limit"].Description = "Maximum number of clusters to return (default: 0 - unlimited)" + schema.Properties["limit"].Description = "Maximum number of clusters to return. Use 0 for unlimited (default). When using pagination, always provide both limit and offset together. Default: 0." return schema } diff --git a/internal/toolsets/vulnerability/clusters.go b/internal/toolsets/vulnerability/clusters.go index f5852f4..9ef5012 100644 --- a/internal/toolsets/vulnerability/clusters.go +++ b/internal/toolsets/vulnerability/clusters.go @@ -69,7 +69,7 @@ func (t *getClustersForCVETool) GetName() string { func (t *getClustersForCVETool) GetTool() *mcp.Tool { return &mcp.Tool{ Name: t.name, - Description: "Get list of clusters affected by a specific CVE", + Description: "Get list of clusters affected by a specific CVE. Use this tool when asking about CVE impact on 'clusters' or general CVE impact questions. For deployment/workload-specific queries, use get_deployments_for_cve instead.", InputSchema: getClustersForCVEInputSchema(), } } @@ -87,7 +87,10 @@ func getClustersForCVEInputSchema() *jsonschema.Schema { schema.Required = []string{"cveName"} schema.Properties["cveName"].Description = "CVE name to filter clusters (e.g., CVE-2021-44228)" - schema.Properties["filterClusterId"].Description = "Optional cluster ID to verify if a specific cluster is affected" + schema.Properties["filterClusterId"].Description = + "Optional cluster ID or cluster name to verify if a specific cluster is affected. " + + "When the query mentions 'cluster name X', use this parameter with the value 'X'. " + + "The cluster ID can be either the actual cluster ID or the cluster name." 
return schema } diff --git a/internal/toolsets/vulnerability/deployments.go b/internal/toolsets/vulnerability/deployments.go index a967998..c0e9e51 100644 --- a/internal/toolsets/vulnerability/deployments.go +++ b/internal/toolsets/vulnerability/deployments.go @@ -93,7 +93,7 @@ func (t *getDeploymentsForCVETool) GetName() string { func (t *getDeploymentsForCVETool) GetTool() *mcp.Tool { return &mcp.Tool{ Name: t.name, - Description: "Get list of deployments affected by a specific CVE", + Description: "Get detailed list of deployments (workloads/applications) affected by a specific CVE. Use this tool when the query specifically asks about 'workloads', 'deployments', 'applications', or needs deployment-level details. For general CVE impact or cluster-level queries, use get_clusters_for_cve instead.", InputSchema: getDeploymentsForCVEInputSchema(), } } @@ -111,12 +111,19 @@ func getDeploymentsForCVEInputSchema() *jsonschema.Schema { schema.Required = []string{"cveName"} schema.Properties["cveName"].Description = "CVE name to filter deployments (e.g., CVE-2021-44228)" - schema.Properties["filterClusterId"].Description = "Optional cluster ID to filter deployments" - schema.Properties["filterNamespace"].Description = "Optional namespace to filter deployments" + schema.Properties["filterClusterId"].Description = + "Optional cluster ID to filter deployments. " + + "Use this when the query mentions a specific cluster name - you may need to call list_clusters first to get the cluster ID from the cluster name." + schema.Properties["filterNamespace"].Description = + "Optional namespace to filter deployments. " + + "Use this when the query mentions a specific namespace." 
schema.Properties["filterPlatform"].Description = - fmt.Sprintf("Optional platform filter: %s=no filter, %s=user workload deployments, %s=platform deployments", - filterPlatformNoFilter, filterPlatformUserWorkload, filterPlatformPlatform) + fmt.Sprintf("Optional platform filter to distinguish deployment types: %s=no filter (default), %s=user workload deployments, %s=platform/infrastructure deployments. "+ + "Use %s when the query specifically asks about 'workloads', 'applications', or 'user deployments'. "+ + "Leave unset (defaults to %s) for general queries.", + filterPlatformNoFilter, filterPlatformUserWorkload, filterPlatformPlatform, + filterPlatformUserWorkload, filterPlatformNoFilter) schema.Properties["filterPlatform"].Default = toolsets.MustJSONMarshal(filterPlatformNoFilter) schema.Properties["filterPlatform"].Enum = []any{ filterPlatformNoFilter, @@ -125,8 +132,9 @@ func getDeploymentsForCVEInputSchema() *jsonschema.Schema { } schema.Properties["includeAffectedImages"].Description = - "Whether to include affected image names for each deployment.\n" + - "WARNING: This may significantly increase response time." + "Whether to include affected image names for each deployment. " + + "Only set to true when the query specifically asks for image names or image details. " + + "WARNING: This may significantly increase response time. Default: false." schema.Properties["includeAffectedImages"].Default = toolsets.MustJSONMarshal(false) schema.Properties["cursor"].Description = "Cursor for next page provided by server"