Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ export LLM_MODEL=<a model that supports completions>
export EMBED_API_KEY=<YOUR KEY>
export EMBED_MODEL=<a text embedding model>
export EMBED_URL="<URL BASE>/v1/embeddings"
export PHENOTYPE_INDEX_DIR="<ABSOLUTE PATH TO phenotype_index>"
export STUDY_AGENT_MCP_CWD="<REPO ROOT (optional, for stable relative paths)>"
export STUDY_AGENT_HOST=127.0.0.1
export STUDY_AGENT_PORT=8765
export STUDY_AGENT_MCP_COMMAND=study-agent-mcp
Expand All @@ -126,6 +128,8 @@ study-agent-acp
```
Note: Prefer stopping the ACP process (SIGINT/SIGTERM) so the MCP subprocess is closed cleanly. Killing the MCP directly can leave defunct processes.
Note: ACP uses a threaded HTTP server by default. Set `STUDY_AGENT_THREADING=0` to disable threading.
Note: `/health` includes MCP preflight details under `mcp_index` when MCP is configured.
Troubleshooting: run `python mcp_server/scripts/mcp_probe.py` to verify index paths and search without ACP.

2. Run `phenotype_recommendation`
```bash
Expand Down
12 changes: 12 additions & 0 deletions acp_agent/study_agent_acp/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,18 @@ def run_phenotype_recommendation_flow(
}

full = search_result.get("full_result") or {}
if full.get("error"):
payload = {
"status": "error",
"error": full.get("error"),
"details": full,
}
if full.get("error") == "phenotype_index_unavailable":
payload["hint"] = (
"Set PHENOTYPE_INDEX_DIR to the phenotype_index directory "
"(prefer an absolute path) and verify catalog.jsonl exists."
)
return payload
if "results" not in full and full.get("content"):
return {
"status": "error",
Expand Down
20 changes: 18 additions & 2 deletions acp_agent/study_agent_acp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ def do_GET(self) -> None:
payload = {"status": "ok"}
if self.mcp_client is not None:
payload["mcp"] = self.mcp_client.health_check()
if payload["mcp"].get("ok"):
try:
payload["mcp_index"] = self.mcp_client.call_tool("phenotype_index_status", {})
except Exception as exc:
payload["mcp_index"] = {"error": str(exc)}
_write_json(self, 200, payload)
return
if self.path == "/tools":
Expand Down Expand Up @@ -377,11 +382,12 @@ def _build_agent(
mcp_command: Optional[str],
mcp_args: Optional[list[str]],
allow_core_fallback: bool,
mcp_cwd: Optional[str],
) -> tuple[StudyAgent, Optional[StdioMCPClient]]:
mcp_client = None
if mcp_command:
mcp_client = StdioMCPClient(
StdioMCPClientConfig(command=mcp_command, args=mcp_args or []),
StdioMCPClientConfig(command=mcp_command, args=mcp_args or [], cwd=mcp_cwd),
)
return StudyAgent(mcp_client=mcp_client, allow_core_fallback=allow_core_fallback), mcp_client

Expand Down Expand Up @@ -450,9 +456,19 @@ def main(host: str = "127.0.0.1", port: int = 8765) -> None:
allow_core_fallback = os.getenv("STUDY_AGENT_ALLOW_CORE_FALLBACK", "1") == "1"
debug = os.getenv("STUDY_AGENT_DEBUG", "0") == "1"
threaded = os.getenv("STUDY_AGENT_THREADING", "1") == "1"
mcp_cwd = os.getenv("STUDY_AGENT_MCP_CWD") or os.getcwd()

if mcp_command:
if os.getenv("PHENOTYPE_INDEX_DIR") is None:
print("ACP WARN > PHENOTYPE_INDEX_DIR not set; MCP will use its default.")
if os.getenv("EMBED_URL") is None:
print("ACP WARN > EMBED_URL not set; MCP will use its default.")
if os.getenv("EMBED_MODEL") is None:
print("ACP WARN > EMBED_MODEL not set; MCP will use its default.")
print(f"ACP INFO > MCP cwd={mcp_cwd}")

args_list = [arg for arg in mcp_args.split(" ") if arg]
agent, mcp_client = _build_agent(mcp_command, args_list, allow_core_fallback)
agent, mcp_client = _build_agent(mcp_command, args_list, allow_core_fallback, mcp_cwd)

class Handler(ACPRequestHandler):
agent = None
Expand Down
3 changes: 2 additions & 1 deletion docs/PHENOTYPE_INDEXING.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ The output directory will contain:
**Notes**
1. If FAISS/numpy are not installed, omit `--build-dense` or install them first.
2. Indexing is safe to run repeatedly; it rebuilds the directory contents.
3. Set `PHENOTYPE_INDEX_DIR` in your MCP environment to point at the output directory.
3. Set `PHENOTYPE_INDEX_DIR` in your MCP environment to point at the output directory (prefer an absolute path).
4. If `PHENOTYPE_INDEX_DIR` is not set, MCP falls back to the repo-relative default `data/phenotype_index`.
5 changes: 4 additions & 1 deletion docs/PHENOTYPE_RECOMMENDATION_DESIGN.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ Each phenotype is stored as a compact JSON document (one line per document):
9. `source_meta`

**Index Directory Layout**
Default root is `PHENOTYPE_INDEX_DIR` or `data/phenotype_index`.
Default root is `PHENOTYPE_INDEX_DIR` or repo-relative `data/phenotype_index` (resolved from the MCP package location).
1. `catalog.jsonl` (compact phenotype docs)
2. `sparse_index.pkl` (pure-Python BM25-style index)
3. `dense.index` (FAISS index)
Expand Down Expand Up @@ -70,6 +70,7 @@ Default root is `PHENOTYPE_INDEX_DIR` or `data/phenotype_index`.
3. `phenotype_fetch_definition(cohortId, truncate=true)`
4. `phenotype_list_similar(cohortId, top_k=10)`
5. `phenotype_prompt_bundle(task)` (returns overview/spec/output_schema)
6. `phenotype_index_status()` (returns index path + file existence for preflight checks)

**ACP Orchestration**
1. User submits study intent to ACP.
Expand Down Expand Up @@ -118,6 +119,8 @@ Candidate selection:
17. `STUDY_AGENT_THREADING` (default `1`) uses a threaded HTTP server for ACP. Set to `0` to disable.
18. `STUDY_AGENT_HOST` (default `127.0.0.1`)
19. `STUDY_AGENT_PORT` (default `8765`)
20. `STUDY_AGENT_MCP_CWD` (optional; defaults to the ACP process's current working directory) sets the working directory of the MCP subprocess. Set it when you need stable relative paths.
21. `MCP_LOG_LEVEL` (default `INFO`) controls MCP stderr logging (`DEBUG|INFO|WARN|ERROR|OFF`).

**Risks and Mitigations**
1. Missing dependencies for FAISS
Expand Down
42 changes: 38 additions & 4 deletions docs/TESTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,14 @@ Start ACP with an MCP tool server:
STUDY_AGENT_MCP_COMMAND=study-agent-mcp STUDY_AGENT_MCP_ARGS="" study-agent-acp
```

Recommended MCP environment (use absolute paths for stability):

```bash
export PHENOTYPE_INDEX_DIR="/absolute/path/to/phenotype_index"
export EMBED_URL="http://localhost:3000/ollama/api/embed"
export EMBED_MODEL="qwen3-embedding:4b"
```

Optional host/port override:

```bash
Expand All @@ -160,6 +168,12 @@ STUDY_AGENT_HOST=0.0.0.0 STUDY_AGENT_PORT=9000 study-agent-acp

Then run the same curl commands as above.

Health check now includes MCP index preflight details under `mcp_index`:

```bash
curl -s http://127.0.0.1:8765/health
```

## ACP phenotype flow (MCP + LLM)

Ensure MCP is running and set LLM env vars for an OpenAI-compatible endpoint:
Expand Down Expand Up @@ -289,16 +303,36 @@ doit smoke_phenotype_validation_review_flow

## MCP smoke test (import)

## Service listing
```bash
python -c "import study_agent_mcp; print('mcp import ok')"
```

Use the `/services` endpoint (or the helper task) to list ACP services:
## MCP probe (index + search)

This checks index paths and runs a simple search, without ACP.

```bash
doit list_services
python mcp_server/scripts/mcp_probe.py --query "acute GI bleed in hospitalized patients" --top-k 5
```

PowerShell (Windows) equivalent:

```powershell
python mcp_server/scripts/mcp_probe.py --query "acute GI bleed in hospitalized patients" --top-k 5
```

Print and sort environment variables (PowerShell):

```powershell
Get-ChildItem Env: | Sort-Object Name
```

## Service listing

Use the `/services` endpoint (or the helper task) to list ACP services:

```bash
python -c "import study_agent_mcp; print('mcp import ok')"
doit list_services
```

## Stop server
Expand Down
1 change: 1 addition & 0 deletions mcp_server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Phenotype retrieval + metadata:
- `phenotype_fetch_definition`
- `phenotype_list_similar`
- `phenotype_reindex`
- `phenotype_index_status`
- `phenotype_prompt_bundle`
- `phenotype_recommendation_advice`

Expand Down
46 changes: 46 additions & 0 deletions mcp_server/scripts/mcp_probe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import annotations

import argparse
import json
import sys
import time

from study_agent_mcp.retrieval import get_default_index, index_status


def main(argv: list[str] | None = None) -> int:
    """Probe the phenotype index and run one search, without going through ACP.

    Prints the index status as JSON, loads the default index, then runs a
    single search and prints the results as JSON.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is what the script entry
            point relies on.

    Returns:
        Exit code: 0 on success, 1 when the index directory is missing,
        2 when loading the index fails, 3 when the search fails.
    """
    parser = argparse.ArgumentParser(description="Probe MCP phenotype index + search path.")
    parser.add_argument("--query", default="acute GI bleed in hospitalized patients")
    parser.add_argument("--top-k", type=int, default=5)
    args = parser.parse_args(argv)

    status = index_status()
    print("INDEX STATUS:")
    print(json.dumps(status, indent=2))

    if not status.get("exists"):
        print("ERROR: index directory missing.", file=sys.stderr)
        return 1

    # Broad excepts are deliberate here: this is a diagnostic entry point and
    # every failure should be reported with a distinct exit code, not a traceback.
    try:
        # perf_counter() is monotonic, so the reported duration cannot be
        # skewed by system clock adjustments the way time.time() can.
        load_started = time.perf_counter()
        index = get_default_index()
        print(f"INDEX LOAD OK: {len(index.catalog)} docs in {time.perf_counter() - load_started:.2f}s")
    except Exception as exc:
        print(f"ERROR: index load failed: {exc}", file=sys.stderr)
        return 2

    try:
        search_started = time.perf_counter()
        results = index.search(args.query, top_k=args.top_k)
        print(f"SEARCH OK: {len(results)} results in {time.perf_counter() - search_started:.2f}s")
        print(json.dumps(results[: args.top_k], indent=2))
    except Exception as exc:
        print(f"ERROR: search failed: {exc}", file=sys.stderr)
        return 3

    return 0


if __name__ == "__main__":
raise SystemExit(main())
4 changes: 2 additions & 2 deletions mcp_server/study_agent_mcp/retrieval/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import annotations

from .index import PhenotypeIndex, get_default_index
from .index import PhenotypeIndex, get_default_index, index_status

__all__ = ["PhenotypeIndex", "get_default_index"]
__all__ = ["PhenotypeIndex", "get_default_index", "index_status"]
41 changes: 39 additions & 2 deletions mcp_server/study_agent_mcp/retrieval/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,13 +287,50 @@ def _sparse_search(self, query: str, top_k: int) -> Dict[int, float]:
_DEFAULT_INDEX: Optional[PhenotypeIndex] = None


def _default_index_dir() -> tuple[str, str]:
    """Resolve the phenotype index directory and report where it came from.

    Returns:
        A ``(path, source)`` pair. When ``PHENOTYPE_INDEX_DIR`` is set to a
        non-empty value, ``path`` is its absolute form and ``source`` is
        ``"env"``. Otherwise ``path`` is ``data/phenotype_index`` under the
        directory three levels above this module's directory, and ``source``
        is ``"default"``.
    """
    configured = os.environ.get("PHENOTYPE_INDEX_DIR")
    if not configured:
        # Anchor the fallback to this file's location rather than the process
        # working directory.
        module_dir = os.path.dirname(__file__)
        three_up = os.path.join(module_dir, os.pardir, os.pardir, os.pardir)
        return os.path.join(os.path.abspath(three_up), "data", "phenotype_index"), "default"
    return os.path.abspath(configured), "env"


def index_status(index_dir: Optional[str] = None) -> Dict[str, Any]:
    """Describe the phenotype index directory and the files expected inside it.

    Args:
        index_dir: Directory to inspect. When falsy, the directory returned by
            ``_default_index_dir()`` is used instead.

    Returns:
        A dict with ``index_dir`` (the directory inspected), ``index_dir_source``
        (``"explicit"`` for a caller-supplied directory, otherwise the source
        reported by ``_default_index_dir()``), ``exists`` (whether the directory
        exists) and ``files``: one entry per key returned by ``_index_paths``,
        each holding ``path``, ``exists`` and ``size`` (bytes for regular files,
        otherwise ``None``).
    """
    if index_dir:
        root, origin = os.path.abspath(index_dir), "explicit"
    else:
        root, origin = _default_index_dir()

    def _describe(path: str) -> Dict[str, Any]:
        # Size is only reported for regular files; a file that disappears
        # between the checks simply gets no size.
        present = os.path.exists(path)
        byte_count = None
        if present and os.path.isfile(path):
            try:
                byte_count = os.path.getsize(path)
            except OSError:
                byte_count = None
        return {"path": path, "exists": present, "size": byte_count}

    file_report = {key: _describe(path) for key, path in _index_paths(root).items()}
    return {
        "index_dir": root,
        "index_dir_source": origin,
        "exists": os.path.isdir(root),
        "files": file_report,
    }


def get_default_index() -> PhenotypeIndex:
    """Return the shared phenotype index, building and loading it on first call.

    The index directory is taken from ``index_status()``. The embedding
    endpoint, model and API key are read from ``EMBED_URL``, ``EMBED_MODEL``
    and ``EMBED_API_KEY``. The loaded index is cached in the module-level
    ``_DEFAULT_INDEX`` and returned on every later call.

    Raises:
        RuntimeError: If the index directory or its catalog file is missing.
    """
    global _DEFAULT_INDEX
    if _DEFAULT_INDEX is None:
        # Fail early with the resolved path in the message instead of letting
        # the load step fail on a missing directory or catalog.
        status = index_status()
        if not status["exists"]:
            raise RuntimeError(f"Phenotype index directory not found: {status['index_dir']}")
        catalog_info = status["files"].get("catalog") or {}
        if not catalog_info.get("exists"):
            raise RuntimeError(f"Phenotype catalog not found: {catalog_info.get('path')}")
        embed_url = os.getenv("EMBED_URL", "http://localhost:3000/ollama/api/embed")
        embed_model = os.getenv("EMBED_MODEL", "qwen3-embedding:4b")
        api_key = os.getenv("EMBED_API_KEY")
        embedding_client = EmbeddingClient(url=embed_url, model=embed_model, api_key=api_key)
        # Load exactly once, from the directory that was just validated; a
        # cwd-relative "data/phenotype_index" would depend on where the
        # process was started.
        _DEFAULT_INDEX = PhenotypeIndex(index_dir=status["index_dir"], embedding_client=embedding_client).load()
    return _DEFAULT_INDEX
32 changes: 32 additions & 0 deletions mcp_server/study_agent_mcp/server.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,47 @@
import os
import sys

from mcp.server.fastmcp import FastMCP

from study_agent_mcp.tools import register_all
from study_agent_mcp.retrieval import index_status

mcp = FastMCP("study-agent")
register_all(mcp)

def _log(level: str, message: str) -> None:
    """Write ``message`` to stderr if ``level`` passes the ``MCP_LOG_LEVEL`` filter.

    Args:
        level: Severity name (``DEBUG``, ``INFO``, ``WARN``/``WARNING``,
            ``ERROR``). Matching is case-insensitive; unknown names are
            treated as ``INFO``.
        message: Text to emit after the ``MCP <LEVEL> >`` prefix.

    ``MCP_LOG_LEVEL`` (default ``INFO``) sets the minimum severity that is
    printed; ``OFF`` suppresses all output. An unrecognised value falls back
    to ``INFO``.
    """
    levels = {"DEBUG": 10, "INFO": 20, "WARN": 30, "WARNING": 30, "ERROR": 40, "OFF": 100}
    # Normalise both sides so "warn" or " Warn " are not silently downgraded
    # to the INFO default and filtered out.
    level_name = level.strip().upper()
    configured = os.getenv("MCP_LOG_LEVEL", "INFO").strip().upper()
    threshold = levels.get(configured, levels["INFO"])
    severity = levels.get(level_name, levels["INFO"])
    if threshold >= levels["OFF"] or severity < threshold:
        return
    print(f"MCP {level_name} > {message}", file=sys.stderr)


def _preflight() -> None:
    """Log startup diagnostics about the phenotype index and embedding settings.

    Emits, via ``_log``: a warning when ``PHENOTYPE_INDEX_DIR`` is unset, an
    error when the index directory is missing, an error when the catalog file
    is missing, and a warning for each of ``EMBED_URL`` / ``EMBED_MODEL`` that
    is unset or empty. Nothing is raised; this only reports.
    """
    report = index_status()
    index_dir = report["index_dir"]

    if os.getenv("PHENOTYPE_INDEX_DIR") is None:
        _log("WARN", f"PHENOTYPE_INDEX_DIR not set; using default {index_dir}")
    if not report["exists"]:
        _log("ERROR", f"Phenotype index directory missing: {index_dir}")

    catalog_entry = report["files"].get("catalog") or {}
    if not catalog_entry.get("exists"):
        _log("ERROR", f"Phenotype catalog missing: {catalog_entry.get('path')}")

    embed_warnings = (
        ("EMBED_URL", "EMBED_URL not set; default OpenWebUI embed endpoint will be used."),
        ("EMBED_MODEL", "EMBED_MODEL not set; default embedding model will be used."),
    )
    for variable, warning in embed_warnings:
        if not os.getenv(variable):
            _log("WARN", warning)


def main() -> None:
transport = os.getenv("MCP_TRANSPORT", "stdio").lower()
_preflight()

if transport in ("sse", "http"):
host = os.getenv("MCP_HOST", "0.0.0.0")
Expand Down
1 change: 1 addition & 0 deletions mcp_server/study_agent_mcp/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"study_agent_mcp.tools.phenotype_fetch_definition",
"study_agent_mcp.tools.phenotype_list_similar",
"study_agent_mcp.tools.phenotype_reindex",
"study_agent_mcp.tools.phenotype_index_status",
"study_agent_mcp.tools.phenotype_prompt_bundle",
"study_agent_mcp.tools.phenotype_recommendation_advice",
"study_agent_mcp.tools.lint_prompt_bundle",
Expand Down
20 changes: 20 additions & 0 deletions mcp_server/study_agent_mcp/tools/phenotype_index_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from __future__ import annotations

import os
from typing import Any, Dict

from study_agent_mcp.retrieval import index_status

from ._common import with_meta


def register(mcp: object) -> None:
    """Register the ``phenotype_index_status`` tool on the given MCP server.

    The tool takes no arguments. It returns ``index_status()`` extended with
    the effective embedding URL and model and a flag telling whether
    ``EMBED_API_KEY`` is set, wrapped by ``with_meta``.
    """

    # No docstring on the tool function itself: it had none originally and the
    # MCP framework may surface it as the tool description.
    def phenotype_index_status_tool() -> Dict[str, Any]:
        payload = index_status()
        payload.update(
            embed_url=os.getenv("EMBED_URL", "http://localhost:3000/ollama/api/embed"),
            embed_model=os.getenv("EMBED_MODEL", "qwen3-embedding:4b"),
            # Report only whether a key is configured, never the key itself.
            embed_api_key_set=os.getenv("EMBED_API_KEY") is not None,
        )
        return with_meta(payload, "phenotype_index_status")

    mcp.tool(name="phenotype_index_status")(phenotype_index_status_tool)
Loading