diff --git a/examples/nemo-agent-toolkit/.env.example b/examples/nemo-agent-toolkit/.env.example new file mode 100644 index 00000000..7ff7c925 --- /dev/null +++ b/examples/nemo-agent-toolkit/.env.example @@ -0,0 +1,7 @@ +# ─── Required ──────────────────────────────────────────────────────────────── +# NVIDIA cloud inference API key — get one at https://build.nvidia.com +NVIDIA_API_KEY=nvapi-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + +# ─── Optional ──────────────────────────────────────────────────────────────── +# Disable telemetry in non-interactive / CI environments (true/false) +NAT_TELEMETRY_ENABLED=false diff --git a/examples/nemo-agent-toolkit/.gitignore b/examples/nemo-agent-toolkit/.gitignore new file mode 100644 index 00000000..06de5d01 --- /dev/null +++ b/examples/nemo-agent-toolkit/.gitignore @@ -0,0 +1,5 @@ +.env +.venv/ +__pycache__/ +*.pyc +nat-start.log diff --git a/examples/nemo-agent-toolkit/.saturn/saturn.json b/examples/nemo-agent-toolkit/.saturn/saturn.json new file mode 100644 index 00000000..7956f856 --- /dev/null +++ b/examples/nemo-agent-toolkit/.saturn/saturn.json @@ -0,0 +1,26 @@ +{ + "name": "example-nemo-agent-toolkit", + "image_uri": "public.ecr.aws/saturncloud/saturn-python:2025.05.01", + "description": "NVIDIA NeMo Agent Toolkit — Research Assistant. An AI agent that uses Wikipedia search and NVIDIA NIM cloud inference to research and summarize any topic. 
Bring your own NVIDIA_API_KEY.", + "working_directory": "/home/jovyan/examples/examples/nemo-agent-toolkit", + "start_script": "bash start.sh", + "environment_variables": { + "NVIDIA_API_KEY": "", + "NAT_TELEMETRY_ENABLED": "false" + }, + "git_repositories": [ + { + "url": "https://github.com/saturncloud/examples", + "path": "/home/jovyan/examples" + } + ], + "jupyter_server": { + "disk_space": "10Gi", + "instance_type": "large", + "auto_shutoff": "1 hour", + "routes": [ + {"container_port": 8000, "visibility": "owner"} + ] + }, + "version": "2022.01.06" +} diff --git a/examples/nemo-agent-toolkit/README.md b/examples/nemo-agent-toolkit/README.md new file mode 100644 index 00000000..bf522b7f --- /dev/null +++ b/examples/nemo-agent-toolkit/README.md @@ -0,0 +1,208 @@ +# 🤖 NVIDIA NeMo Agent Toolkit — Research Assistant + +### **Overview** + +This template deploys an AI research agent powered by the [NVIDIA NeMo Agent Toolkit](https://github.com/NVIDIA/NeMo-Agent-Toolkit). Given any question or topic, the agent searches Wikipedia, reasons step-by-step using a **ReAct loop** (Reason + Act), and returns a structured answer — all backed by **NVIDIA NIM cloud inference**. No GPU required. + +* **Hardware:** CPU Large (2 cores, 16 GB RAM) — GPU supported but not required +* **Runtime:** NVIDIA NIM cloud API — bring your own `nvapi-...` key +* **Use Case:** Research automation, knowledge summarization, agentic reasoning demos + +--- + +### **Tech Stack** + +* **NVIDIA NeMo Agent Toolkit (`nvidia-nat`):** Orchestrates the ReAct agent loop, tool dispatch, and LLM calls. +* **NVIDIA NIM (`nvidia/nemotron-3-nano-30b-a3b`):** Cloud-hosted inference — no local GPU needed. +* **Wikipedia Search (`wiki_search`):** Built-in tool; no extra API key required. +* **LangChain:** Provides the tool and chain integration layer used by `nvidia-nat`. + +--- + +## 🪐 Using on Saturn Cloud + +### 1. 
Get an NVIDIA API key + +Get a free key at [build.nvidia.com](https://build.nvidia.com) → sign up → **API Keys** → **Generate Key**. Your key will start with `nvapi-`. + +### 2. Create the workspace from the template + +In Saturn Cloud, go to **New Resource → Workspace → Templates** and select **NeMo Agent Toolkit — Research Assistant**. + +Before clicking Start, add your NVIDIA API key: + +1. Open **Settings → Environment Variables** +2. Find `NVIDIA_API_KEY` in the list — it will be empty +3. Click the edit icon next to it and paste your `nvapi-...` key +4. Click **Save** + +If you started the workspace without setting the key, stop it, add the key following the steps above, then start it again. + +> **GPU users** — the template defaults to CPU Large since inference runs in the cloud via NVIDIA NIM. If you want to run on a GPU instance (e.g. to extend the agent with local model inference), go to **Settings → Hardware** and select a GPU instance type before starting. Everything else works the same. + +### 3. Start the workspace + +Click **Start**. Saturn Cloud will clone the repo and run `start.sh` automatically — this installs all dependencies and runs a demo query. The process takes about 3–4 minutes. You can watch it complete by opening **Logs** from the workspace panel. + +### 4. Open JupyterLab + +Once the workspace status shows **Running**, click the **JupyterLab** button to open the IDE. Open a terminal from **File → New → Terminal**. + +### 5. Run a query from the terminal + +```bash +cd /home/jovyan/examples/examples/nemo-agent-toolkit +source .venv/bin/activate +nat run --config_file workflow.yml --input "your question here" +``` + +You will see the agent's full reasoning — each Wikipedia search and every reasoning step — printed live in the terminal. + +### 6. Launch the Gradio chat UI + +```bash +source .venv/bin/activate +python app.py +``` + +Then go to **Settings → Routes** on the workspace and open the URL listed next to port **8000**. 
This opens the chat interface in your browser where you can have a full conversation with the agent. + +To stop the UI: + +```bash +pkill -f "app.py" +``` + +--- + +## 🛠️ Local Setup + +### 1. Set your NVIDIA API key + +```bash +cp .env.example .env +# Edit .env and set NVIDIA_API_KEY=nvapi-... +``` + +Get a free key at [build.nvidia.com](https://build.nvidia.com) → API Keys. + +### 2. Run the demo + +```bash +chmod +x start.sh test.sh +./start.sh +``` + +`start.sh` creates a `.venv`, installs `nvidia-nat` and its integrations, then runs a pre-set research query so you can see the agent working immediately. + +### 3. Verify your setup (optional) + +```bash +./test.sh +``` + +Checks Python version, `nat` CLI availability, `workflow.yml`, API key format, and live NVIDIA API connectivity. + +--- + +## 🏃 Run a custom query + +```bash +source .venv/bin/activate +nat run --config_file workflow.yml --input "your question here" +``` + +Examples: + +```bash +nat run --config_file workflow.yml --input "What is NVIDIA Hopper architecture and how does it differ from Ampere?" +nat run --config_file workflow.yml --input "Explain how transformer models work and list three key papers" +nat run --config_file workflow.yml --input "Research the history of autonomous vehicles and list 5 major milestones" +``` + +--- + +## 💬 Launch the chat UI + +```bash +source .venv/bin/activate +python app.py +``` + +Opens a Gradio chat interface on port 8000. Type questions and the agent will search Wikipedia and reason step-by-step in the background. + +**On Saturn Cloud** — the port 8000 route is pre-configured on the workspace. Once `app.py` is running, open the port 8000 URL from your workspace settings. + +To stop: + +```bash +pkill -f "app.py" +``` + +--- + +## ⚙️ Customise the workflow + +All configuration lives in `workflow.yml` — no Python required. 
+ +**Change the model:** + +```yaml +llms: + nim_llm: + model_name: nvidia/llama-3.1-nemotron-70b-instruct # higher quality + # model_name: nvidia/llama-3.1-nemotron-nano-8b-v1 # faster / cheaper + # model_name: meta/llama-3.1-8b-instruct # widely available +``` + +**Get more search results:** + +```yaml +functions: + wikipedia_search: + max_results: 5 # default is 3 +``` + +**Reduce verbosity:** + +```yaml +workflow: + verbose: false +``` + +Browse all available NIM models at [build.nvidia.com](https://build.nvidia.com). + +--- + +## 🔧 Troubleshooting + +**`NVIDIA_API_KEY is not set`** + +```bash +cp .env.example .env # then paste your key +``` + +**`nat: command not found`** + +```bash +source .venv/bin/activate # activate the venv first +``` + +**`HTTP 401` from NVIDIA API** — key is invalid or expired; generate a new one at [build.nvidia.com](https://build.nvidia.com). + +**Agent loops without answering** — increase retries or switch to a more capable model: + +```yaml +workflow: + parse_agent_response_max_retries: 5 +``` + +**Model not available** — try `meta/llama-3.1-8b-instruct`, which is broadly available on free-tier keys. 
async def predict(message, history):
    """Answer one chat message by running the NeMo Agent Toolkit workflow.

    Args:
        message: The user's latest chat message; passed to the workflow as
            its input.
        history: Earlier conversation turns supplied by ``gr.ChatInterface``.
            Unused: every message is answered by a fresh workflow run.

    Returns:
        str: The workflow result converted with ``str()``; a fixed fallback
        message if the run raises ``GraphRecursionError``; otherwise
        ``"Error: <exception text>"`` for any other exception.
    """
    # Function-scope import so this block needs no change to the file's imports.
    import os

    # Resolve workflow.yml next to this file (start.sh does the same through
    # SCRIPT_DIR) so the UI also works when launched from another directory.
    config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "workflow.yml"
    )

    try:
        async with load_workflow(config_path) as session:
            async with session.run(message) as runner:
                result = await runner.result()
                return str(result)
    except GraphRecursionError:
        return (
            "I searched Wikipedia but couldn't find enough information to answer "
            "this question within the search limit. Try rephrasing your question or "
            "asking about a more specific topic."
        )
    except Exception as e:
        # Deliberately broad: the chat UI should show the failure as a reply
        # instead of crashing the handler.
        return f"Error: {e}"
# ─── 3. Check Python 3.11+ ───────────────────────────────────────────────────
PYTHON_CMD=""

#######################################
# Find a Python 3.11+ interpreter and record its command name in PYTHON_CMD.
# Candidates are probed in this order: python3.13, python3.12, python3.11,
# python3, python. The first one reporting major 3 and minor >= 11 wins.
# Globals:   PYTHON_CMD (written)
# Arguments: none
# Outputs:   one line via log() naming the selected interpreter
# Returns:   0 on success; calls die() when no candidate qualifies
#######################################
check_python() {
  local cmd ver python_cmd=""

  for cmd in python3.13 python3.12 python3.11 python3 python; do
    command -v "$cmd" &>/dev/null || continue

    # A candidate that cannot run the probe (e.g. Python 2 rejecting the
    # f-string, or a broken shim) is skipped. Without `|| continue` the failed
    # assignment would abort the whole script under `set -e` with no message.
    ver=$("$cmd" -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')" 2>/dev/null) \
      || continue

    # Only accept a clean MAJOR.MINOR answer; anything else is not comparable.
    [[ "$ver" =~ ^([0-9]+)\.([0-9]+)$ ]] || continue

    if (( BASH_REMATCH[1] == 3 && BASH_REMATCH[2] >= 11 )); then
      python_cmd="$cmd"
      break
    fi
  done

  [[ -n "$python_cmd" ]] \
    || die "Python 3.11, 3.12, or 3.13 is required. Install one and re-run."

  # `--version` already prints "Python X.Y.Z", so no extra "Python" prefix.
  log "$("$python_cmd" --version 2>&1) — OK"
  PYTHON_CMD="$python_cmd"
}

# ─── 4. Set up virtual environment ───────────────────────────────────────────
#######################################
# Create (if needed) and activate the project virtual environment.
# Globals:   SCRIPT_DIR (read), LOG_FILE (read; venv output is appended)
# Arguments: $1 - interpreter command used to create the environment
# Outputs:   progress via log()
# Returns:   0 on success; calls die() when the environment cannot be created
#######################################
setup_venv() {
  local python_cmd="$1"
  local venv_dir="$SCRIPT_DIR/.venv"

  # Test for the activate script, not just the directory: an interrupted
  # earlier run can leave a .venv directory with nothing usable inside.
  if [[ ! -f "$venv_dir/bin/activate" ]]; then
    log "Creating virtual environment..."
    "$python_cmd" -m venv "$venv_dir" &>>"$LOG_FILE" \
      || die "virtual environment creation failed — check $LOG_FILE"
  fi

  # shellcheck disable=SC1091 — the file only exists at run time
  source "$venv_dir/bin/activate"
  log "Virtual environment active"
}
# ─── 5. Install nvidia-nat ───────────────────────────────────────────────────
#######################################
# Make sure nvidia-nat, its integrations and Gradio are importable by the
# active `python`, installing them with pip when the import probe fails, then
# apply the two source patches.
# Globals:   LOG_FILE (read; pip output is appended to it)
# Arguments: none
# Outputs:   progress via log()
# Returns:   status of apply_loader_patch; calls die() on the first pip step
#            that fails
#######################################
install_nat() {
  local -r probe="import nemo_agent_toolkit; import langchain_core; import langchain_nvidia_ai_endpoints; import nat.plugins.langchain; import nat.plugins.eval; import nat.plugins.opentelemetry; import gradio"
  local -a pip_upgrade=(python -m pip install --quiet --upgrade)
  local outcome="nvidia-nat and integrations already installed — OK"

  # NOTE(review): the other code in this example imports the toolkit as `nat`.
  # Confirm `nemo_agent_toolkit` is an importable module name; if it is not,
  # this probe never succeeds and every start re-runs all the pip steps below.
  if ! python -c "$probe" &>/dev/null; then
    log "Installing nvidia-nat..."
    "${pip_upgrade[@]}" nvidia-nat &>>"$LOG_FILE" \
      || die "nvidia-nat install failed — check $LOG_FILE"

    log "Installing langchain integrations..."
    "${pip_upgrade[@]}" langchain langchain-core langchain-community langchain-nvidia-ai-endpoints &>>"$LOG_FILE" \
      || die "langchain integrations install failed — check $LOG_FILE"

    # --no-deps: only the plugin packages themselves are installed here.
    log "Registering integration plugins..."
    "${pip_upgrade[@]}" --no-deps nvidia-nat-langchain nvidia-nat-eval nvidia-nat-opentelemetry &>>"$LOG_FILE" \
      || die "plugin registration failed — check $LOG_FILE"

    log "Installing telemetry dependencies..."
    "${pip_upgrade[@]}" opentelemetry-api opentelemetry-sdk "opentelemetry-exporter-otlp~=1.3" &>>"$LOG_FILE" \
      || die "telemetry dependencies install failed — check $LOG_FILE"

    log "Installing Gradio chat UI..."
    "${pip_upgrade[@]}" gradio &>>"$LOG_FILE" \
      || die "gradio install failed — check $LOG_FILE"

    outcome="nvidia-nat and integrations installed successfully"
  fi

  # Both paths end the same way: report, then (re)apply the patches.
  log "$outcome"
  apply_wikipedia_patch
  apply_loader_patch
}
#######################################
# Patch the installed wiki_search tool so `_wiki_search` sets a Wikipedia
# User-Agent before searching. Best effort: a failure is logged as a WARNING
# and does not stop the script.
# The inserted lines take their indentation from the function body they are
# added to, so the patch does not depend on the upstream indent width.
# Globals:   LOG_FILE (read; interpreter output is appended)
# Arguments: none
# Outputs:   progress / WARNING via log()
# Returns:   0 (a failed patch only produces a WARNING)
#######################################
apply_wikipedia_patch() {
  log "Applying Wikipedia API User-Agent patch..."
  # Quoted heredoc: the Python source reaches the interpreter verbatim, with
  # no shell escaping involved.
  python - <<'PY' &>>"$LOG_FILE" || log "WARNING: Failed to apply Wikipedia User-Agent patch"
import re
import sys

import nat.plugins.langchain.tools.wikipedia_search as ws

USER_AGENT_CALL = 'wikipedia.set_user_agent("SaturnCloudResearchBot/1.0 (contact@saturncloud.io)")'

path = ws.__file__
with open(path, "r") as f:
    content = f.read()

if "wikipedia.set_user_agent" in content:
    sys.exit(0)  # already patched on an earlier run

signature = re.compile(
    r"^(?P<indent>[ \t]*)async def _wiki_search\(question: str\) -> str:[ \t]*$",
    re.MULTILINE,
)
first_body_line = re.compile(r"(?:[ \t]*\n)+(?P<indent>[ \t]+)\S")


def inject(match):
    # Reuse the indentation of the existing first body line; fall back to
    # "def indent + 4 spaces" when it cannot be determined.
    indent = match.group("indent") + "    "
    body = first_body_line.match(match.string, match.end())
    if body and len(body.group("indent")) > len(match.group("indent")):
        indent = body.group("indent")
    return "\n".join(
        [match.group(0), indent + "import wikipedia", indent + USER_AGENT_CALL]
    )


patched, count = signature.subn(inject, content)
if count == 0:
    # Upstream changed: fail loudly so the caller logs a WARNING instead of
    # pretending the patch was applied.
    sys.exit("patch anchor 'async def _wiki_search(question: str) -> str:' not found in " + path)

with open(path, "w") as f:
    f.write(patched)
PY
}

#######################################
# Patch the installed plugin loader so a plugin ImportError logs its traceback
# at debug level and a one-line warning, instead of a warning with traceback.
# Best effort: a failure is logged as a WARNING and does not stop the script.
# The match is indentation-agnostic; the replacement reuses the indentation
# found in the installed file.
# Globals:   LOG_FILE (read; interpreter output is appended)
# Arguments: none
# Outputs:   progress / WARNING via log()
# Returns:   0 (a failed patch only produces a WARNING)
#######################################
apply_loader_patch() {
  log "Applying plugin loader traceback suppression patch..."
  python - <<'PY' &>>"$LOG_FILE" || log "WARNING: Failed to apply loader patch"
import re
import sys

import nat.runtime.loader as nl

OLD_CALL = """logger.warning("Failed to import plugin '%s'", entry_point.name, exc_info=True)"""
NEW_DEBUG = """logger.debug("Failed to import plugin '%s'", entry_point.name, exc_info=True)"""
NEW_WARNING = """logger.warning("Failed to import plugin '%s' (optional integration dependencies not installed)", entry_point.name)"""

path = nl.__file__
with open(path, "r") as f:
    content = f.read()

if NEW_WARNING in content:
    sys.exit(0)  # already patched on an earlier run

target = re.compile(
    r"^(?P<head>[ \t]*except ImportError:[ \t]*\n)(?P<indent>[ \t]*)" + re.escape(OLD_CALL),
    re.MULTILINE,
)


def rewrite(match):
    indent = match.group("indent")
    return match.group("head") + indent + NEW_DEBUG + "\n" + indent + NEW_WARNING


patched, count = target.subn(rewrite, content)
if count == 0:
    # Neither the original nor the patched text is present: report it.
    sys.exit("expected 'except ImportError' logging call not found in " + path)

with open(path, "w") as f:
    f.write(patched)
PY
}

# ─── 6. Run the demo ─────────────────────────────────────────────────────────
#######################################
# Run one pre-set research query through `nat run`, then log how to run
# custom queries and start the chat UI.
# Globals:   SCRIPT_DIR (read)
# Arguments: none
# Outputs:   banner and hints via log(); `nat run` writes its own output
# Returns:   0 when the demo query and the final log call succeed
#######################################
run_demo() {
  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  log "Running Research Assistant demo..."
  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

  nat run \
    --config_file "$SCRIPT_DIR/workflow.yml" \
    --input "Research large language models and provide a timeline of five key milestones in their development, from early transformer models to recent advances."

  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  log "Demo complete. To run your own query:"
  log " nat run --config_file workflow.yml --input \"your question here\""
  log ""
  log "To launch the chat UI:"
  log " python app.py"
  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
}
#!/usr/bin/env bash
# Smoke-tests the NeMo Agent Toolkit setup before running the demo.
#
# Checks, in order: a Python 3.11+ interpreter, the `nat` CLI inside .venv,
# the presence of workflow.yml, the NVIDIA_API_KEY variable and its prefix,
# and a live request to the NVIDIA API. Every check is recorded as PASS or
# FAIL and a summary is always printed.
#
# Environment: NVIDIA_API_KEY (also loaded from .env next to this script).
# Exit status: 0 when no check failed, non-zero otherwise.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PASS=0
FAIL=0

# Record and print one passing / failing check, or print a section header.
ok()     { echo " [PASS] $*"; PASS=$((PASS + 1)); }
fail()   { echo " [FAIL] $*"; FAIL=$((FAIL + 1)); }
header() { echo; echo "── $* ──────────────────────────────────"; }

# Load env
if [[ -f "$SCRIPT_DIR/.env" ]]; then
  set -o allexport
  # shellcheck disable=SC1091 — the file only exists at run time
  source "$SCRIPT_DIR/.env"
  set +o allexport
fi

# ─── Python ───────────────────────────────────────────────────────────────────
header "Python"

PYTHON_CMD=""
for cmd in python3.13 python3.12 python3.11 python3 python; do
  command -v "$cmd" &>/dev/null || continue

  # Skip interpreters that cannot run the probe (e.g. Python 2); without the
  # `|| continue` the failed assignment would abort the script under `set -e`.
  ver=$("$cmd" -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')" 2>/dev/null) \
    || continue
  [[ "$ver" =~ ^([0-9]+)\.([0-9]+)$ ]] || continue

  if (( BASH_REMATCH[1] == 3 && BASH_REMATCH[2] >= 11 )); then
    PYTHON_CMD="$cmd"
    break
  fi
done

if [[ -n "$PYTHON_CMD" ]]; then
  # `--version` already prints "Python X.Y.Z".
  ok "$("$PYTHON_CMD" --version 2>&1) found"
else
  fail "Python 3.11, 3.12, or 3.13 not found — install one first"
fi

# ─── nvidia-nat ───────────────────────────────────────────────────────────────
header "nvidia-nat"

VENV_NAT=".venv/bin/nat"
if [[ -f "$SCRIPT_DIR/$VENV_NAT" ]]; then
  ok "nat CLI found in .venv"
  if "$SCRIPT_DIR/$VENV_NAT" --version &>/dev/null; then
    ok "nat --version OK"
  else
    fail "nat --version failed"
  fi
else
  fail "nat CLI not found — run ./start.sh first to install"
fi

# ─── Workflow config ──────────────────────────────────────────────────────────
header "Workflow config"

if [[ -f "$SCRIPT_DIR/workflow.yml" ]]; then
  ok "workflow.yml present"
else
  fail "workflow.yml missing"
fi

# ─── Environment variables ────────────────────────────────────────────────────
header "Environment variables"

# The prefix check only makes sense when a key is present; an unset key is
# reported once instead of as two separate failures.
if [[ -z "${NVIDIA_API_KEY:-}" ]]; then
  fail "NVIDIA_API_KEY not set — copy .env.example to .env and fill it in"
else
  ok "NVIDIA_API_KEY is set"
  if [[ "$NVIDIA_API_KEY" == nvapi-* ]]; then
    ok "NVIDIA_API_KEY format looks correct (nvapi-...)"
  else
    fail "NVIDIA_API_KEY doesn't start with 'nvapi-' — check your key"
  fi
fi

# ─── NVIDIA API connectivity ──────────────────────────────────────────────────
header "NVIDIA API connectivity"

if [[ -n "${NVIDIA_API_KEY:-}" ]]; then
  # curl exits non-zero on DNS, connect and timeout errors. Catch that here so
  # `set -e` cannot end the script before the summary, and bound the wait.
  http_code=$(curl -s -o /dev/null -w "%{http_code}" \
    --connect-timeout 10 --max-time 30 \
    -H "Authorization: Bearer $NVIDIA_API_KEY" \
    "https://integrate.api.nvidia.com/v1/models") || http_code="000"

  case "$http_code" in
    200) ok "NVIDIA API reachable and key is valid (HTTP 200)" ;;
    401) fail "NVIDIA API key invalid or expired (HTTP 401)" ;;
    403) fail "NVIDIA API key lacks permissions (HTTP 403)" ;;
    000) fail "NVIDIA API not reachable (network error, timeout, or curl unavailable)" ;;
    *)   fail "NVIDIA API returned unexpected status: HTTP $http_code" ;;
  esac
else
  fail "Skipping API test — NVIDIA_API_KEY not set"
fi

# ─── Summary ──────────────────────────────────────────────────────────────────
echo
echo "══════════════════════════════════════"
echo " Results: $PASS passed, $FAIL failed"
echo "══════════════════════════════════════"

if [[ $FAIL -eq 0 ]]; then
  echo " Ready to run: ./start.sh"
else
  echo " Fix the failures above, then re-run test.sh"
fi
echo

# The script's exit status reflects the overall result.
[[ $FAIL -eq 0 ]]