From f60ff7d738787c4e21d0e0d0cedaa1f3f12612ba Mon Sep 17 00:00:00 2001
From: Olusegun Durojaye <durojaye@saturncloud.io>
Date: Sun, 17 May 2026 23:25:23 +0100
Subject: [PATCH 01/11] Add NVIDIA NeMo Agent Toolkit research assistant example

CPU-only workspace template using nvidia-nat and NVIDIA NIM cloud
inference. ReAct agent with Wikipedia search; bring-your-own NVIDIA_API_KEY.

---
 examples/nemo-agent-toolkit/.env.example      |   7 +
 examples/nemo-agent-toolkit/.gitignore        |   5 +
 .../nemo-agent-toolkit/.saturn/saturn.json    |  25 +++
 examples/nemo-agent-toolkit/README.md         | 150 ++++++++++++++++++
 examples/nemo-agent-toolkit/start.sh          | 150 ++++++++++++++++++
 examples/nemo-agent-toolkit/test.sh           |  94 +++++++++++
 examples/nemo-agent-toolkit/workflow.yml      |  22 +++
 7 files changed, 453 insertions(+)
 create mode 100644 examples/nemo-agent-toolkit/.env.example
 create mode 100644 examples/nemo-agent-toolkit/.gitignore
 create mode 100644 examples/nemo-agent-toolkit/.saturn/saturn.json
 create mode 100644 examples/nemo-agent-toolkit/README.md
 create mode 100755 examples/nemo-agent-toolkit/start.sh
 create mode 100755 examples/nemo-agent-toolkit/test.sh
 create mode 100644 examples/nemo-agent-toolkit/workflow.yml

diff --git a/examples/nemo-agent-toolkit/.env.example b/examples/nemo-agent-toolkit/.env.example
new file mode 100644
index 00000000..7ff7c925
--- /dev/null
+++ b/examples/nemo-agent-toolkit/.env.example
@@ -0,0 +1,7 @@
+# ─── Required ────────────────────────────────────────────────────────────────
+# NVIDIA cloud inference API key — get one at https://build.nvidia.com
+NVIDIA_API_KEY=nvapi-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+
+# ─── Optional ────────────────────────────────────────────────────────────────
+# Disable telemetry in non-interactive / CI environments (true/false)
+NAT_TELEMETRY_ENABLED=false
diff --git a/examples/nemo-agent-toolkit/.gitignore b/examples/nemo-agent-toolkit/.gitignore
new file mode 100644
index 00000000..06de5d01
--- /dev/null
+++ b/examples/nemo-agent-toolkit/.gitignore
@@ -0,0 +1,5 @@
+.env
+.venv/
+__pycache__/
+*.pyc
+nat-start.log
diff --git a/examples/nemo-agent-toolkit/.saturn/saturn.json b/examples/nemo-agent-toolkit/.saturn/saturn.json
new file mode 100644
index 00000000..9c1eab9b
--- /dev/null
+++ b/examples/nemo-agent-toolkit/.saturn/saturn.json
@@ -0,0 +1,25 @@
+{
+  "name": "example-nemo-agent-toolkit",
+  "image_uri": "public.ecr.aws/saturncloud/saturn-python:2025.05.01",
+  "description": "NVIDIA NeMo Agent Toolkit — Research Assistant. An AI agent that uses Wikipedia search and NVIDIA NIM cloud inference to research and summarize any topic. Bring your own NVIDIA_API_KEY.",
+  "working_directory": "/home/jovyan/examples/examples/nemo-agent-toolkit",
+  "start_script": "bash start.sh",
+  "environment_variables": {
+    "NVIDIA_API_KEY": "",
+    "NAT_TELEMETRY_ENABLED": "false"
+  },
+  "git_repositories": [
+    {
+      "url": "https://github.com/saturncloud/examples",
+      "path": "/home/jovyan/examples",
+      "reference": "nemo-agent-toolkit",
+      "reference_type": "branch"
+    }
+  ],
+  "jupyter_server": {
+    "disk_space": "10Gi",
+    "instance_type": "large",
+    "auto_shutoff": "1 hour"
+  },
+  "version": "2022.01.06"
+}
diff --git a/examples/nemo-agent-toolkit/README.md b/examples/nemo-agent-toolkit/README.md
new file mode 100644
index 00000000..4ed93e2e
--- /dev/null
+++ b/examples/nemo-agent-toolkit/README.md
@@ -0,0 +1,150 @@
+# 🤖 NVIDIA NeMo Agent Toolkit — Research Assistant
+
+### **Overview**
+
+This template deploys an AI research agent powered by the [NVIDIA NeMo Agent Toolkit](https://github.com/NVIDIA/NeMo-Agent-Toolkit). Given any question or topic, the agent searches Wikipedia, reasons step-by-step using a **ReAct loop** (Reason + Act), and returns a structured answer — all backed by **NVIDIA NIM cloud inference**. No GPU required.
+
+* **Hardware:** CPU Large (2 cores, 16 GB RAM)
+* **Runtime:** NVIDIA NIM cloud API — bring your own `nvapi-...` key
+* **Use Case:** Research automation, knowledge summarization, agentic reasoning demos
+
+---
+
+### **Tech Stack**
+
+* **NVIDIA NeMo Agent Toolkit (`nvidia-nat`):** Orchestrates the ReAct agent loop, tool dispatch, and LLM calls.
+* **NVIDIA NIM (`nvidia/nemotron-3-nano-30b-a3b`):** Cloud-hosted inference — no local GPU needed.
+* **Wikipedia Search (`wiki_search`):** Built-in tool; no extra API key required.
+* **LangChain:** Provides the tool and chain integration layer used by `nvidia-nat`.
+
+---
+
+## 🛠️ Local Setup
+
+### 1. Set your NVIDIA API key
+
+```bash
+cp .env.example .env
+# Edit .env and set NVIDIA_API_KEY=nvapi-...
+```
+
+Get a free key at [build.nvidia.com](https://build.nvidia.com) → API Keys.
+
+### 2. Run the demo
+
+```bash
+chmod +x start.sh test.sh
+./start.sh
+```
+
+`start.sh` creates a `.venv`, installs `nvidia-nat` and its integrations, then runs a pre-set research query so you can see the agent working immediately.
+
+### 3. Verify your setup (optional)
+
+```bash
+./test.sh
+```
+
+Checks Python version, `nat` CLI availability, `workflow.yml`, API key format, and live NVIDIA API connectivity.
+
+---
+
+## 🏃 Run a custom query
+
+```bash
+source .venv/bin/activate
+nat run --config_file workflow.yml --input "your question here"
+```
+
+Examples:
+
+```bash
+nat run --config_file workflow.yml --input "What is NVIDIA Hopper architecture and how does it differ from Ampere?"
+nat run --config_file workflow.yml --input "Explain how transformer models work and list three key papers"
+nat run --config_file workflow.yml --input "Research the history of autonomous vehicles and list 5 major milestones"
+```
+
+---
+
+## 💬 Launch the chat UI
+
+```bash
+source .venv/bin/activate
+nat serve --config_file workflow.yml
+```
+
+Opens the NeMo Agent Toolkit built-in chat interface at `http://localhost:8000`. Type questions, watch the agent's step-by-step reasoning, and debug the workflow interactively.
+
+To stop:
+
+```bash
+pkill -f "nat serve"
+```
+
+---
+
+## ⚙️ Customise the workflow
+
+All configuration lives in `workflow.yml` — no Python required.
+
+**Change the model:**
+
+```yaml
+llms:
+  nim_llm:
+    model_name: nvidia/llama-3.1-nemotron-70b-instruct   # higher quality
+    # model_name: nvidia/llama-3.1-nemotron-nano-8b-v1    # faster / cheaper
+    # model_name: meta/llama-3.1-8b-instruct              # widely available
+```
+
+**Get more search results:**
+
+```yaml
+functions:
+  wikipedia_search:
+    max_results: 5   # default is 3
+```
+
+**Reduce verbosity:**
+
+```yaml
+workflow:
+  verbose: false
+```
+
+Browse all available NIM models at [build.nvidia.com](https://build.nvidia.com).
+
+---
+
+## 🔧 Troubleshooting
+
+**`NVIDIA_API_KEY is not set`**
+
+```bash
+cp .env.example .env   # then paste your key
+```
+
+**`nat: command not found`**
+
+```bash
+source .venv/bin/activate   # activate the venv first
+```
+
+**`HTTP 401` from NVIDIA API** — key is invalid or expired; generate a new one at [build.nvidia.com](https://build.nvidia.com).
+
+**Agent loops without answering** — increase retries or switch to a more capable model:
+
+```yaml
+workflow:
+  parse_agent_response_max_retries: 5
+```
+
+**Model not available** — try `meta/llama-3.1-8b-instruct`, which is broadly available on free-tier keys.
+
+---
+
+## 🔗 Resources
+
+* **NeMo Agent Toolkit docs:** [docs.nvidia.com/nemo/agent-toolkit](https://docs.nvidia.com/nemo/agent-toolkit/latest/)
+* **NVIDIA NIM models:** [build.nvidia.com](https://build.nvidia.com)
+* **Saturn Cloud:** [saturncloud.io](https://saturncloud.io/)
diff --git a/examples/nemo-agent-toolkit/start.sh b/examples/nemo-agent-toolkit/start.sh
new file mode 100755
index 00000000..3c6a6c9a
--- /dev/null
+++ b/examples/nemo-agent-toolkit/start.sh
@@ -0,0 +1,150 @@
+#!/usr/bin/env bash
+# NeMo Agent Toolkit — Research Assistant demo
+# Runs locally and as a Saturn Cloud start_script
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+LOG_FILE="$SCRIPT_DIR/nat-start.log"
+
+log() { echo "[$(date '+%H:%M:%S')] $*" | tee -a "$LOG_FILE"; }
+die() { log "ERROR: $*"; exit 1; }
+
+# ─── 1. Load environment ─────────────────────────────────────────────────────
+if [[ -f "$SCRIPT_DIR/.env" ]]; then
+    set -o allexport
+    source "$SCRIPT_DIR/.env"
+    set +o allexport
+    log "Loaded .env"
+fi
+
+# Disable telemetry in non-interactive environments
+export NAT_TELEMETRY_ENABLED="${NAT_TELEMETRY_ENABLED:-false}"
+
+# ─── 2. Validate required vars ───────────────────────────────────────────────
+[[ -z "${NVIDIA_API_KEY:-}" ]] && die "NVIDIA_API_KEY is not set. Copy .env.example to .env and fill it in."
+export NVIDIA_API_KEY
+
+# ─── 3. Check Python 3.11+ ───────────────────────────────────────────────────
+PYTHON_CMD=""
+check_python() {
+    # Select the first interpreter in the supported 3.11–3.13 range and publish it through the global PYTHON_CMD.
+    local python_cmd="" cmd ver major minor
+    for cmd in python3.13 python3.12 python3.11 python3 python; do
+        if command -v "$cmd" &>/dev/null; then
+            # `|| continue`: a candidate that cannot run the probe (e.g. Python 2) must not abort the script under `set -e`.
+            ver=$("$cmd" -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')" 2>/dev/null) || continue
+            major="${ver%%.*}"; minor="${ver#*.}"
+            # The upper bound keeps this check consistent with the 3.11–3.13 requirement in the error message below.
+            if [[ "$major" -eq 3 && "$minor" -ge 11 && "$minor" -le 13 ]]; then
+                python_cmd="$cmd"
+                break
+            fi
+        fi
+    done
+    [[ -z "$python_cmd" ]] && die "Python 3.11, 3.12, or 3.13 is required. Install one and re-run."
+    log "$("$python_cmd" --version) — OK"  # `--version` already prints the "Python " prefix
+    PYTHON_CMD="$python_cmd"
+}
+
+# ─── 4. Set up virtual environment ───────────────────────────────────────────
+setup_venv() {
+    local python_cmd="$1"
+    local venv_dir="$SCRIPT_DIR/.venv"
+    # Create the venv with interpreter $1 unless a usable one exists, then activate it in the current shell.
+    if [[ ! -f "$venv_dir/bin/activate" ]]; then  # file test also recovers from a half-created .venv directory
+        log "Creating virtual environment..."
+        "$python_cmd" -m venv "$venv_dir" &>>"$LOG_FILE" \
+            || die "virtual environment creation failed — check $LOG_FILE"
+    fi
+
+    source "$venv_dir/bin/activate"
+    log "Virtual environment active"
+}
+
+# ─── 5. Install nvidia-nat ───────────────────────────────────────────────────
+install_nat() {
+    if python -c "import nemo_agent_toolkit; import langchain_core; import langchain_nvidia_ai_endpoints; import nat.plugins.langchain; import nat.plugins.eval; import nat.plugins.opentelemetry" &>/dev/null 2>&1; then
+        log "nvidia-nat and integrations already installed — OK"
+        apply_wikipedia_patch
+        apply_loader_patch
+        return
+    fi
+    log "Installing nvidia-nat..."
+    python -m pip install --quiet --upgrade nvidia-nat &>>"$LOG_FILE" \
+        || die "nvidia-nat install failed — check $LOG_FILE"
+
+    log "Installing langchain integrations..."
+    python -m pip install --quiet --upgrade langchain langchain-core langchain-community langchain-nvidia-ai-endpoints &>>"$LOG_FILE" \
+        || die "langchain integrations install failed — check $LOG_FILE"
+
+    log "Registering integration plugins..."
+    python -m pip install --quiet --upgrade --no-deps nvidia-nat-langchain nvidia-nat-eval nvidia-nat-opentelemetry &>>"$LOG_FILE" \
+        || die "plugin registration failed — check $LOG_FILE"
+
+    log "Installing telemetry dependencies..."
+    python -m pip install --quiet --upgrade opentelemetry-api opentelemetry-sdk &>>"$LOG_FILE" \
+        || die "telemetry dependencies install failed — check $LOG_FILE"
+
+    log "nvidia-nat and integrations installed successfully"
+    apply_wikipedia_patch
+    apply_loader_patch
+}
+
+apply_wikipedia_patch() {  # Edits the installed nat.plugins.langchain.tools.wikipedia_search source so _wiki_search sets a Wikipedia User-Agent.
+    log "Applying Wikipedia API User-Agent patch..."  # the Python below skips the rewrite when 'wikipedia.set_user_agent' is already present
+    python -c "
+import nat.plugins.langchain.tools.wikipedia_search as ws
+path = ws.__file__
+with open(path, 'r') as f:
+    content = f.read()
+if 'wikipedia.set_user_agent' not in content:
+    patched = content.replace(
+        'async def _wiki_search(question: str) -> str:',
+        'async def _wiki_search(question: str) -> str:\n        import wikipedia\n        wikipedia.set_user_agent(\"SaturnCloudResearchBot/1.0 (contact@saturncloud.io)\")'
+    )
+    with open(path, 'w') as f:
+        f.write(patched)
+" &>>"$LOG_FILE" || log "WARNING: Failed to apply Wikipedia User-Agent patch"  # best-effort: a failure is logged and the script continues
+}
+
+apply_loader_patch() {  # Edits the installed nat.runtime.loader source: plugin ImportError tracebacks go to debug, a one-line warning remains.
+    log "Applying plugin loader traceback suppression patch..."  # the Python below writes the file only when the exact target text is found
+    python -c "
+import nat.runtime.loader as nl
+path = nl.__file__
+with open(path, 'r') as f:
+    content = f.read()
+target = '            except ImportError:\n                logger.warning(\"Failed to import plugin \\'%s\\'\", entry_point.name, exc_info=True)'
+replacement = '            except ImportError:\n                logger.debug(\"Failed to import plugin \\'%s\\'\", entry_point.name, exc_info=True)\n                logger.warning(\"Failed to import plugin \\'%s\\' (optional integration dependencies not installed)\", entry_point.name)'
+if target in content:
+    patched = content.replace(target, replacement)
+    with open(path, 'w') as f:
+        f.write(patched)
+" &>>"$LOG_FILE" || log "WARNING: Failed to apply loader patch"  # best-effort: a failure is logged and the script continues
+}
+
+# ─── 6. Run the demo ─────────────────────────────────────────────────────────
+run_demo() {
+    log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+    log "Running Research Assistant demo..."
+    log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+    nat run \
+        --config_file "$SCRIPT_DIR/workflow.yml" \
+        --input "Research large language models and provide a timeline of five key milestones in their development, from early transformer models to recent advances."
+
+    log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+    log "Demo complete. To run your own query:"
+    log "  nat run --config_file workflow.yml --input \"your question here\""
+    log ""
+    log "To launch the chat UI:"
+    log "  nat serve --config_file workflow.yml"
+    log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+}
+
+# ─── Main ────────────────────────────────────────────────────────────────────
+log "Starting NeMo Agent Toolkit — Research Assistant"
+check_python
+setup_venv "$PYTHON_CMD"
+install_nat
+run_demo
diff --git a/examples/nemo-agent-toolkit/test.sh b/examples/nemo-agent-toolkit/test.sh
new file mode 100755
index 00000000..bbdede38
--- /dev/null
+++ b/examples/nemo-agent-toolkit/test.sh
@@ -0,0 +1,94 @@
+#!/usr/bin/env bash
+# Smoke-tests the NeMo Agent Toolkit setup before running the demo
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PASS=0; FAIL=0
+
+ok()     { echo "  [PASS] $*"; PASS=$((PASS + 1)); }
+fail()   { echo "  [FAIL] $*"; FAIL=$((FAIL + 1)); }
+header() { echo; echo "── $* ──────────────────────────────────"; }
+
+# Load env
+if [[ -f "$SCRIPT_DIR/.env" ]]; then
+    set -o allexport; source "$SCRIPT_DIR/.env"; set +o allexport
+fi
+
+# ─── Python ───────────────────────────────────────────────────────────────────
+header "Python"
+
+PYTHON_CMD=""  # first interpreter found in the supported 3.11–3.13 range; stays empty when none qualifies
+for cmd in python3.13 python3.12 python3.11 python3 python; do
+    if command -v "$cmd" &>/dev/null; then
+        # `|| continue`: a candidate that cannot run the probe (e.g. Python 2) must not abort the test run under `set -e`.
+        ver=$("$cmd" -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')" 2>/dev/null) || continue
+        major="${ver%%.*}"; minor="${ver#*.}"
+        if [[ "$major" -eq 3 && "$minor" -ge 11 && "$minor" -le 13 ]]; then  # upper bound matches the failure message below
+            PYTHON_CMD="$cmd"
+            break
+        fi
+    fi
+done
+
+[[ -n "$PYTHON_CMD" ]] \
+    && ok "$("$PYTHON_CMD" --version) found" \
+    || fail "Python 3.11, 3.12, or 3.13 not found — install one first"
+
+# ─── nvidia-nat ───────────────────────────────────────────────────────────────
+header "nvidia-nat"
+
+VENV_NAT=".venv/bin/nat"
+if [[ -f "$SCRIPT_DIR/$VENV_NAT" ]]; then
+    ok "nat CLI found in .venv"
+    "$SCRIPT_DIR/$VENV_NAT" --version &>/dev/null \
+        && ok "nat --version OK" \
+        || fail "nat --version failed"
+else
+    fail "nat CLI not found — run ./start.sh first to install"
+fi
+
+# ─── Workflow config ──────────────────────────────────────────────────────────
+header "Workflow config"
+
+[[ -f "$SCRIPT_DIR/workflow.yml" ]] \
+    && ok "workflow.yml present" \
+    || fail "workflow.yml missing"
+
+# ─── Environment variables ────────────────────────────────────────────────────
+header "Environment variables"
+
+[[ -n "${NVIDIA_API_KEY:-}" ]] \
+    && ok "NVIDIA_API_KEY is set" \
+    || fail "NVIDIA_API_KEY not set — copy .env.example to .env and fill it in"
+
+[[ "${NVIDIA_API_KEY:-}" == nvapi-* ]] \
+    && ok "NVIDIA_API_KEY format looks correct (nvapi-...)" \
+    || fail "NVIDIA_API_KEY doesn't start with 'nvapi-' — check your key"
+
+# ─── NVIDIA API connectivity ──────────────────────────────────────────────────
+header "NVIDIA API connectivity"
+
+if [[ -n "${NVIDIA_API_KEY:-}" ]]; then
+    http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 20 \
+        -H "Authorization: Bearer $NVIDIA_API_KEY" \
+        "https://integrate.api.nvidia.com/v1/models") || true  # curl exits non-zero on transport errors; keep going so the summary still prints
+    case "$http_code" in
+        200) ok "NVIDIA API reachable and key is valid (HTTP 200)" ;;
+        401) fail "NVIDIA API key invalid or expired (HTTP 401)" ;;
+        403) fail "NVIDIA API key lacks permissions (HTTP 403)" ;;
+        000) fail "Could not reach the NVIDIA API (network, DNS, or timeout)" ;;
+        *)   fail "NVIDIA API returned unexpected status: HTTP $http_code" ;;
+    esac
+else
+    fail "Skipping API test — NVIDIA_API_KEY not set"
+fi
+
+# ─── Summary ──────────────────────────────────────────────────────────────────
+echo
+echo "══════════════════════════════════════"
+echo "  Results: $PASS passed, $FAIL failed"
+echo "══════════════════════════════════════"
+
+[[ $FAIL -eq 0 ]] && echo "  Ready to run: ./start.sh" || echo "  Fix the failures above, then re-run test.sh"
+echo
+[[ $FAIL -eq 0 ]]
diff --git a/examples/nemo-agent-toolkit/workflow.yml b/examples/nemo-agent-toolkit/workflow.yml
new file mode 100644
index 00000000..1179834b
--- /dev/null
+++ b/examples/nemo-agent-toolkit/workflow.yml
@@ -0,0 +1,22 @@
+functions:
+  # Wikipedia search — no extra API key needed, works out of the box
+  wikipedia_search:
+    _type: wiki_search
+    max_results: 3
+
+llms:
+  nim_llm:
+    _type: nim
+    # Change this to any model available at build.nvidia.com
+    model_name: nvidia/nemotron-3-nano-30b-a3b
+    temperature: 0.0
+    chat_template_kwargs:
+      enable_thinking: false
+
+workflow:
+  _type: react_agent
+  tool_names: [wikipedia_search]
+  llm_name: nim_llm
+  verbose: true
+  # Retry up to 3 times on parse errors
+  parse_agent_response_max_retries: 3

From 738e3eeaa471b60c62e4592ea6b25a0dde0304b8 Mon Sep 17 00:00:00 2001
From: Olusegun Durojaye <durojaye@saturncloud.io>
Date: Sun, 17 May 2026 23:54:25 +0100
Subject: [PATCH 02/11] Fix missing opentelemetry-exporter-otlp dependency in start.sh

nvidia-nat-opentelemetry 1.6.0 requires this package but it was not
being installed, causing a dependency conflict warning on startup.

---
 examples/nemo-agent-toolkit/start.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/nemo-agent-toolkit/start.sh b/examples/nemo-agent-toolkit/start.sh
index 3c6a6c9a..3dd7de0e 100755
--- a/examples/nemo-agent-toolkit/start.sh
+++ b/examples/nemo-agent-toolkit/start.sh
@@ -82,7 +82,7 @@ install_nat() {
         || die "plugin registration failed — check $LOG_FILE"
 
     log "Installing telemetry dependencies..."
-    python -m pip install --quiet --upgrade opentelemetry-api opentelemetry-sdk &>>"$LOG_FILE" \
+    python -m pip install --quiet --upgrade opentelemetry-api opentelemetry-sdk "opentelemetry-exporter-otlp~=1.3" &>>"$LOG_FILE" \
         || die "telemetry dependencies install failed — check $LOG_FILE"
 
     log "nvidia-nat and integrations installed successfully"

From 31a19bf9fc691a5a16d2f729feb58e1af1963079 Mon Sep 17 00:00:00 2001
From: Olusegun Durojaye <durojaye@saturncloud.io>
Date: Mon, 18 May 2026 00:25:16 +0100
Subject: [PATCH 03/11] Add port 8000 route and fix nat serve host binding

Exposes the NeMo Agent Toolkit chat UI (nat serve) via a dedicated Saturn
Cloud route on port 8000. Documents the --host 0.0.0.0 flag required for
Saturn's proxy to reach the server.

---
 examples/nemo-agent-toolkit/.saturn/saturn.json |  5 ++++-
 examples/nemo-agent-toolkit/README.md           | 10 ++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/examples/nemo-agent-toolkit/.saturn/saturn.json b/examples/nemo-agent-toolkit/.saturn/saturn.json
index 9c1eab9b..a7887784 100644
--- a/examples/nemo-agent-toolkit/.saturn/saturn.json
+++ b/examples/nemo-agent-toolkit/.saturn/saturn.json
@@ -19,7 +19,10 @@
   "jupyter_server": {
     "disk_space": "10Gi",
     "instance_type": "large",
-    "auto_shutoff": "1 hour"
+    "auto_shutoff": "1 hour",
+    "routes": [
+      {"container_port": 8000, "visibility": "owner"}
+    ]
   },
   "version": "2022.01.06"
 }
diff --git a/examples/nemo-agent-toolkit/README.md b/examples/nemo-agent-toolkit/README.md
index 4ed93e2e..18988419 100644
--- a/examples/nemo-agent-toolkit/README.md
+++ b/examples/nemo-agent-toolkit/README.md
@@ -70,10 +70,16 @@ nat run --config_file workflow.yml --input "Research the history of autonomous v
 
 ```bash
 source .venv/bin/activate
-nat serve --config_file workflow.yml
+nat serve --config_file workflow.yml --host 0.0.0.0
 ```
 
-Opens the NeMo Agent Toolkit built-in chat interface at `http://localhost:8000`. Type questions, watch the agent's step-by-step reasoning, and debug the workflow interactively.
+Opens the NeMo Agent Toolkit built-in chat interface on port 8000. Type questions, watch the agent's step-by-step reasoning, and debug the workflow interactively.
+
+**On Saturn Cloud** — access it via the port 8000 route on your workspace URL:
+```
+https://<your-workspace-subdomain>.community.saturnenterprise.io/
+```
+The port 8000 route is pre-configured on the workspace. The `--host 0.0.0.0` flag is required so Saturn's proxy can reach the server.
 
 To stop:
 

From 1d7daba4643f91cfef2b5fca82c59742ed23f018 Mon Sep 17 00:00:00 2001
From: Olusegun Durojaye <durojaye@saturncloud.io>
Date: Mon, 18 May 2026 11:38:29 +0100
Subject: [PATCH 04/11] Add Gradio chat UI (app.py) as single-process web interface

Replaces nat serve with a Gradio-based chatbot that runs on port
8000, binds to 0.0.0.0, and works natively with Saturn Cloud's port routing.
Gradio is installed as part of start.sh setup.

---
 examples/nemo-agent-toolkit/README.md | 12 ++++--------
 examples/nemo-agent-toolkit/app.py    | 22 ++++++++++++++++++++++
 examples/nemo-agent-toolkit/start.sh  |  8 ++++++--
 3 files changed, 32 insertions(+), 10 deletions(-)
 create mode 100644 examples/nemo-agent-toolkit/app.py

diff --git a/examples/nemo-agent-toolkit/README.md b/examples/nemo-agent-toolkit/README.md
index 18988419..d73f9aa0 100644
--- a/examples/nemo-agent-toolkit/README.md
+++ b/examples/nemo-agent-toolkit/README.md
@@ -70,21 +70,17 @@ nat run --config_file workflow.yml --input "Research the history of autonomous v
 
 ```bash
 source .venv/bin/activate
-nat serve --config_file workflow.yml --host 0.0.0.0
+python app.py
 ```
 
-Opens the NeMo Agent Toolkit built-in chat interface on port 8000. Type questions, watch the agent's step-by-step reasoning, and debug the workflow interactively.
+Opens a Gradio chat interface on port 8000. Type questions and the agent will search Wikipedia and reason step-by-step in the background.
 
-**On Saturn Cloud** — access it via the port 8000 route on your workspace URL:
-```
-https://<your-workspace-subdomain>.community.saturnenterprise.io/
-```
-The port 8000 route is pre-configured on the workspace. The `--host 0.0.0.0` flag is required so Saturn's proxy can reach the server.
+**On Saturn Cloud** — the port 8000 route is pre-configured on the workspace. Once `app.py` is running, open the port 8000 URL from your workspace settings.
 
 To stop:
 
 ```bash
-pkill -f "nat serve"
+pkill -f "app.py"
 ```
 
 ---
diff --git a/examples/nemo-agent-toolkit/app.py b/examples/nemo-agent-toolkit/app.py
new file mode 100644
index 00000000..c6a93e3e
--- /dev/null
+++ b/examples/nemo-agent-toolkit/app.py
@@ -0,0 +1,22 @@
+import gradio as gr
+from nat.runtime.loader import load_workflow
+
+
+async def predict(message, history):
+    try:
+        async with load_workflow("workflow.yml") as session:
+            response = await session.run(input=message)
+            return response.result
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+
+demo = gr.ChatInterface(
+    fn=predict,
+    title="NeMo Agent — Research Assistant",
+    description="Ask any question. The agent searches Wikipedia and reasons step-by-step.",
+    theme="soft",
+)
+
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=8000)
diff --git a/examples/nemo-agent-toolkit/start.sh b/examples/nemo-agent-toolkit/start.sh
index 3dd7de0e..319c38a0 100755
--- a/examples/nemo-agent-toolkit/start.sh
+++ b/examples/nemo-agent-toolkit/start.sh
@@ -63,7 +63,7 @@ setup_venv() {
 
 # ─── 5. Install nvidia-nat ───────────────────────────────────────────────────
 install_nat() {
-    if python -c "import nemo_agent_toolkit; import langchain_core; import langchain_nvidia_ai_endpoints; import nat.plugins.langchain; import nat.plugins.eval; import nat.plugins.opentelemetry" &>/dev/null 2>&1; then
+    if python -c "import nemo_agent_toolkit; import langchain_core; import langchain_nvidia_ai_endpoints; import nat.plugins.langchain; import nat.plugins.eval; import nat.plugins.opentelemetry; import gradio" &>/dev/null 2>&1; then
         log "nvidia-nat and integrations already installed — OK"
         apply_wikipedia_patch
         apply_loader_patch
@@ -85,6 +85,10 @@ install_nat() {
     python -m pip install --quiet --upgrade opentelemetry-api opentelemetry-sdk "opentelemetry-exporter-otlp~=1.3" &>>"$LOG_FILE" \
         || die "telemetry dependencies install failed — check $LOG_FILE"
 
+    log "Installing Gradio chat UI..."
+    python -m pip install --quiet --upgrade gradio &>>"$LOG_FILE" \
+        || die "gradio install failed — check $LOG_FILE"
+
     log "nvidia-nat and integrations installed successfully"
     apply_wikipedia_patch
     apply_loader_patch
@@ -138,7 +142,7 @@ run_demo() {
     log "  nat run --config_file workflow.yml --input \"your question here\""
     log ""
     log "To launch the chat UI:"
-    log "  nat serve --config_file workflow.yml"
+    log "  python app.py"
     log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
 }
 

From 0f3124fa58f52a0b3c618af2b8fe4907f3331a2c Mon Sep 17 00:00:00 2001
From: Olusegun Durojaye <durojaye@saturncloud.io>
Date: Mon, 18 May 2026 15:39:27 +0100
Subject: [PATCH 05/11] Remove unsupported theme arg from gr.ChatInterface

---
 examples/nemo-agent-toolkit/app.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/nemo-agent-toolkit/app.py b/examples/nemo-agent-toolkit/app.py
index c6a93e3e..f24f4067 100644
--- a/examples/nemo-agent-toolkit/app.py
+++ b/examples/nemo-agent-toolkit/app.py
@@ -15,7 +15,6 @@ async def predict(message, history):
     fn=predict,
     title="NeMo Agent — Research Assistant",
     description="Ask any question. The agent searches Wikipedia and reasons step-by-step.",
-    theme="soft",
 )
 
 if __name__ == "__main__":

From 07b519c132310aa230fbb5764063c834c3ce666e Mon Sep 17 00:00:00 2001
From: Olusegun Durojaye <durojaye@saturncloud.io>
Date: Mon, 18 May 2026 16:00:12 +0100
Subject: [PATCH 06/11] Fix app.py: use correct session.run() async context
 manager API

---
 examples/nemo-agent-toolkit/app.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/nemo-agent-toolkit/app.py b/examples/nemo-agent-toolkit/app.py
index f24f4067..496d8a61 100644
--- a/examples/nemo-agent-toolkit/app.py
+++ b/examples/nemo-agent-toolkit/app.py
@@ -5,8 +5,9 @@
 async def predict(message, history):
     try:
         async with load_workflow("workflow.yml") as session:
-            response = await session.run(input=message)
-            return response.result
+            async with session.run(message) as runner:
+                result = await runner.result()
+                return str(result)
     except Exception as e:
         return f"Error: {str(e)}"
 

From 4dfec933a2c11c3e995ce2a1b42fc7ed597743d7 Mon Sep 17 00:00:00 2001
From: Olusegun Durojaye <durojaye@saturncloud.io>
Date: Mon, 18 May 2026 16:13:51 +0100
Subject: [PATCH 07/11] =?UTF-8?q?Add=20graceful=20iteration=20limit=20?=
 =?UTF-8?q?=E2=80=94=20max=206=20tool=20calls=20with=20friendly=20fallback?=
 =?UTF-8?q?=20message?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/nemo-agent-toolkit/app.py       | 7 +++++++
 examples/nemo-agent-toolkit/workflow.yml | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/examples/nemo-agent-toolkit/app.py b/examples/nemo-agent-toolkit/app.py
index 496d8a61..e1e667ce 100644
--- a/examples/nemo-agent-toolkit/app.py
+++ b/examples/nemo-agent-toolkit/app.py
@@ -1,4 +1,5 @@
 import gradio as gr
+from langgraph.errors import GraphRecursionError
 from nat.runtime.loader import load_workflow
 
 
@@ -8,6 +9,12 @@ async def predict(message, history):
             async with session.run(message) as runner:
                 result = await runner.result()
                 return str(result)
+    except GraphRecursionError:
+        return (
+            "I searched Wikipedia but couldn't find enough information to answer "
+            "this question within the search limit. Try rephrasing your question or "
+            "asking about a more specific topic."
+        )
     except Exception as e:
         return f"Error: {str(e)}"
 
diff --git a/examples/nemo-agent-toolkit/workflow.yml b/examples/nemo-agent-toolkit/workflow.yml
index 1179834b..e6a8b9ac 100644
--- a/examples/nemo-agent-toolkit/workflow.yml
+++ b/examples/nemo-agent-toolkit/workflow.yml
@@ -20,3 +20,5 @@ workflow:
   verbose: true
   # Retry up to 3 times on parse errors
   parse_agent_response_max_retries: 3
+  # Stop after 6 tool calls instead of looping indefinitely
+  max_tool_calls: 6

From 5fd4b6958276082fc3c979783d7300e2a3b9deb2 Mon Sep 17 00:00:00 2001
From: Olusegun Durojaye <durojaye@saturncloud.io>
Date: Mon, 18 May 2026 16:50:09 +0100
Subject: [PATCH 08/11] =?UTF-8?q?Remove=20test=20branch=20reference=20from?=
 =?UTF-8?q?=20recipe=20=E2=80=94=20defaults=20to=20main?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/nemo-agent-toolkit/.saturn/saturn.json | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/examples/nemo-agent-toolkit/.saturn/saturn.json b/examples/nemo-agent-toolkit/.saturn/saturn.json
index a7887784..7956f856 100644
--- a/examples/nemo-agent-toolkit/.saturn/saturn.json
+++ b/examples/nemo-agent-toolkit/.saturn/saturn.json
@@ -11,9 +11,7 @@
   "git_repositories": [
     {
       "url": "https://github.com/saturncloud/examples",
-      "path": "/home/jovyan/examples",
-      "reference": "nemo-agent-toolkit",
-      "reference_type": "branch"
+      "path": "/home/jovyan/examples"
     }
   ],
   "jupyter_server": {

From 69e241f77ac3146c16a7e43ff5ba94e6b9aef3ad Mon Sep 17 00:00:00 2001
From: Olusegun Durojaye <durojaye@saturncloud.io>
Date: Mon, 18 May 2026 16:53:48 +0100
Subject: [PATCH 09/11] Add Saturn Cloud deployment walkthrough to README

---
 examples/nemo-agent-toolkit/README.md | 45 +++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/examples/nemo-agent-toolkit/README.md b/examples/nemo-agent-toolkit/README.md
index d73f9aa0..bc2ee03f 100644
--- a/examples/nemo-agent-toolkit/README.md
+++ b/examples/nemo-agent-toolkit/README.md
@@ -19,6 +19,51 @@ This template deploys an AI research agent powered by the [NVIDIA NeMo Agent Too
 
 ---
 
+## 🪐 Using on Saturn Cloud
+
+### 1. Get an NVIDIA API key
+
+Get a free key at [build.nvidia.com](https://build.nvidia.com) → sign up → **API Keys** → **Generate Key**. Your key will start with `nvapi-`.
+
+### 2. Create the workspace from the template
+
+In Saturn Cloud, go to **New Resource → Workspace → Templates** and select **NeMo Agent Toolkit — Research Assistant**. Before clicking Start, open **Settings → Environment Variables**, find `NVIDIA_API_KEY`, and paste your key in.
+
+### 3. Start the workspace
+
+Click **Start**. Saturn Cloud will clone the repo and run `start.sh` automatically — this installs all dependencies and runs a demo query. The process takes about 3–4 minutes. You can watch it complete by opening **Logs** from the workspace panel.
+
+### 4. Open JupyterLab
+
+Once the workspace status shows **Running**, click the **JupyterLab** button to open the IDE. Open a terminal from **File → New → Terminal**.
+
+### 5. Run a query from the terminal
+
+```bash
+cd /home/jovyan/examples/examples/nemo-agent-toolkit
+source .venv/bin/activate
+nat run --config_file workflow.yml --input "your question here"
+```
+
+The agent's full trace — each reasoning step and each Wikipedia search — is printed live in the terminal.
+
+### 6. Launch the Gradio chat UI
+
+```bash
+cd /home/jovyan/examples/examples/nemo-agent-toolkit && source .venv/bin/activate
+python app.py
+```
+
+Then open the workspace's **Settings → Routes** and click the URL listed next to port **8000**. This opens the chat interface in your browser, where you can have a full conversation with the agent.
+
+To stop the UI:
+
+```bash
+pkill -f "app.py"
+```
+
+---
+
 ## 🛠️ Local Setup
 
 ### 1. Set your NVIDIA API key

From 6c9da353c918a96b3c82dfadd5d478102d8bc1b9 Mon Sep 17 00:00:00 2001
From: Olusegun Durojaye <durojaye@saturncloud.io>
Date: Mon, 18 May 2026 16:54:39 +0100
Subject: [PATCH 10/11] Add NVIDIA API key setup steps to Saturn Cloud
 walkthrough

---
 examples/nemo-agent-toolkit/README.md | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/examples/nemo-agent-toolkit/README.md b/examples/nemo-agent-toolkit/README.md
index bc2ee03f..cb4aa31a 100644
--- a/examples/nemo-agent-toolkit/README.md
+++ b/examples/nemo-agent-toolkit/README.md
@@ -27,7 +27,16 @@ Get a free key at [build.nvidia.com](https://build.nvidia.com) → sign up → *
 
 ### 2. Create the workspace from the template
 
-In Saturn Cloud, go to **New Resource → Workspace → Templates** and select **NeMo Agent Toolkit — Research Assistant**. Before clicking Start, open **Settings → Environment Variables**, find `NVIDIA_API_KEY`, and paste your key in.
+In Saturn Cloud, go to **New Resource → Workspace → Templates** and select **NeMo Agent Toolkit — Research Assistant**.
+
+Before clicking Start, add your NVIDIA API key:
+
+1. Open **Settings → Environment Variables**
+2. Find `NVIDIA_API_KEY` in the list — it will be empty
+3. Click the edit icon next to it and paste your `nvapi-...` key
+4. Click **Save**
+
+If you started the workspace without setting the key, stop it, add the key following the steps above, then start it again.
 
 ### 3. Start the workspace
 

From 4b16aa10b585d62d01aeebb29970ed90234f3357 Mon Sep 17 00:00:00 2001
From: Olusegun Durojaye <durojaye@saturncloud.io>
Date: Mon, 18 May 2026 16:58:18 +0100
Subject: [PATCH 11/11] Document CPU/GPU flexibility in README

---
 examples/nemo-agent-toolkit/README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/nemo-agent-toolkit/README.md b/examples/nemo-agent-toolkit/README.md
index cb4aa31a..bf522b7f 100644
--- a/examples/nemo-agent-toolkit/README.md
+++ b/examples/nemo-agent-toolkit/README.md
@@ -4,7 +4,7 @@
 
 This template deploys an AI research agent powered by the [NVIDIA NeMo Agent Toolkit](https://github.com/NVIDIA/NeMo-Agent-Toolkit). Given any question or topic, the agent searches Wikipedia, reasons step-by-step using a **ReAct loop** (Reason + Act), and returns a structured answer — all backed by **NVIDIA NIM cloud inference**. No GPU required.
 
-* **Hardware:** CPU Large (2 cores, 16 GB RAM)
+* **Hardware:** CPU Large (2 cores, 16 GB RAM) — GPU supported but not required
 * **Runtime:** NVIDIA NIM cloud API — bring your own `nvapi-...` key
 * **Use Case:** Research automation, knowledge summarization, agentic reasoning demos
 
@@ -38,6 +38,8 @@ Before clicking Start, add your NVIDIA API key:
 
 If you started the workspace without setting the key, stop it, add the key following the steps above, then start it again.
 
+> **GPU users** — the template defaults to CPU Large since inference runs in the cloud via NVIDIA NIM. If you want to run on a GPU instance (e.g. to extend the agent with local model inference), go to **Settings → Hardware** and select a GPU instance type before starting. Everything else works the same.
+
 ### 3. Start the workspace
 
 Click **Start**. Saturn Cloud will clone the repo and run `start.sh` automatically — this installs all dependencies and runs a demo query. The process takes about 3–4 minutes. You can watch it complete by opening **Logs** from the workspace panel.