From aff993ec9f8605c14c8d7752f7b63b921dd06f38 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 10 Feb 2026 21:16:09 +0000 Subject: [PATCH 1/5] chore: update labs-config.json for changed labs [skip ci] --- docs/labs-config.json | 148 +++++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 74 deletions(-) diff --git a/docs/labs-config.json b/docs/labs-config.json index 1a11eb95..6260643f 100644 --- a/docs/labs-config.json +++ b/docs/labs-config.json @@ -17,9 +17,9 @@ "vieiraae", "nourshaker-msft" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/access-controlling", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/access-controlling", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.726Z" + "lastCommitDate": "2026-02-10T21:16:09.128Z" }, { "id": "ai-agent-service", @@ -38,9 +38,9 @@ "vieiraae", "nourshaker-msft" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/ai-agent-service", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/ai-agent-service", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.727Z" + "lastCommitDate": "2026-02-10T21:16:09.128Z" }, { "id": "ai-foundry-deepseek", @@ -59,9 +59,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/ai-foundry-deepseek", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/ai-foundry-deepseek", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.727Z" + "lastCommitDate": "2026-02-10T21:16:09.129Z" }, { "id": "ai-foundry-private-mcp", @@ -86,8 +86,8 @@ "nourshaker-msft" ], "tags": [], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/ai-foundry-private-mcp", - "lastCommitDate": "2026-02-04T13:09:52.727Z" + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/ai-foundry-private-mcp", + "lastCommitDate": "2026-02-10T21:16:09.129Z" }, { "id": "ai-foundry-sdk", @@ 
-105,9 +105,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/ai-foundry-sdk", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/ai-foundry-sdk", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.728Z" + "lastCommitDate": "2026-02-10T21:16:09.130Z" }, { "id": "aws-bedrock", @@ -125,9 +125,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/aws-bedrock", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/aws-bedrock", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.728Z" + "lastCommitDate": "2026-02-10T21:16:09.130Z" }, { "id": "backend-pool-load-balancing", @@ -145,9 +145,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/backend-pool-load-balancing", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/backend-pool-load-balancing", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.728Z" + "lastCommitDate": "2026-02-10T21:16:09.130Z" }, { "id": "backend-pool-load-balancing-tf", @@ -164,9 +164,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/backend-pool-load-balancing-tf", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/backend-pool-load-balancing-tf", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.729Z" + "lastCommitDate": "2026-02-10T21:16:09.131Z" }, { "id": "built-in-logging", @@ -184,9 +184,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/built-in-logging", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/built-in-logging", "tags": [], - "lastCommitDate": "2026-02-04T18:19:33.465Z" + "lastCommitDate": "2026-02-10T21:16:09.131Z" }, { "id": "content-safety", @@ -204,9 +204,9 @@ "authors": [ "vieiraae" ], - "githubPath": 
"https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/content-safety", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/content-safety", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.730Z" + "lastCommitDate": "2026-02-10T21:16:09.132Z" }, { "id": "finops-framework", @@ -226,9 +226,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/finops-framework", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/finops-framework", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.730Z" + "lastCommitDate": "2026-02-10T21:16:09.132Z" }, { "id": "foundry-models-evals", @@ -248,8 +248,8 @@ "vieiraae" ], "tags": [], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/foundry-models-evals", - "lastCommitDate": "2026-02-04T18:13:32.653Z" + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/foundry-models-evals", + "lastCommitDate": "2026-02-10T21:16:09.133Z" }, { "id": "function-calling", @@ -268,9 +268,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/function-calling", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/function-calling", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.731Z" + "lastCommitDate": "2026-02-10T21:16:09.134Z" }, { "id": "gemini-mcp-agents", @@ -289,9 +289,9 @@ "authors": [ "nourshaker-msft" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/gemini-mcp-agents", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/gemini-mcp-agents", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.732Z" + "lastCommitDate": "2026-02-10T21:16:09.134Z" }, { "id": "gemini-models", @@ -311,8 +311,8 @@ "vieiraae" ], "tags": [], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/gemini-models", - "lastCommitDate": "2026-02-04T13:09:52.732Z" + "githubPath": 
"https://github.com/saldanaj/AI-Gateway/tree/main/labs/gemini-models", + "lastCommitDate": "2026-02-10T21:16:09.135Z" }, { "id": "google-gemini-api", @@ -332,8 +332,8 @@ "seilorjunior" ], "tags": [], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/google-gemini-api", - "lastCommitDate": "2026-02-04T13:09:52.732Z" + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/google-gemini-api", + "lastCommitDate": "2026-02-10T21:16:09.135Z" }, { "id": "image-generation", @@ -351,9 +351,9 @@ "authors": [ "nourshaker-msft" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/image-generation", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/image-generation", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.733Z" + "lastCommitDate": "2026-02-10T21:16:09.135Z" }, { "id": "mcp-a2a-agents", @@ -375,9 +375,9 @@ "authors": [ "nourshaker-msft" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/mcp-a2a-agents", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/mcp-a2a-agents", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.733Z" + "lastCommitDate": "2026-02-10T21:16:09.136Z" }, { "id": "mcp-client-authorization", @@ -413,9 +413,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/mcp-from-api", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/mcp-from-api", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.734Z" + "lastCommitDate": "2026-02-10T21:16:09.137Z" }, { "id": "mcp-prm-oauth", @@ -434,9 +434,9 @@ "authors": [ "nourshaker-msft" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/mcp-prm-oauth", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/mcp-prm-oauth", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.735Z" + "lastCommitDate": "2026-02-10T21:16:09.137Z" }, { "id": "mcp-registry-apic", @@ 
-455,9 +455,9 @@ "authors": [ "jukasper" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/mcp-registry-apic", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/mcp-registry-apic", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.735Z" + "lastCommitDate": "2026-02-10T21:16:09.138Z" }, { "id": "mcp-registry-apic-github-workflow", @@ -477,9 +477,9 @@ "authors": [ "frankqianms" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/mcp-registry-apic-github-workflow", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/mcp-registry-apic-github-workflow", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.736Z" + "lastCommitDate": "2026-02-10T21:16:09.138Z" }, { "id": "message-storing", @@ -497,9 +497,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/message-storing", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/message-storing", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.736Z" + "lastCommitDate": "2026-02-10T21:16:09.139Z" }, { "id": "model-context-protocol", @@ -519,9 +519,9 @@ "vieiraae", "nourshaker-msft" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/model-context-protocol", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/model-context-protocol", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.736Z" + "lastCommitDate": "2026-02-10T21:16:09.139Z" }, { "id": "model-routing", @@ -541,9 +541,9 @@ "simonkurtz-MSFT", "nourshaker-msft" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/model-routing", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/model-routing", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.736Z" + "lastCommitDate": "2026-02-10T21:16:09.140Z" }, { "id": "openai-agents", @@ -561,9 +561,9 @@ "authors": [ "vieiraae" ], - "githubPath": 
"https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/openai-agents", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/openai-agents", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.737Z" + "lastCommitDate": "2026-02-10T21:16:09.140Z" }, { "id": "private-connectivity", @@ -582,9 +582,9 @@ "authors": [ "HoussemDellai" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/private-connectivity", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/private-connectivity", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.737Z" + "lastCommitDate": "2026-02-10T21:16:09.140Z" }, { "id": "realtime-audio", @@ -603,9 +603,9 @@ "authors": [ "nourshaker-msft" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/realtime-audio", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/realtime-audio", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.738Z" + "lastCommitDate": "2026-02-10T21:16:09.141Z" }, { "id": "realtime-mcp-agents", @@ -628,9 +628,9 @@ "nourshaker-msft", "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/realtime-mcp-agents", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/realtime-mcp-agents", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.738Z" + "lastCommitDate": "2026-02-10T21:16:09.141Z" }, { "id": "secure-responses-api", @@ -648,9 +648,9 @@ "authors": [ "anotherRedbeard" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/secure-responses-api", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/secure-responses-api", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.738Z" + "lastCommitDate": "2026-02-10T21:16:09.141Z" }, { "id": "semantic-caching", @@ -669,9 +669,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/semantic-caching", + "githubPath": 
"https://github.com/saldanaj/AI-Gateway/tree/main/labs/semantic-caching", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.739Z" + "lastCommitDate": "2026-02-10T21:16:09.142Z" }, { "id": "session-awareness", @@ -690,9 +690,9 @@ "authors": [ "dbarkol" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/session-awareness", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/session-awareness", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.739Z" + "lastCommitDate": "2026-02-10T21:16:09.142Z" }, { "id": "slm-self-hosting", @@ -711,9 +711,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/slm-self-hosting", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/slm-self-hosting", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.739Z" + "lastCommitDate": "2026-02-10T21:16:09.142Z" }, { "id": "token-metrics-emitting", @@ -732,9 +732,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/token-metrics-emitting", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/token-metrics-emitting", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.740Z" + "lastCommitDate": "2026-02-10T21:16:09.143Z" }, { "id": "token-rate-limiting", @@ -752,9 +752,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/token-rate-limiting", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/token-rate-limiting", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.740Z" + "lastCommitDate": "2026-02-10T21:16:09.143Z" }, { "id": "vector-searching", @@ -772,9 +772,9 @@ "authors": [ "vieiraae" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/vector-searching", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/vector-searching", "tags": [], - "lastCommitDate": 
"2026-02-04T13:09:52.740Z" + "lastCommitDate": "2026-02-10T21:16:09.143Z" }, { "id": "zero-to-production", @@ -792,8 +792,8 @@ "authors": [ "simonkurtz-MSFT" ], - "githubPath": "https://github.com/Azure-Samples/AI-Gateway/tree/main/labs/zero-to-production", + "githubPath": "https://github.com/saldanaj/AI-Gateway/tree/main/labs/zero-to-production", "tags": [], - "lastCommitDate": "2026-02-04T13:09:52.740Z" + "lastCommitDate": "2026-02-10T21:16:09.143Z" } ] From 530a0a84366180c647b119ec663d1f289fe67de7 Mon Sep 17 00:00:00 2001 From: saldanaj Date: Wed, 11 Feb 2026 05:21:02 +0000 Subject: [PATCH 2/5] markdowns that explain the breakdown of the AI Agent Service and the AI Foundry SDK. These markdown files provide detailed insights into the architecture, components, and functionalities of both the AI Agent Service and the AI Foundry SDK, serving as valuable resources for developers and stakeholders interested in understanding these technologies. --- .../AI-AGENT-SERVICE-BREAKDOWN.md | 670 ++++++++++++++++++ .../AI-FOUNDRY-SDK-BREAKDOWN.md | 375 ++++++++++ 2 files changed, 1045 insertions(+) create mode 100644 labs/ai-agent-service/AI-AGENT-SERVICE-BREAKDOWN.md create mode 100644 labs/ai-foundry-sdk/AI-FOUNDRY-SDK-BREAKDOWN.md diff --git a/labs/ai-agent-service/AI-AGENT-SERVICE-BREAKDOWN.md b/labs/ai-agent-service/AI-AGENT-SERVICE-BREAKDOWN.md new file mode 100644 index 00000000..33a6cdac --- /dev/null +++ b/labs/ai-agent-service/AI-AGENT-SERVICE-BREAKDOWN.md @@ -0,0 +1,670 @@ +# Azure AI Agent Service Code Breakdown + +This document provides a detailed breakdown of the Python code used in the AI Agent Service lab notebooks, explaining the Azure AI Foundry SDK syntax for building AI agents with tools. + +--- + +## Table of Contents + +1. [Lab Overview & Architecture](#1-lab-overview--architecture) +2. [Azure Resources Deployed](#2-azure-resources-deployed) +3. [SDK Initialization Patterns](#3-sdk-initialization-patterns) +4. 
[Creating and Running Agents](#4-creating-and-running-agents) +5. [Working with Tools](#5-working-with-tools) +6. [Bing Grounding Tool](#6-bing-grounding-tool) +7. [OpenAPI Tools](#7-openapi-tools) +8. [Thread and Message Management](#8-thread-and-message-management) +9. [Monitoring with Application Insights](#9-monitoring-with-application-insights) +10. [V1 vs V2 SDK Differences](#10-v1-vs-v2-sdk-differences) + +--- + +## 1. Lab Overview & Architecture + +This lab demonstrates how to build AI agents using the Azure AI Agent Service, integrated with Azure API Management (APIM) for: +- **Controlling AI model access** - Route requests through APIM +- **Integrating external services** - Bing Search, Logic Apps, custom APIs +- **Monitoring and observability** - Application Insights telemetry + +### Lab Versions +- **V1 (ai-agent-service-v1.ipynb)**: Uses AI Foundry Hub & Projects architecture with connection strings +- **V2 (ai-agent-service-v2.ipynb)**: Uses newer AI Foundry endpoint-based architecture + +--- + +## 2. 
Azure Resources Deployed + +The Bicep templates deploy the following resources: + +| Resource | Purpose | +|----------|---------| +| **Azure API Management** | Gateway for AI services and custom APIs | +| **Azure OpenAI / AI Services** | Language models (GPT-4o) | +| **AI Foundry Project** | Agent management and orchestration | +| **Bing Search** | Web search grounding for agents | +| **Logic Apps** | Order processing workflow | +| **Application Insights** | Telemetry and monitoring | +| **Log Analytics** | Centralized logging | + +### Configuration Variables + +```python +# V1 Configuration +openai_resources = [ {"name": "openai1", "location": "swedencentral"} ] +openai_model_name = "gpt-4o" +openai_model_version = "2024-08-06" +openai_model_sku = "GlobalStandard" +openai_model_capacity = 400 +openai_deployment_name = "gpt-4o" +openai_api_version = "2025-01-01-preview" + +# V2 Configuration +aiservices_config = [{"name": "foundry1", "location": "eastus2"}] +models_config = [{"name": "gpt-4.1-mini", "publisher": "OpenAI", + "version": "2025-04-14", "sku": "GlobalStandard", "capacity": 20}] +``` + +--- + +## 3. 
SDK Initialization Patterns + +### Required Imports + +```python +from azure.ai.projects import AIProjectClient +from azure.identity import DefaultAzureCredential +``` + +### V1: Connection String Pattern + +```python +# Install specific SDK version for V1 +%pip install azure-ai-projects==1.0.0b10 + +project_client = AIProjectClient.from_connection_string( + credential=DefaultAzureCredential(), + conn_str=project_connection_string # Format: "endpoint;subscription_id;resource_group;project_name" +) +``` + +### V2: Endpoint Pattern + +```python +# Install specific SDK version for V2 +%pip install azure-ai-projects==1.0.0b12 + +project_client = AIProjectClient( + endpoint=foundry_project_endpoint, # Direct endpoint URL + credential=DefaultAzureCredential() +) +``` + +### Using Context Managers + +Both patterns support context managers for automatic resource cleanup: + +```python +with AIProjectClient(...) as project_client: + # Client is automatically cleaned up when exiting the block + agent = project_client.agents.create_agent(...) +``` + +--- + +## 4. Creating and Running Agents + +### Agent Lifecycle + +1. **Create Agent** - Define model, name, instructions, and tools +2. **Create Thread** - Conversation container +3. **Create Message** - User input +4. **Create Run** - Execute the agent +5. **Poll for Completion** - Wait for agent to finish +6. **Retrieve Messages** - Get agent's response +7. **Cleanup** - Delete agent when done + +### Basic Agent Creation (V1) + +```python +from azure.ai.projects import AIProjectClient +from azure.identity import DefaultAzureCredential + +prompt_content = "I need to solve the equation `3x + 11 = 14`. Can you help me?" 
+ +with AIProjectClient.from_connection_string( + credential=DefaultAzureCredential(), + conn_str=project_connection_string +) as project_client: + + # Step 1: Create the agent + maths_agent = project_client.agents.create_agent( + model=openai_deployment_name, # e.g., "gpt-4o" + name="math-tutor", + instructions="You are a personal math tutor. Answer questions briefly, in a sentence or less." + ) + print(f"Created agent, agent ID: {maths_agent.id}") + + # Step 2: Create a conversation thread + thread = project_client.agents.create_thread() + print(f"Created thread, thread ID: {thread.id}") + + # Step 3: Add user message to thread + message = project_client.agents.create_message( + thread_id=thread.id, + role="user", + content=prompt_content + ) + print(f"Created message, message ID: {message.id}") + + # Step 4: Create and start the run + run = project_client.agents.create_run( + thread_id=thread.id, + agent_id=maths_agent.id + ) + + # Step 5: Poll until completion + while run.status in ["queued", "in_progress", "requires_action"]: + time.sleep(1) + run = project_client.agents.get_run(thread_id=thread.id, run_id=run.id) + print(f"Run status: {run.status}") + + # Step 6: Retrieve messages + messages = project_client.agents.list_messages(thread_id=thread.id) + print(f"πŸ—¨οΈ {messages.data[0].content[0].text.value}") +``` + +### Basic Agent Creation (V2) + +```python +from azure.ai.projects import AIProjectClient +from azure.identity import DefaultAzureCredential +from azure.ai.agents.models import CodeInterpreterTool + +project_client = AIProjectClient( + endpoint=foundry_project_endpoint, + credential=DefaultAzureCredential() +) + +code_interpreter = CodeInterpreterTool() + +with project_client: + # Create agent with code interpreter tool + agent = project_client.agents.create_agent( + model=str(models_config[0].get('name')), + name="my-maths-agent", + instructions="You are a personal math tutor. 
Answer questions briefly.", + tools=code_interpreter.definitions # Attach tools + ) + + # Create thread (note: different method path in V2) + thread = project_client.agents.threads.create() + + # Create message (note: different method path in V2) + message = project_client.agents.messages.create( + thread_id=thread.id, + role="user", + content="I need to solve the equation `3x + 11 = 14`. Can you help me?" + ) + + # Create and process run automatically + run = project_client.agents.runs.create_and_process( + thread_id=thread.id, + agent_id=agent.id + ) + + # Get messages + messages = project_client.agents.messages.list(thread_id=thread.id) + for message in messages: + print(f"Role: {message.role}, Content: {message.content}") + + # Cleanup + project_client.agents.delete_agent(agent.id) +``` + +### Run Processing Options + +| Method | Description | +|--------|-------------| +| `create_run()` | Start run, requires manual polling | +| `create_and_process_run()` (V1) | Auto-processes until completion | +| `runs.create_and_process()` (V2) | Auto-processes until completion | + +--- + +## 5. Working with Tools + +Tools extend agent capabilities. The SDK supports several built-in tools: + +| Tool Type | Import | Purpose | +|-----------|--------|---------| +| `CodeInterpreterTool` | `azure.ai.agents.models` | Execute Python code | +| `BingGroundingTool` | `azure.ai.projects.models` (V1) / `azure.ai.agents.models` (V2) | Web search | +| `OpenApiTool` | `azure.ai.projects.models` (V1) / `azure.ai.agents.models` (V2) | Call REST APIs | + +### Tool Attachment Pattern + +```python +# Tools are defined and passed during agent creation +agent = project_client.agents.create_agent( + model="gpt-4o", + name="my-agent", + instructions="...", + tools=my_tool.definitions # List of tool definitions +) +``` + +--- + +## 6. Bing Grounding Tool + +The Bing Grounding Tool enables agents to search the web for current information. 
+ +### V1 Pattern + +```python +from azure.ai.projects import AIProjectClient +from azure.identity import DefaultAzureCredential +from azure.ai.projects.models import BingGroundingTool + +project_client = AIProjectClient.from_connection_string( + credential=DefaultAzureCredential(), + conn_str=project_connection_string +) + +# Get Bing connection from AI Foundry +bing_connection = project_client.connections.get(connection_name=bing_search_connection) +conn_id = bing_connection.id + +# Initialize Bing tool with connection ID +bing = BingGroundingTool(connection_id=conn_id) + +with project_client: + # Create agent with Bing tool + bing_agent = project_client.agents.create_agent( + model=openai_deployment_name, + name="my-bing-assistant", + instructions="You are a helpful assistant", + tools=bing.definitions, + headers={"x-ms-enable-preview": "true"} # Required for preview features + ) + + thread = project_client.agents.create_thread() + + message = project_client.agents.create_message( + thread_id=thread.id, + role="user", + content="What are the top news today?" 
+ ) + + # Auto-process handles tool calls internally + run = project_client.agents.create_and_process_run( + thread_id=thread.id, + agent_id=bing_agent.id + ) + + messages = project_client.agents.list_messages(thread_id=thread.id) + print(f"πŸ—¨οΈ {messages.data[0].content[0].text.value}") +``` + +### V2 Pattern + +```python +from azure.ai.projects import AIProjectClient +from azure.identity import DefaultAzureCredential +from azure.ai.agents.models import BingGroundingTool, ListSortOrder, MessageTextContent + +project_client = AIProjectClient( + endpoint=foundry_project_endpoint, + credential=DefaultAzureCredential() +) +agents_client = project_client.agents + +# Get connection by name +bing_connection = project_client.connections.get(name='bingSearch-connection') +conn_id = bing_connection.id + +bing = BingGroundingTool(connection_id=conn_id) + +with project_client: + bing_agent = agents_client.create_agent( + model=str(models_config[0].get('name')), + name="my-bing-assistant", + instructions="You are a helpful assistant who uses Bing Search to answer questions.", + tools=bing.definitions, + headers={"x-ms-enable-preview": "true"} + ) + + thread = agents_client.threads.create() + + message = agents_client.messages.create( + thread_id=thread.id, + role="user", + content="What are the top 5 news headlines today from the UK?" 
+ ) + + # Create run and poll manually + run = agents_client.runs.create(thread_id=thread.id, agent_id=bing_agent.id) + + while run.status in ["queued", "in_progress", "requires_action"]: + run = agents_client.runs.get(thread_id=thread.id, run_id=run.id) + print(f"⏳ Run status: {run.status}") + + # Fetch messages with ordering + messages = agents_client.messages.list( + thread_id=thread.id, + order=ListSortOrder.ASCENDING + ) + + for item in messages: + last_message_content = item.content[-1] + if isinstance(last_message_content, MessageTextContent): + print(f"πŸ—¨οΈ {item.role}: {last_message_content.text.value}") + + agents_client.delete_agent(bing_agent.id) +``` + +--- + +## 7. OpenAPI Tools + +OpenAPI tools allow agents to call REST APIs defined by OpenAPI specifications. + +### Required Imports + +```python +import jsonref # For resolving JSON references in OpenAPI specs +from azure.ai.projects.models import ( # V1 + OpenApiTool, + OpenApiConnectionAuthDetails, + OpenApiConnectionSecurityScheme +) +# Or for V2: +from azure.ai.agents.models import ( + OpenApiTool, + OpenApiConnectionAuthDetails, + OpenApiConnectionSecurityScheme +) +``` + +### Creating OpenAPI Tool from Spec + +```python +# Load and modify OpenAPI spec (replace placeholder URLs) +with open("./city-weather-openapi.json", "r") as f: + openapi_weather = jsonref.loads( + f.read().replace( + "https://replace-me.local/weatherservice", + f"{apim_resource_gateway_url}/weatherservice" + ) + ) + +# Create OpenAPI tool with authentication +openapi_tool = OpenApiTool( + name="get_weather", + spec=openapi_weather, + description="Retrieve weather information for a location", + auth=OpenApiConnectionAuthDetails( + security_scheme=OpenApiConnectionSecurityScheme( + connection_id=weather_api_connection_id # APIM connection + ) + ) +) +``` + +### Combining Multiple OpenAPI Tools + +```python +# First tool - Product Catalog +with open("./product-catalog-openapi.json", "r") as f: + openapi_product_catalog = 
jsonref.loads( + f.read().replace("https://replace-me.local/catalogservice", + f"{apim_resource_gateway_url}/catalogservice") + ) + +openapi_tools = OpenApiTool( + name="get_product_catalog", + spec=openapi_product_catalog, + description="Retrieve the list of products available in the catalog", + auth=OpenApiConnectionAuthDetails( + security_scheme=OpenApiConnectionSecurityScheme( + connection_id=product_catalog_api_connection_id + ) + ) +) + +# Add second tool - Place Order (using add_definition) +with open("./place-order-openapi.json", "r") as f: + openapi_place_order = jsonref.loads( + f.read().replace("https://replace-me.local/orderservice", + f"{apim_resource_gateway_url}/orderservice") + ) + +openapi_tools.add_definition( + name="place_order", + spec=openapi_place_order, + description="Place a product order", + auth=OpenApiConnectionAuthDetails( + security_scheme=OpenApiConnectionSecurityScheme( + connection_id=place_order_api_connection_id + ) + ) +) + +# Create agent with multiple tools +agent = project_client.agents.create_agent( + model=openai_deployment_name, + name="my-sales-assistant", + instructions="You are a helpful sales assistant. Recover from errors and place multiple orders if needed.", + tools=openapi_tools.definitions # Contains both tools +) +``` + +### Inspecting Tool Calls During Run + +```python +# After run completes, inspect the steps +run_steps = project_client.agents.list_run_steps(thread_id=thread.id, run_id=run.id) + +for step in reversed(run_steps.data): + print(f"Step {step['id']} status: {step['status']}") + + step_details = step.get("step_details", {}) + tool_calls = step_details.get("tool_calls", []) + + if tool_calls: + for call in tool_calls: + function_details = call.get("function", {}) + if function_details: + print(f" Function: {function_details.get('name')}") + print(f" Arguments: {function_details.get('arguments')}") + print(f" Output: {function_details.get('output')}") +``` + +--- + +## 8. 
Thread and Message Management + +### Listing Connections + +```python +from azure.ai.projects.models import ConnectionType + +with project_client: + connections = project_client.connections.list() + for connection in connections: + # V1 + print(f"Name: {connection.name}, Type: {connection.connection_type}") + # V2 + print(f"Name: {connection.name}, Id: {connection.id}, Type: {connection.type}") +``` + +### Getting Specific Connection + +```python +# V1 +bing_connection = project_client.connections.get(connection_name=bing_search_connection) + +# V2 +bing_connection = project_client.connections.get(name='bingSearch-connection') +``` + +### Message Roles + +| Role | Usage | +|------|-------| +| `user` | Input from the human user | +| `assistant` | Response from the agent | + +### Retrieving Messages + +```python +# V1 - Returns object with data attribute +messages = project_client.agents.list_messages(thread_id=thread.id) +response = messages.data[0].content[0].text.value + +# V2 - Returns iterable +messages = project_client.agents.messages.list(thread_id=thread.id) +for message in messages: + print(f"Role: {message.role}, Content: {message.content}") +``` + +--- + +## 9. 
Monitoring with Application Insights + +### Enable Azure Monitor Telemetry + +```python +from azure.monitor.opentelemetry import configure_azure_monitor + +# Get connection string from AI Foundry project +application_insights_connection_string = project_client.telemetry.get_connection_string() + +# Configure Azure Monitor +configure_azure_monitor(connection_string=application_insights_connection_string) +``` + +### Query Custom Metrics with KQL + +```python +import pandas as pd + +query = """ +customMetrics +| where name == 'Total Tokens' +| where timestamp >= ago(1h) +| extend parsedCustomDimensions = parse_json(customDimensions) +| extend apimSubscription = tostring(parsedCustomDimensions.['Subscription ID']) +| extend agentID = tostring(parsedCustomDimensions.['Agent ID']) +| summarize TotalValue = sum(value) by apimSubscription, bin(timestamp, 1m), agentID +| order by timestamp asc +""" + +output = utils.run( + f"az monitor app-insights query --app {app_insights_name} -g {resource_group_name} --analytics-query \"{query}\"", + "App Insights query succeeded", + "App Insights query failed" +) + +# Parse results into DataFrame +table = output.json_data['tables'][0] +df = pd.DataFrame( + table.get("rows"), + columns=[col.get("name") for col in table.get('columns')] +) +``` + +### Visualize Token Usage + +```python +import matplotlib.pyplot as plt + +df_pivot = df.pivot(index='timestamp', columns='apimSubscription', values='TotalValue') +ax = df_pivot.plot(kind='bar', stacked=True) +plt.title('Total token usage over time by APIM Subscription') +plt.xlabel('Time') +plt.ylabel('Tokens') +plt.legend(title='APIM Subscription') +plt.show() +``` + +--- + +## 10. 
V1 vs V2 SDK Differences + +### Client Initialization + +| Aspect | V1 | V2 | +|--------|----|----| +| Package | `azure-ai-projects==1.0.0b10` | `azure-ai-projects==1.0.0b12` | +| Client Creation | `AIProjectClient.from_connection_string()` | `AIProjectClient(endpoint=...)` | +| Authentication | Connection string | Direct endpoint URL | + +### API Method Paths + +| Operation | V1 | V2 | +|-----------|----|----| +| Create Thread | `project_client.agents.create_thread()` | `project_client.agents.threads.create()` | +| Create Message | `project_client.agents.create_message()` | `project_client.agents.messages.create()` | +| Create Run | `project_client.agents.create_run()` | `project_client.agents.runs.create()` | +| Auto-Process Run | `project_client.agents.create_and_process_run()` | `project_client.agents.runs.create_and_process()` | +| List Messages | `project_client.agents.list_messages()` | `project_client.agents.messages.list()` | +| List Run Steps | `project_client.agents.list_run_steps()` | `project_client.agents.run_steps.list()` | +| Get Run | `project_client.agents.get_run()` | `project_client.agents.runs.get()` | + +### Tool Imports + +| Tool | V1 | V2 | +|------|----|----| +| Bing | `azure.ai.projects.models.BingGroundingTool` | `azure.ai.agents.models.BingGroundingTool` | +| OpenAPI | `azure.ai.projects.models.OpenApiTool` | `azure.ai.agents.models.OpenApiTool` | +| Code Interpreter | (not shown) | `azure.ai.agents.models.CodeInterpreterTool` | + +### Message Response Structure + +```python +# V1 - Access via .data attribute +messages = project_client.agents.list_messages(thread_id=thread.id) +response_text = messages.data[0].content[0].text.value + +# V2 - Direct iteration, use MessageTextContent type check +from azure.ai.agents.models import MessageTextContent + +messages = project_client.agents.messages.list(thread_id=thread.id) +for item in messages: + last_content = item.content[-1] + if isinstance(last_content, MessageTextContent): + 
print(last_content.text.value) +``` + +--- + +## Summary + +### Key SDK Patterns + +1. **Use context managers** (`with project_client:`) for automatic cleanup +2. **Always delete agents** when done to avoid resource accumulation +3. **Use `create_and_process_run`** for simpler code when you don't need manual tool handling +4. **Poll run status** when using `create_run` directly +5. **Configure Azure Monitor** early for telemetry + +### Required Packages + +```bash +# V1 +pip install azure-ai-projects==1.0.0b10 azure-identity jsonref azure-monitor-opentelemetry + +# V2 +pip install azure-ai-projects==1.0.0b12 azure-identity jsonref azure-monitor-opentelemetry +``` + +--- + +## Additional Resources + +- [Azure AI Agent Service Overview](https://learn.microsoft.com/azure/ai-services/agents/overview) +- [Azure AI Agent Service Quickstart](https://learn.microsoft.com/azure/ai-services/agents/quickstart) +- [Bing Grounding Tool Documentation](https://learn.microsoft.com/azure/ai-services/agents/how-to/tools/bing-grounding) +- [Azure AI Projects SDK Samples](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ai/azure-ai-projects/samples) +- [Azure AI Foundry Tracing](https://learn.microsoft.com/azure/ai-studio/concepts/trace) diff --git a/labs/ai-foundry-sdk/AI-FOUNDRY-SDK-BREAKDOWN.md b/labs/ai-foundry-sdk/AI-FOUNDRY-SDK-BREAKDOWN.md new file mode 100644 index 00000000..ede8584f --- /dev/null +++ b/labs/ai-foundry-sdk/AI-FOUNDRY-SDK-BREAKDOWN.md @@ -0,0 +1,375 @@ +# Azure AI Foundry SDK Code Breakdown + +This document provides a detailed breakdown of the Python code used in the `ai-foundry-sdk.ipynb` notebook, explaining the Azure AI Foundry SDK syntax and patterns. + +--- + +## Table of Contents + +1. [Notebook Initialization](#1-notebook-initialization) +2. [Azure CLI Verification](#2-azure-cli-verification) +3. [Bicep Deployment](#3-bicep-deployment) +4. [Retrieving Deployment Outputs](#4-retrieving-deployment-outputs) +5. 
[Chat Completion with AI Foundry SDK](#5-chat-completion-with-ai-foundry-sdk) +6. [Chat Completion with APIM](#6-chat-completion-with-apim) + +--- + +## 1. Notebook Initialization + +```python +import os, sys, json +sys.path.insert(1, '../../shared') # add the shared directory to the Python path +import utils +``` + +### What's Happening: +- **`import os, sys, json`**: Imports standard Python libraries for OS operations, system configuration, and JSON handling. +- **`sys.path.insert(1, '../../shared')`**: Adds the `shared` directory to Python's module search path, allowing imports from that location. +- **`import utils`**: Imports the custom utility module from the shared directory containing helper functions for Azure operations. + +### Configuration Variables: + +```python +deployment_name = os.path.basename(os.path.dirname(globals()['__vsc_ipynb_file__'])) +resource_group_name = f"lab-{deployment_name}" +resource_group_location = "eastus2" +``` + +| Variable | Purpose | +|----------|---------| +| `deployment_name` | Extracts the folder name (e.g., `ai-foundry-sdk`) to use as deployment identifier | +| `resource_group_name` | Creates a resource group name prefixed with `lab-` | +| `resource_group_location` | Azure region where resources will be deployed | + +### AI Services Configuration: + +```python +aiservices_config = [{"name": "foundry1", "location": "eastus2"}] +``` + +This defines a list of AI Services accounts to create. 
Each entry specifies: +- **`name`**: Unique identifier for the AI service instance +- **`location`**: Azure region for the service + +### Models Configuration: + +```python +models_config = [{"name": "gpt-4.1-mini", "publisher": "OpenAI", "version": "2025-04-14", "sku": "GlobalStandard", "capacity": 20}] +``` + +| Property | Description | +|----------|-------------| +| `name` | Model deployment name (e.g., `gpt-4.1-mini`) | +| `publisher` | Model publisher (e.g., `OpenAI`) | +| `version` | Specific model version | +| `sku` | Deployment tier (`GlobalStandard`, `Standard`, etc.) | +| `capacity` | Tokens-per-minute capacity in thousands | + +### APIM Configuration: + +```python +apim_sku = 'Basicv2' +apim_subscriptions_config = [{"name": "subscription1", "displayName": "Subscription 1"}] +``` + +- **`apim_sku`**: Azure API Management pricing tier (Basicv2 is cost-effective for labs) +- **`apim_subscriptions_config`**: List of APIM subscriptions for API access control + +### Inference API Settings: + +```python +inference_api_path = "inference" # URL path segment for the inference API +inference_api_type = "AzureAI" # API type: AzureOpenAI, AzureAI, OpenAI, PassThrough +inference_api_version = "2024-05-01-preview" +foundry_project_name = deployment_name +``` + +--- + +## 2. 
Azure CLI Verification + +```python +output = utils.run("az account show", "Retrieved az account", "Failed to get the current az account") + +if output.success and output.json_data: + current_user = output.json_data['user']['name'] + tenant_id = output.json_data['tenantId'] + subscription_id = output.json_data['id'] +``` + +### What's Happening: +- **`utils.run()`**: Executes an Azure CLI command and returns a result object + - First param: The CLI command to execute + - Second param: Success message + - Third param: Failure message +- **`output.success`**: Boolean indicating command success +- **`output.json_data`**: Parsed JSON response from the CLI command + +### Extracting Account Information: +The code extracts key values from the `az account show` JSON response: +- `user.name`: Current logged-in user +- `tenantId`: Azure AD tenant ID +- `id`: Azure subscription ID + +--- + +## 3. Bicep Deployment + +```python +# Create the resource group if doesn't exist +utils.create_resource_group(resource_group_name, resource_group_location) +``` + +This utility function wraps `az group create` to ensure the resource group exists. + +### Building Bicep Parameters: + +```python +bicep_parameters = { + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "apimSku": { "value": apim_sku }, + "aiServicesConfig": { "value": aiservices_config }, + "modelsConfig": { "value": models_config }, + "apimSubscriptionsConfig": { "value": apim_subscriptions_config }, + "inferenceAPIPath": { "value": inference_api_path }, + "inferenceAPIType": { "value": inference_api_type }, + "foundryProjectName": { "value": foundry_project_name } + } +} +``` + +### Parameter File Structure: +| Field | Purpose | +|-------|---------| +| `$schema` | ARM template schema reference | +| `contentVersion` | Version identifier for the parameters file | +| `parameters` | Dictionary of parameter name β†’ `{ "value": ... 
}` pairs | + +### Writing & Executing Deployment: + +```python +with open('params.json', 'w') as bicep_parameters_file: + bicep_parameters_file.write(json.dumps(bicep_parameters)) + +output = utils.run(f"az deployment group create --name {deployment_name} --resource-group {resource_group_name} --template-file main.bicep --parameters params.json", + f"Deployment '{deployment_name}' succeeded", f"Deployment '{deployment_name}' failed") +``` + +The `az deployment group create` command: +- `--name`: Deployment name for tracking +- `--resource-group`: Target resource group +- `--template-file`: Path to Bicep template +- `--parameters`: Path to parameters JSON file + +--- + +## 4. Retrieving Deployment Outputs + +```python +output = utils.run(f"az deployment group show --name {deployment_name} -g {resource_group_name}", + f"Retrieved deployment: {deployment_name}", + f"Failed to retrieve deployment: {deployment_name}") + +if output.success and output.json_data: + log_analytics_id = utils.get_deployment_output(output, 'logAnalyticsWorkspaceId', 'Log Analytics Id') + apim_service_id = utils.get_deployment_output(output, 'apimServiceId', 'APIM Service Id') + apim_resource_gateway_url = utils.get_deployment_output(output, 'apimResourceGatewayURL', 'APIM API Gateway URL') +``` + +### What's Happening: +- **`az deployment group show`**: Retrieves details of a completed deployment +- **`utils.get_deployment_output()`**: Helper to extract specific output values from the deployment + +### Parsing Subscriptions: + +```python +apim_subscriptions = json.loads(utils.get_deployment_output(output, 'apimSubscriptions').replace("\'", "\"")) +for subscription in apim_subscriptions: + subscription_name = subscription['name'] + subscription_key = subscription['key'] +``` + +The subscription data is returned as a string representation of a list, so: +1. Replace single quotes with double quotes for valid JSON +2. Parse with `json.loads()` +3. 
Iterate to extract name and key for each subscription + +--- + +## 5. Chat Completion with AI Foundry SDK + +This is the **core AI Foundry SDK usage pattern**. + +### Imports: + +```python +from azure.identity import DefaultAzureCredential +from azure.ai.projects import AIProjectClient +``` + +| Import | Purpose | +|--------|---------| +| `DefaultAzureCredential` | Provides automatic authentication using available credentials (Azure CLI, managed identity, environment variables, etc.) | +| `AIProjectClient` | Main client for interacting with Azure AI Foundry projects | + +### Creating the Project Client: + +```python +project = AIProjectClient( + endpoint=foundry_project_endpoint, + credential=DefaultAzureCredential() +) +``` + +| Parameter | Description | +|-----------|-------------| +| `endpoint` | The AI Foundry project endpoint URL (e.g., `https://<your-resource>.services.ai.azure.com`) | +| `credential` | Authentication credential object | + +### Getting the OpenAI Client: + +```python +models = project.inference.get_azure_openai_client(api_version=inference_api_version) +``` + +This method returns an `AzureOpenAI` client from the `openai` package, pre-configured with: +- The project's endpoint +- Proper authentication +- Specified API version + +### Making a Chat Completion Request: + +```python +response = models.chat.completions.create( + model=str(models_config[0].get('name')), + messages=[ + {"role": "system", "content": "You are a sarcastic, unhelpful assistant."}, + {"role": "user", "content": "Can you tell me the time, please?"} + ], +) +``` + +### Chat Completion Parameters: + +| Parameter | Description | +|-----------|-------------| +| `model` | Name of the deployed model (e.g., `gpt-4.1-mini`) | +| `messages` | List of message objects with `role` and `content` | + +### Message Roles: + +| Role | Purpose | +|------|---------| +| `system` | Sets the assistant's behavior and personality | +| `user` | Contains the user's input/question | +| `assistant` | (not shown) 
Previous assistant responses for multi-turn conversations | + +### Accessing the Response: + +```python +print("💬 ", response.choices[0].message.content) +``` + +The response structure: +- `response.choices`: List of completion choices (usually 1) +- `response.choices[0].message`: The message object +- `response.choices[0].message.content`: The actual text response + +--- + +## 6. Chat Completion with APIM + +This demonstrates using the **Azure AI Inference SDK** to call models through API Management. + +### Imports: + +```python +from azure.ai.inference import ChatCompletionsClient +from azure.core.credentials import AzureKeyCredential +from azure.ai.inference.models import SystemMessage, UserMessage +``` + +| Import | Purpose | +|--------|---------| +| `ChatCompletionsClient` | Client specifically for chat completion operations | +| `AzureKeyCredential` | Credential wrapper for API key authentication | +| `SystemMessage`, `UserMessage` | Typed message classes for better code clarity | + +### Creating the Client: + +```python +client = ChatCompletionsClient( + endpoint=f"{apim_resource_gateway_url}/{inference_api_path}/models", + credential=AzureKeyCredential(api_key), +) +``` + +| Parameter | Description | +|-----------|-------------| +| `endpoint` | APIM gateway URL with the inference API path (e.g., `https://<apim-name>.azure-api.net/inference/models`) | +| `credential` | APIM subscription key wrapped in `AzureKeyCredential` | + +### Making a Request: + +```python +response = client.complete( + messages=[ + SystemMessage(content="You are a sarcastic, unhelpful assistant."), + UserMessage(content="Can you tell me the time, please?"), + ], + model=str(models_config[0].get('name')) +) +``` + +### Key Differences from Direct AI Foundry Usage: + +| Aspect | AI Foundry Direct | APIM Route | +|--------|-------------------|------------| +| Authentication | `DefaultAzureCredential` (Azure AD) | `AzureKeyCredential` (API key) | +| Client | `AIProjectClient` → OpenAI client | 
`ChatCompletionsClient` | +| Message format | Dictionaries | Typed message classes | +| Endpoint | Project endpoint | APIM gateway URL | + +--- + +## Summary of SDK Patterns + +### Authentication Options: + +1. **DefaultAzureCredential** - Best for Azure-native apps with Azure AD + ```python + from azure.identity import DefaultAzureCredential + credential = DefaultAzureCredential() + ``` + +2. **AzureKeyCredential** - Best for API key-based access + ```python + from azure.core.credentials import AzureKeyCredential + credential = AzureKeyCredential("your-api-key") + ``` + +### Client Hierarchy: + +``` +AIProjectClient (azure.ai.projects) +└── inference + └── get_azure_openai_client() β†’ AzureOpenAI client + └── chat.completions.create() +``` + +### Required Packages: + +```bash +pip install azure-ai-projects azure-ai-inference azure-identity +``` + +--- + +## Additional Resources + +- [Azure AI Foundry SDK Overview](https://learn.microsoft.com/azure/ai-studio/how-to/develop/sdk-overview) +- [Azure AI Inference SDK](https://learn.microsoft.com/python/api/overview/azure/ai-inference-readme) +- [Azure Identity Library](https://learn.microsoft.com/python/api/overview/azure/identity-readme) From 6cae356791acca3b1a9bc71f62ad935135b59270 Mon Sep 17 00:00:00 2001 From: saldanaj Date: Wed, 11 Feb 2026 19:34:38 +0000 Subject: [PATCH 3/5] including markdowns on how to deploy changes and modify agents. Also added a new file AGENT-CICD-APPROACH.md which outlines the approach for continuous integration and deployment of agents. This will help in streamlining the process of updating and deploying agents in the future. 
--- AGENT-CICD-APPROACH.md | 863 +++++++++++++++++++++++ labs/ai-agent-service/UPDATING-AGENTS.md | 625 ++++++++++++++++ 2 files changed, 1488 insertions(+) create mode 100644 AGENT-CICD-APPROACH.md create mode 100644 labs/ai-agent-service/UPDATING-AGENTS.md diff --git a/AGENT-CICD-APPROACH.md b/AGENT-CICD-APPROACH.md new file mode 100644 index 00000000..93b89e55 --- /dev/null +++ b/AGENT-CICD-APPROACH.md @@ -0,0 +1,863 @@ +# CI/CD Approach for Azure AI Agent Deployments + +This document outlines a production-ready approach for deploying Azure AI Foundry agents across environments using a declarative, manifest-based pattern. + +--- + +## Table of Contents + +1. [The Problem](#1-the-problem) +2. [Recommended Approach: Manifest-Based Deployment](#2-recommended-approach-manifest-based-deployment) +3. [Manifest Schema Design](#3-manifest-schema-design) +4. [Environment Configuration](#4-environment-configuration) +5. [Deployment CLI Tool](#5-deployment-cli-tool) +6. [CI/CD Pipeline Structure](#6-cicd-pipeline-structure) +7. [Testing Strategy](#7-testing-strategy) +8. [Key Design Decisions](#8-key-design-decisions) +9. [Infrastructure vs Application Layer](#9-infrastructure-vs-application-layer) +10. [Alternative Approaches Considered](#10-alternative-approaches-considered) +11. [Implementation Roadmap](#11-implementation-roadmap) + +--- + +## 1. 
The Problem + +Running Python SDK scripts directly in CI/CD pipelines has several issues: + +| Issue | Description | +|-------|-------------| +| **Not Declarative** | Imperative scripts are harder to review, diff, and reason about in pull requests | +| **Environment Coupling** | Connection IDs, endpoints, and secrets differ per environment and are hardcoded | +| **No Idempotency Guarantee** | Running twice might create duplicate agents or fail unexpectedly | +| **Drift Detection** | No way to know if someone manually changed the agent in the Azure portal | +| **Poor Auditability** | Changes to agent behavior aren't easily tracked in version control | + +### Current State (Lab Notebooks) + +```python +# This pattern works for experimentation but not production +agent = project_client.agents.create_agent( + model="gpt-4o", + name="my-agent", + instructions="...", + tools=[...] +) +``` + +**Problems:** +- Hardcoded values +- No environment awareness +- Creates new agent every run +- No state management + +--- + +## 2. Recommended Approach: Manifest-Based Deployment + +Adopt a **declarative manifest approach** similar to Kubernetes, Terraform, or Azure Bicep patterns: + +### Core Principles + +1. **Declarative** - Define the desired end state, not the steps to get there +2. **Idempotent** - Running the same deployment multiple times yields the same result +3. **Environment-Agnostic** - Same manifests work across dev/staging/prod with variable substitution +4. **Version Controlled** - All agent configurations live in Git +5. 
**Reviewable** - Changes are visible in pull request diffs + +### High-Level Architecture + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Source Control (Git) β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ agents/ β”‚ β”‚environments/ β”‚ β”‚ knowledge/ β”‚ β”‚ +β”‚ β”‚ *.yaml β”‚ β”‚ dev.yaml β”‚ β”‚ product_docs.md β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ staging.yamlβ”‚ β”‚ faq.md β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ prod.yaml β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Deployment CLI Tool β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ 1. Load manifests + environment config β”‚ β”‚ +β”‚ β”‚ 2. Resolve variable substitutions β”‚ β”‚ +β”‚ β”‚ 3. Query current state (get_agent) β”‚ β”‚ +β”‚ β”‚ 4. Calculate diff (create vs update vs no-op) β”‚ β”‚ +β”‚ β”‚ 5. Apply changes (create_agent / update_agent) β”‚ β”‚ +β”‚ β”‚ 6. Upload files to vector stores if needed β”‚ β”‚ +β”‚ β”‚ 7. 
Output deployment report β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Azure AI Foundry Project β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Agents β”‚ β”‚Vector Stores β”‚ β”‚ Connections β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ (Bing, APIs) β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +--- + +## 3. Manifest Schema Design + +### Agent Manifest Structure + +```yaml +# agents/customer-support-agent.yaml +apiVersion: agents/v1 +kind: Agent +metadata: + name: customer-support-agent + description: "Production customer support agent with knowledge base access" + labels: + team: support + tier: production +spec: + # Model configuration + model: ${MODEL_DEPLOYMENT_NAME} + + # Agent behavior + instructions: | + You are a helpful customer support agent for Contoso Products. 
+ + Guidelines: + - Always check the product knowledge base first for product questions + - Use Bing search for current information, pricing, or news + - Be polite, professional, and concise + - If you don't know the answer, say so honestly + + # Model parameters + temperature: 0.7 + topP: 0.95 + + # Tools configuration + tools: + - type: bing_grounding + config: + connectionRef: ${BING_CONNECTION_NAME} + + - type: file_search + config: + vectorStoreRef: product-knowledge-base + + - type: openapi + config: + name: weather_api + specPath: ./openapi/weather-openapi.json + connectionRef: ${WEATHER_API_CONNECTION_NAME} + description: "Get weather information for locations" + + # Tool resources (vector stores) + toolResources: + vectorStores: + - name: product-knowledge-base + files: + - path: ./knowledge/product_catalog.md + - path: ./knowledge/faq.md + - path: ./knowledge/troubleshooting_guide.md + # Or reference existing blob storage + # blobUri: ${KNOWLEDGE_BASE_BLOB_URI} + + # Metadata for tracking + metadata: + version: "2.1.0" + owner: "support-team@contoso.com" + lastReviewedBy: "jane.doe@contoso.com" +``` + +### Multi-Agent Deployment + +```yaml +# agents/sales-team/order-agent.yaml +apiVersion: agents/v1 +kind: Agent +metadata: + name: order-processing-agent + namespace: sales # Logical grouping +spec: + model: ${MODEL_DEPLOYMENT_NAME} + instructions: | + You are a sales assistant that helps process orders. 
+ tools: + - type: openapi + config: + name: product_catalog + specPath: ./openapi/catalog-openapi.json + connectionRef: ${CATALOG_API_CONNECTION} + - type: openapi + config: + name: place_order + specPath: ./openapi/order-openapi.json + connectionRef: ${ORDER_API_CONNECTION} +``` + +### Manifest Validation Schema + +```yaml +# schemas/agent-manifest-schema.yaml (JSON Schema for validation) +$schema: "http://json-schema.org/draft-07/schema#" +type: object +required: + - apiVersion + - kind + - metadata + - spec +properties: + apiVersion: + type: string + enum: ["agents/v1"] + kind: + type: string + enum: ["Agent"] + metadata: + type: object + required: ["name"] + properties: + name: + type: string + pattern: "^[a-z0-9-]+$" + description: + type: string + labels: + type: object + spec: + type: object + required: ["model", "instructions"] + properties: + model: + type: string + instructions: + type: string + temperature: + type: number + minimum: 0 + maximum: 2 + tools: + type: array + items: + type: object + required: ["type"] +``` + +--- + +## 4. 
Environment Configuration + +### Environment Overlay Files + +```yaml +# environments/dev.yaml +ENVIRONMENT: dev +FOUNDRY_ENDPOINT: https://dev-aiproject.services.ai.azure.com +MODEL_DEPLOYMENT_NAME: gpt-4o-dev +BING_CONNECTION_NAME: bing-search-dev +WEATHER_API_CONNECTION_NAME: weather-api-dev +CATALOG_API_CONNECTION: catalog-api-dev +ORDER_API_CONNECTION: order-api-dev + +# Optional: Override specific agent settings for dev +agentOverrides: + customer-support-agent: + spec: + temperature: 1.0 # More creative in dev for testing +``` + +```yaml +# environments/staging.yaml +ENVIRONMENT: staging +FOUNDRY_ENDPOINT: https://staging-aiproject.services.ai.azure.com +MODEL_DEPLOYMENT_NAME: gpt-4o-staging +BING_CONNECTION_NAME: bing-search-staging +WEATHER_API_CONNECTION_NAME: weather-api-staging +CATALOG_API_CONNECTION: catalog-api-staging +ORDER_API_CONNECTION: order-api-staging +``` + +```yaml +# environments/prod.yaml +ENVIRONMENT: prod +FOUNDRY_ENDPOINT: https://prod-aiproject.services.ai.azure.com +MODEL_DEPLOYMENT_NAME: gpt-4o-prod +BING_CONNECTION_NAME: bing-search-prod +WEATHER_API_CONNECTION_NAME: weather-api-prod +CATALOG_API_CONNECTION: catalog-api-prod +ORDER_API_CONNECTION: order-api-prod + +# Production-specific settings +agentOverrides: + customer-support-agent: + spec: + temperature: 0.5 # More deterministic in production +``` + +### Secrets Management + +Secrets should NOT be in environment files. Instead: + +```yaml +# environments/prod.yaml +# Reference Azure Key Vault or environment variables +FOUNDRY_ENDPOINT: ${AZURE_FOUNDRY_ENDPOINT} # From Key Vault / CI variable +``` + +--- + +## 5. 
Deployment CLI Tool + +### Proposed CLI Interface + +```bash +# Validate manifests against schema +agent-deploy validate -f agents/ + +# Show what would change (dry run) +agent-deploy plan -f agents/ -e environments/prod.yaml + +# Apply changes +agent-deploy apply -f agents/ -e environments/prod.yaml + +# Show diff between desired and actual state +agent-deploy diff -f agents/ -e environments/prod.yaml + +# Get status of deployed agents +agent-deploy status -e environments/prod.yaml + +# Delete an agent +agent-deploy delete -n customer-support-agent -e environments/prod.yaml + +# Export current agent state to manifest (reverse engineering) +agent-deploy export -n customer-support-agent -e environments/prod.yaml > exported-agent.yaml +``` + +### CLI Output Example + +``` +$ agent-deploy plan -f agents/ -e environments/prod.yaml + +Planning deployment to: https://prod-aiproject.services.ai.azure.com +Environment: prod + +Resolving manifests... + βœ“ agents/customer-support-agent.yaml + βœ“ agents/sales-team/order-agent.yaml + +Comparing desired state vs actual state... + +customer-support-agent: + ~ instructions: (modified - 3 lines changed) + + tools[2]: openapi/weather_api (new tool) + ~ temperature: 0.7 β†’ 0.5 + +order-processing-agent: + (no changes) + +Plan: 1 to update, 0 to create, 1 unchanged + +Run `agent-deploy apply` to execute this plan. 
+``` + +### Core Implementation Logic (Pseudocode) + +```python +class AgentDeployer: + def __init__(self, endpoint: str, credential): + self.client = AIProjectClient(endpoint=endpoint, credential=credential) + + def plan(self, manifests: List[AgentManifest], env: EnvironmentConfig) -> DeploymentPlan: + plan = DeploymentPlan() + + for manifest in manifests: + resolved = self._resolve_variables(manifest, env) + agent_name = resolved.metadata.name + + # Check if agent exists + existing = self._get_agent_by_name(agent_name) + + if existing is None: + plan.add_create(resolved) + elif self._has_changes(existing, resolved): + plan.add_update(existing.id, resolved, self._calculate_diff(existing, resolved)) + else: + plan.add_unchanged(agent_name) + + return plan + + def apply(self, plan: DeploymentPlan) -> DeploymentResult: + results = [] + + for action in plan.actions: + if action.type == "create": + result = self._create_agent(action.manifest) + elif action.type == "update": + result = self._update_agent(action.agent_id, action.manifest) + results.append(result) + + return DeploymentResult(results) + + def _get_agent_by_name(self, name: str) -> Optional[Agent]: + """Look up agent by name (idempotency key)""" + agents = self.client.agents.list_agents() + for agent in agents.data: + if agent.name == name: + return agent + return None +``` + +--- + +## 6. 
CI/CD Pipeline Structure + +### GitHub Actions Example + +```yaml +# .github/workflows/deploy-agents.yaml +name: Deploy AI Agents + +on: + push: + branches: [main] + paths: + - 'agents/**' + - 'knowledge/**' + - 'environments/**' + pull_request: + branches: [main] + paths: + - 'agents/**' + - 'knowledge/**' + - 'environments/**' + +jobs: + validate: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install agent-deploy CLI + run: pip install agent-deploy-cli + + - name: Validate manifests + run: agent-deploy validate -f agents/ + + plan-dev: + needs: validate + runs-on: ubuntu-latest + environment: development + steps: + - uses: actions/checkout@v4 + + - name: Azure Login + uses: azure/login@v2 + with: + creds: ${{ secrets.AZURE_CREDENTIALS_DEV }} + + - name: Plan deployment + run: | + agent-deploy plan -f agents/ -e environments/dev.yaml + env: + AZURE_FOUNDRY_ENDPOINT: ${{ secrets.FOUNDRY_ENDPOINT_DEV }} + + deploy-dev: + needs: plan-dev + if: github.event_name == 'push' + runs-on: ubuntu-latest + environment: development + steps: + - uses: actions/checkout@v4 + + - name: Azure Login + uses: azure/login@v2 + with: + creds: ${{ secrets.AZURE_CREDENTIALS_DEV }} + + - name: Deploy to dev + run: | + agent-deploy apply -f agents/ -e environments/dev.yaml + env: + AZURE_FOUNDRY_ENDPOINT: ${{ secrets.FOUNDRY_ENDPOINT_DEV }} + + - name: Run smoke tests + run: | + python tests/smoke_test.py --env dev + + deploy-staging: + needs: deploy-dev + runs-on: ubuntu-latest + environment: staging + steps: + - uses: actions/checkout@v4 + + - name: Azure Login + uses: azure/login@v2 + with: + creds: ${{ secrets.AZURE_CREDENTIALS_STAGING }} + + - name: Deploy to staging + run: | + agent-deploy apply -f agents/ -e environments/staging.yaml + + - name: Run integration tests + run: | + python tests/integration_test.py --env staging + + deploy-prod: + needs: deploy-staging + 
runs-on: ubuntu-latest + environment: + name: production + # Manual approval required + steps: + - uses: actions/checkout@v4 + + - name: Azure Login + uses: azure/login@v2 + with: + creds: ${{ secrets.AZURE_CREDENTIALS_PROD }} + + - name: Deploy to production + run: | + agent-deploy apply -f agents/ -e environments/prod.yaml + + - name: Smoke test + run: | + python tests/smoke_test.py --env prod + + - name: Tag release + run: | + git tag "agents-$(date +%Y%m%d-%H%M%S)" + git push --tags +``` + +### Pipeline Flow Diagram + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ PR Created │────▢│ Validate + │────▢│ PR Merged β”‚ +β”‚ β”‚ β”‚ Plan (dev) β”‚ β”‚ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Production │◀────│ Staging │◀────│ Development β”‚ +β”‚ (manual gate) β”‚ β”‚ + Int Tests β”‚ β”‚ + Smoke Tests β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ β”‚ β”‚ + β–Ό β–Ό β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ AI Foundry Prod β”‚ β”‚AI Foundry Stage β”‚ β”‚ AI Foundry Dev β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +--- + +## 7. 
Testing Strategy + +### Test Levels + +| Level | When | What | +|-------|------|------| +| **Schema Validation** | PR / Pre-deploy | Manifest structure is valid | +| **Dry Run** | PR / Pre-deploy | Plan shows expected changes | +| **Smoke Tests** | Post-deploy | Agent responds to basic prompts | +| **Integration Tests** | Post-deploy (staging) | Tools work correctly, end-to-end flows | +| **Evaluation Tests** | Scheduled / Release | Quality metrics (groundedness, coherence, etc.) | + +### Smoke Test Example + +```python +# tests/smoke_test.py +import os +from azure.ai.projects import AIProjectClient +from azure.identity import DefaultAzureCredential + +def test_agent_responds(agent_name: str, endpoint: str): + """Verify agent can receive a message and respond""" + client = AIProjectClient(endpoint=endpoint, credential=DefaultAzureCredential()) + + # Find agent by name + agents = client.agents.list_agents() + agent = next((a for a in agents.data if a.name == agent_name), None) + assert agent is not None, f"Agent {agent_name} not found" + + # Create thread and send test message + thread = client.agents.threads.create() + client.agents.messages.create( + thread_id=thread.id, + role="user", + content="Hello, are you working?" 
+ ) + + run = client.agents.runs.create_and_process( + thread_id=thread.id, + agent_id=agent.id + ) + + assert run.status == "completed", f"Run failed: {run.last_error}" + + messages = client.agents.messages.list(thread_id=thread.id) + assistant_messages = [m for m in messages if m.role == "assistant"] + assert len(assistant_messages) > 0, "No assistant response" + + print(f"βœ“ Agent {agent_name} smoke test passed") + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--env", required=True) + args = parser.parse_args() + + # Load environment config and run tests + endpoint = os.environ["AZURE_FOUNDRY_ENDPOINT"] + test_agent_responds("customer-support-agent", endpoint) +``` + +### Integration Test Example + +```python +# tests/integration_test.py +def test_file_search_tool_works(agent_name: str, endpoint: str): + """Verify agent can search uploaded knowledge base""" + client = AIProjectClient(endpoint=endpoint, credential=DefaultAzureCredential()) + + agent = get_agent_by_name(client, agent_name) + thread = client.agents.threads.create() + + # Ask a question that requires knowledge base + client.agents.messages.create( + thread_id=thread.id, + role="user", + content="What is the return policy for Contoso products?" + ) + + run = client.agents.runs.create_and_process( + thread_id=thread.id, + agent_id=agent.id + ) + + assert run.status == "completed" + + # Verify file_search tool was used + run_steps = client.agents.run_steps.list(thread_id=thread.id, run_id=run.id) + tool_calls = [s for s in run_steps if s.type == "tool_calls"] + file_search_calls = [ + tc for step in tool_calls + for tc in step.step_details.get("tool_calls", []) + if tc.get("type") == "file_search" + ] + + assert len(file_search_calls) > 0, "File search tool was not used" + print(f"βœ“ File search integration test passed") +``` + +--- + +## 8. 
Key Design Decisions + +### Idempotency Strategy: Name-Based Lookup + +| Approach | Pros | Cons | +|----------|------|------| +| **Name-based lookup** βœ… | Simple, no state file, preserves agent ID | Need consistent naming | +| Version-based (v1, v2, v3) | Clear history, easy rollback | Accumulates old agents | +| ID tracking (state file) | Explicit, Terraform-like | Requires state management | + +**Recommendation:** Use **name-based lookup with update** - the agent name serves as the idempotency key. The deployment tool looks up agents by name and creates or updates accordingly. + +### What Gets Version Controlled + +| Asset | Version Control? | Notes | +|-------|------------------|-------| +| Agent manifests (YAML) | βœ… Yes | Core configuration | +| Knowledge base files | βœ… Yes | Or reference external blob storage | +| Connection names (refs) | βœ… Yes | Actual connections created via IaC | +| OpenAPI specs | βœ… Yes | Define API tool interfaces | +| Agent IDs | ❌ No | Generated at runtime | +| API keys / secrets | ❌ No | From Key Vault / CI secrets | +| Vector store IDs | ❌ No | Created dynamically | + +### Handling Vector Stores and Files + +**Option A: Recreate on Deploy (Simple)** +``` +Deploy β†’ Upload files β†’ Create new vector store β†’ Attach to agent +``` +- Simple but may cause brief inconsistency during deployment + +**Option B: Incremental Updates (Complex)** +``` +Deploy β†’ Diff files β†’ Upload only new/changed β†’ Update vector store +``` +- More efficient but requires content hashing and tracking + +**Recommendation:** Start with Option A; optimize to Option B later if file uploads become slow. + +--- + +## 9. 
Infrastructure vs Application Layer + +Clear separation of concerns: + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Infrastructure Layer (Bicep / Terraform) β”‚ +β”‚ β”‚ +β”‚ Resources (deployed once, shared across agents): β”‚ +β”‚ β€’ Azure AI Foundry Project (Hub + Project) β”‚ +β”‚ β€’ Azure API Management (APIM) β”‚ +β”‚ β€’ Connections (Bing, OpenAPI endpoints, API keys) β”‚ +β”‚ β€’ Azure Key Vault β”‚ +β”‚ β€’ Storage Account (for knowledge base files) β”‚ +β”‚ β€’ Log Analytics Workspace β”‚ +β”‚ β€’ Application Insights β”‚ +β”‚ β”‚ +β”‚ Deployment: IaC pipelines (separate from agent deployment) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β”‚ References (connection names, endpoints) + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Application Layer (Manifest Deployment) β”‚ +β”‚ β”‚ +β”‚ Resources (deployed per agent, frequent changes): β”‚ +β”‚ β€’ Agent definitions (name, instructions, model, tools) β”‚ +β”‚ β€’ Vector stores β”‚ +β”‚ β€’ Uploaded files (knowledge base content) β”‚ +β”‚ β€’ Tool configurations β”‚ +β”‚ β”‚ +β”‚ Deployment: agent-deploy CLI (this approach) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### Why This Separation? 
+ +- **Infrastructure changes are slow** - APIM can take 30+ minutes to deploy +- **Agent changes are fast** - Agent updates take seconds +- **Different change frequency** - Infrastructure is stable; agents evolve rapidly +- **Different approval processes** - Infrastructure may need CAB approval; agent instructions can be team-owned + +--- + +## 10. Alternative Approaches Considered + +| Approach | Why It Was Not Chosen | +|----------|----------------------| +| **Pure ARM/Bicep** | Agents aren't first-class ARM resources - would need custom deployment scripts anyway | +| **Terraform only** | Same limitation - no native azurerm_ai_agent resource | +| **SDK scripts per agent** | Not declarative, hard to review, environment handling is messy | +| **Portal-only** | No version control, no reproducibility, no CI/CD, no audit trail | +| **Azure DevOps Pipelines** | Works fine, but GitHub Actions example is more portable | + +### What About Azure's Native Tooling? + +As of this writing, Azure doesn't provide a first-class declarative deployment mechanism for AI Foundry agents. This approach fills that gap. If Microsoft releases an official tool (e.g., `az ai agent deploy`), this pattern could adapt to use it under the hood. + +--- + +## 11. 
Implementation Roadmap + +### Phase 1: MVP (1-2 weeks) + +- [ ] Define final manifest schema (YAML) +- [ ] Build basic CLI with `validate`, `plan`, `apply` commands +- [ ] Implement name-based agent lookup (idempotency) +- [ ] Support basic tools: Bing, file_search, OpenAPI +- [ ] Environment variable substitution +- [ ] Basic smoke test framework + +### Phase 2: Production Hardening (2-3 weeks) + +- [ ] Add `diff` and `status` commands +- [ ] Implement `export` (reverse engineer existing agent to manifest) +- [ ] Add JSON Schema validation for manifests +- [ ] Detailed deployment logs and error handling +- [ ] Support for agent metadata and labels +- [ ] Vector store file diffing (only upload changed files) + +### Phase 3: Advanced Features (3-4 weeks) + +- [ ] Support for `agentOverrides` in environment files +- [ ] Rollback command (deploy previous Git commit's manifests) +- [ ] Agent versioning (maintain history within the tool) +- [ ] Integration with Azure AI Evaluation SDK for quality gates +- [ ] Support for namespace/folder organization +- [ ] Helm-like templating (optional) + +### Phase 4: Ecosystem (Ongoing) + +- [ ] GitHub Action: `agent-deploy-action` +- [ ] Azure DevOps Task +- [ ] VS Code extension for manifest authoring +- [ ] Documentation and examples + +--- + +## What This Enables + +| Capability | Benefit | +|------------|---------| +| **Code review for agent changes** | PR shows exactly what's changing in the manifest | +| **Audit trail** | Git history shows who changed what and when | +| **Rollback** | Revert to previous manifest and redeploy | +| **Environment parity** | Same manifest structure, different variable values | +| **Parallel development** | Multiple teams can own different agent manifests | +| **Compliance** | Track all agent changes for regulatory requirements | +| **Disaster recovery** | Rebuild entire agent fleet from manifests | + +--- + +## Repository Structure Example + +``` +my-agents-repo/ +β”œβ”€β”€ .github/ +β”‚ └── 
workflows/ +β”‚ └── deploy-agents.yaml +β”œβ”€β”€ agents/ +β”‚ β”œβ”€β”€ customer-support-agent.yaml +β”‚ └── sales-team/ +β”‚ β”œβ”€β”€ order-agent.yaml +β”‚ └── inventory-agent.yaml +β”œβ”€β”€ environments/ +β”‚ β”œβ”€β”€ dev.yaml +β”‚ β”œβ”€β”€ staging.yaml +β”‚ └── prod.yaml +β”œβ”€β”€ knowledge/ +β”‚ β”œβ”€β”€ product_catalog.md +β”‚ β”œβ”€β”€ faq.md +β”‚ └── troubleshooting_guide.md +β”œβ”€β”€ openapi/ +β”‚ β”œβ”€β”€ weather-openapi.json +β”‚ β”œβ”€β”€ catalog-openapi.json +β”‚ └── order-openapi.json +β”œβ”€β”€ tests/ +β”‚ β”œβ”€β”€ smoke_test.py +β”‚ └── integration_test.py +β”œβ”€β”€ schemas/ +β”‚ └── agent-manifest-schema.yaml +└── README.md +``` + +--- + +## Next Steps + +1. **Review this approach** and provide feedback +2. **Decide on schema details** (finalize manifest format) +3. **Build MVP CLI** as a proof of concept +4. **Test with real agents** in a development environment +5. **Iterate based on learnings** + +--- + +*Document created: February 2026* +*Last updated: February 2026* diff --git a/labs/ai-agent-service/UPDATING-AGENTS.md b/labs/ai-agent-service/UPDATING-AGENTS.md new file mode 100644 index 00000000..d68a34a7 --- /dev/null +++ b/labs/ai-agent-service/UPDATING-AGENTS.md @@ -0,0 +1,625 @@ +# Updating and Modifying Existing Azure AI Agents + +This document explains how to update existing agents using the Azure AI Foundry SDK - including adding tools, modifying instructions, attaching knowledge bases (vector stores), and managing agent configurations. + +--- + +## Table of Contents + +1. [Overview](#1-overview) +2. [The update_agent Method](#2-the-update_agent-method) +3. [Listing and Retrieving Existing Agents](#3-listing-and-retrieving-existing-agents) +4. [Adding Tools to an Existing Agent](#4-adding-tools-to-an-existing-agent) +5. [Adding a Knowledge Base (Vector Store)](#5-adding-a-knowledge-base-vector-store) +6. [Modifying Agent Instructions and Settings](#6-modifying-agent-instructions-and-settings) +7. 
[Removing Tools or Resources](#7-removing-tools-or-resources) +8. [Complete Example: Full Agent Modification Workflow](#8-complete-example-full-agent-modification-workflow) + +--- + +## 1. Overview + +Unlike the `create_agent` method used in the lab notebooks, the Azure AI Foundry SDK also provides an `update_agent` method that allows you to modify an existing agent without recreating it. This is useful for: + +- **Adding new tools** to extend agent capabilities +- **Attaching knowledge bases** (vector stores) for file search +- **Updating instructions** to change behavior +- **Adjusting model parameters** like temperature +- **Adding or updating metadata** + +### Key Concept: Agents are Persistent + +When you create an agent, it persists in your AI Foundry project until deleted. The ID returned from `create_agent` can be stored and used later to: +- Retrieve the agent with `get_agent` +- Modify it with `update_agent` +- Delete it with `delete_agent` + +--- + +## 2. The update_agent Method + +### Method Signature + +```python +update_agent( + agent_id: str, # Required: The ID of the agent to modify + *, + model: str | None = None, # Change the model deployment + name: str | None = None, # Change the agent's name + description: str | None = None, # Change the description + instructions: str | None = None, # Change system instructions + tools: List[ToolDefinition] | None = None, # Replace/update tools + tool_resources: ToolResources | None = None, # Update tool resources (e.g., vector stores) + temperature: float | None = None, # Adjust creativity (0-2) + top_p: float | None = None, # Nucleus sampling parameter + response_format: AgentsResponseFormatOption | None = None, # Output format + metadata: Dict[str, str] | None = None, # Key/value metadata + **kwargs +) -> Agent +``` + +### Important Notes + +- **Only pass parameters you want to change** - omitted parameters keep their current values +- **Tools are replaced, not merged** - if you update tools, you must 
include ALL tools the agent should have +- **Tool resources follow the same pattern** - include all vector stores when updating + +--- + +## 3. Listing and Retrieving Existing Agents + +### List All Agents + +```python +from azure.ai.projects import AIProjectClient +from azure.identity import DefaultAzureCredential + +project_client = AIProjectClient( + endpoint=foundry_project_endpoint, + credential=DefaultAzureCredential() +) + +with project_client: + agents_client = project_client.agents + + # List all agents in the project + agents = agents_client.list_agents() + + for agent in agents.data: + print(f"Agent ID: {agent.id}") + print(f" Name: {agent.name}") + print(f" Model: {agent.model}") + print(f" Instructions: {agent.instructions[:50]}...") + print(f" Tools: {[t['type'] for t in agent.tools]}") + print() +``` + +### Retrieve a Specific Agent + +```python +# Using agent ID stored from creation +agent_id = "asst_abc123xyz" + +with project_client: + agents_client = project_client.agents + + # Get the agent by ID + agent = agents_client.get_agent(agent_id=agent_id) + + print(f"Retrieved agent: {agent.name}") + print(f"Current instructions: {agent.instructions}") + print(f"Current tools: {agent.tools}") + print(f"Current tool_resources: {agent.tool_resources}") +``` + +--- + +## 4. 
Adding Tools to an Existing Agent + +### Example: Add Bing Grounding to an Existing Agent + +```python +from azure.ai.projects import AIProjectClient +from azure.identity import DefaultAzureCredential +from azure.ai.agents.models import BingGroundingTool + +project_client = AIProjectClient( + endpoint=foundry_project_endpoint, + credential=DefaultAzureCredential() +) + +# Assume we have an existing agent +existing_agent_id = "asst_abc123xyz" + +with project_client: + agents_client = project_client.agents + + # First, retrieve the existing agent to see current tools + agent = agents_client.get_agent(agent_id=existing_agent_id) + current_tools = agent.tools or [] + + print(f"Current tools: {current_tools}") + + # Get Bing connection + bing_connection = project_client.connections.get(name='bingSearch-connection') + + # Create Bing tool + bing = BingGroundingTool(connection_id=bing_connection.id) + + # Combine existing tools with new tool + # Note: bing.definitions returns a list of tool definitions + combined_tools = current_tools + bing.definitions + + # Update the agent with combined tools + updated_agent = agents_client.update_agent( + agent_id=existing_agent_id, + tools=combined_tools + ) + + print(f"Updated agent tools: {[t['type'] for t in updated_agent.tools]}") +``` + +### Example: Add OpenAPI Tool to an Existing Agent + +```python +import jsonref +from azure.ai.agents.models import ( + OpenApiTool, + OpenApiConnectionAuthDetails, + OpenApiConnectionSecurityScheme +) + +with project_client: + agents_client = project_client.agents + + # Get existing agent + agent = agents_client.get_agent(agent_id=existing_agent_id) + current_tools = agent.tools or [] + + # Load OpenAPI spec + with open("./weather-openapi.json", "r") as f: + openapi_spec = jsonref.loads(f.read()) + + # Create OpenAPI tool + weather_tool = OpenApiTool( + name="get_weather", + spec=openapi_spec, + description="Retrieve weather information", + auth=OpenApiConnectionAuthDetails( + 
security_scheme=OpenApiConnectionSecurityScheme( + connection_id=weather_api_connection_id + ) + ) + ) + + # Combine with existing tools + combined_tools = current_tools + weather_tool.definitions + + # Update agent + updated_agent = agents_client.update_agent( + agent_id=existing_agent_id, + tools=combined_tools + ) + + print(f"Agent now has {len(updated_agent.tools)} tools") +``` + +--- + +## 5. Adding a Knowledge Base (Vector Store) + +### Step 1: Upload Files and Create Vector Store + +```python +from azure.ai.agents.models import FilePurpose, FileSearchTool + +with project_client: + agents_client = project_client.agents + + # Upload a file + file = agents_client.files.upload_and_poll( + file_path="./product_documentation.md", + purpose=FilePurpose.AGENTS + ) + print(f"Uploaded file, ID: {file.id}") + + # Create vector store with the file + vector_store = agents_client.vector_stores.create_and_poll( + file_ids=[file.id], + name="product_knowledge_base" + ) + print(f"Created vector store, ID: {vector_store.id}") +``` + +### Step 2: Add File Search Tool to Existing Agent + +```python +with project_client: + agents_client = project_client.agents + + # Get existing agent + agent = agents_client.get_agent(agent_id=existing_agent_id) + current_tools = agent.tools or [] + current_tool_resources = agent.tool_resources or {} + + # Create file search tool + file_search = FileSearchTool(vector_store_ids=[vector_store.id]) + + # Combine tools + combined_tools = current_tools + file_search.definitions + + # Merge tool_resources (file_search.resources contains vector store references) + # file_search.resources typically looks like: {"file_search": {"vector_store_ids": [...]}} + merged_resources = {**current_tool_resources, **file_search.resources} + + # Update the agent + updated_agent = agents_client.update_agent( + agent_id=existing_agent_id, + tools=combined_tools, + tool_resources=merged_resources + ) + + print(f"Agent updated with file search capability") + print(f"Tool 
resources: {updated_agent.tool_resources}") +``` + +### Adding Files to an Existing Vector Store + +If you already have a vector store attached to an agent and want to add more files: + +```python +with project_client: + agents_client = project_client.agents + + # Upload additional file + new_file = agents_client.files.upload_and_poll( + file_path="./additional_docs.pdf", + purpose=FilePurpose.AGENTS + ) + + # Add file to existing vector store using batch + file_batch = agents_client.vector_store_file_batches.create_and_poll( + vector_store_id=existing_vector_store_id, + file_ids=[new_file.id] + ) + + print(f"Added file to vector store: {file_batch.id}") + # No agent update needed - the vector store is already attached! +``` + +--- + +## 6. Modifying Agent Instructions and Settings + +### Update Instructions + +```python +with project_client: + agents_client = project_client.agents + + # Update only the instructions + updated_agent = agents_client.update_agent( + agent_id=existing_agent_id, + instructions="""You are an expert customer support agent. + Always be polite and professional. + Use the knowledge base to answer product questions. 
+ If you don't know the answer, say so honestly.""" + ) + + print(f"Updated instructions: {updated_agent.instructions}") +``` + +### Update Temperature and Sampling + +```python +with project_client: + agents_client = project_client.agents + + # Make the agent more creative + updated_agent = agents_client.update_agent( + agent_id=existing_agent_id, + temperature=0.8, # Higher = more creative (default is often 1.0) + top_p=0.95 # Nucleus sampling + ) + + print(f"Updated temperature: {updated_agent.temperature}") +``` + +### Update Name and Description + +```python +with project_client: + agents_client = project_client.agents + + updated_agent = agents_client.update_agent( + agent_id=existing_agent_id, + name="customer-support-agent-v2", + description="Enhanced customer support agent with knowledge base access" + ) + + print(f"Updated name: {updated_agent.name}") +``` + +### Update Metadata + +```python +with project_client: + agents_client = project_client.agents + + # Add tracking metadata + updated_agent = agents_client.update_agent( + agent_id=existing_agent_id, + metadata={ + "version": "2.0", + "last_updated": "2026-02-11", + "owner": "support-team", + "environment": "production" + } + ) + + print(f"Updated metadata: {updated_agent.metadata}") +``` + +--- + +## 7. 
Removing Tools or Resources + +### Remove a Specific Tool + +Since tools are replaced entirely, you need to filter out the tool you want to remove: + +```python +with project_client: + agents_client = project_client.agents + + # Get current agent + agent = agents_client.get_agent(agent_id=existing_agent_id) + current_tools = agent.tools or [] + + # Filter out the tool you want to remove (e.g., remove Bing grounding) + tools_without_bing = [ + tool for tool in current_tools + if tool.get('type') != 'bing_grounding' + ] + + # Update agent with remaining tools + updated_agent = agents_client.update_agent( + agent_id=existing_agent_id, + tools=tools_without_bing + ) + + print(f"Agent now has {len(updated_agent.tools)} tools") +``` + +### Remove a Vector Store from File Search + +```python +from azure.ai.agents.models import FileSearchTool + +with project_client: + agents_client = project_client.agents + + # Get current agent + agent = agents_client.get_agent(agent_id=existing_agent_id) + + # Get current vector store IDs + current_vector_stores = agent.tool_resources.get('file_search', {}).get('vector_store_ids', []) + + # Remove the specific vector store + vector_store_to_remove = "vs_abc123" + updated_vector_stores = [vs for vs in current_vector_stores if vs != vector_store_to_remove] + + # Create new file search tool with remaining vector stores + file_search = FileSearchTool(vector_store_ids=updated_vector_stores) + + # Filter tools to update file_search definition + other_tools = [t for t in agent.tools if t.get('type') != 'file_search'] + combined_tools = other_tools + file_search.definitions + + # Update agent + updated_agent = agents_client.update_agent( + agent_id=existing_agent_id, + tools=combined_tools, + tool_resources=file_search.resources + ) + + print(f"Removed vector store from agent") +``` + +### Using Helper Methods (if available in FileSearchTool) + +Some SDK versions include helper methods: + +```python +# Add vector store to file search tool 
+file_search_tool.add_vector_store(new_vector_store.id) + +# Remove vector store from file search tool +file_search_tool.remove_vector_store(vector_store_to_remove_id) + +# Then update the agent +agents_client.update_agent( + agent_id=agent.id, + tools=file_search_tool.definitions, + tool_resources=file_search_tool.resources +) +``` + +--- + +## 8. Complete Example: Full Agent Modification Workflow + +Here's a complete example that demonstrates the full workflow of creating an agent, then later updating it: + +```python +import os +from azure.ai.projects import AIProjectClient +from azure.identity import DefaultAzureCredential +from azure.ai.agents.models import ( + BingGroundingTool, + FileSearchTool, + FilePurpose +) + +# Initialize client +project_client = AIProjectClient( + endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"], + credential=DefaultAzureCredential() +) + +with project_client: + agents_client = project_client.agents + + # ======================================== + # PHASE 1: Create a basic agent + # ======================================== + + print("=== Phase 1: Creating basic agent ===") + + agent = agents_client.create_agent( + model=os.environ["MODEL_DEPLOYMENT_NAME"], + name="my-support-agent", + instructions="You are a helpful support agent." 
+ ) + print(f"Created agent: {agent.id}") + + # Store the agent ID (in real scenario, persist this) + agent_id = agent.id + + # ======================================== + # PHASE 2: Later, add Bing grounding + # ======================================== + + print("\n=== Phase 2: Adding Bing grounding ===") + + # Retrieve the agent + agent = agents_client.get_agent(agent_id=agent_id) + + # Get Bing connection and create tool + bing_connection = project_client.connections.get(name='bingSearch-connection') + bing_tool = BingGroundingTool(connection_id=bing_connection.id) + + # Update with Bing tool + agent = agents_client.update_agent( + agent_id=agent_id, + tools=bing_tool.definitions, + instructions="You are a helpful support agent. Use Bing to search for current information when needed." + ) + print(f"Added Bing grounding. Tools: {[t['type'] for t in agent.tools]}") + + # ======================================== + # PHASE 3: Add a knowledge base + # ======================================== + + print("\n=== Phase 3: Adding knowledge base ===") + + # Upload documentation file + file = agents_client.files.upload_and_poll( + file_path="./product_docs.md", + purpose=FilePurpose.AGENTS + ) + print(f"Uploaded file: {file.id}") + + # Create vector store + vector_store = agents_client.vector_stores.create_and_poll( + file_ids=[file.id], + name="product-knowledge-base" + ) + print(f"Created vector store: {vector_store.id}") + + # Create file search tool + file_search = FileSearchTool(vector_store_ids=[vector_store.id]) + + # Combine with existing tools + agent = agents_client.get_agent(agent_id=agent_id) + combined_tools = agent.tools + file_search.definitions + + # Update agent + agent = agents_client.update_agent( + agent_id=agent_id, + tools=combined_tools, + tool_resources=file_search.resources, + instructions="""You are a helpful support agent with access to: + 1. Bing search for current information + 2. 
Product documentation knowledge base + + Always check the knowledge base first for product questions. + Use Bing for current events or information not in the knowledge base.""" + ) + print(f"Added file search. Tools: {[t['type'] for t in agent.tools]}") + + # ======================================== + # PHASE 4: Test the updated agent + # ======================================== + + print("\n=== Phase 4: Testing updated agent ===") + + thread = agents_client.threads.create() + message = agents_client.messages.create( + thread_id=thread.id, + role="user", + content="What features does our product have?" + ) + + run = agents_client.runs.create_and_process( + thread_id=thread.id, + agent_id=agent_id + ) + + print(f"Run status: {run.status}") + + if run.status == "completed": + messages = agents_client.messages.list(thread_id=thread.id) + for msg in messages: + if msg.role == "assistant" and msg.text_messages: + print(f"Agent response: {msg.text_messages[-1].text.value[:200]}...") + + # ======================================== + # PHASE 5: Cleanup (optional) + # ======================================== + + print("\n=== Phase 5: Cleanup ===") + + # To keep the agent for later use, skip deletion + # agents_client.delete_agent(agent_id) + # agents_client.vector_stores.delete(vector_store.id) + # agents_client.files.delete(file.id) + + print(f"Agent {agent_id} is ready for production use!") +``` + +--- + +## Summary + +### Key Patterns for Updating Agents + +| Task | Pattern | +|------|---------| +| Add tool | Get current tools β†’ Combine with new β†’ `update_agent(tools=combined)` | +| Add knowledge base | Create vector store β†’ Create FileSearchTool β†’ `update_agent(tools=..., tool_resources=...)` | +| Update instructions | `update_agent(instructions="...")` | +| Change model params | `update_agent(temperature=..., top_p=...)` | +| Remove tool | Filter out tool from list β†’ `update_agent(tools=filtered)` | + +### Best Practices + +1. 
**Always retrieve before updating** - Get current agent state to avoid losing existing configuration +2. **Merge tools carefully** - Tools are replaced, not merged automatically +3. **Test after updates** - Run a test conversation to verify changes work correctly +4. **Use metadata** - Track versions and changes in the metadata field +5. **Store agent IDs** - Persist agent IDs for later retrieval and updates + +### Required Packages + +```bash +pip install azure-ai-projects azure-ai-agents azure-identity +``` + +--- + +## Additional Resources + +- [AgentsClient API Reference](https://learn.microsoft.com/python/api/azure-ai-agents/azure.ai.agents.agentsclient) +- [File Search Tool Documentation](https://learn.microsoft.com/azure/ai-foundry/agents/how-to/tools/file-search) +- [Azure AI Agents SDK Overview](https://learn.microsoft.com/python/api/overview/azure/ai-agents-readme) From a80434971f322e6808d366655893ef6a2d834e99 Mon Sep 17 00:00:00 2001 From: saldanaj Date: Tue, 17 Feb 2026 22:00:42 +0000 Subject: [PATCH 4/5] adding model deployment markdown --- MODEL-DEPLOYMENT-APPROACH.md | 846 +++++++++++++++++++++++++++++++++++ 1 file changed, 846 insertions(+) create mode 100644 MODEL-DEPLOYMENT-APPROACH.md diff --git a/MODEL-DEPLOYMENT-APPROACH.md b/MODEL-DEPLOYMENT-APPROACH.md new file mode 100644 index 00000000..6be05bea --- /dev/null +++ b/MODEL-DEPLOYMENT-APPROACH.md @@ -0,0 +1,846 @@ +# Model Deployment Approach for Azure AI Foundry + +This document outlines the recommended approach for deploying, modifying, and deleting model deployments in Azure AI Foundry, and how this differs from agent deployment. + +--- + +## Table of Contents + +1. [Model Deployments vs Agents](#1-model-deployments-vs-agents) +2. [Recommendation: IaC for Model Deployments](#2-recommendation-iac-for-model-deployments) +3. [Bicep Approach (Recommended)](#3-bicep-approach-recommended) +4. [Terraform Approach](#4-terraform-approach) +5. [Python SDK Approach](#5-python-sdk-approach) +6. 
[Azure CLI Approach](#6-azure-cli-approach) +7. [Layered Deployment Architecture](#7-layered-deployment-architecture) +8. [Two-Pipeline Strategy](#8-two-pipeline-strategy) +9. [Common Operations](#9-common-operations) +10. [Summary](#10-summary) + +--- + +## 1. Model Deployments vs Agents + +Model deployments and agents are fundamentally different types of resources: + +| Aspect | Model Deployments | Agents | +|--------|-------------------|--------| +| **What they are** | Infrastructure resources (compute, endpoints) | Application configurations | +| **Change frequency** | Infrequent (weeks/months) | Frequent (days/weeks) | +| **Azure Resource** | βœ… Yes - first-class ARM resource | ❌ No - not an ARM resource | +| **Bicep/Terraform support** | βœ… Native support | ❌ Not supported | +| **Deployment time** | Minutes (provisioning compute) | Seconds | +| **Shared across** | Multiple agents | Single agent | +| **Requires quota** | βœ… Yes - TPM/RPM limits | ❌ No | +| **Cost implications** | βœ… Direct cost based on capacity | ❌ No direct cost | + +### Key Insight + +> **Model deployments are infrastructure; agents are applications.** +> +> They should be deployed accordingly with separate pipelines and tooling. + +--- + +## 2. Recommendation: IaC for Model Deployments + +Model deployments should be managed via **Bicep or Terraform**, not Python SDK or manifest approach, because: + +1. **They're real Azure resources** with proper resource types: + - `Microsoft.CognitiveServices/accounts/deployments` + - `Microsoft.MachineLearningServices/workspaces/deployments` + +2. **They integrate with Azure resource management**: + - Quotas and capacity limits + - Role-based access control (RBAC) + - Cost management and billing + - Azure Policy compliance + +3. **They change infrequently**: + - Model version upgrades are planned events + - Capacity changes require quota approval + - New model types require evaluation + +4. 
**They're shared infrastructure**: + - Multiple agents reference the same deployment + - Changes affect all dependent agents + +--- + +## 3. Bicep Approach (Recommended) + +### Basic Model Deployment + +```bicep +// modules/model-deployments.bicep + +@description('Name of the Azure AI Services account') +param aiServicesName string + +@description('Model deployments to create') +param modelDeployments array = [ + { + name: 'gpt-4o' + model: 'gpt-4o' + version: '2024-08-06' + sku: 'GlobalStandard' + capacity: 50 + } + { + name: 'gpt-4o-mini' + model: 'gpt-4o-mini' + version: '2024-07-18' + sku: 'GlobalStandard' + capacity: 100 + } +] + +resource aiServices 'Microsoft.CognitiveServices/accounts@2024-10-01' existing = { + name: aiServicesName +} + +resource deployments 'Microsoft.CognitiveServices/accounts/deployments@2024-10-01' = [for deployment in modelDeployments: { + parent: aiServices + name: deployment.name + sku: { + name: deployment.sku + capacity: deployment.capacity + } + properties: { + model: { + format: 'OpenAI' + name: deployment.model + version: deployment.version + } + versionUpgradeOption: 'OnceNewDefaultVersionAvailable' + raiPolicyName: 'Microsoft.Default' + } +}] + +output deploymentNames array = [for (deployment, i) in modelDeployments: deployments[i].name] +``` + +### Complete AI Services with Deployments + +```bicep +// main.bicep + +@description('Location for all resources') +param location string = resourceGroup().location + +@description('Environment name') +@allowed(['dev', 'staging', 'prod']) +param environment string + +@description('Model configuration') +param modelsConfig array = [ + { + name: 'gpt-4o' + model: 'gpt-4o' + version: '2024-08-06' + sku: 'GlobalStandard' + capacity: environment == 'prod' ? 100 : 30 + } + { + name: 'gpt-4o-mini' + model: 'gpt-4o-mini' + version: '2024-07-18' + sku: 'GlobalStandard' + capacity: environment == 'prod' ? 
200 : 50 + } +] + +// AI Services Account +resource aiServices 'Microsoft.CognitiveServices/accounts@2024-10-01' = { + name: 'ai-services-${environment}-${uniqueString(resourceGroup().id)}' + location: location + kind: 'AIServices' + sku: { + name: 'S0' + } + properties: { + customSubDomainName: 'ai-services-${environment}-${uniqueString(resourceGroup().id)}' + publicNetworkAccess: 'Enabled' + } +} + +// Model Deployments +resource modelDeployments 'Microsoft.CognitiveServices/accounts/deployments@2024-10-01' = [for model in modelsConfig: { + parent: aiServices + name: model.name + sku: { + name: model.sku + capacity: model.capacity + } + properties: { + model: { + format: 'OpenAI' + name: model.model + version: model.version + } + versionUpgradeOption: 'OnceNewDefaultVersionAvailable' + raiPolicyName: 'Microsoft.Default' + } +}] + +// Outputs for downstream use (agent deployments) +output aiServicesEndpoint string = aiServices.properties.endpoint +output aiServicesName string = aiServices.name +output modelDeploymentNames array = [for (model, i) in modelsConfig: modelDeployments[i].name] +``` + +### Environment-Specific Parameters + +```json +// parameters/dev.parameters.json +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "environment": { "value": "dev" }, + "modelsConfig": { + "value": [ + { + "name": "gpt-4o", + "model": "gpt-4o", + "version": "2024-08-06", + "sku": "GlobalStandard", + "capacity": 30 + } + ] + } + } +} +``` + +```json +// parameters/prod.parameters.json +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "environment": { "value": "prod" }, + "modelsConfig": { + "value": [ + { + "name": "gpt-4o", + "model": "gpt-4o", + "version": "2024-08-06", + "sku": "GlobalStandard", + "capacity": 100 + }, + { + "name": "gpt-4o-mini", + "model": 
"gpt-4o-mini", + "version": "2024-07-18", + "sku": "GlobalStandard", + "capacity": 200 + } + ] + } + } +} +``` + +--- + +## 4. Terraform Approach + +### Using azurerm Provider + +```hcl +# providers.tf +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 3.100" + } + } +} + +provider "azurerm" { + features {} +} +``` + +```hcl +# variables.tf +variable "environment" { + type = string + description = "Environment name (dev, staging, prod)" +} + +variable "location" { + type = string + default = "eastus2" +} + +variable "model_deployments" { + type = list(object({ + name = string + model = string + version = string + sku = string + capacity = number + })) + description = "List of model deployments to create" +} +``` + +```hcl +# main.tf +resource "azurerm_resource_group" "main" { + name = "rg-ai-${var.environment}" + location = var.location +} + +resource "azurerm_cognitive_account" "ai_services" { + name = "ai-services-${var.environment}-${random_string.suffix.result}" + location = azurerm_resource_group.main.location + resource_group_name = azurerm_resource_group.main.name + kind = "AIServices" + sku_name = "S0" + custom_subdomain_name = "ai-services-${var.environment}-${random_string.suffix.result}" + + tags = { + environment = var.environment + } +} + +resource "azurerm_cognitive_deployment" "models" { + for_each = { for model in var.model_deployments : model.name => model } + + name = each.value.name + cognitive_account_id = azurerm_cognitive_account.ai_services.id + + model { + format = "OpenAI" + name = each.value.model + version = each.value.version + } + + sku { + name = each.value.sku + capacity = each.value.capacity + } +} + +resource "random_string" "suffix" { + length = 8 + special = false + upper = false +} +``` + +```hcl +# outputs.tf +output "ai_services_endpoint" { + value = azurerm_cognitive_account.ai_services.endpoint +} + +output "model_deployment_names" { + value = [for d in 
azurerm_cognitive_deployment.models : d.name] +} +``` + +### Environment-Specific tfvars + +```hcl +# environments/dev.tfvars +environment = "dev" +location = "eastus2" + +model_deployments = [ + { + name = "gpt-4o" + model = "gpt-4o" + version = "2024-08-06" + sku = "GlobalStandard" + capacity = 30 + } +] +``` + +```hcl +# environments/prod.tfvars +environment = "prod" +location = "eastus2" + +model_deployments = [ + { + name = "gpt-4o" + model = "gpt-4o" + version = "2024-08-06" + sku = "GlobalStandard" + capacity = 100 + }, + { + name = "gpt-4o-mini" + model = "gpt-4o-mini" + version = "2024-07-18" + sku = "GlobalStandard" + capacity = 200 + } +] +``` + +--- + +## 5. Python SDK Approach + +The Python SDK can manage deployments but is better suited for **ad-hoc operations and scripting** rather than production CI/CD. + +### Using Azure Management SDK + +```python +# model_management.py +from azure.identity import DefaultAzureCredential +from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient +from azure.mgmt.cognitiveservices.models import Deployment, DeploymentModel, Sku + +class ModelDeploymentManager: + def __init__(self, subscription_id: str, resource_group: str, account_name: str): + self.subscription_id = subscription_id + self.resource_group = resource_group + self.account_name = account_name + self.client = CognitiveServicesManagementClient( + credential=DefaultAzureCredential(), + subscription_id=subscription_id + ) + + def create_deployment( + self, + deployment_name: str, + model_name: str, + model_version: str, + sku_name: str = "GlobalStandard", + capacity: int = 30 + ) -> Deployment: + """Create or update a model deployment""" + deployment = Deployment( + sku=Sku(name=sku_name, capacity=capacity), + properties={ + "model": { + "format": "OpenAI", + "name": model_name, + "version": model_version + }, + "versionUpgradeOption": "OnceNewDefaultVersionAvailable", + "raiPolicyName": "Microsoft.Default" + } + ) + + result = 
self.client.deployments.begin_create_or_update( + resource_group_name=self.resource_group, + account_name=self.account_name, + deployment_name=deployment_name, + deployment=deployment + ).result() + + print(f"βœ“ Deployment created/updated: {result.name}") + return result + + def list_deployments(self) -> list: + """List all model deployments""" + deployments = self.client.deployments.list( + resource_group_name=self.resource_group, + account_name=self.account_name + ) + return list(deployments) + + def get_deployment(self, deployment_name: str) -> Deployment: + """Get a specific deployment""" + return self.client.deployments.get( + resource_group_name=self.resource_group, + account_name=self.account_name, + deployment_name=deployment_name + ) + + def update_capacity(self, deployment_name: str, new_capacity: int) -> Deployment: + """Update deployment capacity (TPM)""" + existing = self.get_deployment(deployment_name) + existing.sku.capacity = new_capacity + + result = self.client.deployments.begin_create_or_update( + resource_group_name=self.resource_group, + account_name=self.account_name, + deployment_name=deployment_name, + deployment=existing + ).result() + + print(f"βœ“ Capacity updated: {result.name} -> {new_capacity}K TPM") + return result + + def delete_deployment(self, deployment_name: str) -> None: + """Delete a model deployment""" + self.client.deployments.begin_delete( + resource_group_name=self.resource_group, + account_name=self.account_name, + deployment_name=deployment_name + ).result() + + print(f"βœ“ Deployment deleted: {deployment_name}") + + +# Example usage +if __name__ == "__main__": + import os + + manager = ModelDeploymentManager( + subscription_id=os.environ["AZURE_SUBSCRIPTION_ID"], + resource_group="my-rg", + account_name="my-ai-services" + ) + + # Create a deployment + manager.create_deployment( + deployment_name="gpt-4o", + model_name="gpt-4o", + model_version="2024-08-06", + capacity=50 + ) + + # List all deployments + print("\nCurrent 
deployments:") + for d in manager.list_deployments(): + model = d.properties.model + print(f" {d.name}: {model.name} v{model.version} ({d.sku.capacity}K TPM)") + + # Update capacity + manager.update_capacity("gpt-4o", 100) + + # Delete a deployment + # manager.delete_deployment("old-deployment") +``` + +### When to Use Python SDK + +| Scenario | Use Python SDK? | +|----------|-----------------| +| Production CI/CD pipeline | ❌ Use Bicep/Terraform | +| One-time capacity adjustment | βœ… Quick script | +| Listing current deployments | βœ… Convenient | +| Emergency changes | βœ… Fast | +| Automation scripts | βœ… Good fit | +| Integration tests | βœ… Verify deployments exist | + +--- + +## 6. Azure CLI Approach + +For quick ad-hoc operations: + +### Create Deployment + +```bash +az cognitiveservices account deployment create \ + --name "my-ai-services" \ + --resource-group "my-rg" \ + --deployment-name "gpt-4o" \ + --model-name "gpt-4o" \ + --model-version "2024-08-06" \ + --model-format "OpenAI" \ + --sku-name "GlobalStandard" \ + --sku-capacity 50 +``` + +### List Deployments + +```bash +az cognitiveservices account deployment list \ + --name "my-ai-services" \ + --resource-group "my-rg" \ + --output table +``` + +### Update Capacity + +```bash +az cognitiveservices account deployment create \ + --name "my-ai-services" \ + --resource-group "my-rg" \ + --deployment-name "gpt-4o" \ + --sku-capacity 100 +``` + +### Delete Deployment + +```bash +az cognitiveservices account deployment delete \ + --name "my-ai-services" \ + --resource-group "my-rg" \ + --deployment-name "old-deployment" +``` + +### Show Deployment Details + +```bash +az cognitiveservices account deployment show \ + --name "my-ai-services" \ + --resource-group "my-rg" \ + --deployment-name "gpt-4o" +``` + +--- + +## 7. 
Layered Deployment Architecture + +Clear separation between infrastructure and application: + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Layer 1: Infrastructure (Bicep/Terraform) β”‚ +β”‚ Change frequency: Monthly | Approval: CAB/Change Management β”‚ +β”‚ β”‚ +β”‚ Resources: β”‚ +β”‚ β€’ Azure AI Foundry Hub + Project β”‚ +β”‚ β€’ Azure AI Services account β”‚ +β”‚ β€’ Model Deployments (gpt-4o, gpt-4o-mini, etc.) ◀── THIS DOC β”‚ +β”‚ β€’ Azure API Management (APIM) β”‚ +β”‚ β€’ Connections (Bing, OpenAPI endpoints) β”‚ +β”‚ β€’ Key Vault, Storage β”‚ +β”‚ β€’ Networking (VNet, Private Endpoints) β”‚ +β”‚ β€’ Log Analytics, Application Insights β”‚ +β”‚ β”‚ +β”‚ Deployment Method: az deployment group create / terraform applyβ”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β”‚ Outputs: deployment names, endpoints, connection IDs + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Layer 2: Application (Agent Manifests) β”‚ +β”‚ Change frequency: Weekly | Approval: Team/PR review β”‚ +β”‚ β”‚ +β”‚ Resources: β”‚ +β”‚ β€’ Agent definitions (name, instructions, tools) β”‚ +β”‚ β€’ Vector stores β”‚ +β”‚ β€’ Uploaded files (knowledge base content) β”‚ +β”‚ β€’ Tool configurations β”‚ +β”‚ β”‚ +β”‚ Deployment Method: agent-deploy apply (manifest-based) β”‚ +β”‚ See: AGENT-CICD-APPROACH.md β”‚ 
+β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### Why This Separation? + +| Factor | Infrastructure | Application | +|--------|----------------|-------------| +| **Deployment time** | 5-30 minutes | Seconds | +| **Change risk** | High (affects all agents) | Lower (single agent) | +| **Approval process** | CAB/Change control | PR review | +| **Rollback complexity** | Complex | Simple | +| **Skills required** | Platform engineering | Application development | + +--- + +## 8. Two-Pipeline Strategy + +### Infrastructure Pipeline (Bicep/Terraform) + +```yaml +# .github/workflows/deploy-infrastructure.yaml +name: Deploy Infrastructure + +on: + push: + branches: [main] + paths: + - 'infra/**' + workflow_dispatch: + inputs: + environment: + description: 'Environment to deploy' + required: true + type: choice + options: [dev, staging, prod] + +jobs: + validate: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Validate Bicep + run: az bicep build --file infra/main.bicep + + deploy-dev: + needs: validate + if: github.event_name == 'push' || github.event.inputs.environment == 'dev' + runs-on: ubuntu-latest + environment: development + steps: + - uses: actions/checkout@v4 + + - name: Azure Login + uses: azure/login@v2 + with: + creds: ${{ secrets.AZURE_CREDENTIALS_DEV }} + + - name: What-If + run: | + az deployment group what-if \ + --resource-group rg-ai-dev \ + --template-file infra/main.bicep \ + --parameters infra/parameters/dev.parameters.json + + - name: Deploy + run: | + az deployment group create \ + --resource-group rg-ai-dev \ + --template-file infra/main.bicep \ + --parameters infra/parameters/dev.parameters.json + + - name: Output Deployment Info + id: outputs + run: | + OUTPUTS=$(az deployment group show \ + --resource-group rg-ai-dev \ + --name main \ + --query 
properties.outputs) + echo "deployment_outputs=$OUTPUTS" >> $GITHUB_OUTPUT + + deploy-staging: + needs: deploy-dev + runs-on: ubuntu-latest + environment: staging + steps: + # Similar to dev... + + deploy-prod: + needs: deploy-staging + runs-on: ubuntu-latest + environment: + name: production + # Manual approval required + steps: + # Similar to dev with prod parameters... +``` + +### Agent Pipeline (Manifests) + +```yaml +# .github/workflows/deploy-agents.yaml +name: Deploy Agents + +on: + push: + branches: [main] + paths: + - 'agents/**' + - 'knowledge/**' + workflow_dispatch: + +jobs: + # See AGENT-CICD-APPROACH.md for full details + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Deploy agents + run: agent-deploy apply -f agents/ -e environments/prod.yaml +``` + +### Coordination Between Pipelines + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Infrastructure Pipeline β”‚ +β”‚ β”‚ +β”‚ 1. Deploy AI Services + Model Deployments β”‚ +β”‚ 2. Output: MODEL_DEPLOYMENT_NAMES, FOUNDRY_ENDPOINT β”‚ +β”‚ 3. Store outputs in GitHub Environment Variables β”‚ +β”‚ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β”‚ Environment variables available + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Agent Pipeline β”‚ +β”‚ β”‚ +β”‚ 1. Read agent manifests β”‚ +β”‚ 2. Resolve ${MODEL_DEPLOYMENT_NAME} from environment β”‚ +β”‚ 3. 
Deploy agents referencing those model deployments β”‚ +β”‚ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +--- + +## 9. Common Operations + +### Adding a New Model + +1. **Update Bicep/Terraform** - Add new deployment to `modelsConfig` +2. **PR Review** - Ensure quota is available +3. **Deploy Infrastructure** - Run infrastructure pipeline +4. **Update Agent Manifests** - Reference new model by name +5. **Deploy Agents** - Agents now use new model + +### Upgrading Model Version + +1. **Update Bicep parameters** - Change `version` field +2. **What-If** - Preview changes +3. **Deploy** - Existing deployment is updated in-place +4. **No agent changes needed** - Agents reference by deployment name + +### Increasing Capacity (TPM) + +1. **Check quota** - Ensure subscription has available TPM +2. **Update Bicep parameters** - Change `capacity` field +3. **Deploy** - Capacity updated +4. **No agent changes needed** + +### Deprecating a Model + +1. **Update agent manifests first** - Point to new model +2. **Deploy agents** - Agents now use new model +3. **Update Bicep** - Remove old deployment +4. **Deploy infrastructure** - Old deployment deleted + +--- + +## 10. 
Summary + +### Decision Matrix + +| Resource Type | Deployment Method | Reason | +|---------------|-------------------|--------| +| **Model Deployments** | Bicep/Terraform | Real ARM resources, native support, infrequent changes | +| **AI Services Account** | Bicep/Terraform | Infrastructure, shared resource | +| **Connections** | Bicep/Terraform | ARM resources, shared across agents | +| **APIM Instance** | Bicep/Terraform | Infrastructure, long deployment time | +| **Agents** | Manifest + CLI | Not ARM resources, frequent changes | +| **Vector Stores** | Manifest + CLI | Dynamic, tied to agent lifecycle | +| **Knowledge Base Files** | Manifest + CLI | Changes with agent updates | + +### Best Practices + +1. **Use Bicep/Terraform for model deployments** - Native ARM support, state management, drift detection + +2. **Separate infrastructure and application pipelines** - Different change frequencies and approval processes + +3. **Use environment-specific parameters** - Same templates, different capacity/configuration per environment + +4. **Output deployment names** - Pass from infrastructure to application layer via environment variables + +5. **Plan before apply** - Always preview changes (what-if / plan) + +6. **Version control everything** - Both Bicep templates and agent manifests in Git + +7. 
**Use Python SDK for ad-hoc operations** - Quick capacity changes, listing, emergency updates + +--- + +## Related Documents + +- [AGENT-CICD-APPROACH.md](./AGENT-CICD-APPROACH.md) - Manifest-based approach for agent deployments +- [labs/ai-agent-service/](./labs/ai-agent-service/) - Hands-on labs for AI agents +- [modules/cognitive-services/](./modules/cognitive-services/) - Reusable Bicep modules + +--- + +*Document created: February 2026* +*Last updated: February 2026* From 153c86870c6fc04de9cf1634c163f189fac801fd Mon Sep 17 00:00:00 2001 From: saldanaj Date: Tue, 17 Feb 2026 22:00:57 +0000 Subject: [PATCH 5/5] adding claude.md --- CLAUDE.md | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..5c4b2faa --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,180 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +This repository contains Azure AI Gateway experimental labs and samples, demonstrating API Management patterns for AI services. The project is organized as a collection of Jupyter notebook-based labs that explore different aspects of the AI Gateway pattern using Azure API Management with Azure OpenAI and other AI services. 
+
+## Development Environment Setup
+
+### Python Environment
+```bash
+# Install Python dependencies
+pip install -r requirements.txt
+
+# The comprehensive requirements.txt covers all labs
+# Key packages include: openai, azure-identity, azure-mgmt-apimanagement,
+# azure-ai-projects, mcp, autogen-core, semantic-kernel
+```
+
+### Workshop Documentation (Docusaurus)
+```bash
+# Navigate to workshop directory
+cd workshop
+
+# Install dependencies and run development server
+npm install
+npm start
+
+# Build static site
+npm run build
+
+# Type checking
+npm run typecheck
+```
+
+## Repository Structure
+
+- **`labs/`** - Individual experimental labs as Jupyter notebooks
+  - Each lab contains: README.md, notebook files (.ipynb), Bicep templates, policy XML files
+  - Lab categories: AI Agents, Inference API, Azure OpenAI-based
+  - Notable labs: model-context-protocol, openai-agents, semantic-caching, backend-pool-load-balancing
+
+- **`modules/`** - Reusable Bicep modules for Azure resources
+  - `apim/` - Azure API Management modules (v1, v2)
+  - `cognitive-services/` - Azure OpenAI and AI Foundry modules
+  - `monitor/` - Application Insights modules
+  - `network/` - Virtual network modules
+
+- **`shared/`** - Shared Python utilities
+  - `utils.py` - Core utilities for Azure resource management, deployment, cleanup
+  - `apimtools.py` - APIM-specific client tools and API discovery
+  - `snippets/` - Reusable code snippets for common operations
+
+- **`tools/`** - Supporting tools and utilities
+  - Mock server implementation
+  - Tracing and streaming tools
+  - Rate limiting and testing utilities
+
+- **`workshop/`** - Docusaurus-based documentation website
+
+## Common Development Commands
+
+### Python Environment Setup
+```bash
+# Install dependencies for all labs
+pip install -r requirements.txt
+
+# Run individual Jupyter notebooks (preferred method for lab development)
+jupyter lab labs/<lab-name>/<notebook-name>.ipynb
+```
+
+### Azure Resource Management
+```bash
+# Deploy lab 
infrastructure (run from lab directory)
+az deployment group create --resource-group <resource-group-name> --template-file main.bicep
+
+# Clean up resources (use dedicated cleanup notebooks)
+jupyter lab clean-up-resources.ipynb
+```
+
+### Workshop Documentation (Docusaurus)
+```bash
+cd workshop
+
+# Development
+npm install
+npm start # Development server on localhost:3000
+
+# Build and type checking
+npm run build
+npm run typecheck
+```
+
+## Key Architecture Patterns
+
+### Lab Structure
+Each lab follows a consistent pattern:
+- `main.bicep` - Infrastructure as Code template
+- `policy.xml` - APIM policy configuration
+- `<lab-name>.ipynb` - Main implementation notebook
+- `clean-up-resources.ipynb` - Resource cleanup
+- `README.md` - Lab-specific documentation
+
+### Azure Resource Organization
+- Resource groups follow naming: `lab-<lab-name>`
+- Resources use unique suffixes via `uniqueString()` function
+- Cleanup includes purging soft-deleted resources (APIM, Cognitive Services, Key Vault)
+
+### APIM Integration Patterns
+- Backend pool load balancing for resilience
+- Token-based rate limiting and metrics
+- Semantic caching for performance
+- Policy-based content filtering and safety
+- OAuth 2.0 and client credential flows for security
+
+### Python Utilities Architecture
+- `shared/utils.py` - Core utilities for Azure resource management, deployment, cleanup with colored output formatting
+- `shared/apimtools.py` - APIM-specific client tools and API discovery using Azure SDK
+- Standardized `print_*` functions for consistent output formatting (print_ok, print_error, print_info, etc.)
+- Resource lifecycle management integrated with Azure CLI commands
+- APIM policy management via REST APIs and Azure Management SDK
+
+## Development Guidelines
+
+### Code Style
+- Python follows PEP 8 conventions
+- Bicep templates use descriptive parameter names and documentation
+- XML policies are properly formatted and commented
+- Jupyter notebooks include markdown explanations
+
+### Resource Naming
+- Use `resourceSuffix` parameter for unique naming
+- Follow Azure naming conventions
+- Include resource type prefixes (apim-, ai-, kv-, etc.)
+
+### Security Practices
+- Use Azure Managed Identity where possible
+- Store secrets in Key Vault
+- Implement proper RBAC permissions
+- Follow principle of least privilege
+
+### Lab Testing and Validation
+- Each lab is self-contained with its own infrastructure and cleanup
+- Every lab includes a `clean-up-resources.ipynb` notebook that MUST be run after testing
+- Validation is done through notebook execution and checking response outputs
+- Resource cleanup includes purging soft-deleted Azure resources (APIM, Cognitive Services, Key Vault)
+- Use debug tracing tools in `tools/` directory for troubleshooting API calls
+- Test files use `.rest`, `.http`, and PowerShell scripts for API validation
+
+## Notable Dependencies
+
+- **Azure CLI** - Primary interface for Azure operations
+- **Bicep** - Infrastructure as Code templates
+- **Azure SDK for Python** - Programmatic Azure resource management
+- **OpenAI Python SDK** - AI model interactions
+- **MCP (Model Context Protocol)** - Agent and tool integration
+- **AutoGen** - Multi-agent conversation frameworks
+- **Semantic Kernel** - AI application orchestration
+
+## Development Workflow
+
+### Lab Development Process
+1. Navigate to the specific lab directory: `cd labs/<lab-name>`
+2. Deploy infrastructure using `main.bicep` template
+3. Execute the lab notebook step-by-step for testing and validation
+4. 
**Always run `clean-up-resources.ipynb` when finished** to avoid resource charges +5. Update documentation if making changes to lab functionality + +### Git Workflow +- Standard branch-based workflow from main branch +- Each lab is self-contained and must remain functional +- Always include both implementation and cleanup procedures +- Test lab functionality end-to-end before committing +- Sensitive files: use `.gitignore` or `git update-index --skip-worktree` for tracked files (see `scripts/git-helpers.md`) + +### Working with Shared Components +- Modify `shared/utils.py` or `shared/apimtools.py` carefully as they affect all labs +- Test changes across multiple labs before committing shared utility changes +- Follow existing patterns for output formatting and error handling \ No newline at end of file