From 13604c5929e9e1ab3b00be8cc045d42c5364d5e6 Mon Sep 17 00:00:00 2001 From: pyrex41 Date: Sat, 22 Nov 2025 15:53:02 -0600 Subject: [PATCH] feat: add LangSmith Cloud observability for all AI API calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comprehensive tracing for production monitoring and debugging: - OpenAI (GPT-4o, GPT-4o-mini) - scene generation, semantic augmentation - Anthropic (Claude) - prompt parsing, creative direction - XAI (Grok-4) - AI-powered image pair selection - Replicate API - image/video/audio generation (Flux, SkyReels, Veo3, Hailuo) - HTTP requests - FastAPI middleware for end-to-end tracing Changes: - Add langsmith, langchain-openai, langchain-anthropic dependencies - Instrument all LLM provider methods with @traceable decorators - Add LangSmithMiddleware to both backend and promptparser FastAPI apps - Configure environment variables in config files and docker-compose.yml - Create comprehensive setup documentation (LANGSMITH_SETUP.md) Benefits: - Real-time observability of all AI API calls in LangSmith dashboard - Track costs, latency, errors, and token usage per model - Hierarchical traces showing full request → LLM call flow - Easy debugging with prompt/response inspection - Filter and search by tags (openai, anthropic, xai, grok, replicate) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- LANGSMITH_SETUP.md | 239 ++++++++++++++++++ backend/config.py | 6 + backend/llm_interpreter.py | 3 + backend/main.py | 34 +++ backend/services/replicate_client.py | 5 + backend/services/scene_generator.py | 5 + backend/services/xai_client.py | 4 + docker-compose.yml | 13 + promptparser/.env.example | 6 + promptparser/app/core/config.py | 6 + promptparser/app/main.py | 35 ++- .../app/services/llm/claude_provider.py | 3 + .../app/services/llm/openai_provider.py | 3 + pyproject.toml | 3 + 14 files changed, 364 insertions(+), 1 deletion(-) create mode 100644 
LANGSMITH_SETUP.md diff --git a/LANGSMITH_SETUP.md b/LANGSMITH_SETUP.md new file mode 100644 index 00000000..50ab5531 --- /dev/null +++ b/LANGSMITH_SETUP.md @@ -0,0 +1,239 @@ +# LangSmith Observability Setup + +This document explains how to set up and use LangSmith for monitoring LLM and Replicate API calls in this project. + +## Overview + +LangSmith provides observability for: +- **OpenAI API calls** (GPT-4o, GPT-4o-mini) - scene generation, semantic augmentation, prompt parsing +- **Anthropic API calls** (Claude) - prompt parsing, creative direction +- **XAI API calls** (Grok-4) - AI-powered image pair selection, property scene analysis +- **Replicate API calls** - image generation (Flux), video generation (SkyReels, Veo3, Hailuo), audio generation +- **HTTP requests** - all FastAPI endpoints in backend and promptparser services + +## Quick Start + +### 1. Sign Up for LangSmith Cloud + +1. Go to [smith.langchain.com](https://smith.langchain.com) +2. Sign up for a free account +3. Create a new project (e.g., "video-sim-poc") +4. Generate an API key from Settings → API Keys + +### 2. Configure Environment Variables + +Add the following to your `.env` file or environment: + +```bash +# Enable LangSmith tracing +LANGCHAIN_TRACING_V2=true + +# Your LangSmith API key (required) +LANGCHAIN_API_KEY= + +# Project name (optional, defaults to "video-sim-poc") +LANGCHAIN_PROJECT=video-sim-poc + +# LangSmith API endpoint (optional, defaults to cloud) +LANGCHAIN_ENDPOINT=https://api.smith.langchain.com +``` + +For **promptparser** service, also add these to `promptparser/.env`: + +```bash +LANGCHAIN_TRACING_V2=true +LANGCHAIN_API_KEY= +LANGCHAIN_PROJECT=video-sim-poc +LANGCHAIN_ENDPOINT=https://api.smith.langchain.com +``` + +### 3. Install Dependencies + +Dependencies are already added to `pyproject.toml`. Install them with: + +```bash +# If using uv (recommended) +uv pip install -e . + +# Or with pip +pip install -e . +``` + +### 4. 
Run the Application + +Start your application as usual: + +```bash +# Docker Compose +docker-compose up + +# Or locally +uvicorn backend.main:app --reload +``` + +### 5. View Traces + +1. Go to [smith.langchain.com](https://smith.langchain.com) +2. Navigate to your project +3. You'll see traces for all API calls automatically + +## What Gets Traced + +### LLM Calls + +All OpenAI and Anthropic API calls are automatically traced with: +- **Input prompts** and system messages +- **Model** and parameters (temperature, max_tokens, etc.) +- **Responses** and token usage +- **Latency** and timing +- **Errors** with full stack traces + +**Instrumented functions:** +- `backend/services/scene_generator.py`: `generate_scenes()`, `regenerate_scene()` +- `backend/llm_interpreter.py`: `augment_object()`, `augment_scene()` +- `backend/services/xai_client.py`: `select_image_pairs()`, `select_property_scene_pairs()` +- `promptparser/app/services/llm/openai_provider.py`: `complete()`, `analyze_image()` +- `promptparser/app/services/llm/claude_provider.py`: `complete()`, `analyze_image()` + +### Replicate API Calls + +All Replicate API calls are traced with: +- **Model** and version +- **Input parameters** (prompt, images, duration, etc.) 
+- **Prediction ID** and status +- **Polling** behavior and timing +- **Output URLs** and results +- **Errors** and failures + +**Instrumented functions:** +- `backend/services/replicate_client.py`: `generate_image()`, `generate_video()`, `generate_video_from_pair()`, `poll_prediction()` + +### HTTP Requests + +All HTTP requests to your FastAPI endpoints are traced with: +- **Method** and path (e.g., `POST /api/scenes`) +- **Query parameters** +- **Nested LLM/Replicate calls** automatically grouped under the parent request +- **Response time** and status + +**Middleware:** +- `backend/main.py`: `LangSmithMiddleware` +- `promptparser/app/main.py`: `LangSmithMiddleware` + +## Viewing Traces + +### Trace Hierarchy + +Traces are organized hierarchically: + +``` +POST /api/scenes (HTTP request) +├─ generate_scenes (scene generation) +│ ├─ openai_generate_scenes (OpenAI API call) +│ │ └─ Input: prompt, model, temperature +│ │ └─ Output: scenes JSON, tokens used +│ └─ replicate_generate_image (Replicate API call) +│ └─ Input: prompt, model +│ └─ Output: image URL, prediction ID +└─ Response: 200 OK +``` + +### Filtering and Searching + +Use LangSmith's UI to: +- **Filter by tags**: `openai`, `anthropic`, `xai`, `grok`, `replicate`, `http_request`, `scene_generation`, `image_selection`, etc. +- **Search by prompt**: Find specific scenes or prompts +- **Filter by status**: Find errors or slow requests +- **View costs**: Track spending per model +- **Compare runs**: See how prompts changed over time + +## Disabling Tracing + +To temporarily disable tracing: + +```bash +# Set to false or remove the variable +LANGCHAIN_TRACING_V2=false +``` + +Or remove the `LANGCHAIN_API_KEY` environment variable. 
+ +## Docker Compose + +The `docker-compose.yml` already includes all necessary environment variables with defaults: + +```yaml +environment: + - LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2:-false} + - LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY} + - LANGCHAIN_PROJECT=${LANGCHAIN_PROJECT:-video-sim-poc} + - LANGCHAIN_ENDPOINT=${LANGCHAIN_ENDPOINT:-https://api.smith.langchain.com} +``` + +Just set `LANGCHAIN_TRACING_V2=true` and `LANGCHAIN_API_KEY=<your-api-key>` in your `.env` file. + +## Troubleshooting + +### No traces appearing + +1. **Check environment variables**: Ensure `LANGCHAIN_TRACING_V2=true` and `LANGCHAIN_API_KEY` are set +2. **Check API key**: Verify the API key is valid in LangSmith settings +3. **Check project name**: Ensure the project exists in LangSmith +4. **Check logs**: Look for any LangSmith-related errors in application logs + +### Traces are incomplete + +- Ensure all services are using the same `LANGCHAIN_PROJECT` name +- Verify middleware is registered (check `backend/main.py` and `promptparser/app/main.py`) + +### Performance concerns + +- Tracing adds minimal overhead (~5-20ms per trace) +- For high-throughput production, consider: + - Using sampling (trace only a percentage of requests) + - Disabling tracing for health checks (already implemented) + - Using LangSmith's batch mode + +## Advanced Configuration + +### Custom Tags + +Add custom tags to traces for better organization: + +```python +@traceable(name="my_function", tags=["custom_tag", "production"]) +def my_function(): + # Your code here +``` + +### Metadata + +Add metadata to provide additional context: + +```python +@traceable( + name="process_user_request", + metadata={ + "user_id": user_id, + "campaign_id": campaign_id, + "version": "v2" + } +) +def process_user_request(user_id, campaign_id): + # Your code here +``` + +## Cost Tracking + +LangSmith automatically tracks costs for: +- **OpenAI** models (based on token usage and pricing) +- **Anthropic** models (based on token usage and pricing) +- 
**Replicate** models (if pricing info is available) + +View costs in the LangSmith dashboard under your project. + +## Further Reading + +- [LangSmith Documentation](https://docs.smith.langchain.com/) +- [LangSmith Python SDK](https://github.com/langchain-ai/langsmith-sdk) +- [Tracing Reference](https://docs.smith.langchain.com/tracing) diff --git a/backend/config.py b/backend/config.py index 4fed86fd..4049c50e 100644 --- a/backend/config.py +++ b/backend/config.py @@ -22,6 +22,12 @@ class Settings(BaseSettings): OPENROUTER_API_KEY: Optional[str] = None XAI_API_KEY: Optional[str] = None # For Grok models + # LangSmith observability settings + LANGCHAIN_TRACING_V2: bool = False # Enable LangSmith tracing + LANGCHAIN_API_KEY: Optional[str] = None # LangSmith API key + LANGCHAIN_PROJECT: str = "video-sim-poc" # LangSmith project name + LANGCHAIN_ENDPOINT: str = "https://api.smith.langchain.com" # LangSmith API endpoint + # Storage settings VIDEO_STORAGE_PATH: str = "./DATA/videos" diff --git a/backend/llm_interpreter.py b/backend/llm_interpreter.py index 8bcbf18b..49ae2a48 100644 --- a/backend/llm_interpreter.py +++ b/backend/llm_interpreter.py @@ -10,6 +10,7 @@ from typing import Dict, List, Optional from openai import OpenAI from pydantic import BaseModel +from langsmith import traceable class GenesisProperties(BaseModel): @@ -41,6 +42,7 @@ def __init__(self): self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) self.model = "gpt-4o" # or gpt-4-turbo, gpt-4, gpt-3.5-turbo + @traceable(name="llm_augment_object", tags=["openai", "genesis", "semantic_augmentation"]) async def augment_object( self, shape: str, @@ -199,6 +201,7 @@ def _parse_llm_response(self, response: str) -> GenesisProperties: reasoning=f"Failed to parse LLM response: {e}" ) + @traceable(name="llm_augment_scene", tags=["openai", "genesis", "scene_augmentation"]) async def augment_scene( self, scene_objects, diff --git a/backend/main.py b/backend/main.py index e20362d4..1b5db906 100644 --- 
a/backend/main.py +++ b/backend/main.py @@ -26,6 +26,8 @@ import asyncio from dotenv import load_dotenv from pathlib import Path +from langsmith import traceable +from starlette.middleware.base import BaseHTTPMiddleware # Import Asset Pydantic models from .schemas.assets import ( @@ -335,6 +337,38 @@ def validate_file_type_with_magic_bytes( ) +# LangSmith tracing middleware +class LangSmithMiddleware(BaseHTTPMiddleware): + """Middleware to trace HTTP requests with LangSmith""" + + async def dispatch(self, request: Request, call_next): + # Get settings to check if tracing is enabled + settings = get_settings() + + # Skip tracing if disabled or for static files/health checks + if not settings.LANGCHAIN_TRACING_V2 or request.url.path in ["/health", "/docs", "/openapi.json", "/redoc"] or request.url.path.startswith("/assets"): + return await call_next(request) + + # Create traced function for the request + @traceable( + name=f"{request.method} {request.url.path}", + tags=["http_request", "backend", request.method.lower()], + metadata={ + "method": request.method, + "path": request.url.path, + "query_params": dict(request.query_params), + } + ) + async def process_request(): + response = await call_next(request) + return response + + return await process_request() + + +app.add_middleware(LangSmithMiddleware) + + # Add rate limiting @app.exception_handler(RateLimitExceeded) async def rate_limit_handler(request, exc): diff --git a/backend/services/replicate_client.py b/backend/services/replicate_client.py index b0de8411..5027e117 100644 --- a/backend/services/replicate_client.py +++ b/backend/services/replicate_client.py @@ -10,6 +10,7 @@ from os import environ from typing import Dict, List, Optional, Any import requests +from langsmith import traceable # Configure logging logger = logging.getLogger(__name__) @@ -84,6 +85,7 @@ def __init__(self, api_key: Optional[str] = None): logger.info("ReplicateClient initialized successfully") + 
@traceable(name="replicate_generate_image", tags=["replicate", "image_generation", "flux"]) def generate_image( self, prompt: str, @@ -187,6 +189,7 @@ def generate_image( "prediction_id": None } + @traceable(name="replicate_generate_video", tags=["replicate", "video_generation", "skyreels"]) def generate_video( self, image_urls: List[str], @@ -313,6 +316,7 @@ def generate_video( "duration_seconds": 0 } + @traceable(name="replicate_poll_prediction", tags=["replicate", "polling", "status"]) def poll_prediction( self, prediction_id: str, @@ -483,6 +487,7 @@ def estimate_cost(self, num_images: int, video_duration: int) -> float: return total_cost + @traceable(name="replicate_generate_video_from_pair", tags=["replicate", "video_generation", "image_to_video"]) def generate_video_from_pair( self, image1_url: str, diff --git a/backend/services/scene_generator.py b/backend/services/scene_generator.py index 56f178b7..e523a84e 100644 --- a/backend/services/scene_generator.py +++ b/backend/services/scene_generator.py @@ -10,6 +10,7 @@ import os from typing import List, Dict, Any, Optional from openai import OpenAI +from langsmith import traceable # Configure logging logger = logging.getLogger(__name__) @@ -28,6 +29,7 @@ class SceneGenerationError(Exception): pass +@traceable(name="generate_scenes", tags=["scene_generation", "openai"]) def generate_scenes( ad_basics: Dict[str, Any], creative_direction: Dict[str, Any], @@ -101,6 +103,7 @@ def generate_scenes( raise SceneGenerationError(f"Failed to generate scenes: {str(e)}") +@traceable(name="regenerate_scene", tags=["scene_regeneration", "openai"]) def regenerate_scene( scene_number: int, original_scene: Dict[str, Any], @@ -288,6 +291,7 @@ def _build_scene_regeneration_prompt( return prompt +@traceable(name="openai_generate_scenes", tags=["openai_api", "llm_call"]) def _generate_scenes_openai(prompt: str, num_scenes: int) -> List[Dict[str, Any]]: """Generate scenes using OpenAI API.""" if not AI_API_KEY: @@ -344,6 +348,7 @@ def 
_generate_scenes_openai(prompt: str, num_scenes: int) -> List[Dict[str, Any] raise SceneGenerationError(f"OpenAI API error: {str(e)}") +@traceable(name="openai_regenerate_scene", tags=["openai_api", "llm_call"]) def _regenerate_scene_openai(prompt: str, original_scene: Dict[str, Any]) -> Dict[str, Any]: """Regenerate a single scene using OpenAI API.""" if not AI_API_KEY: diff --git a/backend/services/xai_client.py b/backend/services/xai_client.py index be0acbee..3cf14126 100644 --- a/backend/services/xai_client.py +++ b/backend/services/xai_client.py @@ -9,6 +9,7 @@ import logging from typing import List, Dict, Any, Optional, Tuple import requests +from langsmith import traceable from ..config import get_settings @@ -33,6 +34,7 @@ def __init__(self, api_key: Optional[str] = None): self.base_url = "https://api.x.ai/v1" self.model = "grok-4-1-fast-non-reasoning" + @traceable(name="xai_select_image_pairs", tags=["xai", "grok", "image_selection", "llm_call"]) def select_image_pairs( self, assets: List[Dict[str, Any]], @@ -458,6 +460,7 @@ def _build_property_scene_prompt( return prompt + @traceable(name="xai_grok_api_call", tags=["xai", "grok_api", "api_call"]) def _call_grok_api( self, prompt: str, image_assets: List[Dict[str, Any]] ) -> Dict[str, Any]: @@ -586,6 +589,7 @@ def _parse_pairs_response( logger.error(f"Failed to parse Grok response: {e}", exc_info=True) raise ValueError(f"Invalid response format from Grok: {e}") + @traceable(name="xai_select_property_scene_pairs", tags=["xai", "grok", "property_scenes", "llm_call"]) def select_property_scene_pairs( self, property_info: Dict[str, Any], diff --git a/docker-compose.yml b/docker-compose.yml index e07f9853..ac4114b6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,8 +6,21 @@ services: ports: - "8000:8000" environment: + # Existing environment variables - REPLICATE_AI_KEY=${REPLICATE_AI_KEY} - DATA=/data + + # LangSmith observability (optional - set LANGCHAIN_TRACING_V2=true to enable) + - 
LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2:-false} + - LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY} + - LANGCHAIN_PROJECT=${LANGCHAIN_PROJECT:-video-sim-poc} + - LANGCHAIN_ENDPOINT=${LANGCHAIN_ENDPOINT:-https://api.smith.langchain.com} + + # API keys for LLM providers + - OPENAI_API_KEY=${OPENAI_API_KEY} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} + - XAI_API_KEY=${XAI_API_KEY} volumes: - ./data:/data restart: unless-stopped diff --git a/promptparser/.env.example b/promptparser/.env.example index 55ff6056..3c9d940b 100644 --- a/promptparser/.env.example +++ b/promptparser/.env.example @@ -5,3 +5,9 @@ ANTHROPIC_API_KEY= REDIS_URL=redis://localhost:6379/0 PORT=8080 +# LangSmith observability +LANGCHAIN_TRACING_V2=true +LANGCHAIN_API_KEY= +LANGCHAIN_PROJECT=video-sim-poc +LANGCHAIN_ENDPOINT=https://api.smith.langchain.com + diff --git a/promptparser/app/core/config.py b/promptparser/app/core/config.py index dc9c3561..171d71ee 100644 --- a/promptparser/app/core/config.py +++ b/promptparser/app/core/config.py @@ -17,6 +17,12 @@ class Settings(BaseSettings): RATE_LIMIT_PER_MINUTE: int = Field(60, ge=1) USE_MOCK_LLM: bool = False + # LangSmith observability settings + LANGCHAIN_TRACING_V2: bool = False + LANGCHAIN_API_KEY: str | None = None + LANGCHAIN_PROJECT: str = "video-sim-poc" + LANGCHAIN_ENDPOINT: str = "https://api.smith.langchain.com" + model_config = SettingsConfigDict( env_file=".env", env_file_encoding="utf-8", diff --git a/promptparser/app/main.py b/promptparser/app/main.py index 173edcab..b041e59f 100644 --- a/promptparser/app/main.py +++ b/promptparser/app/main.py @@ -2,8 +2,10 @@ from contextlib import asynccontextmanager -from fastapi import FastAPI +from fastapi import FastAPI, Request from slowapi.errors import RateLimitExceeded +from starlette.middleware.base import BaseHTTPMiddleware +from langsmith import traceable from app.core.config import Settings, get_settings from app.core.logging import configure_logging 
@@ -20,6 +22,34 @@ async def lifespan(app: FastAPI): yield +class LangSmithMiddleware(BaseHTTPMiddleware): + """Middleware to trace HTTP requests with LangSmith""" + + async def dispatch(self, request: Request, call_next): + # Get settings to check if tracing is enabled + settings = get_settings() + + # Skip tracing if disabled or for health checks/docs + if not settings.LANGCHAIN_TRACING_V2 or request.url.path in ["/v1/health", "/docs", "/openapi.json", "/redoc"]: + return await call_next(request) + + # Create traced function for the request + @traceable( + name=f"{request.method} {request.url.path}", + tags=["http_request", "prompt_parser", request.method.lower()], + metadata={ + "method": request.method, + "path": request.url.path, + "query_params": dict(request.query_params), + } + ) + async def process_request(): + response = await call_next(request) + return response + + return await process_request() + + def create_app() -> FastAPI: """Application factory.""" app = FastAPI( @@ -49,6 +79,9 @@ async def rate_limit_handler(request, exc): app.include_router(cache_admin_api.router, prefix="/v1", tags=["cache"]) app.include_router(health_api.router, prefix="/v1", tags=["health"]) + # Add LangSmith tracing middleware + app.add_middleware(LangSmithMiddleware) + return app diff --git a/promptparser/app/services/llm/claude_provider.py b/promptparser/app/services/llm/claude_provider.py index f5c555cd..afd39f25 100644 --- a/promptparser/app/services/llm/claude_provider.py +++ b/promptparser/app/services/llm/claude_provider.py @@ -7,6 +7,7 @@ from anthropic import AsyncAnthropic import structlog +from langsmith import traceable from app.core.config import get_settings from app.services.llm.base import LLMProvider @@ -28,6 +29,7 @@ def __init__(self, model: str = "claude-3-sonnet-20240229", *, client: AsyncAnth self._available = True self._latency_ms = 4000 + @traceable(name="claude_complete", tags=["anthropic", "prompt_parser", "llm_call"]) async def complete( self, 
prompt: str, @@ -52,6 +54,7 @@ async def complete( logger.warning("claude.complete_failed", error=str(exc)) raise + @traceable(name="claude_analyze_image", tags=["anthropic", "vision", "image_analysis"]) async def analyze_image(self, image_b64: str, question: str) -> dict[str, Any]: try: response = await self.client.messages.create( diff --git a/promptparser/app/services/llm/openai_provider.py b/promptparser/app/services/llm/openai_provider.py index fe55e258..0dd03efc 100644 --- a/promptparser/app/services/llm/openai_provider.py +++ b/promptparser/app/services/llm/openai_provider.py @@ -7,6 +7,7 @@ from openai import AsyncOpenAI import structlog +from langsmith import traceable from app.core.config import get_settings from app.services.llm.base import LLMProvider @@ -28,6 +29,7 @@ def __init__(self, model: str = "gpt-4o", *, client: AsyncOpenAI | None = None) self._available = True self._latency_ms = 3000 + @traceable(name="openai_complete", tags=["openai", "prompt_parser", "llm_call"]) async def complete( self, prompt: str, @@ -56,6 +58,7 @@ async def complete( logger.warning("openai.complete_failed", error=str(exc)) raise + @traceable(name="openai_analyze_image", tags=["openai", "vision", "image_analysis"]) async def analyze_image(self, image_b64: str, question: str) -> dict[str, Any]: try: if not image_b64.startswith("data:"): diff --git a/pyproject.toml b/pyproject.toml index c3cebad5..d2b9d935 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,4 +20,7 @@ dependencies = [ "tenacity>=9.1.2", "pydantic-settings>=2.11.0", "slowapi>=0.1.9", + "langsmith>=0.2.0", + "langchain-openai>=0.2.0", + "langchain-anthropic>=0.3.0", ]