diff --git a/.aws-sam/build.toml b/.aws-sam/build.toml new file mode 100644 index 0000000..ed5d88e --- /dev/null +++ b/.aws-sam/build.toml @@ -0,0 +1,15 @@ +# This file is auto generated by SAM CLI build command + +[function_build_definitions.59b4df2e-a009-469c-8bf5-d19de268fe1d] +codeuri = "/home/runner/work/lambda-opentelemetry-demo/lambda-opentelemetry-demo" +runtime = "nodejs20.x" +architecture = "x86_64" +handler = "src/order-service/index.handler" +manifest_hash = "" +packagetype = "Zip" +functions = ["OrderServiceFunction", "InventoryServiceFunction", "PaymentServiceFunction"] + +[function_build_definitions.59b4df2e-a009-469c-8bf5-d19de268fe1d.metadata] +BuildMethod = "nodejs20.x" + +[layer_build_definitions] diff --git a/.aws-sam/build/InventoryServiceFunction/.env.example b/.aws-sam/build/InventoryServiceFunction/.env.example new file mode 100644 index 0000000..3f3aa74 --- /dev/null +++ b/.aws-sam/build/InventoryServiceFunction/.env.example @@ -0,0 +1,7 @@ +# OpenTelemetry Configuration +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318/v1/traces +ENVIRONMENT=development + +# Service URLs (for local testing) +INVENTORY_SERVICE_URL=http://localhost:3001/inventory +PAYMENT_SERVICE_URL=http://localhost:3002/payment diff --git a/.aws-sam/build/InventoryServiceFunction/ADOT-LAYER-CONFIG.md b/.aws-sam/build/InventoryServiceFunction/ADOT-LAYER-CONFIG.md new file mode 100644 index 0000000..cffddc9 --- /dev/null +++ b/.aws-sam/build/InventoryServiceFunction/ADOT-LAYER-CONFIG.md @@ -0,0 +1,366 @@ +# AWS ADOT Lambda Layer Configuration Guide + +This document explains how to configure and use the AWS Distro for OpenTelemetry (ADOT) Lambda Layer for automatic instrumentation. + +## Overview + +The AWS ADOT Lambda Layer provides automatic OpenTelemetry instrumentation for Lambda functions without requiring you to bundle OpenTelemetry SDKs in your deployment package. 
+ +## Layer ARNs by Region + +### Node.js 18.x ADOT Layer ARNs (Latest: v1-18-1) + +| Region | ARN | +|--------|-----| +| us-east-1 | `arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| us-east-2 | `arn:aws:lambda:us-east-2:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| us-west-1 | `arn:aws:lambda:us-west-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| us-west-2 | `arn:aws:lambda:us-west-2:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| eu-west-1 | `arn:aws:lambda:eu-west-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| eu-central-1 | `arn:aws:lambda:eu-central-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| ap-southeast-1 | `arn:aws:lambda:ap-southeast-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| ap-northeast-1 | `arn:aws:lambda:ap-northeast-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | + +**Find the latest ARNs**: https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + +## Required Environment Variables + +### Essential Configuration + +```yaml +Environment: + Variables: + # Enable ADOT auto-instrumentation wrapper + AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler + + # Service name for identification in traces + OTEL_SERVICE_NAME: your-service-name + + # Sampling configuration (AlwaysOn for demo, adjust for production) + OTEL_TRACES_SAMPLER: AlwaysOn + + # Protocol for OTLP export + OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf + + # OpenTelemetry Collector endpoint (without /v1/traces) + OTEL_EXPORTER_OTLP_ENDPOINT: http://your-collector:4318 + + # Trace context propagation format + OTEL_PROPAGATORS: tracecontext +``` + +### Advanced Configuration Options + +```yaml +Environment: + Variables: + # Resource attributes (for environment, version, etc.) 
+ OTEL_RESOURCE_ATTRIBUTES: deployment.environment=production,service.version=1.0.0 + + # Specific endpoint for traces (optional, overrides OTEL_EXPORTER_OTLP_ENDPOINT) + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: http://your-collector:4318/v1/traces + + # Enable/disable specific instrumentations + OTEL_INSTRUMENTATION_AWS_SDK_ENABLED: true + OTEL_INSTRUMENTATION_HTTP_ENABLED: true + + # Span attribute limits + OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT: 4095 + OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT: 128 +``` + +## Exporter Backends + +### 1. Jaeger (Local Development) + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: http://localhost:4318 +OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf +``` + +Run Jaeger with OTLP support: +```bash +docker run -d --name jaeger \ + -e COLLECTOR_OTLP_ENABLED=true \ + -p 16686:16686 \ + -p 4318:4318 \ + jaegertracing/all-in-one:latest +``` + +### 2. AWS X-Ray (with ADOT Collector) + +If you want to send traces to AWS X-Ray, deploy an ADOT Collector in your VPC: + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: http://adot-collector:4318 +OTEL_PROPAGATORS: tracecontext,xray +``` + +ADOT Collector configuration: +```yaml +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + +exporters: + awsxray: + region: us-east-1 + +service: + pipelines: + traces: + receivers: [otlp] + exporters: [awsxray] +``` + +### 3. Grafana Cloud / Honeycomb / DataDog + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: https://your-saas-endpoint +OTEL_EXPORTER_OTLP_HEADERS: Authorization=Bearer your-api-key +``` + +### 4. Self-Hosted OpenTelemetry Collector + +Deploy a collector in your VPC or use Lambda Extension: + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: http://collector:4318 +``` + +## How Auto-Instrumentation Works + +### 1. Wrapper Execution + +The `AWS_LAMBDA_EXEC_WRAPPER` environment variable points to `/opt/otel-handler`, which: +- Initializes the OpenTelemetry SDK before your handler +- Automatically instruments common libraries (http, https, aws-sdk, etc.) 
+- Creates a root span for each Lambda invocation +- Propagates trace context from incoming requests + +### 2. Automatic Instrumentation + +The layer automatically instruments: +- ✅ AWS SDK calls +- ✅ HTTP/HTTPS requests (axios, node-fetch, http, https) +- ✅ Lambda invocation (creates root span) +- ✅ Downstream service calls with trace context propagation + +### 3. Context Propagation + +The layer automatically: +- Extracts W3C trace context from incoming API Gateway requests +- Injects trace context into outgoing HTTP requests +- Maintains trace context across async operations + +## Usage in Your Code + +### Basic Usage (Automatic) + +With the layer configured, your Lambda function is automatically instrumented: + +```javascript +exports.handler = async (event) => { + // Automatically traced! + const response = await axios.get('https://api.example.com'); + return { statusCode: 200, body: JSON.stringify(response.data) }; +}; +``` + +### Adding Custom Spans + +For additional business logic spans: + +```javascript +const api = require('@opentelemetry/api'); + +exports.handler = async (event) => { + const tracer = api.trace.getTracer('my-service', '1.0.0'); + + return tracer.startActiveSpan('business-operation', async (span) => { + try { + span.setAttribute('customer.id', event.customerId); + + // Your business logic + const result = await processOrder(event); + + span.setStatus({ code: api.SpanStatusCode.OK }); + return result; + } catch (error) { + span.recordException(error); + span.setStatus({ code: api.SpanStatusCode.ERROR }); + throw error; + } finally { + span.end(); + } + }); +}; +``` + +### Adding Attributes to Current Span + +```javascript +const api = require('@opentelemetry/api'); + +exports.handler = async (event) => { + const span = api.trace.getActiveSpan(); + if (span) { + span.setAttribute('order.id', event.orderId); + span.setAttribute('order.amount', event.amount); + } + + // Your handler logic +}; +``` + +## Trace Context Propagation + +### 
Automatic (HTTP Clients) + +The layer automatically propagates context for instrumented HTTP clients: + +```javascript +const axios = require('axios'); + +// Trace context is automatically added to headers! +const response = await axios.post('https://api.example.com/payment', data); +``` + +### Manual (Custom Clients) + +For custom HTTP clients or non-instrumented libraries: + +```javascript +const api = require('@opentelemetry/api'); + +function getTraceHeaders() { + const headers = {}; + const span = api.trace.getActiveSpan(); + + if (span) { + const spanContext = span.spanContext(); + // Forward the span's actual trace flags so unsampled traces are not marked as sampled + const traceFlags = spanContext.traceFlags.toString(16).padStart(2, '0'); + headers.traceparent = `00-${spanContext.traceId}-${spanContext.spanId}-${traceFlags}`; + } + + return headers; +} + +// Use in your custom client +const headers = { ...getTraceHeaders(), 'Content-Type': 'application/json' }; +``` + +## Sampling + +### AlwaysOn (Development/Demo) + +```yaml +OTEL_TRACES_SAMPLER: always_on +``` + +Traces every request. Good for development but expensive in production. + +### TraceIdRatioBased (Production) + +```yaml +OTEL_TRACES_SAMPLER: traceidratio +OTEL_TRACES_SAMPLER_ARG: 0.1 # 10% sampling +``` + +Samples a percentage of requests to reduce costs. + +### ParentBased (Recommended) + +```yaml +OTEL_TRACES_SAMPLER: parentbased_traceidratio +OTEL_TRACES_SAMPLER_ARG: 0.1 +``` + +Respects parent sampling decisions while applying ratio-based sampling to root spans. + +## Performance Considerations + +### Cold Start Impact + +The ADOT layer adds ~200-300ms to cold start time: +- Layer initialization: ~100ms +- Auto-instrumentation setup: ~100-200ms + +### Runtime Overhead + +- Minimal overhead during warm execution (<5ms per invocation) +- Async span export doesn't block Lambda execution +- Batching reduces network calls + +### Optimization Tips + +1. **Use provisioned concurrency** for latency-sensitive functions +2. **Adjust span limits** to reduce memory usage: + ```yaml + OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT: 32 + OTEL_SPAN_EVENT_COUNT_LIMIT: 32 + ``` +3. 
**Use sampling** in high-volume environments +4. **Disable unused instrumentations**: + ```yaml + OTEL_INSTRUMENTATION_AWS_LAMBDA_ENABLED: true + OTEL_INSTRUMENTATION_HTTP_ENABLED: true + OTEL_INSTRUMENTATION_AWS_SDK_ENABLED: false + ``` + +## Troubleshooting + +### No Traces Appearing + +1. Check CloudWatch Logs for ADOT errors: + ``` + aws logs filter-log-events --log-group-name /aws/lambda/your-function --filter-pattern "otel" + ``` + +2. Verify environment variables: + ```bash + aws lambda get-function-configuration --function-name your-function \ + --query 'Environment.Variables' + ``` + +3. Test collector endpoint connectivity: + - Ensure Lambda has network access to the collector + - Check security groups and NACLs + - Verify collector is accepting OTLP HTTP on port 4318 + +### Wrapper Not Running + +Error: `AWS_LAMBDA_EXEC_WRAPPER is set but the wrapper does not exist` + +**Solution**: Verify the layer ARN is correct and matches your region. + +### Trace Context Not Propagating + +1. Ensure `OTEL_PROPAGATORS=tracecontext` is set +2. Check if HTTP library is supported (axios, node-fetch, http, https) +3. For unsupported libraries, manually inject headers + +### High Cold Start Times + +1. Consider using Lambda SnapStart (if available) +2. Use provisioned concurrency +3. Minimize layer count (combine layers if possible) +4. Profile and optimize your application code + +## Best Practices + +1. **Set meaningful service names**: Use descriptive names that reflect the business function +2. **Add business context**: Include order IDs, customer IDs, and other relevant attributes +3. **Handle errors properly**: Record exceptions and set error status on spans +4. **Use semantic conventions**: Follow OpenTelemetry semantic conventions for consistency +5. **Monitor collector health**: Ensure your collector is performant and highly available +6. **Set appropriate sampling**: Balance cost with observability needs +7. 
**Use tags for filtering**: Add environment, version, and region as resource attributes + +## References + +- AWS ADOT Lambda: https://aws-otel.github.io/docs/getting-started/lambda +- OpenTelemetry Lambda Instrumentation: https://opentelemetry.io/docs/platforms/faas/lambda-auto-instrument/ +- W3C Trace Context: https://www.w3.org/TR/trace-context/ +- OpenTelemetry Semantic Conventions: https://opentelemetry.io/docs/specs/semconv/ diff --git a/.aws-sam/build/InventoryServiceFunction/ARCHITECTURE.md b/.aws-sam/build/InventoryServiceFunction/ARCHITECTURE.md new file mode 100644 index 0000000..ff7e7c2 --- /dev/null +++ b/.aws-sam/build/InventoryServiceFunction/ARCHITECTURE.md @@ -0,0 +1,366 @@ +# Architecture Details + +## Service Flow + +### 1. Successful Order Flow + +``` +Client + │ + │ POST /order + ├─────────────────────────────────────────┐ + │ │ + ▼ │ +Order Service │ + │ │ + │ 1. Validate request │ + │ 2. Create root span │ + │ │ + │ POST /inventory (with trace context) │ + ├─────────────────────────────┐ │ + │ │ │ + ▼ │ │ +Inventory Service │ │ + │ │ │ + │ 1. Extract trace context │ │ + │ 2. Create child span │ │ + │ 3. Check stock levels │ │ + │ 4. Return availability │ │ + │ │ │ + └─────────────────────────────┤ │ + │ Response: available=true │ │ + ▼ │ │ +Order Service │ │ + │ │ │ + │ POST /payment (with trace context) │ + ├─────────────────────────────┐ │ + │ │ │ + ▼ │ │ +Payment Service │ │ + │ │ │ + │ 1. Extract trace context │ │ + │ 2. Create child span │ │ + │ 3. Process payment │ │ + │ 4. Return result │ │ + │ │ │ + └─────────────────────────────┤ │ + │ Response: success=true │ │ + ▼ │ │ +Order Service │ │ + │ │ │ + │ Complete order processing │ │ + │ │ │ + └─────────────────────────────┤ │ + │ Response: 200 OK │ │ + ▼ │ │ +Client ◄──────────────────────────────────────┘ +``` + +### 2. 
Out of Stock Flow + +``` +Client → Order Service → Inventory Service + (Out of stock detected) + ← + Order Service + (Skip payment, return 409) + ← +Client +``` + +### 3. Payment Failure Flow + +``` +Client → Order Service → Inventory Service + (Items available) + ← + Order Service → Payment Service + (Payment failed) + ← + Order Service + (Return 402) + ← +Client +``` + +## Trace Structure + +### Trace Hierarchy + +``` +Trace ID: 00000000000000000000000000000001 +│ +└─ Span: process-order (Order Service) + │ Duration: 245ms + │ Attributes: + │ - order.id: ORD-001 + │ - customer.id: CUST-123 + │ - order.items.count: 2 + │ + ├─ Span: check-inventory (Order Service) + │ │ Duration: 15ms + │ │ Attributes: + │ │ - service: inventory + │ │ - order.id: ORD-001 + │ │ + │ └─ Span: HTTP POST (Auto-instrumented) + │ │ Duration: 12ms + │ │ + │ └─ Span: check-inventory (Inventory Service) + │ Duration: 8ms + │ Attributes: + │ - inventory.available: true + │ - inventory.items.count: 2 + │ + └─ Span: process-payment (Order Service) + │ Duration: 180ms + │ Attributes: + │ - service: payment + │ - order.id: ORD-001 + │ - payment.method: credit-card-5555 + │ - payment.amount: 1059.97 + │ + └─ Span: HTTP POST (Auto-instrumented) + │ Duration: 175ms + │ + └─ Span: process-payment (Payment Service) + Duration: 170ms + Attributes: + - payment.success: true + - payment.transaction_id: txn-1234567890-abc123 +``` + +## OpenTelemetry Components + +### 1. Tracer Provider + +Manages the lifecycle of tracers and span processors. + +```javascript +const provider = new NodeTracerProvider({ + resource: resource, +}); +``` + +### 2. OTLP Exporter + +Exports spans to OpenTelemetry Collector using HTTP. + +```javascript +const exporter = new OTLPTraceExporter({ + url: 'http://localhost:4318/v1/traces', +}); +``` + +### 3. Batch Span Processor + +Batches spans before export for efficiency. + +```javascript +provider.addSpanProcessor(new BatchSpanProcessor(exporter)); +``` + +### 4. 
Auto Instrumentations + +Automatically instruments common libraries. + +```javascript +registerInstrumentations({ + instrumentations: [ + getNodeAutoInstrumentations({ + '@opentelemetry/instrumentation-http': { enabled: true }, + '@opentelemetry/instrumentation-https': { enabled: true }, + }), + ], +}); +``` + +## Trace Context Propagation + +### W3C Trace Context Format + +``` +traceparent: 00-<trace-id>-<parent-span-id>-<trace-flags> + +Example: +traceparent: 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01 + │ │ │ │ + │ └─ Trace ID (32 hex chars) │ └─ Flags + │ └─ Parent Span ID (16 hex chars) + └─ Version (00) +``` + +### Context Injection + +When Order Service calls Inventory Service: + +```javascript +// Order Service +const headers = injectTraceContext(); +// headers = { traceparent: '00-...-...-01' } + +axios.post(inventoryUrl, data, { headers }); +``` + +### Context Extraction + +When Inventory Service receives request: + +```javascript +// Inventory Service +const traceContext = extractTraceContext(event); +// Continue the trace with the same trace ID +``` + +## Span Attributes + +### Semantic Conventions + +Following OpenTelemetry semantic conventions: + +| Attribute | Description | Example | +|-----------|-------------|---------| +| `service.name` | Service identifier | `order-service` | +| `service.version` | Service version | `1.0.0` | +| `deployment.environment` | Environment | `production` | +| `order.id` | Order identifier | `ORD-001` | +| `customer.id` | Customer identifier | `CUST-123` | +| `payment.amount` | Payment amount | `1059.97` | +| `payment.method` | Payment method | `credit-card-5555` | +| `inventory.available` | Availability flag | `true` | +| `http.method` | HTTP method | `POST` | +| `http.status_code` | HTTP status | `200` | + +## Error Handling + +### Error Span Status + +```javascript +span.setStatus({ + code: api.SpanStatusCode.ERROR, + message: error.message, +}); +span.recordException(error); +``` + +### Business Logic Errors vs Technical Errors + +- 
**Business Logic Errors** (out of stock, payment declined): + - Span status: OK + - Custom attributes indicate business failure + - Example: `inventory.available: false` + +- **Technical Errors** (network failure, invalid input): + - Span status: ERROR + - Exception recorded + - Stack trace captured + +## Performance Considerations + +### Batch Processing + +Spans are batched before export to reduce network overhead: + +```javascript +new BatchSpanProcessor(exporter, { + maxQueueSize: 2048, + maxExportBatchSize: 512, + scheduledDelayMillis: 5000, +}) +``` + +### Sampling + +For high-volume systems, implement sampling: + +```javascript +const sampler = new TraceIdRatioBasedSampler(0.1); // 10% sampling +const provider = new NodeTracerProvider({ + resource: resource, + sampler: sampler, +}); +``` + +### Async Export + +Span export is asynchronous and doesn't block Lambda execution. + +## Security + +### Securing OTLP Endpoint + +```javascript +const exporter = new OTLPTraceExporter({ + url: process.env.OTEL_EXPORTER_OTLP_ENDPOINT, + headers: { + 'Authorization': `Bearer ${process.env.OTEL_API_KEY}`, + }, +}); +``` + +### Data Privacy + +- Avoid capturing sensitive data in spans +- Redact PII from span attributes +- Use attribute filtering in the collector + +## Monitoring and Alerting + +### Key Metrics to Monitor + +1. **Trace Completeness** + - Percentage of complete traces + - Missing spans + +2. **Error Rates** + - Spans with ERROR status + - Service-specific error rates + +3. **Latency** + - P50, P95, P99 latencies + - Per-service latency breakdown + +4. 
**Business Metrics** + - Order success rate + - Payment failure rate + - Inventory unavailability rate + +### Example Queries (Jaeger) + +``` +# Find all failed orders +service="order-service" AND error=true + +# Find orders with payment failures +service="payment-service" AND payment.success=false + +# Find slow orders (> 1s) +service="order-service" AND duration>1000ms + +# Find out-of-stock scenarios +service="inventory-service" AND inventory.available=false +``` + +## Troubleshooting + +### Common Issues + +1. **Traces not appearing** + - Check OTLP endpoint connectivity + - Verify Lambda has network access + - Check CloudWatch logs for export errors + +2. **Broken traces** + - Verify trace context propagation + - Check HTTP header forwarding + - Ensure consistent trace ID format + +3. **High latency** + - Review span processor configuration + - Check collector performance + - Consider async export optimization + +4. **Missing spans** + - Verify auto-instrumentation is active + - Check for exceptions during span creation + - Review sampling configuration diff --git a/.aws-sam/build/InventoryServiceFunction/README.md b/.aws-sam/build/InventoryServiceFunction/README.md new file mode 100644 index 0000000..aff152d --- /dev/null +++ b/.aws-sam/build/InventoryServiceFunction/README.md @@ -0,0 +1,476 @@ +# Lambda OpenTelemetry Demo + +A comprehensive AWS Lambda project demonstrating how to integrate OpenTelemetry for end-to-end distributed tracing across multiple microservices **without using AWS CloudWatch or X-Ray**, using the **AWS Distro for OpenTelemetry (ADOT) Lambda Layer** for automatic instrumentation. + +## 🎯 Overview + +This project implements a realistic e-commerce order processing workflow using three Lambda functions representing distinct microservices: + +1. **Order Service** - Orchestrates the order processing workflow +2. **Inventory Service** - Validates item availability +3. 
**Payment Service** - Processes payments + +The services communicate with each other while propagating trace context using OpenTelemetry, enabling complete visibility into request flows across all services. + +## 🏗️ Architecture + +``` +┌─────────────────┐ +│ Order Service │ +│ (Lambda) │ +└────────┬────────┘ + │ + ├──────────────────┐ + │ │ + ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ +│ Inventory Svc │ │ Payment Svc │ +│ (Lambda) │ │ (Lambda) │ +└─────────────────┘ └─────────────────┘ + │ │ + └──────┬───────────┘ + ▼ + OpenTelemetry + Collector + │ + ▼ + Tracing Backend + (Jaeger/Zipkin/etc) +``` + +## ✨ Features + +- ✅ **AWS ADOT Lambda Layer** - Uses AWS Distro for OpenTelemetry Lambda Layer for zero-code instrumentation +- ✅ **Automatic Instrumentation** - Auto-instruments Lambda, HTTP/HTTPS, and AWS SDK without code changes +- ✅ **Distributed Tracing** - End-to-end trace propagation across Lambda functions +- ✅ **W3C Trace Context** - Standard trace context propagation using W3C format +- ✅ **OTLP Export** - Exports traces using OTLP HTTP protocol +- ✅ **Custom Spans** - Manual span creation for business logic insights +- ✅ **No CloudWatch/X-Ray** - Direct export to any OpenTelemetry-compatible backend +- ✅ **Error Scenarios** - Built-in test scenarios for: + - Out of stock items + - Payment failures (card declined, insufficient funds) + - Network errors + +## 📋 Prerequisites + +- Node.js 18.x or later +- AWS CLI configured with appropriate credentials +- AWS SAM CLI for deployment +- OpenTelemetry Collector or compatible backend (Jaeger, Zipkin, etc.) + +## 🚀 Quick Start + +### 1. Install Dependencies + +```bash +npm install +``` + +### 2. Set Up OpenTelemetry Backend + +You need an OpenTelemetry-compatible backend to receive and visualize traces. 
Here are some options: + +#### Option A: Jaeger (Recommended for local testing) + +```bash +# Run Jaeger all-in-one with Docker (or use docker-compose) +docker-compose up -d jaeger + +# Or manually: +docker run -d --name jaeger \ + -e COLLECTOR_OTLP_ENABLED=true \ + -p 16686:16686 \ + -p 4318:4318 \ + jaegertracing/all-in-one:latest + +# Access Jaeger UI at http://localhost:16686 +``` + +#### Option B: Zipkin + +```bash +# Run Zipkin with Docker +docker run -d --name zipkin \ + -p 9411:9411 \ + openzipkin/zipkin:latest + +# Note: You'll need an OpenTelemetry Collector to convert OTLP to Zipkin format +``` + +#### Option C: Grafana Cloud, Honeycomb, or other SaaS providers + +Configure the OTEL_EXPORTER_OTLP_ENDPOINT parameter with your provider's endpoint. + +### 3. Update Lambda Layer ARN + +**Important**: Update the `AdotLayerArn` parameter in `template.yaml` with the correct ARN for your AWS region. + +Find the latest ARN for your region here: +- https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + +Example ARNs: +- **us-east-1**: `arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` +- **us-west-2**: `arn:aws:lambda:us-west-2:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` +- **eu-west-1**: `arn:aws:lambda:eu-west-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` + +See [ADOT-LAYER-CONFIG.md](./ADOT-LAYER-CONFIG.md) for complete list. + +### 4. 
Deploy to AWS + +```bash +# Build the Lambda functions +sam build + +# Deploy (follow the prompts) +sam deploy --guided + +# Note the API endpoint from the outputs +``` + +During `sam deploy --guided`, you'll be prompted to provide: +- **Stack name** (e.g., `lambda-otel-demo`) +- **AWS Region** (e.g., `us-east-1`) +- **OtelCollectorEndpoint** - Your OpenTelemetry Collector endpoint without `/v1/traces` (e.g., `http://your-collector:4318`) +- **Environment** - Deployment environment (e.g., `production`, `staging`) +- **AdotLayerArn** - AWS ADOT Lambda Layer ARN for your region +- Confirm changes before deployment + +**Note**: If you're using a collector in a VPC, ensure your Lambda functions have VPC access configured. + +### 5. Test the Services + +Use the provided test script: + +```bash +# Replace with your actual API endpoint from SAM deploy output +./test-api.sh https://your-api-id.execute-api.region.amazonaws.com/Prod +``` + +Or manually test with curl: + +```bash +# Successful order +curl -X POST https://your-api-endpoint/Prod/order \ + -H "Content-Type: application/json" \ + -d @test-payloads.json +``` + +### 6. View Traces + +Open your tracing backend UI: +- Jaeger: http://localhost:16686 +- Select service: `order-service` +- Click "Find Traces" to view the end-to-end traces + +## 🔧 AWS ADOT Lambda Layer + +This project uses the **AWS Distro for OpenTelemetry (ADOT) Lambda Layer** for automatic instrumentation. The layer provides: + +### Key Benefits + +1. **Zero-Code Instrumentation** - Auto-instruments your Lambda function without code changes +2. **No Dependency Bundling** - OpenTelemetry SDKs are provided by the layer, reducing deployment package size +3. **Automatic Context Propagation** - Trace context is automatically propagated across service calls +4. 
**AWS-Optimized** - Maintained and supported by AWS with regular updates + +### How It Works + +The layer works through the `AWS_LAMBDA_EXEC_WRAPPER` environment variable: + +```yaml +Environment: + Variables: + AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler # Enables auto-instrumentation + OTEL_SERVICE_NAME: order-service # Service identifier + OTEL_TRACES_SAMPLER: AlwaysOn # Sampling strategy + OTEL_EXPORTER_OTLP_ENDPOINT: http://collector:4318 +``` + +When your Lambda function is invoked: +1. The wrapper initializes OpenTelemetry SDK +2. Auto-instrumentation is activated for HTTP, AWS SDK, and Lambda runtime +3. A root span is created for the Lambda invocation +4. Your handler executes within the trace context +5. Spans are exported to your configured OTLP endpoint + +### What Gets Instrumented Automatically + +- ✅ Lambda function invocations +- ✅ HTTP/HTTPS requests (axios, node-fetch, native http/https) +- ✅ AWS SDK v2 and v3 calls +- ✅ Database clients (when using instrumented libraries) +- ✅ Trace context propagation in headers + +### Adding Custom Instrumentation + +While the layer handles most instrumentation automatically, you can add custom spans for business logic: + +```javascript +const api = require('@opentelemetry/api'); + +exports.handler = async (event) => { + const tracer = api.trace.getTracer('my-service'); + + return tracer.startActiveSpan('custom-operation', async (span) => { + span.setAttribute('business.attribute', 'value'); + // Your logic here + span.end(); + }); +}; +``` + +For detailed configuration options, see [ADOT-LAYER-CONFIG.md](./ADOT-LAYER-CONFIG.md). + +## 📝 Test Scenarios + +### Scenario 1: Successful Order ✅ + +Tests the happy path where all services succeed. 
+ +```json +{ + "orderId": "ORD-001", + "customerId": "CUST-123", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + } + ] +} +``` + +**Expected Trace:** +- Order Service → Inventory Service (Success) +- Order Service → Payment Service (Success) +- Order completed with HTTP 200 + +### Scenario 2: Out of Stock ⚠️ + +Tests inventory unavailability. + +```json +{ + "orderId": "ORD-002", + "customerId": "CUST-456", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-003", + "name": "Keyboard", + "quantity": 1, + "price": 79.99 + } + ] +} +``` + +**Expected Trace:** +- Order Service → Inventory Service (Out of stock detected) +- Payment Service NOT called +- Order fails with HTTP 409 + +**Out of Stock Items:** +- `item-003` - Keyboard (0 in stock) +- `item-005` - Headphones (0 in stock) + +### Scenario 3: Payment Failure 💳 + +Tests payment processing failures. + +**Card Declined:** +```json +{ + "orderId": "ORD-003", + "customerId": "CUST-789", + "paymentMethod": "4111111111111111", + "items": [{"itemId": "item-001", "quantity": 1, "price": 999.99}] +} +``` + +**Insufficient Funds:** +```json +{ + "orderId": "ORD-004", + "customerId": "CUST-999", + "paymentMethod": "4222222222222222", + "items": [{"itemId": "item-004", "quantity": 2, "price": 399.99}] +} +``` + +**Test Payment Methods:** +- `4111111111111111` - Card declined +- `4222222222222222` - Insufficient funds +- `4333333333333333` - Expired card +- Any other value - Success (with 10% random failure rate) + +**Expected Trace:** +- Order Service → Inventory Service (Success) +- Order Service → Payment Service (Payment fails) +- Order fails with HTTP 402 + +## 🔍 Understanding the Traces + +Each trace will show: + +1. **Span Attributes:** + - Order ID, Customer ID + - Item counts and details + - Payment amounts and methods + - Service names and versions + - Success/failure reasons + +2. 
**Span Hierarchy:** + ``` + process-order (Order Service) + ├── check-inventory (Order Service -> Inventory Service) + │ └── check-inventory (Inventory Service) + └── process-payment (Order Service -> Payment Service) + └── process-payment (Payment Service) + ``` + +3. **Trace Context Propagation:** + - Trace IDs are consistent across all services + - Parent-child relationships are maintained + - W3C traceparent headers are used + +## 🛠️ Configuration + +### Environment Variables + +Configure these in the SAM template or Lambda console: + +- `OTEL_EXPORTER_OTLP_ENDPOINT` - OpenTelemetry Collector endpoint (default: `http://localhost:4318/v1/traces`) +- `ENVIRONMENT` - Deployment environment (default: `development`) +- `INVENTORY_SERVICE_URL` - Inventory service endpoint +- `PAYMENT_SERVICE_URL` - Payment service endpoint + +### Inventory Configuration + +Modify the mock inventory in `src/inventory-service/index.js`: + +```javascript +const inventory = { + 'item-001': { name: 'Laptop', quantity: 10 }, + 'item-002': { name: 'Mouse', quantity: 50 }, + 'item-003': { name: 'Keyboard', quantity: 0 }, // Out of stock + // Add more items... +}; +``` + +### Payment Configuration + +Modify failure patterns in `src/payment-service/index.js`: + +```javascript +const FAILURE_PATTERNS = { + 'card-declined': ['4111111111111111', 'card-declined'], + 'insufficient-funds': ['4222222222222222', 'insufficient-funds'], + // Add more patterns... 
+}; +``` + +## 📂 Project Structure + +``` +lambda-opentelemetry-demo/ +├── src/ +│ ├── order-service/ # Order orchestration service +│ │ └── index.js +│ ├── inventory-service/ # Inventory check service +│ │ └── index.js +│ ├── payment-service/ # Payment processing service +│ │ └── index.js +│ └── shared/ # Shared utilities +│ ├── tracer.js # OpenTelemetry setup +│ └── utils.js # Common utilities +├── template.yaml # AWS SAM template +├── package.json # Node.js dependencies +├── test-payloads.json # Sample test data +├── test-api.sh # API test script +└── README.md # This file +``` + +## 🔧 Local Development + +To test locally without deploying to AWS: + +1. Start your OpenTelemetry backend (e.g., Jaeger) +2. Use AWS SAM Local: + +```bash +sam build +sam local start-api --env-vars env.json +``` + +Create `env.json`: +```json +{ + "Parameters": { + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://host.docker.internal:4318/v1/traces", + "INVENTORY_SERVICE_URL": "http://host.docker.internal:3000/inventory", + "PAYMENT_SERVICE_URL": "http://host.docker.internal:3000/payment" + } +} +``` + +## 📊 Observability Best Practices + +This demo demonstrates several observability best practices: + +1. **Structured Logging** - Logs include trace context for correlation +2. **Semantic Attributes** - Meaningful span attributes for filtering and analysis +3. **Error Handling** - Errors are captured as span events +4. **Business Context** - Business-relevant data in spans (order IDs, amounts, etc.) +5. **Service Naming** - Clear service names for easy identification +6. 
**Context Propagation** - W3C standard trace context across service boundaries + +## 🔐 Security Considerations + +- This is a demo project - do not use in production without proper security hardening +- Implement proper authentication/authorization for API endpoints +- Secure OpenTelemetry Collector endpoints +- Use AWS Secrets Manager for sensitive configuration +- Enable API Gateway throttling and request validation + +## 🤝 Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. + +## 📄 License + +MIT License - See LICENSE file for details + +## 🎓 Learning Resources + +- [OpenTelemetry Documentation](https://opentelemetry.io/docs/) +- [AWS Lambda Best Practices](https://docs.aws.amazon.com/lambda/latest/dg/best-practices.html) +- [W3C Trace Context](https://www.w3.org/TR/trace-context/) +- [Distributed Tracing Guide](https://opentelemetry.io/docs/concepts/signals/traces/) + +## 🐛 Troubleshooting + +### Traces not appearing in backend + +- Verify OTEL_EXPORTER_OTLP_ENDPOINT is correctly configured +- Check Lambda logs in CloudWatch for errors +- Ensure OpenTelemetry Collector/backend is running and accessible +- Verify network connectivity between Lambda and collector + +### Service-to-service calls failing + +- Check that service URLs are correctly configured +- Verify API Gateway endpoints are deployed +- Review Lambda function logs for errors +- Check IAM permissions if using private endpoints + +### High latency or timeouts + +- Increase Lambda timeout in template.yaml +- Check OpenTelemetry Collector performance +- Consider using asynchronous export +- Review batch span processor configuration diff --git a/.aws-sam/build/InventoryServiceFunction/docker-compose.yaml b/.aws-sam/build/InventoryServiceFunction/docker-compose.yaml new file mode 100644 index 0000000..7f969f4 --- /dev/null +++ b/.aws-sam/build/InventoryServiceFunction/docker-compose.yaml @@ -0,0 +1,37 @@ +version: '3.8' + +services: + # Jaeger all-in-one with OTLP 
support + jaeger: + image: jaegertracing/all-in-one:latest + container_name: jaeger-otel + environment: + - COLLECTOR_OTLP_ENABLED=true + - LOG_LEVEL=debug + ports: + - "16686:16686" # Jaeger UI + - "4318:4318" # OTLP HTTP receiver + - "4317:4317" # OTLP gRPC receiver + - "14268:14268" # Jaeger collector HTTP + - "14250:14250" # Jaeger collector gRPC + networks: + - otel-demo + + # Optional: OpenTelemetry Collector (if you want to use a separate collector) + # otel-collector: + # image: otel/opentelemetry-collector-contrib:latest + # container_name: otel-collector + # command: ["--config=/etc/otel-collector-config.yaml"] + # volumes: + # - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml + # ports: + # - "4318:4318" # OTLP HTTP receiver + # - "4317:4317" # OTLP gRPC receiver + # - "8888:8888" # Prometheus metrics + # - "13133:13133" # Health check + # networks: + # - otel-demo + +networks: + otel-demo: + driver: bridge diff --git a/.aws-sam/build/InventoryServiceFunction/env.json b/.aws-sam/build/InventoryServiceFunction/env.json new file mode 100644 index 0000000..67b70e4 --- /dev/null +++ b/.aws-sam/build/InventoryServiceFunction/env.json @@ -0,0 +1,16 @@ +{ + "OrderServiceFunction": { + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://host.docker.internal:4318/v1/traces", + "ENVIRONMENT": "local", + "INVENTORY_SERVICE_URL": "http://127.0.0.1:3001/inventory", + "PAYMENT_SERVICE_URL": "http://127.0.0.1:3002/payment" + }, + "InventoryServiceFunction": { + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://host.docker.internal:4318/v1/traces", + "ENVIRONMENT": "local" + }, + "PaymentServiceFunction": { + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://host.docker.internal:4318/v1/traces", + "ENVIRONMENT": "local" + } +} diff --git a/.aws-sam/build/InventoryServiceFunction/local-test.sh b/.aws-sam/build/InventoryServiceFunction/local-test.sh new file mode 100755 index 0000000..8b43c30 --- /dev/null +++ b/.aws-sam/build/InventoryServiceFunction/local-test.sh @@ -0,0 +1,53 @@ 
#!/bin/bash

# Simple local testing script using SAM Local.
#
# Steps performed:
#   1. Verify the Docker daemon is reachable.
#   2. Start the `jaeger` compose service unless a container whose `docker ps`
#      row contains "jaeger-otel" is already listed.
#   3. Run `sam build`.
#   4. Print the follow-up commands for exercising the API by hand.
#
# The script does not start `sam local start-api` itself; it only prints the
# command to run.

set -e

echo "Starting Lambda OpenTelemetry Demo Local Test"
echo "=============================================="
echo ""

# Check if Docker is running
if ! docker info > /dev/null 2>&1; then
    echo "Error: Docker is not running. Please start Docker first."
    exit 1
fi

# Check if Jaeger is running
if ! docker ps | grep -q jaeger-otel; then
    echo "Starting Jaeger..."
    # Prefer the standalone docker-compose binary when present; otherwise fall
    # back to the Compose plugin bundled with the Docker CLI.
    if command -v docker-compose > /dev/null 2>&1; then
        docker-compose up -d jaeger
    else
        docker compose up -d jaeger
    fi
    echo "Waiting for Jaeger to be ready..."
    sleep 5
else
    echo "Jaeger is already running"
fi

echo ""
echo "Jaeger UI available at: http://localhost:16686"
echo ""

# Build the Lambda functions
echo "Building Lambda functions..."

# The build is tested directly in the `if` condition: with `set -e` active, a
# separate `$?` check after a bare `sam build` would never run on failure
# because the shell would already have exited.
if ! sam build; then
    echo "Error: Build failed"
    exit 1
fi

echo ""
echo "Lambda functions built successfully!"
echo ""
echo "To test the API:"
echo "1. Start SAM local API in one terminal:"
echo " sam local start-api --env-vars env.json"
echo ""
echo "2. In another terminal, run the test script:"
echo " ./test-api.sh http://127.0.0.1:3000"
echo ""
echo "3. View traces in Jaeger UI:"
echo " http://localhost:16686"
echo ""
"engines": { + "node": ">= 0.8" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": 
"sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + 
"es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": 
"sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + } + } +} diff --git a/.aws-sam/build/InventoryServiceFunction/package.json b/.aws-sam/build/InventoryServiceFunction/package.json new file mode 100644 index 0000000..abe61a7 --- /dev/null +++ b/.aws-sam/build/InventoryServiceFunction/package.json @@ -0,0 +1,23 @@ +{ + "name": "lambda-opentelemetry-demo", + "version": "1.0.0", + "description": "Sample AWS Lambda project demonstrating OpenTelemetry integration without CloudWatch/X-Ray", + "main": "index.js", + "scripts": { + "test": "echo \"No tests specified\" && exit 
0", + "deploy": "sam deploy --guided", + "build": "sam build" + }, + "keywords": [ + "aws", + "lambda", + "opentelemetry", + "tracing", + "nodejs" + ], + "author": "", + "license": "MIT", + "dependencies": { + "axios": "^1.13.5" + } +} diff --git a/.aws-sam/build/InventoryServiceFunction/src/inventory-service/index.js b/.aws-sam/build/InventoryServiceFunction/src/inventory-service/index.js new file mode 100644 index 0000000..7cb0b00 --- /dev/null +++ b/.aws-sam/build/InventoryServiceFunction/src/inventory-service/index.js @@ -0,0 +1,143 @@ +const { withSpan, addSpanAttributes } = require('../shared/tracer'); +const { createResponse } = require('../shared/utils'); + +// Mock inventory database +const inventory = { + 'item-001': { name: 'Laptop', quantity: 10 }, + 'item-002': { name: 'Mouse', quantity: 50 }, + 'item-003': { name: 'Keyboard', quantity: 0 }, // Out of stock + 'item-004': { name: 'Monitor', quantity: 5 }, + 'item-005': { name: 'Headphones', quantity: 0 }, // Out of stock +}; + +/** + * Inventory Service Lambda Handler + * This service checks if requested items are available in inventory + * OpenTelemetry instrumentation is automatically provided by the Lambda Layer + */ +exports.handler = async (event) => { + console.log('Inventory Service received event:', JSON.stringify(event)); + + return withSpan('check-inventory', async (span) => { + try { + // Parse request body + const body = typeof event.body === 'string' ? 
JSON.parse(event.body) : event.body; + const { orderId, items } = body; + + // Validate input + if (!orderId || !items || !Array.isArray(items)) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Missing or invalid required fields', + }); + return createResponse(400, { + available: false, + error: 'Missing or invalid required fields: orderId, items (array)', + }); + } + + addSpanAttributes({ + 'order.id': orderId, + 'inventory.items.count': items.length, + }); + + console.log(`Checking inventory for order ${orderId}`); + + // Check each item + const unavailableItems = []; + let allAvailable = true; + + for (const item of items) { + const { itemId, quantity } = item; + + if (!itemId || !quantity) { + continue; + } + + addSpanAttributes({ + [`inventory.item.${itemId}.requested`]: quantity, + }); + + // Check if item exists in inventory + if (!inventory[itemId]) { + console.log(`Item ${itemId} not found in inventory`); + unavailableItems.push({ + itemId, + reason: 'Item not found', + }); + allAvailable = false; + addSpanAttributes({ + [`inventory.item.${itemId}.status`]: 'not-found', + }); + continue; + } + + const availableQuantity = inventory[itemId].quantity; + addSpanAttributes({ + [`inventory.item.${itemId}.available`]: availableQuantity, + }); + + // Check if sufficient quantity is available + if (availableQuantity < quantity) { + console.log(`Insufficient quantity for item ${itemId}. Requested: ${quantity}, Available: ${availableQuantity}`); + unavailableItems.push({ + itemId, + name: inventory[itemId].name, + requestedQuantity: quantity, + availableQuantity: availableQuantity, + reason: availableQuantity === 0 ? 'Out of stock' : 'Insufficient quantity', + }); + allAvailable = false; + addSpanAttributes({ + [`inventory.item.${itemId}.status`]: 'insufficient', + }); + } else { + console.log(`Item ${itemId} is available. 
Requested: ${quantity}, Available: ${availableQuantity}`); + addSpanAttributes({ + [`inventory.item.${itemId}.status`]: 'available', + }); + } + } + + addSpanAttributes({ + 'inventory.all_available': allAvailable, + 'inventory.unavailable_count': unavailableItems.length, + }); + + if (allAvailable) { + console.log(`All items available for order ${orderId}`); + + return createResponse(200, { + available: true, + orderId, + message: 'All items are available', + }); + } else { + console.log(`Some items unavailable for order ${orderId}:`, unavailableItems); + addSpanAttributes({ + 'inventory.failure_reason': 'items-unavailable', + }); + + return createResponse(200, { + available: false, + orderId, + unavailableItems, + message: 'Some items are not available', + }); + } + + } catch (error) { + console.error('Inventory check error:', error); + addSpanAttributes({ + 'error': true, + 'error.message': error.message, + }); + + return createResponse(500, { + available: false, + error: 'Internal server error', + details: error.message, + }); + } + }); +}; diff --git a/.aws-sam/build/InventoryServiceFunction/src/order-service/index.js b/.aws-sam/build/InventoryServiceFunction/src/order-service/index.js new file mode 100644 index 0000000..7d05578 --- /dev/null +++ b/.aws-sam/build/InventoryServiceFunction/src/order-service/index.js @@ -0,0 +1,181 @@ +const { withSpan, addSpanAttributes } = require('../shared/tracer'); +const { callService, createResponse } = require('../shared/utils'); + +/** + * Order Service Lambda Handler + * This service accepts orders and orchestrates calls to Inventory and Payment services + * OpenTelemetry instrumentation is automatically provided by the Lambda Layer + */ +exports.handler = async (event) => { + console.log('Order Service received event:', JSON.stringify(event)); + + return withSpan('process-order', async (span) => { + try { + // Parse request body + const body = typeof event.body === 'string' ? 
JSON.parse(event.body) : event.body; + const { orderId, customerId, items, paymentMethod } = body; + + // Validate input + if (!orderId || !customerId || !items || !paymentMethod) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Missing required fields', + }); + return createResponse(400, { + success: false, + error: 'Missing required fields: orderId, customerId, items, paymentMethod', + }); + } + + // Add order details to span + addSpanAttributes({ + 'order.id': orderId, + 'customer.id': customerId, + 'order.items.count': items.length, + 'payment.method': paymentMethod, + }); + + console.log(`Processing order ${orderId} for customer ${customerId}`); + + // Step 1: Check inventory + console.log('Step 1: Checking inventory'); + const inventoryUrl = process.env.INVENTORY_SERVICE_URL || 'http://localhost:3001/inventory'; + + let inventoryResult; + try { + inventoryResult = await withSpan('check-inventory', async (inventorySpan) => { + addSpanAttributes({ + 'service': 'inventory', + 'order.id': orderId, + }); + + const result = await callService(inventoryUrl, { + orderId, + items, + }); + + addSpanAttributes({ + 'inventory.available': result.available, + }); + + return result; + }); + } catch (error) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Inventory check failed', + }); + return createResponse(500, { + success: false, + orderId, + error: 'Inventory check failed', + details: error.message, + }); + } + + if (!inventoryResult.available) { + addSpanAttributes({ + 'inventory.status': 'out-of-stock', + 'inventory.unavailable_items': JSON.stringify(inventoryResult.unavailableItems || []), + }); + + console.log(`Order ${orderId} failed: Items out of stock`); + return createResponse(409, { + success: false, + orderId, + error: 'Items out of stock', + unavailableItems: inventoryResult.unavailableItems, + }); + } + + addSpanAttributes({ 'inventory.status': 'available' }); + + // Step 2: Process payment + console.log('Step 2: Processing payment'); 
+ const paymentUrl = process.env.PAYMENT_SERVICE_URL || 'http://localhost:3002/payment'; + + let paymentResult; + try { + paymentResult = await withSpan('process-payment', async (paymentSpan) => { + const totalAmount = items.reduce((sum, item) => sum + (item.price * item.quantity), 0); + + addSpanAttributes({ + 'service': 'payment', + 'order.id': orderId, + 'payment.method': paymentMethod, + 'payment.amount': totalAmount, + }); + + const result = await callService(paymentUrl, { + orderId, + customerId, + amount: totalAmount, + paymentMethod, + }); + + addSpanAttributes({ + 'payment.success': result.success, + 'payment.transaction_id': result.transactionId || 'none', + }); + + return result; + }); + } catch (error) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Payment processing failed', + }); + return createResponse(500, { + success: false, + orderId, + error: 'Payment processing failed', + details: error.message, + }); + } + + if (!paymentResult.success) { + addSpanAttributes({ + 'payment.status': 'failed', + 'payment.failure_reason': paymentResult.reason || 'unknown', + }); + + console.log(`Order ${orderId} failed: Payment failed`); + return createResponse(402, { + success: false, + orderId, + error: 'Payment failed', + reason: paymentResult.reason, + }); + } + + addSpanAttributes({ + 'payment.status': 'success', + 'payment.transaction_id': paymentResult.transactionId, + }); + + // Order successful + console.log(`Order ${orderId} completed successfully`); + + return createResponse(200, { + success: true, + orderId, + message: 'Order processed successfully', + transactionId: paymentResult.transactionId, + totalAmount: items.reduce((sum, item) => sum + (item.price * item.quantity), 0), + }); + + } catch (error) { + console.error('Order processing error:', error); + addSpanAttributes({ + 'error': true, + 'error.message': error.message, + }); + + return createResponse(500, { + success: false, + error: 'Internal server error', + details: error.message, 
// Mock payment processing: maps a failure reason code to the substrings that
// trigger it when found in the payment method identifier.
const FAILURE_PATTERNS = {
  'card-declined': ['4111111111111111', 'card-declined'],
  'insufficient-funds': ['4222222222222222', 'insufficient-funds'],
  'expired-card': ['4333333333333333', 'expired-card'],
};

/**
 * Simulate payment processing with some failure scenarios.
 *
 * Outcomes, in order of evaluation:
 *   1. If the payment method contains any substring listed in
 *      FAILURE_PATTERNS, fail with that pattern's reason.
 *   2. Otherwise fail with a 10% probability (Math.random() < 0.1) using a
 *      randomly chosen transient reason.
 *   3. Otherwise succeed with a generated transaction id.
 *
 * Reason codes are returned in human-readable form with every hyphen
 * replaced by a space (e.g. "rate limit exceeded").
 *
 * @param {string} paymentMethod - Payment method identifier; non-string
 *   values are coerced with String() before matching.
 * @param {number} amount - Payment amount (not used by the simulation).
 * @returns {Object} `{ success: false, reason, transactionId: null }` on
 *   failure, `{ success: true, transactionId }` on success.
 */
function processPayment(paymentMethod, amount) {
  // Coerce once so a numeric card number (or any non-string) cannot make
  // `.includes` throw.
  const method = String(paymentMethod);

  // Global regex: String.prototype.replace with a string pattern only
  // replaces the first hyphen, which left "rate limit-exceeded".
  const humanize = (code) => code.replace(/-/g, ' ');

  // Check for known failure patterns
  for (const [reason, patterns] of Object.entries(FAILURE_PATTERNS)) {
    if (patterns.some((pattern) => method.includes(pattern))) {
      return {
        success: false,
        reason: humanize(reason),
        transactionId: null,
      };
    }
  }

  // Simulate random failures (10% chance)
  if (Math.random() < 0.1) {
    const reasons = ['network-timeout', 'gateway-error', 'rate-limit-exceeded'];
    const randomReason = reasons[Math.floor(Math.random() * reasons.length)];
    return {
      success: false,
      reason: humanize(randomReason),
      transactionId: null,
    };
  }

  // Successful payment
  return {
    success: true,
    transactionId: `txn-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`,
  };
}
JSON.stringify(event)); + + return withSpan('process-payment', async (span) => { + try { + // Parse request body + const body = typeof event.body === 'string' ? JSON.parse(event.body) : event.body; + const { orderId, customerId, amount, paymentMethod } = body; + + // Validate input + if (!orderId || !customerId || !amount || !paymentMethod) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Missing required fields', + }); + return createResponse(400, { + success: false, + error: 'Missing required fields: orderId, customerId, amount, paymentMethod', + }); + } + + addSpanAttributes({ + 'order.id': orderId, + 'customer.id': customerId, + 'payment.amount': amount, + 'payment.method': paymentMethod, + }); + + console.log(`Processing payment for order ${orderId}, amount: ${amount}`); + + // Validate amount + if (amount <= 0) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Invalid payment amount', + }); + return createResponse(400, { + success: false, + error: 'Invalid payment amount', + }); + } + + // Process payment + const paymentResult = processPayment(paymentMethod, amount); + + addSpanAttributes({ + 'payment.success': paymentResult.success, + }); + + if (!paymentResult.success) { + addSpanAttributes({ + 'payment.failure_reason': paymentResult.reason, + }); + + console.log(`Payment failed for order ${orderId}: ${paymentResult.reason}`); + + return createResponse(200, { + success: false, + orderId, + reason: paymentResult.reason, + message: 'Payment processing failed', + }); + } + + addSpanAttributes({ + 'payment.transaction_id': paymentResult.transactionId, + }); + + console.log(`Payment successful for order ${orderId}. 
/**
 * Get the current active span.
 * @returns {Span|undefined} Whatever `api.trace.getActiveSpan()` returns for
 *   the current context (undefined when no span is active).
 */
function getCurrentSpan() {
  return api.trace.getActiveSpan();
}

/**
 * Get a tracer instance.
 * @param {string} name - Tracer name (usually service name)
 * @param {string} version - Service version
 * @returns {Tracer} OpenTelemetry tracer instance
 */
function getTracer(name = 'lambda-app', version = '1.0.0') {
  return api.trace.getTracer(name, version);
}

/**
 * Run `fn` inside a new active span.
 *
 * The span is marked OK when `fn` resolves. When `fn` throws or rejects, the
 * span is marked ERROR, the exception is recorded, and the error is rethrown.
 * The span is always ended.
 *
 * @param {string} spanName - Name of the span
 * @param {Function} fn - Function to execute within the span; receives the span
 * @param {Object} attributes - Optional span attributes; null/undefined
 *   values are skipped, matching addSpanAttributes
 * @returns {Promise} Result of the function execution
 */
async function withSpan(spanName, fn, attributes = {}) {
  const tracer = getTracer();
  return tracer.startActiveSpan(spanName, async (span) => {
    try {
      // Add custom attributes (same null/undefined filter as addSpanAttributes)
      for (const [key, value] of Object.entries(attributes || {})) {
        if (value !== null && value !== undefined) {
          span.setAttribute(key, value);
        }
      }

      const result = await fn(span);
      span.setStatus({ code: api.SpanStatusCode.OK });
      return result;
    } catch (error) {
      // A thrown value is not guaranteed to be an Error; avoid dereferencing
      // `.message` on null/undefined, which would mask the original failure.
      const message = error instanceof Error ? error.message : String(error);
      span.setStatus({
        code: api.SpanStatusCode.ERROR,
        message,
      });
      span.recordException(error instanceof Error ? error : message);
      throw error;
    } finally {
      span.end();
    }
  });
}

/**
 * Add custom attributes to the current span.
 * Does nothing when there is no active span; null/undefined values are skipped.
 * @param {Object} attributes - Key-value pairs of attributes to add
 */
function addSpanAttributes(attributes) {
  const span = getCurrentSpan();
  if (!span) {
    return;
  }
  for (const [key, value] of Object.entries(attributes || {})) {
    if (value !== null && value !== undefined) {
      span.setAttribute(key, value);
    }
  }
}

/**
 * Record an exception on the current span and mark the span as ERROR.
 * Does nothing when there is no active span.
 * @param {Error} error - Error to record
 */
function recordException(error) {
  const span = getCurrentSpan();
  if (span) {
    span.recordException(error);
    span.setStatus({
      code: api.SpanStatusCode.ERROR,
      message: error.message,
    });
  }
}

/**
 * Build W3C Trace Context headers for downstream service calls from the
 * current active span.
 *
 * Returns an empty object when there is no active span or when its span
 * context is not valid (e.g. all-zero trace/span ids), so an invalid
 * `traceparent` is never sent.
 *
 * @returns {Object} Headers with `traceparent` and, when non-empty,
 *   `tracestate`
 */
function injectTraceContext() {
  const headers = {};
  const span = getCurrentSpan();
  if (!span) {
    return headers;
  }

  const spanContext = span.spanContext();
  if (!spanContext || !api.trace.isSpanContextValid(spanContext)) {
    return headers;
  }

  // W3C Trace Context format: version-traceid-parentid-flags, with flags
  // rendered as exactly two hex digits (masked to one byte).
  const traceFlags = (spanContext.traceFlags || 0) & 0xff;
  headers.traceparent = `00-${spanContext.traceId}-${spanContext.spanId}-${traceFlags.toString(16).padStart(2, '0')}`;

  if (spanContext.traceState) {
    const serialized = spanContext.traceState.serialize();
    // Skip the header entirely rather than sending an empty tracestate.
    if (serialized) {
      headers.tracestate = serialized;
    }
  }

  return headers;
}
/**
 * Make an HTTP request to another Lambda service with trace context propagation.
 *
 * Headers from `injectTraceContext()` are merged into the request so the
 * downstream service can continue the trace.
 *
 * @param {string} url - Service URL
 * @param {Object} data - Request payload
 * @param {string} method - HTTP method (default: POST)
 * @param {Object} options - Optional settings
 * @param {number} options.timeoutMs - Request timeout in milliseconds
 *   (default: 10000). Without a timeout a stalled downstream call would block
 *   until the calling function itself is terminated.
 * @returns {Promise} Response data
 * @throws {Error} When the service answers with an error status the thrown
 *   error carries `status`, `data` and `cause`; other failures (network,
 *   timeout) are rethrown unchanged.
 */
async function callService(url, data, method = 'POST', options = {}) {
  const { timeoutMs = 10000 } = options || {};

  // Inject trace context into headers
  const headers = {
    'Content-Type': 'application/json',
    ...injectTraceContext(),
  };

  console.log(`Calling service: ${url} with method: ${method}`);
  console.log(`Trace context headers:`, headers);

  try {
    const response = await axios({
      method,
      url,
      data,
      headers,
      timeout: timeoutMs,
    });

    console.log(`Service call successful: ${url}`);
    return response.data;
  } catch (error) {
    console.error(`Service call failed: ${url}`, error.message);
    if (error.response) {
      // Keep the original message format, but preserve the status, payload
      // and underlying error so callers can branch on them.
      const wrapped = new Error(
        `Service call failed: ${error.response.status} - ${JSON.stringify(error.response.data)}`,
        { cause: error }
      );
      wrapped.status = error.response.status;
      wrapped.data = error.response.data;
      throw wrapped;
    }
    throw error;
  }
}

/**
 * Create a Lambda response object.
 *
 * @param {number} statusCode - HTTP status code
 * @param {Object} body - Response body; serialized with JSON.stringify
 * @param {Object} headers - Additional headers; these are spread after the
 *   default `Content-Type: application/json`, so they can override it
 * @returns {Object} Lambda response object `{ statusCode, headers, body }`
 */
function createResponse(statusCode, body, headers = {}) {
  return {
    statusCode,
    headers: {
      'Content-Type': 'application/json',
      ...headers,
    },
    body: JSON.stringify(body),
  };
}
+1,137 @@ +AWSTemplateFormatVersion: '2010-09-09' +Transform: AWS::Serverless-2016-10-31 +Description: Lambda OpenTelemetry Demo - Order, Inventory, and Payment Services with AWS ADOT Layer + +Globals: + Function: + Timeout: 30 + MemorySize: 512 + Runtime: nodejs20.x + Environment: + Variables: + AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler + OTEL_SERVICE_NAME: will-be-overridden + OTEL_TRACES_SAMPLER: AlwaysOn + OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf + OTEL_EXPORTER_OTLP_ENDPOINT: !Ref OtelCollectorEndpoint + OTEL_PROPAGATORS: tracecontext + OTEL_RESOURCE_ATTRIBUTES: !Sub deployment.environment=${Environment} + +Parameters: + OtelCollectorEndpoint: + Type: String + Default: 'http://localhost:4318' + Description: OpenTelemetry Collector endpoint URL (without /v1/traces path) + + Environment: + Type: String + Default: 'production' + Description: Deployment environment (e.g., development, staging, production) + + AdotLayerArn: + Type: String + Default: 'arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5' + Description: | + ARN of the AWS Distro for OpenTelemetry (ADOT) Lambda Layer for Node.js. 
+ Find the latest ARN for your region at: + https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + +Resources: + # Order Service Function + OrderServiceFunction: + Type: AWS::Serverless::Function + Metadata: + BuildMethod: nodejs20.x + Properties: + FunctionName: order-service + CodeUri: ./ + Handler: src/order-service/index.handler + Description: Order Service - Orchestrates order processing + Layers: + - !Ref AdotLayerArn + Environment: + Variables: + OTEL_SERVICE_NAME: order-service + INVENTORY_SERVICE_URL: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/inventory' + PAYMENT_SERVICE_URL: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/payment' + Events: + OrderApi: + Type: Api + Properties: + Path: /order + Method: post + RestApiId: !Ref ApiGateway + + # Inventory Service Function + InventoryServiceFunction: + Type: AWS::Serverless::Function + Metadata: + BuildMethod: nodejs20.x + Properties: + FunctionName: inventory-service + CodeUri: ./ + Handler: src/inventory-service/index.handler + Description: Inventory Service - Checks item availability + Layers: + - !Ref AdotLayerArn + Environment: + Variables: + OTEL_SERVICE_NAME: inventory-service + Events: + InventoryApi: + Type: Api + Properties: + Path: /inventory + Method: post + RestApiId: !Ref ApiGateway + + # Payment Service Function + PaymentServiceFunction: + Type: AWS::Serverless::Function + Metadata: + BuildMethod: nodejs20.x + Properties: + FunctionName: payment-service + CodeUri: ./ + Handler: src/payment-service/index.handler + Description: Payment Service - Processes payments + Layers: + - !Ref AdotLayerArn + Environment: + Variables: + OTEL_SERVICE_NAME: payment-service + Events: + PaymentApi: + Type: Api + Properties: + Path: /payment + Method: post + RestApiId: !Ref ApiGateway + + # API Gateway + ApiGateway: + Type: AWS::Serverless::Api + Properties: + StageName: Prod + Description: API Gateway for Lambda OpenTelemetry Demo + +Outputs: + 
ApiEndpoint: + Description: API Gateway endpoint URL + Value: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod' + + OrderServiceUrl: + Description: Order Service endpoint + Value: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/order' + + InventoryServiceUrl: + Description: Inventory Service endpoint + Value: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/inventory' + + PaymentServiceUrl: + Description: Payment Service endpoint + Value: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/payment' + + AdotLayerVersion: + Description: ADOT Lambda Layer ARN being used + Value: !Ref AdotLayerArn diff --git a/.aws-sam/build/InventoryServiceFunction/test-api.sh b/.aws-sam/build/InventoryServiceFunction/test-api.sh new file mode 100755 index 0000000..680b864 --- /dev/null +++ b/.aws-sam/build/InventoryServiceFunction/test-api.sh @@ -0,0 +1,130 @@ +#!/bin/bash + +# Color codes for output +GREEN='\033[0;32m' +BLUE='\033[0;34m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +echo -e "${BLUE}========================================${NC}" +echo -e "${BLUE}Lambda OpenTelemetry Demo - Test Script${NC}" +echo -e "${BLUE}========================================${NC}" +echo "" + +# Check if API endpoint is provided (first positional argument) +if [ -z "$1" ]; then + echo -e "${RED}Error: API endpoint is required${NC}" + echo "Usage: ./test-api.sh API_ENDPOINT" + echo "Example: ./test-api.sh https://abc123.execute-api.us-east-1.amazonaws.com/Prod" + exit 1 +fi + +API_ENDPOINT="${1%/}" # strip one trailing slash so ORDER_URL never contains '//' +ORDER_URL="${API_ENDPOINT}/order" + +echo -e "${YELLOW}Using API Endpoint: ${API_ENDPOINT}${NC}" +echo "" + +# Test 1: Successful Order +echo -e "${BLUE}Test 1: Successful Order${NC}" +echo "Testing with items that are in stock and valid payment method..." 
+curl -X POST ${ORDER_URL} \ + -H "Content-Type: application/json" \ + -d '{ + "orderId": "ORD-001", + "customerId": "CUST-123", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + }, + { + "itemId": "item-002", + "name": "Mouse", + "quantity": 2, + "price": 29.99 + } + ] + }' +echo -e "\n" + +sleep 2 + +# Test 2: Out of Stock Order +echo -e "${BLUE}Test 2: Out of Stock Order${NC}" +echo "Testing with items that are out of stock..." +curl -X POST ${ORDER_URL} \ + -H "Content-Type: application/json" \ + -d '{ + "orderId": "ORD-002", + "customerId": "CUST-456", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-003", + "name": "Keyboard", + "quantity": 1, + "price": 79.99 + }, + { + "itemId": "item-005", + "name": "Headphones", + "quantity": 1, + "price": 149.99 + } + ] + }' +echo -e "\n" + +sleep 2 + +# Test 3: Payment Failure (Card Declined) +echo -e "${BLUE}Test 3: Payment Failure - Card Declined${NC}" +echo "Testing with a payment method that will be declined..." +curl -X POST ${ORDER_URL} \ + -H "Content-Type: application/json" \ + -d '{ + "orderId": "ORD-003", + "customerId": "CUST-789", + "paymentMethod": "4111111111111111", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + } + ] + }' +echo -e "\n" + +sleep 2 + +# Test 4: Payment Failure (Insufficient Funds) +echo -e "${BLUE}Test 4: Payment Failure - Insufficient Funds${NC}" +echo "Testing with insufficient funds scenario..." 
+curl -X POST ${ORDER_URL} \ + -H "Content-Type: application/json" \ + -d '{ + "orderId": "ORD-004", + "customerId": "CUST-999", + "paymentMethod": "4222222222222222", + "items": [ + { + "itemId": "item-004", + "name": "Monitor", + "quantity": 2, + "price": 399.99 + } + ] + }' +echo -e "\n" + +echo -e "${GREEN}========================================${NC}" +echo -e "${GREEN}All tests completed!${NC}" +echo -e "${GREEN}Check your OpenTelemetry backend to view the traces${NC}" +echo -e "${GREEN}========================================${NC}" diff --git a/.aws-sam/build/InventoryServiceFunction/test-payloads.json b/.aws-sam/build/InventoryServiceFunction/test-payloads.json new file mode 100644 index 0000000..603d30c --- /dev/null +++ b/.aws-sam/build/InventoryServiceFunction/test-payloads.json @@ -0,0 +1,66 @@ +{ + "successfulOrder": { + "orderId": "ORD-001", + "customerId": "CUST-123", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + }, + { + "itemId": "item-002", + "name": "Mouse", + "quantity": 2, + "price": 29.99 + } + ] + }, + "outOfStockOrder": { + "orderId": "ORD-002", + "customerId": "CUST-456", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-003", + "name": "Keyboard", + "quantity": 1, + "price": 79.99 + }, + { + "itemId": "item-005", + "name": "Headphones", + "quantity": 1, + "price": 149.99 + } + ] + }, + "paymentFailureOrder": { + "orderId": "ORD-003", + "customerId": "CUST-789", + "paymentMethod": "4111111111111111", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + } + ] + }, + "insufficientFundsOrder": { + "orderId": "ORD-004", + "customerId": "CUST-999", + "paymentMethod": "4222222222222222", + "items": [ + { + "itemId": "item-004", + "name": "Monitor", + "quantity": 2, + "price": 399.99 + } + ] + } +} diff --git a/.aws-sam/build/OrderServiceFunction/.env.example 
b/.aws-sam/build/OrderServiceFunction/.env.example new file mode 100644 index 0000000..3f3aa74 --- /dev/null +++ b/.aws-sam/build/OrderServiceFunction/.env.example @@ -0,0 +1,7 @@ +# OpenTelemetry Configuration +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318/v1/traces +ENVIRONMENT=development + +# Service URLs (for local testing) +INVENTORY_SERVICE_URL=http://localhost:3001/inventory +PAYMENT_SERVICE_URL=http://localhost:3002/payment diff --git a/.aws-sam/build/OrderServiceFunction/ADOT-LAYER-CONFIG.md b/.aws-sam/build/OrderServiceFunction/ADOT-LAYER-CONFIG.md new file mode 100644 index 0000000..cffddc9 --- /dev/null +++ b/.aws-sam/build/OrderServiceFunction/ADOT-LAYER-CONFIG.md @@ -0,0 +1,366 @@ +# AWS ADOT Lambda Layer Configuration Guide + +This document explains how to configure and use the AWS Distro for OpenTelemetry (ADOT) Lambda Layer for automatic instrumentation. + +## Overview + +The AWS ADOT Lambda Layer provides automatic OpenTelemetry instrumentation for Lambda functions without requiring you to bundle OpenTelemetry SDKs in your deployment package. 
+ +## Layer ARNs by Region + +### Node.js 18.x ADOT Layer ARNs (Latest: v1-18-1) + +| Region | ARN | +|--------|-----| +| us-east-1 | `arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| us-east-2 | `arn:aws:lambda:us-east-2:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| us-west-1 | `arn:aws:lambda:us-west-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| us-west-2 | `arn:aws:lambda:us-west-2:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| eu-west-1 | `arn:aws:lambda:eu-west-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| eu-central-1 | `arn:aws:lambda:eu-central-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| ap-southeast-1 | `arn:aws:lambda:ap-southeast-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| ap-northeast-1 | `arn:aws:lambda:ap-northeast-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | + +**Find the latest ARNs**: https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + +## Required Environment Variables + +### Essential Configuration + +```yaml +Environment: + Variables: + # Enable ADOT auto-instrumentation wrapper + AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler + + # Service name for identification in traces + OTEL_SERVICE_NAME: your-service-name + + # Sampling configuration (AlwaysOn for demo, adjust for production) + OTEL_TRACES_SAMPLER: AlwaysOn + + # Protocol for OTLP export + OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf + + # OpenTelemetry Collector endpoint (without /v1/traces) + OTEL_EXPORTER_OTLP_ENDPOINT: http://your-collector:4318 + + # Trace context propagation format + OTEL_PROPAGATORS: tracecontext +``` + +### Advanced Configuration Options + +```yaml +Environment: + Variables: + # Resource attributes (for environment, version, etc.) 
+ OTEL_RESOURCE_ATTRIBUTES: deployment.environment=production,service.version=1.0.0 + + # Specific endpoint for traces (optional, overrides OTEL_EXPORTER_OTLP_ENDPOINT) + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: http://your-collector:4318/v1/traces + + # Enable/disable specific instrumentations + OTEL_INSTRUMENTATION_AWS_SDK_ENABLED: true + OTEL_INSTRUMENTATION_HTTP_ENABLED: true + + # Span attribute limits + OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT: 4095 + OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT: 128 +``` + +## Exporter Backends + +### 1. Jaeger (Local Development) + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: http://localhost:4318 +OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf +``` + +Run Jaeger with OTLP support: +```bash +docker run -d --name jaeger \ + -e COLLECTOR_OTLP_ENABLED=true \ + -p 16686:16686 \ + -p 4318:4318 \ + jaegertracing/all-in-one:latest +``` + +### 2. AWS X-Ray (with ADOT Collector) + +If you want to send traces to AWS X-Ray, deploy an ADOT Collector in your VPC: + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: http://adot-collector:4318 +OTEL_PROPAGATORS: tracecontext,xray +``` + +ADOT Collector configuration: +```yaml +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + +exporters: + awsxray: + region: us-east-1 + +service: + pipelines: + traces: + receivers: [otlp] + exporters: [awsxray] +``` + +### 3. Grafana Cloud / Honeycomb / DataDog + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: https://your-saas-endpoint +OTEL_EXPORTER_OTLP_HEADERS: Authorization=Bearer your-api-key +``` + +### 4. Self-Hosted OpenTelemetry Collector + +Deploy a collector in your VPC or use Lambda Extension: + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: http://collector:4318 +``` + +## How Auto-Instrumentation Works + +### 1. Wrapper Execution + +The `AWS_LAMBDA_EXEC_WRAPPER` environment variable points to `/opt/otel-handler`, which: +- Initializes the OpenTelemetry SDK before your handler +- Automatically instruments common libraries (http, https, aws-sdk, etc.) 
+- Creates a root span for each Lambda invocation +- Propagates trace context from incoming requests + +### 2. Automatic Instrumentation + +The layer automatically instruments: +- ✅ AWS SDK calls +- ✅ HTTP/HTTPS requests (axios, node-fetch, http, https) +- ✅ Lambda invocation (creates root span) +- ✅ Downstream service calls with trace context propagation + +### 3. Context Propagation + +The layer automatically: +- Extracts W3C trace context from incoming API Gateway requests +- Injects trace context into outgoing HTTP requests +- Maintains trace context across async operations + +## Usage in Your Code + +### Basic Usage (Automatic) + +With the layer configured, your Lambda function is automatically instrumented: + +```javascript +exports.handler = async (event) => { + // Automatically traced! + const response = await axios.get('https://api.example.com'); + return { statusCode: 200, body: JSON.stringify(response.data) }; +}; +``` + +### Adding Custom Spans + +For additional business logic spans: + +```javascript +const api = require('@opentelemetry/api'); + +exports.handler = async (event) => { + const tracer = api.trace.getTracer('my-service', '1.0.0'); + + return tracer.startActiveSpan('business-operation', async (span) => { + try { + span.setAttribute('customer.id', event.customerId); + + // Your business logic + const result = await processOrder(event); + + span.setStatus({ code: api.SpanStatusCode.OK }); + return result; + } catch (error) { + span.recordException(error); + span.setStatus({ code: api.SpanStatusCode.ERROR }); + throw error; + } finally { + span.end(); + } + }); +}; +``` + +### Adding Attributes to Current Span + +```javascript +const api = require('@opentelemetry/api'); + +exports.handler = async (event) => { + const span = api.trace.getActiveSpan(); + if (span) { + span.setAttribute('order.id', event.orderId); + span.setAttribute('order.amount', event.amount); + } + + // Your handler logic +}; +``` + +## Trace Context Propagation + +### 
Automatic (HTTP Clients) + +The layer automatically propagates context for instrumented HTTP clients: + +```javascript +const axios = require('axios'); + +// Trace context is automatically added to headers! +const response = await axios.post('https://api.example.com/payment', data); +``` + +### Manual (Custom Clients) + +For custom HTTP clients or non-instrumented libraries: + +```javascript +const api = require('@opentelemetry/api'); + +function getTraceHeaders() { + const headers = {}; + const span = api.trace.getActiveSpan(); + + if (span) { + const spanContext = span.spanContext(); + headers.traceparent = `00-${spanContext.traceId}-${spanContext.spanId}-01`; + } + + return headers; +} + +// Use in your custom client +const headers = { ...getTraceHeaders(), 'Content-Type': 'application/json' }; +``` + +## Sampling + +### AlwaysOn (Development/Demo) + +```yaml +OTEL_TRACES_SAMPLER: AlwaysOn +``` + +Traces every request. Good for development but expensive in production. + +### TraceIdRatioBased (Production) + +```yaml +OTEL_TRACES_SAMPLER: traceidratio +OTEL_TRACES_SAMPLER_ARG: 0.1 # 10% sampling +``` + +Samples a percentage of requests to reduce costs. + +### ParentBased (Recommended) + +```yaml +OTEL_TRACES_SAMPLER: parentbased_traceidratio +OTEL_TRACES_SAMPLER_ARG: 0.1 +``` + +Respects parent sampling decisions while applying ratio-based sampling to root spans. + +## Performance Considerations + +### Cold Start Impact + +The ADOT layer adds ~200-300ms to cold start time: +- Layer initialization: ~100ms +- Auto-instrumentation setup: ~100-200ms + +### Runtime Overhead + +- Minimal overhead during warm execution (<5ms per invocation) +- Async span export doesn't block Lambda execution +- Batching reduces network calls + +### Optimization Tips + +1. **Use provisioned concurrency** for latency-sensitive functions +2. **Adjust span limits** to reduce memory usage: + ```yaml + OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT: 32 + OTEL_SPAN_EVENT_COUNT_LIMIT: 32 + ``` +3. 
**Use sampling** in high-volume environments +4. **Disable unused instrumentations**: + ```yaml + OTEL_INSTRUMENTATION_AWS_LAMBDA_ENABLED: true + OTEL_INSTRUMENTATION_HTTP_ENABLED: true + OTEL_INSTRUMENTATION_AWS_SDK_ENABLED: false + ``` + +## Troubleshooting + +### No Traces Appearing + +1. Check CloudWatch Logs for ADOT errors: + ``` + grep "otel" /aws/lambda/your-function + ``` + +2. Verify environment variables: + ```bash + aws lambda get-function-configuration --function-name your-function \ + --query 'Environment.Variables' + ``` + +3. Test collector endpoint connectivity: + - Ensure Lambda has network access to the collector + - Check security groups and NACLs + - Verify collector is accepting OTLP HTTP on port 4318 + +### Wrapper Not Running + +Error: `AWS_LAMBDA_EXEC_WRAPPER is set but the wrapper does not exist` + +**Solution**: Verify the layer ARN is correct and matches your region. + +### Trace Context Not Propagating + +1. Ensure `OTEL_PROPAGATORS=tracecontext` is set +2. Check if HTTP library is supported (axios, node-fetch, http, https) +3. For unsupported libraries, manually inject headers + +### High Cold Start Times + +1. Consider using Lambda SnapStart (if available) +2. Use provisioned concurrency +3. Minimize layer count (combine layers if possible) +4. Profile and optimize your application code + +## Best Practices + +1. **Set meaningful service names**: Use descriptive names that reflect the business function +2. **Add business context**: Include order IDs, customer IDs, and other relevant attributes +3. **Handle errors properly**: Record exceptions and set error status on spans +4. **Use semantic conventions**: Follow OpenTelemetry semantic conventions for consistency +5. **Monitor collector health**: Ensure your collector is performant and highly available +6. **Set appropriate sampling**: Balance cost with observability needs +7. 
**Use tags for filtering**: Add environment, version, and region as resource attributes + +## References + +- AWS ADOT Lambda: https://aws-otel.github.io/docs/getting-started/lambda +- OpenTelemetry Lambda Instrumentation: https://opentelemetry.io/docs/platforms/faas/lambda-auto-instrument/ +- W3C Trace Context: https://www.w3.org/TR/trace-context/ +- OpenTelemetry Semantic Conventions: https://opentelemetry.io/docs/specs/semconv/ diff --git a/.aws-sam/build/OrderServiceFunction/ARCHITECTURE.md b/.aws-sam/build/OrderServiceFunction/ARCHITECTURE.md new file mode 100644 index 0000000..ff7e7c2 --- /dev/null +++ b/.aws-sam/build/OrderServiceFunction/ARCHITECTURE.md @@ -0,0 +1,366 @@ +# Architecture Details + +## Service Flow + +### 1. Successful Order Flow + +``` +Client + │ + │ POST /order + ├─────────────────────────────────────────┐ + │ │ + ▼ │ +Order Service │ + │ │ + │ 1. Validate request │ + │ 2. Create root span │ + │ │ + │ POST /inventory (with trace context) │ + ├─────────────────────────────┐ │ + │ │ │ + ▼ │ │ +Inventory Service │ │ + │ │ │ + │ 1. Extract trace context │ │ + │ 2. Create child span │ │ + │ 3. Check stock levels │ │ + │ 4. Return availability │ │ + │ │ │ + └─────────────────────────────┤ │ + │ Response: available=true │ │ + ▼ │ │ +Order Service │ │ + │ │ │ + │ POST /payment (with trace context) │ + ├─────────────────────────────┐ │ + │ │ │ + ▼ │ │ +Payment Service │ │ + │ │ │ + │ 1. Extract trace context │ │ + │ 2. Create child span │ │ + │ 3. Process payment │ │ + │ 4. Return result │ │ + │ │ │ + └─────────────────────────────┤ │ + │ Response: success=true │ │ + ▼ │ │ +Order Service │ │ + │ │ │ + │ Complete order processing │ │ + │ │ │ + └─────────────────────────────┤ │ + │ Response: 200 OK │ │ + ▼ │ │ +Client ◄──────────────────────────────────────┘ +``` + +### 2. Out of Stock Flow + +``` +Client → Order Service → Inventory Service + (Out of stock detected) + ← + Order Service + (Skip payment, return 409) + ← +Client +``` + +### 3. 
Payment Failure Flow + +``` +Client → Order Service → Inventory Service + (Items available) + ← + Order Service → Payment Service + (Payment failed) + ← + Order Service + (Return 402) + ← +Client +``` + +## Trace Structure + +### Trace Hierarchy + +``` +Trace ID: 00000000000000000000000000000001 +│ +└─ Span: process-order (Order Service) + │ Duration: 245ms + │ Attributes: + │ - order.id: ORD-001 + │ - customer.id: CUST-123 + │ - order.items.count: 2 + │ + ├─ Span: check-inventory (Order Service) + │ │ Duration: 15ms + │ │ Attributes: + │ │ - service: inventory + │ │ - order.id: ORD-001 + │ │ + │ └─ Span: HTTP POST (Auto-instrumented) + │ │ Duration: 12ms + │ │ + │ └─ Span: check-inventory (Inventory Service) + │ Duration: 8ms + │ Attributes: + │ - inventory.available: true + │ - inventory.items.count: 2 + │ + └─ Span: process-payment (Order Service) + │ Duration: 180ms + │ Attributes: + │ - service: payment + │ - order.id: ORD-001 + │ - payment.method: credit-card-5555 + │ - payment.amount: 1059.97 + │ + └─ Span: HTTP POST (Auto-instrumented) + │ Duration: 175ms + │ + └─ Span: process-payment (Payment Service) + Duration: 170ms + Attributes: + - payment.success: true + - payment.transaction_id: txn-1234567890-abc123 +``` + +## OpenTelemetry Components + +### 1. Tracer Provider + +Manages the lifecycle of tracers and span processors. + +```javascript +const provider = new NodeTracerProvider({ + resource: resource, +}); +``` + +### 2. OTLP Exporter + +Exports spans to OpenTelemetry Collector using HTTP. + +```javascript +const exporter = new OTLPTraceExporter({ + url: 'http://localhost:4318/v1/traces', +}); +``` + +### 3. Batch Span Processor + +Batches spans before export for efficiency. + +```javascript +provider.addSpanProcessor(new BatchSpanProcessor(exporter)); +``` + +### 4. Auto Instrumentations + +Automatically instruments common libraries. 
+ +```javascript +registerInstrumentations({ + instrumentations: [ + getNodeAutoInstrumentations({ + '@opentelemetry/instrumentation-http': { enabled: true }, + '@opentelemetry/instrumentation-https': { enabled: true }, + }), + ], +}); +``` + +## Trace Context Propagation + +### W3C Trace Context Format + +``` +traceparent: 00-{trace-id}-{parent-id}-{trace-flags} + +Example: +traceparent: 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01 + │ │ │ │ + │ └─ Trace ID (32 hex chars) │ └─ Flags + │ └─ Parent Span ID (16 hex chars) + └─ Version (00) +``` + +### Context Injection + +When Order Service calls Inventory Service: + +```javascript +// Order Service +const headers = injectTraceContext(); +// headers = { traceparent: '00-...-...-01' } + +axios.post(inventoryUrl, data, { headers }); +``` + +### Context Extraction + +When Inventory Service receives request: + +```javascript +// Inventory Service +const traceContext = extractTraceContext(event); +// Continue the trace with the same trace ID +``` + +## Span Attributes + +### Semantic Conventions + +Following OpenTelemetry semantic conventions: + +| Attribute | Description | Example | +|-----------|-------------|---------| +| `service.name` | Service identifier | `order-service` | +| `service.version` | Service version | `1.0.0` | +| `deployment.environment` | Environment | `production` | +| `order.id` | Order identifier | `ORD-001` | +| `customer.id` | Customer identifier | `CUST-123` | +| `payment.amount` | Payment amount | `1059.97` | +| `payment.method` | Payment method | `credit-card-5555` | +| `inventory.available` | Availability flag | `true` | +| `http.method` | HTTP method | `POST` | +| `http.status_code` | HTTP status | `200` | + +## Error Handling + +### Error Span Status + +```javascript +span.setStatus({ + code: api.SpanStatusCode.ERROR, + message: error.message, +}); +span.recordException(error); +``` + +### Business Logic Errors vs Technical Errors + +- **Business Logic Errors** (out of stock, payment declined): + - Span status: 
OK + - Custom attributes indicate business failure + - Example: `inventory.available: false` + +- **Technical Errors** (network failure, invalid input): + - Span status: ERROR + - Exception recorded + - Stack trace captured + +## Performance Considerations + +### Batch Processing + +Spans are batched before export to reduce network overhead: + +```javascript +new BatchSpanProcessor(exporter, { + maxQueueSize: 2048, + maxExportBatchSize: 512, + scheduledDelayMillis: 5000, +}) +``` + +### Sampling + +For high-volume systems, implement sampling: + +```javascript +const sampler = new TraceIdRatioBasedSampler(0.1); // 10% sampling +const provider = new NodeTracerProvider({ + resource: resource, + sampler: sampler, +}); +``` + +### Async Export + +Span export is asynchronous and doesn't block Lambda execution. + +## Security + +### Securing OTLP Endpoint + +```javascript +const exporter = new OTLPTraceExporter({ + url: process.env.OTEL_EXPORTER_OTLP_ENDPOINT, + headers: { + 'Authorization': `Bearer ${process.env.OTEL_API_KEY}`, + }, +}); +``` + +### Data Privacy + +- Avoid capturing sensitive data in spans +- Redact PII from span attributes +- Use attribute filtering in the collector + +## Monitoring and Alerting + +### Key Metrics to Monitor + +1. **Trace Completeness** + - Percentage of complete traces + - Missing spans + +2. **Error Rates** + - Spans with ERROR status + - Service-specific error rates + +3. **Latency** + - P50, P95, P99 latencies + - Per-service latency breakdown + +4. 
**Business Metrics** + - Order success rate + - Payment failure rate + - Inventory unavailability rate + +### Example Queries (Jaeger) + +``` +# Find all failed orders +service="order-service" AND error=true + +# Find orders with payment failures +service="payment-service" AND payment.success=false + +# Find slow orders (> 1s) +service="order-service" AND duration>1000ms + +# Find out-of-stock scenarios +service="inventory-service" AND inventory.available=false +``` + +## Troubleshooting + +### Common Issues + +1. **Traces not appearing** + - Check OTLP endpoint connectivity + - Verify Lambda has network access + - Check CloudWatch logs for export errors + +2. **Broken traces** + - Verify trace context propagation + - Check HTTP header forwarding + - Ensure consistent trace ID format + +3. **High latency** + - Review span processor configuration + - Check collector performance + - Consider async export optimization + +4. **Missing spans** + - Verify auto-instrumentation is active + - Check for exceptions during span creation + - Review sampling configuration diff --git a/.aws-sam/build/OrderServiceFunction/README.md b/.aws-sam/build/OrderServiceFunction/README.md new file mode 100644 index 0000000..aff152d --- /dev/null +++ b/.aws-sam/build/OrderServiceFunction/README.md @@ -0,0 +1,476 @@ +# Lambda OpenTelemetry Demo + +A comprehensive AWS Lambda project demonstrating how to integrate OpenTelemetry for end-to-end distributed tracing across multiple microservices **without using AWS CloudWatch or X-Ray**, using the **AWS Distro for OpenTelemetry (ADOT) Lambda Layer** for automatic instrumentation. + +## 🎯 Overview + +This project implements a realistic e-commerce order processing workflow using three Lambda functions representing distinct microservices: + +1. **Order Service** - Orchestrates the order processing workflow +2. **Inventory Service** - Validates item availability +3. 
**Payment Service** - Processes payments + +The services communicate with each other while propagating trace context using OpenTelemetry, enabling complete visibility into request flows across all services. + +## 🏗️ Architecture + +``` +┌─────────────────┐ +│ Order Service │ +│ (Lambda) │ +└────────┬────────┘ + │ + ├──────────────────┐ + │ │ + ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ +│ Inventory Svc │ │ Payment Svc │ +│ (Lambda) │ │ (Lambda) │ +└─────────────────┘ └─────────────────┘ + │ │ + └──────┬───────────┘ + ▼ + OpenTelemetry + Collector + │ + ▼ + Tracing Backend + (Jaeger/Zipkin/etc) +``` + +## ✨ Features + +- ✅ **AWS ADOT Lambda Layer** - Uses AWS Distro for OpenTelemetry Lambda Layer for zero-code instrumentation +- ✅ **Automatic Instrumentation** - Auto-instruments Lambda, HTTP/HTTPS, and AWS SDK without code changes +- ✅ **Distributed Tracing** - End-to-end trace propagation across Lambda functions +- ✅ **W3C Trace Context** - Standard trace context propagation using W3C format +- ✅ **OTLP Export** - Exports traces using OTLP HTTP protocol +- ✅ **Custom Spans** - Manual span creation for business logic insights +- ✅ **No CloudWatch/X-Ray** - Direct export to any OpenTelemetry-compatible backend +- ✅ **Error Scenarios** - Built-in test scenarios for: + - Out of stock items + - Payment failures (card declined, insufficient funds) + - Network errors + +## 📋 Prerequisites + +- Node.js 18.x or later +- AWS CLI configured with appropriate credentials +- AWS SAM CLI for deployment +- OpenTelemetry Collector or compatible backend (Jaeger, Zipkin, etc.) + +## 🚀 Quick Start + +### 1. Install Dependencies + +```bash +npm install +``` + +### 2. Set Up OpenTelemetry Backend + +You need an OpenTelemetry-compatible backend to receive and visualize traces. 
Here are some options: + +#### Option A: Jaeger (Recommended for local testing) + +```bash +# Run Jaeger all-in-one with Docker (or use docker-compose) +docker-compose up -d jaeger + +# Or manually: +docker run -d --name jaeger \ + -e COLLECTOR_OTLP_ENABLED=true \ + -p 16686:16686 \ + -p 4318:4318 \ + jaegertracing/all-in-one:latest + +# Access Jaeger UI at http://localhost:16686 +``` + +#### Option B: Zipkin + +```bash +# Run Zipkin with Docker +docker run -d --name zipkin \ + -p 9411:9411 \ + openzipkin/zipkin:latest + +# Note: You'll need an OpenTelemetry Collector to convert OTLP to Zipkin format +``` + +#### Option C: Grafana Cloud, Honeycomb, or other SaaS providers + +Configure the OTEL_EXPORTER_OTLP_ENDPOINT parameter with your provider's endpoint. + +### 3. Update Lambda Layer ARN + +**Important**: Update the `AdotLayerArn` parameter in `template.yaml` with the correct ARN for your AWS region. + +Find the latest ARN for your region here: +- https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + +Example ARNs: +- **us-east-1**: `arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` +- **us-west-2**: `arn:aws:lambda:us-west-2:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` +- **eu-west-1**: `arn:aws:lambda:eu-west-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` + +See [ADOT-LAYER-CONFIG.md](./ADOT-LAYER-CONFIG.md) for complete list. + +### 4. 
Deploy to AWS + +```bash +# Build the Lambda functions +sam build + +# Deploy (follow the prompts) +sam deploy --guided + +# Note the API endpoint from the outputs +``` + +During `sam deploy --guided`, you'll be prompted to provide: +- **Stack name** (e.g., `lambda-otel-demo`) +- **AWS Region** (e.g., `us-east-1`) +- **OtelCollectorEndpoint** - Your OpenTelemetry Collector endpoint without `/v1/traces` (e.g., `http://your-collector:4318`) +- **Environment** - Deployment environment (e.g., `production`, `staging`) +- **AdotLayerArn** - AWS ADOT Lambda Layer ARN for your region +- Confirm changes before deployment + +**Note**: If you're using a collector in a VPC, ensure your Lambda functions have VPC access configured. + +### 5. Test the Services + +Use the provided test script: + +```bash +# Replace with your actual API endpoint from SAM deploy output +./test-api.sh https://your-api-id.execute-api.region.amazonaws.com/Prod +``` + +Or manually test with curl: + +```bash +# Successful order +curl -X POST https://your-api-endpoint/Prod/order \ + -H "Content-Type: application/json" \ + -d @test-payloads.json +``` + +### 6. View Traces + +Open your tracing backend UI: +- Jaeger: http://localhost:16686 +- Select service: `order-service` +- Click "Find Traces" to view the end-to-end traces + +## 🔧 AWS ADOT Lambda Layer + +This project uses the **AWS Distro for OpenTelemetry (ADOT) Lambda Layer** for automatic instrumentation. The layer provides: + +### Key Benefits + +1. **Zero-Code Instrumentation** - Auto-instruments your Lambda function without code changes +2. **No Dependency Bundling** - OpenTelemetry SDKs are provided by the layer, reducing deployment package size +3. **Automatic Context Propagation** - Trace context is automatically propagated across service calls +4. 
**AWS-Optimized** - Maintained and supported by AWS with regular updates + +### How It Works + +The layer works through the `AWS_LAMBDA_EXEC_WRAPPER` environment variable: + +```yaml +Environment: + Variables: + AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler # Enables auto-instrumentation + OTEL_SERVICE_NAME: order-service # Service identifier + OTEL_TRACES_SAMPLER: AlwaysOn # Sampling strategy + OTEL_EXPORTER_OTLP_ENDPOINT: http://collector:4318 +``` + +When your Lambda function is invoked: +1. The wrapper initializes OpenTelemetry SDK +2. Auto-instrumentation is activated for HTTP, AWS SDK, and Lambda runtime +3. A root span is created for the Lambda invocation +4. Your handler executes within the trace context +5. Spans are exported to your configured OTLP endpoint + +### What Gets Instrumented Automatically + +- ✅ Lambda function invocations +- ✅ HTTP/HTTPS requests (axios, node-fetch, native http/https) +- ✅ AWS SDK v2 and v3 calls +- ✅ Database clients (when using instrumented libraries) +- ✅ Trace context propagation in headers + +### Adding Custom Instrumentation + +While the layer handles most instrumentation automatically, you can add custom spans for business logic: + +```javascript +const api = require('@opentelemetry/api'); + +exports.handler = async (event) => { + const tracer = api.trace.getTracer('my-service'); + + return tracer.startActiveSpan('custom-operation', async (span) => { + span.setAttribute('business.attribute', 'value'); + // Your logic here + span.end(); + }); +}; +``` + +For detailed configuration options, see [ADOT-LAYER-CONFIG.md](./ADOT-LAYER-CONFIG.md). + +## 📝 Test Scenarios + +### Scenario 1: Successful Order ✅ + +Tests the happy path where all services succeed. 
+ +```json +{ + "orderId": "ORD-001", + "customerId": "CUST-123", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + } + ] +} +``` + +**Expected Trace:** +- Order Service → Inventory Service (Success) +- Order Service → Payment Service (Success) +- Order completed with HTTP 200 + +### Scenario 2: Out of Stock ⚠️ + +Tests inventory unavailability. + +```json +{ + "orderId": "ORD-002", + "customerId": "CUST-456", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-003", + "name": "Keyboard", + "quantity": 1, + "price": 79.99 + } + ] +} +``` + +**Expected Trace:** +- Order Service → Inventory Service (Out of stock detected) +- Payment Service NOT called +- Order fails with HTTP 409 + +**Out of Stock Items:** +- `item-003` - Keyboard (0 in stock) +- `item-005` - Headphones (0 in stock) + +### Scenario 3: Payment Failure 💳 + +Tests payment processing failures. + +**Card Declined:** +```json +{ + "orderId": "ORD-003", + "customerId": "CUST-789", + "paymentMethod": "4111111111111111", + "items": [{"itemId": "item-001", "quantity": 1, "price": 999.99}] +} +``` + +**Insufficient Funds:** +```json +{ + "orderId": "ORD-004", + "customerId": "CUST-999", + "paymentMethod": "4222222222222222", + "items": [{"itemId": "item-004", "quantity": 2, "price": 399.99}] +} +``` + +**Test Payment Methods:** +- `4111111111111111` - Card declined +- `4222222222222222` - Insufficient funds +- `4333333333333333` - Expired card +- Any other value - Success (with 10% random failure rate) + +**Expected Trace:** +- Order Service → Inventory Service (Success) +- Order Service → Payment Service (Payment fails) +- Order fails with HTTP 402 + +## 🔍 Understanding the Traces + +Each trace will show: + +1. **Span Attributes:** + - Order ID, Customer ID + - Item counts and details + - Payment amounts and methods + - Service names and versions + - Success/failure reasons + +2. 
**Span Hierarchy:** + ``` + process-order (Order Service) + ├── check-inventory (Order Service -> Inventory Service) + │ └── check-inventory (Inventory Service) + └── process-payment (Order Service -> Payment Service) + └── process-payment (Payment Service) + ``` + +3. **Trace Context Propagation:** + - Trace IDs are consistent across all services + - Parent-child relationships are maintained + - W3C traceparent headers are used + +## 🛠️ Configuration + +### Environment Variables + +Configure these in the SAM template or Lambda console: + +- `OTEL_EXPORTER_OTLP_ENDPOINT` - OpenTelemetry Collector endpoint (default: `http://localhost:4318/v1/traces`) +- `ENVIRONMENT` - Deployment environment (default: `development`) +- `INVENTORY_SERVICE_URL` - Inventory service endpoint +- `PAYMENT_SERVICE_URL` - Payment service endpoint + +### Inventory Configuration + +Modify the mock inventory in `src/inventory-service/index.js`: + +```javascript +const inventory = { + 'item-001': { name: 'Laptop', quantity: 10 }, + 'item-002': { name: 'Mouse', quantity: 50 }, + 'item-003': { name: 'Keyboard', quantity: 0 }, // Out of stock + // Add more items... +}; +``` + +### Payment Configuration + +Modify failure patterns in `src/payment-service/index.js`: + +```javascript +const FAILURE_PATTERNS = { + 'card-declined': ['4111111111111111', 'card-declined'], + 'insufficient-funds': ['4222222222222222', 'insufficient-funds'], + // Add more patterns... 
+}; +``` + +## 📂 Project Structure + +``` +lambda-opentelemetry-demo/ +├── src/ +│ ├── order-service/ # Order orchestration service +│ │ └── index.js +│ ├── inventory-service/ # Inventory check service +│ │ └── index.js +│ ├── payment-service/ # Payment processing service +│ │ └── index.js +│ └── shared/ # Shared utilities +│ ├── tracer.js # OpenTelemetry setup +│ └── utils.js # Common utilities +├── template.yaml # AWS SAM template +├── package.json # Node.js dependencies +├── test-payloads.json # Sample test data +├── test-api.sh # API test script +└── README.md # This file +``` + +## 🔧 Local Development + +To test locally without deploying to AWS: + +1. Start your OpenTelemetry backend (e.g., Jaeger) +2. Use AWS SAM Local: + +```bash +sam build +sam local start-api --env-vars env.json +``` + +Create `env.json`: +```json +{ + "Parameters": { + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://host.docker.internal:4318/v1/traces", + "INVENTORY_SERVICE_URL": "http://host.docker.internal:3000/inventory", + "PAYMENT_SERVICE_URL": "http://host.docker.internal:3000/payment" + } +} +``` + +## 📊 Observability Best Practices + +This demo demonstrates several observability best practices: + +1. **Structured Logging** - Logs include trace context for correlation +2. **Semantic Attributes** - Meaningful span attributes for filtering and analysis +3. **Error Handling** - Errors are captured as span events +4. **Business Context** - Business-relevant data in spans (order IDs, amounts, etc.) +5. **Service Naming** - Clear service names for easy identification +6. 
**Context Propagation** - W3C standard trace context across service boundaries + +## 🔐 Security Considerations + +- This is a demo project - do not use in production without proper security hardening +- Implement proper authentication/authorization for API endpoints +- Secure OpenTelemetry Collector endpoints +- Use AWS Secrets Manager for sensitive configuration +- Enable API Gateway throttling and request validation + +## 🤝 Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. + +## 📄 License + +MIT License - See LICENSE file for details + +## 🎓 Learning Resources + +- [OpenTelemetry Documentation](https://opentelemetry.io/docs/) +- [AWS Lambda Best Practices](https://docs.aws.amazon.com/lambda/latest/dg/best-practices.html) +- [W3C Trace Context](https://www.w3.org/TR/trace-context/) +- [Distributed Tracing Guide](https://opentelemetry.io/docs/concepts/signals/traces/) + +## 🐛 Troubleshooting + +### Traces not appearing in backend + +- Verify OTEL_EXPORTER_OTLP_ENDPOINT is correctly configured +- Check Lambda logs in CloudWatch for errors +- Ensure OpenTelemetry Collector/backend is running and accessible +- Verify network connectivity between Lambda and collector + +### Service-to-service calls failing + +- Check that service URLs are correctly configured +- Verify API Gateway endpoints are deployed +- Review Lambda function logs for errors +- Check IAM permissions if using private endpoints + +### High latency or timeouts + +- Increase Lambda timeout in template.yaml +- Check OpenTelemetry Collector performance +- Consider using asynchronous export +- Review batch span processor configuration diff --git a/.aws-sam/build/OrderServiceFunction/docker-compose.yaml b/.aws-sam/build/OrderServiceFunction/docker-compose.yaml new file mode 100644 index 0000000..7f969f4 --- /dev/null +++ b/.aws-sam/build/OrderServiceFunction/docker-compose.yaml @@ -0,0 +1,37 @@ +version: '3.8' + +services: + # Jaeger all-in-one with OTLP support + jaeger: 
#!/bin/bash

# Simple local testing script using SAM Local.
#
# Steps:
#   1. Verify that the Docker daemon is reachable.
#   2. Start the Jaeger container via docker-compose unless it is already running.
#   3. Build the Lambda functions with `sam build`.
#   4. Print the commands needed to start the local API and run the tests.

# Abort on any unhandled command failure. Commands whose failure we want to
# report ourselves are therefore tested directly in an `if` condition; checking
# `$?` on the following line would never be reached under `set -e`.
set -e

echo "Starting Lambda OpenTelemetry Demo Local Test"
echo "=============================================="
echo ""

# Check if Docker is running
if ! docker info > /dev/null 2>&1; then
    echo "Error: Docker is not running. Please start Docker first."
    exit 1
fi

# Check if Jaeger is running.
# Compare against the container name only (exact, whole-line match) so that an
# unrelated container or image whose name merely contains "jaeger-otel" is not
# mistaken for the Jaeger instance.
if ! docker ps --format '{{.Names}}' | grep -qx 'jaeger-otel'; then
    echo "Starting Jaeger..."
    docker-compose up -d jaeger
    echo "Waiting for Jaeger to be ready..."
    sleep 5
else
    echo "Jaeger is already running"
fi

echo ""
echo "Jaeger UI available at: http://localhost:16686"
echo ""

# Build the Lambda functions
echo "Building Lambda functions..."
if ! sam build; then
    echo "Error: Build failed"
    exit 1
fi

echo ""
echo "Lambda functions built successfully!"
echo ""
echo "To test the API:"
echo "1. Start SAM local API in one terminal:"
echo "   sam local start-api --env-vars env.json"
echo ""
echo "2. In another terminal, run the test script:"
echo "   ./test-api.sh http://127.0.0.1:3000"
echo ""
echo "3. View traces in Jaeger UI:"
echo "   http://localhost:16686"
echo ""
"node": ">= 0.8" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": 
"sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + 
"es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": 
"sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + } + } +} diff --git a/.aws-sam/build/OrderServiceFunction/package.json b/.aws-sam/build/OrderServiceFunction/package.json new file mode 100644 index 0000000..abe61a7 --- /dev/null +++ b/.aws-sam/build/OrderServiceFunction/package.json @@ -0,0 +1,23 @@ +{ + "name": "lambda-opentelemetry-demo", + "version": "1.0.0", + "description": "Sample AWS Lambda project demonstrating OpenTelemetry integration without CloudWatch/X-Ray", + "main": "index.js", + "scripts": { + "test": "echo \"No tests specified\" && exit 0", + 
"deploy": "sam deploy --guided", + "build": "sam build" + }, + "keywords": [ + "aws", + "lambda", + "opentelemetry", + "tracing", + "nodejs" + ], + "author": "", + "license": "MIT", + "dependencies": { + "axios": "^1.13.5" + } +} diff --git a/.aws-sam/build/OrderServiceFunction/src/inventory-service/index.js b/.aws-sam/build/OrderServiceFunction/src/inventory-service/index.js new file mode 100644 index 0000000..7cb0b00 --- /dev/null +++ b/.aws-sam/build/OrderServiceFunction/src/inventory-service/index.js @@ -0,0 +1,143 @@ +const { withSpan, addSpanAttributes } = require('../shared/tracer'); +const { createResponse } = require('../shared/utils'); + +// Mock inventory database +const inventory = { + 'item-001': { name: 'Laptop', quantity: 10 }, + 'item-002': { name: 'Mouse', quantity: 50 }, + 'item-003': { name: 'Keyboard', quantity: 0 }, // Out of stock + 'item-004': { name: 'Monitor', quantity: 5 }, + 'item-005': { name: 'Headphones', quantity: 0 }, // Out of stock +}; + +/** + * Inventory Service Lambda Handler + * This service checks if requested items are available in inventory + * OpenTelemetry instrumentation is automatically provided by the Lambda Layer + */ +exports.handler = async (event) => { + console.log('Inventory Service received event:', JSON.stringify(event)); + + return withSpan('check-inventory', async (span) => { + try { + // Parse request body + const body = typeof event.body === 'string' ? 
JSON.parse(event.body) : event.body; + const { orderId, items } = body; + + // Validate input + if (!orderId || !items || !Array.isArray(items)) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Missing or invalid required fields', + }); + return createResponse(400, { + available: false, + error: 'Missing or invalid required fields: orderId, items (array)', + }); + } + + addSpanAttributes({ + 'order.id': orderId, + 'inventory.items.count': items.length, + }); + + console.log(`Checking inventory for order ${orderId}`); + + // Check each item + const unavailableItems = []; + let allAvailable = true; + + for (const item of items) { + const { itemId, quantity } = item; + + if (!itemId || !quantity) { + continue; + } + + addSpanAttributes({ + [`inventory.item.${itemId}.requested`]: quantity, + }); + + // Check if item exists in inventory + if (!inventory[itemId]) { + console.log(`Item ${itemId} not found in inventory`); + unavailableItems.push({ + itemId, + reason: 'Item not found', + }); + allAvailable = false; + addSpanAttributes({ + [`inventory.item.${itemId}.status`]: 'not-found', + }); + continue; + } + + const availableQuantity = inventory[itemId].quantity; + addSpanAttributes({ + [`inventory.item.${itemId}.available`]: availableQuantity, + }); + + // Check if sufficient quantity is available + if (availableQuantity < quantity) { + console.log(`Insufficient quantity for item ${itemId}. Requested: ${quantity}, Available: ${availableQuantity}`); + unavailableItems.push({ + itemId, + name: inventory[itemId].name, + requestedQuantity: quantity, + availableQuantity: availableQuantity, + reason: availableQuantity === 0 ? 'Out of stock' : 'Insufficient quantity', + }); + allAvailable = false; + addSpanAttributes({ + [`inventory.item.${itemId}.status`]: 'insufficient', + }); + } else { + console.log(`Item ${itemId} is available. 
Requested: ${quantity}, Available: ${availableQuantity}`); + addSpanAttributes({ + [`inventory.item.${itemId}.status`]: 'available', + }); + } + } + + addSpanAttributes({ + 'inventory.all_available': allAvailable, + 'inventory.unavailable_count': unavailableItems.length, + }); + + if (allAvailable) { + console.log(`All items available for order ${orderId}`); + + return createResponse(200, { + available: true, + orderId, + message: 'All items are available', + }); + } else { + console.log(`Some items unavailable for order ${orderId}:`, unavailableItems); + addSpanAttributes({ + 'inventory.failure_reason': 'items-unavailable', + }); + + return createResponse(200, { + available: false, + orderId, + unavailableItems, + message: 'Some items are not available', + }); + } + + } catch (error) { + console.error('Inventory check error:', error); + addSpanAttributes({ + 'error': true, + 'error.message': error.message, + }); + + return createResponse(500, { + available: false, + error: 'Internal server error', + details: error.message, + }); + } + }); +}; diff --git a/.aws-sam/build/OrderServiceFunction/src/order-service/index.js b/.aws-sam/build/OrderServiceFunction/src/order-service/index.js new file mode 100644 index 0000000..7d05578 --- /dev/null +++ b/.aws-sam/build/OrderServiceFunction/src/order-service/index.js @@ -0,0 +1,181 @@ +const { withSpan, addSpanAttributes } = require('../shared/tracer'); +const { callService, createResponse } = require('../shared/utils'); + +/** + * Order Service Lambda Handler + * This service accepts orders and orchestrates calls to Inventory and Payment services + * OpenTelemetry instrumentation is automatically provided by the Lambda Layer + */ +exports.handler = async (event) => { + console.log('Order Service received event:', JSON.stringify(event)); + + return withSpan('process-order', async (span) => { + try { + // Parse request body + const body = typeof event.body === 'string' ? 
JSON.parse(event.body) : event.body; + const { orderId, customerId, items, paymentMethod } = body; + + // Validate input + if (!orderId || !customerId || !items || !paymentMethod) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Missing required fields', + }); + return createResponse(400, { + success: false, + error: 'Missing required fields: orderId, customerId, items, paymentMethod', + }); + } + + // Add order details to span + addSpanAttributes({ + 'order.id': orderId, + 'customer.id': customerId, + 'order.items.count': items.length, + 'payment.method': paymentMethod, + }); + + console.log(`Processing order ${orderId} for customer ${customerId}`); + + // Step 1: Check inventory + console.log('Step 1: Checking inventory'); + const inventoryUrl = process.env.INVENTORY_SERVICE_URL || 'http://localhost:3001/inventory'; + + let inventoryResult; + try { + inventoryResult = await withSpan('check-inventory', async (inventorySpan) => { + addSpanAttributes({ + 'service': 'inventory', + 'order.id': orderId, + }); + + const result = await callService(inventoryUrl, { + orderId, + items, + }); + + addSpanAttributes({ + 'inventory.available': result.available, + }); + + return result; + }); + } catch (error) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Inventory check failed', + }); + return createResponse(500, { + success: false, + orderId, + error: 'Inventory check failed', + details: error.message, + }); + } + + if (!inventoryResult.available) { + addSpanAttributes({ + 'inventory.status': 'out-of-stock', + 'inventory.unavailable_items': JSON.stringify(inventoryResult.unavailableItems || []), + }); + + console.log(`Order ${orderId} failed: Items out of stock`); + return createResponse(409, { + success: false, + orderId, + error: 'Items out of stock', + unavailableItems: inventoryResult.unavailableItems, + }); + } + + addSpanAttributes({ 'inventory.status': 'available' }); + + // Step 2: Process payment + console.log('Step 2: Processing payment'); 
+ const paymentUrl = process.env.PAYMENT_SERVICE_URL || 'http://localhost:3002/payment'; + + let paymentResult; + try { + paymentResult = await withSpan('process-payment', async (paymentSpan) => { + const totalAmount = items.reduce((sum, item) => sum + (item.price * item.quantity), 0); + + addSpanAttributes({ + 'service': 'payment', + 'order.id': orderId, + 'payment.method': paymentMethod, + 'payment.amount': totalAmount, + }); + + const result = await callService(paymentUrl, { + orderId, + customerId, + amount: totalAmount, + paymentMethod, + }); + + addSpanAttributes({ + 'payment.success': result.success, + 'payment.transaction_id': result.transactionId || 'none', + }); + + return result; + }); + } catch (error) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Payment processing failed', + }); + return createResponse(500, { + success: false, + orderId, + error: 'Payment processing failed', + details: error.message, + }); + } + + if (!paymentResult.success) { + addSpanAttributes({ + 'payment.status': 'failed', + 'payment.failure_reason': paymentResult.reason || 'unknown', + }); + + console.log(`Order ${orderId} failed: Payment failed`); + return createResponse(402, { + success: false, + orderId, + error: 'Payment failed', + reason: paymentResult.reason, + }); + } + + addSpanAttributes({ + 'payment.status': 'success', + 'payment.transaction_id': paymentResult.transactionId, + }); + + // Order successful + console.log(`Order ${orderId} completed successfully`); + + return createResponse(200, { + success: true, + orderId, + message: 'Order processed successfully', + transactionId: paymentResult.transactionId, + totalAmount: items.reduce((sum, item) => sum + (item.price * item.quantity), 0), + }); + + } catch (error) { + console.error('Order processing error:', error); + addSpanAttributes({ + 'error': true, + 'error.message': error.message, + }); + + return createResponse(500, { + success: false, + error: 'Internal server error', + details: error.message, 
+ }); + } + }); +}; diff --git a/.aws-sam/build/OrderServiceFunction/src/payment-service/index.js b/.aws-sam/build/OrderServiceFunction/src/payment-service/index.js new file mode 100644 index 0000000..6a88ea6 --- /dev/null +++ b/.aws-sam/build/OrderServiceFunction/src/payment-service/index.js @@ -0,0 +1,144 @@ +const { withSpan, addSpanAttributes } = require('../shared/tracer'); +const { createResponse } = require('../shared/utils'); + +// Mock payment processing +const FAILURE_PATTERNS = { + 'card-declined': ['4111111111111111', 'card-declined'], + 'insufficient-funds': ['4222222222222222', 'insufficient-funds'], + 'expired-card': ['4333333333333333', 'expired-card'], +}; + +/** + * Simulate payment processing with some failure scenarios + * @param {string} paymentMethod - Payment method identifier + * @param {number} amount - Payment amount + * @returns {Object} Payment result + */ +function processPayment(paymentMethod, amount) { + // Check for known failure patterns + for (const [reason, patterns] of Object.entries(FAILURE_PATTERNS)) { + if (patterns.some(pattern => paymentMethod.includes(pattern))) { + return { + success: false, + reason: reason.replace('-', ' '), + transactionId: null, + }; + } + } + + // Simulate random failures (10% chance) + if (Math.random() < 0.1) { + const reasons = ['network-timeout', 'gateway-error', 'rate-limit-exceeded']; + const randomReason = reasons[Math.floor(Math.random() * reasons.length)]; + return { + success: false, + reason: randomReason.replace('-', ' '), + transactionId: null, + }; + } + + // Successful payment + return { + success: true, + transactionId: `txn-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`, + }; +} + +/** + * Payment Service Lambda Handler + * This service processes payments with failure scenarios for testing + * OpenTelemetry instrumentation is automatically provided by the Lambda Layer + */ +exports.handler = async (event) => { + console.log('Payment Service received event:', 
JSON.stringify(event)); + + return withSpan('process-payment', async (span) => { + try { + // Parse request body + const body = typeof event.body === 'string' ? JSON.parse(event.body) : event.body; + const { orderId, customerId, amount, paymentMethod } = body; + + // Validate input + if (!orderId || !customerId || !amount || !paymentMethod) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Missing required fields', + }); + return createResponse(400, { + success: false, + error: 'Missing required fields: orderId, customerId, amount, paymentMethod', + }); + } + + addSpanAttributes({ + 'order.id': orderId, + 'customer.id': customerId, + 'payment.amount': amount, + 'payment.method': paymentMethod, + }); + + console.log(`Processing payment for order ${orderId}, amount: ${amount}`); + + // Validate amount + if (amount <= 0) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Invalid payment amount', + }); + return createResponse(400, { + success: false, + error: 'Invalid payment amount', + }); + } + + // Process payment + const paymentResult = processPayment(paymentMethod, amount); + + addSpanAttributes({ + 'payment.success': paymentResult.success, + }); + + if (!paymentResult.success) { + addSpanAttributes({ + 'payment.failure_reason': paymentResult.reason, + }); + + console.log(`Payment failed for order ${orderId}: ${paymentResult.reason}`); + + return createResponse(200, { + success: false, + orderId, + reason: paymentResult.reason, + message: 'Payment processing failed', + }); + } + + addSpanAttributes({ + 'payment.transaction_id': paymentResult.transactionId, + }); + + console.log(`Payment successful for order ${orderId}. 
Transaction ID: ${paymentResult.transactionId}`); + + return createResponse(200, { + success: true, + orderId, + transactionId: paymentResult.transactionId, + amount, + message: 'Payment processed successfully', + }); + + } catch (error) { + console.error('Payment processing error:', error); + addSpanAttributes({ + 'error': true, + 'error.message': error.message, + }); + + return createResponse(500, { + success: false, + error: 'Internal server error', + details: error.message, + }); + } + }); +}; diff --git a/.aws-sam/build/OrderServiceFunction/src/shared/tracer.js b/.aws-sam/build/OrderServiceFunction/src/shared/tracer.js new file mode 100644 index 0000000..3cedef5 --- /dev/null +++ b/.aws-sam/build/OrderServiceFunction/src/shared/tracer.js @@ -0,0 +1,118 @@ +// When using OpenTelemetry Lambda Layer with auto-instrumentation, +// the @opentelemetry/api is provided by the layer at /opt/nodejs/node_modules +const api = require('@opentelemetry/api'); + +/** + * Get the current active span + * The Lambda Layer automatically creates spans for Lambda invocations + * @returns {Span} Current active span + */ +function getCurrentSpan() { + return api.trace.getActiveSpan(); +} + +/** + * Get a tracer instance + * @param {string} name - Tracer name (usually service name) + * @param {string} version - Service version + * @returns {Tracer} OpenTelemetry tracer instance + */ +function getTracer(name = 'lambda-app', version = '1.0.0') { + return api.trace.getTracer(name, version); +} + +/** + * Create a new span for a specific operation + * @param {string} spanName - Name of the span + * @param {Function} fn - Function to execute within the span + * @param {Object} attributes - Optional span attributes + * @returns {Promise} Result of the function execution + */ +async function withSpan(spanName, fn, attributes = {}) { + const tracer = getTracer(); + return tracer.startActiveSpan(spanName, async (span) => { + try { + // Add custom attributes + 
/**
 * Build headers that carry the active trace context to a downstream service.
 *
 * The globally registered OpenTelemetry propagator(s) write their headers
 * first, so whatever propagation formats the runtime has configured are
 * honoured. If that leaves no `traceparent` (for example no W3C propagator is
 * registered), a W3C Trace Context header is built by hand from the active
 * span, which is what this helper always did before.
 *
 * @returns {Object} Headers with trace context; empty when there is no active
 *   span or its span context is invalid.
 */
function injectTraceContext() {
  const headers = {};

  // Preferred path: delegate to the configured propagators.
  api.propagation.inject(api.context.active(), headers);
  if (headers.traceparent) {
    return headers;
  }

  // Fallback: hand-rolled W3C header, kept for backward compatibility.
  const span = getCurrentSpan();
  const spanContext = span ? span.spanContext() : undefined;

  // isSpanContextValid rejects malformed or all-zero ids, which must never be
  // sent in a traceparent header.
  if (spanContext && api.trace.isSpanContextValid(spanContext)) {
    const flags = (spanContext.traceFlags || 0).toString(16).padStart(2, '0');
    headers.traceparent = `00-${spanContext.traceId}-${spanContext.spanId}-${flags}`;

    if (spanContext.traceState) {
      const serialized = spanContext.traceState.serialize();
      // Skip an empty trace state instead of emitting a blank header.
      if (serialized) {
        headers.tracestate = serialized;
      }
    }
  }

  return headers;
}
/**
 * Make an HTTP request to another Lambda service with trace context propagation.
 *
 * @param {string} url - Service URL
 * @param {Object} data - Request payload
 * @param {string} [method='POST'] - HTTP method
 * @param {number} [timeoutMs=10000] - Abort the request after this many
 *   milliseconds. Without a timeout axios waits indefinitely, so one stalled
 *   downstream would consume the caller's entire invocation.
 * @returns {Promise<*>} The response body (`response.data`)
 * @throws {Error} When axios rejects with a response attached: message
 *   `Service call failed: <status> - <body>`, with `status` set to the HTTP
 *   status and `cause` set to the original axios error. Any other failure
 *   (network error, timeout) is re-thrown unchanged.
 */
async function callService(url, data, method = 'POST', timeoutMs = 10000) {
  // Inject trace context into headers
  const headers = {
    'Content-Type': 'application/json',
    ...injectTraceContext(),
  };

  console.log(`Calling service: ${url} with method: ${method}`);
  console.log(`Trace context headers:`, headers);

  try {
    const response = await axios({
      method,
      url,
      data,
      headers,
      timeout: timeoutMs,
    });

    console.log(`Service call successful: ${url}`);
    return response.data;
  } catch (error) {
    console.error(`Service call failed: ${url}`, error.message);
    if (error.response) {
      // Same message as before, but keep the status and the original error
      // so callers and logs can still see what actually went wrong.
      const wrapped = new Error(
        `Service call failed: ${error.response.status} - ${JSON.stringify(error.response.data)}`,
        { cause: error }
      );
      wrapped.status = error.response.status;
      throw wrapped;
    }
    throw error;
  }
}
+AWSTemplateFormatVersion: '2010-09-09' +Transform: AWS::Serverless-2016-10-31 +Description: Lambda OpenTelemetry Demo - Order, Inventory, and Payment Services with AWS ADOT Layer + +Globals: + Function: + Timeout: 30 + MemorySize: 512 + Runtime: nodejs20.x + Environment: + Variables: + AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler + OTEL_SERVICE_NAME: will-be-overridden + OTEL_TRACES_SAMPLER: AlwaysOn + OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf + OTEL_EXPORTER_OTLP_ENDPOINT: !Ref OtelCollectorEndpoint + OTEL_PROPAGATORS: tracecontext + OTEL_RESOURCE_ATTRIBUTES: !Sub deployment.environment=${Environment} + +Parameters: + OtelCollectorEndpoint: + Type: String + Default: 'http://localhost:4318' + Description: OpenTelemetry Collector endpoint URL (without /v1/traces path) + + Environment: + Type: String + Default: 'production' + Description: Deployment environment (e.g., development, staging, production) + + AdotLayerArn: + Type: String + Default: 'arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5' + Description: | + ARN of the AWS Distro for OpenTelemetry (ADOT) Lambda Layer for Node.js. 
+ Find the latest ARN for your region at: + https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + +Resources: + # Order Service Function + OrderServiceFunction: + Type: AWS::Serverless::Function + Metadata: + BuildMethod: nodejs20.x + Properties: + FunctionName: order-service + CodeUri: ./ + Handler: src/order-service/index.handler + Description: Order Service - Orchestrates order processing + Layers: + - !Ref AdotLayerArn + Environment: + Variables: + OTEL_SERVICE_NAME: order-service + INVENTORY_SERVICE_URL: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/inventory' + PAYMENT_SERVICE_URL: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/payment' + Events: + OrderApi: + Type: Api + Properties: + Path: /order + Method: post + RestApiId: !Ref ApiGateway + + # Inventory Service Function + InventoryServiceFunction: + Type: AWS::Serverless::Function + Metadata: + BuildMethod: nodejs20.x + Properties: + FunctionName: inventory-service + CodeUri: ./ + Handler: src/inventory-service/index.handler + Description: Inventory Service - Checks item availability + Layers: + - !Ref AdotLayerArn + Environment: + Variables: + OTEL_SERVICE_NAME: inventory-service + Events: + InventoryApi: + Type: Api + Properties: + Path: /inventory + Method: post + RestApiId: !Ref ApiGateway + + # Payment Service Function + PaymentServiceFunction: + Type: AWS::Serverless::Function + Metadata: + BuildMethod: nodejs20.x + Properties: + FunctionName: payment-service + CodeUri: ./ + Handler: src/payment-service/index.handler + Description: Payment Service - Processes payments + Layers: + - !Ref AdotLayerArn + Environment: + Variables: + OTEL_SERVICE_NAME: payment-service + Events: + PaymentApi: + Type: Api + Properties: + Path: /payment + Method: post + RestApiId: !Ref ApiGateway + + # API Gateway + ApiGateway: + Type: AWS::Serverless::Api + Properties: + StageName: Prod + Description: API Gateway for Lambda OpenTelemetry Demo + +Outputs: + 
#!/bin/bash
#
# Smoke test for the Lambda OpenTelemetry demo.
# Sends four orders to the deployed /order endpoint: one with in-stock items
# and a normal payment method, one labelled out-of-stock, and two using the
# card numbers the payment mock rejects. Each response body is printed.
#
# Usage: ./test-api.sh <api-endpoint>

# Color codes for output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

echo -e "${BLUE}========================================${NC}"
echo -e "${BLUE}Lambda OpenTelemetry Demo - Test Script${NC}"
echo -e "${BLUE}========================================${NC}"
echo ""

# Check if API endpoint is provided
if [ -z "$1" ]; then
  echo -e "${RED}Error: API endpoint is required${NC}"
  echo "Usage: ./test-api.sh <api-endpoint>"
  echo "Example: ./test-api.sh https://abc123.execute-api.us-east-1.amazonaws.com/Prod"
  exit 1
fi

# Drop one trailing slash so the request URL never becomes ".../Prod//order".
API_ENDPOINT="${1%/}"
ORDER_URL="${API_ENDPOINT}/order"

echo -e "${YELLOW}Using API Endpoint: ${API_ENDPOINT}${NC}"
echo ""

# Print a banner, POST one JSON order to ORDER_URL and print the response.
#   $1 - banner title
#   $2 - one-line description of the scenario
#   $3 - JSON request body
run_test() {
  local title=$1
  local description=$2
  local payload=$3

  echo -e "${BLUE}${title}${NC}"
  echo "${description}"
  # The URL is quoted so an endpoint containing shell metacharacters or
  # whitespace is passed to curl as a single argument.
  curl -X POST "${ORDER_URL}" \
    -H "Content-Type: application/json" \
    -d "${payload}"
  echo -e "\n"
}

# Test 1: Successful Order
run_test "Test 1: Successful Order" \
  "Testing with items that are in stock and valid payment method..." \
  '{
    "orderId": "ORD-001",
    "customerId": "CUST-123",
    "paymentMethod": "credit-card-5555",
    "items": [
      {
        "itemId": "item-001",
        "name": "Laptop",
        "quantity": 1,
        "price": 999.99
      },
      {
        "itemId": "item-002",
        "name": "Mouse",
        "quantity": 2,
        "price": 29.99
      }
    ]
  }'

sleep 2

# Test 2: Out of Stock Order
run_test "Test 2: Out of Stock Order" \
  "Testing with items that are out of stock..." \
  '{
    "orderId": "ORD-002",
    "customerId": "CUST-456",
    "paymentMethod": "credit-card-5555",
    "items": [
      {
        "itemId": "item-003",
        "name": "Keyboard",
        "quantity": 1,
        "price": 79.99
      },
      {
        "itemId": "item-005",
        "name": "Headphones",
        "quantity": 1,
        "price": 149.99
      }
    ]
  }'

sleep 2

# Test 3: Payment Failure (Card Declined)
run_test "Test 3: Payment Failure - Card Declined" \
  "Testing with a payment method that will be declined..." \
  '{
    "orderId": "ORD-003",
    "customerId": "CUST-789",
    "paymentMethod": "4111111111111111",
    "items": [
      {
        "itemId": "item-001",
        "name": "Laptop",
        "quantity": 1,
        "price": 999.99
      }
    ]
  }'

sleep 2

# Test 4: Payment Failure (Insufficient Funds)
run_test "Test 4: Payment Failure - Insufficient Funds" \
  "Testing with insufficient funds scenario..." \
  '{
    "orderId": "ORD-004",
    "customerId": "CUST-999",
    "paymentMethod": "4222222222222222",
    "items": [
      {
        "itemId": "item-004",
        "name": "Monitor",
        "quantity": 2,
        "price": 399.99
      }
    ]
  }'

echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}All tests completed!${NC}"
echo -e "${GREEN}Check your OpenTelemetry backend to view the traces${NC}"
echo -e "${GREEN}========================================${NC}"
b/.aws-sam/build/PaymentServiceFunction/.env.example new file mode 100644 index 0000000..3f3aa74 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/.env.example @@ -0,0 +1,7 @@ +# OpenTelemetry Configuration +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318/v1/traces +ENVIRONMENT=development + +# Service URLs (for local testing) +INVENTORY_SERVICE_URL=http://localhost:3001/inventory +PAYMENT_SERVICE_URL=http://localhost:3002/payment diff --git a/.aws-sam/build/PaymentServiceFunction/ADOT-LAYER-CONFIG.md b/.aws-sam/build/PaymentServiceFunction/ADOT-LAYER-CONFIG.md new file mode 100644 index 0000000..cffddc9 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/ADOT-LAYER-CONFIG.md @@ -0,0 +1,366 @@ +# AWS ADOT Lambda Layer Configuration Guide + +This document explains how to configure and use the AWS Distro for OpenTelemetry (ADOT) Lambda Layer for automatic instrumentation. + +## Overview + +The AWS ADOT Lambda Layer provides automatic OpenTelemetry instrumentation for Lambda functions without requiring you to bundle OpenTelemetry SDKs in your deployment package. 
+ +## Layer ARNs by Region + +### Node.js 18.x ADOT Layer ARNs (Latest: v1-18-1) + +| Region | ARN | +|--------|-----| +| us-east-1 | `arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| us-east-2 | `arn:aws:lambda:us-east-2:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| us-west-1 | `arn:aws:lambda:us-west-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| us-west-2 | `arn:aws:lambda:us-west-2:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| eu-west-1 | `arn:aws:lambda:eu-west-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| eu-central-1 | `arn:aws:lambda:eu-central-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| ap-southeast-1 | `arn:aws:lambda:ap-southeast-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| ap-northeast-1 | `arn:aws:lambda:ap-northeast-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | + +**Find the latest ARNs**: https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + +## Required Environment Variables + +### Essential Configuration + +```yaml +Environment: + Variables: + # Enable ADOT auto-instrumentation wrapper + AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler + + # Service name for identification in traces + OTEL_SERVICE_NAME: your-service-name + + # Sampling configuration (AlwaysOn for demo, adjust for production) + OTEL_TRACES_SAMPLER: AlwaysOn + + # Protocol for OTLP export + OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf + + # OpenTelemetry Collector endpoint (without /v1/traces) + OTEL_EXPORTER_OTLP_ENDPOINT: http://your-collector:4318 + + # Trace context propagation format + OTEL_PROPAGATORS: tracecontext +``` + +### Advanced Configuration Options + +```yaml +Environment: + Variables: + # Resource attributes (for environment, version, etc.) 
+ OTEL_RESOURCE_ATTRIBUTES: deployment.environment=production,service.version=1.0.0 + + # Specific endpoint for traces (optional, overrides OTEL_EXPORTER_OTLP_ENDPOINT) + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: http://your-collector:4318/v1/traces + + # Enable/disable specific instrumentations + OTEL_INSTRUMENTATION_AWS_SDK_ENABLED: true + OTEL_INSTRUMENTATION_HTTP_ENABLED: true + + # Span attribute limits + OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT: 4095 + OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT: 128 +``` + +## Exporter Backends + +### 1. Jaeger (Local Development) + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: http://localhost:4318 +OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf +``` + +Run Jaeger with OTLP support: +```bash +docker run -d --name jaeger \ + -e COLLECTOR_OTLP_ENABLED=true \ + -p 16686:16686 \ + -p 4318:4318 \ + jaegertracing/all-in-one:latest +``` + +### 2. AWS X-Ray (with ADOT Collector) + +If you want to send traces to AWS X-Ray, deploy an ADOT Collector in your VPC: + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: http://adot-collector:4318 +OTEL_PROPAGATORS: tracecontext,xray +``` + +ADOT Collector configuration: +```yaml +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + +exporters: + awsxray: + region: us-east-1 + +service: + pipelines: + traces: + receivers: [otlp] + exporters: [awsxray] +``` + +### 3. Grafana Cloud / Honeycomb / DataDog + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: https://your-saas-endpoint +OTEL_EXPORTER_OTLP_HEADERS: Authorization=Bearer your-api-key +``` + +### 4. Self-Hosted OpenTelemetry Collector + +Deploy a collector in your VPC or use Lambda Extension: + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: http://collector:4318 +``` + +## How Auto-Instrumentation Works + +### 1. Wrapper Execution + +The `AWS_LAMBDA_EXEC_WRAPPER` environment variable points to `/opt/otel-handler`, which: +- Initializes the OpenTelemetry SDK before your handler +- Automatically instruments common libraries (http, https, aws-sdk, etc.) 
+- Creates a root span for each Lambda invocation +- Propagates trace context from incoming requests + +### 2. Automatic Instrumentation + +The layer automatically instruments: +- ✅ AWS SDK calls +- ✅ HTTP/HTTPS requests (axios, node-fetch, http, https) +- ✅ Lambda invocation (creates root span) +- ✅ Downstream service calls with trace context propagation + +### 3. Context Propagation + +The layer automatically: +- Extracts W3C trace context from incoming API Gateway requests +- Injects trace context into outgoing HTTP requests +- Maintains trace context across async operations + +## Usage in Your Code + +### Basic Usage (Automatic) + +With the layer configured, your Lambda function is automatically instrumented: + +```javascript +exports.handler = async (event) => { + // Automatically traced! + const response = await axios.get('https://api.example.com'); + return { statusCode: 200, body: JSON.stringify(response.data) }; +}; +``` + +### Adding Custom Spans + +For additional business logic spans: + +```javascript +const api = require('@opentelemetry/api'); + +exports.handler = async (event) => { + const tracer = api.trace.getTracer('my-service', '1.0.0'); + + return tracer.startActiveSpan('business-operation', async (span) => { + try { + span.setAttribute('customer.id', event.customerId); + + // Your business logic + const result = await processOrder(event); + + span.setStatus({ code: api.SpanStatusCode.OK }); + return result; + } catch (error) { + span.recordException(error); + span.setStatus({ code: api.SpanStatusCode.ERROR }); + throw error; + } finally { + span.end(); + } + }); +}; +``` + +### Adding Attributes to Current Span + +```javascript +const api = require('@opentelemetry/api'); + +exports.handler = async (event) => { + const span = api.trace.getActiveSpan(); + if (span) { + span.setAttribute('order.id', event.orderId); + span.setAttribute('order.amount', event.amount); + } + + // Your handler logic +}; +``` + +## Trace Context Propagation + +### 
Automatic (HTTP Clients) + +The layer automatically propagates context for instrumented HTTP clients: + +```javascript +const axios = require('axios'); + +// Trace context is automatically added to headers! +const response = await axios.post('https://api.example.com/payment', data); +``` + +### Manual (Custom Clients) + +For custom HTTP clients or non-instrumented libraries: + +```javascript +const api = require('@opentelemetry/api'); + +function getTraceHeaders() { + const headers = {}; + const span = api.trace.getActiveSpan(); + + if (span) { + const spanContext = span.spanContext(); + headers.traceparent = `00-${spanContext.traceId}-${spanContext.spanId}-01`; + } + + return headers; +} + +// Use in your custom client +const headers = { ...getTraceHeaders(), 'Content-Type': 'application/json' }; +``` + +## Sampling + +### AlwaysOn (Development/Demo) + +```yaml +OTEL_TRACES_SAMPLER: AlwaysOn +``` + +Traces every request. Good for development but expensive in production. + +### TraceIdRatioBased (Production) + +```yaml +OTEL_TRACES_SAMPLER: TraceIdRatioBased +OTEL_TRACES_SAMPLER_ARG: 0.1 # 10% sampling +``` + +Samples a percentage of requests to reduce costs. + +### ParentBased (Recommended) + +```yaml +OTEL_TRACES_SAMPLER: ParentBased_TraceIdRatioBased +OTEL_TRACES_SAMPLER_ARG: 0.1 +``` + +Respects parent sampling decisions while applying ratio-based sampling to root spans. + +## Performance Considerations + +### Cold Start Impact + +The ADOT layer adds ~200-300ms to cold start time: +- Layer initialization: ~100ms +- Auto-instrumentation setup: ~100-200ms + +### Runtime Overhead + +- Minimal overhead during warm execution (<5ms per invocation) +- Async span export doesn't block Lambda execution +- Batching reduces network calls + +### Optimization Tips + +1. **Use provisioned concurrency** for latency-sensitive functions +2. **Adjust span limits** to reduce memory usage: + ```yaml + OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT: 32 + OTEL_SPAN_EVENT_COUNT_LIMIT: 32 + ``` +3. 
**Use sampling** in high-volume environments +4. **Disable unused instrumentations**: + ```yaml + OTEL_INSTRUMENTATION_AWS_LAMBDA_ENABLED: true + OTEL_INSTRUMENTATION_HTTP_ENABLED: true + OTEL_INSTRUMENTATION_AWS_SDK_ENABLED: false + ``` + +## Troubleshooting + +### No Traces Appearing + +1. Check CloudWatch Logs for ADOT errors: + ``` + grep "otel" /aws/lambda/your-function + ``` + +2. Verify environment variables: + ```bash + aws lambda get-function-configuration --function-name your-function \ + --query 'Environment.Variables' + ``` + +3. Test collector endpoint connectivity: + - Ensure Lambda has network access to the collector + - Check security groups and NACLs + - Verify collector is accepting OTLP HTTP on port 4318 + +### Wrapper Not Running + +Error: `AWS_LAMBDA_EXEC_WRAPPER is set but the wrapper does not exist` + +**Solution**: Verify the layer ARN is correct and matches your region. + +### Trace Context Not Propagating + +1. Ensure `OTEL_PROPAGATORS=tracecontext` is set +2. Check if HTTP library is supported (axios, node-fetch, http, https) +3. For unsupported libraries, manually inject headers + +### High Cold Start Times + +1. Consider using Lambda SnapStart (if available) +2. Use provisioned concurrency +3. Minimize layer count (combine layers if possible) +4. Profile and optimize your application code + +## Best Practices + +1. **Set meaningful service names**: Use descriptive names that reflect the business function +2. **Add business context**: Include order IDs, customer IDs, and other relevant attributes +3. **Handle errors properly**: Record exceptions and set error status on spans +4. **Use semantic conventions**: Follow OpenTelemetry semantic conventions for consistency +5. **Monitor collector health**: Ensure your collector is performant and highly available +6. **Set appropriate sampling**: Balance cost with observability needs +7. 
**Use tags for filtering**: Add environment, version, and region as resource attributes + +## References + +- AWS ADOT Lambda: https://aws-otel.github.io/docs/getting-started/lambda +- OpenTelemetry Lambda Instrumentation: https://opentelemetry.io/docs/platforms/faas/lambda-auto-instrument/ +- W3C Trace Context: https://www.w3.org/TR/trace-context/ +- OpenTelemetry Semantic Conventions: https://opentelemetry.io/docs/specs/semconv/ diff --git a/.aws-sam/build/PaymentServiceFunction/ARCHITECTURE.md b/.aws-sam/build/PaymentServiceFunction/ARCHITECTURE.md new file mode 100644 index 0000000..ff7e7c2 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/ARCHITECTURE.md @@ -0,0 +1,366 @@ +# Architecture Details + +## Service Flow + +### 1. Successful Order Flow + +``` +Client + │ + │ POST /order + ├─────────────────────────────────────────┐ + │ │ + ▼ │ +Order Service │ + │ │ + │ 1. Validate request │ + │ 2. Create root span │ + │ │ + │ POST /inventory (with trace context) │ + ├─────────────────────────────┐ │ + │ │ │ + ▼ │ │ +Inventory Service │ │ + │ │ │ + │ 1. Extract trace context │ │ + │ 2. Create child span │ │ + │ 3. Check stock levels │ │ + │ 4. Return availability │ │ + │ │ │ + └─────────────────────────────┤ │ + │ Response: available=true │ │ + ▼ │ │ +Order Service │ │ + │ │ │ + │ POST /payment (with trace context) │ + ├─────────────────────────────┐ │ + │ │ │ + ▼ │ │ +Payment Service │ │ + │ │ │ + │ 1. Extract trace context │ │ + │ 2. Create child span │ │ + │ 3. Process payment │ │ + │ 4. Return result │ │ + │ │ │ + └─────────────────────────────┤ │ + │ Response: success=true │ │ + ▼ │ │ +Order Service │ │ + │ │ │ + │ Complete order processing │ │ + │ │ │ + └─────────────────────────────┤ │ + │ Response: 200 OK │ │ + ▼ │ │ +Client ◄──────────────────────────────────────┘ +``` + +### 2. Out of Stock Flow + +``` +Client → Order Service → Inventory Service + (Out of stock detected) + ← + Order Service + (Skip payment, return 409) + ← +Client +``` + +### 3. 
Payment Failure Flow + +``` +Client → Order Service → Inventory Service + (Items available) + ← + Order Service → Payment Service + (Payment failed) + ← + Order Service + (Return 402) + ← +Client +``` + +## Trace Structure + +### Trace Hierarchy + +``` +Trace ID: 00000000000000000000000000000001 +│ +└─ Span: process-order (Order Service) + │ Duration: 245ms + │ Attributes: + │ - order.id: ORD-001 + │ - customer.id: CUST-123 + │ - order.items.count: 2 + │ + ├─ Span: check-inventory (Order Service) + │ │ Duration: 15ms + │ │ Attributes: + │ │ - service: inventory + │ │ - order.id: ORD-001 + │ │ + │ └─ Span: HTTP POST (Auto-instrumented) + │ │ Duration: 12ms + │ │ + │ └─ Span: check-inventory (Inventory Service) + │ Duration: 8ms + │ Attributes: + │ - inventory.available: true + │ - inventory.items.count: 2 + │ + └─ Span: process-payment (Order Service) + │ Duration: 180ms + │ Attributes: + │ - service: payment + │ - order.id: ORD-001 + │ - payment.method: credit-card-5555 + │ - payment.amount: 1059.97 + │ + └─ Span: HTTP POST (Auto-instrumented) + │ Duration: 175ms + │ + └─ Span: process-payment (Payment Service) + Duration: 170ms + Attributes: + - payment.success: true + - payment.transaction_id: txn-1234567890-abc123 +``` + +## OpenTelemetry Components + +### 1. Tracer Provider + +Manages the lifecycle of tracers and span processors. + +```javascript +const provider = new NodeTracerProvider({ + resource: resource, +}); +``` + +### 2. OTLP Exporter + +Exports spans to OpenTelemetry Collector using HTTP. + +```javascript +const exporter = new OTLPTraceExporter({ + url: 'http://localhost:4318/v1/traces', +}); +``` + +### 3. Batch Span Processor + +Batches spans before export for efficiency. + +```javascript +provider.addSpanProcessor(new BatchSpanProcessor(exporter)); +``` + +### 4. Auto Instrumentations + +Automatically instruments common libraries. 
+ +```javascript +registerInstrumentations({ + instrumentations: [ + getNodeAutoInstrumentations({ + '@opentelemetry/instrumentation-http': { enabled: true }, + '@opentelemetry/instrumentation-https': { enabled: true }, + }), + ], +}); +``` + +## Trace Context Propagation + +### W3C Trace Context Format + +``` +traceparent: 00-<trace-id>-<parent-id>-<trace-flags> + +Example: +traceparent: 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01 + │ │ │ │ + │ └─ Trace ID (32 hex chars) │ └─ Flags + │ └─ Parent Span ID (16 hex chars) + └─ Version (00) +``` + +### Context Injection + +When Order Service calls Inventory Service: + +```javascript +// Order Service +const headers = injectTraceContext(); +// headers = { traceparent: '00-...-...-01' } + +axios.post(inventoryUrl, data, { headers }); +``` + +### Context Extraction + +When Inventory Service receives request: + +```javascript +// Inventory Service +const traceContext = extractTraceContext(event); +// Continue the trace with the same trace ID +``` + +## Span Attributes + +### Semantic Conventions + +Following OpenTelemetry semantic conventions: + +| Attribute | Description | Example | +|-----------|-------------|---------| +| `service.name` | Service identifier | `order-service` | +| `service.version` | Service version | `1.0.0` | +| `deployment.environment` | Environment | `production` | +| `order.id` | Order identifier | `ORD-001` | +| `customer.id` | Customer identifier | `CUST-123` | +| `payment.amount` | Payment amount | `1059.97` | +| `payment.method` | Payment method | `credit-card-5555` | +| `inventory.available` | Availability flag | `true` | +| `http.method` | HTTP method | `POST` | +| `http.status_code` | HTTP status | `200` | + +## Error Handling + +### Error Span Status + +```javascript +span.setStatus({ + code: api.SpanStatusCode.ERROR, + message: error.message, +}); +span.recordException(error); +``` + +### Business Logic Errors vs Technical Errors + +- **Business Logic Errors** (out of stock, payment declined): + - Span status: 
OK + - Custom attributes indicate business failure + - Example: `inventory.available: false` + +- **Technical Errors** (network failure, invalid input): + - Span status: ERROR + - Exception recorded + - Stack trace captured + +## Performance Considerations + +### Batch Processing + +Spans are batched before export to reduce network overhead: + +```javascript +new BatchSpanProcessor(exporter, { + maxQueueSize: 2048, + maxExportBatchSize: 512, + scheduledDelayMillis: 5000, +}) +``` + +### Sampling + +For high-volume systems, implement sampling: + +```javascript +const sampler = new TraceIdRatioBasedSampler(0.1); // 10% sampling +const provider = new NodeTracerProvider({ + resource: resource, + sampler: sampler, +}); +``` + +### Async Export + +Span export is asynchronous and doesn't block Lambda execution. + +## Security + +### Securing OTLP Endpoint + +```javascript +const exporter = new OTLPTraceExporter({ + url: process.env.OTEL_EXPORTER_OTLP_ENDPOINT, + headers: { + 'Authorization': `Bearer ${process.env.OTEL_API_KEY}`, + }, +}); +``` + +### Data Privacy + +- Avoid capturing sensitive data in spans +- Redact PII from span attributes +- Use attribute filtering in the collector + +## Monitoring and Alerting + +### Key Metrics to Monitor + +1. **Trace Completeness** + - Percentage of complete traces + - Missing spans + +2. **Error Rates** + - Spans with ERROR status + - Service-specific error rates + +3. **Latency** + - P50, P95, P99 latencies + - Per-service latency breakdown + +4. 
**Business Metrics** + - Order success rate + - Payment failure rate + - Inventory unavailability rate + +### Example Queries (Jaeger) + +``` +# Find all failed orders +service="order-service" AND error=true + +# Find orders with payment failures +service="payment-service" AND payment.success=false + +# Find slow orders (> 1s) +service="order-service" AND duration>1000ms + +# Find out-of-stock scenarios +service="inventory-service" AND inventory.available=false +``` + +## Troubleshooting + +### Common Issues + +1. **Traces not appearing** + - Check OTLP endpoint connectivity + - Verify Lambda has network access + - Check CloudWatch logs for export errors + +2. **Broken traces** + - Verify trace context propagation + - Check HTTP header forwarding + - Ensure consistent trace ID format + +3. **High latency** + - Review span processor configuration + - Check collector performance + - Consider async export optimization + +4. **Missing spans** + - Verify auto-instrumentation is active + - Check for exceptions during span creation + - Review sampling configuration diff --git a/.aws-sam/build/PaymentServiceFunction/README.md b/.aws-sam/build/PaymentServiceFunction/README.md new file mode 100644 index 0000000..aff152d --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/README.md @@ -0,0 +1,476 @@ +# Lambda OpenTelemetry Demo + +A comprehensive AWS Lambda project demonstrating how to integrate OpenTelemetry for end-to-end distributed tracing across multiple microservices **without using AWS CloudWatch or X-Ray**, using the **AWS Distro for OpenTelemetry (ADOT) Lambda Layer** for automatic instrumentation. + +## 🎯 Overview + +This project implements a realistic e-commerce order processing workflow using three Lambda functions representing distinct microservices: + +1. **Order Service** - Orchestrates the order processing workflow +2. **Inventory Service** - Validates item availability +3. 
**Payment Service** - Processes payments + +The services communicate with each other while propagating trace context using OpenTelemetry, enabling complete visibility into request flows across all services. + +## 🏗️ Architecture + +``` +┌─────────────────┐ +│ Order Service │ +│ (Lambda) │ +└────────┬────────┘ + │ + ├──────────────────┐ + │ │ + ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ +│ Inventory Svc │ │ Payment Svc │ +│ (Lambda) │ │ (Lambda) │ +└─────────────────┘ └─────────────────┘ + │ │ + └──────┬───────────┘ + ▼ + OpenTelemetry + Collector + │ + ▼ + Tracing Backend + (Jaeger/Zipkin/etc) +``` + +## ✨ Features + +- ✅ **AWS ADOT Lambda Layer** - Uses AWS Distro for OpenTelemetry Lambda Layer for zero-code instrumentation +- ✅ **Automatic Instrumentation** - Auto-instruments Lambda, HTTP/HTTPS, and AWS SDK without code changes +- ✅ **Distributed Tracing** - End-to-end trace propagation across Lambda functions +- ✅ **W3C Trace Context** - Standard trace context propagation using W3C format +- ✅ **OTLP Export** - Exports traces using OTLP HTTP protocol +- ✅ **Custom Spans** - Manual span creation for business logic insights +- ✅ **No CloudWatch/X-Ray** - Direct export to any OpenTelemetry-compatible backend +- ✅ **Error Scenarios** - Built-in test scenarios for: + - Out of stock items + - Payment failures (card declined, insufficient funds) + - Network errors + +## 📋 Prerequisites + +- Node.js 18.x or later +- AWS CLI configured with appropriate credentials +- AWS SAM CLI for deployment +- OpenTelemetry Collector or compatible backend (Jaeger, Zipkin, etc.) + +## 🚀 Quick Start + +### 1. Install Dependencies + +```bash +npm install +``` + +### 2. Set Up OpenTelemetry Backend + +You need an OpenTelemetry-compatible backend to receive and visualize traces. 
Here are some options: + +#### Option A: Jaeger (Recommended for local testing) + +```bash +# Run Jaeger all-in-one with Docker (or use docker-compose) +docker-compose up -d jaeger + +# Or manually: +docker run -d --name jaeger \ + -e COLLECTOR_OTLP_ENABLED=true \ + -p 16686:16686 \ + -p 4318:4318 \ + jaegertracing/all-in-one:latest + +# Access Jaeger UI at http://localhost:16686 +``` + +#### Option B: Zipkin + +```bash +# Run Zipkin with Docker +docker run -d --name zipkin \ + -p 9411:9411 \ + openzipkin/zipkin:latest + +# Note: You'll need an OpenTelemetry Collector to convert OTLP to Zipkin format +``` + +#### Option C: Grafana Cloud, Honeycomb, or other SaaS providers + +Configure the OTEL_EXPORTER_OTLP_ENDPOINT parameter with your provider's endpoint. + +### 3. Update Lambda Layer ARN + +**Important**: Update the `AdotLayerArn` parameter in `template.yaml` with the correct ARN for your AWS region. + +Find the latest ARN for your region here: +- https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + +Example ARNs: +- **us-east-1**: `arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` +- **us-west-2**: `arn:aws:lambda:us-west-2:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` +- **eu-west-1**: `arn:aws:lambda:eu-west-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` + +See [ADOT-LAYER-CONFIG.md](./ADOT-LAYER-CONFIG.md) for complete list. + +### 4. 
Deploy to AWS + +```bash +# Build the Lambda functions +sam build + +# Deploy (follow the prompts) +sam deploy --guided + +# Note the API endpoint from the outputs +``` + +During `sam deploy --guided`, you'll be prompted to provide: +- **Stack name** (e.g., `lambda-otel-demo`) +- **AWS Region** (e.g., `us-east-1`) +- **OtelCollectorEndpoint** - Your OpenTelemetry Collector endpoint without `/v1/traces` (e.g., `http://your-collector:4318`) +- **Environment** - Deployment environment (e.g., `production`, `staging`) +- **AdotLayerArn** - AWS ADOT Lambda Layer ARN for your region +- Confirm changes before deployment + +**Note**: If you're using a collector in a VPC, ensure your Lambda functions have VPC access configured. + +### 5. Test the Services + +Use the provided test script: + +```bash +# Replace with your actual API endpoint from SAM deploy output +./test-api.sh https://your-api-id.execute-api.region.amazonaws.com/Prod +``` + +Or manually test with curl: + +```bash +# Successful order +curl -X POST https://your-api-endpoint/Prod/order \ + -H "Content-Type: application/json" \ + -d @test-payloads.json +``` + +### 6. View Traces + +Open your tracing backend UI: +- Jaeger: http://localhost:16686 +- Select service: `order-service` +- Click "Find Traces" to view the end-to-end traces + +## 🔧 AWS ADOT Lambda Layer + +This project uses the **AWS Distro for OpenTelemetry (ADOT) Lambda Layer** for automatic instrumentation. The layer provides: + +### Key Benefits + +1. **Zero-Code Instrumentation** - Auto-instruments your Lambda function without code changes +2. **No Dependency Bundling** - OpenTelemetry SDKs are provided by the layer, reducing deployment package size +3. **Automatic Context Propagation** - Trace context is automatically propagated across service calls +4. 
**AWS-Optimized** - Maintained and supported by AWS with regular updates + +### How It Works + +The layer works through the `AWS_LAMBDA_EXEC_WRAPPER` environment variable: + +```yaml +Environment: + Variables: + AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler # Enables auto-instrumentation + OTEL_SERVICE_NAME: order-service # Service identifier + OTEL_TRACES_SAMPLER: AlwaysOn # Sampling strategy + OTEL_EXPORTER_OTLP_ENDPOINT: http://collector:4318 +``` + +When your Lambda function is invoked: +1. The wrapper initializes OpenTelemetry SDK +2. Auto-instrumentation is activated for HTTP, AWS SDK, and Lambda runtime +3. A root span is created for the Lambda invocation +4. Your handler executes within the trace context +5. Spans are exported to your configured OTLP endpoint + +### What Gets Instrumented Automatically + +- ✅ Lambda function invocations +- ✅ HTTP/HTTPS requests (axios, node-fetch, native http/https) +- ✅ AWS SDK v2 and v3 calls +- ✅ Database clients (when using instrumented libraries) +- ✅ Trace context propagation in headers + +### Adding Custom Instrumentation + +While the layer handles most instrumentation automatically, you can add custom spans for business logic: + +```javascript +const api = require('@opentelemetry/api'); + +exports.handler = async (event) => { + const tracer = api.trace.getTracer('my-service'); + + return tracer.startActiveSpan('custom-operation', async (span) => { + span.setAttribute('business.attribute', 'value'); + // Your logic here + span.end(); + }); +}; +``` + +For detailed configuration options, see [ADOT-LAYER-CONFIG.md](./ADOT-LAYER-CONFIG.md). + +## 📝 Test Scenarios + +### Scenario 1: Successful Order ✅ + +Tests the happy path where all services succeed. 
+ +```json +{ + "orderId": "ORD-001", + "customerId": "CUST-123", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + } + ] +} +``` + +**Expected Trace:** +- Order Service → Inventory Service (Success) +- Order Service → Payment Service (Success) +- Order completed with HTTP 200 + +### Scenario 2: Out of Stock ⚠️ + +Tests inventory unavailability. + +```json +{ + "orderId": "ORD-002", + "customerId": "CUST-456", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-003", + "name": "Keyboard", + "quantity": 1, + "price": 79.99 + } + ] +} +``` + +**Expected Trace:** +- Order Service → Inventory Service (Out of stock detected) +- Payment Service NOT called +- Order fails with HTTP 409 + +**Out of Stock Items:** +- `item-003` - Keyboard (0 in stock) +- `item-005` - Headphones (0 in stock) + +### Scenario 3: Payment Failure 💳 + +Tests payment processing failures. + +**Card Declined:** +```json +{ + "orderId": "ORD-003", + "customerId": "CUST-789", + "paymentMethod": "4111111111111111", + "items": [{"itemId": "item-001", "quantity": 1, "price": 999.99}] +} +``` + +**Insufficient Funds:** +```json +{ + "orderId": "ORD-004", + "customerId": "CUST-999", + "paymentMethod": "4222222222222222", + "items": [{"itemId": "item-004", "quantity": 2, "price": 399.99}] +} +``` + +**Test Payment Methods:** +- `4111111111111111` - Card declined +- `4222222222222222` - Insufficient funds +- `4333333333333333` - Expired card +- Any other value - Success (with 10% random failure rate) + +**Expected Trace:** +- Order Service → Inventory Service (Success) +- Order Service → Payment Service (Payment fails) +- Order fails with HTTP 402 + +## 🔍 Understanding the Traces + +Each trace will show: + +1. **Span Attributes:** + - Order ID, Customer ID + - Item counts and details + - Payment amounts and methods + - Service names and versions + - Success/failure reasons + +2. 
**Span Hierarchy:** + ``` + process-order (Order Service) + ├── check-inventory (Order Service -> Inventory Service) + │ └── check-inventory (Inventory Service) + └── process-payment (Order Service -> Payment Service) + └── process-payment (Payment Service) + ``` + +3. **Trace Context Propagation:** + - Trace IDs are consistent across all services + - Parent-child relationships are maintained + - W3C traceparent headers are used + +## 🛠️ Configuration + +### Environment Variables + +Configure these in the SAM template or Lambda console: + +- `OTEL_EXPORTER_OTLP_ENDPOINT` - OpenTelemetry Collector endpoint (default: `http://localhost:4318/v1/traces`) +- `ENVIRONMENT` - Deployment environment (default: `development`) +- `INVENTORY_SERVICE_URL` - Inventory service endpoint +- `PAYMENT_SERVICE_URL` - Payment service endpoint + +### Inventory Configuration + +Modify the mock inventory in `src/inventory-service/index.js`: + +```javascript +const inventory = { + 'item-001': { name: 'Laptop', quantity: 10 }, + 'item-002': { name: 'Mouse', quantity: 50 }, + 'item-003': { name: 'Keyboard', quantity: 0 }, // Out of stock + // Add more items... +}; +``` + +### Payment Configuration + +Modify failure patterns in `src/payment-service/index.js`: + +```javascript +const FAILURE_PATTERNS = { + 'card-declined': ['4111111111111111', 'card-declined'], + 'insufficient-funds': ['4222222222222222', 'insufficient-funds'], + // Add more patterns... 
+}; +``` + +## 📂 Project Structure + +``` +lambda-opentelemetry-demo/ +├── src/ +│ ├── order-service/ # Order orchestration service +│ │ └── index.js +│ ├── inventory-service/ # Inventory check service +│ │ └── index.js +│ ├── payment-service/ # Payment processing service +│ │ └── index.js +│ └── shared/ # Shared utilities +│ ├── tracer.js # OpenTelemetry setup +│ └── utils.js # Common utilities +├── template.yaml # AWS SAM template +├── package.json # Node.js dependencies +├── test-payloads.json # Sample test data +├── test-api.sh # API test script +└── README.md # This file +``` + +## 🔧 Local Development + +To test locally without deploying to AWS: + +1. Start your OpenTelemetry backend (e.g., Jaeger) +2. Use AWS SAM Local: + +```bash +sam build +sam local start-api --env-vars env.json +``` + +Create `env.json`: +```json +{ + "Parameters": { + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://host.docker.internal:4318/v1/traces", + "INVENTORY_SERVICE_URL": "http://host.docker.internal:3000/inventory", + "PAYMENT_SERVICE_URL": "http://host.docker.internal:3000/payment" + } +} +``` + +## 📊 Observability Best Practices + +This demo demonstrates several observability best practices: + +1. **Structured Logging** - Logs include trace context for correlation +2. **Semantic Attributes** - Meaningful span attributes for filtering and analysis +3. **Error Handling** - Errors are captured as span events +4. **Business Context** - Business-relevant data in spans (order IDs, amounts, etc.) +5. **Service Naming** - Clear service names for easy identification +6. 
**Context Propagation** - W3C standard trace context across service boundaries + +## 🔐 Security Considerations + +- This is a demo project - do not use in production without proper security hardening +- Implement proper authentication/authorization for API endpoints +- Secure OpenTelemetry Collector endpoints +- Use AWS Secrets Manager for sensitive configuration +- Enable API Gateway throttling and request validation + +## 🤝 Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. + +## 📄 License + +MIT License - See LICENSE file for details + +## 🎓 Learning Resources + +- [OpenTelemetry Documentation](https://opentelemetry.io/docs/) +- [AWS Lambda Best Practices](https://docs.aws.amazon.com/lambda/latest/dg/best-practices.html) +- [W3C Trace Context](https://www.w3.org/TR/trace-context/) +- [Distributed Tracing Guide](https://opentelemetry.io/docs/concepts/signals/traces/) + +## 🐛 Troubleshooting + +### Traces not appearing in backend + +- Verify OTEL_EXPORTER_OTLP_ENDPOINT is correctly configured +- Check Lambda logs in CloudWatch for errors +- Ensure OpenTelemetry Collector/backend is running and accessible +- Verify network connectivity between Lambda and collector + +### Service-to-service calls failing + +- Check that service URLs are correctly configured +- Verify API Gateway endpoints are deployed +- Review Lambda function logs for errors +- Check IAM permissions if using private endpoints + +### High latency or timeouts + +- Increase Lambda timeout in template.yaml +- Check OpenTelemetry Collector performance +- Consider using asynchronous export +- Review batch span processor configuration diff --git a/.aws-sam/build/PaymentServiceFunction/docker-compose.yaml b/.aws-sam/build/PaymentServiceFunction/docker-compose.yaml new file mode 100644 index 0000000..7f969f4 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/docker-compose.yaml @@ -0,0 +1,37 @@ +version: '3.8' + +services: + # Jaeger all-in-one with OTLP support + 
jaeger: + image: jaegertracing/all-in-one:latest + container_name: jaeger-otel + environment: + - COLLECTOR_OTLP_ENABLED=true + - LOG_LEVEL=debug + ports: + - "16686:16686" # Jaeger UI + - "4318:4318" # OTLP HTTP receiver + - "4317:4317" # OTLP gRPC receiver + - "14268:14268" # Jaeger collector HTTP + - "14250:14250" # Jaeger collector gRPC + networks: + - otel-demo + + # Optional: OpenTelemetry Collector (if you want to use a separate collector) + # otel-collector: + # image: otel/opentelemetry-collector-contrib:latest + # container_name: otel-collector + # command: ["--config=/etc/otel-collector-config.yaml"] + # volumes: + # - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml + # ports: + # - "4318:4318" # OTLP HTTP receiver + # - "4317:4317" # OTLP gRPC receiver + # - "8888:8888" # Prometheus metrics + # - "13133:13133" # Health check + # networks: + # - otel-demo + +networks: + otel-demo: + driver: bridge diff --git a/.aws-sam/build/PaymentServiceFunction/env.json b/.aws-sam/build/PaymentServiceFunction/env.json new file mode 100644 index 0000000..67b70e4 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/env.json @@ -0,0 +1,16 @@ +{ + "OrderServiceFunction": { + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://host.docker.internal:4318/v1/traces", + "ENVIRONMENT": "local", + "INVENTORY_SERVICE_URL": "http://127.0.0.1:3001/inventory", + "PAYMENT_SERVICE_URL": "http://127.0.0.1:3002/payment" + }, + "InventoryServiceFunction": { + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://host.docker.internal:4318/v1/traces", + "ENVIRONMENT": "local" + }, + "PaymentServiceFunction": { + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://host.docker.internal:4318/v1/traces", + "ENVIRONMENT": "local" + } +} diff --git a/.aws-sam/build/PaymentServiceFunction/local-test.sh b/.aws-sam/build/PaymentServiceFunction/local-test.sh new file mode 100755 index 0000000..8b43c30 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/local-test.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# Simple local 
testing script using SAM Local +# This script starts SAM local API and runs tests + +set -e + +echo "Starting Lambda OpenTelemetry Demo Local Test" +echo "==============================================" +echo "" + +# Check if Docker is running +if ! docker info > /dev/null 2>&1; then + echo "Error: Docker is not running. Please start Docker first." + exit 1 +fi + +# Check if Jaeger is running +if ! docker ps | grep -q jaeger-otel; then + echo "Starting Jaeger..." + docker-compose up -d jaeger + echo "Waiting for Jaeger to be ready..." + sleep 5 +else + echo "Jaeger is already running" +fi + +echo "" +echo "Jaeger UI available at: http://localhost:16686" +echo "" + +# Build the Lambda functions +echo "Building Lambda functions..." +sam build + +if [ $? -ne 0 ]; then + echo "Error: Build failed" + exit 1 +fi + +echo "" +echo "Lambda functions built successfully!" +echo "" +echo "To test the API:" +echo "1. Start SAM local API in one terminal:" +echo " sam local start-api --env-vars env.json" +echo "" +echo "2. In another terminal, run the test script:" +echo " ./test-api.sh http://127.0.0.1:3000" +echo "" +echo "3. 
View traces in Jaeger UI:" +echo " http://localhost:16686" +echo "" diff --git a/.aws-sam/build/PaymentServiceFunction/package-lock.json b/.aws-sam/build/PaymentServiceFunction/package-lock.json new file mode 100644 index 0000000..eeb2106 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/package-lock.json @@ -0,0 +1,295 @@ +{ + "name": "lambda-opentelemetry-demo", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "lambda-opentelemetry-demo", + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "axios": "^1.13.5" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/axios": { + "version": "1.13.5", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.5.tgz", + "integrity": "sha512-cz4ur7Vb0xS4/KUN0tPWe44eqxrIu31me+fbang3ijiNscE129POzipJJA6zniq2C/Z6sJCjMimjS8Lc/GAs8Q==", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.11", + "form-data": "^4.0.5", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": 
{ + "node": ">= 0.8" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": 
"sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + 
"es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": 
"sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + } + } +} diff --git a/.aws-sam/build/PaymentServiceFunction/package.json b/.aws-sam/build/PaymentServiceFunction/package.json new file mode 100644 index 0000000..abe61a7 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/package.json @@ -0,0 +1,23 @@ +{ + "name": "lambda-opentelemetry-demo", + "version": "1.0.0", + "description": "Sample AWS Lambda project demonstrating OpenTelemetry integration without CloudWatch/X-Ray", + "main": "index.js", + "scripts": { + "test": "echo \"No tests specified\" && exit 0", + 
"deploy": "sam deploy --guided", + "build": "sam build" + }, + "keywords": [ + "aws", + "lambda", + "opentelemetry", + "tracing", + "nodejs" + ], + "author": "", + "license": "MIT", + "dependencies": { + "axios": "^1.13.5" + } +} diff --git a/.aws-sam/build/PaymentServiceFunction/src/inventory-service/index.js b/.aws-sam/build/PaymentServiceFunction/src/inventory-service/index.js new file mode 100644 index 0000000..7cb0b00 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/src/inventory-service/index.js @@ -0,0 +1,143 @@ +const { withSpan, addSpanAttributes } = require('../shared/tracer'); +const { createResponse } = require('../shared/utils'); + +// Mock inventory database +const inventory = { + 'item-001': { name: 'Laptop', quantity: 10 }, + 'item-002': { name: 'Mouse', quantity: 50 }, + 'item-003': { name: 'Keyboard', quantity: 0 }, // Out of stock + 'item-004': { name: 'Monitor', quantity: 5 }, + 'item-005': { name: 'Headphones', quantity: 0 }, // Out of stock +}; + +/** + * Inventory Service Lambda Handler + * This service checks if requested items are available in inventory + * OpenTelemetry instrumentation is automatically provided by the Lambda Layer + */ +exports.handler = async (event) => { + console.log('Inventory Service received event:', JSON.stringify(event)); + + return withSpan('check-inventory', async (span) => { + try { + // Parse request body + const body = typeof event.body === 'string' ? 
JSON.parse(event.body) : event.body; + const { orderId, items } = body; + + // Validate input + if (!orderId || !items || !Array.isArray(items)) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Missing or invalid required fields', + }); + return createResponse(400, { + available: false, + error: 'Missing or invalid required fields: orderId, items (array)', + }); + } + + addSpanAttributes({ + 'order.id': orderId, + 'inventory.items.count': items.length, + }); + + console.log(`Checking inventory for order ${orderId}`); + + // Check each item + const unavailableItems = []; + let allAvailable = true; + + for (const item of items) { + const { itemId, quantity } = item; + + if (!itemId || !quantity) { + continue; + } + + addSpanAttributes({ + [`inventory.item.${itemId}.requested`]: quantity, + }); + + // Check if item exists in inventory + if (!inventory[itemId]) { + console.log(`Item ${itemId} not found in inventory`); + unavailableItems.push({ + itemId, + reason: 'Item not found', + }); + allAvailable = false; + addSpanAttributes({ + [`inventory.item.${itemId}.status`]: 'not-found', + }); + continue; + } + + const availableQuantity = inventory[itemId].quantity; + addSpanAttributes({ + [`inventory.item.${itemId}.available`]: availableQuantity, + }); + + // Check if sufficient quantity is available + if (availableQuantity < quantity) { + console.log(`Insufficient quantity for item ${itemId}. Requested: ${quantity}, Available: ${availableQuantity}`); + unavailableItems.push({ + itemId, + name: inventory[itemId].name, + requestedQuantity: quantity, + availableQuantity: availableQuantity, + reason: availableQuantity === 0 ? 'Out of stock' : 'Insufficient quantity', + }); + allAvailable = false; + addSpanAttributes({ + [`inventory.item.${itemId}.status`]: 'insufficient', + }); + } else { + console.log(`Item ${itemId} is available. 
Requested: ${quantity}, Available: ${availableQuantity}`); + addSpanAttributes({ + [`inventory.item.${itemId}.status`]: 'available', + }); + } + } + + addSpanAttributes({ + 'inventory.all_available': allAvailable, + 'inventory.unavailable_count': unavailableItems.length, + }); + + if (allAvailable) { + console.log(`All items available for order ${orderId}`); + + return createResponse(200, { + available: true, + orderId, + message: 'All items are available', + }); + } else { + console.log(`Some items unavailable for order ${orderId}:`, unavailableItems); + addSpanAttributes({ + 'inventory.failure_reason': 'items-unavailable', + }); + + return createResponse(200, { + available: false, + orderId, + unavailableItems, + message: 'Some items are not available', + }); + } + + } catch (error) { + console.error('Inventory check error:', error); + addSpanAttributes({ + 'error': true, + 'error.message': error.message, + }); + + return createResponse(500, { + available: false, + error: 'Internal server error', + details: error.message, + }); + } + }); +}; diff --git a/.aws-sam/build/PaymentServiceFunction/src/order-service/index.js b/.aws-sam/build/PaymentServiceFunction/src/order-service/index.js new file mode 100644 index 0000000..7d05578 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/src/order-service/index.js @@ -0,0 +1,181 @@ +const { withSpan, addSpanAttributes } = require('../shared/tracer'); +const { callService, createResponse } = require('../shared/utils'); + +/** + * Order Service Lambda Handler + * This service accepts orders and orchestrates calls to Inventory and Payment services + * OpenTelemetry instrumentation is automatically provided by the Lambda Layer + */ +exports.handler = async (event) => { + console.log('Order Service received event:', JSON.stringify(event)); + + return withSpan('process-order', async (span) => { + try { + // Parse request body + const body = typeof event.body === 'string' ? 
JSON.parse(event.body) : event.body; + const { orderId, customerId, items, paymentMethod } = body; + + // Validate input + if (!orderId || !customerId || !items || !paymentMethod) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Missing required fields', + }); + return createResponse(400, { + success: false, + error: 'Missing required fields: orderId, customerId, items, paymentMethod', + }); + } + + // Add order details to span + addSpanAttributes({ + 'order.id': orderId, + 'customer.id': customerId, + 'order.items.count': items.length, + 'payment.method': paymentMethod, + }); + + console.log(`Processing order ${orderId} for customer ${customerId}`); + + // Step 1: Check inventory + console.log('Step 1: Checking inventory'); + const inventoryUrl = process.env.INVENTORY_SERVICE_URL || 'http://localhost:3001/inventory'; + + let inventoryResult; + try { + inventoryResult = await withSpan('check-inventory', async (inventorySpan) => { + addSpanAttributes({ + 'service': 'inventory', + 'order.id': orderId, + }); + + const result = await callService(inventoryUrl, { + orderId, + items, + }); + + addSpanAttributes({ + 'inventory.available': result.available, + }); + + return result; + }); + } catch (error) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Inventory check failed', + }); + return createResponse(500, { + success: false, + orderId, + error: 'Inventory check failed', + details: error.message, + }); + } + + if (!inventoryResult.available) { + addSpanAttributes({ + 'inventory.status': 'out-of-stock', + 'inventory.unavailable_items': JSON.stringify(inventoryResult.unavailableItems || []), + }); + + console.log(`Order ${orderId} failed: Items out of stock`); + return createResponse(409, { + success: false, + orderId, + error: 'Items out of stock', + unavailableItems: inventoryResult.unavailableItems, + }); + } + + addSpanAttributes({ 'inventory.status': 'available' }); + + // Step 2: Process payment + console.log('Step 2: Processing payment'); 
+ const paymentUrl = process.env.PAYMENT_SERVICE_URL || 'http://localhost:3002/payment'; + + let paymentResult; + try { + paymentResult = await withSpan('process-payment', async (paymentSpan) => { + const totalAmount = items.reduce((sum, item) => sum + (item.price * item.quantity), 0); + + addSpanAttributes({ + 'service': 'payment', + 'order.id': orderId, + 'payment.method': paymentMethod, + 'payment.amount': totalAmount, + }); + + const result = await callService(paymentUrl, { + orderId, + customerId, + amount: totalAmount, + paymentMethod, + }); + + addSpanAttributes({ + 'payment.success': result.success, + 'payment.transaction_id': result.transactionId || 'none', + }); + + return result; + }); + } catch (error) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Payment processing failed', + }); + return createResponse(500, { + success: false, + orderId, + error: 'Payment processing failed', + details: error.message, + }); + } + + if (!paymentResult.success) { + addSpanAttributes({ + 'payment.status': 'failed', + 'payment.failure_reason': paymentResult.reason || 'unknown', + }); + + console.log(`Order ${orderId} failed: Payment failed`); + return createResponse(402, { + success: false, + orderId, + error: 'Payment failed', + reason: paymentResult.reason, + }); + } + + addSpanAttributes({ + 'payment.status': 'success', + 'payment.transaction_id': paymentResult.transactionId, + }); + + // Order successful + console.log(`Order ${orderId} completed successfully`); + + return createResponse(200, { + success: true, + orderId, + message: 'Order processed successfully', + transactionId: paymentResult.transactionId, + totalAmount: items.reduce((sum, item) => sum + (item.price * item.quantity), 0), + }); + + } catch (error) { + console.error('Order processing error:', error); + addSpanAttributes({ + 'error': true, + 'error.message': error.message, + }); + + return createResponse(500, { + success: false, + error: 'Internal server error', + details: error.message, 
// Mock payment processing: a payment method containing any of these
// substrings fails with the reason given by the key.
const FAILURE_PATTERNS = {
  'card-declined': ['4111111111111111', 'card-declined'],
  'insufficient-funds': ['4222222222222222', 'insufficient-funds'],
  'expired-card': ['4333333333333333', 'expired-card'],
};

/**
 * Simulate payment processing with some failure scenarios
 * @param {string} paymentMethod - Payment method identifier
 * @param {number} amount - Payment amount (not used by the simulation)
 * @returns {Object} Payment result: `{ success: true, transactionId }` on
 *   success, or `{ success: false, reason, transactionId: null }` on failure,
 *   where `reason` is the failure key with every hyphen turned into a space
 */
function processPayment(paymentMethod, amount) {
  // Coerce so a numeric card number from JSON does not throw on `.includes`.
  const method = String(paymentMethod);

  // Check for known failure patterns
  for (const [reason, patterns] of Object.entries(FAILURE_PATTERNS)) {
    if (patterns.some((pattern) => method.includes(pattern))) {
      return {
        success: false,
        // Global regex: a string pattern replaces only the first hyphen.
        reason: reason.replace(/-/g, ' '),
        transactionId: null,
      };
    }
  }

  // Simulate random failures (10% chance)
  if (Math.random() < 0.1) {
    const reasons = ['network-timeout', 'gateway-error', 'rate-limit-exceeded'];
    const randomReason = reasons[Math.floor(Math.random() * reasons.length)];
    return {
      success: false,
      // Global regex so 'rate-limit-exceeded' becomes 'rate limit exceeded'.
      reason: randomReason.replace(/-/g, ' '),
      transactionId: null,
    };
  }

  // Successful payment
  return {
    success: true,
    transactionId: `txn-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`,
  };
}
JSON.stringify(event)); + + return withSpan('process-payment', async (span) => { + try { + // Parse request body + const body = typeof event.body === 'string' ? JSON.parse(event.body) : event.body; + const { orderId, customerId, amount, paymentMethod } = body; + + // Validate input + if (!orderId || !customerId || !amount || !paymentMethod) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Missing required fields', + }); + return createResponse(400, { + success: false, + error: 'Missing required fields: orderId, customerId, amount, paymentMethod', + }); + } + + addSpanAttributes({ + 'order.id': orderId, + 'customer.id': customerId, + 'payment.amount': amount, + 'payment.method': paymentMethod, + }); + + console.log(`Processing payment for order ${orderId}, amount: ${amount}`); + + // Validate amount + if (amount <= 0) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Invalid payment amount', + }); + return createResponse(400, { + success: false, + error: 'Invalid payment amount', + }); + } + + // Process payment + const paymentResult = processPayment(paymentMethod, amount); + + addSpanAttributes({ + 'payment.success': paymentResult.success, + }); + + if (!paymentResult.success) { + addSpanAttributes({ + 'payment.failure_reason': paymentResult.reason, + }); + + console.log(`Payment failed for order ${orderId}: ${paymentResult.reason}`); + + return createResponse(200, { + success: false, + orderId, + reason: paymentResult.reason, + message: 'Payment processing failed', + }); + } + + addSpanAttributes({ + 'payment.transaction_id': paymentResult.transactionId, + }); + + console.log(`Payment successful for order ${orderId}. 
Transaction ID: ${paymentResult.transactionId}`); + + return createResponse(200, { + success: true, + orderId, + transactionId: paymentResult.transactionId, + amount, + message: 'Payment processed successfully', + }); + + } catch (error) { + console.error('Payment processing error:', error); + addSpanAttributes({ + 'error': true, + 'error.message': error.message, + }); + + return createResponse(500, { + success: false, + error: 'Internal server error', + details: error.message, + }); + } + }); +}; diff --git a/.aws-sam/build/PaymentServiceFunction/src/shared/tracer.js b/.aws-sam/build/PaymentServiceFunction/src/shared/tracer.js new file mode 100644 index 0000000..3cedef5 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/src/shared/tracer.js @@ -0,0 +1,118 @@ +// When using OpenTelemetry Lambda Layer with auto-instrumentation, +// the @opentelemetry/api is provided by the layer at /opt/nodejs/node_modules +const api = require('@opentelemetry/api'); + +/** + * Get the current active span + * The Lambda Layer automatically creates spans for Lambda invocations + * @returns {Span} Current active span + */ +function getCurrentSpan() { + return api.trace.getActiveSpan(); +} + +/** + * Get a tracer instance + * @param {string} name - Tracer name (usually service name) + * @param {string} version - Service version + * @returns {Tracer} OpenTelemetry tracer instance + */ +function getTracer(name = 'lambda-app', version = '1.0.0') { + return api.trace.getTracer(name, version); +} + +/** + * Create a new span for a specific operation + * @param {string} spanName - Name of the span + * @param {Function} fn - Function to execute within the span + * @param {Object} attributes - Optional span attributes + * @returns {Promise} Result of the function execution + */ +async function withSpan(spanName, fn, attributes = {}) { + const tracer = getTracer(); + return tracer.startActiveSpan(spanName, async (span) => { + try { + // Add custom attributes + 
Object.entries(attributes).forEach(([key, value]) => { + span.setAttribute(key, value); + }); + + const result = await fn(span); + span.setStatus({ code: api.SpanStatusCode.OK }); + return result; + } catch (error) { + span.setStatus({ + code: api.SpanStatusCode.ERROR, + message: error.message, + }); + span.recordException(error); + throw error; + } finally { + span.end(); + } + }); +} + +/** + * Add custom attributes to the current span + * @param {Object} attributes - Key-value pairs of attributes to add + */ +function addSpanAttributes(attributes) { + const span = getCurrentSpan(); + if (span) { + Object.entries(attributes).forEach(([key, value]) => { + if (value !== null && value !== undefined) { + span.setAttribute(key, value); + } + }); + } +} + +/** + * Record an exception on the current span + * @param {Error} error - Error to record + */ +function recordException(error) { + const span = getCurrentSpan(); + if (span) { + span.recordException(error); + span.setStatus({ + code: api.SpanStatusCode.ERROR, + message: error.message, + }); + } +} + +/** + * Inject trace context into headers for downstream service calls + * Uses W3C Trace Context propagation + * @returns {Object} Headers with trace context + */ +function injectTraceContext() { + const headers = {}; + const span = getCurrentSpan(); + + if (span) { + const spanContext = span.spanContext(); + if (spanContext && spanContext.traceId && spanContext.spanId) { + // W3C Trace Context format + const traceFlags = spanContext.traceFlags || 0; + headers.traceparent = `00-${spanContext.traceId}-${spanContext.spanId}-${traceFlags.toString(16).padStart(2, '0')}`; + + if (spanContext.traceState) { + headers.tracestate = spanContext.traceState.serialize(); + } + } + } + + return headers; +} + +module.exports = { + getTracer, + getCurrentSpan, + withSpan, + addSpanAttributes, + recordException, + injectTraceContext, +}; diff --git a/.aws-sam/build/PaymentServiceFunction/src/shared/utils.js 
b/.aws-sam/build/PaymentServiceFunction/src/shared/utils.js new file mode 100644 index 0000000..2da73e3 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/src/shared/utils.js @@ -0,0 +1,61 @@ +const axios = require('axios'); +const { injectTraceContext } = require('./tracer'); + +/** + * Make an HTTP request to another Lambda service with trace context propagation + * @param {string} url - Service URL + * @param {Object} data - Request payload + * @param {string} method - HTTP method (default: POST) + * @returns {Promise} Response data + */ +async function callService(url, data, method = 'POST') { + // Inject trace context into headers + const headers = { + 'Content-Type': 'application/json', + ...injectTraceContext(), + }; + + console.log(`Calling service: ${url} with method: ${method}`); + console.log(`Trace context headers:`, headers); + + try { + const response = await axios({ + method, + url, + data, + headers, + }); + + console.log(`Service call successful: ${url}`); + return response.data; + } catch (error) { + console.error(`Service call failed: ${url}`, error.message); + if (error.response) { + throw new Error(`Service call failed: ${error.response.status} - ${JSON.stringify(error.response.data)}`); + } + throw error; + } +} + +/** + * Create a Lambda response object + * @param {number} statusCode - HTTP status code + * @param {Object} body - Response body + * @param {Object} headers - Additional headers + * @returns {Object} Lambda response object + */ +function createResponse(statusCode, body, headers = {}) { + return { + statusCode, + headers: { + 'Content-Type': 'application/json', + ...headers, + }, + body: JSON.stringify(body), + }; +} + +module.exports = { + callService, + createResponse, +}; diff --git a/.aws-sam/build/PaymentServiceFunction/template.yaml b/.aws-sam/build/PaymentServiceFunction/template.yaml new file mode 100644 index 0000000..bc11944 --- /dev/null +++ b/.aws-sam/build/PaymentServiceFunction/template.yaml @@ -0,0 +1,137 @@ 
AWSTemplateFormatVersion: '2010-09-09'
Transform: AWS::Serverless-2016-10-31
Description: Lambda OpenTelemetry Demo - Order, Inventory, and Payment Services with AWS ADOT Layer

Globals:
  Function:
    Timeout: 30
    MemorySize: 512
    Runtime: nodejs20.x
    Environment:
      Variables:
        AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler
        # Placeholder; every function below sets its own OTEL_SERVICE_NAME.
        OTEL_SERVICE_NAME: will-be-overridden
        OTEL_TRACES_SAMPLER: AlwaysOn
        OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf
        OTEL_EXPORTER_OTLP_ENDPOINT: !Ref OtelCollectorEndpoint
        OTEL_PROPAGATORS: tracecontext
        OTEL_RESOURCE_ATTRIBUTES: !Sub deployment.environment=${Environment}

Parameters:
  OtelCollectorEndpoint:
    Type: String
    Default: 'http://localhost:4318'
    Description: OpenTelemetry Collector endpoint URL (without /v1/traces path)

  Environment:
    Type: String
    Default: 'production'
    Description: Deployment environment (e.g., development, staging, production)

  AdotLayerArn:
    Type: String
    # NOTE(review): the default is a us-east-1 ARN; pass a region-specific ARN
    # when deploying to another region.
    Default: 'arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5'
    Description: |
      ARN of the AWS Distro for OpenTelemetry (ADOT) Lambda Layer for Node.js.
      Find the latest ARN for your region at:
      https://aws-otel.github.io/docs/getting-started/lambda/lambda-js

Resources:
  # Order Service Function
  OrderServiceFunction:
    Type: AWS::Serverless::Function
    Metadata:
      BuildMethod: nodejs20.x
    Properties:
      FunctionName: order-service
      CodeUri: ./
      Handler: src/order-service/index.handler
      Description: Order Service - Orchestrates order processing
      Layers:
        - !Ref AdotLayerArn
      Environment:
        Variables:
          OTEL_SERVICE_NAME: order-service
          # These URLs reference InternalApi, not ApiGateway. ApiGateway's
          # generated definition references this function's ARN (via the
          # OrderApi event), so referencing ApiGateway here would create a
          # circular dependency between the function and the API.
          INVENTORY_SERVICE_URL: !Sub 'https://${InternalApi}.execute-api.${AWS::Region}.amazonaws.com/Prod/inventory'
          PAYMENT_SERVICE_URL: !Sub 'https://${InternalApi}.execute-api.${AWS::Region}.amazonaws.com/Prod/payment'
      Events:
        OrderApi:
          Type: Api
          Properties:
            Path: /order
            Method: post
            RestApiId: !Ref ApiGateway

  # Inventory Service Function
  InventoryServiceFunction:
    Type: AWS::Serverless::Function
    Metadata:
      BuildMethod: nodejs20.x
    Properties:
      FunctionName: inventory-service
      CodeUri: ./
      Handler: src/inventory-service/index.handler
      Description: Inventory Service - Checks item availability
      Layers:
        - !Ref AdotLayerArn
      Environment:
        Variables:
          OTEL_SERVICE_NAME: inventory-service
      Events:
        InventoryApi:
          Type: Api
          Properties:
            Path: /inventory
            Method: post
            RestApiId: !Ref InternalApi

  # Payment Service Function
  PaymentServiceFunction:
    Type: AWS::Serverless::Function
    Metadata:
      BuildMethod: nodejs20.x
    Properties:
      FunctionName: payment-service
      CodeUri: ./
      Handler: src/payment-service/index.handler
      Description: Payment Service - Processes payments
      Layers:
        - !Ref AdotLayerArn
      Environment:
        Variables:
          OTEL_SERVICE_NAME: payment-service
      Events:
        PaymentApi:
          Type: Api
          Properties:
            Path: /payment
            Method: post
            RestApiId: !Ref InternalApi

  # Public API Gateway (order entry point)
  ApiGateway:
    Type: AWS::Serverless::Api
    Properties:
      StageName: Prod
      Description: API Gateway for Lambda OpenTelemetry Demo

  # API Gateway for the services the Order Service calls. Kept separate from
  # ApiGateway so OrderServiceFunction can reference it without a dependency cycle.
  InternalApi:
    Type: AWS::Serverless::Api
    Properties:
      StageName: Prod
      Description: API Gateway for the Inventory and Payment services

Outputs:
  ApiEndpoint:
    Description: API Gateway endpoint URL
    Value: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod'

  OrderServiceUrl:
    Description: Order Service endpoint
    Value: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/order'

  InventoryServiceUrl:
    Description: Inventory Service endpoint
    Value: !Sub 'https://${InternalApi}.execute-api.${AWS::Region}.amazonaws.com/Prod/inventory'

  PaymentServiceUrl:
    Description: Payment Service endpoint
    Value: !Sub 'https://${InternalApi}.execute-api.${AWS::Region}.amazonaws.com/Prod/payment'

  AdotLayerVersion:
    Description: ADOT Lambda Layer ARN being used
    Value: !Ref AdotLayerArn
#!/bin/bash
#
# Sends four sample orders to the Order Service endpoint: a successful order,
# an out-of-stock order, and two payment failures.
#
# Usage: ./test-api.sh <api-endpoint>

# Color codes for output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

echo -e "${BLUE}========================================${NC}"
echo -e "${BLUE}Lambda OpenTelemetry Demo - Test Script${NC}"
echo -e "${BLUE}========================================${NC}"
echo ""

# Check if API endpoint is provided
if [ -z "$1" ]; then
    echo -e "${RED}Error: API endpoint is required${NC}"
    echo "Usage: ./test-api.sh <api-endpoint>"
    echo "Example: ./test-api.sh https://abc123.execute-api.us-east-1.amazonaws.com/Prod"
    exit 1
fi

# Strip one trailing slash so the joined URL never contains "//order".
API_ENDPOINT="${1%/}"
ORDER_URL="${API_ENDPOINT}/order"

echo -e "${YELLOW}Using API Endpoint: ${API_ENDPOINT}${NC}"
echo ""

# run_test TITLE DESCRIPTION JSON_PAYLOAD
# Prints the title and description, then POSTs the payload to the order URL.
run_test() {
    echo -e "${BLUE}$1${NC}"
    echo "$2"
    # The URL is quoted so characters such as '&' or '?' are not interpreted
    # by the shell.
    curl -X POST "${ORDER_URL}" \
      -H "Content-Type: application/json" \
      -d "$3"
    echo -e "\n"
}

# Test 1: Successful Order
run_test "Test 1: Successful Order" \
  "Testing with items that are in stock and valid payment method..." \
  '{
    "orderId": "ORD-001",
    "customerId": "CUST-123",
    "paymentMethod": "credit-card-5555",
    "items": [
      {
        "itemId": "item-001",
        "name": "Laptop",
        "quantity": 1,
        "price": 999.99
      },
      {
        "itemId": "item-002",
        "name": "Mouse",
        "quantity": 2,
        "price": 29.99
      }
    ]
  }'

sleep 2

# Test 2: Out of Stock Order
run_test "Test 2: Out of Stock Order" \
  "Testing with items that are out of stock..." \
  '{
    "orderId": "ORD-002",
    "customerId": "CUST-456",
    "paymentMethod": "credit-card-5555",
    "items": [
      {
        "itemId": "item-003",
        "name": "Keyboard",
        "quantity": 1,
        "price": 79.99
      },
      {
        "itemId": "item-005",
        "name": "Headphones",
        "quantity": 1,
        "price": 149.99
      }
    ]
  }'

sleep 2

# Test 3: Payment Failure (Card Declined)
run_test "Test 3: Payment Failure - Card Declined" \
  "Testing with a payment method that will be declined..." \
  '{
    "orderId": "ORD-003",
    "customerId": "CUST-789",
    "paymentMethod": "4111111111111111",
    "items": [
      {
        "itemId": "item-001",
        "name": "Laptop",
        "quantity": 1,
        "price": 999.99
      }
    ]
  }'

sleep 2

# Test 4: Payment Failure (Insufficient Funds)
run_test "Test 4: Payment Failure - Insufficient Funds" \
  "Testing with insufficient funds scenario..." \
  '{
    "orderId": "ORD-004",
    "customerId": "CUST-999",
    "paymentMethod": "4222222222222222",
    "items": [
      {
        "itemId": "item-004",
        "name": "Monitor",
        "quantity": 2,
        "price": 399.99
      }
    ]
  }'

echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}All tests completed!${NC}"
echo -e "${GREEN}Check your OpenTelemetry backend to view the traces${NC}"
echo -e "${GREEN}========================================${NC}"
0000000..a8e7e8c --- /dev/null +++ b/.aws-sam/build/template.yaml @@ -0,0 +1,140 @@ +AWSTemplateFormatVersion: '2010-09-09' +Transform: AWS::Serverless-2016-10-31 +Description: Lambda OpenTelemetry Demo - Order, Inventory, and Payment Services with + AWS ADOT Layer +Globals: + Function: + Timeout: 30 + MemorySize: 512 + Runtime: nodejs20.x + Environment: + Variables: + AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler + OTEL_SERVICE_NAME: will-be-overridden + OTEL_TRACES_SAMPLER: AlwaysOn + OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf + OTEL_EXPORTER_OTLP_ENDPOINT: + Ref: OtelCollectorEndpoint + OTEL_PROPAGATORS: tracecontext + OTEL_RESOURCE_ATTRIBUTES: + Fn::Sub: deployment.environment=${Environment} +Parameters: + OtelCollectorEndpoint: + Type: String + Default: http://localhost:4318 + Description: OpenTelemetry Collector endpoint URL (without /v1/traces path) + Environment: + Type: String + Default: production + Description: Deployment environment (e.g., development, staging, production) + AdotLayerArn: + Type: String + Default: arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5 + Description: 'ARN of the AWS Distro for OpenTelemetry (ADOT) Lambda Layer for + Node.js. 
+ + Find the latest ARN for your region at: + + https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + + ' +Resources: + OrderServiceFunction: + Type: AWS::Serverless::Function + Metadata: + BuildMethod: nodejs20.x + SamResourceId: OrderServiceFunction + Properties: + FunctionName: order-service + CodeUri: OrderServiceFunction + Handler: src/order-service/index.handler + Description: Order Service - Orchestrates order processing + Layers: + - Ref: AdotLayerArn + Environment: + Variables: + OTEL_SERVICE_NAME: order-service + INVENTORY_SERVICE_URL: + Fn::Sub: https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/inventory + PAYMENT_SERVICE_URL: + Fn::Sub: https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/payment + Events: + OrderApi: + Type: Api + Properties: + Path: /order + Method: post + RestApiId: + Ref: ApiGateway + InventoryServiceFunction: + Type: AWS::Serverless::Function + Metadata: + BuildMethod: nodejs20.x + SamResourceId: InventoryServiceFunction + Properties: + FunctionName: inventory-service + CodeUri: InventoryServiceFunction + Handler: src/inventory-service/index.handler + Description: Inventory Service - Checks item availability + Layers: + - Ref: AdotLayerArn + Environment: + Variables: + OTEL_SERVICE_NAME: inventory-service + Events: + InventoryApi: + Type: Api + Properties: + Path: /inventory + Method: post + RestApiId: + Ref: ApiGateway + PaymentServiceFunction: + Type: AWS::Serverless::Function + Metadata: + BuildMethod: nodejs20.x + SamResourceId: PaymentServiceFunction + Properties: + FunctionName: payment-service + CodeUri: PaymentServiceFunction + Handler: src/payment-service/index.handler + Description: Payment Service - Processes payments + Layers: + - Ref: AdotLayerArn + Environment: + Variables: + OTEL_SERVICE_NAME: payment-service + Events: + PaymentApi: + Type: Api + Properties: + Path: /payment + Method: post + RestApiId: + Ref: ApiGateway + ApiGateway: + Type: AWS::Serverless::Api + 
Properties: + StageName: Prod + Description: API Gateway for Lambda OpenTelemetry Demo +Outputs: + ApiEndpoint: + Description: API Gateway endpoint URL + Value: + Fn::Sub: https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod + OrderServiceUrl: + Description: Order Service endpoint + Value: + Fn::Sub: https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/order + InventoryServiceUrl: + Description: Inventory Service endpoint + Value: + Fn::Sub: https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/inventory + PaymentServiceUrl: + Description: Payment Service endpoint + Value: + Fn::Sub: https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/payment + AdotLayerVersion: + Description: ADOT Lambda Layer ARN being used + Value: + Ref: AdotLayerArn diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..3f3aa74 --- /dev/null +++ b/.env.example @@ -0,0 +1,7 @@ +# OpenTelemetry Configuration +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +ENVIRONMENT=development + +# Service URLs (for local testing) +INVENTORY_SERVICE_URL=http://localhost:3001/inventory +PAYMENT_SERVICE_URL=http://localhost:3002/payment diff --git a/ADOT-LAYER-CONFIG.md b/ADOT-LAYER-CONFIG.md new file mode 100644 index 0000000..cffddc9 --- /dev/null +++ b/ADOT-LAYER-CONFIG.md @@ -0,0 +1,366 @@ +# AWS ADOT Lambda Layer Configuration Guide + +This document explains how to configure and use the AWS Distro for OpenTelemetry (ADOT) Lambda Layer for automatic instrumentation. + +## Overview + +The AWS ADOT Lambda Layer provides automatic OpenTelemetry instrumentation for Lambda functions without requiring you to bundle OpenTelemetry SDKs in your deployment package. 
+ +## Layer ARNs by Region + +### Node.js 18.x ADOT Layer ARNs (Latest: v1-18-1) + +| Region | ARN | +|--------|-----| +| us-east-1 | `arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| us-east-2 | `arn:aws:lambda:us-east-2:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| us-west-1 | `arn:aws:lambda:us-west-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| us-west-2 | `arn:aws:lambda:us-west-2:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| eu-west-1 | `arn:aws:lambda:eu-west-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| eu-central-1 | `arn:aws:lambda:eu-central-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| ap-southeast-1 | `arn:aws:lambda:ap-southeast-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | +| ap-northeast-1 | `arn:aws:lambda:ap-northeast-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` | + +**Find the latest ARNs**: https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + +## Required Environment Variables + +### Essential Configuration + +```yaml +Environment: + Variables: + # Enable ADOT auto-instrumentation wrapper + AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler + + # Service name for identification in traces + OTEL_SERVICE_NAME: your-service-name + + # Sampling configuration (AlwaysOn for demo, adjust for production) + OTEL_TRACES_SAMPLER: AlwaysOn + + # Protocol for OTLP export + OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf + + # OpenTelemetry Collector endpoint (without /v1/traces) + OTEL_EXPORTER_OTLP_ENDPOINT: http://your-collector:4318 + + # Trace context propagation format + OTEL_PROPAGATORS: tracecontext +``` + +### Advanced Configuration Options + +```yaml +Environment: + Variables: + # Resource attributes (for environment, version, etc.) 
+ OTEL_RESOURCE_ATTRIBUTES: deployment.environment=production,service.version=1.0.0 + + # Specific endpoint for traces (optional, overrides OTEL_EXPORTER_OTLP_ENDPOINT) + OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: http://your-collector:4318/v1/traces + + # Enable/disable specific instrumentations + OTEL_INSTRUMENTATION_AWS_SDK_ENABLED: true + OTEL_INSTRUMENTATION_HTTP_ENABLED: true + + # Span attribute limits + OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT: 4095 + OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT: 128 +``` + +## Exporter Backends + +### 1. Jaeger (Local Development) + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: http://localhost:4318 +OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf +``` + +Run Jaeger with OTLP support: +```bash +docker run -d --name jaeger \ + -e COLLECTOR_OTLP_ENABLED=true \ + -p 16686:16686 \ + -p 4318:4318 \ + jaegertracing/all-in-one:latest +``` + +### 2. AWS X-Ray (with ADOT Collector) + +If you want to send traces to AWS X-Ray, deploy an ADOT Collector in your VPC: + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: http://adot-collector:4318 +OTEL_PROPAGATORS: tracecontext,xray +``` + +ADOT Collector configuration: +```yaml +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + +exporters: + awsxray: + region: us-east-1 + +service: + pipelines: + traces: + receivers: [otlp] + exporters: [awsxray] +``` + +### 3. Grafana Cloud / Honeycomb / DataDog + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: https://your-saas-endpoint +OTEL_EXPORTER_OTLP_HEADERS: Authorization=Bearer your-api-key +``` + +### 4. Self-Hosted OpenTelemetry Collector + +Deploy a collector in your VPC or use Lambda Extension: + +```yaml +OTEL_EXPORTER_OTLP_ENDPOINT: http://collector:4318 +``` + +## How Auto-Instrumentation Works + +### 1. Wrapper Execution + +The `AWS_LAMBDA_EXEC_WRAPPER` environment variable points to `/opt/otel-handler`, which: +- Initializes the OpenTelemetry SDK before your handler +- Automatically instruments common libraries (http, https, aws-sdk, etc.) 
+- Creates a root span for each Lambda invocation +- Propagates trace context from incoming requests + +### 2. Automatic Instrumentation + +The layer automatically instruments: +- ✅ AWS SDK calls +- ✅ HTTP/HTTPS requests (axios, node-fetch, http, https) +- ✅ Lambda invocation (creates root span) +- ✅ Downstream service calls with trace context propagation + +### 3. Context Propagation + +The layer automatically: +- Extracts W3C trace context from incoming API Gateway requests +- Injects trace context into outgoing HTTP requests +- Maintains trace context across async operations + +## Usage in Your Code + +### Basic Usage (Automatic) + +With the layer configured, your Lambda function is automatically instrumented: + +```javascript +exports.handler = async (event) => { + // Automatically traced! + const response = await axios.get('https://api.example.com'); + return { statusCode: 200, body: JSON.stringify(response.data) }; +}; +``` + +### Adding Custom Spans + +For additional business logic spans: + +```javascript +const api = require('@opentelemetry/api'); + +exports.handler = async (event) => { + const tracer = api.trace.getTracer('my-service', '1.0.0'); + + return tracer.startActiveSpan('business-operation', async (span) => { + try { + span.setAttribute('customer.id', event.customerId); + + // Your business logic + const result = await processOrder(event); + + span.setStatus({ code: api.SpanStatusCode.OK }); + return result; + } catch (error) { + span.recordException(error); + span.setStatus({ code: api.SpanStatusCode.ERROR }); + throw error; + } finally { + span.end(); + } + }); +}; +``` + +### Adding Attributes to Current Span + +```javascript +const api = require('@opentelemetry/api'); + +exports.handler = async (event) => { + const span = api.trace.getActiveSpan(); + if (span) { + span.setAttribute('order.id', event.orderId); + span.setAttribute('order.amount', event.amount); + } + + // Your handler logic +}; +``` + +## Trace Context Propagation + +### 
Automatic (HTTP Clients) + +The layer automatically propagates context for instrumented HTTP clients: + +```javascript +const axios = require('axios'); + +// Trace context is automatically added to headers! +const response = await axios.post('https://api.example.com/payment', data); +``` + +### Manual (Custom Clients) + +For custom HTTP clients or non-instrumented libraries: + +```javascript +const api = require('@opentelemetry/api'); + +function getTraceHeaders() { + const headers = {}; + const span = api.trace.getActiveSpan(); + + if (span) { + const spanContext = span.spanContext(); + headers.traceparent = `00-${spanContext.traceId}-${spanContext.spanId}-01`; + } + + return headers; +} + +// Use in your custom client +const headers = { ...getTraceHeaders(), 'Content-Type': 'application/json' }; +``` + +## Sampling + +### AlwaysOn (Development/Demo) + +```yaml +OTEL_TRACES_SAMPLER: AlwaysOn +``` + +Traces every request. Good for development but expensive in production. + +### TraceIdRatioBased (Production) + +```yaml +OTEL_TRACES_SAMPLER: TraceIdRatioBased +OTEL_TRACES_SAMPLER_ARG: 0.1 # 10% sampling +``` + +Samples a percentage of requests to reduce costs. + +### ParentBased (Recommended) + +```yaml +OTEL_TRACES_SAMPLER: ParentBased_TraceIdRatioBased +OTEL_TRACES_SAMPLER_ARG: 0.1 +``` + +Respects parent sampling decisions while applying ratio-based sampling to root spans. + +## Performance Considerations + +### Cold Start Impact + +The ADOT layer adds ~200-300ms to cold start time: +- Layer initialization: ~100ms +- Auto-instrumentation setup: ~100-200ms + +### Runtime Overhead + +- Minimal overhead during warm execution (<5ms per invocation) +- Async span export doesn't block Lambda execution +- Batching reduces network calls + +### Optimization Tips + +1. **Use provisioned concurrency** for latency-sensitive functions +2. **Adjust span limits** to reduce memory usage: + ```yaml + OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT: 32 + OTEL_SPAN_EVENT_COUNT_LIMIT: 32 + ``` +3. 
**Use sampling** in high-volume environments +4. **Disable unused instrumentations**: + ```yaml + OTEL_INSTRUMENTATION_AWS_LAMBDA_ENABLED: true + OTEL_INSTRUMENTATION_HTTP_ENABLED: true + OTEL_INSTRUMENTATION_AWS_SDK_ENABLED: false + ``` + +## Troubleshooting + +### No Traces Appearing + +1. Check CloudWatch Logs for ADOT errors: + ```bash + aws logs tail /aws/lambda/your-function --filter-pattern "otel" + ``` + +2. Verify environment variables: + ```bash + aws lambda get-function-configuration --function-name your-function \ + --query 'Environment.Variables' + ``` + +3. Test collector endpoint connectivity: + - Ensure Lambda has network access to the collector + - Check security groups and NACLs + - Verify collector is accepting OTLP HTTP on port 4318 + +### Wrapper Not Running + +Error: `AWS_LAMBDA_EXEC_WRAPPER is set but the wrapper does not exist` + +**Solution**: Verify the layer ARN is correct and matches your region. + +### Trace Context Not Propagating + +1. Ensure `OTEL_PROPAGATORS=tracecontext` is set +2. Check if HTTP library is supported (axios, node-fetch, http, https) +3. For unsupported libraries, manually inject headers + +### High Cold Start Times + +1. Consider using Lambda SnapStart (if available) +2. Use provisioned concurrency +3. Minimize layer count (combine layers if possible) +4. Profile and optimize your application code + +## Best Practices + +1. **Set meaningful service names**: Use descriptive names that reflect the business function +2. **Add business context**: Include order IDs, customer IDs, and other relevant attributes +3. **Handle errors properly**: Record exceptions and set error status on spans +4. **Use semantic conventions**: Follow OpenTelemetry semantic conventions for consistency +5. **Monitor collector health**: Ensure your collector is performant and highly available +6. **Set appropriate sampling**: Balance cost with observability needs +7. 
**Use tags for filtering**: Add environment, version, and region as resource attributes + +## References + +- AWS ADOT Lambda: https://aws-otel.github.io/docs/getting-started/lambda +- OpenTelemetry Lambda Instrumentation: https://opentelemetry.io/docs/platforms/faas/lambda-auto-instrument/ +- W3C Trace Context: https://www.w3.org/TR/trace-context/ +- OpenTelemetry Semantic Conventions: https://opentelemetry.io/docs/specs/semconv/ diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..ff7e7c2 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,366 @@ +# Architecture Details + +## Service Flow + +### 1. Successful Order Flow + +``` +Client + │ + │ POST /order + ├─────────────────────────────────────────┐ + │ │ + ▼ │ +Order Service │ + │ │ + │ 1. Validate request │ + │ 2. Create root span │ + │ │ + │ POST /inventory (with trace context) │ + ├─────────────────────────────┐ │ + │ │ │ + ▼ │ │ +Inventory Service │ │ + │ │ │ + │ 1. Extract trace context │ │ + │ 2. Create child span │ │ + │ 3. Check stock levels │ │ + │ 4. Return availability │ │ + │ │ │ + └─────────────────────────────┤ │ + │ Response: available=true │ │ + ▼ │ │ +Order Service │ │ + │ │ │ + │ POST /payment (with trace context) │ + ├─────────────────────────────┐ │ + │ │ │ + ▼ │ │ +Payment Service │ │ + │ │ │ + │ 1. Extract trace context │ │ + │ 2. Create child span │ │ + │ 3. Process payment │ │ + │ 4. Return result │ │ + │ │ │ + └─────────────────────────────┤ │ + │ Response: success=true │ │ + ▼ │ │ +Order Service │ │ + │ │ │ + │ Complete order processing │ │ + │ │ │ + └─────────────────────────────┤ │ + │ Response: 200 OK │ │ + ▼ │ │ +Client ◄──────────────────────────────────────┘ +``` + +### 2. Out of Stock Flow + +``` +Client → Order Service → Inventory Service + (Out of stock detected) + ← + Order Service + (Skip payment, return 409) + ← +Client +``` + +### 3. 
Payment Failure Flow + +``` +Client → Order Service → Inventory Service + (Items available) + ← + Order Service → Payment Service + (Payment failed) + ← + Order Service + (Return 402) + ← +Client +``` + +## Trace Structure + +### Trace Hierarchy + +``` +Trace ID: 00000000000000000000000000000001 +│ +└─ Span: process-order (Order Service) + │ Duration: 245ms + │ Attributes: + │ - order.id: ORD-001 + │ - customer.id: CUST-123 + │ - order.items.count: 2 + │ + ├─ Span: check-inventory (Order Service) + │ │ Duration: 15ms + │ │ Attributes: + │ │ - service: inventory + │ │ - order.id: ORD-001 + │ │ + │ └─ Span: HTTP POST (Auto-instrumented) + │ │ Duration: 12ms + │ │ + │ └─ Span: check-inventory (Inventory Service) + │ Duration: 8ms + │ Attributes: + │ - inventory.available: true + │ - inventory.items.count: 2 + │ + └─ Span: process-payment (Order Service) + │ Duration: 180ms + │ Attributes: + │ - service: payment + │ - order.id: ORD-001 + │ - payment.method: credit-card-5555 + │ - payment.amount: 1059.97 + │ + └─ Span: HTTP POST (Auto-instrumented) + │ Duration: 175ms + │ + └─ Span: process-payment (Payment Service) + Duration: 170ms + Attributes: + - payment.success: true + - payment.transaction_id: txn-1234567890-abc123 +``` + +## OpenTelemetry Components + +### 1. Tracer Provider + +Manages the lifecycle of tracers and span processors. + +```javascript +const provider = new NodeTracerProvider({ + resource: resource, +}); +``` + +### 2. OTLP Exporter + +Exports spans to OpenTelemetry Collector using HTTP. + +```javascript +const exporter = new OTLPTraceExporter({ + url: 'http://localhost:4318/v1/traces', +}); +``` + +### 3. Batch Span Processor + +Batches spans before export for efficiency. + +```javascript +provider.addSpanProcessor(new BatchSpanProcessor(exporter)); +``` + +### 4. Auto Instrumentations + +Automatically instruments common libraries. 
+ +```javascript +registerInstrumentations({ + instrumentations: [ + getNodeAutoInstrumentations({ + '@opentelemetry/instrumentation-http': { enabled: true }, + '@opentelemetry/instrumentation-https': { enabled: true }, + }), + ], +}); +``` + +## Trace Context Propagation + +### W3C Trace Context Format + +``` +traceparent: 00-{trace-id}-{parent-id}-{trace-flags} + +Example: +traceparent: 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01 + │ │ │ │ + │ └─ Trace ID (32 hex chars) │ └─ Flags + │ └─ Parent Span ID (16 hex chars) + └─ Version (00) +``` + +### Context Injection + +When Order Service calls Inventory Service: + +```javascript +// Order Service +const headers = injectTraceContext(); +// headers = { traceparent: '00-...-...-01' } + +axios.post(inventoryUrl, data, { headers }); +``` + +### Context Extraction + +When Inventory Service receives request: + +```javascript +// Inventory Service +const traceContext = extractTraceContext(event); +// Continue the trace with the same trace ID +``` + +## Span Attributes + +### Semantic Conventions + +Following OpenTelemetry semantic conventions: + +| Attribute | Description | Example | +|-----------|-------------|---------| +| `service.name` | Service identifier | `order-service` | +| `service.version` | Service version | `1.0.0` | +| `deployment.environment` | Environment | `production` | +| `order.id` | Order identifier | `ORD-001` | +| `customer.id` | Customer identifier | `CUST-123` | +| `payment.amount` | Payment amount | `1059.97` | +| `payment.method` | Payment method | `credit-card-5555` | +| `inventory.available` | Availability flag | `true` | +| `http.method` | HTTP method | `POST` | +| `http.status_code` | HTTP status | `200` | + +## Error Handling + +### Error Span Status + +```javascript +span.setStatus({ + code: api.SpanStatusCode.ERROR, + message: error.message, +}); +span.recordException(error); +``` + +### Business Logic Errors vs Technical Errors + +- **Business Logic Errors** (out of stock, payment declined): + - Span status: 
OK + - Custom attributes indicate business failure + - Example: `inventory.available: false` + +- **Technical Errors** (network failure, invalid input): + - Span status: ERROR + - Exception recorded + - Stack trace captured + +## Performance Considerations + +### Batch Processing + +Spans are batched before export to reduce network overhead: + +```javascript +new BatchSpanProcessor(exporter, { + maxQueueSize: 2048, + maxExportBatchSize: 512, + scheduledDelayMillis: 5000, +}) +``` + +### Sampling + +For high-volume systems, implement sampling: + +```javascript +const sampler = new TraceIdRatioBasedSampler(0.1); // 10% sampling +const provider = new NodeTracerProvider({ + resource: resource, + sampler: sampler, +}); +``` + +### Async Export + +Span export is asynchronous and doesn't block Lambda execution. + +## Security + +### Securing OTLP Endpoint + +```javascript +const exporter = new OTLPTraceExporter({ + url: process.env.OTEL_EXPORTER_OTLP_ENDPOINT, + headers: { + 'Authorization': `Bearer ${process.env.OTEL_API_KEY}`, + }, +}); +``` + +### Data Privacy + +- Avoid capturing sensitive data in spans +- Redact PII from span attributes +- Use attribute filtering in the collector + +## Monitoring and Alerting + +### Key Metrics to Monitor + +1. **Trace Completeness** + - Percentage of complete traces + - Missing spans + +2. **Error Rates** + - Spans with ERROR status + - Service-specific error rates + +3. **Latency** + - P50, P95, P99 latencies + - Per-service latency breakdown + +4. 
**Business Metrics** + - Order success rate + - Payment failure rate + - Inventory unavailability rate + +### Example Queries (Jaeger) + +``` +# Find all failed orders +service="order-service" AND error=true + +# Find orders with payment failures +service="payment-service" AND payment.success=false + +# Find slow orders (> 1s) +service="order-service" AND duration>1000ms + +# Find out-of-stock scenarios +service="inventory-service" AND inventory.available=false +``` + +## Troubleshooting + +### Common Issues + +1. **Traces not appearing** + - Check OTLP endpoint connectivity + - Verify Lambda has network access + - Check CloudWatch logs for export errors + +2. **Broken traces** + - Verify trace context propagation + - Check HTTP header forwarding + - Ensure consistent trace ID format + +3. **High latency** + - Review span processor configuration + - Check collector performance + - Consider async export optimization + +4. **Missing spans** + - Verify auto-instrumentation is active + - Check for exceptions during span creation + - Review sampling configuration diff --git a/DEPLOYMENT-VERIFICATION.md b/DEPLOYMENT-VERIFICATION.md new file mode 100644 index 0000000..bc2b641 --- /dev/null +++ b/DEPLOYMENT-VERIFICATION.md @@ -0,0 +1,286 @@ +# Deployment Verification Guide + +This guide helps you verify that the Lambda OpenTelemetry Demo is working correctly. + +## Pre-Deployment Checklist + +- [ ] Node.js 20.x or later installed +- [ ] AWS CLI configured with valid credentials +- [ ] AWS SAM CLI installed +- [ ] Docker installed (for local testing) +- [ ] OpenTelemetry backend ready (Jaeger/Zipkin/etc.) + +## Quick Deployment Steps + +### 1. Start OpenTelemetry Backend + +```bash +# Start Jaeger with Docker Compose +docker-compose up -d jaeger + +# Verify Jaeger is running +curl http://localhost:16686 +``` + +### 2. 
Update Layer ARN + +Edit `template.yaml` and update the `AdotLayerArn` parameter for your region: + +```yaml +AdotLayerArn: + Default: 'arn:aws:lambda:YOUR-REGION:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5' +``` + +Find ARNs for your region: https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + +### 3. Build and Deploy + +```bash +# Install dependencies +npm install + +# Build Lambda functions +sam build + +# Deploy to AWS +sam deploy --guided +``` + +During deployment, provide: +- **Stack name**: `lambda-otel-demo` +- **AWS Region**: Your preferred region (e.g., `us-east-1`) +- **OtelCollectorEndpoint**: Your collector URL (e.g., `http://your-collector:4318`) +- **Environment**: `production` or `development` +- **AdotLayerArn**: ARN for your region +- Confirm changes: `y` + +### 4. Note Deployment Outputs + +Save these outputs from the deployment: +- `ApiEndpoint` - Base API Gateway URL +- `OrderServiceUrl` - Order service endpoint +- `InventoryServiceUrl` - Inventory service endpoint +- `PaymentServiceUrl` - Payment service endpoint + +## Verification Tests + +### Test 1: Successful Order ✅ + +```bash +curl -X POST https://YOUR-API-ENDPOINT/Prod/order \ + -H "Content-Type: application/json" \ + -d '{ + "orderId": "ORD-001", + "customerId": "CUST-123", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + } + ] + }' +``` + +**Expected Response**: HTTP 200 with `"success": true` + +### Test 2: Out of Stock ⚠️ + +```bash +curl -X POST https://YOUR-API-ENDPOINT/Prod/order \ + -H "Content-Type: application/json" \ + -d '{ + "orderId": "ORD-002", + "customerId": "CUST-456", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-003", + "name": "Keyboard", + "quantity": 1, + "price": 79.99 + } + ] + }' +``` + +**Expected Response**: HTTP 409 with `"error": "Items out of stock"` + +### Test 3: Payment Failure 💳 + +```bash +curl -X POST 
https://YOUR-API-ENDPOINT/Prod/order \ + -H "Content-Type: application/json" \ + -d '{ + "orderId": "ORD-003", + "customerId": "CUST-789", + "paymentMethod": "4111111111111111", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + } + ] + }' +``` + +**Expected Response**: HTTP 402 with `"error": "Payment failed"` + +### Test 4: Automated Test Script + +```bash +# Run all tests at once +./test-api.sh https://YOUR-API-ENDPOINT/Prod +``` + +## Trace Verification + +### 1. Access Jaeger UI + +Open http://localhost:16686 in your browser + +### 2. Search for Traces + +- **Service**: Select `order-service` +- **Operation**: Select `process-order` +- Click **Find Traces** + +### 3. Verify Trace Structure + +A successful order trace should show: + +``` +order-service: process-order (root span) +├── order-service: check-inventory +│ └── HTTP POST +│ └── inventory-service: check-inventory +└── order-service: process-payment + └── HTTP POST + └── payment-service: process-payment +``` + +### 4. Verify Span Attributes + +Click on a span and verify these attributes are present: + +**Order Service Span:** +- `order.id` +- `customer.id` +- `order.items.count` +- `payment.method` + +**Inventory Service Span:** +- `inventory.all_available` +- `inventory.items.count` + +**Payment Service Span:** +- `payment.success` +- `payment.amount` +- `payment.transaction_id` + +## Common Issues and Solutions + +### Issue: No traces appearing + +**Solutions:** +1. Check CloudWatch Logs for Lambda errors: + ```bash + aws logs tail /aws/lambda/order-service --follow + ``` + +2. Verify ADOT Layer is attached: + ```bash + aws lambda get-function --function-name order-service \ + --query 'Configuration.Layers' + ``` + +3. Verify environment variables: + ```bash + aws lambda get-function-configuration --function-name order-service \ + --query 'Environment.Variables' + ``` + +4. 
Test collector connectivity (if using VPC): + - Ensure Lambda functions have VPC access + - Check security groups allow outbound traffic to collector + - Verify collector is listening on correct port + +### Issue: Trace context not propagating + +**Solutions:** +1. Verify `OTEL_PROPAGATORS=tracecontext` is set +2. Check API Gateway is passing headers through +3. Verify axios version is compatible (should be 1.13.5) + +### Issue: High cold start times + +**Solutions:** +1. Use provisioned concurrency for latency-sensitive functions +2. Increase Lambda memory (reduces cold start time) +3. Consider Lambda SnapStart if available in your region + +### Issue: Service URLs incorrect + +**Solutions:** +1. Verify API Gateway is deployed correctly +2. Check environment variables `INVENTORY_SERVICE_URL` and `PAYMENT_SERVICE_URL` +3. Ensure API Gateway stage is `Prod` + +## Performance Benchmarks + +Expected performance metrics: + +| Metric | Value | +|--------|-------| +| Cold Start | 800-1200ms | +| Warm Invocation | 50-150ms | +| Trace Export Overhead | <5ms | +| E2E Order Processing | 200-500ms | + +## Cleanup + +To remove all resources: + +```bash +# Delete the CloudFormation stack +sam delete --stack-name lambda-otel-demo + +# Stop Jaeger +docker-compose down + +# Clean build artifacts +rm -rf .aws-sam node_modules +``` + +## Next Steps + +1. **Configure Sampling**: Update `OTEL_TRACES_SAMPLER` for production +2. **Add Alerting**: Set up CloudWatch alarms for Lambda errors +3. **Monitor Costs**: Track Lambda invocations and data transfer +4. **Customize Inventory**: Modify mock inventory in `src/inventory-service/index.js` +5. 
**Add More Scenarios**: Create additional test cases for your use case + +## Support + +For issues or questions: +- Review [README.md](./README.md) for detailed documentation +- Check [ADOT-LAYER-CONFIG.md](./ADOT-LAYER-CONFIG.md) for configuration details +- Review [ARCHITECTURE.md](./ARCHITECTURE.md) for technical details +- Check AWS ADOT documentation: https://aws-otel.github.io/ +- OpenTelemetry documentation: https://opentelemetry.io/docs/ + +## Success Criteria + +✅ All 3 Lambda functions deployed successfully +✅ API Gateway endpoints accessible +✅ Test scenarios return expected responses +✅ Traces visible in backend UI +✅ Trace context propagates across services +✅ Span attributes contain business data +✅ Error scenarios traced correctly + +Congratulations! Your Lambda OpenTelemetry Demo is fully operational! 🎉 diff --git a/README.md b/README.md index 11f8c55..aff152d 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,476 @@ -# lambda-opentelemetry-demo -Sample lambda project demonstrating how to instrument lambda with OpenTelemetry +# Lambda OpenTelemetry Demo + +A comprehensive AWS Lambda project demonstrating how to integrate OpenTelemetry for end-to-end distributed tracing across multiple microservices **without using AWS CloudWatch or X-Ray**, using the **AWS Distro for OpenTelemetry (ADOT) Lambda Layer** for automatic instrumentation. + +## 🎯 Overview + +This project implements a realistic e-commerce order processing workflow using three Lambda functions representing distinct microservices: + +1. **Order Service** - Orchestrates the order processing workflow +2. **Inventory Service** - Validates item availability +3. **Payment Service** - Processes payments + +The services communicate with each other while propagating trace context using OpenTelemetry, enabling complete visibility into request flows across all services. 
+ +## 🏗️ Architecture + +``` +┌─────────────────┐ +│ Order Service │ +│ (Lambda) │ +└────────┬────────┘ + │ + ├──────────────────┐ + │ │ + ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ +│ Inventory Svc │ │ Payment Svc │ +│ (Lambda) │ │ (Lambda) │ +└─────────────────┘ └─────────────────┘ + │ │ + └──────┬───────────┘ + ▼ + OpenTelemetry + Collector + │ + ▼ + Tracing Backend + (Jaeger/Zipkin/etc) +``` + +## ✨ Features + +- ✅ **AWS ADOT Lambda Layer** - Uses AWS Distro for OpenTelemetry Lambda Layer for zero-code instrumentation +- ✅ **Automatic Instrumentation** - Auto-instruments Lambda, HTTP/HTTPS, and AWS SDK without code changes +- ✅ **Distributed Tracing** - End-to-end trace propagation across Lambda functions +- ✅ **W3C Trace Context** - Standard trace context propagation using W3C format +- ✅ **OTLP Export** - Exports traces using OTLP HTTP protocol +- ✅ **Custom Spans** - Manual span creation for business logic insights +- ✅ **No CloudWatch/X-Ray** - Direct export to any OpenTelemetry-compatible backend +- ✅ **Error Scenarios** - Built-in test scenarios for: + - Out of stock items + - Payment failures (card declined, insufficient funds) + - Network errors + +## 📋 Prerequisites + +- Node.js 20.x or later +- AWS CLI configured with appropriate credentials +- AWS SAM CLI for deployment +- OpenTelemetry Collector or compatible backend (Jaeger, Zipkin, etc.) + +## 🚀 Quick Start + +### 1. Install Dependencies + +```bash +npm install +``` + +### 2. Set Up OpenTelemetry Backend + +You need an OpenTelemetry-compatible backend to receive and visualize traces. 
Here are some options: + +#### Option A: Jaeger (Recommended for local testing) + +```bash +# Run Jaeger all-in-one with Docker (or use docker-compose) +docker-compose up -d jaeger + +# Or manually: +docker run -d --name jaeger \ + -e COLLECTOR_OTLP_ENABLED=true \ + -p 16686:16686 \ + -p 4318:4318 \ + jaegertracing/all-in-one:latest + +# Access Jaeger UI at http://localhost:16686 +``` + +#### Option B: Zipkin + +```bash +# Run Zipkin with Docker +docker run -d --name zipkin \ + -p 9411:9411 \ + openzipkin/zipkin:latest + +# Note: You'll need an OpenTelemetry Collector to convert OTLP to Zipkin format +``` + +#### Option C: Grafana Cloud, Honeycomb, or other SaaS providers + +Configure the OTEL_EXPORTER_OTLP_ENDPOINT parameter with your provider's endpoint. + +### 3. Update Lambda Layer ARN + +**Important**: Update the `AdotLayerArn` parameter in `template.yaml` with the correct ARN for your AWS region. + +Find the latest ARN for your region here: +- https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + +Example ARNs: +- **us-east-1**: `arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` +- **us-west-2**: `arn:aws:lambda:us-west-2:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` +- **eu-west-1**: `arn:aws:lambda:eu-west-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5` + +See [ADOT-LAYER-CONFIG.md](./ADOT-LAYER-CONFIG.md) for complete list. + +### 4. 
Deploy to AWS + +```bash +# Build the Lambda functions +sam build + +# Deploy (follow the prompts) +sam deploy --guided + +# Note the API endpoint from the outputs +``` + +During `sam deploy --guided`, you'll be prompted to provide: +- **Stack name** (e.g., `lambda-otel-demo`) +- **AWS Region** (e.g., `us-east-1`) +- **OtelCollectorEndpoint** - Your OpenTelemetry Collector endpoint without `/v1/traces` (e.g., `http://your-collector:4318`) +- **Environment** - Deployment environment (e.g., `production`, `staging`) +- **AdotLayerArn** - AWS ADOT Lambda Layer ARN for your region +- Confirm changes before deployment + +**Note**: If you're using a collector in a VPC, ensure your Lambda functions have VPC access configured. + +### 5. Test the Services + +Use the provided test script: + +```bash +# Replace with your actual API endpoint from SAM deploy output +./test-api.sh https://your-api-id.execute-api.region.amazonaws.com/Prod +``` + +Or manually test with curl: + +```bash +# Successful order +curl -X POST https://your-api-endpoint/Prod/order \ + -H "Content-Type: application/json" \ + -d @test-payloads.json +``` + +### 6. View Traces + +Open your tracing backend UI: +- Jaeger: http://localhost:16686 +- Select service: `order-service` +- Click "Find Traces" to view the end-to-end traces + +## 🔧 AWS ADOT Lambda Layer + +This project uses the **AWS Distro for OpenTelemetry (ADOT) Lambda Layer** for automatic instrumentation. The layer provides: + +### Key Benefits + +1. **Zero-Code Instrumentation** - Auto-instruments your Lambda function without code changes +2. **No Dependency Bundling** - OpenTelemetry SDKs are provided by the layer, reducing deployment package size +3. **Automatic Context Propagation** - Trace context is automatically propagated across service calls +4. 
**AWS-Optimized** - Maintained and supported by AWS with regular updates + +### How It Works + +The layer works through the `AWS_LAMBDA_EXEC_WRAPPER` environment variable: + +```yaml +Environment: + Variables: + AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler # Enables auto-instrumentation + OTEL_SERVICE_NAME: order-service # Service identifier + OTEL_TRACES_SAMPLER: AlwaysOn # Sampling strategy + OTEL_EXPORTER_OTLP_ENDPOINT: http://collector:4318 +``` + +When your Lambda function is invoked: +1. The wrapper initializes OpenTelemetry SDK +2. Auto-instrumentation is activated for HTTP, AWS SDK, and Lambda runtime +3. A root span is created for the Lambda invocation +4. Your handler executes within the trace context +5. Spans are exported to your configured OTLP endpoint + +### What Gets Instrumented Automatically + +- ✅ Lambda function invocations +- ✅ HTTP/HTTPS requests (axios, node-fetch, native http/https) +- ✅ AWS SDK v2 and v3 calls +- ✅ Database clients (when using instrumented libraries) +- ✅ Trace context propagation in headers + +### Adding Custom Instrumentation + +While the layer handles most instrumentation automatically, you can add custom spans for business logic: + +```javascript +const api = require('@opentelemetry/api'); + +exports.handler = async (event) => { + const tracer = api.trace.getTracer('my-service'); + + return tracer.startActiveSpan('custom-operation', async (span) => { + span.setAttribute('business.attribute', 'value'); + // Your logic here + span.end(); + }); +}; +``` + +For detailed configuration options, see [ADOT-LAYER-CONFIG.md](./ADOT-LAYER-CONFIG.md). + +## 📝 Test Scenarios + +### Scenario 1: Successful Order ✅ + +Tests the happy path where all services succeed. 
+ +```json +{ + "orderId": "ORD-001", + "customerId": "CUST-123", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + } + ] +} +``` + +**Expected Trace:** +- Order Service → Inventory Service (Success) +- Order Service → Payment Service (Success) +- Order completed with HTTP 200 + +### Scenario 2: Out of Stock ⚠️ + +Tests inventory unavailability. + +```json +{ + "orderId": "ORD-002", + "customerId": "CUST-456", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-003", + "name": "Keyboard", + "quantity": 1, + "price": 79.99 + } + ] +} +``` + +**Expected Trace:** +- Order Service → Inventory Service (Out of stock detected) +- Payment Service NOT called +- Order fails with HTTP 409 + +**Out of Stock Items:** +- `item-003` - Keyboard (0 in stock) +- `item-005` - Headphones (0 in stock) + +### Scenario 3: Payment Failure 💳 + +Tests payment processing failures. + +**Card Declined:** +```json +{ + "orderId": "ORD-003", + "customerId": "CUST-789", + "paymentMethod": "4111111111111111", + "items": [{"itemId": "item-001", "quantity": 1, "price": 999.99}] +} +``` + +**Insufficient Funds:** +```json +{ + "orderId": "ORD-004", + "customerId": "CUST-999", + "paymentMethod": "4222222222222222", + "items": [{"itemId": "item-004", "quantity": 2, "price": 399.99}] +} +``` + +**Test Payment Methods:** +- `4111111111111111` - Card declined +- `4222222222222222` - Insufficient funds +- `4333333333333333` - Expired card +- Any other value - Success (with 10% random failure rate) + +**Expected Trace:** +- Order Service → Inventory Service (Success) +- Order Service → Payment Service (Payment fails) +- Order fails with HTTP 402 + +## 🔍 Understanding the Traces + +Each trace will show: + +1. **Span Attributes:** + - Order ID, Customer ID + - Item counts and details + - Payment amounts and methods + - Service names and versions + - Success/failure reasons + +2. 
**Span Hierarchy:** + ``` + process-order (Order Service) + ├── check-inventory (Order Service -> Inventory Service) + │ └── check-inventory (Inventory Service) + └── process-payment (Order Service -> Payment Service) + └── process-payment (Payment Service) + ``` + +3. **Trace Context Propagation:** + - Trace IDs are consistent across all services + - Parent-child relationships are maintained + - W3C traceparent headers are used + +## 🛠️ Configuration + +### Environment Variables + +Configure these in the SAM template or Lambda console: + +- `OTEL_EXPORTER_OTLP_ENDPOINT` - OpenTelemetry Collector endpoint (default: `http://localhost:4318/v1/traces`) +- `ENVIRONMENT` - Deployment environment (default: `development`) +- `INVENTORY_SERVICE_URL` - Inventory service endpoint +- `PAYMENT_SERVICE_URL` - Payment service endpoint + +### Inventory Configuration + +Modify the mock inventory in `src/inventory-service/index.js`: + +```javascript +const inventory = { + 'item-001': { name: 'Laptop', quantity: 10 }, + 'item-002': { name: 'Mouse', quantity: 50 }, + 'item-003': { name: 'Keyboard', quantity: 0 }, // Out of stock + // Add more items... +}; +``` + +### Payment Configuration + +Modify failure patterns in `src/payment-service/index.js`: + +```javascript +const FAILURE_PATTERNS = { + 'card-declined': ['4111111111111111', 'card-declined'], + 'insufficient-funds': ['4222222222222222', 'insufficient-funds'], + // Add more patterns... 
+}; +``` + +## 📂 Project Structure + +``` +lambda-opentelemetry-demo/ +├── src/ +│ ├── order-service/ # Order orchestration service +│ │ └── index.js +│ ├── inventory-service/ # Inventory check service +│ │ └── index.js +│ ├── payment-service/ # Payment processing service +│ │ └── index.js +│ └── shared/ # Shared utilities +│ ├── tracer.js # OpenTelemetry setup +│ └── utils.js # Common utilities +├── template.yaml # AWS SAM template +├── package.json # Node.js dependencies +├── test-payloads.json # Sample test data +├── test-api.sh # API test script +└── README.md # This file +``` + +## 🔧 Local Development + +To test locally without deploying to AWS: + +1. Start your OpenTelemetry backend (e.g., Jaeger) +2. Use AWS SAM Local: + +```bash +sam build +sam local start-api --env-vars env.json +``` + +Create `env.json`: +```json +{ + "Parameters": { + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://host.docker.internal:4318/v1/traces", + "INVENTORY_SERVICE_URL": "http://host.docker.internal:3000/inventory", + "PAYMENT_SERVICE_URL": "http://host.docker.internal:3000/payment" + } +} +``` + +## 📊 Observability Best Practices + +This demo demonstrates several observability best practices: + +1. **Structured Logging** - Logs include trace context for correlation +2. **Semantic Attributes** - Meaningful span attributes for filtering and analysis +3. **Error Handling** - Errors are captured as span events +4. **Business Context** - Business-relevant data in spans (order IDs, amounts, etc.) +5. **Service Naming** - Clear service names for easy identification +6. 
**Context Propagation** - W3C standard trace context across service boundaries + +## 🔐 Security Considerations + +- This is a demo project - do not use in production without proper security hardening +- Implement proper authentication/authorization for API endpoints +- Secure OpenTelemetry Collector endpoints +- Use AWS Secrets Manager for sensitive configuration +- Enable API Gateway throttling and request validation + +## 🤝 Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. + +## 📄 License + +MIT License - See LICENSE file for details + +## 🎓 Learning Resources + +- [OpenTelemetry Documentation](https://opentelemetry.io/docs/) +- [AWS Lambda Best Practices](https://docs.aws.amazon.com/lambda/latest/dg/best-practices.html) +- [W3C Trace Context](https://www.w3.org/TR/trace-context/) +- [Distributed Tracing Guide](https://opentelemetry.io/docs/concepts/signals/traces/) + +## 🐛 Troubleshooting + +### Traces not appearing in backend + +- Verify OTEL_EXPORTER_OTLP_ENDPOINT is correctly configured +- Check Lambda logs in CloudWatch for errors +- Ensure OpenTelemetry Collector/backend is running and accessible +- Verify network connectivity between Lambda and collector + +### Service-to-service calls failing + +- Check that service URLs are correctly configured +- Verify API Gateway endpoints are deployed +- Review Lambda function logs for errors +- Check IAM permissions if using private endpoints + +### High latency or timeouts + +- Increase Lambda timeout in template.yaml +- Check OpenTelemetry Collector performance +- Consider using asynchronous export +- Review batch span processor configuration diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..7f969f4 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,37 @@ +version: '3.8' + +services: + # Jaeger all-in-one with OTLP support + jaeger: + image: jaegertracing/all-in-one:latest + container_name: jaeger-otel + environment: + - 
#!/bin/bash

# Local test preparation script for the Lambda OpenTelemetry demo.
#
# What it does:
#   1. verifies that the Docker daemon is reachable,
#   2. starts the Jaeger service through Docker Compose unless `docker ps`
#      already lists something matching "jaeger-otel",
#   3. runs `sam build`,
#   4. prints the commands for starting the local API and running the tests
#      (the script itself does not start the API or run the tests).

# Abort on the first unhandled command failure.
set -e

echo "Starting Lambda OpenTelemetry Demo Local Test"
echo "=============================================="
echo ""

# Check if Docker is running
if ! docker info > /dev/null 2>&1; then
    echo "Error: Docker is not running. Please start Docker first." >&2
    exit 1
fi

# Prefer the Compose v2 plugin (`docker compose`); fall back to the legacy
# standalone `docker-compose` binary that the script originally required.
if docker compose version > /dev/null 2>&1; then
    COMPOSE=(docker compose)
else
    COMPOSE=(docker-compose)
fi

# Check if Jaeger is running
if ! docker ps | grep -q jaeger-otel; then
    echo "Starting Jaeger..."
    "${COMPOSE[@]}" up -d jaeger
    echo "Waiting for Jaeger to be ready..."
    # Fixed grace period; the script does not probe Jaeger for readiness.
    sleep 5
else
    echo "Jaeger is already running"
fi

echo ""
echo "Jaeger UI available at: http://localhost:16686"
echo ""

# Build the Lambda functions
echo "Building Lambda functions..."

# With `set -e` a failing `sam build` would terminate the script before a
# separate `$?` check could run, so the command is tested directly to keep
# the error message reachable.
if ! sam build; then
    echo "Error: Build failed" >&2
    exit 1
fi

echo ""
echo "Lambda functions built successfully!"
echo ""
echo "To test the API:"
echo "1. Start SAM local API in one terminal:"
echo "   sam local start-api --env-vars env.json"
echo ""
echo "2. In another terminal, run the test script:"
echo "   ./test-api.sh http://127.0.0.1:3000"
echo ""
echo "3. View traces in Jaeger UI:"
echo "   http://localhost:16686"
echo ""
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": 
"sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/function-bind": { 
+ "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + 
"license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": 
"https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..abe61a7 --- /dev/null +++ b/package.json @@ -0,0 +1,23 @@ +{ + "name": "lambda-opentelemetry-demo", + "version": "1.0.0", + "description": "Sample AWS Lambda project demonstrating OpenTelemetry integration without CloudWatch/X-Ray", + "main": "index.js", + "scripts": { + "test": "echo \"No tests specified\" && exit 0", + "deploy": "sam deploy --guided", + "build": "sam build" + }, + "keywords": [ + "aws", + "lambda", + "opentelemetry", + "tracing", + "nodejs" + ], + "author": "", + "license": "MIT", + "dependencies": { + "axios": "^1.13.5" + } +} diff --git a/src/inventory-service/index.js b/src/inventory-service/index.js new file mode 100644 index 0000000..7cb0b00 --- /dev/null +++ b/src/inventory-service/index.js @@ -0,0 +1,143 @@ +const { withSpan, addSpanAttributes } = require('../shared/tracer'); +const { createResponse } = require('../shared/utils'); + +// Mock inventory database +const inventory = { + 'item-001': { name: 'Laptop', quantity: 10 }, + 'item-002': { name: 'Mouse', quantity: 50 }, + 'item-003': { name: 'Keyboard', quantity: 0 }, // Out of stock + 'item-004': { name: 'Monitor', quantity: 5 }, + 'item-005': { name: 'Headphones', quantity: 0 }, // Out of stock +}; + +/** + * Inventory Service Lambda Handler + * This service checks if requested items are available in inventory + * OpenTelemetry instrumentation is automatically provided by the Lambda Layer + */ +exports.handler = async (event) => { + console.log('Inventory Service received event:', JSON.stringify(event)); + + return withSpan('check-inventory', async (span) => { + try { + // Parse request body + const body = typeof event.body === 'string' ? 
JSON.parse(event.body) : event.body; + const { orderId, items } = body; + + // Validate input + if (!orderId || !items || !Array.isArray(items)) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Missing or invalid required fields', + }); + return createResponse(400, { + available: false, + error: 'Missing or invalid required fields: orderId, items (array)', + }); + } + + addSpanAttributes({ + 'order.id': orderId, + 'inventory.items.count': items.length, + }); + + console.log(`Checking inventory for order ${orderId}`); + + // Check each item + const unavailableItems = []; + let allAvailable = true; + + for (const item of items) { + const { itemId, quantity } = item; + + if (!itemId || !quantity) { + continue; + } + + addSpanAttributes({ + [`inventory.item.${itemId}.requested`]: quantity, + }); + + // Check if item exists in inventory + if (!inventory[itemId]) { + console.log(`Item ${itemId} not found in inventory`); + unavailableItems.push({ + itemId, + reason: 'Item not found', + }); + allAvailable = false; + addSpanAttributes({ + [`inventory.item.${itemId}.status`]: 'not-found', + }); + continue; + } + + const availableQuantity = inventory[itemId].quantity; + addSpanAttributes({ + [`inventory.item.${itemId}.available`]: availableQuantity, + }); + + // Check if sufficient quantity is available + if (availableQuantity < quantity) { + console.log(`Insufficient quantity for item ${itemId}. Requested: ${quantity}, Available: ${availableQuantity}`); + unavailableItems.push({ + itemId, + name: inventory[itemId].name, + requestedQuantity: quantity, + availableQuantity: availableQuantity, + reason: availableQuantity === 0 ? 'Out of stock' : 'Insufficient quantity', + }); + allAvailable = false; + addSpanAttributes({ + [`inventory.item.${itemId}.status`]: 'insufficient', + }); + } else { + console.log(`Item ${itemId} is available. 
Requested: ${quantity}, Available: ${availableQuantity}`); + addSpanAttributes({ + [`inventory.item.${itemId}.status`]: 'available', + }); + } + } + + addSpanAttributes({ + 'inventory.all_available': allAvailable, + 'inventory.unavailable_count': unavailableItems.length, + }); + + if (allAvailable) { + console.log(`All items available for order ${orderId}`); + + return createResponse(200, { + available: true, + orderId, + message: 'All items are available', + }); + } else { + console.log(`Some items unavailable for order ${orderId}:`, unavailableItems); + addSpanAttributes({ + 'inventory.failure_reason': 'items-unavailable', + }); + + return createResponse(200, { + available: false, + orderId, + unavailableItems, + message: 'Some items are not available', + }); + } + + } catch (error) { + console.error('Inventory check error:', error); + addSpanAttributes({ + 'error': true, + 'error.message': error.message, + }); + + return createResponse(500, { + available: false, + error: 'Internal server error', + details: error.message, + }); + } + }); +}; diff --git a/src/order-service/index.js b/src/order-service/index.js new file mode 100644 index 0000000..7d05578 --- /dev/null +++ b/src/order-service/index.js @@ -0,0 +1,181 @@ +const { withSpan, addSpanAttributes } = require('../shared/tracer'); +const { callService, createResponse } = require('../shared/utils'); + +/** + * Order Service Lambda Handler + * This service accepts orders and orchestrates calls to Inventory and Payment services + * OpenTelemetry instrumentation is automatically provided by the Lambda Layer + */ +exports.handler = async (event) => { + console.log('Order Service received event:', JSON.stringify(event)); + + return withSpan('process-order', async (span) => { + try { + // Parse request body + const body = typeof event.body === 'string' ? 
JSON.parse(event.body) : event.body; + const { orderId, customerId, items, paymentMethod } = body; + + // Validate input + if (!orderId || !customerId || !items || !paymentMethod) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Missing required fields', + }); + return createResponse(400, { + success: false, + error: 'Missing required fields: orderId, customerId, items, paymentMethod', + }); + } + + // Add order details to span + addSpanAttributes({ + 'order.id': orderId, + 'customer.id': customerId, + 'order.items.count': items.length, + 'payment.method': paymentMethod, + }); + + console.log(`Processing order ${orderId} for customer ${customerId}`); + + // Step 1: Check inventory + console.log('Step 1: Checking inventory'); + const inventoryUrl = process.env.INVENTORY_SERVICE_URL || 'http://localhost:3001/inventory'; + + let inventoryResult; + try { + inventoryResult = await withSpan('check-inventory', async (inventorySpan) => { + addSpanAttributes({ + 'service': 'inventory', + 'order.id': orderId, + }); + + const result = await callService(inventoryUrl, { + orderId, + items, + }); + + addSpanAttributes({ + 'inventory.available': result.available, + }); + + return result; + }); + } catch (error) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Inventory check failed', + }); + return createResponse(500, { + success: false, + orderId, + error: 'Inventory check failed', + details: error.message, + }); + } + + if (!inventoryResult.available) { + addSpanAttributes({ + 'inventory.status': 'out-of-stock', + 'inventory.unavailable_items': JSON.stringify(inventoryResult.unavailableItems || []), + }); + + console.log(`Order ${orderId} failed: Items out of stock`); + return createResponse(409, { + success: false, + orderId, + error: 'Items out of stock', + unavailableItems: inventoryResult.unavailableItems, + }); + } + + addSpanAttributes({ 'inventory.status': 'available' }); + + // Step 2: Process payment + console.log('Step 2: Processing payment'); 
+ const paymentUrl = process.env.PAYMENT_SERVICE_URL || 'http://localhost:3002/payment'; + + let paymentResult; + try { + paymentResult = await withSpan('process-payment', async (paymentSpan) => { + const totalAmount = items.reduce((sum, item) => sum + (item.price * item.quantity), 0); + + addSpanAttributes({ + 'service': 'payment', + 'order.id': orderId, + 'payment.method': paymentMethod, + 'payment.amount': totalAmount, + }); + + const result = await callService(paymentUrl, { + orderId, + customerId, + amount: totalAmount, + paymentMethod, + }); + + addSpanAttributes({ + 'payment.success': result.success, + 'payment.transaction_id': result.transactionId || 'none', + }); + + return result; + }); + } catch (error) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Payment processing failed', + }); + return createResponse(500, { + success: false, + orderId, + error: 'Payment processing failed', + details: error.message, + }); + } + + if (!paymentResult.success) { + addSpanAttributes({ + 'payment.status': 'failed', + 'payment.failure_reason': paymentResult.reason || 'unknown', + }); + + console.log(`Order ${orderId} failed: Payment failed`); + return createResponse(402, { + success: false, + orderId, + error: 'Payment failed', + reason: paymentResult.reason, + }); + } + + addSpanAttributes({ + 'payment.status': 'success', + 'payment.transaction_id': paymentResult.transactionId, + }); + + // Order successful + console.log(`Order ${orderId} completed successfully`); + + return createResponse(200, { + success: true, + orderId, + message: 'Order processed successfully', + transactionId: paymentResult.transactionId, + totalAmount: items.reduce((sum, item) => sum + (item.price * item.quantity), 0), + }); + + } catch (error) { + console.error('Order processing error:', error); + addSpanAttributes({ + 'error': true, + 'error.message': error.message, + }); + + return createResponse(500, { + success: false, + error: 'Internal server error', + details: error.message, 
// Mock payment processing: each failure reason maps to the substrings of a
// payment method identifier that trigger it.
const FAILURE_PATTERNS = {
  'card-declined': ['4111111111111111', 'card-declined'],
  'insufficient-funds': ['4222222222222222', 'insufficient-funds'],
  'expired-card': ['4333333333333333', 'expired-card'],
};

/**
 * Simulate payment processing with some failure scenarios.
 *
 * A payment method containing any substring listed in FAILURE_PATTERNS fails
 * with the matching reason. Any other method fails at random about 10% of the
 * time (driven by Math.random) and otherwise succeeds with a generated
 * transaction id.
 *
 * @param {string|number} paymentMethod - Payment method identifier; coerced to
 *   a string before matching so a numeric card number does not throw.
 * @param {number} amount - Payment amount (currently not used by the mock).
 * @returns {Object} On failure `{ success: false, reason, transactionId: null }`,
 *   where `reason` is the failure key with every hyphen replaced by a space;
 *   on success `{ success: true, transactionId }`.
 */
function processPayment(paymentMethod, amount) {
  // Global regex: String.prototype.replace with a string pattern only replaces
  // the first hyphen, which turned "rate-limit-exceeded" into
  // "rate limit-exceeded".
  const toReadableReason = (reasonKey) => reasonKey.replace(/-/g, ' ');

  // `.includes` exists only on strings; coerce so e.g. a JSON number is
  // matched instead of raising a TypeError.
  const method = String(paymentMethod);

  // Check for known failure patterns
  for (const [reason, patterns] of Object.entries(FAILURE_PATTERNS)) {
    if (patterns.some((pattern) => method.includes(pattern))) {
      return {
        success: false,
        reason: toReadableReason(reason),
        transactionId: null,
      };
    }
  }

  // Simulate random failures (10% chance)
  if (Math.random() < 0.1) {
    const reasons = ['network-timeout', 'gateway-error', 'rate-limit-exceeded'];
    const randomReason = reasons[Math.floor(Math.random() * reasons.length)];
    return {
      success: false,
      reason: toReadableReason(randomReason),
      transactionId: null,
    };
  }

  // Successful payment
  return {
    success: true,
    transactionId: `txn-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`,
  };
}
const body = typeof event.body === 'string' ? JSON.parse(event.body) : event.body; + const { orderId, customerId, amount, paymentMethod } = body; + + // Validate input + if (!orderId || !customerId || !amount || !paymentMethod) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Missing required fields', + }); + return createResponse(400, { + success: false, + error: 'Missing required fields: orderId, customerId, amount, paymentMethod', + }); + } + + addSpanAttributes({ + 'order.id': orderId, + 'customer.id': customerId, + 'payment.amount': amount, + 'payment.method': paymentMethod, + }); + + console.log(`Processing payment for order ${orderId}, amount: ${amount}`); + + // Validate amount + if (amount <= 0) { + addSpanAttributes({ + 'error': true, + 'error.message': 'Invalid payment amount', + }); + return createResponse(400, { + success: false, + error: 'Invalid payment amount', + }); + } + + // Process payment + const paymentResult = processPayment(paymentMethod, amount); + + addSpanAttributes({ + 'payment.success': paymentResult.success, + }); + + if (!paymentResult.success) { + addSpanAttributes({ + 'payment.failure_reason': paymentResult.reason, + }); + + console.log(`Payment failed for order ${orderId}: ${paymentResult.reason}`); + + return createResponse(200, { + success: false, + orderId, + reason: paymentResult.reason, + message: 'Payment processing failed', + }); + } + + addSpanAttributes({ + 'payment.transaction_id': paymentResult.transactionId, + }); + + console.log(`Payment successful for order ${orderId}. 
Transaction ID: ${paymentResult.transactionId}`); + + return createResponse(200, { + success: true, + orderId, + transactionId: paymentResult.transactionId, + amount, + message: 'Payment processed successfully', + }); + + } catch (error) { + console.error('Payment processing error:', error); + addSpanAttributes({ + 'error': true, + 'error.message': error.message, + }); + + return createResponse(500, { + success: false, + error: 'Internal server error', + details: error.message, + }); + } + }); +}; diff --git a/src/shared/tracer.js b/src/shared/tracer.js new file mode 100644 index 0000000..3cedef5 --- /dev/null +++ b/src/shared/tracer.js @@ -0,0 +1,118 @@ +// When using OpenTelemetry Lambda Layer with auto-instrumentation, +// the @opentelemetry/api is provided by the layer at /opt/nodejs/node_modules +const api = require('@opentelemetry/api'); + +/** + * Get the current active span + * The Lambda Layer automatically creates spans for Lambda invocations + * @returns {Span} Current active span + */ +function getCurrentSpan() { + return api.trace.getActiveSpan(); +} + +/** + * Get a tracer instance + * @param {string} name - Tracer name (usually service name) + * @param {string} version - Service version + * @returns {Tracer} OpenTelemetry tracer instance + */ +function getTracer(name = 'lambda-app', version = '1.0.0') { + return api.trace.getTracer(name, version); +} + +/** + * Create a new span for a specific operation + * @param {string} spanName - Name of the span + * @param {Function} fn - Function to execute within the span + * @param {Object} attributes - Optional span attributes + * @returns {Promise} Result of the function execution + */ +async function withSpan(spanName, fn, attributes = {}) { + const tracer = getTracer(); + return tracer.startActiveSpan(spanName, async (span) => { + try { + // Add custom attributes + Object.entries(attributes).forEach(([key, value]) => { + span.setAttribute(key, value); + }); + + const result = await fn(span); + 
span.setStatus({ code: api.SpanStatusCode.OK }); + return result; + } catch (error) { + span.setStatus({ + code: api.SpanStatusCode.ERROR, + message: error.message, + }); + span.recordException(error); + throw error; + } finally { + span.end(); + } + }); +} + +/** + * Add custom attributes to the current span + * @param {Object} attributes - Key-value pairs of attributes to add + */ +function addSpanAttributes(attributes) { + const span = getCurrentSpan(); + if (span) { + Object.entries(attributes).forEach(([key, value]) => { + if (value !== null && value !== undefined) { + span.setAttribute(key, value); + } + }); + } +} + +/** + * Record an exception on the current span + * @param {Error} error - Error to record + */ +function recordException(error) { + const span = getCurrentSpan(); + if (span) { + span.recordException(error); + span.setStatus({ + code: api.SpanStatusCode.ERROR, + message: error.message, + }); + } +} + +/** + * Inject trace context into headers for downstream service calls + * Uses W3C Trace Context propagation + * @returns {Object} Headers with trace context + */ +function injectTraceContext() { + const headers = {}; + const span = getCurrentSpan(); + + if (span) { + const spanContext = span.spanContext(); + if (spanContext && spanContext.traceId && spanContext.spanId) { + // W3C Trace Context format + const traceFlags = spanContext.traceFlags || 0; + headers.traceparent = `00-${spanContext.traceId}-${spanContext.spanId}-${traceFlags.toString(16).padStart(2, '0')}`; + + if (spanContext.traceState) { + headers.tracestate = spanContext.traceState.serialize(); + } + } + } + + return headers; +} + +module.exports = { + getTracer, + getCurrentSpan, + withSpan, + addSpanAttributes, + recordException, + injectTraceContext, +}; diff --git a/src/shared/utils.js b/src/shared/utils.js new file mode 100644 index 0000000..2da73e3 --- /dev/null +++ b/src/shared/utils.js @@ -0,0 +1,61 @@ +const axios = require('axios'); +const { injectTraceContext } = 
require('./tracer'); + +/** + * Make an HTTP request to another Lambda service with trace context propagation + * @param {string} url - Service URL + * @param {Object} data - Request payload + * @param {string} method - HTTP method (default: POST) + * @returns {Promise} Response data + */ +async function callService(url, data, method = 'POST') { + // Inject trace context into headers + const headers = { + 'Content-Type': 'application/json', + ...injectTraceContext(), + }; + + console.log(`Calling service: ${url} with method: ${method}`); + console.log(`Trace context headers:`, headers); + + try { + const response = await axios({ + method, + url, + data, + headers, + }); + + console.log(`Service call successful: ${url}`); + return response.data; + } catch (error) { + console.error(`Service call failed: ${url}`, error.message); + if (error.response) { + throw new Error(`Service call failed: ${error.response.status} - ${JSON.stringify(error.response.data)}`); + } + throw error; + } +} + +/** + * Create a Lambda response object + * @param {number} statusCode - HTTP status code + * @param {Object} body - Response body + * @param {Object} headers - Additional headers + * @returns {Object} Lambda response object + */ +function createResponse(statusCode, body, headers = {}) { + return { + statusCode, + headers: { + 'Content-Type': 'application/json', + ...headers, + }, + body: JSON.stringify(body), + }; +} + +module.exports = { + callService, + createResponse, +}; diff --git a/template.yaml b/template.yaml new file mode 100644 index 0000000..bc11944 --- /dev/null +++ b/template.yaml @@ -0,0 +1,137 @@ +AWSTemplateFormatVersion: '2010-09-09' +Transform: AWS::Serverless-2016-10-31 +Description: Lambda OpenTelemetry Demo - Order, Inventory, and Payment Services with AWS ADOT Layer + +Globals: + Function: + Timeout: 30 + MemorySize: 512 + Runtime: nodejs20.x + Environment: + Variables: + AWS_LAMBDA_EXEC_WRAPPER: /opt/otel-handler + OTEL_SERVICE_NAME: will-be-overridden + 
OTEL_TRACES_SAMPLER: AlwaysOn + OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf + OTEL_EXPORTER_OTLP_ENDPOINT: !Ref OtelCollectorEndpoint + OTEL_PROPAGATORS: tracecontext + OTEL_RESOURCE_ATTRIBUTES: !Sub deployment.environment=${Environment} + +Parameters: + OtelCollectorEndpoint: + Type: String + Default: 'http://localhost:4318' + Description: OpenTelemetry Collector endpoint URL (without /v1/traces path) + + Environment: + Type: String + Default: 'production' + Description: Deployment environment (e.g., development, staging, production) + + AdotLayerArn: + Type: String + Default: 'arn:aws:lambda:us-east-1:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:5' + Description: | + ARN of the AWS Distro for OpenTelemetry (ADOT) Lambda Layer for Node.js. + Find the latest ARN for your region at: + https://aws-otel.github.io/docs/getting-started/lambda/lambda-js + +Resources: + # Order Service Function + OrderServiceFunction: + Type: AWS::Serverless::Function + Metadata: + BuildMethod: nodejs20.x + Properties: + FunctionName: order-service + CodeUri: ./ + Handler: src/order-service/index.handler + Description: Order Service - Orchestrates order processing + Layers: + - !Ref AdotLayerArn + Environment: + Variables: + OTEL_SERVICE_NAME: order-service + INVENTORY_SERVICE_URL: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/inventory' + PAYMENT_SERVICE_URL: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/payment' + Events: + OrderApi: + Type: Api + Properties: + Path: /order + Method: post + RestApiId: !Ref ApiGateway + + # Inventory Service Function + InventoryServiceFunction: + Type: AWS::Serverless::Function + Metadata: + BuildMethod: nodejs20.x + Properties: + FunctionName: inventory-service + CodeUri: ./ + Handler: src/inventory-service/index.handler + Description: Inventory Service - Checks item availability + Layers: + - !Ref AdotLayerArn + Environment: + Variables: + OTEL_SERVICE_NAME: inventory-service + Events: + 
InventoryApi: + Type: Api + Properties: + Path: /inventory + Method: post + RestApiId: !Ref ApiGateway + + # Payment Service Function + PaymentServiceFunction: + Type: AWS::Serverless::Function + Metadata: + BuildMethod: nodejs20.x + Properties: + FunctionName: payment-service + CodeUri: ./ + Handler: src/payment-service/index.handler + Description: Payment Service - Processes payments + Layers: + - !Ref AdotLayerArn + Environment: + Variables: + OTEL_SERVICE_NAME: payment-service + Events: + PaymentApi: + Type: Api + Properties: + Path: /payment + Method: post + RestApiId: !Ref ApiGateway + + # API Gateway + ApiGateway: + Type: AWS::Serverless::Api + Properties: + StageName: Prod + Description: API Gateway for Lambda OpenTelemetry Demo + +Outputs: + ApiEndpoint: + Description: API Gateway endpoint URL + Value: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod' + + OrderServiceUrl: + Description: Order Service endpoint + Value: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/order' + + InventoryServiceUrl: + Description: Inventory Service endpoint + Value: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/inventory' + + PaymentServiceUrl: + Description: Payment Service endpoint + Value: !Sub 'https://${ApiGateway}.execute-api.${AWS::Region}.amazonaws.com/Prod/payment' + + AdotLayerVersion: + Description: ADOT Lambda Layer ARN being used + Value: !Ref AdotLayerArn diff --git a/test-api.sh b/test-api.sh new file mode 100755 index 0000000..680b864 --- /dev/null +++ b/test-api.sh @@ -0,0 +1,130 @@ +#!/bin/bash + +# Color codes for output +GREEN='\033[0;32m' +BLUE='\033[0;34m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +echo -e "${BLUE}========================================${NC}" +echo -e "${BLUE}Lambda OpenTelemetry Demo - Test Script${NC}" +echo -e "${BLUE}========================================${NC}" +echo "" + +# Check if API endpoint is provided +if [ -z "$1" ]; then 
+ echo -e "${RED}Error: API endpoint is required${NC}" + echo "Usage: ./test-api.sh " + echo "Example: ./test-api.sh https://abc123.execute-api.us-east-1.amazonaws.com/Prod" + exit 1 +fi + +API_ENDPOINT=$1 +ORDER_URL="${API_ENDPOINT}/order" + +echo -e "${YELLOW}Using API Endpoint: ${API_ENDPOINT}${NC}" +echo "" + +# Test 1: Successful Order +echo -e "${BLUE}Test 1: Successful Order${NC}" +echo "Testing with items that are in stock and valid payment method..." +curl -X POST ${ORDER_URL} \ + -H "Content-Type: application/json" \ + -d '{ + "orderId": "ORD-001", + "customerId": "CUST-123", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + }, + { + "itemId": "item-002", + "name": "Mouse", + "quantity": 2, + "price": 29.99 + } + ] + }' +echo -e "\n" + +sleep 2 + +# Test 2: Out of Stock Order +echo -e "${BLUE}Test 2: Out of Stock Order${NC}" +echo "Testing with items that are out of stock..." +curl -X POST ${ORDER_URL} \ + -H "Content-Type: application/json" \ + -d '{ + "orderId": "ORD-002", + "customerId": "CUST-456", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-003", + "name": "Keyboard", + "quantity": 1, + "price": 79.99 + }, + { + "itemId": "item-005", + "name": "Headphones", + "quantity": 1, + "price": 149.99 + } + ] + }' +echo -e "\n" + +sleep 2 + +# Test 3: Payment Failure (Card Declined) +echo -e "${BLUE}Test 3: Payment Failure - Card Declined${NC}" +echo "Testing with a payment method that will be declined..." 
+curl -X POST ${ORDER_URL} \ + -H "Content-Type: application/json" \ + -d '{ + "orderId": "ORD-003", + "customerId": "CUST-789", + "paymentMethod": "4111111111111111", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + } + ] + }' +echo -e "\n" + +sleep 2 + +# Test 4: Payment Failure (Insufficient Funds) +echo -e "${BLUE}Test 4: Payment Failure - Insufficient Funds${NC}" +echo "Testing with insufficient funds scenario..." +curl -X POST ${ORDER_URL} \ + -H "Content-Type: application/json" \ + -d '{ + "orderId": "ORD-004", + "customerId": "CUST-999", + "paymentMethod": "4222222222222222", + "items": [ + { + "itemId": "item-004", + "name": "Monitor", + "quantity": 2, + "price": 399.99 + } + ] + }' +echo -e "\n" + +echo -e "${GREEN}========================================${NC}" +echo -e "${GREEN}All tests completed!${NC}" +echo -e "${GREEN}Check your OpenTelemetry backend to view the traces${NC}" +echo -e "${GREEN}========================================${NC}" diff --git a/test-payloads.json b/test-payloads.json new file mode 100644 index 0000000..603d30c --- /dev/null +++ b/test-payloads.json @@ -0,0 +1,66 @@ +{ + "successfulOrder": { + "orderId": "ORD-001", + "customerId": "CUST-123", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + "price": 999.99 + }, + { + "itemId": "item-002", + "name": "Mouse", + "quantity": 2, + "price": 29.99 + } + ] + }, + "outOfStockOrder": { + "orderId": "ORD-002", + "customerId": "CUST-456", + "paymentMethod": "credit-card-5555", + "items": [ + { + "itemId": "item-003", + "name": "Keyboard", + "quantity": 1, + "price": 79.99 + }, + { + "itemId": "item-005", + "name": "Headphones", + "quantity": 1, + "price": 149.99 + } + ] + }, + "paymentFailureOrder": { + "orderId": "ORD-003", + "customerId": "CUST-789", + "paymentMethod": "4111111111111111", + "items": [ + { + "itemId": "item-001", + "name": "Laptop", + "quantity": 1, + 
"price": 999.99 + } + ] + }, + "insufficientFundsOrder": { + "orderId": "ORD-004", + "customerId": "CUST-999", + "paymentMethod": "4222222222222222", + "items": [ + { + "itemId": "item-004", + "name": "Monitor", + "quantity": 2, + "price": 399.99 + } + ] + } +}