diff --git a/CHANGELOG.md b/CHANGELOG.md index 24dc539f7ff..bac4615b267 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ - Use `gen_ai.function_id` as a fallback for `gen_ai.agent.name`. ([#5776](https://github.com/getsentry/relay/pull/5776)) - Add `gen_ai.input.messages` and `gen_ai.output.messages` as distinct fields for SpanData. ([#5797](https://github.com/getsentry/relay/pull/5797)) - Extract `http.query` and `url.query` attributes from `query_string` in transactions' request context. ([#5784](https://github.com/getsentry/relay/pull/5784)) +- Add `ModelMetadata` config with context size and utilization. Compute `gen_ai.context.window_size` and `gen_ai.context.utilization` on AI spans. ([#5814](https://github.com/getsentry/relay/pull/5814)) **Internal**: diff --git a/relay-cabi/src/processing.rs b/relay-cabi/src/processing.rs index df3050accc0..2b256f6594f 100644 --- a/relay-cabi/src/processing.rs +++ b/relay-cabi/src/processing.rs @@ -267,8 +267,8 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event( max_tag_value_length: usize::MAX, span_description_rules: None, performance_score: None, - geoip_lookup: None, // only supported in relay - ai_model_costs: None, // only supported in relay + geoip_lookup: None, // only supported in relay + ai_model_metadata: None, // only supported in relay enable_trimming: config.enable_trimming.unwrap_or_default(), measurements: None, normalize_spans: config.normalize_spans, diff --git a/relay-conventions/src/consts.rs b/relay-conventions/src/consts.rs index 575d967101b..06941a8fbe0 100644 --- a/relay-conventions/src/consts.rs +++ b/relay-conventions/src/consts.rs @@ -34,6 +34,8 @@ convention_attributes!( ENVIRONMENT => "sentry.environment", EVENT_NAME => "event.name", FAAS_TRIGGER => "faas.trigger", + GEN_AI_CONTEXT_UTILIZATION => "gen_ai.context.utilization", + GEN_AI_CONTEXT_WINDOW_SIZE => "gen_ai.context.window_size", GEN_AI_COST_INPUT_TOKENS => "gen_ai.cost.input_tokens", GEN_AI_COST_OUTPUT_TOKENS => "gen_ai.cost.output_tokens", GEN_AI_COST_TOTAL_TOKENS => "gen_ai.cost.total_tokens", diff --git a/relay-dynamic-config/src/global.rs b/relay-dynamic-config/src/global.rs index 275d6c5b917..95f6a24c9d9 100644 --- a/relay-dynamic-config/src/global.rs +++ b/relay-dynamic-config/src/global.rs @@ -4,7 +4,9 @@ use std::io::BufReader; use std::path::Path; use relay_base_schema::metrics::MetricNamespace; -use relay_event_normalization::{MeasurementsConfig, ModelCosts, SpanOpDefaults}; +use relay_event_normalization::{ + MeasurementsConfig, ModelCosts, ModelMetadata, ModelMetadataEntry, SpanOpDefaults, +}; use relay_filter::GenericFiltersConfig; use relay_quotas::Quota; use serde::{Deserialize, Serialize, de}; @@ -49,6 +51,10 @@ pub struct GlobalConfig { #[serde(skip_serializing_if = "is_model_costs_empty")] pub ai_model_costs: ErrorBoundary, + /// Metadata for AI models including costs and context size. + #[serde(skip_serializing_if = "is_model_metadata_empty")] + pub ai_model_metadata: ErrorBoundary, + /// Configuration to derive the `span.op` from other span fields. #[serde( deserialize_with = "default_on_error", @@ -73,6 +79,41 @@ impl GlobalConfig { } } + /// Returns [`ModelMetadata`], preferring `ai_model_metadata` if present and falling + /// back to `ai_model_costs` (adapted to the new format) otherwise. + pub fn model_metadata(&self) -> Option { + if let Some(metadata) = self + .ai_model_metadata + .as_ref() + .ok() + .filter(|m| m.is_enabled()) + { + return Some(metadata.clone()); + } + + let costs = self + .ai_model_costs + .as_ref() + .ok() + .filter(|c| c.is_enabled())?; + + let models = costs + .models + .iter() + .map(|(pattern, cost)| { + ( + pattern.clone(), + ModelMetadataEntry { + costs: Some(*cost), + context_size: None, + }, + ) + }) + .collect(); + + Some(ModelMetadata { version: 1, models }) + } + /// Returns the generic inbound filters. pub fn filters(&self) -> Option<&GenericFiltersConfig> { match &self.filters { @@ -335,6 +376,10 @@ fn is_model_costs_empty(value: &ErrorBoundary) -> bool { matches!(value, ErrorBoundary::Ok(model_costs) if model_costs.is_empty()) } +fn is_model_metadata_empty(value: &ErrorBoundary) -> bool { + matches!(value, ErrorBoundary::Ok(metadata) if metadata.is_empty()) +} + #[cfg(test)] mod tests { use super::*; diff --git a/relay-event-normalization/src/eap/ai.rs b/relay-event-normalization/src/eap/ai.rs index 0c8f2cef294..3c21ffc0dfd 100644 --- a/relay-event-normalization/src/eap/ai.rs +++ b/relay-event-normalization/src/eap/ai.rs @@ -4,7 +4,7 @@ use relay_conventions::consts::*; use relay_event_schema::protocol::Attributes; use relay_protocol::Annotated; -use crate::ModelCosts; +use crate::ModelMetadata; use crate::span::ai; use crate::statsd::{Counters, map_origin_to_integration, platform_tag}; @@ -22,7 +22,7 @@ use crate::statsd::{Counters, map_origin_to_integration, platform_tag}; pub fn normalize_ai( attributes: &mut Annotated, duration: Option, - costs: Option<&ModelCosts>, + model_metadata: Option<&ModelMetadata>, ) { let Some(attributes) = attributes.value_mut() else { return; @@ -38,7 +38,8 @@ pub fn normalize_ai( normalize_ai_type(attributes); normalize_total_tokens(attributes); normalize_tokens_per_second(attributes, duration); - normalize_ai_costs(attributes, costs); + normalize_context_utilization(attributes, model_metadata); + normalize_ai_costs(attributes, model_metadata); } /// Returns whether the item is should have AI normalizations applied. @@ -124,8 +125,37 @@ fn normalize_tokens_per_second(attributes: &mut Attributes, duration: Option, +) { + let model_id = attributes + .get_value(GEN_AI_RESPONSE_MODEL) + .and_then(|v| v.as_str()); + + let context_size = model_id.and_then(|id| model_metadata.and_then(|m| m.context_size(id))); + + let Some(context_size) = context_size else { + return; + }; + + attributes.insert(GEN_AI_CONTEXT_WINDOW_SIZE, context_size as i64); + + let total_tokens = attributes + .get_value(GEN_AI_USAGE_TOTAL_TOKENS) + .and_then(|v| v.as_f64()); + + if let Some(total_tokens) = total_tokens { + attributes.insert( + GEN_AI_CONTEXT_UTILIZATION, + total_tokens / context_size as f64, + ); + } +} + /// Calculates model costs and serializes them into attributes. -fn normalize_ai_costs(attributes: &mut Attributes, model_costs: Option<&ModelCosts>) { +fn normalize_ai_costs(attributes: &mut Attributes, model_metadata: Option<&ModelMetadata>) { let origin = extract_string_value(attributes, ORIGIN); let platform = extract_string_value(attributes, PLATFORM); @@ -145,7 +175,7 @@ fn normalize_ai_costs(attributes: &mut Attributes, model_costs: Option<&ModelCos return; }; - let Some(model_cost) = model_costs.and_then(|c| c.cost_per_token(model_id)) else { + let Some(model_cost) = model_metadata.and_then(|m| m.cost_per_token(model_id)) else { relay_statsd::metric!( counter(Counters::GenAiCostCalculationResult) += 1, result = "calculation_no_model_cost_available", @@ -191,7 +221,7 @@ mod tests { use relay_pattern::Pattern; use relay_protocol::{Empty, assert_annotated_snapshot}; - use crate::ModelCostV2; + use crate::{ModelCostV2, ModelMetadataEntry}; use super::*; @@ -203,34 +233,59 @@ mod tests { }; } - fn model_costs() -> ModelCosts { - ModelCosts { - version: 2, + fn model_metadata() -> ModelMetadata { + ModelMetadata { + version: 1, models: HashMap::from([ ( Pattern::new("claude-2.1").unwrap(), - ModelCostV2 { - input_per_token: 0.01, - output_per_token: 0.02, - output_reasoning_per_token: 0.03, - input_cached_per_token: 0.04, - input_cache_write_per_token: 0.0, + ModelMetadataEntry { + costs: Some(ModelCostV2 { + input_per_token: 0.01, + output_per_token: 0.02, + output_reasoning_per_token: 0.03, + input_cached_per_token: 0.04, + input_cache_write_per_token: 0.0, + }), + context_size: None, }, ), ( Pattern::new("gpt4-21-04").unwrap(), - ModelCostV2 { - input_per_token: 0.09, - output_per_token: 0.05, - output_reasoning_per_token: 0.0, - input_cached_per_token: 0.0, - input_cache_write_per_token: 0.0, + ModelMetadataEntry { + costs: Some(ModelCostV2 { + input_per_token: 0.09, + output_per_token: 0.05, + output_reasoning_per_token: 0.0, + input_cached_per_token: 0.0, + input_cache_write_per_token: 0.0, + }), + context_size: None, }, ), ]), } } + fn model_metadata_with_context_size() -> ModelMetadata { + ModelMetadata { + version: 1, + models: HashMap::from([( + Pattern::new("claude-2.1").unwrap(), + ModelMetadataEntry { + costs: Some(ModelCostV2 { + input_per_token: 0.01, + output_per_token: 0.02, + output_reasoning_per_token: 0.03, + input_cached_per_token: 0.04, + input_cache_write_per_token: 0.0, + }), + context_size: Some(100_000), + }, + )]), + } + } + #[test] fn test_normalize_ai_all_tokens() { let mut attributes = Annotated::new(attributes! { @@ -245,7 +300,7 @@ mod tests { normalize_ai( &mut attributes, Some(Duration::from_secs(1)), - Some(&model_costs()), + Some(&model_metadata()), ); assert_annotated_snapshot!(attributes, @r#" @@ -314,7 +369,7 @@ mod tests { normalize_ai( &mut attributes, Some(Duration::from_millis(500)), - Some(&model_costs()), + Some(&model_metadata()), ); assert_annotated_snapshot!(attributes, @r#" @@ -372,7 +427,11 @@ mod tests { "gen_ai.request.model" => "unknown".to_owned(), }); - normalize_ai(&mut attributes, Some(Duration::ZERO), Some(&model_costs())); + normalize_ai( + &mut attributes, + Some(Duration::ZERO), + Some(&model_metadata()), + ); assert_annotated_snapshot!(attributes, @r#" { @@ -419,7 +478,7 @@ mod tests { normalize_ai( &mut attributes, Some(Duration::from_millis(500)), - Some(&model_costs()), + Some(&model_metadata()), ); assert_annotated_snapshot!(attributes, @r#" @@ -488,7 +547,7 @@ mod tests { normalize_ai( &mut attributes, Some(Duration::from_millis(500)), - Some(&model_costs()), + Some(&model_metadata()), ); assert_annotated_snapshot!(attributes, @r#" @@ -547,7 +606,7 @@ mod tests { normalize_ai( &mut attributes, Some(Duration::from_millis(500)), - Some(&model_costs()), + Some(&model_metadata()), ); assert_annotated_snapshot!(&mut attributes, @r#" @@ -573,7 +632,7 @@ mod tests { normalize_ai( &mut attributes, Some(Duration::from_millis(500)), - Some(&model_costs()), + Some(&model_metadata()), ); assert_annotated_snapshot!(&mut attributes, @r#" @@ -593,9 +652,146 @@ mod tests { normalize_ai( &mut attributes, Some(Duration::from_millis(500)), - Some(&model_costs()), + Some(&model_metadata()), ); assert!(attributes.is_empty()); } + + #[test] + fn test_context_utilization_with_total_tokens() { + let mut attributes = Annotated::new(attributes! { + "gen_ai.operation.type" => "ai_client".to_owned(), + "gen_ai.usage.input_tokens" => 30000, + "gen_ai.usage.output_tokens" => 12000, + "gen_ai.request.model" => "claude-2.1".to_owned(), + }); + + normalize_ai( + &mut attributes, + Some(Duration::from_secs(1)), + Some(&model_metadata_with_context_size()), + ); + + assert_annotated_snapshot!(attributes, @r#" + { + "gen_ai.context.utilization": { + "type": "double", + "value": 0.42 + }, + "gen_ai.context.window_size": { + "type": "integer", + "value": 100000 + }, + "gen_ai.cost.input_tokens": { + "type": "double", + "value": 300.0 + }, + "gen_ai.cost.output_tokens": { + "type": "double", + "value": 240.0 + }, + "gen_ai.cost.total_tokens": { + "type": "double", + "value": 540.0 + }, + "gen_ai.operation.type": { + "type": "string", + "value": "ai_client" + }, + "gen_ai.request.model": { + "type": "string", + "value": "claude-2.1" + }, + "gen_ai.response.model": { + "type": "string", + "value": "claude-2.1" + }, + "gen_ai.response.tokens_per_second": { + "type": "double", + "value": 12000.0 + }, + "gen_ai.usage.input_tokens": { + "type": "integer", + "value": 30000 + }, + "gen_ai.usage.output_tokens": { + "type": "integer", + "value": 12000 + }, + "gen_ai.usage.total_tokens": { + "type": "double", + "value": 42000.0 + } + } + "#); + } + + #[test] + fn test_context_utilization_no_context_size() { + let mut attributes = Annotated::new(attributes! { + "gen_ai.operation.type" => "ai_client".to_owned(), + "gen_ai.usage.input_tokens" => 1000, + "gen_ai.usage.output_tokens" => 2000, + "gen_ai.request.model" => "claude-2.1".to_owned(), + }); + + // model_metadata() has no context_size set. + normalize_ai( + &mut attributes, + Some(Duration::from_secs(1)), + Some(&model_metadata()), + ); + + let attrs = attributes.value().unwrap(); + assert!(attrs.get_value("gen_ai.context.window_size").is_none()); + assert!(attrs.get_value("gen_ai.context.utilization").is_none()); + } + + #[test] + fn test_context_utilization_no_total_tokens() { + // Only context_size is available, but no token counts at all. + let mut attributes = Annotated::new(attributes! { + "gen_ai.operation.type" => "ai_client".to_owned(), + "gen_ai.request.model" => "claude-2.1".to_owned(), + }); + + normalize_ai( + &mut attributes, + Some(Duration::from_secs(1)), + Some(&model_metadata_with_context_size()), + ); + + let attrs = attributes.value().unwrap(); + // window_size should still be set even without tokens. + assert_eq!( + attrs + .get_value("gen_ai.context.window_size") + .unwrap() + .as_f64(), + Some(100_000.0) + ); + // But utilization cannot be computed without total_tokens. + assert!(attrs.get_value("gen_ai.context.utilization").is_none()); + } + + #[test] + fn test_context_utilization_unknown_model() { + let mut attributes = Annotated::new(attributes! { + "gen_ai.operation.type" => "ai_client".to_owned(), + "gen_ai.usage.input_tokens" => 1000, + "gen_ai.usage.output_tokens" => 2000, + "gen_ai.request.model" => "unknown-model".to_owned(), + }); + + normalize_ai( + &mut attributes, + Some(Duration::from_secs(1)), + Some(&model_metadata_with_context_size()), + ); + + let attrs = attributes.value().unwrap(); + assert!(attrs.get_value("gen_ai.context.window_size").is_none()); + assert!(attrs.get_value("gen_ai.context.utilization").is_none()); + } } diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index 4459e1f9a96..20f1df5813c 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -33,7 +33,7 @@ use crate::span::tag_extraction::extract_span_tags_from_event; use crate::utils::{self, MAX_DURATION_MOBILE_MS, get_event_user_tag}; use crate::{ BorrowedSpanOpDefaults, BreakdownsConfig, CombinedMeasurementsConfig, GeoIpLookup, MaxChars, - ModelCosts, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule, + ModelMetadata, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule, TransactionNameConfig, breakdowns, event_error, legacy, mechanism, remove_other, schema, span, stacktrace, transactions, trimming, user_agent, }; @@ -137,8 +137,8 @@ pub struct NormalizationConfig<'a> { /// Configuration for generating performance score measurements for web vitals pub performance_score: Option<&'a PerformanceScoreConfig>, - /// Configuration for calculating the cost of AI model runs - pub ai_model_costs: Option<&'a ModelCosts>, + /// Metadata for AI models including costs and context size. + pub ai_model_metadata: Option<&'a ModelMetadata>, /// An initialized GeoIP lookup. pub geoip_lookup: Option<&'a GeoIpLookup>, @@ -196,7 +196,7 @@ impl Default for NormalizationConfig<'_> { span_description_rules: Default::default(), performance_score: Default::default(), geoip_lookup: Default::default(), - ai_model_costs: Default::default(), + ai_model_metadata: Default::default(), enable_trimming: false, measurements: None, normalize_spans: true, @@ -327,7 +327,7 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) { .get_or_default::() .score_profile_version = Annotated::new(version); } - enrich_ai_event_data(event, config.ai_model_costs); + enrich_ai_event_data(event, config.ai_model_metadata); normalize_breakdowns(event, config.breakdowns_config); // Breakdowns are part of the metric extraction too normalize_default_attributes(event, meta, config); normalize_trace_context_tags(event); @@ -1546,7 +1546,7 @@ mod tests { use serde_json::json; use super::*; - use crate::{ClientHints, MeasurementsConfig, ModelCostV2}; + use crate::{ClientHints, MeasurementsConfig, ModelCostV2, ModelMetadataEntry}; const IOS_MOBILE_EVENT: &str = r#" { @@ -2312,27 +2312,33 @@ mod tests { normalize_event( &mut event, &NormalizationConfig { - ai_model_costs: Some(&ModelCosts { - version: 2, + ai_model_metadata: Some(&ModelMetadata { + version: 1, models: HashMap::from([ ( Pattern::new("claude-2.1").unwrap(), - ModelCostV2 { - input_per_token: 0.01, - output_per_token: 0.02, - output_reasoning_per_token: 0.03, - input_cached_per_token: 0.0, - input_cache_write_per_token: 0.0, + ModelMetadataEntry { + costs: Some(ModelCostV2 { + input_per_token: 0.01, + output_per_token: 0.02, + output_reasoning_per_token: 0.03, + input_cached_per_token: 0.0, + input_cache_write_per_token: 0.0, + }), + context_size: None, }, ), ( Pattern::new("gpt4-21-04").unwrap(), - ModelCostV2 { - input_per_token: 0.02, - output_per_token: 0.03, - output_reasoning_per_token: 0.04, - input_cached_per_token: 0.0, - input_cache_write_per_token: 0.0, + ModelMetadataEntry { + costs: Some(ModelCostV2 { + input_per_token: 0.02, + output_per_token: 0.03, + output_reasoning_per_token: 0.04, + input_cached_per_token: 0.0, + input_cache_write_per_token: 0.0, + }), + context_size: None, }, ), ]), @@ -2431,27 +2437,33 @@ mod tests { normalize_event( &mut event, &NormalizationConfig { - ai_model_costs: Some(&ModelCosts { - version: 2, + ai_model_metadata: Some(&ModelMetadata { + version: 1, models: HashMap::from([ ( Pattern::new("claude-2.1").unwrap(), - ModelCostV2 { - input_per_token: 0.01, - output_per_token: 0.02, - output_reasoning_per_token: 0.03, - input_cached_per_token: 0.04, - input_cache_write_per_token: 0.0, + ModelMetadataEntry { + costs: Some(ModelCostV2 { + input_per_token: 0.01, + output_per_token: 0.02, + output_reasoning_per_token: 0.03, + input_cached_per_token: 0.04, + input_cache_write_per_token: 0.0, + }), + context_size: None, }, ), ( Pattern::new("gpt4-21-04").unwrap(), - ModelCostV2 { - input_per_token: 0.09, - output_per_token: 0.05, - output_reasoning_per_token: 0.0, - input_cached_per_token: 0.0, - input_cache_write_per_token: 0.0, + ModelMetadataEntry { + costs: Some(ModelCostV2 { + input_per_token: 0.09, + output_per_token: 0.05, + output_reasoning_per_token: 0.0, + input_cached_per_token: 0.0, + input_cache_write_per_token: 0.0, + }), + context_size: None, }, ), ]), @@ -2533,16 +2545,19 @@ mod tests { normalize_event( &mut event, &NormalizationConfig { - ai_model_costs: Some(&ModelCosts { - version: 2, + ai_model_metadata: Some(&ModelMetadata { + version: 1, models: HashMap::from([( Pattern::new("claude-2.1").unwrap(), - ModelCostV2 { - input_per_token: 0.01, - output_per_token: 0.02, - output_reasoning_per_token: 0.03, - input_cached_per_token: 0.0, - input_cache_write_per_token: 0.0, + ModelMetadataEntry { + costs: Some(ModelCostV2 { + input_per_token: 0.01, + output_per_token: 0.02, + output_reasoning_per_token: 0.03, + input_cached_per_token: 0.0, + input_cache_write_per_token: 0.0, + }), + context_size: None, }, )]), }), @@ -2605,27 +2620,33 @@ mod tests { normalize_event( &mut event, &NormalizationConfig { - ai_model_costs: Some(&ModelCosts { - version: 2, + ai_model_metadata: Some(&ModelMetadata { + version: 1, models: HashMap::from([ ( Pattern::new("claude-2.1").unwrap(), - ModelCostV2 { - input_per_token: 0.01, - output_per_token: 0.02, - output_reasoning_per_token: 0.0, - input_cached_per_token: 0.04, - input_cache_write_per_token: 0.0, + ModelMetadataEntry { + costs: Some(ModelCostV2 { + input_per_token: 0.01, + output_per_token: 0.02, + output_reasoning_per_token: 0.0, + input_cached_per_token: 0.04, + input_cache_write_per_token: 0.0, + }), + context_size: None, }, ), ( Pattern::new("gpt4-21-04").unwrap(), - ModelCostV2 { - input_per_token: 0.09, - output_per_token: 0.05, - output_reasoning_per_token: 0.06, - input_cached_per_token: 0.0, - input_cache_write_per_token: 0.0, + ModelMetadataEntry { + costs: Some(ModelCostV2 { + input_per_token: 0.09, + output_per_token: 0.05, + output_reasoning_per_token: 0.06, + input_cached_per_token: 0.0, + input_cache_write_per_token: 0.0, + }), + context_size: None, }, ), ]), @@ -2692,8 +2713,8 @@ mod tests { normalize_event( &mut event, &NormalizationConfig { - ai_model_costs: Some(&ModelCosts { - version: 2, + ai_model_metadata: Some(&ModelMetadata { + version: 1, models: HashMap::new(), }), ..NormalizationConfig::default() @@ -2736,8 +2757,8 @@ mod tests { normalize_event( &mut event, &NormalizationConfig { - ai_model_costs: Some(&ModelCosts { - version: 2, + ai_model_metadata: Some(&ModelMetadata { + version: 1, models: HashMap::new(), }), ..NormalizationConfig::default() diff --git a/relay-event-normalization/src/normalize/mod.rs b/relay-event-normalization/src/normalize/mod.rs index 8739b39574d..20ca46a7e49 100644 --- a/relay-event-normalization/src/normalize/mod.rs +++ b/relay-event-normalization/src/normalize/mod.rs @@ -371,6 +371,101 @@ pub struct ModelCostV2 { pub input_cache_write_per_token: f64, } +/// Metadata for AI models including costs and context size. +/// +/// Example JSON: +/// ```json +/// { +/// "version": 1, +/// "models": { +/// "gpt-4": { +/// "costs": { +/// "inputPerToken": 0.0000003, +/// "outputPerToken": 0.00000165, +/// "outputReasoningPerToken": 0.0, +/// "inputCachedPerToken": 0.0000015, +/// "inputCacheWritePerToken": 0.00001875 +/// }, +/// "contextSize": 1000000 +/// } +/// } +/// } +/// ``` +#[derive(Clone, Default, Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ModelMetadata { + /// The version of the model metadata struct. + pub version: u16, + + /// The mappings of model ID => metadata as a dictionary. + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub models: HashMap, +} + +impl ModelMetadata { + const SUPPORTED_VERSION: u16 = 1; + + /// `true` if the model metadata is empty or the version is unsupported. + pub fn is_empty(&self) -> bool { + self.models.is_empty() || !self.is_enabled() + } + + /// `false` if the version is unsupported. + pub fn is_enabled(&self) -> bool { + self.version == Self::SUPPORTED_VERSION + } + + /// Gets the cost per token for a given model, if defined. + pub fn cost_per_token(&self, model_id: &str) -> Option<&ModelCostV2> { + self.get(model_id).and_then(|entry| entry.costs.as_ref()) + } + + /// Gets the context window size for a given model, if defined. + /// + /// Returns `None` for a zero context size, as it is not a meaningful value. + pub fn context_size(&self, model_id: &str) -> Option { + self.get(model_id) + .and_then(|entry| entry.context_size) + .filter(|&size| size > 0) + } + + /// Gets the metadata for a given model, if defined. + pub fn get(&self, model_id: &str) -> Option<&ModelMetadataEntry> { + if !self.is_enabled() { + return None; + } + + let normalized_model_id = normalize_ai_model_name(model_id); + + // First try exact match. + if let Some(value) = self.models.get(normalized_model_id) { + return Some(value); + } + + // Fall back to glob matching. + self.models.iter().find_map(|(key, value)| { + if key.is_match(normalized_model_id) { + Some(value) + } else { + None + } + }) + } +} + +/// Metadata for a single AI model. +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ModelMetadataEntry { + /// Token costs for this model. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub costs: Option, + + /// The context window size in tokens. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub context_size: Option, +} + #[cfg(test)] mod tests { use chrono::{TimeZone, Utc}; diff --git a/relay-event-normalization/src/normalize/span/ai.rs b/relay-event-normalization/src/normalize/span/ai.rs index fb381bf4209..40b34d23612 100644 --- a/relay-event-normalization/src/normalize/span/ai.rs +++ b/relay-event-normalization/src/normalize/span/ai.rs @@ -1,7 +1,7 @@ //! AI cost calculation. use crate::statsd::{Counters, map_origin_to_integration, platform_tag}; -use crate::{ModelCostV2, ModelCosts}; +use crate::{ModelCostV2, ModelMetadata}; use relay_event_schema::protocol::{ Event, Measurements, OperationType, Span, SpanData, TraceContext, }; @@ -261,11 +261,38 @@ fn set_total_tokens(data: &mut SpanData) { } } +/// Sets the context window size and utilization for the model. +fn extract_context_utilization(data: &mut SpanData, model_metadata: &ModelMetadata) { + let model_id = data + .gen_ai_response_model + .value() + .and_then(|val| val.as_str()); + + let context_size = model_id.and_then(|id| model_metadata.context_size(id)); + + let Some(context_size) = context_size else { + return; + }; + + data.gen_ai_context_window_size + .set_value(Value::U64(context_size).into()); + + let total_tokens = data + .gen_ai_usage_total_tokens + .value() + .and_then(Value::as_f64); + + if let Some(total_tokens) = total_tokens { + data.gen_ai_context_utilization + .set_value(Value::F64(total_tokens / context_size as f64).into()); + } +} + /// Extract the additional data into the span fn extract_ai_data( data: &mut SpanData, duration: f64, - ai_model_costs: &ModelCosts, + model_metadata: &ModelMetadata, origin: Option<&str>, platform: Option<&str>, ) { @@ -281,6 +308,8 @@ fn extract_ai_data( .set_value(Value::F64(output_tokens / (duration / 1000.0)).into()); } + extract_context_utilization(data, model_metadata); + // Extracts the total cost of the AI model used if let Some(model_id) = data .gen_ai_response_model @@ -288,7 +317,7 @@ fn extract_ai_data( .and_then(|val| val.as_str()) { extract_ai_model_cost_data( - ai_model_costs.cost_per_token(model_id), + model_metadata.cost_per_token(model_id), data, origin, platform, @@ -309,7 +338,7 @@ fn enrich_ai_span_data( span_op: &Annotated, measurements: &Annotated, duration: f64, - model_costs: Option<&ModelCosts>, + model_metadata: Option<&ModelMetadata>, origin: Option<&str>, platform: Option<&str>, ) { @@ -337,8 +366,8 @@ fn enrich_ai_span_data( data.gen_ai_agent_name.set_value(Some(function_id)); } - if let Some(model_costs) = model_costs { - extract_ai_data(data, duration, model_costs, origin, platform); + if let Some(model_metadata) = model_metadata { + extract_ai_data(data, duration, model_metadata, origin, platform); } else { relay_statsd::metric!( counter(Counters::GenAiCostCalculationResult) += 1, @@ -360,7 +389,7 @@ fn enrich_ai_span_data( } /// Enrich the AI span data -pub fn enrich_ai_span(span: &mut Span, model_costs: Option<&ModelCosts>) { +pub fn enrich_ai_span(span: &mut Span, model_metadata: Option<&ModelMetadata>) { let duration = span .get_value("span.duration") .and_then(|v| v.as_f64()) @@ -371,14 +400,14 @@ pub fn enrich_ai_span(span: &mut Span, model_costs: Option<&ModelCosts>) { &span.op, &span.measurements, duration, - model_costs, + model_metadata, span.origin.as_str(), span.platform.as_str(), ); } /// Extract the ai data from all of an event's spans -pub fn enrich_ai_event_data(event: &mut Event, model_costs: Option<&ModelCosts>) { +pub fn enrich_ai_event_data(event: &mut Event, model_metadata: Option<&ModelMetadata>) { let event_duration = event .get_value("event.duration") .and_then(|v| v.as_f64()) @@ -395,7 +424,7 @@ pub fn enrich_ai_event_data(event: &mut Event, model_costs: Option<&ModelCosts>) &trace_context.op, &event.measurements, event_duration, - model_costs, + model_metadata, trace_context.origin.as_str(), event.platform.as_str(), ); @@ -415,7 +444,7 @@ pub fn enrich_ai_event_data(event: &mut Event, model_costs: Option<&ModelCosts>) &span.op, &span.measurements, span_duration, - model_costs, + model_metadata, span.origin.as_str(), span_platform, ); @@ -439,10 +468,14 @@ fn is_ai_span(span_data: &Annotated, span_op: Option<&OperationType>) #[cfg(test)] mod tests { + use std::collections::HashMap; + + use relay_pattern::Pattern; use relay_protocol::{FromValue, assert_annotated_snapshot}; use serde_json::json; use super::*; + use crate::ModelMetadataEntry; fn ai_span_with_data(data: serde_json::Value) -> Span { Span { @@ -1067,4 +1100,136 @@ mod tests { } "#); } + + fn metadata_with_context_size() -> ModelMetadata { + ModelMetadata { + version: 1, + models: HashMap::from([( + Pattern::new("claude-2.1").unwrap(), + ModelMetadataEntry { + costs: Some(ModelCostV2 { + input_per_token: 0.01, + output_per_token: 0.02, + output_reasoning_per_token: 0.0, + input_cached_per_token: 0.0, + input_cache_write_per_token: 0.0, + }), + context_size: Some(100_000), + }, + )]), + } + } + + #[test] + fn test_context_utilization_with_total_tokens() { + let mut span = Span { + op: "gen_ai.test".to_owned().into(), + data: SpanData::from_value( + json!({ + "gen_ai.response.model": "claude-2.1", + "gen_ai.usage.input_tokens": 30000.0, + "gen_ai.usage.output_tokens": 12000.0, + "gen_ai.usage.total_tokens": 42000.0, + }) + .into(), + ), + ..Default::default() + }; + + enrich_ai_span(&mut span, Some(&metadata_with_context_size())); + + let data = span.data.value().unwrap(); + assert_eq!( + data.gen_ai_context_window_size + .value() + .and_then(Value::as_f64), + Some(100_000.0) + ); + assert_eq!( + data.gen_ai_context_utilization + .value() + .and_then(Value::as_f64), + Some(0.42) + ); + } + + #[test] + fn test_context_utilization_no_context_size() { + let metadata = ModelMetadata { + version: 1, + models: HashMap::from([( + Pattern::new("claude-2.1").unwrap(), + ModelMetadataEntry { + costs: None, + context_size: None, + }, + )]), + }; + + let mut span = Span { + op: "gen_ai.test".to_owned().into(), + data: SpanData::from_value( + json!({ + "gen_ai.response.model": "claude-2.1", + "gen_ai.usage.total_tokens": 1000.0, + }) + .into(), + ), + ..Default::default() + }; + + enrich_ai_span(&mut span, Some(&metadata)); + + let data = span.data.value().unwrap(); + assert!(data.gen_ai_context_window_size.value().is_none()); + assert!(data.gen_ai_context_utilization.value().is_none()); + } + + #[test] + fn test_context_utilization_no_total_tokens() { + let mut span = Span { + op: "gen_ai.test".to_owned().into(), + data: SpanData::from_value( + json!({ + "gen_ai.response.model": "claude-2.1", + }) + .into(), + ), + ..Default::default() + }; + + enrich_ai_span(&mut span, Some(&metadata_with_context_size())); + + let data = span.data.value().unwrap(); + // window_size should still be set even without tokens. + assert_eq!( + data.gen_ai_context_window_size + .value() + .and_then(Value::as_f64), + Some(100_000.0) + ); + // But utilization cannot be computed without total_tokens. + assert!(data.gen_ai_context_utilization.value().is_none()); + } + + #[test] + fn test_context_utilization_unknown_model() { + let mut span = Span { + op: "gen_ai.test".to_owned().into(), + data: SpanData::from_value( + json!({ + "gen_ai.response.model": "unknown-model", + "gen_ai.usage.total_tokens": 1000.0, + }) + .into(), + ), + ..Default::default() + }; + + enrich_ai_span(&mut span, Some(&metadata_with_context_size())); + + let data = span.data.value().unwrap(); + assert!(data.gen_ai_context_window_size.value().is_none()); + assert!(data.gen_ai_context_utilization.value().is_none()); + } } diff --git a/relay-event-schema/src/protocol/span.rs b/relay-event-schema/src/protocol/span.rs index 90cc716c330..ed0c7b87832 100644 --- a/relay-event-schema/src/protocol/span.rs +++ b/relay-event-schema/src/protocol/span.rs @@ -541,6 +541,14 @@ pub struct SpanData { #[metastructure(field = "gen_ai.request.model", legacy_alias = "ai.model_id")] pub gen_ai_request_model: Annotated, + /// The context window size of the model in tokens. + #[metastructure(field = "gen_ai.context.window_size")] + pub gen_ai_context_window_size: Annotated, + + /// The fraction of the context window used by total tokens. + #[metastructure(field = "gen_ai.context.utilization")] + pub gen_ai_context_utilization: Annotated, + /// The total cost for the tokens used (duplicate field for migration) #[metastructure(field = "gen_ai.cost.total_tokens", pii = "maybe")] pub gen_ai_cost_total_tokens: Annotated, diff --git a/relay-server/src/processing/spans/process.rs b/relay-server/src/processing/spans/process.rs index 8d61399e5dc..f983e2a1137 100644 --- a/relay-server/src/processing/spans/process.rs +++ b/relay-server/src/processing/spans/process.rs @@ -1,7 +1,7 @@ use std::collections::BTreeMap; use std::time::Duration; -use relay_event_normalization::{GeoIpLookup, RequiredMode, SchemaProcessor, eap}; +use relay_event_normalization::{GeoIpLookup, ModelMetadata, RequiredMode, SchemaProcessor, eap}; use relay_event_schema::processor::{ProcessingState, ValueType, process_value}; use relay_event_schema::protocol::{Span, SpanId, SpanV2}; use relay_protocol::Annotated; @@ -134,10 +134,18 @@ fn parse_and_validate_span_attachment(item: &Item) -> Result<(Option, Ex /// Normalizes individual spans. pub fn normalize(spans: &mut Managed, geo_lookup: &GeoIpLookup, ctx: Context<'_>) { + let model_metadata = ctx.global_config.model_metadata(); spans.retain_with_context( |spans| (&mut spans.spans, &spans.headers), |span, headers, _| { - normalize_span(&mut span.span, headers, geo_lookup, ctx).inspect_err(|err| { + normalize_span( + &mut span.span, + headers, + geo_lookup, + model_metadata.as_ref(), + ctx, + ) + .inspect_err(|err| { relay_log::debug!("failed to normalize span: {err}"); }) }, @@ -148,6 +156,7 @@ fn normalize_span( span: &mut Annotated, headers: &EnvelopeHeaders, geo_lookup: &GeoIpLookup, + model_metadata: Option<&ModelMetadata>, ctx: Context<'_>, ) -> Result<()> { let meta = headers.meta(); @@ -160,7 +169,6 @@ fn normalize_span( if let Some(span) = span.value_mut() { let dsc = headers.dsc(); let duration = span_duration(span); - let model_costs = ctx.global_config.ai_model_costs.as_ref().ok(); let allowed_hosts = ctx.global_config.options.http_span_allowed_hosts.as_slice(); validate_timestamps(span)?; @@ -182,7 +190,7 @@ fn normalize_span( eap::normalize_dsc(&mut span.attributes, dsc); } if ctx.is_processing() { - eap::normalize_ai(&mut span.attributes, duration, model_costs); + eap::normalize_ai(&mut span.attributes, duration, model_metadata); } eap::normalize_attribute_values(&mut span.attributes, allowed_hosts); eap::write_legacy_attributes(&mut span.attributes); @@ -848,7 +856,7 @@ mod tests { &[], ); - normalize_span(&mut span, &headers, &geo_lookup, ctx).unwrap(); + normalize_span(&mut span, &headers, &geo_lookup, None, ctx).unwrap(); assert_attributes_contains( &span, @@ -877,7 +885,7 @@ mod tests { &[], ); - normalize_span(&mut span, &headers, &geo_lookup, ctx).unwrap(); + normalize_span(&mut span, &headers, &geo_lookup, None, ctx).unwrap(); assert_attributes_contains( &span, @@ -907,7 +915,7 @@ mod tests { &[("http.response.status_code", 502.)], ); - normalize_span(&mut span, &headers, &geo_lookup, ctx).unwrap(); + normalize_span(&mut span, &headers, &geo_lookup, None, ctx).unwrap(); assert_attributes_contains( &span, @@ -932,7 +940,7 @@ mod tests { &[("http.response.status_code", 502.)], ); - normalize_span(&mut span, &headers, &geo_lookup, ctx).unwrap(); + normalize_span(&mut span, &headers, &geo_lookup, None, ctx).unwrap(); assert_attributes_contains( &span, @@ -958,7 +966,7 @@ mod tests { &[("http.response.status_code", 502.)], ); - normalize_span(&mut span, &headers, &geo_lookup, ctx).unwrap(); + normalize_span(&mut span, &headers, &geo_lookup, None, ctx).unwrap(); assert_attributes_contains( &span, diff --git a/relay-server/src/processing/utils/event.rs b/relay-server/src/processing/utils/event.rs index ea7ff592060..98b32fdea92 100644 --- a/relay-server/src/processing/utils/event.rs +++ b/relay-server/src/processing/utils/event.rs @@ -221,7 +221,7 @@ pub fn normalize( // Inherit from spans, as transactions no longer produce metrics. let transaction_aggregator_config = ctx.config.aggregator_config_for(MetricNamespace::Spans); - let ai_model_costs = ctx.global_config.ai_model_costs.as_ref().ok(); + let ai_model_metadata = ctx.global_config.model_metadata(); let http_span_allowed_hosts = ctx.global_config.options.http_span_allowed_hosts.as_slice(); let project_info = ctx.project_info; @@ -289,7 +289,7 @@ pub fn normalize( emit_event_errors: full_normalization, span_description_rules: project_info.config.span_description_rules.as_ref(), geoip_lookup: Some(geoip_lookup), - ai_model_costs, + ai_model_metadata: ai_model_metadata.as_ref(), enable_trimming: true, measurements: Some(CombinedMeasurementsConfig::new( ctx.project_info.config().measurements.as_ref(), diff --git a/relay-server/src/services/processor/span.rs b/relay-server/src/services/processor/span.rs index dcfcafae61e..dca579a0891 100644 --- a/relay-server/src/services/processor/span.rs +++ b/relay-server/src/services/processor/span.rs @@ -19,7 +19,7 @@ use relay_dynamic_config::{ use relay_event_normalization::span::ai::enrich_ai_span; use relay_event_normalization::{ BorrowedSpanOpDefaults, ClientHints, CombinedMeasurementsConfig, FromUserAgentInfo, - GeoIpLookup, MeasurementsConfig, ModelCosts, PerformanceScoreConfig, RawUserAgentInfo, + GeoIpLookup, MeasurementsConfig, ModelMetadata, PerformanceScoreConfig, RawUserAgentInfo, SchemaProcessor, TimestampProcessor, TransactionNameRule, TransactionsProcessor, TrimmingProcessor, normalize_measurements, normalize_performance_score, normalize_transaction_name, span::tag_extraction, validate_span, @@ -235,8 +235,8 @@ struct NormalizeSpanConfig<'a> { /// If at least one is provided, then normalization will truncate custom measurements /// and add units of known built-in measurements. measurements: Option>, - /// Configuration for AI model cost calculation - ai_model_costs: Option<&'a ModelCosts>, + /// Metadata for AI models including costs and context size. + ai_model_metadata: Option, /// The maximum length for names of custom measurements. /// /// Measurements with longer names are removed from the transaction event and replaced with a @@ -280,7 +280,7 @@ impl<'a> NormalizeSpanConfig<'a> { project_config.measurements.as_ref(), global_config.measurements.as_ref(), )), - ai_model_costs: global_config.ai_model_costs.as_ref().ok(), + ai_model_metadata: global_config.model_metadata(), max_name_and_unit_len: aggregator_config .max_name_length .saturating_sub(MeasurementsConfig::MEASUREMENT_MRI_OVERHEAD), @@ -342,7 +342,7 @@ fn normalize( max_tag_value_size, performance_score, measurements, - ai_model_costs, + ai_model_metadata, max_name_and_unit_len, tx_name_rules, user_agent, @@ -450,7 +450,7 @@ fn normalize( normalize_performance_score(span, performance_score); - enrich_ai_span(span, ai_model_costs); + enrich_ai_span(span, ai_model_metadata.as_ref()); tag_extraction::extract_measurements(span, is_mobile); @@ -799,7 +799,7 @@ mod tests { max_tag_value_size: 200, performance_score: None, measurements: None, - ai_model_costs: None, + ai_model_metadata: None, max_name_and_unit_len: 200, tx_name_rules: &[], user_agent: None, diff --git a/tests/integration/test_ai.py b/tests/integration/test_ai.py index 51c765a3ea8..1a2420976ea 100644 --- a/tests/integration/test_ai.py +++ b/tests/integration/test_ai.py @@ -35,15 +35,18 @@ def test_ai_spans_example_transaction( project_id = 42 mini_sentry.add_full_project_config(project_id) - mini_sentry.global_config["aiModelCosts"] = { - "version": 2, + mini_sentry.global_config["aiModelMetadata"] = { + "version": 1, "models": { "gpt-4o": { - "inputPerToken": 0.01, - "outputPerToken": 0.02, - "outputReasoningPerToken": 0.03, - "inputCachedPerToken": 0.0, - "inputCacheWritePerToken": 0.0, + "costs": { + "inputPerToken": 0.01, + "outputPerToken": 0.02, + "outputReasoningPerToken": 0.03, + "inputCachedPerToken": 0.0, + "inputCacheWritePerToken": 0.0, + }, + "contextSize": 128000, }, }, } @@ -387,6 +390,11 @@ def test_ai_spans_example_transaction( "type": "string", "value": "resp_0c1c943ef2dc8bf9006909e7b8e3e88197bffb4d0e80187ca1", }, + "gen_ai.context.utilization": { + "type": "double", + "value": mock.ANY, + }, + "gen_ai.context.window_size": {"type": "integer", "value": 128000}, "gen_ai.cost.input_tokens": {"type": "double", "value": 2.45}, "gen_ai.cost.output_tokens": {"type": "double", "value": 1.3}, "gen_ai.cost.total_tokens": {"type": "double", "value": 3.75}, @@ -476,6 +484,11 @@ def test_ai_spans_example_transaction( "type": "string", "value": "resp_0c1c943ef2dc8bf9006909e7b649008197a541a144de019abf", }, + "gen_ai.context.utilization": { + "type": "double", + "value": mock.ANY, + }, + "gen_ai.context.window_size": {"type": "integer", "value": 128000}, "gen_ai.cost.input_tokens": {"type": "double", "value": 0.37}, "gen_ai.cost.output_tokens": {"type": "double", "value": 0.92}, "gen_ai.cost.total_tokens": {"type": "double", "value": 1.29}, @@ -965,6 +978,11 @@ def test_ai_spans_example_transaction( "type": "string", "value": "resp_0c1c943ef2dc8bf9006909e7b8e3e88197bffb4d0e80187ca1", }, + "gen_ai.context.utilization": { + "type": "double", + "value": mock.ANY, + }, + "gen_ai.context.window_size": {"type": "integer", "value": 128000}, "gen_ai.cost.input_tokens": {"type": "double", "value": 2.08}, "gen_ai.cost.output_tokens": {"type": "double", "value": 0.38}, "gen_ai.cost.total_tokens": {"type": "double", "value": 2.46},