diff --git a/Cargo.lock b/Cargo.lock index 16293129801..b1eb3b35194 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,7 @@ version = "0.1.35" dependencies = [ "argh", "async-trait", + "axum", "bytes", "chrono", "colored", @@ -40,6 +41,7 @@ dependencies = [ "papaya", "prometheus-exposition", "prost-types", + "rmp", "saluki-api", "saluki-app", "saluki-common", @@ -3761,6 +3763,7 @@ dependencies = [ "protobuf", "rand 0.9.3", "regex", + "rmp", "rmp-serde", "saluki-api", "saluki-common", diff --git a/Cargo.toml b/Cargo.toml index e18dc906baa..2a1652c382a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -213,6 +213,7 @@ tracing-appender = { version = "0.2", default-features = false } base64 = { version = "0.22.1", default-features = false } treediff = { version = "5", default-features = false } argh = { version = "0.1", default-features = false } +rmp = { version = "0.8" } rmp-serde = { version = "1.3", default-features = false } serde_bytes = { version = "0.11.19", default-features = false } num-traits = { version = "0.2", default-features = false } diff --git a/bin/agent-data-plane/Cargo.toml b/bin/agent-data-plane/Cargo.toml index 7395c278705..918cdc7156a 100644 --- a/bin/agent-data-plane/Cargo.toml +++ b/bin/agent-data-plane/Cargo.toml @@ -15,6 +15,7 @@ fips = ["saluki-app/tls-fips"] [dependencies] argh = { workspace = true, features = ["help"] } async-trait = { workspace = true } +axum = { workspace = true } bytes = { workspace = true } chrono = { workspace = true } colored = { workspace = true } @@ -30,6 +31,7 @@ memory-accounting = { workspace = true } metrics = { workspace = true } ottl = { workspace = true } papaya = { workspace = true } +rmp = { workspace = true } prometheus-exposition = { workspace = true } prost-types = { workspace = true } saluki-api = { workspace = true } diff --git a/bin/agent-data-plane/src/cli/run.rs b/bin/agent-data-plane/src/cli/run.rs index 79b1585a1e2..1f41c683a57 100644 --- a/bin/agent-data-plane/src/cli/run.rs +++ 
b/bin/agent-data-plane/src/cli/run.rs @@ -17,11 +17,11 @@ use saluki_components::{ encoders::{ BufferedIncrementalConfiguration, DatadogApmStatsEncoderConfiguration, DatadogEventsConfiguration, DatadogLogsConfiguration, DatadogMetricsConfiguration, DatadogServiceChecksConfiguration, - DatadogTraceConfiguration, + V1DatadogTraceConfiguration, }, forwarders::{DatadogConfiguration, OtlpForwarderConfiguration}, relays::otlp::OtlpRelayConfiguration, - sources::{DogStatsDConfiguration, OtlpConfiguration}, + sources::{apm::sampling_rates::V1SamplingRatesHandle, ApmReceiverConfiguration, DogStatsDConfiguration, OtlpConfiguration}, transforms::{ AggregateConfiguration, ApmStatsTransformConfiguration, ChainedConfiguration, DogStatsDMapperConfiguration, DogStatsDPrefixFilterConfiguration, HostEnrichmentConfiguration, HostTagsConfiguration, @@ -41,6 +41,7 @@ use crate::{ components::{ apm_onboarding::ApmOnboardingConfiguration, ottl_filter_processor::OttlFilterConfiguration, ottl_transform_processor::OttlTransformConfiguration, tag_filterlist::TagFilterlistConfiguration, + v1_apm_onboarding::V1ApmOnboardingConfiguration, }, internal::{create_internal_supervisor, remote_agent::RemoteAgentBootstrap}, }; @@ -135,25 +136,14 @@ pub async fn handle_run_command( let dsd_stats_config = DogStatsDStatisticsConfiguration::new(); - // Create our primary data topology and spawn any internal processes, which will ensure all relevant components are - // registered and accounted for in terms of memory usage. 
- let blueprint = create_topology( - &config, - &dp_config, - &env_provider, - &component_registry, - dsd_stats_config.clone(), - ) - .await?; - // Create the internal supervisor (control plane + observability) let mut internal_supervisor = create_internal_supervisor( &config, &dp_config, &component_registry, health_registry.clone(), - env_provider, - dsd_stats_config, + env_provider.clone(), + dsd_stats_config.clone(), ra_bootstrap, ) .await @@ -178,19 +168,31 @@ pub async fn handle_run_command( } } - // Bounds validation succeeded, so now we'll build and spawn the topology. - let built_topology = blueprint.build().await?; - let mut running_topology = built_topology.spawn(&health_registry, memory_limiter).await?; + // Build and spawn the topology only when at least one data pipeline is enabled. + // `topology_required()` is true whenever APM, DogStatsD, or OTLP is enabled; when none + // of them is, there are no topology components, so there is nothing to build or wait on. + let mut running_topology = if dp_config.topology_required() { + let blueprint = create_topology( + &config, + &dp_config, + &env_provider, + &component_registry, + dsd_stats_config, + ) + .await?; + + let built_topology = blueprint.build().await?; + Some(built_topology.spawn(&health_registry, memory_limiter).await?) + } else { + None + }; let startup_time = started.elapsed(); // Emit the startup metrics for the application. emit_startup_metrics(); - info!( - init_time_ms = startup_time.as_millis(), - "Topology running. Waiting for interrupt..." - ); + info!(init_time_ms = startup_time.as_millis(), "Waiting for interrupt..."); // Wait for all components to become ready. tokio::spawn(async move { @@ -239,7 +241,12 @@ pub async fn handle_run_command( } } } - _ = running_topology.wait_for_unexpected_finish() => { + _ = async { + match running_topology.as_mut() { + Some(t) => t.wait_for_unexpected_finish().await, + None => std::future::pending().await, + } + } => { error!("Topology component unexpectedly finished.
Shutting down..."); topology_failed = true; }, @@ -248,8 +255,11 @@ pub async fn handle_run_command( } } - // Shutdown the primary topology - let topology_result = running_topology.shutdown_with_timeout(Duration::from_secs(30)).await; + // Shutdown the primary topology if one was running. + let topology_result = match running_topology { + Some(t) => t.shutdown_with_timeout(Duration::from_secs(30)).await, + None => Ok(()), + }; // Signal the internal supervisor to shutdown (if still running) and drive it to completion. // If the supervisor already exited (i.e., the select! above matched its branch), both the send @@ -302,6 +312,7 @@ async fn create_topology( if dp_config.metrics_pipeline_required() || dp_config.logs_pipeline_required() || dp_config.traces_pipeline_required() + || dp_config.apm_pipeline_required() { let dd_forwarder_config = DatadogConfiguration::from_configuration(config).error_context("Failed to configure Datadog forwarder.")?; @@ -329,9 +340,80 @@ async fn create_topology( add_otlp_pipeline_to_blueprint(&mut blueprint, config, dp_config, env_provider)?; } + if dp_config.apm_pipeline_required() { + add_apm_pipeline_to_blueprint(&mut blueprint, config, dp_config, env_provider).await?; + } + Ok(blueprint) } +async fn add_apm_pipeline_to_blueprint( + blueprint: &mut TopologyBlueprint, config: &GenericConfiguration, dp_config: &DataPlaneConfiguration, + env_provider: &ADPEnvironmentProvider, +) -> Result<(), GenericError> { + let sampling_rates = V1SamplingRatesHandle::new(); + + let apm_receiver_config = ApmReceiverConfiguration::from_configuration(config) + .error_context("Failed to configure APM receiver.")? 
+ .with_sampling_rates(sampling_rates.clone()); + + let v1_trace_obfuscation_config = TraceObfuscationConfiguration::from_apm_configuration(config) + .error_context("Failed to configure trace obfuscation.")?; + + let v1_trace_sampler_config = TraceSamplerConfiguration::from_configuration(config) + .error_context("Failed to configure V1 trace sampler.")? + .with_sampling_rates(sampling_rates.clone()); + + let v1_traces_enrich_config = ChainedConfiguration::default() + .with_transform_builder("v1_apm_onboarding", V1ApmOnboardingConfiguration) + .with_transform_builder("trace_obfuscation", v1_trace_obfuscation_config) + .with_transform_builder("v1_trace_sampler", v1_trace_sampler_config); + + let v1_dd_traces_config = V1DatadogTraceConfiguration::from_configuration(config) + .error_context("Failed to configure V1 Datadog Traces encoder.")? + .with_environment_provider(env_provider.clone()) + .await?; + + let apm_stats_config = ApmStatsTransformConfiguration::from_configuration(config) + .error_context("Failed to configure APM stats transform.")? + .with_environment_provider(env_provider.clone()) + .await?; + + blueprint + .add_source("apm_in", apm_receiver_config)? + .add_transform("v1_traces_enrich", v1_traces_enrich_config)? + .add_transform("apm_dd_apm_stats", apm_stats_config)? + .add_encoder("v1_dd_traces_encode", v1_dd_traces_config)? + .connect_component("v1_traces_enrich", ["apm_in.traces"])? + .connect_component("v1_dd_traces_encode", ["v1_traces_enrich"])? + .connect_component("apm_dd_apm_stats", ["v1_traces_enrich"])? + .connect_component("dd_out", ["v1_dd_traces_encode"])?; + + // `dd_stats_encode` is shared with the OTLP traces pipeline when both are active. + // + // APM-only: we own the encoder — register it first, then connect apm_dd_apm_stats + // as its input and dd_out as its output. 
+ // + // OTLP+APM: the encoder already exists (registered by add_baseline_traces_pipeline) + // and dd_out is already wired to it; we only need to add apm_dd_apm_stats + // as a second upstream. Adding the dd_out edge again would create a + // duplicate graph edge that forwards every stats payload twice. + if !dp_config.traces_pipeline_required() { + let dd_apm_stats_encoder = DatadogApmStatsEncoderConfiguration::from_configuration(config) + .error_context("Failed to configure Datadog APM Stats encoder.")? + .with_environment_provider(env_provider.clone()) + .await?; + blueprint + .add_encoder("dd_stats_encode", dd_apm_stats_encoder)? + .connect_component("dd_stats_encode", ["apm_dd_apm_stats"])? + .connect_component("dd_out", ["dd_stats_encode"])?; + } else { + blueprint.connect_component("dd_stats_encode", ["apm_dd_apm_stats"])?; + } + + Ok(()) +} + async fn add_baseline_metrics_pipeline_to_blueprint( blueprint: &mut TopologyBlueprint, config: &GenericConfiguration, dp_config: &DataPlaneConfiguration, env_provider: &ADPEnvironmentProvider, @@ -381,7 +463,7 @@ async fn add_baseline_logs_pipeline_to_blueprint( async fn add_baseline_traces_pipeline_to_blueprint( blueprint: &mut TopologyBlueprint, config: &GenericConfiguration, env_provider: &ADPEnvironmentProvider, ) -> Result<(), GenericError> { - let dd_traces_config = DatadogTraceConfiguration::from_configuration(config) + let dd_traces_config = V1DatadogTraceConfiguration::from_configuration(config) .error_context("Failed to configure Datadog Traces encoder.")? 
.with_environment_provider(env_provider.clone()) .await?; diff --git a/bin/agent-data-plane/src/components/apm_onboarding/mod.rs b/bin/agent-data-plane/src/components/apm_onboarding/mod.rs index 19300357bbe..d009e770c41 100644 --- a/bin/agent-data-plane/src/components/apm_onboarding/mod.rs +++ b/bin/agent-data-plane/src/components/apm_onboarding/mod.rs @@ -6,15 +6,14 @@ use saluki_common::{ }; use saluki_core::{ components::{transforms::*, ComponentContext}, - data_model::event::trace::{Span, Trace}, + data_model::event::trace::{AttributeValue, Span, Trace}, topology::EventsBuffer, }; use saluki_error::GenericError; use stringtheory::MetaString; use tracing::debug; -mod install_info; -use self::install_info::InstallInfo; +use super::install_info::InstallInfo; static META_TAG_INSTALL_ID: MetaString = MetaString::from_static("_dd.install.id"); static META_TAG_INSTALL_TYPE: MetaString = MetaString::from_static("_dd.install.type"); @@ -102,6 +101,7 @@ impl SynchronousTransform for ApmOnboarding { } fn get_root_span_from_trace_mut(trace: &mut Trace) -> Option<&mut Span> { + let trace_id_low = trace.trace_id_low; let spans = trace.spans_mut(); if spans.is_empty() { return None; @@ -130,7 +130,7 @@ fn get_root_span_from_trace_mut(trace: &mut Trace) -> Option<&mut Span> { if parent_to_child.len() != 1 { debug!( - trace_id = spans[0].trace_id(), + trace_id = trace_id_low, "Failed to reliably identify a root span for a trace." 
); } @@ -155,7 +155,5 @@ fn add_onboarding_metadata_to_span(span: &mut Span, install_info: &InstallInfo) } fn add_meta_entry_if_missing(span: &mut Span, key: &MetaString, value: &MetaString) { - if !span.meta().contains_key(key) { - span.meta_mut().insert(key.clone(), value.clone()); - } + span.attributes.entry(key.clone()).or_insert_with(|| AttributeValue::String(value.clone())); } diff --git a/bin/agent-data-plane/src/components/apm_onboarding/install_info.rs b/bin/agent-data-plane/src/components/install_info.rs similarity index 87% rename from bin/agent-data-plane/src/components/apm_onboarding/install_info.rs rename to bin/agent-data-plane/src/components/install_info.rs index eb18cbaf711..f3e7cd60cc3 100644 --- a/bin/agent-data-plane/src/components/apm_onboarding/install_info.rs +++ b/bin/agent-data-plane/src/components/install_info.rs @@ -40,12 +40,8 @@ impl InstallInfo { pub async fn load_or_create() -> Result { let path = PlatformSettings::get_config_dir_path().join("install.json"); - // See if the file exists, and load it if so. let (install_info, should_write) = match tokio::fs::read(&path).await { Ok(data) => { - // Try and decode the installation info. - // - // If we fail, we don't try to update it. let install_info = serde_json::from_slice(&data).with_error_context(|| { format!( "Failed to decode installation info file '{}'.", @@ -57,10 +53,8 @@ impl InstallInfo { } Err(e) => match e.kind() { - // If the file doesn't exist, then _we'll_ try and create it. ErrorKind::NotFound => (Self::from_environment(), true), - // There was a legitimate error so we bail out. _ => { return Err(e).with_error_context(|| { format!("Failed to read installation info file '{}'.", path.as_path().display()) @@ -69,9 +63,6 @@ impl InstallInfo { }, }; - // Write it out if we were the ones to create it. - // - // If we fail to write it out, then we also just bail out. 
if should_write { let install_info_json = serde_json::to_vec(&install_info).error_context("Failed to serialize installation info to JSON.")?; diff --git a/bin/agent-data-plane/src/components/mod.rs b/bin/agent-data-plane/src/components/mod.rs index 6f9816a6811..96ae72fbfb0 100644 --- a/bin/agent-data-plane/src/components/mod.rs +++ b/bin/agent-data-plane/src/components/mod.rs @@ -1,4 +1,6 @@ pub mod apm_onboarding; +mod install_info; pub mod ottl_filter_processor; pub mod ottl_transform_processor; pub mod tag_filterlist; +pub mod v1_apm_onboarding; diff --git a/bin/agent-data-plane/src/components/ottl_filter_processor/mod.rs b/bin/agent-data-plane/src/components/ottl_filter_processor/mod.rs index 448e4806f6c..8e9b805b1af 100644 --- a/bin/agent-data-plane/src/components/ottl_filter_processor/mod.rs +++ b/bin/agent-data-plane/src/components/ottl_filter_processor/mod.rs @@ -99,12 +99,12 @@ impl OttlFilter { /// Returns true if the span should be dropped (any condition matched). /// /// Uses `self.current_trace` (set in `transform_buffer`) to access resource tags. 
- fn should_drop_span(&self, trace: &Trace, span: &Span) -> bool { + fn should_drop_span(&self, _trace: &Trace, span: &Span) -> bool { if self.span_parsers.is_empty() { return false; } - let mut ctx = SpanFilterContext::new(span, trace.resource_tags()); + let mut ctx = SpanFilterContext::new(span, &_trace.attributes); for parser in &self.span_parsers { match parser.execute(&mut ctx) { @@ -153,32 +153,36 @@ mod tests { use saluki_common::collections::FastHashMap; use saluki_config::ConfigurationLoader; - use saluki_context::tags::TagSet; use saluki_core::{ components::{transforms::*, ComponentContext}, - data_model::event::{trace::Span, trace::Trace, Event}, + data_model::event::{trace::{AttributeValue, Span, Trace}, Event}, topology::{ComponentId, EventsBuffer}, }; use stringtheory::MetaString; use super::*; - fn make_span(trace_id: u64, span_id: u64, meta: HashMap) -> Span { + fn make_span(_trace_id: u64, span_id: u64, meta: HashMap) -> Span { let mut meta_map = FastHashMap::default(); for (k, v) in meta { meta_map.insert(MetaString::from(k), MetaString::from(v)); } - Span::new("svc", "op", "res", "web", trace_id, span_id, 0, 0, 1000, 0).with_meta(meta_map) + Span::new("svc", "op", "res", "web", span_id, 0, 0, 1000, 0).with_meta(meta_map) } fn make_trace(spans: Vec, resource_tags: Option>) -> Trace { - let mut tag_set = TagSet::default(); + let mut trace = Trace::new(spans); if let Some(tags) = resource_tags { - for t in tags { - tag_set.insert_tag(t); + for tag_str in tags { + if let Some((k, v)) = tag_str.split_once(':') { + trace.attributes.insert( + MetaString::from(k), + AttributeValue::String(MetaString::from(v)), + ); + } } } - Trace::new(spans, tag_set) + trace } fn span_count_in_buffer(buffer: &EventsBuffer) -> usize { @@ -498,6 +502,7 @@ mod tests { assert!(buffer.try_push(Event::Trace(trace)).is_none()); transform.transform_buffer(&mut buffer); assert_eq!(span_count_in_buffer(&buffer), 2); + use saluki_core::data_model::event::trace::AttributeValue; let 
remaining_labels: Vec = buffer .into_iter() .filter_map(|e| match e { @@ -506,10 +511,10 @@ mod tests { }) .flatten() .filter_map(|s| { - s.meta() + s.attributes .iter() .find(|(k, _)| k.as_ref() == "label") - .map(|(_, v)| v.as_ref().to_string()) + .and_then(|(_, v)| AttributeValue::as_string(v).map(|s| s.as_ref().to_string())) }) .collect(); assert_eq!( diff --git a/bin/agent-data-plane/src/components/ottl_filter_processor/span_context.rs b/bin/agent-data-plane/src/components/ottl_filter_processor/span_context.rs index f4b3f575095..0120f298269 100644 --- a/bin/agent-data-plane/src/components/ottl_filter_processor/span_context.rs +++ b/bin/agent-data-plane/src/components/ottl_filter_processor/span_context.rs @@ -11,8 +11,9 @@ use std::collections::HashMap; use std::sync::Arc; use ottl::{EvalContextFamily, Field, IndexExpr, PathAccessor, PathResolverMap, Value}; -use saluki_context::tags::TagSet; -use saluki_core::data_model::event::trace::Span; +use saluki_common::collections::FastHashMap; +use saluki_core::data_model::event::trace::{AttributeValue, Span}; +use stringtheory::MetaString; /// Family type for the span filter evaluation context. /// @@ -33,15 +34,15 @@ impl EvalContextFamily for SpanFilterFamily { pub struct SpanFilterContext<'a> { /// Reference to the span being evaluated. pub(super) span: &'a Span, - /// Reference to the trace's resource-level tags. - pub(super) resource_tags: &'a TagSet, + /// Reference to the trace's resource-level attributes. + pub(super) resource_attrs: &'a FastHashMap, } impl<'a> SpanFilterContext<'a> { - /// Creates a context from references to the current span and resource tags. + /// Creates a context from references to the current span and resource attributes. 
#[inline] - pub fn new(span: &'a Span, resource_tags: &'a TagSet) -> Self { - Self { span, resource_tags } + pub fn new(span: &'a Span, resource_attrs: &'a FastHashMap) -> Self { + Self { span, resource_attrs } } } @@ -54,11 +55,11 @@ pub struct SpanAttributesAccessor; impl PathAccessor for SpanAttributesAccessor { fn get<'a>(&self, ctx: &SpanFilterContext<'a>, fields: &[Field]) -> ottl::Result { let value = if let Some(IndexExpr::String(key)) = fields.first().and_then(|f| f.keys.first()) { - ctx.span - .meta() - .get(key.as_str()) - .map(|v| Value::string(v.as_ref())) - .unwrap_or(Value::Nil) + match ctx.span.attributes.get(key.as_str()) { + Some(AttributeValue::String(s)) => Value::string(s.as_ref()), + Some(AttributeValue::Float(f)) => Value::Float(*f), + Some(_) | None => Value::Nil, + } } else { Value::Nil }; @@ -85,11 +86,11 @@ impl PathAccessor for ResourceAttributesAccessor { fn get<'a>(&self, ctx: &SpanFilterContext<'a>, fields: &[Field]) -> ottl::Result { let attrs_field = fields.get(1); let value = if let Some(IndexExpr::String(key)) = attrs_field.and_then(|f| f.keys.first()) { - ctx.resource_tags - .get_single_tag(key.as_str()) - .and_then(|t| t.value()) - .map(Value::string) - .unwrap_or(Value::Nil) + match ctx.resource_attrs.get(key.as_str()) { + Some(AttributeValue::String(s)) => Value::string(s.as_ref()), + Some(AttributeValue::Float(f)) => Value::Float(*f), + Some(_) | None => Value::Nil, + } } else if attrs_field.is_none_or(|f| f.keys.is_empty()) { Value::Map(HashMap::new()) } else { diff --git a/bin/agent-data-plane/src/components/ottl_transform_processor/mod.rs b/bin/agent-data-plane/src/components/ottl_transform_processor/mod.rs index ea8d0354711..1211e0b3136 100644 --- a/bin/agent-data-plane/src/components/ottl_transform_processor/mod.rs +++ b/bin/agent-data-plane/src/components/ottl_transform_processor/mod.rs @@ -10,13 +10,14 @@ use async_trait::async_trait; use memory_accounting::{MemoryBounds, MemoryBoundsBuilder}; use ottl::{CallbackMap, 
EnumMap, OttlParser}; +use saluki_common::collections::FastHashMap; use saluki_config::GenericConfiguration; -use saluki_context::tags::TagSet; use saluki_core::{ components::{transforms::*, ComponentContext}, - data_model::event::trace::Span, + data_model::event::trace::{AttributeValue, Span}, topology::EventsBuffer, }; +use stringtheory::MetaString; use saluki_error::{generic_error, GenericError}; use tracing::{debug, error}; @@ -106,8 +107,8 @@ impl OttlTransform { /// Each statement is executed in order. For editor statements (e.g. `set`), the `where` /// clause is evaluated first; if it matches (or is absent), the editor function runs. /// Errors are handled according to `error_mode`. - fn transform_span(&self, span: &mut Span, resource_tags: &TagSet) { - let mut ctx = SpanTransformContext::new(span, resource_tags); + fn transform_span(&self, span: &mut Span, resource_attrs: &FastHashMap) { + let mut ctx = SpanTransformContext::new(span, resource_attrs); for parser in &self.span_parsers { match parser.execute(&mut ctx) { @@ -138,10 +139,11 @@ impl SynchronousTransform for OttlTransform { for event in event_buffer { if let Some(trace) = event.try_as_trace_mut() { - let resource_tags = trace.resource_tags().clone(); + let resource_attrs = std::mem::take(&mut trace.attributes); for span in trace.spans_mut() { - self.transform_span(span, &resource_tags); + self.transform_span(span, &resource_attrs); } + trace.attributes = resource_attrs; } } } @@ -153,12 +155,11 @@ mod tests { use saluki_common::collections::FastHashMap; use saluki_config::ConfigurationLoader; - use saluki_context::tags::TagSet; use saluki_core::{ components::{transforms::*, ComponentContext}, data_model::event::{ service_check::{CheckStatus, ServiceCheck}, - trace::{Span, Trace}, + trace::{AttributeValue, Span, Trace}, Event, }, topology::{ComponentId, EventsBuffer}, @@ -169,25 +170,31 @@ mod tests { // ---- Helpers ---- - fn make_span(trace_id: u64, span_id: u64, meta: HashMap) -> Span { + fn 
make_span(_trace_id: u64, span_id: u64, meta: HashMap) -> Span { let mut meta_map = FastHashMap::default(); for (k, v) in meta { meta_map.insert(MetaString::from(k), MetaString::from(v)); } - Span::new("svc", "op", "res", "web", trace_id, span_id, 0, 0, 1000, 0).with_meta(meta_map) + Span::new("svc", "op", "res", "web", span_id, 0, 0, 1000, 0).with_meta(meta_map) } fn make_trace(spans: Vec, resource_tags: Option>) -> Trace { - let mut tag_set = TagSet::default(); + let mut trace = Trace::new(spans); if let Some(tags) = resource_tags { - for t in tags { - tag_set.insert_tag(t); + for tag_str in tags { + if let Some((k, v)) = tag_str.split_once(':') { + trace.attributes.insert( + MetaString::from(k), + AttributeValue::String(MetaString::from(v)), + ); + } } } - Trace::new(spans, tag_set) + trace } fn get_span_attr(buffer: &EventsBuffer, span_index: usize, key: &str) -> Option { + use saluki_core::data_model::event::trace::AttributeValue; buffer .into_iter() .filter_map(|e| match e { @@ -196,7 +203,7 @@ mod tests { }) .flat_map(|spans| spans.iter()) .nth(span_index) - .and_then(|span| span.meta().get(key).map(|v| v.as_ref().to_string())) + .and_then(|span| span.attributes.get(key).and_then(AttributeValue::as_string).map(|v| v.as_ref().to_string())) } async fn build_transform(cfg_json: Option) -> Box { @@ -721,9 +728,10 @@ mod tests { }) .expect("trace should still be in buffer"); let tag_val = trace_out - .resource_tags() - .get_single_tag("key") - .and_then(|t| t.value().map(|v| v.to_string())); + .attributes + .get("key") + .and_then(AttributeValue::as_string) + .map(|v| v.as_ref().to_string()); assert_eq!( tag_val.as_deref(), Some("original"), @@ -815,7 +823,12 @@ mod tests { trace_span_x = t .spans() .first() - .and_then(|s| s.meta().get("x").map(|v| v.as_ref().to_string())); + .and_then(|s| { + s.attributes + .get("x") + .and_then(AttributeValue::as_string) + .map(|v| v.as_ref().to_string()) + }); } _ => {} } diff --git 
a/bin/agent-data-plane/src/components/ottl_transform_processor/span_context.rs b/bin/agent-data-plane/src/components/ottl_transform_processor/span_context.rs index 63a6e24fffe..29799231c14 100644 --- a/bin/agent-data-plane/src/components/ottl_transform_processor/span_context.rs +++ b/bin/agent-data-plane/src/components/ottl_transform_processor/span_context.rs @@ -15,8 +15,8 @@ use std::collections::HashMap; use std::sync::Arc; use ottl::{EvalContextFamily, Field, IndexExpr, PathAccessor, PathResolverMap, Value}; -use saluki_context::tags::TagSet; -use saluki_core::data_model::event::trace::Span; +use saluki_common::collections::FastHashMap; +use saluki_core::data_model::event::trace::{AttributeValue, Span}; use stringtheory::MetaString; /// Family type for the span transform evaluation context. @@ -39,34 +39,34 @@ impl EvalContextFamily for SpanTransformFamily { pub struct SpanTransformContext<'a> { /// Mutable reference to the span being transformed. pub(super) span: &'a mut Span, - /// Reference to the trace's resource-level tags (read-only). - pub(super) resource_tags: &'a TagSet, + /// Reference to the trace's resource-level attributes (read-only). + pub(super) resource_attrs: &'a FastHashMap, } impl<'a> SpanTransformContext<'a> { - /// Creates a context from a mutable span reference and immutable resource tags. + /// Creates a context from a mutable span reference and immutable resource attributes. #[inline] - pub fn new(span: &'a mut Span, resource_tags: &'a TagSet) -> Self { - Self { span, resource_tags } + pub fn new(span: &'a mut Span, resource_attrs: &'a FastHashMap) -> Self { + Self { span, resource_attrs } } } -/// Path accessor for `attributes` (span-level string metadata). +/// Path accessor for `attributes` (span-level attributes). /// -/// Reads from and writes to the span's `meta` map, which stores `MetaString` key-value -/// pairs. 
On `set`, string values are inserted directly; `Nil` removes the key; other -/// value types are converted to their display representation. +/// Reads from and writes to the span's `attributes` map. +/// On `set`, string values are inserted as `AttributeValue::String`; `Nil` removes the key; +/// `Int`, `Float`, and `Bool` values are stringified and also stored as `AttributeValue::String`. #[derive(Debug)] pub struct SpanAttributesAccessor; impl PathAccessor for SpanAttributesAccessor { fn get<'a>(&self, ctx: &SpanTransformContext<'a>, fields: &[Field]) -> ottl::Result { let value = if let Some(IndexExpr::String(key)) = fields.first().and_then(|f| f.keys.first()) { - ctx.span - .meta() - .get(key.as_str()) - .map(|v| Value::string(v.as_ref())) - .unwrap_or(Value::Nil) + match ctx.span.attributes.get(key.as_str()) { + Some(AttributeValue::String(s)) => Value::string(s.as_ref()), + Some(AttributeValue::Float(f)) => Value::Float(*f), + Some(_) | None => Value::Nil, + } } else { Value::Nil }; @@ -77,27 +77,30 @@ impl PathAccessor for SpanAttributesAccessor { if let Some(IndexExpr::String(key)) = fields.first().and_then(|f| f.keys.first()) { match value { Value::Nil => { - ctx.span.meta_mut().remove(key.as_str()); + ctx.span.attributes.remove(key.as_str()); } Value::String(s) => { - ctx.span - .meta_mut() - .insert(MetaString::from(key.as_str()), MetaString::from(Arc::clone(s))); + ctx.span.attributes.insert( + MetaString::from(key.as_str()), + AttributeValue::String(MetaString::from(Arc::clone(s))), + ); } Value::Int(n) => { - ctx.span - .meta_mut() - .insert(MetaString::from(key.as_str()), MetaString::from(n.to_string().as_str())); + ctx.span.attributes.insert( + MetaString::from(key.as_str()), + AttributeValue::String(MetaString::from(n.to_string().as_str())), + ); } Value::Float(f) => { - ctx.span - .meta_mut() - .insert(MetaString::from(key.as_str()), MetaString::from(f.to_string().as_str())); + ctx.span.attributes.insert( + MetaString::from(key.as_str()), +
AttributeValue::String(MetaString::from(f.to_string().as_str())), + ); } Value::Bool(b) => { - ctx.span.meta_mut().insert( + ctx.span.attributes.insert( MetaString::from(key.as_str()), - MetaString::from(if *b { "true" } else { "false" }), + AttributeValue::String(MetaString::from(if *b { "true" } else { "false" })), ); } _ => { @@ -123,11 +126,11 @@ impl PathAccessor for ResourceAttributesAccessor { fn get<'a>(&self, ctx: &SpanTransformContext<'a>, fields: &[Field]) -> ottl::Result { let attrs_field = fields.get(1); let value = if let Some(IndexExpr::String(key)) = attrs_field.and_then(|f| f.keys.first()) { - ctx.resource_tags - .get_single_tag(key.as_str()) - .and_then(|t| t.value()) - .map(Value::string) - .unwrap_or(Value::Nil) + match ctx.resource_attrs.get(key.as_str()) { + Some(AttributeValue::String(s)) => Value::string(s.as_ref()), + Some(AttributeValue::Float(f)) => Value::Float(*f), + Some(_) | None => Value::Nil, + } } else if attrs_field.is_none_or(|f| f.keys.is_empty()) { Value::Map(HashMap::new()) } else { diff --git a/bin/agent-data-plane/src/components/v1_apm_onboarding/mod.rs b/bin/agent-data-plane/src/components/v1_apm_onboarding/mod.rs new file mode 100644 index 00000000000..7adc7870064 --- /dev/null +++ b/bin/agent-data-plane/src/components/v1_apm_onboarding/mod.rs @@ -0,0 +1,136 @@ +use async_trait::async_trait; +use memory_accounting::{MemoryBounds, MemoryBoundsBuilder}; +use saluki_common::{ + collections::{FastHashSet, PrehashedHashMap}, + strings::unsigned_integer_to_string, +}; +use saluki_core::{ + components::{transforms::*, ComponentContext}, + data_model::event::{ + trace::{AttributeValue, Span}, + Event, + }, + topology::EventsBuffer, +}; +use saluki_error::GenericError; +use stringtheory::MetaString; +use tracing::debug; + +use super::install_info::InstallInfo; + +static META_TAG_INSTALL_ID: MetaString = MetaString::from_static("_dd.install.id"); +static META_TAG_INSTALL_TYPE: MetaString = 
MetaString::from_static("_dd.install.type"); +static META_TAG_INSTALL_TIME: MetaString = MetaString::from_static("_dd.install.time"); + +/// APM Onboarding synchronous transform. +/// +/// Enriches APM trace chunks on a service-by-service basis with metadata indicating that a given +/// service has been onboarded to Datadog APM. +#[derive(Default)] +pub struct V1ApmOnboardingConfiguration; + +#[async_trait] +impl SynchronousTransformBuilder for V1ApmOnboardingConfiguration { + async fn build(&self, _context: ComponentContext) -> Result, GenericError> { + let install_info = match InstallInfo::load_or_create().await { + Ok(info) => Some(info), + Err(e) => { + debug!(error = %e, "Failed to load or create install info. Skipping."); + None + } + }; + + Ok(Box::new(V1ApmOnboarding::from_install_info(install_info))) + } +} + +impl MemoryBounds for V1ApmOnboardingConfiguration { + fn specify_bounds(&self, builder: &mut MemoryBoundsBuilder) { + builder.minimum().with_single_value::("component struct"); + } +} + +pub struct V1ApmOnboarding { + install_info: Option, + first_span_by_service: FastHashSet, +} + +impl V1ApmOnboarding { + fn from_install_info(install_info: Option) -> Self { + Self { + install_info, + first_span_by_service: FastHashSet::default(), + } + } + + fn enrich_spans(&mut self, spans: &mut [Span]) { + let root_span = match get_root_span_mut(spans) { + Some(s) => s, + None => { + debug!("Failed to get the root span of the APM trace."); + return; + } + }; + + let service = MetaString::from(root_span.service()); + if !self.first_span_by_service.contains(&service) { + self.first_span_by_service.insert(service); + let install_info = self.install_info.as_ref().unwrap(); + add_onboarding_metadata(root_span, install_info); + } + } +} + +impl SynchronousTransform for V1ApmOnboarding { + fn transform_buffer(&mut self, event_buffer: &mut EventsBuffer) { + if self.install_info.is_none() { + return; + } + + for event in event_buffer { + if let Event::Trace(trace) = 
event { + self.enrich_spans(trace.spans_mut()); + } + } + } +} + +fn get_root_span_mut(spans: &mut [Span]) -> Option<&mut Span> { + if spans.is_empty() { + return None; + } + + let mut parent_to_child = PrehashedHashMap::default(); + + for (idx, span) in spans.iter().enumerate().rev() { + if span.parent_id() == 0 { + return Some(&mut spans[idx]); + } + parent_to_child.insert(span.parent_id(), idx); + } + + for span in spans.iter() { + parent_to_child.remove(&span.span_id()); + } + + if parent_to_child.len() != 1 { + debug!("Failed to reliably identify a root span for an APM trace."); + } + + if let Some(root_span_idx) = parent_to_child.values().next() { + return Some(&mut spans[*root_span_idx]); + } + + spans.last_mut() +} + +fn add_onboarding_metadata(span: &mut Span, install_info: &InstallInfo) { + let install_time = unsigned_integer_to_string(install_info.install_time); + add_meta_if_missing(span, META_TAG_INSTALL_ID.clone(), install_info.install_id.clone()); + add_meta_if_missing(span, META_TAG_INSTALL_TYPE.clone(), install_info.install_type.clone()); + add_meta_if_missing(span, META_TAG_INSTALL_TIME.clone(), install_time); +} + +fn add_meta_if_missing(span: &mut Span, key: MetaString, value: MetaString) { + span.attributes.entry(key).or_insert(AttributeValue::String(value)); +} diff --git a/bin/agent-data-plane/src/config.rs b/bin/agent-data-plane/src/config.rs index 6f4fc1189d2..c2aaeb97d54 100644 --- a/bin/agent-data-plane/src/config.rs +++ b/bin/agent-data-plane/src/config.rs @@ -13,6 +13,7 @@ pub struct DataPlaneConfiguration { secure_api_listen_address: ListenAddress, telemetry_enabled: bool, telemetry_listen_addr: ListenAddress, + apm: DataPlaneApmConfiguration, dogstatsd: DataPlaneDogStatsDConfiguration, otlp: DataPlaneOtlpConfiguration, } @@ -48,6 +49,7 @@ impl DataPlaneConfiguration { telemetry_listen_addr: config .try_get_typed("data_plane.telemetry_listen_addr")? 
.unwrap_or_else(|| ListenAddress::any_tcp(5102)), + apm: DataPlaneApmConfiguration::from_configuration(config)?, dogstatsd: DataPlaneDogStatsDConfiguration::from_configuration(config)?, otlp: DataPlaneOtlpConfiguration::from_configuration(config)?, }) @@ -97,6 +99,11 @@ impl DataPlaneConfiguration { &self.telemetry_listen_addr } + /// Returns a reference to the APM-specific data plane configuration. + pub const fn apm(&self) -> &DataPlaneApmConfiguration { + &self.apm + } + /// Returns a reference to the DogStatsD-specific data plane configuration. pub const fn dogstatsd(&self) -> &DataPlaneDogStatsDConfiguration { &self.dogstatsd @@ -109,7 +116,12 @@ impl DataPlaneConfiguration { /// Returns `true` if any data pipelines are enabled. pub const fn data_pipelines_enabled(&self) -> bool { - self.dogstatsd().enabled() || self.otlp().enabled() + self.topology_required() + } + + /// Returns `true` if the primary topology needs to be built and run. + pub const fn topology_required(&self) -> bool { + self.apm().enabled() || self.dogstatsd().enabled() || self.otlp().enabled() } /// Returns `true` if the metrics pipeline is required. @@ -142,6 +154,33 @@ impl DataPlaneConfiguration { // - OTLP is enabled and not in proxy mode or proxy mode is enabled and proxy traces are disabled self.otlp().enabled() && (!self.otlp().proxy().enabled() || !self.otlp().proxy().proxy_traces()) } + + /// Returns `true` if the APM pipeline is required. + /// + /// This indicates that the native APM trace ingestion pipeline (`apm_in` → `v1_traces_enrich` → + /// `v1_dd_traces_encode` / `v1_dd_apm_stats`) needs to be built. + pub const fn apm_pipeline_required(&self) -> bool { + self.apm().enabled() + } +} + +/// APM-specific data plane configuration. 
+#[derive(Clone, Debug)] +pub struct DataPlaneApmConfiguration { + enabled: bool, +} + +impl DataPlaneApmConfiguration { + fn from_configuration(config: &GenericConfiguration) -> Result { + Ok(Self { + enabled: config.try_get_typed("data_plane.apm.enabled")?.unwrap_or(false), + }) + } + + /// Returns `true` if the APM receiver is enabled. + pub const fn enabled(&self) -> bool { + self.enabled + } } /// DogStatsD-specific data plane configuration. @@ -297,6 +336,30 @@ mod tests { // `data_plane.dogstatsd.enabled`. These tests guard against a regression where ADP starts // reading `use_dogstatsd` directly, which would let ADP and the Core Agent disagree. + #[tokio::test] + async fn apm_pipeline_required_when_apm_enabled() { + let (config, _) = ConfigurationLoader::for_tests( + Some(json!({ "data_plane": { "apm": { "enabled": true } } })), + None, + false, + ) + .await; + let dp = DataPlaneConfiguration::from_configuration(&config).expect("parse config"); + assert!(dp.apm_pipeline_required()); + } + + #[tokio::test] + async fn apm_pipeline_not_required_when_apm_disabled() { + let (config, _) = ConfigurationLoader::for_tests( + Some(json!({ "data_plane": { "apm": { "enabled": false } } })), + None, + false, + ) + .await; + let dp = DataPlaneConfiguration::from_configuration(&config).expect("parse config"); + assert!(!dp.apm_pipeline_required()); + } + #[tokio::test] async fn use_dogstatsd_true_does_not_enable_dogstatsd() { let (config, _) = ConfigurationLoader::for_tests(Some(json!({ "use_dogstatsd": true })), None, false).await; diff --git a/lib/saluki-components/Cargo.toml b/lib/saluki-components/Cargo.toml index 6dadba74485..d05fb22fd09 100644 --- a/lib/saluki-components/Cargo.toml +++ b/lib/saluki-components/Cargo.toml @@ -47,6 +47,7 @@ prost = { workspace = true } protobuf = { workspace = true } rand = { workspace = true, features = ["std", "std_rng"] } regex = { workspace = true, features = ["unicode-perl"] } +rmp = { workspace = true } rmp-serde = { workspace = 
true } saluki-api = { workspace = true } saluki-common = { workspace = true } diff --git a/lib/saluki-components/src/common/datadog/mod.rs b/lib/saluki-components/src/common/datadog/mod.rs index f2e9d8258ff..fc87311c774 100644 --- a/lib/saluki-components/src/common/datadog/mod.rs +++ b/lib/saluki-components/src/common/datadog/mod.rs @@ -56,12 +56,16 @@ pub fn sample_by_rate(trace_id: u64, rate: f64) -> bool { pub fn get_trace_env(trace: &Trace, root_span_idx: usize) -> Option<&MetaString> { // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/traceutil/trace.go#L19-L20 - let env = trace.spans().get(root_span_idx).and_then(|span| span.meta().get("env")); + use saluki_core::data_model::event::trace::AttributeValue; + let env = trace + .spans() + .get(root_span_idx) + .and_then(|span| span.attributes.get("env").and_then(AttributeValue::as_string)); match env { Some(env) => Some(env), None => { for span in trace.spans().iter() { - if let Some(env) = span.meta().get("env") { + if let Some(env) = span.attributes.get("env").and_then(AttributeValue::as_string) { return Some(env); } } diff --git a/lib/saluki-components/src/common/datadog/obfuscation.rs b/lib/saluki-components/src/common/datadog/obfuscation.rs index 4a6ae125f6f..2c5542eb75a 100644 --- a/lib/saluki-components/src/common/datadog/obfuscation.rs +++ b/lib/saluki-components/src/common/datadog/obfuscation.rs @@ -152,10 +152,18 @@ impl ObfuscationConfig { &self.credit_cards } + pub fn set_credit_cards(&mut self, credit_cards: CreditCardObfuscationConfig) { + self.credit_cards = credit_cards; + } + pub fn redis(&self) -> &RedisObfuscationConfig { &self.redis } + pub fn set_redis(&mut self, redis: RedisObfuscationConfig) { + self.redis = redis; + } + pub fn valkey(&self) -> &ValkeyObfuscationConfig { &self.valkey } diff --git a/lib/saluki-components/src/common/otlp/traces/transform.rs b/lib/saluki-components/src/common/otlp/traces/transform.rs index 953b46a890d..a33fe0b5837 100644 
--- a/lib/saluki-components/src/common/otlp/traces/transform.rs +++ b/lib/saluki-components/src/common/otlp/traces/transform.rs @@ -32,7 +32,7 @@ use crate::common::otlp::traces::normalize::{ normalize_tag_value_into_unchecked, }; use crate::common::otlp::traces::normalize::{truncate_utf8, MAX_RESOURCE_LEN}; -use crate::common::otlp::traces::translator::{convert_span_id, convert_trace_id}; +use crate::common::otlp::traces::translator::convert_span_id; use crate::common::otlp::util::get_string_attribute; use crate::common::otlp::util::{ DEPLOYMENT_ENVIRONMENT_KEY, KEY_DATADOG_CONTAINER_ID, KEY_DATADOG_ENVIRONMENT, KEY_DATADOG_VERSION, @@ -117,6 +117,7 @@ pub fn otel_span_to_dd_span( interner, string_builder, ); + let mut meta_struct: FastHashMap> = FastHashMap::default(); for (dd_key, apm_key) in DD_NAMESPACED_TO_APM_CONVENTIONS { if let Some(value) = use_both_maps( @@ -136,6 +137,7 @@ pub fn otel_span_to_dd_span( attribute, &mut meta, &mut metrics, + &mut meta_struct, ignore_missing_fields, interner, string_builder, @@ -274,7 +276,7 @@ pub fn otel_span_to_dd_span( } } - dd_span.with_meta(Some(meta)).with_metrics(Some(metrics)) + dd_span.with_meta(Some(meta)).with_metrics(Some(metrics)).with_meta_struct(Some(meta_struct)) } // OtelSpanToDDSpanMinimal otelSpanToDDSpan converts an OTel span to a DD span. 
@@ -293,7 +295,6 @@ pub fn otel_to_dd_span_minimal( let resource_attributes = &otel_resource.attributes; let mut dd_span = DdSpan::default(); - let trace_id = convert_trace_id(&otel_span.trace_id); let span_id = convert_span_id(&otel_span.span_id); let parent_id = convert_span_id(&otel_span.parent_span_id); let start = otel_span.start_time_unix_nano; @@ -431,7 +432,6 @@ pub fn otel_to_dd_span_minimal( .with_name(name) .with_resource(resource) .with_span_type(span_type) - .with_trace_id(trace_id) .with_span_id(span_id) .with_parent_id(parent_id) .with_start(start) @@ -942,7 +942,8 @@ const SQL_DB_SYSTEMS: &[&str] = &[ fn map_attribute_generic( attribute: &KeyValue, meta: &mut FastHashMap, metrics: &mut FastHashMap, - ignore_missing_fields: bool, interner: &GenericMapInterner, string_builder: &mut StringBuilder, + meta_struct: &mut FastHashMap>, ignore_missing_fields: bool, + interner: &GenericMapInterner, string_builder: &mut StringBuilder, ) { if attribute.key.is_empty() { return; @@ -977,16 +978,10 @@ fn map_attribute_generic( ); } OtlpValue::BytesValue(bytes) => { - let placeholder = format!("<{} bytes>", bytes.len()); - conditionally_map_otlp_attribute_to_meta( - attribute.key.as_str(), - &placeholder, - meta, - metrics, - ignore_missing_fields, - interner, - string_builder, - ); + if !attribute.key.is_empty() { + let key = MetaString::from_interner(attribute.key.as_str(), interner); + meta_struct.insert(key, bytes.clone()); + } } OtlpValue::IntValue(i) => { conditionally_map_otlp_attribute_to_metric( @@ -1429,7 +1424,7 @@ fn use_both_maps_key_list( None } -fn get_otel_env( +pub(crate) fn get_otel_env( span_attributes: &[KeyValue], resource_attributes: &[KeyValue], ignore_missing_fields: bool, interner: &GenericMapInterner, string_builder: &mut StringBuilder, ) -> MetaString { @@ -1463,7 +1458,7 @@ fn get_otel_env( } // GetOTelVersion returns the version based on OTel span and resource attributes, with span taking precedence. 
-fn get_otel_version( +pub(crate) fn get_otel_version( span_attributes: &[KeyValue], resource_attributes: &[KeyValue], ignore_missing_fields: bool, interner: &GenericMapInterner, string_builder: &mut StringBuilder, ) -> MetaString { @@ -1496,7 +1491,7 @@ fn get_otel_version( MetaString::empty() } -fn get_otel_container_id( +pub(crate) fn get_otel_container_id( span_attributes: &[KeyValue], resource_attributes: &[KeyValue], ignore_missing_fields: bool, interner: &GenericMapInterner, string_builder: &mut StringBuilder, ) -> MetaString { @@ -1747,6 +1742,7 @@ mod tests { fn test_map_attribute_generic_matches_agent_rules() { let mut meta = FastHashMap::default(); let mut metrics = FastHashMap::default(); + let mut meta_struct: FastHashMap> = FastHashMap::default(); let interner = test_interner(); let mut string_builder = StringBuilder::new().with_interner(interner.clone()); @@ -1755,6 +1751,7 @@ mod tests { &http_attr, &mut meta, &mut metrics, + &mut meta_struct, false, &interner, &mut string_builder, @@ -1766,6 +1763,7 @@ mod tests { &sampling_attr, &mut meta, &mut metrics, + &mut meta_struct, false, &interner, &mut string_builder, @@ -1777,6 +1775,7 @@ mod tests { &analytics_attr, &mut meta, &mut metrics, + &mut meta_struct, false, &interner, &mut string_builder, @@ -1784,16 +1783,18 @@ mod tests { assert_eq!(metrics.get(EVENT_EXTRACTION_METRIC_KEY), Some(&1.0)); let dd_attr = kv_str("datadog.service", "svc"); - map_attribute_generic(&dd_attr, &mut meta, &mut metrics, false, &interner, &mut string_builder); + map_attribute_generic(&dd_attr, &mut meta, &mut metrics, &mut meta_struct, false, &interner, &mut string_builder); assert!(!meta.contains_key("datadog.service")); let mut meta_ignore = FastHashMap::default(); let mut metrics_ignore = FastHashMap::default(); + let mut meta_struct_ignore: FastHashMap> = FastHashMap::default(); let env_attr = kv_str("env", "prod"); map_attribute_generic( &env_attr, &mut meta_ignore, &mut metrics_ignore, + &mut meta_struct_ignore, 
true, &interner, &mut string_builder, @@ -2184,25 +2185,30 @@ mod tests { &mut string_builder, None, ); - let meta = dd_span.meta(); + + use saluki_core::data_model::event::trace::AttributeValue; + let get_meta_str = |key: &str| -> Option<&str> { + dd_span.attributes.get(key).and_then(AttributeValue::as_string).map(|s| s.as_ref()) + }; if tc.should_map { assert_eq!( - meta.get("db.name").map(|s| s.as_ref()), + get_meta_str("db.name"), Some(tc.expected_name), "test case: {}", tc.name ); } else if !tc.expected_name.is_empty() { assert_eq!( - meta.get("db.name").map(|s| s.as_ref()), + get_meta_str("db.name"), Some(tc.expected_name), "test case: {}", tc.name ); } else { + let val = get_meta_str("db.name"); assert!( - meta.get("db.name").is_none() || meta.get("db.name").map(|s| s.as_ref()) == Some(""), + val.is_none() || val == Some(""), "test case: {}", tc.name ); diff --git a/lib/saluki-components/src/common/otlp/traces/translator.rs b/lib/saluki-components/src/common/otlp/traces/translator.rs index bb1fc0cbc7d..9ff4696f437 100644 --- a/lib/saluki-components/src/common/otlp/traces/translator.rs +++ b/lib/saluki-components/src/common/otlp/traces/translator.rs @@ -2,22 +2,29 @@ use std::collections::hash_map::IntoIter; use std::num::NonZeroUsize; use std::sync::Arc; -use otlp_protos::opentelemetry::proto::common::v1::{self as otlp_common}; +use otlp_protos::opentelemetry::proto::common::v1::{self as otlp_common, any_value::Value as OtlpValue}; use otlp_protos::opentelemetry::proto::resource::v1::Resource as OtlpResource; use otlp_protos::opentelemetry::proto::trace::v1::ResourceSpans; use saluki_common::collections::FastHashMap; use saluki_common::strings::StringBuilder; -use saluki_context::tags::{SharedTagSet, TagSet}; -use saluki_core::data_model::event::trace::{Span as DdSpan, Trace, TraceSampling}; +use saluki_core::data_model::event::trace::{AttributeValue, Span as DdSpan, Trace}; use saluki_core::data_model::event::Event; use 
stringtheory::interning::GenericMapInterner; use stringtheory::MetaString; use crate::common::datadog::SAMPLING_PRIORITY_METRIC_KEY; use crate::common::otlp::config::TracesConfig; -use crate::common::otlp::traces::transform::{bytes_to_hex_lowercase, otel_span_to_dd_span, otlp_value_to_string}; +use crate::common::otlp::traces::transform::{ + bytes_to_hex_lowercase, get_otel_container_id, get_otel_env, get_otel_version, otel_span_to_dd_span, + otlp_value_to_string, +}; +use crate::common::otlp::util::get_string_attribute; use crate::common::otlp::Metrics; +const DATADOG_HOSTNAME_ATTR: &str = "datadog.host.name"; +const TELEMETRY_SDK_LANGUAGE: &str = "telemetry.sdk.language"; +const TELEMETRY_SDK_VERSION: &str = "telemetry.sdk.version"; + pub fn convert_trace_id(trace_id: &[u8]) -> u64 { if trace_id.len() < 8 { return 0; @@ -25,6 +32,16 @@ pub fn convert_trace_id(trace_id: &[u8]) -> u64 { u64::from_be_bytes((&trace_id[(trace_id.len() - 8)..]).try_into().unwrap_or_default()) } +/// Extracts the high 8 bytes of a 128-bit OTLP trace ID as a big-endian u64. +/// +/// Returns 0 if the trace ID is shorter than 16 bytes (e.g. a 64-bit-only ID). +pub fn convert_trace_id_high(trace_id: &[u8]) -> u64 { + if trace_id.len() < 16 { + return 0; + } + u64::from_be_bytes((&trace_id[..8]).try_into().unwrap_or_default()) +} + pub fn convert_span_id(span_id: &[u8]) -> u64 { if span_id.len() != 8 { return 0; @@ -32,30 +49,109 @@ pub fn convert_span_id(span_id: &[u8]) -> u64 { u64::from_be_bytes(span_id.try_into().unwrap_or_default()) } -fn resource_attributes_to_tagset( - attributes: &[otlp_common::KeyValue], string_builder: &mut StringBuilder, -) -> TagSet { - let mut tags = TagSet::with_capacity(attributes.len()); +/// Metadata extracted from OTLP resource attributes for the unified `Trace` fields. +/// +/// Built once per `ResourceSpans` batch and shared across all traces derived from +/// the same resource. +struct OtlpResourceMeta { + /// Resolved environment name. 
+ env: MetaString, + /// Resolved hostname. + hostname: MetaString, + /// Resolved container ID. + container_id: MetaString, + /// Resolved application version. + app_version: MetaString, + /// Resolved tracer language name. + language_name: MetaString, + /// Resolved tracer SDK version. + tracer_version: MetaString, + /// All resource attributes as a typed map (for `Trace::attributes`). + attributes: FastHashMap, +} + +/// Extracts unified trace-level fields from OTLP resource attributes. +/// +/// All known fields are also inserted into the returned `attributes` map so that +/// downstream code can use a single map lookup regardless of whether a field is +/// explicitly modelled on `Trace`. +fn extract_resource_meta( + attributes: &[otlp_common::KeyValue], ignore_missing_fields: bool, interner: &GenericMapInterner, + string_builder: &mut StringBuilder, +) -> OtlpResourceMeta { + // Reuse the existing normalizing helpers (span_attrs = empty, resource_attrs = full). + let empty: &[otlp_common::KeyValue] = &[]; + + let env = get_otel_env(attributes, empty, ignore_missing_fields, interner, string_builder); + let app_version = get_otel_version(attributes, empty, ignore_missing_fields, interner, string_builder); + let container_id = get_otel_container_id(attributes, empty, ignore_missing_fields, interner, string_builder); + + let hostname = get_string_attribute(attributes, DATADOG_HOSTNAME_ATTR) + .filter(|s| !s.is_empty()) + .map(|s| MetaString::from_interner(s, interner)) + .unwrap_or_default(); + + let language_name = get_string_attribute(attributes, TELEMETRY_SDK_LANGUAGE) + .filter(|s| !s.is_empty()) + .map(|s| MetaString::from_interner(s, interner)) + .unwrap_or_default(); + + let tracer_version = get_string_attribute(attributes, TELEMETRY_SDK_VERSION) + .filter(|s| !s.is_empty()) + .map(|s| MetaString::from_interner(s, interner)) + .unwrap_or_default(); + // language_version is intentionally not populated for OTLP traces: OTLP has no standardised + // attribute 
for the language runtime version, so we leave it empty rather than guess. + + // Build the typed attributes map from all resource attributes. + let mut attr_map: FastHashMap = FastHashMap::default(); + attr_map.reserve(attributes.len()); for kv in attributes { - if let Some(key_value) = &kv.value { - if let Some(value) = &key_value.value { - if let Some(string_value) = otlp_value_to_string(value) { - string_builder.clear(); - let _ = string_builder.push_str(kv.key.as_str()); - let _ = string_builder.push(':'); - let _ = string_builder.push_str(string_value.as_str()); - tags.insert_tag(string_builder.to_meta_string()); + if kv.key.is_empty() { + continue; + } + let Some(wrapper) = &kv.value else { continue }; + let Some(value) = &wrapper.value else { continue }; + + let attr_value = match value { + OtlpValue::StringValue(s) => AttributeValue::String(MetaString::from_interner(s.as_str(), interner)), + OtlpValue::IntValue(i) => AttributeValue::Float(*i as f64), + OtlpValue::DoubleValue(d) => AttributeValue::Float(*d), + OtlpValue::BoolValue(b) => { + AttributeValue::String(MetaString::from_static(if *b { "true" } else { "false" })) + } + OtlpValue::BytesValue(b) => AttributeValue::Bytes(b.clone()), + _ => { + // Arrays and KVLists are stringified via JSON. + if let Some(s) = otlp_value_to_string(value) { + AttributeValue::String(MetaString::from_interner(s.as_str(), interner)) + } else { + continue; } } - } + }; + + let key = MetaString::from_interner(kv.key.as_str(), interner); + attr_map.insert(key, attr_value); + } + + OtlpResourceMeta { + env, + hostname, + container_id, + app_version, + language_name, + tracer_version, + attributes: attr_map, } - tags } struct TraceEntry { spans: Vec, priority: Option, trace_id_hex: Option, + /// High 8 bytes of the 128-bit trace ID (captured from the first span). 
+ trace_id_high: u64, } pub struct OtlpTracesTranslator { @@ -81,7 +177,10 @@ impl OtlpTracesTranslator { let compute_top_level = self.config.enable_otlp_compute_top_level_by_span_kind; let interner = &self.interner; let string_builder = &mut self.string_builder; - let resource_tags = resource_attributes_to_tagset(&resource.attributes, string_builder).into_shared(); + + // Build unified resource metadata for the new Trace fields. + let resource_meta = extract_resource_meta(&resource.attributes, ignore_missing_fields, interner, string_builder); + let mut traces_by_id: FastHashMap = FastHashMap::default(); let trace_count_hint = resource_spans.scope_spans.len(); traces_by_id.reserve(trace_count_hint); @@ -92,10 +191,12 @@ impl OtlpTracesTranslator { metrics.spans_received().increment(scope_spans.spans.len() as u64); for span in scope_spans.spans { let trace_id = convert_trace_id(&span.trace_id); + let trace_id_high = convert_trace_id_high(&span.trace_id); let entry = traces_by_id.entry(trace_id).or_insert_with(|| TraceEntry { spans: Vec::new(), priority: None, trace_id_hex: None, + trace_id_high, }); if entry.trace_id_hex.is_none() { @@ -114,7 +215,7 @@ impl OtlpTracesTranslator { ); // Track last-seen priority for this trace (overwrites previous values) - if let Some(&priority) = dd_span.metrics().get(SAMPLING_PRIORITY_METRIC_KEY) { + if let Some(priority) = dd_span.attributes.get(SAMPLING_PRIORITY_METRIC_KEY).and_then(AttributeValue::as_float) { entry.priority = Some(priority as i32); } @@ -123,14 +224,14 @@ impl OtlpTracesTranslator { } OtlpTraceEventsIter { - resource_tags, + resource_meta, entries: traces_by_id.into_iter(), } } } struct OtlpTraceEventsIter { - resource_tags: SharedTagSet, + resource_meta: OtlpResourceMeta, entries: IntoIter, } @@ -138,17 +239,24 @@ impl Iterator for OtlpTraceEventsIter { type Item = Event; fn next(&mut self) -> Option { - for (_, entry) in self.entries.by_ref() { + for (trace_id_low, entry) in self.entries.by_ref() { if 
entry.spans.is_empty() { continue; } - let mut trace = Trace::new(entry.spans, self.resource_tags.clone()); + let mut trace = Trace::new(entry.spans); - // Set the trace-level sampling priority if one was found - if let Some(priority) = entry.priority { - trace.set_sampling(Some(TraceSampling::new(false, Some(priority), None, None))); - } + // ── Unified Trace fields ────────────────────────────────────────────── + trace.trace_id_low = trace_id_low; + trace.trace_id_high = entry.trace_id_high; + trace.priority = entry.priority; + trace.env = self.resource_meta.env.clone(); + trace.hostname = self.resource_meta.hostname.clone(); + trace.container_id = self.resource_meta.container_id.clone(); + trace.app_version = self.resource_meta.app_version.clone(); + trace.language_name = self.resource_meta.language_name.clone(); + trace.tracer_version = self.resource_meta.tracer_version.clone(); + trace.attributes = self.resource_meta.attributes.clone(); return Some(Event::Trace(trace)); } diff --git a/lib/saluki-components/src/common/otlp/util.rs b/lib/saluki-components/src/common/otlp/util.rs index 11523bdd294..c2811a290d4 100644 --- a/lib/saluki-components/src/common/otlp/util.rs +++ b/lib/saluki-components/src/common/otlp/util.rs @@ -8,13 +8,14 @@ use opentelemetry_semantic_conventions::resource::*; use otlp_protos::opentelemetry::proto::common::v1::{self as otlp_common, any_value::Value}; use saluki_common::collections::{FastHashMap, FastHashSet}; use saluki_context::tags::TagSet; +use saluki_core::data_model::event::trace::AttributeValue; +use stringtheory::MetaString; // ============================================================================ // Datadog attribute key constants shared across the encoder and translator // ============================================================================ pub const KEY_DATADOG_VERSION: &str = "datadog.version"; -pub const KEY_DATADOG_HOST: &str = "datadog.host"; pub const KEY_DATADOG_ENVIRONMENT: &str = "datadog.env"; pub 
const KEY_DATADOG_CONTAINER_ID: &str = "datadog.container_id"; pub const KEY_DATADOG_CONTAINER_TAGS: &str = "datadog.container_tags"; @@ -142,38 +143,6 @@ pub fn extract_container_tags_from_resource_attributes(attributes: &[otlp_common } } -/// Extracts container tags from a resource tagset and inserts them into the provided TagSet. -/// -/// This mirrors `extract_container_tags_from_resource_attributes`, but operates on a `TagSet` representation of -/// the resource. -pub fn extract_container_tags_from_resource_tagset(resource_tags: &TagSet, tags: &mut TagSet) { - let mut extracted_tags = FastHashSet::default(); - - for tag in resource_tags { - let Some(value) = tag.value() else { - continue; - }; - - // Semantic Conventions - if let Some(datadog_key) = CONTAINER_MAPPINGS.get(tag.name()) { - tags.insert_tag(format!("{}:{}", datadog_key, value)); - extracted_tags.insert(*datadog_key); - } - - // Custom (datadog.container.tag namespace) - if tag.name().starts_with(CUSTOM_CONTAINER_TAG_PREFIX) { - if let Some(custom_key) = tag.name().get(CUSTOM_CONTAINER_TAG_PREFIX.len()..) { - if !custom_key.is_empty() { - // Do not replace if set via semantic conventions mappings. - if !extracted_tags.insert(custom_key) { - tags.insert_tag(format!("{}:{}", custom_key, value)); - } - } - } - } - } -} - /// Resolves the source metadata from OTLP resource attributes. /// /// This determines whether the telemetry came from a hostname or serverless environment. @@ -208,18 +177,27 @@ pub fn resource_to_source(resource: &otlp_protos::opentelemetry::proto::resource None } -/// Resolves the source metadata from a resource `TagSet`. +/// Resolves the source metadata from a typed attribute map. /// -/// This is equivalent to `resource_to_source`, but avoids the OTLP protobuf resource type. 
-pub fn tags_to_source(resource_tags: &TagSet) -> Option { - let get = |key: &str| -> Option<&str> { resource_tags.get_single_tag(key).and_then(|t| t.value()) }; +/// Equivalent to [`tags_to_source`] but works on a `FastHashMap` +/// instead of a `TagSet`. +pub fn source_from_attributes_map(attributes: &FastHashMap) -> Option { + let get_str = |key: &str| -> Option<&str> { + attributes.get(key).and_then(|v| { + if let AttributeValue::String(s) = v { + Some(s.as_ref()) + } else { + None + } + }) + }; // AWS ECS Fargate - if get(CLOUD_PROVIDER) == Some("aws") - && get(opentelemetry_semantic_conventions::resource::CLOUD_PLATFORM) == Some("aws_ecs") - && get(opentelemetry_semantic_conventions::resource::AWS_ECS_LAUNCHTYPE) == Some("fargate") + if get_str(CLOUD_PROVIDER) == Some("aws") + && get_str(opentelemetry_semantic_conventions::resource::CLOUD_PLATFORM) == Some("aws_ecs") + && get_str(opentelemetry_semantic_conventions::resource::AWS_ECS_LAUNCHTYPE) == Some("fargate") { - if let Some(task_arn) = get(AWS_ECS_TASK_ARN) { + if let Some(task_arn) = get_str(AWS_ECS_TASK_ARN) { return Some(Source { kind: SourceKind::AwsEcsFargateKind, identifier: task_arn.to_string(), @@ -228,7 +206,7 @@ pub fn tags_to_source(resource_tags: &TagSet) -> Option { } // Hostname from attributes - if let Some(host_name) = get(opentelemetry_semantic_conventions::resource::HOST_NAME) { + if let Some(host_name) = get_str(opentelemetry_semantic_conventions::resource::HOST_NAME) { return Some(Source { kind: SourceKind::HostnameKind, identifier: host_name.to_string(), @@ -237,3 +215,38 @@ pub fn tags_to_source(resource_tags: &TagSet) -> Option { None } + +/// Extracts container tags from a typed attribute map and inserts them into `tags`. +/// +/// Equivalent to [`extract_container_tags_from_resource_tagset`] but works on a +/// `FastHashMap` instead of a `TagSet`. 
+pub fn extract_container_tags_from_attributes_map( + attributes: &FastHashMap, tags: &mut TagSet, +) { + let mut extracted_tags = FastHashSet::default(); + + for (key, value) in attributes { + let s_val = match value { + AttributeValue::String(s) => s.as_ref(), + _ => continue, + }; + + // Semantic Conventions + if let Some(datadog_key) = CONTAINER_MAPPINGS.get(key.as_ref()) { + tags.insert_tag(format!("{}:{}", datadog_key, s_val)); + extracted_tags.insert(*datadog_key); + } + + // Custom (datadog.container.tag namespace) + if key.as_ref().starts_with(CUSTOM_CONTAINER_TAG_PREFIX) { + if let Some(custom_key) = key.as_ref().get(CUSTOM_CONTAINER_TAG_PREFIX.len()..) { + if !custom_key.is_empty() { + // Do not replace if set via semantic conventions mappings. + if !extracted_tags.insert(custom_key) { + tags.insert_tag(format!("{}:{}", custom_key, s_val)); + } + } + } + } + } +} diff --git a/lib/saluki-components/src/encoders/datadog/mod.rs b/lib/saluki-components/src/encoders/datadog/mod.rs index 7106b1e1c71..58c315ad4f9 100644 --- a/lib/saluki-components/src/encoders/datadog/mod.rs +++ b/lib/saluki-components/src/encoders/datadog/mod.rs @@ -14,5 +14,5 @@ mod stats; #[allow(unused)] pub use self::stats::DatadogApmStatsEncoderConfiguration; -mod traces; -pub use self::traces::DatadogTraceConfiguration; +mod v1_traces; +pub use self::v1_traces::V1DatadogTraceConfiguration; diff --git a/lib/saluki-components/src/encoders/datadog/traces/mod.rs b/lib/saluki-components/src/encoders/datadog/traces/mod.rs deleted file mode 100644 index e7c0e5b6f42..00000000000 --- a/lib/saluki-components/src/encoders/datadog/traces/mod.rs +++ /dev/null @@ -1,987 +0,0 @@ -#![allow(dead_code)] - -use std::{fmt::Write, time::Duration}; - -use async_trait::async_trait; -use datadog_protos::traces::builders::{ - attribute_any_value::AttributeAnyValueType, attribute_array_value::AttributeArrayValueType, AgentPayloadBuilder, - AttributeAnyValueBuilder, AttributeArrayValueBuilder, -}; -use 
facet::Facet; -use http::{uri::PathAndQuery, HeaderName, HeaderValue, Method, Uri}; -use memory_accounting::{MemoryBounds, MemoryBoundsBuilder}; -use opentelemetry_semantic_conventions::resource::{ - CONTAINER_ID, DEPLOYMENT_ENVIRONMENT_NAME, K8S_POD_UID, SERVICE_VERSION, -}; -use piecemeal::{ScratchBuffer, ScratchWriter}; -use saluki_common::strings::StringBuilder; -use saluki_common::task::HandleExt as _; -use saluki_config::GenericConfiguration; -use saluki_context::tags::TagSet; -use saluki_core::data_model::event::trace::{AttributeScalarValue, AttributeValue, Span as DdSpan}; -use saluki_core::topology::{EventsBuffer, PayloadsBuffer}; -use saluki_core::{ - components::{encoders::*, ComponentContext}, - data_model::{ - event::{trace::Trace, EventType}, - payload::{HttpPayload, Payload, PayloadMetadata, PayloadType}, - }, - observability::ComponentMetricsExt as _, -}; -use saluki_env::host::providers::BoxedHostProvider; -use saluki_env::{EnvironmentProvider, HostProvider}; -use saluki_error::generic_error; -use saluki_error::{ErrorContext as _, GenericError}; -use saluki_io::compression::CompressionScheme; -use saluki_metrics::MetricsBuilder; -use serde::Deserialize; -use stringtheory::MetaString; -use tokio::{ - select, - sync::mpsc::{self, Receiver, Sender}, - time::sleep, -}; -use tracing::{debug, error}; - -use crate::common::datadog::{ - apm::ApmConfig, - io::RB_BUFFER_CHUNK_SIZE, - request_builder::{EndpointEncoder, RequestBuilder}, - telemetry::ComponentTelemetry, - DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT, DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT, TAG_DECISION_MAKER, -}; -use crate::common::otlp::config::TracesConfig; -use crate::common::otlp::util::{ - extract_container_tags_from_resource_tagset, tags_to_source, Source as OtlpSource, SourceKind as OtlpSourceKind, - DEPLOYMENT_ENVIRONMENT_KEY, KEY_DATADOG_CONTAINER_ID, KEY_DATADOG_CONTAINER_TAGS, KEY_DATADOG_ENVIRONMENT, - KEY_DATADOG_HOST, KEY_DATADOG_VERSION, -}; - -const CONTAINER_TAGS_META_KEY: &str = 
"_dd.tags.container"; -const MAX_TRACES_PER_PAYLOAD: usize = 10000; -static CONTENT_TYPE_PROTOBUF: HeaderValue = HeaderValue::from_static("application/x-protobuf"); - -// Sampling metadata keys / values. -const TAG_OTLP_SAMPLING_RATE: &str = "_dd.otlp_sr"; -const DEFAULT_CHUNK_PRIORITY: i32 = 1; // PRIORITY_AUTO_KEEP - -fn default_serializer_compressor_kind() -> String { - "zstd".to_string() -} - -const fn default_zstd_compressor_level() -> i32 { - 3 -} - -const fn default_flush_timeout_secs() -> u64 { - 2 -} - -fn default_env() -> String { - "none".to_string() -} - -/// Configuration for the Datadog Traces encoder. -/// -/// This encoder converts trace events into Datadog's TracerPayload protobuf format and sends them -/// to the Datadog traces intake endpoint (`/api/v0.2/traces`). It handles batching, compression, -/// and enrichment with metadata such as hostname, environment, and container tags. -#[derive(Deserialize, Facet)] -pub struct DatadogTraceConfiguration { - #[serde( - rename = "serializer_compressor_kind", // renames the field in the user_configuration from "serializer_compressor_kind" to "compressor_kind". - default = "default_serializer_compressor_kind" - )] - compressor_kind: String, - - #[serde( - rename = "serializer_zstd_compressor_level", - default = "default_zstd_compressor_level" - )] - zstd_compressor_level: i32, - - /// Flush timeout for pending requests, in seconds. - /// - /// When the encoder has written traces to the in-flight request payload, but it has not yet reached the - /// payload size limits that would force the payload to be flushed, the encoder will wait for a period of time - /// before flushing the in-flight request payload. - /// - /// Defaults to 2 seconds. 
- #[serde(default = "default_flush_timeout_secs")] - flush_timeout_secs: u64, - - #[serde(skip)] - default_hostname: Option, - - #[serde(skip)] - version: String, - - #[serde(skip)] - #[facet(opaque)] - apm_config: ApmConfig, - - #[serde(skip)] - #[facet(opaque)] - otlp_traces: TracesConfig, - - #[serde(default = "default_env")] - env: String, -} - -impl DatadogTraceConfiguration { - /// Creates a new `DatadogTraceConfiguration` from the given configuration. - pub fn from_configuration(config: &GenericConfiguration) -> Result { - let mut trace_config: Self = config.as_typed()?; - - let app_details = saluki_metadata::get_app_details(); - trace_config.version = format!("agent-data-plane/{}", app_details.version().raw()); - - trace_config.apm_config = ApmConfig::from_configuration(config)?; - trace_config.otlp_traces = config.try_get_typed("otlp_config.traces")?.unwrap_or_default(); - - Ok(trace_config) - } -} - -impl DatadogTraceConfiguration { - /// Sets the default_hostname using the environment provider - pub async fn with_environment_provider(mut self, environment_provider: E) -> Result - where - E: EnvironmentProvider, - { - let host_provider = environment_provider.host(); - let hostname = host_provider.get_hostname().await?; - self.default_hostname = Some(hostname); - Ok(self) - } -} - -#[async_trait] -impl EncoderBuilder for DatadogTraceConfiguration { - fn input_event_type(&self) -> EventType { - EventType::Trace - } - - fn output_payload_type(&self) -> PayloadType { - PayloadType::Http - } - - async fn build(&self, context: ComponentContext) -> Result, GenericError> { - let metrics_builder = MetricsBuilder::from_component_context(&context); - let telemetry = ComponentTelemetry::from_builder(&metrics_builder); - let compression_scheme = CompressionScheme::new(&self.compressor_kind, self.zstd_compressor_level); - - let default_hostname = self.default_hostname.clone().unwrap_or_default(); - let default_hostname = MetaString::from(default_hostname); - - // 
Create request builder for traces which is used to generate HTTP requests. - - let mut trace_rb = RequestBuilder::new( - TraceEndpointEncoder::new( - default_hostname, - self.version.clone(), - self.env.clone(), - self.apm_config.clone(), - self.otlp_traces.clone(), - ), - compression_scheme, - RB_BUFFER_CHUNK_SIZE, - ) - .await?; - trace_rb.with_max_inputs_per_payload(MAX_TRACES_PER_PAYLOAD); - - let flush_timeout = match self.flush_timeout_secs { - // We always give ourselves a minimum flush timeout of 10ms to allow for some very minimal amount of - // batching, while still practically flushing things almost immediately. - 0 => Duration::from_millis(10), - secs => Duration::from_secs(secs), - }; - - Ok(Box::new(DatadogTrace { - trace_rb, - telemetry, - flush_timeout, - })) - } -} - -impl MemoryBounds for DatadogTraceConfiguration { - fn specify_bounds(&self, builder: &mut MemoryBoundsBuilder) { - // TODO: How do we properly represent the requests we can generate that may be sitting around in-flight? - builder - .minimum() - .with_single_value::("component struct") - .with_array::("request builder events channel", 8) - .with_array::("request builder payloads channel", 8); - - builder - .firm() - .with_array::("traces split re-encode buffer", MAX_TRACES_PER_PAYLOAD); - } -} - -pub struct DatadogTrace { - trace_rb: RequestBuilder, - telemetry: ComponentTelemetry, - flush_timeout: Duration, -} - -// Encodes Trace events to TracerPayloads. -#[async_trait] -impl Encoder for DatadogTrace { - async fn run(mut self: Box, mut context: EncoderContext) -> Result<(), GenericError> { - let Self { - trace_rb, - telemetry, - flush_timeout, - } = *self; - - let mut health = context.take_health_handle(); - - // The encoder runs two async loops, the main encoder loop and the request builder loop, - // this channel is used to send events from the main encoder loop to the request builder loop safely. 
- let (events_tx, events_rx) = mpsc::channel(8); - // adds a channel to send payloads to the dispatcher and a channel to receive them. - let (payloads_tx, mut payloads_rx) = mpsc::channel(8); - let request_builder_fut = run_request_builder(trace_rb, telemetry, events_rx, payloads_tx, flush_timeout); - // Spawn the request builder task on the global thread pool, this task is responsible for encoding traces and flushing requests. - let request_builder_handle = context - .topology_context() - .global_thread_pool() // Use the shared Tokio runtime thread pool. - .spawn_traced_named("dd-traces-request-builder", request_builder_fut); - - health.mark_ready(); - debug!("Datadog Trace encoder started."); - - loop { - select! { - biased; // makes the branches of the select statement be evaluated in order. - - _ = health.live() => continue, - maybe_payload = payloads_rx.recv() => match maybe_payload { - Some(payload) => { - // Dispatch an HTTP payload to the dispatcher. - if let Err(e) = context.dispatcher().dispatch(payload).await { - error!("Failed to dispatch payload: {}", e); - } - } - None => break, - }, - maybe_event_buffer = context.events().next() => match maybe_event_buffer { - Some(event_buffer) => events_tx.send(event_buffer).await - .error_context("Failed to send event buffer to request builder task.")?, - None => break, - }, - } - } - - // Drop the events sender, which signals the request builder task to stop. - drop(events_tx); - - // Continue draining the payloads receiver until it is closed. - while let Some(payload) = payloads_rx.recv().await { - if let Err(e) = context.dispatcher().dispatch(payload).await { - error!("Failed to dispatch payload: {}", e); - } - } - - // Request build task should now be stopped. 
- match request_builder_handle.await { - Ok(Ok(())) => debug!("Request builder task stopped."), - Ok(Err(e)) => error!(error = %e, "Request builder task failed."), - Err(e) => error!(error = %e, "Request builder task panicked."), - } - - debug!("Datadog Trace encoder stopped."); - - Ok(()) - } -} - -async fn run_request_builder( - mut trace_request_builder: RequestBuilder, telemetry: ComponentTelemetry, - mut events_rx: Receiver, payloads_tx: Sender, flush_timeout: std::time::Duration, -) -> Result<(), GenericError> { - let mut pending_flush = false; - let pending_flush_timeout = sleep(flush_timeout); - tokio::pin!(pending_flush_timeout); - - loop { - select! { - Some(event_buffer) = events_rx.recv() => { - for event in event_buffer { - let trace = match event.try_into_trace() { - Some(trace) => trace, - None => continue, - }; - // Encode the trace. If we get it back, that means the current request is full, and we need to - // flush it before we can try to encode the trace again. - let trace_to_retry = match trace_request_builder.encode(trace).await { - Ok(None) => continue, - Ok(Some(trace)) => trace, - Err(e) => { - error!(error = %e, "Failed to encode trace."); - telemetry.events_dropped_encoder().increment(1); - continue; - } - }; - - let maybe_requests = trace_request_builder.flush().await; - if maybe_requests.is_empty() { - panic!("builder told us to flush, but gave us nothing"); - } - - for maybe_request in maybe_requests { - match maybe_request { - Ok((events, request)) => { - let payload_meta = PayloadMetadata::from_event_count(events); - let http_payload = HttpPayload::new(payload_meta, request); - let payload = Payload::Http(http_payload); - - payloads_tx.send(payload).await - .map_err(|_| generic_error!("Failed to send payload to encoder."))?; - }, - Err(e) => if e.is_recoverable() { - // If the error is recoverable, we'll hold on to the trace to retry it later. 
- continue; - } else { - return Err(GenericError::from(e).context("Failed to flush request.")); - } - } - } - - // Now try to encode the trace again. - if let Err(e) = trace_request_builder.encode(trace_to_retry).await { - error!(error = %e, "Failed to encode trace."); - telemetry.events_dropped_encoder().increment(1); - } - } - - debug!("Processed event buffer."); - - // If we're not already pending a flush, we'll start the countdown. - if !pending_flush { - pending_flush_timeout.as_mut().reset(tokio::time::Instant::now() + flush_timeout); - pending_flush = true; - } - }, - _ = &mut pending_flush_timeout, if pending_flush => { - debug!("Flushing pending request(s)."); - - pending_flush = false; - - // Once we've encoded and written all traces, we flush the request builders to generate a request with - // anything left over. Again, we'll enqueue those requests to be sent immediately. - let maybe_trace_requests = trace_request_builder.flush().await; - for maybe_request in maybe_trace_requests { - match maybe_request { - Ok((events, request)) => { - let payload_meta = PayloadMetadata::from_event_count(events); - let http_payload = HttpPayload::new(payload_meta, request); - let payload = Payload::Http(http_payload); - - payloads_tx.send(payload).await - .map_err(|_| generic_error!("Failed to send payload to encoder."))?; - }, - Err(e) => if e.is_recoverable() { - continue; - } else { - return Err(GenericError::from(e).context("Failed to flush request.")); - } - } - } - - debug!("All flushed requests sent to I/O task. Waiting for next event buffer..."); - }, - - // Event buffers channel has been closed, and we have no pending flushing, so we're all done. 
- else => break, - } - } - - Ok(()) -} - -#[derive(Debug)] -struct TraceEndpointEncoder { - scratch: ScratchWriter>, - default_hostname: MetaString, - agent_hostname: String, - version: String, - env: String, - apm_config: ApmConfig, - otlp_traces: TracesConfig, - string_builder: StringBuilder, - error_tracking_standalone: bool, - extra_headers: Vec<(HeaderName, HeaderValue)>, -} - -impl TraceEndpointEncoder { - fn new( - default_hostname: MetaString, version: String, env: String, apm_config: ApmConfig, otlp_traces: TracesConfig, - ) -> Self { - let error_tracking_standalone = apm_config.error_tracking_standalone_enabled(); - let extra_headers = if error_tracking_standalone { - vec![( - HeaderName::from_static("x-datadog-error-tracking-standalone"), - HeaderValue::from_static("true"), - )] - } else { - Vec::new() - }; - Self { - scratch: ScratchWriter::new(Vec::with_capacity(8192)), - agent_hostname: default_hostname.as_ref().to_string(), - default_hostname, - version, - env, - apm_config, - otlp_traces, - string_builder: StringBuilder::new(), - error_tracking_standalone, - extra_headers, - } - } - - fn encode_tracer_payload(&mut self, trace: &Trace, output_buffer: &mut Vec) -> std::io::Result<()> { - let sampling_rate = self.sampling_rate(); - let resource_tags = trace.resource_tags(); - let first_span = trace.spans().first(); - let source = tags_to_source(resource_tags); - - // Resolve metadata from resource tags. 
- let container_id = resolve_container_id(resource_tags, first_span); - let lang = get_resource_tag_value(resource_tags, "telemetry.sdk.language"); - let sdk_version = get_resource_tag_value(resource_tags, "telemetry.sdk.version").unwrap_or(""); - let tracer_version = format!("otlp-{}", sdk_version); - let container_tags = resolve_container_tags( - resource_tags, - source.as_ref(), - self.otlp_traces.ignore_missing_datadog_fields, - ); - let env = resolve_env(resource_tags, self.otlp_traces.ignore_missing_datadog_fields); - let hostname = resolve_hostname( - resource_tags, - source.as_ref(), - Some(self.default_hostname.as_ref()), - self.otlp_traces.ignore_missing_datadog_fields, - ); - let app_version = resolve_app_version(resource_tags); - - // Resolve sampling metadata. - let (priority, dropped_trace, decision_maker, otlp_sr) = match trace.sampling() { - Some(sampling) => ( - sampling.priority.unwrap_or(DEFAULT_CHUNK_PRIORITY), - sampling.dropped_trace, - sampling.decision_maker.as_deref(), - sampling.otlp_sampling_rate.unwrap_or(sampling_rate), - ), - None => (DEFAULT_CHUNK_PRIORITY, false, None, sampling_rate), - }; - - // Now incrementally build the payload. - let mut ap_builder = AgentPayloadBuilder::new(&mut self.scratch); - - ap_builder - .host_name(&self.agent_hostname)? - .env(&self.env)? - .agent_version(&self.version)? - .target_tps(self.apm_config.target_traces_per_second())? - .error_tps(self.apm_config.errors_per_second())?; - - ap_builder.add_tracer_payloads(|tp| { - if let Some(cid) = container_id { - tp.container_id(cid)?; - } - if let Some(l) = lang { - tp.language_name(l)?; - } - tp.tracer_version(&tracer_version)?; - - // Encode the single TraceChunk containing all spans. - tp.add_chunks(|chunk| { - chunk.priority(priority)?; - - for span in trace.spans() { - chunk.add_spans(|s| { - s.service(span.service())? - .name(span.name())? - .resource(span.resource())? - .trace_id(span.trace_id())? - .span_id(span.span_id())? 
- .parent_id(span.parent_id())? - .start(span.start() as i64)? - .duration(span.duration() as i64)? - .error(span.error())?; - - { - let mut meta = s.meta(); - for (k, v) in span.meta() { - meta.write_entry(k.as_ref(), v.as_ref())?; - } - } - - { - let mut metrics = s.metrics(); - for (k, v) in span.metrics() { - metrics.write_entry(k.as_ref(), *v)?; - } - } - - s.type_(span.span_type())?; - - { - let mut ms = s.meta_struct(); - for (k, v) in span.meta_struct() { - ms.write_entry(k.as_ref(), v.as_slice())?; - } - } - - for link in span.span_links() { - s.add_span_links(|sl| { - sl.trace_id(link.trace_id())? - .trace_id_high(link.trace_id_high())? - .span_id(link.span_id())?; - { - let mut attrs = sl.attributes(); - for (k, v) in link.attributes() { - attrs.write_entry(&**k, &**v)?; - } - } - let tracestate = link.tracestate().to_string(); - sl.tracestate(tracestate.as_str())?.flags(link.flags())?; - Ok(()) - })?; - } - - for event in span.span_events() { - s.add_span_events(|se| { - se.time_unix_nano(event.time_unix_nano())?.name(event.name())?; - { - let mut attrs = se.attributes(); - for (k, v) in event.attributes() { - attrs.write_entry(&**k, |av| encode_attribute_value(av, v))?; - } - } - Ok(()) - })?; - } - - Ok(()) - })?; - } - - // Chunk tags. 
- { - let mut tags = chunk.tags(); - if let Some(dm) = decision_maker { - tags.write_entry(TAG_DECISION_MAKER, dm)?; - } - if self.error_tracking_standalone { - let trace_has_error = trace.spans().iter().any(|span| { - span.error() != 0 - || span - .meta() - .get("_dd.span_events.has_exception") - .is_some_and(|v| v == "true") - }); - if trace_has_error { - tags.write_entry("_dd.error_tracking_standalone.error", "true")?; - } - } - - self.string_builder.clear(); - write!(&mut self.string_builder, "{:.2}", otlp_sr) - .expect("should never fail to format sampling rate"); - tags.write_entry(TAG_OTLP_SAMPLING_RATE, self.string_builder.as_str())?; - } - - if dropped_trace { - chunk.dropped_trace(true)?; - } - - Ok(()) - })?; - - // Tracer payload tags. - if let Some(ct) = container_tags { - let mut tags = tp.tags(); - tags.write_entry(CONTAINER_TAGS_META_KEY, &*ct)?; - } - - if let Some(e) = env { - tp.env(e)?; - } - if let Some(h) = hostname { - tp.hostname(h)?; - } - if let Some(av) = app_version { - tp.app_version(av)?; - } - - Ok(()) - })?; - - ap_builder.finish(output_buffer)?; - - Ok(()) - } - - fn sampling_rate(&self) -> f64 { - let rate = self.otlp_traces.probabilistic_sampler.sampling_percentage / 100.0; - if rate <= 0.0 || rate >= 1.0 { - return 1.0; - } - rate - } -} - -impl EndpointEncoder for TraceEndpointEncoder { - type Input = Trace; - type EncodeError = std::io::Error; - fn encoder_name() -> &'static str { - "traces" - } - - fn compressed_size_limit(&self) -> usize { - DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT - } - - fn uncompressed_size_limit(&self) -> usize { - DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT - } - - fn encode(&mut self, trace: &Self::Input, buffer: &mut Vec) -> Result<(), Self::EncodeError> { - self.encode_tracer_payload(trace, buffer) - } - - fn endpoint_uri(&self) -> Uri { - PathAndQuery::from_static("/api/v0.2/traces").into() - } - - fn endpoint_method(&self) -> Method { - Method::POST - } - - fn content_type(&self) -> HeaderValue { - 
CONTENT_TYPE_PROTOBUF.clone() - } - - fn additional_headers(&self) -> &[(HeaderName, HeaderValue)] { - &self.extra_headers - } -} - -fn encode_attribute_value( - builder: &mut AttributeAnyValueBuilder<'_, S>, value: &AttributeValue, -) -> std::io::Result<()> { - match value { - AttributeValue::String(v) => { - builder.type_(AttributeAnyValueType::STRING_VALUE)?.string_value(v)?; - } - AttributeValue::Bool(v) => { - builder.type_(AttributeAnyValueType::BOOL_VALUE)?.bool_value(*v)?; - } - AttributeValue::Int(v) => { - builder.type_(AttributeAnyValueType::INT_VALUE)?.int_value(*v)?; - } - AttributeValue::Double(v) => { - builder.type_(AttributeAnyValueType::DOUBLE_VALUE)?.double_value(*v)?; - } - AttributeValue::Array(values) => { - builder.type_(AttributeAnyValueType::ARRAY_VALUE)?.array_value(|arr| { - for val in values { - arr.add_values(|av| encode_attribute_array_value(av, val))?; - } - Ok(()) - })?; - } - } - Ok(()) -} - -fn encode_attribute_array_value( - builder: &mut AttributeArrayValueBuilder<'_, S>, value: &AttributeScalarValue, -) -> std::io::Result<()> { - match value { - AttributeScalarValue::String(v) => { - builder.type_(AttributeArrayValueType::STRING_VALUE)?.string_value(v)?; - } - AttributeScalarValue::Bool(v) => { - builder.type_(AttributeArrayValueType::BOOL_VALUE)?.bool_value(*v)?; - } - AttributeScalarValue::Int(v) => { - builder.type_(AttributeArrayValueType::INT_VALUE)?.int_value(*v)?; - } - AttributeScalarValue::Double(v) => { - builder.type_(AttributeArrayValueType::DOUBLE_VALUE)?.double_value(*v)?; - } - } - Ok(()) -} - -fn get_resource_tag_value<'a>(resource_tags: &'a TagSet, key: &str) -> Option<&'a str> { - resource_tags.get_single_tag(key).and_then(|t| t.value()) -} - -fn resolve_hostname<'a>( - resource_tags: &'a TagSet, source: Option<&'a OtlpSource>, default_hostname: Option<&'a str>, - ignore_missing_fields: bool, -) -> Option<&'a str> { - let mut hostname = match source { - Some(src) => match src.kind { - 
OtlpSourceKind::HostnameKind => Some(src.identifier.as_str()), - _ => Some(""), - }, - None => default_hostname, - }; - - if ignore_missing_fields { - hostname = Some(""); - } - - if let Some(value) = get_resource_tag_value(resource_tags, KEY_DATADOG_HOST) { - hostname = Some(value); - } - - hostname -} - -fn resolve_env(resource_tags: &TagSet, ignore_missing_fields: bool) -> Option<&str> { - if let Some(value) = get_resource_tag_value(resource_tags, KEY_DATADOG_ENVIRONMENT) { - return Some(value); - } - if ignore_missing_fields { - return None; - } - if let Some(value) = get_resource_tag_value(resource_tags, DEPLOYMENT_ENVIRONMENT_NAME) { - return Some(value); - } - get_resource_tag_value(resource_tags, DEPLOYMENT_ENVIRONMENT_KEY) -} - -fn resolve_container_id<'a>(resource_tags: &'a TagSet, first_span: Option<&'a DdSpan>) -> Option<&'a str> { - for key in [KEY_DATADOG_CONTAINER_ID, CONTAINER_ID, K8S_POD_UID] { - if let Some(value) = get_resource_tag_value(resource_tags, key) { - return Some(value); - } - } - // TODO: add container id fallback equivalent to cidProvider - // https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/api/otlp.go#L414 - if let Some(span) = first_span { - for (k, v) in span.meta() { - if k == KEY_DATADOG_CONTAINER_ID || k == K8S_POD_UID { - return Some(v.as_ref()); - } - } - } - None -} - -fn resolve_app_version(resource_tags: &TagSet) -> Option<&str> { - if let Some(value) = get_resource_tag_value(resource_tags, KEY_DATADOG_VERSION) { - return Some(value); - } - get_resource_tag_value(resource_tags, SERVICE_VERSION) -} - -fn resolve_container_tags( - resource_tags: &TagSet, source: Option<&OtlpSource>, ignore_missing_fields: bool, -) -> Option { - // TODO: some refactoring is probably needed to normalize this function, the tags should already be normalized - // since we do so when we transform OTLP spans to DD spans however to make this class extensible for non otlp traces, we would - // need to normalize the tags here. 
- if let Some(tags) = get_resource_tag_value(resource_tags, KEY_DATADOG_CONTAINER_TAGS) { - if !tags.is_empty() { - return Some(MetaString::from(tags)); - } - } - - if ignore_missing_fields { - return None; - } - let mut container_tags = TagSet::default(); - extract_container_tags_from_resource_tagset(resource_tags, &mut container_tags); - let is_fargate_source = source.is_some_and(|src| src.kind == OtlpSourceKind::AwsEcsFargateKind); - if container_tags.is_empty() && !is_fargate_source { - return None; - } - - let mut flattened = flatten_container_tag(container_tags); - if is_fargate_source { - if let Some(src) = source { - append_tags(&mut flattened, &src.tag()); - } - } - - if flattened.is_empty() { - None - } else { - Some(MetaString::from(flattened)) - } -} - -fn flatten_container_tag(tags: TagSet) -> String { - let mut flattened = String::new(); - for tag in tags { - if !flattened.is_empty() { - flattened.push(','); - } - flattened.push_str(tag.as_str()); - } - flattened -} - -fn append_tags(target: &mut String, tags: &str) { - if tags.is_empty() { - return; - } - if !target.is_empty() { - target.push(','); - } - target.push_str(tags); -} - -#[cfg(test)] -mod tests { - use datadog_protos::traces::AgentPayload; - use protobuf::Message as _; - use saluki_config::ConfigurationLoader; - use saluki_context::tags::TagSet; - use saluki_core::data_model::event::trace::{Span as DdSpan, Trace, TraceSampling}; - use stringtheory::MetaString; - - use super::*; - use crate::common::datadog::apm::ApmConfig; - use crate::common::otlp::config::TracesConfig; - use crate::config::{DatadogRemapper, KEY_ALIASES}; - - async fn make_encoder(ets_enabled: bool) -> TraceEndpointEncoder { - let env_vars: Vec<(String, String)> = if ets_enabled { - vec![("APM_ERROR_TRACKING_STANDALONE_ENABLED".to_string(), "true".to_string())] - } else { - vec![] - }; - let (cfg, _) = ConfigurationLoader::for_tests_with_provider_factory( - None, - Some(&env_vars), - false, - KEY_ALIASES, - 
DatadogRemapper::new, - ) - .await; - let apm_config = ApmConfig::from_configuration(&cfg).expect("ApmConfig should deserialize"); - TraceEndpointEncoder::new( - MetaString::from("test-host"), - "0.0.0".to_string(), - "none".to_string(), - apm_config, - TracesConfig::default(), - ) - } - - fn make_trace() -> Trace { - let span = DdSpan::new( - MetaString::from("svc"), - MetaString::from("op"), - MetaString::from("res"), - MetaString::from("web"), - 1, - 1, - 0, - 0, - 1000, - 0, - ); - let mut trace = Trace::new(vec![span], TagSet::default()); - trace.set_sampling(Some(TraceSampling::new(false, Some(1), None, None))); - trace - } - - fn make_error_trace() -> Trace { - let span = DdSpan::new( - MetaString::from("svc"), - MetaString::from("op"), - MetaString::from("res"), - MetaString::from("web"), - 1, // trace_id - 1, // span_id - 0, // parent_id - 0, // start - 1000, // duration - 1, // error - ); - let mut trace = Trace::new(vec![span], TagSet::default()); - trace.set_sampling(Some(TraceSampling::new(false, Some(1), None, None))); - trace - } - - #[tokio::test] - async fn ets_header_present_when_enabled() { - let encoder = make_encoder(true).await; - let headers = encoder.additional_headers(); - assert_eq!(headers.len(), 1); - assert_eq!(headers[0].0.as_str(), "x-datadog-error-tracking-standalone"); - assert_eq!(headers[0].1, "true"); - } - - #[tokio::test] - async fn ets_header_absent_when_disabled() { - let encoder = make_encoder(false).await; - assert!(encoder.additional_headers().is_empty()); - } - - #[tokio::test] - async fn ets_chunk_tag_present_for_error_trace() { - let mut encoder = make_encoder(true).await; - let trace = make_error_trace(); - let mut buf = Vec::new(); - encoder.encode(&trace, &mut buf).expect("encode should succeed"); - let payload = AgentPayload::parse_from_bytes(&buf).expect("should parse AgentPayload"); - let tag_value = payload - .tracerPayloads - .iter() - .flat_map(|tp| tp.chunks.iter()) - .find_map(|chunk| { - chunk - .tags - 
.get("_dd.error_tracking_standalone.error") - .map(|v| v.as_str()) - }); - assert_eq!( - tag_value, - Some("true"), - "ETS chunk tag should be present for error traces when ETS is enabled" - ); - } - - #[tokio::test] - async fn ets_chunk_tag_absent_for_non_error_trace() { - let mut encoder = make_encoder(true).await; - let trace = make_trace(); // no error - let mut buf = Vec::new(); - encoder.encode(&trace, &mut buf).expect("encode should succeed"); - let payload = AgentPayload::parse_from_bytes(&buf).expect("should parse AgentPayload"); - let has_tag = payload - .tracerPayloads - .iter() - .flat_map(|tp| tp.chunks.iter()) - .any(|chunk| chunk.tags.contains_key("_dd.error_tracking_standalone.error")); - assert!(!has_tag, "ETS chunk tag should be absent for non-error traces"); - } - - #[tokio::test] - async fn ets_chunk_tag_absent_when_disabled() { - let mut encoder = make_encoder(false).await; - let trace = make_trace(); - let mut buf = Vec::new(); - encoder.encode(&trace, &mut buf).expect("encode should succeed"); - let payload = AgentPayload::parse_from_bytes(&buf).expect("should parse AgentPayload"); - let has_tag = payload - .tracerPayloads - .iter() - .flat_map(|tp| tp.chunks.iter()) - .any(|chunk| chunk.tags.contains_key("_dd.error_tracking_standalone.error")); - assert!(!has_tag, "ETS chunk tag should be absent when ETS is disabled"); - } -} diff --git a/lib/saluki-components/src/encoders/datadog/v1_traces/mod.rs b/lib/saluki-components/src/encoders/datadog/v1_traces/mod.rs new file mode 100644 index 00000000000..0e7f2dacf02 --- /dev/null +++ b/lib/saluki-components/src/encoders/datadog/v1_traces/mod.rs @@ -0,0 +1,1346 @@ +//! APM traces encoder (idx format). +//! +//! Encodes `Event::Trace` events from both the V1 APM pipeline and the OTLP pipeline to +//! `AgentPayload.idxTracerPayloads` (proto field 11) using the `idx.TracerPayload` +//! string-indexed format, forwarded to `/api/v0.2/traces`. +//! +//! 
**Wire format note**: The Go Trace Agent V1 writer uses `idxTracerPayloads` (field 11), NOT +//! the legacy `tracerPayloads` (field 5). The `idx.TracerPayload` message stores all strings in +//! a flat `Strings []` table at field 1; every other string field is a `uint32` index into that +//! table. A two-pass approach is used: a pre-pass builds the complete string table, then the +//! write pass emits the table followed by all indexed fields. + +use std::time::Duration; + +use async_trait::async_trait; +use datadog_protos::traces::builders::{idx, AgentPayloadBuilder}; +use facet::Facet; +use http::{uri::PathAndQuery, HeaderName, HeaderValue, Method, Uri}; +use memory_accounting::{MemoryBounds, MemoryBoundsBuilder}; +use piecemeal::ScratchWriter; +use saluki_common::collections::FastHashMap; +use saluki_common::task::HandleExt as _; +use saluki_config::GenericConfiguration; +use saluki_context::tags::TagSet; +use saluki_core::{ + components::{encoders::*, ComponentContext}, + data_model::{ + event::{ + trace::{AttributeValue, Span, Trace}, + EventType, + }, + payload::{HttpPayload, Payload, PayloadMetadata, PayloadType}, + }, + observability::ComponentMetricsExt as _, + topology::{EventsBuffer, PayloadsBuffer}, +}; +use saluki_env::{host::providers::BoxedHostProvider, EnvironmentProvider, HostProvider}; +use saluki_error::{generic_error, ErrorContext as _, GenericError}; +use saluki_io::compression::CompressionScheme; +use saluki_metrics::MetricsBuilder; +use serde::Deserialize; +use stringtheory::MetaString; +use tokio::{ + select, + sync::mpsc::{self, Receiver, Sender}, + time::sleep, +}; +use tracing::{debug, error}; + +use crate::common::datadog::{ + apm::ApmConfig, + io::RB_BUFFER_CHUNK_SIZE, + request_builder::{EndpointEncoder, RequestBuilder}, + telemetry::ComponentTelemetry, + DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT, DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT, OTEL_TRACE_ID_META_KEY, + TAG_DECISION_MAKER, +}; +use crate::common::otlp::config::TracesConfig; +use 
crate::common::otlp::util::{ + extract_container_tags_from_attributes_map, source_from_attributes_map, SourceKind as OtlpSourceKind, + KEY_DATADOG_CONTAINER_TAGS, +}; + +const MAX_TRACES_PER_PAYLOAD: usize = 10000; +/// Sentinel priority value matching Go's `PriorityNone = math.MinInt8`. +const PRIORITY_NONE: i32 = i8::MIN as i32; +/// Default priority for OTLP traces without an explicit sampling decision (AUTO_KEEP). +const DEFAULT_CHUNK_PRIORITY: i32 = 1; +static CONTENT_TYPE_PROTOBUF: HeaderValue = HeaderValue::from_static("application/x-protobuf"); + +const CONTAINER_TAGS_META_KEY: &str = "_dd.tags.container"; +const TAG_OTLP_SAMPLING_RATE: &str = "_dd.otlp_sr"; +const TAG_ETS_ERROR: &str = "_dd.error_tracking_standalone.error"; + +fn default_serializer_compressor_kind() -> String { + "zstd".to_string() +} + +const fn default_zstd_compressor_level() -> i32 { + 3 +} + +const fn default_flush_timeout_secs() -> u64 { + 2 +} + +fn default_env() -> String { + "none".to_string() +} + +/// Configuration for the V1 APM traces encoder. +/// +/// Handles both native V1 APM traces and OTLP traces, encoding them to the `idxTracerPayloads` +/// field (field 11) of `AgentPayload` using the string-indexed idx format. 
+#[derive(Deserialize, Facet)] +pub struct V1DatadogTraceConfiguration { + #[serde( + rename = "serializer_compressor_kind", + default = "default_serializer_compressor_kind" + )] + compressor_kind: String, + + #[serde(rename = "serializer_zstd_compressor_level", default = "default_zstd_compressor_level")] + zstd_compressor_level: i32, + + #[serde(default = "default_flush_timeout_secs")] + flush_timeout_secs: u64, + + #[serde(skip)] + default_hostname: Option, + + #[serde(skip)] + version: String, + + #[serde(skip)] + #[facet(opaque)] + apm_config: ApmConfig, + + #[serde(skip)] + #[facet(opaque)] + otlp_traces: TracesConfig, + + #[serde(default = "default_env")] + env: String, +} + +impl V1DatadogTraceConfiguration { + /// Creates a new `V1DatadogTraceConfiguration` from the given configuration. + pub fn from_configuration(config: &GenericConfiguration) -> Result { + let mut cfg: Self = config.as_typed()?; + let app_details = saluki_metadata::get_app_details(); + cfg.version = format!("agent-data-plane/{}", app_details.version().raw()); + cfg.apm_config = ApmConfig::from_configuration(config)?; + cfg.otlp_traces = config.try_get_typed("otlp_config.traces")?.unwrap_or_default(); + Ok(cfg) + } + + /// Sets the default hostname using the environment provider. 
+ pub async fn with_environment_provider(mut self, env_provider: E) -> Result + where + E: EnvironmentProvider, + { + let hostname = env_provider.host().get_hostname().await?; + self.default_hostname = Some(hostname); + Ok(self) + } +} + +#[async_trait] +impl EncoderBuilder for V1DatadogTraceConfiguration { + fn input_event_type(&self) -> EventType { + EventType::Trace + } + + fn output_payload_type(&self) -> PayloadType { + PayloadType::Http + } + + async fn build(&self, context: ComponentContext) -> Result, GenericError> { + let metrics_builder = MetricsBuilder::from_component_context(&context); + let telemetry = ComponentTelemetry::from_builder(&metrics_builder); + let compression_scheme = CompressionScheme::new(&self.compressor_kind, self.zstd_compressor_level); + + let default_hostname = MetaString::from(self.default_hostname.clone().unwrap_or_default()); + + let mut trace_rb = RequestBuilder::new( + V1TraceEndpointEncoder::new( + default_hostname, + self.version.clone(), + self.env.clone(), + self.apm_config.clone(), + self.otlp_traces.clone(), + ), + compression_scheme, + RB_BUFFER_CHUNK_SIZE, + ) + .await?; + trace_rb.with_max_inputs_per_payload(MAX_TRACES_PER_PAYLOAD); + + let flush_timeout = match self.flush_timeout_secs { + 0 => Duration::from_millis(10), + secs => Duration::from_secs(secs), + }; + + Ok(Box::new(V1DatadogTrace { + trace_rb, + telemetry, + flush_timeout, + })) + } +} + +impl MemoryBounds for V1DatadogTraceConfiguration { + fn specify_bounds(&self, builder: &mut MemoryBoundsBuilder) { + builder + .minimum() + .with_single_value::("component struct") + .with_array::("request builder events channel", 8) + .with_array::("request builder payloads channel", 8); + + builder + .firm() + .with_array::("traces split re-encode buffer", MAX_TRACES_PER_PAYLOAD); + } +} + +struct V1DatadogTrace { + trace_rb: RequestBuilder, + telemetry: ComponentTelemetry, + flush_timeout: Duration, +} + +#[async_trait] +impl Encoder for V1DatadogTrace { + async fn 
run(mut self: Box, mut context: EncoderContext) -> Result<(), GenericError> { + let Self { + trace_rb, + telemetry, + flush_timeout, + } = *self; + + let mut health = context.take_health_handle(); + let (events_tx, events_rx) = mpsc::channel(8); + let (payloads_tx, mut payloads_rx) = mpsc::channel(8); + let request_builder_fut = run_request_builder(trace_rb, telemetry, events_rx, payloads_tx, flush_timeout); + let request_builder_handle = context + .topology_context() + .global_thread_pool() + .spawn_traced_named("v1-traces-request-builder", request_builder_fut); + + health.mark_ready(); + debug!("V1 Datadog Trace encoder started."); + + loop { + select! { + biased; + _ = health.live() => continue, + maybe_payload = payloads_rx.recv() => match maybe_payload { + Some(payload) => { + if let Err(e) = context.dispatcher().dispatch(payload).await { + error!("Failed to dispatch V1 trace payload: {}", e); + } + } + None => break, + }, + maybe_event_buffer = context.events().next() => match maybe_event_buffer { + Some(event_buffer) => events_tx.send(event_buffer).await + .error_context("Failed to send event buffer to V1 request builder.")?, + None => break, + }, + } + } + + drop(events_tx); + while let Some(payload) = payloads_rx.recv().await { + if let Err(e) = context.dispatcher().dispatch(payload).await { + error!("Failed to dispatch V1 trace payload: {}", e); + } + } + match request_builder_handle.await { + Ok(Ok(())) => debug!("V1 request builder task stopped."), + Ok(Err(e)) => error!(error = %e, "V1 request builder task failed."), + Err(e) => error!(error = %e, "V1 request builder task panicked."), + } + debug!("V1 Datadog Trace encoder stopped."); + Ok(()) + } +} + +async fn run_request_builder( + mut rb: RequestBuilder, telemetry: ComponentTelemetry, + mut events_rx: Receiver, payloads_tx: Sender, flush_timeout: Duration, +) -> Result<(), GenericError> { + let mut pending_flush = false; + let pending_flush_timeout = sleep(flush_timeout); + 
tokio::pin!(pending_flush_timeout); + + loop { + select! { + Some(event_buffer) = events_rx.recv() => { + for event in event_buffer { + let trace = match event.try_into_trace() { + Some(t) => t, + None => continue, + }; + let trace_to_retry = match rb.encode(trace).await { + Ok(None) => continue, + Ok(Some(t)) => t, + Err(e) => { + error!(error = %e, "Failed to encode V1 trace."); + telemetry.events_dropped_encoder().increment(1); + continue; + } + }; + let maybe_requests = rb.flush().await; + if maybe_requests.is_empty() { + panic!("V1 trace builder told us to flush, but gave us nothing"); + } + for maybe_request in maybe_requests { + match maybe_request { + Ok((events, request)) => { + let payload_meta = PayloadMetadata::from_event_count(events); + let http_payload = HttpPayload::new(payload_meta, request); + payloads_tx.send(Payload::Http(http_payload)).await + .map_err(|_| generic_error!("Failed to send V1 payload."))?; + } + Err(e) => { + if !e.is_recoverable() { + return Err(GenericError::from(e).context("Failed to flush V1 request.")); + } + } + } + } + if let Err(e) = rb.encode(trace_to_retry).await { + error!(error = %e, "Failed to re-encode V1 trace."); + telemetry.events_dropped_encoder().increment(1); + } + } + if !pending_flush { + pending_flush_timeout.as_mut().reset(tokio::time::Instant::now() + flush_timeout); + pending_flush = true; + } + }, + _ = &mut pending_flush_timeout, if pending_flush => { + pending_flush = false; + let maybe_requests = rb.flush().await; + for maybe_request in maybe_requests { + match maybe_request { + Ok((events, request)) => { + let payload_meta = PayloadMetadata::from_event_count(events); + let http_payload = HttpPayload::new(payload_meta, request); + payloads_tx.send(Payload::Http(http_payload)).await + .map_err(|_| generic_error!("Failed to send V1 payload."))?; + } + Err(e) => { + if !e.is_recoverable() { + return Err(GenericError::from(e).context("Failed to flush V1 request.")); + } + } + } + } + }, + else => break, + 
} + } + Ok(()) +} + +// ── String table ────────────────────────────────────────────────────────────── + +/// Minimal string interning table for `idx.TracerPayload` encoding. +/// +/// Index 0 is always the empty string (reserved by the proto format). Non-empty +/// strings are assigned indices 1..N in first-encounter order during a pre-pass +/// over the entire `Trace`, ensuring the `Strings` proto field can be written +/// before any `*_ref` field references an index. +struct IdxStringTable { + map: FastHashMap, + /// Ordered list of all strings; `strings[0]` is always the empty string. + strings: Vec, +} + +impl IdxStringTable { + fn new() -> Self { + let mut strings = Vec::with_capacity(64); + strings.push(MetaString::empty()); // index 0 = empty string + Self { + map: FastHashMap::default(), + strings, + } + } + + /// Intern a string and return its index. Empty strings always return 0. + fn intern(&mut self, s: &MetaString) -> u32 { + if s.is_empty() { + return 0; + } + if let Some(&idx) = self.map.get(s) { + return idx; + } + let idx = self.strings.len() as u32; + self.map.insert(s.clone(), idx); + self.strings.push(s.clone()); + idx + } + + /// Intern a `&str` slice. + fn intern_str(&mut self, s: &str) -> u32 { + if s.is_empty() { + return 0; + } + // Use a temporary MetaString to look up; only clone if we need to insert. + if let Some(&idx) = self.map.get(s) { + return idx; + } + let ms = MetaString::from(s); + let idx = self.strings.len() as u32; + self.map.insert(ms.clone(), idx); + self.strings.push(ms); + idx + } + + /// Look up the index of an already-interned string. Returns 0 for unknown strings. + fn get(&self, s: &MetaString) -> u32 { + if s.is_empty() { + return 0; + } + *self.map.get(s).unwrap_or(&0) + } + + fn get_str(&self, s: &str) -> u32 { + if s.is_empty() { + return 0; + } + *self.map.get(s).unwrap_or(&0) + } +} + +/// Build the complete string table from a `Trace` in a single pre-pass. 
+fn collect_strings(trace: &Trace) -> IdxStringTable { + let mut st = IdxStringTable::new(); + + // Payload-level metadata strings. + st.intern(&trace.container_id); + st.intern(&trace.language_name); + st.intern(&trace.language_version); + st.intern(&trace.tracer_version); + st.intern(&trace.runtime_id); + st.intern(&trace.env); + st.intern(&trace.hostname); + st.intern(&trace.app_version); + + // Trace-level attributes (merged payload + chunk attributes). + intern_attribute_map(&mut st, &trace.attributes); + + // Chunk-level strings. + st.intern(&trace.origin); + + // Per-span strings. + for span in trace.spans() { + st.intern(&MetaString::from(span.service())); + st.intern(&MetaString::from(span.name())); + st.intern(&MetaString::from(span.resource())); + st.intern(&MetaString::from(span.span_type())); + st.intern(&span.env); + st.intern(&span.version); + st.intern(&span.component); + + // Span attributes. + for (k, v) in &span.attributes { + st.intern(k); + if let AttributeValue::String(s) = v { + st.intern(s); + } + } + + for link in span.span_links() { + st.intern(&MetaString::from(link.tracestate())); + for (k, v) in link.attributes() { + st.intern(k); + if let AttributeValue::String(s) = v { + st.intern(s); + } + } + } + for event in span.span_events() { + st.intern(&MetaString::from(event.name())); + intern_attribute_map(&mut st, event.attributes()); + } + } + + st +} + +fn intern_attribute_map(st: &mut IdxStringTable, attrs: &FastHashMap) { + for (k, v) in attrs { + st.intern(k); + intern_attribute_value_strings(st, v); + } +} + +fn intern_attribute_value_strings(st: &mut IdxStringTable, v: &AttributeValue) { + match v { + AttributeValue::String(s) => { + st.intern(s); + } + AttributeValue::Array(arr) => { + for elem in arr { + intern_attribute_value_strings(st, elem); + } + } + AttributeValue::KeyValueList(kvs) => { + for (k, val) in kvs { + st.intern(k); + intern_attribute_value_strings(st, val); + } + } + AttributeValue::Bool(_) | 
AttributeValue::Int(_) | AttributeValue::Float(_) | AttributeValue::Bytes(_) => {} + } +} + +// ── Encoding helpers ────────────────────────────────────────────────────────── + +/// Pack a 128-bit trace ID into a 16-byte big-endian representation. +fn trace_id_bytes(high: u64, low: u64) -> [u8; 16] { + let mut b = [0u8; 16]; + b[..8].copy_from_slice(&high.to_be_bytes()); + b[8..].copy_from_slice(&low.to_be_bytes()); + b +} + +/// Map a span kind integer to the `idx.SpanKind` enum. +/// +/// Both the V1 wire format and the internal `Span.kind` field use OTEL values: +/// 0=unspecified, 1=internal, 2=server, 3=client, 4=producer, 5=consumer. +fn v1_kind_to_span_kind(kind: u32) -> idx::SpanKind { + match kind { + 1 => idx::SpanKind::SPAN_KIND_INTERNAL, + 2 => idx::SpanKind::SPAN_KIND_SERVER, + 3 => idx::SpanKind::SPAN_KIND_CLIENT, + 4 => idx::SpanKind::SPAN_KIND_PRODUCER, + 5 => idx::SpanKind::SPAN_KIND_CONSUMER, + _ => idx::SpanKind::SPAN_KIND_UNSPECIFIED, + } +} + +/// Write an `AttributeValue` into an `idx.ValueOneOfBuilder`. 
+fn encode_attribute_value( + v: &mut idx::ValueOneOfBuilder<'_, S>, value: &AttributeValue, st: &IdxStringTable, +) -> std::io::Result<()> { + match value { + AttributeValue::String(s) => v.string_value_ref(st.get(s)), + AttributeValue::Bool(b) => v.bool_value(*b), + AttributeValue::Int(i) => v.int_value(*i), + AttributeValue::Float(f) => v.double_value(*f), + AttributeValue::Bytes(b) => v.bytes_value(b.as_slice()), + AttributeValue::Array(arr) => v.array_value(|a| { + for elem in arr { + a.add_values(|av| { + av.value(|v2| encode_attribute_value(v2, elem, st))?; + Ok(()) + })?; + } + Ok(()) + }), + AttributeValue::KeyValueList(kvs) => v.key_value_list(|kv_builder| { + for (k, val) in kvs { + let key_ref = st.get(k); + kv_builder.add_key_values(|kv| { + kv.key(key_ref)?; + kv.value(|av| { + av.value(|vb| encode_attribute_value(vb, val, st))?; + Ok(()) + })?; + Ok(()) + })?; + } + Ok(()) + }), + } +} + +/// Write a `FastHashMap` into an `idx` attribute map. +fn write_idx_attribute_map( + map: &mut piecemeal::MessageMapBuilder<'_, S, piecemeal::types::protobuf::Varint, idx::AnyValue>, + attrs: &FastHashMap, + st: &IdxStringTable, +) -> std::io::Result<()> { + for (k, v) in attrs { + let key_ref = st.get(k); + if key_ref == 0 { + continue; + } + map.write_entry(key_ref, |av| { + av.value(|vb| encode_attribute_value(vb, v, st))?; + Ok(()) + })?; + } + Ok(()) +} + +/// Write span attributes into an `idx` attribute map. 
+fn write_idx_span_attrs( + map: &mut piecemeal::MessageMapBuilder<'_, S, piecemeal::types::protobuf::Varint, idx::AnyValue>, + span: &Span, + st: &IdxStringTable, +) -> std::io::Result<()> { + write_idx_attribute_map(map, &span.attributes, st) +} + + +// ── Container tag helpers (OTLP) ────────────────────────────────────────────── + +fn resolve_container_tags_from_attributes( + attributes: &FastHashMap, ignore_missing_fields: bool, +) -> Option { + if let Some(AttributeValue::String(tags)) = attributes.get(KEY_DATADOG_CONTAINER_TAGS) { + if !tags.is_empty() { + return Some(tags.clone()); + } + } + + if ignore_missing_fields { + return None; + } + + let mut container_tags = TagSet::default(); + extract_container_tags_from_attributes_map(attributes, &mut container_tags); + + let source = source_from_attributes_map(attributes); + let is_fargate_source = source.as_ref().is_some_and(|src| src.kind == OtlpSourceKind::AwsEcsFargateKind); + + if container_tags.is_empty() && !is_fargate_source { + return None; + } + + let mut flattened = flatten_container_tag(container_tags); + if is_fargate_source { + if let Some(src) = source { + append_tags(&mut flattened, &src.tag()); + } + } + + if flattened.is_empty() { + None + } else { + Some(MetaString::from(flattened)) + } +} + +fn flatten_container_tag(tags: TagSet) -> String { + let mut flattened = String::new(); + for tag in tags { + if !flattened.is_empty() { + flattened.push(','); + } + flattened.push_str(tag.as_str()); + } + flattened +} + +fn append_tags(target: &mut String, tags: &str) { + if tags.is_empty() { + return; + } + if !target.is_empty() { + target.push(','); + } + target.push_str(tags); +} + +// ── Endpoint encoder ────────────────────────────────────────────────────────── + +#[derive(Debug)] +struct V1TraceEndpointEncoder { + scratch: ScratchWriter>, + default_hostname: MetaString, + agent_hostname: String, + version: String, + env: String, + apm_config: ApmConfig, + otlp_traces: TracesConfig, + 
error_tracking_standalone: bool, + extra_headers: Vec<(HeaderName, HeaderValue)>, +} + +impl V1TraceEndpointEncoder { + fn new( + default_hostname: MetaString, version: String, env: String, apm_config: ApmConfig, + otlp_traces: TracesConfig, + ) -> Self { + let error_tracking_standalone = apm_config.error_tracking_standalone_enabled(); + let extra_headers = if error_tracking_standalone { + vec![( + HeaderName::from_static("x-datadog-error-tracking-standalone"), + HeaderValue::from_static("true"), + )] + } else { + Vec::new() + }; + Self { + scratch: ScratchWriter::new(Vec::with_capacity(8192)), + agent_hostname: default_hostname.as_ref().to_string(), + default_hostname, + version, + env, + apm_config, + otlp_traces, + error_tracking_standalone, + extra_headers, + } + } + + fn sampling_rate(&self) -> f64 { + let rate = self.otlp_traces.probabilistic_sampler.sampling_percentage / 100.0; + if rate <= 0.0 || rate >= 1.0 { + return 1.0; + } + rate + } + + fn encode_idx_payload(&mut self, trace: &Trace, output: &mut Vec) -> std::io::Result<()> { + let root_service = trace + .spans() + .iter() + .find(|s| s.parent_id() == 0) + .or_else(|| trace.spans().first()) + .map(|s| s.service()) + .unwrap_or(""); + debug!( + spans = trace.spans().len(), + env = trace.env.as_ref(), + service = root_service, + "Encoding V1 trace." 
+ ); + + // ── Detect OTLP source ──────────────────────────────────────────────── + let root_span_idx = trace.spans().iter().position(|s| s.parent_id() == 0).unwrap_or(0); + let is_otlp = trace + .spans() + .get(root_span_idx) + .map(|s| { + s.attributes + .get(OTEL_TRACE_ID_META_KEY) + .and_then(AttributeValue::as_string) + .is_some() + }) + .unwrap_or(false); + + // ── Pre-compute OTLP enrichment values ──────────────────────────────── + let modified_tracer_version: Option = if is_otlp { + Some(MetaString::from(format!("otlp-{}", trace.tracer_version.as_ref()))) + } else { + None + }; + + let container_tags: Option = if is_otlp { + resolve_container_tags_from_attributes(&trace.attributes, self.otlp_traces.ignore_missing_datadog_fields) + } else { + None + }; + + let otlp_sr: Option = if is_otlp { + Some(trace.otlp_sampling_rate.unwrap_or_else(|| self.sampling_rate())) + } else { + None + }; + + let decision_maker = trace.decision_maker.as_ref(); + + let trace_has_error = self.error_tracking_standalone + && trace.spans().iter().any(|span| { + span.error() != 0 + || span + .attributes + .get("_dd.span_events.has_exception") + .and_then(AttributeValue::as_string) + .is_some_and(|v| v == "true") + }); + + // ── Phase 1: build the string table ────────────────────────────────── + let mut st = collect_strings(trace); + + // Intern additional strings for OTLP enrichment. + if let Some(ref tv) = modified_tracer_version { + st.intern(tv); + } + if let Some(ref ct) = container_tags { + st.intern_str(CONTAINER_TAGS_META_KEY); + st.intern(ct); + } + if is_otlp { + st.intern_str(TAG_OTLP_SAMPLING_RATE); + } + if let Some(dm) = decision_maker { + st.intern_str(TAG_DECISION_MAKER); + st.intern(dm); + } + if trace_has_error { + st.intern_str(TAG_ETS_ERROR); + st.intern_str("true"); + } + // Hostname fallback: intern default_hostname if trace has none. 
+ if trace.hostname.is_empty() && !self.default_hostname.is_empty() { + st.intern(&self.default_hostname); + } + + // ── Compute string refs ─────────────────────────────────────────────── + let container_id_ref = st.get(&trace.container_id); + let language_name_ref = st.get(&trace.language_name); + let language_version_ref = st.get(&trace.language_version); + let tracer_version_ref = if let Some(ref tv) = modified_tracer_version { + st.get(tv) + } else { + st.get(&trace.tracer_version) + }; + let runtime_id_ref = st.get(&trace.runtime_id); + let env_ref = st.get(&trace.env); + let hostname_ref = if !trace.hostname.is_empty() { + st.get(&trace.hostname) + } else { + st.get(&self.default_hostname) + }; + let app_version_ref = st.get(&trace.app_version); + let origin_ref = st.get(&trace.origin); + let priority = trace + .priority + .unwrap_or(if is_otlp { DEFAULT_CHUNK_PRIORITY } else { PRIORITY_NONE }); + + // ── Phase 2: write the payload ──────────────────────────────────────── + let mut ap = AgentPayloadBuilder::new(&mut self.scratch); + + ap.host_name(&self.agent_hostname)? + .env(&self.env)? + .agent_version(&self.version)? + .target_tps(self.apm_config.target_traces_per_second())? + .error_tps(self.apm_config.errors_per_second())?; + + ap.add_idx_tracer_payloads(|tp| { + // Field 1 — string table (must precede all *_ref fields). 
+ tp.strings(|rb| { + for s in &st.strings { + rb.add(s.as_bytes())?; + } + Ok(()) + })?; + + if container_id_ref != 0 { + tp.container_id_ref(container_id_ref)?; + } + if language_name_ref != 0 { + tp.language_name_ref(language_name_ref)?; + } + if language_version_ref != 0 { + tp.language_version_ref(language_version_ref)?; + } + if tracer_version_ref != 0 { + tp.tracer_version_ref(tracer_version_ref)?; + } + if runtime_id_ref != 0 { + tp.runtime_id_ref(runtime_id_ref)?; + } + if env_ref != 0 { + tp.env_ref(env_ref)?; + } + if hostname_ref != 0 { + tp.hostname_ref(hostname_ref)?; + } + if app_version_ref != 0 { + tp.app_version_ref(app_version_ref)?; + } + + // Payload-level attributes: trace.attributes plus OTLP container tags. + { + let mut attrs = tp.attributes(); + if let Some(ref ct) = container_tags { + let key_ref = st.get_str(CONTAINER_TAGS_META_KEY); + let val_ref = st.get(ct); + if key_ref != 0 && val_ref != 0 { + attrs.write_entry(key_ref, |av| { + av.value(|vb| vb.string_value_ref(val_ref))?; + Ok(()) + })?; + } + } + write_idx_attribute_map(&mut attrs, &trace.attributes, &st)?; + } + + // The single chunk. + tp.add_chunks(|ch| { + ch.priority(priority)?; + + if origin_ref != 0 { + ch.origin_ref(origin_ref)?; + } + + // Chunk-level attributes: decision maker, OTLP sampling rate, ETS tag. 
+ { + let mut attrs = ch.attributes(); + if let Some(dm) = decision_maker { + let key_ref = st.get_str(TAG_DECISION_MAKER); + let val_ref = st.get(dm); + if key_ref != 0 { + attrs.write_entry(key_ref, |av| { + av.value(|vb| vb.string_value_ref(val_ref))?; + Ok(()) + })?; + } + } + if let Some(rate) = otlp_sr { + let key_ref = st.get_str(TAG_OTLP_SAMPLING_RATE); + if key_ref != 0 { + attrs.write_entry(key_ref, |av| { + av.value(|vb| vb.double_value(rate))?; + Ok(()) + })?; + } + } + if trace_has_error { + let key_ref = st.get_str(TAG_ETS_ERROR); + let val_ref = st.get_str("true"); + if key_ref != 0 { + attrs.write_entry(key_ref, |av| { + av.value(|vb| vb.string_value_ref(val_ref))?; + Ok(()) + })?; + } + } + } + + if trace.dropped_trace { + ch.dropped_trace(true)?; + } + + if trace.sampling_mechanism != 0 { + ch.sampling_mechanism(trace.sampling_mechanism)?; + } + + let tid = trace_id_bytes(trace.trace_id_high, trace.trace_id_low); + ch.trace_id(&tid)?; + + for span in trace.spans() { + let service_ref = st.get_str(span.service()); + let name_ref = st.get_str(span.name()); + let resource_ref = st.get_str(span.resource()); + let type_ref = st.get_str(span.span_type()); + let span_env_ref = st.get(&span.env); + let version_ref = st.get(&span.version); + let component_ref = st.get(&span.component); + let span_kind = v1_kind_to_span_kind(span.kind); + + ch.add_spans(|sb| { + if service_ref != 0 { + sb.service_ref(service_ref)?; + } + if name_ref != 0 { + sb.name_ref(name_ref)?; + } + if resource_ref != 0 { + sb.resource_ref(resource_ref)?; + } + + sb.span_id(span.span_id())? + .parent_id(span.parent_id())? + .start(span.start())? + .duration(span.duration())? 
+ .error(span.error() != 0)?; + + if type_ref != 0 { + sb.type_ref(type_ref)?; + } + if span_env_ref != 0 { + sb.env_ref(span_env_ref)?; + } + if version_ref != 0 { + sb.version_ref(version_ref)?; + } + if component_ref != 0 { + sb.component_ref(component_ref)?; + } + if span_kind != idx::SpanKind::SPAN_KIND_UNSPECIFIED { + sb.kind(span_kind)?; + } + + write_idx_span_attrs(&mut sb.attributes(), span, &st)?; + + for link in span.span_links() { + let tracestate_ref = st.get_str(link.tracestate()); + let link_tid = trace_id_bytes(link.trace_id_high(), link.trace_id()); + sb.add_links(|sl| { + sl.trace_id(&link_tid)?; + sl.span_id(link.span_id())?; + write_idx_attribute_map(&mut sl.attributes(), link.attributes(), &st)?; + if tracestate_ref != 0 { + sl.tracestate_ref(tracestate_ref)?; + } + sl.flags(link.flags())?; + Ok(()) + })?; + } + + for event in span.span_events() { + let event_name_ref = st.get_str(event.name()); + sb.add_events(|se| { + se.time(event.time_unix_nano())?; + if event_name_ref != 0 { + se.name_ref(event_name_ref)?; + } + write_idx_attribute_map(&mut se.attributes(), event.attributes(), &st)?; + Ok(()) + })?; + } + + Ok(()) + })?; + } + + Ok(()) + })?; + + Ok(()) + })?; + + ap.finish(output)?; + Ok(()) + } +} + +impl EndpointEncoder for V1TraceEndpointEncoder { + type Input = Trace; + type EncodeError = std::io::Error; + + fn encoder_name() -> &'static str { + "v1_traces" + } + + fn compressed_size_limit(&self) -> usize { + DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT + } + + fn uncompressed_size_limit(&self) -> usize { + DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT + } + + fn encode(&mut self, trace: &Self::Input, buffer: &mut Vec) -> Result<(), Self::EncodeError> { + self.encode_idx_payload(trace, buffer) + } + + fn endpoint_uri(&self) -> Uri { + PathAndQuery::from_static("/api/v0.2/traces").into() + } + + fn endpoint_method(&self) -> Method { + Method::POST + } + + fn content_type(&self) -> HeaderValue { + CONTENT_TYPE_PROTOBUF.clone() + } + + fn 
additional_headers(&self) -> &[(http::HeaderName, HeaderValue)] { + &self.extra_headers + } +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use datadog_protos::traces::AgentPayload; + use protobuf::Message as _; + use saluki_common::collections::FastHashMap; + use saluki_config::ConfigurationLoader; + use saluki_core::data_model::event::trace::{AttributeValue, Span, SpanEvent, SpanLink, Trace}; + use stringtheory::MetaString; + + use super::*; + use crate::common::datadog::apm::ApmConfig; + use crate::common::otlp::config::TracesConfig; + use crate::config::{DatadogRemapper, KEY_ALIASES}; + + async fn make_encoder(ets_enabled: bool) -> V1TraceEndpointEncoder { + let env_vars: Vec<(String, String)> = if ets_enabled { + vec![("APM_ERROR_TRACKING_STANDALONE_ENABLED".to_string(), "true".to_string())] + } else { + vec![] + }; + let (cfg, _) = ConfigurationLoader::for_tests_with_provider_factory( + None, + Some(&env_vars), + false, + KEY_ALIASES, + DatadogRemapper::new, + ) + .await; + let apm_config = ApmConfig::from_configuration(&cfg).expect("ApmConfig should deserialize"); + V1TraceEndpointEncoder::new( + MetaString::from("test-host"), + "0.0.0".to_string(), + "none".to_string(), + apm_config, + TracesConfig::default(), + ) + } + + fn make_span(service: &str, name: &str, resource: &str, span_id: u64, parent_id: u64) -> Span { + Span::new(service, name, resource, "web", span_id, parent_id, 1_000_000_000, 5_000_000, 0) + .with_kind(2) // server + } + + fn make_trace(spans: Vec) -> Trace { + let mut trace = Trace::new(spans); + trace.priority = Some(1); + trace.trace_id_high = 0x0102030405060708; + trace.trace_id_low = 0x090a0b0c0d0e0f10; + trace.sampling_mechanism = 4; + trace.container_id = MetaString::from("abc123"); + trace.language_name = MetaString::from("python"); + trace.language_version = MetaString::from("3.11"); + trace.tracer_version = MetaString::from("1.2.3"); + trace.runtime_id = 
MetaString::from("runtime-uuid"); + trace.env = MetaString::from("prod"); + trace.hostname = MetaString::from("web-01"); + trace.app_version = MetaString::from("2.0.0"); + trace.client_dropped_p0s_weight = 0.5; + trace + } + + fn make_error_trace() -> Trace { + let span = Span::new( + "svc", + "op", + "res", + "web", + 1, // span_id + 0, // parent_id (root) + 0, // start + 1000, // duration + 1, // error + ); + let mut trace = Trace::new(vec![span]); + trace.priority = Some(1); + trace + } + + fn make_plain_trace() -> Trace { + let span = Span::new("svc", "op", "res", "web", 1, 0, 0, 1000, 0); + let mut trace = Trace::new(vec![span]); + trace.priority = Some(1); + trace + } + + fn parse_outer(buf: &[u8]) -> AgentPayload { + AgentPayload::parse_from_bytes(buf).expect("should parse AgentPayload") + } + + #[tokio::test] + async fn encodes_to_idx_field_not_tracer_payloads_field() { + let mut enc = make_encoder(false).await; + let trace = make_trace(vec![make_span("svc", "op", "GET /", 1, 0)]); + let mut buf = Vec::new(); + enc.encode(&trace, &mut buf).expect("encode should succeed"); + + let payload = parse_outer(&buf); + + assert!( + payload.tracerPayloads.is_empty(), + "legacy tracerPayloads (field 5) must be empty for V1 traces" + ); + assert!( + !payload.idxTracerPayloads.is_empty(), + "idxTracerPayloads (field 11) must be populated" + ); + } + + #[tokio::test] + async fn outer_agent_payload_fields_are_correct() { + let mut enc = make_encoder(false).await; + let trace = make_trace(vec![make_span("svc", "op", "GET /", 1, 0)]); + let mut buf = Vec::new(); + enc.encode(&trace, &mut buf).unwrap(); + + let payload = parse_outer(&buf); + assert_eq!(payload.hostName, "test-host"); + assert_eq!(payload.env, "none"); + assert_eq!(payload.agentVersion, "0.0.0"); + } + + #[tokio::test] + async fn string_table_deduplicates_repeated_strings() { + let span1 = make_span("shared-service", "op1", "res1", 1, 0); + let span2 = make_span("shared-service", "op2", "res2", 2, 1); + let 
trace = make_trace(vec![span1, span2]); + + let st = collect_strings(&trace); + let idx1 = st.get(&MetaString::from("shared-service")); + let idx2 = st.get(&MetaString::from("shared-service")); + assert_eq!(idx1, idx2, "same string must get the same index"); + assert_ne!(idx1, 0, "non-empty string must not get index 0"); + + assert_eq!(st.get(&MetaString::empty()), 0); + } + + #[tokio::test] + async fn span_kind_mapping_covers_all_v1_values() { + let cases: &[(u32, idx::SpanKind)] = &[ + (0, idx::SpanKind::SPAN_KIND_UNSPECIFIED), + (1, idx::SpanKind::SPAN_KIND_INTERNAL), + (2, idx::SpanKind::SPAN_KIND_SERVER), + (3, idx::SpanKind::SPAN_KIND_CLIENT), + (4, idx::SpanKind::SPAN_KIND_PRODUCER), + (5, idx::SpanKind::SPAN_KIND_CONSUMER), + (99, idx::SpanKind::SPAN_KIND_UNSPECIFIED), + ]; + for &(v1_kind, expected) in cases { + assert_eq!( + v1_kind_to_span_kind(v1_kind), + expected, + "v1 kind {} should map to {:?}", + v1_kind, + expected + ); + } + } + + #[tokio::test] + async fn trace_id_bytes_packs_high_and_low() { + let high = 0x0102030405060708u64; + let low = 0x090a0b0c0d0e0f10u64; + let bytes = trace_id_bytes(high, low); + assert_eq!(&bytes[..8], &high.to_be_bytes()); + assert_eq!(&bytes[8..], &low.to_be_bytes()); + } + + #[tokio::test] + async fn encode_succeeds_with_span_attributes() { + let mut enc = make_encoder(false).await; + let mut meta = FastHashMap::default(); + meta.insert(MetaString::from("http.method"), MetaString::from("GET")); + meta.insert(MetaString::from("cache_hit"), MetaString::from("true")); + let mut metrics = FastHashMap::default(); + metrics.insert(MetaString::from("http.status_code"), 200.0f64); + metrics.insert(MetaString::from("latency_ms"), 3.14f64); + let span = make_span("svc", "op", "res", 1, 0) + .with_meta(Some(meta)) + .with_metrics(Some(metrics)); + let trace = make_trace(vec![span]); + let mut buf = Vec::new(); + enc.encode(&trace, &mut buf).expect("encode with attributes should succeed"); + assert!(!buf.is_empty()); + } + + 
#[tokio::test] + async fn encode_succeeds_with_span_links_and_events() { + let mut enc = make_encoder(false).await; + let mut link_attrs = FastHashMap::default(); + link_attrs.insert(MetaString::from("link.type"), AttributeValue::String(MetaString::from("follows_from"))); + let link = SpanLink::new(0xBBBBBBBBBBBBBBBB, 42) + .with_trace_id_high(0xAAAAAAAAAAAAAAAA) + .with_attributes(Some(link_attrs)) + .with_tracestate(MetaString::from("dd=t.dm:-4")) + .with_flags(1); + + let mut event_attrs = FastHashMap::default(); + event_attrs.insert( + MetaString::from("exception.message"), + AttributeValue::String(MetaString::from("oops")), + ); + let event = SpanEvent::new(999_000_000, "exception").with_attributes(Some(event_attrs)); + + let span = make_span("svc", "op", "res", 1, 0) + .with_span_links(Some(vec![link])) + .with_span_events(Some(vec![event])); + let trace = make_trace(vec![span]); + let mut buf = Vec::new(); + enc.encode(&trace, &mut buf).expect("encode with links and events should succeed"); + assert!(!buf.is_empty()); + } + + #[tokio::test] + async fn dropped_trace_flag_propagates() { + let mut enc = make_encoder(false).await; + let mut trace = make_trace(vec![make_span("svc", "op", "res", 1, 0)]); + trace.dropped_trace = true; + let mut buf = Vec::new(); + enc.encode(&trace, &mut buf).unwrap(); + let payload = parse_outer(&buf); + assert!(!payload.idxTracerPayloads.is_empty()); + } + + #[tokio::test] + async fn empty_optional_metadata_does_not_panic() { + let mut enc = make_encoder(false).await; + let trace = make_trace(vec![make_span("svc", "op", "res", 1, 0)]); + let mut buf = Vec::new(); + enc.encode(&trace, &mut buf).expect("empty metadata should not panic"); + assert!(!buf.is_empty()); + } + + // ── ETS tests ───────────────────────────────────────────────────────────── + + #[tokio::test] + async fn ets_header_present_when_enabled() { + let encoder = make_encoder(true).await; + let headers = encoder.additional_headers(); + assert_eq!(headers.len(), 1); 
+ assert_eq!(headers[0].0.as_str(), "x-datadog-error-tracking-standalone"); + assert_eq!(headers[0].1, "true"); + } + + #[tokio::test] + async fn ets_header_absent_when_disabled() { + let encoder = make_encoder(false).await; + assert!(encoder.additional_headers().is_empty()); + } + + #[tokio::test] + async fn ets_encode_error_trace_does_not_panic() { + let mut encoder = make_encoder(true).await; + let trace = make_error_trace(); + let mut buf = Vec::new(); + encoder.encode(&trace, &mut buf).expect("encode should succeed"); + assert!(!buf.is_empty()); + let payload = parse_outer(&buf); + assert!(!payload.idxTracerPayloads.is_empty()); + } + + #[tokio::test] + async fn ets_encode_non_error_trace_does_not_panic() { + let mut encoder = make_encoder(true).await; + let trace = make_plain_trace(); + let mut buf = Vec::new(); + encoder.encode(&trace, &mut buf).expect("encode should succeed"); + assert!(!buf.is_empty()); + } + + #[tokio::test] + async fn otlp_trace_encodes_with_otlp_prefix_and_sampling_rate() { + let mut enc = make_encoder(false).await; + // OTLP traces have `otel.trace_id` in the root span's attributes. 
+ let mut span = make_span("svc", "op", "res", 1, 0); + span.attributes.insert( + MetaString::from(OTEL_TRACE_ID_META_KEY), + AttributeValue::String(MetaString::from("abc123")), + ); + let mut trace = make_trace(vec![span]); + trace.tracer_version = MetaString::from("1.0.0"); + + let mut buf = Vec::new(); + enc.encode(&trace, &mut buf).expect("OTLP trace encode should succeed"); + assert!(!buf.is_empty()); + let payload = parse_outer(&buf); + assert!(!payload.idxTracerPayloads.is_empty(), "OTLP trace must produce idxTracerPayloads"); + } + + #[tokio::test] + async fn hostname_falls_back_to_default_when_trace_hostname_empty() { + let mut enc = make_encoder(false).await; + let mut trace = make_trace(vec![make_span("svc", "op", "res", 1, 0)]); + trace.hostname = MetaString::empty(); + let mut buf = Vec::new(); + enc.encode(&trace, &mut buf).expect("encode should succeed"); + // Verify encoding produces output (hostname fallback doesn't panic). + assert!(!buf.is_empty()); + } +} diff --git a/lib/saluki-components/src/encoders/mod.rs b/lib/saluki-components/src/encoders/mod.rs index ace425ede0e..289e33e3d16 100644 --- a/lib/saluki-components/src/encoders/mod.rs +++ b/lib/saluki-components/src/encoders/mod.rs @@ -6,5 +6,5 @@ pub use self::buffered_incremental::BufferedIncrementalConfiguration; mod datadog; pub use self::datadog::{ DatadogApmStatsEncoderConfiguration, DatadogEventsConfiguration, DatadogLogsConfiguration, - DatadogMetricsConfiguration, DatadogServiceChecksConfiguration, DatadogTraceConfiguration, + DatadogMetricsConfiguration, DatadogServiceChecksConfiguration, V1DatadogTraceConfiguration, }; diff --git a/lib/saluki-components/src/sources/apm/deserialize.rs b/lib/saluki-components/src/sources/apm/deserialize.rs new file mode 100644 index 00000000000..59dadf7af46 --- /dev/null +++ b/lib/saluki-components/src/sources/apm/deserialize.rs @@ -0,0 +1,1704 @@ +use std::io::Read; + +use rmp::Marker; + +/// Maximum allowed element count for any array or map in a 
single payload (mirrors Go agent's 25 MB cap). +const MAX_SIZE: u64 = 25_000_000; + +// ── Wire-format error type ────────────────────────────────────────────────── + +// The enum fields carry diagnostic detail for logging/debugging. They are matched but not always +// destructured in production code, so the compiler considers the inner values "unread". +#[allow(dead_code)] +#[derive(Debug)] +pub(super) enum DeserializeError { + UnexpectedEof, + UnexpectedMarker(Marker), + InvalidStringIndex(u32), + InvalidUtf8, + LimitExceeded(u64), + /// Attribute array length was not a multiple of 3. + InvalidAttributeCount(u32), + /// Array element count for an AnyValue::Array was not a multiple of 2. + InvalidArrayElementCount(u32), + /// Field 1 (bulk strings) was present; msgpack payloads must use streaming strings instead. + UnexpectedStringsField, + UnknownAnyValueType(u32), + /// TraceID binary payload was not exactly 16 bytes. + InvalidTraceIdLength(u32), +} + +impl std::fmt::Display for DeserializeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +impl std::error::Error for DeserializeError {} + +// ── Wire-format string table ──────────────────────────────────────────────── + +/// Interned string table shared across an entire v1 payload. +/// +/// Index 0 is always the empty string `""`. All subsequent indices are assigned in the order +/// strings are first encountered during deserialization. 
+#[derive(Debug)] +pub(super) struct StringTable { + strings: Vec, +} + +impl StringTable { + pub(super) fn new() -> Self { + Self { + strings: vec![String::new()], + } + } + + pub(super) fn push(&mut self, s: String) -> u32 { + let idx = self.strings.len() as u32; + self.strings.push(s); + idx + } + + #[cfg(test)] + pub(super) fn get(&self, idx: u32) -> Option<&str> { + self.strings.get(idx as usize).map(|s| s.as_str()) + } + + pub(super) fn len(&self) -> usize { + self.strings.len() + } + + pub(super) fn iter(&self) -> impl Iterator { + self.strings.iter().map(|s| s.as_str()) + } +} + +// ── Raw wire-format payload types (private deserialization intermediates) ─── + +#[derive(Debug)] +pub(super) struct RawTracerPayload { + pub(super) string_table: StringTable, + pub(super) container_id: u32, + pub(super) language_name: u32, + pub(super) language_version: u32, + pub(super) tracer_version: u32, + pub(super) runtime_id: u32, + pub(super) env: u32, + pub(super) hostname: u32, + pub(super) app_version: u32, + pub(super) attributes: Vec, + pub(super) chunks: Vec, +} + +#[derive(Debug)] +pub(super) struct RawTraceChunk { + pub(super) priority: i32, + pub(super) origin: u32, + pub(super) attributes: Vec, + pub(super) spans: Vec, + pub(super) dropped_trace: bool, + pub(super) trace_id_high: u64, + pub(super) trace_id_low: u64, + pub(super) sampling_mechanism: u32, +} + +#[derive(Debug)] +pub(super) struct RawSpan { + pub(super) service: u32, + pub(super) name: u32, + pub(super) resource: u32, + pub(super) span_id: u64, + pub(super) parent_id: u64, + pub(super) start: u64, + pub(super) duration: u64, + pub(super) error: bool, + pub(super) attributes: Vec, + pub(super) span_type: u32, + pub(super) links: Vec, + pub(super) events: Vec, + pub(super) env: u32, + pub(super) version: u32, + pub(super) component: u32, + pub(super) kind: u32, +} + +#[derive(Debug)] +pub(super) struct RawSpanLink { + pub(super) trace_id_high: u64, + pub(super) trace_id_low: u64, + pub(super) 
span_id: u64, + pub(super) attributes: Vec, + pub(super) tracestate: u32, + pub(super) flags: u32, +} + +#[derive(Debug)] +pub(super) struct RawSpanEvent { + pub(super) time_unix_nano: u64, + pub(super) name: u32, + pub(super) attributes: Vec, +} + +#[derive(Debug)] +pub(super) struct RawKeyValue { + pub(super) key: u32, + pub(super) value: RawAnyValue, +} + +#[derive(Debug)] +pub(super) enum RawAnyValue { + String(u32), + Bool(bool), + Double(f64), + Int(i64), + Bytes(Vec), + Array(Vec), + KeyValueList(Vec), +} + +// ── Error conversion helpers ──────────────────────────────────────────────── + +fn vr_err(e: rmp::decode::ValueReadError) -> DeserializeError { + match e { + rmp::decode::ValueReadError::InvalidMarkerRead(_) | rmp::decode::ValueReadError::InvalidDataRead(_) => { + DeserializeError::UnexpectedEof + } + rmp::decode::ValueReadError::TypeMismatch(m) => DeserializeError::UnexpectedMarker(m), + } +} + +fn nvr_err(e: rmp::decode::NumValueReadError) -> DeserializeError { + match e { + rmp::decode::NumValueReadError::InvalidMarkerRead(_) + | rmp::decode::NumValueReadError::InvalidDataRead(_) + | rmp::decode::NumValueReadError::OutOfRange => DeserializeError::UnexpectedEof, + rmp::decode::NumValueReadError::TypeMismatch(m) => DeserializeError::UnexpectedMarker(m), + } +} + +// ── Low-level byte helpers ────────────────────────────────────────────────── + +fn skip_bytes(rd: &mut R, mut n: usize) -> Result<(), DeserializeError> { + let mut buf = [0u8; 1024]; + while n > 0 { + let chunk = n.min(buf.len()); + rd.read_exact(&mut buf[..chunk]) + .map_err(|_| DeserializeError::UnexpectedEof)?; + n -= chunk; + } + Ok(()) +} + +fn read_u8_raw(rd: &mut R) -> Result { + let mut b = [0u8; 1]; + rd.read_exact(&mut b).map_err(|_| DeserializeError::UnexpectedEof)?; + Ok(b[0]) +} + +fn read_u16_be(rd: &mut R) -> Result { + let mut b = [0u8; 2]; + rd.read_exact(&mut b).map_err(|_| DeserializeError::UnexpectedEof)?; + Ok(u16::from_be_bytes(b)) +} + +fn read_u32_be(rd: &mut R) -> 
Result { + let mut b = [0u8; 4]; + rd.read_exact(&mut b).map_err(|_| DeserializeError::UnexpectedEof)?; + Ok(u32::from_be_bytes(b)) +} + +// ── String helpers ────────────────────────────────────────────────────────── + +/// Read the body of a msgpack string given that the leading marker has already been consumed. +fn read_str_body(rd: &mut R, marker: Marker) -> Result { + let len = match marker { + Marker::FixStr(n) => n as u32, + Marker::Str8 => read_u8_raw(rd)? as u32, + Marker::Str16 => read_u16_be(rd)? as u32, + Marker::Str32 => read_u32_be(rd)?, + _ => return Err(DeserializeError::UnexpectedMarker(marker)), + }; + let mut buf = vec![0u8; len as usize]; + rd.read_exact(&mut buf).map_err(|_| DeserializeError::UnexpectedEof)?; + String::from_utf8(buf).map_err(|_| DeserializeError::InvalidUtf8) +} + +/// Read a uint given that the leading marker has already been consumed. +fn read_uint_from_marker(rd: &mut R, marker: Marker) -> Result { + match marker { + Marker::FixPos(v) => Ok(v as u32), + Marker::U8 => Ok(read_u8_raw(rd)? as u32), + Marker::U16 => Ok(read_u16_be(rd)? as u32), + Marker::U32 => Ok(read_u32_be(rd)?), + Marker::U64 => { + let mut b = [0u8; 8]; + rd.read_exact(&mut b).map_err(|_| DeserializeError::UnexpectedEof)?; + let v = u64::from_be_bytes(b); + u32::try_from(v).map_err(|_| DeserializeError::UnexpectedMarker(marker)) + } + _ => Err(DeserializeError::UnexpectedMarker(marker)), + } +} + +/// Decode a streaming string field. +/// +/// If the next msgpack value is a string, it is a new entry added to the table. +/// If it is a uint, it is a back-reference to a previously-seen string. 
+fn decode_streaming_string(rd: &mut R, table: &mut StringTable) -> Result { + let marker = rmp::decode::read_marker(rd).map_err(|_| DeserializeError::UnexpectedEof)?; + match marker { + Marker::FixStr(_) | Marker::Str8 | Marker::Str16 | Marker::Str32 => { + let s = read_str_body(rd, marker)?; + Ok(table.push(s)) + } + Marker::FixPos(_) | Marker::U8 | Marker::U16 | Marker::U32 | Marker::U64 => { + let idx = read_uint_from_marker(rd, marker)?; + if idx as usize >= table.len() { + return Err(DeserializeError::InvalidStringIndex(idx)); + } + Ok(idx) + } + _ => Err(DeserializeError::UnexpectedMarker(marker)), + } +} + +// ── Skip helper ───────────────────────────────────────────────────────────── + +/// Discard one complete msgpack value from `rd`, regardless of type. +pub(super) fn skip_msgpack_value(rd: &mut R) -> Result<(), DeserializeError> { + let marker = rmp::decode::read_marker(rd).map_err(|_| DeserializeError::UnexpectedEof)?; + match marker { + Marker::Null | Marker::True | Marker::False | Marker::FixPos(_) | Marker::FixNeg(_) => Ok(()), + Marker::U8 | Marker::I8 => skip_bytes(rd, 1), + Marker::U16 | Marker::I16 => skip_bytes(rd, 2), + Marker::U32 | Marker::I32 | Marker::F32 => skip_bytes(rd, 4), + Marker::U64 | Marker::I64 | Marker::F64 => skip_bytes(rd, 8), + Marker::FixStr(n) => skip_bytes(rd, n as usize), + Marker::Str8 => { + let len = read_u8_raw(rd)? as usize; + skip_bytes(rd, len) + } + Marker::Str16 => { + let len = read_u16_be(rd)? as usize; + skip_bytes(rd, len) + } + Marker::Str32 => { + let len = read_u32_be(rd)? as usize; + skip_bytes(rd, len) + } + Marker::Bin8 => { + let len = read_u8_raw(rd)? as usize; + skip_bytes(rd, len) + } + Marker::Bin16 => { + let len = read_u16_be(rd)? as usize; + skip_bytes(rd, len) + } + Marker::Bin32 => { + let len = read_u32_be(rd)? 
as usize; + skip_bytes(rd, len) + } + Marker::FixArray(n) => { + for _ in 0..n { + skip_msgpack_value(rd)?; + } + Ok(()) + } + Marker::Array16 => { + let len = read_u16_be(rd)?; + for _ in 0..len { + skip_msgpack_value(rd)?; + } + Ok(()) + } + Marker::Array32 => { + let len = read_u32_be(rd)?; + for _ in 0..len { + skip_msgpack_value(rd)?; + } + Ok(()) + } + Marker::FixMap(n) => { + for _ in 0..n { + skip_msgpack_value(rd)?; + skip_msgpack_value(rd)?; + } + Ok(()) + } + Marker::Map16 => { + let len = read_u16_be(rd)?; + for _ in 0..len { + skip_msgpack_value(rd)?; + skip_msgpack_value(rd)?; + } + Ok(()) + } + Marker::Map32 => { + let len = read_u32_be(rd)?; + for _ in 0..len { + skip_msgpack_value(rd)?; + skip_msgpack_value(rd)?; + } + Ok(()) + } + Marker::FixExt1 => skip_bytes(rd, 2), + Marker::FixExt2 => skip_bytes(rd, 3), + Marker::FixExt4 => skip_bytes(rd, 5), + Marker::FixExt8 => skip_bytes(rd, 9), + Marker::FixExt16 => skip_bytes(rd, 17), + Marker::Ext8 => { + let len = read_u8_raw(rd)? as usize; + skip_bytes(rd, 1 + len) + } + Marker::Ext16 => { + let len = read_u16_be(rd)? as usize; + skip_bytes(rd, 1 + len) + } + Marker::Ext32 => { + let len = read_u32_be(rd)? 
as usize; + skip_bytes(rd, 1 + len) + } + Marker::Reserved => Err(DeserializeError::UnexpectedMarker(marker)), + } +} + +// ── Attribute / AnyValue decoding ─────────────────────────────────────────── + +fn decode_attributes(rd: &mut R, table: &mut StringTable) -> Result, DeserializeError> { + let num_elements = rmp::decode::read_array_len(rd).map_err(vr_err)?; + if num_elements as u64 > MAX_SIZE { + return Err(DeserializeError::LimitExceeded(num_elements as u64)); + } + if num_elements % 3 != 0 { + return Err(DeserializeError::InvalidAttributeCount(num_elements)); + } + let mut kvs = Vec::with_capacity(num_elements as usize / 3); + for _ in 0..num_elements / 3 { + let key = decode_streaming_string(rd, table)?; + let value = decode_any_value(rd, table)?; + kvs.push(RawKeyValue { key, value }); + } + Ok(kvs) +} + +enum AnyValueTypeTag { + String = 1, + Bool = 2, + Double = 3, + Int = 4, + Bytes = 5, + Array = 6, + KeyValueList = 7, +} + +impl AnyValueTypeTag { + fn from_u32(v: u32) -> Option { + match v { + 1 => Some(Self::String), + 2 => Some(Self::Bool), + 3 => Some(Self::Double), + 4 => Some(Self::Int), + 5 => Some(Self::Bytes), + 6 => Some(Self::Array), + 7 => Some(Self::KeyValueList), + _ => None, + } + } +} + +fn decode_any_value(rd: &mut R, table: &mut StringTable) -> Result { + let raw: u32 = rmp::decode::read_int(rd).map_err(nvr_err)?; + let tag = AnyValueTypeTag::from_u32(raw).ok_or(DeserializeError::UnknownAnyValueType(raw))?; + match tag { + AnyValueTypeTag::String => Ok(RawAnyValue::String(decode_streaming_string(rd, table)?)), + AnyValueTypeTag::Bool => Ok(RawAnyValue::Bool(rmp::decode::read_bool(rd).map_err(vr_err)?)), + AnyValueTypeTag::Double => Ok(RawAnyValue::Double(rmp::decode::read_f64(rd).map_err(vr_err)?)), + AnyValueTypeTag::Int => { + let v: i64 = rmp::decode::read_int(rd).map_err(nvr_err)?; + Ok(RawAnyValue::Int(v)) + } + AnyValueTypeTag::Bytes => { + let bin_len = rmp::decode::read_bin_len(rd).map_err(vr_err)?; + let mut buf = vec![0u8; 
bin_len as usize]; + rd.read_exact(&mut buf).map_err(|_| DeserializeError::UnexpectedEof)?; + Ok(RawAnyValue::Bytes(buf)) + } + AnyValueTypeTag::Array => { + let num_elements = rmp::decode::read_array_len(rd).map_err(vr_err)?; + if num_elements as u64 > MAX_SIZE { + return Err(DeserializeError::LimitExceeded(num_elements as u64)); + } + if num_elements % 2 != 0 { + return Err(DeserializeError::InvalidArrayElementCount(num_elements)); + } + let mut values = Vec::with_capacity(num_elements as usize / 2); + for _ in 0..num_elements / 2 { + values.push(decode_any_value(rd, table)?); + } + Ok(RawAnyValue::Array(values)) + } + AnyValueTypeTag::KeyValueList => Ok(RawAnyValue::KeyValueList(decode_attributes(rd, table)?)), + } +} + +// ── Wire field-number constants ───────────────────────────────────────────── + +mod span_link { + pub const FIELD_TRACE_ID: u32 = 1; + pub const FIELD_SPAN_ID: u32 = 2; + pub const FIELD_ATTRIBUTES: u32 = 3; + pub const FIELD_TRACESTATE: u32 = 4; + pub const FIELD_FLAGS: u32 = 5; +} + +mod span_event { + pub const FIELD_TIME_UNIX_NANO: u32 = 1; + pub const FIELD_NAME: u32 = 2; + pub const FIELD_ATTRIBUTES: u32 = 3; +} + +mod span { + pub const FIELD_SERVICE: u32 = 1; + pub const FIELD_NAME: u32 = 2; + pub const FIELD_RESOURCE: u32 = 3; + pub const FIELD_SPAN_ID: u32 = 4; + pub const FIELD_PARENT_ID: u32 = 5; + pub const FIELD_START: u32 = 6; + pub const FIELD_DURATION: u32 = 7; + pub const FIELD_ERROR: u32 = 8; + pub const FIELD_ATTRIBUTES: u32 = 9; + pub const FIELD_TYPE: u32 = 10; + pub const FIELD_LINKS: u32 = 11; + pub const FIELD_EVENTS: u32 = 12; + pub const FIELD_ENV: u32 = 13; + pub const FIELD_VERSION: u32 = 14; + pub const FIELD_COMPONENT: u32 = 15; + pub const FIELD_KIND: u32 = 16; +} + +mod trace_chunk { + pub const FIELD_PRIORITY: u32 = 1; + pub const FIELD_ORIGIN: u32 = 2; + pub const FIELD_ATTRIBUTES: u32 = 3; + pub const FIELD_SPANS: u32 = 4; + pub const FIELD_DROPPED_TRACE: u32 = 5; + pub const FIELD_TRACE_ID: u32 = 6; + pub 
const FIELD_SAMPLING_MECHANISM: u32 = 7; +} + +mod tracer_payload { + pub const FIELD_STRINGS: u32 = 1; + pub const FIELD_CONTAINER_ID: u32 = 2; + pub const FIELD_LANGUAGE_NAME: u32 = 3; + pub const FIELD_LANGUAGE_VERSION: u32 = 4; + pub const FIELD_TRACER_VERSION: u32 = 5; + pub const FIELD_RUNTIME_ID: u32 = 6; + pub const FIELD_ENV: u32 = 7; + pub const FIELD_HOSTNAME: u32 = 8; + pub const FIELD_APP_VERSION: u32 = 9; + pub const FIELD_ATTRIBUTES: u32 = 10; + pub const FIELD_CHUNKS: u32 = 11; +} + +// ── SpanLink / SpanEvent ──────────────────────────────────────────────────── + +fn decode_span_link(rd: &mut R, table: &mut StringTable) -> Result { + let map_len = rmp::decode::read_map_len(rd).map_err(vr_err)?; + if map_len as u64 > MAX_SIZE { + return Err(DeserializeError::LimitExceeded(map_len as u64)); + } + + let mut link = RawSpanLink { + trace_id_high: 0, + trace_id_low: 0, + span_id: 0, + attributes: Vec::new(), + tracestate: 0, + flags: 0, + }; + + for _ in 0..map_len { + let field_num: u32 = rmp::decode::read_int(rd).map_err(nvr_err)?; + match field_num { + span_link::FIELD_TRACE_ID => { + let bin_len = rmp::decode::read_bin_len(rd).map_err(vr_err)?; + if bin_len > 16 { + return Err(DeserializeError::InvalidTraceIdLength(bin_len)); + } + let mut buf = [0u8; 16]; + let offset = 16 - bin_len as usize; + rd.read_exact(&mut buf[offset..]).map_err(|_| DeserializeError::UnexpectedEof)?; + link.trace_id_high = u64::from_be_bytes(buf[..8].try_into().unwrap()); + link.trace_id_low = u64::from_be_bytes(buf[8..].try_into().unwrap()); + } + span_link::FIELD_SPAN_ID => link.span_id = rmp::decode::read_int(rd).map_err(nvr_err)?, + span_link::FIELD_ATTRIBUTES => link.attributes = decode_attributes(rd, table)?, + span_link::FIELD_TRACESTATE => link.tracestate = decode_streaming_string(rd, table)?, + span_link::FIELD_FLAGS => link.flags = rmp::decode::read_int(rd).map_err(nvr_err)?, + _ => { + skip_msgpack_value(rd)?; + } + } + } + Ok(link) +} + +fn decode_span_event(rd: 
&mut R, table: &mut StringTable) -> Result { + let map_len = rmp::decode::read_map_len(rd).map_err(vr_err)?; + if map_len as u64 > MAX_SIZE { + return Err(DeserializeError::LimitExceeded(map_len as u64)); + } + + let mut event = RawSpanEvent { + time_unix_nano: 0, + name: 0, + attributes: Vec::new(), + }; + + for _ in 0..map_len { + let field_num: u32 = rmp::decode::read_int(rd).map_err(nvr_err)?; + match field_num { + span_event::FIELD_TIME_UNIX_NANO => event.time_unix_nano = rmp::decode::read_int(rd).map_err(nvr_err)?, + span_event::FIELD_NAME => event.name = decode_streaming_string(rd, table)?, + span_event::FIELD_ATTRIBUTES => event.attributes = decode_attributes(rd, table)?, + _ => { + skip_msgpack_value(rd)?; + } + } + } + Ok(event) +} + +// ── Span ──────────────────────────────────────────────────────────────────── + +fn decode_span(rd: &mut R, table: &mut StringTable) -> Result { + let map_len = rmp::decode::read_map_len(rd).map_err(vr_err)?; + if map_len as u64 > MAX_SIZE { + return Err(DeserializeError::LimitExceeded(map_len as u64)); + } + + let mut s = RawSpan { + service: 0, + name: 0, + resource: 0, + span_id: 0, + parent_id: 0, + start: 0, + duration: 0, + error: false, + attributes: Vec::new(), + span_type: 0, + links: Vec::new(), + events: Vec::new(), + env: 0, + version: 0, + component: 0, + kind: 0, + }; + + for _ in 0..map_len { + let field_num: u32 = rmp::decode::read_int(rd).map_err(nvr_err)?; + match field_num { + span::FIELD_SERVICE => s.service = decode_streaming_string(rd, table)?, + span::FIELD_NAME => s.name = decode_streaming_string(rd, table)?, + span::FIELD_RESOURCE => s.resource = decode_streaming_string(rd, table)?, + span::FIELD_SPAN_ID => s.span_id = rmp::decode::read_int(rd).map_err(nvr_err)?, + span::FIELD_PARENT_ID => s.parent_id = rmp::decode::read_int(rd).map_err(nvr_err)?, + span::FIELD_START => s.start = rmp::decode::read_int(rd).map_err(nvr_err)?, + span::FIELD_DURATION => s.duration = 
rmp::decode::read_int(rd).map_err(nvr_err)?, + span::FIELD_ERROR => s.error = rmp::decode::read_bool(rd).map_err(vr_err)?, + span::FIELD_ATTRIBUTES => s.attributes = decode_attributes(rd, table)?, + span::FIELD_TYPE => s.span_type = decode_streaming_string(rd, table)?, + span::FIELD_LINKS => { + let arr_len = rmp::decode::read_array_len(rd).map_err(vr_err)?; + if arr_len as u64 > MAX_SIZE { + return Err(DeserializeError::LimitExceeded(arr_len as u64)); + } + s.links = (0..arr_len) + .map(|_| decode_span_link(rd, table)) + .collect::>()?; + } + span::FIELD_EVENTS => { + let arr_len = rmp::decode::read_array_len(rd).map_err(vr_err)?; + if arr_len as u64 > MAX_SIZE { + return Err(DeserializeError::LimitExceeded(arr_len as u64)); + } + s.events = (0..arr_len) + .map(|_| decode_span_event(rd, table)) + .collect::>()?; + } + span::FIELD_ENV => s.env = decode_streaming_string(rd, table)?, + span::FIELD_VERSION => s.version = decode_streaming_string(rd, table)?, + span::FIELD_COMPONENT => s.component = decode_streaming_string(rd, table)?, + span::FIELD_KIND => s.kind = rmp::decode::read_int(rd).map_err(nvr_err)?, + _ => { + skip_msgpack_value(rd)?; + } + } + } + Ok(s) +} + +// ── TraceChunk ────────────────────────────────────────────────────────────── + +fn decode_chunk(rd: &mut R, table: &mut StringTable) -> Result { + let map_len = rmp::decode::read_map_len(rd).map_err(vr_err)?; + if map_len as u64 > MAX_SIZE { + return Err(DeserializeError::LimitExceeded(map_len as u64)); + } + + let mut chunk = RawTraceChunk { + priority: 0, + origin: 0, + attributes: Vec::new(), + spans: Vec::new(), + dropped_trace: false, + trace_id_high: 0, + trace_id_low: 0, + sampling_mechanism: 0, + }; + + for _ in 0..map_len { + let field_num: u32 = rmp::decode::read_int(rd).map_err(nvr_err)?; + match field_num { + trace_chunk::FIELD_PRIORITY => chunk.priority = rmp::decode::read_int(rd).map_err(nvr_err)?, + trace_chunk::FIELD_ORIGIN => chunk.origin = decode_streaming_string(rd, table)?, + 
trace_chunk::FIELD_ATTRIBUTES => chunk.attributes = decode_attributes(rd, table)?, + trace_chunk::FIELD_SPANS => { + let arr_len = rmp::decode::read_array_len(rd).map_err(vr_err)?; + if arr_len as u64 > MAX_SIZE { + return Err(DeserializeError::LimitExceeded(arr_len as u64)); + } + chunk.spans = (0..arr_len).map(|_| decode_span(rd, table)).collect::>()?; + } + trace_chunk::FIELD_DROPPED_TRACE => chunk.dropped_trace = rmp::decode::read_bool(rd).map_err(vr_err)?, + trace_chunk::FIELD_TRACE_ID => { + let bin_len = rmp::decode::read_bin_len(rd).map_err(vr_err)?; + if bin_len > 16 { + return Err(DeserializeError::InvalidTraceIdLength(bin_len)); + } + let mut buf = [0u8; 16]; + let offset = 16 - bin_len as usize; + rd.read_exact(&mut buf[offset..]).map_err(|_| DeserializeError::UnexpectedEof)?; + chunk.trace_id_high = u64::from_be_bytes(buf[..8].try_into().unwrap()); + chunk.trace_id_low = u64::from_be_bytes(buf[8..].try_into().unwrap()); + } + trace_chunk::FIELD_SAMPLING_MECHANISM => { + chunk.sampling_mechanism = rmp::decode::read_int(rd).map_err(nvr_err)? 
+ } + _ => { + skip_msgpack_value(rd)?; + } + } + } + Ok(chunk) +} + +// ── TracerPayload ─────────────────────────────────────────────────────────── + +pub(super) fn decode_tracer_payload(rd: &mut R) -> Result { + let map_len = rmp::decode::read_map_len(rd).map_err(vr_err)?; + if map_len as u64 > MAX_SIZE { + return Err(DeserializeError::LimitExceeded(map_len as u64)); + } + + let mut table = StringTable::new(); + let mut container_id = 0u32; + let mut language_name = 0u32; + let mut language_version = 0u32; + let mut tracer_version = 0u32; + let mut runtime_id = 0u32; + let mut env = 0u32; + let mut hostname = 0u32; + let mut app_version = 0u32; + let mut attributes = Vec::new(); + let mut chunks = Vec::new(); + + for _ in 0..map_len { + let field_num: u32 = rmp::decode::read_int(rd).map_err(nvr_err)?; + match field_num { + tracer_payload::FIELD_STRINGS => { + return Err(DeserializeError::UnexpectedStringsField); + } + tracer_payload::FIELD_CONTAINER_ID => { + container_id = decode_streaming_string(rd, &mut table)?; + } + tracer_payload::FIELD_LANGUAGE_NAME => { + language_name = decode_streaming_string(rd, &mut table)?; + } + tracer_payload::FIELD_LANGUAGE_VERSION => { + language_version = decode_streaming_string(rd, &mut table)?; + } + tracer_payload::FIELD_TRACER_VERSION => { + tracer_version = decode_streaming_string(rd, &mut table)?; + } + tracer_payload::FIELD_RUNTIME_ID => { + runtime_id = decode_streaming_string(rd, &mut table)?; + } + tracer_payload::FIELD_ENV => { + env = decode_streaming_string(rd, &mut table)?; + } + tracer_payload::FIELD_HOSTNAME => { + hostname = decode_streaming_string(rd, &mut table)?; + } + tracer_payload::FIELD_APP_VERSION => { + app_version = decode_streaming_string(rd, &mut table)?; + } + tracer_payload::FIELD_ATTRIBUTES => { + attributes = decode_attributes(rd, &mut table)?; + } + tracer_payload::FIELD_CHUNKS => { + let arr_len = rmp::decode::read_array_len(rd).map_err(vr_err)?; + if arr_len as u64 > MAX_SIZE { + return 
Err(DeserializeError::LimitExceeded(arr_len as u64)); + } + chunks = (0..arr_len) + .map(|_| decode_chunk(rd, &mut table)) + .collect::>()?; + } + _ => { + skip_msgpack_value(rd)?; + } + } + } + + Ok(RawTracerPayload { + string_table: table, + container_id, + language_name, + language_version, + tracer_version, + runtime_id, + env, + hostname, + app_version, + attributes, + chunks, + }) +} + +// ── Tests ─────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + // ── Encoding helpers ──────────────────────────────────────────────────── + + fn encode_fixmap_header(count: u8) -> Vec { + assert!( + count <= 15, + "fixmap supports 0-15 entries; use encode_map16_header for more" + ); + vec![0x80 | (count & 0x0f)] + } + + fn encode_map16_header(count: u16) -> Vec { + let mut b = vec![0xde]; + b.extend_from_slice(&count.to_be_bytes()); + b + } + + fn encode_fixarray_header(count: u8) -> Vec { + assert!( + count <= 15, + "fixarray supports 0-15 entries; use encode_array16_header for more" + ); + vec![0x90 | (count & 0x0f)] + } + + fn encode_array16_header(count: u16) -> Vec { + let mut b = vec![0xdc]; + b.extend_from_slice(&count.to_be_bytes()); + b + } + + fn encode_fixpos(v: u8) -> Vec { + vec![v] + } + + fn encode_u8(v: u8) -> Vec { + vec![0xcc, v] + } + + fn encode_i32(v: i32) -> Vec { + let mut b = vec![0xd2]; + b.extend_from_slice(&v.to_be_bytes()); + b + } + + fn encode_i64(v: i64) -> Vec { + let mut b = vec![0xd3]; + b.extend_from_slice(&v.to_be_bytes()); + b + } + + fn encode_u64(v: u64) -> Vec { + let mut b = vec![0xcf]; + b.extend_from_slice(&v.to_be_bytes()); + b + } + + fn encode_f64(v: f64) -> Vec { + let mut b = vec![0xcb]; + b.extend_from_slice(&v.to_bits().to_be_bytes()); + b + } + + fn encode_bool(v: bool) -> Vec { + vec![if v { 0xc3 } else { 0xc2 }] + } + + fn encode_nil() -> Vec { + vec![0xc0] + } + + fn encode_fixstr(s: &str) -> Vec { + assert!(s.len() <= 31, "use encode_str8 for longer 
/// Test helper: joins the encoded `parts` into one contiguous byte buffer, preserving order.
///
/// Returns an empty vector when `parts` is empty.
fn concat(parts: &[Vec<u8>]) -> Vec<u8> {
    parts.concat()
}
fn streaming_string_index_zero_resolves_to_empty() { + let mut table = StringTable::new(); + let data = encode_fixpos(0); + let mut rd = data.as_slice(); + let idx = decode_streaming_string(&mut rd, &mut table).unwrap(); + assert_eq!(idx, 0); + assert_eq!(table.get(0), Some("")); + } + + #[test] + fn streaming_string_out_of_bounds_index_is_error() { + let mut table = StringTable::new(); + let data = encode_fixpos(5); + let mut rd = data.as_slice(); + let err = decode_streaming_string(&mut rd, &mut table).unwrap_err(); + assert!(matches!(err, DeserializeError::InvalidStringIndex(5))); + } + + #[test] + fn streaming_string_u8_encoded_index() { + let mut table = StringTable::new(); + table.push("a".to_owned()); + + let data = encode_u8(1); + let mut rd = data.as_slice(); + let idx = decode_streaming_string(&mut rd, &mut table).unwrap(); + assert_eq!(idx, 1); + } + + #[test] + fn streaming_string_str8_encoding() { + let mut table = StringTable::new(); + let s = "x".repeat(50); + let data = encode_str8(&s); + let mut rd = data.as_slice(); + let idx = decode_streaming_string(&mut rd, &mut table).unwrap(); + assert_eq!(idx, 1); + assert_eq!(table.get(1), Some(s.as_str())); + } + + // ── Field 1 (strings) is always an error ─────────────────────────────── + + #[test] + fn payload_field1_strings_is_error() { + let strings_arr = concat(&[encode_fixarray_header(1), encode_fixstr("svc")]); + let data = concat(&[encode_fixmap_header(1), encode_fixpos(1), strings_arr]); + let mut rd = data.as_slice(); + let err = decode_tracer_payload(&mut rd).unwrap_err(); + assert!(matches!(err, DeserializeError::UnexpectedStringsField)); + } + + // ── AnyValue decoding ─────────────────────────────────────────────────── + + fn decode_av(data: &[u8]) -> RawAnyValue { + let mut table = StringTable::new(); + let mut rd = data; + decode_any_value(&mut rd, &mut table).unwrap() + } + + #[test] + fn anyvalue_type1_string_inline() { + let mut table = StringTable::new(); + let data = 
concat(&[encode_fixpos(1), encode_fixstr("hello")]); + let mut rd = data.as_slice(); + let av = decode_any_value(&mut rd, &mut table).unwrap(); + assert!(matches!(av, RawAnyValue::String(1))); + assert_eq!(table.get(1), Some("hello")); + } + + #[test] + fn anyvalue_type1_string_via_index() { + let mut table = StringTable::new(); + table.push("hello".to_owned()); + let data = concat(&[encode_fixpos(1), encode_fixpos(1)]); + let mut rd = data.as_slice(); + let av = decode_any_value(&mut rd, &mut table).unwrap(); + assert!(matches!(av, RawAnyValue::String(1))); + } + + #[test] + fn anyvalue_type2_bool_true() { + let data = concat(&[encode_fixpos(2), encode_bool(true)]); + assert!(matches!(decode_av(&data), RawAnyValue::Bool(true))); + } + + #[test] + fn anyvalue_type2_bool_false() { + let data = concat(&[encode_fixpos(2), encode_bool(false)]); + assert!(matches!(decode_av(&data), RawAnyValue::Bool(false))); + } + + #[test] + fn anyvalue_type3_double() { + let data = concat(&[encode_fixpos(3), encode_f64(1.23)]); + let RawAnyValue::Double(v) = decode_av(&data) else { + panic!("expected Double") + }; + assert!((v - 1.23).abs() < 1e-9); + } + + #[test] + fn anyvalue_type4_int() { + let data = concat(&[encode_fixpos(4), encode_i64(-42)]); + assert!(matches!(decode_av(&data), RawAnyValue::Int(-42))); + } + + #[test] + fn anyvalue_type5_bytes() { + let data = concat(&[encode_fixpos(5), encode_bin8(&[0xde, 0xad, 0xbe, 0xef])]); + let RawAnyValue::Bytes(b) = decode_av(&data) else { + panic!("expected Bytes") + }; + assert_eq!(b, &[0xde, 0xad, 0xbe, 0xef]); + } + + #[test] + fn anyvalue_type6_array() { + let data = concat(&[ + encode_fixpos(6), + encode_fixarray_header(4), + encode_fixpos(2), + encode_bool(true), + encode_fixpos(4), + encode_fixpos(7), + ]); + let RawAnyValue::Array(arr) = decode_av(&data) else { + panic!("expected Array") + }; + assert_eq!(arr.len(), 2); + assert!(matches!(arr[0], RawAnyValue::Bool(true))); + assert!(matches!(arr[1], RawAnyValue::Int(7))); + 
} + + #[test] + fn anyvalue_type6_odd_element_count_is_error() { + let data = concat(&[ + encode_fixpos(6), + encode_fixarray_header(3), + encode_fixpos(2), + encode_bool(true), + encode_fixpos(4), + ]); + let mut table = StringTable::new(); + let mut rd = data.as_slice(); + let err = decode_any_value(&mut rd, &mut table).unwrap_err(); + assert!(matches!(err, DeserializeError::InvalidArrayElementCount(3))); + } + + #[test] + fn anyvalue_type7_kvlist() { + let data = concat(&[ + encode_fixpos(7), + encode_fixarray_header(3), + encode_fixstr("k"), + encode_fixpos(2), + encode_bool(true), + ]); + let mut table = StringTable::new(); + let mut rd = data.as_slice(); + let RawAnyValue::KeyValueList(kvl) = decode_any_value(&mut rd, &mut table).unwrap() else { + panic!("expected KeyValueList") + }; + assert_eq!(kvl.len(), 1); + assert_eq!(table.get(kvl[0].key), Some("k")); + assert!(matches!(kvl[0].value, RawAnyValue::Bool(true))); + } + + #[test] + fn anyvalue_unknown_type_tag_is_error() { + let data = concat(&[encode_fixpos(99)]); + let mut table = StringTable::new(); + let mut rd = data.as_slice(); + let err = decode_any_value(&mut rd, &mut table).unwrap_err(); + assert!(matches!(err, DeserializeError::UnknownAnyValueType(99))); + } + + // ── Attribute array ───────────────────────────────────────────────────── + + #[test] + fn attributes_empty_array() { + let data = encode_fixarray_header(0); + let mut table = StringTable::new(); + let mut rd = data.as_slice(); + let attrs = decode_attributes(&mut rd, &mut table).unwrap(); + assert!(attrs.is_empty()); + } + + #[test] + fn attributes_multiple_mixed_types() { + let data = concat(&[ + encode_fixarray_header(6), + encode_fixstr("k1"), + encode_fixpos(2), + encode_bool(true), + encode_fixstr("k2"), + encode_fixpos(4), + encode_fixpos(99), + ]); + let mut table = StringTable::new(); + let mut rd = data.as_slice(); + let attrs = decode_attributes(&mut rd, &mut table).unwrap(); + assert_eq!(attrs.len(), 2); + 
assert_eq!(table.get(attrs[0].key), Some("k1")); + assert!(matches!(attrs[0].value, RawAnyValue::Bool(true))); + assert_eq!(table.get(attrs[1].key), Some("k2")); + assert!(matches!(attrs[1].value, RawAnyValue::Int(99))); + } + + #[test] + fn attributes_non_multiple_of_three_is_error() { + let data = encode_fixarray_header(4); + let mut table = StringTable::new(); + let mut rd = data.as_slice(); + let err = decode_attributes(&mut rd, &mut table).unwrap_err(); + assert!(matches!(err, DeserializeError::InvalidAttributeCount(4))); + } + + // ── Span decoding ─────────────────────────────────────────────────────── + + #[test] + fn span_all_fields_round_trip() { + let data = concat(&[ + encode_map16_header(16), + encode_fixpos(1), + encode_fixstr("my-svc"), + encode_fixpos(2), + encode_fixstr("http.request"), + encode_fixpos(3), + encode_fixstr("/api/v1"), + encode_fixpos(4), + encode_u64(0xdeadbeef_abbaabba), + encode_fixpos(5), + encode_u64(0x0102030405060708), + encode_fixpos(6), + encode_u64(1_700_000_000_000_000_000), + encode_fixpos(7), + encode_u64(500_000), + encode_fixpos(8), + encode_bool(true), + encode_fixpos(9), + encode_fixarray_header(0), + encode_fixpos(10), + encode_fixstr("web"), + encode_fixpos(11), + encode_fixarray_header(0), + encode_fixpos(12), + encode_fixarray_header(0), + encode_fixpos(13), + encode_fixstr("prod"), + encode_fixpos(14), + encode_fixstr("1.0.0"), + encode_fixpos(15), + encode_fixstr("net/http"), + encode_fixpos(16), + encode_fixpos(1), + ]); + + let mut table = StringTable::new(); + let mut rd = data.as_slice(); + let span = decode_span(&mut rd, &mut table).unwrap(); + + assert_eq!(table.get(span.service), Some("my-svc")); + assert_eq!(table.get(span.name), Some("http.request")); + assert_eq!(table.get(span.resource), Some("/api/v1")); + assert_eq!(span.span_id, 0xdeadbeef_abbaabba); + assert_eq!(span.parent_id, 0x0102030405060708); + assert_eq!(span.start, 1_700_000_000_000_000_000); + assert_eq!(span.duration, 500_000); + 
assert!(span.error); + assert_eq!(table.get(span.span_type), Some("web")); + assert_eq!(table.get(span.env), Some("prod")); + assert_eq!(table.get(span.version), Some("1.0.0")); + assert_eq!(table.get(span.component), Some("net/http")); + assert_eq!(span.kind, 1); + assert!(span.links.is_empty()); + assert!(span.events.is_empty()); + } + + #[test] + fn span_unknown_field_is_skipped() { + let data = concat(&[ + encode_fixmap_header(2), + encode_fixpos(4), + encode_u64(42), + encode_fixpos(99), + encode_nil(), + ]); + let mut table = StringTable::new(); + let mut rd = data.as_slice(); + let span = decode_span(&mut rd, &mut table).unwrap(); + assert_eq!(span.span_id, 42); + } + + #[test] + fn chunk_trace_id_splits_into_high_low() { + let trace_id_high: u64 = 0xaaaaaaaaaaaaaaaa; + let trace_id_low: u64 = 0xbbbbbbbbbbbbbbbb; + let data = concat(&[ + encode_fixmap_header(1), + encode_fixpos(6), + encode_trace_id(trace_id_high, trace_id_low), + ]); + let mut table = StringTable::new(); + let mut rd = data.as_slice(); + let chunk = decode_chunk(&mut rd, &mut table).unwrap(); + assert_eq!(chunk.trace_id_high, trace_id_high); + assert_eq!(chunk.trace_id_low, trace_id_low); + } + + #[test] + fn chunk_short_trace_id_right_aligned() { + // 8-byte trace ID (64-bit) should land in the low half; high half stays zero. 
+ let mut data = vec![0xde, 0x00, 0x01]; // map16 with 1 entry + data.push(6); // FIELD_TRACE_ID + let trace_bytes: u64 = 0xcafe_babe_1234_5678; + let mut bin = vec![0xc4, 8]; // bin8, 8 bytes + bin.extend_from_slice(&trace_bytes.to_be_bytes()); + data.extend_from_slice(&bin); + let mut table = StringTable::new(); + let mut rd = data.as_slice(); + let chunk = decode_chunk(&mut rd, &mut table).unwrap(); + assert_eq!(chunk.trace_id_high, 0); + assert_eq!(chunk.trace_id_low, trace_bytes); + } + + #[test] + fn chunk_priority_negative() { + let data = concat(&[encode_fixmap_header(1), encode_fixpos(1), encode_i32(-1)]); + let mut table = StringTable::new(); + let mut rd = data.as_slice(); + let chunk = decode_chunk(&mut rd, &mut table).unwrap(); + assert_eq!(chunk.priority, -1); + } + + #[test] + fn chunk_dropped_trace_bool() { + let data = concat(&[encode_fixmap_header(1), encode_fixpos(5), encode_bool(true)]); + let mut table = StringTable::new(); + let mut rd = data.as_slice(); + let chunk = decode_chunk(&mut rd, &mut table).unwrap(); + assert!(chunk.dropped_trace); + } + + // ── TracerPayload ─────────────────────────────────────────────────────── + + #[test] + fn payload_empty_map_decodes_without_error() { + let data = encode_fixmap_header(0); + let mut rd = data.as_slice(); + let payload = decode_tracer_payload(&mut rd).unwrap(); + assert!(payload.chunks.is_empty()); + } + + #[test] + fn payload_all_string_fields() { + let data = concat(&[ + encode_fixmap_header(8), + encode_fixpos(2), + encode_fixstr("ctr-123"), + encode_fixpos(3), + encode_fixstr("python"), + encode_fixpos(4), + encode_fixstr("3.11"), + encode_fixpos(5), + encode_fixstr("ddtrace-1.0"), + encode_fixpos(6), + encode_fixstr("runtime-abc"), + encode_fixpos(7), + encode_fixstr("staging"), + encode_fixpos(8), + encode_fixstr("host-1"), + encode_fixpos(9), + encode_fixstr("v2"), + ]); + let mut rd = data.as_slice(); + let p = decode_tracer_payload(&mut rd).unwrap(); + + 
assert_eq!(p.string_table.get(p.container_id), Some("ctr-123")); + assert_eq!(p.string_table.get(p.language_name), Some("python")); + assert_eq!(p.string_table.get(p.language_version), Some("3.11")); + assert_eq!(p.string_table.get(p.tracer_version), Some("ddtrace-1.0")); + assert_eq!(p.string_table.get(p.runtime_id), Some("runtime-abc")); + assert_eq!(p.string_table.get(p.env), Some("staging")); + assert_eq!(p.string_table.get(p.hostname), Some("host-1")); + assert_eq!(p.string_table.get(p.app_version), Some("v2")); + } + + #[test] + fn payload_multiple_chunks() { + let chunk_data = encode_fixmap_header(0); + let data = concat(&[ + encode_fixmap_header(1), + encode_fixpos(11), + concat(&[encode_fixarray_header(2), chunk_data.clone(), chunk_data]), + ]); + let mut rd = data.as_slice(); + let payload = decode_tracer_payload(&mut rd).unwrap(); + assert_eq!(payload.chunks.len(), 2); + } + + // ── Error / structural cases ──────────────────────────────────────────── + + #[test] + fn empty_slice_is_error() { + let data: &[u8] = &[]; + let mut rd = data; + let err = decode_tracer_payload(&mut rd).unwrap_err(); + assert!(matches!(err, DeserializeError::UnexpectedEof)); + } + + #[test] + fn truncated_input_is_error() { + let data = vec![0x81]; + let mut rd = data.as_slice(); + let err = decode_tracer_payload(&mut rd).unwrap_err(); + assert!(matches!(err, DeserializeError::UnexpectedEof)); + } + + #[test] + fn wrong_type_for_map_header_is_error() { + let data = encode_fixstr("oops"); + let mut rd = data.as_slice(); + let err = decode_tracer_payload(&mut rd).unwrap_err(); + assert!(matches!(err, DeserializeError::UnexpectedMarker(_))); + } + + #[test] + fn attribute_count_exceeds_limit_is_error() { + let count = (MAX_SIZE + 1) as u32; + let mut b = vec![0xdd]; + b.extend_from_slice(&count.to_be_bytes()); + let mut table = StringTable::new(); + let mut rd = b.as_slice(); + let err = decode_attributes(&mut rd, &mut table).unwrap_err(); + assert!(matches!(err, 
DeserializeError::LimitExceeded(_))); + } + + // ── skip_msgpack_value ────────────────────────────────────────────────── + + #[test] + fn skip_nil() { + let data = encode_nil(); + let mut rd = data.as_slice(); + skip_msgpack_value(&mut rd).unwrap(); + assert!(rd.is_empty()); + } + + #[test] + fn skip_bool() { + for b in [true, false] { + let data = encode_bool(b); + let mut rd = data.as_slice(); + skip_msgpack_value(&mut rd).unwrap(); + assert!(rd.is_empty()); + } + } + + #[test] + fn skip_int_variants() { + for data in [vec![0x05], encode_u8(200), encode_i32(-1), encode_u64(u64::MAX)] { + let mut rd = data.as_slice(); + skip_msgpack_value(&mut rd).unwrap(); + assert!(rd.is_empty()); + } + } + + #[test] + fn skip_str() { + let data = encode_fixstr("hello"); + let mut rd = data.as_slice(); + skip_msgpack_value(&mut rd).unwrap(); + assert!(rd.is_empty()); + } + + #[test] + fn skip_bin() { + let data = encode_bin8(&[1, 2, 3, 4]); + let mut rd = data.as_slice(); + skip_msgpack_value(&mut rd).unwrap(); + assert!(rd.is_empty()); + } + + #[test] + fn skip_nested_array() { + let data = concat(&[encode_fixarray_header(3), encode_nil(), encode_nil(), encode_nil()]); + let mut rd = data.as_slice(); + skip_msgpack_value(&mut rd).unwrap(); + assert!(rd.is_empty()); + } + + #[test] + fn skip_nested_map() { + let data = concat(&[encode_fixmap_header(1), encode_fixpos(1), encode_nil()]); + let mut rd = data.as_slice(); + skip_msgpack_value(&mut rd).unwrap(); + assert!(rd.is_empty()); + } + + #[test] + fn skip_deeply_nested() { + let inner1 = concat(&[encode_fixarray_header(2), encode_nil(), encode_nil()]); + let inner2 = concat(&[encode_fixarray_header(1), encode_nil()]); + let data = concat(&[encode_fixarray_header(2), inner1, inner2]); + let mut rd = data.as_slice(); + skip_msgpack_value(&mut rd).unwrap(); + assert!(rd.is_empty()); + } + + // ── Realistic golden-input test ───────────────────────────────────────── + + fn test_payload() -> Vec { + let simple_span = |env_str: 
&str| { + concat(&[ + encode_fixmap_header(8), + encode_fixpos(span::FIELD_SERVICE as u8), + encode_fixstr("my-service"), + encode_fixpos(span::FIELD_NAME as u8), + encode_fixstr("http.get"), + encode_fixpos(span::FIELD_RESOURCE as u8), + encode_fixstr("/users/{id}"), + encode_fixpos(span::FIELD_SPAN_ID as u8), + encode_u64(0xaaaa_0000_0000_0001), + encode_fixpos(span::FIELD_DURATION as u8), + encode_u64(100_000_u64), + encode_fixpos(span::FIELD_ERROR as u8), + encode_bool(false), + encode_fixpos(span::FIELD_ATTRIBUTES as u8), + encode_fixarray_header(0), + encode_fixpos(span::FIELD_ENV as u8), + encode_fixstr(env_str), + ]) + }; + + let rich_span = concat(&[ + encode_fixmap_header(4), + encode_fixpos(span::FIELD_SERVICE as u8), + encode_fixstr("my-service"), + encode_fixpos(span::FIELD_NAME as u8), + encode_fixstr("http.get"), + encode_fixpos(span::FIELD_SPAN_ID as u8), + encode_u64(0xbbbb_0000_0000_0002), + encode_fixpos(span::FIELD_ATTRIBUTES as u8), + concat(&[ + encode_array16_header(21), + encode_fixstr("attr-key"), + encode_fixpos(1), + encode_fixstr("some-val"), + encode_fixstr("attr-key"), + encode_fixpos(2), + encode_bool(true), + encode_fixstr("attr-key"), + encode_fixpos(3), + encode_f64(1.5), + encode_fixstr("attr-key"), + encode_fixpos(4), + encode_i64(-1), + encode_fixstr("attr-key"), + encode_fixpos(5), + encode_bin8(&[0xab]), + encode_fixstr("attr-key"), + encode_fixpos(6), + concat(&[ + encode_fixarray_header(4), + encode_fixpos(2), + encode_bool(false), + encode_fixpos(4), + encode_fixpos(0), + ]), + encode_fixstr("attr-key"), + encode_fixpos(7), + concat(&[ + encode_fixarray_header(3), + encode_fixstr("nested-key"), + encode_fixpos(2), + encode_bool(true), + ]), + ]), + ]); + + let linked_span = concat(&[ + encode_fixmap_header(4), + encode_fixpos(span::FIELD_SERVICE as u8), + encode_fixstr("my-service"), + encode_fixpos(span::FIELD_SPAN_ID as u8), + encode_u64(0xcccc_0000_0000_0003), + encode_fixpos(span::FIELD_LINKS as u8), + concat(&[ + 
encode_fixarray_header(1), + concat(&[ + encode_fixmap_header(3), + encode_fixpos(span_link::FIELD_TRACE_ID as u8), + encode_trace_id(0x1234, 0x5678), + encode_fixpos(span_link::FIELD_SPAN_ID as u8), + encode_u64(0xdeadbeef), + encode_fixpos(span_link::FIELD_FLAGS as u8), + encode_fixpos(1), + ]), + ]), + encode_fixpos(span::FIELD_EVENTS as u8), + concat(&[ + encode_fixarray_header(1), + concat(&[ + encode_fixmap_header(2), + encode_fixpos(span_event::FIELD_TIME_UNIX_NANO as u8), + encode_u64(999_999_999_u64), + encode_fixpos(span_event::FIELD_NAME as u8), + encode_fixstr("my-event"), + ]), + ]), + ]); + + let chunk1 = concat(&[ + encode_fixmap_header(4), + encode_fixpos(trace_chunk::FIELD_PRIORITY as u8), + encode_i32(1), + encode_fixpos(trace_chunk::FIELD_SPANS as u8), + concat(&[encode_fixarray_header(3), simple_span("prod"), rich_span, linked_span]), + encode_fixpos(trace_chunk::FIELD_DROPPED_TRACE as u8), + encode_bool(false), + encode_fixpos(trace_chunk::FIELD_TRACE_ID as u8), + encode_trace_id(0xfeed_face_dead_beef, 0xcafe_babe_1234_5678), + ]); + + let chunk2 = concat(&[ + encode_fixmap_header(3), + encode_fixpos(trace_chunk::FIELD_PRIORITY as u8), + encode_i32(-1), + encode_fixpos(trace_chunk::FIELD_SPANS as u8), + concat(&[ + encode_fixarray_header(3), + simple_span("staging"), + simple_span("staging"), + simple_span("staging"), + ]), + encode_fixpos(trace_chunk::FIELD_DROPPED_TRACE as u8), + encode_bool(true), + ]); + + concat(&[ + encode_fixmap_header(2), + encode_fixpos(tracer_payload::FIELD_HOSTNAME as u8), + encode_fixstr("host-1"), + encode_fixpos(tracer_payload::FIELD_CHUNKS as u8), + concat(&[encode_fixarray_header(2), chunk1, chunk2]), + ]) + } + + #[test] + fn golden_payload_decodes_end_to_end() { + let data = test_payload(); + let mut rd = data.as_slice(); + let payload = decode_tracer_payload(&mut rd).unwrap(); + + assert_eq!(rd.len(), 0, "all bytes should be consumed"); + assert_eq!(payload.string_table.get(payload.hostname), Some("host-1")); 
+ assert_eq!(payload.chunks.len(), 2); + + let c0 = &payload.chunks[0]; + assert_eq!(c0.priority, 1); + assert!(!c0.dropped_trace); + assert_eq!(c0.trace_id_high, 0xfeed_face_dead_beef); + assert_eq!(c0.trace_id_low, 0xcafe_babe_1234_5678); + assert_eq!(c0.spans.len(), 3); + + let rich = &c0.spans[1]; + assert_eq!(rich.attributes.len(), 7); + assert!(matches!(rich.attributes[0].value, RawAnyValue::String(_))); + assert!(matches!(rich.attributes[1].value, RawAnyValue::Bool(true))); + assert!(matches!(rich.attributes[2].value, RawAnyValue::Double(_))); + assert!(matches!(rich.attributes[3].value, RawAnyValue::Int(-1))); + assert!(matches!(rich.attributes[4].value, RawAnyValue::Bytes(_))); + assert!(matches!(rich.attributes[5].value, RawAnyValue::Array(_))); + assert!(matches!(rich.attributes[6].value, RawAnyValue::KeyValueList(_))); + + let linked = &c0.spans[2]; + assert_eq!(linked.links.len(), 1); + assert_eq!(linked.links[0].trace_id_high, 0x1234); + assert_eq!(linked.links[0].trace_id_low, 0x5678); + assert_eq!(linked.links[0].span_id, 0xdeadbeef); + assert_eq!(linked.events.len(), 1); + assert_eq!(linked.events[0].time_unix_nano, 999_999_999); + + let c1 = &payload.chunks[1]; + assert_eq!(c1.priority, -1); + assert!(c1.dropped_trace); + assert_eq!(c1.spans.len(), 3); + } + + fn test_payload_streaming() -> Vec { + let span = |first: bool| { + if first { + concat(&[ + encode_fixmap_header(7), + encode_fixpos(span::FIELD_SERVICE as u8), + encode_fixstr("my-service"), + encode_fixpos(span::FIELD_NAME as u8), + encode_fixstr("http.get"), + encode_fixpos(span::FIELD_RESOURCE as u8), + encode_fixstr("/users/{id}"), + encode_fixpos(span::FIELD_SPAN_ID as u8), + encode_u64(0x0000_0001), + encode_fixpos(span::FIELD_ATTRIBUTES as u8), + encode_fixarray_header(0), + encode_fixpos(span::FIELD_TYPE as u8), + encode_fixstr("web"), + encode_fixpos(span::FIELD_ENV as u8), + encode_fixstr("prod"), + ]) + } else { + concat(&[ + encode_fixmap_header(7), + 
encode_fixpos(span::FIELD_SERVICE as u8), + encode_fixpos(2), + encode_fixpos(span::FIELD_NAME as u8), + encode_fixpos(3), + encode_fixpos(span::FIELD_RESOURCE as u8), + encode_fixpos(4), + encode_fixpos(span::FIELD_SPAN_ID as u8), + encode_u64(0x0000_0002), + encode_fixpos(span::FIELD_ATTRIBUTES as u8), + encode_fixarray_header(0), + encode_fixpos(span::FIELD_TYPE as u8), + encode_fixpos(5), + encode_fixpos(span::FIELD_ENV as u8), + encode_fixpos(6), + ]) + } + }; + + let chunk = concat(&[ + encode_fixmap_header(2), + encode_fixpos(trace_chunk::FIELD_PRIORITY as u8), + encode_i32(1), + encode_fixpos(trace_chunk::FIELD_SPANS as u8), + concat(&[encode_fixarray_header(3), span(true), span(false), span(false)]), + ]); + + concat(&[ + encode_fixmap_header(2), + encode_fixpos(tracer_payload::FIELD_HOSTNAME as u8), + encode_fixstr("host-1"), + encode_fixpos(tracer_payload::FIELD_CHUNKS as u8), + concat(&[encode_fixarray_header(1), chunk]), + ]) + } + + #[test] + fn golden_streaming_payload_decodes_end_to_end() { + let data = test_payload_streaming(); + let mut rd = data.as_slice(); + let payload = decode_tracer_payload(&mut rd).unwrap(); + + assert_eq!(rd.len(), 0, "all bytes should be consumed"); + assert_eq!(payload.string_table.get(payload.hostname), Some("host-1")); + + assert_eq!(payload.chunks.len(), 1); + let chunk = &payload.chunks[0]; + assert_eq!(chunk.priority, 1); + assert_eq!(chunk.spans.len(), 3); + + for span in &chunk.spans { + assert_eq!(payload.string_table.get(span.service), Some("my-service")); + assert_eq!(payload.string_table.get(span.name), Some("http.get")); + assert_eq!(payload.string_table.get(span.resource), Some("/users/{id}")); + assert_eq!(payload.string_table.get(span.span_type), Some("web")); + assert_eq!(payload.string_table.get(span.env), Some("prod")); + } + } +} diff --git a/lib/saluki-components/src/sources/apm/mod.rs b/lib/saluki-components/src/sources/apm/mod.rs new file mode 100644 index 00000000000..a190c5223c6 --- /dev/null +++ 
b/lib/saluki-components/src/sources/apm/mod.rs @@ -0,0 +1,628 @@ +use std::net::SocketAddr; +use std::num::NonZeroUsize; +use std::sync::LazyLock; + +use async_trait::async_trait; +use axum::{ + body::Bytes, + extract::State, + http::{HeaderMap, StatusCode}, + response::Response, + routing::{get, post}, + Router, +}; +use memory_accounting::{MemoryBounds, MemoryBoundsBuilder}; +use saluki_config::GenericConfiguration; +use saluki_core::{ + components::{ + sources::{Source, SourceBuilder, SourceContext}, + ComponentContext, + }, + data_model::event::{ + trace::{ + AttributeValue, Span, SpanEvent, SpanLink, Trace, + }, + Event, EventType, + }, + topology::OutputDefinition, +}; + +mod v1_types; +use self::v1_types::{V1AnyValue, V1KeyValue, V1Span, V1SpanEvent, V1SpanLink, V1Trace, V1TraceChunk}; +use saluki_common::collections::FastHashMap; +use saluki_error::{generic_error, GenericError}; +use stringtheory::{interning::GenericMapInterner, MetaString}; +use tokio::{net::TcpListener, sync::mpsc}; +use tracing::{debug, error, info, warn}; + +pub mod sampling_rates; +use self::sampling_rates::{RateResponse, V1SamplingRatesHandle}; + +mod deserialize; +use self::deserialize::{ + decode_tracer_payload, DeserializeError, RawAnyValue, RawKeyValue, RawSpan, RawSpanEvent, RawSpanLink, + RawTraceChunk, RawTracerPayload, +}; + +const DEFAULT_LISTEN_ADDRESS: &str = "0.0.0.0:8126"; + +/// Header sent by tracers reporting how many P0 (AutoDrop) traces were dropped client-side. +const HEADER_CLIENT_DROPPED_P0: &str = "Datadog-Client-Dropped-P0-Traces"; +/// Header used by tracers to report (and the agent to set) the current rates payload version. +const HEADER_RATES_VERSION: &str = "Datadog-Rates-Payload-Version"; + +/// Sentinel value used by the V1 wire format to indicate no priority was set. +/// Matches Go's `PriorityNone = math.MinInt8`. +const V1_PRIORITY_NONE: i32 = i8::MIN as i32; + +/// Configuration for the APM receiver source. 
+pub struct ApmReceiverConfiguration { + listen_address: SocketAddr, + sampling_rates: V1SamplingRatesHandle, +} + +impl ApmReceiverConfiguration { + /// Creates a new `ApmReceiverConfiguration` from the given configuration. + /// + /// Reads `data_plane.apm.listen_address` (default: `0.0.0.0:8126`). + pub fn from_configuration(config: &GenericConfiguration) -> Result { + let addr_str = config + .try_get_typed::("data_plane.apm.listen_address")? + .unwrap_or_else(|| DEFAULT_LISTEN_ADDRESS.to_owned()); + + let listen_address = addr_str + .parse::() + .map_err(|e| generic_error!("Invalid APM listen address '{}': {}", addr_str, e))?; + + Ok(Self { + listen_address, + sampling_rates: V1SamplingRatesHandle::new(), + }) + } + + /// Attaches a shared [`V1SamplingRatesHandle`] so the receiver can include current + /// per-service sampling rates in every HTTP response. + pub fn with_sampling_rates(mut self, handle: V1SamplingRatesHandle) -> Self { + self.sampling_rates = handle; + self + } +} + +impl Default for ApmReceiverConfiguration { + fn default() -> Self { + Self { + listen_address: DEFAULT_LISTEN_ADDRESS.parse().expect("default listen address is valid"), + sampling_rates: V1SamplingRatesHandle::new(), + } + } +} + +#[async_trait] +impl SourceBuilder for ApmReceiverConfiguration { + fn outputs(&self) -> &[OutputDefinition] { + static OUTPUTS: LazyLock>> = + LazyLock::new(|| vec![OutputDefinition::named_output("traces", EventType::Trace)]); + &OUTPUTS + } + + async fn build(&self, _context: ComponentContext) -> Result, GenericError> { + Ok(Box::new(ApmReceiver { + listen_address: self.listen_address, + sampling_rates: self.sampling_rates.clone(), + })) + } +} + +impl MemoryBounds for ApmReceiverConfiguration { + fn specify_bounds(&self, builder: &mut MemoryBoundsBuilder) { + builder.minimum().with_single_value::("component struct"); + } +} + +struct ApmReceiver { + listen_address: SocketAddr, + sampling_rates: V1SamplingRatesHandle, +} + +/// Shared state for the axum 
request handler. +#[derive(Clone)] +struct HandlerState { + tx: mpsc::Sender>, + sampling_rates: V1SamplingRatesHandle, +} + +async fn handle_info() -> Response { + Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(axum::body::Body::from(r#"{"endpoints":["/v1.0/traces"]}"#)) + .unwrap() +} + +async fn handle_v1_traces( + State(state): State, + headers: HeaderMap, + body: Bytes, +) -> Response { + // Read the client-dropped-P0 count for rate-computation weight adjustment. + let client_dropped_p0s = headers + .get(HEADER_CLIENT_DROPPED_P0) + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse::().ok()) + .unwrap_or(0); + + // Read the tracer's current rates version for idempotent response optimization. + let client_version = headers + .get(HEADER_RATES_VERSION) + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_owned(); + + match decode_tracer_payload(&mut body.as_ref()) { + Ok(raw) => { + let chunk_count = raw.chunks.len().max(1); + let total_spans: usize = raw.chunks.iter().map(|c| c.spans.len()).sum(); + debug!( + chunks = raw.chunks.len(), + spans = total_spans, + client_dropped_p0s, + "Received V1 tracer payload." + ); + let per_chunk_weight = client_dropped_p0s as f64 / chunk_count as f64; + let traces = resolve_payload(raw, per_chunk_weight); + if !traces.is_empty() { + debug!(traces = traces.len(), "Dispatching trace events to topology."); + if let Err(e) = state.tx.try_send(traces) { + warn!(error = %e, "APM receiver channel full; dropping payload."); + // TODO: The v1 spec requires a 429 response here so tracers can back off. + // Currently we return 200 OK even when dropping, matching legacy v0.4 behaviour. 
+ } + } + + let client_sent_version = !client_version.is_empty(); + let rate_response = state.sampling_rates.get_response(&client_version); + build_rate_response(rate_response, client_sent_version) + } + Err(DeserializeError::UnexpectedEof) | Err(DeserializeError::UnexpectedMarker(_)) => { + warn!("Malformed v1 trace payload (parse error)."); + Response::builder() + .status(StatusCode::BAD_REQUEST) + .body(axum::body::Body::empty()) + .unwrap() + } + Err(e) => { + warn!(error = ?e, "Failed to deserialize v1 trace payload."); + Response::builder() + .status(StatusCode::BAD_REQUEST) + .body(axum::body::Body::empty()) + .unwrap() + } + } +} + +fn build_rate_response(response: RateResponse, client_sent_version: bool) -> Response { + let (body_bytes, version) = match response { + RateResponse::Unchanged { version } => (b"{}".to_vec(), version), + RateResponse::Updated { rates, version } => { + let json = serde_json::to_vec(&serde_json::json!({ "rate_by_service": rates })) + .unwrap_or_else(|_| b"{}".to_vec()); + (json, version) + } + }; + + let mut builder = Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json"); + + if client_sent_version && !version.is_empty() { + builder = builder.header(HEADER_RATES_VERSION, version.as_str()); + } + + builder + .body(axum::body::Body::from(body_bytes)) + .unwrap_or_else(|_| { + Response::builder() + .status(StatusCode::INTERNAL_SERVER_ERROR) + .body(axum::body::Body::empty()) + .unwrap() + }) +} + +#[async_trait] +impl Source for ApmReceiver { + async fn run(self: Box, mut context: SourceContext) -> Result<(), GenericError> { + let mut shutdown = context.take_shutdown_handle(); + let mut health = context.take_health_handle(); + + let (tx, mut rx) = mpsc::channel::>(256); + + let listener = TcpListener::bind(self.listen_address) + .await + .map_err(|e| generic_error!("Failed to bind APM receiver on {}: {}", self.listen_address, e))?; + + let app = Router::new() + .route("/info", get(handle_info)) 
+ .route("/v1.0/traces", post(handle_v1_traces)) + .with_state(HandlerState { + tx, + sampling_rates: self.sampling_rates, + }); + + let (server_shutdown_tx, server_shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + + tokio::spawn(async move { + let serve = axum::serve(listener, app).with_graceful_shutdown(async move { + let _ = server_shutdown_rx.await; + }); + if let Err(e) = serve.await { + error!(error = %e, "APM HTTP server error."); + } + }); + + health.mark_ready(); + info!("APM receiver source started on {}.", self.listen_address); + + loop { + tokio::select! { + _ = &mut shutdown => { + debug!("APM receiver source shutting down."); + let _ = server_shutdown_tx.send(()); + break; + } + Some(traces) = rx.recv() => { + let dispatcher = context + .dispatcher() + .buffered_named("traces") + .map_err(|e| generic_error!("Failed to get traces dispatcher: {}", e))?; + if let Err(e) = dispatcher.send_all(traces.into_iter().map(Event::Trace)).await { + error!(error = %e, "Failed to dispatch trace events."); + } + } + _ = health.live() => continue, + } + } + + debug!("APM receiver source stopped."); + Ok(()) + } +} + +// ── Resolution pass: RawTracerPayload → Vec (internal) ──────────── + +fn resolve_payload(raw: RawTracerPayload, per_chunk_weight: f64) -> Vec { + let capacity_bytes = raw.string_table.len().saturating_mul(64).saturating_add(1024); + let capacity = NonZeroUsize::new(capacity_bytes).unwrap_or(NonZeroUsize::MIN); + let interner = GenericMapInterner::new(capacity); + + let resolved: Vec = raw + .string_table + .iter() + .map(|s| MetaString::from_interner(s, &interner)) + .collect(); + + let r = |idx: u32| -> MetaString { resolved.get(idx as usize).cloned().unwrap_or_default() }; + + let payload_attributes = resolve_kvs(raw.attributes, &r); + let container_id = r(raw.container_id); + let language_name = r(raw.language_name); + let language_version = r(raw.language_version); + let tracer_version = r(raw.tracer_version); + let runtime_id = 
r(raw.runtime_id); + let env = r(raw.env); + let hostname = r(raw.hostname); + let app_version = r(raw.app_version); + + raw.chunks + .into_iter() + .map(|raw_chunk| { + let v1 = V1Trace { + chunk: resolve_chunk(raw_chunk, &r), + container_id: container_id.clone(), + language_name: language_name.clone(), + language_version: language_version.clone(), + tracer_version: tracer_version.clone(), + runtime_id: runtime_id.clone(), + env: env.clone(), + hostname: hostname.clone(), + app_version: app_version.clone(), + payload_attributes: payload_attributes.clone(), + client_dropped_p0s_weight: per_chunk_weight, + }; + v1_trace_to_trace(v1) + }) + .collect() +} + +fn resolve_chunk(raw: RawTraceChunk, r: &impl Fn(u32) -> MetaString) -> V1TraceChunk { + V1TraceChunk { + priority: raw.priority, + origin: r(raw.origin), + attributes: resolve_kvs(raw.attributes, r), + spans: raw.spans.into_iter().map(|s| resolve_span(s, r)).collect(), + dropped_trace: raw.dropped_trace, + trace_id_high: raw.trace_id_high, + trace_id_low: raw.trace_id_low, + sampling_mechanism: raw.sampling_mechanism, + } +} + +fn resolve_span(raw: RawSpan, r: &impl Fn(u32) -> MetaString) -> V1Span { + V1Span { + service: r(raw.service), + name: r(raw.name), + resource: r(raw.resource), + span_id: raw.span_id, + parent_id: raw.parent_id, + start: raw.start, + duration: raw.duration, + error: raw.error, + attributes: resolve_kvs(raw.attributes, r), + span_type: r(raw.span_type), + links: raw.links.into_iter().map(|l| resolve_link(l, r)).collect(), + events: raw.events.into_iter().map(|e| resolve_event(e, r)).collect(), + env: r(raw.env), + version: r(raw.version), + component: r(raw.component), + kind: raw.kind, + } +} + +fn resolve_link(raw: RawSpanLink, r: &impl Fn(u32) -> MetaString) -> V1SpanLink { + V1SpanLink { + trace_id_high: raw.trace_id_high, + trace_id_low: raw.trace_id_low, + span_id: raw.span_id, + attributes: resolve_kvs(raw.attributes, r), + tracestate: r(raw.tracestate), + flags: raw.flags, + } +} 
+ +fn resolve_event(raw: RawSpanEvent, r: &impl Fn(u32) -> MetaString) -> V1SpanEvent { + V1SpanEvent { + time_unix_nano: raw.time_unix_nano, + name: r(raw.name), + attributes: resolve_kvs(raw.attributes, r), + } +} + +fn resolve_kvs(raw: Vec, r: &impl Fn(u32) -> MetaString) -> Vec { + raw.into_iter() + .map(|kv| V1KeyValue { + key: r(kv.key), + value: resolve_any_value(kv.value, r), + }) + .collect() +} + +fn resolve_any_value(raw: RawAnyValue, r: &impl Fn(u32) -> MetaString) -> V1AnyValue { + match raw { + RawAnyValue::String(idx) => V1AnyValue::String(r(idx)), + RawAnyValue::Bool(v) => V1AnyValue::Bool(v), + RawAnyValue::Double(v) => V1AnyValue::Double(v), + RawAnyValue::Int(v) => V1AnyValue::Int(v), + RawAnyValue::Bytes(v) => V1AnyValue::Bytes(v), + RawAnyValue::Array(items) => { + V1AnyValue::Array(items.into_iter().map(|item| resolve_any_value(item, r)).collect()) + } + RawAnyValue::KeyValueList(kvs) => V1AnyValue::KeyValueList(resolve_kvs(kvs, r)), + } +} + +// ── V1Trace → unified Trace conversion ──────────────────────────────────────── + +/// Convert a resolved `V1Trace` into the unified `Trace` event type. +/// +/// The V1 types are wire-format intermediates produced by the APM source's deserialization pass. +/// After this conversion they are no longer referenced; all downstream pipeline components work +/// with the unified `Trace` and `Span` types. +fn v1_trace_to_trace(v1: V1Trace) -> Trace { + // `V1_PRIORITY_NONE` (i8::MIN) sentinel → None; any other value → Some(value). + let priority = if v1.chunk.priority == V1_PRIORITY_NONE { + None + } else { + Some(v1.chunk.priority) + }; + + // Payload attributes are defaults common to all chunks; chunk attributes are more + // specific and override them for the same key. 
+ let mut attributes = v1_kvs_to_attribute_map(v1.payload_attributes); + for kv in v1.chunk.attributes { + if let Some(av) = v1_anyvalue_to_attribute_value(kv.value) { + attributes.insert(kv.key, av); + } + } + + let spans = v1.chunk.spans.into_iter().map(v1_span_to_span).collect(); + + let mut trace = Trace::new(spans); + + // Unified trace-level fields. + trace.trace_id_high = v1.chunk.trace_id_high; + trace.trace_id_low = v1.chunk.trace_id_low; + trace.origin = v1.chunk.origin; + trace.priority = priority; + trace.dropped_trace = v1.chunk.dropped_trace; + trace.sampling_mechanism = v1.chunk.sampling_mechanism; + trace.container_id = v1.container_id; + trace.language_name = v1.language_name; + trace.language_version = v1.language_version; + trace.tracer_version = v1.tracer_version; + trace.runtime_id = v1.runtime_id; + trace.env = v1.env; + trace.hostname = v1.hostname; + trace.app_version = v1.app_version; + trace.client_dropped_p0s_weight = v1.client_dropped_p0s_weight; + trace.attributes = attributes; + + trace +} + +fn v1_span_to_span(v1: V1Span) -> Span { + let span_links = v1.links.into_iter().map(v1_span_link_to_span_link).collect(); + let span_events = v1.events.into_iter().map(v1_span_event_to_span_event).collect(); + + let mut span = Span::new( + v1.service, + v1.name, + v1.resource, + v1.span_type, + v1.span_id, + v1.parent_id, + v1.start, + v1.duration, + if v1.error { 1 } else { 0 }, + ) + .with_span_links(Some(span_links)) + .with_span_events(Some(span_events)) + .with_env(v1.env) + .with_version(v1.version) + .with_component(v1.component) + .with_kind(v1.kind); + + for kv in v1.attributes { + if let Some(av) = v1_anyvalue_to_attribute_value(kv.value) { + span.attributes.insert(kv.key, av); + } + } + + span +} + +fn v1_span_link_to_span_link(v1: V1SpanLink) -> SpanLink { + let attrs = v1 + .attributes + .into_iter() + .filter_map(|kv| v1_anyvalue_to_attribute_value(kv.value).map(|av| (kv.key, av))) + .collect(); + + SpanLink::new(v1.trace_id_low, 
v1.span_id) + .with_trace_id_high(v1.trace_id_high) + .with_attributes(Some(attrs)) + .with_tracestate(v1.tracestate) + .with_flags(v1.flags) +} + +fn v1_span_event_to_span_event(v1: V1SpanEvent) -> SpanEvent { + let attrs = v1 + .attributes + .into_iter() + .filter_map(|kv| v1_anyvalue_to_attribute_value(kv.value).map(|av| (kv.key, av))) + .collect(); + + SpanEvent::new(v1.time_unix_nano, v1.name).with_attributes(Some(attrs)) +} + +/// Convert a `Vec` into `Trace.attributes` (typed attribute map). +fn v1_kvs_to_attribute_map( + kvs: Vec, +) -> FastHashMap { + let mut map = FastHashMap::default(); + for kv in kvs { + if let Some(av) = v1_anyvalue_to_attribute_value(kv.value) { + map.insert(kv.key, av); + } + } + map +} + +fn v1_anyvalue_to_attribute_value(v: V1AnyValue) -> Option { + match v { + V1AnyValue::String(s) => Some(AttributeValue::String(s)), + V1AnyValue::Bool(b) => Some(AttributeValue::Bool(b)), + V1AnyValue::Int(i) => Some(AttributeValue::Int(i)), + V1AnyValue::Double(d) => Some(AttributeValue::Float(d)), + V1AnyValue::Bytes(b) => Some(AttributeValue::Bytes(b)), + V1AnyValue::Array(items) => Some(AttributeValue::Array( + items.into_iter().filter_map(v1_anyvalue_to_attribute_value).collect(), + )), + V1AnyValue::KeyValueList(kvs) => Some(AttributeValue::KeyValueList( + kvs.into_iter() + .filter_map(|kv| v1_anyvalue_to_attribute_value(kv.value).map(|v| (kv.key, v))) + .collect(), + )), + } +} + +#[cfg(test)] +mod tests { + use saluki_common::collections::FastHashMap; + use stringtheory::MetaString; + + use super::*; + + fn kv(key: &str, value: &str) -> V1KeyValue { + V1KeyValue { + key: MetaString::from(key), + value: V1AnyValue::String(MetaString::from(value)), + } + } + + fn empty_chunk(chunk_attrs: Vec) -> V1TraceChunk { + V1TraceChunk { + priority: 1, + origin: MetaString::default(), + attributes: chunk_attrs, + spans: vec![], + dropped_trace: false, + trace_id_high: 0, + trace_id_low: 1, + sampling_mechanism: 0, + } + } + + fn 
make_v1_trace(chunk_attrs: Vec, payload_attrs: Vec) -> V1Trace { + V1Trace { + chunk: empty_chunk(chunk_attrs), + container_id: MetaString::default(), + language_name: MetaString::default(), + language_version: MetaString::default(), + tracer_version: MetaString::default(), + runtime_id: MetaString::default(), + env: MetaString::default(), + hostname: MetaString::default(), + app_version: MetaString::default(), + payload_attributes: payload_attrs, + client_dropped_p0s_weight: 0.0, + } + } + + fn attr_string(attrs: &FastHashMap, key: &str) -> Option { + attrs.get(key).and_then(|v| { + if let AttributeValue::String(s) = v { + Some(s.to_string()) + } else { + None + } + }) + } + + // Issue 2: chunk-level attributes must take priority over payload-level attributes + // when both set the same key. The payload defines defaults common to all chunks; + // a chunk can override them. + + #[test] + fn chunk_attr_takes_priority_over_payload_attr_for_same_key() { + let v1 = make_v1_trace( + vec![kv("env", "chunk-env")], + vec![kv("env", "payload-env")], + ); + let trace = v1_trace_to_trace(v1); + assert_eq!( + attr_string(&trace.attributes, "env").as_deref(), + Some("chunk-env"), + "chunk attribute should win over payload attribute for the same key" + ); + } + + #[test] + fn payload_attr_present_when_no_chunk_conflict() { + let v1 = make_v1_trace(vec![], vec![kv("payload-key", "payload-val")]); + let trace = v1_trace_to_trace(v1); + assert_eq!( + attr_string(&trace.attributes, "payload-key").as_deref(), + Some("payload-val"), + "payload attribute with no chunk conflict should still be present" + ); + } +} diff --git a/lib/saluki-components/src/sources/apm/sampling_rates.rs b/lib/saluki-components/src/sources/apm/sampling_rates.rs new file mode 100644 index 00000000000..920ea99fb35 --- /dev/null +++ b/lib/saluki-components/src/sources/apm/sampling_rates.rs @@ -0,0 +1,225 @@ +//! Shared sampling-rate state between the APM receiver source and the V1 trace sampler. 
/// Produces the version token for the given generation counter.
///
/// The token is the current Unix time in whole seconds, a `-`, and `generation`, both
/// rendered as lowercase hexadecimal without padding (for example `67f4a2b1-3`). If the
/// system clock reports a time before the Unix epoch, the timestamp part is `0`.
fn new_version(generation: u64) -> String {
    let unix_secs = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    };
    format!("{unix_secs:x}-{generation:x}")
}
+ version: String, + }, +} + +/// Cheap-clone handle to the shared APM priority-sampler rate table. +/// +/// The [`crate::transforms::V1TraceSamplerConfiguration`] holds one clone (writer). +/// The APM receiver source holds another (reader). Cloning is O(1) — just an Arc +/// refcount increment. +#[derive(Clone)] +pub struct V1SamplingRatesHandle { + inner: Arc>, +} + +impl V1SamplingRatesHandle { + /// Creates a new handle backed by an empty rate table. + pub fn new() -> Self { + Self { + inner: Arc::new(RwLock::new(V1SamplingRates::default())), + } + } + + /// Replaces the current rate table with `new_rates`. + /// + /// Called by the V1 trace sampler transform whenever the core sampler's + /// sliding window advances and produces new per-service rates. + pub fn set_all(&self, new_rates: FastHashMap) { + // Recover from lock poisoning consistently with the read side — if another + // thread panicked holding the lock, the data inside is still valid to update. + let mut guard = self.inner.write().unwrap_or_else(|e| e.into_inner()); + guard.set_all(new_rates); + } + + /// Returns the appropriate response for a tracer's `/v1.0/traces` request. + /// + /// `client_version` is the value of the `Datadog-Rates-Payload-Version` request + /// header, or an empty string if the header was absent. + pub fn get_response(&self, client_version: &str) -> RateResponse { + let guard = self.inner.read().unwrap_or_else(|e| e.into_inner()); + let current_version = guard.version.clone(); + // An empty version means no rates have been computed yet — always send Updated + // so the tracer gets an explicit empty map rather than a stale "unchanged" reply. + // This matches the Go agent's treatment of version="" as a "no rates" sentinel. 
+ let version_matches = !current_version.is_empty() + && !client_version.is_empty() + && client_version == current_version; + if version_matches { + RateResponse::Unchanged { version: current_version } + } else { + RateResponse::Updated { + rates: guard.rates.clone(), + version: current_version, + } + } + } +} + +impl Default for V1SamplingRatesHandle { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_rates(pairs: &[(&str, f64)]) -> FastHashMap { + pairs.iter().map(|(k, v)| (k.to_string(), *v)).collect() + } + + #[test] + fn version_empty_on_new() { + let handle = V1SamplingRatesHandle::new(); + assert!(handle.inner.read().unwrap().version.is_empty()); + } + + #[test] + fn version_changes_when_rates_change() { + let handle = V1SamplingRatesHandle::new(); + handle.set_all(make_rates(&[("service:foo,env:prod", 0.5)])); + let v1 = handle.inner.read().unwrap().version.clone(); + // Let at least 1 µs pass so the timestamp portion can't collide. + std::thread::sleep(std::time::Duration::from_millis(1)); + handle.set_all(make_rates(&[("service:foo,env:prod", 0.3)])); + let v2 = handle.inner.read().unwrap().version.clone(); + assert_ne!(v1, v2, "version must change when rates change"); + } + + #[test] + fn version_unchanged_when_rates_unchanged() { + let handle = V1SamplingRatesHandle::new(); + handle.set_all(make_rates(&[("service:foo,env:prod", 0.5)])); + let v1 = handle.inner.read().unwrap().version.clone(); + handle.set_all(make_rates(&[("service:foo,env:prod", 0.5)])); + let v2 = handle.inner.read().unwrap().version.clone(); + assert_eq!(v1, v2, "version must not change when rates are identical"); + } + + #[test] + fn version_format_matches_go_agent() { + // Expected: "-", e.g. 
/// A client that presents the current version token must get an `Unchanged` reply.
#[test]
fn unchanged_response_when_version_matches() {
    let handle = V1SamplingRatesHandle::new();
    handle.set_all(make_rates(&[("service:foo,env:prod", 0.5)]));
    let current_version = handle.inner.read().unwrap().version.clone();

    // Pass the token by reference; `get_response` takes `&str`.
    let response = handle.get_response(&current_version);
    assert!(matches!(response, RateResponse::Unchanged { .. }));
}
}), + "should return Updated before any set_all even when client version is also empty" + ); + } +} diff --git a/lib/saluki-components/src/sources/apm/v1_types.rs b/lib/saluki-components/src/sources/apm/v1_types.rs new file mode 100644 index 00000000000..fe36a35402a --- /dev/null +++ b/lib/saluki-components/src/sources/apm/v1_types.rs @@ -0,0 +1,146 @@ +use stringtheory::MetaString; + +/// A chunk of spans belonging to a single trace. +#[derive(Clone, Debug, PartialEq)] +pub struct V1TraceChunk { + /// Sampling priority for this chunk. + pub priority: i32, + /// Trace origin. + pub origin: MetaString, + /// Chunk-level attributes. + pub attributes: Vec, + /// Spans contained in this chunk. + pub spans: Vec, + /// Whether this trace was dropped during sampling. + pub dropped_trace: bool, + /// Upper 8 bytes of the 128-bit trace ID (big-endian). + pub trace_id_high: u64, + /// Lower 8 bytes of the 128-bit trace ID (big-endian). + pub trace_id_low: u64, + /// Sampling mechanism identifier. + pub sampling_mechanism: u32, +} + +/// A single span within a trace chunk. +#[derive(Clone, Debug, PartialEq)] +pub struct V1Span { + /// Service name. + pub service: MetaString, + /// Operation name. + pub name: MetaString, + /// Resource name. + pub resource: MetaString, + /// Unique identifier of this span. + pub span_id: u64, + /// Identifier of this span's parent, or zero if this is a root span. + pub parent_id: u64, + /// Start timestamp in nanoseconds since Unix epoch. + pub start: u64, + /// Duration in nanoseconds. + pub duration: u64, + /// Whether this span recorded an error. + pub error: bool, + /// Span-level attributes. + pub attributes: Vec, + /// Span type classification (e.g. web, db, cache). + pub span_type: MetaString, + /// Links to spans in other traces. + pub links: Vec, + /// Timestamped events associated with this span. + pub events: Vec, + /// Per-span environment override. + pub env: MetaString, + /// Application version. 
+ pub version: MetaString, + /// Instrumentation component. + pub component: MetaString, + /// Span kind (OTEL values): 0=unspecified, 1=internal, 2=server, 3=client, 4=producer, 5=consumer. + pub kind: u32, +} + +/// A link from a span to another span in a different trace. +#[derive(Clone, Debug, PartialEq)] +pub struct V1SpanLink { + /// Upper 8 bytes of the linked trace ID (big-endian). + pub trace_id_high: u64, + /// Lower 8 bytes of the linked trace ID (big-endian). + pub trace_id_low: u64, + /// Span identifier of the linked span. + pub span_id: u64, + /// Attributes attached to the link. + pub attributes: Vec, + /// W3C tracestate value. + pub tracestate: MetaString, + /// W3C trace flags. + pub flags: u32, +} + +/// A timestamped event associated with a span. +#[derive(Clone, Debug, PartialEq)] +pub struct V1SpanEvent { + /// Event timestamp in nanoseconds since Unix epoch. + pub time_unix_nano: u64, + /// Event name. + pub name: MetaString, + /// Event attributes. + pub attributes: Vec, +} + +/// A key-value attribute entry. +#[derive(Clone, Debug, PartialEq)] +pub struct V1KeyValue { + /// Attribute key. + pub key: MetaString, + /// Attribute value. + pub value: V1AnyValue, +} + +/// A typed attribute value. +#[derive(Clone, Debug, PartialEq)] +pub enum V1AnyValue { + /// String value. + String(MetaString), + /// Boolean value. + Bool(bool), + /// 64-bit floating-point value. + Double(f64), + /// 64-bit signed integer value. + Int(i64), + /// Raw byte sequence. + Bytes(Vec), + /// Ordered sequence of values. + Array(Vec), + /// Ordered list of key-value pairs. + KeyValueList(Vec), +} + +/// A resolved v1 trace event. +/// +/// Carries one [`V1TraceChunk`] with all string fields resolved to [`MetaString`], plus +/// payload-level metadata promoted from the originating tracer payload. +#[derive(Clone, Debug, PartialEq)] +pub struct V1Trace { + /// The chunk of spans for one trace. + pub chunk: V1TraceChunk, + /// Container ID. 
+ pub container_id: MetaString, + /// Tracer language name. + pub language_name: MetaString, + /// Tracer language version. + pub language_version: MetaString, + /// Tracer library version. + pub tracer_version: MetaString, + /// Runtime ID. + pub runtime_id: MetaString, + /// Environment name. + pub env: MetaString, + /// Hostname. + pub hostname: MetaString, + /// Application version. + pub app_version: MetaString, + /// Payload-level attributes. + pub payload_attributes: Vec, + /// Per-chunk weight from the `Datadog-Client-Dropped-P0-Traces` request header, + /// computed as `header_value / num_chunks_in_payload`. Zero if the header was absent. + pub client_dropped_p0s_weight: f64, +} diff --git a/lib/saluki-components/src/sources/mod.rs b/lib/saluki-components/src/sources/mod.rs index b71cfd96b23..5d00a8aed79 100644 --- a/lib/saluki-components/src/sources/mod.rs +++ b/lib/saluki-components/src/sources/mod.rs @@ -1,5 +1,9 @@ //! Source implementations. +/// APM receiver source and shared sampling-rate state. 
+pub mod apm; +pub use self::apm::ApmReceiverConfiguration; + mod dogstatsd; pub use self::dogstatsd::DogStatsDConfiguration; diff --git a/lib/saluki-components/src/transforms/apm_stats/aggregation.rs b/lib/saluki-components/src/transforms/apm_stats/aggregation.rs index a9433c5645c..c571d7f90c1 100644 --- a/lib/saluki-components/src/transforms/apm_stats/aggregation.rs +++ b/lib/saluki-components/src/transforms/apm_stats/aggregation.rs @@ -7,7 +7,8 @@ use std::{ }; use fnv::FnvHasher; -use saluki_common::collections::{FastHashMap, PrehashedHashMap}; +use saluki_common::collections::PrehashedHashMap; +use saluki_core::data_model::event::trace::{AttributeValue, Span}; use stringtheory::MetaString; pub const BUCKET_DURATION_NS: u64 = 10_000_000_000; @@ -464,12 +465,12 @@ pub fn process_tags_hash(process_tags: &str) -> u64 { tags_fnv_hash(process_tags.split(',')) } -pub fn get_status_code(meta: &FastHashMap, metrics: &FastHashMap) -> u32 { - if let Some(&code) = metrics.get(TAG_STATUS_CODE) { +pub fn get_status_code(span: &Span) -> u32 { + if let Some(code) = span.attributes.get(TAG_STATUS_CODE).and_then(AttributeValue::as_float) { return code as u32; } - if let Some(code_str) = meta.get(TAG_STATUS_CODE) { + if let Some(code_str) = span.attributes.get(TAG_STATUS_CODE).and_then(AttributeValue::as_string) { if let Ok(code) = code_str.as_ref().parse::() { return code; } @@ -478,9 +479,7 @@ pub fn get_status_code(meta: &FastHashMap, metrics: &Fas 0 } -pub fn get_grpc_status_code( - meta: &FastHashMap, metrics: &FastHashMap, -) -> GrpcStatusCode { +pub fn get_grpc_status_code(span: &Span) -> GrpcStatusCode { const STATUS_CODE_FIELDS: &[&str] = &[ "rpc.grpc.status_code", "grpc.code", @@ -489,18 +488,19 @@ pub fn get_grpc_status_code( ]; for key in STATUS_CODE_FIELDS { - if let Some(value) = meta.get(*key) { - if value.is_empty() { - continue; + if let Some(av) = span.attributes.get(*key) { + match av { + AttributeValue::String(value) => { + if value.is_empty() { + continue; + 
} + return GrpcStatusCode::from_str(value.as_ref()); + } + AttributeValue::Float(code) => { + return GrpcStatusCode::from_code(*code as u8); + } + _ => continue, } - - return GrpcStatusCode::from_str(value.as_ref()); - } - } - - for key in STATUS_CODE_FIELDS { - if let Some(&code) = metrics.get(*key) { - return GrpcStatusCode::from_code(code as u8); } } @@ -509,115 +509,113 @@ pub fn get_grpc_status_code( #[cfg(test)] mod tests { + use saluki_common::collections::FastHashMap; use saluki_core::data_model::event::trace::Span; use super::*; use crate::transforms::apm_stats::span_concentrator::SpanConcentrator; use crate::transforms::apm_stats::statsraw::new_aggregation_from_span; + fn span_with_meta_str(key: &str, val: &str) -> Span { + let mut m = FastHashMap::default(); + m.insert(MetaString::from(key), MetaString::from(val)); + Span::default().with_meta(Some(m)) + } + + fn span_with_metric(key: &str, val: f64) -> Span { + let mut m = FastHashMap::default(); + m.insert(MetaString::from(key), val); + Span::default().with_metrics(Some(m)) + } + + fn span_with_meta_and_metric(meta_key: &str, meta_val: &str, metric_key: &str, metric_val: f64) -> Span { + let mut meta = FastHashMap::default(); + meta.insert(MetaString::from(meta_key), MetaString::from(meta_val)); + let mut metrics = FastHashMap::default(); + metrics.insert(MetaString::from(metric_key), metric_val); + Span::default().with_meta(Some(meta)).with_metrics(Some(metrics)) + } + #[test] fn test_get_status_code() { // Empty span - let meta = FastHashMap::default(); - let metrics = FastHashMap::default(); - assert_eq!(get_status_code(&meta, &metrics), 0); + assert_eq!(get_status_code(&Span::default()), 0); - // Meta only - let mut meta = FastHashMap::default(); - meta.insert(MetaString::from("http.status_code"), MetaString::from("200")); - let metrics = FastHashMap::default(); - assert_eq!(get_status_code(&meta, &metrics), 200); + // Meta only (string) + 
assert_eq!(get_status_code(&span_with_meta_str("http.status_code", "200")), 200); - // Metrics only - let meta = FastHashMap::default(); - let mut metrics = FastHashMap::default(); - metrics.insert(MetaString::from("http.status_code"), 302.0); - assert_eq!(get_status_code(&meta, &metrics), 302); + // Metrics only (float) + assert_eq!(get_status_code(&span_with_metric("http.status_code", 302.0)), 302); - // Both meta and metrics - metrics takes precedence - let mut meta = FastHashMap::default(); - meta.insert(MetaString::from("http.status_code"), MetaString::from("200")); - let mut metrics = FastHashMap::default(); - metrics.insert(MetaString::from("http.status_code"), 302.0); - assert_eq!(get_status_code(&meta, &metrics), 302); + // Both meta and metrics - float takes precedence (checked first) + assert_eq!( + get_status_code(&span_with_meta_and_metric("http.status_code", "200", "http.status_code", 302.0)), + 302 + ); // Invalid meta value - let mut meta = FastHashMap::default(); - meta.insert(MetaString::from("http.status_code"), MetaString::from("x")); - let metrics = FastHashMap::default(); - assert_eq!(get_status_code(&meta, &metrics), 0); + assert_eq!(get_status_code(&span_with_meta_str("http.status_code", "x")), 0); } #[test] fn test_get_grpc_status_code() { // Empty span - let meta = FastHashMap::default(); - let metrics = FastHashMap::default(); - assert_eq!(get_grpc_status_code(&meta, &metrics), GrpcStatusCode::Unset); + assert_eq!(get_grpc_status_code(&Span::default()), GrpcStatusCode::Unset); // Meta with lowercase name "aborted" - let mut meta = FastHashMap::default(); - meta.insert(MetaString::from("rpc.grpc.status_code"), MetaString::from("aborted")); - let metrics = FastHashMap::default(); - assert_eq!(get_grpc_status_code(&meta, &metrics), GrpcStatusCode::Aborted); + assert_eq!( + get_grpc_status_code(&span_with_meta_str("rpc.grpc.status_code", "aborted")), + GrpcStatusCode::Aborted + ); // Metrics with numeric code - let meta = 
FastHashMap::default(); - let mut metrics = FastHashMap::default(); - metrics.insert(MetaString::from("grpc.code"), 1.0); - assert_eq!(get_grpc_status_code(&meta, &metrics), GrpcStatusCode::Cancelled); + assert_eq!( + get_grpc_status_code(&span_with_metric("grpc.code", 1.0)), + GrpcStatusCode::Cancelled + ); - // Both meta and metrics - meta takes precedence - let mut meta = FastHashMap::default(); - meta.insert(MetaString::from("grpc.status.code"), MetaString::from("0")); - let mut metrics = FastHashMap::default(); - metrics.insert(MetaString::from("grpc.status.code"), 1.0); - assert_eq!(get_grpc_status_code(&meta, &metrics), GrpcStatusCode::Ok); + // When both string and float values are set for the same key, the last writer wins. + // span_with_meta_and_metric calls with_metrics after with_meta, so float (Cancelled=1) wins. + assert_eq!( + get_grpc_status_code(&span_with_meta_and_metric("grpc.status.code", "0", "grpc.status.code", 1.0)), + GrpcStatusCode::Cancelled + ); // Numeric string in meta - let mut meta = FastHashMap::default(); - meta.insert(MetaString::from("rpc.grpc.status.code"), MetaString::from("15")); - let metrics = FastHashMap::default(); - assert_eq!(get_grpc_status_code(&meta, &metrics), GrpcStatusCode::DataLoss); + assert_eq!( + get_grpc_status_code(&span_with_meta_str("rpc.grpc.status.code", "15")), + GrpcStatusCode::DataLoss + ); // "Canceled" (mixed case) - let mut meta = FastHashMap::default(); - meta.insert(MetaString::from("rpc.grpc.status.code"), MetaString::from("Canceled")); - let metrics = FastHashMap::default(); - assert_eq!(get_grpc_status_code(&meta, &metrics), GrpcStatusCode::Cancelled); + assert_eq!( + get_grpc_status_code(&span_with_meta_str("rpc.grpc.status.code", "Canceled")), + GrpcStatusCode::Cancelled + ); // "CANCELLED" (uppercase) - let mut meta = FastHashMap::default(); - meta.insert(MetaString::from("rpc.grpc.status.code"), MetaString::from("CANCELLED")); - let metrics = FastHashMap::default(); - 
assert_eq!(get_grpc_status_code(&meta, &metrics), GrpcStatusCode::Cancelled); + assert_eq!( + get_grpc_status_code(&span_with_meta_str("rpc.grpc.status.code", "CANCELLED")), + GrpcStatusCode::Cancelled + ); // With "StatusCode." prefix - let mut meta = FastHashMap::default(); - meta.insert( - MetaString::from("grpc.status.code"), - MetaString::from("StatusCode.ABORTED"), + assert_eq!( + get_grpc_status_code(&span_with_meta_str("grpc.status.code", "StatusCode.ABORTED")), + GrpcStatusCode::Aborted ); - let metrics = FastHashMap::default(); - assert_eq!(get_grpc_status_code(&meta, &metrics), GrpcStatusCode::Aborted); // Invalid prefix (typo) - let mut meta = FastHashMap::default(); - meta.insert( - MetaString::from("grpc.status.code"), - MetaString::from("StatusCodee.ABORTED"), + assert_eq!( + get_grpc_status_code(&span_with_meta_str("grpc.status.code", "StatusCodee.ABORTED")), + GrpcStatusCode::Unset ); - let metrics = FastHashMap::default(); - assert_eq!(get_grpc_status_code(&meta, &metrics), GrpcStatusCode::Unset); // "InvalidArgument" (PascalCase) - let mut meta = FastHashMap::default(); - meta.insert( - MetaString::from("rpc.grpc.status_code"), - MetaString::from("InvalidArgument"), + assert_eq!( + get_grpc_status_code(&span_with_meta_str("rpc.grpc.status_code", "InvalidArgument")), + GrpcStatusCode::InvalidArgument ); - let metrics = FastHashMap::default(); - assert_eq!(get_grpc_status_code(&meta, &metrics), GrpcStatusCode::InvalidArgument); } #[test] diff --git a/lib/saluki-components/src/transforms/apm_stats/mod.rs b/lib/saluki-components/src/transforms/apm_stats/mod.rs index 35cfde8674b..2b47c1d515f 100644 --- a/lib/saluki-components/src/transforms/apm_stats/mod.rs +++ b/lib/saluki-components/src/transforms/apm_stats/mod.rs @@ -9,13 +9,12 @@ use std::{ use async_trait::async_trait; use memory_accounting::{MemoryBounds, MemoryBoundsBuilder}; -use opentelemetry_semantic_conventions::resource::{CONTAINER_ID, K8S_POD_UID}; use saluki_config::GenericConfiguration; 
use saluki_context::{origin::OriginTagCardinality, tags::TagSet}; use saluki_core::{ components::{transforms::*, ComponentContext}, data_model::event::{ - trace::Trace, + trace::{AttributeValue, Trace}, trace_stats::{ClientStatsPayload, TraceStats}, Event, EventType, }, @@ -31,14 +30,14 @@ use tracing::{debug, error}; use crate::common::{ datadog::apm::ApmConfig, - otlp::util::{extract_container_tags_from_resource_tagset, KEY_DATADOG_CONTAINER_ID}, + otlp::util::extract_container_tags_from_attributes_map, }; mod aggregation; -use self::aggregation::{process_tags_hash, PayloadAggregationKey}; +pub(crate) use self::aggregation::{process_tags_hash, PayloadAggregationKey}; mod span_concentrator; -use self::span_concentrator::{InfraTags, SpanConcentrator}; +pub(crate) use self::span_concentrator::{InfraTags, SpanConcentrator}; mod statsraw; @@ -165,7 +164,7 @@ impl ApmStats { let origin = trace .spans() .first() - .and_then(|s| s.meta().get("_dd.origin")) + .and_then(|s| s.attributes.get("_dd.origin").and_then(AttributeValue::as_string)) .map(|s| s.as_ref()) .unwrap_or(""); @@ -178,15 +177,15 @@ impl ApmStats { } fn build_infra_tags(&self, trace: &Trace, process_tags: &str) -> InfraTags { - let resource_tags = trace.resource_tags(); - let container_id = resolve_container_id(resource_tags); + let container_id = trace.container_id.clone(); let mut container_tags = if container_id.is_empty() { TagSet::default() } else { - extract_container_tags(resource_tags) + let mut tags = TagSet::default(); + extract_container_tags_from_attributes_map(&trace.attributes, &mut tags); + tags }; - // Query the workload provider for additional container tags. 
if !container_id.is_empty() { if let Some(workload_provider) = &self.workload_provider { let entity_id = EntityId::Container(container_id.clone()); @@ -206,39 +205,64 @@ impl ApmStats { .find(|s| s.parent_id() == 0) .or_else(|| trace.spans().first()); - let span_env = root_span.and_then(|s| s.meta().get("env")).filter(|s| !s.is_empty()); - let env = span_env.cloned().unwrap_or_else(|| self.agent_env.clone()); + let env = root_span + .and_then(|s| s.attributes.get("env").and_then(AttributeValue::as_string).filter(|s| !s.is_empty())) + .cloned() + .unwrap_or_else(|| { + if !trace.env.is_empty() { + trace.env.clone() + } else { + self.agent_env.clone() + } + }); let hostname = root_span - .and_then(|s| s.meta().get("_dd.hostname")) - .filter(|s| !s.is_empty()) + .and_then(|s| s.attributes.get("_dd.hostname").and_then(AttributeValue::as_string).filter(|s| !s.is_empty())) .cloned() - .unwrap_or_else(|| self.agent_hostname.clone()); + .unwrap_or_else(|| { + if !trace.hostname.is_empty() { + trace.hostname.clone() + } else { + self.agent_hostname.clone() + } + }); - let version = root_span - .and_then(|s| s.meta().get("version")) - .cloned() - .unwrap_or_default(); + let version = if !trace.app_version.is_empty() { + trace.app_version.clone() + } else { + root_span + .and_then(|s| s.attributes.get("version").and_then(AttributeValue::as_string).filter(|s| !s.is_empty())) + .cloned() + .unwrap_or_default() + }; - let container_id = root_span - .and_then(|s| s.meta().get("_dd.container_id")) - .cloned() - .unwrap_or_default(); + let container_id = if !trace.container_id.is_empty() { + trace.container_id.clone() + } else { + root_span + .and_then(|s| s.attributes.get("_dd.container_id").and_then(AttributeValue::as_string)) + .cloned() + .unwrap_or_default() + }; let git_commit_sha = root_span - .and_then(|s| s.meta().get("_dd.git.commit.sha")) + .and_then(|s| s.attributes.get("_dd.git.commit.sha").and_then(AttributeValue::as_string).filter(|s| !s.is_empty())) .cloned() 
.unwrap_or_default(); let image_tag = root_span - .and_then(|s| s.meta().get("_dd.image_tag")) + .and_then(|s| s.attributes.get("_dd.image_tag").and_then(AttributeValue::as_string).filter(|s| !s.is_empty())) .cloned() .unwrap_or_default(); - let lang = root_span - .and_then(|s| s.meta().get("language")) - .cloned() - .unwrap_or_default(); + let lang = if !trace.language_name.is_empty() { + trace.language_name.clone() + } else { + root_span + .and_then(|s| s.attributes.get("language").and_then(AttributeValue::as_string)) + .cloned() + .unwrap_or_default() + }; PayloadAggregationKey { env, @@ -421,37 +445,19 @@ fn now_nanos() -> u64 { .as_nanos() as u64 } -/// Resolves container ID from OTLP resource tags. -fn resolve_container_id(resource_tags: &TagSet) -> MetaString { - for key in [KEY_DATADOG_CONTAINER_ID, CONTAINER_ID, K8S_POD_UID] { - if let Some(tag) = resource_tags.get_single_tag(key) { - if let Some(value) = tag.value() { - if !value.is_empty() { - return MetaString::from(value); - } - } +/// Extracts process tags from trace, checking both span meta and trace attributes. +fn extract_process_tags(trace: &Trace) -> MetaString { + let root_span = trace.spans().iter().find(|s| s.parent_id() == 0).or_else(|| trace.spans().first()); + if let Some(span) = root_span { + if let Some(tags) = span.attributes.get(TAG_PROCESS_TAGS).and_then(AttributeValue::as_string).filter(|s| !s.is_empty()) { + return tags.clone(); } } - - MetaString::empty() -} - -/// Extracts container tags from OTLP resource tags. -fn extract_container_tags(resource_tags: &TagSet) -> TagSet { - let mut container_tags_set = TagSet::default(); - extract_container_tags_from_resource_tagset(resource_tags, &mut container_tags_set); - - container_tags_set -} - -/// Extracts process tags from trace. 
-fn extract_process_tags(trace: &Trace) -> MetaString { - if let Some(first_span) = trace.spans().first() { - if let Some(process_tags) = first_span.meta().get(TAG_PROCESS_TAGS) { - return process_tags.clone(); + if let Some(AttributeValue::String(tags)) = trace.attributes.get(TAG_PROCESS_TAGS) { + if !tags.is_empty() { + return tags.clone(); } } - MetaString::empty() } @@ -459,7 +465,6 @@ fn extract_process_tags(trace: &Trace) -> MetaString { mod tests { use proptest::prelude::*; use saluki_common::collections::FastHashMap; - use saluki_context::tags::TagSet; use saluki_core::data_model::event::trace_stats::ClientStatsBucket; use saluki_core::data_model::event::{trace::Span, trace_stats::ClientGroupedStats}; @@ -486,7 +491,7 @@ mod tests { let start = bucket_start - duration; Span::new( - service, "query", resource, "db", 1, span_id, parent_id, start, duration, error, + service, "query", resource, "db", span_id, parent_id, start, duration, error, ) .with_meta(meta) .with_metrics(metrics) @@ -497,7 +502,7 @@ mod tests { let mut metrics = FastHashMap::default(); metrics.insert(MetaString::from("_dd.measured"), 1.0); - Span::new(service, name, resource, "web", 1, 1, 0, 1000000000, 100000000, 0).with_metrics(metrics) + Span::new(service, name, resource, "web", 1, 0, 1000000000, 100000000, 0).with_metrics(metrics) } /// Creates a top-level span (parent_id = 0, has _top_level metric) @@ -535,7 +540,7 @@ mod tests { }; let span = make_test_span("test-service", "test-operation", "test-resource"); - let trace = Trace::new(vec![span], TagSet::default()); + let trace = Trace::new(vec![span]); transform.process_trace(&trace); @@ -568,7 +573,6 @@ mod tests { "test-resource", "web", 1, - 1, 0, now, 100000000, @@ -576,7 +580,7 @@ mod tests { ) .with_metrics(metrics); - let trace = Trace::new(vec![span], TagSet::default()); + let trace = Trace::new(vec![span]); transform.process_trace(&trace); let stats = transform.concentrator.flush(now + BUCKET_DURATION_NS * 2, true); @@ -598,7 
+602,7 @@ mod tests { // Add a span let span = make_top_level_span(aligned_now, 1, 50, 5, "A1", "resource1", 0, None); - let trace = Trace::new(vec![span], TagSet::default()); + let trace = Trace::new(vec![span]); let payload_key = PayloadAggregationKey { env: MetaString::from("test"), @@ -639,7 +643,7 @@ mod tests { metrics.insert(MetaString::from(METRIC_PARTIAL_VERSION), 830604.0); let span = test_span(aligned_now, 1, 0, 50, 5, "A1", "resource1", 0, None, Some(metrics)); - let trace = Trace::new(vec![span], TagSet::default()); + let trace = Trace::new(vec![span]); let payload_key = PayloadAggregationKey { env: MetaString::from("test"), @@ -814,7 +818,7 @@ mod tests { // Create a simple top-level span using the same pattern as make_test_span (which works) let mut metrics = FastHashMap::default(); metrics.insert(MetaString::from("_top_level"), 1.0); - let span = Span::new("myservice", "query", "GET /users", "web", 1, 1, 0, now, 500, 0).with_metrics(metrics); + let span = Span::new("myservice", "query", "GET /users", "web", 1, 0, now, 500, 0).with_metrics(metrics); let payload_key = PayloadAggregationKey { env: MetaString::from("test"), @@ -830,7 +834,7 @@ mod tests { // Should NOT produce stats when compute_stats_by_span_kind is disabled let mut client_meta = FastHashMap::default(); client_meta.insert(MetaString::from("span.kind"), MetaString::from("client")); - let client_span = Span::new("myservice", "postgres.query", "SELECT ...", "db", 1, 2, 1, now, 75, 0) + let client_span = Span::new("myservice", "postgres.query", "SELECT ...", "db", 2, 1, now, 75, 0) .with_meta(client_meta); if let Some(stat_span) = concentrator.new_stat_span_from_span(&client_span) { @@ -857,7 +861,7 @@ mod tests { // Create a simple top-level span let mut metrics = FastHashMap::default(); metrics.insert(MetaString::from("_top_level"), 1.0); - let span = Span::new("myservice", "query", "GET /users", "web", 1, 1, 0, now, 500, 0).with_metrics(metrics); + let span = Span::new("myservice", 
"query", "GET /users", "web", 1, 0, now, 500, 0).with_metrics(metrics); let payload_key = PayloadAggregationKey { env: MetaString::from("test"), @@ -873,7 +877,7 @@ mod tests { // SHOULD produce stats when compute_stats_by_span_kind is enabled let mut client_meta = FastHashMap::default(); client_meta.insert(MetaString::from("span.kind"), MetaString::from("client")); - let client_span = Span::new("myservice", "postgres.query", "SELECT ...", "db", 1, 2, 1, now, 75, 0) + let client_span = Span::new("myservice", "postgres.query", "SELECT ...", "db", 2, 1, now, 75, 0) .with_meta(client_meta); if let Some(stat_span) = concentrator.new_stat_span_from_span(&client_span) { @@ -909,7 +913,7 @@ mod tests { client_meta.insert(MetaString::from("db.system"), MetaString::from("postgres")); let mut client_metrics = FastHashMap::default(); client_metrics.insert(MetaString::from("_dd.measured"), 1.0); - let client_span = Span::new("myservice", "postgres.query", "SELECT ...", "db", 1, 2, 1, now, 75, 0) + let client_span = Span::new("myservice", "postgres.query", "SELECT ...", "db", 2, 1, now, 75, 0) .with_meta(client_meta) .with_metrics(client_metrics); @@ -950,7 +954,7 @@ mod tests { client_meta.insert(MetaString::from("db.system"), MetaString::from("postgres")); let mut client_metrics = FastHashMap::default(); client_metrics.insert(MetaString::from("_dd.measured"), 1.0); - let client_span = Span::new("myservice", "postgres.query", "SELECT ...", "db", 1, 2, 1, now, 75, 0) + let client_span = Span::new("myservice", "postgres.query", "SELECT ...", "db", 2, 1, now, 75, 0) .with_meta(client_meta) .with_metrics(client_metrics); @@ -1096,7 +1100,7 @@ mod tests { // Test with no process tags { let span = Span::default(); - let trace = Trace::new(vec![span], TagSet::default()); + let trace = Trace::new(vec![span]); let process_tags = extract_process_tags(&trace); assert!(process_tags.is_empty(), "Should be empty when no _dd.tags.process"); } @@ -1106,7 +1110,7 @@ mod tests { let mut meta = 
FastHashMap::default(); meta.insert(MetaString::from(TAG_PROCESS_TAGS), MetaString::from("a:1,b:2,c:3")); let span = Span::default().with_meta(meta); - let trace = Trace::new(vec![span], TagSet::default()); + let trace = Trace::new(vec![span]); let process_tags = extract_process_tags(&trace); assert_eq!(process_tags, "a:1,b:2,c:3"); } @@ -1116,7 +1120,7 @@ mod tests { let mut meta = FastHashMap::default(); meta.insert(MetaString::from(TAG_PROCESS_TAGS), MetaString::from("")); let span = Span::default().with_meta(meta); - let trace = Trace::new(vec![span], TagSet::default()); + let trace = Trace::new(vec![span]); let process_tags = extract_process_tags(&trace); assert!( process_tags.is_empty(), @@ -1126,7 +1130,7 @@ mod tests { // Test with empty trace { - let trace = Trace::new(vec![], TagSet::default()); + let trace = Trace::new(vec![]); let process_tags = extract_process_tags(&trace); assert!(process_tags.is_empty(), "Should be empty when trace has no spans"); } diff --git a/lib/saluki-components/src/transforms/apm_stats/span_concentrator.rs b/lib/saluki-components/src/transforms/apm_stats/span_concentrator.rs index 6eb81170154..c3e46b000ed 100644 --- a/lib/saluki-components/src/transforms/apm_stats/span_concentrator.rs +++ b/lib/saluki-components/src/transforms/apm_stats/span_concentrator.rs @@ -2,19 +2,20 @@ use saluki_common::collections::FastHashMap; use saluki_context::tags::TagSet; -use saluki_core::data_model::event::trace::Span; +use saluki_core::data_model::event::trace::{AttributeValue, Span}; use saluki_core::data_model::event::trace_stats::{ClientStatsBucket, ClientStatsPayload}; use stringtheory::MetaString; use super::aggregation::AggregationRegistry; use super::aggregation::{ - get_grpc_status_code, get_status_code, process_tags_hash, PayloadAggregationKey, BUCKET_DURATION_NS, - TAG_BASE_SERVICE, TAG_SPAN_KIND, + get_grpc_status_code, get_status_code, process_tags_hash, PayloadAggregationKey, + BUCKET_DURATION_NS, TAG_BASE_SERVICE, TAG_SPAN_KIND, 
}; use super::statsraw::RawBucket; const DEFAULT_BUFFER_LEN: u64 = 2; const METRIC_TOP_LEVEL: &str = "_top_level"; +const METRIC_TRACER_TOP_LEVEL: &str = "_dd.top_level"; const METRIC_MEASURED: &str = "_dd.measured"; pub const METRIC_PARTIAL_VERSION: &str = "_dd.partial_version"; @@ -160,11 +161,11 @@ impl SpanConcentrator { } } - pub fn new_stat_span_from_span(&self, span: &Span) -> Option { + pub(super) fn new_stat_span_from_span(&self, span: &Span) -> Option { self.new_stat_span(span) } - pub fn add_span( + pub(super) fn add_span( &mut self, stat_span: &StatSpan, weight: f64, payload_key: &PayloadAggregationKey, infra_tags: &InfraTags, origin: &str, ) { @@ -226,18 +227,23 @@ impl SpanConcentrator { } fn is_span_eligible(&self, span: &Span) -> bool { - if let Some(&val) = span.metrics().get(METRIC_TOP_LEVEL) { + if let Some(val) = span.attributes.get(METRIC_TOP_LEVEL).and_then(AttributeValue::as_float) { if val == 1.0 { return true; } } - if let Some(&val) = span.metrics().get(METRIC_MEASURED) { + if let Some(val) = span.attributes.get(METRIC_TRACER_TOP_LEVEL).and_then(AttributeValue::as_float) { + if val == 1.0 { + return true; + } + } + if let Some(val) = span.attributes.get(METRIC_MEASURED).and_then(AttributeValue::as_float) { if val == 1.0 { return true; } } if self.compute_stats_by_span_kind { - if let Some(kind) = span.meta().get(TAG_SPAN_KIND) { + if let Some(kind) = span.attributes.get(TAG_SPAN_KIND).and_then(AttributeValue::as_string) { return compute_stats_for_span_kind(kind); } } @@ -253,10 +259,11 @@ impl SpanConcentrator { return None; } - let span_kind = span.meta().get(TAG_SPAN_KIND).cloned().unwrap_or_default(); - let status_code = get_status_code(span.meta(), span.metrics()); - let grpc_status_code = get_grpc_status_code(span.meta(), span.metrics()).to_metastring(); - let is_top_level = span.metrics().get(METRIC_TOP_LEVEL).map(|&v| v == 1.0).unwrap_or(false); + let span_kind = 
span.attributes.get(TAG_SPAN_KIND).and_then(AttributeValue::as_string).cloned().unwrap_or_default(); + let status_code = get_status_code(span); + let grpc_status_code = get_grpc_status_code(span).to_metastring(); + let is_top_level = span.attributes.get(METRIC_TOP_LEVEL).and_then(AttributeValue::as_float).is_some_and(|v| v == 1.0) + || span.attributes.get(METRIC_TRACER_TOP_LEVEL).and_then(AttributeValue::as_float).is_some_and(|v| v == 1.0); let matching_peer_tags = self.matching_peer_tags(span, &span_kind); Some(StatSpan { @@ -281,9 +288,10 @@ impl SpanConcentrator { fn matching_peer_tags(&self, span: &Span, span_kind: &str) -> Vec { let mut peer_tags = Vec::new(); - let keys_to_check = self.peer_tag_keys_to_aggregate_for_span(span_kind, span.meta().get(TAG_BASE_SERVICE)); + let base_service = span.attributes.get(TAG_BASE_SERVICE).and_then(AttributeValue::as_string); + let keys_to_check = self.peer_tag_keys_to_aggregate_for_span(span_kind, base_service); for key in keys_to_check { - if let Some(value) = span.meta().get(key) { + if let Some(value) = span.attributes.get(key.as_ref()).and_then(AttributeValue::as_string) { if !value.is_empty() { peer_tags.push(MetaString::from(format!("{}:{}", key, value))); } @@ -337,6 +345,7 @@ impl SpanConcentrator { b.handle_span(s, weight, origin, agg_key.clone(), &mut self.key_registry); } + } /// Align timestamp to bucket boundary. 
@@ -352,9 +361,59 @@ pub const fn compute_stats_for_span_kind(kind: &str) -> bool { || kind.eq_ignore_ascii_case("consumer") } +#[cfg(test)] +mod tests { + use saluki_common::collections::FastHashMap; + use saluki_core::data_model::event::trace::Span; + use stringtheory::MetaString; + + use super::*; + + fn concentrator() -> SpanConcentrator { + SpanConcentrator::new(false, false, &[], 1_000_000_000) + } + + fn span_with_metric(key: &str, val: f64) -> Span { + let mut metrics = FastHashMap::default(); + metrics.insert(MetaString::from(key), val); + Span::new("svc", "op", "resource", "web", 1, 0, 1_000_000_000, 100_000_000, 0).with_metrics(metrics) + } + + // Issue 3: _dd.top_level is the tracer-set key in v1 payloads. The concentrator + // must treat it the same as the legacy agent-set _top_level key. + + #[test] + fn tracer_top_level_key_makes_span_eligible_for_stats() { + let span = span_with_metric("_dd.top_level", 1.0); + assert!( + concentrator().new_stat_span_from_span(&span).is_some(), + "_dd.top_level=1.0 should make a span eligible for stats (same as _top_level)" + ); + } + + #[test] + fn tracer_top_level_key_sets_is_top_level_flag_on_stat_span() { + let span = span_with_metric("_dd.top_level", 1.0); + let stat = concentrator() + .new_stat_span_from_span(&span) + .expect("span with _dd.top_level=1.0 should produce a stat span"); + assert!(stat.is_top_level, "stat span from _dd.top_level=1.0 should have is_top_level=true"); + } + + #[test] + fn agent_top_level_key_still_makes_span_eligible() { + let span = span_with_metric("_top_level", 1.0); + let stat = concentrator() + .new_stat_span_from_span(&span) + .expect("_top_level=1.0 should still produce a stat span"); + assert!(stat.is_top_level); + } +} + fn is_partial_snapshot(span: &Span) -> bool { - match span.metrics().get(METRIC_PARTIAL_VERSION) { - Some(&v) => v >= 0.0, + match span.attributes.get(METRIC_PARTIAL_VERSION).and_then(AttributeValue::as_float) { + Some(v) => v >= 0.0, None => false, } } + 
diff --git a/lib/saluki-components/src/transforms/apm_stats/weight.rs b/lib/saluki-components/src/transforms/apm_stats/weight.rs index d1e074eb8cc..cdc4bf43e4f 100644 --- a/lib/saluki-components/src/transforms/apm_stats/weight.rs +++ b/lib/saluki-components/src/transforms/apm_stats/weight.rs @@ -1,11 +1,18 @@ //! Span weight calculation for APM stats. -use saluki_core::data_model::event::trace::Span; +use saluki_core::data_model::event::trace::{AttributeValue, Span}; const KEY_SAMPLING_RATE_GLOBAL: &str = "_sample_rate"; +// TODO: `Trace::client_dropped_p0s_weight` (populated from the `Datadog-Client-Dropped-P0-Traces` +// HTTP header on the V1 APM path) is not factored into stats weight. In the Go trace agent, +// dropped P0 client traces inflate the weight to compensate for traces the client discarded before +// sending. This function only accounts for agent-side sampling rate (`_sample_rate`), so V1 APM +// stats may be undercounted when clients are dropping P0s. The fix would be to accept `&Trace` +// here (or take the multiplier as a parameter) and multiply the result by +// `trace.client_dropped_p0s_weight` when it is non-zero. 
pub(super) fn weight(span: &Span) -> f64 { - if let Some(&rate) = span.metrics().get(KEY_SAMPLING_RATE_GLOBAL) { + if let Some(rate) = span.attributes.get(KEY_SAMPLING_RATE_GLOBAL).and_then(AttributeValue::as_float) { if rate > 0.0 && rate <= 1.0 { return 1.0 / rate; } diff --git a/lib/saluki-components/src/transforms/mod.rs b/lib/saluki-components/src/transforms/mod.rs index 06e3b97c0c5..921a6cbc480 100644 --- a/lib/saluki-components/src/transforms/mod.rs +++ b/lib/saluki-components/src/transforms/mod.rs @@ -29,3 +29,5 @@ pub use self::apm_stats::ApmStatsTransformConfiguration; mod trace_obfuscation; pub use self::trace_obfuscation::TraceObfuscationConfiguration; + + diff --git a/lib/saluki-components/src/transforms/trace_obfuscation/credit_cards.rs b/lib/saluki-components/src/transforms/trace_obfuscation/credit_cards.rs index 8b2284ad34f..3d61bb4e4d5 100644 --- a/lib/saluki-components/src/transforms/trace_obfuscation/credit_cards.rs +++ b/lib/saluki-components/src/transforms/trace_obfuscation/credit_cards.rs @@ -35,6 +35,12 @@ const ALLOWLISTED_TAGS: &[&str] = &[ "service", "sql.query", "version", + // Data Job Monitoring tags — these values are frequently similar to credit card numbers. + "databricks_job_id", + "databricks_job_run_id", + "databricks_task_run_id", + "config.spark_app_startTime", + "config.spark_databricks_job_parentRunId", ]; /// Credit card obfuscator with configuration. @@ -598,6 +604,38 @@ mod tests { assert_eq!(obfuscator.obfuscate_credit_card_number("user.id", "12345"), None); } + #[test] + fn test_databricks_and_spark_tags_are_not_obfuscated() { + // These tags look like credit card numbers but are Data Job Monitoring IDs; + // they must be in the allowlist to prevent false-positive obfuscation. + let obfuscator = CreditCardObfuscator::new(&default_config()); + + // A value that would be detected as a card if the key is not allowlisted. 
+ let card_like_value = "4111111111111111"; + + let allowlisted = &[ + "databricks_job_id", + "databricks_job_run_id", + "databricks_task_run_id", + "config.spark_app_startTime", + "config.spark_databricks_job_parentRunId", + ]; + for key in allowlisted { + assert_eq!( + obfuscator.obfuscate_credit_card_number(key, card_like_value), + None, + "Key '{}' should be allowlisted and not obfuscated", + key + ); + } + + // Verify the value itself is detected as a card on a non-allowlisted key. + assert_eq!( + obfuscator.obfuscate_credit_card_number("payment.card", card_like_value), + Some("?".into()) + ); + } + #[test] fn test_obfuscate_with_luhn() { let config = CreditCardObfuscationConfig { diff --git a/lib/saluki-components/src/transforms/trace_obfuscation/mod.rs b/lib/saluki-components/src/transforms/trace_obfuscation/mod.rs index 5b671232f05..3ab201d4673 100644 --- a/lib/saluki-components/src/transforms/trace_obfuscation/mod.rs +++ b/lib/saluki-components/src/transforms/trace_obfuscation/mod.rs @@ -16,7 +16,10 @@ use memory_accounting::{MemoryBounds, MemoryBoundsBuilder}; use saluki_config::GenericConfiguration; use saluki_core::{ components::{transforms::*, ComponentContext}, - data_model::event::{trace::Span, Event}, + data_model::event::{ + trace::{AttributeValue, Span}, + Event, + }, topology::EventsBuffer, }; use saluki_error::GenericError; @@ -104,66 +107,69 @@ impl TraceObfuscation { } fn obfuscate_credit_cards_in_span(&mut self, span: &mut Span) { - for (key, value) in span.meta_mut().iter_mut() { - if let Some(replacement) = self - .obfuscator - .obfuscate_credit_card_number(key.as_ref(), value.as_ref()) - { - *value = replacement; + for (key, value) in span.attributes.iter_mut() { + if let AttributeValue::String(s) = value { + if let Some(replacement) = self + .obfuscator + .obfuscate_credit_card_number(key.as_ref(), s.as_ref()) + { + *s = replacement; + } } } } fn obfuscate_http_span(&mut self, span: &mut Span) { - let url_value = match 
span.meta().get(tags::HTTP_URL) { - Some(v) if !v.is_empty() => v.as_ref(), + let url_value = match span.attributes.get(tags::HTTP_URL).and_then(AttributeValue::as_string) { + Some(v) if !v.is_empty() => v.as_ref().to_owned(), _ => return, }; - if let Some(obfuscated) = self.obfuscator.obfuscate_url(url_value) { - span.meta_mut().insert(tags::HTTP_URL.into(), obfuscated); + if let Some(obfuscated) = self.obfuscator.obfuscate_url(&url_value) { + span.attributes.insert(tags::HTTP_URL.into(), AttributeValue::String(obfuscated)); } } fn obfuscate_sql_span(&mut self, span: &mut Span) { - let sql_query: &str = span - .meta() + let sql_query: String = span + .attributes .get(tags::DB_STATEMENT) + .and_then(AttributeValue::as_string) .map(|v| v.as_ref()) .filter(|s| !s.is_empty()) - .unwrap_or_else(|| span.resource()); + .unwrap_or_else(|| span.resource()) + .to_owned(); if sql_query.is_empty() { return; } - let dbms = span.meta().get(tags::DBMS); + let dbms = span.attributes.get(tags::DBMS).and_then(AttributeValue::as_string).map(|s| s.to_string()); let config = match dbms { - Some(d) if !d.is_empty() => self.obfuscator.config.sql().with_dbms(d.to_string()), + Some(d) if !d.is_empty() => self.obfuscator.config.sql().with_dbms(d), _ => self.obfuscator.config.sql().clone(), }; - match sql::obfuscate_sql_string(sql_query, &config) { + match sql::obfuscate_sql_string(&sql_query, &config) { Ok(obfuscated) => { let query: MetaString = obfuscated.query.into(); span.set_resource(query.clone()); - span.meta_mut().insert(tags::SQL_QUERY.into(), query.clone()); + span.attributes.insert(tags::SQL_QUERY.into(), AttributeValue::String(query.clone())); - if span.meta().contains_key(tags::DB_STATEMENT) { - span.meta_mut().insert(tags::DB_STATEMENT.into(), query); + if span.attributes.contains_key(tags::DB_STATEMENT) { + span.attributes.insert(tags::DB_STATEMENT.into(), AttributeValue::String(query)); } if !obfuscated.table_names.is_empty() { - span.meta_mut() - 
.insert("sql.tables".into(), obfuscated.table_names.into()); + span.attributes.insert("sql.tables".into(), AttributeValue::String(obfuscated.table_names.into())); } } Err(_) => { let non_parsable: MetaString = TEXT_NON_PARSABLE_SQL.into(); span.set_resource(non_parsable.clone()); - span.meta_mut().insert(tags::SQL_QUERY.into(), non_parsable); + span.attributes.insert(tags::SQL_QUERY.into(), AttributeValue::String(non_parsable)); } } } @@ -179,17 +185,17 @@ impl TraceObfuscation { } if span.span_type() == "redis" && self.obfuscator.config.redis().enabled() { - if let Some(cmd_value) = span.meta().get(tags::REDIS_RAW_COMMAND) { - if let Some(obfuscated) = self.obfuscator.obfuscate_redis_string(cmd_value.as_ref()) { - span.meta_mut().insert(tags::REDIS_RAW_COMMAND.into(), obfuscated); + if let Some(cmd_value) = span.attributes.get(tags::REDIS_RAW_COMMAND).and_then(AttributeValue::as_string).map(|s| s.as_ref().to_owned()) { + if let Some(obfuscated) = self.obfuscator.obfuscate_redis_string(&cmd_value) { + span.attributes.insert(tags::REDIS_RAW_COMMAND.into(), AttributeValue::String(obfuscated)); } } } if span.span_type() == "valkey" && self.obfuscator.config.valkey().enabled() { - if let Some(cmd_value) = span.meta().get(tags::VALKEY_RAW_COMMAND) { - if let Some(obfuscated) = self.obfuscator.obfuscate_valkey_string(cmd_value.as_ref()) { - span.meta_mut().insert(tags::VALKEY_RAW_COMMAND.into(), obfuscated); + if let Some(cmd_value) = span.attributes.get(tags::VALKEY_RAW_COMMAND).and_then(AttributeValue::as_string).map(|s| s.as_ref().to_owned()) { + if let Some(obfuscated) = self.obfuscator.obfuscate_valkey_string(&cmd_value) { + span.attributes.insert(tags::VALKEY_RAW_COMMAND.into(), AttributeValue::String(obfuscated)); } } } @@ -200,41 +206,41 @@ impl TraceObfuscation { return; } - let cmd_value = match span.meta().get(tags::MEMCACHED_COMMAND) { - Some(v) if !v.is_empty() => v.as_ref(), + let cmd_value = match 
span.attributes.get(tags::MEMCACHED_COMMAND).and_then(AttributeValue::as_string) { + Some(v) if !v.is_empty() => v.as_ref().to_owned(), _ => return, }; - if let Some(obfuscated) = self.obfuscator.obfuscate_memcached_command(cmd_value) { + if let Some(obfuscated) = self.obfuscator.obfuscate_memcached_command(&cmd_value) { if obfuscated.is_empty() { - span.meta_mut().remove(tags::MEMCACHED_COMMAND); + span.attributes.remove(tags::MEMCACHED_COMMAND); } else { - span.meta_mut().insert(tags::MEMCACHED_COMMAND.into(), obfuscated); + span.attributes.insert(tags::MEMCACHED_COMMAND.into(), AttributeValue::String(obfuscated)); } } } fn obfuscate_mongodb_span(&mut self, span: &mut Span) { - let query_value = match span.meta().get(tags::MONGODB_QUERY) { - Some(v) => v.as_ref(), + let query_value = match span.attributes.get(tags::MONGODB_QUERY).and_then(AttributeValue::as_string) { + Some(v) => v.as_ref().to_owned(), None => return, }; - if let Some(obfuscated) = self.obfuscator.obfuscate_mongodb_string(query_value) { - span.meta_mut().insert(tags::MONGODB_QUERY.into(), obfuscated); + if let Some(obfuscated) = self.obfuscator.obfuscate_mongodb_string(&query_value) { + span.attributes.insert(tags::MONGODB_QUERY.into(), AttributeValue::String(obfuscated)); } } fn obfuscate_elasticsearch_span(&mut self, span: &mut Span) { - if let Some(body_value) = span.meta().get(tags::ELASTIC_BODY) { - if let Some(obfuscated) = self.obfuscator.obfuscate_elasticsearch_string(body_value.as_ref()) { - span.meta_mut().insert(tags::ELASTIC_BODY.into(), obfuscated); + if let Some(body_value) = span.attributes.get(tags::ELASTIC_BODY).and_then(AttributeValue::as_string).map(|s| s.as_ref().to_owned()) { + if let Some(obfuscated) = self.obfuscator.obfuscate_elasticsearch_string(&body_value) { + span.attributes.insert(tags::ELASTIC_BODY.into(), AttributeValue::String(obfuscated)); } } - if let Some(body_value) = span.meta().get(tags::OPENSEARCH_BODY) { - if let Some(obfuscated) = 
self.obfuscator.obfuscate_opensearch_string(body_value.as_ref()) { - span.meta_mut().insert(tags::OPENSEARCH_BODY.into(), obfuscated); + if let Some(body_value) = span.attributes.get(tags::OPENSEARCH_BODY).and_then(AttributeValue::as_string).map(|s| s.as_ref().to_owned()) { + if let Some(obfuscated) = self.obfuscator.obfuscate_opensearch_string(&body_value) { + span.attributes.insert(tags::OPENSEARCH_BODY.into(), AttributeValue::String(obfuscated)); } } } diff --git a/lib/saluki-components/src/transforms/trace_obfuscation/obfuscator.rs b/lib/saluki-components/src/transforms/trace_obfuscation/obfuscator.rs index 55ac32e791b..eed818f26d9 100644 --- a/lib/saluki-components/src/transforms/trace_obfuscation/obfuscator.rs +++ b/lib/saluki-components/src/transforms/trace_obfuscation/obfuscator.rs @@ -124,4 +124,5 @@ impl Obfuscator { pub fn obfuscate_opensearch_string(&self, query: &str) -> Option { Some(self.open_search_obfuscator.as_ref()?.obfuscate(query).into()) } + } diff --git a/lib/saluki-components/src/transforms/trace_obfuscation/redis.rs b/lib/saluki-components/src/transforms/trace_obfuscation/redis.rs index 2ef6e65f17d..b5f9d97dad7 100644 --- a/lib/saluki-components/src/transforms/trace_obfuscation/redis.rs +++ b/lib/saluki-components/src/transforms/trace_obfuscation/redis.rs @@ -237,13 +237,23 @@ fn obfuscate_redis_cmd(out: &mut String, cmd: &str, args: &[String]) { let mut args = args.to_vec(); match cmd_upper.as_str() { - "AUTH" => { + // AUTH, MIGRATE, HELLO: obfuscate all arguments by replacing the first with "?" and + // truncating. MIGRATE carries an inline AUTH password; HELLO carries AUTH credentials. + "AUTH" | "MIGRATE" | "HELLO" => { if !args.is_empty() { args[0] = "?".to_string(); args.truncate(1); } } + // ACL: keep the subcommand (arg 0) and obfuscate all further arguments. 
+ "ACL" => { + if args.len() > 1 { + args[1] = "?".to_string(); + args.truncate(2); + } + } + "APPEND" | "GETSET" | "LPUSHX" | "GEORADIUSBYMEMBER" | "RPUSHX" | "SET" | "SETNX" | "SISMEMBER" | "ZRANK" | "ZREVRANK" | "ZSCORE" => { obfuscate_arg_n(&mut args, 1); @@ -316,7 +326,9 @@ fn obfuscate_redis_cmd(out: &mut String, cmd: &str, args: &[String]) { fn needs_obfuscation(cmd_upper: &str, args: &[String]) -> bool { match cmd_upper { - "AUTH" | "APPEND" | "GETSET" | "LPUSHX" | "GEORADIUSBYMEMBER" | "RPUSHX" | "SET" | "SETNX" | "SISMEMBER" + "AUTH" | "MIGRATE" | "HELLO" => !args.is_empty(), + "ACL" => args.len() > 1, + "APPEND" | "GETSET" | "LPUSHX" | "GEORADIUSBYMEMBER" | "RPUSHX" | "SET" | "SETNX" | "SISMEMBER" | "ZRANK" | "ZREVRANK" | "ZSCORE" | "HSETNX" | "LREM" | "LSET" | "SETBIT" | "SETEX" | "PSETEX" | "SETRANGE" | "ZINCRBY" | "SMOVE" | "RESTORE" | "LINSERT" | "GEOHASH" | "GEOPOS" | "GEODIST" | "LPUSH" | "RPUSH" | "SREM" | "ZREM" | "SADD" | "GEOADD" | "HSET" | "HMSET" | "MSET" | "MSETNX" | "ZADD" => true, @@ -609,6 +621,20 @@ mod tests { ("ZADD key XX INCR score member", "ZADD key XX INCR score ?"), ("ZADD key XX INCR score", "ZADD key XX INCR score"), ("\nCONFIG command\nSET k v\n\t\t\t", "CONFIG command\nSET k ?"), + // MIGRATE, HELLO: obfuscate everything after the command (arg 0 → ?, truncate) + ("MIGRATE host port key 0 5000", "MIGRATE ?"), + ("MIGRATE host port \"\" 0 5000 COPY REPLACE AUTH secret", "MIGRATE ?"), + ("MIGRATE", "MIGRATE"), + ("HELLO 3 AUTH username secret SETNAME client", "HELLO ?"), + ("HELLO 3", "HELLO ?"), + ("HELLO", "HELLO"), + // ACL: keep subcommand (arg 0), obfuscate arg 1, truncate + ("ACL SETUSER alice on >password ~cached:* +get", "ACL SETUSER ?"), + ("ACL GETUSER alice", "ACL GETUSER ?"), + ("ACL DELUSER alice bob", "ACL DELUSER ?"), + ("ACL LIST", "ACL LIST"), + ("ACL WHOAMI", "ACL WHOAMI"), + ("ACL", "ACL"), ]; for (input, expected) in cases { diff --git a/lib/saluki-components/src/transforms/trace_sampler/catalog.rs 
b/lib/saluki-components/src/transforms/trace_sampler/catalog.rs index ec483b3874e..761e4d1e8bf 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/catalog.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/catalog.rs @@ -28,7 +28,7 @@ struct CatalogEntry { /// /// The catalog maintains a bounded cache of service signatures, evicting /// the least recently used entries when the capacity is exceeded. -pub(super) struct ServiceKeyCatalog { +pub(crate) struct ServiceKeyCatalog { /// Map from ServiceSignature to slot token in the LRU slab. items: FastHashMap, /// LRU list of entries (front = most recently used). @@ -99,6 +99,45 @@ impl ServiceKeyCatalog { hash } + + /// Builds the sampling-rates-by-service map used in HTTP responses to tracers. + /// + /// Keys use the format `"service:,env:"`. The default rate (empty service, + /// empty env — `"service:,env:"`) is always present. Entries whose signature is absent + /// from `rates` are evicted from the catalog (they have received no traffic recently). + /// + /// When a service's env matches `agent_env`, an additional empty-env alias + /// `"service:,env:"` is included so tracers that don't send an env tag still + /// receive a calibrated rate. 
+ pub(crate) fn rates_by_service( + &mut self, + agent_env: &str, + rates: &FastHashMap, + default_rate: f64, + ) -> FastHashMap { + let mut result: FastHashMap = FastHashMap::default(); + let mut stale: Vec<(ServiceSignature, u32)> = Vec::new(); + + for (svc_sig, &slot) in &self.items { + let sig = self.entries.peek(slot).sig; + if let Some(&rate) = rates.get(&sig) { + result.insert(format!("service:{},env:{}", svc_sig.name(), svc_sig.env()), rate); + if !svc_sig.env().is_empty() && svc_sig.env() == agent_env { + result.insert(format!("service:{},env:", svc_sig.name()), rate); + } + } else { + stale.push((svc_sig.clone(), slot)); + } + } + + for (key, slot) in stale { + self.entries.remove(slot); + self.items.remove(&key); + } + + result.insert("service:,env:".to_string(), default_rate); + result + } } #[cfg(test)] diff --git a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs index 9d70ccac60d..2f59554423a 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs @@ -105,7 +105,7 @@ impl Sampler { } } - pub(super) fn count_weighted_sig(&mut self, now: SystemTime, signature: &Signature, n: f32) -> bool { + pub(crate) fn count_weighted_sig(&mut self, now: SystemTime, signature: &Signature, n: f32) -> bool { // All traces within the same `BUCKET_DURATION` interval share the same bucket_id let bucket_id = now.duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() / BUCKET_DURATION.as_secs(); let prev_bucket_id = self.last_bucket_id; @@ -209,24 +209,6 @@ impl Sampler { (rates, self.default_rate()) } - pub fn update_target_tps(&mut self, target_tps: f64) { - let prev_target = self.target_tps; - self.target_tps = target_tps; - - if prev_target == 0.0 { - return; - } - let ratio = target_tps / prev_target; - for rate in self.rates.values_mut() { - let new_rate = (*rate * ratio).min(1.0); - 
*rate = new_rate; - } - } - - pub fn target_tps(&self) -> f64 { - self.target_tps - } - /// Computes the default rate for unknown signatures. /// Based on the moving max of all signatures seen and the lowest stored rate. fn default_rate(&self) -> f64 { diff --git a/lib/saluki-components/src/transforms/trace_sampler/errors.rs b/lib/saluki-components/src/transforms/trace_sampler/errors.rs index 805a741d7fd..c49b058a60c 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/errors.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/errors.rs @@ -40,7 +40,6 @@ mod tests { // logic for these tests are taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/scoresampler_test.go#L23 use std::time::{Duration, SystemTime}; - use saluki_context::tags::TagSet; use saluki_core::data_model::event::trace::{Span, Trace}; use stringtheory::MetaString; @@ -63,7 +62,6 @@ mod tests { MetaString::from("GET /api"), MetaString::from("resource"), MetaString::from("web"), - trace_id, 1, // span_id 0, // parent_id 42, // start @@ -77,7 +75,6 @@ mod tests { MetaString::from("SELECT * FROM users"), MetaString::from("resource"), MetaString::from("sql"), - trace_id, 2, // span_id 1, // parent_id 100, // start @@ -85,7 +82,8 @@ mod tests { 0, // error ); - let trace = Trace::new(vec![root, child], TagSet::default()); + let mut trace = Trace::new(vec![root, child]); + trace.trace_id_low = trace_id; (trace, 0) // Root is at index 0 } @@ -97,7 +95,6 @@ mod tests { MetaString::from("GET /api"), MetaString::from("resource"), MetaString::from("web"), - trace_id, 1, // span_id 0, // parent_id 42, // start @@ -111,7 +108,6 @@ mod tests { MetaString::from("SELECT * FROM users"), MetaString::from("resource"), MetaString::from("sql"), - trace_id, 2, // span_id 1, // parent_id 100, // start @@ -119,7 +115,8 @@ mod tests { 0, // error ); - let trace = Trace::new(vec![root, child], TagSet::default()); + let mut trace = Trace::new(vec![root, child]); + 
trace.trace_id_low = trace_id; (trace, 0) // Root is at index 0 } diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 3341e169abf..da7f14b6b77 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -15,11 +15,12 @@ use async_trait::async_trait; use memory_accounting::{MemoryBounds, MemoryBoundsBuilder}; use saluki_common::collections::FastHashMap; +use saluki_common::rate::TokenBucket; use saluki_config::GenericConfiguration; use saluki_core::{ components::{transforms::*, ComponentContext}, data_model::event::{ - trace::{Span, Trace, TraceSampling}, + trace::{AttributeValue, Span, Trace}, Event, }, topology::EventsBuffer, @@ -28,21 +29,28 @@ use saluki_error::GenericError; use stringtheory::MetaString; use tracing::debug; -mod catalog; -mod core_sampler; +pub(crate) mod catalog; +pub(crate) mod core_sampler; mod errors; -mod priority_sampler; mod probabilistic; mod rare_sampler; mod score_sampler; -mod signature; +pub(crate) mod signature; +mod v1; +mod v1_no_priority; +mod v1_priority; use self::probabilistic::PROB_RATE_KEY; +use self::v1::V1TraceSamplerImpl; +use self::v1::ERROR_SAMPLER_BURST as V1_ERROR_SAMPLER_BURST; +use self::v1_no_priority::V1NoPrioritySampler; +use self::v1_priority::PrioritySampler; use crate::common::datadog::{ - apm::ApmConfig, sample_by_rate, DECISION_MAKER_MANUAL, DECISION_MAKER_PROBABILISTIC, OTEL_TRACE_ID_META_KEY, - SAMPLING_PRIORITY_METRIC_KEY, TAG_DECISION_MAKER, + apm::ApmConfig, get_trace_env, sample_by_rate, DECISION_MAKER_MANUAL, DECISION_MAKER_PROBABILISTIC, + OTEL_TRACE_ID_META_KEY, SAMPLING_PRIORITY_METRIC_KEY, TAG_DECISION_MAKER, }; use crate::common::otlp::config::TracesConfig; +use crate::sources::apm::sampling_rates::V1SamplingRatesHandle; // Sampling priority constants (matching datadog-agent) const PRIORITY_AUTO_DROP: i32 = 0; @@ -66,10 +74,20 @@ fn 
normalize_sampling_rate(rate: f64) -> f64 { } /// Configuration for the trace sampler transform. -#[derive(Debug)] pub struct TraceSamplerConfiguration { apm_config: ApmConfig, otlp_sampling_rate: f64, + sampling_rates: Option, +} + +impl std::fmt::Debug for TraceSamplerConfiguration { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TraceSamplerConfiguration") + .field("apm_config", &self.apm_config) + .field("otlp_sampling_rate", &self.otlp_sampling_rate) + .field("sampling_rates", &self.sampling_rates.as_ref().map(|_| "")) + .finish() + } } impl TraceSamplerConfiguration { @@ -81,8 +99,17 @@ impl TraceSamplerConfiguration { Ok(Self { apm_config, otlp_sampling_rate, + sampling_rates: None, }) } + + /// Attaches a shared [`V1SamplingRatesHandle`] to enable the APM (V1) sampling path. + /// + /// When set, `build()` returns a `V1TraceSamplerImpl` instead of the OTLP-path `TraceSampler`. + pub fn with_sampling_rates(mut self, handle: V1SamplingRatesHandle) -> Self { + self.sampling_rates = Some(handle); + self + } } #[async_trait] @@ -90,37 +117,80 @@ impl SynchronousTransformBuilder for TraceSamplerConfiguration { async fn build(&self, _context: ComponentContext) -> Result, GenericError> { // TODO: Need to support remote configuration changing these at runtime // See https://github.com/DataDog/saluki/issues/1326 - let sampler = TraceSampler { - sampling_rate: self.apm_config.probabilistic_sampler_sampling_percentage() / 100.0, - error_sampling_enabled: self.apm_config.error_sampling_enabled(), - error_tracking_standalone: self.apm_config.error_tracking_standalone_enabled(), - probabilistic_sampler_enabled: self.apm_config.probabilistic_sampler_enabled(), - otlp_sampling_rate: self.otlp_sampling_rate, - error_sampler: errors::ErrorsSampler::new(self.apm_config.errors_per_second(), ERROR_SAMPLE_RATE), - priority_sampler: priority_sampler::PrioritySampler::new( - self.apm_config.default_env().clone(), - ERROR_SAMPLE_RATE, - 
self.apm_config.target_traces_per_second(), - ), - no_priority_sampler: score_sampler::NoPrioritySampler::new( - self.apm_config.target_traces_per_second(), - ERROR_SAMPLE_RATE, - ), - rare_sampler: rare_sampler::RareSampler::new( - self.apm_config.rare_sampler_enabled(), - self.apm_config.rare_sampler_tps(), - std::time::Duration::from_secs_f64(self.apm_config.rare_sampler_cooldown_period_secs()), - self.apm_config.rare_sampler_cardinality(), - ), - }; + if let Some(rates) = &self.sampling_rates { + // APM path: use V1 sampler with priority/rate-feedback loop. + if self.apm_config.probabilistic_sampler_enabled() { + tracing::warn!( + "apm_config.probabilistic_sampler.enabled is set but the V1 trace sampler \ + does not yet implement the probabilistic path; falling back to priority sampler" + ); + } + + let error_token_bucket = if self.apm_config.error_sampling_enabled() { + Some(TokenBucket::new(self.apm_config.errors_per_second(), V1_ERROR_SAMPLER_BURST)) + } else { + None + }; + + let sampler = V1TraceSamplerImpl { + priority_sampler: PrioritySampler::new( + self.apm_config.default_env().clone(), + self.apm_config.target_traces_per_second(), + 1.0, + rates.clone(), + ), + no_priority_sampler: V1NoPrioritySampler::new(self.apm_config.target_traces_per_second()), + rare_sampler: rare_sampler::RareSampler::new( + self.apm_config.rare_sampler_enabled(), + self.apm_config.rare_sampler_tps(), + std::time::Duration::from_secs_f64(self.apm_config.rare_sampler_cooldown_period_secs()), + self.apm_config.rare_sampler_cardinality(), + ), + error_token_bucket, + error_sampling_enabled: self.apm_config.error_sampling_enabled(), + error_tracking_standalone: self.apm_config.error_tracking_standalone_enabled(), + }; - Ok(Box::new(sampler)) + Ok(Box::new(sampler)) + } else { + // OTLP path: existing TraceSampler. 
+ let sampler = TraceSampler { + sampling_rate: self.apm_config.probabilistic_sampler_sampling_percentage() / 100.0, + error_sampling_enabled: self.apm_config.error_sampling_enabled(), + error_tracking_standalone: self.apm_config.error_tracking_standalone_enabled(), + probabilistic_sampler_enabled: self.apm_config.probabilistic_sampler_enabled(), + otlp_sampling_rate: self.otlp_sampling_rate, + error_sampler: errors::ErrorsSampler::new(self.apm_config.errors_per_second(), ERROR_SAMPLE_RATE), + priority_sampler: PrioritySampler::new( + self.apm_config.default_env().clone(), + self.apm_config.target_traces_per_second(), + ERROR_SAMPLE_RATE, + V1SamplingRatesHandle::new(), + ), + no_priority_sampler: score_sampler::NoPrioritySampler::new( + self.apm_config.target_traces_per_second(), + ERROR_SAMPLE_RATE, + ), + rare_sampler: rare_sampler::RareSampler::new( + self.apm_config.rare_sampler_enabled(), + self.apm_config.rare_sampler_tps(), + std::time::Duration::from_secs_f64(self.apm_config.rare_sampler_cooldown_period_secs()), + self.apm_config.rare_sampler_cardinality(), + ), + }; + + Ok(Box::new(sampler)) + } } } impl MemoryBounds for TraceSamplerConfiguration { fn specify_bounds(&self, builder: &mut MemoryBoundsBuilder) { - builder.minimum().with_single_value::("component struct"); + if self.sampling_rates.is_some() { + builder.minimum().with_single_value::("component struct"); + } else { + builder.minimum().with_single_value::("component struct"); + } } } @@ -131,7 +201,7 @@ pub struct TraceSampler { probabilistic_sampler_enabled: bool, otlp_sampling_rate: f64, error_sampler: errors::ErrorsSampler, - priority_sampler: priority_sampler::PrioritySampler, + priority_sampler: PrioritySampler, no_priority_sampler: score_sampler::NoPrioritySampler, rare_sampler: rare_sampler::RareSampler, } @@ -171,7 +241,7 @@ impl TraceSampler { if parent_id_to_child.len() != 1 { debug!( "Didn't reliably find the root span for traceID:{}", - &spans[0].trace_id() + &spans[0].span_id() ); } 
@@ -188,10 +258,8 @@ impl TraceSampler { /// Check for user-set sampling priority in trace fn get_user_priority(&self, trace: &Trace, root_span_idx: usize) -> Option { // First check trace-level sampling priority (last-seen priority from OTLP ingest) - if let Some(sampling) = trace.sampling() { - if let Some(priority) = sampling.priority { - return Some(priority); - } + if let Some(priority) = trace.priority { + return Some(priority); } if trace.spans().is_empty() { @@ -201,14 +269,14 @@ impl TraceSampler { // Fall back to checking spans (for compatibility with non-OTLP traces) // Prefer the root span (common case), but fall back to scanning all spans to be robust to ordering. if let Some(root) = trace.spans().get(root_span_idx) { - if let Some(&p) = root.metrics().get(SAMPLING_PRIORITY_METRIC_KEY) { + if let Some(p) = root.attributes.get(SAMPLING_PRIORITY_METRIC_KEY).and_then(AttributeValue::as_float) { return Some(p as i32); } } let spans = trace.spans(); spans .iter() - .find_map(|span| span.metrics().get(SAMPLING_PRIORITY_METRIC_KEY).map(|&p| p as i32)) + .find_map(|span| span.attributes.get(SAMPLING_PRIORITY_METRIC_KEY).and_then(AttributeValue::as_float).map(|p| p as i32)) } /// Returns `true` if the given trace ID should be probabilistically sampled. @@ -221,8 +289,10 @@ impl TraceSampler { .spans() .get(root_span_idx) .map(|span| { - span.meta() - .contains_key(&MetaString::from_static(OTEL_TRACE_ID_META_KEY)) + span.attributes + .get(OTEL_TRACE_ID_META_KEY) + .and_then(AttributeValue::as_string) + .is_some() }) .unwrap_or(false) } @@ -238,10 +308,10 @@ impl TraceSampler { /// /// This checks for the `_dd.span_events.has_exception` meta field set to `"true"`. 
fn span_contains_exception_span_event(&self, span: &Span) -> bool { - if let Some(has_exception) = span.meta().get("_dd.span_events.has_exception") { - return has_exception == "true"; - } - false + span.attributes + .get("_dd.span_events.has_exception") + .and_then(AttributeValue::as_string) + .is_some_and(|v| v == "true") } /// Computes the OTLP pre-sampling priority and decision maker for a trace, mirroring @@ -258,7 +328,7 @@ impl TraceSampler { let (priority, dm) = if let Some(user_priority) = self.get_user_priority(trace, root_span_idx) { (user_priority, DECISION_MAKER_MANUAL) } else { - let root_trace_id = trace.spans()[root_span_idx].trace_id(); + let root_trace_id = trace.trace_id_low; if sample_by_rate(root_trace_id, self.otlp_sampling_rate) { (PRIORITY_AUTO_KEEP, DECISION_MAKER_PROBABILISTIC) } else { @@ -267,7 +337,7 @@ impl TraceSampler { }; if priority == PRIORITY_AUTO_KEEP { if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { - root_span.metrics_mut().remove(PROB_RATE_KEY); + root_span.attributes.remove(PROB_RATE_KEY); } } Some((priority, dm)) @@ -277,11 +347,12 @@ impl TraceSampler { /// /// Returns `true` if the trace was modified. 
fn analyzed_span_sampling(&self, trace: &mut Trace) -> bool { - let retained = trace.retain_spans(|_, span| span.metrics().contains_key(KEY_ANALYZED_SPANS)); + let retained = trace.retain_spans(|_, span| span.attributes.get(KEY_ANALYZED_SPANS).and_then(AttributeValue::as_float).is_some()); if retained > 0 { // Mark trace as kept with high priority - let sampling = TraceSampling::new(false, Some(PRIORITY_USER_KEEP), None, Some(self.sampling_rate)); - trace.set_sampling(Some(sampling)); + trace.priority = Some(PRIORITY_USER_KEEP); + trace.dropped_trace = false; + trace.otlp_sampling_rate = Some(self.sampling_rate); true } else { false @@ -293,22 +364,18 @@ impl TraceSampler { trace .spans() .iter() - .any(|span| span.metrics().contains_key(KEY_ANALYZED_SPANS)) + .any(|span| span.attributes.get(KEY_ANALYZED_SPANS).and_then(AttributeValue::as_float).is_some()) } /// Apply Single Span Sampling to the trace /// Returns true if the trace was modified fn single_span_sampling(&self, trace: &mut Trace) -> bool { - let retained = trace.retain_spans(|_, span| span.metrics().contains_key(KEY_SPAN_SAMPLING_MECHANISM)); + let retained = trace.retain_spans(|_, span| span.attributes.get(KEY_SPAN_SAMPLING_MECHANISM).and_then(AttributeValue::as_float).is_some()); if retained > 0 { // Set high priority and mark as kept - let sampling = TraceSampling::new( - false, - Some(PRIORITY_USER_KEEP), - None, // No decision maker for SSS - Some(self.sampling_rate), - ); - trace.set_sampling(Some(sampling)); + trace.priority = Some(PRIORITY_USER_KEEP); + trace.dropped_trace = false; + trace.otlp_sampling_rate = Some(self.sampling_rate); true } else { false @@ -361,15 +428,14 @@ impl TraceSampler { // Rare sampler wins over probabilistic sampling. 
prob_keep = true; } else { - // Run probabilistic sampler - use root span's trace ID - let root_trace_id = trace.spans()[root_span_idx].trace_id(); + // Run probabilistic sampler - use trace's trace ID + let root_trace_id = trace.trace_id_low; if self.sample_probabilistic(root_trace_id) { decision_maker = DECISION_MAKER_PROBABILISTIC; prob_keep = true; if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { - let metrics = root_span.metrics_mut(); - metrics.insert(MetaString::from(PROB_RATE_KEY), self.sampling_rate); + root_span.attributes.insert(MetaString::from(PROB_RATE_KEY), AttributeValue::Float(self.sampling_rate)); } } else if self.error_sampling_enabled && contains_error { prob_keep = self.error_sampler.sample_error(now, trace, root_span_idx); @@ -396,7 +462,11 @@ impl TraceSampler { return (true, priority, "", Some(root_span_idx)); } - if self.priority_sampler.sample(now, trace, root_span_idx, priority, 0.0) { + let tracer_env = get_trace_env(trace, root_span_idx) + .map(|e| e.as_ref().to_owned()) + .unwrap_or_default(); + let root = &mut trace.spans_mut()[root_span_idx]; + if self.priority_sampler.sample(now, priority, root, &tracer_env, 0.0) { return (true, priority, "", Some(root_span_idx)); } } else if self.is_otlp_trace(trace, root_span_idx) { @@ -406,10 +476,10 @@ impl TraceSampler { } // some sampling happens upstream in the otlp receiver in the agent: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/api/otlp.go#L572 - let root_trace_id = trace.spans()[root_span_idx].trace_id(); + let root_trace_id = trace.trace_id_low; if sample_by_rate(root_trace_id, self.otlp_sampling_rate) { if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { - root_span.metrics_mut().remove(PROB_RATE_KEY); + root_span.attributes.remove(PROB_RATE_KEY); } return ( true, @@ -453,7 +523,7 @@ impl TraceSampler { // Add tag for the decision maker let existing_decision_maker = if decision_maker.is_empty() { - 
root_span_value.meta().get(TAG_DECISION_MAKER).cloned() + root_span_value.attributes.get(TAG_DECISION_MAKER).and_then(AttributeValue::as_string).cloned() } else { None }; @@ -463,29 +533,25 @@ impl TraceSampler { Some(MetaString::from(decision_maker)) }; - let meta = root_span_value.meta_mut(); // When the APM-level probabilistic sampler is used with OTLP traces, the DD Agent writes // _dd.p.dm to trace chunk tags only (not span meta). For the legacy OTLP sampling path, // it is written to both. We match that behavior by skipping the span meta write only when - // both conditions hold; the DM value still flows through TraceSampling to the encoder. + // both conditions hold; the DM value still flows through the flat `decision_maker` field to the encoder. if priority > 0 && !(is_otlp && self.probabilistic_sampler_enabled) { if let Some(dm) = decision_maker_meta.as_ref() { - meta.insert(MetaString::from(TAG_DECISION_MAKER), dm.clone()); + root_span_value.attributes.insert(MetaString::from(TAG_DECISION_MAKER), AttributeValue::String(dm.clone())); } } // Now we can use trace again to set sampling metadata. 
- let sampling = TraceSampling::new( - !keep, - Some(priority), - if priority > 0 { decision_maker_meta } else { None }, - Some(if is_otlp { - self.otlp_sampling_rate - } else { - self.sampling_rate - }), - ); - trace.set_sampling(Some(sampling)); + trace.priority = Some(priority); + trace.dropped_trace = !keep; + trace.decision_maker = if priority > 0 { decision_maker_meta } else { None }; + trace.otlp_sampling_rate = Some(if is_otlp { + self.otlp_sampling_rate + } else { + self.sampling_rate + }); } fn process_trace(&mut self, trace: &mut Trace) -> bool { @@ -544,7 +610,6 @@ impl SynchronousTransform for TraceSampler { mod tests { use std::collections::HashMap; - use saluki_context::tags::TagSet; use saluki_core::data_model::event::trace::{Span as DdSpan, Trace}; const PRIORITY_USER_DROP: i32 = -1; @@ -557,19 +622,18 @@ mod tests { probabilistic_sampler_enabled: true, otlp_sampling_rate: 1.0, error_sampler: errors::ErrorsSampler::new(10.0, 1.0), - priority_sampler: priority_sampler::PrioritySampler::new(MetaString::from("agent-env"), 1.0, 10.0), + priority_sampler: PrioritySampler::new(MetaString::from("agent-env"), 10.0, 1.0, V1SamplingRatesHandle::new()), no_priority_sampler: score_sampler::NoPrioritySampler::new(10.0, 1.0), rare_sampler: rare_sampler::RareSampler::new(false, 5.0, std::time::Duration::from_secs(300), 200), } } - fn create_test_span(trace_id: u64, span_id: u64, error: i32) -> DdSpan { + fn create_test_span(span_id: u64, error: i32) -> DdSpan { DdSpan::new( MetaString::from("test-service"), MetaString::from("test-operation"), MetaString::from("test-resource"), MetaString::from("test-type"), - trace_id, span_id, 0, // parent_id 0, // start @@ -578,26 +642,25 @@ mod tests { ) } - fn create_test_span_with_metrics(trace_id: u64, span_id: u64, metrics: HashMap) -> DdSpan { + fn create_test_span_with_metrics(span_id: u64, metrics: HashMap) -> DdSpan { let mut metrics_map = saluki_common::collections::FastHashMap::default(); for (k, v) in metrics { 
metrics_map.insert(MetaString::from(k), v); } - create_test_span(trace_id, span_id, 0).with_metrics(metrics_map) + create_test_span(span_id, 0).with_metrics(metrics_map) } #[allow(dead_code)] - fn create_test_span_with_meta(trace_id: u64, span_id: u64, meta: HashMap) -> DdSpan { + fn create_test_span_with_meta(span_id: u64, meta: HashMap) -> DdSpan { let mut meta_map = saluki_common::collections::FastHashMap::default(); for (k, v) in meta { meta_map.insert(MetaString::from(k), MetaString::from(v)); } - create_test_span(trace_id, span_id, 0).with_meta(meta_map) + create_test_span(span_id, 0).with_meta(meta_map) } fn create_test_trace(spans: Vec) -> Trace { - let tags = TagSet::default(); - Trace::new(spans, tags) + Trace::new(spans) } #[test] @@ -607,7 +670,7 @@ mod tests { // Test trace with user-set priority = 2 (UserKeep) let mut metrics = HashMap::new(); metrics.insert(SAMPLING_PRIORITY_METRIC_KEY.to_string(), 2.0); - let span = create_test_span_with_metrics(12345, 1, metrics); + let span = create_test_span_with_metrics(1, metrics); let trace = create_test_trace(vec![span]); let root_idx = sampler.get_root_span_index(&trace).unwrap(); @@ -616,14 +679,14 @@ mod tests { // Test trace with user-set priority = -1 (UserDrop) let mut metrics = HashMap::new(); metrics.insert(SAMPLING_PRIORITY_METRIC_KEY.to_string(), -1.0); - let span = create_test_span_with_metrics(12345, 1, metrics); + let span = create_test_span_with_metrics(1, metrics); let trace = create_test_trace(vec![span]); let root_idx = sampler.get_root_span_index(&trace).unwrap(); assert_eq!(sampler.get_user_priority(&trace, root_idx), Some(-1)); // Test trace without user priority - let span = create_test_span(12345, 1, 0); + let span = create_test_span(1, 0); let trace = create_test_trace(vec![span]); let root_idx = sampler.get_root_span_index(&trace).unwrap(); @@ -638,11 +701,11 @@ mod tests { // Create spans with different priorities - root has 0, later span has 2 let mut metrics_root = HashMap::new(); 
metrics_root.insert(SAMPLING_PRIORITY_METRIC_KEY.to_string(), 0.0); - let root_span = create_test_span_with_metrics(12345, 1, metrics_root); + let root_span = create_test_span_with_metrics(1, metrics_root); let mut metrics_later = HashMap::new(); metrics_later.insert(SAMPLING_PRIORITY_METRIC_KEY.to_string(), 1.0); - let later_span = create_test_span_with_metrics(12345, 2, metrics_later).with_parent_id(1); + let later_span = create_test_span_with_metrics(2, metrics_later).with_parent_id(1); let mut trace = create_test_trace(vec![root_span, later_span]); let root_idx = sampler.get_root_span_index(&trace).unwrap(); @@ -651,15 +714,15 @@ mod tests { assert_eq!(sampler.get_user_priority(&trace, root_idx), Some(0)); // Now set trace-level priority to 2 (simulating last-seen priority from OTLP translator) - trace.set_sampling(Some(TraceSampling::new(false, Some(2), None, None))); + trace.priority = Some(2); // Trace-level priority should take precedence assert_eq!(sampler.get_user_priority(&trace, root_idx), Some(2)); // Test that trace-level priority is used even when no span has priority - let span_no_priority = create_test_span(12345, 3, 0); + let span_no_priority = create_test_span(3, 0); let mut trace_only_trace_level = create_test_trace(vec![span_no_priority]); - trace_only_trace_level.set_sampling(Some(TraceSampling::new(false, Some(1), None, None))); + trace_only_trace_level.priority = Some(1); let root_idx = sampler.get_root_span_index(&trace_only_trace_level).unwrap(); assert_eq!(sampler.get_user_priority(&trace_only_trace_level, root_idx), Some(1)); @@ -671,9 +734,9 @@ mod tests { sampler.probabilistic_sampler_enabled = false; // Use legacy path that checks user priority // Test that manual keep (priority = 2) works via trace-level priority - let span = create_test_span(12345, 1, 0); + let span = create_test_span(1, 0); let mut trace = create_test_trace(vec![span]); - trace.set_sampling(Some(TraceSampling::new(false, Some(PRIORITY_USER_KEEP), None, None))); + 
trace.priority = Some(PRIORITY_USER_KEEP); let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); assert!(keep); @@ -681,18 +744,18 @@ mod tests { assert_eq!(decision_maker, ""); // Test manual drop (priority = -1) via trace-level priority - let span = create_test_span(12345, 1, 0); + let span = create_test_span(1, 0); let mut trace = create_test_trace(vec![span]); - trace.set_sampling(Some(TraceSampling::new(false, Some(PRIORITY_USER_DROP), None, None))); + trace.priority = Some(PRIORITY_USER_DROP); let (keep, priority, _, _) = sampler.run_samplers(&mut trace); assert!(!keep); // Should not keep when user drops assert_eq!(priority, PRIORITY_USER_DROP); // Test that priority = 1 (auto keep) via trace-level is also respected - let span = create_test_span(12345, 1, 0); + let span = create_test_span(1, 0); let mut trace = create_test_trace(vec![span]); - trace.set_sampling(Some(TraceSampling::new(false, Some(PRIORITY_AUTO_KEEP), None, None))); + trace.priority = Some(PRIORITY_AUTO_KEEP); let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); assert!(keep); @@ -716,12 +779,12 @@ mod tests { let sampler = create_test_sampler(); // Test trace with error field set - let span_with_error = create_test_span(12345, 1, 1); + let span_with_error = create_test_span(1, 1); let trace = create_test_trace(vec![span_with_error]); assert!(sampler.trace_contains_error(&trace, false)); // Test trace without error - let span_without_error = create_test_span(12345, 1, 0); + let span_without_error = create_test_span(1, 0); let trace = create_test_trace(vec![span_without_error]); assert!(!sampler.trace_contains_error(&trace, false)); } @@ -735,7 +798,7 @@ mod tests { // Create trace with error that would be dropped by probabilistic // Using a trace ID that we know will be dropped at 50% rate - let span_with_error = create_test_span(u64::MAX - 1, 1, 1); + let span_with_error = create_test_span(1, 1); let mut trace = 
create_test_trace(vec![span_with_error]); let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); @@ -749,7 +812,7 @@ mod tests { let mut metrics = HashMap::new(); metrics.insert(SAMPLING_PRIORITY_METRIC_KEY.to_string(), 2.0); - let span = create_test_span_with_metrics(12345, 1, metrics); + let span = create_test_span_with_metrics(1, metrics); let mut trace = create_test_trace(vec![span]); let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); @@ -778,7 +841,6 @@ mod tests { MetaString::from("operation"), MetaString::from("resource"), MetaString::from("type"), - 12345, 1, 0, // parent_id = 0 indicates root 0, @@ -790,7 +852,6 @@ mod tests { MetaString::from("child_op"), MetaString::from("resource"), MetaString::from("type"), - 12345, 2, 1, // parent_id = 1 (points to root) 100, @@ -808,7 +869,6 @@ mod tests { MetaString::from("orphan"), MetaString::from("resource"), MetaString::from("type"), - 12345, 3, 999, // parent_id = 999 (doesn't exist in trace) 200, @@ -820,8 +880,8 @@ mod tests { assert_eq!(trace.spans()[root_idx].span_id(), 3); // Test 3: Multiple root candidates: should return the last one found (index 1) - let span1 = create_test_span(12345, 1, 0); - let span2 = create_test_span(12345, 2, 0); + let span1 = create_test_span(1, 0); + let span2 = create_test_span(2, 0); let trace = create_test_trace(vec![span1, span2]); // Both have parent_id = 0, should return the last one found (span_id = 2) let root_idx = sampler.get_root_span_index(&trace).unwrap(); @@ -839,10 +899,10 @@ mod tests { // Create span with SSS metric let mut metrics_map = saluki_common::collections::FastHashMap::default(); metrics_map.insert(MetaString::from(KEY_SPAN_SAMPLING_MECHANISM), 8.0); // Any value - let sss_span = create_test_span(12345, 1, 0).with_metrics(metrics_map.clone()); + let sss_span = create_test_span(1, 0).with_metrics(metrics_map.clone()); // Create regular span without SSS - let regular_span = create_test_span(12345, 2, 0); + 
let regular_span = create_test_span(2, 0); let mut trace = create_test_trace(vec![sss_span.clone(), regular_span]); @@ -853,11 +913,11 @@ mod tests { assert_eq!(trace.spans()[0].span_id(), 1); // It's the SSS span // Check that trace has been marked as kept with high priority - assert!(trace.sampling().is_some()); - assert_eq!(trace.sampling().as_ref().unwrap().priority, Some(PRIORITY_USER_KEEP)); + assert!(trace.priority.is_some()); + assert_eq!(trace.priority, Some(PRIORITY_USER_KEEP)); // Test 2: Trace without SSS tags should not be modified - let trace_without_sss = create_test_trace(vec![create_test_span(12345, 3, 0)]); + let trace_without_sss = create_test_trace(vec![create_test_span(3, 0)]); let mut trace_copy = trace_without_sss.clone(); let modified = sampler.single_span_sampling(&mut trace_copy); assert!(!modified); @@ -871,15 +931,15 @@ mod tests { // Test 1: Trace with analyzed spans let mut metrics_map = saluki_common::collections::FastHashMap::default(); metrics_map.insert(MetaString::from(KEY_ANALYZED_SPANS), 1.0); - let analyzed_span = create_test_span(12345, 1, 0).with_metrics(metrics_map.clone()); - let regular_span = create_test_span(12345, 2, 0); + let analyzed_span = create_test_span(1, 0).with_metrics(metrics_map.clone()); + let regular_span = create_test_span(2, 0); let mut trace = create_test_trace(vec![analyzed_span.clone(), regular_span]); let analyzed_span_ids: Vec = trace .spans() .iter() - .filter(|span| span.metrics().contains_key(KEY_ANALYZED_SPANS)) + .filter(|span| span.attributes.get(KEY_ANALYZED_SPANS).and_then(AttributeValue::as_float).is_some()) .map(|span| span.span_id()) .collect(); assert_eq!(analyzed_span_ids, vec![1]); @@ -889,15 +949,15 @@ mod tests { assert!(modified); assert_eq!(trace.spans().len(), 1); assert_eq!(trace.spans()[0].span_id(), 1); - assert!(trace.sampling().is_some()); + assert!(trace.priority.is_some()); // Test 2: Trace without analyzed spans - let trace_no_analytics = 
create_test_trace(vec![create_test_span(12345, 3, 0)]); + let trace_no_analytics = create_test_trace(vec![create_test_span(3, 0)]); let mut trace_no_analytics_copy = trace_no_analytics.clone(); let analyzed_span_ids: Vec = trace_no_analytics .spans() .iter() - .filter(|span| span.metrics().contains_key(KEY_ANALYZED_SPANS)) + .filter(|span| span.attributes.get(KEY_ANALYZED_SPANS).and_then(AttributeValue::as_float).is_some()) .map(|span| span.span_id()) .collect(); assert!(analyzed_span_ids.is_empty()); @@ -920,7 +980,6 @@ mod tests { MetaString::from("operation"), MetaString::from("resource"), MetaString::from("type"), - trace_id, 1, 0, // parent_id = 0 indicates root 0, @@ -928,6 +987,7 @@ mod tests { 0, ); let mut trace = create_test_trace(vec![root_span]); + trace.trace_id_low = trace_id; let (keep, priority, decision_maker, root_span_idx) = sampler.run_samplers(&mut trace); @@ -939,8 +999,8 @@ mod tests { // Check that the root span already has the probRateKey (it should have been added in run_samplers) let root_idx = root_span_idx.unwrap_or(0); let root_span = &trace.spans()[root_idx]; - assert!(root_span.metrics().contains_key(PROB_RATE_KEY)); - assert_eq!(*root_span.metrics().get(PROB_RATE_KEY).unwrap(), 0.75); + assert!(root_span.attributes.get(PROB_RATE_KEY).and_then(AttributeValue::as_float).is_some()); + assert_eq!(root_span.attributes.get(PROB_RATE_KEY).and_then(AttributeValue::as_float).unwrap(), 0.75); // Test that apply_sampling_metadata still works correctly for other metadata let mut trace_with_metadata = trace.clone(); @@ -948,9 +1008,9 @@ mod tests { // Check that decision maker tag was added let modified_root = &trace_with_metadata.spans()[root_idx]; - assert!(modified_root.meta().contains_key(TAG_DECISION_MAKER)); + assert!(modified_root.attributes.get(TAG_DECISION_MAKER).and_then(AttributeValue::as_string).is_some()); assert_eq!( - modified_root.meta().get(TAG_DECISION_MAKER).unwrap(), + 
modified_root.attributes.get(TAG_DECISION_MAKER).and_then(AttributeValue::as_string).unwrap(), &MetaString::from(DECISION_MAKER_PROBABILISTIC) ); } @@ -965,10 +1025,10 @@ mod tests { /// /// The rare sampler only considers spans that have `_top_level=1` or `_dd.measured=1`. /// This helper sets `_top_level=1` so that the rare sampler can consider the span. - fn create_top_level_span(trace_id: u64, span_id: u64) -> DdSpan { + fn create_top_level_span(span_id: u64) -> DdSpan { let mut metrics = saluki_common::collections::FastHashMap::default(); metrics.insert(MetaString::from("_top_level"), 1.0); - create_test_span(trace_id, span_id, 0).with_metrics(metrics) + create_test_span(span_id, 0).with_metrics(metrics) } /// Create a `TraceSampler` with the rare sampler enabled and a very high TPS limit so it @@ -989,7 +1049,7 @@ mod tests { sampler.sampling_rate = 0.0; // probabilistic drops everything sampler.probabilistic_sampler_enabled = true; - let span = create_top_level_span(111, 1); + let span = create_top_level_span(1); let mut trace = create_test_trace(vec![span]); let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); @@ -1007,14 +1067,14 @@ mod tests { sampler.sampling_rate = 0.0; sampler.probabilistic_sampler_enabled = true; - let span = create_top_level_span(222, 1); + let span = create_top_level_span(1); let mut trace = create_test_trace(vec![span]); let (keep, _, _, root_idx) = sampler.run_samplers(&mut trace); assert!(keep); let root = &trace.spans()[root_idx.unwrap()]; assert_eq!( - root.metrics().get(rare_sampler::RARE_KEY).copied(), + root.attributes.get(rare_sampler::RARE_KEY).and_then(AttributeValue::as_float), Some(1.0), "_dd.rare should be 1 on first occurrence" ); @@ -1031,14 +1091,14 @@ mod tests { sampler.probabilistic_sampler_enabled = true; // First trace: rare catches it. 
- let span1 = create_top_level_span(333, 1); + let span1 = create_top_level_span(1); let mut trace1 = create_test_trace(vec![span1]); let (keep1, _, _, _) = sampler.run_samplers(&mut trace1); assert!(keep1, "first occurrence should be kept by rare sampler"); // Second trace: same signature (same service/operation/resource on the top-level span), // still within TTL → rare won't catch it; probabilistic at 0% drops it. - let span2 = create_top_level_span(333, 2); + let span2 = create_top_level_span(2); let mut trace2 = create_test_trace(vec![span2]); let (keep2, priority2, _, _) = sampler.run_samplers(&mut trace2); assert!(!keep2, "second occurrence within TTL should be dropped"); @@ -1054,7 +1114,7 @@ mod tests { sampler.sampling_rate = 0.0; sampler.probabilistic_sampler_enabled = true; - let span = create_top_level_span(444, 1); + let span = create_top_level_span(1); let mut trace = create_test_trace(vec![span]); let (keep, priority, _, _) = sampler.run_samplers(&mut trace); @@ -1075,7 +1135,7 @@ mod tests { MetaString::from(SAMPLING_PRIORITY_METRIC_KEY), PRIORITY_AUTO_DROP as f64, ); - let span = create_test_span(555, 1, 0).with_metrics(metrics); + let span = create_test_span(1, 0).with_metrics(metrics); let mut trace = create_test_trace(vec![span]); let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); @@ -1094,7 +1154,7 @@ mod tests { let mut metrics = saluki_common::collections::FastHashMap::default(); metrics.insert(MetaString::from("_top_level"), 1.0); metrics.insert(MetaString::from(SAMPLING_PRIORITY_METRIC_KEY), 2.0); // UserKeep - let span = create_test_span(556, 1, 0).with_metrics(metrics); + let span = create_test_span(1, 0).with_metrics(metrics); let mut trace = create_test_trace(vec![span]); let (keep, priority, _, _) = sampler.run_samplers(&mut trace); @@ -1109,7 +1169,7 @@ mod tests { sampler.sampling_rate = 1.0; sampler.probabilistic_sampler_enabled = true; - let span = create_top_level_span(666, 1); + let span = 
create_top_level_span(1); let mut trace = create_test_trace(vec![span]); let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); @@ -1126,7 +1186,7 @@ mod tests { sampler.probabilistic_sampler_enabled = true; sampler.error_sampling_enabled = false; - let span = create_top_level_span(777, 1); + let span = create_top_level_span(1); let mut trace = create_test_trace(vec![span]); let (keep, priority, _, _) = sampler.run_samplers(&mut trace); @@ -1148,7 +1208,7 @@ mod tests { MetaString::from_static(OTEL_TRACE_ID_META_KEY), MetaString::from("00000000000000000000000000000001"), ); - let span = create_top_level_span(888, 1).with_meta(meta); + let span = create_top_level_span(1).with_meta(meta); let mut trace = create_test_trace(vec![span]); let (keep, priority, decision_maker, root_idx) = sampler.run_samplers(&mut trace); @@ -1160,9 +1220,9 @@ mod tests { assert_eq!(decision_maker, ""); assert_eq!( trace.spans()[root_idx.unwrap()] - .metrics() + .attributes .get(rare_sampler::RARE_KEY) - .copied(), + .and_then(AttributeValue::as_float), Some(1.0), "_dd.rare should be set to 1 on first occurrence" ); @@ -1178,7 +1238,7 @@ mod tests { sampler.sampling_rate = 1.0; sampler.probabilistic_sampler_enabled = true; - let span = create_top_level_span(901, 1); + let span = create_top_level_span(1); let mut trace = create_test_trace(vec![span]); let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); @@ -1197,8 +1257,8 @@ mod tests { sampler.probabilistic_sampler_enabled = false; sampler.error_sampling_enabled = true; - let span = create_top_level_span(902, 1); - let error_span = create_test_span(902, 2, 1); // error=1 + let span = create_top_level_span(1); + let error_span = create_test_span(2, 1); // error=1 let mut trace = create_test_trace(vec![span, error_span]); let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); @@ -1219,7 +1279,7 @@ mod tests { let mut metrics = saluki_common::collections::FastHashMap::default(); 
metrics.insert(MetaString::from("_top_level"), 1.0); metrics.insert(MetaString::from(SAMPLING_PRIORITY_METRIC_KEY), -1.0); // UserDrop - let span = create_test_span(903, 1, 0).with_metrics(metrics); + let span = create_test_span(1, 0).with_metrics(metrics); let mut trace = create_test_trace(vec![span]); let (keep, priority, _, _) = sampler.run_samplers(&mut trace); @@ -1242,7 +1302,7 @@ mod tests { fn ets_keeps_trace_with_error() { let mut sampler = create_sampler_with_ets(); - let span = create_test_span(100, 1, 1); // error=1 + let span = create_test_span(1, 1); // error=1 let mut trace = create_test_trace(vec![span]); let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); @@ -1256,7 +1316,7 @@ mod tests { fn ets_drops_trace_without_error() { let mut sampler = create_sampler_with_ets(); - let span = create_test_span(101, 1, 0); // error=0 + let span = create_test_span(1, 0); // error=0 let mut trace = create_test_trace(vec![span]); let (keep, priority, _, _) = sampler.run_samplers(&mut trace); @@ -1272,13 +1332,13 @@ mod tests { // Span with SSS metric — would trigger single span sampling in non-ETS mode. 
let mut metrics = saluki_common::collections::FastHashMap::default(); metrics.insert(MetaString::from(KEY_SPAN_SAMPLING_MECHANISM), 8.0); - let span = create_test_span(102, 1, 0).with_metrics(metrics); + let span = create_test_span(1, 0).with_metrics(metrics); let mut trace = create_test_trace(vec![span]); let forwarded = sampler.process_trace(&mut trace); assert!(forwarded, "ETS should forward non-error traces to intake"); assert!( - trace.sampling().is_some_and(|s| s.dropped_trace), + trace.dropped_trace, "non-error ETS trace should have DroppedTrace=true" ); } @@ -1294,7 +1354,7 @@ mod tests { MetaString::from("_dd.span_events.has_exception"), MetaString::from("true"), ); - let span = create_test_span(104, 1, 0).with_meta(meta); + let span = create_test_span(1, 0).with_meta(meta); let mut trace = create_test_trace(vec![span]); let (keep, _, _, _) = sampler.run_samplers(&mut trace); @@ -1308,7 +1368,7 @@ mod tests { sampler.sampling_rate = 1.0; sampler.probabilistic_sampler_enabled = true; - let span = create_test_span(105, 1, 0); // no error + let span = create_test_span(1, 0); // no error let mut trace = create_test_trace(vec![span]); let (keep, _, decision_maker, _) = sampler.run_samplers(&mut trace); @@ -1321,13 +1381,13 @@ mod tests { // priority/dm before runSamplersV1, so ETS sees those values even when it // short-circuits. See: pkg/trace/api/otlp.go#L561-L585. 
- fn create_otlp_test_span(trace_id: u64, span_id: u64, error: i32) -> DdSpan { + fn create_otlp_test_span(span_id: u64, error: i32) -> DdSpan { let mut meta = saluki_common::collections::FastHashMap::default(); meta.insert( MetaString::from_static(OTEL_TRACE_ID_META_KEY), MetaString::from("0000000000000000deadbeefcafebabe"), ); - create_test_span(trace_id, span_id, error).with_meta(meta) + create_test_span(span_id, error).with_meta(meta) } fn create_sampler_with_ets_legacy() -> TraceSampler { @@ -1345,7 +1405,7 @@ mod tests { fn ets_otlp_non_error_gets_presample_priority_and_dm() { let mut sampler = create_sampler_with_ets_legacy(); - let span = create_otlp_test_span(200, 1, 0); // no error + let span = create_otlp_test_span(1, 0); // no error let mut trace = create_test_trace(vec![span]); let (keep, priority, dm, _) = sampler.run_samplers(&mut trace); @@ -1362,7 +1422,7 @@ mod tests { fn ets_otlp_error_gets_presample_priority_and_dm() { let mut sampler = create_sampler_with_ets_legacy(); - let span = create_otlp_test_span(201, 1, 1); // error=1 + let span = create_otlp_test_span(1, 1); // error=1 let mut trace = create_test_trace(vec![span]); let (keep, priority, dm, _) = sampler.run_samplers(&mut trace); @@ -1378,7 +1438,7 @@ mod tests { let mut sampler = create_sampler_with_ets_legacy(); sampler.probabilistic_sampler_enabled = true; // override to prob path - let span = create_otlp_test_span(202, 1, 0); // no error + let span = create_otlp_test_span(1, 0); // no error let mut trace = create_test_trace(vec![span]); let (keep, priority, dm, _) = sampler.run_samplers(&mut trace); @@ -1395,7 +1455,7 @@ mod tests { fn ets_non_otlp_unaffected_by_presample() { let mut sampler = create_sampler_with_ets_legacy(); - let span = create_test_span(203, 1, 0); // no error, no OTLP meta + let span = create_test_span(1, 0); // no error, no OTLP meta let mut trace = create_test_trace(vec![span]); let (keep, priority, dm, _) = sampler.run_samplers(&mut trace); @@ -1411,7 +1471,7 
@@ mod tests { let mut metrics = saluki_common::collections::FastHashMap::default(); metrics.insert(MetaString::from(SAMPLING_PRIORITY_METRIC_KEY), 2.0); // UserKeep - let span = create_otlp_test_span(204, 1, 0).with_metrics(metrics); // no error + let span = create_otlp_test_span(1, 0).with_metrics(metrics); // no error let mut trace = create_test_trace(vec![span]); let (keep, priority, dm, _) = sampler.run_samplers(&mut trace); diff --git a/lib/saluki-components/src/transforms/trace_sampler/priority_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/priority_sampler.rs deleted file mode 100644 index 124c6e2af3b..00000000000 --- a/lib/saluki-components/src/transforms/trace_sampler/priority_sampler.rs +++ /dev/null @@ -1,274 +0,0 @@ -//! Priority sampler for trace sampling based on service rates. -#![allow(dead_code)] -use std::time::SystemTime; - -use saluki_core::data_model::event::trace::Trace; -use stringtheory::MetaString; - -use super::{ - catalog::ServiceKeyCatalog, - core_sampler::Sampler, - score_sampler::weight_root, - signature::{ServiceSignature, Signature}, - PRIORITY_AUTO_DROP, PRIORITY_AUTO_KEEP, PRIORITY_USER_KEEP, -}; -use crate::common::datadog::get_trace_env; - -const DEPRECATED_RATE_KEY: &str = "_sampling_priority_rate_v1"; - -/// Priority sampler for traces with sampling priority set by the tracer. -pub struct PrioritySampler { - agent_env: MetaString, - sampler: Sampler, - catalog: ServiceKeyCatalog, -} -// the logic for this class is taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/prioritysampler.go#L39 -// note that any logic involving tracers were removed because ADP does not currently support tracers. -impl PrioritySampler { - /// Creates a new priority sampler with the given configuration. 
- pub(super) fn new(agent_env: MetaString, extra_sample_rate: f64, target_tps: f64) -> Self { - PrioritySampler { - agent_env, - sampler: Sampler::new(extra_sample_rate, target_tps), - catalog: ServiceKeyCatalog::new(), - } - } - - /// Updates the target traces per second. - pub(super) fn update_target_tps(&mut self, target_tps: f64) { - self.sampler.update_target_tps(target_tps); - } - - /// Returns the current target traces per second. - pub(super) fn get_target_tps(&self) -> f64 { - self.sampler.target_tps() - } - - /// Sample a trace that already has a sampling priority set. - /// - /// The decision is based on the priority value; the sampler only updates - /// feedback rates for auto-priority traces. - pub(super) fn sample( - &mut self, now: SystemTime, trace: &mut Trace, root_idx: usize, priority: i32, client_dropped_p0s_weight: f64, - ) -> bool { - if trace.spans().is_empty() || root_idx >= trace.spans().len() { - return false; - } - - let sampled = priority == PRIORITY_AUTO_KEEP || priority == PRIORITY_USER_KEEP; - - // Only auto-priority traces (0 or 1) participate in the feedback loop. 
- if !(PRIORITY_AUTO_DROP..=PRIORITY_AUTO_KEEP).contains(&priority) { - return sampled; - } - - let (service_name, tracer_env, weight) = { - let root = &trace.spans()[root_idx]; - let tracer_env = get_trace_env(trace, root_idx).map(|env| env.as_ref()).unwrap_or(""); - let weight = weight_root(root) + client_dropped_p0s_weight as f32; - (root.service(), tracer_env, weight) - }; - - let sampler_env = to_sampler_env(tracer_env, &self.agent_env); - let svc_sig = ServiceSignature::new(service_name, sampler_env); - let signature = self.catalog.register(svc_sig); - - let _ = self.sampler.count_weighted_sig(now, &signature, weight); - - if sampled { - self.apply_rate(trace, root_idx, &signature); - } - - sampled - } - - fn apply_rate(&self, trace: &mut Trace, root_idx: usize, signature: &Signature) -> f64 { - let root = &mut trace.spans_mut()[root_idx]; - if root.parent_id() != 0 { - return 1.0; - } - - // ignore the tracer specific logic - - let rate = self.sampler.get_signature_sample_rate(signature); - root.metrics_mut() - .insert(MetaString::from_static(DEPRECATED_RATE_KEY), rate); - rate - } -} - -fn to_sampler_env(tracer_env: &str, agent_env: &MetaString) -> MetaString { - if tracer_env.is_empty() { - agent_env.clone() - } else { - MetaString::from(tracer_env) - } -} - -#[cfg(test)] -mod tests { - // logic for these tests are taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/prioritysampler_test.go - use std::time::{Duration, SystemTime}; - - use saluki_context::tags::TagSet; - use saluki_core::data_model::event::trace::{Span, Trace}; - use stringtheory::MetaString; - - use super::*; - use crate::transforms::trace_sampler::signature::ServiceSignature; - - const BUCKET_DURATION: Duration = Duration::from_secs(5); - const PRIORITY_USER_DROP: i32 = -1; - - fn get_test_priority_sampler(target_tps: f64) -> PrioritySampler { - PrioritySampler::new(MetaString::from("agent-env"), 1.0, target_tps) - } - - fn 
get_test_trace_with_service(service: &str, trace_id: u64) -> (Trace, usize) { - let root = Span::new( - MetaString::from(service), - MetaString::from("root-operation"), - MetaString::from("root-resource"), - MetaString::from("web"), - trace_id, - 1, // span_id - 0, // parent_id - 42, // start - 1000000, // duration - 0, // error - ); - - let child = Span::new( - MetaString::from(service), - MetaString::from("child-operation"), - MetaString::from("child-resource"), - MetaString::from("sql"), - trace_id, - 2, // span_id - 1, // parent_id - 100, // start - 200000, // duration - 0, // error - ); - - let trace = Trace::new(vec![root, child], TagSet::default()); - (trace, 0) - } - - #[test] - fn test_priority_sample() { - let test_cases = [ - (PRIORITY_USER_DROP, false), // user drop - (PRIORITY_AUTO_DROP, false), // auto drop - (PRIORITY_AUTO_KEEP, true), // auto keep - (PRIORITY_USER_KEEP, true), // user keep - ]; - - for (idx, (priority, expected_sampled)) in test_cases.iter().copied().enumerate() { - let mut sampler = get_test_priority_sampler(0.0); - let (mut trace, root_idx) = get_test_trace_with_service("service-a", idx as u64 + 1); - let sampled = sampler.sample(SystemTime::now(), &mut trace, root_idx, priority, 0.0); - assert_eq!( - sampled, expected_sampled, - "priority {} should sample={}", - priority, expected_sampled - ); - } - } - - #[test] - fn test_priority_sampler_tps_feedback_loop() { - struct TestCase { - target_tps: f64, - generated_tps: f64, - service: &'static str, - expected_tps: f64, - relative_error: f64, - } - - let test_cases = [ - TestCase { - target_tps: 5.0, - generated_tps: 50.0, - expected_tps: 5.0, - relative_error: 0.25, - service: "bim", - }, - TestCase { - target_tps: 3.0, - generated_tps: 200.0, - expected_tps: 3.0, - relative_error: 0.25, - service: "2", - }, - TestCase { - target_tps: 10.0, - generated_tps: 10.0, - expected_tps: 10.0, - relative_error: 0.03, - service: "4", - }, - TestCase { - target_tps: 10.0, - generated_tps: 3.0, 
- expected_tps: 3.0, - relative_error: 0.03, - service: "10", - }, - TestCase { - target_tps: 0.5, - generated_tps: 100.0, - expected_tps: 0.5, - relative_error: 0.6, - service: "0.5", - }, - ]; - - for tc in test_cases { - let mut sampler = get_test_priority_sampler(tc.target_tps); - let signature = ServiceSignature::new(tc.service, "agent-env").hash(); - let expected_rate = tc.expected_tps / tc.generated_tps; - - let warm_up_duration = 5; - let test_duration = 20; - let mut test_time = SystemTime::now(); - - let mut sampled_count = 0; - let mut handled_count = 0; - - for time_elapsed in 0..(warm_up_duration + test_duration) { - let traces_per_period = (tc.generated_tps * BUCKET_DURATION.as_secs_f64()) as usize; - test_time += BUCKET_DURATION; - - for i in 0..traces_per_period { - let trace_id = (time_elapsed as u64) << 32 | i as u64; - let (mut trace, root_idx) = get_test_trace_with_service(tc.service, trace_id); - let sampled = sampler.sample(test_time, &mut trace, root_idx, PRIORITY_AUTO_KEEP, 0.0); - - if time_elapsed < warm_up_duration { - continue; - } - - let rate = sampler.sampler.get_signature_sample_rate(&signature); - assert!( - (rate - expected_rate).abs() <= expected_rate * tc.relative_error, - "rate mismatch for service {}: got {}, want {}", - tc.service, - rate, - expected_rate - ); - - handled_count += 1; - if sampled { - sampled_count += 1; - } - } - } - - assert_eq!( - sampled_count, handled_count, - "auto-keep priority should sample every handled trace" - ); - } - } -} diff --git a/lib/saluki-components/src/transforms/trace_sampler/rare_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/rare_sampler.rs index 4952dd3a110..fc9a8cb743c 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/rare_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/rare_sampler.rs @@ -15,7 +15,7 @@ use std::time::{Duration, Instant}; use saluki_common::{collections::FastHashMap, rate::TokenBucket}; -use 
saluki_core::data_model::event::trace::{Span, Trace}; +use saluki_core::data_model::event::trace::{AttributeValue, Span, Trace}; use stringtheory::MetaString; use super::signature::{span_hash_for_rare, ServiceSignature, Signature}; @@ -160,7 +160,7 @@ impl RareSampler { // Now safe to mutably borrow trace. if let Some(span) = trace.spans_mut().get_mut(sampled_idx) { - span.metrics_mut().insert(MetaString::from_static(RARE_KEY), 1.0); + span.attributes.insert(MetaString::from_static(RARE_KEY), AttributeValue::Float(1.0)); } true @@ -209,9 +209,9 @@ impl RareSampler { /// Checks `_top_level` (agent-set), `_dd.top_level` (tracer-set), and `_dd.measured`, mirroring /// `HasTopLevel` + `IsMeasured` in the Go agent's `traceutil` package. fn is_top_level_or_measured(span: &Span) -> bool { - span.metrics().get(KEY_TOP_LEVEL).is_some_and(|v| *v == 1.0) - || span.metrics().get(KEY_TRACER_TOP_LEVEL).is_some_and(|v| *v == 1.0) - || span.metrics().get(KEY_MEASURED).is_some_and(|v| *v == 1.0) + span.attributes.get(KEY_TOP_LEVEL).and_then(AttributeValue::as_float).is_some_and(|v| v == 1.0) + || span.attributes.get(KEY_TRACER_TOP_LEVEL).and_then(AttributeValue::as_float).is_some_and(|v| v == 1.0) + || span.attributes.get(KEY_MEASURED).and_then(AttributeValue::as_float).is_some_and(|v| v == 1.0) } #[cfg(test)] @@ -219,8 +219,7 @@ mod tests { use std::time::Duration; use saluki_common::collections::FastHashMap; - use saluki_context::tags::TagSet; - use saluki_core::data_model::event::trace::{Span as DdSpan, Trace}; + use saluki_core::data_model::event::trace::{AttributeValue, Span as DdSpan, Trace}; use stringtheory::MetaString; use super::{RareSampler, KEY_MEASURED, KEY_TOP_LEVEL, KEY_TRACER_TOP_LEVEL, RARE_KEY}; @@ -234,7 +233,6 @@ mod tests { MetaString::from(resource), MetaString::from("web"), 1, - 1, 0, 0, 1000, @@ -260,7 +258,7 @@ mod tests { } fn make_trace(spans: Vec) -> Trace { - Trace::new(spans, TagSet::default()) + Trace::new(spans) } #[test] @@ -272,11 +270,15 @@ mod 
tests { #[test] fn new_signature_is_kept() { + use saluki_core::data_model::event::trace::AttributeValue; let mut sampler = RareSampler::new(true, 5.0, Duration::from_secs(300), 200); let mut trace = make_trace(vec![make_top_level_span("svc", "op", "res")]); assert!(sampler.sample(&mut trace, 0)); // The rare key should be set on the sampled span. - assert_eq!(trace.spans()[0].metrics().get(RARE_KEY).copied(), Some(1.0)); + assert_eq!( + trace.spans()[0].attributes.get(RARE_KEY).and_then(AttributeValue::as_float), + Some(1.0) + ); } #[test] @@ -411,7 +413,7 @@ mod tests { let mut trace1 = make_trace(vec![make_top_level_span("s1", "op", "r1")]); assert!(sampler.sample(&mut trace1, 0)); - assert_eq!(trace1.spans()[0].metrics().get(RARE_KEY).copied(), Some(1.0)); + assert_eq!(trace1.spans()[0].attributes.get(RARE_KEY).and_then(AttributeValue::as_float), Some(1.0)); let mut trace2 = make_trace(vec![ make_top_level_span("s1", "op", "r1"), @@ -419,12 +421,12 @@ mod tests { ]); assert!(sampler.sample(&mut trace2, 0)); assert_eq!( - trace2.spans()[0].metrics().get(RARE_KEY).copied(), + trace2.spans()[0].attributes.get(RARE_KEY).and_then(AttributeValue::as_float), None, "r1 should not get rare flag" ); assert_eq!( - trace2.spans()[1].metrics().get(RARE_KEY).copied(), + trace2.spans()[1].attributes.get(RARE_KEY).and_then(AttributeValue::as_float), Some(1.0), "r2 should get rare flag" ); diff --git a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs index a328e9d6f49..7a06dea3711 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs @@ -1,7 +1,7 @@ use std::time::SystemTime; use saluki_common::collections::FastHashMap; -use saluki_core::data_model::event::trace::{Span, Trace}; +use saluki_core::data_model::event::trace::{AttributeValue, Span, Trace}; use stringtheory::MetaString; use 
super::signature::{compute_signature_with_root_and_env, Signature}; @@ -97,15 +97,15 @@ impl ScoreSampler { let rate = self.sampler.get_signature_sample_rate(&signature); // Apply the sampling decision + let trace_id_low = trace.trace_id_low; let root = &mut trace.spans_mut()[root_idx]; - self.apply_sample_rate(root, rate) + self.apply_sample_rate(root, rate, trace_id_low) } /// Apply the sampling rate to determine if the trace should be kept. - fn apply_sample_rate(&self, root: &mut Span, rate: f64) -> bool { + fn apply_sample_rate(&self, root: &mut Span, rate: f64, trace_id: u64) -> bool { let initial_rate = get_global_rate(root); let new_rate = initial_rate * rate; - let trace_id = root.trace_id(); let sampled = sample_by_rate(trace_id, new_rate); if sampled { @@ -146,8 +146,8 @@ impl ScoreSampler { /// Set the sampling rate metric on a span. pub fn set_sampling_rate_metric(&self, span: &mut Span, rate: f64) { - span.metrics_mut() - .insert(MetaString::from(self.sampling_rate_key), rate); + span.attributes + .insert(MetaString::from(self.sampling_rate_key), AttributeValue::Float(rate)); } } @@ -169,16 +169,16 @@ impl ScoreSampler { /// Calculate the weight from the span's global rate and presampler rate. pub(super) fn weight_root(span: &Span) -> f32 { let client_rate = span - .metrics() + .attributes .get(KEY_SAMPLING_RATE_GLOBAL) - .copied() + .and_then(AttributeValue::as_float) .filter(|&r| r > 0.0 && r <= 1.0) .unwrap_or(1.0); let pre_sampler_rate = span - .metrics() + .attributes .get(KEY_SAMPLING_RATE_PRE_SAMPLER) - .copied() + .and_then(AttributeValue::as_float) .filter(|&r| r > 0.0 && r <= 1.0) .unwrap_or(1.0); @@ -187,5 +187,44 @@ pub(super) fn weight_root(span: &Span) -> f32 { /// Get the cumulative sample rate of the trace to which this span belongs. 
fn get_global_rate(span: &Span) -> f64 { - span.metrics().get(KEY_SAMPLING_RATE_GLOBAL).copied().unwrap_or(1.0) + span.attributes.get(KEY_SAMPLING_RATE_GLOBAL).and_then(AttributeValue::as_float).unwrap_or(1.0) +} + +#[cfg(test)] +mod tests { + use saluki_core::data_model::event::trace::{AttributeValue, Span}; + use stringtheory::MetaString; + + use super::*; + + fn make_span() -> Span { + Span::new("svc", "op", "res", "web", 1, 0, 0, 1000, 0) + } + + #[test] + fn weight_root_defaults_to_one() { + assert_eq!(weight_root(&make_span()), 1.0f32); + } + + #[test] + fn weight_root_divides_by_client_rate() { + let mut span = make_span(); + span.attributes.insert(MetaString::from(KEY_SAMPLING_RATE_GLOBAL), AttributeValue::Float(0.5)); + assert_eq!(weight_root(&span), 2.0f32); + } + + #[test] + fn weight_root_uses_both_rates() { + let mut span = make_span(); + span.attributes.insert(MetaString::from(KEY_SAMPLING_RATE_GLOBAL), AttributeValue::Float(0.5)); + span.attributes.insert(MetaString::from(KEY_SAMPLING_RATE_PRE_SAMPLER), AttributeValue::Float(0.5)); + assert_eq!(weight_root(&span), 4.0f32); + } + + #[test] + fn weight_root_ignores_out_of_range_rates() { + let mut span = make_span(); + span.attributes.insert(MetaString::from(KEY_SAMPLING_RATE_GLOBAL), AttributeValue::Float(2.0)); + assert_eq!(weight_root(&span), 1.0f32); + } } diff --git a/lib/saluki-components/src/transforms/trace_sampler/signature.rs b/lib/saluki-components/src/transforms/trace_sampler/signature.rs index a64eec9466d..b15b0f86c51 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/signature.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/signature.rs @@ -4,7 +4,7 @@ //! - a small FNV-1a 32-bit helper (used by probabilistic sampling) //! 
- a signature newtype + compute helper (for score/TPS samplers) -use saluki_core::data_model::event::trace::{Span, Trace}; +use saluki_core::data_model::event::trace::{AttributeValue, Span, Trace}; use stringtheory::MetaString; use crate::common::datadog::get_trace_env; @@ -22,37 +22,47 @@ fn write_hash(mut hash: u32, bytes: &[u8]) -> u32 { hash } -pub(super) fn fnv1a_32(seed: &[u8], bytes: &[u8]) -> u32 { +pub(crate) fn fnv1a_32(seed: &[u8], bytes: &[u8]) -> u32 { let hash = write_hash(OFFSET_32, seed); write_hash(hash, bytes) } #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] -pub(super) struct Signature(pub(super) u64); +pub(crate) struct Signature(pub(super) u64); /// Service identifier for sampling rate lookups. /// /// Represents a unique (service name, environment) pair used as a key /// for storing and retrieving sampling rates in distributed sampling. #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] -pub(super) struct ServiceSignature { +pub(crate) struct ServiceSignature { name: MetaString, env: MetaString, } impl ServiceSignature { /// Creates a new ServiceSignature from name and environment. - pub(super) fn new(name: impl Into, env: impl Into) -> Self { + pub(crate) fn new(name: impl Into, env: impl Into) -> Self { Self { name: name.into(), env: env.into(), } } + /// Returns the service name. + pub(crate) fn name(&self) -> &str { + self.name.as_ref() + } + + /// Returns the environment. + pub(crate) fn env(&self) -> &str { + self.env.as_ref() + } + /// Computes FNV-1a hash matching Go's ServiceSignature.Hash(). 
/// /// The hash is computed over: `name + "," + env` - pub(super) fn hash(&self) -> Signature { + pub(crate) fn hash(&self) -> Signature { let mut h = OFFSET_32; h = write_hash(h, self.name.as_ref().as_bytes()); h = write_hash(h, b","); @@ -114,10 +124,10 @@ pub(super) fn compute_span_hash(span: &Span, env: &str, with_resource: bool) -> if with_resource { h = write_hash(h, span.resource().as_bytes()); } - if let Some(code) = span.meta().get(KEY_HTTP_STATUS_CODE) { + if let Some(code) = span.attributes.get(KEY_HTTP_STATUS_CODE).and_then(AttributeValue::as_string) { h = write_hash(h, code.as_ref().as_bytes()); } - if let Some(typ) = span.meta().get(KEY_ERROR_TYPE) { + if let Some(typ) = span.attributes.get(KEY_ERROR_TYPE).and_then(AttributeValue::as_string) { h = write_hash(h, typ.as_ref().as_bytes()); } h diff --git a/lib/saluki-components/src/transforms/trace_sampler/v1.rs b/lib/saluki-components/src/transforms/trace_sampler/v1.rs new file mode 100644 index 00000000000..42e4ab656b6 --- /dev/null +++ b/lib/saluki-components/src/transforms/trace_sampler/v1.rs @@ -0,0 +1,391 @@ +//! V1 trace sampling implementation. +//! +//! Implements `runSamplersV1` from `pkg/trace/agent/agent.go`: reads the tracer-set +//! sampling priority from each chunk, runs the appropriate sampler(s), and writes the +//! final decision back to `chunk.priority` / `chunk.dropped_trace` in place. +//! +//! Unlike the OTLP-path `TraceSampler`, the V1 path carries sampling decisions +//! pre-made by the tracer; the agent's role is to: +//! 1. Respect and count those decisions for the rate-feedback loop. +//! 2. Override `PriorityAutoDrop` traces when the rare sampler or error sampler fires. +//! 3. Propagate per-service rates back to tracers via the `ApmReceiver` HTTP response. 
+ +use saluki_common::rate::TokenBucket; +use saluki_core::{ + data_model::event::{trace::Trace, Event}, + topology::EventsBuffer, +}; +use saluki_core::components::transforms::SynchronousTransform; +use std::time::SystemTime; +use tracing::debug; + +use super::v1_no_priority::V1NoPrioritySampler; +use super::v1_priority::PrioritySampler; +use super::rare_sampler::RareSampler; + +/// Sentinel indicating the tracer set no priority (matches Go's `PriorityNone = math.MinInt8`). +pub(super) const PRIORITY_NONE: i32 = i8::MIN as i32; + +pub(super) const PRIORITY_AUTO_KEEP: i32 = 1; +pub(super) const ERROR_SAMPLER_BURST: usize = 100; + +pub(super) struct V1TraceSamplerImpl { + pub(super) priority_sampler: PrioritySampler, + pub(super) no_priority_sampler: V1NoPrioritySampler, + pub(super) rare_sampler: RareSampler, + pub(super) error_token_bucket: Option, + pub(super) error_sampling_enabled: bool, + pub(super) error_tracking_standalone: bool, +} + +impl V1TraceSamplerImpl { + /// Implements `runSamplersV1` / `traceSamplingV1` from the Go Trace Agent. + /// + /// Returns `true` if the trace should be forwarded, `false` if it should be + /// removed from the buffer entirely. In ETS mode the trace is always forwarded + /// (with `dropped_trace` set to reflect whether it was a kept or dropped trace). 
+ pub(super) fn process_trace( + &mut self, + now: SystemTime, + trace: &mut Trace, + tracer_env: &str, + client_dropped_p0s_weight: f64, + ) -> bool { + if trace.spans().is_empty() { + return false; + } + + // ── Error Tracking Standalone (ETS) ──────────────────────────────────── + if self.error_tracking_standalone { + let has_error = trace.spans().iter().any(|s| s.error() != 0); + let keep = has_error + && self + .error_token_bucket + .as_mut() + .map(|b| b.allow()) + .unwrap_or(true); + trace.dropped_trace = !keep; + return true; + } + + // ── Rare sampler runs unconditionally before any keep/drop decision ───── + let root_idx = find_root_span_idx(trace.spans()); + let rare = self.rare_sampler.sample(trace, root_idx); + + // ── Manual/user drop: hard drop, no overrides possible ───────────────── + // Only hard-drop when the tracer explicitly set a negative priority. + // A missing priority (trace.priority == None, wire sentinel MinInt8) is NOT a user + // drop — it must reach the no-priority path below. + // TODO: implement the full isManualUserDropV1 check from the Go agent. + if matches!(trace.priority, Some(p) if p < 0) { + trace.dropped_trace = true; + return false; + } + + // ── Rare sampler override ─────────────────────────────────────────────── + if rare { + trace.priority = Some(PRIORITY_AUTO_KEEP); + trace.dropped_trace = false; + debug!(trace_id_low = trace.trace_id_low, "Keeping V1 trace chunk: rare sampler override."); + return true; + } + + // ── Priority / NoPriority path ────────────────────────────────────────── + let has_priority = trace.priority.is_some(); + // Unwrap to 0 (auto-drop) for the no-priority branch; the value is unused there. 
+ let priority = trace.priority.unwrap_or(0); + + let keep = if has_priority { + let spans = trace.spans_mut(); + let root = &mut spans[root_idx]; + self.priority_sampler.sample(now, priority, root, tracer_env, client_dropped_p0s_weight) + } else { + self.no_priority_sampler.sample() + }; + + if keep { + // Normalize PRIORITY_NONE so the encoder never writes an undefined priority. + if trace.priority.is_none() { + trace.priority = Some(PRIORITY_AUTO_KEEP); + } + trace.dropped_trace = false; + debug!( + trace_id_low = trace.trace_id_low, + priority = trace.priority, + has_priority, + "Keeping V1 trace chunk: priority/no-priority sampler." + ); + return true; + } + + // ── Error sampler as final override ──────────────────────────────────── + if self.error_sampling_enabled && trace.spans().iter().any(|s| s.error() != 0) { + if let Some(ref mut bucket) = self.error_token_bucket { + if bucket.allow() { + trace.priority = Some(PRIORITY_AUTO_KEEP); + trace.dropped_trace = false; + debug!(trace_id_low = trace.trace_id_low, "Keeping V1 trace chunk: error sampler override."); + return true; + } + } + } + + // Normalize PRIORITY_NONE on the drop path too. + if trace.priority.is_none() { + trace.priority = Some(0); // PRIORITY_AUTO_DROP + } + debug!( + trace_id_low = trace.trace_id_low, + priority = trace.priority, + "Dropping V1 trace chunk." 
+ ); + false + } +} + +impl SynchronousTransform for V1TraceSamplerImpl { + fn transform_buffer(&mut self, buffer: &mut EventsBuffer) { + let now = SystemTime::now(); + let mut kept = 0u32; + let mut dropped = 0u32; + buffer.remove_if(|event| match event { + Event::Trace(trace) => { + let tracer_env = trace.env.clone(); + let weight = trace.client_dropped_p0s_weight; + let remove = !self.process_trace(now, trace, tracer_env.as_ref(), weight); + if remove { + dropped += 1; + } else { + kept += 1; + } + remove + } + _ => false, + }); + if kept + dropped > 0 { + debug!(kept, dropped, "V1 trace sampler processed buffer."); + } + } +} + +/// Find the index of the root span (parent_id == 0). Falls back to the last span. +pub(super) fn find_root_span_idx(spans: &[saluki_core::data_model::event::trace::Span]) -> usize { + let len = spans.len(); + + // Fast path: scan from the end (tracers often report root last). + for i in (0..len).rev() { + if spans[i].parent_id() == 0 { + return i; + } + } + + // Build parent→child map and remove entries whose parent exists in the trace. 
+ let mut parent_to_child: std::collections::HashMap = spans + .iter() + .enumerate() + .map(|(i, s)| (s.parent_id(), i)) + .collect(); + for span in spans { + parent_to_child.remove(&span.span_id()); + } + if let Some((&_, &idx)) = parent_to_child.iter().next() { + return idx; + } + + len - 1 +} + +#[cfg(test)] +mod tests { + use saluki_core::data_model::event::trace::Trace; + use saluki_common::rate::TokenBucket; + use stringtheory::MetaString; + use std::time::{Duration, SystemTime}; + + use super::*; + use crate::sources::apm::sampling_rates::V1SamplingRatesHandle; + + fn make_sampler() -> V1TraceSamplerImpl { + V1TraceSamplerImpl { + priority_sampler: PrioritySampler::new( + MetaString::from_static("prod"), + 10.0, + 1.0, + V1SamplingRatesHandle::new(), + ), + no_priority_sampler: V1NoPrioritySampler::new(10.0), + rare_sampler: RareSampler::new(false, 5.0, Duration::from_secs(300), 200), + error_token_bucket: Some(TokenBucket::new(10.0, 100)), + error_sampling_enabled: true, + error_tracking_standalone: false, + } + } + + fn make_span(parent_id: u64, error: bool) -> saluki_core::data_model::event::trace::Span { + saluki_core::data_model::event::trace::Span::new( + "svc", "op", "res", "web", 1, parent_id, 0, 1000, if error { 1 } else { 0 }, + ) + } + + fn make_top_level_span(parent_id: u64, error: bool) -> saluki_core::data_model::event::trace::Span { + use saluki_core::data_model::event::trace::AttributeValue; + let mut span = make_span(parent_id, error); + span.attributes.insert(MetaString::from("_top_level"), AttributeValue::Float(1.0)); + span + } + + fn make_trace(priority: i32, spans: Vec) -> Trace { + let mut trace = Trace::new(spans); + if priority == PRIORITY_NONE { + trace.priority = None; + } else { + trace.priority = Some(priority); + } + trace + } + + fn process(sampler: &mut V1TraceSamplerImpl, trace: &mut Trace) -> bool { + sampler.process_trace(SystemTime::now(), trace, "prod", 0.0) + } + + // ── Basic keep/drop 
───────────────────────────────────────────────────── + + #[test] + fn empty_chunk_is_dropped() { + let mut s = make_sampler(); + let mut trace = make_trace(0, vec![]); + assert!(!process(&mut s, &mut trace)); + } + + #[test] + fn user_drop_is_hard_dropped() { + let mut s = make_sampler(); + let mut trace = make_trace(-1, vec![make_span(0, false)]); + assert!(!process(&mut s, &mut trace)); + assert!(trace.dropped_trace); + } + + #[test] + fn auto_keep_is_forwarded() { + let mut s = make_sampler(); + let mut trace = make_trace(1, vec![make_span(0, false)]); + assert!(process(&mut s, &mut trace)); + assert!(!trace.dropped_trace); + } + + #[test] + fn user_keep_is_forwarded() { + let mut s = make_sampler(); + let mut trace = make_trace(2, vec![make_span(0, false)]); + assert!(process(&mut s, &mut trace)); + assert!(!trace.dropped_trace); + } + + #[test] + fn auto_drop_with_error_is_kept_by_error_sampler() { + let mut s = make_sampler(); + let mut trace = make_trace(0, vec![make_span(0, true)]); + assert!(process(&mut s, &mut trace)); + assert_eq!(trace.priority, Some(PRIORITY_AUTO_KEEP)); + assert!(!trace.dropped_trace); + } + + #[test] + fn auto_drop_without_error_no_rare_is_dropped() { + let mut s = V1TraceSamplerImpl { + error_token_bucket: None, + error_sampling_enabled: false, + ..make_sampler() + }; + let mut trace = make_trace(0, vec![make_span(0, false)]); + assert!(!process(&mut s, &mut trace)); + } + + // ── Rare sampler ──────────────────────────────────────────────────────── + + #[test] + fn rare_sampler_overrides_auto_drop_first_occurrence() { + let mut s = V1TraceSamplerImpl { + rare_sampler: RareSampler::new(true, 1000.0, Duration::from_secs(300), 200), + error_token_bucket: None, + error_sampling_enabled: false, + ..make_sampler() + }; + let mut trace = make_trace(0, vec![make_top_level_span(0, false)]); + assert!(process(&mut s, &mut trace)); + assert_eq!(trace.priority, Some(PRIORITY_AUTO_KEEP)); + } + + #[test] + fn 
rare_sampler_runs_before_drop_decision() { + let mut s = V1TraceSamplerImpl { + rare_sampler: RareSampler::new(true, 1000.0, Duration::from_secs(300), 200), + error_token_bucket: None, + error_sampling_enabled: false, + ..make_sampler() + }; + let mut trace = make_trace(0, vec![make_top_level_span(0, false)]); + assert!(process(&mut s, &mut trace), "rare should keep first occurrence"); + + let mut trace2 = make_trace(0, vec![make_top_level_span(0, false)]); + assert!(!process(&mut s, &mut trace2), "rare should not repeat-sample within TTL"); + } + + // ── PriorityNone path ─────────────────────────────────────────────────── + + // A trace with no tracer-set priority (trace.priority == None, wire value MinInt8) + // must be routed to V1NoPrioritySampler, not hard-dropped as a user-drop. + // When the no-priority sampler has budget, the trace should be kept. + #[test] + fn priority_none_is_routed_to_no_priority_sampler_not_hard_dropped() { + let mut s = V1TraceSamplerImpl { + // target_tps=0 ensures the priority sampler would drop everything — if a + // no-priority trace were incorrectly routed here it would still be dropped, + // making the test a clean signal for which path was taken. + priority_sampler: PrioritySampler::new( + MetaString::from_static("prod"), + 0.0, + 1.0, + V1SamplingRatesHandle::new(), + ), + // High TPS budget: the no-priority sampler keeps all traces within the burst window. 
+ no_priority_sampler: V1NoPrioritySampler::new(10000.0), + rare_sampler: RareSampler::new(false, 5.0, Duration::from_secs(300), 200), + error_token_bucket: None, + error_sampling_enabled: false, + error_tracking_standalone: false, + }; + + let mut trace = make_trace(PRIORITY_NONE, vec![make_span(0, false)]); + let kept = process(&mut s, &mut trace); + + assert!(kept, "no-priority trace must be kept when no-priority sampler has budget"); + assert!(!trace.dropped_trace, "dropped_trace must be false for a kept no-priority trace"); + } + + // ── ETS mode ──────────────────────────────────────────────────────────── + + #[test] + fn ets_keeps_error_trace() { + let mut s = V1TraceSamplerImpl { + error_tracking_standalone: true, + error_token_bucket: Some(TokenBucket::new(10.0, 100)), + ..make_sampler() + }; + let mut trace = make_trace(0, vec![make_span(0, true)]); + assert!(process(&mut s, &mut trace)); + assert!(!trace.dropped_trace); + } + + #[test] + fn ets_drops_non_error_trace_but_forwards_it() { + let mut s = V1TraceSamplerImpl { + error_tracking_standalone: true, + error_token_bucket: Some(TokenBucket::new(10.0, 100)), + ..make_sampler() + }; + let mut trace = make_trace(1, vec![make_span(0, false)]); + assert!(process(&mut s, &mut trace)); + assert!(trace.dropped_trace, "non-error ETS trace must have dropped_trace=true"); + } +} diff --git a/lib/saluki-components/src/transforms/trace_sampler/v1_no_priority.rs b/lib/saluki-components/src/transforms/trace_sampler/v1_no_priority.rs new file mode 100644 index 00000000000..93f2204bb2e --- /dev/null +++ b/lib/saluki-components/src/transforms/trace_sampler/v1_no_priority.rs @@ -0,0 +1,32 @@ +//! V1 no-priority sampler. +//! +//! Used for V1 trace chunks where the tracer did not set a sampling priority +//! (the sentinel value `i8::MIN as i32 = -128`). This situation is uncommon — +//! modern DD tracers always send a priority — so a simple token-bucket is +//! sufficient for the initial implementation. +//! +//! 
TODO: Replace with a full score-sampler integration (weighted signature +//! counting + per-signature rate computation) matching `NoPrioritySampler.SampleV1` +//! from `pkg/trace/sampler/scoresampler.go`. + +use saluki_common::rate::TokenBucket; + +const NO_PRIORITY_BURST: usize = 100; + +/// Token-bucket sampler for V1 chunks without a tracer-set priority. +pub(super) struct V1NoPrioritySampler { + bucket: TokenBucket, +} + +impl V1NoPrioritySampler { + pub(super) fn new(target_tps: f64) -> Self { + Self { + bucket: TokenBucket::new(target_tps, NO_PRIORITY_BURST), + } + } + + /// Returns `true` if the chunk should be kept. + pub(super) fn sample(&mut self) -> bool { + self.bucket.allow() + } +} diff --git a/lib/saluki-components/src/transforms/trace_sampler/v1_priority.rs b/lib/saluki-components/src/transforms/trace_sampler/v1_priority.rs new file mode 100644 index 00000000000..2e7006cacca --- /dev/null +++ b/lib/saluki-components/src/transforms/trace_sampler/v1_priority.rs @@ -0,0 +1,277 @@ +//! Priority sampler with per-service rate propagation. +//! +//! Mirrors `PrioritySampler.SampleV1` + `countSignatureV1` + `applyRateV1` + `updateRates` +//! from `pkg/trace/sampler/prioritysampler.go`. +//! +//! Responsibilities: +//! - Count auto-priority (0/1) traces toward per-service rate computation. +//! - Short-circuit for user-set priorities (< 0 or > 1) without counting. +//! - Write the computed agent rate to the root span attribute when a trace is kept. +//! - Push updated per-service rates to the shared [`V1SamplingRatesHandle`] after each +//! sliding-window advance. 
+ +use std::time::SystemTime; + +use saluki_core::data_model::event::trace::{AttributeValue, Span}; +use stringtheory::MetaString; + +use crate::sources::apm::sampling_rates::V1SamplingRatesHandle; +use crate::transforms::trace_sampler::catalog::ServiceKeyCatalog; +use crate::transforms::trace_sampler::core_sampler::Sampler; +use crate::transforms::trace_sampler::signature::{ServiceSignature, Signature}; +use super::score_sampler::weight_root; + +// Root-span attribute keys (matching Go agent sampler constants). +const KEY_AGENT_PSR: &str = "_dd.agent_psr"; +const KEY_RULE_PSR: &str = "_dd.rule_psr"; +const KEY_DEPRECATED_RATE: &str = "_sampling_priority_rate_v1"; + +/// Priority sampler. +/// +/// Counts auto-priority traces toward a TPS-based rate computation and propagates +/// the resulting per-service rates to tracers via the HTTP response. +pub(super) struct PrioritySampler { + agent_env: MetaString, + core_sampler: Sampler, + catalog: ServiceKeyCatalog, + rates: V1SamplingRatesHandle, +} + +impl PrioritySampler { + pub(super) fn new( + agent_env: MetaString, + target_tps: f64, + extra_rate: f64, + rates: V1SamplingRatesHandle, + ) -> Self { + Self { + agent_env, + core_sampler: Sampler::new(extra_rate, target_tps), + catalog: ServiceKeyCatalog::new(), + rates, + } + } + + /// Evaluate the chunk against the priority sampler. + /// + /// Returns `true` if the chunk should be kept (priority > 0). + /// + /// Only auto-priorities (0 and 1) are counted toward the rate computation. + /// User-set priorities (< 0 or > 1) short-circuit without affecting rates. + pub(super) fn sample( + &mut self, + now: SystemTime, + priority: i32, + root: &mut Span, + tracer_env: &str, + client_dropped_p0s_weight: f64, + ) -> bool { + + // Short-circuit: don't count user-explicit decisions. 
+ if priority < 0 || priority > 1 { + return priority > 0; + } + + let effective_env = if tracer_env.is_empty() { + self.agent_env.as_ref() + } else { + tracer_env + }; + + let svc_sig = ServiceSignature::new(root.service(), effective_env); + let signature = self.catalog.register(svc_sig); + + let weight = weight_root(root) + client_dropped_p0s_weight as f32; + let new_rates = self.core_sampler.count_weighted_sig(now, &signature, weight); + if new_rates { + self.update_rates(); + } + + let sampled = priority > 0; + if sampled { + apply_rate(root, &signature, &self.core_sampler); + } + sampled + } + + fn update_rates(&mut self) { + let (rates_map, default_rate) = self.core_sampler.get_all_signature_sample_rates(); + let new_rates = self.catalog.rates_by_service(self.agent_env.as_ref(), &rates_map, default_rate); + self.rates.set_all(new_rates); + } +} + +/// Write the agent-computed sampling rate to the root span. +/// +/// Mirrors `applyRateV1` from `pkg/trace/sampler/prioritysampler.go`. +/// Does nothing if the tracer already annotated the root with a rate. 
+fn apply_rate(root: &mut Span, signature: &Signature, core_sampler: &Sampler) { + if root.parent_id() != 0 { + return; + } + if root.attributes.get(KEY_AGENT_PSR).and_then(AttributeValue::as_float).is_some() { + return; + } + if root.attributes.get(KEY_RULE_PSR).and_then(AttributeValue::as_float).is_some() { + return; + } + if root.attributes.get(KEY_DEPRECATED_RATE).and_then(AttributeValue::as_float).is_some() { + return; + } + let rate = core_sampler.get_signature_sample_rate(signature); + root.attributes.insert(MetaString::from(KEY_DEPRECATED_RATE), AttributeValue::Float(rate)); +} + + +#[cfg(test)] +mod tests { + use std::time::SystemTime; + + use saluki_common::collections::FastHashMap; + use saluki_core::data_model::event::trace::{AttributeValue, Span}; + use stringtheory::MetaString; + + use super::*; + use crate::sources::apm::sampling_rates::V1SamplingRatesHandle; + use crate::transforms::trace_sampler::signature::ServiceSignature; + + fn make_sampler() -> PrioritySampler { + PrioritySampler::new( + MetaString::from_static("prod"), + 10.0, + 1.0, + V1SamplingRatesHandle::new(), + ) + } + + fn make_span(parent_id: u64) -> Span { + Span::new("svc", "op", "res", "web", 1, parent_id, 0, 1000, 0) + } + + // ── Short-circuit tests ───────────────────────────────────────────────── + + #[test] + fn user_drop_short_circuits_without_counting() { + let mut sampler = make_sampler(); + let mut root = make_span(0); + let now = SystemTime::now(); + + assert!(!sampler.sample(now, -1, &mut root, "prod", 0.0)); + assert_eq!( + sampler.catalog.rates_by_service("prod", &FastHashMap::default(), 1.0).len(), + 1, + "only default rate key; no service registered" + ); + } + + #[test] + fn user_keep_short_circuits_returns_true() { + let mut sampler = make_sampler(); + let mut root = make_span(0); + let now = SystemTime::now(); + + assert!(sampler.sample(now, 2, &mut root, "prod", 0.0)); + assert_eq!( + sampler.catalog.rates_by_service("prod", &FastHashMap::default(), 1.0).len(), + 
1, + "only default rate key; no service registered" + ); + } + + // ── Counting tests ────────────────────────────────────────────────────── + + #[test] + fn auto_keep_priority_returns_true() { + let mut sampler = make_sampler(); + let mut root = make_span(0); + assert!(sampler.sample(SystemTime::now(), 1, &mut root, "prod", 0.0)); + } + + #[test] + fn auto_drop_priority_returns_false() { + let mut sampler = make_sampler(); + let mut root = make_span(0); + assert!(!sampler.sample(SystemTime::now(), 0, &mut root, "prod", 0.0)); + } + + // ── apply_rate tests ──────────────────────────────────────────────────── + + #[test] + fn kept_trace_gets_rate_written_to_root_span() { + let mut sampler = make_sampler(); + let mut root = make_span(0); + sampler.sample(SystemTime::now(), 1, &mut root, "prod", 0.0); + assert!( + root.attributes.get(KEY_DEPRECATED_RATE).and_then(AttributeValue::as_float).is_some(), + "rate metric should be written to kept root span" + ); + } + + #[test] + fn dropped_trace_does_not_get_rate_written() { + let mut sampler = make_sampler(); + let mut root = make_span(0); + sampler.sample(SystemTime::now(), 0, &mut root, "prod", 0.0); + assert!( + root.attributes.get(KEY_DEPRECATED_RATE).and_then(AttributeValue::as_float).is_none(), + "rate metric should not be written for dropped trace" + ); + } + + #[test] + fn existing_agent_psr_is_not_overwritten() { + let mut sampler = make_sampler(); + let mut root = make_span(0); + root.attributes.insert(MetaString::from(KEY_AGENT_PSR), AttributeValue::Float(0.25)); + + sampler.sample(SystemTime::now(), 1, &mut root, "prod", 0.0); + + assert_eq!( + root.attributes.get(KEY_AGENT_PSR).and_then(AttributeValue::as_float), + Some(0.25), + "existing _dd.agent_psr must not be overwritten" + ); + } + + #[test] + fn non_root_span_does_not_get_rate() { + let mut sampler = make_sampler(); + let mut non_root = make_span(99); // parent_id != 0 + + sampler.sample(SystemTime::now(), 1, &mut non_root, "prod", 0.0); + + let 
has_rate = [KEY_DEPRECATED_RATE, KEY_AGENT_PSR, KEY_RULE_PSR] + .iter() + .any(|k| non_root.attributes.get(*k).and_then(AttributeValue::as_float).is_some()); + assert!(!has_rate, "rate must not be written for non-root spans"); + } + + // ── effective_env test ───────────────────────────────────────────────── + + #[test] + fn empty_tracer_env_falls_back_to_agent_env() { + // Two samplers: one with agent_env="staging", one with agent_env="prod". + // With an empty tracer_env, the agent_env is used, so the two samplers + // produce different signatures for the same service. + let mut sampler_staging = PrioritySampler::new( + MetaString::from_static("staging"), + 10.0, + 1.0, + V1SamplingRatesHandle::new(), + ); + let mut sampler_prod = PrioritySampler::new( + MetaString::from_static("prod"), + 10.0, + 1.0, + V1SamplingRatesHandle::new(), + ); + let mut root = make_span(0); + // Both samplers with empty tracer_env and priority=1 should keep. + assert!(sampler_staging.sample(SystemTime::now(), 1, &mut root, "", 0.0)); + assert!(sampler_prod.sample(SystemTime::now(), 1, &mut root, "", 0.0)); + // Verify different signatures are registered by comparing the catalog entries. 
+ let sig_staging = ServiceSignature::new("svc", "staging").hash(); + let sig_prod = ServiceSignature::new("svc", "prod").hash(); + assert_ne!(sig_staging, sig_prod, "different envs must produce different signatures"); + } +} diff --git a/lib/saluki-core/src/data_model/event/mod.rs b/lib/saluki-core/src/data_model/event/mod.rs index cb4ccca67c7..b6274d3aeda 100644 --- a/lib/saluki-core/src/data_model/event/mod.rs +++ b/lib/saluki-core/src/data_model/event/mod.rs @@ -226,6 +226,7 @@ impl Event { pub fn is_trace(&self) -> bool { matches!(self, Event::Trace(_)) } + } #[cfg(test)] diff --git a/lib/saluki-core/src/data_model/event/trace/mod.rs b/lib/saluki-core/src/data_model/event/trace/mod.rs index 9647a8528d1..98e7897005b 100644 --- a/lib/saluki-core/src/data_model/event/trace/mod.rs +++ b/lib/saluki-core/src/data_model/event/trace/mod.rs @@ -1,52 +1,55 @@ //! Traces. use saluki_common::collections::FastHashMap; -use saluki_context::tags::TagSet; use stringtheory::MetaString; -/// Trace-level sampling metadata. +/// Typed value for attributes at every level of the trace model: span attributes, +/// span event attributes, span link attributes, and trace-level attributes. /// -/// This struct stores sampling-related metadata that applies to the entire trace, -/// typically set by the trace sampler and consumed by the encoder. +/// Covers all variants carried by the V1 APM idx wire format (`RawAnyValue`). #[derive(Clone, Debug, PartialEq)] -pub struct TraceSampling { - /// Whether or not the trace was dropped during sampling. - pub dropped_trace: bool, - - /// The sampling priority assigned to this trace. - /// - /// Common values include: - /// - `2`: Manual keep (user-requested) - /// - `1`: Auto keep (sampled in) - /// - `0`: Auto drop (sampled out) - /// - `-1`: Manual drop (user-requested drop) - pub priority: Option, +pub enum AttributeValue { + /// String-valued attribute. + String(MetaString), + /// Boolean attribute. + Bool(bool), + /// Integer attribute. 
+ Int(i64), + /// Floating-point attribute. + Float(f64), + /// Raw bytes attribute. + Bytes(Vec), + /// Array of attribute values (may be heterogeneous). + Array(Vec), + /// List of key-value pairs. + KeyValueList(Vec<(MetaString, AttributeValue)>), +} - /// The decision maker identifier indicating which sampler made the sampling decision. - /// - /// Common values include: - /// - `-9`: Probabilistic sampler - /// - `-4`: Errors sampler - /// - `None`: No decision maker set - pub decision_maker: Option, +impl AttributeValue { + /// Returns the inner string if this is a `String` variant. + pub fn as_string(&self) -> Option<&MetaString> { + if let AttributeValue::String(s) = self { + Some(s) + } else { + None + } + } - /// The OTLP sampling rate applied to this trace. - /// - /// This corresponds to the `_dd.otlp_sr` tag and represents the effective sampling rate - /// from the OTLP ingest path. - pub otlp_sampling_rate: Option, -} + /// Returns the inner float if this is a `Float` variant. + pub fn as_float(&self) -> Option { + if let AttributeValue::Float(f) = self { + Some(*f) + } else { + None + } + } -impl TraceSampling { - /// Creates a new `TraceSampling` instance. - pub fn new( - dropped_trace: bool, priority: Option, decision_maker: Option, otlp_sampling_rate: Option, - ) -> Self { - Self { - dropped_trace, - priority, - decision_maker, - otlp_sampling_rate, + /// Returns the inner bytes if this is a `Bytes` variant. + pub fn as_bytes(&self) -> Option<&[u8]> { + if let AttributeValue::Bytes(b) = self { + Some(b) + } else { + None } } } @@ -56,27 +59,81 @@ impl TraceSampling { /// A trace is a collection of spans that represent a distributed trace. #[derive(Clone, Debug, PartialEq)] pub struct Trace { + // ── Core fields ────────────────────────────────────────────────────────────── /// The spans that make up this trace. spans: Vec, - /// Resource-level tags associated with this trace. 
- /// - /// This is derived from the resource of the spans and used to construct the tracer payload. - resource_tags: TagSet, - /// Trace-level sampling metadata. - /// - /// This field contains sampling decision information (priority, decision maker, rates) - /// that applies to the entire trace. It is set by the trace sampler component and consumed - /// by the encoder to populate trace chunk metadata. - sampling: Option, + + // ── Unified fields (public) ────────────────────────────────────────────────── + /// Upper 8 bytes of the 128-bit trace ID (big-endian). Zero for 64-bit-only sources. + pub trace_id_high: u64, + /// Lower 8 bytes of the 128-bit trace ID (big-endian). + pub trace_id_low: u64, + /// Trace origin string (e.g. `"lambda"`, `"rum"`). + pub origin: MetaString, + + // Payload-level metadata (promoted from the tracer payload or OTLP resource). + /// Container ID associated with the tracer. + pub container_id: MetaString, + /// Tracer language name (e.g. `"go"`, `"python"`). + pub language_name: MetaString, + /// Tracer language runtime version. + pub language_version: MetaString, + /// Tracer library version. + pub tracer_version: MetaString, + /// Tracer runtime ID. + pub runtime_id: MetaString, + /// Deployment environment (e.g. `"production"`, `"staging"`). + pub env: MetaString, + /// Hostname of the tracer host. + pub hostname: MetaString, + /// Application version string. + pub app_version: MetaString, + /// Per-chunk weight from `Datadog-Client-Dropped-P0-Traces` header. Zero if absent. + pub client_dropped_p0s_weight: f64, + + /// Chunk-level or resource-level attributes (replaces `resource_tags` and + /// `V1TraceChunk.attributes` once downstream consumers are migrated). + pub attributes: FastHashMap, + + // Flat sampling fields. + /// Sampling priority set by the tracer or a sampler. + pub priority: Option, + /// Whether this trace was dropped during sampling. 
+ pub dropped_trace: bool, + /// Sampling mechanism identifier (see Datadog trace agent constants). + pub sampling_mechanism: u32, + /// Identifier of the component that made the final sampling decision. + pub decision_maker: Option, + /// Effective OTLP sampling rate (`_dd.otlp_sr`), if set. + pub otlp_sampling_rate: Option, } impl Trace { /// Creates a new `Trace` with the given spans. - pub fn new(spans: Vec, resource_tags: impl Into) -> Self { + /// + /// All unified fields default to empty / zero. Callers should set them + /// directly after construction. + pub fn new(spans: Vec) -> Self { Self { spans, - resource_tags: resource_tags.into(), - sampling: None, + trace_id_high: 0, + trace_id_low: 0, + origin: MetaString::empty(), + container_id: MetaString::empty(), + language_name: MetaString::empty(), + language_version: MetaString::empty(), + tracer_version: MetaString::empty(), + runtime_id: MetaString::empty(), + env: MetaString::empty(), + hostname: MetaString::empty(), + app_version: MetaString::empty(), + client_dropped_p0s_weight: 0.0, + attributes: FastHashMap::default(), + priority: None, + dropped_trace: false, + sampling_mechanism: 0, + decision_maker: None, + otlp_sampling_rate: None, } } @@ -141,20 +198,6 @@ impl Trace { let _ = std::mem::replace(&mut self.spans, spans); } - /// Returns the resource-level tags associated with this trace. - pub fn resource_tags(&self) -> &TagSet { - &self.resource_tags - } - - /// Returns a reference to the trace-level sampling metadata, if present. - pub fn sampling(&self) -> Option<&TraceSampling> { - self.sampling.as_ref() - } - - /// Sets the trace-level sampling metadata. - pub fn set_sampling(&mut self, sampling: Option) { - self.sampling = sampling; - } } /// A span event. @@ -166,8 +209,6 @@ pub struct Span { name: MetaString, /// The resource associated with this span. resource: MetaString, - /// The trace identifier this span belongs to. - trace_id: u64, /// The unique identifier of this span. 
span_id: u64, /// The identifier of this span's parent, if any. @@ -178,18 +219,24 @@ pub struct Span { duration: u64, /// Error flag represented as 0 (no error) or 1 (error). error: i32, - /// String-valued tags attached to this span. - meta: FastHashMap, - /// Numeric-valued tags attached to this span. - metrics: FastHashMap, /// Span type classification (for example, web, db, lambda). span_type: MetaString, - /// Structured metadata payloads. - meta_struct: FastHashMap>, /// Links describing relationships to other spans. span_links: Vec, /// Events associated with this span. span_events: Vec, + + // ── New V1 / unified fields ────────────────────────────────────────────────── + /// Per-span environment override (V1 path). Overrides `Trace.env` when non-empty. + pub env: MetaString, + /// Per-span application version (V1 path). + pub version: MetaString, + /// Instrumentation component name (V1 path). + pub component: MetaString, + /// Span kind (OTEL values): 0=unspecified, 1=internal, 2=server, 3=client, 4=producer, 5=consumer. + pub kind: u32, + /// Typed span-level attributes (replaces `meta`, `metrics`, and `meta_struct`). + pub attributes: FastHashMap, } impl Span { @@ -197,15 +244,13 @@ impl Span { #[allow(clippy::too_many_arguments)] pub fn new( service: impl Into, name: impl Into, resource: impl Into, - span_type: impl Into, trace_id: u64, span_id: u64, parent_id: u64, start: u64, duration: u64, - error: i32, + span_type: impl Into, span_id: u64, parent_id: u64, start: u64, duration: u64, error: i32, ) -> Self { Self { service: service.into(), name: name.into(), resource: resource.into(), span_type: span_type.into(), - trace_id, span_id, parent_id, start, @@ -233,12 +278,6 @@ impl Span { self } - /// Sets the trace identifier. - pub fn with_trace_id(mut self, trace_id: u64) -> Self { - self.trace_id = trace_id; - self - } - /// Sets the span identifier. 
pub fn with_span_id(mut self, span_id: u64) -> Self { self.span_id = span_id; @@ -275,21 +314,45 @@ impl Span { self } - /// Replaces the string-valued tag map. + /// Inserts string-valued entries into the unified attributes map. + /// + /// Entries are merged into `attributes`; passing `None` is a no-op. Keys must be unique across + /// `with_meta`, `with_metrics`, and `with_meta_struct` — a key present in more than one call + /// will be overwritten by the last call. pub fn with_meta(mut self, meta: impl Into>>) -> Self { - self.meta = meta.into().unwrap_or_default(); + if let Some(m) = meta.into() { + for (k, v) in m { + self.attributes.insert(k, AttributeValue::String(v)); + } + } self } - /// Replaces the numeric-valued tag map. + /// Inserts float-valued entries into the unified attributes map. + /// + /// Entries are merged into `attributes`; passing `None` is a no-op. Keys must be unique across + /// `with_meta`, `with_metrics`, and `with_meta_struct` — a key present in more than one call + /// will be overwritten by the last call. pub fn with_metrics(mut self, metrics: impl Into>>) -> Self { - self.metrics = metrics.into().unwrap_or_default(); + if let Some(m) = metrics.into() { + for (k, v) in m { + self.attributes.insert(k, AttributeValue::Float(v)); + } + } self } - /// Replaces the structured metadata map. + /// Inserts bytes-valued entries into the unified attributes map. + /// + /// Entries are merged into `attributes`; passing `None` is a no-op. Keys must be unique across + /// `with_meta`, `with_metrics`, and `with_meta_struct` — a key present in more than one call + /// will be overwritten by the last call. 
pub fn with_meta_struct(mut self, meta_struct: impl Into>>>) -> Self { - self.meta_struct = meta_struct.into().unwrap_or_default(); + if let Some(m) = meta_struct.into() { + for (k, v) in m { + self.attributes.insert(k, AttributeValue::Bytes(v)); + } + } self } @@ -305,6 +368,30 @@ impl Span { self } + /// Sets the per-span environment override. + pub fn with_env(mut self, env: impl Into) -> Self { + self.env = env.into(); + self + } + + /// Sets the per-span application version. + pub fn with_version(mut self, version: impl Into) -> Self { + self.version = version.into(); + self + } + + /// Sets the instrumentation component. + pub fn with_component(mut self, component: impl Into) -> Self { + self.component = component.into(); + self + } + + /// Sets the span kind. + pub fn with_kind(mut self, kind: u32) -> Self { + self.kind = kind; + self + } + /// Returns the service name. pub fn service(&self) -> &str { &self.service @@ -325,11 +412,6 @@ impl Span { self.resource = resource.into(); } - /// Returns the trace identifier. - pub fn trace_id(&self) -> u64 { - self.trace_id - } - /// Returns the span identifier. pub fn span_id(&self) -> u64 { self.span_id @@ -360,31 +442,6 @@ impl Span { &self.span_type } - /// Returns the string-valued tag map. - pub fn meta(&self) -> &FastHashMap { - &self.meta - } - - /// Returns a mutable reference to the meta map. - pub fn meta_mut(&mut self) -> &mut FastHashMap { - &mut self.meta - } - - /// Returns the numeric-valued tag map. - pub fn metrics(&self) -> &FastHashMap { - &self.metrics - } - - /// Returns a mutable reference to the metrics map. - pub fn metrics_mut(&mut self) -> &mut FastHashMap { - &mut self.metrics - } - - /// Returns the structured metadata map. - pub fn meta_struct(&self) -> &FastHashMap> { - &self.meta_struct - } - /// Returns the span links collection. pub fn span_links(&self) -> &[SpanLink] { &self.span_links @@ -406,7 +463,7 @@ pub struct SpanLink { /// Span identifier for the linked span. 
span_id: u64, /// Additional attributes attached to the link. - attributes: FastHashMap, + attributes: FastHashMap, /// W3C tracestate value. tracestate: MetaString, /// W3C trace flags where the high bit must be set when provided. @@ -442,7 +499,7 @@ impl SpanLink { } /// Replaces the attributes map. - pub fn with_attributes(mut self, attributes: impl Into>>) -> Self { + pub fn with_attributes(mut self, attributes: impl Into>>) -> Self { self.attributes = attributes.into().unwrap_or_default(); self } @@ -475,7 +532,7 @@ impl SpanLink { } /// Returns the attributes map. - pub fn attributes(&self) -> &FastHashMap { + pub fn attributes(&self) -> &FastHashMap { &self.attributes } @@ -524,7 +581,9 @@ impl SpanEvent { } /// Replaces the attributes map. - pub fn with_attributes(mut self, attributes: impl Into>>) -> Self { + pub fn with_attributes( + mut self, attributes: impl Into>>, + ) -> Self { self.attributes = attributes.into().unwrap_or_default(); self } @@ -544,31 +603,3 @@ impl SpanEvent { &self.attributes } } - -/// Values supported for span and event attributes. -#[derive(Clone, Debug, PartialEq)] -pub enum AttributeValue { - /// String attribute value. - String(MetaString), - /// Boolean attribute value. - Bool(bool), - /// Integer attribute value. - Int(i64), - /// Floating-point attribute value. - Double(f64), - /// Array attribute values. - Array(Vec), -} - -/// Scalar values supported inside attribute arrays. -#[derive(Clone, Debug, PartialEq)] -pub enum AttributeScalarValue { - /// String array value. - String(MetaString), - /// Boolean array value. - Bool(bool), - /// Integer array value. - Int(i64), - /// Floating-point array value. - Double(f64), -}