From ff1728654aa1d2efd7f9e944477c516bfbf6fa7f Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 28 Apr 2026 16:48:08 -0400 Subject: [PATCH 1/6] feat(codecs): add Deserializer::parse_with_secrets and Decoder::with_secrets_template Allows sources to forward per-request secrets (e.g. authentication tokens) into the Deserializer pipeline so user-authored programs like VRL decoders can read them via get_secret!() during decoding. VrlDeserializer overrides parse_with_secrets to inject the secrets into the synthetic event before the VRL program executes, making them visible as %vector.secrets.* at runtime. All other Deserializer implementations use the default implementation which merges the template onto each emitted event after parsing, with the codec's own values taking priority. --- lib/codecs/src/decoding/decoder.rs | 40 +++++++++-- lib/codecs/src/decoding/format/mod.rs | 26 +++++++- lib/codecs/src/decoding/format/vrl.rs | 96 +++++++++++++++++++++++++-- lib/vector-core/src/event/mod.rs | 2 +- 4 files changed, 152 insertions(+), 12 deletions(-) diff --git a/lib/codecs/src/decoding/decoder.rs b/lib/codecs/src/decoding/decoder.rs index 85a0fce42148e..c54199788451c 100644 --- a/lib/codecs/src/decoding/decoder.rs +++ b/lib/codecs/src/decoding/decoder.rs @@ -1,7 +1,7 @@ use bytes::{Bytes, BytesMut}; use smallvec::SmallVec; use vector_common::internal_event::emit; -use vector_core::{config::LogNamespace, event::Event}; +use vector_core::{config::LogNamespace, event::{Event, Secrets}}; use crate::{ decoding::format::Deserializer as _, @@ -23,6 +23,10 @@ pub struct Decoder { pub deserializer: Deserializer, /// The `log_namespace` being used. pub log_namespace: LogNamespace, + /// Per-decode-call secrets template. When set, the secrets are forwarded to + /// [`Deserializer::parse_with_secrets`] so user-authored programs (e.g. VRL) + /// that run inside the deserializer can read them via `%vector.secrets.*`. 
+ secrets_template: Option<Secrets>, } impl Default for Decoder { @@ -31,6 +35,7 @@ impl Default for Decoder { framer: Framer::NewlineDelimited(NewlineDelimitedDecoder::new()), deserializer: Deserializer::Bytes(BytesDeserializer), log_namespace: LogNamespace::Legacy, + secrets_template: None, } } } @@ -39,20 +44,35 @@ impl Decoder { /// Creates a new `Decoder` with the specified `Framer` to produce byte /// frames from the byte stream / byte messages and `Deserializer` to parse /// structured events from a byte frame. - pub const fn new(framer: Framer, deserializer: Deserializer) -> Self { + pub fn new(framer: Framer, deserializer: Deserializer) -> Self { Self { framer, deserializer, log_namespace: LogNamespace::Legacy, + secrets_template: None, } } /// Sets the log namespace that will be used when decoding. - pub const fn with_log_namespace(mut self, log_namespace: LogNamespace) -> Self { + pub fn with_log_namespace(mut self, log_namespace: LogNamespace) -> Self { self.log_namespace = log_namespace; self } + /// Attaches a per-decode-call secrets template. When set, the secrets are + /// forwarded to [`Deserializer::parse_with_secrets`] so that deserializers + /// like the VRL codec can expose them to user-authored programs via + /// `%vector.secrets.*` during decoding. + /// + /// For the VRL deserializer, secrets are injected into the synthetic event + /// *before* the VRL program executes. For all other deserializers they are + /// merged into each emitted event's secret store after parsing, filling gaps + /// without overwriting anything the codec set itself. + pub fn with_secrets_template(mut self, secrets: Secrets) -> Self { + self.secrets_template = Some(secrets); + self + } + /// Handles the framing result and parses it into a structured event, if /// possible. /// @@ -75,9 +95,17 @@ impl Decoder { pub fn deserializer_parse(&self, frame: Bytes) -> Result<(SmallVec<[Event; 1]>, usize), Error> { let byte_size = frame.len(); - // Parse structured events from the byte frame. 
- self.deserializer - .parse(frame, self.log_namespace) + // Parse structured events from the byte frame, forwarding any secrets + // template so that deserializers that run user programs (e.g. VRL) can + // make them available during execution. + let result = match &self.secrets_template { + Some(secrets) => self + .deserializer + .parse_with_secrets(frame, self.log_namespace, secrets), + None => self.deserializer.parse(frame, self.log_namespace), + }; + + result .map(|events| (events, byte_size)) .map_err(|error| { emit(DecoderDeserializeError { error: &error }); diff --git a/lib/codecs/src/decoding/format/mod.rs b/lib/codecs/src/decoding/format/mod.rs index cbb2172c3392d..0a986eae22afc 100644 --- a/lib/codecs/src/decoding/format/mod.rs +++ b/lib/codecs/src/decoding/format/mod.rs @@ -33,7 +33,7 @@ pub use protobuf::{ProtobufDeserializer, ProtobufDeserializerConfig, ProtobufDes use smallvec::SmallVec; #[cfg(feature = "syslog")] pub use syslog::{SyslogDeserializer, SyslogDeserializerConfig, SyslogDeserializerOptions}; -use vector_core::{config::LogNamespace, event::Event}; +use vector_core::{config::LogNamespace, event::{Event, Secrets}}; pub use self::{ bytes::{BytesDeserializer, BytesDeserializerConfig}, @@ -57,6 +57,30 @@ pub trait Deserializer: DynClone + Send + Sync { bytes: Bytes, log_namespace: LogNamespace, ) -> vector_common::Result<SmallVec<[Event; 1]>>; + + /// Parses structured events from bytes, making per-request `secrets` available + /// to any user-authored program (e.g. VRL) that runs during decoding. + /// + /// The default implementation calls [`Self::parse`] and then merges the + /// template secrets into each emitted event's secret store. Because the merge + /// uses the event's own secrets as the authoritative source, codec-produced + /// secrets take priority and the template only fills gaps. 
+ /// + /// Override this method when the deserializer needs the secrets to be visible + /// *during* parsing rather than after — for example so that a VRL program can + /// read `%vector.secrets.*` as it executes. + fn parse_with_secrets( + &self, + bytes: Bytes, + log_namespace: LogNamespace, + secrets: &Secrets, + ) -> vector_common::Result<SmallVec<[Event; 1]>> { + let mut events = self.parse(bytes, log_namespace)?; + for event in &mut events { + event.metadata_mut().secrets_mut().merge(secrets.clone()); + } + Ok(events) + } } dyn_clone::clone_trait_object!(Deserializer); diff --git a/lib/codecs/src/decoding/format/vrl.rs b/lib/codecs/src/decoding/format/vrl.rs index 6b6a20be3be55..61ad4ec602bd7 100644 --- a/lib/codecs/src/decoding/format/vrl.rs +++ b/lib/codecs/src/decoding/format/vrl.rs @@ -13,6 +13,8 @@ use vrl::{ value::Kind, }; +use vector_core::event::Secrets; + use crate::{BytesDeserializerConfig, decoding::format::Deserializer}; /// Config used to build a `VrlDeserializer`. @@ -109,10 +111,21 @@ impl Deserializer for VrlDeserializer { log_namespace: LogNamespace, ) -> vector_common::Result<SmallVec<[Event; 1]>> { let event = parse_bytes(bytes, log_namespace); - match self.run_vrl(event, log_namespace) { - Ok(events) => Ok(events), - Err(e) => Err(e), - } + self.run_vrl(event, log_namespace) + } + + /// Overrides the default implementation so that `secrets` are injected into + /// the synthetic event *before* the VRL program executes, making them + /// readable via `%vector.secrets.*` from within the program. 
+ fn parse_with_secrets( + &self, + bytes: Bytes, + log_namespace: LogNamespace, + secrets: &Secrets, + ) -> vector_common::Result> { + let mut event = parse_bytes(bytes, log_namespace); + event.metadata_mut().secrets_mut().merge(secrets.clone()); + self.run_vrl(event, log_namespace) } } @@ -320,4 +333,79 @@ mod tests { .to_string(); assert!(error.contains("aborted")); } + + // Tests for `parse_with_secrets` ————————————————————————————————————————— + + /// `parse_with_secrets` with a VRL program that reads a secret injected via + /// the template. The secret must be readable from within the program via + /// `get_secret!()`. + #[test] + fn test_parse_with_secrets_vrl_can_read_secret() { + // VRL program copies the injected secret into an event field so we can + // assert on its value. The input bytes become `.message` (Legacy namespace) + // and we add `.secret_value` alongside it. + let decoder = make_decoder(r#".secret_value = get_secret!("my_token")"#); + + let mut secrets = Secrets::new(); + secrets.insert("my_token", "super-secret"); + + let bytes = Bytes::from(r#"hello"#); + let events = decoder + .parse_with_secrets(bytes, LogNamespace::Legacy, &secrets) + .expect("parse should succeed"); + + assert_eq!(events.len(), 1); + assert_eq!( + *events[0].as_log().get("secret_value").unwrap(), + Value::from("super-secret") + ); + } + + /// Verify that `parse_with_secrets` without an override (i.e. the default + /// implementation, exercised here through a non-VRL deserializer) merges the + /// template secrets onto the emitted event without overwriting codec-produced + /// secrets. 
+ #[test] + fn test_parse_with_secrets_default_impl_fills_gaps() { + use crate::BytesDeserializerConfig; + + let decoder = BytesDeserializerConfig::new().build(); + + let mut template = Secrets::new(); + template.insert("source_token", "from-source"); + + let bytes = Bytes::from(b"raw payload".as_ref()); + let events = decoder + .parse_with_secrets(bytes, LogNamespace::Legacy, &template) + .expect("parse should succeed"); + + assert_eq!(events.len(), 1); + assert_eq!( + events[0].metadata().secrets().get("source_token").unwrap().as_ref(), + "from-source" + ); + } + + /// Secrets explicitly set by the VRL program must win over the template + /// (template only fills gaps, codec has priority). + #[test] + fn test_parse_with_secrets_codec_wins_on_collision() { + // VRL explicitly sets a secret. The template also supplies a value for + // the same key. Because `set_secret` is called during VRL execution + // (AFTER the template is merged in), the VRL-produced value wins. + let decoder = make_decoder(r#"set_secret!("my_token", "codec-wins")"#); + + let mut template = Secrets::new(); + template.insert("my_token", "template-loses"); + + let bytes = Bytes::from(r#"hello"#); + let events = decoder + .parse_with_secrets(bytes, LogNamespace::Legacy, &template) + .expect("parse should succeed"); + + assert_eq!( + events[0].metadata().secrets().get("my_token").unwrap().as_ref(), + "codec-wins" + ); + } } diff --git a/lib/vector-core/src/event/mod.rs b/lib/vector-core/src/event/mod.rs index e70ebc4335c9d..fd9d9c7c88bbd 100644 --- a/lib/vector-core/src/event/mod.rs +++ b/lib/vector-core/src/event/mod.rs @@ -7,7 +7,7 @@ pub use finalization::{ Finalizable, }; pub use log_event::LogEvent; -pub use metadata::{DatadogMetricOriginMetadata, EventMetadata, WithMetadata}; +pub use metadata::{DatadogMetricOriginMetadata, EventMetadata, Secrets, WithMetadata}; pub use metric::{Metric, MetricKind, MetricTags, MetricValue, StatisticKind}; pub use r#ref::{EventMutRef, EventRef}; use 
serde::{Deserialize, Serialize}; From a12389d7f106cf09f453d6f77ce736a23b29559a Mon Sep 17 00:00:00 2001 From: Thomas Date: Wed, 29 Apr 2026 10:44:34 -0400 Subject: [PATCH 2/6] refactor: move metadata_template to VrlDeserializer only; no Decoder/trait changes --- lib/codecs/src/decoding/decoder.rs | 43 +++------- lib/codecs/src/decoding/format/mod.rs | 25 +----- lib/codecs/src/decoding/format/vrl.rs | 109 +++++++++++--------------- lib/codecs/src/decoding/mod.rs | 14 +++- 4 files changed, 74 insertions(+), 117 deletions(-) diff --git a/lib/codecs/src/decoding/decoder.rs b/lib/codecs/src/decoding/decoder.rs index c54199788451c..499955495941b 100644 --- a/lib/codecs/src/decoding/decoder.rs +++ b/lib/codecs/src/decoding/decoder.rs @@ -1,7 +1,7 @@ use bytes::{Bytes, BytesMut}; use smallvec::SmallVec; use vector_common::internal_event::emit; -use vector_core::{config::LogNamespace, event::{Event, Secrets}}; +use vector_core::{config::LogNamespace, event::{Event, EventMetadata}}; use crate::{ decoding::format::Deserializer as _, @@ -23,10 +23,6 @@ pub struct Decoder { pub deserializer: Deserializer, /// The `log_namespace` being used. pub log_namespace: LogNamespace, - /// Per-decode-call secrets template. When set, the secrets are forwarded to - /// [`Deserializer::parse_with_secrets`] so user-authored programs (e.g. VRL) - /// that run inside the deserializer can read them via `%vector.secrets.*`. - secrets_template: Option, } impl Default for Decoder { @@ -35,7 +31,6 @@ impl Default for Decoder { framer: Framer::NewlineDelimited(NewlineDelimitedDecoder::new()), deserializer: Deserializer::Bytes(BytesDeserializer), log_namespace: LogNamespace::Legacy, - secrets_template: None, } } } @@ -44,32 +39,28 @@ impl Decoder { /// Creates a new `Decoder` with the specified `Framer` to produce byte /// frames from the byte stream / byte messages and `Deserializer` to parse /// structured events from a byte frame. 
- pub fn new(framer: Framer, deserializer: Deserializer) -> Self { + pub const fn new(framer: Framer, deserializer: Deserializer) -> Self { Self { framer, deserializer, log_namespace: LogNamespace::Legacy, - secrets_template: None, } } /// Sets the log namespace that will be used when decoding. - pub fn with_log_namespace(mut self, log_namespace: LogNamespace) -> Self { + pub const fn with_log_namespace(mut self, log_namespace: LogNamespace) -> Self { self.log_namespace = log_namespace; self } - /// Attaches a per-decode-call secrets template. When set, the secrets are - /// forwarded to [`Deserializer::parse_with_secrets`] so that deserializers - /// like the VRL codec can expose them to user-authored programs via - /// `%vector.secrets.*` during decoding. + /// Attaches a per-decode-call metadata template to the inner deserializer. /// - /// For the VRL deserializer, secrets are injected into the synthetic event - /// *before* the VRL program executes. For all other deserializers they are - /// merged into each emitted event's secret store after parsing, filling gaps - /// without overwriting anything the codec set itself. - pub fn with_secrets_template(mut self, secrets: Secrets) -> Self { - self.secrets_template = Some(secrets); + /// For deserializers that support it (currently only `VrlDeserializer`) the + /// template is pre-populated on the synthetic event before any user program + /// executes, making every `%`-prefixed path readable (e.g. `%splunk_hec.host`, + /// `%vector.secrets.*`). For all other deserializers this is a no-op. + pub fn with_metadata_template(mut self, metadata: EventMetadata) -> Self { + self.deserializer = self.deserializer.with_metadata_template(metadata); self } @@ -95,17 +86,9 @@ impl Decoder { pub fn deserializer_parse(&self, frame: Bytes) -> Result<(SmallVec<[Event; 1]>, usize), Error> { let byte_size = frame.len(); - // Parse structured events from the byte frame, forwarding any secrets - // template so that deserializers that run user programs (e.g. 
VRL) can - // make them available during execution. - let result = match &self.secrets_template { - Some(secrets) => self - .deserializer - .parse_with_secrets(frame, self.log_namespace, secrets), - None => self.deserializer.parse(frame, self.log_namespace), - }; - - result + // Parse structured events from the byte frame. + self.deserializer + .parse(frame, self.log_namespace) .map(|events| (events, byte_size)) .map_err(|error| { emit(DecoderDeserializeError { error: &error }); diff --git a/lib/codecs/src/decoding/format/mod.rs b/lib/codecs/src/decoding/format/mod.rs index 0a986eae22afc..9a433861b277f 100644 --- a/lib/codecs/src/decoding/format/mod.rs +++ b/lib/codecs/src/decoding/format/mod.rs @@ -33,7 +33,7 @@ pub use protobuf::{ProtobufDeserializer, ProtobufDeserializerConfig, ProtobufDes use smallvec::SmallVec; #[cfg(feature = "syslog")] pub use syslog::{SyslogDeserializer, SyslogDeserializerConfig, SyslogDeserializerOptions}; -use vector_core::{config::LogNamespace, event::{Event, Secrets}}; +use vector_core::{config::LogNamespace, event::Event}; pub use self::{ bytes::{BytesDeserializer, BytesDeserializerConfig}, @@ -58,29 +58,6 @@ pub trait Deserializer: DynClone + Send + Sync { log_namespace: LogNamespace, ) -> vector_common::Result<SmallVec<[Event; 1]>>; - /// Parses structured events from bytes, making per-request `secrets` available - /// to any user-authored program (e.g. VRL) that runs during decoding. - /// - /// The default implementation calls [`Self::parse`] and then merges the - /// template secrets into each emitted event's secret store. Because the merge - /// uses the event's own secrets as the authoritative source, codec-produced - /// secrets take priority and the template only fills gaps. - /// - /// Override this method when the deserializer needs the secrets to be visible - /// *during* parsing rather than after — for example so that a VRL program can - /// read `%vector.secrets.*` as it executes. 
- fn parse_with_secrets( - &self, - bytes: Bytes, - log_namespace: LogNamespace, - secrets: &Secrets, - ) -> vector_common::Result<SmallVec<[Event; 1]>> { - let mut events = self.parse(bytes, log_namespace)?; - for event in &mut events { - event.metadata_mut().secrets_mut().merge(secrets.clone()); - } - Ok(events) - } } dyn_clone::clone_trait_object!(Deserializer); diff --git a/lib/codecs/src/decoding/format/vrl.rs b/lib/codecs/src/decoding/format/vrl.rs index 61ad4ec602bd7..90e505d6850e7 100644 --- a/lib/codecs/src/decoding/format/vrl.rs +++ b/lib/codecs/src/decoding/format/vrl.rs @@ -13,9 +13,9 @@ use vrl::{ value::Kind, }; -use vector_core::event::Secrets; +use vector_core::event::EventMetadata; -use crate::{BytesDeserializerConfig, decoding::format::Deserializer}; +use crate::decoding::format::Deserializer; /// Config used to build a `VrlDeserializer`. #[configurable_component] @@ -66,6 +66,7 @@ impl VrlDeserializerConfig { Ok(result) => Ok(VrlDeserializer { program: result.program, timezone: self.vrl.timezone.unwrap_or(TimeZone::Local), + metadata_template: None, }), Err(diagnostics) => Err(Formatter::new(&self.vrl.source, diagnostics) .to_string() @@ -96,9 +97,26 @@ impl VrlDeserializerConfig { pub struct VrlDeserializer { program: Program, timezone: TimeZone, + /// When set, this metadata is injected into the synthetic event *before* the + /// VRL program executes, making every `%`-prefixed path in the template readable + /// from within the program (e.g. `%splunk_hec.host`, `%vector.secrets.*`). + metadata_template: Option<EventMetadata>, +} + +impl VrlDeserializer { + /// Set a metadata template that will be pre-populated on each synthetic event + /// before the VRL program runs. This allows sources to expose per-request + /// context (authentication tokens, envelope fields, etc.) to VRL via the + /// `%` path namespace. 
+ #[must_use] + pub fn with_metadata_template(mut self, metadata: EventMetadata) -> Self { + self.metadata_template = Some(metadata); + self + } } fn parse_bytes(bytes: Bytes, log_namespace: LogNamespace) -> Event { + use crate::BytesDeserializerConfig; let bytes_deserializer = BytesDeserializerConfig::new().build(); let log_event = bytes_deserializer.parse_single(bytes, log_namespace); Event::from(log_event) @@ -109,22 +127,14 @@ impl Deserializer for VrlDeserializer { &self, bytes: Bytes, log_namespace: LogNamespace, - ) -> vector_common::Result> { - let event = parse_bytes(bytes, log_namespace); - self.run_vrl(event, log_namespace) - } - - /// Overrides the default implementation so that `secrets` are injected into - /// the synthetic event *before* the VRL program executes, making them - /// readable via `%vector.secrets.*` from within the program. - fn parse_with_secrets( - &self, - bytes: Bytes, - log_namespace: LogNamespace, - secrets: &Secrets, ) -> vector_common::Result> { let mut event = parse_bytes(bytes, log_namespace); - event.metadata_mut().secrets_mut().merge(secrets.clone()); + if let Some(template) = &self.metadata_template { + // Pre-populate the synthetic event with the source-assembled metadata so + // every `%`-prefixed path is in scope when VRL executes. This lets + // user programs read `%splunk_hec.host`, `%vector.secrets.*`, etc. + *event.metadata_mut() = template.clone(); + } self.run_vrl(event, log_namespace) } } @@ -334,24 +344,29 @@ mod tests { assert!(error.contains("aborted")); } - // Tests for `parse_with_secrets` ————————————————————————————————————————— + // Tests for `with_metadata_template` ————————————————————————————————————— - /// `parse_with_secrets` with a VRL program that reads a secret injected via - /// the template. The secret must be readable from within the program via - /// `get_secret!()`. 
+ fn metadata_with_secret(key: &str, value: &str) -> EventMetadata { + let mut metadata = EventMetadata::default(); + metadata.secrets_mut().insert(key, value); + metadata + } + + /// A VRL program that uses `get_secret!()` can read a secret injected via + /// `with_metadata_template`. #[test] - fn test_parse_with_secrets_vrl_can_read_secret() { + fn test_with_metadata_template_vrl_can_read_secret() { // VRL program copies the injected secret into an event field so we can // assert on its value. The input bytes become `.message` (Legacy namespace) // and we add `.secret_value` alongside it. - let decoder = make_decoder(r#".secret_value = get_secret!("my_token")"#); - - let mut secrets = Secrets::new(); - secrets.insert("my_token", "super-secret"); + let decoder = + make_decoder(r#".secret_value = get_secret!("my_token")"#).with_metadata_template( + metadata_with_secret("my_token", "super-secret"), + ); let bytes = Bytes::from(r#"hello"#); let events = decoder - .parse_with_secrets(bytes, LogNamespace::Legacy, &secrets) + .parse(bytes, LogNamespace::Legacy) .expect("parse should succeed"); assert_eq!(events.len(), 1); @@ -361,46 +376,16 @@ mod tests { ); } - /// Verify that `parse_with_secrets` without an override (i.e. the default - /// implementation, exercised here through a non-VRL deserializer) merges the - /// template secrets onto the emitted event without overwriting codec-produced - /// secrets. 
- #[test] - fn test_parse_with_secrets_default_impl_fills_gaps() { - use crate::BytesDeserializerConfig; - - let decoder = BytesDeserializerConfig::new().build(); - - let mut template = Secrets::new(); - template.insert("source_token", "from-source"); - - let bytes = Bytes::from(b"raw payload".as_ref()); - let events = decoder - .parse_with_secrets(bytes, LogNamespace::Legacy, &template) - .expect("parse should succeed"); - - assert_eq!(events.len(), 1); - assert_eq!( - events[0].metadata().secrets().get("source_token").unwrap().as_ref(), - "from-source" - ); - } - - /// Secrets explicitly set by the VRL program must win over the template - /// (template only fills gaps, codec has priority). + /// Secrets explicitly set by the VRL program win over the template because + /// `set_secret!` runs after the template is pre-populated. #[test] - fn test_parse_with_secrets_codec_wins_on_collision() { - // VRL explicitly sets a secret. The template also supplies a value for - // the same key. Because `set_secret` is called during VRL execution - // (AFTER the template is merged in), the VRL-produced value wins. 
- let decoder = make_decoder(r#"set_secret!("my_token", "codec-wins")"#); - - let mut template = Secrets::new(); - template.insert("my_token", "template-loses"); + fn test_with_metadata_template_codec_wins_on_collision() { + let decoder = make_decoder(r#"set_secret!("my_token", "codec-wins")"#) + .with_metadata_template(metadata_with_secret("my_token", "template-loses")); let bytes = Bytes::from(r#"hello"#); let events = decoder - .parse_with_secrets(bytes, LogNamespace::Legacy, &template) + .parse(bytes, LogNamespace::Legacy) .expect("parse should succeed"); assert_eq!( diff --git a/lib/codecs/src/decoding/mod.rs b/lib/codecs/src/decoding/mod.rs index c87337856454a..c178b31a818d6 100644 --- a/lib/codecs/src/decoding/mod.rs +++ b/lib/codecs/src/decoding/mod.rs @@ -37,7 +37,7 @@ use smallvec::SmallVec; use vector_config::configurable_component; use vector_core::{ config::{DataType, LogNamespace}, - event::Event, + event::{Event, EventMetadata}, schema, }; @@ -542,6 +542,18 @@ pub enum Deserializer { Vrl(VrlDeserializer), } +impl Deserializer { + /// Attaches a metadata template to the inner deserializer, if it supports + /// one. Currently only [`VrlDeserializer`] uses this; for all other variants + /// this is a no-op and `self` is returned unchanged. + pub fn with_metadata_template(self, metadata: EventMetadata) -> Self { + match self { + Deserializer::Vrl(d) => Deserializer::Vrl(d.with_metadata_template(metadata)), + other => other, + } + } +} + impl format::Deserializer for Deserializer { fn parse( &self, From ac4873bf135cee789271410f77b050df1e0a4e6e Mon Sep 17 00:00:00 2001 From: Thomas Date: Wed, 29 Apr 2026 11:07:50 -0400 Subject: [PATCH 3/6] feat(codecs): add inject_metadata to VRL decoder; wire up in exec source Adds VrlDeserializerOptions.inject_metadata. When true, the source can call Decoder::with_metadata_template to pre-populate the synthetic event before VRL executes, making source context readable via % paths (e.g. %exec.host, %exec.command). 
VRL-produced values always win over injected values on collision. The exec source is the first consumer: it injects hostname and command into the decoder template at build time. Zero overhead when inject_metadata is false. --- ...vrl_decoder_inject_metadata.enhancement.md | 1 + lib/codecs/src/decoding/format/vrl.rs | 60 ++++++++++++++----- src/sources/exec/mod.rs | 20 ++++++- .../sinks/generated/websocket_server.cue | 13 ++++ .../components/sources/generated/amqp.cue | 13 ++++ .../generated/aws_kinesis_firehose.cue | 13 ++++ .../components/sources/generated/aws_s3.cue | 13 ++++ .../components/sources/generated/aws_sqs.cue | 13 ++++ .../sources/generated/datadog_agent.cue | 13 ++++ .../sources/generated/demo_logs.cue | 13 ++++ .../components/sources/generated/exec.cue | 13 ++++ .../sources/generated/file_descriptor.cue | 13 ++++ .../sources/generated/gcp_pubsub.cue | 13 ++++ .../sources/generated/heroku_logs.cue | 13 ++++ .../components/sources/generated/http.cue | 13 ++++ .../sources/generated/http_client.cue | 13 ++++ .../sources/generated/http_server.cue | 13 ++++ .../components/sources/generated/kafka.cue | 13 ++++ .../components/sources/generated/mqtt.cue | 13 ++++ .../components/sources/generated/nats.cue | 13 ++++ .../components/sources/generated/pulsar.cue | 13 ++++ .../components/sources/generated/redis.cue | 13 ++++ .../components/sources/generated/socket.cue | 13 ++++ .../components/sources/generated/stdin.cue | 13 ++++ .../sources/generated/websocket.cue | 13 ++++ 25 files changed, 352 insertions(+), 15 deletions(-) create mode 100644 changelog.d/vrl_decoder_inject_metadata.enhancement.md diff --git a/changelog.d/vrl_decoder_inject_metadata.enhancement.md b/changelog.d/vrl_decoder_inject_metadata.enhancement.md new file mode 100644 index 0000000000000..cec8026620e9c --- /dev/null +++ b/changelog.d/vrl_decoder_inject_metadata.enhancement.md @@ -0,0 +1 @@ +The `vrl` codec now supports an `inject_metadata` option. 
When set to `true`, sources can inject per-request metadata into the VRL program before it executes, making source-specific context readable via `%`-prefixed paths (e.g. `%exec.host`, `%exec.command`, `%vector.secrets.*`). The `exec` source is the first to support this. VRL-produced metadata always takes priority over injected values on collision. diff --git a/lib/codecs/src/decoding/format/vrl.rs b/lib/codecs/src/decoding/format/vrl.rs index 90e505d6850e7..d1df402632701 100644 --- a/lib/codecs/src/decoding/format/vrl.rs +++ b/lib/codecs/src/decoding/format/vrl.rs @@ -47,6 +47,16 @@ pub struct VrlDeserializerOptions { #[serde(default)] #[configurable(metadata(docs::advanced))] pub timezone: Option<TimeZone>, + + /// When `true`, the source may inject per-request metadata into the VRL + /// runtime before the program executes. Injected metadata is accessible + /// via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + /// + /// Each source controls which metadata it injects; see the source + /// documentation for details. If the source does not support metadata + /// injection, this option has no effect. + #[serde(default)] + pub inject_metadata: bool, } impl VrlDeserializerConfig { @@ -66,6 +76,7 @@ impl VrlDeserializerConfig { Ok(result) => Ok(VrlDeserializer { program: result.program, timezone: self.vrl.timezone.unwrap_or(TimeZone::Local), + inject_metadata_enabled: self.vrl.inject_metadata, metadata_template: None, }), Err(diagnostics) => Err(Formatter::new(&self.vrl.source, diagnostics) @@ -97,20 +108,28 @@ impl VrlDeserializerConfig { pub struct VrlDeserializer { program: Program, timezone: TimeZone, - /// When set, this metadata is injected into the synthetic event *before* the - /// VRL program executes, making every `%`-prefixed path in the template readable - /// from within the program (e.g. `%splunk_hec.host`, `%vector.secrets.*`). + /// Whether this deserializer accepts a metadata template from its source. 
+ /// Set from [`VrlDeserializerOptions::inject_metadata`] at build time. + inject_metadata_enabled: bool, + /// Per-call metadata template. Only populated when `inject_metadata_enabled` + /// is true and the source calls [`VrlDeserializer::with_metadata_template`]. metadata_template: Option<EventMetadata>, } impl VrlDeserializer { - /// Set a metadata template that will be pre-populated on each synthetic event - /// before the VRL program runs. This allows sources to expose per-request - /// context (authentication tokens, envelope fields, etc.) to VRL via the - /// `%` path namespace. + /// Attach a metadata template that will be pre-populated on each synthetic + /// event before the VRL program runs. This is a no-op unless + /// `inject_metadata: true` was set in the VRL decoder config. + /// + /// Sources call this once per request/frame with the metadata they have + /// assembled (e.g. envelope fields, auth tokens). VRL can then read those + /// values via `%`-prefixed paths such as `%exec.host` or + /// `%vector.secrets.*`. #[must_use] pub fn with_metadata_template(mut self, metadata: EventMetadata) -> Self { - self.metadata_template = Some(metadata); + if self.inject_metadata_enabled { + self.metadata_template = Some(metadata); + } self } } @@ -171,6 +190,19 @@ mod tests { vrl: VrlDeserializerOptions { source: source.to_string(), timezone: None, + inject_metadata: false, + }, + } + .build() + .expect("Failed to build VrlDeserializer") + } + + fn make_decoder_with_inject_metadata(source: &str) -> VrlDeserializer { + VrlDeserializerConfig { + vrl: VrlDeserializerOptions { + source: source.to_string(), + timezone: None, + inject_metadata: true, }, } .build() @@ -325,6 +357,7 @@ mod tests { vrl: VrlDeserializerOptions { source: ". ?".to_string(), timezone: None, + inject_metadata: false, }, } .build() @@ -359,10 +392,8 @@ mod tests { // VRL program copies the injected secret into an event field so we can // assert on its value. 
The input bytes become `.message` (Legacy namespace) // and we add `.secret_value` alongside it. - let decoder = - make_decoder(r#".secret_value = get_secret!("my_token")"#).with_metadata_template( - metadata_with_secret("my_token", "super-secret"), - ); + let decoder = make_decoder_with_inject_metadata(r#".secret_value = get_secret!("my_token")"#) + .with_metadata_template(metadata_with_secret("my_token", "super-secret")); let bytes = Bytes::from(r#"hello"#); let events = decoder @@ -380,8 +411,9 @@ mod tests { /// `set_secret!` runs after the template is pre-populated. #[test] fn test_with_metadata_template_codec_wins_on_collision() { - let decoder = make_decoder(r#"set_secret!("my_token", "codec-wins")"#) - .with_metadata_template(metadata_with_secret("my_token", "template-loses")); + let decoder = + make_decoder_with_inject_metadata(r#"set_secret!("my_token", "codec-wins")"#) + .with_metadata_template(metadata_with_secret("my_token", "template-loses")); let bytes = Bytes::from(r#"hello"#); let events = decoder diff --git a/src/sources/exec/mod.rs b/src/sources/exec/mod.rs index b87452ef107d1..1979735c375ed 100644 --- a/src/sources/exec/mod.rs +++ b/src/sources/exec/mod.rs @@ -19,6 +19,7 @@ use vector_lib::{ }, config::{LegacyKey, LogNamespace, log_schema}, configurable::configurable_component, + event::EventMetadata, internal_event::{ByteSize, BytesReceived, InternalEventHandle as _, Protocol}, lookup::{owned_value_path, path}, }; @@ -254,7 +255,24 @@ impl SourceConfig for ExecConfig { .framing .clone() .unwrap_or_else(|| self.decoding.default_stream_framing()); - let decoder = DecodingConfig::new(framing, self.decoding.clone(), log_namespace).build()?; + let mut decoder = + DecodingConfig::new(framing, self.decoding.clone(), log_namespace).build()?; + + // If the VRL decoder has `inject_metadata: true`, build a metadata + // template with per-source context (hostname, command) so VRL programs + // can read `%exec.host`, `%exec.command`, etc. during decoding. 
+ // `with_metadata_template` is a no-op for non-VRL deserializers and for + // VRL deserializers with `inject_metadata: false`. + let mut source_metadata = EventMetadata::default(); + if let Some(ref hostname) = hostname { + source_metadata + .value_mut() + .insert("exec.host", hostname.clone()); + } + source_metadata + .value_mut() + .insert("exec.command", self.command.clone()); + decoder = decoder.with_metadata_template(source_metadata); match &self.mode { Mode::Scheduled => { diff --git a/website/cue/reference/components/sinks/generated/websocket_server.cue b/website/cue/reference/components/sinks/generated/websocket_server.cue index 28e5823c8c7e5..7e8ed45d1e26f 100644 --- a/website/cue/reference/components/sinks/generated/websocket_server.cue +++ b/website/cue/reference/components/sinks/generated/websocket_server.cue @@ -867,6 +867,19 @@ generated: components: sinks: websocket_server: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. 
diff --git a/website/cue/reference/components/sources/generated/amqp.cue b/website/cue/reference/components/sources/generated/amqp.cue index ef71f410f0e29..7b7ba1c4bfb2c 100644 --- a/website/cue/reference/components/sources/generated/amqp.cue +++ b/website/cue/reference/components/sources/generated/amqp.cue @@ -335,6 +335,19 @@ generated: components: sources: amqp: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/aws_kinesis_firehose.cue b/website/cue/reference/components/sources/generated/aws_kinesis_firehose.cue index 4464a2305412e..bf36d0fdb716b 100644 --- a/website/cue/reference/components/sources/generated/aws_kinesis_firehose.cue +++ b/website/cue/reference/components/sources/generated/aws_kinesis_firehose.cue @@ -338,6 +338,19 @@ generated: components: sources: aws_kinesis_firehose: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. 
If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/aws_s3.cue b/website/cue/reference/components/sources/generated/aws_s3.cue index 95730d6c68ca6..4ed0d6d1a045e 100644 --- a/website/cue/reference/components/sources/generated/aws_s3.cue +++ b/website/cue/reference/components/sources/generated/aws_s3.cue @@ -453,6 +453,19 @@ generated: components: sources: aws_s3: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/aws_sqs.cue b/website/cue/reference/components/sources/generated/aws_sqs.cue index b52cfcea28a31..2e27f1a391d22 100644 --- a/website/cue/reference/components/sources/generated/aws_sqs.cue +++ b/website/cue/reference/components/sources/generated/aws_sqs.cue @@ -448,6 +448,19 @@ generated: components: sources: aws_sqs: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. 
`%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/datadog_agent.cue b/website/cue/reference/components/sources/generated/datadog_agent.cue index 70bd89821cb1f..52d5d20627be9 100644 --- a/website/cue/reference/components/sources/generated/datadog_agent.cue +++ b/website/cue/reference/components/sources/generated/datadog_agent.cue @@ -320,6 +320,19 @@ generated: components: sources: datadog_agent: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. 
diff --git a/website/cue/reference/components/sources/generated/demo_logs.cue b/website/cue/reference/components/sources/generated/demo_logs.cue index e1227f9fc26ca..a76ed238d0309 100644 --- a/website/cue/reference/components/sources/generated/demo_logs.cue +++ b/website/cue/reference/components/sources/generated/demo_logs.cue @@ -299,6 +299,19 @@ generated: components: sources: demo_logs: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/exec.cue b/website/cue/reference/components/sources/generated/exec.cue index a5380b6c01f1d..53f3a7b0929c4 100644 --- a/website/cue/reference/components/sources/generated/exec.cue +++ b/website/cue/reference/components/sources/generated/exec.cue @@ -300,6 +300,19 @@ generated: components: sources: exec: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. 
+ """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/file_descriptor.cue b/website/cue/reference/components/sources/generated/file_descriptor.cue index 84ecfd0d85e94..229910f235ca1 100644 --- a/website/cue/reference/components/sources/generated/file_descriptor.cue +++ b/website/cue/reference/components/sources/generated/file_descriptor.cue @@ -290,6 +290,19 @@ generated: components: sources: file_descriptor: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/gcp_pubsub.cue b/website/cue/reference/components/sources/generated/gcp_pubsub.cue index 74ceb80df85ff..953eb998fff7e 100644 --- a/website/cue/reference/components/sources/generated/gcp_pubsub.cue +++ b/website/cue/reference/components/sources/generated/gcp_pubsub.cue @@ -366,6 +366,19 @@ generated: components: sources: gcp_pubsub: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). 
+ + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/heroku_logs.cue b/website/cue/reference/components/sources/generated/heroku_logs.cue index cb44587b15e4b..52a2e4e22094b 100644 --- a/website/cue/reference/components/sources/generated/heroku_logs.cue +++ b/website/cue/reference/components/sources/generated/heroku_logs.cue @@ -363,6 +363,19 @@ generated: components: sources: heroku_logs: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. 
diff --git a/website/cue/reference/components/sources/generated/http.cue b/website/cue/reference/components/sources/generated/http.cue index 259c800829438..5a5f72ad7f62c 100644 --- a/website/cue/reference/components/sources/generated/http.cue +++ b/website/cue/reference/components/sources/generated/http.cue @@ -364,6 +364,19 @@ generated: components: sources: http: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/http_client.cue b/website/cue/reference/components/sources/generated/http_client.cue index 6631650e47fb5..58036ce85a5df 100644 --- a/website/cue/reference/components/sources/generated/http_client.cue +++ b/website/cue/reference/components/sources/generated/http_client.cue @@ -501,6 +501,19 @@ generated: components: sources: http_client: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. 
+ """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/http_server.cue b/website/cue/reference/components/sources/generated/http_server.cue index c84425dd8db2c..e967c55f2d0f1 100644 --- a/website/cue/reference/components/sources/generated/http_server.cue +++ b/website/cue/reference/components/sources/generated/http_server.cue @@ -364,6 +364,19 @@ generated: components: sources: http_server: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/kafka.cue b/website/cue/reference/components/sources/generated/kafka.cue index 7a0ebfdcf1006..db2f617fc1b3f 100644 --- a/website/cue/reference/components/sources/generated/kafka.cue +++ b/website/cue/reference/components/sources/generated/kafka.cue @@ -344,6 +344,19 @@ generated: components: sources: kafka: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). 
+ + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/mqtt.cue b/website/cue/reference/components/sources/generated/mqtt.cue index 0bcd847fd4717..a648cbc5b82a6 100644 --- a/website/cue/reference/components/sources/generated/mqtt.cue +++ b/website/cue/reference/components/sources/generated/mqtt.cue @@ -295,6 +295,19 @@ generated: components: sources: mqtt: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/nats.cue b/website/cue/reference/components/sources/generated/nats.cue index c57371bafaa94..a16716413c343 100644 --- a/website/cue/reference/components/sources/generated/nats.cue +++ b/website/cue/reference/components/sources/generated/nats.cue @@ -387,6 +387,19 @@ generated: components: sources: nats: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. 
Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/pulsar.cue b/website/cue/reference/components/sources/generated/pulsar.cue index f51e1a449dbcb..c94344ffe1d53 100644 --- a/website/cue/reference/components/sources/generated/pulsar.cue +++ b/website/cue/reference/components/sources/generated/pulsar.cue @@ -393,6 +393,19 @@ generated: components: sources: pulsar: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. 
diff --git a/website/cue/reference/components/sources/generated/redis.cue b/website/cue/reference/components/sources/generated/redis.cue index 4ddff3bf52d73..e39326aeb897a 100644 --- a/website/cue/reference/components/sources/generated/redis.cue +++ b/website/cue/reference/components/sources/generated/redis.cue @@ -305,6 +305,19 @@ generated: components: sources: redis: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/socket.cue b/website/cue/reference/components/sources/generated/socket.cue index 63495e4a1fbaf..7bffe1a706f0a 100644 --- a/website/cue/reference/components/sources/generated/socket.cue +++ b/website/cue/reference/components/sources/generated/socket.cue @@ -307,6 +307,19 @@ generated: components: sources: socket: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. 
+ """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/stdin.cue b/website/cue/reference/components/sources/generated/stdin.cue index 72de586122b44..8a3a3fbea372a 100644 --- a/website/cue/reference/components/sources/generated/stdin.cue +++ b/website/cue/reference/components/sources/generated/stdin.cue @@ -290,6 +290,19 @@ generated: components: sources: stdin: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). + + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. diff --git a/website/cue/reference/components/sources/generated/websocket.cue b/website/cue/reference/components/sources/generated/websocket.cue index c770d5558e71a..7c60374c2a3f1 100644 --- a/website/cue/reference/components/sources/generated/websocket.cue +++ b/website/cue/reference/components/sources/generated/websocket.cue @@ -477,6 +477,19 @@ generated: components: sources: websocket: configuration: { relevant_when: "codec = \"vrl\"" required: true type: object: options: { + inject_metadata: { + description: """ + When `true`, the source may inject per-request metadata into the VRL + runtime before the program executes. Injected metadata is accessible + via `%`-prefixed paths (e.g. `%exec.host`, `%vector.secrets.*`). 
+ + Each source controls which metadata it injects; see the source + documentation for details. If the source does not support metadata + injection, this option has no effect. + """ + required: false + type: bool: default: false + } source: { description: """ The [Vector Remap Language][vrl] (VRL) program to execute for each event. From aecddcfb10ea39f63494bddf43f33ca1e248e6b6 Mon Sep 17 00:00:00 2001 From: Thomas Date: Wed, 29 Apr 2026 12:17:34 -0400 Subject: [PATCH 4/6] fix(exec source): preserve VRL-written metadata; add DeserializerConfig::inject_metadata_enabled Move the inject_metadata check to DeserializerConfig::inject_metadata_enabled() so sources don't need VRL-specific knowledge. In handle_event, use try_insert for the vector-namespace metadata paths (host, command) when inject_metadata is enabled, so any value the VRL program wrote to %exec.host or %exec.command survives post-decode enrichment. No behavior change for Legacy namespace or when inject_metadata is false. --- lib/codecs/src/decoding/mod.rs | 6 +++++ src/sources/exec/mod.rs | 46 +++++++++++++++++++++++----------- src/sources/exec/tests.rs | 4 +++ 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/lib/codecs/src/decoding/mod.rs b/lib/codecs/src/decoding/mod.rs index c178b31a818d6..8de1039559fb7 100644 --- a/lib/codecs/src/decoding/mod.rs +++ b/lib/codecs/src/decoding/mod.rs @@ -426,6 +426,12 @@ impl DeserializerConfig { } } + /// Returns `true` if this is a VRL deserializer with `inject_metadata: true`. + /// Sources use this to decide whether to call `Decoder::with_metadata_template`. + pub fn inject_metadata_enabled(&self) -> bool { + matches!(self, DeserializerConfig::Vrl(c) if c.vrl.inject_metadata) + } + /// Return the type of event build by this deserializer. 
pub fn output_type(&self) -> DataType { match self { diff --git a/src/sources/exec/mod.rs b/src/sources/exec/mod.rs index 1979735c375ed..300a6eccbffb7 100644 --- a/src/sources/exec/mod.rs +++ b/src/sources/exec/mod.rs @@ -21,7 +21,7 @@ use vector_lib::{ configurable::configurable_component, event::EventMetadata, internal_event::{ByteSize, BytesReceived, InternalEventHandle as _, Protocol}, - lookup::{owned_value_path, path}, + lookup::{metadata_path, owned_value_path, path}, }; use vrl::{path::OwnedValuePath, value::Kind}; @@ -241,6 +241,7 @@ impl ExecConfig { Some(config) => config.respawn_interval_secs, } } + } #[async_trait::async_trait] @@ -537,8 +538,9 @@ async fn run_command( byte_size: events.estimated_json_encoded_size_of(), }); + let vrl_inject_metadata = config.decoding.inject_metadata_enabled(); for event in &mut events { - handle_event(&config, &hostname, &Some(stream.to_string()), pid, event, log_namespace); + handle_event(&config, &hostname, &Some(stream.to_string()), pid, event, log_namespace, vrl_inject_metadata); } if (out.send_batch(events).await).is_err() { emit!(StreamClosedError { count }); @@ -687,6 +689,7 @@ fn handle_event( pid: Option, event: &mut Event, log_namespace: LogNamespace, + vrl_inject_metadata: bool, ) { if let Event::Log(log) = event { log_namespace.insert_standard_vector_source_metadata(log, ExecConfig::NAME, Utc::now()); @@ -713,25 +716,38 @@ fn handle_event( ); } - // Add hostname (if needed) + // Add hostname (if needed). When the VRL decoder has inject_metadata enabled, + // use try_insert for the vector metadata path so any value the VRL program + // wrote to %exec.host is not overwritten here. 
if let Some(hostname) = hostname { + if vrl_inject_metadata && matches!(log_namespace, LogNamespace::Vector) { + log.try_insert(metadata_path!(ExecConfig::NAME, "host"), hostname.clone()); + } else { + log_namespace.insert_source_metadata( + ExecConfig::NAME, + log, + log_schema().host_key().map(LegacyKey::InsertIfEmpty), + path!("host"), + hostname.clone(), + ); + } + } + + // Add command. Same try_insert guard as hostname above. + if vrl_inject_metadata && matches!(log_namespace, LogNamespace::Vector) { + log.try_insert( + metadata_path!(ExecConfig::NAME, COMMAND_KEY), + config.command.clone(), + ); + } else { log_namespace.insert_source_metadata( ExecConfig::NAME, log, - log_schema().host_key().map(LegacyKey::InsertIfEmpty), - path!("host"), - hostname.clone(), + Some(LegacyKey::InsertIfEmpty(path!(COMMAND_KEY))), + path!(COMMAND_KEY), + config.command.clone(), ); } - - // Add command - log_namespace.insert_source_metadata( - ExecConfig::NAME, - log, - Some(LegacyKey::InsertIfEmpty(path!(COMMAND_KEY))), - path!(COMMAND_KEY), - config.command.clone(), - ); } } diff --git a/src/sources/exec/tests.rs b/src/sources/exec/tests.rs index a7c2e0ab4d71c..e6a10f002905b 100644 --- a/src/sources/exec/tests.rs +++ b/src/sources/exec/tests.rs @@ -28,6 +28,7 @@ fn test_scheduled_handle_event() { pid, &mut event, LogNamespace::Legacy, + false, ); let log = event.as_log(); @@ -57,6 +58,7 @@ fn test_scheduled_handle_event_vector_namespace() { pid, &mut event, LogNamespace::Vector, + false, ); let log = event.as_log(); @@ -105,6 +107,7 @@ fn test_streaming_create_event() { pid, &mut event, LogNamespace::Legacy, + false, ); let log = event.as_log(); @@ -134,6 +137,7 @@ fn test_streaming_create_event_vector_namespace() { pid, &mut event, LogNamespace::Vector, + false, ); let log = event.as_log(); From 3f04870374d5921097f7f8a8032e2e5900b57119 Mon Sep 17 00:00:00 2001 From: Thomas Date: Wed, 29 Apr 2026 12:24:45 -0400 Subject: [PATCH 5/6] fmt --- lib/codecs/src/decoding/decoder.rs | 5 
++++- lib/codecs/src/decoding/format/mod.rs | 1 - lib/codecs/src/decoding/format/vrl.rs | 17 +++++++++++------ src/sources/exec/mod.rs | 1 - 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/codecs/src/decoding/decoder.rs b/lib/codecs/src/decoding/decoder.rs index 499955495941b..d62e24713f571 100644 --- a/lib/codecs/src/decoding/decoder.rs +++ b/lib/codecs/src/decoding/decoder.rs @@ -1,7 +1,10 @@ use bytes::{Bytes, BytesMut}; use smallvec::SmallVec; use vector_common::internal_event::emit; -use vector_core::{config::LogNamespace, event::{Event, EventMetadata}}; +use vector_core::{ + config::LogNamespace, + event::{Event, EventMetadata}, +}; use crate::{ decoding::format::Deserializer as _, diff --git a/lib/codecs/src/decoding/format/mod.rs b/lib/codecs/src/decoding/format/mod.rs index 9a433861b277f..cbb2172c3392d 100644 --- a/lib/codecs/src/decoding/format/mod.rs +++ b/lib/codecs/src/decoding/format/mod.rs @@ -57,7 +57,6 @@ pub trait Deserializer: DynClone + Send + Sync { bytes: Bytes, log_namespace: LogNamespace, ) -> vector_common::Result>; - } dyn_clone::clone_trait_object!(Deserializer); diff --git a/lib/codecs/src/decoding/format/vrl.rs b/lib/codecs/src/decoding/format/vrl.rs index d1df402632701..6f37f4502aa05 100644 --- a/lib/codecs/src/decoding/format/vrl.rs +++ b/lib/codecs/src/decoding/format/vrl.rs @@ -392,8 +392,9 @@ mod tests { // VRL program copies the injected secret into an event field so we can // assert on its value. The input bytes become `.message` (Legacy namespace) // and we add `.secret_value` alongside it. 
- let decoder = make_decoder_with_inject_metadata(r#".secret_value = get_secret!("my_token")"#) - .with_metadata_template(metadata_with_secret("my_token", "super-secret")); + let decoder = + make_decoder_with_inject_metadata(r#".secret_value = get_secret!("my_token")"#) + .with_metadata_template(metadata_with_secret("my_token", "super-secret")); let bytes = Bytes::from(r#"hello"#); let events = decoder @@ -411,9 +412,8 @@ mod tests { /// `set_secret!` runs after the template is pre-populated. #[test] fn test_with_metadata_template_codec_wins_on_collision() { - let decoder = - make_decoder_with_inject_metadata(r#"set_secret!("my_token", "codec-wins")"#) - .with_metadata_template(metadata_with_secret("my_token", "template-loses")); + let decoder = make_decoder_with_inject_metadata(r#"set_secret!("my_token", "codec-wins")"#) + .with_metadata_template(metadata_with_secret("my_token", "template-loses")); let bytes = Bytes::from(r#"hello"#); let events = decoder @@ -421,7 +421,12 @@ mod tests { .expect("parse should succeed"); assert_eq!( - events[0].metadata().secrets().get("my_token").unwrap().as_ref(), + events[0] + .metadata() + .secrets() + .get("my_token") + .unwrap() + .as_ref(), "codec-wins" ); } diff --git a/src/sources/exec/mod.rs b/src/sources/exec/mod.rs index 300a6eccbffb7..c314b25111a97 100644 --- a/src/sources/exec/mod.rs +++ b/src/sources/exec/mod.rs @@ -241,7 +241,6 @@ impl ExecConfig { Some(config) => config.respawn_interval_secs, } } - } #[async_trait::async_trait] From bba7ae5f7165013693e9db5e6a642c272ee132a2 Mon Sep 17 00:00:00 2001 From: Thomas Date: Wed, 29 Apr 2026 12:27:09 -0400 Subject: [PATCH 6/6] Add authors line to changelog --- changelog.d/vrl_decoder_inject_metadata.enhancement.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/changelog.d/vrl_decoder_inject_metadata.enhancement.md b/changelog.d/vrl_decoder_inject_metadata.enhancement.md index cec8026620e9c..6319479c81e95 100644 --- 
a/changelog.d/vrl_decoder_inject_metadata.enhancement.md +++ b/changelog.d/vrl_decoder_inject_metadata.enhancement.md @@ -1 +1,3 @@ The `vrl` codec now supports an `inject_metadata` option. When set to `true`, sources can inject per-request metadata into the VRL program before it executes, making source-specific context readable via `%`-prefixed paths (e.g. `%exec.host`, `%exec.command`, `%vector.secrets.*`). The `exec` source is the first to support this. VRL-produced metadata always takes priority over injected values on collision. + +authors: thomasqueirozb