diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs index 48b65e4dd29..c3d7e9d8b92 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs @@ -13,8 +13,10 @@ // limitations under the License. use std::collections::{BTreeSet, HashMap}; +use std::sync::LazyLock; use serde::Deserialize; +use tracing::error; use crate::elastic_query_dsl::one_field_map::OneFieldMap; use crate::elastic_query_dsl::{ConvertibleToQueryAst, ElasticQueryDslInner}; @@ -85,8 +87,29 @@ impl TryFrom for TermsQuery { } } +/// Maximum number of terms allowed in a `terms` query. +/// Large term sets generate a SQL condition that grows linearly with the number of values, +/// which can cause PostgreSQL to OOM while parsing/planning the query. +/// Can be overridden via the `QW_MAX_TERMS_QUERY_SIZE` environment variable. +static MAX_TERMS_QUERY_SIZE: LazyLock = + LazyLock::new(|| quickwit_common::get_from_env("QW_MAX_TERMS_QUERY_SIZE", 100usize, false)); + impl ConvertibleToQueryAst for TermsQuery { fn convert_to_query_ast(self) -> anyhow::Result { + let max_terms = *MAX_TERMS_QUERY_SIZE; + if self.values.len() > max_terms { + error!( + field = self.field, + nb_values = self.values.len(), + "terms query exceeds maximum allowed values" + ); + anyhow::bail!( + "terms query on field `{}` contains {} values, which exceeds the maximum allowed \ + ({max_terms})", + self.field, + self.values.len() + ); + } let mut terms_per_field = HashMap::new(); let values_set: BTreeSet = self.values.into_iter().collect(); terms_per_field.insert(self.field, values_set); @@ -135,6 +158,18 @@ mod tests { assert_eq!(&terms_query.values[..], &["1".to_string(), "2".to_string()]); } + #[test] + fn test_terms_query_too_many_values() { + let values: Vec = (0..=*MAX_TERMS_QUERY_SIZE).map(|i| i.to_string()).collect(); + let terms_query = TermsQuery { + boost: None, + field: "my_field".to_string(), + values, + }; + let err = terms_query.convert_to_query_ast().unwrap_err(); + assert!(err.to_string().contains("exceeds the maximum allowed")); + } + #[test] fn test_terms_query_single_term_boost() { let terms_query_json = r#"{ "user.id": ["hello", "happy"], "boost": 2 }"#;