From 3756487e312fcb2dc84ec38d248693db40309bee Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Fri, 29 May 2026 20:39:09 +0200 Subject: [PATCH 1/2] Limit term count --- .../src/elastic_query_dsl/terms_query.rs | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs index 48b65e4dd29..2b1e096270a 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::collections::{BTreeSet, HashMap}; +use std::sync::LazyLock; use serde::Deserialize; @@ -85,8 +86,24 @@ impl TryFrom for TermsQuery { } } +/// Maximum number of terms allowed in a `terms` query. +/// Large term sets generate a SQL condition that grows linearly with the number of values, +/// which can cause PostgreSQL to OOM while parsing/planning the query. +/// Can be overridden via the `QW_MAX_TERMS_QUERY_SIZE` environment variable. +static MAX_TERMS_QUERY_SIZE: LazyLock = + LazyLock::new(|| quickwit_common::get_from_env("QW_MAX_TERMS_QUERY_SIZE", 100usize, false)); + impl ConvertibleToQueryAst for TermsQuery { fn convert_to_query_ast(self) -> anyhow::Result { + let max_terms = *MAX_TERMS_QUERY_SIZE; + if self.values.len() > max_terms { + anyhow::bail!( + "`terms` query on field `{}` contains {} values, which exceeds the maximum \ + allowed ({max_terms})", + self.field, + self.values.len() + ); + } let mut terms_per_field = HashMap::new(); let values_set: BTreeSet = self.values.into_iter().collect(); terms_per_field.insert(self.field, values_set); @@ -135,6 +152,18 @@ mod tests { assert_eq!(&terms_query.values[..], &["1".to_string(), "2".to_string()]); } + #[test] + fn test_terms_query_too_many_values() { + let values: Vec = (0..=*MAX_TERMS_QUERY_SIZE).map(|i| i.to_string()).collect(); + let terms_query = TermsQuery { + boost: None, + field: "my_field".to_string(), + values, + }; + let err = terms_query.convert_to_query_ast().unwrap_err(); + assert!(err.to_string().contains("exceeds the maximum allowed")); + } + #[test] fn test_terms_query_single_term_boost() { let terms_query_json = r#"{ "user.id": ["hello", "happy"], "boost": 2 }"#; From 1b54cb381f8759a8368167d566803aa0819db8a6 Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Fri, 29 May 2026 21:15:13 +0200 Subject: [PATCH 2/2] Add log --- .../src/elastic_query_dsl/terms_query.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs index 2b1e096270a..c3d7e9d8b92 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs @@ -16,6 +16,7 @@ use std::collections::{BTreeSet, HashMap}; use std::sync::LazyLock; use serde::Deserialize; +use tracing::error; use crate::elastic_query_dsl::one_field_map::OneFieldMap; use crate::elastic_query_dsl::{ConvertibleToQueryAst, ElasticQueryDslInner}; @@ -97,9 +98,14 @@ impl ConvertibleToQueryAst for TermsQuery { fn convert_to_query_ast(self) -> anyhow::Result { let max_terms = *MAX_TERMS_QUERY_SIZE; if self.values.len() > max_terms { + error!( + field = self.field, + nb_values = self.values.len(), + "terms query exceeds maximum allowed values" + ); anyhow::bail!( - "`terms` query on field `{}` contains {} values, which exceeds the maximum \ - allowed ({max_terms})", + "terms query on field `{}` contains {} values, which exceeds the maximum allowed \ + ({max_terms})", self.field, self.values.len() );