Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
// limitations under the License.

use std::collections::{BTreeSet, HashMap};
use std::sync::LazyLock;

use serde::Deserialize;
use tracing::error;

use crate::elastic_query_dsl::one_field_map::OneFieldMap;
use crate::elastic_query_dsl::{ConvertibleToQueryAst, ElasticQueryDslInner};
Expand Down Expand Up @@ -85,8 +87,29 @@ impl TryFrom<TermsQueryForSerialization> for TermsQuery {
}
}

/// Upper bound on the number of values a single `terms` query may carry.
///
/// The SQL condition generated for a large term set grows linearly with the number of values,
/// which can cause PostgreSQL to OOM while parsing/planning the query.
///
/// Defaults to 100; set the `QW_MAX_TERMS_QUERY_SIZE` environment variable to override it.
/// The value is resolved lazily, the first time the static is dereferenced.
static MAX_TERMS_QUERY_SIZE: LazyLock<usize> = LazyLock::new(|| {
    // Fallback used when the environment does not provide a value.
    const DEFAULT_MAX_TERMS_QUERY_SIZE: usize = 100;
    // NOTE(review): the trailing `false` is presumably the "sensitive value" flag of
    // `get_from_env` — confirm against `quickwit_common`.
    quickwit_common::get_from_env(
        "QW_MAX_TERMS_QUERY_SIZE",
        DEFAULT_MAX_TERMS_QUERY_SIZE,
        false,
    )
});

impl ConvertibleToQueryAst for TermsQuery {
fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> {
let max_terms = *MAX_TERMS_QUERY_SIZE;
if self.values.len() > max_terms {
error!(
field = self.field,
nb_values = self.values.len(),
"terms query exceeds maximum allowed values"
);
anyhow::bail!(
"terms query on field `{}` contains {} values, which exceeds the maximum allowed \
({max_terms})",
self.field,
self.values.len()
);
}
let mut terms_per_field = HashMap::new();
let values_set: BTreeSet<String> = self.values.into_iter().collect();
terms_per_field.insert(self.field, values_set);
Expand Down Expand Up @@ -135,6 +158,18 @@ mod tests {
assert_eq!(&terms_query.values[..], &["1".to_string(), "2".to_string()]);
}

#[test]
fn test_terms_query_too_many_values() {
    // The inclusive range yields exactly one value more than the configured limit, which is
    // the smallest input the size guard must reject.
    let limit = *MAX_TERMS_QUERY_SIZE;
    let oversized_values: Vec<String> = (0..=limit).map(|idx| idx.to_string()).collect();

    let conversion_result = TermsQuery {
        boost: None,
        field: "my_field".to_string(),
        values: oversized_values,
    }
    .convert_to_query_ast();

    // The conversion must fail, and the error must mention the limit violation.
    let error_message = conversion_result.unwrap_err().to_string();
    assert!(error_message.contains("exceeds the maximum allowed"));
}

#[test]
fn test_terms_query_single_term_boost() {
let terms_query_json = r#"{ "user.id": ["hello", "happy"], "boost": 2 }"#;
Expand Down
Loading