From d33a655bda6b13c603a0639c21d70f79939536f5 Mon Sep 17 00:00:00 2001 From: Milind Srivastava Date: Wed, 18 Mar 2026 15:04:30 -0400 Subject: [PATCH] Changed exact string match to match based on SQLQueryData --- .../src/ast_matching/sqlhelper.rs | 49 +++++ .../src/ast_matching/sqlparser_test.rs | 95 +++++++++ .../src/engines/simple_engine.rs | 37 +++- asap-query-engine/src/tests/mod.rs | 1 + .../src/tests/sql_pattern_matching_tests.rs | 195 ++++++++++++++++++ 5 files changed, 372 insertions(+), 5 deletions(-) create mode 100644 asap-query-engine/src/tests/sql_pattern_matching_tests.rs diff --git a/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs b/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs index 3e3176f..a43f3f0 100644 --- a/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs +++ b/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlhelper.rs @@ -154,4 +154,53 @@ impl AggregationInfo { pub fn get_args(&self) -> &Vec { &self.args } + + /// Returns true if this aggregation matches the given template + /// (same function name, value column, and arguments). + pub fn matches_pattern(&self, other: &AggregationInfo) -> bool { + self.name == other.name + && self.value_column_name == other.value_column_name + && self.args == other.args + } +} + +impl TimeInfo { + /// Returns true if this time info matches the given template. + /// + /// For "UNUSED" time columns (the outer level of a subquery which has no WHERE + /// time clause), only the column name is compared. + /// For real time columns, the column name and duration are compared but the + /// absolute start time is ignored — this allows NOW()-based templates to match + /// incoming queries that use absolute timestamps. + pub fn matches_pattern(&self, other: &TimeInfo) -> bool { + if self.time_col_name != other.time_col_name { + return false; + } + if self.time_col_name == "UNUSED" { + return true; + } + (self.duration - other.duration).abs() < f64::EPSILON + } +} + +impl SQLQueryData { + /// Returns true if this query data structurally matches the given template. + /// + /// Templates in inference_config use NOW()-relative timestamps; actual incoming + /// queries use absolute timestamps. Only the duration is compared, not the + /// absolute start time. All other fields (metric, aggregation, labels, time + /// column name) must match exactly. + pub fn matches_sql_pattern(&self, template: &SQLQueryData) -> bool { + self.metric == template.metric + && self + .aggregation_info + .matches_pattern(&template.aggregation_info) + && self.labels == template.labels + && self.time_info.matches_pattern(&template.time_info) + && match (&self.subquery, &template.subquery) { + (None, None) => true, + (Some(sq), Some(tq)) => sq.matches_sql_pattern(tq), + _ => false, + } + } } diff --git a/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs b/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs index f694439..72b0940 100644 --- a/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs +++ b/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs @@ -544,4 +544,99 @@ mod tests { Some(QueryError::SpatialDurationSmall), ); } + + // ── matches_sql_pattern tests ───────────────────────────────────────────── + + #[test] + fn test_matches_now_vs_absolute_timestamp() { + // Same 10s window, same metric/agg/labels — should match + let template = parse_sql_query( + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + let incoming = parse_sql_query( + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:10') AND '2025-10-01 00:00:10' GROUP BY L1, L2, L3, L4" + ).unwrap(); + assert!(incoming.matches_sql_pattern(&template)); + } + + #[test] + fn test_no_match_different_duration() { + let template = parse_sql_query( + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + let incoming = parse_sql_query( + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -5, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + assert!(!incoming.matches_sql_pattern(&template)); + } + + #[test] + fn test_no_match_different_metric() { + let template = parse_sql_query( + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + let incoming = parse_sql_query( + "SELECT SUM(mb) FROM mem_usage WHERE ms BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + assert!(!incoming.matches_sql_pattern(&template)); + } + + #[test] + fn test_no_match_different_aggregation() { + let template = parse_sql_query( + "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + let incoming = parse_sql_query( + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + assert!(!incoming.matches_sql_pattern(&template)); + } + + #[test] + fn test_no_match_different_labels() { + let template = parse_sql_query( + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + let incoming = parse_sql_query( + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1" + ).unwrap(); + assert!(!incoming.matches_sql_pattern(&template)); + } + + #[test] + fn test_no_match_different_time_column() { + // cpu_usage uses "time", mem_usage uses "ms" — query same metric but wrong time col + let template = parse_sql_query( + "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + // Force a different time column by using mem_usage schema (col: ms) but same duration + let incoming = parse_sql_query( + "SELECT SUM(mb) FROM mem_usage WHERE ms BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + // Different metric AND time column — must not match + assert!(!incoming.matches_sql_pattern(&template)); + } + + #[test] + fn test_no_match_different_quantile_args() { + let template = parse_sql_query( + "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + let incoming = parse_sql_query( + "SELECT QUANTILE(0.99, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + assert!(!incoming.matches_sql_pattern(&template)); + } + + #[test] + fn test_matches_subquery_now_vs_absolute() { + // Spatial-of-temporal: outer has no time clause (UNUSED), inner has time clause + let template = parse_sql_query( + "SELECT SUM(result) FROM (SELECT SUM(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1" + ).unwrap(); + let incoming = parse_sql_query( + "SELECT SUM(result) FROM (SELECT SUM(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:10') AND '2025-10-01 00:00:10' GROUP BY L1, L2, L3, L4) GROUP BY L1" + ).unwrap(); + assert!(incoming.matches_sql_pattern(&template)); + } } diff --git a/asap-query-engine/src/engines/simple_engine.rs b/asap-query-engine/src/engines/simple_engine.rs index 1fa7558..e60fa58 100644 --- a/asap-query-engine/src/engines/simple_engine.rs +++ b/asap-query-engine/src/engines/simple_engine.rs @@ -25,7 +25,7 @@ use promql_utilities::query_logics::parsing::{ use sql_utilities::ast_matching::QueryType; use sql_utilities::ast_matching::{SQLPatternMatcher, SQLPatternParser, SQLQuery}; -use sql_utilities::sqlhelper::AggregationInfo; +use sql_utilities::sqlhelper::{AggregationInfo, SQLQueryData}; use sqlparser::dialect::*; use sqlparser::parser::Parser as parser; @@ -292,6 +292,33 @@ impl SimpleEngine { .find(|config| config.query == query) } + /// Finds the query configuration for a SQL query using structural pattern matching. + /// + /// Unlike `find_query_config` (which does exact string comparison), this method parses + /// each template in query_configs and compares it structurally against the incoming + /// query_data — ignoring absolute timestamps and comparing only metric, aggregation, + /// labels, time column name, and duration. + fn find_query_config_sql(&self, query_data: &SQLQueryData) -> Option<&QueryConfig> { + let schema = match &self.inference_config.schema { + SchemaConfig::SQL(sql_schema) => sql_schema, + _ => return None, + }; + + self.inference_config.query_configs.iter().find(|config| { + let template_statements = + match parser::parse_sql(&GenericDialect {}, config.query.as_str()) { + Ok(stmts) => stmts, + Err(_) => return false, + }; + let template_data = + match SQLPatternParser::new(schema, 0.0).parse_query(&template_statements) { + Some(data) => data, + None => return false, + }; + query_data.matches_sql_pattern(&template_data) + }) + } + /// Validates and potentially aligns end timestamp based on query pattern fn validate_and_align_end_timestamp( &self, @@ -1134,7 +1161,7 @@ impl SimpleEngine { let query_time = Self::convert_query_time_to_data_time( query_data.time_info.get_start() + query_data.time_info.get_duration(), ); - return self.build_spatiotemporal_context(&match_result, query_time, &query); + return self.build_spatiotemporal_context(&match_result, query_time, &query_data); } let query_pattern_type = match &match_result.query_type[..] { @@ -1156,7 +1183,7 @@ impl SimpleEngine { _ => panic!("Unsupported query type found"), }; - let query_config = self.find_query_config(&query)?; + let query_config = self.find_query_config_sql(&query_data)?; // For nested queries (spatial of temporal), the outer query has no time clause, // so we need to use the inner (temporal) query's time_info to compute query_time @@ -1341,9 +1368,9 @@ impl SimpleEngine { &self, match_result: &SQLQuery, query_time: u64, - query: &str, + query_data: &SQLQueryData, ) -> Option { - let query_config = self.find_query_config(query)?; + let query_config = self.find_query_config_sql(query_data)?; // Output labels are the GROUP BY columns (subset of all labels) let query_output_labels = KeyByLabelNames::new( diff --git a/asap-query-engine/src/tests/mod.rs b/asap-query-engine/src/tests/mod.rs index 3457e5b..06ac375 100644 --- a/asap-query-engine/src/tests/mod.rs +++ b/asap-query-engine/src/tests/mod.rs @@ -3,6 +3,7 @@ pub mod datafusion; pub mod elastic_forwarding_tests; pub mod prometheus_forwarding_tests; pub mod query_equivalence_tests; +pub mod sql_pattern_matching_tests; pub mod trait_design_tests; #[cfg(test)] diff --git a/asap-query-engine/src/tests/sql_pattern_matching_tests.rs b/asap-query-engine/src/tests/sql_pattern_matching_tests.rs new file mode 100644 index 0000000..d9315fc --- /dev/null +++ b/asap-query-engine/src/tests/sql_pattern_matching_tests.rs @@ -0,0 +1,195 @@ +//! Tests for SQL query pattern matching against inference_config templates. +//! +//! Verifies that incoming SQL queries with absolute timestamps are correctly matched +//! against NOW()-based template queries in the inference_config. + +#[cfg(test)] +mod tests { + use crate::data_model::{ + AggregationConfig, AggregationReference, CleanupPolicy, InferenceConfig, QueryConfig, + QueryLanguage, SchemaConfig, StreamingConfig, + }; + use crate::engines::simple_engine::SimpleEngine; + use crate::stores::simple_map_store::SimpleMapStore; + use promql_utilities::data_model::KeyByLabelNames; + use sql_utilities::sqlhelper::{SQLSchema, Table}; + use std::collections::{HashMap, HashSet}; + use std::sync::Arc; + + /// Build a minimal SQL SimpleEngine with one template query config. + /// + /// * `template_sql` — the NOW()-based query stored in inference_config + /// * `agg_id` — aggregation id + /// * `window_secs` — window size in seconds + fn build_sql_engine(template_sql: &str, agg_id: u64, window_secs: u64) -> SimpleEngine { + // Schema: cpu_usage table + let labels: HashSet = ["L1", "L2", "L3", "L4"] + .iter() + .map(|s| s.to_string()) + .collect(); + let value_cols: HashSet = ["value"].iter().map(|s| s.to_string()).collect(); + let table = Table::new( + "cpu_usage".to_string(), + "time".to_string(), + value_cols, + labels.clone(), + ); + let sql_schema = SQLSchema::new(vec![table]); + + // Query config with the template + let query_config = QueryConfig::new(template_sql.to_string()) + .add_aggregation(AggregationReference::new(agg_id, None)); + + let inference_config = InferenceConfig { + schema: SchemaConfig::SQL(sql_schema), + query_configs: vec![query_config], + cleanup_policy: CleanupPolicy::NoCleanup, + }; + + // Streaming config + let agg_config = AggregationConfig { + aggregation_id: agg_id, + aggregation_type: "SumAccumulator".to_string(), + aggregation_sub_type: String::new(), + parameters: HashMap::new(), + grouping_labels: KeyByLabelNames::new( + ["L1", "L2", "L3", "L4"] + .iter() + .map(|s| s.to_string()) + .collect(), + ), + aggregated_labels: KeyByLabelNames::empty(), + rollup_labels: KeyByLabelNames::empty(), + original_yaml: String::new(), + window_size: window_secs, + slide_interval: window_secs, + window_type: "tumbling".to_string(), + tumbling_window_size: window_secs, + spatial_filter: String::new(), + spatial_filter_normalized: String::new(), + metric: "cpu_usage".to_string(), + num_aggregates_to_retain: None, + read_count_threshold: None, + table_name: None, + value_column: None, + }; + + let mut agg_configs = HashMap::new(); + agg_configs.insert(agg_id, agg_config); + let streaming_config = Arc::new(StreamingConfig { + aggregation_configs: agg_configs, + }); + + let store = Arc::new(SimpleMapStore::new( + streaming_config.clone(), + CleanupPolicy::NoCleanup, + )); + + SimpleEngine::new( + store, + inference_config, + streaming_config, + 1, + QueryLanguage::sql, + ) + } + + #[test] + fn test_temporal_query_matches_now_template() { + // Template in inference_config uses NOW() + let template = "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4"; + let engine = build_sql_engine(template, 1, 10); + + // Incoming query uses absolute timestamps for the same 10s window + let incoming = "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:10') AND '2025-10-01 00:00:10' GROUP BY L1, L2, L3, L4"; + let query_time = 1727740810.0_f64; // '2025-10-01 00:00:10' as unix seconds + + let context = engine.build_query_execution_context_sql(incoming.to_string(), query_time); + assert!( + context.is_some(), + "Expected build_query_execution_context_sql to return Some, got None. \ + The incoming query with absolute timestamps was not matched against the NOW() template." + ); + } + + #[test] + fn test_spatiotemporal_query_matches_now_template() { + // SpatioTemporal: same metric, spans multiple intervals, GROUP BY subset of labels + let template = "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1"; + let engine = build_sql_engine(template, 1, 10); + + let incoming = "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:10') AND '2025-10-01 00:00:10' GROUP BY L1"; + let query_time = 1727740810.0_f64; + + let context = engine.build_query_execution_context_sql(incoming.to_string(), query_time); + assert!( + context.is_some(), + "Expected build_query_execution_context_sql to return Some for spatiotemporal query, got None." + ); + } + + #[test] + fn test_spatial_query_matches_now_template() { + // Spatial: window equals the scrape interval (1s), GROUP BY all labels + let template = "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, NOW()) AND NOW() GROUP BY L1, L2, L3, L4"; + // scrape_interval=1, window=1 → classified as Spatial by the matcher + let engine = build_sql_engine(template, 1, 1); + + let incoming = "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -1, '2025-10-01 00:00:10') AND '2025-10-01 00:00:10' GROUP BY L1, L2, L3, L4"; + let query_time = 1727740810.0_f64; + + let context = engine.build_query_execution_context_sql(incoming.to_string(), query_time); + assert!( + context.is_some(), + "Expected build_query_execution_context_sql to return Some for spatial query, got None." + ); + } + + #[test] + fn test_temporal_quantile_query_matches_now_template() { + // TemporalQuantile: QUANTILE aggregation, window > scrape interval, GROUP BY all labels + let template = "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4"; + let engine = build_sql_engine(template, 1, 10); + + let incoming = "SELECT QUANTILE(0.95, value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:10') AND '2025-10-01 00:00:10' GROUP BY L1, L2, L3, L4"; + let query_time = 1727740810.0_f64; + + let context = engine.build_query_execution_context_sql(incoming.to_string(), query_time); + assert!( + context.is_some(), + "Expected build_query_execution_context_sql to return Some for temporal quantile query, got None." + ); + } + + #[test] + fn test_spatial_of_temporal_subquery_matches_now_template() { + // Spatial-of-temporal: outer GROUP BY L1 (subset), inner GROUP BY all labels + let template = "SELECT SUM(result) FROM (SELECT SUM(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4) GROUP BY L1"; + let engine = build_sql_engine(template, 1, 10); + + let incoming = "SELECT SUM(result) FROM (SELECT SUM(value) AS result FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:10') AND '2025-10-01 00:00:10' GROUP BY L1, L2, L3, L4) GROUP BY L1"; + let query_time = 1727740810.0_f64; + + let context = engine.build_query_execution_context_sql(incoming.to_string(), query_time); + assert!( + context.is_some(), + "Expected build_query_execution_context_sql to return Some for spatial-of-temporal subquery, got None." + ); + } + + #[test] + fn test_no_match_returns_none() { + // Engine has a SUM template; incoming uses AVG — should never match + let template = "SELECT SUM(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4"; + let engine = build_sql_engine(template, 1, 10); + + let incoming = "SELECT AVG(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, '2025-10-01 00:00:10') AND '2025-10-01 00:00:10' GROUP BY L1, L2, L3, L4"; + let query_time = 1727740810.0_f64; + + let context = engine.build_query_execution_context_sql(incoming.to_string(), query_time); + assert!( + context.is_none(), + "Expected build_query_execution_context_sql to return None for a query that doesn't match the template, got Some." + ); + } +}