From 2e9e3382c418d9632c8444bdbf587c0a91a99bbc Mon Sep 17 00:00:00 2001
From: benjamib112
Date: Thu, 19 Mar 2026 20:57:53 -0400
Subject: [PATCH 1/5] preliminary migration to new repo, filepaths not yet fixed

---
 .../asap_h2o_queries.sql                      | 249 ++++++++++++
 .../asap_mode_queries.sql                     | 249 ++++++++++++
 .../asap_benchmark_pipeline/compare_values.py |  79 ++++
 .../asap_benchmark_pipeline/h2o_init.sql      |  40 ++
 .../inference_config.yaml                     |  17 +
 .../asap_benchmark_pipeline/run_benchmark.py  | 289 ++++++++++++++
 .../asap_benchmark_pipeline/run_commands.txt  |  77 ++++
 .../asap_benchmark_pipeline/run_experiment.py | 254 +++++++++++++
 .../asap_benchmark_pipeline/run_pipeline.sh   | 353 ++++++++++++++++++
 .../streaming_config.yaml                     |  22 ++
 10 files changed, 1629 insertions(+)
 create mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/asap_h2o_queries.sql
 create mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/asap_mode_queries.sql
 create mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/compare_values.py
 create mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/h2o_init.sql
 create mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/inference_config.yaml
 create mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py
 create mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/run_commands.txt
 create mode 100755 asap-tools/execution-utilities/asap_benchmark_pipeline/run_experiment.py
 create mode 100755 asap-tools/execution-utilities/asap_benchmark_pipeline/run_pipeline.sh
 create mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/streaming_config.yaml

diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_h2o_queries.sql b/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_h2o_queries.sql
new file mode 100644
index 0000000..a844553
--- /dev/null
+++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_h2o_queries.sql
+-- Q1: p95 v1 by id1 id2 id3, 120s window ending 00:11:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:11:10') AND '1971-01-01 00:11:10'
+GROUP BY id1, id2, id3;
+
+-- Q2: p95 v1 by id1 id2 id3, 120s window ending 00:13:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:13:10') AND '1971-01-01 00:13:10'
+GROUP BY id1, id2, id3;
+
+-- Q3: p95 v1 by id1 id2 id3, 120s window ending 00:15:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:15:10') AND '1971-01-01 00:15:10'
+GROUP BY id1, id2, id3;
+
+-- Q4: p95 v1 by id1 id2 id3, 120s window ending 00:17:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:17:10') AND '1971-01-01 00:17:10'
+GROUP BY id1, id2, id3;
+
+-- Q5: p95 v1 by id1 id2 id3, 120s window ending 00:19:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:19:10') AND '1971-01-01 00:19:10'
+GROUP BY id1, id2, id3;
+
+-- Q6: p95 v1 by id1 id2 id3, 120s window ending 00:21:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:21:10') AND '1971-01-01 00:21:10'
+GROUP BY id1, id2, id3;
+
+-- Q7: p95 v1 by id1 id2 id3, 120s window ending 00:23:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:23:10') AND '1971-01-01 00:23:10'
+GROUP BY id1, id2, id3;
+
+-- Q8: p95 v1 by id1 id2 id3, 120s window ending 00:25:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:25:10') AND '1971-01-01 00:25:10'
+GROUP BY id1, id2, id3;
+
+-- Q9: p95 v1 by id1 id2 id3, 120s window ending 00:27:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:27:10') AND '1971-01-01 00:27:10'
+GROUP BY id1, id2, id3;
+
+-- Q10: p95 v1 by id1 id2 id3, 120s window ending 00:29:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:29:10') AND '1971-01-01 00:29:10'
+GROUP BY id1, id2, id3;
+
+-- Q11: p95 v1 by id1 id2 id3, 120s window ending 00:31:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:31:10') AND '1971-01-01 00:31:10'
+GROUP BY id1, id2, id3;
+
+-- Q12: p95 v1 by id1 id2 id3, 120s window ending 00:33:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:33:10') AND '1971-01-01 00:33:10'
+GROUP BY id1, id2, id3;
+
+-- Q13: p95 v1 by id1 id2 id3, 120s window ending 00:35:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:35:10') AND '1971-01-01 00:35:10'
+GROUP BY id1, id2, id3;
+
+-- Q14: p95 v1 by id1 id2 id3, 120s window ending 00:37:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:37:10') AND '1971-01-01 00:37:10'
+GROUP BY id1, id2, id3;
+
+-- Q15: p95 v1 by id1 id2 id3, 120s window ending 00:39:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:39:10') AND '1971-01-01 00:39:10'
+GROUP BY id1, id2, id3;
+
+-- Q16: p95 v1 by id1 id2 id3, 120s window ending 00:41:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:41:10') AND '1971-01-01 00:41:10'
+GROUP BY id1, id2, id3;
+
+-- Q17: p95 v1 by id1 id2 id3, 120s window ending 00:43:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:43:10') AND '1971-01-01 00:43:10'
+GROUP BY id1, id2, id3;
+
+-- Q18: p95 v1 by id1 id2 id3, 120s window ending 00:45:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:45:10') AND '1971-01-01 00:45:10'
+GROUP BY id1, id2, id3;
+
+-- Q19: p95 v1 by id1 id2 id3, 120s window ending 00:47:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:47:10') AND '1971-01-01 00:47:10'
+GROUP BY id1, id2, id3;
+
+-- Q20: p95 v1 by id1 id2 id3, 120s window ending 00:49:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:49:10') AND '1971-01-01 00:49:10'
+GROUP BY id1, id2, id3;
+
+-- Q21: p95 v1 by id1 id2 id3, 120s window ending 00:51:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:51:10') AND '1971-01-01 00:51:10'
+GROUP BY id1, id2, id3;
+
+-- Q22: p95 v1 by id1 id2 id3, 120s window ending 00:53:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:53:10') AND '1971-01-01 00:53:10'
+GROUP BY id1, id2, id3;
+
+-- Q23: p95 v1 by id1 id2 id3, 120s window ending 00:55:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:55:10') AND '1971-01-01 00:55:10'
+GROUP BY id1, id2, id3;
+
+-- Q24: p95 v1 by id1 id2 id3, 120s window ending 00:57:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:57:10') AND '1971-01-01 00:57:10'
+GROUP BY id1, id2, id3;
+
+-- Q25: p95 v1 by id1 id2 id3, 120s window ending 00:59:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:59:10') AND '1971-01-01 00:59:10'
+GROUP BY id1, id2, id3;
+
+-- Q26: p95 v1 by id1 id2 id3, 120s window ending 01:01:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:01:10') AND '1971-01-01 01:01:10'
+GROUP BY id1, id2, id3;
+
+-- Q27: p95 v1 by id1 id2 id3, 120s window ending 01:03:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:03:10') AND '1971-01-01 01:03:10'
+GROUP BY id1, id2, id3;
+
+-- Q28: p95 v1 by id1 id2 id3, 120s window ending 01:05:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:05:10') AND '1971-01-01 01:05:10'
+GROUP BY id1, id2, id3;
+
+-- Q29: p95 v1 by id1 id2 id3, 120s window ending 01:07:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:07:10') AND '1971-01-01 01:07:10'
+GROUP BY id1, id2, id3;
+
+-- Q30: p95 v1 by id1 id2 id3, 120s window ending 01:09:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:09:10') AND '1971-01-01 01:09:10'
+GROUP BY id1, id2, id3;
+
+-- Q31: p95 v1 by id1 id2 id3, 120s window ending 01:11:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:11:10') AND '1971-01-01 01:11:10'
+GROUP BY id1, id2, id3;
+
+-- Q32: p95 v1 by id1 id2 id3, 120s window ending 01:13:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:13:10') AND '1971-01-01 01:13:10'
+GROUP BY id1, id2, id3;
+
+-- Q33: p95 v1 by id1 id2 id3, 120s window ending 01:15:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:15:10') AND '1971-01-01 01:15:10'
+GROUP BY id1, id2, id3;
+
+-- Q34: p95 v1 by id1 id2 id3, 120s window ending 01:17:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:17:10') AND '1971-01-01 01:17:10'
+GROUP BY id1, id2, id3;
+
+-- Q35: p95 v1 by id1 id2 id3, 120s window ending 01:19:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:19:10') AND '1971-01-01 01:19:10'
+GROUP BY id1, id2, id3;
+
+-- Q36: p95 v1 by id1 id2 id3, 120s window ending 01:21:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:21:10') AND '1971-01-01 01:21:10'
+GROUP BY id1, id2, id3;
+
+-- Q37: p95 v1 by id1 id2 id3, 120s window ending 01:23:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:23:10') AND '1971-01-01 01:23:10'
+GROUP BY id1, id2, id3;
+
+-- Q38: p95 v1 by id1 id2 id3, 120s window ending 01:25:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:25:10') AND '1971-01-01 01:25:10'
+GROUP BY id1, id2, id3;
+
+-- Q39: p95 v1 by id1 id2 id3, 120s window ending 01:27:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:27:10') AND '1971-01-01 01:27:10'
+GROUP BY id1, id2, id3;
+
+-- Q40: p95 v1 by id1 id2 id3, 120s window ending 01:29:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:29:10') AND '1971-01-01 01:29:10'
+GROUP BY id1, id2, id3;
+
+-- Q41: p95 v1 by id1 id2 id3, 120s window ending 01:31:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:31:10') AND '1971-01-01 01:31:10'
+GROUP BY id1, id2, id3;
+
+-- Q42: p95 v1 by id1 id2 id3, 120s window ending 01:33:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:33:10') AND '1971-01-01 01:33:10'
+GROUP BY id1, id2, id3;
+
+-- Q43: p95 v1 by id1 id2 id3, 120s window ending 01:35:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:35:10') AND '1971-01-01 01:35:10'
+GROUP BY id1, id2, id3;
+
+-- Q44: p95 v1 by id1 id2 id3, 120s window ending 01:37:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:37:10') AND '1971-01-01 01:37:10'
+GROUP BY id1, id2, id3;
+
+-- Q45: p95 v1 by id1 id2 id3, 120s window ending 01:39:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:39:10') AND '1971-01-01 01:39:10'
+GROUP BY id1, id2, id3;
+
+-- Q46: p95 v1 by id1 id2 id3, 120s window ending 01:41:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:41:10') AND '1971-01-01 01:41:10'
+GROUP BY id1, id2, id3;
+
+-- Q47: p95 v1 by id1 id2 id3, 120s window ending 01:43:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:43:10') AND '1971-01-01 01:43:10'
+GROUP BY id1, id2, id3;
+
+-- Q48: p95 v1 by id1 id2 id3, 120s window ending 01:45:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:45:10') AND '1971-01-01 01:45:10'
+GROUP BY id1, id2, id3;
+
+-- Q49: p95 v1 by id1 id2 id3, 120s window ending 01:47:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:47:10') AND '1971-01-01 01:47:10'
+GROUP BY id1, id2, id3;
+
+-- Q50: p95 v1 by id1 id2 id3, 120s window ending 01:49:10
+SELECT quantile(0.95)(v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:49:10') AND '1971-01-01 01:49:10'
+GROUP BY id1, id2, id3;
diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_mode_queries.sql b/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_mode_queries.sql
new file mode 100644
index 0000000..4c0524e
--- /dev/null
+++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_mode_queries.sql
+-- Q1: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q2: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q3: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q4: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q5: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q6: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q7: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q8: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q9: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q10: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q11: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q12: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q13: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q14: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q15: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q16: p95 v1 by id1 id2 
id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q17: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q18: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q19: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q20: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q21: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q22: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q23: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q24: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q25: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q26: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q27: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q28: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q29: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q30: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q31: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q32: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q33: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q34: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q35: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q36: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q37: p95 v1 by id1 id2 id3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby +WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() +GROUP BY id1, id2, id3; + +-- Q38: p95 v1 by id1 id2 id3 +SELECT 
QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q39: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q40: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q41: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q42: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q43: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q44: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q45: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q46: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q47: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q48: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q49: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
+
+-- Q50: p95 v1 by id1 id2 id3
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby
+WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW()
+GROUP BY id1, id2, id3;
diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/compare_values.py b/asap-tools/execution-utilities/asap_benchmark_pipeline/compare_values.py
new file mode 100644
index 0000000..092a0a7
--- /dev/null
+++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/compare_values.py
+import argparse
+import csv
+import matplotlib.pyplot as plt
+import re
+import numpy as np
+
+def extract_value(result_str):
+    """Extracts the first numerical value from the result preview string."""
+    if not result_str:
+        return 0.0
+    # Match a signed integer or float (the optional sign must group with both
+    # alternatives, otherwise "-42" would match as "42")
+    match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", result_str)
+    if match:
+        return float(match.group())
+    return 0.0
+
+def load_results(csv_file):
+    values = []
+    try:
+        with open(csv_file, 'r', encoding='utf-8') as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                if row['error']:
+                    values.append(0.0)
+                else:
+                    values.append(extract_value(row['result_preview']))
+    except FileNotFoundError:
+        print(f"✗ Could not find {csv_file}")
+        return []
+    return values
+
+def main():
+    parser = argparse.ArgumentParser(description="Compare computed values from Baseline and ASAP runs.")
+    parser.add_argument("--baseline", default="baseline_results.csv", help="Baseline CSV file")
+    parser.add_argument("--asap", default="asap_results_run1.csv", help="ASAP CSV file")
+    parser.add_argument("--output", default="value_comparison.png", help="Output image file")
+
+    args = parser.parse_args()
+
+    baseline_values = 
load_results(args.baseline) + asap_values = load_results(args.asap) + + if not baseline_values or not asap_values: + print("Missing data. Please make sure both CSVs exist and have data.") + return + + # Ensure we only compare up to the matched length in case one failed early + min_len = min(len(baseline_values), len(asap_values)) + baseline_values = baseline_values[:min_len] + asap_values = asap_values[:min_len] + + # --- Plotting Code --- + plt.figure(figsize=(12, 6)) + + execution_order = np.arange(1, min_len + 1) + bar_width = 0.4 + + # Create grouped bars + plt.bar(execution_order - bar_width/2, baseline_values, width=bar_width, + label='Baseline (Exact)', color='#1f77b4', edgecolor='black') + plt.bar(execution_order + bar_width/2, asap_values, width=bar_width, + label='ASAP (Approximate)', color='#ff7f0e', edgecolor='black') + + plt.xlabel("Query Execution Order", fontsize=12, fontweight='bold') + plt.ylabel("Computed Value (95th Quantile)", fontsize=12, fontweight='bold') + plt.title("Query Output Comparison: Exact vs Approximate", fontsize=14, fontweight='bold') + + # Set tick marks at every 10 on the X axis + plt.xticks(np.arange(0, min_len + 1, 10)) + + plt.legend(loc='upper right') + plt.grid(axis='y', linestyle='--', alpha=0.7) + plt.tight_layout() + + plt.savefig(args.output) + print(f"✓ Value comparison graph successfully saved to {args.output}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/h2o_init.sql b/asap-tools/execution-utilities/asap_benchmark_pipeline/h2o_init.sql new file mode 100644 index 0000000..2f62d03 --- /dev/null +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/h2o_init.sql @@ -0,0 +1,40 @@ +DROP VIEW IF EXISTS h2o_groupby_mv; +DROP TABLE IF EXISTS h2o_groupby; +DROP TABLE IF EXISTS h2o_groupby_queue; + +CREATE TABLE IF NOT EXISTS h2o_groupby_queue +( + timestamp String, + id1 String, + id2 String, + id3 String, + id4 Int32, + id5 Int32, + id6 Int32, + v1 Int32, + v2 Int32, + v3 Float64 +) ENGINE = Kafka +SETTINGS kafka_broker_list = 'localhost:9092', + kafka_topic_list = 'h2o_groupby', + kafka_group_name = 'clickhouse_h2o', + kafka_format = 'JSONEachRow'; + +CREATE TABLE IF NOT EXISTS h2o_groupby +( + timestamp DateTime, + id1 String, + id2 String, + id3 String, + id4 Int32, + id5 Int32, + id6 Int32, + v1 Int32, + v2 Int32, + v3 Float64 +) ENGINE = MergeTree +ORDER BY (id1, id2, id3, id4); + +CREATE MATERIALIZED VIEW IF NOT EXISTS h2o_groupby_mv TO h2o_groupby AS +SELECT parseDateTimeBestEffort(timestamp) AS timestamp, id1, id2, id3, id4, id5, id6, v1, v2, v3 +FROM h2o_groupby_queue; \ No newline at end of file diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/inference_config.yaml b/asap-tools/execution-utilities/asap_benchmark_pipeline/inference_config.yaml new file mode 100644 index 0000000..b0f3dc2 --- /dev/null +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/inference_config.yaml @@ -0,0 +1,17 @@ +tables: + - name: h2o_groupby + time_column: timestamp + metadata_columns: [id1, id2, id3] + value_columns: [v1] + +cleanup_policy: + name: read_based + +queries: +- aggregations: + - aggregation_id: 12 + read_count_threshold: 999999 + query: |- + SELECT QUANTILE(0.95, v1) FROM h2o_groupby + WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() + GROUP BY id1, id2, id3; \ No newline at end of file diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py 
b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py new file mode 100644 index 0000000..d9d0c74 --- /dev/null +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py @@ -0,0 +1,289 @@ +from typing import Tuple, List, Optional +from pathlib import Path +from datetime import datetime, timedelta +import re +import argparse +import os +import gdown +import requests +import urllib.parse +import time +import csv +import matplotlib.pyplot as plt + +def extract_queries_from_sql(sql_file: Path) -> List[Tuple[str, str]]: + """Extract query ID and SQL from asap_h2o_queries.sql""" + queries = [] + with open(sql_file, "r") as f: + content = f.read() + + pattern = r"-- ([A-Za-z0-9_]+):[^\n]*\n(SELECT[^;]+;)" + matches = re.findall(pattern, content, re.DOTALL | re.IGNORECASE) + + for query_id, sql in matches: + sql = sql.strip() + queries.append((query_id, sql)) + + return queries + +def data_loaded(clickhouse_url: str): + try: + response = requests.post(clickhouse_url, data="SELECT count(*) FROM h2o_groupby") + if response.status_code != 200: + return False + count = int(response.text.strip()) + if count > 0: + print(f"✓ Data already loaded ({count:,} rows)") + return count > 0 + except: + return False + +def stream_csv_with_timestamps(filename: str): + """ + Generator that reads the CSV and prepends a timestamp column. + Starts at 1971-01-01 00:00:00 and increments by 10s every 100 rows. + """ + start_time = datetime(1971, 1, 1, 0, 0, 0) + + with open(filename, 'r', encoding='utf-8') as f: + header = f.readline().strip() + yield f"timestamp,{header}\n".encode('utf-8') + + chunk = bytearray() + row_count = 0 + ts_bytes = b"" + + for line in f: + if row_count % 100 == 0: + delta_seconds = (row_count // 100) * 10 + current_time = start_time + timedelta(seconds=delta_seconds) + ts_str = current_time.strftime('%Y-%m-%dT%H:%M:%SZ') + ',' + ts_bytes = ts_str.encode('utf-8') + + chunk.extend(ts_bytes) + chunk.extend(line.encode('utf-8')) + row_count += 1 + + if len(chunk) > 65536: + yield bytes(chunk) + chunk = bytearray() + + if chunk: + yield bytes(chunk) + +def load_h2o_data(clickhouse_url: str, mode: str): + # 1. SETUP TABLES + try: + with open("h2o_init.sql", 'r') as f: + file_content = f.read() + except FileNotFoundError: + print("✗ Error: h2o_init.sql not found.") + return False + + statements = [s.strip() for s in file_content.split(';') if s.strip()] + print(f"Executing {len(statements)} setup statements...") + + try: + for sql in statements: + response = requests.post(clickhouse_url, data=sql) + response.raise_for_status() + except Exception as e: + print(f"Error executing statement: {e}") + return False + + print("✓ Created h2o_groupby tables and views") + + if data_loaded(clickhouse_url): + return True + + # 2. DOWNLOAD DATA + FILE_ID = "15SVQjQ2QehzYDLoDonio4aP7xqdMiNyi" + FILENAME = "G1_1e7_1e2_0_0.csv" + + if os.path.exists(FILENAME) and os.path.getsize(FILENAME) > 100 * 1024 * 1024: + print(f"File {FILENAME} already exists. Skipping download.") + else: + print(f"Downloading H2O dataset (ID: {FILE_ID}) using gdown...") + url = f"https://drive.google.com/uc?id={FILE_ID}" + gdown.download(url, FILENAME, quiet=False) + + # 3. 
INSERT DATA VIA HTTP + if mode == "asap": + print("Publishing data to Kafka via ClickHouse HTTP (ASAP mode)...") + insert_query = "INSERT INTO h2o_groupby_queue FORMAT CSVWithNames" + else: + print("Inserting data directly into ClickHouse MergeTree (Baseline mode)...") + insert_query = "INSERT INTO h2o_groupby FORMAT CSVWithNames" + + url = f"{clickhouse_url.rstrip('/')}/" + params = {"query": insert_query} + + try: + response = requests.post(url, params=params, data=stream_csv_with_timestamps(FILENAME)) + if response.status_code != 200: + print(f"✗ Error loading data: {response.text}") + return False + except Exception as e: + print(f"✗ Exception during data load: {e}") + return False + + if mode == "asap": + print("Waiting for materialized view to consume all rows from Kafka...") + prev_count = -1 + stable_rounds = 0 + while stable_rounds < 3: + time.sleep(5) + response = requests.post(clickhouse_url, data="SELECT count(*) FROM h2o_groupby") + count = int(response.text.strip()) + print(f" h2o_groupby row count: {count:,}") + if count == prev_count: + stable_rounds += 1 + else: + stable_rounds = 0 + prev_count = count + else: + response = requests.post(clickhouse_url, data="SELECT count(*) FROM h2o_groupby") + count = int(response.text.strip()) + + print(f"✓ Loaded {count:,} rows") + + return True + +def run_query(query: str, endpoint_url: str, session: requests.Session, timeout: int = 30) -> Tuple[float, Optional[str], Optional[str]]: + encoded_query = urllib.parse.quote(query) + + if "?" in endpoint_url: + url = f"{endpoint_url}&query={encoded_query}" + else: + url = f"{endpoint_url}?query={encoded_query}" + + try: + start_time = time.time() + response = session.get(url, timeout=timeout) + latency_ms = (time.time() - start_time) * 1000 + + if response.status_code == 200: + return latency_ms, response.text.strip(), None + else: + return latency_ms, None, f"HTTP {response.status_code}: {response.text}" + + except requests.Timeout: + return timeout * 1000, None, "Timeout" + except Exception as e: + return 0, None, str(e) + +def run_benchmark(sql_file: Path, endpoint_url: str, output_csv: Path, mode: str, load_data: bool, query_filter: Optional[List[str]] = None): + print(f"\nRunning benchmark in {mode} mode...") + print(f"Endpoint: {endpoint_url}") + print(f"Output: {output_csv}") + + queries = extract_queries_from_sql(sql_file) + if query_filter: + queries = [(qid, sql) for qid, sql in queries if qid in query_filter] + print(f"Found {len(queries)} queries") + + session = requests.Session() + + # Lists to store plotting data + plot_query_ids = [] + plot_latencies = [] + + with open(output_csv, "w", newline="") as csvfile: + writer = csv.writer(csvfile) + writer.writerow(["query_id", "latency_ms", "result_rows", "result_preview", "error", "mode"]) + + for query_id, sql in queries: + print(f"Running {query_id}...", end=" ", flush=True) + + latency_ms, result, error = run_query(sql, endpoint_url, session) + + if error: + print(f"✗ {error}") + writer.writerow([query_id, latency_ms, 0, "", error, mode]) + # Append 0 for failed queries on the plot to show they failed + plot_query_ids.append(query_id) + plot_latencies.append(0.0) + else: + result_lines = result.strip().split("\n") if result else [] + num_rows = len(result_lines) + preview = result[:100].replace("\n", " | ") if result else "" + print(f"✓ {latency_ms:.2f}ms ({num_rows} rows)") + writer.writerow([query_id, f"{latency_ms:.2f}", num_rows, preview, "", mode]) + + plot_query_ids.append(query_id) + plot_latencies.append(latency_ms) + + 
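+                # short pause between consecutive queries so requests don't
+                # arrive back-to-back at the endpoint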
time.sleep(0.1) + + print(f"\n✓ Results saved to {output_csv}") + + # --- Plotting Code --- + if plot_latencies: + plt.figure(figsize=(10, 6)) + + # Give ASAP and Baseline distinct colors + bar_color = '#1f77b4' if mode == 'baseline' else '#ff7f0e' + + # Create a numerical X-axis (1, 2, 3...) + execution_order = list(range(1, len(plot_latencies) + 1)) + + plt.bar(execution_order, plot_latencies, color=bar_color, edgecolor='black') + + plt.xlabel("Query Execution Order", fontsize=12, fontweight='bold') + plt.ylabel("Latency (ms)", fontsize=12, fontweight='bold') + + # Set tick marks at every 10 on the X axis + max_order = len(execution_order) + plt.xticks(range(0, max_order + 1, 10)) + + # Build dynamic title based on parameters + load_text = "With Data Loading" if load_data else "Without Data Loading" + plt.title(f"Query Latency - {mode.upper()} Mode ({load_text})", fontsize=14, fontweight='bold') + + plt.grid(axis='y', linestyle='--', alpha=0.7) + plt.tight_layout() + + # Save plot to the same directory as the output CSV, replacing the extension with .png + plot_output = output_csv.with_suffix(".png") + plt.savefig(plot_output) + print(f"✓ Graph successfully saved to {plot_output}") + + +def main(): + parser = argparse.ArgumentParser(description="Benchmark ASAP queries on H2o data") + parser.add_argument("--mode", choices=["baseline", "asap"], default="asap", help="ASAP mode (default) or Baseline (ClickHouse only)") + parser.add_argument("--load-data", action="store_true", help="Load H2o data into ClickHouse") + parser.add_argument("--clickhouse-url", default="http://localhost:8123", help="ClickHouse server URL") + parser.add_argument("--asap-url", default="http://localhost:8088/clickhouse/query", help="ASAP QueryEngine URL") + parser.add_argument("--output", default="asap_results.csv", help="Output CSV file") + parser.add_argument("--sql-file", default=None, help="SQL file to use (default: asap_h2o_queries.sql)") + parser.add_argument("--filter", default=None, help="Comma-separated query IDs to run (e.g. T5,T6)") + + args = parser.parse_args() + + output_path = Path(args.output) + if output_path.exists() and output_path.is_dir(): + print(f"Error: Output {output_path} is a directory. 
Please specify a file path (e.g., results.csv)")
+        return 1
+
+    if args.sql_file:
+        sql_file = Path(args.sql_file)
+    elif args.mode == "asap":
+        sql_file = Path(__file__).parent / "asap_mode_queries.sql"
+    else:
+        sql_file = Path(__file__).parent / "asap_h2o_queries.sql"
+
+    if args.load_data:
+        if not load_h2o_data(args.clickhouse_url, args.mode):
+            print("Failed to load data")
+            return 1
+
+    endpoint = args.clickhouse_url if args.mode == "baseline" else args.asap_url
+    query_filter = [q.strip() for q in args.filter.split(",")] if args.filter else None
+
+    # Notice we pass args.load_data so the plotting logic knows whether data was loaded
+    run_benchmark(sql_file, endpoint, output_path, args.mode, args.load_data, query_filter)
+    return 0
+
+if __name__ == "__main__":
+    exit(main())
\ No newline at end of file
diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_commands.txt b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_commands.txt
new file mode 100644
index 0000000..53bf34f
--- /dev/null
+++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_commands.txt
+# baseline mode
+
+cd ~/asap-internal/Utilities/installation/kafka
+./run.sh kafka/
+
+cd ~/asap-internal/Utilities/installation/clickhouse
+./run.sh clickhouse/
+
+curl -s "http://localhost:8123/" -d "TRUNCATE TABLE h2o_groupby"
+
+python3 run_benchmark.py --output asap_results.csv --load-data --mode baseline
+
+# asap mode
+
+Setup
+------
+
+1. Edit adapter config in ~/asap-internal/QueryEngineRust/src/main.rs to:
+
+let adapter_config = AdapterConfig::clickhouse_sql(
+    "http://localhost:8123".to_string(), // ClickHouse server URL
+    "default".to_string(),               // Database name
+    true,                                // Always forward (fallback for every query)
+);
+
+2. Compile the query engine:
+
+cd ~/asap-internal/QueryEngineRust
+cargo build --release
+
+Run
+------
+
+1. Launch Arroyo:
+
+cd ~/asap-internal/arroyo
+./target/release/arroyo --config ~/asap-internal/ArroyoSketch/config.yaml cluster \
+    > /tmp/arroyo.log 2>&1 &
+
+2. Submit pipeline:
+
+cd ~/asap-internal/ArroyoSketch
+python3 run_arroyosketch.py \
+    --source_type kafka \
+    --kafka_input_format json \
+    --input_kafka_topic h2o_groupby \
+    --output_format json \
+    --pipeline_name asap_h2o_pipeline \
+    --config_file_path ~/asap-internal/ExecutionUtilities/asap_query_latency/streaming_config.yaml \
+    --output_kafka_topic sketch_topic \
+    --output_dir ./outputs \
+    --parallelism 1 \
+    --query_language sql
+
+3. Wait until pipeline is running:
+
+(Optional) Check output on localhost:8000
+
+4. Load data through Kafka so Arroyo can build sketches:
+
+python3 run_benchmark.py --load-data --mode asap
+
+5. Start QueryEngine:
+
+cd ~/asap-internal/QueryEngineRust
+nohup ./target/release/query_engine_rust \
+    --kafka-topic sketch_topic --input-format json \
+    --config ~/asap-internal/ExecutionUtilities/asap_query_latency/inference_config.yaml \
+    --streaming-config ~/asap-internal/ExecutionUtilities/asap_query_latency/streaming_config.yaml \
+    --http-port 8088 --delete-existing-db --log-level info \
+    --output-dir ./output --streaming-engine arroyo \
+    --query-language SQL --lock-strategy per-key \
+    --prometheus-scrape-interval 1 > /tmp/query_engine.log 2>&1 &
+
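+(Optional) Sanity check before running the benchmark: confirm a sketch actually
+landed on the output topic and that the QueryEngine answers over HTTP. The Kafka
+path is assumed from the Setup section above; the endpoint and query match the
+defaults in run_benchmark.py and asap_mode_queries.sql:
+
+~/asap-internal/Utilities/installation/kafka/kafka/bin/kafka-console-consumer.sh \
+    --bootstrap-server localhost:9092 --topic sketch_topic --from-beginning --max-messages 1
+
+curl -G 'http://localhost:8088/clickhouse/query' \
+    --data-urlencode "query=SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() GROUP BY id1, id2, id3;"
+
+6. 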
Run ASAP mode: + +python3 run_benchmark.py --mode asap --load-data --output asap_results_run1.csv diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_experiment.py b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_experiment.py new file mode 100755 index 0000000..07a9e65 --- /dev/null +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_experiment.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +""" +Run baseline and ASAP experiments, then generate comparison graphs. + +Usage: + # Full run from scratch (starts all infrastructure): + python3 run_experiment.py --runs 3 --load-data + + # Infra already running, data already loaded: + python3 run_experiment.py --runs 3 --skip-infra + + # Skip baseline (use existing baseline_results.csv): + python3 run_experiment.py --runs 3 --skip-infra --skip-baseline + + # Quick single-run comparison: + python3 run_experiment.py --runs 1 --skip-infra +""" + +import argparse +import subprocess +import sys +import csv +from pathlib import Path +from datetime import datetime + +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import numpy as np + + +def run_pipeline(mode, output_file, load_data=False, skip_infra=False): + """Invoke run_pipeline.sh and return True on success.""" + script_dir = Path(__file__).parent + cmd = [ + "bash", str(script_dir / "run_pipeline.sh"), + "--mode", mode, + "--output", str(output_file), + ] + if load_data: + cmd.append("--load-data") + if skip_infra: + cmd.append("--skip-infra") + + print(f"\n{'='*60}") + print(f"Running: {' '.join(cmd)}") + print(f"{'='*60}\n") + + result = subprocess.run(cmd, check=False) + return result.returncode == 0 + + +def load_csv(csv_file): + """Return (query_ids, latencies_ms, errors) from a benchmark CSV.""" + query_ids, latencies, errors = [], [], [] + try: + with open(csv_file, newline="") as f: + for row in csv.DictReader(f): + query_ids.append(row["query_id"]) + try: + latencies.append(float(row["latency_ms"])) + except (ValueError, KeyError): + latencies.append(0.0) + errors.append(bool(row.get("error", "").strip())) + except FileNotFoundError: + print(f" Warning: {csv_file} not found") + return query_ids, latencies, errors + + +def generate_comparison_graphs(baseline_file, asap_files, output_dir, timestamp): + """Generate side-by-side latency comparison and per-run overlay graphs.""" + b_ids, b_lat, b_err = load_csv(baseline_file) + if not b_lat: + print("No baseline data; skipping graphs") + return + + asap_runs = [] + for f in asap_files: + _, lats, _ = load_csv(f) + if lats: + asap_runs.append(lats) + + if not asap_runs: + print("No ASAP run data; skipping graphs") + return + + n = len(b_lat) + x = np.arange(1, n + 1) + asap_matrix = np.array([r[:n] for r in asap_runs]) # shape (runs, queries) + asap_mean = asap_matrix.mean(axis=0) + asap_std = asap_matrix.std(axis=0) if len(asap_runs) > 1 else np.zeros(n) + + # --- Figure 1: Baseline vs ASAP mean latency (grouped bars) --- + fig, axes = plt.subplots(2, 1, figsize=(14, 12)) + fig.suptitle( + f"ASAP vs Baseline — {len(asap_runs)} ASAP run(s) — {timestamp}", + fontsize=14, fontweight="bold" + ) + + ax = axes[0] + bar_w = 0.38 + ax.bar(x - bar_w / 2, b_lat, bar_w, label="Baseline (exact)", color="#1f77b4", alpha=0.85, edgecolor="black") + ax.bar(x + bar_w / 2, asap_mean, bar_w, label=f"ASAP avg (n={len(asap_runs)})", color="#ff7f0e", alpha=0.85, edgecolor="black") + if len(asap_runs) > 1: + ax.errorbar(x + bar_w / 2, asap_mean, yerr=asap_std, fmt="none", color="black", capsize=2) + 
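+    # (error bars = per-query std-dev across ASAP runs; drawn only when n > 1)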
ax.set_xlabel("Query Number") + ax.set_ylabel("Latency (ms)") + ax.set_title("Latency per Query") + ax.set_xticks(np.arange(0, n + 1, 5)) + ax.legend() + ax.grid(axis="y", linestyle="--", alpha=0.5) + + # --- Subplot 2: Speedup ratio --- + ax2 = axes[1] + with np.errstate(divide="ignore", invalid="ignore"): + speedup = np.where(asap_mean > 0, np.array(b_lat[:n]) / asap_mean, 0.0) + colors = ["#2ca02c" if s >= 1.0 else "#d62728" for s in speedup] + ax2.bar(x, speedup, color=colors, alpha=0.85, edgecolor="black") + ax2.axhline(1.0, color="black", linestyle="--", linewidth=1, label="1x (no speedup)") + ax2.axhline(2.0, color="gray", linestyle=":", linewidth=1, label="2x target") + ax2.set_xlabel("Query Number") + ax2.set_ylabel("Speedup (Baseline / ASAP)") + ax2.set_title("ASAP Speedup Factor per Query (green = faster, red = slower)") + ax2.set_xticks(np.arange(0, n + 1, 5)) + ax2.legend() + ax2.grid(axis="y", linestyle="--", alpha=0.5) + + out1 = output_dir / f"comparison_{timestamp}.png" + plt.tight_layout() + plt.savefig(out1, dpi=150) + plt.close() + print(f"Saved: {out1}") + + # --- Figure 2: All ASAP runs overlaid (consistency check) --- + if len(asap_runs) > 1: + fig2, ax3 = plt.subplots(figsize=(14, 6)) + colors_runs = plt.cm.tab10.colors + for i, run_lats in enumerate(asap_runs): + ax3.plot(x, run_lats[:n], marker="o", markersize=3, linewidth=1, + label=f"ASAP run {i + 1}", color=colors_runs[i % 10], alpha=0.7) + ax3.plot(x, b_lat, marker="s", markersize=3, linewidth=1.5, + label="Baseline", color="black", linestyle="--") + ax3.set_xlabel("Query Number") + ax3.set_ylabel("Latency (ms)") + ax3.set_title("ASAP Run Consistency — All Runs Overlaid") + ax3.set_xticks(np.arange(0, n + 1, 5)) + ax3.legend() + ax3.grid(linestyle="--", alpha=0.4) + out2 = output_dir / f"asap_runs_overlay_{timestamp}.png" + plt.tight_layout() + plt.savefig(out2, dpi=150) + plt.close() + print(f"Saved: {out2}") + + # --- Summary statistics --- + valid_b = [v for v in b_lat if v > 0] + valid_a = [v for v in asap_mean if v > 0] + if valid_b and valid_a: + print(f"\nSummary Statistics:") + print(f" Baseline — mean: {np.mean(valid_b):7.1f}ms median: {np.median(valid_b):7.1f}ms p95: {np.percentile(valid_b, 95):7.1f}ms") + print(f" ASAP avg — mean: {np.mean(valid_a):7.1f}ms median: {np.median(valid_a):7.1f}ms p95: {np.percentile(valid_a, 95):7.1f}ms") + mean_speedup = np.mean(valid_b) / np.mean(valid_a) + median_speedup = np.median(valid_b) / np.median(valid_a) + print(f" Mean speedup: {mean_speedup:.2f}x") + print(f" Median speedup: {median_speedup:.2f}x") + if mean_speedup < 2.0: + print(f" WARNING: Mean speedup {mean_speedup:.2f}x is below the 2x target.") + print(" Check /tmp/query_engine.log to confirm ASAP is serving from sketches,") + print(" not falling back to ClickHouse for every query.") + + +def main(): + parser = argparse.ArgumentParser(description="Run ASAP vs baseline experiments") + parser.add_argument("--runs", type=int, default=3, help="Number of ASAP runs (default: 3)") + parser.add_argument("--load-data", action="store_true", help="Download and load H2O dataset") + parser.add_argument("--skip-baseline", action="store_true", help="Skip baseline; use existing baseline_results.csv") + parser.add_argument("--skip-infra", action="store_true", help="Skip starting Kafka/ClickHouse (assume already running)") + parser.add_argument("--output-dir", default=".", help="Directory for output files (default: .)") + args = parser.parse_args() + + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, 
exist_ok=True) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + baseline_file = output_dir / "baseline_results.csv" + asap_files = [] + + # ---- Step 1: Baseline ---- + if not args.skip_baseline: + print("\n" + "="*60) + print("STEP 1: Baseline run") + print("="*60) + ok = run_pipeline( + "baseline", baseline_file, + load_data=args.load_data, + skip_infra=args.skip_infra, + ) + if not ok: + print("WARNING: Baseline run reported a non-zero exit code") + else: + print(f"Skipping baseline; using: {baseline_file}") + + # ---- Steps 2..N+1: ASAP runs ---- + for i in range(1, args.runs + 1): + print(f"\n{'='*60}") + print(f"STEP {1 + i}: ASAP run {i}/{args.runs}") + print("="*60) + + asap_file = output_dir / f"asap_results_run{i}_{timestamp}.csv" + + # Load data on the first ASAP run only (it also loads into MergeTree via + # the materialized view, so baseline can reuse that data afterwards if needed) + load_this_run = (i == 1) and args.load_data + + ok = run_pipeline( + "asap", asap_file, + load_data=load_this_run, + skip_infra=args.skip_infra, + ) + if ok or asap_file.exists(): + asap_files.append(asap_file) + else: + print(f"WARNING: ASAP run {i} produced no output") + + # ---- Graphs ---- + print(f"\n{'='*60}") + print("Generating comparison graphs") + print("="*60) + + if baseline_file.exists() and asap_files: + generate_comparison_graphs(baseline_file, asap_files, output_dir, timestamp) + + # Run existing value-accuracy comparison for first ASAP run + script_dir = Path(__file__).parent + values_png = output_dir / f"value_comparison_{timestamp}.png" + subprocess.run([ + "python3", str(script_dir / "compare_values.py"), + "--baseline", str(baseline_file), + "--asap", str(asap_files[0]), + "--output", str(values_png), + ], check=False) + else: + missing = [] + if not baseline_file.exists(): + missing.append(str(baseline_file)) + if not asap_files: + missing.append("(no ASAP results)") + print(f"Missing files, skipping graphs: {', '.join(missing)}") + + print("\nExperiment complete!") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_pipeline.sh b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_pipeline.sh new file mode 100755 index 0000000..82229a6 --- /dev/null +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_pipeline.sh @@ -0,0 +1,353 @@ +#!/bin/bash + +# Full pipeline script: starts Kafka, ClickHouse, Arroyo, and QueryEngine, +# then runs the benchmark. +# +# Usage: +# ASAP mode (full from scratch): +# ./run_pipeline.sh --mode asap --load-data --output asap_results_run1.csv +# +# ASAP mode (infra already running, data already loaded): +# ./run_pipeline.sh --mode asap --skip-infra --output asap_results_run2.csv +# +# Baseline mode (full from scratch): +# ./run_pipeline.sh --mode baseline --load-data --output baseline_results.csv +# +# Baseline mode (ClickHouse already running): +# ./run_pipeline.sh --mode baseline --skip-infra --output baseline_results.csv + +set -euo pipefail + +# ========================================== +# 1. DYNAMIC PATH RESOLUTION +# ========================================== +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/../.." 
&>/dev/null && pwd)" + +KAFKA_INSTALL_DIR="$ROOT_DIR/Utilities/installation/kafka" +CLICKHOUSE_INSTALL_DIR="$ROOT_DIR/Utilities/installation/clickhouse" +KAFKA_DIR="$KAFKA_INSTALL_DIR/kafka" +CLICKHOUSE_DIR="$CLICKHOUSE_INSTALL_DIR/clickhouse" + +# ========================================== +# 2. ARGUMENT PARSING +# ========================================== +MODE="asap" +LOAD_DATA=0 +OUTPUT_FILE="asap_results_run1.csv" +SKIP_INFRA=0 + +print_usage() { + echo "Usage: ./run_pipeline.sh [OPTIONS]" + echo "Options:" + echo " --mode [asap|baseline] Execution mode (default: asap)" + echo " --load-data Stream H2O dataset into ClickHouse/Kafka" + echo " --output [FILE] Output CSV file (default: asap_results_run1.csv)" + echo " --skip-infra Skip starting Kafka/ClickHouse (assume already running)" + echo " --help Show this message" +} + +while [[ "$#" -gt 0 ]]; do + case $1 in + --mode) MODE="$2"; shift ;; + --load-data) LOAD_DATA=1 ;; + --output) OUTPUT_FILE="$2"; shift ;; + --skip-infra) SKIP_INFRA=1 ;; + --help) print_usage; exit 0 ;; + *) echo "Unknown parameter: $1"; print_usage; exit 1 ;; + esac + shift +done + +# ========================================== +# 3. HELPER FUNCTIONS +# ========================================== + +# Wait for a URL to return HTTP 200. Args: name url [max_seconds] +wait_for_url() { + local name="$1" + local url="$2" + local max_seconds="${3:-120}" + local elapsed=0 + echo "Waiting for $name..." + while ! curl -sf "$url" >/dev/null 2>&1; do + sleep 2 + elapsed=$((elapsed + 2)) + if [ "$elapsed" -ge "$max_seconds" ]; then + echo "ERROR: $name did not become ready within ${max_seconds}s" + echo "Check logs for details" + exit 1 + fi + done + echo "$name is ready" +} + +wait_for_kafka() { + local max_seconds="${1:-120}" + local elapsed=0 + echo "Waiting for Kafka..." + while ! "$KAFKA_DIR/bin/kafka-topics.sh" --bootstrap-server localhost:9092 --list >/dev/null 2>&1; do + sleep 2 + elapsed=$((elapsed + 2)) + if [ "$elapsed" -ge "$max_seconds" ]; then + echo "ERROR: Kafka did not become ready within ${max_seconds}s" + echo "Check /tmp/kafka.log for details" + exit 1 + fi + done + echo "Kafka is ready" +} + +wait_for_arroyo_pipeline_running() { + local max_seconds="${1:-300}" + local elapsed=0 + echo "Waiting for Arroyo pipeline 'asap_h2o_pipeline' to reach RUNNING state..." + echo "(This may take up to ${max_seconds}s while Arroyo compiles Rust UDFs)" + while true; do + state=$(curl -sf "http://localhost:5115/api/v1/pipelines" 2>/dev/null | \ + python3 -c " +import sys, json +try: + data = json.load(sys.stdin) + for p in data.get('data', []): + if p.get('name') == 'asap_h2o_pipeline': + # Arroyo returns null/None for state when actively running + state = p.get('state') + action = p.get('action', '') + stop = p.get('stop', '') + if state is None and stop == 'none': + print('running') + else: + print(str(state).lower() if state else 'unknown') + sys.exit(0) + print('not_found') +except Exception: + print('error') +" 2>/dev/null || echo "error") + + if [ "$state" = "running" ]; then + echo "Pipeline is RUNNING" + return 0 + fi + + echo " Pipeline state: $state (elapsed: ${elapsed}s)" + sleep 5 + elapsed=$((elapsed + 5)) + + if [ "$elapsed" -ge "$max_seconds" ]; then + echo "ERROR: Pipeline did not reach RUNNING state within ${max_seconds}s" + echo "Check /tmp/arroyo.log for details" + exit 1 + fi + done +} + +wait_for_data_loaded() { + local min_rows="${1:-9000000}" + echo "Waiting for ClickHouse h2o_groupby to have at least $min_rows rows..." 
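+    # Note: unlike the other wait helpers, this loop has no timeout; rows arrive
+    # via Kafka and the materialized view, which can lag well behind the HTTP
+    # insert, so it simply polls every 5s until min_rows is reached.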
+ while true; do + count=$(curl -sf "http://localhost:8123/" -d "SELECT count(*) FROM h2o_groupby" 2>/dev/null | tr -d '[:space:]' || echo "0") + if [ -n "$count" ] && [ "$count" -ge "$min_rows" ] 2>/dev/null; then + echo "Data ready: $count rows" + return 0 + fi + echo " Rows in h2o_groupby: ${count:-0}" + sleep 5 + done +} + +ensure_extracted() { + local zip_file="$1" + local target_dir="$2" + if [ ! -d "$target_dir" ]; then + echo "Extracting $(basename "$zip_file")..." + unzip -q "$zip_file" -d "$(dirname "$target_dir")" + echo "Extracted to $target_dir" + fi +} + +# Start Kafka only if it isn't already responding +start_kafka_if_needed() { + if "$KAFKA_DIR/bin/kafka-topics.sh" --bootstrap-server localhost:9092 --list >/dev/null 2>&1; then + echo "Kafka already running, skipping start" + return 0 + fi + echo "Starting Kafka..." + nohup bash "$KAFKA_INSTALL_DIR/run.sh" "$KAFKA_DIR" >/tmp/kafka.log 2>&1 & + wait_for_kafka 120 +} + +# Start ClickHouse only if it isn't already responding +start_clickhouse_if_needed() { + if curl -sf "http://localhost:8123/ping" >/dev/null 2>&1; then + echo "ClickHouse already running, skipping start" + return 0 + fi + echo "Starting ClickHouse..." + nohup bash "$CLICKHOUSE_INSTALL_DIR/run.sh" "$CLICKHOUSE_DIR" >/tmp/clickhouse.log 2>&1 & + wait_for_url "ClickHouse" "http://localhost:8123/ping" 120 +} + +init_clickhouse_tables() { + echo "Initializing ClickHouse tables..." + python3 - <<'PYEOF' +import requests + +with open("h2o_init.sql") as f: + content = f.read() + +statements = [s.strip() for s in content.split(";") if s.strip()] +for sql in statements: + r = requests.post("http://localhost:8123/", data=sql) + if not r.ok: + print(f" WARN: {r.text.strip()[:120]} | SQL: {sql[:60]}") + else: + print(f" OK: {sql[:60]}") +PYEOF +} + +cleanup_background_jobs() { + echo "Cleaning up ASAP background processes..." + pkill -f "arroyo.*cluster" || true + pkill -f "query_engine_rust" || true + sleep 2 +} + +# ========================================== +# 4. BASELINE MODE +# ========================================== +if [ "$MODE" = "baseline" ]; then + echo "RUNNING IN BASELINE MODE" + + if [ "$SKIP_INFRA" -eq 0 ]; then + ensure_extracted "$KAFKA_INSTALL_DIR/kafka.zip" "$KAFKA_DIR" + ensure_extracted "$CLICKHOUSE_INSTALL_DIR/clickhouse.zip" "$CLICKHOUSE_DIR" + start_kafka_if_needed + start_clickhouse_if_needed + fi + + if [ "$LOAD_DATA" -eq 1 ]; then + cd "$SCRIPT_DIR" + init_clickhouse_tables + fi + + CMD="python3 run_benchmark.py --mode baseline --output $OUTPUT_FILE" + [ "$LOAD_DATA" -eq 1 ] && CMD="$CMD --load-data" + + echo "Executing: $CMD" + eval "$CMD" + echo "Baseline run complete!" + +# ========================================== +# 5. ASAP MODE +# ========================================== +elif [ "$MODE" = "asap" ]; then + echo "RUNNING IN ASAP MODE" + + # Clean up any stale processes from previous runs + cleanup_background_jobs + + if [ "$SKIP_INFRA" -eq 0 ]; then + ensure_extracted "$KAFKA_INSTALL_DIR/kafka.zip" "$KAFKA_DIR" + ensure_extracted "$CLICKHOUSE_INSTALL_DIR/clickhouse.zip" "$CLICKHOUSE_DIR" + start_kafka_if_needed + start_clickhouse_if_needed + fi + + # Initialize ClickHouse tables only when loading fresh data + # (h2o_init.sql drops and recreates tables, which would wipe existing data) + if [ "$LOAD_DATA" -eq 1 ]; then + cd "$SCRIPT_DIR" + init_clickhouse_tables + fi + + # Start Arroyo cluster + echo "Starting Arroyo cluster..." 
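+    # Assumes arroyo was already built at $ROOT_DIR/arroyo/target/release (see
+    # run_commands.txt; this script never builds it). Logs land in /tmp/arroyo.log
+    # and the REST API on port 5115 is polled right below before submitting.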
+ cd "$ROOT_DIR/arroyo" + nohup ./target/release/arroyo --config "$ROOT_DIR/ArroyoSketch/config.yaml" cluster \ + >/tmp/arroyo.log 2>&1 & + + wait_for_url "Arroyo API" "http://localhost:5115/api/v1/pipelines" 60 + + # Submit Arroyo pipeline + echo "Submitting Arroyo pipeline..." + cd "$ROOT_DIR/ArroyoSketch" + python3 run_arroyosketch.py \ + --source_type kafka \ + --kafka_input_format json \ + --input_kafka_topic h2o_groupby \ + --output_format json \ + --pipeline_name asap_h2o_pipeline \ + --config_file_path "$SCRIPT_DIR/streaming_config.yaml" \ + --output_kafka_topic sketch_topic \ + --output_dir ./outputs \ + --parallelism 1 \ + --query_language sql + + # Poll until pipeline is RUNNING (Arroyo compiles Rust UDFs, takes ~1-3 minutes) + wait_for_arroyo_pipeline_running 300 + + # Wait for Arroyo's Kafka source worker to fully initialize and assign partitions. + # load_h2o_data re-runs h2o_init.sql (DROP/CREATE h2o_groupby_queue), which causes a + # brief Kafka metadata disruption. If this races with Arroyo's initial partition assignment, + # the worker sees 0 partitions and goes permanently idle. A short sleep avoids the race. + echo "Waiting 20s for Arroyo worker to initialize Kafka partition assignment..." + sleep 20 + + # Load data through Kafka so Arroyo builds sketches AND MergeTree is populated + cd "$SCRIPT_DIR" + if [ "$LOAD_DATA" -eq 1 ]; then + echo "Loading data through Kafka (ASAP mode)..." + python3 run_benchmark.py --mode asap --load-data + + # Wait for MergeTree to reflect the data (materialized view consumes from Kafka) + wait_for_data_loaded 9000000 + + # Send a flush record to advance Arroyo's watermark past the last window. + # This ensures the final 120s tumbling window is closed and its sketch is emitted. + echo "Sending watermark flush record to Kafka..." + FLUSH_TS=$(date -u +%Y-%m-%dT%H:%M:%SZ) + curl -sf "http://localhost:8123/?query=INSERT%20INTO%20h2o_groupby_queue%20FORMAT%20JSONEachRow" \ + --data-raw "{\"timestamp\":\"${FLUSH_TS}\",\"id1\":\"flush\",\"id2\":\"flush\",\"id3\":\"flush\",\"id4\":0,\"id5\":0,\"id6\":0,\"v1\":0,\"v2\":0,\"v3\":0.0}" \ + || echo "Warning: flush record insert failed (non-fatal)" + + # Give Arroyo additional time to close and flush the final sketch windows + echo "Waiting 30s for Arroyo to flush all sketch windows..." + sleep 30 + else + echo "Skipping data load (--load-data not provided)" + fi + + # Start QueryEngine + echo "Starting QueryEngine..." + cd "$ROOT_DIR/QueryEngineRust" + nohup ./target/release/query_engine_rust \ + --kafka-topic sketch_topic \ + --input-format json \ + --config "$SCRIPT_DIR/inference_config.yaml" \ + --streaming-config "$SCRIPT_DIR/streaming_config.yaml" \ + --http-port 8088 \ + --delete-existing-db \ + --log-level info \ + --output-dir ./output \ + --streaming-engine arroyo \ + --query-language SQL \ + --lock-strategy per-key \ + --prometheus-scrape-interval 1 >/tmp/query_engine.log 2>&1 & + + # Poll until QueryEngine HTTP server is accepting connections + wait_for_url "QueryEngine" "http://localhost:8088/clickhouse/query?query=SELECT+1" 60 + + # Run benchmark against the sketches built during the data load above. + # Uses asap_mode_queries.sql (default for asap mode) with QUANTILE(0.95, v1) and NOW()-based + # 600s windows that contain the recently-closed 120s tumbling sketch windows. + echo "Executing benchmark queries against existing sketches..." + cd "$SCRIPT_DIR" + python3 run_benchmark.py --mode asap --output "$OUTPUT_FILE" + + echo "ASAP run complete! 
Results: $OUTPUT_FILE" + +else + echo "Invalid mode: $MODE. Use 'asap' or 'baseline'." + exit 1 +fi diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/streaming_config.yaml b/asap-tools/execution-utilities/asap_benchmark_pipeline/streaming_config.yaml new file mode 100644 index 0000000..ebf8565 --- /dev/null +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/streaming_config.yaml @@ -0,0 +1,22 @@ +tables: + - name: h2o_groupby + time_column: timestamp + metadata_columns: [id1, id2, id3] + value_columns: [v1] + +aggregations: + - aggregationId: 12 + aggregationType: DatasketchesKLL + aggregationSubType: '' + labels: + grouping: [id1, id2, id3] + rollup: [] + aggregated: [] + table_name: h2o_groupby + value_column: v1 + parameters: + K: 200 + tumblingWindowSize: 120 + windowSize: 120 + windowType: tumbling + spatialFilter: '' \ No newline at end of file From 7b64d111947c80489703164661936cd811522e42 Mon Sep 17 00:00:00 2001 From: benjamib112 Date: Mon, 23 Mar 2026 06:04:40 -0400 Subject: [PATCH 2/5] working pipeline --- .../asap_quantile_queries.sql | 779 ++++++++++++++++++ .../asap_benchmark_pipeline/cleanup.sh | 99 +++ .../clickhouse_quantile_queries.sql | 779 ++++++++++++++++++ .../asap_benchmark_pipeline/h2o_init.sql | 28 +- .../inference_config.yaml | 6 +- .../asap_benchmark_pipeline/plot_latency.py | 66 ++ .../asap_benchmark_pipeline/run_benchmark.py | 537 +++++++----- .../asap_benchmark_pipeline/run_pipeline.sh | 333 ++++---- .../streaming_config.yaml | 10 +- 9 files changed, 2243 insertions(+), 394 deletions(-) create mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/asap_quantile_queries.sql create mode 100755 asap-tools/execution-utilities/asap_benchmark_pipeline/cleanup.sh create mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/clickhouse_quantile_queries.sql create mode 100755 asap-tools/execution-utilities/asap_benchmark_pipeline/plot_latency.py mode change 100644 => 100755 asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_quantile_queries.sql b/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_quantile_queries.sql new file mode 100644 index 0000000..010e0dc --- /dev/null +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_quantile_queries.sql @@ -0,0 +1,779 @@ +-- T000: 10s window 0 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:00:00Z' AND '2024-01-01T00:00:10Z' GROUP BY id1, id2; + +-- T001: 10s window 3 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:00:30Z' AND '2024-01-01T00:00:40Z' GROUP BY id1, id2; + +-- T002: 10s window 6 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:01:00Z' AND '2024-01-01T00:01:10Z' GROUP BY id1, id2; + +-- T003: 10s window 9 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:01:30Z' AND '2024-01-01T00:01:40Z' GROUP BY id1, id2; + +-- T004: 10s window 12 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:02:00Z' AND '2024-01-01T00:02:10Z' GROUP BY id1, id2; + +-- T005: 10s window 15 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:02:30Z' AND '2024-01-01T00:02:40Z' GROUP BY id1, id2; + +-- T006: 10s window 18 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:03:00Z' AND '2024-01-01T00:03:10Z' GROUP BY id1, id2; + +-- T007: 10s window 21 
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:03:30Z' AND '2024-01-01T00:03:40Z' GROUP BY id1, id2; + +-- T008: 10s window 24 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:04:00Z' AND '2024-01-01T00:04:10Z' GROUP BY id1, id2; + +-- T009: 10s window 27 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:04:30Z' AND '2024-01-01T00:04:40Z' GROUP BY id1, id2; + +-- T010: 10s window 30 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:05:00Z' AND '2024-01-01T00:05:10Z' GROUP BY id1, id2; + +-- T011: 10s window 33 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:05:30Z' AND '2024-01-01T00:05:40Z' GROUP BY id1, id2; + +-- T012: 10s window 36 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:06:00Z' AND '2024-01-01T00:06:10Z' GROUP BY id1, id2; + +-- T013: 10s window 39 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:06:30Z' AND '2024-01-01T00:06:40Z' GROUP BY id1, id2; + +-- T014: 10s window 42 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:07:00Z' AND '2024-01-01T00:07:10Z' GROUP BY id1, id2; + +-- T015: 10s window 45 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:07:30Z' AND '2024-01-01T00:07:40Z' GROUP BY id1, id2; + +-- T016: 10s window 48 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:08:00Z' AND '2024-01-01T00:08:10Z' GROUP BY id1, id2; + +-- T017: 10s window 51 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:08:30Z' AND '2024-01-01T00:08:40Z' GROUP BY id1, id2; + +-- T018: 10s window 54 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:09:00Z' AND '2024-01-01T00:09:10Z' GROUP BY id1, id2; + +-- T019: 10s window 57 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:09:30Z' AND '2024-01-01T00:09:40Z' GROUP BY id1, id2; + +-- T020: 10s window 60 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:10:00Z' AND '2024-01-01T00:10:10Z' GROUP BY id1, id2; + +-- T021: 10s window 63 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:10:30Z' AND '2024-01-01T00:10:40Z' GROUP BY id1, id2; + +-- T022: 10s window 66 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:11:00Z' AND '2024-01-01T00:11:10Z' GROUP BY id1, id2; + +-- T023: 10s window 69 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:11:30Z' AND '2024-01-01T00:11:40Z' GROUP BY id1, id2; + +-- T024: 10s window 72 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:12:00Z' AND '2024-01-01T00:12:10Z' GROUP BY id1, id2; + +-- T025: 10s window 75 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:12:30Z' AND '2024-01-01T00:12:40Z' GROUP BY id1, id2; + +-- T026: 10s window 78 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:13:00Z' AND '2024-01-01T00:13:10Z' GROUP BY id1, id2; + +-- T027: 10s window 81 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:13:30Z' AND '2024-01-01T00:13:40Z' GROUP BY id1, id2; + +-- T028: 10s window 84 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:14:00Z' AND '2024-01-01T00:14:10Z' GROUP BY id1, 
id2; + +-- T029: 10s window 87 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:14:30Z' AND '2024-01-01T00:14:40Z' GROUP BY id1, id2; + +-- T030: 10s window 90 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:15:00Z' AND '2024-01-01T00:15:10Z' GROUP BY id1, id2; + +-- T031: 10s window 93 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:15:30Z' AND '2024-01-01T00:15:40Z' GROUP BY id1, id2; + +-- T032: 10s window 96 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:16:00Z' AND '2024-01-01T00:16:10Z' GROUP BY id1, id2; + +-- T033: 10s window 99 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:16:30Z' AND '2024-01-01T00:16:40Z' GROUP BY id1, id2; + +-- T034: 10s window 102 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:17:00Z' AND '2024-01-01T00:17:10Z' GROUP BY id1, id2; + +-- T035: 10s window 105 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:17:30Z' AND '2024-01-01T00:17:40Z' GROUP BY id1, id2; + +-- T036: 10s window 108 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:18:00Z' AND '2024-01-01T00:18:10Z' GROUP BY id1, id2; + +-- T037: 10s window 111 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:18:30Z' AND '2024-01-01T00:18:40Z' GROUP BY id1, id2; + +-- T038: 10s window 114 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:19:00Z' AND '2024-01-01T00:19:10Z' GROUP BY id1, id2; + +-- T039: 10s window 117 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:19:30Z' AND '2024-01-01T00:19:40Z' GROUP BY id1, id2; + +-- T040: 10s window 120 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:20:00Z' AND '2024-01-01T00:20:10Z' GROUP BY id1, id2; + +-- T041: 10s window 123 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:20:30Z' AND '2024-01-01T00:20:40Z' GROUP BY id1, id2; + +-- T042: 10s window 126 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:21:00Z' AND '2024-01-01T00:21:10Z' GROUP BY id1, id2; + +-- T043: 10s window 129 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:21:30Z' AND '2024-01-01T00:21:40Z' GROUP BY id1, id2; + +-- T044: 10s window 132 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:22:00Z' AND '2024-01-01T00:22:10Z' GROUP BY id1, id2; + +-- T045: 10s window 135 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:22:30Z' AND '2024-01-01T00:22:40Z' GROUP BY id1, id2; + +-- T046: 10s window 138 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:23:00Z' AND '2024-01-01T00:23:10Z' GROUP BY id1, id2; + +-- T047: 10s window 141 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:23:30Z' AND '2024-01-01T00:23:40Z' GROUP BY id1, id2; + +-- T048: 10s window 144 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:24:00Z' AND '2024-01-01T00:24:10Z' GROUP BY id1, id2; + +-- T049: 10s window 147 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:24:30Z' AND '2024-01-01T00:24:40Z' GROUP BY id1, id2; + +-- T050: 10s window 150 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN 
'2024-01-01T00:25:00Z' AND '2024-01-01T00:25:10Z' GROUP BY id1, id2; + +-- T051: 10s window 153 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:25:30Z' AND '2024-01-01T00:25:40Z' GROUP BY id1, id2; + +-- T052: 10s window 156 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:26:00Z' AND '2024-01-01T00:26:10Z' GROUP BY id1, id2; + +-- T053: 10s window 159 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:26:30Z' AND '2024-01-01T00:26:40Z' GROUP BY id1, id2; + +-- T054: 10s window 162 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:27:00Z' AND '2024-01-01T00:27:10Z' GROUP BY id1, id2; + +-- T055: 10s window 165 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:27:30Z' AND '2024-01-01T00:27:40Z' GROUP BY id1, id2; + +-- T056: 10s window 168 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:28:00Z' AND '2024-01-01T00:28:10Z' GROUP BY id1, id2; + +-- T057: 10s window 171 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:28:30Z' AND '2024-01-01T00:28:40Z' GROUP BY id1, id2; + +-- T058: 10s window 174 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:29:00Z' AND '2024-01-01T00:29:10Z' GROUP BY id1, id2; + +-- T059: 10s window 177 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:29:30Z' AND '2024-01-01T00:29:40Z' GROUP BY id1, id2; + +-- T060: 10s window 180 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:30:00Z' AND '2024-01-01T00:30:10Z' GROUP BY id1, id2; + +-- T061: 10s window 183 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:30:30Z' AND '2024-01-01T00:30:40Z' GROUP BY id1, id2; + +-- T062: 10s window 186 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:31:00Z' AND '2024-01-01T00:31:10Z' GROUP BY id1, id2; + +-- T063: 10s window 189 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:31:30Z' AND '2024-01-01T00:31:40Z' GROUP BY id1, id2; + +-- T064: 10s window 192 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:32:00Z' AND '2024-01-01T00:32:10Z' GROUP BY id1, id2; + +-- T065: 10s window 195 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:32:30Z' AND '2024-01-01T00:32:40Z' GROUP BY id1, id2; + +-- T066: 10s window 198 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:33:00Z' AND '2024-01-01T00:33:10Z' GROUP BY id1, id2; + +-- T067: 10s window 201 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:33:30Z' AND '2024-01-01T00:33:40Z' GROUP BY id1, id2; + +-- T068: 10s window 204 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:34:00Z' AND '2024-01-01T00:34:10Z' GROUP BY id1, id2; + +-- T069: 10s window 207 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:34:30Z' AND '2024-01-01T00:34:40Z' GROUP BY id1, id2; + +-- T070: 10s window 210 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:35:00Z' AND '2024-01-01T00:35:10Z' GROUP BY id1, id2; + +-- T071: 10s window 213 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:35:30Z' AND '2024-01-01T00:35:40Z' GROUP BY id1, id2; + +-- T072: 10s window 216 +SELECT 
QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:36:00Z' AND '2024-01-01T00:36:10Z' GROUP BY id1, id2; + +-- T073: 10s window 219 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:36:30Z' AND '2024-01-01T00:36:40Z' GROUP BY id1, id2; + +-- T074: 10s window 222 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:37:00Z' AND '2024-01-01T00:37:10Z' GROUP BY id1, id2; + +-- T075: 10s window 225 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:37:30Z' AND '2024-01-01T00:37:40Z' GROUP BY id1, id2; + +-- T076: 10s window 228 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:38:00Z' AND '2024-01-01T00:38:10Z' GROUP BY id1, id2; + +-- T077: 10s window 231 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:38:30Z' AND '2024-01-01T00:38:40Z' GROUP BY id1, id2; + +-- T078: 10s window 234 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:39:00Z' AND '2024-01-01T00:39:10Z' GROUP BY id1, id2; + +-- T079: 10s window 237 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:39:30Z' AND '2024-01-01T00:39:40Z' GROUP BY id1, id2; + +-- T080: 10s window 240 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:40:00Z' AND '2024-01-01T00:40:10Z' GROUP BY id1, id2; + +-- T081: 10s window 243 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:40:30Z' AND '2024-01-01T00:40:40Z' GROUP BY id1, id2; + +-- T082: 10s window 246 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:41:00Z' AND '2024-01-01T00:41:10Z' GROUP BY id1, id2; + +-- T083: 10s window 249 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:41:30Z' AND '2024-01-01T00:41:40Z' GROUP BY id1, id2; + +-- T084: 10s window 252 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:42:00Z' AND '2024-01-01T00:42:10Z' GROUP BY id1, id2; + +-- T085: 10s window 255 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:42:30Z' AND '2024-01-01T00:42:40Z' GROUP BY id1, id2; + +-- T086: 10s window 258 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:43:00Z' AND '2024-01-01T00:43:10Z' GROUP BY id1, id2; + +-- T087: 10s window 261 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:43:30Z' AND '2024-01-01T00:43:40Z' GROUP BY id1, id2; + +-- T088: 10s window 264 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:44:00Z' AND '2024-01-01T00:44:10Z' GROUP BY id1, id2; + +-- T089: 10s window 267 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:44:30Z' AND '2024-01-01T00:44:40Z' GROUP BY id1, id2; + +-- T090: 10s window 270 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:45:00Z' AND '2024-01-01T00:45:10Z' GROUP BY id1, id2; + +-- T091: 10s window 273 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:45:30Z' AND '2024-01-01T00:45:40Z' GROUP BY id1, id2; + +-- T092: 10s window 276 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:46:00Z' AND '2024-01-01T00:46:10Z' GROUP BY id1, id2; + +-- T093: 10s window 279 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:46:30Z' AND '2024-01-01T00:46:40Z' 
GROUP BY id1, id2; + +-- T094: 10s window 282 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:47:00Z' AND '2024-01-01T00:47:10Z' GROUP BY id1, id2; + +-- T095: 10s window 285 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:47:30Z' AND '2024-01-01T00:47:40Z' GROUP BY id1, id2; + +-- T096: 10s window 288 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:48:00Z' AND '2024-01-01T00:48:10Z' GROUP BY id1, id2; + +-- T097: 10s window 291 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:48:30Z' AND '2024-01-01T00:48:40Z' GROUP BY id1, id2; + +-- T098: 10s window 294 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:49:00Z' AND '2024-01-01T00:49:10Z' GROUP BY id1, id2; + +-- T099: 10s window 297 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:49:30Z' AND '2024-01-01T00:49:40Z' GROUP BY id1, id2; + +-- T100: 10s window 300 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:50:00Z' AND '2024-01-01T00:50:10Z' GROUP BY id1, id2; + +-- T101: 10s window 303 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:50:30Z' AND '2024-01-01T00:50:40Z' GROUP BY id1, id2; + +-- T102: 10s window 306 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:51:00Z' AND '2024-01-01T00:51:10Z' GROUP BY id1, id2; + +-- T103: 10s window 309 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:51:30Z' AND '2024-01-01T00:51:40Z' GROUP BY id1, id2; + +-- T104: 10s window 312 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:52:00Z' AND '2024-01-01T00:52:10Z' GROUP BY id1, id2; + +-- T105: 10s window 315 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:52:30Z' AND '2024-01-01T00:52:40Z' GROUP BY id1, id2; + +-- T106: 10s window 318 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:53:00Z' AND '2024-01-01T00:53:10Z' GROUP BY id1, id2; + +-- T107: 10s window 321 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:53:30Z' AND '2024-01-01T00:53:40Z' GROUP BY id1, id2; + +-- T108: 10s window 324 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:54:00Z' AND '2024-01-01T00:54:10Z' GROUP BY id1, id2; + +-- T109: 10s window 327 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:54:30Z' AND '2024-01-01T00:54:40Z' GROUP BY id1, id2; + +-- T110: 10s window 330 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:55:00Z' AND '2024-01-01T00:55:10Z' GROUP BY id1, id2; + +-- T111: 10s window 333 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:55:30Z' AND '2024-01-01T00:55:40Z' GROUP BY id1, id2; + +-- T112: 10s window 336 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:56:00Z' AND '2024-01-01T00:56:10Z' GROUP BY id1, id2; + +-- T113: 10s window 339 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:56:30Z' AND '2024-01-01T00:56:40Z' GROUP BY id1, id2; + +-- T114: 10s window 342 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:57:00Z' AND '2024-01-01T00:57:10Z' GROUP BY id1, id2; + +-- T115: 10s window 345 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp 
BETWEEN '2024-01-01T00:57:30Z' AND '2024-01-01T00:57:40Z' GROUP BY id1, id2; + +-- T116: 10s window 348 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:58:00Z' AND '2024-01-01T00:58:10Z' GROUP BY id1, id2; + +-- T117: 10s window 351 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:58:30Z' AND '2024-01-01T00:58:40Z' GROUP BY id1, id2; + +-- T118: 10s window 354 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:59:00Z' AND '2024-01-01T00:59:10Z' GROUP BY id1, id2; + +-- T119: 10s window 357 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T00:59:30Z' AND '2024-01-01T00:59:40Z' GROUP BY id1, id2; + +-- T120: 10s window 360 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:00:00Z' AND '2024-01-01T01:00:10Z' GROUP BY id1, id2; + +-- T121: 10s window 363 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:00:30Z' AND '2024-01-01T01:00:40Z' GROUP BY id1, id2; + +-- T122: 10s window 366 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:01:00Z' AND '2024-01-01T01:01:10Z' GROUP BY id1, id2; + +-- T123: 10s window 369 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:01:30Z' AND '2024-01-01T01:01:40Z' GROUP BY id1, id2; + +-- T124: 10s window 372 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:02:00Z' AND '2024-01-01T01:02:10Z' GROUP BY id1, id2; + +-- T125: 10s window 375 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:02:30Z' AND '2024-01-01T01:02:40Z' GROUP BY id1, id2; + +-- T126: 10s window 378 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:03:00Z' AND '2024-01-01T01:03:10Z' GROUP BY id1, id2; + +-- T127: 10s window 381 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:03:30Z' AND '2024-01-01T01:03:40Z' GROUP BY id1, id2; + +-- T128: 10s window 384 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:04:00Z' AND '2024-01-01T01:04:10Z' GROUP BY id1, id2; + +-- T129: 10s window 387 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:04:30Z' AND '2024-01-01T01:04:40Z' GROUP BY id1, id2; + +-- T130: 10s window 390 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:05:00Z' AND '2024-01-01T01:05:10Z' GROUP BY id1, id2; + +-- T131: 10s window 393 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:05:30Z' AND '2024-01-01T01:05:40Z' GROUP BY id1, id2; + +-- T132: 10s window 396 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:06:00Z' AND '2024-01-01T01:06:10Z' GROUP BY id1, id2; + +-- T133: 10s window 399 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:06:30Z' AND '2024-01-01T01:06:40Z' GROUP BY id1, id2; + +-- T134: 10s window 402 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:07:00Z' AND '2024-01-01T01:07:10Z' GROUP BY id1, id2; + +-- T135: 10s window 405 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:07:30Z' AND '2024-01-01T01:07:40Z' GROUP BY id1, id2; + +-- T136: 10s window 408 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:08:00Z' AND '2024-01-01T01:08:10Z' GROUP BY id1, id2; + +-- T137: 10s window 411 
+SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:08:30Z' AND '2024-01-01T01:08:40Z' GROUP BY id1, id2; + +-- T138: 10s window 414 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:09:00Z' AND '2024-01-01T01:09:10Z' GROUP BY id1, id2; + +-- T139: 10s window 417 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:09:30Z' AND '2024-01-01T01:09:40Z' GROUP BY id1, id2; + +-- T140: 10s window 420 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:10:00Z' AND '2024-01-01T01:10:10Z' GROUP BY id1, id2; + +-- T141: 10s window 423 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:10:30Z' AND '2024-01-01T01:10:40Z' GROUP BY id1, id2; + +-- T142: 10s window 426 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:11:00Z' AND '2024-01-01T01:11:10Z' GROUP BY id1, id2; + +-- T143: 10s window 429 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:11:30Z' AND '2024-01-01T01:11:40Z' GROUP BY id1, id2; + +-- T144: 10s window 432 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:12:00Z' AND '2024-01-01T01:12:10Z' GROUP BY id1, id2; + +-- T145: 10s window 435 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:12:30Z' AND '2024-01-01T01:12:40Z' GROUP BY id1, id2; + +-- T146: 10s window 438 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:13:00Z' AND '2024-01-01T01:13:10Z' GROUP BY id1, id2; + +-- T147: 10s window 441 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:13:30Z' AND '2024-01-01T01:13:40Z' GROUP BY id1, id2; + +-- T148: 10s window 444 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:14:00Z' AND '2024-01-01T01:14:10Z' GROUP BY id1, id2; + +-- T149: 10s window 447 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:14:30Z' AND '2024-01-01T01:14:40Z' GROUP BY id1, id2; + +-- T150: 10s window 450 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:15:00Z' AND '2024-01-01T01:15:10Z' GROUP BY id1, id2; + +-- T151: 10s window 453 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:15:30Z' AND '2024-01-01T01:15:40Z' GROUP BY id1, id2; + +-- T152: 10s window 456 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:16:00Z' AND '2024-01-01T01:16:10Z' GROUP BY id1, id2; + +-- T153: 10s window 459 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:16:30Z' AND '2024-01-01T01:16:40Z' GROUP BY id1, id2; + +-- T154: 10s window 462 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:17:00Z' AND '2024-01-01T01:17:10Z' GROUP BY id1, id2; + +-- T155: 10s window 465 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:17:30Z' AND '2024-01-01T01:17:40Z' GROUP BY id1, id2; + +-- T156: 10s window 468 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:18:00Z' AND '2024-01-01T01:18:10Z' GROUP BY id1, id2; + +-- T157: 10s window 471 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:18:30Z' AND '2024-01-01T01:18:40Z' GROUP BY id1, id2; + +-- T158: 10s window 474 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:19:00Z' AND 
'2024-01-01T01:19:10Z' GROUP BY id1, id2; + +-- T159: 10s window 477 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:19:30Z' AND '2024-01-01T01:19:40Z' GROUP BY id1, id2; + +-- T160: 10s window 480 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:20:00Z' AND '2024-01-01T01:20:10Z' GROUP BY id1, id2; + +-- T161: 10s window 483 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:20:30Z' AND '2024-01-01T01:20:40Z' GROUP BY id1, id2; + +-- T162: 10s window 486 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:21:00Z' AND '2024-01-01T01:21:10Z' GROUP BY id1, id2; + +-- T163: 10s window 489 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:21:30Z' AND '2024-01-01T01:21:40Z' GROUP BY id1, id2; + +-- T164: 10s window 492 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:22:00Z' AND '2024-01-01T01:22:10Z' GROUP BY id1, id2; + +-- T165: 10s window 495 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:22:30Z' AND '2024-01-01T01:22:40Z' GROUP BY id1, id2; + +-- T166: 10s window 498 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:23:00Z' AND '2024-01-01T01:23:10Z' GROUP BY id1, id2; + +-- T167: 10s window 501 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:23:30Z' AND '2024-01-01T01:23:40Z' GROUP BY id1, id2; + +-- T168: 10s window 504 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:24:00Z' AND '2024-01-01T01:24:10Z' GROUP BY id1, id2; + +-- T169: 10s window 507 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:24:30Z' AND '2024-01-01T01:24:40Z' GROUP BY id1, id2; + +-- T170: 10s window 510 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:25:00Z' AND '2024-01-01T01:25:10Z' GROUP BY id1, id2; + +-- T171: 10s window 513 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:25:30Z' AND '2024-01-01T01:25:40Z' GROUP BY id1, id2; + +-- T172: 10s window 516 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:26:00Z' AND '2024-01-01T01:26:10Z' GROUP BY id1, id2; + +-- T173: 10s window 519 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:26:30Z' AND '2024-01-01T01:26:40Z' GROUP BY id1, id2; + +-- T174: 10s window 522 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:27:00Z' AND '2024-01-01T01:27:10Z' GROUP BY id1, id2; + +-- T175: 10s window 525 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:27:30Z' AND '2024-01-01T01:27:40Z' GROUP BY id1, id2; + +-- T176: 10s window 528 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:28:00Z' AND '2024-01-01T01:28:10Z' GROUP BY id1, id2; + +-- T177: 10s window 531 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:28:30Z' AND '2024-01-01T01:28:40Z' GROUP BY id1, id2; + +-- T178: 10s window 534 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:29:00Z' AND '2024-01-01T01:29:10Z' GROUP BY id1, id2; + +-- T179: 10s window 537 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:29:30Z' AND '2024-01-01T01:29:40Z' GROUP BY id1, id2; + +-- T180: 10s window 540 +SELECT QUANTILE(0.95, v1) FROM 
h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:30:00Z' AND '2024-01-01T01:30:10Z' GROUP BY id1, id2; + +-- T181: 10s window 543 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:30:30Z' AND '2024-01-01T01:30:40Z' GROUP BY id1, id2; + +-- T182: 10s window 546 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:31:00Z' AND '2024-01-01T01:31:10Z' GROUP BY id1, id2; + +-- T183: 10s window 549 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:31:30Z' AND '2024-01-01T01:31:40Z' GROUP BY id1, id2; + +-- T184: 10s window 552 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:32:00Z' AND '2024-01-01T01:32:10Z' GROUP BY id1, id2; + +-- T185: 10s window 555 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:32:30Z' AND '2024-01-01T01:32:40Z' GROUP BY id1, id2; + +-- T186: 10s window 558 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:33:00Z' AND '2024-01-01T01:33:10Z' GROUP BY id1, id2; + +-- T187: 10s window 561 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:33:30Z' AND '2024-01-01T01:33:40Z' GROUP BY id1, id2; + +-- T188: 10s window 564 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:34:00Z' AND '2024-01-01T01:34:10Z' GROUP BY id1, id2; + +-- T189: 10s window 567 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:34:30Z' AND '2024-01-01T01:34:40Z' GROUP BY id1, id2; + +-- T190: 10s window 570 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:35:00Z' AND '2024-01-01T01:35:10Z' GROUP BY id1, id2; + +-- T191: 10s window 573 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:35:30Z' AND '2024-01-01T01:35:40Z' GROUP BY id1, id2; + +-- T192: 10s window 576 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:36:00Z' AND '2024-01-01T01:36:10Z' GROUP BY id1, id2; + +-- T193: 10s window 579 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:36:30Z' AND '2024-01-01T01:36:40Z' GROUP BY id1, id2; + +-- T194: 10s window 582 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:37:00Z' AND '2024-01-01T01:37:10Z' GROUP BY id1, id2; + +-- T195: 10s window 585 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:37:30Z' AND '2024-01-01T01:37:40Z' GROUP BY id1, id2; + +-- T196: 10s window 588 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:38:00Z' AND '2024-01-01T01:38:10Z' GROUP BY id1, id2; + +-- T197: 10s window 591 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:38:30Z' AND '2024-01-01T01:38:40Z' GROUP BY id1, id2; + +-- T198: 10s window 594 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:39:00Z' AND '2024-01-01T01:39:10Z' GROUP BY id1, id2; + +-- T199: 10s window 597 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:39:30Z' AND '2024-01-01T01:39:40Z' GROUP BY id1, id2; + +-- T200: 10s window 600 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:40:00Z' AND '2024-01-01T01:40:10Z' GROUP BY id1, id2; + +-- T201: 10s window 603 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:40:30Z' AND '2024-01-01T01:40:40Z' GROUP BY id1, id2; + +-- 
T202: 10s window 606 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:41:00Z' AND '2024-01-01T01:41:10Z' GROUP BY id1, id2; + +-- T203: 10s window 609 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:41:30Z' AND '2024-01-01T01:41:40Z' GROUP BY id1, id2; + +-- T204: 10s window 612 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:42:00Z' AND '2024-01-01T01:42:10Z' GROUP BY id1, id2; + +-- T205: 10s window 615 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:42:30Z' AND '2024-01-01T01:42:40Z' GROUP BY id1, id2; + +-- T206: 10s window 618 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:43:00Z' AND '2024-01-01T01:43:10Z' GROUP BY id1, id2; + +-- T207: 10s window 621 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:43:30Z' AND '2024-01-01T01:43:40Z' GROUP BY id1, id2; + +-- T208: 10s window 624 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:44:00Z' AND '2024-01-01T01:44:10Z' GROUP BY id1, id2; + +-- T209: 10s window 627 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:44:30Z' AND '2024-01-01T01:44:40Z' GROUP BY id1, id2; + +-- T210: 10s window 630 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:45:00Z' AND '2024-01-01T01:45:10Z' GROUP BY id1, id2; + +-- T211: 10s window 633 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:45:30Z' AND '2024-01-01T01:45:40Z' GROUP BY id1, id2; + +-- T212: 10s window 636 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:46:00Z' AND '2024-01-01T01:46:10Z' GROUP BY id1, id2; + +-- T213: 10s window 639 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:46:30Z' AND '2024-01-01T01:46:40Z' GROUP BY id1, id2; + +-- T214: 10s window 642 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:47:00Z' AND '2024-01-01T01:47:10Z' GROUP BY id1, id2; + +-- T215: 10s window 645 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:47:30Z' AND '2024-01-01T01:47:40Z' GROUP BY id1, id2; + +-- T216: 10s window 648 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:48:00Z' AND '2024-01-01T01:48:10Z' GROUP BY id1, id2; + +-- T217: 10s window 651 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:48:30Z' AND '2024-01-01T01:48:40Z' GROUP BY id1, id2; + +-- T218: 10s window 654 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:49:00Z' AND '2024-01-01T01:49:10Z' GROUP BY id1, id2; + +-- T219: 10s window 657 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:49:30Z' AND '2024-01-01T01:49:40Z' GROUP BY id1, id2; + +-- T220: 10s window 660 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:50:00Z' AND '2024-01-01T01:50:10Z' GROUP BY id1, id2; + +-- T221: 10s window 663 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:50:30Z' AND '2024-01-01T01:50:40Z' GROUP BY id1, id2; + +-- T222: 10s window 666 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:51:00Z' AND '2024-01-01T01:51:10Z' GROUP BY id1, id2; + +-- T223: 10s window 669 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN 
'2024-01-01T01:51:30Z' AND '2024-01-01T01:51:40Z' GROUP BY id1, id2; + +-- T224: 10s window 672 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:52:00Z' AND '2024-01-01T01:52:10Z' GROUP BY id1, id2; + +-- T225: 10s window 675 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:52:30Z' AND '2024-01-01T01:52:40Z' GROUP BY id1, id2; + +-- T226: 10s window 678 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:53:00Z' AND '2024-01-01T01:53:10Z' GROUP BY id1, id2; + +-- T227: 10s window 681 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:53:30Z' AND '2024-01-01T01:53:40Z' GROUP BY id1, id2; + +-- T228: 10s window 684 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:54:00Z' AND '2024-01-01T01:54:10Z' GROUP BY id1, id2; + +-- T229: 10s window 687 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:54:30Z' AND '2024-01-01T01:54:40Z' GROUP BY id1, id2; + +-- T230: 10s window 690 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:55:00Z' AND '2024-01-01T01:55:10Z' GROUP BY id1, id2; + +-- T231: 10s window 693 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:55:30Z' AND '2024-01-01T01:55:40Z' GROUP BY id1, id2; + +-- T232: 10s window 696 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:56:00Z' AND '2024-01-01T01:56:10Z' GROUP BY id1, id2; + +-- T233: 10s window 699 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:56:30Z' AND '2024-01-01T01:56:40Z' GROUP BY id1, id2; + +-- T234: 10s window 702 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:57:00Z' AND '2024-01-01T01:57:10Z' GROUP BY id1, id2; + +-- T235: 10s window 705 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:57:30Z' AND '2024-01-01T01:57:40Z' GROUP BY id1, id2; + +-- T236: 10s window 708 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:58:00Z' AND '2024-01-01T01:58:10Z' GROUP BY id1, id2; + +-- T237: 10s window 711 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:58:30Z' AND '2024-01-01T01:58:40Z' GROUP BY id1, id2; + +-- T238: 10s window 714 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:59:00Z' AND '2024-01-01T01:59:10Z' GROUP BY id1, id2; + +-- T239: 10s window 717 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T01:59:30Z' AND '2024-01-01T01:59:40Z' GROUP BY id1, id2; + +-- T240: 10s window 720 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:00:00Z' AND '2024-01-01T02:00:10Z' GROUP BY id1, id2; + +-- T241: 10s window 723 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:00:30Z' AND '2024-01-01T02:00:40Z' GROUP BY id1, id2; + +-- T242: 10s window 726 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:01:00Z' AND '2024-01-01T02:01:10Z' GROUP BY id1, id2; + +-- T243: 10s window 729 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:01:30Z' AND '2024-01-01T02:01:40Z' GROUP BY id1, id2; + +-- T244: 10s window 732 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:02:00Z' AND '2024-01-01T02:02:10Z' GROUP BY id1, id2; + +-- T245: 10s window 735 +SELECT 
QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:02:30Z' AND '2024-01-01T02:02:40Z' GROUP BY id1, id2; + +-- T246: 10s window 738 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:03:00Z' AND '2024-01-01T02:03:10Z' GROUP BY id1, id2; + +-- T247: 10s window 741 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:03:30Z' AND '2024-01-01T02:03:40Z' GROUP BY id1, id2; + +-- T248: 10s window 744 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:04:00Z' AND '2024-01-01T02:04:10Z' GROUP BY id1, id2; + +-- T249: 10s window 747 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:04:30Z' AND '2024-01-01T02:04:40Z' GROUP BY id1, id2; + +-- T250: 10s window 750 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:05:00Z' AND '2024-01-01T02:05:10Z' GROUP BY id1, id2; + +-- T251: 10s window 753 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:05:30Z' AND '2024-01-01T02:05:40Z' GROUP BY id1, id2; + +-- T252: 10s window 756 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:06:00Z' AND '2024-01-01T02:06:10Z' GROUP BY id1, id2; + +-- T253: 10s window 759 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:06:30Z' AND '2024-01-01T02:06:40Z' GROUP BY id1, id2; + +-- T254: 10s window 762 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:07:00Z' AND '2024-01-01T02:07:10Z' GROUP BY id1, id2; + +-- T255: 10s window 765 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:07:30Z' AND '2024-01-01T02:07:40Z' GROUP BY id1, id2; + +-- T256: 10s window 768 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:08:00Z' AND '2024-01-01T02:08:10Z' GROUP BY id1, id2; + +-- T257: 10s window 771 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:08:30Z' AND '2024-01-01T02:08:40Z' GROUP BY id1, id2; + +-- T258: 10s window 774 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:09:00Z' AND '2024-01-01T02:09:10Z' GROUP BY id1, id2; + +-- T259: 10s window 777 +SELECT QUANTILE(0.95, v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01T02:09:30Z' AND '2024-01-01T02:09:40Z' GROUP BY id1, id2; diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/cleanup.sh b/asap-tools/execution-utilities/asap_benchmark_pipeline/cleanup.sh new file mode 100755 index 0000000..432bd1a --- /dev/null +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/cleanup.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# +# Clean up all benchmark processes, Kafka state, ClickHouse data, and OS caches +# so that the next run_pipeline.sh invocation starts from identical conditions. +# +# Usage: +# ./cleanup.sh # full cleanup (requires sudo for cache drop) +# ./cleanup.sh --no-sudo # skip OS cache clearing (no sudo required) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +TOOLS_DIR="$(cd "$SCRIPT_DIR/../.." 
&>/dev/null && pwd)" + +KAFKA_INSTALL_DIR="${KAFKA_INSTALL_DIR:-$TOOLS_DIR/installation/kafka}" +CLICKHOUSE_INSTALL_DIR="${CLICKHOUSE_INSTALL_DIR:-$TOOLS_DIR/installation/clickhouse}" +KAFKA_DIR="$KAFKA_INSTALL_DIR/kafka" +CLICKHOUSE_DIR="$CLICKHOUSE_INSTALL_DIR/clickhouse" + +NO_SUDO=0 +for arg in "$@"; do + case "$arg" in + --no-sudo) NO_SUDO=1 ;; + --help) echo "Usage: ./cleanup.sh [--no-sudo]"; exit 0 ;; + esac +done + +# ========================================== +# 1. Kill application processes +# ========================================== +echo "Stopping application processes..." +pkill -f "query_engine_rust" 2>/dev/null || true +pkill -f "arroyo.*cluster" 2>/dev/null || true +sleep 2 +# Force-kill any stragglers +pkill -9 -f "query_engine_rust" 2>/dev/null || true +pkill -9 -f "arroyo.*cluster" 2>/dev/null || true + +# ========================================== +# 2. Stop Kafka +# ========================================== +echo "Stopping Kafka..." +if [ -x "$KAFKA_DIR/bin/kafka-server-stop.sh" ]; then + "$KAFKA_DIR/bin/kafka-server-stop.sh" 2>/dev/null || true +fi +if [ -x "$KAFKA_DIR/bin/zookeeper-server-stop.sh" ]; then + "$KAFKA_DIR/bin/zookeeper-server-stop.sh" 2>/dev/null || true +fi +sleep 2 +pkill -f "kafka\.Kafka" 2>/dev/null || true +pkill -f "QuorumPeerMain" 2>/dev/null || true +pkill -9 -f "kafka\.Kafka" 2>/dev/null || true +pkill -9 -f "QuorumPeerMain" 2>/dev/null || true + +# Clean Kafka data directories (topics, consumer group offsets, logs) +if [ -d "$KAFKA_DIR" ]; then + echo "Clearing Kafka data..." + rm -rf "$KAFKA_DIR/data" "$KAFKA_DIR/logs" /tmp/kafka-logs /tmp/zookeeper 2>/dev/null || true +fi + +# ========================================== +# 3. Stop ClickHouse and clear its data +# ========================================== +echo "Stopping ClickHouse..." +if curl -sf "http://localhost:8123/ping" >/dev/null 2>&1; then + # Drop the table before stopping so next run starts clean + curl -sf "http://localhost:8123" -d "DROP TABLE IF EXISTS h2o_groupby" 2>/dev/null || true +fi +pkill -f "clickhouse-server" 2>/dev/null || true +pkill -f "clickhouse server" 2>/dev/null || true +sleep 2 +pkill -9 -f "clickhouse-server" 2>/dev/null || true +pkill -9 -f "clickhouse server" 2>/dev/null || true + +# Clear ClickHouse data directory +if [ -d "$CLICKHOUSE_DIR" ]; then + echo "Clearing ClickHouse data..." + rm -rf "$CLICKHOUSE_DIR/data" "$CLICKHOUSE_DIR/store" "$CLICKHOUSE_DIR/metadata" 2>/dev/null || true +fi + +# ========================================== +# 4. Clear QE output directories +# ========================================== +echo "Clearing QE output..." +rm -rf "$SCRIPT_DIR/output" "$SCRIPT_DIR/outputs" 2>/dev/null || true + +# ========================================== +# 5. Drop OS page cache and dentries/inodes +# ========================================== +if [ "$NO_SUDO" -eq 0 ]; then + echo "Dropping OS page cache, dentries, and inodes..." + sync + sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches' + echo "OS caches cleared" +else + echo "Skipping OS cache clearing (--no-sudo)" +fi + +echo "Cleanup complete. Next run_pipeline.sh will start from a clean state." 
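+
+# Optional post-check (hedged sketch): warn if anything is still listening on
+# the ports this pipeline uses elsewhere (Kafka 9092, ClickHouse 8123, Arroyo
+# API 5115, QueryEngine 8088). Assumes "ss" from iproute2 is on PATH; the
+# check is advisory only and deliberately does not change the exit status.
+for port in 9092 8123 5115 8088; do
+    if ss -ltn 2>/dev/null | grep -q ":$port "; then
+        echo "WARN: port $port is still in use; a process may have survived cleanup"
+    fi
+done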
diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/clickhouse_quantile_queries.sql b/asap-tools/execution-utilities/asap_benchmark_pipeline/clickhouse_quantile_queries.sql new file mode 100644 index 0000000..9327e48 --- /dev/null +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/clickhouse_quantile_queries.sql @@ -0,0 +1,779 @@ +-- T000: 10s window 0 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:00:00' AND '2024-01-01 00:00:10' GROUP BY id1, id2; + +-- T001: 10s window 3 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:00:30' AND '2024-01-01 00:00:40' GROUP BY id1, id2; + +-- T002: 10s window 6 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:01:00' AND '2024-01-01 00:01:10' GROUP BY id1, id2; + +-- T003: 10s window 9 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:01:30' AND '2024-01-01 00:01:40' GROUP BY id1, id2; + +-- T004: 10s window 12 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:02:00' AND '2024-01-01 00:02:10' GROUP BY id1, id2; + +-- T005: 10s window 15 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:02:30' AND '2024-01-01 00:02:40' GROUP BY id1, id2; + +-- T006: 10s window 18 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:03:00' AND '2024-01-01 00:03:10' GROUP BY id1, id2; + +-- T007: 10s window 21 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:03:30' AND '2024-01-01 00:03:40' GROUP BY id1, id2; + +-- T008: 10s window 24 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:04:00' AND '2024-01-01 00:04:10' GROUP BY id1, id2; + +-- T009: 10s window 27 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:04:30' AND '2024-01-01 00:04:40' GROUP BY id1, id2; + +-- T010: 10s window 30 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:05:00' AND '2024-01-01 00:05:10' GROUP BY id1, id2; + +-- T011: 10s window 33 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:05:30' AND '2024-01-01 00:05:40' GROUP BY id1, id2; + +-- T012: 10s window 36 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:06:00' AND '2024-01-01 00:06:10' GROUP BY id1, id2; + +-- T013: 10s window 39 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:06:30' AND '2024-01-01 00:06:40' GROUP BY id1, id2; + +-- T014: 10s window 42 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:07:00' AND '2024-01-01 00:07:10' GROUP BY id1, id2; + +-- T015: 10s window 45 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:07:30' AND '2024-01-01 00:07:40' GROUP BY id1, id2; + +-- T016: 10s window 48 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:08:00' AND '2024-01-01 00:08:10' GROUP BY id1, id2; + +-- T017: 10s window 51 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:08:30' AND '2024-01-01 00:08:40' GROUP BY id1, id2; + +-- T018: 10s window 54 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:09:00' AND '2024-01-01 00:09:10' GROUP BY id1, id2; + +-- T019: 10s window 57 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:09:30' AND '2024-01-01 00:09:40' 
GROUP BY id1, id2; + +-- T020: 10s window 60 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:10:00' AND '2024-01-01 00:10:10' GROUP BY id1, id2; + +-- T021: 10s window 63 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:10:30' AND '2024-01-01 00:10:40' GROUP BY id1, id2; + +-- T022: 10s window 66 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:11:00' AND '2024-01-01 00:11:10' GROUP BY id1, id2; + +-- T023: 10s window 69 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:11:30' AND '2024-01-01 00:11:40' GROUP BY id1, id2; + +-- T024: 10s window 72 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:12:00' AND '2024-01-01 00:12:10' GROUP BY id1, id2; + +-- T025: 10s window 75 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:12:30' AND '2024-01-01 00:12:40' GROUP BY id1, id2; + +-- T026: 10s window 78 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:13:00' AND '2024-01-01 00:13:10' GROUP BY id1, id2; + +-- T027: 10s window 81 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:13:30' AND '2024-01-01 00:13:40' GROUP BY id1, id2; + +-- T028: 10s window 84 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:14:00' AND '2024-01-01 00:14:10' GROUP BY id1, id2; + +-- T029: 10s window 87 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:14:30' AND '2024-01-01 00:14:40' GROUP BY id1, id2; + +-- T030: 10s window 90 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:15:00' AND '2024-01-01 00:15:10' GROUP BY id1, id2; + +-- T031: 10s window 93 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:15:30' AND '2024-01-01 00:15:40' GROUP BY id1, id2; + +-- T032: 10s window 96 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:16:00' AND '2024-01-01 00:16:10' GROUP BY id1, id2; + +-- T033: 10s window 99 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:16:30' AND '2024-01-01 00:16:40' GROUP BY id1, id2; + +-- T034: 10s window 102 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:17:00' AND '2024-01-01 00:17:10' GROUP BY id1, id2; + +-- T035: 10s window 105 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:17:30' AND '2024-01-01 00:17:40' GROUP BY id1, id2; + +-- T036: 10s window 108 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:18:00' AND '2024-01-01 00:18:10' GROUP BY id1, id2; + +-- T037: 10s window 111 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:18:30' AND '2024-01-01 00:18:40' GROUP BY id1, id2; + +-- T038: 10s window 114 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:19:00' AND '2024-01-01 00:19:10' GROUP BY id1, id2; + +-- T039: 10s window 117 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:19:30' AND '2024-01-01 00:19:40' GROUP BY id1, id2; + +-- T040: 10s window 120 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:20:00' AND '2024-01-01 00:20:10' GROUP BY id1, id2; + +-- T041: 10s window 123 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:20:30' AND '2024-01-01 00:20:40' 
GROUP BY id1, id2; + +-- T042: 10s window 126 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:21:00' AND '2024-01-01 00:21:10' GROUP BY id1, id2; + +-- T043: 10s window 129 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:21:30' AND '2024-01-01 00:21:40' GROUP BY id1, id2; + +-- T044: 10s window 132 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:22:00' AND '2024-01-01 00:22:10' GROUP BY id1, id2; + +-- T045: 10s window 135 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:22:30' AND '2024-01-01 00:22:40' GROUP BY id1, id2; + +-- T046: 10s window 138 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:23:00' AND '2024-01-01 00:23:10' GROUP BY id1, id2; + +-- T047: 10s window 141 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:23:30' AND '2024-01-01 00:23:40' GROUP BY id1, id2; + +-- T048: 10s window 144 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:24:00' AND '2024-01-01 00:24:10' GROUP BY id1, id2; + +-- T049: 10s window 147 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:24:30' AND '2024-01-01 00:24:40' GROUP BY id1, id2; + +-- T050: 10s window 150 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:25:00' AND '2024-01-01 00:25:10' GROUP BY id1, id2; + +-- T051: 10s window 153 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:25:30' AND '2024-01-01 00:25:40' GROUP BY id1, id2; + +-- T052: 10s window 156 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:26:00' AND '2024-01-01 00:26:10' GROUP BY id1, id2; + +-- T053: 10s window 159 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:26:30' AND '2024-01-01 00:26:40' GROUP BY id1, id2; + +-- T054: 10s window 162 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:27:00' AND '2024-01-01 00:27:10' GROUP BY id1, id2; + +-- T055: 10s window 165 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:27:30' AND '2024-01-01 00:27:40' GROUP BY id1, id2; + +-- T056: 10s window 168 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:28:00' AND '2024-01-01 00:28:10' GROUP BY id1, id2; + +-- T057: 10s window 171 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:28:30' AND '2024-01-01 00:28:40' GROUP BY id1, id2; + +-- T058: 10s window 174 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:29:00' AND '2024-01-01 00:29:10' GROUP BY id1, id2; + +-- T059: 10s window 177 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:29:30' AND '2024-01-01 00:29:40' GROUP BY id1, id2; + +-- T060: 10s window 180 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:30:00' AND '2024-01-01 00:30:10' GROUP BY id1, id2; + +-- T061: 10s window 183 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:30:30' AND '2024-01-01 00:30:40' GROUP BY id1, id2; + +-- T062: 10s window 186 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:31:00' AND '2024-01-01 00:31:10' GROUP BY id1, id2; + +-- T063: 10s window 189 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:31:30' AND '2024-01-01 
00:31:40' GROUP BY id1, id2; + +-- T064: 10s window 192 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:32:00' AND '2024-01-01 00:32:10' GROUP BY id1, id2; + +-- T065: 10s window 195 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:32:30' AND '2024-01-01 00:32:40' GROUP BY id1, id2; + +-- T066: 10s window 198 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:33:00' AND '2024-01-01 00:33:10' GROUP BY id1, id2; + +-- T067: 10s window 201 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:33:30' AND '2024-01-01 00:33:40' GROUP BY id1, id2; + +-- T068: 10s window 204 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:34:00' AND '2024-01-01 00:34:10' GROUP BY id1, id2; + +-- T069: 10s window 207 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:34:30' AND '2024-01-01 00:34:40' GROUP BY id1, id2; + +-- T070: 10s window 210 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:35:00' AND '2024-01-01 00:35:10' GROUP BY id1, id2; + +-- T071: 10s window 213 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:35:30' AND '2024-01-01 00:35:40' GROUP BY id1, id2; + +-- T072: 10s window 216 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:36:00' AND '2024-01-01 00:36:10' GROUP BY id1, id2; + +-- T073: 10s window 219 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:36:30' AND '2024-01-01 00:36:40' GROUP BY id1, id2; + +-- T074: 10s window 222 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:37:00' AND '2024-01-01 00:37:10' GROUP BY id1, id2; + +-- T075: 10s window 225 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:37:30' AND '2024-01-01 00:37:40' GROUP BY id1, id2; + +-- T076: 10s window 228 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:38:00' AND '2024-01-01 00:38:10' GROUP BY id1, id2; + +-- T077: 10s window 231 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:38:30' AND '2024-01-01 00:38:40' GROUP BY id1, id2; + +-- T078: 10s window 234 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:39:00' AND '2024-01-01 00:39:10' GROUP BY id1, id2; + +-- T079: 10s window 237 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:39:30' AND '2024-01-01 00:39:40' GROUP BY id1, id2; + +-- T080: 10s window 240 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:40:00' AND '2024-01-01 00:40:10' GROUP BY id1, id2; + +-- T081: 10s window 243 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:40:30' AND '2024-01-01 00:40:40' GROUP BY id1, id2; + +-- T082: 10s window 246 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:41:00' AND '2024-01-01 00:41:10' GROUP BY id1, id2; + +-- T083: 10s window 249 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:41:30' AND '2024-01-01 00:41:40' GROUP BY id1, id2; + +-- T084: 10s window 252 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:42:00' AND '2024-01-01 00:42:10' GROUP BY id1, id2; + +-- T085: 10s window 255 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:42:30' AND 
'2024-01-01 00:42:40' GROUP BY id1, id2; + +-- T086: 10s window 258 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:43:00' AND '2024-01-01 00:43:10' GROUP BY id1, id2; + +-- T087: 10s window 261 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:43:30' AND '2024-01-01 00:43:40' GROUP BY id1, id2; + +-- T088: 10s window 264 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:44:00' AND '2024-01-01 00:44:10' GROUP BY id1, id2; + +-- T089: 10s window 267 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:44:30' AND '2024-01-01 00:44:40' GROUP BY id1, id2; + +-- T090: 10s window 270 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:45:00' AND '2024-01-01 00:45:10' GROUP BY id1, id2; + +-- T091: 10s window 273 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:45:30' AND '2024-01-01 00:45:40' GROUP BY id1, id2; + +-- T092: 10s window 276 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:46:00' AND '2024-01-01 00:46:10' GROUP BY id1, id2; + +-- T093: 10s window 279 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:46:30' AND '2024-01-01 00:46:40' GROUP BY id1, id2; + +-- T094: 10s window 282 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:47:00' AND '2024-01-01 00:47:10' GROUP BY id1, id2; + +-- T095: 10s window 285 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:47:30' AND '2024-01-01 00:47:40' GROUP BY id1, id2; + +-- T096: 10s window 288 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:48:00' AND '2024-01-01 00:48:10' GROUP BY id1, id2; + +-- T097: 10s window 291 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:48:30' AND '2024-01-01 00:48:40' GROUP BY id1, id2; + +-- T098: 10s window 294 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:49:00' AND '2024-01-01 00:49:10' GROUP BY id1, id2; + +-- T099: 10s window 297 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:49:30' AND '2024-01-01 00:49:40' GROUP BY id1, id2; + +-- T100: 10s window 300 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:50:00' AND '2024-01-01 00:50:10' GROUP BY id1, id2; + +-- T101: 10s window 303 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:50:30' AND '2024-01-01 00:50:40' GROUP BY id1, id2; + +-- T102: 10s window 306 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:51:00' AND '2024-01-01 00:51:10' GROUP BY id1, id2; + +-- T103: 10s window 309 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:51:30' AND '2024-01-01 00:51:40' GROUP BY id1, id2; + +-- T104: 10s window 312 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:52:00' AND '2024-01-01 00:52:10' GROUP BY id1, id2; + +-- T105: 10s window 315 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:52:30' AND '2024-01-01 00:52:40' GROUP BY id1, id2; + +-- T106: 10s window 318 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:53:00' AND '2024-01-01 00:53:10' GROUP BY id1, id2; + +-- T107: 10s window 321 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 
00:53:30' AND '2024-01-01 00:53:40' GROUP BY id1, id2; + +-- T108: 10s window 324 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:54:00' AND '2024-01-01 00:54:10' GROUP BY id1, id2; + +-- T109: 10s window 327 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:54:30' AND '2024-01-01 00:54:40' GROUP BY id1, id2; + +-- T110: 10s window 330 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:55:00' AND '2024-01-01 00:55:10' GROUP BY id1, id2; + +-- T111: 10s window 333 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:55:30' AND '2024-01-01 00:55:40' GROUP BY id1, id2; + +-- T112: 10s window 336 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:56:00' AND '2024-01-01 00:56:10' GROUP BY id1, id2; + +-- T113: 10s window 339 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:56:30' AND '2024-01-01 00:56:40' GROUP BY id1, id2; + +-- T114: 10s window 342 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:57:00' AND '2024-01-01 00:57:10' GROUP BY id1, id2; + +-- T115: 10s window 345 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:57:30' AND '2024-01-01 00:57:40' GROUP BY id1, id2; + +-- T116: 10s window 348 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:58:00' AND '2024-01-01 00:58:10' GROUP BY id1, id2; + +-- T117: 10s window 351 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:58:30' AND '2024-01-01 00:58:40' GROUP BY id1, id2; + +-- T118: 10s window 354 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:59:00' AND '2024-01-01 00:59:10' GROUP BY id1, id2; + +-- T119: 10s window 357 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 00:59:30' AND '2024-01-01 00:59:40' GROUP BY id1, id2; + +-- T120: 10s window 360 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:00:00' AND '2024-01-01 01:00:10' GROUP BY id1, id2; + +-- T121: 10s window 363 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:00:30' AND '2024-01-01 01:00:40' GROUP BY id1, id2; + +-- T122: 10s window 366 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:01:00' AND '2024-01-01 01:01:10' GROUP BY id1, id2; + +-- T123: 10s window 369 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:01:30' AND '2024-01-01 01:01:40' GROUP BY id1, id2; + +-- T124: 10s window 372 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:02:00' AND '2024-01-01 01:02:10' GROUP BY id1, id2; + +-- T125: 10s window 375 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:02:30' AND '2024-01-01 01:02:40' GROUP BY id1, id2; + +-- T126: 10s window 378 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:03:00' AND '2024-01-01 01:03:10' GROUP BY id1, id2; + +-- T127: 10s window 381 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:03:30' AND '2024-01-01 01:03:40' GROUP BY id1, id2; + +-- T128: 10s window 384 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:04:00' AND '2024-01-01 01:04:10' GROUP BY id1, id2; + +-- T129: 10s window 387 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN 
'2024-01-01 01:04:30' AND '2024-01-01 01:04:40' GROUP BY id1, id2; + +-- T130: 10s window 390 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:05:00' AND '2024-01-01 01:05:10' GROUP BY id1, id2; + +-- T131: 10s window 393 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:05:30' AND '2024-01-01 01:05:40' GROUP BY id1, id2; + +-- T132: 10s window 396 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:06:00' AND '2024-01-01 01:06:10' GROUP BY id1, id2; + +-- T133: 10s window 399 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:06:30' AND '2024-01-01 01:06:40' GROUP BY id1, id2; + +-- T134: 10s window 402 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:07:00' AND '2024-01-01 01:07:10' GROUP BY id1, id2; + +-- T135: 10s window 405 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:07:30' AND '2024-01-01 01:07:40' GROUP BY id1, id2; + +-- T136: 10s window 408 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:08:00' AND '2024-01-01 01:08:10' GROUP BY id1, id2; + +-- T137: 10s window 411 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:08:30' AND '2024-01-01 01:08:40' GROUP BY id1, id2; + +-- T138: 10s window 414 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:09:00' AND '2024-01-01 01:09:10' GROUP BY id1, id2; + +-- T139: 10s window 417 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:09:30' AND '2024-01-01 01:09:40' GROUP BY id1, id2; + +-- T140: 10s window 420 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:10:00' AND '2024-01-01 01:10:10' GROUP BY id1, id2; + +-- T141: 10s window 423 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:10:30' AND '2024-01-01 01:10:40' GROUP BY id1, id2; + +-- T142: 10s window 426 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:11:00' AND '2024-01-01 01:11:10' GROUP BY id1, id2; + +-- T143: 10s window 429 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:11:30' AND '2024-01-01 01:11:40' GROUP BY id1, id2; + +-- T144: 10s window 432 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:12:00' AND '2024-01-01 01:12:10' GROUP BY id1, id2; + +-- T145: 10s window 435 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:12:30' AND '2024-01-01 01:12:40' GROUP BY id1, id2; + +-- T146: 10s window 438 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:13:00' AND '2024-01-01 01:13:10' GROUP BY id1, id2; + +-- T147: 10s window 441 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:13:30' AND '2024-01-01 01:13:40' GROUP BY id1, id2; + +-- T148: 10s window 444 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:14:00' AND '2024-01-01 01:14:10' GROUP BY id1, id2; + +-- T149: 10s window 447 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:14:30' AND '2024-01-01 01:14:40' GROUP BY id1, id2; + +-- T150: 10s window 450 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:15:00' AND '2024-01-01 01:15:10' GROUP BY id1, id2; + +-- T151: 10s window 453 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE 
timestamp BETWEEN '2024-01-01 01:15:30' AND '2024-01-01 01:15:40' GROUP BY id1, id2; + +-- T152: 10s window 456 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:16:00' AND '2024-01-01 01:16:10' GROUP BY id1, id2; + +-- T153: 10s window 459 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:16:30' AND '2024-01-01 01:16:40' GROUP BY id1, id2; + +-- T154: 10s window 462 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:17:00' AND '2024-01-01 01:17:10' GROUP BY id1, id2; + +-- T155: 10s window 465 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:17:30' AND '2024-01-01 01:17:40' GROUP BY id1, id2; + +-- T156: 10s window 468 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:18:00' AND '2024-01-01 01:18:10' GROUP BY id1, id2; + +-- T157: 10s window 471 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:18:30' AND '2024-01-01 01:18:40' GROUP BY id1, id2; + +-- T158: 10s window 474 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:19:00' AND '2024-01-01 01:19:10' GROUP BY id1, id2; + +-- T159: 10s window 477 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:19:30' AND '2024-01-01 01:19:40' GROUP BY id1, id2; + +-- T160: 10s window 480 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:20:00' AND '2024-01-01 01:20:10' GROUP BY id1, id2; + +-- T161: 10s window 483 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:20:30' AND '2024-01-01 01:20:40' GROUP BY id1, id2; + +-- T162: 10s window 486 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:21:00' AND '2024-01-01 01:21:10' GROUP BY id1, id2; + +-- T163: 10s window 489 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:21:30' AND '2024-01-01 01:21:40' GROUP BY id1, id2; + +-- T164: 10s window 492 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:22:00' AND '2024-01-01 01:22:10' GROUP BY id1, id2; + +-- T165: 10s window 495 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:22:30' AND '2024-01-01 01:22:40' GROUP BY id1, id2; + +-- T166: 10s window 498 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:23:00' AND '2024-01-01 01:23:10' GROUP BY id1, id2; + +-- T167: 10s window 501 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:23:30' AND '2024-01-01 01:23:40' GROUP BY id1, id2; + +-- T168: 10s window 504 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:24:00' AND '2024-01-01 01:24:10' GROUP BY id1, id2; + +-- T169: 10s window 507 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:24:30' AND '2024-01-01 01:24:40' GROUP BY id1, id2; + +-- T170: 10s window 510 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:25:00' AND '2024-01-01 01:25:10' GROUP BY id1, id2; + +-- T171: 10s window 513 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:25:30' AND '2024-01-01 01:25:40' GROUP BY id1, id2; + +-- T172: 10s window 516 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:26:00' AND '2024-01-01 01:26:10' GROUP BY id1, id2; + +-- T173: 10s window 519 +SELECT quantile(0.95)(v1) FROM 
h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:26:30' AND '2024-01-01 01:26:40' GROUP BY id1, id2; + +-- T174: 10s window 522 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:27:00' AND '2024-01-01 01:27:10' GROUP BY id1, id2; + +-- T175: 10s window 525 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:27:30' AND '2024-01-01 01:27:40' GROUP BY id1, id2; + +-- T176: 10s window 528 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:28:00' AND '2024-01-01 01:28:10' GROUP BY id1, id2; + +-- T177: 10s window 531 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:28:30' AND '2024-01-01 01:28:40' GROUP BY id1, id2; + +-- T178: 10s window 534 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:29:00' AND '2024-01-01 01:29:10' GROUP BY id1, id2; + +-- T179: 10s window 537 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:29:30' AND '2024-01-01 01:29:40' GROUP BY id1, id2; + +-- T180: 10s window 540 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:30:00' AND '2024-01-01 01:30:10' GROUP BY id1, id2; + +-- T181: 10s window 543 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:30:30' AND '2024-01-01 01:30:40' GROUP BY id1, id2; + +-- T182: 10s window 546 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:31:00' AND '2024-01-01 01:31:10' GROUP BY id1, id2; + +-- T183: 10s window 549 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:31:30' AND '2024-01-01 01:31:40' GROUP BY id1, id2; + +-- T184: 10s window 552 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:32:00' AND '2024-01-01 01:32:10' GROUP BY id1, id2; + +-- T185: 10s window 555 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:32:30' AND '2024-01-01 01:32:40' GROUP BY id1, id2; + +-- T186: 10s window 558 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:33:00' AND '2024-01-01 01:33:10' GROUP BY id1, id2; + +-- T187: 10s window 561 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:33:30' AND '2024-01-01 01:33:40' GROUP BY id1, id2; + +-- T188: 10s window 564 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:34:00' AND '2024-01-01 01:34:10' GROUP BY id1, id2; + +-- T189: 10s window 567 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:34:30' AND '2024-01-01 01:34:40' GROUP BY id1, id2; + +-- T190: 10s window 570 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:35:00' AND '2024-01-01 01:35:10' GROUP BY id1, id2; + +-- T191: 10s window 573 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:35:30' AND '2024-01-01 01:35:40' GROUP BY id1, id2; + +-- T192: 10s window 576 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:36:00' AND '2024-01-01 01:36:10' GROUP BY id1, id2; + +-- T193: 10s window 579 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:36:30' AND '2024-01-01 01:36:40' GROUP BY id1, id2; + +-- T194: 10s window 582 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:37:00' AND '2024-01-01 01:37:10' GROUP BY id1, id2; + +-- T195: 10s window 585 +SELECT 
quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:37:30' AND '2024-01-01 01:37:40' GROUP BY id1, id2; + +-- T196: 10s window 588 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:38:00' AND '2024-01-01 01:38:10' GROUP BY id1, id2; + +-- T197: 10s window 591 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:38:30' AND '2024-01-01 01:38:40' GROUP BY id1, id2; + +-- T198: 10s window 594 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:39:00' AND '2024-01-01 01:39:10' GROUP BY id1, id2; + +-- T199: 10s window 597 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:39:30' AND '2024-01-01 01:39:40' GROUP BY id1, id2; + +-- T200: 10s window 600 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:40:00' AND '2024-01-01 01:40:10' GROUP BY id1, id2; + +-- T201: 10s window 603 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:40:30' AND '2024-01-01 01:40:40' GROUP BY id1, id2; + +-- T202: 10s window 606 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:41:00' AND '2024-01-01 01:41:10' GROUP BY id1, id2; + +-- T203: 10s window 609 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:41:30' AND '2024-01-01 01:41:40' GROUP BY id1, id2; + +-- T204: 10s window 612 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:42:00' AND '2024-01-01 01:42:10' GROUP BY id1, id2; + +-- T205: 10s window 615 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:42:30' AND '2024-01-01 01:42:40' GROUP BY id1, id2; + +-- T206: 10s window 618 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:43:00' AND '2024-01-01 01:43:10' GROUP BY id1, id2; + +-- T207: 10s window 621 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:43:30' AND '2024-01-01 01:43:40' GROUP BY id1, id2; + +-- T208: 10s window 624 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:44:00' AND '2024-01-01 01:44:10' GROUP BY id1, id2; + +-- T209: 10s window 627 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:44:30' AND '2024-01-01 01:44:40' GROUP BY id1, id2; + +-- T210: 10s window 630 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:45:00' AND '2024-01-01 01:45:10' GROUP BY id1, id2; + +-- T211: 10s window 633 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:45:30' AND '2024-01-01 01:45:40' GROUP BY id1, id2; + +-- T212: 10s window 636 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:46:00' AND '2024-01-01 01:46:10' GROUP BY id1, id2; + +-- T213: 10s window 639 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:46:30' AND '2024-01-01 01:46:40' GROUP BY id1, id2; + +-- T214: 10s window 642 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:47:00' AND '2024-01-01 01:47:10' GROUP BY id1, id2; + +-- T215: 10s window 645 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:47:30' AND '2024-01-01 01:47:40' GROUP BY id1, id2; + +-- T216: 10s window 648 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:48:00' AND '2024-01-01 01:48:10' GROUP BY id1, id2; + +-- T217: 10s window 
651 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:48:30' AND '2024-01-01 01:48:40' GROUP BY id1, id2; + +-- T218: 10s window 654 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:49:00' AND '2024-01-01 01:49:10' GROUP BY id1, id2; + +-- T219: 10s window 657 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:49:30' AND '2024-01-01 01:49:40' GROUP BY id1, id2; + +-- T220: 10s window 660 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:50:00' AND '2024-01-01 01:50:10' GROUP BY id1, id2; + +-- T221: 10s window 663 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:50:30' AND '2024-01-01 01:50:40' GROUP BY id1, id2; + +-- T222: 10s window 666 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:51:00' AND '2024-01-01 01:51:10' GROUP BY id1, id2; + +-- T223: 10s window 669 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:51:30' AND '2024-01-01 01:51:40' GROUP BY id1, id2; + +-- T224: 10s window 672 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:52:00' AND '2024-01-01 01:52:10' GROUP BY id1, id2; + +-- T225: 10s window 675 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:52:30' AND '2024-01-01 01:52:40' GROUP BY id1, id2; + +-- T226: 10s window 678 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:53:00' AND '2024-01-01 01:53:10' GROUP BY id1, id2; + +-- T227: 10s window 681 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:53:30' AND '2024-01-01 01:53:40' GROUP BY id1, id2; + +-- T228: 10s window 684 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:54:00' AND '2024-01-01 01:54:10' GROUP BY id1, id2; + +-- T229: 10s window 687 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:54:30' AND '2024-01-01 01:54:40' GROUP BY id1, id2; + +-- T230: 10s window 690 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:55:00' AND '2024-01-01 01:55:10' GROUP BY id1, id2; + +-- T231: 10s window 693 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:55:30' AND '2024-01-01 01:55:40' GROUP BY id1, id2; + +-- T232: 10s window 696 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:56:00' AND '2024-01-01 01:56:10' GROUP BY id1, id2; + +-- T233: 10s window 699 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:56:30' AND '2024-01-01 01:56:40' GROUP BY id1, id2; + +-- T234: 10s window 702 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:57:00' AND '2024-01-01 01:57:10' GROUP BY id1, id2; + +-- T235: 10s window 705 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:57:30' AND '2024-01-01 01:57:40' GROUP BY id1, id2; + +-- T236: 10s window 708 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:58:00' AND '2024-01-01 01:58:10' GROUP BY id1, id2; + +-- T237: 10s window 711 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:58:30' AND '2024-01-01 01:58:40' GROUP BY id1, id2; + +-- T238: 10s window 714 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:59:00' AND '2024-01-01 01:59:10' GROUP BY id1, id2; + +-- T239: 
10s window 717 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 01:59:30' AND '2024-01-01 01:59:40' GROUP BY id1, id2; + +-- T240: 10s window 720 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:00:00' AND '2024-01-01 02:00:10' GROUP BY id1, id2; + +-- T241: 10s window 723 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:00:30' AND '2024-01-01 02:00:40' GROUP BY id1, id2; + +-- T242: 10s window 726 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:01:00' AND '2024-01-01 02:01:10' GROUP BY id1, id2; + +-- T243: 10s window 729 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:01:30' AND '2024-01-01 02:01:40' GROUP BY id1, id2; + +-- T244: 10s window 732 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:02:00' AND '2024-01-01 02:02:10' GROUP BY id1, id2; + +-- T245: 10s window 735 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:02:30' AND '2024-01-01 02:02:40' GROUP BY id1, id2; + +-- T246: 10s window 738 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:03:00' AND '2024-01-01 02:03:10' GROUP BY id1, id2; + +-- T247: 10s window 741 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:03:30' AND '2024-01-01 02:03:40' GROUP BY id1, id2; + +-- T248: 10s window 744 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:04:00' AND '2024-01-01 02:04:10' GROUP BY id1, id2; + +-- T249: 10s window 747 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:04:30' AND '2024-01-01 02:04:40' GROUP BY id1, id2; + +-- T250: 10s window 750 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:05:00' AND '2024-01-01 02:05:10' GROUP BY id1, id2; + +-- T251: 10s window 753 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:05:30' AND '2024-01-01 02:05:40' GROUP BY id1, id2; + +-- T252: 10s window 756 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:06:00' AND '2024-01-01 02:06:10' GROUP BY id1, id2; + +-- T253: 10s window 759 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:06:30' AND '2024-01-01 02:06:40' GROUP BY id1, id2; + +-- T254: 10s window 762 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:07:00' AND '2024-01-01 02:07:10' GROUP BY id1, id2; + +-- T255: 10s window 765 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:07:30' AND '2024-01-01 02:07:40' GROUP BY id1, id2; + +-- T256: 10s window 768 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:08:00' AND '2024-01-01 02:08:10' GROUP BY id1, id2; + +-- T257: 10s window 771 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:08:30' AND '2024-01-01 02:08:40' GROUP BY id1, id2; + +-- T258: 10s window 774 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:09:00' AND '2024-01-01 02:09:10' GROUP BY id1, id2; + +-- T259: 10s window 777 +SELECT quantile(0.95)(v1) FROM h2o_groupby WHERE timestamp BETWEEN '2024-01-01 02:09:30' AND '2024-01-01 02:09:40' GROUP BY id1, id2; diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/h2o_init.sql b/asap-tools/execution-utilities/asap_benchmark_pipeline/h2o_init.sql index 
2f62d03..ef65283 100644 --- a/asap-tools/execution-utilities/asap_benchmark_pipeline/h2o_init.sql +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/h2o_init.sql @@ -1,24 +1,4 @@ -DROP VIEW IF EXISTS h2o_groupby_mv; DROP TABLE IF EXISTS h2o_groupby; -DROP TABLE IF EXISTS h2o_groupby_queue; - -CREATE TABLE IF NOT EXISTS h2o_groupby_queue -( - timestamp String, - id1 String, - id2 String, - id3 String, - id4 Int32, - id5 Int32, - id6 Int32, - v1 Int32, - v2 Int32, - v3 Float64 -) ENGINE = Kafka -SETTINGS kafka_broker_list = 'localhost:9092', - kafka_topic_list = 'h2o_groupby', - kafka_group_name = 'clickhouse_h2o', - kafka_format = 'JSONEachRow'; CREATE TABLE IF NOT EXISTS h2o_groupby ( @@ -32,9 +12,5 @@ CREATE TABLE IF NOT EXISTS h2o_groupby v1 Int32, v2 Int32, v3 Float64 -) ENGINE = MergeTree -ORDER BY (id1, id2, id3, id4); - -CREATE MATERIALIZED VIEW IF NOT EXISTS h2o_groupby_mv TO h2o_groupby AS -SELECT parseDateTimeBestEffort(timestamp) AS timestamp, id1, id2, id3, id4, id5, id6, v1, v2, v3 -FROM h2o_groupby_queue; \ No newline at end of file +) ENGINE = MergeTree() +ORDER BY (id1, id2); diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/inference_config.yaml b/asap-tools/execution-utilities/asap_benchmark_pipeline/inference_config.yaml index b0f3dc2..4b3db12 100644 --- a/asap-tools/execution-utilities/asap_benchmark_pipeline/inference_config.yaml +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/inference_config.yaml @@ -1,7 +1,7 @@ tables: - name: h2o_groupby time_column: timestamp - metadata_columns: [id1, id2, id3] + metadata_columns: [id1, id2] value_columns: [v1] cleanup_policy: @@ -13,5 +13,5 @@ queries: read_count_threshold: 999999 query: |- SELECT QUANTILE(0.95, v1) FROM h2o_groupby - WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() - GROUP BY id1, id2, id3; \ No newline at end of file + WHERE timestamp BETWEEN DATEADD(s, -10, NOW()) AND NOW() + GROUP BY id1, id2; diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/plot_latency.py b/asap-tools/execution-utilities/asap_benchmark_pipeline/plot_latency.py new file mode 100755 index 0000000..1188ab2 --- /dev/null +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/plot_latency.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Plot ASAP vs baseline latency comparison for H2O benchmark.""" + +import csv +import matplotlib.pyplot as plt +import numpy as np +from pathlib import Path + +HERE = Path(__file__).parent + + +def load(path): + rows = {} + with open(path) as f: + for row in csv.DictReader(f): + if not row["error"]: + rows[row["query_id"]] = float(row["latency_ms"]) + return rows + + +asap = load(HERE / "asap_results.csv") +base = load(HERE / "baseline_results.csv") + +qids = sorted(set(asap) & set(base)) +x = np.arange(len(qids)) +a_vals = [asap[q] for q in qids] +b_vals = [base[q] for q in qids] +speedup = [b / a if a > 0 else 0 for a, b in zip(a_vals, b_vals)] + +fig, (ax1, ax2) = plt.subplots( + 2, 1, figsize=(14, 7), gridspec_kw={"height_ratios": [3, 1]} +) + +# --- Top: per-query latency --- +w = 0.4 +ax1.bar(x - w / 2, b_vals, w, label="ClickHouse baseline", color="#f4a460") +ax1.bar(x + w / 2, a_vals, w, label="ASAP (KLL sketch)", color="#4682b4") +ax1.set_xticks(x) +ax1.set_xticklabels(qids, rotation=90, fontsize=7) +ax1.set_ylabel("Latency (ms)") +ax1.set_title( + "H2O groupby: ASAP vs ClickHouse baseline " + f"(p50: {np.median(a_vals):.1f}ms vs {np.median(b_vals):.1f}ms)" +) +ax1.legend() +ax1.set_xlim(-0.6, len(qids) - 0.4) + +# --- Bottom: 
speedup --- +ax2.bar(x, speedup, color="#2e8b57", width=0.7) +ax2.axhline( + np.mean(speedup), + color="red", + linewidth=1, + linestyle="--", + label=f"mean {np.mean(speedup):.1f}x", +) +ax2.set_xticks(x) +ax2.set_xticklabels(qids, rotation=90, fontsize=7) +ax2.set_ylabel("Speedup (x)") +ax2.legend(fontsize=8) +ax2.set_xlim(-0.6, len(qids) - 0.4) + +plt.tight_layout() +out = HERE / "latency_comparison.png" +plt.savefig(out, dpi=150) +print(f"Saved {out}") diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py old mode 100644 new mode 100755 index d9d0c74..8ed3c5b --- a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py @@ -1,161 +1,277 @@ -from typing import Tuple, List, Optional -from pathlib import Path -from datetime import datetime, timedelta -import re +#!/usr/bin/env python3 +""" +Benchmark ASAP vs ClickHouse baseline on the H2O groupby dataset. +Outputs CSV with query ID, latency, and results. +""" + import argparse +import csv +import json import os -import gdown -import requests -import urllib.parse +import re import time -import csv +import urllib.parse +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import List, Optional, Tuple + +import gdown import matplotlib.pyplot as plt +import requests +from kafka import KafkaProducer + + +# --------------------------------------------------------------------------- +# Query extraction +# --------------------------------------------------------------------------- def extract_queries_from_sql(sql_file: Path) -> List[Tuple[str, str]]: - """Extract query ID and SQL from asap_h2o_queries.sql""" - queries = [] - with open(sql_file, "r") as f: + """Extract (query_id, sql) pairs from an annotated SQL file.""" + with open(sql_file) as f: content = f.read() - pattern = r"-- ([A-Za-z0-9_]+):[^\n]*\n(SELECT[^;]+;)" - matches = re.findall(pattern, content, re.DOTALL | re.IGNORECASE) + return [(qid, sql.strip()) for qid, sql in re.findall(pattern, content, re.DOTALL | re.IGNORECASE)] - for query_id, sql in matches: - sql = sql.strip() - queries.append((query_id, sql)) - return queries +# --------------------------------------------------------------------------- +# Data loading — ClickHouse direct +# --------------------------------------------------------------------------- -def data_loaded(clickhouse_url: str): - try: - response = requests.post(clickhouse_url, data="SELECT count(*) FROM h2o_groupby") - if response.status_code != 200: - return False - count = int(response.text.strip()) - if count > 0: - print(f"✓ Data already loaded ({count:,} rows)") - return count > 0 - except: - return False - -def stream_csv_with_timestamps(filename: str): - """ - Generator that reads the CSV and prepends a timestamp column. - Starts at 1971-01-01 00:00:00 and increments by 10s every 100 rows. 
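The replacement loaders further down drop this synthetic 1971 clock in favor of a fixed 2024-01-01 base in which every 1,000 consecutive rows share one second, so a 10-second query window covers roughly 10,000 rows. A minimal sketch of that row-index-to-timestamp mapping, using only constants that appear in the new loader code (the helper name itself is illustrative, not part of the patch):

# Sketch only: the row-index -> timestamp mapping used by the new loaders.
# 1704067200 is 2024-01-01T00:00:00Z; each second absorbs 1,000 rows.
from datetime import datetime, timezone

def row_timestamp(i: int) -> str:
    abs_sec = 1704067200 + i // 1000   # whole seconds since the 2024 base
    ms = i % 1000                      # position within that second
    base = datetime.fromtimestamp(abs_sec, tz=timezone.utc)
    return base.strftime("%Y-%m-%dT%H:%M:%S") + f".{ms:03d}Z"

assert row_timestamp(0) == "2024-01-01T00:00:00.000Z"
assert row_timestamp(10_000) == "2024-01-01T00:00:10.000Z"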
- """ - start_time = datetime(1971, 1, 1, 0, 0, 0) - - with open(filename, 'r', encoding='utf-8') as f: - header = f.readline().strip() - yield f"timestamp,{header}\n".encode('utf-8') - - chunk = bytearray() - row_count = 0 - ts_bytes = b"" - - for line in f: - if row_count % 100 == 0: - delta_seconds = (row_count // 100) * 10 - current_time = start_time + timedelta(seconds=delta_seconds) - ts_str = current_time.strftime('%Y-%m-%dT%H:%M:%SZ') + ',' - ts_bytes = ts_str.encode('utf-8') - - chunk.extend(ts_bytes) - chunk.extend(line.encode('utf-8')) - row_count += 1 - - if len(chunk) > 65536: - yield bytes(chunk) - chunk = bytearray() - - if chunk: - yield bytes(chunk) - -def load_h2o_data(clickhouse_url: str, mode: str): - # 1. SETUP TABLES - try: - with open("h2o_init.sql", 'r') as f: - file_content = f.read() - except FileNotFoundError: - print("✗ Error: h2o_init.sql not found.") - return False +def load_h2o_data_clickhouse(clickhouse_url: str, skip_table_init: bool = False, max_rows: int = 0): + """Load H2O CSV into ClickHouse MergeTree (baseline path).""" - statements = [s.strip() for s in file_content.split(';') if s.strip()] - print(f"Executing {len(statements)} setup statements...") + if not skip_table_init: + print("Initializing ClickHouse tables...") + with open(Path(__file__).parent / "h2o_init.sql") as f: + stmts = [s.strip() for s in f.read().split(";") if s.strip()] + for sql in stmts: + r = requests.post(clickhouse_url, data=sql) + if not r.ok: + print(f" WARN: {r.text.strip()[:120]}") + else: + print(f" OK: {sql[:60]}") - try: - for sql in statements: - response = requests.post(clickhouse_url, data=sql) - response.raise_for_status() - except Exception as e: - print(f"Error executing statement: {e}") - return False - - print("✓ Created h2o_groupby tables and views") - - if data_loaded(clickhouse_url): + # Check if already loaded + r = requests.post(clickhouse_url, data="SELECT count(*) FROM h2o_groupby") + count = int(r.text.strip()) + if count > 0: + print(f"Data already loaded ({count:,} rows)") return True - # 2. 
DOWNLOAD DATA + csv_path = _download_h2o_csv() + + print("Inserting data into ClickHouse...") + data_ts = datetime(2024, 1, 1, tzinfo=timezone.utc) + + batch_size = 50_000 + batch = [] + total = 0 + + with open(csv_path, "r", encoding="utf-8") as f: + f.readline() # skip header + for i, line in enumerate(f): + if max_rows > 0 and i >= max_rows: + break + parts = line.rstrip("\n").split(",") + abs_sec = 1704067200 + i // 1000 + ms = i % 1000 + ts = datetime.fromtimestamp(abs_sec, tz=timezone.utc) + ts_str = ts.strftime("%Y-%m-%d %H:%M:%S") + + batch.append( + f"('{ts_str}','{parts[0]}','{parts[1]}','{parts[2]}'," + f"{parts[3]},{parts[4]},{parts[5]}," + f"{parts[6]},{parts[7]},{parts[8]})" + ) + + if len(batch) >= batch_size: + _flush_batch(clickhouse_url, batch) + total += len(batch) + batch = [] + if total % 500_000 == 0: + print(f" Inserted {total:,} rows...") + + if batch: + _flush_batch(clickhouse_url, batch) + total += len(batch) + + print(f"Loaded {total:,} rows into ClickHouse") + return True + + +def _flush_batch(clickhouse_url: str, rows: list): + sql = "INSERT INTO h2o_groupby VALUES " + ",".join(rows) + r = requests.post(clickhouse_url, data=sql) + if not r.ok: + raise RuntimeError(f"ClickHouse insert failed: {r.text[:200]}") + + +# --------------------------------------------------------------------------- +# Data loading — Kafka (for Arroyo sketch pipeline) +# --------------------------------------------------------------------------- + +def produce_h2o_to_kafka(topic: str = "h2o_groupby", max_rows: int = 0): + """Stream H2O CSV rows into Kafka with fixed 2024-01-01 message timestamps.""" + csv_path = _download_h2o_csv() + + data_ts = 1704067200 # 2024-01-01T00:00:00Z + ms_suffixes = [f".{ms:03d}Z" for ms in range(1000)] + sec_prefix_cache: dict = {} + + producer = KafkaProducer( + bootstrap_servers="localhost:9092", + linger_ms=10, + batch_size=1048576, + ) + + with open(csv_path, "r", encoding="utf-8") as f: + f.readline() # skip header + for i, line in enumerate(f): + if max_rows > 0 and i >= max_rows: + break + parts = line.rstrip("\n").split(",") + abs_sec = data_ts + i // 1000 + ms = i % 1000 + if abs_sec not in sec_prefix_cache: + sec_prefix_cache[abs_sec] = datetime.fromtimestamp( + abs_sec, tz=timezone.utc + ).strftime("%Y-%m-%dT%H:%M:%S") + ts_str = sec_prefix_cache[abs_sec] + ms_suffixes[ms] + + msg = ( + f'{{"timestamp":"{ts_str}",' + f'"id1":"{parts[0]}","id2":"{parts[1]}","id3":"{parts[2]}",' + f'"id4":{parts[3]},"id5":{parts[4]},"id6":{parts[5]},' + f'"v1":{parts[6]},"v2":{parts[7]},"v3":{parts[8]}}}' + ).encode("utf-8") + + producer.send(topic, value=msg, timestamp_ms=data_ts * 1000 + i) + + if i % 500_000 == 499_999: + print(f" Produced {i + 1:,} rows...") + + producer.flush() + producer.close() + print(f"Produced {i + 1:,} rows to Kafka topic '{topic}'") + + +def _download_h2o_csv() -> str: FILE_ID = "15SVQjQ2QehzYDLoDonio4aP7xqdMiNyi" - FILENAME = "G1_1e7_1e2_0_0.csv" - + FILENAME = str(Path(__file__).parent / "G1_1e7_1e2_0_0.csv") if os.path.exists(FILENAME) and os.path.getsize(FILENAME) > 100 * 1024 * 1024: - print(f"File {FILENAME} already exists. Skipping download.") - else: - print(f"Downloading H2O dataset (ID: {FILE_ID}) using gdown...") - url = f"https://drive.google.com/uc?id={FILE_ID}" - gdown.download(url, FILENAME, quiet=False) - - # 3. 
INSERT DATA VIA HTTP - if mode == "asap": - print("Publishing data to Kafka via ClickHouse HTTP (ASAP mode)...") - insert_query = "INSERT INTO h2o_groupby_queue FORMAT CSVWithNames" - else: - print("Inserting data directly into ClickHouse MergeTree (Baseline mode)...") - insert_query = "INSERT INTO h2o_groupby FORMAT CSVWithNames" - - url = f"{clickhouse_url.rstrip('/')}/" - params = {"query": insert_query} - - try: - response = requests.post(url, params=params, data=stream_csv_with_timestamps(FILENAME)) - if response.status_code != 200: - print(f"✗ Error loading data: {response.text}") - return False - except Exception as e: - print(f"✗ Exception during data load: {e}") - return False - - if mode == "asap": - print("Waiting for materialized view to consume all rows from Kafka...") - prev_count = -1 - stable_rounds = 0 - while stable_rounds < 3: - time.sleep(5) - response = requests.post(clickhouse_url, data="SELECT count(*) FROM h2o_groupby") - count = int(response.text.strip()) - print(f" h2o_groupby row count: {count:,}") - if count == prev_count: - stable_rounds += 1 - else: - stable_rounds = 0 - prev_count = count - else: - response = requests.post(clickhouse_url, data="SELECT count(*) FROM h2o_groupby") - count = int(response.text.strip()) + print(f"File {FILENAME} already exists, skipping download.") + return FILENAME + print(f"Downloading H2O dataset via gdown...") + url = f"https://drive.google.com/uc?id={FILE_ID}" + gdown.download(url, FILENAME, quiet=False) + return FILENAME + + +# --------------------------------------------------------------------------- +# Pipeline latency measurement +# --------------------------------------------------------------------------- + +def measure_pipeline_latency( + kafka_topic: str = "h2o_groupby", + asap_url: str = "http://localhost:8088/clickhouse/query", + num_trials: int = 3, +) -> float: + """Measure end-to-end pipeline latency: data into Kafka → query result from QE. + + Sends a fresh 10-second window of data, then polls the QE until the query + for that window returns results. Returns the median latency across trials. 
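In run_benchmark below, this median is folded into every query as a constant freshness adder rather than re-probed per query. A toy illustration of that accounting (the numbers here are invented, not measured):

# Illustrative only -- how the probe's result is combined downstream.
pipeline_overhead_ms = 2300.0  # e.g. median data->queryable lag from this probe
serving_ms = 42.0              # HTTP round-trip for one benchmark query
total_ms = serving_ms + pipeline_overhead_ms  # what run_benchmark reports
print(f"{total_ms:.2f}ms (serving={serving_ms:.2f}ms + pipeline={pipeline_overhead_ms:.2f}ms)")

Measuring once keeps the benchmark loop cheap while still charging the ASAP path for ingestion latency that the ClickHouse baseline (where the overhead stays 0.0) does not pay.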
+    """
-    print(f"✓ Loaded {count:,} rows")
+    producer = KafkaProducer(
+        bootstrap_servers="localhost:9092",
+        linger_ms=0,
+        batch_size=16384,
+    )
+    session = requests.Session()
-    return True
+    # Use a timestamp far enough in the future to avoid collision with test data
+    base_epoch = 1704200000  # 2024-01-02T12:53:20Z
+
+    latencies = []
+    for trial in range(num_trials):
+        window_start = base_epoch + trial * 100  # space trials apart
+        window_end = window_start + 10
+        query_ts = datetime.fromtimestamp(window_end, tz=timezone.utc).strftime(
+            "%Y-%m-%d %H:%M:%S"
+        )
+
+        # Build 1,000 rows (10 seconds × 100 id1 values; id2 derived) for this window
+        rows = []
+        for sec_offset in range(10):
+            ts = window_start + sec_offset
+            ts_str = datetime.fromtimestamp(ts, tz=timezone.utc).strftime(
+                "%Y-%m-%dT%H:%M:%SZ"
+            )
+            for i1 in range(1, 101):
+                id1 = f"id{i1:03d}"
+                id2 = f"id{(i1 * 7 + sec_offset) % 100 + 1:03d}"
+                msg = (
+                    f'{{"timestamp":"{ts_str}",'
+                    f'"id1":"{id1}","id2":"{id2}","id3":"x",'
+                    f'"id4":1,"id5":1,"id6":1,'
+                    f'"v1":{i1 % 5 + 1},"v2":1,"v3":1.0}}'
+                ).encode()
+                rows.append((msg, ts * 1000))
+
+        # Send all rows and record send-complete time
+        for msg, ts_ms in rows:
+            producer.send(kafka_topic, value=msg, timestamp_ms=ts_ms)
+        producer.flush()
+        send_done = time.time()
+        print(f"  Trial {trial+1}: sent {len(rows)} rows for window ending {query_ts}")
+
+        # Poll QE until query for this window succeeds
+        query = (
+            f"SELECT QUANTILE(0.95, v1) FROM h2o_groupby "
+            f"WHERE timestamp BETWEEN DATEADD(s, -10, '{query_ts}') AND '{query_ts}' "
+            f"GROUP BY id1, id2"
+        )
+        encoded = urllib.parse.quote(query)
+        url = f"{asap_url}?query={encoded}"
+
+        timeout = 120
+        while time.time() - send_done < timeout:
+            try:
+                r = session.get(url, timeout=5)
+                if r.status_code == 200 and r.text.strip():
+                    latency_s = time.time() - send_done
+                    latencies.append(latency_s)
+                    print(f"    → result available after {latency_s:.2f}s")
+                    break
+            except Exception:
+                pass
+            time.sleep(0.5)
+        else:
+            print(f"    → TIMEOUT after {timeout}s")
+
+    producer.close()
+
+    if not latencies:
+        print("WARNING: Could not measure pipeline latency")
+        return 0.0
+
+    latencies.sort()
+    median = latencies[len(latencies) // 2]
+    print(f"\nPipeline latency (data→query): median={median:.2f}s across {len(latencies)} trials")
+    return median * 1000  # return ms

-def run_query(query: str, endpoint_url: str, session: requests.Session, timeout: int = 30) -> Tuple[float, Optional[str], Optional[str]]:
+
+# ---------------------------------------------------------------------------
+# Benchmark runner
+# ---------------------------------------------------------------------------
+
+def run_query(
+    query: str, endpoint_url: str, session: requests.Session, timeout: int = 30
+) -> Tuple[float, Optional[str], Optional[str]]:
     encoded_query = urllib.parse.quote(query)
-
-    if "?" in endpoint_url:
-        url = f"{endpoint_url}&query={encoded_query}"
-    else:
-        url = f"{endpoint_url}?query={encoded_query}"
+    separator = "&" if "?" in endpoint_url else "?"
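+    # quote() percent-encodes the spaces, quotes, and newlines in the SQL so it
+    # survives as a single query= parameter; checking for an existing "?" lets
+    # the same helper target both bare URLs and pre-parameterized endpoints.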
+ url = f"{endpoint_url}{separator}query={encoded_query}" try: start_time = time.time() @@ -166,16 +282,25 @@ def run_query(query: str, endpoint_url: str, session: requests.Session, timeout: return latency_ms, response.text.strip(), None else: return latency_ms, None, f"HTTP {response.status_code}: {response.text}" - except requests.Timeout: return timeout * 1000, None, "Timeout" except Exception as e: return 0, None, str(e) -def run_benchmark(sql_file: Path, endpoint_url: str, output_csv: Path, mode: str, load_data: bool, query_filter: Optional[List[str]] = None): + +def run_benchmark( + sql_file: Path, + endpoint_url: str, + output_csv: Path, + mode: str = "baseline", + query_filter: Optional[List[str]] = None, + pipeline_overhead_ms: float = 0.0, +): print(f"\nRunning benchmark in {mode} mode...") print(f"Endpoint: {endpoint_url}") print(f"Output: {output_csv}") + if pipeline_overhead_ms > 0: + print(f"Pipeline overhead per query: {pipeline_overhead_ms:.2f}ms") queries = extract_queries_from_sql(sql_file) if query_filter: @@ -183,107 +308,123 @@ def run_benchmark(sql_file: Path, endpoint_url: str, output_csv: Path, mode: str print(f"Found {len(queries)} queries") session = requests.Session() - - # Lists to store plotting data - plot_query_ids = [] - plot_latencies = [] + serving_latencies: List[float] = [] + total_latencies: List[float] = [] + plot_latencies: List[float] = [] with open(output_csv, "w", newline="") as csvfile: writer = csv.writer(csvfile) - writer.writerow(["query_id", "latency_ms", "result_rows", "result_preview", "error", "mode"]) + writer.writerow(["query_id", "latency_ms", "serving_ms", "pipeline_ms", "result_rows", "result_preview", "error", "mode"]) for query_id, sql in queries: print(f"Running {query_id}...", end=" ", flush=True) - - latency_ms, result, error = run_query(sql, endpoint_url, session) + serving_ms, result, error = run_query(sql, endpoint_url, session) if error: - print(f"✗ {error}") - writer.writerow([query_id, latency_ms, 0, "", error, mode]) - # Append 0 for failed queries on the plot to show they failed - plot_query_ids.append(query_id) + print(f"ERROR {error}") + writer.writerow([query_id, serving_ms, serving_ms, 0, 0, "", error, mode]) plot_latencies.append(0.0) else: + total_ms = serving_ms + pipeline_overhead_ms result_lines = result.strip().split("\n") if result else [] num_rows = len(result_lines) preview = result[:100].replace("\n", " | ") if result else "" - print(f"✓ {latency_ms:.2f}ms ({num_rows} rows)") - writer.writerow([query_id, f"{latency_ms:.2f}", num_rows, preview, "", mode]) - - plot_query_ids.append(query_id) - plot_latencies.append(latency_ms) - + serving_latencies.append(serving_ms) + total_latencies.append(total_ms) + plot_latencies.append(total_ms) + if pipeline_overhead_ms > 0: + print(f"{total_ms:.2f}ms (serving={serving_ms:.2f}ms + pipeline={pipeline_overhead_ms:.2f}ms, {num_rows} rows)") + else: + print(f"{total_ms:.2f}ms ({num_rows} rows)") + writer.writerow([query_id, f"{total_ms:.2f}", f"{serving_ms:.2f}", f"{pipeline_overhead_ms:.2f}", num_rows, preview, "", mode]) + time.sleep(0.1) - print(f"\n✓ Results saved to {output_csv}") + print(f"\nResults saved to {output_csv}") + + if total_latencies: + total_latencies.sort() + serving_latencies.sort() + n = len(total_latencies) + def stats(arr): + return arr[0], sum(arr)/len(arr), arr[int(len(arr)*0.5)], arr[int(len(arr)*0.95)], arr[-1] + + t_min, t_avg, t_p50, t_p95, t_max = stats(total_latencies) + print(f"\nTotal latency summary ({n} successful queries):") + print(f" 
min={t_min:.2f}ms avg={t_avg:.2f}ms p50={t_p50:.2f}ms p95={t_p95:.2f}ms max={t_max:.2f}ms") + if pipeline_overhead_ms > 0: + s_min, s_avg, s_p50, s_p95, s_max = stats(serving_latencies) + print(f" (serving only: min={s_min:.2f}ms avg={s_avg:.2f}ms p50={s_p50:.2f}ms)") + print(f" (pipeline overhead: {pipeline_overhead_ms:.2f}ms per query)") - # --- Plotting Code --- if plot_latencies: plt.figure(figsize=(10, 6)) - - # Give ASAP and Baseline distinct colors bar_color = '#1f77b4' if mode == 'baseline' else '#ff7f0e' - - # Create a numerical X-axis (1, 2, 3...) execution_order = list(range(1, len(plot_latencies) + 1)) - plt.bar(execution_order, plot_latencies, color=bar_color, edgecolor='black') - plt.xlabel("Query Execution Order", fontsize=12, fontweight='bold') plt.ylabel("Latency (ms)", fontsize=12, fontweight='bold') - - # Set tick marks at every 10 on the X axis max_order = len(execution_order) - plt.xticks(range(0, max_order + 1, 10)) - - # Build dynamic title based on parameters - load_text = "With Data Loading" if load_data else "Without Data Loading" - plt.title(f"Query Latency - {mode.upper()} Mode ({load_text})", fontsize=14, fontweight='bold') - + tick_step = max(1, max_order // 20) * 5 + plt.xticks(range(0, max_order + 1, tick_step)) + plt.title(f"Query Latency - {mode.upper()} Mode", fontsize=14, fontweight='bold') plt.grid(axis='y', linestyle='--', alpha=0.7) plt.tight_layout() - - # Save plot to the same directory as the output CSV, replacing the extension with .png plot_output = output_csv.with_suffix(".png") plt.savefig(plot_output) - print(f"✓ Graph successfully saved to {plot_output}") + plt.close() + print(f"Plot saved to {plot_output}") +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + def main(): - parser = argparse.ArgumentParser(description="Benchmark ASAP queries on H2o data") - parser.add_argument("--mode", choices=["baseline", "asap"], default="asap", help="ASAP mode (default) or Baseline (ClickHouse only)") - parser.add_argument("--load-data", action="store_true", help="Load H2o data into ClickHouse") - parser.add_argument("--clickhouse-url", default="http://localhost:8123", help="ClickHouse server URL") - parser.add_argument("--asap-url", default="http://localhost:8088/clickhouse/query", help="ASAP QueryEngine URL") - parser.add_argument("--output", default="asap_results.csv", help="Output CSV file") - parser.add_argument("--sql-file", default=None, help="SQL file to use (default: asap_h2o_queries.sql)") - parser.add_argument("--filter", default=None, help="Comma-separated query IDs to run (e.g. 
T5,T6)") + parser = argparse.ArgumentParser(description="Benchmark ASAP vs ClickHouse on H2O groupby data") + parser.add_argument("--mode", choices=["baseline", "asap"], default="asap") + parser.add_argument("--load-data", action="store_true", help="Load H2O data") + parser.add_argument("--clickhouse-url", default="http://localhost:8123") + parser.add_argument("--asap-url", default="http://localhost:8088/clickhouse/query") + parser.add_argument("--output", default="asap_results.csv") + parser.add_argument("--sql-file", default=None) + parser.add_argument("--filter", default=None, help="Comma-separated query IDs") + parser.add_argument("--no-benchmark", action="store_true", help="Load data only") + parser.add_argument("--skip-table-init", action="store_true") + parser.add_argument("--load-kafka", action="store_true", help="Stream data to Kafka (for Arroyo sketch pipeline)") + parser.add_argument("--max-rows", type=int, default=0, help="Max rows to load (0 = all)") args = parser.parse_args() - output_path = Path(args.output) - if output_path.exists() and output_path.is_dir(): - print(f"Error: Output {output_path} is a directory. Please specify a file path (e.g., results.csv)") - return 1 + if args.load_data: + if not load_h2o_data_clickhouse(args.clickhouse_url, skip_table_init=args.skip_table_init, max_rows=args.max_rows): + print("Failed to load data") + return 1 + + if args.load_kafka: + produce_h2o_to_kafka(max_rows=args.max_rows) + + if args.no_benchmark: + return 0 if args.sql_file: sql_file = Path(args.sql_file) - elif args.mode == "asap": - sql_file = Path(__file__).parent / "asap_mode_queries.sql" + elif args.mode == "baseline": + sql_file = Path(__file__).parent / "clickhouse_quantile_queries.sql" else: - sql_file = Path(__file__).parent / "asap_h2o_queries.sql" + sql_file = Path(__file__).parent / "asap_quantile_queries.sql" - if args.load_data: - if not load_h2o_data(args.clickhouse_url, args.mode): - print("Failed to load data") - return 1 - endpoint = args.clickhouse_url if args.mode == "baseline" else args.asap_url query_filter = [q.strip() for q in args.filter.split(",")] if args.filter else None - # Notice we pass args.load_data so the plotting logic knows whether data was loaded - run_benchmark(sql_file, endpoint, output_path, args.mode, args.load_data, query_filter) + pipeline_overhead_ms = 0.0 + if args.mode == "asap": + print("\nMeasuring pipeline latency (data → Kafka → Arroyo → QE → query)...") + pipeline_overhead_ms = measure_pipeline_latency(asap_url=args.asap_url) + + run_benchmark(sql_file, endpoint, Path(args.output), args.mode, query_filter, pipeline_overhead_ms) return 0 + if __name__ == "__main__": - exit(main()) \ No newline at end of file + exit(main()) diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_pipeline.sh b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_pipeline.sh index 82229a6..899a149 100755 --- a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_pipeline.sh +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_pipeline.sh @@ -1,50 +1,64 @@ #!/bin/bash - -# Full pipeline script: starts Kafka, ClickHouse, Arroyo, and QueryEngine, -# then runs the benchmark. +# +# Full benchmark pipeline for H2O groupby: ASAP vs ClickHouse baseline. 
# # Usage: -# ASAP mode (full from scratch): -# ./run_pipeline.sh --mode asap --load-data --output asap_results_run1.csv +# # ASAP mode (full from scratch): +# ./run_pipeline.sh --mode asap --load-data --output asap_results.csv +# +# # Baseline mode (full from scratch): +# ./run_pipeline.sh --mode baseline --load-data --output baseline_results.csv # -# ASAP mode (infra already running, data already loaded): -# ./run_pipeline.sh --mode asap --skip-infra --output asap_results_run2.csv +# # Both modes back-to-back: +# ./run_pipeline.sh --mode both --load-data # -# Baseline mode (full from scratch): -# ./run_pipeline.sh --mode baseline --load-data --output baseline_results.csv +# # Skip infrastructure startup (already running): +# ./run_pipeline.sh --mode asap --skip-infra --output asap_results.csv # -# Baseline mode (ClickHouse already running): -# ./run_pipeline.sh --mode baseline --skip-infra --output baseline_results.csv +# Environment variables (override defaults): +# KAFKA_INSTALL_DIR Path to kafka installation dir (contains run.sh + kafka/) +# CLICKHOUSE_INSTALL_DIR Path to clickhouse installation dir +# ARROYO_BIN Path to arroyo binary +# ARROYO_CONFIG Path to arroyo config.yaml +# QE_BIN Path to query_engine_rust binary +# ARROYOSKETCH_DIR Path to asap-summary-ingest directory set -euo pipefail # ========================================== -# 1. DYNAMIC PATH RESOLUTION +# 1. PATH RESOLUTION # ========================================== SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" -ROOT_DIR="$(cd "$SCRIPT_DIR/../.." &>/dev/null && pwd)" +TOOLS_DIR="$(cd "$SCRIPT_DIR/../.." &>/dev/null && pwd)" +ROOT_DIR="$(cd "$TOOLS_DIR/.." &>/dev/null && pwd)" -KAFKA_INSTALL_DIR="$ROOT_DIR/Utilities/installation/kafka" -CLICKHOUSE_INSTALL_DIR="$ROOT_DIR/Utilities/installation/clickhouse" +KAFKA_INSTALL_DIR="${KAFKA_INSTALL_DIR:-$TOOLS_DIR/installation/kafka}" +CLICKHOUSE_INSTALL_DIR="${CLICKHOUSE_INSTALL_DIR:-$TOOLS_DIR/installation/clickhouse}" KAFKA_DIR="$KAFKA_INSTALL_DIR/kafka" CLICKHOUSE_DIR="$CLICKHOUSE_INSTALL_DIR/clickhouse" +ARROYO_BIN="${ARROYO_BIN:-$ROOT_DIR/arroyo/target/release/arroyo}" +ARROYO_CONFIG="${ARROYO_CONFIG:-$ROOT_DIR/asap-summary-ingest/config.yaml}" +QE_BIN="${QE_BIN:-$ROOT_DIR/target/release/query_engine_rust}" +ARROYOSKETCH_DIR="${ARROYOSKETCH_DIR:-$ROOT_DIR/asap-summary-ingest}" # ========================================== # 2. 
ARGUMENT PARSING # ========================================== MODE="asap" LOAD_DATA=0 -OUTPUT_FILE="asap_results_run1.csv" +OUTPUT_FILE="" SKIP_INFRA=0 +MAX_ROWS=0 print_usage() { echo "Usage: ./run_pipeline.sh [OPTIONS]" echo "Options:" - echo " --mode [asap|baseline] Execution mode (default: asap)" - echo " --load-data Stream H2O dataset into ClickHouse/Kafka" - echo " --output [FILE] Output CSV file (default: asap_results_run1.csv)" - echo " --skip-infra Skip starting Kafka/ClickHouse (assume already running)" - echo " --help Show this message" + echo " --mode [asap|baseline|both] Execution mode (default: asap)" + echo " --load-data Load H2O dataset" + echo " --output [FILE] Output CSV file" + echo " --skip-infra Skip starting infrastructure" + echo " --max-rows [N] Max rows to load (0 = all)" + echo " --help Show this message" } while [[ "$#" -gt 0 ]]; do @@ -53,6 +67,7 @@ while [[ "$#" -gt 0 ]]; do --load-data) LOAD_DATA=1 ;; --output) OUTPUT_FILE="$2"; shift ;; --skip-infra) SKIP_INFRA=1 ;; + --max-rows) MAX_ROWS="$2"; shift ;; --help) print_usage; exit 0 ;; *) echo "Unknown parameter: $1"; print_usage; exit 1 ;; esac @@ -63,19 +78,13 @@ done # 3. HELPER FUNCTIONS # ========================================== -# Wait for a URL to return HTTP 200. Args: name url [max_seconds] wait_for_url() { - local name="$1" - local url="$2" - local max_seconds="${3:-120}" - local elapsed=0 + local name="$1" url="$2" max_seconds="${3:-120}" elapsed=0 echo "Waiting for $name..." while ! curl -sf "$url" >/dev/null 2>&1; do - sleep 2 - elapsed=$((elapsed + 2)) + sleep 2; elapsed=$((elapsed + 2)) if [ "$elapsed" -ge "$max_seconds" ]; then echo "ERROR: $name did not become ready within ${max_seconds}s" - echo "Check logs for details" exit 1 fi done @@ -83,15 +92,12 @@ wait_for_url() { } wait_for_kafka() { - local max_seconds="${1:-120}" - local elapsed=0 + local max_seconds="${1:-120}" elapsed=0 echo "Waiting for Kafka..." while ! "$KAFKA_DIR/bin/kafka-topics.sh" --bootstrap-server localhost:9092 --list >/dev/null 2>&1; do - sleep 2 - elapsed=$((elapsed + 2)) + sleep 2; elapsed=$((elapsed + 2)) if [ "$elapsed" -ge "$max_seconds" ]; then echo "ERROR: Kafka did not become ready within ${max_seconds}s" - echo "Check /tmp/kafka.log for details" exit 1 fi done @@ -99,10 +105,8 @@ wait_for_kafka() { } wait_for_arroyo_pipeline_running() { - local max_seconds="${1:-300}" - local elapsed=0 - echo "Waiting for Arroyo pipeline 'asap_h2o_pipeline' to reach RUNNING state..." - echo "(This may take up to ${max_seconds}s while Arroyo compiles Rust UDFs)" + local max_seconds="${1:-300}" elapsed=0 + echo "Waiting for Arroyo pipeline to reach RUNNING state..." 
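+    # Arroyo's /api/v1/pipelines endpoint reports state=null with stop='none'
+    # while a pipeline is actively running, so the poll below maps that
+    # combination to 'running'.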
while true; do state=$(curl -sf "http://localhost:5115/api/v1/pipelines" 2>/dev/null | \ python3 -c " @@ -111,9 +115,7 @@ try: data = json.load(sys.stdin) for p in data.get('data', []): if p.get('name') == 'asap_h2o_pipeline': - # Arroyo returns null/None for state when actively running state = p.get('state') - action = p.get('action', '') stop = p.get('stop', '') if state is None and stop == 'none': print('running') @@ -129,47 +131,18 @@ except Exception: echo "Pipeline is RUNNING" return 0 fi - echo " Pipeline state: $state (elapsed: ${elapsed}s)" - sleep 5 - elapsed=$((elapsed + 5)) - + sleep 5; elapsed=$((elapsed + 5)) if [ "$elapsed" -ge "$max_seconds" ]; then echo "ERROR: Pipeline did not reach RUNNING state within ${max_seconds}s" - echo "Check /tmp/arroyo.log for details" exit 1 fi done } -wait_for_data_loaded() { - local min_rows="${1:-9000000}" - echo "Waiting for ClickHouse h2o_groupby to have at least $min_rows rows..." - while true; do - count=$(curl -sf "http://localhost:8123/" -d "SELECT count(*) FROM h2o_groupby" 2>/dev/null | tr -d '[:space:]' || echo "0") - if [ -n "$count" ] && [ "$count" -ge "$min_rows" ] 2>/dev/null; then - echo "Data ready: $count rows" - return 0 - fi - echo " Rows in h2o_groupby: ${count:-0}" - sleep 5 - done -} - -ensure_extracted() { - local zip_file="$1" - local target_dir="$2" - if [ ! -d "$target_dir" ]; then - echo "Extracting $(basename "$zip_file")..." - unzip -q "$zip_file" -d "$(dirname "$target_dir")" - echo "Extracted to $target_dir" - fi -} - -# Start Kafka only if it isn't already responding start_kafka_if_needed() { if "$KAFKA_DIR/bin/kafka-topics.sh" --bootstrap-server localhost:9092 --list >/dev/null 2>&1; then - echo "Kafka already running, skipping start" + echo "Kafka already running" return 0 fi echo "Starting Kafka..." @@ -177,10 +150,9 @@ start_kafka_if_needed() { wait_for_kafka 120 } -# Start ClickHouse only if it isn't already responding start_clickhouse_if_needed() { if curl -sf "http://localhost:8123/ping" >/dev/null 2>&1; then - echo "ClickHouse already running, skipping start" + echo "ClickHouse already running" return 0 fi echo "Starting ClickHouse..." @@ -188,90 +160,118 @@ start_clickhouse_if_needed() { wait_for_url "ClickHouse" "http://localhost:8123/ping" 120 } -init_clickhouse_tables() { - echo "Initializing ClickHouse tables..." - python3 - <<'PYEOF' -import requests - -with open("h2o_init.sql") as f: - content = f.read() - -statements = [s.strip() for s in content.split(";") if s.strip()] -for sql in statements: - r = requests.post("http://localhost:8123/", data=sql) - if not r.ok: - print(f" WARN: {r.text.strip()[:120]} | SQL: {sql[:60]}") - else: - print(f" OK: {sql[:60]}") -PYEOF +purge_kafka_topic() { + local topic="$1" max_wait=30 elapsed=0 + echo "Purging Kafka topic '$topic'..." 
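+    # Reset by delete-and-recreate: drop the topic, wait for the deletion to
+    # complete, then recreate it with one partition and retention.ms=-1
+    # (unlimited retention) so benchmark records are never aged out mid-run.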
+ "$KAFKA_DIR/bin/kafka-topics.sh" --bootstrap-server localhost:9092 \ + --delete --topic "$topic" 2>/dev/null || true + while "$KAFKA_DIR/bin/kafka-topics.sh" --bootstrap-server localhost:9092 \ + --list 2>/dev/null | grep -qx "$topic"; do + sleep 1; elapsed=$((elapsed + 1)) + if [ "$elapsed" -ge "$max_wait" ]; then + echo " WARN: topic '$topic' still exists after ${max_wait}s" + break + fi + done + "$KAFKA_DIR/bin/kafka-topics.sh" --bootstrap-server localhost:9092 \ + --create --topic "$topic" --partitions 1 --replication-factor 1 \ + --config retention.ms=-1 \ + 2>/dev/null || true + echo " '$topic' reset" +} + +get_sketch_topic_offset() { + "$KAFKA_DIR/bin/kafka-get-offsets.sh" --bootstrap-server localhost:9092 \ + --topic sketch_topic 2>/dev/null | cut -d: -f3 || echo "0" +} + +wait_for_new_sketches() { + local initial_offset="$1" max_seconds="${2:-600}" elapsed=0 + echo "Waiting for sketches (baseline offset: $initial_offset)..." + while true; do + current_offset=$(get_sketch_topic_offset) + if [ -n "$current_offset" ] && [ "$current_offset" -gt "$initial_offset" ] 2>/dev/null; then + echo "Sketches emitted (offset: $current_offset, +$((current_offset - initial_offset)) new)" + return 0 + fi + echo " sketch_topic offset: ${current_offset} (elapsed: ${elapsed}s) — sending flush nudge..." + FLUSH_TS=$(date -u +%Y-%m-%dT%H:%M:%SZ) + echo "{\"timestamp\":\"${FLUSH_TS}\",\"id1\":\"flush\",\"id2\":\"flush\",\"id3\":\"flush\",\"id4\":0,\"id5\":0,\"id6\":0,\"v1\":0,\"v2\":0,\"v3\":0.0}" | \ + "$KAFKA_DIR/bin/kafka-console-producer.sh" --bootstrap-server localhost:9092 --topic h2o_groupby \ + >/dev/null 2>&1 || true + sleep 30; elapsed=$((elapsed + 30)) + if [ "$elapsed" -ge "$max_seconds" ]; then + echo "ERROR: No sketches after ${max_seconds}s" + exit 1 + fi + done } cleanup_background_jobs() { - echo "Cleaning up ASAP background processes..." - pkill -f "arroyo.*cluster" || true - pkill -f "query_engine_rust" || true + echo "Cleaning up background processes..." + pkill -f "arroyo.*cluster" 2>/dev/null || true + pkill -f "query_engine_rust" 2>/dev/null || true sleep 2 } # ========================================== # 4. BASELINE MODE # ========================================== -if [ "$MODE" = "baseline" ]; then - echo "RUNNING IN BASELINE MODE" +run_baseline() { + local output="${1:-baseline_results.csv}" + echo "========================================" + echo "RUNNING BASELINE MODE" + echo "========================================" if [ "$SKIP_INFRA" -eq 0 ]; then - ensure_extracted "$KAFKA_INSTALL_DIR/kafka.zip" "$KAFKA_DIR" - ensure_extracted "$CLICKHOUSE_INSTALL_DIR/clickhouse.zip" "$CLICKHOUSE_DIR" - start_kafka_if_needed start_clickhouse_if_needed fi if [ "$LOAD_DATA" -eq 1 ]; then cd "$SCRIPT_DIR" - init_clickhouse_tables + python3 run_benchmark.py --mode baseline --load-data --no-benchmark --max-rows "$MAX_ROWS" fi - CMD="python3 run_benchmark.py --mode baseline --output $OUTPUT_FILE" - [ "$LOAD_DATA" -eq 1 ] && CMD="$CMD --load-data" - - echo "Executing: $CMD" - eval "$CMD" - echo "Baseline run complete!" + cd "$SCRIPT_DIR" + python3 run_benchmark.py --mode baseline --output "$output" + echo "Baseline complete: $output" +} # ========================================== # 5. 
ASAP MODE # ========================================== -elif [ "$MODE" = "asap" ]; then - echo "RUNNING IN ASAP MODE" +run_asap() { + local output="${1:-asap_results.csv}" + echo "========================================" + echo "RUNNING ASAP MODE" + echo "========================================" - # Clean up any stale processes from previous runs cleanup_background_jobs if [ "$SKIP_INFRA" -eq 0 ]; then - ensure_extracted "$KAFKA_INSTALL_DIR/kafka.zip" "$KAFKA_DIR" - ensure_extracted "$CLICKHOUSE_INSTALL_DIR/clickhouse.zip" "$CLICKHOUSE_DIR" start_kafka_if_needed start_clickhouse_if_needed fi - # Initialize ClickHouse tables only when loading fresh data - # (h2o_init.sql drops and recreates tables, which would wipe existing data) + # --- Purge Kafka topics for clean slate --- + purge_kafka_topic h2o_groupby + purge_kafka_topic sketch_topic + + # --- Load data into ClickHouse (direct) for data presence --- if [ "$LOAD_DATA" -eq 1 ]; then cd "$SCRIPT_DIR" - init_clickhouse_tables + python3 run_benchmark.py --mode asap --load-data --no-benchmark --max-rows "$MAX_ROWS" fi - # Start Arroyo cluster + # --- Start Arroyo --- echo "Starting Arroyo cluster..." - cd "$ROOT_DIR/arroyo" - nohup ./target/release/arroyo --config "$ROOT_DIR/ArroyoSketch/config.yaml" cluster \ - >/tmp/arroyo.log 2>&1 & - + cd "$(dirname "$ARROYO_BIN")" + nohup "$ARROYO_BIN" --config "$ARROYO_CONFIG" cluster >/tmp/arroyo.log 2>&1 & wait_for_url "Arroyo API" "http://localhost:5115/api/v1/pipelines" 60 - # Submit Arroyo pipeline + # --- Submit pipeline --- echo "Submitting Arroyo pipeline..." - cd "$ROOT_DIR/ArroyoSketch" + cd "$ARROYOSKETCH_DIR" python3 run_arroyosketch.py \ --source_type kafka \ --kafka_input_format json \ @@ -280,48 +280,37 @@ elif [ "$MODE" = "asap" ]; then --pipeline_name asap_h2o_pipeline \ --config_file_path "$SCRIPT_DIR/streaming_config.yaml" \ --output_kafka_topic sketch_topic \ - --output_dir ./outputs \ + --output_dir "$SCRIPT_DIR/outputs" \ --parallelism 1 \ --query_language sql - # Poll until pipeline is RUNNING (Arroyo compiles Rust UDFs, takes ~1-3 minutes) wait_for_arroyo_pipeline_running 300 - # Wait for Arroyo's Kafka source worker to fully initialize and assign partitions. - # load_h2o_data re-runs h2o_init.sql (DROP/CREATE h2o_groupby_queue), which causes a - # brief Kafka metadata disruption. If this races with Arroyo's initial partition assignment, - # the worker sees 0 partitions and goes permanently idle. A short sleep avoids the race. - echo "Waiting 20s for Arroyo worker to initialize Kafka partition assignment..." + echo "Waiting 20s for Arroyo worker initialization..." sleep 20 - # Load data through Kafka so Arroyo builds sketches AND MergeTree is populated - cd "$SCRIPT_DIR" - if [ "$LOAD_DATA" -eq 1 ]; then - echo "Loading data through Kafka (ASAP mode)..." - python3 run_benchmark.py --mode asap --load-data + INITIAL_SKETCH_OFFSET=$(get_sketch_topic_offset) - # Wait for MergeTree to reflect the data (materialized view consumes from Kafka) - wait_for_data_loaded 9000000 + # --- Stream data through Kafka for Arroyo --- + if [ "$LOAD_DATA" -eq 1 ]; then + cd "$SCRIPT_DIR" + echo "Streaming data to Kafka for sketch generation..." + python3 run_benchmark.py --load-kafka --no-benchmark --max-rows "$MAX_ROWS" - # Send a flush record to advance Arroyo's watermark past the last window. - # This ensures the final 120s tumbling window is closed and its sketch is emitted. - echo "Sending watermark flush record to Kafka..." 
- FLUSH_TS=$(date -u +%Y-%m-%dT%H:%M:%SZ) - curl -sf "http://localhost:8123/?query=INSERT%20INTO%20h2o_groupby_queue%20FORMAT%20JSONEachRow" \ - --data-raw "{\"timestamp\":\"${FLUSH_TS}\",\"id1\":\"flush\",\"id2\":\"flush\",\"id3\":\"flush\",\"id4\":0,\"id5\":0,\"id6\":0,\"v1\":0,\"v2\":0,\"v3\":0.0}" \ - || echo "Warning: flush record insert failed (non-fatal)" - - # Give Arroyo additional time to close and flush the final sketch windows - echo "Waiting 30s for Arroyo to flush all sketch windows..." - sleep 30 - else - echo "Skipping data load (--load-data not provided)" + # Wait for sketches + wait_for_new_sketches "$INITIAL_SKETCH_OFFSET" 600 fi - # Start QueryEngine + # --- Reset consumer group offsets --- + echo "Resetting query-engine-rust consumer group offsets..." + "$KAFKA_DIR/bin/kafka-consumer-groups.sh" --bootstrap-server localhost:9092 \ + --group query-engine-rust --topic sketch_topic \ + --reset-offsets --to-earliest --execute 2>/dev/null || true + + # --- Start QueryEngine --- echo "Starting QueryEngine..." - cd "$ROOT_DIR/QueryEngineRust" - nohup ./target/release/query_engine_rust \ + cd "$(dirname "$QE_BIN")" + nohup env TZ=UTC "$QE_BIN" \ --kafka-topic sketch_topic \ --input-format json \ --config "$SCRIPT_DIR/inference_config.yaml" \ @@ -329,25 +318,45 @@ elif [ "$MODE" = "asap" ]; then --http-port 8088 \ --delete-existing-db \ --log-level info \ - --output-dir ./output \ + --output-dir "$SCRIPT_DIR/output" \ --streaming-engine arroyo \ --query-language SQL \ --lock-strategy per-key \ --prometheus-scrape-interval 1 >/tmp/query_engine.log 2>&1 & - # Poll until QueryEngine HTTP server is accepting connections wait_for_url "QueryEngine" "http://localhost:8088/clickhouse/query?query=SELECT+1" 60 - # Run benchmark against the sketches built during the data load above. - # Uses asap_mode_queries.sql (default for asap mode) with QUANTILE(0.95, v1) and NOW()-based - # 600s windows that contain the recently-closed 120s tumbling sketch windows. - echo "Executing benchmark queries against existing sketches..." - cd "$SCRIPT_DIR" - python3 run_benchmark.py --mode asap --output "$OUTPUT_FILE" + echo "Waiting 60s for QueryEngine to ingest sketches..." + sleep 60 - echo "ASAP run complete! Results: $OUTPUT_FILE" + # --- Run benchmark --- + cd "$SCRIPT_DIR" + python3 run_benchmark.py --mode asap --output "$output" + echo "ASAP complete: $output" +} -else - echo "Invalid mode: $MODE. Use 'asap' or 'baseline'." - exit 1 -fi +# ========================================== +# 6. DISPATCH +# ========================================== +case "$MODE" in + baseline) + run_baseline "${OUTPUT_FILE:-baseline_results.csv}" + ;; + asap) + run_asap "${OUTPUT_FILE:-asap_results.csv}" + ;; + both) + run_baseline "baseline_results.csv" + run_asap "asap_results.csv" + echo "" + echo "========================================" + echo "Generating comparison plot..." + echo "========================================" + cd "$SCRIPT_DIR" + python3 plot_latency.py + ;; + *) + echo "Invalid mode: $MODE. Use 'asap', 'baseline', or 'both'." 
+ exit 1 + ;; +esac diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/streaming_config.yaml b/asap-tools/execution-utilities/asap_benchmark_pipeline/streaming_config.yaml index ebf8565..4883e33 100644 --- a/asap-tools/execution-utilities/asap_benchmark_pipeline/streaming_config.yaml +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/streaming_config.yaml @@ -1,7 +1,7 @@ tables: - name: h2o_groupby time_column: timestamp - metadata_columns: [id1, id2, id3] + metadata_columns: [id1, id2] value_columns: [v1] aggregations: @@ -9,14 +9,14 @@ aggregations: aggregationType: DatasketchesKLL aggregationSubType: '' labels: - grouping: [id1, id2, id3] + grouping: [id1, id2] rollup: [] aggregated: [] table_name: h2o_groupby value_column: v1 parameters: K: 200 - tumblingWindowSize: 120 - windowSize: 120 + tumblingWindowSize: 10 + windowSize: 10 windowType: tumbling - spatialFilter: '' \ No newline at end of file + spatialFilter: '' From ea50243ec551cc2a119aab907fc9c4f820e838a8 Mon Sep 17 00:00:00 2001 From: benjamib112 Date: Mon, 23 Mar 2026 06:06:14 -0400 Subject: [PATCH 3/5] added README --- .../asap_benchmark_pipeline/README.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/README.md diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/README.md b/asap-tools/execution-utilities/asap_benchmark_pipeline/README.md new file mode 100644 index 0000000..d3d5b55 --- /dev/null +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/README.md @@ -0,0 +1,93 @@ +# ASAP H2O Benchmark Pipeline + +Benchmarks ASAP (KLL sketch-based) query serving against a ClickHouse baseline using the [H2O groupby dataset](https://h2oai.github.io/db-benchmark/) (10M rows, 100 groups). + +**ASAP mode** streams data through Kafka into Arroyo, which builds KLL sketches per tumbling window per group. The QueryEngine (QE) ingests these sketches and serves approximate quantile queries directly from them. + +**Baseline mode** loads the same data into a ClickHouse MergeTree table and runs equivalent quantile queries using ClickHouse's native `quantile()` function. + +Each benchmark run produces a CSV of per-query latencies and a latency plot (`.png`). + +## Prerequisites + +- Kafka and ClickHouse installed (see `../../installation/`) +- Arroyo binary built (`arroyo/target/release/arroyo`) +- QueryEngine binary built (`target/release/query_engine_rust`) +- Python 3 with `requests`, `kafka-python`, `gdown`, `matplotlib` + +## Usage + +### Full pipeline (recommended) + +```bash +# Run baseline benchmark (starts infra, loads data, runs queries) +./run_pipeline.sh --mode baseline --load-data --output baseline_results.csv + +# Clean up between runs for fair comparison +./cleanup.sh + +# Run ASAP benchmark +./run_pipeline.sh --mode asap --load-data --output asap_results.csv + +# Run both back-to-back and generate comparison plot +./run_pipeline.sh --mode both --load-data +``` + +### Options + +``` +--mode [asap|baseline|both] Execution mode (default: asap) +--load-data Download and load the H2O dataset +--output [FILE] Output CSV file +--skip-infra Skip starting Kafka/ClickHouse (already running) +--max-rows [N] Limit rows loaded (0 = all, default: all) +``` + +### Cleanup + +`cleanup.sh` kills all processes (QE, Arroyo, Kafka, ClickHouse), clears Kafka topics, drops the ClickHouse table, and flushes OS page caches to ensure identical starting conditions between runs. 
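+In `both` mode, `run_pipeline.sh` runs it automatically between the baseline
+and ASAP runs (with `--no-sudo`).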
+ +```bash +./cleanup.sh # full cleanup (requires sudo for cache drop) +./cleanup.sh --no-sudo # skip OS cache clearing +``` + +### Benchmark only (infra already running, data already loaded) + +```bash +python3 run_benchmark.py --mode baseline --output baseline_results.csv +python3 run_benchmark.py --mode asap --output asap_results.csv +``` + +### Comparison plot + +After running both modes, generate a side-by-side comparison: + +```bash +python3 plot_latency.py +``` + +## Configuration + +| File | Purpose | +|------|---------| +| `streaming_config.yaml` | Arroyo sketch pipeline config (window size, aggregation type, grouping) | +| `inference_config.yaml` | QE query-to-sketch mapping (must match `streaming_config.yaml` window size) | +| `h2o_init.sql` | ClickHouse table schema | +| `asap_quantile_queries.sql` | Queries for ASAP mode (QUANTILE syntax) | +| `clickhouse_quantile_queries.sql` | Queries for baseline mode (quantile() syntax) | + +### Changing window size + +To benchmark with a different tumbling window size (e.g., 120s): + +1. Set `tumblingWindowSize` and `windowSize` in `streaming_config.yaml` +2. Set the DATEADD offset in `inference_config.yaml` to match (e.g., `-120`) +3. Regenerate query files with matching window boundaries + +## Output + +Each run produces: +- `.csv` — per-query latencies, row counts, and result previews +- `.png` — bar chart of latency by query execution order +- `latency_comparison.png` — side-by-side comparison (from `plot_latency.py`) From 522233b0861ec9745e494e0b4abd1fe433be2bb5 Mon Sep 17 00:00:00 2001 From: benjamib112 Date: Mon, 23 Mar 2026 07:25:11 -0400 Subject: [PATCH 4/5] added cleanup between runs in pipeline, visualization per benchmark --- .../asap_benchmark_pipeline/run_benchmark.py | 113 ++++++++++++++---- .../asap_benchmark_pipeline/run_pipeline.sh | 5 + 2 files changed, 97 insertions(+), 21 deletions(-) diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py index 8ed3c5b..ebf7fc6 100755 --- a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_benchmark.py @@ -25,19 +25,26 @@ # Query extraction # --------------------------------------------------------------------------- + def extract_queries_from_sql(sql_file: Path) -> List[Tuple[str, str]]: """Extract (query_id, sql) pairs from an annotated SQL file.""" with open(sql_file) as f: content = f.read() pattern = r"-- ([A-Za-z0-9_]+):[^\n]*\n(SELECT[^;]+;)" - return [(qid, sql.strip()) for qid, sql in re.findall(pattern, content, re.DOTALL | re.IGNORECASE)] + return [ + (qid, sql.strip()) + for qid, sql in re.findall(pattern, content, re.DOTALL | re.IGNORECASE) + ] # --------------------------------------------------------------------------- # Data loading — ClickHouse direct # --------------------------------------------------------------------------- -def load_h2o_data_clickhouse(clickhouse_url: str, skip_table_init: bool = False, max_rows: int = 0): + +def load_h2o_data_clickhouse( + clickhouse_url: str, skip_table_init: bool = False, max_rows: int = 0 +): """Load H2O CSV into ClickHouse MergeTree (baseline path).""" if not skip_table_init: @@ -110,6 +117,7 @@ def _flush_batch(clickhouse_url: str, rows: list): # Data loading — Kafka (for Arroyo sketch pipeline) # --------------------------------------------------------------------------- + def produce_h2o_to_kafka(topic: str = "h2o_groupby", max_rows: 
int = 0): """Stream H2O CSV rows into Kafka with fixed 2024-01-01 message timestamps.""" csv_path = _download_h2o_csv() @@ -171,6 +179,7 @@ def _download_h2o_csv() -> str: # Pipeline latency measurement # --------------------------------------------------------------------------- + def measure_pipeline_latency( kafka_topic: str = "h2o_groupby", asap_url: str = "http://localhost:8088/clickhouse/query", @@ -258,7 +267,9 @@ def measure_pipeline_latency( latencies.sort() median = latencies[len(latencies) // 2] - print(f"\nPipeline latency (data→query): median={median:.2f}s across {len(latencies)} trials") + print( + f"\nPipeline latency (data→query): median={median:.2f}s across {len(latencies)} trials" + ) return median * 1000 # return ms @@ -266,6 +277,7 @@ def measure_pipeline_latency( # Benchmark runner # --------------------------------------------------------------------------- + def run_query( query: str, endpoint_url: str, session: requests.Session, timeout: int = 30 ) -> Tuple[float, Optional[str], Optional[str]]: @@ -314,7 +326,18 @@ def run_benchmark( with open(output_csv, "w", newline="") as csvfile: writer = csv.writer(csvfile) - writer.writerow(["query_id", "latency_ms", "serving_ms", "pipeline_ms", "result_rows", "result_preview", "error", "mode"]) + writer.writerow( + [ + "query_id", + "latency_ms", + "serving_ms", + "pipeline_ms", + "result_rows", + "result_preview", + "error", + "mode", + ] + ) for query_id, sql in queries: print(f"Running {query_id}...", end=" ", flush=True) @@ -322,7 +345,9 @@ def run_benchmark( if error: print(f"ERROR {error}") - writer.writerow([query_id, serving_ms, serving_ms, 0, 0, "", error, mode]) + writer.writerow( + [query_id, serving_ms, serving_ms, 0, 0, "", error, mode] + ) plot_latencies.append(0.0) else: total_ms = serving_ms + pipeline_overhead_ms @@ -333,10 +358,23 @@ def run_benchmark( total_latencies.append(total_ms) plot_latencies.append(total_ms) if pipeline_overhead_ms > 0: - print(f"{total_ms:.2f}ms (serving={serving_ms:.2f}ms + pipeline={pipeline_overhead_ms:.2f}ms, {num_rows} rows)") + print( + f"{total_ms:.2f}ms (serving={serving_ms:.2f}ms + pipeline={pipeline_overhead_ms:.2f}ms, {num_rows} rows)" + ) else: print(f"{total_ms:.2f}ms ({num_rows} rows)") - writer.writerow([query_id, f"{total_ms:.2f}", f"{serving_ms:.2f}", f"{pipeline_overhead_ms:.2f}", num_rows, preview, "", mode]) + writer.writerow( + [ + query_id, + f"{total_ms:.2f}", + f"{serving_ms:.2f}", + f"{pipeline_overhead_ms:.2f}", + num_rows, + preview, + "", + mode, + ] + ) time.sleep(0.1) @@ -346,29 +384,42 @@ def run_benchmark( total_latencies.sort() serving_latencies.sort() n = len(total_latencies) + def stats(arr): - return arr[0], sum(arr)/len(arr), arr[int(len(arr)*0.5)], arr[int(len(arr)*0.95)], arr[-1] + return ( + arr[0], + sum(arr) / len(arr), + arr[int(len(arr) * 0.5)], + arr[int(len(arr) * 0.95)], + arr[-1], + ) t_min, t_avg, t_p50, t_p95, t_max = stats(total_latencies) print(f"\nTotal latency summary ({n} successful queries):") - print(f" min={t_min:.2f}ms avg={t_avg:.2f}ms p50={t_p50:.2f}ms p95={t_p95:.2f}ms max={t_max:.2f}ms") + print( + f" min={t_min:.2f}ms avg={t_avg:.2f}ms p50={t_p50:.2f}ms p95={t_p95:.2f}ms max={t_max:.2f}ms" + ) if pipeline_overhead_ms > 0: s_min, s_avg, s_p50, s_p95, s_max = stats(serving_latencies) - print(f" (serving only: min={s_min:.2f}ms avg={s_avg:.2f}ms p50={s_p50:.2f}ms)") + print( + f" (serving only: min={s_min:.2f}ms avg={s_avg:.2f}ms p50={s_p50:.2f}ms)" + ) print(f" (pipeline overhead: {pipeline_overhead_ms:.2f}ms per query)") 
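+    # Per-run visualization: one bar per query in execution order. With the
+    # tick_step formula below, 50 queries gives max(1, 50 // 20) * 5 == 10,
+    # i.e. an x-tick every 10 queries; 100 queries widens the step to 25.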
if plot_latencies: plt.figure(figsize=(10, 6)) - bar_color = '#1f77b4' if mode == 'baseline' else '#ff7f0e' + bar_color = "#1f77b4" if mode == "baseline" else "#ff7f0e" execution_order = list(range(1, len(plot_latencies) + 1)) - plt.bar(execution_order, plot_latencies, color=bar_color, edgecolor='black') - plt.xlabel("Query Execution Order", fontsize=12, fontweight='bold') - plt.ylabel("Latency (ms)", fontsize=12, fontweight='bold') + plt.bar(execution_order, plot_latencies, color=bar_color, edgecolor="black") + plt.xlabel("Query Execution Order", fontsize=12, fontweight="bold") + plt.ylabel("Latency (ms)", fontsize=12, fontweight="bold") max_order = len(execution_order) tick_step = max(1, max_order // 20) * 5 plt.xticks(range(0, max_order + 1, tick_step)) - plt.title(f"Query Latency - {mode.upper()} Mode", fontsize=14, fontweight='bold') - plt.grid(axis='y', linestyle='--', alpha=0.7) + plt.title( + f"Query Latency - {mode.upper()} Mode", fontsize=14, fontweight="bold" + ) + plt.grid(axis="y", linestyle="--", alpha=0.7) plt.tight_layout() plot_output = output_csv.with_suffix(".png") plt.savefig(plot_output) @@ -380,8 +431,11 @@ def stats(arr): # Main # --------------------------------------------------------------------------- + def main(): - parser = argparse.ArgumentParser(description="Benchmark ASAP vs ClickHouse on H2O groupby data") + parser = argparse.ArgumentParser( + description="Benchmark ASAP vs ClickHouse on H2O groupby data" + ) parser.add_argument("--mode", choices=["baseline", "asap"], default="asap") parser.add_argument("--load-data", action="store_true", help="Load H2O data") parser.add_argument("--clickhouse-url", default="http://localhost:8123") @@ -391,13 +445,23 @@ def main(): parser.add_argument("--filter", default=None, help="Comma-separated query IDs") parser.add_argument("--no-benchmark", action="store_true", help="Load data only") parser.add_argument("--skip-table-init", action="store_true") - parser.add_argument("--load-kafka", action="store_true", help="Stream data to Kafka (for Arroyo sketch pipeline)") - parser.add_argument("--max-rows", type=int, default=0, help="Max rows to load (0 = all)") + parser.add_argument( + "--load-kafka", + action="store_true", + help="Stream data to Kafka (for Arroyo sketch pipeline)", + ) + parser.add_argument( + "--max-rows", type=int, default=0, help="Max rows to load (0 = all)" + ) args = parser.parse_args() if args.load_data: - if not load_h2o_data_clickhouse(args.clickhouse_url, skip_table_init=args.skip_table_init, max_rows=args.max_rows): + if not load_h2o_data_clickhouse( + args.clickhouse_url, + skip_table_init=args.skip_table_init, + max_rows=args.max_rows, + ): print("Failed to load data") return 1 @@ -422,7 +486,14 @@ def main(): print("\nMeasuring pipeline latency (data → Kafka → Arroyo → QE → query)...") pipeline_overhead_ms = measure_pipeline_latency(asap_url=args.asap_url) - run_benchmark(sql_file, endpoint, Path(args.output), args.mode, query_filter, pipeline_overhead_ms) + run_benchmark( + sql_file, + endpoint, + Path(args.output), + args.mode, + query_filter, + pipeline_overhead_ms, + ) return 0 diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_pipeline.sh b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_pipeline.sh index 899a149..8832fbb 100755 --- a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_pipeline.sh +++ b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_pipeline.sh @@ -347,6 +347,11 @@ case "$MODE" in ;; both) run_baseline "baseline_results.csv" + 
echo "" + echo "========================================" + echo "Cleaning up between runs..." + echo "========================================" + "$SCRIPT_DIR/cleanup.sh" --no-sudo run_asap "asap_results.csv" echo "" echo "========================================" From 6ca934a28e4598ae36d8e65803e6fa086528f9f2 Mon Sep 17 00:00:00 2001 From: benjamib112 Date: Mon, 23 Mar 2026 07:26:51 -0400 Subject: [PATCH 5/5] removed unnecessary old files --- .../asap_h2o_queries.sql | 249 ----------------- .../asap_mode_queries.sql | 249 ----------------- .../asap_benchmark_pipeline/compare_values.py | 79 ------ .../asap_benchmark_pipeline/run_commands.txt | 77 ------ .../asap_benchmark_pipeline/run_experiment.py | 254 ------------------ 5 files changed, 908 deletions(-) delete mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/asap_h2o_queries.sql delete mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/asap_mode_queries.sql delete mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/compare_values.py delete mode 100644 asap-tools/execution-utilities/asap_benchmark_pipeline/run_commands.txt delete mode 100755 asap-tools/execution-utilities/asap_benchmark_pipeline/run_experiment.py diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_h2o_queries.sql b/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_h2o_queries.sql deleted file mode 100644 index a844553..0000000 --- a/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_h2o_queries.sql +++ /dev/null @@ -1,249 +0,0 @@ --- Q1: Sum v1 by id1 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:11:10') AND '1971-01-01 00:11:10' -GROUP BY id1, id2, id3; - --- Q2: Sum v1 by id1:id2 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:13:10') AND '1971-01-01 00:13:10' -GROUP BY id1, id2, id3; - --- Q3: Sum v1 mean v3 by id3 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:15:10') AND '1971-01-01 00:15:10' -GROUP BY id1, id2, id3; - --- Q4: Mean v1:v3 by id4 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:17:10') AND '1971-01-01 00:17:10' -GROUP BY id1, id2, id3; - --- Q5: Sum v1:v3 by id6 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:19:10') AND '1971-01-01 00:19:10' -GROUP BY id1, id2, id3; - --- Q6: Sum v1 by id1 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:21:10') AND '1971-01-01 00:21:10' -GROUP BY id1, id2, id3; - --- Q7: Sum v1 by id1:id2 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:23:10') AND '1971-01-01 00:23:10' -GROUP BY id1, id2, id3; - --- Q8: Sum v1 mean v3 by id3 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:25:10') AND '1971-01-01 00:25:10' -GROUP BY id1, id2, id3; - --- Q9: Mean v1:v3 by id4 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:27:10') AND '1971-01-01 00:27:10' -GROUP BY id1, id2, id3; - --- Q10: Sum v1:v3 by id6 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:29:10') AND '1971-01-01 00:29:10' -GROUP BY id1, id2, id3; - --- Q11: Sum v1 by id1 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, 
'1971-01-01 00:31:10') AND '1971-01-01 00:31:10' -GROUP BY id1, id2, id3; - --- Q12: Sum v1 by id1:id2 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:33:10') AND '1971-01-01 00:33:10' -GROUP BY id1, id2, id3; - --- Q13: Sum v1 mean v3 by id3 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:35:10') AND '1971-01-01 00:35:10' -GROUP BY id1, id2, id3; - --- Q14: Mean v1:v3 by id4 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:37:10') AND '1971-01-01 00:37:10' -GROUP BY id1, id2, id3; - --- Q15: Sum v1:v3 by id6 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:39:10') AND '1971-01-01 00:39:10' -GROUP BY id1, id2, id3; - --- Q16: Sum v1 by id1 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:41:10') AND '1971-01-01 00:41:10' -GROUP BY id1, id2, id3; - --- Q17: Sum v1 by id1:id2 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:43:10') AND '1971-01-01 00:43:10' -GROUP BY id1, id2, id3; - --- Q18: Sum v1 mean v3 by id3 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:45:10') AND '1971-01-01 00:45:10' -GROUP BY id1, id2, id3; - --- Q19: Mean v1:v3 by id4 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:47:10') AND '1971-01-01 00:47:10' -GROUP BY id1, id2, id3; - --- Q20: Sum v1:v3 by id6 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:49:10') AND '1971-01-01 00:49:10' -GROUP BY id1, id2, id3; - --- Q21: Sum v1 by id1 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:51:10') AND '1971-01-01 00:51:10' -GROUP BY id1, id2, id3; - --- Q22: Sum v1 by id1:id2 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:53:10') AND '1971-01-01 00:53:10' -GROUP BY id1, id2, id3; - --- Q23: Sum v1 mean v3 by id3 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:55:10') AND '1971-01-01 00:55:10' -GROUP BY id1, id2, id3; - --- Q24: Mean v1:v3 by id4 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:57:10') AND '1971-01-01 00:57:10' -GROUP BY id1, id2, id3; - --- Q25: Sum v1:v3 by id6 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 00:59:10') AND '1971-01-01 00:59:10' -GROUP BY id1, id2, id3; - --- Q26: Sum v1 by id1 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:01:10') AND '1971-01-01 01:01:10' -GROUP BY id1, id2, id3; - --- Q27: Sum v1 by id1:id2 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:03:10') AND '1971-01-01 01:03:10' -GROUP BY id1, id2, id3; - --- Q28: Sum v1 mean v3 by id3 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:05:10') AND '1971-01-01 01:05:10' -GROUP BY id1, id2, id3; - --- Q29: Mean v1:v3 by id4 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:07:10') AND '1971-01-01 01:07:10' -GROUP BY id1, id2, id3; - --- Q30: Sum v1:v3 by id6 -SELECT quantile(0.95)(v1) FROM h2o_groupby 
-WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:09:10') AND '1971-01-01 01:09:10' -GROUP BY id1, id2, id3; - --- Q31: Sum v1 by id1 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:11:10') AND '1971-01-01 01:11:10' -GROUP BY id1, id2, id3; - --- Q32: Sum v1 by id1:id2 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:13:10') AND '1971-01-01 01:13:10' -GROUP BY id1, id2, id3; - --- Q33: Sum v1 mean v3 by id3 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:15:10') AND '1971-01-01 01:15:10' -GROUP BY id1, id2, id3; - --- Q34: Mean v1:v3 by id4 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:17:10') AND '1971-01-01 01:17:10' -GROUP BY id1, id2, id3; - --- Q35: Sum v1:v3 by id6 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:19:10') AND '1971-01-01 01:19:10' -GROUP BY id1, id2, id3; - --- Q36: Sum v1 by id1 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:21:10') AND '1971-01-01 01:21:10' -GROUP BY id1, id2, id3; - --- Q37: Sum v1 by id1:id2 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:23:10') AND '1971-01-01 01:23:10' -GROUP BY id1, id2, id3; - --- Q38: Sum v1 mean v3 by id3 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:25:10') AND '1971-01-01 01:25:10' -GROUP BY id1, id2, id3; - --- Q39: Mean v1:v3 by id4 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:27:10') AND '1971-01-01 01:27:10' -GROUP BY id1, id2, id3; - --- Q40: Sum v1:v3 by id6 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:29:10') AND '1971-01-01 01:29:10' -GROUP BY id1, id2, id3; - --- Q41: Sum v1 by id1 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:31:10') AND '1971-01-01 01:31:10' -GROUP BY id1, id2, id3; - --- Q42: Sum v1 by id1:id2 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:33:10') AND '1971-01-01 01:33:10' -GROUP BY id1, id2, id3; - --- Q43: Sum v1 mean v3 by id3 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:35:10') AND '1971-01-01 01:35:10' -GROUP BY id1, id2, id3; - --- Q44: Mean v1:v3 by id4 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:37:10') AND '1971-01-01 01:37:10' -GROUP BY id1, id2, id3; - --- Q45: Sum v1:v3 by id6 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:39:10') AND '1971-01-01 01:39:10' -GROUP BY id1, id2, id3; - --- Q46: Sum v1 by id1 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:41:10') AND '1971-01-01 01:41:10' -GROUP BY id1, id2, id3; - --- Q47: Sum v1 by id1:id2 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:43:10') AND '1971-01-01 01:43:10' -GROUP BY id1, id2, id3; - --- Q48: Sum v1 mean v3 by id3 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:45:10') AND '1971-01-01 01:45:10' -GROUP BY id1, id2, id3; - --- Q49: Mean v1:v3 by id4 -SELECT 
quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:47:10') AND '1971-01-01 01:47:10' -GROUP BY id1, id2, id3; - --- Q50: Sum v1:v3 by id6 -SELECT quantile(0.95)(v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -120, '1971-01-01 01:49:10') AND '1971-01-01 01:49:10' -GROUP BY id1, id2, id3; diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_mode_queries.sql b/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_mode_queries.sql deleted file mode 100644 index 4c0524e..0000000 --- a/asap-tools/execution-utilities/asap_benchmark_pipeline/asap_mode_queries.sql +++ /dev/null @@ -1,249 +0,0 @@ --- Q1: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q2: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q3: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q4: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q5: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q6: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q7: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q8: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q9: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q10: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q11: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q12: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q13: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q14: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q15: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q16: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q17: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q18: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, 
id3; - --- Q19: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q20: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q21: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q22: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q23: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q24: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q25: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q26: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q27: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q28: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q29: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q30: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q31: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q32: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q33: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q34: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q35: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q36: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q37: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q38: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q39: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q40: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- 
Q41: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q42: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q43: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q44: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q45: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q46: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q47: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q48: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q49: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; - --- Q50: p95 v1 by id1 id2 id3 -SELECT QUANTILE(0.95, v1) FROM h2o_groupby -WHERE timestamp BETWEEN DATEADD(s, -600, NOW()) AND NOW() -GROUP BY id1, id2, id3; diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/compare_values.py b/asap-tools/execution-utilities/asap_benchmark_pipeline/compare_values.py deleted file mode 100644 index 092a0a7..0000000 --- a/asap-tools/execution-utilities/asap_benchmark_pipeline/compare_values.py +++ /dev/null @@ -1,79 +0,0 @@ -import argparse -import csv -import matplotlib.pyplot as plt -import re -import numpy as np - -def extract_value(result_str): - """Extracts the first numerical value from the result preview string.""" - if not result_str: - return 0.0 - # Match integers or floats - match = re.search(r"[-+]?\d*\.\d+|\d+", result_str) - if match: - return float(match.group()) - return 0.0 - -def load_results(csv_file): - values = [] - try: - with open(csv_file, 'r', encoding='utf-8') as f: - reader = csv.DictReader(f) - for row in reader: - if row['error']: - values.append(0.0) - else: - values.append(extract_value(row['result_preview'])) - except FileNotFoundError: - print(f"✗ Could not find {csv_file}") - return [] - return values - -def main(): - parser = argparse.ArgumentParser(description="Compare computed values from Baseline and ASAP runs.") - parser.add_argument("--baseline", default="baseline_results.csv", help="Baseline CSV file") - parser.add_argument("--asap", default="asap_results_run1.csv", help="ASAP CSV file") - parser.add_argument("--output", default="value_comparison.png", help="Output image file") - - args = parser.parse_args() - - baseline_values = load_results(args.baseline) - asap_values = load_results(args.asap) - - if not baseline_values or not asap_values: - print("Missing data. 
Please make sure both CSVs exist and have data.") - return - - # Ensure we only compare up to the matched length in case one failed early - min_len = min(len(baseline_values), len(asap_values)) - baseline_values = baseline_values[:min_len] - asap_values = asap_values[:min_len] - - # --- Plotting Code --- - plt.figure(figsize=(12, 6)) - - execution_order = np.arange(1, min_len + 1) - bar_width = 0.4 - - # Create grouped bars - plt.bar(execution_order - bar_width/2, baseline_values, width=bar_width, - label='Baseline (Exact)', color='#1f77b4', edgecolor='black') - plt.bar(execution_order + bar_width/2, asap_values, width=bar_width, - label='ASAP (Approximate)', color='#ff7f0e', edgecolor='black') - - plt.xlabel("Query Execution Order", fontsize=12, fontweight='bold') - plt.ylabel("Computed Value (95th Quantile)", fontsize=12, fontweight='bold') - plt.title("Query Output Comparison: Exact vs Approximate", fontsize=14, fontweight='bold') - - # Set tick marks at every 10 on the X axis - plt.xticks(np.arange(0, min_len + 1, 10)) - - plt.legend(loc='upper right') - plt.grid(axis='y', linestyle='--', alpha=0.7) - plt.tight_layout() - - plt.savefig(args.output) - print(f"✓ Value comparison graph successfully saved to {args.output}") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_commands.txt b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_commands.txt deleted file mode 100644 index 53bf34f..0000000 --- a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_commands.txt +++ /dev/null @@ -1,77 +0,0 @@ -# baseline mode - -cd ~/asap-internal/Utilities/installation/kafka -./run.sh kafka/ - -cd ~/asap-internal/Utilities/installation/clickhouse -./run.sh clickhouse/ - -curl -s "http://localhost:8123/" -d "TRUNCATE TABLE h2o_groupby" - -python3 run_benchmark.py --output asap_results.csv --load-data --mode baseline - -# asap mode - -Setup ------- - -1. Edit adapter config in ~/asap-internal/QueryEngineRust/src/main.rs to: - -let adapter_config = AdapterConfig::clickhouse_sql( - "http://localhost:8123".to_string(), // ClickHouse server URL - "default".to_string(), // Database name - true, // Always forward (fallback for every query) -); - -2. Compile the query engine: - -cd ~/asap-internal/QueryEngineRust -cargo build --release - -Run ------- - -1. Launch Arroyo: - -cd ~/asap-internal/arroyo -./target/release/arroyo --config ~/asap-internal/ArroyoSketch/config.yaml cluster \ - > /tmp/arroyo.log 2>&1 & - -2. Submit pipeline: - -cd ~/asap-internal/ArroyoSketch -python3 run_arroyosketch.py \ - --source_type kafka \ - --kafka_input_format json \ - --input_kafka_topic h2o_groupby \ - --output_format json \ - --pipeline_name asap_h2o_pipeline \ - --config_file_path ~/asap-internal/ExecutionUtilities/asap_query_latency/streaming_config.yaml \ - --output_kafka_topic sketch_topic \ - --output_dir ./outputs \ - --parallelism 1 \ - --query_language sql - -3. Wait until pipeline is running: - -(Optional) Check output on localhost:8000 - -4. Load data through Kafka so Arroyo can build sketches: - -python3 run_benchmark.py --load-data --mode asap - -5. 
Start QueryEngine: - -cd ~/asap-internal/QueryEngineRust -nohup ./target/release/query_engine_rust \ - --kafka-topic sketch_topic --input-format json \ - --config ~/asap-internal/ExecutionUtilities/asap_query_latency/inference_config.yaml \ - --streaming-config ~/asap-internal/ExecutionUtilities/asap_query_latency/streaming_config.yaml \ - --http-port 8088 --delete-existing-db --log-level info \ - --output-dir ./output --streaming-engine arroyo \ - --query-language SQL --lock-strategy per-key \ - --prometheus-scrape-interval 1 > /tmp/query_engine.log 2>&1 & - -6. Run ASAP mode: - -python3 run_benchmark.py --mode asap --load-data --output asap_results_run1.csv diff --git a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_experiment.py b/asap-tools/execution-utilities/asap_benchmark_pipeline/run_experiment.py deleted file mode 100755 index 07a9e65..0000000 --- a/asap-tools/execution-utilities/asap_benchmark_pipeline/run_experiment.py +++ /dev/null @@ -1,254 +0,0 @@ -#!/usr/bin/env python3 -""" -Run baseline and ASAP experiments, then generate comparison graphs. - -Usage: - # Full run from scratch (starts all infrastructure): - python3 run_experiment.py --runs 3 --load-data - - # Infra already running, data already loaded: - python3 run_experiment.py --runs 3 --skip-infra - - # Skip baseline (use existing baseline_results.csv): - python3 run_experiment.py --runs 3 --skip-infra --skip-baseline - - # Quick single-run comparison: - python3 run_experiment.py --runs 1 --skip-infra -""" - -import argparse -import subprocess -import sys -import csv -from pathlib import Path -from datetime import datetime - -import matplotlib.pyplot as plt -import matplotlib.gridspec as gridspec -import numpy as np - - -def run_pipeline(mode, output_file, load_data=False, skip_infra=False): - """Invoke run_pipeline.sh and return True on success.""" - script_dir = Path(__file__).parent - cmd = [ - "bash", str(script_dir / "run_pipeline.sh"), - "--mode", mode, - "--output", str(output_file), - ] - if load_data: - cmd.append("--load-data") - if skip_infra: - cmd.append("--skip-infra") - - print(f"\n{'='*60}") - print(f"Running: {' '.join(cmd)}") - print(f"{'='*60}\n") - - result = subprocess.run(cmd, check=False) - return result.returncode == 0 - - -def load_csv(csv_file): - """Return (query_ids, latencies_ms, errors) from a benchmark CSV.""" - query_ids, latencies, errors = [], [], [] - try: - with open(csv_file, newline="") as f: - for row in csv.DictReader(f): - query_ids.append(row["query_id"]) - try: - latencies.append(float(row["latency_ms"])) - except (ValueError, KeyError): - latencies.append(0.0) - errors.append(bool(row.get("error", "").strip())) - except FileNotFoundError: - print(f" Warning: {csv_file} not found") - return query_ids, latencies, errors - - -def generate_comparison_graphs(baseline_file, asap_files, output_dir, timestamp): - """Generate side-by-side latency comparison and per-run overlay graphs.""" - b_ids, b_lat, b_err = load_csv(baseline_file) - if not b_lat: - print("No baseline data; skipping graphs") - return - - asap_runs = [] - for f in asap_files: - _, lats, _ = load_csv(f) - if lats: - asap_runs.append(lats) - - if not asap_runs: - print("No ASAP run data; skipping graphs") - return - - n = len(b_lat) - x = np.arange(1, n + 1) - asap_matrix = np.array([r[:n] for r in asap_runs]) # shape (runs, queries) - asap_mean = asap_matrix.mean(axis=0) - asap_std = asap_matrix.std(axis=0) if len(asap_runs) > 1 else np.zeros(n) - - # --- Figure 1: Baseline vs ASAP mean latency (grouped 
-
-
-def generate_comparison_graphs(baseline_file, asap_files, output_dir, timestamp):
-    """Generate side-by-side latency comparison and per-run overlay graphs."""
-    b_ids, b_lat, b_err = load_csv(baseline_file)
-    if not b_lat:
-        print("No baseline data; skipping graphs")
-        return
-
-    asap_runs = []
-    for f in asap_files:
-        _, lats, _ = load_csv(f)
-        if lats:
-            asap_runs.append(lats)
-
-    if not asap_runs:
-        print("No ASAP run data; skipping graphs")
-        return
-
-    n = len(b_lat)
-    x = np.arange(1, n + 1)
-    asap_matrix = np.array([r[:n] for r in asap_runs])  # shape (runs, queries)
-    asap_mean = asap_matrix.mean(axis=0)
-    asap_std = asap_matrix.std(axis=0) if len(asap_runs) > 1 else np.zeros(n)
-
-    # --- Figure 1: Baseline vs ASAP mean latency (grouped bars) ---
-    fig, axes = plt.subplots(2, 1, figsize=(14, 12))
-    fig.suptitle(
-        f"ASAP vs Baseline — {len(asap_runs)} ASAP run(s) — {timestamp}",
-        fontsize=14, fontweight="bold"
-    )
-
-    ax = axes[0]
-    bar_w = 0.38
-    ax.bar(x - bar_w / 2, b_lat, bar_w, label="Baseline (exact)", color="#1f77b4", alpha=0.85, edgecolor="black")
-    ax.bar(x + bar_w / 2, asap_mean, bar_w, label=f"ASAP avg (n={len(asap_runs)})", color="#ff7f0e", alpha=0.85, edgecolor="black")
-    if len(asap_runs) > 1:
-        ax.errorbar(x + bar_w / 2, asap_mean, yerr=asap_std, fmt="none", color="black", capsize=2)
-    ax.set_xlabel("Query Number")
-    ax.set_ylabel("Latency (ms)")
-    ax.set_title("Latency per Query")
-    ax.set_xticks(np.arange(0, n + 1, 5))
-    ax.legend()
-    ax.grid(axis="y", linestyle="--", alpha=0.5)
-
-    # --- Subplot 2: Speedup ratio ---
-    ax2 = axes[1]
-    with np.errstate(divide="ignore", invalid="ignore"):
-        speedup = np.where(asap_mean > 0, np.array(b_lat[:n]) / asap_mean, 0.0)
-    colors = ["#2ca02c" if s >= 1.0 else "#d62728" for s in speedup]
-    ax2.bar(x, speedup, color=colors, alpha=0.85, edgecolor="black")
-    ax2.axhline(1.0, color="black", linestyle="--", linewidth=1, label="1x (no speedup)")
-    ax2.axhline(2.0, color="gray", linestyle=":", linewidth=1, label="2x target")
-    ax2.set_xlabel("Query Number")
-    ax2.set_ylabel("Speedup (Baseline / ASAP)")
-    ax2.set_title("ASAP Speedup Factor per Query (green = faster, red = slower)")
-    ax2.set_xticks(np.arange(0, n + 1, 5))
-    ax2.legend()
-    ax2.grid(axis="y", linestyle="--", alpha=0.5)
-
-    out1 = output_dir / f"comparison_{timestamp}.png"
-    plt.tight_layout()
-    plt.savefig(out1, dpi=150)
-    plt.close()
-    print(f"Saved: {out1}")
-
-    # --- Figure 2: All ASAP runs overlaid (consistency check) ---
-    if len(asap_runs) > 1:
-        fig2, ax3 = plt.subplots(figsize=(14, 6))
-        colors_runs = plt.cm.tab10.colors
-        for i, run_lats in enumerate(asap_runs):
-            ax3.plot(x, run_lats[:n], marker="o", markersize=3, linewidth=1,
-                     label=f"ASAP run {i + 1}", color=colors_runs[i % 10], alpha=0.7)
-        ax3.plot(x, b_lat, marker="s", markersize=3, linewidth=1.5,
-                 label="Baseline", color="black", linestyle="--")
-        ax3.set_xlabel("Query Number")
-        ax3.set_ylabel("Latency (ms)")
-        ax3.set_title("ASAP Run Consistency — All Runs Overlaid")
-        ax3.set_xticks(np.arange(0, n + 1, 5))
-        ax3.legend()
-        ax3.grid(linestyle="--", alpha=0.4)
-        out2 = output_dir / f"asap_runs_overlay_{timestamp}.png"
-        plt.tight_layout()
-        plt.savefig(out2, dpi=150)
-        plt.close()
-        print(f"Saved: {out2}")
-
-    # --- Summary statistics ---
-    valid_b = [v for v in b_lat if v > 0]
-    valid_a = [v for v in asap_mean if v > 0]
-    if valid_b and valid_a:
-        print("\nSummary Statistics:")
-        print(f"  Baseline — mean: {np.mean(valid_b):7.1f}ms  median: {np.median(valid_b):7.1f}ms  p95: {np.percentile(valid_b, 95):7.1f}ms")
-        print(f"  ASAP avg — mean: {np.mean(valid_a):7.1f}ms  median: {np.median(valid_a):7.1f}ms  p95: {np.percentile(valid_a, 95):7.1f}ms")
-        mean_speedup = np.mean(valid_b) / np.mean(valid_a)
-        median_speedup = np.median(valid_b) / np.median(valid_a)
-        print(f"  Mean speedup:   {mean_speedup:.2f}x")
-        print(f"  Median speedup: {median_speedup:.2f}x")
-        if mean_speedup < 2.0:
-            print(f"  WARNING: Mean speedup {mean_speedup:.2f}x is below the 2x target.")
-            print("  Check /tmp/query_engine.log to confirm ASAP is serving from sketches,")
-            print("  not falling back to ClickHouse for every query.")
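Reviewer note: the speedup panel uses a guarded element-wise ratio, so any query whose mean ASAP latency is zero (typically an errored run) is forced to 0.0 rather than dividing by zero. A tiny worked example with invented latencies:

    import numpy as np

    # Invented numbers: two ASAP runs over three queries.
    b_lat = [100.0, 80.0, 120.0]
    asap_matrix = np.array([[40.0, 90.0, 0.0],   # 0.0 = run errored on Q3
                            [60.0, 70.0, 0.0]])
    asap_mean = asap_matrix.mean(axis=0)         # -> [50., 80., 0.]
    with np.errstate(divide="ignore", invalid="ignore"):
        speedup = np.where(asap_mean > 0, np.array(b_lat) / asap_mean, 0.0)
    # speedup -> [2., 1., 0.]: 2x faster, break-even, and guarded (no div by zero)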
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Run ASAP vs baseline experiments")
-    parser.add_argument("--runs", type=int, default=3,
-                        help="Number of ASAP runs (default: 3)")
-    parser.add_argument("--load-data", action="store_true", help="Download and load H2O dataset")
-    parser.add_argument("--skip-baseline", action="store_true", help="Skip baseline; use existing baseline_results.csv")
-    parser.add_argument("--skip-infra", action="store_true", help="Skip starting Kafka/ClickHouse (assume already running)")
-    parser.add_argument("--output-dir", default=".", help="Directory for output files (default: .)")
-    args = parser.parse_args()
-
-    output_dir = Path(args.output_dir)
-    output_dir.mkdir(parents=True, exist_ok=True)
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-
-    baseline_file = output_dir / "baseline_results.csv"
-    asap_files = []
-
-    # ---- Step 1: Baseline ----
-    if not args.skip_baseline:
-        print("\n" + "="*60)
-        print("STEP 1: Baseline run")
-        print("="*60)
-        ok = run_pipeline(
-            "baseline", baseline_file,
-            load_data=args.load_data,
-            skip_infra=args.skip_infra,
-        )
-        if not ok:
-            print("WARNING: Baseline run reported a non-zero exit code")
-    else:
-        print(f"Skipping baseline; using: {baseline_file}")
-
-    # ---- Steps 2..N+1: ASAP runs ----
-    for i in range(1, args.runs + 1):
-        print(f"\n{'='*60}")
-        print(f"STEP {1 + i}: ASAP run {i}/{args.runs}")
-        print("="*60)
-
-        asap_file = output_dir / f"asap_results_run{i}_{timestamp}.csv"
-
-        # Load data on the first ASAP run only (it also loads into MergeTree via
-        # the materialized view, so baseline can reuse that data afterwards if needed)
-        load_this_run = (i == 1) and args.load_data
-
-        ok = run_pipeline(
-            "asap", asap_file,
-            load_data=load_this_run,
-            skip_infra=args.skip_infra,
-        )
-        if ok or asap_file.exists():
-            asap_files.append(asap_file)
-        else:
-            print(f"WARNING: ASAP run {i} produced no output")
-
-    # ---- Graphs ----
-    print(f"\n{'='*60}")
-    print("Generating comparison graphs")
-    print("="*60)
-
-    if baseline_file.exists() and asap_files:
-        generate_comparison_graphs(baseline_file, asap_files, output_dir, timestamp)
-
-        # Run existing value-accuracy comparison for first ASAP run
-        script_dir = Path(__file__).parent
-        values_png = output_dir / f"value_comparison_{timestamp}.png"
-        subprocess.run([
-            "python3", str(script_dir / "compare_values.py"),
-            "--baseline", str(baseline_file),
-            "--asap", str(asap_files[0]),
-            "--output", str(values_png),
-        ], check=False)
-    else:
-        missing = []
-        if not baseline_file.exists():
-            missing.append(str(baseline_file))
-        if not asap_files:
-            missing.append("(no ASAP results)")
-        print(f"Missing files, skipping graphs: {', '.join(missing)}")
-
-    print("\nExperiment complete!")
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
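Reviewer note: when the result CSVs already exist, the plotting helpers above are usable on their own without re-running any infrastructure. A hypothetical call (the paths are illustrative) that regenerates the graphs from saved results:

    from datetime import datetime
    from pathlib import Path

    out = Path("results")  # illustrative directory containing earlier runs
    out.mkdir(parents=True, exist_ok=True)
    generate_comparison_graphs(
        baseline_file=out / "baseline_results.csv",
        asap_files=[out / "asap_results_run1.csv",
                    out / "asap_results_run2.csv"],
        output_dir=out,
        timestamp=datetime.now().strftime("%Y%m%d_%H%M%S"),
    )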