From 8da47b0fe14d54e0e45f349307c5a2dac02a7f06 Mon Sep 17 00:00:00 2001 From: semyonsinchenko Date: Sat, 6 Sep 2025 20:17:48 +0200 Subject: [PATCH 1/4] Using of dataframe! macro in tests --- src/lib.rs | 212 ++++++++++++++++++++++++++++++++++++++++---------- src/pregel.rs | 33 ++------ 2 files changed, 179 insertions(+), 66 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2590090..3b8d9ec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +mod connected_components; mod pagerank; mod pregel; mod shortest_paths; @@ -7,13 +8,54 @@ use datafusion::error::Result; use datafusion::functions_aggregate::count::count; use datafusion::prelude::*; +/// Column names for the vertex id column. pub const VERTEX_ID: &str = "id"; +/// Column names for the edge source column. pub const EDGE_SRC: &str = "src"; +/// Column names for the edge destination column. pub const EDGE_DST: &str = "dst"; +/// Column names for the edge column in triplet representation. pub const EDGE_COL: &str = "edge"; +/// Column names for the source vertex in triplet representation. pub const SRC_VERTEX: &str = "src_vertex"; +/// Column names for the destination vertex in triplet representation. pub const DST_VERTEX: &str = "dst_vertex"; +/// A data structure representing a graph in the form of vertices and edges. +/// +/// The `GraphFrame` struct is designed to hold a graph's data where vertices +/// (nodes) and edges (connections) are represented as `DataFrame` structures. +/// +/// # Fields +/// +/// * `vertices` - A `DataFrame` that contains information about the graph's vertices. +/// Each row in the `DataFrame` represents a vertex (`VERTEX_ID`), and additional +/// columns can store attributes (e.g., labels or properties) for +/// each vertex. +/// +/// * `edges` - A `DataFrame` that contains information about the graph's edges. 
+/// Each row in the `DataFrame` represents an edge, with columns +/// typically storing the source vertex (`EDGE_SRC`), destination vertex (`EDGE_DST`), and +/// any additional attributes (e.g., weights or labels) associated +/// with the edge. +/// +/// # Example +/// +/// ``` +/// use datafusion::dataframe; +/// use graphframes_rs::{GraphFrame, VERTEX_ID, EDGE_SRC, EDGE_DST}; +/// let vertices = dataframe!( +/// VERTEX_ID => vec![1i64, 2i64, 3i64], +/// "attr" => vec!["a", "b", "c"] +/// ).unwrap(); +/// let edges = dataframe!( +/// EDGE_SRC => vec![1i64, 2i64, 3i64], +/// EDGE_DST => vec![3i64, 1i64, 2i64], +/// "attr" => vec!["d", "j", "h"] +/// ).unwrap(); +/// +/// let graph = GraphFrame { vertices, edges }; +/// ``` #[derive(Debug, Clone)] pub struct GraphFrame { pub vertices: DataFrame, @@ -21,16 +63,98 @@ pub struct GraphFrame { } impl GraphFrame { + /// Returns the total number of nodes in the graph. + /// + /// # Returns + /// + /// This function returns a `Result`: + /// - `Ok(i64)`: The total number of nodes (vertices) in the graph, represented as a 64-bit signed integer. + /// - `Err`: If an error occurs during the computation or retrieval of the node count. + /// + /// # Example + /// + /// ``` + /// use datafusion::dataframe; + /// use graphframes_rs::{GraphFrame, VERTEX_ID, EDGE_SRC, EDGE_DST}; + /// let vertices = dataframe!( + /// VERTEX_ID => vec![1i64, 2i64, 3i64], + /// "attr" => vec!["a", "b", "c"] + /// ).unwrap(); + /// let edges = dataframe!( + /// EDGE_SRC => vec![1i64, 2i64, 3i64], + /// EDGE_DST => vec![3i64, 1i64, 2i64], + /// "attr" => vec!["d", "j", "h"] + /// ).unwrap(); + /// + /// let graph = GraphFrame { vertices, edges }; + /// let node_count = graph.num_nodes(); + /// ``` pub async fn num_nodes(&self) -> Result { let count = self.vertices.clone().count().await?; Ok(count as i64) } + /// Returns the total number of edges in the graph. 
+ /// + /// # Returns + /// + /// This function returns a `Result`: + /// - `Ok(i64)` - The total number of edges, represented as a 64-bit integer. + /// - `Err(E)` - If an error occurs during the computation, the error is propagated. + /// + /// # Examples + /// + /// ``` + /// use datafusion::dataframe; + /// use graphframes_rs::{GraphFrame, VERTEX_ID, EDGE_SRC, EDGE_DST}; + /// let vertices = dataframe!( + /// VERTEX_ID => vec![1i64, 2i64, 3i64], + /// "attr" => vec!["a", "b", "c"] + /// ).unwrap(); + /// let edges = dataframe!( + /// EDGE_SRC => vec![1i64, 2i64, 3i64], + /// EDGE_DST => vec![3i64, 1i64, 2i64], + /// "attr" => vec!["d", "j", "h"] + /// ).unwrap(); + /// + /// let graph = GraphFrame { vertices, edges }; + /// let edge_count = graph.num_edges(); + /// ``` pub async fn num_edges(&self) -> Result { let count = self.edges.clone().count().await?; Ok(count as i64) } + /// Computes the in-degrees for each vertex in the graph. + /// + /// This function calculates the in-degree of each vertex by counting the number of + /// incoming edges. It returns a `DataFrame` + /// containing two columns: + /// - `VERTEX_ID`: The unique identifier of the vertex (derived from the destination of the edges). + /// - `in_degree`: The count of incoming edges (in-degrees) for each vertex. + /// + /// # Returns + /// An asynchronous function that returns: + /// - `Ok(DataFrame)` containing the vertex IDs and their corresponding in-degrees. + /// - `Err` if the aggregation or selection operation fails. 
+ /// + /// # Example + /// ```rust + /// use datafusion::dataframe; + /// use graphframes_rs::{GraphFrame, VERTEX_ID, EDGE_SRC, EDGE_DST}; + /// let vertices = dataframe!( + /// VERTEX_ID => vec![1i64, 2i64, 3i64], + /// "attr" => vec!["a", "b", "c"] + /// ).unwrap(); + /// let edges = dataframe!( + /// EDGE_SRC => vec![1i64, 2i64, 3i64], + /// EDGE_DST => vec![3i64, 1i64, 2i64], + /// "attr" => vec!["d", "j", "h"] + /// ).unwrap(); + /// + /// let graph = GraphFrame { vertices, edges }; + /// let in_degrees = graph.in_degrees(); + /// ``` pub async fn in_degrees(&self) -> Result { let df = self.edges.clone().aggregate( vec![col(EDGE_DST)], @@ -38,7 +162,36 @@ impl GraphFrame { )?; Ok(df.select(vec![col(EDGE_DST).alias(VERTEX_ID), col("in_degree")])?) } - + /// Computes the out-degrees for each vertex in the graph. + /// + /// This function calculates the out-degree of each vertex by counting the number of + /// outgoing edges. It returns a `DataFrame` + /// containing two columns: + /// - `VERTEX_ID`: The unique identifier of the vertex (derived from the source of the edges). + /// - `out_degree`: The count of outgoing edges (out-degrees) for each vertex. + /// + /// # Returns + /// An asynchronous function that returns: + /// - `Ok(DataFrame)` containing the vertex IDs and their corresponding out-degrees. + /// - `Err` if the aggregation or selection operation fails.
+ /// + /// # Example + /// ```rust + /// use datafusion::dataframe; + /// use graphframes_rs::{GraphFrame, VERTEX_ID, EDGE_SRC, EDGE_DST}; + /// let vertices = dataframe!( + /// VERTEX_ID => vec![1i64, 2i64, 3i64], + /// "attr" => vec!["a", "b", "c"] + /// ).unwrap(); + /// let edges = dataframe!( + /// EDGE_SRC => vec![1i64, 2i64, 3i64], + /// EDGE_DST => vec![3i64, 1i64, 2i64], + /// "attr" => vec!["d", "j", "h"] + /// ).unwrap(); + /// + /// let graph = GraphFrame { vertices, edges }; + /// let out_degrees = graph.out_degrees(); + /// ``` pub async fn out_degrees(&self) -> Result { let df = self.edges.clone().aggregate( vec![col(EDGE_SRC)], @@ -133,7 +286,6 @@ impl GraphFrame { /// let graph = GraphFrame { vertices, edges }; /// let triplets = graph.triplets(); /// ``` - /// // Assuming `edges_df` and `vertices_df` are initialized DataFrames for pub async fn triplets(&self) -> Result { let edges_struct = self.edges.clone().select(vec![ col(EDGE_SRC), @@ -189,46 +341,24 @@ impl GraphFrame { #[cfg(test)] mod tests { use super::*; - use datafusion::arrow::array::{Int64Array, RecordBatch, StringArray}; - use datafusion::arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef}; + use datafusion::arrow::array::Int64Array; + use datafusion::arrow::datatypes::{DataType, Field, Fields}; use std::collections::HashMap; - use std::sync::Arc; - - fn create_test_graph() -> Result { - let ctx = SessionContext::new(); - - let vertices_data = RecordBatch::try_new( - SchemaRef::from(Schema::new(vec![ - Field::new("id", DataType::Int64, false), - Field::new("name", DataType::Utf8, false), - ])), - vec![ - Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10])), - Arc::new(StringArray::from(vec![ - "Hub", "Alice", "Bob", "Carol", "David", "Eve", "Frank", "Grace", "Henry", - "Ivy", - ])), - ], - ); - let vertices = ctx.read_batch(vertices_data?)?; - - let edges_data = RecordBatch::try_new( - SchemaRef::from(Schema::new(vec![ - Field::new("src", DataType::Int64, false),
- Field::new("dst", DataType::Int64, false), - ])), - vec![ - Arc::new(Int64Array::from(vec![ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, - 8, 8, 9, 10, - ])), - Arc::new(Int64Array::from(vec![ - 2, 3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 4, 5, 6, 5, 6, 7, 6, 7, 8, 7, 8, 8, 9, - 9, 10, 10, 1, - ])), - ], - ); - let edges = ctx.read_batch(edges_data?)?; + + pub(crate) fn create_test_graph() -> Result { + let vertices = dataframe!( + VERTEX_ID => vec![1i64, 2i64, 3i64, 4i64, 5i64, 6i64, 7i64, 8i64, 9i64, 10i64], + "name" => vec!["Hub", "Alice", "Bob", "Carol", "David", "Eve", "Frank", "Grace", "Henry", "Ivy"] + )?; + + let edges = dataframe!( + EDGE_SRC => Vec::::from( + vec![1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10,] + ), + EDGE_DST => Vec::::from( + vec![2, 3, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 4, 5, 6, 5, 6, 7, 6, 7, 8, 7, 8, 8, 9, 9, 10, 10, 1,] + ), + )?; Ok(GraphFrame { vertices, edges }) } diff --git a/src/pregel.rs b/src/pregel.rs index efbedf8..ffca023 100644 --- a/src/pregel.rs +++ b/src/pregel.rs @@ -413,36 +413,19 @@ impl GraphFrame { #[cfg(test)] mod tests { use super::*; - use datafusion::arrow::array::{Array, Int32Array, Int64Array, RecordBatch}; - use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; + use datafusion::arrow::array::{Array, Int32Array, Int64Array}; use datafusion::functions_aggregate::min_max::max; use datafusion::functions_aggregate::sum::sum; - use std::sync::Arc; fn create_graph(vertices: Vec, edges: Vec>) -> Result { - let ctx = SessionContext::new(); - - let vertices_data = RecordBatch::try_new( - SchemaRef::from(Schema::new(vec![Field::new("id", DataType::Int64, false)])), - vec![Arc::new(Int64Array::from(vertices))], - )?; - let vertices_df = ctx.read_batch(vertices_data)?; - - let edges_data = RecordBatch::try_new( - SchemaRef::from(Schema::new(vec![ - Field::new("src", DataType::Int64, false), - Field::new("dst", DataType::Int64, 
false), - ])), - vec![ - Arc::new(Int64Array::from( - edges.iter().map(|e| e[0]).collect::>(), - )), - Arc::new(Int64Array::from( - edges.iter().map(|e| e[1]).collect::>(), - )), - ], + let vertices_df = dataframe!( + VERTEX_ID => Vec::::from(vertices), )?; - let edges_df = ctx.read_batch(edges_data)?; + let edges_df = dataframe!(EDGE_SRC => Vec::::from( + edges.iter().map(|e| e[0]).collect::>() + ), EDGE_DST => Vec::::from( + edges.iter().map(|e| e[1]).collect::>() + ))?; Ok(GraphFrame { vertices: vertices_df, From a0ac9faf5041be68e1e781703b0537928d556738 Mon Sep 17 00:00:00 2001 From: semyonsinchenko Date: Mon, 8 Sep 2025 16:50:50 +0200 Subject: [PATCH 2/4] Add connected components benchmark and update dependencies - Added `cc_benchmark.rs` for evaluating connected components performance. - Updated `criterion` to v0.7 and refined benchmark configurations. - Upgraded `datafusion` to v49.0.2. - Modified `run_benchmarks.py` to include a `--weighted` graph option. - Updated documentation and comments across the codebase for clarity. 
--- Cargo.lock | 221 ++++++++++++++++++++-------------- Cargo.toml | 11 +- benches/README.md | 10 +- benches/cc_benchmark.rs | 42 +++++++ benches/pagerank_benchmark.rs | 11 +- run_benchmarks.py | 28 +++-- src/connected_components.rs | 4 +- src/util.rs | 2 + 8 files changed, 217 insertions(+), 112 deletions(-) create mode 100644 benches/cc_benchmark.rs diff --git a/Cargo.lock b/Cargo.lock index aec0f0b..1b69ad6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -223,6 +223,7 @@ dependencies = [ "arrow-schema", "flatbuffers", "lz4_flex", + "zstd", ] [[package]] @@ -320,7 +321,7 @@ version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "bzip2", + "bzip2 0.5.2", "flate2", "futures-core", "memchr", @@ -476,6 +477,15 @@ dependencies = [ "bzip2-sys", ] +[[package]] +name = "bzip2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea8dcd42434048e4f7a304411d9273a411f647446c1234a65ce0554923f4cff" +dependencies = [ + "libbz2-rs-sys", +] + [[package]] name = "bzip2-sys" version = "0.1.13+1.0.8" @@ -645,26 +655,22 @@ dependencies = [ [[package]] name = "criterion" -version = "0.5.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" dependencies = [ "anes", "cast", "ciborium", "clap", "criterion-plot", - "futures", - "is-terminal", - "itertools 0.10.5", + "itertools 0.13.0", "num-traits", - "once_cell", "oorandom", "plotters", "rayon", "regex", "serde", - "serde_derive", "serde_json", "tinytemplate", "tokio", @@ -673,12 +679,12 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" dependencies = [ "cast", - "itertools 0.10.5", + "itertools 0.13.0", ] [[package]] @@ -759,16 +765,16 @@ dependencies = [ [[package]] name = "datafusion" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a11e19a7ccc5bb979c95c1dceef663eab39c9061b3bbf8d1937faf0f03bf41f" +checksum = "69dfeda1633bf8ec75b068d9f6c27cdc392ffcf5ff83128d5dbab65b73c1fd02" dependencies = [ "arrow", "arrow-ipc", "arrow-schema", "async-trait", "bytes", - "bzip2", + "bzip2 0.6.0", "chrono", "datafusion-catalog", "datafusion-catalog-listing", @@ -795,6 +801,7 @@ dependencies = [ "datafusion-sql", "flate2", "futures", + "hex", "itertools 0.14.0", "log", "object_store", @@ -813,9 +820,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94985e67cab97b1099db2a7af11f31a45008b282aba921c1e1d35327c212ec18" +checksum = "2848fd1e85e2953116dab9cc2eb109214b0888d7bbd2230e30c07f1794f642c0" dependencies = [ "arrow", "async-trait", @@ -839,9 +846,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e002df133bdb7b0b9b429d89a69aa77b35caeadee4498b2ce1c7c23a99516988" +checksum = "051a1634628c2d1296d4e326823e7536640d87a118966cdaff069b68821ad53b" dependencies = [ "arrow", "async-trait", @@ -862,16 +869,18 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13242fc58fd753787b0a538e5ae77d356cb9d0656fa85a591a33c5f106267f6" +checksum = "765e4ad4ef7a4500e389a3f1e738791b71ff4c29fd00912c2f541d62b25da096" dependencies = [ "ahash", 
"arrow", "arrow-ipc", "base64", + "chrono", "half", "hashbrown 0.14.5", + "hex", "indexmap", "libc", "log", @@ -886,9 +895,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2239f964e95c3a5d6b4a8cde07e646de8995c1396a7fd62c6e784f5341db499" +checksum = "40a2ae8393051ce25d232a6065c4558ab5a535c9637d5373bacfd464ac88ea12" dependencies = [ "futures", "log", @@ -897,15 +906,15 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cf792579bc8bf07d1b2f68c2d5382f8a63679cce8fbebfd4ba95742b6e08864" +checksum = "90cd841a77f378bc1a5c4a1c37345e1885a9203b008203f9f4b3a769729bf330" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", - "bzip2", + "bzip2 0.6.0", "chrono", "datafusion-common", "datafusion-common-runtime", @@ -933,9 +942,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfc114f9a1415174f3e8d2719c371fc72092ef2195a7955404cfe6b2ba29a706" +checksum = "77f4a2c64939c6f0dd15b246723a699fa30d59d0133eb36a86e8ff8c6e2a8dc6" dependencies = [ "arrow", "async-trait", @@ -958,9 +967,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d88dd5e215c420a52362b9988ecd4cefd71081b730663d4f7d886f706111fc75" +checksum = "11387aaf931b2993ad9273c63ddca33f05aef7d02df9b70fb757429b4b71cdae" dependencies = [ "arrow", "async-trait", @@ -983,9 +992,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"33692acdd1fbe75280d14f4676fe43f39e9cb36296df56575aa2cac9a819e4cf" +checksum = "028f430c5185120bf806347848b8d8acd9823f4038875b3820eeefa35f2bb4a2" dependencies = [ "arrow", "async-trait", @@ -1001,8 +1010,10 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", + "datafusion-pruning", "datafusion-session", "futures", + "hex", "itertools 0.14.0", "log", "object_store", @@ -1014,15 +1025,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0e7b648387b0c1937b83cb328533c06c923799e73a9e3750b762667f32662c0" +checksum = "8ff336d1d755399753a9e4fbab001180e346fc8bfa063a97f1214b82274c00f8" [[package]] name = "datafusion-execution" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9609d83d52ff8315283c6dad3b97566e877d8f366fab4c3297742f33dcd636c7" +checksum = "042ea192757d1b2d7dcf71643e7ff33f6542c7704f00228d8b85b40003fd8e0f" dependencies = [ "arrow", "dashmap", @@ -1039,11 +1050,12 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e75230cd67f650ef0399eb00f54d4a073698f2c0262948298e5299fc7324da63" +checksum = "025222545d6d7fab71e2ae2b356526a1df67a2872222cbae7535e557a42abd2e" dependencies = [ "arrow", + "async-trait", "chrono", "datafusion-common", "datafusion-doc", @@ -1060,9 +1072,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70fafb3a045ed6c49cfca0cd090f62cf871ca6326cc3355cb0aaf1260fa760b6" +checksum = "9d5c267104849d5fa6d81cf5ba88f35ecd58727729c5eb84066c25227b644ae2" dependencies = [ "arrow", "datafusion-common", @@ -1073,9 +1085,9 @@ dependencies = [ [[package]] name = 
"datafusion-functions" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf9a9cf655265861a20453b1e58357147eab59bdc90ce7f2f68f1f35104d3bb" +checksum = "c620d105aa208fcee45c588765483314eb415f5571cfd6c1bae3a59c5b4d15bb" dependencies = [ "arrow", "arrow-buffer", @@ -1102,9 +1114,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f07e49733d847be0a05235e17b884d326a2fd402c97a89fe8bcf0bfba310005" +checksum = "35f61d5198a35ed368bf3aacac74f0d0fa33de7a7cb0c57e9f68ab1346d2f952" dependencies = [ "ahash", "arrow", @@ -1123,9 +1135,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4512607e10d72b0b0a1dc08f42cb5bd5284cb8348b7fea49dc83409493e32b1b" +checksum = "13efdb17362be39b5024f6da0d977ffe49c0212929ec36eec550e07e2bc7812f" dependencies = [ "ahash", "arrow", @@ -1136,9 +1148,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab331806e34f5545e5f03396e4d5068077395b1665795d8f88c14ec4f1e0b7a" +checksum = "9187678af567d7c9e004b72a0b6dc5b0a00ebf4901cb3511ed2db4effe092e66" dependencies = [ "arrow", "arrow-ord", @@ -1148,6 +1160,7 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", "itertools 0.14.0", @@ -1157,9 +1170,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d4ac2c0be983a06950ef077e34e0174aa0cb9e346f3aeae459823158037ade37" +checksum = "ecf156589cc21ef59fe39c7a9a841b4a97394549643bbfa88cc44e8588cf8fe5" dependencies = [ "arrow", "async-trait", @@ -1173,9 +1186,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f3d92731de384c90906941d36dcadf6a86d4128409a9c5cd916662baed5f53" +checksum = "edcb25e3e369f1366ec9a261456e45b5aad6ea1c0c8b4ce546587207c501ed9e" dependencies = [ "arrow", "datafusion-common", @@ -1191,9 +1204,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c679f8bf0971704ec8fd4249fcbb2eb49d6a12cc3e7a840ac047b4928d3541b5" +checksum = "8996a8e11174d0bd7c62dc2f316485affc6ae5ffd5b8a68b508137ace2310294" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1201,9 +1214,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2821de7cb0362d12e75a5196b636a59ea3584ec1e1cc7dc6f5e34b9e8389d251" +checksum = "95ee8d1be549eb7316f437035f2cec7ec42aba8374096d807c4de006a3b5d78a" dependencies = [ "datafusion-expr", "quote", @@ -1212,14 +1225,15 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1594c7a97219ede334f25347ad8d57056621e7f4f35a0693c8da876e10dd6a53" +checksum = "c9fa98671458254928af854e5f6c915e66b860a8bde505baea0ff2892deab74d" dependencies = [ "arrow", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-expr-common", "datafusion-physical-expr", "indexmap", "itertools 0.14.0", @@ -1231,9 +1245,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" 
-version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc6da0f2412088d23f6b01929dedd687b5aee63b19b674eb73d00c3eb3c883b7" +checksum = "3515d51531cca5f7b5a6f3ea22742b71bb36fc378b465df124ff9a2fa349b002" dependencies = [ "ahash", "arrow", @@ -1253,9 +1267,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcb0dbd9213078a593c3fe28783beaa625a4e6c6a6c797856ee2ba234311fb96" +checksum = "24485475d9c618a1d33b2a3dad003d946dc7a7bbf0354d125301abc0a5a79e3e" dependencies = [ "ahash", "arrow", @@ -1267,9 +1281,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d140854b2db3ef8ac611caad12bfb2e1e1de827077429322a6188f18fc0026a" +checksum = "b9da411a0a64702f941a12af2b979434d14ec5d36c6f49296966b2c7639cbb3a" dependencies = [ "arrow", "datafusion-common", @@ -1279,6 +1293,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", + "datafusion-pruning", "itertools 0.14.0", "log", "recursive", @@ -1286,9 +1301,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b46cbdf21a01206be76d467f325273b22c559c744a012ead5018dfe79597de08" +checksum = "a6d168282bb7b54880bb3159f89b51c047db4287f5014d60c3ef4c6e1468212b" dependencies = [ "ahash", "arrow", @@ -1314,11 +1329,29 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-pruning" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391a457b9d23744c53eeb89edd1027424cba100581488d89800ed841182df905" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + 
"datafusion-datasource", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools 0.14.0", + "log", +] + [[package]] name = "datafusion-session" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a72733766ddb5b41534910926e8da5836622316f6283307fd9fb7e19811a59c" +checksum = "053201c2bb729c7938f85879034df2b5a52cfaba16f1b3b66ab8505c81b2aad3" dependencies = [ "arrow", "async-trait", @@ -1340,9 +1373,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "48.0.1" +version = "49.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5162338cdec9cc7ea13a0e6015c361acad5ec1d88d83f7c86301f789473971f" +checksum = "9082779be8ce4882189b229c0cff4393bd0808282a7194130c9f32159f185e25" dependencies = [ "arrow", "bigdecimal", @@ -1559,8 +1592,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi 0.11.1+wasi-snapshot-preview1", + "wasm-bindgen", ] [[package]] @@ -1628,12 +1663,6 @@ dependencies = [ "foldhash", ] -[[package]] -name = "hermit-abi" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" - [[package]] name = "hex" version = "0.4.3" @@ -1815,22 +1844,11 @@ dependencies = [ "libc", ] -[[package]] -name = "is-terminal" -version = "0.4.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys 0.59.0", -] - [[package]] name = "itertools" -version = "0.10.5" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] @@ -1934,6 +1952,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "libbz2-rs-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" + [[package]] name = "libc" version = "0.2.174" @@ -2217,6 +2241,7 @@ dependencies = [ "num-bigint", "object_store", "paste", + "ring", "seq-macro", "simdutf8", "snap", @@ -2472,6 +2497,20 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.16", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rustc-demangle" version = "0.1.25" @@ -2890,6 +2929,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" version = "2.5.4" diff --git a/Cargo.toml b/Cargo.toml index 5f64ef8..c4125ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,18 +9,17 @@ exclude = [ ] [dependencies] -datafusion = "48.0.1" +datafusion = "49.0.2" tokio = {version = "1"} [dev-dependencies] -criterion = { version = "0.5", features = ["html_reports", "async_tokio"] } +criterion = { version = "0.7", features = ["html_reports", "async_tokio"] } tokio = { version = "1", 
features = ["full"] } [[bench]] name = "pagerank_benchmark" harness = false # To disable Rust's default benchmarking and use the Criterion one -# Adding more benchmarks -# [[bench]] -# name = "shortestdistance_benchmark" -# harness = false \ No newline at end of file +[[bench]] +name = "cc_benchmark" +harness = false \ No newline at end of file diff --git a/benches/README.md b/benches/README.md index 0f5acb4..076b7ba 100644 --- a/benches/README.md +++ b/benches/README.md @@ -1,9 +1,9 @@ -# Running Benchmarks for Graphframe-rs +# Running Benchmarks for graphframes-rs -Benchmarking for Graphframe-rs are currently done on LDBC Graphalytics [datasets](https://ldbcouncil.org/benchmarks/graphalytics/datasets/). +Benchmarking for graphframes-rs is currently done on LDBC Graphalytics [datasets](https://ldbcouncil.org/benchmarks/graphalytics/datasets/). Benchmarking runs and reports are executed/generated as html-reports using Rust Criterion crate. -## How to run benchmarks ? +## How to run benchmarks? `run_benchmarks.py` file is the main source for running the benchmarks. @@ -24,9 +24,9 @@ CLI utility: ### Parameters for `run_benchmarks.py` -- `--dataset`: [MANDATORY] LDBC dataset name on which user want to run the benchmark (for e.g. test-pr-directed, cit-Patents). Dataset name are exactly same as mentioned in LDBC website. +- `--dataset`: LDBC dataset name on which the user wants to run the benchmark (e.g., test-pr-directed, cit-Patents). The dataset name must exactly match the name listed on the LDBC website. Default is wiki-Talk. - `--checkpoint_interval`: If user wants to define a specific number of checkpoints for Algorithms to run on. `default: 1` -- `--name`: If a particular benchmark needs to run. Name should be same as the `[[bench]]` names present in `Cargo.toml` +- `--name`: [MANDATORY] If a particular benchmark needs to run.
Name should be same as the `[[bench]]` names present in `Cargo.toml` ```bash # Running all the benchmarks diff --git a/benches/cc_benchmark.rs b/benches/cc_benchmark.rs new file mode 100644 index 0000000..8dc9b9e --- /dev/null +++ b/benches/cc_benchmark.rs @@ -0,0 +1,42 @@ +use criterion::{Criterion, criterion_group, criterion_main}; +use graphframes_rs::util::create_ldbc_test_graph; +use std::env; +use tokio::runtime::Runtime; + +fn benchmark_cc(c: &mut Criterion) { + let dataset_name = + env::var("BENCHMARK_DATASET").expect("BENCHMARK_DATASET environment variable not set"); + let is_weighted = match env::var("WEIGHTED").expect("WEIGHTED environment variable not set") { + s if s == "true" => true, + _ => false, + }; + + let mut group = c.benchmark_group("Connected Components"); + group.sample_size(10); + group.measurement_time(std::time::Duration::from_secs(200)); + + // Create a Tokio runtime to execute the async graph loading function. + let rt = Runtime::new().unwrap(); + + // Load the graph data once before running the benchmark. + let graph = rt + .block_on(create_ldbc_test_graph(&dataset_name, true, is_weighted)) + .expect("Failed to create test graph"); + + // Build cc_builder once, outside the measured closure, so its construction is not timed in each iteration + let cc_builder = graph.connected_components(); + + // Define the benchmark. + // Criterion runs the code inside the closure many times to get a reliable measurement. + group.bench_function(String::from("cc-".to_owned() + &dataset_name), |b| { + // Use the `to_async` adapter to benchmark an async function. NOTE: the `collect()` result is dropped by `let _`, so collect errors go unnoticed.
+ b.to_async(&rt).iter(|| async { + let _ = cc_builder.clone().run().await.unwrap().data.collect().await; + }) + }); + + group.finish(); +} + +criterion_group!(benches, benchmark_cc); +criterion_main!(benches); diff --git a/benches/pagerank_benchmark.rs b/benches/pagerank_benchmark.rs index d32aa35..b3f6faa 100644 --- a/benches/pagerank_benchmark.rs +++ b/benches/pagerank_benchmark.rs @@ -1,4 +1,4 @@ -use criterion::{Criterion, criterion_group, criterion_main}; +use criterion::{criterion_group, criterion_main, Criterion}; use graphframes_rs::util::create_ldbc_test_graph; use std::env; use tokio::runtime::Runtime; @@ -12,14 +12,21 @@ fn benchmark_pagerank(c: &mut Criterion) { .parse() .expect("CHECKPOINT_INTERVAL is not a valid int"); + let is_weighted = match env::var("WEIGHTED").expect("WEIGHTED environment variable not set") { + s if s == "true" => true, + _ => false, + }; + let mut group = c.benchmark_group("PageRank"); + group.sample_size(10); + group.measurement_time(std::time::Duration::from_secs(200)); // Create a Tokio runtime to execute the async graph loading function. let rt = Runtime::new().unwrap(); // Load the graph data once before running the benchmark. let graph = rt - .block_on(create_ldbc_test_graph(&dataset_name, true, false)) + .block_on(create_ldbc_test_graph(&dataset_name, true, is_weighted)) .expect("Failed to create test graph"); // Creating pagerank_builder here so to exclude the time of generation in each iteration diff --git a/run_benchmarks.py b/run_benchmarks.py index 91405c2..6fb11a4 100644 --- a/run_benchmarks.py +++ b/run_benchmarks.py @@ -1,12 +1,13 @@ import argparse import os import pathlib -import requests +import shutil import subprocess import sys -import shutil import time +import requests + # The base URL for downloading Graphalytics datasets.
BASE_URL = "https://datasets.ldbcouncil.org/graphalytics" @@ -27,7 +28,7 @@ def prepare_dataset(dataset_name: str): print(f"Dataset '{dataset_name}' is ready.") return - # make dataset_dir if doesn't exist + # make dataset_dir if it doesn't exist os.mkdir(dataset_dir) # If the archive doesn't exist, download it. @@ -97,7 +98,7 @@ def prepare_dataset(dataset_name: str): for dirpath, _, filenames in os.walk(dataset_dir): for filename in filenames: if (not filename.endswith(".properties")) and ( - not filename.endswith(".tar.zst") + not filename.endswith(".tar.zst") ): old_path = pathlib.Path(dirpath) / filename new_path = old_path.with_name(f"{old_path.name}.csv") @@ -118,7 +119,7 @@ def prepare_dataset(dataset_name: str): sys.exit(1) -def run_benchmarks(dataset_name: str, checkpoint_interval: int, benchmark_name: str): +def run_benchmarks(dataset_name: str, checkpoint_interval: int, benchmark_name: str, is_weighted: str = "false"): """ Runs the Rust benchmarks using 'cargo bench', passing the dataset name as an environment variable. @@ -128,7 +129,8 @@ def run_benchmarks(dataset_name: str, checkpoint_interval: int, benchmark_name: # Set the dataset name in an environment variable for the benchmark process. env = os.environ.copy() env["BENCHMARK_DATASET"] = dataset_name - env["CHECKPOINT_INTERVAL"] = checkpoint_interval + env["CHECKPOINT_INTERVAL"] = str(checkpoint_interval) + env["WEIGHTED"] = is_weighted # Execute 'cargo bench' and stream its output. 
try: @@ -181,7 +183,8 @@ def main(): parser.add_argument( "--dataset", type=str, - required=True, + default="wiki-Talk", + required=False, help="The name of the Graphalytics dataset to download and use for benchmarking (e.g., 'test-pr-directed').", ) parser.add_argument( @@ -194,9 +197,16 @@ def main(): parser.add_argument( "--name", type=str, - required=False, + required=True, help="Name of the benchmark that needs to run.", ) + parser.add_argument( + "--weighted", + type=str, + required=False, + default="false", + help="Whether the graph is weighted or not.", + ) args = parser.parse_args() dataset = args.dataset checkpoint_interval = args.checkpoint_interval @@ -206,7 +216,7 @@ def main(): BENCH_DATA_DIR.mkdir(parents=True, exist_ok=True) prepare_dataset(dataset) - run_benchmarks(dataset, checkpoint_interval, benchmark_name) + run_benchmarks(dataset, checkpoint_interval, benchmark_name, is_weighted=args.weighted) if __name__ == "__main__": diff --git a/src/connected_components.rs b/src/connected_components.rs index 27077e6..cf7b607 100644 --- a/src/connected_components.rs +++ b/src/connected_components.rs @@ -63,14 +63,14 @@ async fn min_nbr_sum(min_neighbours: &DataFrame) -> Result { .map(|a| a.value(0)) } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ConnectedComponentsOutput { pub data: DataFrame, pub num_iterations: usize, pub min_nbr_sum: Vec, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ConnectedComponentsBuilder<'a> { graph_frame: &'a GraphFrame, } diff --git a/src/util.rs b/src/util.rs index a58a0c3..1e426dc 100644 --- a/src/util.rs +++ b/src/util.rs @@ -79,6 +79,8 @@ pub async fn create_ldbc_test_graph( .schema(&vertices_schema), ) .await?; + println!("read {} vertices", vertices.clone().count().await?); + println!("read {} edges", edges.clone().count().await?); Ok(GraphFrame { vertices, edges }) } From 4eab64d6eb89fb25b39e266858adedb280e88995 Mon Sep 17 00:00:00 2001 From: semyonsinchenko Date: Mon, 8 Sep 2025 18:40:10 +0200 Subject: 
[PATCH 3/4] Add shortest paths benchmark - Introduced `sp_benchmark.rs` for benchmarking shortest path algorithms. - Added `Clone` and `Debug` traits to `ShortestPathsBuilder`. - Updated `Cargo.toml` to include the new benchmark configuration. --- Cargo.toml | 4 +++ benches/pagerank_benchmark.rs | 2 +- benches/sp_benchmark.rs | 47 +++++++++++++++++++++++++++++++++++ src/shortest_paths.rs | 1 + 4 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 benches/sp_benchmark.rs diff --git a/Cargo.toml b/Cargo.toml index c4125ad..7e8422a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,4 +22,8 @@ harness = false # To disable Rust's default benchmarking and use the Criterion o [[bench]] name = "cc_benchmark" +harness = false + +[[bench]] +name = "sp_benchmark" harness = false \ No newline at end of file diff --git a/benches/pagerank_benchmark.rs b/benches/pagerank_benchmark.rs index b3f6faa..e7d5491 100644 --- a/benches/pagerank_benchmark.rs +++ b/benches/pagerank_benchmark.rs @@ -1,4 +1,4 @@ -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use graphframes_rs::util::create_ldbc_test_graph; use std::env; use tokio::runtime::Runtime; diff --git a/benches/sp_benchmark.rs b/benches/sp_benchmark.rs new file mode 100644 index 0000000..0d99e65 --- /dev/null +++ b/benches/sp_benchmark.rs @@ -0,0 +1,47 @@ +use criterion::{Criterion, criterion_group, criterion_main}; +use graphframes_rs::util::create_ldbc_test_graph; +use std::env; +use tokio::runtime::Runtime; + +fn benchmark_sp(c: &mut Criterion) { + let dataset_name = + env::var("BENCHMARK_DATASET").expect("BENCHMARK_DATASET environment variable not set"); + let checkpoint_interval: usize = env::var("CHECKPOINT_INTERVAL") + .expect("CHECKPOINT_INTERVAL environment variable not set") + .parse() + .expect("CHECKPOINT_INTERVAL is not a valid int"); + + let is_weighted = match env::var("WEIGHTED").expect("WEIGHTED environment variable not set") { + s
if s == "true" => true, + _ => false, + }; + let mut group = c.benchmark_group("ShortestPath"); + group.sample_size(10); + group.measurement_time(std::time::Duration::from_secs(200)); + + let rt = Runtime::new().unwrap(); + let graph = rt + .block_on(create_ldbc_test_graph(&dataset_name, true, is_weighted)) + .expect("Failed to create test graph"); + + let sp_builder = graph + .shortest_paths(vec![2i64]) // TODO: read the landmark from props instead of hard-coding vertex 2 + .checkpoint_interval(checkpoint_interval); + + group.bench_function( + String::from( + "sp-".to_owned() + &dataset_name + "-cp-" + &checkpoint_interval.to_string(), + ), + |b| { + // Use the `to_async` adapter to benchmark an async function. + b.to_async(&rt).iter(|| async { + let _ = sp_builder.clone().run().await.unwrap().collect().await; + }) + }, + ); + + group.finish(); +} + +criterion_group!(benches, benchmark_sp); +criterion_main!(benches); diff --git a/src/shortest_paths.rs b/src/shortest_paths.rs index e390b45..2e399bd 100644 --- a/src/shortest_paths.rs +++ b/src/shortest_paths.rs @@ -105,6 +105,7 @@ impl Accumulator for DistancesMap { /// /// This builder helps configure and execute a Pregel algorithm that computes the shortest paths /// from all vertices in the graph to a specified set of landmark vertices.
+#[derive(Debug, Clone)] pub struct ShortestPathsBuilder<'a> { /// Reference to the graph frame containing vertices and edges graph_frame: &'a GraphFrame, From fbf558e292735d7778a8a6a366eacc34b7b1d6c2 Mon Sep 17 00:00:00 2001 From: semyonsinchenko Date: Mon, 8 Sep 2025 18:41:24 +0200 Subject: [PATCH 4/4] Refactor shortest paths benchmark string construction --- benches/sp_benchmark.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benches/sp_benchmark.rs b/benches/sp_benchmark.rs index 0d99e65..535f75d 100644 --- a/benches/sp_benchmark.rs +++ b/benches/sp_benchmark.rs @@ -29,9 +29,7 @@ fn benchmark_sp(c: &mut Criterion) { .checkpoint_interval(checkpoint_interval); group.bench_function( - String::from( - "sp-".to_owned() + &dataset_name + "-cp-" + &checkpoint_interval.to_string(), - ), + String::from("sp-".to_owned() + &dataset_name + "-cp-" + &checkpoint_interval.to_string()), |b| { // Use the `to_async` adapter to benchmark an async function. b.to_async(&rt).iter(|| async {