diff --git a/crates/exec-harness/preload/codspeed_preload.c b/crates/exec-harness/preload/codspeed_preload.c
index 0b908e04..418af143 100644
--- a/crates/exec-harness/preload/codspeed_preload.c
+++ b/crates/exec-harness/preload/codspeed_preload.c
@@ -7,8 +7,6 @@
 //
 // Environment variables:
 //   CODSPEED_BENCH_URI - The benchmark URI to report (required)
-//   CODSPEED_PRELOAD_LOCK - Set by the first process to prevent child processes
-//                           from re-initializing instrumentation
 
 #include
 #include
@@ -22,8 +20,6 @@
 #define RUNNING_ON_VALGRIND 0
 #endif
-static const char *LOCK_ENV = "CODSPEED_PRELOAD_LOCK";
-
 // These constants are defined by the build script (build.rs) via -D flags
 #ifndef CODSPEED_URI_ENV
 #error "CODSPEED_URI_ENV must be defined by the build system"
 #endif
@@ -54,14 +50,6 @@ __attribute__((constructor)) static void codspeed_preload_init(void) {
     return;
   }
 
-  // Check if another process already owns the instrumentation
-  if (getenv(LOCK_ENV)) {
-    return;
-  }
-
-  // Set the lock to prevent child processes from initializing
-  setenv(LOCK_ENV, "1", 1);
-
   g_bench_uri = getenv(URI_ENV);
   if (!g_bench_uri) {
     return;
diff --git a/crates/exec-harness/src/analysis/mod.rs b/crates/exec-harness/src/analysis/mod.rs
index 65e220d7..430a6ef9 100644
--- a/crates/exec-harness/src/analysis/mod.rs
+++ b/crates/exec-harness/src/analysis/mod.rs
@@ -6,6 +6,7 @@ use crate::BenchmarkCommand;
 use crate::constants;
 use crate::uri;
 use instrument_hooks_bindings::InstrumentHooks;
+use std::path::PathBuf;
 use std::process::Command;
 
 mod ld_preload_check;
@@ -54,9 +55,15 @@ pub fn perform_with_valgrind(commands: Vec<BenchmarkCommand>) -> Result<()> {
         cmd.args(&benchmark_cmd.command[1..]);
         // Use LD_PRELOAD to inject instrumentation into the child process
         cmd.env("LD_PRELOAD", preload_lib_path);
+        // Make sure python processes output perf maps. This is usually done by `pytest-codspeed`
+        cmd.env("PYTHONPERFSUPPORT", "1");
         cmd.env(constants::URI_ENV, &name_and_uri.uri);
 
-        let status = cmd.status().context("Failed to execute command")?;
+        let mut child = cmd.spawn().context("Failed to spawn command")?;
+
+        let status = child.wait().context("Failed to execute command")?;
+
+        bail_if_command_spawned_subprocesses_under_valgrind(child.id())?;
 
         if !status.success() {
             bail!("Command exited with non-zero status: {status}");
@@ -65,3 +72,48 @@ pub fn perform_with_valgrind(commands: Vec<BenchmarkCommand>) -> Result<()> {
 
     Ok(())
 }
+
+/// Checks if the benchmark process spawned subprocesses under valgrind by looking for <pid>.out
+/// files in the profile folder.
+///
+/// The presence of <pid>.out files where <pid> is greater than the benchmark process pid indicates
+/// that the benchmark process spawned subprocesses. Such a <pid>.out file will be almost empty,
+/// with a 0 cost reported due to the disabled instrumentation.
+///
+/// We currently do not support measuring processes that spawn subprocesses under valgrind, because
+/// valgrind will not have its instrumentation in the new process.
+/// The LD_PRELOAD trick that we use to inject our instrumentation into the benchmark process only
+/// works for the first process.
+///
+/// TODO(COD-2163): Remove this once we support nested processes under valgrind
+fn bail_if_command_spawned_subprocesses_under_valgrind(pid: u32) -> Result<()> {
+    let Some(profile_folder) = std::env::var_os("CODSPEED_PROFILE_FOLDER") else {
+        debug!("CODSPEED_PROFILE_FOLDER is not set, skipping subprocess detection");
+        return Ok(());
+    };
+
+    let profile_folder = PathBuf::from(profile_folder);
+
+    // Bail if any <pid>.out where <pid> > pid of the benchmark process exists in the profile
+    // folder, which indicates that the benchmark process spawned subprocesses.
+    for entry in std::fs::read_dir(profile_folder)? {
+        let entry = entry?;
+        let file_name = entry.file_name();
+        let file_name = file_name.to_string_lossy();
+
+        if let Some(stripped) = file_name.strip_suffix(".out") {
+            if let Ok(subprocess_pid) = stripped.parse::<u32>() {
+                if subprocess_pid > pid {
+                    bail!(
+                        "The codspeed CLI in CPU Simulation mode does not support measuring processes that spawn other processes yet.\n\n\
+                        Please either:\n\
+                        - Use the walltime measurement mode, or\n\
+                        - Benchmark a process that does not create subprocesses"
+                    )
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
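
Note on the perf-map handling above: with PYTHONPERFSUPPORT=1, CPython writes a perf map to /tmp/perf-<pid>.map describing its frames, and the runner later copies those maps into the profile folder. The sketch below shows that harvesting step in isolation; it is a simplified stand-in rather than the crate's actual harvest_perf_maps_for_pids, and the name harvest_python_perf_maps is made up for the example.

// Illustrative sketch only -- not the runner's real implementation.
use std::collections::HashSet;
use std::path::{Path, PathBuf};

async fn harvest_python_perf_maps(profile_folder: &Path, pids: &HashSet<u32>) -> std::io::Result<()> {
    for pid in pids {
        // CPython (with PYTHONPERFSUPPORT=1) emits its perf map at /tmp/perf-<pid>.map.
        let src = PathBuf::from("/tmp").join(format!("perf-{pid}.map"));
        if !src.exists() {
            // Not every pid belongs to a Python process with perf support enabled.
            continue;
        }
        let dst = profile_folder.join(format!("perf-{pid}.map"));
        // tokio::fs::copy mirrors std::fs::copy without blocking the async runtime,
        // which is why the diff above switches harvest_perf_maps_for_pids to tokio::fs.
        tokio::fs::copy(&src, &dst).await?;
    }
    Ok(())
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let pids = HashSet::from([std::process::id()]);
    harvest_python_perf_maps(&std::env::temp_dir(), &pids).await
}
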
diff --git a/src/executor/helpers/harvest_perf_maps_for_pids.rs b/src/executor/helpers/harvest_perf_maps_for_pids.rs
index 67270359..4ffda881 100644
--- a/src/executor/helpers/harvest_perf_maps_for_pids.rs
+++ b/src/executor/helpers/harvest_perf_maps_for_pids.rs
@@ -1,7 +1,7 @@
 use crate::prelude::*;
 use std::collections::HashSet;
-use std::fs;
 use std::path::{Path, PathBuf};
+use tokio::fs;
 
 pub async fn harvest_perf_maps_for_pids(
     profile_folder: &Path,
@@ -21,7 +21,7 @@
     debug!("Found {} perf maps", perf_maps.len());
 
     for (src_path, dst_path) in perf_maps {
-        fs::copy(&src_path, &dst_path).map_err(|e| {
+        fs::copy(&src_path, &dst_path).await.map_err(|e| {
             anyhow!(
                 "Failed to copy perf map file: {:?} to {}: {}",
                 src_path.file_name(),
diff --git a/src/executor/shared/fifo.rs b/src/executor/shared/fifo.rs
index 40f2cc81..723f921c 100644
--- a/src/executor/shared/fifo.rs
+++ b/src/executor/shared/fifo.rs
@@ -69,7 +69,6 @@ impl GenericFifo {
 pub struct FifoBenchmarkData {
     /// Name and version of the integration
     pub integration: Option<(String, String)>,
-    pub bench_pids: HashSet<u32>,
 }
 
 pub struct RunnerFifo {
@@ -255,10 +254,7 @@ impl RunnerFifo {
                 );
                 let marker_result = ExecutionTimestamps::new(&bench_order_by_timestamp, &markers);
 
-                let fifo_data = FifoBenchmarkData {
-                    integration,
-                    bench_pids,
-                };
+                let fifo_data = FifoBenchmarkData { integration };
                 return Ok((marker_result, fifo_data, exit_status));
             }
             Err(e) => return Err(anyhow::Error::from(e)),
diff --git a/src/executor/wall_time/perf/jit_dump.rs b/src/executor/wall_time/perf/jit_dump.rs
index a337f263..9f92c4f4 100644
--- a/src/executor/wall_time/perf/jit_dump.rs
+++ b/src/executor/wall_time/perf/jit_dump.rs
@@ -102,7 +102,10 @@ impl JitDump {
 }
 
 /// Converts all the `jit-<pid>.dump` into unwind data and copies it to the profile folder.
-pub async fn harvest_perf_jit_for_pids(profile_folder: &Path, pids: &HashSet<u32>) -> Result<()> {
+pub async fn harvest_perf_jit_for_pids(
+    profile_folder: &Path,
+    pids: &HashSet<u32>,
+) -> Result<()> {
     for pid in pids {
         let name = format!("jit-{pid}.dump");
         let path = PathBuf::from("/tmp").join(&name);
@@ -131,7 +134,7 @@ pub async fn harvest_perf_jit_for_pids(profile_folder: &Path, pids: &HashSet<u32>) -> Result<()> {
+    pub async fn save_to<P: AsRef<Path>>(
         &self,
         path: P,
         perf_file_path: P,
     ) -> Result<(), BenchmarkDataSaveError> {
         self.marker_result.save_to(&path).unwrap();
+        let path_ref = path.as_ref();
+
         debug!("Reading perf data from file for mmap extraction");
         let MemmapRecordsOutput {
             symbols_by_pid,
@@ -294,7 +293,23 @@ impl BenchmarkData {
             })?
         };
 
-        let path_ref = path.as_ref();
+        // Harvest the perf maps generated by python. This will copy the perf
+        // maps from /tmp to the profile folder. We have to write our own perf
+        // maps to these files AFTERWARDS, otherwise they'll be overwritten!
+        let bench_pids = symbols_by_pid.keys().copied().collect();
+        harvest_perf_maps_for_pids(path_ref, &bench_pids)
+            .await
+            .map_err(|e| {
+                error!("Failed to harvest perf maps: {e}");
+                BenchmarkDataSaveError::FailedToHarvestPerfMaps
+            })?;
+        harvest_perf_jit_for_pids(path_ref, &bench_pids)
+            .await
+            .map_err(|e| {
+                error!("Failed to harvest jit dumps: {e}");
+                BenchmarkDataSaveError::FailedToHarvestJitDumps
+            })?;
+
         debug!("Saving symbols addresses");
         symbols_by_pid.par_iter().for_each(|(_, proc_sym)| {
             proc_sym.save_to(path_ref).unwrap();
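
A note on the new ordering in save_to: the harvested Python perf maps and the runner's own symbol entries end up in the same perf-<pid>.map files in the profile folder, so whichever write happens last wins. The sketch below illustrates the idea, assuming the runner appends its own entries after harvesting (the exact write path is not shown in this diff); append_symbol and its arguments are hypothetical names for the example.

// Hedged sketch: appending keeps the Python-generated entries that harvesting copied in,
// whereas recreating the file (File::create) would truncate and lose them.
use std::fs::OpenOptions;
use std::io::Write;
use std::path::Path;

fn append_symbol(
    profile_folder: &Path,
    pid: u32,
    start: u64,
    size: u64,
    name: &str,
) -> std::io::Result<()> {
    let path = profile_folder.join(format!("perf-{pid}.map"));
    let mut file = OpenOptions::new()
        .create(true)
        .append(true) // keep whatever harvesting already copied into this file
        .open(path)?;
    // perf map lines are "START SIZE symbol_name", with START and SIZE in hex.
    writeln!(file, "{start:x} {size:x} {name}")
}

fn main() -> std::io::Result<()> {
    append_symbol(
        &std::env::temp_dir(),
        std::process::id(),
        0x7f00_0000,
        0x40,
        "my_benchmark_fn",
    )
}

Either way, the last writer to a given perf-<pid>.map wins, which is why save_to now harvests before writing the runner's own symbol data.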