From d45759802586d1ffd7d6a396dd42a7e839a2f8ce Mon Sep 17 00:00:00 2001 From: Rebecca Turner Date: Sat, 23 Aug 2025 01:44:28 -0700 Subject: [PATCH] native nix-diff --- Cargo.lock | 40 +-- Cargo.toml | 3 +- src/diff_trees.rs | 31 +- src/id_hasher.rs | 45 +++ src/main.rs | 19 ++ src/nix/derivation/diff.rs | 271 ++++++++++++++++++ src/nix/derivation/mod.rs | 56 ++++ .../{derivation.rs => derivation/serde.rs} | 54 +--- src/nix/mod.rs | 2 +- src/strings_set.rs | 105 +++++++ 10 files changed, 517 insertions(+), 109 deletions(-) create mode 100644 src/id_hasher.rs create mode 100644 src/nix/derivation/diff.rs create mode 100644 src/nix/derivation/mod.rs rename src/nix/{derivation.rs => derivation/serde.rs} (50%) create mode 100644 src/strings_set.rs diff --git a/Cargo.lock b/Cargo.lock index 1515c80..5118045 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -100,7 +100,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi 0.1.19", + "hermit-abi", "libc", "winapi", ] @@ -373,12 +373,6 @@ dependencies = [ "libc", ] -[[package]] -name = "hermit-abi" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" - [[package]] name = "iddqd" version = "0.3.11" @@ -403,17 +397,6 @@ dependencies = [ "hashbrown", ] -[[package]] -name = "is-terminal" -version = "0.4.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" -dependencies = [ - "hermit-abi 0.5.2", - "libc", - "windows-sys 0.59.0", -] - [[package]] name = "is_ci" version = "1.2.0" @@ -590,6 +573,7 @@ dependencies = [ "serde_json", "shell-words", "shellexpand", + "similar", "tap", "thiserror", "toml", @@ -660,10 +644,6 @@ name = "owo-colors" version = "4.2.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "48dd4f4a2c8405440fd0462561f0e5806bd0f77e86f51c761481bdd4018b545e" -dependencies = [ - "supports-color 2.1.0", - "supports-color 3.0.2", -] [[package]] name = "parking_lot" @@ -905,6 +885,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + [[package]] name = "smallvec" version = "1.15.1" @@ -933,16 +919,6 @@ dependencies = [ "is_ci", ] -[[package]] -name = "supports-color" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" -dependencies = [ - "is-terminal", - "is_ci", -] - [[package]] name = "supports-color" version = "3.0.2" diff --git a/Cargo.toml b/Cargo.toml index 83ffbb9..3483e91 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,13 +17,14 @@ gethostname = "1.0.2" iddqd = { version = "0.3.11", default-features = false, features = ["serde", "std"] } itertools = "0.14.0" miette = { version = "7.6.0", features = ["fancy"] } -owo-colors = { version = "4.2.2", features = ["supports-colors"] } +owo-colors = "4.2.2" rustc-hash = "2.1.1" same-file = "1.0.6" serde = { version = "1.0.190", features = ["derive"] } serde_json = "1.0.107" shell-words = "1.1.0" shellexpand = "3.1.1" +similar = { version = "2.7.0", features = ["inline"] } tap = "1.0.1" thiserror = "2.0.15" toml = "0.9.5" diff --git a/src/diff_trees.rs b/src/diff_trees.rs index 488113d..699b995 100644 --- a/src/diff_trees.rs +++ b/src/diff_trees.rs @@ -9,7 +9,6 @@ use camino::Utf8PathBuf; use miette::Context; use miette::IntoDiagnostic; use owo_colors::OwoColorize; -use owo_colors::Stream; use tap::TryConv; use walkdir::WalkDir; 
@@ -48,20 +47,12 @@ fn display_path_updates( ) -> String { let mut ret = String::new(); for path in removed_paths { - ret.push_str( - &format!("- {path}") - .if_supports_color(Stream::Stdout, |text| text.red()) - .to_string(), - ); + ret.push_str(&format!("- {path}").red().to_string()); ret.push('\n'); } for path in added_paths { - ret.push_str( - &format!("+ {path}") - .if_supports_color(Stream::Stdout, |text| text.green()) - .to_string(), - ); + ret.push_str(&format!("+ {path}").green().to_string()); ret.push('\n'); } @@ -77,21 +68,13 @@ fn display_diff(diff: &Diff) -> String { DiffKind::Same => {} DiffKind::Added => { let path = entry.format_path(path); - ret.push_str( - &format!("+ {path}") - .if_supports_color(Stream::Stdout, |text| text.green()) - .to_string(), - ); + ret.push_str(&format!("+ {path}").green().to_string()); ret.push('\n'); changed_entries += 1; } DiffKind::Removed => { let path = entry.format_path(path); - ret.push_str( - &format!("- {path}") - .if_supports_color(Stream::Stdout, |text| text.red()) - .to_string(), - ); + ret.push_str(&format!("- {path}").red().to_string()); ret.push('\n'); changed_entries += 1; } @@ -101,11 +84,7 @@ fn display_diff(diff: &Diff) -> String { } let path = entry.format_path(path); - ret.push_str( - &format!("~ {path}") - .if_supports_color(Stream::Stdout, |text| text.yellow()) - .to_string(), - ); + ret.push_str(&format!("~ {path}").yellow().to_string()); ret.push('\n'); changed_entries += 1; } diff --git a/src/id_hasher.rs b/src/id_hasher.rs new file mode 100644 index 0000000..3f86d6b --- /dev/null +++ b/src/id_hasher.rs @@ -0,0 +1,45 @@ +// Borrowed from `polars_core`: +// + +use std::hash::BuildHasher; +use std::hash::Hasher; + +#[derive(Default)] +pub struct IdHasher { + hash: u64, +} + +impl Hasher for IdHasher { + fn finish(&self) -> u64 { + self.hash + } + + fn write(&mut self, _bytes: &[u8]) { + unreachable!("IdHasher should only be used for integer keys <= 64 bit precision") + } + + fn write_u32(&mut self, 
i: u32) { + self.hash = i as u64; + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.hash = i; + } + + fn write_i32(&mut self, i: i32) { + self.hash = i as u64; + } + + fn write_i64(&mut self, i: i64) { + self.hash = i as u64; + } +} + +impl BuildHasher for IdHasher { + type Hasher = Self; + + fn build_hasher(&self) -> Self::Hasher { + Self::default() + } +} diff --git a/src/main.rs b/src/main.rs index b146215..5f93805 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +use camino::Utf8Path; use clap::Parser; mod app; @@ -7,8 +8,10 @@ mod diff_trees; mod directories; mod format_bulleted_list; mod fs; +mod id_hasher; mod nix; mod pins; +mod strings_set; mod tracing; mod which; @@ -18,6 +21,22 @@ use crate::app::App; use crate::config::Config; fn main() -> miette::Result<()> { + let nix = nix::Nix::new()?; + println!( + "{}", + nix::derivation::diff_derivations( + &nix, + &nix.derivation_info(Utf8Path::new( + "/nix/store/bgv2g1nwp7i0nibrpsagpxdmavi8bywv-npingler-packages.drv" + ))?, + &nix.derivation_info(Utf8Path::new( + "/nix/store/8n4z0fzzi19qli6b2rlwlddvgcqsws7j-npingler-packages.drv" + ))?, + )? 
+ ); + + panic!("teehee :)"); + let opts = cli::Args::parse(); let filter_reload = tracing::install_tracing( opts.log_filter() diff --git a/src/nix/derivation/diff.rs b/src/nix/derivation/diff.rs new file mode 100644 index 0000000..91b5b75 --- /dev/null +++ b/src/nix/derivation/diff.rs @@ -0,0 +1,271 @@ +#![expect(dead_code)] + +use std::fmt::Display; +use std::hash::Hash; +use std::io::Write; +use std::ops::AddAssign; + +use miette::Context; +use miette::IntoDiagnostic; +use owo_colors::OwoColorize; +use similar::DiffOp; + +use crate::nix::Derivation; +use crate::nix::Nix; +use crate::strings_set::FxStringDiffSet; +use crate::strings_set::FxStringsDiffSet; +use crate::strings_set::SetAdd; + +pub fn diff_derivations(nix: &Nix, old: &Derivation, new: &Derivation) -> miette::Result { + const DERIVATIONS_TO_COMPARE: usize = 2048; + const RENDERED_DIFF_BYTES: usize = 4096; + let mut writer = Vec::with_capacity(RENDERED_DIFF_BYTES); + let mut state = DiffState { + nix, + old, + new, + builder_comparisons: FxStringDiffSet::with_capacity(DERIVATIONS_TO_COMPARE), + arg_comparisons: FxStringsDiffSet::with_capacity(DERIVATIONS_TO_COMPARE), + writer: &mut writer, + indent: String::new(), + }; + state.diff_one(old, new)?; + String::from_utf8(writer) + .into_diagnostic() + .wrap_err("derivation diff produced invalid UTF-8. uh oh!") +} + +enum DiffEvent<'d> { + Enter { + old: &'d Derivation, + new: &'d Derivation, + }, + AlreadyCompared { + description: &'d str, + }, + /// Cringe variant in my fail data structure. + /// This duplication feels bad !! 
+ DiffOwned { + description: &'d str, + diff: RenderableDiff<'d, String>, + }, + DiffBorrowed { + description: &'d str, + diff: RenderableDiff<'d, &'d str>, + }, +} + +struct DiffState<'d, W> { + nix: &'d Nix, + old: &'d Derivation, + new: &'d Derivation, + builder_comparisons: FxStringDiffSet, + arg_comparisons: FxStringsDiffSet, + indent: String, + writer: W, +} + +impl<'d, W> DiffState<'d, W> +where + W: Write, +{ + fn diff_one( + &mut self, + old: &'d Derivation, + new: &'d Derivation, + ) -> miette::Result { + if old.path == new.path { + return Ok(DiffNovelty::Boring); + } + + self.emit(DiffEvent::Enter { old, new })?; + + let mut novelty = DiffNovelty::Boring; + + novelty += self.diff_builders(old.builder.as_str(), new.builder.as_str())?; + novelty += self.diff_args(&old.args, &new.args)?; + + // compare env + // compare input_drvs (drvs) + // compare input_srcs (store paths) + // compare outputs + // compare system + // + // did you see anything new for a particular derivation-tree? if no, collapse it! 
+ // this requires bundling up the diff events i guess + + Ok(novelty) + } + + fn diff_builders(&mut self, old: &str, new: &str) -> miette::Result { + if old == new { + return Ok(DiffNovelty::Boring); + } + + if let SetAdd::AlreadyPresent = self.builder_comparisons.insert(old, new) { + self.emit(DiffEvent::AlreadyCompared { + description: "Builders", + })?; + return Ok(DiffNovelty::Boring); + } + + self.emit(DiffEvent::DiffBorrowed { + description: "Builders", + diff: RenderableDiff::diff(&[old], &[new]), + })?; + + Ok(DiffNovelty::Novel) + } + + fn diff_args(&mut self, old: &[String], new: &[String]) -> miette::Result { + if old == new { + return Ok(DiffNovelty::Boring); + } + + if let SetAdd::AlreadyPresent = self.arg_comparisons.insert((old, new)) { + self.emit(DiffEvent::AlreadyCompared { + description: "Builder args", + })?; + return Ok(DiffNovelty::Boring); + } + + self.emit(DiffEvent::DiffOwned { + description: "Builder args", + diff: RenderableDiff::diff(old, new), + })?; + + Ok(DiffNovelty::Novel) + } + + fn emit(&mut self, event: DiffEvent) -> miette::Result<()> { + match event { + DiffEvent::Enter { old, new } => { + self.line(format!("- {}", old.path).red())?; + self.line(format!("+ {}", new.path).green())?; + self.indent.push_str(" "); + } + DiffEvent::AlreadyCompared { description } => { + self.line(format_args!("{} already compared", description).yellow())?; + } + DiffEvent::DiffOwned { description, diff } => { + self.line(format_args!("{} changed:", description).bold())?; + self.render_diff(&diff)?; + } + DiffEvent::DiffBorrowed { description, diff } => { + self.line(format_args!("{} changed:", description).bold())?; + self.render_diff(&diff)?; + } + } + Ok(()) + } + + fn line(&mut self, line: impl Display) -> miette::Result<()> { + writeln!(self.writer, "{}{}", self.indent, line).into_diagnostic()?; + Ok(()) + } + + fn render_diff(&mut self, diff: &RenderableDiff<'_, impl AsRef>) -> miette::Result<()> { + // TODO: Customizability. 
+        const CONTEXT: usize = 3;
+
+        for op in diff.ops.iter().copied() {
+            match op {
+                DiffOp::Equal {
+                    old_index: _,
+                    new_index,
+                    len,
+                } => {
+                    let new_i_end = new_index + len;
+                    if len > 2 * CONTEXT { // elide the middle only when head and tail windows cannot overlap
+                        for new_i in new_index..new_index + CONTEXT {
+                            self.line(&format!(" {}", diff.new[new_i].as_ref().dimmed()))?;
+                        }
+                        for new_i in new_i_end - CONTEXT..new_i_end {
+                            self.line(&format!(" {}", diff.new[new_i].as_ref().dimmed()))?;
+                        }
+                    } else { // short unchanged run: print every line exactly once
+                        for new_i in new_index..new_i_end {
+                            self.line(&format!(" {}", diff.new[new_i].as_ref().dimmed()))?;
+                        }
+                    }
+                }
+                DiffOp::Delete {
+                    old_index,
+                    old_len,
+                    new_index: _,
+                } => {
+                    for old_i in old_index..old_index + old_len {
+                        self.line(&format!("- {}", diff.old[old_i].as_ref().red()))?;
+                    }
+                }
+                DiffOp::Insert {
+                    old_index: _,
+                    new_index,
+                    new_len,
+                } => {
+                    for new_i in new_index..new_index + new_len {
+                        self.line(&format!("+ {}", diff.new[new_i].as_ref().green()))?;
+                    }
+                }
+                DiffOp::Replace {
+                    old_index,
+                    old_len,
+                    new_index,
+                    new_len,
+                } => {
+                    for old_i in old_index..old_index + old_len {
+                        self.line(&format!("- {}", diff.old[old_i].as_ref().red()))?;
+                    }
+                    for new_i in new_index..new_index + new_len {
+                        self.line(&format!("+ {}", diff.new[new_i].as_ref().green()))?;
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
+
+fn diff_slices<T>(old: &[T], new: &[T]) -> Vec<DiffOp>
+where
+    T: Eq + Hash + Ord,
+{
+    similar::capture_diff_slices(similar::Algorithm::Patience, old, new)
+}
+
+struct RenderableDiff<'a, T> {
+    old: &'a [T],
+    new: &'a [T],
+    ops: Vec<DiffOp>,
+}
+
+impl<'a, T> RenderableDiff<'a, T>
+where
+    T: AsRef<str> + Hash + Ord,
+{
+    fn diff(old: &'a [T], new: &'a [T]) -> Self {
+        Self {
+            old,
+            new,
+            ops: diff_slices(old, new),
+        }
+    }
+}
+
+/// Was anything "new" seen when running a diff?
+#[derive(Debug, Clone, Copy)]
+enum DiffNovelty {
+    /// Either nothing changed or nothing new changed.
+    Boring,
+    /// Something new changed.
+ Novel, +} + +impl AddAssign for DiffNovelty { + fn add_assign(&mut self, rhs: Self) { + if let DiffNovelty::Novel = rhs { + *self = DiffNovelty::Novel; + } + } +} diff --git a/src/nix/derivation/mod.rs b/src/nix/derivation/mod.rs new file mode 100644 index 0000000..1f77607 --- /dev/null +++ b/src/nix/derivation/mod.rs @@ -0,0 +1,56 @@ +use ::serde::Deserialize; +use camino::Utf8Path; +use camino::Utf8PathBuf; +use iddqd::IdHashItem; +use iddqd::IdHashMap; +use iddqd::id_upcast; +use rustc_hash::FxBuildHasher; +use rustc_hash::FxHashMap; + +mod diff; +mod serde; + +pub use diff::diff_derivations; + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +#[serde(from = "serde::DerivationsWire")] +pub struct Derivations(pub IdHashMap); + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Derivation { + pub path: Utf8PathBuf, + pub args: Vec, + /// I think these can be any path, but don't quote me on that? + pub builder: Utf8PathBuf, + pub env: FxHashMap, + pub input_drvs: FxHashMap, + pub input_srcs: Vec, + pub name: String, + /// Keys are output names, e.g. `out`, `man`, `bin`, etc. + pub outputs: FxHashMap, + /// Technically structured, but I don't think I care for this. + pub system: String, +} + +impl IdHashItem for Derivation { + type Key<'a> = &'a Utf8Path; + + fn key(&self) -> Self::Key<'_> { + &self.path + } + + id_upcast! {} +} +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Input { + // Always empty. Possibly by spec. 
+ pub dynamic_outputs: serde_json::Value, + pub outputs: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Output { + pub path: Utf8PathBuf, +} diff --git a/src/nix/derivation.rs b/src/nix/derivation/serde.rs similarity index 50% rename from src/nix/derivation.rs rename to src/nix/derivation/serde.rs index b35c906..17d8549 100644 --- a/src/nix/derivation.rs +++ b/src/nix/derivation/serde.rs @@ -1,15 +1,11 @@ -use camino::Utf8Path; use camino::Utf8PathBuf; -use iddqd::IdHashItem; -use iddqd::IdHashMap; -use iddqd::id_upcast; -use rustc_hash::FxBuildHasher; use rustc_hash::FxHashMap; use serde::Deserialize; -#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] -#[serde(from = "DerivationsWire")] -pub struct Derivations(pub IdHashMap); +use crate::nix::Derivation; +use crate::nix::Derivations; +use crate::nix::derivation::Input; +use crate::nix::derivation::Output; impl From for Derivations { fn from(wire: DerivationsWire) -> Self { @@ -47,33 +43,7 @@ impl From for Derivations { } #[derive(Deserialize)] -struct DerivationsWire(FxHashMap); - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Derivation { - pub path: Utf8PathBuf, - pub args: Vec, - /// I think these can be any path, but don't quote me on that? - pub builder: Utf8PathBuf, - pub env: FxHashMap, - pub input_drvs: FxHashMap, - pub input_srcs: Vec, - pub name: String, - /// Keys are output names, e.g. `out`, `man`, `bin`, etc. - pub outputs: FxHashMap, - /// Technically structured, but I don't think I care for this. - pub system: String, -} - -impl IdHashItem for Derivation { - type Key<'a> = &'a Utf8Path; - - fn key(&self) -> Self::Key<'_> { - &self.path - } - - id_upcast! 
{} -} +pub struct DerivationsWire(FxHashMap); #[derive(Debug, Clone, PartialEq, Eq, Deserialize)] #[serde(rename_all = "camelCase")] @@ -87,17 +57,3 @@ struct DerivationWire { outputs: FxHashMap, system: String, } - -#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct Input { - // Always empty. Possibly by spec. - pub dynamic_outputs: serde_json::Value, - pub outputs: Vec, -} - -#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct Output { - pub path: Utf8PathBuf, -} diff --git a/src/nix/mod.rs b/src/nix/mod.rs index 634e0fd..d027b23 100644 --- a/src/nix/mod.rs +++ b/src/nix/mod.rs @@ -18,7 +18,7 @@ use utf8_command::Utf8Output; mod registry; pub use registry::Registry; -mod derivation; +pub mod derivation; pub use derivation::Derivation; pub use derivation::Derivations; diff --git a/src/strings_set.rs b/src/strings_set.rs new file mode 100644 index 0000000..382a4cb --- /dev/null +++ b/src/strings_set.rs @@ -0,0 +1,105 @@ +use std::collections::HashSet; +use std::hash::Hasher; + +use rustc_hash::FxHasher; + +use crate::id_hasher::IdHasher; + +/// Set of hashed `(old, new)` comparisons. +/// We only keep the `u64`s to save data and lifetime headaches. +/// +/// I do NOT know enough about hashes to write this correctly. Maybe. +pub struct FxStringsDiffSet(HashSet); + +impl FxStringsDiffSet { + pub fn with_capacity(capacity: usize) -> Self { + // We're only inserting hashes to the set directly! 
+        Self(HashSet::with_capacity_and_hasher(
+            capacity,
+            IdHasher::default(),
+        ))
+    }
+
+    pub fn insert<IntoIter, Iter, Item>(&mut self, (old, new): (IntoIter, IntoIter)) -> SetAdd
+    where
+        IntoIter: IntoIterator<IntoIter = Iter>,
+        Iter: Iterator<Item = Item> + ExactSizeIterator,
+        Item: AsRef<str>,
+    {
+        let mut hasher = FxHasher::default();
+        for into_iter in [old, new] {
+            let iter = into_iter.into_iter();
+            hasher.write_usize(iter.len());
+            for item in iter {
+                std::hash::Hash::hash(item.as_ref(), &mut hasher); // `str`'s `Hash` marks where each item ends
+            }
+        }
+
+        let hash = hasher.finish();
+
+        if self.0.insert(hash) {
+            SetAdd::New
+        } else {
+            SetAdd::AlreadyPresent
+        }
+    }
+}
+
+pub struct FxStringsSet(HashSet<u64, IdHasher>);
+
+impl FxStringsSet {
+    pub fn insert<IntoIter, Iter, Item>(&mut self, items: IntoIter) -> SetAdd
+    where
+        IntoIter: IntoIterator<IntoIter = Iter>,
+        Iter: Iterator<Item = Item> + ExactSizeIterator,
+        Item: AsRef<str>,
+    {
+        let mut hasher = FxHasher::default();
+        let iter = items.into_iter();
+        hasher.write_usize(iter.len());
+        for item in iter {
+            std::hash::Hash::hash(item.as_ref(), &mut hasher); // `str`'s `Hash` marks where each item ends
+        }
+        let hash = hasher.finish();
+
+        if self.0.insert(hash) {
+            SetAdd::New
+        } else {
+            SetAdd::AlreadyPresent
+        }
+    }
+}
+
+pub struct FxStringDiffSet(HashSet<u64, IdHasher>);
+
+impl FxStringDiffSet {
+    pub fn with_capacity(capacity: usize) -> Self {
+        // We're only inserting hashes to the set directly!
+        Self(HashSet::with_capacity_and_hasher(
+            capacity,
+            IdHasher::default(),
+        ))
+    }
+
+    pub fn insert(&mut self, old: impl AsRef<str>, new: impl AsRef<str>) -> SetAdd {
+        let mut hasher = FxHasher::default();
+        std::hash::Hash::hash(old.as_ref(), &mut hasher); // keeps the old/new boundary explicit
+        std::hash::Hash::hash(new.as_ref(), &mut hasher); // instead of relying on unmerged `write` calls
+        let hash = hasher.finish();
+
+        if self.0.insert(hash) {
+            SetAdd::New
+        } else {
+            SetAdd::AlreadyPresent
+        }
+    }
+}
+
+/// Was an item added to a set?
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SetAdd {
+    /// Nothing was added, the item was already present.
+    AlreadyPresent,
+    /// The item was newly-added.
+    New,
+}