From 18fb8ac42ed4b755b2cf81fbaddcc4630d920444 Mon Sep 17 00:00:00 2001 From: Harsh Tripathi Date: Sat, 23 May 2026 03:29:41 +0530 Subject: [PATCH] Merger Engine --- README.md | 7 +- SPEC.md | 32 +- crates/memora-cli/src/cli.rs | 56 +++ crates/memora-cli/src/commands/merge.rs | 161 +++++++ crates/memora-cli/src/commands/mod.rs | 2 + crates/memora-cli/tests/cli.rs | 129 ++++++ crates/memora-core/src/commit.rs | 30 +- crates/memora-core/src/lib.rs | 7 +- crates/memora-core/src/merge.rs | 555 ++++++++++++++++++++++++ crates/memora-core/src/repo.rs | 439 ++++++++++++++++++- crates/memora-core/src/store/db.rs | 101 ++++- crates/memora-core/src/store/schema.sql | 12 + docs/ARCHITECTURE.md | 33 +- 13 files changed, 1527 insertions(+), 37 deletions(-) create mode 100644 crates/memora-cli/src/commands/merge.rs create mode 100644 crates/memora-core/src/merge.rs diff --git a/README.md b/README.md index 18cf92a..3ad44a7 100644 --- a/README.md +++ b/README.md @@ -108,13 +108,14 @@ Ephemeral ──promote──▶ Stable ──gc──▶ Deprecated | `memora status` | Show what has changed since HEAD. | | `memora log [--oneline] [-n N]` | Print commit history. | | `memora branch [NAME]` | List or create branches. | -| `memora switch NAME` | Move HEAD to an existing branch. | +| `memora switch NAME` | Move HEAD to an existing branch (working set follows). | | `memora rollback --to ` | Reset HEAD to a previous commit (auto-checkpoint first). | | `memora promote --id \| --type \| --all-confirmed [T]` | Promote ephemeral nodes to stable. | | `memora diff [FROM] [TO] [--working] [--semantic]` | Show belief changes between two revisions. | +| `memora merge BRANCH [--strategy auto\|ours\|theirs] [--no-ff] [--no-commit] [--dry-run]` | Three-way merge another branch into HEAD. | -Future phases add `merge`, `replay`, `export`, `import`, `gc`, `push`, -`pull`. See `SPEC.md` for the full roadmap. +Future phases add `replay`, `export`, `import`, `gc`, `push`, `pull`. See +`SPEC.md` for the full roadmap. 
--- diff --git a/SPEC.md b/SPEC.md index efbb4cd..b806662 100644 --- a/SPEC.md +++ b/SPEC.md @@ -116,6 +116,13 @@ CREATE TABLE node_versions ( PRIMARY KEY (commit_id, node_id) ); +CREATE TABLE merge_parents ( + commit_id TEXT NOT NULL REFERENCES commits(id) ON DELETE CASCADE, + parent_id TEXT NOT NULL, + sequence INTEGER NOT NULL, + PRIMARY KEY (commit_id, sequence) +); + CREATE TABLE sessions ( id TEXT PRIMARY KEY, started_at INTEGER NOT NULL, @@ -135,8 +142,14 @@ CREATE TABLE session_events ( Indexes are `idx_nodes_kind`, `idx_nodes_status`, `idx_nodes_updated`, `idx_commits_parent`, `idx_commits_ts`, `idx_commit_nodes_node`, -`idx_node_versions_node`, `idx_session_events_session`. They are advisory: -any tool may rebuild them. +`idx_node_versions_node`, `idx_merge_parents_commit`, +`idx_session_events_session`. They are advisory: any tool may rebuild them. + +A commit with two or more parents is a **merge commit**. The *first* +parent stays in `commits.parent_id` so the canonical first-parent log walk +keeps working. Any additional parents live in `merge_parents`, ordered by +`sequence`. The full parent set of a commit is therefore +`{commits.parent_id} ∪ {merge_parents.parent_id WHERE commit_id = …}`. `PRAGMA foreign_keys = ON` and `PRAGMA journal_mode = WAL` are required for correctness and concurrency. @@ -152,6 +165,21 @@ correctness and concurrency. - **Commit id**: lowercase hex SHA-256 of `"v1\nparent:\ntree:\nauthor:\nts:\nmsg:"`. +For a merge commit the `parent` line above contains the *first* parent +only. Every additional parent is appended on its own `parentN:` line +in sequence order, e.g.: + +``` +v1 +parent: +parent2: +parent3: +tree: +author: +ts: +msg: +``` + These are the canonical formulas. Implementations MUST produce identical ids for identical inputs. 
diff --git a/crates/memora-cli/src/cli.rs b/crates/memora-cli/src/cli.rs index 048d4a7..28d6fe1 100644 --- a/crates/memora-cli/src/cli.rs +++ b/crates/memora-cli/src/cli.rs @@ -53,6 +53,9 @@ pub enum Command { /// Show what changed between two commits (or commit vs working set). Diff(DiffArgs), + + /// Merge another branch (or commit) into HEAD. + Merge(MergeArgs), } /// Arguments for `memora init`. @@ -198,3 +201,56 @@ pub struct DiffArgs { #[arg(long)] pub semantic: bool, } + +/// Arguments for `memora merge`. +#[derive(Debug, clap::Args)] +pub struct MergeArgs { + /// Branch (or commit) whose memory to merge into HEAD. + #[arg(value_name = "BRANCH")] + pub branch: String, + + /// Strategy for resolving same-id divergences. + #[arg(long, value_enum, default_value_t = MergeStrategyArg::Auto)] + pub strategy: MergeStrategyArg, + + /// Disable fast-forward; always create a merge commit. + #[arg(long = "no-ff")] + pub no_ff: bool, + + /// Apply the merge to the working set without committing. + #[arg(long = "no-commit")] + pub no_commit: bool, + + /// Override the auto-generated merge commit message. + #[arg(short = 'm', long, value_name = "MESSAGE")] + pub message: Option, + + /// Plan only — print what would happen and exit without changing anything. + #[arg(long = "dry-run")] + pub dry_run: bool, + + /// Author for the merge commit. + #[arg(long, default_value = "human")] + pub author: String, +} + +/// Convenience clap enum mirroring [`memora_core::MergeStrategy`]. +#[derive(Debug, Clone, Copy, clap::ValueEnum)] +pub enum MergeStrategyArg { + /// Score the two sides; mark genuine ties as conflicts. + Auto, + /// On any divergence, keep `ours`. + Ours, + /// On any divergence, keep `theirs`. 
+ Theirs, +} + +impl From for memora_core::MergeStrategy { + fn from(v: MergeStrategyArg) -> Self { + match v { + MergeStrategyArg::Auto => memora_core::MergeStrategy::Auto, + MergeStrategyArg::Ours => memora_core::MergeStrategy::Ours, + MergeStrategyArg::Theirs => memora_core::MergeStrategy::Theirs, + } + } +} diff --git a/crates/memora-cli/src/commands/merge.rs b/crates/memora-cli/src/commands/merge.rs new file mode 100644 index 0000000..bf001e0 --- /dev/null +++ b/crates/memora-cli/src/commands/merge.rs @@ -0,0 +1,161 @@ +//! `memora merge` — three-way merge another branch into HEAD. + +use std::env; + +use anyhow::Result; + +use memora_core::{MergeKind, MergeOptions, MergeStrategy, NodeDecision, Repository}; + +use crate::cli::MergeArgs; +use crate::ui::{bold, cyan, dim, green, red, short_id, yellow}; + +/// Entry point for the `merge` subcommand. +pub fn run(args: MergeArgs) -> Result<()> { + let cwd = env::current_dir()?; + let repo = Repository::open_from(&cwd)?; + let strategy: MergeStrategy = args.strategy.into(); + + if args.dry_run { + let plan = repo.plan_merge(&args.branch, strategy)?; + print_plan_header(&plan, args.no_ff); + print_plan_body(&plan); + return Ok(()); + } + + let opts = MergeOptions { + strategy, + allow_fast_forward: !args.no_ff, + commit: !args.no_commit, + message: args.message, + author: args.author, + }; + let outcome = repo.merge(&args.branch, opts)?; + + match outcome.kind { + MergeKind::AlreadyUpToDate => { + println!("{}", dim("Already up to date.")); + } + MergeKind::FastForward => { + let target = outcome + .commit + .as_ref() + .map(|c| short_id(&c.id).to_string()) + .unwrap_or_else(|| "(unknown)".to_string()); + println!( + "{} {} → {}", + bold(green("Fast-forwarded")), + short_id(&outcome.plan.ours), + yellow(target), + ); + } + MergeKind::Merged => { + println!( + "{} {} into {}", + bold(green("Merged")), + short_id(&outcome.plan.theirs), + short_id(&outcome.plan.ours), + ); + if let Some(c) = &outcome.commit { + 
println!(" merge commit: {}", yellow(short_id(&c.id))); + } + print_plan_body(&outcome.plan); + } + MergeKind::Conflicts => { + println!( + "{} merge completed with conflicts", + bold(red("Conflicts:")) + ); + if let Some(c) = &outcome.commit { + println!(" merge commit: {}", yellow(short_id(&c.id))); + } + print_plan_body(&outcome.plan); + println!( + "{}", + dim("conflicted nodes were marked Conflicted in the working set; resolve manually then commit.") + ); + } + MergeKind::NoCommit => { + println!("{}", bold(yellow("Plan applied to working set, no commit created."))); + print_plan_body(&outcome.plan); + } + } + + Ok(()) +} + +fn print_plan_header(plan: &memora_core::MergePlan, no_ff: bool) { + println!( + "{} {} ← {}", + bold("merge plan"), + short_id(&plan.ours), + short_id(&plan.theirs), + ); + if let Some(base) = &plan.base { + println!(" base: {}", short_id(base)); + } else { + println!(" base: {}", dim("(unrelated histories)")); + } + if plan.already_up_to_date { + println!("{}", dim(" → already up to date")); + } else if plan.can_fast_forward && !no_ff { + println!("{}", dim(" → fast-forward possible")); + } +} + +fn print_plan_body(plan: &memora_core::MergePlan) { + let mut updates = 0; + let mut removes = 0; + let mut conflicts = 0; + let mut auto_picks = Vec::new(); + let mut conflict_lines = Vec::new(); + for entry in &plan.entries { + match &entry.decision { + NodeDecision::Unchanged => {} + NodeDecision::TakeOurs(_) | NodeDecision::TakeTheirs(_) => updates += 1, + NodeDecision::Auto { ours_won, reason } => { + updates += 1; + auto_picks.push(format!( + " {} {} {} ({})", + if *ours_won { + bold(green("ours")) + } else { + bold(cyan("theirs")) + }, + short_id(&entry.id), + entry + .resolved + .as_ref() + .map(|n| n.kind.to_string()) + .unwrap_or_default(), + dim(reason), + )); + } + NodeDecision::Conflicted { reason } => { + conflicts += 1; + conflict_lines.push(format!( + " {} {} ({})", + bold(red("conflict")), + short_id(&entry.id), + dim(reason), + 
)); + } + NodeDecision::Removed => removes += 1, + } + } + + let summary = format!("{updates} updates · {removes} removed · {conflicts} conflicted"); + println!(" {summary}"); + + if !auto_picks.is_empty() { + println!("\n{}", bold("auto-resolved:")); + for line in auto_picks { + println!("{line}"); + } + } + if !conflict_lines.is_empty() { + println!("\n{}", bold("conflicts:")); + for line in conflict_lines { + println!("{line}"); + } + } +} diff --git a/crates/memora-cli/src/commands/mod.rs b/crates/memora-cli/src/commands/mod.rs index f083164..10c7e55 100644 --- a/crates/memora-cli/src/commands/mod.rs +++ b/crates/memora-cli/src/commands/mod.rs @@ -14,6 +14,7 @@ mod commit; mod diff; mod init; mod log; +mod merge; mod promote; mod rollback; mod status; @@ -32,5 +33,6 @@ pub fn dispatch(cli: Cli) -> Result<()> { Command::Rollback(args) => rollback::run(args), Command::Promote(args) => promote::run(args), Command::Diff(args) => diff::run(args), + Command::Merge(args) => merge::run(args), } } diff --git a/crates/memora-cli/tests/cli.rs b/crates/memora-cli/tests/cli.rs index 0d595e3..fc49f60 100644 --- a/crates/memora-cli/tests/cli.rs +++ b/crates/memora-cli/tests/cli.rs @@ -283,3 +283,132 @@ fn diff_against_working_set_picks_up_uncommitted_changes() { .stdout(predicate::str::contains("Added:")) .stdout(predicate::str::contains("v2")); } + + +#[test] +fn merge_clean_three_way_via_cli() { + let tmp = tempdir().unwrap(); + let path = tmp.path(); + + memora().arg("init").current_dir(path).assert().success(); + memora() + .args(["add", "--type", "project", "--content", "shared", "--source", "code-read"]) + .current_dir(path) + .assert() + .success(); + memora() + .args(["commit", "-m", "base"]) + .current_dir(path) + .assert() + .success(); + memora() + .args(["branch", "feature"]) + .current_dir(path) + .assert() + .success(); + // diverge: feature side + memora() + .args(["switch", "feature"]) + .current_dir(path) + .assert() + .success(); + memora() + .args([ + 
"add", "--type", "semantic", "--content", "auth uses jwt", "--source", "code-read", + ]) + .current_dir(path) + .assert() + .success(); + memora() + .args(["commit", "-m", "feat"]) + .current_dir(path) + .assert() + .success(); + // diverge: main side + memora() + .args(["switch", "main"]) + .current_dir(path) + .assert() + .success(); + memora() + .args([ + "add", + "--type", + "preference", + "--content", + "verbose errors", + "--source", + "manual", + ]) + .current_dir(path) + .assert() + .success(); + memora() + .args(["commit", "-m", "pref"]) + .current_dir(path) + .assert() + .success(); + // merge feature into main + memora() + .args(["merge", "feature"]) + .current_dir(path) + .assert() + .success() + .stdout(predicate::str::contains("Merged")); +} + +#[test] +fn merge_dry_run_does_not_change_anything() { + let tmp = tempdir().unwrap(); + let path = tmp.path(); + memora().arg("init").current_dir(path).assert().success(); + memora() + .args(["add", "--type", "project", "--content", "x", "--source", "code-read"]) + .current_dir(path) + .assert() + .success(); + memora() + .args(["commit", "-m", "c1"]) + .current_dir(path) + .assert() + .success(); + memora() + .args(["branch", "feature"]) + .current_dir(path) + .assert() + .success(); + memora() + .args(["merge", "feature", "--dry-run"]) + .current_dir(path) + .assert() + .success() + .stdout(predicate::str::contains("merge plan")); +} + +#[test] +fn merge_already_up_to_date_via_cli() { + let tmp = tempdir().unwrap(); + let path = tmp.path(); + memora().arg("init").current_dir(path).assert().success(); + memora() + .args(["add", "--type", "project", "--content", "x", "--source", "code-read"]) + .current_dir(path) + .assert() + .success(); + memora() + .args(["commit", "-m", "c1"]) + .current_dir(path) + .assert() + .success(); + memora() + .args(["branch", "feature"]) + .current_dir(path) + .assert() + .success(); + memora() + .args(["merge", "feature"]) + .current_dir(path) + .assert() + .success() + 
.stdout(predicate::str::contains("Already up to date")); +} diff --git a/crates/memora-core/src/commit.rs b/crates/memora-core/src/commit.rs index 01365ce..5a9d30f 100644 --- a/crates/memora-core/src/commit.rs +++ b/crates/memora-core/src/commit.rs @@ -109,14 +109,30 @@ pub fn commit_id( message: &str, timestamp: i64, ) -> String { - let canonical = format!( - "v1\nparent:{}\ntree:{}\nauthor:{}\nts:{}\nmsg:{}", - parent.unwrap_or(""), - tree_id, - author, - timestamp, - message, + commit_id_with_parents(parent, &[], tree_id, author, message, timestamp) +} + +/// Same as [`commit_id`] but takes any number of *additional* parents past +/// the first. The list is ordered: the first element is parent #2, etc. +/// Used by merge commits. +pub fn commit_id_with_parents( + first_parent: Option<&str>, + extra_parents: &[String], + tree_id: &str, + author: &str, + message: &str, + timestamp: i64, +) -> String { + let mut canonical = format!( + "v1\nparent:{}\n", + first_parent.unwrap_or(""), ); + for (idx, p) in extra_parents.iter().enumerate() { + canonical.push_str(&format!("parent{}:{}\n", idx + 2, p)); + } + canonical.push_str(&format!( + "tree:{tree_id}\nauthor:{author}\nts:{timestamp}\nmsg:{message}", + )); sha256_hex(canonical.as_bytes()) } diff --git a/crates/memora-core/src/lib.rs b/crates/memora-core/src/lib.rs index c5937c1..f3ff1ce 100644 --- a/crates/memora-core/src/lib.rs +++ b/crates/memora-core/src/lib.rs @@ -31,6 +31,7 @@ pub mod commit; pub mod error; pub mod hash; +pub mod merge; pub mod node; pub mod repo; pub mod store; @@ -38,8 +39,12 @@ pub mod time; pub use commit::{CommitStats, MemoryCommit}; pub use error::{MemoraError, Result}; +pub use merge::{MergeEntry, MergePlan, MergeStrategy, NodeDecision}; pub use node::{MemoryKind, MemoryNode, MemorySource, MemoryStatus}; -pub use repo::{DiffReport, ModifiedNode, NodeChange, PromotePlan, Repository}; +pub use repo::{ + DiffReport, MergeKind, MergeOptions, MergeOutcome, ModifiedNode, NodeChange, 
PromotePlan, + Repository, +}; /// On-disk format version written into `.memora/config`. Bumped whenever the /// schema or directory layout changes in a non backwards-compatible way. diff --git a/crates/memora-core/src/merge.rs b/crates/memora-core/src/merge.rs new file mode 100644 index 0000000..186f428 --- /dev/null +++ b/crates/memora-core/src/merge.rs @@ -0,0 +1,555 @@ +//! Three-way merge engine. +//! +//! Given two commits `ours` and `theirs`, the engine: +//! +//! 1. Finds their merge base (the most recent common ancestor across the +//! full parent DAG, including merge commits). +//! 2. Loads the per-node snapshot of all three (`base`, `ours`, `theirs`). +//! 3. Computes a per-node decision using a small precedence ladder: +//! confidence → source priority → status priority → recency. +//! 4. Returns a [`MergePlan`] the [`Repository`](crate::repo::Repository) +//! can apply to its working set. +//! +//! Phase 3 v0.1 detects only **same-id** divergence. Two nodes that +//! describe the same fact under different ids will *not* be flagged as +//! conflicting; that needs semantic-overlap detection (Phase 4+). + +use std::collections::{HashMap, HashSet, VecDeque}; + +use crate::error::Result; +use crate::node::{MemoryNode, MemorySource, MemoryStatus}; +use crate::store::Store; + +/// Strategy for resolving a same-id divergence between `ours` and `theirs`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MergeStrategy { + /// Score the two sides and pick the winner; mark genuine ties as + /// `Conflicted` and leave them for the user. + Auto, + /// On any divergence, keep the `ours` version. + Ours, + /// On any divergence, keep the `theirs` version. + Theirs, +} + +/// What the merge plan decided to do with a single node id. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum NodeDecision { + /// The node is unchanged across all three sides; keep it as-is. + Unchanged, + /// One side modified the node, the other didn't — take the modified + /// version. 
The string is a short reason for tooling / tests. + TakeOurs(String), + /// Symmetric to `TakeOurs`. + TakeTheirs(String), + /// Both sides modified the node, auto-resolution picked a winner. + Auto { + /// `true` if `ours` won, `false` if `theirs` won. + ours_won: bool, + /// Short reason such as "higher confidence" or "code_read > model_inference". + reason: String, + }, + /// Both sides modified the node, scores tied, the result is marked + /// `Conflicted` and surfaced to the user. + Conflicted { + /// Reason the tie could not be auto-resolved. + reason: String, + }, + /// Node existed in `base` and was deleted by both sides, or by one side + /// while the other left it unchanged. + Removed, +} + +/// One entry in a [`MergePlan`]. +#[derive(Debug, Clone)] +pub struct MergeEntry { + /// Node id this decision applies to. + pub id: String, + /// What we decided. + pub decision: NodeDecision, + /// The node value to write into the working set. `None` means delete. + pub resolved: Option, +} + +/// What the merge engine wants the repository to do. +#[derive(Debug, Clone)] +pub struct MergePlan { + /// Resolved id of the merge base, or `None` if there is no common + /// ancestor (two completely unrelated histories — rare but possible). + pub base: Option, + /// `ours` commit id. + pub ours: String, + /// `theirs` commit id. + pub theirs: String, + /// Per-node decisions. + pub entries: Vec, + /// True if every entry is `Unchanged` — i.e. the merge is a no-op. + pub identical: bool, + /// True if `theirs` is reachable from `ours` (already up-to-date). + pub already_up_to_date: bool, + /// True if `ours` is reachable from `theirs` (fast-forward possible). + pub can_fast_forward: bool, +} + +impl MergePlan { + /// All conflict entries — empty when the merge is clean. + pub fn conflicts(&self) -> Vec<&MergeEntry> { + self.entries + .iter() + .filter(|e| matches!(e.decision, NodeDecision::Conflicted { .. 
})) + .collect() + } + + /// Return only the entries whose resolved value should land in the + /// working set (i.e. everything except `Unchanged` and `Removed`-with-no-value). + pub fn writable(&self) -> impl Iterator { + self.entries + .iter() + .filter(|e| !matches!(e.decision, NodeDecision::Unchanged)) + } + + /// True if any entry is `Conflicted`. + pub fn has_conflicts(&self) -> bool { + self.entries + .iter() + .any(|e| matches!(e.decision, NodeDecision::Conflicted { .. })) + } +} + +/// Plan a merge of `theirs` into `ours`. Pure: does not mutate the store. +pub fn plan_merge( + store: &Store, + ours: &str, + theirs: &str, + strategy: MergeStrategy, +) -> Result { + if ours == theirs { + return Ok(MergePlan { + base: Some(ours.to_string()), + ours: ours.to_string(), + theirs: theirs.to_string(), + entries: Vec::new(), + identical: true, + already_up_to_date: true, + can_fast_forward: false, + }); + } + + // Reachability from ours and theirs (used both for ff detection and + // for finding the merge base). + let ours_anc = ancestors(store, ours)?; + let theirs_anc = ancestors(store, theirs)?; + + let already_up_to_date = ours_anc.contains(theirs); + let can_fast_forward = theirs_anc.contains(ours); + + let base = merge_base(store, ours, theirs, &ours_anc, &theirs_anc)?; + + if already_up_to_date { + return Ok(MergePlan { + base, + ours: ours.to_string(), + theirs: theirs.to_string(), + entries: Vec::new(), + identical: true, + already_up_to_date: true, + can_fast_forward: false, + }); + } + + // Load the three snapshots (base may be missing for unrelated histories). 
+ let base_nodes = match base.as_deref() { + Some(b) => store.commit_node_versions(b)?, + None => Vec::new(), + }; + let ours_nodes = store.commit_node_versions(ours)?; + let theirs_nodes = store.commit_node_versions(theirs)?; + + let base_map: HashMap = + base_nodes.into_iter().map(|n| (n.id.clone(), n)).collect(); + let ours_map: HashMap = + ours_nodes.into_iter().map(|n| (n.id.clone(), n)).collect(); + let theirs_map: HashMap = + theirs_nodes.into_iter().map(|n| (n.id.clone(), n)).collect(); + + let mut all_ids: HashSet<&String> = HashSet::new(); + all_ids.extend(base_map.keys()); + all_ids.extend(ours_map.keys()); + all_ids.extend(theirs_map.keys()); + + let mut sorted: Vec<&String> = all_ids.into_iter().collect(); + sorted.sort(); + + let mut entries = Vec::with_capacity(sorted.len()); + for id in sorted { + let b = base_map.get(id); + let o = ours_map.get(id); + let t = theirs_map.get(id); + let entry = decide(id, b, o, t, strategy); + entries.push(entry); + } + + Ok(MergePlan { + base, + ours: ours.to_string(), + theirs: theirs.to_string(), + entries, + identical: false, + already_up_to_date: false, + can_fast_forward, + }) +} + +/// Per-node decision logic for the three-way merge. +fn decide( + id: &str, + base: Option<&MemoryNode>, + ours: Option<&MemoryNode>, + theirs: Option<&MemoryNode>, + strategy: MergeStrategy, +) -> MergeEntry { + use NodeDecision::*; + + match (base, ours, theirs) { + // Existed nowhere → impossible to reach; skip safely. + (None, None, None) => MergeEntry { + id: id.to_string(), + decision: Unchanged, + resolved: None, + }, + // Same on both sides (whether existing or both deleted) — unchanged. + (_, None, None) => MergeEntry { + id: id.to_string(), + decision: Removed, + resolved: None, + }, + // Only ours has it: either a new add on our side, or theirs deleted. 
+ (b, Some(o), None) => match b { + None => MergeEntry { + id: id.to_string(), + decision: TakeOurs("added by ours".into()), + resolved: Some(o.clone()), + }, + Some(bn) if state_eq(bn, o) => MergeEntry { + id: id.to_string(), + // theirs deleted, ours unchanged — accept the deletion. + decision: Removed, + resolved: None, + }, + Some(_) => { + // ours modified, theirs deleted — modify-vs-delete is a conflict. + conflict_or_strategy(id, ours, theirs, strategy, "modify on ours, delete on theirs") + } + }, + // Symmetric to the previous arm. + (b, None, Some(t)) => match b { + None => MergeEntry { + id: id.to_string(), + decision: TakeTheirs("added by theirs".into()), + resolved: Some(t.clone()), + }, + Some(bn) if state_eq(bn, t) => MergeEntry { + id: id.to_string(), + decision: Removed, + resolved: None, + }, + Some(_) => { + conflict_or_strategy(id, ours, theirs, strategy, "delete on ours, modify on theirs") + } + }, + // In both ours and theirs. + (b, Some(o), Some(t)) => { + if state_eq(o, t) { + return MergeEntry { + id: id.to_string(), + decision: Unchanged, + resolved: Some(o.clone()), + }; + } + // Different on the two sides — did each side change? + let ours_changed = b.map(|bn| !state_eq(bn, o)).unwrap_or(true); + let theirs_changed = b.map(|bn| !state_eq(bn, t)).unwrap_or(true); + if ours_changed && !theirs_changed { + return MergeEntry { + id: id.to_string(), + decision: TakeOurs("only ours changed".into()), + resolved: Some(o.clone()), + }; + } + if theirs_changed && !ours_changed { + return MergeEntry { + id: id.to_string(), + decision: TakeTheirs("only theirs changed".into()), + resolved: Some(t.clone()), + }; + } + // Both changed. 
+ match strategy { + MergeStrategy::Ours => MergeEntry { + id: id.to_string(), + decision: TakeOurs("strategy=ours".into()), + resolved: Some(o.clone()), + }, + MergeStrategy::Theirs => MergeEntry { + id: id.to_string(), + decision: TakeTheirs("strategy=theirs".into()), + resolved: Some(t.clone()), + }, + MergeStrategy::Auto => auto_resolve(id, o, t), + } + } + } +} + +fn conflict_or_strategy( + id: &str, + ours: Option<&MemoryNode>, + theirs: Option<&MemoryNode>, + strategy: MergeStrategy, + reason: &str, +) -> MergeEntry { + match strategy { + MergeStrategy::Ours => MergeEntry { + id: id.to_string(), + decision: NodeDecision::TakeOurs(format!("{reason} (strategy=ours)")), + resolved: ours.cloned(), + }, + MergeStrategy::Theirs => MergeEntry { + id: id.to_string(), + decision: NodeDecision::TakeTheirs(format!("{reason} (strategy=theirs)")), + resolved: theirs.cloned(), + }, + MergeStrategy::Auto => MergeEntry { + id: id.to_string(), + decision: NodeDecision::Conflicted { + reason: reason.to_string(), + }, + // Surface the conflict by promoting the surviving side's body + // (whichever exists) and flipping its status to Conflicted. + resolved: ours + .or(theirs) + .cloned() + .map(|mut n| { + n.status = MemoryStatus::Conflicted; + n + }), + }, + } +} + +/// Score-based auto-resolution for both-sides-changed. 
+fn auto_resolve(id: &str, ours: &MemoryNode, theirs: &MemoryNode) -> MergeEntry { + use std::cmp::Ordering; + let oc = ours.confidence; + let tc = theirs.confidence; + if (oc - tc).abs() > 0.001 { + return if oc > tc { + MergeEntry { + id: id.to_string(), + decision: NodeDecision::Auto { + ours_won: true, + reason: format!("higher confidence ({oc:.2} > {tc:.2})"), + }, + resolved: Some(ours.clone()), + } + } else { + MergeEntry { + id: id.to_string(), + decision: NodeDecision::Auto { + ours_won: false, + reason: format!("higher confidence ({tc:.2} > {oc:.2})"), + }, + resolved: Some(theirs.clone()), + } + }; + } + + let op = source_priority(&ours.source); + let tp = source_priority(&theirs.source); + match op.cmp(&tp) { + Ordering::Greater => { + return MergeEntry { + id: id.to_string(), + decision: NodeDecision::Auto { + ours_won: true, + reason: format!("source priority ({} > {})", ours.source, theirs.source), + }, + resolved: Some(ours.clone()), + }; + } + Ordering::Less => { + return MergeEntry { + id: id.to_string(), + decision: NodeDecision::Auto { + ours_won: false, + reason: format!("source priority ({} > {})", theirs.source, ours.source), + }, + resolved: Some(theirs.clone()), + }; + } + Ordering::Equal => {} + } + + let os = status_priority(ours.status); + let ts = status_priority(theirs.status); + match os.cmp(&ts) { + Ordering::Greater => { + return MergeEntry { + id: id.to_string(), + decision: NodeDecision::Auto { + ours_won: true, + reason: format!("status priority ({} > {})", ours.status, theirs.status), + }, + resolved: Some(ours.clone()), + }; + } + Ordering::Less => { + return MergeEntry { + id: id.to_string(), + decision: NodeDecision::Auto { + ours_won: false, + reason: format!("status priority ({} > {})", theirs.status, ours.status), + }, + resolved: Some(theirs.clone()), + }; + } + Ordering::Equal => {} + } + + if ours.updated_at != theirs.updated_at { + return if ours.updated_at > theirs.updated_at { + MergeEntry { + id: id.to_string(), + 
decision: NodeDecision::Auto { + ours_won: true, + reason: "more recent (ours)".into(), + }, + resolved: Some(ours.clone()), + } + } else { + MergeEntry { + id: id.to_string(), + decision: NodeDecision::Auto { + ours_won: false, + reason: "more recent (theirs)".into(), + }, + resolved: Some(theirs.clone()), + } + }; + } + + // Genuine tie — surface it as a conflict. + MergeEntry { + id: id.to_string(), + decision: NodeDecision::Conflicted { + reason: "auto-resolution tied on confidence, source, status and recency".into(), + }, + resolved: Some({ + let mut n = ours.clone(); + n.status = MemoryStatus::Conflicted; + n + }), + } +} + +fn source_priority(source: &MemorySource) -> u8 { + match source { + MemorySource::CodeRead => 9, + MemorySource::TestResult => 8, + MemorySource::Manual => 7, + MemorySource::ClaudeCode + | MemorySource::Cursor + | MemorySource::Cline + | MemorySource::OpenHands => 6, + MemorySource::ModelInference => 4, + MemorySource::Unknown(_) => 1, + } +} + +fn status_priority(status: MemoryStatus) -> u8 { + match status { + MemoryStatus::Stable => 4, + MemoryStatus::Ephemeral => 3, + MemoryStatus::Conflicted => 2, + MemoryStatus::Deprecated => 1, + } +} + +fn state_eq(a: &MemoryNode, b: &MemoryNode) -> bool { + a.kind == b.kind + && a.content == b.content + && (a.confidence - b.confidence).abs() <= 0.001 + && a.status == b.status + && a.source == b.source + && a.evidence == b.evidence +} + +// --------------------------------------------------------------------------- +// Ancestry / merge-base computation. +// --------------------------------------------------------------------------- + +/// Set of all ancestors of `commit_id` (inclusive). Walks both first and +/// extra parents, so merge commits are handled correctly. 
+fn ancestors(store: &Store, commit_id: &str) -> Result> { + let mut out = HashSet::new(); + let mut queue: VecDeque = VecDeque::new(); + queue.push_back(commit_id.to_string()); + while let Some(id) = queue.pop_front() { + if !out.insert(id.clone()) { + continue; + } + for p in store.all_parents(&id)? { + if !out.contains(&p) { + queue.push_back(p); + } + } + } + Ok(out) +} + +/// Find the *best* merge base — i.e. a common ancestor with no descendants +/// among the common-ancestor set. For most practical cases this is unique; +/// when multiple candidates exist we return the one that is reachable +/// from the most others (a virtual best-of) — sufficient for v0.1. +fn merge_base( + store: &Store, + ours: &str, + theirs: &str, + ours_anc: &HashSet, + theirs_anc: &HashSet, +) -> Result> { + let common: HashSet = ours_anc.intersection(theirs_anc).cloned().collect(); + if common.is_empty() { + return Ok(None); + } + + // Walk *down* from ours and theirs and pick the first commit that is + // in `common` — that is the LCA on a single straight line. For DAGs + // with criss-cross merges we narrow further by removing any candidate + // that is an ancestor of another candidate. + let mut candidates: HashSet = common.clone(); + let candidate_list: Vec = candidates.iter().cloned().collect(); + for cand in candidate_list { + let anc = ancestors(store, &cand)?; + for other in &common { + if other != &cand && anc.contains(other) { + candidates.remove(other); + } + } + } + + // From the remaining set, pick the one with the highest timestamp + // (the "newest" common ancestor) for determinism. + let mut best: Option<(String, i64)> = None; + for c in candidates { + let ts = store.get_commit(&c)?.map(|cm| cm.timestamp).unwrap_or(0); + match &best { + None => best = Some((c, ts)), + Some((_, b_ts)) if ts > *b_ts => best = Some((c, ts)), + _ => {} + } + } + // Suppress unused-variable lints from the closure-unfriendly local code. 
+ let _ = (ours, theirs); + Ok(best.map(|(id, _)| id)) +} diff --git a/crates/memora-core/src/repo.rs b/crates/memora-core/src/repo.rs index 9ce2702..d8ffd10 100644 --- a/crates/memora-core/src/repo.rs +++ b/crates/memora-core/src/repo.rs @@ -8,8 +8,9 @@ use std::fs; use std::path::{Path, PathBuf}; -use crate::commit::{commit_id, tree_id_for_nodes, CommitStats, MemoryCommit}; +use crate::commit::{commit_id_with_parents, tree_id_for_nodes, CommitStats, MemoryCommit}; use crate::error::{MemoraError, Result}; +use crate::merge::{plan_merge, MergePlan, MergeStrategy, NodeDecision}; use crate::node::{MemoryKind, MemoryNode, MemoryStatus, NewNode}; use crate::store::{HeadRef, Refs, Store, UnstagedSummary}; use crate::time::{Clock, SystemClock}; @@ -180,6 +181,17 @@ impl Repository { /// author. If there are no changes since HEAD, returns /// [`CommitOutcome`] with `commit: None`. pub fn commit(&self, message: &str, author: &str) -> Result { + self.commit_with_parents(message, author, &[]) + } + + /// Same as [`Self::commit`] but with explicit *additional* parent commit + /// ids. The first parent always comes from HEAD. Used by `merge`. + pub fn commit_with_parents( + &self, + message: &str, + author: &str, + extra_parents: &[String], + ) -> Result { let head_ref = self.refs.read_head()?; let parent = self.head_commit_id()?; @@ -199,7 +211,10 @@ impl Repository { let nodes = self.store.all_nodes()?; let tree = tree_id_for_nodes(&nodes); - // Detect "nothing to commit": same tree id as parent. + // Detect "nothing to commit": same tree id as parent, *and* no + // additional parents (a merge commit always wants to be recorded + // even if the tree happened to match — e.g. for already-up-to-date + // we never reach here because the merge code short-circuits earlier). 
let parent_tree = match parent.as_deref() { Some(p) => self .store @@ -208,7 +223,7 @@ impl Repository { .unwrap_or_default(), None => tree_id_for_nodes(&[]), }; - if parent.is_some() && tree == parent_tree { + if extra_parents.is_empty() && parent.is_some() && tree == parent_tree { return Ok(CommitOutcome { commit: None, branch: head_ref.branch().map(str::to_string), @@ -243,7 +258,7 @@ impl Repository { } let now = self.clock.now(); - let id = commit_id(parent.as_deref(), &tree, author, message, now); + let id = commit_id_with_parents(parent.as_deref(), extra_parents, &tree, author, message, now); let commit = MemoryCommit { id: id.clone(), parent: parent.clone(), @@ -258,6 +273,9 @@ impl Repository { node_ids.sort(); self.store.insert_commit_nodes(&id, &node_ids)?; self.store.insert_node_versions(&id, &nodes)?; + if !extra_parents.is_empty() { + self.store.insert_merge_parents(&id, extra_parents)?; + } let branch_name = head_ref.branch().map(str::to_string); match &head_ref { @@ -301,11 +319,37 @@ impl Repository { } /// Switch HEAD to the given branch. The branch must already exist. + /// The working set is rewritten to match the target branch's tip. + /// Refuses to switch if there are uncommitted changes; commit them or + /// stash them by branching first (`memora branch foo`). pub fn switch_branch(&self, name: &str) -> Result<()> { if !self.refs.branch_path(name).exists() { return Err(MemoraError::RefNotFound(name.to_string())); } - self.refs.write_head_branch(name) + + // Refuse if the working set has uncommitted changes. + let summary = self.status()?; + if !summary.added.is_empty() + || !summary.modified.is_empty() + || !summary.removed.is_empty() + { + return Err(MemoraError::Invalid(format!( + "uncommitted changes in working set ({} added, {} modified, {} removed) — commit or branch first", + summary.added.len(), + summary.modified.len(), + summary.removed.len(), + ))); + } + + // Move HEAD then rewrite the working set from the target's node_versions. 
+ self.refs.write_head_branch(name)?; + let target_commit = self.refs.read_branch(name)?; + let target_nodes = match target_commit.as_deref() { + Some(c) => self.store.commit_node_versions(c)?, + None => Vec::new(), + }; + self.replace_working_set(&target_nodes)?; + Ok(()) } /// Reset HEAD to a specific commit id, leaving the working set as it @@ -340,6 +384,137 @@ impl Repository { Ok(target) } + // --- merge ----------------------------------------------------------- + + /// Plan a merge of `their_rev` into the current HEAD. Pure: does not + /// touch the working set. Useful for `--dry-run` style flows. + pub fn plan_merge( + &self, + their_rev: &str, + strategy: MergeStrategy, + ) -> Result { + let ours = self + .head_commit_id()? + .ok_or_else(|| MemoraError::CommitNotFound("HEAD".into()))?; + let theirs = self.resolve_revision(their_rev)?; + plan_merge(&self.store, &ours, &theirs, strategy) + } + + /// Merge `their_rev` into the current HEAD. The behaviour is: + /// + /// - **already up-to-date**: no-op, returns the existing HEAD. + /// - **fast-forward**: if `--ff` is allowed, just move HEAD's branch. + /// - **true merge**: rewrite the working set from the merge plan and + /// create a merge commit (unless `commit == false`). + pub fn merge( + &self, + their_rev: &str, + opts: MergeOptions, + ) -> Result { + let ours = self + .head_commit_id()? + .ok_or_else(|| MemoraError::CommitNotFound("HEAD".into()))?; + let theirs = self.resolve_revision(their_rev)?; + let plan = plan_merge(&self.store, &ours, &theirs, opts.strategy)?; + + if plan.already_up_to_date { + return Ok(MergeOutcome { + kind: MergeKind::AlreadyUpToDate, + plan, + commit: None, + }); + } + + if plan.can_fast_forward && opts.allow_fast_forward { + // Fast-forward: just point our branch at theirs and overwrite + // the working set with theirs's snapshot. + let their_nodes = self.store.commit_node_versions(&theirs)?; + self.replace_working_set(&their_nodes)?; + match self.refs.read_head()? 
{ + HeadRef::Branch(name) => self.refs.write_branch(&name, &theirs)?, + HeadRef::Detached(_) => self.refs.write_head_detached(&theirs)?, + } + let commit = self.store.get_commit(&theirs)?; + return Ok(MergeOutcome { + kind: MergeKind::FastForward, + plan, + commit, + }); + } + + // True merge: apply the plan to the working set. + self.apply_plan_to_working_set(&plan)?; + + if !opts.commit { + return Ok(MergeOutcome { + kind: MergeKind::NoCommit, + plan, + commit: None, + }); + } + + let message = opts.message.clone().unwrap_or_else(|| { + format!( + "Merge {} into {}", + short_for_display(&theirs), + self.refs + .read_head() + .ok() + .as_ref() + .and_then(|h| h.branch().map(str::to_string)) + .unwrap_or_else(|| "HEAD".into()), + ) + }); + let outcome = self.commit_with_parents(&message, &opts.author, &[theirs.clone()])?; + let kind = if plan.has_conflicts() { + MergeKind::Conflicts + } else { + MergeKind::Merged + }; + Ok(MergeOutcome { + kind, + plan, + commit: outcome.commit, + }) + } + + /// Replace the live `nodes` table with the contents of `target`. + /// Used by fast-forward merge. + fn replace_working_set(&self, target: &[MemoryNode]) -> Result<()> { + let current = self.store.all_nodes()?; + let target_ids: std::collections::HashSet<&str> = + target.iter().map(|n| n.id.as_str()).collect(); + for n in ¤t { + if !target_ids.contains(n.id.as_str()) { + self.store.delete_node(&n.id)?; + } + } + for n in target { + self.store.upsert_node(n)?; + } + Ok(()) + } + + /// Apply a [`MergePlan`] to the working set in place. 
+ fn apply_plan_to_working_set(&self, plan: &MergePlan) -> Result<()> { + for entry in &plan.entries { + match &entry.decision { + NodeDecision::Unchanged => {} + NodeDecision::Removed => { + self.store.delete_node(&entry.id)?; + } + _ => { + if let Some(node) = &entry.resolved { + self.store.upsert_node(node)?; + } else { + self.store.delete_node(&entry.id)?; + } + } + } + } + Ok(()) + } + // --- promotion ------------------------------------------------------- /// Promote one or more `ephemeral` nodes to `stable`. Returns the @@ -461,6 +636,65 @@ impl Repository { // Free helpers + supporting types used by promote/diff above. // --------------------------------------------------------------------------- +/// Options controlling [`Repository::merge`]. +#[derive(Debug, Clone)] +pub struct MergeOptions { + /// Strategy for resolving same-id divergences. Defaults to `Auto`. + pub strategy: MergeStrategy, + /// Allow fast-forward when possible (default: `true`). + pub allow_fast_forward: bool, + /// Create a merge commit at the end (default: `true`). When `false`, + /// the working set is left in a merged state without committing. + pub commit: bool, + /// Override commit message. Defaults to `"Merge into "`. + pub message: Option, + /// Author for the merge commit. Defaults to `"human"`. + pub author: String, +} + +impl Default for MergeOptions { + fn default() -> Self { + Self { + strategy: MergeStrategy::Auto, + allow_fast_forward: true, + commit: true, + message: None, + author: "human".into(), + } + } +} + +/// What `memora merge` actually did. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MergeKind { + /// `theirs` was already an ancestor of `ours`. Nothing to do. + AlreadyUpToDate, + /// HEAD was fast-forwarded to `theirs`. + FastForward, + /// A real three-way merge happened, no conflicts. + Merged, + /// A real three-way merge happened with at least one conflict. + Conflicts, + /// Plan was applied to the working set but no commit was created. 
/// Abbreviate a commit id for display: the first seven characters, or the
/// whole id when it is shorter than that.
fn short_for_display(id: &str) -> String {
    // `char_indices` keeps the cut on a character boundary.
    match id.char_indices().nth(7) {
        Some((cut, _)) => id[..cut].to_string(),
        None => id.to_string(),
    }
}
+ repo.switch_branch("main").unwrap(); + let outcome = repo.merge("feature", MergeOptions::default()).unwrap(); + assert_eq!(outcome.kind, MergeKind::FastForward); + assert_eq!(repo.head_commit_id().unwrap().as_deref(), Some(c2.id.as_str())); + } + + #[test] + fn merge_clean_three_way_picks_up_disjoint_changes() { + let tmp = tempdir().unwrap(); + let repo = new_repo(tmp.path()); + repo.add_node(NewNode::new( + MemoryKind::Project, + "shared", + MemorySource::CodeRead, + )) + .unwrap(); + repo.commit("base", "human").unwrap(); + // feature: add a new node only on this side. + repo.create_branch("feature").unwrap(); + repo.switch_branch("feature").unwrap(); + repo.add_node(NewNode::new( + MemoryKind::Semantic, + "auth uses jwt", + MemorySource::CodeRead, + )) + .unwrap(); + repo.commit("feat", "human").unwrap(); + // main: add a different node. + repo.switch_branch("main").unwrap(); + repo.add_node(NewNode::new( + MemoryKind::Preference, + "verbose errors", + MemorySource::Manual, + )) + .unwrap(); + repo.commit("pref", "human").unwrap(); + // merge feature into main. 
+ let outcome = repo.merge("feature", MergeOptions::default()).unwrap(); + assert_eq!(outcome.kind, MergeKind::Merged); + let nodes = repo.store().all_nodes().unwrap(); + let contents: std::collections::HashSet = + nodes.into_iter().map(|n| n.content).collect(); + assert!(contents.contains("shared")); + assert!(contents.contains("auth uses jwt")); + assert!(contents.contains("verbose errors")); + assert!(!outcome.plan.has_conflicts()); + assert!(outcome.commit.is_some()); + let commit = outcome.commit.unwrap(); + let merge_parents = repo.store().merge_parents(&commit.id).unwrap(); + assert_eq!(merge_parents.len(), 1); + } + + #[test] + fn merge_resolves_same_node_change_by_confidence() { + let tmp = tempdir().unwrap(); + let repo = new_repo(tmp.path()); + let base_node = repo + .add_node(NewNode::new( + MemoryKind::Assumption, + "redis is the cache", + MemorySource::ModelInference, + )) + .unwrap(); + repo.commit("base", "human").unwrap(); + repo.create_branch("feature").unwrap(); + // ours promote with model-inference baseline confidence (0.6). + repo.store() + .set_status(&base_node.id, MemoryStatus::Stable, 1234) + .unwrap(); + repo.commit("ours promote", "human").unwrap(); + // theirs: switch to feature *before* mutating, then promote with + // a higher confidence so the merge has a reason to pick theirs. + repo.switch_branch("feature").unwrap(); + let mut node = repo.store().get_node(&base_node.id).unwrap().unwrap(); + node.confidence = 0.95; + node.status = MemoryStatus::Stable; + node.updated_at += 1; + repo.store().upsert_node(&node).unwrap(); + repo.commit("theirs promote with high confidence", "human") + .unwrap(); + // back to main and merge feature. 
+ repo.switch_branch("main").unwrap(); + let outcome = repo.merge("feature", MergeOptions::default()).unwrap(); + assert_eq!(outcome.kind, MergeKind::Merged); + let merged = repo.store().get_node(&base_node.id).unwrap().unwrap(); + assert_eq!(merged.status, MemoryStatus::Stable); + assert!(merged.confidence > 0.9); + } + + #[test] + fn merge_with_strategy_ours_keeps_our_side() { + let tmp = tempdir().unwrap(); + let repo = new_repo(tmp.path()); + let base_node = repo + .add_node(NewNode::new( + MemoryKind::Project, + "rust", + MemorySource::CodeRead, + )) + .unwrap(); + repo.commit("base", "human").unwrap(); + repo.create_branch("feature").unwrap(); + // ours edits content (still on main). + let mut ours = repo.store().get_node(&base_node.id).unwrap().unwrap(); + ours.content = "rust+wasm".into(); + ours.updated_at += 1; + repo.store().upsert_node(&ours).unwrap(); + repo.commit("ours", "human").unwrap(); + // switch to feature, edit differently. + repo.switch_branch("feature").unwrap(); + let mut theirs = repo.store().get_node(&base_node.id).unwrap().unwrap(); + theirs.content = "rust+ts".into(); + theirs.updated_at += 2; + repo.store().upsert_node(&theirs).unwrap(); + repo.commit("theirs", "human").unwrap(); + repo.switch_branch("main").unwrap(); + let outcome = repo + .merge( + "feature", + MergeOptions { + strategy: MergeStrategy::Ours, + ..MergeOptions::default() + }, + ) + .unwrap(); + assert_eq!(outcome.kind, MergeKind::Merged); + let merged = repo.store().get_node(&base_node.id).unwrap().unwrap(); + assert_eq!(merged.content, "rust+wasm"); + } + + #[test] + fn merge_marks_conflict_when_strategy_auto_ties() { + let tmp = tempdir().unwrap(); + let repo = new_repo(tmp.path()); + let base_node = repo + .add_node(NewNode::new( + MemoryKind::Project, + "rust", + MemorySource::CodeRead, + )) + .unwrap(); + repo.commit("base", "human").unwrap(); + repo.create_branch("feature").unwrap(); + // ours edits content with a fixed updated_at so it stays equal to theirs. 
+ let ts = 5_000; + let mut ours = repo.store().get_node(&base_node.id).unwrap().unwrap(); + ours.content = "rust+wasm".into(); + ours.updated_at = ts; + repo.store().upsert_node(&ours).unwrap(); + repo.commit("ours", "human").unwrap(); + // switch to feature, edit different content with the same ts. + repo.switch_branch("feature").unwrap(); + let mut theirs = repo.store().get_node(&base_node.id).unwrap().unwrap(); + theirs.content = "rust+ts".into(); + theirs.updated_at = ts; + repo.store().upsert_node(&theirs).unwrap(); + repo.commit("theirs", "human").unwrap(); + repo.switch_branch("main").unwrap(); + let outcome = repo.merge("feature", MergeOptions::default()).unwrap(); + assert_eq!(outcome.kind, MergeKind::Conflicts); + assert!(outcome.plan.has_conflicts()); + let merged = repo.store().get_node(&base_node.id).unwrap().unwrap(); + assert_eq!(merged.status, MemoryStatus::Conflicted); + } } diff --git a/crates/memora-core/src/store/db.rs b/crates/memora-core/src/store/db.rs index 99bc748..2304495 100644 --- a/crates/memora-core/src/store/db.rs +++ b/crates/memora-core/src/store/db.rs @@ -200,6 +200,15 @@ impl Store { Ok(()) } + /// Delete a node from the live working set. Idempotent: deleting a + /// missing node is a no-op rather than an error, because that's what + /// `apply_plan_to_working_set` wants. + pub fn delete_node(&self, id: &str) -> Result<()> { + self.conn + .execute("DELETE FROM nodes WHERE id = ?1", params![id])?; + Ok(()) + } + // --- commit CRUD ------------------------------------------------------ /// Persist a commit row. @@ -235,6 +244,52 @@ impl Store { Ok(()) } + /// Persist additional merge parents for a commit. The first parent + /// lives in `commits.parent_id`; every entry here is parent #2 onward, + /// ordered by their position in `extra_parents`. 
+ pub fn insert_merge_parents(&self, commit_id: &str, extra_parents: &[String]) -> Result<()> { + if extra_parents.is_empty() { + return Ok(()); + } + let tx = self.conn.unchecked_transaction()?; + { + let mut stmt = tx.prepare( + "INSERT OR REPLACE INTO merge_parents (commit_id, parent_id, sequence) VALUES (?1, ?2, ?3)", + )?; + for (idx, p) in extra_parents.iter().enumerate() { + stmt.execute(params![commit_id, p, idx as i64])?; + } + } + tx.commit()?; + Ok(()) + } + + /// Read the *additional* (post-first) parents of a commit, in + /// `sequence` order. Returns an empty vec for non-merge commits. + pub fn merge_parents(&self, commit_id: &str) -> Result> { + let mut stmt = self.conn.prepare( + "SELECT parent_id FROM merge_parents WHERE commit_id = ?1 ORDER BY sequence ASC", + )?; + let rows = stmt.query_map(params![commit_id], |r| r.get::<_, String>(0))?; + let mut out = Vec::new(); + for r in rows { + out.push(r?); + } + Ok(out) + } + + /// Return the full set of parents (first + extras) for a commit. + pub fn all_parents(&self, commit_id: &str) -> Result> { + let mut out = Vec::new(); + if let Some(c) = self.get_commit(commit_id)? { + if let Some(p) = c.parent { + out.push(p); + } + } + out.extend(self.merge_parents(commit_id)?); + Ok(out) + } + /// Persist the full per-node state snapshot for a commit. Called once /// per commit alongside [`Self::insert_commit_nodes`]. pub fn insert_node_versions(&self, commit_id: &str, nodes: &[MemoryNode]) -> Result<()> { @@ -362,36 +417,56 @@ impl Store { /// Compute an [`UnstagedSummary`] comparing the current node table to /// the given baseline commit (typically HEAD). Pass `None` for the /// "no commits yet" case — every existing node is reported as added. + /// + /// "Modified" means the node id matches but its state differs from + /// the snapshot in the baseline commit's `node_versions` rows. 
We + /// don't compare timestamps here because user-supplied `updated_at` + /// values can run ahead of commit timestamps in tests / scripted + /// pipelines. pub fn unstaged_against(&self, head_commit: Option<&str>) -> Result { let current = self.all_nodes()?; - let baseline_ids: std::collections::HashSet = match head_commit { - Some(id) => self.commit_node_ids(id)?.into_iter().collect(), - None => std::collections::HashSet::new(), + let baseline_versions: std::collections::HashMap = match head_commit { + Some(id) => self + .commit_node_versions(id)? + .into_iter() + .map(|n| (n.id.clone(), n)) + .collect(), + None => std::collections::HashMap::new(), }; let mut summary = UnstagedSummary { total: current.len(), ..Default::default() }; - let baseline_commit_ts = match head_commit { - Some(id) => self.get_commit(id)?.map(|c| c.timestamp).unwrap_or(0), - None => 0, - }; let mut current_ids = std::collections::HashSet::new(); for node in current { current_ids.insert(node.id.clone()); - if !baseline_ids.contains(&node.id) { - summary.added.push(node); - } else if node.updated_at > baseline_commit_ts { - summary.modified.push(node); + match baseline_versions.get(&node.id) { + None => summary.added.push(node), + Some(prev) => { + if !nodes_equivalent(prev, &node) { + summary.modified.push(node); + } + } } } - for id in baseline_ids.difference(¤t_ids) { - summary.removed.push(id.clone()); + for id in baseline_versions.keys() { + if !current_ids.contains(id) { + summary.removed.push(id.clone()); + } } Ok(summary) } } +fn nodes_equivalent(a: &MemoryNode, b: &MemoryNode) -> bool { + a.kind == b.kind + && a.content == b.content + && (a.confidence - b.confidence).abs() <= 0.001 + && a.status == b.status + && a.source == b.source + && a.evidence == b.evidence +} + fn row_to_node(row: &rusqlite::Row<'_>) -> rusqlite::Result { let kind_str: String = row.get(1)?; let status_str: String = row.get(4)?; diff --git a/crates/memora-core/src/store/schema.sql 
b/crates/memora-core/src/store/schema.sql index 5417ad0..58376f7 100644 --- a/crates/memora-core/src/store/schema.sql +++ b/crates/memora-core/src/store/schema.sql @@ -70,6 +70,18 @@ CREATE TABLE IF NOT EXISTS node_versions ( CREATE INDEX IF NOT EXISTS idx_node_versions_node ON node_versions (node_id); +-- Merge commits store their *first* parent in commits.parent_id (so the +-- existing first-parent walk keeps working). Any *additional* parents +-- live here, ordered by sequence (0 = first additional parent, 1 = next…). +CREATE TABLE IF NOT EXISTS merge_parents ( + commit_id TEXT NOT NULL REFERENCES commits(id) ON DELETE CASCADE, + parent_id TEXT NOT NULL, + sequence INTEGER NOT NULL, + PRIMARY KEY (commit_id, sequence) +); + +CREATE INDEX IF NOT EXISTS idx_merge_parents_commit ON merge_parents (commit_id); + -- Replay infrastructure. Sessions and their event streams are written here -- so future `memora replay` can step through context evolution. CREATE TABLE IF NOT EXISTS sessions ( diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 9c2dd6b..912f3ec 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -68,11 +68,14 @@ timestamps deterministically. 
`Store`, holds a `Clock`, and exposes intent-revealing methods: - `init`, `open`, `discover`, `open_from` - `add_node`, `status`, `commit`, `log` -- `create_branch`, `list_branches`, `switch_branch` +- `create_branch`, `list_branches`, `switch_branch` (rewrites the working + set to match the target tip; refuses with uncommitted changes) - `rollback_to` (auto-checkpoints before moving HEAD) - `promote` (ephemeral → stable, by id / kind / confidence threshold) - `diff` (graph diff between two revspecs, with optional semantic summary) - `resolve_revision` (`HEAD`, `HEAD~N`, branch names, hex prefixes) +- `plan_merge` / `merge` (three-way merge with auto / ours / theirs + strategies; produces fast-forward, merge commit, or surfaced conflicts) The diff engine compares two `node_versions` snapshots from the SQLite store and produces a `DiffReport` with `added` / `removed` / `modified` @@ -80,6 +83,24 @@ buckets. `ModifiedNode` carries a list of typed `NodeChange` deltas (`Status`, `Content`, `Confidence`, `Source`, `Evidence`) so callers can render high-level summaries without parsing strings. +### `merge.rs` +Pure three-way merge engine. Walks both parent DAGs to find the merge +base, then for each node id decides: + +1. *Unchanged* on both sides → keep. +2. Changed on exactly one side → take that side. +3. Changed on both sides → score by **confidence → source priority → + status priority → recency**. The winning side is `Auto { ours_won }`. + Genuine ties are returned as `Conflicted`, written into the working + set as `MemoryStatus::Conflicted`, and surfaced via the merge commit + stats. +4. `--strategy=ours` / `--strategy=theirs` skips scoring and forces the + choice without producing conflicts. + +Merge commits store their first parent in `commits.parent_id` (so +first-parent log walks keep working) and additional parents in the +`merge_parents` table. 
+ ## Crate: `memora-cli` ### `cli.rs` @@ -109,14 +130,14 @@ Centralised printing helpers (timestamps, short ids, error formatter). ## What's not built yet -The roadmap in `README.md` calls out Phase 3 → Phase 5. Notable gaps: +The roadmap in `README.md` calls out Phase 4 → Phase 5. Notable gaps: -- CRDT merge (`memora merge`). - Replay (`memora replay`, session event recording). - Export / import adapters (`memora export --to=claude-code`, etc.). - GC + remote sync. +- Semantic-overlap detection during merge (different ids, same fact). The internal types and SQLite tables already make room for these (see -`sessions` / `session_events`, `MemoryStatus::Conflicted`, the per-commit -`node_versions` snapshot table); we'll layer the workflows on top in -subsequent commits. +`sessions` / `session_events`, the per-commit `node_versions` snapshot +table, `merge_parents`); we'll layer the workflows on top in subsequent +commits.