From f1f97a43c123ee9c68cfe08c99e00960e37a6a3e Mon Sep 17 00:00:00 2001 From: Mickey Scherrer <5324300+iicky@users.noreply.github.com> Date: Wed, 17 Jun 2026 21:47:48 -0400 Subject: [PATCH 1/2] refactor: route murk init key discovery through env's single read path --- src/env.rs | 29 +++++++++++++++++++++++++++++ src/init.rs | 25 +++++++------------------ 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/src/env.rs b/src/env.rs index 8469762..aacdfa6 100644 --- a/src/env.rs +++ b/src/env.rs @@ -155,6 +155,35 @@ pub fn resolve_key_for_vault(vault_path: &str) -> Result { resolve_key_with_source(vault_path).map(|(k, _)| k) } +/// Read a key the environment supplied directly — `MURK_KEY`, or the file at +/// `MURK_KEY_FILE` — and nothing else. Unlike [`resolve_key_with_source`] this +/// does NOT fall back to auto-discovery in `~/.config/murk/keys`: it answers only +/// "did the current environment hand us a key?". `murk init` uses it to reuse an +/// already-present identity instead of generating a new one, which is why it must +/// ignore the stored key it would otherwise be about to create. +/// +/// Returns `Ok(None)` when neither variable is set. The result is trimmed (init +/// stores it as a plain key string); the runtime path deliberately does not trim, +/// to keep plugin identity files intact. +/// +/// This is the single place, alongside [`resolve_key_with_source`], that reads +/// the key environment variables — see `tests/invariants.rs`. +pub fn key_from_env_only() -> Result, String> { + if let Some(k) = env::var(ENV_MURK_KEY).ok().filter(|k| !k.is_empty()) { + return Ok(Some(k)); + } + if let Ok(path) = env::var(ENV_MURK_KEY_FILE) { + let p = std::path::Path::new(&path); + reject_symlink(p, "MURK_KEY_FILE")?; + let key = std::fs::read_to_string(p) + .map_err(|e| format!("cannot read MURK_KEY_FILE: {e}"))? + .trim() + .to_string(); + return Ok(Some(key)); + } + Ok(None) +} + /// Parse a .env file into key-value pairs. /// Skips comments, blank lines, `MURK_*` keys, and strips quotes and `export` prefixes. /// diff --git a/src/init.rs b/src/init.rs index 2200906..5137d98 100644 --- a/src/init.rs +++ b/src/init.rs @@ -1,8 +1,11 @@ //! Vault initialization logic. use std::collections::{BTreeMap, HashMap}; -use std::env; use std::process::Command; +// Only the tests touch the process environment directly; the runtime key read +// now goes through env::key_from_env_only (see tests/invariants.rs). +#[cfg(test)] +use std::env; use crate::{crypto, encrypt_value, now_utc, types}; @@ -48,23 +51,9 @@ pub struct DiscoveredKey { /// variables into the environment, so the environment is the authoritative /// source and `.env` is only a write-only convenience populated by `murk init`. pub fn discover_existing_key() -> Result, String> { - let raw = if let Some(k) = env::var(crate::env::ENV_MURK_KEY) - .ok() - .filter(|k| !k.is_empty()) - { - Some(k) - } else if let Ok(path) = env::var(crate::env::ENV_MURK_KEY_FILE) { - let p = std::path::Path::new(&path); - crate::env::reject_symlink(p, "MURK_KEY_FILE")?; - Some( - std::fs::read_to_string(p) - .map_err(|e| format!("cannot read MURK_KEY_FILE: {e}"))? - .trim() - .to_string(), - ) - } else { - None - }; + // The env vars are read in one place (the env module) so the auth read path + // stays auditable — see tests/invariants.rs. + let raw = crate::env::key_from_env_only()?; match raw { Some(key) => { From 82cdc76adf90fbbc892c5fd3a662f29ade4bc187 Mon Sep 17 00:00:00 2001 From: Mickey Scherrer <5324300+iicky@users.noreply.github.com> Date: Wed, 17 Jun 2026 21:51:08 -0400 Subject: [PATCH 2/2] test: enforce secret-handling invariants as source-level lints --- Cargo.toml | 7 +++ tests/invariants.rs | 134 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 tests/invariants.rs diff --git a/Cargo.toml b/Cargo.toml index 7e1bfa4..c00c14a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,13 @@ keywords = ["secrets", "encryption", "age", "dotenv", "security"] categories = ["command-line-utilities", "cryptography"] exclude = ["CLAUDE.md", "AGENTS.md", "SPEC.md", ".beads/"] +[lints.clippy] +# Secret-handling guardrail: dbg! prints the Debug of any value — including a +# decrypted secret — and must never ship. It is allow-by-default (restriction +# group), so `-D warnings` alone won't catch it; deny it explicitly. Source-level +# invariants clippy can't express live in tests/invariants.rs. (murk-p9o.2) +dbg_macro = "deny" + [lib] name = "murk_cli" path = "src/lib.rs" diff --git a/tests/invariants.rs b/tests/invariants.rs new file mode 100644 index 0000000..f544d86 --- /dev/null +++ b/tests/invariants.rs @@ -0,0 +1,134 @@ +//! Source-level invariants that generic lints (clippy, fmt, cargo-deny) can't +//! express. These scan the crate source as text — cheap, deterministic, and they +//! fail fast when an agent or human violates a secret-handling rule that code +//! review might miss. See murk-p9o.2. +//! +//! Adding a rule here is the cheapest way to make a murk-specific invariant +//! self-enforcing. Keep each rule precise: a false positive trains people to +//! ignore the check. + +use std::fs; +use std::path::Path; + +/// Every `.rs` file under `src/`, paired with its contents. Paths are +/// `/`-normalized and relative to the crate root so comparisons are portable. +fn source_files() -> Vec<(String, String)> { + let mut out = Vec::new(); + collect(Path::new("src"), &mut out); + assert!(!out.is_empty(), "no source files found under src/"); + out +} + +fn collect(dir: &Path, out: &mut Vec<(String, String)>) { + for entry in fs::read_dir(dir).expect("read src dir") { + let path = entry.expect("dir entry").path(); + if path.is_dir() { + collect(&path, out); + } else if path.extension().is_some_and(|e| e == "rs") { + let body = fs::read_to_string(&path).expect("read source file"); + out.push((rel(&path), body)); + } + } +} + +/// Normalize a path to forward slashes relative to the crate root. +fn rel(path: &Path) -> String { + path.to_string_lossy().replace('\\', "/") +} + +/// Non-comment lines of a file, as (1-based line number, trimmed text). +fn code_lines(body: &str) -> impl Iterator { + body.lines() + .enumerate() + .map(|(i, line)| (i + 1, line.trim())) + .filter(|(_, line)| !line.starts_with("//")) +} + +/// True if a line reads from the process environment — `var`, `var_os`, `vars`, +/// or `vars_os` — as opposed to writing it (`set_var` / `remove_var` / +/// `env_remove`), which all contain `var` as a substring. We strip the write +/// accessors first so they don't masquerade as reads. +fn reads_env(line: &str) -> bool { + let cleaned = line + .replace("set_var", "") + .replace("remove_var", "") + .replace("env_remove", "") + .replace("env_set", ""); + cleaned.contains("var(") + || cleaned.contains("var_os(") + || cleaned.contains("vars(") + || cleaned.contains("vars_os(") +} + +/// Secret material must be read from the environment in one place: the `env` +/// module. A second reader is a spot where a key can bypass the strict-mode and +/// auto-discovery rules that `env` centralizes (see `env::resolve_key` / +/// `env::key_from_env_only`), or leak through a path nobody audited. +/// +/// This flags, outside `env.rs`, either: a use of the canonical `ENV_MURK_KEY*` +/// consts (the internal handle for these vars — currently env-only), or a read +/// accessor on the same line as a `MURK_KEY*` literal. It is a guardrail against +/// accidental regressions, not a soundness proof: a determined bypass (building +/// the var name dynamically, or scanning `env::vars()` across several lines) +/// would slip past — that needs a semantic lint, tracked separately. +#[test] +fn murk_key_is_read_only_in_the_env_module() { + const ALLOWED: &str = "src/env.rs"; + + let offenders: Vec = source_files() + .iter() + .filter(|(path, _)| path != ALLOWED) + .flat_map(|(path, body)| { + code_lines(body) + .filter(|(_, line)| { + // The canonical consts are the internal handle for these env + // vars; outside env.rs they have no business appearing. + let uses_const = line.contains("ENV_MURK_KEY"); + // Or a literal var name read through any std accessor. + let reads_literal = reads_env(line) + && (line.contains("\"MURK_KEY\"") || line.contains("\"MURK_KEY_FILE\"")); + uses_const || reads_literal + }) + .map(move |(n, line)| format!(" {path}:{n}: {line}")) + }) + .collect(); + + assert!( + offenders.is_empty(), + "MURK_KEY / MURK_KEY_FILE may only be read in {ALLOWED} (the single auth \ + read path). Route new reads through env::resolve_key / env::key_from_env_only.\n{}", + offenders.join("\n"), + ); +} + +/// The library crate (`murk_cli`) returns data; the binary (`main.rs`) owns all +/// user-facing output. Library code must not write to stdout — that is exactly +/// where a decrypted secret would leak into a pipe or `$(...)`. Warnings on +/// stderr (`eprintln!`) are fine; `main.rs` is the UI layer and is exempt. +#[test] +fn library_modules_do_not_print_to_stdout() { + const UI_LAYER: &str = "src/main.rs"; + + let offenders: Vec = source_files() + .iter() + .filter(|(path, _)| path != UI_LAYER) + .flat_map(|(path, body)| { + code_lines(body) + .filter(|(_, line)| { + // `print!`/`println!`, but not their `eprint!`/`eprintln!` + // (stderr) cousins, which `contains` would otherwise match. + (line.contains("println!") || line.contains("print!")) + && !line.contains("eprintln!") + && !line.contains("eprint!") + }) + .map(move |(n, line)| format!(" {path}:{n}: {line}")) + }) + .collect(); + + assert!( + offenders.is_empty(), + "library modules must not write to stdout — stdout belongs to the binary. \ + Return the value and let main.rs print it, or use eprintln! for a warning.\n{}", + offenders.join("\n"), + ); +}