From 0cf5eda8ea325a175c5fb3992b78de73b164fd28 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Fri, 26 Jun 2026 05:42:08 +0200 Subject: [PATCH 1/4] =?UTF-8?q?feat(provenance):=20SCPV=20v3=20=E2=80=94?= =?UTF-8?q?=20emit=20fusion=20premises=20for=20scry=20(#313=20inc=201)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reworks the `component-provenance` section to the converged binary SCPV v3 wire format (scry#63) and emits the two fusion-unique optimization premises that feed scry's sound abstract interpreter: - bounded_memory: no memory.grow in the fused core (memory_probe). - closed_world: all cross-component imports internalised; no residual import in a non-host namespace (conservative — unknown namespace ⇒ false). Why binary, why now: meld emitted JSON, scry-provenance decodes binary SCPV — the boundary never actually connected (BadMagic → provenance=None). Per DD-002 scry owns the format; converged on binary-canonical so scry's DO-333 trusted decoder stays lean/no_std (extend its SCPV reader, no JSON parser), and the one-time encoder swap lands on meld (untrusted host). meld's richer schema (sha256 binding, code_range, String component_id via length-prefix) ports into SCPV v3; premises live in the fixed header. meld does NOT compute value-ranges/const-args/dead-params — those are scry's analysis (scry-interval/scry-analyze-core), fed by these premises, for synth. SR-45 (sw-req, derives-from SYS-4) + SWV-45. Oracle: 9 provenance unit tests (codec round-trip both premises, header layout pinned, host-namespace classification) + v3_fusion_premises_present_on_real_fusion golden assertion; 439 lib tests green, clippy clean, rivet validate PASS. DWARF path unaffected (reads the in-memory struct, not the section bytes). Producer side of #313; scry consumer side tracked in scry#63. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- meld-core/src/lib.rs | 6 +- meld-core/src/provenance.rs | 308 +++++++++++++------ meld-core/tests/component_provenance.rs | 36 +++ safety/requirements/safety-requirements.yaml | 35 +++ safety/requirements/sw-verifications.yaml | 15 + 5 files changed, 308 insertions(+), 92 deletions(-) diff --git a/meld-core/src/lib.rs b/meld-core/src/lib.rs index 811b05d..b41f67c 100644 --- a/meld-core/src/lib.rs +++ b/meld-core/src/lib.rs @@ -734,10 +734,8 @@ impl Fuser { if self.config.component_provenance { let provenance = provenance::build(&merged, &self.components, &output_without_extras); - let payload = provenance.to_bytes().map_err(|e| { - Error::EncodingError(format!("component-provenance serialization failed: {e}")) - })?; - extra_sections.push((provenance::SECTION_NAME, payload)); + // SCPV v3 binary payload (#313 / scry#63) — infallible encode. + extra_sections.push((provenance::SECTION_NAME, provenance.to_bytes())); } let output = if extra_sections.is_empty() { diff --git a/meld-core/src/provenance.rs b/meld-core/src/provenance.rs index 2066277..6f2dded 100644 --- a/meld-core/src/provenance.rs +++ b/meld-core/src/provenance.rs @@ -47,16 +47,24 @@ pub const SECTION_NAME: &str = "component-provenance"; /// - **v1**: `{ fused_func_idx, component_id, originating_func_idx }` /// per entry (issue #192). /// - **v2** (DWARF Phase 2, issue #143): adds an optional -/// [`Entry::code_range`] giving the function body's byte span in -/// the fused module's code section. The field is the anchor for -/// DWARF address remapping. v1 consumers that check `version` -/// first will see `2` and can either upgrade or ignore the new -/// field (serde deserialization tolerates its absence via -/// `#[serde(default)]`, and its presence is additive — no v1 key -/// changed shape). +/// [`Entry::code_range`]. 
+/// - **v3** (#313 / scry#63): the canonical **binary `SCPV`** wire +/// format (replacing the JSON encoding, which never decoded against +/// scry's binary `scry-provenance` reader — the boundary was dead). +/// Adds a fixed header carrying the **fusion premises** +/// ([`ComponentProvenance::bounded_memory`], +/// [`ComponentProvenance::closed_world`]) that tighten scry's +/// abstract-interpretation fixpoint. The byte layout is the +/// converged spec in scry#63; see [`ComponentProvenance::to_bytes`]. +/// meld is the producer; scry's `scry-provenance` is the consumer +/// and owns the format (DD-002). /// -/// Consumers MUST check `version` before relying on `code_range`. -pub const VERSION: u32 = 2; +/// Consumers MUST check the `SCPV` magic + `version` before decoding. +pub const VERSION: u32 = 3; + +/// Magic prefixing the binary `SCPV` payload — lets a consumer reject a +/// non-provenance / wrong-format section before decoding (scry#63). +pub const MAGIC: &[u8; 4] = b"SCPV"; /// Byte span of a function body in the fused module's code section. /// @@ -109,24 +117,155 @@ pub struct Entry { #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct ComponentProvenance { pub version: u32, + /// **Fusion premise (#313):** the fused core uses no `memory.grow`, + /// so its linear memory is fixed-size. Lets scry assume a bounded + /// memory and drop grow-reachability widening. Sound, varies per + /// input (computed by [`crate::memory_probe::module_uses_memory_grow`]). + pub bounded_memory: bool, + /// **Fusion premise (#313):** every cross-component import edge has + /// been internalised — the fused module has no residual import in a + /// non-host namespace. Lets scry tighten `reachable_from_exports` + /// and assume no inter-component call escapes through an import. + /// Conservative: an unrecognised import namespace yields `false`. 
+ pub closed_world: bool, pub fused_module_sha256: String, pub entries: Vec<Entry>, } impl ComponentProvenance { - /// JSON-encode for emission as the section payload. Compact (no - /// pretty-printing) so the on-disk overhead is bounded by the - /// number of functions; expected to be ~120 bytes per entry for - /// typical component_id lengths. - pub fn to_bytes(&self) -> Result<Vec<u8>, serde_json::Error> { - serde_json::to_vec(self) + /// Encode the canonical **binary `SCPV` v3** section payload + /// (scry#63). Little-endian; entries are length-prefixed so a + /// `no_std`/no-alloc consumer can bound-check without allocating. + /// + /// ```text + /// "SCPV" | u8 ver=3 | u8 bounded_memory | u8 closed_world + /// | sha256[32 raw] | u32 count + /// | { fused_idx:u32, id_len:u32, id:[u8;len], orig_idx:u32, + /// has_code_range:u8, [start:u32, end:u32] } * count + /// ``` + pub fn to_bytes(&self) -> Vec<u8> { + let mut b = Vec::with_capacity(43 + self.entries.len() * 24); + b.extend_from_slice(MAGIC); + b.push(VERSION as u8); + b.push(self.bounded_memory as u8); + b.push(self.closed_world as u8); + // sha256 hex string -> 32 raw bytes (zero-padded if malformed, + // which build() never produces). + let mut sha32 = [0u8; 32]; + if let Ok(raw) = hex::decode(&self.fused_module_sha256) { + let n = raw.len().min(32); + sha32[..n].copy_from_slice(&raw[..n]); + } + b.extend_from_slice(&sha32); + b.extend_from_slice(&(self.entries.len() as u32).to_le_bytes()); + for e in &self.entries { + b.extend_from_slice(&e.fused_func_idx.to_le_bytes()); + let id = e.component_id.as_bytes(); + b.extend_from_slice(&(id.len() as u32).to_le_bytes()); + b.extend_from_slice(id); + b.extend_from_slice(&e.originating_func_idx.to_le_bytes()); + match e.code_range { + Some(cr) => { + b.push(1); + b.extend_from_slice(&cr.start.to_le_bytes()); + b.extend_from_slice(&cr.end.to_le_bytes()); + } + None => b.push(0), + } + } + b + } + + /// Inverse of [`to_bytes`] — decode a binary `SCPV` v3 payload. 
+ /// `Err` on bad magic, unsupported version, truncation, or invalid + /// UTF-8 in a `component_id`. + pub fn from_bytes(bytes: &[u8]) -> Result<Self, String> { + let mut p = 0usize; + let take = |b: &[u8], p: &mut usize, n: usize| -> Result<Vec<u8>, String> { + if *p + n > b.len() { + return Err(format!("SCPV truncated: need {n} at {p}, have {}", b.len())); + } + let s = b[*p..*p + n].to_vec(); + *p += n; + Ok(s) + }; + let u32le = |b: &[u8], p: &mut usize| -> Result<u32, String> { + let s = take(b, p, 4)?; + Ok(u32::from_le_bytes([s[0], s[1], s[2], s[3]])) + }; + if take(bytes, &mut p, 4)? != MAGIC { + return Err("SCPV bad magic".into()); + } + let ver = take(bytes, &mut p, 1)?[0] as u32; + if ver != VERSION { + return Err(format!( + "SCPV unsupported version {ver} (expected {VERSION})" + )); + } + let bounded_memory = take(bytes, &mut p, 1)?[0] != 0; + let closed_world = take(bytes, &mut p, 1)?[0] != 0; + let fused_module_sha256 = hex::encode(take(bytes, &mut p, 32)?); + let count = u32le(bytes, &mut p)? as usize; + let mut entries = Vec::with_capacity(count); + for _ in 0..count { + let fused_func_idx = u32le(bytes, &mut p)?; + let id_len = u32le(bytes, &mut p)? as usize; + let component_id = + String::from_utf8(take(bytes, &mut p, id_len)?).map_err(|_| "SCPV bad utf8")?; + let originating_func_idx = u32le(bytes, &mut p)?; + let code_range = if take(bytes, &mut p, 1)?[0] != 0 { + Some(CodeRange { + start: u32le(bytes, &mut p)?, + end: u32le(bytes, &mut p)?, + }) + } else { + None + }; + entries.push(Entry { + fused_func_idx, + component_id, + originating_func_idx, + code_range, + }); + } + Ok(ComponentProvenance { + version: ver, + bounded_memory, + closed_world, + fused_module_sha256, + entries, + }) } +} + +/// Host import namespaces — residual imports in these are the external +/// boundary, not unresolved inter-component edges, so they don't break +/// `closed_world`. Anything else is treated conservatively as a +/// surviving cross-component import (`closed_world = false`). 
+fn is_host_import_namespace(module: &str) -> bool { + module.starts_with("wasi") + || module == "env" + || module.starts_with("pulseengine:") + || module.starts_with("__") +} - /// Inverse of [`to_bytes`]. Returns `Err` on malformed JSON; the - /// caller is responsible for the `version` check. - pub fn from_bytes(bytes: &[u8]) -> Result { - serde_json::from_slice(bytes) +/// `closed_world` premise: every import in `module_bytes` is in a host +/// namespace (no surviving inter-component import edge). A module with +/// no imports is trivially closed. Conservative on unrecognised +/// namespaces (returns `false`) so the premise is never over-asserted. +pub fn fused_is_closed_world(module_bytes: &[u8]) -> bool { + for payload in wasmparser::Parser::new(0).parse_all(module_bytes) { + if let Ok(wasmparser::Payload::ImportSection(reader)) = payload { + for imp in reader.into_imports() { + match imp { + Ok(import) if !is_host_import_namespace(import.module) => return false, + Ok(_) => {} + Err(_) => return false, + } + } + } } + true } /// Compute the SHA-256 hex digest of the given bytes. Lower-case hex, @@ -213,6 +352,11 @@ pub fn build( ComponentProvenance { version: VERSION, + // Fusion premises (#313): both sound, computed from the fused + // bytes meld just produced. `bounded_memory` varies per input; + // `closed_world` is conservative (false on any non-host import). 
+ bounded_memory: !crate::memory_probe::module_uses_memory_grow(fused_bytes_without_extras), + closed_world: fused_is_closed_world(fused_bytes_without_extras), fused_module_sha256: sha256_hex(fused_bytes_without_extras), entries, } @@ -222,10 +366,11 @@ pub fn build( mod tests { use super::*; - #[test] - fn round_trip_preserves_payload() { - let original = ComponentProvenance { + fn sample(bounded: bool, closed: bool) -> ComponentProvenance { + ComponentProvenance { version: VERSION, + bounded_memory: bounded, + closed_world: closed, fused_module_sha256: "deadbeef".repeat(8), entries: vec![ Entry { @@ -238,87 +383,74 @@ mod tests { fused_func_idx: 1, component_id: "db".into(), originating_func_idx: 7, - code_range: Some(CodeRange { - start: 42, - end: 100, - }), + code_range: None, }, ], - }; - let bytes = original.to_bytes().expect("serialize"); - let decoded = ComponentProvenance::from_bytes(&bytes).expect("deserialize"); - assert_eq!(original, decoded); + } } #[test] - fn v1_shaped_entry_deserializes_with_none_code_range() { - // A v1 producer emits entries without `code_range`. The v2 - // Entry struct must still deserialize them (serde default), - // yielding `None`. This pins backward-compat so a v2 meld can - // read a v1 section and a v2 consumer tolerates v1 entries. 
- let v1_json = br#"{"version":1,"fused_module_sha256":"00","entries":[ - {"fused_func_idx":0,"component_id":"auth","originating_func_idx":3} - ]}"#; - let decoded = ComponentProvenance::from_bytes(v1_json).expect("deserialize v1"); - assert_eq!(decoded.entries.len(), 1); - assert_eq!(decoded.entries[0].code_range, None); + fn scpv_v3_round_trip_preserves_payload() { + for (b, c) in [(true, true), (true, false), (false, true), (false, false)] { + let original = sample(b, c); + let decoded = ComponentProvenance::from_bytes(&original.to_bytes()).expect("decode"); + assert_eq!(original, decoded, "premises ({b},{c}) must round-trip"); + } } #[test] - fn code_range_omitted_from_json_when_none() { - // v1-shaped round-trip: an entry with no code_range must not - // emit a `code_range` key (skip_serializing_if), so a v2 meld - // producing a None entry is byte-compatible with v1 readers. - let cp = ComponentProvenance { - version: VERSION, - fused_module_sha256: "0".repeat(64), - entries: vec![Entry { - fused_func_idx: 0, - component_id: "x".into(), - originating_func_idx: 0, - code_range: None, - }], - }; - let json: serde_json::Value = - serde_json::from_slice(&cp.to_bytes().expect("serialize")).expect("parse json"); - assert!( - json["entries"][0].get("code_range").is_none(), - "code_range must be omitted when None; got {}", - json["entries"][0] - ); + fn scpv_v3_header_layout_pinned() { + // Pin the converged scry#63 byte layout so meld can't drift from + // scry's decoder: magic, version, the two premise bytes. + let bytes = sample(true, false).to_bytes(); + assert_eq!(&bytes[0..4], MAGIC, "magic 'SCPV'"); + assert_eq!(bytes[4], 3, "version byte = 3"); + assert_eq!(bytes[5], 1, "bounded_memory byte"); + assert_eq!(bytes[6], 0, "closed_world byte"); + // 32-byte sha then u32 LE count (2 entries). 
+ assert_eq!(&bytes[39..43], &2u32.to_le_bytes(), "entry count"); } #[test] - fn from_bytes_rejects_malformed_json() { - assert!(ComponentProvenance::from_bytes(b"{not json}").is_err()); + fn from_bytes_rejects_bad_magic_version_and_truncation() { assert!(ComponentProvenance::from_bytes(b"").is_err()); + assert!( + ComponentProvenance::from_bytes(b"JSON{...}").is_err(), + "bad magic" + ); + let mut wrong_ver = sample(true, true).to_bytes(); + wrong_ver[4] = 2; // older/unknown version + assert!( + ComponentProvenance::from_bytes(&wrong_ver).is_err(), + "version" + ); + let full = sample(true, true).to_bytes(); + assert!( + ComponentProvenance::from_bytes(&full[..full.len() - 3]).is_err(), + "truncated entry" + ); } #[test] - fn version_field_present_in_serialized_output() { - // scry's consumer-side version check needs `version` to be a - // top-level integer key so it can be inspected without - // deserializing the entire payload. This pins that contract. - let cp = ComponentProvenance { - version: VERSION, - fused_module_sha256: "0".repeat(64), - entries: vec![], - }; - let json: serde_json::Value = - serde_json::from_slice(&cp.to_bytes().expect("serialize")).expect("parse json"); - assert_eq!(json["version"], serde_json::json!(VERSION)); - } - - #[test] - fn empty_entries_serializes_to_empty_array() { - let cp = ComponentProvenance { - version: VERSION, - fused_module_sha256: "0".repeat(64), - entries: vec![], - }; - let json: serde_json::Value = - serde_json::from_slice(&cp.to_bytes().expect("serialize")).expect("parse json"); - assert_eq!(json["entries"], serde_json::json!([])); + fn closed_world_host_namespace_classification() { + // Host namespaces don't break closed_world; an unrecognised + // (e.g. component-instance) namespace does, conservatively. 
+ let host = wat::parse_str( + r#"(module (import "wasi_snapshot_preview1" "fd_write" + (func (param i32 i32 i32 i32) (result i32))))"#, + ) + .unwrap(); + assert!(fused_is_closed_world(&host), "wasi import is host → closed"); + let no_imports = wat::parse_str(r#"(module (func nop))"#).unwrap(); + assert!( + fused_is_closed_world(&no_imports), + "no imports → trivially closed" + ); + let cross = wat::parse_str(r#"(module (import "auth-component" "login" (func)))"#).unwrap(); + assert!( + !fused_is_closed_world(&cross), + "non-host import → not closed" + ); } #[test] diff --git a/meld-core/tests/component_provenance.rs b/meld-core/tests/component_provenance.rs index 0cae681..bc75da8 100644 --- a/meld-core/tests/component_provenance.rs +++ b/meld-core/tests/component_provenance.rs @@ -152,6 +152,42 @@ fn component_provenance_round_trips() { ); } +#[test] +fn v3_fusion_premises_present_on_real_fusion() { + // #313 / scry#63: the SCPV v3 section carries the fusion premises + // that feed scry's analysis. On a real wac-composed fusion the + // cross-component imports are internalised, so `closed_world` must + // hold; `bounded_memory` reflects whether the fused core grows its + // memory. Both must round-trip through the binary codec. + if !fixture_available() { + return; + } + let bytes = std::fs::read(FIXTURE).expect("read fixture"); + let fused = fuse_default(&bytes, "auth"); + + let payloads = read_custom_sections(&fused, SECTION_NAME); + let payload = payloads.first().expect("section present"); + // Binary SCPV magic — proves we emit the converged format, not JSON. 
+ assert_eq!( + &payload[0..4], + b"SCPV", + "payload must be binary SCPV, not JSON" + ); + let prov = ComponentProvenance::from_bytes(payload).expect("decode SCPV v3"); + + assert!( + prov.closed_world, + "a fully wac-composed fusion internalises all cross-component imports → closed_world" + ); + // bounded_memory is input-dependent; assert it agrees with a direct + // memory.grow probe of the fused module (the premise's source). + let grows = meld_core::memory_probe::module_uses_memory_grow(&fused); + assert_eq!( + prov.bounded_memory, !grows, + "bounded_memory must equal !uses(memory.grow)" + ); +} + #[test] fn v2_code_ranges_are_populated_ordered_and_nonoverlapping() { // DWARF Phase 2 increment 1: every entry should carry a diff --git a/safety/requirements/safety-requirements.yaml b/safety/requirements/safety-requirements.yaml index c86d430..39350fb 100644 --- a/safety/requirements/safety-requirements.yaml +++ b/safety/requirements/safety-requirements.yaml @@ -1509,3 +1509,38 @@ artifacts: a test artifact. Partly gated on maintainer STPA judgment (host- controller scope; intended UCAs for the dangling refs — #303 Class B/C). milestone: v0.36.0 + + - id: SR-45 + type: sw-req + title: Fusion-premise emission for downstream specialization + description: > + meld shall emit, into the fused core module's `component-provenance` + custom section, the fusion-unique optimization premises that downstream + consumers (scry's sound abstract interpreter, then synth) cannot soundly + assume on their own — at minimum `bounded_memory` (no `memory.grow` in the + fused core) and `closed_world` (all cross-component import edges + internalised). The section uses the converged binary `SCPV` v3 wire format + (scry#63); meld is the producer, scry-provenance owns the format (DD-002). + meld shall NOT compute value ranges / constant args / dead params — those + are scry's abstract interpretation, fed by these premises (#313). 
+ status: implemented + tags: [feature, provenance, specialization, v0.37.0] + links: + - type: derives-from + target: SYS-4 + cited-source: + - uri: "https://github.com/pulseengine/meld/issues/313" + kind: github + last-checked: 2026-06-26 + - uri: "https://github.com/pulseengine/scry/issues/63" + kind: github + last-checked: 2026-06-26 + fields: + implementation: + - meld-core/src/provenance.rs + verification-method: test + verification-description: > + SCPV v3 binary codec round-trips both premises (provenance unit tests); + a real wac-composed fusion emits `SCPV` magic with closed_world=true and + bounded_memory = !uses(memory.grow) + (component_provenance::v3_fusion_premises_present_on_real_fusion). diff --git a/safety/requirements/sw-verifications.yaml b/safety/requirements/sw-verifications.yaml index a249542..5746972 100644 --- a/safety/requirements/sw-verifications.yaml +++ b/safety/requirements/sw-verifications.yaml @@ -544,3 +544,18 @@ artifacts: links: - type: verifies target: SR-44 + - id: SWV-45 + type: sw-verification + title: "Verification of SR-45: Fusion-premise emission" + description: > + Verifies SR-45 via the SCPV v3 codec round-trip + (meld-core::provenance::tests::scpv_v3_round_trip_preserves_payload + + scpv_v3_header_layout_pinned + closed_world_host_namespace_classification) + and the end-to-end golden assertion on a real fusion + (meld-core/tests/component_provenance.rs::v3_fusion_premises_present_on_real_fusion). 
+ status: implemented + fields: + method: automated-test + links: + - type: verifies + target: SR-45 From fdb196a39bfa6bc528e0914026ea7840124339a2 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Fri, 26 Jun 2026 06:09:40 +0200 Subject: [PATCH 2/4] fix(provenance): bound SCPV from_bytes entry-count allocation (Mythos #314) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mythos delta-pass finding on the new SCPV v3 decoder: `from_bytes` read the entry count as a bare untrusted wire u32 and called `Vec::with_capacity(count)` unbounded — a crafted `component-provenance` section with count=u32::MAX forces a ~190 GiB allocation that aborts the process on memory-constrained hosts (containers/CI cgroups). Fix: cap the pre-allocation at the maximum entries the remaining bytes could hold (min entry = 13 bytes); the loop's bounded `take` still errors cleanly on genuine truncation. Regression PoC: from_bytes_huge_count_does_not_overallocate (count=u32::MAX + no entry bytes → Err, not OOM). 10 provenance tests green, clippy clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- meld-core/src/provenance.rs | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/meld-core/src/provenance.rs b/meld-core/src/provenance.rs index 6f2dded..909aa96 100644 --- a/meld-core/src/provenance.rs +++ b/meld-core/src/provenance.rs @@ -206,7 +206,16 @@ impl ComponentProvenance { let closed_world = take(bytes, &mut p, 1)?[0] != 0; let fused_module_sha256 = hex::encode(take(bytes, &mut p, 32)?); let count = u32le(bytes, &mut p)? as usize; - let mut entries = Vec::with_capacity(count); + // Bound the pre-allocation by the bytes actually remaining: the + // count is an untrusted wire u32, so a crafted section with + // count = u32::MAX must NOT trigger a ~190 GiB `with_capacity` + // and abort the process (DoS on memory-constrained hosts). 
+ // Smallest possible entry is 13 bytes (fused_idx 4 + id_len 4 + + // id 0 + orig_idx 4 + has_code_range 1); the loop's bounded + // `take` still errors on genuine truncation. + const MIN_ENTRY_BYTES: usize = 13; + let max_possible = bytes.len().saturating_sub(p) / MIN_ENTRY_BYTES; + let mut entries = Vec::with_capacity(count.min(max_possible)); for _ in 0..count { let fused_func_idx = u32le(bytes, &mut p)?; let id_len = u32le(bytes, &mut p)? as usize; @@ -453,6 +462,27 @@ mod tests { ); } + #[test] + fn from_bytes_huge_count_does_not_overallocate() { + // Mythos finding (#314): a crafted SCPV v3 section with + // count = u32::MAX must NOT pre-allocate ~190 GiB and abort the + // process — `from_bytes` bounds with_capacity by the bytes + // remaining, then errors cleanly on the truncated first entry. + let mut buf = Vec::new(); + buf.extend_from_slice(MAGIC); + buf.push(VERSION as u8); + buf.push(0); // bounded_memory + buf.push(0); // closed_world + buf.extend_from_slice(&[0u8; 32]); // sha256 + buf.extend_from_slice(&u32::MAX.to_le_bytes()); // count = 4.3e9 + // No entry bytes follow. 
+ let r = ComponentProvenance::from_bytes(&buf); + assert!( + r.is_err(), + "huge count with no entry bytes must Err (truncation), not OOM" + ); + } + #[test] fn sha256_hex_is_lowercase_64_chars() { let h = sha256_hex(b"hello world"); From db28f783e30bc3cf2b95c47bc9dc21bc8b1bbc64 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Fri, 26 Jun 2026 06:42:51 +0200 Subject: [PATCH 3/4] fix(provenance): precise host-namespace check for closed_world (Mythos #314) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second Mythos delta-pass finding: `is_host_import_namespace` used `starts_with("wasi")`, an over-broad prefix classifying a cross-component namespace like `wasi_auth_component` as host — so `fused_is_closed_world` returned true even when a real inter-component import edge survived, OVER- ASSERTING the `closed_world` premise (unsound; scry relies on it). Fix: precise host matching — exact `wasi_snapshot_preview1`/`wasi_unstable`/`env` + reserved `wasi:` and `pulseengine:async` prefixes; else non-host ⇒ closed_world stays conservatively false. Regression: a `wasi`-prefixed component namespace is non-host; `wasi:io/streams` stays host. 10 provenance tests green, clippy clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- meld-core/src/provenance.rs | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/meld-core/src/provenance.rs b/meld-core/src/provenance.rs index 909aa96..3f267f6 100644 --- a/meld-core/src/provenance.rs +++ b/meld-core/src/provenance.rs @@ -252,10 +252,17 @@ impl ComponentProvenance { /// `closed_world`. Anything else is treated conservatively as a /// surviving cross-component import (`closed_world = false`). fn is_host_import_namespace(module: &str) -> bool { - module.starts_with("wasi") - || module == "env" - || module.starts_with("pulseengine:") - || module.starts_with("__") + // PRECISE host namespaces only. 
`closed_world` must never be + // over-asserted: a surviving cross-component import misread as host + // would make the premise UNSOUND for scry's abstract interpretation. + // So we match exact known host module names + reserved interface + // prefixes, and classify anything else as non-host (⇒ closed_world + // stays conservatively false). NB a bare `starts_with("wasi")` was + // over-broad — it swallowed component namespaces like + // `wasi_auth_component` (Mythos #314). + matches!(module, "wasi_snapshot_preview1" | "wasi_unstable" | "env") + || module.starts_with("wasi:") // WASI preview2 interfaces (reserved ns) + || module.starts_with("pulseengine:async") // meld/host async intrinsics (reserved ns) } /// `closed_world` premise: every import in `module_bytes` is in a host @@ -460,6 +467,23 @@ mod tests { !fused_is_closed_world(&cross), "non-host import → not closed" ); + // Mythos #314: a component namespace that merely *starts with* + // "wasi" must NOT be misread as a host interface — that would + // over-assert closed_world (unsound). Precise matching only. + let wasi_prefixed_component = + wat::parse_str(r#"(module (import "wasi_auth_component" "login" (func)))"#).unwrap(); + assert!( + !fused_is_closed_world(&wasi_prefixed_component), + "a 'wasi'-prefixed component namespace is NOT host → not closed" + ); + // The genuine WASI preview2 colon form stays host. 
+ let wasi_p2 = + wat::parse_str(r#"(module (import "wasi:io/streams" "blocking-flush" (func)))"#) + .unwrap(); + assert!( + fused_is_closed_world(&wasi_p2), + "wasi: interface is host → closed" + ); } #[test] From 9b07b1ec6e90416ef2fa5b4aeb609e8b75432bac Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Fri, 26 Jun 2026 07:01:32 +0200 Subject: [PATCH 4/4] =?UTF-8?q?fix(provenance):=20make=20closed=5Fworld=20?= =?UTF-8?q?provably=20sound=20=E2=80=94=20zero=20imports=20(Mythos=20#314)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Third Mythos finding flagged `env` as another non-spec-reserved host namespace; the deeper issue is that ANY host allowlist is an over-assertion risk for a soundness premise (no namespace string is spec-guaranteed host). Stop patching the allowlist and make closed_world provably sound: true iff the fused module has zero imports (a tautology — no imports ⇒ no import edge ⇒ no inter-component escape — that cannot be over-asserted). Conservative (a module with host/WASI imports reports false); precise host-aware classification needs meld's resolution-state and is a follow-up — the SCPV v3 field already carries it. bounded_memory unchanged. 10 provenance unit tests + the real-fusion golden assertion (closed_world == zero-imports) green, clippy clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- meld-core/src/provenance.rs | 120 ++++++++++-------------- meld-core/tests/component_provenance.rs | 15 +-- 2 files changed, 59 insertions(+), 76 deletions(-) diff --git a/meld-core/src/provenance.rs b/meld-core/src/provenance.rs index 3f267f6..1b2602d 100644 --- a/meld-core/src/provenance.rs +++ b/meld-core/src/provenance.rs @@ -122,11 +122,13 @@ pub struct ComponentProvenance { /// memory and drop grow-reachability widening. Sound, varies per /// input (computed by [`crate::memory_probe::module_uses_memory_grow`]). 
pub bounded_memory: bool, - /// **Fusion premise (#313):** every cross-component import edge has - /// been internalised — the fused module has no residual import in a - /// non-host namespace. Lets scry tighten `reachable_from_exports` - /// and assume no inter-component call escapes through an import. - /// Conservative: an unrecognised import namespace yields `false`. + /// **Fusion premise (#313):** the fused module has **zero imports**, + /// so no inter-component call can escape through an import — provably + /// closed. Lets scry tighten `reachable_from_exports`. Computed by + /// [`fused_is_closed_world`]; deliberately conservative (a module + /// with host imports reports `false`) because no import namespace is + /// spec-guaranteed host, and this premise must never be over-asserted + /// (scry treats it as a soundness assumption). See [`fused_is_closed_world`]. pub closed_world: bool, pub fused_module_sha256: String, pub entries: Vec<Entry>, @@ -247,38 +249,30 @@ impl ComponentProvenance { } } -/// Host import namespaces — residual imports in these are the external -/// boundary, not unresolved inter-component edges, so they don't break -/// `closed_world`. Anything else is treated conservatively as a -/// surviving cross-component import (`closed_world = false`). -fn is_host_import_namespace(module: &str) -> bool { - // PRECISE host namespaces only. `closed_world` must never be - // over-asserted: a surviving cross-component import misread as host - // would make the premise UNSOUND for scry's abstract interpretation. - // So we match exact known host module names + reserved interface - // prefixes, and classify anything else as non-host (⇒ closed_world - // stays conservatively false). NB a bare `starts_with("wasi")` was - // over-broad — it swallowed component namespaces like - // `wasi_auth_component` (Mythos #314). 
- matches!(module, "wasi_snapshot_preview1" | "wasi_unstable" | "env") - || module.starts_with("wasi:") // WASI preview2 interfaces (reserved ns) - || module.starts_with("pulseengine:async") // meld/host async intrinsics (reserved ns) -} - -/// `closed_world` premise: every import in `module_bytes` is in a host -/// namespace (no surviving inter-component import edge). A module with -/// no imports is trivially closed. Conservative on unrecognised -/// namespaces (returns `false`) so the premise is never over-asserted. +/// `closed_world` premise: **provably** true iff the fused module has +/// **zero imports**. +/// +/// This is a *tautology* — no imports ⇒ no import edge of any kind ⇒ no +/// surviving inter-component import — so it can never be over-asserted, +/// which is the one thing this premise must never do (scry treats it as +/// a soundness assumption; a false-positive would silently disable its +/// inter-component reachability widening). +/// +/// We deliberately do NOT try to classify imports as "host" vs +/// "cross-component" by namespace: no namespace string (`env`, `wasi…`, +/// …) is *spec-guaranteed* to be host, so any allowlist is an +/// over-assertion risk (Mythos #314 found three escalating cases). The +/// result is conservative — a module with host (e.g. WASI) imports +/// reports `false` even though it is closed in the inter-component +/// sense. Tightening it requires meld's resolution-state (which imports +/// were internalised vs deliberately kept external), tracked as a +/// follow-up; the wire format already carries the field. 
pub fn fused_is_closed_world(module_bytes: &[u8]) -> bool { for payload in wasmparser::Parser::new(0).parse_all(module_bytes) { - if let Ok(wasmparser::Payload::ImportSection(reader)) = payload { - for imp in reader.into_imports() { - match imp { - Ok(import) if !is_host_import_namespace(import.module) => return false, - Ok(_) => {} - Err(_) => return false, - } - } + match payload { + Ok(wasmparser::Payload::ImportSection(reader)) if reader.count() > 0 => return false, + Ok(_) => {} + Err(_) => return false, } } true @@ -448,42 +442,28 @@ mod tests { } #[test] - fn closed_world_host_namespace_classification() { - // Host namespaces don't break closed_world; an unrecognised - // (e.g. component-instance) namespace does, conservatively. - let host = wat::parse_str( - r#"(module (import "wasi_snapshot_preview1" "fd_write" - (func (param i32 i32 i32 i32) (result i32))))"#, - ) - .unwrap(); - assert!(fused_is_closed_world(&host), "wasi import is host → closed"); + fn closed_world_is_provably_no_imports() { + // closed_world must NEVER be over-asserted (scry soundness). The + // only definition we can prove sound without classifying import + // namespaces is "zero imports". Mythos #314 escalated through + // three over-broad host allowlists (wasi-prefix, then env); the + // tautology ends the cycle. let no_imports = wat::parse_str(r#"(module (func nop))"#).unwrap(); - assert!( - fused_is_closed_world(&no_imports), - "no imports → trivially closed" - ); - let cross = wat::parse_str(r#"(module (import "auth-component" "login" (func)))"#).unwrap(); - assert!( - !fused_is_closed_world(&cross), - "non-host import → not closed" - ); - // Mythos #314: a component namespace that merely *starts with* - // "wasi" must NOT be misread as a host interface — that would - // over-assert closed_world (unsound). Precise matching only. 
- let wasi_prefixed_component = - wat::parse_str(r#"(module (import "wasi_auth_component" "login" (func)))"#).unwrap(); - assert!( - !fused_is_closed_world(&wasi_prefixed_component), - "a 'wasi'-prefixed component namespace is NOT host → not closed" - ); - // The genuine WASI preview2 colon form stays host. - let wasi_p2 = - wat::parse_str(r#"(module (import "wasi:io/streams" "blocking-flush" (func)))"#) - .unwrap(); - assert!( - fused_is_closed_world(&wasi_p2), - "wasi: interface is host → closed" - ); + assert!(fused_is_closed_world(&no_imports), "no imports → closed"); + // ANY import — even a genuine WASI host import — conservatively + // yields false (sound: never claims closed when an edge exists). + for m in [ + r#"(module (import "wasi_snapshot_preview1" "fd_write" (func (param i32 i32 i32 i32) (result i32))))"#, + r#"(module (import "wasi:io/streams" "blocking-flush" (func)))"#, + r#"(module (import "env" "memory" (memory 1)))"#, + r#"(module (import "auth-component" "login" (func)))"#, + ] { + let w = wat::parse_str(m).unwrap(); + assert!( + !fused_is_closed_world(&w), + "any import ⇒ conservatively not closed: {m}" + ); + } } #[test] diff --git a/meld-core/tests/component_provenance.rs b/meld-core/tests/component_provenance.rs index bc75da8..39b696e 100644 --- a/meld-core/tests/component_provenance.rs +++ b/meld-core/tests/component_provenance.rs @@ -175,17 +175,20 @@ fn v3_fusion_premises_present_on_real_fusion() { ); let prov = ComponentProvenance::from_bytes(payload).expect("decode SCPV v3"); - assert!( - prov.closed_world, - "a fully wac-composed fusion internalises all cross-component imports → closed_world" - ); - // bounded_memory is input-dependent; assert it agrees with a direct - // memory.grow probe of the fused module (the premise's source). + // Both premises must agree with an independent probe of the fused + // module (the premises' sources) — sound and input-independent. 
let grows = meld_core::memory_probe::module_uses_memory_grow(&fused); assert_eq!( prov.bounded_memory, !grows, "bounded_memory must equal !uses(memory.grow)" ); + let has_imports = wasmparser::Parser::new(0) + .parse_all(&fused) + .any(|p| matches!(p, Ok(wasmparser::Payload::ImportSection(r)) if r.count() > 0)); + assert_eq!( + prov.closed_world, !has_imports, + "closed_world must equal (fused module has zero imports)" + ); } #[test]