From 1e8d119658fed10af7c25ee5f1a660243446a692 Mon Sep 17 00:00:00 2001 From: cds-amal Date: Wed, 4 Mar 2026 00:45:47 -0500 Subject: [PATCH 1/2] feat(printer): typestate enforcement for post-mono Body invariant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wrap Body in a Phased generic wrapper parameterized by phase markers Raw and Mono. Phase 1+2 code works with Item (bodies may contain ConstantKind::Unevaluated); the phase 2→3 boundary validates each body via UnevaluatedChecker (a MirVisitor that panics on any remaining unevaluated constant) before promoting to Item. This makes the invariant that was previously implicit (all unevaluated constants resolved by the fixpoint loop) into a compile-time guarantee: phase 3 code structurally receives validated data. Phased serializes transparently (delegates to inner T), so JSON output is unchanged. --- src/mk_graph/output/d2.rs | 2 +- src/mk_graph/output/dot.rs | 5 +- src/printer/collect.rs | 17 ++++--- src/printer/items.rs | 14 +++--- src/printer/mod.rs | 7 ++- src/printer/phased.rs | 97 ++++++++++++++++++++++++++++++++++++++ src/printer/schema.rs | 76 +++++++++++++++++++++-------- 7 files changed, 182 insertions(+), 36 deletions(-) create mode 100644 src/printer/phased.rs diff --git a/src/mk_graph/output/d2.rs b/src/mk_graph/output/d2.rs index c3ccc491..9eb79a3d 100644 --- a/src/mk_graph/output/d2.rs +++ b/src/mk_graph/output/d2.rs @@ -23,7 +23,7 @@ impl SmirJson { for item in self.items { match item.mono_item_kind { MonoItemKind::MonoItemFn { name, body, .. } => { - render_d2_function(&name, body.as_ref(), &ctx, &mut output); + render_d2_function(&name, body.as_ref().map(|b| b.inner()), &ctx, &mut output); } MonoItemKind::MonoItemGlobalAsm { asm } => { render_d2_asm(&asm, &mut output); diff --git a/src/mk_graph/output/dot.rs b/src/mk_graph/output/dot.rs index 78f03478..38ce80b8 100644 --- a/src/mk_graph/output/dot.rs +++ b/src/mk_graph/output/dot.rs @@ -68,6 +68,7 @@ impl SmirJson { for item in self.items { match item.mono_item_kind { MonoItemKind::MonoItemFn { name, body, id: _ } => { + let body = body.as_ref().map(|b| b.inner()); let mut c = graph.cluster(); c.set_label(&name_lines(&name)); c.set_style(Style::Filled); @@ -81,7 +82,7 @@ impl SmirJson { let mut local_node = c.node_auto(); let mut vector: Vec = vec![]; vector.push(String::from("LOCALS")); - for (index, decl) in body.clone().unwrap().local_decls() { + for (index, decl) in body.cloned().unwrap().local_decls() { let ty_with_layout = ctx.render_type_with_layout(decl.ty); vector.push(format!("{index} = {}", ty_with_layout)); } @@ -287,7 +288,7 @@ impl SmirJson { } }; - if let Some(ref body) = body { + if let Some(body) = body { add_call_edges(&mut graph, 0, &body.blocks); } } diff --git a/src/printer/collect.rs b/src/printer/collect.rs index c8089e6e..399a818f 100644 --- a/src/printer/collect.rs +++ b/src/printer/collect.rs @@ -24,6 +24,7 @@ use stable_mir::CrateDef; use super::items::{get_foreign_module_details, mk_item}; use super::mir_visitor::{maybe_add_to_link_map, BodyAnalyzer, UnevalConstInfo}; +use super::phased::Raw; use super::schema::{ AllocInfo, AllocMap, CollectedCrate, DerivedInfo, Item, LinkMap, SmirJson, SmirJsonDebugInfo, SpanMap, @@ -53,7 +54,7 @@ fn warn_missing_body(mono_item: &MonoItem) { } } -fn collect_items(tcx: TyCtxt<'_>) -> HashMap { +fn collect_items(tcx: TyCtxt<'_>) -> HashMap)> { // get initial set of mono_items let items = mono_collect(tcx); items @@ -73,7 +74,7 @@ fn enqueue_unevaluated_consts( tcx: TyCtxt<'_>, discovered: Vec, known_names: &mut HashSet, - pending: &mut HashMap, + pending: &mut HashMap)>, unevaluated_consts: &mut HashMap, ) { for info in discovered { @@ -98,7 +99,7 @@ fn enqueue_unevaluated_consts( /// this phase, then dropped; only the `Item` survives into `CollectedCrate`. fn collect_and_analyze_items( tcx: TyCtxt<'_>, - initial_items: HashMap, + initial_items: HashMap)>, ) -> (CollectedCrate, DerivedInfo) { let mut calls_map: LinkMap = HashMap::new(); let mut visited_allocs = AllocMap::new(); @@ -107,8 +108,8 @@ fn collect_and_analyze_items( let mut unevaluated_consts: HashMap = HashMap::new(); let mut known_names: HashSet = initial_items.keys().cloned().collect(); - let mut pending: HashMap = initial_items; - let mut all_items: Vec = Vec::new(); + let mut pending: HashMap)> = initial_items; + let mut all_items: Vec> = Vec::new(); while let Some((_name, (mono_item, item))) = take_any(&mut pending) { maybe_add_to_link_map(tcx, &mono_item, &mut calls_map); @@ -142,9 +143,13 @@ fn collect_and_analyze_items( all_items.push(item); } + // Phase 2 -> 3 boundary: validate that all bodies are free of + // ConstantKind::Unevaluated before handing them to phase 3. + let validated_items: Vec<_> = all_items.into_iter().map(|i| i.validate()).collect(); + ( CollectedCrate { - items: all_items, + items: validated_items, unevaluated_consts, }, DerivedInfo { diff --git a/src/printer/items.rs b/src/printer/items.rs index a253ad2c..3ebbd58a 100644 --- a/src/printer/items.rs +++ b/src/printer/items.rs @@ -22,21 +22,23 @@ use stable_mir::{CrateDef, CrateItem}; use crate::compat::bridge::mono_instance; +use super::phased::{Phased, Raw}; use super::schema::{BodyDetails, ForeignItem, ForeignModule, GenericData, Item, ItemDetails}; #[derive(Serialize, Clone)] -pub enum MonoItemKind { +#[serde(bound = "")] +pub enum MonoItemKind { MonoItemFn { name: String, id: stable_mir::DefId, - body: Option, + body: Option>, }, MonoItemStatic { name: String, id: stable_mir::DefId, allocation: Option, #[serde(skip)] - body: Option, + body: Option>, }, MonoItemGlobalAsm { asm: String, @@ -73,7 +75,7 @@ fn get_item_details( } } -pub(super) fn mk_item(tcx: TyCtxt<'_>, item: MonoItem, sym_name: String) -> (MonoItem, Item) { +pub(super) fn mk_item(tcx: TyCtxt<'_>, item: MonoItem, sym_name: String) -> (MonoItem, Item) { match item { MonoItem::Fn(inst) => { let id = inst.def.def_id(); @@ -89,7 +91,7 @@ pub(super) fn mk_item(tcx: TyCtxt<'_>, item: MonoItem, sym_name: String) -> (Mon MonoItemKind::MonoItemFn { name: name.clone(), id, - body, + body: body.map(Phased::new), }, details, ), @@ -118,7 +120,7 @@ pub(super) fn mk_item(tcx: TyCtxt<'_>, item: MonoItem, sym_name: String) -> (Mon name: static_def.name(), id: static_def.def_id(), allocation: alloc, - body, + body: body.map(Phased::new), }, get_item_details(tcx, internal_id, None, None), ), diff --git a/src/printer/mod.rs b/src/printer/mod.rs index f251e27f..22478cca 100644 --- a/src/printer/mod.rs +++ b/src/printer/mod.rs @@ -9,8 +9,9 @@ //! | Module | Responsibility | //! |--------|----------------| //! | [`schema`] | Data model types ([`SmirJson`], [`Item`], [`AllocInfo`], etc.) and type aliases; [`Item`] deliberately excludes `MonoItem` for structural phase separation | -//! | [`collect`] | Three-phase pipeline: collect items, analyze bodies, assemble final output; phase boundary is enforced structurally via the `(MonoItem, Item)` split | -//! | [`items`] | Constructing `(MonoItem, Item)` pairs and extracting debug-level details | +//! | [`collect`] | Three-phase pipeline: collect items, analyze bodies, assemble final output; phase boundary is enforced structurally via the `(MonoItem, Item)` split and [`phased::Phased`] body wrapper | +//! | [`items`] | Constructing `(MonoItem, Item)` pairs and extracting debug-level details | +//! | [`phased`] | [`Phased`](phased::Phased) typestate wrapper encoding the post-monomorphization body invariant | //! | [`mir_visitor`] | `BodyAnalyzer`: single-pass MIR body traversal collecting calls, allocs, types, spans | //! | [`ty_visitor`] | `TyCollector`: recursively collects reachable types with layout info (some special kinds are traversed but not stored) | //! | [`link_map`] | Function resolution map: type + instance kind to symbol name | @@ -49,6 +50,7 @@ mod collect; mod items; mod link_map; mod mir_visitor; +mod phased; mod schema; mod ty_visitor; mod types; @@ -57,6 +59,7 @@ mod util; // Re-exports preserving the public API pub use collect::collect_smir; pub use items::MonoItemKind; +pub use phased::Mono; pub use schema::{AllocInfo, FnSymType, Item, LinkMapKey, SmirJson, TypeMetadata}; pub(crate) use util::hash; diff --git a/src/printer/phased.rs b/src/printer/phased.rs new file mode 100644 index 00000000..4dce38a6 --- /dev/null +++ b/src/printer/phased.rs @@ -0,0 +1,97 @@ +//! Typestate wrapper encoding the post-monomorphization body invariant. +//! +//! After the collection pipeline's fixpoint loop resolves all unevaluated +//! constants, bodies stored in [`Item`](super::schema::Item) should contain +//! no `ConstantKind::Unevaluated`. [`Phased`] encodes this guarantee +//! in the type system: phase 1+2 code works with `Phased`, and +//! the phase 2 -> 3 boundary validates each body (via [`UnevaluatedChecker`]) +//! before producing `Phased`. +//! +//! [`Phased`] serializes transparently (delegates to the inner `T`), so +//! JSON output is unchanged. + +use std::marker::PhantomData; + +use crate::compat::serde; +use crate::compat::stable_mir; + +use serde::{Serialize, Serializer}; +use stable_mir::mir::visit::MirVisitor; +use stable_mir::mir::Body; + +/// Pre-validation phase marker. Bodies may still contain `ConstantKind::Unevaluated`. +pub struct Raw; + +/// Post-validation phase marker. Bodies are guaranteed free of `ConstantKind::Unevaluated`. +pub struct Mono; + +/// A value of type `T` tagged with a phase marker `S`. +/// +/// Construction of `Phased` is unrestricted; `Phased` can +/// only be obtained through [`Phased::::validate`], which walks +/// the body and panics if any unevaluated constant is found. +pub struct Phased(T, PhantomData); + +impl Phased { + /// Borrow the inner value. + pub fn inner(&self) -> &T { + &self.0 + } + + /// Consume the wrapper, returning the inner value. + pub fn into_inner(self) -> T { + self.0 + } +} + +impl Clone for Phased { + fn clone(&self) -> Self { + Phased(self.0.clone(), PhantomData) + } +} + +impl Serialize for Phased { + fn serialize(&self, serializer: Ser) -> Result + where + Ser: Serializer, + { + self.0.serialize(serializer) + } +} + +impl Phased { + /// Wrap a body in the `Raw` phase. Unrestricted. + pub fn new(body: Body) -> Self { + Phased(body, PhantomData) + } + + /// Validate that the body contains no `ConstantKind::Unevaluated`, then + /// promote to the `Mono` phase. Panics if the invariant is violated. + pub fn validate(self) -> Phased { + UnevaluatedChecker.visit_body(&self.0); + Phased(self.0, PhantomData) + } +} + +/// MIR visitor that panics if any `ConstantKind::Unevaluated` is encountered. +/// +/// Used by [`Phased::::validate`] to enforce the post-mono invariant +/// at the phase 2 -> 3 boundary. +struct UnevaluatedChecker; + +impl MirVisitor for UnevaluatedChecker { + fn visit_mir_const( + &mut self, + constant: &stable_mir::ty::MirConst, + loc: stable_mir::mir::visit::Location, + ) { + if let stable_mir::ty::ConstantKind::Unevaluated(_) = constant.kind() { + panic!( + "Phased::validate: body contains ConstantKind::Unevaluated \ + at location {loc:?}; all unevaluated constants should have been \ + resolved by the fixpoint loop before phase 3" + ); + } + self.super_mir_const(constant, loc); + } +} diff --git a/src/printer/schema.rs b/src/printer/schema.rs index d75da9ba..fdf631d5 100644 --- a/src/printer/schema.rs +++ b/src/printer/schema.rs @@ -11,6 +11,7 @@ use crate::compat::stable_mir; use std::collections::{HashMap, HashSet}; use super::items::MonoItemKind; +use super::phased::{Mono, Raw}; use serde::{Serialize, Serializer}; use stable_mir::abi::LayoutShape; use stable_mir::mir::alloc::{AllocId, GlobalAlloc}; @@ -95,17 +96,17 @@ impl AllocMap { /// Items (which is exactly the bug that the declarative pipeline /// restructuring was designed to prevent). #[cfg(debug_assertions)] - pub fn verify_coherence(&self, items: &[Item]) { + pub fn verify_coherence(&self, items: &[Item]) { // Collect alloc ids referenced in stored bodies let mut body_ids: HashSet = HashSet::new(); for item in items { let body = match &item.mono_item_kind { MonoItemKind::MonoItemFn { body: Some(body), .. - } => Some(body), + } => Some(body.inner()), MonoItemKind::MonoItemStatic { body: Some(body), .. - } => Some(body), + } => Some(body.inner()), _ => None, }; if let Some(body) = body { @@ -279,16 +280,17 @@ pub(super) struct ForeignModule { /// The `MonoItem` lives alongside the `Item` in the phase 1+2 work queue /// and is dropped before assembly begins. #[derive(Serialize, Clone)] -pub struct Item { +#[serde(bound = "")] +pub struct Item { pub symbol_name: String, - pub mono_item_kind: MonoItemKind, + pub mono_item_kind: MonoItemKind, details: Option, } -impl Item { +impl Item { pub(super) fn new( symbol_name: String, - mono_item_kind: MonoItemKind, + mono_item_kind: MonoItemKind, details: Option, ) -> Self { Item { @@ -304,32 +306,68 @@ impl Item { match &self.mono_item_kind { MonoItemKind::MonoItemFn { body: Some(body), .. - } => Some((body, body.locals())), + } => Some((body.inner(), body.inner().locals())), MonoItemKind::MonoItemStatic { body: Some(body), .. - } => Some((body, &[])), + } => Some((body.inner(), &[])), _ => None, } } } -impl PartialEq for Item { - fn eq(&self, other: &Item) -> bool { +impl Item { + /// Validate all bodies in this item, promoting from `Raw` to `Mono` phase. + pub(super) fn validate(self) -> Item { + Item { + symbol_name: self.symbol_name, + mono_item_kind: self.mono_item_kind.validate(), + details: self.details, + } + } +} + +impl MonoItemKind { + /// Validate the body (if present), promoting from `Raw` to `Mono` phase. + fn validate(self) -> MonoItemKind { + match self { + MonoItemKind::MonoItemFn { name, id, body } => MonoItemKind::MonoItemFn { + name, + id, + body: body.map(|b| b.validate()), + }, + MonoItemKind::MonoItemStatic { + name, + id, + allocation, + body, + } => MonoItemKind::MonoItemStatic { + name, + id, + allocation, + body: body.map(|b| b.validate()), + }, + MonoItemKind::MonoItemGlobalAsm { asm } => MonoItemKind::MonoItemGlobalAsm { asm }, + } + } +} + +impl PartialEq for Item { + fn eq(&self, other: &Item) -> bool { self.cmp(other) == std::cmp::Ordering::Equal } } -impl Eq for Item {} +impl Eq for Item {} -impl PartialOrd for Item { - fn partial_cmp(&self, other: &Item) -> Option { +impl PartialOrd for Item { + fn partial_cmp(&self, other: &Item) -> Option { Some(self.cmp(other)) } } -impl Ord for Item { - fn cmp(&self, other: &Item) -> std::cmp::Ordering { +impl Ord for Item { + fn cmp(&self, other: &Item) -> std::cmp::Ordering { use MonoItemKind::*; - let sort_key = |i: &Item| { + let sort_key = |i: &Item| { format!( "{}!{}", i.symbol_name, @@ -462,7 +500,7 @@ pub struct SmirJson { pub allocs: Vec, pub functions: Vec<(LinkMapKey, FnSymType)>, pub uneval_consts: Vec<(ConstDef, String)>, - pub items: Vec, + pub items: Vec>, pub types: Vec<(stable_mir::ty::Ty, TypeMetadata)>, pub spans: Vec<(usize, SourceData)>, pub debug: Option, @@ -486,7 +524,7 @@ pub struct SmirJsonDebugInfo { /// Contains only [`Item`] values (no `MonoItem`), so code that operates on a /// `CollectedCrate` structurally cannot call `inst.body()` or re-enter rustc. pub(super) struct CollectedCrate { - pub items: Vec, + pub items: Vec>, pub unevaluated_consts: HashMap, } From 6568a2557af8407114c5d199a177214c662dc4c7 Mon Sep 17 00:00:00 2001 From: cds-amal Date: Wed, 4 Mar 2026 01:20:24 -0500 Subject: [PATCH 2/2] fix(phased): add actionable guidance to validation panic message The UnevaluatedChecker panic now explains what happened (the fixpoint loop didn't resolve a surviving unevaluated constant), why it matters (the input program likely triggers an unhandled code path), and what the user should do (file an issue with the triggering .rs file). --- src/printer/phased.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/printer/phased.rs b/src/printer/phased.rs index 4dce38a6..784d0a85 100644 --- a/src/printer/phased.rs +++ b/src/printer/phased.rs @@ -88,8 +88,14 @@ impl MirVisitor for UnevaluatedChecker { if let stable_mir::ty::ConstantKind::Unevaluated(_) = constant.kind() { panic!( "Phased::validate: body contains ConstantKind::Unevaluated \ - at location {loc:?}; all unevaluated constants should have been \ - resolved by the fixpoint loop before phase 3" + at location {loc:?}.\n\n\ + The fixpoint loop in collect_and_analyze_items should have resolved \ + all unevaluated constants before phase 3, but this one survived. \ + The input program likely triggers a code path we haven't encountered yet.\n\n\ + Action: please file an issue at \ + https://github.com/runtimeverification/stable-mir-json/issues \ + with the .rs file that triggered this panic so we can add it as a \ + test case and extend the fixpoint loop to handle it." ); } self.super_mir_const(constant, loc);