diff --git a/src/mk_graph/output/d2.rs b/src/mk_graph/output/d2.rs index c3ccc491..9eb79a3d 100644 --- a/src/mk_graph/output/d2.rs +++ b/src/mk_graph/output/d2.rs @@ -23,7 +23,7 @@ impl SmirJson { for item in self.items { match item.mono_item_kind { MonoItemKind::MonoItemFn { name, body, .. } => { - render_d2_function(&name, body.as_ref(), &ctx, &mut output); + render_d2_function(&name, body.as_ref().map(|b| b.inner()), &ctx, &mut output); } MonoItemKind::MonoItemGlobalAsm { asm } => { render_d2_asm(&asm, &mut output); diff --git a/src/mk_graph/output/dot.rs b/src/mk_graph/output/dot.rs index 78f03478..38ce80b8 100644 --- a/src/mk_graph/output/dot.rs +++ b/src/mk_graph/output/dot.rs @@ -68,6 +68,7 @@ impl SmirJson { for item in self.items { match item.mono_item_kind { MonoItemKind::MonoItemFn { name, body, id: _ } => { + let body = body.as_ref().map(|b| b.inner()); let mut c = graph.cluster(); c.set_label(&name_lines(&name)); c.set_style(Style::Filled); @@ -81,7 +82,7 @@ impl SmirJson { let mut local_node = c.node_auto(); let mut vector: Vec = vec![]; vector.push(String::from("LOCALS")); - for (index, decl) in body.clone().unwrap().local_decls() { + for (index, decl) in body.cloned().unwrap().local_decls() { let ty_with_layout = ctx.render_type_with_layout(decl.ty); vector.push(format!("{index} = {}", ty_with_layout)); } @@ -287,7 +288,7 @@ impl SmirJson { } }; - if let Some(ref body) = body { + if let Some(body) = body { add_call_edges(&mut graph, 0, &body.blocks); } } diff --git a/src/printer/collect.rs b/src/printer/collect.rs index c8089e6e..399a818f 100644 --- a/src/printer/collect.rs +++ b/src/printer/collect.rs @@ -24,6 +24,7 @@ use stable_mir::CrateDef; use super::items::{get_foreign_module_details, mk_item}; use super::mir_visitor::{maybe_add_to_link_map, BodyAnalyzer, UnevalConstInfo}; +use super::phased::Raw; use super::schema::{ AllocInfo, AllocMap, CollectedCrate, DerivedInfo, Item, LinkMap, SmirJson, SmirJsonDebugInfo, SpanMap, @@ -53,7 +54,7 @@ fn warn_missing_body(mono_item: &MonoItem) { } } -fn collect_items(tcx: TyCtxt<'_>) -> HashMap { +fn collect_items(tcx: TyCtxt<'_>) -> HashMap)> { // get initial set of mono_items let items = mono_collect(tcx); items @@ -73,7 +74,7 @@ fn enqueue_unevaluated_consts( tcx: TyCtxt<'_>, discovered: Vec, known_names: &mut HashSet, - pending: &mut HashMap, + pending: &mut HashMap)>, unevaluated_consts: &mut HashMap, ) { for info in discovered { @@ -98,7 +99,7 @@ fn enqueue_unevaluated_consts( /// this phase, then dropped; only the `Item` survives into `CollectedCrate`. fn collect_and_analyze_items( tcx: TyCtxt<'_>, - initial_items: HashMap, + initial_items: HashMap)>, ) -> (CollectedCrate, DerivedInfo) { let mut calls_map: LinkMap = HashMap::new(); let mut visited_allocs = AllocMap::new(); @@ -107,8 +108,8 @@ fn collect_and_analyze_items( let mut unevaluated_consts: HashMap = HashMap::new(); let mut known_names: HashSet = initial_items.keys().cloned().collect(); - let mut pending: HashMap = initial_items; - let mut all_items: Vec = Vec::new(); + let mut pending: HashMap)> = initial_items; + let mut all_items: Vec> = Vec::new(); while let Some((_name, (mono_item, item))) = take_any(&mut pending) { maybe_add_to_link_map(tcx, &mono_item, &mut calls_map); @@ -142,9 +143,13 @@ fn collect_and_analyze_items( all_items.push(item); } + // Phase 2 -> 3 boundary: validate that all bodies are free of + // ConstantKind::Unevaluated before handing them to phase 3. + let validated_items: Vec<_> = all_items.into_iter().map(|i| i.validate()).collect(); + ( CollectedCrate { - items: all_items, + items: validated_items, unevaluated_consts, }, DerivedInfo { diff --git a/src/printer/items.rs b/src/printer/items.rs index a253ad2c..3ebbd58a 100644 --- a/src/printer/items.rs +++ b/src/printer/items.rs @@ -22,21 +22,23 @@ use stable_mir::{CrateDef, CrateItem}; use crate::compat::bridge::mono_instance; +use super::phased::{Phased, Raw}; use super::schema::{BodyDetails, ForeignItem, ForeignModule, GenericData, Item, ItemDetails}; #[derive(Serialize, Clone)] -pub enum MonoItemKind { +#[serde(bound = "")] +pub enum MonoItemKind { MonoItemFn { name: String, id: stable_mir::DefId, - body: Option, + body: Option>, }, MonoItemStatic { name: String, id: stable_mir::DefId, allocation: Option, #[serde(skip)] - body: Option, + body: Option>, }, MonoItemGlobalAsm { asm: String, @@ -73,7 +75,7 @@ fn get_item_details( } } -pub(super) fn mk_item(tcx: TyCtxt<'_>, item: MonoItem, sym_name: String) -> (MonoItem, Item) { +pub(super) fn mk_item(tcx: TyCtxt<'_>, item: MonoItem, sym_name: String) -> (MonoItem, Item) { match item { MonoItem::Fn(inst) => { let id = inst.def.def_id(); @@ -89,7 +91,7 @@ pub(super) fn mk_item(tcx: TyCtxt<'_>, item: MonoItem, sym_name: String) -> (Mon MonoItemKind::MonoItemFn { name: name.clone(), id, - body, + body: body.map(Phased::new), }, details, ), @@ -118,7 +120,7 @@ pub(super) fn mk_item(tcx: TyCtxt<'_>, item: MonoItem, sym_name: String) -> (Mon name: static_def.name(), id: static_def.def_id(), allocation: alloc, - body, + body: body.map(Phased::new), }, get_item_details(tcx, internal_id, None, None), ), diff --git a/src/printer/mod.rs b/src/printer/mod.rs index f251e27f..22478cca 100644 --- a/src/printer/mod.rs +++ b/src/printer/mod.rs @@ -9,8 +9,9 @@ //! | Module | Responsibility | //! |--------|----------------| //! | [`schema`] | Data model types ([`SmirJson`], [`Item`], [`AllocInfo`], etc.) and type aliases; [`Item`] deliberately excludes `MonoItem` for structural phase separation | -//! | [`collect`] | Three-phase pipeline: collect items, analyze bodies, assemble final output; phase boundary is enforced structurally via the `(MonoItem, Item)` split | -//! | [`items`] | Constructing `(MonoItem, Item)` pairs and extracting debug-level details | +//! | [`collect`] | Three-phase pipeline: collect items, analyze bodies, assemble final output; phase boundary is enforced structurally via the `(MonoItem, Item)` split and [`phased::Phased`] body wrapper | +//! | [`items`] | Constructing `(MonoItem, Item)` pairs and extracting debug-level details | +//! | [`phased`] | [`Phased`](phased::Phased) typestate wrapper encoding the post-monomorphization body invariant | //! | [`mir_visitor`] | `BodyAnalyzer`: single-pass MIR body traversal collecting calls, allocs, types, spans | //! | [`ty_visitor`] | `TyCollector`: recursively collects reachable types with layout info (some special kinds are traversed but not stored) | //! | [`link_map`] | Function resolution map: type + instance kind to symbol name | @@ -49,6 +50,7 @@ mod collect; mod items; mod link_map; mod mir_visitor; +mod phased; mod schema; mod ty_visitor; mod types; @@ -57,6 +59,7 @@ mod util; // Re-exports preserving the public API pub use collect::collect_smir; pub use items::MonoItemKind; +pub use phased::Mono; pub use schema::{AllocInfo, FnSymType, Item, LinkMapKey, SmirJson, TypeMetadata}; pub(crate) use util::hash; diff --git a/src/printer/phased.rs b/src/printer/phased.rs new file mode 100644 index 00000000..784d0a85 --- /dev/null +++ b/src/printer/phased.rs @@ -0,0 +1,103 @@ +//! Typestate wrapper encoding the post-monomorphization body invariant. +//! +//! After the collection pipeline's fixpoint loop resolves all unevaluated +//! constants, bodies stored in [`Item`](super::schema::Item) should contain +//! no `ConstantKind::Unevaluated`. [`Phased`] encodes this guarantee +//! in the type system: phase 1+2 code works with `Phased`, and +//! the phase 2 -> 3 boundary validates each body (via [`UnevaluatedChecker`]) +//! before producing `Phased`. +//! +//! [`Phased`] serializes transparently (delegates to the inner `T`), so +//! JSON output is unchanged. + +use std::marker::PhantomData; + +use crate::compat::serde; +use crate::compat::stable_mir; + +use serde::{Serialize, Serializer}; +use stable_mir::mir::visit::MirVisitor; +use stable_mir::mir::Body; + +/// Pre-validation phase marker. Bodies may still contain `ConstantKind::Unevaluated`. +pub struct Raw; + +/// Post-validation phase marker. Bodies are guaranteed free of `ConstantKind::Unevaluated`. +pub struct Mono; + +/// A value of type `T` tagged with a phase marker `S`. +/// +/// Construction of `Phased` is unrestricted; `Phased` can +/// only be obtained through [`Phased::::validate`], which walks +/// the body and panics if any unevaluated constant is found. +pub struct Phased(T, PhantomData); + +impl Phased { + /// Borrow the inner value. + pub fn inner(&self) -> &T { + &self.0 + } + + /// Consume the wrapper, returning the inner value. + pub fn into_inner(self) -> T { + self.0 + } +} + +impl Clone for Phased { + fn clone(&self) -> Self { + Phased(self.0.clone(), PhantomData) + } +} + +impl Serialize for Phased { + fn serialize(&self, serializer: Ser) -> Result + where + Ser: Serializer, + { + self.0.serialize(serializer) + } +} + +impl Phased { + /// Wrap a body in the `Raw` phase. Unrestricted. + pub fn new(body: Body) -> Self { + Phased(body, PhantomData) + } + + /// Validate that the body contains no `ConstantKind::Unevaluated`, then + /// promote to the `Mono` phase. Panics if the invariant is violated. + pub fn validate(self) -> Phased { + UnevaluatedChecker.visit_body(&self.0); + Phased(self.0, PhantomData) + } +} + +/// MIR visitor that panics if any `ConstantKind::Unevaluated` is encountered. +/// +/// Used by [`Phased::::validate`] to enforce the post-mono invariant +/// at the phase 2 -> 3 boundary. +struct UnevaluatedChecker; + +impl MirVisitor for UnevaluatedChecker { + fn visit_mir_const( + &mut self, + constant: &stable_mir::ty::MirConst, + loc: stable_mir::mir::visit::Location, + ) { + if let stable_mir::ty::ConstantKind::Unevaluated(_) = constant.kind() { + panic!( + "Phased::validate: body contains ConstantKind::Unevaluated \ + at location {loc:?}.\n\n\ + The fixpoint loop in collect_and_analyze_items should have resolved \ + all unevaluated constants before phase 3, but this one survived. \ + The input program likely triggers a code path we haven't encountered yet.\n\n\ + Action: please file an issue at \ + https://github.com/runtimeverification/stable-mir-json/issues \ + with the .rs file that triggered this panic so we can add it as a \ + test case and extend the fixpoint loop to handle it." + ); + } + self.super_mir_const(constant, loc); + } +} diff --git a/src/printer/schema.rs b/src/printer/schema.rs index d75da9ba..fdf631d5 100644 --- a/src/printer/schema.rs +++ b/src/printer/schema.rs @@ -11,6 +11,7 @@ use crate::compat::stable_mir; use std::collections::{HashMap, HashSet}; use super::items::MonoItemKind; +use super::phased::{Mono, Raw}; use serde::{Serialize, Serializer}; use stable_mir::abi::LayoutShape; use stable_mir::mir::alloc::{AllocId, GlobalAlloc}; @@ -95,17 +96,17 @@ impl AllocMap { /// Items (which is exactly the bug that the declarative pipeline /// restructuring was designed to prevent). #[cfg(debug_assertions)] - pub fn verify_coherence(&self, items: &[Item]) { + pub fn verify_coherence(&self, items: &[Item]) { // Collect alloc ids referenced in stored bodies let mut body_ids: HashSet = HashSet::new(); for item in items { let body = match &item.mono_item_kind { MonoItemKind::MonoItemFn { body: Some(body), .. - } => Some(body), + } => Some(body.inner()), MonoItemKind::MonoItemStatic { body: Some(body), .. - } => Some(body), + } => Some(body.inner()), _ => None, }; if let Some(body) = body { @@ -279,16 +280,17 @@ pub(super) struct ForeignModule { /// The `MonoItem` lives alongside the `Item` in the phase 1+2 work queue /// and is dropped before assembly begins. #[derive(Serialize, Clone)] -pub struct Item { +#[serde(bound = "")] +pub struct Item { pub symbol_name: String, - pub mono_item_kind: MonoItemKind, + pub mono_item_kind: MonoItemKind, details: Option, } -impl Item { +impl Item { pub(super) fn new( symbol_name: String, - mono_item_kind: MonoItemKind, + mono_item_kind: MonoItemKind, details: Option, ) -> Self { Item { @@ -304,32 +306,68 @@ impl Item { match &self.mono_item_kind { MonoItemKind::MonoItemFn { body: Some(body), .. - } => Some((body, body.locals())), + } => Some((body.inner(), body.inner().locals())), MonoItemKind::MonoItemStatic { body: Some(body), .. - } => Some((body, &[])), + } => Some((body.inner(), &[])), _ => None, } } } -impl PartialEq for Item { - fn eq(&self, other: &Item) -> bool { +impl Item { + /// Validate all bodies in this item, promoting from `Raw` to `Mono` phase. + pub(super) fn validate(self) -> Item { + Item { + symbol_name: self.symbol_name, + mono_item_kind: self.mono_item_kind.validate(), + details: self.details, + } + } +} + +impl MonoItemKind { + /// Validate the body (if present), promoting from `Raw` to `Mono` phase. + fn validate(self) -> MonoItemKind { + match self { + MonoItemKind::MonoItemFn { name, id, body } => MonoItemKind::MonoItemFn { + name, + id, + body: body.map(|b| b.validate()), + }, + MonoItemKind::MonoItemStatic { + name, + id, + allocation, + body, + } => MonoItemKind::MonoItemStatic { + name, + id, + allocation, + body: body.map(|b| b.validate()), + }, + MonoItemKind::MonoItemGlobalAsm { asm } => MonoItemKind::MonoItemGlobalAsm { asm }, + } + } +} + +impl PartialEq for Item { + fn eq(&self, other: &Item) -> bool { self.cmp(other) == std::cmp::Ordering::Equal } } -impl Eq for Item {} +impl Eq for Item {} -impl PartialOrd for Item { - fn partial_cmp(&self, other: &Item) -> Option { +impl PartialOrd for Item { + fn partial_cmp(&self, other: &Item) -> Option { Some(self.cmp(other)) } } -impl Ord for Item { - fn cmp(&self, other: &Item) -> std::cmp::Ordering { +impl Ord for Item { + fn cmp(&self, other: &Item) -> std::cmp::Ordering { use MonoItemKind::*; - let sort_key = |i: &Item| { + let sort_key = |i: &Item| { format!( "{}!{}", i.symbol_name, @@ -462,7 +500,7 @@ pub struct SmirJson { pub allocs: Vec, pub functions: Vec<(LinkMapKey, FnSymType)>, pub uneval_consts: Vec<(ConstDef, String)>, - pub items: Vec, + pub items: Vec>, pub types: Vec<(stable_mir::ty::Ty, TypeMetadata)>, pub spans: Vec<(usize, SourceData)>, pub debug: Option, @@ -486,7 +524,7 @@ pub struct SmirJsonDebugInfo { /// Contains only [`Item`] values (no `MonoItem`), so code that operates on a /// `CollectedCrate` structurally cannot call `inst.body()` or re-enter rustc. pub(super) struct CollectedCrate { - pub items: Vec, + pub items: Vec>, pub unevaluated_consts: HashMap, }