From eb137e063c404bf7d644a94c48500e7e528e57b1 Mon Sep 17 00:00:00 2001 From: Alberto Guevara Date: Mon, 20 Apr 2026 15:50:06 -0300 Subject: [PATCH 1/4] feat: Add .envrc file to enable flake support --- .envrc | 1 + 1 file changed, 1 insertion(+) create mode 100644 .envrc diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake From e335882d2a4d3fe4f440e33587361b4e1a86b9ea Mon Sep 17 00:00:00 2001 From: Alberto Guevara Date: Tue, 21 Apr 2026 00:31:59 -0300 Subject: [PATCH 2/4] feat: Add built-in unit testing framework (Project 8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements test "name" { stmts } top-level blocks, assert expr; statements, and a --test CLI mode that runs all tests with PASS/FAIL output and a summary. Co-authored-by: Fábio Miranda <130818362+Fpmiranda7@users.noreply.github.com> Co-authored-by: Davi Guerreiro <114326602+DaviGGuerreiro@users.noreply.github.com> --- src/interpreter/exec_stmt.rs | 10 ++++ src/interpreter/mod.rs | 45 +++++++++++--- src/ir/ast.rs | 20 ++++++- src/main.rs | 9 ++- src/parser/identifiers.rs | 4 +- src/parser/program.rs | 89 ++++++++++++++++++++++----- src/parser/statements.rs | 11 +++- src/semantic/type_checker.rs | 67 ++++++++++++++++----- tests/fixtures/test_framework.minic | 12 ++++ tests/interpreter.rs | 93 ++++++++++++++++++++++++++++- tests/type_checker.rs | 42 +++++++++++++ 11 files changed, 358 insertions(+), 44 deletions(-) create mode 100644 tests/fixtures/test_framework.minic diff --git a/src/interpreter/exec_stmt.rs b/src/interpreter/exec_stmt.rs index ceeda2c..63c4934 100644 --- a/src/interpreter/exec_stmt.rs +++ b/src/interpreter/exec_stmt.rs @@ -119,6 +119,16 @@ pub fn exec_stmt(stmt: &CheckedStmt, env: &mut Environment) -> ExecResult } Statement::Return(None) => Ok(Some(Value::Void)), + // --- Assert --- + Statement::Assert(expr) => match eval_expr(expr, env)? 
{ + Value::Bool(true) => Ok(None), + Value::Bool(false) => Err(RuntimeError::new("assertion failed")), + v => Err(RuntimeError::new(format!( + "assert requires bool, got: {}", + v + ))), + }, + // --- Statement-level function call --- Statement::Call { name, args } => { let arg_vals: Result, RuntimeError> = diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs index 08b93c1..21c71b3 100644 --- a/src/interpreter/mod.rs +++ b/src/interpreter/mod.rs @@ -61,25 +61,56 @@ use crate::stdlib::NativeRegistry; use eval_expr::eval_call; use value::{FnValue, RuntimeError, Value}; -/// Interpret a type-checked MiniC program, starting execution at `main`. -pub fn interpret(program: &CheckedProgram) -> Result<(), RuntimeError> { +fn build_env(program: &CheckedProgram) -> Environment { let mut env = Environment::::new(); - - // Register native stdlib functions as Value::Fn(FnValue::Native) bindings. let registry = NativeRegistry::default(); for (name, entry) in registry.iter() { env.declare(name.clone(), Value::Fn(FnValue::Native(entry.func))); } - - // Register user-defined functions as Value::Fn(FnValue::UserDefined) bindings. for fun in &program.functions { env.declare(fun.name.clone(), Value::Fn(FnValue::UserDefined(fun.clone()))); } + env +} +/// Interpret a type-checked MiniC program, starting execution at `main`. +pub fn interpret(program: &CheckedProgram) -> Result<(), RuntimeError> { + let mut env = build_env(program); if env.get("main").is_none() { return Err(RuntimeError::new("no 'main' function found")); } - eval_call("main", vec![], &mut env)?; Ok(()) } + +/// Run all test blocks in a program. Prints PASS/FAIL per test and a summary. +/// Returns `Ok(())` if every test passed, `Err` if any failed. 
+pub fn run_tests(program: &CheckedProgram) -> Result<(), RuntimeError> { + use exec_stmt::exec_stmt; + + let mut passed = 0usize; + let mut failed = 0usize; + + for test in &program.tests { + let mut env = build_env(program); + match exec_stmt(&test.body, &mut env) { + Ok(_) => { + println!("PASS {}", test.name); + passed += 1; + } + Err(e) => { + println!("FAIL {} — {}", test.name, e.message); + failed += 1; + } + } + } + + let total = passed + failed; + println!("{} / {} tests passed", passed, total); + + if failed > 0 { + Err(RuntimeError::new(format!("{} test(s) failed", failed))) + } else { + Ok(()) + } +} diff --git a/src/ir/ast.rs b/src/ir/ast.rs index 5f57b24..f4359a6 100644 --- a/src/ir/ast.rs +++ b/src/ir/ast.rs @@ -9,9 +9,10 @@ //! * [`Literal`] — a constant value written directly in source code. //! * [`Expr`] / [`ExprD`] — expressions (arithmetic, comparisons, calls, …). //! * [`Statement`] / [`StatementD`] — statements (declarations, assignments, -//! `if`, `while`, `return`, blocks). +//! `if`, `while`, `return`, `assert`, blocks). //! * [`FunDecl`] — a single function declaration with its body. -//! * [`Program`] — the top-level container: a list of function declarations. +//! * [`TestDecl`] — a named test block with a body. +//! * [`Program`] — the top-level container: function declarations and test blocks. //! //! Convenience type aliases pin the `Ty` parameter to either `()` or `Type`: //! `UncheckedExpr`, `CheckedExpr`, `UncheckedProgram`, `CheckedProgram`, etc. @@ -151,6 +152,8 @@ pub enum Statement { }, /// Return statement: `return [expr]`. Return(Option>>), + /// Assertion: `assert expr ;`. Fails at runtime if expr evaluates to false. + Assert(Box>), } /// A typed parameter: (name, type). @@ -165,10 +168,19 @@ pub struct FunDecl { pub body: Box>, } -/// A complete MiniC program: function declarations only. Execution starts at `main`. +/// A named test block: `test "name" { stmts }`. 
+#[derive(Debug, Clone, PartialEq)] +pub struct TestDecl { + pub name: String, + pub body: Box>, +} + +/// A complete MiniC program: function declarations and test blocks. +/// Execution starts at `main` (--run mode) or runs all tests (--test mode). #[derive(Debug, Clone, PartialEq)] pub struct Program { pub functions: Vec>, + pub tests: Vec>, } // Type synonyms for checked and unchecked phases. @@ -178,5 +190,7 @@ pub type UncheckedStmt = StatementD<()>; pub type CheckedStmt = StatementD; pub type UncheckedFunDecl = FunDecl<()>; pub type CheckedFunDecl = FunDecl; +pub type UncheckedTestDecl = TestDecl<()>; +pub type CheckedTestDecl = TestDecl; pub type UncheckedProgram = Program<()>; pub type CheckedProgram = Program; diff --git a/src/main.rs b/src/main.rs index 12514af..3712343 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,11 @@ use std::{env, fs, process}; -use mini_c::{interpreter::interpret, parser::program, semantic::type_check}; +use mini_c::{interpreter::{interpret, run_tests}, parser::program, semantic::type_check}; fn usage() -> ! { eprintln!("Usage: minic --check "); eprintln!(" minic --run "); + eprintln!(" minic --test "); process::exit(1); } @@ -58,6 +59,12 @@ fn main() { process::exit(1); } } + "--test" => { + if let Err(e) = run_tests(&checked) { + eprintln!("{}", e); + process::exit(1); + } + } // Task 1.1: unknown flag _ => usage(), } diff --git a/src/parser/identifiers.rs b/src/parser/identifiers.rs index f6bdecb..cd92ba2 100644 --- a/src/parser/identifiers.rs +++ b/src/parser/identifiers.rs @@ -28,7 +28,9 @@ use nom::{ }; /// Reserved words: boolean literals and type names. -const RESERVED: &[&str] = &["true", "false", "int", "float", "bool", "str", "void", "return"]; +const RESERVED: &[&str] = &[ + "true", "false", "int", "float", "bool", "str", "void", "return", "assert", "test", +]; /// Parse an identifier (variable name). /// Must start with letter or underscore; subsequent chars may be letter, digit, or underscore. 
diff --git a/src/parser/program.rs b/src/parser/program.rs index f91be58..87ce506 100644 --- a/src/parser/program.rs +++ b/src/parser/program.rs @@ -5,15 +5,12 @@ //! Exposes one public function: //! //! * [`program`] — parses a complete MiniC program as a sequence of zero or -//! more function declarations and returns an +//! more function declarations and test blocks, returning an //! [`UncheckedProgram`]. //! -//! A valid MiniC program contains **only** function declarations at the top -//! level — there are no top-level statements or variable declarations outside -//! of functions. This constraint is enforced here by the grammar: `program` -//! is defined as `many0(fun_decl)`, so any token that does not start a -//! function declaration causes the parse to stop. The type checker then -//! verifies that a `main` function exists. +//! A valid MiniC program contains only function declarations and `test` blocks +//! at the top level. The type checker verifies that a `main` function exists +//! (required in `--run` mode). //! //! # Design Decisions //! @@ -21,17 +18,77 @@ //! //! `nom`'s `many0` combinator repeatedly applies a parser until it fails, //! collecting results in a `Vec`. Using it here means the program parser -//! naturally handles empty programs (zero functions) and programs with any -//! number of functions with no extra branching logic. The existence of -//! `main` is a semantic constraint checked in the next pipeline stage, not -//! a syntactic one enforced here. +//! naturally handles empty programs and programs with any number of +//! top-level items with no extra branching logic. 
-use crate::ir::ast::{Program, UncheckedProgram}; +use crate::ir::ast::{Program, TestDecl, UncheckedProgram, UncheckedTestDecl}; use crate::parser::functions::fun_decl; -use nom::{combinator::map, multi::many0, IResult}; +use crate::parser::statements::statement; +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{char, multispace0}, + combinator::map, + multi::many0, + sequence::{delimited, preceded}, + IResult, +}; -/// Parse a complete MiniC program: zero or more function declarations. -/// Execution starts at the `main` function (validated by the type checker). +enum TopItem { + Fun(crate::ir::ast::UncheckedFunDecl), + Test(UncheckedTestDecl), +} + +fn test_decl(input: &str) -> IResult<&str, UncheckedTestDecl> { + let (rest, _) = preceded(multispace0, tag("test"))(input)?; + let (rest, name) = preceded( + multispace0, + delimited(char('"'), nom::bytes::complete::take_while(|c| c != '"'), char('"')), + )(rest)?; + let (rest, body) = preceded( + multispace0, + map( + delimited( + preceded(multispace0, char('{')), + many0(statement), + preceded(multispace0, char('}')), + ), + |seq| crate::ir::ast::StatementD { + stmt: crate::ir::ast::Statement::Block { seq }, + ty: (), + }, + ), + )(rest)?; + Ok(( + rest, + TestDecl { + name: name.to_string(), + body: Box::new(body), + }, + )) +} + +fn top_item(input: &str) -> IResult<&str, TopItem> { + preceded( + multispace0, + alt(( + map(test_decl, TopItem::Test), + map(fun_decl, TopItem::Fun), + )), + )(input) +} + +/// Parse a complete MiniC program: zero or more function declarations and test blocks. 
pub fn program(input: &str) -> IResult<&str, UncheckedProgram> { - map(many0(fun_decl), |functions| Program { functions })(input) + map(many0(top_item), |items| { + let mut functions = Vec::new(); + let mut tests = Vec::new(); + for item in items { + match item { + TopItem::Fun(f) => functions.push(f), + TopItem::Test(t) => tests.push(t), + } + } + Program { functions, tests } + })(input) } diff --git a/src/parser/statements.rs b/src/parser/statements.rs index 9dcfef5..735ce7d 100644 --- a/src/parser/statements.rs +++ b/src/parser/statements.rs @@ -58,7 +58,7 @@ fn wrap(s: Statement<()>) -> UncheckedStmt { StatementD { stmt: s, ty: () } } -/// Parse any statement: block | if | while | return | decl | call | assignment. +/// Parse any statement: block | if | while | return | assert | decl | call | assignment. pub fn statement(input: &str) -> IResult<&str, UncheckedStmt> { preceded( multispace0, @@ -67,6 +67,7 @@ pub fn statement(input: &str) -> IResult<&str, UncheckedStmt> { if_statement, while_statement, return_statement, + assert_statement, decl_statement, call_statement, assignment, @@ -74,6 +75,14 @@ pub fn statement(input: &str) -> IResult<&str, UncheckedStmt> { )(input) } +/// Parse an assert statement: `assert expr ;`. +fn assert_statement(input: &str) -> IResult<&str, UncheckedStmt> { + let (rest, _) = preceded(multispace0, tag("assert"))(input)?; + let (rest, expr) = preceded(multispace0, expression)(rest)?; + let (rest, _) = preceded(multispace0, char(';'))(rest)?; + Ok((rest, wrap(Statement::Assert(Box::new(expr))))) +} + /// Parse a return statement: `return [expr] ;`. 
fn return_statement(input: &str) -> IResult<&str, UncheckedStmt> { let (rest, _) = preceded(multispace0, tag("return"))(input)?; diff --git a/src/semantic/type_checker.rs b/src/semantic/type_checker.rs index 46681cf..2869a0b 100644 --- a/src/semantic/type_checker.rs +++ b/src/semantic/type_checker.rs @@ -48,9 +48,9 @@ use std::collections::HashMap; use crate::environment::Environment; use crate::ir::ast::{ - CheckedExpr, CheckedFunDecl, CheckedProgram, CheckedStmt, Expr, ExprD, FunDecl, Literal, - Program, Statement, StatementD, Type, UncheckedExpr, UncheckedFunDecl, UncheckedProgram, - UncheckedStmt, + CheckedExpr, CheckedFunDecl, CheckedProgram, CheckedStmt, CheckedTestDecl, Expr, ExprD, + FunDecl, Literal, Program, Statement, StatementD, TestDecl, Type, UncheckedExpr, + UncheckedFunDecl, UncheckedProgram, UncheckedStmt, UncheckedTestDecl, }; use crate::stdlib::NativeRegistry; @@ -79,16 +79,25 @@ impl std::error::Error for TypeError {} /// Type-check a program. Returns `Ok(CheckedProgram)` if well-typed, `Err(TypeError)` on first error. /// Requires a `main` function with signature `void main()`. pub fn type_check(program: &UncheckedProgram) -> Result { - let main_fn = program.functions.iter().find(|f| f.name == "main"); - match main_fn { - None => return Err(TypeError::new("program must have a main function")), - Some(f) => { - if f.return_type != Type::Unit { - return Err(TypeError::new("main function must return void")); - } - if !f.params.is_empty() { - return Err(TypeError::new("main function must have no parameters")); - } + // In --test mode a program may have no main function, but if main exists it must be void(). 
+ if let Some(f) = program.functions.iter().find(|f| f.name == "main") { + if f.return_type != Type::Unit { + return Err(TypeError::new("main function must return void")); + } + if !f.params.is_empty() { + return Err(TypeError::new("main function must have no parameters")); + } + } + // Require main when there are no test blocks (pure --run programs). + if program.tests.is_empty() && !program.functions.iter().any(|f| f.name == "main") { + return Err(TypeError::new("program must have a main function")); + } + + // Detect duplicate test names. + let mut seen_tests = std::collections::HashSet::new(); + for t in &program.tests { + if !seen_tests.insert(&t.name) { + return Err(TypeError::new(format!("duplicate test name: \"{}\"", t.name))); } } @@ -117,7 +126,27 @@ pub fn type_check(program: &UncheckedProgram) -> Result, + fn_snapshot: &HashMap, +) -> Result { + env.restore(fn_snapshot.clone()); + let body = type_check_stmt(&t.body, env, &Type::Unit)?; + Ok(TestDecl { + name: t.name.clone(), + body: Box::new(body), + }) } fn type_check_fun_decl( @@ -232,6 +261,16 @@ fn type_check_stmt( body: Box::new(body_checked), } } + Statement::Assert(expr) => { + let checked = type_check_expr_to_typed(expr, env)?; + if checked.ty != Type::Bool { + return Err(TypeError::new(format!( + "assert requires Bool, got {:?}", + checked.ty + ))); + } + Statement::Assert(Box::new(checked)) + } Statement::Return(expr) => match expr { None => { if *expected_return != Type::Unit { diff --git a/tests/fixtures/test_framework.minic b/tests/fixtures/test_framework.minic new file mode 100644 index 0000000..4e9a50f --- /dev/null +++ b/tests/fixtures/test_framework.minic @@ -0,0 +1,12 @@ +int add(int a, int b) { return a + b; } + +test "addition is correct" { + assert add(2, 3) == 5; + assert add(0, 0) == 0; +} + +test "comparison" { + int x = 10; + assert x > 5; + assert x == 10; +} diff --git a/tests/interpreter.rs b/tests/interpreter.rs index 51696c9..3c1d770 100644 --- a/tests/interpreter.rs +++ 
b/tests/interpreter.rs @@ -1,4 +1,4 @@ -use mini_c::{interpreter::interpret, parser::program, semantic::type_check}; +use mini_c::{interpreter::{interpret, run_tests}, parser::program, semantic::type_check}; /// Parse, type-check, and interpret a MiniC source string. fn run(src: &str) -> Result<(), String> { @@ -9,6 +9,15 @@ fn run(src: &str) -> Result<(), String> { interpret(&checked).map_err(|e| format!("runtime error: {}", e.message)) } +/// Parse, type-check, and run tests in a MiniC source string. +fn run_tests_str(src: &str) -> Result<(), String> { + let unchecked = program(src) + .map_err(|e| format!("parse error: {:?}", e)) + .map(|(_, p)| p)?; + let checked = type_check(&unchecked).map_err(|e| format!("type error: {}", e.message))?; + run_tests(&checked).map_err(|e| format!("runtime error: {}", e.message)) +} + // --------------------------------------------------------------------------- // 7.2 Empty main // --------------------------------------------------------------------------- @@ -256,3 +265,85 @@ fn test_stdlib_pow_float_args() { "#; assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); } + +// --------------------------------------------------------------------------- +// 8. 
Built-in test framework +// --------------------------------------------------------------------------- + +#[test] +fn test_assert_true_passes() { + let src = r#" + void main() { assert true; } + "#; + assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); +} + +#[test] +fn test_assert_false_fails() { + let src = r#" + void main() { assert false; } + "#; + let result = run(src); + assert!(result.is_err(), "expected assert to fail"); + assert!(result.unwrap_err().contains("assertion failed")); +} + +#[test] +fn test_assert_expression() { + let src = r#" + void main() { assert 1 + 1 == 2; } + "#; + assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); +} + +#[test] +fn test_test_block_passes() { + let src = r#" + int add(int a, int b) { return a + b; } + test "addition" { + assert add(2, 3) == 5; + } + "#; + assert!(run_tests_str(src).is_ok(), "{}", run_tests_str(src).unwrap_err()); +} + +#[test] +fn test_test_block_fails() { + let src = r#" + test "bad" { + assert 1 == 2; + } + "#; + assert!(run_tests_str(src).is_err(), "expected test failure"); +} + +#[test] +fn test_multiple_tests_partial_failure() { + let src = r#" + test "ok" { assert true; } + test "fail" { assert false; } + "#; + let result = run_tests_str(src); + assert!(result.is_err(), "expected failure summary"); +} + +#[test] +fn test_multiple_tests_all_pass() { + let src = r#" + test "one" { assert 1 == 1; } + test "two" { assert 2 + 2 == 4; } + "#; + assert!(run_tests_str(src).is_ok(), "{}", run_tests_str(src).unwrap_err()); +} + +#[test] +fn test_test_block_with_variables() { + let src = r#" + test "vars" { + int x = 10; + int y = 20; + assert x + y == 30; + } + "#; + assert!(run_tests_str(src).is_ok(), "{}", run_tests_str(src).unwrap_err()); +} diff --git a/tests/type_checker.rs b/tests/type_checker.rs index 3357161..ef17e05 100644 --- a/tests/type_checker.rs +++ b/tests/type_checker.rs @@ -202,3 +202,45 @@ fn test_type_check_print_wrong_arity() { let result = parse_and_type_check("void main() { 
print(1, 2); }"); assert!(result.is_err(), "expected arity error for print(1, 2)"); } + +// --------------------------------------------------------------------------- +// 8. Built-in test framework +// --------------------------------------------------------------------------- + +#[test] +fn test_type_check_assert_bool_ok() { + assert!(parse_and_type_check("void main() { assert true; }").is_ok()); +} + +#[test] +fn test_type_check_assert_non_bool_err() { + let result = parse_and_type_check("void main() { assert 42; }"); + assert!(result.is_err()); + assert!(result.unwrap_err().message.contains("Bool")); +} + +#[test] +fn test_type_check_test_block_ok() { + let result = parse_and_type_check(r#"test "t" { assert true; }"#); + assert!(result.is_ok()); +} + +#[test] +fn test_type_check_test_block_no_main_required() { + // A file with only test blocks (no main) should pass type checking. + let result = parse_and_type_check(r#"test "t" { assert 1 == 1; }"#); + assert!(result.is_ok(), "{}", result.unwrap_err().message); +} + +#[test] +fn test_type_check_duplicate_test_name_err() { + let result = parse_and_type_check(r#"test "foo" { assert true; } test "foo" { assert false; }"#); + assert!(result.is_err()); + assert!(result.unwrap_err().message.contains("duplicate")); +} + +#[test] +fn test_type_check_test_block_bad_assert_type() { + let result = parse_and_type_check(r#"test "t" { assert 1 + 1; }"#); + assert!(result.is_err()); +} From ca04d17334c619261ce9046c99cfdf5d097501c4 Mon Sep 17 00:00:00 2001 From: Alberto Guevara Date: Tue, 21 Apr 2026 22:56:43 -0300 Subject: [PATCH 3/4] remove .envrc for commit in the main repo --- .envrc | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .envrc diff --git a/.envrc b/.envrc deleted file mode 100644 index 3550a30..0000000 --- a/.envrc +++ /dev/null @@ -1 +0,0 @@ -use flake From 2fb405ccc3e2184cfb1f5883d9a598af0a727b67 Mon Sep 17 00:00:00 2001 From: Felipe Torres Date: Tue, 30 Jun 2026 23:30:24 -0300 Subject: [PATCH 4/4] feat: 
add TAC code generator (Project 8, Milestone 3) Lower the type-checked MiniC AST into Three-Address Code (TAC), the intermediate representation defined by the professor's `tac` starter branch. This is the third milestone (code generation) for Project 8. What was ported from the starter: - src/ir/tac.rs: Address / Instruction / Operator types (verbatim) - translate_conditional / translate_relational (already complete) What was completed (previously `todo!()`): - translate_statement: Decl, Return, While, if-without-else, and the Project 8 `assert` statement - translate_expression: Sub, Mul, Div, Neg, relational operators as a value, and function calls as an expression Project 8 tie-in: - `assert e;` lowers to a conditional jump: fall through when true, jump to a failure block that calls the `assert_fail` runtime routine when false - each `test "name" { ... }` block becomes its own labelled TAC routine (`test_<name>:`), lowered like a function body Also: - generate_tac(&CheckedProgram) entry point emits every function then every test block - codegen::format_program renders TAC as human-readable text - `--tac` CLI flag prints the generated TAC - Program::main_function() helper and the `Name` type alias on the AST - tests/codegen.rs unit tests and tests/cli/tac.test shelltest Arrays (ArrayLit / Index -> Load / Store) remain out of scope for this milestone.
Co-Authored-By: Alberto Guevara Co-Authored-By: Claude Opus 4.8 --- src/codegen/mod.rs | 119 +++++++ src/codegen/tac_code_gen.rs | 506 ++++++++++++++++++++++++++++++ src/ir/ast.rs | 10 + src/ir/mod.rs | 1 + src/ir/tac.rs | 56 ++++ src/lib.rs | 3 +- src/main.rs | 13 +- tests/cli/tac.test | 21 ++ tests/codegen.rs | 140 +++++++++ tests/fixtures/tac_project8.minic | 29 ++ tests/fixtures/tac_simple.minic | 5 + 11 files changed, 901 insertions(+), 2 deletions(-) create mode 100644 src/codegen/mod.rs create mode 100644 src/codegen/tac_code_gen.rs create mode 100644 src/ir/tac.rs create mode 100644 tests/cli/tac.test create mode 100644 tests/codegen.rs create mode 100644 tests/fixtures/tac_project8.minic create mode 100644 tests/fixtures/tac_simple.minic diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs new file mode 100644 index 0000000..7e07893 --- /dev/null +++ b/src/codegen/mod.rs @@ -0,0 +1,119 @@ +//! Code generation. +//! +//! This module lowers a type-checked MiniC program into a lower-level +//! representation suitable for later back-end stages. The only target +//! currently implemented is [`Three-Address Code`](crate::ir::tac): a flat +//! list of simple instructions in which every operation refers to at most +//! three addresses. +//! +//! * [`tac_code_gen::generate_tac`] produces the TAC for a whole program. +//! * [`format_program`] renders a TAC program as human-readable text, used by +//! the `--tac` CLI mode and by the tests. + +pub mod tac_code_gen; + +use crate::ir::ast::Literal; +use crate::ir::tac::{Address, Instruction, Operator, TACProgram}; + +/// Render a TAC program as human-readable text, one instruction per line. +/// +/// Labels are printed flush-left with a trailing colon; every other +/// instruction is indented two spaces. The output mirrors the notation used in +/// the translation-rule slides (`t1 := a + b`, `if x >= y goto L3`, …). 
+pub fn format_program(program: &TACProgram) -> String { + program + .iter() + .map(format_instruction) + .collect::>() + .join("\n") +} + +fn format_instruction(instruction: &Instruction) -> String { + match instruction { + Instruction::Label(label) => format!("{}:", label), + Instruction::CopyAssignment(dst, src) => { + format!(" {} := {}", format_address(dst), format_address(src)) + } + Instruction::UnaryAssignment(op, dst, a) => format!( + " {} := {} {}", + format_address(dst), + format_operator(op), + format_address(a) + ), + Instruction::BinaryAssignment(op, dst, a, b) => format!( + " {} := {} {} {}", + format_address(dst), + format_address(a), + format_operator(op), + format_address(b) + ), + Instruction::JMP(label) => format!(" goto {}", label), + Instruction::ConditionalJMP(a, label) => { + format!(" if {} goto {}", format_address(a), label) + } + Instruction::ConditionalJMPFalse(a, label) => { + format!(" if_false {} goto {}", format_address(a), label) + } + Instruction::ConditionalJMPRelational(op, a, b, label) => format!( + " if {} {} {} goto {}", + format_address(a), + format_operator(op), + format_address(b), + label + ), + Instruction::Param(a) => format!(" param {}", format_address(a)), + Instruction::Call(None, name, n) => format!(" call {}, {}", name, n), + Instruction::Call(Some(dst), name, n) => { + format!(" {} := call {}, {}", format_address(dst), name, n) + } + Instruction::Store(base, index, value) => format!( + " {}[{}] := {}", + format_address(base), + format_address(index), + format_address(value) + ), + Instruction::Load(dst, base, index) => format!( + " {} := {}[{}]", + format_address(dst), + format_address(base), + format_address(index) + ), + Instruction::Return(None) => " return".to_string(), + Instruction::Return(Some(a)) => format!(" return {}", format_address(a)), + } +} + +fn format_address(address: &Address) -> String { + match address { + Address::Variable(name, _) => name.clone(), + Address::Temporary(name, _) => name.clone(), 
+ Address::Constant(literal, _) => format_literal(literal), + } +} + +fn format_literal(literal: &Literal) -> String { + match literal { + Literal::Int(n) => n.to_string(), + Literal::Float(f) => f.to_string(), + Literal::Bool(b) => b.to_string(), + Literal::Str(s) => format!("{:?}", s), + } +} + +fn format_operator(op: &Operator) -> &'static str { + match op { + Operator::Add => "+", + Operator::Sub => "-", + Operator::Mul => "*", + Operator::Div => "/", + Operator::Neg => "-", + Operator::LT => "<", + Operator::LTE => "<=", + Operator::GT => ">", + Operator::GTE => ">=", + Operator::EQ => "==", + Operator::NE => "!=", + Operator::SL => "<<", + Operator::SR => ">>", + } +} diff --git a/src/codegen/tac_code_gen.rs b/src/codegen/tac_code_gen.rs new file mode 100644 index 0000000..eedfa07 --- /dev/null +++ b/src/codegen/tac_code_gen.rs @@ -0,0 +1,506 @@ +//! Three-Address Code (TAC) generation from a type-checked MiniC program. +//! +//! # Overview +//! +//! This module lowers the *annotated* AST produced by the type checker into a +//! flat list of [`Instruction`]s — the Three-Address Code intermediate +//! representation defined in [`crate::ir::tac`]. Each TAC instruction refers to +//! at most three addresses (a result and up to two operands); complex +//! expressions are decomposed into a sequence of simple instructions whose +//! intermediate results are held in fresh *temporaries* (`temp1`, `temp2`, …). +//! +//! # Two translation contexts for booleans +//! +//! Boolean-valued expressions are translated differently depending on how they +//! are used: +//! +//! * [`translate_conditional`] — the boolean *drives control flow* (the +//! condition of an `if`, `while`, or `assert`). The result is expressed as +//! conditional jumps to a *true* or *false* label; no temporary is produced. +//! * [`translate_expression`] — the boolean must be *stored as a value* (e.g. +//! `x = a && b`). The result is materialised into a fresh temporary through +//! labelled code. +//! 
+//! # Project 8 tie-in +//! +//! Two MiniC constructs introduced by Project 8 are lowered here: +//! +//! * An `assert e;` statement becomes a conditional jump: if `e` is true, +//! control falls through; if false, it jumps to a failure block that calls +//! the `assert_fail` runtime routine. +//! * Each `test "name" { … }` block becomes its own labelled TAC routine +//! (`test_:`), translated exactly like a function body. + +use crate::ir::ast::{ + CheckedExpr, CheckedFunDecl, CheckedProgram, CheckedStmt, Expr, Literal, Statement, StatementD, + Type, +}; +use crate::ir::tac::{Address, Instruction, Operator, TACProgram}; + +/// Code-generation state: monotonic counters for fresh labels and temporaries. +#[derive(Clone)] +pub struct Environment { + current_label: usize, + current_temporary: usize, +} + +impl Environment { + pub fn new() -> Self { + Self { + current_label: 0, + current_temporary: 0, + } + } + + fn new_label(&mut self) -> String { + self.current_label += 1; + format!("L{}", self.current_label) + } + + fn new_temporary(&mut self) -> String { + self.current_temporary += 1; + format!("temp{}", self.current_temporary) + } +} + +impl Default for Environment { + fn default() -> Self { + Self::new() + } +} + +/// Turn a printable test name into a label-safe suffix. +fn sanitize(name: &str) -> String { + name.chars() + .map(|c| if c.is_alphanumeric() { c } else { '_' }) + .collect() +} + +/// Generate TAC for a whole program: every function, followed by every test. +/// +/// Functions are emitted first (each prefixed with a `name:` label), then each +/// `test` block becomes a `test_:` routine. This is the Project 8 tie-in: +/// tests are first-class code that the generator lowers just like functions. 
+pub fn generate_tac(program: &CheckedProgram) -> TACProgram {
+    // One environment for the whole program keeps label and temporary names
+    // unique across all functions and tests.
+    let mut env = Environment::new();
+    let mut instructions = Vec::new();
+
+    for function in &program.functions {
+        instructions.extend(translate_function(function.clone(), &mut env));
+    }
+
+    // NOTE(review): nothing is appended after a routine's body, so a test (or a
+    // function whose body does not end in `return`) is directly followed by the
+    // next routine's label in the flat stream — confirm that consumers treat
+    // routine labels as boundaries rather than falling through.
+    for test in &program.tests {
+        instructions.push(Instruction::Label(format!("test_{}", sanitize(&test.name))));
+        instructions.extend(translate_statement(*test.body.clone(), &mut env));
+    }
+
+    instructions
+}
+
+/// Lower one function: its `name:` label followed by the lowered body.
+fn translate_function(function: CheckedFunDecl, env: &mut Environment) -> Vec<Instruction> {
+    let body = *function.body;
+    // Emit the label first so the body can simply be appended behind it.
+    let mut instructions = vec![Instruction::Label(function.name)];
+    match body.stmt {
+        // A block body is flattened statement by statement.
+        Statement::Block { seq } => {
+            for stmt in seq {
+                instructions.extend(translate_statement(stmt, env));
+            }
+        }
+        // Any other body is a single statement; rebuild it and lower it as is.
+        stmt => instructions.extend(translate_statement(
+            StatementD {
+                stmt,
+                ty: body.ty,
+            },
+            env,
+        )),
+    }
+    instructions
+}
+
+/// Build the address of the boolean constant `value`.
+fn bool_constant(value: bool) -> Address {
+    Address::Constant(Literal::Bool(value), Type::Bool)
+}
+
+/// Lower one checked statement to TAC.
+///
+/// Fresh labels and temporaries are drawn from `env`, so the numbering of the
+/// generated names follows the order in which statements are lowered.
+///
+/// # Panics
+///
+/// Panics via `todo!` on an assignment whose target is not a plain identifier,
+/// and on any array literal or index expression reached while lowering.
+pub fn translate_statement(statement: CheckedStmt, env: &mut Environment) -> Vec<Instruction> {
+    match statement.stmt {
+        Statement::Block { seq } => seq
+            .into_iter()
+            .flat_map(|stmt| translate_statement(stmt, env))
+            .collect::<Vec<_>>(),
+
+        // `T x = e;` — evaluate `e`, then copy the result into `x`.
+        Statement::Decl { name, ty, init } => {
+            let destination = Address::Variable(name, ty);
+            let (source, mut instructions) = translate_expression(*init, env);
+            instructions.push(Instruction::CopyAssignment(destination, source));
+            instructions
+        }
+
+        Statement::Assign { target, value } => match &target.exp {
+            Expr::Ident(name) => {
+                let destination = Address::Variable(name.to_string(), target.ty.clone());
+                let (source, mut instructions) = translate_expression(*value, env);
+                instructions.push(Instruction::CopyAssignment(destination, source));
+                instructions
+            }
+            // Assignment through an index target (`a[i] = e`) is out of scope.
+            _ => todo!("assignment target other than a plain identifier"),
+        },
+
+        // Statement-level call: evaluate every argument first, then emit all
+        // `param`s together so a nested call's params cannot interleave with
+        // this call's params.
+        Statement::Call { name, args } => {
+            let mut instructions = Vec::new();
+            let mut arg_addrs = Vec::new();
+            for arg in args {
+                let (addr, arg_instructions) = translate_expression(arg, env);
+                instructions.extend(arg_instructions);
+                arg_addrs.push(addr);
+            }
+            let arg_count = arg_addrs.len();
+            instructions.extend(arg_addrs.into_iter().map(Instruction::Param));
+            instructions.push(Instruction::Call(None, name, arg_count));
+            instructions
+        }
+
+        Statement::If {
+            cond,
+            then_branch,
+            else_branch: Some(else_branch),
+        } => {
+            let label_then = env.new_label();
+            let label_else = env.new_label();
+            let label_end = env.new_label();
+            let mut instructions =
+                translate_conditional(*cond, env, label_then.clone(), label_else.clone());
+            instructions.push(Instruction::Label(label_then));
+            instructions.extend(translate_statement(*then_branch, env));
+            // Skip the else branch once the then branch has run.
+            instructions.push(Instruction::JMP(label_end.clone()));
+            instructions.push(Instruction::Label(label_else));
+            instructions.extend(translate_statement(*else_branch, env));
+            instructions.push(Instruction::Label(label_end));
+            instructions
+        }
+
+        Statement::If {
+            cond,
+            then_branch,
+            else_branch: None,
+        } => {
+            let label_then = env.new_label();
+            let label_end = env.new_label();
+            // Without an else branch, the false target is the join point itself.
+            let mut instructions =
+                translate_conditional(*cond, env, label_then.clone(), label_end.clone());
+            instructions.push(Instruction::Label(label_then));
+            instructions.extend(translate_statement(*then_branch, env));
+            instructions.push(Instruction::Label(label_end));
+            instructions
+        }
+
+        Statement::While { cond, body } => {
+            let label_start = env.new_label();
+            let label_body = env.new_label();
+            let label_end = env.new_label();
+            let mut instructions = vec![Instruction::Label(label_start.clone())];
+            instructions.extend(translate_conditional(
+                *cond,
+                env,
+                label_body.clone(),
+                label_end.clone(),
+            ));
+            instructions.push(Instruction::Label(label_body));
+            instructions.extend(translate_statement(*body, env));
+            // Back-edge: re-evaluate the condition after every iteration.
+            instructions.push(Instruction::JMP(label_start));
+            instructions.push(Instruction::Label(label_end));
+            instructions
+        }
+
+        Statement::Return(Some(expr)) => {
+            let (addr, mut instructions) = translate_expression(*expr, env);
+            instructions.push(Instruction::Return(Some(addr)));
+            instructions
+        }
+        Statement::Return(None) => vec![Instruction::Return(None)],
+
+        // Project 8: `assert e;` — jump past the failure block when `e` is
+        // true; otherwise enter the failure block, which calls the
+        // `assert_fail` runtime routine.
+        //
+        // NOTE(review): the success label directly follows the call, so if
+        // `assert_fail` returns, execution continues after the failed
+        // assertion — presumably the routine never returns; confirm.
+        Statement::Assert(expr) => {
+            let label_ok = env.new_label();
+            let label_fail = env.new_label();
+            let mut instructions =
+                translate_conditional(*expr, env, label_ok.clone(), label_fail.clone());
+            instructions.push(Instruction::Label(label_fail));
+            instructions.push(Instruction::Param(Address::Constant(
+                Literal::Str("assertion failed".to_string()),
+                Type::Str,
+            )));
+            instructions.push(Instruction::Call(None, "assert_fail".to_string(), 1));
+            instructions.push(Instruction::Label(label_ok));
+            instructions
+        }
+    }
+}
+
+/// Lower an expression used as a *value*.
+///
+/// Returns the address holding the result together with the instructions that
+/// compute it; literals and identifiers need no instructions at all.
+///
+/// # Panics
+///
+/// Panics via `todo!` on array literals and index expressions.
+fn translate_expression(
+    expression: CheckedExpr,
+    env: &mut Environment,
+) -> (Address, Vec<Instruction>) {
+    let result_ty = expression.ty;
+    match expression.exp {
+        Expr::Literal(value) => (Address::Constant(value, result_ty), vec![]),
+
+        Expr::Ident(name) => (Address::Variable(name, result_ty), vec![]),
+
+        // Arithmetic
+        Expr::Add(left, right) => translate_binary(Operator::Add, *left, *right, result_ty, env),
+        Expr::Sub(left, right) => translate_binary(Operator::Sub, *left, *right, result_ty, env),
+        Expr::Mul(left, right) => translate_binary(Operator::Mul, *left, *right, result_ty, env),
+        Expr::Div(left, right) => translate_binary(Operator::Div, *left, *right, result_ty, env),
+
+        Expr::Neg(inner) => {
+            let (addr, mut instructions) = translate_expression(*inner, env);
+            let temp = Address::Temporary(env.new_temporary(), result_ty);
+            instructions.push(Instruction::UnaryAssignment(Operator::Neg, temp.clone(), addr));
+            (temp, instructions)
+        }
+
+        // Relational operators as a *value*: materialise a boolean temporary.
+        Expr::Lt(left, right) => translate_relational_value(Operator::LT, *left, *right, env),
+        Expr::Le(left, right) => translate_relational_value(Operator::LTE, *left, *right, env),
+        Expr::Gt(left, right) => translate_relational_value(Operator::GT, *left, *right, env),
+        Expr::Ge(left, right) => translate_relational_value(Operator::GTE, *left, *right, env),
+        Expr::Eq(left, right) => translate_relational_value(Operator::EQ, *left, *right, env),
+        Expr::Ne(left, right) => translate_relational_value(Operator::NE, *left, *right, env),
+
+        // Boolean expressions as a value. `&&`/`||` keep short-circuit semantics:
+        // the right operand's instructions sit behind the jump that skips them.
+        Expr::Not(inner) => {
+            let (addr, mut instructions) = translate_expression(*inner, env);
+            let label_false = env.new_label();
+            let label_exit = env.new_label();
+            let temp = Address::Temporary(env.new_temporary(), Type::Bool);
+            instructions.extend([
+                // Operand false -> result true; otherwise result false.
+                Instruction::ConditionalJMPFalse(addr, label_false.clone()),
+                Instruction::CopyAssignment(temp.clone(), bool_constant(false)),
+                Instruction::JMP(label_exit.clone()),
+                Instruction::Label(label_false),
+                Instruction::CopyAssignment(temp.clone(), bool_constant(true)),
+                Instruction::Label(label_exit),
+            ]);
+            (temp, instructions)
+        }
+
+        Expr::And(left, right) => {
+            let (l_addr, mut instructions) = translate_expression(*left, env);
+            let (r_addr, r_instructions) = translate_expression(*right, env);
+            let label_false = env.new_label();
+            let label_exit = env.new_label();
+            let temp = Address::Temporary(env.new_temporary(), Type::Bool);
+            instructions.push(Instruction::ConditionalJMPFalse(l_addr, label_false.clone()));
+            instructions.extend(r_instructions);
+            instructions.extend([
+                Instruction::ConditionalJMPFalse(r_addr, label_false.clone()),
+                Instruction::CopyAssignment(temp.clone(), bool_constant(true)),
+                Instruction::JMP(label_exit.clone()),
+                Instruction::Label(label_false),
+                Instruction::CopyAssignment(temp.clone(), bool_constant(false)),
+                Instruction::Label(label_exit),
+            ]);
+            (temp, instructions)
+        }
+
+        Expr::Or(left, right) => {
+            let (l_addr, mut instructions) = translate_expression(*left, env);
+            let (r_addr, r_instructions) = translate_expression(*right, env);
+            let label_true = env.new_label();
+            let label_false = env.new_label();
+            let label_exit = env.new_label();
+            let temp = Address::Temporary(env.new_temporary(), Type::Bool);
+            instructions.extend([
+                // Left operand true -> skip the right operand entirely.
+                Instruction::ConditionalJMPFalse(l_addr, label_false.clone()),
+                Instruction::JMP(label_true.clone()),
+                Instruction::Label(label_false),
+            ]);
+            instructions.extend(r_instructions);
+            instructions.extend([
+                Instruction::ConditionalJMP(r_addr, label_true.clone()),
+                Instruction::CopyAssignment(temp.clone(), bool_constant(false)),
+                Instruction::JMP(label_exit.clone()),
+                Instruction::Label(label_true),
+                Instruction::CopyAssignment(temp.clone(), bool_constant(true)),
+                Instruction::Label(label_exit),
+            ]);
+            (temp, instructions)
+        }
+
+        // Function call as an expression: emit params, then `t := call f, n`.
+        Expr::Call { name, args } => {
+            let mut instructions = Vec::new();
+            let mut arg_addrs = Vec::new();
+            for arg in args {
+                let (addr, arg_instructions) = translate_expression(arg, env);
+                instructions.extend(arg_instructions);
+                arg_addrs.push(addr);
+            }
+            let arg_count = arg_addrs.len();
+            instructions.extend(arg_addrs.into_iter().map(Instruction::Param));
+            let temp = Address::Temporary(env.new_temporary(), result_ty);
+            instructions.push(Instruction::Call(Some(temp.clone()), name, arg_count));
+            (temp, instructions)
+        }
+
+        // Arrays are out of scope for this milestone.
+        Expr::ArrayLit(_) | Expr::Index { .. } => {
+            todo!("array literals and indexing are not yet lowered to TAC")
+        }
+    }
+}
+
+/// Lower a binary arithmetic operation into `t := a <op> b`.
+///
+/// Operands are lowered left to right, and the result temporary is allocated
+/// only after both operands have been lowered.
+fn translate_binary(
+    op: Operator,
+    left: CheckedExpr,
+    right: CheckedExpr,
+    result_ty: Type,
+    env: &mut Environment,
+) -> (Address, Vec<Instruction>) {
+    let (l_addr, mut instructions) = translate_expression(left, env);
+    let (r_addr, r_instructions) = translate_expression(right, env);
+    // Append by move instead of concatenating, which would clone every instruction.
+    instructions.extend(r_instructions);
+    let temp = Address::Temporary(env.new_temporary(), result_ty);
+    instructions.push(Instruction::BinaryAssignment(op, temp.clone(), l_addr, r_addr));
+    (temp, instructions)
+}
+
+/// Materialise a relational comparison into a boolean temporary via a
+/// conditional relational jump.
+fn translate_relational_value(
+    op: Operator,
+    left: CheckedExpr,
+    right: CheckedExpr,
+    env: &mut Environment,
+) -> (Address, Vec<Instruction>) {
+    // Operands first (left, then right), then the two labels, then the result
+    // temporary: this order fixes the numbering of the generated names.
+    let (lhs, mut instructions) = translate_expression(left, env);
+    let (rhs, rhs_instructions) = translate_expression(right, env);
+    let on_true = env.new_label();
+    let done = env.new_label();
+    let result = Address::Temporary(env.new_temporary(), Type::Bool);
+    let bool_const = |value: bool| Address::Constant(Literal::Bool(value), Type::Bool);
+
+    instructions.extend(rhs_instructions);
+    instructions.extend([
+        // Comparison holds -> jump to the `true` assignment; otherwise fall
+        // into the `false` assignment and skip over the `true` one.
+        Instruction::ConditionalJMPRelational(op, lhs, rhs, on_true.clone()),
+        Instruction::CopyAssignment(result.clone(), bool_const(false)),
+        Instruction::JMP(done.clone()),
+        Instruction::Label(on_true),
+        Instruction::CopyAssignment(result.clone(), bool_const(true)),
+        Instruction::Label(done),
+    ]);
+    (result, instructions)
+}
+
+/// Translate a boolean expression used as a *condition*: emit jumps so that
+/// control reaches `true_label` when the expression is true and `false_label`
+/// when it is false. No temporary is produced.
+fn translate_conditional(
+    expression: CheckedExpr,
+    env: &mut Environment,
+    true_label: String,
+    false_label: String,
+) -> Vec<Instruction> {
+    match expression.exp {
+        // A constant condition needs no test, only an unconditional jump.
+        Expr::Literal(Literal::Bool(true)) => vec![Instruction::JMP(true_label)],
+        Expr::Literal(Literal::Bool(false)) => vec![Instruction::JMP(false_label)],
+        Expr::Ident(name) => vec![
+            Instruction::ConditionalJMP(Address::Variable(name, expression.ty), true_label),
+            Instruction::JMP(false_label),
+        ],
+        // `a && b`: only when `a` is true do we go on to test `b`.
+        Expr::And(lhs, rhs) => {
+            let rhs_entry = env.new_label();
+            let mut instructions =
+                translate_conditional(*lhs, env, rhs_entry.clone(), false_label.clone());
+            instructions.push(Instruction::Label(rhs_entry));
+            instructions.extend(translate_conditional(*rhs, env, true_label, false_label));
+            instructions
+        }
+        // `a || b`: only when `a` is false do we go on to test `b`.
+        Expr::Or(lhs, rhs) => {
+            let rhs_entry = env.new_label();
+            let mut instructions =
+                translate_conditional(*lhs, env, true_label.clone(), rhs_entry.clone());
+            instructions.push(Instruction::Label(rhs_entry));
+            instructions.extend(translate_conditional(*rhs, env, true_label, false_label));
+            instructions
+        }
+        // Negation costs nothing: swap the two targets.
+        Expr::Not(inner) => translate_conditional(*inner, env, false_label, true_label),
+        Expr::Lt(lhs, rhs) => {
+            translate_relational(*lhs, *rhs, Operator::LT, true_label, false_label, env)
+        }
+        Expr::Le(lhs, rhs) => {
+            translate_relational(*lhs, *rhs, Operator::LTE, true_label, false_label, env)
+        }
+        Expr::Gt(lhs, rhs) => {
+            translate_relational(*lhs, *rhs, Operator::GT, true_label, false_label, env)
+        }
+        Expr::Ge(lhs, rhs) => {
+            translate_relational(*lhs, *rhs, Operator::GTE, true_label, false_label, env)
+        }
+        Expr::Eq(lhs, rhs) => {
+            translate_relational(*lhs, *rhs, Operator::EQ, true_label, false_label, env)
+        }
+        Expr::Ne(lhs, rhs) => {
+            translate_relational(*lhs, *rhs, Operator::NE, true_label, false_label, env)
+        }
+        // Anything else (e.g. a call) is evaluated as a value and then tested.
+        _ => {
+            let (addr, mut instructions) = translate_expression(expression, env);
+            instructions.extend([
+                Instruction::ConditionalJMP(addr, true_label),
+                Instruction::JMP(false_label),
+            ]);
+            instructions
+        }
+    }
+}
+
+/// Lower a comparison used as a condition: evaluate both operands (left, then
+/// right), jump to `true_label` when the comparison holds, else to `false_label`.
+fn translate_relational(
+    left: CheckedExpr,
+    right: CheckedExpr,
+    op: Operator,
+    true_label: String,
+    false_label: String,
+    env: &mut Environment,
+) -> Vec<Instruction> {
+    let (lhs, mut instructions) = translate_expression(left, env);
+    let (rhs, rhs_instructions) = translate_expression(right, env);
+    instructions.extend(rhs_instructions);
+    instructions.extend([
+        Instruction::ConditionalJMPRelational(op, lhs, rhs, true_label),
+        Instruction::JMP(false_label),
+    ]);
+    instructions
+}
diff --git a/src/ir/ast.rs b/src/ir/ast.rs
index f4359a6..067602b 100644
--- a/src/ir/ast.rs
+++ b/src/ir/ast.rs
@@ -63,6 +63,9 @@ pub enum Type {
     Any,
 }
 
+/// An identifier name (variable, function, or temporary).
+pub type Name = String;
+
 /// A literal value.
 #[derive(Debug, Clone, PartialEq)]
 pub enum Literal {
@@ -183,6 +186,13 @@ pub struct Program {
     pub tests: Vec>,
 }
 
+impl<Ty> Program<Ty> {
+    /// Returns the `main` function declaration, if the program defines one.
+    pub fn main_function(&self) -> Option<&FunDecl<Ty>> {
+        self.functions.iter().find(|f| f.name == "main")
+    }
+}
+
 // Type synonyms for checked and unchecked phases.
 pub type UncheckedExpr = ExprD<()>;
 pub type CheckedExpr = ExprD<Type>;
diff --git a/src/ir/mod.rs b/src/ir/mod.rs
index c735bd3..df01345 100644
--- a/src/ir/mod.rs
+++ b/src/ir/mod.rs
@@ -43,3 +43,4 @@
 //! simply do not match.
 
 pub mod ast;
+pub mod tac;
diff --git a/src/ir/tac.rs b/src/ir/tac.rs
new file mode 100644
index 0000000..92e9623
--- /dev/null
+++ b/src/ir/tac.rs
@@ -0,0 +1,77 @@
+use crate::ir::ast::{Literal, Name, Type};
+
+/// Name of a jump target.
+type Label = String;
+
+/// A TAC program: a flat sequence of instructions.
+pub type TACProgram = Vec<Instruction>;
+
+/// An operand of a TAC instruction, paired with its MiniC type.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Address {
+    /// A named program variable.
+    Variable(Name, Type),
+    /// A literal constant.
+    Constant(Literal, Type),
+    /// A temporary created by the code generator.
+    Temporary(Name, Type),
+}
+
+/// A single TAC instruction.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Instruction {
+    /// `L:` — a jump target.
+    Label(Label),
+    /// `x := y` (destination first).
+    CopyAssignment(Address, Address),
+    /// `x := op y` (destination first).
+    UnaryAssignment(Operator, Address, Address),
+    /// `x := y op z` (destination first).
+    BinaryAssignment(Operator, Address, Address, Address),
+    /// `goto L`
+    JMP(Label),
+    /// Jump to the label when the address holds `true`.
+    ConditionalJMP(Address, Label),
+    /// Jump to the label when the address holds `false`.
+    ConditionalJMPFalse(Address, Label),
+    /// `if x relop y goto L`
+    ConditionalJMPRelational(Operator, Address, Address, Label),
+    /// `param x` — pass an argument to the call that follows.
+    Param(Address),
+    /// Either `call p, n` or `y = call p, n`; `n` is the argument count.
+    Call(Option<Address>, Name, usize),
+    /// `x[i] = y`
+    Store(Address, Address, Address),
+    /// `x = y[i]`
+    Load(Address, Address, Address),
+    /// `return` or `return x`
+    Return(Option<Address>),
+}
+
+/// Operators usable in assignments and in relational jumps.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Operator {
+    Add, // a + b
+    Sub, // a - b
+    Mul, // a * b
+    Div, // a / b
+    Neg, // -a
+    LT,  // a < b
+    LTE, // a <= b
+    GT,  // a > b
+    GTE, // a >= b
+    EQ,  // a == b
+    NE,  // a != b
+    SL,  // shift left
+    SR,  // shift right
+}
+
+// do i = i + 1 while(a[i] < v);
+//
+// L1:                        # Label("L1")
+//   t1 = i + 1               # BinaryAssignment(Add, t1, i, 1)
+//   i = t1                   # CopyAssignment(i, t1)
+//   t2 = i * 8               # BinaryAssignment(Mul, t2, i, 8)
+//   t3 = a[t2]               # Load(t3, a, t2)
+//   if t3 < v goto L1        # ConditionalJMPRelational(LT, t3, v, "L1")
+//
diff --git a/src/lib.rs b/src/lib.rs
index dac7ca7..4a184ca 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,6 @@
-//! MiniC library: IR, parser, semantic analysis, environment, interpreter, and stdlib.
+//! MiniC library: IR, parser, semantic analysis, environment, interpreter, code generation, and stdlib.
 
+pub mod codegen;
 pub mod environment;
 pub mod interpreter;
 pub mod ir;
diff --git a/src/main.rs b/src/main.rs
index 3712343..4249899 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,11 +1,17 @@
 use std::{env, fs, process};
 
-use mini_c::{interpreter::{interpret, run_tests}, parser::program, semantic::type_check};
+use mini_c::{
+    codegen::{format_program, tac_code_gen::generate_tac},
+    interpreter::{interpret, run_tests},
+    parser::program,
+    semantic::type_check,
+};
 
 fn usage() -> !
{
     eprintln!("Usage: minic --check ");
     eprintln!(" minic --run ");
     eprintln!(" minic --test ");
+    eprintln!(" minic --tac ");
     process::exit(1);
 }
 
@@ -65,6 +71,11 @@ fn main() {
                 process::exit(1);
             }
         }
+        // Milestone 3: --tac lowers the checked program to Three-Address Code
+        "--tac" => {
+            let tac = generate_tac(&checked);
+            println!("{}", format_program(&tac));
+        }
         // Task 1.1: unknown flag
         _ => usage(),
     }
diff --git a/tests/cli/tac.test b/tests/cli/tac.test
new file mode 100644
index 0000000..16314d3
--- /dev/null
+++ b/tests/cli/tac.test
@@ -0,0 +1,21 @@
+# --tac lowers a function plus a Project 8 test block to Three-Address Code
+$ ./target/debug/mini_c --tac tests/fixtures/tac_simple.minic
+add:
+    temp1 := a + b
+    return temp1
+test_add_works:
+    param 1
+    param 2
+    temp2 := call add, 2
+    if temp2 == 3 goto L1
+    goto L2
+L2:
+    param "assertion failed"
+    call assert_fail, 1
+L1:
+>=0
+
+# --tac on a type-error program fails before code generation
+$ ./target/debug/mini_c --tac tests/fixtures/cli_type_mismatch.minic
+>2 /Type error/
+>=1
diff --git a/tests/codegen.rs b/tests/codegen.rs
new file mode 100644
index 0000000..9a5c649
--- /dev/null
+++ b/tests/codegen.rs
@@ -0,0 +1,142 @@
+//! Integration tests for the Three-Address Code (TAC) generator.
+//!
+//! Each test parses and type-checks a small MiniC program, generates its TAC,
+//! and asserts on the resulting instruction stream. The `assert` and `test`
+//! cases exercise the Project 8 tie-in.
+
+use nom::combinator::all_consuming;
+
+use mini_c::codegen::tac_code_gen::generate_tac;
+use mini_c::ir::ast::CheckedProgram;
+use mini_c::ir::tac::{Address, Instruction, Operator};
+use mini_c::parser::program;
+use mini_c::semantic::type_check;
+
+fn tac_of(src: &str) -> Vec<Instruction> {
+    let (_, prog) = all_consuming(program)(src).expect("parse should succeed");
+    let checked: CheckedProgram = type_check(&prog).expect("type check should succeed");
+    generate_tac(&checked)
+}
+
+fn has_label(instrs: &[Instruction], label: &str) -> bool {
+    instrs
+        .iter()
+        .any(|i| matches!(i, Instruction::Label(l) if l == label))
+}
+
+#[test]
+fn arithmetic_assignment_uses_temp_and_copy() {
+    // total = a + b  ->  temp := a + b ; total := temp
+    let instrs = tac_of("void main() { int a = 1; int b = 2; int total = a + b; }");
+
+    let has_add = instrs.iter().any(|i| {
+        matches!(
+            i,
+            Instruction::BinaryAssignment(Operator::Add, Address::Temporary(_, _), _, _)
+        )
+    });
+    assert!(has_add, "expected a binary Add into a temporary: {instrs:?}");
+
+    // The declaration copies the temporary into `total`.
+    let has_copy_total = instrs.iter().any(|i| {
+        matches!(
+            i,
+            Instruction::CopyAssignment(Address::Variable(name, _), Address::Temporary(_, _))
+                if name == "total"
+        )
+    });
+    assert!(has_copy_total, "expected copy into `total`: {instrs:?}");
+}
+
+#[test]
+fn if_else_emits_two_branches_and_join() {
+    let instrs = tac_of(
+        "int f(int x) { if x >= 0 { return 1; } else { return 0; } } void main() { print(f(1)); }",
+    );
+    // A relational conditional jump drives the branch selection.
+    let has_rel = instrs.iter().any(|i| {
+        matches!(
+            i,
+            Instruction::ConditionalJMPRelational(Operator::GTE, _, _, _)
+        )
+    });
+    assert!(has_rel, "expected relational conditional jump: {instrs:?}");
+    // Two returns, one per branch.
+    let returns = instrs
+        .iter()
+        .filter(|i| matches!(i, Instruction::Return(_)))
+        .count();
+    assert_eq!(returns, 2, "expected one return per branch: {instrs:?}");
+}
+
+#[test]
+fn while_loop_has_back_edge() {
+    let instrs = tac_of(
+        "void main() { int i = 0; while i < 3 { i = i + 1; } }",
+    );
+    // A back-edge is an unconditional jump to a label that was already emitted.
+    // Counting `JMP`s alone proves nothing: the loop condition itself lowers to
+    // a forward `goto` to the exit label.
+    let has_back_edge = instrs.iter().enumerate().any(|(pos, instr)| match instr {
+        Instruction::JMP(target) => has_label(&instrs[..pos], target),
+        _ => false,
+    });
+    assert!(has_back_edge, "expected a back-edge jump: {instrs:?}");
+    let has_rel = instrs.iter().any(|i| {
+        matches!(i, Instruction::ConditionalJMPRelational(Operator::LT, _, _, _))
+    });
+    assert!(has_rel, "expected relational loop condition: {instrs:?}");
+}
+
+#[test]
+fn return_with_value_emits_return_instruction() {
+    let instrs = tac_of("int f() { return 42; } void main() { print(f()); }");
+    let has_return_value = instrs
+        .iter()
+        .any(|i| matches!(i, Instruction::Return(Some(_))));
+    assert!(has_return_value, "expected `return `: {instrs:?}");
+}
+
+#[test]
+fn function_call_as_expression_binds_result_temp() {
+    let instrs = tac_of("int f() { return 1; } void main() { int x = f(); }");
+    // A call used as a value binds its result into a temporary.
+    let has_valued_call = instrs
+        .iter()
+        .any(|i| matches!(i, Instruction::Call(Some(_), name, _) if name == "f"));
+    assert!(has_valued_call, "expected `t := call f, 0`: {instrs:?}");
+}
+
+// ---- Project 8 tie-in -----------------------------------------------------
+
+#[test]
+fn assert_lowers_to_conditional_jump_and_failure_call() {
+    let instrs = tac_of("test \"t\" { assert 1 == 1; }");
+
+    // The assertion condition becomes a relational conditional jump.
+    let has_rel = instrs.iter().any(|i| {
+        matches!(i, Instruction::ConditionalJMPRelational(Operator::EQ, _, _, _))
+    });
+    assert!(has_rel, "expected relational jump for assert: {instrs:?}");
+
+    // Failure path calls the `assert_fail` runtime routine with one argument.
+    let has_fail_call = instrs
+        .iter()
+        .any(|i| matches!(i, Instruction::Call(None, name, 1) if name == "assert_fail"));
+    assert!(has_fail_call, "expected `call assert_fail, 1`: {instrs:?}");
+}
+
+#[test]
+fn each_test_block_becomes_a_labelled_routine() {
+    let instrs = tac_of(
+        "test \"first case\" { assert true; } test \"second\" { assert true; }",
+    );
+    assert!(
+        has_label(&instrs, "test_first_case"),
+        "expected label test_first_case: {instrs:?}"
+    );
+    assert!(
+        has_label(&instrs, "test_second"),
+        "expected label test_second: {instrs:?}"
+    );
+}
diff --git a/tests/fixtures/tac_project8.minic b/tests/fixtures/tac_project8.minic
new file mode 100644
index 0000000..cf0bea8
--- /dev/null
+++ b/tests/fixtures/tac_project8.minic
@@ -0,0 +1,29 @@
+int add(int a, int b) { return a + b; }
+
+int max(int a, int b) {
+    if a >= b {
+        return a;
+    } else {
+        return b;
+    }
+}
+
+int sum_to(int n) {
+    int total = 0;
+    int i = 1;
+    while i <= n {
+        total = total + i;
+        i = i + 1;
+    }
+    return total;
+}
+
+test "arithmetic and control flow" {
+    assert add(2, 3) == 5;
+    assert max(7, 4) == 7;
+    assert sum_to(5) == 15;
+}
+
+void main() {
+    print(add(2, 3));
+}
diff --git a/tests/fixtures/tac_simple.minic b/tests/fixtures/tac_simple.minic
new file mode 100644
index 0000000..46d7ba4
--- /dev/null
+++ b/tests/fixtures/tac_simple.minic
@@ -0,0 +1,5 @@
+int add(int a, int b) { return a + b; }
+
+test "add works" {
+    assert add(1, 2) == 3;
+}