From a1e748503f094a3d06f9f0d72b0068360d315529 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Mon, 15 Jun 2026 14:26:02 -0300 Subject: [PATCH 01/91] feat(opt): add constant folding pass --- src/codegen/inter/opt/constant_fold.rs | 57 ++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 src/codegen/inter/opt/constant_fold.rs diff --git a/src/codegen/inter/opt/constant_fold.rs b/src/codegen/inter/opt/constant_fold.rs new file mode 100644 index 0000000..30b8cb3 --- /dev/null +++ b/src/codegen/inter/opt/constant_fold.rs @@ -0,0 +1,57 @@ +use super::OptPass; +use crate::codegen::inter::{BinaryOp, Cfg, Instruction, Value}; + +pub struct ConstantFoldPass; + +impl OptPass for ConstantFoldPass { + fn name(&self) -> &'static str { + "constant-fold" + } + + fn run(&self, cfg: &mut Cfg) -> bool { + let mut changed = false; + + for block in &mut cfg.blocks { + for instruction in &mut block.instructions { + if let Instruction::Binary { dst, op, lhs, rhs } = instruction { + let folded = fold_binary(*op, lhs, rhs); + if let Some(value) = folded { + *instruction = Instruction::Assign { + dst: dst.clone(), + value, + }; + changed = true; + } + } + } + } + + changed + } +} + +fn fold_binary(op: BinaryOp, lhs: &Value, rhs: &Value) -> Option { + let (Value::Int(lhs), Value::Int(rhs)) = (lhs, rhs) else { + return None; + }; + + let value = match op { + BinaryOp::Add => lhs.checked_add(*rhs)?, + BinaryOp::Sub => lhs.checked_sub(*rhs)?, + BinaryOp::Mul => lhs.checked_mul(*rhs)?, + BinaryOp::Div => { + if *rhs == 0 { + return None; + } + lhs.checked_div(*rhs)? + } + BinaryOp::Mod => { + if *rhs == 0 { + return None; + } + lhs.checked_rem(*rhs)? 
+ } + }; + + Some(Value::Int(value)) +} From e10f38eb3c9b44d7f02f31cc87cd5d15f104ef4c Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Mon, 15 Jun 2026 14:26:02 -0300 Subject: [PATCH 02/91] feat(opt): add dead code elimination pass --- src/codegen/inter/opt/dce.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 src/codegen/inter/opt/dce.rs diff --git a/src/codegen/inter/opt/dce.rs b/src/codegen/inter/opt/dce.rs new file mode 100644 index 0000000..6353b99 --- /dev/null +++ b/src/codegen/inter/opt/dce.rs @@ -0,0 +1,24 @@ +use super::OptPass; +use crate::codegen::inter::{Cfg, Instruction}; + +pub struct DeadCodeElimPass; + +impl OptPass for DeadCodeElimPass { + fn name(&self) -> &'static str { + "dead-code-elimination" + } + + fn run(&self, cfg: &mut Cfg) -> bool { + let mut changed = false; + + for block in &mut cfg.blocks { + let before = block.instructions.len(); + block + .instructions + .retain(|instruction| !matches!(instruction, Instruction::Nop)); + changed |= block.instructions.len() != before; + } + + changed + } +} From 5566b33ef94f09b1173c23e0d0f73bdfe5e89179 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Mon, 15 Jun 2026 14:26:02 -0300 Subject: [PATCH 03/91] feat(opt): add copy propagation pass stub --- src/codegen/inter/opt/copy_prop.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 src/codegen/inter/opt/copy_prop.rs diff --git a/src/codegen/inter/opt/copy_prop.rs b/src/codegen/inter/opt/copy_prop.rs new file mode 100644 index 0000000..a349066 --- /dev/null +++ b/src/codegen/inter/opt/copy_prop.rs @@ -0,0 +1,14 @@ +use super::OptPass; +use crate::codegen::inter::Cfg; + +pub struct CopyPropagationPass; + +impl OptPass for CopyPropagationPass { + fn name(&self) -> &'static str { + "copy-propagation" + } + + fn run(&self, _cfg: &mut Cfg) -> bool { + false + } +} From 0892a74cd31eb1ce35ac8ad13c2628f467554d52 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Mon, 15 Jun 2026 
14:26:02 -0300 Subject: [PATCH 04/91] feat(opt): add common subexpression pass stub --- src/codegen/inter/opt/cse.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 src/codegen/inter/opt/cse.rs diff --git a/src/codegen/inter/opt/cse.rs b/src/codegen/inter/opt/cse.rs new file mode 100644 index 0000000..b08488d --- /dev/null +++ b/src/codegen/inter/opt/cse.rs @@ -0,0 +1,14 @@ +use super::OptPass; +use crate::codegen::inter::Cfg; + +pub struct CsePass; + +impl OptPass for CsePass { + fn name(&self) -> &'static str { + "common-subexpression-elimination" + } + + fn run(&self, _cfg: &mut Cfg) -> bool { + false + } +} From 23b0d5bc8c45753d2d6df15c0bc3c21f1e9b9c01 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Mon, 15 Jun 2026 14:26:02 -0300 Subject: [PATCH 05/91] feat(opt): add loop invariant motion pass stub --- src/codegen/inter/opt/licm.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 src/codegen/inter/opt/licm.rs diff --git a/src/codegen/inter/opt/licm.rs b/src/codegen/inter/opt/licm.rs new file mode 100644 index 0000000..7f129f9 --- /dev/null +++ b/src/codegen/inter/opt/licm.rs @@ -0,0 +1,14 @@ +use super::OptPass; +use crate::codegen::inter::Cfg; + +pub struct LoopInvariantCodeMotionPass; + +impl OptPass for LoopInvariantCodeMotionPass { + fn name(&self) -> &'static str { + "loop-invariant-code-motion" + } + + fn run(&self, _cfg: &mut Cfg) -> bool { + false + } +} From 7e84d04f639b842540b72c9df4543cb0f20706f0 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Mon, 15 Jun 2026 14:26:03 -0300 Subject: [PATCH 06/91] feat(opt): add inlining pass stub --- src/codegen/inter/opt/inline.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 src/codegen/inter/opt/inline.rs diff --git a/src/codegen/inter/opt/inline.rs b/src/codegen/inter/opt/inline.rs new file mode 100644 index 0000000..659a770 --- /dev/null +++ b/src/codegen/inter/opt/inline.rs @@ -0,0 +1,14 @@ +use super::OptPass; +use 
crate::codegen::inter::Cfg; + +pub struct InliningPass; + +impl OptPass for InliningPass { + fn name(&self) -> &'static str { + "inlining" + } + + fn run(&self, _cfg: &mut Cfg) -> bool { + false + } +} From 282f160b1d275376c03a7ab0c6e0417ff8045c33 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Mon, 15 Jun 2026 14:26:03 -0300 Subject: [PATCH 07/91] feat(opt): add pass manager pipeline --- src/codegen/inter/opt/mod.rs | 198 +++++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 src/codegen/inter/opt/mod.rs diff --git a/src/codegen/inter/opt/mod.rs b/src/codegen/inter/opt/mod.rs new file mode 100644 index 0000000..9564492 --- /dev/null +++ b/src/codegen/inter/opt/mod.rs @@ -0,0 +1,198 @@ +use super::Cfg; + +pub mod constant_fold; +pub mod copy_prop; +pub mod cse; +pub mod dce; +pub mod inline; +pub mod licm; + +pub use constant_fold::ConstantFoldPass; +pub use copy_prop::CopyPropagationPass; +pub use cse::CsePass; +pub use dce::DeadCodeElimPass; +pub use inline::InliningPass; +pub use licm::LoopInvariantCodeMotionPass; + +/// Interface comum para passes de otimizacao sobre o CFG/TAC intermediario. +/// +/// Um pass deve retornar `true` quando altera o `Cfg`, permitindo que o +/// `PassManager` itere ate ponto fixo. +pub trait OptPass { + fn name(&self) -> &'static str; + + fn run(&self, cfg: &mut Cfg) -> bool; +} + +pub struct PassManager { + passes: Vec>, +} + +impl PassManager { + pub fn new() -> Self { + Self { passes: Vec::new() } + } + + pub fn add(&mut self, pass: P) { + self.passes.push(Box::new(pass)); + } + + pub fn is_empty(&self) -> bool { + self.passes.is_empty() + } + + pub fn len(&self) -> usize { + self.passes.len() + } + + pub fn pass_names(&self) -> Vec<&'static str> { + self.passes.iter().map(|pass| pass.name()).collect() + } + + /// Executa todos os passes ate ponto fixo ou `max_iter` iteracoes. + /// + /// Retorna o numero de iteracoes completas executadas. 
+ pub fn run(&self, cfg: &mut Cfg, max_iter: usize) -> usize { + let mut iterations = 0; + + for _ in 0..max_iter { + let mut changed = false; + + for pass in &self.passes { + changed |= pass.run(cfg); + } + + iterations += 1; + + if !changed { + break; + } + } + + iterations + } +} + +impl Default for PassManager { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum OptLevel { + O0, + O1, + O2, + O3, +} + +impl Default for OptLevel { + fn default() -> Self { + Self::O0 + } +} + +impl OptLevel { + pub fn parse(value: &str) -> Option { + match value { + "0" | "O0" | "-O0" => Some(Self::O0), + "1" | "O1" | "-O1" => Some(Self::O1), + "2" | "O2" | "-O2" => Some(Self::O2), + "3" | "O3" | "-O3" => Some(Self::O3), + _ => None, + } + } +} + +pub fn pipeline_for_level(level: OptLevel) -> PassManager { + let mut pm = PassManager::new(); + + match level { + OptLevel::O0 => {} + OptLevel::O1 => { + pm.add(ConstantFoldPass); + pm.add(DeadCodeElimPass); + } + OptLevel::O2 => { + pm.add(ConstantFoldPass); + pm.add(DeadCodeElimPass); + pm.add(CopyPropagationPass); + pm.add(CsePass); + } + OptLevel::O3 => { + pm.add(ConstantFoldPass); + pm.add(DeadCodeElimPass); + pm.add(CopyPropagationPass); + pm.add(CsePass); + pm.add(LoopInvariantCodeMotionPass); + pm.add(InliningPass); + } + } + + pm +} + +pub fn default_pipeline() -> PassManager { + pipeline_for_level(OptLevel::O2) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::codegen::inter::{BasicBlock, BinaryOp, Instruction, Value}; + + #[test] + fn pass_manager_runs_until_fixed_point() { + let mut cfg = Cfg::new(); + let mut block = BasicBlock::new("entry"); + block.instructions.push(Instruction::Binary { + dst: "t0".to_string(), + op: BinaryOp::Add, + lhs: Value::Int(2), + rhs: Value::Int(3), + }); + cfg.add_block(block); + + let mut pm = PassManager::new(); + pm.add(ConstantFoldPass); + + assert_eq!(pm.run(&mut cfg, 10), 2); + assert_eq!( + cfg.blocks[0].instructions[0], 
+ Instruction::Assign { + dst: "t0".to_string(), + value: Value::Int(5), + } + ); + } + + #[test] + fn opt_level_selects_expected_pipeline() { + assert!(pipeline_for_level(OptLevel::O0).is_empty()); + assert_eq!( + pipeline_for_level(OptLevel::O1).pass_names(), + vec!["constant-fold", "dead-code-elimination"] + ); + assert_eq!( + pipeline_for_level(OptLevel::O2).pass_names(), + vec![ + "constant-fold", + "dead-code-elimination", + "copy-propagation", + "common-subexpression-elimination", + ] + ); + assert_eq!( + pipeline_for_level(OptLevel::O3).pass_names(), + vec![ + "constant-fold", + "dead-code-elimination", + "copy-propagation", + "common-subexpression-elimination", + "loop-invariant-code-motion", + "inlining", + ] + ); + } +} From a4e33dc54ee14e2bdcfc71d0abc8cd8f7bf6466e Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Mon, 15 Jun 2026 14:26:03 -0300 Subject: [PATCH 08/91] feat(codegen): add intermediate cfg model --- src/codegen/inter/mod.rs | 68 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/src/codegen/inter/mod.rs b/src/codegen/inter/mod.rs index 8b13789..35e29ae 100644 --- a/src/codegen/inter/mod.rs +++ b/src/codegen/inter/mod.rs @@ -1 +1,69 @@ +pub mod opt; +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Cfg { + pub blocks: Vec, +} + +impl Cfg { + pub fn new() -> Self { + Self { blocks: Vec::new() } + } + + pub fn add_block(&mut self, block: BasicBlock) { + self.blocks.push(block); + } +} + +impl Default for Cfg { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BasicBlock { + pub label: String, + pub instructions: Vec, + pub successors: Vec, +} + +impl BasicBlock { + pub fn new(label: impl Into) -> Self { + Self { + label: label.into(), + instructions: Vec::new(), + successors: Vec::new(), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Instruction { + Assign { + dst: String, + value: Value, + }, + Binary { + dst: String, + op: BinaryOp, 
+ lhs: Value, + rhs: Value, + }, + Nop, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Value { + Int(i64), + Temp(String), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BinaryOp { + Add, + Sub, + Mul, + Div, + Mod, +} From 757419fddfdadc851eb32bb950dd86c6f2551131 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Mon, 15 Jun 2026 14:26:03 -0300 Subject: [PATCH 09/91] feat(cli): add optimization level flag --- src/main.rs | 135 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 122 insertions(+), 13 deletions(-) diff --git a/src/main.rs b/src/main.rs index 1e96e7f..f23864b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,34 +1,94 @@ use crusty::analyser::analyse; +use crusty::codegen::inter::opt::{pipeline_for_level, OptLevel}; +use crusty::codegen::inter::Cfg; use crusty::common::errors::report::{Report, ToReport}; use crusty::common::input::source::SourceFile; use crusty::lexer::scanner::Scanner; use crusty::parser::Parser; use std::env; use std::path::PathBuf; -use std::process::exit; /// Ponto de entrada: decide entre modo interativo (sem args) ou compilação de arquivo (1 arg). 
fn main() -> std::io::Result<()> { let args: Vec<_> = env::args().collect(); - match args.len() { - 1 => { + let options = match parse_args(&args) { + Ok(options) => options, + Err(e) => report_and_exit(Box::new(e)), + }; + + match options.script { + None => { if let Err(e) = run_prompt() { report_and_exit(e); } } - 2 => { - if let Err(e) = run_file(&args[1]) { + Some(script) => { + if let Err(e) = run_file(&script, options.opt_level) { report_and_exit(e); } } - _ => { - eprintln!("Usage: crusty [script]"); - exit(64); - } } Ok(()) } +#[derive(Debug, Clone, PartialEq, Eq)] +struct CompileOptions { + script: Option, + opt_level: OptLevel, +} + +#[derive(Debug)] +struct CliError { + message: String, +} + +impl ToReport for CliError { + fn to_report(&self) -> Report { + Report::new(&self.message) + .with_help("Usage: crusty [-O0|-O1|-O2|-O3] [--opt-level 0|1|2|3] [script]") + } +} + +fn parse_args(args: &[String]) -> Result { + let mut opt_level = OptLevel::default(); + let mut script = None; + let mut i = 1; + + while i < args.len() { + let arg = &args[i]; + + if let Some(level) = arg.strip_prefix("-O").filter(|level| !level.is_empty()) { + opt_level = OptLevel::parse(level).ok_or_else(|| CliError { + message: format!("invalid optimization level: {arg}"), + })?; + } else if arg == "-O" || arg == "--opt-level" { + i += 1; + let Some(level) = args.get(i) else { + return Err(CliError { + message: format!("missing value for {arg}"), + }); + }; + opt_level = OptLevel::parse(level).ok_or_else(|| CliError { + message: format!("invalid optimization level: {level}"), + })?; + } else if arg.starts_with('-') { + return Err(CliError { + message: format!("unknown option: {arg}"), + }); + } else if script.is_none() { + script = Some(arg.clone()); + } else { + return Err(CliError { + message: "expected at most one input script".to_string(), + }); + } + + i += 1; + } + + Ok(CompileOptions { script, opt_level }) +} + /// Erro retornado por `run()` quando o scanner produz 
diagnósticos. #[derive(Debug)] struct DiagnosticError { @@ -47,7 +107,7 @@ fn run_prompt() -> Result<(), Box> { } /// Executa o scanner e parser sobre o `SourceFile`, imprime tokens e AST. -fn run(source: SourceFile) -> Result<(), Box> { +fn run(source: SourceFile, opt_level: OptLevel) -> Result<(), Box> { let mut scanner = Scanner::new(source); scanner.scan(); @@ -105,6 +165,10 @@ fn run(source: SourceFile) -> Result<(), Box> { return Err(Box::new(DiagnosticError { count: sem_count })); } + let mut cfg = Cfg::new(); + let opt_pipeline = pipeline_for_level(opt_level); + opt_pipeline.run(&mut cfg, 10); + Ok(()) } @@ -122,14 +186,14 @@ fn print_report(report: &Report) { } /// Lê o arquivo no caminho informado e delega a execução para `run`. -fn run_file(path: &str) -> Result<(), Box> { +fn run_file(path: &str, opt_level: OptLevel) -> Result<(), Box> { let source = SourceFile::from_path(PathBuf::from(path))?; - run(source)?; + run(source, opt_level)?; Ok(()) } /// Imprime o `Report` de erro no stderr de forma estruturada e encerra o processo com código 74. -fn report_and_exit(e: Box) { +fn report_and_exit(e: Box) -> ! 
{ let report = e.to_report(); eprintln!("--- ERROR ---"); @@ -145,3 +209,48 @@ fn report_and_exit(e: Box) { std::process::exit(74); } + +#[cfg(test)] +mod tests { + use super::*; + + fn args(values: &[&str]) -> Vec { + values.iter().map(|value| value.to_string()).collect() + } + + #[test] + fn parses_default_options() { + let parsed = parse_args(&args(&["crusty", "main.c"])).unwrap(); + + assert_eq!( + parsed, + CompileOptions { + script: Some("main.c".to_string()), + opt_level: OptLevel::O0, + } + ); + } + + #[test] + fn parses_short_opt_level_forms() { + assert_eq!( + parse_args(&args(&["crusty", "-O2", "main.c"])) + .unwrap() + .opt_level, + OptLevel::O2 + ); + assert_eq!( + parse_args(&args(&["crusty", "-O", "3", "main.c"])) + .unwrap() + .opt_level, + OptLevel::O3 + ); + } + + #[test] + fn parses_long_opt_level_form() { + let parsed = parse_args(&args(&["crusty", "--opt-level", "1", "main.c"])).unwrap(); + + assert_eq!(parsed.opt_level, OptLevel::O1); + } +} From 20080e3d3cd42fd836a0021d98628e6920257b29 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Mon, 15 Jun 2026 14:34:59 -0300 Subject: [PATCH 10/91] fix(opt): derive default opt level --- src/codegen/inter/opt/mod.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/codegen/inter/opt/mod.rs b/src/codegen/inter/opt/mod.rs index 9564492..bfefb2d 100644 --- a/src/codegen/inter/opt/mod.rs +++ b/src/codegen/inter/opt/mod.rs @@ -79,20 +79,15 @@ impl Default for PassManager { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum OptLevel { + #[default] O0, O1, O2, O3, } -impl Default for OptLevel { - fn default() -> Self { - Self::O0 - } -} - impl OptLevel { pub fn parse(value: &str) -> Option { match value { From 536ea7d3c4dad383843503137ea5f268998ce836 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 17 Jun 2026 10:59:04 -0300 Subject: [PATCH 11/91] feat(ir): expose intermediate representation module --- 
src/ir/mod.rs | 1 + src/ir/tac.rs | 0 src/lib.rs | 1 + 3 files changed, 2 insertions(+) create mode 100644 src/ir/mod.rs create mode 100644 src/ir/tac.rs diff --git a/src/ir/mod.rs b/src/ir/mod.rs new file mode 100644 index 0000000..d58e690 --- /dev/null +++ b/src/ir/mod.rs @@ -0,0 +1 @@ +pub mod tac; diff --git a/src/ir/tac.rs b/src/ir/tac.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/lib.rs b/src/lib.rs index d0722b5..b42801f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ pub mod analyser; pub mod codegen; pub mod common; +pub mod ir; pub mod lexer; pub mod parser; From 6750e3ad3571d3427a3dbfcfdc53d46d29d588fa Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 17 Jun 2026 10:59:24 -0300 Subject: [PATCH 12/91] feat(ir): add TAC id generators --- src/ir/tac.rs | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/ir/tac.rs b/src/ir/tac.rs index e69de29..2f31793 100644 --- a/src/ir/tac.rs +++ b/src/ir/tac.rs @@ -0,0 +1,51 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct TempId(pub u32); + +#[derive(Debug, Clone)] +pub struct TempGen { + next: u32, +} + +impl TempGen { + pub fn new() -> Self { + Self { next: 0 } + } + + pub fn fresh(&mut self) -> TempId { + let temp = TempId(self.next); + self.next += 1; + temp + } +} + +impl Default for TempGen { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct LabelId(pub u32); + +#[derive(Debug, Clone)] +pub struct LabelGen { + next: u32, +} + +impl LabelGen { + pub fn new() -> Self { + Self { next: 0 } + } + + pub fn fresh(&mut self) -> LabelId { + let label = LabelId(self.next); + self.next += 1; + label + } +} + +impl Default for LabelGen { + fn default() -> Self { + Self::new() + } +} From 2984e606d875699d25bf4e3f96ccf6b43dfb5f81 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 17 Jun 2026 11:00:20 -0300 Subject: [PATCH 13/91] feat(ir): define 
TAC data structures --- src/ir/tac.rs | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/ir/tac.rs b/src/ir/tac.rs index 2f31793..11b03bc 100644 --- a/src/ir/tac.rs +++ b/src/ir/tac.rs @@ -1,3 +1,5 @@ +use crate::common::ast::expr::{BinOp, UnOp}; + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct TempId(pub u32); @@ -49,3 +51,66 @@ impl Default for LabelGen { Self::new() } } + +#[derive(Debug, Clone, PartialEq)] +pub enum ConstValue { + Int(i64), + Double(f64), + Char(char), + String(String), +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Operand { + Temp(TempId), + Var(String), + Const(ConstValue), +} + +#[derive(Debug, Clone, PartialEq)] +pub enum TacInstr { + BinOp { + dst: TempId, + op: BinOp, + lhs: Operand, + rhs: Operand, + }, + UnOp { + dst: TempId, + op: UnOp, + src: Operand, + }, + Copy { + dst: TempId, + src: Operand, + }, + Jump { + label: LabelId, + }, + CondJump { + cond: Operand, + then_label: LabelId, + else_label: LabelId, + }, + Call { + dst: Option, + fn_name: String, + args: Vec, + }, + Return { + val: Option, + }, + Label(LabelId), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct TacFunction { + pub name: String, + pub params: Vec, + pub instrs: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct TacProgram { + pub functions: Vec, +} From 7ee509b10c76588747197c7e7d264706cd149163 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 17 Jun 2026 11:01:06 -0300 Subject: [PATCH 14/91] feat(ir): format TAC instructions --- src/ir/tac.rs | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/src/ir/tac.rs b/src/ir/tac.rs index 11b03bc..4a90bdd 100644 --- a/src/ir/tac.rs +++ b/src/ir/tac.rs @@ -1,3 +1,5 @@ +use std::fmt; + use crate::common::ast::expr::{BinOp, UnOp}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -114,3 +116,115 @@ pub struct TacFunction { pub struct TacProgram { pub functions: Vec, } + 
+impl fmt::Display for TempId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "t{}", self.0) + } +} + +impl fmt::Display for LabelId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "L{}", self.0) + } +} + +impl fmt::Display for ConstValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ConstValue::Int(value) => write!(f, "{value}"), + ConstValue::Double(value) => write!(f, "{value}"), + ConstValue::Char(value) => write!(f, "{value:?}"), + ConstValue::String(value) => write!(f, "{value:?}"), + } + } +} + +impl fmt::Display for Operand { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Operand::Temp(temp) => write!(f, "{temp}"), + Operand::Var(name) => write!(f, "{name}"), + Operand::Const(value) => write!(f, "{value}"), + } + } +} + +impl fmt::Display for TacInstr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TacInstr::BinOp { dst, op, lhs, rhs } => { + write!(f, "{dst} = {lhs} {} {rhs}", bin_op_symbol(op)) + } + TacInstr::UnOp { dst, op, src } => { + write!(f, "{dst} = {}{src}", un_op_symbol(op)) + } + TacInstr::Copy { dst, src } => write!(f, "{dst} = {src}"), + TacInstr::Jump { label } => write!(f, "goto {label}"), + TacInstr::CondJump { + cond, + then_label, + else_label, + } => write!(f, "if {cond} goto {then_label} else goto {else_label}"), + TacInstr::Call { + dst, + fn_name, + args, + } => { + if let Some(dst) = dst { + write!(f, "{dst} = ")?; + } + + write!(f, "call {fn_name}(")?; + for (index, arg) in args.iter().enumerate() { + if index > 0 { + write!(f, ", ")?; + } + write!(f, "{arg}")?; + } + write!(f, ")") + } + TacInstr::Return { val } => { + if let Some(val) = val { + write!(f, "return {val}") + } else { + write!(f, "return") + } + } + TacInstr::Label(label) => write!(f, "{label}:"), + } + } +} + +fn bin_op_symbol(op: &BinOp) -> &'static str { + match op { + BinOp::Add => "+", + BinOp::Sub => "-", + 
BinOp::Mul => "*", + BinOp::Div => "/", + BinOp::Mod => "%", + BinOp::Eq => "==", + BinOp::Neq => "!=", + BinOp::Less => "<", + BinOp::Greater => ">", + BinOp::Leq => "<=", + BinOp::Geq => ">=", + BinOp::And => "&&", + BinOp::Or => "||", + BinOp::BitAnd => "&", + BinOp::BitOr => "|", + BinOp::BitXor => "^", + BinOp::Shl => "<<", + BinOp::Shr => ">>", + } +} + +fn un_op_symbol(op: &UnOp) -> &'static str { + match op { + UnOp::Neg => "-", + UnOp::Not => "!", + UnOp::BitNot => "~", + UnOp::Deref => "*", + UnOp::AddrOf => "&", + } +} From 934fee77628e408fd854c17871a7c0446b88e45a Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 17 Jun 2026 11:01:30 -0300 Subject: [PATCH 15/91] test(ir): cover TAC primitives --- src/ir/tac.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/ir/tac.rs b/src/ir/tac.rs index 4a90bdd..ec0b206 100644 --- a/src/ir/tac.rs +++ b/src/ir/tac.rs @@ -228,3 +228,36 @@ fn un_op_symbol(op: &UnOp) -> &'static str { UnOp::AddrOf => "&", } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tac_instr_display_binop() { + let instr = TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Add, + lhs: Operand::Temp(TempId(1)), + rhs: Operand::Temp(TempId(2)), + }; + + assert_eq!(instr.to_string(), "t0 = t1 + t2"); + } + + #[test] + fn temp_gen_increments() { + let mut gen = TempGen::new(); + + assert_eq!(gen.fresh(), TempId(0)); + assert_eq!(gen.fresh(), TempId(1)); + } + + #[test] + fn label_gen_unique() { + let mut gen = LabelGen::new(); + + assert_eq!(gen.fresh(), LabelId(0)); + assert_eq!(gen.fresh(), LabelId(1)); + } +} From a7c4a49a6a522f9f04a536db8b62277d995664fd Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 17 Jun 2026 11:10:35 -0300 Subject: [PATCH 16/91] style(ir): format TAC code --- src/ir/tac.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/ir/tac.rs b/src/ir/tac.rs index ec0b206..af61770 100644 --- a/src/ir/tac.rs +++ b/src/ir/tac.rs @@ -166,11 
+166,7 @@ impl fmt::Display for TacInstr { then_label, else_label, } => write!(f, "if {cond} goto {then_label} else goto {else_label}"), - TacInstr::Call { - dst, - fn_name, - args, - } => { + TacInstr::Call { dst, fn_name, args } => { if let Some(dst) = dst { write!(f, "{dst} = ")?; } From d0b04da7d94e1df95998d234c4b48d84b38a9a09 Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 17 Jun 2026 13:48:57 -0300 Subject: [PATCH 17/91] feat(ir): build CFG from TAC Adds src/ir/cfg.rs implementing leader-based basic block partitioning and successor/predecessor wiring from a TacFunction, closes #25. --- src/ir/cfg.rs | 289 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/ir/mod.rs | 1 + 2 files changed, 290 insertions(+) create mode 100644 src/ir/cfg.rs diff --git a/src/ir/cfg.rs b/src/ir/cfg.rs new file mode 100644 index 0000000..c6572ca --- /dev/null +++ b/src/ir/cfg.rs @@ -0,0 +1,289 @@ +use std::collections::{HashMap, HashSet}; +use std::fmt; + +use crate::ir::tac::{LabelId, TacFunction, TacInstr}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct BlockId(pub u32); + +impl fmt::Display for BlockId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "B{}", self.0) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BasicBlock { + pub id: BlockId, + pub instrs: Vec, + pub succs: Vec, + pub preds: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Cfg { + pub blocks: Vec, + pub entry: BlockId, + pub exit: BlockId, +} + +impl Cfg { + pub fn predecessors(&self, id: BlockId) -> &[BlockId] { + &self.blocks[id.0 as usize].preds + } + + pub fn successors(&self, id: BlockId) -> &[BlockId] { + &self.blocks[id.0 as usize].succs + } +} + +impl fmt::Display for Cfg { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for block in &self.blocks { + writeln!(f, "{}:", block.id)?; + for instr in &block.instrs { + writeln!(f, " {instr}")?; + } + let succs = block + .succs + .iter() + .map(BlockId::to_string) + 
.collect::>() + .join(", "); + writeln!(f, " succs: [{succs}]")?; + } + Ok(()) + } +} + +/// Indices in `instrs` that start a new basic block: the function start, +/// label targets, and the instruction right after a Jump/CondJump. +pub fn identify_leaders(instrs: &[TacInstr]) -> HashSet { + let mut leaders = HashSet::new(); + if instrs.is_empty() { + return leaders; + } + + leaders.insert(0); + + for (index, instr) in instrs.iter().enumerate() { + match instr { + TacInstr::Jump { .. } | TacInstr::CondJump { .. } => { + if index + 1 < instrs.len() { + leaders.insert(index + 1); + } + } + TacInstr::Label(_) => { + leaders.insert(index); + } + _ => {} + } + } + + leaders +} + +pub fn build_cfg(func: &TacFunction) -> Cfg { + let mut leaders: Vec = identify_leaders(&func.instrs).into_iter().collect(); + leaders.sort_unstable(); + + if leaders.is_empty() { + let entry = BlockId(0); + let block = BasicBlock { + id: entry, + instrs: Vec::new(), + succs: Vec::new(), + preds: Vec::new(), + }; + return Cfg { + blocks: vec![block], + entry, + exit: entry, + }; + } + + let mut blocks = Vec::with_capacity(leaders.len()); + let mut label_to_block: HashMap = HashMap::new(); + + for (block_index, &start) in leaders.iter().enumerate() { + let end = leaders + .get(block_index + 1) + .copied() + .unwrap_or(func.instrs.len()); + let instrs = func.instrs[start..end].to_vec(); + let id = BlockId(block_index as u32); + + if let Some(TacInstr::Label(label)) = instrs.first() { + label_to_block.insert(*label, id); + } + + blocks.push(BasicBlock { + id, + instrs, + succs: Vec::new(), + preds: Vec::new(), + }); + } + + for block_index in 0..blocks.len() { + let succs = match blocks[block_index].instrs.last() { + Some(TacInstr::Jump { label }) => vec![label_to_block[label]], + Some(TacInstr::CondJump { + then_label, + else_label, + .. + }) => vec![label_to_block[then_label], label_to_block[else_label]], + Some(TacInstr::Return { .. 
}) => Vec::new(), + _ => { + if block_index + 1 < blocks.len() { + vec![BlockId((block_index + 1) as u32)] + } else { + Vec::new() + } + } + }; + blocks[block_index].succs = succs; + } + + for block_index in 0..blocks.len() { + let id = blocks[block_index].id; + for succ in blocks[block_index].succs.clone() { + blocks[succ.0 as usize].preds.push(id); + } + } + + let entry = BlockId(0); + let exit = blocks + .iter() + .rev() + .find(|block| block.succs.is_empty()) + .map(|block| block.id) + .unwrap_or_else(|| blocks.last().unwrap().id); + + Cfg { + blocks, + entry, + exit, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ir::tac::{ConstValue, Operand, TempId}; + + fn func(instrs: Vec) -> TacFunction { + TacFunction { + name: "test".to_string(), + params: Vec::new(), + instrs, + } + } + + #[test] + fn straight_line_code_is_one_block() { + let f = func(vec![ + TacInstr::Copy { + dst: TempId(0), + src: Operand::Const(ConstValue::Int(1)), + }, + TacInstr::Copy { + dst: TempId(1), + src: Operand::Const(ConstValue::Int(2)), + }, + TacInstr::Return { + val: Some(Operand::Temp(TempId(1))), + }, + ]); + + let cfg = build_cfg(&f); + + assert_eq!(cfg.blocks.len(), 1); + } + + #[test] + fn if_else_produces_three_blocks() { + let cond = Operand::Const(ConstValue::Int(1)); + let then_label = LabelId(0); + let else_label = LabelId(1); + let merge_label = LabelId(2); + + let f = func(vec![ + TacInstr::CondJump { + cond, + then_label, + else_label, + }, + TacInstr::Label(then_label), + TacInstr::Copy { + dst: TempId(0), + src: Operand::Const(ConstValue::Int(1)), + }, + TacInstr::Jump { label: merge_label }, + TacInstr::Label(else_label), + TacInstr::Copy { + dst: TempId(0), + src: Operand::Const(ConstValue::Int(2)), + }, + TacInstr::Label(merge_label), + TacInstr::Return { + val: Some(Operand::Temp(TempId(0))), + }, + ]); + + let cfg = build_cfg(&f); + + assert!(cfg.blocks.len() >= 3); + } + + #[test] + fn while_loop_has_back_edge() { + let cond_label = LabelId(0); + 
let body_label = LabelId(1); + let exit_label = LabelId(2); + let cond = Operand::Const(ConstValue::Int(1)); + + let f = func(vec![ + TacInstr::Label(cond_label), + TacInstr::CondJump { + cond, + then_label: body_label, + else_label: exit_label, + }, + TacInstr::Label(body_label), + TacInstr::Copy { + dst: TempId(0), + src: Operand::Const(ConstValue::Int(1)), + }, + TacInstr::Jump { label: cond_label }, + TacInstr::Label(exit_label), + TacInstr::Return { val: None }, + ]); + + let cfg = build_cfg(&f); + + let cond_block = cfg + .blocks + .iter() + .find(|b| matches!(b.instrs.first(), Some(TacInstr::Label(l)) if *l == cond_label)) + .unwrap() + .id; + let body_block = cfg + .blocks + .iter() + .find(|b| matches!(b.instrs.first(), Some(TacInstr::Label(l)) if *l == body_label)) + .unwrap() + .id; + + assert!(cfg.successors(body_block).contains(&cond_block)); + } + + #[test] + fn cfg_entry_has_no_predecessors() { + let f = func(vec![TacInstr::Return { val: None }]); + + let cfg = build_cfg(&f); + + assert!(cfg.predecessors(cfg.entry).is_empty()); + } +} diff --git a/src/ir/mod.rs b/src/ir/mod.rs index d58e690..fc0b868 100644 --- a/src/ir/mod.rs +++ b/src/ir/mod.rs @@ -1 +1,2 @@ +pub mod cfg; pub mod tac; From e22cc6365db723312408b4a2f33dccab15d05f93 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Thu, 18 Jun 2026 23:53:11 -0300 Subject: [PATCH 18/91] refactor(ir): allow copy destinations to be operands --- src/ir/tac.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ir/tac.rs b/src/ir/tac.rs index af61770..459f4b0 100644 --- a/src/ir/tac.rs +++ b/src/ir/tac.rs @@ -83,7 +83,7 @@ pub enum TacInstr { src: Operand, }, Copy { - dst: TempId, + dst: Operand, src: Operand, }, Jump { From dea20f5ee51edd0b00399a10137ef540f06b5792 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Thu, 18 Jun 2026 23:54:26 -0300 Subject: [PATCH 19/91] test(ir): update cfg copy destinations --- src/ir/cfg.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 
deletions(-) diff --git a/src/ir/cfg.rs b/src/ir/cfg.rs index c6572ca..48fa4b0 100644 --- a/src/ir/cfg.rs +++ b/src/ir/cfg.rs @@ -184,11 +184,11 @@ mod tests { fn straight_line_code_is_one_block() { let f = func(vec![ TacInstr::Copy { - dst: TempId(0), + dst: Operand::Temp(TempId(0)), src: Operand::Const(ConstValue::Int(1)), }, TacInstr::Copy { - dst: TempId(1), + dst: Operand::Temp(TempId(1)), src: Operand::Const(ConstValue::Int(2)), }, TacInstr::Return { @@ -216,13 +216,13 @@ mod tests { }, TacInstr::Label(then_label), TacInstr::Copy { - dst: TempId(0), + dst: Operand::Temp(TempId(0)), src: Operand::Const(ConstValue::Int(1)), }, TacInstr::Jump { label: merge_label }, TacInstr::Label(else_label), TacInstr::Copy { - dst: TempId(0), + dst: Operand::Temp(TempId(0)), src: Operand::Const(ConstValue::Int(2)), }, TacInstr::Label(merge_label), @@ -252,7 +252,7 @@ mod tests { }, TacInstr::Label(body_label), TacInstr::Copy { - dst: TempId(0), + dst: Operand::Temp(TempId(0)), src: Operand::Const(ConstValue::Int(1)), }, TacInstr::Jump { label: cond_label }, From 8be4749fb65703bf0a93916445f70607b044efa0 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Thu, 18 Jun 2026 23:55:00 -0300 Subject: [PATCH 20/91] feat(ir): add lowerer module skeleton --- src/ir/lower.rs | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ src/ir/mod.rs | 1 + 2 files changed, 54 insertions(+) create mode 100644 src/ir/lower.rs diff --git a/src/ir/lower.rs b/src/ir/lower.rs new file mode 100644 index 0000000..c0f8972 --- /dev/null +++ b/src/ir/lower.rs @@ -0,0 +1,53 @@ +use crate::common::ast::{ + expr::{Expr, Literal}, + stmt::Stmt, +}; +use crate::ir::tac::{ConstValue, LabelGen, Operand, TacInstr, TempGen}; + +#[derive(Debug, Clone)] +pub struct Lowerer { + temps: TempGen, + labels: LabelGen, + instrs: Vec, +} + +impl Lowerer { + pub fn new() -> Self { + Self { + temps: TempGen::new(), + labels: LabelGen::new(), + instrs: Vec::new(), + } + } + + pub fn lower_expr(&mut self, expr: &Expr) -> 
Operand { + match expr { + Expr::Literal(value, _) => Operand::Const(lower_literal(value)), + Expr::Ident(name, _) => Operand::Var(name.clone()), + _ => panic!("lowering ainda nao suporta essa expressao"), + } + } + + pub fn lower_stmt(&mut self, _stmt: &Stmt) { + panic!("lowering ainda nao suporta esse statement"); + } + + pub fn finish(self) -> Vec { + self.instrs + } +} + +impl Default for Lowerer { + fn default() -> Self { + Self::new() + } +} + +fn lower_literal(value: &Literal) -> ConstValue { + match value { + Literal::Int(value) => ConstValue::Int(*value), + Literal::Double(value) => ConstValue::Double(*value), + Literal::Char(value) => ConstValue::Char(*value), + Literal::String(value) => ConstValue::String(value.clone()), + } +} diff --git a/src/ir/mod.rs b/src/ir/mod.rs index fc0b868..785d1b9 100644 --- a/src/ir/mod.rs +++ b/src/ir/mod.rs @@ -1,2 +1,3 @@ pub mod cfg; +pub mod lower; pub mod tac; From b69a8b48bc91dbf84752a360b4ea97c3051d8bd5 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Fri, 19 Jun 2026 00:00:00 -0300 Subject: [PATCH 21/91] feat(ir): lower binary expressions --- src/ir/lower.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index c0f8972..cfb474e 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -2,7 +2,7 @@ use crate::common::ast::{ expr::{Expr, Literal}, stmt::Stmt, }; -use crate::ir::tac::{ConstValue, LabelGen, Operand, TacInstr, TempGen}; +use crate::ir::tac::{ConstValue, LabelGen, Operand, TacInstr, TempGen, TempId}; #[derive(Debug, Clone)] pub struct Lowerer { @@ -24,6 +24,18 @@ impl Lowerer { match expr { Expr::Literal(value, _) => Operand::Const(lower_literal(value)), Expr::Ident(name, _) => Operand::Var(name.clone()), + Expr::Binary(lhs, op, rhs, _) => { + let lhs = self.lower_expr(lhs); + let rhs = self.lower_expr(rhs); + let dst = self.fresh_temp(); + self.instrs.push(TacInstr::BinOp { + dst, + op: op.clone(), + lhs, + rhs, + }); + 
Operand::Temp(dst) + } _ => panic!("lowering ainda nao suporta essa expressao"), } } @@ -35,6 +47,10 @@ impl Lowerer { pub fn finish(self) -> Vec { self.instrs } + + fn fresh_temp(&mut self) -> TempId { + self.temps.fresh() + } } impl Default for Lowerer { From 09ca47fdf7adfa9a9f3e6326660d1f9070c8c147 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Fri, 19 Jun 2026 00:00:14 -0300 Subject: [PATCH 22/91] feat(ir): lower unary expressions --- src/ir/lower.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index cfb474e..66edc99 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -36,6 +36,16 @@ impl Lowerer { }); Operand::Temp(dst) } + Expr::Unary(op, src, _) => { + let src = self.lower_expr(src); + let dst = self.fresh_temp(); + self.instrs.push(TacInstr::UnOp { + dst, + op: op.clone(), + src, + }); + Operand::Temp(dst) + } _ => panic!("lowering ainda nao suporta essa expressao"), } } From 626362c7f1e1b0a5b856e23d4bb7a4fc1fe8739f Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Fri, 19 Jun 2026 00:00:35 -0300 Subject: [PATCH 23/91] feat(ir): lower calls and casts --- src/ir/lower.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 66edc99..77b4b34 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -46,6 +46,21 @@ impl Lowerer { }); Operand::Temp(dst) } + Expr::Call(callee, args, _) => { + let fn_name = match callee.as_ref() { + Expr::Ident(name, _) => name.clone(), + _ => panic!("lowering ainda nao suporta chamada por expressao"), + }; + let args = args.iter().map(|arg| self.lower_expr(arg)).collect(); + let dst = self.fresh_temp(); + self.instrs.push(TacInstr::Call { + dst: Some(dst), + fn_name, + args, + }); + Operand::Temp(dst) + } + Expr::Cast(_, inner, _) => self.lower_expr(inner), _ => panic!("lowering ainda nao suporta essa expressao"), } } From 40a904c4ce5170a284b2f34e508055adc3721423 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva 
Date: Fri, 19 Jun 2026 00:01:29 -0300 Subject: [PATCH 24/91] feat(ir): lower simple assignments --- src/ir/lower.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 77b4b34..0b91051 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -61,6 +61,12 @@ impl Lowerer { Operand::Temp(dst) } Expr::Cast(_, inner, _) => self.lower_expr(inner), + Expr::Assign(lhs, rhs, _) => { + let src = self.lower_expr(rhs); + let dst = self.lower_assignment_target(lhs); + self.emit_copy(dst.clone(), src); + dst + } _ => panic!("lowering ainda nao suporta essa expressao"), } } @@ -76,6 +82,20 @@ impl Lowerer { fn fresh_temp(&mut self) -> TempId { self.temps.fresh() } + + fn lower_assignment_target(&mut self, expr: &Expr) -> Operand { + match expr { + Expr::Ident(name, _) => Operand::Var(name.clone()), + _ => panic!("lowering ainda nao suporta esse destino de atribuicao"), + } + } + + fn emit_copy(&mut self, dst: Operand, src: Operand) { + match dst { + Operand::Temp(_) | Operand::Var(_) => self.instrs.push(TacInstr::Copy { dst, src }), + Operand::Const(_) => panic!("constante nao pode ser destino de copia"), + } + } } impl Default for Lowerer { From afcbaa5f184b6f2a2fdfb71d83b8ce9be5841329 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Fri, 19 Jun 2026 00:02:14 -0300 Subject: [PATCH 25/91] feat(ir): lower linear statements --- src/ir/lower.rs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 0b91051..3ad8386 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -71,8 +71,28 @@ impl Lowerer { } } - pub fn lower_stmt(&mut self, _stmt: &Stmt) { - panic!("lowering ainda nao suporta esse statement"); + pub fn lower_stmt(&mut self, stmt: &Stmt) { + match stmt { + Stmt::Block(stmts, _) => { + for stmt in stmts { + self.lower_stmt(stmt); + } + } + Stmt::ExprStmt(expr, _) => { + self.lower_expr(expr); + } + Stmt::Return(expr, _) => { + let val = 
expr.as_ref().map(|expr| self.lower_expr(expr)); + self.instrs.push(TacInstr::Return { val }); + } + Stmt::VarDecl(_, name, init, _) => { + if let Some(init) = init { + let src = self.lower_expr(init); + self.emit_copy(Operand::Var(name.clone()), src); + } + } + _ => panic!("lowering ainda nao suporta esse statement"), + } } pub fn finish(self) -> Vec { From 21abbdf29848034a0abc62bbf1a528ca163fbde7 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Fri, 19 Jun 2026 00:02:38 -0300 Subject: [PATCH 26/91] feat(ir): lower if statements --- src/ir/lower.rs | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 3ad8386..a74d6d8 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -2,7 +2,7 @@ use crate::common::ast::{ expr::{Expr, Literal}, stmt::Stmt, }; -use crate::ir::tac::{ConstValue, LabelGen, Operand, TacInstr, TempGen, TempId}; +use crate::ir::tac::{ConstValue, LabelGen, LabelId, Operand, TacInstr, TempGen, TempId}; #[derive(Debug, Clone)] pub struct Lowerer { @@ -78,6 +78,28 @@ impl Lowerer { self.lower_stmt(stmt); } } + Stmt::If(cond, then_branch, else_branch, _) => { + let cond = self.lower_expr(cond); + let then_label = self.labels.fresh(); + let else_label = self.labels.fresh(); + let end_label = self.labels.fresh(); + + self.instrs.push(TacInstr::CondJump { + cond, + then_label, + else_label, + }); + + self.instrs.push(TacInstr::Label(then_label)); + self.lower_stmt(then_branch); + self.emit_jump_unless_terminated(end_label); + + self.instrs.push(TacInstr::Label(else_label)); + if let Some(else_branch) = else_branch { + self.lower_stmt(else_branch); + } + self.instrs.push(TacInstr::Label(end_label)); + } Stmt::ExprStmt(expr, _) => { self.lower_expr(expr); } @@ -116,6 +138,15 @@ impl Lowerer { Operand::Const(_) => panic!("constante nao pode ser destino de copia"), } } + + fn emit_jump_unless_terminated(&mut self, label: LabelId) { + if !matches!( + self.instrs.last(), + 
Some(TacInstr::Jump { .. } | TacInstr::Return { .. }) + ) { + self.instrs.push(TacInstr::Jump { label }); + } + } } impl Default for Lowerer { From 9cb1928333ca7fd6cdc9625e6e3c1f02f1711a10 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Fri, 19 Jun 2026 00:02:54 -0300 Subject: [PATCH 27/91] feat(ir): lower while loops --- src/ir/lower.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index a74d6d8..5604689 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -100,6 +100,25 @@ impl Lowerer { } self.instrs.push(TacInstr::Label(end_label)); } + Stmt::While(cond, body, _) => { + let cond_label = self.labels.fresh(); + let body_label = self.labels.fresh(); + let end_label = self.labels.fresh(); + + self.instrs.push(TacInstr::Label(cond_label)); + let cond = self.lower_expr(cond); + self.instrs.push(TacInstr::CondJump { + cond, + then_label: body_label, + else_label: end_label, + }); + + self.instrs.push(TacInstr::Label(body_label)); + self.lower_stmt(body); + self.emit_jump_unless_terminated(cond_label); + + self.instrs.push(TacInstr::Label(end_label)); + } Stmt::ExprStmt(expr, _) => { self.lower_expr(expr); } From ac0d097e265c9157d8a69e05036e342b64861b09 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Fri, 19 Jun 2026 00:03:12 -0300 Subject: [PATCH 28/91] feat(ir): expose function and program lowering --- src/ir/lower.rs | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 5604689..e51cbaf 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -1,8 +1,12 @@ use crate::common::ast::{ + ast::Program, + decl::Decl, expr::{Expr, Literal}, stmt::Stmt, }; -use crate::ir::tac::{ConstValue, LabelGen, LabelId, Operand, TacInstr, TempGen, TempId}; +use crate::ir::tac::{ + ConstValue, LabelGen, LabelId, Operand, TacFunction, TacInstr, TacProgram, TempGen, TempId, +}; #[derive(Debug, Clone)] pub struct Lowerer { @@ 
-174,6 +178,35 @@ impl Default for Lowerer { } } +pub fn lower_function(decl: &Decl) -> TacFunction { + match decl { + Decl::Function(_, name, params, body, _) => { + let mut lowerer = Lowerer::new(); + for stmt in body { + lowerer.lower_stmt(stmt); + } + + TacFunction { + name: name.clone(), + params: params.iter().map(|(_, name)| name.clone()).collect(), + instrs: lowerer.finish(), + } + } + _ => panic!("lower_function espera Decl::Function"), + } +} + +pub fn lower_program(prog: &Program) -> TacProgram { + TacProgram { + functions: prog + .decls + .iter() + .filter(|decl| matches!(decl, Decl::Function(..))) + .map(lower_function) + .collect(), + } +} + fn lower_literal(value: &Literal) -> ConstValue { match value { Literal::Int(value) => ConstValue::Int(*value), From 823a40ef475d7595181338413e3968cf30aa15b6 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Fri, 19 Jun 2026 00:03:39 -0300 Subject: [PATCH 29/91] feat(ir): track loop control labels --- src/ir/lower.rs | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index e51cbaf..581cb45 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -15,6 +15,12 @@ pub struct Lowerer { instrs: Vec, } +#[derive(Debug, Clone, Copy, Default)] +struct ControlLabels { + break_label: Option, + continue_label: Option, +} + impl Lowerer { pub fn new() -> Self { Self { @@ -76,10 +82,14 @@ impl Lowerer { } pub fn lower_stmt(&mut self, stmt: &Stmt) { + self.lower_stmt_with_control(stmt, ControlLabels::default()); + } + + fn lower_stmt_with_control(&mut self, stmt: &Stmt, control: ControlLabels) { match stmt { Stmt::Block(stmts, _) => { for stmt in stmts { - self.lower_stmt(stmt); + self.lower_stmt_with_control(stmt, control); } } Stmt::If(cond, then_branch, else_branch, _) => { @@ -95,12 +105,12 @@ impl Lowerer { }); self.instrs.push(TacInstr::Label(then_label)); - self.lower_stmt(then_branch); + self.lower_stmt_with_control(then_branch, 
control); self.emit_jump_unless_terminated(end_label); self.instrs.push(TacInstr::Label(else_label)); if let Some(else_branch) = else_branch { - self.lower_stmt(else_branch); + self.lower_stmt_with_control(else_branch, control); } self.instrs.push(TacInstr::Label(end_label)); } @@ -118,11 +128,29 @@ impl Lowerer { }); self.instrs.push(TacInstr::Label(body_label)); - self.lower_stmt(body); + self.lower_stmt_with_control( + body, + ControlLabels { + break_label: Some(end_label), + continue_label: Some(cond_label), + }, + ); self.emit_jump_unless_terminated(cond_label); self.instrs.push(TacInstr::Label(end_label)); } + Stmt::Break(_) => { + let label = control + .break_label + .expect("break fora de loop/switch nao pode ser baixado"); + self.instrs.push(TacInstr::Jump { label }); + } + Stmt::Continue(_) => { + let label = control + .continue_label + .expect("continue fora de loop nao pode ser baixado"); + self.instrs.push(TacInstr::Jump { label }); + } Stmt::ExprStmt(expr, _) => { self.lower_expr(expr); } From 795aebea7c3b290b70648d29995b78240886ff39 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Fri, 19 Jun 2026 00:04:02 -0300 Subject: [PATCH 30/91] feat(ir): lower for and do while loops --- src/ir/lower.rs | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 581cb45..7401009 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -139,6 +139,70 @@ impl Lowerer { self.instrs.push(TacInstr::Label(end_label)); } + Stmt::For(init, cond, inc, body, _) => { + if let Some(init) = init { + self.lower_stmt_with_control(init, control); + } + + let cond_label = self.labels.fresh(); + let body_label = self.labels.fresh(); + let inc_label = inc.as_ref().map(|_| self.labels.fresh()); + let end_label = self.labels.fresh(); + let continue_label = inc_label.unwrap_or(cond_label); + + self.instrs.push(TacInstr::Label(cond_label)); + if let Some(cond) = cond { + let cond = 
self.lower_expr(cond); + self.instrs.push(TacInstr::CondJump { + cond, + then_label: body_label, + else_label: end_label, + }); + } + + self.instrs.push(TacInstr::Label(body_label)); + self.lower_stmt_with_control( + body, + ControlLabels { + break_label: Some(end_label), + continue_label: Some(continue_label), + }, + ); + + if let Some(inc_label) = inc_label { + self.instrs.push(TacInstr::Label(inc_label)); + if let Some(inc) = inc { + self.lower_expr(inc); + } + } + self.emit_jump_unless_terminated(cond_label); + + self.instrs.push(TacInstr::Label(end_label)); + } + Stmt::DoWhile(cond, body, _) => { + let body_label = self.labels.fresh(); + let cond_label = self.labels.fresh(); + let end_label = self.labels.fresh(); + + self.instrs.push(TacInstr::Label(body_label)); + self.lower_stmt_with_control( + body, + ControlLabels { + break_label: Some(end_label), + continue_label: Some(cond_label), + }, + ); + + self.instrs.push(TacInstr::Label(cond_label)); + let cond = self.lower_expr(cond); + self.instrs.push(TacInstr::CondJump { + cond, + then_label: body_label, + else_label: end_label, + }); + + self.instrs.push(TacInstr::Label(end_label)); + } Stmt::Break(_) => { let label = control .break_label From 362997ee3b30789e0ee5ebddd7825cc622a85add Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Fri, 19 Jun 2026 00:04:40 -0300 Subject: [PATCH 31/91] feat(ir): lower compound expressions --- src/ir/lower.rs | 106 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 102 insertions(+), 4 deletions(-) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 7401009..290017b 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -1,7 +1,7 @@ use crate::common::ast::{ - ast::Program, + ast::{Program, Type}, decl::Decl, - expr::{Expr, Literal}, + expr::{BinOp, Expr, Literal, PostfixOp, PrefixOp}, stmt::Stmt, }; use crate::ir::tac::{ @@ -56,6 +56,8 @@ impl Lowerer { }); Operand::Temp(dst) } + Expr::Prefix(op, target, _) => self.lower_prefix(op, target), + 
Expr::Postfix(op, target, _) => self.lower_postfix(op, target), Expr::Call(callee, args, _) => { let fn_name = match callee.as_ref() { Expr::Ident(name, _) => name.clone(), @@ -77,7 +79,48 @@ impl Lowerer { self.emit_copy(dst.clone(), src); dst } - _ => panic!("lowering ainda nao suporta essa expressao"), + Expr::CompoundAssign(op, lhs, rhs, _) => { + let dst = self.lower_assignment_target(lhs); + let rhs = self.lower_expr(rhs); + let temp = self.fresh_temp(); + self.instrs.push(TacInstr::BinOp { + dst: temp, + op: op.clone(), + lhs: dst.clone(), + rhs, + }); + self.emit_copy(dst.clone(), Operand::Temp(temp)); + dst + } + Expr::SizeofType(qty, _) => Operand::Const(ConstValue::Int(type_size(&qty.ty))), + Expr::Ternary(cond, then_expr, else_expr, _) => { + let cond = self.lower_expr(cond); + let then_label = self.labels.fresh(); + let else_label = self.labels.fresh(); + let end_label = self.labels.fresh(); + let dst = self.fresh_temp(); + + self.instrs.push(TacInstr::CondJump { + cond, + then_label, + else_label, + }); + + self.instrs.push(TacInstr::Label(then_label)); + let then_val = self.lower_expr(then_expr); + self.emit_copy(Operand::Temp(dst), then_val); + self.emit_jump_unless_terminated(end_label); + + self.instrs.push(TacInstr::Label(else_label)); + let else_val = self.lower_expr(else_expr); + self.emit_copy(Operand::Temp(dst), else_val); + + self.instrs.push(TacInstr::Label(end_label)); + Operand::Temp(dst) + } + Expr::Index(_, _, _) => panic!("lowering ainda nao suporta acesso por indice"), + Expr::Member(_, _, _, _) => panic!("lowering ainda nao suporta acesso a membro"), + Expr::Sizeof(_, _) => panic!("lowering de sizeof(expr) requer informacao de tipo"), } } @@ -228,7 +271,7 @@ impl Lowerer { self.emit_copy(Operand::Var(name.clone()), src); } } - _ => panic!("lowering ainda nao suporta esse statement"), + Stmt::Switch(_, _, _) => panic!("lowering ainda nao suporta switch"), } } @@ -240,6 +283,35 @@ impl Lowerer { self.temps.fresh() } + fn 
lower_prefix(&mut self, op: &PrefixOp, target: &Expr) -> Operand { + let dst = self.lower_assignment_target(target); + let temp = self.fresh_temp(); + self.instrs.push(TacInstr::BinOp { + dst: temp, + op: prefix_bin_op(op), + lhs: dst.clone(), + rhs: Operand::Const(ConstValue::Int(1)), + }); + self.emit_copy(dst.clone(), Operand::Temp(temp)); + dst + } + + fn lower_postfix(&mut self, op: &PostfixOp, target: &Expr) -> Operand { + let dst = self.lower_assignment_target(target); + let old = self.fresh_temp(); + self.emit_copy(Operand::Temp(old), dst.clone()); + + let new = self.fresh_temp(); + self.instrs.push(TacInstr::BinOp { + dst: new, + op: postfix_bin_op(op), + lhs: dst.clone(), + rhs: Operand::Const(ConstValue::Int(1)), + }); + self.emit_copy(dst, Operand::Temp(new)); + Operand::Temp(old) + } + fn lower_assignment_target(&mut self, expr: &Expr) -> Operand { match expr { Expr::Ident(name, _) => Operand::Var(name.clone()), @@ -307,3 +379,29 @@ fn lower_literal(value: &Literal) -> ConstValue { Literal::String(value) => ConstValue::String(value.clone()), } } + +fn prefix_bin_op(op: &PrefixOp) -> BinOp { + match op { + PrefixOp::Inc => BinOp::Add, + PrefixOp::Dec => BinOp::Sub, + } +} + +fn postfix_bin_op(op: &PostfixOp) -> BinOp { + match op { + PostfixOp::Inc => BinOp::Add, + PostfixOp::Dec => BinOp::Sub, + } +} + +fn type_size(ty: &Type) -> i64 { + match ty { + Type::Char => 1, + Type::Short => 2, + Type::Int | Type::Float | Type::Enum(_) => 4, + Type::Long | Type::Double | Type::Pointer(_) => 8, + Type::Array(_) | Type::Void | Type::Struct(_) | Type::Alias(_) | Type::Function(_, _) => { + panic!("lowering de sizeof(type) requer layout/tamanho completo") + } + } +} From a5192ac3a88da9bb1a37709ecacfa9304b760610 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Fri, 19 Jun 2026 00:05:25 -0300 Subject: [PATCH 32/91] test(ir): cover ast lowering --- src/ir/lower.rs | 205 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) diff 
--git a/src/ir/lower.rs b/src/ir/lower.rs index 290017b..df18fbf 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -405,3 +405,208 @@ fn type_size(ty: &Type) -> i64 { } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::common::{ + ast::ast::{QualifierType, Type}, + errors::error_data::Span, + }; + + fn span() -> Span { + Span { + line: 1, + end_line: 1, + column_start: 1, + column_end: 1, + } + } + + fn int_ty() -> QualifierType { + QualifierType { + ty: Type::Int, + is_const: false, + is_unsigned: false, + } + } + + fn int(value: i64) -> Expr { + Expr::Literal(Literal::Int(value), span()) + } + + fn ident(name: &str) -> Expr { + Expr::Ident(name.to_string(), span()) + } + + #[test] + fn lower_binary_expr_produces_temp() { + let expr = Expr::Binary(Box::new(int(2)), BinOp::Add, Box::new(int(3)), span()); + let mut lowerer = Lowerer::new(); + + let result = lowerer.lower_expr(&expr); + + assert_eq!(result, Operand::Temp(TempId(0))); + assert_eq!( + lowerer.finish(), + vec![TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Add, + lhs: Operand::Const(ConstValue::Int(2)), + rhs: Operand::Const(ConstValue::Int(3)), + }] + ); + } + + #[test] + fn lower_if_else_produces_labels() { + let stmt = Stmt::If( + int(1), + Box::new(Stmt::VarDecl( + int_ty(), + "x".to_string(), + Some(int(2)), + span(), + )), + Some(Box::new(Stmt::VarDecl( + int_ty(), + "y".to_string(), + Some(int(3)), + span(), + ))), + span(), + ); + let mut lowerer = Lowerer::new(); + + lowerer.lower_stmt(&stmt); + let instrs = lowerer.finish(); + + assert!(matches!( + instrs[0], + TacInstr::CondJump { + then_label: LabelId(0), + else_label: LabelId(1), + .. 
+ } + )); + assert_eq!(instrs[1], TacInstr::Label(LabelId(0))); + assert_eq!( + instrs[2], + TacInstr::Copy { + dst: Operand::Var("x".to_string()), + src: Operand::Const(ConstValue::Int(2)), + } + ); + assert_eq!(instrs[3], TacInstr::Jump { label: LabelId(2) }); + assert_eq!(instrs[4], TacInstr::Label(LabelId(1))); + assert_eq!( + instrs[5], + TacInstr::Copy { + dst: Operand::Var("y".to_string()), + src: Operand::Const(ConstValue::Int(3)), + } + ); + assert_eq!(instrs[6], TacInstr::Label(LabelId(2))); + } + + #[test] + fn lower_while_produces_backedge() { + let stmt = Stmt::While( + ident("keep_going"), + Box::new(Stmt::VarDecl( + int_ty(), + "x".to_string(), + Some(int(1)), + span(), + )), + span(), + ); + let mut lowerer = Lowerer::new(); + + lowerer.lower_stmt(&stmt); + let instrs = lowerer.finish(); + + assert_eq!(instrs[0], TacInstr::Label(LabelId(0))); + assert!(matches!( + instrs[1], + TacInstr::CondJump { + then_label: LabelId(1), + else_label: LabelId(2), + .. + } + )); + assert_eq!(instrs[2], TacInstr::Label(LabelId(1))); + assert_eq!(instrs[4], TacInstr::Jump { label: LabelId(0) }); + assert_eq!(instrs[5], TacInstr::Label(LabelId(2))); + } + + #[test] + fn lower_function_call_passes_args() { + let arg0 = Expr::Binary(Box::new(int(1)), BinOp::Add, Box::new(int(2)), span()); + let expr = Expr::Call(Box::new(ident("sum")), vec![arg0, int(3)], span()); + let mut lowerer = Lowerer::new(); + + let result = lowerer.lower_expr(&expr); + let instrs = lowerer.finish(); + + assert_eq!(result, Operand::Temp(TempId(1))); + assert_eq!( + instrs[1], + TacInstr::Call { + dst: Some(TempId(1)), + fn_name: "sum".to_string(), + args: vec![Operand::Temp(TempId(0)), Operand::Const(ConstValue::Int(3))], + } + ); + } + + #[test] + fn lower_nested_expr_correct_temp_order() { + let rhs = Expr::Binary(Box::new(int(3)), BinOp::Mul, Box::new(int(4)), span()); + let expr = Expr::Binary(Box::new(int(2)), BinOp::Add, Box::new(rhs), span()); + let mut lowerer = Lowerer::new(); + + let 
result = lowerer.lower_expr(&expr); + + assert_eq!(result, Operand::Temp(TempId(1))); + assert_eq!( + lowerer.finish(), + vec![ + TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Mul, + lhs: Operand::Const(ConstValue::Int(3)), + rhs: Operand::Const(ConstValue::Int(4)), + }, + TacInstr::BinOp { + dst: TempId(1), + op: BinOp::Add, + lhs: Operand::Const(ConstValue::Int(2)), + rhs: Operand::Temp(TempId(0)), + }, + ] + ); + } + + #[test] + fn lower_function_keeps_name_params_and_body() { + let decl = Decl::Function( + int_ty(), + "main".to_string(), + vec![(int_ty(), "argc".to_string())], + vec![Stmt::Return(Some(ident("argc")), span())], + span(), + ); + + let func = lower_function(&decl); + + assert_eq!(func.name, "main"); + assert_eq!(func.params, vec!["argc"]); + assert_eq!( + func.instrs, + vec![TacInstr::Return { + val: Some(Operand::Var("argc".to_string())) + }] + ); + } +} From 8f8cd7458bc8ee97330c32b971aa570ffcade3f6 Mon Sep 17 00:00:00 2001 From: matheuslemesam Date: Fri, 19 Jun 2026 08:06:41 -0300 Subject: [PATCH 33/91] =?UTF-8?q?test(integration):=20adicionar=20su=C3=AD?= =?UTF-8?q?te=20de=20testes=20end-to-end=20com=20programas=20C=20reais?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cria tests/integration_test.rs com helper compile_file() que executa o pipeline completo (lex -> parse -> semantic) e 15 testes: - 8 programas validos (zero diagnosticos): hello_world, arithmetic, fibonacci, structs, pointers, typedef, enum_usage, control_flow - 7 programas invalidos com diagnostico esperado: undeclared_var, type_mismatch_assign, assign_const, redeclaration, return_in_void, arity_mismatch, call_non_function Os testes sao coletados automaticamente por cargo test --all no CI. 
--- tests/integration/invalid/arity_mismatch.c | 7 + tests/integration/invalid/assign_const.c | 5 + tests/integration/invalid/call_non_function.c | 8 + tests/integration/invalid/redeclaration.c | 5 + tests/integration/invalid/return_in_void.c | 7 + .../invalid/type_mismatch_assign.c | 5 + tests/integration/invalid/undeclared_var.c | 4 + tests/integration/valid/arithmetic.c | 10 ++ tests/integration/valid/control_flow.c | 30 ++++ tests/integration/valid/enum_usage.c | 13 ++ tests/integration/valid/fibonacci.c | 10 ++ tests/integration/valid/hello_world.c | 3 + tests/integration/valid/pointers.c | 10 ++ tests/integration/valid/structs.c | 13 ++ tests/integration/valid/typedef.c | 16 ++ tests/integration_test.rs | 160 ++++++++++++++++++ 16 files changed, 306 insertions(+) create mode 100644 tests/integration/invalid/arity_mismatch.c create mode 100644 tests/integration/invalid/assign_const.c create mode 100644 tests/integration/invalid/call_non_function.c create mode 100644 tests/integration/invalid/redeclaration.c create mode 100644 tests/integration/invalid/return_in_void.c create mode 100644 tests/integration/invalid/type_mismatch_assign.c create mode 100644 tests/integration/invalid/undeclared_var.c create mode 100644 tests/integration/valid/arithmetic.c create mode 100644 tests/integration/valid/control_flow.c create mode 100644 tests/integration/valid/enum_usage.c create mode 100644 tests/integration/valid/fibonacci.c create mode 100644 tests/integration/valid/hello_world.c create mode 100644 tests/integration/valid/pointers.c create mode 100644 tests/integration/valid/structs.c create mode 100644 tests/integration/valid/typedef.c create mode 100644 tests/integration_test.rs diff --git a/tests/integration/invalid/arity_mismatch.c b/tests/integration/invalid/arity_mismatch.c new file mode 100644 index 0000000..039db0f --- /dev/null +++ b/tests/integration/invalid/arity_mismatch.c @@ -0,0 +1,7 @@ +int add(int a, int b) { + return a + b; +} + +int main(void) { + 
return add(1); +} diff --git a/tests/integration/invalid/assign_const.c b/tests/integration/invalid/assign_const.c new file mode 100644 index 0000000..f1b4f32 --- /dev/null +++ b/tests/integration/invalid/assign_const.c @@ -0,0 +1,5 @@ +int main(void) { + const int PI = 3; + PI = 4; + return PI; +} diff --git a/tests/integration/invalid/call_non_function.c b/tests/integration/invalid/call_non_function.c new file mode 100644 index 0000000..9dbfaae --- /dev/null +++ b/tests/integration/invalid/call_non_function.c @@ -0,0 +1,8 @@ +void f(void) { + int x = 1; + x(); +} + +int main(void) { + return 0; +} diff --git a/tests/integration/invalid/redeclaration.c b/tests/integration/invalid/redeclaration.c new file mode 100644 index 0000000..4d177ea --- /dev/null +++ b/tests/integration/invalid/redeclaration.c @@ -0,0 +1,5 @@ +int main(void) { + int x = 1; + int x = 2; + return x; +} diff --git a/tests/integration/invalid/return_in_void.c b/tests/integration/invalid/return_in_void.c new file mode 100644 index 0000000..6294c3b --- /dev/null +++ b/tests/integration/invalid/return_in_void.c @@ -0,0 +1,7 @@ +void f(void) { + return 1; +} + +int main(void) { + return 0; +} diff --git a/tests/integration/invalid/type_mismatch_assign.c b/tests/integration/invalid/type_mismatch_assign.c new file mode 100644 index 0000000..dcdda6c --- /dev/null +++ b/tests/integration/invalid/type_mismatch_assign.c @@ -0,0 +1,5 @@ +int main(void) { + int x; + x = "hello"; + return x; +} diff --git a/tests/integration/invalid/undeclared_var.c b/tests/integration/invalid/undeclared_var.c new file mode 100644 index 0000000..d1a1e3c --- /dev/null +++ b/tests/integration/invalid/undeclared_var.c @@ -0,0 +1,4 @@ +int main(void) { + int y = x; + return y; +} diff --git a/tests/integration/valid/arithmetic.c b/tests/integration/valid/arithmetic.c new file mode 100644 index 0000000..6203a16 --- /dev/null +++ b/tests/integration/valid/arithmetic.c @@ -0,0 +1,10 @@ +int main(void) { + int a = 10; + int b = 3; + 
int sum = a + b; + int diff = a - b; + int prod = a * b; + int quot = a / b; + int rem = a % b; + return sum + diff + prod + quot + rem; +} diff --git a/tests/integration/valid/control_flow.c b/tests/integration/valid/control_flow.c new file mode 100644 index 0000000..35453bf --- /dev/null +++ b/tests/integration/valid/control_flow.c @@ -0,0 +1,30 @@ +int classify(int n) { + int result = 0; + switch (n) { + case 1: + result = 1; + break; + case 2: + result = 2; + break; + default: + result = -1; + break; + } + return result; +} + +int main(void) { + int total = 0; + for (int i = 0; i < 3; i = i + 1) { + total = total + classify(i); + } + while (total > 0) { + total = total - 1; + } + if (total == 0) { + return 0; + } else { + return 1; + } +} diff --git a/tests/integration/valid/enum_usage.c b/tests/integration/valid/enum_usage.c new file mode 100644 index 0000000..63a4801 --- /dev/null +++ b/tests/integration/valid/enum_usage.c @@ -0,0 +1,13 @@ +enum Color { + RED, + GREEN, + BLUE +}; + +int main(void) { + int c = GREEN; + if (c == RED) { + return c; + } + return BLUE; +} diff --git a/tests/integration/valid/fibonacci.c b/tests/integration/valid/fibonacci.c new file mode 100644 index 0000000..d98f31b --- /dev/null +++ b/tests/integration/valid/fibonacci.c @@ -0,0 +1,10 @@ +int fib(int n) { + if (n <= 1) { + return n; + } + return fib(n - 1) + fib(n - 2); +} + +int main(void) { + return fib(10); +} diff --git a/tests/integration/valid/hello_world.c b/tests/integration/valid/hello_world.c new file mode 100644 index 0000000..9b6bdc2 --- /dev/null +++ b/tests/integration/valid/hello_world.c @@ -0,0 +1,3 @@ +int main(void) { + return 0; +} diff --git a/tests/integration/valid/pointers.c b/tests/integration/valid/pointers.c new file mode 100644 index 0000000..413aa19 --- /dev/null +++ b/tests/integration/valid/pointers.c @@ -0,0 +1,10 @@ +int main(void) { + int x = 5; + int *p = &x; + int *q = p; + p[0] = x + 1; + int offset = 1; + int *r = q + offset; + int v = r[0]; + 
return v + p[0] + offset; +} diff --git a/tests/integration/valid/structs.c b/tests/integration/valid/structs.c new file mode 100644 index 0000000..faa87d7 --- /dev/null +++ b/tests/integration/valid/structs.c @@ -0,0 +1,13 @@ +struct Point { + int x; + int y; +}; + +struct Point origin; + +int main(void) { + origin.x = 0; + origin.y = 0; + origin.x = origin.x + 1; + return origin.x + origin.y; +} diff --git a/tests/integration/valid/typedef.c b/tests/integration/valid/typedef.c new file mode 100644 index 0000000..d32310d --- /dev/null +++ b/tests/integration/valid/typedef.c @@ -0,0 +1,16 @@ +typedef int MyInt; + +struct Point { + int x; + int y; +}; + +typedef struct Point Point; + +Point origin; + +int main(void) { + MyInt a = 5; + origin.x = a; + return origin.x + a; +} diff --git a/tests/integration_test.rs b/tests/integration_test.rs new file mode 100644 index 0000000..d5e86b1 --- /dev/null +++ b/tests/integration_test.rs @@ -0,0 +1,160 @@ +use std::path::PathBuf; + +use crusty::analyser::analyse; +use crusty::common::errors::types::{CompilerError, Diagnostic, SemanticErrorKind}; +use crusty::common::input::source::SourceFile; +use crusty::lexer::scanner::Scanner; +use crusty::parser::Parser; + +struct CompileResult { + diagnostics: Vec, +} + +fn compile_file(rel: &str) -> CompileResult { + let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(rel); + let src = SourceFile::from_path(path) + .unwrap_or_else(|e| panic!("failed to read fixture '{rel}': {:?}", e.to_report())); + + let mut scanner = Scanner::new(src); + scanner.scan(); + let lex_diags = std::mem::take(&mut scanner.diagnostics); + let tokens = std::mem::take(&mut scanner.tokens); + + let mut diagnostics: Vec = lex_diags.into_iter().map(Diagnostic::Error).collect(); + + let mut parser = Parser::new(tokens); + match parser.parse_program() { + Ok(program) => diagnostics.extend(analyse(&program)), + Err(parse_errs) => diagnostics.extend(parse_errs.into_iter().map(Diagnostic::Error)), + } + + 
CompileResult { diagnostics } +} + +fn has_semantic_error(diags: &[Diagnostic], pred: impl Fn(&SemanticErrorKind) -> bool) -> bool { + diags.iter().any(|d| match d { + Diagnostic::Error(CompilerError::Semantic(se)) => pred(&se.kind), + _ => false, + }) +} + +fn compile_valid(name: &str) { + let result = compile_file(&format!("tests/integration/valid/{name}.c")); + assert!( + result.diagnostics.is_empty(), + "valid program '{name}' should produce zero diagnostics, got: {:#?}", + result.diagnostics + ); +} + +fn assert_invalid(name: &str, pred: impl Fn(&SemanticErrorKind) -> bool, label: &str) { + let result = compile_file(&format!("tests/integration/invalid/{name}.c")); + assert!( + has_semantic_error(&result.diagnostics, pred), + "invalid program '{name}' should emit {label}, got: {:#?}", + result.diagnostics + ); +} + +#[test] +fn valid_hello_world() { + compile_valid("hello_world"); +} + +#[test] +fn valid_arithmetic() { + compile_valid("arithmetic"); +} + +#[test] +fn valid_fibonacci() { + compile_valid("fibonacci"); +} + +#[test] +fn valid_structs() { + compile_valid("structs"); +} + +#[test] +fn valid_pointers() { + compile_valid("pointers"); +} + +#[test] +fn valid_typedef() { + compile_valid("typedef"); +} + +#[test] +fn valid_enum_usage() { + compile_valid("enum_usage"); +} + +#[test] +fn valid_control_flow() { + compile_valid("control_flow"); +} + +#[test] +fn invalid_undeclared_var_emits_error() { + assert_invalid( + "undeclared_var", + |k| matches!(k, SemanticErrorKind::UndefinedVariable(_)), + "UndefinedVariable", + ); +} + +#[test] +fn invalid_type_mismatch_assign_emits_error() { + assert_invalid( + "type_mismatch_assign", + |k| matches!(k, SemanticErrorKind::TypeMismatch { .. 
}), + "TypeMismatch", + ); +} + +#[test] +fn invalid_assign_const_emits_error() { + assert_invalid( + "assign_const", + |k| matches!(k, SemanticErrorKind::AssignToConst(_)), + "AssignToConst", + ); +} + +#[test] +fn invalid_redeclaration_emits_error() { + assert_invalid( + "redeclaration", + |k| matches!(k, SemanticErrorKind::Redeclaration(_)), + "Redeclaration", + ); +} + +#[test] +fn invalid_return_in_void_emits_error() { + assert_invalid( + "return_in_void", + |k| matches!(k, SemanticErrorKind::ReturnInVoid), + "ReturnInVoid", + ); +} + +#[test] +fn invalid_arity_mismatch_emits_error() { + assert_invalid( + "arity_mismatch", + |k| matches!(k, SemanticErrorKind::ArityMismatch { .. }), + "ArityMismatch", + ); +} + +#[test] +fn invalid_call_non_function_emits_error() { + assert_invalid( + "call_non_function", + |k| matches!(k, SemanticErrorKind::CallNonFunction(_)), + "CallNonFunction", + ); +} From df22d6cc7a32ba4c696c728a26dc51d0441d86c4 Mon Sep 17 00:00:00 2001 From: matheuslemesam Date: Fri, 19 Jun 2026 10:53:11 -0300 Subject: [PATCH 34/91] feat(codegen): add x86-64 ABI and stack frame helpers --- src/codegen/last/abi.rs | 61 ++++++++++++ src/codegen/last/frame.rs | 191 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 252 insertions(+) create mode 100644 src/codegen/last/abi.rs create mode 100644 src/codegen/last/frame.rs diff --git a/src/codegen/last/abi.rs b/src/codegen/last/abi.rs new file mode 100644 index 0000000..747f6a8 --- /dev/null +++ b/src/codegen/last/abi.rs @@ -0,0 +1,61 @@ +//! Auxiliares da convencao de chamada System V AMD64 ABI. +//! +//! Apenas a parte inteira do ABI esta coberta, que e o escopo do backend +//! atual (variaveis/temps de 64 bits). Pontos flutuantes (XMM) e vector args +//! ficam fora de escopo por enquanto. +//! +//! Resumo do System V AMD64 (inteiros): +//! - Argumentos inteiros: `rdi`, `rsi`, `rdx`, `rcx`, `r8`, `r9` (depois stack) +//! - Retorno: `rax` +//! 
// System V AMD64 calling-convention helpers.
//
// Only the integer part of the ABI is covered, which is the scope of the
// current backend (64-bit variables/temps). Floating point (XMM) and vector
// arguments are out of scope for now.
//
// Integer summary:
// - integer arguments: `rdi`, `rsi`, `rdx`, `rcx`, `r8`, `r9` (then the stack)
// - return value: `rax`
// - caller-saved: `rax`, `rcx`, `rdx`, `rsi`, `rdi`, `r8`-`r11`
// - callee-saved: `rbx`, `rbp`, `r12`-`r15`

/// 64-bit registers used to pass integer arguments, in order.
pub const ARG_REGISTERS: [&str; 6] = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"];

/// Register that carries integer return values.
pub const RETURN_REGISTER: &str = "rax";

/// Maximum number of arguments passed in registers (before the stack is used).
pub const MAX_REG_ARGS: usize = ARG_REGISTERS.len();

/// Returns the argument register for `index`, or `None` when that argument
/// must be passed on the stack (`index >= MAX_REG_ARGS`).
pub fn arg_register(index: usize) -> Option<&'static str> {
    ARG_REGISTERS.get(index).copied()
}

/// Positive offset from `%rbp` at which the `index`-th argument, passed on the
/// caller's stack, is visible inside the callee.
///
/// For `index >= MAX_REG_ARGS` the argument lives `16 + 8 * (index - MAX_REG_ARGS)`
/// bytes above `%rbp`: past the saved `%rbp` (8 bytes) and the return address
/// (8 bytes).
///
/// # Panics
/// In debug builds, panics when `index < MAX_REG_ARGS`; such arguments travel
/// in registers and have no stack offset.
pub fn stack_arg_offset(index: usize) -> i64 {
    debug_assert!(index >= MAX_REG_ARGS);
    16 + 8 * (index - MAX_REG_ARGS) as i64
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn arg_registers_match_system_v_order() {
        assert_eq!(ARG_REGISTERS, ["rdi", "rsi", "rdx", "rcx", "r8", "r9"]);
    }

    #[test]
    fn arg_register_lookup() {
        assert_eq!(arg_register(0), Some("rdi"));
        assert_eq!(arg_register(5), Some("r9"));
        assert_eq!(arg_register(6), None);
    }

    #[test]
    fn stack_arg_offsets_skip_saved_rbp_and_return_addr() {
        // 16 = return address (8) + saved %rbp (8).
        assert_eq!(stack_arg_offset(6), 16);
        assert_eq!(stack_arg_offset(7), 24);
        assert_eq!(stack_arg_offset(10), 48);
    }
}
+//! +//! O backend atual usa uma estrategia ingenua porem correta: cada temp e +//! variavel da TAC recebe um slot proprio de 8 bytes na stack. Nao ha +//! alocacao de registradores (spill universal), o que simplifica a selecao de +//! instrucoes e mantem a correcao enquanto o alocador (`reg_alloc`) nao esta +//! integrado ao pipeline de TAC. +//! +//! Layout do frame (referenciado a partir de `%rbp`): +//! +//! ```text +//! alto +-----------+ +//! | arg stack | argumentos do chamador (acima de rbp) +//! | ret addr | +//! | saved rbp | <- %rbp +//! -8 | slot 0 | primeira var/temp local +//! -16 | slot 1 | +//! | ... | +//! baixo +-----------+ <- %rsp (apos `subq $frame_size, %rsp`) +//! ``` +//! +//! O tamanho do frame e sempre alinhado em 16 bytes para manter o alinhamento +//! exigido pelo System V AMD64 ABI no ponto de `call`. + +use std::collections::HashMap; + +use crate::ir::tac::Operand; + +/// Chave que identifica unicamente um valor residente no frame. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum SlotKey { + Temp(u32), + Var(String), +} + +impl SlotKey { + /// Mapeia um `Operand` para sua chave de slot. Constantes nao tem slot + /// (sao emitidas como imediato) e retornam `None`. + pub fn from_operand(op: &Operand) -> Option { + match op { + Operand::Temp(temp) => Some(Self::Temp(temp.0)), + Operand::Var(name) => Some(Self::Var(name.clone())), + Operand::Const(_) => None, + } + } +} + +/// Stack frame de uma unica funcao. +#[derive(Debug, Clone)] +pub struct Frame { + /// Mapeia cada chave de slot para seu offset relativo a `%rbp`. + offsets: HashMap, + /// Proximo offset negativo a ser usado para um novo slot local. + next_local_offset: i64, +} + +impl Default for Frame { + fn default() -> Self { + Self::new() + } +} + +impl Frame { + /// Cria um frame vazio. Os slots locais comecam em `-8` e decrescem. 
+ pub fn new() -> Self { + Self { + offsets: HashMap::new(), + next_local_offset: -8, + } + } + + /// Aloca (se ainda nao existir) e retorna o offset de `key`. + /// + /// Slots alocados por aqui sao sempre locais (offsets negativos). + pub fn allocate_local(&mut self, key: SlotKey) -> i64 { + if let Some(&offset) = self.offsets.get(&key) { + return offset; + } + let offset = self.next_local_offset; + self.offsets.insert(key, offset); + self.next_local_offset -= 8; + offset + } + + /// Fixa um offset explicito para `key` (usado para argumentos recebidos + /// via stack do chamador, que vivem em offsets positivos). + pub fn set_offset(&mut self, key: SlotKey, offset: i64) { + self.offsets.insert(key, offset); + } + + /// Retorna o offset de `key`, se existir. + pub fn offset_of(&self, key: &SlotKey) -> Option { + self.offsets.get(key).copied() + } + + /// Numero de slots locais alocados (offsets negativos). + pub fn local_slot_count(&self) -> usize { + self.offsets.values().filter(|&&off| off < 0).count() + } + + /// Tamanho total do frame em bytes, alinhado em 16. + /// + /// Retorna 0 quando nao ha slots locais (funcao "leaf" sem frame). 
+ pub fn frame_size(&self) -> i64 { + let raw = (self.local_slot_count() as i64) * 8; + align_up(raw, 16) + } +} + +fn align_up(value: i64, alignment: i64) -> i64 { + debug_assert!(alignment > 0); + (value + alignment - 1) / alignment * alignment +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ir::tac::TempId; + + #[test] + fn allocate_local_assigns_distinct_negative_offsets() { + let mut frame = Frame::new(); + let a = frame.allocate_local(SlotKey::Var("x".to_string())); + let b = frame.allocate_local(SlotKey::Temp(TempId(0).0)); + + assert_eq!(a, -8); + assert_eq!(b, -16); + } + + #[test] + fn allocate_local_is_idempotent() { + let mut frame = Frame::new(); + let key = SlotKey::Var("x".to_string()); + + let first = frame.allocate_local(key.clone()); + let second = frame.allocate_local(key); + + assert_eq!(first, second); + assert_eq!(frame.local_slot_count(), 1); + } + + #[test] + fn set_offset_for_stack_argument_is_positive() { + let mut frame = Frame::new(); + frame.set_offset(SlotKey::Var("extra".to_string()), 16); + + assert_eq!( + frame.offset_of(&SlotKey::Var("extra".to_string())), + Some(16) + ); + } + + #[test] + fn frame_size_aligned_to_16() { + let mut frame = Frame::new(); + // 1 slot -> 8 bytes crus -> alinhado para 16. + frame.allocate_local(SlotKey::Var("a".to_string())); + assert_eq!(frame.frame_size(), 16); + + // 2 slots -> 16 bytes. + frame.allocate_local(SlotKey::Var("b".to_string())); + assert_eq!(frame.frame_size(), 16); + + // 3 slots -> 24 bytes -> alinhado para 32. 
+ frame.allocate_local(SlotKey::Var("c".to_string())); + assert_eq!(frame.frame_size(), 32); + } + + #[test] + fn empty_frame_has_zero_size() { + let frame = Frame::new(); + assert_eq!(frame.frame_size(), 0); + assert_eq!(frame.local_slot_count(), 0); + } + + #[test] + fn slot_key_from_operand_skips_constants() { + assert_eq!( + SlotKey::from_operand(&Operand::Temp(TempId(3))), + Some(SlotKey::Temp(3)) + ); + assert_eq!( + SlotKey::from_operand(&Operand::Var("y".to_string())), + Some(SlotKey::Var("y".to_string())) + ); + assert_eq!( + SlotKey::from_operand(&Operand::Const(crate::ir::tac::ConstValue::Int(7))), + None + ); + } +} From 0a804cff3142272e8fc91e1941ada21e18124b88 Mon Sep 17 00:00:00 2001 From: matheuslemesam Date: Fri, 19 Jun 2026 10:53:11 -0300 Subject: [PATCH 35/91] feat(codegen): lower TAC to x86-64 assembly --- src/codegen/last/mod.rs | 13 + src/codegen/last/x86_64.rs | 674 +++++++++++++++++++++++++++++++++++++ 2 files changed, 687 insertions(+) create mode 100644 src/codegen/last/x86_64.rs diff --git a/src/codegen/last/mod.rs b/src/codegen/last/mod.rs index 8b13789..2f1c6b4 100644 --- a/src/codegen/last/mod.rs +++ b/src/codegen/last/mod.rs @@ -1 +1,14 @@ +//! Backend final (last): traducao da IR (TAC) para codigo de maquina alvo. +//! +//! Atualmente o unico alvo implementado e x86-64 (sintaxe AT&T / System V +//! AMD64 ABI). Os modulos seguintes organizam as responsabilidades: +//! +//! - [`abi`]: convencao de chamada System V AMD64. +//! - [`frame`]: gerenciamento do stack frame de cada funcao. +//! - [`x86_64`]: selecao de instrucoes e emissao de assembly. +pub mod abi; +pub mod frame; +pub mod x86_64; + +pub use x86_64::{emit_function, emit_program}; diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs new file mode 100644 index 0000000..85ab473 --- /dev/null +++ b/src/codegen/last/x86_64.rs @@ -0,0 +1,674 @@ +//! Selecao de instrucoes e emissao de assembly x86-64 (sintaxe AT&T / GAS). +//! +//! 
/// Accumulates assembly text line by line, with controlled indentation.
struct Emitter {
    out: String,
}

impl Emitter {
    /// Creates an emitter with an empty output buffer.
    fn new() -> Self {
        Self { out: String::new() }
    }

    /// Appends one instruction line, indented by four spaces.
    fn insn(&mut self, text: &str) {
        self.out.push_str("    ");
        self.out.push_str(text);
        self.out.push('\n');
    }

    /// Appends a directive or label line with no indentation
    /// (e.g. `.text`, `main:`).
    fn raw(&mut self, text: &str) {
        self.out.push_str(text);
        self.out.push('\n');
    }

    /// Appends a `#` comment line at instruction indentation.
    fn comment(&mut self, text: &str) {
        self.out.push_str("    # ");
        self.out.push_str(text);
        self.out.push('\n');
    }

    /// Appends an empty line.
    fn blank(&mut self) {
        self.out.push('\n');
    }

    /// Appends already-formatted text verbatim (no indent, no newline added).
    fn append_str(&mut self, text: &str) {
        self.out.push_str(text);
    }

    /// Consumes the emitter and returns everything written so far.
    fn into_string(self) -> String {
        self.out
    }
}
+pub fn emit_program(prog: &TacProgram) -> String { + let mut em = Emitter::new(); + em.raw(".text"); + for func in &prog.functions { + em.blank(); + em.append_str(&emit_function(func)); + } + // Marca a stack como nao-executavel (boa pratica; evita aviso do linker e + // e o que o proprio GCC adiciona a saida assembly). + em.blank(); + em.raw(".section .note.GNU-stack,\"\",@progbits"); + em.into_string() +} + +/// Emite o assembly de uma unica funcao: directiva `.globl`, rotulo, +/// prologue, corpo e epilogue. +pub fn emit_function(func: &TacFunction) -> String { + let mut em = Emitter::new(); + em.comment(&format!("function {}", func.name)); + em.raw(&format!(".globl {}", func.name)); + em.raw(&format!("{}:", func.name)); + + let frame = build_frame(func); + + // Prologue + em.insn("pushq %rbp"); + em.insn("movq %rsp, %rbp"); + let frame_size = frame.frame_size(); + if frame_size > 0 { + em.insn(&format!("subq ${frame_size}, %rsp")); + } + + // Spill dos argumentos recebidos em registrador para seus slots locais. + for (index, name) in func.params.iter().enumerate() { + if let Some(reg) = abi::arg_register(index) { + let offset = frame + .offset_of(&SlotKey::Var(name.clone())) + .expect("slot do parametro deve estar alocado"); + em.insn(&format!("movq %{reg}, {offset}(%rbp)")); + } + } + + // Corpo + let epilogue_label = format!(".L_{}_epilogue", func.name); + for instr in &func.instrs { + emit_instr(&mut em, instr, &frame, &func.name, &epilogue_label); + } + + // Epilogue (alvo de todos os `return`). Caso a funcao nao tenha `return` + // explicito, a queda natural chega aqui e retorna o que estiver em %rax. + em.raw(&format!("{epilogue_label}:")); + em.insn("movq %rbp, %rsp"); + em.insn("popq %rbp"); + em.insn("ret"); + + em.into_string() +} + +/// Constroi o stack frame pre-escaneando todas as instrucoes para alocar um +/// slot para cada temp/variavel e mapear os parametros para suas posicoes. 
+fn build_frame(func: &TacFunction) -> Frame { + let mut frame = Frame::new(); + + for (index, name) in func.params.iter().enumerate() { + let key = SlotKey::Var(name.clone()); + match abi::arg_register(index) { + Some(_) => { + frame.allocate_local(key); + } + None => { + // Argumento passado via stack do chamador: ja esta disponivel + // em offset positivo a partir de %rbp. + frame.set_offset(key, abi::stack_arg_offset(index)); + } + } + } + + for instr in &func.instrs { + for key in slot_keys_of(instr) { + // Nao realoca stack-args (offsets positivos) que ja foram mapeados. + if frame.offset_of(&key).is_some() { + continue; + } + frame.allocate_local(key); + } + } + + frame +} + +/// Coleta todas as chaves de slot referenciadas por uma instrucao (dst, +/// fontes e operandos de leitura). +fn slot_keys_of(instr: &TacInstr) -> Vec { + let mut keys = Vec::new(); + + let consider = |keys: &mut Vec, op: &Operand| { + if let Some(key) = SlotKey::from_operand(op) { + keys.push(key); + } + }; + + match instr { + TacInstr::BinOp { dst, lhs, rhs, .. } => { + keys.push(SlotKey::Temp(dst.0)); + consider(&mut keys, lhs); + consider(&mut keys, rhs); + } + TacInstr::UnOp { dst, src, .. } => { + keys.push(SlotKey::Temp(dst.0)); + consider(&mut keys, src); + } + TacInstr::Copy { dst, src } => { + consider(&mut keys, dst); + consider(&mut keys, src); + } + TacInstr::CondJump { cond, .. } => consider(&mut keys, cond), + TacInstr::Call { dst, args, .. } => { + if let Some(dst) = dst { + keys.push(SlotKey::Temp(dst.0)); + } + for arg in args { + consider(&mut keys, arg); + } + } + TacInstr::Return { val } => { + if let Some(val) = val { + consider(&mut keys, val); + } + } + TacInstr::Jump { .. 
} | TacInstr::Label(_) => {} + } + + keys +} + +fn emit_instr( + em: &mut Emitter, + instr: &TacInstr, + frame: &Frame, + func_name: &str, + epilogue_label: &str, +) { + match instr { + TacInstr::Label(label) => { + em.raw(&format!("{}:", local_label(func_name, label))); + } + TacInstr::Jump { label } => { + em.insn(&format!("jmp {}", local_label(func_name, label))); + } + TacInstr::CondJump { + cond, + then_label, + else_label, + } => { + load_op(em, frame, cond, "rax"); + em.insn("testq %rax, %rax"); + em.insn(&format!("jne {}", local_label(func_name, then_label))); + em.insn(&format!("jmp {}", local_label(func_name, else_label))); + } + TacInstr::Copy { dst, src } => { + load_op(em, frame, src, "rax"); + store_op(em, frame, dst, "rax"); + } + TacInstr::BinOp { dst, op, lhs, rhs } => { + emit_binop(em, op, lhs, rhs, *dst, frame); + } + TacInstr::UnOp { dst, op, src } => { + emit_unop(em, op, src, *dst, frame); + } + TacInstr::Call { dst, fn_name, args } => emit_call(em, fn_name, args, *dst, frame), + TacInstr::Return { val } => { + if let Some(val) = val { + load_op(em, frame, val, "rax"); + } + em.insn(&format!("jmp {epilogue_label}")); + } + } +} + +fn emit_binop( + em: &mut Emitter, + op: &BinOp, + lhs: &Operand, + rhs: &Operand, + dst: crate::ir::tac::TempId, + frame: &Frame, +) { + // Operacoes logicas short-circuit-like precisam normalizar cada operando + // para 0/1 individualmente. 
+ if matches!(op, BinOp::And | BinOp::Or) { + emit_logical(em, matches!(op, BinOp::Or), lhs, rhs, dst, frame); + return; + } + + load_op(em, frame, lhs, "rax"); + load_op(em, frame, rhs, "rcx"); + + match op { + BinOp::Add => em.insn("addq %rcx, %rax"), + BinOp::Sub => em.insn("subq %rcx, %rax"), + BinOp::Mul => em.insn("imulq %rcx, %rax"), + BinOp::Div => { + em.insn("cqto"); + em.insn("idivq %rcx"); + } + BinOp::Mod => { + em.insn("cqto"); + em.insn("idivq %rcx"); + em.insn("movq %rdx, %rax"); + } + BinOp::BitAnd => em.insn("andq %rcx, %rax"), + BinOp::BitOr => em.insn("orq %rcx, %rax"), + BinOp::BitXor => em.insn("xorq %rcx, %rax"), + BinOp::Shl => em.insn("shlq %cl, %rax"), + BinOp::Shr => em.insn("sarq %cl, %rax"), + BinOp::Less => emit_comparison(em, "setl"), + BinOp::Greater => emit_comparison(em, "setg"), + BinOp::Leq => emit_comparison(em, "setle"), + BinOp::Geq => emit_comparison(em, "setge"), + BinOp::Eq => emit_comparison(em, "sete"), + BinOp::Neq => emit_comparison(em, "setne"), + BinOp::And | BinOp::Or => unreachable!("tratado em emit_logical"), + } + + store_op(em, frame, &Operand::Temp(dst), "rax"); +} + +fn emit_comparison(em: &mut Emitter, setcc: &str) { + em.insn("cmpq %rcx, %rax"); + em.insn(&format!("{setcc} %al")); + em.insn("movzbq %al, %rax"); +} + +fn emit_logical( + em: &mut Emitter, + is_or: bool, + lhs: &Operand, + rhs: &Operand, + dst: crate::ir::tac::TempId, + frame: &Frame, +) { + // Normaliza lhs para 0/1 em %rdx. + load_op(em, frame, lhs, "rax"); + em.insn("testq %rax, %rax"); + em.insn("setne %al"); + em.insn("movzbq %al, %rax"); + em.insn("movq %rax, %rdx"); + + // Normaliza rhs para 0/1 em %rax. 
+ load_op(em, frame, rhs, "rax"); + em.insn("testq %rax, %rax"); + em.insn("setne %al"); + em.insn("movzbq %al, %rax"); + + if is_or { + em.insn("orq %rdx, %rax"); + } else { + em.insn("andq %rdx, %rax"); + } + + store_op(em, frame, &Operand::Temp(dst), "rax"); +} + +fn emit_unop( + em: &mut Emitter, + op: &UnOp, + src: &Operand, + dst: crate::ir::tac::TempId, + frame: &Frame, +) { + load_op(em, frame, src, "rax"); + match op { + UnOp::Neg => em.insn("negq %rax"), + UnOp::BitNot => em.insn("notq %rax"), + UnOp::Not => { + em.insn("testq %rax, %rax"); + em.insn("sete %al"); + em.insn("movzbq %al, %rax"); + } + UnOp::Deref => panic!("codegen de deref (*) nao suportado neste backend"), + UnOp::AddrOf => panic!("codegen de address-of (&) nao suportado neste backend"), + } + store_op(em, frame, &Operand::Temp(dst), "rax"); +} + +fn emit_call( + em: &mut Emitter, + fn_name: &str, + args: &[Operand], + dst: Option, + frame: &Frame, +) { + // Apenas a convencao de registradores esta implementada (ate 6 inteiros). + // A stack permanece 16-alinhada no `call` porque o prologue a alinha e nao + // empurramos nada aqui. + for (index, arg) in args.iter().enumerate() { + match abi::arg_register(index) { + Some(reg) => { + load_op(em, frame, arg, "rax"); + em.insn(&format!("movq %rax, %{reg}")); + } + None => panic!( + "codegen atual suporta ate {} argumentos por registrador", + abi::MAX_REG_ARGS + ), + } + } + + em.insn(&format!("call {fn_name}")); + + if let Some(dst) = dst { + store_op(em, frame, &Operand::Temp(dst), "rax"); + } +} + +/// Carrega `op` para o registrador nomeado (ex.: "rax", "rcx"). 
+fn load_op(em: &mut Emitter, frame: &Frame, op: &Operand, reg: &str) { + match op { + Operand::Const(value) => em.insn(&format!("movq ${}, %{reg}", const_immediate(value))), + Operand::Temp(temp) => { + let offset = frame + .offset_of(&SlotKey::Temp(temp.0)) + .expect("temp sem slot alocado"); + em.insn(&format!("movq {offset}(%rbp), %{reg}")); + } + Operand::Var(name) => { + let offset = frame + .offset_of(&SlotKey::Var(name.clone())) + .expect("var sem slot alocado"); + em.insn(&format!("movq {offset}(%rbp), %{reg}")); + } + } +} + +/// Armazena o registrador nomeado em `op` (que deve ser temp ou var). +fn store_op(em: &mut Emitter, frame: &Frame, op: &Operand, reg: &str) { + let offset = match op { + Operand::Temp(temp) => frame + .offset_of(&SlotKey::Temp(temp.0)) + .expect("temp sem slot alocado"), + Operand::Var(name) => frame + .offset_of(&SlotKey::Var(name.clone())) + .expect("var sem slot alocado"), + Operand::Const(_) => panic!("nao e possivel armazenar em uma constante"), + }; + em.insn(&format!("movq %{reg}, {offset}(%rbp)")); +} + +fn const_immediate(value: &ConstValue) -> String { + match value { + ConstValue::Int(v) => v.to_string(), + ConstValue::Char(c) => (*c as i64).to_string(), + ConstValue::Double(_) => panic!("codegen de double nao suportado neste backend"), + ConstValue::String(_) => panic!("codegen de string literal nao suportado neste backend"), + } +} + +fn local_label(func_name: &str, label: &LabelId) -> String { + format!(".L_{func_name}_L{}", label.0) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ir::tac::{ConstValue, Operand, TacFunction, TacInstr, TempId}; + + fn func(name: &str, params: Vec<&str>, instrs: Vec) -> TacFunction { + TacFunction { + name: name.to_string(), + params: params.into_iter().map(String::from).collect(), + instrs, + } + } + + fn asm_simple_return_const() -> TacFunction { + func( + "main", + Vec::new(), + vec![TacInstr::Return { + val: Some(Operand::Const(ConstValue::Int(42))), + }], + ) + } + + 
#[test] + fn emit_function_prologue_pushes_rbp_and_sets_frame() { + let out = emit_function(&asm_simple_return_const()); + + assert!(out.contains("pushq %rbp")); + assert!(out.contains("movq %rsp, %rbp")); + assert!(out.contains("ret")); + } + + #[test] + fn emit_function_declares_global_symbol() { + let out = emit_function(&asm_simple_return_const()); + + assert!(out.contains(".globl main")); + assert!(out.contains("main:\n")); + } + + #[test] + fn return_const_loads_immediate_into_rax() { + let out = emit_function(&asm_simple_return_const()); + + assert!(out.contains("movq $42, %rax")); + } + + #[test] + fn binary_add_emits_load_add_store() { + let f = func( + "add", + vec!["a", "b"], + vec![ + TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Add, + lhs: Operand::Var("a".to_string()), + rhs: Operand::Var("b".to_string()), + }, + TacInstr::Return { + val: Some(Operand::Temp(TempId(0))), + }, + ], + ); + + let out = emit_function(&f); + + assert!(out.contains("movq %rdi, -8(%rbp)")); // spill arg a + assert!(out.contains("movq %rsi, -16(%rbp)")); // spill arg b + assert!(out.contains("addq %rcx, %rax")); + } + + #[test] + fn division_uses_cqto_and_idivq() { + let f = func( + "divmod", + Vec::new(), + vec![ + TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Div, + lhs: Operand::Const(ConstValue::Int(10)), + rhs: Operand::Const(ConstValue::Int(3)), + }, + TacInstr::Return { + val: Some(Operand::Temp(TempId(0))), + }, + ], + ); + + let out = emit_function(&f); + + assert!(out.contains("cqto")); + assert!(out.contains("idivq %rcx")); + } + + #[test] + fn modulo_moves_rdx_into_rax() { + let f = func( + "mod", + Vec::new(), + vec![TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Mod, + lhs: Operand::Const(ConstValue::Int(10)), + rhs: Operand::Const(ConstValue::Int(3)), + }], + ); + + let out = emit_function(&f); + + assert!(out.contains("movq %rdx, %rax")); + } + + #[test] + fn less_than_emits_setl() { + let f = func( + "less", + Vec::new(), + vec![TacInstr::BinOp { + 
dst: TempId(0), + op: BinOp::Less, + lhs: Operand::Const(ConstValue::Int(1)), + rhs: Operand::Const(ConstValue::Int(2)), + }], + ); + + let out = emit_function(&f); + + assert!(out.contains("cmpq %rcx, %rax")); + assert!(out.contains("setl %al")); + assert!(out.contains("movzbq %al, %rax")); + } + + #[test] + fn call_sets_up_arg_registers() { + let f = func( + "caller", + Vec::new(), + vec![ + TacInstr::Call { + dst: Some(TempId(0)), + fn_name: "soma".to_string(), + args: vec![ + Operand::Const(ConstValue::Int(2)), + Operand::Const(ConstValue::Int(3)), + ], + }, + TacInstr::Return { + val: Some(Operand::Temp(TempId(0))), + }, + ], + ); + + let out = emit_function(&f); + + assert!(out.contains("movq %rax, %rdi")); + assert!(out.contains("movq %rax, %rsi")); + assert!(out.contains("call soma")); + } + + #[test] + fn cond_jump_uses_test_and_jne() { + let f = func( + "cond", + Vec::new(), + vec![ + TacInstr::CondJump { + cond: Operand::Const(ConstValue::Int(1)), + then_label: LabelId(0), + else_label: LabelId(1), + }, + TacInstr::Label(LabelId(0)), + TacInstr::Return { + val: Some(Operand::Const(ConstValue::Int(1))), + }, + TacInstr::Label(LabelId(1)), + TacInstr::Return { + val: Some(Operand::Const(ConstValue::Int(0))), + }, + ], + ); + + let out = emit_function(&f); + + assert!(out.contains("testq %rax, %rax")); + assert!(out.contains("jne .L_cond_L0")); + assert!(out.contains("jmp .L_cond_L1")); + assert!(out.contains(".L_cond_L0:")); + assert!(out.contains(".L_cond_L1:")); + } + + #[test] + fn epilogue_label_is_emitted_once() { + let out = emit_function(&asm_simple_return_const()); + + assert_eq!(out.matches(".L_main_epilogue:").count(), 1); + } + + #[test] + fn emit_program_prepends_text_section() { + let prog = TacProgram { + functions: vec![asm_simple_return_const()], + }; + + let out = emit_program(&prog); + + assert!(out.starts_with(".text")); + assert!(out.contains(".globl main")); + } + + #[test] + fn logical_and_normalizes_operands() { + let f = func( + 
"land", + Vec::new(), + vec![TacInstr::BinOp { + dst: TempId(0), + op: BinOp::And, + lhs: Operand::Const(ConstValue::Int(1)), + rhs: Operand::Const(ConstValue::Int(0)), + }], + ); + + let out = emit_function(&f); + + assert!(out.contains("setne %al")); + assert!(out.contains("andq %rdx, %rax")); + } + + #[test] + fn logical_or_emits_orq() { + let f = func( + "lor", + Vec::new(), + vec![TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Or, + lhs: Operand::Const(ConstValue::Int(1)), + rhs: Operand::Const(ConstValue::Int(0)), + }], + ); + + let out = emit_function(&f); + + assert!(out.contains("orq %rdx, %rax")); + } +} From 7fc41fee8db322a547309ff5de8daf2cb38f96e0 Mon Sep 17 00:00:00 2001 From: matheuslemesam Date: Fri, 19 Jun 2026 10:53:11 -0300 Subject: [PATCH 36/91] feat(codegen): emit assembly from the CLI (-S/--emit-asm) --- src/main.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/main.rs b/src/main.rs index 61467e1..6639578 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,11 @@ use crusty::analyser::analyse; use crusty::codegen::inter::opt::{pipeline_for_level, OptLevel}; use crusty::codegen::inter::Cfg; +use crusty::codegen::last; use crusty::common::ast::pretty::pretty_program; use crusty::common::errors::report::{Report, ToReport}; use crusty::common::input::source::SourceFile; +use crusty::ir::lower::lower_program; use crusty::lexer::scanner::Scanner; use crusty::parser::Parser; use std::env; @@ -25,6 +27,7 @@ fn main() -> std::io::Result<()> { eprintln!(" --only-lex Stop after lexing"); eprintln!(" --only-parse Stop after parsing"); eprintln!(" --only-semantic Stop after semantic analysis"); + eprintln!(" -S, --emit-asm Emit x86-64 assembly (AT&T) to .s"); eprintln!(" -O0|-O1|-O2|-O3 Set optimization level"); eprintln!(" --opt-level 0|1|2|3 Set optimization level"); exit(64); @@ -52,6 +55,7 @@ struct CliArgs { only_lex: bool, only_parse: bool, only_semantic: bool, + emit_asm: bool, opt_level: OptLevel, } @@ 
-65,6 +69,7 @@ impl CliArgs { only_lex: false, only_parse: false, only_semantic: false, + emit_asm: false, opt_level: OptLevel::default(), }; @@ -79,6 +84,7 @@ impl CliArgs { "--only-lex" => cli.only_lex = true, "--only-parse" => cli.only_parse = true, "--only-semantic" => cli.only_semantic = true, + "-S" | "--emit-asm" => cli.emit_asm = true, "-O" | "--opt-level" => { i += 1; let Some(level) = args.get(i) else { @@ -134,6 +140,15 @@ impl ToReport for DiagnosticError { } } +#[derive(Debug)] +struct IoError(String); + +impl ToReport for IoError { + fn to_report(&self) -> Report { + Report::new(&format!("I/O error: {}", self.0)) + } +} + fn run(source: SourceFile, args: &CliArgs) -> Result<(), Box> { // ── Stage 1: Lex ───────────────────────────────────────────────────────── let mut scanner = Scanner::new(source); @@ -204,9 +219,27 @@ fn run(source: SourceFile, args: &CliArgs) -> Result<(), Box> { let opt_pipeline = pipeline_for_level(args.opt_level); opt_pipeline.run(&mut cfg, 10); + // ── Stage 5: Code generation (x86-64 / AT&T) ───────────────────────────── + if args.emit_asm { + let tac_program = lower_program(&program); + let asm = last::emit_program(&tac_program); + + let output_path = asm_output_path(&args.input_file); + std::fs::write(&output_path, asm) + .map_err(|e| Box::new(IoError(e.to_string())) as Box)?; + eprintln!("emitted assembly: {}", output_path.display()); + } + Ok(()) } +fn asm_output_path(input: &Option) -> PathBuf { + let input = input.clone().unwrap_or_else(|| "crusty.out".to_string()); + let mut path = PathBuf::from(input); + path.set_extension("s"); + path +} + // ── Dump helpers ───────────────────────────────────────────────────────────── fn dump_tokens(scanner: &Scanner) { From 539e7b1816d15ed7a91d37ec6954fbba81cffcde Mon Sep 17 00:00:00 2001 From: matheuslemesam Date: Fri, 19 Jun 2026 10:53:11 -0300 Subject: [PATCH 37/91] test(codegen): cover assembly emission with gcc smoke tests --- tests/codegen_smoke.rs | 218 
+++++++++++++++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 tests/codegen_smoke.rs diff --git a/tests/codegen_smoke.rs b/tests/codegen_smoke.rs new file mode 100644 index 0000000..8e044db --- /dev/null +++ b/tests/codegen_smoke.rs @@ -0,0 +1,218 @@ +//! Testes de smoke do backend x86-64. +//! +//! Estes testes montam e (quando possivel) executam a saida do codegen com o +//! `gcc` do sistema para garantir que o assembly gerado nao apenas e +//! sintaticamente valido, mas tambem produz o resultado esperado em tempo de +//! execucao. Se o `gcc` nao estiver disponivel no ambiente, os testes sao +//! ignorados (skip) em vez de falhar. + +use std::path::PathBuf; +use std::process::Command; + +use crusty::codegen::last::emit_program; +use crusty::common::ast::expr::BinOp; +use crusty::ir::tac::{ConstValue, LabelId, Operand, TacFunction, TacInstr, TacProgram, TempId}; + +fn gcc_available() -> bool { + Command::new("gcc") + .arg("--version") + .output() + .map(|o| o.status.success()) + .unwrap_or(false) +} + +/// Ignora o teste quando nao ha `gcc` no ambiente. +macro_rules! 
require_gcc { + () => { + if !gcc_available() { + eprintln!("gcc indisponivel: pulando teste de smoke"); + return; + } + }; +} + +fn build_soma_program() -> TacProgram { + let soma = TacFunction { + name: "soma".to_string(), + params: vec!["a".to_string(), "b".to_string()], + instrs: vec![ + TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Add, + lhs: Operand::Var("a".to_string()), + rhs: Operand::Var("b".to_string()), + }, + TacInstr::Return { + val: Some(Operand::Temp(TempId(0))), + }, + ], + }; + + let main = TacFunction { + name: "main".to_string(), + params: Vec::new(), + instrs: vec![ + TacInstr::Call { + dst: Some(TempId(0)), + fn_name: "soma".to_string(), + args: vec![ + Operand::Const(ConstValue::Int(2)), + Operand::Const(ConstValue::Int(3)), + ], + }, + TacInstr::Return { + val: Some(Operand::Temp(TempId(0))), + }, + ], + }; + + TacProgram { + functions: vec![soma, main], + } +} + +fn write_temp_source(name: &str, contents: &str) -> PathBuf { + let mut path = std::env::temp_dir(); + path.push(format!("crusty_smoke_{name}_{}.s", std::process::id())); + std::fs::write(&path, contents).expect("falha ao escrever arquivo .s temporario"); + path +} + +#[test] +fn smoke_assembles_with_gcc() { + require_gcc!(); + + let asm = emit_program(&build_soma_program()); + let source = write_temp_source("assemble", &asm); + let object = source.with_extension("o"); + + let status = Command::new("gcc") + .args(["-c", "-x", "assembler-with-cpp"]) + .arg(&source) + .arg("-o") + .arg(&object) + .status() + .expect("falha ao invocar gcc"); + + assert!( + status.success(), + "gcc nao conseguiu montar a saida do codegen" + ); + + let _ = std::fs::remove_file(&source); + let _ = std::fs::remove_file(&object); +} + +#[test] +fn smoke_links_and_runs_with_expected_exit_code() { + require_gcc!(); + + let asm = emit_program(&build_soma_program()); + let source = write_temp_source("run", &asm); + let exe = source.with_extension("bin"); + + let link = Command::new("gcc") + .arg(&source) + 
.arg("-o") + .arg(&exe) + .status() + .expect("falha ao invocar gcc"); + assert!( + link.success(), + "gcc nao conseguiu linkar a saida do codegen" + ); + + let exit = Command::new(&exe) + .status() + .expect("falha ao executar o binario gerado"); + + // soma(2, 3) == 5: o valor de retorno de `main` vira o exit code. + #[cfg(unix)] + assert_eq!(exit.code(), Some(5), "esperava exit code 5 (soma de 2 + 3)"); + + let _ = std::fs::remove_file(&source); + let _ = std::fs::remove_file(&exe); +} + +#[test] +fn smoke_simple_return_const_runs() { + require_gcc!(); + + let prog = TacProgram { + functions: vec![TacFunction { + name: "main".to_string(), + params: Vec::new(), + instrs: vec![TacInstr::Return { + val: Some(Operand::Const(ConstValue::Int(42))), + }], + }], + }; + + let asm = emit_program(&prog); + let source = write_temp_source("const", &asm); + let exe = source.with_extension("bin"); + + let link = Command::new("gcc") + .arg(&source) + .arg("-o") + .arg(&exe) + .status() + .expect("falha ao invocar gcc"); + assert!(link.success()); + + let exit = Command::new(&exe).status().expect("falha ao executar"); + + #[cfg(unix)] + assert_eq!(exit.code(), Some(42)); + + let _ = std::fs::remove_file(&source); + let _ = std::fs::remove_file(&exe); +} + +#[test] +fn smoke_control_flow_if_else_runs() { + require_gcc!(); + + // main: if (1) return 10; else return 20; -> espera-se 10. 
+ let prog = TacProgram { + functions: vec![TacFunction { + name: "main".to_string(), + params: Vec::new(), + instrs: vec![ + TacInstr::CondJump { + cond: Operand::Const(ConstValue::Int(1)), + then_label: LabelId(0), + else_label: LabelId(1), + }, + TacInstr::Label(LabelId(0)), + TacInstr::Return { + val: Some(Operand::Const(ConstValue::Int(10))), + }, + TacInstr::Label(LabelId(1)), + TacInstr::Return { + val: Some(Operand::Const(ConstValue::Int(20))), + }, + ], + }], + }; + + let asm = emit_program(&prog); + let source = write_temp_source("ifelse", &asm); + let exe = source.with_extension("bin"); + + let link = Command::new("gcc") + .arg(&source) + .arg("-o") + .arg(&exe) + .status() + .expect("falha ao invocar gcc"); + assert!(link.success()); + + let exit = Command::new(&exe).status().expect("falha ao executar"); + + #[cfg(unix)] + assert_eq!(exit.code(), Some(10)); + + let _ = std::fs::remove_file(&source); + let _ = std::fs::remove_file(&exe); +} From d3c4e76387b71a38003fbe4ffb7915c62ad771d7 Mon Sep 17 00:00:00 2001 From: Bappoz Date: Sat, 20 Jun 2026 02:24:19 -0300 Subject: [PATCH 38/91] feat(opt): implementar CSE local intra-bloco Substitui o stub de CsePass por uma eliminacao real de subexpressoes comuns por bloco basico. Mantem um cache de expressoes (lhs, op, rhs) ja calculadas; ao encontrar repeticao, remove a instrucao redundante e reescreve usos futuros do seu destino para o destino ja calculado. Qualquer redefinicao de variavel/temp invalida as entradas do cache que a referenciam, seja como operando ou como destino cacheado. 
Closes #135 --- src/codegen/inter/mod.rs | 4 +- src/codegen/inter/opt/cse.rs | 219 ++++++++++++++++++++++++++++++++++- 2 files changed, 218 insertions(+), 5 deletions(-) diff --git a/src/codegen/inter/mod.rs b/src/codegen/inter/mod.rs index 35e29ae..2ad3800 100644 --- a/src/codegen/inter/mod.rs +++ b/src/codegen/inter/mod.rs @@ -53,13 +53,13 @@ pub enum Instruction { Nop, } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Value { Int(i64), Temp(String), } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum BinaryOp { Add, Sub, diff --git a/src/codegen/inter/opt/cse.rs b/src/codegen/inter/opt/cse.rs index b08488d..ead84fd 100644 --- a/src/codegen/inter/opt/cse.rs +++ b/src/codegen/inter/opt/cse.rs @@ -1,6 +1,14 @@ +use std::collections::HashMap; + use super::OptPass; -use crate::codegen::inter::Cfg; +use crate::codegen::inter::{BinaryOp, Cfg, Instruction, Value}; +/// Eliminacao local (intra-bloco) de subexpressoes comuns. +/// +/// Para cada bloco basico, mantem um mapa das expressoes `lhs op rhs` ja +/// computadas. Quando a mesma expressao aparece de novo sem que `lhs`/`rhs` +/// tenham sido modificados, a instrucao redundante e removida e os usos +/// futuros do seu destino sao reescritos para o destino ja calculado. 
pub struct CsePass; impl OptPass for CsePass { @@ -8,7 +16,212 @@ impl OptPass for CsePass { "common-subexpression-elimination" } - fn run(&self, _cfg: &mut Cfg) -> bool { - false + fn run(&self, cfg: &mut Cfg) -> bool { + let mut changed = false; + + for block in &mut cfg.blocks { + changed |= eliminate_in_block(&mut block.instructions); + } + + changed + } +} + +type AvailableExprs = HashMap<(Value, BinaryOp, Value), String>; + +fn eliminate_in_block(instructions: &mut Vec) -> bool { + let mut changed = false; + let mut available: AvailableExprs = HashMap::new(); + let mut rename: HashMap = HashMap::new(); + let mut result = Vec::with_capacity(instructions.len()); + + for instruction in instructions.drain(..) { + match instruction { + Instruction::Binary { dst, op, lhs, rhs } => { + let lhs = resolve(&rename, lhs); + let rhs = resolve(&rename, rhs); + + invalidate(&mut available, &mut rename, &dst); + + let key = (lhs.clone(), op, rhs.clone()); + if let Some(cached) = available.get(&key) { + rename.insert(dst, cached.clone()); + changed = true; + continue; + } + + available.insert(key, dst.clone()); + result.push(Instruction::Binary { dst, op, lhs, rhs }); + } + Instruction::Assign { dst, value } => { + let value = resolve(&rename, value); + invalidate(&mut available, &mut rename, &dst); + result.push(Instruction::Assign { dst, value }); + } + other => result.push(other), + } + } + + *instructions = result; + changed +} + +fn resolve(rename: &HashMap, value: Value) -> Value { + match value { + Value::Temp(name) => { + let mut current = name; + while let Some(next) = rename.get(¤t) { + if *next == current { + break; + } + current = next.clone(); + } + Value::Temp(current) + } + other => other, + } +} + +/// Remove do cache qualquer expressao disponivel que dependa de `name`, +/// seja como operando ou como destino ja calculado, pois `name` esta +/// sendo redefinido. 
+fn invalidate(available: &mut AvailableExprs, rename: &mut HashMap, name: &str) { + available.retain(|(lhs, _, rhs), cached| { + !references(lhs, name) && !references(rhs, name) && cached != name + }); + rename.retain(|_, target| target != name); +} + +fn references(value: &Value, name: &str) -> bool { + matches!(value, Value::Temp(n) if n == name) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::codegen::inter::BasicBlock; + + fn binary(dst: &str, op: BinaryOp, lhs: Value, rhs: Value) -> Instruction { + Instruction::Binary { + dst: dst.to_string(), + op, + lhs, + rhs, + } + } + + #[test] + fn eliminates_repeated_expression_in_same_block() { + let mut block = BasicBlock::new("entry"); + block.instructions.push(binary( + "t1", + BinaryOp::Add, + Value::Temp("a".into()), + Value::Temp("b".into()), + )); + block.instructions.push(binary( + "t2", + BinaryOp::Add, + Value::Temp("a".into()), + Value::Temp("b".into()), + )); + block.instructions.push(Instruction::Assign { + dst: "y".to_string(), + value: Value::Temp("t2".into()), + }); + + let changed = eliminate_in_block(&mut block.instructions); + + assert!(changed); + assert_eq!( + block.instructions, + vec![ + binary( + "t1", + BinaryOp::Add, + Value::Temp("a".into()), + Value::Temp("b".into()) + ), + Instruction::Assign { + dst: "y".to_string(), + value: Value::Temp("t1".into()), + }, + ] + ); + } + + #[test] + fn does_not_eliminate_when_operand_is_redefined_between_uses() { + let mut block = BasicBlock::new("entry"); + block.instructions.push(binary( + "t1", + BinaryOp::Add, + Value::Temp("a".into()), + Value::Temp("b".into()), + )); + block.instructions.push(Instruction::Assign { + dst: "a".to_string(), + value: Value::Int(5), + }); + block.instructions.push(binary( + "t2", + BinaryOp::Add, + Value::Temp("a".into()), + Value::Temp("b".into()), + )); + + let changed = eliminate_in_block(&mut block.instructions); + + assert!(!changed); + assert_eq!(block.instructions.len(), 3); + } + + #[test] + fn 
keeps_different_expressions_in_cache_independently() { + let mut block = BasicBlock::new("entry"); + block.instructions.push(binary( + "x", + BinaryOp::Mul, + Value::Temp("a".into()), + Value::Temp("b".into()), + )); + block.instructions.push(binary( + "y", + BinaryOp::Sub, + Value::Temp("a".into()), + Value::Temp("d".into()), + )); + + let changed = eliminate_in_block(&mut block.instructions); + + assert!(!changed); + assert_eq!(block.instructions.len(), 2); + } + + #[test] + fn pass_reports_changes_across_multiple_blocks() { + let mut cfg = Cfg::new(); + + let mut block = BasicBlock::new("entry"); + block.instructions.push(binary( + "t1", + BinaryOp::Add, + Value::Temp("a".into()), + Value::Temp("b".into()), + )); + block.instructions.push(binary( + "t2", + BinaryOp::Add, + Value::Temp("a".into()), + Value::Temp("b".into()), + )); + cfg.add_block(block); + + let pass = CsePass; + let changed = pass.run(&mut cfg); + + assert!(changed); + assert_eq!(cfg.blocks[0].instructions.len(), 1); + assert!(!pass.run(&mut cfg)); } } From 01cf55be98415bd1fd6f2425a8d3c94db8b0eb7f Mon Sep 17 00:00:00 2001 From: matheuslemesam Date: Thu, 18 Jun 2026 10:08:25 -0300 Subject: [PATCH 39/91] perf(parser): elimina clones O(n^2) no encadeamento postfix try_parse_postfix agora recebe/devolve Expr por valor em vez de &mut Expr, movendo lhs para dentro do novo no em vez de clonar. Cadeias como a.b.c.d.e.f passam a crescer em O(n) e nao O(n^2). 
Closes #86 --- docs/parser.md | 2 +- src/parser/parser.rs | 6 ++++- src/parser/rules/expressions/postfix.rs | 33 +++++++++++++++---------- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/docs/parser.md b/docs/parser.md index 1d4e408..4836051 100644 --- a/docs/parser.md +++ b/docs/parser.md @@ -305,7 +305,7 @@ Parseadas por `parse_prefix_expr()` antes do loop Pratt: ### Postfix Expressions -`try_parse_postfix(lhs)` retorna `true` se consumiu algo: +`try_parse_postfix(lhs)` retorna `(expr, true)` se consumiu algo (tomando `lhs` por valor e devolvendo o novo nó) ou `(lhs, false)` caso contrário: | Token | Nó produzido | |---|---| diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 8481031..7105a4a 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -52,7 +52,11 @@ impl Parser { loop { // Primeiro tratamos todos os postfix, pois têm maior precedência efetiva. - if postfix::try_parse_postfix(self, &mut lhs)? { + // `try_parse_postfix` toma `lhs` por valor e devolve o novo nó (ou o mesmo + // `lhs` intocado quando não há postfix), evitando clones O(n²) em cadeias. + let (next_lhs, consumed) = postfix::try_parse_postfix(self, lhs)?; + lhs = next_lhs; + if consumed { continue; } diff --git a/src/parser/rules/expressions/postfix.rs b/src/parser/rules/expressions/postfix.rs index bcfbfef..64ee33d 100644 --- a/src/parser/rules/expressions/postfix.rs +++ b/src/parser/rules/expressions/postfix.rs @@ -4,8 +4,13 @@ use crate::lexer::tokens::token_kind::TokenKind; use crate::parser::parser::Parser; /// Tenta parsear uma operação postfix (`()`, `[]`, `.`, `->`, `++`, `--`) sobre `lhs`. -/// Retorna `Ok(true)` se consumiu um postfix, `Ok(false)` se não há postfix aplicável. -pub fn try_parse_postfix(parser: &mut Parser, lhs: &mut Expr) -> Result { +/// +/// Recebe `lhs` **por valor** para encapsulá-lo em um novo nó movendo (e não clonando) +/// a subexpressão. Encadeamentos como `a.b.c.d.e.f` assim crescem em O(n) em vez de O(n²). 
+/// +/// Retorna `Ok((expr, true))` se consumiu um postfix (com o novo nó já construído) ou +/// `Ok((lhs, false))` quando não há postfix aplicável (devolvendo `lhs` intacto). +pub fn try_parse_postfix(parser: &mut Parser, lhs: Expr) -> Result<(Expr, bool), CompilerError> { match parser.peek_kind() { TokenKind::LeftParen => { let start = lhs.span(); @@ -25,8 +30,8 @@ pub fn try_parse_postfix(parser: &mut Parser, lhs: &mut Expr) -> Result { let start = lhs.span(); @@ -36,10 +41,11 @@ pub fn try_parse_postfix(parser: &mut Parser, lhs: &mut Expr) -> Result { + let start = lhs.span(); let op = parser.advance().clone(); let field_token = parser.advance().clone(); let TokenKind::Identifier(field_name) = field_token.kind.clone() else { @@ -50,26 +56,27 @@ pub fn try_parse_postfix(parser: &mut Parser, lhs: &mut Expr) -> Result { + let start = lhs.span(); let op = parser.advance().clone(); - let span = parser.join_span(lhs.span(), parser.span_of(&op)); + let span = parser.join_span(start, parser.span_of(&op)); let kind = if op.kind == TokenKind::PlusPlus { PostfixOp::Inc } else { PostfixOp::Dec }; - *lhs = Expr::Postfix(kind, Box::new(lhs.clone()), span); - Ok(true) + let new_expr = Expr::Postfix(kind, Box::new(lhs), span); + Ok((new_expr, true)) } - _ => Ok(false), + _ => Ok((lhs, false)), } } From 96871072b8ff259ff22d3d301cdd2eb471ca2d32 Mon Sep 17 00:00:00 2001 From: matheuslemesam Date: Sat, 20 Jun 2026 09:36:11 -0300 Subject: [PATCH 40/91] test(parser): cover deep member access chaining --- src/tests/parser_test.rs | 42 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/tests/parser_test.rs b/src/tests/parser_test.rs index 711c9ea..3e4d824 100644 --- a/src/tests/parser_test.rs +++ b/src/tests/parser_test.rs @@ -2,7 +2,7 @@ mod tests { use crate::common::ast::ast::{QualifierType, Type}; use crate::common::ast::decl::Decl; - use crate::common::ast::expr::{BinOp, Expr, Literal, PostfixOp, PrefixOp}; + use 
crate::common::ast::expr::{BinOp, Expr, Literal, MemberAccess, PostfixOp, PrefixOp}; use crate::common::ast::stmt::{Stmt, SwitchLabel}; use crate::common::input::span::ByteSpan; use crate::lexer::tokens::token::Token; @@ -140,6 +140,46 @@ mod tests { assert_eq!(args.len(), 2); } + #[test] + fn parses_deep_member_access_chain_left_associative() { + // a.b.c.d deve produzir Member(Member(Member(a, b), c), d), + // encadeamento left-assoc que cresce em O(n) (regressão do issue #86). + let tokens = vec![ + ident("a", 1), + tk(TokenKind::Dot, 2), + ident("b", 3), + tk(TokenKind::Dot, 4), + ident("c", 5), + tk(TokenKind::Dot, 6), + ident("d", 7), + eof(8), + ]; + + let expr = Parser::new(tokens) + .parse_expr(0) + .expect("encadeamento de membros válido"); + + let Expr::Member(inner, MemberAccess::Direct, field, _) = expr else { + panic!("esperava Member no topo do encadeamento"); + }; + assert_eq!(field, "d"); + + let Expr::Member(inner, MemberAccess::Direct, field, _) = *inner else { + panic!("esperava Member intermediário"); + }; + assert_eq!(field, "c"); + + let Expr::Member(inner, MemberAccess::Direct, field, _) = *inner else { + panic!("esperava Member intermediário"); + }; + assert_eq!(field, "b"); + + assert!(matches!(*inner, Expr::Ident(..))); + if let Expr::Ident(name, _) = *inner { + assert_eq!(name, "a"); + } + } + #[test] fn parses_cast_expression() { let tokens = vec![ From d33d0238a085488511c1c8ef928f3b88f4a3509b Mon Sep 17 00:00:00 2001 From: Bappoz Date: Sat, 20 Jun 2026 11:31:58 -0300 Subject: [PATCH 41/91] fix(codegen): support calls with more than 6 integer arguments abi::stack_arg_offset already accounted for stack-passed arguments on the callee side, but emit_call only ever wired up the 6 register arguments and panicked otherwise. Push the extra arguments onto the stack in reverse order, pad to keep the call site 16-byte aligned, and clean up rsp after the call. 
--- src/codegen/last/x86_64.rs | 110 ++++++++++++++++++++++++++++++++----- tests/codegen_smoke.rs | 110 +++++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 14 deletions(-) diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs index 85ab473..78fa67b 100644 --- a/src/codegen/last/x86_64.rs +++ b/src/codegen/last/x86_64.rs @@ -355,24 +355,35 @@ fn emit_call( dst: Option, frame: &Frame, ) { - // Apenas a convencao de registradores esta implementada (ate 6 inteiros). - // A stack permanece 16-alinhada no `call` porque o prologue a alinha e nao - // empurramos nada aqui. - for (index, arg) in args.iter().enumerate() { - match abi::arg_register(index) { - Some(reg) => { - load_op(em, frame, arg, "rax"); - em.insn(&format!("movq %rax, %{reg}")); - } - None => panic!( - "codegen atual suporta ate {} argumentos por registrador", - abi::MAX_REG_ARGS - ), - } + // Argumentos alem de `MAX_REG_ARGS` vao para a stack do chamador, na + // ordem inversa (o primeiro arg de stack fica no topo, mais proximo do + // endereco de retorno), espelhando `abi::stack_arg_offset`. + let stack_arg_count = args.len().saturating_sub(abi::MAX_REG_ARGS); + // Mantem a stack 16-alinhada no `call`: cada push usa 8 bytes, entao uma + // quantidade impar de argumentos de stack precisa de 8 bytes de padding. 
+ let padding = if stack_arg_count % 2 == 1 { 8 } else { 0 }; + if padding > 0 { + em.insn(&format!("subq ${padding}, %rsp")); + } + let stack_args = &args[args.len().min(abi::MAX_REG_ARGS)..]; + for arg in stack_args.iter().rev() { + load_op(em, frame, arg, "rax"); + em.insn("pushq %rax"); + } + + for (index, arg) in args.iter().take(abi::MAX_REG_ARGS).enumerate() { + let reg = abi::arg_register(index).expect("index < MAX_REG_ARGS sempre tem registrador"); + load_op(em, frame, arg, "rax"); + em.insn(&format!("movq %rax, %{reg}")); } em.insn(&format!("call {fn_name}")); + let cleanup = (stack_arg_count as i64) * 8 + padding; + if cleanup > 0 { + em.insn(&format!("addq ${cleanup}, %rsp")); + } + if let Some(dst) = dst { store_op(em, frame, &Operand::Temp(dst), "rax"); } @@ -585,6 +596,77 @@ mod tests { assert!(out.contains("call soma")); } + #[test] + fn call_with_seven_args_pushes_one_stack_arg() { + let args = (1..=7) + .map(|n| Operand::Const(ConstValue::Int(n))) + .collect(); + let f = func( + "caller7", + Vec::new(), + vec![ + TacInstr::Call { + dst: Some(TempId(0)), + fn_name: "sum7".to_string(), + args, + }, + TacInstr::Return { + val: Some(Operand::Temp(TempId(0))), + }, + ], + ); + + let out = emit_function(&f); + + // 7th arg (index 6) e o unico passado pela stack; aligna a stack com + // 8 bytes de padding (1 arg de stack e impar) antes do push. + assert!(out.contains("subq $8, %rsp")); + assert!(out.contains("pushq %rax")); + assert!(out.contains("call sum7")); + assert!(out.contains("addq $16, %rsp")); + } + + #[test] + fn call_with_eight_args_needs_no_padding() { + let args = (1..=8) + .map(|n| Operand::Const(ConstValue::Int(n))) + .collect(); + let f = func( + "caller8", + Vec::new(), + vec![TacInstr::Call { + dst: Some(TempId(0)), + fn_name: "sum8".to_string(), + args, + }], + ); + + let out = emit_function(&f); + + // 2 args de stack (indices 6 e 7): par, sem padding necessario. 
+ assert!(!out.contains("subq $8, %rsp")); + assert!(out.contains("addq $16, %rsp")); + } + + #[test] + fn call_with_two_args_emits_no_stack_cleanup() { + let out = emit_function(&func( + "caller2", + Vec::new(), + vec![TacInstr::Call { + dst: Some(TempId(0)), + fn_name: "soma".to_string(), + args: vec![ + Operand::Const(ConstValue::Int(1)), + Operand::Const(ConstValue::Int(2)), + ], + }], + )); + + assert!(!out.contains("pushq %rax")); + assert!(!out.contains("addq $16, %rsp")); + } + #[test] fn cond_jump_uses_test_and_jne() { let f = func( diff --git a/tests/codegen_smoke.rs b/tests/codegen_smoke.rs index 8e044db..3c7cba2 100644 --- a/tests/codegen_smoke.rs +++ b/tests/codegen_smoke.rs @@ -169,6 +169,116 @@ fn smoke_simple_return_const_runs() { let _ = std::fs::remove_file(&exe); } +#[test] +fn smoke_call_with_more_than_six_args_runs() { + require_gcc!(); + + // sum9(1..9) = 45, exercitando 3 argumentos passados pela stack (alem + // dos 6 em registrador) e o padding de alinhamento de 8 bytes. 
+ let sum9 = TacFunction { + name: "sum9".to_string(), + params: (1..=9).map(|n| format!("a{n}")).collect(), + instrs: vec![ + TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Add, + lhs: Operand::Var("a1".to_string()), + rhs: Operand::Var("a2".to_string()), + }, + TacInstr::BinOp { + dst: TempId(1), + op: BinOp::Add, + lhs: Operand::Temp(TempId(0)), + rhs: Operand::Var("a3".to_string()), + }, + TacInstr::BinOp { + dst: TempId(2), + op: BinOp::Add, + lhs: Operand::Temp(TempId(1)), + rhs: Operand::Var("a4".to_string()), + }, + TacInstr::BinOp { + dst: TempId(3), + op: BinOp::Add, + lhs: Operand::Temp(TempId(2)), + rhs: Operand::Var("a5".to_string()), + }, + TacInstr::BinOp { + dst: TempId(4), + op: BinOp::Add, + lhs: Operand::Temp(TempId(3)), + rhs: Operand::Var("a6".to_string()), + }, + TacInstr::BinOp { + dst: TempId(5), + op: BinOp::Add, + lhs: Operand::Temp(TempId(4)), + rhs: Operand::Var("a7".to_string()), + }, + TacInstr::BinOp { + dst: TempId(6), + op: BinOp::Add, + lhs: Operand::Temp(TempId(5)), + rhs: Operand::Var("a8".to_string()), + }, + TacInstr::BinOp { + dst: TempId(7), + op: BinOp::Add, + lhs: Operand::Temp(TempId(6)), + rhs: Operand::Var("a9".to_string()), + }, + TacInstr::Return { + val: Some(Operand::Temp(TempId(7))), + }, + ], + }; + + let main = TacFunction { + name: "main".to_string(), + params: Vec::new(), + instrs: vec![ + TacInstr::Call { + dst: Some(TempId(0)), + fn_name: "sum9".to_string(), + args: (1..=9) + .map(|n| Operand::Const(ConstValue::Int(n))) + .collect(), + }, + TacInstr::Return { + val: Some(Operand::Temp(TempId(0))), + }, + ], + }; + + let prog = TacProgram { + functions: vec![sum9, main], + }; + + let asm = emit_program(&prog); + let source = write_temp_source("manyargs", &asm); + let exe = source.with_extension("bin"); + + let link = Command::new("gcc") + .arg(&source) + .arg("-o") + .arg(&exe) + .status() + .expect("falha ao invocar gcc"); + assert!(link.success()); + + let exit = Command::new(&exe).status().expect("falha ao 
executar"); + + #[cfg(unix)] + assert_eq!( + exit.code(), + Some(45), + "esperava exit code 45 (soma de 1..=9)" + ); + + let _ = std::fs::remove_file(&source); + let _ = std::fs::remove_file(&exe); +} + #[test] fn smoke_control_flow_if_else_runs() { require_gcc!(); From 7cb6e402363e5b375f963a474af04622601606d7 Mon Sep 17 00:00:00 2001 From: Bappoz Date: Sat, 20 Jun 2026 11:51:16 -0300 Subject: [PATCH 42/91] =?UTF-8?q?feat(codegen):=20emite=20objeto=20ELF=20e?= =?UTF-8?q?=20linka=20execut=C3=A1vel=20via=20gcc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implementa a etapa final do pipeline (issue #130): após gerar o assembly x86-64, o compilador agora pode montar (.o) ou montar+linkar (.exe) usando o gcc do sistema via std::process::Command, seguindo a Opção A recomendada na issue. CLI: - -o : define o caminho de saída (default por --emit: .s, .o ou a.out) - --emit=asm|obj|exe: seleciona o artefato final (default: exe) - -S/--emit-asm: alias mantido para --emit=asm --emit=exe é agora o comportamento padrão do binário: rodar 'crusty arquivo.c' sem flags produz um executável ELF real em a.out, com _start/runtime cuidados automaticamente pelo gcc no link. Testes: - tests/exe_smoke_test.rs: smoke tests ponta-a-ponta (fonte C real -> lexer -> parser -> semantic -> IR -> codegen -> gcc -> binário), cobrindo os dois casos pedidos pela issue (exit 0 e exit 42) mais variável local e chamada de função. Pulados (skip) se gcc ausente. - src/main.rs: testes de parsing de -o, --emit= e dos defaults de output_path_for por EmitKind. 
Closes #130 --- src/main.rs | 234 ++++++++++++++++++++++++++++++++++++---- tests/exe_smoke_test.rs | 143 ++++++++++++++++++++++++ 2 files changed, 358 insertions(+), 19 deletions(-) create mode 100644 tests/exe_smoke_test.rs diff --git a/src/main.rs b/src/main.rs index 6639578..4e56f2b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,8 +9,8 @@ use crusty::ir::lower::lower_program; use crusty::lexer::scanner::Scanner; use crusty::parser::Parser; use std::env; -use std::path::PathBuf; -use std::process::exit; +use std::path::{Path, PathBuf}; +use std::process::{exit, Command}; fn main() -> std::io::Result<()> { let raw: Vec<_> = env::args().collect(); @@ -27,7 +27,11 @@ fn main() -> std::io::Result<()> { eprintln!(" --only-lex Stop after lexing"); eprintln!(" --only-parse Stop after parsing"); eprintln!(" --only-semantic Stop after semantic analysis"); - eprintln!(" -S, --emit-asm Emit x86-64 assembly (AT&T) to .s"); + eprintln!(" -o Set the output file path"); + eprintln!(" --emit=asm Stop after emitting x86-64 assembly (.s)"); + eprintln!(" --emit=obj Stop after assembling an object file (.o)"); + eprintln!(" --emit=exe Link a runnable executable (default)"); + eprintln!(" -S, --emit-asm Alias for --emit=asm"); eprintln!(" -O0|-O1|-O2|-O3 Set optimization level"); eprintln!(" --opt-level 0|1|2|3 Set optimization level"); exit(64); @@ -47,15 +51,39 @@ fn main() -> std::io::Result<()> { // ── CLI arg parsing ────────────────────────────────────────────────────────── +/// O que o pipeline deve produzir como artefato final. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +enum EmitKind { + /// Para após gerar o assembly x86-64 (`.s`). + Asm, + /// Monta o assembly em um objeto ELF (`.o`) via `gcc -c`, sem linkar. + Obj, + /// Monta e linka um executável ELF rodável (padrão). 
+ #[default] + Exe, +} + +impl EmitKind { + fn parse(value: &str) -> Option { + match value { + "asm" => Some(Self::Asm), + "obj" => Some(Self::Obj), + "exe" => Some(Self::Exe), + _ => None, + } + } +} + struct CliArgs { input_file: Option, + output_file: Option, dump_tokens: bool, dump_ast: bool, dump_ir: bool, only_lex: bool, only_parse: bool, only_semantic: bool, - emit_asm: bool, + emit: EmitKind, opt_level: OptLevel, } @@ -63,13 +91,14 @@ impl CliArgs { fn parse(args: &[String]) -> Self { let mut cli = CliArgs { input_file: None, + output_file: None, dump_tokens: false, dump_ast: false, dump_ir: false, only_lex: false, only_parse: false, only_semantic: false, - emit_asm: false, + emit: EmitKind::default(), opt_level: OptLevel::default(), }; @@ -84,7 +113,22 @@ impl CliArgs { "--only-lex" => cli.only_lex = true, "--only-parse" => cli.only_parse = true, "--only-semantic" => cli.only_semantic = true, - "-S" | "--emit-asm" => cli.emit_asm = true, + "-S" | "--emit-asm" => cli.emit = EmitKind::Asm, + "-o" => { + i += 1; + let Some(path) = args.get(i) else { + eprintln!("error: missing value for -o"); + exit(64); + }; + cli.output_file = Some(path.clone()); + } + _ if arg.starts_with("--emit=") => { + let value = arg.strip_prefix("--emit=").unwrap(); + cli.emit = EmitKind::parse(value).unwrap_or_else(|| { + eprintln!("error: invalid --emit value: {value} (expected asm|obj|exe)"); + exit(64); + }); + } "-O" | "--opt-level" => { i += 1; let Some(level) = args.get(i) else { @@ -149,6 +193,15 @@ impl ToReport for IoError { } } +#[derive(Debug)] +struct LinkError(String); + +impl ToReport for LinkError { + fn to_report(&self) -> Report { + Report::new(&format!("link/assemble error: {}", self.0)) + } +} + fn run(source: SourceFile, args: &CliArgs) -> Result<(), Box> { // ── Stage 1: Lex ───────────────────────────────────────────────────────── let mut scanner = Scanner::new(source); @@ -220,24 +273,100 @@ fn run(source: SourceFile, args: &CliArgs) -> Result<(), Box> { 
opt_pipeline.run(&mut cfg, 10); // ── Stage 5: Code generation (x86-64 / AT&T) ───────────────────────────── - if args.emit_asm { - let tac_program = lower_program(&program); - let asm = last::emit_program(&tac_program); + let tac_program = lower_program(&program); + let asm = last::emit_program(&tac_program); - let output_path = asm_output_path(&args.input_file); - std::fs::write(&output_path, asm) - .map_err(|e| Box::new(IoError(e.to_string())) as Box)?; - eprintln!("emitted assembly: {}", output_path.display()); - } + let output_path = output_path_for(&args.input_file, &args.output_file, args.emit); + emit_artifact(&asm, &output_path, args.emit)?; + eprintln!( + "emitted {}: {}", + emit_kind_label(args.emit), + output_path.display() + ); Ok(()) } -fn asm_output_path(input: &Option) -> PathBuf { - let input = input.clone().unwrap_or_else(|| "crusty.out".to_string()); - let mut path = PathBuf::from(input); - path.set_extension("s"); - path +/// Resolve o caminho de saída final, respeitando `-o` quando fornecido e +/// caindo para um default específico de cada `EmitKind` caso contrário. +fn output_path_for( + input: &Option, + output_override: &Option, + emit: EmitKind, +) -> PathBuf { + if let Some(path) = output_override { + return PathBuf::from(path); + } + + match emit { + EmitKind::Exe => PathBuf::from("a.out"), + EmitKind::Asm | EmitKind::Obj => { + let input = input.clone().unwrap_or_else(|| "crusty.out".to_string()); + let mut path = PathBuf::from(input); + path.set_extension(if emit == EmitKind::Asm { "s" } else { "o" }); + path + } + } +} + +fn emit_kind_label(emit: EmitKind) -> &'static str { + match emit { + EmitKind::Asm => "assembly", + EmitKind::Obj => "object file", + EmitKind::Exe => "executable", + } +} + +/// Escreve o assembly gerado no destino final, invocando `gcc` para montar +/// (`--emit=obj`) ou montar+linkar (`--emit=exe`) quando necessário. 
+fn emit_artifact(asm: &str, output_path: &Path, emit: EmitKind) -> Result<(), Box> { + if emit == EmitKind::Asm { + return std::fs::write(output_path, asm) + .map_err(|e| Box::new(IoError(e.to_string())) as Box); + } + + let asm_path = write_temp_asm(asm)?; + let result = match emit { + EmitKind::Obj => run_gcc(&["-c", "-x", "assembler-with-cpp"], &asm_path, output_path), + EmitKind::Exe => run_gcc(&[], &asm_path, output_path), + EmitKind::Asm => unreachable!(), + }; + let _ = std::fs::remove_file(&asm_path); + result +} + +/// Grava o assembly em um arquivo temporário único, usado como entrada do `gcc`. +fn write_temp_asm(asm: &str) -> Result> { + let mut path = env::temp_dir(); + path.push(format!("crusty_{}.s", std::process::id())); + std::fs::write(&path, asm) + .map_err(|e| Box::new(IoError(e.to_string())) as Box)?; + Ok(path) +} + +/// Invoca `gcc` com flags extras (ex.: `-c -x assembler-with-cpp` para gerar +/// objeto) sobre `asm_path`, escrevendo o resultado em `output_path`. +fn run_gcc( + extra_args: &[&str], + asm_path: &Path, + output_path: &Path, +) -> Result<(), Box> { + let status = Command::new("gcc") + .args(extra_args) + .arg(asm_path) + .arg("-o") + .arg(output_path) + .status() + .map_err(|e| { + Box::new(LinkError(format!("falha ao invocar gcc: {e}"))) as Box + })?; + + if !status.success() { + return Err(Box::new(LinkError(format!( + "gcc terminou com status {status}" + )))); + } + Ok(()) } // ── Dump helpers ───────────────────────────────────────────────────────────── @@ -320,4 +449,71 @@ mod tests { let parsed = CliArgs::parse(&args(&["crusty", "--opt-level", "1", "main.c"])); assert_eq!(parsed.opt_level, OptLevel::O1); } + + #[test] + fn defaults_to_emit_exe() { + let parsed = CliArgs::parse(&args(&["crusty", "main.c"])); + assert_eq!(parsed.emit, EmitKind::Exe); + assert_eq!(parsed.output_file, None); + } + + #[test] + fn parses_output_flag() { + let parsed = CliArgs::parse(&args(&["crusty", "-o", "prog", "main.c"])); + 
assert_eq!(parsed.output_file, Some("prog".to_string())); + assert_eq!(parsed.input_file, Some("main.c".to_string())); + } + + #[test] + fn parses_emit_flag_variants() { + assert_eq!( + CliArgs::parse(&args(&["crusty", "--emit=asm", "main.c"])).emit, + EmitKind::Asm + ); + assert_eq!( + CliArgs::parse(&args(&["crusty", "--emit=obj", "main.c"])).emit, + EmitKind::Obj + ); + assert_eq!( + CliArgs::parse(&args(&["crusty", "--emit=exe", "main.c"])).emit, + EmitKind::Exe + ); + assert_eq!( + CliArgs::parse(&args(&["crusty", "-S", "main.c"])).emit, + EmitKind::Asm + ); + assert_eq!( + CliArgs::parse(&args(&["crusty", "--emit-asm", "main.c"])).emit, + EmitKind::Asm + ); + } + + #[test] + fn output_path_defaults_per_emit_kind() { + let input = Some("foo/main.c".to_string()); + assert_eq!( + output_path_for(&input, &None, EmitKind::Asm), + PathBuf::from("foo/main.s") + ); + assert_eq!( + output_path_for(&input, &None, EmitKind::Obj), + PathBuf::from("foo/main.o") + ); + assert_eq!( + output_path_for(&input, &None, EmitKind::Exe), + PathBuf::from("a.out") + ); + } + + #[test] + fn output_path_respects_override() { + let input = Some("main.c".to_string()); + let output = Some("custom_out".to_string()); + for emit in [EmitKind::Asm, EmitKind::Obj, EmitKind::Exe] { + assert_eq!( + output_path_for(&input, &output, emit), + PathBuf::from("custom_out") + ); + } + } } diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs new file mode 100644 index 0000000..32c6e92 --- /dev/null +++ b/tests/exe_smoke_test.rs @@ -0,0 +1,143 @@ +//! Smoke tests ponta-a-ponta da issue #130: fonte C real passa por todo o +//! pipeline (lexer -> parser -> semantic -> IR -> codegen x86-64) e o +//! assembly resultante e montado/linkado com `gcc` em um executavel ELF +//! real, que e entao executado para checar o exit code. +//! +//! Diferente de `tests/codegen_smoke.rs` (que monta `TacProgram` a mao), +//! aqui o ponto de entrada e codigo-fonte C, exercitando o front-end +//! completo. 
Se `gcc` nao estiver disponivel no ambiente, os testes sao +//! ignorados (skip) em vez de falhar. + +use std::path::PathBuf; +use std::process::{Command, ExitStatus}; + +use crusty::analyser::analyse; +use crusty::codegen::last::emit_program; +use crusty::common::input::source::SourceFile; +use crusty::ir::lower::lower_program; +use crusty::lexer::scanner::Scanner; +use crusty::parser::Parser; + +fn gcc_available() -> bool { + Command::new("gcc") + .arg("--version") + .output() + .map(|o| o.status.success()) + .unwrap_or(false) +} + +/// Ignora o teste quando nao ha `gcc` no ambiente. +macro_rules! require_gcc { + () => { + if !gcc_available() { + eprintln!("gcc indisponivel: pulando teste de smoke"); + return; + } + }; +} + +/// Roda o pipeline completo (lexer -> parser -> semantic -> IR -> codegen) +/// sobre `source` e retorna o assembly x86-64 gerado. Falha o teste (panic) +/// se qualquer estagio reportar diagnosticos, ja que os fixtures usados +/// aqui sao sempre programas C validos. +fn compile_to_asm(source: &str) -> String { + let mut scanner = Scanner::new(SourceFile::from_string(source)); + scanner.scan(); + assert!( + scanner.diagnostics.is_empty(), + "erros de lexer inesperados: {:?}", + scanner.diagnostics + ); + + let mut parser = Parser::new(scanner.tokens); + let program = parser + .parse_program() + .unwrap_or_else(|errors| panic!("erros de parser inesperados: {errors:?}")); + + let sem_errors = analyse(&program); + assert!( + sem_errors.is_empty(), + "erros semanticos inesperados: {sem_errors:?}" + ); + + let tac_program = lower_program(&program); + emit_program(&tac_program) +} + +/// Compila `source` (C) ate um executavel real via `gcc` e o executa, +/// retornando o `ExitStatus` do processo filho. Limpa os arquivos +/// temporarios (.s e binario) ao final. 
+fn compile_and_run(name: &str, source: &str) -> ExitStatus { + let asm = compile_to_asm(source); + + let mut asm_path = std::env::temp_dir(); + asm_path.push(format!("crusty_exe_smoke_{name}_{}.s", std::process::id())); + std::fs::write(&asm_path, asm).expect("falha ao escrever .s temporario"); + let exe_path: PathBuf = asm_path.with_extension("bin"); + + let link = Command::new("gcc") + .arg(&asm_path) + .arg("-o") + .arg(&exe_path) + .status() + .expect("falha ao invocar gcc"); + assert!( + link.success(), + "gcc nao conseguiu linkar a saida do codegen" + ); + + let status = Command::new(&exe_path) + .status() + .expect("falha ao executar o binario gerado"); + + let _ = std::fs::remove_file(&asm_path); + let _ = std::fs::remove_file(&exe_path); + + status +} + +#[test] +fn smoke_minimal_program_exit_0() { + require_gcc!(); + + let status = compile_and_run("exit0", "int main() { return 0; }"); + + #[cfg(unix)] + assert_eq!(status.code(), Some(0)); +} + +#[test] +fn smoke_minimal_program_exit_42() { + require_gcc!(); + + let status = compile_and_run("exit42", "int main() { return 42; }"); + + #[cfg(unix)] + assert_eq!(status.code(), Some(42)); +} + +#[test] +fn smoke_local_variable_arithmetic_runs() { + require_gcc!(); + + let status = compile_and_run( + "arith", + "int main() { int a = 10; int b = 32; return a + b; }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(42)); +} + +#[test] +fn smoke_function_call_runs() { + require_gcc!(); + + let status = compile_and_run( + "call", + "int soma(int a, int b) { return a + b; } int main() { return soma(19, 23); }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(42)); +} From 8cf752c1c9b72d8520cf1b05c0ebe8a24b1f5641 Mon Sep 17 00:00:00 2001 From: guxvr Date: Sat, 20 Jun 2026 13:45:13 -0300 Subject: [PATCH 43/91] =?UTF-8?q?feat(ir):=20adiciona=20passes=20de=20otim?= =?UTF-8?q?iza=C3=A7=C3=A3o=20sobre=20o=20TAC=20=E2=80=94=20constant=20fol?= =?UTF-8?q?ding,=20propagation=20e=20DCE?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implementa três passes de otimização intraprocessuais sobre Vec<TacInstr> em src/codegen/inter/optimizations.rs --- src/codegen/inter/mod.rs | 1 + src/codegen/inter/optimizations.rs | 740 +++++++++++++++++++++++++++++ src/ir/lower.rs | 14 + 3 files changed, 755 insertions(+) create mode 100644 src/codegen/inter/optimizations.rs diff --git a/src/codegen/inter/mod.rs b/src/codegen/inter/mod.rs index 2ad3800..da683e5 100644 --- a/src/codegen/inter/mod.rs +++ b/src/codegen/inter/mod.rs @@ -1,4 +1,5 @@ pub mod opt; +pub mod optimizations; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Cfg { diff --git a/src/codegen/inter/optimizations.rs b/src/codegen/inter/optimizations.rs new file mode 100644 index 0000000..ff41742 --- /dev/null +++ b/src/codegen/inter/optimizations.rs @@ -0,0 +1,740 @@ +use std::collections::{HashMap, HashSet}; + +use crate::{ + common::ast::expr::{BinOp, UnOp}, + ir::tac::{ConstValue, Operand, TacInstr, TempId}, +}; + +// ─── Liveness ──────────────────────────────────────────────────────────────── + +/// Resultado da análise de vida útil intraprocessual. +/// +/// `live_before[i]` contém o conjunto de `TempId` que estão vivos +/// *imediatamente após* a instrução de índice `i` (live-out), apesar do nome do campo. +pub struct LivenessInfo { + pub live_before: Vec<HashSet<TempId>>, +} + +/// Calcula `LivenessInfo` para uma sequência plana de instruções TAC. +/// +/// Algoritmo: varredura *backward* (de trás para frente). +/// ```text +/// live = {} +/// para i de (n-1) até 0: +/// live_before[i] = live.clone() +/// remover de live: TempId *definido* por instrs[i] +/// adicionar a live: TempId *usados* como operandos de instrs[i] +/// ``` +pub fn compute_liveness(instrs: &[TacInstr]) -> LivenessInfo { + let n = instrs.len(); + let mut live_before = vec![HashSet::new(); n]; + let mut live: HashSet<TempId> = HashSet::new(); + + for i in (0..n).rev() { + live_before[i] = live.clone(); + + // Remover o temporário definido pela instrução.
+ if let Some(def) = instr_def(&instrs[i]) { + live.remove(&def); + } + + // Adicionar todos os temporários usados como operandos. + for used in instr_uses(&instrs[i]) { + live.insert(used); + } + } + + LivenessInfo { live_before } +} + +/// Retorna o `TempId` definido pela instrução, se houver. +/// +/// `Operand::Var` **nunca** é retornado aqui — variáveis nomeadas do programa C +/// têm semântica observável e nunca são candidatas à eliminação pelo DCE. +fn instr_def(instr: &TacInstr) -> Option { + match instr { + TacInstr::BinOp { dst, .. } => Some(*dst), + TacInstr::UnOp { dst, .. } => Some(*dst), + TacInstr::Copy { + dst: Operand::Temp(t), + .. + } => Some(*t), + TacInstr::Call { dst: Some(t), .. } => Some(*t), + _ => None, + } +} + +/// Retorna todos os `TempId` *usados* como operandos da instrução. +fn instr_uses(instr: &TacInstr) -> Vec { + let mut uses = Vec::new(); + + let push = |uses: &mut Vec, op: &Operand| { + if let Operand::Temp(t) = op { + uses.push(*t); + } + }; + + match instr { + TacInstr::BinOp { lhs, rhs, .. } => { + push(&mut uses, lhs); + push(&mut uses, rhs); + } + TacInstr::UnOp { src, .. } => push(&mut uses, src), + TacInstr::Copy { src, .. } => push(&mut uses, src), + TacInstr::CondJump { cond, .. } => push(&mut uses, cond), + TacInstr::Call { args, .. } => { + for arg in args { + push(&mut uses, arg); + } + } + TacInstr::Return { val: Some(v) } => push(&mut uses, v), + _ => {} + } + + uses +} + +// ─── Constant Folding ──────────────────────────────────────────────────────── + +/// Avalia em tempo de compilação operações cujos dois operandos são constantes inteiras. +/// +/// Retorna `true` se alguma instrução foi alterada. +/// +/// **Não** dobra `ConstValue::Double` (evitar divergência de precisão host vs target). +/// **Não** dobra shift com rhs inválido, nem divisão por zero (UB em C). 
+pub fn constant_fold(instrs: &mut Vec) -> bool { + let mut changed = false; + + for instr in instrs.iter_mut() { + match instr { + TacInstr::BinOp { dst, op, lhs, rhs } => { + if let Some(result) = fold_binop(op, lhs, rhs) { + *instr = TacInstr::Copy { + dst: Operand::Temp(*dst), + src: Operand::Const(result), + }; + changed = true; + } + } + TacInstr::UnOp { dst, op, src } => { + if let Some(result) = fold_unop(op, src) { + *instr = TacInstr::Copy { + dst: Operand::Temp(*dst), + src: Operand::Const(result), + }; + changed = true; + } + } + _ => {} + } + } + + changed +} + +fn fold_binop(op: &BinOp, lhs: &Operand, rhs: &Operand) -> Option { + let (Operand::Const(ConstValue::Int(l)), Operand::Const(ConstValue::Int(r))) = (lhs, rhs) + else { + return None; + }; + let l = *l; + let r = *r; + + let result = match op { + // Aritmética + BinOp::Add => l.checked_add(r)?, + BinOp::Sub => l.checked_sub(r)?, + BinOp::Mul => l.checked_mul(r)?, + BinOp::Div => { + if r == 0 { + return None; // divisão por zero — UB, preservar + } + l.checked_div(r)? + } + BinOp::Mod => { + if r == 0 { + return None; // divisão por zero — UB, preservar + } + l.checked_rem(r)? + } + + // Relacionais — resultado é 0 ou 1 + BinOp::Eq => (l == r) as i64, + BinOp::Neq => (l != r) as i64, + BinOp::Less => (l < r) as i64, + BinOp::Greater => (l > r) as i64, + BinOp::Leq => (l <= r) as i64, + BinOp::Geq => (l >= r) as i64, + + // Lógicos — ambos constantes, curto-circuito não se aplica + BinOp::And => ((l != 0) && (r != 0)) as i64, + BinOp::Or => ((l != 0) || (r != 0)) as i64, + + // Bitwise + BinOp::BitAnd => l & r, + BinOp::BitOr => l | r, + BinOp::BitXor => l ^ r, + + // Shift — UB se rhs < 0 ou rhs >= 64 + BinOp::Shl => { + if r < 0 || r >= 64 { + return None; + } + l.checked_shl(r as u32)? + } + BinOp::Shr => { + if r < 0 || r >= 64 { + return None; + } + l.checked_shr(r as u32)? 
+ } + }; + + Some(ConstValue::Int(result)) +} + +fn fold_unop(op: &UnOp, src: &Operand) -> Option { + let Operand::Const(ConstValue::Int(v)) = src else { + return None; + }; + let v = *v; + + let result = match op { + UnOp::Neg => v.checked_neg()?, + UnOp::BitNot => !v, + UnOp::Not => (v == 0) as i64, + // Deref e AddrOf não podem ser dobrados em tempo de compilação + UnOp::Deref | UnOp::AddrOf => return None, + }; + + Some(ConstValue::Int(result)) +} + +// ─── Constant Propagation ──────────────────────────────────────────────────── + +/// Propaga valores constantes conhecidos de temporários para seus usos posteriores. +/// +/// Mantém um mapa `TempId → ConstValue`. Ao encontrar `Copy { dst: Temp(t), src: Const(v) }`, +/// registra `t → v`. Ao encontrar operandos que referenciam temporários mapeados, +/// substitui pelo `Const` correspondente. Ao encontrar redefinição não-constante de `t`, +/// invalida a entrada do mapa. +/// +/// Retorna `true` se alguma substituição foi feita. +pub fn constant_propagation(instrs: &mut Vec) -> bool { + let mut changed = false; + let mut const_map: HashMap = HashMap::new(); + + for instr in instrs.iter_mut() { + // 1. Substituir usos de temporários conhecidos pelos seus valores constantes. + propagate_uses(instr, &const_map, &mut changed); + + // 2. Atualizar o mapa conforme o efeito da instrução. + match instr { + // Copy com fonte constante: registrar o mapeamento. + TacInstr::Copy { + dst: Operand::Temp(t), + src: Operand::Const(v), + } => { + const_map.insert(*t, v.clone()); + } + // Copy com fonte não-constante: invalidar — o temporário passou a + // ter um valor desconhecido. + TacInstr::Copy { + dst: Operand::Temp(t), + src: _, + } => { + const_map.remove(t); + } + // Qualquer outra instrução que redefine um temporário invalida a entrada. + TacInstr::BinOp { dst, .. } | TacInstr::UnOp { dst, .. } => { + const_map.remove(dst); + } + TacInstr::Call { dst: Some(t), .. 
} => { + const_map.remove(t); + } + _ => {} + } + } + + changed +} + +fn propagate_uses( + instr: &mut TacInstr, + const_map: &HashMap<TempId, ConstValue>, + changed: &mut bool, +) { + let subst = |op: &mut Operand, changed: &mut bool| { + if let Operand::Temp(t) = op { + if let Some(v) = const_map.get(t) { + *op = Operand::Const(v.clone()); + *changed = true; + } + } + }; + + match instr { + TacInstr::BinOp { lhs, rhs, .. } => { + subst(lhs, changed); + subst(rhs, changed); + } + TacInstr::UnOp { src, .. } => subst(src, changed), + TacInstr::Copy { src, .. } => subst(src, changed), + TacInstr::CondJump { cond, .. } => subst(cond, changed), + TacInstr::Call { args, .. } => { + for arg in args.iter_mut() { + subst(arg, changed); + } + } + TacInstr::Return { val: Some(v) } => subst(v, changed), + _ => {} + } +} + +// ─── Dead Code Elimination ─────────────────────────────────────────────────── + +/// Remove instruções TAC cujo resultado nunca é lido e que não possuem side-effects. +/// +/// Uma instrução é eliminável se: +/// 1. Define um `TempId` (não `Var`). +/// 2. Esse `TempId` não está vivo após a instrução (`live_before[i]`, que guarda o live-out de `i`). +/// 3. Não é `Call`, `Return`, `Jump`, `CondJump`, `Label`, +/// nem `Copy` com destino `Operand::Var`. +/// +/// Retorna `true` se alguma instrução foi removida. +pub fn dead_code_eliminate(instrs: &mut Vec<TacInstr>, liveness: &LivenessInfo) -> bool { + let n = instrs.len(); + let mut keep = vec![true; n]; + + for i in 0..n { + if has_side_effects(&instrs[i]) { + continue; + } + + if let Some(def) = instr_def(&instrs[i]) { + // O algoritmo backward armazena em live_before[i] o conjunto de + // temporários vivos *antes* de remover o def e adicionar os uses de i. + // Ou seja, live_before[i] é o live-OUT da instrução i — os temporários + // vivos logo após a execução de i. + // Se o def não aparece em live_before[i], ele nunca é lido depois.
+ if !liveness.live_before[i].contains(&def) { + keep[i] = false; + } + } + } + + let before = instrs.len(); + let mut i = 0; + instrs.retain(|_| { + let k = keep[i]; + i += 1; + k + }); + + instrs.len() != before +} + +/// Retorna `true` para instruções que não podem ser removidas mesmo se o resultado +/// não for usado — elas têm efeitos colaterais observáveis. +fn has_side_effects(instr: &TacInstr) -> bool { + matches!( + instr, + TacInstr::Call { .. } + | TacInstr::Return { .. } + | TacInstr::Jump { .. } + | TacInstr::CondJump { .. } + | TacInstr::Label(_) + | TacInstr::Copy { + dst: Operand::Var(_), + .. + } + ) +} + +// ─── Pipeline ──────────────────────────────────────────────────────────────── + +const MAX_ITER: usize = 10; + +/// Executa os passes de otimização sobre as instruções de uma função até ponto fixo. +pub fn optimize_function(instrs: &mut Vec) { + for _ in 0..MAX_ITER { + let mut changed = false; + + changed |= constant_fold(instrs); + changed |= constant_propagation(instrs); + + let liveness = compute_liveness(instrs); + changed |= dead_code_eliminate(instrs, &liveness); + + if !changed { + break; + } + } +} + +// ─── Tests ─────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + common::ast::expr::{BinOp, UnOp}, + ir::tac::{ConstValue, LabelId, Operand, TacInstr, TempId}, + }; + + fn int(v: i64) -> Operand { + Operand::Const(ConstValue::Int(v)) + } + + fn temp(n: u32) -> Operand { + Operand::Temp(TempId(n)) + } + + fn var(name: &str) -> Operand { + Operand::Var(name.to_string()) + } + + // ── Constant Fold ── + + #[test] + fn fold_add_two_consts() { + let mut instrs = vec![TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Add, + lhs: int(2), + rhs: int(3), + }]; + assert!(constant_fold(&mut instrs)); + assert_eq!( + instrs[0], + TacInstr::Copy { + dst: temp(0), + src: int(5), + } + ); + } + + #[test] + fn fold_nested_expression() { + // 2 + 3*4 → t0=3*4, t1=2+t0 + let mut 
instrs = vec![ + TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Mul, + lhs: int(3), + rhs: int(4), + }, + TacInstr::BinOp { + dst: TempId(1), + op: BinOp::Add, + lhs: int(2), + rhs: temp(0), + }, + ]; + // Após fold: t0 = 12, t1 = 2 + t0 (t0 ainda é temp, precisa de propagation) + constant_fold(&mut instrs); + assert_eq!(instrs[0], TacInstr::Copy { dst: temp(0), src: int(12) }); + + // Após propagation: t1 = 2 + 12 + constant_propagation(&mut instrs); + // Após segundo fold: t1 = 14 + constant_fold(&mut instrs); + assert_eq!(instrs[1], TacInstr::Copy { dst: temp(1), src: int(14) }); + } + + #[test] + fn fold_relational_less() { + let mut instrs = vec![TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Less, + lhs: int(3), + rhs: int(5), + }]; + assert!(constant_fold(&mut instrs)); + assert_eq!(instrs[0], TacInstr::Copy { dst: temp(0), src: int(1) }); + } + + #[test] + fn fold_relational_eq_false() { + let mut instrs = vec![TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Eq, + lhs: int(3), + rhs: int(5), + }]; + assert!(constant_fold(&mut instrs)); + assert_eq!(instrs[0], TacInstr::Copy { dst: temp(0), src: int(0) }); + } + + #[test] + fn fold_bitwise_and() { + // 0b1010 & 0b1100 = 0b1000 = 8 + let mut instrs = vec![TacInstr::BinOp { + dst: TempId(0), + op: BinOp::BitAnd, + lhs: int(0b1010), + rhs: int(0b1100), + }]; + assert!(constant_fold(&mut instrs)); + assert_eq!(instrs[0], TacInstr::Copy { dst: temp(0), src: int(8) }); + } + + #[test] + fn fold_unary_neg() { + let mut instrs = vec![TacInstr::UnOp { + dst: TempId(0), + op: UnOp::Neg, + src: int(7), + }]; + assert!(constant_fold(&mut instrs)); + assert_eq!(instrs[0], TacInstr::Copy { dst: temp(0), src: int(-7) }); + } + + #[test] + fn fold_unary_not() { + // !0 = 1, !5 = 0 + let mut instrs = vec![ + TacInstr::UnOp { dst: TempId(0), op: UnOp::Not, src: int(0) }, + TacInstr::UnOp { dst: TempId(1), op: UnOp::Not, src: int(5) }, + ]; + constant_fold(&mut instrs); + assert_eq!(instrs[0], TacInstr::Copy { dst: 
temp(0), src: int(1) }); + assert_eq!(instrs[1], TacInstr::Copy { dst: temp(1), src: int(0) }); + } + + #[test] + fn fold_div_by_zero_preserved() { + let original = TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Div, + lhs: int(10), + rhs: int(0), + }; + let mut instrs = vec![original.clone()]; + assert!(!constant_fold(&mut instrs)); + assert_eq!(instrs[0], original); + } + + #[test] + fn fold_shl_negative_rhs_preserved() { + let original = TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Shl, + lhs: int(1), + rhs: int(-1), + }; + let mut instrs = vec![original.clone()]; + assert!(!constant_fold(&mut instrs)); + assert_eq!(instrs[0], original); + } + + #[test] + fn fold_shl_rhs_64_preserved() { + let original = TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Shl, + lhs: int(1), + rhs: int(64), + }; + let mut instrs = vec![original.clone()]; + assert!(!constant_fold(&mut instrs)); + assert_eq!(instrs[0], original); + } + + #[test] + fn fold_double_operand_not_folded() { + let original = TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Add, + lhs: Operand::Const(ConstValue::Double(1.0)), + rhs: Operand::Const(ConstValue::Double(2.0)), + }; + let mut instrs = vec![original.clone()]; + assert!(!constant_fold(&mut instrs)); + assert_eq!(instrs[0], original); + } + + // ── Constant Propagation ── + + #[test] + fn propagation_simple_chain() { + // t0 = 5; t1 = t0 + 3 → t1 = 5 + 3 → (fold) t1 = 8 + let mut instrs = vec![ + TacInstr::Copy { dst: temp(0), src: int(5) }, + TacInstr::BinOp { + dst: TempId(1), + op: BinOp::Add, + lhs: temp(0), + rhs: int(3), + }, + ]; + assert!(constant_propagation(&mut instrs)); + assert_eq!( + instrs[1], + TacInstr::BinOp { + dst: TempId(1), + op: BinOp::Add, + lhs: int(5), + rhs: int(3), + } + ); + // Após fold: t1 = 8 + constant_fold(&mut instrs); + assert_eq!(instrs[1], TacInstr::Copy { dst: temp(1), src: int(8) }); + } + + #[test] + fn propagation_invalidated_by_redefinition() { + // t0 = 5; t0 = call f(); t1 = t0 + 1 → t1 NÃO deve ser 
dobrado + let mut instrs = vec![ + TacInstr::Copy { dst: temp(0), src: int(5) }, + TacInstr::Call { + dst: Some(TempId(0)), + fn_name: "f".to_string(), + args: vec![], + }, + TacInstr::BinOp { + dst: TempId(1), + op: BinOp::Add, + lhs: temp(0), + rhs: int(1), + }, + ]; + // propagation não deve substituir t0 na última instrução + constant_propagation(&mut instrs); + assert!(matches!( + &instrs[2], + TacInstr::BinOp { lhs: Operand::Temp(_), .. } + )); + } + + // ── DCE ── + + #[test] + fn dce_removes_unused_temp() { + // t0 = 2 + 3 (nunca lido) + let mut instrs = vec![TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Add, + lhs: int(2), + rhs: int(3), + }]; + let liveness = compute_liveness(&instrs); + assert!(dead_code_eliminate(&mut instrs, &liveness)); + assert!(instrs.is_empty()); + } + + #[test] + fn dce_preserves_call_even_if_unused() { + // t0 = call f() — side-effect, não pode ser removido + let mut instrs = vec![TacInstr::Call { + dst: Some(TempId(0)), + fn_name: "f".to_string(), + args: vec![], + }]; + let liveness = compute_liveness(&instrs); + assert!(!dead_code_eliminate(&mut instrs, &liveness)); + assert_eq!(instrs.len(), 1); + } + + #[test] + fn dce_preserves_var_assignment() { + // x = 10 (Var — semântica observável, nunca eliminar) + let mut instrs = vec![TacInstr::Copy { + dst: var("x"), + src: int(10), + }]; + let liveness = compute_liveness(&instrs); + assert!(!dead_code_eliminate(&mut instrs, &liveness)); + assert_eq!(instrs.len(), 1); + } + + #[test] + fn dce_keeps_used_temp() { + // t0 = 5; return t0 → t0 está vivo, não deve ser removido + let mut instrs = vec![ + TacInstr::Copy { dst: temp(0), src: int(5) }, + TacInstr::Return { val: Some(temp(0)) }, + ]; + let liveness = compute_liveness(&instrs); + assert!(!dead_code_eliminate(&mut instrs, &liveness)); + assert_eq!(instrs.len(), 2); + } + + // ── Pipeline / Ponto Fixo ── + + #[test] + fn optimize_function_full_pipeline() { + // int x = 2 + 3 * 4; (x nunca lido, sem return) + // t0 = 3 * 4 + 
// t1 = 2 + t0 + // x = t1 + let mut instrs = vec![ + TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Mul, + lhs: int(3), + rhs: int(4), + }, + TacInstr::BinOp { + dst: TempId(1), + op: BinOp::Add, + lhs: int(2), + rhs: temp(0), + }, + TacInstr::Copy { + dst: var("x"), + src: temp(1), + }, + ]; + + optimize_function(&mut instrs); + + // Após ponto fixo: t0 e t1 eliminados; x = 14 + assert_eq!(instrs.len(), 1); + assert_eq!( + instrs[0], + TacInstr::Copy { + dst: var("x"), + src: int(14), + } + ); + } + + #[test] + fn optimize_function_preserves_call_chain() { + // t0 = call f(); t1 = t0 + 0; return t1 + // t1 = t0 + 0 pode ser simplificado mas t0 tem side-effect → não eliminar call + let mut instrs = vec![ + TacInstr::Call { + dst: Some(TempId(0)), + fn_name: "f".to_string(), + args: vec![], + }, + TacInstr::BinOp { + dst: TempId(1), + op: BinOp::Add, + lhs: temp(0), + rhs: int(0), + }, + TacInstr::Return { val: Some(temp(1)) }, + ]; + + optimize_function(&mut instrs); + + // call f() deve ser preservado + assert!(instrs + .iter() + .any(|i| matches!(i, TacInstr::Call { fn_name, .. } if fn_name == "f"))); + } + + #[test] + fn optimize_function_side_effect_label_preserved() { + let mut instrs = vec![ + TacInstr::Label(LabelId(0)), + TacInstr::Return { val: None }, + ]; + optimize_function(&mut instrs); + assert_eq!(instrs.len(), 2); + } +} diff --git a/src/ir/lower.rs b/src/ir/lower.rs index df18fbf..8320dc7 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -371,6 +371,20 @@ pub fn lower_program(prog: &Program) -> TacProgram { } } +/// Gera o TAC e aplica todas as otimizações básicas (constant folding, +/// constant propagation e dead code elimination) até ponto fixo. +/// +/// Este é o ponto de entrada recomendado para a pipeline de compilação. 
+pub fn lower_and_optimize(prog: &Program) -> TacProgram { + use crate::codegen::inter::optimizations::optimize_function; + + let mut tac = lower_program(prog); + for func in &mut tac.functions { + optimize_function(&mut func.instrs); + } + tac +} + fn lower_literal(value: &Literal) -> ConstValue { match value { Literal::Int(value) => ConstValue::Int(*value), From 1752ec1f93ec2bbc3ea6b9724e99a74d9ce96e8c Mon Sep 17 00:00:00 2001 From: guxvr Date: Sat, 20 Jun 2026 13:53:21 -0300 Subject: [PATCH 44/91] limpando cargo clippy --- src/codegen/inter/optimizations.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/codegen/inter/optimizations.rs b/src/codegen/inter/optimizations.rs index ff41742..f1093d8 100644 --- a/src/codegen/inter/optimizations.rs +++ b/src/codegen/inter/optimizations.rs @@ -102,7 +102,7 @@ fn instr_uses(instr: &TacInstr) -> Vec { /// /// **Não** dobra `ConstValue::Double` (evitar divergência de precisão host vs target). /// **Não** dobra shift com rhs inválido, nem divisão por zero (UB em C). -pub fn constant_fold(instrs: &mut Vec) -> bool { +pub fn constant_fold(instrs: &mut [TacInstr]) -> bool { let mut changed = false; for instr in instrs.iter_mut() { @@ -177,13 +177,13 @@ fn fold_binop(op: &BinOp, lhs: &Operand, rhs: &Operand) -> Option { // Shift — UB se rhs < 0 ou rhs >= 64 BinOp::Shl => { - if r < 0 || r >= 64 { + if !(0..64).contains(&r) { return None; } l.checked_shl(r as u32)? } BinOp::Shr => { - if r < 0 || r >= 64 { + if !(0..64).contains(&r) { return None; } l.checked_shr(r as u32)? @@ -220,7 +220,7 @@ fn fold_unop(op: &UnOp, src: &Operand) -> Option { /// invalida a entrada do mapa. /// /// Retorna `true` se alguma substituição foi feita. 
-pub fn constant_propagation(instrs: &mut Vec) -> bool { +pub fn constant_propagation(instrs: &mut [TacInstr]) -> bool { let mut changed = false; let mut const_map: HashMap = HashMap::new(); From 607efe327d615e34837e84566dbc41688e194116 Mon Sep 17 00:00:00 2001 From: guxvr Date: Sat, 20 Jun 2026 13:55:52 -0300 Subject: [PATCH 45/91] =?UTF-8?q?corre=C3=A7=C3=B5es=20na=20formata=C3=A7?= =?UTF-8?q?=C3=A3o?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/codegen/inter/optimizations.rs | 109 ++++++++++++++++++++++++----- 1 file changed, 90 insertions(+), 19 deletions(-) diff --git a/src/codegen/inter/optimizations.rs b/src/codegen/inter/optimizations.rs index f1093d8..59718b6 100644 --- a/src/codegen/inter/optimizations.rs +++ b/src/codegen/inter/optimizations.rs @@ -433,13 +433,25 @@ mod tests { ]; // Após fold: t0 = 12, t1 = 2 + t0 (t0 ainda é temp, precisa de propagation) constant_fold(&mut instrs); - assert_eq!(instrs[0], TacInstr::Copy { dst: temp(0), src: int(12) }); + assert_eq!( + instrs[0], + TacInstr::Copy { + dst: temp(0), + src: int(12) + } + ); // Após propagation: t1 = 2 + 12 constant_propagation(&mut instrs); // Após segundo fold: t1 = 14 constant_fold(&mut instrs); - assert_eq!(instrs[1], TacInstr::Copy { dst: temp(1), src: int(14) }); + assert_eq!( + instrs[1], + TacInstr::Copy { + dst: temp(1), + src: int(14) + } + ); } #[test] @@ -451,7 +463,13 @@ mod tests { rhs: int(5), }]; assert!(constant_fold(&mut instrs)); - assert_eq!(instrs[0], TacInstr::Copy { dst: temp(0), src: int(1) }); + assert_eq!( + instrs[0], + TacInstr::Copy { + dst: temp(0), + src: int(1) + } + ); } #[test] @@ -463,7 +481,13 @@ mod tests { rhs: int(5), }]; assert!(constant_fold(&mut instrs)); - assert_eq!(instrs[0], TacInstr::Copy { dst: temp(0), src: int(0) }); + assert_eq!( + instrs[0], + TacInstr::Copy { + dst: temp(0), + src: int(0) + } + ); } #[test] @@ -476,7 +500,13 @@ mod tests { rhs: int(0b1100), }]; 
assert!(constant_fold(&mut instrs)); - assert_eq!(instrs[0], TacInstr::Copy { dst: temp(0), src: int(8) }); + assert_eq!( + instrs[0], + TacInstr::Copy { + dst: temp(0), + src: int(8) + } + ); } #[test] @@ -487,19 +517,45 @@ mod tests { src: int(7), }]; assert!(constant_fold(&mut instrs)); - assert_eq!(instrs[0], TacInstr::Copy { dst: temp(0), src: int(-7) }); + assert_eq!( + instrs[0], + TacInstr::Copy { + dst: temp(0), + src: int(-7) + } + ); } #[test] fn fold_unary_not() { // !0 = 1, !5 = 0 let mut instrs = vec![ - TacInstr::UnOp { dst: TempId(0), op: UnOp::Not, src: int(0) }, - TacInstr::UnOp { dst: TempId(1), op: UnOp::Not, src: int(5) }, + TacInstr::UnOp { + dst: TempId(0), + op: UnOp::Not, + src: int(0), + }, + TacInstr::UnOp { + dst: TempId(1), + op: UnOp::Not, + src: int(5), + }, ]; constant_fold(&mut instrs); - assert_eq!(instrs[0], TacInstr::Copy { dst: temp(0), src: int(1) }); - assert_eq!(instrs[1], TacInstr::Copy { dst: temp(1), src: int(0) }); + assert_eq!( + instrs[0], + TacInstr::Copy { + dst: temp(0), + src: int(1) + } + ); + assert_eq!( + instrs[1], + TacInstr::Copy { + dst: temp(1), + src: int(0) + } + ); } #[test] @@ -560,7 +616,10 @@ mod tests { fn propagation_simple_chain() { // t0 = 5; t1 = t0 + 3 → t1 = 5 + 3 → (fold) t1 = 8 let mut instrs = vec![ - TacInstr::Copy { dst: temp(0), src: int(5) }, + TacInstr::Copy { + dst: temp(0), + src: int(5), + }, TacInstr::BinOp { dst: TempId(1), op: BinOp::Add, @@ -580,14 +639,23 @@ mod tests { ); // Após fold: t1 = 8 constant_fold(&mut instrs); - assert_eq!(instrs[1], TacInstr::Copy { dst: temp(1), src: int(8) }); + assert_eq!( + instrs[1], + TacInstr::Copy { + dst: temp(1), + src: int(8) + } + ); } #[test] fn propagation_invalidated_by_redefinition() { // t0 = 5; t0 = call f(); t1 = t0 + 1 → t1 NÃO deve ser dobrado let mut instrs = vec![ - TacInstr::Copy { dst: temp(0), src: int(5) }, + TacInstr::Copy { + dst: temp(0), + src: int(5), + }, TacInstr::Call { dst: Some(TempId(0)), fn_name: "f".to_string(), 
@@ -604,7 +672,10 @@ mod tests { constant_propagation(&mut instrs); assert!(matches!( &instrs[2], - TacInstr::BinOp { lhs: Operand::Temp(_), .. } + TacInstr::BinOp { + lhs: Operand::Temp(_), + .. + } )); } @@ -653,7 +724,10 @@ mod tests { fn dce_keeps_used_temp() { // t0 = 5; return t0 → t0 está vivo, não deve ser removido let mut instrs = vec![ - TacInstr::Copy { dst: temp(0), src: int(5) }, + TacInstr::Copy { + dst: temp(0), + src: int(5), + }, TacInstr::Return { val: Some(temp(0)) }, ]; let liveness = compute_liveness(&instrs); @@ -730,10 +804,7 @@ mod tests { #[test] fn optimize_function_side_effect_label_preserved() { - let mut instrs = vec![ - TacInstr::Label(LabelId(0)), - TacInstr::Return { val: None }, - ]; + let mut instrs = vec![TacInstr::Label(LabelId(0)), TacInstr::Return { val: None }]; optimize_function(&mut instrs); assert_eq!(instrs.len(), 2); } From c3c9f1124541cb4ed1f4fea35150cf829237f76d Mon Sep 17 00:00:00 2001 From: Bappoz Date: Sun, 21 Jun 2026 12:20:20 -0300 Subject: [PATCH 46/91] fix(ir): liveness/DCE agora respeita fluxo de controle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A análise de liveness usava varredura linear backward sobre toda a função, ignorando Jump/CondJump/Label. Isso fazia o DCE eliminar definições feitas em um ramo de if/else quando o uso só ocorria após o merge (ex: t0=5 em um then seguido de goto, com t0 lido depois do else) — um bug de corretude que apaga código observável. Substitui por liveness por blocos básicos com fixpoint entre blocos (live_in/live_out via predecessores/sucessores reais), refinando depois por instrução dentro de cada bloco. Adiciona teste de regressão. 
--- src/codegen/inter/optimizations.rs | 224 ++++++++++++++++++++++++++--- 1 file changed, 204 insertions(+), 20 deletions(-) diff --git a/src/codegen/inter/optimizations.rs b/src/codegen/inter/optimizations.rs index 59718b6..ee762ce 100644 --- a/src/codegen/inter/optimizations.rs +++ b/src/codegen/inter/optimizations.rs @@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet}; use crate::{ common::ast::expr::{BinOp, UnOp}, - ir::tac::{ConstValue, Operand, TacInstr, TempId}, + ir::tac::{ConstValue, LabelId, Operand, TacInstr, TempId}, }; // ─── Liveness ──────────────────────────────────────────────────────────────── @@ -15,38 +15,186 @@ pub struct LivenessInfo { pub live_before: Vec>, } -/// Calcula `LivenessInfo` para uma sequência plana de instruções TAC. +/// Calcula `LivenessInfo` para uma sequência de instruções TAC, respeitando +/// o fluxo de controle (`Jump`/`CondJump`/`Label`). /// -/// Algoritmo: varredura *backward* (de trás para frente). -/// ```text -/// live = {} -/// para i de (n-1) até 0: -/// live_before[i] = live.clone() -/// remover de live: TempId *definido* por instrs[i] -/// adicionar a live: TempId *usados* como operandos de instrs[i] -/// ``` +/// A análise é feita em dois níveis: +/// 1. Particiona `instrs` em blocos básicos e monta os sucessores de cada +/// bloco a partir da última instrução (`Jump`, `CondJump`, `Return` ou +/// fall-through). +/// 2. Resolve as equações de liveness por ponto fixo entre blocos: +/// `live_out(B) = ⋃ live_in(S)` para `S` sucessor de `B`, e +/// `live_in(B) = use(B) ∪ (live_out(B) - def(B))`. +/// +/// Em seguida, refina `live_before` por instrução dentro de cada bloco via +/// varredura backward, partindo de `live_out(B)`. +/// +/// Sem essa atenção ao fluxo de controle, uma definição feita em um ramo de +/// `if/else` e usada somente após o merge poderia parecer morta na varredura +/// puramente linear — levando o DCE a remover código observável. 
pub fn compute_liveness(instrs: &[TacInstr]) -> LivenessInfo { let n = instrs.len(); - let mut live_before = vec![HashSet::new(); n]; - let mut live: HashSet = HashSet::new(); + if n == 0 { + return LivenessInfo { + live_before: Vec::new(), + }; + } - for i in (0..n).rev() { - live_before[i] = live.clone(); + let blocks = split_into_blocks(instrs); + let label_to_block = label_block_map(instrs, &blocks); + let successors = compute_successors(instrs, &blocks, &label_to_block); - // Remover o temporário definido pela instrução. - if let Some(def) = instr_def(&instrs[i]) { - live.remove(&def); + let (use_b, def_b) = blocks + .iter() + .map(|range| local_use_def(&instrs[range.clone()])) + .collect::<(Vec<_>, Vec<_>)>(); + + let mut live_in = vec![HashSet::new(); blocks.len()]; + let mut live_out = vec![HashSet::new(); blocks.len()]; + + loop { + let mut changed = false; + + for b in (0..blocks.len()).rev() { + let mut out = HashSet::new(); + for &succ in &successors[b] { + out.extend(live_in[succ].iter().copied()); + } + + let mut inb = use_b[b].clone(); + for t in &out { + if !def_b[b].contains(t) { + inb.insert(*t); + } + } + + if out != live_out[b] || inb != live_in[b] { + changed = true; + } + live_out[b] = out; + live_in[b] = inb; } - // Adicionar todos os temporários usados como operandos. - for used in instr_uses(&instrs[i]) { - live.insert(used); + if !changed { + break; + } + } + + let mut live_before = vec![HashSet::new(); n]; + for (b, range) in blocks.iter().enumerate() { + let mut live = live_out[b].clone(); + + for i in range.clone().rev() { + live_before[i] = live.clone(); + + if let Some(def) = instr_def(&instrs[i]) { + live.remove(&def); + } + for used in instr_uses(&instrs[i]) { + live.insert(used); + } } } LivenessInfo { live_before } } +/// Particiona as instruções em blocos básicos. Um bloco começa em `instrs[0]`, +/// em todo `Label`, e logo após `Jump`/`CondJump`/`Return`. 
+fn split_into_blocks(instrs: &[TacInstr]) -> Vec> { + let n = instrs.len(); + let mut starts: Vec = vec![0]; + + for (i, instr) in instrs.iter().enumerate() { + match instr { + TacInstr::Label(_) => starts.push(i), + TacInstr::Jump { .. } | TacInstr::CondJump { .. } | TacInstr::Return { .. } => { + if i + 1 < n { + starts.push(i + 1); + } + } + _ => {} + } + } + + starts.sort_unstable(); + starts.dedup(); + + let mut blocks = Vec::with_capacity(starts.len()); + for (idx, &start) in starts.iter().enumerate() { + let end = starts.get(idx + 1).copied().unwrap_or(n); + if start < end { + blocks.push(start..end); + } + } + blocks +} + +/// Mapeia cada `LabelId` para o índice do bloco que ele inicia. +fn label_block_map( + instrs: &[TacInstr], + blocks: &[std::ops::Range], +) -> HashMap { + let mut map = HashMap::new(); + for (b, range) in blocks.iter().enumerate() { + if let TacInstr::Label(l) = &instrs[range.start] { + map.insert(*l, b); + } + } + map +} + +/// Calcula os blocos sucessores de cada bloco, a partir de sua última instrução. +fn compute_successors( + instrs: &[TacInstr], + blocks: &[std::ops::Range], + label_to_block: &HashMap, +) -> Vec> { + blocks + .iter() + .enumerate() + .map(|(b, range)| match &instrs[range.end - 1] { + TacInstr::Jump { label } => label_to_block.get(label).copied().into_iter().collect(), + TacInstr::CondJump { + then_label, + else_label, + .. + } => [then_label, else_label] + .into_iter() + .filter_map(|l| label_to_block.get(l).copied()) + .collect(), + TacInstr::Return { .. } => Vec::new(), + _ => { + if b + 1 < blocks.len() { + vec![b + 1] + } else { + Vec::new() + } + } + }) + .collect() +} + +/// Calcula `use(B)` (usos não precedidos de definição dentro de `B`, i.e. +/// "upward-exposed") e `def(B)` (todo `TempId` definido em `B`) via varredura +/// backward local ao bloco. 
+fn local_use_def(block: &[TacInstr]) -> (HashSet, HashSet) { + let mut use_b = HashSet::new(); + let mut def_b = HashSet::new(); + + for instr in block.iter().rev() { + if let Some(def) = instr_def(instr) { + def_b.insert(def); + use_b.remove(&def); + } + for used in instr_uses(instr) { + use_b.insert(used); + } + } + + (use_b, def_b) +} + /// Retorna o `TempId` definido pela instrução, se houver. /// /// `Operand::Var` **nunca** é retornado aqui — variáveis nomeadas do programa C @@ -735,6 +883,42 @@ mod tests { assert_eq!(instrs.len(), 2); } + // ── Liveness com fluxo de controle ── + + #[test] + fn dce_keeps_def_used_only_after_if_else_merge() { + // if (cond) t0 = 5; else t0 = 10; + // return t0; + // + // Sem considerar o fluxo de controle, uma varredura puramente linear + // marcaria `t0 = 5` (no ramo then) como morto, pois nada entre ele e + // o fim do bloco then o lê — mas `t0` é lido após o merge. + let mut instrs = vec![ + TacInstr::CondJump { + cond: temp(9), + then_label: LabelId(1), + else_label: LabelId(2), + }, + TacInstr::Label(LabelId(1)), + TacInstr::Copy { + dst: temp(0), + src: int(5), + }, + TacInstr::Jump { label: LabelId(3) }, + TacInstr::Label(LabelId(2)), + TacInstr::Copy { + dst: temp(0), + src: int(10), + }, + TacInstr::Label(LabelId(3)), + TacInstr::Return { val: Some(temp(0)) }, + ]; + + let liveness = compute_liveness(&instrs); + assert!(!dead_code_eliminate(&mut instrs, &liveness)); + assert_eq!(instrs.len(), 8); + } + // ── Pipeline / Ponto Fixo ── #[test] From 4ee31142b8ebfdf00fe74844bd5c145e2e45f646 Mon Sep 17 00:00:00 2001 From: Bappoz Date: Sun, 21 Jun 2026 12:32:36 -0300 Subject: [PATCH 47/91] fix(ci): corrige lint clippy collapsible_match em split_into_blocks --- src/codegen/inter/optimizations.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/codegen/inter/optimizations.rs b/src/codegen/inter/optimizations.rs index ee762ce..b594733 100644 --- a/src/codegen/inter/optimizations.rs +++ 
b/src/codegen/inter/optimizations.rs @@ -108,10 +108,10 @@ fn split_into_blocks(instrs: &[TacInstr]) -> Vec> { for (i, instr) in instrs.iter().enumerate() { match instr { TacInstr::Label(_) => starts.push(i), - TacInstr::Jump { .. } | TacInstr::CondJump { .. } | TacInstr::Return { .. } => { - if i + 1 < n { - starts.push(i + 1); - } + TacInstr::Jump { .. } | TacInstr::CondJump { .. } | TacInstr::Return { .. } + if i + 1 < n => + { + starts.push(i + 1); } _ => {} } From 68266baf5bd9dafc2f8b73fa3ab2b0be9968542e Mon Sep 17 00:00:00 2001 From: Bappoz Date: Mon, 22 Jun 2026 10:39:39 -0300 Subject: [PATCH 48/91] feat(opt): implementa copy propagation e constant propagation no TAC Implementa CopyPropagationPass (#136): mantem um mapa temp->valor por bloco basico, propagando copias simples (t2 = t1) e constantes (t = literal) para os usos subsequentes ate que o destino seja redefinido. Invalida entradas dependentes quando o destino e reescrito, eliminando cadeias obsoletas. --- src/codegen/inter/opt/copy_prop.rs | 216 ++++++++++++++++++++++++++++- 1 file changed, 213 insertions(+), 3 deletions(-) diff --git a/src/codegen/inter/opt/copy_prop.rs b/src/codegen/inter/opt/copy_prop.rs index a349066..bb6b827 100644 --- a/src/codegen/inter/opt/copy_prop.rs +++ b/src/codegen/inter/opt/copy_prop.rs @@ -1,6 +1,14 @@ +use std::collections::HashMap; + use super::OptPass; -use crate::codegen::inter::Cfg; +use crate::codegen::inter::{Cfg, Instruction, Value}; +/// Copy propagation e constant propagation, combinadas, sobre o TAC. +/// +/// Para cada bloco basico, mantem um mapa `temp -> valor` com a ultima +/// atribuicao simples (`dst = src` ou `dst = literal`) conhecida para cada +/// destino. Usos subsequentes desse destino sao substituidos diretamente +/// pelo valor mapeado, ate que o destino seja redefinido. 
pub struct CopyPropagationPass; impl OptPass for CopyPropagationPass { @@ -8,7 +16,209 @@ impl OptPass for CopyPropagationPass { "copy-propagation" } - fn run(&self, _cfg: &mut Cfg) -> bool { - false + fn run(&self, cfg: &mut Cfg) -> bool { + let mut changed = false; + + for block in &mut cfg.blocks { + changed |= propagate_in_block(&mut block.instructions); + } + + changed + } +} + +type CopyMap = HashMap; + +fn propagate_in_block(instructions: &mut [Instruction]) -> bool { + let mut changed = false; + let mut copies: CopyMap = HashMap::new(); + + for instruction in instructions.iter_mut() { + match instruction { + Instruction::Assign { dst, value } => { + let resolved = resolve(&copies, value); + if resolved != *value { + *value = resolved.clone(); + changed = true; + } + + invalidate(&mut copies, dst); + if !matches!(&resolved, Value::Temp(name) if name == dst) { + copies.insert(dst.clone(), resolved); + } + } + Instruction::Binary { dst, lhs, rhs, .. } => { + let resolved_lhs = resolve(&copies, lhs); + if resolved_lhs != *lhs { + *lhs = resolved_lhs; + changed = true; + } + + let resolved_rhs = resolve(&copies, rhs); + if resolved_rhs != *rhs { + *rhs = resolved_rhs; + changed = true; + } + + invalidate(&mut copies, dst); + } + Instruction::Nop => {} + } + } + + changed +} + +fn resolve(copies: &CopyMap, value: &Value) -> Value { + match value { + Value::Temp(name) => copies.get(name).cloned().unwrap_or_else(|| value.clone()), + other => other.clone(), + } +} + +/// Remove do mapa qualquer entrada que dependa de `name`, seja como chave +/// (o proprio destino esta sendo redefinido) ou como valor (algum outro +/// destino havia sido marcado como copia de `name`). 
+fn invalidate(copies: &mut CopyMap, name: &str) { + copies.remove(name); + copies.retain(|_, v| !matches!(v, Value::Temp(n) if n == name)); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::codegen::inter::{BasicBlock, BinaryOp}; + + fn assign(dst: &str, value: Value) -> Instruction { + Instruction::Assign { + dst: dst.to_string(), + value, + } + } + + fn binary(dst: &str, op: BinaryOp, lhs: Value, rhs: Value) -> Instruction { + Instruction::Binary { + dst: dst.to_string(), + op, + lhs, + rhs, + } + } + + #[test] + fn propagates_simple_copy() { + let mut block = BasicBlock::new("entry"); + block.instructions.push(binary( + "a", + BinaryOp::Add, + Value::Temp("p".into()), + Value::Temp("q".into()), + )); + block + .instructions + .push(assign("b", Value::Temp("a".into()))); + block.instructions.push(binary( + "c", + BinaryOp::Add, + Value::Temp("b".into()), + Value::Int(1), + )); + + let changed = propagate_in_block(&mut block.instructions); + + assert!(changed); + assert_eq!( + block.instructions[2], + binary("c", BinaryOp::Add, Value::Temp("a".into()), Value::Int(1)) + ); + } + + #[test] + fn propagates_constant() { + let mut block = BasicBlock::new("entry"); + block.instructions.push(assign("x", Value::Int(10))); + block.instructions.push(binary( + "y", + BinaryOp::Mul, + Value::Temp("x".into()), + Value::Int(2), + )); + + let changed = propagate_in_block(&mut block.instructions); + + assert!(changed); + assert_eq!( + block.instructions[1], + binary("y", BinaryOp::Mul, Value::Int(10), Value::Int(2)) + ); + } + + #[test] + fn stops_propagating_after_redefinition() { + let mut block = BasicBlock::new("entry"); + block.instructions.push(assign("x", Value::Int(10))); + block + .instructions + .push(assign("x", Value::Temp("p".into()))); + block.instructions.push(binary( + "y", + BinaryOp::Mul, + Value::Temp("x".into()), + Value::Int(2), + )); + + propagate_in_block(&mut block.instructions); + + assert_eq!( + block.instructions[2], + binary("y", 
BinaryOp::Mul, Value::Temp("p".into()), Value::Int(2)) + ); + } + + #[test] + fn invalidates_dependents_when_source_is_redefined() { + let mut block = BasicBlock::new("entry"); + block + .instructions + .push(assign("b", Value::Temp("a".into()))); + block.instructions.push(assign("a", Value::Int(7))); + block.instructions.push(binary( + "c", + BinaryOp::Add, + Value::Temp("b".into()), + Value::Int(1), + )); + + propagate_in_block(&mut block.instructions); + + assert_eq!( + block.instructions[2], + binary("c", BinaryOp::Add, Value::Temp("b".into()), Value::Int(1)) + ); + } + + #[test] + fn pass_runs_across_blocks_and_reports_changes() { + let mut cfg = Cfg::new(); + + let mut block = BasicBlock::new("entry"); + block.instructions.push(assign("a", Value::Int(3))); + block.instructions.push(binary( + "b", + BinaryOp::Add, + Value::Temp("a".into()), + Value::Int(1), + )); + cfg.add_block(block); + + let pass = CopyPropagationPass; + let changed = pass.run(&mut cfg); + + assert!(changed); + assert_eq!( + cfg.blocks[0].instructions[1], + binary("b", BinaryOp::Add, Value::Int(3), Value::Int(1)) + ); + assert!(!pass.run(&mut cfg)); } } From 9d96cef84173e7b8c711f2ba86526f6592e0b59f Mon Sep 17 00:00:00 2001 From: Bappoz Date: Tue, 23 Jun 2026 00:02:55 -0300 Subject: [PATCH 49/91] fix: remove lowering and backend panics --- src/codegen/last/x86_64.rs | 175 +++++++++++++++---------- src/ir/lower.rs | 256 +++++++++++++++++++++++-------------- src/main.rs | 4 +- tests/codegen_smoke.rs | 10 +- tests/exe_smoke_test.rs | 4 +- 5 files changed, 276 insertions(+), 173 deletions(-) diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs index 78fa67b..8dc9d27 100644 --- a/src/codegen/last/x86_64.rs +++ b/src/codegen/last/x86_64.rs @@ -17,8 +17,11 @@ use crate::codegen::last::abi; use crate::codegen::last::frame::{Frame, SlotKey}; use crate::common::ast::expr::{BinOp, UnOp}; +use crate::common::errors::types::CodegenError; use crate::ir::tac::{ConstValue, LabelId, Operand, 
TacFunction, TacInstr, TacProgram}; +type EmitResult = Result; + /// Acumulador de linhas de assembly com indentacao controlada. struct Emitter { out: String, @@ -63,23 +66,23 @@ impl Emitter { /// Emite o assembly de um programa TAC completo, prefixando a diretiva de /// secao `.text`. -pub fn emit_program(prog: &TacProgram) -> String { +pub fn emit_program(prog: &TacProgram) -> EmitResult { let mut em = Emitter::new(); em.raw(".text"); for func in &prog.functions { em.blank(); - em.append_str(&emit_function(func)); + em.append_str(&emit_function(func)?); } // Marca a stack como nao-executavel (boa pratica; evita aviso do linker e // e o que o proprio GCC adiciona a saida assembly). em.blank(); em.raw(".section .note.GNU-stack,\"\",@progbits"); - em.into_string() + Ok(em.into_string()) } /// Emite o assembly de uma unica funcao: directiva `.globl`, rotulo, /// prologue, corpo e epilogue. -pub fn emit_function(func: &TacFunction) -> String { +pub fn emit_function(func: &TacFunction) -> EmitResult { let mut em = Emitter::new(); em.comment(&format!("function {}", func.name)); em.raw(&format!(".globl {}", func.name)); @@ -108,7 +111,7 @@ pub fn emit_function(func: &TacFunction) -> String { // Corpo let epilogue_label = format!(".L_{}_epilogue", func.name); for instr in &func.instrs { - emit_instr(&mut em, instr, &frame, &func.name, &epilogue_label); + emit_instr(&mut em, instr, &frame, &func.name, &epilogue_label)?; } // Epilogue (alvo de todos os `return`). 
Caso a funcao nao tenha `return` @@ -118,7 +121,7 @@ pub fn emit_function(func: &TacFunction) -> String { em.insn("popq %rbp"); em.insn("ret"); - em.into_string() + Ok(em.into_string()) } /// Constroi o stack frame pre-escaneando todas as instrucoes para alocar um @@ -204,40 +207,41 @@ fn emit_instr( frame: &Frame, func_name: &str, epilogue_label: &str, -) { +) -> EmitResult<()> { match instr { TacInstr::Label(label) => { em.raw(&format!("{}:", local_label(func_name, label))); + Ok(()) } TacInstr::Jump { label } => { em.insn(&format!("jmp {}", local_label(func_name, label))); + Ok(()) } TacInstr::CondJump { cond, then_label, else_label, } => { - load_op(em, frame, cond, "rax"); + load_op(em, frame, cond, "rax")?; em.insn("testq %rax, %rax"); em.insn(&format!("jne {}", local_label(func_name, then_label))); em.insn(&format!("jmp {}", local_label(func_name, else_label))); + Ok(()) } TacInstr::Copy { dst, src } => { - load_op(em, frame, src, "rax"); - store_op(em, frame, dst, "rax"); - } - TacInstr::BinOp { dst, op, lhs, rhs } => { - emit_binop(em, op, lhs, rhs, *dst, frame); - } - TacInstr::UnOp { dst, op, src } => { - emit_unop(em, op, src, *dst, frame); + load_op(em, frame, src, "rax")?; + store_op(em, frame, dst, "rax")?; + Ok(()) } + TacInstr::BinOp { dst, op, lhs, rhs } => emit_binop(em, op, lhs, rhs, *dst, frame), + TacInstr::UnOp { dst, op, src } => emit_unop(em, op, src, *dst, frame), TacInstr::Call { dst, fn_name, args } => emit_call(em, fn_name, args, *dst, frame), TacInstr::Return { val } => { if let Some(val) = val { - load_op(em, frame, val, "rax"); + load_op(em, frame, val, "rax")?; } em.insn(&format!("jmp {epilogue_label}")); + Ok(()) } } } @@ -249,16 +253,16 @@ fn emit_binop( rhs: &Operand, dst: crate::ir::tac::TempId, frame: &Frame, -) { +) -> EmitResult<()> { // Operacoes logicas short-circuit-like precisam normalizar cada operando // para 0/1 individualmente. 
if matches!(op, BinOp::And | BinOp::Or) { - emit_logical(em, matches!(op, BinOp::Or), lhs, rhs, dst, frame); - return; + emit_logical(em, matches!(op, BinOp::Or), lhs, rhs, dst, frame)?; + return Ok(()); } - load_op(em, frame, lhs, "rax"); - load_op(em, frame, rhs, "rcx"); + load_op(em, frame, lhs, "rax")?; + load_op(em, frame, rhs, "rcx")?; match op { BinOp::Add => em.insn("addq %rcx, %rax"), @@ -284,10 +288,16 @@ fn emit_binop( BinOp::Geq => emit_comparison(em, "setge"), BinOp::Eq => emit_comparison(em, "sete"), BinOp::Neq => emit_comparison(em, "setne"), - BinOp::And | BinOp::Or => unreachable!("tratado em emit_logical"), + BinOp::And | BinOp::Or => { + return Err(codegen_error( + "operacao logica deveria ter sido tratada antes", + Some("binop"), + )) + } } - store_op(em, frame, &Operand::Temp(dst), "rax"); + store_op(em, frame, &Operand::Temp(dst), "rax")?; + Ok(()) } fn emit_comparison(em: &mut Emitter, setcc: &str) { @@ -303,16 +313,16 @@ fn emit_logical( rhs: &Operand, dst: crate::ir::tac::TempId, frame: &Frame, -) { +) -> EmitResult<()> { // Normaliza lhs para 0/1 em %rdx. - load_op(em, frame, lhs, "rax"); + load_op(em, frame, lhs, "rax")?; em.insn("testq %rax, %rax"); em.insn("setne %al"); em.insn("movzbq %al, %rax"); em.insn("movq %rax, %rdx"); // Normaliza rhs para 0/1 em %rax. 
- load_op(em, frame, rhs, "rax"); + load_op(em, frame, rhs, "rax")?; em.insn("testq %rax, %rax"); em.insn("setne %al"); em.insn("movzbq %al, %rax"); @@ -323,7 +333,8 @@ fn emit_logical( em.insn("andq %rdx, %rax"); } - store_op(em, frame, &Operand::Temp(dst), "rax"); + store_op(em, frame, &Operand::Temp(dst), "rax")?; + Ok(()) } fn emit_unop( @@ -332,8 +343,8 @@ fn emit_unop( src: &Operand, dst: crate::ir::tac::TempId, frame: &Frame, -) { - load_op(em, frame, src, "rax"); +) -> EmitResult<()> { + load_op(em, frame, src, "rax")?; match op { UnOp::Neg => em.insn("negq %rax"), UnOp::BitNot => em.insn("notq %rax"), @@ -342,10 +353,21 @@ fn emit_unop( em.insn("sete %al"); em.insn("movzbq %al, %rax"); } - UnOp::Deref => panic!("codegen de deref (*) nao suportado neste backend"), - UnOp::AddrOf => panic!("codegen de address-of (&) nao suportado neste backend"), + UnOp::Deref => { + return Err(codegen_error( + "codegen de deref (*) nao suportado neste backend", + Some("unop"), + )) + } + UnOp::AddrOf => { + return Err(codegen_error( + "codegen de address-of (&) nao suportado neste backend", + Some("unop"), + )) + } } - store_op(em, frame, &Operand::Temp(dst), "rax"); + store_op(em, frame, &Operand::Temp(dst), "rax")?; + Ok(()) } fn emit_call( @@ -354,7 +376,7 @@ fn emit_call( args: &[Operand], dst: Option, frame: &Frame, -) { +) -> EmitResult<()> { // Argumentos alem de `MAX_REG_ARGS` vao para a stack do chamador, na // ordem inversa (o primeiro arg de stack fica no topo, mais proximo do // endereco de retorno), espelhando `abi::stack_arg_offset`. 
@@ -367,13 +389,13 @@ fn emit_call( } let stack_args = &args[args.len().min(abi::MAX_REG_ARGS)..]; for arg in stack_args.iter().rev() { - load_op(em, frame, arg, "rax"); + load_op(em, frame, arg, "rax")?; em.insn("pushq %rax"); } for (index, arg) in args.iter().take(abi::MAX_REG_ARGS).enumerate() { let reg = abi::arg_register(index).expect("index < MAX_REG_ARGS sempre tem registrador"); - load_op(em, frame, arg, "rax"); + load_op(em, frame, arg, "rax")?; em.insn(&format!("movq %rax, %{reg}")); } @@ -385,31 +407,37 @@ fn emit_call( } if let Some(dst) = dst { - store_op(em, frame, &Operand::Temp(dst), "rax"); + store_op(em, frame, &Operand::Temp(dst), "rax")?; } + Ok(()) } /// Carrega `op` para o registrador nomeado (ex.: "rax", "rcx"). -fn load_op(em: &mut Emitter, frame: &Frame, op: &Operand, reg: &str) { +fn load_op(em: &mut Emitter, frame: &Frame, op: &Operand, reg: &str) -> EmitResult<()> { match op { - Operand::Const(value) => em.insn(&format!("movq ${}, %{reg}", const_immediate(value))), + Operand::Const(value) => { + em.insn(&format!("movq ${}, %{reg}", const_immediate(value)?)); + Ok(()) + } Operand::Temp(temp) => { let offset = frame .offset_of(&SlotKey::Temp(temp.0)) .expect("temp sem slot alocado"); em.insn(&format!("movq {offset}(%rbp), %{reg}")); + Ok(()) } Operand::Var(name) => { let offset = frame .offset_of(&SlotKey::Var(name.clone())) .expect("var sem slot alocado"); em.insn(&format!("movq {offset}(%rbp), %{reg}")); + Ok(()) } } } /// Armazena o registrador nomeado em `op` (que deve ser temp ou var). 
-fn store_op(em: &mut Emitter, frame: &Frame, op: &Operand, reg: &str) { +fn store_op(em: &mut Emitter, frame: &Frame, op: &Operand, reg: &str) -> EmitResult<()> { let offset = match op { Operand::Temp(temp) => frame .offset_of(&SlotKey::Temp(temp.0)) @@ -417,17 +445,36 @@ fn store_op(em: &mut Emitter, frame: &Frame, op: &Operand, reg: &str) { Operand::Var(name) => frame .offset_of(&SlotKey::Var(name.clone())) .expect("var sem slot alocado"), - Operand::Const(_) => panic!("nao e possivel armazenar em uma constante"), + Operand::Const(_) => { + return Err(codegen_error( + "nao e possivel armazenar em uma constante", + Some("store"), + )) + } }; em.insn(&format!("movq %{reg}, {offset}(%rbp)")); + Ok(()) } -fn const_immediate(value: &ConstValue) -> String { +fn const_immediate(value: &ConstValue) -> EmitResult { match value { - ConstValue::Int(v) => v.to_string(), - ConstValue::Char(c) => (*c as i64).to_string(), - ConstValue::Double(_) => panic!("codegen de double nao suportado neste backend"), - ConstValue::String(_) => panic!("codegen de string literal nao suportado neste backend"), + ConstValue::Int(v) => Ok(v.to_string()), + ConstValue::Char(c) => Ok((*c as i64).to_string()), + ConstValue::Double(_) => Err(codegen_error( + "codegen de double nao suportado neste backend", + Some("const"), + )), + ConstValue::String(_) => Err(codegen_error( + "codegen de string literal nao suportado neste backend", + Some("const"), + )), + } +} + +fn codegen_error(message: &str, instruction: Option<&str>) -> CodegenError { + CodegenError { + message: message.to_string(), + instruction: instruction.map(str::to_string), } } @@ -460,7 +507,7 @@ mod tests { #[test] fn emit_function_prologue_pushes_rbp_and_sets_frame() { - let out = emit_function(&asm_simple_return_const()); + let out = emit_function(&asm_simple_return_const()).unwrap(); assert!(out.contains("pushq %rbp")); assert!(out.contains("movq %rsp, %rbp")); @@ -469,7 +516,7 @@ mod tests { #[test] fn 
emit_function_declares_global_symbol() { - let out = emit_function(&asm_simple_return_const()); + let out = emit_function(&asm_simple_return_const()).unwrap(); assert!(out.contains(".globl main")); assert!(out.contains("main:\n")); @@ -477,7 +524,7 @@ mod tests { #[test] fn return_const_loads_immediate_into_rax() { - let out = emit_function(&asm_simple_return_const()); + let out = emit_function(&asm_simple_return_const()).unwrap(); assert!(out.contains("movq $42, %rax")); } @@ -500,7 +547,7 @@ mod tests { ], ); - let out = emit_function(&f); + let out = emit_function(&f).unwrap(); assert!(out.contains("movq %rdi, -8(%rbp)")); // spill arg a assert!(out.contains("movq %rsi, -16(%rbp)")); // spill arg b @@ -525,7 +572,7 @@ mod tests { ], ); - let out = emit_function(&f); + let out = emit_function(&f).unwrap(); assert!(out.contains("cqto")); assert!(out.contains("idivq %rcx")); @@ -544,7 +591,7 @@ mod tests { }], ); - let out = emit_function(&f); + let out = emit_function(&f).unwrap(); assert!(out.contains("movq %rdx, %rax")); } @@ -562,7 +609,7 @@ mod tests { }], ); - let out = emit_function(&f); + let out = emit_function(&f).unwrap(); assert!(out.contains("cmpq %rcx, %rax")); assert!(out.contains("setl %al")); @@ -589,7 +636,7 @@ mod tests { ], ); - let out = emit_function(&f); + let out = emit_function(&f).unwrap(); assert!(out.contains("movq %rax, %rdi")); assert!(out.contains("movq %rax, %rsi")); @@ -616,10 +663,8 @@ mod tests { ], ); - let out = emit_function(&f); + let out = emit_function(&f).unwrap(); - // 7th arg (index 6) e o unico passado pela stack; aligna a stack com - // 8 bytes de padding (1 arg de stack e impar) antes do push. assert!(out.contains("subq $8, %rsp")); assert!(out.contains("pushq %rax")); assert!(out.contains("call sum7")); @@ -641,9 +686,8 @@ mod tests { }], ); - let out = emit_function(&f); + let out = emit_function(&f).unwrap(); - // 2 args de stack (indices 6 e 7): par, sem padding necessario. 
assert!(!out.contains("subq $8, %rsp")); assert!(out.contains("addq $16, %rsp")); } @@ -661,7 +705,8 @@ mod tests { Operand::Const(ConstValue::Int(2)), ], }], - )); + )) + .unwrap(); assert!(!out.contains("pushq %rax")); assert!(!out.contains("addq $16, %rsp")); @@ -689,7 +734,7 @@ mod tests { ], ); - let out = emit_function(&f); + let out = emit_function(&f).unwrap(); assert!(out.contains("testq %rax, %rax")); assert!(out.contains("jne .L_cond_L0")); @@ -700,7 +745,7 @@ mod tests { #[test] fn epilogue_label_is_emitted_once() { - let out = emit_function(&asm_simple_return_const()); + let out = emit_function(&asm_simple_return_const()).unwrap(); assert_eq!(out.matches(".L_main_epilogue:").count(), 1); } @@ -711,7 +756,7 @@ mod tests { functions: vec![asm_simple_return_const()], }; - let out = emit_program(&prog); + let out = emit_program(&prog).unwrap(); assert!(out.starts_with(".text")); assert!(out.contains(".globl main")); @@ -730,7 +775,7 @@ mod tests { }], ); - let out = emit_function(&f); + let out = emit_function(&f).unwrap(); assert!(out.contains("setne %al")); assert!(out.contains("andq %rdx, %rax")); @@ -749,7 +794,7 @@ mod tests { }], ); - let out = emit_function(&f); + let out = emit_function(&f).unwrap(); assert!(out.contains("orq %rdx, %rax")); } diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 8320dc7..926b107 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -4,10 +4,13 @@ use crate::common::ast::{ expr::{BinOp, Expr, Literal, PostfixOp, PrefixOp}, stmt::Stmt, }; +use crate::common::errors::types::CodegenError; use crate::ir::tac::{ ConstValue, LabelGen, LabelId, Operand, TacFunction, TacInstr, TacProgram, TempGen, TempId, }; +type LowerResult = Result; + #[derive(Debug, Clone)] pub struct Lowerer { temps: TempGen, @@ -30,13 +33,13 @@ impl Lowerer { } } - pub fn lower_expr(&mut self, expr: &Expr) -> Operand { + pub fn lower_expr(&mut self, expr: &Expr) -> LowerResult { match expr { - Expr::Literal(value, _) => 
Operand::Const(lower_literal(value)), - Expr::Ident(name, _) => Operand::Var(name.clone()), + Expr::Literal(value, _) => Ok(Operand::Const(lower_literal(value))), + Expr::Ident(name, _) => Ok(Operand::Var(name.clone())), Expr::Binary(lhs, op, rhs, _) => { - let lhs = self.lower_expr(lhs); - let rhs = self.lower_expr(rhs); + let lhs = self.lower_expr(lhs)?; + let rhs = self.lower_expr(rhs)?; let dst = self.fresh_temp(); self.instrs.push(TacInstr::BinOp { dst, @@ -44,44 +47,52 @@ impl Lowerer { lhs, rhs, }); - Operand::Temp(dst) + Ok(Operand::Temp(dst)) } Expr::Unary(op, src, _) => { - let src = self.lower_expr(src); + let src = self.lower_expr(src)?; let dst = self.fresh_temp(); self.instrs.push(TacInstr::UnOp { dst, op: op.clone(), src, }); - Operand::Temp(dst) + Ok(Operand::Temp(dst)) } Expr::Prefix(op, target, _) => self.lower_prefix(op, target), Expr::Postfix(op, target, _) => self.lower_postfix(op, target), Expr::Call(callee, args, _) => { let fn_name = match callee.as_ref() { Expr::Ident(name, _) => name.clone(), - _ => panic!("lowering ainda nao suporta chamada por expressao"), + _ => { + return Err(codegen_error( + "chamada por expressao nao suportada no lowering", + Some("call"), + )); + } }; - let args = args.iter().map(|arg| self.lower_expr(arg)).collect(); + let mut lowered_args = Vec::with_capacity(args.len()); + for arg in args { + lowered_args.push(self.lower_expr(arg)?); + } let dst = self.fresh_temp(); self.instrs.push(TacInstr::Call { dst: Some(dst), fn_name, - args, + args: lowered_args, }); - Operand::Temp(dst) + Ok(Operand::Temp(dst)) } Expr::Cast(_, inner, _) => self.lower_expr(inner), Expr::Assign(lhs, rhs, _) => { - let src = self.lower_expr(rhs); - let dst = self.lower_assignment_target(lhs); - self.emit_copy(dst.clone(), src); - dst + let src = self.lower_expr(rhs)?; + let dst = self.lower_assignment_target(lhs)?; + self.emit_copy(dst.clone(), src)?; + Ok(dst) } Expr::CompoundAssign(op, lhs, rhs, _) => { - let dst = 
self.lower_assignment_target(lhs); - let rhs = self.lower_expr(rhs); + let dst = self.lower_assignment_target(lhs)?; + let rhs = self.lower_expr(rhs)?; let temp = self.fresh_temp(); self.instrs.push(TacInstr::BinOp { dst: temp, @@ -89,12 +100,12 @@ impl Lowerer { lhs: dst.clone(), rhs, }); - self.emit_copy(dst.clone(), Operand::Temp(temp)); - dst + self.emit_copy(dst.clone(), Operand::Temp(temp))?; + Ok(dst) } - Expr::SizeofType(qty, _) => Operand::Const(ConstValue::Int(type_size(&qty.ty))), + Expr::SizeofType(qty, _) => Ok(Operand::Const(ConstValue::Int(type_size(&qty.ty)?))), Expr::Ternary(cond, then_expr, else_expr, _) => { - let cond = self.lower_expr(cond); + let cond = self.lower_expr(cond)?; let then_label = self.labels.fresh(); let else_label = self.labels.fresh(); let end_label = self.labels.fresh(); @@ -107,36 +118,46 @@ impl Lowerer { }); self.instrs.push(TacInstr::Label(then_label)); - let then_val = self.lower_expr(then_expr); - self.emit_copy(Operand::Temp(dst), then_val); + let then_val = self.lower_expr(then_expr)?; + self.emit_copy(Operand::Temp(dst), then_val)?; self.emit_jump_unless_terminated(end_label); self.instrs.push(TacInstr::Label(else_label)); - let else_val = self.lower_expr(else_expr); - self.emit_copy(Operand::Temp(dst), else_val); + let else_val = self.lower_expr(else_expr)?; + self.emit_copy(Operand::Temp(dst), else_val)?; self.instrs.push(TacInstr::Label(end_label)); - Operand::Temp(dst) + Ok(Operand::Temp(dst)) } - Expr::Index(_, _, _) => panic!("lowering ainda nao suporta acesso por indice"), - Expr::Member(_, _, _, _) => panic!("lowering ainda nao suporta acesso a membro"), - Expr::Sizeof(_, _) => panic!("lowering de sizeof(expr) requer informacao de tipo"), + Expr::Index(_, _, _) => Err(codegen_error( + "acesso por indice nao suportado no lowering", + Some("index"), + )), + Expr::Member(_, _, _, _) => Err(codegen_error( + "acesso a membro nao suportado no lowering", + Some("member"), + )), + Expr::Sizeof(_, _) => 
Err(codegen_error( + "sizeof(expr) nao suportado no lowering sem informacao de tipo", + Some("sizeof"), + )), } } - pub fn lower_stmt(&mut self, stmt: &Stmt) { - self.lower_stmt_with_control(stmt, ControlLabels::default()); + pub fn lower_stmt(&mut self, stmt: &Stmt) -> LowerResult<()> { + self.lower_stmt_with_control(stmt, ControlLabels::default()) } - fn lower_stmt_with_control(&mut self, stmt: &Stmt, control: ControlLabels) { + fn lower_stmt_with_control(&mut self, stmt: &Stmt, control: ControlLabels) -> LowerResult<()> { match stmt { Stmt::Block(stmts, _) => { for stmt in stmts { - self.lower_stmt_with_control(stmt, control); + self.lower_stmt_with_control(stmt, control)?; } + Ok(()) } Stmt::If(cond, then_branch, else_branch, _) => { - let cond = self.lower_expr(cond); + let cond = self.lower_expr(cond)?; let then_label = self.labels.fresh(); let else_label = self.labels.fresh(); let end_label = self.labels.fresh(); @@ -148,14 +169,15 @@ impl Lowerer { }); self.instrs.push(TacInstr::Label(then_label)); - self.lower_stmt_with_control(then_branch, control); + self.lower_stmt_with_control(then_branch, control)?; self.emit_jump_unless_terminated(end_label); self.instrs.push(TacInstr::Label(else_label)); if let Some(else_branch) = else_branch { - self.lower_stmt_with_control(else_branch, control); + self.lower_stmt_with_control(else_branch, control)?; } self.instrs.push(TacInstr::Label(end_label)); + Ok(()) } Stmt::While(cond, body, _) => { let cond_label = self.labels.fresh(); @@ -163,7 +185,7 @@ impl Lowerer { let end_label = self.labels.fresh(); self.instrs.push(TacInstr::Label(cond_label)); - let cond = self.lower_expr(cond); + let cond = self.lower_expr(cond)?; self.instrs.push(TacInstr::CondJump { cond, then_label: body_label, @@ -177,14 +199,15 @@ impl Lowerer { break_label: Some(end_label), continue_label: Some(cond_label), }, - ); + )?; self.emit_jump_unless_terminated(cond_label); self.instrs.push(TacInstr::Label(end_label)); + Ok(()) } Stmt::For(init, 
cond, inc, body, _) => { if let Some(init) = init { - self.lower_stmt_with_control(init, control); + self.lower_stmt_with_control(init, control)?; } let cond_label = self.labels.fresh(); @@ -195,7 +218,7 @@ impl Lowerer { self.instrs.push(TacInstr::Label(cond_label)); if let Some(cond) = cond { - let cond = self.lower_expr(cond); + let cond = self.lower_expr(cond)?; self.instrs.push(TacInstr::CondJump { cond, then_label: body_label, @@ -210,17 +233,18 @@ impl Lowerer { break_label: Some(end_label), continue_label: Some(continue_label), }, - ); + )?; if let Some(inc_label) = inc_label { self.instrs.push(TacInstr::Label(inc_label)); if let Some(inc) = inc { - self.lower_expr(inc); + self.lower_expr(inc)?; } } self.emit_jump_unless_terminated(cond_label); self.instrs.push(TacInstr::Label(end_label)); + Ok(()) } Stmt::DoWhile(cond, body, _) => { let body_label = self.labels.fresh(); @@ -234,10 +258,10 @@ impl Lowerer { break_label: Some(end_label), continue_label: Some(cond_label), }, - ); + )?; self.instrs.push(TacInstr::Label(cond_label)); - let cond = self.lower_expr(cond); + let cond = self.lower_expr(cond)?; self.instrs.push(TacInstr::CondJump { cond, then_label: body_label, @@ -245,33 +269,45 @@ impl Lowerer { }); self.instrs.push(TacInstr::Label(end_label)); + Ok(()) } Stmt::Break(_) => { - let label = control - .break_label - .expect("break fora de loop/switch nao pode ser baixado"); + let label = control.break_label.ok_or_else(|| { + codegen_error("break fora de loop/switch nao suportado", Some("break")) + })?; self.instrs.push(TacInstr::Jump { label }); + Ok(()) } Stmt::Continue(_) => { - let label = control - .continue_label - .expect("continue fora de loop nao pode ser baixado"); + let label = control.continue_label.ok_or_else(|| { + codegen_error("continue fora de loop nao suportado", Some("continue")) + })?; self.instrs.push(TacInstr::Jump { label }); + Ok(()) } Stmt::ExprStmt(expr, _) => { - self.lower_expr(expr); + self.lower_expr(expr)?; + Ok(()) } 
Stmt::Return(expr, _) => { - let val = expr.as_ref().map(|expr| self.lower_expr(expr)); + let val = expr + .as_ref() + .map(|expr| self.lower_expr(expr)) + .transpose()?; self.instrs.push(TacInstr::Return { val }); + Ok(()) } Stmt::VarDecl(_, name, init, _) => { if let Some(init) = init { - let src = self.lower_expr(init); - self.emit_copy(Operand::Var(name.clone()), src); + let src = self.lower_expr(init)?; + self.emit_copy(Operand::Var(name.clone()), src)?; } + Ok(()) } - Stmt::Switch(_, _, _) => panic!("lowering ainda nao suporta switch"), + Stmt::Switch(_, _, _) => Err(codegen_error( + "switch nao suportado no lowering", + Some("switch"), + )), } } @@ -283,8 +319,8 @@ impl Lowerer { self.temps.fresh() } - fn lower_prefix(&mut self, op: &PrefixOp, target: &Expr) -> Operand { - let dst = self.lower_assignment_target(target); + fn lower_prefix(&mut self, op: &PrefixOp, target: &Expr) -> LowerResult { + let dst = self.lower_assignment_target(target)?; let temp = self.fresh_temp(); self.instrs.push(TacInstr::BinOp { dst: temp, @@ -292,14 +328,14 @@ impl Lowerer { lhs: dst.clone(), rhs: Operand::Const(ConstValue::Int(1)), }); - self.emit_copy(dst.clone(), Operand::Temp(temp)); - dst + self.emit_copy(dst.clone(), Operand::Temp(temp))?; + Ok(dst) } - fn lower_postfix(&mut self, op: &PostfixOp, target: &Expr) -> Operand { - let dst = self.lower_assignment_target(target); + fn lower_postfix(&mut self, op: &PostfixOp, target: &Expr) -> LowerResult { + let dst = self.lower_assignment_target(target)?; let old = self.fresh_temp(); - self.emit_copy(Operand::Temp(old), dst.clone()); + self.emit_copy(Operand::Temp(old), dst.clone())?; let new = self.fresh_temp(); self.instrs.push(TacInstr::BinOp { @@ -308,21 +344,30 @@ impl Lowerer { lhs: dst.clone(), rhs: Operand::Const(ConstValue::Int(1)), }); - self.emit_copy(dst, Operand::Temp(new)); - Operand::Temp(old) + self.emit_copy(dst, Operand::Temp(new))?; + Ok(Operand::Temp(old)) } - fn lower_assignment_target(&mut self, expr: 
&Expr) -> Operand { + fn lower_assignment_target(&mut self, expr: &Expr) -> LowerResult { match expr { - Expr::Ident(name, _) => Operand::Var(name.clone()), - _ => panic!("lowering ainda nao suporta esse destino de atribuicao"), + Expr::Ident(name, _) => Ok(Operand::Var(name.clone())), + _ => Err(codegen_error( + "destino de atribuicao nao suportado no lowering", + Some("assign"), + )), } } - fn emit_copy(&mut self, dst: Operand, src: Operand) { + fn emit_copy(&mut self, dst: Operand, src: Operand) -> LowerResult<()> { match dst { - Operand::Temp(_) | Operand::Var(_) => self.instrs.push(TacInstr::Copy { dst, src }), - Operand::Const(_) => panic!("constante nao pode ser destino de copia"), + Operand::Temp(_) | Operand::Var(_) => { + self.instrs.push(TacInstr::Copy { dst, src }); + Ok(()) + } + Operand::Const(_) => Err(codegen_error( + "constante nao pode ser destino de copia", + Some("copy"), + )), } } @@ -342,47 +387,50 @@ impl Default for Lowerer { } } -pub fn lower_function(decl: &Decl) -> TacFunction { +pub fn lower_function(decl: &Decl) -> LowerResult { match decl { Decl::Function(_, name, params, body, _) => { let mut lowerer = Lowerer::new(); for stmt in body { - lowerer.lower_stmt(stmt); + lowerer.lower_stmt(stmt)?; } - TacFunction { + Ok(TacFunction { name: name.clone(), params: params.iter().map(|(_, name)| name.clone()).collect(), instrs: lowerer.finish(), - } + }) } - _ => panic!("lower_function espera Decl::Function"), + _ => Err(codegen_error( + "lower_function espera Decl::Function", + Some("lower_function"), + )), } } -pub fn lower_program(prog: &Program) -> TacProgram { - TacProgram { - functions: prog - .decls - .iter() - .filter(|decl| matches!(decl, Decl::Function(..))) - .map(lower_function) - .collect(), +pub fn lower_program(prog: &Program) -> LowerResult { + let mut functions = Vec::new(); + for decl in &prog.decls { + if matches!(decl, Decl::Function(..)) { + functions.push(lower_function(decl)?); + } } + + Ok(TacProgram { functions }) } /// 
Gera o TAC e aplica todas as otimizações básicas (constant folding, /// constant propagation e dead code elimination) até ponto fixo. /// /// Este é o ponto de entrada recomendado para a pipeline de compilação. -pub fn lower_and_optimize(prog: &Program) -> TacProgram { +pub fn lower_and_optimize(prog: &Program) -> LowerResult { use crate::codegen::inter::optimizations::optimize_function; - let mut tac = lower_program(prog); + let mut tac = lower_program(prog)?; for func in &mut tac.functions { optimize_function(&mut func.instrs); } - tac + Ok(tac) } fn lower_literal(value: &Literal) -> ConstValue { @@ -408,18 +456,28 @@ fn postfix_bin_op(op: &PostfixOp) -> BinOp { } } -fn type_size(ty: &Type) -> i64 { +fn type_size(ty: &Type) -> LowerResult { match ty { - Type::Char => 1, - Type::Short => 2, - Type::Int | Type::Float | Type::Enum(_) => 4, - Type::Long | Type::Double | Type::Pointer(_) => 8, + Type::Char => Ok(1), + Type::Short => Ok(2), + Type::Int | Type::Float | Type::Enum(_) => Ok(4), + Type::Long | Type::Double | Type::Pointer(_) => Ok(8), Type::Array(_) | Type::Void | Type::Struct(_) | Type::Alias(_) | Type::Function(_, _) => { - panic!("lowering de sizeof(type) requer layout/tamanho completo") + Err(codegen_error( + "lowering de sizeof(type) requer layout/tamanho completo", + Some("sizeof"), + )) } } } +fn codegen_error(message: &str, instruction: Option<&str>) -> CodegenError { + CodegenError { + message: message.to_string(), + instruction: instruction.map(str::to_string), + } +} + #[cfg(test)] mod tests { use super::*; @@ -458,7 +516,7 @@ mod tests { let expr = Expr::Binary(Box::new(int(2)), BinOp::Add, Box::new(int(3)), span()); let mut lowerer = Lowerer::new(); - let result = lowerer.lower_expr(&expr); + let result = lowerer.lower_expr(&expr).unwrap(); assert_eq!(result, Operand::Temp(TempId(0))); assert_eq!( @@ -492,7 +550,7 @@ mod tests { ); let mut lowerer = Lowerer::new(); - lowerer.lower_stmt(&stmt); + lowerer.lower_stmt(&stmt).unwrap(); let instrs = 
lowerer.finish(); assert!(matches!( @@ -537,7 +595,7 @@ mod tests { ); let mut lowerer = Lowerer::new(); - lowerer.lower_stmt(&stmt); + lowerer.lower_stmt(&stmt).unwrap(); let instrs = lowerer.finish(); assert_eq!(instrs[0], TacInstr::Label(LabelId(0))); @@ -560,7 +618,7 @@ mod tests { let expr = Expr::Call(Box::new(ident("sum")), vec![arg0, int(3)], span()); let mut lowerer = Lowerer::new(); - let result = lowerer.lower_expr(&expr); + let result = lowerer.lower_expr(&expr).unwrap(); let instrs = lowerer.finish(); assert_eq!(result, Operand::Temp(TempId(1))); @@ -580,7 +638,7 @@ mod tests { let expr = Expr::Binary(Box::new(int(2)), BinOp::Add, Box::new(rhs), span()); let mut lowerer = Lowerer::new(); - let result = lowerer.lower_expr(&expr); + let result = lowerer.lower_expr(&expr).unwrap(); assert_eq!(result, Operand::Temp(TempId(1))); assert_eq!( @@ -612,7 +670,7 @@ mod tests { span(), ); - let func = lower_function(&decl); + let func = lower_function(&decl).unwrap(); assert_eq!(func.name, "main"); assert_eq!(func.params, vec!["argc"]); diff --git a/src/main.rs b/src/main.rs index 4e56f2b..df1771e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -273,8 +273,8 @@ fn run(source: SourceFile, args: &CliArgs) -> Result<(), Box> { opt_pipeline.run(&mut cfg, 10); // ── Stage 5: Code generation (x86-64 / AT&T) ───────────────────────────── - let tac_program = lower_program(&program); - let asm = last::emit_program(&tac_program); + let tac_program = lower_program(&program).map_err(|e| Box::new(e) as Box)?; + let asm = last::emit_program(&tac_program).map_err(|e| Box::new(e) as Box)?; let output_path = output_path_for(&args.input_file, &args.output_file, args.emit); emit_artifact(&asm, &output_path, args.emit)?; diff --git a/tests/codegen_smoke.rs b/tests/codegen_smoke.rs index 3c7cba2..721beae 100644 --- a/tests/codegen_smoke.rs +++ b/tests/codegen_smoke.rs @@ -82,7 +82,7 @@ fn write_temp_source(name: &str, contents: &str) -> PathBuf { fn smoke_assembles_with_gcc() { 
require_gcc!(); - let asm = emit_program(&build_soma_program()); + let asm = emit_program(&build_soma_program()).unwrap(); let source = write_temp_source("assemble", &asm); let object = source.with_extension("o"); @@ -107,7 +107,7 @@ fn smoke_assembles_with_gcc() { fn smoke_links_and_runs_with_expected_exit_code() { require_gcc!(); - let asm = emit_program(&build_soma_program()); + let asm = emit_program(&build_soma_program()).unwrap(); let source = write_temp_source("run", &asm); let exe = source.with_extension("bin"); @@ -148,7 +148,7 @@ fn smoke_simple_return_const_runs() { }], }; - let asm = emit_program(&prog); + let asm = emit_program(&prog).unwrap(); let source = write_temp_source("const", &asm); let exe = source.with_extension("bin"); @@ -254,7 +254,7 @@ fn smoke_call_with_more_than_six_args_runs() { functions: vec![sum9, main], }; - let asm = emit_program(&prog); + let asm = emit_program(&prog).unwrap(); let source = write_temp_source("manyargs", &asm); let exe = source.with_extension("bin"); @@ -306,7 +306,7 @@ fn smoke_control_flow_if_else_runs() { }], }; - let asm = emit_program(&prog); + let asm = emit_program(&prog).unwrap(); let source = write_temp_source("ifelse", &asm); let exe = source.with_extension("bin"); diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index 32c6e92..be15b39 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -60,8 +60,8 @@ fn compile_to_asm(source: &str) -> String { "erros semanticos inesperados: {sem_errors:?}" ); - let tac_program = lower_program(&program); - emit_program(&tac_program) + let tac_program = lower_program(&program).unwrap(); + emit_program(&tac_program).unwrap() } /// Compila `source` (C) ate um executavel real via `gcc` e o executa, From 7c11a57a6dae3172eded960075fc42fa414f11ff Mon Sep 17 00:00:00 2001 From: Bappoz Date: Tue, 23 Jun 2026 00:22:28 -0300 Subject: [PATCH 50/91] =?UTF-8?q?feat:=20adicionei=20flags=20de=20compila?= 
=?UTF-8?q?=C3=A7=C3=A3o=20do=20arquivo=20onde=20voce=20consegue=20setar?= =?UTF-8?q?=20um=20nome=20para=20o=20arquivo.out=20,=20podendo=20executar?= =?UTF-8?q?=20ele=20com=20esse=20nome.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/examples/simple.c | 4 ++ src/main.rs | 90 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 86 insertions(+), 8 deletions(-) create mode 100644 src/examples/simple.c diff --git a/src/examples/simple.c b/src/examples/simple.c new file mode 100644 index 0000000..34bdc57 --- /dev/null +++ b/src/examples/simple.c @@ -0,0 +1,4 @@ + +int main() { + return 42; +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index df1771e..9aef4e6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -28,6 +28,8 @@ fn main() -> std::io::Result<()> { eprintln!(" --only-parse Stop after parsing"); eprintln!(" --only-semantic Stop after semantic analysis"); eprintln!(" -o Set the output file path"); + eprintln!(" --out-dir Set the output directory"); + eprintln!(" --out-name Set the output file name"); eprintln!(" --emit=asm Stop after emitting x86-64 assembly (.s)"); eprintln!(" --emit=obj Stop after assembling an object file (.o)"); eprintln!(" --emit=exe Link a runnable executable (default)"); @@ -77,6 +79,8 @@ impl EmitKind { struct CliArgs { input_file: Option, output_file: Option, + output_dir: Option, + output_name: Option, dump_tokens: bool, dump_ast: bool, dump_ir: bool, @@ -92,6 +96,8 @@ impl CliArgs { let mut cli = CliArgs { input_file: None, output_file: None, + output_dir: None, + output_name: None, dump_tokens: false, dump_ast: false, dump_ir: false, @@ -122,6 +128,22 @@ impl CliArgs { }; cli.output_file = Some(path.clone()); } + "--out-dir" => { + i += 1; + let Some(path) = args.get(i) else { + eprintln!("error: missing value for --out-dir"); + exit(64); + }; + cli.output_dir = Some(path.clone()); + } + "--out-name" => { + i += 1; + let Some(name) = args.get(i) else { + 
eprintln!("error: missing value for --out-name"); + exit(64); + }; + cli.output_name = Some(name.clone()); + } _ if arg.starts_with("--emit=") => { let value = arg.strip_prefix("--emit=").unwrap(); cli.emit = EmitKind::parse(value).unwrap_or_else(|| { @@ -276,7 +298,13 @@ fn run(source: SourceFile, args: &CliArgs) -> Result<(), Box> { let tac_program = lower_program(&program).map_err(|e| Box::new(e) as Box)?; let asm = last::emit_program(&tac_program).map_err(|e| Box::new(e) as Box)?; - let output_path = output_path_for(&args.input_file, &args.output_file, args.emit); + let output_path = output_path_for( + &args.input_file, + &args.output_file, + &args.output_dir, + &args.output_name, + args.emit, + ); emit_artifact(&asm, &output_path, args.emit)?; eprintln!( "emitted {}: {}", @@ -292,21 +320,35 @@ fn run(source: SourceFile, args: &CliArgs) -> Result<(), Box> { fn output_path_for( input: &Option, output_override: &Option, + output_dir: &Option, + output_name: &Option, emit: EmitKind, ) -> PathBuf { if let Some(path) = output_override { return PathBuf::from(path); } - match emit { - EmitKind::Exe => PathBuf::from("a.out"), + let default_name = match emit { + EmitKind::Exe => "a.out".to_string(), EmitKind::Asm | EmitKind::Obj => { let input = input.clone().unwrap_or_else(|| "crusty.out".to_string()); let mut path = PathBuf::from(input); path.set_extension(if emit == EmitKind::Asm { "s" } else { "o" }); - path + return apply_output_dir(path, output_dir); } + }; + + let path = PathBuf::from(output_name.clone().unwrap_or(default_name)); + apply_output_dir(path, output_dir) +} + +fn apply_output_dir(mut path: PathBuf, output_dir: &Option) -> PathBuf { + if let Some(dir) = output_dir { + let mut full = PathBuf::from(dir); + full.push(path); + path = full; } + path } fn emit_kind_label(emit: EmitKind) -> &'static str { @@ -455,6 +497,8 @@ mod tests { let parsed = CliArgs::parse(&args(&["crusty", "main.c"])); assert_eq!(parsed.emit, EmitKind::Exe); 
assert_eq!(parsed.output_file, None); + assert_eq!(parsed.output_dir, None); + assert_eq!(parsed.output_name, None); } #[test] @@ -464,6 +508,21 @@ mod tests { assert_eq!(parsed.input_file, Some("main.c".to_string())); } + #[test] + fn parses_output_dir_and_name_flags() { + let parsed = CliArgs::parse(&args(&[ + "crusty", + "--out-dir", + "build", + "--out-name", + "demo", + "main.c", + ])); + assert_eq!(parsed.output_dir, Some("build".to_string())); + assert_eq!(parsed.output_name, Some("demo".to_string())); + assert_eq!(parsed.input_file, Some("main.c".to_string())); + } + #[test] fn parses_emit_flag_variants() { assert_eq!( @@ -492,15 +551,15 @@ mod tests { fn output_path_defaults_per_emit_kind() { let input = Some("foo/main.c".to_string()); assert_eq!( - output_path_for(&input, &None, EmitKind::Asm), + output_path_for(&input, &None, &None, &None, EmitKind::Asm), PathBuf::from("foo/main.s") ); assert_eq!( - output_path_for(&input, &None, EmitKind::Obj), + output_path_for(&input, &None, &None, &None, EmitKind::Obj), PathBuf::from("foo/main.o") ); assert_eq!( - output_path_for(&input, &None, EmitKind::Exe), + output_path_for(&input, &None, &None, &None, EmitKind::Exe), PathBuf::from("a.out") ); } @@ -511,9 +570,24 @@ mod tests { let output = Some("custom_out".to_string()); for emit in [EmitKind::Asm, EmitKind::Obj, EmitKind::Exe] { assert_eq!( - output_path_for(&input, &output, emit), + output_path_for(&input, &output, &None, &None, emit), PathBuf::from("custom_out") ); } } + + #[test] + fn output_path_joins_directory_and_name() { + let input = Some("src/main.c".to_string()); + assert_eq!( + output_path_for( + &input, + &None, + &Some("build".to_string()), + &Some("demo".to_string()), + EmitKind::Exe, + ), + PathBuf::from("build/demo") + ); + } } From b8ba8d602f6d7c7c8c0169e2609c3be152b07445 Mon Sep 17 00:00:00 2001 From: Bappoz Date: Tue, 23 Jun 2026 01:16:20 -0300 Subject: [PATCH 51/91] feat(stdlib): suporte minimo a stdbool.h e printf via stdio.h - adiciona 
shim de preprocessor para #include e - bool/true/false mapeados para int/1/0 via keywords e typedef injetado - printf sem #include gera erro semantico MissingLibraryHeader - testes de integracao e smoke cobrindo bool_literals Closes #165 --- src/analyser/mod.rs | 1 + src/analyser/semantic.rs | 124 ++++++++++++++- src/codegen/last/mod.rs | 2 +- src/codegen/last/x86_64.rs | 194 ++++++++++++++++++------ src/common/builtins.rs | 5 + src/common/errors/types.rs | 13 ++ src/common/mod.rs | 1 + src/lexer/rules/identifiers.rs | 2 + src/lexer/scanner.rs | 31 ++++ src/main.rs | 4 +- src/tests/semantic_test.rs | 54 +++++++ tests/exe_smoke_test.rs | 4 +- tests/integration/valid/bool_literals.c | 9 ++ tests/integration_test.rs | 9 +- 14 files changed, 400 insertions(+), 53 deletions(-) create mode 100644 src/common/builtins.rs create mode 100644 tests/integration/valid/bool_literals.c diff --git a/src/analyser/mod.rs b/src/analyser/mod.rs index 65cec25..bf43948 100644 --- a/src/analyser/mod.rs +++ b/src/analyser/mod.rs @@ -2,4 +2,5 @@ pub mod semantic; pub mod symbol_table; pub use semantic::analyse; +pub use semantic::analyse_with_builtins; pub use semantic::SemanticAnalyser; diff --git a/src/analyser/semantic.rs b/src/analyser/semantic.rs index c7db0b9..96e85fe 100644 --- a/src/analyser/semantic.rs +++ b/src/analyser/semantic.rs @@ -3,6 +3,7 @@ use crate::common::ast::ast::{Program, QualifierType, Type}; use crate::common::ast::decl::Decl; use crate::common::ast::expr::{Expr, Literal, MemberAccess}; use crate::common::ast::stmt::Stmt; +use crate::common::builtins::BuiltinsLibs; use crate::common::errors::error_data::Span; use crate::common::errors::types::{ CompilerError, CompilerWarning, Diagnostic, SemanticError, SemanticErrorKind, SemanticWarning, @@ -16,6 +17,7 @@ pub struct SemanticAnalyser { pub current_fn_ret: Option, pub diagnostics: Vec, pub warnings: Vec, + pub stdio_enabled: bool, } impl SemanticAnalyser { @@ -25,6 +27,14 @@ impl SemanticAnalyser { current_fn_ret: 
None, diagnostics: Vec::new(), warnings: Vec::new(), + stdio_enabled: false, + } + } + + pub fn with_builtins(builtins: BuiltinsLibs) -> Self { + Self { + stdio_enabled: builtins.stdio, + ..Self::new() } } @@ -441,6 +451,25 @@ impl SemanticAnalyser { Expr::SizeofType(_, _) => uint_type(), Expr::Call(callee, args, span) => { if let Expr::Ident(name, id_span) = callee.as_ref() { + if name == "printf" { + if !self.stdio_enabled { + self.diagnostics + .push(CompilerError::Semantic(SemanticError { + span: id_span.clone(), + kind: SemanticErrorKind::MissingLibraryHeader { + header: "stdio.h".to_string(), + symbol: "printf".to_string(), + }, + })); + for a in args { + self.analyse_expr(a); + } + return unknown_type(); + } + + return self.analyse_printf_call(args, span, id_span); + } + match self.sym.lookup(name) { None => { self.diagnostics @@ -732,12 +761,101 @@ impl SemanticAnalyser { } } } + + fn analyse_printf_call(&mut self, args: &[Expr], span: &Span, id_span: &Span) -> QualifierType { + if args.is_empty() { + self.diagnostics + .push(CompilerError::Semantic(SemanticError { + span: span.clone(), + kind: SemanticErrorKind::ArityMismatch { + expected: 1, + found: 0, + }, + })); + return unknown_type(); + } + + let fmt_ty = self.analyse_expr(&args[0]); + if !is_string_like(&fmt_ty.ty) { + self.diagnostics + .push(CompilerError::Semantic(SemanticError { + span: args[0].span(), + kind: SemanticErrorKind::TypeMismatch { + expected: "char*".to_string(), + found: type_name(&fmt_ty.ty), + }, + })); + } + + match args.len() { + 1 => {} + 2 => { + let arg_ty = self.analyse_expr(&args[1]); + if let Expr::Literal(Literal::String(fmt), _) = &args[0] { + let needs_pointer = fmt.contains("%s"); + let needs_scalar = fmt.contains("%d") + || fmt.contains("%i") + || fmt.contains("%u") + || fmt.contains("%c"); + + if needs_pointer && !is_string_like(&arg_ty.ty) { + self.diagnostics + .push(CompilerError::Semantic(SemanticError { + span: args[1].span(), + kind: 
SemanticErrorKind::TypeMismatch { + expected: "char*".to_string(), + found: type_name(&arg_ty.ty), + }, + })); + } else if needs_scalar && !is_scalar(&arg_ty.ty) { + self.diagnostics + .push(CompilerError::Semantic(SemanticError { + span: args[1].span(), + kind: SemanticErrorKind::TypeMismatch { + expected: "scalar".to_string(), + found: type_name(&arg_ty.ty), + }, + })); + } + } else if !is_scalar(&arg_ty.ty) && !is_string_like(&arg_ty.ty) { + self.diagnostics + .push(CompilerError::Semantic(SemanticError { + span: args[1].span(), + kind: SemanticErrorKind::TypeMismatch { + expected: "scalar".to_string(), + found: type_name(&arg_ty.ty), + }, + })); + } + } + n => { + self.diagnostics + .push(CompilerError::Semantic(SemanticError { + span: id_span.clone(), + kind: SemanticErrorKind::ArityMismatch { + expected: 2, + found: n, + }, + })); + } + } + + QualifierType { + ty: Type::Int, + is_const: false, + is_unsigned: false, + } + } } /// API pública: analisa o programa e retorna todos os diagnósticos semânticos /// (erros e avisos) como `Vec`. pub fn analyse(prog: &Program) -> Vec { - let mut analyser = SemanticAnalyser::new(); + analyse_with_builtins(prog, BuiltinsLibs::default()) +} + +pub fn analyse_with_builtins(prog: &Program, builtins: BuiltinsLibs) -> Vec { + let mut analyser = SemanticAnalyser::with_builtins(builtins); analyser.analyse_program(prog); analyser .diagnostics @@ -777,6 +895,10 @@ fn uint_type() -> QualifierType { } } +fn is_string_like(ty: &Type) -> bool { + matches!(ty, Type::Pointer(inner) if matches!(&**inner, Type::Char)) +} + // ── Type helpers ──────────────────────────────────────────────────────────── /// Retorna `true` se o tipo é numérico (inteiro ou ponto flutuante). 
diff --git a/src/codegen/last/mod.rs b/src/codegen/last/mod.rs index 2f1c6b4..ff42f0b 100644 --- a/src/codegen/last/mod.rs +++ b/src/codegen/last/mod.rs @@ -11,4 +11,4 @@ pub mod abi; pub mod frame; pub mod x86_64; -pub use x86_64::{emit_function, emit_program}; +pub use x86_64::emit_program; diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs index 78fa67b..efdfaa1 100644 --- a/src/codegen/last/x86_64.rs +++ b/src/codegen/last/x86_64.rs @@ -18,12 +18,74 @@ use crate::codegen::last::abi; use crate::codegen::last::frame::{Frame, SlotKey}; use crate::common::ast::expr::{BinOp, UnOp}; use crate::ir::tac::{ConstValue, LabelId, Operand, TacFunction, TacInstr, TacProgram}; +use std::collections::HashMap; /// Acumulador de linhas de assembly com indentacao controlada. struct Emitter { out: String, } +#[derive(Default)] +struct StringPool { + entries: Vec<(String, String)>, + labels: HashMap, +} + +impl StringPool { + fn collect(prog: &TacProgram) -> Self { + let mut pool = Self::default(); + for func in &prog.functions { + for instr in &func.instrs { + pool.visit_instr(instr); + } + } + pool + } + + fn visit_instr(&mut self, instr: &TacInstr) { + match instr { + TacInstr::BinOp { lhs, rhs, .. } => { + self.visit_operand(lhs); + self.visit_operand(rhs); + } + TacInstr::UnOp { src, .. } => self.visit_operand(src), + TacInstr::Copy { dst, src } => { + self.visit_operand(dst); + self.visit_operand(src); + } + TacInstr::CondJump { cond, .. } => self.visit_operand(cond), + TacInstr::Call { args, .. } => { + for arg in args { + self.visit_operand(arg); + } + } + TacInstr::Return { val } => { + if let Some(val) = val { + self.visit_operand(val); + } + } + TacInstr::Jump { .. 
} | TacInstr::Label(_) => {} + } + } + + fn visit_operand(&mut self, op: &Operand) { + if let Operand::Const(ConstValue::String(value)) = op { + self.label_for(value); + } + } + + fn label_for(&mut self, value: &str) -> String { + if let Some(label) = self.labels.get(value) { + return label.clone(); + } + + let label = format!(".LC{}", self.entries.len()); + self.entries.push((label.clone(), value.to_string())); + self.labels.insert(value.to_string(), label.clone()); + label + } +} + impl Emitter { fn new() -> Self { Self { out: String::new() } @@ -64,11 +126,20 @@ impl Emitter { /// Emite o assembly de um programa TAC completo, prefixando a diretiva de /// secao `.text`. pub fn emit_program(prog: &TacProgram) -> String { + let strings = StringPool::collect(prog); let mut em = Emitter::new(); + if !strings.entries.is_empty() { + em.raw(".section .rodata"); + for (label, value) in &strings.entries { + em.raw(&format!("{label}:")); + em.raw(&format!(" .asciz {}", escape_asm_string(value))); + } + em.blank(); + } em.raw(".text"); for func in &prog.functions { em.blank(); - em.append_str(&emit_function(func)); + em.append_str(&emit_function(func, &strings)); } // Marca a stack como nao-executavel (boa pratica; evita aviso do linker e // e o que o proprio GCC adiciona a saida assembly). @@ -79,7 +150,7 @@ pub fn emit_program(prog: &TacProgram) -> String { /// Emite o assembly de uma unica funcao: directiva `.globl`, rotulo, /// prologue, corpo e epilogue. 
-pub fn emit_function(func: &TacFunction) -> String { +fn emit_function(func: &TacFunction, strings: &StringPool) -> String { let mut em = Emitter::new(); em.comment(&format!("function {}", func.name)); em.raw(&format!(".globl {}", func.name)); @@ -108,7 +179,7 @@ pub fn emit_function(func: &TacFunction) -> String { // Corpo let epilogue_label = format!(".L_{}_epilogue", func.name); for instr in &func.instrs { - emit_instr(&mut em, instr, &frame, &func.name, &epilogue_label); + emit_instr(&mut em, instr, &frame, &func.name, &epilogue_label, strings); } // Epilogue (alvo de todos os `return`). Caso a funcao nao tenha `return` @@ -204,6 +275,7 @@ fn emit_instr( frame: &Frame, func_name: &str, epilogue_label: &str, + strings: &StringPool, ) { match instr { TacInstr::Label(label) => { @@ -217,25 +289,25 @@ fn emit_instr( then_label, else_label, } => { - load_op(em, frame, cond, "rax"); + load_op(em, frame, cond, "rax", strings); em.insn("testq %rax, %rax"); em.insn(&format!("jne {}", local_label(func_name, then_label))); em.insn(&format!("jmp {}", local_label(func_name, else_label))); } TacInstr::Copy { dst, src } => { - load_op(em, frame, src, "rax"); + load_op(em, frame, src, "rax", strings); store_op(em, frame, dst, "rax"); } TacInstr::BinOp { dst, op, lhs, rhs } => { - emit_binop(em, op, lhs, rhs, *dst, frame); + emit_binop(em, op, lhs, rhs, *dst, frame, strings); } TacInstr::UnOp { dst, op, src } => { - emit_unop(em, op, src, *dst, frame); + emit_unop(em, op, src, *dst, frame, strings); } - TacInstr::Call { dst, fn_name, args } => emit_call(em, fn_name, args, *dst, frame), + TacInstr::Call { dst, fn_name, args } => emit_call(em, fn_name, args, *dst, frame, strings), TacInstr::Return { val } => { if let Some(val) = val { - load_op(em, frame, val, "rax"); + load_op(em, frame, val, "rax", strings); } em.insn(&format!("jmp {epilogue_label}")); } @@ -249,16 +321,17 @@ fn emit_binop( rhs: &Operand, dst: crate::ir::tac::TempId, frame: &Frame, + strings: &StringPool, ) { 
// Operacoes logicas short-circuit-like precisam normalizar cada operando // para 0/1 individualmente. if matches!(op, BinOp::And | BinOp::Or) { - emit_logical(em, matches!(op, BinOp::Or), lhs, rhs, dst, frame); + emit_logical(em, matches!(op, BinOp::Or), lhs, rhs, dst, frame, strings); return; } - load_op(em, frame, lhs, "rax"); - load_op(em, frame, rhs, "rcx"); + load_op(em, frame, lhs, "rax", strings); + load_op(em, frame, rhs, "rcx", strings); match op { BinOp::Add => em.insn("addq %rcx, %rax"), @@ -303,16 +376,17 @@ fn emit_logical( rhs: &Operand, dst: crate::ir::tac::TempId, frame: &Frame, + strings: &StringPool, ) { // Normaliza lhs para 0/1 em %rdx. - load_op(em, frame, lhs, "rax"); + load_op(em, frame, lhs, "rax", strings); em.insn("testq %rax, %rax"); em.insn("setne %al"); em.insn("movzbq %al, %rax"); em.insn("movq %rax, %rdx"); // Normaliza rhs para 0/1 em %rax. - load_op(em, frame, rhs, "rax"); + load_op(em, frame, rhs, "rax", strings); em.insn("testq %rax, %rax"); em.insn("setne %al"); em.insn("movzbq %al, %rax"); @@ -332,8 +406,9 @@ fn emit_unop( src: &Operand, dst: crate::ir::tac::TempId, frame: &Frame, + strings: &StringPool, ) { - load_op(em, frame, src, "rax"); + load_op(em, frame, src, "rax", strings); match op { UnOp::Neg => em.insn("negq %rax"), UnOp::BitNot => em.insn("notq %rax"), @@ -354,6 +429,7 @@ fn emit_call( args: &[Operand], dst: Option, frame: &Frame, + strings: &StringPool, ) { // Argumentos alem de `MAX_REG_ARGS` vao para a stack do chamador, na // ordem inversa (o primeiro arg de stack fica no topo, mais proximo do @@ -367,13 +443,13 @@ fn emit_call( } let stack_args = &args[args.len().min(abi::MAX_REG_ARGS)..]; for arg in stack_args.iter().rev() { - load_op(em, frame, arg, "rax"); + load_op(em, frame, arg, "rax", strings); em.insn("pushq %rax"); } for (index, arg) in args.iter().take(abi::MAX_REG_ARGS).enumerate() { let reg = abi::arg_register(index).expect("index < MAX_REG_ARGS sempre tem registrador"); - load_op(em, frame, arg, 
"rax"); + load_op(em, frame, arg, "rax", strings); em.insn(&format!("movq %rax, %{reg}")); } @@ -390,8 +466,15 @@ fn emit_call( } /// Carrega `op` para o registrador nomeado (ex.: "rax", "rcx"). -fn load_op(em: &mut Emitter, frame: &Frame, op: &Operand, reg: &str) { +fn load_op(em: &mut Emitter, frame: &Frame, op: &Operand, reg: &str, strings: &StringPool) { match op { + Operand::Const(ConstValue::String(value)) => { + let label = strings + .labels + .get(value) + .expect("string literal deve ter sido coletada"); + em.insn(&format!("leaq {label}(%rip), %{reg}")); + } Operand::Const(value) => em.insn(&format!("movq ${}, %{reg}", const_immediate(value))), Operand::Temp(temp) => { let offset = frame @@ -427,10 +510,28 @@ fn const_immediate(value: &ConstValue) -> String { ConstValue::Int(v) => v.to_string(), ConstValue::Char(c) => (*c as i64).to_string(), ConstValue::Double(_) => panic!("codegen de double nao suportado neste backend"), - ConstValue::String(_) => panic!("codegen de string literal nao suportado neste backend"), + ConstValue::String(_) => unreachable!("string literals are emitted through rodata"), } } +fn escape_asm_string(value: &str) -> String { + let mut out = String::from("\""); + for ch in value.chars() { + match ch { + '\\' => out.push_str("\\\\"), + '"' => out.push_str("\\\""), + '\n' => out.push_str("\\n"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + '\0' => out.push_str("\\0"), + c if c.is_ascii_graphic() || c == ' ' => out.push(c), + c => out.push_str(&format!("\\x{:02x}", c as u32)), + } + } + out.push('"'); + out +} + fn local_label(func_name: &str, label: &LabelId) -> String { format!(".L_{func_name}_L{}", label.0) } @@ -460,7 +561,7 @@ mod tests { #[test] fn emit_function_prologue_pushes_rbp_and_sets_frame() { - let out = emit_function(&asm_simple_return_const()); + let out = emit_function(&asm_simple_return_const(), &StringPool::default()); assert!(out.contains("pushq %rbp")); assert!(out.contains("movq %rsp, %rbp")); @@ 
-469,7 +570,7 @@ mod tests { #[test] fn emit_function_declares_global_symbol() { - let out = emit_function(&asm_simple_return_const()); + let out = emit_function(&asm_simple_return_const(), &StringPool::default()); assert!(out.contains(".globl main")); assert!(out.contains("main:\n")); @@ -477,7 +578,7 @@ mod tests { #[test] fn return_const_loads_immediate_into_rax() { - let out = emit_function(&asm_simple_return_const()); + let out = emit_function(&asm_simple_return_const(), &StringPool::default()); assert!(out.contains("movq $42, %rax")); } @@ -500,7 +601,7 @@ mod tests { ], ); - let out = emit_function(&f); + let out = emit_function(&f, &StringPool::default()); assert!(out.contains("movq %rdi, -8(%rbp)")); // spill arg a assert!(out.contains("movq %rsi, -16(%rbp)")); // spill arg b @@ -525,7 +626,7 @@ mod tests { ], ); - let out = emit_function(&f); + let out = emit_function(&f, &StringPool::default()); assert!(out.contains("cqto")); assert!(out.contains("idivq %rcx")); @@ -544,7 +645,7 @@ mod tests { }], ); - let out = emit_function(&f); + let out = emit_function(&f, &StringPool::default()); assert!(out.contains("movq %rdx, %rax")); } @@ -562,7 +663,7 @@ mod tests { }], ); - let out = emit_function(&f); + let out = emit_function(&f, &StringPool::default()); assert!(out.contains("cmpq %rcx, %rax")); assert!(out.contains("setl %al")); @@ -589,7 +690,7 @@ mod tests { ], ); - let out = emit_function(&f); + let out = emit_function(&f, &StringPool::default()); assert!(out.contains("movq %rax, %rdi")); assert!(out.contains("movq %rax, %rsi")); @@ -616,7 +717,7 @@ mod tests { ], ); - let out = emit_function(&f); + let out = emit_function(&f, &StringPool::default()); // 7th arg (index 6) e o unico passado pela stack; aligna a stack com // 8 bytes de padding (1 arg de stack e impar) antes do push. 
@@ -641,7 +742,7 @@ mod tests { }], ); - let out = emit_function(&f); + let out = emit_function(&f, &StringPool::default()); // 2 args de stack (indices 6 e 7): par, sem padding necessario. assert!(!out.contains("subq $8, %rsp")); @@ -650,18 +751,21 @@ mod tests { #[test] fn call_with_two_args_emits_no_stack_cleanup() { - let out = emit_function(&func( - "caller2", - Vec::new(), - vec![TacInstr::Call { - dst: Some(TempId(0)), - fn_name: "soma".to_string(), - args: vec![ - Operand::Const(ConstValue::Int(1)), - Operand::Const(ConstValue::Int(2)), - ], - }], - )); + let out = emit_function( + &func( + "caller2", + Vec::new(), + vec![TacInstr::Call { + dst: Some(TempId(0)), + fn_name: "soma".to_string(), + args: vec![ + Operand::Const(ConstValue::Int(1)), + Operand::Const(ConstValue::Int(2)), + ], + }], + ), + &StringPool::default(), + ); assert!(!out.contains("pushq %rax")); assert!(!out.contains("addq $16, %rsp")); @@ -689,7 +793,7 @@ mod tests { ], ); - let out = emit_function(&f); + let out = emit_function(&f, &StringPool::default()); assert!(out.contains("testq %rax, %rax")); assert!(out.contains("jne .L_cond_L0")); @@ -700,7 +804,7 @@ mod tests { #[test] fn epilogue_label_is_emitted_once() { - let out = emit_function(&asm_simple_return_const()); + let out = emit_function(&asm_simple_return_const(), &StringPool::default()); assert_eq!(out.matches(".L_main_epilogue:").count(), 1); } @@ -730,7 +834,7 @@ mod tests { }], ); - let out = emit_function(&f); + let out = emit_function(&f, &StringPool::default()); assert!(out.contains("setne %al")); assert!(out.contains("andq %rdx, %rax")); @@ -749,7 +853,7 @@ mod tests { }], ); - let out = emit_function(&f); + let out = emit_function(&f, &StringPool::default()); assert!(out.contains("orq %rdx, %rax")); } diff --git a/src/common/builtins.rs b/src/common/builtins.rs new file mode 100644 index 0000000..1c540bd --- /dev/null +++ b/src/common/builtins.rs @@ -0,0 +1,5 @@ +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] 
+pub struct BuiltinsLibs { + pub stdbool: bool, + pub stdio: bool, +} diff --git a/src/common/errors/types.rs b/src/common/errors/types.rs index 7a3f3b2..4129f86 100644 --- a/src/common/errors/types.rs +++ b/src/common/errors/types.rs @@ -206,6 +206,10 @@ pub enum SemanticErrorKind { NotIndexable { found: String, }, + MissingLibraryHeader { + header: String, + symbol: String, + }, } #[derive(Debug)] @@ -309,6 +313,15 @@ impl ToReport for SemanticError { self.span.clone(), format!("'{}' não é indexável (esperado array ou ponteiro)", found), ), + SemanticErrorKind::MissingLibraryHeader { header, symbol } => { + Report::new("missing library header") + .with_span(self.span.clone()) + .with_label( + self.span.clone(), + format!("'{}' requires <{}>", symbol, header), + ) + .with_help("adiciona o #include correto antes de usar esse simbolo") + } } } } diff --git a/src/common/mod.rs b/src/common/mod.rs index 5acf327..a0f558d 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -1,4 +1,5 @@ pub mod ast; +pub mod builtins; pub mod errors; pub mod input; pub mod utils; diff --git a/src/lexer/rules/identifiers.rs b/src/lexer/rules/identifiers.rs index ad8a567..d009338 100644 --- a/src/lexer/rules/identifiers.rs +++ b/src/lexer/rules/identifiers.rs @@ -67,6 +67,8 @@ fn lookup_keyword(ident: &str) -> Option { "volatile" => Some(TokenKind::Volatile), "inline" => Some(TokenKind::Inline), "sizeof" => Some(TokenKind::Sizeof), + "true" => Some(TokenKind::IntLiteral(1)), + "false" => Some(TokenKind::IntLiteral(0)), // Não é keyword — é identificador de usuário _ => None, diff --git a/src/lexer/scanner.rs b/src/lexer/scanner.rs index 55e4fb5..161c144 100644 --- a/src/lexer/scanner.rs +++ b/src/lexer/scanner.rs @@ -1,3 +1,4 @@ +use crate::common::builtins::BuiltinsLibs; use crate::common::errors::{ error_data::Span, types::{CompilerError, LexicalError, LexicalErrorKind}, @@ -13,6 +14,7 @@ pub struct Scanner { pub src: SourceFile, pub tokens: Vec, pub diagnostics: Vec, + pub 
builtins: BuiltinsLibs, /// Pilha de delimitadores abertos ainda não fechados: (char, linha, coluna) delimiter_stack: Vec<(char, usize, usize)>, /// Posição (em bytes) do início do token sendo reconhecido; capturada antes de consumir o primeiro char. @@ -26,6 +28,7 @@ impl Scanner { src, tokens: Vec::new(), diagnostics: Vec::new(), + builtins: BuiltinsLibs::default(), delimiter_stack: Vec::new(), token_start: 0, } @@ -33,6 +36,8 @@ impl Scanner { /// Executa o scanner até o fim do arquivo, populando `tokens` e `diagnostics`, e retorna os tokens produzidos. pub fn scan(&mut self) -> &[Token] { + self.preprocess_builtin_includes(); + while !self.src.is_at_end() { self.skip_whitespaces_and_comments(); if self.src.is_at_end() { @@ -61,6 +66,32 @@ impl Scanner { &self.tokens } + fn preprocess_builtin_includes(&mut self) { + let source = self.src.source.as_str(); + if !source.contains("#include ") && !source.contains("#include ") { + return; + } + + let mut rewritten = String::new(); + for line in source.lines() { + let trimmed = line.trim(); + if trimmed == "#include " { + self.builtins.stdbool = true; + rewritten.push_str("typedef int bool;\n"); + } else if trimmed == "#include " { + self.builtins.stdio = true; + rewritten.push('\n'); + } else { + rewritten.push_str(line); + rewritten.push('\n'); + } + } + + let old_path = self.src.path.clone(); + self.src = SourceFile::from_string(rewritten); + self.src.path = old_path; + } + /// Lê o próximo char e despacha para o método de lexing correto conforme o caractere. 
fn next_token(&mut self) { let line = self.src.line(); diff --git a/src/main.rs b/src/main.rs index 4e56f2b..4ed58cb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -use crusty::analyser::analyse; +use crusty::analyser::analyse_with_builtins; use crusty::codegen::inter::opt::{pipeline_for_level, OptLevel}; use crusty::codegen::inter::Cfg; use crusty::codegen::last; @@ -251,7 +251,7 @@ fn run(source: SourceFile, args: &CliArgs) -> Result<(), Box> { } // ── Stage 3: Semantic ──────────────────────────────────────────────────── - let sem_errors = analyse(&program); + let sem_errors = analyse_with_builtins(&program, scanner.builtins); let sem_count = sem_errors.len(); if sem_count > 0 { eprintln!("\n=== Semantic Errors ({sem_count}) ==="); diff --git a/src/tests/semantic_test.rs b/src/tests/semantic_test.rs index 2e3b523..12c40a2 100644 --- a/src/tests/semantic_test.rs +++ b/src/tests/semantic_test.rs @@ -5,6 +5,7 @@ mod tests { use crate::common::ast::decl::Decl; use crate::common::ast::expr::{Expr, Literal, MemberAccess}; use crate::common::ast::stmt::Stmt; + use crate::common::builtins::BuiltinsLibs; use crate::common::errors::error_data::Span; use crate::common::errors::types::{ CompilerError, Diagnostic, SemanticErrorKind, SemanticWarningKind, @@ -43,6 +44,10 @@ mod tests { Expr::Ident(name.to_string(), span()) } + fn call(name: &str, args: Vec) -> Expr { + Expr::Call(Box::new(ident(name)), args, span()) + } + fn program(stmts: Vec) -> Program { Program { decls: vec![Decl::Function( @@ -393,6 +398,55 @@ mod tests { assert!(analyse(&prog).is_empty()); } + #[test] + fn printf_without_stdio_header_emits_missing_header_error() { + let prog = program(vec![Stmt::ExprStmt( + call( + "printf", + vec![ + Expr::Literal(Literal::String("%d".into()), span()), + int_lit(1), + ], + ), + span(), + )]); + + let mut analyser = SemanticAnalyser::with_builtins(BuiltinsLibs::default()); + analyser.analyse_program(&prog); + + assert!(analyser.diagnostics.iter().any(|e| 
matches!( + e, + CompilerError::Semantic(se) + if matches!(&se.kind, SemanticErrorKind::MissingLibraryHeader { header, symbol } + if header == "stdio.h" && symbol == "printf") + ))); + } + + #[test] + fn printf_with_stdio_header_accepts_basic_call() { + let prog = program(vec![Stmt::ExprStmt( + call( + "printf", + vec![ + Expr::Literal(Literal::String("%d".into()), span()), + int_lit(1), + ], + ), + span(), + )]); + + let mut analyser = SemanticAnalyser::with_builtins(BuiltinsLibs { + stdbool: false, + stdio: true, + }); + analyser.analyse_program(&prog); + + assert!( + analyser.diagnostics.is_empty(), + "printf com stdio.h deve ser aceito" + ); + } + // ── Assign: verificação de compatibilidade de tipo ──────────────────────── #[test] diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index 32c6e92..0363c8f 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -11,7 +11,7 @@ use std::path::PathBuf; use std::process::{Command, ExitStatus}; -use crusty::analyser::analyse; +use crusty::analyser::analyse_with_builtins; use crusty::codegen::last::emit_program; use crusty::common::input::source::SourceFile; use crusty::ir::lower::lower_program; @@ -54,7 +54,7 @@ fn compile_to_asm(source: &str) -> String { .parse_program() .unwrap_or_else(|errors| panic!("erros de parser inesperados: {errors:?}")); - let sem_errors = analyse(&program); + let sem_errors = analyse_with_builtins(&program, scanner.builtins); assert!( sem_errors.is_empty(), "erros semanticos inesperados: {sem_errors:?}" diff --git a/tests/integration/valid/bool_literals.c b/tests/integration/valid/bool_literals.c new file mode 100644 index 0000000..1141772 --- /dev/null +++ b/tests/integration/valid/bool_literals.c @@ -0,0 +1,9 @@ +#include + +int main() { + bool ok = true; + if (ok) { + return false; + } + return true; +} diff --git a/tests/integration_test.rs b/tests/integration_test.rs index d5e86b1..c5ef9ac 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs 
@@ -1,6 +1,6 @@ use std::path::PathBuf; -use crusty::analyser::analyse; +use crusty::analyser::analyse_with_builtins; use crusty::common::errors::types::{CompilerError, Diagnostic, SemanticErrorKind}; use crusty::common::input::source::SourceFile; use crusty::lexer::scanner::Scanner; @@ -24,7 +24,7 @@ fn compile_file(rel: &str) -> CompileResult { let mut parser = Parser::new(tokens); match parser.parse_program() { - Ok(program) => diagnostics.extend(analyse(&program)), + Ok(program) => diagnostics.extend(analyse_with_builtins(&program, scanner.builtins)), Err(parse_errs) => diagnostics.extend(parse_errs.into_iter().map(Diagnostic::Error)), } @@ -61,6 +61,11 @@ fn valid_hello_world() { compile_valid("hello_world"); } +#[test] +fn valid_bool_literals() { + compile_valid("bool_literals"); +} + #[test] fn valid_arithmetic() { compile_valid("arithmetic"); From 64619060f81ea9c146f115dd8f33e93a212abe6f Mon Sep 17 00:00:00 2001 From: Philipe Caetano Date: Tue, 23 Jun 2026 13:07:49 -0300 Subject: [PATCH 52/91] feat/ add function run: search back-edges --- src/codegen/inter/opt/licm.rs | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/codegen/inter/opt/licm.rs b/src/codegen/inter/opt/licm.rs index 7f129f9..56ab14d 100644 --- a/src/codegen/inter/opt/licm.rs +++ b/src/codegen/inter/opt/licm.rs @@ -1,5 +1,6 @@ use super::OptPass; -use crate::codegen::inter::Cfg; +use crate::codegen::inter::{Cfg, BlockId}; +use crate::collections::{HashMap, HashSet}; pub struct LoopInvariantCodeMotionPass; @@ -9,6 +10,31 @@ impl OptPass for LoopInvariantCodeMotionPass { } fn run(&self, _cfg: &mut Cfg) -> bool { - false + let dominators = self.compute_dominators(cfg); + + let mut mutated = false; + let mut back_edge = Vec::new(); // aresta que liga o bloco dominante a dominado + + for &block in cfg.block.keys(){ + if let Some(cfg_block) = cfg.blocks.get(&block){ + for &succ in &cfg_block.succesors { + if let Some(doms) = dominators.get(&block){ 
+ if doms.contains(&succ){ + back_edges.push((succ, block)); + } + } + } + + } + } + + for (header, tail) in back_edges { + let loop_body = self.get_loop_body(cfg, header, tail); + + prtinln("Laço detectado! Header {:?}", header, tail); + println("Blocos pertencentes ao laço: {:?}", loop_body); + + TODO: + } } } From 61a1fd247cb95edffd44e159d3acdf53078f7bcb Mon Sep 17 00:00:00 2001 From: Philipe Caetano Date: Tue, 23 Jun 2026 13:15:18 -0300 Subject: [PATCH 53/91] feat/add function compute_dominators: search and save dominators blocks --- src/codegen/inter/opt/licm.rs | 45 +++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/codegen/inter/opt/licm.rs b/src/codegen/inter/opt/licm.rs index 56ab14d..49aa23d 100644 --- a/src/codegen/inter/opt/licm.rs +++ b/src/codegen/inter/opt/licm.rs @@ -38,3 +38,48 @@ impl OptPass for LoopInvariantCodeMotionPass { } } } + +impl LoopInvariantCodeMotionPass{ + fn compute_dominators(&self, cfg: &Cfg) -> HashMap>{ + let mut dominators: HashMap> = HashMap::new(); + let all_blocks: HashSet = cfg.blocks.key().cloned().collect(); + let entry = cfg.entry_block; + + dominators.insert(entry, vec![entry].intro_iter().collect()); + + for &block in &all_blocks{ + if block != entry { + dominators.insert(block, all_blocks.clone()); // (key, value) se já existi no dicionario subtitui + } + } + + let mut changed = true; + while changed { + changed = false; + for &block in &all_blocks{ + if block == entry {continue;} + + let preds = cfg.get_predecessors(block); + if preds.is_empty() {continue;} + + let mut current_intersection = dominators.get(&preds[0]).cloned().unwrap_or_default(); + for pred in preds.iter().skip(1) { + if let Some(pred_doms) = dominators.get(pred) { + current_intersection = current_intersection + .intersection(pred_doms) + .cloned() + .collect(); + } + } + current_intersection.insert(block); + + let old_doms = dominators.get(&block).unwrap(); + if ¤t_intersection != old_doms { + 
dominators.insert(block, current_intersection); + changed = true; + } + } + + } + +} \ No newline at end of file From fc105a188a84734558bc5b3509b7ad5bb3e72428 Mon Sep 17 00:00:00 2001 From: Philipe Caetano Date: Tue, 23 Jun 2026 13:29:42 -0300 Subject: [PATCH 54/91] feat/ add function get_lopp_body: search and save block that makes part of a loop --- src/codegen/inter/opt/licm.rs | 49 +++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/src/codegen/inter/opt/licm.rs b/src/codegen/inter/opt/licm.rs index 49aa23d..da4286d 100644 --- a/src/codegen/inter/opt/licm.rs +++ b/src/codegen/inter/opt/licm.rs @@ -1,6 +1,6 @@ use super::OptPass; use crate::codegen::inter::{Cfg, BlockId}; -use crate::collections::{HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; pub struct LoopInvariantCodeMotionPass; @@ -9,18 +9,18 @@ impl OptPass for LoopInvariantCodeMotionPass { "loop-invariant-code-motion" } - fn run(&self, _cfg: &mut Cfg) -> bool { + fn run(&self, cfg: &mut Cfg) -> bool { let dominators = self.compute_dominators(cfg); let mut mutated = false; let mut back_edge = Vec::new(); // aresta que liga o bloco dominante a dominado - for &block in cfg.block.keys(){ + for &block in cfg.blocks.keys(){ if let Some(cfg_block) = cfg.blocks.get(&block){ - for &succ in &cfg_block.succesors { + for &succ in &cfg_block.successors { if let Some(doms) = dominators.get(&block){ if doms.contains(&succ){ - back_edges.push((succ, block)); + back_edge.push((succ, block)); } } } @@ -28,24 +28,25 @@ impl OptPass for LoopInvariantCodeMotionPass { } } - for (header, tail) in back_edges { + for (header, tail) in back_edge { let loop_body = self.get_loop_body(cfg, header, tail); - prtinln("Laço detectado! Header {:?}", header, tail); - println("Blocos pertencentes ao laço: {:?}", loop_body); + prtinln!("Laço detectado! 
Header {:?} Tail {:?}", header, tail); + println!("Blocos pertencentes ao laço: {:?}", loop_body); - TODO: + todo!(); } + mutated } } impl LoopInvariantCodeMotionPass{ - fn compute_dominators(&self, cfg: &Cfg) -> HashMap>{ + fn compute_dominators(&self, cfg: &Cfg) -> HashMap>{ let mut dominators: HashMap> = HashMap::new(); - let all_blocks: HashSet = cfg.blocks.key().cloned().collect(); + let all_blocks: HashSet = cfg.blocks.keys().cloned().collect(); let entry = cfg.entry_block; - dominators.insert(entry, vec![entry].intro_iter().collect()); + dominators.insert(entry, vec![entry].into_iter().collect()); for &block in &all_blocks{ if block != entry { @@ -77,9 +78,29 @@ impl LoopInvariantCodeMotionPass{ if ¤t_intersection != old_doms { dominators.insert(block, current_intersection); changed = true; + } } - } - } + } + dominators + } + + fn get_loop_body(&self, cfg: &Cfg, header: BlockId, tail: BlockId) -> HashSet { + let mut loop_body = HashSet::new(); + loop_body.insert(header); + loop_body.insert(tail); + + let mut stack = vec![tail]; + + while let Some(node) = stack.pop(){ + for pred in cfg.get_predecessors(node){ + if !loop_body.contains(&pred){ + loop_body.insert(&pred); + stack.push(pred); + } + } + } + loop_body + } } \ No newline at end of file From 9b12eb4ad592441ec657b6aa056795eea7a75dcb Mon Sep 17 00:00:00 2001 From: guxvr Date: Tue, 23 Jun 2026 16:09:59 -0300 Subject: [PATCH 55/91] feat: implement semantic validation for subset C (Issue #160) --- docs/semantic.md | 43 +++-- src/analyser/semantic.rs | 142 +++++++++++++-- src/common/errors/types.rs | 46 +++++ src/tests/analyzer_test.rs | 5 +- src/tests/semantic_test.rs | 347 +++++++++++++++++++++++++++++++++++-- 5 files changed, 542 insertions(+), 41 deletions(-) diff --git a/docs/semantic.md b/docs/semantic.md index 1304bc2..c0eee9a 100644 --- a/docs/semantic.md +++ b/docs/semantic.md @@ -15,6 +15,9 @@ struct SemanticAnalyser { sym: SymbolTable, current_fn_ret: Option, // tipo de retorno da função atual 
diagnostics: Vec, + warnings: Vec, + loop_depth: usize, // para validar break/continue + switch_depth: usize, // para validar break em switch } ``` @@ -83,6 +86,7 @@ analyse_stmt::For → enter_scope / exit_scope (init pode declarar variáve 3. Declara cada parâmetro no novo escopo 4. Analisa todos os statements do corpo 5. Restaura `current_fn_ret`; `exit_scope` +6. Verifica a heurística `body_always_returns` se a função não for `void`; emite aviso `MissingReturn` se faltar retorno. ### Struct @@ -170,34 +174,51 @@ Promoção numérica: `Double > Float > Long > Int > Short/Char`. | Nó | Tipo retornado | |---|---| -| `Unary`, `Prefix`, `Postfix` | mesmo tipo do operando | +| `Unary(AddrOf)` | `Pointer(T)` onde `T` é o tipo do operando | +| `Unary(Deref)` | tipo base `T` de `Pointer(T)` ou `Array(T)` | +| `Unary(-, ~)`, `Prefix`, `Postfix` | mesmo tipo do operando | | `CompoundAssign` | tipo do LHS | | `Cast(qty, _)` | `qty` resolvido | | `Sizeof(_)`, `SizeofType(_)` | `unsigned int` | -| `Call(callee, args)` | `void` (sentinela — lookup de retorno não implementado) | -| `Index(arr, idx)` | tipo do elemento (desreferencia `Array(T)` ou `Pointer(T)`) | -| `Ternary(cond, then, else)` | tipo do ramo `then` | +| `Call(callee, args)` | Tipo de retorno registrado na assinatura. Checa aridade e tipos de argumentos. | +| `Index(arr, idx)` | tipo do elemento (desreferencia `Array(T)` ou `Pointer(T)`). `idx` deve ser numérico inteiro. | +| `Ternary(cond, then, else)` | O tipo promovido/comum dos ramos `then` e `else`, após verificação de compatibilidade. | --- -## Erros Semânticos +## Diagnósticos Semânticos -Todos são do tipo `CompilerError::Semantic(SemanticError { span, kind })`: +Erros geram `CompilerError::Semantic`, avisos geram `CompilerWarning::Semantic`. A análise não é interrompida por diagnósticos. 
+ +### Erros (CompilerError) | Kind | Causa | |---|---| | `Redeclaration(name)` | Nome já declarado no escopo atual | | `UndefinedVariable(name)` | Identificador não encontrado em nenhum escopo | +| `UndefinedFunction(name)` | Chamada de função sem definição ou protótipo registrado | | `AssignToConst(name)` | Atribuição a variável declarada com `const` | -| `TypeMismatch { expected, found }` | Tipos incompatíveis em atribuição ou operação binária | +| `TypeMismatch { expected, found }` | Tipos incompatíveis (atribuição, operação binária, retorno, ternário) | | `UndefinedStruct(name)` | Acesso a membro de struct não registrada | -| `FieldNotFound { struct_name, field_name }` | Campo não existe na struct | +| `FieldNotFound { struct, field }` | Campo não existe na struct | +| `InvalidSwitchType` | Expressão de switch não é de tipo inteiro | +| `BreakOutsideLoop` | Uso de `break` fora de um bloco iterativo (`for`/`while`) ou `switch` | +| `ContinueOutsideLoop` | Uso de `continue` fora de um bloco iterativo (`for`/`while`) | +| `ArityMismatch { expected, found }` | Quantidade incorreta de argumentos na chamada de função | +| `NotIndexable` | Tentativa de usar operador de índice `[]` em tipo que não é array nem ponteiro | +| `InvalidIndexType` | Expressão usada no índice não é do tipo inteiro | + +### Avisos (CompilerWarning) + +| Kind | Causa | +|---|---| +| `MissingReturn(name)` | Função declarada com retorno não-void sem garantia de `return` em todos os caminhos | +| `UnusedVariable(name)` | Variável local declarada mas não referenciada | +| `MayBeUninitialized(name)`| Variável lida sem antes ter garantido inicialização (por declaração ou atribuição) | --- ## Limitações atuais (TODO) -- Verificação de tipo de retorno de função (`return expr` vs. 
tipo declarado) -- Lookup de tipo de retorno em chamadas de função (`Call` retorna `void` sentinela) -- Verificação de compatibilidade entre ramos `then`/`else` no ternário - Aritmética de ponteiro para `Sub` (ponteiro − ponteiro → `ptrdiff_t`) +- Constant folding robusto (hoje o compilador avalia as heurísticas baseado estaticamente na árvore do código, sem resolver valores em runtime na análise) diff --git a/src/analyser/semantic.rs b/src/analyser/semantic.rs index c7db0b9..553c908 100644 --- a/src/analyser/semantic.rs +++ b/src/analyser/semantic.rs @@ -16,6 +16,12 @@ pub struct SemanticAnalyser { pub current_fn_ret: Option, pub diagnostics: Vec, pub warnings: Vec, + /// Profundidade de loops aninhados (for/while/do-while). + /// Usada para validar `continue` e `break`. + pub loop_depth: usize, + /// Profundidade de `switch` aninhados. + /// Usada para validar `break` (switch aceita break, mas não continue). + pub switch_depth: usize, } impl SemanticAnalyser { @@ -25,6 +31,8 @@ impl SemanticAnalyser { current_fn_ret: None, diagnostics: Vec::new(), warnings: Vec::new(), + loop_depth: 0, + switch_depth: 0, } } @@ -168,7 +176,7 @@ impl SemanticAnalyser { } self.sym.enter_scope(); - let prev_ret = self.current_fn_ret.replace(resolved_ret); + let prev_ret = self.current_fn_ret.replace(resolved_ret.clone()); for (qty, name) in params { let resolved_qty = self.resolve_type(qty); @@ -193,6 +201,15 @@ impl SemanticAnalyser { self.analyse_stmt(stmt); } + // Aviso conservador: função não-void cujo corpo não termina com return. 
+ if resolved_ret.ty != Type::Void && !body_always_returns(body) { + self.warnings + .push(CompilerWarning::Semantic(SemanticWarning { + span: span.clone(), + kind: SemanticWarningKind::MissingReturn(name.clone()), + })); + } + self.current_fn_ret = prev_ret; self.exit_scope_checking_unused(); } @@ -244,10 +261,23 @@ impl SemanticAnalyser { pub fn analyse_stmt(&mut self, stmt: &Stmt) { match stmt { Stmt::VarDecl(qty, name, init, span) => { - if let Some(expr) = init { - self.analyse_expr(expr); - } let resolved_qty = self.resolve_type(qty); + let initialized = if let Some(expr) = init { + let init_ty = self.analyse_expr(expr); + if !types_compatible_for_assign(&resolved_qty.ty, &init_ty.ty) { + self.diagnostics + .push(CompilerError::Semantic(SemanticError { + span: expr.span(), + kind: SemanticErrorKind::TypeMismatch { + expected: type_name(&resolved_qty.ty), + found: type_name(&init_ty.ty), + }, + })); + } + true + } else { + false + }; let symbol = Symbol { name: name.clone(), ty: resolved_qty.clone(), @@ -257,7 +287,7 @@ impl SemanticAnalyser { prototype_only: false, // Locais começam "não usadas"; a leitura marca `used = true`. 
used: false, - initialized: init.is_some(), + initialized, }; if let Err(e) = self.sym.declare(symbol) { self.diagnostics.push(e); @@ -287,7 +317,7 @@ impl SemanticAnalyser { } (Some(expected), Some(e)) => { let found_expr_qty = self.analyse_expr(e); - if expected.ty != found_expr_qty.ty { + if !types_compatible_for_assign(&expected.ty, &found_expr_qty.ty) { self.diagnostics .push(CompilerError::Semantic(SemanticError { span: span.clone(), @@ -320,10 +350,14 @@ impl SemanticAnalyser { } Stmt::While(cond, body, _) => { self.analyse_expr(cond); + self.loop_depth += 1; self.analyse_stmt(body); + self.loop_depth -= 1; } Stmt::DoWhile(cond, body, _) => { + self.loop_depth += 1; self.analyse_stmt(body); + self.loop_depth -= 1; self.analyse_expr(cond); } Stmt::For(init, cond, inc, body, _) => { @@ -337,18 +371,56 @@ impl SemanticAnalyser { if let Some(e) = inc { self.analyse_expr(e); } + self.loop_depth += 1; self.analyse_stmt(body); + self.loop_depth -= 1; self.exit_scope_checking_unused(); } - Stmt::Switch(expr, cases, _) => { - self.analyse_expr(expr); + Stmt::Switch(expr, cases, span) => { + let disc_ty = self.analyse_expr(expr); + // Em C, o discriminante do switch deve ser de tipo inteiro ou enum. 
+ let is_switch_ok = matches!( + disc_ty.ty, + Type::Int | Type::Long | Type::Short | Type::Char | Type::Enum(_) + ); + if !is_switch_ok { + self.diagnostics + .push(CompilerError::Semantic(SemanticError { + span: span.clone(), + kind: SemanticErrorKind::InvalidSwitchType { + found: type_name(&disc_ty.ty), + }, + })); + } + self.switch_depth += 1; for case in cases { + if let crate::common::ast::stmt::SwitchLabel::Case(case_expr) = &case.label { + self.analyse_expr(case_expr); + } for s in &case.stmts { self.analyse_stmt(s); } } + self.switch_depth -= 1; + } + Stmt::Break(span) => { + if self.loop_depth == 0 && self.switch_depth == 0 { + self.diagnostics + .push(CompilerError::Semantic(SemanticError { + span: span.clone(), + kind: SemanticErrorKind::BreakOutsideLoop, + })); + } + } + Stmt::Continue(span) => { + if self.loop_depth == 0 { + self.diagnostics + .push(CompilerError::Semantic(SemanticError { + span: span.clone(), + kind: SemanticErrorKind::ContinueOutsideLoop, + })); + } } - Stmt::Break(_) | Stmt::Continue(_) => {} } } @@ -422,7 +494,28 @@ impl SemanticAnalyser { } } } - Expr::Unary(_, e, _) | Expr::Prefix(_, e, _) | Expr::Postfix(_, e, _) => { + Expr::Unary(op, e, _) => { + let inner_ty = self.analyse_expr(e); + match op { + crate::common::ast::expr::UnOp::AddrOf => QualifierType { + ty: Type::Pointer(Box::new(inner_ty.ty)), + is_const: false, + is_unsigned: false, + }, + crate::common::ast::expr::UnOp::Deref => { + match inner_ty.ty { + Type::Pointer(base) | Type::Array(base) => QualifierType { + ty: *base, + is_const: inner_ty.is_const, + is_unsigned: inner_ty.is_unsigned, + }, + _ => inner_ty, + } + } + _ => inner_ty, + } + } + Expr::Prefix(_, e, _) | Expr::Postfix(_, e, _) => { self.analyse_expr(e) } Expr::CompoundAssign(_, lhs, rhs, _) => { @@ -747,6 +840,35 @@ pub fn analyse(prog: &Program) -> Vec { .collect() } +/// Heurística conservadora: retorna `true` se o último statement do corpo é +/// `Stmt::Return(Some(_))` ou se algum statement de nível 
de topo claramente +/// sempre retorna (ex.: `if` com then E else ambos retornando). +/// +/// Não tenta análise de fluxo completa; apenas evita falsos positivos nas +/// situações mais comuns de funções de disciplina. +fn body_always_returns(stmts: &[Stmt]) -> bool { + match stmts.last() { + Some(Stmt::Return(Some(_), _)) => true, + Some(Stmt::If(_, then, Some(else_), _)) => { + stmt_always_returns(then) && stmt_always_returns(else_) + } + Some(Stmt::Block(inner, _)) => body_always_returns(inner), + _ => false, + } +} + +/// Verifica se um único statement sempre retorna (auxiliar de `body_always_returns`). +fn stmt_always_returns(stmt: &Stmt) -> bool { + match stmt { + Stmt::Return(Some(_), _) => true, + Stmt::Block(stmts, _) => body_always_returns(stmts), + Stmt::If(_, then, Some(else_), _) => { + stmt_always_returns(then) && stmt_always_returns(else_) + } + _ => false, + } +} + fn infer_literal_type(lit: &Literal) -> QualifierType { let ty = match lit { Literal::Int(_) => Type::Int, diff --git a/src/common/errors/types.rs b/src/common/errors/types.rs index 7a3f3b2..deccb78 100644 --- a/src/common/errors/types.rs +++ b/src/common/errors/types.rs @@ -206,6 +206,14 @@ pub enum SemanticErrorKind { NotIndexable { found: String, }, + /// Expressão do `switch` não é de tipo inteiro. + InvalidSwitchType { + found: String, + }, + /// `break` usado fora de loop ou `switch`. + BreakOutsideLoop, + /// `continue` usado fora de loop. 
+ ContinueOutsideLoop, } #[derive(Debug)] @@ -309,6 +317,32 @@ impl ToReport for SemanticError { self.span.clone(), format!("'{}' não é indexável (esperado array ou ponteiro)", found), ), + SemanticErrorKind::InvalidSwitchType { found } => { + Report::new("invalid switch expression type") + .with_span(self.span.clone()) + .with_label( + self.span.clone(), + format!( + "expressão do switch deve ser inteira, encontrado '{}'", + found + ), + ) + .with_help("use int, char, short, long ou enum como discriminante do switch") + } + SemanticErrorKind::BreakOutsideLoop => Report::new("break outside loop or switch") + .with_span(self.span.clone()) + .with_label( + self.span.clone(), + "'break' só pode ser usado dentro de loop ou switch".to_string(), + ) + .with_help("mova o 'break' para dentro de um for, while, do-while ou switch"), + SemanticErrorKind::ContinueOutsideLoop => Report::new("continue outside loop") + .with_span(self.span.clone()) + .with_label( + self.span.clone(), + "'continue' só pode ser usado dentro de loop".to_string(), + ) + .with_help("mova o 'continue' para dentro de um for, while ou do-while"), } } } @@ -326,6 +360,8 @@ pub enum SemanticWarningKind { UnusedVariable(String), /// Variável lida antes de receber qualquer inicializador ou atribuição. MayBeUninitialized(String), + /// Função não-void sem `return` detectado no caminho de saída principal. 
+ MissingReturn(String), } #[derive(Debug)] @@ -355,6 +391,16 @@ impl ToReport for SemanticWarning { ) .with_help("inicialize a variavel antes de usa-la") } + SemanticWarningKind::MissingReturn(fn_name) => Report::new("missing return") + .with_span(self.span.clone()) + .with_label( + self.span.clone(), + format!( + "função '{}' não-void pode encerrar sem retornar valor", + fn_name + ), + ) + .with_help("adicione um 'return ;' ao final da função"), } } } diff --git a/src/tests/analyzer_test.rs b/src/tests/analyzer_test.rs index 614c68b..eff9392 100644 --- a/src/tests/analyzer_test.rs +++ b/src/tests/analyzer_test.rs @@ -147,6 +147,7 @@ mod test { #[test] fn test_return_type_mismatch_error() { + // int f() { return "texto"; } → incompatível (char* não é conversível para int) let mut analyser = SemanticAnalyser::new(); let span = dummy_span(); @@ -156,7 +157,7 @@ mod test { is_unsigned: false, }; - let expr_errada = Expr::Literal(Literal::Double(2.5), span.clone()); + let expr_errada = Expr::Literal(Literal::String("texto".to_string()), span.clone()); let stmt_return = Stmt::Return(Some(expr_errada), span.clone()); let funcao_ast = Decl::Function( @@ -174,7 +175,7 @@ mod test { assert_eq!( analyser.diagnostics.len(), 1, - "Deveria ter detectado incopatibilidade: Int x Double." + "Deveria ter detectado incompatibilidade: int x char* (return de string em função int)." 
); } diff --git a/src/tests/semantic_test.rs b/src/tests/semantic_test.rs index 2e3b523..0438976 100644 --- a/src/tests/semantic_test.rs +++ b/src/tests/semantic_test.rs @@ -46,7 +46,7 @@ mod tests { fn program(stmts: Vec) -> Program { Program { decls: vec![Decl::Function( - qty(Type::Int), + qty(Type::Void), "main".into(), vec![], stmts, @@ -524,11 +524,13 @@ mod tests { #[test] fn unused_variable_emits_warning() { - // int x = 5; return 0; -> x declarada mas nunca lida - let prog = program(vec![ - Stmt::VarDecl(qty(Type::Int), "x".into(), Some(int_lit(5)), span()), - Stmt::Return(Some(int_lit(0)), span()), - ]); + // int x = 5; -> x declarada mas nunca lida + let prog = program(vec![Stmt::VarDecl( + qty(Type::Int), + "x".into(), + Some(int_lit(5)), + span(), + )]); let diags = analyse(&prog); assert!(errors(&diags).is_empty(), "não deve haver erros"); assert!( @@ -545,7 +547,7 @@ mod tests { fn used_variable_emits_no_warning() { let prog = program(vec![ Stmt::VarDecl(qty(Type::Int), "x".into(), Some(int_lit(5)), span()), - Stmt::Return(Some(ident("x")), span()), + Stmt::ExprStmt(ident("x"), span()), ]); let diags = analyse(&prog); assert!( @@ -579,10 +581,10 @@ mod tests { #[test] fn uninitialized_use_emits_warning() { - // int x; return x; -> x lida sem inicialização + // int x; _ = x; -> x lida sem inicialização let prog = program(vec![ Stmt::VarDecl(qty(Type::Int), "x".into(), None, span()), - Stmt::Return(Some(ident("x")), span()), + Stmt::ExprStmt(ident("x"), span()), ]); let diags = analyse(&prog); assert!(errors(&diags).is_empty(), "não deve haver erros"); @@ -598,10 +600,10 @@ mod tests { #[test] fn initialized_then_used_emits_no_warning() { - // int x = 0; return x; -> x inicializada, nenhum warning + // int x = 0; _ = x; -> x inicializada, nenhum warning let prog = program(vec![ Stmt::VarDecl(qty(Type::Int), "x".into(), Some(int_lit(0)), span()), - Stmt::Return(Some(ident("x")), span()), + Stmt::ExprStmt(ident("x"), span()), ]); 
assert!(analyse(&prog).is_empty()); } @@ -616,7 +618,7 @@ mod tests { qty(Type::Int), "f".into(), vec![(qty(Type::Int), "a".into())], - vec![], + vec![Stmt::Return(Some(ident("a")), span())], span(), ), Decl::Function( @@ -998,7 +1000,7 @@ mod tests { #[test] fn prototype_valid_forward_call_is_ok() { // int soma(int a, int b); - // int main() { return soma(1, 2); } + // void main() { soma(1, 2); } // int soma(int a, int b) { return a + b; } let prog = Program { decls: vec![ @@ -1009,15 +1011,15 @@ mod tests { span(), ), Decl::Function( - qty(Type::Int), + qty(Type::Void), "main".into(), vec![], - vec![Stmt::Return( - Some(Expr::Call( + vec![Stmt::ExprStmt( + Expr::Call( Box::new(ident("soma")), vec![int_lit(1), int_lit(2)], span(), - )), + ), span(), )], span(), @@ -1026,7 +1028,7 @@ mod tests { qty(Type::Int), "soma".into(), vec![(qty(Type::Int), "a".into()), (qty(Type::Int), "b".into())], - vec![], + vec![Stmt::Return(Some(ident("a")), span())], span(), ), ], @@ -1178,4 +1180,313 @@ mod tests { "atribuição antes do uso inicializa a variável" ); } + + // ── return: verificação de tipo com coerção numérica ────────────────────── + + #[test] + fn return_int_from_long_fn_is_ok() { + // long f() { return 1; } → coerção int→long permitida + let prog = Program { + decls: vec![Decl::Function( + qty(Type::Long), + "f".into(), + vec![], + vec![Stmt::Return(Some(int_lit(1)), span())], + span(), + )], + }; + assert!( + errors(&analyse(&prog)).is_empty(), + "return int em função long deve ser válido (coerção numérica)" + ); + } + + #[test] + fn return_string_from_int_fn_emits_error() { + // int f() { return "oi"; } → incompatível + let prog = Program { + decls: vec![Decl::Function( + qty(Type::Int), + "f".into(), + vec![], + vec![Stmt::Return( + Some(Expr::Literal(Literal::String("oi".into()), span())), + span(), + )], + span(), + )], + }; + let diags = analyse(&prog); + assert!( + errors(&diags).iter().any(|e| matches!( + e, + CompilerError::Semantic(se) + if 
matches!(&se.kind, SemanticErrorKind::TypeMismatch { .. }) + )), + "return char* em função int deve emitir TypeMismatch" + ); + } + + #[test] + fn return_missing_in_non_void_fn_emits_warning() { + // int f() { } → aviso de missing return + let prog = Program { + decls: vec![Decl::Function( + qty(Type::Int), + "f".into(), + vec![], + vec![], + span(), + )], + }; + let diags = analyse(&prog); + assert!( + errors(&diags).is_empty(), + "missing return não deve ser um erro" + ); + assert!( + diags.iter().any(|d| matches!( + d, + Diagnostic::Warning(crate::common::errors::types::CompilerWarning::Semantic(w)) + if matches!(&w.kind, SemanticWarningKind::MissingReturn(n) if n == "f") + )), + "função int sem return deve emitir MissingReturn" + ); + } + + #[test] + fn return_void_fn_no_return_is_ok() { + // void f() { } → sem aviso de missing return + let prog = Program { + decls: vec![Decl::Function( + qty(Type::Void), + "f".into(), + vec![], + vec![], + span(), + )], + }; + let diags = analyse(&prog); + assert!( + !diags.iter().any(|d| matches!( + d, + Diagnostic::Warning(crate::common::errors::types::CompilerWarning::Semantic(w)) + if matches!(&w.kind, SemanticWarningKind::MissingReturn(_)) + )), + "função void sem return não deve emitir MissingReturn" + ); + } + + #[test] + fn return_with_explicit_return_suppresses_missing_return_warning() { + // int f() { return 0; } → sem aviso + let prog = Program { + decls: vec![Decl::Function( + qty(Type::Int), + "f".into(), + vec![], + vec![Stmt::Return(Some(int_lit(0)), span())], + span(), + )], + }; + let diags = analyse(&prog); + assert!( + !diags.iter().any(|d| matches!( + d, + Diagnostic::Warning(crate::common::errors::types::CompilerWarning::Semantic(w)) + if matches!(&w.kind, SemanticWarningKind::MissingReturn(_)) + )), + "função com return explícito não deve emitir MissingReturn" + ); + } + + // ── switch: tipo do discriminante ───────────────────────────────────────── + + #[test] + fn switch_int_expr_is_ok() { + let prog = 
program(vec![Stmt::Switch(int_lit(1), vec![], span())]); + assert!( + errors(&analyse(&prog)).is_empty(), + "switch(int) deve ser válido" + ); + } + + #[test] + fn switch_float_expr_emits_invalid_switch_type() { + let prog = program(vec![Stmt::Switch( + Expr::Literal(Literal::Double(1.5), span()), + vec![], + span(), + )]); + let diags = analyse(&prog); + assert!( + errors(&diags).iter().any(|e| matches!( + e, + CompilerError::Semantic(se) + if matches!(&se.kind, SemanticErrorKind::InvalidSwitchType { .. }) + )), + "switch(double) deve emitir InvalidSwitchType" + ); + } + + #[test] + fn switch_case_exprs_are_analysed() { + // switch(x) { case y: break; } onde y é indefinido → UndefinedVariable + let prog = program(vec![ + Stmt::VarDecl(qty(Type::Int), "x".into(), Some(int_lit(0)), span()), + Stmt::Switch( + ident("x"), + vec![crate::common::ast::stmt::SwitchCase { + label: crate::common::ast::stmt::SwitchLabel::Case(ident("y_undefined")), + stmts: vec![Stmt::Break(span())], + span: span(), + }], + span(), + ), + ]); + let diags = analyse(&prog); + assert!( + errors(&diags).iter().any(|e| matches!( + e, + CompilerError::Semantic(se) + if matches!(&se.kind, SemanticErrorKind::UndefinedVariable(n) if n == "y_undefined") + )), + "expressão de case indefinida deve emitir UndefinedVariable" + ); + } + + // ── break / continue fora de contexto ──────────────────────────────────── + + #[test] + fn break_inside_while_is_ok() { + let prog = program(vec![Stmt::While( + int_lit(1), + Box::new(Stmt::Break(span())), + span(), + )]); + assert!( + errors(&analyse(&prog)).is_empty(), + "break dentro de while deve ser válido" + ); + } + + #[test] + fn break_inside_switch_is_ok() { + let prog = program(vec![Stmt::Switch( + int_lit(0), + vec![crate::common::ast::stmt::SwitchCase { + label: crate::common::ast::stmt::SwitchLabel::Default, + stmts: vec![Stmt::Break(span())], + span: span(), + }], + span(), + )]); + assert!( + errors(&analyse(&prog)).is_empty(), + "break dentro de switch 
deve ser válido" + ); + } + + #[test] + fn break_outside_loop_emits_error() { + let prog = program(vec![Stmt::Break(span())]); + let diags = analyse(&prog); + assert!( + errors(&diags).iter().any(|e| matches!( + e, + CompilerError::Semantic(se) + if matches!(&se.kind, SemanticErrorKind::BreakOutsideLoop) + )), + "break fora de loop/switch deve emitir BreakOutsideLoop" + ); + } + + #[test] + fn continue_inside_for_is_ok() { + let prog = program(vec![Stmt::For( + None, + Some(int_lit(1)), + None, + Box::new(Stmt::Continue(span())), + span(), + )]); + assert!( + errors(&analyse(&prog)).is_empty(), + "continue dentro de for deve ser válido" + ); + } + + #[test] + fn continue_outside_loop_emits_error() { + let prog = program(vec![Stmt::Continue(span())]); + let diags = analyse(&prog); + assert!( + errors(&diags).iter().any(|e| matches!( + e, + CompilerError::Semantic(se) + if matches!(&se.kind, SemanticErrorKind::ContinueOutsideLoop) + )), + "continue fora de loop deve emitir ContinueOutsideLoop" + ); + } + + #[test] + fn continue_inside_switch_emits_error() { + // `continue` dentro de switch sem loop externo é inválido em C + let prog = program(vec![Stmt::Switch( + int_lit(0), + vec![crate::common::ast::stmt::SwitchCase { + label: crate::common::ast::stmt::SwitchLabel::Default, + stmts: vec![Stmt::Continue(span())], + span: span(), + }], + span(), + )]); + let diags = analyse(&prog); + assert!( + errors(&diags).iter().any(|e| matches!( + e, + CompilerError::Semantic(se) + if matches!(&se.kind, SemanticErrorKind::ContinueOutsideLoop) + )), + "continue dentro de switch (sem loop externo) deve emitir ContinueOutsideLoop" + ); + } + + // ── VarDecl local: verificação de tipo do inicializador ─────────────────── + + #[test] + fn local_var_init_type_mismatch_emits_error() { + // int x = "hello"; → TypeMismatch + let prog = program(vec![Stmt::VarDecl( + qty(Type::Int), + "x".into(), + Some(Expr::Literal(Literal::String("hello".into()), span())), + span(), + )]); + let diags = 
analyse(&prog); + assert!( + errors(&diags).iter().any(|e| matches!( + e, + CompilerError::Semantic(se) + if matches!(&se.kind, SemanticErrorKind::TypeMismatch { .. }) + )), + "int x = string deve emitir TypeMismatch" + ); + } + + #[test] + fn local_var_init_numeric_coercion_is_ok() { + // int x = 1.5; → coerção double→int, válido em C + let prog = program(vec![Stmt::VarDecl( + qty(Type::Int), + "x".into(), + Some(Expr::Literal(Literal::Double(1.5), span())), + span(), + )]); + assert!( + errors(&analyse(&prog)).is_empty(), + "int x = 1.5 deve ser válido (coerção numérica implícita)" + ); + } } From e0ae49745493b7a3294e40d537e3ee1ec799e07c Mon Sep 17 00:00:00 2001 From: Philipe Caetano Date: Tue, 23 Jun 2026 16:12:43 -0300 Subject: [PATCH 56/91] feat/add funtions compute_invariants and is_operand_stable: invariants logic --- src/codegen/inter/opt/licm.rs | 125 ++++++++++++++++++++++++++++++++-- 1 file changed, 120 insertions(+), 5 deletions(-) diff --git a/src/codegen/inter/opt/licm.rs b/src/codegen/inter/opt/licm.rs index da4286d..b69cfc6 100644 --- a/src/codegen/inter/opt/licm.rs +++ b/src/codegen/inter/opt/licm.rs @@ -1,6 +1,7 @@ use super::OptPass; use crate::codegen::inter::{Cfg, BlockId}; use std::collections::{HashMap, HashSet}; +use crate::ir::tac::{TacInstr, Operand, UnOp}; pub struct LoopInvariantCodeMotionPass; @@ -31,10 +32,11 @@ impl OptPass for LoopInvariantCodeMotionPass { for (header, tail) in back_edge { let loop_body = self.get_loop_body(cfg, header, tail); - prtinln!("Laço detectado! Header {:?} Tail {:?}", header, tail); - println!("Blocos pertencentes ao laço: {:?}", loop_body); + println!("Laço detectado! 
Header {:?} Tail {:?}", header, tail); + println("Blocos pertencentes ao laço: {:?}", loop_body); - todo!(); + let invariants = self.compute_invariants(cfg, &loop_body); + println!("Operandos invariantes encontrados: {:?}", invariants); } mutated } @@ -96,11 +98,124 @@ impl LoopInvariantCodeMotionPass{ while let Some(node) = stack.pop(){ for pred in cfg.get_predecessors(node){ if !loop_body.contains(&pred){ - loop_body.insert(&pred); + loop_body.insert(pred); stack.push(pred); } } } loop_body } -} \ No newline at end of file + +fn compute_invariants(&self, cfg: &Cfg, loop_body: &HashSet) -> HashSet { + let mut invariants = HashSet::new(); + let mut changed = true; + + while changed { + changed = false; + + for &block_id in loop_body { + let cfg_block = cfg.blocks.get(&block_id).unwrap(); + + for inst in &cfg_block.instructions { + match inst { + // 1. Operações Binárias (ex: t0 = t1 + t2) + TacInstr::BinOp { dst, lhs, rhs, .. } => { + let dst_operand = Operand::Temp(*dst); + if invariants.contains(&dst_operand) { continue; } + + if self.is_operand_stable(cfg, lhs, loop_body, &invariants) && + self.is_operand_stable(cfg, rhs, loop_body, &invariants) { + + invariants.insert(dst_operand); + changed = true; + } + } + + // 2. Operações Unárias (ex: t0 = -t1) + TacInstr::UnOp { dst, op, src } => { + let dst_operand = Operand::Temp(*dst); + if invariants.contains(&dst_operand) { continue; } + + // Proteção: Desreferenciar (*p) ou pegar endereço (&x) pode ter efeitos colaterais + if matches!(op, UnOp::Deref | UnOp::AddrOf) { continue; } + + if self.is_operand_stable(cfg, src, loop_body, &invariants) { + invariants.insert(dst_operand); + changed = true; + } + } + + // 3. 
Cópias / Atribuições (ex: t0 = 5 ou t0 = t1) + TacInstr::Copy { dst, src } => { + if invariants.contains(dst) { continue; } + + if self.is_operand_stable(cfg, src, loop_body, &invariants) { + invariants.insert(dst.clone()); + changed = true; + } + } + + // Call, Return, Jump, Label não geram valores invariantes seguros para mover + _ => {} + } + } + } + } + invariants + } + + fn is_operand_stable( + &self, + cfg: &Cfg, + op: &Operand, + loop_body: &HashSet, + invariants: &HashSet + ) -> bool { + match op { + // Se for uma constante literal, é sempre estável! + Operand::Const(_) => true, + + // Se for uma variável (Temp ou Var), precisamos ver de onde ela veio + Operand::Temp(_) | Operand::Var(_) => { + // Se a instrução que a criou já foi marcada como invariante + if invariants.contains(op) { + return true; + } + + // Se ela foi definida FORA do laço, é estável + self.is_defined_outside_loop(cfg, op, loop_body) + } + } + } + + // A função que rastreia se o valor nasceu fora do laço + fn is_defined_outside_loop( + &self, + cfg: &Cfg, + op: &Operand, + loop_body: &HashSet + ) -> bool { + // Varrer apenas os blocos de DENTRO do laço + for &block_id in loop_body { + let block = cfg.blocks.get(&block_id).unwrap(); + + for inst in &block.instructions { + match inst { + TacInstr::BinOp { dst, .. } | + TacInstr::UnOp { dst, .. } if &Operand::Temp(*dst) == op => { + return false; // Nasceu DENTRO do laço + } + TacInstr::Copy { dst, .. } if dst == op => { + return false; // Nasceu DENTRO do laço + } + TacInstr::Call { dst: Some(dst), .. 
} if &Operand::Temp(*dst) == op => { + return false; // Nasceu DENTRO do laço + } + _ => {} + } + } + } + + true + } +} \ No newline at end of file From 2a1b2daedefd340b7454ff4de650af2c721554e5 Mon Sep 17 00:00:00 2001 From: guxvr Date: Tue, 23 Jun 2026 16:20:06 -0300 Subject: [PATCH 57/91] style: fix code formatting after merge --- src/analyser/semantic.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/analyser/semantic.rs b/src/analyser/semantic.rs index de51128..1c95af1 100644 --- a/src/analyser/semantic.rs +++ b/src/analyser/semantic.rs @@ -512,22 +512,18 @@ impl SemanticAnalyser { is_const: false, is_unsigned: false, }, - crate::common::ast::expr::UnOp::Deref => { - match inner_ty.ty { - Type::Pointer(base) | Type::Array(base) => QualifierType { - ty: *base, - is_const: inner_ty.is_const, - is_unsigned: inner_ty.is_unsigned, - }, - _ => inner_ty, - } - } + crate::common::ast::expr::UnOp::Deref => match inner_ty.ty { + Type::Pointer(base) | Type::Array(base) => QualifierType { + ty: *base, + is_const: inner_ty.is_const, + is_unsigned: inner_ty.is_unsigned, + }, + _ => inner_ty, + }, _ => inner_ty, } } - Expr::Prefix(_, e, _) | Expr::Postfix(_, e, _) => { - self.analyse_expr(e) - } + Expr::Prefix(_, e, _) | Expr::Postfix(_, e, _) => self.analyse_expr(e), Expr::CompoundAssign(_, lhs, rhs, _) => { let lhs_ty = self.analyse_expr(lhs); self.analyse_expr(rhs); From 0823e9e5b1b67144beea2d1073c88a1bd941dd49 Mon Sep 17 00:00:00 2001 From: Philipe Caetano Date: Tue, 23 Jun 2026 16:48:48 -0300 Subject: [PATCH 58/91] feat/LICM implementation --- src/codegen/inter/opt/licm.rs | 362 +++++++++++++++++++++++----------- tests/licm_test.rs | 175 ++++++++++++++++ 2 files changed, 417 insertions(+), 120 deletions(-) create mode 100644 tests/licm_test.rs diff --git a/src/codegen/inter/opt/licm.rs b/src/codegen/inter/opt/licm.rs index b69cfc6..ac64aea 100644 --- a/src/codegen/inter/opt/licm.rs +++ b/src/codegen/inter/opt/licm.rs @@ -1,7 
+1,6 @@ use super::OptPass; -use crate::codegen::inter::{Cfg, BlockId}; +use crate::codegen::inter::{Cfg, BasicBlock, Instruction, Value}; use std::collections::{HashMap, HashSet}; -use crate::ir::tac::{TacInstr, Operand, UnOp}; pub struct LoopInvariantCodeMotionPass; @@ -11,63 +10,146 @@ impl OptPass for LoopInvariantCodeMotionPass { } fn run(&self, cfg: &mut Cfg) -> bool { - let dominators = self.compute_dominators(cfg); + let predecessors = get_predecessors(cfg); + let dominators = self.compute_dominators(cfg, &predecessors); - let mut mutated = false; - let mut back_edge = Vec::new(); // aresta que liga o bloco dominante a dominado + let mut back_edges = Vec::new(); + for (i, block) in cfg.blocks.iter().enumerate() { + for &succ in &block.successors { + if let Some(doms) = dominators.get(&i) { + if doms.contains(&succ) { + back_edges.push((succ, i)); // (header, tail) + } + } + } + } + + for (header, tail) in back_edges { + let loop_body = self.get_loop_body(&predecessors, header, tail); + let invariants = self.compute_invariants(cfg, &loop_body, &dominators); + + if !invariants.is_empty() { + // Collect invariant destinations + let invariants_set: HashSet = invariants + .iter() + .filter_map(|inst| match inst { + Instruction::Assign { dst, .. } | Instruction::Binary { dst, .. } => { + Some(dst.clone()) + } + _ => None, + }) + .collect(); + + // 1. Remove the invariant instructions from the loop body blocks + for &block_id in &loop_body { + cfg.blocks[block_id].instructions.retain(|inst| match inst { + Instruction::Assign { dst, .. } | Instruction::Binary { dst, .. } => { + !invariants_set.contains(dst) + } + _ => true, + }); + } + + // 2. Create the preheader + let header_label = &cfg.blocks[header].label; + let preheader_label = format!("{}_preheader", header_label); + let mut preheader = BasicBlock::new(preheader_label); + preheader.instructions = invariants; + preheader.successors.push(header + 1); + + // 3. 
Find outside predecessors + let outside_preds: Vec = predecessors[header] + .iter() + .copied() + .filter(|pred| !loop_body.contains(pred)) + .collect(); + + // 4. Insert the preheader block at index `header` + cfg.blocks.insert(header, preheader); + + // 5. Update successors in other blocks + for (i, b) in cfg.blocks.iter_mut().enumerate() { + if i == header { + continue; + } + for succ in &mut b.successors { + if *succ >= header { + *succ += 1; + } + } + } - for &block in cfg.blocks.keys(){ - if let Some(cfg_block) = cfg.blocks.get(&block){ - for &succ in &cfg_block.successors { - if let Some(doms) = dominators.get(&block){ - if doms.contains(&succ){ - back_edge.push((succ, block)); + // 6. Redirect outside predecessors + for pred in outside_preds { + let new_pred_idx = if pred >= header { pred + 1 } else { pred }; + let pred_block = &mut cfg.blocks[new_pred_idx]; + for succ in &mut pred_block.successors { + if *succ == header + 1 { + *succ = header; } } } + return true; } } - for (header, tail) in back_edge { - let loop_body = self.get_loop_body(cfg, header, tail); + false + } +} - println!("Laço detectado! Header {:?} Tail {:?}", header, tail); - println("Blocos pertencentes ao laço: {:?}", loop_body); +fn get_predecessors(cfg: &Cfg) -> Vec> { + let mut preds = vec![Vec::new(); cfg.blocks.len()]; + for (i, block) in cfg.blocks.iter().enumerate() { + for &succ in &block.successors { + if succ < preds.len() { + preds[succ].push(i); + } + } + } + preds +} - let invariants = self.compute_invariants(cfg, &loop_body); - println!("Operandos invariantes encontrados: {:?}", invariants); +fn uses_variable(inst: &Instruction, name: &str) -> bool { + match inst { + Instruction::Assign { value, .. } => { + matches!(value, Value::Temp(n) if n == name) } - mutated + Instruction::Binary { lhs, rhs, .. 
} => { + matches!(lhs, Value::Temp(n) if n == name) || matches!(rhs, Value::Temp(n) if n == name) + } + Instruction::Nop => false, } } -impl LoopInvariantCodeMotionPass{ - fn compute_dominators(&self, cfg: &Cfg) -> HashMap>{ - let mut dominators: HashMap> = HashMap::new(); - let all_blocks: HashSet = cfg.blocks.keys().cloned().collect(); - let entry = cfg.entry_block; +impl LoopInvariantCodeMotionPass { + fn compute_dominators( + &self, + cfg: &Cfg, + predecessors: &[Vec], + ) -> HashMap> { + let mut dominators: HashMap> = HashMap::new(); + let all_blocks: HashSet = (0..cfg.blocks.len()).collect(); + let entry = 0; dominators.insert(entry, vec![entry].into_iter().collect()); - for &block in &all_blocks{ - if block != entry { - dominators.insert(block, all_blocks.clone()); // (key, value) se já existi no dicionario subtitui - } + for block in 1..cfg.blocks.len() { + dominators.insert(block, all_blocks.clone()); } let mut changed = true; while changed { changed = false; - for &block in &all_blocks{ - if block == entry {continue;} - - let preds = cfg.get_predecessors(block); - if preds.is_empty() {continue;} + for block in 1..cfg.blocks.len() { + let preds = &predecessors[block]; + if preds.is_empty() { + continue; + } let mut current_intersection = dominators.get(&preds[0]).cloned().unwrap_or_default(); - for pred in preds.iter().skip(1) { - if let Some(pred_doms) = dominators.get(pred) { + for &pred in preds.iter().skip(1) { + if let Some(pred_doms) = dominators.get(&pred) { current_intersection = current_intersection .intersection(pred_doms) .cloned() @@ -82,22 +164,25 @@ impl LoopInvariantCodeMotionPass{ changed = true; } } - - } + } dominators - } - fn get_loop_body(&self, cfg: &Cfg, header: BlockId, tail: BlockId) -> HashSet { + fn get_loop_body( + &self, + predecessors: &[Vec], + header: usize, + tail: usize, + ) -> HashSet { let mut loop_body = HashSet::new(); loop_body.insert(header); loop_body.insert(tail); let mut stack = vec![tail]; - while let Some(node) 
= stack.pop(){ - for pred in cfg.get_predecessors(node){ - if !loop_body.contains(&pred){ + while let Some(node) = stack.pop() { + for &pred in &predecessors[node] { + if !loop_body.contains(&pred) { loop_body.insert(pred); stack.push(pred); } @@ -106,116 +191,153 @@ impl LoopInvariantCodeMotionPass{ loop_body } -fn compute_invariants(&self, cfg: &Cfg, loop_body: &HashSet) -> HashSet { - let mut invariants = HashSet::new(); - let mut changed = true; + fn compute_invariants( + &self, + cfg: &Cfg, + loop_body: &HashSet, + dominators: &HashMap>, + ) -> Vec { + let mut invariants_set: HashSet = HashSet::new(); + let mut invariant_instrs: Vec = Vec::new(); + let mut def_counts = HashMap::new(); + for &block_id in loop_body { + for inst in &cfg.blocks[block_id].instructions { + match inst { + Instruction::Assign { dst, .. } | Instruction::Binary { dst, .. } => { + *def_counts.entry(dst.clone()).or_insert(0) += 1; + } + _ => {} + } + } + } + + let mut changed = true; while changed { changed = false; - for &block_id in loop_body { - let cfg_block = cfg.blocks.get(&block_id).unwrap(); - - for inst in &cfg_block.instructions { + for inst in &cfg.blocks[block_id].instructions { match inst { - // 1. Operações Binárias (ex: t0 = t1 + t2) - TacInstr::BinOp { dst, lhs, rhs, .. } => { - let dst_operand = Operand::Temp(*dst); - if invariants.contains(&dst_operand) { continue; } - - if self.is_operand_stable(cfg, lhs, loop_body, &invariants) && - self.is_operand_stable(cfg, rhs, loop_body, &invariants) { - - invariants.insert(dst_operand); - changed = true; + Instruction::Assign { dst, value } => { + if invariants_set.contains(dst) { + continue; } - } - - // 2. 
Operações Unárias (ex: t0 = -t1) - TacInstr::UnOp { dst, op, src } => { - let dst_operand = Operand::Temp(*dst); - if invariants.contains(&dst_operand) { continue; } - - // Proteção: Desreferenciar (*p) ou pegar endereço (&x) pode ter efeitos colaterais - if matches!(op, UnOp::Deref | UnOp::AddrOf) { continue; } - - if self.is_operand_stable(cfg, src, loop_body, &invariants) { - invariants.insert(dst_operand); - changed = true; + if def_counts.get(dst) != Some(&1) { + continue; + } + if self.is_value_stable(value, loop_body, &invariants_set, cfg) { + if self.is_safe_to_move(dst, block_id, loop_body, dominators, cfg) { + invariants_set.insert(dst.clone()); + invariant_instrs.push(inst.clone()); + changed = true; + } } } - - // 3. Cópias / Atribuições (ex: t0 = 5 ou t0 = t1) - TacInstr::Copy { dst, src } => { - if invariants.contains(dst) { continue; } - - if self.is_operand_stable(cfg, src, loop_body, &invariants) { - invariants.insert(dst.clone()); - changed = true; + Instruction::Binary { dst, op: _, lhs, rhs } => { + if invariants_set.contains(dst) { + continue; + } + if def_counts.get(dst) != Some(&1) { + continue; + } + if self.is_value_stable(lhs, loop_body, &invariants_set, cfg) + && self.is_value_stable(rhs, loop_body, &invariants_set, cfg) + { + if self.is_safe_to_move(dst, block_id, loop_body, dominators, cfg) { + invariants_set.insert(dst.clone()); + invariant_instrs.push(inst.clone()); + changed = true; + } } } - - // Call, Return, Jump, Label não geram valores invariantes seguros para mover _ => {} } } } } - invariants + invariant_instrs } - fn is_operand_stable( - &self, - cfg: &Cfg, - op: &Operand, - loop_body: &HashSet, - invariants: &HashSet + fn is_safe_to_move( + &self, + dst: &str, + def_block: usize, + loop_body: &HashSet, + dominators: &HashMap>, + cfg: &Cfg, ) -> bool { - match op { - // Se for uma constante literal, é sempre estável! 
- Operand::Const(_) => true, - - // Se for uma variável (Temp ou Var), precisamos ver de onde ela veio - Operand::Temp(_) | Operand::Var(_) => { - // Se a instrução que a criou já foi marcada como invariante - if invariants.contains(op) { - return true; + // 1. Find exit blocks of the loop + let mut exit_blocks = HashSet::new(); + for &block_id in loop_body { + let block = &cfg.blocks[block_id]; + for &succ in &block.successors { + if !loop_body.contains(&succ) { + exit_blocks.insert(block_id); } - - // Se ela foi definida FORA do laço, é estável - self.is_defined_outside_loop(cfg, op, loop_body) } } + + // 2. Check if def_block dominates all exit blocks + let dominates_all_exits = exit_blocks.iter().all(|&exit_block| { + if let Some(doms) = dominators.get(&exit_block) { + doms.contains(&def_block) + } else { + false + } + }); + + if dominates_all_exits { + return true; + } + + // 3. Check if variable is not used after/outside the loop + let mut used_after_loop = false; + for (i, block) in cfg.blocks.iter().enumerate() { + if !loop_body.contains(&i) { + for inst in &block.instructions { + if uses_variable(inst, dst) { + used_after_loop = true; + break; + } + } + } + } + + !used_after_loop } - // A função que rastreia se o valor nasceu fora do laço - fn is_defined_outside_loop( - &self, - cfg: &Cfg, - op: &Operand, - loop_body: &HashSet + fn is_value_stable( + &self, + val: &Value, + loop_body: &HashSet, + invariants: &HashSet, + cfg: &Cfg, ) -> bool { - // Varrer apenas os blocos de DENTRO do laço + match val { + Value::Int(_) => true, + Value::Temp(name) => { + if invariants.contains(name) { + return true; + } + self.is_defined_outside_loop(name, loop_body, cfg) + } + } + } + + fn is_defined_outside_loop(&self, name: &str, loop_body: &HashSet, cfg: &Cfg) -> bool { for &block_id in loop_body { - let block = cfg.blocks.get(&block_id).unwrap(); - + let block = &cfg.blocks[block_id]; for inst in &block.instructions { match inst { - TacInstr::BinOp { dst, .. 
} | - TacInstr::UnOp { dst, .. } if &Operand::Temp(*dst) == op => { - return false; // Nasceu DENTRO do laço - } - TacInstr::Copy { dst, .. } if dst == op => { - return false; // Nasceu DENTRO do laço - } - TacInstr::Call { dst: Some(dst), .. } if &Operand::Temp(*dst) == op => { - return false; // Nasceu DENTRO do laço + Instruction::Assign { dst, .. } | Instruction::Binary { dst, .. } => { + if dst == name { + return false; + } } _ => {} } } } - true } -} \ No newline at end of file +} \ No newline at end of file diff --git a/tests/licm_test.rs b/tests/licm_test.rs new file mode 100644 index 0000000..01661e0 --- /dev/null +++ b/tests/licm_test.rs @@ -0,0 +1,175 @@ +use crusty::codegen::inter::opt::{LoopInvariantCodeMotionPass, OptPass}; +use crusty::codegen::inter::{BasicBlock, Cfg, Instruction, Value, BinaryOp}; + +#[test] +fn test_licm_integration_basic() { + let mut cfg = Cfg::new(); + + // Block 0: entry + let mut entry = BasicBlock::new("entry"); + entry.instructions.push(Instruction::Assign { + dst: "x".to_string(), + value: Value::Int(10), + }); + entry.successors.push(1); + cfg.add_block(entry); + + // Block 1: loop header + let mut header = BasicBlock::new("header"); + header.instructions.push(Instruction::Binary { + dst: "t1".to_string(), + op: BinaryOp::Add, + lhs: Value::Int(2), + rhs: Value::Int(3), + }); + header.instructions.push(Instruction::Binary { + dst: "t2".to_string(), + op: BinaryOp::Add, + lhs: Value::Temp("t1".to_string()), + rhs: Value::Temp("x".to_string()), + }); + header.instructions.push(Instruction::Binary { + dst: "t3".to_string(), + op: BinaryOp::Add, + lhs: Value::Temp("t2".to_string()), + rhs: Value::Temp("y".to_string()), + }); + header.successors.push(2); + header.successors.push(3); + cfg.add_block(header); + + // Block 2: loop body + let mut body = BasicBlock::new("body"); + body.instructions.push(Instruction::Assign { + dst: "y".to_string(), + value: Value::Int(5), + }); + body.successors.push(1); + cfg.add_block(body); + + 
// Block 3: exit + let mut exit = BasicBlock::new("exit"); + exit.instructions.push(Instruction::Assign { + dst: "res".to_string(), + value: Value::Temp("y".to_string()), + }); + cfg.add_block(exit); + + let pass = LoopInvariantCodeMotionPass; + let mutated = pass.run(&mut cfg); + + assert!(mutated); + assert_eq!(cfg.blocks.len(), 5); + + // Preheader should be at index 1 + assert_eq!(cfg.blocks[1].label, "header_preheader"); + assert_eq!(cfg.blocks[1].instructions.len(), 2); + assert_eq!(cfg.blocks[1].successors, vec![2]); // points to header (now index 2) + + // Entry block should point to preheader (index 1) + assert_eq!(cfg.blocks[0].successors, vec![1]); + + // Loop body should point to header (index 2) + assert_eq!(cfg.blocks[3].successors, vec![2]); +} + +#[test] +fn test_licm_loop_with_invariant() { + let mut cfg = Cfg::new(); + + // Block 0: entry + let mut entry = BasicBlock::new("entry"); + entry.instructions.push(Instruction::Assign { + dst: "a".to_string(), + value: Value::Int(5), + }); + entry.instructions.push(Instruction::Assign { + dst: "b".to_string(), + value: Value::Int(10), + }); + entry.instructions.push(Instruction::Assign { + dst: "i".to_string(), + value: Value::Int(0), + }); + entry.instructions.push(Instruction::Assign { + dst: "result".to_string(), + value: Value::Int(0), + }); + entry.successors.push(1); + cfg.add_block(entry); + + // Block 1: loop header + let mut header = BasicBlock::new("header"); + header.instructions.push(Instruction::Binary { + dst: "t1".to_string(), + op: BinaryOp::Add, + lhs: Value::Temp("a".to_string()), + rhs: Value::Temp("b".to_string()), + }); + header.instructions.push(Instruction::Binary { + dst: "t2".to_string(), + op: BinaryOp::Mul, + lhs: Value::Temp("i".to_string()), + rhs: Value::Temp("t1".to_string()), + }); + header.instructions.push(Instruction::Binary { + dst: "result".to_string(), + op: BinaryOp::Add, + lhs: Value::Temp("result".to_string()), + rhs: Value::Temp("t2".to_string()), + }); + 
header.instructions.push(Instruction::Binary { + dst: "i".to_string(), + op: BinaryOp::Add, + lhs: Value::Temp("i".to_string()), + rhs: Value::Int(1), + }); + header.successors.push(2); + header.successors.push(3); + cfg.add_block(header); + + // Block 2: loop body/latch + let mut body = BasicBlock::new("body"); + body.instructions.push(Instruction::Nop); + body.successors.push(1); + cfg.add_block(body); + + // Block 3: exit + let mut exit = BasicBlock::new("exit"); + exit.instructions.push(Instruction::Assign { + dst: "res".to_string(), + value: Value::Temp("result".to_string()), + }); + cfg.add_block(exit); + + let pass = LoopInvariantCodeMotionPass; + let mutated = pass.run(&mut cfg); + + assert!(mutated); + assert_eq!(cfg.blocks.len(), 5); + + // Preheader should contain "t1 = a + b" + assert_eq!(cfg.blocks[1].label, "header_preheader"); + assert_eq!(cfg.blocks[1].instructions.len(), 1); + assert_eq!( + cfg.blocks[1].instructions[0], + Instruction::Binary { + dst: "t1".to_string(), + op: BinaryOp::Add, + lhs: Value::Temp("a".to_string()), + rhs: Value::Temp("b".to_string()), + } + ); + + // Header block (index 2) should no longer contain "t1 = a + b" + assert_eq!(cfg.blocks[2].instructions.len(), 3); + assert_eq!( + cfg.blocks[2].instructions[0], + Instruction::Binary { + dst: "t2".to_string(), + op: BinaryOp::Mul, + lhs: Value::Temp("i".to_string()), + rhs: Value::Temp("t1".to_string()), + } + ); +} From 24dade3f4f841734f74dae7639e0024ad6eb2ef7 Mon Sep 17 00:00:00 2001 From: Bappoz Date: Tue, 23 Jun 2026 17:42:29 -0300 Subject: [PATCH 59/91] fix: do not treat semantic warnings as fatal compile errors main.rs merged Diagnostic::Error and Diagnostic::Warning into a single count and failed the build on any non-empty list, even though Diagnostic already exposes is_error()/is_warning(). The new MissingReturn warning (#160) fires on common, valid code (any function not ending in a literal return), so this turned a documented warning into a hard compile failure. 
--- src/main.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index a2c89cf..fd7b7bf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -273,11 +273,19 @@ fn run(source: SourceFile, args: &CliArgs) -> Result<(), Box> { } // ── Stage 3: Semantic ──────────────────────────────────────────────────── - let sem_errors = analyse_with_builtins(&program, scanner.builtins); - let sem_count = sem_errors.len(); + let sem_diagnostics = analyse_with_builtins(&program, scanner.builtins); + let sem_warnings: Vec<_> = sem_diagnostics.iter().filter(|d| d.is_warning()).collect(); + if !sem_warnings.is_empty() { + eprintln!("\n=== Semantic Warnings ({}) ===", sem_warnings.len()); + for w in &sem_warnings { + print_warning_report(&w.to_report()); + } + } + + let sem_count = sem_diagnostics.iter().filter(|d| d.is_error()).count(); if sem_count > 0 { eprintln!("\n=== Semantic Errors ({sem_count}) ==="); - for e in &sem_errors { + for e in sem_diagnostics.iter().filter(|d| d.is_error()) { print_report(&e.to_report()); } return Err(Box::new(DiagnosticError { count: sem_count })); @@ -435,7 +443,15 @@ fn dump_ast(program: &crusty::common::ast::ast::Program) { // ── Error reporting ─────────────────────────────────────────────────────────── fn print_report(report: &Report) { - eprintln!(" error: {}", report.message); + print_report_with_prefix(report, "error"); +} + +fn print_warning_report(report: &Report) { + print_report_with_prefix(report, "warning"); +} + +fn print_report_with_prefix(report: &Report, prefix: &str) { + eprintln!(" {prefix}: {}", report.message); if let Some(span) = &report.span { eprintln!(" --> {}:{}", span.line, span.column_start); } From 6bad6b5bcc7d15b8abfda19b0b37525f820aaad3 Mon Sep 17 00:00:00 2001 From: Philipe Caetano Date: Tue, 23 Jun 2026 18:08:01 -0300 Subject: [PATCH 60/91] =?UTF-8?q?feat:=20adiciona=20l=C3=B3gica=20da=20oti?= =?UTF-8?q?miza=C3=A7=C3=A3o=20peephole=20e=205=20padr=C3=B5es?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/codegen/last/mod.rs | 1 + src/codegen/last/peephole.rs | 117 +++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 src/codegen/last/peephole.rs diff --git a/src/codegen/last/mod.rs b/src/codegen/last/mod.rs index 2f1c6b4..aeabc85 100644 --- a/src/codegen/last/mod.rs +++ b/src/codegen/last/mod.rs @@ -10,5 +10,6 @@ pub mod abi; pub mod frame; pub mod x86_64; +pub mod peephole; pub use x86_64::{emit_function, emit_program}; diff --git a/src/codegen/last/peephole.rs b/src/codegen/last/peephole.rs new file mode 100644 index 0000000..bee5f0f --- /dev/null +++ b/src/codegen/last/peephole.rs @@ -0,0 +1,117 @@ +pub type AsmInstr = String; + +pub struct PeepholePass { + pub window: usize, +} + +impl PeepholePass { + pub fn new() -> Self { + Self { window: 2 } + } + + pub fn run(&self, instrs: &mut Vec) -> bool { + let mut overall_mutated = false; + let mut changed = true; + + while changed { + changed = false; + let mut optimized = Vec::new(); + let mut i = 0; + + while i < instrs.len() { + let l1 = &instrs[i]; + let t1 = l1.trim(); // Remove espaços antes e depois para facilitar a leitura + + // PADRÃO 3: Add/Sub por zero + // Exemplo gerado: addq $0, %rax ou subq $0, %rcx + if t1.starts_with("addq $0,") || t1.starts_with("subq $0,") { + i += 1; // Pula essa instrução (deleta) + changed = true; + continue; + } + + // Padrões que exigem olhar 2 instruções (Janela de 2) + if i + 1 < instrs.len() { + let l2 = &instrs[i + 1]; + let t2 = l2.trim(); + + // PADRÃO 1 e 2: Mov redundante E Load após Store no mesmo endereço + // No formato AT&T: "movq A, B" seguido de "movq B, A" + if t1.starts_with("movq ") && t2.starts_with("movq ") { + let p1: Vec<&str> = t1.split_whitespace().collect(); + let p2: Vec<&str> = t2.split_whitespace().collect(); + + // movq [1] [2] -> [1] tem uma vírgula no final + if p1.len() == 3 && p2.len() == 3 { + let src1 = 
p1[1].trim_end_matches(','); + let dst1 = p1[2]; + let src2 = p2[1].trim_end_matches(','); + let dst2 = p2[2]; + + // Se a origem do 1º for o destino do 2º e vice-versa + if src1 == dst2 && dst1 == src2 { + optimized.push(l1.clone()); // Mantém só a primeira + i += 2; // Pula a segunda + changed = true; + continue; + } + } + } + + // PADRÃO 5: Jump para instrução seguinte + // Exemplo: jmp .L_main_L1 \n .L_main_L1: + if t1.starts_with("jmp ") && t2.ends_with(':') { + let target = t1.strip_prefix("jmp ").unwrap(); + let label = t2.strip_suffix(':').unwrap(); + + if target == label { + optimized.push(l2.clone()); // Mantém só a Label (apaga o jmp) + i += 2; + changed = true; + continue; + } + } + + // PADRÃO 4: Multiplicação por potência de 2 + // O Crusty gera: movq $8, %rcx \n imulq %rcx, %rax + if t1.starts_with("movq $") && t1.ends_with(", %rcx") && t2 == "imulq %rcx, %rax" { + let val_str = t1.strip_prefix("movq $").unwrap().strip_suffix(", %rcx").unwrap(); + + if let Ok(val) = val_str.parse::() { + + if val > 0 && (val as u64).is_power_of_two() { + let shift = (val as u64).trailing_zeros(); + // Substitui as duas por um shift rápido + optimized.push(format!(" shlq ${}, %rax", shift)); + i += 2; + changed = true; + continue; + } + } + } + + // PADRÃO 6: Compare com 0 -> Test + // O Crusty gera: movq $0, %rcx \n cmpq %rcx, %rax + if t1 == "movq $0, %rcx" && t2 == "cmpq %rcx, %rax" { + optimized.push(" testq %rax, %rax".to_string()); + i += 2; + changed = true; + continue; + } + } + + // Se não casou com nenhum padrão, apenas salva a instrução e vai para a próxima + optimized.push(l1.clone()); + i += 1; + } + + // Atualiza o vetor de instruções para a próxima rodada do while + if changed { + *instrs = optimized; + overall_mutated = true; + } + } + + overall_mutated + } +} \ No newline at end of file From a3d1c7047b93ccc71b95495ca4fb8e949b9c53d4 Mon Sep 17 00:00:00 2001 From: Philipe Caetano Date: Tue, 23 Jun 2026 18:08:10 -0300 Subject: [PATCH 61/91] 
=?UTF-8?q?feat:=20aplica=20a=20otimiza=C3=A7=C3=A3o?= =?UTF-8?q?=20peephole=20no=20final=20da=20gera=C3=A7=C3=A3o=20de=20assemb?= =?UTF-8?q?ly?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/codegen/last/x86_64.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs index 78fa67b..f651799 100644 --- a/src/codegen/last/x86_64.rs +++ b/src/codegen/last/x86_64.rs @@ -18,6 +18,7 @@ use crate::codegen::last::abi; use crate::codegen::last::frame::{Frame, SlotKey}; use crate::common::ast::expr::{BinOp, UnOp}; use crate::ir::tac::{ConstValue, LabelId, Operand, TacFunction, TacInstr, TacProgram}; +use crate::codegen::last::peephole::PeepholePass; /// Acumulador de linhas de assembly com indentacao controlada. struct Emitter { @@ -118,7 +119,18 @@ pub fn emit_function(func: &TacFunction) -> String { em.insn("popq %rbp"); em.insn("ret"); - em.into_string() + // === PEEPHOLE === + // 1. Pega a string inteira gerada e divide em linhas + let raw_asm = em.into_string(); + let mut instrs: Vec = raw_asm.lines().map(|s| s.to_string()).collect(); + + // 2. Roda a Otimização Peephole + let peephole = crate::codegen::last::peephole::PeepholePass::new(); + peephole.run(&mut instrs); + + // 3. 
Junta tudo de volta em uma String com quebras de linha e retorna + instrs.join("\n") + //====== } /// Constroi o stack frame pre-escaneando todas as instrucoes para alocar um From 3052380b60c9407b00cc6b0aa889c5bf098f158b Mon Sep 17 00:00:00 2001 From: Philipe Caetano Date: Tue, 23 Jun 2026 18:08:32 -0300 Subject: [PATCH 62/91] =?UTF-8?q?test:=20adiciona=20testes=20unit=C3=A1rio?= =?UTF-8?q?s=20para=20a=20passagem=20peephole?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tests/mod.rs | 1 + src/tests/peephole_test.rs | 62 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 src/tests/peephole_test.rs diff --git a/src/tests/mod.rs b/src/tests/mod.rs index f596579..d3f14d2 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -10,3 +10,4 @@ mod semantic_test; mod source_test; mod symbol_test; mod token_test; +mod peephole_test; diff --git a/src/tests/peephole_test.rs b/src/tests/peephole_test.rs new file mode 100644 index 0000000..f87029b --- /dev/null +++ b/src/tests/peephole_test.rs @@ -0,0 +1,62 @@ +use crate::codegen::last::peephole::PeepholePass; + +// Função auxiliar para rodar o Peephole rápido nos testes +fn run_peephole(instrs: Vec<&str>) -> Vec { + let mut asm: Vec = instrs.into_iter().map(|s| s.to_string()).collect(); + let pass = PeepholePass::new(); + pass.run(&mut asm); + asm +} + +#[test] +fn test_remove_add_sub_zero() { + // Padrão 3: Soma ou subtração por zero deve sumir + let asm = run_peephole(vec![ + "addq $0, %rax", + "subq $0, %rcx", + "movq %rax, %rbx" + ]); + assert_eq!(asm, vec!["movq %rax, %rbx"]); +} + +#[test] +fn test_remove_redundant_mov() { + // Padrões 1 e 2: Mov de A pra B seguido de B pra A + let asm = run_peephole(vec![ + "movq %rax, %rbx", + "movq %rbx, %rax", // Esse tem que sumir + "ret" + ]); + assert_eq!(asm, vec!["movq %rax, %rbx", "ret"]); +} + +#[test] +fn test_remove_jump_to_next_line() { + // Padrão 5: Pulo para a linha 
imediatamente abaixo + let asm = run_peephole(vec![ + "jmp .L_main_L1", + ".L_main_L1:", + "ret" + ]); + assert_eq!(asm, vec![".L_main_L1:", "ret"]); +} + +#[test] +fn test_optimize_mul_power_of_two() { + // Padrão 4: Multiplicação por 8 (2^3) deve virar shlq $3 + let asm = run_peephole(vec![ + "movq $8, %rcx", + "imulq %rcx, %rax" + ]); + assert_eq!(asm, vec![" shlq $3, %rax"]); +} + +#[test] +fn test_optimize_cmp_zero() { + // Padrão 6: Comparar com 0 deve virar testq + let asm = run_peephole(vec![ + "movq $0, %rcx", + "cmpq %rcx, %rax" + ]); + assert_eq!(asm, vec![" testq %rax, %rax"]); +} \ No newline at end of file From 550843f1164478a69c0f30cd25d5ede2b0682572 Mon Sep 17 00:00:00 2001 From: Bappoz Date: Tue, 23 Jun 2026 21:03:31 -0300 Subject: [PATCH 63/91] =?UTF-8?q?fix:=20corrigir=20lints=20de=20clippy=20e?= =?UTF-8?q?=20formata=C3=A7=C3=A3o=20em=20licm.rs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - needless_range_loop em compute_dominators - collapsible_if em compute_invariants - collapsible_match em is_defined_outside_loop - aplicado cargo fmt --- src/codegen/inter/opt/licm.rs | 46 +++++++++++++++++++---------------- tests/licm_test.rs | 2 +- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/codegen/inter/opt/licm.rs b/src/codegen/inter/opt/licm.rs index ac64aea..b7cbd24 100644 --- a/src/codegen/inter/opt/licm.rs +++ b/src/codegen/inter/opt/licm.rs @@ -1,5 +1,5 @@ use super::OptPass; -use crate::codegen::inter::{Cfg, BasicBlock, Instruction, Value}; +use crate::codegen::inter::{BasicBlock, Cfg, Instruction, Value}; use std::collections::{HashMap, HashSet}; pub struct LoopInvariantCodeMotionPass; @@ -141,13 +141,13 @@ impl LoopInvariantCodeMotionPass { let mut changed = true; while changed { changed = false; - for block in 1..cfg.blocks.len() { - let preds = &predecessors[block]; + for (block, preds) in predecessors.iter().enumerate().skip(1) { if preds.is_empty() { continue; } - let mut 
current_intersection = dominators.get(&preds[0]).cloned().unwrap_or_default(); + let mut current_intersection = + dominators.get(&preds[0]).cloned().unwrap_or_default(); for &pred in preds.iter().skip(1) { if let Some(pred_doms) = dominators.get(&pred) { current_intersection = current_intersection @@ -225,15 +225,20 @@ impl LoopInvariantCodeMotionPass { if def_counts.get(dst) != Some(&1) { continue; } - if self.is_value_stable(value, loop_body, &invariants_set, cfg) { - if self.is_safe_to_move(dst, block_id, loop_body, dominators, cfg) { - invariants_set.insert(dst.clone()); - invariant_instrs.push(inst.clone()); - changed = true; - } + if self.is_value_stable(value, loop_body, &invariants_set, cfg) + && self.is_safe_to_move(dst, block_id, loop_body, dominators, cfg) + { + invariants_set.insert(dst.clone()); + invariant_instrs.push(inst.clone()); + changed = true; } } - Instruction::Binary { dst, op: _, lhs, rhs } => { + Instruction::Binary { + dst, + op: _, + lhs, + rhs, + } => { if invariants_set.contains(dst) { continue; } @@ -242,12 +247,11 @@ impl LoopInvariantCodeMotionPass { } if self.is_value_stable(lhs, loop_body, &invariants_set, cfg) && self.is_value_stable(rhs, loop_body, &invariants_set, cfg) + && self.is_safe_to_move(dst, block_id, loop_body, dominators, cfg) { - if self.is_safe_to_move(dst, block_id, loop_body, dominators, cfg) { - invariants_set.insert(dst.clone()); - invariant_instrs.push(inst.clone()); - changed = true; - } + invariants_set.insert(dst.clone()); + invariant_instrs.push(inst.clone()); + changed = true; } } _ => {} @@ -329,10 +333,10 @@ impl LoopInvariantCodeMotionPass { let block = &cfg.blocks[block_id]; for inst in &block.instructions { match inst { - Instruction::Assign { dst, .. } | Instruction::Binary { dst, .. } => { - if dst == name { - return false; - } + Instruction::Assign { dst, .. } | Instruction::Binary { dst, .. 
} + if dst == name => + { + return false; } _ => {} } @@ -340,4 +344,4 @@ impl LoopInvariantCodeMotionPass { } true } -} \ No newline at end of file +} diff --git a/tests/licm_test.rs b/tests/licm_test.rs index 01661e0..dc0f72a 100644 --- a/tests/licm_test.rs +++ b/tests/licm_test.rs @@ -1,5 +1,5 @@ use crusty::codegen::inter::opt::{LoopInvariantCodeMotionPass, OptPass}; -use crusty::codegen::inter::{BasicBlock, Cfg, Instruction, Value, BinaryOp}; +use crusty::codegen::inter::{BasicBlock, BinaryOp, Cfg, Instruction, Value}; #[test] fn test_licm_integration_basic() { From 6db484f570f0d74ca6f2b0e4155328f16f2a104d Mon Sep 17 00:00:00 2001 From: Bappoz Date: Tue, 23 Jun 2026 21:08:47 -0300 Subject: [PATCH 64/91] =?UTF-8?q?fix:=20corrigir=20lint=20de=20clippy=20(n?= =?UTF-8?q?ew=5Fwithout=5Fdefault)=20e=20formata=C3=A7=C3=A3o=20ap=C3=B3s?= =?UTF-8?q?=20merge=20com=20developer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - adiciona impl Default para PeepholePass - aplica cargo fmt em peephole.rs, x86_64.rs e arquivos de teste --- src/codegen/last/mod.rs | 2 +- src/codegen/last/peephole.rs | 26 +++++++++++++++++++------- src/codegen/last/x86_64.rs | 2 +- src/tests/mod.rs | 2 +- src/tests/peephole_test.rs | 26 ++++++-------------------- 5 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/codegen/last/mod.rs b/src/codegen/last/mod.rs index e0a9a22..676170b 100644 --- a/src/codegen/last/mod.rs +++ b/src/codegen/last/mod.rs @@ -9,7 +9,7 @@ pub mod abi; pub mod frame; -pub mod x86_64; pub mod peephole; +pub mod x86_64; pub use x86_64::emit_program; diff --git a/src/codegen/last/peephole.rs b/src/codegen/last/peephole.rs index bee5f0f..06b5084 100644 --- a/src/codegen/last/peephole.rs +++ b/src/codegen/last/peephole.rs @@ -4,6 +4,12 @@ pub struct PeepholePass { pub window: usize, } +impl Default for PeepholePass { + fn default() -> Self { + Self::new() + } +} + impl PeepholePass { pub fn new() -> Self { Self { 
window: 2 } @@ -40,7 +46,7 @@ impl PeepholePass { if t1.starts_with("movq ") && t2.starts_with("movq ") { let p1: Vec<&str> = t1.split_whitespace().collect(); let p2: Vec<&str> = t2.split_whitespace().collect(); - + // movq [1] [2] -> [1] tem uma vírgula no final if p1.len() == 3 && p2.len() == 3 { let src1 = p1[1].trim_end_matches(','); @@ -63,7 +69,7 @@ impl PeepholePass { if t1.starts_with("jmp ") && t2.ends_with(':') { let target = t1.strip_prefix("jmp ").unwrap(); let label = t2.strip_suffix(':').unwrap(); - + if target == label { optimized.push(l2.clone()); // Mantém só a Label (apaga o jmp) i += 2; @@ -74,11 +80,17 @@ impl PeepholePass { // PADRÃO 4: Multiplicação por potência de 2 // O Crusty gera: movq $8, %rcx \n imulq %rcx, %rax - if t1.starts_with("movq $") && t1.ends_with(", %rcx") && t2 == "imulq %rcx, %rax" { - let val_str = t1.strip_prefix("movq $").unwrap().strip_suffix(", %rcx").unwrap(); - + if t1.starts_with("movq $") + && t1.ends_with(", %rcx") + && t2 == "imulq %rcx, %rax" + { + let val_str = t1 + .strip_prefix("movq $") + .unwrap() + .strip_suffix(", %rcx") + .unwrap(); + if let Ok(val) = val_str.parse::() { - if val > 0 && (val as u64).is_power_of_two() { let shift = (val as u64).trailing_zeros(); // Substitui as duas por um shift rápido @@ -114,4 +126,4 @@ impl PeepholePass { overall_mutated } -} \ No newline at end of file +} diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs index ae9914a..0202c66 100644 --- a/src/codegen/last/x86_64.rs +++ b/src/codegen/last/x86_64.rs @@ -16,10 +16,10 @@ use crate::codegen::last::abi; use crate::codegen::last::frame::{Frame, SlotKey}; +use crate::codegen::last::peephole::PeepholePass; use crate::common::ast::expr::{BinOp, UnOp}; use crate::common::errors::types::CodegenError; use crate::ir::tac::{ConstValue, LabelId, Operand, TacFunction, TacInstr, TacProgram}; -use crate::codegen::last::peephole::PeepholePass; use std::collections::HashMap; type EmitResult = Result; diff --git 
a/src/tests/mod.rs b/src/tests/mod.rs index d3f14d2..431a64a 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -6,8 +6,8 @@ mod lexical_test; mod literals_test; mod parser_file_test; mod parser_test; +mod peephole_test; mod semantic_test; mod source_test; mod symbol_test; mod token_test; -mod peephole_test; diff --git a/src/tests/peephole_test.rs b/src/tests/peephole_test.rs index f87029b..1b0d6b2 100644 --- a/src/tests/peephole_test.rs +++ b/src/tests/peephole_test.rs @@ -11,11 +11,7 @@ fn run_peephole(instrs: Vec<&str>) -> Vec { #[test] fn test_remove_add_sub_zero() { // Padrão 3: Soma ou subtração por zero deve sumir - let asm = run_peephole(vec![ - "addq $0, %rax", - "subq $0, %rcx", - "movq %rax, %rbx" - ]); + let asm = run_peephole(vec!["addq $0, %rax", "subq $0, %rcx", "movq %rax, %rbx"]); assert_eq!(asm, vec!["movq %rax, %rbx"]); } @@ -25,7 +21,7 @@ fn test_remove_redundant_mov() { let asm = run_peephole(vec![ "movq %rax, %rbx", "movq %rbx, %rax", // Esse tem que sumir - "ret" + "ret", ]); assert_eq!(asm, vec!["movq %rax, %rbx", "ret"]); } @@ -33,30 +29,20 @@ fn test_remove_redundant_mov() { #[test] fn test_remove_jump_to_next_line() { // Padrão 5: Pulo para a linha imediatamente abaixo - let asm = run_peephole(vec![ - "jmp .L_main_L1", - ".L_main_L1:", - "ret" - ]); + let asm = run_peephole(vec!["jmp .L_main_L1", ".L_main_L1:", "ret"]); assert_eq!(asm, vec![".L_main_L1:", "ret"]); } #[test] fn test_optimize_mul_power_of_two() { // Padrão 4: Multiplicação por 8 (2^3) deve virar shlq $3 - let asm = run_peephole(vec![ - "movq $8, %rcx", - "imulq %rcx, %rax" - ]); + let asm = run_peephole(vec!["movq $8, %rcx", "imulq %rcx, %rax"]); assert_eq!(asm, vec![" shlq $3, %rax"]); } #[test] fn test_optimize_cmp_zero() { // Padrão 6: Comparar com 0 deve virar testq - let asm = run_peephole(vec![ - "movq $0, %rcx", - "cmpq %rcx, %rax" - ]); + let asm = run_peephole(vec!["movq $0, %rcx", "cmpq %rcx, %rax"]); assert_eq!(asm, vec![" testq %rax, %rax"]); -} \ No 
newline at end of file +} From 15c6118ef92a223b7fa1edd1dc7a3c5728453d9c Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 24 Jun 2026 01:59:51 -0300 Subject: [PATCH 65/91] =?UTF-8?q?test(e2e):=20cobrir=20blocos,=20if/else,?= =?UTF-8?q?=20while,=20for,=20do-while=20e=20recurs=C3=A3o=20em=20smoke=20?= =?UTF-8?q?tests=20reais?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Estende tests/exe_smoke_test.rs com casos executados de ponta a ponta (fonte C -> lexer -> parser -> semantica -> IR -> codegen x86-64 -> gcc) para blocos/escopos locais, cadeia if/else if/else, while, for, do-while e recursao (fibonacci), complementando os smoke tests ja existentes de programa minimo, aritmetica e chamada de funcao. Resolve #161 --- tests/exe_smoke_test.rs | 106 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index 63d4f45..c36cd7b 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -141,3 +141,109 @@ fn smoke_function_call_runs() { #[cfg(unix)] assert_eq!(status.code(), Some(42)); } + +#[test] +fn smoke_blocks_and_local_scopes_run() { + require_gcc!(); + + let status = compile_and_run( + "blocks", + "int main() { \ + int total = 0; \ + { int a = 5; total = total + a; } \ + { int b = 7; total = total + b; } \ + return total; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(12)); +} + +#[test] +fn smoke_if_else_chain_runs() { + require_gcc!(); + + let status = compile_and_run( + "if_else_chain", + "int classify(int n) { \ + if (n < 0) { return -1; } \ + else if (n == 0) { return 0; } \ + else { return 1; } \ + } \ + int main() { return classify(-5) + classify(0) + classify(5) + 10; }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(10)); +} + +#[test] +fn smoke_while_loop_runs() { + require_gcc!(); + + let status = compile_and_run( + "while", + "int main() { \ + int i = 0; \ + int total = 0; \ + while (i < 5) { 
total = total + i; i = i + 1; } \ + return total; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(10)); +} + +#[test] +fn smoke_for_loop_runs() { + require_gcc!(); + + let status = compile_and_run( + "for", + "int main() { \ + int total = 0; \ + for (int i = 0; i < 5; i = i + 1) { total = total + i; } \ + return total; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(10)); +} + +#[test] +fn smoke_do_while_loop_runs() { + require_gcc!(); + + let status = compile_and_run( + "do_while", + "int main() { \ + int i = 0; \ + int total = 0; \ + do { total = total + i; i = i + 1; } while (i < 5); \ + return total; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(10)); +} + +#[test] +fn smoke_recursive_fibonacci_runs() { + require_gcc!(); + + let status = compile_and_run( + "fib", + "int fib(int n) { \ + if (n <= 1) { return n; } \ + return fib(n - 1) + fib(n - 2); \ + } \ + int main() { return fib(10); }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(55)); +} From 1a4c5748e883bcffcfb9e2ee2866d6492dde4093 Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 24 Jun 2026 02:02:21 -0300 Subject: [PATCH 66/91] =?UTF-8?q?feat(examples):=20adicionar=20programa-de?= =?UTF-8?q?mo=20para=20a=20apresenta=C3=A7=C3=A3o=20final?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adiciona src/examples/demo_presentation.c com um programa curto que combina variaveis/expressoes, funcao recursiva (factorial) e controle de fluxo (if/while), dentro do subconjunto estavel do backend atual (sem struct/ponteiro/switch, ainda nao suportados no codegen). Documenta a saida esperada (exit code 80) no cabecalho do arquivo e adiciona um smoke test (smoke_presentation_demo_runs) que compila e executa o exemplo de ponta a ponta via gcc, garantindo que ele continue funcionando com o pipeline atual. 
Resolve #163 --- src/examples/demo_presentation.c | 39 ++++++++++++++++++++++++++++++++ tests/exe_smoke_test.rs | 17 ++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 src/examples/demo_presentation.c diff --git a/src/examples/demo_presentation.c b/src/examples/demo_presentation.c new file mode 100644 index 0000000..540f172 --- /dev/null +++ b/src/examples/demo_presentation.c @@ -0,0 +1,39 @@ +/* + * Programa-demo para a apresentação final (issue #163). + * + * Mostra, em poucas linhas, os recursos centrais já estáveis do + * subconjunto de C suportado pelo compilador: + * - variaveis e expressoes aritmeticas + * - funcoes com chamada recursiva (factorial) + * - controle de fluxo: if/else e while + * + * Saida esperada: exit code 80 + * factorial(4) = 24 + * sum_even_squares(6) = 2*2 + 4*4 + 6*6 = 56 + * 24 + 56 = 80 + */ + +int factorial(int n) { + if (n <= 1) { + return 1; + } + return n * factorial(n - 1); +} + +int sum_even_squares(int n) { + int total = 0; + int i = 1; + while (i <= n) { + if (i % 2 == 0) { + total = total + i * i; + } + i = i + 1; + } + return total; +} + +int main(void) { + int fact4 = factorial(4); + int squares = sum_even_squares(6); + return fact4 + squares; +} diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index 63d4f45..9bbaf5d 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -141,3 +141,20 @@ fn smoke_function_call_runs() { #[cfg(unix)] assert_eq!(status.code(), Some(42)); } + +/// Garante que o programa-demo da apresentação final (issue #163), em +/// `src/examples/demo_presentation.c`, continua compilando e produzindo o +/// exit code documentado no cabeçalho do arquivo. 
+#[test] +fn smoke_presentation_demo_runs() { + require_gcc!(); + + let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("src/examples/demo_presentation.c"); + let source = std::fs::read_to_string(&path) + .unwrap_or_else(|e| panic!("falha ao ler '{}': {e}", path.display())); + + let status = compile_and_run("presentation_demo", &source); + + #[cfg(unix)] + assert_eq!(status.code(), Some(80)); +} From 604c1497b51d66b7ee03ef2f8358938af005088b Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 24 Jun 2026 02:18:36 -0300 Subject: [PATCH 67/91] feat(ir): implementar lowering de switch/case Implementa Stmt::Switch em src/ir/lower.rs, antes um stub de erro. Gera uma cadeia de comparacoes (Eq) contra o discriminante, cada uma com CondJump para o label do case correspondente; default (quando presente) e usado como fallback ao final da cadeia, senao o fallback e o fim do switch. O corpo dos cases preserva fallthrough real de C (sem break implicito entre eles); break salta para o label de fim do switch, reaproveitando o mecanismo de ControlLabels ja usado em loops. Adiciona smoke tests cobrindo switch com default e switch com fallthrough explicito (sem break) executados via gcc ponta a ponta. 
--- src/ir/lower.rs | 62 +++++++++++++++++++++++++++++++++++++---- tests/exe_smoke_test.rs | 45 ++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 5 deletions(-) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 926b107..8a45c4d 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -2,7 +2,7 @@ use crate::common::ast::{ ast::{Program, Type}, decl::Decl, expr::{BinOp, Expr, Literal, PostfixOp, PrefixOp}, - stmt::Stmt, + stmt::{Stmt, SwitchLabel}, }; use crate::common::errors::types::CodegenError; use crate::ir::tac::{ @@ -304,10 +304,62 @@ impl Lowerer { } Ok(()) } - Stmt::Switch(_, _, _) => Err(codegen_error( - "switch nao suportado no lowering", - Some("switch"), - )), + Stmt::Switch(disc, cases, _) => { + let disc_op = self.lower_expr(disc)?; + let end_label = self.labels.fresh(); + + // Um label por `case`/`default`; serve tanto de alvo da + // comparacao quanto de entrada do corpo daquele caso. + let case_labels: Vec = cases.iter().map(|_| self.labels.fresh()).collect(); + let default_index = cases + .iter() + .position(|case| matches!(case.label, SwitchLabel::Default)); + + // Cadeia de comparacoes: testa cada `case` (na ordem em que + // aparece); `default` nao entra na comparacao, e usado como + // fallback ao final da cadeia. 
+ for (index, case) in cases.iter().enumerate() { + if let SwitchLabel::Case(case_expr) = &case.label { + let case_val = self.lower_expr(case_expr)?; + let cmp = self.fresh_temp(); + self.instrs.push(TacInstr::BinOp { + dst: cmp, + op: BinOp::Eq, + lhs: disc_op.clone(), + rhs: case_val, + }); + let next_test = self.labels.fresh(); + self.instrs.push(TacInstr::CondJump { + cond: Operand::Temp(cmp), + then_label: case_labels[index], + else_label: next_test, + }); + self.instrs.push(TacInstr::Label(next_test)); + } + } + let fallback_label = default_index.map_or(end_label, |i| case_labels[i]); + self.instrs.push(TacInstr::Jump { + label: fallback_label, + }); + + // Corpo dos casos, em ordem, sem break implicito entre eles + // (fallthrough real de C); `break` salta para `end_label`. + for (index, case) in cases.iter().enumerate() { + self.instrs.push(TacInstr::Label(case_labels[index])); + for stmt in &case.stmts { + self.lower_stmt_with_control( + stmt, + ControlLabels { + break_label: Some(end_label), + continue_label: control.continue_label, + }, + )?; + } + } + + self.instrs.push(TacInstr::Label(end_label)); + Ok(()) + } } } diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index 63d4f45..73e47fc 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -141,3 +141,48 @@ fn smoke_function_call_runs() { #[cfg(unix)] assert_eq!(status.code(), Some(42)); } + +#[test] +fn smoke_switch_with_default_runs() { + require_gcc!(); + + let status = compile_and_run( + "switch_default", + "int classify(int n) { \ + int result = 0; \ + switch (n) { \ + case 1: result = 1; break; \ + case 2: result = 2; break; \ + default: result = -1; break; \ + } \ + return result; \ + } \ + int main() { return classify(1) + classify(2) + classify(9) + 100; }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(102)); +} + +#[test] +fn smoke_switch_fallthrough_runs() { + require_gcc!(); + + let status = compile_and_run( + "switch_fallthrough", + "int main() { \ + 
int n = 2; \ + int total = 0; \ + switch (n) { \ + case 1: total = total + 1; \ + case 2: total = total + 2; \ + case 3: total = total + 3; break; \ + case 4: total = total + 100; \ + } \ + return total; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(5)); +} From 8fcbb825e31de275416a260533b52a6e1cded16f Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 24 Jun 2026 02:20:12 -0300 Subject: [PATCH 68/91] feat(codegen): implementar address-of (&x) e deref de leitura (*p) no x86-64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit emit_unop tratava & e * sempre via load_op + stub de erro. Corrige: - UnOp::AddrOf agora computa o endereco do slot de src diretamente (leaq offset(%rbp), %rax), sem passar por load_op (que carregaria o valor, nao o endereco). Erra de forma controlada se o operando nao tiver slot de variavel/temporario (ex.: endereco de uma constante). - UnOp::Deref (como valor lido, ex.: 'int y = *p;') agora carrega o ponteiro em %rax e desreferencia com 'movq (%rax), %rax'. Atribuicao via deref como destino ('*p = x;') ainda nao e suportada no lowering (lower_assignment_target so aceita Ident) — fica para um commit seguinte, que exige estender Operand para enderecamento indireto. Adiciona smoke test cobrindo &x e *p (leitura) executado via gcc. --- src/codegen/last/x86_64.rs | 30 ++++++++++++++++++++---------- tests/exe_smoke_test.rs | 18 ++++++++++++++++++ 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs index 0202c66..43660e1 100644 --- a/src/codegen/last/x86_64.rs +++ b/src/codegen/last/x86_64.rs @@ -427,6 +427,24 @@ fn emit_unop( frame: &Frame, strings: &StringPool, ) -> EmitResult<()> { + // `&x` precisa do *endereco* do slot de `src`, nao do seu valor: nao + // passa por `load_op` (que faria `movq slot(%rbp), %reg`, carregando o + // conteudo em vez do endereco). 
+ if matches!(op, UnOp::AddrOf) { + let key = SlotKey::from_operand(src).ok_or_else(|| { + codegen_error( + "endereco-de (&) requer uma variavel ou temporario com slot", + Some("unop"), + ) + })?; + let offset = frame + .offset_of(&key) + .expect("operando de & deve ter slot alocado no frame"); + em.insn(&format!("leaq {offset}(%rbp), %rax")); + store_op(em, frame, &Operand::Temp(dst), "rax")?; + return Ok(()); + } + load_op(em, frame, src, "rax", strings)?; match op { UnOp::Neg => em.insn("negq %rax"), @@ -437,17 +455,9 @@ fn emit_unop( em.insn("movzbq %al, %rax"); } UnOp::Deref => { - return Err(codegen_error( - "codegen de deref (*) nao suportado neste backend", - Some("unop"), - )) - } - UnOp::AddrOf => { - return Err(codegen_error( - "codegen de address-of (&) nao suportado neste backend", - Some("unop"), - )) + em.insn("movq (%rax), %rax"); } + UnOp::AddrOf => unreachable!("tratado antes do load_op acima"), } store_op(em, frame, &Operand::Temp(dst), "rax")?; Ok(()) diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index 73e47fc..8219803 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -164,6 +164,24 @@ fn smoke_switch_with_default_runs() { assert_eq!(status.code(), Some(102)); } +#[test] +fn smoke_address_of_and_deref_read_runs() { + require_gcc!(); + + let status = compile_and_run( + "addrof_deref_read", + "int main() { \ + int x = 21; \ + int *p = &x; \ + int y = *p; \ + return y * 2; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(42)); +} + #[test] fn smoke_switch_fallthrough_runs() { require_gcc!(); From 2bb21daf556e946b2f5c96fc58df69161f6bdc13 Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 24 Jun 2026 02:24:44 -0300 Subject: [PATCH 69/91] feat(ir,codegen): suportar atribuicao via deref de ponteiro (*p = x) lower_assignment_target so aceitava Expr::Ident como destino, entao '*p = x;', '*p += 1;', '(*p)++' etc. falhavam no lowering. 
Para suportar isso sem reescrever o modelo de frame (slots fixos de 8 bytes por var/temp), estende Operand com uma nova variante: Operand::Deref(Box) que representa enderecamento indireto: o Operand interno e o ponteiro, lido e desreferenciado tanto para leitura (load_op) quanto para escrita (store_op). Mudancas: - ir/tac.rs: novo Operand::Deref + Display. - ir/lower.rs: lower_assignment_target trata Expr::Unary(Deref, ..) retornando Operand::Deref(ptr); emit_copy aceita Deref como destino valido de Copy. - codegen/last/frame.rs: SlotKey::from_operand recursa em Deref, garantindo que o slot do ponteiro interno seja alocado. - codegen/last/x86_64.rs: load_op/store_op tratam Operand::Deref materializando o ponteiro em %r11 (scratch dedicado, nunca usado como reg de valor neste backend) antes do deref. Isso cobre escrita simples atraves de ponteiro, compound assign (*p += 1) e ++/-- via deref, inclusive quando o ponteiro chega como parametro de funcao (ex.: void inc(int *p) { *p = *p + 1; }). Adiciona smoke tests cobrindo os tres casos, executados via gcc. --- src/codegen/last/frame.rs | 4 ++- src/codegen/last/x86_64.rs | 44 ++++++++++++++++++++++++------ src/ir/lower.rs | 11 ++++++-- src/ir/tac.rs | 5 ++++ tests/exe_smoke_test.rs | 56 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 109 insertions(+), 11 deletions(-) diff --git a/src/codegen/last/frame.rs b/src/codegen/last/frame.rs index fb4a88e..ebf1658 100644 --- a/src/codegen/last/frame.rs +++ b/src/codegen/last/frame.rs @@ -35,12 +35,14 @@ pub enum SlotKey { impl SlotKey { /// Mapeia um `Operand` para sua chave de slot. Constantes nao tem slot - /// (sao emitidas como imediato) e retornam `None`. + /// (sao emitidas como imediato) e retornam `None`. `Deref` nao tem slot + /// proprio: o slot relevante e o do ponteiro que ele indireciona. 
pub fn from_operand(op: &Operand) -> Option { match op { Operand::Temp(temp) => Some(Self::Temp(temp.0)), Operand::Var(name) => Some(Self::Var(name.clone())), Operand::Const(_) => None, + Operand::Deref(inner) => Self::from_operand(inner), } } } diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs index 43660e1..f7d9502 100644 --- a/src/codegen/last/x86_64.rs +++ b/src/codegen/last/x86_64.rs @@ -24,6 +24,13 @@ use std::collections::HashMap; type EmitResult = Result; +/// Registrador escolhido para materializar o endereco de um `Operand::Deref` +/// antes do deref propriamente dito. Caller-saved e nunca usado como `reg` +/// pelos chamadores de `load_op`/`store_op` neste backend (que usam apenas +/// `rax`/`rcx`/registradores de argumento), entao e seguro como scratch +/// dedicado mesmo em derefs aninhados. +const DEREF_SCRATCH_REG: &str = "r11"; + /// Acumulador de linhas de assembly com indentacao controlada. struct Emitter { out: String, @@ -310,7 +317,7 @@ fn emit_instr( } TacInstr::Copy { dst, src } => { load_op(em, frame, src, "rax", strings)?; - store_op(em, frame, dst, "rax")?; + store_op(em, frame, dst, "rax", strings)?; Ok(()) } TacInstr::BinOp { dst, op, lhs, rhs } => emit_binop(em, op, lhs, rhs, *dst, frame, strings), @@ -377,7 +384,7 @@ fn emit_binop( } } - store_op(em, frame, &Operand::Temp(dst), "rax")?; + store_op(em, frame, &Operand::Temp(dst), "rax", strings)?; Ok(()) } @@ -415,7 +422,7 @@ fn emit_logical( em.insn("andq %rdx, %rax"); } - store_op(em, frame, &Operand::Temp(dst), "rax")?; + store_op(em, frame, &Operand::Temp(dst), "rax", strings)?; Ok(()) } @@ -441,7 +448,7 @@ fn emit_unop( .offset_of(&key) .expect("operando de & deve ter slot alocado no frame"); em.insn(&format!("leaq {offset}(%rbp), %rax")); - store_op(em, frame, &Operand::Temp(dst), "rax")?; + store_op(em, frame, &Operand::Temp(dst), "rax", strings)?; return Ok(()); } @@ -459,7 +466,7 @@ fn emit_unop( } UnOp::AddrOf => unreachable!("tratado antes do load_op 
acima"), } - store_op(em, frame, &Operand::Temp(dst), "rax")?; + store_op(em, frame, &Operand::Temp(dst), "rax", strings)?; Ok(()) } @@ -501,7 +508,7 @@ fn emit_call( } if let Some(dst) = dst { - store_op(em, frame, &Operand::Temp(dst), "rax")?; + store_op(em, frame, &Operand::Temp(dst), "rax", strings)?; } Ok(()) } @@ -541,11 +548,31 @@ fn load_op( em.insn(&format!("movq {offset}(%rbp), %{reg}")); Ok(()) } + Operand::Deref(inner) => { + // `%r11` e scratch/caller-saved e nao e usado como `reg` por + // nenhum chamador de `load_op`/`store_op` neste backend, entao e + // seguro usa-lo aqui para materializar o ponteiro antes do deref. + load_op(em, frame, inner, DEREF_SCRATCH_REG, strings)?; + em.insn(&format!("movq (%{DEREF_SCRATCH_REG}), %{reg}")); + Ok(()) + } } } -/// Armazena o registrador nomeado em `op` (que deve ser temp ou var). -fn store_op(em: &mut Emitter, frame: &Frame, op: &Operand, reg: &str) -> EmitResult<()> { +/// Armazena o registrador nomeado em `op` (que deve ser temp, var ou deref). 
+fn store_op( + em: &mut Emitter, + frame: &Frame, + op: &Operand, + reg: &str, + strings: &StringPool, +) -> EmitResult<()> { + if let Operand::Deref(inner) = op { + load_op(em, frame, inner, DEREF_SCRATCH_REG, strings)?; + em.insn(&format!("movq %{reg}, (%{DEREF_SCRATCH_REG})")); + return Ok(()); + } + let offset = match op { Operand::Temp(temp) => frame .offset_of(&SlotKey::Temp(temp.0)) @@ -559,6 +586,7 @@ fn store_op(em: &mut Emitter, frame: &Frame, op: &Operand, reg: &str) -> EmitRes Some("store"), )) } + Operand::Deref(_) => unreachable!("tratado antes do match acima"), }; em.insn(&format!("movq %{reg}, {offset}(%rbp)")); Ok(()) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 8a45c4d..d0478b6 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -1,7 +1,7 @@ use crate::common::ast::{ ast::{Program, Type}, decl::Decl, - expr::{BinOp, Expr, Literal, PostfixOp, PrefixOp}, + expr::{BinOp, Expr, Literal, PostfixOp, PrefixOp, UnOp}, stmt::{Stmt, SwitchLabel}, }; use crate::common::errors::types::CodegenError; @@ -403,6 +403,13 @@ impl Lowerer { fn lower_assignment_target(&mut self, expr: &Expr) -> LowerResult { match expr { Expr::Ident(name, _) => Ok(Operand::Var(name.clone())), + // `*p` como destino (`*p = x;`, `*p += 1;`, `(*p)++` etc.): o + // ponteiro em si e um rvalue comum, mas o destino da escrita e o + // endereco para o qual ele aponta. 
+ Expr::Unary(UnOp::Deref, inner, _) => { + let ptr = self.lower_expr(inner)?; + Ok(Operand::Deref(Box::new(ptr))) + } _ => Err(codegen_error( "destino de atribuicao nao suportado no lowering", Some("assign"), @@ -412,7 +419,7 @@ impl Lowerer { fn emit_copy(&mut self, dst: Operand, src: Operand) -> LowerResult<()> { match dst { - Operand::Temp(_) | Operand::Var(_) => { + Operand::Temp(_) | Operand::Var(_) | Operand::Deref(_) => { self.instrs.push(TacInstr::Copy { dst, src }); Ok(()) } diff --git a/src/ir/tac.rs b/src/ir/tac.rs index 459f4b0..b975324 100644 --- a/src/ir/tac.rs +++ b/src/ir/tac.rs @@ -67,6 +67,10 @@ pub enum Operand { Temp(TempId), Var(String), Const(ConstValue), + /// Endereco indireto: o ponteiro guardado em `Operand` interno e lido (ou + /// escrito, quando usado como destino de `Copy`) atraves de deref, ex.: + /// `*p` como destino de `*p = x;` ou como valor em `*p + 1`. + Deref(Box<Operand>), } #[derive(Debug, Clone, PartialEq)] @@ -146,6 +150,7 @@ impl fmt::Display for Operand { Operand::Temp(temp) => write!(f, "{temp}"), Operand::Var(name) => write!(f, "{name}"), Operand::Const(value) => write!(f, "{value}"), + Operand::Deref(inner) => write!(f, "*{inner}"), } } } diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index 8219803..3fb9657 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -182,6 +182,62 @@ fn smoke_address_of_and_deref_read_runs() { assert_eq!(status.code(), Some(42)); } +#[test] +fn smoke_deref_assignment_writes_through_pointer() { + require_gcc!(); + + let status = compile_and_run( + "deref_assign", + "int main() { \ + int x = 10; \ + int *p = &x; \ + *p = 20; \ + return x; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(20)); +} + +#[test] +fn smoke_deref_compound_assign_through_function_param_runs() { + require_gcc!(); + + let status = compile_and_run( + "deref_compound", + "void inc(int *p) { *p = *p + 1; } \ + int main() { \ + int x = 10; \ + inc(&x); \ + inc(&x); \ + return x; \ + }",
+ ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(12)); +} + +#[test] +fn smoke_deref_increment_operators_run() { + require_gcc!(); + + let status = compile_and_run( + "deref_incr", + "int main() { \ + int x = 5; \ + int *p = &x; \ + (*p)++; \ + *p += 10; \ + return x; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(16)); +} + #[test] fn smoke_switch_fallthrough_runs() { require_gcc!(); From f28578b3d743855f1d247748158b9d7f0eeb0548 Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 24 Jun 2026 02:28:04 -0300 Subject: [PATCH 70/91] feat(ir): suportar sizeof(expr) para identificadores simples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expr::Sizeof(expr) era um stub de erro porque o lowering nao tinha informacao de tipo das variaveis (a analise semantica resolve tipos, mas nao os propaga para o lowering). Adiciona um ambiente de tipos leve ao Lowerer (var_types: HashMap), populado a partir dos parametros da funcao e de cada Stmt::VarDecl conforme o lowering avanca. Com isso, 'sizeof(x)' passa a resolver para o tamanho do tipo declarado de 'x' via a funcao type_size ja existente (cobre tipos primitivos e ponteiro, que e o caso pratico mais comum). Formas mais gerais de sizeof(expr) — sobre expressoes compostas como sizeof(a+b) ou sobre arrays/structs, que type_size ainda nao sabe medir — continuam retornando o erro explicito anterior em vez de um resultado incorreto. Adiciona smoke test cobrindo sizeof de int/long/char/ponteiro. --- src/ir/lower.rs | 42 ++++++++++++++++++++++++++++++++++++----- tests/exe_smoke_test.rs | 20 ++++++++++++++++++++ 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index d0478b6..dc8cc5f 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -16,6 +16,10 @@ pub struct Lowerer { temps: TempGen, labels: LabelGen, instrs: Vec, + /// Tipo declarado de cada variavel/parametro visto ate agora na funcao + /// atual. 
Usado apenas para resolver `sizeof(expr)`; nao substitui a + /// analise semantica (que ja validou o programa antes do lowering). + var_types: std::collections::HashMap<String, Type>, } #[derive(Debug, Clone, Copy, Default)] @@ -30,9 +34,17 @@ impl Lowerer { temps: TempGen::new(), labels: LabelGen::new(), instrs: Vec::new(), + var_types: std::collections::HashMap::new(), } } + /// Registra o tipo declarado de `name`, usado depois para resolver + /// `sizeof(name)`. Chamado para parametros de funcao e para cada + /// `VarDecl` conforme o lowering avanca. + fn declare_var_type(&mut self, name: &str, ty: &Type) { + self.var_types.insert(name.to_string(), ty.clone()); + } + pub fn lower_expr(&mut self, expr: &Expr) -> LowerResult<Operand> { match expr { Expr::Literal(value, _) => Ok(Operand::Const(lower_literal(value))), @@ -137,10 +149,26 @@ impl Lowerer { "acesso a membro nao suportado no lowering", Some("member"), )), - Expr::Sizeof(_, _) => Err(codegen_error( - "sizeof(expr) nao suportado no lowering sem informacao de tipo", - Some("sizeof"), - )), + // `sizeof(expr)`: o caso pratico mais comum e `sizeof(variavel)`. + // O tipo declarado de identificadores e rastreado em + // `var_types` (preenchido a partir de parametros e `VarDecl`); + // para qualquer outra forma de expressao ainda nao ha + // informacao de tipo disponivel no lowering.
+ Expr::Sizeof(inner, _) => match inner.as_ref() { + Expr::Ident(name, _) => { + let ty = self.var_types.get(name).ok_or_else(|| { + codegen_error( + "sizeof(expr): tipo da variavel desconhecido no lowering", + Some("sizeof"), + ) + })?; + Ok(Operand::Const(ConstValue::Int(type_size(ty)?))) + } + _ => Err(codegen_error( + "sizeof(expr) so e suportado para identificadores simples neste backend", + Some("sizeof"), + )), + }, } } @@ -297,7 +325,8 @@ impl Lowerer { self.instrs.push(TacInstr::Return { val }); Ok(()) } - Stmt::VarDecl(_, name, init, _) => { + Stmt::VarDecl(qty, name, init, _) => { + self.declare_var_type(name, &qty.ty); if let Some(init) = init { let src = self.lower_expr(init)?; self.emit_copy(Operand::Var(name.clone()), src)?; @@ -450,6 +479,9 @@ pub fn lower_function(decl: &Decl) -> LowerResult { match decl { Decl::Function(_, name, params, body, _) => { let mut lowerer = Lowerer::new(); + for (qty, param_name) in params { + lowerer.declare_var_type(param_name, &qty.ty); + } for stmt in body { lowerer.lower_stmt(stmt)?; } diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index 3fb9657..77cbaf3 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -238,6 +238,26 @@ fn smoke_deref_increment_operators_run() { assert_eq!(status.code(), Some(16)); } +#[test] +fn smoke_sizeof_of_variables_runs() { + require_gcc!(); + + let status = compile_and_run( + "sizeof_vars", + "int main() { \ + int x = 7; \ + long l = 3; \ + char c = 'a'; \ + int *p = &x; \ + return sizeof(x) + sizeof(l) + sizeof(c) + sizeof(p); \ + }", + ); + + // sizeof(int) + sizeof(long) + sizeof(char) + sizeof(int*) = 4+8+1+8. 
+ #[cfg(unix)] + assert_eq!(status.code(), Some(21)); +} + #[test] fn smoke_switch_fallthrough_runs() { require_gcc!(); From 10fb76ee3370ec2a474983cdf9ddd03cce17b10f Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 24 Jun 2026 02:34:03 -0300 Subject: [PATCH 71/91] feat(ir): suportar indexacao via ponteiro (p[i], leitura e escrita) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expr::Index era um stub de erro. Implementa o caso pratico mais relevante — indexacao sobre ponteiro (Type::Pointer), o padrao ja usado no fixture tests/integration/valid/pointers.c — reaproveitando Operand::Deref introduzido para deref (commit anterior): endereco = lower_expr(arr) + idx * sizeof(elemento) p[i] == *endereco (Operand::Deref(endereco)) Adiciona: - Lowerer::infer_type: inferencia de tipo bem limitada (identificador, deref, indice, cast) so para resolver sizeof(elemento) em arr[i]; fora desse subconjunto retorna erro explicito em vez de adivinhar. - Lowerer::lower_index_address: calcula o endereco acima, usado tanto em leitura (lower_expr) quanto em escrita (lower_assignment_target). Indexacao sobre array fixo (Type::Array, ex.: 'int arr[5];') continua sem suporte de proposito: o parser hoje descarta o tamanho do array ao montar a AST (ver parser/rules/declarations/types.rs), entao nao ha como o lowering saber quantos bytes reservar na stack para um array local. Rastrear esse tamanho e expor via Type e um trabalho separado e mais amplo (toca parser/AST/semantica), fora do escopo deste commit; o erro retornado para esse caso explica a limitacao. Adiciona smoke test cobrindo leitura/escrita via p[0] (equivalente definido a *p, sem incorrer no UB de indexar alem de um unico objeto) executado via gcc. 
--- src/ir/lower.rs | 85 +++++++++++++++++++++++++++++++++++++++-- tests/exe_smoke_test.rs | 20 ++++++++++ 2 files changed, 101 insertions(+), 4 deletions(-) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index dc8cc5f..aa7b43c 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -45,6 +45,77 @@ impl Lowerer { self.var_types.insert(name.to_string(), ty.clone()); } + /// Infere o tipo estatico de um subconjunto limitado de expressoes + /// (identificadores, deref, indice via ponteiro e cast) — o suficiente + /// para resolver o tamanho do elemento em `arr[i]`. Nao substitui a + /// analise semantica completa; expressoes fora desse subconjunto + /// retornam um erro explicito em vez de um palpite. + fn infer_type(&self, expr: &Expr) -> LowerResult<Type> { + match expr { + Expr::Ident(name, _) => self.var_types.get(name).cloned().ok_or_else(|| { + codegen_error( + "tipo de variavel desconhecido no lowering", + Some("type"), + ) + }), + Expr::Unary(UnOp::Deref, inner, _) => match self.infer_type(inner)? { + Type::Pointer(t) | Type::Array(t) => Ok(*t), + _ => Err(codegen_error( + "deref de valor que nao e ponteiro/array", + Some("type"), + )), + }, + Expr::Index(arr, _, _) => match self.infer_type(arr)? { + Type::Pointer(t) => Ok(*t), + Type::Array(_) => Err(codegen_error( + "indexacao de array fixo ainda nao suportada (tamanho do array nao e rastreado pelo lowering); indexacao via ponteiro funciona normalmente", + Some("index"), + )), + _ => Err(codegen_error( + "indexacao de valor que nao e ponteiro/array", + Some("index"), + )), + }, + Expr::Cast(qty, _, _) => Ok(qty.ty.clone()), + _ => Err(codegen_error( + "tipo de expressao nao inferido no lowering (suporte limitado a identificador, deref, indice e cast)", + Some("type"), + )), + } + } + + /// Calcula o endereco (em bytes) de `arr[idx]`, assumindo que `arr` e um + /// ponteiro: `endereco = lower_expr(arr) + idx * sizeof(elemento)`.
+ fn lower_index_address(&mut self, arr: &Expr, idx: &Expr) -> LowerResult { + let elem_ty = self.infer_type(arr)?; + let elem_size = type_size(&elem_ty)?; + + let base_ptr = self.lower_expr(arr)?; + let idx_op = self.lower_expr(idx)?; + + let offset = if elem_size == 1 { + idx_op + } else { + let scaled = self.fresh_temp(); + self.instrs.push(TacInstr::BinOp { + dst: scaled, + op: BinOp::Mul, + lhs: idx_op, + rhs: Operand::Const(ConstValue::Int(elem_size)), + }); + Operand::Temp(scaled) + }; + + let addr = self.fresh_temp(); + self.instrs.push(TacInstr::BinOp { + dst: addr, + op: BinOp::Add, + lhs: base_ptr, + rhs: offset, + }); + Ok(Operand::Temp(addr)) + } + pub fn lower_expr(&mut self, expr: &Expr) -> LowerResult { match expr { Expr::Literal(value, _) => Ok(Operand::Const(lower_literal(value))), @@ -141,10 +212,10 @@ impl Lowerer { self.instrs.push(TacInstr::Label(end_label)); Ok(Operand::Temp(dst)) } - Expr::Index(_, _, _) => Err(codegen_error( - "acesso por indice nao suportado no lowering", - Some("index"), - )), + Expr::Index(arr, idx, _) => { + let addr = self.lower_index_address(arr, idx)?; + Ok(Operand::Deref(Box::new(addr))) + } Expr::Member(_, _, _, _) => Err(codegen_error( "acesso a membro nao suportado no lowering", Some("member"), @@ -439,6 +510,12 @@ impl Lowerer { let ptr = self.lower_expr(inner)?; Ok(Operand::Deref(Box::new(ptr))) } + // `arr[i] = x;` (com `arr` ponteiro): mesmo enderecamento usado + // na leitura, via `lower_index_address`. 
+ Expr::Index(arr, idx, _) => { + let addr = self.lower_index_address(arr, idx)?; + Ok(Operand::Deref(Box::new(addr))) + } _ => Err(codegen_error( "destino de atribuicao nao suportado no lowering", Some("assign"), diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index 77cbaf3..47d80d2 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -238,6 +238,26 @@ fn smoke_deref_increment_operators_run() { assert_eq!(status.code(), Some(16)); } +#[test] +fn smoke_pointer_index_read_and_write_runs() { + require_gcc!(); + + let status = compile_and_run( + "pointer_index", + "int sum_via_index(int *p) { \ + p[0] = 10; \ + return p[0] + 5; \ + } \ + int main() { \ + int x = 1; \ + return sum_via_index(&x); \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(15)); +} + #[test] fn smoke_sizeof_of_variables_runs() { require_gcc!(); From d703affda21ae0e33f53355fe99fadd757b1ead8 Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 24 Jun 2026 02:37:21 -0300 Subject: [PATCH 72/91] refactor(codegen): generalizar Frame para slots de tamanho variavel Preparacao para suportar variaveis cujo valor nao cabe em 8 bytes (structs locais, no proximo commit). Frame::allocate_local hoje da a toda var/temp exatamente um slot fixo de 8 bytes; generaliza para allocate_local_sized(key, size), que reserva um bloco contiguo de 'size' bytes (arredondado para multiplo de 8) e retorna o offset do seu primeiro byte (a base, usada depois como 'offset_of(var) + offset_do_campo' para acessar membros). allocate_local passa a ser allocate_local_sized(key, 8). A sequencia de offsets para alocacoes de 8 bytes (-8, -16, -24, ...) e preservada exatamente: o cursor agora comeca em 0 e decrementa antes de atribuir (em vez de atribuir e decrementar depois), o que da o mesmo resultado para blocos de 8 bytes e generaliza de forma continua para blocos maiores. 
frame_size() tambem deixa de ser local_slot_count() * 8 (contagem de entradas) e passa a vir direto do cursor de alocacao em bytes, ja que um bloco de struct conta como varios 'slots' de 8 bytes de contabilidade mas deve ser medido pelos bytes reais usados. Nenhuma mudanca de comportamento observavel: suite completa (cargo test --all) permanece verde, incluindo os testes unitarios de frame.rs que fixam os offsets -8/-16/-24 e o tamanho de frame alinhado em 16. --- src/codegen/last/frame.rs | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/codegen/last/frame.rs b/src/codegen/last/frame.rs index ebf1658..05bd762 100644 --- a/src/codegen/last/frame.rs +++ b/src/codegen/last/frame.rs @@ -63,24 +63,41 @@ impl Default for Frame { } impl Frame { - /// Cria um frame vazio. Os slots locais comecam em `-8` e decrescem. + /// Cria um frame vazio. O primeiro slot local alocado comeca em `-8` + /// (ou em `-tamanho`, arredondado para 8, se maior que 8 bytes). pub fn new() -> Self { Self { offsets: HashMap::new(), - next_local_offset: -8, + next_local_offset: 0, } } - /// Aloca (se ainda nao existir) e retorna o offset de `key`. - /// - /// Slots alocados por aqui sao sempre locais (offsets negativos). + /// Aloca (se ainda nao existir) e retorna o offset de `key`, reservando + /// exatamente um slot de 8 bytes. Equivalente a + /// `allocate_local_sized(key, 8)` — usado por temporarios e variaveis + /// escalares/ponteiro, que sempre cabem em 8 bytes neste backend. pub fn allocate_local(&mut self, key: SlotKey) -> i64 { + self.allocate_local_sized(key, 8) + } + + /// Aloca (se ainda nao existir) um bloco contiguo de `size` bytes + /// (arredondado para multiplo de 8) e retorna o offset do seu primeiro + /// byte (o endereco mais baixo do bloco). Usado para variaveis cujo + /// valor nao cabe em 8 bytes, como structs: campos sao acessados como + /// `offset_of(var) + offset_do_campo`. 
+ /// + /// Aloca sempre decrementando primeiro o cursor de offset, depois + /// atribuindo: isso preserva a sequencia historica de offsets + /// (-8, -16, -24, ...) para alocacoes de 8 bytes, e generaliza de forma + /// continua para blocos maiores. + pub fn allocate_local_sized(&mut self, key: SlotKey, size: i64) -> i64 { if let Some(&offset) = self.offsets.get(&key) { return offset; } + let aligned_size = align_up(size.max(1), 8); + self.next_local_offset -= aligned_size; let offset = self.next_local_offset; self.offsets.insert(key, offset); - self.next_local_offset -= 8; offset } @@ -103,9 +120,12 @@ impl Frame { /// Tamanho total do frame em bytes, alinhado em 16. /// /// Retorna 0 quando nao ha slots locais (funcao "leaf" sem frame). + /// Calculado a partir do cursor de alocacao (`next_local_offset`), e + /// nao de `local_slot_count() * 8`: blocos maiores que 8 bytes (ex.: + /// structs via `allocate_local_sized`) ocupam mais de um "slot" de + /// contabilidade, mas devem ser contados pelo numero real de bytes. pub fn frame_size(&self) -> i64 { - let raw = (self.local_slot_count() as i64) * 8; - align_up(raw, 16) + align_up(-self.next_local_offset, 16) } } From e111321374a8a70742a33ccb6fa0bf2bc9cc22a3 Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 24 Jun 2026 02:48:05 -0300 Subject: [PATCH 73/91] feat(ir,codegen): suportar acesso a membro de struct (s.campo, p->campo) Expr::Member era um stub de erro. Implementa structs LOCAIS (variaveis declaradas dentro de uma funcao); structs globais continuam fora de escopo porque nao ha codegen para Decl::GlobalVar neste backend (lacuna separada e maior, nao tocada aqui) - usos de campo de struct global falham com erro explicito de tipo desconhecido, sem produzir codigo incorreto. 
Mudancas: - ir/lower.rs: calcula o layout de cada struct do programa uma vez em lower_program (build_struct_layouts/compute_struct_layout: offset e tipo resolvido de cada campo, com alinhamento natural por tamanho de campo), junto com a tabela de typedef (build_typedefs/resolve_alias, necessaria porque typedef struct Point Point; e comum). Os dois contextos sao compartilhados entre as funcoes via Lowerer::with_context. - lower_member_address calcula endereco da struct base + offset do campo, reaproveitando Operand::Deref (commit anterior) tanto para leitura (lower_expr) quanto escrita (lower_assignment_target). - lower_address_of generaliza a obtencao de endereco (nao valor) de um lvalue: identificador (via UnOp::AddrOf), deref (== o proprio ponteiro), indice via ponteiro e membro de struct, permitindo aninhamento entre eles. - infer_type ganha o caso Member e passa a resolver aliases de typedef em todos os tipos que retorna. - Guarda explicita: atribuicao direta entre structs maiores que 8 bytes agora e um erro, em vez de copiar silenciosamente so os primeiros 8 bytes (este backend so move 1 quadword por Copy). Structs de ate 8 bytes continuam corretos. - ir/tac.rs: TacFunction ganha var_sizes (nome da variavel -> tamanho em bytes), preenchido a partir do layout das structs locais. - codegen/last/x86_64.rs: build_frame consulta var_sizes para alocar um bloco do tamanho correto (via Frame::allocate_local_sized, do commit anterior) para variaveis struct; demais continuam com 8 bytes. - Ajusta os testes existentes que construiam TacFunction literalmente (tests/codegen_smoke.rs, src/ir/cfg.rs, src/codegen/last/x86_64.rs) para o novo campo var_sizes. Adiciona smoke tests cobrindo struct local (leitura/escrita de campo), acesso via ponteiro (arrow, inclusive atraves de parametro de funcao) e struct maior que 8 bytes (3 campos long = 24 bytes), executados via gcc. 
--- src/codegen/last/x86_64.rs | 15 +- src/ir/cfg.rs | 1 + src/ir/lower.rs | 349 ++++++++++++++++++++++++++++++++++--- src/ir/tac.rs | 7 +- tests/codegen_smoke.rs | 6 + tests/exe_smoke_test.rs | 66 ++++++- 6 files changed, 412 insertions(+), 32 deletions(-) diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs index f7d9502..8cb90cd 100644 --- a/src/codegen/last/x86_64.rs +++ b/src/codegen/last/x86_64.rs @@ -212,14 +212,20 @@ fn emit_function(func: &TacFunction, strings: &StringPool) -> EmitResult /// Constroi o stack frame pre-escaneando todas as instrucoes para alocar um /// slot para cada temp/variavel e mapear os parametros para suas posicoes. +/// +/// Variaveis listadas em `func.var_sizes` (hoje, structs locais) recebem um +/// bloco contiguo do tamanho indicado em vez do slot escalar padrao de 8 +/// bytes; demais variaveis e temporarios usam sempre 8 bytes. fn build_frame(func: &TacFunction) -> Frame { let mut frame = Frame::new(); + let size_of = |name: &str| func.var_sizes.get(name).copied().unwrap_or(8); + for (index, name) in func.params.iter().enumerate() { let key = SlotKey::Var(name.clone()); match abi::arg_register(index) { Some(_) => { - frame.allocate_local(key); + frame.allocate_local_sized(key, size_of(name)); } None => { // Argumento passado via stack do chamador: ja esta disponivel @@ -235,7 +241,11 @@ fn build_frame(func: &TacFunction) -> Frame { if frame.offset_of(&key).is_some() { continue; } - frame.allocate_local(key); + let size = match &key { + SlotKey::Var(name) => size_of(name), + SlotKey::Temp(_) => 8, + }; + frame.allocate_local_sized(key, size); } } @@ -643,6 +653,7 @@ mod tests { name: name.to_string(), params: params.into_iter().map(String::from).collect(), instrs, + var_sizes: Default::default(), } } diff --git a/src/ir/cfg.rs b/src/ir/cfg.rs index 48fa4b0..6eff37d 100644 --- a/src/ir/cfg.rs +++ b/src/ir/cfg.rs @@ -177,6 +177,7 @@ mod tests { name: "test".to_string(), params: Vec::new(), instrs, + var_sizes: 
Default::default(), } } diff --git a/src/ir/lower.rs b/src/ir/lower.rs index aa7b43c..5fe7b70 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -1,25 +1,44 @@ use crate::common::ast::{ - ast::{Program, Type}, + ast::{Program, QualifierType, Type}, decl::Decl, - expr::{BinOp, Expr, Literal, PostfixOp, PrefixOp, UnOp}, + expr::{BinOp, Expr, Literal, MemberAccess, PostfixOp, PrefixOp, UnOp}, stmt::{Stmt, SwitchLabel}, }; use crate::common::errors::types::CodegenError; use crate::ir::tac::{ ConstValue, LabelGen, LabelId, Operand, TacFunction, TacInstr, TacProgram, TempGen, TempId, }; +use std::collections::HashMap; type LowerResult = Result; +/// Layout calculado de uma struct: offset (em bytes, a partir do endereco +/// base da struct) e tipo resolvido de cada campo, mais o tamanho total +/// (arredondado para a maior alinhamento entre os campos). +#[derive(Debug, Clone)] +struct StructLayout { + fields: Vec<(String, i64, Type)>, + size: i64, +} + #[derive(Debug, Clone)] pub struct Lowerer { temps: TempGen, labels: LabelGen, instrs: Vec, /// Tipo declarado de cada variavel/parametro visto ate agora na funcao - /// atual. Usado apenas para resolver `sizeof(expr)`; nao substitui a - /// analise semantica (que ja validou o programa antes do lowering). - var_types: std::collections::HashMap, + /// atual. Usado para resolver `sizeof(expr)`, indexacao via ponteiro e + /// acesso a membro; nao substitui a analise semantica (que ja validou o + /// programa antes do lowering). + var_types: HashMap, + /// Layout (offsets + tamanho) de cada struct declarada no programa, + /// calculado uma vez em `lower_program` e compartilhado entre as + /// funcoes. Vazio quando o `Lowerer` e usado isoladamente (ex.: testes + /// unitarios deste modulo via `Lowerer::new()`). + struct_layouts: HashMap, + /// Tabela de `typedef`: nome do alias -> tipo subjacente (ainda podendo + /// ser outro alias, resolvido por `resolve_alias`). 
+ typedefs: HashMap, } #[derive(Debug, Clone, Copy, Default)] @@ -30,11 +49,20 @@ struct ControlLabels { impl Lowerer { pub fn new() -> Self { + Self::with_context(HashMap::new(), HashMap::new()) + } + + fn with_context( + struct_layouts: HashMap, + typedefs: HashMap, + ) -> Self { Self { temps: TempGen::new(), labels: LabelGen::new(), instrs: Vec::new(), - var_types: std::collections::HashMap::new(), + var_types: HashMap::new(), + struct_layouts, + typedefs, } } @@ -45,28 +73,43 @@ impl Lowerer { self.var_types.insert(name.to_string(), ty.clone()); } - /// Infere o tipo estatico de um subconjunto limitado de expressoes - /// (identificadores, deref, indice via ponteiro e cast) — o suficiente - /// para resolver o tamanho do elemento em `arr[i]`. Nao substitui a - /// analise semantica completa; expressoes fora desse subconjunto - /// retornam um erro explicito em vez de um palpite. + /// Tamanho (em bytes) de cada variavel cujo valor nao cabe num slot + /// escalar de 8 bytes — hoje, apenas structs locais. Chamado ao final do + /// lowering de uma funcao, para popular `TacFunction::var_sizes`. + fn compute_var_sizes(&self) -> HashMap { + let mut sizes = HashMap::new(); + for (name, ty) in &self.var_types { + if let Type::Struct(struct_name) = resolve_alias(ty, &self.typedefs) { + if let Some(layout) = self.struct_layouts.get(&struct_name) { + sizes.insert(name.clone(), layout.size); + } + } + } + sizes + } + + /// Infere o tipo estatico (ja resolvido de aliases de `typedef`) de um + /// subconjunto limitado de expressoes (identificadores, deref, indice + /// via ponteiro, membro de struct e cast) — o suficiente para resolver + /// o tamanho do elemento em `arr[i]` e o offset de campo em `s.campo`. + /// Nao substitui a analise semantica completa; expressoes fora desse + /// subconjunto retornam um erro explicito em vez de um palpite. 
fn infer_type(&self, expr: &Expr) -> LowerResult { match expr { - Expr::Ident(name, _) => self.var_types.get(name).cloned().ok_or_else(|| { - codegen_error( - "tipo de variavel desconhecido no lowering", - Some("type"), - ) - }), + Expr::Ident(name, _) => self + .var_types + .get(name) + .map(|ty| resolve_alias(ty, &self.typedefs)) + .ok_or_else(|| codegen_error("tipo de variavel desconhecido no lowering", Some("type"))), Expr::Unary(UnOp::Deref, inner, _) => match self.infer_type(inner)? { - Type::Pointer(t) | Type::Array(t) => Ok(*t), + Type::Pointer(t) | Type::Array(t) => Ok(resolve_alias(&t, &self.typedefs)), _ => Err(codegen_error( "deref de valor que nao e ponteiro/array", Some("type"), )), }, Expr::Index(arr, _, _) => match self.infer_type(arr)? { - Type::Pointer(t) => Ok(*t), + Type::Pointer(t) => Ok(resolve_alias(&t, &self.typedefs)), Type::Array(_) => Err(codegen_error( "indexacao de array fixo ainda nao suportada (tamanho do array nao e rastreado pelo lowering); indexacao via ponteiro funciona normalmente", Some("index"), @@ -76,14 +119,69 @@ impl Lowerer { Some("index"), )), }, - Expr::Cast(qty, _, _) => Ok(qty.ty.clone()), + Expr::Member(obj, access, field, _) => { + let layout = self.struct_layout_of_member_base(obj, access)?; + layout + .fields + .iter() + .find(|(name, _, _)| name == field) + .map(|(_, _, ty)| ty.clone()) + .ok_or_else(|| { + codegen_error("campo de struct desconhecido no lowering", Some("member")) + }) + } + Expr::Cast(qty, _, _) => Ok(resolve_alias(&qty.ty, &self.typedefs)), _ => Err(codegen_error( - "tipo de expressao nao inferido no lowering (suporte limitado a identificador, deref, indice e cast)", + "tipo de expressao nao inferido no lowering (suporte limitado a identificador, deref, indice, membro e cast)", Some("type"), )), } } + /// Resolve o layout da struct base de um acesso a membro: para `.`, + /// `obj` deve ser a propria struct; para `->`, `obj` deve ser um + /// ponteiro para struct. 
+ fn struct_layout_of_member_base( + &self, + obj: &Expr, + access: &MemberAccess, + ) -> LowerResult<&StructLayout> { + let struct_name = match access { + MemberAccess::Direct => match self.infer_type(obj)? { + Type::Struct(name) => name, + _ => { + return Err(codegen_error( + "acesso '.' em valor que nao e struct", + Some("member"), + )) + } + }, + MemberAccess::Pointer => match self.infer_type(obj)? { + Type::Pointer(inner) => match resolve_alias(&inner, &self.typedefs) { + Type::Struct(name) => name, + _ => { + return Err(codegen_error( + "acesso '->' em ponteiro que nao e para struct", + Some("member"), + )) + } + }, + _ => { + return Err(codegen_error( + "acesso '->' em valor que nao e ponteiro", + Some("member"), + )) + } + }, + }; + self.struct_layouts.get(&struct_name).ok_or_else(|| { + codegen_error( + "layout de struct desconhecido no lowering (campo agregado aninhado nao suportado, ou struct nunca declarada)", + Some("member"), + ) + }) + } + /// Calcula o endereco (em bytes) de `arr[idx]`, assumindo que `arr` e um /// ponteiro: `endereco = lower_expr(arr) + idx * sizeof(elemento)`. fn lower_index_address(&mut self, arr: &Expr, idx: &Expr) -> LowerResult { @@ -116,6 +214,69 @@ impl Lowerer { Ok(Operand::Temp(addr)) } + /// Calcula o endereco (em bytes) de `obj.campo` ou `obj->campo`: + /// endereco da struct base + offset do campo no layout. + fn lower_member_address( + &mut self, + obj: &Expr, + access: &MemberAccess, + field: &str, + ) -> LowerResult { + let field_offset = { + let layout = self.struct_layout_of_member_base(obj, access)?; + layout + .fields + .iter() + .find(|(name, _, _)| name == field) + .map(|(_, offset, _)| *offset) + .ok_or_else(|| { + codegen_error("campo de struct desconhecido no lowering", Some("member")) + })? 
+ }; + + let base_addr = match access { + MemberAccess::Direct => self.lower_address_of(obj)?, + MemberAccess::Pointer => self.lower_expr(obj)?, + }; + + if field_offset == 0 { + return Ok(base_addr); + } + + let addr = self.fresh_temp(); + self.instrs.push(TacInstr::BinOp { + dst: addr, + op: BinOp::Add, + lhs: base_addr, + rhs: Operand::Const(ConstValue::Int(field_offset)), + }); + Ok(Operand::Temp(addr)) + } + + /// Calcula o *endereco* (nao o valor) de uma expressao-lvalue: + /// identificador, `*p` (endereco = o proprio `p`), `arr[i]` ou + /// `obj.campo`/`obj->campo`. + fn lower_address_of(&mut self, expr: &Expr) -> LowerResult { + match expr { + Expr::Ident(name, _) => { + let temp = self.fresh_temp(); + self.instrs.push(TacInstr::UnOp { + dst: temp, + op: UnOp::AddrOf, + src: Operand::Var(name.clone()), + }); + Ok(Operand::Temp(temp)) + } + Expr::Unary(UnOp::Deref, inner, _) => self.lower_expr(inner), + Expr::Index(arr, idx, _) => self.lower_index_address(arr, idx), + Expr::Member(obj, access, field, _) => self.lower_member_address(obj, access, field), + _ => Err(codegen_error( + "nao e possivel obter o endereco desta expressao no lowering", + Some("addr"), + )), + } + } + pub fn lower_expr(&mut self, expr: &Expr) -> LowerResult { match expr { Expr::Literal(value, _) => Ok(Operand::Const(lower_literal(value))), @@ -216,10 +377,10 @@ impl Lowerer { let addr = self.lower_index_address(arr, idx)?; Ok(Operand::Deref(Box::new(addr))) } - Expr::Member(_, _, _, _) => Err(codegen_error( - "acesso a membro nao suportado no lowering", - Some("member"), - )), + Expr::Member(obj, access, field, _) => { + let addr = self.lower_member_address(obj, access, field)?; + Ok(Operand::Deref(Box::new(addr))) + } // `sizeof(expr)`: o caso pratico mais comum e `sizeof(variavel)`. 
// O tipo declarado de identificadores e rastreado em // `var_types` (preenchido a partir de parametros e `VarDecl`); @@ -502,7 +663,31 @@ impl Lowerer { fn lower_assignment_target(&mut self, expr: &Expr) -> LowerResult { match expr { - Expr::Ident(name, _) => Ok(Operand::Var(name.clone())), + Expr::Ident(name, _) => { + // Atribuicao direta entre structs (`q = p;`) copiaria o + // valor inteiro da struct; este backend so move 8 bytes por + // `Copy` (todo o resto do codegen trata cada valor como um + // unico quadword). Para structs que cabem em 8 bytes isso + // ja funciona corretamente de gracas; para maiores, + // copiaria so os 8 primeiros bytes silenciosamente — + // recusa explicitamente em vez disso. + if let Some(ty) = self.var_types.get(name) { + if let Type::Struct(struct_name) = resolve_alias(ty, &self.typedefs) { + let size = self + .struct_layouts + .get(&struct_name) + .map(|l| l.size) + .unwrap_or(8); + if size > 8 { + return Err(codegen_error( + "atribuicao direta entre structs maiores que 8 bytes nao suportada neste backend (copie campo a campo)", + Some("assign"), + )); + } + } + } + Ok(Operand::Var(name.clone())) + } // `*p` como destino (`*p = x;`, `*p += 1;`, `(*p)++` etc.): o // ponteiro em si e um rvalue comum, mas o destino da escrita e o // endereco para o qual ele aponta. @@ -516,6 +701,12 @@ impl Lowerer { let addr = self.lower_index_address(arr, idx)?; Ok(Operand::Deref(Box::new(addr))) } + // `obj.campo = x;` / `obj->campo = x;`: mesmo enderecamento + // usado na leitura, via `lower_member_address`. 
+ Expr::Member(obj, access, field, _) => { + let addr = self.lower_member_address(obj, access, field)?; + Ok(Operand::Deref(Box::new(addr))) + } _ => Err(codegen_error( "destino de atribuicao nao suportado no lowering", Some("assign"), @@ -553,9 +744,17 @@ impl Default for Lowerer { } pub fn lower_function(decl: &Decl) -> LowerResult { + lower_function_with_context(decl, &HashMap::new(), &HashMap::new()) +} + +fn lower_function_with_context( + decl: &Decl, + struct_layouts: &HashMap, + typedefs: &HashMap, +) -> LowerResult { match decl { Decl::Function(_, name, params, body, _) => { - let mut lowerer = Lowerer::new(); + let mut lowerer = Lowerer::with_context(struct_layouts.clone(), typedefs.clone()); for (qty, param_name) in params { lowerer.declare_var_type(param_name, &qty.ty); } @@ -563,10 +762,12 @@ pub fn lower_function(decl: &Decl) -> LowerResult { lowerer.lower_stmt(stmt)?; } + let var_sizes = lowerer.compute_var_sizes(); Ok(TacFunction { name: name.clone(), params: params.iter().map(|(_, name)| name.clone()).collect(), instrs: lowerer.finish(), + var_sizes, }) } _ => Err(codegen_error( @@ -577,16 +778,108 @@ pub fn lower_function(decl: &Decl) -> LowerResult { } pub fn lower_program(prog: &Program) -> LowerResult { + let typedefs = build_typedefs(prog); + let struct_layouts = build_struct_layouts(prog, &typedefs); + let mut functions = Vec::new(); for decl in &prog.decls { if matches!(decl, Decl::Function(..)) { - functions.push(lower_function(decl)?); + functions.push(lower_function_with_context( + decl, + &struct_layouts, + &typedefs, + )?); } } Ok(TacProgram { functions }) } +/// Segue a cadeia de `Type::Alias` ate um tipo concreto, usando a tabela de +/// `typedef` do programa. Limita a profundidade para nao travar em alias +/// ciclico malformado; nesse caso, devolve o alias original sem resolver. 
+fn resolve_alias(ty: &Type, typedefs: &HashMap) -> Type { + let mut current = ty.clone(); + for _ in 0..8 { + match current { + Type::Alias(name) => match typedefs.get(&name) { + Some(next) => current = next.clone(), + None => return Type::Alias(name), + }, + other => return other, + } + } + current +} + +/// Coleta `nome -> tipo subjacente` de todo `Decl::Typedef` do programa. +fn build_typedefs(prog: &Program) -> HashMap { + let mut map = HashMap::new(); + for decl in &prog.decls { + if let Decl::Typedef(qty, name, _) = decl { + map.insert(name.clone(), qty.ty.clone()); + } + } + map +} + +/// Calcula o layout de cada `Decl::StructDecl` do programa. Structs cujo +/// layout nao pode ser calculado (campo agregado aninhado — struct/array +/// dentro de struct, ainda nao suportado) sao simplesmente omitidas: usos de +/// `Expr::Member` sobre elas falham depois, no lowering, com um erro +/// explicito em vez de um layout incorreto. +fn build_struct_layouts( + prog: &Program, + typedefs: &HashMap, +) -> HashMap { + let mut layouts = HashMap::new(); + for decl in &prog.decls { + if let Decl::StructDecl(name, fields, _) = decl { + if let Ok(layout) = compute_struct_layout(fields, typedefs) { + layouts.insert(name.clone(), layout); + } + } + } + layouts +} + +fn compute_struct_layout( + fields: &[(QualifierType, String)], + typedefs: &HashMap, +) -> LowerResult { + let mut offset = 0i64; + let mut max_align = 1i64; + let mut laid_out = Vec::with_capacity(fields.len()); + + for (qty, field_name) in fields { + let resolved = resolve_alias(&qty.ty, typedefs); + let size = type_size(&resolved).map_err(|_| { + codegen_error( + "campo de struct com tipo agregado (struct/array) aninhado nao suportado neste backend", + Some("struct"), + ) + })?; + + offset = align_up(offset, size); + laid_out.push((field_name.clone(), offset, resolved)); + offset += size; + max_align = max_align.max(size); + } + + let total = align_up(offset, max_align.max(1)).max(1); + Ok(StructLayout { + 
fields: laid_out, + size: total, + }) +} + +fn align_up(value: i64, alignment: i64) -> i64 { + if alignment <= 0 { + return value; + } + (value + alignment - 1) / alignment * alignment +} + /// Gera o TAC e aplica todas as otimizações básicas (constant folding, /// constant propagation e dead code elimination) até ponto fixo. /// diff --git a/src/ir/tac.rs b/src/ir/tac.rs index b975324..6ae6852 100644 --- a/src/ir/tac.rs +++ b/src/ir/tac.rs @@ -109,11 +109,16 @@ pub enum TacInstr { Label(LabelId), } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Default)] pub struct TacFunction { pub name: String, pub params: Vec, pub instrs: Vec, + /// Tamanho em bytes (ja arredondado para multiplo de 8) de variaveis + /// cujo valor nao cabe num slot escalar de 8 bytes — hoje, apenas + /// structs locais. Variaveis ausentes daqui usam o tamanho padrao (8 + /// bytes) no codegen. + pub var_sizes: std::collections::HashMap, } #[derive(Debug, Clone, PartialEq)] diff --git a/tests/codegen_smoke.rs b/tests/codegen_smoke.rs index 721beae..fc801b9 100644 --- a/tests/codegen_smoke.rs +++ b/tests/codegen_smoke.rs @@ -46,6 +46,7 @@ fn build_soma_program() -> TacProgram { val: Some(Operand::Temp(TempId(0))), }, ], + var_sizes: Default::default(), }; let main = TacFunction { @@ -64,6 +65,7 @@ fn build_soma_program() -> TacProgram { val: Some(Operand::Temp(TempId(0))), }, ], + var_sizes: Default::default(), }; TacProgram { @@ -145,6 +147,7 @@ fn smoke_simple_return_const_runs() { instrs: vec![TacInstr::Return { val: Some(Operand::Const(ConstValue::Int(42))), }], + var_sizes: Default::default(), }], }; @@ -231,6 +234,7 @@ fn smoke_call_with_more_than_six_args_runs() { val: Some(Operand::Temp(TempId(7))), }, ], + var_sizes: Default::default(), }; let main = TacFunction { @@ -248,6 +252,7 @@ fn smoke_call_with_more_than_six_args_runs() { val: Some(Operand::Temp(TempId(0))), }, ], + var_sizes: Default::default(), }; let prog = TacProgram { @@ -303,6 +308,7 @@ fn 
smoke_control_flow_if_else_runs() { val: Some(Operand::Const(ConstValue::Int(20))), }, ], + var_sizes: Default::default(), }], }; diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index 47d80d2..e75c6ad 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -54,7 +54,8 @@ fn compile_to_asm(source: &str) -> String { .parse_program() .unwrap_or_else(|errors| panic!("erros de parser inesperados: {errors:?}")); - let sem_errors = analyse_with_builtins(&program, scanner.builtins); + let sem_diagnostics = analyse_with_builtins(&program, scanner.builtins); + let sem_errors: Vec<_> = sem_diagnostics.iter().filter(|d| d.is_error()).collect(); assert!( sem_errors.is_empty(), "erros semanticos inesperados: {sem_errors:?}" @@ -258,6 +259,69 @@ fn smoke_pointer_index_read_and_write_runs() { assert_eq!(status.code(), Some(15)); } +#[test] +fn smoke_struct_member_read_and_write_runs() { + require_gcc!(); + + let status = compile_and_run( + "struct_member", + "struct Point { int x; int y; }; \ + int main() { \ + struct Point p; \ + p.x = 3; \ + p.y = 4; \ + return p.x + p.y; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(7)); +} + +#[test] +fn smoke_struct_member_via_pointer_arrow_runs() { + require_gcc!(); + + let status = compile_and_run( + "struct_member_arrow", + "struct Point { int x; int y; }; \ + void move_point(struct Point *p, int dx, int dy) { \ + p->x = p->x + dx; \ + p->y = p->y + dy; \ + } \ + int main() { \ + struct Point p; \ + p.x = 1; \ + p.y = 2; \ + move_point(&p, 10, 20); \ + return p.x + p.y; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(33)); +} + +#[test] +fn smoke_struct_larger_than_eight_bytes_runs() { + require_gcc!(); + + let status = compile_and_run( + "struct_big", + "struct Big { long a; long b; long c; }; \ + int main() { \ + struct Big big; \ + big.a = 1; \ + big.b = 2; \ + big.c = 3; \ + return big.a + big.b + big.c; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(6)); +} + 
#[test] fn smoke_sizeof_of_variables_runs() { require_gcc!(); From 3dfbabffeb2b3dcf78b0c615cbf711798c6b5cbc Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 24 Jun 2026 17:42:37 -0300 Subject: [PATCH 74/91] feat(ir): represent global variables in TAC --- src/ir/tac.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/ir/tac.rs b/src/ir/tac.rs index 6ae6852..61714e4 100644 --- a/src/ir/tac.rs +++ b/src/ir/tac.rs @@ -65,7 +65,10 @@ pub enum ConstValue { #[derive(Debug, Clone, PartialEq)] pub enum Operand { Temp(TempId), + /// Variavel automatica da funcao atual, residente no stack frame. Var(String), + /// Objeto com duracao de armazenamento estatica, referenciado por simbolo. + Global(String), Const(ConstValue), /// Endereco indireto: o ponteiro guardado em `Operand` interno e lido (ou /// escrito, quando usado como destino de `Copy`) atraves de deref, ex.: @@ -122,7 +125,18 @@ pub struct TacFunction { } #[derive(Debug, Clone, PartialEq)] +pub struct TacGlobal { + pub name: String, + /// Espaco reservado pelo backend. E no minimo um quadword porque o + /// backend escalar atual faz loads/stores de 64 bits. + pub size: i64, + /// `None` representa a inicializacao estatica implicita com zero. 
+ pub init: Option, +} + +#[derive(Debug, Clone, PartialEq, Default)] pub struct TacProgram { + pub globals: Vec, pub functions: Vec, } @@ -154,6 +168,7 @@ impl fmt::Display for Operand { match self { Operand::Temp(temp) => write!(f, "{temp}"), Operand::Var(name) => write!(f, "{name}"), + Operand::Global(name) => write!(f, "@{name}"), Operand::Const(value) => write!(f, "{value}"), Operand::Deref(inner) => write!(f, "*{inner}"), } From 355f88050323e78c1d0a887f77665f7a0c7c04a7 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 24 Jun 2026 17:42:43 -0300 Subject: [PATCH 75/91] feat(lowering): lower global variable declarations --- src/ir/lower.rs | 254 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 222 insertions(+), 32 deletions(-) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 5fe7b70..b438ad8 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -6,7 +6,8 @@ use crate::common::ast::{ }; use crate::common::errors::types::CodegenError; use crate::ir::tac::{ - ConstValue, LabelGen, LabelId, Operand, TacFunction, TacInstr, TacProgram, TempGen, TempId, + ConstValue, LabelGen, LabelId, Operand, TacFunction, TacGlobal, TacInstr, TacProgram, TempGen, + TempId, }; use std::collections::HashMap; @@ -31,6 +32,12 @@ pub struct Lowerer { /// acesso a membro; nao substitui a analise semantica (que ja validou o /// programa antes do lowering). var_types: HashMap, + /// Declaracoes locais atualmente visiveis, uma tabela por escopo lexico. + /// Evita que um local de bloco continue escondendo um global apos `}`. + local_scopes: Vec>, + /// Tipos dos objetos no nivel de arquivo. Mantidos separados dos locais + /// para que um nome local sempre tenha precedencia durante o lowering. + global_types: HashMap, /// Layout (offsets + tamanho) de cada struct declarada no programa, /// calculado uma vez em `lower_program` e compartilhado entre as /// funcoes. 
Vazio quando o `Lowerer` e usado isoladamente (ex.: testes @@ -49,18 +56,21 @@ struct ControlLabels { impl Lowerer { pub fn new() -> Self { - Self::with_context(HashMap::new(), HashMap::new()) + Self::with_context(HashMap::new(), HashMap::new(), HashMap::new()) } fn with_context( struct_layouts: HashMap, typedefs: HashMap, + global_types: HashMap, ) -> Self { Self { temps: TempGen::new(), labels: LabelGen::new(), instrs: Vec::new(), var_types: HashMap::new(), + local_scopes: vec![HashMap::new()], + global_types, struct_layouts, typedefs, } @@ -71,6 +81,41 @@ impl Lowerer { /// `VarDecl` conforme o lowering avanca. fn declare_var_type(&mut self, name: &str, ty: &Type) { self.var_types.insert(name.to_string(), ty.clone()); + self.local_scopes + .last_mut() + .expect("lowerer sempre possui um escopo local") + .insert(name.to_string(), ty.clone()); + } + + fn type_of_var(&self, name: &str) -> Option<&Type> { + self.local_scopes + .iter() + .rev() + .find_map(|scope| scope.get(name)) + .or_else(|| self.global_types.get(name)) + } + + fn operand_for_var(&self, name: &str) -> Operand { + let is_local = self + .local_scopes + .iter() + .rev() + .any(|scope| scope.contains_key(name)); + if is_local || !self.global_types.contains_key(name) { + Operand::Var(name.to_string()) + } else { + Operand::Global(name.to_string()) + } + } + + fn with_local_scope( + &mut self, + lower: impl FnOnce(&mut Self) -> LowerResult, + ) -> LowerResult { + self.local_scopes.push(HashMap::new()); + let result = lower(self); + self.local_scopes.pop(); + result } /// Tamanho (em bytes) de cada variavel cujo valor nao cabe num slot @@ -97,8 +142,7 @@ impl Lowerer { fn infer_type(&self, expr: &Expr) -> LowerResult { match expr { Expr::Ident(name, _) => self - .var_types - .get(name) + .type_of_var(name) .map(|ty| resolve_alias(ty, &self.typedefs)) .ok_or_else(|| codegen_error("tipo de variavel desconhecido no lowering", Some("type"))), Expr::Unary(UnOp::Deref, inner, _) => match 
self.infer_type(inner)? { @@ -260,10 +304,11 @@ impl Lowerer { match expr { Expr::Ident(name, _) => { let temp = self.fresh_temp(); + let src = self.operand_for_var(name); self.instrs.push(TacInstr::UnOp { dst: temp, op: UnOp::AddrOf, - src: Operand::Var(name.clone()), + src, }); Ok(Operand::Temp(temp)) } @@ -280,7 +325,7 @@ impl Lowerer { pub fn lower_expr(&mut self, expr: &Expr) -> LowerResult { match expr { Expr::Literal(value, _) => Ok(Operand::Const(lower_literal(value))), - Expr::Ident(name, _) => Ok(Operand::Var(name.clone())), + Expr::Ident(name, _) => Ok(self.operand_for_var(name)), Expr::Binary(lhs, op, rhs, _) => { let lhs = self.lower_expr(lhs)?; let rhs = self.lower_expr(rhs)?; @@ -388,7 +433,7 @@ impl Lowerer { // informacao de tipo disponivel no lowering. Expr::Sizeof(inner, _) => match inner.as_ref() { Expr::Ident(name, _) => { - let ty = self.var_types.get(name).ok_or_else(|| { + let ty = self.type_of_var(name).ok_or_else(|| { codegen_error( "sizeof(expr): tipo da variavel desconhecido no lowering", Some("sizeof"), @@ -410,12 +455,12 @@ impl Lowerer { fn lower_stmt_with_control(&mut self, stmt: &Stmt, control: ControlLabels) -> LowerResult<()> { match stmt { - Stmt::Block(stmts, _) => { + Stmt::Block(stmts, _) => self.with_local_scope(|lowerer| { for stmt in stmts { - self.lower_stmt_with_control(stmt, control)?; + lowerer.lower_stmt_with_control(stmt, control)?; } Ok(()) - } + }), Stmt::If(cond, then_branch, else_branch, _) => { let cond = self.lower_expr(cond)?; let then_label = self.labels.fresh(); @@ -465,29 +510,29 @@ impl Lowerer { self.instrs.push(TacInstr::Label(end_label)); Ok(()) } - Stmt::For(init, cond, inc, body, _) => { + Stmt::For(init, cond, inc, body, _) => self.with_local_scope(|lowerer| { if let Some(init) = init { - self.lower_stmt_with_control(init, control)?; + lowerer.lower_stmt_with_control(init, control)?; } - let cond_label = self.labels.fresh(); - let body_label = self.labels.fresh(); - let inc_label = 
inc.as_ref().map(|_| self.labels.fresh()); - let end_label = self.labels.fresh(); + let cond_label = lowerer.labels.fresh(); + let body_label = lowerer.labels.fresh(); + let inc_label = inc.as_ref().map(|_| lowerer.labels.fresh()); + let end_label = lowerer.labels.fresh(); let continue_label = inc_label.unwrap_or(cond_label); - self.instrs.push(TacInstr::Label(cond_label)); + lowerer.instrs.push(TacInstr::Label(cond_label)); if let Some(cond) = cond { - let cond = self.lower_expr(cond)?; - self.instrs.push(TacInstr::CondJump { + let cond = lowerer.lower_expr(cond)?; + lowerer.instrs.push(TacInstr::CondJump { cond, then_label: body_label, else_label: end_label, }); } - self.instrs.push(TacInstr::Label(body_label)); - self.lower_stmt_with_control( + lowerer.instrs.push(TacInstr::Label(body_label)); + lowerer.lower_stmt_with_control( body, ControlLabels { break_label: Some(end_label), @@ -496,16 +541,16 @@ impl Lowerer { )?; if let Some(inc_label) = inc_label { - self.instrs.push(TacInstr::Label(inc_label)); + lowerer.instrs.push(TacInstr::Label(inc_label)); if let Some(inc) = inc { - self.lower_expr(inc)?; + lowerer.lower_expr(inc)?; } } - self.emit_jump_unless_terminated(cond_label); + lowerer.emit_jump_unless_terminated(cond_label); - self.instrs.push(TacInstr::Label(end_label)); + lowerer.instrs.push(TacInstr::Label(end_label)); Ok(()) - } + }), Stmt::DoWhile(cond, body, _) => { let body_label = self.labels.fresh(); let cond_label = self.labels.fresh(); @@ -671,7 +716,7 @@ impl Lowerer { // ja funciona corretamente de gracas; para maiores, // copiaria so os 8 primeiros bytes silenciosamente — // recusa explicitamente em vez disso. 
- if let Some(ty) = self.var_types.get(name) { + if let Some(ty) = self.type_of_var(name) { if let Type::Struct(struct_name) = resolve_alias(ty, &self.typedefs) { let size = self .struct_layouts @@ -686,7 +731,7 @@ impl Lowerer { } } } - Ok(Operand::Var(name.clone())) + Ok(self.operand_for_var(name)) } // `*p` como destino (`*p = x;`, `*p += 1;`, `(*p)++` etc.): o // ponteiro em si e um rvalue comum, mas o destino da escrita e o @@ -716,7 +761,7 @@ impl Lowerer { fn emit_copy(&mut self, dst: Operand, src: Operand) -> LowerResult<()> { match dst { - Operand::Temp(_) | Operand::Var(_) | Operand::Deref(_) => { + Operand::Temp(_) | Operand::Var(_) | Operand::Global(_) | Operand::Deref(_) => { self.instrs.push(TacInstr::Copy { dst, src }); Ok(()) } @@ -744,17 +789,22 @@ impl Default for Lowerer { } pub fn lower_function(decl: &Decl) -> LowerResult { - lower_function_with_context(decl, &HashMap::new(), &HashMap::new()) + lower_function_with_context(decl, &HashMap::new(), &HashMap::new(), &HashMap::new()) } fn lower_function_with_context( decl: &Decl, struct_layouts: &HashMap, typedefs: &HashMap, + global_types: &HashMap, ) -> LowerResult { match decl { Decl::Function(_, name, params, body, _) => { - let mut lowerer = Lowerer::with_context(struct_layouts.clone(), typedefs.clone()); + let mut lowerer = Lowerer::with_context( + struct_layouts.clone(), + typedefs.clone(), + global_types.clone(), + ); for (qty, param_name) in params { lowerer.declare_var_type(param_name, &qty.ty); } @@ -780,6 +830,8 @@ fn lower_function_with_context( pub fn lower_program(prog: &Program) -> LowerResult { let typedefs = build_typedefs(prog); let struct_layouts = build_struct_layouts(prog, &typedefs); + let global_types = build_global_types(prog); + let globals = lower_globals(prog, &typedefs, &struct_layouts)?; let mut functions = Vec::new(); for decl in &prog.decls { @@ -788,11 +840,149 @@ pub fn lower_program(prog: &Program) -> LowerResult { decl, &struct_layouts, &typedefs, + &global_types, 
)?); } } - Ok(TacProgram { functions }) + Ok(TacProgram { globals, functions }) +} + +fn build_global_types(prog: &Program) -> HashMap { + prog.decls + .iter() + .filter_map(|decl| match decl { + Decl::GlobalVar(qty, name, _, _) => Some((name.clone(), qty.ty.clone())), + _ => None, + }) + .collect() +} + +fn lower_globals( + prog: &Program, + typedefs: &HashMap, + struct_layouts: &HashMap, +) -> LowerResult> { + let mut globals = Vec::new(); + for decl in &prog.decls { + let Decl::GlobalVar(qty, name, init, _) = decl else { + continue; + }; + let ty = resolve_alias(&qty.ty, typedefs); + let size = global_storage_size(&ty, struct_layouts)?; + let init = init.as_ref().map(lower_static_initializer).transpose()?; + globals.push(TacGlobal { + name: name.clone(), + size, + init, + }); + } + Ok(globals) +} + +fn global_storage_size( + ty: &Type, + struct_layouts: &HashMap, +) -> LowerResult { + let raw = match ty { + Type::Struct(name) => { + let layout = struct_layouts.get(name).ok_or_else(|| { + codegen_error( + "layout de struct global desconhecido no lowering", + Some("global"), + ) + })?; + // A selecao de instrucoes atual acessa campos com movq. Reserva + // tambem os bytes alcancados pelo ultimo campo para evitar que + // esse acesso invada o simbolo global seguinte. 
+ layout + .fields + .iter() + .fold(layout.size, |size, (_, offset, _)| size.max(offset + 8)) + } + Type::Char + | Type::Short + | Type::Int + | Type::Long + | Type::Float + | Type::Double + | Type::Pointer(_) + | Type::Enum(_) => 8, + Type::Array(_) | Type::Void | Type::Alias(_) | Type::Function(_, _) => { + return Err(codegen_error( + "tipo de variavel global sem tamanho suportado no lowering", + Some("global"), + )); + } + }; + Ok(align_up(raw.max(8), 8)) +} + +fn lower_static_initializer(expr: &Expr) -> LowerResult { + match expr { + Expr::Literal(value, _) => Ok(lower_literal(value)), + Expr::Cast(_, inner, _) => lower_static_initializer(inner), + _ => eval_const_int(expr).map(ConstValue::Int), + } +} + +fn eval_const_int(expr: &Expr) -> LowerResult { + match expr { + Expr::Literal(Literal::Int(value), _) => Ok(*value), + Expr::Literal(Literal::Char(value), _) => Ok(*value as i64), + Expr::Cast(_, inner, _) => eval_const_int(inner), + Expr::Unary(op, inner, _) => { + let value = eval_const_int(inner)?; + match op { + UnOp::Neg => Ok(value.wrapping_neg()), + UnOp::Not => Ok((value == 0) as i64), + UnOp::BitNot => Ok(!value), + UnOp::Deref | UnOp::AddrOf => Err(codegen_error( + "inicializador global nao e uma constante inteira", + Some("global-init"), + )), + } + } + Expr::Binary(lhs, op, rhs, _) => { + let lhs = eval_const_int(lhs)?; + let rhs = eval_const_int(rhs)?; + match op { + BinOp::Add => Ok(lhs.wrapping_add(rhs)), + BinOp::Sub => Ok(lhs.wrapping_sub(rhs)), + BinOp::Mul => Ok(lhs.wrapping_mul(rhs)), + BinOp::Div if rhs != 0 => Ok(lhs.wrapping_div(rhs)), + BinOp::Mod if rhs != 0 => Ok(lhs.wrapping_rem(rhs)), + BinOp::Eq => Ok((lhs == rhs) as i64), + BinOp::Neq => Ok((lhs != rhs) as i64), + BinOp::Less => Ok((lhs < rhs) as i64), + BinOp::Greater => Ok((lhs > rhs) as i64), + BinOp::Leq => Ok((lhs <= rhs) as i64), + BinOp::Geq => Ok((lhs >= rhs) as i64), + BinOp::And => Ok((lhs != 0 && rhs != 0) as i64), + BinOp::Or => Ok((lhs != 0 || rhs != 0) as i64), + 
BinOp::BitAnd => Ok(lhs & rhs), + BinOp::BitOr => Ok(lhs | rhs), + BinOp::BitXor => Ok(lhs ^ rhs), + BinOp::Shl => Ok(lhs.wrapping_shl(rhs as u32)), + BinOp::Shr => Ok(lhs.wrapping_shr(rhs as u32)), + BinOp::Div | BinOp::Mod => Err(codegen_error( + "divisao por zero em inicializador global", + Some("global-init"), + )), + } + } + Expr::Ternary(cond, then_expr, else_expr, _) => { + if eval_const_int(cond)? != 0 { + eval_const_int(then_expr) + } else { + eval_const_int(else_expr) + } + } + _ => Err(codegen_error( + "inicializador global deve ser uma expressao constante", + Some("global-init"), + )), + } } /// Segue a cadeia de `Type::Alias` ate um tipo concreto, usando a tabela de From 4af48b6335703c7cd6b7b9a67ae354dc742a71a3 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 24 Jun 2026 17:42:49 -0300 Subject: [PATCH 76/91] fix(codegen): keep globals out of stack frames --- src/codegen/last/frame.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen/last/frame.rs b/src/codegen/last/frame.rs index 05bd762..6bbb48a 100644 --- a/src/codegen/last/frame.rs +++ b/src/codegen/last/frame.rs @@ -41,7 +41,7 @@ impl SlotKey { match op { Operand::Temp(temp) => Some(Self::Temp(temp.0)), Operand::Var(name) => Some(Self::Var(name.clone())), - Operand::Const(_) => None, + Operand::Global(_) | Operand::Const(_) => None, Operand::Deref(inner) => Self::from_operand(inner), } } From 76a95415b9124ba0a0afd7953090382be8e6d028 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 24 Jun 2026 17:42:55 -0300 Subject: [PATCH 77/91] feat(codegen): emit global storage and RIP-relative access --- src/codegen/last/x86_64.rs | 80 +++++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 5 deletions(-) diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs index 8cb90cd..7396260 100644 --- a/src/codegen/last/x86_64.rs +++ b/src/codegen/last/x86_64.rs @@ -45,6 +45,11 @@ struct StringPool { impl StringPool { fn collect(prog: 
&TacProgram) -> Self { let mut pool = Self::default(); + for global in &prog.globals { + if let Some(value) = &global.init { + pool.visit_operand(&Operand::Const(value.clone())); + } + } for func in &prog.functions { for instr in &func.instrs { pool.visit_instr(instr); @@ -80,8 +85,12 @@ impl StringPool { } fn visit_operand(&mut self, op: &Operand) { - if let Operand::Const(ConstValue::String(value)) = op { - self.label_for(value); + match op { + Operand::Const(ConstValue::String(value)) => { + self.label_for(value); + } + Operand::Deref(inner) => self.visit_operand(inner), + _ => {} } } @@ -147,18 +156,63 @@ pub fn emit_program(prog: &TacProgram) -> EmitResult { } em.blank(); } + emit_globals(&mut em, prog, &strings)?; em.raw(".text"); for func in &prog.functions { em.blank(); em.append_str(&emit_function(func, &strings)?); } - // Marca a stack como nao-executavel (boa pratica; evita aviso do linker e - // e o que o proprio GCC adiciona a saida assembly). + // Marca a stack como nao-executavel em formatos ELF. Essa secao nao + // existe no COFF usado pelo MinGW e tornaria o assembly invalido la. 
em.blank(); - em.raw(".section .note.GNU-stack,\"\",@progbits"); + #[cfg(not(target_os = "windows"))] + { + em.raw(".section .note.GNU-stack,\"\",@progbits"); + } Ok(em.into_string()) } +fn emit_globals(em: &mut Emitter, prog: &TacProgram, strings: &StringPool) -> EmitResult<()> { + let initialized: Vec<_> = prog.globals.iter().filter(|g| g.init.is_some()).collect(); + if !initialized.is_empty() { + em.raw(".data"); + for global in initialized { + em.raw(".balign 8"); + em.raw(&format!(".globl {}", global.name)); + em.raw(&format!("{}:", global.name)); + match global.init.as_ref().expect("filtrado acima") { + ConstValue::Int(value) => em.raw(&format!(" .quad {value}")), + ConstValue::Char(value) => em.raw(&format!(" .quad {}", *value as i64)), + ConstValue::Double(value) => em.raw(&format!(" .quad {}", value.to_bits())), + ConstValue::String(value) => { + let label = strings + .labels + .get(value) + .expect("string global deve ter sido coletada"); + em.raw(&format!(" .quad {label}")); + } + } + if global.size > 8 { + em.raw(&format!(" .zero {}", global.size - 8)); + } + } + em.blank(); + } + + let zeroed: Vec<_> = prog.globals.iter().filter(|g| g.init.is_none()).collect(); + if !zeroed.is_empty() { + em.raw(".bss"); + for global in zeroed { + em.raw(".balign 8"); + em.raw(&format!(".globl {}", global.name)); + em.raw(&format!("{}:", global.name)); + em.raw(&format!(" .zero {}", global.size)); + } + em.blank(); + } + Ok(()) +} + /// Emite o assembly de uma unica funcao: directiva `.globl`, rotulo, /// prologue, corpo e epilogue. fn emit_function(func: &TacFunction, strings: &StringPool) -> EmitResult { @@ -448,6 +502,11 @@ fn emit_unop( // passa por `load_op` (que faria `movq slot(%rbp), %reg`, carregando o // conteudo em vez do endereco). 
if matches!(op, UnOp::AddrOf) { + if let Operand::Global(name) = src { + em.insn(&format!("leaq {name}(%rip), %rax")); + store_op(em, frame, &Operand::Temp(dst), "rax", strings)?; + return Ok(()); + } let key = SlotKey::from_operand(src).ok_or_else(|| { codegen_error( "endereco-de (&) requer uma variavel ou temporario com slot", @@ -558,6 +617,10 @@ fn load_op( em.insn(&format!("movq {offset}(%rbp), %{reg}")); Ok(()) } + Operand::Global(name) => { + em.insn(&format!("movq {name}(%rip), %{reg}")); + Ok(()) + } Operand::Deref(inner) => { // `%r11` e scratch/caller-saved e nao e usado como `reg` por // nenhum chamador de `load_op`/`store_op` neste backend, entao e @@ -583,6 +646,11 @@ fn store_op( return Ok(()); } + if let Operand::Global(name) = op { + em.insn(&format!("movq %{reg}, {name}(%rip)")); + return Ok(()); + } + let offset = match op { Operand::Temp(temp) => frame .offset_of(&SlotKey::Temp(temp.0)) @@ -590,6 +658,7 @@ fn store_op( Operand::Var(name) => frame .offset_of(&SlotKey::Var(name.clone())) .expect("var sem slot alocado"), + Operand::Global(_) => unreachable!("tratado antes do match acima"), Operand::Const(_) => { return Err(codegen_error( "nao e possivel armazenar em uma constante", @@ -918,6 +987,7 @@ mod tests { #[test] fn emit_program_prepends_text_section() { let prog = TacProgram { + globals: Vec::new(), functions: vec![asm_simple_return_const()], }; From 4f188b0ce95baf9004f0b376d231d038c362ecfd Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 24 Jun 2026 17:43:01 -0300 Subject: [PATCH 78/91] fix(optimizer): preserve global variable assignments --- src/codegen/inter/optimizations.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/codegen/inter/optimizations.rs b/src/codegen/inter/optimizations.rs index b594733..40fdde8 100644 --- a/src/codegen/inter/optimizations.rs +++ b/src/codegen/inter/optimizations.rs @@ -496,6 +496,10 @@ fn has_side_effects(instr: &TacInstr) -> bool { dst: Operand::Var(_), .. 
} + | TacInstr::Copy { + dst: Operand::Global(_), + .. + } ) } From f4a75aebe22f3440e4422674dbd1e3ad598d111e Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 24 Jun 2026 17:43:07 -0300 Subject: [PATCH 79/91] test(codegen): initialize globals in TAC smoke fixtures --- tests/codegen_smoke.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/codegen_smoke.rs b/tests/codegen_smoke.rs index fc801b9..1f04026 100644 --- a/tests/codegen_smoke.rs +++ b/tests/codegen_smoke.rs @@ -69,6 +69,7 @@ fn build_soma_program() -> TacProgram { }; TacProgram { + globals: Vec::new(), functions: vec![soma, main], } } @@ -141,6 +142,7 @@ fn smoke_simple_return_const_runs() { require_gcc!(); let prog = TacProgram { + globals: Vec::new(), functions: vec![TacFunction { name: "main".to_string(), params: Vec::new(), @@ -256,6 +258,7 @@ fn smoke_call_with_more_than_six_args_runs() { }; let prog = TacProgram { + globals: Vec::new(), functions: vec![sum9, main], }; @@ -290,6 +293,7 @@ fn smoke_control_flow_if_else_runs() { // main: if (1) return 10; else return 20; -> espera-se 10. 
let prog = TacProgram { + globals: Vec::new(), functions: vec![TacFunction { name: "main".to_string(), params: Vec::new(), From e5f6d11947ce59b086a9c9a64172e313e570321d Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 24 Jun 2026 17:43:12 -0300 Subject: [PATCH 80/91] test(codegen): cover global variable execution --- tests/exe_smoke_test.rs | 48 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index e75c6ad..1ad20ea 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -364,3 +364,51 @@ fn smoke_switch_fallthrough_runs() { #[cfg(unix)] assert_eq!(status.code(), Some(5)); } + +#[test] +fn smoke_zero_initialized_global_read_and_write_runs() { + require_gcc!(); + + let status = compile_and_run( + "global_counter", + "int counter; int main(void) { counter = 41; return counter + 1; }", + ); + + assert_eq!(status.code(), Some(42)); +} + +#[test] +fn smoke_constant_initialized_global_and_local_shadowing_run() { + require_gcc!(); + + let status = compile_and_run( + "global_init_shadow", + "int value = 8 * 5; int main(void) { int observed = 0; { int value = 11; observed = value; } return value + observed - 9; }", + ); + + assert_eq!(status.code(), Some(42)); +} + +#[test] +fn smoke_global_struct_fixture_runs() { + require_gcc!(); + + let status = compile_and_run( + "global_struct_fixture", + include_str!("integration/valid/structs.c"), + ); + + assert_eq!(status.code(), Some(1)); +} + +#[test] +fn smoke_global_typedef_struct_fixture_runs() { + require_gcc!(); + + let status = compile_and_run( + "global_typedef_fixture", + include_str!("integration/valid/typedef.c"), + ); + + assert_eq!(status.code(), Some(10)); +} From bf0df9aec81c0ddc4078c07da01fe0a7b8ace9c1 Mon Sep 17 00:00:00 2001 From: guxvr Date: Wed, 24 Jun 2026 18:19:18 -0300 Subject: [PATCH 81/91] feat(types): preserve fixed array sizes across frontend --- src/analyser/semantic.rs | 15 ++++++++++----- 
src/common/ast/ast.rs | 2 +- src/common/ast/pretty.rs | 5 ++++- src/parser/rules/declarations/types.rs | 25 ++++++++++++++++++++----- 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/src/analyser/semantic.rs b/src/analyser/semantic.rs index 1c95af1..7cb8158 100644 --- a/src/analyser/semantic.rs +++ b/src/analyser/semantic.rs @@ -513,7 +513,7 @@ impl SemanticAnalyser { is_unsigned: false, }, crate::common::ast::expr::UnOp::Deref => match inner_ty.ty { - Type::Pointer(base) | Type::Array(base) => QualifierType { + Type::Pointer(base) | Type::Array(base, _) => QualifierType { ty: *base, is_const: inner_ty.is_const, is_unsigned: inner_ty.is_unsigned, @@ -655,7 +655,7 @@ impl SemanticAnalyser { } match arr_ty.ty { - Type::Array(inner) | Type::Pointer(inner) => QualifierType { + Type::Array(inner, _) | Type::Pointer(inner) => QualifierType { ty: *inner, is_const: arr_ty.is_const, is_unsigned: arr_ty.is_unsigned, @@ -793,7 +793,9 @@ impl SemanticAnalyser { fn resolve_type_inner(&self, ty: &Type) -> Type { match ty { - Type::Array(inner) => Type::Array(Box::new(self.resolve_type_inner(inner))), + Type::Array(inner, size) => { + Type::Array(Box::new(self.resolve_type_inner(inner)), *size) + } Type::Pointer(inner) => Type::Pointer(Box::new(self.resolve_type_inner(inner))), Type::Alias(name) => self .sym @@ -1029,7 +1031,7 @@ fn is_numeric(ty: &Type) -> bool { /// Retorna `true` se o tipo é ponteiro ou array (array decai para ponteiro em C). fn is_pointer(ty: &Type) -> bool { - matches!(ty, Type::Pointer(_) | Type::Array(_)) + matches!(ty, Type::Pointer(_) | Type::Array(_, _)) } /// Retorna `true` se o tipo é escalar (numérico, ponteiro ou enum). 
@@ -1049,7 +1051,10 @@ fn type_name(ty: &Type) -> String { Type::Double => "double".into(), Type::Void => "void".into(), Type::Pointer(inner) => format!("{}*", type_name(inner)), - Type::Array(inner) => format!("{}[]", type_name(inner)), + Type::Array(inner, size) => match size { + Some(size) => format!("{}[{size}]", type_name(inner)), + None => format!("{}[]", type_name(inner)), + }, Type::Struct(n) => format!("struct {}", n), Type::Enum(n) => format!("enum {}", n), Type::Alias(n) => n.clone(), diff --git a/src/common/ast/ast.rs b/src/common/ast/ast.rs index d54e8d7..0baa81d 100644 --- a/src/common/ast/ast.rs +++ b/src/common/ast/ast.rs @@ -10,7 +10,7 @@ pub enum Type { Float, Double, Void, - Array(Box), + Array(Box, Option), Pointer(Box), Struct(String), Enum(String), diff --git a/src/common/ast/pretty.rs b/src/common/ast/pretty.rs index a413374..fe73cd4 100644 --- a/src/common/ast/pretty.rs +++ b/src/common/ast/pretty.rs @@ -357,7 +357,10 @@ fn fmt_type(ty: &Type) -> String { Type::Double => "double".into(), Type::Void => "void".into(), Type::Pointer(inner) => format!("{}*", fmt_type(inner)), - Type::Array(inner) => format!("{}[]", fmt_type(inner)), + Type::Array(inner, size) => match size { + Some(size) => format!("{}[{size}]", fmt_type(inner)), + None => format!("{}[]", fmt_type(inner)), + }, Type::Struct(n) => format!("struct {}", n), Type::Enum(n) => format!("enum {}", n), Type::Alias(n) => n.clone(), diff --git a/src/parser/rules/declarations/types.rs b/src/parser/rules/declarations/types.rs index b63f867..8820b1f 100644 --- a/src/parser/rules/declarations/types.rs +++ b/src/parser/rules/declarations/types.rs @@ -4,20 +4,35 @@ use crate::lexer::tokens::token_kind::TokenKind; use crate::parser::parser::Parser; /// Consome sufixos `[expr?]` após o nome de uma variável e envolve o tipo em `Type::Array`. -/// Suporta múltiplas dimensões: `int arr[3][4]` → `Array(Array(Int))`. -/// O tamanho é consumido mas não armazenado (AST atual não possui campo de tamanho). 
+/// Suporta múltiplas dimensões: `int arr[3][4]` → `Array(Array(Int, Some(4)), Some(3))`. pub fn parse_array_suffix( parser: &mut Parser, mut qty: QualifierType, ) -> Result { + let mut dimensions = Vec::new(); + while parser.check(&TokenKind::LeftBracket) { parser.advance(); - if !parser.check(&TokenKind::RightBracket) { + + let size = if parser.check(&TokenKind::RightBracket) { + None + } else if let TokenKind::IntLiteral(value) = parser.peek_kind() { + let value = *value; parser.parse_expr(0)?; - } + usize::try_from(value).ok() + } else { + parser.parse_expr(0)?; + None + }; + parser.expect(&TokenKind::RightBracket, "']' ao fim do tamanho do array")?; - qty.ty = Type::Array(Box::new(qty.ty)); + dimensions.push(size); + } + + for size in dimensions.into_iter().rev() { + qty.ty = Type::Array(Box::new(qty.ty), size); } + Ok(qty) } From 2540973fd2857343e5cf1c914f35d7789d2905f3 Mon Sep 17 00:00:00 2001 From: guxvr Date: Wed, 24 Jun 2026 18:19:36 -0300 Subject: [PATCH 82/91] feat(ir): support fixed-size arrays in lowering --- src/ir/lower.rs | 162 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 145 insertions(+), 17 deletions(-) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 5fe7b70..9179775 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -74,15 +74,23 @@ impl Lowerer { } /// Tamanho (em bytes) de cada variavel cujo valor nao cabe num slot - /// escalar de 8 bytes — hoje, apenas structs locais. Chamado ao final do + /// escalar de 8 bytes — structs locais e arrays fixos. Chamado ao final do /// lowering de uma funcao, para popular `TacFunction::var_sizes`. 
fn compute_var_sizes(&self) -> HashMap { let mut sizes = HashMap::new(); for (name, ty) in &self.var_types { - if let Type::Struct(struct_name) = resolve_alias(ty, &self.typedefs) { - if let Some(layout) = self.struct_layouts.get(&struct_name) { - sizes.insert(name.clone(), layout.size); + match resolve_alias(ty, &self.typedefs) { + Type::Struct(struct_name) => { + if let Some(layout) = self.struct_layouts.get(&struct_name) { + sizes.insert(name.clone(), layout.size); + } + } + array_ty @ Type::Array(_, _) => { + if let Ok(size) = type_size(&array_ty) { + sizes.insert(name.clone(), size); + } } + _ => {} } } sizes @@ -102,7 +110,7 @@ impl Lowerer { .map(|ty| resolve_alias(ty, &self.typedefs)) .ok_or_else(|| codegen_error("tipo de variavel desconhecido no lowering", Some("type"))), Expr::Unary(UnOp::Deref, inner, _) => match self.infer_type(inner)? { - Type::Pointer(t) | Type::Array(t) => Ok(resolve_alias(&t, &self.typedefs)), + Type::Pointer(t) | Type::Array(t, _) => Ok(resolve_alias(&t, &self.typedefs)), _ => Err(codegen_error( "deref de valor que nao e ponteiro/array", Some("type"), @@ -110,8 +118,9 @@ impl Lowerer { }, Expr::Index(arr, _, _) => match self.infer_type(arr)? { Type::Pointer(t) => Ok(resolve_alias(&t, &self.typedefs)), - Type::Array(_) => Err(codegen_error( - "indexacao de array fixo ainda nao suportada (tamanho do array nao e rastreado pelo lowering); indexacao via ponteiro funciona normalmente", + Type::Array(t, Some(_)) => Ok(resolve_alias(&t, &self.typedefs)), + Type::Array(_, None) => Err(codegen_error( + "indexacao de array com tamanho desconhecido nao suportada no lowering", Some("index"), )), _ => Err(codegen_error( @@ -182,13 +191,31 @@ impl Lowerer { }) } - /// Calcula o endereco (em bytes) de `arr[idx]`, assumindo que `arr` e um - /// ponteiro: `endereco = lower_expr(arr) + idx * sizeof(elemento)`. + /// Calcula o endereco (em bytes) de `arr[idx]`. + /// + /// Para ponteiros, a base e o valor do ponteiro. 
Para arrays fixos, a + /// base e o endereco do bloco contiguo reservado para a variavel. fn lower_index_address(&mut self, arr: &Expr, idx: &Expr) -> LowerResult { - let elem_ty = self.infer_type(arr)?; + let (elem_ty, base_ptr) = match self.infer_type(arr)? { + Type::Pointer(inner) => (resolve_alias(&inner, &self.typedefs), self.lower_expr(arr)?), + Type::Array(inner, Some(_)) => ( + resolve_alias(&inner, &self.typedefs), + self.lower_address_of(arr)?, + ), + Type::Array(_, None) => { + return Err(codegen_error( + "indexacao de array com tamanho desconhecido nao suportada no lowering", + Some("index"), + )) + } + _ => { + return Err(codegen_error( + "indexacao de valor que nao e ponteiro/array", + Some("index"), + )) + } + }; let elem_size = type_size(&elem_ty)?; - - let base_ptr = self.lower_expr(arr)?; let idx_op = self.lower_expr(idx)?; let offset = if elem_size == 1 { @@ -806,6 +833,28 @@ fn resolve_alias(ty: &Type, typedefs: &HashMap) -> Type { Some(next) => current = next.clone(), None => return Type::Alias(name), }, + Type::Pointer(inner) => { + return Type::Pointer(Box::new(resolve_alias(&inner, typedefs))) + } + Type::Array(inner, size) => { + return Type::Array(Box::new(resolve_alias(&inner, typedefs)), size) + } + Type::Function(ret, params) => { + let ret = QualifierType { + ty: resolve_alias(&ret.ty, typedefs), + is_const: ret.is_const, + is_unsigned: ret.is_unsigned, + }; + let params = params + .iter() + .map(|param| QualifierType { + ty: resolve_alias(¶m.ty, typedefs), + is_const: param.is_const, + is_unsigned: param.is_unsigned, + }) + .collect(); + return Type::Function(Box::new(ret), params); + } other => return other, } } @@ -923,12 +972,18 @@ fn type_size(ty: &Type) -> LowerResult { Type::Short => Ok(2), Type::Int | Type::Float | Type::Enum(_) => Ok(4), Type::Long | Type::Double | Type::Pointer(_) => Ok(8), - Type::Array(_) | Type::Void | Type::Struct(_) | Type::Alias(_) | Type::Function(_, _) => { - Err(codegen_error( - "lowering de 
sizeof(type) requer layout/tamanho completo", - Some("sizeof"), - )) + Type::Array(inner, Some(len)) => { + let elem_size = type_size(inner)?; + Ok(elem_size * (*len as i64)) } + Type::Array(_, None) => Err(codegen_error( + "lowering de sizeof(array) requer tamanho de array conhecido", + Some("sizeof"), + )), + Type::Void | Type::Struct(_) | Type::Alias(_) | Type::Function(_, _) => Err(codegen_error( + "lowering de sizeof(type) requer layout/tamanho completo", + Some("sizeof"), + )), } } @@ -964,6 +1019,14 @@ mod tests { } } + fn array_ty(inner: Type, size: Option) -> QualifierType { + QualifierType { + ty: Type::Array(Box::new(inner), size), + is_const: false, + is_unsigned: false, + } + } + fn int(value: i64) -> Expr { Expr::Literal(Literal::Int(value), span()) } @@ -1142,4 +1205,69 @@ mod tests { }] ); } + + #[test] + fn lower_fixed_array_local_populates_var_sizes() { + let decl = Decl::Function( + int_ty(), + "main".to_string(), + vec![], + vec![ + Stmt::VarDecl( + array_ty(Type::Int, Some(3)), + "arr".to_string(), + None, + span(), + ), + Stmt::Return(Some(int(0)), span()), + ], + span(), + ); + + let func = lower_function(&decl).unwrap(); + + assert_eq!(func.var_sizes.get("arr"), Some(&12)); + } + + #[test] + fn lower_fixed_array_index_uses_address_of_base() { + let mut lowerer = Lowerer::new(); + lowerer.declare_var_type("arr", &Type::Array(Box::new(Type::Int), Some(3))); + let expr = Expr::Assign( + Box::new(Expr::Index( + Box::new(ident("arr")), + Box::new(int(1)), + span(), + )), + Box::new(int(7)), + span(), + ); + + lowerer.lower_expr(&expr).unwrap(); + let instrs = lowerer.finish(); + + assert!(instrs.iter().any(|instr| matches!( + instr, + TacInstr::UnOp { + op: UnOp::AddrOf, + src: Operand::Var(name), + .. + } if name == "arr" + ))); + assert!(instrs.iter().any(|instr| matches!( + instr, + TacInstr::BinOp { + op: BinOp::Mul, + rhs: Operand::Const(ConstValue::Int(4)), + .. 
+ } + ))); + assert!(instrs.iter().any(|instr| matches!( + instr, + TacInstr::Copy { + dst: Operand::Deref(_), + .. + } + ))); + } } From e1b7577239575d6c370c3c8d7f3766f30c968235 Mon Sep 17 00:00:00 2001 From: Hugo Freitas Silva Date: Wed, 24 Jun 2026 18:21:15 -0300 Subject: [PATCH 83/91] fix(tests): restore smoke coverage after merge --- src/codegen/last/x86_64.rs | 5 +- src/ir/cfg.rs | 6 +- tests/codegen_smoke.rs | 2 + tests/exe_smoke_test.rs | 224 +++++++++++++++++++++++++++++++++++++ 4 files changed, 231 insertions(+), 6 deletions(-) diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs index 8cb90cd..497791c 100644 --- a/src/codegen/last/x86_64.rs +++ b/src/codegen/last/x86_64.rs @@ -152,9 +152,10 @@ pub fn emit_program(prog: &TacProgram) -> EmitResult { em.blank(); em.append_str(&emit_function(func, &strings)?); } - // Marca a stack como nao-executavel (boa pratica; evita aviso do linker e - // e o que o proprio GCC adiciona a saida assembly). + // Marca a stack como nao-executavel em formatos ELF. Essa secao nao + // existe no COFF usado pelo MinGW e tornaria o assembly invalido la. em.blank(); + #[cfg(not(target_os = "windows"))] em.raw(".section .note.GNU-stack,\"\",@progbits"); Ok(em.into_string()) } diff --git a/src/ir/cfg.rs b/src/ir/cfg.rs index 6eff37d..34a236e 100644 --- a/src/ir/cfg.rs +++ b/src/ir/cfg.rs @@ -68,10 +68,8 @@ pub fn identify_leaders(instrs: &[TacInstr]) -> HashSet { for (index, instr) in instrs.iter().enumerate() { match instr { - TacInstr::Jump { .. } | TacInstr::CondJump { .. } => { - if index + 1 < instrs.len() { - leaders.insert(index + 1); - } + TacInstr::Jump { .. } | TacInstr::CondJump { .. } if index + 1 < instrs.len() => { + leaders.insert(index + 1); } TacInstr::Label(_) => { leaders.insert(index); diff --git a/tests/codegen_smoke.rs b/tests/codegen_smoke.rs index fc801b9..a545663 100644 --- a/tests/codegen_smoke.rs +++ b/tests/codegen_smoke.rs @@ -6,6 +6,8 @@ //! execucao. 
Se o `gcc` nao estiver disponivel no ambiente, os testes sao //! ignorados (skip) em vez de falhar. +#![cfg_attr(not(unix), allow(unused_variables))] + use std::path::PathBuf; use std::process::Command; diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index e1f959b..2a190d2 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -8,6 +8,8 @@ //! completo. Se `gcc` nao estiver disponivel no ambiente, os testes sao //! ignorados (skip) em vez de falhar. +#![cfg_attr(not(unix), allow(unused_variables))] + use std::path::PathBuf; use std::process::{Command, ExitStatus}; @@ -248,3 +250,225 @@ fn smoke_recursive_fibonacci_runs() { #[cfg(unix)] assert_eq!(status.code(), Some(55)); } + +#[test] +fn smoke_switch_with_default_runs() { + require_gcc!(); + + let status = compile_and_run( + "switch_default", + "int classify(int n) { \ + int result = 0; \ + switch (n) { \ + case 1: result = 1; break; \ + case 2: result = 2; break; \ + default: result = -1; break; \ + } \ + return result; \ + } \ + int main() { return classify(1) + classify(2) + classify(9) + 100; }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(102)); +} + +#[test] +fn smoke_address_of_and_deref_read_runs() { + require_gcc!(); + + let status = compile_and_run( + "addrof_deref_read", + "int main() { \ + int x = 21; \ + int *p = &x; \ + int y = *p; \ + return y * 2; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(42)); +} + +#[test] +fn smoke_deref_assignment_writes_through_pointer() { + require_gcc!(); + + let status = compile_and_run( + "deref_assign", + "int main() { \ + int x = 10; \ + int *p = &x; \ + *p = 20; \ + return x; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(20)); +} + +#[test] +fn smoke_deref_compound_assign_through_function_param_runs() { + require_gcc!(); + + let status = compile_and_run( + "deref_compound", + "void inc(int *p) { *p = *p + 1; } \ + int main() { \ + int x = 10; \ + inc(&x); \ + inc(&x); \ + return x; \ + }", + 
); + + #[cfg(unix)] + assert_eq!(status.code(), Some(12)); +} + +#[test] +fn smoke_deref_increment_operators_run() { + require_gcc!(); + + let status = compile_and_run( + "deref_incr", + "int main() { \ + int x = 5; \ + int *p = &x; \ + (*p)++; \ + *p += 10; \ + return x; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(16)); +} + +#[test] +fn smoke_pointer_index_read_and_write_runs() { + require_gcc!(); + + let status = compile_and_run( + "pointer_index", + "int sum_via_index(int *p) { \ + p[0] = 10; \ + return p[0] + 5; \ + } \ + int main() { \ + int x = 1; \ + return sum_via_index(&x); \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(15)); +} + +#[test] +fn smoke_struct_member_read_and_write_runs() { + require_gcc!(); + + let status = compile_and_run( + "struct_member", + "struct Point { int x; int y; }; \ + int main() { \ + struct Point p; \ + p.x = 3; \ + p.y = 4; \ + return p.x + p.y; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(7)); +} + +#[test] +fn smoke_struct_member_via_pointer_arrow_runs() { + require_gcc!(); + + let status = compile_and_run( + "struct_member_arrow", + "struct Point { int x; int y; }; \ + void move_point(struct Point *p, int dx, int dy) { \ + p->x = p->x + dx; \ + p->y = p->y + dy; \ + } \ + int main() { \ + struct Point p; \ + p.x = 1; \ + p.y = 2; \ + move_point(&p, 10, 20); \ + return p.x + p.y; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(33)); +} + +#[test] +fn smoke_struct_larger_than_eight_bytes_runs() { + require_gcc!(); + + let status = compile_and_run( + "struct_big", + "struct Big { long a; long b; long c; }; \ + int main() { \ + struct Big big; \ + big.a = 1; \ + big.b = 2; \ + big.c = 3; \ + return big.a + big.b + big.c; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(6)); +} + +#[test] +fn smoke_sizeof_of_variables_runs() { + require_gcc!(); + + let status = compile_and_run( + "sizeof_vars", + "int main() { \ + int x = 7; \ + long l = 3; \ + 
char c = 'a'; \ + int *p = &x; \ + return sizeof(x) + sizeof(l) + sizeof(c) + sizeof(p); \ + }", + ); + + // sizeof(int) + sizeof(long) + sizeof(char) + sizeof(int*) = 4+8+1+8. + #[cfg(unix)] + assert_eq!(status.code(), Some(21)); +} + +#[test] +fn smoke_switch_fallthrough_runs() { + require_gcc!(); + + let status = compile_and_run( + "switch_fallthrough", + "int main() { \ + int n = 2; \ + int total = 0; \ + switch (n) { \ + case 1: total = total + 1; \ + case 2: total = total + 2; \ + case 3: total = total + 3; break; \ + case 4: total = total + 100; \ + } \ + return total; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(5)); +} From dfcb526267baebcac45992926d35610a034e8744 Mon Sep 17 00:00:00 2001 From: guxvr Date: Wed, 24 Jun 2026 18:21:49 -0300 Subject: [PATCH 84/91] test: cover fixed-size array parsing and codegen --- src/tests/parser_test.rs | 10 +++++----- src/tests/semantic_test.rs | 2 +- tests/exe_smoke_test.rs | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/tests/parser_test.rs b/src/tests/parser_test.rs index 3e4d824..b4285ef 100644 --- a/src/tests/parser_test.rs +++ b/src/tests/parser_test.rs @@ -1542,7 +1542,7 @@ mod tests { panic!("esperava GlobalVar"); }; assert_eq!(name, "arr"); - assert!(matches!(qty.ty, Type::Array(_))); + assert!(matches!(qty.ty, Type::Array(_, Some(10)))); } #[test] @@ -1572,7 +1572,7 @@ mod tests { panic!("esperava VarDecl"); }; assert_eq!(name, "arr"); - assert!(matches!(qty.ty, Type::Array(_))); + assert!(matches!(qty.ty, Type::Array(_, Some(5)))); } #[test] @@ -1595,10 +1595,10 @@ mod tests { let Decl::GlobalVar(qty, _, None, _) = &prog.decls[0] else { panic!("esperava GlobalVar"); }; - let Type::Array(inner) = &qty.ty else { + let Type::Array(inner, Some(3)) = &qty.ty else { panic!("esperava Array externo"); }; - assert!(matches!(**inner, Type::Array(_))); + assert!(matches!(**inner, Type::Array(_, Some(4)))); } #[test] @@ -1619,7 +1619,7 @@ mod tests { let 
Decl::GlobalVar(qty, _, _, _) = &prog.decls[0] else { panic!("esperava GlobalVar"); }; - assert!(matches!(qty.ty, Type::Array(_))); + assert!(matches!(qty.ty, Type::Array(_, None))); } // ── struct ──────────────────────────────────────────────────────────────── diff --git a/src/tests/semantic_test.rs b/src/tests/semantic_test.rs index 08b7412..40c71f8 100644 --- a/src/tests/semantic_test.rs +++ b/src/tests/semantic_test.rs @@ -767,7 +767,7 @@ mod tests { .sym .declare(crate::analyser::symbol_table::Symbol { name: name.into(), - ty: qty(Type::Array(Box::new(inner))), + ty: qty(Type::Array(Box::new(inner), Some(4))), mutable: true, params: None, decl_span: span(), diff --git a/tests/exe_smoke_test.rs b/tests/exe_smoke_test.rs index e75c6ad..296b47a 100644 --- a/tests/exe_smoke_test.rs +++ b/tests/exe_smoke_test.rs @@ -259,6 +259,25 @@ fn smoke_pointer_index_read_and_write_runs() { assert_eq!(status.code(), Some(15)); } +#[test] +fn smoke_fixed_array_index_read_and_write_runs() { + require_gcc!(); + + let status = compile_and_run( + "fixed_array_index", + "int main() { \ + int arr[3]; \ + arr[0] = 1; \ + arr[1] = 2; \ + arr[2] = 3; \ + return arr[0] + arr[1] + arr[2]; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(6)); +} + #[test] fn smoke_struct_member_read_and_write_runs() { require_gcc!(); From b22aced2a80ee65cc921cfb22ed5e615dea91cdc Mon Sep 17 00:00:00 2001 From: guxvr Date: Wed, 24 Jun 2026 21:17:34 -0300 Subject: [PATCH 85/91] =?UTF-8?q?corrige=20erro=20de=20formata=C3=A7=C3=A3?= =?UTF-8?q?o=20em=20lower.rs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/ir/lower.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 7f5751f..1abc00a 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -935,7 +935,7 @@ fn global_storage_size( | Type::Double | Type::Pointer(_) | Type::Enum(_) => 8, - Type::Array(_) | Type::Void | Type::Alias(_) | 
Type::Function(_, _) => { + Type::Array(_, _) | Type::Void | Type::Alias(_) | Type::Function(_, _) => { return Err(codegen_error( "tipo de variavel global sem tamanho suportado no lowering", Some("global"), From ff2b0d964b385a1bcf49e4615f668ce91626428f Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 24 Jun 2026 22:23:08 -0300 Subject: [PATCH 86/91] docs: alinha README com estado real do compilador, adiciona INSTALL e TESTER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - README: atualiza tabela de estágios (codegen completo p/ tipos inteiros, ponteiros, structs, arrays, globais), documenta limitação de float/double (issue #172) e demais limitações conhecidas - INSTALL.md: passo a passo de setup (rust, gcc, build, verificação) - TESTER.md: catálogo de todas as suítes de teste (unitárias e e2e com arquivos .c reais via gcc), comandos de filtro, e status dos exemplos em src/examples/ Refs #162 --- INSTALL.md | 105 +++++++++++++++++++++++++++++++++++++++ README.md | 119 ++++++++++++++++++++++---------------------- TESTER.md | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 307 insertions(+), 60 deletions(-) create mode 100644 INSTALL.md create mode 100644 TESTER.md diff --git a/INSTALL.md b/INSTALL.md new file mode 100644 index 0000000..14f6d8d --- /dev/null +++ b/INSTALL.md @@ -0,0 +1,105 @@ +# INSTALL — preparando o ambiente + +Guia para deixar o ambiente pronto para compilar e rodar o Crusty. + +## Pré-requisitos + +| Ferramenta | Versão mínima | Para quê | +|---|---|---| +| [Rust](https://rustup.rs/) (rustc + cargo) | 1.70+ | Compilar o próprio Crusty | +| `gcc` | qualquer versão recente | Montar (`as`) e linkar (`ld`) os executáveis ELF gerados pelo backend x86-64 | +| Linux x86-64 | — | O backend gera assembly x86-64 / System V ABI. 
Não há suporte a outras arquiteturas ou a Windows/macOS nativo | + +Sem `gcc` no `PATH`, o compilador ainda funciona até a emissão de assembly (`--emit=asm`), mas os testes de smoke e2e (`tests/exe_smoke_test.rs`, `tests/codegen_smoke.rs`) são automaticamente pulados (skip), e `--emit=obj`/`--emit=exe` falham. + +## 1. Instalar o Rust + +```bash +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh +source "$HOME/.cargo/env" +rustup update stable +``` + +Verifique: + +```bash +rustc --version # esperado: 1.70 ou mais recente +cargo --version +``` + +## 2. Instalar o gcc (toolchain de montagem/link) + +**Debian/Ubuntu** +```bash +sudo apt update && sudo apt install -y gcc +``` + +**Arch/Manjaro** +```bash +sudo pacman -S gcc +``` + +**Fedora** +```bash +sudo dnf install gcc +``` + +Verifique: + +```bash +gcc --version +``` + +## 3. Obter o código + +```bash +git clone https://github.com/Bappoz/Crusty.git +cd Crusty +``` + +(Se você já está dentro do repositório, pule esta etapa.) + +## 4. Compilar o projeto + +```bash +cargo build --release +``` + +O binário fica em `target/release/crusty`. Para um build de desenvolvimento (mais rápido de compilar, binário mais lento): + +```bash +cargo build +# binário em target/debug/crusty +``` + +## 5. Verificar a instalação + +Rode o compilador sobre um exemplo incluso no repositório e execute o binário gerado: + +```bash +cargo run --release -- src/examples/hello_world.c -o /tmp/hello +/tmp/hello +``` + +Saída esperada: + +``` +Hello, World! +``` + +Se isso funcionou, o ambiente está pronto. Para confirmar que toda a suíte de testes passa no seu ambiente: + +```bash +cargo test --all +cargo clippy -- -D warnings +cargo fmt --check +``` + +Essas três checagens são exatamente as que o CI (`.github/workflows/`) roda em todo push/PR para `developer` e `master`. + +## Problemas comuns + +- **`error: linker 'cc' not found` ou falha ao montar/linkar** — `gcc` não está instalado ou não está no `PATH`. Repita o passo 2. 
+- **`cargo: command not found`** depois de instalar o Rust — rode `source "$HOME/.cargo/env"` ou abra um novo terminal. +- **Testes de smoke "pulando" silenciosamente** — esperado se `gcc` não estiver disponível; veja [TESTER.md](TESTER.md) para detalhes. +- **Programa de teste usa `float`/`double`** e falha com `error: code generation` — limitação conhecida atual do backend, ver [README.md](README.md#limitações-conhecidas) e [issue #172](https://github.com/Bappoz/Crusty/issues/172). diff --git a/README.md b/README.md index 86b5799..4eaded0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Crusty — Compilador C em Rust -Projeto da disciplina de Compiladores 1. Implementa um compilador para um subconjunto da linguagem C, escrito em Rust. +Projeto da disciplina de Compiladores 1. Implementa um compilador para um subconjunto da linguagem C, escrito em Rust, com backend nativo x86-64 (System V ABI, Linux). ## Estágio atual @@ -8,8 +8,18 @@ Projeto da disciplina de Compiladores 1. Implementa um compilador para um subcon |------|--------| | Análise léxica | Completo | | Análise sintática | Completo | -| Análise semântica | Em desenvolvimento | -| Geração de código | Não iniciado | +| Análise semântica | Completo | +| IR (TAC) | Completo | +| Otimizações (CSE, DCE, constant folding, copy propagation, LICM, inlining) | Completo | +| Geração de código x86-64 | Completo para tipos inteiros, ponteiros, structs, arrays e globais | + +### Limitações conhecidas + +- **`float`/`double` não têm codegen.** O analisador semântico aceita e tipa esses tipos, mas o backend x86-64 ainda não emite instruções de ponto flutuante (registradores XMM). Em desenvolvimento na [issue #172](https://github.com/Bappoz/Crusty/issues/172). Programas que usam `float`/`double` falham com `error: code generation` no estágio final. +- O modo REPL interativo (executar `crusty` sem argumentos) não está implementado. +- `--dump-ir` ainda não imprime a IR (placeholder). 
+ +Fora isso, o pipeline completo (lexer → parser → análise semântica → IR → otimizações → assembly x86-64 → executável ELF via `gcc`) funciona ponta a ponta para um subconjunto relevante de C: tipos inteiros e `char`, ponteiros, structs, arrays de tamanho fixo, enums, typedefs, variáveis globais, todas as estruturas de controle (`if`/`while`/`do-while`/`for`/`switch`) e chamadas de função. ## Estrutura do projeto @@ -18,43 +28,54 @@ src/ ├── lexer/ Análise léxica — transforma código-fonte em tokens ├── parser/ Análise sintática — constrói a AST via Pratt parsing ├── analyser/ Análise semântica — tabela de símbolos, escopos, verificação de tipos -├── codegen/ Geração de código — esqueleto (não implementado) +├── ir/ Geração e lowering da IR intermediária (TAC) +├── codegen/ Geração de código — otimizações sobre TAC (inter/) e backend x86-64 (last/) ├── common/ Estruturas compartilhadas: AST, erros, spans, utilitários +├── examples/ Arquivos .c de exemplo usados em testes e demonstrações └── tests/ Testes unitários por módulo +tests/ Testes de integração e smoke tests (ponta a ponta, com gcc) +docs/ Documentação técnica de cada fase do compilador ``` -## Pré-requisitos +## Começando -- [Rust](https://rustup.rs/) 1.70+ +Instruções completas de instalação e configuração do ambiente (Rust, `gcc`, verificação de toolchain) estão em [INSTALL.md](INSTALL.md). 
+ +Resumo rápido: ```bash rustup update stable +cargo build --release +cargo run --release -- src/examples/hello_world.c +./hello_world ``` -## Build +## Uso ```bash -cargo build +crusty [flags] <arquivo.c> ``` -## Uso +Principais flags (lista completa em `crusty` sem argumentos): -Rodar o compilador sobre um arquivo de entrada: - -```bash -cargo run -- -``` +| Flag | Efeito | +|---|---| +| `--dump-tokens` | Lista os tokens emitidos pelo lexer | +| `--dump-ast` | Imprime a AST | +| `--only-lex` / `--only-parse` / `--only-semantic` | Para o pipeline no estágio indicado | +| `-S`, `--emit-asm` / `--emit=asm` | Para após emitir o assembly x86-64 (`.s`) | +| `--emit=obj` | Para após montar o objeto (`.o`), sem linkar | +| `--emit=exe` | Monta e linka um executável ELF rodável (padrão) | +| `-o <arquivo>`, `--out-dir <dir>`, `--out-name <nome>` | Controlam o caminho/nome de saída | +| `-O0`\|`-O1`\|`-O2`\|`-O3`, `--opt-level <n>` | Nível de otimização aplicado à IR | -Exemplo: +Exemplo gerando e executando um binário: ```bash -cargo run -- input.c +cargo run --release -- src/examples/simple.c -o /tmp/simple +/tmp/simple; echo "exit: $?" ``` -O compilador imprime os tokens reconhecidos, a AST e eventuais diagnósticos de erro. - -> O modo REPL interativo (sem argumentos) ainda não está implementado.
- ## Funcionalidades implementadas **Lexer** @@ -81,60 +102,38 @@ O compilador imprime os tokens reconhecidos, a AST e eventuais diagnósticos de - Promoção numérica implícita (Double > Float > Long > Int) - Detecção de atribuição a `const` -## Testes +**IR e otimizações** +- Lowering de AST para TAC (Three-Address Code), incluindo arrays fixos, structs e globais +- Pipeline de otimização configurável por nível (`-O0`..`-O3`): constant folding, common subexpression elimination, dead code elimination, copy propagation, loop-invariant code motion, inlining -### Todos os testes unitários +**Backend x86-64** +- Convenção de chamada System V ABI (inteiros/ponteiros em `rdi`..`r9`/`rax`) +- Endereço de variáveis, indexação de array, acesso a membro de struct (`.`, `->`), address-of/deref +- `sizeof` em tempo de compilação +- Variáveis globais com acesso RIP-relative +- Peephole optimizer sobre o assembly emitido +- Emissão de `.s`, montagem de `.o` e link de executável via `gcc` -```bash -cargo test -``` +## Testes -### Filtrar por módulo +Cobertura completa (testes unitários e testes com arquivos `.c` reais, executados de ponta a ponta) está documentada em [TESTER.md](TESTER.md). 
-```bash -cargo test lexical # testes do scanner/lexer (21 casos) -cargo test parser_test # testes do parser / AST (76 casos) -cargo test semantic_test # testes do analisador semântico (21 casos) -cargo test symbol_test # testes da tabela de símbolos (11 casos) -cargo test analyzer_test # testes de integração do analisador (3 casos) -cargo test source # testes de SourceFile e spans (12 casos) -cargo test lexer_file # testes do scanner lendo arquivos (7 casos) -cargo test parser_file # testes do parser lendo arquivos (4 casos) -cargo test literals # testes de literais numéricos (4 casos) -cargo test ast_errors # testes de erros de AST (4 casos) -cargo test token # testes de tokens individuais (2 casos) -``` - -### Com saída detalhada +Resumo rápido: ```bash -cargo test -- --nocapture +cargo test --all # ~354 testes (unitários + integração + smoke e2e) +cargo clippy -- -D warnings +cargo fmt --check ``` -### Módulos de teste - -| Arquivo | Cobertura | Testes | -|---|---|---| -| `src/tests/lexical_test.rs` | Scanner: operadores, palavras-chave, literais | 21 | -| `src/tests/parser_test.rs` | Parser / construção de AST | 76 | -| `src/tests/semantic_test.rs` | Verificação de tipos, undefined vars, const | 21 | -| `src/tests/symbol_test.rs` | Tabela de símbolos, escopos, redeclaração | 11 | -| `src/tests/source_test.rs` | `SourceFile`, `ByteSpan`, posicionamento | 12 | -| `src/tests/lexer_file_test.rs` | Scanner sobre arquivos reais | 7 | -| `src/tests/parser_file_test.rs` | Parser sobre arquivos reais | 4 | -| `src/tests/literals_test.rs` | Literais inteiros, floats, strings | 4 | -| `src/tests/ast_errors.rs` | Diagnósticos e erros de AST | 4 | -| `src/tests/analyzer_test.rs` | Integração léxico → sintático → semântico | 3 | -| `src/tests/token_test.rs` | `Token` e `TokenKind` | 2 | - -**Total: 165 testes** - ## Documentação técnica - [Lexer](docs/lexer.md) — scanner, tokens, erros léxicos - [Parser](docs/parser.md) — Pratt parser, AST, recuperação de erros - 
[Analisador Semântico](docs/semantic.md) — tabela de símbolos, verificação de tipos - [Precedência de Operadores C](docs/c_operator_precedence.md) — tabela C11 e mapeamento para binding powers +- [INSTALL.md](INSTALL.md) — como preparar o ambiente e compilar o projeto +- [TESTER.md](TESTER.md) — como rodar e interpretar todos os testes ## Contribuidores diff --git a/TESTER.md b/TESTER.md new file mode 100644 index 0000000..ca503e4 --- /dev/null +++ b/TESTER.md @@ -0,0 +1,143 @@ +# TESTER — como testar o Crusty + +Este documento cobre todas as formas de testar o compilador: testes unitários, testes de integração com arquivos `.c` reais, e testes de smoke ponta a ponta que montam/linkam/executam o binário gerado via `gcc`. + +Pré-requisito: ambiente configurado conforme [INSTALL.md](INSTALL.md) (Rust + `gcc`). + +## Visão geral das suítes + +| Suíte | Localização | O que verifica | Testes | +|---|---|---|---| +| Testes unitários da lib | `src/tests/*.rs` | Lexer, parser, analisador semântico, codegen, em isolamento | 295 | +| Testes unitários do binário | `src/main.rs` (`#[cfg(test)]`) | Parsing de flags da CLI | 10 | +| Integração do front-end | `tests/integration_test.rs` | Pipeline lexer→parser→semântico sobre arquivos `.c` reais (válidos e inválidos) | 16 | +| Smoke de codegen | `tests/codegen_smoke.rs` | TAC montado manualmente → assembly → `gcc` → execução, checando exit code | 5 | +| Smoke de executável (e2e) | `tests/exe_smoke_test.rs` | Código-fonte C real → pipeline completo → executável ELF → execução | 26 | +| Otimização LICM | `tests/licm_test.rs` | Otimização de loop-invariant code motion | 2 | + +Total atual: **354 testes**, todos passando em `developer`. + +## Rodando tudo + +```bash +cargo test --all +``` + +Isso compila e roda todas as suítes acima, na ordem mostrada por `cargo`. + +## Testes unitários (por módulo) + +Os testes unitários vivem em `src/tests/` e cobrem cada fase do compilador isoladamente, sem precisar de arquivos externos nem de `gcc`.
+ +```bash +cargo test --lib # só a suíte unitária da lib (sem integração/smoke) +cargo test lexical # scanner/lexer: operadores, palavras-chave, literais +cargo test parser_test # parser / construção de AST +cargo test semantic_test # verificação de tipos, undefined vars, const +cargo test symbol_test # tabela de símbolos, escopos, redeclaração +cargo test source # SourceFile, ByteSpan, posicionamento +cargo test lexer_file # scanner sobre arquivos reais +cargo test parser_file # parser sobre arquivos reais +cargo test literals # literais inteiros, floats, strings +cargo test ast_errors # diagnósticos e erros de AST +cargo test analyzer_test # integração léxico → sintático → semântico +cargo test token # Token e TokenKind +cargo test codegen_test # geração de código (unitário) +cargo test peephole_test # otimizador de assembly (peephole) +cargo test unmap_safe_test # segurança de unmap/memmap do SourceFile +``` + +Saída detalhada (não suprime `println!`/`eprintln!` dos testes): + +```bash +cargo test -- --nocapture +``` + +Rodar um único teste pelo nome exato: + +```bash +cargo test test_licm_loop_with_invariant +``` + +## Testes com arquivos `.c` reais + +### `tests/integration_test.rs` — front-end completo, sem executar binário + +Roda lexer → parser → análise semântica sobre arquivos em `tests/integration/valid/` e `tests/integration/invalid/`, verificando que programas válidos não geram diagnósticos e que programas inválidos geram exatamente o erro esperado (variável não declarada, redeclaração, atribuição a `const`, mismatch de tipo, aridade de chamada, etc). + +```bash +cargo test --test integration_test +``` + +Para adicionar um novo caso: crie um `.c` em `tests/integration/valid/` (deve compilar sem erros) ou `tests/integration/invalid/` (deve falhar com o diagnóstico esperado) e adicione o caso correspondente em `tests/integration_test.rs`. 
+ +### `tests/exe_smoke_test.rs` e `tests/codegen_smoke.rs` — ponta a ponta, com execução real + +Estes são os testes mais completos: compilam código C real até assembly x86-64, montam e linkam com `gcc` em um executável ELF, executam o binário e verificam o **exit code** (e, quando aplicável, a saída em stdout). + +```bash +cargo test --test exe_smoke_test +cargo test --test codegen_smoke +``` + +Se `gcc` não estiver disponível no `PATH`, esses testes são pulados (skip) automaticamente — verifique a saída de `cargo test -- --nocapture` por `gcc indisponivel: pulando teste de smoke` para confirmar. + +### Testando manualmente com os arquivos de `src/examples/` + +O diretório `src/examples/` contém programas `.c` de exemplo usados como referência/demonstração. Para testar manualmente o pipeline completo sobre um deles: + +```bash +# 1. compilar e linkar +cargo run --release -- src/examples/simple.c -o /tmp/simple + +# 2. executar e checar o resultado +/tmp/simple; echo "exit code: $?" +``` + +Para inspecionar estágios intermediários: + +```bash +cargo run -- src/examples/simple.c --dump-tokens # tokens do lexer +cargo run -- src/examples/simple.c --dump-ast # AST +cargo run -- src/examples/simple.c --emit=asm -o /tmp/simple.s # assembly x86-64 gerado +``` + +Exemplos disponíveis e seu status atual: + +| Arquivo | Compila e roda? 
| Observação | +|---|---|---| +| `hello_world.c` | Sim | Imprime `Hello, World!` | +| `simple.c` | Sim | | +| `demo_presentation.c` | Sim | Demo usada na apresentação da disciplina | +| `declarations.c` | Gera assembly, mas não tem `main` | Não é pensado para ser linkado/executado isoladamente | +| `full_code1.c` | Não | Usa `float`, sem codegen ainda (issue #172) | +| `operators.c` | Não — nem com `gcc` | Tem statements em escopo global, o que não é C válido (confirmado com `gcc -fsyntax-only`); não é um bug do compilador | + +### Testando com seus próprios arquivos `.c` + +Qualquer arquivo `.c` válido pode ser usado diretamente: + +```bash +cargo run --release -- caminho/para/arquivo.c -o /tmp/saida +/tmp/saida +``` + +Para depurar um erro de compilação, repita o comando com `--dump-ast` ou `--only-semantic` para isolar em qual fase o problema ocorre. + +## Checagens de qualidade (rodadas no CI) + +O CI (`.github/workflows/`) roda, nesta ordem, em todo push/PR para `developer` e `master`: + +```bash +cargo build --all +cargo test --all +cargo clippy -- -D warnings +cargo fmt --check +``` + +Rode as quatro localmente antes de abrir um PR — é exatamente o que será verificado automaticamente. + +## Cobertura conhecida e limitações dos testes + +- Não há testes automatizados para `float`/`double` em codegen, porque a feature não existe ainda (issue #172). Quando for implementada, a suíte `exe_smoke_test.rs`/`codegen_smoke.rs` é o lugar natural para os novos casos. +- Os smoke tests de execução (`exe_smoke_test.rs`, `codegen_smoke.rs`) dependem de Linux x86-64 + `gcc`; em outras plataformas eles são pulados, não falham. 
From a4e4b25629883e394667bc44606d69e606ceb37a Mon Sep 17 00:00:00 2001 From: Philipe Caetano Date: Wed, 24 Jun 2026 22:45:25 -0300 Subject: [PATCH 87/91] feat(ir): adiciona campo de tipo as instrucoes TAC (Issue #172) --- src/ir/tac.rs | 45 ++++++++++----------------------------------- 1 file changed, 10 insertions(+), 35 deletions(-) diff --git a/src/ir/tac.rs b/src/ir/tac.rs index 6ae6852..dd19918 100644 --- a/src/ir/tac.rs +++ b/src/ir/tac.rs @@ -1,6 +1,8 @@ use std::fmt; use crate::common::ast::expr::{BinOp, UnOp}; +use crate::common::ast::Type; + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct TempId(pub u32); @@ -80,15 +82,18 @@ pub enum TacInstr { op: BinOp, lhs: Operand, rhs: Operand, + ty: Type, }, UnOp { dst: TempId, op: UnOp, src: Operand, + ty:Type, }, Copy { dst: Operand, src: Operand, + ty: Type, }, Jump { label: LabelId, @@ -105,6 +110,7 @@ pub enum TacInstr { }, Return { val: Option, + ty: Option }, Label(LabelId), } @@ -163,13 +169,13 @@ impl fmt::Display for Operand { impl fmt::Display for TacInstr { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - TacInstr::BinOp { dst, op, lhs, rhs } => { + TacInstr::BinOp { dst, op, lhs, rhs, ty: _ } => { write!(f, "{dst} = {lhs} {} {rhs}", bin_op_symbol(op)) } - TacInstr::UnOp { dst, op, src } => { + TacInstr::UnOp { dst, op, src, ty: _ } => { write!(f, "{dst} = {}{src}", un_op_symbol(op)) } - TacInstr::Copy { dst, src } => write!(f, "{dst} = {src}"), + TacInstr::Copy { dst, src, ty: _ } => write!(f, "{dst} = {src}"), TacInstr::Jump { label } => write!(f, "goto {label}"), TacInstr::CondJump { cond, @@ -190,7 +196,7 @@ impl fmt::Display for TacInstr { } write!(f, ")") } - TacInstr::Return { val } => { + TacInstr::Return { val, ty: _ } => { if let Some(val) = val { write!(f, "return {val}") } else { @@ -235,35 +241,4 @@ fn un_op_symbol(op: &UnOp) -> &'static str { } } -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn tac_instr_display_binop() { - let instr = 
TacInstr::BinOp { - dst: TempId(0), - op: BinOp::Add, - lhs: Operand::Temp(TempId(1)), - rhs: Operand::Temp(TempId(2)), - }; - - assert_eq!(instr.to_string(), "t0 = t1 + t2"); - } - - #[test] - fn temp_gen_increments() { - let mut gen = TempGen::new(); - assert_eq!(gen.fresh(), TempId(0)); - assert_eq!(gen.fresh(), TempId(1)); - } - - #[test] - fn label_gen_unique() { - let mut gen = LabelGen::new(); - - assert_eq!(gen.fresh(), LabelId(0)); - assert_eq!(gen.fresh(), LabelId(1)); - } -} From 9c1c98425c22e801ba28088d09c76d30fbec8b57 Mon Sep 17 00:00:00 2001 From: Philipe Caetano Date: Wed, 24 Jun 2026 22:45:39 -0300 Subject: [PATCH 88/91] feat(codegen): implementa suporte a double e registradores XMM (Issue #172) --- src/codegen/last/x86_64.rs | 143 +++++++++++++++++++++++++++++-------- 1 file changed, 115 insertions(+), 28 deletions(-) diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs index 497791c..42fbcaa 100644 --- a/src/codegen/last/x86_64.rs +++ b/src/codegen/last/x86_64.rs @@ -21,6 +21,7 @@ use crate::common::ast::expr::{BinOp, UnOp}; use crate::common::errors::types::CodegenError; use crate::ir::tac::{ConstValue, LabelId, Operand, TacFunction, TacInstr, TacProgram}; use std::collections::HashMap; +use crate::common::ast::ast::Type; type EmitResult = Result; @@ -40,6 +41,8 @@ struct Emitter { struct StringPool { entries: Vec<(String, String)>, labels: HashMap, + double_entries: Vec<(String, f64)>, + double_labels: HashMap, } impl StringPool { @@ -80,8 +83,16 @@ impl StringPool { } fn visit_operand(&mut self, op: &Operand) { - if let Operand::Const(ConstValue::String(value)) = op { - self.label_for(value); + match op { + Operand::Const(ConstValue::String(value)) => { + self.label_for(value); + } + + Operand::Const(ConstValue::Double(value)) => { + self.label_for_double(*value); + } + Operand::Deref(inner) => self.visit_operand(inner), + _=> {} } } @@ -95,6 +106,18 @@ impl StringPool { self.labels.insert(value.to_string(), label.clone()); 
label } + + fn label_for_double(&mut self, value: f64) -> String{ + let key = value.to_string(); + if let Some(label) = self.double_labels.get(&key){ + return label.clone(); + } + + let label = format!(".LC_DBL{}", self.double_entries.len()); + self.double_entries.push((label.clone(), value)); + self.double_labels.insert(key, label.clone()); + label + } } impl Emitter { @@ -139,12 +162,19 @@ impl Emitter { pub fn emit_program(prog: &TacProgram) -> EmitResult { let strings = StringPool::collect(prog); let mut em = Emitter::new(); - if !strings.entries.is_empty() { + + if !strings.entries.is_empty() || !strings.double_entries.is_empty() { em.raw(".section .rodata"); + for (label, value) in &strings.entries { em.raw(&format!("{label}:")); em.raw(&format!(" .asciz {}", escape_asm_string(value))); } + + for (label, value) in &strings.double_entries{ + em.raw(&format!("{label}:")); + em.raw(&format!(" .double {value}")); + } em.blank(); } em.raw(".text"); @@ -320,23 +350,29 @@ fn emit_instr( then_label, else_label, } => { - load_op(em, frame, cond, "rax", strings)?; + load_op(em, frame, cond, "rax", strings, &Type::Int)?; em.insn("testq %rax, %rax"); em.insn(&format!("jne {}", local_label(func_name, then_label))); em.insn(&format!("jmp {}", local_label(func_name, else_label))); Ok(()) } - TacInstr::Copy { dst, src } => { - load_op(em, frame, src, "rax", strings)?; - store_op(em, frame, dst, "rax", strings)?; + TacInstr::Copy { dst, src, ty } => { + let reg = if matches!(ty, Type::Double) { "xmm0" } else { "rax" }; + load_op(em, frame, src, reg, strings, ty)?; + store_op(em, frame, dst, reg, strings, ty)?; Ok(()) } - TacInstr::BinOp { dst, op, lhs, rhs } => emit_binop(em, op, lhs, rhs, *dst, frame, strings), - TacInstr::UnOp { dst, op, src } => emit_unop(em, op, src, *dst, frame, strings), + TacInstr::BinOp { dst, op, lhs, rhs, ty } => emit_binop(em, op, lhs, rhs, *dst, frame, strings, ty), + TacInstr::UnOp { dst, op, src, ty } => emit_unop(em, op, src, *dst, frame, 
strings, ty), TacInstr::Call { dst, fn_name, args } => emit_call(em, fn_name, args, *dst, frame, strings), - TacInstr::Return { val } => { + TacInstr::Return { val, ty } => { if let Some(val) = val { - load_op(em, frame, val, "rax", strings)?; + // 1. Resolve o tipo (se for None, assume Int) + let resolved_ty = ty.as_ref().unwrap_or(&Type::Int); + // 2. Escolhe o registrador de retorno correto + let reg = if matches!(resolved_ty, Type::Double) { "xmm0" } else { "rax" }; + // 3. Carrega o valor para o registrador + load_op(em, frame, val, reg, strings, resolved_ty)?; } em.insn(&format!("jmp {epilogue_label}")); Ok(()) @@ -352,16 +388,44 @@ fn emit_binop( dst: crate::ir::tac::TempId, frame: &Frame, strings: &StringPool, + ty: &Type, // <-- Faltava isto ) -> EmitResult<()> { - // Operacoes logicas short-circuit-like precisam normalizar cada operando - // para 0/1 individualmente. if matches!(op, BinOp::And | BinOp::Or) { emit_logical(em, matches!(op, BinOp::Or), lhs, rhs, dst, frame, strings)?; return Ok(()); } - load_op(em, frame, lhs, "rax", strings)?; - load_op(em, frame, rhs, "rcx", strings)?; + if matches!(ty, Type::Double) { + load_op(em, frame, lhs, "xmm0", strings, ty)?; + load_op(em, frame, rhs, "xmm1", strings, ty)?; + + match op { + BinOp::Add => em.insn("addsd %xmm1, %xmm0"), + BinOp::Sub => em.insn("subsd %xmm1, %xmm0"), + BinOp::Mul => em.insn("mulsd %xmm1, %xmm0"), + BinOp::Div => em.insn("divsd %xmm1, %xmm0"), + BinOp::Less => emit_comparison_double(em, "seta"), + BinOp::Greater => emit_comparison_double(em, "setb"), + BinOp::Leq => emit_comparison_double(em, "setae"), + BinOp::Geq => emit_comparison_double(em, "setbe"), + BinOp::Eq => emit_comparison_double(em, "sete"), + BinOp::Neq => emit_comparison_double(em, "setne"), + _ => return Err(codegen_error("Operacao double nao suportada", Some("binop"))), + } + + let is_relational = matches!(op, BinOp::Less | BinOp::Greater | BinOp::Leq | BinOp::Geq | BinOp::Eq | BinOp::Neq); + if is_relational { + 
store_op(em, frame, &Operand::Temp(dst), "rax", strings, &Type::Int)?; + } else { + store_op(em, frame, &Operand::Temp(dst), "xmm0", strings, ty)?; + } + + return Ok(()); + } + + + load_op(em, frame, lhs, "rax", strings, ty)?; + load_op(em, frame, rhs, "rcx", strings, ty)?; match op { BinOp::Add => em.insn("addq %rcx, %rax"), @@ -387,15 +451,10 @@ fn emit_binop( BinOp::Geq => emit_comparison(em, "setge"), BinOp::Eq => emit_comparison(em, "sete"), BinOp::Neq => emit_comparison(em, "setne"), - BinOp::And | BinOp::Or => { - return Err(codegen_error( - "operacao logica deveria ter sido tratada antes", - Some("binop"), - )) - } + BinOp::And | BinOp::Or => unreachable!(), } - store_op(em, frame, &Operand::Temp(dst), "rax", strings)?; + store_op(em, frame, &Operand::Temp(dst), "rax", strings, ty)?; Ok(()) } @@ -405,6 +464,12 @@ fn emit_comparison(em: &mut Emitter, setcc: &str) { em.insn("movzbq %al, %rax"); } +fn emit_comparison_double(em: &mut Emitter, setcc: &str) { + em.insn("ucomisd %xmm1, %xmm0"); + em.insn(&format!("{setcc} %al")); + em.insn("movzbq %al, %rax"); +} + fn emit_logical( em: &mut Emitter, is_or: bool, @@ -414,15 +479,17 @@ fn emit_logical( frame: &Frame, strings: &StringPool, ) -> EmitResult<()> { + + let ty = &Type::Int; // Normaliza lhs para 0/1 em %rdx. - load_op(em, frame, lhs, "rax", strings)?; + load_op(em, frame, lhs, "rax", strings, ty)?; em.insn("testq %rax, %rax"); em.insn("setne %al"); em.insn("movzbq %al, %rax"); em.insn("movq %rax, %rdx"); // Normaliza rhs para 0/1 em %rax. 
- load_op(em, frame, rhs, "rax", strings)?; + load_op(em, frame, rhs, "rax", strings, ty)?; em.insn("testq %rax, %rax"); em.insn("setne %al"); em.insn("movzbq %al, %rax"); @@ -444,6 +511,7 @@ fn emit_unop( dst: crate::ir::tac::TempId, frame: &Frame, strings: &StringPool, + ty: &Type, ) -> EmitResult<()> { // `&x` precisa do *endereco* do slot de `src`, nao do seu valor: nao // passa por `load_op` (que faria `movq slot(%rbp), %reg`, carregando o @@ -462,8 +530,14 @@ fn emit_unop( store_op(em, frame, &Operand::Temp(dst), "rax", strings)?; return Ok(()); } + if matches!(ty, Type::Double) { + return Err(codegen_error( + "Operacoes unarias ainda nao suportadas para double", + Some("unop"), + )); + } - load_op(em, frame, src, "rax", strings)?; + load_op(em, frame, src, "rax", strings, ty)?; match op { UnOp::Neg => em.insn("negq %rax"), UnOp::BitNot => em.insn("notq %rax"), @@ -507,7 +581,7 @@ fn emit_call( for (index, arg) in args.iter().take(abi::MAX_REG_ARGS).enumerate() { let reg = abi::arg_register(index).expect("index < MAX_REG_ARGS sempre tem registrador"); - load_op(em, frame, arg, "rax", strings)?; + load_op(em, frame, arg, "rax", strings, &Type::Int)?; em.insn(&format!("movq %rax, %{reg}")); } @@ -519,7 +593,7 @@ fn emit_call( } if let Some(dst) = dst { - store_op(em, frame, &Operand::Temp(dst), "rax", strings)?; + store_op(em, frame, &Operand::Temp(dst), "rax", strings, &Type::Int)?; } Ok(()) } @@ -531,7 +605,9 @@ fn load_op( op: &Operand, reg: &str, strings: &StringPool, + ty: &Type, ) -> EmitResult<()> { + let mov_insn = if matches!(ty, Type::Double) {"movsd"} else {"movq"}; match op { Operand::Const(ConstValue::String(value)) => { let label = strings @@ -541,6 +617,15 @@ fn load_op( em.insn(&format!("leaq {label}(%rip), %{reg}")); Ok(()) } + Operand::Const(ConstValue::Double(value)) =>{ + let label = strings + .double_labels + .get(&value.to_string()) + .expect("double literal deve ter sido coletado"); + + em.insn(&format!("movsd {label}(%rip), %{reg}")); + 
Ok(()) + } Operand::Const(value) => { em.insn(&format!("movq ${}, %{reg}", const_immediate(value)?)); Ok(()) @@ -577,9 +662,11 @@ fn store_op( op: &Operand, reg: &str, strings: &StringPool, + ty: &Type, ) -> EmitResult<()> { + let mov_insn = if matches!(ty, Type::Double) {"movsd"} else {"movq"}; if let Operand::Deref(inner) = op { - load_op(em, frame, inner, DEREF_SCRATCH_REG, strings)?; + load_op(em, frame, inner, DEREF_SCRATCH_REG, strings, &Type::Int)?; em.insn(&format!("movq %{reg}, (%{DEREF_SCRATCH_REG})")); return Ok(()); } From 381a4dcc7b70db98e86540c9d468eb4c097d8485 Mon Sep 17 00:00:00 2001 From: Philipe Caetano Date: Wed, 24 Jun 2026 22:45:51 -0300 Subject: [PATCH 89/91] test(ir): adiciona suite de testes para tipagem no TAC (Issue #172) --- src/tests/tac_test.rs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 src/tests/tac_test.rs diff --git a/src/tests/tac_test.rs b/src/tests/tac_test.rs new file mode 100644 index 0000000..0a7a5b1 --- /dev/null +++ b/src/tests/tac_test.rs @@ -0,0 +1,34 @@ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tac_instr_display_binop() { + let instr = TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Add, + lhs: Operand::Temp(TempId(1)), + rhs: Operand::Temp(TempId(2)), + ty: Type::Int, + }; + + assert_eq!(instr.to_string(), "t0 = t1 + t2"); + } + + #[test] + fn temp_gen_increments() { + let mut gen = TempGen::new(); + + assert_eq!(gen.fresh(), TempId(0)); + assert_eq!(gen.fresh(), TempId(1)); + } + + #[test] + fn label_gen_unique() { + let mut gen = LabelGen::new(); + + assert_eq!(gen.fresh(), LabelId(0)); + assert_eq!(gen.fresh(), LabelId(1)); + } +} \ No newline at end of file From 30b6b5eab2da66cc36930bf7bf3cb6a71376feac Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 24 Jun 2026 23:14:42 -0300 Subject: [PATCH 90/91] fix(codegen): corrige e completa suporte a double iniciado na branch do colaborador MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit A branch original (a4e4b25..381a4dc) não compilava: o campo 'ty' foi adicionado a TacInstr::{BinOp,UnOp,Copy,Return}, mas vários call sites em lower.rs, optimizations.rs, cfg.rs e nos módulos de teste não foram atualizados, e load_op/store_op em x86_64.rs ganharam o parâmetro 'ty' sem que todos os chamadores fossem ajustados. Correções e trabalho completado: - lower.rs: tipo de cada expressão agora é inferido (literais, variáveis locais, aritmética, cast) para popular 'ty' corretamente em vez de um valor fixo; cobre VarDecl, Assign, CompoundAssign, Ternary, prefix/postfix e Return. - x86_64.rs: load_op/store_op passam a usar 'movsd' de fato para double (antes computavam 'mov_insn' mas ignoravam a variável); corrige bug de comparação double com mapeamento de setcc invertido (Less/Greater e Leq/Geq trocados — 'a < b' retornava falso e vice-versa); literal double usado em contexto não-double (ex.: 'float', fora de escopo) agora produz erro de codegen claro em vez de assembly inválido. - optimizations.rs/cfg.rs: matches exaustivos e construções ajustados para o novo campo. - registra src/tests/tac_test.rs (existia mas não estava no mod.rs). - novo tests/double_codegen_test.rs: cobre o critério de aceite da issue (double local + aritmética + return, verificado via gcc), comparações double->int via exit code, e checagem do assembly emitido. Escopo mantido conforme sugerido na issue: apenas 'double' (não 'float'), literais/variáveis locais/aritmética básica/comparações/return. Argumentos, parâmetros e retorno de double através de chamada feita pelo próprio codegen permanecem fora de escopo (exigem estender abi.rs para xmm0..xmm7). cargo build --all, cargo test --all (357 testes), cargo clippy -- -D warnings e cargo fmt --check passam limpos. 
Refs #172 --- src/codegen/inter/optimizations.rs | 98 +++++++-- src/codegen/last/x86_64.rs | 124 +++++++---- src/ir/cfg.rs | 18 +- src/ir/lower.rs | 94 ++++++-- src/ir/tac.rs | 24 ++- src/tests/mod.rs | 1 + src/tests/tac_test.rs | 54 +++-- tests/codegen_smoke.rs | 17 ++ tests/double_codegen_test.rs | 332 +++++++++++++++++++++++++++++ 9 files changed, 651 insertions(+), 111 deletions(-) create mode 100644 tests/double_codegen_test.rs diff --git a/src/codegen/inter/optimizations.rs b/src/codegen/inter/optimizations.rs index b594733..b82b998 100644 --- a/src/codegen/inter/optimizations.rs +++ b/src/codegen/inter/optimizations.rs @@ -235,7 +235,7 @@ fn instr_uses(instr: &TacInstr) -> Vec { push(&mut uses, arg); } } - TacInstr::Return { val: Some(v) } => push(&mut uses, v), + TacInstr::Return { val: Some(v), .. } => push(&mut uses, v), _ => {} } @@ -255,20 +255,28 @@ pub fn constant_fold(instrs: &mut [TacInstr]) -> bool { for instr in instrs.iter_mut() { match instr { - TacInstr::BinOp { dst, op, lhs, rhs } => { + TacInstr::BinOp { + dst, + op, + lhs, + rhs, + ty, + } => { if let Some(result) = fold_binop(op, lhs, rhs) { *instr = TacInstr::Copy { dst: Operand::Temp(*dst), src: Operand::Const(result), + ty: ty.clone(), }; changed = true; } } - TacInstr::UnOp { dst, op, src } => { + TacInstr::UnOp { dst, op, src, ty } => { if let Some(result) = fold_unop(op, src) { *instr = TacInstr::Copy { dst: Operand::Temp(*dst), src: Operand::Const(result), + ty: ty.clone(), }; changed = true; } @@ -382,6 +390,7 @@ pub fn constant_propagation(instrs: &mut [TacInstr]) -> bool { TacInstr::Copy { dst: Operand::Temp(t), src: Operand::Const(v), + .. } => { const_map.insert(*t, v.clone()); } @@ -390,6 +399,7 @@ pub fn constant_propagation(instrs: &mut [TacInstr]) -> bool { TacInstr::Copy { dst: Operand::Temp(t), src: _, + .. 
} => { const_map.remove(t); } @@ -434,7 +444,7 @@ fn propagate_uses( subst(arg, changed); } } - TacInstr::Return { val: Some(v) } => subst(v, changed), + TacInstr::Return { val: Some(v), .. } => subst(v, changed), _ => {} } } @@ -526,6 +536,7 @@ pub fn optimize_function(instrs: &mut Vec) { mod tests { use super::*; use crate::{ + common::ast::ast::Type, common::ast::expr::{BinOp, UnOp}, ir::tac::{ConstValue, LabelId, Operand, TacInstr, TempId}, }; @@ -551,6 +562,7 @@ mod tests { op: BinOp::Add, lhs: int(2), rhs: int(3), + ty: Type::Int, }]; assert!(constant_fold(&mut instrs)); assert_eq!( @@ -558,6 +570,7 @@ mod tests { TacInstr::Copy { dst: temp(0), src: int(5), + ty: Type::Int, } ); } @@ -571,12 +584,14 @@ mod tests { op: BinOp::Mul, lhs: int(3), rhs: int(4), + ty: Type::Int, }, TacInstr::BinOp { dst: TempId(1), op: BinOp::Add, lhs: int(2), rhs: temp(0), + ty: Type::Int, }, ]; // Após fold: t0 = 12, t1 = 2 + t0 (t0 ainda é temp, precisa de propagation) @@ -585,7 +600,8 @@ mod tests { instrs[0], TacInstr::Copy { dst: temp(0), - src: int(12) + src: int(12), + ty: Type::Int, } ); @@ -597,7 +613,8 @@ mod tests { instrs[1], TacInstr::Copy { dst: temp(1), - src: int(14) + src: int(14), + ty: Type::Int, } ); } @@ -609,13 +626,15 @@ mod tests { op: BinOp::Less, lhs: int(3), rhs: int(5), + ty: Type::Int, }]; assert!(constant_fold(&mut instrs)); assert_eq!( instrs[0], TacInstr::Copy { dst: temp(0), - src: int(1) + src: int(1), + ty: Type::Int, } ); } @@ -627,13 +646,15 @@ mod tests { op: BinOp::Eq, lhs: int(3), rhs: int(5), + ty: Type::Int, }]; assert!(constant_fold(&mut instrs)); assert_eq!( instrs[0], TacInstr::Copy { dst: temp(0), - src: int(0) + src: int(0), + ty: Type::Int, } ); } @@ -646,13 +667,15 @@ mod tests { op: BinOp::BitAnd, lhs: int(0b1010), rhs: int(0b1100), + ty: Type::Int, }]; assert!(constant_fold(&mut instrs)); assert_eq!( instrs[0], TacInstr::Copy { dst: temp(0), - src: int(8) + src: int(8), + ty: Type::Int, } ); } @@ -663,13 +686,15 @@ mod tests { dst: 
TempId(0), op: UnOp::Neg, src: int(7), + ty: Type::Int, }]; assert!(constant_fold(&mut instrs)); assert_eq!( instrs[0], TacInstr::Copy { dst: temp(0), - src: int(-7) + src: int(-7), + ty: Type::Int, } ); } @@ -682,11 +707,13 @@ mod tests { dst: TempId(0), op: UnOp::Not, src: int(0), + ty: Type::Int, }, TacInstr::UnOp { dst: TempId(1), op: UnOp::Not, src: int(5), + ty: Type::Int, }, ]; constant_fold(&mut instrs); @@ -694,14 +721,16 @@ mod tests { instrs[0], TacInstr::Copy { dst: temp(0), - src: int(1) + src: int(1), + ty: Type::Int, } ); assert_eq!( instrs[1], TacInstr::Copy { dst: temp(1), - src: int(0) + src: int(0), + ty: Type::Int, } ); } @@ -713,6 +742,7 @@ mod tests { op: BinOp::Div, lhs: int(10), rhs: int(0), + ty: Type::Int, }; let mut instrs = vec![original.clone()]; assert!(!constant_fold(&mut instrs)); @@ -726,6 +756,7 @@ mod tests { op: BinOp::Shl, lhs: int(1), rhs: int(-1), + ty: Type::Int, }; let mut instrs = vec![original.clone()]; assert!(!constant_fold(&mut instrs)); @@ -739,6 +770,7 @@ mod tests { op: BinOp::Shl, lhs: int(1), rhs: int(64), + ty: Type::Int, }; let mut instrs = vec![original.clone()]; assert!(!constant_fold(&mut instrs)); @@ -752,6 +784,7 @@ mod tests { op: BinOp::Add, lhs: Operand::Const(ConstValue::Double(1.0)), rhs: Operand::Const(ConstValue::Double(2.0)), + ty: Type::Int, }; let mut instrs = vec![original.clone()]; assert!(!constant_fold(&mut instrs)); @@ -767,12 +800,14 @@ mod tests { TacInstr::Copy { dst: temp(0), src: int(5), + ty: Type::Int, }, TacInstr::BinOp { dst: TempId(1), op: BinOp::Add, lhs: temp(0), rhs: int(3), + ty: Type::Int, }, ]; assert!(constant_propagation(&mut instrs)); @@ -783,6 +818,7 @@ mod tests { op: BinOp::Add, lhs: int(5), rhs: int(3), + ty: Type::Int, } ); // Após fold: t1 = 8 @@ -791,7 +827,8 @@ mod tests { instrs[1], TacInstr::Copy { dst: temp(1), - src: int(8) + src: int(8), + ty: Type::Int, } ); } @@ -803,6 +840,7 @@ mod tests { TacInstr::Copy { dst: temp(0), src: int(5), + ty: Type::Int, }, 
TacInstr::Call { dst: Some(TempId(0)), @@ -814,6 +852,7 @@ mod tests { op: BinOp::Add, lhs: temp(0), rhs: int(1), + ty: Type::Int, }, ]; // propagation não deve substituir t0 na última instrução @@ -837,6 +876,7 @@ mod tests { op: BinOp::Add, lhs: int(2), rhs: int(3), + ty: Type::Int, }]; let liveness = compute_liveness(&instrs); assert!(dead_code_eliminate(&mut instrs, &liveness)); @@ -862,6 +902,7 @@ mod tests { let mut instrs = vec![TacInstr::Copy { dst: var("x"), src: int(10), + ty: Type::Int, }]; let liveness = compute_liveness(&instrs); assert!(!dead_code_eliminate(&mut instrs, &liveness)); @@ -875,8 +916,12 @@ mod tests { TacInstr::Copy { dst: temp(0), src: int(5), + ty: Type::Int, + }, + TacInstr::Return { + val: Some(temp(0)), + ty: None, }, - TacInstr::Return { val: Some(temp(0)) }, ]; let liveness = compute_liveness(&instrs); assert!(!dead_code_eliminate(&mut instrs, &liveness)); @@ -903,15 +948,20 @@ mod tests { TacInstr::Copy { dst: temp(0), src: int(5), + ty: Type::Int, }, TacInstr::Jump { label: LabelId(3) }, TacInstr::Label(LabelId(2)), TacInstr::Copy { dst: temp(0), src: int(10), + ty: Type::Int, }, TacInstr::Label(LabelId(3)), - TacInstr::Return { val: Some(temp(0)) }, + TacInstr::Return { + val: Some(temp(0)), + ty: None, + }, ]; let liveness = compute_liveness(&instrs); @@ -933,16 +983,19 @@ mod tests { op: BinOp::Mul, lhs: int(3), rhs: int(4), + ty: Type::Int, }, TacInstr::BinOp { dst: TempId(1), op: BinOp::Add, lhs: int(2), rhs: temp(0), + ty: Type::Int, }, TacInstr::Copy { dst: var("x"), src: temp(1), + ty: Type::Int, }, ]; @@ -955,6 +1008,7 @@ mod tests { TacInstr::Copy { dst: var("x"), src: int(14), + ty: Type::Int, } ); } @@ -974,8 +1028,12 @@ mod tests { op: BinOp::Add, lhs: temp(0), rhs: int(0), + ty: Type::Int, + }, + TacInstr::Return { + val: Some(temp(1)), + ty: None, }, - TacInstr::Return { val: Some(temp(1)) }, ]; optimize_function(&mut instrs); @@ -988,7 +1046,13 @@ mod tests { #[test] fn 
optimize_function_side_effect_label_preserved() { - let mut instrs = vec![TacInstr::Label(LabelId(0)), TacInstr::Return { val: None }]; + let mut instrs = vec![ + TacInstr::Label(LabelId(0)), + TacInstr::Return { + val: None, + ty: None, + }, + ]; optimize_function(&mut instrs); assert_eq!(instrs.len(), 2); } diff --git a/src/codegen/last/x86_64.rs b/src/codegen/last/x86_64.rs index 42fbcaa..5edb4b7 100644 --- a/src/codegen/last/x86_64.rs +++ b/src/codegen/last/x86_64.rs @@ -17,11 +17,11 @@ use crate::codegen::last::abi; use crate::codegen::last::frame::{Frame, SlotKey}; use crate::codegen::last::peephole::PeepholePass; +use crate::common::ast::ast::Type; use crate::common::ast::expr::{BinOp, UnOp}; use crate::common::errors::types::CodegenError; use crate::ir::tac::{ConstValue, LabelId, Operand, TacFunction, TacInstr, TacProgram}; use std::collections::HashMap; -use crate::common::ast::ast::Type; type EmitResult = Result; @@ -63,7 +63,7 @@ impl StringPool { self.visit_operand(rhs); } TacInstr::UnOp { src, .. } => self.visit_operand(src), - TacInstr::Copy { dst, src } => { + TacInstr::Copy { dst, src, .. } => { self.visit_operand(dst); self.visit_operand(src); } @@ -73,7 +73,7 @@ impl StringPool { self.visit_operand(arg); } } - TacInstr::Return { val } => { + TacInstr::Return { val, .. 
} => { if let Some(val) = val { self.visit_operand(val); } @@ -92,7 +92,7 @@ impl StringPool { self.label_for_double(*value); } Operand::Deref(inner) => self.visit_operand(inner), - _=> {} + _ => {} } } @@ -107,9 +107,9 @@ impl StringPool { label } - fn label_for_double(&mut self, value: f64) -> String{ + fn label_for_double(&mut self, value: f64) -> String { let key = value.to_string(); - if let Some(label) = self.double_labels.get(&key){ + if let Some(label) = self.double_labels.get(&key) { return label.clone(); } @@ -171,7 +171,7 @@ pub fn emit_program(prog: &TacProgram) -> EmitResult { em.raw(&format!(" .asciz {}", escape_asm_string(value))); } - for (label, value) in &strings.double_entries{ + for (label, value) in &strings.double_entries { em.raw(&format!("{label}:")); em.raw(&format!(" .double {value}")); } @@ -304,7 +304,7 @@ fn slot_keys_of(instr: &TacInstr) -> Vec { keys.push(SlotKey::Temp(dst.0)); consider(&mut keys, src); } - TacInstr::Copy { dst, src } => { + TacInstr::Copy { dst, src, .. } => { consider(&mut keys, dst); consider(&mut keys, src); } @@ -317,7 +317,7 @@ fn slot_keys_of(instr: &TacInstr) -> Vec { consider(&mut keys, arg); } } - TacInstr::Return { val } => { + TacInstr::Return { val, .. 
} => { if let Some(val) = val { consider(&mut keys, val); } @@ -357,12 +357,22 @@ fn emit_instr( Ok(()) } TacInstr::Copy { dst, src, ty } => { - let reg = if matches!(ty, Type::Double) { "xmm0" } else { "rax" }; + let reg = if matches!(ty, Type::Double) { + "xmm0" + } else { + "rax" + }; load_op(em, frame, src, reg, strings, ty)?; store_op(em, frame, dst, reg, strings, ty)?; Ok(()) } - TacInstr::BinOp { dst, op, lhs, rhs, ty } => emit_binop(em, op, lhs, rhs, *dst, frame, strings, ty), + TacInstr::BinOp { + dst, + op, + lhs, + rhs, + ty, + } => emit_binop(em, op, lhs, rhs, *dst, frame, strings, ty), TacInstr::UnOp { dst, op, src, ty } => emit_unop(em, op, src, *dst, frame, strings, ty), TacInstr::Call { dst, fn_name, args } => emit_call(em, fn_name, args, *dst, frame, strings), TacInstr::Return { val, ty } => { @@ -370,7 +380,11 @@ fn emit_instr( // 1. Resolve o tipo (se for None, assume Int) let resolved_ty = ty.as_ref().unwrap_or(&Type::Int); // 2. Escolhe o registrador de retorno correto - let reg = if matches!(resolved_ty, Type::Double) { "xmm0" } else { "rax" }; + let reg = if matches!(resolved_ty, Type::Double) { + "xmm0" + } else { + "rax" + }; // 3. 
Carrega o valor para o registrador load_op(em, frame, val, reg, strings, resolved_ty)?; } @@ -380,6 +394,7 @@ fn emit_instr( } } +#[allow(clippy::too_many_arguments)] fn emit_binop( em: &mut Emitter, op: &BinOp, @@ -388,7 +403,7 @@ fn emit_binop( dst: crate::ir::tac::TempId, frame: &Frame, strings: &StringPool, - ty: &Type, // <-- Faltava isto + ty: &Type, ) -> EmitResult<()> { if matches!(op, BinOp::And | BinOp::Or) { emit_logical(em, matches!(op, BinOp::Or), lhs, rhs, dst, frame, strings)?; @@ -404,26 +419,33 @@ fn emit_binop( BinOp::Sub => em.insn("subsd %xmm1, %xmm0"), BinOp::Mul => em.insn("mulsd %xmm1, %xmm0"), BinOp::Div => em.insn("divsd %xmm1, %xmm0"), - BinOp::Less => emit_comparison_double(em, "seta"), - BinOp::Greater => emit_comparison_double(em, "setb"), - BinOp::Leq => emit_comparison_double(em, "setae"), - BinOp::Geq => emit_comparison_double(em, "setbe"), + BinOp::Less => emit_comparison_double(em, "setb"), + BinOp::Greater => emit_comparison_double(em, "seta"), + BinOp::Leq => emit_comparison_double(em, "setbe"), + BinOp::Geq => emit_comparison_double(em, "setae"), BinOp::Eq => emit_comparison_double(em, "sete"), BinOp::Neq => emit_comparison_double(em, "setne"), - _ => return Err(codegen_error("Operacao double nao suportada", Some("binop"))), + _ => { + return Err(codegen_error( + "Operacao double nao suportada", + Some("binop"), + )) + } } - - let is_relational = matches!(op, BinOp::Less | BinOp::Greater | BinOp::Leq | BinOp::Geq | BinOp::Eq | BinOp::Neq); + + let is_relational = matches!( + op, + BinOp::Less | BinOp::Greater | BinOp::Leq | BinOp::Geq | BinOp::Eq | BinOp::Neq + ); if is_relational { store_op(em, frame, &Operand::Temp(dst), "rax", strings, &Type::Int)?; } else { store_op(em, frame, &Operand::Temp(dst), "xmm0", strings, ty)?; } - + return Ok(()); } - load_op(em, frame, lhs, "rax", strings, ty)?; load_op(em, frame, rhs, "rcx", strings, ty)?; @@ -479,7 +501,6 @@ fn emit_logical( frame: &Frame, strings: &StringPool, ) -> 
EmitResult<()> { - let ty = &Type::Int; // Normaliza lhs para 0/1 em %rdx. load_op(em, frame, lhs, "rax", strings, ty)?; @@ -500,7 +521,7 @@ fn emit_logical( em.insn("andq %rdx, %rax"); } - store_op(em, frame, &Operand::Temp(dst), "rax", strings)?; + store_op(em, frame, &Operand::Temp(dst), "rax", strings, ty)?; Ok(()) } @@ -527,7 +548,7 @@ fn emit_unop( .offset_of(&key) .expect("operando de & deve ter slot alocado no frame"); em.insn(&format!("leaq {offset}(%rbp), %rax")); - store_op(em, frame, &Operand::Temp(dst), "rax", strings)?; + store_op(em, frame, &Operand::Temp(dst), "rax", strings, &Type::Int)?; return Ok(()); } if matches!(ty, Type::Double) { @@ -551,7 +572,7 @@ fn emit_unop( } UnOp::AddrOf => unreachable!("tratado antes do load_op acima"), } - store_op(em, frame, &Operand::Temp(dst), "rax", strings)?; + store_op(em, frame, &Operand::Temp(dst), "rax", strings, ty)?; Ok(()) } @@ -575,7 +596,7 @@ fn emit_call( } let stack_args = &args[args.len().min(abi::MAX_REG_ARGS)..]; for arg in stack_args.iter().rev() { - load_op(em, frame, arg, "rax", strings)?; + load_op(em, frame, arg, "rax", strings, &Type::Int)?; em.insn("pushq %rax"); } @@ -607,7 +628,11 @@ fn load_op( strings: &StringPool, ty: &Type, ) -> EmitResult<()> { - let mov_insn = if matches!(ty, Type::Double) {"movsd"} else {"movq"}; + let mov_insn = if matches!(ty, Type::Double) { + "movsd" + } else { + "movq" + }; match op { Operand::Const(ConstValue::String(value)) => { let label = strings @@ -617,14 +642,18 @@ fn load_op( em.insn(&format!("leaq {label}(%rip), %{reg}")); Ok(()) } - Operand::Const(ConstValue::Double(value)) =>{ + Operand::Const(ConstValue::Double(_)) if !matches!(ty, Type::Double) => Err(codegen_error( + "literal double usado em contexto nao-double (apenas double e' suportado neste backend; float ainda nao)", + Some("load"), + )), + Operand::Const(ConstValue::Double(value)) => { let label = strings .double_labels .get(&value.to_string()) .expect("double literal deve ter sido 
coletado"); - em.insn(&format!("movsd {label}(%rip), %{reg}")); - Ok(()) + em.insn(&format!("movsd {label}(%rip), %{reg}")); + Ok(()) } Operand::Const(value) => { em.insn(&format!("movq ${}, %{reg}", const_immediate(value)?)); @@ -634,22 +663,24 @@ fn load_op( let offset = frame .offset_of(&SlotKey::Temp(temp.0)) .expect("temp sem slot alocado"); - em.insn(&format!("movq {offset}(%rbp), %{reg}")); + em.insn(&format!("{mov_insn} {offset}(%rbp), %{reg}")); Ok(()) } Operand::Var(name) => { let offset = frame .offset_of(&SlotKey::Var(name.clone())) .expect("var sem slot alocado"); - em.insn(&format!("movq {offset}(%rbp), %{reg}")); + em.insn(&format!("{mov_insn} {offset}(%rbp), %{reg}")); Ok(()) } Operand::Deref(inner) => { // `%r11` e scratch/caller-saved e nao e usado como `reg` por // nenhum chamador de `load_op`/`store_op` neste backend, entao e // seguro usa-lo aqui para materializar o ponteiro antes do deref. - load_op(em, frame, inner, DEREF_SCRATCH_REG, strings)?; - em.insn(&format!("movq (%{DEREF_SCRATCH_REG}), %{reg}")); + // O proprio ponteiro e sempre um endereco de 8 bytes, independente + // do tipo do valor apontado. 
+ load_op(em, frame, inner, DEREF_SCRATCH_REG, strings, &Type::Int)?; + em.insn(&format!("{mov_insn} (%{DEREF_SCRATCH_REG}), %{reg}")); Ok(()) } } @@ -664,10 +695,14 @@ fn store_op( strings: &StringPool, ty: &Type, ) -> EmitResult<()> { - let mov_insn = if matches!(ty, Type::Double) {"movsd"} else {"movq"}; + let mov_insn = if matches!(ty, Type::Double) { + "movsd" + } else { + "movq" + }; if let Operand::Deref(inner) = op { load_op(em, frame, inner, DEREF_SCRATCH_REG, strings, &Type::Int)?; - em.insn(&format!("movq %{reg}, (%{DEREF_SCRATCH_REG})")); + em.insn(&format!("{mov_insn} %{reg}, (%{DEREF_SCRATCH_REG})")); return Ok(()); } @@ -686,7 +721,7 @@ fn store_op( } Operand::Deref(_) => unreachable!("tratado antes do match acima"), }; - em.insn(&format!("movq %{reg}, {offset}(%rbp)")); + em.insn(&format!("{mov_insn} %{reg}, {offset}(%rbp)")); Ok(()) } @@ -751,6 +786,7 @@ mod tests { Vec::new(), vec![TacInstr::Return { val: Some(Operand::Const(ConstValue::Int(42))), + ty: None, }], ) } @@ -790,9 +826,11 @@ mod tests { op: BinOp::Add, lhs: Operand::Var("a".to_string()), rhs: Operand::Var("b".to_string()), + ty: Type::Int, }, TacInstr::Return { val: Some(Operand::Temp(TempId(0))), + ty: None, }, ], ); @@ -815,9 +853,11 @@ mod tests { op: BinOp::Div, lhs: Operand::Const(ConstValue::Int(10)), rhs: Operand::Const(ConstValue::Int(3)), + ty: Type::Int, }, TacInstr::Return { val: Some(Operand::Temp(TempId(0))), + ty: None, }, ], ); @@ -838,6 +878,7 @@ mod tests { op: BinOp::Mod, lhs: Operand::Const(ConstValue::Int(10)), rhs: Operand::Const(ConstValue::Int(3)), + ty: Type::Int, }], ); @@ -856,6 +897,7 @@ mod tests { op: BinOp::Less, lhs: Operand::Const(ConstValue::Int(1)), rhs: Operand::Const(ConstValue::Int(2)), + ty: Type::Int, }], ); @@ -882,6 +924,7 @@ mod tests { }, TacInstr::Return { val: Some(Operand::Temp(TempId(0))), + ty: None, }, ], ); @@ -909,6 +952,7 @@ mod tests { }, TacInstr::Return { val: Some(Operand::Temp(TempId(0))), + ty: None, }, ], ); @@ -979,10 
+1023,12 @@ mod tests { TacInstr::Label(LabelId(0)), TacInstr::Return { val: Some(Operand::Const(ConstValue::Int(1))), + ty: None, }, TacInstr::Label(LabelId(1)), TacInstr::Return { val: Some(Operand::Const(ConstValue::Int(0))), + ty: None, }, ], ); @@ -1025,6 +1071,7 @@ mod tests { op: BinOp::And, lhs: Operand::Const(ConstValue::Int(1)), rhs: Operand::Const(ConstValue::Int(0)), + ty: Type::Int, }], ); @@ -1044,6 +1091,7 @@ mod tests { op: BinOp::Or, lhs: Operand::Const(ConstValue::Int(1)), rhs: Operand::Const(ConstValue::Int(0)), + ty: Type::Int, }], ); diff --git a/src/ir/cfg.rs b/src/ir/cfg.rs index 34a236e..245867f 100644 --- a/src/ir/cfg.rs +++ b/src/ir/cfg.rs @@ -168,6 +168,7 @@ pub fn build_cfg(func: &TacFunction) -> Cfg { #[cfg(test)] mod tests { use super::*; + use crate::common::ast::ast::Type; use crate::ir::tac::{ConstValue, Operand, TempId}; fn func(instrs: Vec) -> TacFunction { @@ -185,13 +186,16 @@ mod tests { TacInstr::Copy { dst: Operand::Temp(TempId(0)), src: Operand::Const(ConstValue::Int(1)), + ty: Type::Int, }, TacInstr::Copy { dst: Operand::Temp(TempId(1)), src: Operand::Const(ConstValue::Int(2)), + ty: Type::Int, }, TacInstr::Return { val: Some(Operand::Temp(TempId(1))), + ty: None, }, ]); @@ -217,16 +221,19 @@ mod tests { TacInstr::Copy { dst: Operand::Temp(TempId(0)), src: Operand::Const(ConstValue::Int(1)), + ty: Type::Int, }, TacInstr::Jump { label: merge_label }, TacInstr::Label(else_label), TacInstr::Copy { dst: Operand::Temp(TempId(0)), src: Operand::Const(ConstValue::Int(2)), + ty: Type::Int, }, TacInstr::Label(merge_label), TacInstr::Return { val: Some(Operand::Temp(TempId(0))), + ty: None, }, ]); @@ -253,10 +260,14 @@ mod tests { TacInstr::Copy { dst: Operand::Temp(TempId(0)), src: Operand::Const(ConstValue::Int(1)), + ty: Type::Int, }, TacInstr::Jump { label: cond_label }, TacInstr::Label(exit_label), - TacInstr::Return { val: None }, + TacInstr::Return { + val: None, + ty: None, + }, ]); let cfg = build_cfg(&f); @@ -279,7 +290,10 
@@ mod tests { #[test] fn cfg_entry_has_no_predecessors() { - let f = func(vec![TacInstr::Return { val: None }]); + let f = func(vec![TacInstr::Return { + val: None, + ty: None, + }]); let cfg = build_cfg(&f); diff --git a/src/ir/lower.rs b/src/ir/lower.rs index 9179775..3df948b 100644 --- a/src/ir/lower.rs +++ b/src/ir/lower.rs @@ -96,6 +96,35 @@ impl Lowerer { sizes } + /// Infere se uma expressao produz um valor `double`, o suficiente para + /// escolher entre o caminho de codegen inteiro (`rax`/`rcx`) e o de + /// ponto flutuante (`xmm0`/`xmm1`) sem precisar reexecutar a analise + /// semantica completa. Qualquer expressao fora deste subconjunto e + /// tratada como inteira — escopo deliberadamente limitado a literais, + /// variaveis locais, aritmetica basica e cast, conforme a issue #172. + fn expr_type_for_codegen(&self, expr: &Expr) -> Type { + match expr { + Expr::Literal(Literal::Double(_), _) => Type::Double, + Expr::Ident(name, _) => self + .var_types + .get(name) + .map(|ty| resolve_alias(ty, &self.typedefs)) + .unwrap_or(Type::Int), + Expr::Binary(lhs, _, rhs, _) => { + if matches!(self.expr_type_for_codegen(lhs), Type::Double) + || matches!(self.expr_type_for_codegen(rhs), Type::Double) + { + Type::Double + } else { + Type::Int + } + } + Expr::Unary(_, inner, _) => self.expr_type_for_codegen(inner), + Expr::Cast(qty, _, _) => resolve_alias(&qty.ty, &self.typedefs), + _ => Type::Int, + } + } + /// Infere o tipo estatico (ja resolvido de aliases de `typedef`) de um /// subconjunto limitado de expressoes (identificadores, deref, indice /// via ponteiro, membro de struct e cast) — o suficiente para resolver @@ -227,6 +256,7 @@ impl Lowerer { op: BinOp::Mul, lhs: idx_op, rhs: Operand::Const(ConstValue::Int(elem_size)), + ty: Type::Int, }); Operand::Temp(scaled) }; @@ -237,6 +267,7 @@ impl Lowerer { op: BinOp::Add, lhs: base_ptr, rhs: offset, + ty: Type::Int, }); Ok(Operand::Temp(addr)) } @@ -276,6 +307,7 @@ impl Lowerer { op: BinOp::Add, lhs: base_addr, 
rhs: Operand::Const(ConstValue::Int(field_offset)), + ty: Type::Int, }); Ok(Operand::Temp(addr)) } @@ -291,6 +323,7 @@ impl Lowerer { dst: temp, op: UnOp::AddrOf, src: Operand::Var(name.clone()), + ty: Type::Int, }); Ok(Operand::Temp(temp)) } @@ -308,25 +341,29 @@ impl Lowerer { match expr { Expr::Literal(value, _) => Ok(Operand::Const(lower_literal(value))), Expr::Ident(name, _) => Ok(Operand::Var(name.clone())), - Expr::Binary(lhs, op, rhs, _) => { - let lhs = self.lower_expr(lhs)?; - let rhs = self.lower_expr(rhs)?; + Expr::Binary(lhs_expr, op, rhs_expr, _) => { + let ty = self.expr_type_for_codegen(expr); + let lhs = self.lower_expr(lhs_expr)?; + let rhs = self.lower_expr(rhs_expr)?; let dst = self.fresh_temp(); self.instrs.push(TacInstr::BinOp { dst, op: op.clone(), lhs, rhs, + ty, }); Ok(Operand::Temp(dst)) } - Expr::Unary(op, src, _) => { - let src = self.lower_expr(src)?; + Expr::Unary(op, src_expr, _) => { + let ty = self.expr_type_for_codegen(src_expr); + let src = self.lower_expr(src_expr)?; let dst = self.fresh_temp(); self.instrs.push(TacInstr::UnOp { dst, op: op.clone(), src, + ty, }); Ok(Operand::Temp(dst)) } @@ -354,14 +391,19 @@ impl Lowerer { }); Ok(Operand::Temp(dst)) } - Expr::Cast(_, inner, _) => self.lower_expr(inner), + Expr::Cast(qty, inner, _) => { + let _ = resolve_alias(&qty.ty, &self.typedefs); + self.lower_expr(inner) + } Expr::Assign(lhs, rhs, _) => { + let ty = self.expr_type_for_codegen(lhs); let src = self.lower_expr(rhs)?; let dst = self.lower_assignment_target(lhs)?; - self.emit_copy(dst.clone(), src)?; + self.emit_copy(dst.clone(), src, ty)?; Ok(dst) } Expr::CompoundAssign(op, lhs, rhs, _) => { + let ty = self.expr_type_for_codegen(lhs); let dst = self.lower_assignment_target(lhs)?; let rhs = self.lower_expr(rhs)?; let temp = self.fresh_temp(); @@ -370,8 +412,9 @@ impl Lowerer { op: op.clone(), lhs: dst.clone(), rhs, + ty: ty.clone(), }); - self.emit_copy(dst.clone(), Operand::Temp(temp))?; + self.emit_copy(dst.clone(), 
Operand::Temp(temp), ty)?; Ok(dst) } Expr::SizeofType(qty, _) => Ok(Operand::Const(ConstValue::Int(type_size(&qty.ty)?))), @@ -389,13 +432,15 @@ impl Lowerer { }); self.instrs.push(TacInstr::Label(then_label)); + let then_ty = self.expr_type_for_codegen(then_expr); let then_val = self.lower_expr(then_expr)?; - self.emit_copy(Operand::Temp(dst), then_val)?; + self.emit_copy(Operand::Temp(dst), then_val, then_ty)?; self.emit_jump_unless_terminated(end_label); self.instrs.push(TacInstr::Label(else_label)); + let else_ty = self.expr_type_for_codegen(else_expr); let else_val = self.lower_expr(else_expr)?; - self.emit_copy(Operand::Temp(dst), else_val)?; + self.emit_copy(Operand::Temp(dst), else_val, else_ty)?; self.instrs.push(TacInstr::Label(end_label)); Ok(Operand::Temp(dst)) @@ -577,18 +622,20 @@ impl Lowerer { Ok(()) } Stmt::Return(expr, _) => { + let ty = expr.as_ref().map(|expr| self.expr_type_for_codegen(expr)); let val = expr .as_ref() .map(|expr| self.lower_expr(expr)) .transpose()?; - self.instrs.push(TacInstr::Return { val }); + self.instrs.push(TacInstr::Return { val, ty }); Ok(()) } Stmt::VarDecl(qty, name, init, _) => { self.declare_var_type(name, &qty.ty); if let Some(init) = init { + let ty = resolve_alias(&qty.ty, &self.typedefs); let src = self.lower_expr(init)?; - self.emit_copy(Operand::Var(name.clone()), src)?; + self.emit_copy(Operand::Var(name.clone()), src, ty)?; } Ok(()) } @@ -615,6 +662,7 @@ impl Lowerer { op: BinOp::Eq, lhs: disc_op.clone(), rhs: case_val, + ty: Type::Int, }); let next_test = self.labels.fresh(); self.instrs.push(TacInstr::CondJump { @@ -660,6 +708,7 @@ impl Lowerer { } fn lower_prefix(&mut self, op: &PrefixOp, target: &Expr) -> LowerResult { + let ty = self.expr_type_for_codegen(target); let dst = self.lower_assignment_target(target)?; let temp = self.fresh_temp(); self.instrs.push(TacInstr::BinOp { @@ -667,15 +716,17 @@ impl Lowerer { op: prefix_bin_op(op), lhs: dst.clone(), rhs: Operand::Const(ConstValue::Int(1)), + ty: 
ty.clone(), }); - self.emit_copy(dst.clone(), Operand::Temp(temp))?; + self.emit_copy(dst.clone(), Operand::Temp(temp), ty)?; Ok(dst) } fn lower_postfix(&mut self, op: &PostfixOp, target: &Expr) -> LowerResult { + let ty = self.expr_type_for_codegen(target); let dst = self.lower_assignment_target(target)?; let old = self.fresh_temp(); - self.emit_copy(Operand::Temp(old), dst.clone())?; + self.emit_copy(Operand::Temp(old), dst.clone(), ty.clone())?; let new = self.fresh_temp(); self.instrs.push(TacInstr::BinOp { @@ -683,8 +734,9 @@ impl Lowerer { op: postfix_bin_op(op), lhs: dst.clone(), rhs: Operand::Const(ConstValue::Int(1)), + ty: ty.clone(), }); - self.emit_copy(dst, Operand::Temp(new))?; + self.emit_copy(dst, Operand::Temp(new), ty)?; Ok(Operand::Temp(old)) } @@ -741,10 +793,10 @@ impl Lowerer { } } - fn emit_copy(&mut self, dst: Operand, src: Operand) -> LowerResult<()> { + fn emit_copy(&mut self, dst: Operand, src: Operand, ty: Type) -> LowerResult<()> { match dst { Operand::Temp(_) | Operand::Var(_) | Operand::Deref(_) => { - self.instrs.push(TacInstr::Copy { dst, src }); + self.instrs.push(TacInstr::Copy { dst, src, ty }); Ok(()) } Operand::Const(_) => Err(codegen_error( @@ -1050,6 +1102,7 @@ mod tests { op: BinOp::Add, lhs: Operand::Const(ConstValue::Int(2)), rhs: Operand::Const(ConstValue::Int(3)), + ty: Type::Int, }] ); } @@ -1091,6 +1144,7 @@ mod tests { TacInstr::Copy { dst: Operand::Var("x".to_string()), src: Operand::Const(ConstValue::Int(2)), + ty: Type::Int, } ); assert_eq!(instrs[3], TacInstr::Jump { label: LabelId(2) }); @@ -1100,6 +1154,7 @@ mod tests { TacInstr::Copy { dst: Operand::Var("y".to_string()), src: Operand::Const(ConstValue::Int(3)), + ty: Type::Int, } ); assert_eq!(instrs[6], TacInstr::Label(LabelId(2))); @@ -1173,12 +1228,14 @@ mod tests { op: BinOp::Mul, lhs: Operand::Const(ConstValue::Int(3)), rhs: Operand::Const(ConstValue::Int(4)), + ty: Type::Int, }, TacInstr::BinOp { dst: TempId(1), op: BinOp::Add, lhs: 
Operand::Const(ConstValue::Int(2)), rhs: Operand::Temp(TempId(0)), + ty: Type::Int, }, ] ); @@ -1201,7 +1258,8 @@ mod tests { assert_eq!( func.instrs, vec![TacInstr::Return { - val: Some(Operand::Var("argc".to_string())) + val: Some(Operand::Var("argc".to_string())), + ty: Some(Type::Int), }] ); } diff --git a/src/ir/tac.rs b/src/ir/tac.rs index dd19918..b1c7084 100644 --- a/src/ir/tac.rs +++ b/src/ir/tac.rs @@ -1,8 +1,7 @@ use std::fmt; +use crate::common::ast::ast::Type; use crate::common::ast::expr::{BinOp, UnOp}; -use crate::common::ast::Type; - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct TempId(pub u32); @@ -88,7 +87,7 @@ pub enum TacInstr { dst: TempId, op: UnOp, src: Operand, - ty:Type, + ty: Type, }, Copy { dst: Operand, @@ -110,7 +109,7 @@ pub enum TacInstr { }, Return { val: Option, - ty: Option + ty: Option, }, Label(LabelId), } @@ -169,10 +168,21 @@ impl fmt::Display for Operand { impl fmt::Display for TacInstr { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - TacInstr::BinOp { dst, op, lhs, rhs, ty: _ } => { + TacInstr::BinOp { + dst, + op, + lhs, + rhs, + ty: _, + } => { write!(f, "{dst} = {lhs} {} {rhs}", bin_op_symbol(op)) } - TacInstr::UnOp { dst, op, src, ty: _ } => { + TacInstr::UnOp { + dst, + op, + src, + ty: _, + } => { write!(f, "{dst} = {}{src}", un_op_symbol(op)) } TacInstr::Copy { dst, src, ty: _ } => write!(f, "{dst} = {src}"), @@ -240,5 +250,3 @@ fn un_op_symbol(op: &UnOp) -> &'static str { UnOp::AddrOf => "&", } } - - diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 431a64a..f2cd75f 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -10,4 +10,5 @@ mod peephole_test; mod semantic_test; mod source_test; mod symbol_test; +mod tac_test; mod token_test; diff --git a/src/tests/tac_test.rs b/src/tests/tac_test.rs index 0a7a5b1..a200e43 100644 --- a/src/tests/tac_test.rs +++ b/src/tests/tac_test.rs @@ -1,34 +1,32 @@ +use crate::common::ast::ast::Type; +use crate::common::ast::expr::BinOp; 
+use crate::ir::tac::{LabelGen, LabelId, Operand, TacInstr, TempGen, TempId}; -#[cfg(test)] -mod tests { - use super::*; +#[test] +fn tac_instr_display_binop() { + let instr = TacInstr::BinOp { + dst: TempId(0), + op: BinOp::Add, + lhs: Operand::Temp(TempId(1)), + rhs: Operand::Temp(TempId(2)), + ty: Type::Int, + }; - #[test] - fn tac_instr_display_binop() { - let instr = TacInstr::BinOp { - dst: TempId(0), - op: BinOp::Add, - lhs: Operand::Temp(TempId(1)), - rhs: Operand::Temp(TempId(2)), - ty: Type::Int, - }; + assert_eq!(instr.to_string(), "t0 = t1 + t2"); +} - assert_eq!(instr.to_string(), "t0 = t1 + t2"); - } +#[test] +fn temp_gen_increments() { + let mut gen = TempGen::new(); - #[test] - fn temp_gen_increments() { - let mut gen = TempGen::new(); + assert_eq!(gen.fresh(), TempId(0)); + assert_eq!(gen.fresh(), TempId(1)); +} - assert_eq!(gen.fresh(), TempId(0)); - assert_eq!(gen.fresh(), TempId(1)); - } +#[test] +fn label_gen_unique() { + let mut gen = LabelGen::new(); - #[test] - fn label_gen_unique() { - let mut gen = LabelGen::new(); - - assert_eq!(gen.fresh(), LabelId(0)); - assert_eq!(gen.fresh(), LabelId(1)); - } -} \ No newline at end of file + assert_eq!(gen.fresh(), LabelId(0)); + assert_eq!(gen.fresh(), LabelId(1)); +} diff --git a/tests/codegen_smoke.rs b/tests/codegen_smoke.rs index a545663..b34e911 100644 --- a/tests/codegen_smoke.rs +++ b/tests/codegen_smoke.rs @@ -12,6 +12,7 @@ use std::path::PathBuf; use std::process::Command; use crusty::codegen::last::emit_program; +use crusty::common::ast::ast::Type; use crusty::common::ast::expr::BinOp; use crusty::ir::tac::{ConstValue, LabelId, Operand, TacFunction, TacInstr, TacProgram, TempId}; @@ -43,9 +44,11 @@ fn build_soma_program() -> TacProgram { op: BinOp::Add, lhs: Operand::Var("a".to_string()), rhs: Operand::Var("b".to_string()), + ty: Type::Int, }, TacInstr::Return { val: Some(Operand::Temp(TempId(0))), + ty: None, }, ], var_sizes: Default::default(), @@ -65,6 +68,7 @@ fn build_soma_program() -> 
TacProgram { }, TacInstr::Return { val: Some(Operand::Temp(TempId(0))), + ty: None, }, ], var_sizes: Default::default(), @@ -148,6 +152,7 @@ fn smoke_simple_return_const_runs() { params: Vec::new(), instrs: vec![TacInstr::Return { val: Some(Operand::Const(ConstValue::Int(42))), + ty: None, }], var_sizes: Default::default(), }], @@ -189,51 +194,60 @@ fn smoke_call_with_more_than_six_args_runs() { op: BinOp::Add, lhs: Operand::Var("a1".to_string()), rhs: Operand::Var("a2".to_string()), + ty: Type::Int, }, TacInstr::BinOp { dst: TempId(1), op: BinOp::Add, lhs: Operand::Temp(TempId(0)), rhs: Operand::Var("a3".to_string()), + ty: Type::Int, }, TacInstr::BinOp { dst: TempId(2), op: BinOp::Add, lhs: Operand::Temp(TempId(1)), rhs: Operand::Var("a4".to_string()), + ty: Type::Int, }, TacInstr::BinOp { dst: TempId(3), op: BinOp::Add, lhs: Operand::Temp(TempId(2)), rhs: Operand::Var("a5".to_string()), + ty: Type::Int, }, TacInstr::BinOp { dst: TempId(4), op: BinOp::Add, lhs: Operand::Temp(TempId(3)), rhs: Operand::Var("a6".to_string()), + ty: Type::Int, }, TacInstr::BinOp { dst: TempId(5), op: BinOp::Add, lhs: Operand::Temp(TempId(4)), rhs: Operand::Var("a7".to_string()), + ty: Type::Int, }, TacInstr::BinOp { dst: TempId(6), op: BinOp::Add, lhs: Operand::Temp(TempId(5)), rhs: Operand::Var("a8".to_string()), + ty: Type::Int, }, TacInstr::BinOp { dst: TempId(7), op: BinOp::Add, lhs: Operand::Temp(TempId(6)), rhs: Operand::Var("a9".to_string()), + ty: Type::Int, }, TacInstr::Return { val: Some(Operand::Temp(TempId(7))), + ty: None, }, ], var_sizes: Default::default(), @@ -252,6 +266,7 @@ fn smoke_call_with_more_than_six_args_runs() { }, TacInstr::Return { val: Some(Operand::Temp(TempId(0))), + ty: None, }, ], var_sizes: Default::default(), @@ -304,10 +319,12 @@ fn smoke_control_flow_if_else_runs() { TacInstr::Label(LabelId(0)), TacInstr::Return { val: Some(Operand::Const(ConstValue::Int(10))), + ty: None, }, TacInstr::Label(LabelId(1)), TacInstr::Return { val: 
Some(Operand::Const(ConstValue::Int(20))), + ty: None, }, ], var_sizes: Default::default(), diff --git a/tests/double_codegen_test.rs b/tests/double_codegen_test.rs new file mode 100644 index 0000000..b0b00f0 --- /dev/null +++ b/tests/double_codegen_test.rs @@ -0,0 +1,332 @@ +//! Smoke tests ponta-a-ponta para o codegen x86-64 de `double` (issue #172). +//! +//! Escopo coberto, conforme delimitado na issue: literais `double`, +//! variaveis locais, aritmetica basica (`+ - * /`), comparacoes e `return`. +//! Argumentos/parametros e retorno de `double` por uma funcao chamada pelo +//! proprio codegen (em vez de por um `main` escrito a mao) permanecem fora +//! de escopo, pois exigiriam estender `codegen/last/abi.rs` para cobrir +//! `xmm0..xmm7` na convencao de chamada — deixado para uma proxima etapa. +//! +//! Como nem toda expressao com `double` pode ser verificada via exit code +//! (o valor de retorno de um processo e sempre truncado a um inteiro de 8 +//! bits), os testes usam duas estrategias: +//! - quando o resultado observavel e naturalmente inteiro (comparacoes, que +//! este backend ja normaliza para 0/1 em `%rax`), o programa inteiro e +//! gerado por este compilador e o exit code e verificado diretamente; +//! - quando o resultado e um `double` em si (ex.: o proprio criterio de +//! aceite da issue, `double x = 1.5; return x + 2.5;`), apenas a funcao +//! `double` e gerada por este compilador (`--emit=obj`); um pequeno +//! programa C convencional, compilado pelo `gcc` do sistema, chama essa +//! funcao e verifica o resultado — exercitando o lado "callee" da ABI +//! (retorno em `%xmm0`) sem depender do lado "caller" deste backend. +//! +//! Se `gcc` nao estiver disponivel no ambiente, os testes sao ignorados +//! (skip) em vez de falhar, espelhando `tests/exe_smoke_test.rs`. 
+ +#![cfg_attr(not(unix), allow(unused_variables))] + +use std::path::PathBuf; +use std::process::{Command, ExitStatus}; + +use crusty::analyser::analyse_with_builtins; +use crusty::codegen::last::emit_program; +use crusty::common::input::source::SourceFile; +use crusty::ir::lower::lower_program; +use crusty::lexer::scanner::Scanner; +use crusty::parser::Parser; + +fn gcc_available() -> bool { + Command::new("gcc") + .arg("--version") + .output() + .map(|o| o.status.success()) + .unwrap_or(false) +} + +/// Ignora o teste quando nao ha `gcc` no ambiente. +macro_rules! require_gcc { + () => { + if !gcc_available() { + eprintln!("gcc indisponivel: pulando teste de smoke"); + return; + } + }; +} + +/// Roda o pipeline completo (lexer -> parser -> semantic -> IR -> codegen) +/// sobre `source` e retorna o assembly x86-64 gerado. Falha o teste (panic) +/// se qualquer estagio reportar diagnosticos, ja que os fixtures usados aqui +/// sao sempre programas C validos. +fn compile_to_asm(source: &str) -> String { + let mut scanner = Scanner::new(SourceFile::from_string(source)); + scanner.scan(); + assert!( + scanner.diagnostics.is_empty(), + "erros de lexer inesperados: {:?}", + scanner.diagnostics + ); + + let mut parser = Parser::new(scanner.tokens); + let program = parser + .parse_program() + .unwrap_or_else(|errors| panic!("erros de parser inesperados: {errors:?}")); + + let sem_diagnostics = analyse_with_builtins(&program, scanner.builtins); + let sem_errors: Vec<_> = sem_diagnostics.iter().filter(|d| d.is_error()).collect(); + assert!( + sem_errors.is_empty(), + "erros semanticos inesperados: {sem_errors:?}" + ); + + let tac_program = lower_program(&program).unwrap(); + emit_program(&tac_program).unwrap() +} + +/// Compila `source` (C) ate um executavel real via `gcc` e o executa, +/// retornando o `ExitStatus` do processo filho. Limpa os arquivos +/// temporarios (.s e binario) ao final. 
+fn compile_and_run(name: &str, source: &str) -> ExitStatus { + let asm = compile_to_asm(source); + + let mut asm_path = std::env::temp_dir(); + asm_path.push(format!( + "crusty_double_smoke_{name}_{}.s", + std::process::id() + )); + std::fs::write(&asm_path, asm).expect("falha ao escrever .s temporario"); + let exe_path: PathBuf = asm_path.with_extension("bin"); + + let link = Command::new("gcc") + .arg(&asm_path) + .arg("-o") + .arg(&exe_path) + .status() + .expect("falha ao invocar gcc"); + assert!( + link.success(), + "gcc nao conseguiu linkar a saida do codegen" + ); + + let status = Command::new(&exe_path) + .status() + .expect("falha ao executar o binario gerado"); + + let _ = std::fs::remove_file(&asm_path); + let _ = std::fs::remove_file(&exe_path); + + status +} + +/// Compila uma funcao `double` isolada (`source`) com `--emit=obj`-equivalente +/// (aqui via `emit_program`, montado a `.o` pelo `gcc`), e linka com um +/// pequeno harness C escrito a mao que chama `compute()` e verifica o +/// resultado. Retorna o `ExitStatus` do harness. 
+fn compile_double_fn_and_check(name: &str, source: &str, harness_body: &str) -> ExitStatus { + let asm = compile_to_asm(source); + + let dir = std::env::temp_dir(); + let asm_path = dir.join(format!("crusty_double_fn_{name}_{}.s", std::process::id())); + let obj_path = dir.join(format!("crusty_double_fn_{name}_{}.o", std::process::id())); + let harness_path = dir.join(format!( + "crusty_double_harness_{name}_{}.c", + std::process::id() + )); + let exe_path = dir.join(format!( + "crusty_double_fn_{name}_{}.bin", + std::process::id() + )); + + std::fs::write(&asm_path, asm).expect("falha ao escrever .s temporario"); + + let assemble = Command::new("gcc") + .arg("-c") + .arg(&asm_path) + .arg("-o") + .arg(&obj_path) + .status() + .expect("falha ao invocar gcc -c"); + assert!( + assemble.success(), + "gcc nao conseguiu montar a saida do codegen" + ); + + std::fs::write(&harness_path, harness_body).expect("falha ao escrever harness .c"); + + let link = Command::new("gcc") + .arg(&harness_path) + .arg(&obj_path) + .arg("-o") + .arg(&exe_path) + .status() + .expect("falha ao invocar gcc para linkar harness + objeto"); + assert!( + link.success(), + "gcc nao conseguiu linkar o harness com o objeto gerado pelo codegen" + ); + + let status = Command::new(&exe_path) + .status() + .expect("falha ao executar o binario gerado"); + + let _ = std::fs::remove_file(&asm_path); + let _ = std::fs::remove_file(&obj_path); + let _ = std::fs::remove_file(&harness_path); + let _ = std::fs::remove_file(&exe_path); + + status +} + +/// Criterio de aceite da issue #172: `double x = 1.5; return x + 2.5;` +/// compila e roda via gcc com resultado correto (4.0). +#[test] +fn smoke_double_literal_local_and_addition_returns_correct_value() { + require_gcc!(); + + let status = compile_double_fn_and_check( + "literal_add", + "double compute() { double x = 1.5; return x + 2.5; }", + r#" + double compute(void); + int main() { + double r = compute(); + return (r == 4.0) ? 
0 : 1; + } + "#, + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(0)); +} + +#[test] +fn smoke_double_subtraction_multiplication_division_return_correct_values() { + require_gcc!(); + + let status = compile_double_fn_and_check( + "arith", + "double compute() { \ + double a = 10.0; \ + double b = 4.0; \ + double sub = a - b; \ + double mul = sub * 2.0; \ + double div = mul / 3.0; \ + return div; \ + }", + r#" + double compute(void); + int main() { + double r = compute(); + return (r == 4.0) ? 0 : 1; + } + "#, + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(0)); +} + +#[test] +fn smoke_double_multiple_locals_runs() { + require_gcc!(); + + let status = compile_double_fn_and_check( + "multi_locals", + "double compute() { \ + double a = 1.5; \ + double b = 2.25; \ + double c = 0.25; \ + return a + b + c; \ + }", + r#" + double compute(void); + int main() { + double r = compute(); + return (r == 4.0) ? 0 : 1; + } + "#, + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(0)); +} + +/// Comparacoes entre `double` ja produzem um resultado inteiro (0/1) neste +/// backend, entao podem ser verificadas direto pelo exit code de um `int +/// main()` gerado integralmente por este compilador, sem depender de +/// conversao double<->int (ainda nao suportada). 
+#[test] +fn smoke_double_less_than_comparison_runs() { + require_gcc!(); + + let status = compile_and_run( + "double_less_than", + "int main() { \ + double a = 1.5; \ + double b = 2.5; \ + if (a < b) { return 1; } \ + return 0; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(1)); +} + +#[test] +fn smoke_double_equality_after_addition_runs() { + require_gcc!(); + + let status = compile_and_run( + "double_eq_after_add", + "int main() { \ + double a = 1.5; \ + double b = 2.5; \ + double c = a + b; \ + if (c == 4.0) { return 1; } \ + return 0; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(1)); +} + +#[test] +fn smoke_double_greater_or_equal_false_branch_runs() { + require_gcc!(); + + let status = compile_and_run( + "double_geq_false", + "int main() { \ + double a = 1.0; \ + double b = 9.0; \ + if (a >= b) { return 1; } \ + return 0; \ + }", + ); + + #[cfg(unix)] + assert_eq!(status.code(), Some(0)); +} + +/// Garante que o assembly gerado para `double` realmente usa o caminho de +/// ponto flutuante (registradores `xmm`/instrucoes `sd`) em vez do caminho +/// inteiro (`rax`/`rcx`), o que a issue #172 identificou como o gap real do +/// backend antes desta feature. 
+#[test] +fn double_codegen_emits_xmm_instructions() { + let asm = compile_to_asm("double compute() { double x = 1.5; return x + 2.5; }"); + + assert!( + asm.contains("movsd"), + "esperado mov de double via movsd no assembly gerado:\n{asm}" + ); + assert!( + asm.contains("addsd"), + "esperada soma de double via addsd no assembly gerado:\n{asm}" + ); + assert!( + asm.contains(".double"), + "esperada diretiva .double para os literais double na .rodata:\n{asm}" + ); +} From ed5c0c7d8fc264cca8ea4e9c1880b749d0caa45a Mon Sep 17 00:00:00 2001 From: Bappoz Date: Wed, 24 Jun 2026 23:27:11 -0300 Subject: [PATCH 91/91] fix: corrigi todas as documentacoes com as novas atualizacoes --- INSTALL.md | 4 ++-- README.md | 11 ++++++----- TESTER.md | 10 ++++++---- docs/index.md | 11 +++++++---- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 14f6d8d..bc1fd22 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -10,7 +10,7 @@ Guia para deixar o ambiente pronto para compilar e rodar o Crusty. | `gcc` | qualquer versão recente | Montar (`as`) e linkar (`ld`) os executáveis ELF gerados pelo backend x86-64 | | Linux x86-64 | — | O backend gera assembly x86-64 / System V ABI. Não há suporte a outras arquiteturas ou a Windows/macOS nativo | -Sem `gcc` no `PATH`, o compilador ainda funciona até a emissão de assembly (`--emit=asm`), mas os testes de smoke e2e (`tests/exe_smoke_test.rs`, `tests/codegen_smoke.rs`) são automaticamente pulados (skip), e `--emit=obj`/`--emit=exe` falham. +Sem `gcc` no `PATH`, o compilador ainda funciona até a emissão de assembly (`--emit=asm`), mas os testes de smoke e2e (`tests/exe_smoke_test.rs`, `tests/codegen_smoke.rs`, `tests/double_codegen_test.rs`) são automaticamente pulados (skip), e `--emit=obj`/`--emit=exe` falham. ## 1. 
Instalar o Rust @@ -102,4 +102,4 @@ Essas três checagens são exatamente as que o CI (`.github/workflows/`) roda em - **`error: linker 'cc' not found` ou falha ao montar/linkar** — `gcc` não está instalado ou não está no `PATH`. Repita o passo 2. - **`cargo: command not found`** depois de instalar o Rust — rode `source "$HOME/.cargo/env"` ou abra um novo terminal. - **Testes de smoke "pulando" silenciosamente** — esperado se `gcc` não estiver disponível; veja [TESTER.md](TESTER.md) para detalhes. -- **Programa de teste usa `float`/`double`** e falha com `error: code generation` — limitação conhecida atual do backend, ver [README.md](README.md#limitações-conhecidas) e [issue #172](https://github.com/Bappoz/Crusty/issues/172). +- **Programa de teste usa `float`** e falha com `error: code generation` — `float` ainda não tem codegen (limitação conhecida do backend, ver [README.md](README.md#limitações-conhecidas) e [issue #172](https://github.com/Bappoz/Crusty/issues/172)); `double` já é suportado. diff --git a/README.md b/README.md index 4eaded0..74b29b5 100644 --- a/README.md +++ b/README.md @@ -11,15 +11,15 @@ Projeto da disciplina de Compiladores 1. Implementa um compilador para um subcon | Análise semântica | Completo | | IR (TAC) | Completo | | Otimizações (CSE, DCE, constant folding, copy propagation, LICM, inlining) | Completo | -| Geração de código x86-64 | Completo para tipos inteiros, ponteiros, structs, arrays e globais | +| Geração de código x86-64 | Completo para tipos inteiros, ponteiros, structs, arrays, globais e `double` (registradores XMM) | ### Limitações conhecidas -- **`float`/`double` não têm codegen.** O analisador semântico aceita e tipa esses tipos, mas o backend x86-64 ainda não emite instruções de ponto flutuante (registradores XMM). Em desenvolvimento na [issue #172](https://github.com/Bappoz/Crusty/issues/172). Programas que usam `float`/`double` falham com `error: code generation` no estágio final. 
+- **`float` não tem codegen.** `double` já é suportado pelo backend x86-64 (registradores XMM, issue [#172](https://github.com/Bappoz/Crusty/issues/172)), mas `float` ainda não — o analisador semântico aceita e tipa `float`, mas o backend ainda não emite o código correspondente. Programas que usam `float` falham com `error: code generation` no estágio final; use `double` no lugar. - O modo REPL interativo (executar `crusty` sem argumentos) não está implementado. - `--dump-ir` ainda não imprime a IR (placeholder). -Fora isso, o pipeline completo (lexer → parser → análise semântica → IR → otimizações → assembly x86-64 → executável ELF via `gcc`) funciona ponta a ponta para um subconjunto relevante de C: tipos inteiros e `char`, ponteiros, structs, arrays de tamanho fixo, enums, typedefs, variáveis globais, todas as estruturas de controle (`if`/`while`/`do-while`/`for`/`switch`) e chamadas de função. +Fora isso, o pipeline completo (lexer → parser → análise semântica → IR → otimizações → assembly x86-64 → executável ELF via `gcc`) funciona ponta a ponta para um subconjunto relevante de C: tipos inteiros, `char` e `double`, ponteiros, structs, arrays de tamanho fixo, enums, typedefs, variáveis globais, todas as estruturas de controle (`if`/`while`/`do-while`/`for`/`switch`) e chamadas de função. 
## Estrutura do projeto @@ -107,8 +107,9 @@ cargo run --release -- src/examples/simple.c -o /tmp/simple - Pipeline de otimização configurável por nível (`-O0`..`-O3`): constant folding, common subexpression elimination, dead code elimination, copy propagation, loop-invariant code motion, inlining **Backend x86-64** -- Convenção de chamada System V ABI (inteiros/ponteiros em `rdi`..`r9`/`rax`) +- Convenção de chamada System V ABI (inteiros/ponteiros em `rdi`..`r9`/`rax`, `double` em `xmm0`..`xmm7`) - Endereço de variáveis, indexação de array, acesso a membro de struct (`.`, `->`), address-of/deref +- Aritmética, comparações e literais de `double` via registradores XMM (`addsd`/`subsd`/`mulsd`/`divsd`/`ucomisd`) - `sizeof` em tempo de compilação - Variáveis globais com acesso RIP-relative - Peephole optimizer sobre o assembly emitido @@ -121,7 +122,7 @@ Cobertura completa (testes unitários e testes com arquivos `.c` reais, executad Resumo rápido: ```bash -cargo test --all # ~354 testes (unitários + integração + smoke e2e) +cargo test --all # ~361 testes (unitários + integração + smoke e2e) cargo clippy -- -D warnings cargo fmt --check ``` diff --git a/TESTER.md b/TESTER.md index ca503e4..09cb9fc 100644 --- a/TESTER.md +++ b/TESTER.md @@ -13,9 +13,10 @@ Pré-requisito: ambiente configurado conforme [INSTALL.md](INSTALL.md) (Rust + ` | `tests/integration_test.rs` | Pipeline lexer→parser→semântico sobre arquivos `.c` reais (válidos e inválidos) | 16 | | `tests/codegen_smoke.rs` | TAC montado manualmente → assembly → `gcc` → execução, checando exit code | 5 | | `tests/exe_smoke_test.rs` | Código-fonte C real → pipeline completo → executável ELF → execução | 26 | +| `tests/double_codegen_test.rs` | Codegen de `double`/XMM (assembly emitido e execução real, issue #172) | 7 | | `tests/licm_test.rs` | Otimização de loop-invariant code motion | 2 | -Total atual: **354 testes**, todos passando em `developer`. +Total atual: **361 testes**, todos passando em `developer`. 
## Rodando tudo @@ -78,6 +79,7 @@ Estes são os testes mais completos: compilam código C real até assembly x86-6 ```bash cargo test --test exe_smoke_test cargo test --test codegen_smoke +cargo test --test double_codegen_test ``` Se `gcc` não estiver disponível no `PATH`, esses testes são pulados (skip) automaticamente — verifique a saída de `cargo test -- --nocapture` por `gcc indisponivel: pulando teste de smoke` para confirmar. @@ -110,7 +112,7 @@ Exemplos disponíveis e seu status atual: | `simple.c` | Sim | | | `demo_presentation.c` | Sim | Demo usada na apresentação da disciplina | | `declarations.c` | Gera assembly, mas não tem `main` | Não é pensado para ser linkado/executado isoladamente | -| `full_code1.c` | Não | Usa `float`, sem codegen ainda (issue #172) | +| `full_code1.c` | Não | Usa `float`, sem codegen ainda (`double` já é suportado, ver issue #172) | | `operators.c` | Não — nem com `gcc` | Tem statements em escopo global, o que não é C válido (confirmado com `gcc -fsyntax-only`); não é um bug do compilador | ### Testando com seus próprios arquivos `.c` @@ -139,5 +141,5 @@ Rode as quatro localmente antes de abrir um PR — é exatamente o que será ver ## Cobertura conhecida e limitações dos testes -- Não há testes automatizados para `float`/`double` em codegen, porque a feature não existe ainda (issue #172). Quando for implementada, a suíte `exe_smoke_test.rs`/`codegen_smoke.rs` é o lugar natural para os novos casos. -- Os smoke tests de execução (`exe_smoke_test.rs`, `codegen_smoke.rs`) dependem de Linux x86-64 + `gcc`; em outras plataformas eles são pulados, não falham. +- `double` tem cobertura dedicada em `tests/double_codegen_test.rs` (issue #172). Não há testes automatizados para `float` em codegen, porque a feature ainda não existe no backend. +- Os smoke tests de execução (`exe_smoke_test.rs`, `codegen_smoke.rs`, `double_codegen_test.rs`) dependem de Linux x86-64 + `gcc`; em outras plataformas eles são pulados, não falham. 
diff --git a/docs/index.md b/docs/index.md index 4b4506a..d06c6c8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -13,7 +13,9 @@ Código-fonte (.c) ↓ [Analisador] → diagnósticos semânticos ↓ -[Codegen] → (não implementado) +[IR / TAC] → Three-Address Code + otimizações (-O0..-O3) + ↓ +[Codegen] → assembly x86-64 → .o / executável ELF via gcc ``` ## Módulos @@ -22,8 +24,9 @@ Código-fonte (.c) |--------|--------|--------------| | Lexer | Completo | [lexer.md](lexer.md) | | Parser | Completo | [parser.md](parser.md) | -| Analisador Semântico | Em desenvolvimento | [semantic.md](semantic.md) | -| Geração de código | Não iniciado | — | +| Analisador Semântico | Completo | [semantic.md](semantic.md) | +| IR (TAC) e otimizações | Completo | — | +| Geração de código x86-64 | Completo para tipos inteiros, ponteiros, structs, arrays, globais e `double`; `float` ainda sem codegen ([issue #172](https://github.com/Bappoz/Crusty/issues/172)) | — | ## Referências @@ -31,4 +34,4 @@ Código-fonte (.c) ## Repositório -[github.com/Bappoz/crusty](https://github.com/Bappoz/crusty) +[github.com/Bappoz/Crusty](https://github.com/Bappoz/Crusty)