From bcbe48b401b2cee7e6c11ed8b8aad49e7239d356 Mon Sep 17 00:00:00 2001 From: BrunoBezerra Date: Sun, 12 Apr 2026 12:07:35 -0300 Subject: [PATCH 01/10] =?UTF-8?q?modifica=C3=A7=C3=B5es=20para=20o=20supor?= =?UTF-8?q?te=20da=20opera=C3=A7=C3=A3o=20++=20no=20parser,=20ast,=20typec?= =?UTF-8?q?hecker,=20interpreter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/01-language.md | 2 +- docs/04-parser.md | 2 +- src/interpreter/eval_expr.rs | 16 ++++++++++++++++ src/ir/ast.rs | 1 + src/parser/expressions.rs | 6 ++++++ src/semantic/type_checker.rs | 16 ++++++++++++++++ 6 files changed, 41 insertions(+), 2 deletions(-) diff --git a/docs/01-language.md b/docs/01-language.md index 7ecf417..574ad1f 100644 --- a/docs/01-language.md +++ b/docs/01-language.md @@ -176,7 +176,7 @@ From highest (evaluated first) to lowest (evaluated last): | 1 (highest) | function call, array index `[]`, parentheses `()` | | 2 | unary minus `-`, logical not `!` | | 3 | `*`, `/` | -| 4 | `+`, `-` | +| 4 | `+`, `-`, `++`| | 5 | `==`, `!=`, `<`, `<=`, `>`, `>=` | | 6 | `and` | | 7 (lowest) | `or` | diff --git a/docs/04-parser.md b/docs/04-parser.md index 99c4adf..8cc9590 100644 --- a/docs/04-parser.md +++ b/docs/04-parser.md @@ -103,7 +103,7 @@ expression └── logical_and (and) └── logical_not (!) 
└── relational (== != < <= > >=) - └── additive (+ -) + └── additive (+ - ++) └── multiplicative (* /) └── unary (unary -) └── primary (atoms + indexing) diff --git a/src/interpreter/eval_expr.rs b/src/interpreter/eval_expr.rs index 49fcbef..660c779 100644 --- a/src/interpreter/eval_expr.rs +++ b/src/interpreter/eval_expr.rs @@ -69,6 +69,8 @@ pub fn eval_expr(expr: &CheckedExpr, env: &mut Environment) -> Result numeric_binop(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a * b, |a, b| a * b), Expr::Div(l, r) => numeric_binop(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a / b, |a, b| a / b), + Expr::Concat(l, r) => string_binop(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a + &b), + Expr::Lt(l, r) => numeric_cmp(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a < b, |a, b| a < b), Expr::Le(l, r) => numeric_cmp(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a <= b, |a, b| a <= b), Expr::Gt(l, r) => numeric_cmp(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a > b, |a, b| a > b), @@ -209,6 +211,20 @@ fn numeric_binop( } } +fn string_binop( + lv: Value, + rv: Value, + concat_op: impl Fn(String, String) -> String, +) -> Result { + match (lv, rv) { + (Value::Str(a), Value::Str(b)) => Ok(Value::Str(concat_op(a, b))), + (l, r) => Err(RuntimeError::new(format!( + "string concatenation requires Str operands, got: {} and {}", + l, r + ))), + } +} + fn numeric_cmp( lv: Value, rv: Value, diff --git a/src/ir/ast.rs b/src/ir/ast.rs index 5f57b24..a29d6a7 100644 --- a/src/ir/ast.rs +++ b/src/ir/ast.rs @@ -87,6 +87,7 @@ pub enum Expr { Neg(Box>), Add(Box>, Box>), Sub(Box>, Box>), + Concat(Box>, Box>), Mul(Box>, Box>), Div(Box>, Box>), Eq(Box>, Box>), diff --git a/src/parser/expressions.rs b/src/parser/expressions.rs index 8cfcab4..31a8d30 100644 --- a/src/parser/expressions.rs +++ b/src/parser/expressions.rs @@ -160,6 +160,12 @@ fn additive(input: &str) -> IResult<&str, UncheckedExpr> { rest = r; continue; } + let str_concat = tuple((multispace0, tag("++"), multispace0, 
multiplicative))(rest); + if let Ok((r, (_, _, _, e))) = str_concat { + acc = wrap(Expr::Concat(Box::new(acc), Box::new(e))); + rest = r; + continue; + } break; } Ok((rest, acc)) diff --git a/src/semantic/type_checker.rs b/src/semantic/type_checker.rs index 46681cf..2088ab1 100644 --- a/src/semantic/type_checker.rs +++ b/src/semantic/type_checker.rs @@ -358,6 +358,10 @@ fn type_check_expr_inner( Box::new(type_check_expr_to_typed(l, env)?), Box::new(type_check_expr_to_typed(r, env)?), )), + Expr::Concat(l, r) => Ok(Expr::Concat( + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), + )), Expr::Mul(l, r) => Ok(Expr::Mul( Box::new(type_check_expr_to_typed(l, env)?), Box::new(type_check_expr_to_typed(r, env)?), @@ -446,6 +450,11 @@ fn type_check_expr( let rt = type_check_expr(r, env)?; numeric_binop_result(&lt, &rt) } + Expr::Concat(l, r) => { + let lt = type_check_expr(l, env)?; + let rt = type_check_expr(r, env)?; + string_binop_result(&lt, &rt) + } Expr::Eq(l, r) | Expr::Ne(l, r) => { let lt = type_check_expr(l, env)?; let rt = type_check_expr(r, env)?; @@ -565,6 +574,13 @@ fn numeric_binop_result(l: &Type, r: &Type) -> Result<Type, TypeError> { } } +fn string_binop_result(l: &Type, r: &Type) -> Result<Type, TypeError> { + match (l, r) { + (Type::Str, Type::Str) => Ok(Type::Str), + _ => Err(TypeError::new("string concatenation requires Str operands")), + } +} + fn is_numeric(ty: &Type) -> bool { matches!(ty, Type::Int | Type::Float) } From 37dbd9b93b94c3336e63b82cdb236c4a42478336 Mon Sep 17 00:00:00 2001 From: BrunoBezerra Date: Sun, 12 Apr 2026 12:29:50 -0300 Subject: [PATCH 02/10] =?UTF-8?q?testes=20unit=C3=A1rios=20para=20o=20oper?= =?UTF-8?q?ador=20de=20concatena=C3=A7=C3=A3o=20++?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/parser.rs | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/parser.rs b/tests/parser.rs index eca6640..b5d6fa3 100644 --- 
a/tests/parser.rs +++ b/tests/parser.rs @@ -672,3 +672,47 @@ fn test_array_in_expression() { assert!(matches!(result.exp, Expr::Index { ref base, ref index } if matches!(base.exp, Expr::ArrayLit(_)) && index.exp == Expr::Literal(Literal::Int(0)))); } + +#[test] +fn test_string_concat() { + assert_eq!( + expression(r#""Hello, " ++ "world""#).map(|(r, e)| (r, e.exp)), + Ok(( + "", + Expr::Concat( + Box::new(ExprD { + exp: Expr::Literal(Literal::Str("Hello, ".to_string())), + ty: (), + }), + Box::new(ExprD { + exp: Expr::Literal(Literal::Str("world".to_string())), + ty: (), + }) + ) + )) + ); +} + +#[test] +fn test_string_concat_left_associative() { + let result = expression(r#""a" ++ "b" ++ "c""#).unwrap().1.exp; + match &result { + Expr::Concat(left, right) => { + assert_eq!(right.exp, Expr::Literal(Literal::Str("c".to_string()))); + match &left.exp { + Expr::Concat(left2, right2) => { + assert_eq!(left2.exp, Expr::Literal(Literal::Str("a".to_string()))); + assert_eq!(right2.exp, Expr::Literal(Literal::Str("b".to_string()))); + } + _ => panic!("expected nested Concat"), + } + } + _ => panic!("expected Concat"), + } +} + +#[test] +fn test_string_concat_whitespace_and_precedence() { + let result = expression(r#""x" ++ "y" * 2"#).unwrap().1.exp; + assert!(matches!(result, Expr::Concat(_, _))); +} From d064a1bc36497e2a74eabf00de5b0a15781f35a3 Mon Sep 17 00:00:00 2001 From: BrunoBezerra Date: Wed, 15 Apr 2026 12:18:59 -0300 Subject: [PATCH 03/10] =?UTF-8?q?implementa=C3=A7=C3=A3o=20das=20fun=C3=A7?= =?UTF-8?q?=C3=B5es=20de=20string=20na=20stdlib?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/stdlib/string.rs | 209 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 209 insertions(+) create mode 100644 src/stdlib/string.rs diff --git a/src/stdlib/string.rs b/src/stdlib/string.rs new file mode 100644 index 0000000..c141091 --- /dev/null +++ b/src/stdlib/string.rs @@ -0,0 +1,209 @@ +//! 
String built-in functions for MiniC: `len`, `substr`, `toUpper`, `toLower`, `strToInt`, `strToFloat`, `contains`. +//! +//! # Overview +//! +//! Exposes seven public functions, each matching the [`crate::interpreter::value::NativeFn`] signature +//! `fn(Vec) -> Result`: +//! +//! * [`len`] — returns the length of a supported container value. +//! * [`substr`] — returns a substring of a string. +//! * [`toUpper`] — converts a string to uppercase. +//! * [`toLower`] — converts a string to lowercase. +//! * [`strToInt`] — converts a string to an integer. +//! * [`strToFloat`] — converts a string to a float. +//! * [`contains`] — checks membership for supported container values. +//! + +use crate::interpreter::value::{RuntimeError, Value}; + +pub fn len(args: Vec) -> Result { + if args.len() != 1 { + return Err(RuntimeError::new(format!( + "len expects 1 argument, got {}", + args.len() + ))); + } + match &args[0] { + Value::Str(s) => Ok(Value::Int(s.chars().count() as i64)), + Value::Array(items) => Ok(Value::Int(items.len() as i64)), + v => Err(RuntimeError::new(format!( + "len: expected string or array argument, got {}", + v + ))), + } +} + +pub fn substr(args: Vec) -> Result { + if args.len() != 3 { + return Err(RuntimeError::new(format!( + "substr expects 3 arguments, got {}", + args.len() + ))); + } + let s = match &args[0] { + Value::Str(s) => s, + v => { + return Err(RuntimeError::new(format!( + "substr: expected string argument, got {}", + v + ))) + } + }; + let start = match &args[1] { + Value::Int(n) => *n, + v => { + return Err(RuntimeError::new(format!( + "substr: expected int start index, got {}", + v + ))) + } + }; + let length = match &args[2] { + Value::Int(n) => *n, + v => { + return Err(RuntimeError::new(format!( + "substr: expected int length, got {}", + v + ))) + } + }; + if start < 0 { + return Err(RuntimeError::new(format!( + "substr: start index out of bounds: {}", + start + ))); + } + if length < 0 { + return Err(RuntimeError::new(format!( + 
"substr: length out of bounds: {}", + length + ))); + } + + let chars: Vec = s.chars().collect(); + let start = start as usize; + let length = length as usize; + + if start > chars.len() { + return Err(RuntimeError::new(format!( + "substr: start index out of bounds: {} (len: {})", + start, + chars.len() + ))); + } + + let end = start.checked_add(length).ok_or_else(|| { + RuntimeError::new(format!( + "substr: range overflow for start {} and length {}", + start, length + )) + })?; + + if end > chars.len() { + return Err(RuntimeError::new(format!( + "substr: range out of bounds: [{}..{}) for len {}", + start, + end, + chars.len() + ))); + } + + Ok(Value::Str(chars[start..end].iter().collect())) +} + +pub fn to_upper(args: Vec) -> Result { + if args.len() != 1 { + return Err(RuntimeError::new(format!( + "toUpper expects 1 argument, got {}", + args.len() + ))); + } + match &args[0] { + Value::Str(s) => Ok(Value::Str(s.to_uppercase())), + v => Err(RuntimeError::new(format!( + "toUpper: expected string argument, got {}", + v + ))), + } +} + +pub fn to_lower(args: Vec) -> Result { + if args.len() != 1 { + return Err(RuntimeError::new(format!( + "toLower expects 1 argument, got {}", + args.len() + ))); + } + match &args[0] { + Value::Str(s) => Ok(Value::Str(s.to_lowercase())), + v => Err(RuntimeError::new(format!( + "toLower: expected string argument, got {}", + v + ))), + } +} + +pub fn str_to_int(args: Vec) -> Result { + if args.len() != 1 { + return Err(RuntimeError::new(format!( + "strToInt expects 1 argument, got {}", + args.len() + ))); + } + match &args[0] { + Value::Str(s) => match s.trim().parse::() { + Ok(n) => Ok(Value::Int(n)), + Err(_) => Err(RuntimeError::new(format!( + "strToInt: cannot convert '{}' to int", + s + ))), + }, + v => Err(RuntimeError::new(format!( + "strToInt: expected string argument, got {}", + v + ))), + } +} + +pub fn str_to_float(args: Vec) -> Result { + if args.len() != 1 { + return Err(RuntimeError::new(format!( + "strToFloat expects 1 
argument, got {}", + args.len() + ))); + } + match &args[0] { + Value::Str(s) => match s.trim().parse::() { + Ok(x) => Ok(Value::Float(x)), + Err(_) => Err(RuntimeError::new(format!( + "strToFloat: cannot convert '{}' to float", + s + ))), + }, + v => Err(RuntimeError::new(format!( + "strToFloat: expected string argument, got {}", + v + ))), + } +} + +pub fn contains(args: Vec) -> Result { + if args.len() != 2 { + return Err(RuntimeError::new(format!( + "contains expects 2 arguments, got {}", + args.len() + ))); + } + match (&args[0], &args[1]) { + (Value::Str(s), Value::Str(needle)) => Ok(Value::Bool(s.contains(needle))), + (Value::Array(items), needle) => Ok(Value::Bool(items.contains(needle))), + (Value::Str(_), other) => Err(RuntimeError::new(format!( + "contains: expected string needle for string haystack, got {}", + other + ))), + (haystack, _) => Err(RuntimeError::new(format!( + "contains: expected string or array haystack, got {}", + haystack + ))), + } +} \ No newline at end of file From 487014513d9da44f88d05a81698d6944cc5d179d Mon Sep 17 00:00:00 2001 From: BrunoBezerra Date: Wed, 15 Apr 2026 13:19:30 -0300 Subject: [PATCH 04/10] =?UTF-8?q?adi=C3=A7=C3=A3o=20das=20fun=C3=A7=C3=B5e?= =?UTF-8?q?s=20em=20nativeregistry?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/stdlib/mod.rs | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index 283f122..2e5de20 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -12,8 +12,10 @@ //! return type) with the Rust function that implements the behaviour. //! //! The default registry (via `NativeRegistry::default()`) registers: -//! `print`, `readInt`, `readFloat`, `readString` (IO), and `pow`, `sqrt` -//! (math). Implementations live in the [`io`] and [`math`] sub-modules. +//! `print`, `readInt`, `readFloat`, `readString` (IO), `pow`, `sqrt` +//! 
(math), and string utilities like `len`, `substr`, `toUpper`, `toLower`, +//! `strToInt`, `strToFloat`, `contains`. Implementations live in the [`io`], +//! [`math`], and [`string`] sub-modules. //! //! # Design Decisions //! @@ -55,6 +57,7 @@ use crate::interpreter::value::NativeFn; pub mod io; pub mod math; +pub mod string; /// A registry entry: MiniC type signature + Rust implementation. pub struct NativeEntry { @@ -129,6 +132,43 @@ impl Default for NativeRegistry { func: math::sqrt_fn, }); + // String + r.register("len", NativeEntry { + params: vec![Type::Any], + return_type: Type::Int, + func: string::len, + }); + r.register("substr", NativeEntry { + params: vec![Type::Str, Type::Int, Type::Int], + return_type: Type::Str, + func: string::substr, + }); + r.register("toUpper", NativeEntry { + params: vec![Type::Str], + return_type: Type::Str, + func: string::to_upper, + }); + r.register("toLower", NativeEntry { + params: vec![Type::Str], + return_type: Type::Str, + func: string::to_lower, + }); + r.register("strToInt", NativeEntry { + params: vec![Type::Str], + return_type: Type::Int, + func: string::str_to_int, + }); + r.register("strToFloat", NativeEntry { + params: vec![Type::Str], + return_type: Type::Float, + func: string::str_to_float, + }); + r.register("contains", NativeEntry { + params: vec![Type::Any, Type::Any], + return_type: Type::Bool, + func: string::contains, + }); + r } } From eeb40ddfe1f443ad14d24d7651515120a2bc71ca Mon Sep 17 00:00:00 2001 From: BrunoBezerra Date: Wed, 15 Apr 2026 13:19:55 -0300 Subject: [PATCH 05/10] =?UTF-8?q?testes=20para=20novas=20fun=C3=A7=C3=B5es?= =?UTF-8?q?=20de=20string?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/stdlib.rs | 106 ++++++++++++++++++++++++++++++++++++++++++ tests/type_checker.rs | 19 ++++++++ 2 files changed, 125 insertions(+) diff --git a/tests/stdlib.rs b/tests/stdlib.rs index 9e60ec3..eeb60e2 100644 --- a/tests/stdlib.rs +++ b/tests/stdlib.rs 
@@ -2,6 +2,7 @@ use mini_c::interpreter::value::Value; use mini_c::ir::ast::Type; use mini_c::stdlib::io::print_fn; use mini_c::stdlib::math::{pow_fn, sqrt_fn}; +use mini_c::stdlib::string::{contains, len, substr}; use mini_c::stdlib::NativeRegistry; // --- io tests --- @@ -86,6 +87,88 @@ fn test_sqrt_wrong_type() { assert!(result.is_err()); } +// --- string tests --- + +#[test] +fn test_len_string() { + let result = len(vec![Value::Str("hello".to_string())]); + assert_eq!(result, Ok(Value::Int(5))); +} + +#[test] +fn test_len_array() { + let result = len(vec![Value::Array(vec![Value::Int(1), Value::Int(2), Value::Int(3)])]); + assert_eq!(result, Ok(Value::Int(3))); +} + +#[test] +fn test_contains_string() { + let result = contains(vec![ + Value::Str("abcdef".to_string()), + Value::Str("cd".to_string()), + ]); + assert_eq!(result, Ok(Value::Bool(true))); +} + +#[test] +fn test_contains_array() { + let result = contains(vec![ + Value::Array(vec![Value::Int(1), Value::Int(2), Value::Int(3)]), + Value::Int(2), + ]); + assert_eq!(result, Ok(Value::Bool(true))); +} + +#[test] +fn test_substr_valid_slice() { + let result = substr(vec![ + Value::Str("abcdef".to_string()), + Value::Int(2), + Value::Int(3), + ]); + assert_eq!(result, Ok(Value::Str("cde".to_string()))); +} + +#[test] +fn test_substr_start_out_of_bounds() { + let result = substr(vec![ + Value::Str("abc".to_string()), + Value::Int(4), + Value::Int(1), + ]); + assert!(result.is_err()); +} + +#[test] +fn test_substr_range_out_of_bounds() { + let result = substr(vec![ + Value::Str("abc".to_string()), + Value::Int(2), + Value::Int(2), + ]); + assert!(result.is_err()); +} + +#[test] +fn test_substr_negative_start_rejected() { + let result = substr(vec![ + Value::Str("abc".to_string()), + Value::Int(-1), + Value::Int(1), + ]); + assert!(result.is_err()); +} + +#[test] +fn test_substr_negative_length_rejected() { + let result = substr(vec![ + Value::Str("abc".to_string()), + Value::Int(0), + Value::Int(-1), + ]); + 
assert!(result.is_err()); +} + // --- registry tests --- #[test] @@ -97,6 +180,13 @@ fn test_default_registry_contains_all_stdlib() { assert!(r.lookup("readString").is_some()); assert!(r.lookup("pow").is_some()); assert!(r.lookup("sqrt").is_some()); + assert!(r.lookup("len").is_some()); + assert!(r.lookup("substr").is_some()); + assert!(r.lookup("toUpper").is_some()); + assert!(r.lookup("toLower").is_some()); + assert!(r.lookup("strToInt").is_some()); + assert!(r.lookup("strToFloat").is_some()); + assert!(r.lookup("contains").is_some()); } #[test] @@ -119,3 +209,19 @@ fn test_print_uses_type_any() { let entry = r.lookup("print").unwrap(); assert_eq!(entry.params, vec![Type::Any]); } + +#[test] +fn test_len_uses_type_any() { + let r = NativeRegistry::default(); + let entry = r.lookup("len").unwrap(); + assert_eq!(entry.params, vec![Type::Any]); + assert_eq!(entry.return_type, Type::Int); +} + +#[test] +fn test_contains_uses_type_any() { + let r = NativeRegistry::default(); + let entry = r.lookup("contains").unwrap(); + assert_eq!(entry.params, vec![Type::Any, Type::Any]); + assert_eq!(entry.return_type, Type::Bool); +} diff --git a/tests/type_checker.rs b/tests/type_checker.rs index 3357161..589e30b 100644 --- a/tests/type_checker.rs +++ b/tests/type_checker.rs @@ -202,3 +202,22 @@ fn test_type_check_print_wrong_arity() { let result = parse_and_type_check("void main() { print(1, 2); }"); assert!(result.is_err(), "expected arity error for print(1, 2)"); } + +#[test] +fn test_type_check_len_accepts_array() { + let result = parse_and_type_check("void main() { int n = len([1, 2, 3]); }"); + assert!(result.is_ok()); +} + +#[test] +fn test_type_check_contains_accepts_array_and_element() { + let result = parse_and_type_check("void main() { bool ok = contains([1, 2, 3], 2); }"); + assert!(result.is_ok()); +} + +#[test] +fn test_type_check_contains_returns_bool() { + let result = parse_and_type_check("void main() { int x = contains([1], 1); }"); + assert!(result.is_err()); + 
assert!(result.unwrap_err().message.contains("expected Int, got Bool")); +} From 0f808c83c0ab0a189fb2aac85666db18d03513d0 Mon Sep 17 00:00:00 2001 From: BrunoBezerra Date: Wed, 15 Apr 2026 21:46:14 -0300 Subject: [PATCH 06/10] =?UTF-8?q?remo=C3=A7=C3=A3o=20das=20fun=C3=A7=C3=B5?= =?UTF-8?q?es=20de=20len=20e=20contains=20da=20string=20standard=20lib?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/stdlib/mod.rs | 14 ++------------ src/stdlib/string.rs | 39 --------------------------------------- 2 files changed, 2 insertions(+), 51 deletions(-) diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs index 2e5de20..a38cbf5 100644 --- a/src/stdlib/mod.rs +++ b/src/stdlib/mod.rs @@ -13,8 +13,8 @@ //! //! The default registry (via `NativeRegistry::default()`) registers: //! `print`, `readInt`, `readFloat`, `readString` (IO), `pow`, `sqrt` -//! (math), and string utilities like `len`, `substr`, `toUpper`, `toLower`, -//! `strToInt`, `strToFloat`, `contains`. Implementations live in the [`io`], +//! (math), and string utilities like `substr`, `toUpper`, `toLower`, +//! `strToInt`, `strToFloat`. Implementations live in the [`io`], //! [`math`], and [`string`] sub-modules. //! //! # Design Decisions @@ -133,11 +133,6 @@ impl Default for NativeRegistry { }); // String - r.register("len", NativeEntry { - params: vec![Type::Any], - return_type: Type::Int, - func: string::len, - }); r.register("substr", NativeEntry { params: vec![Type::Str, Type::Int, Type::Int], return_type: Type::Str, @@ -163,11 +158,6 @@ impl Default for NativeRegistry { return_type: Type::Float, func: string::str_to_float, }); - r.register("contains", NativeEntry { - params: vec![Type::Any, Type::Any], - return_type: Type::Bool, - func: string::contains, - }); r } diff --git a/src/stdlib/string.rs b/src/stdlib/string.rs index c141091..a125bb2 100644 --- a/src/stdlib/string.rs +++ b/src/stdlib/string.rs @@ -5,33 +5,15 @@ //! 
Exposes seven public functions, each matching the [`crate::interpreter::value::NativeFn`] signature //! `fn(Vec) -> Result`: //! -//! * [`len`] — returns the length of a supported container value. //! * [`substr`] — returns a substring of a string. //! * [`toUpper`] — converts a string to uppercase. //! * [`toLower`] — converts a string to lowercase. //! * [`strToInt`] — converts a string to an integer. //! * [`strToFloat`] — converts a string to a float. -//! * [`contains`] — checks membership for supported container values. //! use crate::interpreter::value::{RuntimeError, Value}; -pub fn len(args: Vec) -> Result { - if args.len() != 1 { - return Err(RuntimeError::new(format!( - "len expects 1 argument, got {}", - args.len() - ))); - } - match &args[0] { - Value::Str(s) => Ok(Value::Int(s.chars().count() as i64)), - Value::Array(items) => Ok(Value::Int(items.len() as i64)), - v => Err(RuntimeError::new(format!( - "len: expected string or array argument, got {}", - v - ))), - } -} pub fn substr(args: Vec) -> Result { if args.len() != 3 { @@ -186,24 +168,3 @@ pub fn str_to_float(args: Vec) -> Result { ))), } } - -pub fn contains(args: Vec) -> Result { - if args.len() != 2 { - return Err(RuntimeError::new(format!( - "contains expects 2 arguments, got {}", - args.len() - ))); - } - match (&args[0], &args[1]) { - (Value::Str(s), Value::Str(needle)) => Ok(Value::Bool(s.contains(needle))), - (Value::Array(items), needle) => Ok(Value::Bool(items.contains(needle))), - (Value::Str(_), other) => Err(RuntimeError::new(format!( - "contains: expected string needle for string haystack, got {}", - other - ))), - (haystack, _) => Err(RuntimeError::new(format!( - "contains: expected string or array haystack, got {}", - haystack - ))), - } -} \ No newline at end of file From 17c918eef9c37ab310067a96f73558d8e9312811 Mon Sep 17 00:00:00 2001 From: BrunoBezerra Date: Wed, 15 Apr 2026 21:48:50 -0300 Subject: [PATCH 07/10] =?UTF-8?q?modifica=C3=A7=C3=A3o=20da=20ast=20e=20pa?= 
=?UTF-8?q?rser=20para=20tratamento=20das=20opera=C3=A7=C3=B5es=20len=20e?= =?UTF-8?q?=20contains?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/ir/ast.rs | 2 ++ src/parser/expressions.rs | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/ir/ast.rs b/src/ir/ast.rs index a29d6a7..a2d4746 100644 --- a/src/ir/ast.rs +++ b/src/ir/ast.rs @@ -99,6 +99,8 @@ pub enum Expr { Not(Box>), And(Box>, Box>), Or(Box>, Box>), + Len(Box>), + Contains(Box>, Box>), /// Function call: name(args) Call { name: String, diff --git a/src/parser/expressions.rs b/src/parser/expressions.rs index 31a8d30..5ff056d 100644 --- a/src/parser/expressions.rs +++ b/src/parser/expressions.rs @@ -43,7 +43,7 @@ use nom::{ character::complete::{char, multispace0}, combinator::map, multi::separated_list0, - sequence::{delimited, pair, preceded, tuple}, + sequence::{delimited, pair, preceded, tuple, separated_pair}, IResult, }; @@ -65,11 +65,42 @@ pub fn parse_call(input: &str) -> IResult<&str, (String, Vec)> { Ok((rest, (name.to_string(), args))) } -/// Atom: literal, call, array literal, identifier, or parenthesized expression. +/// Parse length: `len ( expr )`. Returns the inner expr. +pub fn parse_len(input: &str) -> IResult<&str, UncheckedExpr> { + let (rest, _) = preceded(multispace0, tag("len"))(input)?; + let (rest, arg) = delimited( + preceded(multispace0, tag("(")), + preceded(multispace0, expression), + preceded(multispace0, tag(")")), + )(rest)?; + Ok((rest, arg)) +} + +/// Parse contains: `contains ( expr, expr )`. Returns (container, item). 
+pub fn parse_contains(input: &str) -> IResult<&str, (UncheckedExpr, UncheckedExpr)> { + let (rest, _) = preceded(multispace0, tag("contains"))(input)?; + let (rest, (container, item)) = delimited( + preceded(multispace0, tag("(")), + separated_pair( + preceded(multispace0, expression), + preceded(multispace0, tag(",")), + preceded(multispace0, expression), + ), + preceded(multispace0, tag(")")), + )(rest)?; + Ok((rest, (container, item))) +} + +/// Atom: literal, len, contains, call, array literal, identifier, or parenthesized expression. fn atom(input: &str) -> IResult<&str, UncheckedExpr> { alt(( map(literal, |l| wrap(Expr::Literal(l.into()))), + map(parse_len, |arg| wrap(Expr::Len(Box::new(arg)))), + map(parse_contains, |(container, item)| { + wrap(Expr::Contains(Box::new(container), Box::new(item))) + }), map(parse_call, |(name, args)| wrap(Expr::Call { name, args })), + map( delimited( preceded(multispace0, char('[')), From bd246d7afe447f9f223490c9762d0d222f0abc56 Mon Sep 17 00:00:00 2001 From: BrunoBezerra Date: Wed, 15 Apr 2026 21:55:34 -0300 Subject: [PATCH 08/10] =?UTF-8?q?modifica=C3=A7=C3=A3o=20do=20type=20check?= =?UTF-8?q?er=20para=20as=20express=C3=B5es=20len=20e=20contains?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/semantic/type_checker.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/semantic/type_checker.rs b/src/semantic/type_checker.rs index 2088ab1..0e754f6 100644 --- a/src/semantic/type_checker.rs +++ b/src/semantic/type_checker.rs @@ -403,6 +403,11 @@ fn type_check_expr_inner( Box::new(type_check_expr_to_typed(l, env)?), Box::new(type_check_expr_to_typed(r, env)?), )), + Expr::Len(arg) => Ok(Expr::Len(Box::new(type_check_expr_to_typed(arg, env)?))), + Expr::Contains(container, item) => Ok(Expr::Contains( + Box::new(type_check_expr_to_typed(container, env)?), + Box::new(type_check_expr_to_typed(item, env)?), + )), Expr::Call { name, args } => { 
let args_checked: Result, _> = args.iter().map(|a| type_check_expr_to_typed(a, env)).collect(); @@ -494,6 +499,34 @@ fn type_check_expr( Err(TypeError::new("and/or require Bool operands")) } } + Expr::Len(arg) => { + let ty = type_check_expr(arg, env)?; + match ty { + Type::Str | Type::Array(_) => Ok(Type::Int), + _ => Err(TypeError::new("len requires a string or array operand")), + } + } + Expr::Contains(container, item) => { + let container_ty = type_check_expr(container, env)?; + let item_ty = type_check_expr(item, env)?; + match container_ty { + Type::Str => { + if item_ty == Type::Str { + Ok(Type::Bool) + } else { + Err(TypeError::new("contains: string container requires string item")) + } + } + Type::Array(elem_ty) => { + if types_compatible(&item_ty, &elem_ty) { + Ok(Type::Bool) + } else { + Err(TypeError::new("contains: array item type mismatch")) + } + } + _ => Err(TypeError::new("contains requires a string or array container")), + } + } Expr::Call { name, args } => { let args_checked: Result, _> = args.iter().map(|a| type_check_expr_to_typed(a, env)).collect(); From e02f64057fc0971568442948f281b1df567c7fe5 Mon Sep 17 00:00:00 2001 From: BrunoBezerra Date: Wed, 15 Apr 2026 21:57:09 -0300 Subject: [PATCH 09/10] =?UTF-8?q?implementa=C3=A7=C3=A3o=20das=20express?= =?UTF-8?q?=C3=B5es=20len=20e=20contains?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/interpreter/eval_expr.rs | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/interpreter/eval_expr.rs b/src/interpreter/eval_expr.rs index 660c779..e37b2ee 100644 --- a/src/interpreter/eval_expr.rs +++ b/src/interpreter/eval_expr.rs @@ -123,6 +123,43 @@ pub fn eval_expr(expr: &CheckedExpr, env: &mut Environment) -> Result { + let val = eval_expr(arg, env)?; + match val { + Value::Str(s) => Ok(Value::Int(s.chars().count() as i64)), + Value::Array(elems) => Ok(Value::Int(elems.len() as i64)), + v => 
Err(RuntimeError::new(format!( + "len: expected string or array argument, got: {}", + v + ))), + } + } + + Expr::Contains(container, item) => { + let container_val = eval_expr(container, env)?; + let item_val = eval_expr(item, env)?; + match container_val { + Value::Str(s) => { + if let Value::Str(item_str) = item_val { + Ok(Value::Bool(s.contains(&item_str))) + } else { + Err(RuntimeError::new("contains: string container requires string item")) + } + } + Value::Array(elems) => { + if let Some(_) = elems.iter().find(|&e| values_equal(e, &item_val)) { + Ok(Value::Bool(true)) + } else { + Ok(Value::Bool(false)) + } + } + v => Err(RuntimeError::new(format!( + "contains: expected string or array container, got: {}", + v + ))), + } + } + Expr::Index { base, index } => { let base_val = eval_expr(base, env)?; let idx_val = eval_expr(index, env)?; From 24d7f052842857dc6d0aa6f2cfa00cac3b1d3776 Mon Sep 17 00:00:00 2001 From: BrunoBezerra Date: Wed, 15 Apr 2026 21:57:47 -0300 Subject: [PATCH 10/10] =?UTF-8?q?implementa=C3=A7=C3=A3o=20dos=20testes=20?= =?UTF-8?q?unit=C3=A1rios,=20e=20altera=C3=A7=C3=B5es=20na=20documenta?= =?UTF-8?q?=C3=A7=C3=A3o=20para=20len=20e=20contains?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/03-ast.md | 2 ++ docs/04-parser.md | 13 ++++++++++ docs/05-type-checker.md | 14 ++++++++++ docs/06-interpreter.md | 9 +++++++ docs/07-stdlib.md | 4 +++ tests/interpreter.rs | 47 +++++++++++++++++++++++++++++++++ tests/parser.rs | 16 +++++++++++- tests/stdlib.rs | 57 ++++++----------------------------------- tests/type_checker.rs | 14 ++++++++++ 9 files changed, 126 insertions(+), 50 deletions(-) diff --git a/docs/03-ast.md b/docs/03-ast.md index 3b1e8f8..8315e20 100644 --- a/docs/03-ast.md +++ b/docs/03-ast.md @@ -45,6 +45,8 @@ Expr::Literal(42) -- the integer 42 Expr::Ident("x") -- the variable x Expr::Add(left, right) -- left + right Expr::Mul(left, right) -- left * right +Expr::Len(expr) -- length of a
string/array expression +Expr::Contains(a, b) -- membership/substring check expression Expr::Call { name, args } -- a function call Expr::Index { base, index } -- base[index] ``` diff --git a/docs/04-parser.md b/docs/04-parser.md index 8cc9590..aef00c5 100644 --- a/docs/04-parser.md +++ b/docs/04-parser.md @@ -33,6 +33,15 @@ Here is roughly what happens, step by step: The same recursive logic handles arbitrarily complex expressions like `a * (b + c) - sqrt(d)`. +In addition to generic function calls, MiniC also parses two dedicated +expression forms with function-like syntax: + +- `len(expr)` +- `contains(expr, expr)` + +Even though they look like calls, they are mapped to dedicated AST nodes +(`Expr::Len` and `Expr::Contains`) instead of `Expr::Call`. + --- ## What is a Parser Combinator? @@ -110,6 +119,10 @@ expression └── atom ``` +At the `atom` level, the parser gives dedicated precedence to `len(...)` +and `contains(...)` before the generic call parser, so these two constructs +always become core expression nodes. + When `additive` needs its right operand, it calls `multiplicative`. So `*` always groups before `+` — naturally, without any precedence table. diff --git a/docs/05-type-checker.md b/docs/05-type-checker.md index ce49296..b089e25 100644 --- a/docs/05-type-checker.md +++ b/docs/05-type-checker.md @@ -144,6 +144,20 @@ pass type checking. `Type::Any` is never inferred for a variable or expression — it only appears in the registry as a parameter type for built-in functions. +### `len` and `contains` as expression forms + +`len` and `contains` are no longer validated through stdlib function +signatures. They are checked as dedicated expression nodes: + +- `len(expr)` +: `expr` must be `str` or `array`, result type is `int`. +- `contains(container, item)` +: if `container` is `str`, `item` must be `str`; if `container` is + `array(T)`, `item` must be compatible with `T`; result type is `bool`.
+ +This moves type errors for those constructs to their specific expression +rules, instead of generic call-argument validation. + --- ## Key Design Decision: Fail on the First Error diff --git a/docs/06-interpreter.md b/docs/06-interpreter.md index 65e87c1..bf5f6f5 100644 --- a/docs/06-interpreter.md +++ b/docs/06-interpreter.md @@ -97,6 +97,15 @@ executor `exec_stmt` handles each statement form: | `return expr` | Evaluates `expr` and signals an early return | | `f(args)` | Evaluates arguments, calls `f`, discards the return value | +In expression evaluation, MiniC also supports dedicated nodes for: + +- `len(expr)` +: evaluates `expr` and returns `Int` with character count (`str`) or element + count (`array`). +- `contains(container, item)` +: evaluates both operands and returns `Bool` using substring semantics for + strings and membership semantics for arrays. + ### How `return` propagates Statements do not normally produce values, but `return` must pass its value diff --git a/docs/07-stdlib.md b/docs/07-stdlib.md index f5fdc91..03653c3 100644 --- a/docs/07-stdlib.md +++ b/docs/07-stdlib.md @@ -4,6 +4,10 @@ MiniC comes with a small set of built-in functions available to every program. This document describes them from a user perspective and then explains how they are implemented and how to add new ones. +Note: `len(...)` and `contains(...)` are core language expressions in the +parser/type-checker/interpreter pipeline. They are not registered as native +functions in `NativeRegistry`. 
+ --- ## Built-in Functions diff --git a/tests/interpreter.rs b/tests/interpreter.rs index 51696c9..3f31589 100644 --- a/tests/interpreter.rs +++ b/tests/interpreter.rs @@ -256,3 +256,50 @@ fn test_stdlib_pow_float_args() { "#; assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); } + +// --------------------------------------------------------------------------- +// 7.12 len/contains as core expressions +// --------------------------------------------------------------------------- +#[test] +fn test_len_expression_with_string() { + let src = r#" + void main() { + int n = len("abc"); + print(n); + } + "#; + assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); +} + +#[test] +fn test_len_expression_with_array() { + let src = r#" + void main() { + int n = len([1, 2, 3, 4]); + print(n); + } + "#; + assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); +} + +#[test] +fn test_contains_expression_with_string() { + let src = r#" + void main() { + bool ok = contains("abcdef", "cd"); + print(ok); + } + "#; + assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); +} + +#[test] +fn test_contains_expression_with_array() { + let src = r#" + void main() { + bool ok = contains([1, 2, 3], 2); + print(ok); + } + "#; + assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); +} diff --git a/tests/parser.rs b/tests/parser.rs index b5d6fa3..3a2097e 100644 --- a/tests/parser.rs +++ b/tests/parser.rs @@ -539,6 +539,20 @@ fn test_call_in_expression() { } } +#[test] +fn test_len_as_expression_node() { + let result = expression("len([1, 2, 3])").unwrap().1; + assert!(matches!(result.exp, Expr::Len(_))); + assert!(!matches!(result.exp, Expr::Call { .. })); +} + +#[test] +fn test_contains_as_expression_node() { + let result = expression("contains([1, 2, 3], 2)").unwrap().1; + assert!(matches!(result.exp, Expr::Contains(_, _))); + assert!(!matches!(result.exp, Expr::Call { .. 
})); +} + #[test] fn test_call_as_statement() { let result = statement("foo(1, 2);").unwrap().1; @@ -658,7 +672,7 @@ fn test_multidimensional_indexed_assignment() { #[test] fn test_nested_index() { let result = expression("arr[i][j]").unwrap().1; - assert!(matches!(result.exp, Expr::Index { ref base, ref index } + assert!(matches!(result.exp, Expr::Index { ref index, .. } if matches!(index.exp, Expr::Ident(ref s) if s == "j"))); if let Expr::Index { ref base, .. } = result.exp { assert!(matches!(base.exp, Expr::Index { ref base, ref index } diff --git a/tests/stdlib.rs b/tests/stdlib.rs index eeb60e2..03e93c0 100644 --- a/tests/stdlib.rs +++ b/tests/stdlib.rs @@ -2,7 +2,7 @@ use mini_c::interpreter::value::Value; use mini_c::ir::ast::Type; use mini_c::stdlib::io::print_fn; use mini_c::stdlib::math::{pow_fn, sqrt_fn}; -use mini_c::stdlib::string::{contains, len, substr}; +use mini_c::stdlib::string::substr; use mini_c::stdlib::NativeRegistry; // --- io tests --- @@ -89,36 +89,6 @@ fn test_sqrt_wrong_type() { // --- string tests --- -#[test] -fn test_len_string() { - let result = len(vec![Value::Str("hello".to_string())]); - assert_eq!(result, Ok(Value::Int(5))); -} - -#[test] -fn test_len_array() { - let result = len(vec![Value::Array(vec![Value::Int(1), Value::Int(2), Value::Int(3)])]); - assert_eq!(result, Ok(Value::Int(3))); -} - -#[test] -fn test_contains_string() { - let result = contains(vec![ - Value::Str("abcdef".to_string()), - Value::Str("cd".to_string()), - ]); - assert_eq!(result, Ok(Value::Bool(true))); -} - -#[test] -fn test_contains_array() { - let result = contains(vec![ - Value::Array(vec![Value::Int(1), Value::Int(2), Value::Int(3)]), - Value::Int(2), - ]); - assert_eq!(result, Ok(Value::Bool(true))); -} - #[test] fn test_substr_valid_slice() { let result = substr(vec![ @@ -180,13 +150,18 @@ fn test_default_registry_contains_all_stdlib() { assert!(r.lookup("readString").is_some()); assert!(r.lookup("pow").is_some()); 
assert!(r.lookup("sqrt").is_some()); - assert!(r.lookup("len").is_some()); assert!(r.lookup("substr").is_some()); assert!(r.lookup("toUpper").is_some()); assert!(r.lookup("toLower").is_some()); assert!(r.lookup("strToInt").is_some()); assert!(r.lookup("strToFloat").is_some()); - assert!(r.lookup("contains").is_some()); +} + +#[test] +fn test_len_and_contains_not_in_registry() { + let r = NativeRegistry::default(); + assert!(r.lookup("len").is_none()); + assert!(r.lookup("contains").is_none()); } #[test] @@ -209,19 +184,3 @@ fn test_print_uses_type_any() { let entry = r.lookup("print").unwrap(); assert_eq!(entry.params, vec![Type::Any]); } - -#[test] -fn test_len_uses_type_any() { - let r = NativeRegistry::default(); - let entry = r.lookup("len").unwrap(); - assert_eq!(entry.params, vec![Type::Any]); - assert_eq!(entry.return_type, Type::Int); -} - -#[test] -fn test_contains_uses_type_any() { - let r = NativeRegistry::default(); - let entry = r.lookup("contains").unwrap(); - assert_eq!(entry.params, vec![Type::Any, Type::Any]); - assert_eq!(entry.return_type, Type::Bool); -} diff --git a/tests/type_checker.rs b/tests/type_checker.rs index 589e30b..a2440e1 100644 --- a/tests/type_checker.rs +++ b/tests/type_checker.rs @@ -221,3 +221,17 @@ fn test_type_check_contains_returns_bool() { assert!(result.is_err()); assert!(result.unwrap_err().message.contains("expected Int, got Bool")); } + +#[test] +fn test_type_check_len_rejects_int_operand() { + let result = parse_and_type_check("void main() { int n = len(42); }"); + assert!(result.is_err()); + assert!(result.unwrap_err().message.contains("len requires a string or array operand")); +} + +#[test] +fn test_type_check_contains_rejects_string_and_int() { + let result = parse_and_type_check("void main() { bool ok = contains(\"abc\", 1); }"); + assert!(result.is_err()); + assert!(result.unwrap_err().message.contains("string container requires string item")); +}