diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 44437a5..a3c835d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,6 +36,7 @@ jobs: - uses: Swatinem/rust-cache@v2 with: prefix-key: clippy + - run: cargo build -p ryo-runtime --release - run: cargo clippy --all-targets test: @@ -65,4 +66,5 @@ jobs: - name: Install Zig toolchain if: steps.zig-cache.outputs.cache-hit != 'true' run: cargo run -- toolchain install + - run: cargo build -p ryo-runtime --release - run: cargo test diff --git a/CLAUDE.md b/CLAUDE.md index 875a836..50959de 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -36,16 +36,18 @@ All Ryo code examples **must** use Python-style colons and indentation, **NOT** ## Build & Test Commands +Standard cargo commands work fully out-of-the-box (even on a clean checkout) because `build.rs` automatically compiles the `ryo-runtime` static library in a separate target directory if it isn't found. + ```bash -cargo fmt # Auto-format (CI runs --check with -Dwarnings) -cargo clippy --all-targets # Lint (CI enforces, warnings are errors) -cargo check # Check for errors -cargo build [--release] # Build debug or release +cargo build # Automatically builds the runtime (if missing) and then compiles the compiler +cargo check # Check compiler for errors +cargo test # Run all unit + integration tests cargo run -- run # JIT compile and execute cargo run -- build # AOT compile to binary -cargo test # Run tests cargo run -- toolchain install # Download Zig linker cargo run -- toolchain status # Check Zig status +cargo clippy --all-targets # Lint (warnings are errors) +cargo fmt --check # Check code formatting style ``` **File extensions:** `.ryo` (source), `.md` (docs), `.rs` (Rust), `.o`/`.obj` (generated) @@ -65,7 +67,15 @@ GitHub Actions runs on pushes to `main` and PRs targeting `main`: `cargo fmt --c **Commit prefixes:** `feat:`, `fix:`, `docs:`, `spec:`, `dev:`, `roadmap:`, `test:`, `chore:`, `refactor:`. Keep subjects under 72 chars. Add body for non-obvious changes. -Never author Claude on commits nor PRs. +IMPORTANT: Never author Claude on commits nor PRs. + +--- + +## Issue Tracking + +Non-immediate issues that affect architecture, correctness, or long-term code health go in `ISSUES.md`. Create an entry when you identify a problem that won't be resolved in the current session but must be addressed for better architecture or sustainability. Use the next sequential `I-XXX` number, pick the appropriate severity (Blocking / Correctness / Cleanup), and include Files, Summary, and Resolution fields. + +Do **not** create issues for things you're fixing right now — just fix them. Do **not** use GitHub Issues for these; `ISSUES.md` is the single source of truth. --- diff --git a/Cargo.lock b/Cargo.lock index 5646765..d6358b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -122,6 +122,15 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -213,6 +222,15 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "cranelift" version = "0.131.1" @@ -415,6 +433,26 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "dirs" version = "6.0.0" @@ -503,6 +541,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -995,6 +1043,8 @@ dependencies = [ "dirs", "hashbrown 0.17.1", "logos", + "ryo-runtime", + "sha2", "tar", "target-lexicon", "tempfile", @@ -1002,6 +1052,19 @@ dependencies = [ "xz2", ] +[[package]] +name = "ryo-runtime" +version = "0.1.0" +dependencies = [ + "ryu", +] + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + [[package]] name = "semver" version = "1.0.28" @@ -1051,6 +1114,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -1161,6 +1235,12 @@ dependencies = [ "syn", ] +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -1232,6 +1312,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index 0d3a475..3ec916f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,26 @@ +[workspace] +members = ["runtime"] + +# Without `panic = "abort"`, Rust emits unwinding metadata (`_Unwind_*` +# symbols) that zig cc cannot resolve when linking user binaries. +# These settings live at the workspace root because profile configuration +# isn't permitted in workspace member manifests. +[profile.release] +panic = "abort" + +[profile.dev] +panic = "abort" + [package] name = "ryo" version = "0.1.0" edition = "2024" [build-dependencies] +sha2 = "0.10" [dependencies] +ryo-runtime = { path = "runtime" } ariadne = "0.6" chumsky = "0.12" diff --git a/ISSUES.md b/ISSUES.md index 3dbc17a..86f0bd3 100644 --- a/ISSUES.md +++ b/ISSUES.md @@ -206,6 +206,11 @@ Resolved entries are removed (not kept around as a changelog). Look at `git log` **Summary:** Currently, `for-range` loops have bespoke code generation that manually emits basic blocks, jump instructions, and raw counter increments. When general iterators are added, loops should be desugared during the AST-to-UIR phase into standard `while` loops that call `.next()`. **Resolution:** Once iterators land, remove the `generate_for_range` codegen entirely and rely on standard `while` codegen to emit loops. +### I-043 — Migrate `ryo-runtime` to `#![no_std]` +**Files:** `runtime/src/lib.rs`, `runtime/Cargo.toml`, `src/linker.rs` +**Summary:** The runtime staticlib only uses `std::alloc`, `std::process::abort()`, and `eprintln!`, yet linking against precompiled `std` bundles objects with `_Unwind_*` symbol references. This forces the linker to pass `-lunwind` on Linux (workaround in `src/linker.rs`). Migrating to `#![no_std]` with `extern crate alloc` eliminates the dependency entirely. +**Resolution:** Replace `std::alloc` with `alloc::alloc` (identical API). Replace `eprintln!` + `process::abort()` with `extern "C" { fn abort() -> !; }`. Add `#[panic_handler]` that aborts. Keep the `rlib` crate-type for `cargo test` via a `#[cfg(test)]` std gate. `ryu` already supports `no_std`. Benefits: smaller archive, faster link times, no hidden unwind dependency, simpler cross-compilation. + --- ## Cross-References diff --git a/build.rs b/build.rs index afd3ccf..0090267 100644 --- a/build.rs +++ b/build.rs @@ -1,3 +1,4 @@ +use sha2::{Digest, Sha256}; use std::env; fn main() { @@ -14,6 +15,90 @@ fn main() { _ => pkg_version, }; println!("cargo:rustc-env=RYO_VERSION={version}"); + + // Runtime archive path. Honor RYO_RUNTIME_LIB if set (used by downstream + // packagers). Otherwise build it on demand using the current cargo profile + // in a separate target directory to avoid cargo lock deadlocks. + let runtime_path = env::var("RYO_RUNTIME_LIB").unwrap_or_else(|_| { + let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); + let target_dir = + env::var("CARGO_TARGET_DIR").unwrap_or_else(|_| format!("{manifest_dir}/target")); + let raw_profile = env::var("PROFILE").unwrap_or_else(|_| "debug".to_string()); + // Mapping rules for Cargo profile resolution: + // - Known "release", "production", and "prod" profiles map to "release". + // - Known "debug" and "dev" profiles map to "debug". + // - For unrecognized/custom profiles, we consult OPT_LEVEL and treat any + // non-"0" optimization level as "release" (since custom optimized profiles + // typically build under optimized target layouts). + // NOTE: Custom profiles with debug = true but OPT_LEVEL > 0 (e.g., opt-level = 1, 2, 3) + // will be classified as "release", avoiding build directory mismatch surprises. + let profile = match raw_profile.as_str() { + "release" | "production" | "prod" => "release", + "debug" | "dev" => "debug", + _ => { + let opt_level = env::var("OPT_LEVEL").unwrap_or_else(|_| "0".to_string()); + if opt_level != "0" { "release" } else { "debug" } + } + }; + let mut path = std::path::PathBuf::from(&target_dir) + .join(profile) + .join("libryo_runtime.a"); + if !path.exists() { + // Build the runtime archive in-process in a separate target directory to avoid deadlocks. + let custom_target_dir = + std::path::PathBuf::from(&manifest_dir).join("target/runtime-build"); + let cargo = env::var("CARGO").unwrap_or_else(|_| "cargo".to_string()); + let mut cmd = std::process::Command::new(&cargo); + cmd.arg("build") + .arg("-p") + .arg("ryo-runtime") + .arg("--target-dir") + .arg(&custom_target_dir); + if profile == "release" { + cmd.arg("--release"); + } + let status = cmd + .status() + .unwrap_or_else(|e| panic!("failed to spawn `cargo build -p ryo-runtime`: {e}")); + if status.success() { + path = custom_target_dir.join(profile).join("libryo_runtime.a"); + } + } + if !path.exists() { + panic!( + "libryo_runtime.a still missing at {} after build attempt", + path.display() + ); + } + // Safely check if path contains non-UTF-8 characters, providing clear instructions if so. + match path.to_str() { + Some(s) => s.to_string(), + None => { + panic!( + "The resolved runtime library path at '{}' contains non-UTF-8 characters. \ + Please set the RYO_RUNTIME_LIB environment variable explicitly to override it.", + path.display() + ); + } + } + }); + + println!("cargo:rustc-env=RYO_RUNTIME_LIB={runtime_path}"); + println!("cargo:rerun-if-env-changed=RYO_RUNTIME_LIB"); + println!("cargo:rerun-if-changed={runtime_path}"); + + let runtime_bytes = std::fs::read(&runtime_path).unwrap_or_else(|e| { + panic!("failed to read runtime lib at {}: {}", runtime_path, e); + }); + let mut hasher = Sha256::new(); + hasher.update(&runtime_bytes); + let hash_result = hasher.finalize(); + let hash_string = format!("{:x}", hash_result); + println!("cargo:rustc-env=RYO_RUNTIME_HASH={hash_string}"); + + let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); + let runtime_src = std::path::PathBuf::from(&manifest_dir).join("runtime/src"); + println!("cargo:rerun-if-changed={}", runtime_src.display()); } fn resolve_git_ref() -> Option { diff --git a/docs/dev/implementation_roadmap.md b/docs/dev/implementation_roadmap.md index 028e8bc..f609c57 100644 --- a/docs/dev/implementation_roadmap.md +++ b/docs/dev/implementation_roadmap.md @@ -903,8 +903,8 @@ Every downstream milestone in Phase 2 (structs, tuples, enums, pattern matching, - **F-strings (`f"Value: {x}"`) are deferred to v0.2** — see Phase 5: F-strings & String Interpolation. v0.1 uses `+` concatenation with the `*_to_str` helpers above. - Parser/AST: accept the **`move` keyword** as a prefix on parameter declarations (`fn consume(move s: str)`). Without `move`, parameters borrow. Sema records the convention on the function signature (type-only; ownership lives elsewhere). - Add a **new pipeline stage `src/ownership.rs`** between Sema and Codegen — modeled on Mojo's MLIR-based lifetime/ASAP-destruction passes (Zig stops being a useful compiler reference for the borrow checker; see [mojo_reference.md](mojo_reference.md)). The pass mutates each `Tir` in place: inserts `TirTag::Free`, tracks per-`TirRef` ownership state, and reports diagnostics. - - Per-`TirRef` (SSA value) state lattice: `NotTracked` / `Valid` / `Moved { moved_at, moved_via }` - - `current_owner: HashMap` shadow table for named bindings + - Per-`TirRef` (SSA value) state lattice: `NotTracked` / `Valid` / `Borrowed` / `Moved { moved_at, kind }` + - `current_owner: HashMap` shadow table for named bindings (resolves binding-read sites and feeds diagnostics) - Implicit immutable borrow for function parameters (Rule 2); `move` opts into ownership transfer (Rule 4) - Standard forward dataflow with CFG-join merges; loop fixed-point (typically converges in 2 iterations) - Reassignment of `mut` move-typed bindings frees the prior buffer @@ -952,7 +952,7 @@ fn main(): - Move tracking covers **named bindings and anonymous owned temporaries** in this milestone. Explicit `&T` / `inout T` borrow syntax arrives in M8.2 / M8.3; field-by-field move tracking (partial moves out of structs/tuples) follows naturally because the same dataflow analysis is reused. - `str` deallocation follows hybrid eager destruction (spec Section 5.4) — `Free` is inserted after the binding's last use, after the old buffer when a `mut` binding is reassigned over a `Valid` slot, and at the end of the enclosing statement for anonymous owned temporaries. Lexical scope-exit RAII would be too late and would leak intermediate buffers in concat chains. User-extensible cleanup via the `drop` method lands in M23. - Allocator failure surfaces as a panic in v0.1; allocation-fallible APIs ship alongside error unions (M13). -- Detailed design: see [2026-05-11-milestone-8.1-heap-str-and-move-semantics-design.md](../superpowers/specs/2026-05-11-milestone-8.1-heap-str-and-move-semantics-design.md). +- Detailed design: see [2026-05-20-milestone-8.1-heap-str-and-move-semantics-design.md](../superpowers/specs/2026-05-20-milestone-8.1-heap-str-and-move-semantics-design.md). - Dependencies: Milestone 8 (control flow blocks shape the dataflow regions the move tracker walks). ### Milestone 8.2: Immutable Borrows (`&T`) [alpha] diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml new file mode 100644 index 0000000..b20de59 --- /dev/null +++ b/runtime/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "ryo-runtime" +version = "0.1.0" +edition = "2024" + +[lib] +crate-type = ["staticlib", "rlib"] +# rlib is needed for `cargo test` to work (staticlib alone doesn't support test harness) + +[dependencies] +ryu = "1" diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs new file mode 100644 index 0000000..ae00762 --- /dev/null +++ b/runtime/src/lib.rs @@ -0,0 +1,667 @@ +use std::alloc::{Layout, alloc, dealloc, realloc}; + +#[repr(C)] +pub struct RyoStrFat { + pub ptr: *mut u8, + pub len: u64, + pub cap: u64, +} + +#[unsafe(no_mangle)] +pub extern "C" fn ryo_str_alloc(cap: u64) -> *mut u8 { + if cap == 0 { + return std::ptr::null_mut(); + } + let layout = layout_for(cap); + // SAFETY: layout has nonzero size (cap > 0 checked above) and align 1 is valid for u8. + let ptr = unsafe { alloc(layout) }; + if ptr.is_null() { + oom_abort(); + } + ptr +} + +/// # Safety +/// `ptr` must have been returned by `ryo_str_alloc` or `ryo_str_realloc` +/// with the given `cap`, or be null. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn ryo_str_free(ptr: *mut u8, cap: u64) { + if ptr.is_null() || cap == 0 { + return; + } + let layout = layout_for(cap); + // SAFETY: caller contract — ptr came from ryo_str_alloc/realloc with this exact cap. + unsafe { dealloc(ptr, layout) }; +} + +/// # Safety +/// `ptr` must have been returned by `ryo_str_alloc` or `ryo_str_realloc` +/// with the given `old_cap`, or be null. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn ryo_str_realloc(ptr: *mut u8, old_cap: u64, new_cap: u64) -> *mut u8 { + if ptr.is_null() || old_cap == 0 { + return ryo_str_alloc(new_cap); + } + if new_cap == 0 { + // SAFETY: ptr/old_cap came from a prior alloc per our # Safety doc. + unsafe { ryo_str_free(ptr, old_cap) }; + return std::ptr::null_mut(); + } + let layout = layout_for(old_cap); + // SAFETY: ptr/old_cap pair from prior alloc; new_cap > 0 checked above; layout matches old_cap. + let new_ptr = unsafe { realloc(ptr, layout, new_cap as usize) }; + if new_ptr.is_null() { + oom_abort(); + } + new_ptr +} + +/// Helper for fixed-string results (nan, inf, etc.) +/// +/// # Safety +/// `out` must point to a valid `RyoStrFat`. +unsafe fn write_str_result(s: &[u8], out: *mut RyoStrFat) { + let ptr = ryo_str_alloc(s.len() as u64); + // SAFETY: both ptr and s.as_ptr() are valid for s.len() bytes, and copy_nonoverlapping is safe. + unsafe { + core::ptr::copy_nonoverlapping(s.as_ptr(), ptr, s.len()); + (*out).ptr = ptr; + (*out).len = s.len() as u64; + (*out).cap = s.len() as u64; + } +} + +fn layout_for(cap: u64) -> Layout { + Layout::from_size_align(cap as usize, 1).unwrap_or_else(|_| oom_abort()) +} + +fn oom_abort() -> ! { + eprintln!("ryo: out of memory"); + std::process::abort(); +} + +/// # Safety +/// `data` must point to `len` readable bytes. `out` must point to a valid `RyoStrFat`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn ryo_str_from_literal(data: *const u8, len: u64, out: *mut RyoStrFat) { + // SAFETY: caller contract — out points to a valid RyoStrFat. + unsafe { + if len == 0 { + (*out).ptr = std::ptr::null_mut(); + (*out).len = 0; + (*out).cap = 0; + return; + } + // Return pointer directly to rodata with cap=0 as static sentinel + (*out).ptr = data as *mut u8; + (*out).len = len; + (*out).cap = 0; + } +} + +/// # Safety +/// `l_ptr` must point to `l_len` readable bytes (or be null if l_len==0). +/// Same for `r_ptr`/`r_len`. `out` must point to a valid `RyoStrFat`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn ryo_str_concat( + l_ptr: *const u8, + l_len: u64, + r_ptr: *const u8, + r_len: u64, + out: *mut RyoStrFat, +) { + // SAFETY: caller contract — out points to a valid RyoStrFat and input buffers are valid for reading. + unsafe { + let total = match l_len.checked_add(r_len) { + Some(t) => t, + None => oom_abort(), + }; + if total == 0 { + (*out).ptr = std::ptr::null_mut(); + (*out).len = 0; + (*out).cap = 0; + return; + } + let l_sz: usize = l_len.try_into().unwrap_or_else(|_| oom_abort()); + let r_sz: usize = r_len.try_into().unwrap_or_else(|_| oom_abort()); + let _: usize = total.try_into().unwrap_or_else(|_| oom_abort()); + let ptr = ryo_str_alloc(total); + if l_sz > 0 { + debug_assert!(!l_ptr.is_null()); + core::ptr::copy_nonoverlapping(l_ptr, ptr, l_sz); + } + if r_sz > 0 { + debug_assert!(!r_ptr.is_null()); + core::ptr::copy_nonoverlapping(r_ptr, ptr.add(l_sz), r_sz); + } + (*out).ptr = ptr; + (*out).len = total; + (*out).cap = total; + } +} + +/// # Safety +/// `a_ptr` must point to `a_len` readable bytes (or be null/dangling if a_len==0). +/// Same for `b_ptr`/`b_len`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn ryo_str_eq( + a_ptr: *const u8, + a_len: u64, + b_ptr: *const u8, + b_len: u64, +) -> u8 { + if a_len != b_len { + return 0; + } + if a_len == 0 { + return 1; + } + // SAFETY: caller contract — a_ptr/a_len and b_ptr/b_len describe valid byte ranges. + let a_slice = unsafe { core::slice::from_raw_parts(a_ptr, a_len as usize) }; + let b_slice = unsafe { core::slice::from_raw_parts(b_ptr, b_len as usize) }; + if a_slice == b_slice { 1 } else { 0 } +} + +/// # Safety +/// `out` must point to a valid `RyoStrFat`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn ryo_int_to_str(value: i64, out: *mut RyoStrFat) { + let mut buf = [0u8; 32]; + let negative = value < 0; + // Work with unsigned magnitude to handle i64::MIN correctly + // (i64::MIN.wrapping_neg() overflows back to i64::MIN). + let mut n: u64 = if negative { + (value as u64).wrapping_neg() + } else { + value as u64 + }; + let mut pos = buf.len(); + if n == 0 { + pos -= 1; + buf[pos] = b'0'; + } else { + while n > 0 { + pos -= 1; + buf[pos] = b'0' + (n % 10) as u8; + n /= 10; + } + } + if negative { + pos -= 1; + buf[pos] = b'-'; + } + let len = (buf.len() - pos) as u64; + let ptr = ryo_str_alloc(len); + // SAFETY: ptr is newly allocated for len bytes, out is valid to write. + unsafe { + core::ptr::copy_nonoverlapping(buf.as_ptr().add(pos), ptr, len as usize); + (*out).ptr = ptr; + (*out).len = len; + (*out).cap = len; + } +} + +/// # Safety +/// `out` must point to a valid `RyoStrFat`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn ryo_float_to_str(value: f64, out: *mut RyoStrFat) { + if value.is_nan() { + // SAFETY: write_str_result safely writes Nan string through out. + return unsafe { write_str_result(b"nan", out) }; + } + if value.is_infinite() { + if value < 0.0 { + // SAFETY: write_str_result safely writes -inf string through out. + return unsafe { write_str_result(b"-inf", out) }; + } else { + // SAFETY: write_str_result safely writes inf string through out. + return unsafe { write_str_result(b"inf", out) }; + } + } + + let mut buf = ryu::Buffer::new(); + let s = buf.format(value); + let bytes = s.as_bytes(); + let len = bytes.len() as u64; + let ptr = ryo_str_alloc(len); + // SAFETY: ptr is newly allocated for len bytes, out is valid to write. + unsafe { + core::ptr::copy_nonoverlapping(bytes.as_ptr(), ptr, len as usize); + (*out).ptr = ptr; + (*out).len = len; + (*out).cap = len; + } +} + +/// # Safety +/// `out` must point to a valid `RyoStrFat`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn ryo_bool_to_str(value: u8, out: *mut RyoStrFat) { + let s: &[u8] = if value != 0 { b"true" } else { b"false" }; + // SAFETY: write_str_result safely writes through out. + unsafe { write_str_result(s, out) }; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_alloc_and_free() { + unsafe { + let ptr = ryo_str_alloc(16); + assert!(!ptr.is_null()); + ryo_str_free(ptr, 16); + } + } + + #[test] + fn test_alloc_zero_returns_null() { + let ptr = ryo_str_alloc(0); + assert!(ptr.is_null()); + } + + #[test] + fn test_free_null_is_noop() { + unsafe { ryo_str_free(std::ptr::null_mut(), 0) }; + } + + #[test] + fn test_realloc_grow() { + unsafe { + let ptr = ryo_str_alloc(8); + assert!(!ptr.is_null()); + let ptr2 = ryo_str_realloc(ptr, 8, 32); + assert!(!ptr2.is_null()); + ryo_str_free(ptr2, 32); + } + } + + #[test] + fn test_realloc_from_null() { + unsafe { + let ptr = ryo_str_realloc(std::ptr::null_mut(), 0, 16); + assert!(!ptr.is_null()); + ryo_str_free(ptr, 16); + } + } + + #[test] + fn test_realloc_to_zero() { + unsafe { + let ptr = ryo_str_alloc(16); + assert!(!ptr.is_null()); + let ptr2 = ryo_str_realloc(ptr, 16, 0); + assert!(ptr2.is_null()); + } + } + + #[test] + fn test_from_literal_nonempty() { + unsafe { + let data = b"hello"; + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_str_from_literal(data.as_ptr(), 5, &mut out); + assert_eq!(out.ptr as *const u8, data.as_ptr()); + assert_eq!(out.len, 5); + assert_eq!(out.cap, 0); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"hello"); + } + } + + #[test] + fn test_from_literal_returns_static_pointer() { + unsafe { + let data = b"hello"; + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_str_from_literal(data.as_ptr(), 5, &mut out); + assert_eq!(out.ptr as *const u8, data.as_ptr()); + assert_eq!(out.len, 5); + assert_eq!(out.cap, 0); + } + } + + #[test] + fn test_free_static_str_is_noop() { + unsafe { + let data = b"hello"; + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_str_from_literal(data.as_ptr(), 5, &mut out); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_from_literal_empty() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_str_from_literal(b"".as_ptr(), 0, &mut out); + assert!(out.ptr.is_null()); + assert_eq!(out.len, 0); + assert_eq!(out.cap, 0); + } + } + + #[test] + fn test_concat_two_strings() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_str_concat(b"Hello, ".as_ptr(), 7, b"World!".as_ptr(), 6, &mut out); + assert_eq!(out.len, 13); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"Hello, World!"); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_concat_empty_left() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_str_concat(b"".as_ptr(), 0, b"abc".as_ptr(), 3, &mut out); + assert_eq!(out.len, 3); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"abc"); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_concat_both_empty() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_str_concat(std::ptr::null(), 0, std::ptr::null(), 0, &mut out); + assert!(out.ptr.is_null()); + assert_eq!(out.len, 0); + assert_eq!(out.cap, 0); + } + } + + #[test] + fn test_eq_same_content() { + let result = unsafe { ryo_str_eq(b"hello".as_ptr(), 5, b"hello".as_ptr(), 5) }; + assert_eq!(result, 1); + } + + #[test] + fn test_eq_different_content() { + let result = unsafe { ryo_str_eq(b"hello".as_ptr(), 5, b"world".as_ptr(), 5) }; + assert_eq!(result, 0); + } + + #[test] + fn test_eq_both_empty() { + let result = unsafe { ryo_str_eq(std::ptr::null(), 0, std::ptr::null(), 0) }; + assert_eq!(result, 1); + } + + #[test] + fn test_eq_different_lengths() { + let result = unsafe { ryo_str_eq(b"hi".as_ptr(), 2, b"hello".as_ptr(), 5) }; + assert_eq!(result, 0); + } + + #[test] + fn test_int_to_str_positive() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_int_to_str(42, &mut out); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"42"); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_int_to_str_negative() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_int_to_str(-123, &mut out); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"-123"); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_int_to_str_zero() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_int_to_str(0, &mut out); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"0"); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_int_to_str_min() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_int_to_str(i64::MIN, &mut out); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"-9223372036854775808"); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_float_to_str_nan() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_float_to_str(f64::NAN, &mut out); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"nan"); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_float_to_str_inf() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_float_to_str(f64::INFINITY, &mut out); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"inf"); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_float_to_str_neg_inf() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_float_to_str(f64::NEG_INFINITY, &mut out); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"-inf"); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_float_to_str() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_float_to_str(2.75, &mut out); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + let s = core::str::from_utf8(slice).unwrap(); + assert!(s.starts_with("2.75"), "got: {}", s); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_float_to_str_large_value() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + // Value larger than u64::MAX — old code would saturate + ryo_float_to_str(1.8e19, &mut out); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + let s = core::str::from_utf8(slice).unwrap(); + let parsed: f64 = s.parse().unwrap(); + assert_eq!(parsed, 1.8e19); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_float_to_str_precision() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_float_to_str(0.1 + 0.2, &mut out); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + let s = core::str::from_utf8(slice).unwrap(); + let parsed: f64 = s.parse().unwrap(); + assert_eq!(parsed, 0.1 + 0.2); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_bool_to_str_true() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_bool_to_str(1, &mut out); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"true"); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_bool_to_str_false() { + unsafe { + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_bool_to_str(0, &mut out); + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"false"); + ryo_str_free(out.ptr, out.cap); + } + } + + #[test] + fn test_concat_static_left_heap_right() { + unsafe { + // Simulate: "Hello, " + heap_string + let left = b"Hello, "; + let left_fat = RyoStrFat { + ptr: left.as_ptr() as *mut u8, + len: 7, + cap: 0, // static + }; + + // Create a heap string for the right side + let mut right_fat = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + let right_data = b"World!"; + let right_ptr = ryo_str_alloc(6); + core::ptr::copy_nonoverlapping(right_data.as_ptr(), right_ptr, 6); + right_fat.ptr = right_ptr; + right_fat.len = 6; + right_fat.cap = 6; + + let mut out = RyoStrFat { + ptr: std::ptr::null_mut(), + len: 0, + cap: 0, + }; + ryo_str_concat( + left_fat.ptr, + left_fat.len, + right_fat.ptr, + right_fat.len, + &mut out, + ); + + assert_eq!(out.len, 13); + assert!(out.cap > 0); // heap-allocated result + let slice = core::slice::from_raw_parts(out.ptr, out.len as usize); + assert_eq!(slice, b"Hello, World!"); + + // Free: static left is safe (cap=0 → noop), heap right and result freed + ryo_str_free(left_fat.ptr, left_fat.cap); + ryo_str_free(right_fat.ptr, right_fat.cap); + ryo_str_free(out.ptr, out.cap); + } + } +} diff --git a/src/ast.rs b/src/ast.rs index b31027d..f9f2c62 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -337,6 +337,9 @@ impl Expression { ExprKind::BinaryOp(_, op, _) => format!("BinaryOp({})", op), ExprKind::UnaryOp(op, _) => format!("UnaryOp({})", op), ExprKind::Call(name, _) => format!("Call({})", pool.str(*name)), + ExprKind::MethodCall { method, .. } => { + format!("MethodCall(.{})", pool.str(*method)) + } }; println!( @@ -361,6 +364,14 @@ impl Expression { arg.pretty_print(&format!("{}{}", new_prefix, prefix_char), pool); } } + ExprKind::MethodCall { receiver, args, .. } => { + receiver.pretty_print(&format!("{}├── recv: ", new_prefix), pool); + for (i, arg) in args.iter().enumerate() { + let is_last = i == args.len() - 1; + let prefix_char = if is_last { "└── " } else { "├── " }; + arg.pretty_print(&format!("{}{}", new_prefix, prefix_char), pool); + } + } } } } @@ -372,6 +383,11 @@ pub enum ExprKind { BinaryOp(Box, BinaryOperator, Box), UnaryOp(UnaryOperator, Box), Call(StringId, Vec), + MethodCall { + receiver: Box, + method: StringId, + args: Vec, + }, } #[derive(Debug, Clone, Copy, PartialEq)] diff --git a/src/astgen.rs b/src/astgen.rs index 439f1a1..81d2c48 100644 --- a/src/astgen.rs +++ b/src/astgen.rs @@ -386,6 +386,15 @@ fn gen_expr(b: &mut UirBuilder, expr: &ast::Expression) -> InstRef { let arg_refs: Vec = args.iter().map(|a| gen_expr(b, a)).collect(); b.call(*name, &arg_refs, span) } + ast::ExprKind::MethodCall { + receiver, + method, + args, + } => { + let receiver_ref = gen_expr(b, receiver); + let arg_refs: Vec = args.iter().map(|a| gen_expr(b, a)).collect(); + b.method_call(receiver_ref, *method, &arg_refs, span) + } } } diff --git a/src/builtins.rs b/src/builtins.rs index c81a646..6588d47 100644 --- a/src/builtins.rs +++ b/src/builtins.rs @@ -10,6 +10,7 @@ pub struct BuiltinFunction { enum BuiltinReturn { Void, Never, + Str, } impl BuiltinFunction { @@ -17,6 +18,7 @@ impl BuiltinFunction { match self.return_ty { BuiltinReturn::Void => pool.void(), BuiltinReturn::Never => pool.never(), + BuiltinReturn::Str => pool.str_(), } } } @@ -34,6 +36,18 @@ pub const BUILTINS: &[BuiltinFunction] = &[ name: "panic", return_ty: BuiltinReturn::Never, }, + BuiltinFunction { + name: "int_to_str", + return_ty: BuiltinReturn::Str, + }, + BuiltinFunction { + name: "float_to_str", + return_ty: BuiltinReturn::Str, + }, + BuiltinFunction { + name: "bool_to_str", + return_ty: BuiltinReturn::Str, + }, ]; pub fn lookup(name: &str) -> Option<&'static BuiltinFunction> { diff --git a/src/codegen.rs b/src/codegen.rs index a51124c..c3801ae 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -15,7 +15,7 @@ //! instructions (e.g. `IAdd %3, %5` materializes `%3` and `%5` //! first). Cranelift always needs nested values; doing it //! through `TirRef` indexing is the point. -//! 2. The `eval_inst` memoization map (`HashMap`) +//! 2. The `eval_inst` memoization map (`HashMap`) //! so a shared sub-expression isn't re-emitted. TIR today is //! tree-shaped (one parent per inst) so this is purely //! defensive — but it's the right invariant before lazy sema @@ -25,7 +25,7 @@ use crate::ast::CompoundOp; use crate::tir::{Tir, TirData, TirRef, TirTag}; use crate::types::{InternPool, StringId, TypeId, TypeKind}; -use cranelift::codegen::ir::{BlockArg, FuncRef}; +use cranelift::codegen::ir::{ArgumentPurpose, BlockArg, FuncRef}; use cranelift::codegen::isa; use cranelift::codegen::settings::{self, Configurable}; use cranelift::prelude::*; @@ -35,16 +35,25 @@ use cranelift_object::{ObjectBuilder, ObjectModule}; use std::collections::HashMap; use target_lexicon::Triple; +/// Returns `true` if `ty` resolves to `Str` in the pool. +/// +/// Callers use this to gate multi-value (fat-pointer) paths before +/// reaching `cranelift_type_for`, which panics on `Str`. +fn is_str_type(ty: TypeId, pool: &InternPool) -> bool { + matches!(pool.kind(ty), TypeKind::Str) +} + /// Map a TIR type to the corresponding Cranelift IR type. /// /// `Int` uses the target's pointer-sized integer (i64 on 64-bit). /// `Bool` uses I8 (matches Cranelift's `icmp` result width and Rust's bool layout). -/// `Str` is represented as a pointer (pointer-sized integer). +/// `Str` is a fat pointer (ptr, len, cap) — it cannot map to a single type; +/// callers must gate with `is_str_type` before reaching this function. /// `Void` has no Cranelift representation and should not be mapped here. fn cranelift_type_for(ty: TypeId, pool: &InternPool, pointer_ty: types::Type) -> types::Type { match pool.kind(ty) { TypeKind::Int => pointer_ty, - TypeKind::Str => pointer_ty, + TypeKind::Str => panic!("cranelift_type_for: str is multi-value; use is_str_type gate"), TypeKind::Bool => types::I8, TypeKind::Float => types::F64, // Dead code after trap, but Cranelift needs a concrete type for every SSA value @@ -87,6 +96,29 @@ struct LoopContext { continue_target: Block, } +#[derive(Debug, Clone, Copy)] +enum ValueRepr { + Scalar(Value), + Str { ptr: Value, len: Value, cap: Value }, +} + +impl ValueRepr { + #[cfg(test)] + fn expect_scalar(self) -> Value { + match self { + ValueRepr::Scalar(v) => v, + ValueRepr::Str { .. } => panic!("expected Scalar, got Str"), + } + } +} + +#[derive(Clone)] +struct StrLocals { + ptr: Variable, + len: Variable, + cap: Variable, +} + /// Per-function emission state. Lives only for the duration of one /// `compile_function` call; reset between functions because /// Cranelift `Variable` ids and the `TirRef → Value` memo are both @@ -102,11 +134,15 @@ struct FunctionContext<'a, M: Module> { tir: &'a Tir, locals: HashMap, func_ids: &'a HashMap, - /// `TirRef → Value` memo. Materializing the same instruction + /// `TirRef → ValueRepr` memo. Materializing the same instruction /// twice in one function would either duplicate side effects /// (calls) or waste Cranelift IR; both are cheap-but-wrong. - inst_values: HashMap, + inst_values: HashMap, loop_stack: Vec, + str_locals: HashMap, + /// For str-returning functions: the hidden sret pointer (first block param) + /// through which the callee writes the (ptr, len, cap) triple. + sret_ptr: Option, } impl Codegen { @@ -163,9 +199,26 @@ impl Codegen { impl Codegen { pub fn new_jit() -> Result { - let jit_builder = JITBuilder::new(cranelift_module::default_libcall_names()) + let mut jit_builder = JITBuilder::new(cranelift_module::default_libcall_names()) .map_err(|e| format!("Failed to create JIT builder: {}", e))?; + // Register runtime symbols so the JIT can resolve them. + jit_builder.symbols([ + ( + "ryo_str_from_literal", + ryo_runtime::ryo_str_from_literal as *const u8, + ), + ("ryo_str_alloc", ryo_runtime::ryo_str_alloc as *const u8), + ("ryo_str_concat", ryo_runtime::ryo_str_concat as *const u8), + ("ryo_str_eq", ryo_runtime::ryo_str_eq as *const u8), + ("ryo_int_to_str", ryo_runtime::ryo_int_to_str as *const u8), + ( + "ryo_float_to_str", + ryo_runtime::ryo_float_to_str as *const u8, + ), + ("ryo_bool_to_str", ryo_runtime::ryo_bool_to_str as *const u8), + ]); + Ok(Self::from_module( JITModule::new(jit_builder), Triple::host(), @@ -296,8 +349,14 @@ impl Codegen { fn build_signature(&self, tir: &Tir, pool: &InternPool) -> Signature { let mut sig = self.module.make_signature(); for param in &tir.params { - let cl_ty = cranelift_type_for(param.ty, pool, self.int_type); - sig.params.push(AbiParam::new(cl_ty)); + if is_str_type(param.ty, pool) { + sig.params.push(AbiParam::new(self.int_type)); // ptr + sig.params.push(AbiParam::new(types::I64)); // len + sig.params.push(AbiParam::new(types::I64)); // cap + } else { + let cl_ty = cranelift_type_for(param.ty, pool, self.int_type); + sig.params.push(AbiParam::new(cl_ty)); + } } // C-ABI shim for `main`: Ryo's `fn main()` is void, but the // host C runtime (crt0 via zig cc, or our JIT trampoline) @@ -308,8 +367,16 @@ impl Codegen { if is_main { sig.returns.push(AbiParam::new(self.int_type)); } else if tir.return_type != pool.void() { - let cl_ty = cranelift_type_for(tir.return_type, pool, self.int_type); - sig.returns.push(AbiParam::new(cl_ty)); + if is_str_type(tir.return_type, pool) { + // sret: hidden pointer prepended to regular params, no IR-level return. + sig.params.insert( + 0, + AbiParam::special(self.int_type, ArgumentPurpose::StructReturn), + ); + } else { + let cl_ty = cranelift_type_for(tir.return_type, pool, self.int_type); + sig.returns.push(AbiParam::new(cl_ty)); + } } sig } @@ -336,12 +403,41 @@ impl Codegen { let int_type = self.int_type; let mut locals: HashMap = HashMap::new(); - for (i, param) in tir.params.iter().enumerate() { - let cl_ty = cranelift_type_for(param.ty, pool, int_type); - let var = builder.declare_var(cl_ty); - let param_val = builder.block_params(entry_block)[i]; - builder.def_var(var, param_val); - locals.insert(param.name, var); + let is_main = pool.str(tir.name) == "main"; + let returns_str = !is_main && is_str_type(tir.return_type, pool); + let mut block_idx: usize = if returns_str { 1 } else { 0 }; + let sret_ptr = if returns_str { + Some(builder.block_params(entry_block)[0]) + } else { + None + }; + + let mut str_param_locals: HashMap = HashMap::new(); + + for param in tir.params.iter() { + if is_str_type(param.ty, pool) { + let var_ptr = builder.declare_var(int_type); + let var_len = builder.declare_var(types::I64); + let var_cap = builder.declare_var(types::I64); + builder.def_var(var_ptr, builder.block_params(entry_block)[block_idx]); + builder.def_var(var_len, builder.block_params(entry_block)[block_idx + 1]); + builder.def_var(var_cap, builder.block_params(entry_block)[block_idx + 2]); + str_param_locals.insert( + param.name, + StrLocals { + ptr: var_ptr, + len: var_len, + cap: var_cap, + }, + ); + block_idx += 3; + } else { + let cl_ty = cranelift_type_for(param.ty, pool, int_type); + let var = builder.declare_var(cl_ty); + builder.def_var(var, builder.block_params(entry_block)[block_idx]); + locals.insert(param.name, var); + block_idx += 1; + } } let mut ctx: FunctionContext<'_, M> = FunctionContext { @@ -356,22 +452,21 @@ impl Codegen { func_ids, inst_values: HashMap::new(), loop_stack: Vec::new(), + str_locals: str_param_locals, + sret_ptr, }; let has_return = Self::emit_body(&mut builder, &mut ctx, &tir.body_stmts())?; if !has_return { - let is_main = pool.str(tir.name) == "main"; - if is_main || tir.return_type != pool.void() { - // `main` always returns int 0 to the OS even - // when Ryo declares it void; non-main - // non-void functions also fall through to a - // zero return today (sema accepts missing - // returns; control-flow analysis lands in M8b). + if is_main { let zero = builder.ins().iconst(int_type, 0); builder.ins().return_(&[zero]); - } else { + } else if returns_str || tir.return_type == pool.void() { builder.ins().return_(&[]); + } else { + let zero = builder.ins().iconst(int_type, 0); + builder.ins().return_(&[zero]); } } @@ -409,8 +504,10 @@ impl Codegen { stmts: &[TirRef], ) -> Result { let saved_locals = ctx.locals.clone(); + let saved_str_locals = ctx.str_locals.clone(); let block_terminated = Self::emit_body(builder, ctx, stmts)?; ctx.locals = saved_locals; + ctx.str_locals = saved_str_locals; Ok(block_terminated) } @@ -426,6 +523,29 @@ impl Codegen { match inst.tag { TirTag::VarDecl => { let view = ctx.tir.var_decl_view(r); + if is_str_type(inst.ty, ctx.pool) { + let repr = Self::eval_inst_str(builder, ctx, view.initializer)?; + match repr { + ValueRepr::Str { ptr, len, cap } => { + let var_ptr = builder.declare_var(ctx.int_type); + let var_len = builder.declare_var(types::I64); + let var_cap = builder.declare_var(types::I64); + builder.def_var(var_ptr, ptr); + builder.def_var(var_len, len); + builder.def_var(var_cap, cap); + ctx.str_locals.insert( + view.name, + StrLocals { + ptr: var_ptr, + len: var_len, + cap: var_cap, + }, + ); + } + _ => unreachable!("str-typed initializer should produce ValueRepr::Str"), + } + return Ok(false); + } let val = Self::eval_inst(builder, ctx, view.initializer)?; // The variable's resolved type lives in the VarDecl // inst's `ty` slot directly — no side-table lookup. @@ -440,8 +560,21 @@ impl Codegen { TirData::UnOp(o) => o, _ => unreachable!("Return must carry TirData::UnOp"), }; - let val = Self::eval_inst(builder, ctx, operand)?; - builder.ins().return_(&[val]); + if is_str_type(ctx.tir.return_type, ctx.pool) { + let sret = ctx.sret_ptr.expect("str-returning fn must have sret_ptr"); + let repr = Self::eval_inst_str(builder, ctx, operand)?; + let (ptr, len, cap) = match repr { + ValueRepr::Str { ptr, len, cap } => (ptr, len, cap), + _ => unreachable!("str return must produce ValueRepr::Str"), + }; + builder.ins().store(MemFlags::trusted(), ptr, sret, 0); + builder.ins().store(MemFlags::trusted(), len, sret, 8); + builder.ins().store(MemFlags::trusted(), cap, sret, 16); + builder.ins().return_(&[]); + } else { + let val = Self::eval_inst(builder, ctx, operand)?; + builder.ins().return_(&[val]); + } Ok(true) } TirTag::ReturnVoid => { @@ -467,6 +600,22 @@ impl Codegen { TirTag::IfStmt => Self::generate_if_stmt(builder, ctx, r), TirTag::Assign => { let view = ctx.tir.assign_view(r); + if is_str_type(inst.ty, ctx.pool) { + let repr = Self::eval_inst_str(builder, ctx, view.value)?; + let ValueRepr::Str { ptr, len, cap } = repr else { + unreachable!("str-typed assign should produce ValueRepr::Str"); + }; + let locals = ctx.str_locals.get(&view.name).ok_or_else(|| { + format!( + "Undefined string variable in assign: '{}'", + ctx.pool.str(view.name) + ) + })?; + builder.def_var(locals.ptr, ptr); + builder.def_var(locals.len, len); + builder.def_var(locals.cap, cap); + return Ok(false); + } let val = Self::eval_inst(builder, ctx, view.value)?; let var = ctx.locals.get(&view.name).ok_or_else(|| { format!( @@ -751,8 +900,14 @@ impl Codegen { ctx: &mut FunctionContext<'_, M>, r: TirRef, ) -> Result { - if let Some(&v) = ctx.inst_values.get(&r) { - return Ok(v); + if let Some(repr) = ctx.inst_values.get(&r) { + return Ok(match repr { + ValueRepr::Scalar(v) => *v, + // str-returning calls cache ValueRepr::Str; return the ptr + // component as the scalar stand-in (callers that need the + // full triple use eval_inst_str). + ValueRepr::Str { ptr, .. } => *ptr, + }); } let inst = ctx.tir.inst(r); let value = match inst.tag { @@ -769,7 +924,16 @@ impl Codegen { _ => unreachable!("FloatConst must carry TirData::Float"), }, TirTag::StrConst => match inst.data { - TirData::Str(id) => emit_str_literal(builder, ctx, id)?, + TirData::Str(id) => { + // Returns the raw .rodata pointer — used by __ryo_panic + // which takes (ptr, len) scalars. For fat-pointer str + // materialisation, callers use eval_inst_str instead. + let content = ctx.pool.str(id); + let data_id = + store_string(id, content, ctx.module, ctx.data_ctx, ctx.string_data)?; + let data_ref = ctx.module.declare_data_in_func(data_id, builder.func); + builder.ins().global_value(ctx.int_type, data_ref) + } _ => unreachable!("StrConst must carry TirData::Str"), }, TirTag::Var => match inst.data { @@ -918,6 +1082,53 @@ impl Codegen { Self::generate_if_stmt(builder, ctx, r)?; builder.ins().iconst(ctx.int_type, 0) } + TirTag::StrLen => { + let operand = match inst.data { + TirData::UnOp(r) => r, + _ => unreachable!("StrLen must carry TirData::UnOp"), + }; + let repr = Self::eval_inst_str(builder, ctx, operand)?; + match repr { + ValueRepr::Str { len, .. } => len, + _ => unreachable!("StrLen operand must produce ValueRepr::Str"), + } + } + TirTag::StrCmpEq | TirTag::StrCmpNe => { + let (lhs, rhs) = match inst.data { + TirData::BinOp { lhs, rhs } => (lhs, rhs), + _ => unreachable!(), + }; + let l_repr = Self::eval_inst_str(builder, ctx, lhs)?; + let r_repr = Self::eval_inst_str(builder, ctx, rhs)?; + let (l_ptr, l_len) = match l_repr { + ValueRepr::Str { ptr, len, .. } => (ptr, len), + _ => unreachable!(), + }; + let (r_ptr, r_len) = match r_repr { + ValueRepr::Str { ptr, len, .. } => (ptr, len), + _ => unreachable!(), + }; + + let eq_ref = Self::declare_runtime_fn( + ctx.module, + builder, + "ryo_str_eq", + &[ctx.int_type, types::I64, ctx.int_type, types::I64], + &[types::I8], + )?; + let call = builder.ins().call(eq_ref, &[l_ptr, l_len, r_ptr, r_len]); + let result = builder.inst_results(call)[0]; + + if inst.tag == TirTag::StrCmpNe { + let one = builder.ins().iconst(types::I8, 1); + builder.ins().bxor(result, one) + } else { + result + } + } + TirTag::StrConcat => { + return Err("StrConcat must be materialized through eval_inst_str".to_string()); + } TirTag::Unreachable => { return Err( "codegen reached an Unreachable TIR inst — sema must have errored".to_string(), @@ -931,10 +1142,229 @@ impl Codegen { )); } }; - ctx.inst_values.insert(r, value); + // Don't overwrite if emit_call already cached a Str repr (sret convention). + ctx.inst_values.entry(r).or_insert(ValueRepr::Scalar(value)); Ok(value) } + /// Declare an external runtime function by name and return a + /// `FuncRef` usable in the current function being built. + fn declare_runtime_fn( + module: &mut M, + builder: &mut FunctionBuilder, + name: &str, + params: &[types::Type], + returns: &[types::Type], + ) -> Result { + let mut sig = module.make_signature(); + for &p in params { + sig.params.push(AbiParam::new(p)); + } + for &r in returns { + sig.returns.push(AbiParam::new(r)); + } + let func_id = module + .declare_function(name, Linkage::Import, &sig) + .map_err(|e| format!("Failed to declare {}: {}", name, e))?; + Ok(module.declare_func_in_func(func_id, builder.func)) + } + + /// Materialize a str-typed TIR instruction, returning a + /// `ValueRepr::Str` triple. Falls back to scalar `eval_inst` + /// for non-str instructions. + fn eval_inst_str( + builder: &mut FunctionBuilder, + ctx: &mut FunctionContext<'_, M>, + r: TirRef, + ) -> Result { + if let Some(repr) = ctx.inst_values.get(&r) { + return Ok(*repr); + } + let inst = ctx.tir.inst(r); + let repr = match inst.tag { + TirTag::StrConst => { + let id = match inst.data { + TirData::Str(id) => id, + _ => unreachable!(), + }; + Self::emit_str_literal_fat(builder, ctx, id)? + } + TirTag::Var => { + let name = match inst.data { + TirData::Var(name) => name, + _ => unreachable!(), + }; + if let Some(locals) = ctx.str_locals.get(&name) { + ValueRepr::Str { + ptr: builder.use_var(locals.ptr), + len: builder.use_var(locals.len), + cap: builder.use_var(locals.cap), + } + } else { + // Not a str local — fall through to scalar + let val = Self::eval_inst(builder, ctx, r)?; + return Ok(ValueRepr::Scalar(val)); + } + } + TirTag::Call => { + let view = ctx.tir.call_view(r); + let name_str = ctx.pool.str(view.name); + if name_str == "int_to_str" + || name_str == "float_to_str" + || name_str == "bool_to_str" + { + let arg_val = Self::eval_inst(builder, ctx, view.args[0])?; + + let slot = builder.create_sized_stack_slot(StackSlotData::new( + StackSlotKind::ExplicitSlot, + 24, + 3, + )); + let out_ptr = builder.ins().stack_addr(ctx.int_type, slot, 0); + + let (fn_name, param_ty) = match name_str { + "int_to_str" => ("ryo_int_to_str", ctx.int_type), + "float_to_str" => ("ryo_float_to_str", types::F64), + "bool_to_str" => ("ryo_bool_to_str", types::I8), + _ => unreachable!(), + }; + + let func_ref = Self::declare_runtime_fn( + ctx.module, + builder, + fn_name, + &[param_ty, ctx.int_type], + &[], + )?; + builder.ins().call(func_ref, &[arg_val, out_ptr]); + + let ptr = builder + .ins() + .load(ctx.int_type, MemFlags::trusted(), out_ptr, 0); + let len = builder + .ins() + .load(types::I64, MemFlags::trusted(), out_ptr, 8); + let cap = builder + .ins() + .load(types::I64, MemFlags::trusted(), out_ptr, 16); + + ValueRepr::Str { ptr, len, cap } + } else { + // User call — eval_inst triggers emit_call which handles + // sret for str-returning calls and caches ValueRepr::Str. + Self::eval_inst(builder, ctx, r)?; + if let Some(repr) = ctx.inst_values.get(&r) { + return Ok(*repr); + } + unreachable!("str-returning user call must cache ValueRepr::Str via emit_call"); + } + } + TirTag::StrConcat => { + let (lhs, rhs) = match inst.data { + TirData::BinOp { lhs, rhs } => (lhs, rhs), + _ => unreachable!(), + }; + let l_repr = Self::eval_inst_str(builder, ctx, lhs)?; + let r_repr = Self::eval_inst_str(builder, ctx, rhs)?; + let (l_ptr, l_len) = match l_repr { + ValueRepr::Str { ptr, len, .. } => (ptr, len), + _ => unreachable!(), + }; + let (r_ptr, r_len) = match r_repr { + ValueRepr::Str { ptr, len, .. } => (ptr, len), + _ => unreachable!(), + }; + + let slot = builder.create_sized_stack_slot(StackSlotData::new( + StackSlotKind::ExplicitSlot, + 24, + 3, + )); + let out_ptr = builder.ins().stack_addr(ctx.int_type, slot, 0); + + let concat_ref = Self::declare_runtime_fn( + ctx.module, + builder, + "ryo_str_concat", + &[ + ctx.int_type, + types::I64, + ctx.int_type, + types::I64, + ctx.int_type, + ], + &[], + )?; + builder + .ins() + .call(concat_ref, &[l_ptr, l_len, r_ptr, r_len, out_ptr]); + + let ptr = builder + .ins() + .load(ctx.int_type, MemFlags::trusted(), out_ptr, 0); + let len = builder + .ins() + .load(types::I64, MemFlags::trusted(), out_ptr, 8); + let cap = builder + .ins() + .load(types::I64, MemFlags::trusted(), out_ptr, 16); + + ValueRepr::Str { ptr, len, cap } + } + _ => { + // Delegate to scalar eval_inst for non-str instructions + let val = Self::eval_inst(builder, ctx, r)?; + return Ok(ValueRepr::Scalar(val)); + } + }; + ctx.inst_values.insert(r, repr); + Ok(repr) + } + + /// Emit a string literal as a fat pointer triple (ptr, len, cap) + /// by calling `ryo_str_from_literal` at runtime. + fn emit_str_literal_fat( + builder: &mut FunctionBuilder, + ctx: &mut FunctionContext<'_, M>, + id: StringId, + ) -> Result { + let content = ctx.pool.str(id); + let data_id = store_string(id, content, ctx.module, ctx.data_ctx, ctx.string_data)?; + let data_ref = ctx.module.declare_data_in_func(data_id, builder.func); + let rodata_ptr = builder.ins().global_value(ctx.int_type, data_ref); + let lit_len = builder.ins().iconst(types::I64, content.len() as i64); + + // Allocate 24-byte stack slot for out parameter (8-byte aligned) + let slot = + builder.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 24, 3)); + let out_ptr = builder.ins().stack_addr(ctx.int_type, slot, 0); + + // Call ryo_str_from_literal(data, len, out) + let from_literal_ref = Self::declare_runtime_fn( + ctx.module, + builder, + "ryo_str_from_literal", + &[ctx.int_type, types::I64, ctx.int_type], + &[], + )?; + builder + .ins() + .call(from_literal_ref, &[rodata_ptr, lit_len, out_ptr]); + + // Load the triple back from the stack slot + let ptr = builder + .ins() + .load(ctx.int_type, MemFlags::trusted(), out_ptr, 0); + let len = builder + .ins() + .load(types::I64, MemFlags::trusted(), out_ptr, 8); + let cap = builder + .ins() + .load(types::I64, MemFlags::trusted(), out_ptr, 16); + + Ok(ValueRepr::Str { ptr, len, cap }) + } + fn emit_call( builder: &mut FunctionBuilder, ctx: &mut FunctionContext<'_, M>, @@ -944,34 +1374,97 @@ impl Codegen { let name_id = view.name; let name_str = ctx.pool.str(name_id); - // print is the only builtin with custom codegen (inline syscall). - // __ryo_panic and user functions go through the normal call path. + // print and __ryo_panic have custom codegen (inline syscall / raw scalar ABI). + // They do NOT use the str-triple expansion that user functions use. + if name_str == "__ryo_panic" { + // __ryo_panic(ptr, len) takes raw scalars — the StrConst .rodata + // pointer and an int len — NOT the str-triple ABI. + let mut arg_values = Vec::with_capacity(view.args.len()); + for arg in &view.args { + arg_values.push(Self::eval_inst(builder, ctx, *arg)?); + } + let callee_id = *ctx + .func_ids + .get(&name_id) + .ok_or_else(|| format!("Undefined function: '{}'", name_str))?; + let callee_ref = ctx.module.declare_func_in_func(callee_id, builder.func); + builder.ins().call(callee_ref, &arg_values); + builder.ins().trap(TrapCode::user(1).unwrap()); + let dead = builder.create_block(); + builder.seal_block(dead); + builder.switch_to_block(dead); + return Ok(builder.ins().iconst(types::I8, 0)); + } + if name_str == "print" { Self::generate_print_call(builder, ctx, &view.args)?; return Ok(builder.ins().iconst(ctx.int_type, 0)); } + // Formatter builtins — when called as a bare statement (result + // discarded), we still emit the call but throw away the output. + // The primary path is eval_inst_str (used when result is assigned + // to a str variable or passed to print). + if name_str == "int_to_str" || name_str == "float_to_str" || name_str == "bool_to_str" { + let arg_val = Self::eval_inst(builder, ctx, view.args[0])?; + + let slot = builder.create_sized_stack_slot(StackSlotData::new( + StackSlotKind::ExplicitSlot, + 24, + 3, + )); + let out_ptr = builder.ins().stack_addr(ctx.int_type, slot, 0); + + let (fn_name, param_ty) = match name_str { + "int_to_str" => ("ryo_int_to_str", ctx.int_type), + "float_to_str" => ("ryo_float_to_str", types::F64), + "bool_to_str" => ("ryo_bool_to_str", types::I8), + _ => unreachable!(), + }; + + let func_ref = Self::declare_runtime_fn( + ctx.module, + builder, + fn_name, + &[param_ty, ctx.int_type], + &[], + )?; + builder.ins().call(func_ref, &[arg_val, out_ptr]); + + return Ok(builder.ins().iconst(ctx.int_type, 0)); + } + let callee_id = *ctx .func_ids .get(&name_id) .ok_or_else(|| format!("Undefined function: '{}'", name_str))?; - let mut arg_values = Vec::with_capacity(view.args.len()); + let mut arg_values = Vec::with_capacity(view.args.len() * 3 + 1); for arg in &view.args { - arg_values.push(Self::eval_inst(builder, ctx, *arg)?); + let arg_ty = ctx.tir.inst(*arg).ty; + if is_str_type(arg_ty, ctx.pool) { + let repr = Self::eval_inst_str(builder, ctx, *arg)?; + match repr { + ValueRepr::Str { ptr, len, cap } => { + arg_values.push(ptr); + arg_values.push(len); + arg_values.push(cap); + } + _ => unreachable!("str-typed arg must produce ValueRepr::Str"), + } + } else { + arg_values.push(Self::eval_inst(builder, ctx, *arg)?); + } } let callee_ref = ctx.module.declare_func_in_func(callee_id, builder.func); - let call = builder.ins().call(callee_ref, &arg_values); - let results = builder.inst_results(call); + + let ret_ty = ctx.tir.inst(r).ty; // If the callee returns never (e.g. __ryo_panic), the call is // a terminator. Emit a trap + dead block for subsequent IR. - // The dead block needs no explicit terminator — compile_function's - // fallthrough `return 0` provides one. Cranelift verifier is - // happy as long as every block has exactly one terminator. - let ret_ty = ctx.tir.inst(r).ty; if ctx.pool.is_never(ret_ty) { + builder.ins().call(callee_ref, &arg_values); builder.ins().trap(TrapCode::user(1).unwrap()); let dead = builder.create_block(); builder.seal_block(dead); @@ -980,6 +1473,33 @@ impl Codegen { return Ok(builder.ins().iconst(dummy_ty, 0)); } + if is_str_type(ret_ty, ctx.pool) { + // sret: allocate 24-byte slot, prepend pointer to args + let slot = builder.create_sized_stack_slot(StackSlotData::new( + StackSlotKind::ExplicitSlot, + 24, + 3, + )); + let out = builder.ins().stack_addr(ctx.int_type, slot, 0); + + let mut all_args = Vec::with_capacity(arg_values.len() + 1); + all_args.push(out); + all_args.extend(arg_values); + + builder.ins().call(callee_ref, &all_args); + + let ptr = builder + .ins() + .load(ctx.int_type, MemFlags::trusted(), out, 0); + let len = builder.ins().load(types::I64, MemFlags::trusted(), out, 8); + let cap = builder.ins().load(types::I64, MemFlags::trusted(), out, 16); + ctx.inst_values.insert(r, ValueRepr::Str { ptr, len, cap }); + return Ok(ptr); // dummy scalar — consumers use eval_inst_str + } + + let call = builder.ins().call(callee_ref, &arg_values); + let results = builder.inst_results(call); + if results.is_empty() { Ok(builder.ins().iconst(ctx.int_type, 0)) } else { @@ -992,40 +1512,22 @@ impl Codegen { ctx: &mut FunctionContext<'_, M>, args: &[TirRef], ) -> Result<(), String> { - // Sema has already validated arity and the string-literal - // constraint (see `sema::check_builtin_call`). The matches - // below are therefore infallible. debug_assert_eq!(args.len(), 1, "sema should reject print() arity errors"); - let string_id = match ctx.tir.inst(args[0]).data { - TirData::Str(id) => id, - other => unreachable!( - "sema should reject non-literal print() args, got {:?}", - other - ), - }; - let string_content = ctx.pool.str(string_id); - - let data_id = store_string( - string_id, - string_content, - ctx.module, - ctx.data_ctx, - ctx.string_data, - )?; - let data_ref = ctx.module.declare_data_in_func(data_id, builder.func); - let string_ptr = builder.ins().global_value(ctx.int_type, data_ref); + debug_assert!( + is_str_type(ctx.tir.inst(args[0]).ty, ctx.pool), + "sema should reject non-str print() args", + ); - let string_len = builder - .ins() - .iconst(ctx.int_type, string_content.len() as i64); - let fd = builder.ins().iconst(types::I32, 1); + let repr = Self::eval_inst_str(builder, ctx, args[0])?; + let (ptr, len) = match repr { + ValueRepr::Str { ptr, len, .. } => (ptr, len), + _ => unreachable!("str-typed arg produced Scalar"), + }; check_platform_support(ctx.triple)?; - + let fd = builder.ins().iconst(types::I32, 1); let write_ref = declare_write(ctx.module, builder, ctx.int_type)?; - let call_inst = builder.ins().call(write_ref, &[fd, string_ptr, string_len]); - let _bytes_written = builder.inst_results(call_inst)[0]; - + builder.ins().call(write_ref, &[fd, ptr, len]); Ok(()) } } @@ -1113,21 +1615,6 @@ fn declare_write( Ok(module.declare_func_in_func(func_id, builder.func)) } -/// Materialize a string literal pointer into the function. Pulled -/// out of the `Codegen` impl so it can be called without juggling -/// `&mut self` borrows alongside the `FunctionContext`'s mutable -/// references to the same fields. -fn emit_str_literal( - builder: &mut FunctionBuilder, - ctx: &mut FunctionContext<'_, M>, - id: StringId, -) -> Result { - let content = ctx.pool.str(id); - let data_id = store_string(id, content, ctx.module, ctx.data_ctx, ctx.string_data)?; - let data_ref = ctx.module.declare_data_in_func(data_id, builder.func); - Ok(builder.ins().global_value(ctx.int_type, data_ref)) -} - fn store_string( content_id: StringId, content: &str, @@ -1171,3 +1658,43 @@ fn no_unreachable_in(tirs: &[Tir]) -> bool { } true } + +#[cfg(test)] +mod tests { + use super::*; + use cranelift::codegen::ir::Value as ClifValue; + + #[test] + fn value_repr_scalar_roundtrip() { + let v = ClifValue::from_u32(1); + let repr = ValueRepr::Scalar(v); + assert_eq!(repr.expect_scalar(), v); + } + + #[test] + fn value_repr_str_fields() { + let repr = ValueRepr::Str { + ptr: ClifValue::from_u32(1), + len: ClifValue::from_u32(2), + cap: ClifValue::from_u32(3), + }; + match repr { + ValueRepr::Str { ptr, len, cap } => { + assert_ne!(ptr, len); + assert_ne!(len, cap); + } + _ => panic!("expected Str"), + } + } + + #[test] + #[should_panic(expected = "expected Scalar, got Str")] + fn value_repr_expect_scalar_panics_on_str() { + let repr = ValueRepr::Str { + ptr: ClifValue::from_u32(1), + len: ClifValue::from_u32(2), + cap: ClifValue::from_u32(3), + }; + repr.expect_scalar(); + } +} diff --git a/src/lexer.rs b/src/lexer.rs index 4934264..c2ea72f 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -90,6 +90,7 @@ pub enum Token { LBrace, RBrace, Comma, + Dot, // Newline + indentation tokens (post-processed by `indent`). Newline, @@ -153,6 +154,7 @@ impl fmt::Display for Token { Self::LBrace => write!(f, "{{"), Self::RBrace => write!(f, "}}"), Self::Comma => write!(f, ","), + Self::Dot => write!(f, "."), Self::Newline => write!(f, ""), Self::Indent => write!(f, ""), Self::Dedent => write!(f, ""), @@ -268,6 +270,8 @@ pub(crate) enum RawToken<'a> { RBrace, #[token(",")] Comma, + #[token(".")] + Dot, #[regex(r"\n[ \t]*")] Newline(&'a str), @@ -458,6 +462,7 @@ fn intern_token(raw: RawToken<'_>, span: Span, pool: &mut InternPool) -> Result< RawToken::LBrace => Token::LBrace, RawToken::RBrace => Token::RBrace, RawToken::Comma => Token::Comma, + RawToken::Dot => Token::Dot, RawToken::Newline(_) => Token::Newline, RawToken::Indent => Token::Indent, diff --git a/src/linker.rs b/src/linker.rs index e1a3548..f9d4627 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -1,12 +1,29 @@ use crate::errors::CompilerError; use crate::toolchain; +use std::ffi::OsStr; +use std::path::Path; use std::process::Command; -pub(crate) fn link_executable(obj_file: &str, exe_file: &str) -> Result<(), CompilerError> { +pub(crate) fn link_executable( + obj_file: &str, + exe_file: &str, + runtime_lib: &Path, +) -> Result<(), CompilerError> { let zig_path = toolchain::ensure_zig()?; - let output = Command::new(&zig_path) - .args(["cc", "-o", exe_file, obj_file]) + let mut cmd = Command::new(&zig_path); + cmd.args(["cc", "-o", exe_file, obj_file]); + cmd.arg(runtime_lib.as_os_str()); + + // Rust's staticlib bundles precompiled std objects that reference + // _Unwind_* symbols even with panic=abort (from backtrace support). + // On macOS the system libunwind satisfies them; on Linux we must + // explicitly link zig's bundled libunwind. + if cfg!(target_os = "linux") { + cmd.arg(OsStr::new("-lunwind")); + } + + let output = cmd .output() .map_err(|e| CompilerError::LinkError(format!("Failed to run zig cc: {e}")))?; diff --git a/src/main.rs b/src/main.rs index e27c8ee..5e1a413 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,6 +12,8 @@ mod lexer; mod linker; mod parser; mod pipeline; +#[allow(dead_code)] +mod runtime_lib; mod sema; mod tir; mod toolchain; diff --git a/src/parser.rs b/src/parser.rs index bb739de..08bb6ec 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -406,28 +406,57 @@ where let ident_expr = select! { Token::Ident(name) => name } .map_with(|name, e| Expression::new(ExprKind::Ident(name), e.span())); - let parenthesized = expr.delimited_by(just(Token::LParen), just(Token::RParen)); + let parenthesized = expr + .clone() + .delimited_by(just(Token::LParen), just(Token::RParen)); call.or(ident_expr).or(literal).or(parenthesized) }; + let postfix = atom + .foldl( + just(Token::Dot) + .ignore_then(select! { Token::Ident(name) => name }) + .then( + expr.clone() + .separated_by(just(Token::Comma)) + .allow_trailing() + .collect::>() + .delimited_by(just(Token::LParen), just(Token::RParen)), + ) + .map_with(|(method, args), e| (method, args, e.span())) + .repeated(), + |receiver, (method, args, span): (_, _, SimpleSpan)| { + let start = receiver.span.start; + let end = span.end; + Expression::new( + ExprKind::MethodCall { + receiver: Box::new(receiver), + method, + args, + }, + SimpleSpan::new((), start..end), + ) + }, + ) + .boxed(); + let unary_op = choice(( just(Token::Sub).to(UnaryOperator::Neg), just(Token::Not).to(UnaryOperator::Not), )); - let unary = - unary_op - .repeated() - .collect::>() - .then(atom) - .map_with(|(ops, expr), e| { - let mut result = expr; - for op in ops.into_iter().rev() { - result = Expression::new(ExprKind::UnaryOp(op, Box::new(result)), e.span()); - } - result - }); + let unary = unary_op + .repeated() + .collect::>() + .then(postfix) + .map_with(|(ops, expr), e| { + let mut result = expr; + for op in ops.into_iter().rev() { + result = Expression::new(ExprKind::UnaryOp(op, Box::new(result)), e.span()); + } + result + }); let term = unary.clone().foldl( choice(( diff --git a/src/pipeline.rs b/src/pipeline.rs index b3f27ea..110f692 100644 --- a/src/pipeline.rs +++ b/src/pipeline.rs @@ -7,6 +7,7 @@ use crate::errors::CompilerError; use crate::lexer::{self, Token}; use crate::linker; use crate::parser::program_parser; +use crate::runtime_lib; use crate::sema; use crate::tir::{self, Tir}; use crate::types::InternPool; @@ -457,7 +458,13 @@ pub(crate) fn build_file(file: &Path) -> Result<(), CompilerError> { fs::write(&obj_filename, obj_bytes).map_err(CompilerError::from)?; println!("Generated object file: {}", obj_filename); - linker::link_executable(&obj_filename, &exe_filename)?; + // Extract embedded runtime archive and link + let runtime_path = runtime_lib::extract_runtime_to_temp() + .map_err(|e| CompilerError::LinkError(format!("Failed to extract runtime: {e}")))?; + + linker::link_executable(&obj_filename, &exe_filename, &runtime_path)?; + + runtime_lib::cleanup_runtime_temp(&runtime_path); let _ = fs::remove_file(&obj_filename); println!("Built: {}", exe_filename); diff --git a/src/runtime_lib.rs b/src/runtime_lib.rs new file mode 100644 index 0000000..f328e53 --- /dev/null +++ b/src/runtime_lib.rs @@ -0,0 +1,40 @@ +use std::fs; +use std::io; +use std::path::PathBuf; + +const RYO_RUNTIME_LIB: &[u8] = include_bytes!(env!("RYO_RUNTIME_LIB")); + +fn cache_dir() -> Result { + dirs::home_dir() + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "cannot determine home directory")) + .map(|h| h.join(".ryo").join("cache")) +} + +fn content_hash() -> String { + env!("RYO_RUNTIME_HASH")[..16].to_string() +} + +pub fn extract_runtime_to_temp() -> Result { + let dir = cache_dir()?; + let hash = content_hash(); + let path = dir.join(format!("libryo_runtime-{}.a", hash)); + + if path.exists() { + return Ok(path); + } + + fs::create_dir_all(&dir)?; + // Write to a temp name and rename for atomicity + let tmp_path = dir.join(format!( + "libryo_runtime-{}.a.tmp.{}", + hash, + std::process::id() + )); + fs::write(&tmp_path, RYO_RUNTIME_LIB)?; + fs::rename(&tmp_path, &path)?; + Ok(path) +} + +pub fn cleanup_runtime_temp(_path: &std::path::Path) { + // Cached — no cleanup needed. The file persists for future builds. +} diff --git a/src/sema.rs b/src/sema.rs index c416603..6ad4edc 100644 --- a/src/sema.rs +++ b/src/sema.rs @@ -48,7 +48,7 @@ use crate::ast::CompoundOp; use crate::builtins; use crate::diag::{Diag, DiagCode, DiagSink}; -use crate::tir::{Tir, TirBuilder, TirParam, TirRef, TirTag}; +use crate::tir::{Tir, TirBuilder, TirData, TirParam, TirRef, TirTag}; use crate::types::{InternPool, StringId, TypeId, TypeKind}; use crate::uir::{CallView, FuncBody, InstData, InstRef, InstTag, Span, Uir, VarDeclView}; use std::collections::{HashMap, VecDeque}; @@ -965,6 +965,74 @@ fn analyze_expr(sema: &mut Sema<'_>, fcx: &mut FuncCtx, scope: &Scope, r: InstRe } check_call(sema, fcx, &view, &arg_tirs, span) } + InstTag::MethodCall => { + let view = sema.uir.method_call_view(r); + let receiver_tir = analyze_expr(sema, fcx, scope, view.receiver); + let receiver_ty = fcx.builder.ty_of(receiver_tir); + let method_name = sema.pool.str(view.name).to_string(); + + for &arg in &view.args { + analyze_expr(sema, fcx, scope, arg); + } + + // For now, only str has methods + if sema.pool.kind(receiver_ty) != TypeKind::Str { + if !sema.pool.is_error(receiver_ty) { + sema.sink.emit(Diag::error( + span, + DiagCode::TypeMismatch, + format!("type '{}' has no methods", sema.pool.display(receiver_ty)), + )); + } + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + + match method_name.as_str() { + "len" => { + if !view.args.is_empty() { + sema.sink.emit(Diag::error( + span, + DiagCode::ArityMismatch, + "str.len() takes no arguments".to_string(), + )); + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + fcx.builder.push_typed( + TirTag::StrLen, + TirData::UnOp(receiver_tir), + sema.pool.int(), + span, + ) + } + "is_empty" => { + if !view.args.is_empty() { + sema.sink.emit(Diag::error( + span, + DiagCode::ArityMismatch, + "str.is_empty() takes no arguments".to_string(), + )); + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + let len_tir = fcx.builder.push_typed( + TirTag::StrLen, + TirData::UnOp(receiver_tir), + sema.pool.int(), + span, + ); + let zero = fcx.builder.int_const(0, sema.pool.int(), span); + fcx.builder + .binary(TirTag::ICmpEq, sema.pool.bool_(), len_tir, zero, span) + } + _ => { + sema.sink.emit(Diag::error( + span, + DiagCode::UndefinedFunction, + format!("str has no method '{}'", method_name), + )); + fcx.builder.unreachable(sema.pool.error_type(), span) + } + } + } other => panic!( "analyze_expr: instruction at %{} is not an expression (tag={:?})", r.index(), @@ -1061,15 +1129,13 @@ fn check_binary_op( } TypeKind::Error => fcx.builder.unreachable(sema.pool.error_type(), span), TypeKind::Str => { - sema.sink.emit(Diag::error( - span, - DiagCode::UnsupportedOperator, - format!( - "equality operator '{}' not supported for type 'str' (yet)", - bin_op_symbol(tag), - ), - )); - fcx.builder.unreachable(sema.pool.error_type(), span) + let tir_tag = match tag { + InstTag::Eq => TirTag::StrCmpEq, + InstTag::NotEq => TirTag::StrCmpNe, + _ => unreachable!(), + }; + fcx.builder + .binary(tir_tag, sema.pool.bool_(), lhs, rhs, span) } TypeKind::Void | TypeKind::Never | TypeKind::Tuple => { sema.sink.emit(Diag::error( @@ -1176,6 +1242,21 @@ fn check_binary_op( fcx.builder .binary(tir_tag, sema.pool.float(), lhs, rhs, span) } + TypeKind::Str => { + if tag != InstTag::Add { + sema.sink.emit(Diag::error( + span, + DiagCode::UnsupportedOperator, + format!( + "arithmetic operator '{}' not supported for type 'str'", + bin_op_symbol(tag), + ), + )); + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + fcx.builder + .binary(TirTag::StrConcat, sema.pool.str_(), lhs, rhs, span) + } TypeKind::Error => fcx.builder.unreachable(sema.pool.error_type(), span), _ => { sema.sink.emit(Diag::error( @@ -1301,7 +1382,7 @@ fn emit_builtin_call( let name = sema.pool.str(view.name); match name { "print" => { - if !check_print_args(sema, view, span) { + if !check_print_args(sema, fcx, view, arg_tirs, span) { return fcx.builder.unreachable(sema.pool.error_type(), span); } let ret_ty = builtin.return_type(sema.pool); @@ -1309,6 +1390,96 @@ fn emit_builtin_call( } "panic" => emit_panic(sema, fcx, view, span), "assert" => emit_assert(sema, fcx, view, arg_tirs, span), + "int_to_str" => { + if view.args.len() != 1 { + sema.sink.emit(Diag::error( + span, + DiagCode::ArityMismatch, + format!( + "int_to_str() takes exactly 1 argument, got {}", + view.args.len() + ), + )); + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + let arg_ty = fcx.builder.ty_of(arg_tirs[0]); + if sema.pool.is_error(arg_ty) { + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + if !matches!(sema.pool.kind(arg_ty), TypeKind::Int) { + sema.sink.emit(Diag::error( + sema.uir.span(view.args[0]), + DiagCode::TypeMismatch, + format!( + "int_to_str() argument must be int, got {}", + sema.pool.display(arg_ty) + ), + )); + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + let ret_ty = builtin.return_type(sema.pool); + fcx.builder.call(view.name, arg_tirs, ret_ty, span) + } + "float_to_str" => { + if view.args.len() != 1 { + sema.sink.emit(Diag::error( + span, + DiagCode::ArityMismatch, + format!( + "float_to_str() takes exactly 1 argument, got {}", + view.args.len() + ), + )); + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + let arg_ty = fcx.builder.ty_of(arg_tirs[0]); + if sema.pool.is_error(arg_ty) { + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + if !matches!(sema.pool.kind(arg_ty), TypeKind::Float) { + sema.sink.emit(Diag::error( + sema.uir.span(view.args[0]), + DiagCode::TypeMismatch, + format!( + "float_to_str() argument must be float, got {}", + sema.pool.display(arg_ty) + ), + )); + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + let ret_ty = builtin.return_type(sema.pool); + fcx.builder.call(view.name, arg_tirs, ret_ty, span) + } + "bool_to_str" => { + if view.args.len() != 1 { + sema.sink.emit(Diag::error( + span, + DiagCode::ArityMismatch, + format!( + "bool_to_str() takes exactly 1 argument, got {}", + view.args.len() + ), + )); + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + let arg_ty = fcx.builder.ty_of(arg_tirs[0]); + if sema.pool.is_error(arg_ty) { + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + if !matches!(sema.pool.kind(arg_ty), TypeKind::Bool) { + sema.sink.emit(Diag::error( + sema.uir.span(view.args[0]), + DiagCode::TypeMismatch, + format!( + "bool_to_str() argument must be bool, got {}", + sema.pool.display(arg_ty) + ), + )); + return fcx.builder.unreachable(sema.pool.error_type(), span); + } + let ret_ty = builtin.return_type(sema.pool); + fcx.builder.call(view.name, arg_tirs, ret_ty, span) + } _ => { let ret_ty = builtin.return_type(sema.pool); fcx.builder.call(view.name, arg_tirs, ret_ty, span) @@ -1428,7 +1599,13 @@ fn build_panic_call( .call(panic_name, &[str_ref, len_ref], sema.pool.never(), span) } -fn check_print_args(sema: &mut Sema<'_>, view: &CallView, span: Span) -> bool { +fn check_print_args( + sema: &mut Sema<'_>, + fcx: &FuncCtx, + view: &CallView, + arg_tirs: &[TirRef], + span: Span, +) -> bool { if view.args.len() != 1 { sema.sink.emit(Diag::error( span, @@ -1437,11 +1614,18 @@ fn check_print_args(sema: &mut Sema<'_>, view: &CallView, span: Span) -> bool { )); return false; } - if !matches!(sema.uir.inst(view.args[0]).tag, InstTag::StrLiteral) { + let arg_ty = fcx.builder.ty_of(arg_tirs[0]); + if sema.pool.is_error(arg_ty) { + return false; + } + if !matches!(sema.pool.kind(arg_ty), TypeKind::Str) { sema.sink.emit(Diag::error( sema.uir.span(view.args[0]), - DiagCode::BuiltinArgKind, - "print() argument must be a string literal", + DiagCode::TypeMismatch, + format!( + "print() argument must be str, got {}", + sema.pool.display(arg_ty) + ), )); return false; } @@ -1723,9 +1907,12 @@ mod tests { } #[test] - fn string_equality_rejected() { - let diags = run("x = \"a\" == \"b\"").unwrap_err(); - assert!(any_code(&diags, DiagCode::UnsupportedOperator)); + fn string_equality_accepted() { + let (tirs, _pool) = run("x = \"a\" == \"b\"").unwrap(); + // The equality produces a bool-typed StrCmpEq instruction. + let body = &tirs[0]; + let has_str_eq = body.instructions.iter().any(|i| i.tag == TirTag::StrCmpEq); + assert!(has_str_eq, "expected StrCmpEq in TIR"); } #[test] @@ -1750,12 +1937,6 @@ mod tests { assert!(matches!(main.inst(v.initializer).data, TirData::Bool(true))); } - #[test] - fn print_with_non_literal_rejected_in_sema() { - let diags = run("x = \"hi\"\nprint(x)").unwrap_err(); - assert!(any_code(&diags, DiagCode::BuiltinArgKind)); - } - #[test] fn print_arity_rejected_in_sema() { let diags = run("print(\"a\", \"b\")").unwrap_err(); diff --git a/src/tir.rs b/src/tir.rs index 7c2bd8d..e7e4696 100644 --- a/src/tir.rs +++ b/src/tir.rs @@ -135,6 +135,16 @@ pub enum TirTag { ICmpGt, ICmpGe, + // String concatenation. + StrConcat, + + // String equality. + StrCmpEq, + StrCmpNe, + + /// Read the `len` field of a str fat pointer. Operand in `TirData::UnOp`. + StrLen, + // Float arithmetic / comparison. FAdd, FSub, @@ -474,6 +484,9 @@ impl TirBuilder { | TirTag::ICmpLe | TirTag::ICmpGt | TirTag::ICmpGe + | TirTag::StrConcat + | TirTag::StrCmpEq + | TirTag::StrCmpNe | TirTag::FAdd | TirTag::FSub | TirTag::FMul @@ -498,6 +511,13 @@ impl TirBuilder { self.push(TirTag::Unreachable, ty, TirData::None, span) } + /// General-purpose instruction emit for tags that don't fit the + /// `unary` / `binary` debug-assert gates. Sema uses this for + /// method-call lowerings like `StrLen`. + pub fn push_typed(&mut self, tag: TirTag, data: TirData, ty: TypeId, span: Span) -> TirRef { + self.push(tag, ty, data, span) + } + fn extra_offset(&self) -> u32 { u32::try_from(self.extra.len()).expect("TIR extra arena exceeded u32::MAX words") } @@ -1083,6 +1103,9 @@ fn bin_op_name(t: TirTag) -> &'static str { TirTag::FCmpLe => "fcmp_le", TirTag::FCmpGt => "fcmp_gt", TirTag::FCmpGe => "fcmp_ge", + TirTag::StrConcat => "str_concat", + TirTag::StrCmpEq => "str_eq", + TirTag::StrCmpNe => "str_ne", TirTag::BoolAnd => "bool_and", TirTag::BoolOr => "bool_or", _ => "?bin", @@ -1095,6 +1118,7 @@ fn un_op_name(t: TirTag) -> &'static str { TirTag::BoolNot => "bool_not", TirTag::Return => "ret", TirTag::ExprStmt => "expr_stmt", + TirTag::StrLen => "str_len", _ => "?un", } } diff --git a/src/uir.rs b/src/uir.rs index 869192f..5664eec 100644 --- a/src/uir.rs +++ b/src/uir.rs @@ -200,6 +200,9 @@ pub enum InstTag { /// `continue` statement. Continue, + + /// Method call (e.g. `receiver.name(args)`). Variable payload in `extra` — see [`method_call_extra`]. + MethodCall, // Reserved for the comptime milestone: // ComptimeBlock, Decl. } @@ -378,6 +381,21 @@ pub mod compound_assign_extra { pub const LEN: usize = 3; } +/// Layout in `extra` for [`InstTag::MethodCall`]: +/// +/// ```text +/// [0] receiver: InstRef.raw() +/// [1] name: StringId.raw() +/// [2] argc: u32 +/// [3..3+argc] args: InstRef.raw() +/// ``` +pub mod method_call_extra { + pub const RECEIVER: usize = 0; + pub const NAME: usize = 1; + pub const ARGC: usize = 2; + pub const ARGS: usize = 3; +} + /// Layout in `extra` for [`InstTag::WhileLoop`]: /// /// ```text @@ -710,6 +728,29 @@ impl UirBuilder { pub fn continue_stmt(&mut self, span: Span) -> InstRef { self.push(InstTag::Continue, InstData::None, span) } + + /// Emits a `MethodCall` with receiver, name, and arg list packed into `extra`. + pub fn method_call( + &mut self, + receiver: InstRef, + name: StringId, + args: &[InstRef], + span: Span, + ) -> InstRef { + let offset = self.extra_offset(); + self.uir.extra.push(receiver.raw()); + self.uir.extra.push(name.raw()); + self.uir.extra.push(Self::len_u32(args.len())); + for arg in args { + self.uir.extra.push(arg.raw()); + } + let len = Self::len_u32(method_call_extra::ARGS + args.len()); + self.push( + InstTag::MethodCall, + InstData::Extra(ExtraRange { offset, len }), + span, + ) + } } // ---------- Read-side helpers ---------- @@ -752,6 +793,13 @@ pub struct ForRangeView { pub body: Vec, } +/// Decoded view of an [`InstTag::MethodCall`] payload. +pub struct MethodCallView { + pub receiver: InstRef, + pub name: StringId, + pub args: Vec, +} + pub struct ElifView { pub cond: InstRef, pub body: Vec, @@ -876,6 +924,29 @@ impl Uir { } } + pub fn method_call_view(&self, r: InstRef) -> MethodCallView { + let inst = self.inst(r); + debug_assert!(matches!(inst.tag, InstTag::MethodCall)); + let range = match inst.data { + InstData::Extra(rng) => rng, + _ => unreachable!("MethodCall must carry InstData::Extra"), + }; + let slice = &self.extra[range.as_range()]; + let receiver = InstRef::from_raw(slice[method_call_extra::RECEIVER]); + let name = StringId::from_raw(slice[method_call_extra::NAME]); + let argc = slice[method_call_extra::ARGC] as usize; + let args = slice[method_call_extra::ARGS..method_call_extra::ARGS + argc] + .iter() + .copied() + .map(InstRef::from_raw) + .collect(); + MethodCallView { + receiver, + name, + args, + } + } + pub fn if_stmt_view(&self, r: InstRef) -> IfStmtView { let inst = self.inst(r); debug_assert!(matches!(inst.tag, InstTag::IfStmt)); @@ -1104,6 +1175,22 @@ fn write_inst( body_refs.join(", ") ) } + (InstTag::MethodCall, InstData::Extra(_)) => { + let view = uir.method_call_view(r); + write!( + f, + "method_call %{}.{}(", + view.receiver.index(), + pool.str(view.name) + )?; + for (i, a) in view.args.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "%{}", a.index())?; + } + writeln!(f, ")") + } (InstTag::Break, InstData::None) => writeln!(f, "break"), (InstTag::Continue, InstData::None) => writeln!(f, "continue"), (tag, data) => writeln!(f, "", tag, data), diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 30da4b4..07cf35f 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1904,3 +1904,378 @@ fn test_for_body_return() { String::from_utf8_lossy(&output.stderr) ); } + +#[test] +fn test_str_variable_print() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "fn main():\n\tname: str = \"Hello\"\n\tprint(name)\n"; + let test_file = create_test_file(temp_dir.path(), "str_var_print.ryo", code); + + let output = run_ryo_command(&["run", "str_var_print.ryo"], &test_file) + .expect("Failed to run ryo command"); + + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("Hello"), + "Output should contain 'Hello', got: {}", + stdout + ); + assert!(stdout.contains("[Result] => 0"), "Should exit with code 0"); +} + +#[test] +fn test_str_concat() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = + "fn main():\n\ta: str = \"Hello, \"\n\tb: str = \"World!\"\n\tc: str = a + b\n\tprint(c)\n"; + let test_file = create_test_file(temp_dir.path(), "str_concat.ryo", code); + + let output = + run_ryo_command(&["run", "str_concat.ryo"], &test_file).expect("Failed to run ryo command"); + + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("Hello, World!"), + "Output should contain 'Hello, World!', got: {}", + stdout + ); + assert!(stdout.contains("[Result] => 0"), "Should exit with code 0"); +} + +#[test] +fn test_str_concat_chained() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "fn main():\n\tresult: str = \"a\" + \"b\" + \"c\"\n\tprint(result)\n"; + let test_file = create_test_file(temp_dir.path(), "str_concat_chained.ryo", code); + + let output = run_ryo_command(&["run", "str_concat_chained.ryo"], &test_file) + .expect("Failed to run ryo command"); + + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("abc"), + "Output should contain 'abc', got: {}", + stdout + ); + assert!(stdout.contains("[Result] => 0"), "Should exit with code 0"); +} + +#[test] +fn test_str_equality() { + let code = "fn main():\n\ta: str = \"hello\"\n\tb: str = \"hello\"\n\tassert(a == b, \"equal strings should be equal\")\n"; + assert_ryo_runs!("str_equality.ryo", code); +} + +#[test] +fn test_str_inequality() { + let code = "fn main():\n\ta: str = \"hello\"\n\tb: str = \"world\"\n\tassert(a != b, \"different strings should not be equal\")\n"; + assert_ryo_runs!("str_inequality.ryo", code); +} + +#[test] +fn test_int_to_str_builtin() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "fn main():\n\ts: str = int_to_str(42)\n\tprint(s)\n"; + let test_file = create_test_file(temp_dir.path(), "int_to_str.ryo", code); + + let output = + run_ryo_command(&["run", "int_to_str.ryo"], &test_file).expect("Failed to run ryo command"); + + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("42"), + "Output should contain '42', got: {}", + stdout + ); +} + +#[test] +fn test_float_to_str_builtin() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "fn main():\n\ts: str = float_to_str(2.75)\n\tprint(s)\n"; + let test_file = create_test_file(temp_dir.path(), "float_to_str.ryo", code); + + let output = run_ryo_command(&["run", "float_to_str.ryo"], &test_file) + .expect("Failed to run ryo command"); + + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("2.75"), + "Output should contain '2.75', got: {}", + stdout + ); +} + +#[test] +fn test_float_to_str_large_number() { + let dir = tempfile::tempdir().unwrap(); + // 18000000000000000000.0 is a large number (1.8e19) + let src = create_test_file( + dir.path(), + "large_float.ryo", + "fn main():\n\tprint(float_to_str(18000000000000000000.0))\n", + ); + let output = run_ryo_command(&["run", "large_float.ryo"], &src); + let output = output.unwrap(); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + let stdout = String::from_utf8_lossy(&output.stdout); + // Extract the float value: it's after "[Codegen]" and before "[Result]" + let after_codegen = stdout.split("[Codegen]").nth(1).unwrap(); + let float_str = after_codegen.split("[Result]").next().unwrap().trim(); + let parsed: f64 = float_str.parse().unwrap(); + assert_eq!(parsed, 1.8e19); +} + +#[test] +fn test_float_to_str_small_decimal() { + let dir = tempfile::tempdir().unwrap(); + let src = create_test_file( + dir.path(), + "small_float.ryo", + "fn main():\n\tprint(float_to_str(0.1))\n", + ); + let output = run_ryo_command(&["run", "small_float.ryo"], &src); + let output = output.unwrap(); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + let stdout = String::from_utf8_lossy(&output.stdout); + // Extract the float value: it's after "[Codegen]" and before "[Result]" + let after_codegen = stdout.split("[Codegen]").nth(1).unwrap(); + let float_str = after_codegen.split("[Result]").next().unwrap().trim(); + let parsed: f64 = float_str.parse().unwrap(); + assert_eq!(parsed, 0.1); +} + +#[test] +fn test_bool_to_str_builtin() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "fn main():\n\ts: str = bool_to_str(true)\n\tprint(s)\n"; + let test_file = create_test_file(temp_dir.path(), "bool_to_str.ryo", code); + + let output = run_ryo_command(&["run", "bool_to_str.ryo"], &test_file) + .expect("Failed to run ryo command"); + + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("true"), + "Output should contain 'true', got: {}", + stdout + ); +} + +// ---- str.len() and str.is_empty() method calls ---- + +#[test] +fn test_str_len() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "s: str = \"hello\"\nassert(s.len() == 5, \"len should be 5\")"; + let test_file = create_test_file(temp_dir.path(), "str_len.ryo", code); + let output = run_ryo_command(&["run", "str_len.ryo"], &test_file).expect("Failed to run"); + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn test_str_is_empty() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "s: str = \"\"\nassert(s.is_empty(), \"empty string should be empty\")"; + let test_file = create_test_file(temp_dir.path(), "str_empty.ryo", code); + let output = run_ryo_command(&["run", "str_empty.ryo"], &test_file).expect("Failed to run"); + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn test_str_is_empty_false() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = + "s: str = \"hi\"\nassert(not s.is_empty(), \"non-empty string should not be empty\")"; + let test_file = create_test_file(temp_dir.path(), "str_not_empty.ryo", code); + let output = run_ryo_command(&["run", "str_not_empty.ryo"], &test_file).expect("Failed to run"); + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn test_str_len_concat() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "s: str = \"ab\" + \"cde\"\nassert(s.len() == 5, \"concat len should be 5\")"; + let test_file = create_test_file(temp_dir.path(), "str_len_concat.ryo", code); + let output = + run_ryo_command(&["run", "str_len_concat.ryo"], &test_file).expect("Failed to run"); + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn test_str_empty_concat_left() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = + "s: str = \"\" + \"hello\"\nassert(s.len() == 5, \"empty + hello should have len 5\")"; + let test_file = create_test_file(temp_dir.path(), "empty_left.ryo", code); + let output = run_ryo_command(&["run", "empty_left.ryo"], &test_file).expect("Failed"); + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn test_str_empty_concat_both() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "s: str = \"\" + \"\"\nassert(s.is_empty(), \"empty + empty should be empty\")"; + let test_file = create_test_file(temp_dir.path(), "empty_both.ryo", code); + let output = run_ryo_command(&["run", "empty_both.ryo"], &test_file).expect("Failed"); + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn test_str_empty_equality() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = + "a: str = \"\"\nb: str = \"\"\nassert(a == b, \"two empty strings should be equal\")"; + let test_file = create_test_file(temp_dir.path(), "empty_eq.ryo", code); + let output = run_ryo_command(&["run", "empty_eq.ryo"], &test_file).expect("Failed"); + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn test_str_concat_with_to_str() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "n: int = 42\ns: str = \"value = \" + int_to_str(n)\nprint(s)"; + let test_file = create_test_file(temp_dir.path(), "concat_int.ryo", code); + let output = run_ryo_command(&["run", "concat_int.ryo"], &test_file).expect("Failed"); + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn test_str_empty_len_zero() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "s: str = \"\"\nassert(s.len() == 0, \"empty string len should be 0\")"; + let test_file = create_test_file(temp_dir.path(), "empty_len.ryo", code); + let output = run_ryo_command(&["run", "empty_len.ryo"], &test_file).expect("Failed"); + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn test_str_passed_to_function() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "fn greet(name: str):\n\tprint(name)\n\ngreet(\"Alice\")"; + let test_file = create_test_file(temp_dir.path(), "str_param.ryo", code); + let output = run_ryo_command(&["run", "str_param.ryo"], &test_file).expect("Failed"); + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn test_str_returned_from_function() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = + "fn make_greeting() -> str:\n\treturn \"Hello!\"\n\ns: str = make_greeting()\nprint(s)"; + let test_file = create_test_file(temp_dir.path(), "str_return.ryo", code); + let output = run_ryo_command(&["run", "str_return.ryo"], &test_file).expect("Failed"); + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn test_str_shadowed_by_int_assignment_does_not_panic() { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + let code = "fn main():\n\tmut s: str = \"hello\"\n\tif true:\n\t\tmut s: int = 1\n\t\ts = 2\n\t\tprint(int_to_str(s))\n\tprint(s)\n"; + let test_file = create_test_file(temp_dir.path(), "str_shadow.ryo", code); + let output = run_ryo_command(&["run", "str_shadow.ryo"], &test_file).expect("Failed"); + assert!( + output.status.success(), + "STDERR: {}", + String::from_utf8_lossy(&output.stderr) + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("2"), + "Output should contain '2', got: {}", + stdout + ); + assert!( + stdout.contains("hello"), + "Output should contain 'hello', got: {}", + stdout + ); +}