Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions src/lexer/backtick_opaque_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
//! Tests for the opaque-backtick fallback (issue #38).
//!
//! When `parse_backtick_body` rejects a backtick body, `read_backtick_inner`
//! falls back to a raw byte-level scan for the closing `` ` ``, matching
//! bash's lexing rule that a backtick body is a single word token whose
//! errors (if any) are runtime concerns, not parse-time.

use super::Lexer;
use crate::error::RableError;
use crate::token::TokenType;

/// Lexes `source` to completion and returns every token before `Eof` as a
/// `(kind, value)` pair.
///
/// Panics if the lexer reports an error; this is a test-only helper, so a
/// lexing failure should abort the test immediately.
#[allow(clippy::unwrap_used)]
fn collect_tokens(source: &str) -> Vec<(TokenType, String)> {
    let mut lexer = Lexer::new(source, false);
    // Pull tokens one by one; yielding `None` at `Eof` ends the collection.
    std::iter::from_fn(|| {
        let tok = lexer.next_token().unwrap();
        if tok.kind == TokenType::Eof {
            None
        } else {
            Some((tok.kind, tok.value))
        }
    })
    .collect()
}

#[test]
fn invalid_body_becomes_opaque_word() {
    // The body starts with `else`, a reserved word that cannot begin a
    // simple command, so the structured parse of the body is rejected.
    // The fallback scanner is then expected to hand back the entire
    // backtick span, delimiters included, as exactly one Word token.
    let tokens = collect_tokens("`else echo`");
    assert_eq!(tokens, [(TokenType::Word, String::from("`else echo`"))]);
}

#[test]
fn escape_does_not_terminate() {
    // In an opaque body a backslash is consumed together with the
    // character after it, so a backslash-escaped backtick must not be
    // mistaken for the closing delimiter.
    let tokens = collect_tokens("`else \\`then\\` echo`");
    assert_eq!(
        tokens,
        [(TokenType::Word, String::from("`else \\`then\\` echo`"))]
    );
}

#[test]
fn literal_newline_escape_consumes_two_bytes() {
    // The input contains a literal backslash followed by the letter `n`,
    // not a newline character. The escape rule must swallow both as a
    // pair and keep them verbatim in the token value.
    let tokens = collect_tokens("`else a\\nb`");
    assert_eq!(tokens, [(TokenType::Word, String::from("`else a\\nb`"))]);
}

#[test]
fn trailing_backslash_at_eof_surfaces_error() {
    // The input ends right after a backslash: there is nothing for the
    // escape to pair with and no closing backtick, so the scan runs off
    // the end of the input and must report a MatchedPair error.
    let mut lexer = Lexer::new("`else\\", false);
    let outcome = lexer.next_token();
    assert!(matches!(outcome, Err(RableError::MatchedPair { .. })));
}

#[test]
fn unterminated_body_surfaces_error() {
    // An unparseable body that is never closed must yield a MatchedPair
    // error instead of quietly swallowing the rest of the input.
    let mut lexer = Lexer::new("`else echo", false);
    let outcome = lexer.next_token();
    assert!(matches!(outcome, Err(RableError::MatchedPair { .. })));
}

#[test]
#[allow(clippy::unwrap_used)]
fn newlines_in_body_advance_line_counter() {
    // Two newlines sit inside the opaque backtick body and a third one
    // follows it, so the trailing `ok` word is expected on line 4.
    let mut lexer = Lexer::new("`else\necho\n`\nok", false);

    // The whole backtick span, embedded newlines included, is one Word.
    let backtick = lexer.next_token().unwrap();
    assert_eq!(backtick.kind, TokenType::Word);
    assert_eq!(backtick.value, "`else\necho\n`");

    // The newline after the closing backtick is its own token.
    let separator = lexer.next_token().unwrap();
    assert_eq!(separator.kind, TokenType::Newline);

    // The line reported here shows the body's newlines were counted.
    let trailing = lexer.next_token().unwrap();
    assert_eq!(trailing.value, "ok");
    assert_eq!(trailing.line, 4);
}
39 changes: 38 additions & 1 deletion src/lexer/expansions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,14 +419,51 @@ impl Lexer {
fn read_backtick_inner(&mut self, wb: &mut WordBuilder) -> Result<()> {
    // Remember where the body begins so the consumed span can be copied
    // into the word verbatim once its end is known.
    let body_start = self.pos;
    let outer_depth = self.parser_depth();
    // Try the structured parse first. If it rejects the body, the error is
    // deliberately discarded and the body is treated as opaque: a raw scan
    // locates the closing backtick instead (issue #38). Only the raw scan's
    // own failure (no closing backtick before EOF) is propagated.
    //
    // The structured parse must be attempted exactly once and must not be
    // followed by `?`; otherwise the error would return from this function
    // before the fallback ever had a chance to run.
    //
    // NOTE(review): the fallback starts counting from `self.line`; this
    // assumes a failed `parse_backtick_body` leaves `self.line` unchanged.
    // Confirm against the parser.
    let (end_pos, end_line) = match crate::parser::parse_backtick_body(self, outer_depth) {
        Ok(span) => span,
        Err(_) => self.scan_backtick_opaque(body_start)?,
    };
    // Append everything from the start of the body up to `end_pos` to the
    // word being built, then move the lexer past it.
    wb.value
        .extend(self.input[body_start..end_pos].iter().copied());
    self.pos = end_pos;
    self.line = end_line;
    Ok(())
}

/// Fallback scanner that locates the closing backtick without parsing
/// the body, for use when `parse_backtick_body` has rejected it.
/// Bash lexes a backtick body as a single word token and leaves any
/// errors inside it to runtime rather than parse time. Issue #38.
///
/// The only syntax recognized is a backslash followed by one more
/// character, which is skipped as a pair so that an escaped `` ` ``
/// cannot end the body. Newlines, escaped or not, bump the line count.
///
/// Returns `(end_pos, end_line)`, where `end_pos` is the index just
/// after the closing backtick and `end_line` is the line it is on.
/// Fails with a `MatchedPair` error if the input ends first.
fn scan_backtick_opaque(&self, body_start: usize) -> Result<(usize, usize)> {
    let mut cursor = body_start;
    let mut line_no = self.line;
    while let Some(ch) = self.input.get(cursor).copied() {
        // An unescaped backtick closes the body.
        if ch == '`' {
            return Ok((cursor + 1, line_no));
        }
        // Every other character is consumed.
        cursor += 1;
        if ch == '\n' {
            line_no += 1;
        } else if ch == '\\' {
            // A backslash also takes the next character with it, when
            // there is one; a trailing backslash at EOF stands alone.
            if let Some(escaped) = self.input.get(cursor).copied() {
                cursor += 1;
                if escaped == '\n' {
                    line_no += 1;
                }
            }
        }
    }
    Err(RableError::matched_pair(
        "unterminated backtick",
        cursor,
        line_no,
    ))
}

/// Reads deprecated `$[...]` arithmetic with bracket depth tracking.
fn read_deprecated_arith(&mut self, wb: &mut WordBuilder) -> Result<()> {
let mut depth = 1;
Expand Down
2 changes: 2 additions & 0 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ mod quotes;
pub(super) mod word_builder;
mod words;

#[cfg(test)]
mod backtick_opaque_tests;
#[cfg(test)]
mod tests;

Expand Down
9 changes: 2 additions & 7 deletions tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,8 @@ const KNOWN_ORACLE_FAILURES: &[&str] = &[
// #37 — reserved words as plain words: cases 1, 3, 4 fixed by #44;
// case 5 fixed as a side effect of #35; case 2 fixed by #42
// (`((` → nested subshell fallback).
// #38 — backticks opaque on invalid content
"backtick_opaque 1",
"backtick_opaque 2",
"backtick_opaque 3",
"backtick_opaque 4",
"backtick_opaque 5",
"backtick_opaque 6",
// #38 — backticks opaque on invalid content: all 6 cases fixed via
// `scan_backtick_opaque` fallback in `read_backtick_inner`.
// #39 — heredoc inside $(...); both cases fixed: case 1 fell out
// of #40's `<<'EOF'` fix, case 2 via the sloppy-delimiter path
// in `read_heredoc_body` + `reformat_bash` using Cmdsub mode.
Expand Down
Loading