Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions src/ast.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
use crate::lexer::word_builder::{QuotingContext, WordSpanKind};

/// Source span representing a byte range in the original input.
///
/// Plain data holder: both offsets are public and nothing in this
/// declaration enforces an ordering between them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Span {
/// Byte offset at which the range begins.
pub start: usize,
/// Byte offset at which the range ends.
/// NOTE(review): inclusive vs. exclusive is not visible here — confirm.
pub end: usize,
}

/// Opaque span metadata attached to `Word` and `CondTerm` nodes.
///
/// Records where each expansion starts/ends in the raw token text.
/// External consumers see this type through the variant field but cannot
/// read or construct it — the fields are crate-private.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WordSpan {
/// Offset in the raw token text where the expansion starts.
pub(crate) start: usize,
/// Offset in the raw token text where the expansion ends.
/// NOTE(review): inclusive vs. exclusive is not visible here — confirm.
pub(crate) end: usize,
/// Classification of this span, using the lexer word builder's
/// `WordSpanKind`.
pub(crate) kind: WordSpanKind,
/// Quoting context recorded for this span.
/// NOTE(review): presumably the quoting in effect where the expansion
/// appears — confirm against the word builder.
pub(crate) context: QuotingContext,
}

impl Span {
/// Creates a new span with the given byte offsets.
pub const fn new(start: usize, end: usize) -> Self {
Expand Down Expand Up @@ -75,7 +90,7 @@ pub enum NodeKind {
Word {
value: String,
parts: Vec<Node>,
spans: Vec<crate::lexer::word_builder::WordSpan>,
spans: Vec<WordSpan>,
},

/// A literal text segment within a word's parts list.
Expand Down Expand Up @@ -344,10 +359,7 @@ pub enum NodeKind {
CondParen { inner: Box<Node> },

/// A term (word) in a conditional expression.
CondTerm {
value: String,
spans: Vec<crate::lexer::word_builder::WordSpan>,
},
CondTerm { value: String, spans: Vec<WordSpan> },

// -- Other --
/// Pipeline negation with `!`.
Expand Down
15 changes: 6 additions & 9 deletions src/lexer/heredoc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use super::Lexer;
/// `\X` quoting. Returns the normalized delimiter and a flag indicating
/// whether any quoting was present (bash uses this to decide whether to
/// expand the body).
pub fn parse_heredoc_delimiter(raw: &str) -> (String, bool) {
pub(crate) fn parse_heredoc_delimiter(raw: &str) -> (String, bool) {
let mut result = String::new();
let mut quoted = false;
let mut chars = raw.chars();
Expand Down Expand Up @@ -42,11 +42,9 @@ pub fn parse_heredoc_delimiter(raw: &str) -> (String, bool) {

/// Pending here-document to be read after the current line.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct PendingHereDoc {
pub delimiter: String,
pub strip_tabs: bool,
pub quoted: bool,
pub(crate) struct PendingHereDoc {
pub(crate) delimiter: String,
pub(crate) strip_tabs: bool,
}

/// One heredoc body line in normalized form (delimiter-match-ready) plus
Expand All @@ -63,11 +61,10 @@ struct ReadLine {

impl Lexer {
/// Queues a here-document to be read after the next newline.
pub fn queue_heredoc(&mut self, delimiter: String, strip_tabs: bool, quoted: bool) {
pub(crate) fn queue_heredoc(&mut self, delimiter: String, strip_tabs: bool) {
self.pending_heredocs.push(PendingHereDoc {
delimiter,
strip_tabs,
quoted,
});
}

Expand Down Expand Up @@ -231,7 +228,7 @@ impl Lexer {
}

/// Takes the next completed here-doc content, if any.
pub fn take_heredoc_content(&mut self) -> Option<String> {
pub(crate) fn take_heredoc_content(&mut self) -> Option<String> {
if self.heredoc_contents.is_empty() {
None
} else {
Expand Down
56 changes: 29 additions & 27 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
#![allow(clippy::redundant_pub_crate)]

use std::rc::Rc;

use crate::error::{RableError, Result};
use crate::token::{Token, TokenType};

mod brace_expansion;
mod expansions;
pub mod heredoc;
pub(crate) mod heredoc;
mod operators;
mod quotes;
pub mod word_builder;
pub(super) mod word_builder;
mod words;

#[cfg(test)]
mod tests;

pub use heredoc::PendingHereDoc;
pub(crate) use heredoc::PendingHereDoc;

/// Immutable lexer configuration set at construction time.
#[derive(Debug, Clone, Copy)]
Expand All @@ -26,18 +28,18 @@ struct LexerConfig {
/// Mutable context flags the parser uses to inform the lexer.
/// Crate-private — the parser interacts via methods on `Lexer`.
#[derive(Debug, Clone)]
pub struct LexerContext {
pub(crate) struct LexerContext {
/// At command start position — eligible to begin a new simple command
/// or to accept an `AssignmentWord`.
pub command_start: bool,
pub(crate) command_start: bool,
/// Reserved-word recognition is enabled. Distinct from `command_start`:
/// after a simple command has consumed one or more `AssignmentWord`s,
/// subsequent words must NOT be classified as reserved words, even
/// though we are still at command-word position. Re-armed whenever
/// `command_start` is re-armed (separators, newlines, etc.).
pub reserved_words_ok: bool,
pub(crate) reserved_words_ok: bool,
/// Inside a `[[ ]]` conditional expression.
pub cond_expr: bool,
pub(crate) cond_expr: bool,
}

impl Default for LexerContext {
Expand All @@ -51,17 +53,17 @@ impl Default for LexerContext {
}

/// Hand-written context-sensitive lexer for bash.
pub struct Lexer {
pub(crate) struct Lexer {
input: Rc<[char]>,
pos: usize,
line: usize,
peeked: Option<Token>,
config: LexerConfig,
pub(crate) ctx: LexerContext,
/// Pending here-documents to be read after the next newline.
pub pending_heredocs: Vec<PendingHereDoc>,
pub(crate) pending_heredocs: Vec<PendingHereDoc>,
/// Completed here-document contents (filled after newline).
pub heredoc_contents: Vec<String>,
pub(crate) heredoc_contents: Vec<String>,
/// End position (char index) of the most recently consumed token.
last_token_end: usize,
/// Which nested construct, if any, this lexer is a fork of.
Expand All @@ -82,7 +84,7 @@ pub struct Lexer {
/// `pending_heredocs`, and `heredoc_contents` are deliberately omitted
/// because the arithmetic parse does not observe newlines or heredocs.
#[derive(Debug, Clone)]
pub struct LexerCheckpoint {
pub(crate) struct LexerCheckpoint {
pos: usize,
line: usize,
peeked: Option<Token>,
Expand All @@ -91,7 +93,7 @@ pub struct LexerCheckpoint {

/// Which nested construct the lexer is parsing, if any.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerMode {
pub(crate) enum LexerMode {
/// Top-level lexer, or a fork that is not inside a nested construct.
Normal,
/// Fork running inside a `$(...)` command substitution. Makes
Expand All @@ -115,18 +117,18 @@ impl Lexer {
/// Signal that the next word position is a command start. Also
/// re-arms reserved-word recognition — a fresh command is always
/// allowed to begin with a reserved word.
pub const fn set_command_start(&mut self) {
pub(crate) const fn set_command_start(&mut self) {
self.ctx.command_start = true;
self.ctx.reserved_words_ok = true;
}

/// Signal entering a `[[ ]]` conditional expression context.
pub const fn enter_cond_expr(&mut self) {
pub(crate) const fn enter_cond_expr(&mut self) {
self.ctx.cond_expr = true;
}

/// Signal leaving a `[[ ]]` conditional expression context.
pub const fn leave_cond_expr(&mut self) {
pub(crate) const fn leave_cond_expr(&mut self) {
self.ctx.cond_expr = false;
}

Expand All @@ -137,7 +139,7 @@ impl Lexer {
self.mode = mode;
}

pub fn checkpoint(&self) -> LexerCheckpoint {
pub(crate) fn checkpoint(&self) -> LexerCheckpoint {
LexerCheckpoint {
pos: self.pos,
line: self.line,
Expand All @@ -146,7 +148,7 @@ impl Lexer {
}
}

pub fn restore(&mut self, cp: LexerCheckpoint) {
pub(crate) fn restore(&mut self, cp: LexerCheckpoint) {
self.pos = cp.pos;
self.line = cp.line;
self.peeked = cp.peeked;
Expand All @@ -155,7 +157,7 @@ impl Lexer {
}

impl Lexer {
pub fn new(source: &str, extglob: bool) -> Self {
pub(crate) fn new(source: &str, extglob: bool) -> Self {
Self {
input: source.chars().collect::<Vec<_>>().into(),
pos: 0,
Expand Down Expand Up @@ -214,36 +216,36 @@ impl Lexer {
}

/// Returns the current position (a char index into the input).
pub const fn pos(&self) -> usize {
pub(crate) const fn pos(&self) -> usize {
self.pos
}

/// Returns the end position (char index) of the most recently consumed token.
pub const fn last_token_end(&self) -> usize {
pub(crate) const fn last_token_end(&self) -> usize {
self.last_token_end
}

/// Returns the current line number.
pub const fn line(&self) -> usize {
pub(crate) const fn line(&self) -> usize {
self.line
}

/// Returns the total input length, counted in `char`s.
pub fn input_len(&self) -> usize {
pub(crate) fn input_len(&self) -> usize {
self.input.len()
}

/// Returns the raw character at the current position — the first one not
/// consumed by the peeked token — or `None` at the end of the input.
/// Used to detect `((` — the first `(` is peeked as `LeftParen`,
/// and we check if the next raw character is also `(`.
pub fn char_after_peeked(&self) -> Option<char> {
pub(crate) fn char_after_peeked(&self) -> Option<char> {
// The peeked token consumed `(` at self.pos-1 (or wherever)
// We need to check the char at the current pos
self.input.get(self.pos).copied()
}

/// Returns true if at end of input.
pub fn at_end(&self) -> bool {
pub(crate) fn at_end(&self) -> bool {
self.pos >= self.input.len()
}

Expand Down Expand Up @@ -307,7 +309,7 @@ impl Lexer {
/// # Errors
///
/// Returns `RableError` on unterminated quotes or unexpected input.
pub fn next_token(&mut self) -> Result<Token> {
pub(crate) fn next_token(&mut self) -> Result<Token> {
let tok = if let Some(tok) = self.peeked.take() {
tok
} else {
Expand All @@ -322,7 +324,7 @@ impl Lexer {
/// # Errors
///
/// Returns `RableError` on unterminated quotes or unexpected input.
pub fn peek_token(&mut self) -> Result<&Token> {
pub(crate) fn peek_token(&mut self) -> Result<&Token> {
if self.peeked.is_none() {
let tok = self.read_token()?;
self.peeked = Some(tok);
Expand Down Expand Up @@ -411,7 +413,7 @@ impl Lexer {
/// # Errors
///
/// Returns `RableError` if `))` is not found.
pub fn read_until_double_paren(&mut self) -> Result<String> {
pub(crate) fn read_until_double_paren(&mut self) -> Result<String> {
// Clear any peeked token since we're reading raw
self.peeked = None;
let mut result = String::new();
Expand Down
Loading
Loading