diff --git a/src/parser/conditionals.rs b/src/parser/conditionals.rs index 635cd9f..b111014 100644 --- a/src/parser/conditionals.rs +++ b/src/parser/conditionals.rs @@ -127,40 +127,57 @@ impl Parser { Ok(left) } - #[allow(clippy::too_many_lines)] fn parse_cond_primary(&mut self) -> Result { let start = self.peek_pos()?; - let tok = self.lexer.peek_token()?; + let kind = self.lexer.peek_token()?.kind; - // Handle ! (negation) — Parable drops it in S-expression output, - // but we keep it in the AST so the reformatter can preserve it. - if tok.kind == TokenType::Bang { - self.lexer.next_token()?; - let inner = self.parse_cond_primary()?; - return Ok(self.spanned( - start, - NodeKind::CondNot { - operand: Box::new(inner), - }, - )); + if let Some(node) = self.try_parse_cond_negation(start, kind)? { + return Ok(node); } - - // Handle ( grouped expression ) - if tok.kind == TokenType::LeftParen { - self.lexer.next_token()?; - let inner = self.parse_cond_or()?; - self.expect(TokenType::RightParen)?; - return Ok(self.spanned( - start, - NodeKind::CondParen { - inner: Box::new(inner), - }, - )); + if let Some(node) = self.try_parse_cond_group(start, kind)? { + return Ok(node); } let first = self.lexer.next_token()?; + self.parse_cond_operand(start, first) + } + + /// `! expr` — Parable drops the negation in S-expression output, but we + /// keep it in the AST so the reformatter can preserve it. + fn try_parse_cond_negation(&mut self, start: usize, kind: TokenType) -> Result> { + if kind != TokenType::Bang { + return Ok(None); + } + self.lexer.next_token()?; + let inner = self.parse_cond_primary()?; + Ok(Some(self.spanned( + start, + NodeKind::CondNot { + operand: Box::new(inner), + }, + ))) + } + + /// `( expr )` — a grouped expression inside `[[ … ]]`. + fn try_parse_cond_group(&mut self, start: usize, kind: TokenType) -> Result> { + if kind != TokenType::LeftParen { + return Ok(None); + } + self.lexer.next_token()?; + let inner = self.parse_cond_or()?; + self.expect(TokenType::RightParen)?; + Ok(Some(self.spanned( + start, + NodeKind::CondParen { + inner: Box::new(inner), + }, + ))) + } - // Check for unary operators: -f, -d, -z, -n, etc. + /// Parse `-f EXPR` (unary), `EXPR OP EXPR` (binary), or a bare word + /// (`[-n] EXPR`). `first` is the already-consumed leading token. + fn parse_cond_operand(&mut self, start: usize, first: Token) -> Result { + // Unary operators: -f, -d, -z, -n, etc. if first.value.starts_with('-') && first.value.len() <= 3 && self.peek_cond_term()?.is_some() @@ -175,7 +192,7 @@ impl Parser { )); } - // Check for binary operators + // Binary operators: ==, !=, =~, <, >, -eq, -ne, ... if !self.is_cond_close()? && !self.peek_is(TokenType::And)? && !self.peek_is(TokenType::Or)? diff --git a/src/parser/functions.rs b/src/parser/functions.rs index 01a82a2..9234558 100644 --- a/src/parser/functions.rs +++ b/src/parser/functions.rs @@ -6,7 +6,7 @@ use crate::error::Result; use crate::token::{Token, TokenType}; use super::Parser; -use super::helpers::{is_fd_number, word_node_from_token}; +use super::helpers::{is_fd_number, is_redirect_op_kind, word_node_from_token}; impl Parser { pub(super) fn parse_subshell(&mut self) -> Result { @@ -107,84 +107,58 @@ impl Parser { )) } - #[allow(clippy::too_many_lines)] pub(super) fn parse_coproc(&mut self) -> Result { let start = self.peek_pos()?; self.expect(TokenType::Coproc)?; - let tok = self.lexer.peek_token()?; - if tok.kind.starts_command() - && !matches!( - tok.kind, - TokenType::Coproc | TokenType::Time | TokenType::Bang - ) - { - let command = self.parse_command()?; - return Ok(self.spanned( - start, - NodeKind::Coproc { - name: None, - command: Box::new(command), - }, - )); + // Path A: `coproc CMD` — no name, body is whatever starts a command. + if coproc_starts_command(self.lexer.peek_token()?.kind) { + return self.build_coproc_with_command(start, None); } let first_tok = self.lexer.next_token()?; self.lexer.set_command_start(); - // If first token after coproc is a redirect operator, parse as - // a command with redirects (no name, no command word) - if matches!( - first_tok.kind, - TokenType::Less - | TokenType::Greater - | TokenType::DoubleGreater - | TokenType::LessAnd - | TokenType::GreaterAnd - | TokenType::LessGreater - | TokenType::GreaterPipe - | TokenType::AndGreater - | TokenType::AndDoubleGreater - | TokenType::DoubleLess - | TokenType::DoubleLessDash - | TokenType::TripleLess - ) { - let mut redirects = vec![self.build_redirect(first_tok, -1, None)?]; - redirects.extend(self.parse_trailing_redirects()?); - return Ok(self.spanned( - start, - NodeKind::Coproc { - name: None, - command: Box::new(self.spanned( - start, - NodeKind::Command { - assignments: Vec::new(), - words: Vec::new(), - redirects, - }, - )), - }, - )); + // Path B: `coproc ) -> Result { + let command = self.parse_command()?; + Ok(self.spanned( + start, + NodeKind::Coproc { + name, + command: Box::new(command), + }, + )) + } + + /// Path B: first token after `coproc` is a redirect operator. Build a + /// synthetic `Command { redirects }` wrapped in a nameless `Coproc`. + fn parse_coproc_redirect_only(&mut self, start: usize, first_tok: Token) -> Result { + let mut redirects = vec![self.build_redirect(first_tok, -1, None)?]; + redirects.extend(self.parse_trailing_redirects()?); + Ok(self.build_coproc_synthetic_command(start, None, Vec::new(), redirects)) + } + + /// Path D: loop over words and redirects after `coproc WORD` to collect + /// the synthetic command's contents. + fn parse_coproc_word_loop(&mut self, first_tok: Token) -> Result<(Vec, Vec)> { let mut words = vec![word_node_from_token(first_tok)]; let mut redirects = Vec::new(); loop { @@ -196,31 +170,43 @@ impl Parser { continue; } let tok = self.lexer.peek_token()?; - if matches!(tok.kind, TokenType::Word | TokenType::Number) { - let tok = self.lexer.next_token()?; - if is_fd_number(&tok.value) && self.is_redirect_operator()? { - redirects.push(self.parse_redirect_with_fd(&tok)?); - } else { - words.push(word_node_from_token(tok)); - } - } else { + if !matches!(tok.kind, TokenType::Word | TokenType::Number) { break; } + let tok = self.lexer.next_token()?; + if is_fd_number(&tok.value) && self.is_redirect_operator()? { + redirects.push(self.parse_redirect_with_fd(&tok)?); + } else { + words.push(word_node_from_token(tok)); + } } - Ok(self.spanned( + Ok((words, redirects)) + } + + /// Wraps a synthetic `Command { assignments: [], words, redirects }` in + /// a `Coproc { name, command }` at `start`. + fn build_coproc_synthetic_command( + &self, + start: usize, + name: Option, + words: Vec, + redirects: Vec, + ) -> Node { + let command = self.spanned( + start, + NodeKind::Command { + assignments: Vec::new(), + words, + redirects, + }, + ); + self.spanned( start, NodeKind::Coproc { name, - command: Box::new(self.spanned( - start, - NodeKind::Command { - assignments: Vec::new(), - words, - redirects, - }, - )), + command: Box::new(command), }, - )) + ) } pub(super) fn parse_arith_command(&mut self) -> Result { @@ -243,3 +229,10 @@ impl Parser { self.expect_closing(TokenType::RightBrace, "}") } } + +/// Returns true when `kind` can start a command at the body position of a +/// `coproc` clause. Excludes `coproc`, `time`, and `!` since they would +/// cause re-entry into `parse_coproc` or ambiguous negation. +const fn coproc_starts_command(kind: TokenType) -> bool { + kind.starts_command() && !matches!(kind, TokenType::Coproc | TokenType::Time | TokenType::Bang) +} diff --git a/src/parser/helpers.rs b/src/parser/helpers.rs index 5c6c4d8..12d1cf6 100644 --- a/src/parser/helpers.rs +++ b/src/parser/helpers.rs @@ -1,7 +1,26 @@ //! Helper functions for the parser. use crate::ast::{Node, NodeKind}; -use crate::token::Token; +use crate::token::{Token, TokenType}; + +/// Returns true for the 12 token kinds that start a redirect operator. +pub(super) const fn is_redirect_op_kind(kind: TokenType) -> bool { + matches!( + kind, + TokenType::Less + | TokenType::Greater + | TokenType::DoubleGreater + | TokenType::LessAnd + | TokenType::GreaterAnd + | TokenType::LessGreater + | TokenType::GreaterPipe + | TokenType::AndGreater + | TokenType::AndDoubleGreater + | TokenType::DoubleLess + | TokenType::DoubleLessDash + | TokenType::TripleLess + ) +} /// Creates a `Word` node from a lexer token, moving value and spans. pub fn word_node_from_token(tok: Token) -> Node { diff --git a/src/parser/redirects.rs b/src/parser/redirects.rs index 20edfde..2d722ae 100644 --- a/src/parser/redirects.rs +++ b/src/parser/redirects.rs @@ -7,7 +7,9 @@ use crate::lexer::heredoc::parse_heredoc_delimiter; use crate::token::{Token, TokenType}; use super::Parser; -use super::helpers::{is_fd_number, is_varfd, word_node, word_node_from_token}; +use super::helpers::{ + is_fd_number, is_redirect_op_kind, is_varfd, word_node, word_node_from_token, +}; impl Parser { pub(super) fn parse_redirect(&mut self) -> Result { @@ -165,20 +167,6 @@ impl Parser { pub(super) fn is_redirect_operator(&mut self) -> Result { let tok = self.lexer.peek_token()?; - Ok(matches!( - tok.kind, - TokenType::Less - | TokenType::Greater - | TokenType::DoubleGreater - | TokenType::LessAnd - | TokenType::GreaterAnd - | TokenType::LessGreater - | TokenType::GreaterPipe - | TokenType::AndGreater - | TokenType::AndDoubleGreater - | TokenType::DoubleLess - | TokenType::DoubleLessDash - | TokenType::TripleLess - )) + Ok(is_redirect_op_kind(tok.kind)) } } diff --git a/src/parser/simple_command.rs b/src/parser/simple_command.rs index ef5922f..6519b60 100644 --- a/src/parser/simple_command.rs +++ b/src/parser/simple_command.rs @@ -2,10 +2,10 @@ use crate::ast::{Node, NodeKind, Span}; use crate::error::{RableError, Result}; -use crate::token::TokenType; +use crate::token::{Token, TokenType}; use super::Parser; -use super::helpers::{is_fd_number, is_varfd}; +use super::helpers::{is_fd_number, is_redirect_op_kind, is_varfd}; use super::word_parts; impl Parser { @@ -63,7 +63,6 @@ impl Parser { } } - #[allow(clippy::too_many_lines)] fn parse_simple_command(&mut self) -> Result { let start = self.peek_pos()?; let mut assignments = Vec::new(); @@ -77,62 +76,29 @@ impl Parser { } let tok = self.lexer.peek_token()?; match tok.kind { - TokenType::Less - | TokenType::Greater - | TokenType::DoubleGreater - | TokenType::LessAnd - | TokenType::GreaterAnd - | TokenType::LessGreater - | TokenType::GreaterPipe - | TokenType::AndGreater - | TokenType::AndDoubleGreater - | TokenType::DoubleLess - | TokenType::DoubleLessDash - | TokenType::TripleLess => { + kind if is_redirect_op_kind(kind) => { redirects.push(self.parse_redirect()?); } TokenType::Word | TokenType::AssignmentWord | TokenType::Number => { let is_assignment = tok.kind == TokenType::AssignmentWord; let tok = self.lexer.next_token()?; - // fd numbers before redirects — only when adjacent (no space) - // and never before &> or &>> - let adjacent = self - .lexer - .peek_token() - .map(|next| tok.adjacent_to(next)) - .unwrap_or(false); - if adjacent - && is_fd_number(&tok.value) - && self.is_redirect_operator()? - && !self.is_and_redirect()? + if let Some(redirect) = self.try_parse_fd_redirect(&tok)? { + redirects.push(redirect); + continue; + } + if !saw_command_word + && assignments.is_empty() + && words.is_empty() + && self.peek_is(TokenType::LeftParen)? { - redirects.push(self.parse_redirect_with_fd(&tok)?); - } else if adjacent && is_varfd(&tok.value) && self.is_redirect_operator()? { - redirects.push(self.parse_redirect_with_varfd(&tok)?); + return self.parse_function_def(&tok); + } + let node = build_word_node(tok); + if is_assignment && !saw_command_word { + assignments.push(node); } else { - if !saw_command_word - && assignments.is_empty() - && words.is_empty() - && self.peek_is(TokenType::LeftParen)? - { - return self.parse_function_def(&tok); - } - let word_span = Span::new(tok.pos, tok.pos + tok.value.len()); - let parts = word_parts::decompose_word_with_spans(&tok.value, &tok.spans); - let node = Node::new( - NodeKind::Word { - value: tok.value, - parts, - spans: tok.spans, - }, - word_span, - ); - if is_assignment && !saw_command_word { - assignments.push(node); - } else { - saw_command_word = true; - words.push(node); - } + saw_command_word = true; + words.push(node); } } _ => break, @@ -152,4 +118,42 @@ impl Parser { }, )) } + + /// If `tok` is adjacent to an fd-style redirect prefix (`NN<`, `{var}<`), + /// consume the redirect and return it. Otherwise return `Ok(None)` so + /// the caller treats `tok` as a regular word/assignment. + fn try_parse_fd_redirect(&mut self, tok: &Token) -> Result> { + let adjacent = self + .lexer + .peek_token() + .map(|next| tok.adjacent_to(next)) + .unwrap_or(false); + if !adjacent { + return Ok(None); + } + if is_fd_number(&tok.value) && self.is_redirect_operator()? && !self.is_and_redirect()? { + return Ok(Some(self.parse_redirect_with_fd(tok)?)); + } + if is_varfd(&tok.value) && self.is_redirect_operator()? { + return Ok(Some(self.parse_redirect_with_varfd(tok)?)); + } + Ok(None) + } +} + +/// Builds a `Word` node from a consumed token, preserving the token's +/// position as the node span. Distinct from `helpers::word_node_from_token`, +/// which uses `Node::empty` (no span) for AST constructors where positional +/// information is not available. +fn build_word_node(tok: Token) -> Node { + let word_span = Span::new(tok.pos, tok.pos + tok.value.len()); + let parts = word_parts::decompose_word_with_spans(&tok.value, &tok.spans); + Node::new( + NodeKind::Word { + value: tok.value, + parts, + spans: tok.spans, + }, + word_span, + ) }