diff --git a/CMakeLists.txt b/CMakeLists.txt index 3893af136..35964696c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,12 +115,12 @@ set(FISH_BUILTIN_SRCS set(FISH_SRCS src/ast.cpp src/autoload.cpp src/color.cpp src/common.cpp src/complete.cpp src/env.cpp src/env_dispatch.cpp src/env_universal_common.cpp src/event.cpp - src/exec.cpp src/expand.cpp src/fallback.cpp src/fish_version.cpp + src/exec.cpp src/expand.cpp src/fallback.cpp src/fish_indent_common.cpp src/fish_version.cpp src/flog.cpp src/function.cpp src/highlight.cpp src/history.cpp src/history_file.cpp src/input.cpp src/input_common.cpp src/io.cpp src/iothread.cpp src/kill.cpp src/null_terminated_array.cpp src/operation_context.cpp src/output.cpp - src/pager.cpp src/parse_execution.cpp src/parse_tree.cpp src/parse_util.cpp + src/pager.cpp src/parse_execution.cpp src/parse_util.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp src/postfork.cpp src/proc.cpp src/re.cpp src/reader.cpp src/screen.cpp src/signals.cpp src/tinyexpr.cpp src/utf8.cpp diff --git a/fish-rust/build.rs b/fish-rust/build.rs index 4d2edfee5..f0f80dc26 100644 --- a/fish-rust/build.rs +++ b/fish-rust/build.rs @@ -26,6 +26,7 @@ fn main() -> miette::Result<()> { // This must come before autocxx so that cxx can emit its cxx.h header. let source_files = vec![ "src/abbrs.rs", + "src/ast.rs", "src/event.rs", "src/common.rs", "src/fd_monitor.rs", @@ -33,9 +34,13 @@ fn main() -> miette::Result<()> { "src/fds.rs", "src/ffi_init.rs", "src/ffi_tests.rs", + "src/fish_indent.rs", "src/future_feature_flags.rs", + "src/highlight.rs", "src/job_group.rs", "src/parse_constants.rs", + "src/parse_tree.rs", + "src/parse_util.rs", "src/redirection.rs", "src/smoke.rs", "src/termsize.rs", diff --git a/fish-rust/src/ast.rs b/fish-rust/src/ast.rs new file mode 100644 index 000000000..39e136263 --- /dev/null +++ b/fish-rust/src/ast.rs @@ -0,0 +1,5708 @@ +/*! + * This defines the fish abstract syntax tree. + * The fish ast is a tree data structure. 
The nodes of the tree + * are divided into three categories: + * + * - leaf nodes refer to a range of source, and have no child nodes. + * - branch nodes have ONLY child nodes, and no other fields. + * - list nodes contain a list of some other node type (branch or leaf). + * + * Most clients will be interested in visiting the nodes of an ast. + */ +use crate::common::{unescape_string, UnescapeStringStyle}; +use crate::flog::FLOG; +use crate::parse_constants::{ + token_type_user_presentable_description, ParseError, ParseErrorCode, ParseErrorList, + ParseKeyword, ParseTokenType, ParseTreeFlags, SourceRange, StatementDecoration, + INVALID_PIPELINE_CMD_ERR_MSG, PARSE_FLAG_ACCEPT_INCOMPLETE_TOKENS, + PARSE_FLAG_CONTINUE_AFTER_ERROR, PARSE_FLAG_INCLUDE_COMMENTS, PARSE_FLAG_LEAVE_UNTERMINATED, + PARSE_FLAG_SHOW_EXTRA_SEMIS, SOURCE_OFFSET_INVALID, +}; +use crate::parse_tree::ParseToken; +use crate::tokenizer::{ + variable_assignment_equals_pos, TokFlags, TokenType, Tokenizer, TokenizerError, + TOK_ACCEPT_UNFINISHED, TOK_CONTINUE_AFTER_ERROR, TOK_SHOW_COMMENTS, +}; +use crate::wchar::{wstr, WString, L}; +use crate::wchar_ext::WExt; +use crate::wchar_ffi::{wcharz, wcharz_t, WCharFromFFI, WCharToFFI}; +use crate::wutil::printf::sprintf; +use crate::wutil::wgettext_fmt; +use cxx::{type_id, ExternType}; +use cxx::{CxxWString, UniquePtr}; +use std::ops::{ControlFlow, Index, IndexMut}; +use widestring_suffix::widestrs; + +/** + * A NodeVisitor is something which can visit an AST node. 
+ * + * To visit a node's fields, use the node's accept() function: + * let mut v = MyNodeVisitor{}; + * node.accept(&mut v); + */ +pub trait NodeVisitor<'a> { + fn visit(&mut self, node: &'a dyn Node); +} + +pub trait Acceptor { + fn accept<'a>(&'a self, visitor: &mut dyn NodeVisitor<'a>, reversed: bool); +} + +impl Acceptor for Option { + fn accept<'a>(&'a self, visitor: &mut dyn NodeVisitor<'a>, reversed: bool) { + match self { + Some(node) => node.accept(visitor, reversed), + None => (), + } + } +} + +pub struct MissingEndError { + allowed_keywords: &'static [ParseKeyword], + token: ParseToken, +} + +pub type VisitResult = ControlFlow; + +trait NodeVisitorMut { + /// will_visit (did_visit) is called before (after) a node's fields are visited. + fn will_visit_fields_of(&mut self, node: &mut dyn NodeMut); + fn visit_mut(&mut self, node: &mut dyn NodeMut) -> VisitResult; + fn did_visit_fields_of<'a>(&'a mut self, node: &'a dyn NodeMut, flow: VisitResult); + + fn visit_argument_or_redirection( + &mut self, + _node: &mut Box, + ) -> VisitResult; + fn visit_block_statement_header( + &mut self, + _node: &mut Box, + ) -> VisitResult; + fn visit_statement(&mut self, _node: &mut Box) -> VisitResult; + + fn visit_decorated_statement_decorator( + &mut self, + _node: &mut Option, + ); + fn visit_job_conjunction_decorator(&mut self, _node: &mut Option); + fn visit_else_clause(&mut self, _node: &mut Option); + fn visit_semi_nl(&mut self, _node: &mut Option); + fn visit_time(&mut self, _node: &mut Option); + fn visit_token_background(&mut self, _node: &mut Option); +} + +trait AcceptorMut { + fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool); +} + +impl AcceptorMut for Option { + fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { + match self { + Some(node) => node.accept_mut(visitor, reversed), + None => (), + } + } +} + +/// Node is the base trait of all AST nodes. 
+pub trait Node: Acceptor + ConcreteNode + std::fmt::Debug { + /// The parent node, or null if this is root. + fn parent(&self) -> Option<&dyn Node>; + fn parent_ffi(&self) -> &Option<*const dyn Node>; + + /// The type of this node. + fn typ(&self) -> Type; + + /// The category of this node. + fn category(&self) -> Category; + + /// \return a helpful string description of this node. + #[widestrs] + fn describe(&self) -> WString { + let mut res = ast_type_to_string(self.typ()).to_owned(); + if let Some(n) = self.as_token() { + let token_type: &'static wstr = n.token_type().into(); + res += &sprintf!(" '%ls'"L, token_type)[..]; + } else if let Some(n) = self.as_keyword() { + let keyword: &'static wstr = n.keyword().into(); + res += &sprintf!(" '%ls'"L, keyword)[..]; + } + res + } + + /// \return the source range for this node, or none if unsourced. + /// This may return none if the parse was incomplete or had an error. + fn try_source_range(&self) -> Option; + + /// \return the source range for this node, or an empty range {0, 0} if unsourced. + fn source_range(&self) -> SourceRange { + self.try_source_range().unwrap_or(SourceRange::new(0, 0)) + } + + /// \return the source code for this node, or none if unsourced. + fn try_source<'s>(&self, orig: &'s wstr) -> Option<&'s wstr> { + self.try_source_range() + .map(|r| &orig[r.start as usize..r.end() as usize]) + } + + /// \return the source code for this node, or an empty string if unsourced. + fn source<'s>(&self, orig: &'s wstr) -> &'s wstr { + self.try_source(orig).unwrap_or_default() + } + + // The address of the object, for comparison. + fn as_ptr(&self) -> *const (); +} + +/// NodeMut is a mutable node. +trait NodeMut: Node + AcceptorMut + ConcreteNodeMut { + fn as_node(&self) -> &dyn Node; +} + +pub trait ConcreteNode { + // Cast to any sub-trait. 
+ fn as_leaf(&self) -> Option<&dyn Leaf> { + None + } + fn as_keyword(&self) -> Option<&dyn Keyword> { + None + } + fn as_token(&self) -> Option<&dyn Token> { + None + } + + // Cast to any node type. + fn as_redirection(&self) -> Option<&Redirection> { + None + } + fn as_variable_assignment(&self) -> Option<&VariableAssignment> { + None + } + fn as_variable_assignment_list(&self) -> Option<&VariableAssignmentList> { + None + } + fn as_argument_or_redirection(&self) -> Option<&ArgumentOrRedirection> { + None + } + fn as_argument_or_redirection_list(&self) -> Option<&ArgumentOrRedirectionList> { + None + } + fn as_statement(&self) -> Option<&Statement> { + None + } + fn as_job_pipeline(&self) -> Option<&JobPipeline> { + None + } + fn as_job_conjunction(&self) -> Option<&JobConjunction> { + None + } + fn as_for_header(&self) -> Option<&ForHeader> { + None + } + fn as_while_header(&self) -> Option<&WhileHeader> { + None + } + fn as_function_header(&self) -> Option<&FunctionHeader> { + None + } + fn as_begin_header(&self) -> Option<&BeginHeader> { + None + } + fn as_block_statement(&self) -> Option<&BlockStatement> { + None + } + fn as_if_clause(&self) -> Option<&IfClause> { + None + } + fn as_elseif_clause(&self) -> Option<&ElseifClause> { + None + } + fn as_elseif_clause_list(&self) -> Option<&ElseifClauseList> { + None + } + fn as_else_clause(&self) -> Option<&ElseClause> { + None + } + fn as_if_statement(&self) -> Option<&IfStatement> { + None + } + fn as_case_item(&self) -> Option<&CaseItem> { + None + } + fn as_switch_statement(&self) -> Option<&SwitchStatement> { + None + } + fn as_decorated_statement(&self) -> Option<&DecoratedStatement> { + None + } + fn as_not_statement(&self) -> Option<&NotStatement> { + None + } + fn as_job_continuation(&self) -> Option<&JobContinuation> { + None + } + fn as_job_continuation_list(&self) -> Option<&JobContinuationList> { + None + } + fn as_job_conjunction_continuation(&self) -> Option<&JobConjunctionContinuation> { + None + } 
+ fn as_andor_job(&self) -> Option<&AndorJob> { + None + } + fn as_andor_job_list(&self) -> Option<&AndorJobList> { + None + } + fn as_freestanding_argument_list(&self) -> Option<&FreestandingArgumentList> { + None + } + fn as_job_conjunction_continuation_list(&self) -> Option<&JobConjunctionContinuationList> { + None + } + fn as_maybe_newlines(&self) -> Option<&MaybeNewlines> { + None + } + fn as_case_item_list(&self) -> Option<&CaseItemList> { + None + } + fn as_argument(&self) -> Option<&Argument> { + None + } + fn as_argument_list(&self) -> Option<&ArgumentList> { + None + } + fn as_job_list(&self) -> Option<&JobList> { + None + } +} + +trait ConcreteNodeMut { + // Cast to any sub-trait. + fn as_mut_leaf(&mut self) -> Option<&mut dyn Leaf> { + None + } + fn as_mut_keyword(&mut self) -> Option<&mut dyn Keyword> { + None + } + fn as_mut_token(&mut self) -> Option<&mut dyn Token> { + None + } + + // Cast to any node type. + fn as_mut_redirection(&mut self) -> Option<&mut Redirection> { + None + } + fn as_mut_variable_assignment(&mut self) -> Option<&mut VariableAssignment> { + None + } + fn as_mut_variable_assignment_list(&mut self) -> Option<&mut VariableAssignmentList> { + None + } + fn as_mut_argument_or_redirection(&mut self) -> Option<&mut ArgumentOrRedirection> { + None + } + fn as_mut_argument_or_redirection_list(&mut self) -> Option<&mut ArgumentOrRedirectionList> { + None + } + fn as_mut_statement(&mut self) -> Option<&mut Statement> { + None + } + fn as_mut_job_pipeline(&mut self) -> Option<&mut JobPipeline> { + None + } + fn as_mut_job_conjunction(&mut self) -> Option<&mut JobConjunction> { + None + } + fn as_mut_for_header(&mut self) -> Option<&mut ForHeader> { + None + } + fn as_mut_while_header(&mut self) -> Option<&mut WhileHeader> { + None + } + fn as_mut_function_header(&mut self) -> Option<&mut FunctionHeader> { + None + } + fn as_mut_begin_header(&mut self) -> Option<&mut BeginHeader> { + None + } + fn as_mut_block_statement(&mut self) -> 
Option<&mut BlockStatement> { + None + } + fn as_mut_if_clause(&mut self) -> Option<&mut IfClause> { + None + } + fn as_mut_elseif_clause(&mut self) -> Option<&mut ElseifClause> { + None + } + fn as_mut_elseif_clause_list(&mut self) -> Option<&mut ElseifClauseList> { + None + } + fn as_mut_else_clause(&mut self) -> Option<&mut ElseClause> { + None + } + fn as_mut_if_statement(&mut self) -> Option<&mut IfStatement> { + None + } + fn as_mut_case_item(&mut self) -> Option<&mut CaseItem> { + None + } + fn as_mut_switch_statement(&mut self) -> Option<&mut SwitchStatement> { + None + } + fn as_mut_decorated_statement(&mut self) -> Option<&mut DecoratedStatement> { + None + } + fn as_mut_not_statement(&mut self) -> Option<&mut NotStatement> { + None + } + fn as_mut_job_continuation(&mut self) -> Option<&mut JobContinuation> { + None + } + fn as_mut_job_continuation_list(&mut self) -> Option<&mut JobContinuationList> { + None + } + fn as_mut_job_conjunction_continuation(&mut self) -> Option<&mut JobConjunctionContinuation> { + None + } + fn as_mut_andor_job(&mut self) -> Option<&mut AndorJob> { + None + } + fn as_mut_andor_job_list(&mut self) -> Option<&mut AndorJobList> { + None + } + fn as_mut_freestanding_argument_list(&mut self) -> Option<&mut FreestandingArgumentList> { + None + } + fn as_mut_job_conjunction_continuation_list( + &mut self, + ) -> Option<&mut JobConjunctionContinuationList> { + None + } + fn as_mut_maybe_newlines(&mut self) -> Option<&mut MaybeNewlines> { + None + } + fn as_mut_case_item_list(&mut self) -> Option<&mut CaseItemList> { + None + } + fn as_mut_argument(&mut self) -> Option<&mut Argument> { + None + } + fn as_mut_argument_list(&mut self) -> Option<&mut ArgumentList> { + None + } + fn as_mut_job_list(&mut self) -> Option<&mut JobList> { + None + } +} + +/// Trait for all "leaf" nodes: nodes with no ast children. +pub trait Leaf: Node { + /// Returns none if this node is "unsourced." 
This happens if for whatever reason we are + /// unable to parse the node, either because we had a parse error and recovered, or because + /// we accepted incomplete and the token stream was exhausted. + fn range(&self) -> Option; + fn range_mut(&mut self) -> &mut Option; + fn leaf_as_node_ffi(&self) -> &dyn Node; +} + +// A token node is a node which contains a token, which must be one of a fixed set. +pub trait Token: Leaf { + /// The token type which was parsed. + fn token_type(&self) -> ParseTokenType; + fn token_type_mut(&mut self) -> &mut ParseTokenType; + fn allowed_tokens(&self) -> &'static [ParseTokenType]; + /// \return whether a token type is allowed in this token_t, i.e. is a member of our Toks list. + fn allows_token(&self, token_type: ParseTokenType) -> bool { + self.allowed_tokens().contains(&token_type) + } +} + +/// A keyword node is a node which contains a keyword, which must be one of a fixed set. +pub trait Keyword: Leaf { + fn keyword(&self) -> ParseKeyword; + fn keyword_mut(&mut self) -> &mut ParseKeyword; + fn allowed_keywords(&self) -> &'static [ParseKeyword]; + fn allows_keyword(&self, kw: ParseKeyword) -> bool { + self.allowed_keywords().contains(&kw) + } +} + +// A simple variable-sized array, possibly empty. +pub trait List: Node { + type ContentsNode: Node + Default; + fn contents(&self) -> &[Box]; + fn contents_mut(&mut self) -> &mut Vec>; + /// \return our count. + fn count(&self) -> usize { + self.contents().len() + } + /// \return whether we are empty. + fn is_empty(&self) -> bool { + self.contents().is_empty() + } +} + +/// Implement the node trait. +macro_rules! implement_node { + ( + $name:ident, + $category:ident, + $type:ident $(,)? 
+ ) => { + impl Node for $name { + fn typ(&self) -> Type { + Type::$type + } + fn parent(&self) -> Option<&dyn Node> { + self.parent.map(|p| unsafe { &*p }) + } + fn parent_ffi(&self) -> &Option<*const dyn Node> { + &self.parent + } + fn category(&self) -> Category { + Category::$category + } + fn try_source_range(&self) -> Option { + let mut visitor = SourceRangeVisitor { + total: SourceRange::new(0, 0), + any_unsourced: false, + }; + visitor.visit(self); + if visitor.any_unsourced { + None + } else { + Some(visitor.total) + } + } + fn as_ptr(&self) -> *const () { + (self as *const $name).cast() + } + } + impl NodeMut for $name { + fn as_node(&self) -> &dyn Node { + self + } + } + }; +} + +/// Implement the leaf trait. +macro_rules! implement_leaf { + ( $name:ident ) => { + impl Leaf for $name { + fn range(&self) -> Option { + self.range + } + fn range_mut(&mut self) -> &mut Option { + &mut self.range + } + fn leaf_as_node_ffi(&self) -> &dyn Node { + self + } + } + impl Acceptor for $name { + #[allow(unused_variables)] + fn accept<'a>(&'a self, visitor: &mut dyn NodeVisitor<'a>, reversed: bool) {} + } + impl AcceptorMut for $name { + #[allow(unused_variables)] + fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { + visitor.will_visit_fields_of(self); + visitor.did_visit_fields_of(self, VisitResult::Continue(())); + } + } + impl $name { + /// Set the parent fields of all nodes in the tree rooted at \p self. + fn set_parents(&mut self) {} + } + }; +} + +/// Define a node that implements the keyword trait. +macro_rules! define_keyword_node { + ( $name:ident, $($allowed:expr),* $(,)? 
) => { + #[derive(Default, Debug)] + pub struct $name { + parent: Option<*const dyn Node>, + range: Option, + keyword: ParseKeyword, + } + implement_node!($name, leaf, keyword_base); + implement_leaf!($name); + impl ConcreteNode for $name { + fn as_leaf(&self) -> Option<&dyn Leaf> { + Some(self) + } + fn as_keyword(&self) -> Option<&dyn Keyword> { + Some(self) + } + } + impl ConcreteNodeMut for $name { + fn as_mut_leaf(&mut self) -> Option<&mut dyn Leaf> { + Some(self) + } + fn as_mut_keyword(&mut self) -> Option<&mut dyn Keyword> { + Some(self) + } + } + impl Keyword for $name { + fn keyword(&self) -> ParseKeyword { + self.keyword + } + fn keyword_mut(&mut self) -> &mut ParseKeyword { + &mut self.keyword + } + fn allowed_keywords(&self) -> &'static [ParseKeyword] { + &[$($allowed),*] + } + } + } +} + +/// Define a node that implements the token trait. +macro_rules! define_token_node { + ( $name:ident, $($allowed:expr),* $(,)? ) => { + #[derive(Default, Debug)] + pub struct $name { + parent: Option<*const dyn Node>, + range: Option, + parse_token_type: ParseTokenType, + } + implement_node!($name, leaf, token_base); + implement_leaf!($name); + impl ConcreteNode for $name { + fn as_leaf(&self) -> Option<&dyn Leaf> { + Some(self) + } + fn as_token(&self) -> Option<&dyn Token> { + Some(self) + } + } + impl ConcreteNodeMut for $name { + fn as_mut_leaf(&mut self) -> Option<&mut dyn Leaf> { + Some(self) + } + fn as_mut_token(&mut self) -> Option<&mut dyn Token> { + Some(self) + } + } + impl Token for $name { + fn token_type(&self) -> ParseTokenType { + self.parse_token_type + } + fn token_type_mut(&mut self) -> &mut ParseTokenType { + &mut self.parse_token_type + } + fn allowed_tokens(&self) -> &'static [ParseTokenType] { + &[$($allowed),*] + } + } + } +} + +/// Define a node that implements the list trait. +macro_rules! 
define_list_node { + ( + $name:ident, + $type:tt, + $contents:ident + ) => { + #[derive(Default, Debug)] + pub struct $name { + parent: Option<*const dyn Node>, + list_contents: Vec>, + } + implement_node!($name, list, $type); + impl List for $name { + type ContentsNode = $contents; + fn contents(&self) -> &[Box] { + &self.list_contents + } + fn contents_mut(&mut self) -> &mut Vec> { + &mut self.list_contents + } + } + impl $name { + /// Iteration support. + fn iter(&self) -> impl Iterator::ContentsNode> { + self.contents().iter().map(|b| &**b) + } + } + impl Index for $name { + type Output = <$name as List>::ContentsNode; + fn index(&self, index: usize) -> &Self::Output { + &*self.contents()[index] + } + } + impl IndexMut for $name { + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + &mut *self.contents_mut()[index] + } + } + impl Acceptor for $name { + #[allow(unused_variables)] + fn accept<'a>(&'a self, visitor: &mut dyn NodeVisitor<'a>, reversed: bool) { + accept_list_visitor!(Self, accept, visit, self, visitor, reversed, $contents); + } + } + impl AcceptorMut for $name { + #[allow(unused_variables)] + fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { + visitor.will_visit_fields_of(self); + let flow = accept_list_visitor!( + Self, accept_mut, visit_mut, self, visitor, reversed, $contents + ); + visitor.did_visit_fields_of(self, flow); + } + } + impl $name { + /// Set the parent fields of all nodes in the tree rooted at \p self. + fn set_parents(&mut self) { + for i in 0..self.count() { + self[i].parent = Some(self); + self[i].set_parents(); + } + } + } + }; +} + +macro_rules! accept_list_visitor { + ( + $Self:ident, + $accept:ident, + $visit:ident, + $self:ident, + $visitor:ident, + $reversed:ident, + $list_element:ident + ) => { + loop { + let mut result = VisitResult::Continue(()); + // list types pretend their child nodes are direct embeddings. 
+ // This isn't used during AST construction because we need to construct the list. + if $reversed { + for i in (0..$self.count()).rev() { + result = accept_list_visitor_impl!($self, $visitor, $visit, $self[i]); + if result.is_break() { + break; + } + } + } else { + for i in 0..$self.count() { + result = accept_list_visitor_impl!($self, $visitor, $visit, $self[i]); + if result.is_break() { + break; + } + } + } + break result; + } + }; +} + +macro_rules! accept_list_visitor_impl { + ( + $self:ident, + $visitor:ident, + visit, + $child:expr) => {{ + $visitor.visit(&$child); + VisitResult::Continue(()) + }}; + ( + $self:ident, + $visitor:ident, + visit_mut, + $child:expr) => { + $visitor.visit_mut(&mut $child) + }; +} + +/// Implement the acceptor trait for the given branch node. +macro_rules! implement_acceptor_for_branch { + ( + $name:ident + $(, ($field_name:ident: $field_type:tt) )* + $(,)? + ) => { + impl Acceptor for $name { + #[allow(unused_variables)] + fn accept<'a>(&'a self, visitor: &mut dyn NodeVisitor<'a>, reversed: bool){ + visitor_accept_field!( + Self, + accept, + visit, + self, + visitor, + reversed, + ( $( $field_name: $field_type, )* ) ); + } + } + impl AcceptorMut for $name { + #[allow(unused_variables)] + fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { + visitor.will_visit_fields_of(self); + let flow = visitor_accept_field!( + Self, + accept_mut, + visit_mut, + self, + visitor, + reversed, + ( $( $field_name: $field_type, )* )); + visitor.did_visit_fields_of(self, flow); + } + } + impl $name { + /// Set the parent fields of all nodes in the tree rooted at \p self. + fn set_parents(&mut self) { + $( + set_parent_of_field!(self, $field_name, $field_type); + )* + } + } + } +} + +/// Visit the given fields in order, returning whether the visitation succeded. +macro_rules! 
visitor_accept_field { + ( + $Self:ident, + $accept:ident, + $visit:ident, + $self:ident, + $visitor:ident, + $reversed:ident, + $fields:tt + ) => { + loop { + visitor_accept_field_impl!($visit, $self, $visitor, $reversed, $fields); + break VisitResult::Continue(()); + } + }; +} + +/// Visit the given fields in order, breaking if a visitation fails. +macro_rules! visitor_accept_field_impl { + // Base case: no fields left to visit. + ( + $visit:ident, + $self:ident, + $visitor:ident, + $reversed:ident, + () + ) => {}; + // Visit the first or last field and then the rest. + ( + $visit:ident, + $self:ident, + $visitor:ident, + $reversed:ident, + ( + $field_name:ident: $field_type:tt, + $( $field_names:ident: $field_types:tt, )* + ) + ) => { + if !$reversed { + visit_1_field!($visit, ($self.$field_name), $field_type, $visitor); + } + visitor_accept_field_impl!( + $visit, $self, $visitor, $reversed, + ( $( $field_names: $field_types, )* )); + if $reversed { + visit_1_field!($visit, ($self.$field_name), $field_type, $visitor); + } + } +} + +/// Visit the given field, breaking on failure. +macro_rules! visit_1_field { + ( + visit, + $field:expr, + $field_type:tt, + $visitor:ident + ) => { + visit_1_field_impl!(visit, $field, $field_type, $visitor); + }; + ( + visit_mut, + $field:expr, + $field_type:tt, + $visitor:ident + ) => { + let result = visit_1_field_impl!(visit_mut, $field, $field_type, $visitor); + if result.is_break() { + break result; + } + }; +} + +/// Visit the given field. +macro_rules! 
visit_1_field_impl { + ( + $visit:ident, + $field:expr, + (Box<$field_type:ident>), + $visitor:ident + ) => { + visit_union_field!($visit, $field_type, $field, $visitor) + }; + ( + $visit:ident, + $field:expr, + (Option<$field_type:ident>), + $visitor:ident + ) => { + visit_optional_field!($visit, $field_type, $field, $visitor) + }; + ( + $visit:ident, + $field:expr, + $field_type:tt, + $visitor:ident + ) => { + $visitor.$visit(apply_borrow!($visit, $field)) + }; +} + +macro_rules! apply_borrow { + ( visit, $expr:expr ) => { + &$expr + }; + ( visit_mut, $expr:expr ) => { + &mut $expr + }; +} + +macro_rules! visit_union_field { + ( + visit, + $field_type:ident, + $field:expr, + $visitor:ident + ) => { + $visitor.visit($field.embedded_node().as_node()) + }; + ( + visit_mut, + $field_type:ident, + $field:expr, + $visitor:ident + ) => { + visit_union_field_mut!($field_type, $visitor, $field) + }; +} + +macro_rules! visit_union_field_mut { + (ArgumentOrRedirectionVariant, $visitor:ident, $field:expr) => { + $visitor.visit_argument_or_redirection(&mut $field) + }; + (BlockStatementHeaderVariant, $visitor:ident, $field:expr) => { + $visitor.visit_block_statement_header(&mut $field) + }; + (StatementVariant, $visitor:ident, $field:expr) => { + $visitor.visit_statement(&mut $field) + }; +} + +macro_rules! visit_optional_field { + ( + visit, + $field_type:ident, + $field:expr, + $visitor:ident + ) => { + match &$field { + Some(value) => $visitor.visit(&*value), + None => visit_result!(visit), + } + }; + ( + visit_mut, + $field_type:ident, + $field:expr, + $visitor:ident + ) => {{ + visit_optional_field_mut!($field_type, $field, $visitor); + VisitResult::Continue(()) + }}; +} + +macro_rules! 
visit_optional_field_mut { + (DecoratedStatementDecorator, $field:expr, $visitor:ident) => { + $visitor.visit_decorated_statement_decorator(&mut $field); + }; + (JobConjunctionDecorator, $field:expr, $visitor:ident) => { + $visitor.visit_job_conjunction_decorator(&mut $field); + }; + (ElseClause, $field:expr, $visitor:ident) => { + $visitor.visit_else_clause(&mut $field); + }; + (SemiNl, $field:expr, $visitor:ident) => { + $visitor.visit_semi_nl(&mut $field); + }; + (KeywordTime, $field:expr, $visitor:ident) => { + $visitor.visit_time(&mut $field); + }; + (TokenBackground, $field:expr, $visitor:ident) => { + $visitor.visit_token_background(&mut $field); + }; +} + +macro_rules! visit_result { + ( visit) => { + () + }; + ( visit_mut ) => { + VisitResult::Continue(()) + }; +} + +macro_rules! set_parent_of_field { + ( + $self:ident, + $field_name:ident, + (Box<$field_type:ident>) + ) => { + set_parent_of_union_field!($self, $field_name, $field_type); + }; + ( + $self:ident, + $field_name:ident, + (Option<$field_type:ident>) + ) => { + if $self.$field_name.is_some() { + $self.$field_name.as_mut().unwrap().parent = Some($self); + $self.$field_name.as_mut().unwrap().set_parents(); + } + }; + ( + $self:ident, + $field_name:ident, + $field_type:tt + ) => { + $self.$field_name.parent = Some($self); + $self.$field_name.set_parents(); + }; +} + +macro_rules! 
set_parent_of_union_field { + ( + $self:ident, + $field_name:ident, + ArgumentOrRedirectionVariant + ) => { + if matches!( + *$self.$field_name, + ArgumentOrRedirectionVariant::Argument(_) + ) { + $self.$field_name.as_mut_argument().parent = Some($self); + $self.$field_name.as_mut_argument().set_parents(); + } else { + $self.$field_name.as_mut_redirection().parent = Some($self); + $self.$field_name.as_mut_redirection().set_parents(); + } + }; + ( + $self:ident, + $field_name:ident, + StatementVariant + ) => { + if matches!(*$self.$field_name, StatementVariant::NotStatement(_)) { + $self.$field_name.as_mut_not_statement().parent = Some($self); + $self.$field_name.as_mut_not_statement().set_parents(); + } else if matches!(*$self.$field_name, StatementVariant::BlockStatement(_)) { + $self.$field_name.as_mut_block_statement().parent = Some($self); + $self.$field_name.as_mut_block_statement().set_parents(); + } else if matches!(*$self.$field_name, StatementVariant::IfStatement(_)) { + $self.$field_name.as_mut_if_statement().parent = Some($self); + $self.$field_name.as_mut_if_statement().set_parents(); + } else if matches!(*$self.$field_name, StatementVariant::SwitchStatement(_)) { + $self.$field_name.as_mut_switch_statement().parent = Some($self); + $self.$field_name.as_mut_switch_statement().set_parents(); + } else if matches!(*$self.$field_name, StatementVariant::DecoratedStatement(_)) { + $self.$field_name.as_mut_decorated_statement().parent = Some($self); + $self.$field_name.as_mut_decorated_statement().set_parents(); + } + }; + ( + $self:ident, + $field_name:ident, + BlockStatementHeaderVariant + ) => { + if matches!( + *$self.$field_name, + BlockStatementHeaderVariant::ForHeader(_) + ) { + $self.$field_name.as_mut_for_header().parent = Some($self); + $self.$field_name.as_mut_for_header().set_parents(); + } else if matches!( + *$self.$field_name, + BlockStatementHeaderVariant::WhileHeader(_) + ) { + $self.$field_name.as_mut_while_header().parent = Some($self); + 
$self.$field_name.as_mut_while_header().set_parents(); + } else if matches!( + *$self.$field_name, + BlockStatementHeaderVariant::FunctionHeader(_) + ) { + $self.$field_name.as_mut_function_header().parent = Some($self); + $self.$field_name.as_mut_function_header().set_parents(); + } else if matches!( + *$self.$field_name, + BlockStatementHeaderVariant::BeginHeader(_) + ) { + $self.$field_name.as_mut_begin_header().parent = Some($self); + $self.$field_name.as_mut_begin_header().set_parents(); + } + }; +} + +/// A redirection has an operator like > or 2>, and a target like /dev/null or &1. +/// Note that pipes are not redirections. +#[derive(Default, Debug)] +pub struct Redirection { + parent: Option<*const dyn Node>, + pub oper: TokenRedirection, + pub target: String_, +} +implement_node!(Redirection, branch, redirection); +implement_acceptor_for_branch!(Redirection, (oper: TokenRedirection), (target: String_)); +impl ConcreteNode for Redirection { + fn as_redirection(&self) -> Option<&Redirection> { + Some(self) + } +} +impl ConcreteNodeMut for Redirection { + fn as_mut_redirection(&mut self) -> Option<&mut Redirection> { + Some(self) + } +} + +define_list_node!( + VariableAssignmentList, + variable_assignment_list, + VariableAssignment +); +impl ConcreteNode for VariableAssignmentList { + fn as_variable_assignment_list(&self) -> Option<&VariableAssignmentList> { + Some(self) + } +} +impl ConcreteNodeMut for VariableAssignmentList { + fn as_mut_variable_assignment_list(&mut self) -> Option<&mut VariableAssignmentList> { + Some(self) + } +} + +/// An argument or redirection holds either an argument or redirection. 
+#[derive(Default, Debug)] +pub struct ArgumentOrRedirection { + parent: Option<*const dyn Node>, + pub contents: Box, +} +implement_node!(ArgumentOrRedirection, branch, argument_or_redirection); +implement_acceptor_for_branch!( + ArgumentOrRedirection, + (contents: (Box)) +); +impl ConcreteNode for ArgumentOrRedirection { + fn as_argument_or_redirection(&self) -> Option<&ArgumentOrRedirection> { + Some(self) + } +} +impl ConcreteNodeMut for ArgumentOrRedirection { + fn as_mut_argument_or_redirection(&mut self) -> Option<&mut ArgumentOrRedirection> { + Some(self) + } +} + +define_list_node!( + ArgumentOrRedirectionList, + argument_or_redirection_list, + ArgumentOrRedirection +); +impl ConcreteNode for ArgumentOrRedirectionList { + fn as_argument_or_redirection_list(&self) -> Option<&ArgumentOrRedirectionList> { + Some(self) + } +} +impl ConcreteNodeMut for ArgumentOrRedirectionList { + fn as_mut_argument_or_redirection_list(&mut self) -> Option<&mut ArgumentOrRedirectionList> { + Some(self) + } +} + +/// A statement is a normal command, or an if / while / etc +#[derive(Default, Debug)] +pub struct Statement { + parent: Option<*const dyn Node>, + pub contents: Box, +} +implement_node!(Statement, branch, statement); +implement_acceptor_for_branch!(Statement, (contents: (Box))); +impl ConcreteNode for Statement { + fn as_statement(&self) -> Option<&Statement> { + Some(self) + } +} +impl ConcreteNodeMut for Statement { + fn as_mut_statement(&mut self) -> Option<&mut Statement> { + Some(self) + } +} + +/// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases +/// like if statements, where we require a command). +#[derive(Default, Debug)] +pub struct JobPipeline { + parent: Option<*const dyn Node>, + /// Maybe the time keyword. + pub time: Option, + /// A (possibly empty) list of variable assignments. + pub variables: VariableAssignmentList, + /// The statement. + pub statement: Statement, + /// Piped remainder. 
+ pub continuation: JobContinuationList, + /// Maybe backgrounded. + pub bg: Option, +} +implement_node!(JobPipeline, branch, job_pipeline); +implement_acceptor_for_branch!( + JobPipeline, + (time: (Option)), + (variables: (VariableAssignmentList)), + (statement: (Statement)), + (continuation: (JobContinuationList)), + (bg: (Option)), +); +impl ConcreteNode for JobPipeline { + fn as_job_pipeline(&self) -> Option<&JobPipeline> { + Some(self) + } +} +impl ConcreteNodeMut for JobPipeline { + fn as_mut_job_pipeline(&mut self) -> Option<&mut JobPipeline> { + Some(self) + } +} + +/// A job_conjunction is a job followed by a && or || continuations. +#[derive(Default, Debug)] +pub struct JobConjunction { + parent: Option<*const dyn Node>, + /// The job conjunction decorator. + pub decorator: Option, + /// The job itself. + pub job: JobPipeline, + /// The rest of the job conjunction, with && or ||s. + pub continuations: JobConjunctionContinuationList, + /// A terminating semicolon or newline. This is marked optional because it may not be + /// present, for example the command `echo foo` may not have a terminating newline. It will + /// only fail to be present if we ran out of tokens. 
+ pub semi_nl: Option, +} +implement_node!(JobConjunction, branch, job_conjunction); +implement_acceptor_for_branch!( + JobConjunction, + (decorator: (Option)), + (job: (JobPipeline)), + (continuations: (JobConjunctionContinuationList)), + (semi_nl: (Option)), +); +impl ConcreteNode for JobConjunction { + fn as_job_conjunction(&self) -> Option<&JobConjunction> { + Some(self) + } +} +impl ConcreteNodeMut for JobConjunction { + fn as_mut_job_conjunction(&mut self) -> Option<&mut JobConjunction> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct ForHeader { + parent: Option<*const dyn Node>, + /// 'for' + pub kw_for: KeywordFor, + /// var_name + pub var_name: String_, + /// 'in' + pub kw_in: KeywordIn, + /// list of arguments + pub args: ArgumentList, + /// newline or semicolon + pub semi_nl: SemiNl, +} +implement_node!(ForHeader, branch, for_header); +implement_acceptor_for_branch!( + ForHeader, + (kw_for: (KeywordFor)), + (var_name: (String_)), + (kw_in: (KeywordIn)), + (args: (ArgumentList)), + (semi_nl: (SemiNl)), +); +impl ConcreteNode for ForHeader { + fn as_for_header(&self) -> Option<&ForHeader> { + Some(self) + } +} +impl ConcreteNodeMut for ForHeader { + fn as_mut_for_header(&mut self) -> Option<&mut ForHeader> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct WhileHeader { + parent: Option<*const dyn Node>, + /// 'while' + pub kw_while: KeywordWhile, + pub condition: JobConjunction, + pub andor_tail: AndorJobList, +} +implement_node!(WhileHeader, branch, while_header); +implement_acceptor_for_branch!( + WhileHeader, + (kw_while: (KeywordWhile)), + (condition: (JobConjunction)), + (andor_tail: (AndorJobList)), +); +impl ConcreteNode for WhileHeader { + fn as_while_header(&self) -> Option<&WhileHeader> { + Some(self) + } +} +impl ConcreteNodeMut for WhileHeader { + fn as_mut_while_header(&mut self) -> Option<&mut WhileHeader> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct FunctionHeader { + parent: Option<*const dyn 
Node>, + pub kw_function: KeywordFunction, + /// functions require at least one argument. + pub first_arg: Argument, + pub args: ArgumentList, + pub semi_nl: SemiNl, +} +implement_node!(FunctionHeader, branch, function_header); +implement_acceptor_for_branch!( + FunctionHeader, + (kw_function: (KeywordFunction)), + (first_arg: (Argument)), + (args: (ArgumentList)), + (semi_nl: (SemiNl)), +); +impl ConcreteNode for FunctionHeader { + fn as_function_header(&self) -> Option<&FunctionHeader> { + Some(self) + } +} +impl ConcreteNodeMut for FunctionHeader { + fn as_mut_function_header(&mut self) -> Option<&mut FunctionHeader> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct BeginHeader { + parent: Option<*const dyn Node>, + pub kw_begin: KeywordBegin, + /// Note that 'begin' does NOT require a semi or nl afterwards. + /// This is valid: begin echo hi; end + pub semi_nl: Option, +} +implement_node!(BeginHeader, branch, begin_header); +implement_acceptor_for_branch!( + BeginHeader, + (kw_begin: (KeywordBegin)), + (semi_nl: (Option)) +); +impl ConcreteNode for BeginHeader { + fn as_begin_header(&self) -> Option<&BeginHeader> { + Some(self) + } +} +impl ConcreteNodeMut for BeginHeader { + fn as_mut_begin_header(&mut self) -> Option<&mut BeginHeader> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct BlockStatement { + parent: Option<*const dyn Node>, + /// A header like for, while, etc. + pub header: Box, + /// List of jobs in this block. + pub jobs: JobList, + /// The 'end' node. + pub end: KeywordEnd, + /// Arguments and redirections associated with the block. 
+ pub args_or_redirs: ArgumentOrRedirectionList, +} +implement_node!(BlockStatement, branch, block_statement); +implement_acceptor_for_branch!( + BlockStatement, + (header: (Box)), + (jobs: (JobList)), + (end: (KeywordEnd)), + (args_or_redirs: (ArgumentOrRedirectionList)), +); +impl ConcreteNode for BlockStatement { + fn as_block_statement(&self) -> Option<&BlockStatement> { + Some(self) + } +} +impl ConcreteNodeMut for BlockStatement { + fn as_mut_block_statement(&mut self) -> Option<&mut BlockStatement> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct IfClause { + parent: Option<*const dyn Node>, + /// The 'if' keyword. + pub kw_if: KeywordIf, + /// The 'if' condition. + pub condition: JobConjunction, + /// 'and/or' tail. + pub andor_tail: AndorJobList, + /// The body to execute if the condition is true. + pub body: JobList, +} +implement_node!(IfClause, branch, if_clause); +implement_acceptor_for_branch!( + IfClause, + (kw_if: (KeywordIf)), + (condition: (JobConjunction)), + (andor_tail: (AndorJobList)), + (body: (JobList)), +); +impl ConcreteNode for IfClause { + fn as_if_clause(&self) -> Option<&IfClause> { + Some(self) + } +} +impl ConcreteNodeMut for IfClause { + fn as_mut_if_clause(&mut self) -> Option<&mut IfClause> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct ElseifClause { + parent: Option<*const dyn Node>, + /// The 'else' keyword. + pub kw_else: KeywordElse, + /// The 'if' clause following it. 
+ pub if_clause: IfClause, +} +implement_node!(ElseifClause, branch, elseif_clause); +implement_acceptor_for_branch!( + ElseifClause, + (kw_else: (KeywordElse)), + (if_clause: (IfClause)), +); +impl ConcreteNode for ElseifClause { + fn as_elseif_clause(&self) -> Option<&ElseifClause> { + Some(self) + } +} +impl ConcreteNodeMut for ElseifClause { + fn as_mut_elseif_clause(&mut self) -> Option<&mut ElseifClause> { + Some(self) + } +} + +define_list_node!(ElseifClauseList, elseif_clause_list, ElseifClause); +impl ConcreteNode for ElseifClauseList { + fn as_elseif_clause_list(&self) -> Option<&ElseifClauseList> { + Some(self) + } +} +impl ConcreteNodeMut for ElseifClauseList { + fn as_mut_elseif_clause_list(&mut self) -> Option<&mut ElseifClauseList> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct ElseClause { + parent: Option<*const dyn Node>, + /// else ; body + pub kw_else: KeywordElse, + pub semi_nl: SemiNl, + pub body: JobList, +} +implement_node!(ElseClause, branch, else_clause); +implement_acceptor_for_branch!( + ElseClause, + (kw_else: (KeywordElse)), + (semi_nl: (SemiNl)), + (body: (JobList)), +); +impl ConcreteNode for ElseClause { + fn as_else_clause(&self) -> Option<&ElseClause> { + Some(self) + } +} +impl ConcreteNodeMut for ElseClause { + fn as_mut_else_clause(&mut self) -> Option<&mut ElseClause> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct IfStatement { + parent: Option<*const dyn Node>, + /// if part + pub if_clause: IfClause, + /// else if list + pub elseif_clauses: ElseifClauseList, + /// else part + pub else_clause: Option, + /// literal end + pub end: KeywordEnd, + /// block args / redirs + pub args_or_redirs: ArgumentOrRedirectionList, +} +implement_node!(IfStatement, branch, if_statement); +implement_acceptor_for_branch!( + IfStatement, + (if_clause: (IfClause)), + (elseif_clauses: (ElseifClauseList)), + (else_clause: (Option)), + (end: (KeywordEnd)), + (args_or_redirs: (ArgumentOrRedirectionList)), +); +impl 
ConcreteNode for IfStatement { + fn as_if_statement(&self) -> Option<&IfStatement> { + Some(self) + } +} +impl ConcreteNodeMut for IfStatement { + fn as_mut_if_statement(&mut self) -> Option<&mut IfStatement> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct CaseItem { + parent: Option<*const dyn Node>, + /// case ; body + pub kw_case: KeywordCase, + pub arguments: ArgumentList, + pub semi_nl: SemiNl, + pub body: JobList, +} +implement_node!(CaseItem, branch, case_item); +implement_acceptor_for_branch!( + CaseItem, + (kw_case: (KeywordCase)), + (arguments: (ArgumentList)), + (semi_nl: (SemiNl)), + (body: (JobList)), +); +impl ConcreteNode for CaseItem { + fn as_case_item(&self) -> Option<&CaseItem> { + Some(self) + } +} +impl ConcreteNodeMut for CaseItem { + fn as_mut_case_item(&mut self) -> Option<&mut CaseItem> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct SwitchStatement { + parent: Option<*const dyn Node>, + /// switch ; body ; end args_redirs + pub kw_switch: KeywordSwitch, + pub argument: Argument, + pub semi_nl: SemiNl, + pub cases: CaseItemList, + pub end: KeywordEnd, + pub args_or_redirs: ArgumentOrRedirectionList, +} +implement_node!(SwitchStatement, branch, switch_statement); +implement_acceptor_for_branch!( + SwitchStatement, + (kw_switch: (KeywordSwitch)), + (argument: (Argument)), + (semi_nl: (SemiNl)), + (cases: (CaseItemList)), + (end: (KeywordEnd)), + (args_or_redirs: (ArgumentOrRedirectionList)), +); +impl ConcreteNode for SwitchStatement { + fn as_switch_statement(&self) -> Option<&SwitchStatement> { + Some(self) + } +} +impl ConcreteNodeMut for SwitchStatement { + fn as_mut_switch_statement(&mut self) -> Option<&mut SwitchStatement> { + Some(self) + } +} + +/// A decorated_statement is a command with a list of arguments_or_redirections, possibly with +/// "builtin" or "command" or "exec" +#[derive(Default, Debug)] +pub struct DecoratedStatement { + parent: Option<*const dyn Node>, + /// An optional decoration 
(command, builtin, exec, etc). + pub opt_decoration: Option, + /// Command to run. + pub command: String_, + /// Args and redirs + pub args_or_redirs: ArgumentOrRedirectionList, +} +implement_node!(DecoratedStatement, branch, decorated_statement); +implement_acceptor_for_branch!( + DecoratedStatement, + (opt_decoration: (Option)), + (command: (String_)), + (args_or_redirs: (ArgumentOrRedirectionList)), +); +impl ConcreteNode for DecoratedStatement { + fn as_decorated_statement(&self) -> Option<&DecoratedStatement> { + Some(self) + } +} +impl ConcreteNodeMut for DecoratedStatement { + fn as_mut_decorated_statement(&mut self) -> Option<&mut DecoratedStatement> { + Some(self) + } +} + +/// A not statement like `not true` or `! true` +#[derive(Default, Debug)] +pub struct NotStatement { + parent: Option<*const dyn Node>, + /// Keyword, either not or exclam. + pub kw: KeywordNot, + pub variables: VariableAssignmentList, + pub time: Option, + pub contents: Statement, +} +implement_node!(NotStatement, branch, not_statement); +implement_acceptor_for_branch!( + NotStatement, + (kw: (KeywordNot)), + (variables: (VariableAssignmentList)), + (time: (Option)), + (contents: (Statement)), +); +impl ConcreteNode for NotStatement { + fn as_not_statement(&self) -> Option<&NotStatement> { + Some(self) + } +} +impl ConcreteNodeMut for NotStatement { + fn as_mut_not_statement(&mut self) -> Option<&mut NotStatement> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct JobContinuation { + parent: Option<*const dyn Node>, + pub pipe: TokenPipe, + pub newlines: MaybeNewlines, + pub variables: VariableAssignmentList, + pub statement: Statement, +} +implement_node!(JobContinuation, branch, job_continuation); +implement_acceptor_for_branch!( + JobContinuation, + (pipe: (TokenPipe)), + (newlines: (MaybeNewlines)), + (variables: (VariableAssignmentList)), + (statement: (Statement)), +); +impl ConcreteNode for JobContinuation { + fn as_job_continuation(&self) -> 
Option<&JobContinuation> { + Some(self) + } +} +impl ConcreteNodeMut for JobContinuation { + fn as_mut_job_continuation(&mut self) -> Option<&mut JobContinuation> { + Some(self) + } +} + +define_list_node!(JobContinuationList, job_continuation_list, JobContinuation); +impl ConcreteNode for JobContinuationList { + fn as_job_continuation_list(&self) -> Option<&JobContinuationList> { + Some(self) + } +} +impl ConcreteNodeMut for JobContinuationList { + fn as_mut_job_continuation_list(&mut self) -> Option<&mut JobContinuationList> { + Some(self) + } +} + +#[derive(Default, Debug)] +pub struct JobConjunctionContinuation { + parent: Option<*const dyn Node>, + /// The && or || token. + pub conjunction: TokenConjunction, + pub newlines: MaybeNewlines, + /// The job itself. + pub job: JobPipeline, +} +implement_node!( + JobConjunctionContinuation, + branch, + job_conjunction_continuation +); +implement_acceptor_for_branch!( + JobConjunctionContinuation, + (conjunction: (TokenConjunction)), + (newlines: (MaybeNewlines)), + (job: (JobPipeline)), +); +impl ConcreteNode for JobConjunctionContinuation { + fn as_job_conjunction_continuation(&self) -> Option<&JobConjunctionContinuation> { + Some(self) + } +} +impl ConcreteNodeMut for JobConjunctionContinuation { + fn as_mut_job_conjunction_continuation(&mut self) -> Option<&mut JobConjunctionContinuation> { + Some(self) + } +} + +/// An andor_job just wraps a job, but requires that the job have an 'and' or 'or' job_decorator. +/// Note this is only used for andor_job_list; jobs that are not part of an andor_job_list are not +/// instances of this. 
+#[derive(Default, Debug)] +pub struct AndorJob { + parent: Option<*const dyn Node>, + pub job: JobConjunction, +} +implement_node!(AndorJob, branch, andor_job); +implement_acceptor_for_branch!(AndorJob, (job: (JobConjunction))); +impl ConcreteNode for AndorJob { + fn as_andor_job(&self) -> Option<&AndorJob> { + Some(self) + } +} +impl ConcreteNodeMut for AndorJob { + fn as_mut_andor_job(&mut self) -> Option<&mut AndorJob> { + Some(self) + } +} + +define_list_node!(AndorJobList, andor_job_list, AndorJob); +impl ConcreteNode for AndorJobList { + fn as_andor_job_list(&self) -> Option<&AndorJobList> { + Some(self) + } +} +impl ConcreteNodeMut for AndorJobList { + fn as_mut_andor_job_list(&mut self) -> Option<&mut AndorJobList> { + Some(self) + } +} + +/// A freestanding_argument_list is equivalent to a normal argument list, except it may contain +/// TOK_END (newlines, and even semicolons, for historical reasons). +/// In practice the tok_ends are ignored by fish code so we do not bother to store them. 
+#[derive(Default, Debug)] +pub struct FreestandingArgumentList { + parent: Option<*const dyn Node>, + pub arguments: ArgumentList, +} +implement_node!(FreestandingArgumentList, branch, freestanding_argument_list); +implement_acceptor_for_branch!(FreestandingArgumentList, (arguments: (ArgumentList))); +impl ConcreteNode for FreestandingArgumentList { + fn as_freestanding_argument_list(&self) -> Option<&FreestandingArgumentList> { + Some(self) + } +} +impl ConcreteNodeMut for FreestandingArgumentList { + fn as_mut_freestanding_argument_list(&mut self) -> Option<&mut FreestandingArgumentList> { + Some(self) + } +} + +define_list_node!( + JobConjunctionContinuationList, + job_conjunction_continuation_list, + JobConjunctionContinuation +); +impl ConcreteNode for JobConjunctionContinuationList { + fn as_job_conjunction_continuation_list(&self) -> Option<&JobConjunctionContinuationList> { + Some(self) + } +} +impl ConcreteNodeMut for JobConjunctionContinuationList { + fn as_mut_job_conjunction_continuation_list( + &mut self, + ) -> Option<&mut JobConjunctionContinuationList> { + Some(self) + } +} + +define_list_node!(ArgumentList, argument_list, Argument); +impl ConcreteNode for ArgumentList { + fn as_argument_list(&self) -> Option<&ArgumentList> { + Some(self) + } +} +impl ConcreteNodeMut for ArgumentList { + fn as_mut_argument_list(&mut self) -> Option<&mut ArgumentList> { + Some(self) + } +} + +// For historical reasons, a job list is a list of job *conjunctions*. This should be fixed. 
+define_list_node!(JobList, job_list, JobConjunction); +impl ConcreteNode for JobList { + fn as_job_list(&self) -> Option<&JobList> { + Some(self) + } +} +impl ConcreteNodeMut for JobList { + fn as_mut_job_list(&mut self) -> Option<&mut JobList> { + Some(self) + } +} + +define_list_node!(CaseItemList, case_item_list, CaseItem); +impl ConcreteNode for CaseItemList { + fn as_case_item_list(&self) -> Option<&CaseItemList> { + Some(self) + } +} +impl ConcreteNodeMut for CaseItemList { + fn as_mut_case_item_list(&mut self) -> Option<&mut CaseItemList> { + Some(self) + } +} + +/// A variable_assignment contains a source range like FOO=bar. +#[derive(Default, Debug)] +pub struct VariableAssignment { + parent: Option<*const dyn Node>, + range: Option, +} +implement_node!(VariableAssignment, leaf, variable_assignment); +implement_leaf!(VariableAssignment); +impl ConcreteNode for VariableAssignment { + fn as_leaf(&self) -> Option<&dyn Leaf> { + Some(self) + } + fn as_variable_assignment(&self) -> Option<&VariableAssignment> { + Some(self) + } +} +impl ConcreteNodeMut for VariableAssignment { + fn as_mut_variable_assignment(&mut self) -> Option<&mut VariableAssignment> { + Some(self) + } +} + +/// Zero or more newlines. +#[derive(Default, Debug)] +pub struct MaybeNewlines { + parent: Option<*const dyn Node>, + range: Option, +} +implement_node!(MaybeNewlines, leaf, maybe_newlines); +implement_leaf!(MaybeNewlines); +impl ConcreteNode for MaybeNewlines { + fn as_leaf(&self) -> Option<&dyn Leaf> { + Some(self) + } + fn as_maybe_newlines(&self) -> Option<&MaybeNewlines> { + Some(self) + } +} +impl ConcreteNodeMut for MaybeNewlines { + fn as_mut_leaf(&mut self) -> Option<&mut dyn Leaf> { + Some(self) + } + fn as_mut_maybe_newlines(&mut self) -> Option<&mut MaybeNewlines> { + Some(self) + } +} + +/// An argument is just a node whose source range determines its contents. +/// This is a separate type because it is sometimes useful to find all arguments. 
+#[derive(Default, Debug)] +pub struct Argument { + parent: Option<*const dyn Node>, + range: Option, +} +implement_node!(Argument, leaf, argument); +implement_leaf!(Argument); +impl ConcreteNode for Argument { + fn as_leaf(&self) -> Option<&dyn Leaf> { + Some(self) + } + fn as_argument(&self) -> Option<&Argument> { + Some(self) + } +} +impl ConcreteNodeMut for Argument { + fn as_mut_leaf(&mut self) -> Option<&mut dyn Leaf> { + Some(self) + } + fn as_mut_argument(&mut self) -> Option<&mut Argument> { + Some(self) + } +} + +define_token_node!(SemiNl, ParseTokenType::end); +define_token_node!(String_, ParseTokenType::string); +define_token_node!(TokenBackground, ParseTokenType::background); +#[rustfmt::skip] +define_token_node!(TokenConjunction, ParseTokenType::andand, ParseTokenType::oror); +define_token_node!(TokenPipe, ParseTokenType::pipe); +define_token_node!(TokenRedirection, ParseTokenType::redirection); + +#[rustfmt::skip] +define_keyword_node!(DecoratedStatementDecorator, ParseKeyword::kw_command, ParseKeyword::kw_builtin, ParseKeyword::kw_exec); +#[rustfmt::skip] +define_keyword_node!(JobConjunctionDecorator, ParseKeyword::kw_and, ParseKeyword::kw_or); +#[rustfmt::skip] +define_keyword_node!(KeywordBegin, ParseKeyword::kw_begin); +define_keyword_node!(KeywordCase, ParseKeyword::kw_case); +define_keyword_node!(KeywordElse, ParseKeyword::kw_else); +define_keyword_node!(KeywordEnd, ParseKeyword::kw_end); +define_keyword_node!(KeywordFor, ParseKeyword::kw_for); +define_keyword_node!(KeywordFunction, ParseKeyword::kw_function); +define_keyword_node!(KeywordIf, ParseKeyword::kw_if); +define_keyword_node!(KeywordIn, ParseKeyword::kw_in); +#[rustfmt::skip] +define_keyword_node!(KeywordNot, ParseKeyword::kw_not, ParseKeyword::kw_builtin, ParseKeyword::kw_exclam); +define_keyword_node!(KeywordSwitch, ParseKeyword::kw_switch); +define_keyword_node!(KeywordTime, ParseKeyword::kw_time); +define_keyword_node!(KeywordWhile, ParseKeyword::kw_while); + +impl 
DecoratedStatement { + /// \return the decoration for this statement. + fn decoration(&self) -> StatementDecoration { + let Some(decorator) = &self.opt_decoration else { + return StatementDecoration::none; + }; + let decorator: &dyn Keyword = decorator; + match decorator.keyword() { + ParseKeyword::kw_command => StatementDecoration::command, + ParseKeyword::kw_builtin => StatementDecoration::builtin, + ParseKeyword::kw_exec => StatementDecoration::exec, + _ => panic!("Unexpected keyword in statement decoration"), + } + } +} + +#[derive(Debug)] +pub enum ArgumentOrRedirectionVariant { + Argument(Argument), + Redirection(Redirection), +} + +impl Default for ArgumentOrRedirectionVariant { + fn default() -> Self { + ArgumentOrRedirectionVariant::Argument(Argument::default()) + } +} + +impl Acceptor for ArgumentOrRedirectionVariant { + fn accept<'a>(&'a self, visitor: &mut dyn NodeVisitor<'a>, reversed: bool) { + match self { + ArgumentOrRedirectionVariant::Argument(child) => child.accept(visitor, reversed), + ArgumentOrRedirectionVariant::Redirection(child) => child.accept(visitor, reversed), + } + } +} +impl AcceptorMut for ArgumentOrRedirectionVariant { + fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { + match self { + ArgumentOrRedirectionVariant::Argument(child) => child.accept_mut(visitor, reversed), + ArgumentOrRedirectionVariant::Redirection(child) => child.accept_mut(visitor, reversed), + } + } +} + +impl ArgumentOrRedirectionVariant { + fn embedded_node(&self) -> &dyn NodeMut { + match self { + ArgumentOrRedirectionVariant::Argument(node) => node, + ArgumentOrRedirectionVariant::Redirection(node) => node, + } + } + fn as_mut_argument(&mut self) -> &mut Argument { + match self { + ArgumentOrRedirectionVariant::Argument(node) => node, + _ => panic!(), + } + } + fn as_mut_redirection(&mut self) -> &mut Redirection { + match self { + ArgumentOrRedirectionVariant::Redirection(redirection) => redirection, + _ => panic!(), + } + } +} + 
+impl ArgumentOrRedirection { + /// \return whether this represents an argument. + pub fn is_argument(&self) -> bool { + matches!(*self.contents, ArgumentOrRedirectionVariant::Argument(_)) + } + + /// \return whether this represents a redirection + pub fn is_redirection(&self) -> bool { + matches!(*self.contents, ArgumentOrRedirectionVariant::Redirection(_)) + } + + /// \return this as an argument, assuming it wraps one. + pub fn argument(&self) -> &Argument { + match *self.contents { + ArgumentOrRedirectionVariant::Argument(ref arg) => arg, + _ => panic!("Is not an argument"), + } + } + + /// \return this as an argument, assuming it wraps one. + pub fn redirection(&self) -> &Redirection { + match *self.contents { + ArgumentOrRedirectionVariant::Redirection(ref arg) => arg, + _ => panic!("Is not a redirection"), + } + } +} + +#[derive(Debug)] +pub enum StatementVariant { + None, + NotStatement(NotStatement), + BlockStatement(BlockStatement), + IfStatement(IfStatement), + SwitchStatement(SwitchStatement), + DecoratedStatement(DecoratedStatement), +} + +impl Default for StatementVariant { + fn default() -> Self { + StatementVariant::None + } +} + +impl Acceptor for StatementVariant { + fn accept<'a>(&'a self, visitor: &mut dyn NodeVisitor<'a>, reversed: bool) { + match self { + StatementVariant::None => panic!("cannot visit null statement"), + StatementVariant::NotStatement(node) => node.accept(visitor, reversed), + StatementVariant::BlockStatement(node) => node.accept(visitor, reversed), + StatementVariant::IfStatement(node) => node.accept(visitor, reversed), + StatementVariant::SwitchStatement(node) => node.accept(visitor, reversed), + StatementVariant::DecoratedStatement(node) => node.accept(visitor, reversed), + } + } +} +impl AcceptorMut for StatementVariant { + fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { + match self { + StatementVariant::None => panic!("cannot visit null statement"), + StatementVariant::NotStatement(node) => 
node.accept_mut(visitor, reversed), + StatementVariant::BlockStatement(node) => node.accept_mut(visitor, reversed), + StatementVariant::IfStatement(node) => node.accept_mut(visitor, reversed), + StatementVariant::SwitchStatement(node) => node.accept_mut(visitor, reversed), + StatementVariant::DecoratedStatement(node) => node.accept_mut(visitor, reversed), + } + } +} + +impl StatementVariant { + fn embedded_node(&self) -> &dyn NodeMut { + match self { + StatementVariant::None => panic!("cannot visit null statement"), + StatementVariant::NotStatement(node) => node, + StatementVariant::BlockStatement(node) => node, + StatementVariant::IfStatement(node) => node, + StatementVariant::SwitchStatement(node) => node, + StatementVariant::DecoratedStatement(node) => node, + } + } + fn as_mut_not_statement(&mut self) -> &mut NotStatement { + match self { + StatementVariant::NotStatement(node) => node, + _ => panic!(), + } + } + fn as_mut_block_statement(&mut self) -> &mut BlockStatement { + match self { + StatementVariant::BlockStatement(node) => node, + _ => panic!(), + } + } + fn as_mut_if_statement(&mut self) -> &mut IfStatement { + match self { + StatementVariant::IfStatement(node) => node, + _ => panic!(), + } + } + fn as_mut_switch_statement(&mut self) -> &mut SwitchStatement { + match self { + StatementVariant::SwitchStatement(node) => node, + _ => panic!(), + } + } + fn as_mut_decorated_statement(&mut self) -> &mut DecoratedStatement { + match self { + StatementVariant::DecoratedStatement(node) => node, + _ => panic!(), + } + } +} + +#[derive(Debug)] +pub enum BlockStatementHeaderVariant { + None, + ForHeader(ForHeader), + WhileHeader(WhileHeader), + FunctionHeader(FunctionHeader), + BeginHeader(BeginHeader), +} + +impl Default for BlockStatementHeaderVariant { + fn default() -> Self { + BlockStatementHeaderVariant::None + } +} + +impl Acceptor for BlockStatementHeaderVariant { + fn accept<'a>(&'a self, visitor: &mut dyn NodeVisitor<'a>, reversed: bool) { + match self 
{ + BlockStatementHeaderVariant::None => panic!("cannot visit null block header"), + BlockStatementHeaderVariant::ForHeader(node) => node.accept(visitor, reversed), + BlockStatementHeaderVariant::WhileHeader(node) => node.accept(visitor, reversed), + BlockStatementHeaderVariant::FunctionHeader(node) => node.accept(visitor, reversed), + BlockStatementHeaderVariant::BeginHeader(node) => node.accept(visitor, reversed), + } + } +} +impl AcceptorMut for BlockStatementHeaderVariant { + fn accept_mut(&mut self, visitor: &mut dyn NodeVisitorMut, reversed: bool) { + match self { + BlockStatementHeaderVariant::None => panic!("cannot visit null block header"), + BlockStatementHeaderVariant::ForHeader(node) => node.accept_mut(visitor, reversed), + BlockStatementHeaderVariant::WhileHeader(node) => node.accept_mut(visitor, reversed), + BlockStatementHeaderVariant::FunctionHeader(node) => node.accept_mut(visitor, reversed), + BlockStatementHeaderVariant::BeginHeader(node) => node.accept_mut(visitor, reversed), + } + } +} + +impl BlockStatementHeaderVariant { + fn embedded_node(&self) -> &dyn NodeMut { + match self { + BlockStatementHeaderVariant::None => panic!("cannot visit null block header"), + BlockStatementHeaderVariant::ForHeader(node) => node, + BlockStatementHeaderVariant::WhileHeader(node) => node, + BlockStatementHeaderVariant::FunctionHeader(node) => node, + BlockStatementHeaderVariant::BeginHeader(node) => node, + } + } + fn as_mut_for_header(&mut self) -> &mut ForHeader { + match self { + BlockStatementHeaderVariant::ForHeader(node) => node, + _ => panic!(), + } + } + fn as_mut_while_header(&mut self) -> &mut WhileHeader { + match self { + BlockStatementHeaderVariant::WhileHeader(node) => node, + _ => panic!(), + } + } + fn as_mut_function_header(&mut self) -> &mut FunctionHeader { + match self { + BlockStatementHeaderVariant::FunctionHeader(node) => node, + _ => panic!(), + } + } + fn as_mut_begin_header(&mut self) -> &mut BeginHeader { + match self { + 
BlockStatementHeaderVariant::BeginHeader(node) => node, + _ => panic!(), + } + } +} + +/// \return a string literal name for an ast type. +#[widestrs] +pub fn ast_type_to_string(t: Type) -> &'static wstr { + match t { + Type::token_base => "token_base"L, + Type::keyword_base => "keyword_base"L, + Type::redirection => "redirection"L, + Type::variable_assignment => "variable_assignment"L, + Type::variable_assignment_list => "variable_assignment_list"L, + Type::argument_or_redirection => "argument_or_redirection"L, + Type::argument_or_redirection_list => "argument_or_redirection_list"L, + Type::statement => "statement"L, + Type::job_pipeline => "job_pipeline"L, + Type::job_conjunction => "job_conjunction"L, + Type::for_header => "for_header"L, + Type::while_header => "while_header"L, + Type::function_header => "function_header"L, + Type::begin_header => "begin_header"L, + Type::block_statement => "block_statement"L, + Type::if_clause => "if_clause"L, + Type::elseif_clause => "elseif_clause"L, + Type::elseif_clause_list => "elseif_clause_list"L, + Type::else_clause => "else_clause"L, + Type::if_statement => "if_statement"L, + Type::case_item => "case_item"L, + Type::switch_statement => "switch_statement"L, + Type::decorated_statement => "decorated_statement"L, + Type::not_statement => "not_statement"L, + Type::job_continuation => "job_continuation"L, + Type::job_continuation_list => "job_continuation_list"L, + Type::job_conjunction_continuation => "job_conjunction_continuation"L, + Type::andor_job => "andor_job"L, + Type::andor_job_list => "andor_job_list"L, + Type::freestanding_argument_list => "freestanding_argument_list"L, + Type::token_conjunction => "token_conjunction"L, + Type::job_conjunction_continuation_list => "job_conjunction_continuation_list"L, + Type::maybe_newlines => "maybe_newlines"L, + Type::token_pipe => "token_pipe"L, + Type::case_item_list => "case_item_list"L, + Type::argument => "argument"L, + Type::argument_list => "argument_list"L, + 
Type::job_list => "job_list"L, + _ => panic!("unknown AST type"), + } +} + +// A way to visit nodes iteratively. +// This is pre-order. Each node is visited before its children. +// Example: +// let tv = Traversal::new(start); +// while let Some(node) = tv.next() {...} +pub struct Traversal<'a> { + stack: Vec<&'a dyn Node>, +} + +impl<'a> Traversal<'a> { + // Construct starting with a node + pub fn new(n: &'a dyn Node) -> Self { + Self { stack: vec![n] } + } +} + +impl<'a> Iterator for Traversal<'a> { + type Item = &'a dyn Node; + fn next(&mut self) -> Option<&'a dyn Node> { + let Some(node) = self.stack.pop() else { + return None; + }; + // We want to visit in reverse order so the first child ends up on top of the stack. + node.accept(self, true /* reverse */); + Some(node) + } +} + +impl<'a, 'v: 'a> NodeVisitor<'v> for Traversal<'a> { + fn visit(&mut self, node: &'a dyn Node) { + self.stack.push(node) + } +} + +fn ast_type_to_string_ffi(typ: Type) -> wcharz_t { + wcharz!(ast_type_to_string(typ)) +} + +pub type SourceRangeList = Vec; + +/// Extra source ranges. +/// These are only generated if the corresponding flags are set. +#[derive(Default)] +pub struct Extras { + /// Set of comments, sorted by offset. + pub comments: SourceRangeList, + + /// Set of semicolons, sorted by offset. + pub semis: SourceRangeList, + + /// Set of error ranges, sorted by offset. + pub errors: SourceRangeList, +} + +/// The ast type itself. +pub struct Ast { + // The top node. + // Its type depends on what was requested to parse. + top: Box, + /// Whether any errors were encountered during parsing. + pub any_error: bool, + /// Extra fields. + pub extras: Extras, +} + +#[allow(clippy::derivable_impls)] // false positive +impl Default for Ast { + fn default() -> Ast { + Self { + top: Box::::default(), + any_error: false, + extras: Extras::default(), + } + } +} + +impl Ast { + /// Construct an ast by parsing \p src as a job list. + /// The ast attempts to produce \p type as the result. 
+ /// \p type may only be JobList or FreestandingArgumentList. + pub fn parse( + src: &wstr, + flags: ParseTreeFlags, + out_errors: &mut Option, + ) -> Self { + parse_from_top(src, flags, out_errors, Type::job_list) + } + /// Like parse(), but constructs a freestanding_argument_list. + pub fn parse_argument_list( + src: &wstr, + flags: ParseTreeFlags, + out_errors: &mut Option, + ) -> Self { + parse_from_top(src, flags, out_errors, Type::freestanding_argument_list) + } + /// \return a traversal, allowing iteration over the nodes. + pub fn walk(&'_ self) -> Traversal<'_> { + Traversal::new(self.top.as_node()) + } + /// \return the top node. This has the type requested in the 'parse' method. + pub fn top(&self) -> &dyn Node { + self.top.as_node() + } + fn top_mut(&mut self) -> &mut dyn NodeMut { + &mut *self.top + } + /// \return whether any errors were encountered during parsing. + pub fn errored(&self) -> bool { + self.any_error + } + /// \return a textual representation of the tree. + /// Pass the original source as \p orig. + #[widestrs] + fn dump(&self, orig: &wstr) -> WString { + let mut result = WString::new(); + + let mut tv = self.walk(); + while let Some(node) = tv.next() { + let depth = get_depth(node); + // dot-| padding + result += &wstr::repeat("! 
"L, depth)[..]; + + if let Some(n) = node.as_argument() { + result += "argument"L; + if let Some(argsrc) = n.try_source(orig) { + result += &sprintf!(": '%ls'"L, argsrc)[..]; + } + } else if let Some(n) = node.as_keyword() { + result += &sprintf!("keyword: %ls"L, Into::<&'static wstr>::into(n.keyword()))[..]; + } else if let Some(n) = node.as_token() { + let desc = match n.token_type() { + ParseTokenType::string => { + let mut desc = "string"L.to_owned(); + if let Some(strsource) = n.try_source(orig) { + desc += &sprintf!(": '%ls'"L, strsource)[..]; + } + desc + } + ParseTokenType::redirection => { + let mut desc = "redirection"L.to_owned(); + if let Some(strsource) = n.try_source(orig) { + desc += &sprintf!(": '%ls'"L, strsource)[..]; + } + desc + } + ParseTokenType::end => "<;>"L.to_owned(), + ParseTokenType::invalid => { + // This may occur with errors, e.g. we expected to see a string but saw a + // redirection. + ""L.to_owned() + } + _ => { + token_type_user_presentable_description(n.token_type(), ParseKeyword::none) + } + }; + result += &desc[..]; + } else { + result += &node.describe()[..]; + } + result += "\n"L; + } + result + } +} + +// \return the depth of a node, i.e. number of parent links. +fn get_depth(node: &dyn Node) -> usize { + let mut result = 0; + let mut cursor = node; + loop { + cursor = match cursor.parent() { + Some(parent) => parent, + None => return result, + }; + result += 1; + } +} + +struct SourceRangeVisitor { + /// Total range we have encountered. + total: SourceRange, + /// Whether any node was found to be unsourced. + any_unsourced: bool, +} + +impl<'a> NodeVisitor<'a> for SourceRangeVisitor { + fn visit(&mut self, node: &'a dyn Node) { + match node.category() { + Category::leaf => match node.as_leaf().unwrap().range() { + None => self.any_unsourced = true, + // Union with our range. 
+ Some(range) if range.length > 0 => { + if self.total.length == 0 { + self.total = range; + } else { + let end = + (self.total.start + self.total.length).max(range.start + range.length); + self.total.start = self.total.start.min(range.start); + self.total.length = end - self.total.start; + } + } + _ => (), + }, + _ => { + // Other node types recurse. + node.accept(self, false); + } + } + } +} + +/// A token stream generates a sequence of parser tokens, permitting arbitrary lookahead. +struct TokenStream<'a> { + // We implement a queue with a simple circular buffer. + // Note that peek() returns an address, so we must not move elements which are peek'd. + // This prevents using vector (which may reallocate). + // Deque would work but is too heavyweight for just 2 items. + lookahead: [ParseToken; TokenStream::MAX_LOOKAHEAD], + + // Starting index in our lookahead. + // The "first" token is at this index. + start: usize, + + // Number of items in our lookahead. + count: usize, + + // A reference to the original source. + src: &'a wstr, + + // The tokenizer to generate new tokens. + tok: Tokenizer, + + /// Any comment nodes are collected here. + /// These are only collected if parse_flag_include_comments is set. + comment_ranges: SourceRangeList, +} + +impl<'a> TokenStream<'a> { + // The maximum number of lookahead supported. + const MAX_LOOKAHEAD: usize = 2; + + fn new(src: &'a wstr, flags: ParseTreeFlags) -> Self { + Self { + lookahead: [ParseToken::new(ParseTokenType::invalid); Self::MAX_LOOKAHEAD], + start: 0, + count: 0, + src, + tok: Tokenizer::new(src, TokFlags::from(flags)), + comment_ranges: vec![], + } + } + + /// \return the token at the given index, without popping it. If the token stream is exhausted, + /// it will have parse_token_type_t::terminate. idx = 0 means the next token, idx = 1 means the + /// next-next token, and so forth. + /// We must have that idx < kMaxLookahead. 
+ fn peek(&mut self, idx: usize) -> &ParseToken { + assert!(idx < Self::MAX_LOOKAHEAD, "Trying to look too far ahead"); + while idx >= self.count { + self.lookahead[Self::mask(self.start + self.count)] = self.next_from_tok(); + self.count += 1 + } + &self.lookahead[Self::mask(self.start + idx)] + } + + /// Pop the next token. + fn pop(&mut self) -> ParseToken { + if self.count == 0 { + return self.next_from_tok(); + } + let result = self.lookahead[self.start]; + self.start = Self::mask(self.start + 1); + self.count -= 1; + result + } + + // Helper to mask our circular buffer. + fn mask(idx: usize) -> usize { + idx % Self::MAX_LOOKAHEAD + } + + /// \return the next parse token from the tokenizer. + /// This consumes and stores comments. + fn next_from_tok(&mut self) -> ParseToken { + loop { + let res = self.advance_1(); + if res.typ == ParseTokenType::comment { + self.comment_ranges.push(res.range()); + continue; + } + return res; + } + } + + /// \return a new parse token, advancing the tokenizer. + /// This returns comments. + fn advance_1(&mut self) -> ParseToken { + let Some(token) = self.tok.next() else { + return ParseToken::new(ParseTokenType::terminate); + }; + // Set the type, keyword, and whether there's a dash prefix. Note that this is quite + // sketchy, because it ignores quotes. This is the historical behavior. For example, + // `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a + // command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. + // Squint at it really hard and it even starts to look like a feature. 
+ let mut result = ParseToken::new(ParseTokenType::from(token.type_)); + let text = self.tok.text_of(&token); + result.keyword = keyword_for_token(token.type_, text); + result.has_dash_prefix = text.starts_with('-'); + result.is_help_argument = [L!("-h"), L!("--help")].contains(&text); + result.is_newline = result.typ == ParseTokenType::end && text == L!("\n"); + result.may_be_variable_assignment = variable_assignment_equals_pos(text).is_some(); + result.tok_error = token.error; + + assert!(token.offset < SOURCE_OFFSET_INVALID); + result.source_start = token.offset; + result.source_length = token.length; + + if token.error != TokenizerError::none { + let subtoken_offset = token.error_offset_within_token; + // Skip invalid tokens that have a zero length, especially if they are at EOF. + if subtoken_offset < result.source_length { + result.source_start += subtoken_offset; + result.source_length = token.error_length; + } + } + + result + } +} + +/// This indicates a bug in fish code. +macro_rules! internal_error { + ( + $self:ident, + $func:ident, + $fmt:expr + $(, $args:expr)* + $(,)? + ) => { + FLOG!( + debug, + concat!( + "Internal parse error from {$func} - this indicates a bug in fish.", + $fmt, + ) + $(, $args)* + ); + FLOG!(debug, "Encountered while parsing:<<<<\n{}\n>>>", $self.tokens.src); + panic!(); + }; +} + +/// Report an error based on \p fmt for the tokens' range +macro_rules! parse_error { + ( + $self:ident, + $token:expr, + $code:expr, + $fmt:expr + $(, $args:expr)* + $(,)? + ) => { + let range = $token.range(); + parse_error_range!($self, range, $code, $fmt $(, $args)*); + } +} + +/// Report an error based on \p fmt for the source range \p range. +macro_rules! parse_error_range { + ( + $self:ident, + $range:expr, + $code:expr, + $fmt:expr + $(, $args:expr)* + $(,)? 
+ ) => { + let text = if $self.out_errors.is_some() && !$self.unwinding { + Some(wgettext_fmt!($fmt $(, $args)*)) + } else { + None + }; + $self.any_error = true; + + // Ignore additional parse errors while unwinding. + // These may come about e.g. from `true | and`. + if !$self.unwinding { + $self.unwinding = true; + + FLOG!(ast_construction, "%*sparse error - begin unwinding", $self.spaces(), ""); + // TODO: can store this conditionally dependent on flags. + if $range.start != SOURCE_OFFSET_INVALID { + $self.errors.push($range); + } + + if let Some(errors) = &mut $self.out_errors { + let mut err = ParseError::default(); + err.text = text.unwrap(); + err.code = $code; + err.source_start = $range.start as usize; + err.source_length = $range.length as usize; + errors.0.push(err); + } + } + } +} + +struct Populator<'a> { + /// Flags controlling parsing. + flags: ParseTreeFlags, + + /// Set of semicolons, sorted by offset. + semis: SourceRangeList, + + /// Set of error ranges, sorted by offset. + errors: SourceRangeList, + + /// Stream of tokens which we consume. + tokens: TokenStream<'a>, + + /** The type which we are attempting to parse, typically job_list but may be + freestanding_argument_list. */ + top_type: Type, + + /// If set, we are unwinding due to error recovery. + unwinding: bool, + + /// If set, we have encountered an error. + any_error: bool, + + /// The number of parent links of the node we are visiting + depth: usize, + + // If non-null, populate with errors. 
+ out_errors: &'a mut Option, +} + +impl<'s> NodeVisitorMut for Populator<'s> { + fn visit_mut(&mut self, node: &mut dyn NodeMut) -> VisitResult { + match node.typ() { + Type::argument => { + self.visit_argument(node.as_mut_argument().unwrap()); + return VisitResult::Continue(()); + } + Type::variable_assignment => { + self.visit_variable_assignment(node.as_mut_variable_assignment().unwrap()); + return VisitResult::Continue(()); + } + Type::job_continuation => { + self.visit_job_continuation(node.as_mut_job_continuation().unwrap()); + return VisitResult::Continue(()); + } + Type::token_base => { + self.visit_token(node.as_mut_token().unwrap()); + return VisitResult::Continue(()); + } + Type::keyword_base => { + return self.visit_keyword(node.as_mut_keyword().unwrap()); + } + Type::maybe_newlines => { + self.visit_maybe_newlines(node.as_mut_maybe_newlines().unwrap()); + return VisitResult::Continue(()); + } + + _ => (), + } + + match node.category() { + Category::leaf => {} + // Visit branch nodes by just calling accept() to visit their fields. + Category::branch => { + // This field is a direct embedding of an AST value. + node.accept_mut(self, false); + return VisitResult::Continue(()); + } + Category::list => { + // This field is an embedding of an array of (pointers to) ContentsNode. + // Parse as many as we can. 
+ match node.typ() { + Type::andor_job_list => self.populate_list::( + node.as_mut_andor_job_list().unwrap(), + false, + ), + Type::argument_list => self + .populate_list::(node.as_mut_argument_list().unwrap(), false), + Type::argument_or_redirection_list => self + .populate_list::( + node.as_mut_argument_or_redirection_list().unwrap(), + false, + ), + Type::case_item_list => self.populate_list::( + node.as_mut_case_item_list().unwrap(), + false, + ), + Type::elseif_clause_list => self.populate_list::( + node.as_mut_elseif_clause_list().unwrap(), + false, + ), + Type::job_conjunction_continuation_list => self + .populate_list::( + node.as_mut_job_conjunction_continuation_list().unwrap(), + false, + ), + Type::job_continuation_list => self.populate_list::( + node.as_mut_job_continuation_list().unwrap(), + false, + ), + Type::job_list => { + self.populate_list::(node.as_mut_job_list().unwrap(), false) + } + Type::variable_assignment_list => self.populate_list::( + node.as_mut_variable_assignment_list().unwrap(), + false, + ), + _ => (), + } + } + _ => panic!(), + } + VisitResult::Continue(()) + } + + fn will_visit_fields_of(&mut self, node: &mut dyn NodeMut) { + FLOG!( + ast_construction, + "%*swill_visit %ls %p", + self.spaces(), + "", + node.describe() + ); + self.depth += 1 + } + + #[widestrs] + fn did_visit_fields_of<'a>(&'a mut self, node: &'a dyn NodeMut, flow: VisitResult) { + self.depth -= 1; + + if self.unwinding { + return; + } + let VisitResult::Break(error) = flow else { return; }; + + /// We believe the node is some sort of block statement. Attempt to find a source range + /// for the block's keyword (for, if, etc) and a user-presentable description. This + /// is used to provide better error messages. Note at this point the parse tree is + /// incomplete; in particular parent nodes are not set. 
+ let mut cursor = node; + let header = loop { + match cursor.typ() { + Type::block_statement => { + cursor = cursor.as_block_statement().unwrap().header.embedded_node(); + } + Type::for_header => { + let n = cursor.as_for_header().unwrap(); + break Some((n.kw_for.range.unwrap(), "for loop"L)); + } + Type::while_header => { + let n = cursor.as_while_header().unwrap(); + break Some((n.kw_while.range.unwrap(), "while loop"L)); + } + Type::function_header => { + let n = cursor.as_function_header().unwrap(); + break Some((n.kw_function.range.unwrap(), "function definition"L)); + } + Type::begin_header => { + let n = cursor.as_begin_header().unwrap(); + break Some((n.kw_begin.range.unwrap(), "begin"L)); + } + Type::if_statement => { + let n = cursor.as_if_statement().unwrap(); + break Some((n.if_clause.kw_if.range.unwrap(), "if statement"L)); + } + Type::switch_statement => { + let n = cursor.as_switch_statement().unwrap(); + break Some((n.kw_switch.range.unwrap(), "switch statement"L)); + } + _ => break None, + } + }; + + if let Some((header_kw_range, enclosing_stmt)) = header { + parse_error_range!( + self, + header_kw_range, + ParseErrorCode::generic, + "Missing end to balance this %ls", + enclosing_stmt + ); + } else { + parse_error!( + self, + error.token, + ParseErrorCode::generic, + "Expected %ls, but found %ls", + keywords_user_presentable_description(error.allowed_keywords), + error.token.user_presentable_description(), + ); + } + } + + // We currently only have a handful of union pointer types. + // Handle them directly. 
+ fn visit_argument_or_redirection( + &mut self, + node: &mut Box, + ) -> VisitResult { + if let Some(arg) = self.try_parse::() { + **node = ArgumentOrRedirectionVariant::Argument(*arg); + } else if let Some(redir) = self.try_parse::() { + **node = ArgumentOrRedirectionVariant::Redirection(*redir); + } else { + internal_error!( + self, + visit_argument_or_redirection, + "Unable to parse argument or redirection" + ); + } + VisitResult::Continue(()) + } + fn visit_block_statement_header( + &mut self, + node: &mut Box, + ) -> VisitResult { + *node = self.allocate_populate_block_header(); + VisitResult::Continue(()) + } + fn visit_statement(&mut self, node: &mut Box) -> VisitResult { + *node = self.allocate_populate_statement_contents(); + VisitResult::Continue(()) + } + + fn visit_decorated_statement_decorator( + &mut self, + node: &mut Option, + ) { + *node = self.try_parse::().map(|b| *b); + } + fn visit_job_conjunction_decorator(&mut self, node: &mut Option) { + *node = self.try_parse::().map(|b| *b); + } + fn visit_else_clause(&mut self, node: &mut Option) { + *node = self.try_parse::().map(|b| *b); + } + fn visit_semi_nl(&mut self, node: &mut Option) { + *node = self.try_parse::().map(|b| *b); + } + fn visit_time(&mut self, node: &mut Option) { + *node = self.try_parse::().map(|b| *b); + } + fn visit_token_background(&mut self, node: &mut Option) { + *node = self.try_parse::().map(|b| *b); + } +} + +/// Helper to describe a list of keywords. +/// TODO: these need to be localized properly. +#[widestrs] +fn keywords_user_presentable_description(kws: &'static [ParseKeyword]) -> WString { + assert!(!kws.is_empty(), "Should not be empty list"); + if kws.len() == 1 { + return sprintf!("keyword '%ls'"L, kws[0]); + } + let mut res = "keywords "L.to_owned(); + for (i, kw) in kws.iter().enumerate() { + if i != 0 { + res += " or "L; + } + res += &sprintf!("'%ls'"L, *kw)[..]; + } + res +} + +/// Helper to describe a list of token types. 
+/// TODO: these need to be localized properly. +#[widestrs] +fn token_types_user_presentable_description(types: &'static [ParseTokenType]) -> WString { + assert!(!types.is_empty(), "Should not be empty list"); + let mut res = WString::new(); + for typ in types { + if !res.is_empty() { + res += " or "L; + } + res += &token_type_user_presentable_description(*typ, ParseKeyword::none)[..]; + } + res +} + +impl<'s> Populator<'s> { + /// Construct from a source, flags, top type, and out_errors, which may be null. + fn new( + src: &'s wstr, + flags: ParseTreeFlags, + top_type: Type, + out_errors: &'s mut Option, + ) -> Self { + Self { + flags, + semis: vec![], + errors: vec![], + tokens: TokenStream::new(src, flags), + top_type, + unwinding: false, + any_error: false, + depth: 0, + out_errors, + } + } + + /// Helper for FLOGF. This returns a number of spaces appropriate for a '%*c' format. + fn spaces(&self) -> usize { + self.depth * 2 + } + + /// \return the parser's status. + fn status(&mut self) -> ParserStatus { + if self.unwinding { + ParserStatus::unwinding + } else if self.flags & PARSE_FLAG_LEAVE_UNTERMINATED + && self.peek_type(0) == ParseTokenType::terminate + { + ParserStatus::unsourcing + } else { + ParserStatus::ok + } + } + + /// \return whether any leaf nodes we visit should be marked as unsourced. + fn unsource_leaves(&mut self) -> bool { + matches!( + self.status(), + ParserStatus::unsourcing | ParserStatus::unwinding + ) + } + + /// \return whether we permit an incomplete parse tree. + fn allow_incomplete(&self) -> bool { + self.flags & PARSE_FLAG_LEAVE_UNTERMINATED + } + + /// \return whether a list type \p type allows arbitrary newlines in it. + fn list_type_chomps_newlines(&self, typ: Type) -> bool { + match typ { + Type::argument_list => { + // Hackish. If we are producing a freestanding argument list, then it allows + // semicolons, for hysterical raisins. 
+ self.top_type == Type::freestanding_argument_list + } + Type::argument_or_redirection_list => { + // No newlines inside arguments. + false + } + Type::variable_assignment_list => { + // No newlines inside variable assignment lists. + false + } + Type::job_list => { + // Like echo a \n \n echo b + true + } + Type::case_item_list => { + // Like switch foo \n \n \n case a \n end + true + } + Type::andor_job_list => { + // Like while true ; \n \n and true ; end + true + } + Type::elseif_clause_list => { + // Like if true ; \n \n else if false; end + true + } + Type::job_conjunction_continuation_list => { + // This would be like echo a && echo b \n && echo c + // We could conceivably support this but do not now. + false + } + Type::job_continuation_list => { + // This would be like echo a \n | echo b + // We could conceivably support this but do not now. + false + } + _ => { + internal_error!( + self, + list_type_chomps_newlines, + "Type %ls not handled", + ast_type_to_string(typ) + ); + } + } + } + + /// \return whether a list type \p type allows arbitrary semicolons in it. + fn list_type_chomps_semis(&self, typ: Type) -> bool { + match typ { + Type::argument_list => { + // Hackish. If we are producing a freestanding argument list, then it allows + // semicolons, for hysterical raisins. + // That is, this is OK: complete -c foo -a 'x ; y ; z' + // But this is not: foo x ; y ; z + self.top_type == Type::freestanding_argument_list + } + + Type::argument_or_redirection_list | Type::variable_assignment_list => false, + Type::job_list => { + // Like echo a ; ; echo b + true + } + Type::case_item_list => { + // Like switch foo ; ; ; case a \n end + // This is historically allowed. + true + } + Type::andor_job_list => { + // Like while true ; ; ; and true ; end + true + } + Type::elseif_clause_list => { + // Like if true ; ; ; else if false; end + false + } + Type::job_conjunction_continuation_list => { + // Like echo a ; ; && echo b. Not supported. 
+ false + } + Type::job_continuation_list => { + // This would be like echo a ; | echo b + // Not supported. + // We could conceivably support this but do not now. + false + } + _ => { + internal_error!( + self, + list_type_chomps_semis, + "Type %ls not handled", + ast_type_to_string(typ) + ); + } + } + } + + /// Chomp extra comments, semicolons, etc. for a given list type. + fn chomp_extras(&mut self, typ: Type) { + let chomp_semis = self.list_type_chomps_semis(typ); + let chomp_newlines = self.list_type_chomps_newlines(typ); + loop { + let peek = self.tokens.peek(0); + if chomp_newlines && peek.typ == ParseTokenType::end && peek.is_newline { + // Just skip this newline, no need to save it. + self.tokens.pop(); + } else if chomp_semis && peek.typ == ParseTokenType::end && !peek.is_newline { + let tok = self.tokens.pop(); + // Perhaps save this extra semi. + if self.flags & PARSE_FLAG_SHOW_EXTRA_SEMIS { + self.semis.push(tok.range()); + } + } else { + break; + } + } + } + + /// \return whether a list type should recover from errors.s + /// That is, whether we should stop unwinding when we encounter this type. + fn list_type_stops_unwind(&self, typ: Type) -> bool { + typ == Type::job_list && self.flags & PARSE_FLAG_CONTINUE_AFTER_ERROR + } + + /// \return a reference to a non-comment token at index \p idx. + fn peek_token(&mut self, idx: usize) -> &ParseToken { + self.tokens.peek(idx) + } + + /// \return the type of a non-comment token. + fn peek_type(&mut self, idx: usize) -> ParseTokenType { + self.peek_token(idx).typ + } + + /// Consume the next token, chomping any comments. + /// It is an error to call this unless we know there is a non-terminate token available. + /// \return the token. 
+ fn consume_any_token(&mut self) -> ParseToken { + let tok = self.tokens.pop(); + assert!( + tok.typ != ParseTokenType::comment, + "Should not be a comment" + ); + assert!( + tok.typ != ParseTokenType::terminate, + "Cannot consume terminate token, caller should check status first" + ); + tok + } + + /// Consume the next token which is expected to be of the given type. + fn consume_token_type(&mut self, typ: ParseTokenType) -> SourceRange { + assert!( + typ != ParseTokenType::terminate, + "Should not attempt to consume terminate token" + ); + let tok = self.consume_any_token(); + if tok.typ != typ { + parse_error!( + self, + tok, + ParseErrorCode::generic, + "Expected %ls, but found %ls", + token_type_user_presentable_description(typ, ParseKeyword::none), + tok.user_presentable_description() + ); + return SourceRange::new(0, 0); + } + tok.range() + } + + /// The next token could not be parsed at the top level. + /// For example a trailing end like `begin ; end ; end` + /// Or an unexpected redirection like `>` + /// Consume it and add an error. + fn consume_excess_token_generating_error(&mut self) { + let tok = self.consume_any_token(); + + // In the rare case that we are parsing a freestanding argument list and not a job list, + // generate a generic error. + // TODO: this is a crummy message if we get a tokenizer error, for example: + // complete -c foo -a "'abc" + if self.top_type == Type::freestanding_argument_list { + parse_error!( + self, + tok, + ParseErrorCode::generic, + "Expected %ls, but found %ls", + token_type_user_presentable_description(ParseTokenType::string, ParseKeyword::none), + tok.user_presentable_description() + ); + return; + } + + assert!(self.top_type == Type::job_list); + match tok.typ { + ParseTokenType::string => { + // There are three keywords which end a job list. 
+ match tok.keyword { + ParseKeyword::kw_end => { + parse_error!( + self, + tok, + ParseErrorCode::unbalancing_end, + "'end' outside of a block" + ); + } + ParseKeyword::kw_else => { + parse_error!( + self, + tok, + ParseErrorCode::unbalancing_else, + "'else' builtin not inside of if block" + ); + } + ParseKeyword::kw_case => { + parse_error!( + self, + tok, + ParseErrorCode::unbalancing_case, + "'case' builtin not inside of switch block" + ); + } + _ => { + internal_error!( + self, + consume_excess_token_generating_error, + "Token %ls should not have prevented parsing a job list", + tok.user_presentable_description() + ); + } + } + } + ParseTokenType::pipe + | ParseTokenType::redirection + | ParseTokenType::background + | ParseTokenType::andand + | ParseTokenType::oror => { + parse_error!( + self, + tok, + ParseErrorCode::generic, + "Expected a string, but found %ls", + tok.user_presentable_description() + ); + } + ParseTokenType::tokenizer_error => { + parse_error!( + self, + tok, + ParseErrorCode::from(tok.tok_error), + "%ls", + tok.tok_error + ); + } + ParseTokenType::end => { + internal_error!( + self, + consume_excess_token_generating_error, + "End token should never be excess" + ); + } + ParseTokenType::terminate => { + internal_error!( + self, + consume_excess_token_generating_error, + "Terminate token should never be excess" + ); + } + _ => { + internal_error!( + self, + consume_excess_token_generating_error, + "Unexpected excess token type: %ls", + tok.user_presentable_description() + ); + } + } + } + + /// This is for optional values and for lists. + /// A true return means we should descend into the production, false means stop. + /// Note that the argument is always nullptr and should be ignored. It is provided strictly + /// for overloading purposes. 
+ fn can_parse(&mut self, node: &dyn Node) -> bool { + match node.typ() { + Type::job_conjunction => { + let token = self.peek_token(0); + if token.typ != ParseTokenType::string { + return false; + } + !matches!( + token.keyword, + // These end a job list. + ParseKeyword::kw_end | ParseKeyword::kw_else | ParseKeyword::kw_case + ) + } + Type::argument => self.peek_type(0) == ParseTokenType::string, + Type::redirection => self.peek_type(0) == ParseTokenType::redirection, + Type::argument_or_redirection => { + [ParseTokenType::string, ParseTokenType::redirection].contains(&self.peek_type(0)) + } + Type::variable_assignment => { + // Do we have a variable assignment at all? + if !self.peek_token(0).may_be_variable_assignment { + return false; + } + // What is the token after it? + match self.peek_type(1) { + ParseTokenType::string => { + // We have `a= cmd` and should treat it as a variable assignment. + true + } + ParseTokenType::terminate => { + // We have `a=` which is OK if we are allowing incomplete, an error + // otherwise. + self.allow_incomplete() + } + _ => { + // We have e.g. `a= >` which is an error. + // Note that we do not produce an error here. Instead we return false + // so this the token will be seen by allocate_populate_statement_contents. + false + } + } + } + Type::token_base => node + .as_token() + .unwrap() + .allows_token(self.peek_token(0).typ), + + // Note we have specific overloads for our keyword nodes, as they need custom logic. + Type::keyword_base => { + let keyword = node.as_keyword().unwrap(); + match keyword.allowed_keywords() { + // job conjunction decorator + [ParseKeyword::kw_and, ParseKeyword::kw_or] => { + // This is for a job conjunction like `and stuff` + // But if it's `and --help` then we treat it as an ordinary command. 
+ keyword.allows_keyword(self.peek_token(0).keyword) + && !self.peek_token(1).is_help_argument + } + // decorated statement decorator + [ParseKeyword::kw_command, ParseKeyword::kw_builtin, ParseKeyword::kw_exec] => { + // Here the keyword is 'command' or 'builtin' or 'exec'. + // `command stuff` executes a command called stuff. + // `command -n` passes the -n argument to the 'command' builtin. + // `command` by itself is a command. + if !keyword.allows_keyword(self.peek_token(0).keyword) { + return false; + } + let tok1 = self.peek_token(1); + tok1.typ == ParseTokenType::string && !tok1.is_dash_prefix_string() + } + [ParseKeyword::kw_time] => { + // Time keyword is only the time builtin if the next argument doesn't + // have a dash. + keyword.allows_keyword(self.peek_token(0).keyword) + && !self.peek_token(1).is_dash_prefix_string() + } + _ => panic!("Unexpected keyword in can_parse()"), + } + } + Type::job_continuation => self.peek_type(0) == ParseTokenType::pipe, + Type::job_conjunction_continuation => { + [ParseTokenType::andand, ParseTokenType::oror].contains(&self.peek_type(0)) + } + Type::andor_job => { + match self.peek_token(0).keyword { + ParseKeyword::kw_and | ParseKeyword::kw_or => { + // Check that the argument to and/or is a string that's not help. Otherwise + // it's either 'and --help' or a naked 'and', and not part of this list. + let nexttok = self.peek_token(1); + nexttok.typ == ParseTokenType::string && !nexttok.is_help_argument + } + _ => false, + } + } + Type::elseif_clause => { + self.peek_token(0).keyword == ParseKeyword::kw_else + && self.peek_token(1).keyword == ParseKeyword::kw_if + } + Type::else_clause => self.peek_token(0).keyword == ParseKeyword::kw_else, + Type::case_item => self.peek_token(0).keyword == ParseKeyword::kw_case, + _ => panic!("Unexpected token type in can_parse()"), + } + } + + /// Given that we are a list of type ListNodeType, whose contents type is ContentsNode, + /// populate as many elements as we can. 
+ /// If exhaust_stream is set, then keep going until we get parse_token_type_t::terminate. + fn populate_list(&mut self, list: &mut ListType, exhaust_stream: bool) + where + ::ContentsNode: NodeMut, + { + assert!(list.contents().is_empty(), "List is not initially empty"); + + // Do not attempt to parse a list if we are unwinding. + if self.unwinding { + assert!( + !exhaust_stream, + "exhaust_stream should only be set at top level, and so we should not be unwinding" + ); + // Mark in the list that it was unwound. + FLOG!( + ast_construction, + "%*sunwinding %ls", + self.spaces(), + "", + ast_type_to_string(list.typ()) + ); + assert!(list.contents().is_empty(), "Should be an empty list"); + return; + } + + // We're going to populate a vector with our nodes. + // Later on we will copy this to the heap with a single allocation. + let mut contents = vec![]; + + loop { + // If we are unwinding, then either we recover or we break the loop, dependent on the + // loop type. + if self.unwinding { + if !self.list_type_stops_unwind(list.typ()) { + break; + } + // We are going to stop unwinding. + // Rather hackish. Just chomp until we get to a string or end node. + loop { + let typ = self.peek_type(0); + if [ + ParseTokenType::string, + ParseTokenType::terminate, + ParseTokenType::end, + ] + .contains(&typ) + { + break; + } + let tok = self.tokens.pop(); + self.errors.push(tok.range()); + FLOG!( + ast_construction, + "%*schomping range %u-%u", + self.spaces(), + "", + tok.source_start, + tok.source_length + ); + } + FLOG!(ast_construction, "%*sdone unwinding", self.spaces(), ""); + self.unwinding = false; + } + + // Chomp semis and newlines. + self.chomp_extras(list.typ()); + + // Now try parsing a node. 
+ if let Some(node) = self.try_parse::() { + // #7201: Minimize reallocations of contents vector + if contents.is_empty() { + contents.reserve(64); + } + contents.push(node); + } else if exhaust_stream && self.peek_type(0) != ParseTokenType::terminate { + // We aren't allowed to stop. Produce an error and keep going. + self.consume_excess_token_generating_error() + } else { + // We either stop once we can't parse any more of this contents node, or we + // exhausted the stream as requested. + break; + } + } + + // Populate our list from our contents. + if !contents.is_empty() { + assert!( + contents.len() <= u32::MAX.try_into().unwrap(), + "Contents size out of bounds" + ); + assert!(list.contents().is_empty(), "List should still be empty"); + *list.contents_mut() = contents; + } + + FLOG!( + ast_construction, + "%*s%ls size: %lu", + self.spaces(), + "", + ast_type_to_string(list.typ()), + list.count() + ); + } + + /// Allocate and populate a statement contents pointer. + /// This must never return null. + fn allocate_populate_statement_contents(&mut self) -> Box { + // In case we get a parse error, we still need to return something non-null. Use a + // decorated statement; all of its leaf nodes will end up unsourced. + fn got_error(slf: &mut Populator<'_>) -> Box { + assert!(slf.unwinding, "Should have produced an error"); + new_decorated_statement(slf) + } + + fn new_decorated_statement(slf: &mut Populator<'_>) -> Box { + let embedded = slf.allocate_visit::(); + Box::new(StatementVariant::DecoratedStatement(*embedded)) + } + + if self.peek_token(0).typ == ParseTokenType::terminate && self.allow_incomplete() { + // This may happen if we just have a 'time' prefix. + // Construct a decorated statement, which will be unsourced. + self.allocate_visit::(); + } else if self.peek_token(0).typ != ParseTokenType::string { + // We may be unwinding already; do not produce another error. + // For example in `true | and`. 
+ parse_error!( + self, + self.peek_token(0), + ParseErrorCode::generic, + "Expected a command, but found %ls", + self.peek_token(0).user_presentable_description() + ); + return got_error(self); + } else if self.peek_token(0).may_be_variable_assignment { + // Here we have a variable assignment which we chose to not parse as a variable + // assignment because there was no string after it. + // Ensure we consume the token, so we don't get back here again at the same place. + parse_error!( + self, + self.consume_any_token(), + ParseErrorCode::bare_variable_assignment, + "" + ); + return got_error(self); + } + + // The only block-like builtin that takes any parameters is 'function'. So go to decorated + // statements if the subsequent token looks like '--'. The logic here is subtle: + // + // If we are 'begin', then we expect to be invoked with no arguments. + // If we are 'function', then we are a non-block if we are invoked with -h or --help + // If we are anything else, we require an argument, so do the same thing if the subsequent + // token is a statement terminator. + if self.peek_token(0).typ == ParseTokenType::string { + // If we are a function, then look for help arguments. Otherwise, if the next token + // looks like an option (starts with a dash), then parse it as a decorated statement. + if (self.peek_token(0).keyword == ParseKeyword::kw_function + && self.peek_token(1).is_help_argument) + || (self.peek_token(0).keyword != ParseKeyword::kw_function + && self.peek_token(1).has_dash_prefix) + { + return new_decorated_statement(self); + } + + // Likewise if the next token doesn't look like an argument at all. This corresponds to + // e.g. a "naked if". 
+            let naked_invocation_invokes_help = ![ParseKeyword::kw_begin, ParseKeyword::kw_end]
+                .contains(&self.peek_token(0).keyword);
+            if naked_invocation_invokes_help
+                && [ParseTokenType::end, ParseTokenType::terminate]
+                    .contains(&self.peek_token(1).typ)
+            {
+                return new_decorated_statement(self);
+            }
+        }
+
+        match self.peek_token(0).keyword {
+            ParseKeyword::kw_not | ParseKeyword::kw_exclam => {
+                let embedded = self.allocate_visit::<NotStatement>();
+                Box::new(StatementVariant::NotStatement(*embedded))
+            }
+            ParseKeyword::kw_for
+            | ParseKeyword::kw_while
+            | ParseKeyword::kw_function
+            | ParseKeyword::kw_begin => {
+                let embedded = self.allocate_visit::<BlockStatement>();
+                Box::new(StatementVariant::BlockStatement(*embedded))
+            }
+            ParseKeyword::kw_if => {
+                let embedded = self.allocate_visit::<IfStatement>();
+                Box::new(StatementVariant::IfStatement(*embedded))
+            }
+            ParseKeyword::kw_switch => {
+                let embedded = self.allocate_visit::<SwitchStatement>();
+                Box::new(StatementVariant::SwitchStatement(*embedded))
+            }
+            ParseKeyword::kw_end => {
+                // 'end' is forbidden as a command.
+                // For example, `if end` or `while end` will produce this error.
+                // We still have to descend into the decorated statement because
+                // we can't leave our pointer as null.
+                parse_error!(
+                    self,
+                    self.peek_token(0),
+                    ParseErrorCode::generic,
+                    "Expected a command, but found %ls",
+                    self.peek_token(0).user_presentable_description()
+                );
+                return got_error(self);
+            }
+            _ => new_decorated_statement(self),
+        }
+    }
+
+    /// Allocate and populate a block statement header.
+    /// This must never return null.
+    fn allocate_populate_block_header(&mut self) -> Box<BlockStatementHeaderVariant> {
+        Box::new(match self.peek_token(0).keyword {
+            ParseKeyword::kw_for => {
+                let embedded = self.allocate_visit::<ForHeader>();
+                BlockStatementHeaderVariant::ForHeader(*embedded)
+            }
+            ParseKeyword::kw_while => {
+                let embedded = self.allocate_visit::<WhileHeader>();
+                BlockStatementHeaderVariant::WhileHeader(*embedded)
+            }
+            ParseKeyword::kw_function => {
+                let embedded = self.allocate_visit::<FunctionHeader>();
+                BlockStatementHeaderVariant::FunctionHeader(*embedded)
+            }
+            ParseKeyword::kw_begin => {
+                let embedded = self.allocate_visit::<BeginHeader>();
+                BlockStatementHeaderVariant::BeginHeader(*embedded)
+            }
+            _ => {
+                internal_error!(
+                    self,
+                    allocate_populate_block_header,
+                    "should not have descended into block_header"
+                );
+            }
+        })
+    }
+
+    fn try_parse<T: NodeMut + Default>(&mut self) -> Option<Box<T>> {
+        // TODO Optimize this.
+        let prototype = T::default();
+        if !self.can_parse(&prototype) {
+            return None;
+        }
+        Some(self.allocate_visit())
+    }
+
+    /// Given a node type, allocate it and invoke its default constructor.
+    /// \return the resulting Node
+    fn allocate<T: NodeMut + Default>(&self) -> Box<T> {
+        let result = Box::<T>::default();
+        FLOG!(
+            ast_construction,
+            "%*smake %ls %p",
+            self.spaces(),
+            "",
+            ast_type_to_string(result.typ()),
+            format!("{result:p}")
+        );
+        result
+    }
+
+    // Given a node type, allocate it, invoke its default constructor,
+    // and then visit it as a field.
+    // \return the resulting Node pointer. It is never null.
+    fn allocate_visit<T: NodeMut + Default>(&mut self) -> Box<T> {
+        let mut result = Box::<T>::default();
+        self.visit_mut(&mut *result);
+        result
+    }
+
+    fn visit_argument(&mut self, arg: &mut Argument) {
+        if self.unsource_leaves() {
+            arg.range = None;
+            return;
+        }
+        arg.range = Some(self.consume_token_type(ParseTokenType::string));
+    }
+
+    fn visit_variable_assignment(&mut self, varas: &mut VariableAssignment) {
+        if self.unsource_leaves() {
+            varas.range = None;
+            return;
+        }
+        if !self.peek_token(0).may_be_variable_assignment {
+            internal_error!(
+                self,
+                visit_variable_assignment,
+                "Should not have created variable_assignment_t from this token"
+            );
+        }
+        varas.range = Some(self.consume_token_type(ParseTokenType::string));
+    }
+
+    fn visit_job_continuation(&mut self, node: &mut JobContinuation) {
+        // Special error handling to catch 'and' and 'or' in pipelines, like `true | and false`.
+        if [ParseKeyword::kw_and, ParseKeyword::kw_or].contains(&self.peek_token(1).keyword) {
+            parse_error!(
+                self,
+                self.peek_token(1),
+                ParseErrorCode::andor_in_pipeline,
+                INVALID_PIPELINE_CMD_ERR_MSG,
+                self.peek_token(1).keyword
+            );
+        }
+        node.accept_mut(self, false);
+    }
+
+    // Overload for token fields.
+ fn visit_token(&mut self, token: &mut dyn Token) { + if self.unsource_leaves() { + *token.range_mut() = None; + return; + } + + if !token.allows_token(self.peek_token(0).typ) { + if self.flags & PARSE_FLAG_LEAVE_UNTERMINATED + && [ + TokenizerError::unterminated_quote, + TokenizerError::unterminated_subshell, + ] + .contains(&self.peek_token(0).tok_error) + { + return; + } + + parse_error!( + self, + self.peek_token(0), + ParseErrorCode::generic, + "Expected %ls, but found %ls", + token_types_user_presentable_description(token.allowed_tokens()), + self.peek_token(0).user_presentable_description() + ); + *token.range_mut() = None; + return; + } + let tok = self.consume_any_token(); + *token.token_type_mut() = tok.typ; + *token.range_mut() = Some(tok.range()); + } + + // Overload for keyword fields. + fn visit_keyword(&mut self, keyword: &mut dyn Keyword) -> VisitResult { + if self.unsource_leaves() { + *keyword.range_mut() = None; + return VisitResult::Continue(()); + } + + if !keyword.allows_keyword(self.peek_token(0).keyword) { + *keyword.range_mut() = None; + + if self.flags & PARSE_FLAG_LEAVE_UNTERMINATED + && [ + TokenizerError::unterminated_quote, + TokenizerError::unterminated_subshell, + ] + .contains(&self.peek_token(0).tok_error) + { + return VisitResult::Continue(()); + } + + // Special error reporting for keyword_t. 
+ let allowed_keywords = keyword.allowed_keywords(); + if keyword.allowed_keywords() == [ParseKeyword::kw_end] { + return VisitResult::Break(MissingEndError { + allowed_keywords, + token: *self.peek_token(0), + }); + } else { + parse_error!( + self, + self.peek_token(0), + ParseErrorCode::generic, + "Expected %ls, but found %ls", + keywords_user_presentable_description(allowed_keywords), + self.peek_token(0).user_presentable_description(), + ); + return VisitResult::Continue(()); + } + } + let tok = self.consume_any_token(); + *keyword.keyword_mut() = tok.keyword; + *keyword.range_mut() = Some(tok.range()); + VisitResult::Continue(()) + } + + fn visit_maybe_newlines(&mut self, nls: &mut MaybeNewlines) { + if self.unsource_leaves() { + nls.range = None; + return; + } + let mut range = SourceRange::new(0, 0); + // TODO: it would be nice to have the start offset be the current position in the token + // stream, even if there are no newlines. + while self.peek_token(0).is_newline { + let r = self.consume_token_type(ParseTokenType::end); + if range.length == 0 { + range = r; + } else { + range.length = r.start + r.length - range.start + } + } + nls.range = Some(range); + } +} + +/// The status of our parser. +enum ParserStatus { + /// Parsing is going just fine, thanks for asking. + ok, + + /// We have exhausted the token stream, but the caller was OK with an incomplete parse tree. + /// All further leaf nodes should have the unsourced flag set. + unsourcing, + + /// We encountered an parse error and are "unwinding." + /// Do not consume any tokens until we get back to a list type which stops unwinding. 
+    unwinding,
+}
+
+fn parse_from_top(
+    src: &wstr,
+    flags: ParseTreeFlags,
+    out_errors: &mut Option<ParseErrorList>,
+    top_type: Type,
+) -> Ast {
+    assert!(
+        [Type::job_list, Type::freestanding_argument_list].contains(&top_type),
+        "Invalid top type"
+    );
+
+    let mut ast = Ast::default();
+
+    let mut pops = Populator::new(src, flags, top_type, out_errors);
+    if top_type == Type::job_list {
+        let mut list = pops.allocate::<JobList>();
+        pops.populate_list(&mut *list, true /* exhaust_stream */);
+        ast.top = list;
+    } else {
+        let mut list = pops.allocate::<FreestandingArgumentList>();
+        pops.populate_list(&mut list.arguments, true /* exhaust_stream */);
+        ast.top = list;
+    }
+
+    // Chomp trailing extras, etc.
+    pops.chomp_extras(Type::job_list);
+
+    ast.any_error = pops.any_error;
+    ast.extras = Extras {
+        comments: pops.tokens.comment_ranges,
+        semis: pops.semis,
+        errors: pops.errors,
+    };
+
+    if top_type == Type::job_list {
+        // Set all parent nodes.
+        // It turns out to be more convenient to do this after the parse phase.
+        ast.top_mut()
+            .as_mut_job_list()
+            .as_mut()
+            .unwrap()
+            .set_parents();
+    } else {
+        ast.top_mut()
+            .as_mut_freestanding_argument_list()
+            .as_mut()
+            .unwrap()
+            .set_parents();
+    }
+
+    ast
+}
+
+/// \return tokenizer flags corresponding to parse tree flags.
+impl From<ParseTreeFlags> for TokFlags {
+    fn from(flags: ParseTreeFlags) -> Self {
+        let mut tok_flags = TokFlags(0);
+        // Note we do not need to respect parse_flag_show_blank_lines, no clients are interested
+        // in them.
+        if flags & PARSE_FLAG_INCLUDE_COMMENTS {
+            tok_flags |= TOK_SHOW_COMMENTS;
+        }
+        if flags & PARSE_FLAG_ACCEPT_INCOMPLETE_TOKENS {
+            tok_flags |= TOK_ACCEPT_UNFINISHED;
+        }
+        if flags & PARSE_FLAG_CONTINUE_AFTER_ERROR {
+            tok_flags |= TOK_CONTINUE_AFTER_ERROR
+        }
+        tok_flags
+    }
+}
+
+/// Convert from Tokenizer's token type to a parse_token_t type.
+impl From<TokenType> for ParseTokenType {
+    fn from(token_type: TokenType) -> Self {
+        match token_type {
+            TokenType::string => ParseTokenType::string,
+            TokenType::pipe => ParseTokenType::pipe,
+            TokenType::andand => ParseTokenType::andand,
+            TokenType::oror => ParseTokenType::oror,
+            TokenType::end => ParseTokenType::end,
+            TokenType::background => ParseTokenType::background,
+            TokenType::redirect => ParseTokenType::redirection,
+            TokenType::error => ParseTokenType::tokenizer_error,
+            TokenType::comment => ParseTokenType::comment,
+            _ => panic!("bad token type"),
+        }
+    }
+}
+
+fn is_keyword_char(c: char) -> bool {
+    ('a'..='z').contains(&c)
+        || ('A'..='Z').contains(&c)
+        || ('0'..='9').contains(&c)
+        || c == '\''
+        || c == '"'
+        || c == '\\'
+        || c == '\n'
+        || c == '!'
+}
+
+/// Given a token, returns the keyword it matches, or ParseKeyword::none.
+fn keyword_for_token(tok: TokenType, token: &wstr) -> ParseKeyword {
+    /* Only strings can be keywords */
+    if tok != TokenType::string {
+        return ParseKeyword::none;
+    }
+
+    // If token is clean (which most are), we can compare it directly. Otherwise we have to expand
+    // it. We only expand quotes, and we don't want to do expensive expansions like tilde
+    // expansions. So we do our own "cleanliness" check; if we find a character not in our allowed
+    // set we know it's not a keyword, and if we never find a quote we don't have to expand! Note
+    // that this lowercase set could be shrunk to be just the characters that are in keywords.
+    let mut result = ParseKeyword::none;
+    let mut needs_expand = false;
+    let mut all_chars_valid = true;
+    for c in token.chars() {
+        if !is_keyword_char(c) {
+            all_chars_valid = false;
+            break;
+        }
+        // If we encounter a quote, we need expansion.
+        needs_expand = needs_expand || c == '"' || c == '\'' || c == '\\'
+    }
+
+    if all_chars_valid {
+        // Expand if necessary.
+ if !needs_expand { + result = ParseKeyword::from(token); + } else if let Some(unescaped) = unescape_string(token, UnescapeStringStyle::default()) { + result = ParseKeyword::from(&unescaped[..]); + } + } + result +} + +use crate::ffi_tests::add_test; +add_test!("test_ast_parse", || { + use crate::parse_constants::PARSE_FLAG_NONE; + let src = L!("echo"); + let ast = Ast::parse(src, PARSE_FLAG_NONE, &mut None); + assert!(!ast.any_error); +}); + +pub use ast_ffi::{Category, Type}; + +#[cxx::bridge] +#[allow(clippy::needless_lifetimes)] // false positive +pub mod ast_ffi { + extern "C++" { + include!("wutil.h"); + include!("tokenizer.h"); + include!("parse_constants.h"); + type wcharz_t = crate::wchar_ffi::wcharz_t; + type ParseTokenType = crate::parse_constants::ParseTokenType; + type ParseKeyword = crate::parse_constants::ParseKeyword; + type SourceRange = crate::parse_constants::SourceRange; + type ParseErrorList = crate::parse_constants::ParseErrorList; + type StatementDecoration = crate::parse_constants::StatementDecoration; + } + + #[derive(Debug)] + pub enum Category { + branch, + leaf, + list, + } + + #[derive(Debug)] + pub enum Type { + token_base, + keyword_base, + redirection, + variable_assignment, + variable_assignment_list, + argument_or_redirection, + argument_or_redirection_list, + statement, + job_pipeline, + job_conjunction, + for_header, + while_header, + function_header, + begin_header, + block_statement, + if_clause, + elseif_clause, + elseif_clause_list, + else_clause, + if_statement, + case_item, + switch_statement, + decorated_statement, + not_statement, + job_continuation, + job_continuation_list, + job_conjunction_continuation, + andor_job, + andor_job_list, + freestanding_argument_list, + token_conjunction, + job_conjunction_continuation_list, + maybe_newlines, + token_pipe, + case_item_list, + argument, + argument_list, + job_list, + } + extern "Rust" { + type Ast; + type NodeFfi<'a>; + type ExtrasFFI<'a>; + unsafe fn ast_parse_ffi( + src: 
&CxxWString, + flags: u8, + errors: *mut ParseErrorList, + ) -> Box; + unsafe fn ast_parse_argument_list_ffi( + src: &CxxWString, + flags: u8, + errors: *mut ParseErrorList, + ) -> Box; + unsafe fn errored(self: &Ast) -> bool; + #[cxx_name = "top"] + unsafe fn top_ffi(self: &Ast) -> Box>; + + #[cxx_name = "parent"] + unsafe fn parent_ffi<'a>(self: &'a NodeFfi<'a>) -> Box>; + + #[cxx_name = "dump"] + unsafe fn dump_ffi(self: &Ast, orig: &CxxWString) -> UniquePtr; + #[cxx_name = "extras"] + fn extras_ffi(self: &Ast) -> Box>; + unsafe fn comments<'a>(self: &'a ExtrasFFI<'a>) -> &'a [SourceRange]; + unsafe fn semis<'a>(self: &'a ExtrasFFI<'a>) -> &'a [SourceRange]; + unsafe fn errors<'a>(self: &'a ExtrasFFI<'a>) -> &'a [SourceRange]; + #[cxx_name = "ast_type_to_string"] + fn ast_type_to_string_ffi(typ: Type) -> wcharz_t; + type Traversal<'a>; + unsafe fn new_ast_traversal<'a>(root: &'a NodeFfi<'a>) -> Box>; + #[cxx_name = "next"] + unsafe fn next_ffi<'a>(self: &'a mut Traversal) -> Box>; + } + + #[rustfmt::skip] + extern "Rust" { + type BlockStatementHeaderVariant; + type StatementVariant; + + unsafe fn ptr<'a>(self: &'a NodeFfi<'a>) -> Box>; + unsafe fn describe(self: &NodeFfi<'_>) -> UniquePtr; + unsafe fn typ(self: &NodeFfi<'_>) -> Type; + unsafe fn category(self: &NodeFfi<'_>) -> Category; + unsafe fn pointer_eq(self: &NodeFfi<'_>, rhs: &NodeFfi) -> bool; + unsafe fn has_value(self: &NodeFfi<'_>) -> bool; + + unsafe fn kw(self: &NodeFfi<'_>) -> ParseKeyword; + unsafe fn token_type(self: &NodeFfi<'_>) -> ParseTokenType; + unsafe fn has_source(self: &NodeFfi<'_>) -> bool; + + fn token_type(self: &TokenConjunction) -> ParseTokenType; + + type AndorJobList; + type AndorJob; + type ArgumentList; + type Argument; + type ArgumentOrRedirectionList; + type ArgumentOrRedirection; + type BeginHeader; + type BlockStatement; + type CaseItemList; + type CaseItem; + type DecoratedStatementDecorator; + type DecoratedStatement; + type ElseClause; + type ElseifClauseList; + type 
ElseifClause; + type ForHeader; + type FreestandingArgumentList; + type FunctionHeader; + type IfClause; + type IfStatement; + type JobConjunctionContinuationList; + type JobConjunctionContinuation; + type JobConjunctionDecorator; + type JobConjunction; + type JobContinuationList; + type JobContinuation; + type JobList; + type JobPipeline; + type KeywordBegin; + type KeywordCase; + type KeywordElse; + type KeywordEnd; + type KeywordFor; + type KeywordFunction; + type KeywordIf; + type KeywordIn; + type KeywordNot; + type KeywordTime; + type KeywordWhile; + type MaybeNewlines; + type NotStatement; + type Redirection; + type SemiNl; + type Statement; + type String_; + type SwitchStatement; + type TokenBackground; + type TokenConjunction; + type TokenPipe; + type TokenRedirection; + type VariableAssignmentList; + type VariableAssignment; + type WhileHeader; + + fn count(self: &ArgumentList) -> usize; + fn empty(self: &ArgumentList) -> bool; + unsafe fn at(self: & ArgumentList, i: usize) -> *const Argument; + + fn count(self: &ArgumentOrRedirectionList) -> usize; + fn empty(self: &ArgumentOrRedirectionList) -> bool; + unsafe fn at(self: & ArgumentOrRedirectionList, i: usize) -> *const ArgumentOrRedirection; + + fn count(self: &JobList) -> usize; + fn empty(self: &JobList) -> bool; + unsafe fn at(self: & JobList, i: usize) -> *const JobConjunction; + + fn count(self: &JobContinuationList) -> usize; + fn empty(self: &JobContinuationList) -> bool; + unsafe fn at(self: & JobContinuationList, i: usize) -> *const JobContinuation; + + fn count(self: &ElseifClauseList) -> usize; + fn empty(self: &ElseifClauseList) -> bool; + unsafe fn at(self: & ElseifClauseList, i: usize) -> *const ElseifClause; + + fn count(self: &CaseItemList) -> usize; + fn empty(self: &CaseItemList) -> bool; + unsafe fn at(self: & CaseItemList, i: usize) -> *const CaseItem; + + fn count(self: &VariableAssignmentList) -> usize; + fn empty(self: &VariableAssignmentList) -> bool; + unsafe fn at(self: & 
VariableAssignmentList, i: usize) -> *const VariableAssignment; + + fn count(self: &JobConjunctionContinuationList) -> usize; + fn empty(self: &JobConjunctionContinuationList) -> bool; + unsafe fn at(self: & JobConjunctionContinuationList, i: usize) -> *const JobConjunctionContinuation; + + fn count(self: &AndorJobList) -> usize; + fn empty(self: &AndorJobList) -> bool; + unsafe fn at(self: & AndorJobList, i: usize) -> *const AndorJob; + + fn describe(self: &Statement) -> UniquePtr; + + fn kw(self: &JobConjunctionDecorator) -> ParseKeyword; + fn decoration(self: &DecoratedStatement) -> StatementDecoration; + + fn is_argument(self: &ArgumentOrRedirection) -> bool; + unsafe fn argument(self: & ArgumentOrRedirection) -> & Argument; + fn is_redirection(self: &ArgumentOrRedirection) -> bool; + unsafe fn redirection(self: & ArgumentOrRedirection) -> & Redirection; + + unsafe fn contents(self: &Statement) -> &StatementVariant; + unsafe fn oper(self: &Redirection) -> &TokenRedirection; + unsafe fn target(self: &Redirection) -> &String_; + unsafe fn argument_ffi(self: &ArgumentOrRedirection) -> &Argument; + unsafe fn redirection_ffi(self: &ArgumentOrRedirection) -> &Redirection; + fn has_time(self: &JobPipeline) -> bool; + unsafe fn time(self: &JobPipeline) -> &KeywordTime; + unsafe fn variables(self: &JobPipeline) -> &VariableAssignmentList; + unsafe fn statement(self: &JobPipeline) -> &Statement; + unsafe fn continuation(self: &JobPipeline) -> &JobContinuationList; + fn has_bg(self: &JobPipeline) -> bool; + unsafe fn bg(self: &JobPipeline) -> &TokenBackground; + fn has_decorator(self: &JobConjunction) -> bool; + unsafe fn decorator(self: &JobConjunction) -> &JobConjunctionDecorator; + unsafe fn job(self: &JobConjunction) -> &JobPipeline; + unsafe fn continuations(self: &JobConjunction) -> &JobConjunctionContinuationList; + fn has_semi_nl(self: &JobConjunction) -> bool; + unsafe fn semi_nl(self: &JobConjunction) -> &SemiNl; + unsafe fn var_name(self: &ForHeader) -> 
&String_; + unsafe fn args(self: &ForHeader) -> &ArgumentList; + unsafe fn semi_nl(self: &ForHeader) -> &SemiNl; + unsafe fn condition(self: &WhileHeader) -> &JobConjunction; + unsafe fn andor_tail(self: &WhileHeader) -> &AndorJobList; + unsafe fn first_arg(self: &FunctionHeader) -> &Argument; + unsafe fn args(self: &FunctionHeader) -> &ArgumentList; + unsafe fn semi_nl(self: &FunctionHeader) -> &SemiNl; + fn has_semi_nl(self: &BeginHeader) -> bool; + unsafe fn semi_nl(self: &BeginHeader) -> &SemiNl; + unsafe fn header(self: &BlockStatement) -> &BlockStatementHeaderVariant; + unsafe fn jobs(self: &BlockStatement) -> &JobList; + unsafe fn args_or_redirs(self: &BlockStatement) -> &ArgumentOrRedirectionList; + unsafe fn condition(self: &IfClause) -> &JobConjunction; + unsafe fn andor_tail(self: &IfClause) -> &AndorJobList; + unsafe fn body(self: &IfClause) -> &JobList; + unsafe fn if_clause(self: &ElseifClause) -> &IfClause; + unsafe fn semi_nl(self: &ElseClause) -> &SemiNl; + unsafe fn body(self: &ElseClause) -> &JobList; + unsafe fn if_clause(self: &IfStatement) -> &IfClause; + unsafe fn elseif_clauses(self: &IfStatement) -> &ElseifClauseList; + fn has_else_clause(self: &IfStatement) -> bool; + unsafe fn else_clause(self: &IfStatement) -> &ElseClause; + unsafe fn end(self: &IfStatement) -> &KeywordEnd; + unsafe fn args_or_redirs(self: &IfStatement) -> &ArgumentOrRedirectionList; + unsafe fn arguments(self: &CaseItem) -> &ArgumentList; + unsafe fn semi_nl(self: &CaseItem) -> &SemiNl; + unsafe fn body(self: &CaseItem) -> &JobList; + unsafe fn argument(self: &SwitchStatement) -> &Argument; + unsafe fn semi_nl(self: &SwitchStatement) -> &SemiNl; + unsafe fn cases(self: &SwitchStatement) -> &CaseItemList; + unsafe fn end(self: &SwitchStatement) -> &KeywordEnd; + unsafe fn args_or_redirs(self: &SwitchStatement) -> &ArgumentOrRedirectionList; + fn has_opt_decoration(self: &DecoratedStatement) -> bool; + unsafe fn opt_decoration(self: &DecoratedStatement) -> 
&DecoratedStatementDecorator; + unsafe fn command(self: &DecoratedStatement) -> &String_; + unsafe fn args_or_redirs(self: &DecoratedStatement) -> &ArgumentOrRedirectionList; + unsafe fn variables(self: &NotStatement) -> &VariableAssignmentList; + fn has_time(self: &NotStatement) -> bool; + unsafe fn time(self: &NotStatement) -> &KeywordTime; + unsafe fn contents(self: &NotStatement) -> &Statement; + unsafe fn pipe(self: &JobContinuation) -> &TokenPipe; + unsafe fn newlines(self: &JobContinuation) -> &MaybeNewlines; + unsafe fn variables(self: &JobContinuation) -> &VariableAssignmentList; + unsafe fn statement(self: &JobContinuation) -> &Statement; + unsafe fn conjunction(self: &JobConjunctionContinuation) -> &TokenConjunction; + unsafe fn newlines(self: &JobConjunctionContinuation) -> &MaybeNewlines; + unsafe fn job(self: &JobConjunctionContinuation) -> &JobPipeline; + unsafe fn job(self: &AndorJob) -> &JobConjunction; + unsafe fn arguments(self: &FreestandingArgumentList) -> &ArgumentList; + unsafe fn kw_begin(self: &BeginHeader) -> &KeywordBegin; + unsafe fn end(self: &BlockStatement) -> &KeywordEnd; + } + + #[rustfmt::skip] + extern "Rust" { + #[cxx_name="source"] fn source_ffi(self: &NodeFfi<'_>, orig: &CxxWString) -> UniquePtr; + #[cxx_name="source"] fn source_ffi(self: &Argument, orig: &CxxWString) -> UniquePtr; + #[cxx_name="source"] fn source_ffi(self: &VariableAssignment, orig: &CxxWString) -> UniquePtr; + #[cxx_name="source"] fn source_ffi(self: &String_, orig: &CxxWString) -> UniquePtr; + #[cxx_name="source"] fn source_ffi(self: &TokenRedirection, orig: &CxxWString) -> UniquePtr; + + #[cxx_name = "try_source_range"] unsafe fn try_source_range_ffi(self: &NodeFfi) -> bool; + #[cxx_name = "try_source_range"] fn try_source_range_ffi(self: &Argument) -> bool; + #[cxx_name = "try_source_range"] fn try_source_range_ffi(self: &JobPipeline) -> bool; + #[cxx_name = "try_source_range"] fn try_source_range_ffi(self: &String_) -> bool; + #[cxx_name = 
"try_source_range"] fn try_source_range_ffi(self: &BlockStatement) -> bool; + #[cxx_name = "try_source_range"] fn try_source_range_ffi(self: &KeywordEnd) -> bool; + #[cxx_name = "try_source_range"] fn try_source_range_ffi(self: &VariableAssignment) -> bool; + #[cxx_name = "try_source_range"] fn try_source_range_ffi(self: &SemiNl) -> bool; + + #[cxx_name = "source_range"] unsafe fn source_range_ffi(self: &NodeFfi) -> SourceRange; + #[cxx_name = "source_range"] fn source_range_ffi(self: &JobConjunctionDecorator) -> SourceRange; + #[cxx_name = "source_range"] fn source_range_ffi(self: &DecoratedStatement) -> SourceRange; + #[cxx_name = "source_range"] fn source_range_ffi(self: &Argument) -> SourceRange; + #[cxx_name = "source_range"] fn source_range_ffi(self: &JobPipeline) -> SourceRange; + #[cxx_name = "source_range"] fn source_range_ffi(self: &String_) -> SourceRange; + #[cxx_name = "source_range"] fn source_range_ffi(self: &BlockStatement) -> SourceRange; + #[cxx_name = "source_range"] fn source_range_ffi(self: &KeywordEnd) -> SourceRange; + #[cxx_name = "source_range"] fn source_range_ffi(self: &VariableAssignment) -> SourceRange; + #[cxx_name = "source_range"] fn source_range_ffi(self: &SemiNl) -> SourceRange; + } + + #[rustfmt::skip] + extern "Rust" { + unsafe fn try_as_block_statement(self: & StatementVariant) -> *const BlockStatement; + unsafe fn try_as_if_statement(self: & StatementVariant) -> *const IfStatement; + unsafe fn try_as_switch_statement(self: & StatementVariant) -> *const SwitchStatement; + unsafe fn try_as_decorated_statement(self: & StatementVariant) -> *const DecoratedStatement; + } + + #[rustfmt::skip] + extern "Rust" { + unsafe fn try_as_argument(self: &NodeFfi) -> *const Argument; + unsafe fn try_as_begin_header(self: &NodeFfi) -> *const BeginHeader; + unsafe fn try_as_block_statement(self: &NodeFfi) -> *const BlockStatement; + unsafe fn try_as_decorated_statement(self: &NodeFfi) -> *const DecoratedStatement; + unsafe fn 
try_as_for_header(self: &NodeFfi) -> *const ForHeader; + unsafe fn try_as_function_header(self: &NodeFfi) -> *const FunctionHeader; + unsafe fn try_as_if_clause(self: &NodeFfi) -> *const IfClause; + unsafe fn try_as_if_statement(self: &NodeFfi) -> *const IfStatement; + unsafe fn try_as_job_conjunction(self: &NodeFfi) -> *const JobConjunction; + unsafe fn try_as_job_conjunction_continuation(self: &NodeFfi) -> *const JobConjunctionContinuation; + unsafe fn try_as_job_continuation(self: &NodeFfi) -> *const JobContinuation; + unsafe fn try_as_job_list(self: &NodeFfi) -> *const JobList; + unsafe fn try_as_job_pipeline(self: &NodeFfi) -> *const JobPipeline; + unsafe fn try_as_not_statement(self: &NodeFfi) -> *const NotStatement; + unsafe fn try_as_switch_statement(self: &NodeFfi) -> *const SwitchStatement; + unsafe fn try_as_while_header(self: &NodeFfi) -> *const WhileHeader; + } + + #[rustfmt::skip] + extern "Rust" { + unsafe fn as_if_clause<'a>(self: &'a NodeFfi<'a>) -> &'a IfClause; + unsafe fn as_job_conjunction<'a>(self: &'a NodeFfi) -> &'a JobConjunction; + unsafe fn as_job_pipeline<'a>(self: &'a NodeFfi<'a>) -> &'a JobPipeline; + unsafe fn as_argument<'a>(self: &'a NodeFfi<'a>) -> &'a Argument; + unsafe fn as_begin_header<'a>(self: &'a NodeFfi<'a>) -> &'a BeginHeader; + unsafe fn as_block_statement<'a>(self: &'a NodeFfi<'a>) -> &'a BlockStatement; + unsafe fn as_decorated_statement<'a>(self: &'a NodeFfi<'a>) -> &'a DecoratedStatement; + unsafe fn as_for_header<'a>(self: &'a NodeFfi<'a>) -> &'a ForHeader; + unsafe fn as_freestanding_argument_list<'a>(self: &'a NodeFfi<'a>) -> &'a FreestandingArgumentList; + unsafe fn as_function_header<'a>(self: &'a NodeFfi<'a>) -> &'a FunctionHeader; + unsafe fn as_if_statement<'a>(self: &'a NodeFfi<'a>) -> &'a IfStatement; + unsafe fn as_job_conjunction_continuation<'a>(self: &'a NodeFfi<'a>) -> &'a JobConjunctionContinuation; + unsafe fn as_job_continuation<'a>(self: &'a NodeFfi<'a>) -> &'a JobContinuation; + unsafe fn 
as_job_list<'a>(self: &'a NodeFfi<'a>) -> &'a JobList; + unsafe fn as_not_statement<'a>(self: &'a NodeFfi<'a>) -> &'a NotStatement; + unsafe fn as_redirection<'a>(self: &'a NodeFfi<'a>) -> &'a Redirection; + unsafe fn as_statement<'a>(self: &'a NodeFfi<'a>) -> &'a Statement; + unsafe fn as_switch_statement<'a>(self: &'a NodeFfi<'a>) -> &'a SwitchStatement; + unsafe fn as_while_header<'a>(self: &'a NodeFfi<'a>) -> &'a WhileHeader; + } + + #[rustfmt::skip] + extern "Rust" { + unsafe fn ptr(self: &StatementVariant) -> Box>; + unsafe fn ptr(self: &BlockStatementHeaderVariant) -> Box>; + unsafe fn ptr(self: &AndorJobList) -> Box>; + unsafe fn ptr(self: &AndorJob) -> Box>; + unsafe fn ptr(self: &ArgumentList) -> Box>; + unsafe fn ptr(self: &Argument) -> Box>; + unsafe fn ptr(self: &ArgumentOrRedirectionList) -> Box>; + unsafe fn ptr(self: &ArgumentOrRedirection) -> Box>; + unsafe fn ptr(self: &BeginHeader) -> Box>; + unsafe fn ptr(self: &BlockStatement) -> Box>; + unsafe fn ptr(self: &CaseItemList) -> Box>; + unsafe fn ptr(self: &CaseItem) -> Box>; + unsafe fn ptr(self: &DecoratedStatementDecorator) -> Box>; + unsafe fn ptr(self: &DecoratedStatement) -> Box>; + unsafe fn ptr(self: &ElseClause) -> Box>; + unsafe fn ptr(self: &ElseifClauseList) -> Box>; + unsafe fn ptr(self: &ElseifClause) -> Box>; + unsafe fn ptr(self: &ForHeader) -> Box>; + unsafe fn ptr(self: &FreestandingArgumentList) -> Box>; + unsafe fn ptr(self: &FunctionHeader) -> Box>; + unsafe fn ptr(self: &IfClause) -> Box>; + unsafe fn ptr(self: &IfStatement) -> Box>; + unsafe fn ptr(self: &JobConjunctionContinuationList) -> Box>; + unsafe fn ptr(self: &JobConjunctionContinuation) -> Box>; + unsafe fn ptr(self: &JobConjunctionDecorator) -> Box>; + unsafe fn ptr(self: &JobConjunction) -> Box>; + unsafe fn ptr(self: &JobContinuationList) -> Box>; + unsafe fn ptr(self: &JobContinuation) -> Box>; + unsafe fn ptr(self: &JobList) -> Box>; + unsafe fn ptr(self: &JobPipeline) -> Box>; + unsafe fn ptr(self: 
&KeywordBegin) -> Box>; + unsafe fn ptr(self: &KeywordCase) -> Box>; + unsafe fn ptr(self: &KeywordElse) -> Box>; + unsafe fn ptr(self: &KeywordEnd) -> Box>; + unsafe fn ptr(self: &KeywordFor) -> Box>; + unsafe fn ptr(self: &KeywordFunction) -> Box>; + unsafe fn ptr(self: &KeywordIf) -> Box>; + unsafe fn ptr(self: &KeywordIn) -> Box>; + unsafe fn ptr(self: &KeywordNot) -> Box>; + unsafe fn ptr(self: &KeywordTime) -> Box>; + unsafe fn ptr(self: &KeywordWhile) -> Box>; + unsafe fn ptr(self: &MaybeNewlines) -> Box>; + unsafe fn ptr(self: &NotStatement) -> Box>; + unsafe fn ptr(self: &Redirection) -> Box>; + unsafe fn ptr(self: &SemiNl) -> Box>; + unsafe fn ptr(self: &Statement) -> Box>; + unsafe fn ptr(self: &String_) -> Box>; + unsafe fn ptr(self: &SwitchStatement) -> Box>; + unsafe fn ptr(self: &TokenBackground) -> Box>; + unsafe fn ptr(self: &TokenConjunction) -> Box>; + unsafe fn ptr(self: &TokenPipe) -> Box>; + unsafe fn ptr(self: &TokenRedirection) -> Box>; + unsafe fn ptr(self: &VariableAssignmentList) -> Box>; + unsafe fn ptr(self: &VariableAssignment) -> Box>; + unsafe fn ptr(self: &WhileHeader) -> Box>; + } + + #[rustfmt::skip] + extern "Rust" { + unsafe fn range(self: &VariableAssignment) -> SourceRange; + unsafe fn range(self: &TokenConjunction) -> SourceRange; + unsafe fn range(self: &MaybeNewlines) -> SourceRange; + unsafe fn range(self: &TokenPipe) -> SourceRange; + unsafe fn range(self: &KeywordNot) -> SourceRange; + unsafe fn range(self: &DecoratedStatementDecorator) -> SourceRange; + unsafe fn range(self: &KeywordEnd) -> SourceRange; + unsafe fn range(self: &KeywordCase) -> SourceRange; + unsafe fn range(self: &KeywordElse) -> SourceRange; + unsafe fn range(self: &KeywordIf) -> SourceRange; + unsafe fn range(self: &KeywordBegin) -> SourceRange; + unsafe fn range(self: &KeywordFunction) -> SourceRange; + unsafe fn range(self: &KeywordWhile) -> SourceRange; + unsafe fn range(self: &KeywordFor) -> SourceRange; + unsafe fn range(self: &KeywordIn) -> 
SourceRange; + unsafe fn range(self: &SemiNl) -> SourceRange; + unsafe fn range(self: &JobConjunctionDecorator) -> SourceRange; + unsafe fn range(self: &TokenBackground) -> SourceRange; + unsafe fn range(self: &KeywordTime) -> SourceRange; + unsafe fn range(self: &TokenRedirection) -> SourceRange; + unsafe fn range(self: &String_) -> SourceRange; + unsafe fn range(self: &Argument) -> SourceRange; + } +} + +impl Ast { + fn extras_ffi(self: &Ast) -> Box> { + Box::new(ExtrasFFI(&self.extras)) + } +} + +struct ExtrasFFI<'a>(&'a Extras); + +impl<'a> ExtrasFFI<'a> { + fn comments(&self) -> &'a [SourceRange] { + &self.0.comments + } + fn semis(&self) -> &'a [SourceRange] { + &self.0.semis + } + fn errors(&self) -> &'a [SourceRange] { + &self.0.errors + } +} + +unsafe impl ExternType for Ast { + type Id = type_id!("Ast"); + type Kind = cxx::kind::Opaque; +} + +impl Ast { + fn top_ffi(&self) -> Box { + Box::new(NodeFfi::new(self.top.as_node())) + } + fn dump_ffi(&self, orig: &CxxWString) -> UniquePtr { + self.dump(&orig.from_ffi()).to_ffi() + } +} + +fn ast_parse_ffi(src: &CxxWString, flags: u8, errors: *mut ParseErrorList) -> Box { + let mut out_errors: Option = if errors.is_null() { + None + } else { + Some(unsafe { &*errors }.clone()) + }; + let ast = Box::new(Ast::parse( + &src.from_ffi(), + ParseTreeFlags(flags), + &mut out_errors, + )); + if let Some(out_errors) = out_errors { + unsafe { *errors = out_errors }; + } + ast +} + +fn ast_parse_argument_list_ffi( + src: &CxxWString, + flags: u8, + errors: *mut ParseErrorList, +) -> Box { + let mut out_errors: Option = if errors.is_null() { + None + } else { + Some(unsafe { &*errors }.clone()) + }; + let ast = Box::new(Ast::parse_argument_list( + &src.from_ffi(), + ParseTreeFlags(flags), + &mut out_errors, + )); + if let Some(out_errors) = out_errors { + unsafe { *errors = out_errors }; + } + ast +} + +fn new_ast_traversal<'a>(root: &'a NodeFfi<'a>) -> Box> { + Box::new(Traversal::new(root.as_node())) +} + +impl<'a> 
Traversal<'a> { + fn next_ffi(&mut self) -> Box> { + Box::new(match self.next() { + Some(node) => NodeFfi::Reference(node), + None => NodeFfi::None, + }) + } +} + +impl TokenConjunction { + fn token_type(&self) -> ParseTokenType { + (self as &dyn Token).token_type() + } +} + +impl Statement { + fn contents(&self) -> &StatementVariant { + &self.contents + } +} + +#[derive(Clone)] +pub enum NodeFfi<'a> { + None, + Reference(&'a dyn Node), + Pointer(*const dyn Node), +} + +unsafe impl ExternType for NodeFfi<'_> { + type Id = type_id!("NodeFfi"); + type Kind = cxx::kind::Opaque; +} + +impl<'a> NodeFfi<'a> { + pub fn new(node: &'a dyn Node) -> Self { + NodeFfi::Reference(node) + } + fn has_value(&self) -> bool { + !matches!(self, NodeFfi::None) + } + pub fn as_node(&self) -> &dyn Node { + match *self { + NodeFfi::None => panic!("tried to dereference null node"), + NodeFfi::Reference(node) => node, + NodeFfi::Pointer(node) => unsafe { &*node }, + } + } + fn parent_ffi(&self) -> Box> { + Box::new(match *self.as_node().parent_ffi() { + Some(node) => NodeFfi::Pointer(node), + None => NodeFfi::None, + }) + } + fn category(&self) -> Category { + self.as_node().category() + } + fn typ(&self) -> Type { + self.as_node().typ() + } + fn describe(&self) -> UniquePtr { + self.as_node().describe().to_ffi() + } + // Pointer comparison + fn pointer_eq(&self, rhs: &NodeFfi) -> bool { + std::ptr::eq(self.as_node().as_ptr(), rhs.as_node().as_ptr()) + } + fn kw(&self) -> ParseKeyword { + self.as_node().as_keyword().unwrap().keyword() + } + fn token_type(&self) -> ParseTokenType { + self.as_node().as_token().unwrap().token_type() + } + fn has_source(&self) -> bool { + self.as_node().as_leaf().unwrap().range().is_some() + } + fn ptr(&self) -> Box> { + Box::new(self.clone()) + } + fn try_source_range_ffi(&self) -> bool { + self.as_node().try_source_range().is_some() + } + fn source_range_ffi(&self) -> SourceRange { + self.as_node().source_range() + } + fn source_ffi(&self, orig: &CxxWString) 
-> UniquePtr { + self.as_node().source(&orig.from_ffi()).to_ffi() + } +} + +impl Argument { + fn source_ffi(&self, orig: &CxxWString) -> UniquePtr { + self.source(&orig.from_ffi()).to_ffi() + } +} +impl VariableAssignment { + fn source_ffi(&self, orig: &CxxWString) -> UniquePtr { + self.source(&orig.from_ffi()).to_ffi() + } +} +impl String_ { + fn source_ffi(&self, orig: &CxxWString) -> UniquePtr { + self.source(&orig.from_ffi()).to_ffi() + } +} +impl TokenRedirection { + fn source_ffi(&self, orig: &CxxWString) -> UniquePtr { + self.source(&orig.from_ffi()).to_ffi() + } +} + +impl ArgumentList { + fn count(&self) -> usize { + >::count(self) + } + fn empty(&self) -> bool { + self.is_empty() + } + fn at(&self, i: usize) -> *const Argument { + &self[i] + } +} + +impl ArgumentOrRedirectionList { + fn count(&self) -> usize { + >::count(self) + } + fn empty(&self) -> bool { + self.is_empty() + } + fn at(&self, i: usize) -> *const ArgumentOrRedirection { + if i >= self.count() { + std::ptr::null() + } else { + &self[i] + } + } +} + +impl JobList { + fn count(&self) -> usize { + >::count(self) + } + fn empty(&self) -> bool { + self.is_empty() + } + fn at(&self, i: usize) -> *const JobConjunction { + if i >= self.count() { + std::ptr::null() + } else { + &self[i] + } + } +} + +impl JobContinuationList { + fn count(&self) -> usize { + >::count(self) + } + fn empty(&self) -> bool { + self.is_empty() + } + fn at(&self, i: usize) -> *const JobContinuation { + if i >= self.count() { + std::ptr::null() + } else { + &self[i] + } + } +} + +impl ElseifClauseList { + fn count(&self) -> usize { + >::count(self) + } + fn empty(&self) -> bool { + self.is_empty() + } + fn at(&self, i: usize) -> *const ElseifClause { + if i >= self.count() { + std::ptr::null() + } else { + &self[i] + } + } +} + +impl CaseItemList { + fn count(&self) -> usize { + >::count(self) + } + fn empty(&self) -> bool { + self.is_empty() + } + fn at(&self, i: usize) -> *const CaseItem { + if i >= self.count() { + 
std::ptr::null() + } else { + &self[i] + } + } +} + +impl VariableAssignmentList { + fn count(&self) -> usize { + >::count(self) + } + fn empty(&self) -> bool { + self.is_empty() + } + fn at(&self, i: usize) -> *const VariableAssignment { + if i >= self.count() { + std::ptr::null() + } else { + &self[i] + } + } +} + +impl JobConjunctionContinuationList { + fn count(&self) -> usize { + >::count(self) + } + fn empty(&self) -> bool { + self.is_empty() + } + fn at(&self, i: usize) -> *const JobConjunctionContinuation { + if i >= self.count() { + std::ptr::null() + } else { + &self[i] + } + } +} + +impl AndorJobList { + fn count(&self) -> usize { + >::count(self) + } + fn empty(&self) -> bool { + self.is_empty() + } + fn at(&self, i: usize) -> *const AndorJob { + if i >= self.count() { + std::ptr::null() + } else { + &self[i] + } + } +} + +impl Statement { + fn describe(&self) -> UniquePtr { + (self as &dyn Node).describe().to_ffi() + } +} + +impl JobConjunctionDecorator { + fn kw(&self) -> ParseKeyword { + self.keyword() + } + fn source_range_ffi(&self) -> SourceRange { + self.source_range() + } +} +impl DecoratedStatement { + fn source_range_ffi(&self) -> SourceRange { + self.source_range() + } +} + +impl Argument { + fn try_source_range_ffi(&self) -> bool { + self.try_source_range().is_some() + } + fn source_range_ffi(&self) -> SourceRange { + self.source_range() + } +} + +impl JobPipeline { + fn try_source_range_ffi(&self) -> bool { + self.try_source_range().is_some() + } + fn source_range_ffi(&self) -> SourceRange { + self.source_range() + } +} + +impl String_ { + fn try_source_range_ffi(&self) -> bool { + self.try_source_range().is_some() + } + fn source_range_ffi(&self) -> SourceRange { + self.source_range() + } +} + +impl BlockStatement { + fn try_source_range_ffi(&self) -> bool { + self.try_source_range().is_some() + } + fn source_range_ffi(&self) -> SourceRange { + self.source_range() + } +} + +impl KeywordEnd { + fn try_source_range_ffi(&self) -> bool { + 
self.try_source_range().is_some() + } + fn source_range_ffi(&self) -> SourceRange { + self.source_range() + } +} + +impl VariableAssignment { + fn try_source_range_ffi(&self) -> bool { + self.try_source_range().is_some() + } + fn source_range_ffi(&self) -> SourceRange { + self.source_range() + } +} + +impl SemiNl { + fn try_source_range_ffi(&self) -> bool { + self.try_source_range().is_some() + } + fn source_range_ffi(&self) -> SourceRange { + self.source_range() + } +} + +impl Redirection { + fn oper(&self) -> &TokenRedirection { + &self.oper + } +} +impl Redirection { + fn target(&self) -> &String_ { + &self.target + } +} +impl ArgumentOrRedirection { + fn argument_ffi(&self) -> &Argument { + self.argument() + } +} +impl ArgumentOrRedirection { + fn redirection_ffi(&self) -> &Redirection { + self.redirection() + } +} +impl JobPipeline { + fn has_time(&self) -> bool { + self.time.is_some() + } +} +impl JobPipeline { + fn time(&self) -> &KeywordTime { + self.time.as_ref().unwrap() + } +} +impl JobPipeline { + fn variables(&self) -> &VariableAssignmentList { + &self.variables + } +} +impl JobPipeline { + fn statement(&self) -> &Statement { + &self.statement + } +} +impl JobPipeline { + fn continuation(&self) -> &JobContinuationList { + &self.continuation + } +} +impl JobPipeline { + fn has_bg(&self) -> bool { + self.bg.is_some() + } +} +impl JobPipeline { + fn bg(&self) -> &TokenBackground { + self.bg.as_ref().unwrap() + } +} +impl JobConjunction { + fn has_decorator(&self) -> bool { + self.decorator.is_some() + } +} +impl JobConjunction { + fn decorator(&self) -> &JobConjunctionDecorator { + self.decorator.as_ref().unwrap() + } +} +impl JobConjunction { + fn job(&self) -> &JobPipeline { + &self.job + } +} +impl JobConjunction { + fn continuations(&self) -> &JobConjunctionContinuationList { + &self.continuations + } +} +impl JobConjunction { + fn has_semi_nl(&self) -> bool { + self.semi_nl.is_some() + } +} +impl JobConjunction { + fn semi_nl(&self) -> &SemiNl { + 
self.semi_nl.as_ref().unwrap() + } +} +impl ForHeader { + fn var_name(&self) -> &String_ { + &self.var_name + } +} +impl ForHeader { + fn args(&self) -> &ArgumentList { + &self.args + } +} +impl ForHeader { + fn semi_nl(&self) -> &SemiNl { + &self.semi_nl + } +} +impl WhileHeader { + fn condition(&self) -> &JobConjunction { + &self.condition + } +} +impl WhileHeader { + fn andor_tail(&self) -> &AndorJobList { + &self.andor_tail + } +} +impl FunctionHeader { + fn first_arg(&self) -> &Argument { + &self.first_arg + } +} +impl FunctionHeader { + fn args(&self) -> &ArgumentList { + &self.args + } +} +impl FunctionHeader { + fn semi_nl(&self) -> &SemiNl { + &self.semi_nl + } +} +impl BeginHeader { + fn has_semi_nl(&self) -> bool { + self.semi_nl.is_some() + } +} +impl BeginHeader { + fn semi_nl(&self) -> &SemiNl { + self.semi_nl.as_ref().unwrap() + } +} +impl BlockStatement { + fn header(&self) -> &BlockStatementHeaderVariant { + &self.header + } +} +impl BlockStatement { + fn jobs(&self) -> &JobList { + &self.jobs + } +} +impl BlockStatement { + fn args_or_redirs(&self) -> &ArgumentOrRedirectionList { + &self.args_or_redirs + } +} +impl IfClause { + fn condition(&self) -> &JobConjunction { + &self.condition + } +} +impl IfClause { + fn andor_tail(&self) -> &AndorJobList { + &self.andor_tail + } +} +impl IfClause { + fn body(&self) -> &JobList { + &self.body + } +} +impl ElseifClause { + fn if_clause(&self) -> &IfClause { + &self.if_clause + } +} +impl ElseClause { + fn semi_nl(&self) -> &SemiNl { + &self.semi_nl + } +} +impl ElseClause { + fn body(&self) -> &JobList { + &self.body + } +} +impl IfStatement { + fn if_clause(&self) -> &IfClause { + &self.if_clause + } +} +impl IfStatement { + fn elseif_clauses(&self) -> &ElseifClauseList { + &self.elseif_clauses + } +} +impl IfStatement { + fn has_else_clause(&self) -> bool { + self.else_clause.is_some() + } +} +impl IfStatement { + fn else_clause(&self) -> &ElseClause { + self.else_clause.as_ref().unwrap() + } +} +impl 
IfStatement { + fn end(&self) -> &KeywordEnd { + &self.end + } +} +impl IfStatement { + fn args_or_redirs(&self) -> &ArgumentOrRedirectionList { + &self.args_or_redirs + } +} +impl CaseItem { + fn arguments(&self) -> &ArgumentList { + &self.arguments + } +} +impl CaseItem { + fn semi_nl(&self) -> &SemiNl { + &self.semi_nl + } +} +impl CaseItem { + fn body(&self) -> &JobList { + &self.body + } +} +impl SwitchStatement { + fn argument(&self) -> &Argument { + &self.argument + } +} +impl SwitchStatement { + fn semi_nl(&self) -> &SemiNl { + &self.semi_nl + } +} +impl SwitchStatement { + fn cases(&self) -> &CaseItemList { + &self.cases + } +} +impl SwitchStatement { + fn end(&self) -> &KeywordEnd { + &self.end + } +} +impl SwitchStatement { + fn args_or_redirs(&self) -> &ArgumentOrRedirectionList { + &self.args_or_redirs + } +} +impl DecoratedStatement { + fn has_opt_decoration(&self) -> bool { + self.opt_decoration.is_some() + } +} +impl DecoratedStatement { + fn opt_decoration(&self) -> &DecoratedStatementDecorator { + self.opt_decoration.as_ref().unwrap() + } +} +impl DecoratedStatement { + fn command(&self) -> &String_ { + &self.command + } +} +impl DecoratedStatement { + fn args_or_redirs(&self) -> &ArgumentOrRedirectionList { + &self.args_or_redirs + } +} +impl NotStatement { + fn variables(&self) -> &VariableAssignmentList { + &self.variables + } +} +impl NotStatement { + fn has_time(&self) -> bool { + self.time.is_some() + } +} +impl NotStatement { + fn time(&self) -> &KeywordTime { + self.time.as_ref().unwrap() + } +} +impl NotStatement { + fn contents(&self) -> &Statement { + &self.contents + } +} +impl JobContinuation { + fn pipe(&self) -> &TokenPipe { + &self.pipe + } +} +impl JobContinuation { + fn newlines(&self) -> &MaybeNewlines { + &self.newlines + } +} +impl JobContinuation { + fn variables(&self) -> &VariableAssignmentList { + &self.variables + } +} +impl JobContinuation { + fn statement(&self) -> &Statement { + &self.statement + } +} +impl 
JobConjunctionContinuation { + fn conjunction(&self) -> &TokenConjunction { + &self.conjunction + } +} +impl JobConjunctionContinuation { + fn newlines(&self) -> &MaybeNewlines { + &self.newlines + } +} +impl JobConjunctionContinuation { + fn job(&self) -> &JobPipeline { + &self.job + } +} +impl AndorJob { + fn job(&self) -> &JobConjunction { + &self.job + } +} +impl FreestandingArgumentList { + fn arguments(&self) -> &ArgumentList { + &self.arguments + } +} +impl BeginHeader { + fn kw_begin(&self) -> &KeywordBegin { + &self.kw_begin + } +} +impl BlockStatement { + fn end(&self) -> &KeywordEnd { + &self.end + } +} + +impl StatementVariant { + fn try_as_not_statement(&self) -> *const NotStatement { + match self { + StatementVariant::NotStatement(node) => node, + _ => std::ptr::null(), + } + } + fn try_as_block_statement(&self) -> *const BlockStatement { + match self { + StatementVariant::BlockStatement(node) => node, + _ => std::ptr::null(), + } + } + fn try_as_if_statement(&self) -> *const IfStatement { + match self { + StatementVariant::IfStatement(node) => node, + _ => std::ptr::null(), + } + } + fn try_as_switch_statement(&self) -> *const SwitchStatement { + match self { + StatementVariant::SwitchStatement(node) => node, + _ => std::ptr::null(), + } + } + fn try_as_decorated_statement(&self) -> *const DecoratedStatement { + match self { + StatementVariant::DecoratedStatement(node) => node, + _ => std::ptr::null(), + } + } +} + +#[rustfmt::skip] +impl NodeFfi<'_> { + fn try_as_argument(&self) -> *const Argument { + match self.as_node().as_argument() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_begin_header(&self) -> *const BeginHeader { + match self.as_node().as_begin_header() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_block_statement(&self) -> *const BlockStatement { + match self.as_node().as_block_statement() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_decorated_statement(&self) 
-> *const DecoratedStatement { + match self.as_node().as_decorated_statement() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_for_header(&self) -> *const ForHeader { + match self.as_node().as_for_header() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_function_header(&self) -> *const FunctionHeader { + match self.as_node().as_function_header() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_if_clause(&self) -> *const IfClause { + match self.as_node().as_if_clause() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_if_statement(&self) -> *const IfStatement { + match self.as_node().as_if_statement() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_job_conjunction(&self) -> *const JobConjunction { + match self.as_node().as_job_conjunction() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_job_conjunction_continuation(&self) -> *const JobConjunctionContinuation { + match self.as_node().as_job_conjunction_continuation() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_job_continuation(&self) -> *const JobContinuation { + match self.as_node().as_job_continuation() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_job_list(&self) -> *const JobList { + match self.as_node().as_job_list() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_job_pipeline(&self) -> *const JobPipeline { + match self.as_node().as_job_pipeline() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_not_statement(&self) -> *const NotStatement { + match self.as_node().as_not_statement() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_switch_statement(&self) -> *const SwitchStatement { + match self.as_node().as_switch_statement() { + Some(node) => node, + None => std::ptr::null(), + } + } + fn try_as_while_header(&self) -> *const WhileHeader { + match 
self.as_node().as_while_header() { + Some(node) => node, + None => std::ptr::null(), + } + } +} + +#[rustfmt::skip] +impl NodeFfi<'_> { + fn as_if_clause(&self) -> &IfClause { + self.as_node().as_if_clause().unwrap() + } + fn as_job_conjunction(&self) -> &JobConjunction { + self.as_node().as_job_conjunction().unwrap() + } + fn as_job_pipeline(&self) -> &JobPipeline { + self.as_node().as_job_pipeline().unwrap() + } + fn as_argument(&self) -> &Argument { + self.as_node().as_argument().unwrap() + } + fn as_begin_header(&self) -> &BeginHeader { + self.as_node().as_begin_header().unwrap() + } + fn as_block_statement(&self) -> &BlockStatement { + self.as_node().as_block_statement().unwrap() + } + fn as_decorated_statement(&self) -> &DecoratedStatement { + self.as_node().as_decorated_statement().unwrap() + } + fn as_for_header(&self) -> &ForHeader { + self.as_node().as_for_header().unwrap() + } + fn as_freestanding_argument_list(&self) -> &FreestandingArgumentList { + self.as_node().as_freestanding_argument_list().unwrap() + } + fn as_function_header(&self) -> &FunctionHeader { + self.as_node().as_function_header().unwrap() + } + fn as_if_statement(&self) -> &IfStatement { + self.as_node().as_if_statement().unwrap() + } + fn as_job_conjunction_continuation(&self) -> &JobConjunctionContinuation { + self.as_node().as_job_conjunction_continuation().unwrap() + } + fn as_job_continuation(&self) -> &JobContinuation { + self.as_node().as_job_continuation().unwrap() + } + fn as_job_list(&self) -> &JobList { + self.as_node().as_job_list().unwrap() + } + fn as_not_statement(&self) -> &NotStatement { + self.as_node().as_not_statement().unwrap() + } + fn as_redirection(&self) -> &Redirection { + self.as_node().as_redirection().unwrap() + } + fn as_statement(&self) -> &Statement { + self.as_node().as_statement().unwrap() + } + fn as_switch_statement(&self) -> &SwitchStatement { + self.as_node().as_switch_statement().unwrap() + } + fn as_while_header(&self) -> &WhileHeader { + 
self.as_node().as_while_header().unwrap() + } +} + +impl StatementVariant { + fn ptr(&self) -> Box> { + match self { + StatementVariant::None => panic!(), + StatementVariant::NotStatement(node) => node.ptr(), + StatementVariant::BlockStatement(node) => node.ptr(), + StatementVariant::IfStatement(node) => node.ptr(), + StatementVariant::SwitchStatement(node) => node.ptr(), + StatementVariant::DecoratedStatement(node) => node.ptr(), + } + } +} +impl BlockStatementHeaderVariant { + fn ptr(&self) -> Box> { + match self { + BlockStatementHeaderVariant::None => panic!(), + BlockStatementHeaderVariant::ForHeader(node) => node.ptr(), + BlockStatementHeaderVariant::WhileHeader(node) => node.ptr(), + BlockStatementHeaderVariant::FunctionHeader(node) => node.ptr(), + BlockStatementHeaderVariant::BeginHeader(node) => node.ptr(), + } + } +} + +impl AndorJobList { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl AndorJob { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl ArgumentList { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl Argument { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl ArgumentOrRedirectionList { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl ArgumentOrRedirection { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl BeginHeader { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl BlockStatement { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl CaseItemList { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl CaseItem { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl DecoratedStatementDecorator { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl DecoratedStatement { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl ElseClause { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl ElseifClauseList 
{ + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl ElseifClause { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl ForHeader { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl FreestandingArgumentList { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl FunctionHeader { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl IfClause { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl IfStatement { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl JobConjunctionContinuationList { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl JobConjunctionContinuation { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl JobConjunctionDecorator { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl JobConjunction { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl JobContinuationList { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl JobContinuation { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl JobList { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl JobPipeline { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl KeywordBegin { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl KeywordCase { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl KeywordElse { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl KeywordEnd { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl KeywordFor { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl KeywordFunction { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl KeywordIf { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl KeywordIn { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl KeywordNot 
{ + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl KeywordTime { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl KeywordWhile { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl MaybeNewlines { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl NotStatement { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl Redirection { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl SemiNl { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl Statement { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl String_ { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl SwitchStatement { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl TokenBackground { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl TokenConjunction { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl TokenPipe { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl TokenRedirection { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl VariableAssignmentList { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl VariableAssignment { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} +impl WhileHeader { + fn ptr(&self) -> Box> { + Box::new(NodeFfi::new(self)) + } +} + +impl VariableAssignment { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl TokenConjunction { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl MaybeNewlines { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl TokenPipe { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl KeywordNot { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl DecoratedStatementDecorator { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl KeywordEnd { + fn 
range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl KeywordCase { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl KeywordElse { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl KeywordIf { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl KeywordBegin { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl KeywordFunction { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl KeywordWhile { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl KeywordFor { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl KeywordIn { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl SemiNl { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl JobConjunctionDecorator { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl TokenBackground { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl KeywordTime { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl TokenRedirection { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl String_ { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} +impl Argument { + fn range(&self) -> SourceRange { + self.range.unwrap() + } +} diff --git a/fish-rust/src/ffi.rs b/fish-rust/src/ffi.rs index 31f1c6637..7cc800b18 100644 --- a/fish-rust/src/ffi.rs +++ b/fish-rust/src/ffi.rs @@ -24,6 +24,7 @@ include_cpp! { #include "event.h" #include "fallback.h" #include "fds.h" + #include "fish_indent_common.h" #include "flog.h" #include "function.h" #include "highlight.h" @@ -57,6 +58,7 @@ include_cpp! { generate!("get_flog_file_fd") generate!("log_extra_to_flog_file") + generate!("indent_visitor_t") generate!("parse_util_unescape_wildcards") generate!("fish_wcwidth") @@ -73,6 +75,8 @@ include_cpp! 
{ generate!("library_data_t") generate_pod!("library_data_pod_t") + generate!("highlighter_t") + generate!("proc_wait_any") generate!("output_stream_t") @@ -89,6 +93,8 @@ include_cpp! { generate!("builtin_print_error_trailer") generate!("builtin_get_names_ffi") + generate!("pretty_printer_t") + generate!("escape_string") generate!("sig2wcs") generate!("wcs2sig") diff --git a/fish-rust/src/fish_indent.rs b/fish-rust/src/fish_indent.rs new file mode 100644 index 000000000..cc655a719 --- /dev/null +++ b/fish-rust/src/fish_indent.rs @@ -0,0 +1,92 @@ +use crate::ast::{self, Category, Node, NodeFfi, NodeVisitor, Type}; +use crate::ffi::pretty_printer_t; +use crate::parse_constants::ParseTokenType; +use std::pin::Pin; + +struct PrettyPrinter<'a> { + companion: Pin<&'a mut pretty_printer_t>, +} +impl<'a> NodeVisitor<'a> for &mut PrettyPrinter<'a> { + // Default implementation is to just visit children. + fn visit(&mut self, node: &'a dyn Node) { + let ffi_node = NodeFfi::new(node); + // Leaf nodes we just visit their text. 
+ if node.as_keyword().is_some() { + self.companion + .as_mut() + .emit_node_text((&ffi_node as *const NodeFfi<'_>).cast()); + return; + } + if let Some(token) = node.as_token() { + if token.token_type() == ParseTokenType::end { + self.companion + .as_mut() + .visit_semi_nl((&ffi_node as *const NodeFfi<'_>).cast()); + return; + } + self.companion + .as_mut() + .emit_node_text((&ffi_node as *const NodeFfi<'_>).cast()); + return; + } + match node.typ() { + Type::argument | Type::variable_assignment => { + self.companion + .as_mut() + .emit_node_text((&ffi_node as *const NodeFfi<'_>).cast()); + } + Type::redirection => { + self.companion.as_mut().visit_redirection( + (node.as_redirection().unwrap() as *const ast::Redirection).cast(), + ); + } + Type::maybe_newlines => { + self.companion.as_mut().visit_maybe_newlines( + (node.as_maybe_newlines().unwrap() as *const ast::MaybeNewlines).cast(), + ); + } + Type::begin_header => { + // 'begin' does not require a newline after it, but we insert one. + node.accept(self, false); + self.companion.as_mut().visit_begin_header(); + } + _ => { + // For branch and list nodes, default is to visit their children. 
+ if [Category::branch, Category::list].contains(&node.category()) { + node.accept(self, false); + return; + } + panic!("unexpected node type"); + } + } + } +} + +#[cxx::bridge] +#[allow(clippy::needless_lifetimes)] // false positive +mod fish_indent_ffi { + extern "C++" { + include!("ast.h"); + include!("fish_indent_common.h"); + type pretty_printer_t = crate::ffi::pretty_printer_t; + type Ast = crate::ast::Ast; + type NodeFfi<'a> = crate::ast::NodeFfi<'a>; + } + extern "Rust" { + type PrettyPrinter<'a>; + unsafe fn new_pretty_printer( + companion: Pin<&mut pretty_printer_t>, + ) -> Box>; + #[cxx_name = "visit"] + unsafe fn visit_ffi<'a>(self: &mut PrettyPrinter<'a>, node: &'a NodeFfi<'a>); + } +} + +fn new_pretty_printer(companion: Pin<&mut pretty_printer_t>) -> Box> { + Box::new(PrettyPrinter { companion }) +} +impl<'a> PrettyPrinter<'a> { + fn visit_ffi(mut self: &mut PrettyPrinter<'a>, node: &'a NodeFfi<'a>) { + self.visit(node.as_node()); + } +} diff --git a/fish-rust/src/highlight.rs b/fish-rust/src/highlight.rs new file mode 100644 index 000000000..eb0c3fa65 --- /dev/null +++ b/fish-rust/src/highlight.rs @@ -0,0 +1,139 @@ +use crate::ast::{ + Argument, Ast, BlockStatement, BlockStatementHeaderVariant, DecoratedStatement, Keyword, Node, + NodeFfi, NodeVisitor, Redirection, Token, Type, VariableAssignment, +}; +use crate::ffi::highlighter_t; +use crate::parse_constants::ParseTokenType; +use std::pin::Pin; + +struct Highlighter<'a> { + companion: Pin<&'a mut highlighter_t>, + ast: &'a Ast, +} +impl<'a> Highlighter<'a> { + // Visit the children of a node. + fn visit_children(&mut self, node: &'a dyn Node) { + node.accept(self, false); + } + // AST visitor implementations. 
+ fn visit_keyword(&mut self, node: &dyn Keyword) { + let ffi_node = NodeFfi::new(node.leaf_as_node_ffi()); + self.companion + .as_mut() + .visit_keyword((&ffi_node as *const NodeFfi<'_>).cast()); + } + fn visit_token(&mut self, node: &dyn Token) { + let ffi_node = NodeFfi::new(node.leaf_as_node_ffi()); + self.companion + .as_mut() + .visit_token((&ffi_node as *const NodeFfi<'_>).cast()); + } + fn visit_argument(&mut self, node: &Argument) { + self.companion + .as_mut() + .visit_argument((node as *const Argument).cast(), false, true); + } + fn visit_redirection(&mut self, node: &Redirection) { + self.companion + .as_mut() + .visit_redirection((node as *const Redirection).cast()); + } + fn visit_variable_assignment(&mut self, node: &VariableAssignment) { + self.companion + .as_mut() + .visit_variable_assignment((node as *const VariableAssignment).cast()); + } + fn visit_semi_nl(&mut self, node: &dyn Node) { + let ffi_node = NodeFfi::new(node); + self.companion + .as_mut() + .visit_semi_nl((&ffi_node as *const NodeFfi<'_>).cast()); + } + fn visit_decorated_statement(&mut self, node: &DecoratedStatement) { + self.companion + .as_mut() + .visit_decorated_statement((node as *const DecoratedStatement).cast()); + } + fn visit_block_statement(&mut self, node: &'a BlockStatement) { + match &*node.header { + BlockStatementHeaderVariant::None => panic!(), + BlockStatementHeaderVariant::ForHeader(node) => self.visit(node), + BlockStatementHeaderVariant::WhileHeader(node) => self.visit(node), + BlockStatementHeaderVariant::FunctionHeader(node) => self.visit(node), + BlockStatementHeaderVariant::BeginHeader(node) => self.visit(node), + } + self.visit(&node.args_or_redirs); + let pending_variables_count = self + .companion + .as_mut() + .visit_block_statement1((node as *const BlockStatement).cast()); + self.visit(&node.jobs); + self.visit(&node.end); + self.companion + .as_mut() + .visit_block_statement2(pending_variables_count); + } +} + +impl<'a> NodeVisitor<'a> for 
Highlighter<'a> { + fn visit(&mut self, node: &'a dyn Node) { + if let Some(keyword) = node.as_keyword() { + return self.visit_keyword(keyword); + } + if let Some(token) = node.as_token() { + if token.token_type() == ParseTokenType::end { + self.visit_semi_nl(node); + return; + } + self.visit_token(token); + return; + } + match node.typ() { + Type::argument => self.visit_argument(node.as_argument().unwrap()), + Type::redirection => self.visit_redirection(node.as_redirection().unwrap()), + Type::variable_assignment => { + self.visit_variable_assignment(node.as_variable_assignment().unwrap()) + } + Type::decorated_statement => { + self.visit_decorated_statement(node.as_decorated_statement().unwrap()) + } + Type::block_statement => self.visit_block_statement(node.as_block_statement().unwrap()), + // Default implementation is to just visit children. + _ => self.visit_children(node), + } + } +} + +#[cxx::bridge] +#[allow(clippy::needless_lifetimes)] // false positive +mod highlighter_ffi { + extern "C++" { + include!("ast.h"); + include!("highlight.h"); + include!("parse_constants.h"); + type highlighter_t = crate::ffi::highlighter_t; + type Ast = crate::ast::Ast; + type NodeFfi<'a> = crate::ast::NodeFfi<'a>; + } + extern "Rust" { + type Highlighter<'a>; + unsafe fn new_highlighter<'a>( + companion: Pin<&'a mut highlighter_t>, + ast: &'a Ast, + ) -> Box>; + #[cxx_name = "visit_children"] + unsafe fn visit_children_ffi<'a>(self: &mut Highlighter<'a>, node: &'a NodeFfi<'a>); + } +} + +fn new_highlighter<'a>( + companion: Pin<&'a mut highlighter_t>, + ast: &'a Ast, +) -> Box> { + Box::new(Highlighter { companion, ast }) +} +impl<'a> Highlighter<'a> { + fn visit_children_ffi(&mut self, node: &'a NodeFfi<'a>) { + self.visit_children(node.as_node()); + } +} diff --git a/fish-rust/src/lib.rs b/fish-rust/src/lib.rs index dfb528609..4feb0b09a 100644 --- a/fish-rust/src/lib.rs +++ b/fish-rust/src/lib.rs @@ -11,6 +11,7 @@ mod common; mod abbrs; +mod ast; mod builtins; mod color; 
mod compat; @@ -29,14 +30,18 @@ mod fds; mod ffi; mod ffi_init; mod ffi_tests; +mod fish_indent; mod flog; mod future_feature_flags; mod global_safety; +mod highlight; mod io; mod job_group; mod locale; mod nix; mod parse_constants; +mod parse_tree; +mod parse_util; mod path; mod re; mod redirection; diff --git a/fish-rust/src/parse_constants.rs b/fish-rust/src/parse_constants.rs index 7877a9f1e..724ebab10 100644 --- a/fish-rust/src/parse_constants.rs +++ b/fish-rust/src/parse_constants.rs @@ -5,6 +5,7 @@ use crate::tokenizer::variable_assignment_equals_pos; use crate::wchar::{wstr, WString, L}; use crate::wchar_ffi::{wcharz, WCharFromFFI, WCharToFFI}; use crate::wutil::{sprintf, wgettext_fmt}; +use cxx::{type_id, ExternType}; use cxx::{CxxWString, UniquePtr}; use std::ops::{BitAnd, BitOr, BitOrAssign}; use widestring_suffix::widestrs; @@ -616,8 +617,14 @@ fn token_type_user_presentable_description_ffi( } /// TODO This should be type alias once we drop the FFI. +#[derive(Clone)] pub struct ParseErrorList(pub Vec); +unsafe impl ExternType for ParseErrorList { + type Id = type_id!("ParseErrorList"); + type Kind = cxx::kind::Opaque; +} + /// Helper function to offset error positions by the given amount. This is used when determining /// errors in a substring of a larger source buffer. pub fn parse_error_offset_source_start(errors: &mut ParseErrorList, amt: usize) { diff --git a/fish-rust/src/parse_tree.rs b/fish-rust/src/parse_tree.rs new file mode 100644 index 000000000..dfa985010 --- /dev/null +++ b/fish-rust/src/parse_tree.rs @@ -0,0 +1,190 @@ +//! Programmatic representation of fish code. 
+ +use std::pin::Pin; +use std::rc::Rc; + +use crate::ast::Ast; +use crate::parse_constants::{ + token_type_user_presentable_description, ParseErrorCode, ParseErrorList, ParseKeyword, + ParseTokenType, ParseTreeFlags, SourceOffset, SourceRange, PARSE_FLAG_CONTINUE_AFTER_ERROR, + SOURCE_OFFSET_INVALID, +}; +use crate::tokenizer::TokenizerError; +use crate::wchar::{wstr, WString, L}; +use crate::wchar_ffi::{WCharFromFFI, WCharToFFI}; +use crate::wutil::sprintf; +use cxx::{CxxWString, UniquePtr}; + +/// A struct representing the token type that we use internally. +#[derive(Clone, Copy)] +pub struct ParseToken { + /// The type of the token as represented by the parser + pub typ: ParseTokenType, + /// Any keyword represented by this token + pub keyword: ParseKeyword, + /// Hackish: whether the source contains a dash prefix + pub has_dash_prefix: bool, + /// Hackish: whether the source looks like '-h' or '--help' + pub is_help_argument: bool, + /// Hackish: if TOK_END, whether the source is a newline. + pub is_newline: bool, + // Hackish: whether this token is a string like FOO=bar + pub may_be_variable_assignment: bool, + /// If this is a tokenizer error, that error. + pub tok_error: TokenizerError, + pub source_start: SourceOffset, + pub source_length: SourceOffset, +} + +impl ParseToken { + pub fn new(typ: ParseTokenType) -> Self { + ParseToken { + typ, + keyword: ParseKeyword::none, + has_dash_prefix: false, + is_help_argument: false, + is_newline: false, + may_be_variable_assignment: false, + tok_error: TokenizerError::none, + source_start: SOURCE_OFFSET_INVALID, + source_length: 0, + } + } + /// \return the source range. + /// Note the start may be invalid. + pub fn range(&self) -> SourceRange { + SourceRange::new(self.source_start, self.source_length) + } + /// \return whether we are a string with the dash prefix set. 
+ pub fn is_dash_prefix_string(&self) -> bool { + self.typ == ParseTokenType::string && self.has_dash_prefix + } + /// Returns a string description of the given parse token. + pub fn describe(&self) -> WString { + let mut result = Into::<&'static wstr>::into(self.typ).to_owned(); + if self.keyword != ParseKeyword::none { + result += &sprintf!(L!(" <%ls>"), Into::<&'static wstr>::into(self.keyword))[..] + } + result + } + pub fn user_presentable_description(&self) -> WString { + token_type_user_presentable_description(self.typ, self.keyword) + } +} + +impl From for ParseErrorCode { + fn from(err: TokenizerError) -> Self { + match err { + TokenizerError::none => ParseErrorCode::none, + TokenizerError::unterminated_quote => ParseErrorCode::tokenizer_unterminated_quote, + TokenizerError::unterminated_subshell => { + ParseErrorCode::tokenizer_unterminated_subshell + } + TokenizerError::unterminated_slice => ParseErrorCode::tokenizer_unterminated_slice, + TokenizerError::unterminated_escape => ParseErrorCode::tokenizer_unterminated_escape, + _ => ParseErrorCode::tokenizer_other, + } + } +} + +/// A type wrapping up a parse tree and the original source behind it. +pub struct ParsedSource { + src: WString, + src_ffi: UniquePtr, + ast: Ast, +} + +impl ParsedSource { + fn new(src: WString, ast: Ast) -> Self { + let src_ffi = src.to_ffi(); + ParsedSource { src, src_ffi, ast } + } +} + +pub type ParsedSourceRef = Option>; + +/// Return a shared pointer to ParsedSource, or null on failure. +/// If parse_flag_continue_after_error is not set, this will return null on any error. 
+pub fn parse_source( + src: WString, + flags: ParseTreeFlags, + errors: &mut Option, +) -> ParsedSourceRef { + let ast = Ast::parse(&src, flags, errors); + if ast.errored() && !(flags & PARSE_FLAG_CONTINUE_AFTER_ERROR) { + None + } else { + Some(Rc::new(ParsedSource::new(src, ast))) + } +} + +struct ParsedSourceRefFFI(pub ParsedSourceRef); + +#[cxx::bridge] +mod parse_tree_ffi { + extern "C++" { + include!("ast.h"); + pub type Ast = crate::ast::Ast; + pub type ParseErrorList = crate::parse_constants::ParseErrorList; + } + extern "Rust" { + type ParsedSourceRefFFI; + fn empty_parsed_source_ref() -> Box; + fn has_value(&self) -> bool; + fn new_parsed_source_ref(src: &CxxWString, ast: Pin<&mut Ast>) -> Box; + #[cxx_name = "parse_source"] + fn parse_source_ffi( + src: &CxxWString, + flags: u8, + errors: *mut ParseErrorList, + ) -> Box; + fn clone(self: &ParsedSourceRefFFI) -> Box; + fn src(self: &ParsedSourceRefFFI) -> &CxxWString; + fn ast(self: &ParsedSourceRefFFI) -> &Ast; + } +} + +impl ParsedSourceRefFFI { + fn has_value(&self) -> bool { + self.0.is_some() + } +} +fn empty_parsed_source_ref() -> Box { + Box::new(ParsedSourceRefFFI(None)) +} +fn new_parsed_source_ref(src: &CxxWString, ast: Pin<&mut Ast>) -> Box { + let mut stolen_ast = Ast::default(); + std::mem::swap(&mut stolen_ast, ast.get_mut()); + Box::new(ParsedSourceRefFFI(Some(Rc::new(ParsedSource::new( + src.from_ffi(), + stolen_ast, + ))))) +} +fn parse_source_ffi( + src: &CxxWString, + flags: u8, + errors: *mut ParseErrorList, +) -> Box { + let mut out_errors: Option = if errors.is_null() { + None + } else { + Some(unsafe { &*errors }.clone()) + }; + let ps = parse_source(src.from_ffi(), ParseTreeFlags(flags), &mut out_errors); + if let Some(out_errors) = out_errors { + unsafe { *errors = out_errors }; + } + + Box::new(ParsedSourceRefFFI(ps)) +} +impl ParsedSourceRefFFI { + fn clone(&self) -> Box { + Box::new(ParsedSourceRefFFI(self.0.clone())) + } + fn src(&self) -> &CxxWString { + 
&self.0.as_ref().unwrap().src_ffi + } + fn ast(&self) -> &Ast { + &self.0.as_ref().unwrap().ast + } +} diff --git a/fish-rust/src/parse_util.rs b/fish-rust/src/parse_util.rs new file mode 100644 index 000000000..d19faf089 --- /dev/null +++ b/fish-rust/src/parse_util.rs @@ -0,0 +1,48 @@ +use crate::ast::{Node, NodeFfi, NodeVisitor}; +use crate::ffi::indent_visitor_t; +use std::pin::Pin; + +struct IndentVisitor<'a> { + companion: Pin<&'a mut indent_visitor_t>, +} +impl<'a> NodeVisitor<'a> for IndentVisitor<'a> { + // Default implementation is to just visit children. + fn visit(&mut self, node: &'a dyn Node) { + let ffi_node = NodeFfi::new(node); + let dec = self + .companion + .as_mut() + .visit((&ffi_node as *const NodeFfi<'_>).cast()); + node.accept(self, false); + self.companion.as_mut().did_visit(dec); + } +} + +#[cxx::bridge] +#[allow(clippy::needless_lifetimes)] // false positive +mod parse_util_ffi { + extern "C++" { + include!("ast.h"); + include!("parse_util.h"); + type indent_visitor_t = crate::ffi::indent_visitor_t; + type Ast = crate::ast::Ast; + type NodeFfi<'a> = crate::ast::NodeFfi<'a>; + } + extern "Rust" { + type IndentVisitor<'a>; + unsafe fn new_indent_visitor( + companion: Pin<&mut indent_visitor_t>, + ) -> Box>; + #[cxx_name = "visit"] + unsafe fn visit_ffi<'a>(self: &mut IndentVisitor<'a>, node: &'a NodeFfi<'a>); + } +} + +fn new_indent_visitor(companion: Pin<&mut indent_visitor_t>) -> Box> { + Box::new(IndentVisitor { companion }) +} +impl<'a> IndentVisitor<'a> { + fn visit_ffi(self: &mut IndentVisitor<'a>, node: &'a NodeFfi<'a>) { + self.visit(node.as_node()); + } +} diff --git a/src/ast.cpp b/src/ast.cpp index 0ee6bd1ee..89bee25da 100644 --- a/src/ast.cpp +++ b/src/ast.cpp @@ -16,1377 +16,11 @@ #include "tokenizer.h" #include "wutil.h" // IWYU pragma: keep -namespace { - -/// \return tokenizer flags corresponding to parse tree flags. 
-static tok_flags_t tokenizer_flags_from_parse_flags(parse_tree_flags_t flags) { - tok_flags_t tok_flags = 0; - // Note we do not need to respect parse_flag_show_blank_lines, no clients are interested in - // them. - if (flags & parse_flag_include_comments) tok_flags |= TOK_SHOW_COMMENTS; - if (flags & parse_flag_accept_incomplete_tokens) tok_flags |= TOK_ACCEPT_UNFINISHED; - if (flags & parse_flag_continue_after_error) tok_flags |= TOK_CONTINUE_AFTER_ERROR; - return tok_flags; +rust::Box ast_parse(const wcstring &src, parse_tree_flags_t flags, + parse_error_list_t *out_errors) { + return ast_parse_ffi(src, flags, out_errors); } - -// Given an expanded string, returns any keyword it matches. -static parse_keyword_t keyword_with_name(const wcstring &name) { - return keyword_from_string(name.c_str()); +rust::Box ast_parse_argument_list(const wcstring &src, parse_tree_flags_t flags, + parse_error_list_t *out_errors) { + return ast_parse_argument_list_ffi(src, flags, out_errors); } - -static bool is_keyword_char(wchar_t c) { - return (c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') || (c >= L'0' && c <= L'9') || - c == L'\'' || c == L'"' || c == L'\\' || c == '\n' || c == L'!'; -} - -/// Given a token, returns the keyword it matches, or parse_keyword_t::none. -static parse_keyword_t keyword_for_token(token_type_t tok, const wcstring &token) { - /* Only strings can be keywords */ - if (tok != token_type_t::string) { - return parse_keyword_t::none; - } - - // If token is clean (which most are), we can compare it directly. Otherwise we have to expand - // it. We only expand quotes, and we don't want to do expensive expansions like tilde - // expansions. So we do our own "cleanliness" check; if we find a character not in our allowed - // set we know it's not a keyword, and if we never find a quote we don't have to expand! Note - // that this lowercase set could be shrunk to be just the characters that are in keywords. 
- parse_keyword_t result = parse_keyword_t::none; - bool needs_expand = false, all_chars_valid = true; - for (wchar_t c : token) { - if (!is_keyword_char(c)) { - all_chars_valid = false; - break; - } - // If we encounter a quote, we need expansion. - needs_expand = needs_expand || c == L'"' || c == L'\'' || c == L'\\'; - } - - if (all_chars_valid) { - // Expand if necessary. - if (!needs_expand) { - result = keyword_with_name(token); - } else { - if (auto unescaped = unescape_string(token, 0)) { - result = keyword_with_name(*unescaped); - } - } - } - return result; -} - -/// Convert from tokenizer_t's token type to a parse_token_t type. -static parse_token_type_t parse_token_type_from_tokenizer_token(token_type_t tokenizer_token_type) { - switch (tokenizer_token_type) { - case token_type_t::string: - return parse_token_type_t::string; - case token_type_t::pipe: - return parse_token_type_t::pipe; - case token_type_t::andand: - return parse_token_type_t::andand; - case token_type_t::oror: - return parse_token_type_t::oror; - case token_type_t::end: - return parse_token_type_t::end; - case token_type_t::background: - return parse_token_type_t::background; - case token_type_t::redirect: - return parse_token_type_t::redirection; - case token_type_t::error: - return parse_token_type_t::tokenizer_error; - case token_type_t::comment: - return parse_token_type_t::comment; - } - FLOGF(error, L"Bad token type %d passed to %s", static_cast(tokenizer_token_type), - __FUNCTION__); - DIE("bad token type"); - return parse_token_type_t::invalid; -} - -/// A token stream generates a sequence of parser tokens, permitting arbitrary lookahead. -class token_stream_t { - public: - explicit token_stream_t(const wcstring &src, parse_tree_flags_t flags, - std::vector &comments) - : src_(src), - tok_(new_tokenizer(src_.c_str(), tokenizer_flags_from_parse_flags(flags))), - comment_ranges(comments) {} - - /// \return the token at the given index, without popping it. 
If the token stream is exhausted, - /// it will have parse_token_type_t::terminate. idx = 0 means the next token, idx = 1 means the - /// next-next token, and so forth. - /// We must have that idx < kMaxLookahead. - const parse_token_t &peek(size_t idx = 0) { - assert(idx < kMaxLookahead && "Trying to look too far ahead"); - while (idx >= count_) { - lookahead_.at(mask(start_ + count_)) = next_from_tok(); - count_ += 1; - } - return lookahead_.at(mask(start_ + idx)); - } - - /// Pop the next token. - parse_token_t pop() { - if (count_ == 0) { - return next_from_tok(); - } - parse_token_t result = lookahead_[start_]; - start_ = mask(start_ + 1); - count_ -= 1; - return result; - } - - /// Provide the original source code. - const wcstring &source() const { return src_; } - - private: - // Helper to mask our circular buffer. - static constexpr size_t mask(size_t idx) { return idx % kMaxLookahead; } - - /// \return the next parse token from the tokenizer. - /// This consumes and stores comments. - parse_token_t next_from_tok() { - for (;;) { - parse_token_t res = advance_1(); - if (res.type == parse_token_type_t::comment) { - comment_ranges.push_back(res.range()); - continue; - } - return res; - } - } - - /// \return a new parse token, advancing the tokenizer. - /// This returns comments. - parse_token_t advance_1() { - auto mtoken = tok_->next(); - if (!mtoken) { - return parse_token_t{parse_token_type_t::terminate}; - } - const tok_t &token = *mtoken; - // Set the type, keyword, and whether there's a dash prefix. Note that this is quite - // sketchy, because it ignores quotes. This is the historical behavior. For example, - // `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a - // command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. - // Squint at it really hard and it even starts to look like a feature. 
- parse_token_t result{parse_token_type_from_tokenizer_token(token.type_)}; - const wcstring &text = storage_ = *tok_->text_of(token); - result.keyword = keyword_for_token(token.type_, text); - result.has_dash_prefix = !text.empty() && text.at(0) == L'-'; - result.is_help_argument = (text == L"-h" || text == L"--help"); - result.is_newline = (result.type == parse_token_type_t::end && text == L"\n"); - result.may_be_variable_assignment = variable_assignment_equals_pos(text) != nullptr; - result.tok_error = token.error; - - // These assertions are totally bogus. Basically our tokenizer works in size_t but we work - // in uint32_t to save some space. If we have a source file larger than 4 GB, we'll probably - // just crash. - assert(token.offset < SOURCE_OFFSET_INVALID); - result.source_start = static_cast(token.offset); - - assert(token.length <= SOURCE_OFFSET_INVALID); - result.source_length = static_cast(token.length); - - if (token.error != tokenizer_error_t::none) { - auto subtoken_offset = static_cast(token.error_offset_within_token); - // Skip invalid tokens that have a zero length, especially if they are at EOF. - if (subtoken_offset < result.source_length) { - result.source_start += subtoken_offset; - result.source_length = token.error_length; - } - } - - return result; - } - - // The maximum number of lookahead supported. - static constexpr size_t kMaxLookahead = 2; - - // We implement a queue with a simple circular buffer. - // Note that peek() returns an address, so we must not move elements which are peek'd. - // This prevents using vector (which may reallocate). - // Deque would work but is too heavyweight for just 2 items. - std::array lookahead_ = { - {parse_token_type_t::invalid, parse_token_type_t::invalid}}; - - // Starting index in our lookahead. - // The "first" token is at this index. - size_t start_ = 0; - - // Number of items in our lookahead. - size_t count_ = 0; - - // A reference to the original source. 
- const wcstring &src_; - - // The tokenizer to generate new tokens. - rust::Box tok_; - - /// Any comment nodes are collected here. - /// These are only collected if parse_flag_include_comments is set. - std::vector &comment_ranges; - - // Temporary storage. - wcstring storage_; -}; - -} // namespace - -namespace ast { - -/// Given a node which we believe to be some sort of block statement, attempt to return a source -/// range for the block's keyword (for, if, etc) and a user-presentable description. This is used to -/// provide better error messages. \return {nullptr, nullptr} if we couldn't find it. Note at this -/// point the parse tree is incomplete; in particular parent nodes are not set. -static std::pair find_block_open_keyword(const node_t *node) { - const node_t *cursor = node; - while (cursor != nullptr) { - switch (cursor->type) { - case type_t::block_statement: - cursor = cursor->as()->header.contents.get(); - break; - case type_t::for_header: { - const auto *h = cursor->as(); - return {h->kw_for.range, L"for loop"}; - } - case type_t::while_header: { - const auto *h = cursor->as(); - return {h->kw_while.range, L"while loop"}; - } - case type_t::function_header: { - const auto *h = cursor->as(); - return {h->kw_function.range, L"function definition"}; - } - case type_t::begin_header: { - const auto *h = cursor->as(); - return {h->kw_begin.range, L"begin"}; - } - case type_t::if_statement: { - const auto *h = cursor->as(); - return {h->if_clause.kw_if.range, L"if statement"}; - } - case type_t::switch_statement: { - const auto *h = cursor->as(); - return {h->kw_switch.range, L"switch statement"}; - } - default: - return {source_range_t{}, nullptr}; - } - } - return {source_range_t{}, nullptr}; -} - -/// \return the decoration for this statement. 
-statement_decoration_t decorated_statement_t::decoration() const { - if (!opt_decoration) { - return statement_decoration_t::none; - } - switch (opt_decoration->kw) { - case parse_keyword_t::kw_command: - return statement_decoration_t::command; - case parse_keyword_t::kw_builtin: - return statement_decoration_t::builtin; - case parse_keyword_t::kw_exec: - return statement_decoration_t::exec; - default: - assert(0 && "Unexpected keyword in statement decoration"); - return statement_decoration_t::none; - } -} - -/// \return a string literal name for an ast type. -const wchar_t *ast_type_to_string(type_t type) { - switch (type) { -#define ELEM(T) \ - case type_t::T: \ - return L"" #T; -#include "ast_node_types.inc" - } - assert(0 && "unreachable"); - return L"(unknown)"; -} - -/// Delete an untyped node. -void node_deleter_t::operator()(node_t *n) { - if (!n) return; - switch (n->type) { -#define ELEM(T) \ - case type_t::T: \ - delete n->as(); \ - break; -#include "ast_node_types.inc" - } -} - -wcstring node_t::describe() const { - wcstring res = ast_type_to_string(this->type); - if (const auto *n = this->try_as()) { - append_format(res, L" '%ls'", token_type_description(n->type)); - } else if (const auto *n = this->try_as()) { - append_format(res, L" '%ls'", keyword_description(n->kw)); - } - return res; -} - -/// From C++14. -template -using enable_if_t = typename std::enable_if::type; - -namespace { -struct source_range_visitor_t { - template - enable_if_t visit(const Node &node) { - if (node.unsourced) any_unsourced = true; - // Union with our range. - if (node.range.length > 0) { - if (total.length == 0) { - total = node.range; - } else { - auto end = - std::max(total.start + total.length, node.range.start + node.range.length); - total.start = std::min(total.start, node.range.start); - total.length = end - total.start; - } - } - return; - } - - // Other node types recurse. 
- template - enable_if_t visit(const Node &node) { - node_visitor(*this).accept_children_of(node); - } - - // Total range we have encountered. - source_range_t total{0, 0}; - - // Whether any node was found to be unsourced. - bool any_unsourced{false}; -}; -} // namespace - -maybe_t node_t::try_source_range() const { - source_range_visitor_t v; - node_visitor(v).accept(this); - if (v.any_unsourced) return none(); - return v.total; -} - -// Helper to describe a list of keywords. -// TODO: these need to be localized properly. -static wcstring keywords_user_presentable_description(std::initializer_list kws) { - assert(kws.size() > 0 && "Should not be empty list"); - if (kws.size() == 1) { - return format_string(L"keyword '%ls'", keyword_description(*kws.begin())); - } - size_t idx = 0; - wcstring res = L"keywords "; - for (parse_keyword_t kw : kws) { - const wchar_t *optor = (idx++ ? L" or " : L""); - append_format(res, L"%ls'%ls'", optor, keyword_description(kw)); - } - return res; -} - -// Helper to describe a list of token types. -// TODO: these need to be localized properly. -static wcstring token_types_user_presentable_description( - std::initializer_list types) { - assert(types.size() > 0 && "Should not be empty list"); - if (types.size() == 1) { - return *token_type_user_presentable_description(*types.begin(), parse_keyword_t::none); - } - size_t idx = 0; - wcstring res; - for (parse_token_type_t type : types) { - const wchar_t *optor = (idx++ ? L" or " : L""); - append_format( - res, L"%ls%ls", optor, - token_type_user_presentable_description(type, parse_keyword_t::none)->c_str()); - } - return res; -} - -namespace { -using namespace ast; - -struct populator_t { - template - using unique_ptr = std::unique_ptr; - - // Construct from a source, flags, top type, and out_errors, which may be null. 
- populator_t(const wcstring &src, parse_tree_flags_t flags, type_t top_type, - parse_error_list_t *out_errors) - : flags_(flags), - tokens_(src, flags, extras_.comments), - top_type_(top_type), - out_errors_(out_errors) {} - - // Given a node type, allocate it and invoke its default constructor. - // \return the resulting Node pointer. It is never null. - template - unique_ptr allocate() { - unique_ptr node = make_unique(); - FLOGF(ast_construction, L"%*smake %ls %p", spaces(), "", ast_type_to_string(Node::AstType), - node.get()); - return node; - } - - // Given a node type, allocate it, invoke its default constructor, - // and then visit it as a field. - // \return the resulting Node pointer. It is never null. - template - unique_ptr allocate_visit() { - unique_ptr node = allocate(); - this->visit_node_field(*node); - return node; - } - - /// Helper for FLOGF. This returns a number of spaces appropriate for a '%*c' format. - int spaces() const { return static_cast(visit_stack_.size() * 2); } - - /// The status of our parser. - enum class status_t { - // Parsing is going just fine, thanks for asking. - ok, - - // We have exhausted the token stream, but the caller was OK with an incomplete parse tree. - // All further leaf nodes should have the unsourced flag set. - unsourcing, - - // We encountered an parse error and are "unwinding." - // Do not consume any tokens until we get back to a list type which stops unwinding. - unwinding, - }; - - /// \return the parser's status. - status_t status() { - if (unwinding_) { - return status_t::unwinding; - } else if ((flags_ & parse_flag_leave_unterminated) && - peek_type() == parse_token_type_t::terminate) { - return status_t::unsourcing; - } - return status_t::ok; - } - - /// \return whether the status is unwinding. - /// This is more efficient than checking the status directly. - bool is_unwinding() const { return unwinding_; } - - /// \return whether any leaf nodes we visit should be marked as unsourced. 
- bool unsource_leaves() { - status_t s = status(); - return s == status_t::unsourcing || s == status_t::unwinding; - } - - /// \return whether we permit an incomplete parse tree. - bool allow_incomplete() const { return flags_ & parse_flag_leave_unterminated; } - - /// This indicates a bug in fish code. - void internal_error(const char *func, const wchar_t *fmt, ...) const { - va_list va; - va_start(va, fmt); - wcstring msg = vformat_string(fmt, va); - va_end(va); - - FLOG(debug, "Internal parse error from", func, "- this indicates a bug in fish.", msg); - FLOG(debug, "Encountered while parsing:<<<\n%ls\n>>>", tokens_.source().c_str()); - abort(); - } - - /// \return whether a list type \p type allows arbitrary newlines in it. - bool list_type_chomps_newlines(type_t type) const { - switch (type) { - case type_t::argument_list: - // Hackish. If we are producing a freestanding argument list, then it allows - // semicolons, for hysterical raisins. - return top_type_ == type_t::freestanding_argument_list; - - case type_t::argument_or_redirection_list: - // No newlines inside arguments. - return false; - - case type_t::variable_assignment_list: - // No newlines inside variable assignment lists. - return false; - - case type_t::job_list: - // Like echo a \n \n echo b - return true; - - case type_t::case_item_list: - // Like switch foo \n \n \n case a \n end - return true; - - case type_t::andor_job_list: - // Like while true ; \n \n and true ; end - return true; - - case type_t::elseif_clause_list: - // Like if true ; \n \n else if false; end - return true; - - case type_t::job_conjunction_continuation_list: - // This would be like echo a && echo b \n && echo c - // We could conceivably support this but do not now. - return false; - - case type_t::job_continuation_list: - // This would be like echo a \n | echo b - // We could conceivably support this but do not now. 
- return false; - - default: - internal_error(__FUNCTION__, L"Type %ls not handled", ast_type_to_string(type)); - return false; - } - } - - /// \return whether a list type \p type allows arbitrary semicolons in it. - bool list_type_chomps_semis(type_t type) const { - switch (type) { - case type_t::argument_list: - // Hackish. If we are producing a freestanding argument list, then it allows - // semicolons, for hysterical raisins. - // That is, this is OK: complete -c foo -a 'x ; y ; z' - // But this is not: foo x ; y ; z - return top_type_ == type_t::freestanding_argument_list; - - case type_t::argument_or_redirection_list: - case type_t::variable_assignment_list: - return false; - - case type_t::job_list: - // Like echo a ; ; echo b - return true; - - case type_t::case_item_list: - // Like switch foo ; ; ; case a \n end - // This is historically allowed. - return true; - - case type_t::andor_job_list: - // Like while true ; ; ; and true ; end - return true; - - case type_t::elseif_clause_list: - // Like if true ; ; ; else if false; end - return false; - - case type_t::job_conjunction_continuation_list: - // Like echo a ; ; && echo b. Not supported. - return false; - - case type_t::job_continuation_list: - // This would be like echo a ; | echo b - // Not supported. - // We could conceivably support this but do not now. - return false; - - default: - internal_error(__FUNCTION__, L"Type %ls not handled", ast_type_to_string(type)); - return false; - } - } - - // Chomp extra comments, semicolons, etc. for a given list type. - void chomp_extras(type_t type) { - bool chomp_semis = list_type_chomps_semis(type); - bool chomp_newlines = list_type_chomps_newlines(type); - for (;;) { - const auto &peek = this->tokens_.peek(); - if (chomp_newlines && peek.type == parse_token_type_t::end && peek.is_newline) { - // Just skip this newline, no need to save it. 
- this->tokens_.pop(); - } else if (chomp_semis && peek.type == parse_token_type_t::end && !peek.is_newline) { - auto tok = this->tokens_.pop(); - // Perhaps save this extra semi. - if (flags_ & parse_flag_show_extra_semis) { - extras_.semis.push_back(tok.range()); - } - } else { - break; - } - } - } - - /// \return whether a list type should recover from errors.s - /// That is, whether we should stop unwinding when we encounter this type. - bool list_type_stops_unwind(type_t type) const { - return type == type_t::job_list && (flags_ & parse_flag_continue_after_error); - } - - /// Report an error based on \p fmt for the source range \p range. - void parse_error_impl(source_range_t range, parse_error_code_t code, const wchar_t *fmt, - va_list va) { - any_error_ = true; - - // Ignore additional parse errors while unwinding. - // These may come about e.g. from `true | and`. - if (unwinding_) return; - unwinding_ = true; - - FLOGF(ast_construction, L"%*sparse error - begin unwinding", spaces(), ""); - // TODO: can store this conditionally dependent on flags. - if (range.start != SOURCE_OFFSET_INVALID) { - extras_.errors.push_back(range); - } - - if (out_errors_) { - parse_error_t err; - err.text = std::make_unique(vformat_string(fmt, va)); - err.code = code; - err.source_start = range.start; - err.source_length = range.length; - out_errors_->push_back(std::move(err)); - } - } - - /// Report an error based on \p fmt for the source range \p range. - void parse_error(source_range_t range, parse_error_code_t code, const wchar_t *fmt, ...) { - va_list va; - va_start(va, fmt); - parse_error_impl(range, code, fmt, va); - va_end(va); - } - - /// Report an error based on \p fmt for the source range \p range. - void parse_error(const parse_token_t &token, parse_error_code_t code, const wchar_t *fmt, ...) { - va_list va; - va_start(va, fmt); - parse_error_impl(token.range(), code, fmt, va); - va_end(va); - } - - // \return a reference to a non-comment token at index \p idx. 
- const parse_token_t &peek_token(size_t idx = 0) { return tokens_.peek(idx); } - - // \return the type of a non-comment token. - parse_token_type_t peek_type(size_t idx = 0) { return peek_token(idx).type; } - - // Consume the next token, chomping any comments. - // It is an error to call this unless we know there is a non-terminate token available. - // \return the token. - parse_token_t consume_any_token() { - parse_token_t tok = tokens_.pop(); - assert(tok.type != parse_token_type_t::comment && "Should not be a comment"); - assert(tok.type != parse_token_type_t::terminate && - "Cannot consume terminate token, caller should check status first"); - return tok; - } - - // Consume the next token which is expected to be of the given type. - source_range_t consume_token_type(parse_token_type_t type) { - assert(type != parse_token_type_t::terminate && - "Should not attempt to consume terminate token"); - auto tok = consume_any_token(); - if (tok.type != type) { - parse_error( - tok, parse_error_code_t::generic, _(L"Expected %ls, but found %ls"), - token_type_user_presentable_description(type, parse_keyword_t::none)->c_str(), - tok.user_presentable_description().c_str()); - return source_range_t{0, 0}; - } - return tok.range(); - } - - // The next token could not be parsed at the top level. - // For example a trailing end like `begin ; end ; end` - // Or an unexpected redirection like `>` - // Consume it and add an error. - void consume_excess_token_generating_error() { - auto tok = consume_any_token(); - - // In the rare case that we are parsing a freestanding argument list and not a job list, - // generate a generic error. 
- // TODO: this is a crummy message if we get a tokenizer error, for example: - // complete -c foo -a "'abc" - if (this->top_type_ == type_t::freestanding_argument_list) { - this->parse_error(tok, parse_error_code_t::generic, _(L"Expected %ls, but found %ls"), - token_type_user_presentable_description(parse_token_type_t::string, - parse_keyword_t::none) - ->c_str(), - tok.user_presentable_description().c_str()); - return; - } - - assert(this->top_type_ == type_t::job_list); - switch (tok.type) { - case parse_token_type_t::string: - // There are three keywords which end a job list. - switch (tok.keyword) { - case parse_keyword_t::kw_end: - this->parse_error(tok, parse_error_code_t::unbalancing_end, - _(L"'end' outside of a block")); - break; - case parse_keyword_t::kw_else: - this->parse_error(tok, parse_error_code_t::unbalancing_else, - _(L"'else' builtin not inside of if block")); - break; - case parse_keyword_t::kw_case: - this->parse_error(tok, parse_error_code_t::unbalancing_case, - _(L"'case' builtin not inside of switch block")); - break; - default: - internal_error(__FUNCTION__, - L"Token %ls should not have prevented parsing a job list", - tok.user_presentable_description().c_str()); - break; - } - break; - case parse_token_type_t::pipe: - case parse_token_type_t::redirection: - case parse_token_type_t::background: - case parse_token_type_t::andand: - case parse_token_type_t::oror: - parse_error(tok, parse_error_code_t::generic, - _(L"Expected a string, but found %ls"), - tok.user_presentable_description().c_str()); - break; - - case parse_token_type_t::tokenizer_error: - parse_error(tok, parse_error_from_tokenizer_error(tok.tok_error), L"%ls", - tokenizer_get_error_message(tok.tok_error)->c_str()); - break; - - case parse_token_type_t::end: - internal_error(__FUNCTION__, L"End token should never be excess"); - break; - case parse_token_type_t::terminate: - internal_error(__FUNCTION__, L"Terminate token should never be excess"); - break; - default: - 
internal_error(__FUNCTION__, L"Unexpected excess token type: %ls", - tok.user_presentable_description().c_str()); - break; - } - } - - // Our can_parse implementations are for optional values and for lists. - // A true return means we should descend into the production, false means stop. - // Note that the argument is always nullptr and should be ignored. It is provided strictly for - // overloading purposes. - bool can_parse(job_conjunction_t *) { - const auto &token = peek_token(); - if (token.type != parse_token_type_t::string) return false; - switch (peek_token().keyword) { - case parse_keyword_t::kw_end: - case parse_keyword_t::kw_else: - case parse_keyword_t::kw_case: - // These end a job list. - return false; - case parse_keyword_t::none: - default: - return true; - } - } - - bool can_parse(argument_t *) { return peek_type() == parse_token_type_t::string; } - bool can_parse(redirection_t *) { return peek_type() == parse_token_type_t::redirection; } - bool can_parse(argument_or_redirection_t *) { - return can_parse((argument_t *)nullptr) || can_parse((redirection_t *)nullptr); - } - - bool can_parse(variable_assignment_t *) { - // Do we have a variable assignment at all? - if (!peek_token(0).may_be_variable_assignment) return false; - - // What is the token after it? - switch (peek_type(1)) { - case parse_token_type_t::string: - // We have `a= cmd` and should treat it as a variable assignment. - return true; - case parse_token_type_t::terminate: - // We have `a=` which is OK if we are allowing incomplete, an error otherwise. - return allow_incomplete(); - default: - // We have e.g. `a= >` which is an error. - // Note that we do not produce an error here. Instead we return false so this the - // token will be seen by allocate_populate_statement_contents. - return false; - } - } - - template - bool can_parse(token_t *tok) { - return tok->allows_token(peek_token().type); - } - - // Note we have specific overloads for our keyword nodes, as they need custom logic. 
- bool can_parse(job_conjunction_t::decorator_t *) { - // This is for a job conjunction like `and stuff` - // But if it's `and --help` then we treat it as an ordinary command. - return job_conjunction_t::decorator_t::allows_keyword(peek_token(0).keyword) && - !peek_token(1).is_help_argument; - } - - bool can_parse(decorated_statement_t::decorator_t *) { - // Here the keyword is 'command' or 'builtin' or 'exec'. - // `command stuff` executes a command called stuff. - // `command -n` passes the -n argument to the 'command' builtin. - // `command` by itself is a command. - if (!decorated_statement_t::decorator_t::allows_keyword(peek_token(0).keyword)) { - return false; - } - // Is it like `command --stuff` or `command` by itself? - auto tok1 = peek_token(1); - return tok1.type == parse_token_type_t::string && !tok1.is_dash_prefix_string(); - } - - bool can_parse(keyword_t *) { - // Time keyword is only the time builtin if the next argument doesn't have a dash. - return keyword_t::allows_keyword(peek_token(0).keyword) && - !peek_token(1).is_dash_prefix_string(); - } - - bool can_parse(job_continuation_t *) { return peek_type() == parse_token_type_t::pipe; } - - bool can_parse(job_conjunction_continuation_t *) { - auto type = peek_type(); - return type == parse_token_type_t::andand || type == parse_token_type_t::oror; - } - - bool can_parse(andor_job_t *) { - switch (peek_token().keyword) { - case parse_keyword_t::kw_and: - case parse_keyword_t::kw_or: { - // Check that the argument to and/or is a string that's not help. Otherwise it's - // either 'and --help' or a naked 'and', and not part of this list. 
- const auto &nexttok = peek_token(1); - return nexttok.type == parse_token_type_t::string && !nexttok.is_help_argument; - } - default: - return false; - } - } - - bool can_parse(elseif_clause_t *) { - return peek_token(0).keyword == parse_keyword_t::kw_else && - peek_token(1).keyword == parse_keyword_t::kw_if; - } - - bool can_parse(else_clause_t *) { return peek_token().keyword == parse_keyword_t::kw_else; } - bool can_parse(case_item_t *) { return peek_token().keyword == parse_keyword_t::kw_case; } - - // Given that we are a list of type ListNodeType, whose contents type is ContentsNode, populate - // as many elements as we can. - // If exhaust_stream is set, then keep going until we get parse_token_type_t::terminate. - template - void populate_list(list_t &list, bool exhaust_stream = false) { - assert(list.contents == nullptr && "List is not initially empty"); - - // Do not attempt to parse a list if we are unwinding. - if (is_unwinding()) { - assert(!exhaust_stream && - "exhaust_stream should only be set at top level, and so we should not be " - "unwinding"); - // Mark in the list that it was unwound. - FLOGF(ast_construction, L"%*sunwinding %ls", spaces(), "", - ast_type_to_string(ListType)); - assert(list.empty() && "Should be an empty list"); - return; - } - - // We're going to populate a vector with our nodes. - // Later on we will copy this to the heap with a single allocation. - std::vector> contents; - - for (;;) { - // If we are unwinding, then either we recover or we break the loop, dependent on the - // loop type. - if (is_unwinding()) { - if (!list_type_stops_unwind(ListType)) { - break; - } - // We are going to stop unwinding. - // Rather hackish. Just chomp until we get to a string or end node. 
- for (auto type = peek_type(); - type != parse_token_type_t::string && type != parse_token_type_t::terminate && - type != parse_token_type_t::end; - type = peek_type()) { - parse_token_t tok = tokens_.pop(); - extras_.errors.push_back(tok.range()); - FLOGF(ast_construction, L"%*schomping range %u-%u", spaces(), "", - tok.source_start, tok.source_length); - } - FLOGF(ast_construction, L"%*sdone unwinding", spaces(), ""); - unwinding_ = false; - } - - // Chomp semis and newlines. - chomp_extras(ListType); - - // Now try parsing a node. - if (auto node = this->try_parse()) { - // #7201: Minimize reallocations of contents vector - if (contents.empty()) { - contents.reserve(64); - } - contents.emplace_back(std::move(node)); - } else if (exhaust_stream && peek_type() != parse_token_type_t::terminate) { - // We aren't allowed to stop. Produce an error and keep going. - consume_excess_token_generating_error(); - } else { - // We either stop once we can't parse any more of this contents node, or we - // exhausted the stream as requested. - break; - } - } - - // Populate our list from our contents. - if (!contents.empty()) { - assert(contents.size() <= UINT32_MAX && "Contents size out of bounds"); - assert(list.contents == nullptr && "List should still be empty"); - - // We're going to heap-allocate our array. - using contents_ptr_t = typename list_t::contents_ptr_t; - auto *array = new contents_ptr_t[contents.size()]; - std::move(contents.begin(), contents.end(), array); - - list.length = static_cast(contents.size()); - list.contents = array; - } - - FLOGF(ast_construction, L"%*s%ls size: %lu", spaces(), "", ast_type_to_string(ListType), - (unsigned long)list.count()); - } - - /// Allocate and populate a statement contents pointer. - /// This must never return null. - statement_t::contents_ptr_t allocate_populate_statement_contents() { - // In case we get a parse error, we still need to return something non-null. 
Use a decorated - // statement; all of its leaf nodes will end up unsourced. - auto got_error = [this] { - assert(unwinding_ && "Should have produced an error"); - return this->allocate_visit(); - }; - - using pkt = parse_keyword_t; - const auto &token1 = peek_token(0); - if (token1.type == parse_token_type_t::terminate && allow_incomplete()) { - // This may happen if we just have a 'time' prefix. - // Construct a decorated statement, which will be unsourced. - return this->allocate_visit(); - } else if (token1.type != parse_token_type_t::string) { - // We may be unwinding already; do not produce another error. - // For example in `true | and`. - parse_error(token1, parse_error_code_t::generic, - _(L"Expected a command, but found %ls"), - token1.user_presentable_description().c_str()); - return got_error(); - } else if (token1.may_be_variable_assignment) { - // Here we have a variable assignment which we chose to not parse as a variable - // assignment because there was no string after it. - // Ensure we consume the token, so we don't get back here again at the same place. - parse_error(consume_any_token(), parse_error_code_t::bare_variable_assignment, L""); - return got_error(); - } - - // The only block-like builtin that takes any parameters is 'function'. So go to decorated - // statements if the subsequent token looks like '--'. The logic here is subtle: - // - // If we are 'begin', then we expect to be invoked with no arguments. - // If we are 'function', then we are a non-block if we are invoked with -h or --help - // If we are anything else, we require an argument, so do the same thing if the subsequent - // token is a statement terminator. - if (token1.type == parse_token_type_t::string) { - const auto &token2 = peek_token(1); - // If we are a function, then look for help arguments. Otherwise, if the next token - // looks like an option (starts with a dash), then parse it as a decorated statement. 
- if (token1.keyword == pkt::kw_function && token2.is_help_argument) { - return allocate_visit(); - } else if (token1.keyword != pkt::kw_function && token2.has_dash_prefix) { - return allocate_visit(); - } - - // Likewise if the next token doesn't look like an argument at all. This corresponds to - // e.g. a "naked if". - bool naked_invocation_invokes_help = - (token1.keyword != pkt::kw_begin && token1.keyword != pkt::kw_end); - if (naked_invocation_invokes_help && (token2.type == parse_token_type_t::end || - token2.type == parse_token_type_t::terminate)) { - return allocate_visit(); - } - } - - switch (token1.keyword) { - case pkt::kw_not: - case pkt::kw_exclam: - return allocate_visit(); - case pkt::kw_for: - case pkt::kw_while: - case pkt::kw_function: - case pkt::kw_begin: - return allocate_visit(); - case pkt::kw_if: - return allocate_visit(); - case pkt::kw_switch: - return allocate_visit(); - - case pkt::kw_end: - // 'end' is forbidden as a command. - // For example, `if end` or `while end` will produce this error. - // We still have to descend into the decorated statement because - // we can't leave our pointer as null. - parse_error(token1, parse_error_code_t::generic, - _(L"Expected a command, but found %ls"), - token1.user_presentable_description().c_str()); - return got_error(); - - default: - return allocate_visit(); - } - } - - /// Allocate and populate a block statement header. - /// This must never return null. 
- block_statement_t::header_ptr_t allocate_populate_block_header() { - switch (peek_token().keyword) { - case parse_keyword_t::kw_for: - return allocate_visit(); - case parse_keyword_t::kw_while: - return allocate_visit(); - case parse_keyword_t::kw_function: - return allocate_visit(); - case parse_keyword_t::kw_begin: - return allocate_visit(); - default: - internal_error(__FUNCTION__, L"should not have descended into block_header"); - DIE("Unreachable"); - } - } - - template - unique_ptr try_parse() { - if (!can_parse((AstNode *)nullptr)) return nullptr; - return allocate_visit(); - } - - void visit_node_field(argument_t &arg) { - if (unsource_leaves()) { - arg.unsourced = true; - return; - } - arg.range = consume_token_type(parse_token_type_t::string); - } - - void visit_node_field(variable_assignment_t &varas) { - if (unsource_leaves()) { - varas.unsourced = true; - return; - } - if (!peek_token().may_be_variable_assignment) { - internal_error(__FUNCTION__, - L"Should not have created variable_assignment_t from this token"); - } - varas.range = consume_token_type(parse_token_type_t::string); - } - - void visit_node_field(job_continuation_t &node) { - // Special error handling to catch 'and' and 'or' in pipelines, like `true | and false`. - const auto &tok = peek_token(1); - if (tok.keyword == parse_keyword_t::kw_and || tok.keyword == parse_keyword_t::kw_or) { - const wchar_t *cmdname = (tok.keyword == parse_keyword_t::kw_and ? L"and" : L"or"); - parse_error(tok, parse_error_code_t::andor_in_pipeline, INVALID_PIPELINE_CMD_ERR_MSG, - cmdname); - } - node.accept(*this); - } - - // Visit branch nodes by just calling accept() to visit their fields. - template - enable_if_t visit_node_field(Node &node) { - // This field is a direct embedding of an AST value. - node.accept(*this); - return; - } - - // Overload for token fields. 
- template - void visit_node_field(token_t &token) { - if (unsource_leaves()) { - token.unsourced = true; - return; - } - - if (!token.allows_token(peek_token().type)) { - const auto &peek = peek_token(); - if ((flags_ & parse_flag_leave_unterminated) && - (peek.tok_error == tokenizer_error_t::unterminated_quote || - peek.tok_error == tokenizer_error_t::unterminated_subshell)) { - return; - } - - parse_error(peek, parse_error_code_t::generic, L"Expected %ls, but found %ls", - token_types_user_presentable_description({TokTypes...}).c_str(), - peek.user_presentable_description().c_str()); - token.unsourced = true; - return; - } - parse_token_t tok = consume_any_token(); - token.type = tok.type; - token.range = tok.range(); - } - - // Overload for keyword fields. - template - void visit_node_field(keyword_t &keyword) { - if (unsource_leaves()) { - keyword.unsourced = true; - return; - } - - if (!keyword.allows_keyword(peek_token().keyword)) { - keyword.unsourced = true; - const auto &peek = peek_token(); - - if ((flags_ & parse_flag_leave_unterminated) && - (peek.tok_error == tokenizer_error_t::unterminated_quote || - peek.tok_error == tokenizer_error_t::unterminated_subshell)) { - return; - } - - // Special error reporting for keyword_t. 
- std::array allowed = {{KWs...}}; - if (allowed.size() == 1 && allowed[0] == parse_keyword_t::kw_end) { - assert(!visit_stack_.empty() && "Visit stack should not be empty"); - auto p = find_block_open_keyword(visit_stack_.back()); - source_range_t kw_range = p.first; - const wchar_t *kw_name = p.second; - if (kw_name) { - this->parse_error(kw_range, parse_error_code_t::generic, - L"Missing end to balance this %ls", kw_name); - } - } - parse_error(peek, parse_error_code_t::generic, L"Expected %ls, but found %ls", - keywords_user_presentable_description({KWs...}).c_str(), - peek.user_presentable_description().c_str()); - return; - } - parse_token_t tok = consume_any_token(); - keyword.kw = tok.keyword; - keyword.range = tok.range(); - } - - // Overload for maybe_newlines - void visit_node_field(maybe_newlines_t &nls) { - if (unsource_leaves()) { - nls.unsourced = true; - return; - } - // TODO: it would be nice to have the start offset be the current position in the token - // stream, even if there are no newlines. - nls.range = {0, 0}; - while (peek_token().is_newline) { - auto r = consume_token_type(parse_token_type_t::end); - if (nls.range.length == 0) { - nls.range = r; - } else { - nls.range.length = r.start + r.length - nls.range.start; - } - } - } - - template - void visit_optional_field(optional_t &ptr) { - // This field is an optional node. - ptr.contents = this->try_parse(); - } - - template - void visit_list_field(list_t &list) { - // This field is an embedding of an array of (pointers to) ContentsNode. - // Parse as many as we can. - populate_list(list); - } - - // We currently only have a handful of union pointer types. - // Handle them directly. 
- void visit_union_field(statement_t::contents_ptr_t &ptr) { - ptr = this->allocate_populate_statement_contents(); - assert(ptr && "Statement contents must never be null"); - } - - void visit_union_field(argument_or_redirection_t::contents_ptr_t &contents) { - if (auto arg = try_parse()) { - contents = std::move(arg); - } else if (auto redir = try_parse()) { - contents = std::move(redir); - } else { - internal_error(__FUNCTION__, L"Unable to parse argument or redirection"); - } - assert(contents && "Statement contents must never be null"); - } - - void visit_union_field(block_statement_t::header_ptr_t &ptr) { - ptr = this->allocate_populate_block_header(); - assert(ptr && "Header pointer must never be null"); - } - - void will_visit_fields_of(const node_t &node) { - FLOGF(ast_construction, L"%*swill_visit %ls %p", spaces(), "", node.describe().c_str(), - (const void *)&node); - visit_stack_.push_back(&node); - } - - void did_visit_fields_of(const node_t &node) { - assert(!visit_stack_.empty() && visit_stack_.back() == &node && - "Node was not at the top of the visit stack"); - visit_stack_.pop_back(); - } - - /// Flags controlling parsing. - parse_tree_flags_t flags_{}; - - /// Extra stuff like comment ranges. - ast_t::extras_t extras_{}; - - /// Stream of tokens which we consume. - token_stream_t tokens_; - - /** The type which we are attempting to parse, typically job_list but may be - freestanding_argument_list. */ - const type_t top_type_; - - /// If set, we are unwinding due to error recovery. - bool unwinding_{false}; - - /// If set, we have encountered an error. - bool any_error_{false}; - - /// A stack containing the nodes whose fields we are visiting. - std::vector visit_stack_{}; - - // If non-null, populate with errors. - parse_error_list_t *out_errors_{}; -}; -} // namespace - -// Set the parent fields of all nodes in the tree rooted at \p node. 
-static void set_parents(const node_t *top) { - struct parent_setter_t { - void visit(const node_t &node) { - const_cast(node).parent = parent_; - const node_t *saved = parent_; - parent_ = &node; - node_visitor(*this).accept_children_of(&node); - parent_ = saved; - } - - const node_t *parent_{nullptr}; - }; - struct parent_setter_t ps; - node_visitor(ps).accept(top); -} - -// static -ast_t ast_t::parse_from_top(const wcstring &src, parse_tree_flags_t parse_flags, - parse_error_list_t *out_errors, type_t top_type) { - assert((top_type == type_t::job_list || top_type == type_t::freestanding_argument_list) && - "Invalid top type"); - ast_t ast; - - populator_t pops(src, parse_flags, top_type, out_errors); - if (top_type == type_t::job_list) { - std::unique_ptr list = pops.allocate(); - pops.populate_list(*list, true /* exhaust_stream */); - ast.top_.reset(list.release()); - } else { - std::unique_ptr list = - pops.allocate(); - pops.populate_list(list->arguments, true /* exhaust_stream */); - ast.top_.reset(list.release()); - } - // Chomp trailing extras, etc. - pops.chomp_extras(type_t::job_list); - - ast.any_error_ = pops.any_error_; - ast.extras_ = std::move(pops.extras_); - - // Set all parent nodes. - // It turns out to be more convenient to do this after the parse phase. - set_parents(ast.top()); - - return ast; -} - -// static -ast_t ast_t::parse(const wcstring &src, parse_tree_flags_t flags, parse_error_list_t *out_errors) { - return parse_from_top(src, flags, out_errors, type_t::job_list); -} - -// static -ast_t ast_t::parse_argument_list(const wcstring &src, parse_tree_flags_t flags, - parse_error_list_t *out_errors) { - return parse_from_top(src, flags, out_errors, type_t::freestanding_argument_list); -} - -// \return the depth of a node, i.e. number of parent links. 
-static int get_depth(const node_t *node) { - int result = 0; - for (const node_t *cursor = node->parent; cursor; cursor = cursor->parent) { - result += 1; - } - return result; -} - -wcstring ast_t::dump(const wcstring &orig) const { - wcstring result; - - // Return a string that repeats "| " \p amt times. - auto pipespace = [](int amt) { - std::string result; - result.reserve(amt * 2); - for (int i = 0; i < amt; i++) result.append("! "); - return result; - }; - - traversal_t tv = this->walk(); - while (const auto *node = tv.next()) { - int depth = get_depth(node); - // dot-| padding - append_format(result, L"%s", pipespace(depth).c_str()); - if (const auto *n = node->try_as()) { - append_format(result, L"argument"); - if (auto argsrc = n->try_source(orig)) { - append_format(result, L": '%ls'", argsrc->c_str()); - } - } else if (const auto *n = node->try_as()) { - append_format(result, L"keyword: %ls", keyword_description(n->kw)); - } else if (const auto *n = node->try_as()) { - wcstring desc; - switch (n->type) { - case parse_token_type_t::string: - desc = format_string(L"string"); - if (auto strsource = n->try_source(orig)) { - append_format(desc, L": '%ls'", strsource->c_str()); - } - break; - case parse_token_type_t::redirection: - desc = L"redirection"; - if (auto strsource = n->try_source(orig)) { - append_format(desc, L": '%ls'", strsource->c_str()); - } - break; - case parse_token_type_t::end: - desc = L"<;>"; - break; - case parse_token_type_t::invalid: - // This may occur with errors, e.g. we expected to see a string but saw a - // redirection. 
- desc = L""; - break; - default: - desc = *token_type_user_presentable_description(n->type, parse_keyword_t::none); - break; - } - append_format(result, L"%ls", desc.c_str()); - } else { - append_format(result, L"%ls", node->describe().c_str()); - } - append_format(result, L"\n"); - } - return result; -} -} // namespace ast diff --git a/src/ast.h b/src/ast.h index 86ea1b853..088d65b5c 100644 --- a/src/ast.h +++ b/src/ast.h @@ -13,1031 +13,81 @@ #include #include "common.h" +#include "cxx.h" #include "maybe.h" #include "parse_constants.h" +#if INCLUDE_RUST_HEADERS +#include "ast.rs.h" namespace ast { -/** - * This defines the fish abstract syntax tree. - * The fish ast is a tree data structure. The nodes of the tree - * are divided into three categories: - * - * - leaf nodes refer to a range of source, and have no child nodes. - * - branch nodes have ONLY child nodes, and no other fields. - * - list nodes contain a list of some other node type (branch or leaf). - * - * Most clients will be interested in visiting the nodes of an ast. - * See node_visitation_t below. - */ - -struct node_t; - -enum class category_t : uint8_t { - branch, - leaf, - list, -}; - -// Declare our type enum. -// For each member of our ast, this creates an enum value. -// For example this creates `type_t::job_list`. -enum class type_t : uint8_t { -#define ELEM(T) T, -#include "ast_node_types.inc" -}; - -// Helper to return a string description of a type. -const wchar_t *ast_type_to_string(type_t type); - -// Forward declare all AST structs. -#define ELEM(T) struct T##_t; -#include "ast_node_types.inc" - -/* - * A FieldVisitor is something which can visit the fields of an ast node. - * This is used during ast construction. - * - * To trigger field visitation, use the accept() function: - * MyFieldVisitor v; - * node->accept(v); - * - * Example FieldVisitor: - * - * struct MyFieldVisitor { - * - * /// will_visit (did_visit) is called before (after) a node's fields are visited. 
- * void will_visit_fields_of(node_t &node); - * void did_visit_fields_of(node_t &node); - * - * /// These are invoked with the concrete type of each node, - * /// so they may be overloaded to distinguish node types. - * /// Example: - * void will_visit_fields_of(job_t &job); - * - * /// The visitor needs to be prepared for the following four field types. - * /// Naturally the visitor may overload visit_field to carve this - * /// arbitrarily finely. - * - * /// A field may be a "direct embedding" of a node. - * /// That is, an ast node may have another node as a member. - * template - * void visit_node_field(Node &node); - - * /// A field may be a list_t of (pointers to) some other node type. - * template - * void visit_list_field(list_t &list); - * - * /// A field may be a unique_ptr to another node. - * /// Every such pointer must be non-null after construction. - * template - * void visit_pointer_field(std::unique_ptr &ptr); - * - * /// A field may be optional, meaning it may or may not exist. - * template - * void visit_optional_field(optional_t &opt); - * - * /// A field may be a union pointer, meaning it points to one of - * /// a fixed set of node types. A union pointer is never null - * /// after construction. - * template - * void visit_union_field(union_ptr_t &union_ptr); - * }; - */ - -// Our node base type is not virtual, so we must not invoke its destructor directly. -// If you want to delete a node and don't know its concrete type, use this deleter type. -struct node_deleter_t { - void operator()(node_t *node); -}; -using node_unique_ptr_t = std::unique_ptr; - -// A union pointer field is a pointer to one of a fixed set of node types. -// It is never null after construction. -template -struct union_ptr_t { - node_unique_ptr_t contents{}; - - /// \return a pointer to the node contents. - const node_t *get() const { - assert(contents && "Null pointer"); - return contents.get(); - } - - /// \return whether we have non-null contents. 
- explicit operator bool() const { return contents != nullptr; } - - const node_t *operator->() const { return get(); } - - union_ptr_t() = default; - - // Allow setting a typed unique pointer. - template - inline void operator=(std::unique_ptr n); - - // Construct from a typed unique pointer. - template - inline union_ptr_t(std::unique_ptr n); -}; - -// A pointer to something, or nullptr if not present. -template -struct optional_t { - std::unique_ptr contents{}; - - explicit operator bool() const { return contents != nullptr; } - - AstNode *operator->() const { - assert(contents && "Null pointer"); - return contents.get(); - } - - const AstNode &operator*() const { - assert(contents && "Null pointer"); - return *contents; - } - - bool has_value() const { return contents != nullptr; } -}; - -namespace template_goo { - -// void if B is true, SFINAE'd away otherwise. -template -using only_if_t = typename std::enable_if::type; - -template -only_if_t visit_1_field(FieldVisitor &v, Field &field) { - v.visit_node_field(field); - return; -} - -template -only_if_t visit_1_field(FieldVisitor &v, Field &field) { - v.visit_list_field(field); - return; -} - -template -void visit_1_field(FieldVisitor &v, Field *&field) { - v.visit_pointer_field(field); -} - -template -void visit_1_field(FieldVisitor &v, optional_t &field) { - v.visit_optional_field(field); -} - -template -void visit_1_field(FieldVisitor &v, union_ptr_t &field) { - v.visit_union_field(field); -} - -// Call the field visit methods on visitor \p v passing field \p field. -template -void accept_field_visitor(FieldVisitor &v, bool /*reverse*/, Field &field) { - visit_1_field(v, field); -} - -// Call visit_field on visitor \p v, for the field \p field and also \p rest. 
-template -void accept_field_visitor(FieldVisitor &v, bool reverse, Field &field, Rest &...rest) { - if (!reverse) visit_1_field(v, field); - accept_field_visitor(v, reverse, rest...); - if (reverse) visit_1_field(v, field); -} - -} // namespace template_goo - -#define FIELDS(...) \ - template \ - void accept(FieldVisitor &visitor, bool reversed = false) { \ - visitor.will_visit_fields_of(*this); \ - template_goo::accept_field_visitor(visitor, reversed, __VA_ARGS__); \ - visitor.did_visit_fields_of(*this); \ - } - -/// node_t is the base node of all AST nodes. -/// It is not a template: it is possible to work concretely with this type. -struct node_t : noncopyable_t { - /// The parent node, or null if this is root. - const node_t *parent{nullptr}; - - /// The type of this node. - const type_t type; - - /// The category of this node. - const category_t category; - - constexpr explicit node_t(type_t t, category_t c) : type(t), category(c) {} - - /// Cast to a concrete node type, aborting on failure. - /// Example usage: - /// if (node->type == type_t::job_list) node->as()->... - template - To *as() { - assert(this->type == To::AstType && "Invalid type conversion"); - return static_cast(this); - } - - template - const To *as() const { - assert(this->type == To::AstType && "Invalid type conversion"); - return static_cast(this); - } - - /// Try casting to a concrete node type, except returns nullptr on failure. - /// Example usage: - /// if (const auto *job_list = node->try_as()) job_list->... - template - To *try_as() { - if (this->type == To::AstType) return as(); - return nullptr; - } - - template - const To *try_as() const { - if (this->type == To::AstType) return as(); - return nullptr; - } - - /// Base accept() function which trampolines to overriding implementations for each node type. - /// This may be used when you don't know what the type of a particular node is. 
- template - void base_accept(FieldVisitor &v, bool reverse = false); - - /// \return a helpful string description of this node. - wcstring describe() const; - - /// \return the source range for this node, or none if unsourced. - /// This may return none if the parse was incomplete or had an error. - maybe_t try_source_range() const; - - /// \return the source range for this node, or an empty range {0, 0} if unsourced. - source_range_t source_range() const { - if (auto r = try_source_range()) return *r; - return source_range_t{0, 0}; - } - - /// \return the source code for this node, or none if unsourced. - maybe_t try_source(const wcstring &orig) const { - if (auto r = try_source_range()) return orig.substr(r->start, r->length); - return none(); - } - - /// \return the source code for this node, or an empty string if unsourced. - wcstring source(const wcstring &orig) const { - wcstring res{}; - if (auto s = try_source(orig)) res = s.acquire(); - return res; - } - - /// \return the source code for this node, or an empty string if unsourced. - /// This uses \p storage to reduce allocations. - const wcstring &source(const wcstring &orig, wcstring *storage) const { - if (auto r = try_source_range()) { - storage->assign(orig, r->start, r->length); - } else { - storage->clear(); - } - return *storage; - } - - protected: - // We are NOT a virtual class - we have no vtable or virtual methods and our destructor is not - // virtual, so as to keep the size down. Only typed nodes should invoke the destructor. - // Use node_deleter_t to delete an untyped node. - ~node_t() = default; -}; - -// Base class for all "branch" nodes: nodes with at least one ast child. -template -struct branch_t : public node_t { - static constexpr type_t AstType = Type; - static constexpr category_t Category = category_t::branch; - - branch_t() : node_t(Type, Category) {} -}; - -// Base class for all "leaf" nodes: nodes with no ast children. 
-// It declares an empty visit method to avoid requiring the CHILDREN macro. -template -struct leaf_t : public node_t { - static constexpr type_t AstType = Type; - static constexpr category_t Category = category_t::leaf; - - // Whether this node is "unsourced." This happens if for whatever reason we are unable to parse - // the node, either because we had a parse error and recovered, or because we accepted - // incomplete and the token stream was exhausted. - bool unsourced{false}; - - // The source range. - source_range_t range{0, 0}; - - // Convenience helper to return whether we are not unsourced. - bool has_source() const { return !unsourced; } - - template - void accept(FieldVisitor &visitor, bool /* reverse */ = false) { - visitor.will_visit_fields_of(*this); - visitor.did_visit_fields_of(*this); - } - - leaf_t() : node_t(Type, Category) {} -}; - -// A simple fixed-size array, possibly empty. -// Disallow moving as we own a raw pointer. -template -struct list_t : public node_t, nonmovable_t { - static constexpr type_t AstType = ListType; - static constexpr category_t Category = category_t::list; - - // A list wraps a "contents pointer" which is just a unique_ptr that converts to a reference. - // This enables more natural iteration: - // for (const argument_t &arg : argument_list) ... - struct contents_ptr_t { - std::unique_ptr ptr{}; - - void operator=(std::unique_ptr p) { ptr = std::move(p); } - - const ContentsNode *get() const { - assert(ptr && "Null pointer"); - return ptr.get(); - } - - /* implicit */ operator const ContentsNode &() const { return *get(); } - }; - - // We use a new[]-allocated array to store our contents pointers, to reduce size. - // This would be a nice use case for std::dynarray. - uint32_t length{0}; - const contents_ptr_t *contents{}; - - /// \return a node at a given index, or nullptr if out of range. 
- const ContentsNode *at(size_t idx, bool reverse = false) const { - if (idx >= count()) return nullptr; - return contents[reverse ? count() - idx - 1 : idx].get(); - } - - /// \return our count. - size_t count() const { return length; } - - /// \return whether we are empty. - bool empty() const { return length == 0; } - - /// Iteration support. - using iterator = const contents_ptr_t *; - iterator begin() const { return contents; } - iterator end() const { return contents + length; } - - // list types pretend their child nodes are direct embeddings. - // This isn't used during AST construction because we need to construct the list. - // It is used by node_visitation_t. - template - void accept(FieldVisitor &visitor, bool reverse = false) { - visitor.will_visit_fields_of(*this); - for (size_t i = 0; i < count(); i++) visitor.visit_node_field(*this->at(i, reverse)); - visitor.did_visit_fields_of(*this); - } - - list_t() : node_t(ListType, Category) {} - ~list_t() { delete[] contents; } -}; - -// Fully define all list types, as they are very uniform. -// This is where types like job_list_t come from. -#define ELEM(T) -#define ELEMLIST(ListT, ContentsT) \ - struct ListT##_t final : public list_t {}; -#include "ast_node_types.inc" - -struct keyword_base_t : public leaf_t { - // The keyword which was parsed. - parse_keyword_t kw; -}; - -// A keyword node is a node which contains a keyword, which must be one of the provided values. -template -struct keyword_t final : public keyword_base_t { - static bool allows_keyword(parse_keyword_t); -}; - -struct token_base_t : public leaf_t { - // The token type which was parsed. - parse_token_type_t type{parse_token_type_t::invalid}; -}; - -// A token node is a node which contains a token, which must be one of the provided values. -template -struct token_t final : public token_base_t { - /// \return whether a token type is allowed in this token_t, i.e. is a member of our Toks list. 
- static bool allows_token(parse_token_type_t); -}; - -// Zero or more newlines. -struct maybe_newlines_t final : public leaf_t {}; - -// A single newline or semicolon, terminating statements. -// Note this is not a separate type, it is just a convenience typedef. -using semi_nl_t = token_t; - -// Convenience typedef for string nodes. -using string_t = token_t; - -// An argument is just a node whose source range determines its contents. -// This is a separate type because it is sometimes useful to find all arguments. -struct argument_t final : public leaf_t {}; - -// A redirection has an operator like > or 2>, and a target like /dev/null or &1. -// Note that pipes are not redirections. -struct redirection_t final : public branch_t { - token_t oper; - string_t target; - - FIELDS(oper, target) -}; - -// A variable_assignment_t contains a source range like FOO=bar. -struct variable_assignment_t final : public leaf_t {}; - -// An argument or redirection holds either an argument or redirection. -struct argument_or_redirection_t final : public branch_t { - using contents_ptr_t = union_ptr_t; - contents_ptr_t contents{}; - - /// \return whether this represents an argument. - bool is_argument() const { return contents->type == type_t::argument; } - - /// \return whether this represents a redirection - bool is_redirection() const { return contents->type == type_t::redirection; } - - /// \return this as an argument, assuming it wraps one. - const argument_t &argument() const { - assert(is_argument() && "Is not an argument"); - return *this->contents.contents->as(); - } - - /// \return this as an argument, assuming it wraps one. 
- const redirection_t &redirection() const { - assert(is_redirection() && "Is not a redirection"); - return *this->contents.contents->as(); - } - - FIELDS(contents); -}; - -// A statement is a normal command, or an if / while / etc -struct statement_t final : public branch_t { - using contents_ptr_t = union_ptr_t; - contents_ptr_t contents{}; - - FIELDS(contents) -}; - -// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases -// like if statements, where we require a command). -struct job_pipeline_t final : public branch_t { - // Maybe the time keyword. - optional_t> time; - - // A (possibly empty) list of variable assignments. - variable_assignment_list_t variables; - - // The statement. - statement_t statement; - - // Piped remainder. - job_continuation_list_t continuation; - - // Maybe backgrounded. - optional_t> bg; - - FIELDS(time, variables, statement, continuation, bg) -}; - -// A job_conjunction is a job followed by a && or || continuations. -struct job_conjunction_t final : public branch_t { - // The job conjunction decorator. - using decorator_t = keyword_t; - optional_t decorator{}; - - // The job itself. - job_pipeline_t job; - - // The rest of the job conjunction, with && or ||s. - job_conjunction_continuation_list_t continuations; - - // A terminating semicolon or newline. - // This is marked optional because it may not be present, for example the command `echo foo` may - // not have a terminating newline. It will only fail to be present if we ran out of tokens. 
- optional_t semi_nl; - - FIELDS(decorator, job, continuations, semi_nl) -}; - -struct for_header_t final : public branch_t { - // 'for' - keyword_t kw_for; - - // var_name - string_t var_name; - - // 'in' - keyword_t kw_in; - - // list of arguments - argument_list_t args; - - // newline or semicolon - semi_nl_t semi_nl; - - FIELDS(kw_for, var_name, kw_in, args, semi_nl) -}; - -struct while_header_t final : public branch_t { - // 'while' - keyword_t kw_while; - - job_conjunction_t condition{}; - andor_job_list_t andor_tail{}; - - FIELDS(kw_while, condition, andor_tail) -}; - -struct function_header_t final : public branch_t { - // functions require at least one argument. - keyword_t kw_function; - argument_t first_arg; - argument_list_t args; - semi_nl_t semi_nl; - - FIELDS(kw_function, first_arg, args, semi_nl) -}; - -struct begin_header_t final : public branch_t { - keyword_t kw_begin; - - // Note that 'begin' does NOT require a semi or nl afterwards. - // This is valid: begin echo hi; end - optional_t semi_nl; - - FIELDS(kw_begin, semi_nl) -}; - -struct block_statement_t final : public branch_t { - // A header like for, while, etc. - using header_ptr_t = - union_ptr_t; - header_ptr_t header; - - // List of jobs in this block. - job_list_t jobs; - - // The 'end' node. - keyword_t end; - - // Arguments and redirections associated with the block. - argument_or_redirection_list_t args_or_redirs; - - FIELDS(header, jobs, end, args_or_redirs) -}; - -// Represents an 'if', either as the first part of an if statement or after an 'else'. -struct if_clause_t final : public branch_t { - // The 'if' keyword. - keyword_t kw_if; - - // The 'if' condition. - job_conjunction_t condition{}; - - // 'and/or' tail. - andor_job_list_t andor_tail{}; - - // The body to execute if the condition is true. - job_list_t body; - - FIELDS(kw_if, condition, andor_tail, body) -}; - -struct elseif_clause_t final : public branch_t { - // The 'else' keyword. 
- keyword_t kw_else; - - // The 'if' clause following it. - if_clause_t if_clause; - - FIELDS(kw_else, if_clause) -}; - -struct else_clause_t final : public branch_t { - // else ; body - keyword_t kw_else; - semi_nl_t semi_nl; - job_list_t body; - - FIELDS(kw_else, semi_nl, body) -}; - -struct if_statement_t final : public branch_t { - // if part - if_clause_t if_clause; - - // else if list - elseif_clause_list_t elseif_clauses; - - // else part - optional_t else_clause; - - // literal end - keyword_t end; - - // block args / redirs - argument_or_redirection_list_t args_or_redirs; - - FIELDS(if_clause, elseif_clauses, else_clause, end, args_or_redirs) -}; - -struct case_item_t final : public branch_t { - // case ; body - keyword_t kw_case; - argument_list_t arguments; - semi_nl_t semi_nl; - job_list_t body; - FIELDS(kw_case, arguments, semi_nl, body) -}; - -struct switch_statement_t final : public branch_t { - // switch ; body ; end args_redirs - keyword_t kw_switch; - argument_t argument; - semi_nl_t semi_nl; - case_item_list_t cases; - keyword_t end; - argument_or_redirection_list_t args_or_redirs; - - FIELDS(kw_switch, argument, semi_nl, cases, end, args_or_redirs) -}; - -// A decorated_statement is a command with a list of arguments_or_redirections, possibly with -// "builtin" or "command" or "exec" -struct decorated_statement_t final : public branch_t { - // An optional decoration (command, builtin, exec, etc). - using pk = parse_keyword_t; - using decorator_t = keyword_t; - optional_t opt_decoration; - - // Command to run. - string_t command; - - // Args and redirs - argument_or_redirection_list_t args_or_redirs; - - // Helper to return the decoration. - statement_decoration_t decoration() const; - - FIELDS(opt_decoration, command, args_or_redirs) -}; - -// A not statement like `not true` or `! true` -struct not_statement_t final : public branch_t { - // Keyword, either not or exclam. 
- keyword_t kw; - - variable_assignment_list_t variables; - optional_t> time{}; - statement_t contents{}; - - FIELDS(kw, variables, time, contents) -}; - -struct job_continuation_t final : public branch_t { - token_t pipe; - maybe_newlines_t newlines; - variable_assignment_list_t variables; - statement_t statement; - - FIELDS(pipe, newlines, variables, statement) -}; - -struct job_conjunction_continuation_t final - : public branch_t { - // The && or || token. - token_t conjunction; - maybe_newlines_t newlines; - - // The job itself. - job_pipeline_t job; - - FIELDS(conjunction, newlines, job) -}; - -// An andor_job just wraps a job, but requires that the job have an 'and' or 'or' job_decorator. -// Note this is only used for andor_job_list; jobs that are not part of an andor_job_list are not -// instances of this. -struct andor_job_t final : public branch_t { - job_conjunction_t job; - - FIELDS(job) -}; - -// A freestanding_argument_list is equivalent to a normal argument list, except it may contain -// TOK_END (newlines, and even semicolons, for historical reasons). -// In practice the tok_ends are ignored by fish code so we do not bother to store them. -struct freestanding_argument_list_t final : public branch_t { - argument_list_t arguments; - FIELDS(arguments) -}; - -template -void node_t::base_accept(FieldVisitor &v, bool reverse) { - switch (this->type) { -#define ELEM(T) \ - case type_t::T: \ - this->as()->accept(v, reverse); \ - break; - -#include "ast_node_types.inc" - } -} - -// static -template -bool token_t::allows_token(parse_token_type_t type) { - for (parse_token_type_t t : {Toks...}) { - if (type == t) return true; - } - return false; -} - -// static -template -bool keyword_t::allows_keyword(parse_keyword_t kw) { - for (parse_keyword_t k : {KWs...}) { - if (k == kw) return true; - } - return false; -} - -namespace template_goo { -/// \return true if type Type is in the Candidates list. 
-template -constexpr bool type_in_list() { - return false; -} - -template -constexpr bool type_in_list() { - return std::is_same::value || type_in_list(); -} -} // namespace template_goo - -template -template -void union_ptr_t::operator=(std::unique_ptr n) { - static_assert(template_goo::type_in_list(), - "Cannot construct from this node type"); - contents.reset(n.release()); -} - -template -template -union_ptr_t::union_ptr_t(std::unique_ptr n) : contents(n.release()) { - static_assert(template_goo::type_in_list(), - "Cannot construct from this node type"); -} - -/** - * A node visitor is like a field visitor, but adapted to only visit actual nodes, as const - * references. It calls the visit() function of its visitor with a const reference to each node - * found under a given node. - * - * Example: - * struct MyNodeVisitor { - * template - * void visit(const Node &n) {...} - * }; - */ -template -class node_visitation_t : noncopyable_t { - public: - explicit node_visitation_t(NodeVisitor &v, bool reverse = false) : v_(v), reverse_(reverse) {} - - // Visit the (direct) child nodes of a given node. - template - void accept_children_of(const Node &n) { - // We play fast and loose with const to avoid having to duplicate our FIELDS macros. - const_cast(n).accept(*this, reverse_); - } - - // Visit the (direct) child nodes of a given node. - void accept_children_of(const node_t *n) { - const_cast(n)->base_accept(*this, reverse_); - } - - // Invoke visit() on our visitor for a given node, resolving that node's type. - void accept(const node_t *n) { - assert(n && "Node should not be null"); - switch (n->type) { -#define ELEM(T) \ - case type_t::T: \ - v_.visit(*(n->as())); \ - break; -#include "ast_node_types.inc" - } - } - - // Here is our field visit implementations which adapt to the node visiting. - - // Direct embeddings. - template - void visit_node_field(const Node &node) { - v_.visit(node); - } - - // Pointer embeddings. 
- template - void visit_pointer_field(const Node *ptr) { - v_.visit(*ptr); - } - - // List embeddings. - template - void visit_list_field(const List &list) { - v_.visit(list); - } - - // Optional pointers get visited if not null. - template - void visit_optional_field(optional_t &node) { - if (node.contents) v_.visit(*node.contents); - } - - // Define our custom implementations of non-node fields. - // Union pointers just dispatch to the generic one. - template - void visit_union_field(union_ptr_t &ptr) { - assert(ptr && "Should not have null ptr"); - this->accept(ptr.contents.get()); - } - - void will_visit_fields_of(node_t &) {} - void did_visit_fields_of(node_t &) {} - - private: - // Our adapted visitor. - NodeVisitor &v_; - - // Whether to iterate in reverse order. - const bool reverse_; -}; - -// Type-deducing helper. -template -node_visitation_t node_visitor(NodeVisitor &nv, bool reverse = false) { - return node_visitation_t(nv, reverse); -} - -// A way to visit nodes iteratively. -// This is pre-order. Each node is visited before its children. -// Example: -// traversal_t tv(start); -// while (const node_t *node = tv.next()) {...} -class traversal_t { - public: - // Construct starting with a node - traversal_t(const node_t *n) { - assert(n && "Should not have null node"); - push(n); - } - - // \return the next node, or nullptr if exhausted. - const node_t *next() { - if (stack_.empty()) return nullptr; - const node_t *node = stack_.back(); - stack_.pop_back(); - - // We want to visit in reverse order so the first child ends up on top of the stack. - node_visitor(*this, true /* reverse */).accept_children_of(node); - return node; - } - - private: - // Callback for node_visitation_t. - void visit(const node_t &node) { push(&node); } - - // Construct an empty visitor, used for iterator support. - traversal_t() = default; - - // Append a node. 
- void push(const node_t *n) { - assert(n && "Should not push null node"); - stack_.push_back(n); - } - - // Stack of nodes. - std::vector stack_{}; - - friend class ast_t; - friend class node_visitation_t; -}; - -/// The ast type itself. -class ast_t : noncopyable_t { - public: - using source_range_list_t = std::vector; - - /// Construct an ast by parsing \p src as a job list. - /// The ast attempts to produce \p type as the result. - /// \p type may only be job_list or freestanding_argument_list. - static ast_t parse(const wcstring &src, parse_tree_flags_t flags = parse_flag_none, - parse_error_list_t *out_errors = nullptr); - - /// Like parse(), but constructs a freestanding_argument_list. - static ast_t parse_argument_list(const wcstring &src, - parse_tree_flags_t flags = parse_flag_none, - parse_error_list_t *out_errors = nullptr); - - /// \return a traversal, allowing iteration over the nodes. - traversal_t walk() const { return traversal_t{top()}; } - - /// \return the top node. This has the type requested in the 'parse' method. - const node_t *top() const { return top_.get(); } - - /// \return whether any errors were encountered during parsing. - bool errored() const { return any_error_; } - - /// \return a textual representation of the tree. - /// Pass the original source as \p orig. - wcstring dump(const wcstring &orig) const; - - /// Extra source ranges. - /// These are only generated if the corresponding flags are set. - struct extras_t { - /// Set of comments, sorted by offset. - source_range_list_t comments; - - /// Set of semicolons, sorted by offset. - source_range_list_t semis; - - /// Set of error ranges, sorted by offset. - source_range_list_t errors; - }; - - /// Access the set of extraneous source ranges. - const extras_t &extras() const { return extras_; } - - /// Iterator support. 
- class iterator { - public: - using iterator_category = std::input_iterator_tag; - using difference_type = void; - using value_type = node_t; - using pointer = const node_t *; - using reference = const node_t &; - - bool operator==(const iterator &rhs) { return current_ == rhs.current_; } - bool operator!=(const iterator &rhs) { return !(*this == rhs); } - - iterator &operator++() { - current_ = v_.next(); - return *this; - } - - const node_t &operator*() const { return *current_; } - - private: - explicit iterator(const node_t *start) : v_(start), current_(v_.next()) {} - iterator() = default; - - traversal_t v_{}; - const node_t *current_{}; - friend ast_t; - }; - - iterator begin() const { return iterator{top()}; } - iterator end() const { return iterator{}; } - - ast_t(ast_t &&) = default; - ast_t &operator=(ast_t &&) = default; - - private: - ast_t() = default; - - // Shared parsing code that takes the top type. - static ast_t parse_from_top(const wcstring &src, parse_tree_flags_t parse_flags, - parse_error_list_t *out_errors, type_t top_type); - - // The top node. - // Its type depends on what was requested to parse. - node_unique_ptr_t top_{}; - - /// Whether any errors were encountered during parsing. - bool any_error_{false}; - - /// Extra fields. 
- extras_t extras_{}; -}; +using ast_t = Ast; +using category_t = Category; +using type_t = Type; + +using andor_job_list_t = AndorJobList; +using andor_job_t = AndorJob; +using argument_list_t = ArgumentList; +using argument_or_redirection_list_t = ArgumentOrRedirectionList; +using argument_or_redirection_t = ArgumentOrRedirection; +using argument_t = Argument; +using begin_header_t = BeginHeader; +using block_statement_t = BlockStatement; +using case_item_t = CaseItem; +using decorated_statement_t = DecoratedStatement; +using elseif_clause_list_t = ElseifClauseList; +using for_header_t = ForHeader; +using freestanding_argument_list_t = FreestandingArgumentList; +using function_header_t = FunctionHeader; +using if_clause_t = IfClause; +using if_statement_t = IfStatement; +using job_conjunction_continuation_t = JobConjunctionContinuation; +using job_conjunction_t = JobConjunction; +using job_continuation_t = JobContinuation; +using job_list_t = JobList; +using job_pipeline_t = JobPipeline; +using maybe_newlines_t = MaybeNewlines; +using not_statement_t = NotStatement; +using redirection_t = Redirection; +using semi_nl_t = SemiNl; +using statement_t = Statement; +using string_t = String_; +using switch_statement_t = SwitchStatement; +using variable_assignment_list_t = VariableAssignmentList; +using variable_assignment_t = VariableAssignment; +using while_header_t = WhileHeader; } // namespace ast + +#else +struct Ast; +struct NodeFfi; +namespace ast { +using ast_t = Ast; + +struct argument_t; +struct block_statement_t; +struct statement_t; +struct string_t; +struct maybe_newlines_t; +struct redirection_t; +struct variable_assignment_t; +struct semi_nl_t; +struct decorated_statement_t; + +struct keyword_base_t; + +} // namespace ast + +#endif + +namespace ast { +using node_t = ::NodeFfi; +} + +rust::Box ast_parse(const wcstring &src, parse_tree_flags_t flags = parse_flag_none, + parse_error_list_t *out_errors = nullptr); +rust::Box ast_parse_argument_list(const 
wcstring &src, + parse_tree_flags_t flags = parse_flag_none, + parse_error_list_t *out_errors = nullptr); + #endif // FISH_AST_H diff --git a/src/ast_node_types.inc b/src/ast_node_types.inc deleted file mode 100644 index 1a18675e2..000000000 --- a/src/ast_node_types.inc +++ /dev/null @@ -1,60 +0,0 @@ -// Define ELEM and optionally ELEMLIST before including this file. -// ELEM is for ordinary nodes. -// ELEMLIST(x, y) marks list nodes and the type they contain. -#ifndef ELEMLIST -#define ELEMLIST(x, y) ELEM(x) -#endif - -ELEM(keyword_base) -ELEM(token_base) -ELEM(maybe_newlines) - -ELEM(argument) -ELEMLIST(argument_list, argument) - -ELEM(redirection) -ELEM(argument_or_redirection) -ELEMLIST(argument_or_redirection_list, argument_or_redirection) - -ELEM(variable_assignment) -ELEMLIST(variable_assignment_list, variable_assignment) - -ELEM(job_pipeline) -ELEM(job_conjunction) -// For historical reasons, a job list is a list of job *conjunctions*. This should be fixed. -ELEMLIST(job_list, job_conjunction) -ELEM(job_conjunction_continuation) -ELEMLIST(job_conjunction_continuation_list, job_conjunction_continuation) - -ELEM(job_continuation) -ELEMLIST(job_continuation_list, job_continuation) - -ELEM(andor_job) -ELEMLIST(andor_job_list, andor_job) - -ELEM(statement) - -ELEM(not_statement) - -ELEM(block_statement) -ELEM(for_header) -ELEM(while_header) -ELEM(function_header) -ELEM(begin_header) - -ELEM(if_statement) -ELEM(if_clause) -ELEM(elseif_clause) -ELEMLIST(elseif_clause_list, elseif_clause) -ELEM(else_clause) - -ELEM(switch_statement) -ELEM(case_item) -ELEMLIST(case_item_list, case_item) - -ELEM(decorated_statement) - -ELEM(freestanding_argument_list) - -#undef ELEM -#undef ELEMLIST diff --git a/src/builtins/function.cpp b/src/builtins/function.cpp index d4c0f87be..386c65831 100644 --- a/src/builtins/function.cpp +++ b/src/builtins/function.cpp @@ -231,7 +231,7 @@ static int validate_function_name(int argc, const wchar_t *const *argv, wcstring /// function. 
int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args, const parsed_source_ref_t &source, const ast::block_statement_t &func_node) { - assert(source && "Missing source in builtin_function"); + assert(source.has_value() && "Missing source in builtin_function"); // The wgetopt function expects 'function' as the first argument. Make a new wcstring_list with // that property. This is needed because this builtin has a different signature than the other // builtins. @@ -280,7 +280,7 @@ int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_lis auto props = std::make_shared(); props->shadow_scope = opts.shadow_scope; props->named_arguments = std::move(opts.named_arguments); - props->parsed_source = source; + props->parsed_source = source.clone(); props->func_node = &func_node; props->description = opts.description; props->definition_file = parser.libdata().current_filename; diff --git a/src/builtins/function.h b/src/builtins/function.h index 50c1fd373..e0ab70edc 100644 --- a/src/builtins/function.h +++ b/src/builtins/function.h @@ -2,17 +2,13 @@ #ifndef FISH_BUILTIN_FUNCTION_H #define FISH_BUILTIN_FUNCTION_H +#include "../ast.h" #include "../common.h" #include "../parse_tree.h" class parser_t; struct io_streams_t; -namespace ast { -struct block_statement_t; -} - -int builtin_function(parser_t &parser, io_streams_t &streams, - const wcstring_list_t &c_args, const parsed_source_ref_t &source, - const ast::block_statement_t &func_node); +int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args, + const parsed_source_ref_t &source, const ast::block_statement_t &func_node); #endif diff --git a/src/exec.cpp b/src/exec.cpp index 9e1b6ab2c..afd670381 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -634,11 +634,14 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job, job_group_ref_t job_group = job->group; if (p->type == process_type_t::block_node) { - const 
parsed_source_ref_t &source = p->block_node_source; + const parsed_source_ref_t &source = *p->block_node_source; const ast::statement_t *node = p->internal_block_node; - assert(source && node && "Process is missing node info"); + assert(source.has_value() && node && "Process is missing node info"); + // The lambda will convert into a std::function which requires copyability. A Box can't + // be copied, so add another indirection. + auto source_box = std::make_shared>(source.clone()); return [=](parser_t &parser) { - return parser.eval_node(source, *node, io_chain, job_group).status; + return parser.eval_node(**source_box, *node, io_chain, job_group).status; }; } else { assert(p->type == process_type_t::function); @@ -650,9 +653,9 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job, const wcstring_list_t &argv = p->argv(); return [=](parser_t &parser) { // Pull out the job list from the function. - const ast::job_list_t &body = props->func_node->jobs; + const ast::job_list_t &body = props->func_node->jobs(); const block_t *fb = function_prepare_environment(parser, argv, *props); - auto res = parser.eval_node(props->parsed_source, body, io_chain, job_group); + auto res = parser.eval_node(*props->parsed_source, body, io_chain, job_group); function_restore_environment(parser, fb); // If the function did not execute anything, treat it as success. diff --git a/src/ffi_baggage.h b/src/ffi_baggage.h new file mode 100644 index 000000000..4f82afb3e --- /dev/null +++ b/src/ffi_baggage.h @@ -0,0 +1,9 @@ +#include "fish_indent_common.h" + +// Symbols that get autocxx bindings but are not used in a given binary, will cause "undefined +// reference" when trying to link that binary. Work around this by marking them as used in +// all binaries. 
+void mark_as_used() { + // + pretty_printer_t({}, {}); +} diff --git a/src/fish.cpp b/src/fish.cpp index eed026673..b53583213 100644 --- a/src/fish.cpp +++ b/src/fish.cpp @@ -45,6 +45,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include "expand.h" #include "fallback.h" // IWYU pragma: keep #include "fds.h" +#include "ffi_baggage.h" #include "ffi_init.rs.h" #include "fish_version.h" #include "flog.h" @@ -264,17 +265,16 @@ static int run_command_list(parser_t &parser, const std::vector &cm wcstring cmd_wcs = str2wcstring(cmd); // Parse into an ast and detect errors. auto errors = new_parse_error_list(); - auto ast = ast::ast_t::parse(cmd_wcs, parse_flag_none, &*errors); - bool errored = ast.errored(); + auto ast = ast_parse(cmd_wcs, parse_flag_none, &*errors); + bool errored = ast->errored(); if (!errored) { - errored = parse_util_detect_errors(ast, cmd_wcs, &*errors); + errored = parse_util_detect_errors(*ast, cmd_wcs, &*errors); } if (!errored) { // Construct a parsed source ref. // Be careful to transfer ownership, this could be a very large string. 
- parsed_source_ref_t ps = - std::make_shared(std::move(cmd_wcs), std::move(ast)); - parser.eval(ps, io); + auto ps = new_parsed_source_ref(cmd_wcs, *ast); + parser.eval(*ps, io); } else { wcstring sb; parser.get_backtrace(cmd_wcs, *errors, sb); diff --git a/src/fish_indent.cpp b/src/fish_indent.cpp index 456d790da..4867ae488 100644 --- a/src/fish_indent.cpp +++ b/src/fish_indent.cpp @@ -36,29 +36,21 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include #include "ast.h" -#include "common.h" #include "env.h" -#include "expand.h" #include "fds.h" +#include "ffi_baggage.h" #include "ffi_init.rs.h" +#include "fish_indent_common.h" #include "fish_version.h" #include "flog.h" #include "future_feature_flags.h" -#include "global_safety.h" #include "highlight.h" -#include "maybe.h" #include "operation_context.h" -#include "parse_constants.h" -#include "parse_util.h" #include "print_help.h" #include "tokenizer.h" #include "wcstringutil.h" #include "wutil.h" // IWYU pragma: keep -// The number of spaces per indent isn't supposed to be configurable. -// See discussion at https://github.com/fish-shell/fish-shell/pull/6790 -#define SPACES_PER_INDENT 4 - static bool dump_parse_tree = false; static int ret = 0; @@ -89,581 +81,6 @@ static wcstring read_file(FILE *f) { return result; } -namespace { -/// From C++14. -template -using enable_if_t = typename std::enable_if::type; - -/// \return whether a character at a given index is escaped. -/// A character is escaped if it has an odd number of backslashes. -bool char_is_escaped(const wcstring &text, size_t idx) { - return count_preceding_backslashes(text, idx) % 2 == 1; -} - -using namespace ast; -struct pretty_printer_t { - // Note: this got somewhat more complicated after introducing the new AST, because that AST no - // longer encodes detailed lexical information (e.g. every newline). 
This feels more complex - // than necessary and would probably benefit from a more layered approach where we identify - // certain runs, weight line breaks, have a cost model, etc. - pretty_printer_t(const wcstring &src, bool do_indent) - : source(src), - indents(do_indent ? parse_util_compute_indents(source) : std::vector(src.size(), 0)), - ast(ast_t::parse(src, parse_flags())), - do_indent(do_indent), - gaps(compute_gaps()), - preferred_semi_locations(compute_preferred_semi_locations()) { - assert(indents.size() == source.size() && "indents and source should be same length"); - } - - // Original source. - const wcstring &source; - - // The indents of our string. - // This has the same length as 'source' and describes the indentation level. - const std::vector indents; - - // The parsed ast. - const ast_t ast; - - // The prettifier output. - wcstring output; - - // The indent of the source range which we are currently emitting. - int current_indent{0}; - - // Whether to indent, or just insert spaces. - const bool do_indent; - - // Whether the next gap text should hide the first newline. - bool gap_text_mask_newline{false}; - - // The "gaps": a sorted set of ranges between tokens. - // These contain whitespace, comments, semicolons, and other lexical elements which are not - // present in the ast. - const std::vector gaps; - - // The sorted set of source offsets of nl_semi_t which should be set as semis, not newlines. - // This is computed ahead of time for convenience. - const std::vector preferred_semi_locations; - - // Flags we support. - using gap_flags_t = uint32_t; - enum { - default_flags = 0, - - // Whether to allow line splitting via escaped newlines. - // For example, in argument lists: - // - // echo a \ - // b - // - // If this is not set, then split-lines will be joined. - allow_escaped_newlines = 1 << 0, - - // Whether to require a space before this token. 
- // This is used when emitting semis: - // echo a; echo b; - // No space required between 'a' and ';', or 'b' and ';'. - skip_space = 1 << 1, - }; - - // \return gap text flags for the gap text that comes *before* a given node type. - static gap_flags_t gap_text_flags_before_node(const node_t &node) { - gap_flags_t result = default_flags; - switch (node.type) { - // Allow escaped newlines before leaf nodes that can be part of a long command. - case type_t::argument: - case type_t::redirection: - case type_t::variable_assignment: - result |= allow_escaped_newlines; - break; - - case type_t::token_base: - // Allow escaped newlines before && and ||, and also pipes. - switch (node.as()->type) { - case parse_token_type_t::andand: - case parse_token_type_t::oror: - case parse_token_type_t::pipe: - result |= allow_escaped_newlines; - break; - case parse_token_type_t::string: { - // Allow escaped newlines before commands that follow a variable assignment - // since both can be long (#7955). - const node_t *p = node.parent; - if (p->type != type_t::decorated_statement) break; - p = p->parent; - assert(p->type == type_t::statement); - p = p->parent; - if (auto job = p->try_as()) { - if (!job->variables.empty()) result |= allow_escaped_newlines; - } else if (auto job_cnt = p->try_as()) { - if (!job_cnt->variables.empty()) result |= allow_escaped_newlines; - } else if (auto not_stmt = p->try_as()) { - if (!not_stmt->variables.empty()) result |= allow_escaped_newlines; - } - break; - } - default: - break; - } - break; - - default: - break; - } - return result; - } - - // \return whether we are at the start of a new line. - bool at_line_start() const { return output.empty() || output.back() == L'\n'; } - - // \return whether we have a space before the output. - // This ignores escaped spaces and escaped newlines. - bool has_preceding_space() const { - long idx = static_cast(output.size()) - 1; - // Skip escaped newlines. - // This is historical. 
Example: - // - // cmd1 \ - // | cmd2 - // - // we want the pipe to "see" the space after cmd1. - // TODO: this is too tricky, we should factor this better. - while (idx >= 0 && output.at(idx) == L'\n') { - size_t backslashes = count_preceding_backslashes(source, idx); - if (backslashes % 2 == 0) { - // Not escaped. - return false; - } - idx -= (1 + backslashes); - } - return idx >= 0 && output.at(idx) == L' ' && !char_is_escaped(output, idx); - } - - // Entry point. Prettify our source code and return it. - wcstring prettify() { - output = wcstring{}; - node_visitor(*this).accept(ast.top()); - - // Trailing gap text. - emit_gap_text_before(source_range_t{(uint32_t)source.size(), 0}, default_flags); - - // Replace all trailing newlines with just a single one. - while (!output.empty() && at_line_start()) { - output.pop_back(); - } - emit_newline(); - - wcstring result = std::move(output); - return result; - } - - // \return a substring of source. - wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); } - - // Return the gap ranges from our ast. - std::vector compute_gaps() const { - auto range_compare = [](source_range_t r1, source_range_t r2) { - if (r1.start != r2.start) return r1.start < r2.start; - return r1.length < r2.length; - }; - // Collect the token ranges into a list. - std::vector tok_ranges; - for (const node_t &node : ast) { - if (node.category == category_t::leaf) { - auto r = node.source_range(); - if (r.length > 0) tok_ranges.push_back(r); - } - } - // Place a zero length range at end to aid in our inverting. - tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0}); - - // Our tokens should be sorted. - assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare)); - - // For each range, add a gap range between the previous range and this range. 
- std::vector gaps; - uint32_t prev_end = 0; - for (source_range_t tok_range : tok_ranges) { - assert(tok_range.start >= prev_end && - "Token range should not overlap or be out of order"); - if (tok_range.start >= prev_end) { - gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end}); - } - prev_end = tok_range.start + tok_range.length; - } - return gaps; - } - - // Return sorted list of semi-preferring semi_nl nodes. - std::vector compute_preferred_semi_locations() const { - std::vector result; - auto mark_semi_from_input = [&](const optional_t &n) { - if (n && n->has_source() && substr(n->range) == L";") { - result.push_back(n->range.start); - } - }; - - // andor_job_lists get semis if the input uses semis. - for (const auto &node : ast) { - // See if we have a condition and an andor_job_list. - const optional_t *condition = nullptr; - const andor_job_list_t *andors = nullptr; - if (const auto *ifc = node.try_as()) { - condition = &ifc->condition.semi_nl; - andors = &ifc->andor_tail; - } else if (const auto *wc = node.try_as()) { - condition = &wc->condition.semi_nl; - andors = &wc->andor_tail; - } - - // If there is no and-or tail then we always use a newline. - if (andors && andors->count() > 0) { - if (condition) mark_semi_from_input(*condition); - // Mark all but last of the andor list. - for (uint32_t i = 0; i + 1 < andors->count(); i++) { - mark_semi_from_input(andors->at(i)->job.semi_nl); - } - } - } - - // `x ; and y` gets semis if it has them already, and they are on the same line. - for (const auto &node : ast) { - if (const auto *job_list = node.try_as()) { - const semi_nl_t *prev_job_semi_nl = nullptr; - for (const job_conjunction_t &job : *job_list) { - // Set up prev_job_semi_nl for the next iteration to make control flow easier. - const semi_nl_t *prev = prev_job_semi_nl; - prev_job_semi_nl = job.semi_nl.contents.get(); - - // Is this an 'and' or 'or' job? 
- if (!job.decorator) continue; - - // Now see if we want to mark 'prev' as allowing a semi. - // Did we have a previous semi_nl which was a newline? - if (!prev || substr(prev->range) != L";") continue; - - // Is there a newline between them? - assert(prev->range.start <= job.decorator->range.start && - "Ranges out of order"); - auto start = source.begin() + prev->range.start; - auto end = source.begin() + job.decorator->range.end(); - if (std::find(start, end, L'\n') == end) { - // We're going to allow the previous semi_nl to be a semi. - result.push_back(prev->range.start); - } - } - } - } - std::sort(result.begin(), result.end()); - return result; - } - - // Emit a space or indent as necessary, depending on the previous output. - void emit_space_or_indent(gap_flags_t flags = default_flags) { - if (at_line_start()) { - output.append(SPACES_PER_INDENT * current_indent, L' '); - } else if (!(flags & skip_space) && !has_preceding_space()) { - output.append(1, L' '); - } - } - - // Emit "gap text:" newlines and comments from the original source. - // Gap text may be a few things: - // - // 1. Just a space is common. We will trim the spaces to be empty. - // - // Here the gap text is the comment, followed by the newline: - // - // echo abc # arg - // echo def - // - // 2. It may also be an escaped newline: - // Here the gap text is a space, backslash, newline, space. - // - // echo \ - // hi - // - // 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe: - // - // begin | stuff - // - // We do not handle errors here - instead our caller does. - bool emit_gap_text(source_range_t range, gap_flags_t flags) { - wcstring gap_text = substr(range); - // Common case: if we are only spaces, do nothing. - if (gap_text.find_first_not_of(L' ') == wcstring::npos) return false; - - // Look to see if there is an escaped newline. - // Emit it if either we allow it, or it comes before the first comment. 
- // Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap - // text - we already know it has no semantic significance. - size_t escaped_nl = gap_text.find(L"\\\n"); - if (escaped_nl != wcstring::npos) { - size_t comment_idx = gap_text.find(L'#'); - if ((flags & allow_escaped_newlines) || - (comment_idx != wcstring::npos && escaped_nl < comment_idx)) { - // Emit a space before the escaped newline. - if (!at_line_start() && !has_preceding_space()) { - output.append(L" "); - } - output.append(L"\\\n"); - // Indent the continuation line and any leading comments (#7252). - // Use the indentation level of the next newline. - current_indent = indents.at(range.start + escaped_nl + 1); - emit_space_or_indent(); - } - } - - // It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we - // always emit one. - bool needs_nl = false; - - auto tokenizer = new_tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES); - while (auto tok = tokenizer->next()) { - wcstring tok_text = *tokenizer->text_of(*tok); - - if (needs_nl) { - emit_newline(); - needs_nl = false; - if (tok_text == L"\n") continue; - } else if (gap_text_mask_newline) { - // We only respect mask_newline the first time through the loop. - gap_text_mask_newline = false; - if (tok_text == L"\n") continue; - } - - if (tok->type_ == token_type_t::comment) { - emit_space_or_indent(); - output.append(tok_text); - needs_nl = true; - } else if (tok->type_ == token_type_t::end) { - // This may be either a newline or semicolon. - // Semicolons found here are not part of the ast and can simply be removed. - // Newlines are preserved unless mask_newline is set. 
- if (tok_text == L"\n") { - emit_newline(); - } - } else { - fprintf(stderr, - "Gap text should only have comments and newlines - instead found token " - "type %d with text: %ls\n", - (int)tok->type_, tok_text.c_str()); - DIE("Gap text should only have comments and newlines"); - } - } - if (needs_nl) emit_newline(); - return needs_nl; - } - - /// \return the gap text ending at a given index into the string, or empty if none. - source_range_t gap_text_to(uint32_t end) const { - auto where = std::lower_bound( - gaps.begin(), gaps.end(), end, - [](source_range_t r, uint32_t end) { return r.start + r.length < end; }); - if (where == gaps.end() || where->start + where->length != end) { - // Not found. - return source_range_t{0, 0}; - } else { - return *where; - } - } - - /// \return whether a range \p r overlaps an error range from our ast. - bool range_contained_error(source_range_t r) const { - const auto &errs = ast.extras().errors; - auto range_is_before = [](source_range_t x, source_range_t y) { - return x.start + x.length <= y.start; - }; - assert(std::is_sorted(errs.begin(), errs.end(), range_is_before) && - "Error ranges should be sorted"); - return std::binary_search(errs.begin(), errs.end(), r, range_is_before); - } - - // Emit the gap text before a source range. - bool emit_gap_text_before(source_range_t r, gap_flags_t flags) { - assert(r.start <= source.size() && "source out of bounds"); - bool added_newline = false; - - // Find the gap text which ends at start. - source_range_t range = gap_text_to(r.start); - if (range.length > 0) { - // Set the indent from the beginning of this gap text. - // For example: - // begin - // cmd - // # comment - // end - // Here the comment is the gap text before the end, but we want the indent from the - // command. - if (range.start < indents.size()) current_indent = indents.at(range.start); - - // If this range contained an error, append the gap text without modification. 
- // For example in: echo foo " - // We don't want to mess with the quote. - if (range_contained_error(range)) { - output.append(substr(range)); - } else { - added_newline = emit_gap_text(range, flags); - } - } - // Always clear gap_text_mask_newline after emitting even empty gap text. - gap_text_mask_newline = false; - return added_newline; - } - - /// Given a string \p input, remove unnecessary quotes, etc. - wcstring clean_text(const wcstring &input) { - // Unescape the string - this leaves special markers around if there are any - // expansions or anything. We specifically tell it to not compute backslash-escapes - // like \U or \x, because we want to leave them intact. - wcstring unescaped = input; - unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES); - - // Remove INTERNAL_SEPARATOR because that's a quote. - auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; }; - unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote), unescaped.end()); - - // If no non-"good" char is left, use the unescaped version. - // This can be extended to other characters, but giving the precise list is tough, - // can change over time (see "^", "%" and "?", in some cases "{}") and it just makes - // people feel more at ease. - auto goodchars = [](wchar_t ch) { - return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/'; - }; - if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) == unescaped.end() && - !unescaped.empty()) { - return unescaped; - } else { - return input; - } - } - - // Emit a range of original text. This indents as needed, and also inserts preceding gap text. - // If \p tolerate_line_splitting is set, then permit escaped newlines; otherwise collapse such - // lines. 
- void emit_text(source_range_t r, gap_flags_t flags) { - emit_gap_text_before(r, flags); - current_indent = indents.at(r.start); - if (r.length > 0) { - emit_space_or_indent(flags); - output.append(clean_text(substr(r))); - } - } - - template - void emit_node_text(const leaf_t &node) { - source_range_t range = node.range; - - // Weird special-case: a token may end in an escaped newline. Notably, the newline is - // not part of the following gap text, handle indentation here (#8197). - bool ends_with_escaped_nl = node.range.length >= 2 && - source.at(node.range.end() - 2) == L'\\' && - source.at(node.range.end() - 1) == L'\n'; - if (ends_with_escaped_nl) { - range = {range.start, range.length - 2}; - } - - emit_text(range, gap_text_flags_before_node(node)); - - if (ends_with_escaped_nl) { - // By convention, escaped newlines are preceded with a space. - output.append(L" \\\n"); - // TODO Maybe check "allow_escaped_newlines" and use the precomputed indents. - // The cases where this matters are probably very rare. - current_indent++; - emit_space_or_indent(); - current_indent--; - } - } - - // Emit one newline. - void emit_newline() { output.push_back(L'\n'); } - - // Emit a semicolon. - void emit_semi() { output.push_back(L';'); } - - // For branch and list nodes, default is to visit their children. - template - enable_if_t visit(const Node &node) { - node_visitor(*this).accept_children_of(node); - } - - template - enable_if_t visit(const Node &node) { - node_visitor(*this).accept_children_of(node); - } - - // Leaf nodes we just visit their text. - void visit(const keyword_base_t &node) { emit_node_text(node); } - void visit(const token_base_t &node) { emit_node_text(node); } - void visit(const argument_t &node) { emit_node_text(node); } - void visit(const variable_assignment_t &node) { emit_node_text(node); } - - void visit(const semi_nl_t &node) { - // These are semicolons or newlines which are part of the ast. That means it includes e.g. 
- // ones terminating a job or 'if' header, but not random semis in job lists. We respect - // preferred_semi_locations to decide whether or not these should stay as newlines or - // become semicolons. - - // Check if we should prefer a semicolon. - bool prefer_semi = node.range.length > 0 && - std::binary_search(preferred_semi_locations.begin(), - preferred_semi_locations.end(), node.range.start); - emit_gap_text_before(node.range, gap_text_flags_before_node(node)); - - // Don't emit anything if the gap text put us on a newline (because it had a comment). - if (!at_line_start()) { - prefer_semi ? emit_semi() : emit_newline(); - - // If it was a semi but we emitted a newline, swallow a subsequent newline. - if (!prefer_semi && substr(node.range) == L";") { - gap_text_mask_newline = true; - } - } - } - - void visit(const redirection_t &node) { - // No space between a redirection operator and its target (#2899). - emit_text(node.oper.range, default_flags); - emit_text(node.target.range, skip_space); - } - - void visit(const maybe_newlines_t &node) { - // Our newlines may have comments embedded in them, example: - // cmd | - // # something - // cmd2 - // Treat it as gap text. - if (node.range.length > 0) { - auto flags = gap_text_flags_before_node(node); - current_indent = indents.at(node.range.start); - bool added_newline = emit_gap_text_before(node.range, flags); - source_range_t gap_range = node.range; - if (added_newline && gap_range.length > 0 && source.at(gap_range.start) == L'\n') { - gap_range.start++; - } - emit_gap_text(gap_range, flags); - } - } - - void visit(const begin_header_t &node) { - // 'begin' does not require a newline after it, but we insert one. - node_visitor(*this).accept_children_of(node); - if (!at_line_start()) { - emit_newline(); - } - } - - // The flags we use to parse. 
- static parse_tree_flags_t parse_flags() { - return parse_flag_continue_after_error | parse_flag_include_comments | - parse_flag_leave_unterminated | parse_flag_show_blank_lines; - } -}; -} // namespace - static const char *highlight_role_to_string(highlight_role_t role) { #define TEST_ROLE(x) \ case highlight_role_t::x: \ @@ -750,10 +167,9 @@ static std::string make_pygments_csv(const wcstring &src) { // Entry point for prettification. static wcstring prettify(const wcstring &src, bool do_indent) { if (dump_parse_tree) { - auto ast = - ast::ast_t::parse(src, parse_flag_leave_unterminated | parse_flag_include_comments | - parse_flag_show_extra_semis); - wcstring ast_dump = ast.dump(src); + auto ast = ast_parse(src, parse_flag_leave_unterminated | parse_flag_include_comments | + parse_flag_show_extra_semis); + wcstring ast_dump = *ast->dump(src); std::fwprintf(stderr, L"%ls\n", ast_dump.c_str()); } diff --git a/src/fish_indent_common.cpp b/src/fish_indent_common.cpp new file mode 100644 index 000000000..a2fa42dc2 --- /dev/null +++ b/src/fish_indent_common.cpp @@ -0,0 +1,475 @@ +#include "fish_indent_common.h" + +#include "ast.h" +#include "common.h" +#include "env.h" +#include "expand.h" +#include "flog.h" +#include "global_safety.h" +#include "maybe.h" +#include "operation_context.h" +#include "parse_constants.h" +#include "parse_util.h" +#include "tokenizer.h" +#include "wcstringutil.h" +#if INCLUDE_RUST_HEADERS +#include "fish_indent.rs.h" +#endif + +using namespace ast; + +// The number of spaces per indent isn't supposed to be configurable. +// See discussion at https://github.com/fish-shell/fish-shell/pull/6790 +#define SPACES_PER_INDENT 4 + +/// \return whether a character at a given index is escaped. +/// A character is escaped if it has an odd number of backslashes. 
+static bool char_is_escaped(const wcstring &text, size_t idx) { + return count_preceding_backslashes(text, idx) % 2 == 1; +} + +pretty_printer_t::pretty_printer_t(const wcstring &src, bool do_indent) + : source(src), + indents(do_indent ? parse_util_compute_indents(source) : std::vector(src.size(), 0)), + ast(ast_parse(src, parse_flags())), + visitor(new_pretty_printer(*this)), + do_indent(do_indent), + gaps(compute_gaps()), + preferred_semi_locations(compute_preferred_semi_locations()) { + assert(indents.size() == source.size() && "indents and source should be same length"); +} + +pretty_printer_t::gap_flags_t pretty_printer_t::gap_text_flags_before_node(const node_t &node) { + gap_flags_t result = default_flags; + switch (node.typ()) { + // Allow escaped newlines before leaf nodes that can be part of a long command. + case type_t::argument: + case type_t::redirection: + case type_t::variable_assignment: + result |= allow_escaped_newlines; + break; + + case type_t::token_base: + // Allow escaped newlines before && and ||, and also pipes. + switch (node.token_type()) { + case parse_token_type_t::andand: + case parse_token_type_t::oror: + case parse_token_type_t::pipe: + result |= allow_escaped_newlines; + break; + case parse_token_type_t::string: { + // Allow escaped newlines before commands that follow a variable assignment + // since both can be long (#7955). 
+ auto p = node.parent(); + if (p->typ() != type_t::decorated_statement) break; + p = p->parent(); + assert(p->typ() == type_t::statement); + p = p->parent(); + if (auto *job = p->try_as_job_pipeline()) { + if (!job->variables().empty()) result |= allow_escaped_newlines; + } else if (auto *job_cnt = p->try_as_job_continuation()) { + if (!job_cnt->variables().empty()) result |= allow_escaped_newlines; + } else if (auto *not_stmt = p->try_as_not_statement()) { + if (!not_stmt->variables().empty()) result |= allow_escaped_newlines; + } + break; + } + default: + break; + } + break; + + default: + break; + } + return result; +} + +bool pretty_printer_t::has_preceding_space() const { + long idx = static_cast(output.size()) - 1; + // Skip escaped newlines. + // This is historical. Example: + // + // cmd1 \ + // | cmd2 + // + // we want the pipe to "see" the space after cmd1. + // TODO: this is too tricky, we should factor this better. + while (idx >= 0 && output.at(idx) == L'\n') { + size_t backslashes = count_preceding_backslashes(source, idx); + if (backslashes % 2 == 0) { + // Not escaped. + return false; + } + idx -= (1 + backslashes); + } + return idx >= 0 && output.at(idx) == L' ' && !char_is_escaped(output, idx); +} + +wcstring pretty_printer_t::prettify() { + output = wcstring{}; + visitor->visit(*ast->top()); + + // Trailing gap text. + emit_gap_text_before(source_range_t{(uint32_t)source.size(), 0}, default_flags); + + // Replace all trailing newlines with just a single one. + while (!output.empty() && at_line_start()) { + output.pop_back(); + } + emit_newline(); + + wcstring result = std::move(output); + return result; +} + +std::vector pretty_printer_t::compute_gaps() const { + auto range_compare = [](source_range_t r1, source_range_t r2) { + if (r1.start != r2.start) return r1.start < r2.start; + return r1.length < r2.length; + }; + // Collect the token ranges into a list. 
+ std::vector tok_ranges; + for (auto ast_traversal = new_ast_traversal(*ast->top());;) { + auto node = ast_traversal->next(); + if (!node->has_value()) break; + if (node->category() == category_t::leaf) { + auto r = node->source_range(); + if (r.length > 0) tok_ranges.push_back(r); + } + } + // Place a zero length range at end to aid in our inverting. + tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0}); + + // Our tokens should be sorted. + assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare)); + + // For each range, add a gap range between the previous range and this range. + std::vector gaps; + uint32_t prev_end = 0; + for (source_range_t tok_range : tok_ranges) { + assert(tok_range.start >= prev_end && "Token range should not overlap or be out of order"); + if (tok_range.start >= prev_end) { + gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end}); + } + prev_end = tok_range.start + tok_range.length; + } + return gaps; +} + +void pretty_printer_t::visit_begin_header() { + if (!at_line_start()) { + emit_newline(); + } +} + +void pretty_printer_t::visit_maybe_newlines(const void *node_) { + const auto &node = *static_cast(node_); + // Our newlines may have comments embedded in them, example: + // cmd | + // # something + // cmd2 + // Treat it as gap text. + if (node.range().length > 0) { + auto flags = gap_text_flags_before_node(*node.ptr()); + current_indent = indents.at(node.range().start); + bool added_newline = emit_gap_text_before(node.range(), flags); + source_range_t gap_range = node.range(); + if (added_newline && gap_range.length > 0 && source.at(gap_range.start) == L'\n') { + gap_range.start++; + } + emit_gap_text(gap_range, flags); + } +} + +void pretty_printer_t::visit_redirection(const void *node_) { + const auto &node = *static_cast(node_); + // No space between a redirection operator and its target (#2899). 
+ emit_text(node.oper().range(), default_flags); + emit_text(node.target().range(), skip_space); +} + +void pretty_printer_t::visit_semi_nl(const void *node_) { + // These are semicolons or newlines which are part of the ast. That means it includes e.g. + // ones terminating a job or 'if' header, but not random semis in job lists. We respect + // preferred_semi_locations to decide whether or not these should stay as newlines or + // become semicolons. + const auto &node = *static_cast(node_); + auto range = node.source_range(); + + // Check if we should prefer a semicolon. + bool prefer_semi = + range.length > 0 && std::binary_search(preferred_semi_locations.begin(), + preferred_semi_locations.end(), range.start); + emit_gap_text_before(range, gap_text_flags_before_node(*node.ptr())); + + // Don't emit anything if the gap text put us on a newline (because it had a comment). + if (!at_line_start()) { + prefer_semi ? emit_semi() : emit_newline(); + + // If it was a semi but we emitted a newline, swallow a subsequent newline. + if (!prefer_semi && substr(range) == L";") { + gap_text_mask_newline = true; + } + } +} + +void pretty_printer_t::emit_node_text(const void *node_) { + const auto &node = *static_cast(node_); + source_range_t range = node.source_range(); + + // Weird special-case: a token may end in an escaped newline. Notably, the newline is + // not part of the following gap text, handle indentation here (#8197). + bool ends_with_escaped_nl = range.length >= 2 && source.at(range.end() - 2) == L'\\' && + source.at(range.end() - 1) == L'\n'; + if (ends_with_escaped_nl) { + range = {range.start, range.length - 2}; + } + + emit_text(range, gap_text_flags_before_node(node)); + + if (ends_with_escaped_nl) { + // By convention, escaped newlines are preceded with a space. + output.append(L" \\\n"); + // TODO Maybe check "allow_escaped_newlines" and use the precomputed indents. + // The cases where this matters are probably very rare. 
+ current_indent++; + emit_space_or_indent(); + current_indent--; + } +} + +void pretty_printer_t::emit_text(source_range_t r, gap_flags_t flags) { + emit_gap_text_before(r, flags); + current_indent = indents.at(r.start); + if (r.length > 0) { + emit_space_or_indent(flags); + output.append(clean_text(substr(r))); + } +} + +wcstring pretty_printer_t::clean_text(const wcstring &input) { + // Unescape the string - this leaves special markers around if there are any + // expansions or anything. We specifically tell it to not compute backslash-escapes + // like \U or \x, because we want to leave them intact. + wcstring unescaped = input; + unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES); + + // Remove INTERNAL_SEPARATOR because that's a quote. + auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; }; + unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote), unescaped.end()); + + // If no non-"good" char is left, use the unescaped version. + // This can be extended to other characters, but giving the precise list is tough, + // can change over time (see "^", "%" and "?", in some cases "{}") and it just makes + // people feel more at ease. + auto goodchars = [](wchar_t ch) { + return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/'; + }; + if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) == unescaped.end() && + !unescaped.empty()) { + return unescaped; + } else { + return input; + } +} + +bool pretty_printer_t::emit_gap_text_before(source_range_t r, gap_flags_t flags) { + assert(r.start <= source.size() && "source out of bounds"); + bool added_newline = false; + + // Find the gap text which ends at start. + source_range_t range = gap_text_to(r.start); + if (range.length > 0) { + // Set the indent from the beginning of this gap text. 
+ // For example: + // begin + // cmd + // # comment + // end + // Here the comment is the gap text before the end, but we want the indent from the + // command. + if (range.start < indents.size()) current_indent = indents.at(range.start); + + // If this range contained an error, append the gap text without modification. + // For example in: echo foo " + // We don't want to mess with the quote. + if (range_contained_error(range)) { + output.append(substr(range)); + } else { + added_newline = emit_gap_text(range, flags); + } + } + // Always clear gap_text_mask_newline after emitting even empty gap text. + gap_text_mask_newline = false; + return added_newline; +} + +bool pretty_printer_t::range_contained_error(source_range_t r) const { + const auto &errs = ast->extras()->errors(); + auto range_is_before = [](source_range_t x, source_range_t y) { + return x.start + x.length <= y.start; + }; + assert(std::is_sorted(errs.begin(), errs.end(), range_is_before) && + "Error ranges should be sorted"); + return std::binary_search(errs.begin(), errs.end(), r, range_is_before); +} + +source_range_t pretty_printer_t::gap_text_to(uint32_t end) const { + auto where = + std::lower_bound(gaps.begin(), gaps.end(), end, + [](source_range_t r, uint32_t end) { return r.start + r.length < end; }); + if (where == gaps.end() || where->start + where->length != end) { + // Not found. + return source_range_t{0, 0}; + } else { + return *where; + } +} + +bool pretty_printer_t::emit_gap_text(source_range_t range, gap_flags_t flags) { + wcstring gap_text = substr(range); + // Common case: if we are only spaces, do nothing. + if (gap_text.find_first_not_of(L' ') == wcstring::npos) return false; + + // Look to see if there is an escaped newline. + // Emit it if either we allow it, or it comes before the first comment. + // Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap + // text - we already know it has no semantic significance. 
+ size_t escaped_nl = gap_text.find(L"\\\n"); + if (escaped_nl != wcstring::npos) { + size_t comment_idx = gap_text.find(L'#'); + if ((flags & allow_escaped_newlines) || + (comment_idx != wcstring::npos && escaped_nl < comment_idx)) { + // Emit a space before the escaped newline. + if (!at_line_start() && !has_preceding_space()) { + output.append(L" "); + } + output.append(L"\\\n"); + // Indent the continuation line and any leading comments (#7252). + // Use the indentation level of the next newline. + current_indent = indents.at(range.start + escaped_nl + 1); + emit_space_or_indent(); + } + } + + // It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we + // always emit one. + bool needs_nl = false; + + auto tokenizer = new_tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES); + while (auto tok = tokenizer->next()) { + wcstring tok_text = *tokenizer->text_of(*tok); + + if (needs_nl) { + emit_newline(); + needs_nl = false; + if (tok_text == L"\n") continue; + } else if (gap_text_mask_newline) { + // We only respect mask_newline the first time through the loop. + gap_text_mask_newline = false; + if (tok_text == L"\n") continue; + } + + if (tok->type_ == token_type_t::comment) { + emit_space_or_indent(); + output.append(tok_text); + needs_nl = true; + } else if (tok->type_ == token_type_t::end) { + // This may be either a newline or semicolon. + // Semicolons found here are not part of the ast and can simply be removed. + // Newlines are preserved unless mask_newline is set. 
+ if (tok_text == L"\n") { + emit_newline(); + } + } else { + fprintf(stderr, + "Gap text should only have comments and newlines - instead found token " + "type %d with text: %ls\n", + (int)tok->type_, tok_text.c_str()); + DIE("Gap text should only have comments and newlines"); + } + } + if (needs_nl) emit_newline(); + return needs_nl; +} + +void pretty_printer_t::emit_space_or_indent(gap_flags_t flags) { + if (at_line_start()) { + output.append(SPACES_PER_INDENT * current_indent, L' '); + } else if (!(flags & skip_space) && !has_preceding_space()) { + output.append(1, L' '); + } +} + +std::vector pretty_printer_t::compute_preferred_semi_locations() const { + std::vector result; + auto mark_semi_from_input = [&](const semi_nl_t &n) { + if (n.ptr()->has_source() && substr(n.range()) == L";") { + result.push_back(n.range().start); + } + }; + + // andor_job_lists get semis if the input uses semis. + for (auto ast_traversal = new_ast_traversal(*ast->top());;) { + auto node = ast_traversal->next(); + if (!node->has_value()) break; + // See if we have a condition and an andor_job_list. + const semi_nl_t *condition = nullptr; + const andor_job_list_t *andors = nullptr; + if (const auto *ifc = node->try_as_if_clause()) { + if (ifc->condition().has_semi_nl()) { + condition = &ifc->condition().semi_nl(); + } + andors = &ifc->andor_tail(); + } else if (const auto *wc = node->try_as_while_header()) { + if (wc->condition().has_semi_nl()) { + condition = &wc->condition().semi_nl(); + } + andors = &wc->andor_tail(); + } + + // If there is no and-or tail then we always use a newline. + if (andors && andors->count() > 0) { + if (condition) mark_semi_from_input(*condition); + // Mark all but last of the andor list. + for (uint32_t i = 0; i + 1 < andors->count(); i++) { + mark_semi_from_input(andors->at(i)->job().semi_nl()); + } + } + } + + // `x ; and y` gets semis if it has them already, and they are on the same line. 
+ for (auto ast_traversal = new_ast_traversal(*ast->top());;) { + auto node = ast_traversal->next(); + if (!node->has_value()) break; + if (const auto *job_list = node->try_as_job_list()) { + const semi_nl_t *prev_job_semi_nl = nullptr; + for (size_t i = 0; i < job_list->count(); i++) { + const job_conjunction_t &job = *job_list->at(i); + // Set up prev_job_semi_nl for the next iteration to make control flow easier. + const semi_nl_t *prev = prev_job_semi_nl; + prev_job_semi_nl = job.has_semi_nl() ? &job.semi_nl() : nullptr; + + // Is this an 'and' or 'or' job? + if (!job.has_decorator()) continue; + + // Now see if we want to mark 'prev' as allowing a semi. + // Did we have a previous semi_nl which was a newline? + if (!prev || substr(prev->range()) != L";") continue; + + // Is there a newline between them? + assert(prev->range().start <= job.decorator().range().start && + "Ranges out of order"); + auto start = source.begin() + prev->range().start; + auto end = source.begin() + job.decorator().range().end(); + if (std::find(start, end, L'\n') == end) { + // We're going to allow the previous semi_nl to be a semi. + result.push_back(prev->range().start); + } + } + } + } + std::sort(result.begin(), result.end()); + return result; +} diff --git a/src/fish_indent_common.h b/src/fish_indent_common.h new file mode 100644 index 000000000..67446b2be --- /dev/null +++ b/src/fish_indent_common.h @@ -0,0 +1,160 @@ +#ifndef FISH_INDENT_STAGING_H +#define FISH_INDENT_STAGING_H + +#include "ast.h" +#include "common.h" +#include "cxx.h" + +struct PrettyPrinter; +struct pretty_printer_t { + // Note: this got somewhat more complicated after introducing the new AST, because that AST no + // longer encodes detailed lexical information (e.g. every newline). This feels more complex + // than necessary and would probably benefit from a more layered approach where we identify + // certain runs, weight line breaks, have a cost model, etc. 
+ pretty_printer_t(const wcstring &src, bool do_indent); + + // Original source. + const wcstring &source; + + // The indents of our string. + // This has the same length as 'source' and describes the indentation level. + const std::vector indents; + + // The parsed ast. + rust::Box ast; + + rust::Box visitor; + + // The prettifier output. + wcstring output; + + // The indent of the source range which we are currently emitting. + int current_indent{0}; + + // Whether to indent, or just insert spaces. + const bool do_indent; + + // Whether the next gap text should hide the first newline. + bool gap_text_mask_newline{false}; + + // The "gaps": a sorted set of ranges between tokens. + // These contain whitespace, comments, semicolons, and other lexical elements which are not + // present in the ast. + const std::vector gaps; + + // The sorted set of source offsets of nl_semi_t which should be set as semis, not newlines. + // This is computed ahead of time for convenience. + const std::vector preferred_semi_locations; + + // Flags we support. + using gap_flags_t = uint32_t; + enum { + default_flags = 0, + + // Whether to allow line splitting via escaped newlines. + // For example, in argument lists: + // + // echo a \ + // b + // + // If this is not set, then split-lines will be joined. + allow_escaped_newlines = 1 << 0, + + // Whether to require a space before this token. + // This is used when emitting semis: + // echo a; echo b; + // No space required between 'a' and ';', or 'b' and ';'. + skip_space = 1 << 1, + }; + +#if INCLUDE_RUST_HEADERS + // \return gap text flags for the gap text that comes *before* a given node type. + static gap_flags_t gap_text_flags_before_node(const ast::node_t &node); +#endif + + // \return whether we are at the start of a new line. + bool at_line_start() const { return output.empty() || output.back() == L'\n'; } + + // \return whether we have a space before the output. + // This ignores escaped spaces and escaped newlines. 
+ bool has_preceding_space() const; + + // Entry point. Prettify our source code and return it. + wcstring prettify(); + + // \return a substring of source. + wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); } + + // Return the gap ranges from our ast. + std::vector compute_gaps() const; + + // Return sorted list of semi-preferring semi_nl nodes. + std::vector compute_preferred_semi_locations() const; + + // Emit a space or indent as necessary, depending on the previous output. + void emit_space_or_indent(gap_flags_t flags = default_flags); + + // Emit "gap text:" newlines and comments from the original source. + // Gap text may be a few things: + // + // 1. Just a space is common. We will trim the spaces to be empty. + // + // Here the gap text is the comment, followed by the newline: + // + // echo abc # arg + // echo def + // + // 2. It may also be an escaped newline: + // Here the gap text is a space, backslash, newline, space. + // + // echo \ + // hi + // + // 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe: + // + // begin | stuff + // + // We do not handle errors here - instead our caller does. + bool emit_gap_text(source_range_t range, gap_flags_t flags); + + /// \return the gap text ending at a given index into the string, or empty if none. + source_range_t gap_text_to(uint32_t end) const; + + /// \return whether a range \p r overlaps an error range from our ast. + bool range_contained_error(source_range_t r) const; + + // Emit the gap text before a source range. + bool emit_gap_text_before(source_range_t r, gap_flags_t flags); + + /// Given a string \p input, remove unnecessary quotes, etc. + wcstring clean_text(const wcstring &input); + + // Emit a range of original text. This indents as needed, and also inserts preceding gap text. + // If \p tolerate_line_splitting is set, then permit escaped newlines; otherwise collapse such + // lines. 
+ void emit_text(source_range_t r, gap_flags_t flags); + + void emit_node_text(const void *node); + + // Emit one newline. + void emit_newline() { output.push_back(L'\n'); } + + // Emit a semicolon. + void emit_semi() { output.push_back(L';'); } + + void visit_semi_nl(const void *node_); + + void visit_redirection(const void *node_); + + void visit_maybe_newlines(const void *node_); + + void visit_begin_header(); + + // The flags we use to parse. + static parse_tree_flags_t parse_flags() { + return parse_flag_continue_after_error | parse_flag_include_comments | + parse_flag_leave_unterminated | parse_flag_show_blank_lines; + } +}; + +#endif // FISH_INDENT_STAGING_H diff --git a/src/fish_key_reader.cpp b/src/fish_key_reader.cpp index 51a554409..e70997814 100644 --- a/src/fish_key_reader.cpp +++ b/src/fish_key_reader.cpp @@ -23,6 +23,7 @@ #include "cxxgen.h" #include "env.h" #include "fallback.h" // IWYU pragma: keep +#include "ffi_baggage.h" #include "ffi_init.rs.h" #include "fish_version.h" #include "input.h" diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index c03af9a59..bac2ec3ac 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -65,6 +65,7 @@ #include "fd_monitor.rs.h" #include "fd_readable_set.rs.h" #include "fds.h" +#include "ffi_baggage.h" #include "ffi_init.rs.h" #include "ffi_tests.rs.h" #include "function.h" @@ -928,17 +929,17 @@ static void test_debounce_timeout() { static parser_test_error_bits_t detect_argument_errors(const wcstring &src) { using namespace ast; - auto ast = ast_t::parse_argument_list(src, parse_flag_none); - if (ast.errored()) { + auto ast = ast_parse_argument_list(src, parse_flag_none); + if (ast->errored()) { return PARSER_TEST_ERROR; } const ast::argument_t *first_arg = - ast.top()->as()->arguments.at(0); + ast->top()->as_freestanding_argument_list().arguments().at(0); if (!first_arg) { err(L"Failed to parse an argument"); return 0; } - return parse_util_detect_errors_in_argument(*first_arg, first_arg->source(src)); + 
return parse_util_detect_errors_in_argument(*first_arg, *first_arg->source(src)); } /// Test the parser. @@ -3066,9 +3067,11 @@ static void test_autoload() { static std::shared_ptr make_test_func_props() { auto ret = std::make_shared(); ret->parsed_source = parse_source(L"function stuff; end", parse_flag_none, nullptr); - assert(ret->parsed_source && "Failed to parse"); - for (const auto &node : ret->parsed_source->ast) { - if (const auto *s = node.try_as()) { + assert(ret->parsed_source->has_value() && "Failed to parse"); + for (auto ast_traversal = new_ast_traversal(*ret->parsed_source->ast().top());;) { + auto node = ast_traversal->next(); + if (!node->has_value()) break; + if (const auto *s = node->try_as_block_statement()) { ret->func_node = s; break; } @@ -4757,8 +4760,8 @@ static void test_new_parser_correctness() { }; for (const auto &test : parser_tests) { - auto ast = ast::ast_t::parse(test.src); - bool success = !ast.errored(); + auto ast = ast_parse(test.src); + bool success = !ast->errored(); if (success && !test.ok) { err(L"\"%ls\" should NOT have parsed, but did", test.src); } else if (!success && test.ok) { @@ -4811,7 +4814,7 @@ static void test_new_parser_fuzzing() { unsigned long permutation = 0; while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++, &src)) { - ast::ast_t::parse(src); + ast_parse(src); } if (log_it) std::fwprintf(stderr, L"done (%lu)\n", permutation); } @@ -4828,13 +4831,15 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o out_joined_args->clear(); *out_deco = statement_decoration_t::none; - auto ast = ast_t::parse(src); - if (ast.errored()) return false; + auto ast = ast_parse(src); + if (ast->errored()) return false; // Get the statement. Should only have one. 
const decorated_statement_t *statement = nullptr; - for (const auto &n : ast) { - if (const auto *tmp = n.try_as()) { + for (auto ast_traversal = new_ast_traversal(*ast->top());;) { + auto n = ast_traversal->next(); + if (!n->has_value()) break; + if (const auto *tmp = n->try_as_decorated_statement()) { if (statement) { say(L"More than one decorated statement found in '%ls'", src.c_str()); return false; @@ -4849,14 +4854,15 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o // Return its decoration and command. *out_deco = statement->decoration(); - *out_cmd = statement->command.source(src); + *out_cmd = *statement->command().source(src); // Return arguments separated by spaces. bool first = true; - for (const ast::argument_or_redirection_t &arg : statement->args_or_redirs) { + for (size_t i = 0; i < statement->args_or_redirs().count(); i++) { + const ast::argument_or_redirection_t &arg = *statement->args_or_redirs().at(i); if (!arg.is_argument()) continue; if (!first) out_joined_args->push_back(L' '); - out_joined_args->append(arg.source(src)); + out_joined_args->append(*arg.ptr()->source(src)); first = false; } @@ -4868,14 +4874,16 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o template static void check_function_help(const wchar_t *src) { using namespace ast; - auto ast = ast_t::parse(src); - if (ast.errored()) { + auto ast = ast_parse(src); + if (ast->errored()) { err(L"Failed to parse '%ls'", src); } int count = 0; - for (const node_t &node : ast) { - count += (node.type == Type); + for (auto ast_traversal = new_ast_traversal(*ast->top());;) { + auto node = ast_traversal->next(); + if (!node->has_value()) break; + count += (node->typ() == Type); } if (count == 0) { err(L"Failed to find node of type '%ls'", ast_type_to_string(Type)); @@ -4939,16 +4947,18 @@ static void test_new_parser_ad_hoc() { // Ensure that 'case' terminates a job list. 
const wcstring src = L"switch foo ; case bar; case baz; end"; - auto ast = ast_t::parse(src); - if (ast.errored()) { + auto ast = ast_parse(src); + if (ast->errored()) { err(L"Parsing failed"); } // Expect two case_item_lists. The bug was that we'd // try to run a command 'case'. int count = 0; - for (const auto &n : ast) { - count += (n.type == type_t::case_item); + for (auto ast_traversal = new_ast_traversal(*ast->top());;) { + auto n = ast_traversal->next(); + if (!n->has_value()) break; + count += (n->typ() == type_t::case_item); } if (count != 2) { err(L"Expected 2 case item nodes, found %d", count); @@ -4959,27 +4969,27 @@ static void test_new_parser_ad_hoc() { // leading to an infinite loop. // By itself it should produce an error. - ast = ast_t::parse(L"a="); - do_test(ast.errored()); + ast = ast_parse(L"a="); + do_test(ast->errored()); // If we are leaving things unterminated, this should not produce an error. // i.e. when typing "a=" at the command line, it should be treated as valid // because we don't want to color it as an error. 
- ast = ast_t::parse(L"a=", parse_flag_leave_unterminated); - do_test(!ast.errored()); + ast = ast_parse(L"a=", parse_flag_leave_unterminated); + do_test(!ast->errored()); auto errors = new_parse_error_list(); - ast = ast_t::parse(L"begin; echo (", parse_flag_leave_unterminated, &*errors); + ast = ast_parse(L"begin; echo (", parse_flag_leave_unterminated, &*errors); do_test(errors->size() == 1 && errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_subshell); errors->clear(); - ast = ast_t::parse(L"for x in (", parse_flag_leave_unterminated, &*errors); + ast = ast_parse(L"for x in (", parse_flag_leave_unterminated, &*errors); do_test(errors->size() == 1 && errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_subshell); errors->clear(); - ast = ast_t::parse(L"begin; echo '", parse_flag_leave_unterminated, &*errors); + ast = ast_parse(L"begin; echo '", parse_flag_leave_unterminated, &*errors); do_test(errors->size() == 1 && errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_quote); } @@ -5013,8 +5023,8 @@ static void test_new_parser_errors() { parse_error_code_t expected_code = test.code; auto errors = new_parse_error_list(); - auto ast = ast::ast_t::parse(src, parse_flag_none, &*errors); - if (!ast.errored()) { + auto ast = ast_parse(src, parse_flag_none, &*errors); + if (!ast->errored()) { err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str()); } diff --git a/src/function.cpp b/src/function.cpp index 36218245b..5f83a922b 100644 --- a/src/function.cpp +++ b/src/function.cpp @@ -227,13 +227,14 @@ void function_remove(const wcstring &name) { static wcstring get_function_body_source(const function_properties_t &props) { // We want to preserve comments that the AST attaches to the header (#5285). // Take everything from the end of the header to the 'end' keyword. 
- auto header_src = props.func_node->header->try_source_range(); - auto end_kw_src = props.func_node->end.try_source_range(); - if (header_src && end_kw_src) { - uint32_t body_start = header_src->start + header_src->length; - uint32_t body_end = end_kw_src->start; + if (props.func_node->header().ptr()->try_source_range() && + props.func_node->end().try_source_range()) { + auto header_src = props.func_node->header().ptr()->source_range(); + auto end_kw_src = props.func_node->end().range(); + uint32_t body_start = header_src.start + header_src.length; + uint32_t body_end = end_kw_src.start; assert(body_start <= body_end && "end keyword should come after header"); - return wcstring(props.parsed_source->src, body_start, body_end - body_start); + return wcstring(props.parsed_source->src(), body_start, body_end - body_start); } return wcstring{}; } @@ -308,6 +309,25 @@ void function_invalidate_path() { funcset->autoloader.clear(); } +function_properties_t::function_properties_t() : parsed_source(empty_parsed_source_ref()) {} + +function_properties_t::function_properties_t(const function_properties_t &other) + : parsed_source(empty_parsed_source_ref()) { + *this = other; +} + +function_properties_t &function_properties_t::operator=(const function_properties_t &other) { + parsed_source = other.parsed_source->clone(); + func_node = other.func_node; + named_arguments = other.named_arguments; + description = other.description; + inherit_vars = other.inherit_vars; + shadow_scope = other.shadow_scope; + is_autoload = other.is_autoload; + definition_file = other.definition_file; + return *this; +} + wcstring function_properties_t::annotated_definition(const wcstring &name) const { wcstring out; wcstring desc = this->localized_description(); @@ -415,10 +435,10 @@ int function_properties_t::definition_lineno() const { // return one plus the number of newlines at offsets less than the start of our function's // statement (which includes the header). 
// TODO: merge with line_offset_of_character_at_offset? - auto source_range = func_node->try_source_range(); - assert(source_range && "Function has no source range"); - uint32_t func_start = source_range->start; - const wcstring &source = parsed_source->src; + assert(func_node->try_source_range() && "Function has no source range"); + auto source_range = func_node->source_range(); + uint32_t func_start = source_range.start; + const wcstring &source = parsed_source->src(); assert(func_start <= source.size() && "function start out of bounds"); return 1 + std::count(source.begin(), source.begin() + func_start, L'\n'); } diff --git a/src/function.h b/src/function.h index 0df00ac8f..5d65838ce 100644 --- a/src/function.h +++ b/src/function.h @@ -8,19 +8,20 @@ #include #include +#include "ast.h" #include "common.h" #include "parse_tree.h" class parser_t; -namespace ast { -struct block_statement_t; -} - /// A function's constant properties. These do not change once initialized. struct function_properties_t { + function_properties_t(); + function_properties_t(const function_properties_t &other); + function_properties_t &operator=(const function_properties_t &other); + /// Parsed source containing the function. - parsed_source_ref_t parsed_source; + rust::Box parsed_source; /// Node containing the function statement, pointing into parsed_source. /// We store block_statement, not job_list, so that comments attached to the header are diff --git a/src/highlight.cpp b/src/highlight.cpp index 041a6e1f3..c43316eed 100644 --- a/src/highlight.cpp +++ b/src/highlight.cpp @@ -26,6 +26,7 @@ #include "fallback.h" // IWYU pragma: keep #include "function.h" #include "future_feature_flags.h" +#include "highlight.rs.h" #include "history.h" #include "maybe.h" #include "operation_context.h" @@ -331,7 +332,7 @@ static bool statement_get_expanded_command(const wcstring &src, const ast::decorated_statement_t &stmt, const operation_context_t &ctx, wcstring *out_cmd) { // Get the command. 
Try expanding it. If we cannot, it's an error. - maybe_t cmd = stmt.command.source(src); + maybe_t cmd = stmt.command().source(src); if (!cmd) return false; expand_result_t err = expand_to_command_and_args(*cmd, ctx, out_cmd, nullptr); return err == expand_result_t::ok; @@ -413,21 +414,21 @@ static bool has_expand_reserved(const wcstring &str) { // command (as a string), if any. This is used to validate autosuggestions. static void autosuggest_parse_command(const wcstring &buff, const operation_context_t &ctx, wcstring *out_expanded_command, wcstring *out_arg) { - auto ast = ast::ast_t::parse( - buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens); + auto ast = + ast_parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens); // Find the first statement. const ast::decorated_statement_t *first_statement = nullptr; - if (const ast::job_conjunction_t *jc = ast.top()->as()->at(0)) { - first_statement = jc->job.statement.contents->try_as(); + if (const ast::job_conjunction_t *jc = ast->top()->as_job_list().at(0)) { + first_statement = jc->job().statement().contents().ptr()->try_as_decorated_statement(); } if (first_statement && statement_get_expanded_command(buff, *first_statement, ctx, out_expanded_command)) { // Check if the first argument or redirection is, in fact, an argument. - if (const auto *arg_or_redir = first_statement->args_or_redirs.at(0)) { + if (const auto *arg_or_redir = first_statement->args_or_redirs().at(0)) { if (arg_or_redir && arg_or_redir->is_argument()) { - *out_arg = arg_or_redir->argument().source(buff); + *out_arg = *arg_or_redir->argument().source(buff); } } } @@ -776,83 +777,17 @@ static void color_string_internal(const wcstring &buffstr, highlight_spec_t base } } -namespace { -/// Syntax highlighter helper. -class highlighter_t { - // The string we're highlighting. Note this is a reference member variable (to avoid copying)! - // We must not outlive this! 
- const wcstring &buff; - // The position of the cursor within the string. - const maybe_t cursor; - // The operation context. Again, a reference member variable! - const operation_context_t &ctx; - // Whether it's OK to do I/O. - const bool io_ok; - // Working directory. - const wcstring working_directory; - // The ast we produced. - ast::ast_t ast; - // The resulting colors. - using color_array_t = std::vector; - color_array_t color_array; - // A stack of variables that the current commandline probably defines. We mark redirections - // as valid if they use one of these variables, to avoid marking valid targets as error. - std::vector pending_variables; +highlighter_t::highlighter_t(const wcstring &str, maybe_t cursor, + const operation_context_t &ctx, wcstring wd, bool can_do_io) + : buff(str), + cursor(cursor), + ctx(ctx), + io_ok(can_do_io), + working_directory(std::move(wd)), + ast(ast_parse(buff, ast_flags)), + highlighter(new_highlighter(*this, *ast)) {} - // Flags we use for AST parsing. - static constexpr parse_tree_flags_t ast_flags = - parse_flag_continue_after_error | parse_flag_include_comments | - parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated | - parse_flag_show_extra_semis; - - bool io_still_ok() const { return io_ok && !ctx.check_cancel(); } - - // Color a command. - void color_command(const ast::string_t &node); - // Color a node as if it were an argument. - void color_as_argument(const ast::node_t &node, bool options_allowed = true); - // Colors the source range of a node with a given color. - void color_node(const ast::node_t &node, highlight_spec_t color); - // Colors a range with a given color. - void color_range(source_range_t range, highlight_spec_t color); - - /// \return a substring of our buffer. - wcstring get_source(source_range_t r) const; - - public: - // Visit the children of a node. 
- void visit_children(const ast::node_t &node) { - ast::node_visitor(*this).accept_children_of(&node); - } - - // AST visitor implementations. - void visit(const ast::keyword_base_t &kw); - void visit(const ast::token_base_t &tok); - void visit(const ast::redirection_t &redir); - void visit(const ast::variable_assignment_t &varas); - void visit(const ast::semi_nl_t &semi_nl); - void visit(const ast::decorated_statement_t &stmt); - void visit(const ast::block_statement_t &block); - - // Visit an argument, perhaps knowing that our command is cd. - void visit(const ast::argument_t &arg, bool cmd_is_cd = false, bool options_allowed = true); - - // Default implementation is to just visit children. - void visit(const ast::node_t &node) { visit_children(node); } - - // Constructor - highlighter_t(const wcstring &str, maybe_t cursor, const operation_context_t &ctx, - wcstring wd, bool can_do_io) - : buff(str), - cursor(cursor), - ctx(ctx), - io_ok(can_do_io), - working_directory(std::move(wd)), - ast(ast::ast_t::parse(buff, ast_flags)) {} - - // Perform highlighting, returning an array of colors. 
- color_array_t highlight(); -}; +bool highlighter_t::io_still_ok() const { return io_ok && !ctx.check_cancel(); } wcstring highlighter_t::get_source(source_range_t r) const { assert(r.start + r.length >= r.start && "Overflow"); @@ -961,9 +896,9 @@ static bool range_is_potential_path(const wcstring &src, const source_range_t &r return result; } -void highlighter_t::visit(const ast::keyword_base_t &kw) { +void highlighter_t::visit_keyword(const ast::node_t *kw) { highlight_role_t role = highlight_role_t::normal; - switch (kw.kw) { + switch (kw->kw()) { case parse_keyword_t::kw_begin: case parse_keyword_t::kw_builtin: case parse_keyword_t::kw_case: @@ -991,12 +926,12 @@ void highlighter_t::visit(const ast::keyword_base_t &kw) { case parse_keyword_t::none: break; } - color_node(kw, role); + color_node(*kw, role); } -void highlighter_t::visit(const ast::token_base_t &tok) { +void highlighter_t::visit_token(const ast::node_t *tok) { maybe_t role = highlight_role_t::normal; - switch (tok.type) { + switch (tok->token_type()) { case parse_token_type_t::end: case parse_token_type_t::pipe: case parse_token_type_t::background: @@ -1017,15 +952,16 @@ void highlighter_t::visit(const ast::token_base_t &tok) { default: break; } - if (role) color_node(tok, *role); + if (role) color_node(*tok, *role); } -void highlighter_t::visit(const ast::semi_nl_t &semi_nl) { - color_node(semi_nl, highlight_role_t::statement_terminator); +void highlighter_t::visit_semi_nl(const ast::node_t *semi_nl) { + color_node(*semi_nl, highlight_role_t::statement_terminator); } -void highlighter_t::visit(const ast::argument_t &arg, bool cmd_is_cd, bool options_allowed) { - color_as_argument(arg, options_allowed); +void highlighter_t::visit_argument(const void *arg_, bool cmd_is_cd, bool options_allowed) { + const auto &arg = *static_cast(arg_); + color_as_argument(*arg.ptr(), options_allowed); if (!io_still_ok()) { return; } @@ -1034,7 +970,7 @@ void highlighter_t::visit(const ast::argument_t &arg, bool 
cmd_is_cd, bool optio bool at_cursor = cursor.has_value() && arg.source_range().contains_inclusive(*cursor); if (cmd_is_cd) { // Mark this as an error if it's not 'help' and not a valid cd path. - wcstring param = arg.source(this->buff); + wcstring param = *arg.source(this->buff); if (expand_one(param, expand_flag::skip_cmdsubst, ctx)) { bool is_help = string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h"); @@ -1042,45 +978,51 @@ void highlighter_t::visit(const ast::argument_t &arg, bool cmd_is_cd, bool optio is_valid_path = is_potential_cd_path(param, at_cursor, working_directory, ctx, PATH_EXPAND_TILDE); if (!is_valid_path) { - this->color_node(arg, highlight_role_t::error); + this->color_node(*arg.ptr(), highlight_role_t::error); } } } - } else if (range_is_potential_path(buff, arg.range, at_cursor, ctx, working_directory)) { + } else if (range_is_potential_path(buff, arg.range(), at_cursor, ctx, working_directory)) { is_valid_path = true; } if (is_valid_path) - for (size_t i = arg.range.start, end = arg.range.start + arg.range.length; i < end; i++) + for (size_t i = arg.range().start, end = arg.range().start + arg.range().length; i < end; + i++) this->color_array.at(i).valid_path = true; } -void highlighter_t::visit(const ast::variable_assignment_t &varas) { - color_as_argument(varas); +void highlighter_t::visit_variable_assignment(const void *varas_) { + const auto &varas = *static_cast(varas_); + color_as_argument(*varas.ptr()); // Highlight the '=' in variable assignments as an operator. 
- auto where = variable_assignment_equals_pos(varas.source(this->buff)); + auto where = variable_assignment_equals_pos(*varas.source(this->buff)); if (where) { size_t equals_loc = varas.source_range().start + *where; this->color_array.at(equals_loc) = highlight_role_t::operat; - auto var_name = varas.source(this->buff).substr(0, *where); + auto var_name = varas.source(this->buff)->substr(0, *where); this->pending_variables.push_back(std::move(var_name)); } } -void highlighter_t::visit(const ast::decorated_statement_t &stmt) { +void highlighter_t::visit_decorated_statement(const void *stmt_) { + const auto &stmt = *static_cast(stmt_); // Color any decoration. - if (stmt.opt_decoration) this->visit(*stmt.opt_decoration); + if (stmt.has_opt_decoration()) { + auto decoration = stmt.opt_decoration().ptr(); + this->visit_keyword(&*decoration); + } // Color the command's source code. // If we get no source back, there's nothing to color. - maybe_t cmd = stmt.command.try_source(this->buff); - if (!cmd.has_value()) return; + if (!stmt.command().try_source_range()) return; + wcstring cmd = *stmt.command().source(this->buff); wcstring expanded_cmd; bool is_valid_cmd = false; if (!this->io_still_ok()) { // We cannot check if the command is invalid, so just assume it's valid. is_valid_cmd = true; - } else if (variable_assignment_equals_pos(*cmd)) { + } else if (variable_assignment_equals_pos(cmd)) { is_valid_cmd = true; } else { // Check to see if the command is valid. @@ -1094,9 +1036,9 @@ void highlighter_t::visit(const ast::decorated_statement_t &stmt) { // Color our statement. if (is_valid_cmd) { - this->color_command(stmt.command); + this->color_command(stmt.command()); } else { - this->color_node(stmt.command, highlight_role_t::error); + this->color_node(*stmt.command().ptr(), highlight_role_t::error); } // Color arguments and redirections. 
@@ -1105,34 +1047,36 @@ void highlighter_t::visit(const ast::decorated_statement_t &stmt) { bool is_set = (expanded_cmd == L"set"); // If we have seen a "--" argument, color all options from then on as normal arguments. bool have_dashdash = false; - for (const ast::argument_or_redirection_t &v : stmt.args_or_redirs) { + for (size_t i = 0; i < stmt.args_or_redirs().count(); i++) { + const auto &v = *stmt.args_or_redirs().at(i); if (v.is_argument()) { if (is_set) { - auto arg = v.argument().source(this->buff); + auto arg = *v.argument().source(this->buff); if (valid_var_name(arg)) { this->pending_variables.push_back(std::move(arg)); is_set = false; } } - this->visit(v.argument(), is_cd, !have_dashdash); - if (v.argument().source(this->buff) == L"--") have_dashdash = true; + this->visit_argument(&v.argument(), is_cd, !have_dashdash); + if (*v.argument().source(this->buff) == L"--") have_dashdash = true; } else { - this->visit(v.redirection()); + this->visit_redirection(&v.redirection()); } } } -void highlighter_t::visit(const ast::block_statement_t &block) { - this->visit(*block.header.contents.get()); - this->visit(block.args_or_redirs); - const ast::node_t &bh = *block.header.contents; +size_t highlighter_t::visit_block_statement1(const void *block_) { + const auto &block = *static_cast(block_); + auto bh = block.header().ptr(); size_t pending_variables_count = this->pending_variables.size(); - if (const auto *fh = bh.try_as()) { - auto var_name = fh->var_name.source(this->buff); + if (const auto *fh = bh->try_as_for_header()) { + auto var_name = *fh->var_name().source(this->buff); pending_variables.push_back(std::move(var_name)); } - this->visit(block.jobs); - this->visit(block.end); + return pending_variables_count; +} + +void highlighter_t::visit_block_statement2(size_t pending_variables_count) { pending_variables.resize(pending_variables_count); } @@ -1158,9 +1102,10 @@ static bool contains_pending_variable(const std::vector &pending_varia return false; } -void 
highlighter_t::visit(const ast::redirection_t &redir) { - auto oper = pipe_or_redir_from_string(redir.oper.source(this->buff).c_str()); // like 2> - wcstring target = redir.target.source(this->buff); // like &1 or file path +void highlighter_t::visit_redirection(const void *redir_) { + const auto &redir = *static_cast(redir_); + auto oper = pipe_or_redir_from_string(redir.oper().source(this->buff)->c_str()); // like 2> + wcstring target = *redir.target().source(this->buff); // like &1 or file path assert(oper && "Should have successfully parsed a pipe_or_redir_t since it was in our ast"); @@ -1168,18 +1113,18 @@ void highlighter_t::visit(const ast::redirection_t &redir) { // It may have parsed successfully yet still be invalid (e.g. 9999999999999>&1) // If so, color the whole thing invalid and stop. if (!oper->is_valid()) { - this->color_node(redir, highlight_role_t::error); + this->color_node(*redir.ptr(), highlight_role_t::error); return; } // Color the operator part like 2>. - this->color_node(redir.oper, highlight_role_t::redirection); + this->color_node(*redir.oper().ptr(), highlight_role_t::redirection); // Color the target part. // Check if the argument contains a command substitution. If so, highlight it as a param // even though it's a command redirection, and don't try to do any other validation. if (has_cmdsub(target)) { - this->color_as_argument(redir.target); + this->color_as_argument(*redir.target().ptr()); } else { // No command substitution, so we can highlight the target file or fd. For example, // disallow redirections into a non-existent directory. @@ -1266,7 +1211,7 @@ void highlighter_t::visit(const ast::redirection_t &redir) { } } } - this->color_node(redir.target, + this->color_node(*redir.target().ptr(), target_is_valid ? 
highlight_role_t::redirection : highlight_role_t::error); } } @@ -1280,28 +1225,27 @@ highlighter_t::color_array_t highlighter_t::highlight() { this->color_array.resize(this->buff.size()); std::fill(this->color_array.begin(), this->color_array.end(), highlight_spec_t{}); - this->visit_children(*ast.top()); + this->highlighter->visit_children(*ast->top()); if (ctx.check_cancel()) return std::move(color_array); // Color every comment. - const auto &extras = ast.extras(); - for (const source_range_t &r : extras.comments) { + auto extras = ast->extras(); + for (const source_range_t &r : extras->comments()) { this->color_range(r, highlight_role_t::comment); } // Color every extra semi. - for (const source_range_t &r : extras.semis) { + for (const source_range_t &r : extras->semis()) { this->color_range(r, highlight_role_t::statement_terminator); } // Color every error range. - for (const source_range_t &r : extras.errors) { + for (const source_range_t &r : extras->errors()) { this->color_range(r, highlight_role_t::error); } return std::move(color_array); } -} // namespace /// Determine if a command is valid. static bool command_is_valid(const wcstring &cmd, statement_decoration_t decoration, diff --git a/src/highlight.h b/src/highlight.h index 87be02a03..d9e9f7384 100644 --- a/src/highlight.h +++ b/src/highlight.h @@ -11,10 +11,14 @@ #include #include +#include "ast.h" #include "color.h" +#include "cxx.h" #include "flog.h" #include "maybe.h" +struct Highlighter; + class environment_t; /// Describes the role of a span of text. @@ -156,4 +160,76 @@ bool is_potential_path(const wcstring &potential_path_fragment, bool at_cursor, const wcstring_list_t &directories, const operation_context_t &ctx, path_flags_t flags); +/// Syntax highlighter helper. +class highlighter_t { + // The string we're highlighting. Note this is a reference member variable (to avoid copying)! + // We must not outlive this! + const wcstring &buff; + // The position of the cursor within the string. 
+ const maybe_t cursor; + // The operation context. Again, a reference member variable! + const operation_context_t &ctx; + // Whether it's OK to do I/O. + const bool io_ok; + // Working directory. + const wcstring working_directory; + // The ast we produced. + rust::Box ast; + rust::Box highlighter; + // The resulting colors. + using color_array_t = std::vector; + color_array_t color_array; + // A stack of variables that the current commandline probably defines. We mark redirections + // as valid if they use one of these variables, to avoid marking valid targets as error. + std::vector pending_variables; + + // Flags we use for AST parsing. + static constexpr parse_tree_flags_t ast_flags = + parse_flag_continue_after_error | parse_flag_include_comments | + parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated | + parse_flag_show_extra_semis; + + bool io_still_ok() const; + +#if INCLUDE_RUST_HEADERS + // Declaring methods with forward-declared opaque Rust types like "ast::node_t" will cause + // undefined reference errors. + // Color a command. + void color_command(const ast::string_t &node); + // Color a node as if it were an argument. + void color_as_argument(const ast::node_t &node, bool options_allowed = true); + // Colors the source range of a node with a given color. + void color_node(const ast::node_t &node, highlight_spec_t color); + // Colors a range with a given color. + void color_range(source_range_t range, highlight_spec_t color); +#endif + + public: + /// \return a substring of our buffer. + wcstring get_source(source_range_t r) const; + + // AST visitor implementations. 
+ void visit_keyword(const ast::node_t *kw); + void visit_token(const ast::node_t *tok); + void visit_argument(const void *arg, bool cmd_is_cd, bool options_allowed); + void visit_redirection(const void *redir); + void visit_variable_assignment(const void *varas); + void visit_semi_nl(const ast::node_t *semi_nl); + void visit_decorated_statement(const void *stmt); + size_t visit_block_statement1(const void *block); + void visit_block_statement2(size_t pending_variables_count); + +#if INCLUDE_RUST_HEADERS + // Visit an argument, perhaps knowing that our command is cd. + void visit(const ast::argument_t &arg, bool cmd_is_cd = false, bool options_allowed = true); +#endif + + // Constructor + highlighter_t(const wcstring &str, maybe_t cursor, const operation_context_t &ctx, + wcstring wd, bool can_do_io); + + // Perform highlighting, returning an array of colors. + color_array_t highlight(); +}; + #endif diff --git a/src/history.cpp b/src/history.cpp index b9f24569c..ac9dae85f 100644 --- a/src/history.cpp +++ b/src/history.cpp @@ -1202,7 +1202,7 @@ static bool should_import_bash_history_line(const wcstring &line) { // "<<" here is a proxy for heredocs (and herestrings). if (line.find(L"<<") != std::string::npos) return false; - if (ast::ast_t::parse(line).errored()) return false; + if (ast_parse(line)->errored()) return false; // In doing this test do not allow incomplete strings. Hence the "false" argument. auto errors = new_parse_error_list(); @@ -1396,16 +1396,18 @@ void history_t::add_pending_with_file_detection(const std::shared_ptr // Find all arguments that look like they could be file paths. 
bool needs_sync_write = false; using namespace ast; - auto ast = ast_t::parse(str); + auto ast = ast_parse(str); path_list_t potential_paths; - for (const node_t &node : ast) { - if (const argument_t *arg = node.try_as()) { - wcstring potential_path = arg->source(str); + for (auto ast_traversal = new_ast_traversal(*ast->top());;) { + auto node = ast_traversal->next(); + if (!node->has_value()) break; + if (const argument_t *arg = node->try_as_argument()) { + wcstring potential_path = *arg->source(str); if (string_could_be_path(potential_path)) { potential_paths.push_back(std::move(potential_path)); } - } else if (const decorated_statement_t *stmt = node.try_as()) { + } else if (const decorated_statement_t *stmt = node->try_as_decorated_statement()) { // Hack hack hack - if the command is likely to trigger an exit, then don't do // background file detection, because we won't be able to write it to our history file // before we exit. @@ -1416,7 +1418,7 @@ void history_t::add_pending_with_file_detection(const std::shared_ptr needs_sync_write = true; } - wcstring command = stmt->command.source(str); + wcstring command = *stmt->command().source(str); unescape_string_in_place(&command, UNESCAPE_DEFAULT); if (command == L"exit" || command == L"reboot" || command == L"restart" || command == L"echo") { diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp index a8ed65c4f..369f0b724 100644 --- a/src/parse_execution.cpp +++ b/src/parse_execution.cpp @@ -53,37 +53,39 @@ static constexpr bool type_is_redirectable_block(ast::type_t type) { } static bool specific_statement_type_is_redirectable_block(const ast::node_t &node) { - return type_is_redirectable_block(node.type); + return type_is_redirectable_block(node.typ()); } /// Get the name of a redirectable block, for profiling purposes. 
static wcstring profiling_cmd_name_for_redirectable_block(const ast::node_t &node, - const parsed_source_t &pstree) { + const parsed_source_ref_t &pstree) { using namespace ast; assert(specific_statement_type_is_redirectable_block(node)); - auto source_range = node.try_source_range(); - assert(source_range.has_value() && "No source range for block"); + assert(node.try_source_range() && "No source range for block"); + auto source_range = node.source_range(); size_t src_end = 0; - switch (node.type) { + switch (node.typ()) { case type_t::block_statement: { - const node_t *block_header = node.as()->header.get(); - switch (block_header->type) { + auto block_header = node.as_block_statement().header().ptr(); + switch (block_header->typ()) { case type_t::for_header: - src_end = block_header->as()->semi_nl.source_range().start; + src_end = block_header->as_for_header().semi_nl().source_range().start; break; case type_t::while_header: - src_end = block_header->as()->condition.source_range().end(); + src_end = + block_header->as_while_header().condition().ptr()->source_range().end(); break; case type_t::function_header: - src_end = block_header->as()->semi_nl.source_range().start; + src_end = block_header->as_function_header().semi_nl().source_range().start; break; case type_t::begin_header: - src_end = block_header->as()->kw_begin.source_range().end(); + src_end = + block_header->as_begin_header().kw_begin().ptr()->source_range().end(); break; default: @@ -92,11 +94,12 @@ static wcstring profiling_cmd_name_for_redirectable_block(const ast::node_t &nod } break; case type_t::if_statement: - src_end = node.as()->if_clause.condition.job.source_range().end(); + src_end = + node.as_if_statement().if_clause().condition().job().ptr()->source_range().end(); break; case type_t::switch_statement: - src_end = node.as()->semi_nl.source_range().start; + src_end = node.as_switch_statement().semi_nl().source_range().start; break; default: @@ -104,10 +107,10 @@ static wcstring 
profiling_cmd_name_for_redirectable_block(const ast::node_t &nod break; } - assert(src_end >= source_range->start && "Invalid source end"); + assert(src_end >= source_range.start && "Invalid source end"); // Get the source for the block, and cut it at the next statement terminator. - wcstring result = pstree.src.substr(source_range->start, src_end - source_range->start); + wcstring result = pstree.src().substr(source_range.start, src_end - source_range.start); result.append(L"..."); return result; } @@ -118,7 +121,7 @@ static rust::Box get_stderr_merge() { return new_redirection_spec(STDERR_FILENO, redirection_mode_t::fd, stdout_fileno_str); } -parse_execution_context_t::parse_execution_context_t(parsed_source_ref_t pstree, +parse_execution_context_t::parse_execution_context_t(rust::Box pstree, const operation_context_t &ctx, io_chain_t block_io) : pstree(std::move(pstree)), @@ -129,7 +132,7 @@ parse_execution_context_t::parse_execution_context_t(parsed_source_ref_t pstree, // Utilities wcstring parse_execution_context_t::get_source(const ast::node_t &node) const { - return node.source(pstree->src); + return *node.source(pstree->src()); } const ast::decorated_statement_t * @@ -151,14 +154,16 @@ parse_execution_context_t::infinite_recursive_statement_in_job_list(const ast::j // Get the first job in the job list. const ast::job_conjunction_t *jc = jobs.at(0); if (!jc) return nullptr; - const ast::job_pipeline_t *job = &jc->job; + const ast::job_pipeline_t *job = &jc->job(); // Helper to return if a statement is infinitely recursive in this function. auto statement_recurses = [&](const ast::statement_t &stat) -> const ast::decorated_statement_t * { // Ignore non-decorated statements like `if`, etc. const ast::decorated_statement_t *dc = - stat.contents.contents->try_as(); + stat.contents().ptr()->try_as_decorated_statement() + ? 
&stat.contents().ptr()->as_decorated_statement() + : nullptr; if (!dc) return nullptr; // Ignore statements with decorations like 'builtin' or 'command', since those @@ -166,7 +171,7 @@ parse_execution_context_t::infinite_recursive_statement_in_job_list(const ast::j if (dc->decoration() != statement_decoration_t::none) return nullptr; // Check the command. - wcstring cmd = dc->command.source(pstree->src); + wcstring cmd = *dc->command().source(pstree->src()); bool forbidden = !cmd.empty() && expand_one(cmd, {expand_flag::skip_cmdsubst, expand_flag::skip_variables}, ctx) && @@ -177,12 +182,13 @@ parse_execution_context_t::infinite_recursive_statement_in_job_list(const ast::j const ast::decorated_statement_t *infinite_recursive_statement = nullptr; // Check main statement. - infinite_recursive_statement = statement_recurses(jc->job.statement); + infinite_recursive_statement = statement_recurses(jc->job().statement()); // Check piped remainder. if (!infinite_recursive_statement) { - for (const ast::job_continuation_t &c : job->continuation) { - if (const auto *s = statement_recurses(c.statement)) { + for (size_t i = 0; i < job->continuation().count(); i++) { + const ast::job_continuation_t &c = *job->continuation().at(i); + if (const auto *s = statement_recurses(c.statement())) { infinite_recursive_statement = s; break; } @@ -249,13 +255,14 @@ maybe_t parse_execution_context_t::check_end_execution() bool parse_execution_context_t::job_is_simple_block(const ast::job_pipeline_t &job) const { using namespace ast; // Must be no pipes. - if (!job.continuation.empty()) { + if (!job.continuation().empty()) { return false; } // Helper to check if an argument_or_redirection_list_t has no redirections. 
auto no_redirs = [](const argument_or_redirection_list_t &list) -> bool { - for (const argument_or_redirection_t &val : list) { + for (size_t i = 0; i < list.count(); i++) { + const argument_or_redirection_t &val = *list.at(i); if (val.is_redirection()) return false; } return true; @@ -263,14 +270,14 @@ bool parse_execution_context_t::job_is_simple_block(const ast::job_pipeline_t &j // Check if we're a block statement with redirections. We do it this obnoxious way to preserve // type safety (in case we add more specific statement types). - const node_t &ss = *job.statement.contents.contents; - switch (ss.type) { + const auto ss = job.statement().contents().ptr(); + switch (ss->typ()) { case type_t::block_statement: - return no_redirs(ss.as()->args_or_redirs); + return no_redirs(ss->as_block_statement().args_or_redirs()); case type_t::switch_statement: - return no_redirs(ss.as()->args_or_redirs); + return no_redirs(ss->as_switch_statement().args_or_redirs()); case type_t::if_statement: - return no_redirs(ss.as()->args_or_redirs); + return no_redirs(ss->as_if_statement().args_or_redirs()); case type_t::not_statement: case type_t::decorated_statement: // not block statements @@ -290,10 +297,10 @@ end_execution_reason_t parse_execution_context_t::run_if_statement( // We have a sequence of if clauses, with a final else, resulting in a single job list that we // execute. const job_list_t *job_list_to_execute = nullptr; - const if_clause_t *if_clause = &statement.if_clause; + const if_clause_t *if_clause = &statement.if_clause(); // Index of the *next* elseif_clause to test. - const elseif_clause_list_t &elseif_clauses = statement.elseif_clauses; + const elseif_clause_list_t &elseif_clauses = statement.elseif_clauses(); size_t next_elseif_idx = 0; // We start with the 'if'. @@ -309,16 +316,16 @@ end_execution_reason_t parse_execution_context_t::run_if_statement( // Check the condition and the tail. 
We treat end_execution_reason_t::error here as failure, // in accordance with historic behavior. end_execution_reason_t cond_ret = - run_job_conjunction(if_clause->condition, associated_block); + run_job_conjunction(if_clause->condition(), associated_block); if (cond_ret == end_execution_reason_t::ok) { - cond_ret = run_job_list(if_clause->andor_tail, associated_block); + cond_ret = run_job_list(if_clause->andor_tail(), associated_block); } const bool take_branch = (cond_ret == end_execution_reason_t::ok) && parser->get_last_status() == EXIT_SUCCESS; if (take_branch) { // Condition succeeded. - job_list_to_execute = &if_clause->body; + job_list_to_execute = &if_clause->body(); break; } @@ -326,7 +333,7 @@ end_execution_reason_t parse_execution_context_t::run_if_statement( const auto *elseif_clause = elseif_clauses.at(next_elseif_idx++); if (elseif_clause) { trace_if_enabled(*parser, L"else if"); - if_clause = &elseif_clause->if_clause; + if_clause = &elseif_clause->if_clause(); } else { break; } @@ -335,9 +342,9 @@ end_execution_reason_t parse_execution_context_t::run_if_statement( if (!job_list_to_execute) { // our ifs and elseifs failed. // Check our else body. - if (statement.else_clause) { + if (statement.has_else_clause()) { trace_if_enabled(*parser, L"else"); - job_list_to_execute = &statement.else_clause->body; + job_list_to_execute = &statement.else_clause().body(); } } @@ -382,8 +389,8 @@ end_execution_reason_t parse_execution_context_t::run_function_statement( using namespace ast; // Get arguments. 
wcstring_list_t arguments; - ast_args_list_t arg_nodes = get_argument_nodes(header.args); - arg_nodes.insert(arg_nodes.begin(), &header.first_arg); + ast_args_list_t arg_nodes = get_argument_nodes(header.args()); + arg_nodes.insert(arg_nodes.begin(), &header.first_arg()); end_execution_reason_t result = this->expand_arguments_from_nodes(arg_nodes, &arguments, failglob); @@ -395,32 +402,32 @@ end_execution_reason_t parse_execution_context_t::run_function_statement( null_output_stream_t outs; string_output_stream_t errs; io_streams_t streams(outs, errs); - int err_code = builtin_function(*parser, streams, arguments, pstree, statement); + int err_code = builtin_function(*parser, streams, arguments, *pstree, statement); parser->libdata().status_count++; parser->set_last_statuses(statuses_t::just(err_code)); const wcstring &errtext = errs.contents(); if (!errtext.empty()) { - return this->report_error(err_code, header, L"%ls", errtext.c_str()); + return this->report_error(err_code, *header.ptr(), L"%ls", errtext.c_str()); } return result; } end_execution_reason_t parse_execution_context_t::run_block_statement( const ast::block_statement_t &statement, const block_t *associated_block) { - const ast::node_t &bh = *statement.header.contents; - const ast::job_list_t &contents = statement.jobs; + auto bh = statement.header().ptr(); + const ast::job_list_t &contents = statement.jobs(); end_execution_reason_t ret = end_execution_reason_t::ok; - if (const auto *fh = bh.try_as()) { + if (const auto *fh = bh->try_as_for_header()) { ret = run_for_statement(*fh, contents); - } else if (const auto *wh = bh.try_as()) { + } else if (const auto *wh = bh->try_as_while_header()) { ret = run_while_statement(*wh, contents, associated_block); - } else if (const auto *fh = bh.try_as()) { + } else if (const auto *fh = bh->try_as_function_header()) { ret = run_function_statement(statement, *fh); - } else if (bh.try_as()) { + } else if (bh->try_as_begin_header()) { ret = 
run_begin_statement(contents); } else { - FLOGF(error, L"Unexpected block header: %ls\n", bh.describe().c_str()); + FLOGF(error, L"Unexpected block header: %ls\n", bh->describe()->c_str()); PARSER_DIE(); } return ret; @@ -430,20 +437,20 @@ end_execution_reason_t parse_execution_context_t::run_for_statement( const ast::for_header_t &header, const ast::job_list_t &block_contents) { // Get the variable name: `for var_name in ...`. We expand the variable name. It better result // in just one. - wcstring for_var_name = header.var_name.source(get_source()); + wcstring for_var_name = *header.var_name().source(get_source()); if (!expand_one(for_var_name, expand_flags_t{}, ctx)) { - return report_error(STATUS_EXPAND_ERROR, header.var_name, + return report_error(STATUS_EXPAND_ERROR, *header.var_name().ptr(), FAILED_EXPANSION_VARIABLE_NAME_ERR_MSG, for_var_name.c_str()); } if (!valid_var_name(for_var_name)) { - return report_error(STATUS_INVALID_ARGS, header.var_name, BUILTIN_ERR_VARNAME, L"for", - for_var_name.c_str()); + return report_error(STATUS_INVALID_ARGS, *header.var_name().ptr(), BUILTIN_ERR_VARNAME, + L"for", for_var_name.c_str()); } // Get the contents to iterate over. 
wcstring_list_t arguments; - ast_args_list_t arg_nodes = get_argument_nodes(header.args); + ast_args_list_t arg_nodes = get_argument_nodes(header.args()); end_execution_reason_t ret = this->expand_arguments_from_nodes(arg_nodes, &arguments, nullglob); if (ret != end_execution_reason_t::ok) { return ret; @@ -451,7 +458,7 @@ end_execution_reason_t parse_execution_context_t::run_for_statement( auto var = parser->vars().get(for_var_name, ENV_DEFAULT); if (env_var_t::flags_for(for_var_name.c_str()) & env_var_t::flag_read_only) { - return report_error(STATUS_INVALID_ARGS, header.var_name, + return report_error(STATUS_INVALID_ARGS, *header.var_name().ptr(), _(L"%ls: %ls: cannot overwrite read-only variable"), L"for", for_var_name.c_str()); } @@ -501,14 +508,14 @@ end_execution_reason_t parse_execution_context_t::run_for_statement( end_execution_reason_t parse_execution_context_t::run_switch_statement( const ast::switch_statement_t &statement) { // Get the switch variable. - const wcstring switch_value = get_source(statement.argument); + const wcstring switch_value = get_source(*statement.argument().ptr()); // Expand it. We need to offset any errors by the position of the string. 
completion_list_t switch_values_expanded; auto errors = new_parse_error_list(); auto expand_ret = expand_string(switch_value, &switch_values_expanded, expand_flags_t{}, ctx, &*errors); - errors->offset_source_start(statement.argument.range.start); + errors->offset_source_start(statement.argument().range().start); switch (expand_ret.result) { case expand_result_t::error: @@ -518,12 +525,12 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement( return end_execution_reason_t::cancelled; case expand_result_t::wildcard_no_match: - return report_error(STATUS_UNMATCHED_WILDCARD, statement.argument, WILDCARD_ERR_MSG, - get_source(statement.argument).c_str()); + return report_error(STATUS_UNMATCHED_WILDCARD, *statement.argument().ptr(), + WILDCARD_ERR_MSG, get_source(*statement.argument().ptr()).c_str()); case expand_result_t::ok: if (switch_values_expanded.size() > 1) { - return report_error(STATUS_INVALID_ARGS, statement.argument, + return report_error(STATUS_INVALID_ARGS, *statement.argument().ptr(), _(L"switch: Expected at most one argument, got %lu\n"), switch_values_expanded.size()); } @@ -544,7 +551,8 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement( // Expand case statements. const ast::case_item_t *matching_case_item = nullptr; - for (const ast::case_item_t &case_item : statement.cases) { + for (size_t i = 0; i < statement.cases().count(); i++) { + const ast::case_item_t &case_item = *statement.cases().at(i); if (auto ret = check_end_execution()) { result = *ret; break; @@ -553,7 +561,7 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement( // Expand arguments. A case item list may have a wildcard that fails to expand to // anything. We also report case errors, but don't stop execution; i.e. a case item that // contains an unexpandable process will report and then fail to match. 
- ast_args_list_t arg_nodes = get_argument_nodes(case_item.arguments); + ast_args_list_t arg_nodes = get_argument_nodes(case_item.arguments()); wcstring_list_t case_args; end_execution_reason_t case_result = this->expand_arguments_from_nodes(arg_nodes, &case_args, failglob); @@ -576,7 +584,7 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement( if (matching_case_item) { // Success, evaluate the job list. assert(result == end_execution_reason_t::ok && "Expected success"); - result = this->run_job_list(matching_case_item->body, sb); + result = this->run_job_list(matching_case_item->body(), sb); } parser->pop_block(sb); @@ -612,9 +620,9 @@ end_execution_reason_t parse_execution_context_t::run_while_statement( // Check the condition. end_execution_reason_t cond_ret = - this->run_job_conjunction(header.condition, associated_block); + this->run_job_conjunction(header.condition(), associated_block); if (cond_ret == end_execution_reason_t::ok) { - cond_ret = run_job_list(header.andor_tail, associated_block); + cond_ret = run_job_list(header.andor_tail(), associated_block); } // If the loop condition failed to execute, then exit the loop without modifying the exit @@ -694,7 +702,7 @@ end_execution_reason_t parse_execution_context_t::report_errors( // Get a backtrace. wcstring backtrace_and_desc; - parser->get_backtrace(pstree->src, error_list, backtrace_and_desc); + parser->get_backtrace(pstree->src(), error_list, backtrace_and_desc); // Print it. 
if (!should_suppress_stderr_for_tests()) { @@ -711,7 +719,10 @@ end_execution_reason_t parse_execution_context_t::report_errors( parse_execution_context_t::ast_args_list_t parse_execution_context_t::get_argument_nodes( const ast::argument_list_t &args) { ast_args_list_t result; - for (const ast::argument_t &arg : args) result.push_back(&arg); + for (size_t i = 0; i < args.count(); i++) { + const ast::argument_t &arg = *args.at(i); + result.push_back(&arg); + } return result; } @@ -719,7 +730,8 @@ parse_execution_context_t::ast_args_list_t parse_execution_context_t::get_argume parse_execution_context_t::ast_args_list_t parse_execution_context_t::get_argument_nodes( const ast::argument_or_redirection_list_t &args) { ast_args_list_t result; - for (const ast::argument_or_redirection_t &v : args) { + for (size_t i = 0; i < args.count(); i++) { + const ast::argument_or_redirection_t &v = *args.at(i); if (v.is_argument()) result.push_back(&v.argument()); } return result; @@ -739,21 +751,21 @@ end_execution_reason_t parse_execution_context_t::handle_command_not_found( // ENAMETOOLONG if (err_code == ENOTDIR) { // If the original command did not include a "/", assume we found it via $PATH. - auto src = get_source(statement.command); + auto src = get_source(*statement.command().ptr()); if (src.find(L"/") == wcstring::npos) { - return this->report_error(STATUS_NOT_EXECUTABLE, statement.command, + return this->report_error(STATUS_NOT_EXECUTABLE, *statement.command().ptr(), _(L"Unknown command. A component of '%ls' is not a " L"directory. Check your $PATH."), cmd); } else { return this->report_error( - STATUS_NOT_EXECUTABLE, statement.command, + STATUS_NOT_EXECUTABLE, *statement.command().ptr(), _(L"Unknown command. A component of '%ls' is not a directory."), cmd); } } return this->report_error( - STATUS_NOT_EXECUTABLE, statement.command, + STATUS_NOT_EXECUTABLE, *statement.command().ptr(), _(L"Unknown command. 
'%ls' exists but is not an executable file."), cmd); } @@ -761,7 +773,7 @@ end_execution_reason_t parse_execution_context_t::handle_command_not_found( // error messages. wcstring_list_t event_args; { - ast_args_list_t args = get_argument_nodes(statement.args_or_redirs); + ast_args_list_t args = get_argument_nodes(statement.args_or_redirs()); end_execution_reason_t arg_result = this->expand_arguments_from_nodes(args, &event_args, failglob); @@ -809,7 +821,7 @@ end_execution_reason_t parse_execution_context_t::handle_command_not_found( // Here we want to report an error (so it shows a backtrace). // If the handler printed text, that's already shown, so error will be empty. - return this->report_error(STATUS_CMD_UNKNOWN, statement.command, error.c_str()); + return this->report_error(STATUS_CMD_UNKNOWN, *statement.command().ptr(), error.c_str()); } end_execution_reason_t parse_execution_context_t::expand_command( @@ -821,8 +833,8 @@ end_execution_reason_t parse_execution_context_t::expand_command( auto errors = new_parse_error_list(); // Get the unexpanded command string. We expect to always get it here. - wcstring unexp_cmd = get_source(statement.command); - size_t pos_of_command_token = statement.command.range.start; + wcstring unexp_cmd = get_source(*statement.command().ptr()); + size_t pos_of_command_token = statement.command().range().start; // Expand the string to produce completions, and report errors. 
expand_result_t expand_err = @@ -835,15 +847,15 @@ end_execution_reason_t parse_execution_context_t::expand_command( errors->offset_source_start(pos_of_command_token); return report_errors(STATUS_ILLEGAL_CMD, *errors); } else if (expand_err == expand_result_t::wildcard_no_match) { - return report_error(STATUS_UNMATCHED_WILDCARD, statement, WILDCARD_ERR_MSG, - get_source(statement).c_str()); + return report_error(STATUS_UNMATCHED_WILDCARD, *statement.ptr(), WILDCARD_ERR_MSG, + get_source(*statement.ptr()).c_str()); } assert(expand_err == expand_result_t::ok); // Complain if the resulting expansion was empty, or expanded to an empty string. // For no-exec it's okay, as we can't really perform the expansion. if (out_cmd->empty() && !no_exec()) { - return this->report_error(STATUS_ILLEGAL_CMD, statement.command, + return this->report_error(STATUS_ILLEGAL_CMD, *statement.command().ptr(), _(L"The expanded command was empty.")); } return end_execution_reason_t::ok; @@ -880,7 +892,7 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process( // If the specified command does not exist, and is undecorated, try using an implicit cd. if (!has_command && statement.decoration() == statement_decoration_t::none) { // Implicit cd requires an empty argument and redirection list. - if (statement.args_or_redirs.empty()) { + if (statement.args_or_redirs().empty()) { // Ok, no arguments or redirections; check to see if the command is a directory. 
use_implicit_cd = path_as_implicit_cd(cmd, parser->vars().get_pwd_slash(), parser->vars()) @@ -917,7 +929,7 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process( cmd_args.push_back(cmd); vec_append(cmd_args, std::move(args_from_cmd_expansion)); - ast_args_list_t arg_nodes = get_argument_nodes(statement.args_or_redirs); + ast_args_list_t arg_nodes = get_argument_nodes(statement.args_or_redirs()); end_execution_reason_t arg_result = this->expand_arguments_from_nodes(arg_nodes, &cmd_args, glob_behavior); if (arg_result != end_execution_reason_t::ok) { @@ -925,7 +937,7 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process( } // The set of IO redirections that we construct for the process. - auto reason = this->determine_redirections(statement.args_or_redirs, &*redirections); + auto reason = this->determine_redirections(statement.args_or_redirs(), &*redirections); if (reason != end_execution_reason_t::ok) { return reason; } @@ -950,14 +962,14 @@ end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes( completion_list_t arg_expanded; for (const ast::argument_t *arg_node : argument_nodes) { // Expect all arguments to have source. - assert(arg_node->has_source() && "Argument should have source"); + assert(arg_node->ptr()->has_source() && "Argument should have source"); // Expand this string. 
auto errors = new_parse_error_list(); arg_expanded.clear(); - auto expand_ret = - expand_string(get_source(*arg_node), &arg_expanded, expand_flags_t{}, ctx, &*errors); - errors->offset_source_start(arg_node->range.start); + auto expand_ret = expand_string(get_source(*arg_node->ptr()), &arg_expanded, + expand_flags_t{}, ctx, &*errors); + errors->offset_source_start(arg_node->range().start); switch (expand_ret.result) { case expand_result_t::error: { return this->report_errors(expand_ret.status, *errors); @@ -971,8 +983,8 @@ end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes( // For no_exec, ignore the error - this might work at runtime. if (no_exec()) return end_execution_reason_t::ok; // Report the unmatched wildcard error and stop processing. - return report_error(STATUS_UNMATCHED_WILDCARD, *arg_node, WILDCARD_ERR_MSG, - get_source(*arg_node).c_str()); + return report_error(STATUS_UNMATCHED_WILDCARD, *arg_node->ptr(), + WILDCARD_ERR_MSG, get_source(*arg_node->ptr()).c_str()); } break; } @@ -1003,24 +1015,26 @@ end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes( end_execution_reason_t parse_execution_context_t::determine_redirections( const ast::argument_or_redirection_list_t &list, redirection_spec_list_t *out_redirections) { // Get all redirection nodes underneath the statement. - for (const ast::argument_or_redirection_t &arg_or_redir : list) { + for (size_t i = 0; i < list.count(); i++) { + const ast::argument_or_redirection_t &arg_or_redir = *list.at(i); if (!arg_or_redir.is_redirection()) continue; const ast::redirection_t &redir_node = arg_or_redir.redirection(); - auto oper = pipe_or_redir_from_string(get_source(redir_node.oper).c_str()); + auto oper = pipe_or_redir_from_string(get_source(*redir_node.oper().ptr()).c_str()); if (!oper || !oper->is_valid()) { // TODO: figure out if this can ever happen. If so, improve this error message. 
- return report_error(STATUS_INVALID_ARGS, redir_node, _(L"Invalid redirection: %ls"), - get_source(redir_node).c_str()); + return report_error(STATUS_INVALID_ARGS, *redir_node.ptr(), + _(L"Invalid redirection: %ls"), + get_source(*redir_node.ptr()).c_str()); } // PCA: I can't justify this skip_variables flag. It was like this when I got here. - wcstring target = get_source(redir_node.target); + wcstring target = get_source(*redir_node.target().ptr()); bool target_expanded = expand_one(target, no_exec() ? expand_flag::skip_variables : expand_flags_t{}, ctx); if (!target_expanded || target.empty()) { // TODO: Improve this error message. - return report_error(STATUS_INVALID_ARGS, redir_node, + return report_error(STATUS_INVALID_ARGS, *redir_node.ptr(), _(L"Invalid redirection target: %ls"), target.c_str()); } @@ -1033,7 +1047,8 @@ end_execution_reason_t parse_execution_context_t::determine_redirections( !spec->get_target_as_fd()) { const wchar_t *fmt = _(L"Requested redirection to '%ls', which is not a valid file descriptor"); - return report_error(STATUS_INVALID_ARGS, redir_node, fmt, spec->target()->c_str()); + return report_error(STATUS_INVALID_ARGS, *redir_node.ptr(), fmt, + spec->target()->c_str()); } out_redirections->push_back(std::move(spec)); @@ -1050,7 +1065,8 @@ end_execution_reason_t parse_execution_context_t::populate_not_process( job_t *job, process_t *proc, const ast::not_statement_t ¬_statement) { auto &flags = job->mut_flags(); flags.negate = !flags.negate; - return this->populate_job_process(job, proc, not_statement.contents, not_statement.variables); + return this->populate_job_process(job, proc, not_statement.contents(), + not_statement.variables()); } template @@ -1059,9 +1075,9 @@ end_execution_reason_t parse_execution_context_t::populate_block_process( using namespace ast; // We handle block statements by creating process_type_t::block_node, that will bounce back to // us when it's time to execute them. 
- static_assert(Type::AstType == type_t::block_statement || - Type::AstType == type_t::if_statement || - Type::AstType == type_t::switch_statement, + static_assert(std::is_same::value || + std::is_same::value || + std::is_same::value, "Invalid block process"); // Get the argument or redirections list. @@ -1069,16 +1085,16 @@ end_execution_reason_t parse_execution_context_t::populate_block_process( const argument_or_redirection_list_t *args_or_redirs = nullptr; // Upcast to permit dropping the 'template' keyword. - const node_t &ss = specific_statement; - switch (Type::AstType) { + const auto ss = specific_statement.ptr(); + switch (ss->typ()) { case type_t::block_statement: - args_or_redirs = &ss.as()->args_or_redirs; + args_or_redirs = &ss->as_block_statement().args_or_redirs(); break; case type_t::if_statement: - args_or_redirs = &ss.as()->args_or_redirs; + args_or_redirs = &ss->as_if_statement().args_or_redirs(); break; case type_t::switch_statement: - args_or_redirs = &ss.as()->args_or_redirs; + args_or_redirs = &ss->as_switch_statement().args_or_redirs(); break; default: DIE("Unexpected block node type"); @@ -1089,7 +1105,7 @@ end_execution_reason_t parse_execution_context_t::populate_block_process( auto reason = this->determine_redirections(*args_or_redirs, &*redirections); if (reason == end_execution_reason_t::ok) { proc->type = process_type_t::block_node; - proc->block_node_source = pstree; + proc->block_node_source = pstree->clone(); proc->internal_block_node = &statement; proc->set_redirection_specs(std::move(redirections)); } @@ -1101,8 +1117,9 @@ end_execution_reason_t parse_execution_context_t::apply_variable_assignments( const block_t **block) { if (variable_assignment_list.empty()) return end_execution_reason_t::ok; *block = parser->push_block(block_t::variable_assignment_block()); - for (const ast::variable_assignment_t &variable_assignment : variable_assignment_list) { - const wcstring &source = get_source(variable_assignment); + for (size_t i = 0; 
i < variable_assignment_list.count(); i++) { + const ast::variable_assignment_t &variable_assignment = *variable_assignment_list.at(i); + const wcstring &source = get_source(*variable_assignment.ptr()); auto equals_pos = variable_assignment_equals_pos(source); assert(equals_pos); const wcstring variable_name = source.substr(0, *equals_pos); @@ -1112,7 +1129,7 @@ end_execution_reason_t parse_execution_context_t::apply_variable_assignments( // TODO this is mostly copied from expand_arguments_from_nodes, maybe extract to function auto expand_ret = expand_string(expression, &expression_expanded, expand_flags_t{}, ctx, &*errors); - errors->offset_source_start(variable_assignment.range.start + *equals_pos + 1); + errors->offset_source_start(variable_assignment.range().start + *equals_pos + 1); switch (expand_ret.result) { case expand_result_t::error: return this->report_errors(expand_ret.status, *errors); @@ -1143,7 +1160,7 @@ end_execution_reason_t parse_execution_context_t::populate_job_process( const ast::variable_assignment_list_t &variable_assignments) { using namespace ast; // Get the "specific statement" which is boolean / block / if / switch / decorated. 
- const node_t &specific_statement = *statement.contents.contents; + const auto specific_statement = statement.contents().ptr(); const block_t *block = nullptr; end_execution_reason_t result = @@ -1153,32 +1170,31 @@ end_execution_reason_t parse_execution_context_t::populate_job_process( }); if (result != end_execution_reason_t::ok) return result; - switch (specific_statement.type) { + switch (specific_statement->typ()) { case type_t::not_statement: { - result = - this->populate_not_process(job, proc, *specific_statement.as()); + result = this->populate_not_process(job, proc, specific_statement->as_not_statement()); break; } case type_t::block_statement: result = this->populate_block_process(proc, statement, - *specific_statement.as()); + specific_statement->as_block_statement()); break; case type_t::if_statement: result = this->populate_block_process(proc, statement, - *specific_statement.as()); + specific_statement->as_if_statement()); break; case type_t::switch_statement: result = this->populate_block_process(proc, statement, - *specific_statement.as()); + specific_statement->as_switch_statement()); break; case type_t::decorated_statement: { result = - this->populate_plain_process(proc, *specific_statement.as()); + this->populate_plain_process(proc, specific_statement->as_decorated_statement()); break; } default: { FLOGF(error, L"'%ls' not handled by new parser yet.", - specific_statement.describe().c_str()); + specific_statement->describe()->c_str()); PARSER_DIE(); break; } @@ -1196,19 +1212,20 @@ end_execution_reason_t parse_execution_context_t::populate_job_from_job_node( process_list_t processes; processes.emplace_back(new process_t()); end_execution_reason_t result = this->populate_job_process( - j, processes.back().get(), job_node.statement, job_node.variables); + j, processes.back().get(), job_node.statement(), job_node.variables()); // Construct process_ts for job continuations (pipelines). 
- for (const ast::job_continuation_t &jc : job_node.continuation) { + for (size_t i = 0; i < job_node.continuation().count(); i++) { + const ast::job_continuation_t &jc = *job_node.continuation().at(i); if (result != end_execution_reason_t::ok) { break; } // Handle the pipe, whose fd may not be the obvious stdout. - auto parsed_pipe = pipe_or_redir_from_string(get_source(jc.pipe).c_str()); + auto parsed_pipe = pipe_or_redir_from_string(get_source(*jc.pipe().ptr()).c_str()); assert(parsed_pipe && parsed_pipe->is_pipe && "Failed to parse valid pipe"); if (!parsed_pipe->is_valid()) { - result = report_error(STATUS_INVALID_ARGS, jc.pipe, ILLEGAL_FD_ERR_MSG, - get_source(jc.pipe).c_str()); + result = report_error(STATUS_INVALID_ARGS, *jc.pipe().ptr(), ILLEGAL_FD_ERR_MSG, + get_source(*jc.pipe().ptr()).c_str()); break; } processes.back()->pipe_write_fd = parsed_pipe->fd; @@ -1222,7 +1239,8 @@ end_execution_reason_t parse_execution_context_t::populate_job_from_job_node( // Store the new process (and maybe with an error). processes.emplace_back(new process_t()); - result = this->populate_job_process(j, processes.back().get(), jc.statement, jc.variables); + result = + this->populate_job_process(j, processes.back().get(), jc.statement(), jc.variables()); } // Inform our processes of who is first and last @@ -1254,22 +1272,27 @@ static bool remove_job(parser_t &parser, const job_t *job) { /// `sleep 1 | not time true` will time the whole job! static bool job_node_wants_timing(const ast::job_pipeline_t &job_node) { // Does our job have the job-level time prefix? - if (job_node.time) return true; + if (job_node.has_time()) return true; // Helper to return true if a node is 'not time ...' or 'not not time...' or... auto is_timed_not_statement = [](const ast::statement_t &stat) { - const auto *ns = stat.contents->try_as(); + const auto *ns = stat.contents().ptr()->try_as_not_statement() + ? 
&stat.contents().ptr()->as_not_statement() + : nullptr; while (ns) { - if (ns->time) return true; - ns = ns->contents.try_as(); + if (ns->has_time()) return true; + ns = ns->contents().ptr()->try_as_not_statement() + ? &ns->contents().ptr()->as_not_statement() + : nullptr; } return false; }; // Do we have a 'not time ...' anywhere in our pipeline? - if (is_timed_not_statement(job_node.statement)) return true; - for (const ast::job_continuation_t &jc : job_node.continuation) { - if (is_timed_not_statement(jc.statement)) return true; + if (is_timed_not_statement(job_node.statement())) return true; + for (size_t i = 0; i < job_node.continuation().count(); i++) { + const ast::job_continuation_t &jc = *job_node.continuation().at(i); + if (is_timed_not_statement(jc.statement())) return true; } return false; } @@ -1307,33 +1330,32 @@ end_execution_reason_t parse_execution_context_t::run_1_job(const ast::job_pipel // However, if there are no redirections, then we can just jump into the block directly, which // is significantly faster. if (job_is_simple_block(job_node)) { - bool do_time = job_node.time.has_value(); + bool do_time = job_node.has_time(); // If no-exec has been given, there is nothing to time. 
auto timer = push_timer(do_time && !no_exec()); const block_t *block = nullptr; end_execution_reason_t result = - this->apply_variable_assignments(nullptr, job_node.variables, &block); + this->apply_variable_assignments(nullptr, job_node.variables(), &block); cleanup_t scope([&]() { if (block) parser->pop_block(block); }); - const ast::node_t *specific_statement = job_node.statement.contents.get(); + const auto specific_statement = job_node.statement().contents().ptr(); assert(specific_statement_type_is_redirectable_block(*specific_statement)); if (result == end_execution_reason_t::ok) { - switch (specific_statement->type) { + switch (specific_statement->typ()) { case ast::type_t::block_statement: { - result = this->run_block_statement( - *specific_statement->as(), associated_block); + result = this->run_block_statement(specific_statement->as_block_statement(), + associated_block); break; } case ast::type_t::if_statement: { - result = this->run_if_statement(*specific_statement->as(), + result = this->run_if_statement(specific_statement->as_if_statement(), associated_block); break; } case ast::type_t::switch_statement: { - result = this->run_switch_statement( - *specific_statement->as()); + result = this->run_switch_statement(specific_statement->as_switch_statement()); break; } default: { @@ -1359,7 +1381,7 @@ end_execution_reason_t parse_execution_context_t::run_1_job(const ast::job_pipel const auto &ld = parser->libdata(); job_t::properties_t props{}; - props.initial_background = job_node.bg.has_value(); + props.initial_background = job_node.has_bg(); props.skip_notification = ld.is_subshell || parser->is_block() || ld.is_event || !parser->is_interactive(); props.from_event_handler = ld.is_event; @@ -1367,10 +1389,10 @@ end_execution_reason_t parse_execution_context_t::run_1_job(const ast::job_pipel // It's an error to have 'time' in a background job. 
if (props.wants_timing && props.initial_background) { - return this->report_error(STATUS_INVALID_ARGS, job_node, ERROR_TIME_BACKGROUND); + return this->report_error(STATUS_INVALID_ARGS, *job_node.ptr(), ERROR_TIME_BACKGROUND); } - shared_ptr job = std::make_shared(props, get_source(job_node)); + shared_ptr job = std::make_shared(props, get_source(*job_node.ptr())); // We are about to populate a job. One possible argument to the job is a command substitution // which may be interested in the job that's populating it, via '--on-job-exit caller'. Record @@ -1426,9 +1448,10 @@ end_execution_reason_t parse_execution_context_t::run_job_conjunction( if (auto reason = check_end_execution()) { return *reason; } - end_execution_reason_t result = run_1_job(job_expr.job, associated_block); + end_execution_reason_t result = run_1_job(job_expr.job(), associated_block); - for (const ast::job_conjunction_continuation_t &jc : job_expr.continuations) { + for (size_t i = 0; i < job_expr.continuations().count(); i++) { + const ast::job_conjunction_continuation_t &jc = *job_expr.continuations().at(i); if (result != end_execution_reason_t::ok) { return result; } @@ -1437,7 +1460,7 @@ end_execution_reason_t parse_execution_context_t::run_job_conjunction( } // Check the conjunction type. bool skip = false; - switch (jc.conjunction.type) { + switch (jc.conjunction().token_type()) { case parse_token_type_t::andand: // AND. Skip if the last job failed. skip = parser->get_last_status() != 0; @@ -1450,7 +1473,7 @@ end_execution_reason_t parse_execution_context_t::run_job_conjunction( DIE("Unexpected job conjunction type"); } if (!skip) { - result = run_1_job(jc.job, associated_block); + result = run_1_job(jc.job(), associated_block); } } return result; @@ -1465,8 +1488,8 @@ end_execution_reason_t parse_execution_context_t::test_and_run_1_job_conjunction } // Maybe skip the job if it has a leading and/or. 
bool skip = false; - if (jc.decorator.has_value()) { - switch (jc.decorator->kw) { + if (jc.has_decorator()) { + switch (jc.decorator().kw()) { case parse_keyword_t::kw_and: // AND. Skip if the last job failed. skip = parser->get_last_status() != 0; @@ -1490,8 +1513,9 @@ end_execution_reason_t parse_execution_context_t::test_and_run_1_job_conjunction end_execution_reason_t parse_execution_context_t::run_job_list(const ast::job_list_t &job_list_node, const block_t *associated_block) { auto result = end_execution_reason_t::ok; - for (const ast::job_conjunction_t &jc : job_list_node) { - result = test_and_run_1_job_conjunction(jc, associated_block); + for (size_t i = 0; i < job_list_node.count(); i++) { + const ast::job_conjunction_t *jc = job_list_node.at(i); + result = test_and_run_1_job_conjunction(*jc, associated_block); } // Returns the result of the last job executed or skipped. return result; @@ -1500,8 +1524,9 @@ end_execution_reason_t parse_execution_context_t::run_job_list(const ast::job_li end_execution_reason_t parse_execution_context_t::run_job_list( const ast::andor_job_list_t &job_list_node, const block_t *associated_block) { auto result = end_execution_reason_t::ok; - for (const ast::andor_job_t &aoj : job_list_node) { - result = test_and_run_1_job_conjunction(aoj.job, associated_block); + for (size_t i = 0; i < job_list_node.count(); i++) { + const ast::andor_job_t *aoj = job_list_node.at(i); + result = test_and_run_1_job_conjunction(aoj->job(), associated_block); } // Returns the result of the last job executed or skipped. return result; @@ -1511,15 +1536,15 @@ end_execution_reason_t parse_execution_context_t::eval_node(const ast::statement const block_t *associated_block) { // Note we only expect block-style statements here. No not statements. 
enum end_execution_reason_t status = end_execution_reason_t::ok; - const ast::node_t *contents = statement.contents.get(); - if (const auto *block = contents->try_as()) { + const auto contents = statement.contents().ptr(); + if (const auto *block = contents->try_as_block_statement()) { status = this->run_block_statement(*block, associated_block); - } else if (const auto *ifstat = contents->try_as()) { + } else if (const auto *ifstat = contents->try_as_if_statement()) { status = this->run_if_statement(*ifstat, associated_block); - } else if (const auto *switchstat = contents->try_as()) { + } else if (const auto *switchstat = contents->try_as_switch_statement()) { status = this->run_switch_statement(*switchstat); } else { - FLOGF(error, L"Unexpected node %ls found in %s", statement.describe().c_str(), + FLOGF(error, L"Unexpected node %ls found in %s", statement.describe()->c_str(), __FUNCTION__); abort(); } @@ -1535,7 +1560,7 @@ end_execution_reason_t parse_execution_context_t::eval_node(const ast::job_list_ if (const auto *infinite_recursive_node = this->infinite_recursive_statement_in_job_list(job_list, &func_name)) { // We have an infinite recursion. 
- return this->report_error(STATUS_CMD_ERROR, *infinite_recursive_node, + return this->report_error(STATUS_CMD_ERROR, *infinite_recursive_node->ptr(), INFINITE_FUNC_RECURSION_ERR_MSG, func_name.c_str()); } @@ -1544,7 +1569,8 @@ end_execution_reason_t parse_execution_context_t::eval_node(const ast::job_list_ if ((associated_block->type() == block_type_t::top && parser->function_stack_is_overflowing()) || (associated_block->type() == block_type_t::subst && parser->is_eval_depth_exceeded())) { - return this->report_error(STATUS_CMD_ERROR, job_list, CALL_STACK_LIMIT_EXCEEDED_ERR_MSG); + return this->report_error(STATUS_CMD_ERROR, *job_list.ptr(), + CALL_STACK_LIMIT_EXCEEDED_ERR_MSG); } return this->run_job_list(job_list, associated_block); } @@ -1594,17 +1620,16 @@ int parse_execution_context_t::line_offset_of_node(const ast::job_pipeline_t *no } // If for some reason we're executing a node without source, return -1. - auto range = node->try_source_range(); - if (!range) { + if (!node->try_source_range()) { return -1; } - return this->line_offset_of_character_at_offset(range->start); + return this->line_offset_of_character_at_offset(node->source_range().start); } int parse_execution_context_t::line_offset_of_character_at_offset(size_t offset) { // Count the number of newlines, leveraging our cache. - assert(offset <= pstree->src.size()); + assert(offset <= pstree->src().size()); // Easy hack to handle 0. if (offset == 0) { @@ -1613,7 +1638,7 @@ int parse_execution_context_t::line_offset_of_character_at_offset(size_t offset) // We want to return (one plus) the number of newlines at offsets less than the given offset. // cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset. 
- const wchar_t *str = pstree->src.c_str(); + const wcstring &str = pstree->src(); if (offset > cached_lineno_offset) { size_t i; for (i = cached_lineno_offset; i < offset && str[i] != L'\0'; i++) { @@ -1649,8 +1674,8 @@ int parse_execution_context_t::get_current_line_number() { int parse_execution_context_t::get_current_source_offset() const { int result = -1; if (executing_job_node) { - if (auto range = executing_job_node->try_source_range()) { - result = static_cast(range->start); + if (executing_job_node->try_source_range()) { + result = static_cast(executing_job_node->source_range().start); } } return result; diff --git a/src/parse_execution.h b/src/parse_execution.h index 63cdb3c0d..52c4718b1 100644 --- a/src/parse_execution.h +++ b/src/parse_execution.h @@ -38,7 +38,7 @@ enum class end_execution_reason_t { class parse_execution_context_t : noncopyable_t { private: - parsed_source_ref_t pstree; + rust::Box pstree; parser_t *const parser; const operation_context_t &ctx; @@ -161,7 +161,7 @@ class parse_execution_context_t : noncopyable_t { public: /// Construct a context in preparation for evaluating a node in a tree, with the given block_io. /// The execution context may access the parser and parent job group (if any) through ctx. - parse_execution_context_t(parsed_source_ref_t pstree, const operation_context_t &ctx, + parse_execution_context_t(rust::Box pstree, const operation_context_t &ctx, io_chain_t block_io); /// Returns the current line number, indexed from 1. Not const since it touches @@ -172,10 +172,10 @@ class parse_execution_context_t : noncopyable_t { int get_current_source_offset() const; /// Returns the source string. - const wcstring &get_source() const { return pstree->src; } + const wcstring &get_source() const { return pstree->src(); } /// Return the parsed ast. - const ast::ast_t &ast() const { return pstree->ast; } + const ast::ast_t &ast() const { return pstree->ast(); } /// Start executing at the given node. 
Returns 0 if there was no error, 1 if there was an /// error. diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp deleted file mode 100644 index 3942f6e4d..000000000 --- a/src/parse_tree.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// Programmatic representation of fish code. -#include "config.h" // IWYU pragma: keep - -#include "parse_tree.h" - -#include - -#include -#include - -#include "ast.h" -#include "common.h" -#include "enum_map.h" -#include "fallback.h" -#include "maybe.h" -#include "parse_constants.h" -#include "tokenizer.h" -#include "wutil.h" // IWYU pragma: keep - -parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) { - switch (err) { - case tokenizer_error_t::none: - return parse_error_code_t::none; - case tokenizer_error_t::unterminated_quote: - return parse_error_code_t::tokenizer_unterminated_quote; - case tokenizer_error_t::unterminated_subshell: - return parse_error_code_t::tokenizer_unterminated_subshell; - case tokenizer_error_t::unterminated_slice: - return parse_error_code_t::tokenizer_unterminated_slice; - case tokenizer_error_t::unterminated_escape: - return parse_error_code_t::tokenizer_unterminated_escape; - default: - return parse_error_code_t::tokenizer_other; - } -} - -/// Returns a string description of the given parse token. -wcstring parse_token_t::describe() const { - wcstring result = token_type_description(type); - if (keyword != parse_keyword_t::none) { - append_format(result, L" <%ls>", keyword_description(keyword)); - } - return result; -} - -/// A string description appropriate for presentation to the user. 
-wcstring parse_token_t::user_presentable_description() const { - return *token_type_user_presentable_description(type, keyword); -} - -parsed_source_t::parsed_source_t(wcstring &&s, ast::ast_t &&ast) - : src(std::move(s)), ast(std::move(ast)) {} - -parsed_source_t::~parsed_source_t() = default; - -parsed_source_ref_t parse_source(wcstring &&src, parse_tree_flags_t flags, - parse_error_list_t *errors) { - using namespace ast; - ast_t ast = ast_t::parse(src, flags, errors); - if (ast.errored() && !(flags & parse_flag_continue_after_error)) { - return nullptr; - } - return std::make_shared(std::move(src), std::move(ast)); -} diff --git a/src/parse_tree.h b/src/parse_tree.h index 7814155e6..85b557f66 100644 --- a/src/parse_tree.h +++ b/src/parse_tree.h @@ -9,50 +9,13 @@ #include "parse_constants.h" #include "tokenizer.h" -/// A struct representing the token type that we use internally. -struct parse_token_t { - parse_token_type_t type; // The type of the token as represented by the parser - parse_keyword_t keyword{parse_keyword_t::none}; // Any keyword represented by this token - bool has_dash_prefix{false}; // Hackish: whether the source contains a dash prefix - bool is_help_argument{false}; // Hackish: whether the source looks like '-h' or '--help' - bool is_newline{false}; // Hackish: if TOK_END, whether the source is a newline. - bool may_be_variable_assignment{false}; // Hackish: whether this token is a string like FOO=bar - tokenizer_error_t tok_error{ - tokenizer_error_t::none}; // If this is a tokenizer error, that error. - source_offset_t source_start{SOURCE_OFFSET_INVALID}; - source_offset_t source_length{0}; - - /// \return the source range. - /// Note the start may be invalid. - source_range_t range() const { return source_range_t{source_start, source_length}; } - - /// \return whether we are a string with the dash prefix set. 
- bool is_dash_prefix_string() const { - return type == parse_token_type_t::string && has_dash_prefix; - } - - wcstring describe() const; - wcstring user_presentable_description() const; - - constexpr parse_token_t(parse_token_type_t type) : type(type) {} -}; - -parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err); - -/// A type wrapping up a parse tree and the original source behind it. -struct parsed_source_t : noncopyable_t, nonmovable_t { - wcstring src; - ast::ast_t ast; - - parsed_source_t(wcstring &&s, ast::ast_t &&ast); - ~parsed_source_t(); -}; - -/// Return a shared pointer to parsed_source_t, or null on failure. -/// If parse_flag_continue_after_error is not set, this will return null on any error. -using parsed_source_ref_t = std::shared_ptr; -parsed_source_ref_t parse_source(wcstring &&src, parse_tree_flags_t flags, - parse_error_list_t *errors); +#if INCLUDE_RUST_HEADERS +#include "parse_tree.rs.h" +using parsed_source_ref_t = ParsedSourceRefFFI; +#else +struct ParsedSourceRefFFI; +using parsed_source_ref_t = ParsedSourceRefFFI; +#endif /// Error message when a command may not be in a pipeline. 
#define INVALID_PIPELINE_CMD_ERR_MSG _(L"The '%ls' command can not be used in a pipeline") diff --git a/src/parse_util.cpp b/src/parse_util.cpp index c8bde9860..5e2b8853f 100644 --- a/src/parse_util.cpp +++ b/src/parse_util.cpp @@ -24,6 +24,7 @@ #include "operation_context.h" #include "parse_constants.h" #include "parse_tree.h" +#include "parse_util.rs.h" #include "tokenizer.h" #include "wcstringutil.h" #include "wildcard.h" @@ -592,6 +593,144 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote, return result; } +indent_visitor_t::indent_visitor_t(const wcstring &src, std::vector &indents) + : src(src), indents(indents), visitor(new_indent_visitor(*this)) {} + +bool indent_visitor_t::has_newline(const ast::maybe_newlines_t &nls) const { + return nls.ptr()->source(src)->find(L'\n') != wcstring::npos; +} + +int indent_visitor_t::visit(const void *node_) { + auto &node = *static_cast(node_); + int inc = 0; + int dec = 0; + using namespace ast; + switch (node.typ()) { + case type_t::job_list: + case type_t::andor_job_list: + // Job lists are never unwound. + inc = 1; + dec = 1; + break; + + // Increment indents for conditions in headers (#1665). + case type_t::job_conjunction: + if (node.parent()->typ() == type_t::while_header || + node.parent()->typ() == type_t::if_clause) { + inc = 1; + dec = 1; + } + break; + + // Increment indents for job_continuation_t if it contains a newline. + // This is a bit of a hack - it indents cases like: + // cmd1 | + // ....cmd2 + // but avoids "double indenting" if there's no newline: + // cmd1 | while cmd2 + // ....cmd3 + // end + // See #7252. + case type_t::job_continuation: + if (has_newline(node.as_job_continuation().newlines())) { + inc = 1; + dec = 1; + } + break; + + // Likewise for && and ||. + case type_t::job_conjunction_continuation: + if (has_newline(node.as_job_conjunction_continuation().newlines())) { + inc = 1; + dec = 1; + } + break; + + case type_t::case_item_list: + // Here's a hack. 
Consider: + // switch abc + // cas + // + // fish will see that 'cas' is not valid inside a switch statement because it is + // not "case". It will then unwind back to the top level job list, producing a + // parse tree like: + // + // job_list + // switch_job + // + // normal_job + // cas + // + // And so we will think that the 'cas' job is at the same level as the switch. + // To address this, if we see that the switch statement was not closed, do not + // decrement the indent afterwards. + inc = 1; + dec = node.parent()->as_switch_statement().end().ptr()->has_source() ? 1 : 0; + break; + case type_t::token_base: { + if (node.parent()->typ() == type_t::begin_header && + node.token_type() == parse_token_type_t::end) { + // The newline after "begin" is optional, so it is part of the header. + // The header is not in the indented block, so indent the newline here. + if (*node.source(src) == L"\n") { + inc = 1; + dec = 1; + } + } + break; + } + default: + break; + } + + auto range = node.source_range(); + if (range.length > 0 && node.category() == category_t::leaf) { + record_line_continuations_until(range.start); + std::fill(indents.begin() + last_leaf_end, indents.begin() + range.start, last_indent); + } + + indent += inc; + + // If we increased the indentation, apply it to the remainder of the string, even if the + // list is empty. For example (where _ represents the cursor): + // + // if foo + // _ + // + // we want to indent the newline. + if (inc) { + last_indent = indent; + } + + // If this is a leaf node, apply the current indentation. 
+ if (node.category() == category_t::leaf && range.length > 0) { + std::fill(indents.begin() + range.start, indents.begin() + range.end(), indent); + last_leaf_end = range.start + range.length; + last_indent = indent; + } + + return dec; +} + +void indent_visitor_t::did_visit(int dec) { indent -= dec; } + +void indent_visitor_t::record_line_continuations_until(size_t offset) { + wcstring gap_text = src.substr(last_leaf_end, offset - last_leaf_end); + size_t escaped_nl = gap_text.find(L"\\\n"); + if (escaped_nl == wcstring::npos) return; + auto line_end = gap_text.begin() + escaped_nl; + if (std::find(gap_text.begin(), line_end, L'#') != line_end) return; + auto end = src.begin() + offset; + auto newline = src.begin() + last_leaf_end + escaped_nl + 1; + // The gap text might contain multiple newlines if there are multiple lines that + // don't contain an AST node, for example, comment lines, or lines containing only + // the escaped newline. + do { + line_continuations.push_back(newline - src.begin()); + newline = std::find(newline + 1, end, L'\n'); + } while (newline != end); +} + std::vector parse_util_compute_indents(const wcstring &src) { // Make a vector the same size as the input string, which contains the indents. Initialize them // to 0. @@ -609,173 +748,11 @@ std::vector parse_util_compute_indents(const wcstring &src) { // were a case item list. using namespace ast; auto ast = - ast_t::parse(src, parse_flag_continue_after_error | parse_flag_include_comments | - parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated); - - // Visit all of our nodes. When we get a job_list or case_item_list, increment indent while - // visiting its children. - struct indent_visitor_t { - indent_visitor_t(const wcstring &src, std::vector &indents) - : src(src), indents(indents) {} - - void visit(const node_t &node) { - int inc = 0; - int dec = 0; - switch (node.type) { - case type_t::job_list: - case type_t::andor_job_list: - // Job lists are never unwound. 
- inc = 1; - dec = 1; - break; - - // Increment indents for conditions in headers (#1665). - case type_t::job_conjunction: - if (node.parent->type == type_t::while_header || - node.parent->type == type_t::if_clause) { - inc = 1; - dec = 1; - } - break; - - // Increment indents for job_continuation_t if it contains a newline. - // This is a bit of a hack - it indents cases like: - // cmd1 | - // ....cmd2 - // but avoids "double indenting" if there's no newline: - // cmd1 | while cmd2 - // ....cmd3 - // end - // See #7252. - case type_t::job_continuation: - if (has_newline(node.as()->newlines)) { - inc = 1; - dec = 1; - } - break; - - // Likewise for && and ||. - case type_t::job_conjunction_continuation: - if (has_newline(node.as()->newlines)) { - inc = 1; - dec = 1; - } - break; - - case type_t::case_item_list: - // Here's a hack. Consider: - // switch abc - // cas - // - // fish will see that 'cas' is not valid inside a switch statement because it is - // not "case". It will then unwind back to the top level job list, producing a - // parse tree like: - // - // job_list - // switch_job - // - // normal_job - // cas - // - // And so we will think that the 'cas' job is at the same level as the switch. - // To address this, if we see that the switch statement was not closed, do not - // decrement the indent afterwards. - inc = 1; - dec = node.parent->as()->end.unsourced ? 0 : 1; - break; - case type_t::token_base: { - auto tok = node.as(); - if (node.parent->type == type_t::begin_header && - tok->type == parse_token_type_t::end) { - // The newline after "begin" is optional, so it is part of the header. - // The header is not in the indented block, so indent the newline here. 
- if (node.source(src) == L"\n") { - inc = 1; - dec = 1; - } - } - break; - } - default: - break; - } - - auto range = node.source_range(); - if (range.length > 0 && node.category == category_t::leaf) { - record_line_continuations_until(range.start); - std::fill(indents.begin() + last_leaf_end, indents.begin() + range.start, - last_indent); - } - - indent += inc; - - // If we increased the indentation, apply it to the remainder of the string, even if the - // list is empty. For example (where _ represents the cursor): - // - // if foo - // _ - // - // we want to indent the newline. - if (inc) { - last_indent = indent; - } - - // If this is a leaf node, apply the current indentation. - if (node.category == category_t::leaf && range.length > 0) { - std::fill(indents.begin() + range.start, indents.begin() + range.end(), indent); - last_leaf_end = range.start + range.length; - last_indent = indent; - } - - node_visitor(*this).accept_children_of(&node); - indent -= dec; - } - - /// \return whether a maybe_newlines node contains at least one newline. - bool has_newline(const maybe_newlines_t &nls) const { - return nls.source(src).find(L'\n') != wcstring::npos; - } - - void record_line_continuations_until(size_t offset) { - wcstring gap_text = src.substr(last_leaf_end, offset - last_leaf_end); - size_t escaped_nl = gap_text.find(L"\\\n"); - if (escaped_nl == wcstring::npos) return; - auto line_end = gap_text.begin() + escaped_nl; - if (std::find(gap_text.begin(), line_end, L'#') != line_end) return; - auto end = src.begin() + offset; - auto newline = src.begin() + last_leaf_end + escaped_nl + 1; - // The gap text might contain multiple newlines if there are multiple lines that - // don't contain an AST node, for example, comment lines, or lines containing only - // the escaped newline. 
- do { - line_continuations.push_back(newline - src.begin()); - newline = std::find(newline + 1, end, L'\n'); - } while (newline != end); - } - - // The one-past-the-last index of the most recently encountered leaf node. - // We use this to populate the indents even if there's no tokens in the range. - size_t last_leaf_end{0}; - - // The last indent which we assigned. - int last_indent{-1}; - - // The source we are indenting. - const wcstring &src; - - // List of indents, which we populate. - std::vector &indents; - - // Initialize our starting indent to -1, as our top-level node is a job list which - // will immediately increment it. - int indent{-1}; - - // List of locations of escaped newline characters. - std::vector line_continuations; - }; + ast_parse(src, parse_flag_continue_after_error | parse_flag_include_comments | + parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated); indent_visitor_t iv(src, indents); - node_visitor(iv).accept(ast.top()); + iv.visitor->visit(*ast->top()); iv.record_line_continuations_until(indents.size()); std::fill(indents.begin() + iv.last_leaf_end, indents.end(), iv.last_indent); @@ -838,8 +815,9 @@ bool parse_util_argument_is_help(const wcstring &s) { return s == L"-h" || s == // \return a pointer to the first argument node of an argument_or_redirection_list_t, or nullptr if // there are no arguments. 
static const ast::argument_t *get_first_arg(const ast::argument_or_redirection_list_t &list) { - for (const ast::argument_or_redirection_t &v : list) { - if (v.is_argument()) return &v.argument(); + for (size_t i = 0; i < list.count(); i++) { + const ast::argument_or_redirection_t *v = list.at(i); + if (v->is_argument()) return &v->argument(); } return nullptr; } @@ -953,10 +931,10 @@ void parse_util_expand_variable_error(const wcstring &token, size_t global_token parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argument_t &arg, const wcstring &arg_src, parse_error_list_t *out_errors) { - maybe_t source_range = arg.try_source_range(); - if (!source_range.has_value()) return 0; + if (!arg.try_source_range()) return 0; + auto source_range = arg.source_range(); - size_t source_start = source_range->start; + size_t source_start = source_range.start; parser_test_error_bits_t err = 0; auto check_subtoken = [&arg_src, &out_errors, source_start](size_t begin, size_t end) -> int { @@ -1062,8 +1040,8 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen static bool detect_errors_in_backgrounded_job(const ast::job_pipeline_t &job, parse_error_list_t *parse_errors) { using namespace ast; - auto source_range = job.try_source_range(); - if (!source_range) return false; + if (!job.try_source_range()) return false; + auto source_range = job.source_range(); bool errored = false; // Disallow background in the following cases: @@ -1071,16 +1049,16 @@ static bool detect_errors_in_backgrounded_job(const ast::job_pipeline_t &job, // foo & ; or bar // if foo & ; end // while foo & ; end - const job_conjunction_t *job_conj = job.parent->try_as(); + const job_conjunction_t *job_conj = job.ptr()->parent()->try_as_job_conjunction(); if (!job_conj) return false; - if (job_conj->parent->try_as()) { - errored = append_syntax_error(parse_errors, source_range->start, source_range->length, + if (job_conj->ptr()->parent()->try_as_if_clause()) { 
+ errored = append_syntax_error(parse_errors, source_range.start, source_range.length, BACKGROUND_IN_CONDITIONAL_ERROR_MSG); - } else if (job_conj->parent->try_as()) { - errored = append_syntax_error(parse_errors, source_range->start, source_range->length, + } else if (job_conj->ptr()->parent()->try_as_while_header()) { + errored = append_syntax_error(parse_errors, source_range.start, source_range.length, BACKGROUND_IN_CONDITIONAL_ERROR_MSG); - } else if (const ast::job_list_t *jlist = job_conj->parent->try_as()) { + } else if (const ast::job_list_t *jlist = job_conj->ptr()->parent()->try_as_job_list()) { // This isn't very complete, e.g. we don't catch 'foo & ; not and bar'. // Find the index of ourselves in the job list. size_t index; @@ -1091,13 +1069,14 @@ static bool detect_errors_in_backgrounded_job(const ast::job_pipeline_t &job, // Try getting the next job and check its decorator. if (const job_conjunction_t *next = jlist->at(index + 1)) { - if (const keyword_base_t *deco = next->decorator.contents.get()) { + if (next->has_decorator()) { + const auto &deco = next->decorator(); assert( - (deco->kw == parse_keyword_t::kw_and || deco->kw == parse_keyword_t::kw_or) && + (deco.kw() == parse_keyword_t::kw_and || deco.kw() == parse_keyword_t::kw_or) && "Unexpected decorator keyword"); - const wchar_t *deco_name = (deco->kw == parse_keyword_t::kw_and ? L"and" : L"or"); - errored = append_syntax_error(parse_errors, deco->source_range().start, - deco->source_range().length, + const wchar_t *deco_name = (deco.kw() == parse_keyword_t::kw_and ? L"and" : L"or"); + errored = append_syntax_error(parse_errors, deco.source_range().start, + deco.source_range().length, BOOL_AFTER_BACKGROUND_ERROR_MSG, deco_name); } } @@ -1119,27 +1098,28 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src, // Determine if the first argument is help. 
bool first_arg_is_help = false; - if (const auto *arg = get_first_arg(dst.args_or_redirs)) { - const wcstring &arg_src = arg->source(buff_src, storage); + if (const auto *arg = get_first_arg(dst.args_or_redirs())) { + wcstring arg_src = *arg->source(buff_src); + *storage = arg_src; first_arg_is_help = parse_util_argument_is_help(arg_src); } // Get the statement we are part of. - const statement_t *st = dst.parent->as(); + const statement_t &st = dst.ptr()->parent()->as_statement(); // Walk up to the job. const ast::job_pipeline_t *job = nullptr; - for (const node_t *cursor = st; job == nullptr; cursor = cursor->parent) { - assert(cursor && "Reached root without finding a job"); - job = cursor->try_as(); + for (auto cursor = dst.ptr()->parent(); job == nullptr; cursor = cursor->parent()) { + assert(cursor->has_value() && "Reached root without finding a job"); + job = cursor->try_as_job_pipeline(); } assert(job && "Should have found the job"); // Check our pipeline position. pipeline_position_t pipe_pos; - if (job->continuation.empty()) { + if (job->continuation().empty()) { pipe_pos = pipeline_position_t::none; - } else if (&job->statement == st) { + } else if (&job->statement() == &st) { pipe_pos = pipeline_position_t::first; } else { pipe_pos = pipeline_position_t::subsequent; @@ -1158,7 +1138,8 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src, if (pipe_pos == pipeline_position_t::subsequent) { // check if our command is 'and' or 'or'. This is very clumsy; we don't catch e.g. quoted // commands. 
- const wcstring &command = dst.command.source(buff_src, storage); + wcstring command = *dst.command().source(buff_src); + *storage = command; if (command == L"and" || command == L"or") { errored = append_syntax_error(parse_errors, source_start, source_length, INVALID_PIPELINE_CMD_ERR_MSG, command.c_str()); @@ -1174,14 +1155,16 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src, // $status specifically is invalid as a command, // to avoid people trying `if $status`. // We see this surprisingly regularly. - const wcstring &com = dst.command.source(buff_src, storage); + wcstring com = *dst.command().source(buff_src); + *storage = com; if (com == L"$status") { errored = append_syntax_error(parse_errors, source_start, source_length, _(L"$status is not valid as a command. See `help conditions`")); } - const wcstring &unexp_command = dst.command.source(buff_src, storage); + wcstring unexp_command = *dst.command().source(buff_src); + *storage = unexp_command; if (!unexp_command.empty()) { // Check that we can expand the command. // Make a new error list so we can fix the offset for just those, then append later. @@ -1207,15 +1190,15 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src, // loop from the ancestor alone; we need the header. That is, we hit a // block_statement, and have to check its header. bool found_loop = false; - for (const node_t *ancestor = &dst; ancestor != nullptr; ancestor = ancestor->parent) { - const auto *block = ancestor->try_as(); + for (auto ancestor = dst.ptr(); ancestor->has_value(); ancestor = ancestor->parent()) { + const auto *block = ancestor->try_as_block_statement(); if (!block) continue; - if (block->header->type == type_t::for_header || - block->header->type == type_t::while_header) { + if (block->header().ptr()->typ() == type_t::for_header || + block->header().ptr()->typ() == type_t::while_header) { // This is a loop header, so we can break or continue. 
found_loop = true; break; - } else if (block->header->type == type_t::function_header) { + } else if (block->header().ptr()->typ() == type_t::function_header) { // This is a function header, so we cannot break or // continue. We stop our search here. found_loop = false; @@ -1245,7 +1228,7 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src, // The expansion errors here go from the *command* onwards, // so we need to offset them by the *command* offset, // excluding the decoration. - new_errors->offset_source_start(dst.command.source_range().start); + new_errors->offset_source_start(dst.command().source_range().start); parse_errors->append(&*new_errors); } } @@ -1289,23 +1272,26 @@ parser_test_error_bits_t parse_util_detect_errors(const ast::ast_t &ast, const w // Verify no variable expansions. wcstring storage; - for (const node_t &node : ast) { - if (const job_continuation_t *jc = node.try_as()) { + for (auto ast_traversal = new_ast_traversal(*ast.top());;) { + auto node = ast_traversal->next(); + if (!node->has_value()) break; + if (const auto *jc = node->try_as_job_continuation()) { // Somewhat clumsy way of checking for a statement without source in a pipeline. // See if our pipe has source but our statement does not. - if (!jc->pipe.unsourced && !jc->statement.try_source_range().has_value()) { + if (jc->pipe().ptr()->has_source() && !jc->statement().ptr()->try_source_range()) { has_unclosed_pipe = true; } - } else if (const auto *jcc = node.try_as()) { + } else if (const auto *jcc = node->try_as_job_conjunction_continuation()) { // Somewhat clumsy way of checking for a job without source in a conjunction. // See if our conjunction operator (&& or ||) has source but our job does not. 
- if (!jcc->conjunction.unsourced && !jcc->job.try_source_range().has_value()) { + if (jcc->conjunction().ptr()->has_source() && !jcc->job().try_source_range()) { has_unclosed_conjunction = true; } - } else if (const argument_t *arg = node.try_as()) { - const wcstring &arg_src = arg->source(buff_src, &storage); + } else if (const argument_t *arg = node->try_as_argument()) { + wcstring arg_src = *arg->source(buff_src); + storage = arg_src; res |= parse_util_detect_errors_in_argument(*arg, arg_src, out_errors); - } else if (const ast::job_pipeline_t *job = node.try_as()) { + } else if (const ast::job_pipeline_t *job = node->try_as_job_pipeline()) { // Disallow background in the following cases: // // foo & ; and bar @@ -1313,23 +1299,24 @@ parser_test_error_bits_t parse_util_detect_errors(const ast::ast_t &ast, const w // if foo & ; end // while foo & ; end // If it's not a background job, nothing to do. - if (job->bg) { + if (job->has_bg()) { errored |= detect_errors_in_backgrounded_job(*job, out_errors); } - } else if (const ast::decorated_statement_t *stmt = node.try_as()) { + } else if (const auto *stmt = node->try_as_decorated_statement()) { errored |= detect_errors_in_decorated_statement(buff_src, *stmt, &storage, out_errors); - } else if (const auto *block = node.try_as()) { + } else if (const auto *block = node->try_as_block_statement()) { // If our 'end' had no source, we are unsourced. - if (block->end.unsourced) has_unclosed_block = true; - errored |= detect_errors_in_block_redirection_list(block->args_or_redirs, out_errors); - } else if (const auto *ifs = node.try_as()) { + if (!block->end().ptr()->has_source()) has_unclosed_block = true; + errored |= detect_errors_in_block_redirection_list(block->args_or_redirs(), out_errors); + } else if (const auto *ifs = node->try_as_if_statement()) { // If our 'end' had no source, we are unsourced. 
- if (ifs->end.unsourced) has_unclosed_block = true; - errored |= detect_errors_in_block_redirection_list(ifs->args_or_redirs, out_errors); - } else if (const auto *switchs = node.try_as()) { + if (!ifs->end().ptr()->has_source()) has_unclosed_block = true; + errored |= detect_errors_in_block_redirection_list(ifs->args_or_redirs(), out_errors); + } else if (const auto *switchs = node->try_as_switch_statement()) { // If our 'end' had no source, we are unsourced. - if (switchs->end.unsourced) has_unclosed_block = true; - errored |= detect_errors_in_block_redirection_list(switchs->args_or_redirs, out_errors); + if (!switchs->end().ptr()->has_source()) has_unclosed_block = true; + errored |= + detect_errors_in_block_redirection_list(switchs->args_or_redirs(), out_errors); } } @@ -1354,7 +1341,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, // Parse the input string into an ast. Some errors are detected here. using namespace ast; auto parse_errors = new_parse_error_list(); - auto ast = ast_t::parse(buff_src, parse_flags, &*parse_errors); + auto ast = ast_parse(buff_src, parse_flags, &*parse_errors); if (allow_incomplete) { // Issue #1238: If the only error was unterminated quote, then consider this to have parsed // successfully. @@ -1384,7 +1371,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, } // Defer to the tree-walking version. - return parse_util_detect_errors(ast, buff_src, out_errors); + return parse_util_detect_errors(*ast, buff_src, out_errors); } maybe_t parse_util_detect_errors_in_argument_list(const wcstring &arg_list_src, @@ -1399,16 +1386,18 @@ maybe_t parse_util_detect_errors_in_argument_list(const wcstring &arg_ // Parse the string as a freestanding argument list. 
using namespace ast; auto errors = new_parse_error_list(); - auto ast = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &*errors); + auto ast = ast_parse_argument_list(arg_list_src, parse_flag_none, &*errors); if (!errors->empty()) { return get_error_text(*errors); } // Get the root argument list and extract arguments from it. // Test each of these. - for (const argument_t &arg : ast.top()->as()->arguments) { - const wcstring arg_src = arg.source(arg_list_src); - if (parse_util_detect_errors_in_argument(arg, arg_src, &*errors)) { + const auto &args = ast->top()->as_freestanding_argument_list().arguments(); + for (size_t i = 0; i < args.count(); i++) { + const argument_t *arg = args.at(i); + const wcstring arg_src = *arg->source(arg_list_src); + if (parse_util_detect_errors_in_argument(*arg, arg_src, &*errors)) { return get_error_text(*errors); } } diff --git a/src/parse_util.h b/src/parse_util.h index 54f492378..b589e2278 100644 --- a/src/parse_util.h +++ b/src/parse_util.h @@ -6,14 +6,12 @@ #include +#include "ast.h" #include "common.h" +#include "cxx.h" #include "maybe.h" #include "parse_constants.h" -namespace ast { -struct argument_t; -class ast_t; -} // namespace ast struct Tok; using tok_t = Tok; @@ -116,6 +114,47 @@ wchar_t parse_util_get_quote_type(const wcstring &cmd, size_t pos); wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote, bool no_tilde = false); +// Visit all of our nodes. When we get a job_list or case_item_list, increment indent while +// visiting its children. +struct IndentVisitor; +struct indent_visitor_t { + indent_visitor_t(const wcstring &src, std::vector &indents); + indent_visitor_t(const indent_visitor_t &) = delete; + indent_visitor_t &operator=(const indent_visitor_t &) = delete; + + int visit(const void *node); + void did_visit(int dec); + +#if INCLUDE_RUST_HEADERS + /// \return whether a maybe_newlines node contains at least one newline. 
+ bool has_newline(const ast::maybe_newlines_t &nls) const; + + void record_line_continuations_until(size_t offset); + + // The one-past-the-last index of the most recently encountered leaf node. + // We use this to populate the indents even if there's no tokens in the range. + size_t last_leaf_end{0}; + + // The last indent which we assigned. + int last_indent{-1}; + + // The source we are indenting. + const wcstring &src; + + // List of indents, which we populate. + std::vector &indents; + + // Initialize our starting indent to -1, as our top-level node is a job list which + // will immediately increment it. + int indent{-1}; + + // List of locations of escaped newline characters. + std::vector line_continuations; + + rust::Box visitor; +#endif +}; + /// Given a string, parse it as fish code and then return the indents. The return value has the same /// size as the string. std::vector parse_util_compute_indents(const wcstring &src); diff --git a/src/parser.cpp b/src/parser.cpp index d5d2e5ed0..f2f9160ed 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -189,18 +189,18 @@ completion_list_t parser_t::expand_argument_list(const wcstring &arg_list_src, expand_flags_t eflags, const operation_context_t &ctx) { // Parse the string as an argument list. - auto ast = ast::ast_t::parse_argument_list(arg_list_src); - if (ast.errored()) { + auto ast = ast_parse_argument_list(arg_list_src); + if (ast->errored()) { // Failed to parse. Here we expect to have reported any errors in test_args. return {}; } // Get the root argument list and extract arguments from it. 
completion_list_t result; - const ast::freestanding_argument_list_t *list = - ast.top()->as(); - for (const ast::argument_t &arg : list->arguments) { - wcstring arg_src = arg.source(arg_list_src); + const ast::freestanding_argument_list_t &list = ast->top()->as_freestanding_argument_list(); + for (size_t i = 0; i < list.arguments().count(); i++) { + const ast::argument_t &arg = *list.arguments().at(i); + wcstring arg_src = *arg.source(arg_list_src); if (expand_string(arg_src, &result, eflags, ctx) == expand_result_t::error) { break; // failed to expand a string } @@ -528,8 +528,9 @@ eval_res_t parser_t::eval(const wcstring &cmd, const io_chain_t &io, const job_group_ref_t &job_group, enum block_type_t block_type) { // Parse the source into a tree, if we can. auto error_list = new_parse_error_list(); - if (parsed_source_ref_t ps = parse_source(wcstring{cmd}, parse_flag_none, &*error_list)) { - return this->eval(ps, io, job_group, block_type); + auto ps = parse_source(wcstring{cmd}, parse_flag_none, &*error_list); + if (ps->has_value()) { + return this->eval(*ps, io, job_group, block_type); } else { // Get a backtrace. This includes the message. wcstring backtrace_and_desc; @@ -550,10 +551,10 @@ eval_res_t parser_t::eval_string_ffi1(const wcstring &cmd) { return eval(cmd, io eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io, const job_group_ref_t &job_group, enum block_type_t block_type) { assert(block_type == block_type_t::top || block_type == block_type_t::subst); - const auto *job_list = ps->ast.top()->as(); - if (!job_list->empty()) { + const auto &job_list = ps.ast().top()->as_job_list(); + if (!job_list.empty()) { // Execute the top job list. 
- return this->eval_node(ps, *job_list, io, job_group, block_type); + return this->eval_node(ps, job_list, io, job_group, block_type); } else { auto status = proc_status_t::from_exit_code(get_last_status()); bool break_expand = false; @@ -618,8 +619,8 @@ eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, const T &node, // Create and set a new execution context. using exc_ctx_ref_t = std::unique_ptr; - scoped_push exc(&execution_context, - make_unique(ps, op_ctx, block_io)); + scoped_push exc( + &execution_context, make_unique(ps.clone(), op_ctx, block_io)); // Check the exec count so we know if anything got executed. const size_t prev_exec_count = libdata().exec_count; diff --git a/src/proc.cpp b/src/proc.cpp index 12b7198a0..1178c28e6 100644 --- a/src/proc.cpp +++ b/src/proc.cpp @@ -255,7 +255,9 @@ static void handle_child_status(const shared_ptr &job, process_t *proc, } } -process_t::process_t() : proc_redirection_specs_(new_redirection_spec_list()) {} +process_t::process_t() + : block_node_source(empty_parsed_source_ref()), + proc_redirection_specs_(new_redirection_spec_list()) {} void process_t::check_generations_before_launch() { gens_ = topic_monitor_principal().current_generations(); diff --git a/src/proc.h b/src/proc.h index b597f9858..ae321b152 100644 --- a/src/proc.h +++ b/src/proc.h @@ -17,7 +17,9 @@ #include #include +#include "ast.h" #include "common.h" +#include "cxx.h" #include "maybe.h" #include "parse_tree.h" #include "redirection.h" @@ -53,10 +55,6 @@ using clock_ticks_t = uint64_t; /// This uses sysconf(_SC_CLK_TCK) to convert to seconds. double clock_ticks_to_seconds(clock_ticks_t ticks); -namespace ast { -struct statement_t; -} - struct job_group_t; using job_group_ref_t = std::shared_ptr; @@ -255,7 +253,7 @@ class process_t { /// For internal block processes only, the node of the statement. /// This is always either block, ifs, or switchs, never boolean or decorated. 
- parsed_source_ref_t block_node_source{}; + rust::Box block_node_source; const ast::statement_t *internal_block_node{}; struct concrete_assignment { diff --git a/src/reader.cpp b/src/reader.cpp index be0992c06..3e57f87e2 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -1421,13 +1421,13 @@ static std::vector extract_tokens(const wcstring &str) { parse_tree_flags_t ast_flags = parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated; - auto ast = ast::ast_t::parse(str, ast_flags); + auto ast = ast_parse(str, ast_flags); // Helper to check if a node is the command portion of an undecorated statement. - auto is_command = [&](const node_t *node) { - for (const node_t *cursor = node; cursor; cursor = cursor->parent) { - if (const auto *stmt = cursor->try_as()) { - if (!stmt->opt_decoration && node == &stmt->command) { + auto is_command = [&](const ast::node_t &node) { + for (auto cursor = node.ptr(); cursor->has_value(); cursor = cursor->parent()) { + if (const auto *stmt = cursor->try_as_decorated_statement()) { + if (!stmt->has_opt_decoration() && node.pointer_eq(*stmt->command().ptr())) { return true; } } @@ -1437,10 +1437,11 @@ static std::vector extract_tokens(const wcstring &str) { wcstring cmdsub_contents; std::vector result; - traversal_t tv = ast.walk(); - while (const node_t *node = tv.next()) { + for (auto tv = new_ast_traversal(*ast->top());;) { + auto node = tv->next(); + if (!node->has_value()) break; // We are only interested in leaf nodes with source. - if (node->category != category_t::leaf) continue; + if (node->category() != category_t::leaf) continue; source_range_t r = node->source_range(); if (r.length == 0) continue; @@ -1463,7 +1464,7 @@ static std::vector extract_tokens(const wcstring &str) { if (!has_cmd_subs) { // Common case of no command substitutions in this leaf node. 
- result.push_back(positioned_token_t{r, is_command(node)}); + result.push_back(positioned_token_t{r, is_command(*node)}); } } return result; @@ -4739,16 +4740,16 @@ static int read_ni(parser_t &parser, int fd, const io_chain_t &io) { // Parse into an ast and detect errors. auto errors = new_parse_error_list(); - auto ast = ast::ast_t::parse(str, parse_flag_none, &*errors); - bool errored = ast.errored(); + auto ast = ast_parse(str, parse_flag_none, &*errors); + bool errored = ast->errored(); if (!errored) { - errored = parse_util_detect_errors(ast, str, &*errors); + errored = parse_util_detect_errors(*ast, str, &*errors); } if (!errored) { // Construct a parsed source ref. // Be careful to transfer ownership, this could be a very large string. - parsed_source_ref_t ps = std::make_shared(std::move(str), std::move(ast)); - parser.eval(ps, io); + auto ps = new_parsed_source_ref(str, *ast); + parser.eval(*ps, io); return 0; } else { wcstring sb;