Port AST to Rust

The translation is fairly direct though it adds some duplication, for example
there are multiple "match" statements that mimic function overloading.

Rust has no overloading, and we cannot have generic methods in the Node trait
(due to a Rust limitation, the error is like "cannot be made into an object")
so we include the type name in method names.

Give clients like "indent_visitor_t" a Rust companion ("IndentVisitor")
that takes care of the AST traversal while the AST consumption remains
in C++ for now.  In future, "IndentVisitor" should absorb the entirety of
"indent_visitor_t".  This pattern requires that "fish_indent" be exposed
as an includable header to the CXX bridge.

Alternatively, we could define FFI wrappers for recursive AST traversal.

Rust requires we separate the AST visitors for "mut" and "const"
scenarios. Take this opportunity to concretize both visitors:

The only client that requires mutable access is the populator.  To match the
structure of the C++ populator which makes heavy use of function overloading,
we need to add a bunch of functions to the trait. Since there is no other
mutable visit, this seems acceptable.

The "const" visitors never use "will_visit_fields_of()" or
"did_visit_fields_of()", so remove them (though this is debatable).

Like in the C++ implementation, the AST nodes themselves are largely defined
via macros.  Union fields like "Statement" and "ArgumentOrRedirection"
currently do not use macros but may in the future.

This commit also introduces a precedent for a type that is defined in one
CXX bridge and used in another one - "ParseErrorList".  To make this work
we need to manually define "ExternType".

There is one annoyance with CXX: functions that take explicit lifetime
parameters must be marked as unsafe. This makes little sense
because functions that return `&Foo` with implicit lifetime can be
misused the same way on the C++ side.

One notable change is that we cannot directly port "find_block_open_keyword()"
(which is used to compute an error) because it relies on the stack of visited
nodes. We cannot modify a stack of node references while we do the "mut"
walk. Happily, an idiomatic solution is easy: we can tell the AST visitor
to backtrack to the parent node and create the error there.

Since "node_t::accept_base" is no longer a template we don't need the
"node_visitation_t" trampoline anymore.

The added copying at the FFI boundary makes things slower (memcpy dominates
the profile) but it's not unusable, which is good news:

    $ hyperfine ./fish.{old,new}" -c 'source ../share/completions/git.fish'"
    Benchmark 1: ./fish.old -c 'source ../share/completions/git.fish'
      Time (mean ± σ):     195.5 ms ±   2.9 ms    [User: 190.1 ms, System: 4.4 ms]
      Range (min … max):   193.2 ms … 205.1 ms    15 runs

    Benchmark 2: ./fish.new -c 'source ../share/completions/git.fish'
      Time (mean ± σ):     677.5 ms ±  62.0 ms    [User: 665.4 ms, System: 10.0 ms]
      Range (min … max):   611.7 ms … 805.5 ms    10 runs

    Summary
      './fish.old -c 'source ../share/completions/git.fish'' ran
        3.47 ± 0.32 times faster than './fish.new -c 'source ../share/completions/git.fish''

Leftovers:
- Enum variants are still snakecase; I didn't get around to changing this yet.
- "ast_type_to_string()" still returns a snakecase name. This could be
  changed since it's not user visible.
This commit is contained in:
Johannes Altmanninger 2023-04-02 16:42:59 +02:00
parent 915db44fbd
commit 971d257e67
38 changed files with 7685 additions and 3794 deletions

View file

@ -115,12 +115,12 @@ set(FISH_BUILTIN_SRCS
set(FISH_SRCS set(FISH_SRCS
src/ast.cpp src/autoload.cpp src/color.cpp src/common.cpp src/complete.cpp src/ast.cpp src/autoload.cpp src/color.cpp src/common.cpp src/complete.cpp
src/env.cpp src/env_dispatch.cpp src/env_universal_common.cpp src/event.cpp src/env.cpp src/env_dispatch.cpp src/env_universal_common.cpp src/event.cpp
src/exec.cpp src/expand.cpp src/fallback.cpp src/fish_version.cpp src/exec.cpp src/expand.cpp src/fallback.cpp src/fish_indent_common.cpp src/fish_version.cpp
src/flog.cpp src/function.cpp src/highlight.cpp src/flog.cpp src/function.cpp src/highlight.cpp
src/history.cpp src/history_file.cpp src/input.cpp src/input_common.cpp src/history.cpp src/history_file.cpp src/input.cpp src/input_common.cpp
src/io.cpp src/iothread.cpp src/kill.cpp src/io.cpp src/iothread.cpp src/kill.cpp
src/null_terminated_array.cpp src/operation_context.cpp src/output.cpp src/null_terminated_array.cpp src/operation_context.cpp src/output.cpp
src/pager.cpp src/parse_execution.cpp src/parse_tree.cpp src/parse_util.cpp src/pager.cpp src/parse_execution.cpp src/parse_util.cpp
src/parser.cpp src/parser_keywords.cpp src/path.cpp src/postfork.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp src/postfork.cpp
src/proc.cpp src/re.cpp src/reader.cpp src/screen.cpp src/proc.cpp src/re.cpp src/reader.cpp src/screen.cpp
src/signals.cpp src/tinyexpr.cpp src/utf8.cpp src/signals.cpp src/tinyexpr.cpp src/utf8.cpp

View file

@ -26,6 +26,7 @@ fn main() -> miette::Result<()> {
// This must come before autocxx so that cxx can emit its cxx.h header. // This must come before autocxx so that cxx can emit its cxx.h header.
let source_files = vec![ let source_files = vec![
"src/abbrs.rs", "src/abbrs.rs",
"src/ast.rs",
"src/event.rs", "src/event.rs",
"src/common.rs", "src/common.rs",
"src/fd_monitor.rs", "src/fd_monitor.rs",
@ -33,9 +34,13 @@ fn main() -> miette::Result<()> {
"src/fds.rs", "src/fds.rs",
"src/ffi_init.rs", "src/ffi_init.rs",
"src/ffi_tests.rs", "src/ffi_tests.rs",
"src/fish_indent.rs",
"src/future_feature_flags.rs", "src/future_feature_flags.rs",
"src/highlight.rs",
"src/job_group.rs", "src/job_group.rs",
"src/parse_constants.rs", "src/parse_constants.rs",
"src/parse_tree.rs",
"src/parse_util.rs",
"src/redirection.rs", "src/redirection.rs",
"src/smoke.rs", "src/smoke.rs",
"src/termsize.rs", "src/termsize.rs",

5708
fish-rust/src/ast.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -24,6 +24,7 @@ include_cpp! {
#include "event.h" #include "event.h"
#include "fallback.h" #include "fallback.h"
#include "fds.h" #include "fds.h"
#include "fish_indent_common.h"
#include "flog.h" #include "flog.h"
#include "function.h" #include "function.h"
#include "highlight.h" #include "highlight.h"
@ -57,6 +58,7 @@ include_cpp! {
generate!("get_flog_file_fd") generate!("get_flog_file_fd")
generate!("log_extra_to_flog_file") generate!("log_extra_to_flog_file")
generate!("indent_visitor_t")
generate!("parse_util_unescape_wildcards") generate!("parse_util_unescape_wildcards")
generate!("fish_wcwidth") generate!("fish_wcwidth")
@ -73,6 +75,8 @@ include_cpp! {
generate!("library_data_t") generate!("library_data_t")
generate_pod!("library_data_pod_t") generate_pod!("library_data_pod_t")
generate!("highlighter_t")
generate!("proc_wait_any") generate!("proc_wait_any")
generate!("output_stream_t") generate!("output_stream_t")
@ -89,6 +93,8 @@ include_cpp! {
generate!("builtin_print_error_trailer") generate!("builtin_print_error_trailer")
generate!("builtin_get_names_ffi") generate!("builtin_get_names_ffi")
generate!("pretty_printer_t")
generate!("escape_string") generate!("escape_string")
generate!("sig2wcs") generate!("sig2wcs")
generate!("wcs2sig") generate!("wcs2sig")

View file

@ -0,0 +1,92 @@
use crate::ast::{self, Category, Node, NodeFfi, NodeVisitor, Type};
use crate::ffi::pretty_printer_t;
use crate::parse_constants::ParseTokenType;
use std::pin::Pin;
// Rust-side companion of the C++ `pretty_printer_t`: it walks the AST and forwards each
// interesting node to the C++ object, which still does the actual printing.
struct PrettyPrinter<'a> {
// Pinned mutable handle to the C++ pretty printer that receives the callbacks.
companion: Pin<&'a mut pretty_printer_t>,
}
impl<'a> NodeVisitor<'a> for &mut PrettyPrinter<'a> {
    // Dispatch on the kind of `node`: leaves and a few special node types are handed to the C++
    // companion, while branch and list nodes are traversed recursively.
    fn visit(&mut self, node: &'a dyn Node) {
        let ffi_node = NodeFfi::new(node);

        // Keywords are leaves: only their text is emitted.
        if node.as_keyword().is_some() {
            self.companion
                .as_mut()
                .emit_node_text((&ffi_node as *const NodeFfi<'_>).cast());
            return;
        }

        // Tokens are leaves as well; `end` tokens go through `visit_semi_nl`, all others are
        // emitted as plain text.
        if let Some(token) = node.as_token() {
            if token.token_type() == ParseTokenType::end {
                self.companion
                    .as_mut()
                    .visit_semi_nl((&ffi_node as *const NodeFfi<'_>).cast());
            } else {
                self.companion
                    .as_mut()
                    .emit_node_text((&ffi_node as *const NodeFfi<'_>).cast());
            }
            return;
        }

        match node.typ() {
            Type::argument | Type::variable_assignment => {
                self.companion
                    .as_mut()
                    .emit_node_text((&ffi_node as *const NodeFfi<'_>).cast());
            }
            Type::redirection => {
                let redirection = node.as_redirection().unwrap() as *const ast::Redirection;
                self.companion
                    .as_mut()
                    .visit_redirection(redirection.cast());
            }
            Type::maybe_newlines => {
                let newlines = node.as_maybe_newlines().unwrap() as *const ast::MaybeNewlines;
                self.companion
                    .as_mut()
                    .visit_maybe_newlines(newlines.cast());
            }
            Type::begin_header => {
                // 'begin' does not require a newline after it, but we insert one.
                node.accept(self, false);
                self.companion.as_mut().visit_begin_header();
            }
            // For branch and list nodes, default is to visit their children.
            _ if [Category::branch, Category::list].contains(&node.category()) => {
                node.accept(self, false);
            }
            _ => panic!("unexpected node type"),
        }
    }
}
// CXX bridge for the pretty printer. It imports the C++ types used by the Rust side and exports
// the Rust `PrettyPrinter` so that C++ can create one and start a traversal through it.
#[cxx::bridge]
#[allow(clippy::needless_lifetimes)] // false positive
mod fish_indent_ffi {
extern "C++" {
include!("ast.h");
include!("fish_indent_common.h");
// These types are defined in other modules and only re-used by this bridge.
type pretty_printer_t = crate::ffi::pretty_printer_t;
type Ast = crate::ast::Ast;
type NodeFfi<'a> = crate::ast::NodeFfi<'a>;
}
extern "Rust" {
type PrettyPrinter<'a>;
// NOTE(review): presumably `unsafe` only because CXX demands it for signatures that carry
// lifetimes - confirm.
unsafe fn new_pretty_printer(
companion: Pin<&mut pretty_printer_t>,
) -> Box<PrettyPrinter<'_>>;
// Exposed to C++ under the name "visit".
#[cxx_name = "visit"]
unsafe fn visit_ffi<'a>(self: &mut PrettyPrinter<'a>, node: &'a NodeFfi<'a>);
}
}
// Create a boxed `PrettyPrinter` that forwards to the given C++ `companion`.
fn new_pretty_printer(companion: Pin<&mut pretty_printer_t>) -> Box<PrettyPrinter<'_>> {
    let printer = PrettyPrinter { companion };
    Box::new(printer)
}
impl<'a> PrettyPrinter<'a> {
    // FFI entry point: unwrap the `NodeFfi` handle and run the regular visitor on the node.
    fn visit_ffi(mut self: &mut PrettyPrinter<'a>, node: &'a NodeFfi<'a>) {
        let target = node.as_node();
        self.visit(target);
    }
}

139
fish-rust/src/highlight.rs Normal file
View file

@ -0,0 +1,139 @@
use crate::ast::{
Argument, Ast, BlockStatement, BlockStatementHeaderVariant, DecoratedStatement, Keyword, Node,
NodeFfi, NodeVisitor, Redirection, Token, Type, VariableAssignment,
};
use crate::ffi::highlighter_t;
use crate::parse_constants::ParseTokenType;
use std::pin::Pin;
// Rust-side companion of the C++ `highlighter_t`: it performs the AST traversal and forwards the
// visited nodes to the C++ object.
struct Highlighter<'a> {
// Pinned mutable handle to the C++ highlighter that receives the callbacks.
companion: Pin<&'a mut highlighter_t>,
// The AST handed to `new_highlighter`; it is stored but not read by the code in this file.
ast: &'a Ast,
}
impl<'a> Highlighter<'a> {
    // Recurse into the children of `node` using this highlighter as the visitor.
    fn visit_children(&mut self, node: &'a dyn Node) {
        node.accept(self, false);
    }

    // AST visitor implementations. Each forwards a raw pointer to the node to the C++ companion.

    fn visit_keyword(&mut self, node: &dyn Keyword) {
        let ffi_node = NodeFfi::new(node.leaf_as_node_ffi());
        let ffi_ptr = &ffi_node as *const NodeFfi<'_>;
        self.companion.as_mut().visit_keyword(ffi_ptr.cast());
    }

    fn visit_token(&mut self, node: &dyn Token) {
        let ffi_node = NodeFfi::new(node.leaf_as_node_ffi());
        let ffi_ptr = &ffi_node as *const NodeFfi<'_>;
        self.companion.as_mut().visit_token(ffi_ptr.cast());
    }

    fn visit_argument(&mut self, node: &Argument) {
        let argument_ptr = node as *const Argument;
        self.companion
            .as_mut()
            .visit_argument(argument_ptr.cast(), false, true);
    }

    fn visit_redirection(&mut self, node: &Redirection) {
        let redirection_ptr = node as *const Redirection;
        self.companion
            .as_mut()
            .visit_redirection(redirection_ptr.cast());
    }

    fn visit_variable_assignment(&mut self, node: &VariableAssignment) {
        let assignment_ptr = node as *const VariableAssignment;
        self.companion
            .as_mut()
            .visit_variable_assignment(assignment_ptr.cast());
    }

    fn visit_semi_nl(&mut self, node: &dyn Node) {
        let ffi_node = NodeFfi::new(node);
        let ffi_ptr = &ffi_node as *const NodeFfi<'_>;
        self.companion.as_mut().visit_semi_nl(ffi_ptr.cast());
    }

    fn visit_decorated_statement(&mut self, node: &DecoratedStatement) {
        let statement_ptr = node as *const DecoratedStatement;
        self.companion
            .as_mut()
            .visit_decorated_statement(statement_ptr.cast());
    }

    // Block statements are visited piecewise: header and arguments/redirections first, then the
    // companion's first hook, then the body and the end keyword, and finally the second hook,
    // which receives the value returned by the first one.
    fn visit_block_statement(&mut self, node: &'a BlockStatement) {
        match &*node.header {
            BlockStatementHeaderVariant::None => panic!(),
            BlockStatementHeaderVariant::ForHeader(header) => self.visit(header),
            BlockStatementHeaderVariant::WhileHeader(header) => self.visit(header),
            BlockStatementHeaderVariant::FunctionHeader(header) => self.visit(header),
            BlockStatementHeaderVariant::BeginHeader(header) => self.visit(header),
        }
        self.visit(&node.args_or_redirs);

        let block_ptr = node as *const BlockStatement;
        let pending_variables_count = self
            .companion
            .as_mut()
            .visit_block_statement1(block_ptr.cast());

        self.visit(&node.jobs);
        self.visit(&node.end);

        self.companion
            .as_mut()
            .visit_block_statement2(pending_variables_count);
    }
}
impl<'a> NodeVisitor<'a> for Highlighter<'a> {
    // Route `node` to the specialized visit method for its kind; node types without a
    // specialized method just have their children visited.
    fn visit(&mut self, node: &'a dyn Node) {
        if let Some(keyword) = node.as_keyword() {
            self.visit_keyword(keyword);
            return;
        }

        if let Some(token) = node.as_token() {
            // `end` tokens are handled by `visit_semi_nl`, every other token by `visit_token`.
            if token.token_type() == ParseTokenType::end {
                self.visit_semi_nl(node);
            } else {
                self.visit_token(token);
            }
            return;
        }

        match node.typ() {
            Type::argument => {
                let argument = node.as_argument().unwrap();
                self.visit_argument(argument)
            }
            Type::redirection => {
                let redirection = node.as_redirection().unwrap();
                self.visit_redirection(redirection)
            }
            Type::variable_assignment => {
                let assignment = node.as_variable_assignment().unwrap();
                self.visit_variable_assignment(assignment)
            }
            Type::decorated_statement => {
                let statement = node.as_decorated_statement().unwrap();
                self.visit_decorated_statement(statement)
            }
            Type::block_statement => {
                let block = node.as_block_statement().unwrap();
                self.visit_block_statement(block)
            }
            // Default implementation is to just visit children.
            _ => self.visit_children(node),
        }
    }
}
// CXX bridge for the highlighter. It imports the C++ types used by the Rust side and exports the
// Rust `Highlighter` so that C++ can create one and ask it to visit a node's children.
#[cxx::bridge]
#[allow(clippy::needless_lifetimes)] // false positive
mod highlighter_ffi {
extern "C++" {
include!("ast.h");
include!("highlight.h");
include!("parse_constants.h");
// These types are defined in other modules and only re-used by this bridge.
type highlighter_t = crate::ffi::highlighter_t;
type Ast = crate::ast::Ast;
type NodeFfi<'a> = crate::ast::NodeFfi<'a>;
}
extern "Rust" {
type Highlighter<'a>;
// NOTE(review): presumably `unsafe` only because CXX demands it for signatures with explicit
// lifetime parameters - confirm.
unsafe fn new_highlighter<'a>(
companion: Pin<&'a mut highlighter_t>,
ast: &'a Ast,
) -> Box<Highlighter<'a>>;
// Exposed to C++ under the name "visit_children".
#[cxx_name = "visit_children"]
unsafe fn visit_children_ffi<'a>(self: &mut Highlighter<'a>, node: &'a NodeFfi<'a>);
}
}
// Create a boxed `Highlighter` that forwards to the given C++ `companion` and remembers `ast`.
fn new_highlighter<'a>(
    companion: Pin<&'a mut highlighter_t>,
    ast: &'a Ast,
) -> Box<Highlighter<'a>> {
    let highlighter = Highlighter { companion, ast };
    Box::new(highlighter)
}
impl<'a> Highlighter<'a> {
    // FFI entry point: unwrap the `NodeFfi` handle and visit the children of the node.
    fn visit_children_ffi(&mut self, node: &'a NodeFfi<'a>) {
        let parent = node.as_node();
        self.visit_children(parent);
    }
}

View file

@ -11,6 +11,7 @@
mod common; mod common;
mod abbrs; mod abbrs;
mod ast;
mod builtins; mod builtins;
mod color; mod color;
mod compat; mod compat;
@ -29,14 +30,18 @@ mod fds;
mod ffi; mod ffi;
mod ffi_init; mod ffi_init;
mod ffi_tests; mod ffi_tests;
mod fish_indent;
mod flog; mod flog;
mod future_feature_flags; mod future_feature_flags;
mod global_safety; mod global_safety;
mod highlight;
mod io; mod io;
mod job_group; mod job_group;
mod locale; mod locale;
mod nix; mod nix;
mod parse_constants; mod parse_constants;
mod parse_tree;
mod parse_util;
mod path; mod path;
mod re; mod re;
mod redirection; mod redirection;

View file

@ -5,6 +5,7 @@ use crate::tokenizer::variable_assignment_equals_pos;
use crate::wchar::{wstr, WString, L}; use crate::wchar::{wstr, WString, L};
use crate::wchar_ffi::{wcharz, WCharFromFFI, WCharToFFI}; use crate::wchar_ffi::{wcharz, WCharFromFFI, WCharToFFI};
use crate::wutil::{sprintf, wgettext_fmt}; use crate::wutil::{sprintf, wgettext_fmt};
use cxx::{type_id, ExternType};
use cxx::{CxxWString, UniquePtr}; use cxx::{CxxWString, UniquePtr};
use std::ops::{BitAnd, BitOr, BitOrAssign}; use std::ops::{BitAnd, BitOr, BitOrAssign};
use widestring_suffix::widestrs; use widestring_suffix::widestrs;
@ -616,8 +617,14 @@ fn token_type_user_presentable_description_ffi(
} }
/// TODO This should be type alias once we drop the FFI. /// TODO This should be type alias once we drop the FFI.
#[derive(Clone)]
pub struct ParseErrorList(pub Vec<ParseError>); pub struct ParseErrorList(pub Vec<ParseError>);
unsafe impl ExternType for ParseErrorList {
type Id = type_id!("ParseErrorList");
type Kind = cxx::kind::Opaque;
}
/// Helper function to offset error positions by the given amount. This is used when determining /// Helper function to offset error positions by the given amount. This is used when determining
/// errors in a substring of a larger source buffer. /// errors in a substring of a larger source buffer.
pub fn parse_error_offset_source_start(errors: &mut ParseErrorList, amt: usize) { pub fn parse_error_offset_source_start(errors: &mut ParseErrorList, amt: usize) {

190
fish-rust/src/parse_tree.rs Normal file
View file

@ -0,0 +1,190 @@
//! Programmatic representation of fish code.
use std::pin::Pin;
use std::rc::Rc;
use crate::ast::Ast;
use crate::parse_constants::{
token_type_user_presentable_description, ParseErrorCode, ParseErrorList, ParseKeyword,
ParseTokenType, ParseTreeFlags, SourceOffset, SourceRange, PARSE_FLAG_CONTINUE_AFTER_ERROR,
SOURCE_OFFSET_INVALID,
};
use crate::tokenizer::TokenizerError;
use crate::wchar::{wstr, WString, L};
use crate::wchar_ffi::{WCharFromFFI, WCharToFFI};
use crate::wutil::sprintf;
use cxx::{CxxWString, UniquePtr};
/// A struct representing the token type that we use internally.
#[derive(Clone, Copy)]
pub struct ParseToken {
/// The type of the token as represented by the parser
pub typ: ParseTokenType,
/// Any keyword represented by this token
pub keyword: ParseKeyword,
/// Hackish: whether the source contains a dash prefix
pub has_dash_prefix: bool,
/// Hackish: whether the source looks like '-h' or '--help'
pub is_help_argument: bool,
/// Hackish: if TOK_END, whether the source is a newline.
pub is_newline: bool,
/// Hackish: whether this token is a string like FOO=bar
pub may_be_variable_assignment: bool,
/// If this is a tokenizer error, that error.
pub tok_error: TokenizerError,
/// Start of the token's source range; `SOURCE_OFFSET_INVALID` for a freshly created token.
pub source_start: SourceOffset,
/// Length of the token's source range.
pub source_length: SourceOffset,
}
impl ParseToken {
    /// Create a token of the given type with no keyword, all flags cleared, no tokenizer error
    /// and an invalid (empty) source range.
    pub fn new(typ: ParseTokenType) -> Self {
        Self {
            typ,
            keyword: ParseKeyword::none,
            tok_error: TokenizerError::none,
            has_dash_prefix: false,
            is_help_argument: false,
            is_newline: false,
            may_be_variable_assignment: false,
            source_start: SOURCE_OFFSET_INVALID,
            source_length: 0,
        }
    }

    /// \return the source range.
    /// Note the start may be invalid.
    pub fn range(&self) -> SourceRange {
        let (start, length) = (self.source_start, self.source_length);
        SourceRange::new(start, length)
    }

    /// \return whether we are a string with the dash prefix set.
    pub fn is_dash_prefix_string(&self) -> bool {
        let is_string = self.typ == ParseTokenType::string;
        is_string && self.has_dash_prefix
    }

    /// \return a string description of this token: the name of its type, followed by the
    /// keyword in angle brackets if the token carries one.
    pub fn describe(&self) -> WString {
        let type_name = Into::<&'static wstr>::into(self.typ);
        let mut description = type_name.to_owned();
        if self.keyword != ParseKeyword::none {
            let keyword_name = Into::<&'static wstr>::into(self.keyword);
            description += &sprintf!(L!(" <%ls>"), keyword_name)[..]
        }
        description
    }

    /// \return the user-presentable description for this token's type and keyword.
    pub fn user_presentable_description(&self) -> WString {
        let (typ, keyword) = (self.typ, self.keyword);
        token_type_user_presentable_description(typ, keyword)
    }
}
impl From<TokenizerError> for ParseErrorCode {
    /// Map a tokenizer error to the matching parse error code. Errors without a dedicated
    /// code are reported as `tokenizer_other`.
    fn from(err: TokenizerError) -> Self {
        match err {
            TokenizerError::none => Self::none,
            TokenizerError::unterminated_quote => Self::tokenizer_unterminated_quote,
            TokenizerError::unterminated_subshell => Self::tokenizer_unterminated_subshell,
            TokenizerError::unterminated_slice => Self::tokenizer_unterminated_slice,
            TokenizerError::unterminated_escape => Self::tokenizer_unterminated_escape,
            _ => Self::tokenizer_other,
        }
    }
}
/// A type wrapping up a parse tree and the original source behind it.
pub struct ParsedSource {
/// The original source text.
src: WString,
/// The same source converted with `to_ffi()`, kept so a `&CxxWString` can be handed to C++.
src_ffi: UniquePtr<CxxWString>,
/// The tree parsed from the source.
ast: Ast,
}
impl ParsedSource {
fn new(src: WString, ast: Ast) -> Self {
let src_ffi = src.to_ffi();
ParsedSource { src, src_ffi, ast }
}
}
/// An optional, reference-counted handle to a `ParsedSource`; `None` plays the role of null.
pub type ParsedSourceRef = Option<Rc<ParsedSource>>;
/// Parse `src` and return a shared pointer to the resulting ParsedSource, or `None` on failure.
/// Unless PARSE_FLAG_CONTINUE_AFTER_ERROR is set in `flags`, any parse error counts as failure.
/// `errors` is passed through to the AST parser.
pub fn parse_source(
    src: WString,
    flags: ParseTreeFlags,
    errors: &mut Option<ParseErrorList>,
) -> ParsedSourceRef {
    let ast = Ast::parse(&src, flags, errors);
    if ast.errored() && !(flags & PARSE_FLAG_CONTINUE_AFTER_ERROR) {
        return None;
    }
    let parsed = ParsedSource::new(src, ast);
    Some(Rc::new(parsed))
}
/// Newtype around `ParsedSourceRef` that is exported to C++ through the bridge below.
struct ParsedSourceRefFFI(pub ParsedSourceRef);
// CXX bridge exporting `ParsedSourceRefFFI` and its constructors/accessors to C++.
#[cxx::bridge]
mod parse_tree_ffi {
extern "C++" {
include!("ast.h");
// These types are defined in other modules and only re-used by this bridge.
pub type Ast = crate::ast::Ast;
pub type ParseErrorList = crate::parse_constants::ParseErrorList;
}
extern "Rust" {
type ParsedSourceRefFFI;
// Create a handle that holds no parsed source.
fn empty_parsed_source_ref() -> Box<ParsedSourceRefFFI>;
// Whether the handle holds a parsed source.
fn has_value(&self) -> bool;
// Create a handle from a source string and an AST; the AST is moved out of `ast`.
fn new_parsed_source_ref(src: &CxxWString, ast: Pin<&mut Ast>) -> Box<ParsedSourceRefFFI>;
// Exposed to C++ under the name "parse_source"; `errors` may be null.
#[cxx_name = "parse_source"]
fn parse_source_ffi(
src: &CxxWString,
flags: u8,
errors: *mut ParseErrorList,
) -> Box<ParsedSourceRefFFI>;
fn clone(self: &ParsedSourceRefFFI) -> Box<ParsedSourceRefFFI>;
// The two accessors below panic if the handle holds no parsed source.
fn src(self: &ParsedSourceRefFFI) -> &CxxWString;
fn ast(self: &ParsedSourceRefFFI) -> &Ast;
}
}
impl ParsedSourceRefFFI {
    /// \return whether this handle holds a parsed source.
    fn has_value(&self) -> bool {
        matches!(&self.0, Some(_))
    }
}
/// Create a handle that holds no parsed source.
fn empty_parsed_source_ref() -> Box<ParsedSourceRefFFI> {
    let empty = ParsedSourceRefFFI(None);
    Box::new(empty)
}
/// Create a handle from `src` and `ast`. The AST is moved out of `ast`, which is left holding a
/// default-constructed `Ast`.
fn new_parsed_source_ref(src: &CxxWString, ast: Pin<&mut Ast>) -> Box<ParsedSourceRefFFI> {
    let stolen_ast = std::mem::replace(ast.get_mut(), Ast::default());
    let parsed = ParsedSource::new(src.from_ffi(), stolen_ast);
    Box::new(ParsedSourceRefFFI(Some(Rc::new(parsed))))
}
/// FFI wrapper around `parse_source`. `errors` may be null; otherwise the pointed-to list is
/// copied, handed to the parser, and the resulting list is written back through the pointer.
fn parse_source_ffi(
    src: &CxxWString,
    flags: u8,
    errors: *mut ParseErrorList,
) -> Box<ParsedSourceRefFFI> {
    // `as_ref()` yields `None` for a null pointer, so no error list is collected in that case.
    let mut out_errors: Option<ParseErrorList> = unsafe { errors.as_ref() }.cloned();
    let parsed = parse_source(src.from_ffi(), ParseTreeFlags(flags), &mut out_errors);
    if let Some(collected) = out_errors {
        // This is only reached when the list was populated from a non-null `errors` above,
        // or when the parser itself produced a list.
        unsafe { *errors = collected };
    }
    Box::new(ParsedSourceRefFFI(parsed))
}
impl ParsedSourceRefFFI {
    /// \return a new boxed handle sharing the same parsed source (if any).
    fn clone(&self) -> Box<ParsedSourceRefFFI> {
        let shared = self.0.clone();
        Box::new(ParsedSourceRefFFI(shared))
    }

    /// \return the source text in its FFI representation. Panics if the handle is empty.
    fn src(&self) -> &CxxWString {
        let parsed = self.0.as_ref().unwrap();
        &parsed.src_ffi
    }

    /// \return the parsed AST. Panics if the handle is empty.
    fn ast(&self) -> &Ast {
        let parsed = self.0.as_ref().unwrap();
        &parsed.ast
    }
}

View file

@ -0,0 +1,48 @@
use crate::ast::{Node, NodeFfi, NodeVisitor};
use crate::ffi::indent_visitor_t;
use std::pin::Pin;
// Rust-side companion of the C++ `indent_visitor_t`: it takes care of the AST traversal while the
// C++ object is notified about each visited node.
struct IndentVisitor<'a> {
// Pinned mutable handle to the C++ indent visitor that receives the callbacks.
companion: Pin<&'a mut indent_visitor_t>,
}
impl<'a> NodeVisitor<'a> for IndentVisitor<'a> {
    // Notify the companion about `node`, visit the node's children, then pass the value the
    // companion returned back to it via `did_visit`.
    fn visit(&mut self, node: &'a dyn Node) {
        let ffi_node = NodeFfi::new(node);
        let ffi_ptr = &ffi_node as *const NodeFfi<'_>;
        let dec = self.companion.as_mut().visit(ffi_ptr.cast());
        node.accept(self, false);
        self.companion.as_mut().did_visit(dec);
    }
}
// CXX bridge for the indent visitor. It imports the C++ types used by the Rust side and exports
// the Rust `IndentVisitor` so that C++ can create one and start a traversal through it.
#[cxx::bridge]
#[allow(clippy::needless_lifetimes)] // false positive
mod parse_util_ffi {
extern "C++" {
include!("ast.h");
include!("parse_util.h");
// These types are defined in other modules and only re-used by this bridge.
type indent_visitor_t = crate::ffi::indent_visitor_t;
type Ast = crate::ast::Ast;
type NodeFfi<'a> = crate::ast::NodeFfi<'a>;
}
extern "Rust" {
type IndentVisitor<'a>;
// NOTE(review): presumably `unsafe` only because CXX demands it for signatures that carry
// lifetimes - confirm.
unsafe fn new_indent_visitor(
companion: Pin<&mut indent_visitor_t>,
) -> Box<IndentVisitor<'_>>;
// Exposed to C++ under the name "visit".
#[cxx_name = "visit"]
unsafe fn visit_ffi<'a>(self: &mut IndentVisitor<'a>, node: &'a NodeFfi<'a>);
}
}
// Create a boxed `IndentVisitor` that forwards to the given C++ `companion`.
fn new_indent_visitor(companion: Pin<&mut indent_visitor_t>) -> Box<IndentVisitor<'_>> {
    let visitor = IndentVisitor { companion };
    Box::new(visitor)
}
impl<'a> IndentVisitor<'a> {
    // FFI entry point: unwrap the `NodeFfi` handle and run the regular visitor on the node.
    fn visit_ffi(self: &mut IndentVisitor<'a>, node: &'a NodeFfi<'a>) {
        let target = node.as_node();
        self.visit(target);
    }
}

File diff suppressed because it is too large Load diff

1068
src/ast.h

File diff suppressed because it is too large Load diff

View file

@ -1,60 +0,0 @@
// Define ELEM and optionally ELEMLIST before including this file.
// ELEM is for ordinary nodes.
// ELEMLIST(x, y) marks list nodes and the type they contain.
#ifndef ELEMLIST
#define ELEMLIST(x, y) ELEM(x)
#endif
ELEM(keyword_base)
ELEM(token_base)
ELEM(maybe_newlines)
ELEM(argument)
ELEMLIST(argument_list, argument)
ELEM(redirection)
ELEM(argument_or_redirection)
ELEMLIST(argument_or_redirection_list, argument_or_redirection)
ELEM(variable_assignment)
ELEMLIST(variable_assignment_list, variable_assignment)
ELEM(job_pipeline)
ELEM(job_conjunction)
// For historical reasons, a job list is a list of job *conjunctions*. This should be fixed.
ELEMLIST(job_list, job_conjunction)
ELEM(job_conjunction_continuation)
ELEMLIST(job_conjunction_continuation_list, job_conjunction_continuation)
ELEM(job_continuation)
ELEMLIST(job_continuation_list, job_continuation)
ELEM(andor_job)
ELEMLIST(andor_job_list, andor_job)
ELEM(statement)
ELEM(not_statement)
ELEM(block_statement)
ELEM(for_header)
ELEM(while_header)
ELEM(function_header)
ELEM(begin_header)
ELEM(if_statement)
ELEM(if_clause)
ELEM(elseif_clause)
ELEMLIST(elseif_clause_list, elseif_clause)
ELEM(else_clause)
ELEM(switch_statement)
ELEM(case_item)
ELEMLIST(case_item_list, case_item)
ELEM(decorated_statement)
ELEM(freestanding_argument_list)
#undef ELEM
#undef ELEMLIST

View file

@ -231,7 +231,7 @@ static int validate_function_name(int argc, const wchar_t *const *argv, wcstring
/// function. /// function.
int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args, int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args,
const parsed_source_ref_t &source, const ast::block_statement_t &func_node) { const parsed_source_ref_t &source, const ast::block_statement_t &func_node) {
assert(source && "Missing source in builtin_function"); assert(source.has_value() && "Missing source in builtin_function");
// The wgetopt function expects 'function' as the first argument. Make a new wcstring_list with // The wgetopt function expects 'function' as the first argument. Make a new wcstring_list with
// that property. This is needed because this builtin has a different signature than the other // that property. This is needed because this builtin has a different signature than the other
// builtins. // builtins.
@ -280,7 +280,7 @@ int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_lis
auto props = std::make_shared<function_properties_t>(); auto props = std::make_shared<function_properties_t>();
props->shadow_scope = opts.shadow_scope; props->shadow_scope = opts.shadow_scope;
props->named_arguments = std::move(opts.named_arguments); props->named_arguments = std::move(opts.named_arguments);
props->parsed_source = source; props->parsed_source = source.clone();
props->func_node = &func_node; props->func_node = &func_node;
props->description = opts.description; props->description = opts.description;
props->definition_file = parser.libdata().current_filename; props->definition_file = parser.libdata().current_filename;

View file

@ -2,17 +2,13 @@
#ifndef FISH_BUILTIN_FUNCTION_H #ifndef FISH_BUILTIN_FUNCTION_H
#define FISH_BUILTIN_FUNCTION_H #define FISH_BUILTIN_FUNCTION_H
#include "../ast.h"
#include "../common.h" #include "../common.h"
#include "../parse_tree.h" #include "../parse_tree.h"
class parser_t; class parser_t;
struct io_streams_t; struct io_streams_t;
namespace ast { int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args,
struct block_statement_t; const parsed_source_ref_t &source, const ast::block_statement_t &func_node);
}
int builtin_function(parser_t &parser, io_streams_t &streams,
const wcstring_list_t &c_args, const parsed_source_ref_t &source,
const ast::block_statement_t &func_node);
#endif #endif

View file

@ -634,11 +634,14 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job,
job_group_ref_t job_group = job->group; job_group_ref_t job_group = job->group;
if (p->type == process_type_t::block_node) { if (p->type == process_type_t::block_node) {
const parsed_source_ref_t &source = p->block_node_source; const parsed_source_ref_t &source = *p->block_node_source;
const ast::statement_t *node = p->internal_block_node; const ast::statement_t *node = p->internal_block_node;
assert(source && node && "Process is missing node info"); assert(source.has_value() && node && "Process is missing node info");
// The lambda will convert into a std::function which requires copyability. A Box can't
// be copied, so add another indirection.
auto source_box = std::make_shared<rust::Box<ParsedSourceRefFFI>>(source.clone());
return [=](parser_t &parser) { return [=](parser_t &parser) {
return parser.eval_node(source, *node, io_chain, job_group).status; return parser.eval_node(**source_box, *node, io_chain, job_group).status;
}; };
} else { } else {
assert(p->type == process_type_t::function); assert(p->type == process_type_t::function);
@ -650,9 +653,9 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job,
const wcstring_list_t &argv = p->argv(); const wcstring_list_t &argv = p->argv();
return [=](parser_t &parser) { return [=](parser_t &parser) {
// Pull out the job list from the function. // Pull out the job list from the function.
const ast::job_list_t &body = props->func_node->jobs; const ast::job_list_t &body = props->func_node->jobs();
const block_t *fb = function_prepare_environment(parser, argv, *props); const block_t *fb = function_prepare_environment(parser, argv, *props);
auto res = parser.eval_node(props->parsed_source, body, io_chain, job_group); auto res = parser.eval_node(*props->parsed_source, body, io_chain, job_group);
function_restore_environment(parser, fb); function_restore_environment(parser, fb);
// If the function did not execute anything, treat it as success. // If the function did not execute anything, treat it as success.

9
src/ffi_baggage.h Normal file
View file

@ -0,0 +1,9 @@
#ifndef FISH_FFI_BAGGAGE_H
#define FISH_FFI_BAGGAGE_H

#include "fish_indent_common.h"

// Symbols that get autocxx bindings but are not used in a given binary, will cause "undefined
// reference" when trying to link that binary. Work around this by marking them as used in
// all binaries.
//
// The include guard above keeps a repeated inclusion within one translation unit from
// redefining the function.
// NOTE(review): the function is not `inline`, so this header presumably must be included from
// exactly one translation unit per binary - confirm.
void mark_as_used() {
    // Constructing a temporary references the pretty_printer_t constructor.
    pretty_printer_t({}, {});
}

#endif

View file

@ -45,6 +45,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#include "expand.h" #include "expand.h"
#include "fallback.h" // IWYU pragma: keep #include "fallback.h" // IWYU pragma: keep
#include "fds.h" #include "fds.h"
#include "ffi_baggage.h"
#include "ffi_init.rs.h" #include "ffi_init.rs.h"
#include "fish_version.h" #include "fish_version.h"
#include "flog.h" #include "flog.h"
@ -264,17 +265,16 @@ static int run_command_list(parser_t &parser, const std::vector<std::string> &cm
wcstring cmd_wcs = str2wcstring(cmd); wcstring cmd_wcs = str2wcstring(cmd);
// Parse into an ast and detect errors. // Parse into an ast and detect errors.
auto errors = new_parse_error_list(); auto errors = new_parse_error_list();
auto ast = ast::ast_t::parse(cmd_wcs, parse_flag_none, &*errors); auto ast = ast_parse(cmd_wcs, parse_flag_none, &*errors);
bool errored = ast.errored(); bool errored = ast->errored();
if (!errored) { if (!errored) {
errored = parse_util_detect_errors(ast, cmd_wcs, &*errors); errored = parse_util_detect_errors(*ast, cmd_wcs, &*errors);
} }
if (!errored) { if (!errored) {
// Construct a parsed source ref. // Construct a parsed source ref.
// Be careful to transfer ownership, this could be a very large string. // Be careful to transfer ownership, this could be a very large string.
parsed_source_ref_t ps = auto ps = new_parsed_source_ref(cmd_wcs, *ast);
std::make_shared<parsed_source_t>(std::move(cmd_wcs), std::move(ast)); parser.eval(*ps, io);
parser.eval(ps, io);
} else { } else {
wcstring sb; wcstring sb;
parser.get_backtrace(cmd_wcs, *errors, sb); parser.get_backtrace(cmd_wcs, *errors, sb);

View file

@ -36,29 +36,21 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#include <vector> #include <vector>
#include "ast.h" #include "ast.h"
#include "common.h"
#include "env.h" #include "env.h"
#include "expand.h"
#include "fds.h" #include "fds.h"
#include "ffi_baggage.h"
#include "ffi_init.rs.h" #include "ffi_init.rs.h"
#include "fish_indent_common.h"
#include "fish_version.h" #include "fish_version.h"
#include "flog.h" #include "flog.h"
#include "future_feature_flags.h" #include "future_feature_flags.h"
#include "global_safety.h"
#include "highlight.h" #include "highlight.h"
#include "maybe.h"
#include "operation_context.h" #include "operation_context.h"
#include "parse_constants.h"
#include "parse_util.h"
#include "print_help.h" #include "print_help.h"
#include "tokenizer.h" #include "tokenizer.h"
#include "wcstringutil.h" #include "wcstringutil.h"
#include "wutil.h" // IWYU pragma: keep #include "wutil.h" // IWYU pragma: keep
// The number of spaces per indent isn't supposed to be configurable.
// See discussion at https://github.com/fish-shell/fish-shell/pull/6790
#define SPACES_PER_INDENT 4
static bool dump_parse_tree = false; static bool dump_parse_tree = false;
static int ret = 0; static int ret = 0;
@ -89,581 +81,6 @@ static wcstring read_file(FILE *f) {
return result; return result;
} }
namespace {
/// From C++14.
template <bool B, typename T = void>
using enable_if_t = typename std::enable_if<B, T>::type;
/// Report whether the character at \p idx in \p text is escaped.
/// It is escaped exactly when an odd number of backslashes precede it.
bool char_is_escaped(const wcstring &text, size_t idx) {
    const size_t backslash_count = count_preceding_backslashes(text, idx);
    return (backslash_count % 2) != 0;
}
using namespace ast;
struct pretty_printer_t {
// Note: this got somewhat more complicated after introducing the new AST, because that AST no
// longer encodes detailed lexical information (e.g. every newline). This feels more complex
// than necessary and would probably benefit from a more layered approach where we identify
// certain runs, weight line breaks, have a cost model, etc.
// Construct a pretty printer over \p src.
// The initializer list stores a reference to \p src, computes the per-character indents (all
// zero when \p do_indent is false), parses \p src into an ast using parse_flags(), and then
// precomputes the gap ranges and the preferred-semicolon offsets.
// compute_gaps() and compute_preferred_semi_locations() read 'source' and 'ast', which are
// declared before 'gaps' and 'preferred_semi_locations' and are therefore initialized first.
pretty_printer_t(const wcstring &src, bool do_indent)
: source(src),
indents(do_indent ? parse_util_compute_indents(source) : std::vector<int>(src.size(), 0)),
ast(ast_t::parse(src, parse_flags())),
do_indent(do_indent),
gaps(compute_gaps()),
preferred_semi_locations(compute_preferred_semi_locations()) {
// There must be exactly one indent entry per source character.
assert(indents.size() == source.size() && "indents and source should be same length");
}
// Original source.
const wcstring &source;
// The indents of our string.
// This has the same length as 'source' and describes the indentation level.
const std::vector<int> indents;
// The parsed ast.
const ast_t ast;
// The prettifier output.
wcstring output;
// The indent of the source range which we are currently emitting.
int current_indent{0};
// Whether to indent, or just insert spaces.
const bool do_indent;
// Whether the next gap text should hide the first newline.
bool gap_text_mask_newline{false};
// The "gaps": a sorted set of ranges between tokens.
// These contain whitespace, comments, semicolons, and other lexical elements which are not
// present in the ast.
const std::vector<source_range_t> gaps;
// The sorted set of source offsets of nl_semi_t which should be set as semis, not newlines.
// This is computed ahead of time for convenience.
const std::vector<uint32_t> preferred_semi_locations;
// Flags we support.
using gap_flags_t = uint32_t;
enum {
default_flags = 0,
// Whether to allow line splitting via escaped newlines.
// For example, in argument lists:
//
// echo a \
// b
//
// If this is not set, then split-lines will be joined.
allow_escaped_newlines = 1 << 0,
// Whether to require a space before this token.
// This is used when emitting semis:
// echo a; echo b;
// No space required between 'a' and ';', or 'b' and ';'.
skip_space = 1 << 1,
};
// Compute the gap text flags for the gap text that comes *before* \p node.
// \return allow_escaped_newlines if an escaped newline may precede the node, else default_flags.
static gap_flags_t gap_text_flags_before_node(const node_t &node) {
    const type_t node_type = node.type;

    // Leaf nodes that can be part of a long command may follow an escaped newline.
    if (node_type == type_t::argument || node_type == type_t::redirection ||
        node_type == type_t::variable_assignment) {
        return allow_escaped_newlines;
    }
    // Among the remaining node types only tokens can qualify.
    if (node_type != type_t::token_base) return default_flags;

    const auto tok_type = node.as<token_base_t>()->type;
    // Escaped newlines are allowed before && and ||, and also pipes.
    if (tok_type == parse_token_type_t::andand || tok_type == parse_token_type_t::oror ||
        tok_type == parse_token_type_t::pipe) {
        return allow_escaped_newlines;
    }
    if (tok_type != parse_token_type_t::string) return default_flags;

    // A string token: allow escaped newlines before commands that follow a variable
    // assignment, since both can be long (#7955).
    const node_t *ancestor = node.parent;
    if (ancestor->type != type_t::decorated_statement) return default_flags;
    ancestor = ancestor->parent;
    assert(ancestor->type == type_t::statement);
    ancestor = ancestor->parent;

    bool has_assignments = false;
    if (auto job = ancestor->try_as<job_pipeline_t>()) {
        has_assignments = !job->variables.empty();
    } else if (auto job_cnt = ancestor->try_as<job_continuation_t>()) {
        has_assignments = !job_cnt->variables.empty();
    } else if (auto not_stmt = ancestor->try_as<not_statement_t>()) {
        has_assignments = !not_stmt->variables.empty();
    }
    return has_assignments ? allow_escaped_newlines : default_flags;
}
// \return whether the next emitted character would begin a line: either nothing has been
// emitted yet, or the last emitted character is a newline.
bool at_line_start() const {
    if (output.empty()) return true;
    return output.back() == L'\n';
}
// \return whether we have a space before the output.
// This ignores escaped spaces and escaped newlines.
bool has_preceding_space() const {
    long idx = static_cast<long>(output.size()) - 1;
    // Skip escaped newlines.
    // This is historical. Example:
    //
    // cmd1 \
    // | cmd2
    //
    // we want the pipe to "see" the space after cmd1.
    // TODO: this is too tricky, we should factor this better.
    while (idx >= 0 && output.at(static_cast<size_t>(idx)) == L'\n') {
        // 'idx' is an offset into 'output', so the backslashes must be counted in 'output' too.
        // Counting them in 'source' would inspect unrelated text, and 'idx' may lie past the
        // end of 'source' because the output can be longer than the input.
        size_t backslashes = count_preceding_backslashes(output, static_cast<size_t>(idx));
        if (backslashes % 2 == 0) {
            // Not escaped.
            return false;
        }
        // Step over the newline and the backslashes that escape it.
        idx -= static_cast<long>(1 + backslashes);
    }
    return idx >= 0 && output.at(static_cast<size_t>(idx)) == L' ' &&
           !char_is_escaped(output, static_cast<size_t>(idx));
}
// Entry point: walk the ast, emitting prettified text, and return the result.
// The returned text always ends in exactly one newline.
wcstring prettify() {
    output.clear();
    node_visitor(*this).accept(ast.top());

    // Emit whatever gap text trails the final token.
    const source_range_t end_of_source{(uint32_t)source.size(), 0};
    emit_gap_text_before(end_of_source, default_flags);

    // Collapse all trailing newlines into a single one.
    while (!output.empty() && at_line_start()) {
        output.pop_back();
    }
    emit_newline();

    // Hand the buffer over to the caller.
    wcstring prettified = std::move(output);
    return prettified;
}
// \return the text of the original source covered by \p r.
wcstring substr(source_range_t r) const {
    const auto first = r.start;
    const auto count = r.length;
    return source.substr(first, count);
}
// Return the gap ranges from our ast.
std::vector<source_range_t> compute_gaps() const {
auto range_compare = [](source_range_t r1, source_range_t r2) {
if (r1.start != r2.start) return r1.start < r2.start;
return r1.length < r2.length;
};
// Collect the token ranges into a list.
std::vector<source_range_t> tok_ranges;
for (const node_t &node : ast) {
if (node.category == category_t::leaf) {
auto r = node.source_range();
if (r.length > 0) tok_ranges.push_back(r);
}
}
// Place a zero length range at end to aid in our inverting.
tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0});
// Our tokens should be sorted.
assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare));
// For each range, add a gap range between the previous range and this range.
std::vector<source_range_t> gaps;
uint32_t prev_end = 0;
for (source_range_t tok_range : tok_ranges) {
assert(tok_range.start >= prev_end &&
"Token range should not overlap or be out of order");
if (tok_range.start >= prev_end) {
gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end});
}
prev_end = tok_range.start + tok_range.length;
}
return gaps;
}
// Return sorted list of semi-preferring semi_nl nodes.
// The result holds the start offsets of semi_nl nodes which should be emitted as ';' rather than
// as a newline. Two passes over the ast contribute offsets; the combined list is sorted at the end
// so that callers can binary-search it.
std::vector<uint32_t> compute_preferred_semi_locations() const {
std::vector<uint32_t> result;
// Record the offset of \p n if it is present, has source, and that source is literally ";".
auto mark_semi_from_input = [&](const optional_t<semi_nl_t> &n) {
if (n && n->has_source() && substr(n->range) == L";") {
result.push_back(n->range.start);
}
};
// andor_job_lists get semis if the input uses semis.
for (const auto &node : ast) {
// See if we have a condition and an andor_job_list.
const optional_t<semi_nl_t> *condition = nullptr;
const andor_job_list_t *andors = nullptr;
if (const auto *ifc = node.try_as<if_clause_t>()) {
condition = &ifc->condition.semi_nl;
andors = &ifc->andor_tail;
} else if (const auto *wc = node.try_as<while_header_t>()) {
condition = &wc->condition.semi_nl;
andors = &wc->andor_tail;
}
// If there is no and-or tail then we always use a newline.
if (andors && andors->count() > 0) {
if (condition) mark_semi_from_input(*condition);
// Mark all but last of the andor list.
// ('i + 1 < count' rather than 'i < count - 1' so the bound cannot wrap around.)
for (uint32_t i = 0; i + 1 < andors->count(); i++) {
mark_semi_from_input(andors->at(i)->job.semi_nl);
}
}
}
// `x ; and y` gets semis if it has them already, and they are on the same line.
for (const auto &node : ast) {
if (const auto *job_list = node.try_as<job_list_t>()) {
// The semi_nl of the job preceding the one being examined, or null if there is none.
const semi_nl_t *prev_job_semi_nl = nullptr;
for (const job_conjunction_t &job : *job_list) {
// Set up prev_job_semi_nl for the next iteration to make control flow easier.
const semi_nl_t *prev = prev_job_semi_nl;
prev_job_semi_nl = job.semi_nl.contents.get();
// Is this an 'and' or 'or' job?
if (!job.decorator) continue;
// Now see if we want to mark 'prev' as allowing a semi.
// Skip unless there was a previous semi_nl and it was written as ';' in the input.
if (!prev || substr(prev->range) != L";") continue;
// Is there a newline between them?
assert(prev->range.start <= job.decorator->range.start &&
"Ranges out of order");
// Scan from the previous semi through the end of this job's decorator.
auto start = source.begin() + prev->range.start;
auto end = source.begin() + job.decorator->range.end();
if (std::find(start, end, L'\n') == end) {
// We're going to allow the previous semi_nl to be a semi.
result.push_back(prev->range.start);
}
}
}
}
std::sort(result.begin(), result.end());
return result;
}
// Emit whatever whitespace is needed before the next piece of text.
// At the start of a line this is the current indentation; otherwise it is a single space,
// unless \p flags has skip_space set or the output already ends in a space.
void emit_space_or_indent(gap_flags_t flags = default_flags) {
    if (at_line_start()) {
        output.append(SPACES_PER_INDENT * current_indent, L' ');
        return;
    }
    const bool wants_space = !(flags & skip_space);
    if (wants_space && !has_preceding_space()) {
        output.append(1, L' ');
    }
}
// Emit "gap text:" newlines and comments from the original source.
// Gap text may be a few things:
//
// 1. Just a space is common. We will trim the spaces to be empty.
//
// Here the gap text is the comment, followed by the newline:
//
// echo abc # arg
// echo def
//
// 2. It may also be an escaped newline:
// Here the gap text is a space, backslash, newline, space.
//
// echo \
// hi
//
// 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe:
//
// begin | stuff
//
// We do not handle errors here - instead our caller does.
// \param range the source range of the gap text to emit.
// \param flags gap flags; only allow_escaped_newlines is consulted here.
// \return true if the last token of the gap text was a comment (a newline is then emitted after
// it), false otherwise - including when the gap text consists only of spaces.
bool emit_gap_text(source_range_t range, gap_flags_t flags) {
wcstring gap_text = substr(range);
// Common case: if we are only spaces, do nothing.
if (gap_text.find_first_not_of(L' ') == wcstring::npos) return false;
// Look to see if there is an escaped newline.
// Emit it if either we allow it, or it comes before the first comment.
// Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap
// text - we already know it has no semantic significance.
size_t escaped_nl = gap_text.find(L"\\\n");
if (escaped_nl != wcstring::npos) {
size_t comment_idx = gap_text.find(L'#');
if ((flags & allow_escaped_newlines) ||
(comment_idx != wcstring::npos && escaped_nl < comment_idx)) {
// Emit a space before the escaped newline.
if (!at_line_start() && !has_preceding_space()) {
output.append(L" ");
}
output.append(L"\\\n");
// Indent the continuation line and any leading comments (#7252).
// Use the indentation level of the next newline.
// (escaped_nl is relative to the gap text; adding range.start gives the source offset of
// the backslash, and +1 that of the newline itself.)
current_indent = indents.at(range.start + escaped_nl + 1);
emit_space_or_indent();
}
}
// It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we
// always emit one.
// needs_nl is set after emitting a comment and cleared once the following newline is emitted.
bool needs_nl = false;
auto tokenizer = new_tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES);
while (auto tok = tokenizer->next()) {
wcstring tok_text = *tokenizer->text_of(*tok);
if (needs_nl) {
// The previous token was a comment: terminate it, and do not emit the input's own newline
// a second time.
emit_newline();
needs_nl = false;
if (tok_text == L"\n") continue;
} else if (gap_text_mask_newline) {
// We only respect mask_newline the first time through the loop.
gap_text_mask_newline = false;
if (tok_text == L"\n") continue;
}
if (tok->type_ == token_type_t::comment) {
emit_space_or_indent();
output.append(tok_text);
needs_nl = true;
} else if (tok->type_ == token_type_t::end) {
// This may be either a newline or semicolon.
// Semicolons found here are not part of the ast and can simply be removed.
// Newlines are preserved unless mask_newline is set.
if (tok_text == L"\n") {
emit_newline();
}
} else {
// Any other token type here is treated as a fatal internal error.
fprintf(stderr,
"Gap text should only have comments and newlines - instead found token "
"type %d with text: %ls\n",
(int)tok->type_, tok_text.c_str());
DIE("Gap text should only have comments and newlines");
}
}
// A trailing comment still needs its terminating newline.
if (needs_nl) emit_newline();
return needs_nl;
}
/// Look up the gap which ends exactly at source offset \p end.
/// \return that gap's range, or the empty range {0, 0} if no gap ends there.
source_range_t gap_text_to(uint32_t end) const {
    // 'gaps' is sorted, so binary-search for the first gap which does not end before 'end'.
    const auto ends_before = [](source_range_t r, uint32_t end) {
        return r.start + r.length < end;
    };
    const auto candidate = std::lower_bound(gaps.begin(), gaps.end(), end, ends_before);
    if (candidate != gaps.end() && candidate->start + candidate->length == end) {
        return *candidate;
    }
    // Not found.
    return source_range_t{0, 0};
}
/// \return whether the range \p r overlaps any of the error ranges recorded by our ast.
bool range_contained_error(source_range_t r) const {
    const auto &errs = ast.extras().errors;
    // Strict "entirely before" ordering: two ranges that compare equivalent under it overlap.
    auto entirely_before = [](source_range_t lhs, source_range_t rhs) {
        return lhs.start + lhs.length <= rhs.start;
    };
    assert(std::is_sorted(errs.begin(), errs.end(), entirely_before) &&
           "Error ranges should be sorted");
    const bool overlaps_error = std::binary_search(errs.begin(), errs.end(), r, entirely_before);
    return overlaps_error;
}
// Emit the gap text which immediately precedes the source range \p r, if there is any.
// \return whether emitting the gap text added a trailing newline (see emit_gap_text()).
bool emit_gap_text_before(source_range_t r, gap_flags_t flags) {
    assert(r.start <= source.size() && "source out of bounds");
    // Look for a gap which ends where 'r' starts.
    const source_range_t gap = gap_text_to(r.start);
    bool added_newline = false;
    if (gap.length > 0) {
        // Take the indent from the beginning of the gap text. For example:
        // begin
        // cmd
        // # comment
        // end
        // The comment is the gap text before 'end', but it should be indented like 'cmd'.
        if (gap.start < indents.size()) current_indent = indents.at(gap.start);

        if (!range_contained_error(gap)) {
            added_newline = emit_gap_text(gap, flags);
        } else {
            // The gap contained an error, so emit it verbatim.
            // For example in: echo foo "
            // We don't want to mess with the quote.
            output.append(substr(gap));
        }
    }
    // The newline mask never outlives one gap, even an empty one.
    gap_text_mask_newline = false;
    return added_newline;
}
/// Strip unnecessary quoting from \p input.
/// \return the unquoted text if it is non-empty and made up solely of "good" characters;
/// otherwise \p input unchanged.
wcstring clean_text(const wcstring &input) {
    // Unescape, leaving special markers in place for expansions and the like. Backslash
    // escapes such as \U or \x are deliberately not computed so that they stay intact.
    wcstring candidate = input;
    unescape_string_in_place(&candidate, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES);

    // INTERNAL_SEPARATOR stands for a quote; drop every occurrence.
    const auto is_separator = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
    const auto new_end = std::remove_if(candidate.begin(), candidate.end(), is_separator);
    candidate.erase(new_end, candidate.end());

    // Only use the unescaped version if every remaining character is "good".
    // The list could be extended, but a precise list is hard to give and changes over time
    // (see "^", "%" and "?", in some cases "{}"), and keeping quotes puts people at ease.
    const auto is_good = [](wchar_t ch) {
        return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
    };
    const bool only_good =
        !candidate.empty() && std::all_of(candidate.begin(), candidate.end(), is_good);
    if (only_good) {
        return candidate;
    }
    return input;
}
// Emit the original text in range \p r, preceded by its gap text and by whatever space or
// indentation is needed. \p flags controls how the preceding gap text and space are treated
// (see gap_flags_t).
void emit_text(source_range_t r, gap_flags_t flags) {
    emit_gap_text_before(r, flags);
    current_indent = indents.at(r.start);
    // An empty range contributes no text of its own.
    const bool has_text = r.length > 0;
    if (!has_text) return;
    emit_space_or_indent(flags);
    output.append(clean_text(substr(r)));
}
template <type_t Type>
void emit_node_text(const leaf_t<Type> &node) {
source_range_t range = node.range;
// Weird special-case: a token may end in an escaped newline. Notably, the newline is
// not part of the following gap text, handle indentation here (#8197).
bool ends_with_escaped_nl = node.range.length >= 2 &&
source.at(node.range.end() - 2) == L'\\' &&
source.at(node.range.end() - 1) == L'\n';
if (ends_with_escaped_nl) {
range = {range.start, range.length - 2};
}
emit_text(range, gap_text_flags_before_node(node));
if (ends_with_escaped_nl) {
// By convention, escaped newlines are preceded with a space.
output.append(L" \\\n");
// TODO Maybe check "allow_escaped_newlines" and use the precomputed indents.
// The cases where this matters are probably very rare.
current_indent++;
emit_space_or_indent();
current_indent--;
}
}
// Append a single newline to the output.
void emit_newline() {
    output.append(1, L'\n');
}
// Append a single semicolon to the output.
void emit_semi() {
    output.append(1, L';');
}
// For branch and list nodes, default is to visit their children.
// This overload participates only for node types whose Category is category_t::branch; it emits
// nothing itself and just recurses into the node's children.
template <typename Node>
enable_if_t<Node::Category == category_t::branch> visit(const Node &node) {
node_visitor(*this).accept_children_of(node);
}
// This overload participates only for node types whose Category is category_t::list; it emits
// nothing itself and just recurses into the node's children.
template <typename Node>
enable_if_t<Node::Category == ast::category_t::list> visit(const Node &node) {
node_visitor(*this).accept_children_of(node);
}
// Leaf nodes we just visit their text.
// Each overload below forwards to emit_node_text(), which emits the node's source text together
// with the gap text preceding it.
void visit(const keyword_base_t &node) { emit_node_text(node); }
void visit(const token_base_t &node) { emit_node_text(node); }
void visit(const argument_t &node) { emit_node_text(node); }
void visit(const variable_assignment_t &node) { emit_node_text(node); }
// Emit a semi_nl node: a semicolon or newline which is part of the ast, e.g. one terminating
// a job or an 'if' header (but not stray semis in job lists). Whether it comes out as a
// semicolon or as a newline is decided by preferred_semi_locations.
void visit(const semi_nl_t &node) {
    // A semicolon is preferred only if this node's offset was recorded as such.
    const bool prefer_semi =
        node.range.length > 0 && std::binary_search(preferred_semi_locations.begin(),
                                                    preferred_semi_locations.end(),
                                                    node.range.start);
    emit_gap_text_before(node.range, gap_text_flags_before_node(node));

    // If the gap text already left us on a fresh line (it had a comment), emit nothing.
    if (at_line_start()) return;

    if (prefer_semi) {
        emit_semi();
        return;
    }
    emit_newline();
    // The input had a semi but a newline was emitted: swallow a subsequent newline.
    if (substr(node.range) == L";") {
        gap_text_mask_newline = true;
    }
}
void visit(const redirection_t &node) {
// No space between a redirection operator and its target (#2899).
emit_text(node.oper.range, default_flags);
emit_text(node.target.range, skip_space);
}
// Emit a maybe_newlines node. Such newlines may have comments embedded in them, example:
// cmd |
// # something
// cmd2
// so the node's text is treated as gap text.
void visit(const maybe_newlines_t &node) {
    source_range_t gap_range = node.range;
    const bool has_text = gap_range.length > 0;
    if (!has_text) return;

    const auto flags = gap_text_flags_before_node(node);
    current_indent = indents.at(gap_range.start);
    const bool added_newline = emit_gap_text_before(gap_range, flags);
    // If the preceding gap text already ended with a newline, skip our own leading newline.
    if (added_newline && source.at(gap_range.start) == L'\n') {
        gap_range.start++;
    }
    emit_gap_text(gap_range, flags);
}
// Emit a 'begin' header: its children, followed by a newline.
void visit(const begin_header_t &node) {
    node_visitor(*this).accept_children_of(node);
    // 'begin' does not require a newline after it, but we insert one unless we are already
    // at the start of a line.
    if (at_line_start()) return;
    emit_newline();
}
// \return the parse flags used when building the ast for pretty-printing.
static parse_tree_flags_t parse_flags() {
    const parse_tree_flags_t flags = parse_flag_continue_after_error |
                                     parse_flag_include_comments |
                                     parse_flag_leave_unterminated | parse_flag_show_blank_lines;
    return flags;
}
};
} // namespace
static const char *highlight_role_to_string(highlight_role_t role) { static const char *highlight_role_to_string(highlight_role_t role) {
#define TEST_ROLE(x) \ #define TEST_ROLE(x) \
case highlight_role_t::x: \ case highlight_role_t::x: \
@ -750,10 +167,9 @@ static std::string make_pygments_csv(const wcstring &src) {
// Entry point for prettification. // Entry point for prettification.
static wcstring prettify(const wcstring &src, bool do_indent) { static wcstring prettify(const wcstring &src, bool do_indent) {
if (dump_parse_tree) { if (dump_parse_tree) {
auto ast = auto ast = ast_parse(src, parse_flag_leave_unterminated | parse_flag_include_comments |
ast::ast_t::parse(src, parse_flag_leave_unterminated | parse_flag_include_comments |
parse_flag_show_extra_semis); parse_flag_show_extra_semis);
wcstring ast_dump = ast.dump(src); wcstring ast_dump = *ast->dump(src);
std::fwprintf(stderr, L"%ls\n", ast_dump.c_str()); std::fwprintf(stderr, L"%ls\n", ast_dump.c_str());
} }

475
src/fish_indent_common.cpp Normal file
View file

@ -0,0 +1,475 @@
#include "fish_indent_common.h"
#include "ast.h"
#include "common.h"
#include "env.h"
#include "expand.h"
#include "flog.h"
#include "global_safety.h"
#include "maybe.h"
#include "operation_context.h"
#include "parse_constants.h"
#include "parse_util.h"
#include "tokenizer.h"
#include "wcstringutil.h"
#if INCLUDE_RUST_HEADERS
#include "fish_indent.rs.h"
#endif
using namespace ast;
// The number of spaces per indent isn't supposed to be configurable.
// See discussion at https://github.com/fish-shell/fish-shell/pull/6790
#define SPACES_PER_INDENT 4
/// Report whether the character at \p idx in \p text is escaped.
/// It is escaped exactly when an odd number of backslashes precede it.
static bool char_is_escaped(const wcstring &text, size_t idx) {
    const size_t backslash_count = count_preceding_backslashes(text, idx);
    return (backslash_count % 2) != 0;
}
/// Construct a pretty printer over \p src.
/// The initializer list stores a reference to \p src, computes the per-character indents (all
/// zero when \p do_indent is false), parses \p src into an ast using parse_flags(), creates
/// 'visitor' via new_pretty_printer(*this), and precomputes the gap ranges and the
/// preferred-semicolon offsets.
// NOTE(review): compute_gaps() and compute_preferred_semi_locations() read 'ast' and 'source',
// and members are initialized in declaration order, so this presumably relies on the header
// declaring those members before 'gaps' and 'preferred_semi_locations' - confirm.
pretty_printer_t::pretty_printer_t(const wcstring &src, bool do_indent)
: source(src),
indents(do_indent ? parse_util_compute_indents(source) : std::vector<int>(src.size(), 0)),
ast(ast_parse(src, parse_flags())),
visitor(new_pretty_printer(*this)),
do_indent(do_indent),
gaps(compute_gaps()),
preferred_semi_locations(compute_preferred_semi_locations()) {
// There must be exactly one indent entry per source character.
assert(indents.size() == source.size() && "indents and source should be same length");
}
/// Compute the gap text flags for the gap text that comes *before* \p node.
/// \return allow_escaped_newlines if an escaped newline may precede the node, else
/// default_flags.
pretty_printer_t::gap_flags_t pretty_printer_t::gap_text_flags_before_node(const node_t &node) {
    const auto node_type = node.typ();

    // Leaf nodes that can be part of a long command may follow an escaped newline.
    if (node_type == type_t::argument || node_type == type_t::redirection ||
        node_type == type_t::variable_assignment) {
        return allow_escaped_newlines;
    }
    // Among the remaining node types only tokens can qualify.
    if (node_type != type_t::token_base) return default_flags;

    const auto tok_type = node.token_type();
    // Escaped newlines are allowed before && and ||, and also pipes.
    if (tok_type == parse_token_type_t::andand || tok_type == parse_token_type_t::oror ||
        tok_type == parse_token_type_t::pipe) {
        return allow_escaped_newlines;
    }
    if (tok_type != parse_token_type_t::string) return default_flags;

    // A string token: allow escaped newlines before commands that follow a variable
    // assignment, since both can be long (#7955).
    auto ancestor = node.parent();
    if (ancestor->typ() != type_t::decorated_statement) return default_flags;
    ancestor = ancestor->parent();
    assert(ancestor->typ() == type_t::statement);
    ancestor = ancestor->parent();

    bool has_assignments = false;
    if (auto *job = ancestor->try_as_job_pipeline()) {
        has_assignments = !job->variables().empty();
    } else if (auto *job_cnt = ancestor->try_as_job_continuation()) {
        has_assignments = !job_cnt->variables().empty();
    } else if (auto *not_stmt = ancestor->try_as_not_statement()) {
        has_assignments = !not_stmt->variables().empty();
    }
    return has_assignments ? allow_escaped_newlines : default_flags;
}
/// \return whether the output currently ends in a space.
/// Escaped spaces do not count, and escaped newlines at the end of the output are skipped.
bool pretty_printer_t::has_preceding_space() const {
    long idx = static_cast<long>(output.size()) - 1;
    // Skip escaped newlines.
    // This is historical. Example:
    //
    // cmd1 \
    // | cmd2
    //
    // we want the pipe to "see" the space after cmd1.
    // TODO: this is too tricky, we should factor this better.
    while (idx >= 0 && output.at(static_cast<size_t>(idx)) == L'\n') {
        // 'idx' is an offset into 'output', so the backslashes must be counted in 'output' too.
        // Counting them in 'source' would inspect unrelated text, and 'idx' may lie past the
        // end of 'source' because the output can be longer than the input.
        size_t backslashes = count_preceding_backslashes(output, static_cast<size_t>(idx));
        if (backslashes % 2 == 0) {
            // Not escaped.
            return false;
        }
        // Step over the newline and the backslashes that escape it.
        idx -= static_cast<long>(1 + backslashes);
    }
    return idx >= 0 && output.at(static_cast<size_t>(idx)) == L' ' &&
           !char_is_escaped(output, static_cast<size_t>(idx));
}
/// Entry point: walk the ast, emitting prettified text, and return the result.
/// The returned text always ends in exactly one newline.
wcstring pretty_printer_t::prettify() {
    output.clear();
    visitor->visit(*ast->top());

    // Emit whatever gap text trails the final token.
    const source_range_t end_of_source{(uint32_t)source.size(), 0};
    emit_gap_text_before(end_of_source, default_flags);

    // Collapse all trailing newlines into a single one.
    while (!output.empty() && at_line_start()) {
        output.pop_back();
    }
    emit_newline();

    // Hand the buffer over to the caller.
    wcstring prettified = std::move(output);
    return prettified;
}
std::vector<source_range_t> pretty_printer_t::compute_gaps() const {
auto range_compare = [](source_range_t r1, source_range_t r2) {
if (r1.start != r2.start) return r1.start < r2.start;
return r1.length < r2.length;
};
// Collect the token ranges into a list.
std::vector<source_range_t> tok_ranges;
for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
auto node = ast_traversal->next();
if (!node->has_value()) break;
if (node->category() == category_t::leaf) {
auto r = node->source_range();
if (r.length > 0) tok_ranges.push_back(r);
}
}
// Place a zero length range at end to aid in our inverting.
tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0});
// Our tokens should be sorted.
assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare));
// For each range, add a gap range between the previous range and this range.
std::vector<source_range_t> gaps;
uint32_t prev_end = 0;
for (source_range_t tok_range : tok_ranges) {
assert(tok_range.start >= prev_end && "Token range should not overlap or be out of order");
if (tok_range.start >= prev_end) {
gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end});
}
prev_end = tok_range.start + tok_range.length;
}
return gaps;
}
/// Finish a 'begin' header by making sure the output is at the start of a line, emitting a
/// newline if it is not.
void pretty_printer_t::visit_begin_header() {
    if (at_line_start()) return;
    emit_newline();
}
/// Emit a maybe_newlines node; \p node_ must point to a maybe_newlines_t.
/// Such newlines may have comments embedded in them, example:
/// cmd |
/// # something
/// cmd2
/// so the node's text is treated as gap text.
void pretty_printer_t::visit_maybe_newlines(const void *node_) {
    const auto *newlines = static_cast<const maybe_newlines_t *>(node_);
    source_range_t gap_range = newlines->range();
    const bool has_text = gap_range.length > 0;
    if (!has_text) return;

    const auto flags = gap_text_flags_before_node(*newlines->ptr());
    current_indent = indents.at(gap_range.start);
    const bool added_newline = emit_gap_text_before(gap_range, flags);
    // If the preceding gap text already ended with a newline, skip our own leading newline.
    if (added_newline && source.at(gap_range.start) == L'\n') {
        gap_range.start++;
    }
    emit_gap_text(gap_range, flags);
}
void pretty_printer_t::visit_redirection(const void *node_) {
const auto &node = *static_cast<const redirection_t *>(node_);
// No space between a redirection operator and its target (#2899).
emit_text(node.oper().range(), default_flags);
emit_text(node.target().range(), skip_space);
}
/// Emit a semi_nl node: a semicolon or newline which is part of the ast, e.g. one terminating
/// a job or an 'if' header (but not stray semis in job lists). Whether it comes out as a
/// semicolon or as a newline is decided by preferred_semi_locations.
/// \p node_ must point to a node_t.
void pretty_printer_t::visit_semi_nl(const void *node_) {
    const auto *semi_nl = static_cast<const node_t *>(node_);
    const auto range = semi_nl->source_range();

    // A semicolon is preferred only if this node's offset was recorded as such.
    const bool prefer_semi =
        range.length > 0 && std::binary_search(preferred_semi_locations.begin(),
                                               preferred_semi_locations.end(), range.start);
    emit_gap_text_before(range, gap_text_flags_before_node(*semi_nl->ptr()));

    // If the gap text already left us on a fresh line (it had a comment), emit nothing.
    if (at_line_start()) return;

    if (prefer_semi) {
        emit_semi();
        return;
    }
    emit_newline();
    // The input had a semi but a newline was emitted: swallow a subsequent newline.
    if (substr(range) == L";") {
        gap_text_mask_newline = true;
    }
}
void pretty_printer_t::emit_node_text(const void *node_) {
const auto &node = *static_cast<const node_t *>(node_);
source_range_t range = node.source_range();
// Weird special-case: a token may end in an escaped newline. Notably, the newline is
// not part of the following gap text, handle indentation here (#8197).
bool ends_with_escaped_nl = range.length >= 2 && source.at(range.end() - 2) == L'\\' &&
source.at(range.end() - 1) == L'\n';
if (ends_with_escaped_nl) {
range = {range.start, range.length - 2};
}
emit_text(range, gap_text_flags_before_node(node));
if (ends_with_escaped_nl) {
// By convention, escaped newlines are preceded with a space.
output.append(L" \\\n");
// TODO Maybe check "allow_escaped_newlines" and use the precomputed indents.
// The cases where this matters are probably very rare.
current_indent++;
emit_space_or_indent();
current_indent--;
}
}
/// Emit the original text in range \p r, preceded by its gap text and by whatever space or
/// indentation is needed. \p flags controls how the preceding gap text and space are treated.
void pretty_printer_t::emit_text(source_range_t r, gap_flags_t flags) {
    emit_gap_text_before(r, flags);
    current_indent = indents.at(r.start);
    // An empty range contributes no text of its own.
    const bool has_text = r.length > 0;
    if (!has_text) return;
    emit_space_or_indent(flags);
    output.append(clean_text(substr(r)));
}
/// Strip unnecessary quoting from \p input.
/// \return the unquoted text if it is non-empty and made up solely of "good" characters;
/// otherwise \p input unchanged.
wcstring pretty_printer_t::clean_text(const wcstring &input) {
    // Unescape, leaving special markers in place for expansions and the like. Backslash
    // escapes such as \U or \x are deliberately not computed so that they stay intact.
    wcstring candidate = input;
    unescape_string_in_place(&candidate, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES);

    // INTERNAL_SEPARATOR stands for a quote; drop every occurrence.
    const auto is_separator = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
    const auto new_end = std::remove_if(candidate.begin(), candidate.end(), is_separator);
    candidate.erase(new_end, candidate.end());

    // Only use the unescaped version if every remaining character is "good".
    // The list could be extended, but a precise list is hard to give and changes over time
    // (see "^", "%" and "?", in some cases "{}"), and keeping quotes puts people at ease.
    const auto is_good = [](wchar_t ch) {
        return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
    };
    const bool only_good =
        !candidate.empty() && std::all_of(candidate.begin(), candidate.end(), is_good);
    if (only_good) {
        return candidate;
    }
    return input;
}
/// Emit the gap text which immediately precedes the source range \p r, if there is any.
/// \return whether emitting the gap text added a trailing newline (see emit_gap_text()).
bool pretty_printer_t::emit_gap_text_before(source_range_t r, gap_flags_t flags) {
    assert(r.start <= source.size() && "source out of bounds");
    // Look for a gap which ends where 'r' starts.
    const source_range_t gap = gap_text_to(r.start);
    bool added_newline = false;
    if (gap.length > 0) {
        // Take the indent from the beginning of the gap text. For example:
        // begin
        // cmd
        // # comment
        // end
        // The comment is the gap text before 'end', but it should be indented like 'cmd'.
        if (gap.start < indents.size()) current_indent = indents.at(gap.start);

        if (!range_contained_error(gap)) {
            added_newline = emit_gap_text(gap, flags);
        } else {
            // The gap contained an error, so emit it verbatim.
            // For example in: echo foo "
            // We don't want to mess with the quote.
            output.append(substr(gap));
        }
    }
    // The newline mask never outlives one gap, even an empty one.
    gap_text_mask_newline = false;
    return added_newline;
}
/// \return whether the range \p r overlaps any of the error ranges recorded by our ast.
bool pretty_printer_t::range_contained_error(source_range_t r) const {
    const auto &errs = ast->extras()->errors();
    // Strict "entirely before" ordering: two ranges that compare equivalent under it overlap.
    auto entirely_before = [](source_range_t lhs, source_range_t rhs) {
        return lhs.start + lhs.length <= rhs.start;
    };
    assert(std::is_sorted(errs.begin(), errs.end(), entirely_before) &&
           "Error ranges should be sorted");
    const bool overlaps_error = std::binary_search(errs.begin(), errs.end(), r, entirely_before);
    return overlaps_error;
}
/// Look up the gap which ends exactly at source offset \p end.
/// \return that gap's range, or the empty range {0, 0} if no gap ends there.
source_range_t pretty_printer_t::gap_text_to(uint32_t end) const {
    // 'gaps' is sorted, so binary-search for the first gap which does not end before 'end'.
    const auto ends_before = [](source_range_t r, uint32_t end) {
        return r.start + r.length < end;
    };
    const auto candidate = std::lower_bound(gaps.begin(), gaps.end(), end, ends_before);
    if (candidate != gaps.end() && candidate->start + candidate->length == end) {
        return *candidate;
    }
    // Not found.
    return source_range_t{0, 0};
}
/// Emit the "gap text" found in \p range of the source: the comments, newlines and escaped
/// newlines lying between tokens. Runs of spaces are dropped and semicolons are removed.
/// \param range the source range of the gap text to emit.
/// \param flags gap flags; only allow_escaped_newlines is consulted here.
/// \return true if the last token of the gap text was a comment (a newline is then emitted after
/// it), false otherwise - including when the gap text consists only of spaces.
bool pretty_printer_t::emit_gap_text(source_range_t range, gap_flags_t flags) {
wcstring gap_text = substr(range);
// Common case: if we are only spaces, do nothing.
if (gap_text.find_first_not_of(L' ') == wcstring::npos) return false;
// Look to see if there is an escaped newline.
// Emit it if either we allow it, or it comes before the first comment.
// Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap
// text - we already know it has no semantic significance.
size_t escaped_nl = gap_text.find(L"\\\n");
if (escaped_nl != wcstring::npos) {
size_t comment_idx = gap_text.find(L'#');
if ((flags & allow_escaped_newlines) ||
(comment_idx != wcstring::npos && escaped_nl < comment_idx)) {
// Emit a space before the escaped newline.
if (!at_line_start() && !has_preceding_space()) {
output.append(L" ");
}
output.append(L"\\\n");
// Indent the continuation line and any leading comments (#7252).
// Use the indentation level of the next newline.
// (escaped_nl is relative to the gap text; adding range.start gives the source offset of
// the backslash, and +1 that of the newline itself.)
current_indent = indents.at(range.start + escaped_nl + 1);
emit_space_or_indent();
}
}
// It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we
// always emit one.
// needs_nl is set after emitting a comment and cleared once the following newline is emitted.
bool needs_nl = false;
auto tokenizer = new_tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES);
while (auto tok = tokenizer->next()) {
wcstring tok_text = *tokenizer->text_of(*tok);
if (needs_nl) {
// The previous token was a comment: terminate it, and do not emit the input's own newline
// a second time.
emit_newline();
needs_nl = false;
if (tok_text == L"\n") continue;
} else if (gap_text_mask_newline) {
// We only respect mask_newline the first time through the loop.
gap_text_mask_newline = false;
if (tok_text == L"\n") continue;
}
if (tok->type_ == token_type_t::comment) {
emit_space_or_indent();
output.append(tok_text);
needs_nl = true;
} else if (tok->type_ == token_type_t::end) {
// This may be either a newline or semicolon.
// Semicolons found here are not part of the ast and can simply be removed.
// Newlines are preserved unless mask_newline is set.
if (tok_text == L"\n") {
emit_newline();
}
} else {
// Any other token type here is treated as a fatal internal error.
fprintf(stderr,
"Gap text should only have comments and newlines - instead found token "
"type %d with text: %ls\n",
(int)tok->type_, tok_text.c_str());
DIE("Gap text should only have comments and newlines");
}
}
// A trailing comment still needs its terminating newline.
if (needs_nl) emit_newline();
return needs_nl;
}
// Prepare the output for the next piece of text: at the start of a line, emit the indentation
// for current_indent; otherwise emit a single separating space unless \p flags contains
// skip_space or the output already ends with a space.
void pretty_printer_t::emit_space_or_indent(gap_flags_t flags) {
    if (at_line_start()) {
        output.append(SPACES_PER_INDENT * current_indent, L' ');
        return;
    }
    // Mid-line: the caller may have asked for no separating space at all.
    const bool space_suppressed = (flags & skip_space) != 0;
    if (space_suppressed) return;
    // Never double up on spaces.
    if (has_preceding_space()) return;
    output.append(1, L' ');
}
std::vector<uint32_t> pretty_printer_t::compute_preferred_semi_locations() const {
std::vector<uint32_t> result;
auto mark_semi_from_input = [&](const semi_nl_t &n) {
if (n.ptr()->has_source() && substr(n.range()) == L";") {
result.push_back(n.range().start);
}
};
// andor_job_lists get semis if the input uses semis.
for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
auto node = ast_traversal->next();
if (!node->has_value()) break;
// See if we have a condition and an andor_job_list.
const semi_nl_t *condition = nullptr;
const andor_job_list_t *andors = nullptr;
if (const auto *ifc = node->try_as_if_clause()) {
if (ifc->condition().has_semi_nl()) {
condition = &ifc->condition().semi_nl();
}
andors = &ifc->andor_tail();
} else if (const auto *wc = node->try_as_while_header()) {
if (wc->condition().has_semi_nl()) {
condition = &wc->condition().semi_nl();
}
andors = &wc->andor_tail();
}
// If there is no and-or tail then we always use a newline.
if (andors && andors->count() > 0) {
if (condition) mark_semi_from_input(*condition);
// Mark all but last of the andor list.
for (uint32_t i = 0; i + 1 < andors->count(); i++) {
mark_semi_from_input(andors->at(i)->job().semi_nl());
}
}
}
// `x ; and y` gets semis if it has them already, and they are on the same line.
for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
auto node = ast_traversal->next();
if (!node->has_value()) break;
if (const auto *job_list = node->try_as_job_list()) {
const semi_nl_t *prev_job_semi_nl = nullptr;
for (size_t i = 0; i < job_list->count(); i++) {
const job_conjunction_t &job = *job_list->at(i);
// Set up prev_job_semi_nl for the next iteration to make control flow easier.
const semi_nl_t *prev = prev_job_semi_nl;
prev_job_semi_nl = job.has_semi_nl() ? &job.semi_nl() : nullptr;
// Is this an 'and' or 'or' job?
if (!job.has_decorator()) continue;
// Now see if we want to mark 'prev' as allowing a semi.
// Did we have a previous semi_nl which was a newline?
if (!prev || substr(prev->range()) != L";") continue;
// Is there a newline between them?
assert(prev->range().start <= job.decorator().range().start &&
"Ranges out of order");
auto start = source.begin() + prev->range().start;
auto end = source.begin() + job.decorator().range().end();
if (std::find(start, end, L'\n') == end) {
// We're going to allow the previous semi_nl to be a semi.
result.push_back(prev->range().start);
}
}
}
}
std::sort(result.begin(), result.end());
return result;
}

160
src/fish_indent_common.h Normal file
View file

@ -0,0 +1,160 @@
#ifndef FISH_INDENT_STAGING_H
#define FISH_INDENT_STAGING_H
#include "ast.h"
#include "common.h"
#include "cxx.h"
struct PrettyPrinter;
// State and helpers used to pretty-print (re-indent and tidy) fish source code.
// Several members are const and are initialized in declaration order, so the order of the data
// members below is significant.
struct pretty_printer_t {
// Note: this got somewhat more complicated after introducing the new AST, because that AST no
// longer encodes detailed lexical information (e.g. every newline). This feels more complex
// than necessary and would probably benefit from a more layered approach where we identify
// certain runs, weight line breaks, have a cost model, etc.
pretty_printer_t(const wcstring &src, bool do_indent);
// Original source.
// This is a reference member. NOTE(review): presumably bound to the constructor's 'src'
// argument, in which case the caller must keep that string alive - confirm in the constructor.
const wcstring &source;
// The indents of our string.
// This has the same length as 'source' and describes the indentation level.
const std::vector<int> indents;
// The parsed ast.
rust::Box<Ast> ast;
// The Rust-side companion object. NOTE(review): presumably drives the AST traversal and calls
// back into the emit_*/visit_* methods below - confirm against the Rust PrettyPrinter.
rust::Box<PrettyPrinter> visitor;
// The prettifier output.
wcstring output;
// The indent of the source range which we are currently emitting.
int current_indent{0};
// Whether to indent, or just insert spaces.
const bool do_indent;
// Whether the next gap text should hide the first newline.
bool gap_text_mask_newline{false};
// The "gaps": a sorted set of ranges between tokens.
// These contain whitespace, comments, semicolons, and other lexical elements which are not
// present in the ast.
const std::vector<source_range_t> gaps;
// The sorted set of source offsets of semi_nl_t nodes which should be set as semis, not
// newlines. This is computed ahead of time for convenience.
const std::vector<uint32_t> preferred_semi_locations;
// Flags we support.
using gap_flags_t = uint32_t;
enum {
default_flags = 0,
// Whether to allow line splitting via escaped newlines.
// For example, in argument lists:
//
// echo a \
// b
//
// If this is not set, then split-lines will be joined.
allow_escaped_newlines = 1 << 0,
// Whether to skip the separating space before this token.
// This is used when emitting semis:
// echo a; echo b;
// No space required between 'a' and ';', or 'b' and ';'.
skip_space = 1 << 1,
};
#if INCLUDE_RUST_HEADERS
// \return gap text flags for the gap text that comes *before* a given node type.
static gap_flags_t gap_text_flags_before_node(const ast::node_t &node);
#endif
// \return whether we are at the start of a new line.
// This is the case when nothing has been output yet, or the output ends with a newline.
bool at_line_start() const { return output.empty() || output.back() == L'\n'; }
// \return whether we have a space before the output.
// This ignores escaped spaces and escaped newlines.
bool has_preceding_space() const;
// Entry point. Prettify our source code and return it.
wcstring prettify();
// \return a substring of source.
wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); }
// Return the gap ranges from our ast.
std::vector<source_range_t> compute_gaps() const;
// Return sorted list of semi-preferring semi_nl nodes.
// Each entry is the source offset at which such a node starts.
std::vector<uint32_t> compute_preferred_semi_locations() const;
// Emit a space or indent as necessary, depending on the previous output.
// At the start of a line this emits the indentation for current_indent; otherwise it emits a
// single space unless \p flags contains skip_space or the output already ends with a space.
void emit_space_or_indent(gap_flags_t flags = default_flags);
// Emit "gap text:" newlines and comments from the original source.
// Gap text may be a few things:
//
// 1. Just a space is common. We will trim the spaces to be empty.
//
// 2. It may be a comment.
// Here the gap text is the comment, followed by the newline:
//
// echo abc # arg
// echo def
//
// 3. It may also be an escaped newline:
// Here the gap text is a space, backslash, newline, space.
//
// echo \
// hi
//
// 4. Lastly it may be an error, if there was an error token. Here the gap text is the pipe:
//
// begin | stuff
//
// We do not handle errors here - instead our caller does.
// \return whether the gap text ended with a comment, after which a newline was emitted.
bool emit_gap_text(source_range_t range, gap_flags_t flags);
/// \return the gap text ending at a given index into the string, or empty if none.
source_range_t gap_text_to(uint32_t end) const;
/// \return whether a range \p r overlaps an error range from our ast.
bool range_contained_error(source_range_t r) const;
// Emit the gap text before a source range.
bool emit_gap_text_before(source_range_t r, gap_flags_t flags);
/// Given a string \p input, remove unnecessary quotes, etc.
wcstring clean_text(const wcstring &input);
// Emit a range of original text. This indents as needed, and also inserts preceding gap text.
// If \p flags contains allow_escaped_newlines, then permit escaped newlines; otherwise collapse
// such lines.
void emit_text(source_range_t r, gap_flags_t flags);
// NOTE(review): the 'const void *' parameters of emit_node_text() and the visit_*() methods
// below appear to be type-erased ast node pointers - confirm the expected node type of each
// against its definition.
void emit_node_text(const void *node);
// Emit one newline.
void emit_newline() { output.push_back(L'\n'); }
// Emit a semicolon.
void emit_semi() { output.push_back(L';'); }
void visit_semi_nl(const void *node_);
void visit_redirection(const void *node_);
void visit_maybe_newlines(const void *node_);
void visit_begin_header();
// The flags we use to parse.
static parse_tree_flags_t parse_flags() {
return parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_leave_unterminated | parse_flag_show_blank_lines;
}
};
#endif // FISH_INDENT_STAGING_H

View file

@ -23,6 +23,7 @@
#include "cxxgen.h" #include "cxxgen.h"
#include "env.h" #include "env.h"
#include "fallback.h" // IWYU pragma: keep #include "fallback.h" // IWYU pragma: keep
#include "ffi_baggage.h"
#include "ffi_init.rs.h" #include "ffi_init.rs.h"
#include "fish_version.h" #include "fish_version.h"
#include "input.h" #include "input.h"

View file

@ -65,6 +65,7 @@
#include "fd_monitor.rs.h" #include "fd_monitor.rs.h"
#include "fd_readable_set.rs.h" #include "fd_readable_set.rs.h"
#include "fds.h" #include "fds.h"
#include "ffi_baggage.h"
#include "ffi_init.rs.h" #include "ffi_init.rs.h"
#include "ffi_tests.rs.h" #include "ffi_tests.rs.h"
#include "function.h" #include "function.h"
@ -928,17 +929,17 @@ static void test_debounce_timeout() {
static parser_test_error_bits_t detect_argument_errors(const wcstring &src) { static parser_test_error_bits_t detect_argument_errors(const wcstring &src) {
using namespace ast; using namespace ast;
auto ast = ast_t::parse_argument_list(src, parse_flag_none); auto ast = ast_parse_argument_list(src, parse_flag_none);
if (ast.errored()) { if (ast->errored()) {
return PARSER_TEST_ERROR; return PARSER_TEST_ERROR;
} }
const ast::argument_t *first_arg = const ast::argument_t *first_arg =
ast.top()->as<freestanding_argument_list_t>()->arguments.at(0); ast->top()->as_freestanding_argument_list().arguments().at(0);
if (!first_arg) { if (!first_arg) {
err(L"Failed to parse an argument"); err(L"Failed to parse an argument");
return 0; return 0;
} }
return parse_util_detect_errors_in_argument(*first_arg, first_arg->source(src)); return parse_util_detect_errors_in_argument(*first_arg, *first_arg->source(src));
} }
/// Test the parser. /// Test the parser.
@ -3066,9 +3067,11 @@ static void test_autoload() {
static std::shared_ptr<function_properties_t> make_test_func_props() { static std::shared_ptr<function_properties_t> make_test_func_props() {
auto ret = std::make_shared<function_properties_t>(); auto ret = std::make_shared<function_properties_t>();
ret->parsed_source = parse_source(L"function stuff; end", parse_flag_none, nullptr); ret->parsed_source = parse_source(L"function stuff; end", parse_flag_none, nullptr);
assert(ret->parsed_source && "Failed to parse"); assert(ret->parsed_source->has_value() && "Failed to parse");
for (const auto &node : ret->parsed_source->ast) { for (auto ast_traversal = new_ast_traversal(*ret->parsed_source->ast().top());;) {
if (const auto *s = node.try_as<ast::block_statement_t>()) { auto node = ast_traversal->next();
if (!node->has_value()) break;
if (const auto *s = node->try_as_block_statement()) {
ret->func_node = s; ret->func_node = s;
break; break;
} }
@ -4757,8 +4760,8 @@ static void test_new_parser_correctness() {
}; };
for (const auto &test : parser_tests) { for (const auto &test : parser_tests) {
auto ast = ast::ast_t::parse(test.src); auto ast = ast_parse(test.src);
bool success = !ast.errored(); bool success = !ast->errored();
if (success && !test.ok) { if (success && !test.ok) {
err(L"\"%ls\" should NOT have parsed, but did", test.src); err(L"\"%ls\" should NOT have parsed, but did", test.src);
} else if (!success && test.ok) { } else if (!success && test.ok) {
@ -4811,7 +4814,7 @@ static void test_new_parser_fuzzing() {
unsigned long permutation = 0; unsigned long permutation = 0;
while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++, while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++,
&src)) { &src)) {
ast::ast_t::parse(src); ast_parse(src);
} }
if (log_it) std::fwprintf(stderr, L"done (%lu)\n", permutation); if (log_it) std::fwprintf(stderr, L"done (%lu)\n", permutation);
} }
@ -4828,13 +4831,15 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o
out_joined_args->clear(); out_joined_args->clear();
*out_deco = statement_decoration_t::none; *out_deco = statement_decoration_t::none;
auto ast = ast_t::parse(src); auto ast = ast_parse(src);
if (ast.errored()) return false; if (ast->errored()) return false;
// Get the statement. Should only have one. // Get the statement. Should only have one.
const decorated_statement_t *statement = nullptr; const decorated_statement_t *statement = nullptr;
for (const auto &n : ast) { for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
if (const auto *tmp = n.try_as<decorated_statement_t>()) { auto n = ast_traversal->next();
if (!n->has_value()) break;
if (const auto *tmp = n->try_as_decorated_statement()) {
if (statement) { if (statement) {
say(L"More than one decorated statement found in '%ls'", src.c_str()); say(L"More than one decorated statement found in '%ls'", src.c_str());
return false; return false;
@ -4849,14 +4854,15 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o
// Return its decoration and command. // Return its decoration and command.
*out_deco = statement->decoration(); *out_deco = statement->decoration();
*out_cmd = statement->command.source(src); *out_cmd = *statement->command().source(src);
// Return arguments separated by spaces. // Return arguments separated by spaces.
bool first = true; bool first = true;
for (const ast::argument_or_redirection_t &arg : statement->args_or_redirs) { for (size_t i = 0; i < statement->args_or_redirs().count(); i++) {
const ast::argument_or_redirection_t &arg = *statement->args_or_redirs().at(i);
if (!arg.is_argument()) continue; if (!arg.is_argument()) continue;
if (!first) out_joined_args->push_back(L' '); if (!first) out_joined_args->push_back(L' ');
out_joined_args->append(arg.source(src)); out_joined_args->append(*arg.ptr()->source(src));
first = false; first = false;
} }
@ -4868,14 +4874,16 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o
template <ast::type_t Type> template <ast::type_t Type>
static void check_function_help(const wchar_t *src) { static void check_function_help(const wchar_t *src) {
using namespace ast; using namespace ast;
auto ast = ast_t::parse(src); auto ast = ast_parse(src);
if (ast.errored()) { if (ast->errored()) {
err(L"Failed to parse '%ls'", src); err(L"Failed to parse '%ls'", src);
} }
int count = 0; int count = 0;
for (const node_t &node : ast) { for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
count += (node.type == Type); auto node = ast_traversal->next();
if (!node->has_value()) break;
count += (node->typ() == Type);
} }
if (count == 0) { if (count == 0) {
err(L"Failed to find node of type '%ls'", ast_type_to_string(Type)); err(L"Failed to find node of type '%ls'", ast_type_to_string(Type));
@ -4939,16 +4947,18 @@ static void test_new_parser_ad_hoc() {
// Ensure that 'case' terminates a job list. // Ensure that 'case' terminates a job list.
const wcstring src = L"switch foo ; case bar; case baz; end"; const wcstring src = L"switch foo ; case bar; case baz; end";
auto ast = ast_t::parse(src); auto ast = ast_parse(src);
if (ast.errored()) { if (ast->errored()) {
err(L"Parsing failed"); err(L"Parsing failed");
} }
// Expect two case_item_lists. The bug was that we'd // Expect two case_item_lists. The bug was that we'd
// try to run a command 'case'. // try to run a command 'case'.
int count = 0; int count = 0;
for (const auto &n : ast) { for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
count += (n.type == type_t::case_item); auto n = ast_traversal->next();
if (!n->has_value()) break;
count += (n->typ() == type_t::case_item);
} }
if (count != 2) { if (count != 2) {
err(L"Expected 2 case item nodes, found %d", count); err(L"Expected 2 case item nodes, found %d", count);
@ -4959,27 +4969,27 @@ static void test_new_parser_ad_hoc() {
// leading to an infinite loop. // leading to an infinite loop.
// By itself it should produce an error. // By itself it should produce an error.
ast = ast_t::parse(L"a="); ast = ast_parse(L"a=");
do_test(ast.errored()); do_test(ast->errored());
// If we are leaving things unterminated, this should not produce an error. // If we are leaving things unterminated, this should not produce an error.
// i.e. when typing "a=" at the command line, it should be treated as valid // i.e. when typing "a=" at the command line, it should be treated as valid
// because we don't want to color it as an error. // because we don't want to color it as an error.
ast = ast_t::parse(L"a=", parse_flag_leave_unterminated); ast = ast_parse(L"a=", parse_flag_leave_unterminated);
do_test(!ast.errored()); do_test(!ast->errored());
auto errors = new_parse_error_list(); auto errors = new_parse_error_list();
ast = ast_t::parse(L"begin; echo (", parse_flag_leave_unterminated, &*errors); ast = ast_parse(L"begin; echo (", parse_flag_leave_unterminated, &*errors);
do_test(errors->size() == 1 && do_test(errors->size() == 1 &&
errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_subshell); errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_subshell);
errors->clear(); errors->clear();
ast = ast_t::parse(L"for x in (", parse_flag_leave_unterminated, &*errors); ast = ast_parse(L"for x in (", parse_flag_leave_unterminated, &*errors);
do_test(errors->size() == 1 && do_test(errors->size() == 1 &&
errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_subshell); errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_subshell);
errors->clear(); errors->clear();
ast = ast_t::parse(L"begin; echo '", parse_flag_leave_unterminated, &*errors); ast = ast_parse(L"begin; echo '", parse_flag_leave_unterminated, &*errors);
do_test(errors->size() == 1 && do_test(errors->size() == 1 &&
errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_quote); errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_quote);
} }
@ -5013,8 +5023,8 @@ static void test_new_parser_errors() {
parse_error_code_t expected_code = test.code; parse_error_code_t expected_code = test.code;
auto errors = new_parse_error_list(); auto errors = new_parse_error_list();
auto ast = ast::ast_t::parse(src, parse_flag_none, &*errors); auto ast = ast_parse(src, parse_flag_none, &*errors);
if (!ast.errored()) { if (!ast->errored()) {
err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str()); err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str());
} }

View file

@ -227,13 +227,14 @@ void function_remove(const wcstring &name) {
static wcstring get_function_body_source(const function_properties_t &props) { static wcstring get_function_body_source(const function_properties_t &props) {
// We want to preserve comments that the AST attaches to the header (#5285). // We want to preserve comments that the AST attaches to the header (#5285).
// Take everything from the end of the header to the 'end' keyword. // Take everything from the end of the header to the 'end' keyword.
auto header_src = props.func_node->header->try_source_range(); if (props.func_node->header().ptr()->try_source_range() &&
auto end_kw_src = props.func_node->end.try_source_range(); props.func_node->end().try_source_range()) {
if (header_src && end_kw_src) { auto header_src = props.func_node->header().ptr()->source_range();
uint32_t body_start = header_src->start + header_src->length; auto end_kw_src = props.func_node->end().range();
uint32_t body_end = end_kw_src->start; uint32_t body_start = header_src.start + header_src.length;
uint32_t body_end = end_kw_src.start;
assert(body_start <= body_end && "end keyword should come after header"); assert(body_start <= body_end && "end keyword should come after header");
return wcstring(props.parsed_source->src, body_start, body_end - body_start); return wcstring(props.parsed_source->src(), body_start, body_end - body_start);
} }
return wcstring{}; return wcstring{};
} }
@ -308,6 +309,25 @@ void function_invalidate_path() {
funcset->autoloader.clear(); funcset->autoloader.clear();
} }
function_properties_t::function_properties_t() : parsed_source(empty_parsed_source_ref()) {}
function_properties_t::function_properties_t(const function_properties_t &other)
: parsed_source(empty_parsed_source_ref()) {
*this = other;
}
function_properties_t &function_properties_t::operator=(const function_properties_t &other) {
parsed_source = other.parsed_source->clone();
func_node = other.func_node;
named_arguments = other.named_arguments;
description = other.description;
inherit_vars = other.inherit_vars;
shadow_scope = other.shadow_scope;
is_autoload = other.is_autoload;
definition_file = other.definition_file;
return *this;
}
wcstring function_properties_t::annotated_definition(const wcstring &name) const { wcstring function_properties_t::annotated_definition(const wcstring &name) const {
wcstring out; wcstring out;
wcstring desc = this->localized_description(); wcstring desc = this->localized_description();
@ -415,10 +435,10 @@ int function_properties_t::definition_lineno() const {
// return one plus the number of newlines at offsets less than the start of our function's // return one plus the number of newlines at offsets less than the start of our function's
// statement (which includes the header). // statement (which includes the header).
// TODO: merge with line_offset_of_character_at_offset? // TODO: merge with line_offset_of_character_at_offset?
auto source_range = func_node->try_source_range(); assert(func_node->try_source_range() && "Function has no source range");
assert(source_range && "Function has no source range"); auto source_range = func_node->source_range();
uint32_t func_start = source_range->start; uint32_t func_start = source_range.start;
const wcstring &source = parsed_source->src; const wcstring &source = parsed_source->src();
assert(func_start <= source.size() && "function start out of bounds"); assert(func_start <= source.size() && "function start out of bounds");
return 1 + std::count(source.begin(), source.begin() + func_start, L'\n'); return 1 + std::count(source.begin(), source.begin() + func_start, L'\n');
} }

View file

@ -8,19 +8,20 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include "ast.h"
#include "common.h" #include "common.h"
#include "parse_tree.h" #include "parse_tree.h"
class parser_t; class parser_t;
namespace ast {
struct block_statement_t;
}
/// A function's constant properties. These do not change once initialized. /// A function's constant properties. These do not change once initialized.
struct function_properties_t { struct function_properties_t {
function_properties_t();
function_properties_t(const function_properties_t &other);
function_properties_t &operator=(const function_properties_t &other);
/// Parsed source containing the function. /// Parsed source containing the function.
parsed_source_ref_t parsed_source; rust::Box<parsed_source_ref_t> parsed_source;
/// Node containing the function statement, pointing into parsed_source. /// Node containing the function statement, pointing into parsed_source.
/// We store block_statement, not job_list, so that comments attached to the header are /// We store block_statement, not job_list, so that comments attached to the header are

View file

@ -26,6 +26,7 @@
#include "fallback.h" // IWYU pragma: keep #include "fallback.h" // IWYU pragma: keep
#include "function.h" #include "function.h"
#include "future_feature_flags.h" #include "future_feature_flags.h"
#include "highlight.rs.h"
#include "history.h" #include "history.h"
#include "maybe.h" #include "maybe.h"
#include "operation_context.h" #include "operation_context.h"
@ -331,7 +332,7 @@ static bool statement_get_expanded_command(const wcstring &src,
const ast::decorated_statement_t &stmt, const ast::decorated_statement_t &stmt,
const operation_context_t &ctx, wcstring *out_cmd) { const operation_context_t &ctx, wcstring *out_cmd) {
// Get the command. Try expanding it. If we cannot, it's an error. // Get the command. Try expanding it. If we cannot, it's an error.
maybe_t<wcstring> cmd = stmt.command.source(src); maybe_t<wcstring> cmd = stmt.command().source(src);
if (!cmd) return false; if (!cmd) return false;
expand_result_t err = expand_to_command_and_args(*cmd, ctx, out_cmd, nullptr); expand_result_t err = expand_to_command_and_args(*cmd, ctx, out_cmd, nullptr);
return err == expand_result_t::ok; return err == expand_result_t::ok;
@ -413,21 +414,21 @@ static bool has_expand_reserved(const wcstring &str) {
// command (as a string), if any. This is used to validate autosuggestions. // command (as a string), if any. This is used to validate autosuggestions.
static void autosuggest_parse_command(const wcstring &buff, const operation_context_t &ctx, static void autosuggest_parse_command(const wcstring &buff, const operation_context_t &ctx,
wcstring *out_expanded_command, wcstring *out_arg) { wcstring *out_expanded_command, wcstring *out_arg) {
auto ast = ast::ast_t::parse( auto ast =
buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens); ast_parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens);
// Find the first statement. // Find the first statement.
const ast::decorated_statement_t *first_statement = nullptr; const ast::decorated_statement_t *first_statement = nullptr;
if (const ast::job_conjunction_t *jc = ast.top()->as<ast::job_list_t>()->at(0)) { if (const ast::job_conjunction_t *jc = ast->top()->as_job_list().at(0)) {
first_statement = jc->job.statement.contents->try_as<ast::decorated_statement_t>(); first_statement = jc->job().statement().contents().ptr()->try_as_decorated_statement();
} }
if (first_statement && if (first_statement &&
statement_get_expanded_command(buff, *first_statement, ctx, out_expanded_command)) { statement_get_expanded_command(buff, *first_statement, ctx, out_expanded_command)) {
// Check if the first argument or redirection is, in fact, an argument. // Check if the first argument or redirection is, in fact, an argument.
if (const auto *arg_or_redir = first_statement->args_or_redirs.at(0)) { if (const auto *arg_or_redir = first_statement->args_or_redirs().at(0)) {
if (arg_or_redir && arg_or_redir->is_argument()) { if (arg_or_redir && arg_or_redir->is_argument()) {
*out_arg = arg_or_redir->argument().source(buff); *out_arg = *arg_or_redir->argument().source(buff);
} }
} }
} }
@ -776,83 +777,17 @@ static void color_string_internal(const wcstring &buffstr, highlight_spec_t base
} }
} }
namespace { highlighter_t::highlighter_t(const wcstring &str, maybe_t<size_t> cursor,
/// Syntax highlighter helper. const operation_context_t &ctx, wcstring wd, bool can_do_io)
class highlighter_t {
// The string we're highlighting. Note this is a reference member variable (to avoid copying)!
// We must not outlive this!
const wcstring &buff;
// The position of the cursor within the string.
const maybe_t<size_t> cursor;
// The operation context. Again, a reference member variable!
const operation_context_t &ctx;
// Whether it's OK to do I/O.
const bool io_ok;
// Working directory.
const wcstring working_directory;
// The ast we produced.
ast::ast_t ast;
// The resulting colors.
using color_array_t = std::vector<highlight_spec_t>;
color_array_t color_array;
// A stack of variables that the current commandline probably defines. We mark redirections
// as valid if they use one of these variables, to avoid marking valid targets as error.
std::vector<wcstring> pending_variables;
// Flags we use for AST parsing.
static constexpr parse_tree_flags_t ast_flags =
parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated |
parse_flag_show_extra_semis;
bool io_still_ok() const { return io_ok && !ctx.check_cancel(); }
// Color a command.
void color_command(const ast::string_t &node);
// Color a node as if it were an argument.
void color_as_argument(const ast::node_t &node, bool options_allowed = true);
// Colors the source range of a node with a given color.
void color_node(const ast::node_t &node, highlight_spec_t color);
// Colors a range with a given color.
void color_range(source_range_t range, highlight_spec_t color);
/// \return a substring of our buffer.
wcstring get_source(source_range_t r) const;
public:
// Visit the children of a node.
void visit_children(const ast::node_t &node) {
ast::node_visitor(*this).accept_children_of(&node);
}
// AST visitor implementations.
void visit(const ast::keyword_base_t &kw);
void visit(const ast::token_base_t &tok);
void visit(const ast::redirection_t &redir);
void visit(const ast::variable_assignment_t &varas);
void visit(const ast::semi_nl_t &semi_nl);
void visit(const ast::decorated_statement_t &stmt);
void visit(const ast::block_statement_t &block);
// Visit an argument, perhaps knowing that our command is cd.
void visit(const ast::argument_t &arg, bool cmd_is_cd = false, bool options_allowed = true);
// Default implementation is to just visit children.
void visit(const ast::node_t &node) { visit_children(node); }
// Constructor
highlighter_t(const wcstring &str, maybe_t<size_t> cursor, const operation_context_t &ctx,
wcstring wd, bool can_do_io)
: buff(str), : buff(str),
cursor(cursor), cursor(cursor),
ctx(ctx), ctx(ctx),
io_ok(can_do_io), io_ok(can_do_io),
working_directory(std::move(wd)), working_directory(std::move(wd)),
ast(ast::ast_t::parse(buff, ast_flags)) {} ast(ast_parse(buff, ast_flags)),
highlighter(new_highlighter(*this, *ast)) {}
// Perform highlighting, returning an array of colors. bool highlighter_t::io_still_ok() const { return io_ok && !ctx.check_cancel(); }
color_array_t highlight();
};
wcstring highlighter_t::get_source(source_range_t r) const { wcstring highlighter_t::get_source(source_range_t r) const {
assert(r.start + r.length >= r.start && "Overflow"); assert(r.start + r.length >= r.start && "Overflow");
@ -961,9 +896,9 @@ static bool range_is_potential_path(const wcstring &src, const source_range_t &r
return result; return result;
} }
void highlighter_t::visit(const ast::keyword_base_t &kw) { void highlighter_t::visit_keyword(const ast::node_t *kw) {
highlight_role_t role = highlight_role_t::normal; highlight_role_t role = highlight_role_t::normal;
switch (kw.kw) { switch (kw->kw()) {
case parse_keyword_t::kw_begin: case parse_keyword_t::kw_begin:
case parse_keyword_t::kw_builtin: case parse_keyword_t::kw_builtin:
case parse_keyword_t::kw_case: case parse_keyword_t::kw_case:
@ -991,12 +926,12 @@ void highlighter_t::visit(const ast::keyword_base_t &kw) {
case parse_keyword_t::none: case parse_keyword_t::none:
break; break;
} }
color_node(kw, role); color_node(*kw, role);
} }
void highlighter_t::visit(const ast::token_base_t &tok) { void highlighter_t::visit_token(const ast::node_t *tok) {
maybe_t<highlight_role_t> role = highlight_role_t::normal; maybe_t<highlight_role_t> role = highlight_role_t::normal;
switch (tok.type) { switch (tok->token_type()) {
case parse_token_type_t::end: case parse_token_type_t::end:
case parse_token_type_t::pipe: case parse_token_type_t::pipe:
case parse_token_type_t::background: case parse_token_type_t::background:
@ -1017,15 +952,16 @@ void highlighter_t::visit(const ast::token_base_t &tok) {
default: default:
break; break;
} }
if (role) color_node(tok, *role); if (role) color_node(*tok, *role);
} }
void highlighter_t::visit(const ast::semi_nl_t &semi_nl) { void highlighter_t::visit_semi_nl(const ast::node_t *semi_nl) {
color_node(semi_nl, highlight_role_t::statement_terminator); color_node(*semi_nl, highlight_role_t::statement_terminator);
} }
void highlighter_t::visit(const ast::argument_t &arg, bool cmd_is_cd, bool options_allowed) { void highlighter_t::visit_argument(const void *arg_, bool cmd_is_cd, bool options_allowed) {
color_as_argument(arg, options_allowed); const auto &arg = *static_cast<const ast::argument_t *>(arg_);
color_as_argument(*arg.ptr(), options_allowed);
if (!io_still_ok()) { if (!io_still_ok()) {
return; return;
} }
@ -1034,7 +970,7 @@ void highlighter_t::visit(const ast::argument_t &arg, bool cmd_is_cd, bool optio
bool at_cursor = cursor.has_value() && arg.source_range().contains_inclusive(*cursor); bool at_cursor = cursor.has_value() && arg.source_range().contains_inclusive(*cursor);
if (cmd_is_cd) { if (cmd_is_cd) {
// Mark this as an error if it's not 'help' and not a valid cd path. // Mark this as an error if it's not 'help' and not a valid cd path.
wcstring param = arg.source(this->buff); wcstring param = *arg.source(this->buff);
if (expand_one(param, expand_flag::skip_cmdsubst, ctx)) { if (expand_one(param, expand_flag::skip_cmdsubst, ctx)) {
bool is_help = bool is_help =
string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h"); string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h");
@ -1042,45 +978,51 @@ void highlighter_t::visit(const ast::argument_t &arg, bool cmd_is_cd, bool optio
is_valid_path = is_potential_cd_path(param, at_cursor, working_directory, ctx, is_valid_path = is_potential_cd_path(param, at_cursor, working_directory, ctx,
PATH_EXPAND_TILDE); PATH_EXPAND_TILDE);
if (!is_valid_path) { if (!is_valid_path) {
this->color_node(arg, highlight_role_t::error); this->color_node(*arg.ptr(), highlight_role_t::error);
} }
} }
} }
} else if (range_is_potential_path(buff, arg.range, at_cursor, ctx, working_directory)) { } else if (range_is_potential_path(buff, arg.range(), at_cursor, ctx, working_directory)) {
is_valid_path = true; is_valid_path = true;
} }
if (is_valid_path) if (is_valid_path)
for (size_t i = arg.range.start, end = arg.range.start + arg.range.length; i < end; i++) for (size_t i = arg.range().start, end = arg.range().start + arg.range().length; i < end;
i++)
this->color_array.at(i).valid_path = true; this->color_array.at(i).valid_path = true;
} }
void highlighter_t::visit(const ast::variable_assignment_t &varas) { void highlighter_t::visit_variable_assignment(const void *varas_) {
color_as_argument(varas); const auto &varas = *static_cast<const ast::variable_assignment_t *>(varas_);
color_as_argument(*varas.ptr());
// Highlight the '=' in variable assignments as an operator. // Highlight the '=' in variable assignments as an operator.
auto where = variable_assignment_equals_pos(varas.source(this->buff)); auto where = variable_assignment_equals_pos(*varas.source(this->buff));
if (where) { if (where) {
size_t equals_loc = varas.source_range().start + *where; size_t equals_loc = varas.source_range().start + *where;
this->color_array.at(equals_loc) = highlight_role_t::operat; this->color_array.at(equals_loc) = highlight_role_t::operat;
auto var_name = varas.source(this->buff).substr(0, *where); auto var_name = varas.source(this->buff)->substr(0, *where);
this->pending_variables.push_back(std::move(var_name)); this->pending_variables.push_back(std::move(var_name));
} }
} }
void highlighter_t::visit(const ast::decorated_statement_t &stmt) { void highlighter_t::visit_decorated_statement(const void *stmt_) {
const auto &stmt = *static_cast<const ast::decorated_statement_t *>(stmt_);
// Color any decoration. // Color any decoration.
if (stmt.opt_decoration) this->visit(*stmt.opt_decoration); if (stmt.has_opt_decoration()) {
auto decoration = stmt.opt_decoration().ptr();
this->visit_keyword(&*decoration);
}
// Color the command's source code. // Color the command's source code.
// If we get no source back, there's nothing to color. // If we get no source back, there's nothing to color.
maybe_t<wcstring> cmd = stmt.command.try_source(this->buff); if (!stmt.command().try_source_range()) return;
if (!cmd.has_value()) return; wcstring cmd = *stmt.command().source(this->buff);
wcstring expanded_cmd; wcstring expanded_cmd;
bool is_valid_cmd = false; bool is_valid_cmd = false;
if (!this->io_still_ok()) { if (!this->io_still_ok()) {
// We cannot check if the command is invalid, so just assume it's valid. // We cannot check if the command is invalid, so just assume it's valid.
is_valid_cmd = true; is_valid_cmd = true;
} else if (variable_assignment_equals_pos(*cmd)) { } else if (variable_assignment_equals_pos(cmd)) {
is_valid_cmd = true; is_valid_cmd = true;
} else { } else {
// Check to see if the command is valid. // Check to see if the command is valid.
@ -1094,9 +1036,9 @@ void highlighter_t::visit(const ast::decorated_statement_t &stmt) {
// Color our statement. // Color our statement.
if (is_valid_cmd) { if (is_valid_cmd) {
this->color_command(stmt.command); this->color_command(stmt.command());
} else { } else {
this->color_node(stmt.command, highlight_role_t::error); this->color_node(*stmt.command().ptr(), highlight_role_t::error);
} }
// Color arguments and redirections. // Color arguments and redirections.
@ -1105,34 +1047,36 @@ void highlighter_t::visit(const ast::decorated_statement_t &stmt) {
bool is_set = (expanded_cmd == L"set"); bool is_set = (expanded_cmd == L"set");
// If we have seen a "--" argument, color all options from then on as normal arguments. // If we have seen a "--" argument, color all options from then on as normal arguments.
bool have_dashdash = false; bool have_dashdash = false;
for (const ast::argument_or_redirection_t &v : stmt.args_or_redirs) { for (size_t i = 0; i < stmt.args_or_redirs().count(); i++) {
const auto &v = *stmt.args_or_redirs().at(i);
if (v.is_argument()) { if (v.is_argument()) {
if (is_set) { if (is_set) {
auto arg = v.argument().source(this->buff); auto arg = *v.argument().source(this->buff);
if (valid_var_name(arg)) { if (valid_var_name(arg)) {
this->pending_variables.push_back(std::move(arg)); this->pending_variables.push_back(std::move(arg));
is_set = false; is_set = false;
} }
} }
this->visit(v.argument(), is_cd, !have_dashdash); this->visit_argument(&v.argument(), is_cd, !have_dashdash);
if (v.argument().source(this->buff) == L"--") have_dashdash = true; if (*v.argument().source(this->buff) == L"--") have_dashdash = true;
} else { } else {
this->visit(v.redirection()); this->visit_redirection(&v.redirection());
} }
} }
} }
void highlighter_t::visit(const ast::block_statement_t &block) { size_t highlighter_t::visit_block_statement1(const void *block_) {
this->visit(*block.header.contents.get()); const auto &block = *static_cast<const ast::block_statement_t *>(block_);
this->visit(block.args_or_redirs); auto bh = block.header().ptr();
const ast::node_t &bh = *block.header.contents;
size_t pending_variables_count = this->pending_variables.size(); size_t pending_variables_count = this->pending_variables.size();
if (const auto *fh = bh.try_as<ast::for_header_t>()) { if (const auto *fh = bh->try_as_for_header()) {
auto var_name = fh->var_name.source(this->buff); auto var_name = *fh->var_name().source(this->buff);
pending_variables.push_back(std::move(var_name)); pending_variables.push_back(std::move(var_name));
} }
this->visit(block.jobs); return pending_variables_count;
this->visit(block.end); }
void highlighter_t::visit_block_statement2(size_t pending_variables_count) {
pending_variables.resize(pending_variables_count); pending_variables.resize(pending_variables_count);
} }
@ -1158,9 +1102,10 @@ static bool contains_pending_variable(const std::vector<wcstring> &pending_varia
return false; return false;
} }
void highlighter_t::visit(const ast::redirection_t &redir) { void highlighter_t::visit_redirection(const void *redir_) {
auto oper = pipe_or_redir_from_string(redir.oper.source(this->buff).c_str()); // like 2> const auto &redir = *static_cast<const ast::redirection_t *>(redir_);
wcstring target = redir.target.source(this->buff); // like &1 or file path auto oper = pipe_or_redir_from_string(redir.oper().source(this->buff)->c_str()); // like 2>
wcstring target = *redir.target().source(this->buff); // like &1 or file path
assert(oper && "Should have successfully parsed a pipe_or_redir_t since it was in our ast"); assert(oper && "Should have successfully parsed a pipe_or_redir_t since it was in our ast");
@ -1168,18 +1113,18 @@ void highlighter_t::visit(const ast::redirection_t &redir) {
// It may have parsed successfully yet still be invalid (e.g. 9999999999999>&1) // It may have parsed successfully yet still be invalid (e.g. 9999999999999>&1)
// If so, color the whole thing invalid and stop. // If so, color the whole thing invalid and stop.
if (!oper->is_valid()) { if (!oper->is_valid()) {
this->color_node(redir, highlight_role_t::error); this->color_node(*redir.ptr(), highlight_role_t::error);
return; return;
} }
// Color the operator part like 2>. // Color the operator part like 2>.
this->color_node(redir.oper, highlight_role_t::redirection); this->color_node(*redir.oper().ptr(), highlight_role_t::redirection);
// Color the target part. // Color the target part.
// Check if the argument contains a command substitution. If so, highlight it as a param // Check if the argument contains a command substitution. If so, highlight it as a param
// even though it's a command redirection, and don't try to do any other validation. // even though it's a command redirection, and don't try to do any other validation.
if (has_cmdsub(target)) { if (has_cmdsub(target)) {
this->color_as_argument(redir.target); this->color_as_argument(*redir.target().ptr());
} else { } else {
// No command substitution, so we can highlight the target file or fd. For example, // No command substitution, so we can highlight the target file or fd. For example,
// disallow redirections into a non-existent directory. // disallow redirections into a non-existent directory.
@ -1266,7 +1211,7 @@ void highlighter_t::visit(const ast::redirection_t &redir) {
} }
} }
} }
this->color_node(redir.target, this->color_node(*redir.target().ptr(),
target_is_valid ? highlight_role_t::redirection : highlight_role_t::error); target_is_valid ? highlight_role_t::redirection : highlight_role_t::error);
} }
} }
@ -1280,28 +1225,27 @@ highlighter_t::color_array_t highlighter_t::highlight() {
this->color_array.resize(this->buff.size()); this->color_array.resize(this->buff.size());
std::fill(this->color_array.begin(), this->color_array.end(), highlight_spec_t{}); std::fill(this->color_array.begin(), this->color_array.end(), highlight_spec_t{});
this->visit_children(*ast.top()); this->highlighter->visit_children(*ast->top());
if (ctx.check_cancel()) return std::move(color_array); if (ctx.check_cancel()) return std::move(color_array);
// Color every comment. // Color every comment.
const auto &extras = ast.extras(); auto extras = ast->extras();
for (const source_range_t &r : extras.comments) { for (const source_range_t &r : extras->comments()) {
this->color_range(r, highlight_role_t::comment); this->color_range(r, highlight_role_t::comment);
} }
// Color every extra semi. // Color every extra semi.
for (const source_range_t &r : extras.semis) { for (const source_range_t &r : extras->semis()) {
this->color_range(r, highlight_role_t::statement_terminator); this->color_range(r, highlight_role_t::statement_terminator);
} }
// Color every error range. // Color every error range.
for (const source_range_t &r : extras.errors) { for (const source_range_t &r : extras->errors()) {
this->color_range(r, highlight_role_t::error); this->color_range(r, highlight_role_t::error);
} }
return std::move(color_array); return std::move(color_array);
} }
} // namespace
/// Determine if a command is valid. /// Determine if a command is valid.
static bool command_is_valid(const wcstring &cmd, statement_decoration_t decoration, static bool command_is_valid(const wcstring &cmd, statement_decoration_t decoration,

View file

@ -11,10 +11,14 @@
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include "ast.h"
#include "color.h" #include "color.h"
#include "cxx.h"
#include "flog.h" #include "flog.h"
#include "maybe.h" #include "maybe.h"
struct Highlighter;
class environment_t; class environment_t;
/// Describes the role of a span of text. /// Describes the role of a span of text.
@ -156,4 +160,76 @@ bool is_potential_path(const wcstring &potential_path_fragment, bool at_cursor,
const wcstring_list_t &directories, const operation_context_t &ctx, const wcstring_list_t &directories, const operation_context_t &ctx,
path_flags_t flags); path_flags_t flags);
/// Syntax highlighter helper.
/// Parses the given string into an AST and computes one highlight_spec_t per character of it.
/// The AST is held in a rust::Box and traversed through the boxed Highlighter companion; the
/// public visit_* methods below are the per-node callbacks implemented on the C++ side.
class highlighter_t {
// The string we're highlighting. Note this is a reference member variable (to avoid copying)!
// We must not outlive this!
const wcstring &buff;
// The position of the cursor within the string, if there is one.
const maybe_t<size_t> cursor;
// The operation context. Again, a reference member variable!
const operation_context_t &ctx;
// Whether it's OK to do I/O.
const bool io_ok;
// Working directory.
const wcstring working_directory;
// The ast we produced by parsing buff with ast_flags.
rust::Box<Ast> ast;
// The Rust-side visitor, constructed from *this and *ast. It must stay declared after `ast`:
// members are initialized in declaration order and the constructor dereferences `ast` here.
rust::Box<Highlighter> highlighter;
// The resulting colors, one entry per character of buff.
using color_array_t = std::vector<highlight_spec_t>;
color_array_t color_array;
// A stack of variables that the current commandline probably defines. We mark redirections
// as valid if they use one of these variables, to avoid marking valid targets as error.
std::vector<wcstring> pending_variables;
// Flags we use for AST parsing.
static constexpr parse_tree_flags_t ast_flags =
parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated |
parse_flag_show_extra_semis;
// \return true if I/O is permitted (io_ok) and the operation context has not been cancelled.
bool io_still_ok() const;
#if INCLUDE_RUST_HEADERS
// Declaring methods with forward-declared opaque Rust types like "ast::node_t" will cause
// undefined reference errors.
// Color a command.
void color_command(const ast::string_t &node);
// Color a node as if it were an argument.
void color_as_argument(const ast::node_t &node, bool options_allowed = true);
// Colors the source range of a node with a given color.
void color_node(const ast::node_t &node, highlight_spec_t color);
// Colors a range with a given color.
void color_range(source_range_t range, highlight_spec_t color);
#endif
public:
/// \return a substring of our buffer. Asserts that r.start + r.length does not overflow.
wcstring get_source(source_range_t r) const;
// AST visitor implementations.
// The `const void *` parameters are type-erased node pointers: each implementation
// static_casts its argument to the concrete AST type named by the method (ast::argument_t,
// ast::redirection_t, ast::variable_assignment_t, ast::decorated_statement_t,
// ast::block_statement_t), so callers must pass exactly that node type.
void visit_keyword(const ast::node_t *kw);
void visit_token(const ast::node_t *tok);
void visit_argument(const void *arg, bool cmd_is_cd, bool options_allowed);
void visit_redirection(const void *redir);
void visit_variable_assignment(const void *varas);
void visit_semi_nl(const ast::node_t *semi_nl);
void visit_decorated_statement(const void *stmt);
// Block statements are handled in two phases. visit_block_statement1 records the current
// size of pending_variables, pushes the loop variable name if the block header is a `for`
// header, and returns the recorded size. visit_block_statement2 takes that size back and
// truncates pending_variables to it, dropping whatever was pushed since phase 1.
size_t visit_block_statement1(const void *block);
void visit_block_statement2(size_t pending_variables_count);
#if INCLUDE_RUST_HEADERS
// Visit an argument, perhaps knowing that our command is cd.
// NOTE(review): the implementation visible in this change is named visit_argument; confirm
// that this overload still has a definition or remove the declaration.
void visit(const ast::argument_t &arg, bool cmd_is_cd = false, bool options_allowed = true);
#endif
// Constructor. `str` and `ctx` are stored by reference and must outlive this object; `wd` is
// moved into working_directory. The string is parsed into `ast` during construction.
highlighter_t(const wcstring &str, maybe_t<size_t> cursor, const operation_context_t &ctx,
wcstring wd, bool can_do_io);
// Perform highlighting, returning an array of colors.
color_array_t highlight();
};
#endif #endif

View file

@ -1202,7 +1202,7 @@ static bool should_import_bash_history_line(const wcstring &line) {
// "<<" here is a proxy for heredocs (and herestrings). // "<<" here is a proxy for heredocs (and herestrings).
if (line.find(L"<<") != std::string::npos) return false; if (line.find(L"<<") != std::string::npos) return false;
if (ast::ast_t::parse(line).errored()) return false; if (ast_parse(line)->errored()) return false;
// In doing this test do not allow incomplete strings. Hence the "false" argument. // In doing this test do not allow incomplete strings. Hence the "false" argument.
auto errors = new_parse_error_list(); auto errors = new_parse_error_list();
@ -1396,16 +1396,18 @@ void history_t::add_pending_with_file_detection(const std::shared_ptr<history_t>
// Find all arguments that look like they could be file paths. // Find all arguments that look like they could be file paths.
bool needs_sync_write = false; bool needs_sync_write = false;
using namespace ast; using namespace ast;
auto ast = ast_t::parse(str); auto ast = ast_parse(str);
path_list_t potential_paths; path_list_t potential_paths;
for (const node_t &node : ast) { for (auto ast_traversal = new_ast_traversal(*ast->top());;) {
if (const argument_t *arg = node.try_as<argument_t>()) { auto node = ast_traversal->next();
wcstring potential_path = arg->source(str); if (!node->has_value()) break;
if (const argument_t *arg = node->try_as_argument()) {
wcstring potential_path = *arg->source(str);
if (string_could_be_path(potential_path)) { if (string_could_be_path(potential_path)) {
potential_paths.push_back(std::move(potential_path)); potential_paths.push_back(std::move(potential_path));
} }
} else if (const decorated_statement_t *stmt = node.try_as<decorated_statement_t>()) { } else if (const decorated_statement_t *stmt = node->try_as_decorated_statement()) {
// Hack hack hack - if the command is likely to trigger an exit, then don't do // Hack hack hack - if the command is likely to trigger an exit, then don't do
// background file detection, because we won't be able to write it to our history file // background file detection, because we won't be able to write it to our history file
// before we exit. // before we exit.
@ -1416,7 +1418,7 @@ void history_t::add_pending_with_file_detection(const std::shared_ptr<history_t>
needs_sync_write = true; needs_sync_write = true;
} }
wcstring command = stmt->command.source(str); wcstring command = *stmt->command().source(str);
unescape_string_in_place(&command, UNESCAPE_DEFAULT); unescape_string_in_place(&command, UNESCAPE_DEFAULT);
if (command == L"exit" || command == L"reboot" || command == L"restart" || if (command == L"exit" || command == L"reboot" || command == L"restart" ||
command == L"echo") { command == L"echo") {

View file

@ -53,37 +53,39 @@ static constexpr bool type_is_redirectable_block(ast::type_t type) {
} }
static bool specific_statement_type_is_redirectable_block(const ast::node_t &node) { static bool specific_statement_type_is_redirectable_block(const ast::node_t &node) {
return type_is_redirectable_block(node.type); return type_is_redirectable_block(node.typ());
} }
/// Get the name of a redirectable block, for profiling purposes. /// Get the name of a redirectable block, for profiling purposes.
static wcstring profiling_cmd_name_for_redirectable_block(const ast::node_t &node, static wcstring profiling_cmd_name_for_redirectable_block(const ast::node_t &node,
const parsed_source_t &pstree) { const parsed_source_ref_t &pstree) {
using namespace ast; using namespace ast;
assert(specific_statement_type_is_redirectable_block(node)); assert(specific_statement_type_is_redirectable_block(node));
auto source_range = node.try_source_range(); assert(node.try_source_range() && "No source range for block");
assert(source_range.has_value() && "No source range for block"); auto source_range = node.source_range();
size_t src_end = 0; size_t src_end = 0;
switch (node.type) { switch (node.typ()) {
case type_t::block_statement: { case type_t::block_statement: {
const node_t *block_header = node.as<block_statement_t>()->header.get(); auto block_header = node.as_block_statement().header().ptr();
switch (block_header->type) { switch (block_header->typ()) {
case type_t::for_header: case type_t::for_header:
src_end = block_header->as<for_header_t>()->semi_nl.source_range().start; src_end = block_header->as_for_header().semi_nl().source_range().start;
break; break;
case type_t::while_header: case type_t::while_header:
src_end = block_header->as<while_header_t>()->condition.source_range().end(); src_end =
block_header->as_while_header().condition().ptr()->source_range().end();
break; break;
case type_t::function_header: case type_t::function_header:
src_end = block_header->as<function_header_t>()->semi_nl.source_range().start; src_end = block_header->as_function_header().semi_nl().source_range().start;
break; break;
case type_t::begin_header: case type_t::begin_header:
src_end = block_header->as<begin_header_t>()->kw_begin.source_range().end(); src_end =
block_header->as_begin_header().kw_begin().ptr()->source_range().end();
break; break;
default: default:
@ -92,11 +94,12 @@ static wcstring profiling_cmd_name_for_redirectable_block(const ast::node_t &nod
} break; } break;
case type_t::if_statement: case type_t::if_statement:
src_end = node.as<if_statement_t>()->if_clause.condition.job.source_range().end(); src_end =
node.as_if_statement().if_clause().condition().job().ptr()->source_range().end();
break; break;
case type_t::switch_statement: case type_t::switch_statement:
src_end = node.as<switch_statement_t>()->semi_nl.source_range().start; src_end = node.as_switch_statement().semi_nl().source_range().start;
break; break;
default: default:
@ -104,10 +107,10 @@ static wcstring profiling_cmd_name_for_redirectable_block(const ast::node_t &nod
break; break;
} }
assert(src_end >= source_range->start && "Invalid source end"); assert(src_end >= source_range.start && "Invalid source end");
// Get the source for the block, and cut it at the next statement terminator. // Get the source for the block, and cut it at the next statement terminator.
wcstring result = pstree.src.substr(source_range->start, src_end - source_range->start); wcstring result = pstree.src().substr(source_range.start, src_end - source_range.start);
result.append(L"..."); result.append(L"...");
return result; return result;
} }
@ -118,7 +121,7 @@ static rust::Box<redirection_spec_t> get_stderr_merge() {
return new_redirection_spec(STDERR_FILENO, redirection_mode_t::fd, stdout_fileno_str); return new_redirection_spec(STDERR_FILENO, redirection_mode_t::fd, stdout_fileno_str);
} }
parse_execution_context_t::parse_execution_context_t(parsed_source_ref_t pstree, parse_execution_context_t::parse_execution_context_t(rust::Box<parsed_source_ref_t> pstree,
const operation_context_t &ctx, const operation_context_t &ctx,
io_chain_t block_io) io_chain_t block_io)
: pstree(std::move(pstree)), : pstree(std::move(pstree)),
@ -129,7 +132,7 @@ parse_execution_context_t::parse_execution_context_t(parsed_source_ref_t pstree,
// Utilities // Utilities
wcstring parse_execution_context_t::get_source(const ast::node_t &node) const { wcstring parse_execution_context_t::get_source(const ast::node_t &node) const {
return node.source(pstree->src); return *node.source(pstree->src());
} }
const ast::decorated_statement_t * const ast::decorated_statement_t *
@ -151,14 +154,16 @@ parse_execution_context_t::infinite_recursive_statement_in_job_list(const ast::j
// Get the first job in the job list. // Get the first job in the job list.
const ast::job_conjunction_t *jc = jobs.at(0); const ast::job_conjunction_t *jc = jobs.at(0);
if (!jc) return nullptr; if (!jc) return nullptr;
const ast::job_pipeline_t *job = &jc->job; const ast::job_pipeline_t *job = &jc->job();
// Helper to return if a statement is infinitely recursive in this function. // Helper to return if a statement is infinitely recursive in this function.
auto statement_recurses = auto statement_recurses =
[&](const ast::statement_t &stat) -> const ast::decorated_statement_t * { [&](const ast::statement_t &stat) -> const ast::decorated_statement_t * {
// Ignore non-decorated statements like `if`, etc. // Ignore non-decorated statements like `if`, etc.
const ast::decorated_statement_t *dc = const ast::decorated_statement_t *dc =
stat.contents.contents->try_as<ast::decorated_statement_t>(); stat.contents().ptr()->try_as_decorated_statement()
? &stat.contents().ptr()->as_decorated_statement()
: nullptr;
if (!dc) return nullptr; if (!dc) return nullptr;
// Ignore statements with decorations like 'builtin' or 'command', since those // Ignore statements with decorations like 'builtin' or 'command', since those
@ -166,7 +171,7 @@ parse_execution_context_t::infinite_recursive_statement_in_job_list(const ast::j
if (dc->decoration() != statement_decoration_t::none) return nullptr; if (dc->decoration() != statement_decoration_t::none) return nullptr;
// Check the command. // Check the command.
wcstring cmd = dc->command.source(pstree->src); wcstring cmd = *dc->command().source(pstree->src());
bool forbidden = bool forbidden =
!cmd.empty() && !cmd.empty() &&
expand_one(cmd, {expand_flag::skip_cmdsubst, expand_flag::skip_variables}, ctx) && expand_one(cmd, {expand_flag::skip_cmdsubst, expand_flag::skip_variables}, ctx) &&
@ -177,12 +182,13 @@ parse_execution_context_t::infinite_recursive_statement_in_job_list(const ast::j
const ast::decorated_statement_t *infinite_recursive_statement = nullptr; const ast::decorated_statement_t *infinite_recursive_statement = nullptr;
// Check main statement. // Check main statement.
infinite_recursive_statement = statement_recurses(jc->job.statement); infinite_recursive_statement = statement_recurses(jc->job().statement());
// Check piped remainder. // Check piped remainder.
if (!infinite_recursive_statement) { if (!infinite_recursive_statement) {
for (const ast::job_continuation_t &c : job->continuation) { for (size_t i = 0; i < job->continuation().count(); i++) {
if (const auto *s = statement_recurses(c.statement)) { const ast::job_continuation_t &c = *job->continuation().at(i);
if (const auto *s = statement_recurses(c.statement())) {
infinite_recursive_statement = s; infinite_recursive_statement = s;
break; break;
} }
@ -249,13 +255,14 @@ maybe_t<end_execution_reason_t> parse_execution_context_t::check_end_execution()
bool parse_execution_context_t::job_is_simple_block(const ast::job_pipeline_t &job) const { bool parse_execution_context_t::job_is_simple_block(const ast::job_pipeline_t &job) const {
using namespace ast; using namespace ast;
// Must be no pipes. // Must be no pipes.
if (!job.continuation.empty()) { if (!job.continuation().empty()) {
return false; return false;
} }
// Helper to check if an argument_or_redirection_list_t has no redirections. // Helper to check if an argument_or_redirection_list_t has no redirections.
auto no_redirs = [](const argument_or_redirection_list_t &list) -> bool { auto no_redirs = [](const argument_or_redirection_list_t &list) -> bool {
for (const argument_or_redirection_t &val : list) { for (size_t i = 0; i < list.count(); i++) {
const argument_or_redirection_t &val = *list.at(i);
if (val.is_redirection()) return false; if (val.is_redirection()) return false;
} }
return true; return true;
@ -263,14 +270,14 @@ bool parse_execution_context_t::job_is_simple_block(const ast::job_pipeline_t &j
// Check if we're a block statement with redirections. We do it this obnoxious way to preserve // Check if we're a block statement with redirections. We do it this obnoxious way to preserve
// type safety (in case we add more specific statement types). // type safety (in case we add more specific statement types).
const node_t &ss = *job.statement.contents.contents; const auto ss = job.statement().contents().ptr();
switch (ss.type) { switch (ss->typ()) {
case type_t::block_statement: case type_t::block_statement:
return no_redirs(ss.as<block_statement_t>()->args_or_redirs); return no_redirs(ss->as_block_statement().args_or_redirs());
case type_t::switch_statement: case type_t::switch_statement:
return no_redirs(ss.as<switch_statement_t>()->args_or_redirs); return no_redirs(ss->as_switch_statement().args_or_redirs());
case type_t::if_statement: case type_t::if_statement:
return no_redirs(ss.as<if_statement_t>()->args_or_redirs); return no_redirs(ss->as_if_statement().args_or_redirs());
case type_t::not_statement: case type_t::not_statement:
case type_t::decorated_statement: case type_t::decorated_statement:
// not block statements // not block statements
@ -290,10 +297,10 @@ end_execution_reason_t parse_execution_context_t::run_if_statement(
// We have a sequence of if clauses, with a final else, resulting in a single job list that we // We have a sequence of if clauses, with a final else, resulting in a single job list that we
// execute. // execute.
const job_list_t *job_list_to_execute = nullptr; const job_list_t *job_list_to_execute = nullptr;
const if_clause_t *if_clause = &statement.if_clause; const if_clause_t *if_clause = &statement.if_clause();
// Index of the *next* elseif_clause to test. // Index of the *next* elseif_clause to test.
const elseif_clause_list_t &elseif_clauses = statement.elseif_clauses; const elseif_clause_list_t &elseif_clauses = statement.elseif_clauses();
size_t next_elseif_idx = 0; size_t next_elseif_idx = 0;
// We start with the 'if'. // We start with the 'if'.
@ -309,16 +316,16 @@ end_execution_reason_t parse_execution_context_t::run_if_statement(
// Check the condition and the tail. We treat end_execution_reason_t::error here as failure, // Check the condition and the tail. We treat end_execution_reason_t::error here as failure,
// in accordance with historic behavior. // in accordance with historic behavior.
end_execution_reason_t cond_ret = end_execution_reason_t cond_ret =
run_job_conjunction(if_clause->condition, associated_block); run_job_conjunction(if_clause->condition(), associated_block);
if (cond_ret == end_execution_reason_t::ok) { if (cond_ret == end_execution_reason_t::ok) {
cond_ret = run_job_list(if_clause->andor_tail, associated_block); cond_ret = run_job_list(if_clause->andor_tail(), associated_block);
} }
const bool take_branch = const bool take_branch =
(cond_ret == end_execution_reason_t::ok) && parser->get_last_status() == EXIT_SUCCESS; (cond_ret == end_execution_reason_t::ok) && parser->get_last_status() == EXIT_SUCCESS;
if (take_branch) { if (take_branch) {
// Condition succeeded. // Condition succeeded.
job_list_to_execute = &if_clause->body; job_list_to_execute = &if_clause->body();
break; break;
} }
@ -326,7 +333,7 @@ end_execution_reason_t parse_execution_context_t::run_if_statement(
const auto *elseif_clause = elseif_clauses.at(next_elseif_idx++); const auto *elseif_clause = elseif_clauses.at(next_elseif_idx++);
if (elseif_clause) { if (elseif_clause) {
trace_if_enabled(*parser, L"else if"); trace_if_enabled(*parser, L"else if");
if_clause = &elseif_clause->if_clause; if_clause = &elseif_clause->if_clause();
} else { } else {
break; break;
} }
@ -335,9 +342,9 @@ end_execution_reason_t parse_execution_context_t::run_if_statement(
if (!job_list_to_execute) { if (!job_list_to_execute) {
// our ifs and elseifs failed. // our ifs and elseifs failed.
// Check our else body. // Check our else body.
if (statement.else_clause) { if (statement.has_else_clause()) {
trace_if_enabled(*parser, L"else"); trace_if_enabled(*parser, L"else");
job_list_to_execute = &statement.else_clause->body; job_list_to_execute = &statement.else_clause().body();
} }
} }
@ -382,8 +389,8 @@ end_execution_reason_t parse_execution_context_t::run_function_statement(
using namespace ast; using namespace ast;
// Get arguments. // Get arguments.
wcstring_list_t arguments; wcstring_list_t arguments;
ast_args_list_t arg_nodes = get_argument_nodes(header.args); ast_args_list_t arg_nodes = get_argument_nodes(header.args());
arg_nodes.insert(arg_nodes.begin(), &header.first_arg); arg_nodes.insert(arg_nodes.begin(), &header.first_arg());
end_execution_reason_t result = end_execution_reason_t result =
this->expand_arguments_from_nodes(arg_nodes, &arguments, failglob); this->expand_arguments_from_nodes(arg_nodes, &arguments, failglob);
@ -395,32 +402,32 @@ end_execution_reason_t parse_execution_context_t::run_function_statement(
null_output_stream_t outs; null_output_stream_t outs;
string_output_stream_t errs; string_output_stream_t errs;
io_streams_t streams(outs, errs); io_streams_t streams(outs, errs);
int err_code = builtin_function(*parser, streams, arguments, pstree, statement); int err_code = builtin_function(*parser, streams, arguments, *pstree, statement);
parser->libdata().status_count++; parser->libdata().status_count++;
parser->set_last_statuses(statuses_t::just(err_code)); parser->set_last_statuses(statuses_t::just(err_code));
const wcstring &errtext = errs.contents(); const wcstring &errtext = errs.contents();
if (!errtext.empty()) { if (!errtext.empty()) {
return this->report_error(err_code, header, L"%ls", errtext.c_str()); return this->report_error(err_code, *header.ptr(), L"%ls", errtext.c_str());
} }
return result; return result;
} }
end_execution_reason_t parse_execution_context_t::run_block_statement( end_execution_reason_t parse_execution_context_t::run_block_statement(
const ast::block_statement_t &statement, const block_t *associated_block) { const ast::block_statement_t &statement, const block_t *associated_block) {
const ast::node_t &bh = *statement.header.contents; auto bh = statement.header().ptr();
const ast::job_list_t &contents = statement.jobs; const ast::job_list_t &contents = statement.jobs();
end_execution_reason_t ret = end_execution_reason_t::ok; end_execution_reason_t ret = end_execution_reason_t::ok;
if (const auto *fh = bh.try_as<ast::for_header_t>()) { if (const auto *fh = bh->try_as_for_header()) {
ret = run_for_statement(*fh, contents); ret = run_for_statement(*fh, contents);
} else if (const auto *wh = bh.try_as<ast::while_header_t>()) { } else if (const auto *wh = bh->try_as_while_header()) {
ret = run_while_statement(*wh, contents, associated_block); ret = run_while_statement(*wh, contents, associated_block);
} else if (const auto *fh = bh.try_as<ast::function_header_t>()) { } else if (const auto *fh = bh->try_as_function_header()) {
ret = run_function_statement(statement, *fh); ret = run_function_statement(statement, *fh);
} else if (bh.try_as<ast::begin_header_t>()) { } else if (bh->try_as_begin_header()) {
ret = run_begin_statement(contents); ret = run_begin_statement(contents);
} else { } else {
FLOGF(error, L"Unexpected block header: %ls\n", bh.describe().c_str()); FLOGF(error, L"Unexpected block header: %ls\n", bh->describe()->c_str());
PARSER_DIE(); PARSER_DIE();
} }
return ret; return ret;
@ -430,20 +437,20 @@ end_execution_reason_t parse_execution_context_t::run_for_statement(
const ast::for_header_t &header, const ast::job_list_t &block_contents) { const ast::for_header_t &header, const ast::job_list_t &block_contents) {
// Get the variable name: `for var_name in ...`. We expand the variable name. It better result // Get the variable name: `for var_name in ...`. We expand the variable name. It better result
// in just one. // in just one.
wcstring for_var_name = header.var_name.source(get_source()); wcstring for_var_name = *header.var_name().source(get_source());
if (!expand_one(for_var_name, expand_flags_t{}, ctx)) { if (!expand_one(for_var_name, expand_flags_t{}, ctx)) {
return report_error(STATUS_EXPAND_ERROR, header.var_name, return report_error(STATUS_EXPAND_ERROR, *header.var_name().ptr(),
FAILED_EXPANSION_VARIABLE_NAME_ERR_MSG, for_var_name.c_str()); FAILED_EXPANSION_VARIABLE_NAME_ERR_MSG, for_var_name.c_str());
} }
if (!valid_var_name(for_var_name)) { if (!valid_var_name(for_var_name)) {
return report_error(STATUS_INVALID_ARGS, header.var_name, BUILTIN_ERR_VARNAME, L"for", return report_error(STATUS_INVALID_ARGS, *header.var_name().ptr(), BUILTIN_ERR_VARNAME,
for_var_name.c_str()); L"for", for_var_name.c_str());
} }
// Get the contents to iterate over. // Get the contents to iterate over.
wcstring_list_t arguments; wcstring_list_t arguments;
ast_args_list_t arg_nodes = get_argument_nodes(header.args); ast_args_list_t arg_nodes = get_argument_nodes(header.args());
end_execution_reason_t ret = this->expand_arguments_from_nodes(arg_nodes, &arguments, nullglob); end_execution_reason_t ret = this->expand_arguments_from_nodes(arg_nodes, &arguments, nullglob);
if (ret != end_execution_reason_t::ok) { if (ret != end_execution_reason_t::ok) {
return ret; return ret;
@ -451,7 +458,7 @@ end_execution_reason_t parse_execution_context_t::run_for_statement(
auto var = parser->vars().get(for_var_name, ENV_DEFAULT); auto var = parser->vars().get(for_var_name, ENV_DEFAULT);
if (env_var_t::flags_for(for_var_name.c_str()) & env_var_t::flag_read_only) { if (env_var_t::flags_for(for_var_name.c_str()) & env_var_t::flag_read_only) {
return report_error(STATUS_INVALID_ARGS, header.var_name, return report_error(STATUS_INVALID_ARGS, *header.var_name().ptr(),
_(L"%ls: %ls: cannot overwrite read-only variable"), L"for", _(L"%ls: %ls: cannot overwrite read-only variable"), L"for",
for_var_name.c_str()); for_var_name.c_str());
} }
@ -501,14 +508,14 @@ end_execution_reason_t parse_execution_context_t::run_for_statement(
end_execution_reason_t parse_execution_context_t::run_switch_statement( end_execution_reason_t parse_execution_context_t::run_switch_statement(
const ast::switch_statement_t &statement) { const ast::switch_statement_t &statement) {
// Get the switch variable. // Get the switch variable.
const wcstring switch_value = get_source(statement.argument); const wcstring switch_value = get_source(*statement.argument().ptr());
// Expand it. We need to offset any errors by the position of the string. // Expand it. We need to offset any errors by the position of the string.
completion_list_t switch_values_expanded; completion_list_t switch_values_expanded;
auto errors = new_parse_error_list(); auto errors = new_parse_error_list();
auto expand_ret = auto expand_ret =
expand_string(switch_value, &switch_values_expanded, expand_flags_t{}, ctx, &*errors); expand_string(switch_value, &switch_values_expanded, expand_flags_t{}, ctx, &*errors);
errors->offset_source_start(statement.argument.range.start); errors->offset_source_start(statement.argument().range().start);
switch (expand_ret.result) { switch (expand_ret.result) {
case expand_result_t::error: case expand_result_t::error:
@ -518,12 +525,12 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement(
return end_execution_reason_t::cancelled; return end_execution_reason_t::cancelled;
case expand_result_t::wildcard_no_match: case expand_result_t::wildcard_no_match:
return report_error(STATUS_UNMATCHED_WILDCARD, statement.argument, WILDCARD_ERR_MSG, return report_error(STATUS_UNMATCHED_WILDCARD, *statement.argument().ptr(),
get_source(statement.argument).c_str()); WILDCARD_ERR_MSG, get_source(*statement.argument().ptr()).c_str());
case expand_result_t::ok: case expand_result_t::ok:
if (switch_values_expanded.size() > 1) { if (switch_values_expanded.size() > 1) {
return report_error(STATUS_INVALID_ARGS, statement.argument, return report_error(STATUS_INVALID_ARGS, *statement.argument().ptr(),
_(L"switch: Expected at most one argument, got %lu\n"), _(L"switch: Expected at most one argument, got %lu\n"),
switch_values_expanded.size()); switch_values_expanded.size());
} }
@ -544,7 +551,8 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement(
// Expand case statements. // Expand case statements.
const ast::case_item_t *matching_case_item = nullptr; const ast::case_item_t *matching_case_item = nullptr;
for (const ast::case_item_t &case_item : statement.cases) { for (size_t i = 0; i < statement.cases().count(); i++) {
const ast::case_item_t &case_item = *statement.cases().at(i);
if (auto ret = check_end_execution()) { if (auto ret = check_end_execution()) {
result = *ret; result = *ret;
break; break;
@ -553,7 +561,7 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement(
// Expand arguments. A case item list may have a wildcard that fails to expand to // Expand arguments. A case item list may have a wildcard that fails to expand to
// anything. We also report case errors, but don't stop execution; i.e. a case item that // anything. We also report case errors, but don't stop execution; i.e. a case item that
// contains an unexpandable process will report and then fail to match. // contains an unexpandable process will report and then fail to match.
ast_args_list_t arg_nodes = get_argument_nodes(case_item.arguments); ast_args_list_t arg_nodes = get_argument_nodes(case_item.arguments());
wcstring_list_t case_args; wcstring_list_t case_args;
end_execution_reason_t case_result = end_execution_reason_t case_result =
this->expand_arguments_from_nodes(arg_nodes, &case_args, failglob); this->expand_arguments_from_nodes(arg_nodes, &case_args, failglob);
@ -576,7 +584,7 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement(
if (matching_case_item) { if (matching_case_item) {
// Success, evaluate the job list. // Success, evaluate the job list.
assert(result == end_execution_reason_t::ok && "Expected success"); assert(result == end_execution_reason_t::ok && "Expected success");
result = this->run_job_list(matching_case_item->body, sb); result = this->run_job_list(matching_case_item->body(), sb);
} }
parser->pop_block(sb); parser->pop_block(sb);
@ -612,9 +620,9 @@ end_execution_reason_t parse_execution_context_t::run_while_statement(
// Check the condition. // Check the condition.
end_execution_reason_t cond_ret = end_execution_reason_t cond_ret =
this->run_job_conjunction(header.condition, associated_block); this->run_job_conjunction(header.condition(), associated_block);
if (cond_ret == end_execution_reason_t::ok) { if (cond_ret == end_execution_reason_t::ok) {
cond_ret = run_job_list(header.andor_tail, associated_block); cond_ret = run_job_list(header.andor_tail(), associated_block);
} }
// If the loop condition failed to execute, then exit the loop without modifying the exit // If the loop condition failed to execute, then exit the loop without modifying the exit
@ -694,7 +702,7 @@ end_execution_reason_t parse_execution_context_t::report_errors(
// Get a backtrace. // Get a backtrace.
wcstring backtrace_and_desc; wcstring backtrace_and_desc;
parser->get_backtrace(pstree->src, error_list, backtrace_and_desc); parser->get_backtrace(pstree->src(), error_list, backtrace_and_desc);
// Print it. // Print it.
if (!should_suppress_stderr_for_tests()) { if (!should_suppress_stderr_for_tests()) {
@ -711,7 +719,10 @@ end_execution_reason_t parse_execution_context_t::report_errors(
parse_execution_context_t::ast_args_list_t parse_execution_context_t::get_argument_nodes( parse_execution_context_t::ast_args_list_t parse_execution_context_t::get_argument_nodes(
const ast::argument_list_t &args) { const ast::argument_list_t &args) {
ast_args_list_t result; ast_args_list_t result;
for (const ast::argument_t &arg : args) result.push_back(&arg); for (size_t i = 0; i < args.count(); i++) {
const ast::argument_t &arg = *args.at(i);
result.push_back(&arg);
}
return result; return result;
} }
@ -719,7 +730,8 @@ parse_execution_context_t::ast_args_list_t parse_execution_context_t::get_argume
parse_execution_context_t::ast_args_list_t parse_execution_context_t::get_argument_nodes( parse_execution_context_t::ast_args_list_t parse_execution_context_t::get_argument_nodes(
const ast::argument_or_redirection_list_t &args) { const ast::argument_or_redirection_list_t &args) {
ast_args_list_t result; ast_args_list_t result;
for (const ast::argument_or_redirection_t &v : args) { for (size_t i = 0; i < args.count(); i++) {
const ast::argument_or_redirection_t &v = *args.at(i);
if (v.is_argument()) result.push_back(&v.argument()); if (v.is_argument()) result.push_back(&v.argument());
} }
return result; return result;
@ -739,21 +751,21 @@ end_execution_reason_t parse_execution_context_t::handle_command_not_found(
// ENAMETOOLONG // ENAMETOOLONG
if (err_code == ENOTDIR) { if (err_code == ENOTDIR) {
// If the original command did not include a "/", assume we found it via $PATH. // If the original command did not include a "/", assume we found it via $PATH.
auto src = get_source(statement.command); auto src = get_source(*statement.command().ptr());
if (src.find(L"/") == wcstring::npos) { if (src.find(L"/") == wcstring::npos) {
return this->report_error(STATUS_NOT_EXECUTABLE, statement.command, return this->report_error(STATUS_NOT_EXECUTABLE, *statement.command().ptr(),
_(L"Unknown command. A component of '%ls' is not a " _(L"Unknown command. A component of '%ls' is not a "
L"directory. Check your $PATH."), L"directory. Check your $PATH."),
cmd); cmd);
} else { } else {
return this->report_error( return this->report_error(
STATUS_NOT_EXECUTABLE, statement.command, STATUS_NOT_EXECUTABLE, *statement.command().ptr(),
_(L"Unknown command. A component of '%ls' is not a directory."), cmd); _(L"Unknown command. A component of '%ls' is not a directory."), cmd);
} }
} }
return this->report_error( return this->report_error(
STATUS_NOT_EXECUTABLE, statement.command, STATUS_NOT_EXECUTABLE, *statement.command().ptr(),
_(L"Unknown command. '%ls' exists but is not an executable file."), cmd); _(L"Unknown command. '%ls' exists but is not an executable file."), cmd);
} }
@ -761,7 +773,7 @@ end_execution_reason_t parse_execution_context_t::handle_command_not_found(
// error messages. // error messages.
wcstring_list_t event_args; wcstring_list_t event_args;
{ {
ast_args_list_t args = get_argument_nodes(statement.args_or_redirs); ast_args_list_t args = get_argument_nodes(statement.args_or_redirs());
end_execution_reason_t arg_result = end_execution_reason_t arg_result =
this->expand_arguments_from_nodes(args, &event_args, failglob); this->expand_arguments_from_nodes(args, &event_args, failglob);
@ -809,7 +821,7 @@ end_execution_reason_t parse_execution_context_t::handle_command_not_found(
// Here we want to report an error (so it shows a backtrace). // Here we want to report an error (so it shows a backtrace).
// If the handler printed text, that's already shown, so error will be empty. // If the handler printed text, that's already shown, so error will be empty.
return this->report_error(STATUS_CMD_UNKNOWN, statement.command, error.c_str()); return this->report_error(STATUS_CMD_UNKNOWN, *statement.command().ptr(), error.c_str());
} }
end_execution_reason_t parse_execution_context_t::expand_command( end_execution_reason_t parse_execution_context_t::expand_command(
@ -821,8 +833,8 @@ end_execution_reason_t parse_execution_context_t::expand_command(
auto errors = new_parse_error_list(); auto errors = new_parse_error_list();
// Get the unexpanded command string. We expect to always get it here. // Get the unexpanded command string. We expect to always get it here.
wcstring unexp_cmd = get_source(statement.command); wcstring unexp_cmd = get_source(*statement.command().ptr());
size_t pos_of_command_token = statement.command.range.start; size_t pos_of_command_token = statement.command().range().start;
// Expand the string to produce completions, and report errors. // Expand the string to produce completions, and report errors.
expand_result_t expand_err = expand_result_t expand_err =
@ -835,15 +847,15 @@ end_execution_reason_t parse_execution_context_t::expand_command(
errors->offset_source_start(pos_of_command_token); errors->offset_source_start(pos_of_command_token);
return report_errors(STATUS_ILLEGAL_CMD, *errors); return report_errors(STATUS_ILLEGAL_CMD, *errors);
} else if (expand_err == expand_result_t::wildcard_no_match) { } else if (expand_err == expand_result_t::wildcard_no_match) {
return report_error(STATUS_UNMATCHED_WILDCARD, statement, WILDCARD_ERR_MSG, return report_error(STATUS_UNMATCHED_WILDCARD, *statement.ptr(), WILDCARD_ERR_MSG,
get_source(statement).c_str()); get_source(*statement.ptr()).c_str());
} }
assert(expand_err == expand_result_t::ok); assert(expand_err == expand_result_t::ok);
// Complain if the resulting expansion was empty, or expanded to an empty string. // Complain if the resulting expansion was empty, or expanded to an empty string.
// For no-exec it's okay, as we can't really perform the expansion. // For no-exec it's okay, as we can't really perform the expansion.
if (out_cmd->empty() && !no_exec()) { if (out_cmd->empty() && !no_exec()) {
return this->report_error(STATUS_ILLEGAL_CMD, statement.command, return this->report_error(STATUS_ILLEGAL_CMD, *statement.command().ptr(),
_(L"The expanded command was empty.")); _(L"The expanded command was empty."));
} }
return end_execution_reason_t::ok; return end_execution_reason_t::ok;
@ -880,7 +892,7 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process(
// If the specified command does not exist, and is undecorated, try using an implicit cd. // If the specified command does not exist, and is undecorated, try using an implicit cd.
if (!has_command && statement.decoration() == statement_decoration_t::none) { if (!has_command && statement.decoration() == statement_decoration_t::none) {
// Implicit cd requires an empty argument and redirection list. // Implicit cd requires an empty argument and redirection list.
if (statement.args_or_redirs.empty()) { if (statement.args_or_redirs().empty()) {
// Ok, no arguments or redirections; check to see if the command is a directory. // Ok, no arguments or redirections; check to see if the command is a directory.
use_implicit_cd = use_implicit_cd =
path_as_implicit_cd(cmd, parser->vars().get_pwd_slash(), parser->vars()) path_as_implicit_cd(cmd, parser->vars().get_pwd_slash(), parser->vars())
@ -917,7 +929,7 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process(
cmd_args.push_back(cmd); cmd_args.push_back(cmd);
vec_append(cmd_args, std::move(args_from_cmd_expansion)); vec_append(cmd_args, std::move(args_from_cmd_expansion));
ast_args_list_t arg_nodes = get_argument_nodes(statement.args_or_redirs); ast_args_list_t arg_nodes = get_argument_nodes(statement.args_or_redirs());
end_execution_reason_t arg_result = end_execution_reason_t arg_result =
this->expand_arguments_from_nodes(arg_nodes, &cmd_args, glob_behavior); this->expand_arguments_from_nodes(arg_nodes, &cmd_args, glob_behavior);
if (arg_result != end_execution_reason_t::ok) { if (arg_result != end_execution_reason_t::ok) {
@ -925,7 +937,7 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process(
} }
// The set of IO redirections that we construct for the process. // The set of IO redirections that we construct for the process.
auto reason = this->determine_redirections(statement.args_or_redirs, &*redirections); auto reason = this->determine_redirections(statement.args_or_redirs(), &*redirections);
if (reason != end_execution_reason_t::ok) { if (reason != end_execution_reason_t::ok) {
return reason; return reason;
} }
@ -950,14 +962,14 @@ end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes(
completion_list_t arg_expanded; completion_list_t arg_expanded;
for (const ast::argument_t *arg_node : argument_nodes) { for (const ast::argument_t *arg_node : argument_nodes) {
// Expect all arguments to have source. // Expect all arguments to have source.
assert(arg_node->has_source() && "Argument should have source"); assert(arg_node->ptr()->has_source() && "Argument should have source");
// Expand this string. // Expand this string.
auto errors = new_parse_error_list(); auto errors = new_parse_error_list();
arg_expanded.clear(); arg_expanded.clear();
auto expand_ret = auto expand_ret = expand_string(get_source(*arg_node->ptr()), &arg_expanded,
expand_string(get_source(*arg_node), &arg_expanded, expand_flags_t{}, ctx, &*errors); expand_flags_t{}, ctx, &*errors);
errors->offset_source_start(arg_node->range.start); errors->offset_source_start(arg_node->range().start);
switch (expand_ret.result) { switch (expand_ret.result) {
case expand_result_t::error: { case expand_result_t::error: {
return this->report_errors(expand_ret.status, *errors); return this->report_errors(expand_ret.status, *errors);
@ -971,8 +983,8 @@ end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes(
// For no_exec, ignore the error - this might work at runtime. // For no_exec, ignore the error - this might work at runtime.
if (no_exec()) return end_execution_reason_t::ok; if (no_exec()) return end_execution_reason_t::ok;
// Report the unmatched wildcard error and stop processing. // Report the unmatched wildcard error and stop processing.
return report_error(STATUS_UNMATCHED_WILDCARD, *arg_node, WILDCARD_ERR_MSG, return report_error(STATUS_UNMATCHED_WILDCARD, *arg_node->ptr(),
get_source(*arg_node).c_str()); WILDCARD_ERR_MSG, get_source(*arg_node->ptr()).c_str());
} }
break; break;
} }
@ -1003,24 +1015,26 @@ end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes(
end_execution_reason_t parse_execution_context_t::determine_redirections( end_execution_reason_t parse_execution_context_t::determine_redirections(
const ast::argument_or_redirection_list_t &list, redirection_spec_list_t *out_redirections) { const ast::argument_or_redirection_list_t &list, redirection_spec_list_t *out_redirections) {
// Get all redirection nodes underneath the statement. // Get all redirection nodes underneath the statement.
for (const ast::argument_or_redirection_t &arg_or_redir : list) { for (size_t i = 0; i < list.count(); i++) {
const ast::argument_or_redirection_t &arg_or_redir = *list.at(i);
if (!arg_or_redir.is_redirection()) continue; if (!arg_or_redir.is_redirection()) continue;
const ast::redirection_t &redir_node = arg_or_redir.redirection(); const ast::redirection_t &redir_node = arg_or_redir.redirection();
auto oper = pipe_or_redir_from_string(get_source(redir_node.oper).c_str()); auto oper = pipe_or_redir_from_string(get_source(*redir_node.oper().ptr()).c_str());
if (!oper || !oper->is_valid()) { if (!oper || !oper->is_valid()) {
// TODO: figure out if this can ever happen. If so, improve this error message. // TODO: figure out if this can ever happen. If so, improve this error message.
return report_error(STATUS_INVALID_ARGS, redir_node, _(L"Invalid redirection: %ls"), return report_error(STATUS_INVALID_ARGS, *redir_node.ptr(),
get_source(redir_node).c_str()); _(L"Invalid redirection: %ls"),
get_source(*redir_node.ptr()).c_str());
} }
// PCA: I can't justify this skip_variables flag. It was like this when I got here. // PCA: I can't justify this skip_variables flag. It was like this when I got here.
wcstring target = get_source(redir_node.target); wcstring target = get_source(*redir_node.target().ptr());
bool target_expanded = bool target_expanded =
expand_one(target, no_exec() ? expand_flag::skip_variables : expand_flags_t{}, ctx); expand_one(target, no_exec() ? expand_flag::skip_variables : expand_flags_t{}, ctx);
if (!target_expanded || target.empty()) { if (!target_expanded || target.empty()) {
// TODO: Improve this error message. // TODO: Improve this error message.
return report_error(STATUS_INVALID_ARGS, redir_node, return report_error(STATUS_INVALID_ARGS, *redir_node.ptr(),
_(L"Invalid redirection target: %ls"), target.c_str()); _(L"Invalid redirection target: %ls"), target.c_str());
} }
@ -1033,7 +1047,8 @@ end_execution_reason_t parse_execution_context_t::determine_redirections(
!spec->get_target_as_fd()) { !spec->get_target_as_fd()) {
const wchar_t *fmt = const wchar_t *fmt =
_(L"Requested redirection to '%ls', which is not a valid file descriptor"); _(L"Requested redirection to '%ls', which is not a valid file descriptor");
return report_error(STATUS_INVALID_ARGS, redir_node, fmt, spec->target()->c_str()); return report_error(STATUS_INVALID_ARGS, *redir_node.ptr(), fmt,
spec->target()->c_str());
} }
out_redirections->push_back(std::move(spec)); out_redirections->push_back(std::move(spec));
@ -1050,7 +1065,8 @@ end_execution_reason_t parse_execution_context_t::populate_not_process(
job_t *job, process_t *proc, const ast::not_statement_t &not_statement) { job_t *job, process_t *proc, const ast::not_statement_t &not_statement) {
auto &flags = job->mut_flags(); auto &flags = job->mut_flags();
flags.negate = !flags.negate; flags.negate = !flags.negate;
return this->populate_job_process(job, proc, not_statement.contents, not_statement.variables); return this->populate_job_process(job, proc, not_statement.contents(),
not_statement.variables());
} }
template <typename Type> template <typename Type>
@ -1059,9 +1075,9 @@ end_execution_reason_t parse_execution_context_t::populate_block_process(
using namespace ast; using namespace ast;
// We handle block statements by creating process_type_t::block_node, that will bounce back to // We handle block statements by creating process_type_t::block_node, that will bounce back to
// us when it's time to execute them. // us when it's time to execute them.
static_assert(Type::AstType == type_t::block_statement || static_assert(std::is_same<Type, block_statement_t>::value ||
Type::AstType == type_t::if_statement || std::is_same<Type, if_statement_t>::value ||
Type::AstType == type_t::switch_statement, std::is_same<Type, switch_statement_t>::value,
"Invalid block process"); "Invalid block process");
// Get the argument or redirections list. // Get the argument or redirections list.
@ -1069,16 +1085,16 @@ end_execution_reason_t parse_execution_context_t::populate_block_process(
const argument_or_redirection_list_t *args_or_redirs = nullptr; const argument_or_redirection_list_t *args_or_redirs = nullptr;
// Upcast to permit dropping the 'template' keyword. // Upcast to permit dropping the 'template' keyword.
const node_t &ss = specific_statement; const auto ss = specific_statement.ptr();
switch (Type::AstType) { switch (ss->typ()) {
case type_t::block_statement: case type_t::block_statement:
args_or_redirs = &ss.as<block_statement_t>()->args_or_redirs; args_or_redirs = &ss->as_block_statement().args_or_redirs();
break; break;
case type_t::if_statement: case type_t::if_statement:
args_or_redirs = &ss.as<if_statement_t>()->args_or_redirs; args_or_redirs = &ss->as_if_statement().args_or_redirs();
break; break;
case type_t::switch_statement: case type_t::switch_statement:
args_or_redirs = &ss.as<switch_statement_t>()->args_or_redirs; args_or_redirs = &ss->as_switch_statement().args_or_redirs();
break; break;
default: default:
DIE("Unexpected block node type"); DIE("Unexpected block node type");
@ -1089,7 +1105,7 @@ end_execution_reason_t parse_execution_context_t::populate_block_process(
auto reason = this->determine_redirections(*args_or_redirs, &*redirections); auto reason = this->determine_redirections(*args_or_redirs, &*redirections);
if (reason == end_execution_reason_t::ok) { if (reason == end_execution_reason_t::ok) {
proc->type = process_type_t::block_node; proc->type = process_type_t::block_node;
proc->block_node_source = pstree; proc->block_node_source = pstree->clone();
proc->internal_block_node = &statement; proc->internal_block_node = &statement;
proc->set_redirection_specs(std::move(redirections)); proc->set_redirection_specs(std::move(redirections));
} }
@ -1101,8 +1117,9 @@ end_execution_reason_t parse_execution_context_t::apply_variable_assignments(
const block_t **block) { const block_t **block) {
if (variable_assignment_list.empty()) return end_execution_reason_t::ok; if (variable_assignment_list.empty()) return end_execution_reason_t::ok;
*block = parser->push_block(block_t::variable_assignment_block()); *block = parser->push_block(block_t::variable_assignment_block());
for (const ast::variable_assignment_t &variable_assignment : variable_assignment_list) { for (size_t i = 0; i < variable_assignment_list.count(); i++) {
const wcstring &source = get_source(variable_assignment); const ast::variable_assignment_t &variable_assignment = *variable_assignment_list.at(i);
const wcstring &source = get_source(*variable_assignment.ptr());
auto equals_pos = variable_assignment_equals_pos(source); auto equals_pos = variable_assignment_equals_pos(source);
assert(equals_pos); assert(equals_pos);
const wcstring variable_name = source.substr(0, *equals_pos); const wcstring variable_name = source.substr(0, *equals_pos);
@ -1112,7 +1129,7 @@ end_execution_reason_t parse_execution_context_t::apply_variable_assignments(
// TODO this is mostly copied from expand_arguments_from_nodes, maybe extract to function // TODO this is mostly copied from expand_arguments_from_nodes, maybe extract to function
auto expand_ret = auto expand_ret =
expand_string(expression, &expression_expanded, expand_flags_t{}, ctx, &*errors); expand_string(expression, &expression_expanded, expand_flags_t{}, ctx, &*errors);
errors->offset_source_start(variable_assignment.range.start + *equals_pos + 1); errors->offset_source_start(variable_assignment.range().start + *equals_pos + 1);
switch (expand_ret.result) { switch (expand_ret.result) {
case expand_result_t::error: case expand_result_t::error:
return this->report_errors(expand_ret.status, *errors); return this->report_errors(expand_ret.status, *errors);
@ -1143,7 +1160,7 @@ end_execution_reason_t parse_execution_context_t::populate_job_process(
const ast::variable_assignment_list_t &variable_assignments) { const ast::variable_assignment_list_t &variable_assignments) {
using namespace ast; using namespace ast;
// Get the "specific statement" which is boolean / block / if / switch / decorated. // Get the "specific statement" which is boolean / block / if / switch / decorated.
const node_t &specific_statement = *statement.contents.contents; const auto specific_statement = statement.contents().ptr();
const block_t *block = nullptr; const block_t *block = nullptr;
end_execution_reason_t result = end_execution_reason_t result =
@ -1153,32 +1170,31 @@ end_execution_reason_t parse_execution_context_t::populate_job_process(
}); });
if (result != end_execution_reason_t::ok) return result; if (result != end_execution_reason_t::ok) return result;
switch (specific_statement.type) { switch (specific_statement->typ()) {
case type_t::not_statement: { case type_t::not_statement: {
result = result = this->populate_not_process(job, proc, specific_statement->as_not_statement());
this->populate_not_process(job, proc, *specific_statement.as<not_statement_t>());
break; break;
} }
case type_t::block_statement: case type_t::block_statement:
result = this->populate_block_process(proc, statement, result = this->populate_block_process(proc, statement,
*specific_statement.as<block_statement_t>()); specific_statement->as_block_statement());
break; break;
case type_t::if_statement: case type_t::if_statement:
result = this->populate_block_process(proc, statement, result = this->populate_block_process(proc, statement,
*specific_statement.as<if_statement_t>()); specific_statement->as_if_statement());
break; break;
case type_t::switch_statement: case type_t::switch_statement:
result = this->populate_block_process(proc, statement, result = this->populate_block_process(proc, statement,
*specific_statement.as<switch_statement_t>()); specific_statement->as_switch_statement());
break; break;
case type_t::decorated_statement: { case type_t::decorated_statement: {
result = result =
this->populate_plain_process(proc, *specific_statement.as<decorated_statement_t>()); this->populate_plain_process(proc, specific_statement->as_decorated_statement());
break; break;
} }
default: { default: {
FLOGF(error, L"'%ls' not handled by new parser yet.", FLOGF(error, L"'%ls' not handled by new parser yet.",
specific_statement.describe().c_str()); specific_statement->describe()->c_str());
PARSER_DIE(); PARSER_DIE();
break; break;
} }
@ -1196,19 +1212,20 @@ end_execution_reason_t parse_execution_context_t::populate_job_from_job_node(
process_list_t processes; process_list_t processes;
processes.emplace_back(new process_t()); processes.emplace_back(new process_t());
end_execution_reason_t result = this->populate_job_process( end_execution_reason_t result = this->populate_job_process(
j, processes.back().get(), job_node.statement, job_node.variables); j, processes.back().get(), job_node.statement(), job_node.variables());
// Construct process_ts for job continuations (pipelines). // Construct process_ts for job continuations (pipelines).
for (const ast::job_continuation_t &jc : job_node.continuation) { for (size_t i = 0; i < job_node.continuation().count(); i++) {
const ast::job_continuation_t &jc = *job_node.continuation().at(i);
if (result != end_execution_reason_t::ok) { if (result != end_execution_reason_t::ok) {
break; break;
} }
// Handle the pipe, whose fd may not be the obvious stdout. // Handle the pipe, whose fd may not be the obvious stdout.
auto parsed_pipe = pipe_or_redir_from_string(get_source(jc.pipe).c_str()); auto parsed_pipe = pipe_or_redir_from_string(get_source(*jc.pipe().ptr()).c_str());
assert(parsed_pipe && parsed_pipe->is_pipe && "Failed to parse valid pipe"); assert(parsed_pipe && parsed_pipe->is_pipe && "Failed to parse valid pipe");
if (!parsed_pipe->is_valid()) { if (!parsed_pipe->is_valid()) {
result = report_error(STATUS_INVALID_ARGS, jc.pipe, ILLEGAL_FD_ERR_MSG, result = report_error(STATUS_INVALID_ARGS, *jc.pipe().ptr(), ILLEGAL_FD_ERR_MSG,
get_source(jc.pipe).c_str()); get_source(*jc.pipe().ptr()).c_str());
break; break;
} }
processes.back()->pipe_write_fd = parsed_pipe->fd; processes.back()->pipe_write_fd = parsed_pipe->fd;
@ -1222,7 +1239,8 @@ end_execution_reason_t parse_execution_context_t::populate_job_from_job_node(
// Store the new process (and maybe with an error). // Store the new process (and maybe with an error).
processes.emplace_back(new process_t()); processes.emplace_back(new process_t());
result = this->populate_job_process(j, processes.back().get(), jc.statement, jc.variables); result =
this->populate_job_process(j, processes.back().get(), jc.statement(), jc.variables());
} }
// Inform our processes of who is first and last // Inform our processes of who is first and last
@ -1254,22 +1272,27 @@ static bool remove_job(parser_t &parser, const job_t *job) {
/// `sleep 1 | not time true` will time the whole job! /// `sleep 1 | not time true` will time the whole job!
static bool job_node_wants_timing(const ast::job_pipeline_t &job_node) { static bool job_node_wants_timing(const ast::job_pipeline_t &job_node) {
// Does our job have the job-level time prefix? // Does our job have the job-level time prefix?
if (job_node.time) return true; if (job_node.has_time()) return true;
// Helper to return true if a node is 'not time ...' or 'not not time...' or... // Helper to return true if a node is 'not time ...' or 'not not time...' or...
auto is_timed_not_statement = [](const ast::statement_t &stat) { auto is_timed_not_statement = [](const ast::statement_t &stat) {
const auto *ns = stat.contents->try_as<ast::not_statement_t>(); const auto *ns = stat.contents().ptr()->try_as_not_statement()
? &stat.contents().ptr()->as_not_statement()
: nullptr;
while (ns) { while (ns) {
if (ns->time) return true; if (ns->has_time()) return true;
ns = ns->contents.try_as<ast::not_statement_t>(); ns = ns->contents().ptr()->try_as_not_statement()
? &ns->contents().ptr()->as_not_statement()
: nullptr;
} }
return false; return false;
}; };
// Do we have a 'not time ...' anywhere in our pipeline? // Do we have a 'not time ...' anywhere in our pipeline?
if (is_timed_not_statement(job_node.statement)) return true; if (is_timed_not_statement(job_node.statement())) return true;
for (const ast::job_continuation_t &jc : job_node.continuation) { for (size_t i = 0; i < job_node.continuation().count(); i++) {
if (is_timed_not_statement(jc.statement)) return true; const ast::job_continuation_t &jc = *job_node.continuation().at(i);
if (is_timed_not_statement(jc.statement())) return true;
} }
return false; return false;
} }
@ -1307,33 +1330,32 @@ end_execution_reason_t parse_execution_context_t::run_1_job(const ast::job_pipel
// However, if there are no redirections, then we can just jump into the block directly, which // However, if there are no redirections, then we can just jump into the block directly, which
// is significantly faster. // is significantly faster.
if (job_is_simple_block(job_node)) { if (job_is_simple_block(job_node)) {
bool do_time = job_node.time.has_value(); bool do_time = job_node.has_time();
// If no-exec has been given, there is nothing to time. // If no-exec has been given, there is nothing to time.
auto timer = push_timer(do_time && !no_exec()); auto timer = push_timer(do_time && !no_exec());
const block_t *block = nullptr; const block_t *block = nullptr;
end_execution_reason_t result = end_execution_reason_t result =
this->apply_variable_assignments(nullptr, job_node.variables, &block); this->apply_variable_assignments(nullptr, job_node.variables(), &block);
cleanup_t scope([&]() { cleanup_t scope([&]() {
if (block) parser->pop_block(block); if (block) parser->pop_block(block);
}); });
const ast::node_t *specific_statement = job_node.statement.contents.get(); const auto specific_statement = job_node.statement().contents().ptr();
assert(specific_statement_type_is_redirectable_block(*specific_statement)); assert(specific_statement_type_is_redirectable_block(*specific_statement));
if (result == end_execution_reason_t::ok) { if (result == end_execution_reason_t::ok) {
switch (specific_statement->type) { switch (specific_statement->typ()) {
case ast::type_t::block_statement: { case ast::type_t::block_statement: {
result = this->run_block_statement( result = this->run_block_statement(specific_statement->as_block_statement(),
*specific_statement->as<ast::block_statement_t>(), associated_block); associated_block);
break; break;
} }
case ast::type_t::if_statement: { case ast::type_t::if_statement: {
result = this->run_if_statement(*specific_statement->as<ast::if_statement_t>(), result = this->run_if_statement(specific_statement->as_if_statement(),
associated_block); associated_block);
break; break;
} }
case ast::type_t::switch_statement: { case ast::type_t::switch_statement: {
result = this->run_switch_statement( result = this->run_switch_statement(specific_statement->as_switch_statement());
*specific_statement->as<ast::switch_statement_t>());
break; break;
} }
default: { default: {
@ -1359,7 +1381,7 @@ end_execution_reason_t parse_execution_context_t::run_1_job(const ast::job_pipel
const auto &ld = parser->libdata(); const auto &ld = parser->libdata();
job_t::properties_t props{}; job_t::properties_t props{};
props.initial_background = job_node.bg.has_value(); props.initial_background = job_node.has_bg();
props.skip_notification = props.skip_notification =
ld.is_subshell || parser->is_block() || ld.is_event || !parser->is_interactive(); ld.is_subshell || parser->is_block() || ld.is_event || !parser->is_interactive();
props.from_event_handler = ld.is_event; props.from_event_handler = ld.is_event;
@ -1367,10 +1389,10 @@ end_execution_reason_t parse_execution_context_t::run_1_job(const ast::job_pipel
// It's an error to have 'time' in a background job. // It's an error to have 'time' in a background job.
if (props.wants_timing && props.initial_background) { if (props.wants_timing && props.initial_background) {
return this->report_error(STATUS_INVALID_ARGS, job_node, ERROR_TIME_BACKGROUND); return this->report_error(STATUS_INVALID_ARGS, *job_node.ptr(), ERROR_TIME_BACKGROUND);
} }
shared_ptr<job_t> job = std::make_shared<job_t>(props, get_source(job_node)); shared_ptr<job_t> job = std::make_shared<job_t>(props, get_source(*job_node.ptr()));
// We are about to populate a job. One possible argument to the job is a command substitution // We are about to populate a job. One possible argument to the job is a command substitution
// which may be interested in the job that's populating it, via '--on-job-exit caller'. Record // which may be interested in the job that's populating it, via '--on-job-exit caller'. Record
@ -1426,9 +1448,10 @@ end_execution_reason_t parse_execution_context_t::run_job_conjunction(
if (auto reason = check_end_execution()) { if (auto reason = check_end_execution()) {
return *reason; return *reason;
} }
end_execution_reason_t result = run_1_job(job_expr.job, associated_block); end_execution_reason_t result = run_1_job(job_expr.job(), associated_block);
for (const ast::job_conjunction_continuation_t &jc : job_expr.continuations) { for (size_t i = 0; i < job_expr.continuations().count(); i++) {
const ast::job_conjunction_continuation_t &jc = *job_expr.continuations().at(i);
if (result != end_execution_reason_t::ok) { if (result != end_execution_reason_t::ok) {
return result; return result;
} }
@ -1437,7 +1460,7 @@ end_execution_reason_t parse_execution_context_t::run_job_conjunction(
} }
// Check the conjunction type. // Check the conjunction type.
bool skip = false; bool skip = false;
switch (jc.conjunction.type) { switch (jc.conjunction().token_type()) {
case parse_token_type_t::andand: case parse_token_type_t::andand:
// AND. Skip if the last job failed. // AND. Skip if the last job failed.
skip = parser->get_last_status() != 0; skip = parser->get_last_status() != 0;
@ -1450,7 +1473,7 @@ end_execution_reason_t parse_execution_context_t::run_job_conjunction(
DIE("Unexpected job conjunction type"); DIE("Unexpected job conjunction type");
} }
if (!skip) { if (!skip) {
result = run_1_job(jc.job, associated_block); result = run_1_job(jc.job(), associated_block);
} }
} }
return result; return result;
@ -1465,8 +1488,8 @@ end_execution_reason_t parse_execution_context_t::test_and_run_1_job_conjunction
} }
// Maybe skip the job if it has a leading and/or. // Maybe skip the job if it has a leading and/or.
bool skip = false; bool skip = false;
if (jc.decorator.has_value()) { if (jc.has_decorator()) {
switch (jc.decorator->kw) { switch (jc.decorator().kw()) {
case parse_keyword_t::kw_and: case parse_keyword_t::kw_and:
// AND. Skip if the last job failed. // AND. Skip if the last job failed.
skip = parser->get_last_status() != 0; skip = parser->get_last_status() != 0;
@ -1490,8 +1513,9 @@ end_execution_reason_t parse_execution_context_t::test_and_run_1_job_conjunction
end_execution_reason_t parse_execution_context_t::run_job_list(const ast::job_list_t &job_list_node, end_execution_reason_t parse_execution_context_t::run_job_list(const ast::job_list_t &job_list_node,
const block_t *associated_block) { const block_t *associated_block) {
auto result = end_execution_reason_t::ok; auto result = end_execution_reason_t::ok;
for (const ast::job_conjunction_t &jc : job_list_node) { for (size_t i = 0; i < job_list_node.count(); i++) {
result = test_and_run_1_job_conjunction(jc, associated_block); const ast::job_conjunction_t *jc = job_list_node.at(i);
result = test_and_run_1_job_conjunction(*jc, associated_block);
} }
// Returns the result of the last job executed or skipped. // Returns the result of the last job executed or skipped.
return result; return result;
@ -1500,8 +1524,9 @@ end_execution_reason_t parse_execution_context_t::run_job_list(const ast::job_li
end_execution_reason_t parse_execution_context_t::run_job_list( end_execution_reason_t parse_execution_context_t::run_job_list(
const ast::andor_job_list_t &job_list_node, const block_t *associated_block) { const ast::andor_job_list_t &job_list_node, const block_t *associated_block) {
auto result = end_execution_reason_t::ok; auto result = end_execution_reason_t::ok;
for (const ast::andor_job_t &aoj : job_list_node) { for (size_t i = 0; i < job_list_node.count(); i++) {
result = test_and_run_1_job_conjunction(aoj.job, associated_block); const ast::andor_job_t *aoj = job_list_node.at(i);
result = test_and_run_1_job_conjunction(aoj->job(), associated_block);
} }
// Returns the result of the last job executed or skipped. // Returns the result of the last job executed or skipped.
return result; return result;
@ -1511,15 +1536,15 @@ end_execution_reason_t parse_execution_context_t::eval_node(const ast::statement
const block_t *associated_block) { const block_t *associated_block) {
// Note we only expect block-style statements here. No not statements. // Note we only expect block-style statements here. No not statements.
enum end_execution_reason_t status = end_execution_reason_t::ok; enum end_execution_reason_t status = end_execution_reason_t::ok;
const ast::node_t *contents = statement.contents.get(); const auto contents = statement.contents().ptr();
if (const auto *block = contents->try_as<ast::block_statement_t>()) { if (const auto *block = contents->try_as_block_statement()) {
status = this->run_block_statement(*block, associated_block); status = this->run_block_statement(*block, associated_block);
} else if (const auto *ifstat = contents->try_as<ast::if_statement_t>()) { } else if (const auto *ifstat = contents->try_as_if_statement()) {
status = this->run_if_statement(*ifstat, associated_block); status = this->run_if_statement(*ifstat, associated_block);
} else if (const auto *switchstat = contents->try_as<ast::switch_statement_t>()) { } else if (const auto *switchstat = contents->try_as_switch_statement()) {
status = this->run_switch_statement(*switchstat); status = this->run_switch_statement(*switchstat);
} else { } else {
FLOGF(error, L"Unexpected node %ls found in %s", statement.describe().c_str(), FLOGF(error, L"Unexpected node %ls found in %s", statement.describe()->c_str(),
__FUNCTION__); __FUNCTION__);
abort(); abort();
} }
@ -1535,7 +1560,7 @@ end_execution_reason_t parse_execution_context_t::eval_node(const ast::job_list_
if (const auto *infinite_recursive_node = if (const auto *infinite_recursive_node =
this->infinite_recursive_statement_in_job_list(job_list, &func_name)) { this->infinite_recursive_statement_in_job_list(job_list, &func_name)) {
// We have an infinite recursion. // We have an infinite recursion.
return this->report_error(STATUS_CMD_ERROR, *infinite_recursive_node, return this->report_error(STATUS_CMD_ERROR, *infinite_recursive_node->ptr(),
INFINITE_FUNC_RECURSION_ERR_MSG, func_name.c_str()); INFINITE_FUNC_RECURSION_ERR_MSG, func_name.c_str());
} }
@ -1544,7 +1569,8 @@ end_execution_reason_t parse_execution_context_t::eval_node(const ast::job_list_
if ((associated_block->type() == block_type_t::top && if ((associated_block->type() == block_type_t::top &&
parser->function_stack_is_overflowing()) || parser->function_stack_is_overflowing()) ||
(associated_block->type() == block_type_t::subst && parser->is_eval_depth_exceeded())) { (associated_block->type() == block_type_t::subst && parser->is_eval_depth_exceeded())) {
return this->report_error(STATUS_CMD_ERROR, job_list, CALL_STACK_LIMIT_EXCEEDED_ERR_MSG); return this->report_error(STATUS_CMD_ERROR, *job_list.ptr(),
CALL_STACK_LIMIT_EXCEEDED_ERR_MSG);
} }
return this->run_job_list(job_list, associated_block); return this->run_job_list(job_list, associated_block);
} }
@ -1594,17 +1620,16 @@ int parse_execution_context_t::line_offset_of_node(const ast::job_pipeline_t *no
} }
// If for some reason we're executing a node without source, return -1. // If for some reason we're executing a node without source, return -1.
auto range = node->try_source_range(); if (!node->try_source_range()) {
if (!range) {
return -1; return -1;
} }
return this->line_offset_of_character_at_offset(range->start); return this->line_offset_of_character_at_offset(node->source_range().start);
} }
int parse_execution_context_t::line_offset_of_character_at_offset(size_t offset) { int parse_execution_context_t::line_offset_of_character_at_offset(size_t offset) {
// Count the number of newlines, leveraging our cache. // Count the number of newlines, leveraging our cache.
assert(offset <= pstree->src.size()); assert(offset <= pstree->src().size());
// Easy hack to handle 0. // Easy hack to handle 0.
if (offset == 0) { if (offset == 0) {
@ -1613,7 +1638,7 @@ int parse_execution_context_t::line_offset_of_character_at_offset(size_t offset)
// We want to return (one plus) the number of newlines at offsets less than the given offset. // We want to return (one plus) the number of newlines at offsets less than the given offset.
// cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset. // cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset.
const wchar_t *str = pstree->src.c_str(); const wcstring &str = pstree->src();
if (offset > cached_lineno_offset) { if (offset > cached_lineno_offset) {
size_t i; size_t i;
for (i = cached_lineno_offset; i < offset && str[i] != L'\0'; i++) { for (i = cached_lineno_offset; i < offset && str[i] != L'\0'; i++) {
@ -1649,8 +1674,8 @@ int parse_execution_context_t::get_current_line_number() {
int parse_execution_context_t::get_current_source_offset() const { int parse_execution_context_t::get_current_source_offset() const {
int result = -1; int result = -1;
if (executing_job_node) { if (executing_job_node) {
if (auto range = executing_job_node->try_source_range()) { if (executing_job_node->try_source_range()) {
result = static_cast<int>(range->start); result = static_cast<int>(executing_job_node->source_range().start);
} }
} }
return result; return result;

View file

@ -38,7 +38,7 @@ enum class end_execution_reason_t {
class parse_execution_context_t : noncopyable_t { class parse_execution_context_t : noncopyable_t {
private: private:
parsed_source_ref_t pstree; rust::Box<parsed_source_ref_t> pstree;
parser_t *const parser; parser_t *const parser;
const operation_context_t &ctx; const operation_context_t &ctx;
@ -161,7 +161,7 @@ class parse_execution_context_t : noncopyable_t {
public: public:
/// Construct a context in preparation for evaluating a node in a tree, with the given block_io. /// Construct a context in preparation for evaluating a node in a tree, with the given block_io.
/// The execution context may access the parser and parent job group (if any) through ctx. /// The execution context may access the parser and parent job group (if any) through ctx.
parse_execution_context_t(parsed_source_ref_t pstree, const operation_context_t &ctx, parse_execution_context_t(rust::Box<parsed_source_ref_t> pstree, const operation_context_t &ctx,
io_chain_t block_io); io_chain_t block_io);
/// Returns the current line number, indexed from 1. Not const since it touches /// Returns the current line number, indexed from 1. Not const since it touches
@ -172,10 +172,10 @@ class parse_execution_context_t : noncopyable_t {
int get_current_source_offset() const; int get_current_source_offset() const;
/// Returns the source string. /// Returns the source string.
const wcstring &get_source() const { return pstree->src; } const wcstring &get_source() const { return pstree->src(); }
/// Return the parsed ast. /// Return the parsed ast.
const ast::ast_t &ast() const { return pstree->ast; } const ast::ast_t &ast() const { return pstree->ast(); }
/// Start executing at the given node. Returns 0 if there was no error, 1 if there was an /// Start executing at the given node. Returns 0 if there was no error, 1 if there was an
/// error. /// error.

View file

@ -1,64 +0,0 @@
// Programmatic representation of fish code.
#include "config.h" // IWYU pragma: keep
#include "parse_tree.h"
#include <stddef.h>
#include <string>
#include <utility>
#include "ast.h"
#include "common.h"
#include "enum_map.h"
#include "fallback.h"
#include "maybe.h"
#include "parse_constants.h"
#include "tokenizer.h"
#include "wutil.h" // IWYU pragma: keep
parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) {
switch (err) {
case tokenizer_error_t::none:
return parse_error_code_t::none;
case tokenizer_error_t::unterminated_quote:
return parse_error_code_t::tokenizer_unterminated_quote;
case tokenizer_error_t::unterminated_subshell:
return parse_error_code_t::tokenizer_unterminated_subshell;
case tokenizer_error_t::unterminated_slice:
return parse_error_code_t::tokenizer_unterminated_slice;
case tokenizer_error_t::unterminated_escape:
return parse_error_code_t::tokenizer_unterminated_escape;
default:
return parse_error_code_t::tokenizer_other;
}
}
/// Returns a string description of the given parse token.
wcstring parse_token_t::describe() const {
wcstring result = token_type_description(type);
if (keyword != parse_keyword_t::none) {
append_format(result, L" <%ls>", keyword_description(keyword));
}
return result;
}
/// A string description appropriate for presentation to the user.
wcstring parse_token_t::user_presentable_description() const {
return *token_type_user_presentable_description(type, keyword);
}
parsed_source_t::parsed_source_t(wcstring &&s, ast::ast_t &&ast)
: src(std::move(s)), ast(std::move(ast)) {}
parsed_source_t::~parsed_source_t() = default;
parsed_source_ref_t parse_source(wcstring &&src, parse_tree_flags_t flags,
parse_error_list_t *errors) {
using namespace ast;
ast_t ast = ast_t::parse(src, flags, errors);
if (ast.errored() && !(flags & parse_flag_continue_after_error)) {
return nullptr;
}
return std::make_shared<parsed_source_t>(std::move(src), std::move(ast));
}

View file

@ -9,50 +9,13 @@
#include "parse_constants.h" #include "parse_constants.h"
#include "tokenizer.h" #include "tokenizer.h"
/// A struct representing the token type that we use internally. #if INCLUDE_RUST_HEADERS
struct parse_token_t { #include "parse_tree.rs.h"
parse_token_type_t type; // The type of the token as represented by the parser using parsed_source_ref_t = ParsedSourceRefFFI;
parse_keyword_t keyword{parse_keyword_t::none}; // Any keyword represented by this token #else
bool has_dash_prefix{false}; // Hackish: whether the source contains a dash prefix struct ParsedSourceRefFFI;
bool is_help_argument{false}; // Hackish: whether the source looks like '-h' or '--help' using parsed_source_ref_t = ParsedSourceRefFFI;
bool is_newline{false}; // Hackish: if TOK_END, whether the source is a newline. #endif
bool may_be_variable_assignment{false}; // Hackish: whether this token is a string like FOO=bar
tokenizer_error_t tok_error{
tokenizer_error_t::none}; // If this is a tokenizer error, that error.
source_offset_t source_start{SOURCE_OFFSET_INVALID};
source_offset_t source_length{0};
/// \return the source range.
/// Note the start may be invalid.
source_range_t range() const { return source_range_t{source_start, source_length}; }
/// \return whether we are a string with the dash prefix set.
bool is_dash_prefix_string() const {
return type == parse_token_type_t::string && has_dash_prefix;
}
wcstring describe() const;
wcstring user_presentable_description() const;
constexpr parse_token_t(parse_token_type_t type) : type(type) {}
};
parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err);
/// A type wrapping up a parse tree and the original source behind it.
struct parsed_source_t : noncopyable_t, nonmovable_t {
wcstring src;
ast::ast_t ast;
parsed_source_t(wcstring &&s, ast::ast_t &&ast);
~parsed_source_t();
};
/// Return a shared pointer to parsed_source_t, or null on failure.
/// If parse_flag_continue_after_error is not set, this will return null on any error.
using parsed_source_ref_t = std::shared_ptr<const parsed_source_t>;
parsed_source_ref_t parse_source(wcstring &&src, parse_tree_flags_t flags,
parse_error_list_t *errors);
/// Error message when a command may not be in a pipeline. /// Error message when a command may not be in a pipeline.
#define INVALID_PIPELINE_CMD_ERR_MSG _(L"The '%ls' command can not be used in a pipeline") #define INVALID_PIPELINE_CMD_ERR_MSG _(L"The '%ls' command can not be used in a pipeline")

View file

@ -24,6 +24,7 @@
#include "operation_context.h" #include "operation_context.h"
#include "parse_constants.h" #include "parse_constants.h"
#include "parse_tree.h" #include "parse_tree.h"
#include "parse_util.rs.h"
#include "tokenizer.h" #include "tokenizer.h"
#include "wcstringutil.h" #include "wcstringutil.h"
#include "wildcard.h" #include "wildcard.h"
@ -592,36 +593,19 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote,
return result; return result;
} }
std::vector<int> parse_util_compute_indents(const wcstring &src) { indent_visitor_t::indent_visitor_t(const wcstring &src, std::vector<int> &indents)
// Make a vector the same size as the input string, which contains the indents. Initialize them : src(src), indents(indents), visitor(new_indent_visitor(*this)) {}
// to 0.
const size_t src_size = src.size();
std::vector<int> indents(src_size, 0);
// Simple trick: if our source does not contain a newline, then all indents are 0. bool indent_visitor_t::has_newline(const ast::maybe_newlines_t &nls) const {
if (src.find('\n') == wcstring::npos) { return nls.ptr()->source(src)->find(L'\n') != wcstring::npos;
return indents; }
}
// Parse the string. We pass continue_after_error to produce a forest; the trailing indent of int indent_visitor_t::visit(const void *node_) {
// the last node we visited becomes the input indent of the next. I.e. in the case of 'switch auto &node = *static_cast<const ast::node_t *>(node_);
// foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it
// were a case item list.
using namespace ast;
auto ast =
ast_t::parse(src, parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated);
// Visit all of our nodes. When we get a job_list or case_item_list, increment indent while
// visiting its children.
struct indent_visitor_t {
indent_visitor_t(const wcstring &src, std::vector<int> &indents)
: src(src), indents(indents) {}
void visit(const node_t &node) {
int inc = 0; int inc = 0;
int dec = 0; int dec = 0;
switch (node.type) { using namespace ast;
switch (node.typ()) {
case type_t::job_list: case type_t::job_list:
case type_t::andor_job_list: case type_t::andor_job_list:
// Job lists are never unwound. // Job lists are never unwound.
@ -631,8 +615,8 @@ std::vector<int> parse_util_compute_indents(const wcstring &src) {
// Increment indents for conditions in headers (#1665). // Increment indents for conditions in headers (#1665).
case type_t::job_conjunction: case type_t::job_conjunction:
if (node.parent->type == type_t::while_header || if (node.parent()->typ() == type_t::while_header ||
node.parent->type == type_t::if_clause) { node.parent()->typ() == type_t::if_clause) {
inc = 1; inc = 1;
dec = 1; dec = 1;
} }
@ -648,7 +632,7 @@ std::vector<int> parse_util_compute_indents(const wcstring &src) {
// end // end
// See #7252. // See #7252.
case type_t::job_continuation: case type_t::job_continuation:
if (has_newline(node.as<job_continuation_t>()->newlines)) { if (has_newline(node.as_job_continuation().newlines())) {
inc = 1; inc = 1;
dec = 1; dec = 1;
} }
@ -656,7 +640,7 @@ std::vector<int> parse_util_compute_indents(const wcstring &src) {
// Likewise for && and ||. // Likewise for && and ||.
case type_t::job_conjunction_continuation: case type_t::job_conjunction_continuation:
if (has_newline(node.as<job_conjunction_continuation_t>()->newlines)) { if (has_newline(node.as_job_conjunction_continuation().newlines())) {
inc = 1; inc = 1;
dec = 1; dec = 1;
} }
@ -681,15 +665,14 @@ std::vector<int> parse_util_compute_indents(const wcstring &src) {
// To address this, if we see that the switch statement was not closed, do not // To address this, if we see that the switch statement was not closed, do not
// decrement the indent afterwards. // decrement the indent afterwards.
inc = 1; inc = 1;
dec = node.parent->as<switch_statement_t>()->end.unsourced ? 0 : 1; dec = node.parent()->as_switch_statement().end().ptr()->has_source() ? 1 : 0;
break; break;
case type_t::token_base: { case type_t::token_base: {
auto tok = node.as<token_base_t>(); if (node.parent()->typ() == type_t::begin_header &&
if (node.parent->type == type_t::begin_header && node.token_type() == parse_token_type_t::end) {
tok->type == parse_token_type_t::end) {
// The newline after "begin" is optional, so it is part of the header. // The newline after "begin" is optional, so it is part of the header.
// The header is not in the indented block, so indent the newline here. // The header is not in the indented block, so indent the newline here.
if (node.source(src) == L"\n") { if (*node.source(src) == L"\n") {
inc = 1; inc = 1;
dec = 1; dec = 1;
} }
@ -701,10 +684,9 @@ std::vector<int> parse_util_compute_indents(const wcstring &src) {
} }
auto range = node.source_range(); auto range = node.source_range();
if (range.length > 0 && node.category == category_t::leaf) { if (range.length > 0 && node.category() == category_t::leaf) {
record_line_continuations_until(range.start); record_line_continuations_until(range.start);
std::fill(indents.begin() + last_leaf_end, indents.begin() + range.start, std::fill(indents.begin() + last_leaf_end, indents.begin() + range.start, last_indent);
last_indent);
} }
indent += inc; indent += inc;
@ -721,22 +703,18 @@ std::vector<int> parse_util_compute_indents(const wcstring &src) {
} }
// If this is a leaf node, apply the current indentation. // If this is a leaf node, apply the current indentation.
if (node.category == category_t::leaf && range.length > 0) { if (node.category() == category_t::leaf && range.length > 0) {
std::fill(indents.begin() + range.start, indents.begin() + range.end(), indent); std::fill(indents.begin() + range.start, indents.begin() + range.end(), indent);
last_leaf_end = range.start + range.length; last_leaf_end = range.start + range.length;
last_indent = indent; last_indent = indent;
} }
node_visitor(*this).accept_children_of(&node); return dec;
indent -= dec; }
}
/// \return whether a maybe_newlines node contains at least one newline. void indent_visitor_t::did_visit(int dec) { indent -= dec; }
bool has_newline(const maybe_newlines_t &nls) const {
return nls.source(src).find(L'\n') != wcstring::npos;
}
void record_line_continuations_until(size_t offset) { void indent_visitor_t::record_line_continuations_until(size_t offset) {
wcstring gap_text = src.substr(last_leaf_end, offset - last_leaf_end); wcstring gap_text = src.substr(last_leaf_end, offset - last_leaf_end);
size_t escaped_nl = gap_text.find(L"\\\n"); size_t escaped_nl = gap_text.find(L"\\\n");
if (escaped_nl == wcstring::npos) return; if (escaped_nl == wcstring::npos) return;
@ -751,31 +729,30 @@ std::vector<int> parse_util_compute_indents(const wcstring &src) {
line_continuations.push_back(newline - src.begin()); line_continuations.push_back(newline - src.begin());
newline = std::find(newline + 1, end, L'\n'); newline = std::find(newline + 1, end, L'\n');
} while (newline != end); } while (newline != end);
}
std::vector<int> parse_util_compute_indents(const wcstring &src) {
// Make a vector the same size as the input string, which contains the indents. Initialize them
// to 0.
const size_t src_size = src.size();
std::vector<int> indents(src_size, 0);
// Simple trick: if our source does not contain a newline, then all indents are 0.
if (src.find('\n') == wcstring::npos) {
return indents;
} }
// The one-past-the-last index of the most recently encountered leaf node. // Parse the string. We pass continue_after_error to produce a forest; the trailing indent of
// We use this to populate the indents even if there's no tokens in the range. // the last node we visited becomes the input indent of the next. I.e. in the case of 'switch
size_t last_leaf_end{0}; // foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it
// were a case item list.
// The last indent which we assigned. using namespace ast;
int last_indent{-1}; auto ast =
ast_parse(src, parse_flag_continue_after_error | parse_flag_include_comments |
// The source we are indenting. parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated);
const wcstring &src;
// List of indents, which we populate.
std::vector<int> &indents;
// Initialize our starting indent to -1, as our top-level node is a job list which
// will immediately increment it.
int indent{-1};
// List of locations of escaped newline characters.
std::vector<size_t> line_continuations;
};
indent_visitor_t iv(src, indents); indent_visitor_t iv(src, indents);
node_visitor(iv).accept(ast.top()); iv.visitor->visit(*ast->top());
iv.record_line_continuations_until(indents.size()); iv.record_line_continuations_until(indents.size());
std::fill(indents.begin() + iv.last_leaf_end, indents.end(), iv.last_indent); std::fill(indents.begin() + iv.last_leaf_end, indents.end(), iv.last_indent);
@ -838,8 +815,9 @@ bool parse_util_argument_is_help(const wcstring &s) { return s == L"-h" || s ==
// \return a pointer to the first argument node of an argument_or_redirection_list_t, or nullptr if // \return a pointer to the first argument node of an argument_or_redirection_list_t, or nullptr if
// there are no arguments. // there are no arguments.
static const ast::argument_t *get_first_arg(const ast::argument_or_redirection_list_t &list) { static const ast::argument_t *get_first_arg(const ast::argument_or_redirection_list_t &list) {
for (const ast::argument_or_redirection_t &v : list) { for (size_t i = 0; i < list.count(); i++) {
if (v.is_argument()) return &v.argument(); const ast::argument_or_redirection_t *v = list.at(i);
if (v->is_argument()) return &v->argument();
} }
return nullptr; return nullptr;
} }
@ -953,10 +931,10 @@ void parse_util_expand_variable_error(const wcstring &token, size_t global_token
parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argument_t &arg, parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argument_t &arg,
const wcstring &arg_src, const wcstring &arg_src,
parse_error_list_t *out_errors) { parse_error_list_t *out_errors) {
maybe_t<source_range_t> source_range = arg.try_source_range(); if (!arg.try_source_range()) return 0;
if (!source_range.has_value()) return 0; auto source_range = arg.source_range();
size_t source_start = source_range->start; size_t source_start = source_range.start;
parser_test_error_bits_t err = 0; parser_test_error_bits_t err = 0;
auto check_subtoken = [&arg_src, &out_errors, source_start](size_t begin, size_t end) -> int { auto check_subtoken = [&arg_src, &out_errors, source_start](size_t begin, size_t end) -> int {
@ -1062,8 +1040,8 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen
static bool detect_errors_in_backgrounded_job(const ast::job_pipeline_t &job, static bool detect_errors_in_backgrounded_job(const ast::job_pipeline_t &job,
parse_error_list_t *parse_errors) { parse_error_list_t *parse_errors) {
using namespace ast; using namespace ast;
auto source_range = job.try_source_range(); if (!job.try_source_range()) return false;
if (!source_range) return false; auto source_range = job.source_range();
bool errored = false; bool errored = false;
// Disallow background in the following cases: // Disallow background in the following cases:
@ -1071,16 +1049,16 @@ static bool detect_errors_in_backgrounded_job(const ast::job_pipeline_t &job,
// foo & ; or bar // foo & ; or bar
// if foo & ; end // if foo & ; end
// while foo & ; end // while foo & ; end
const job_conjunction_t *job_conj = job.parent->try_as<job_conjunction_t>(); const job_conjunction_t *job_conj = job.ptr()->parent()->try_as_job_conjunction();
if (!job_conj) return false; if (!job_conj) return false;
if (job_conj->parent->try_as<if_clause_t>()) { if (job_conj->ptr()->parent()->try_as_if_clause()) {
errored = append_syntax_error(parse_errors, source_range->start, source_range->length, errored = append_syntax_error(parse_errors, source_range.start, source_range.length,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG); BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
} else if (job_conj->parent->try_as<while_header_t>()) { } else if (job_conj->ptr()->parent()->try_as_while_header()) {
errored = append_syntax_error(parse_errors, source_range->start, source_range->length, errored = append_syntax_error(parse_errors, source_range.start, source_range.length,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG); BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
} else if (const ast::job_list_t *jlist = job_conj->parent->try_as<ast::job_list_t>()) { } else if (const ast::job_list_t *jlist = job_conj->ptr()->parent()->try_as_job_list()) {
// This isn't very complete, e.g. we don't catch 'foo & ; not and bar'. // This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
// Find the index of ourselves in the job list. // Find the index of ourselves in the job list.
size_t index; size_t index;
@ -1091,13 +1069,14 @@ static bool detect_errors_in_backgrounded_job(const ast::job_pipeline_t &job,
// Try getting the next job and check its decorator. // Try getting the next job and check its decorator.
if (const job_conjunction_t *next = jlist->at(index + 1)) { if (const job_conjunction_t *next = jlist->at(index + 1)) {
if (const keyword_base_t *deco = next->decorator.contents.get()) { if (next->has_decorator()) {
const auto &deco = next->decorator();
assert( assert(
(deco->kw == parse_keyword_t::kw_and || deco->kw == parse_keyword_t::kw_or) && (deco.kw() == parse_keyword_t::kw_and || deco.kw() == parse_keyword_t::kw_or) &&
"Unexpected decorator keyword"); "Unexpected decorator keyword");
const wchar_t *deco_name = (deco->kw == parse_keyword_t::kw_and ? L"and" : L"or"); const wchar_t *deco_name = (deco.kw() == parse_keyword_t::kw_and ? L"and" : L"or");
errored = append_syntax_error(parse_errors, deco->source_range().start, errored = append_syntax_error(parse_errors, deco.source_range().start,
deco->source_range().length, deco.source_range().length,
BOOL_AFTER_BACKGROUND_ERROR_MSG, deco_name); BOOL_AFTER_BACKGROUND_ERROR_MSG, deco_name);
} }
} }
@ -1119,27 +1098,28 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src,
// Determine if the first argument is help. // Determine if the first argument is help.
bool first_arg_is_help = false; bool first_arg_is_help = false;
if (const auto *arg = get_first_arg(dst.args_or_redirs)) { if (const auto *arg = get_first_arg(dst.args_or_redirs())) {
const wcstring &arg_src = arg->source(buff_src, storage); wcstring arg_src = *arg->source(buff_src);
*storage = arg_src;
first_arg_is_help = parse_util_argument_is_help(arg_src); first_arg_is_help = parse_util_argument_is_help(arg_src);
} }
// Get the statement we are part of. // Get the statement we are part of.
const statement_t *st = dst.parent->as<statement_t>(); const statement_t &st = dst.ptr()->parent()->as_statement();
// Walk up to the job. // Walk up to the job.
const ast::job_pipeline_t *job = nullptr; const ast::job_pipeline_t *job = nullptr;
for (const node_t *cursor = st; job == nullptr; cursor = cursor->parent) { for (auto cursor = dst.ptr()->parent(); job == nullptr; cursor = cursor->parent()) {
assert(cursor && "Reached root without finding a job"); assert(cursor->has_value() && "Reached root without finding a job");
job = cursor->try_as<ast::job_pipeline_t>(); job = cursor->try_as_job_pipeline();
} }
assert(job && "Should have found the job"); assert(job && "Should have found the job");
// Check our pipeline position. // Check our pipeline position.
pipeline_position_t pipe_pos; pipeline_position_t pipe_pos;
if (job->continuation.empty()) { if (job->continuation().empty()) {
pipe_pos = pipeline_position_t::none; pipe_pos = pipeline_position_t::none;
} else if (&job->statement == st) { } else if (&job->statement() == &st) {
pipe_pos = pipeline_position_t::first; pipe_pos = pipeline_position_t::first;
} else { } else {
pipe_pos = pipeline_position_t::subsequent; pipe_pos = pipeline_position_t::subsequent;
@ -1158,7 +1138,8 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src,
if (pipe_pos == pipeline_position_t::subsequent) { if (pipe_pos == pipeline_position_t::subsequent) {
// check if our command is 'and' or 'or'. This is very clumsy; we don't catch e.g. quoted // check if our command is 'and' or 'or'. This is very clumsy; we don't catch e.g. quoted
// commands. // commands.
const wcstring &command = dst.command.source(buff_src, storage); wcstring command = *dst.command().source(buff_src);
*storage = command;
if (command == L"and" || command == L"or") { if (command == L"and" || command == L"or") {
errored = append_syntax_error(parse_errors, source_start, source_length, errored = append_syntax_error(parse_errors, source_start, source_length,
INVALID_PIPELINE_CMD_ERR_MSG, command.c_str()); INVALID_PIPELINE_CMD_ERR_MSG, command.c_str());
@ -1174,14 +1155,16 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src,
// $status specifically is invalid as a command, // $status specifically is invalid as a command,
// to avoid people trying `if $status`. // to avoid people trying `if $status`.
// We see this surprisingly regularly. // We see this surprisingly regularly.
const wcstring &com = dst.command.source(buff_src, storage); wcstring com = *dst.command().source(buff_src);
*storage = com;
if (com == L"$status") { if (com == L"$status") {
errored = errored =
append_syntax_error(parse_errors, source_start, source_length, append_syntax_error(parse_errors, source_start, source_length,
_(L"$status is not valid as a command. See `help conditions`")); _(L"$status is not valid as a command. See `help conditions`"));
} }
const wcstring &unexp_command = dst.command.source(buff_src, storage); wcstring unexp_command = *dst.command().source(buff_src);
*storage = unexp_command;
if (!unexp_command.empty()) { if (!unexp_command.empty()) {
// Check that we can expand the command. // Check that we can expand the command.
// Make a new error list so we can fix the offset for just those, then append later. // Make a new error list so we can fix the offset for just those, then append later.
@ -1207,15 +1190,15 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src,
// loop from the ancestor alone; we need the header. That is, we hit a // loop from the ancestor alone; we need the header. That is, we hit a
// block_statement, and have to check its header. // block_statement, and have to check its header.
bool found_loop = false; bool found_loop = false;
for (const node_t *ancestor = &dst; ancestor != nullptr; ancestor = ancestor->parent) { for (auto ancestor = dst.ptr(); ancestor->has_value(); ancestor = ancestor->parent()) {
const auto *block = ancestor->try_as<block_statement_t>(); const auto *block = ancestor->try_as_block_statement();
if (!block) continue; if (!block) continue;
if (block->header->type == type_t::for_header || if (block->header().ptr()->typ() == type_t::for_header ||
block->header->type == type_t::while_header) { block->header().ptr()->typ() == type_t::while_header) {
// This is a loop header, so we can break or continue. // This is a loop header, so we can break or continue.
found_loop = true; found_loop = true;
break; break;
} else if (block->header->type == type_t::function_header) { } else if (block->header().ptr()->typ() == type_t::function_header) {
// This is a function header, so we cannot break or // This is a function header, so we cannot break or
// continue. We stop our search here. // continue. We stop our search here.
found_loop = false; found_loop = false;
@ -1245,7 +1228,7 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src,
// The expansion errors here go from the *command* onwards, // The expansion errors here go from the *command* onwards,
// so we need to offset them by the *command* offset, // so we need to offset them by the *command* offset,
// excluding the decoration. // excluding the decoration.
new_errors->offset_source_start(dst.command.source_range().start); new_errors->offset_source_start(dst.command().source_range().start);
parse_errors->append(&*new_errors); parse_errors->append(&*new_errors);
} }
} }
@ -1289,23 +1272,26 @@ parser_test_error_bits_t parse_util_detect_errors(const ast::ast_t &ast, const w
// Verify no variable expansions. // Verify no variable expansions.
wcstring storage; wcstring storage;
for (const node_t &node : ast) { for (auto ast_traversal = new_ast_traversal(*ast.top());;) {
if (const job_continuation_t *jc = node.try_as<job_continuation_t>()) { auto node = ast_traversal->next();
if (!node->has_value()) break;
if (const auto *jc = node->try_as_job_continuation()) {
// Somewhat clumsy way of checking for a statement without source in a pipeline. // Somewhat clumsy way of checking for a statement without source in a pipeline.
// See if our pipe has source but our statement does not. // See if our pipe has source but our statement does not.
if (!jc->pipe.unsourced && !jc->statement.try_source_range().has_value()) { if (jc->pipe().ptr()->has_source() && !jc->statement().ptr()->try_source_range()) {
has_unclosed_pipe = true; has_unclosed_pipe = true;
} }
} else if (const auto *jcc = node.try_as<job_conjunction_continuation_t>()) { } else if (const auto *jcc = node->try_as_job_conjunction_continuation()) {
// Somewhat clumsy way of checking for a job without source in a conjunction. // Somewhat clumsy way of checking for a job without source in a conjunction.
// See if our conjunction operator (&& or ||) has source but our job does not. // See if our conjunction operator (&& or ||) has source but our job does not.
if (!jcc->conjunction.unsourced && !jcc->job.try_source_range().has_value()) { if (jcc->conjunction().ptr()->has_source() && !jcc->job().try_source_range()) {
has_unclosed_conjunction = true; has_unclosed_conjunction = true;
} }
} else if (const argument_t *arg = node.try_as<argument_t>()) { } else if (const argument_t *arg = node->try_as_argument()) {
const wcstring &arg_src = arg->source(buff_src, &storage); wcstring arg_src = *arg->source(buff_src);
storage = arg_src;
res |= parse_util_detect_errors_in_argument(*arg, arg_src, out_errors); res |= parse_util_detect_errors_in_argument(*arg, arg_src, out_errors);
} else if (const ast::job_pipeline_t *job = node.try_as<ast::job_pipeline_t>()) { } else if (const ast::job_pipeline_t *job = node->try_as_job_pipeline()) {
// Disallow background in the following cases: // Disallow background in the following cases:
// //
// foo & ; and bar // foo & ; and bar
@ -1313,23 +1299,24 @@ parser_test_error_bits_t parse_util_detect_errors(const ast::ast_t &ast, const w
// if foo & ; end // if foo & ; end
// while foo & ; end // while foo & ; end
// If it's not a background job, nothing to do. // If it's not a background job, nothing to do.
if (job->bg) { if (job->has_bg()) {
errored |= detect_errors_in_backgrounded_job(*job, out_errors); errored |= detect_errors_in_backgrounded_job(*job, out_errors);
} }
} else if (const ast::decorated_statement_t *stmt = node.try_as<decorated_statement_t>()) { } else if (const auto *stmt = node->try_as_decorated_statement()) {
errored |= detect_errors_in_decorated_statement(buff_src, *stmt, &storage, out_errors); errored |= detect_errors_in_decorated_statement(buff_src, *stmt, &storage, out_errors);
} else if (const auto *block = node.try_as<block_statement_t>()) { } else if (const auto *block = node->try_as_block_statement()) {
// If our 'end' had no source, we are unsourced. // If our 'end' had no source, we are unsourced.
if (block->end.unsourced) has_unclosed_block = true; if (!block->end().ptr()->has_source()) has_unclosed_block = true;
errored |= detect_errors_in_block_redirection_list(block->args_or_redirs, out_errors); errored |= detect_errors_in_block_redirection_list(block->args_or_redirs(), out_errors);
} else if (const auto *ifs = node.try_as<if_statement_t>()) { } else if (const auto *ifs = node->try_as_if_statement()) {
// If our 'end' had no source, we are unsourced. // If our 'end' had no source, we are unsourced.
if (ifs->end.unsourced) has_unclosed_block = true; if (!ifs->end().ptr()->has_source()) has_unclosed_block = true;
errored |= detect_errors_in_block_redirection_list(ifs->args_or_redirs, out_errors); errored |= detect_errors_in_block_redirection_list(ifs->args_or_redirs(), out_errors);
} else if (const auto *switchs = node.try_as<switch_statement_t>()) { } else if (const auto *switchs = node->try_as_switch_statement()) {
// If our 'end' had no source, we are unsourced. // If our 'end' had no source, we are unsourced.
if (switchs->end.unsourced) has_unclosed_block = true; if (!switchs->end().ptr()->has_source()) has_unclosed_block = true;
errored |= detect_errors_in_block_redirection_list(switchs->args_or_redirs, out_errors); errored |=
detect_errors_in_block_redirection_list(switchs->args_or_redirs(), out_errors);
} }
} }
@ -1354,7 +1341,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
// Parse the input string into an ast. Some errors are detected here. // Parse the input string into an ast. Some errors are detected here.
using namespace ast; using namespace ast;
auto parse_errors = new_parse_error_list(); auto parse_errors = new_parse_error_list();
auto ast = ast_t::parse(buff_src, parse_flags, &*parse_errors); auto ast = ast_parse(buff_src, parse_flags, &*parse_errors);
if (allow_incomplete) { if (allow_incomplete) {
// Issue #1238: If the only error was unterminated quote, then consider this to have parsed // Issue #1238: If the only error was unterminated quote, then consider this to have parsed
// successfully. // successfully.
@ -1384,7 +1371,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
} }
// Defer to the tree-walking version. // Defer to the tree-walking version.
return parse_util_detect_errors(ast, buff_src, out_errors); return parse_util_detect_errors(*ast, buff_src, out_errors);
} }
maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_list_src, maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_list_src,
@ -1399,16 +1386,18 @@ maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_
// Parse the string as a freestanding argument list. // Parse the string as a freestanding argument list.
using namespace ast; using namespace ast;
auto errors = new_parse_error_list(); auto errors = new_parse_error_list();
auto ast = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &*errors); auto ast = ast_parse_argument_list(arg_list_src, parse_flag_none, &*errors);
if (!errors->empty()) { if (!errors->empty()) {
return get_error_text(*errors); return get_error_text(*errors);
} }
// Get the root argument list and extract arguments from it. // Get the root argument list and extract arguments from it.
// Test each of these. // Test each of these.
for (const argument_t &arg : ast.top()->as<freestanding_argument_list_t>()->arguments) { const auto &args = ast->top()->as_freestanding_argument_list().arguments();
const wcstring arg_src = arg.source(arg_list_src); for (size_t i = 0; i < args.count(); i++) {
if (parse_util_detect_errors_in_argument(arg, arg_src, &*errors)) { const argument_t *arg = args.at(i);
const wcstring arg_src = *arg->source(arg_list_src);
if (parse_util_detect_errors_in_argument(*arg, arg_src, &*errors)) {
return get_error_text(*errors); return get_error_text(*errors);
} }
} }

View file

@ -6,14 +6,12 @@
#include <vector> #include <vector>
#include "ast.h"
#include "common.h" #include "common.h"
#include "cxx.h"
#include "maybe.h" #include "maybe.h"
#include "parse_constants.h" #include "parse_constants.h"
namespace ast {
struct argument_t;
class ast_t;
} // namespace ast
struct Tok; struct Tok;
using tok_t = Tok; using tok_t = Tok;
@ -116,6 +114,47 @@ wchar_t parse_util_get_quote_type(const wcstring &cmd, size_t pos);
wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote, wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote,
bool no_tilde = false); bool no_tilde = false);
// Visit all of our nodes. When we get a job_list or case_item_list, increment indent while
// visiting its children.
struct IndentVisitor;
/// C++ half of the indent computation: holds the source text, the output vector of indents and
/// the bookkeeping used while the AST is walked. The traversal itself is driven through the
/// Rust-side IndentVisitor stored in `visitor`.
struct indent_visitor_t {
/// Bind to the source string \p src and to the \p indents vector that will be populated.
/// Both are stored as references (see the members below), so they must outlive this object.
indent_visitor_t(const wcstring &src, std::vector<int> &indents);
// Copying is disabled.
indent_visitor_t(const indent_visitor_t &) = delete;
indent_visitor_t &operator=(const indent_visitor_t &) = delete;
/// Visit a single node, passed as an untyped pointer.
// NOTE(review): the returned int presumably is the value later handed to did_visit() as `dec`
// - confirm against the implementation.
int visit(const void *node);
/// Counterpart of visit(), taking an int \p dec.
// NOTE(review): `dec` looks like the amount by which to decrement the indent - confirm.
void did_visit(int dec);
// Everything below, including all data members, is only declared when INCLUDE_RUST_HEADERS is
// nonzero.
// NOTE(review): this makes the struct's layout depend on the macro; code that constructs or
// destroys this type presumably needs to be compiled with it set - confirm.
#if INCLUDE_RUST_HEADERS
/// \return whether a maybe_newlines node contains at least one newline.
bool has_newline(const ast::maybe_newlines_t &nls) const;
/// Record line continuations up to \p offset.
// NOTE(review): presumably fills `line_continuations` for positions before `offset` - confirm
// the exact bound (inclusive or exclusive) against the implementation.
void record_line_continuations_until(size_t offset);
// The one-past-the-last index of the most recently encountered leaf node.
// We use this to populate the indents even if there are no tokens in the range.
size_t last_leaf_end{0};
// The last indent which we assigned.
int last_indent{-1};
// The source we are indenting.
const wcstring &src;
// List of indents, which we populate.
std::vector<int> &indents;
// Initialize our starting indent to -1, as our top-level node is a job list which
// will immediately increment it.
int indent{-1};
// List of locations of escaped newline characters.
std::vector<size_t> line_continuations;
// Owning handle to the Rust-side visitor that performs the AST traversal.
rust::Box<IndentVisitor> visitor;
#endif
};
/// Given a string, parse it as fish code and then return the indents. The return value has the same /// Given a string, parse it as fish code and then return the indents. The return value has the same
/// size as the string. /// size as the string.
std::vector<int> parse_util_compute_indents(const wcstring &src); std::vector<int> parse_util_compute_indents(const wcstring &src);

View file

@ -189,18 +189,18 @@ completion_list_t parser_t::expand_argument_list(const wcstring &arg_list_src,
expand_flags_t eflags, expand_flags_t eflags,
const operation_context_t &ctx) { const operation_context_t &ctx) {
// Parse the string as an argument list. // Parse the string as an argument list.
auto ast = ast::ast_t::parse_argument_list(arg_list_src); auto ast = ast_parse_argument_list(arg_list_src);
if (ast.errored()) { if (ast->errored()) {
// Failed to parse. Here we expect to have reported any errors in test_args. // Failed to parse. Here we expect to have reported any errors in test_args.
return {}; return {};
} }
// Get the root argument list and extract arguments from it. // Get the root argument list and extract arguments from it.
completion_list_t result; completion_list_t result;
const ast::freestanding_argument_list_t *list = const ast::freestanding_argument_list_t &list = ast->top()->as_freestanding_argument_list();
ast.top()->as<ast::freestanding_argument_list_t>(); for (size_t i = 0; i < list.arguments().count(); i++) {
for (const ast::argument_t &arg : list->arguments) { const ast::argument_t &arg = *list.arguments().at(i);
wcstring arg_src = arg.source(arg_list_src); wcstring arg_src = *arg.source(arg_list_src);
if (expand_string(arg_src, &result, eflags, ctx) == expand_result_t::error) { if (expand_string(arg_src, &result, eflags, ctx) == expand_result_t::error) {
break; // failed to expand a string break; // failed to expand a string
} }
@ -528,8 +528,9 @@ eval_res_t parser_t::eval(const wcstring &cmd, const io_chain_t &io,
const job_group_ref_t &job_group, enum block_type_t block_type) { const job_group_ref_t &job_group, enum block_type_t block_type) {
// Parse the source into a tree, if we can. // Parse the source into a tree, if we can.
auto error_list = new_parse_error_list(); auto error_list = new_parse_error_list();
if (parsed_source_ref_t ps = parse_source(wcstring{cmd}, parse_flag_none, &*error_list)) { auto ps = parse_source(wcstring{cmd}, parse_flag_none, &*error_list);
return this->eval(ps, io, job_group, block_type); if (ps->has_value()) {
return this->eval(*ps, io, job_group, block_type);
} else { } else {
// Get a backtrace. This includes the message. // Get a backtrace. This includes the message.
wcstring backtrace_and_desc; wcstring backtrace_and_desc;
@ -550,10 +551,10 @@ eval_res_t parser_t::eval_string_ffi1(const wcstring &cmd) { return eval(cmd, io
eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io, eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io,
const job_group_ref_t &job_group, enum block_type_t block_type) { const job_group_ref_t &job_group, enum block_type_t block_type) {
assert(block_type == block_type_t::top || block_type == block_type_t::subst); assert(block_type == block_type_t::top || block_type == block_type_t::subst);
const auto *job_list = ps->ast.top()->as<ast::job_list_t>(); const auto &job_list = ps.ast().top()->as_job_list();
if (!job_list->empty()) { if (!job_list.empty()) {
// Execute the top job list. // Execute the top job list.
return this->eval_node(ps, *job_list, io, job_group, block_type); return this->eval_node(ps, job_list, io, job_group, block_type);
} else { } else {
auto status = proc_status_t::from_exit_code(get_last_status()); auto status = proc_status_t::from_exit_code(get_last_status());
bool break_expand = false; bool break_expand = false;
@ -618,8 +619,8 @@ eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, const T &node,
// Create and set a new execution context. // Create and set a new execution context.
using exc_ctx_ref_t = std::unique_ptr<parse_execution_context_t>; using exc_ctx_ref_t = std::unique_ptr<parse_execution_context_t>;
scoped_push<exc_ctx_ref_t> exc(&execution_context, scoped_push<exc_ctx_ref_t> exc(
make_unique<parse_execution_context_t>(ps, op_ctx, block_io)); &execution_context, make_unique<parse_execution_context_t>(ps.clone(), op_ctx, block_io));
// Check the exec count so we know if anything got executed. // Check the exec count so we know if anything got executed.
const size_t prev_exec_count = libdata().exec_count; const size_t prev_exec_count = libdata().exec_count;

View file

@ -255,7 +255,9 @@ static void handle_child_status(const shared_ptr<job_t> &job, process_t *proc,
} }
} }
process_t::process_t() : proc_redirection_specs_(new_redirection_spec_list()) {} process_t::process_t()
: block_node_source(empty_parsed_source_ref()),
proc_redirection_specs_(new_redirection_spec_list()) {}
void process_t::check_generations_before_launch() { void process_t::check_generations_before_launch() {
gens_ = topic_monitor_principal().current_generations(); gens_ = topic_monitor_principal().current_generations();

View file

@ -17,7 +17,9 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "ast.h"
#include "common.h" #include "common.h"
#include "cxx.h"
#include "maybe.h" #include "maybe.h"
#include "parse_tree.h" #include "parse_tree.h"
#include "redirection.h" #include "redirection.h"
@ -53,10 +55,6 @@ using clock_ticks_t = uint64_t;
/// This uses sysconf(_SC_CLK_TCK) to convert to seconds. /// This uses sysconf(_SC_CLK_TCK) to convert to seconds.
double clock_ticks_to_seconds(clock_ticks_t ticks); double clock_ticks_to_seconds(clock_ticks_t ticks);
namespace ast {
struct statement_t;
}
struct job_group_t; struct job_group_t;
using job_group_ref_t = std::shared_ptr<job_group_t>; using job_group_ref_t = std::shared_ptr<job_group_t>;
@ -255,7 +253,7 @@ class process_t {
/// For internal block processes only, the node of the statement. /// For internal block processes only, the node of the statement.
/// This is always either block, ifs, or switchs, never boolean or decorated. /// This is always either block, ifs, or switchs, never boolean or decorated.
parsed_source_ref_t block_node_source{}; rust::Box<ParsedSourceRefFFI> block_node_source;
const ast::statement_t *internal_block_node{}; const ast::statement_t *internal_block_node{};
struct concrete_assignment { struct concrete_assignment {

View file

@ -1421,13 +1421,13 @@ static std::vector<positioned_token_t> extract_tokens(const wcstring &str) {
parse_tree_flags_t ast_flags = parse_flag_continue_after_error | parse_tree_flags_t ast_flags = parse_flag_continue_after_error |
parse_flag_accept_incomplete_tokens | parse_flag_accept_incomplete_tokens |
parse_flag_leave_unterminated; parse_flag_leave_unterminated;
auto ast = ast::ast_t::parse(str, ast_flags); auto ast = ast_parse(str, ast_flags);
// Helper to check if a node is the command portion of an undecorated statement. // Helper to check if a node is the command portion of an undecorated statement.
auto is_command = [&](const node_t *node) { auto is_command = [&](const ast::node_t &node) {
for (const node_t *cursor = node; cursor; cursor = cursor->parent) { for (auto cursor = node.ptr(); cursor->has_value(); cursor = cursor->parent()) {
if (const auto *stmt = cursor->try_as<decorated_statement_t>()) { if (const auto *stmt = cursor->try_as_decorated_statement()) {
if (!stmt->opt_decoration && node == &stmt->command) { if (!stmt->has_opt_decoration() && node.pointer_eq(*stmt->command().ptr())) {
return true; return true;
} }
} }
@ -1437,10 +1437,11 @@ static std::vector<positioned_token_t> extract_tokens(const wcstring &str) {
wcstring cmdsub_contents; wcstring cmdsub_contents;
std::vector<positioned_token_t> result; std::vector<positioned_token_t> result;
traversal_t tv = ast.walk(); for (auto tv = new_ast_traversal(*ast->top());;) {
while (const node_t *node = tv.next()) { auto node = tv->next();
if (!node->has_value()) break;
// We are only interested in leaf nodes with source. // We are only interested in leaf nodes with source.
if (node->category != category_t::leaf) continue; if (node->category() != category_t::leaf) continue;
source_range_t r = node->source_range(); source_range_t r = node->source_range();
if (r.length == 0) continue; if (r.length == 0) continue;
@ -1463,7 +1464,7 @@ static std::vector<positioned_token_t> extract_tokens(const wcstring &str) {
if (!has_cmd_subs) { if (!has_cmd_subs) {
// Common case of no command substitutions in this leaf node. // Common case of no command substitutions in this leaf node.
result.push_back(positioned_token_t{r, is_command(node)}); result.push_back(positioned_token_t{r, is_command(*node)});
} }
} }
return result; return result;
@ -4739,16 +4740,16 @@ static int read_ni(parser_t &parser, int fd, const io_chain_t &io) {
// Parse into an ast and detect errors. // Parse into an ast and detect errors.
auto errors = new_parse_error_list(); auto errors = new_parse_error_list();
auto ast = ast::ast_t::parse(str, parse_flag_none, &*errors); auto ast = ast_parse(str, parse_flag_none, &*errors);
bool errored = ast.errored(); bool errored = ast->errored();
if (!errored) { if (!errored) {
errored = parse_util_detect_errors(ast, str, &*errors); errored = parse_util_detect_errors(*ast, str, &*errors);
} }
if (!errored) { if (!errored) {
// Construct a parsed source ref. // Construct a parsed source ref.
// Be careful to transfer ownership, this could be a very large string. // Be careful to transfer ownership, this could be a very large string.
parsed_source_ref_t ps = std::make_shared<parsed_source_t>(std::move(str), std::move(ast)); auto ps = new_parsed_source_ref(str, *ast);
parser.eval(ps, io); parser.eval(*ps, io);
return 0; return 0;
} else { } else {
wcstring sb; wcstring sb;