2016-05-02 23:09:46 +00:00
|
|
|
// Programmatic representation of fish code.
|
2013-07-25 22:24:22 +00:00
|
|
|
#ifndef FISH_PARSE_PRODUCTIONS_H
|
|
|
|
#define FISH_PARSE_PRODUCTIONS_H
|
2013-05-26 19:12:16 +00:00
|
|
|
|
2015-07-25 15:14:25 +00:00
|
|
|
#include <stddef.h>
|
2017-02-13 04:24:22 +00:00
|
|
|
#include <stdint.h>
|
2016-04-21 06:00:54 +00:00
|
|
|
#include <sys/types.h>
|
2017-02-14 04:37:27 +00:00
|
|
|
|
2020-06-20 22:27:10 +00:00
|
|
|
#include <deque>
|
2016-05-02 23:09:46 +00:00
|
|
|
#include <memory>
|
|
|
|
#include <vector>
|
2013-05-26 19:12:16 +00:00
|
|
|
|
|
|
|
#include "common.h"
|
2018-01-08 03:50:34 +00:00
|
|
|
#include "maybe.h"
|
2013-12-09 05:54:06 +00:00
|
|
|
#include "parse_constants.h"
|
2018-01-08 03:07:49 +00:00
|
|
|
#include "parse_grammar.h"
|
2016-05-02 23:09:46 +00:00
|
|
|
#include "tokenizer.h"
|
2013-06-11 16:37:51 +00:00
|
|
|
|
2013-06-24 19:33:40 +00:00
|
|
|
// Forward declaration; the class itself is defined later in this header.
class parse_node_tree_t;
|
2014-03-26 03:06:34 +00:00
|
|
|
|
|
|
|
/// Type used to refer to a node by offset, e.g. parse_node_t::parent and
/// parse_node_t::child_start.
using node_offset_t = uint32_t;

/// Sentinel node offset: the largest node_offset_t value. This is a typed constant rather than a
/// preprocessor macro, in line with SOURCE_OFFSET_INVALID below.
constexpr node_offset_t NODE_OFFSET_INVALID = static_cast<node_offset_t>(-1);

/// Type used for offsets and lengths into source text, e.g. parse_node_t::source_start and
/// parse_node_t::source_length.
using source_offset_t = uint32_t;

/// Sentinel source offset: the largest source_offset_t value.
constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1);
|
2014-03-26 03:06:34 +00:00
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// A struct representing the token type that we use internally.
|
|
|
|
struct parse_token_t {
|
|
|
|
enum parse_token_type_t type; // The type of the token as represented by the parser
|
2020-06-09 22:13:02 +00:00
|
|
|
enum parse_keyword_t keyword {
|
|
|
|
parse_keyword_t::none
|
|
|
|
}; // Any keyword represented by this token
|
2019-05-05 10:09:25 +00:00
|
|
|
bool has_dash_prefix{false}; // Hackish: whether the source contains a dash prefix
|
|
|
|
bool is_help_argument{false}; // Hackish: whether the source looks like '-h' or '--help'
|
|
|
|
bool is_newline{false}; // Hackish: if TOK_END, whether the source is a newline.
|
|
|
|
bool preceding_escaped_nl{false}; // Whether there was an escaped newline preceding this token.
|
Support FOO=bar syntax for passing variables to individual commands
This adds initial support for statements with prefixed variable assignments.
Statments like this are supported:
a=1 b=$a echo $b # outputs 1
Just like in other shells, the left-hand side of each assignment must
be a valid variable identifier (no quoting/escaping). Array indexing
(PATH[1]=/bin ls $PATH) is *not* yet supported, but can be added fairly
easily.
The right hand side may be any valid string token, like a command
substitution, or a brace expansion.
Since `a=* foo` is equivalent to `begin set -lx a *; foo; end`,
the assignment, like `set`, uses nullglob behavior, e.g. below command
can safely be used to check if a directory is empty.
x=/nothing/{,.}* test (count $x) -eq 0
Generic file completion is done after the equal sign, so for example
pressing tab after something like `HOME=/` completes files in the
root directory
Subcommand completion works, so something like
`GIT_DIR=repo.git and command git ` correctly calls git completions
(but the git completion does not use the variable as of now).
The variable assignment is highlighted like an argument.
Closes #6048
2019-10-23 01:13:29 +00:00
|
|
|
bool may_be_variable_assignment{false}; // Hackish: whether this token is a string like FOO=bar
|
2020-06-20 22:27:10 +00:00
|
|
|
tokenizer_error_t tok_error{tokenizer_error_t::none}; // If this is a tokenizer error, that error.
|
2018-05-07 22:22:09 +00:00
|
|
|
source_offset_t source_start{SOURCE_OFFSET_INVALID};
|
|
|
|
source_offset_t source_length{0};
|
2013-08-11 07:35:00 +00:00
|
|
|
|
2020-06-20 22:27:10 +00:00
|
|
|
/// \return the source range.
|
2020-07-02 04:06:58 +00:00
|
|
|
/// Note the start may be invalid.
|
2020-06-20 22:27:10 +00:00
|
|
|
source_range_t range() const {
|
|
|
|
return source_range_t{source_start, source_length};
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \return whether we are a string with the dash prefix set.
|
|
|
|
bool is_dash_prefix_string() const {
|
|
|
|
return type == parse_token_type_string && has_dash_prefix;
|
|
|
|
}
|
|
|
|
|
2013-10-12 09:46:49 +00:00
|
|
|
wcstring describe() const;
|
2014-01-01 08:04:02 +00:00
|
|
|
wcstring user_presentable_description() const;
|
2018-05-07 22:22:09 +00:00
|
|
|
|
|
|
|
constexpr parse_token_t(parse_token_type_t type) : type(type) {}
|
2013-08-11 07:35:00 +00:00
|
|
|
};
|
|
|
|
|
2020-06-20 22:27:10 +00:00
|
|
|
/// Return a new parse token, advancing the tokenizer.
|
|
|
|
/// NOTE(review): \p out_token and \p storage look like out-parameters (non-const pointers), but
/// what is written to them is not visible from this header -- confirm against the definition.
parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token, wcstring *storage);
|
2013-08-11 07:35:00 +00:00
|
|
|
|
2019-11-19 00:54:36 +00:00
|
|
|
/// Build a string from the given node tree and the source \p src it refers to.
/// NOTE(review): presumably a human-readable dump of the tree for debugging -- confirm against
/// the definition.
wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src);
|
2013-06-11 16:37:51 +00:00
|
|
|
|
2016-04-11 02:08:07 +00:00
|
|
|
/// \return a wide C string for the given token type; going by the name, a description of it.
/// NOTE(review): lifetime of the returned pointer is not visible here -- confirm before storing.
const wchar_t *token_type_description(parse_token_type_t type);
|
|
|
|
/// \return a wide C string for the given keyword; going by the name, a description of it.
/// NOTE(review): lifetime of the returned pointer is not visible here -- confirm before storing.
const wchar_t *keyword_description(parse_keyword_t type);
|
2013-06-23 09:09:46 +00:00
|
|
|
|
2020-06-20 22:27:10 +00:00
|
|
|
/// Convert a tokenizer error into a parse error code.
parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err);
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
// Node flags.
|
|
|
|
enum {
    /// Flag indicating that the node has associated comment nodes.
    parse_node_flag_has_comments = 1 << 0,

    /// Flag indicating that the token was preceded by an escaped newline, e.g. the `cat` in:
    ///   `echo abc | \`
    ///   `    cat`
    /// The example lines are quoted so that no comment line ends in a backslash; a trailing
    /// backslash would splice the next physical line into the comment.
    parse_node_flag_preceding_escaped_nl = 1 << 1,
};
|
|
|
|
/// Storage type for a combination of the parse_node_flag_* bits.
using parse_node_flags_t = uint8_t;
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// Node-type specific tag value.
|
2015-12-15 22:59:03 +00:00
|
|
|
using parse_node_tag_t = uint8_t;
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields
|
|
|
|
/// is important.
|
|
|
|
class parse_node_t {
|
|
|
|
public:
|
|
|
|
// Start in the source code.
|
2020-06-09 22:16:31 +00:00
|
|
|
source_offset_t source_start{SOURCE_OFFSET_INVALID};
|
2016-05-02 23:09:46 +00:00
|
|
|
// Length of our range in the source code.
|
2020-06-09 22:16:31 +00:00
|
|
|
source_offset_t source_length{0};
|
2016-05-02 23:09:46 +00:00
|
|
|
// Parent
|
2020-06-09 22:16:31 +00:00
|
|
|
node_offset_t parent{NODE_OFFSET_INVALID};
|
2016-05-02 23:09:46 +00:00
|
|
|
// Children
|
2020-06-09 22:16:31 +00:00
|
|
|
node_offset_t child_start{0};
|
2016-05-02 23:09:46 +00:00
|
|
|
// Number of children.
|
2020-06-09 22:16:31 +00:00
|
|
|
uint8_t child_count{0};
|
2016-05-02 23:09:46 +00:00
|
|
|
// Type of the node.
|
2014-03-26 03:06:34 +00:00
|
|
|
enum parse_token_type_t type;
|
2016-05-02 23:09:46 +00:00
|
|
|
// Keyword associated with node.
|
2020-06-09 22:16:31 +00:00
|
|
|
enum parse_keyword_t keyword { parse_keyword_t::none };
|
2016-05-02 23:09:46 +00:00
|
|
|
// Node flags.
|
|
|
|
parse_node_flags_t flags : 4;
|
|
|
|
// This is used to store e.g. the statement decoration.
|
|
|
|
parse_node_tag_t tag : 4;
|
|
|
|
// Description
|
2018-03-04 23:03:56 +00:00
|
|
|
wcstring describe() const;
|
2013-07-23 01:26:15 +00:00
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
// Constructor
|
2020-06-09 22:16:31 +00:00
|
|
|
explicit parse_node_t(parse_token_type_t ty) : type(ty), flags(0), tag(0) {}
|
2016-05-02 23:09:46 +00:00
|
|
|
|
|
|
|
node_offset_t child_offset(node_offset_t which) const {
|
2013-06-23 09:09:46 +00:00
|
|
|
PARSE_ASSERT(which < child_count);
|
|
|
|
return child_start + which;
|
|
|
|
}
|
2013-08-11 07:35:00 +00:00
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// Indicate if this node has a range of source code associated with it.
|
|
|
|
bool has_source() const {
|
|
|
|
// Should never have a nonempty range with an invalid offset.
|
2014-09-29 18:29:50 +00:00
|
|
|
assert(this->source_start != SOURCE_OFFSET_INVALID || this->source_length == 0);
|
|
|
|
return this->source_length > 0;
|
2013-08-08 22:06:46 +00:00
|
|
|
}
|
2014-01-15 09:40:40 +00:00
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// Indicate if the node has comment nodes.
|
2019-05-05 10:09:25 +00:00
|
|
|
bool has_comments() const { return this->flags & parse_node_flag_has_comments; }
|
2018-05-07 22:22:09 +00:00
|
|
|
|
|
|
|
/// Indicates if we have a preceding escaped newline.
|
|
|
|
bool has_preceding_escaped_newline() const {
|
|
|
|
return this->flags & parse_node_flag_preceding_escaped_nl;
|
2016-10-21 04:14:40 +00:00
|
|
|
}
|
2014-12-23 18:58:45 +00:00
|
|
|
|
2019-10-29 12:32:26 +00:00
|
|
|
source_range_t source_range() const {
|
|
|
|
assert(has_source());
|
|
|
|
return {source_start, source_length};
|
|
|
|
}
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// Gets source for the node, or the empty string if it has no source.
|
|
|
|
wcstring get_source(const wcstring &str) const {
|
|
|
|
if (!has_source())
|
2013-10-08 22:05:30 +00:00
|
|
|
return wcstring();
|
|
|
|
else
|
|
|
|
return wcstring(str, this->source_start, this->source_length);
|
|
|
|
}
|
2014-01-15 09:40:40 +00:00
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// Returns whether the given location is within the source range or at its end.
|
|
|
|
bool location_in_or_at_end_of_source_range(size_t loc) const {
|
2013-10-13 01:17:03 +00:00
|
|
|
return has_source() && source_start <= loc && loc - source_start <= source_length;
|
|
|
|
}
|
2013-06-11 16:37:51 +00:00
|
|
|
};
|
|
|
|
|
2018-01-13 22:25:39 +00:00
|
|
|
/// Forward declaration of the tnode_t class template, which takes a single type parameter. It
/// is only declared here, not defined.
template <typename NodeType>
class tnode_t;
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// The parse tree itself.
|
|
|
|
class parse_node_tree_t : public std::vector<parse_node_t> {
|
|
|
|
public:
|
2016-02-28 08:33:11 +00:00
|
|
|
parse_node_tree_t() {}
|
2017-01-26 23:36:12 +00:00
|
|
|
parse_node_tree_t(parse_node_tree_t &&) = default;
|
|
|
|
parse_node_tree_t &operator=(parse_node_tree_t &&) = default;
|
2017-01-27 04:00:43 +00:00
|
|
|
parse_node_tree_t(const parse_node_tree_t &) = delete; // no copying
|
|
|
|
parse_node_tree_t &operator=(const parse_node_tree_t &) = delete; // no copying
|
2016-05-02 23:09:46 +00:00
|
|
|
|
|
|
|
// Get the node corresponding to a child of the given node, or NULL if there is no such child.
|
|
|
|
// If expected_type is provided, assert that the node has that type.
|
|
|
|
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which,
|
|
|
|
parse_token_type_t expected_type = token_type_invalid) const;
|
2014-01-15 09:40:40 +00:00
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
// Find the first direct child of the given node of the given type. asserts on failure.
|
2013-12-23 22:53:56 +00:00
|
|
|
const parse_node_t &find_child(const parse_node_t &parent, parse_token_type_t type) const;
|
2014-01-15 09:40:40 +00:00
|
|
|
|
2018-01-13 23:36:14 +00:00
|
|
|
template <typename Type>
|
|
|
|
tnode_t<Type> find_child(const parse_node_t &parent) const;
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
// Get the node corresponding to the parent of the given node, or NULL if there is no such
|
|
|
|
// child. If expected_type is provided, only returns the parent if it is of that type. Note the
|
|
|
|
// asymmetry: get_child asserts since the children are known, but get_parent does not, since the
|
|
|
|
// parent may not be known.
|
|
|
|
const parse_node_t *get_parent(const parse_node_t &node,
|
|
|
|
parse_token_type_t expected_type = token_type_invalid) const;
|
2014-01-15 09:40:40 +00:00
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
// Finds a node containing the given source location. If 'parent' is not NULL, it must be an
|
|
|
|
// ancestor.
|
|
|
|
const parse_node_t *find_node_matching_source_location(parse_token_type_t type,
|
|
|
|
size_t source_loc,
|
|
|
|
const parse_node_t *parent) const;
|
|
|
|
// Utilities
|
2014-01-15 09:40:40 +00:00
|
|
|
|
2018-01-16 02:41:14 +00:00
|
|
|
/// Given a node, return all of its comment nodes.
|
2019-11-19 00:54:36 +00:00
|
|
|
std::vector<tnode_t<grammar::comment>> comment_nodes_for_node(const parse_node_t &parent) const;
|
2014-01-15 09:40:40 +00:00
|
|
|
|
2018-01-16 02:41:14 +00:00
|
|
|
private:
|
|
|
|
template <typename Type>
|
|
|
|
friend class tnode_t;
|
2016-05-02 23:09:46 +00:00
|
|
|
/// Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return
|
|
|
|
/// the next element of the given type in that list, and the tail (by reference). Returns NULL
|
|
|
|
/// if we've exhausted the list.
|
|
|
|
const parse_node_t *next_node_in_node_list(const parse_node_t &node_list,
|
2019-11-19 00:54:36 +00:00
|
|
|
parse_token_type_t entry_type,
|
2016-05-02 23:09:46 +00:00
|
|
|
const parse_node_t **list_tail) const;
|
2013-10-07 08:04:37 +00:00
|
|
|
};
|
|
|
|
|
2016-05-02 23:09:46 +00:00
|
|
|
/// The big entry point. Parse a string, attempting to produce a tree for the given goal type.
|
|
|
|
bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags,
|
|
|
|
parse_node_tree_t *output, parse_error_list_t *errors,
|
|
|
|
parse_token_type_t goal = symbol_job_list);
|
|
|
|
|
2017-12-22 22:40:15 +00:00
|
|
|
/// A type wrapping up a parse tree and the original source behind it.
|
|
|
|
struct parsed_source_t {
|
|
|
|
wcstring src;
|
|
|
|
parse_node_tree_t tree;
|
|
|
|
|
|
|
|
parsed_source_t(wcstring s, parse_node_tree_t t) : src(std::move(s)), tree(std::move(t)) {}
|
|
|
|
|
|
|
|
parsed_source_t(const parsed_source_t &) = delete;
|
|
|
|
void operator=(const parsed_source_t &) = delete;
|
|
|
|
parsed_source_t(parsed_source_t &&) = default;
|
|
|
|
parsed_source_t &operator=(parsed_source_t &&) = default;
|
|
|
|
};
|
|
|
|
/// Return a shared pointer to parsed_source_t, or null on failure.
|
2017-12-22 23:44:14 +00:00
|
|
|
using parsed_source_ref_t = std::shared_ptr<const parsed_source_t>;
|
2020-06-28 23:53:58 +00:00
|
|
|
parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags,
|
|
|
|
parse_error_list_t *errors);
|
2017-12-22 22:40:15 +00:00
|
|
|
|
2019-11-25 11:47:33 +00:00
|
|
|
/// Error message for improper use of the exec builtin.
|
|
|
|
// Format string with a single '%ls' conversion, which the message text uses for the command name.
// NOTE(review): _() is presumably the translation lookup macro -- confirm where it is defined.
#define EXEC_ERR_MSG _(L"The '%ls' command can not be used in a pipeline")
|
|
|
|
|
2013-05-26 19:12:16 +00:00
|
|
|
#endif
|