mirror of
https://github.com/fish-shell/fish-shell
synced 2024-12-27 05:13:10 +00:00
Introduce a new fish ast
This is the first commit of a series intended to replace the existing "parse tree" machinery. It adds a new abstract syntax tree and uses a more normal recursive descent parser. Initially there are no users of the new ast. The following commits will replace parse_tree -> ast for all usages.
This commit is contained in:
parent
45c9e3b0f1
commit
4d4455007d
11 changed files with 2350 additions and 30 deletions
|
@ -121,7 +121,7 @@ set(FISH_SRCS
|
|||
src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp src/wutil.cpp
|
||||
src/future_feature_flags.cpp src/redirection.cpp src/topic_monitor.cpp
|
||||
src/flog.cpp src/trace.cpp src/timer.cpp src/null_terminated_array.cpp
|
||||
src/operation_context.cpp src/fd_monitor.cpp src/termsize.cpp
|
||||
src/operation_context.cpp src/fd_monitor.cpp src/termsize.cpp src/ast.cpp
|
||||
)
|
||||
|
||||
# Header files are just globbed.
|
||||
|
|
1206
src/ast.cpp
Normal file
1206
src/ast.cpp
Normal file
File diff suppressed because it is too large
Load diff
60
src/ast_node_types.inc
Normal file
60
src/ast_node_types.inc
Normal file
|
@ -0,0 +1,60 @@
|
|||
// Define ELEM and optionally ELEMLIST before including this file.
|
||||
// ELEM is for ordinary nodes.
|
||||
// ELEMLIST(x, y) marks x as a list node whose elements are of node type y. If ELEMLIST is not defined, it falls back to ELEM(x).
|
||||
#ifndef ELEMLIST
|
||||
#define ELEMLIST(x, y) ELEM(x)
|
||||
#endif
|
||||
|
||||
ELEM(keyword_base)
|
||||
ELEM(token_base)
|
||||
ELEM(maybe_newlines)
|
||||
|
||||
ELEM(argument)
|
||||
ELEMLIST(argument_list, argument)
|
||||
|
||||
ELEM(redirection)
|
||||
ELEM(argument_or_redirection)
|
||||
ELEMLIST(argument_or_redirection_list, argument_or_redirection)
|
||||
|
||||
ELEM(variable_assignment)
|
||||
ELEMLIST(variable_assignment_list, variable_assignment)
|
||||
|
||||
ELEM(job)
|
||||
ELEM(job_conjunction)
|
||||
// For historical reasons, a job list is a list of job *conjunctions*. This should be fixed.
|
||||
ELEMLIST(job_list, job_conjunction)
|
||||
ELEM(job_conjunction_continuation)
|
||||
ELEMLIST(job_conjunction_continuation_list, job_conjunction_continuation)
|
||||
|
||||
ELEM(job_continuation)
|
||||
ELEMLIST(job_continuation_list, job_continuation)
|
||||
|
||||
ELEM(andor_job)
|
||||
ELEMLIST(andor_job_list, andor_job)
|
||||
|
||||
ELEM(statement)
|
||||
|
||||
ELEM(not_statement)
|
||||
|
||||
ELEM(block_statement)
|
||||
ELEM(for_header)
|
||||
ELEM(while_header)
|
||||
ELEM(function_header)
|
||||
ELEM(begin_header)
|
||||
|
||||
ELEM(if_statement)
|
||||
ELEM(if_clause)
|
||||
ELEM(elseif_clause)
|
||||
ELEMLIST(elseif_clause_list, elseif_clause)
|
||||
ELEM(else_clause)
|
||||
|
||||
ELEM(switch_statement)
|
||||
ELEM(case_item)
|
||||
ELEMLIST(case_item_list, case_item)
|
||||
|
||||
ELEM(decorated_statement)
|
||||
|
||||
ELEM(freestanding_argument_list)
|
||||
|
||||
#undef ELEM
|
||||
#undef ELEMLIST
|
|
@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
|||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "ast.h"
|
||||
#include "color.h"
|
||||
#include "common.h"
|
||||
#include "env.h"
|
||||
|
@ -404,6 +405,12 @@ static wcstring prettify(const wcstring &src, bool do_indent) {
|
|||
if (dump_parse_tree) {
|
||||
const wcstring dump = parse_dump_tree(parse_tree, src);
|
||||
std::fwprintf(stderr, L"%ls\n", dump.c_str());
|
||||
|
||||
auto ast =
|
||||
ast::ast_t::parse(src, parse_flag_leave_unterminated | parse_flag_include_comments |
|
||||
parse_flag_show_extra_semis);
|
||||
wcstring ast_dump = ast.dump(src);
|
||||
std::fwprintf(stderr, L"%ls\n", ast_dump.c_str());
|
||||
}
|
||||
|
||||
// We may have a forest of disconnected trees on a parse failure. We have to handle all nodes
|
||||
|
|
|
@ -67,6 +67,7 @@ class category_list_t {
|
|||
category_t parse_productions{L"parse-productions", L"Resolving tokens"};
|
||||
category_t parse_productions_chatty{L"parse-productions-chatty",
|
||||
L"Resolving tokens (chatty messages)"};
|
||||
category_t ast_construction{L"ast-construction", L"Parsing fish AST"};
|
||||
|
||||
category_t proc_job_run{L"proc-job-run", L"Jobs getting started or continued"};
|
||||
|
||||
|
|
|
@ -13,6 +13,17 @@
|
|||
exit_without_destructors(-1); \
|
||||
} while (0)
|
||||
|
||||
// A range of source code, described by a starting offset and a length.
// Both values are stored as 32-bit unsigned integers.
|
||||
struct source_range_t {
|
||||
// Offset at which the range begins.
uint32_t start;
|
||||
// Extent of the range, counted from start.
uint32_t length;
|
||||

||||
// Returns the offset just past the range, i.e. start + length.
// The assert (labelled "Overflow") checks that the sum did not wrap below start.
uint32_t end() const {
|
||||
assert(start + length >= start && "Overflow");
|
||||
return start + length;
|
||||
}
|
||||
};
|
||||
|
||||
// IMPORTANT: If the following enum table is modified you must also update token_enum_map below.
|
||||
enum parse_token_type_t : uint8_t {
|
||||
token_type_invalid = 1,
|
||||
|
@ -193,6 +204,26 @@ enum parse_error_code_t {
|
|||
parse_error_andor_in_pipeline, // "and" or "or" after a pipe
|
||||
};
|
||||
|
||||
/// Flags that adjust parsing. Every flag other than parse_flag_none occupies its own bit, so
/// several flags may be combined with bitwise OR.
enum {
|
||||
/// No flags set.
parse_flag_none = 0,
|
||||

||||
/// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of
|
||||
/// disconnected trees. This is intended to be used by syntax highlighting.
|
||||
parse_flag_continue_after_error = 1 << 0,
|
||||
/// Include comment tokens.
|
||||
parse_flag_include_comments = 1 << 1,
|
||||
/// Indicate that the tokenizer should accept incomplete tokens.
|
||||
parse_flag_accept_incomplete_tokens = 1 << 2,
|
||||
/// Indicate that the parser should not generate the terminate token, allowing an 'unfinished'
|
||||
/// tree where some nodes may have no productions.
|
||||
parse_flag_leave_unterminated = 1 << 3,
|
||||
/// Indicate that the parser should generate job_list entries for blank lines.
|
||||
parse_flag_show_blank_lines = 1 << 4,
|
||||
/// Indicate that extra semicolons should be generated.
|
||||
parse_flag_show_extra_semis = 1 << 5,
|
||||
};
|
||||
typedef unsigned int parse_tree_flags_t;
|
||||
|
||||
enum { PARSER_TEST_ERROR = 1, PARSER_TEST_INCOMPLETE = 2 };
|
||||
typedef unsigned int parser_test_error_bits_t;
|
||||
|
||||
|
@ -214,6 +245,9 @@ struct parse_error_t {
|
|||
};
|
||||
typedef std::vector<parse_error_t> parse_error_list_t;
|
||||
|
||||
wcstring token_type_user_presentable_description(parse_token_type_t type,
|
||||
parse_keyword_t keyword = parse_keyword_t::none);
|
||||
|
||||
// Special source_start value that means unknown.
|
||||
#define SOURCE_LOCATION_UNKNOWN (static_cast<size_t>(-1))
|
||||
|
||||
|
|
|
@ -255,7 +255,7 @@ DEF_ALT(variable_assignments) {
|
|||
// A string token like VAR=value
|
||||
DEF(variable_assignment) produces_single<tok_string>{BODY(variable_assignment)};
|
||||
|
||||
// A statement is a normal command, or an if / while / and etc
|
||||
// A statement is a normal command, or an if / while / etc
|
||||
DEF_ALT(statement) {
|
||||
using nots = single<not_statement>;
|
||||
using block = single<block_statement>;
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "ast.h"
|
||||
#include "parse_constants.h"
|
||||
|
||||
struct parse_token_t;
|
||||
|
|
|
@ -30,7 +30,7 @@ static bool production_is_empty(const production_element_t *production) {
|
|||
return *production == token_type_invalid;
|
||||
}
|
||||
|
||||
static parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) {
|
||||
parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) {
|
||||
switch (err) {
|
||||
case tokenizer_error_t::none:
|
||||
return parse_error_none;
|
||||
|
@ -168,8 +168,7 @@ const wchar_t *keyword_description(parse_keyword_t type) {
|
|||
return L"unknown_keyword";
|
||||
}
|
||||
|
||||
static wcstring token_type_user_presentable_description(
|
||||
parse_token_type_t type, parse_keyword_t keyword = parse_keyword_t::none) {
|
||||
wcstring token_type_user_presentable_description(parse_token_type_t type, parse_keyword_t keyword) {
|
||||
if (keyword != parse_keyword_t::none) {
|
||||
return format_string(L"keyword '%ls'", keyword_description(keyword));
|
||||
}
|
||||
|
@ -1078,8 +1077,7 @@ static inline bool is_help_argument(const wcstring &txt) {
|
|||
}
|
||||
|
||||
/// Return a new parse token, advancing the tokenizer.
|
||||
static inline parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token,
|
||||
wcstring *storage) {
|
||||
parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token, wcstring *storage) {
|
||||
*out_token = tok->next();
|
||||
if (!out_token->has_value()) {
|
||||
return kTerminalToken;
|
||||
|
@ -1098,7 +1096,8 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *o
|
|||
result.is_help_argument = result.has_dash_prefix && is_help_argument(text);
|
||||
result.is_newline = (result.type == parse_token_type_end && text == L"\n");
|
||||
result.preceding_escaped_nl = token.preceding_escaped_nl;
|
||||
result.may_be_variable_assignment = bool(variable_assignment_equals_pos(text));
|
||||
result.may_be_variable_assignment = variable_assignment_equals_pos(text).has_value();
|
||||
result.tok_error = token.error;
|
||||
|
||||
// These assertions are totally bogus. Basically our tokenizer works in size_t but we work in
|
||||
// uint32_t to save some space. If we have a source file larger than 4 GB, we'll probably just
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
|
@ -25,11 +26,6 @@ typedef uint32_t source_offset_t;
|
|||
|
||||
constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1);
|
||||
|
||||
struct source_range_t {
|
||||
uint32_t start;
|
||||
uint32_t length;
|
||||
};
|
||||
|
||||
/// A struct representing the token type that we use internally.
|
||||
struct parse_token_t {
|
||||
enum parse_token_type_t type; // The type of the token as represented by the parser
|
||||
|
@ -41,38 +37,36 @@ struct parse_token_t {
|
|||
bool is_newline{false}; // Hackish: if TOK_END, whether the source is a newline.
|
||||
bool preceding_escaped_nl{false}; // Whether there was an escaped newline preceding this token.
|
||||
bool may_be_variable_assignment{false}; // Hackish: whether this token is a string like FOO=bar
|
||||
tokenizer_error_t tok_error{tokenizer_error_t::none}; // If this is a tokenizer error, that error.
|
||||
source_offset_t source_start{SOURCE_OFFSET_INVALID};
|
||||
source_offset_t source_length{0};
|
||||
|
||||
/// \return the source range.
|
||||
source_range_t range() const {
|
||||
return source_range_t{source_start, source_length};
|
||||
}
|
||||
|
||||
/// \return whether we are a string with the dash prefix set.
|
||||
bool is_dash_prefix_string() const {
|
||||
return type == parse_token_type_string && has_dash_prefix;
|
||||
}
|
||||
|
||||
wcstring describe() const;
|
||||
wcstring user_presentable_description() const;
|
||||
|
||||
constexpr parse_token_t(parse_token_type_t type) : type(type) {}
|
||||
};
|
||||
|
||||
enum {
|
||||
parse_flag_none = 0,
|
||||
|
||||
/// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of
|
||||
/// disconnected trees. This is intended to be used by syntax highlighting.
|
||||
parse_flag_continue_after_error = 1 << 0,
|
||||
/// Include comment tokens.
|
||||
parse_flag_include_comments = 1 << 1,
|
||||
/// Indicate that the tokenizer should accept incomplete tokens.
|
||||
parse_flag_accept_incomplete_tokens = 1 << 2,
|
||||
/// Indicate that the parser should not generate the terminate token, allowing an 'unfinished'
|
||||
/// tree where some nodes may have no productions.
|
||||
parse_flag_leave_unterminated = 1 << 3,
|
||||
/// Indicate that the parser should generate job_list entries for blank lines.
|
||||
parse_flag_show_blank_lines = 1 << 4
|
||||
};
|
||||
typedef unsigned int parse_tree_flags_t;
|
||||
/// Return a new parse token, advancing the tokenizer.
|
||||
parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token, wcstring *storage);
|
||||
|
||||
wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src);
|
||||
|
||||
const wchar_t *token_type_description(parse_token_type_t type);
|
||||
const wchar_t *keyword_description(parse_keyword_t type);
|
||||
|
||||
parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err);
|
||||
|
||||
// Node flags.
|
||||
enum {
|
||||
/// Flag indicating that the node has associated comment nodes.
|
||||
|
|
Loading…
Reference in a new issue