fish-shell/src/parse_productions.cpp
ridiculousfish e79df33e3a Disallow parsing 'and' and 'or' as commands
Except for and --help and or --help

Fixes #6089
2019-09-08 11:09:32 -07:00

446 lines
14 KiB
C++

#include "config.h" // IWYU pragma: keep
#include <stdio.h>
#include "common.h"
#include "flog.h"
#include "parse_constants.h"
#include "parse_grammar.h"
#include "parse_productions.h"
#include "parse_tree.h"
using namespace parse_productions;
using namespace grammar;
// Value returned by a resolver when it has no production for the given lookahead tokens.
#define NO_PRODUCTION NULL
// Herein are encoded the productions for our LL2 fish grammar.
//
// Each symbol (e.g. symbol_job_list) has a corresponding function (e.g. job_list::resolve). The
// function accepts two tokens, representing the first and second lookahead, and returns a
// production representing the rule, or NULL on error. There is also a tag value which is returned
// through the out_tag pointer; the tag is a sort of node annotation.
//
// Productions are generally a static const array, and we return a pointer to the array (yes,
// really).
// Expands to the signature of SYM::resolve, so that `RESOLVE(sym) { ... }` defines the resolver for
// that symbol. Inside the body, token1 and token2 are the first and second lookahead tokens and
// out_tag is the pointer through which the node tag may be written.
#define RESOLVE(SYM) \
const production_element_t *SYM::resolve( \
const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag)
/// A job_list is a list of jobs, separated by semicolons or newlines.
///
/// Only the first lookahead token is consulted. Returns NO_PRODUCTION for any token type not
/// handled below.
RESOLVE(job_list) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.type == parse_token_type_string) {
        // The keywords end, else and case end this job list; any other string is a normal job.
        if (token1.keyword == parse_keyword_end || token1.keyword == parse_keyword_else ||
            token1.keyword == parse_keyword_case) {
            return production_for<empty>();
        }
        return production_for<normal>();
    }

    if (token1.type == parse_token_type_pipe || token1.type == parse_token_type_redirection ||
        token1.type == parse_token_type_background) {
        return production_for<normal>();
    }

    if (token1.type == parse_token_type_end) {
        return production_for<empty_line>();
    }

    if (token1.type == parse_token_type_terminate) {
        // No more commands, just transition to empty.
        return production_for<empty>();
    }

    return NO_PRODUCTION;
}
/// A job decorator is AND or OR.
///
/// Writes parse_bool_and, parse_bool_or or parse_bool_none to *out_tag on every path.
RESOLVE(job_decorator) {
    // 'and --help' / 'or --help' is not a decoration, so only honor the keyword when the second
    // token is not a help argument.
    const bool followed_by_help = token2.is_help_argument;

    if (!followed_by_help && token1.keyword == parse_keyword_and) {
        *out_tag = parse_bool_and;
        return production_for<ands>();
    }

    if (!followed_by_help && token1.keyword == parse_keyword_or) {
        *out_tag = parse_bool_or;
        return production_for<ors>();
    }

    *out_tag = parse_bool_none;
    return production_for<empty>();
}
/// Resolve a job_conjunction_continuation from the first lookahead token.
///
/// An andand token selects `andands` and tags the node parse_bool_and; an oror token selects
/// `orors` and tags the node parse_bool_or. Any other token selects `empty`, and in that case
/// *out_tag is left exactly as the caller passed it in.
RESOLVE(job_conjunction_continuation) {
    // Note: out_tag is written below, so it must not be marked UNUSED.
    UNUSED(token2);
    switch (token1.type) {
        case parse_token_type_andand: {
            *out_tag = parse_bool_and;
            return production_for<andands>();
        }
        case parse_token_type_oror: {
            *out_tag = parse_bool_or;
            return production_for<orors>();
        }
        default: {
            return production_for<empty>();
        }
    }
}
/// Resolve a job_continuation: `piped` when the first lookahead is a pipe token, `empty` for
/// anything else.
RESOLVE(job_continuation) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.type == parse_token_type_pipe) {
        return production_for<piped>();  // pipe, continuation
    }
    return production_for<empty>();  // not a pipe, no job continuation
}
/// A statement is a normal command, or an if / while / and etc.
///
/// Only a string token can begin a statement; every other token type yields NO_PRODUCTION. The
/// keywords 'else' and 'end' cannot begin a statement either, unless one of the early checks below
/// has already routed the token to a decorated statement.
RESOLVE(statement) {
    UNUSED(out_tag);

    if (token1.type != parse_token_type_string) {
        return NO_PRODUCTION;
    }

    // The only block-like builtin that takes any parameters is 'function'. So for 'function' we
    // go to a decorated statement only when the next token is a help argument (-h or --help);
    // for anything else we do so as soon as the next token looks like an option (starts with a
    // dash).
    const bool is_function = (token1.keyword == parse_keyword_function);
    if (is_function && token2.is_help_argument) {
        return production_for<decorated>();
    }
    if (!is_function && token2.has_dash_prefix) {
        return production_for<decorated>();
    }

    // Likewise if the next token doesn't look like an argument at all, i.e. it is a statement
    // terminator. This corresponds to e.g. a "naked if". 'begin' expects to be invoked with no
    // arguments, so it is exempt, as is 'end'.
    const bool naked_invocation_invokes_help =
        (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end);
    const bool next_token_terminates =
        (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate);
    if (naked_invocation_invokes_help && next_token_terminates) {
        return production_for<decorated>();
    }

    // Every arm returns, so nothing may follow this switch.
    switch (token1.keyword) {
        case parse_keyword_not:
        case parse_keyword_exclam: {
            return production_for<nots>();
        }
        case parse_keyword_for:
        case parse_keyword_while:
        case parse_keyword_function:
        case parse_keyword_begin: {
            return production_for<block>();
        }
        case parse_keyword_if: {
            return production_for<ifs>();
        }
        case parse_keyword_switch: {
            return production_for<switchs>();
        }
        case parse_keyword_else:
        case parse_keyword_end: {
            return NO_PRODUCTION;
        }
        // All other keywords fall through to decorated statement.
        default: {
            return production_for<decorated>();
        }
    }
}
/// Resolve an else_clause: `else_cont` when the first lookahead carries the 'else' keyword,
/// `empty` otherwise.
RESOLVE(else_clause) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.keyword == parse_keyword_else) {
        return production_for<else_cont>();
    }
    return production_for<empty>();
}
/// Resolve an else_continuation: `else_if` when the first lookahead carries the 'if' keyword,
/// `else_only` otherwise.
RESOLVE(else_continuation) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.keyword == parse_keyword_if) {
        return production_for<else_if>();
    }
    return production_for<else_only>();
}
/// Resolve a case_item_list from the first lookahead token. The keyword check takes precedence
/// over the token-type check.
RESOLVE(case_item_list) {
    UNUSED(token2);
    UNUSED(out_tag);

    // A 'case' keyword starts another case item.
    if (token1.keyword == parse_keyword_case) {
        return production_for<case_items>();
    }

    // An end token selects the blank_line production.
    if (token1.type == parse_token_type_end) {
        return production_for<blank_line>();
    }

    // Anything else ends the list.
    return production_for<empty>();
}
/// Resolve a not_statement: `nots` for the 'not' keyword, `exclams` for the exclam keyword, and
/// NO_PRODUCTION for anything else.
RESOLVE(not_statement) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.keyword == parse_keyword_not) {
        return production_for<nots>();
    }
    if (token1.keyword == parse_keyword_exclam) {
        return production_for<exclams>();
    }
    return NO_PRODUCTION;
}
/// Resolve an andor_job_list using both lookahead tokens.
RESOLVE(andor_job_list) {
    UNUSED(out_tag);

    if (token1.type == parse_token_type_end) {
        return production_for<empty_line>();
    }

    const bool starts_with_andor =
        (token1.keyword == parse_keyword_and || token1.keyword == parse_keyword_or);

    // The argument to and/or must be a string that's not help. Otherwise it's either
    // 'and --help' or a naked 'and', and not part of this list.
    const bool has_non_help_argument =
        (token2.type == parse_token_type_string && !token2.is_help_argument);

    if (starts_with_andor && has_non_help_argument) {
        return production_for<andor_job>();
    }

    // All other cases end the list.
    return production_for<empty>();
}
/// Resolve an argument_list: `arg` when the first lookahead is a string token, `empty` otherwise.
RESOLVE(argument_list) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.type == parse_token_type_string) {
        return production_for<arg>();
    }
    return production_for<empty>();
}
/// Resolve a freestanding_argument_list: `arg` for a string token, `semicolon` for an end token,
/// and `empty` for anything else.
RESOLVE(freestanding_argument_list) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.type == parse_token_type_string) {
        return production_for<arg>();
    }
    if (token1.type == parse_token_type_end) {
        return production_for<semicolon>();
    }
    return production_for<empty>();
}
/// Resolve a block_header from the keyword of the first lookahead token. Returns NO_PRODUCTION
/// when the keyword is not one of for / while / function / begin.
RESOLVE(block_header) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.keyword == parse_keyword_for) {
        return production_for<forh>();
    }
    if (token1.keyword == parse_keyword_while) {
        return production_for<whileh>();
    }
    if (token1.keyword == parse_keyword_function) {
        return production_for<funch>();
    }
    if (token1.keyword == parse_keyword_begin) {
        return production_for<beginh>();
    }
    return NO_PRODUCTION;
}
/// Resolve a decorated_statement using both lookahead tokens.
///
/// *out_tag receives the statement decoration only when the keyword dispatch at the bottom is
/// reached; the two early returns leave it untouched.
RESOLVE(decorated_statement) {
    // and/or are typically parsed in job_conjunction at the beginning of a job.
    // However they may be reached here through e.g. true && and false.
    // Refuse to parse them as a command except for --help. See #6089.
    const bool is_andor =
        (token1.keyword == parse_keyword_and || token1.keyword == parse_keyword_or);
    if (is_andor && !token2.is_help_argument) {
        return NO_PRODUCTION;
    }

    // A decoration only applies when it is followed by a string that does not look like an
    // option. If this is e.g. 'command --help' then the command is 'command' and not a
    // decoration; if the second token is not a string, then this is a naked 'command' and we
    // should execute it as undecorated.
    const bool followed_by_plain_string =
        (token2.type == parse_token_type_string && !token2.has_dash_prefix);
    if (!followed_by_plain_string) {
        return production_for<plains>();
    }

    if (token1.keyword == parse_keyword_command) {
        *out_tag = parse_statement_decoration_command;
        return production_for<cmds>();
    }
    if (token1.keyword == parse_keyword_builtin) {
        *out_tag = parse_statement_decoration_builtin;
        return production_for<builtins>();
    }
    if (token1.keyword == parse_keyword_exec) {
        *out_tag = parse_statement_decoration_exec;
        return production_for<execs>();
    }

    *out_tag = parse_statement_decoration_none;
    return production_for<plains>();
}
/// Resolve an arguments_or_redirections_list: `arg` for a string token, `redir` for a redirection
/// token, and `empty` for anything else.
RESOLVE(arguments_or_redirections_list) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.type == parse_token_type_string) {
        return production_for<arg>();
    }
    if (token1.type == parse_token_type_redirection) {
        return production_for<redir>();
    }
    return production_for<empty>();
}
/// Resolve optional_newlines: `newlines` when the first lookahead is flagged as a newline,
/// `empty` otherwise.
RESOLVE(optional_newlines) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (!token1.is_newline) {
        return production_for<empty>();
    }
    return production_for<newlines>();
}
/// Resolve optional_background. A background token selects `background` and tags the node
/// parse_background; anything else selects `empty` and tags it parse_no_background.
RESOLVE(optional_background) {
    UNUSED(token2);

    if (token1.type == parse_token_type_background) {
        *out_tag = parse_background;
        return production_for<background>();
    }

    *out_tag = parse_no_background;
    return production_for<empty>();
}
/// Look up the resolver for the grammar symbol \p node_type and invoke it with the two lookahead
/// tokens \p input1 and \p input2.
///
/// \p out_tag is forwarded to the resolver, which may write a node tag through it.
///
/// Returns whatever the resolver returns: a production, or NULL if the symbol has no production
/// for this input (which is logged at debug level 5). Passing a terminal token type, one of the
/// special types, or token_type_invalid is a programming error: it is logged and PARSER_DIE() is
/// invoked.
const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type,
const parse_token_t &input1,
const parse_token_t &input2,
parse_node_tag_t *out_tag) {
// Logged at level 6 because this is **extremely** chatty.
debug(6, L"Resolving production for %ls with input token <%ls>",
token_type_description(node_type), input1.describe().c_str());
// Fetch the function to resolve the list of productions. It stays NULL unless one of the ELEM
// cases below assigns it.
const production_element_t *(*resolver)(const parse_token_t &input1, //!OCLINT(unused param)
const parse_token_t &input2, //!OCLINT(unused param)
parse_node_tag_t *out_tag) = //!OCLINT(unused param)
NULL;
switch (node_type) {
// Handle all of our grammar elements. Each ELEM(SYM) use expands to a case for symbol_SYM that
// selects SYM::resolve; the uses themselves come from the included file (presumably one per
// grammar symbol - confirm against parse_grammar_elements.inc).
#define ELEM(SYM) \
case (symbol_##SYM): \
resolver = SYM::resolve; \
break;
#include "parse_grammar_elements.inc"
// Everything else is an error.
// Terminal tokens are not grammar symbols, so there is nothing to resolve for them.
case parse_token_type_string:
case parse_token_type_pipe:
case parse_token_type_redirection:
case parse_token_type_background:
case parse_token_type_andand:
case parse_token_type_oror:
case parse_token_type_end:
case parse_token_type_terminate: {
FLOGF(error, L"Terminal token type %ls passed to %s", token_type_description(node_type),
__FUNCTION__);
PARSER_DIE();
break;
}
// Special types (parse errors, tokenizer errors, comments) have no resolver either.
case parse_special_type_parse_error:
case parse_special_type_tokenizer_error:
case parse_special_type_comment: {
FLOGF(error, L"Special type %ls passed to %s\n", token_type_description(node_type),
__FUNCTION__);
PARSER_DIE();
break;
}
case token_type_invalid: {
FLOGF(error, L"token_type_invalid passed to %s", __FUNCTION__);
PARSER_DIE();
break;
}
}
// Only the ELEM cases set a resolver; reaching this point without one is a bug.
PARSE_ASSERT(resolver != NULL);
const production_element_t *result = resolver(input1, input2, out_tag);
// A NULL result means the resolver found no production for this input; log it and hand the NULL
// back to the caller.
if (result == NULL) {
debug(5, L"Node type '%ls' has no production for input '%ls' (in %s)",
token_type_description(node_type), input1.describe().c_str(), __FUNCTION__);
}
return result;
}