diff --git a/Makefile.in b/Makefile.in index c779ae2b0..31b23d5ef 100644 --- a/Makefile.in +++ b/Makefile.in @@ -92,7 +92,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \ env_universal.o env_universal_common.o input_common.o event.o \ signal.o io.o parse_util.o common.o screen.o path.o autoload.o \ parser_keywords.o iothread.o color.o postfork.o \ - builtin_test.o + builtin_test.o parse_tree.o parse_productions.o parse_execution.cpp FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \ parser_keywords.o wutil.o tokenizer.o diff --git a/autoload.cpp b/autoload.cpp index 3470ecae6..e5d2334df 100644 --- a/autoload.cpp +++ b/autoload.cpp @@ -195,7 +195,6 @@ autoload_function_t *autoload_t::get_autoloaded_function_with_creation(const wcs bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_load, bool reload, const wcstring_list_t &path_list) { /* Note that we are NOT locked in this function! */ - size_t i; bool reloaded = 0; /* Try using a cached function. If we really want the function to be loaded, require that it be really loaded. If we're not reloading, allow stale functions. */ @@ -276,7 +275,7 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_ if (! 
has_script_source) { /* Iterate over path searching for suitable completion files */ - for (i=0; i argv_array(args); + wchar_t **argv = const_cast(argv_array.get()); + + int argc = builtin_count_args(argv); + int res=STATUS_BUILTIN_OK; + wchar_t *desc=0; + std::vector events; + std::auto_ptr named_arguments(NULL); + + wchar_t *name = 0; + bool shadows = true; + + woptind=0; + + const struct woption long_options[] = + { + { L"description", required_argument, 0, 'd' }, + { L"on-signal", required_argument, 0, 's' }, + { L"on-job-exit", required_argument, 0, 'j' }, + { L"on-process-exit", required_argument, 0, 'p' }, + { L"on-variable", required_argument, 0, 'v' }, + { L"on-event", required_argument, 0, 'e' }, + { L"help", no_argument, 0, 'h' }, + { L"argument-names", no_argument, 0, 'a' }, + { L"no-scope-shadowing", no_argument, 0, 'S' }, + { 0, 0, 0, 0 } + }; + + while (1 && (!res)) + { + int opt_index = 0; + + int opt = wgetopt_long(argc, + argv, + L"d:s:j:p:v:e:haS", + long_options, + &opt_index); + if (opt == -1) + break; + + switch (opt) + { + case 0: + if (long_options[opt_index].flag != 0) + break; + + + + append_format(*out_err, + BUILTIN_ERR_UNKNOWN, + argv[0], + long_options[opt_index].name); + + res = 1; + break; + + case 'd': + desc=woptarg; + break; + + case 's': + { + int sig = wcs2sig(woptarg); + + if (sig < 0) + { + append_format(*out_err, + _(L"%ls: Unknown signal '%ls'\n"), + argv[0], + woptarg); + res=1; + break; + } + events.push_back(event_t::signal_event(sig)); + break; + } + + case 'v': + { + if (wcsvarname(woptarg)) + { + append_format(*out_err, + _(L"%ls: Invalid variable name '%ls'\n"), + argv[0], + woptarg); + res=STATUS_BUILTIN_ERROR; + break; + } + + events.push_back(event_t::variable_event(woptarg)); + break; + } + + + case 'e': + { + events.push_back(event_t::generic_event(woptarg)); + break; + } + + case 'j': + case 'p': + { + pid_t pid; + wchar_t *end; + event_t e(EVENT_ANY); + + if ((opt == 'j') && + (wcscasecmp(woptarg, L"caller") == 
0)) + { + int job_id = -1; + + if (is_subshell) + { + size_t block_idx = 0; + + /* Find the outermost substitution block */ + for (block_idx = 0; ; block_idx++) + { + const block_t *b = parser.block_at_index(block_idx); + if (b == NULL || b->type() == SUBST) + break; + } + + /* Go one step beyond that, to get to the caller */ + const block_t *caller_block = parser.block_at_index(block_idx + 1); + if (caller_block != NULL && caller_block->job != NULL) + { + job_id = caller_block->job->job_id; + } + } + + if (job_id == -1) + { + append_format(*out_err, + _(L"%ls: Cannot find calling job for event handler\n"), + argv[0]); + res=1; + } + else + { + e.type = EVENT_JOB_ID; + e.param1.job_id = job_id; + } + + } + else + { + errno = 0; + pid = fish_wcstoi(woptarg, &end, 10); + if (errno || !end || *end) + { + append_format(*out_err, + _(L"%ls: Invalid process id %ls\n"), + argv[0], + woptarg); + res=1; + break; + } + + + e.type = EVENT_EXIT; + e.param1.pid = (opt=='j'?-1:1)*abs(pid); + } + if (res) + { + /* nothing */ + } + else + { + events.push_back(e); + } + break; + } + + case 'a': + if (named_arguments.get() == NULL) + named_arguments.reset(new wcstring_list_t); + break; + + case 'S': + shadows = 0; + break; + + case 'h': + builtin_print_help(parser, argv[0], stdout_buffer); + return STATUS_BUILTIN_OK; + + case '?': + builtin_unknown_option(parser, argv[0], argv[woptind-1]); + res = 1; + break; + + } + + } + + if (!res) + { + + if (argc == woptind) + { + append_format(*out_err, + _(L"%ls: Expected function name\n"), + argv[0]); + res=1; + } + else if (wcsfuncname(argv[woptind])) + { + append_format(*out_err, + _(L"%ls: Illegal function name '%ls'\n"), + argv[0], + argv[woptind]); + + res=1; + } + else if (parser_keywords_is_reserved(argv[woptind])) + { + + append_format(*out_err, + _(L"%ls: The name '%ls' is reserved,\nand can not be used as a function name\n"), + argv[0], + argv[woptind]); + + res=1; + } + else if (! 
wcslen(argv[woptind])) + { + append_format(*out_err, _(L"%ls: No function name given\n"), argv[0]); + } + else + { + + name = argv[woptind++]; + + if (named_arguments.get()) + { + while (woptind < argc) + { + if (wcsvarname(argv[woptind])) + { + append_format(*out_err, + _(L"%ls: Invalid variable name '%ls'\n"), + argv[0], + argv[woptind]); + res = STATUS_BUILTIN_ERROR; + break; + } + + named_arguments->push_back(argv[woptind++]); + } + } + else if (woptind != argc) + { + append_format(*out_err, + _(L"%ls: Expected one argument, got %d\n"), + argv[0], + argc); + res=1; + + } + } + } + + if (res) + { + builtin_print_help(parser, argv[0], *out_err); + } + else + { + function_data_t d; + + d.name = name; + if (desc) + d.description = desc; + d.events.swap(events); + d.shadows = shadows; + if (named_arguments.get()) + d.named_arguments.swap(*named_arguments); + + for (size_t i=0; i txt; + for (;;) + { + char buff[256]; + ssize_t amt = read_loop(builtin_stdin, buff, sizeof buff); + if (amt <= 0) break; + txt.insert(txt.end(), buff, buff + amt); + } + if (! txt.empty()) + { + const wcstring src = str2wcstring(&txt.at(0), txt.size()); + parse_node_tree_t parse_tree; + parse_error_list_t errors; + bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors, true); + if (! 
success) + { + stdout_buffer.append(L"Parsing failed:\n"); + for (size_t i=0; i < errors.size(); i++) + { + stdout_buffer.append(errors.at(i).describe(src)); + stdout_buffer.push_back(L'\n'); + } + + stdout_buffer.append(L"(Reparsed with continue after error)\n"); + parse_tree.clear(); + errors.clear(); + parse_tree_from_string(src, parse_flag_continue_after_error, &parse_tree, &errors, true); + } + const wcstring dump = parse_dump_tree(parse_tree, src); + stdout_buffer.append(dump); + } + return STATUS_BUILTIN_OK; +} /* END OF BUILTIN COMMANDS @@ -3975,6 +4333,7 @@ static int builtin_history(parser_t &parser, wchar_t **argv) static const builtin_data_t builtin_datas[]= { { L"[", &builtin_test, N_(L"Test a condition") }, + { L"__fish_parse", &builtin_parse, N_(L"Try out the new parser") }, { L"and", &builtin_generic, N_(L"Execute command if previous command suceeded") }, { L"begin", &builtin_begin, N_(L"Create a block of code") }, { L"bg", &builtin_bg, N_(L"Send job to background") }, @@ -4119,7 +4478,7 @@ void builtin_get_names(std::vector &list) { for (size_t i=0; i < BUILTIN_COUNT; i++) { - list.push_back(completion_t(builtin_datas[i].name)); + append_completion(list, builtin_datas[i].name); } } @@ -4166,4 +4525,3 @@ void builtin_pop_io(parser_t &parser) builtin_stdin = 0; } } - diff --git a/builtin.h b/builtin.h index ae06bba26..7162de235 100644 --- a/builtin.h +++ b/builtin.h @@ -176,7 +176,10 @@ const wchar_t *builtin_complete_get_temporary_buffer(); Run the __fish_print_help function to obtain the help information for the specified command. */ - wcstring builtin_help_get(parser_t &parser, const wchar_t *cmd); +/** Defines a function, like builtin_function. Returns 0 on success. args should NOT contain 'function' as the first argument. 
*/ +int define_function(parser_t &parser, const wcstring_list_t &args, const wcstring &contents, wcstring *out_err); + + #endif diff --git a/builtin_commandline.cpp b/builtin_commandline.cpp index f6dc2b031..f121cb644 100644 --- a/builtin_commandline.cpp +++ b/builtin_commandline.cpp @@ -143,17 +143,13 @@ static void write_part(const wchar_t *begin, int cut_at_cursor, int tokenize) { - wcstring out; - wchar_t *buff; - size_t pos; - - pos = get_cursor_pos()-(begin-get_buffer()); + size_t pos = get_cursor_pos()-(begin-get_buffer()); if (tokenize) { - buff = wcsndup(begin, end-begin); + wchar_t *buff = wcsndup(begin, end-begin); // fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end ); - out.clear(); + wcstring out; tokenizer_t tok(buff, TOK_ACCEPT_UNFINISHED); for (; tok_has_next(&tok); tok_next(&tok)) { diff --git a/builtin_complete.cpp b/builtin_complete.cpp index 4bfab1b7c..4eeec8be4 100644 --- a/builtin_complete.cpp +++ b/builtin_complete.cpp @@ -290,7 +290,6 @@ static int builtin_complete(parser_t &parser, wchar_t **argv) int result_mode=SHARED; int remove = 0; int authoritative = -1; - int flags = COMPLETE_AUTO_SPACE; wcstring short_opt; wcstring_list_t gnu_opt, old_opt; @@ -497,15 +496,19 @@ static int builtin_complete(parser_t &parser, wchar_t **argv) { if (condition && wcslen(condition)) { - if (parser.test(condition)) + const wcstring condition_string = condition; + parse_error_list_t errors; + if (parse_util_detect_errors(condition_string, &errors)) { append_format(stderr_buffer, - L"%ls: Condition '%ls' contained a syntax error\n", + L"%ls: Condition '%ls' contained a syntax error", argv[0], condition); - - parser.test(condition, NULL, &stderr_buffer, argv[0]); - + for (size_t i=0; i < errors.size(); i++) + { + append_format(stderr_buffer, L"\n%s: ", argv[0]); + stderr_buffer.append(errors.at(i).describe(condition_string)); + } res = true; } } @@ -596,6 +599,8 @@ static int builtin_complete(parser_t &parser, wchar_t **argv) } else { + int flags 
= COMPLETE_AUTO_SPACE; + if (remove) { builtin_complete_remove(cmd, diff --git a/builtin_jobs.cpp b/builtin_jobs.cpp index 634aa0104..c6ca77132 100644 --- a/builtin_jobs.cpp +++ b/builtin_jobs.cpp @@ -164,7 +164,6 @@ static int builtin_jobs(parser_t &parser, wchar_t **argv) int found=0; int mode=JOBS_DEFAULT; int print_last = 0; - const job_t *j; argc = builtin_count_args(argv); woptind=0; @@ -305,7 +304,7 @@ static int builtin_jobs(parser_t &parser, wchar_t **argv) return 1; } - j = job_get_from_pid(pid); + const job_t *j = job_get_from_pid(pid); if (j && !job_is_completed(j)) { diff --git a/builtin_set.cpp b/builtin_set.cpp index eb2a70e9b..f7622260a 100644 --- a/builtin_set.cpp +++ b/builtin_set.cpp @@ -697,7 +697,6 @@ static int builtin_set(parser_t &parser, wchar_t **argv) /* Slice mode */ - size_t idx_count, val_count; std::vector indexes; wcstring_list_t result; @@ -713,9 +712,9 @@ static int builtin_set(parser_t &parser, wchar_t **argv) retcode = 1; break; } - - val_count = argc-woptind-1; - idx_count = indexes.size(); + + size_t idx_count = indexes.size(); + size_t val_count = argc-woptind-1; if (!erase) { diff --git a/common.cpp b/common.cpp index 2b99d1ff2..518625600 100644 --- a/common.cpp +++ b/common.cpp @@ -105,7 +105,7 @@ void show_stackframe() return; void *trace[32]; - int i, trace_size = 0; + int trace_size = 0; trace_size = backtrace(trace, 32); char **messages = backtrace_symbols(trace, trace_size); @@ -113,7 +113,7 @@ void show_stackframe() if (messages) { debug(0, L"Backtrace:"); - for (i=0; i @@ -612,10 +612,10 @@ const wchar_t *wcsfuncname(const wchar_t *str); /** Test if the given string is valid in a variable name - \return 1 if this is a valid name, 0 otherwise + \return true if this is a valid name, false otherwise */ -int wcsvarchr(wchar_t chr); +bool wcsvarchr(wchar_t chr); /** diff --git a/complete.cpp b/complete.cpp index 7c3445571..33e0536bd 100644 --- a/complete.cpp +++ b/complete.cpp @@ -44,6 +44,7 @@ #include "parser_keywords.h" 
#include "wutil.h" #include "path.h" +#include "parse_tree.h" #include "iothread.h" /* @@ -465,7 +466,13 @@ void completion_autoload_t::command_removed(const wcstring &cmd) /** Create a new completion entry */ void append_completion(std::vector &completions, const wcstring &comp, const wcstring &desc, complete_flags_t flags, string_fuzzy_match_t match) { - completions.push_back(completion_t(comp, desc, match, flags)); + /* If we just constructed the completion and used push_back, we would get two string copies. Try to avoid that by making a stubby completion in the vector first, and then copying our string in. */ + completions.push_back(completion_t(wcstring())); + completion_t *last = &completions.back(); + last->completion = comp; + last->description = desc; + last->match = match; + last->flags = flags; } /** @@ -1190,7 +1197,7 @@ void completer_t::complete_cmd(const wcstring &str_cmd, bool use_function, bool wcstring_list_t names = function_get_names(str_cmd.at(0) == L'_'); for (size_t i=0; i < names.size(); i++) { - possible_comp.push_back(completion_t(names.at(i))); + append_completion(possible_comp, names.at(i)); } this->complete_strings(str_cmd, 0, &complete_function_desc, possible_comp, 0); @@ -1229,7 +1236,7 @@ void completer_t::complete_from_args(const wcstring &str, std::vector possible_comp; bool is_autosuggest = (this->type() == COMPLETE_AUTOSUGGEST); - parser_t parser(is_autosuggest ? PARSER_TYPE_COMPLETIONS_ONLY : PARSER_TYPE_GENERAL, false); + parser_t parser(is_autosuggest ? PARSER_TYPE_COMPLETIONS_ONLY : PARSER_TYPE_GENERAL, false /* don't show errors */); /* If type is COMPLETE_AUTOSUGGEST, it means we're on a background thread, so don't call proc_push_interactive */ if (! 
is_autosuggest) @@ -1360,7 +1367,9 @@ struct local_options_t bool completer_t::complete_param(const wcstring &scmd_orig, const wcstring &spopt, const wcstring &sstr, bool use_switches) { - const wchar_t * const cmd_orig = scmd_orig.c_str(), * const popt = spopt.c_str(), * const str = sstr.c_str(); + const wchar_t * const cmd_orig = scmd_orig.c_str(); + const wchar_t * const popt = spopt.c_str(); + const wchar_t * const str = sstr.c_str(); bool use_common=1, use_files=1; @@ -1475,7 +1484,7 @@ bool completer_t::complete_param(const wcstring &scmd_orig, const wcstring &spop { if (o->result_mode & NO_COMMON) use_common = false; if (o->result_mode & NO_FILES) use_files = false; - complete_from_args(str, o->comp.c_str(), o->localized_desc(), o->flags); + complete_from_args(str, o->comp, o->localized_desc(), o->flags); } } @@ -1688,7 +1697,7 @@ bool completer_t::complete_variable(const wcstring &str, size_t start_offset) desc = format_string(COMPLETE_VAR_DESC_VAL, value.c_str()); } - append_completion(this->completions, comp.c_str(), desc.c_str(), flags, match); + append_completion(this->completions, comp, desc, flags, match); res = true; } @@ -1788,226 +1797,155 @@ bool completer_t::try_complete_user(const wcstring &str) return res; } -void complete(const wcstring &cmd, std::vector &comps, completion_request_flags_t flags) +void complete(const wcstring &cmd_with_subcmds, std::vector &comps, completion_request_flags_t flags) { + /* Determine the innermost subcommand */ + const wchar_t *cmdsubst_begin, *cmdsubst_end; + parse_util_cmdsubst_extent(cmd_with_subcmds.c_str(), cmd_with_subcmds.size(), &cmdsubst_begin, &cmdsubst_end); + assert(cmdsubst_begin != NULL && cmdsubst_end != NULL && cmdsubst_end >= cmdsubst_begin); + const wcstring cmd = wcstring(cmdsubst_begin, cmdsubst_end - cmdsubst_begin); + /* Make our completer */ completer_t completer(cmd, flags); - - const wchar_t *tok_begin, *tok_end, *cmdsubst_begin, *cmdsubst_end, *prev_begin, *prev_end; - wcstring 
current_token, prev_token; + wcstring current_command; - int on_command=0; - size_t pos; + const size_t pos = cmd.size(); bool done=false; - int use_command = 1; - int use_function = 1; - int use_builtin = 1; - int had_ddash = 0; - -// debug( 1, L"Complete '%ls'", cmd ); - - size_t cursor_pos = cmd.size(); - + bool use_command = 1; + bool use_function = 1; + bool use_builtin = 1; + + // debug( 1, L"Complete '%ls'", cmd ); + const wchar_t *cmd_cstr = cmd.c_str(); - parse_util_cmdsubst_extent(cmd_cstr, cursor_pos, &cmdsubst_begin, &cmdsubst_end); - parse_util_token_extent(cmd_cstr, cursor_pos, &tok_begin, &tok_end, &prev_begin, &prev_end); - - if (!cmdsubst_begin) - done=1; - - + const wchar_t *tok_begin = NULL, *prev_begin = NULL, *prev_end = NULL; + parse_util_token_extent(cmd_cstr, cmd.size(), &tok_begin, NULL, &prev_begin, &prev_end); + /** - If we are completing a variable name or a tilde expansion user - name, we do that and return. No need for any other completions. - */ - + If we are completing a variable name or a tilde expansion user + name, we do that and return. No need for any other completions. + */ + + const wcstring current_token = tok_begin; + if (!done) { - wcstring tmp = tok_begin; - done = completer.try_complete_variable(tmp) || completer.try_complete_user(tmp); + done = completer.try_complete_variable(current_token) || completer.try_complete_user(current_token); } - + if (!done) { - pos = cursor_pos-(cmdsubst_begin-cmd_cstr); - - const wcstring buff = wcstring(cmdsubst_begin, cmdsubst_end-cmdsubst_begin); - - int had_cmd=0; - int end_loop=0; - - tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); - while (tok_has_next(&tok) && !end_loop) + //const size_t prev_token_len = (prev_begin ? 
prev_end - prev_begin : 0); + //const wcstring prev_token(prev_begin, prev_token_len); + + parse_node_tree_t tree; + parse_tree_from_string(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL); + + /* Find the plain statement that contains the position */ + const parse_node_t *plain_statement = tree.find_node_matching_source_location(symbol_plain_statement, pos, NULL); + if (plain_statement != NULL) { - switch (tok_last_type(&tok)) + assert(plain_statement->has_source() && plain_statement->type == symbol_plain_statement); + + /* Get the command node */ + const parse_node_t *cmd_node = tree.get_child(*plain_statement, 0, parse_token_type_string); + + /* Get the actual command string */ + if (cmd_node != NULL) + current_command = cmd_node->get_source(cmd); + + /* Check the decoration */ + switch (tree.decoration_for_plain_statement(*plain_statement)) { - - case TOK_STRING: + case parse_statement_decoration_none: + use_command = true; + use_function = true; + use_builtin = true; + break; + + case parse_statement_decoration_command: + use_command = true; + use_function = false; + use_builtin = false; + break; + + case parse_statement_decoration_builtin: + use_command = false; + use_function = false; + use_builtin = true; + break; + } + + if (cmd_node && cmd_node->location_in_or_at_end_of_source_range(pos)) + { + /* Complete command filename */ + completer.complete_cmd(current_token, use_function, use_builtin, use_command); + } + else + { + /* Get all the arguments */ + const parse_node_tree_t::parse_node_list_t all_arguments = tree.find_nodes(*plain_statement, symbol_argument); + + /* See whether we are in an argument. We may also be in a redirection, or nothing at all. 
*/ + size_t matching_arg_index = -1; + for (size_t i=0; i < all_arguments.size(); i++) { - - const wcstring ncmd = tok_last(&tok); - int is_ddash = (ncmd == L"--") && ((tok_get_pos(&tok)+2) < (long)pos); - - if (!had_cmd) + const parse_node_t *node = all_arguments.at(i); + if (node->location_in_or_at_end_of_source_range(pos)) { - - if (parser_keywords_is_subcommand(ncmd)) + matching_arg_index = i; + break; + } + } + + bool had_ddash = false; + wcstring current_argument, previous_argument; + if (matching_arg_index != (size_t)(-1)) + { + /* Get the current argument and the previous argument, if we have one */ + current_argument = all_arguments.at(matching_arg_index)->get_source(cmd); + + if (matching_arg_index > 0) + previous_argument = all_arguments.at(matching_arg_index - 1)->get_source(cmd); + + /* Check to see if we have a preceding double-dash */ + for (size_t i=0; i < matching_arg_index; i++) + { + if (all_arguments.at(i)->get_source(cmd) == L"--") { - if (ncmd == L"builtin") - { - use_function = 0; - use_command = 0; - use_builtin = 1; - } - else if (ncmd == L"command") - { - use_command = 1; - use_function = 0; - use_builtin = 0; - } + had_ddash = true; break; } - - - if (!is_ddash || - ((use_command && use_function && use_builtin))) - { - current_command = ncmd; - - size_t token_end = tok_get_pos(&tok) + ncmd.size(); - - on_command = (pos <= token_end); - had_cmd=1; - } - } - else - { - if (is_ddash) - { - had_ddash = 1; - } - } - - break; } - - case TOK_END: - case TOK_PIPE: - case TOK_BACKGROUND: + + bool do_file = false; + + wcstring current_command_unescape, previous_argument_unescape, current_argument_unescape; + if (unescape_string(current_command, ¤t_command_unescape, UNESCAPE_DEFAULT) && + unescape_string(previous_argument, &previous_argument_unescape, UNESCAPE_DEFAULT) && + unescape_string(current_argument, ¤t_argument_unescape, UNESCAPE_INCOMPLETE)) { - had_cmd=0; - had_ddash = 0; - use_command = 1; - use_function = 1; - use_builtin = 1; - break; - 
} - - case TOK_ERROR: - { - end_loop=1; - break; - } - - default: - { - break; + do_file = completer.complete_param(current_command_unescape, + previous_argument_unescape, + current_argument_unescape, + !had_ddash); } + + /* If we have found no command specific completions at all, fall back to using file completions. */ + if (completer.empty()) + do_file = true; + + /* And if we're autosuggesting, and the token is empty, don't do file suggestions */ + if ((flags & COMPLETION_REQUEST_AUTOSUGGESTION) && current_argument_unescape.empty()) + do_file = false; + + /* This function wants the unescaped string */ + completer.complete_param_expand(current_token, do_file); } - - if (tok_get_pos(&tok) >= (long)pos) - { - end_loop=1; - } - - tok_next(&tok); - - } - - /* - Get the string to complete - */ - - current_token.assign(tok_begin, cursor_pos-(tok_begin-cmd_cstr)); - - if (prev_begin) - { - prev_token.assign(prev_begin, prev_end - prev_begin); - } - else - { - prev_token.clear(); - } - -// debug( 0, L"on_command: %d, %ls %ls\n", on_command, current_command, current_token ); - - /* - Check if we are using the 'command' or 'builtin' builtins - _and_ we are writing a switch instead of a command. In that - case, complete using the builtins completions, not using a - subcommand. 
- */ - - if ((on_command || current_token == L"--") && - string_prefixes_string(L"-", current_token) && - !(use_command && use_function && use_builtin)) - { - if (use_command == 0) - current_command = L"builtin"; - else - current_command = L"command"; - - had_cmd = 1; - on_command = 0; - } - - /* - Use command completions if in between commands - */ - if (!had_cmd) - { - on_command=1; - } - - - if (on_command) - { - /* Complete command filename */ - completer.complete_cmd(current_token, use_function, use_builtin, use_command); - } - else - { - bool do_file = false; - - wcstring current_command_unescape, prev_token_unescape, current_token_unescape; - if (unescape_string(current_command, ¤t_command_unescape, UNESCAPE_DEFAULT) && - unescape_string(prev_token, &prev_token_unescape, UNESCAPE_DEFAULT) && - unescape_string(current_token, ¤t_token_unescape, UNESCAPE_INCOMPLETE)) - { - do_file = completer.complete_param(current_command_unescape, - prev_token_unescape, - current_token_unescape, - !had_ddash); - } - - /* If we have found no command specific completions at - all, fall back to using file completions. - */ - if (completer.empty()) - do_file = true; - - /* If we're autosuggesting, and the token is empty, don't do file suggestions */ - if ((flags & COMPLETION_REQUEST_AUTOSUGGESTION) && current_token_unescape.empty()) - do_file = false; - - /* - This function wants the unescaped string - */ - completer.complete_param_expand(current_token, do_file); } } - + comps = completer.get_completions(); } diff --git a/complete.h b/complete.h index cd4f22a16..fa00c3e5b 100644 --- a/complete.h +++ b/complete.h @@ -124,7 +124,7 @@ public: int flags; /* Construction. Note: defining these so that they are not inlined reduces the executable size. 
*/ - completion_t(const wcstring &comp, const wcstring &desc = L"", string_fuzzy_match_t match = string_fuzzy_match_t(fuzzy_match_exact), int flags_val = 0); + completion_t(const wcstring &comp, const wcstring &desc = wcstring(), string_fuzzy_match_t match = string_fuzzy_match_t(fuzzy_match_exact), int flags_val = 0); completion_t(const completion_t &); completion_t &operator=(const completion_t &); @@ -268,7 +268,7 @@ void complete_load(const wcstring &cmd, bool reload); \param flags completion flags */ -void append_completion(std::vector &completions, const wcstring &comp, const wcstring &desc = L"", int flags = 0, string_fuzzy_match_t match = string_fuzzy_match_t(fuzzy_match_exact)); +void append_completion(std::vector &completions, const wcstring &comp, const wcstring &desc = wcstring(), int flags = 0, string_fuzzy_match_t match = string_fuzzy_match_t(fuzzy_match_exact)); /* Function used for testing */ void complete_set_variable_names(const wcstring_list_t *names); diff --git a/env.cpp b/env.cpp index 13f87b6cc..ab7f63e5d 100644 --- a/env.cpp +++ b/env.cpp @@ -299,7 +299,6 @@ static bool var_is_locale(const wcstring &key) static void handle_locale() { const env_var_t lc_all = env_get_string(L"LC_ALL"); - int i; const wcstring old_locale = wsetlocale(LC_MESSAGES, NULL); /* @@ -330,7 +329,7 @@ static void handle_locale() wsetlocale(LC_ALL, lang.c_str()); } - for (i=2; locale_variable[i]; i++) + for (int i=2; locale_variable[i]; i++) { const env_var_t val = env_get_string(locale_variable[i]); @@ -479,7 +478,7 @@ static void env_set_defaults() if (pw->pw_name != NULL) { const wcstring wide_name = str2wcstring(pw->pw_name); - env_set(L"USER", NULL, ENV_GLOBAL); + env_set(L"USER", wide_name.c_str(), ENV_GLOBAL); } } @@ -892,6 +891,7 @@ int env_set(const wcstring &key, const wchar_t *val, int var_mode) if (!is_universal) { event_t ev = event_t::variable_event(key); + ev.arguments.reserve(3); ev.arguments.push_back(L"VARIABLE"); ev.arguments.push_back(L"SET"); 
ev.arguments.push_back(key); diff --git a/env_universal.cpp b/env_universal.cpp index a9e7462a6..ed7396f6a 100644 --- a/env_universal.cpp +++ b/env_universal.cpp @@ -426,8 +426,6 @@ void env_universal_barrier() void env_universal_set(const wcstring &name, const wcstring &value, bool exportv) { - message_t *msg; - if (!s_env_univeral_inited) return; @@ -439,7 +437,7 @@ void env_universal_set(const wcstring &name, const wcstring &value, bool exportv } else { - msg = create_message(exportv?SET_EXPORT:SET, + message_t *msg = create_message(exportv?SET_EXPORT:SET, name.c_str(), value.c_str()); @@ -459,7 +457,6 @@ int env_universal_remove(const wchar_t *name) { int res; - message_t *msg; if (!s_env_univeral_inited) return 1; @@ -476,7 +473,7 @@ int env_universal_remove(const wchar_t *name) } else { - msg= create_message(ERASE, name, 0); + message_t *msg = create_message(ERASE, name, 0); msg->count=1; env_universal_server.unsent.push(msg); env_universal_barrier(); diff --git a/event.cpp b/event.cpp index d2b219e7d..a6fab6205 100644 --- a/event.cpp +++ b/event.cpp @@ -568,9 +568,6 @@ static void event_fire_internal(const event_t &event) */ static void event_fire_delayed() { - - size_t i; - /* If is_event is one, we are running the event-handler non-recursively. @@ -582,7 +579,7 @@ static void event_fire_delayed() { event_list_t new_blocked; - for (i=0; i &opened_fds) repeatedly reopened for every command in the block, which would reset the cursor position. - \return the transmogrified chain on sucess, or 0 on failiure + \return true on success, false on failure. 
Returns the output chain and opened_fds by reference */ -static bool io_transmogrify(const io_chain_t &in_chain, io_chain_t &out_chain, std::vector &out_opened_fds) +static bool io_transmogrify(const io_chain_t &in_chain, io_chain_t *out_chain, std::vector *out_opened_fds) { ASSERT_IS_MAIN_THREAD(); - assert(out_chain.empty()); + assert(out_chain != NULL && out_opened_fds != NULL); + assert(out_chain->empty()); /* Just to be clear what we do for an empty chain */ if (in_chain.empty()) @@ -479,8 +480,8 @@ static bool io_transmogrify(const io_chain_t &in_chain, io_chain_t &out_chain, s if (success) { /* Yay */ - out_chain.swap(result_chain); - out_opened_fds.swap(opened_fds); + out_chain->swap(result_chain); + out_opened_fds->swap(opened_fds); } else { @@ -496,19 +497,24 @@ static bool io_transmogrify(const io_chain_t &in_chain, io_chain_t &out_chain, s Morph an io redirection chain into redirections suitable for passing to eval, call eval, and clean up morphed redirections. - \param def the code to evaluate + \param def the code to evaluate, or the empty string if none + \param node_offset the offset of the node to evalute, or NODE_OFFSET_INVALID \param block_type the type of block to push on evaluation \param io the io redirections to be performed on this block */ static void internal_exec_helper(parser_t &parser, - const wchar_t *def, + const wcstring &def, + node_offset_t node_offset, enum block_type_t block_type, const io_chain_t &ios) { + // If we have a valid node offset, then we must not have a string to execute + assert(node_offset == NODE_OFFSET_INVALID || def.empty()); + io_chain_t morphed_chain; std::vector opened_fds; - bool transmorgrified = io_transmogrify(ios, morphed_chain, opened_fds); + bool transmorgrified = io_transmogrify(ios, &morphed_chain, &opened_fds); int is_block_old=is_block; is_block=1; @@ -524,7 +530,14 @@ static void internal_exec_helper(parser_t &parser, signal_unblock(); - parser.eval(def, morphed_chain, block_type); + if 
(node_offset == NODE_OFFSET_INVALID) + { + parser.eval(def, morphed_chain, block_type); + } + else + { + parser.eval_block_node(node_offset, morphed_chain, block_type); + } signal_block(); @@ -564,6 +577,12 @@ static bool can_use_posix_spawn_for_job(const job_t *job, const process_t *proce /* What exec does if no_exec is set. This only has to handle block pushing and popping. See #624. */ static void exec_no_exec(parser_t &parser, const job_t *job) { + if (parser_use_ast()) + { + /* With the new parser, commands aren't responsible for pushing / popping blocks, so there's nothing to do */ + return; + } + /* Hack hack hack. If this is an 'end' job, then trigger a pop. If this is a job that would create a block, trigger a push. See #624 */ const process_t *p = job->first_process; if (p && p->type == INTERNAL_BUILTIN) @@ -682,7 +701,7 @@ void exec_job(parser_t &parser, job_t *j) j->first_process->completed=1; return; } - + assert(0 && "This should be unreachable"); } signal_block(); @@ -807,7 +826,6 @@ void exec_job(parser_t &parser, job_t *j) { pipe_write.reset(new io_pipe_t(p->pipe_write_fd, false)); process_net_io_chain.push_back(pipe_write); - } /* The explicit IO redirections associated with the process */ @@ -926,7 +944,7 @@ void exec_job(parser_t &parser, job_t *j) if (! exec_error) { - internal_exec_helper(parser, def.c_str(), TOP, process_net_io_chain); + internal_exec_helper(parser, def, NODE_OFFSET_INVALID, TOP, process_net_io_chain); } parser.allow_function(); @@ -936,12 +954,14 @@ void exec_job(parser_t &parser, job_t *j) } case INTERNAL_BLOCK: + case INTERNAL_BLOCK_NODE: { if (p->next) { block_output_io_buffer.reset(io_buffer_t::create(0)); if (block_output_io_buffer.get() == NULL) { + /* We failed (e.g. no more fds could be created). */ exec_error = true; job_mark_process_as_failed(j, p); } @@ -954,12 +974,21 @@ void exec_job(parser_t &parser, job_t *j) if (! 
exec_error) { - internal_exec_helper(parser, p->argv0(), TOP, process_net_io_chain); + if (p->type == INTERNAL_BLOCK) + { + /* The block contents (as in, fish code) are stored in argv0 (ugh) */ + assert(p->argv0() != NULL); + internal_exec_helper(parser, p->argv0(), NODE_OFFSET_INVALID, TOP, process_net_io_chain); + } + else + { + assert(p->type == INTERNAL_BLOCK_NODE); + internal_exec_helper(parser, wcstring(), p->internal_block_node, TOP, process_net_io_chain); + } } break; - } - + case INTERNAL_BUILTIN: { int builtin_stdin=0; @@ -1104,6 +1133,20 @@ void exec_job(parser_t &parser, job_t *j) } break; } + + case EXTERNAL: + /* External commands are handled in the next switch statement below */ + break; + + case INTERNAL_EXEC: + /* We should have handled exec up above */ + assert(0 && "INTERNAL_EXEC process found in pipeline, where it should never be. Aborting."); + break; + + case INTERNAL_BUFFER: + /* Internal buffers are handled in the next switch statement below */ + break; + } if (exec_error) @@ -1115,6 +1158,7 @@ void exec_job(parser_t &parser, job_t *j) { case INTERNAL_BLOCK: + case INTERNAL_BLOCK_NODE: case INTERNAL_FUNCTION: { int status = proc_get_last_status(); @@ -1131,7 +1175,7 @@ void exec_job(parser_t &parser, job_t *j) No buffer, so we exit directly. This means we have to manually set the exit status. */ - if (p->next == 0) + if (p->next == NULL) { proc_set_last_status(job_get_flag(j, JOB_NEGATE)?(!status):status); } @@ -1463,7 +1507,13 @@ void exec_job(parser_t &parser, job_t *j) break; } - + + case INTERNAL_EXEC: + { + /* We should have handled exec up above */ + assert(0 && "INTERNAL_EXEC process found in pipeline, where it should never be. 
Aborting."); + break; + } } if (p->type == INTERNAL_BUILTIN) @@ -1531,6 +1581,8 @@ static int exec_subshell_internal(const wcstring &cmd, wcstring_list_t *lst, boo int prev_subshell = is_subshell; const int prev_status = proc_get_last_status(); char sep=0; + + //fprintf(stderr, "subcmd %ls\n", cmd.c_str()); const env_var_t ifs = env_get_string(L"IFS"); diff --git a/expand.cpp b/expand.cpp index 10e3cbf40..6216da222 100644 --- a/expand.cpp +++ b/expand.cpp @@ -785,7 +785,15 @@ static int expand_pid(const wcstring &instr_with_sep, expand_flags_t flags, std::vector &out) { - + /* Hack. If there's no INTERNAL_SEP and no PROCESS_EXPAND, then there's nothing to do. Check out this "null terminated string." */ + const wchar_t some_chars[] = {INTERNAL_SEPARATOR, PROCESS_EXPAND, L'\0'}; + if (instr_with_sep.find_first_of(some_chars) == wcstring::npos) + { + /* Nothing to do */ + append_completion(out, instr_with_sep); + return 1; + } + /* expand_string calls us with internal separators in instr...sigh */ wcstring instr = instr_with_sep; remove_internal_separator(instr, false); @@ -1372,7 +1380,7 @@ static int expand_brackets(parser_t &parser, const wcstring &instr, int flags, s /** Perform cmdsubst expansion */ -static int expand_cmdsubst(parser_t &parser, const wcstring &input, std::vector &outList) +static int expand_cmdsubst(parser_t &parser, const wcstring &input, std::vector &out_list) { wchar_t *paran_begin=0, *paran_end=0; std::vector sub_res; @@ -1390,7 +1398,7 @@ static int expand_cmdsubst(parser_t &parser, const wcstring &input, std::vector< L"Mismatched parenthesis"); return 0; case 0: - outList.push_back(completion_t(input)); + append_completion(out_list, input); return 1; case 1: @@ -1455,15 +1463,15 @@ static int expand_cmdsubst(parser_t &parser, const wcstring &input, std::vector< */ for (i=0; i *co } } +// If the given path contains the user's home directory, replace that with a tilde +// We don't try to be smart about case insensitivity, etc. 
+wcstring replace_home_directory_with_tilde(const wcstring &str) +{ + // only absolute paths get this treatment + wcstring result = str; + if (string_prefixes_string(L"/", result)) + { + wcstring home_directory = L"~"; + expand_tilde(home_directory); + if (! string_suffixes_string(L"/", home_directory)) + { + home_directory.push_back(L'/'); + } + + // Now check if the home_directory prefixes the string + if (string_prefixes_string(home_directory, result)) + { + // Success + result.replace(0, home_directory.size(), L"~/"); + } + } + return result; +} + /** Remove any internal separators. Also optionally convert wildcard characters to regular equivalents. This is done to support EXPAND_SKIP_WILDCARDS. @@ -1640,7 +1673,7 @@ int expand_string(const wcstring &input, std::vector &output, expa if ((!(flags & ACCEPT_INCOMPLETE)) && expand_is_clean(input.c_str())) { - output.push_back(completion_t(input)); + append_completion(output, input); return EXPAND_OK; } @@ -1656,7 +1689,7 @@ int expand_string(const wcstring &input, std::vector &output, expa parser.error(CMDSUBST_ERROR, -1, L"Command substitutions not allowed"); return EXPAND_ERROR; } - in->push_back(completion_t(input)); + append_completion(*in, input); } else { @@ -1684,7 +1717,7 @@ int expand_string(const wcstring &input, std::vector &output, expa next[i] = L'$'; } } - out->push_back(completion_t(next)); + append_completion(*out, next); } else { @@ -1700,7 +1733,7 @@ int expand_string(const wcstring &input, std::vector &output, expa for (i=0; i < in->size(); i++) { - wcstring next = in->at(i).completion; + const wcstring &next = in->at(i).completion; if (!expand_brackets(parser, next, flags, *out)) { @@ -1720,7 +1753,7 @@ int expand_string(const wcstring &input, std::vector &output, expa if (flags & ACCEPT_INCOMPLETE) { - if (next[0] == PROCESS_EXPAND) + if (! 
next.empty() && next.at(0) == PROCESS_EXPAND) { /* If process expansion matches, we are not @@ -1733,7 +1766,7 @@ int expand_string(const wcstring &input, std::vector &output, expa } else { - out->push_back(completion_t(next)); + append_completion(*out, next); } } else @@ -1815,7 +1848,7 @@ int expand_string(const wcstring &input, std::vector &output, expa { if (!(flags & ACCEPT_INCOMPLETE)) { - out->push_back(completion_t(next_str)); + append_completion(*out, next_str); } } } @@ -1842,7 +1875,7 @@ bool expand_one(wcstring &string, expand_flags_t flags) return true; } - if (expand_string(string, completions, flags)) + if (expand_string(string, completions, flags | EXPAND_NO_DESCRIPTIONS)) { if (completions.size() == 1) { @@ -1945,19 +1978,19 @@ bool fish_openSUSE_dbus_hack_hack_hack_hack(std::vector *args) val.resize(last_good + 1); args->clear(); - args->push_back(completion_t(L"set")); + append_completion(*args, L"set"); if (key == L"DBUS_SESSION_BUS_ADDRESS") - args->push_back(completion_t(L"-x")); - args->push_back(completion_t(key)); - args->push_back(completion_t(val)); + append_completion(*args, L"-x"); + append_completion(*args, key); + append_completion(*args, val); result = true; } else if (string_prefixes_string(L"export DBUS_SESSION_BUS_ADDRESS;", cmd)) { /* Nothing, we already exported it */ args->clear(); - args->push_back(completion_t(L"echo")); - args->push_back(completion_t(L"-n")); + append_completion(*args, L"echo"); + append_completion(*args, L"-n"); result = true; } } diff --git a/expand.h b/expand.h index 4893d2b92..803513c2a 100644 --- a/expand.h +++ b/expand.h @@ -176,6 +176,9 @@ wcstring expand_escape_variable(const wcstring &in); */ void expand_tilde(wcstring &input); +/** Perform the opposite of tilde expansion on the string, returning the result; the input string is not modified */ +wcstring replace_home_directory_with_tilde(const wcstring &str); + /** Test if the specified argument is clean, i.e.
it does not contain any tokens which need to be expanded or otherwise altered. Clean diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index adb43ad0b..ceb694ee7 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -65,6 +65,7 @@ D033781115DC6D4C00A634BA /* completions in CopyFiles */ = {isa = PBXBuildFile; fileRef = D025C02715D1FEA100B9DB63 /* completions */; }; D033781215DC6D5200A634BA /* functions in CopyFiles */ = {isa = PBXBuildFile; fileRef = D025C02815D1FEA100B9DB63 /* functions */; }; D033781315DC6D5400A634BA /* tools in CopyFiles */ = {isa = PBXBuildFile; fileRef = D025C02915D1FEA100B9DB63 /* tools */; }; + D052D80B1868F7FC003ABCBD /* parse_execution.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D052D8091868F7FC003ABCBD /* parse_execution.cpp */; }; D07B247315BCC15700D4ADB4 /* add-shell in Resources */ = {isa = PBXBuildFile; fileRef = D07B247215BCC15700D4ADB4 /* add-shell */; }; D07B247615BCC4BE00D4ADB4 /* install.sh in Resources */ = {isa = PBXBuildFile; fileRef = D07B247515BCC4BE00D4ADB4 /* install.sh */; }; D07D266A15E33B86009E43F6 /* config.fish in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0C4FD9415A7D7EE00212EF1 /* config.fish */; }; @@ -73,8 +74,50 @@ D07D266E15E33B86009E43F6 /* tools in Copy Files */ = {isa = PBXBuildFile; fileRef = D025C02915D1FEA100B9DB63 /* tools */; }; D07D267215E34171009E43F6 /* config.fish in Copy Files */ = {isa = PBXBuildFile; fileRef = D0CBD580159EE48F0024809C /* config.fish */; }; D0879AC816BF9AAB00E98E56 /* fish_term_icon.icns in Resources */ = {isa = PBXBuildFile; fileRef = D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */; }; + D08A329417B4458D00F3A533 /* fish_tests.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D08A329317B4458D00F3A533 /* fish_tests.cpp */; }; + D08A329517B445C200F3A533 /* function.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854413B3ACEE0099B651 /* function.cpp */; }; + D08A329617B445FD00F3A533 /* builtin.cpp in Sources 
*/ = {isa = PBXBuildFile; fileRef = D0A0853513B3ACEE0099B651 /* builtin.cpp */; }; + D08A329717B4463B00F3A533 /* complete.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853713B3ACEE0099B651 /* complete.cpp */; }; + D08A329817B4463B00F3A533 /* env.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853A13B3ACEE0099B651 /* env.cpp */; }; + D08A329917B4463B00F3A533 /* exec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853C13B3ACEE0099B651 /* exec.cpp */; }; + D08A329A17B4463B00F3A533 /* expand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853D13B3ACEE0099B651 /* expand.cpp */; }; + D08A329B17B4463B00F3A533 /* highlight.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854713B3ACEE0099B651 /* highlight.cpp */; }; + D08A329C17B4463B00F3A533 /* history.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854813B3ACEE0099B651 /* history.cpp */; }; + D08A329D17B4463B00F3A533 /* kill.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854F13B3ACEE0099B651 /* kill.cpp */; }; + D08A329E17B4463B00F3A533 /* parser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855413B3ACEE0099B651 /* parser.cpp */; }; + D08A329F17B4463B00F3A533 /* proc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855713B3ACEE0099B651 /* proc.cpp */; }; + D08A32A017B4463B00F3A533 /* reader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855813B3ACEE0099B651 /* reader.cpp */; }; + D08A32A117B4463B00F3A533 /* sanity.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855913B3ACEE0099B651 /* sanity.cpp */; }; + D08A32A217B4463B00F3A533 /* tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855D13B3ACEE0099B651 /* tokenizer.cpp */; }; + D08A32A317B4463B00F3A533 /* wgetopt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855F13B3ACEE0099B651 /* wgetopt.cpp */; }; + D08A32A417B4463B00F3A533 /* wildcard.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0856013B3ACEE0099B651 /* wildcard.cpp */; }; + D08A32A517B4463B00F3A533 /* 
wutil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0856113B3ACEE0099B651 /* wutil.cpp */; }; + D08A32A617B4464300F3A533 /* input.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854A13B3ACEE0099B651 /* input.cpp */; }; + D08A32A717B446A300F3A533 /* autoload.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C6FCC914CFA4B0004CE8AD /* autoload.cpp */; }; + D08A32A817B446A300F3A533 /* builtin_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0F3373A1506DE3C00ECEFC0 /* builtin_test.cpp */; }; + D08A32A917B446A300F3A533 /* color.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0B6B0FE14E88BA400AD6C10 /* color.cpp */; }; + D08A32AA17B446A300F3A533 /* common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853613B3ACEE0099B651 /* common.cpp */; }; + D08A32AB17B446A300F3A533 /* env_universal_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853813B3ACEE0099B651 /* env_universal_common.cpp */; }; + D08A32AC17B446A300F3A533 /* env_universal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853913B3ACEE0099B651 /* env_universal.cpp */; }; + D08A32AD17B446A300F3A533 /* event.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853B13B3ACEE0099B651 /* event.cpp */; }; + D08A32AE17B446A300F3A533 /* input_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854913B3ACEE0099B651 /* input_common.cpp */; }; + D08A32AF17B446A300F3A533 /* intern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854B13B3ACEE0099B651 /* intern.cpp */; }; + D08A32B017B446A300F3A533 /* io.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854C13B3ACEE0099B651 /* io.cpp */; }; + D08A32B117B446A300F3A533 /* iothread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854D13B3ACEE0099B651 /* iothread.cpp */; }; + D08A32B217B446A300F3A533 /* output.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855113B3ACEE0099B651 /* output.cpp */; }; + D08A32B317B446A300F3A533 /* parse_util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
D0A0855213B3ACEE0099B651 /* parse_util.cpp */; }; + D08A32B417B446A300F3A533 /* parser_keywords.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855313B3ACEE0099B651 /* parser_keywords.cpp */; }; + D08A32B517B446A300F3A533 /* path.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855513B3ACEE0099B651 /* path.cpp */; }; + D08A32B617B446A300F3A533 /* postfork.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D09B1C1914FC7B5B00F91077 /* postfork.cpp */; }; + D08A32B717B446A300F3A533 /* screen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855A13B3ACEE0099B651 /* screen.cpp */; }; + D08A32B817B446A300F3A533 /* signal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855C13B3ACEE0099B651 /* signal.cpp */; }; + D08A32B917B446B100F3A533 /* parse_productions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */; }; + D08A32BA17B446B100F3A533 /* parse_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; + D08A32BC17B4473B00F3A533 /* libncurses.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = D0D02A8C15983CFA008E62BD /* libncurses.dylib */; }; + D08A32BD17B4474000F3A533 /* libiconv.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = D0D02A8A15983CDF008E62BD /* libiconv.dylib */; }; D0A564FE168D23D800AF6161 /* man in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0A56501168D258300AF6161 /* man in Copy Files */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; + D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; D0CBD587159EF0E10024809C /* launch_fish.scpt in Resources */ = {isa = PBXBuildFile; fileRef = D0CBD586159EF0E10024809C /* launch_fish.scpt */; }; D0D02A67159837AD008E62BD /* complete.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853713B3ACEE0099B651 /* complete.cpp */; }; 
D0D02A69159837B2008E62BD /* env.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853A13B3ACEE0099B651 /* env.cpp */; }; @@ -150,6 +193,7 @@ D0F019FD15A977CA0034B3B1 /* config.fish in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0C4FD9415A7D7EE00212EF1 /* config.fish */; }; D0F01A0315A978910034B3B1 /* osx_fish_launcher.m in Sources */ = {isa = PBXBuildFile; fileRef = D0D02AFA159871B2008E62BD /* osx_fish_launcher.m */; }; D0F01A0515A978A10034B3B1 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D0CBD583159EEE010024809C /* Foundation.framework */; }; + D0FE8EE8179FB760008C9F21 /* parse_productions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -292,6 +336,15 @@ name = "Copy Files"; runOnlyForDeploymentPostprocessing = 1; }; + D08A328B17B4455100F3A533 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; D0F019F015A977010034B3B1 /* CopyFiles */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 2147483647; @@ -334,9 +387,13 @@ D025C02915D1FEA100B9DB63 /* tools */ = {isa = PBXFileReference; lastKnownFileType = folder; name = tools; path = share/tools; sourceTree = ""; }; D031890915E36D9800D9CC39 /* base */ = {isa = PBXFileReference; lastKnownFileType = text; path = base; sourceTree = BUILT_PRODUCTS_DIR; }; D03EE83814DF88B200FC7150 /* lru.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = lru.h; sourceTree = ""; }; + D052D8091868F7FC003ABCBD /* parse_execution.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_execution.cpp; sourceTree = ""; }; + D052D80A1868F7FC003ABCBD /* parse_execution.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; 
path = parse_execution.h; sourceTree = ""; }; D07B247215BCC15700D4ADB4 /* add-shell */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = "add-shell"; path = "build_tools/osx_package_scripts/add-shell"; sourceTree = ""; }; D07B247515BCC4BE00D4ADB4 /* install.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = install.sh; path = osx/install.sh; sourceTree = ""; }; D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; name = fish_term_icon.icns; path = osx/fish_term_icon.icns; sourceTree = ""; }; + D08A328D17B4455100F3A533 /* fish_tests */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish_tests; sourceTree = BUILT_PRODUCTS_DIR; }; + D08A329317B4458D00F3A533 /* fish_tests.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fish_tests.cpp; sourceTree = ""; }; D09B1C1914FC7B5B00F91077 /* postfork.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = postfork.cpp; sourceTree = ""; }; D09B1C1A14FC7B5B00F91077 /* postfork.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = postfork.h; sourceTree = ""; }; D0A0850313B3ACEE0099B651 /* builtin.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = builtin.h; sourceTree = ""; }; @@ -441,6 +498,8 @@ D0B6B0FE14E88BA400AD6C10 /* color.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = color.cpp; sourceTree = ""; }; D0B6B0FF14E88BA400AD6C10 /* color.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = color.h; sourceTree = ""; }; D0C4FD9415A7D7EE00212EF1 /* config.fish */ = {isa = PBXFileReference; lastKnownFileType = text; name = config.fish; path = etc/config.fish; sourceTree = ""; }; + D0C52F351765284C00BFAB82 /* 
parse_tree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_tree.cpp; sourceTree = ""; }; + D0C52F361765284C00BFAB82 /* parse_tree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_tree.h; sourceTree = ""; }; D0C6FCC914CFA4B0004CE8AD /* autoload.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = autoload.cpp; sourceTree = ""; }; D0C6FCCB14CFA4B7004CE8AD /* autoload.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = autoload.h; sourceTree = ""; }; D0C861EA16CC7054003B5A04 /* builtin_set_color.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_set_color.cpp; sourceTree = ""; }; @@ -457,11 +516,23 @@ D0D02AE415986537008E62BD /* fish_pager */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish_pager; sourceTree = BUILT_PRODUCTS_DIR; }; D0D02AFA159871B2008E62BD /* osx_fish_launcher.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = osx_fish_launcher.m; path = osx/osx_fish_launcher.m; sourceTree = ""; }; D0D2693C159835CA005D9B9C /* fish */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish; sourceTree = BUILT_PRODUCTS_DIR; }; + D0D9B2B318555D92001AE279 /* parse_constants.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parse_constants.h; sourceTree = ""; }; D0F3373A1506DE3C00ECEFC0 /* builtin_test.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_test.cpp; sourceTree = ""; }; D0F5E28415A7A32D00315DFF /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = ""; }; + D0FE8EE6179CA8A5008C9F21 /* parse_productions.h */ = {isa = PBXFileReference; fileEncoding = 
4; lastKnownFileType = sourcecode.c.h; path = parse_productions.h; sourceTree = ""; }; + D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_productions.cpp; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ + D08A328A17B4455100F3A533 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + D08A32BD17B4474000F3A533 /* libiconv.dylib in Frameworks */, + D08A32BC17B4473B00F3A533 /* libncurses.dylib in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; D0D02AB915985EF9008E62BD /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; @@ -525,6 +596,13 @@ name = "Other Build Products"; sourceTree = ""; }; + D08A328E17B4455100F3A533 /* fish_tests */ = { + isa = PBXGroup; + children = ( + ); + path = fish_tests; + sourceTree = ""; + }; D0A084F013B3AC130099B651 = { isa = PBXGroup; children = ( @@ -534,6 +612,7 @@ D0D02A8E15983D5F008E62BD /* Libraries */, D0D02AAB15985C14008E62BD /* Resources */, D031890A15E36DB500D9CC39 /* Other Build Products */, + D08A328E17B4455100F3A533 /* fish_tests */, D0D2693215983562005D9B9C /* Products */, ); sourceTree = ""; @@ -582,6 +661,13 @@ D0A0853C13B3ACEE0099B651 /* exec.cpp */, D0A0850C13B3ACEE0099B651 /* expand.h */, D0A0853D13B3ACEE0099B651 /* expand.cpp */, + D0D9B2B318555D92001AE279 /* parse_constants.h */, + D0FE8EE6179CA8A5008C9F21 /* parse_productions.h */, + D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */, + D0C52F361765284C00BFAB82 /* parse_tree.h */, + D0C52F351765284C00BFAB82 /* parse_tree.cpp */, + D052D80A1868F7FC003ABCBD /* parse_execution.h */, + D052D8091868F7FC003ABCBD /* parse_execution.cpp */, D0A0850D13B3ACEE0099B651 /* fallback.h */, D0A0853E13B3ACEE0099B651 /* fallback.cpp */, D0A0850E13B3ACEE0099B651 /* function.h */, @@ -657,6 +743,7 @@ D0A0856613B3ACEE0099B651 /* xdgmimemagic.cpp 
*/, D0A0852F13B3ACEE0099B651 /* xdgmimeparent.h */, D0A0856713B3ACEE0099B651 /* xdgmimeparent.cpp */, + D08A329317B4458D00F3A533 /* fish_tests.cpp */, ); name = Sources; sourceTree = ""; @@ -698,6 +785,7 @@ D0D02ABC15985EF9008E62BD /* fishd */, D0D02AD01598642A008E62BD /* fish_indent */, D0D02AE415986537008E62BD /* fish_pager */, + D08A328D17B4455100F3A533 /* fish_tests */, ); name = Products; sourceTree = ""; @@ -730,6 +818,23 @@ /* End PBXLegacyTarget section */ /* Begin PBXNativeTarget section */ + D08A328C17B4455100F3A533 /* fish_tests */ = { + isa = PBXNativeTarget; + buildConfigurationList = D08A329217B4455100F3A533 /* Build configuration list for PBXNativeTarget "fish_tests" */; + buildPhases = ( + D08A328917B4455100F3A533 /* Sources */, + D08A328A17B4455100F3A533 /* Frameworks */, + D08A328B17B4455100F3A533 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = fish_tests; + productName = fish_tests; + productReference = D08A328D17B4455100F3A533 /* fish_tests */; + productType = "com.apple.product-type.tool"; + }; D0D02A9915985A75008E62BD /* fish.app */ = { isa = PBXNativeTarget; buildConfigurationList = D0D02AA415985A75008E62BD /* Build configuration list for PBXNativeTarget "fish.app" */; @@ -839,6 +944,7 @@ D0D02ABB15985EF9008E62BD /* fishd */, D0D02ACF1598642A008E62BD /* fish_indent */, D0D02AE315986537008E62BD /* fish_pager */, + D08A328C17B4455100F3A533 /* fish_tests */, D0A564E6168CFDD800AF6161 /* man_pages */, D0A084F713B3AC130099B651 /* Makefile */, ); @@ -1019,6 +1125,52 @@ /* End PBXShellScriptBuildPhase section */ /* Begin PBXSourcesBuildPhase section */ + D08A328917B4455100F3A533 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + D08A32B917B446B100F3A533 /* parse_productions.cpp in Sources */, + D08A32BA17B446B100F3A533 /* parse_tree.cpp in Sources */, + D08A32A717B446A300F3A533 /* autoload.cpp in Sources */, + D08A32A817B446A300F3A533 /* builtin_test.cpp in Sources */, + 
D08A32A917B446A300F3A533 /* color.cpp in Sources */, + D08A32AA17B446A300F3A533 /* common.cpp in Sources */, + D08A32AB17B446A300F3A533 /* env_universal_common.cpp in Sources */, + D08A32AC17B446A300F3A533 /* env_universal.cpp in Sources */, + D08A32AD17B446A300F3A533 /* event.cpp in Sources */, + D08A32AE17B446A300F3A533 /* input_common.cpp in Sources */, + D08A32AF17B446A300F3A533 /* intern.cpp in Sources */, + D08A32B017B446A300F3A533 /* io.cpp in Sources */, + D08A32B117B446A300F3A533 /* iothread.cpp in Sources */, + D08A32B217B446A300F3A533 /* output.cpp in Sources */, + D08A32B317B446A300F3A533 /* parse_util.cpp in Sources */, + D08A32B417B446A300F3A533 /* parser_keywords.cpp in Sources */, + D08A32B517B446A300F3A533 /* path.cpp in Sources */, + D08A32B617B446A300F3A533 /* postfork.cpp in Sources */, + D08A32B717B446A300F3A533 /* screen.cpp in Sources */, + D08A32B817B446A300F3A533 /* signal.cpp in Sources */, + D08A32A617B4464300F3A533 /* input.cpp in Sources */, + D08A329717B4463B00F3A533 /* complete.cpp in Sources */, + D08A329817B4463B00F3A533 /* env.cpp in Sources */, + D08A329917B4463B00F3A533 /* exec.cpp in Sources */, + D08A329A17B4463B00F3A533 /* expand.cpp in Sources */, + D08A329B17B4463B00F3A533 /* highlight.cpp in Sources */, + D08A329C17B4463B00F3A533 /* history.cpp in Sources */, + D08A329D17B4463B00F3A533 /* kill.cpp in Sources */, + D08A329E17B4463B00F3A533 /* parser.cpp in Sources */, + D08A329F17B4463B00F3A533 /* proc.cpp in Sources */, + D08A32A017B4463B00F3A533 /* reader.cpp in Sources */, + D08A32A117B4463B00F3A533 /* sanity.cpp in Sources */, + D08A32A217B4463B00F3A533 /* tokenizer.cpp in Sources */, + D08A32A317B4463B00F3A533 /* wgetopt.cpp in Sources */, + D08A32A417B4463B00F3A533 /* wildcard.cpp in Sources */, + D08A32A517B4463B00F3A533 /* wutil.cpp in Sources */, + D08A329617B445FD00F3A533 /* builtin.cpp in Sources */, + D08A329417B4458D00F3A533 /* fish_tests.cpp in Sources */, + D08A329517B445C200F3A533 /* function.cpp in Sources 
*/, + ); + runOnlyForDeploymentPostprocessing = 0; + }; D0D02AB815985EF9008E62BD /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; @@ -1075,6 +1227,7 @@ D0D02A83159839D5008E62BD /* iothread.cpp in Sources */, D0D02A84159839D5008E62BD /* parse_util.cpp in Sources */, D0D02A85159839D5008E62BD /* path.cpp in Sources */, + D052D80B1868F7FC003ABCBD /* parse_execution.cpp in Sources */, D0D02A86159839D5008E62BD /* postfork.cpp in Sources */, D0D02A87159839D5008E62BD /* screen.cpp in Sources */, D0D02A88159839D5008E62BD /* signal.cpp in Sources */, @@ -1102,6 +1255,8 @@ D0D02A7A15983916008E62BD /* env_universal.cpp in Sources */, D0D02A7B15983928008E62BD /* env_universal_common.cpp in Sources */, D0D02A89159839DF008E62BD /* fish.cpp in Sources */, + D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */, + D0FE8EE8179FB760008C9F21 /* parse_productions.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -1329,6 +1484,74 @@ }; name = Release; }; + D08A328F17B4455100F3A533 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_UNINITIALIZED_AUTOS = YES; + MACOSX_DEPLOYMENT_TARGET = 10.8; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + D08A329017B4455100F3A533 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + 
CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + MACOSX_DEPLOYMENT_TARGET = 10.8; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; + D08A329117B4455100F3A533 /* Release_C++11 */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + MACOSX_DEPLOYMENT_TARGET = 10.8; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = "Release_C++11"; + }; D0A084F813B3AC130099B651 /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { @@ -1605,6 +1828,16 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; + D08A329217B4455100F3A533 /* Build configuration list for PBXNativeTarget "fish_tests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + D08A328F17B4455100F3A533 /* Debug */, + D08A329017B4455100F3A533 /* Release */, + D08A329117B4455100F3A533 /* Release_C++11 */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; D0A084F513B3AC130099B651 /* Build configuration list for PBXProject "fish" */ = { isa = XCConfigurationList; buildConfigurations = ( diff --git a/fish_tests.cpp b/fish_tests.cpp index e7047c4bc..fab103351 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -59,9 +59,38 @@ #include "iothread.h" #include "postfork.h" #include "signal.h" -#include 
"highlight.h" +#include "parse_tree.h" #include "parse_util.h" +static const char * const * s_arguments; +static int s_test_run_count = 0; + +/* Indicate if we should test the given function. Either we test everything (all arguments) or we run only tests that have a prefix in s_arguments */ +static bool should_test_function(const char *func_name) +{ + /* No args, test everything */ + bool result = false; + if (! s_arguments || ! s_arguments[0]) + { + result = true; + } + else + { + for (size_t i=0; s_arguments[i] != NULL; i++) + { + if (! strncmp(func_name, s_arguments[i], strlen(s_arguments[i]))) + { + /* Prefix match */ + result = true; + break; + } + } + } + if (result) + s_test_run_count++; + return result; +} + /** The number of tests to run */ @@ -410,6 +439,18 @@ static void test_tok() } } } + + /* Test redirection_type_for_string */ + if (redirection_type_for_string(L"<") != TOK_REDIRECT_IN) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"^") != TOK_REDIRECT_OUT) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L">") != TOK_REDIRECT_OUT) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>") != TOK_REDIRECT_OUT) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L">>") != TOK_REDIRECT_APPEND) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>>") != TOK_REDIRECT_APPEND) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>?") != TOK_REDIRECT_NOCLOB) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"9999999999999999>?") != TOK_NONE) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>&3") != 
TOK_REDIRECT_FD) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>|") != TOK_NONE) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); } static int test_fork_helper(void *unused) @@ -542,56 +583,85 @@ static void test_parser() parser_t parser(PARSER_TYPE_GENERAL, true); - say(L"Testing null input to parser"); - if (!parser.test(NULL)) - { - err(L"Null input to parser.test undetected"); - } - say(L"Testing block nesting"); - if (!parser.test(L"if; end")) + if (!parse_util_detect_errors(L"if; end")) { err(L"Incomplete if statement undetected"); } - if (!parser.test(L"if test; echo")) + if (!parse_util_detect_errors(L"if test; echo")) { err(L"Missing end undetected"); } - if (!parser.test(L"if test; end; end")) + if (!parse_util_detect_errors(L"if test; end; end")) { err(L"Unbalanced end undetected"); } say(L"Testing detection of invalid use of builtin commands"); - if (!parser.test(L"case foo")) + if (!parse_util_detect_errors(L"case foo")) { err(L"'case' command outside of block context undetected"); } - if (!parser.test(L"switch ggg; if true; case foo;end;end")) + if (!parse_util_detect_errors(L"switch ggg; if true; case foo;end;end")) { err(L"'case' command outside of switch block context undetected"); } - if (!parser.test(L"else")) + if (!parse_util_detect_errors(L"else")) { err(L"'else' command outside of conditional block context undetected"); } - if (!parser.test(L"else if")) + if (!parse_util_detect_errors(L"else if")) { err(L"'else if' command outside of conditional block context undetected"); } - if (!parser.test(L"if false; else if; end")) + if (!parse_util_detect_errors(L"if false; else if; end")) { err(L"'else if' missing command undetected"); } - if (!parser.test(L"break")) + if (!parse_util_detect_errors(L"break")) { err(L"'break' command outside of loop block context undetected"); } - if (!parser.test(L"exec ls|less") || !parser.test(L"echo|return")) + + if 
(parse_util_detect_errors(L"break --help")) + { + err(L"'break --help' incorrectly marked as error"); + } + + if (! parse_util_detect_errors(L"while false ; function foo ; break ; end ; end ")) + { + err(L"'break' command inside function allowed to break from loop outside it"); + } + + + if (!parse_util_detect_errors(L"exec ls|less") || !parse_util_detect_errors(L"echo|return")) { err(L"Invalid pipe command undetected"); } + + if (parse_util_detect_errors(L"for i in foo ; switch $i ; case blah ; break; end; end ")) + { + err(L"'break' command inside switch falsely reported as error"); + } + + if (parse_util_detect_errors(L"or cat | cat") || parse_util_detect_errors(L"and cat | cat")) + { + err(L"boolean command at beginning of pipeline falsely reported as error"); + } + + if (! parse_util_detect_errors(L"cat | and cat")) + { + err(L"'and' command in pipeline not reported as error"); + } + + if (! parse_util_detect_errors(L"cat | exec") || ! parse_util_detect_errors(L"exec | cat")) + { + err(L"'exec' command in pipeline not reported as error"); + } + + + say(L"Testing basic evaluation"); #if 0 @@ -605,6 +675,226 @@ static void test_parser() { err(L"Invalid block mode when evaluating undetected"); } + + /* Ensure that we don't crash on infinite self recursion and mutual recursion. These must use the principal parser because we cannot yet execute jobs on other parsers (!) */ + say(L"Testing recursion detection"); + parser_t::principal_parser().eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t(), TOP); +#if 0 + /* This is disabled since it produces a long backtrace. 
We should find a way to either visually compress the backtrace, or disable error spewing */ + parser_t::principal_parser().eval(L"function recursive1 ; recursive2 ; end ; function recursive2 ; recursive1 ; end ; recursive1; ", io_chain_t(), TOP); +#endif +} + +/* Wait a while and then SIGINT the main thread */ +struct test_cancellation_info_t +{ + pthread_t thread; + double delay; +}; + +static int signal_main(test_cancellation_info_t *info) +{ + usleep(info->delay * 1E6); + pthread_kill(info->thread, SIGINT); + return 0; +} + +static void test_1_cancellation(const wchar_t *src) +{ + shared_ptr out_buff(io_buffer_t::create(false, STDOUT_FILENO)); + const io_chain_t io_chain(out_buff); + test_cancellation_info_t ctx = {pthread_self(), 0.25 /* seconds */ }; + iothread_perform(signal_main, (void (*)(test_cancellation_info_t *, int))NULL, &ctx); + parser_t::principal_parser().eval(src, io_chain, TOP); + out_buff->read(); + if (out_buff->out_buffer_size() != 0) + { + err(L"Expected 0 bytes in out_buff, but instead found %lu bytes\n", out_buff->out_buffer_size()); + } + iothread_drain_all(); +} + +static void test_cancellation() +{ + say(L"Testing Ctrl-C cancellation. If this hangs, that's a bug!"); + + /* Enable fish's signal handling here. We need to make this interactive for fish to install its signal handlers */ + proc_push_interactive(1); + signal_set_handlers(); + + /* This tests that we can correctly ctrl-C out of certain loop constructs, and that nothing gets printed if we do */ + + /* Here the command substitution is an infinite loop. 
echo never even gets its argument, so when we cancel we expect no output */ + test_1_cancellation(L"echo (while true ; echo blah ; end)"); + + fprintf(stderr, "."); + + /* Nasty infinite loop that doesn't actually execute anything */ + test_1_cancellation(L"echo (while true ; end) (while true ; end) (while true ; end)"); + fprintf(stderr, "."); + + test_1_cancellation(L"while true ; end"); + fprintf(stderr, "."); + + test_1_cancellation(L"for i in (while true ; end) ; end"); + fprintf(stderr, "."); + + fprintf(stderr, "\n"); + + /* Restore signal handling */ + proc_pop_interactive(); + signal_reset_handlers(); + + /* Ensure that we don't think we should cancel */ + reader_reset_interrupted(); +} + +static void test_indents() +{ + say(L"Testing indents"); + + // Here are the components of our source and the indents we expect those to be + struct indent_component_t { + const wchar_t *txt; + int indent; + }; + + const indent_component_t components1[] = + { + {L"if foo", 0}, + {L"end", 0}, + {NULL, -1} + }; + + const indent_component_t components2[] = + { + {L"if foo", 0}, + {L"", 1}, //trailing newline! + {NULL, -1} + }; + + const indent_component_t components3[] = + { + {L"if foo", 0}, + {L"foo", 1}, + {L"end", 0}, //trailing newline! 
+ {NULL, -1} + }; + + const indent_component_t components4[] = + { + {L"if foo", 0}, + {L"if bar", 1}, + {L"end", 1}, + {L"end", 0}, + {L"", 0}, + {NULL, -1} + }; + + const indent_component_t components5[] = + { + {L"if foo", 0}, + {L"if bar", 1}, + {L"", 2}, + {NULL, -1} + }; + + const indent_component_t components6[] = + { + {L"begin", 0}, + {L"foo", 1}, + {L"", 1}, + {NULL, -1} + }; + + const indent_component_t components7[] = + { + {L"begin; end", 0}, + {L"foo", 0}, + {L"", 0}, + {NULL, -1} + }; + + const indent_component_t components8[] = + { + {L"if foo", 0}, + {L"if bar", 1}, + {L"baz", 2}, + {L"end", 1}, + {L"", 1}, + {NULL, -1} + }; + + const indent_component_t components9[] = + { + {L"switch foo", 0}, + {L"", 1}, + {NULL, -1} + }; + + const indent_component_t components10[] = + { + {L"switch foo", 0}, + {L"case bar", 1}, + {L"case baz", 1}, + {L"quux", 2}, + {L"", 2}, + {NULL, -1} + }; + + const indent_component_t components11[] = + { + {L"switch foo", 0}, + {L"cas", 1}, //parse error indentation handling + {NULL, -1} + }; + + + + const indent_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8, components9, components10, components11}; + for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) + { + const indent_component_t *components = tests[which]; + // Count how many we have + size_t component_count = 0; + while (components[component_count].txt != NULL) + { + component_count++; + } + + // Generate the expected indents + wcstring text; + std::vector expected_indents; + for (size_t i=0; i < component_count; i++) + { + if (i > 0) + { + text.push_back(L'\n'); + expected_indents.push_back(components[i].indent); + } + text.append(components[i].txt); + expected_indents.resize(text.size(), components[i].indent); + } + assert(expected_indents.size() == text.size()); + + // Compute the indents + std::vector indents = parse_util_compute_indents(text); + + if 
(expected_indents.size() != indents.size()) + { + err(L"Indent vector has wrong size! Expected %lu, actual %lu", expected_indents.size(), indents.size()); + } + assert(expected_indents.size() == indents.size()); + for (size_t i=0; i < text.size(); i++) + { + if (expected_indents.at(i) != indents.at(i)) + { + err(L"Wrong indent at index %lu in test #%lu (expected %d, actual %d):\n%ls\n", i, which + 1, expected_indents.at(i), indents.at(i), text.c_str()); + break; //don't keep showing errors for the rest of the line + } + } + + } } static void test_utils() @@ -700,13 +990,13 @@ static int expand_test(const wchar_t *in, int flags, ...) size_t i=0; int res=1; wchar_t *arg; - + if (expand_string(in, output, flags)) { } - #if 0 + printf("input: %ls\n", in); for (size_t idx=0; idx < output.size(); idx++) { printf("%ls\n", output.at(idx).completion.c_str()); @@ -846,6 +1136,11 @@ static void test_abbreviations(void) expanded = reader_expand_abbreviation_in_command(L"of gc", wcslen(L"of gc"), &result); if (expanded) err(L"gc incorrectly expanded on line %ld", (long)__LINE__); + /* others should not be */ + expanded = reader_expand_abbreviation_in_command(L"command gc", wcslen(L"command gc"), &result); + if (expanded) err(L"gc incorrectly expanded on line %ld", (long)__LINE__); + + env_pop(); } @@ -1173,8 +1468,44 @@ static void test_complete(void) assert(completions.size() == 2); assert(completions.at(0).completion == L"$Foo1"); assert(completions.at(1).completion == L"$Bar1"); + + completions.clear(); + complete(L"echo (/bin/mkdi", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 1); + assert(completions.at(0).completion == L"r"); + + completions.clear(); + complete(L"echo (ls /bin/mkdi", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 1); + assert(completions.at(0).completion == L"r"); + + completions.clear(); + complete(L"echo (command ls /bin/mkdi", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 
1); + assert(completions.at(0).completion == L"r"); + + /* Add a function and test completing it in various ways */ + struct function_data_t func_data; + func_data.name = L"scuttlebutt"; + func_data.definition = L"echo gongoozle"; + function_add(func_data, parser_t::principal_parser()); + /* Complete a function name */ + completions.clear(); + complete(L"echo (scuttlebut", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 1); + assert(completions.at(0).completion == L"t"); + /* But not with the command prefix */ + completions.clear(); + complete(L"echo (command scuttlebut", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 0); + + /* Not with the builtin prefix */ + completions.clear(); + complete(L"echo (builtin scuttlebut", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 0); + complete_set_variable_names(NULL); } @@ -1892,6 +2223,477 @@ void history_tests_t::test_history_speed(void) delete hist; } +static void test_new_parser_correctness(void) +{ + say(L"Testing new parser!"); + const struct parser_test_t + { + const wchar_t *src; + bool ok; + } + parser_tests[] = + { + {L"; ; ; ", true}, + {L"if ; end", false}, + {L"if true ; end", true}, + {L"if true; end ; end", false}, + {L"if end; end ; end", false}, + {L"if end", false}, + {L"end", false}, + {L"for i i", false}, + {L"for i in a b c ; end", true} + }; + + for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++) + { + const parser_test_t *test = &parser_tests[i]; + + parse_node_tree_t parse_tree; + bool success = parse_tree_from_string(test->src, parse_flag_none, &parse_tree, NULL); + say(L"%lu / %lu: Parse \"%ls\": %s", i+1, sizeof parser_tests / sizeof *parser_tests, test->src, success ? "yes" : "no"); + if (success && ! test->ok) + { + err(L"\"%ls\" should NOT have parsed, but did", test->src); + } + else if (! 
success && test->ok) + { + err(L"\"%ls\" should have parsed, but failed", test->src); + } + } + say(L"Parse tests complete"); +} + +/* Given that we have an array of 'fuzz_count' strings, we wish to enumerate all permutations of 'len' values. We do this by incrementing an integer, interpreting it as "base fuzz_count". */ +static inline bool string_for_permutation(const wcstring *fuzzes, size_t fuzz_count, size_t len, size_t permutation, wcstring *out_str) +{ + out_str->clear(); + + size_t remaining_permutation = permutation; + for (size_t i=0; i < len; i++) + { + size_t idx = remaining_permutation % fuzz_count; + remaining_permutation /= fuzz_count; + + out_str->append(fuzzes[idx]); + out_str->push_back(L' '); + } + // Return false if we wrapped + return remaining_permutation == 0; +} + +static void test_new_parser_fuzzing(void) +{ + say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t)); + const wcstring fuzzes[] = + { + L"if", + L"else", + L"for", + L"in", + L"while", + L"begin", + L"function", + L"switch", + L"case", + L"end", + L"and", + L"or", + L"not", + L"command", + L"builtin", + L"foo", + L"|", + L"^", + L"&", + L";", + }; + + /* Generate a list of strings of all keyword / token combinations. */ + wcstring src; + src.reserve(128); + + parse_node_tree_t node_tree; + parse_error_list_t errors; + + double start = timef(); + bool log_it = true; + size_t max_len = 5; + for (size_t len = 0; len < max_len; len++) + { + if (log_it) + fprintf(stderr, "%lu / %lu...", len, max_len); + + /* We wish to look at all permutations of 4 elements of 'fuzzes' (with replacement). Construct an int and keep incrementing it. 
*/ + size_t permutation = 0; + while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++, &src)) + { + parse_tree_from_string(src, parse_flag_continue_after_error, &node_tree, &errors); + } + if (log_it) + fprintf(stderr, "done (%lu)\n", permutation); + + } + double end = timef(); + if (log_it) + say(L"All fuzzed in %f seconds!", end - start); +} + +// Parse a statement, returning the command, args (joined by spaces), and the decoration. Returns true if successful. +static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *out_joined_args, enum parse_statement_decoration_t *out_deco) +{ + out_cmd->clear(); + out_joined_args->clear(); + *out_deco = parse_statement_decoration_none; + + bool result = false; + parse_node_tree_t tree; + if (parse_tree_from_string(src, parse_flag_none, &tree, NULL)) + { + /* Get the statement. Should only have one */ + const parse_node_tree_t::parse_node_list_t stmt_nodes = tree.find_nodes(tree.at(0), symbol_plain_statement); + if (stmt_nodes.size() != 1) + { + say(L"Unexpected number of statements (%lu) found in '%ls'", stmt_nodes.size(), src.c_str()); + return false; + } + const parse_node_t &stmt = *stmt_nodes.at(0); + + /* Return its decoration */ + *out_deco = tree.decoration_for_plain_statement(stmt); + + /* Return its command */ + tree.command_for_plain_statement(stmt, src, out_cmd); + + /* Return arguments separated by spaces */ + const parse_node_tree_t::parse_node_list_t arg_nodes = tree.find_nodes(stmt, symbol_argument); + for (size_t i=0; i < arg_nodes.size(); i++) + { + if (i > 0) out_joined_args->push_back(L' '); + out_joined_args->append(arg_nodes.at(i)->get_source(src)); + } + result = true; + } + return result; +} + +/* Test the LL2 (two token lookahead) nature of the parser by exercising the special builtin and command handling. 
In particular, 'command foo' should be a decorated statement 'foo' but 'command --help' should be an undecorated statement 'command' with argument '--help', and NOT attempt to run a command called '--help' */ +static void test_new_parser_ll2(void) +{ + say(L"Testing parser two-token lookahead"); + + const struct + { + wcstring src; + wcstring cmd; + wcstring args; + enum parse_statement_decoration_t deco; + } tests[] = + { + {L"echo hello", L"echo", L"hello", parse_statement_decoration_none}, + {L"command echo hello", L"echo", L"hello", parse_statement_decoration_command}, + {L"command command hello", L"command", L"hello", parse_statement_decoration_command}, + {L"builtin command hello", L"command", L"hello", parse_statement_decoration_builtin}, + {L"command --help", L"command", L"--help", parse_statement_decoration_none}, + {L"command -h", L"command", L"-h", parse_statement_decoration_none}, + {L"command", L"command", L"", parse_statement_decoration_none}, + {L"command -", L"command", L"-", parse_statement_decoration_none}, + {L"command --", L"command", L"--", parse_statement_decoration_none}, + {L"builtin --names", L"builtin", L"--names", parse_statement_decoration_none}, + {L"function", L"function", L"", parse_statement_decoration_none}, + {L"function --help", L"function", L"--help", parse_statement_decoration_none} + }; + + for (size_t i=0; i < sizeof tests / sizeof *tests; i++) + { + wcstring cmd, args; + enum parse_statement_decoration_t deco = parse_statement_decoration_none; + bool success = test_1_parse_ll2(tests[i].src, &cmd, &args, &deco); + if (! 
success) + err(L"Parse of '%ls' failed on line %ld", tests[i].cmd.c_str(), (long)__LINE__); + if (cmd != tests[i].cmd) + err(L"When parsing '%ls', expected command '%ls' but got '%ls' on line %ld", tests[i].src.c_str(), tests[i].cmd.c_str(), cmd.c_str(), (long)__LINE__); + if (args != tests[i].args) + err(L"When parsing '%ls', expected args '%ls' but got '%ls' on line %ld", tests[i].src.c_str(), tests[i].args.c_str(), args.c_str(), (long)__LINE__); + if (deco != tests[i].deco) + err(L"When parsing '%ls', expected decoration %d but got %d on line %ld", tests[i].src.c_str(), (int)tests[i].deco, (int)deco, (long)__LINE__); + } +} + +static void test_new_parser_ad_hoc() +{ + /* Very ad-hoc tests for issues encountered */ + say(L"Testing new parser ad hoc tests"); + + /* Ensure that 'case' terminates a job list */ + const wcstring src = L"switch foo ; case bar; case baz; end"; + parse_node_tree_t parse_tree; + bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, NULL); + if (! success) + { + err(L"Parsing failed"); + } + + /* Expect three case_item_lists: one for each case, and a terminal one. 
The bug was that we'd try to run a command 'case' */ + const parse_node_t &root = parse_tree.at(0); + const parse_node_tree_t::parse_node_list_t node_list = parse_tree.find_nodes(root, symbol_case_item_list); + if (node_list.size() != 3) + { + err(L"Expected 3 case item nodes, found %lu", node_list.size()); + } +} + +static void test_new_parser_errors(void) +{ + say(L"Testing new parser error reporting"); + const struct + { + const wchar_t *src; + parse_error_code_t code; + } + tests[] = + { + {L"echo (abc", parse_error_tokenizer}, + + {L"end", parse_error_unbalancing_end}, + {L"echo hi ; end", parse_error_unbalancing_end}, + + {L"else", parse_error_unbalancing_else}, + {L"if true ; end ; else", parse_error_unbalancing_else}, + + {L"case", parse_error_unbalancing_case}, + {L"if true ; case ; end", parse_error_unbalancing_case}, + + {L"foo || bar", parse_error_double_pipe}, + {L"foo && bar", parse_error_double_background}, + }; + + for (size_t i = 0; i < sizeof tests / sizeof *tests; i++) + { + const wcstring src = tests[i].src; + parse_error_code_t expected_code = tests[i].code; + + parse_error_list_t errors; + parse_node_tree_t parse_tree; + bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors); + if (success) + { + err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str()); + } + + if (errors.size() != 1) + { + err(L"Source '%ls' was expected to produce 1 error, but instead produced %lu errors", src.c_str(), errors.size()); + } + else if (errors.at(0).code != expected_code) + { + err(L"Source '%ls' was expected to produce error code %lu, but instead produced error code %lu", src.c_str(), expected_code, (unsigned long)errors.at(0).code); + for (size_t i=0; i < errors.size(); i++) + { + err(L"\t\t%ls", errors.at(i).describe(src).c_str()); + } + } + + } + +} + +static void test_highlighting(void) +{ + say(L"Testing syntax highlighting"); + if (system("mkdir -p /tmp/fish_highlight_test/")) err(L"mkdir failed"); + if 
(system("touch /tmp/fish_highlight_test/foo")) err(L"touch failed"); + if (system("touch /tmp/fish_highlight_test/bar")) err(L"touch failed"); + + // Here are the components of our source and the colors we expect those to be + struct highlight_component_t { + const wchar_t *txt; + int color; + }; + + const highlight_component_t components1[] = + { + {L"echo", HIGHLIGHT_COMMAND}, + {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH}, + {L"&", HIGHLIGHT_END}, + {NULL, -1} + }; + + const highlight_component_t components2[] = + { + {L"command", HIGHLIGHT_COMMAND}, + {L"echo", HIGHLIGHT_COMMAND}, + {L"abc", HIGHLIGHT_PARAM}, + {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH}, + {L"&", HIGHLIGHT_END}, + {NULL, -1} + }; + + const highlight_component_t components3[] = + { + {L"if command ls", HIGHLIGHT_COMMAND}, + {L"; ", HIGHLIGHT_END}, + {L"echo", HIGHLIGHT_COMMAND}, + {L"abc", HIGHLIGHT_PARAM}, + {L"; ", HIGHLIGHT_END}, + {L"/bin/definitely_not_a_command", HIGHLIGHT_ERROR}, + {L"; ", HIGHLIGHT_END}, + {L"end", HIGHLIGHT_COMMAND}, + {NULL, -1} + }; + + /* Verify that cd shows errors for non-directories */ + const highlight_component_t components4[] = + { + {L"cd", HIGHLIGHT_COMMAND}, + {L"/tmp/fish_highlight_test", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH}, + {NULL, -1} + }; + + const highlight_component_t components5[] = + { + {L"cd", HIGHLIGHT_COMMAND}, + {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_ERROR}, + {NULL, -1} + }; + + const highlight_component_t components6[] = + { + {L"cd", HIGHLIGHT_COMMAND}, + {L"--help", HIGHLIGHT_PARAM}, + {L"-h", HIGHLIGHT_PARAM}, + {L"definitely_not_a_directory", HIGHLIGHT_ERROR}, + {NULL, -1} + }; + + // Command substitutions + const highlight_component_t components7[] = + { + {L"echo", HIGHLIGHT_COMMAND}, + {L"param1", HIGHLIGHT_PARAM}, + {L"(", HIGHLIGHT_OPERATOR}, + {L"ls", HIGHLIGHT_COMMAND}, + {L"param2", HIGHLIGHT_PARAM}, + {L")", HIGHLIGHT_OPERATOR}, + {NULL, -1} + }; + + // 
Redirections substitutions + const highlight_component_t components8[] = + { + {L"echo", HIGHLIGHT_COMMAND}, + {L"param1", HIGHLIGHT_PARAM}, + + /* Input redirection */ + {L"<", HIGHLIGHT_REDIRECTION}, + {L"/bin/echo", HIGHLIGHT_REDIRECTION}, + + /* Output redirection to a valid fd */ + {L"1>&2", HIGHLIGHT_REDIRECTION}, + + /* Output redirection to an invalid fd */ + {L"2>&", HIGHLIGHT_REDIRECTION}, + {L"LOL", HIGHLIGHT_ERROR}, + + /* Just a param, not a redirection */ + {L"/tmp/blah", HIGHLIGHT_PARAM}, + + /* Input redirection from directory */ + {L"<", HIGHLIGHT_REDIRECTION}, + {L"/tmp/", HIGHLIGHT_ERROR}, + + /* Output redirection to an invalid path */ + {L"3>", HIGHLIGHT_REDIRECTION}, + {L"/not/a/valid/path/nope", HIGHLIGHT_ERROR}, + + /* Output redirection to directory */ + {L"3>", HIGHLIGHT_REDIRECTION}, + {L"/tmp/nope/", HIGHLIGHT_ERROR}, + + + /* Redirections to overflow fd */ + {L"99999999999999999999>&2", HIGHLIGHT_ERROR}, + {L"2>&", HIGHLIGHT_REDIRECTION}, + {L"99999999999999999999", HIGHLIGHT_ERROR}, + + /* Output redirection containing a command substitution */ + {L"4>", HIGHLIGHT_REDIRECTION}, + {L"(", HIGHLIGHT_OPERATOR}, + {L"echo", HIGHLIGHT_COMMAND}, + {L"/tmp/somewhere", HIGHLIGHT_PARAM}, + {L")", HIGHLIGHT_OPERATOR}, + + /* Just another param */ + {L"param2", HIGHLIGHT_PARAM}, + {NULL, -1} + }; + + const highlight_component_t components9[] = + { + {L"end", HIGHLIGHT_ERROR}, + {L";", HIGHLIGHT_END}, + {L"if", HIGHLIGHT_COMMAND}, + {L"end", HIGHLIGHT_ERROR}, + {NULL, -1} + }; + + const highlight_component_t components10[] = + { + {L"echo", HIGHLIGHT_COMMAND}, + {L"'single_quote", HIGHLIGHT_ERROR}, + {NULL, -1} + }; + + + const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8, components9, components10}; + for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) + { + const highlight_component_t *components = tests[which]; + // Count how many we 
have + size_t component_count = 0; + while (components[component_count].txt != NULL) + { + component_count++; + } + + // Generate the text + wcstring text; + std::vector expected_colors; + for (size_t i=0; i < component_count; i++) + { + if (i > 0) + { + text.push_back(L' '); + expected_colors.push_back(0); + } + text.append(components[i].txt); + expected_colors.resize(text.size(), components[i].color); + } + assert(expected_colors.size() == text.size()); + + std::vector colors(text.size()); + highlight_shell(text, colors, 20, NULL, env_vars_snapshot_t()); + + if (expected_colors.size() != colors.size()) + { + err(L"Color vector has wrong size! Expected %lu, actual %lu", expected_colors.size(), colors.size()); + } + assert(expected_colors.size() == colors.size()); + for (size_t i=0; i < text.size(); i++) + { + // Hackish space handling. We don't care about the colors in spaces. + if (text.at(i) == L' ') + continue; + + if (expected_colors.at(i) != colors.at(i)) + { + const wcstring spaces(i, L' '); + err(L"Wrong color at index %lu in text (expected %#x, actual %#x):\n%ls\n%ls^", i, expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str()); + } + } + } + + system("rm -Rf /tmp/fish_highlight_test"); +} /** Main test @@ -1899,13 +2701,13 @@ void history_tests_t::test_history_speed(void) int main(int argc, char **argv) { setlocale(LC_ALL, ""); - srand(time(0)); + //srand(time(0)); configure_thread_assertions_for_testing(); program_name=L"(ignore)"; + s_arguments = argv + 1; say(L"Testing low-level functionality"); - say(L"Lines beginning with '(ignore):' are not errors, they are warning messages\ngenerated by the fish parser library when given broken input, and can be\nignored. 
All actual errors begin with 'Error:'."); set_main_thread(); setup_fork_guards(); proc_init(); @@ -1914,38 +2716,51 @@ int main(int argc, char **argv) builtin_init(); reader_init(); env_init(); + + /* Set default signal handlers, so we can ctrl-C out of this */ + signal_reset_handlers(); - test_unescape_sane(); - test_escape_crazy(); - test_format(); - test_convert(); - test_convert_nulls(); - test_tok(); - test_fork(); - test_iothread(); - test_parser(); - test_utils(); - test_escape_sequences(); - test_lru(); - test_expand(); - test_fuzzy_match(); - test_abbreviations(); - test_test(); - test_path(); - test_word_motion(); - test_is_potential_path(); - test_colors(); - test_complete(); - test_completion_insertions(); - test_autosuggestion_combining(); - test_autosuggest_suggest_special(); - history_tests_t::test_history(); - history_tests_t::test_history_merge(); - history_tests_t::test_history_races(); - history_tests_t::test_history_formats(); + if (should_test_function("highlighting")) test_highlighting(); + if (should_test_function("new_parser_ll2")) test_new_parser_ll2(); + if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive + if (should_test_function("new_parser_correctness")) test_new_parser_correctness(); + if (should_test_function("new_parser_ad_hoc")) test_new_parser_ad_hoc(); + if (should_test_function("new_parser_errors")) test_new_parser_errors(); + if (should_test_function("escape")) test_unescape_sane(); + if (should_test_function("escape")) test_escape_crazy(); + if (should_test_function("format")) test_format(); + if (should_test_function("convert")) test_convert(); + if (should_test_function("convert_nulls")) test_convert_nulls(); + if (should_test_function("tok")) test_tok(); + if (should_test_function("fork")) test_fork(); + if (should_test_function("iothread")) test_iothread(); + if (should_test_function("parser")) test_parser(); + if (should_test_function("cancellation")) test_cancellation(); + if 
(should_test_function("indents")) test_indents(); + if (should_test_function("utils")) test_utils(); + if (should_test_function("escape_sequences")) test_escape_sequences(); + if (should_test_function("lru")) test_lru(); + if (should_test_function("expand")) test_expand(); + if (should_test_function("fuzzy_match")) test_fuzzy_match(); + if (should_test_function("abbreviations")) test_abbreviations(); + if (should_test_function("test")) test_test(); + if (should_test_function("path")) test_path(); + if (should_test_function("word_motion")) test_word_motion(); + if (should_test_function("is_potential_path")) test_is_potential_path(); + if (should_test_function("colors")) test_colors(); + if (should_test_function("complete")) test_complete(); + if (should_test_function("completion_insertions")) test_completion_insertions(); + if (should_test_function("autosuggestion_combining")) test_autosuggestion_combining(); + if (should_test_function("autosuggest_suggest_special")) test_autosuggest_suggest_special(); + if (should_test_function("history")) history_tests_t::test_history(); + if (should_test_function("history_merge")) history_tests_t::test_history_merge(); + if (should_test_function("history_races")) history_tests_t::test_history_races(); + if (should_test_function("history_formats")) history_tests_t::test_history_formats(); //history_tests_t::test_history_speed(); say(L"Encountered %d errors in low-level tests", err_count); + if (s_test_run_count == 0) + say(L"*** No Tests Were Actually Run! ***"); /* Skip performance tests for now, since they seem to hang when running from inside make (?) 
diff --git a/function.cpp b/function.cpp index c559a686a..eadcca7c6 100644 --- a/function.cpp +++ b/function.cpp @@ -186,10 +186,15 @@ void function_add(const function_data_t &data, const parser_t &parser) /* Remove the old function */ function_remove(data.name); - /* Create and store a new function */ const wchar_t *filename = reader_current_filename(); - int def_offset = parser.line_number_of_character_at_offset(parser.current_block()->tok_pos) - 1; + + int def_offset = -1; + if (parser.current_block() != NULL) + { + def_offset = parser.line_number_of_character_at_offset(parser.current_block()->tok_pos); + } + const function_map_t::value_type new_pair(data.name, function_info_t(data, filename, def_offset, is_autoload)); loaded_functions.insert(new_pair); diff --git a/function.h b/function.h index e2896f18e..847c818b0 100644 --- a/function.h +++ b/function.h @@ -39,7 +39,7 @@ struct function_data_t /** Function definition */ - wchar_t *definition; + const wchar_t *definition; /** List of all event handlers for this function */ diff --git a/highlight.cpp b/highlight.cpp index fd7aa481e..32a8a27d7 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include "fallback.h" #include "util.h" @@ -34,6 +35,9 @@ #include "wildcard.h" #include "path.h" #include "history.h" +#include "parse_tree.h" + +#define CURSOR_POSITION_INVALID ((size_t)(-1)) /** Number of elements in the highlight_var array @@ -328,6 +332,28 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d return result; } +/* Given a plain statement node in a parse tree, get the command and return it, expanded appropriately for commands. If we succeed, return true. 
*/ +bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd) +{ + assert(plain_statement.type == symbol_plain_statement); + bool result = false; + + /* Get the command */ + wcstring cmd; + if (tree.command_for_plain_statement(plain_statement, src, &cmd)) + { + /* Try expanding it. If we cannot, it's an error. */ + if (expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) + { + /* Success, return the expanded string by reference */ + std::swap(cmd, *out_cmd); + result = true; + } + } + return result; +} + + rgb_color_t highlight_get_color(int highlight, bool is_background) { size_t idx=0; @@ -666,164 +692,74 @@ static void highlight_param(const wcstring &buffstr, std::vector &colors, w } } -static int has_expand_reserved(const wchar_t *str) +static bool has_expand_reserved(const wcstring &str) { - while (*str) + bool result = false; + for (size_t i=0; i < str.size(); i++) { - if (*str >= EXPAND_RESERVED && - *str <= EXPAND_RESERVED_END) + wchar_t wc = str.at(i); + if (wc >= EXPAND_RESERVED && wc <= EXPAND_RESERVED_END) { - return 1; + result = true; + break; } - str++; } - return 0; + return result; } -/* Parse a command line. Return by reference the last command, its arguments, and the offset in the string of the beginning of the last argument. This is used by autosuggestions */ -static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command, wcstring_list_t *out_arguments, int *out_last_arg_pos) +/* Parse a command line. Return by reference the last command, and the last argument to that command (as a copied node), if any. 
This is used by autosuggestions */ +static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expanded_command, parse_node_t *out_last_arg) { - if (str.empty()) - return false; - - wcstring cmd; - wcstring_list_t args; - int arg_pos = -1; - - bool had_cmd = false; - tokenizer_t tok(str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); - for (; tok_has_next(&tok); tok_next(&tok)) + bool result = false; + + /* Parse the buffer */ + parse_node_tree_t parse_tree; + parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); + + /* Find the last statement */ + const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL); + if (last_statement != NULL) { - int last_type = tok_last_type(&tok); - - switch (last_type) + if (plain_statement_get_expanded_command(buff, parse_tree, *last_statement, out_expanded_command)) { - case TOK_STRING: + /* We got it */ + result = true; + + /* Find the last argument. If we don't get one, return an invalid node. */ + const parse_node_t *last_arg = parse_tree.find_last_node_of_type(symbol_argument, last_statement); + if (last_arg != NULL) { - if (had_cmd) - { - /* Parameter to the command. We store these escaped. */ - args.push_back(tok_last(&tok)); - arg_pos = tok_get_pos(&tok); - } - else - { - /* Command. First check that the command actually exists. */ - wcstring local_cmd = tok_last(&tok); - bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); - if (! 
expanded || has_expand_reserved(cmd.c_str())) - { - /* We can't expand this cmd, ignore it */ - } - else - { - bool is_subcommand = false; - int mark = tok_get_pos(&tok); - - if (parser_keywords_is_subcommand(cmd)) - { - int sw; - tok_next(&tok); - - sw = parser_keywords_is_switch(tok_last(&tok)); - if (!parser_keywords_is_block(cmd) && - sw == ARG_SWITCH) - { - /* It's an argument to the subcommand itself */ - } - else - { - if (sw == ARG_SKIP) - mark = tok_get_pos(&tok); - is_subcommand = true; - } - tok_set_pos(&tok, mark); - } - - if (!is_subcommand) - { - /* It's really a command */ - had_cmd = true; - cmd = local_cmd; - } - } - - } - break; - } - - case TOK_REDIRECT_NOCLOB: - case TOK_REDIRECT_OUT: - case TOK_REDIRECT_IN: - case TOK_REDIRECT_APPEND: - case TOK_REDIRECT_FD: - { - if (!had_cmd) - { - break; - } - tok_next(&tok); - break; - } - - case TOK_PIPE: - case TOK_BACKGROUND: - case TOK_END: - { - had_cmd = false; - cmd.clear(); - args.clear(); - arg_pos = -1; - break; - } - - case TOK_COMMENT: - case TOK_ERROR: - default: - { - break; + *out_last_arg = *last_arg; } } } - - /* Remember our command if we have one */ - if (had_cmd) - { - if (out_command) out_command->swap(cmd); - if (out_arguments) out_arguments->swap(args); - if (out_last_arg_pos) *out_last_arg_pos = arg_pos; - } - return had_cmd; + return result; } - /* We have to return an escaped string here */ -bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_directory, wcstring &outSuggestion) +bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_directory, wcstring &out_suggestion) { if (str.empty()) return false; - + ASSERT_IS_BACKGROUND_THREAD(); /* Parse the string */ wcstring parsed_command; - wcstring_list_t parsed_arguments; - int parsed_last_arg_pos = -1; - if (! autosuggest_parse_command(str, &parsed_command, &parsed_arguments, &parsed_last_arg_pos)) - { + parse_node_t last_arg_node(token_type_invalid); + if (! 
autosuggest_parse_command(str, &parsed_command, &last_arg_node)) return false; - } bool result = false; - if (parsed_command == L"cd" && ! parsed_arguments.empty()) + if (parsed_command == L"cd" && last_arg_node.type == symbol_argument && last_arg_node.has_source()) { /* We can possibly handle this specially */ - const wcstring escaped_dir = parsed_arguments.back(); + const wcstring escaped_dir = last_arg_node.get_source(str); wcstring suggested_path; /* We always return true because we recognized the command. This prevents us from falling back to dumber algorithms; for example we won't suggest a non-directory for the cd command. */ result = true; - outSuggestion.clear(); + out_suggestion.clear(); /* Unescape the parameter */ wcstring unescaped_dir; @@ -837,16 +773,15 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di path_flags_t path_flags = (quote == L'\0') ? PATH_EXPAND_TILDE : 0; if (unescaped && is_potential_cd_path(unescaped_dir, working_directory, path_flags, &suggested_path)) { - /* Note: this looks really wrong for strings that have an "unescapable" character in them, e.g. 
a \t, because parse_util_escape_string_with_quote will insert that character */ wcstring escaped_suggested_path = parse_util_escape_string_with_quote(suggested_path, quote); /* Return it */ - outSuggestion = str; - outSuggestion.erase(parsed_last_arg_pos); - if (quote != L'\0') outSuggestion.push_back(quote); - outSuggestion.append(escaped_suggested_path); - if (quote != L'\0') outSuggestion.push_back(quote); + out_suggestion = str; + out_suggestion.erase(last_arg_node.source_start); + if (quote != L'\0') out_suggestion.push_back(quote); + out_suggestion.append(escaped_suggested_path); + if (quote != L'\0') out_suggestion.push_back(quote); } } else @@ -864,15 +799,14 @@ bool autosuggest_validate_from_history(const history_item_t &item, file_detectio /* Parse the string */ wcstring parsed_command; - wcstring_list_t parsed_arguments; - int parsed_last_arg_pos = -1; - if (! autosuggest_parse_command(item.str(), &parsed_command, &parsed_arguments, &parsed_last_arg_pos)) + parse_node_t last_arg_node(token_type_invalid); + if (! autosuggest_parse_command(item.str(), &parsed_command, &last_arg_node)) return false; - if (parsed_command == L"cd" && ! parsed_arguments.empty()) + if (parsed_command == L"cd" && last_arg_node.type == symbol_argument && last_arg_node.has_source()) { /* We can possibly handle this specially */ - wcstring dir = parsed_arguments.back(); + wcstring dir = last_arg_node.get_source(item.str()); if (expand_one(dir, EXPAND_SKIP_CMDSUBST)) { handled = true; @@ -1026,7 +960,7 @@ static void tokenize(const wchar_t * const buff, std::vector &color, const */ cmd = tok_last(&tok); bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); - if (! expanded || has_expand_reserved(cmd.c_str())) + if (! 
expanded || has_expand_reserved(cmd)) { color.at(tok_get_pos(&tok)) = HIGHLIGHT_ERROR; } @@ -1307,9 +1241,20 @@ static void tokenize(const wchar_t * const buff, std::vector &color, const } } +void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +{ + if (1) + { + highlight_shell_new_parser(buff, color, pos, error, vars); + } + else + { + highlight_shell_classic(buff, color, pos, error, vars); + } +} // PCA This function does I/O, (calls is_potential_path, path_get_path, maybe others) and so ought to only run on a background thread -void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +void highlight_shell_classic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) { ASSERT_IS_BACKGROUND_THREAD(); @@ -1441,7 +1386,815 @@ void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, } } +/* This function is a disaster badly in need of refactoring. */ +static void color_argument_internal(const wcstring &buffstr, std::vector::iterator colors) +{ + const size_t buff_len = buffstr.size(); + std::fill(colors, colors + buff_len, HIGHLIGHT_PARAM); + enum {e_unquoted, e_single_quoted, e_double_quoted} mode = e_unquoted; + int bracket_count=0; + for (size_t in_pos=0; in_pos < buff_len; in_pos++) + { + const wchar_t c = buffstr.at(in_pos); + switch (mode) + { + case e_unquoted: + { + if (c == L'\\') + { + int fill_color = HIGHLIGHT_ESCAPE; //may be set to HIGHLIGHT_ERROR + const size_t backslash_pos = in_pos; + size_t fill_end = backslash_pos; + + // Move to the escaped character + in_pos++; + const wchar_t escaped_char = (in_pos < buff_len ? 
buffstr.at(in_pos) : L'\0'); + + if (escaped_char == L'\0') + { + fill_end = in_pos; + fill_color = HIGHLIGHT_ERROR; + } + else if (wcschr(L"~%", escaped_char)) + { + if (in_pos == 1) + { + fill_end = in_pos + 1; + } + } + else if (escaped_char == L',') + { + if (bracket_count) + { + fill_end = in_pos + 1; + } + } + else if (wcschr(L"abefnrtv*?$(){}[]'\"<>^ \\#;|&", escaped_char)) + { + fill_end = in_pos + 1; + } + else if (wcschr(L"c", escaped_char)) + { + // Like \ci. So highlight three characters + fill_end = in_pos + 1; + } + else if (wcschr(L"uUxX01234567", escaped_char)) + { + long long res=0; + int chars=2; + int base=16; + + wchar_t max_val = ASCII_MAX; + + switch (escaped_char) + { + case L'u': + { + chars=4; + max_val = UCS2_MAX; + in_pos++; + break; + } + + case L'U': + { + chars=8; + max_val = WCHAR_MAX; + in_pos++; + break; + } + + case L'x': + { + in_pos++; + break; + } + + case L'X': + { + max_val = BYTE_MAX; + in_pos++; + break; + } + + default: + { + // a digit like \12 + base=8; + chars=3; + break; + } + } + + // Consume + for (int i=0; i < chars && in_pos < buff_len; i++) + { + long d = convert_digit(buffstr.at(in_pos), base); + if (d < 0) + break; + res = (res * base) + d; + in_pos++; + } + //in_pos is now at the first character that could not be converted (or buff_len) + assert(in_pos >= backslash_pos && in_pos <= buff_len); + fill_end = in_pos; + + // It's an error if we exceeded the max value + if (res > max_val) + fill_color = HIGHLIGHT_ERROR; + + // Subtract one from in_pos, so that the increment in the loop will move to the next character + in_pos--; + } + assert(fill_end >= backslash_pos); + std::fill(colors + backslash_pos, colors + fill_end, fill_color); + } + else + { + // Not a backslash + switch (c) + { + case L'~': + case L'%': + { + if (in_pos == 0) + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + } + break; + } + + case L'$': + { + assert(in_pos < buff_len); + int dollar_color = HIGHLIGHT_ERROR; + if (in_pos + 1 < buff_len) + { + 
wchar_t next = buffstr.at(in_pos + 1); + if (next == L'$' || wcsvarchr(next)) + dollar_color = HIGHLIGHT_OPERATOR; + } + colors[in_pos] = dollar_color; + break; + } + + + case L'*': + case L'?': + case L'(': + case L')': + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + break; + } + + case L'{': + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + bracket_count++; + break; + } + + case L'}': + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + bracket_count--; + break; + } + + case L',': + { + if (bracket_count > 0) + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + } + + break; + } + + case L'\'': + { + colors[in_pos] = HIGHLIGHT_QUOTE; + mode = e_single_quoted; + break; + } + + case L'\"': + { + colors[in_pos] = HIGHLIGHT_QUOTE; + mode = e_double_quoted; + break; + } + + } + } + break; + } + + /* + Mode 1 means single quoted string, i.e. 'foo' + */ + case e_single_quoted: + { + colors[in_pos] = HIGHLIGHT_QUOTE; + if (c == L'\\') + { + // backslash + if (in_pos + 1 < buff_len) + { + const wchar_t escaped_char = buffstr.at(in_pos + 1); + if (escaped_char == L'\\' || escaped_char == L'\'') + { + colors[in_pos] = HIGHLIGHT_ESCAPE; //backslash + colors[in_pos + 1] = HIGHLIGHT_ESCAPE; //escaped char + in_pos += 1; //skip over backslash + } + } + } + else if (c == L'\'') + { + mode = e_unquoted; + } + break; + } + + /* + Mode 2 means double quoted string, i.e. 
"foo" + */ + case e_double_quoted: + { + colors[in_pos] = HIGHLIGHT_QUOTE; + switch (c) + { + case L'"': + { + mode = e_unquoted; + break; + } + + case L'\\': + { + // backslash + if (in_pos + 1 < buff_len) + { + const wchar_t escaped_char = buffstr.at(in_pos + 1); + if (escaped_char == L'\\' || escaped_char == L'\'' || escaped_char == L'$') + { + colors[in_pos] = HIGHLIGHT_ESCAPE; //backslash + colors[in_pos + 1] = HIGHLIGHT_ESCAPE; //escaped char + in_pos += 1; //skip over backslash + } + } + break; + } + + case L'$': + { + int dollar_color = HIGHLIGHT_ERROR; + if (in_pos + 1 < buff_len) + { + wchar_t next = buffstr.at(in_pos + 1); + if (next == L'$' || wcsvarchr(next)) + dollar_color = HIGHLIGHT_OPERATOR; + } + colors[in_pos] = dollar_color; + break; + } + + } + break; + } + } + } +} + +/* Syntax highlighter helper */ +class highlighter_t +{ + /* The string we're highlighting. Note this is a reference member variable (to avoid copying)! We must not outlive this! */ + const wcstring &buff; + + /* Cursor position */ + const size_t cursor_pos; + + /* Environment variables. Again, a reference member variable! 
*/ + const env_vars_snapshot_t &vars; + + /* Working directory */ + const wcstring working_directory; + + /* The resulting colors */ + typedef std::vector color_array_t; + color_array_t color_array; + + /* The parse tree of the buff */ + parse_node_tree_t parse_tree; + + /* Color an argument */ + void color_argument(const parse_node_t &node); + + /* Color a redirection */ + void color_redirection(const parse_node_t &node); + + /* Color the arguments of the given node */ + void color_arguments(const parse_node_t &list_node); + + /* Color the redirections of the given node */ + void color_redirections(const parse_node_t &list_node); + + /* Color all the children of the command with the given type */ + void color_children(const parse_node_t &parent, parse_token_type_t type, int color); + + /* Colors the source range of a node with a given color */ + void color_node(const parse_node_t &node, int color); + + public: + + /* Constructor */ + highlighter_t(const wcstring &str, size_t pos, const env_vars_snapshot_t &ev, const wcstring &wd) : buff(str), cursor_pos(pos), vars(ev), working_directory(wd), color_array(str.size()) + { + /* Parse the tree */ + this->parse_tree.clear(); + parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL); + } + + /* Perform highlighting, returning an array of colors */ + const color_array_t &highlight(); +}; + +void highlighter_t::color_node(const parse_node_t &node, int color) +{ + // Can only color nodes with valid source ranges + if (! 
node.has_source()) + return; + + // Fill the color array with our color in the corresponding range + size_t source_end = node.source_start + node.source_length; + assert(source_end >= node.source_start); + assert(source_end <= color_array.size()); + + std::fill(this->color_array.begin() + node.source_start, this->color_array.begin() + source_end, color); +} + +/* node does not necessarily have type symbol_argument here */ +void highlighter_t::color_argument(const parse_node_t &node) +{ + if (! node.has_source()) + return; + + const wcstring arg_str = node.get_source(this->buff); + + /* Get an iterator to the colors associated with the argument */ + const size_t arg_start = node.source_start; + const color_array_t::iterator arg_colors = color_array.begin() + arg_start; + + /* Color this argument without concern for command substitutions */ + color_argument_internal(arg_str, arg_colors); + + /* Now do command substitutions */ + size_t cmdsub_cursor = 0, cmdsub_start = 0, cmdsub_end = 0; + wcstring cmdsub_contents; + while (parse_util_locate_cmdsubst_range(arg_str, &cmdsub_cursor, &cmdsub_contents, &cmdsub_start, &cmdsub_end, true /* accept incomplete */) > 0) + { + /* The cmdsub_start is the open paren. cmdsub_end is either the close paren or the end of the string. cmdsub_contents extends from one past cmdsub_start to cmdsub_end */ + assert(cmdsub_end > cmdsub_start); + assert(cmdsub_end - cmdsub_start - 1 == cmdsub_contents.size()); + + /* Found a command substitution. Compute the position of the start and end of the cmdsub contents, within our overall src. */ + const size_t arg_subcmd_start = arg_start + cmdsub_start, arg_subcmd_end = arg_start + cmdsub_end; + + /* Highlight the parens. The open paren must exist; the closed paren may not if it was incomplete. 
*/ + assert(cmdsub_start < arg_str.size()); + this->color_array.at(arg_subcmd_start) = HIGHLIGHT_OPERATOR; + if (arg_subcmd_end < this->buff.size()) + this->color_array.at(arg_subcmd_end) = HIGHLIGHT_OPERATOR; + + /* Compute the cursor's position within the cmdsub. We must be past the open paren (hence >) but can be at the end of the string or closed paren (hence <=) */ + size_t cursor_subpos = CURSOR_POSITION_INVALID; + if (cursor_pos != CURSOR_POSITION_INVALID && cursor_pos > arg_subcmd_start && cursor_pos <= arg_subcmd_end) + { + /* The -1 because the cmdsub_contents does not include the open paren */ + cursor_subpos = cursor_pos - arg_subcmd_start - 1; + } + + /* Highlight it recursively. */ + highlighter_t cmdsub_highlighter(cmdsub_contents, cursor_subpos, this->vars, this->working_directory); + const color_array_t &subcolors = cmdsub_highlighter.highlight(); + + /* Copy out the subcolors back into our array */ + assert(subcolors.size() == cmdsub_contents.size()); + std::copy(subcolors.begin(), subcolors.end(), this->color_array.begin() + arg_subcmd_start + 1); + } +} + +// Indicates whether the source range of the given node forms a valid path in the given working_directory +static bool node_is_potential_path(const wcstring &src, const parse_node_t &node, const wcstring &working_directory) +{ + if (! node.has_source()) + return false; + + + /* Get the node source, unescape it, and then pass it to is_potential_path along with the working directory (as a one element list) */ + bool result = false; + wcstring token(src, node.source_start, node.source_length); + if (unescape_string_in_place(&token, UNESCAPE_SPECIAL)) + { + /* Big hack: is_potential_path expects a tilde, but unescape_string gives us HOME_DIRECTORY. Put it back. */ + if (! 
token.empty() && token.at(0) == HOME_DIRECTORY) + token.at(0) = L'~'; + + const wcstring_list_t working_directory_list(1, working_directory); + result = is_potential_path(token, working_directory_list, PATH_EXPAND_TILDE); + } + return result; +} + +// Color all of the arguments of the given command +void highlighter_t::color_arguments(const parse_node_t &list_node) +{ + /* Hack: determine whether the parent is the cd command, so we can show errors for non-directories */ + bool cmd_is_cd = false; + const parse_node_t *parent = this->parse_tree.get_parent(list_node, symbol_plain_statement); + if (parent != NULL) + { + wcstring cmd_str; + if (plain_statement_get_expanded_command(this->buff, this->parse_tree, *parent, &cmd_str)) + { + cmd_is_cd = (cmd_str == L"cd"); + } + } + + /* Find all the arguments of this list */ + const parse_node_tree_t::parse_node_list_t nodes = this->parse_tree.find_nodes(list_node, symbol_argument); + + for (size_t i=0; i < nodes.size(); i++) + { + const parse_node_t *child = nodes.at(i); + assert(child != NULL && child->type == symbol_argument); + this->color_argument(*child); + + if (cmd_is_cd) + { + /* Mark this as an error if it's not 'help' and not a valid cd path */ + wcstring param = child->get_source(this->buff); + if (expand_one(param, EXPAND_SKIP_CMDSUBST)) + { + bool is_help = string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h"); + if (!is_help && ! is_potential_cd_path(param, working_directory, PATH_EXPAND_TILDE, NULL)) + { + this->color_node(*child, HIGHLIGHT_ERROR); + } + } + } + } +} + +void highlighter_t::color_redirection(const parse_node_t &redirection_node) +{ + assert(redirection_node.type == symbol_redirection); + if (! 
redirection_node.has_source()) + return; + + const parse_node_t *redirection_primitive = this->parse_tree.get_child(redirection_node, 0, parse_token_type_redirection); //like 2> + const parse_node_t *redirection_target = this->parse_tree.get_child(redirection_node, 1, parse_token_type_string); //like &1 or file path + + if (redirection_primitive != NULL) + { + wcstring target; + const enum token_type redirect_type = this->parse_tree.type_for_redirection(redirection_node, this->buff, NULL, &target); + + /* We may get a TOK_NONE redirection type, e.g. if the redirection is invalid */ + this->color_node(*redirection_primitive, redirect_type == TOK_NONE ? HIGHLIGHT_ERROR : HIGHLIGHT_REDIRECTION); + + /* Check if the argument contains a command substitution. If so, highlight it as a param even though it's a command redirection, and don't try to do any other validation. */ + if (parse_util_locate_cmdsubst(target.c_str(), NULL, NULL, true) != 0) + { + if (redirection_target != NULL) + this->color_argument(*redirection_target); + } + else + { + /* No command substitution, so we can highlight the target file or fd. For example, disallow redirections into a non-existent directory */ + bool target_is_valid = true; + + if (! expand_one(target, EXPAND_SKIP_CMDSUBST)) + { + /* Could not be expanded */ + target_is_valid = false; + } + else + { + /* Ok, we successfully expanded our target. Now verify that it works with this redirection. We will probably need it as a path (but not in the case of fd redirections) */ + const wcstring target_path = apply_working_directory(target, this->working_directory); + switch (redirect_type) + { + case TOK_REDIRECT_FD: + { + /* target should be an fd. It must be all digits, and must not overflow. fish_wcstoi returns INT_MAX on overflow; we could instead check errno to disambiguate this from a real INT_MAX fd, but instead we just disallow that. 
*/ + const wchar_t *target_cstr = target.c_str(); + wchar_t *end = NULL; + int fd = fish_wcstoi(target_cstr, &end, 10); + + /* The iswdigit check ensures there's no leading whitespace, the *end check ensures the entire string was consumed, and the numeric checks ensure the fd is at least zero and there was no overflow */ + target_is_valid = (iswdigit(target_cstr[0]) && *end == L'\0' && fd >= 0 && fd < INT_MAX); + } + break; + + case TOK_REDIRECT_IN: + { + /* Input redirections must have a readable non-directory */ + struct stat buf = {}; + target_is_valid = ! waccess(target_path, R_OK) && ! wstat(target_path, &buf) && ! S_ISDIR(buf.st_mode); + } + break; + + case TOK_REDIRECT_OUT: + case TOK_REDIRECT_APPEND: + case TOK_REDIRECT_NOCLOB: + { + /* Test whether the file exists, and whether it's writable (possibly after creating it). access() returns failure if the file does not exist. */ + bool file_exists = false, file_is_writable = false; + int err = 0; + + struct stat buf = {}; + if (wstat(target_path, &buf) < 0) + { + err = errno; + } + + if (string_suffixes_string(L"/", target)) + { + /* Redirections to things that are directories is definitely not allowed */ + file_exists = false; + file_is_writable = false; + } + else if (err == 0) + { + /* No err. We can write to it if it's not a directory and we have permission */ + file_exists = true; + file_is_writable = ! S_ISDIR(buf.st_mode) && ! waccess(target_path, W_OK); + } + else if (err == ENOENT) + { + /* File does not exist. Check if its parent directory is writable. */ + wcstring parent = wdirname(target_path); + + /* Ensure that the parent ends with the path separator. This will ensure that we get an error if the parent directory is not really a directory. */ + if (! 
string_suffixes_string(L"/", parent)) + parent.push_back(L'/'); + + /* Now the file is considered writable if the parent directory is writable */ + file_exists = false; + file_is_writable = (0 == waccess(parent, W_OK)); + } + else + { + /* Other errors we treat as not writable. This includes things like ENOTDIR. */ + file_exists = false; + file_is_writable = false; + } + + /* NOCLOB means that we must not overwrite files that exist */ + target_is_valid = file_is_writable && ! (file_exists && redirect_type == TOK_REDIRECT_NOCLOB); + } + break; + + default: + /* We should not get here, since the node was marked as a redirection, but treat it as an error for paranoia */ + target_is_valid = false; + break; + } + } + + if (redirection_target != NULL) + { + this->color_node(*redirection_target, target_is_valid ? HIGHLIGHT_REDIRECTION : HIGHLIGHT_ERROR); + } + } + } +} + +// Color all of the redirections of the given command +void highlighter_t::color_redirections(const parse_node_t &list_node) +{ + const parse_node_tree_t::parse_node_list_t nodes = this->parse_tree.find_nodes(list_node, symbol_redirection); + for (size_t i=0; i < nodes.size(); i++) + { + this->color_redirection(*nodes.at(i)); + } +} + +/* Color all the children of the command with the given type */ +void highlighter_t::color_children(const parse_node_t &parent, parse_token_type_t type, int color) +{ + for (node_offset_t idx=0; idx < parent.child_count; idx++) + { + const parse_node_t *child = this->parse_tree.get_child(parent, idx); + if (child != NULL && child->type == type) + { + this->color_node(*child, color); + } + } +} + +/* Determine if a command is valid */ +static bool command_is_valid(const wcstring &cmd, enum parse_statement_decoration_t decoration, const wcstring &working_directory, const env_vars_snapshot_t &vars) +{ + /* Determine which types we check, based on the decoration */ + bool builtin_ok = true, function_ok = true, abbreviation_ok = true, command_ok = true, implicit_cd_ok = true; + 
if (decoration == parse_statement_decoration_command) + { + builtin_ok = false; + function_ok = false; + abbreviation_ok = false; + command_ok = true; + implicit_cd_ok = false; + } + else if (decoration == parse_statement_decoration_builtin) + { + builtin_ok = true; + function_ok = false; + abbreviation_ok = false; + command_ok = false; + implicit_cd_ok = false; + } + + /* Check them */ + bool is_valid = false; + + /* Builtins */ + if (! is_valid && builtin_ok) + is_valid = builtin_exists(cmd); + + /* Functions */ + if (! is_valid && function_ok) + is_valid = function_exists_no_autoload(cmd, vars); + + /* Abbreviations */ + if (! is_valid && abbreviation_ok) + is_valid = expand_abbreviation(cmd, NULL); + + /* Regular commands */ + if (! is_valid && command_ok) + is_valid = path_get_path(cmd, NULL, vars); + + /* Implicit cd */ + if (! is_valid && implicit_cd_ok) + is_valid = path_can_be_implicit_cd(cmd, NULL, working_directory.c_str(), vars); + + /* Return what we got */ + return is_valid; +} + +const highlighter_t::color_array_t & highlighter_t::highlight() +{ + ASSERT_IS_BACKGROUND_THREAD(); + + const size_t length = buff.size(); + assert(this->buff.size() == this->color_array.size()); + + if (length == 0) + return color_array; + + /* Start out at zero */ + std::fill(this->color_array.begin(), this->color_array.end(), 0); + + /* Parse the buffer */ + parse_node_tree_t parse_tree; + parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); + +#if 0 + const wcstring dump = parse_dump_tree(parse_tree, buff); + fprintf(stderr, "%ls\n", dump.c_str()); +#endif + + /* Walk the node tree */ + for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) + { + const parse_node_t &node = *iter; + + switch (node.type) + { + // Color direct string descendants, e.g. 'for' and 'in'. 
+ case symbol_for_header: + case symbol_while_header: + case symbol_begin_header: + case symbol_function_header: + case symbol_if_clause: + case symbol_else_clause: + case symbol_case_item: + case symbol_switch_statement: + case symbol_boolean_statement: + case symbol_decorated_statement: + case symbol_if_statement: + { + this->color_children(node, parse_token_type_string, HIGHLIGHT_COMMAND); + // Color the 'end' + this->color_children(node, symbol_end_command, HIGHLIGHT_COMMAND); + } + break; + + case parse_token_type_background: + case parse_token_type_end: + { + this->color_node(node, HIGHLIGHT_END); + } + break; + + case symbol_plain_statement: + { + // Get the decoration from the parent + enum parse_statement_decoration_t decoration = parse_tree.decoration_for_plain_statement(node); + + /* Color the command */ + const parse_node_t *cmd_node = parse_tree.get_child(node, 0, parse_token_type_string); + if (cmd_node != NULL && cmd_node->has_source()) + { + bool is_valid_cmd = false; + wcstring cmd(buff, cmd_node->source_start, cmd_node->source_length); + + /* Try expanding it. If we cannot, it's an error. */ + bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); + if (expanded && ! has_expand_reserved(cmd)) + { + is_valid_cmd = command_is_valid(cmd, decoration, working_directory, vars); + } + this->color_node(*cmd_node, is_valid_cmd ? 
HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR); + } + } + break; + + + case symbol_arguments_or_redirections_list: + case symbol_argument_list: + { + /* Only work on root lists, so that we don't re-color child lists */ + if (parse_tree.argument_list_is_root(node)) + { + this->color_arguments(node); + this->color_redirections(node); + } + } + break; + + case parse_special_type_parse_error: + case parse_special_type_tokenizer_error: + this->color_node(node, HIGHLIGHT_ERROR); + break; + + case parse_special_type_comment: + this->color_node(node, HIGHLIGHT_COMMENT); + break; + + default: + break; + } + } + + if (this->cursor_pos <= this->buff.size()) + { + /* If the cursor is over an argument, and that argument is a valid path, underline it */ + for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) + { + const parse_node_t &node = *iter; + + /* Must be an argument with source */ + if (node.type != symbol_argument || ! node.has_source()) + continue; + + /* See if this node contains the cursor. We check <= source_length so that, when backspacing (and the cursor is just beyond the last token), we may still underline it */ + if (this->cursor_pos >= node.source_start && this->cursor_pos - node.source_start <= node.source_length) + { + /* See if this is a valid path */ + if (node_is_potential_path(buff, node, working_directory)) + { + /* It is, underline it. */ + for (size_t i=node.source_start; i < node.source_start + node.source_length; i++) + { + /* Don't color HIGHLIGHT_ERROR because it looks dorky. For example, trying to cd into a non-directory would show an underline and also red. */ + if (! 
(this->color_array.at(i) & HIGHLIGHT_ERROR)) + { + this->color_array.at(i) |= HIGHLIGHT_VALID_PATH; + } + } + } + } + } + } + + return color_array; +} + +void highlight_shell_new_parser(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +{ + /* Do something sucky and get the current working directory on this background thread. This should really be passed in. */ + const wcstring working_directory = env_get_pwd_slash(); + + /* Highlight it! */ + highlighter_t highlighter(buff, pos, vars, working_directory); + color = highlighter.highlight(); +} /** Perform quote and parenthesis highlighting on the specified string. diff --git a/highlight.h b/highlight.h index 6747bba51..40a535518 100644 --- a/highlight.h +++ b/highlight.h @@ -84,6 +84,7 @@ struct file_detection_context_t; \param error a list in which a description of each error will be inserted. May be 0, in whcich case no error descriptions will be generated. */ void highlight_shell(const wcstring &buffstr, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); +void highlight_shell_new_parser(const wcstring &buffstr, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); /** Perform syntax highlighting for the text in buff. Matching quotes and paranthesis are highlighted. 
The result is @@ -133,5 +134,9 @@ enum typedef unsigned int path_flags_t; bool is_potential_path(const wcstring &const_path, const wcstring_list_t &directories, path_flags_t flags, wcstring *out_path = NULL); +/* For testing */ +void highlight_shell_classic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); +void highlight_shell_new_parser(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); + #endif diff --git a/history.h b/history.h index a19c88440..b9cfc85b0 100644 --- a/history.h +++ b/history.h @@ -61,6 +61,7 @@ public: { return contents; } + bool empty() const { return contents.empty(); diff --git a/parse_constants.h b/parse_constants.h new file mode 100644 index 000000000..104af27f4 --- /dev/null +++ b/parse_constants.h @@ -0,0 +1,259 @@ +/**\file parse_constants.h + + Constants used in the programmatic representation of fish code. +*/ + +#ifndef fish_parse_constants_h +#define fish_parse_constants_h + +#define PARSE_ASSERT(a) assert(a) +#define PARSER_DIE() do { fprintf(stderr, "Parser dying!\n"); exit_without_destructors(-1); } while (0) + + +enum parse_token_type_t +{ + token_type_invalid, + + // Non-terminal tokens + symbol_job_list, + symbol_job, + symbol_job_continuation, + symbol_statement, + symbol_block_statement, + symbol_block_header, + symbol_for_header, + symbol_while_header, + symbol_begin_header, + symbol_function_header, + + symbol_if_statement, + symbol_if_clause, + symbol_else_clause, + symbol_else_continuation, + + symbol_switch_statement, + symbol_case_item_list, + symbol_case_item, + + symbol_boolean_statement, + symbol_decorated_statement, + symbol_plain_statement, + symbol_arguments_or_redirections_list, + symbol_argument_or_redirection, + + symbol_argument_list, + + symbol_argument, + symbol_redirection, + + symbol_optional_background, + + symbol_end_command, + + // Terminal types + parse_token_type_string, + 
parse_token_type_pipe, + parse_token_type_redirection, + parse_token_type_background, + parse_token_type_end, + + // Special terminal type that means no more tokens forthcoming + parse_token_type_terminate, + + // Very special terminal types that don't appear in the production list + parse_special_type_parse_error, + parse_special_type_tokenizer_error, + parse_special_type_comment, + + FIRST_TERMINAL_TYPE = parse_token_type_string, + LAST_TERMINAL_TYPE = parse_token_type_terminate, + + LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, + FIRST_PARSE_TOKEN_TYPE = parse_token_type_string +}; + +enum parse_keyword_t +{ + parse_keyword_none, + parse_keyword_if, + parse_keyword_else, + parse_keyword_for, + parse_keyword_in, + parse_keyword_while, + parse_keyword_begin, + parse_keyword_function, + parse_keyword_switch, + parse_keyword_case, + parse_keyword_end, + parse_keyword_and, + parse_keyword_or, + parse_keyword_not, + parse_keyword_command, + parse_keyword_builtin, + + LAST_KEYWORD = parse_keyword_builtin +}; + +/* Statement decorations. 
This matches the order of productions in decorated_statement */ +enum parse_statement_decoration_t +{ + parse_statement_decoration_none, + parse_statement_decoration_command, + parse_statement_decoration_builtin +}; + +/* Parse error code list */ +enum parse_error_code_t +{ + parse_error_none, + + /* Matching values from enum parser_error */ + parse_error_syntax, + parse_error_eval, + parse_error_cmdsubst, + + parse_error_generic, // unclassified error types + + parse_error_tokenizer, //tokenizer error + + parse_error_unbalancing_end, //end outside of block + parse_error_unbalancing_else, //else outside of if + parse_error_unbalancing_case, //case outside of switch + + parse_error_double_pipe, // foo || bar, has special error message + parse_error_double_background // foo && bar, has special error message +}; + +enum { + PARSER_TEST_ERROR = 1, + PARSER_TEST_INCOMPLETE = 2 +}; +typedef unsigned int parser_test_error_bits_t; + + +/** Maximum number of function calls. */ +#define FISH_MAX_STACK_DEPTH 128 + +/** Error message on a function that calls itself immediately */ +#define INFINITE_FUNC_RECURSION_ERR_MSG _( L"The function '%ls' calls itself immediately, which would result in an infinite loop.") + + +/** Error message on reaching maximum call stack depth */ +#define CALL_STACK_LIMIT_EXCEEDED_ERR_MSG _( L"The function call stack limit has been exceeded. Do you have an accidental infinite loop?") + +/** Error message when a non-string token is found when expecting a command name */ +#define CMD_OR_ERR_MSG _( L"Expected a command, but instead found a pipe. Did you mean 'COMMAND; or COMMAND'? See the help section for the 'or' builtin command by typing 'help or'.") + +/** Error message when a non-string token is found when expecting a command name */ +#define CMD_AND_ERR_MSG _( L"Expected a command, but instead found a '&'. Did you mean 'COMMAND; and COMMAND'? 
See the help section for the 'and' builtin command by typing 'help and'.") + +/** Error message when encountering an illegal command name */ +#define ILLEGAL_CMD_ERR_MSG _( L"Illegal command name '%ls'") + +/** Error message when encountering an illegal file descriptor */ +#define ILLEGAL_FD_ERR_MSG _( L"Illegal file descriptor in redirection '%ls'") + +/** Error message for wildcards with no matches */ +#define WILDCARD_ERR_MSG _( L"No matches for wildcard '%ls'.") + +/** Error when using break outside of loop */ +#define INVALID_BREAK_ERR_MSG _( L"break command while not inside of loop" ) + +/** Error when using continue outside of loop */ +#define INVALID_CONTINUE_ERR_MSG _( L"continue command while not inside of loop" ) + +/** Error when using return builtin outside of function definition */ +#define INVALID_RETURN_ERR_MSG _( L"'return' builtin command outside of function definition" ) + +/** Error message for Posix-style assignment: foo=bar */ +#define COMMAND_ASSIGN_ERR_MSG _( L"Unknown command '%ls'. Did you mean 'set %ls %ls'? 
See the help section on the set command by typing 'help set'.") + +/** + While block description +*/ +#define WHILE_BLOCK N_( L"'while' block" ) + +/** + For block description +*/ +#define FOR_BLOCK N_( L"'for' block" ) + +/** + Breakpoint block +*/ +#define BREAKPOINT_BLOCK N_( L"Block created by breakpoint" ) + + + +/** + If block description +*/ +#define IF_BLOCK N_( L"'if' conditional block" ) + + +/** + Function definition block description +*/ +#define FUNCTION_DEF_BLOCK N_( L"function definition block" ) + + +/** + Function invocation block description +*/ +#define FUNCTION_CALL_BLOCK N_( L"function invocation block" ) + +/** + Function invocation block description +*/ +#define FUNCTION_CALL_NO_SHADOW_BLOCK N_( L"function invocation block with no variable shadowing" ) + + +/** + Switch block description +*/ +#define SWITCH_BLOCK N_( L"'switch' block" ) + + +/** + Fake block description +*/ +#define FAKE_BLOCK N_( L"unexecutable block" ) + + +/** + Top block description +*/ +#define TOP_BLOCK N_( L"global root block" ) + + +/** + Command substitution block description +*/ +#define SUBST_BLOCK N_( L"command substitution block" ) + + +/** + Begin block description +*/ +#define BEGIN_BLOCK N_( L"'begin' unconditional block" ) + + +/** + Source block description +*/ +#define SOURCE_BLOCK N_( L"Block created by the . builtin" ) + +/** + Source block description +*/ +#define EVENT_BLOCK N_( L"event handler block" ) + + +/** + Unknown block description +*/ +#define UNKNOWN_BLOCK N_( L"unknown/invalid block" ) + + + +#endif diff --git a/parse_execution.cpp b/parse_execution.cpp new file mode 100644 index 000000000..d30f3b994 --- /dev/null +++ b/parse_execution.cpp @@ -0,0 +1,1476 @@ +/**\file parse_execution.cpp + + Provides the "linkage" between a parse_node_tree_t and actual execution structures (job_t, etc.) + + A note on error handling: fish has two kind of errors, fatal parse errors non-fatal runtime errors. 
A fatal error prevents execution of the entire file, while a non-fatal error skips that job. + + Non-fatal errors are printed as soon as they are encountered; otherwise you would have to wait for the execution to finish to see them. +*/ + +#include "parse_execution.h" +#include "parse_util.h" +#include "complete.h" +#include "wildcard.h" +#include "builtin.h" +#include "parser.h" +#include "expand.h" +#include "reader.h" +#include "wutil.h" +#include "exec.h" +#include "path.h" +#include + +/* These are the specific statement types that support redirections */ +static bool specific_statement_type_is_redirectable_block(const parse_node_t &node) +{ + return node.type == symbol_block_statement || node.type == symbol_if_statement || node.type == symbol_switch_statement; + +} + +parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &t, const wcstring &s, parser_t *p) : tree(t), src(s), parser(p), eval_level(0) +{ +} + +/* Utilities */ + +wcstring parse_execution_context_t::get_source(const parse_node_t &node) const +{ + return node.get_source(this->src); +} + +const parse_node_t *parse_execution_context_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const +{ + return this->tree.get_child(parent, which, expected_type); +} + +node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) const +{ + /* Get the offset of a node via pointer arithmetic, very hackish */ + const parse_node_t *addr = &node; + const parse_node_t *base = &this->tree.at(0); + assert(addr >= base); + node_offset_t offset = addr - base; + assert(offset < this->tree.size()); + assert(&tree.at(offset) == &node); + return offset; +} + +const parse_node_t *parse_execution_context_t::infinite_recursive_statement_in_job_list(const parse_node_t &job_list, wcstring *out_func_name) const +{ + assert(job_list.type == symbol_job_list); + /* + This is a bit fragile. 
It is a test to see if we are + inside of function call, but not inside a block in that + function call. If, in the future, the rules for what + block scopes are pushed on function invocation changes, + then this check will break. + */ + const block_t *current = parser->block_at_index(0), *parent = parser->block_at_index(1); + bool is_within_function_call = (current && parent && current->type() == TOP && parent->type() == FUNCTION_CALL); + if (! is_within_function_call) + { + return NULL; + } + + /* Check to see which function call is forbidden */ + if (parser->forbidden_function.empty()) + { + return NULL; + } + const wcstring &forbidden_function_name = parser->forbidden_function.back(); + + /* Get the first job in the job list. */ + const parse_node_t *first_job = tree.next_node_in_node_list(job_list, symbol_job, NULL); + if (first_job == NULL) + { + return NULL; + } + + /* Here's the statement node we find that's infinite recursive */ + const parse_node_t *infinite_recursive_statement = NULL; + + /* Get the list of statements */ + const parse_node_tree_t::parse_node_list_t statements = tree.specific_statements_for_job(*first_job); + + /* Find all the decorated statements. We are interested in statements with no decoration (i.e. not command, not builtin) whose command expands to the forbidden function */ + for (size_t i=0; i < statements.size(); i++) + { + /* We only care about decorated statements, not while statements, etc. */ + const parse_node_t &statement = *statements.at(i); + if (statement.type != symbol_decorated_statement) + { + continue; + } + + const parse_node_t &plain_statement = tree.find_child(statement, symbol_plain_statement); + if (tree.decoration_for_plain_statement(plain_statement) != parse_statement_decoration_none) + { + /* This statement has a decoration like 'builtin' or 'command', and therefore is not infinite recursion. 
In particular this is what enables 'wrapper functions' */ + continue; + } + + /* Ok, this is an undecorated plain statement. Get and expand its command */ + wcstring cmd; + tree.command_for_plain_statement(plain_statement, src, &cmd); + expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); + + if (cmd == forbidden_function_name) + { + /* This is it */ + infinite_recursive_statement = &statement; + if (out_func_name != NULL) + { + *out_func_name = forbidden_function_name; + } + break; + } + } + + assert(infinite_recursive_statement == NULL || infinite_recursive_statement->type == symbol_decorated_statement); + return infinite_recursive_statement; +} + +enum process_type_t parse_execution_context_t::process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const +{ + assert(plain_statement.type == symbol_plain_statement); + enum process_type_t process_type = EXTERNAL; + + /* Determine the process type, which depends on the statement decoration (command, builtin, etc) */ + enum parse_statement_decoration_t decoration = tree.decoration_for_plain_statement(plain_statement); + + /* Do the "exec hack" */ + if (decoration != parse_statement_decoration_command && cmd == L"exec") + { + /* Either 'builtin exec' or just plain 'exec', and definitely not 'command exec'. Note we don't allow overriding exec with a function. */ + process_type = INTERNAL_EXEC; + } + else if (decoration == parse_statement_decoration_command) + { + /* Always a command */ + process_type = EXTERNAL; + } + else if (decoration == parse_statement_decoration_builtin) + { + /* What happens if this builtin is not valid? 
*/ + process_type = INTERNAL_BUILTIN; + } + else if (function_exists(cmd)) + { + process_type = INTERNAL_FUNCTION; + } + else if (builtin_exists(cmd)) + { + process_type = INTERNAL_BUILTIN; + } + else + { + process_type = EXTERNAL; + } + return process_type; +} + +bool parse_execution_context_t::should_cancel_execution(const block_t *block) const +{ + return cancellation_reason(block) != execution_cancellation_none; +} + +parse_execution_context_t::execution_cancellation_reason_t parse_execution_context_t::cancellation_reason(const block_t *block) const +{ + if (shell_is_exiting()) + { + return execution_cancellation_exit; + } + else if (parser && parser->cancellation_requested) + { + return execution_cancellation_skip; + } + else if (block && block->loop_status != LOOP_NORMAL) + { + /* Nasty hack - break and continue set the 'skip' flag as well as the loop status flag. */ + return execution_cancellation_loop_control; + } + else if (block && block->skip) + { + return execution_cancellation_skip; + } + else + { + return execution_cancellation_none; + } +} + +/* Return whether the job contains a single statement, of block type, with no redirections */ +bool parse_execution_context_t::job_is_simple_block(const parse_node_t &job_node) const +{ + assert(job_node.type == symbol_job); + + /* Must have one statement */ + const parse_node_t &statement = *get_child(job_node, 0, symbol_statement); + const parse_node_t &specific_statement = *get_child(statement, 0); + if (! specific_statement_type_is_redirectable_block(specific_statement)) + { + /* Not an appropriate block type */ + return false; + } + + + /* Must be no pipes */ + const parse_node_t &continuation = *get_child(job_node, 1, symbol_job_continuation); + if (continuation.child_count > 0) + { + /* Multiple statements in this job, so there's pipes involved */ + return false; + } + + /* Check for arguments and redirections. All of the above types have an arguments / redirections list. It must be empty. 
*/ + const parse_node_t &args_and_redirections = tree.find_child(specific_statement, symbol_arguments_or_redirections_list); + if (args_and_redirections.child_count > 0) + { + /* Non-empty, we have an argument or redirection */ + return false; + } + + /* Ok, we are a simple block! */ + return true; +} + +parse_execution_result_t parse_execution_context_t::run_if_statement(const parse_node_t &statement) +{ + assert(statement.type == symbol_if_statement); + + /* Push an if block */ + if_block_t *ib = new if_block_t(); + ib->node_offset = this->get_offset(statement); + parser->push_block(ib); + + parse_execution_result_t result = parse_execution_success; + + /* We have a sequence of if clauses, with a final else, resulting in a single job list that we execute */ + const parse_node_t *job_list_to_execute = NULL; + const parse_node_t *if_clause = get_child(statement, 0, symbol_if_clause); + const parse_node_t *else_clause = get_child(statement, 1, symbol_else_clause); + for (;;) + { + if (should_cancel_execution(ib)) + { + result = parse_execution_cancelled; + break; + } + + assert(if_clause != NULL && else_clause != NULL); + const parse_node_t &condition = *get_child(*if_clause, 1, symbol_job); + + /* Check the condition. 
We treat parse_execution_errored here as failure, in accordance with historic behavior */ + parse_execution_result_t cond_ret = run_1_job(condition, ib); + bool take_branch = (cond_ret == parse_execution_success) && proc_get_last_status() == EXIT_SUCCESS; + + if (take_branch) + { + /* condition succeeded */ + job_list_to_execute = get_child(*if_clause, 3, symbol_job_list); + break; + } + else if (else_clause->child_count == 0) + { + /* 'if' condition failed, no else clause, we're done */ + job_list_to_execute = NULL; + break; + } + else + { + /* We have an 'else continuation' (either else-if or else) */ + const parse_node_t &else_cont = *get_child(*else_clause, 1, symbol_else_continuation); + assert(else_cont.production_idx < 2); + if (else_cont.production_idx == 0) + { + /* it's an 'else if', go to the next one */ + if_clause = get_child(else_cont, 0, symbol_if_clause); + else_clause = get_child(else_cont, 1, symbol_else_clause); + } + else + { + /* it's the final 'else', we're done */ + assert(else_cont.production_idx == 1); + job_list_to_execute = get_child(else_cont, 1, symbol_job_list); + break; + } + } + } + + /* Execute any job list we got */ + if (job_list_to_execute != NULL) + { + run_job_list(*job_list_to_execute, ib); + } + + /* Done */ + parser->pop_block(ib); + + return result; +} + +parse_execution_result_t parse_execution_context_t::run_begin_statement(const parse_node_t &header, const parse_node_t &contents) +{ + assert(header.type == symbol_begin_header); + assert(contents.type == symbol_job_list); + + /* Basic begin/end block. Push a scope block. 
*/ + scope_block_t *sb = new scope_block_t(BEGIN); + parser->push_block(sb); + + /* Run the job list */ + parse_execution_result_t ret = run_job_list(contents, sb); + + /* Pop the block */ + parser->pop_block(sb); + + return ret; + } + +/* Define a function */ +parse_execution_result_t parse_execution_context_t::run_function_statement(const parse_node_t &header, const parse_node_t &contents) +{ + assert(header.type == symbol_function_header); + assert(contents.type == symbol_job_list); + parse_execution_result_t result = parse_execution_success; + + /* Get arguments */ + const parse_node_t *unmatched_wildcard = NULL; + wcstring_list_t argument_list = this->determine_arguments(header, &unmatched_wildcard); + + if (unmatched_wildcard != NULL) + { + report_unmatched_wildcard_error(*unmatched_wildcard); + result = parse_execution_errored; + } + + if (result == parse_execution_success) + { + const wcstring contents_str = get_source(contents); + wcstring error_str; + int err = define_function(*parser, argument_list, contents_str, &error_str); + proc_set_last_status(err); + + if (! error_str.empty()) + { + this->report_error(header, L"%ls", error_str.c_str()); + result = parse_execution_errored; + } + } + return result; + +} + +parse_execution_result_t parse_execution_context_t::run_block_statement(const parse_node_t &statement) +{ + assert(statement.type == symbol_block_statement); + + const parse_node_t &block_header = *get_child(statement, 0, symbol_block_header); //block header + const parse_node_t &header = *get_child(block_header, 0); //specific header type (e.g. 
for loop) + const parse_node_t &contents = *get_child(statement, 2, symbol_job_list); //block contents + + parse_execution_result_t ret = parse_execution_success; + switch (header.type) + { + case symbol_for_header: + ret = run_for_statement(header, contents); + break; + + case symbol_while_header: + ret = run_while_statement(header, contents); + break; + + case symbol_function_header: + ret = run_function_statement(header, contents); + break; + + case symbol_begin_header: + ret = run_begin_statement(header, contents); + break; + + default: + fprintf(stderr, "Unexpected block header: %ls\n", header.describe().c_str()); + PARSER_DIE(); + break; + } + + return ret; +} + +parse_execution_result_t parse_execution_context_t::run_for_statement(const parse_node_t &header, const parse_node_t &block_contents) +{ + assert(header.type == symbol_for_header); + assert(block_contents.type == symbol_job_list); + + /* Get the variable name: `for var_name in ...` */ + const parse_node_t &var_name_node = *get_child(header, 1, parse_token_type_string); + const wcstring for_var_name = get_source(var_name_node); + + /* Get the contents to iterate over. */ + const parse_node_t *unmatched_wildcard = NULL; + wcstring_list_t argument_list = this->determine_arguments(header, &unmatched_wildcard); + if (unmatched_wildcard != NULL) + { + return report_unmatched_wildcard_error(*unmatched_wildcard); + } + + parse_execution_result_t ret = parse_execution_success; + + for_block_t *fb = new for_block_t(for_var_name); + parser->push_block(fb); + + /* Note that we store the sequence of values in opposite order */ + std::reverse(argument_list.begin(), argument_list.end()); + fb->sequence = argument_list; + + /* Now drive the for loop. */ + while (! 
fb->sequence.empty()) + { + if (should_cancel_execution(fb)) + { + ret = parse_execution_cancelled; + break; + } + + const wcstring &for_variable = fb->variable; + const wcstring &val = fb->sequence.back(); + env_set(for_variable, val.c_str(), ENV_LOCAL); + fb->sequence.pop_back(); + fb->loop_status = LOOP_NORMAL; + fb->skip = 0; + + this->run_job_list(block_contents, fb); + + if (this->cancellation_reason(fb) == execution_cancellation_loop_control) + { + /* Handle break or continue */ + if (fb->loop_status == LOOP_CONTINUE) + { + /* Reset the loop state */ + fb->loop_status = LOOP_NORMAL; + fb->skip = false; + continue; + } + else if (fb->loop_status == LOOP_BREAK) + { + break; + } + } + } + + parser->pop_block(fb); + + return ret; +} + + +parse_execution_result_t parse_execution_context_t::run_switch_statement(const parse_node_t &statement) +{ + assert(statement.type == symbol_switch_statement); + parse_execution_result_t ret = parse_execution_success; + const parse_node_t *matching_case_item = NULL; + + parse_execution_result_t result = parse_execution_success; + + /* Get the switch variable */ + const parse_node_t &switch_value_node = *get_child(statement, 1, parse_token_type_string); + const wcstring switch_value = get_source(switch_value_node); + + /* Expand it */ + std::vector switch_values_expanded; + int expand_ret = expand_string(switch_value, switch_values_expanded, EXPAND_NO_DESCRIPTIONS); + switch (expand_ret) + { + case EXPAND_ERROR: + { + result = report_error(switch_value_node, + _(L"Could not expand string '%ls'"), + switch_value.c_str()); + break; + } + + case EXPAND_WILDCARD_NO_MATCH: + { + /* Store the node that failed to expand */ + report_error(switch_value_node, WILDCARD_ERR_MSG, switch_value.c_str()); + ret = parse_execution_errored; + break; + } + + case EXPAND_WILDCARD_MATCH: + case EXPAND_OK: + { + break; + } + } + + if (result == parse_execution_success && switch_values_expanded.size() != 1) + { + result = report_error(switch_value_node, 
+ _(L"switch: Expected exactly one argument, got %lu\n"), + switch_values_expanded.size()); + } + const wcstring &switch_value_expanded = switch_values_expanded.at(0).completion; + + switch_block_t *sb = new switch_block_t(switch_value_expanded); + parser->push_block(sb); + + if (result == parse_execution_success) + { + /* Expand case statements */ + const parse_node_t *case_item_list = get_child(statement, 3, symbol_case_item_list); + + /* Loop while we don't have a match but do have more of the list */ + while (matching_case_item == NULL && case_item_list != NULL) + { + if (should_cancel_execution(sb)) + { + result = parse_execution_cancelled; + break; + } + + /* Get the next item and the remainder of the list */ + const parse_node_t *case_item = tree.next_node_in_node_list(*case_item_list, symbol_case_item, &case_item_list); + if (case_item == NULL) + { + /* No more items */ + break; + } + + /* Pull out the argument list */ + const parse_node_t &arg_list = *get_child(*case_item, 1, symbol_argument_list); + + /* Expand arguments. We explicitly ignore unmatched_wildcard. That is, a case item list may have a wildcard that fails to expand to anything. 
*/ + const wcstring_list_t case_args = this->determine_arguments(arg_list, NULL); + + for (size_t i=0; i < case_args.size(); i++) + { + const wcstring &arg = case_args.at(i); + + /* Unescape wildcards so they can be expanded again */ + wchar_t *unescaped_arg = parse_util_unescape_wildcards(arg.c_str()); + bool match = wildcard_match(switch_value_expanded, unescaped_arg); + free(unescaped_arg); + + /* If this matched, we're done */ + if (match) + { + matching_case_item = case_item; + break; + } + } + } + } + + if (result == parse_execution_success && matching_case_item != NULL) + { + /* Success, evaluate the job list */ + const parse_node_t *job_list = get_child(*matching_case_item, 3, symbol_job_list); + result = this->run_job_list(*job_list, sb); + } + + parser->pop_block(sb); + + return result; +} + +parse_execution_result_t parse_execution_context_t::run_while_statement(const parse_node_t &header, const parse_node_t &block_contents) +{ + assert(header.type == symbol_while_header); + assert(block_contents.type == symbol_job_list); + + /* Push a while block */ + while_block_t *wb = new while_block_t(); + wb->status = WHILE_TEST_FIRST; + wb->node_offset = this->get_offset(header); + parser->push_block(wb); + + parse_execution_result_t ret = parse_execution_success; + + /* The condition and contents of the while loop, as a job and job list respectively */ + const parse_node_t &while_condition = *get_child(header, 1, symbol_job); + + /* Run while the condition is true */ + for (;;) + { + /* Check the condition */ + parse_execution_result_t cond_result = this->run_1_job(while_condition, wb); + + /* We only continue on successful execution and EXIT_SUCCESS */ + if (cond_result != parse_execution_success || proc_get_last_status() != EXIT_SUCCESS) + { + break; + } + + /* Check cancellation */ + if (this->should_cancel_execution(wb)) + { + ret = parse_execution_cancelled; + break; + } + + + /* The block ought to go inside the loop (see #1212) */ + 
this->run_job_list(block_contents, wb); + + if (this->cancellation_reason(wb) == execution_cancellation_loop_control) + { + /* Handle break or continue */ + if (wb->loop_status == LOOP_CONTINUE) + { + /* Reset the loop state */ + wb->loop_status = LOOP_NORMAL; + wb->skip = false; + continue; + } + else if (wb->loop_status == LOOP_BREAK) + { + break; + } + } + } + + /* Done */ + parser->pop_block(wb); + + return ret; +} + +/* Reports an error located at the given node. If parser->show_errors is set, formats the printf-style message, asks the parser for a backtrace and prints the result to stderr. Always returns parse_execution_errored, so you can assign the result to an 'errored' variable */ +parse_execution_result_t parse_execution_context_t::report_error(const parse_node_t &node, const wchar_t *fmt, ...) +{ + if (parser->show_errors) + { + /* Create an error */ + parse_error_t error; + error.source_start = node.source_start; + error.source_length = node.source_length; + error.code = parse_error_syntax; //hackish + + va_list va; + va_start(va, fmt); + error.text = vformat_string(fmt, va); + va_end(va); + + /* Get a backtrace */ + wcstring backtrace_and_desc; + const parse_error_list_t error_list = parse_error_list_t(1, error); + parser->get_backtrace(src, error_list, &backtrace_and_desc); + + fprintf(stderr, "%ls", backtrace_and_desc.c_str()); + } + + return parse_execution_errored; +} + +/* Reports an unmatched wildcard error: sets the last status to STATUS_UNMATCHED_WILDCARD and returns parse_execution_errored */ +parse_execution_result_t parse_execution_context_t::report_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard) +{ + proc_set_last_status(STATUS_UNMATCHED_WILDCARD); + /* For reasons I cannot explain, unmatched wildcards are only reported in interactive use.
*/ + if (get_is_interactive()) + { + return report_error(unmatched_wildcard, WILDCARD_ERR_MSG, get_source(unmatched_wildcard).c_str()); + } + else + { + return parse_execution_errored; + } +} + +/* Handle the case of command not found */ +void parse_execution_context_t::handle_command_not_found(const wcstring &cmd_str, const parse_node_t &statement_node, int err_code) +{ + assert(statement_node.type == symbol_plain_statement); + + /* We couldn't find the specified command. This is a non-fatal error. We want to set the exit status to 127, which is the standard number used by other shells like bash and zsh. */ + + const wchar_t * const cmd = cmd_str.c_str(); + const wchar_t * const equals_ptr = wcschr(cmd, L'='); + if (equals_ptr != NULL) + { + /* Try to figure out if this is a pure variable assignment (foo=bar), or if this appears to be running a command (foo=bar ruby...) */ + + const wcstring name_str = wcstring(cmd, equals_ptr - cmd); //variable name, up to the = + const wcstring val_str = wcstring(equals_ptr + 1); //variable value, past the = + + + const parse_node_tree_t::parse_node_list_t args = tree.find_nodes(statement_node, symbol_argument, 1); + + if (! args.empty()) + { + const wcstring argument = get_source(*args.at(0)); + + wcstring ellipsis_str = wcstring(1, ellipsis_char); + if (ellipsis_str == L"$") + ellipsis_str = L"..."; + + /* Looks like a command */ + this->report_error(statement_node, + _(L"Unknown command '%ls'. Did you mean to run %ls with a modified environment? Try 'env %ls=%ls %ls%ls'. 
See the help section on the set command by typing 'help set'."), + cmd, + argument.c_str(), + name_str.c_str(), + val_str.c_str(), + argument.c_str(), + ellipsis_str.c_str()); + } + else + { + this->report_error(statement_node, + COMMAND_ASSIGN_ERR_MSG, + cmd, + name_str.c_str(), + val_str.c_str()); + } + } + else if (cmd[0]==L'$' || cmd[0] == VARIABLE_EXPAND || cmd[0] == VARIABLE_EXPAND_SINGLE) + { + + const env_var_t val_wstr = env_get_string(cmd+1); + const wchar_t *val = val_wstr.missing() ? NULL : val_wstr.c_str(); + if (val) + { + this->report_error(statement_node, + _(L"Variables may not be used as commands. Instead, define a function like 'function %ls; %ls $argv; end' or use the eval builtin instead, like 'eval %ls'. See the help section for the function command by typing 'help function'."), + cmd+1, + val, + cmd, + cmd); + } + else + { + this->report_error(statement_node, + _(L"Variables may not be used as commands. Instead, define a function or use the eval builtin instead, like 'eval %ls'. See the help section for the function command by typing 'help function'."), + cmd, + cmd); + } + } + else if (wcschr(cmd, L'$')) + { + this->report_error(statement_node, + _(L"Commands may not contain variables. Use the eval builtin instead, like 'eval %ls'. 
See the help section for the eval command by typing 'help eval'."), + cmd, + cmd); + } + else if (err_code!=ENOENT) + { + this->report_error(statement_node, + _(L"The file '%ls' is not executable by this user"), + cmd?cmd:L"UNKNOWN"); + } + else + { + /* + Handle unrecognized commands with standard + command not found handler that can make better + error messages + */ + + wcstring_list_t event_args; + event_args.push_back(cmd_str); + event_fire_generic(L"fish_command_not_found", &event_args); + + /* Here we want to report an error (so it shows a backtrace), but with no text */ + this->report_error(statement_node, L""); + } + + /* Set the last proc status appropriately */ + proc_set_last_status(err_code==ENOENT?STATUS_UNKNOWN_COMMAND:STATUS_NOT_EXECUTABLE); +} + +/* Creates a 'normal' (non-block) process */ +parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t *job, process_t *proc, const parse_node_t &statement) +{ + assert(job != NULL); + assert(proc != NULL); + assert(statement.type == symbol_plain_statement); + + /* We may decide that a command should be an implicit cd */ + bool use_implicit_cd = false; + + /* Get the command. We expect to always get it here. */ + wcstring cmd; + bool got_cmd = tree.command_for_plain_statement(statement, src, &cmd); + assert(got_cmd); + + /* Expand it as a command. Return an error on failure. */ + bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); + if (! 
expanded) + { + report_error(statement, ILLEGAL_CMD_ERR_MSG, cmd.c_str()); + return parse_execution_errored; + } + + /* Determine the process type */ + enum process_type_t process_type = process_type_for_command(statement, cmd); + + /* Check for stack overflow */ + if (process_type == INTERNAL_FUNCTION && parser->forbidden_function.size() > FISH_MAX_STACK_DEPTH) + { + this->report_error(statement, CALL_STACK_LIMIT_EXCEEDED_ERR_MSG); + return parse_execution_errored; + } + + wcstring path_to_external_command; + if (process_type == EXTERNAL) + { + /* Determine the actual command. This may be an implicit cd. */ + bool has_command = path_get_path(cmd, &path_to_external_command); + + /* If there was no command, then we care about the value of errno after checking for it, to distinguish between e.g. no file vs permissions problem */ + const int no_cmd_err_code = errno; + + /* If the specified command does not exist, and is undecorated, try using an implicit cd. */ + if (! has_command && tree.decoration_for_plain_statement(statement) == parse_statement_decoration_none) + { + /* Implicit cd requires an empty argument and redirection list */ + const parse_node_t *args = get_child(statement, 1, symbol_arguments_or_redirections_list); + if (args->child_count == 0) + { + /* Ok, no arguments or redirections; check to see if the first argument is a directory */ + wcstring implicit_cd_path; + use_implicit_cd = path_can_be_implicit_cd(cmd, &implicit_cd_path); + } + } + + if (! has_command && ! 
use_implicit_cd) + { + /* No command */ + this->handle_command_not_found(cmd, statement, no_cmd_err_code); + return parse_execution_errored; + } + } + + /* The argument list and set of IO redirections that we will construct for the process */ + wcstring_list_t argument_list; + io_chain_t process_io_chain; + if (use_implicit_cd) + { + /* Implicit cd is simple */ + argument_list.push_back(L"cd"); + argument_list.push_back(cmd); + path_to_external_command.clear(); + + /* If we have defined a wrapper around cd, use it, otherwise use the cd builtin */ + process_type = function_exists(L"cd") ? INTERNAL_FUNCTION : INTERNAL_BUILTIN; + } + else + { + /* Form the list of arguments. The command is the first argument. TODO: count hack, where we treat 'count --help' as different from 'count $foo' that expands to 'count --help'. fish 1.x never successfully did this, but it tried to! */ + const parse_node_t *unmatched_wildcard = NULL; + argument_list = this->determine_arguments(statement, &unmatched_wildcard); + argument_list.insert(argument_list.begin(), cmd); + + /* If we were not able to expand any wildcards, here is the first one that failed */ + if (unmatched_wildcard != NULL) + { + job_set_flag(job, JOB_WILDCARD_ERROR, 1); + report_unmatched_wildcard_error(*unmatched_wildcard); + return parse_execution_errored; + } + + /* The set of IO redirections that we construct for the process */ + if (! this->determine_io_chain(statement, &process_io_chain)) + { + return parse_execution_errored; + } + + /* Determine the process type */ + process_type = process_type_for_command(statement, cmd); + } + + + /* Populate the process */ + proc->type = process_type; + proc->set_argv(argument_list); + proc->set_io_chain(process_io_chain); + proc->actual_cmd = path_to_external_command; + return parse_execution_success; +} + +/* Determine the list of arguments, expanding stuff. If we have a wildcard and none could be expanded, return the unexpandable wildcard node by reference. 
*/ +wcstring_list_t parse_execution_context_t::determine_arguments(const parse_node_t &parent, const parse_node_t **out_unmatched_wildcard_node) +{ + wcstring_list_t argument_list; + + /* Whether we failed to match any wildcards, and succeeded in matching any wildcards */ + bool unmatched_wildcard = false, matched_wildcard = false; + + /* First node that failed to expand as a wildcard (if any) */ + const parse_node_t *unmatched_wildcard_node = NULL; + + /* Get all argument nodes underneath the statement */ + const parse_node_tree_t::parse_node_list_t argument_nodes = tree.find_nodes(parent, symbol_argument); + argument_list.reserve(argument_nodes.size()); + for (size_t i=0; i < argument_nodes.size(); i++) + { + const parse_node_t &arg_node = *argument_nodes.at(i); + + /* Expect all arguments to have source */ + assert(arg_node.has_source()); + const wcstring arg_str = arg_node.get_source(src); + + /* Expand this string */ + std::vector arg_expanded; + int expand_ret = expand_string(arg_str, arg_expanded, EXPAND_NO_DESCRIPTIONS); + switch (expand_ret) + { + case EXPAND_ERROR: + { + this->report_error(arg_node, + _(L"Could not expand string '%ls'"), + arg_str.c_str()); + break; + } + + case EXPAND_WILDCARD_NO_MATCH: + { + /* Store the node that failed to expand */ + unmatched_wildcard = true; + if (! unmatched_wildcard_node) + { + unmatched_wildcard_node = &arg_node; + } + break; + } + + case EXPAND_WILDCARD_MATCH: + { + matched_wildcard = true; + break; + } + + case EXPAND_OK: + { + break; + } + } + + /* Now copy over any expanded arguments */ + for (size_t i=0; i < arg_expanded.size(); i++) + { + argument_list.push_back(arg_expanded.at(i).completion); + } + } + + /* Return if we had a wildcard problem */ + if (out_unmatched_wildcard_node != NULL && unmatched_wildcard && ! 
matched_wildcard)
+    {
+        *out_unmatched_wildcard_node = unmatched_wildcard_node;
+    }
+
+    return argument_list;
+}
+
+/* Builds the chain of IO redirections for the redirection nodes found beneath statement_node.
+   Returns true on success, in which case the chain is swapped into *out_chain (if non-NULL).
+   Returns false if any redirection was invalid; an error has then been reported and *out_chain is left untouched. */
+bool parse_execution_context_t::determine_io_chain(const parse_node_t &statement_node, io_chain_t *out_chain)
+{
+    io_chain_t result;
+    bool errored = false;
+
+    /* We are called with a statement of varying types. We require that the statement have an arguments_or_redirections_list child. */
+    const parse_node_t &args_and_redirections_list = tree.find_child(statement_node, symbol_arguments_or_redirections_list);
+
+    /* Get all redirection nodes underneath the statement */
+    const parse_node_tree_t::parse_node_list_t redirect_nodes = tree.find_nodes(args_and_redirections_list, symbol_redirection);
+    for (size_t i=0; i < redirect_nodes.size(); i++)
+    {
+        const parse_node_t &redirect_node = *redirect_nodes.at(i);
+
+        int source_fd = -1; /* source fd */
+        wcstring target; /* file path or target fd */
+        enum token_type redirect_type = tree.type_for_redirection(redirect_node, src, &source_fd, &target);
+
+        /* PCA: I can't justify this EXPAND_SKIP_VARIABLES flag. It was like this when I got here. */
+        bool target_expanded = expand_one(target, no_exec ? EXPAND_SKIP_VARIABLES : 0);
+        if (! target_expanded || target.empty())
+        {
+            /* Should improve this error message */
+            report_error(redirect_node,
+                         _(L"Invalid redirection target: %ls"),
+                         target.c_str());
+            errored = true;
+
+            /* The target is unusable, so do not go on to parse it as a file descriptor or build an IO object from it; that could only produce a second, redundant error. The chain is discarded once errored is set anyways. */
+            continue;
+        }
+
+
+        /* Generate the actual IO redirection */
+        shared_ptr new_io;
+        assert(redirect_type != TOK_NONE);
+        switch (redirect_type)
+        {
+            case TOK_REDIRECT_FD:
+            {
+                if (target == L"-")
+                {
+                    /* A target of "-" means close the source fd */
+                    new_io.reset(new io_close_t(source_fd));
+                }
+                else
+                {
+                    /* Otherwise the target must parse completely as a non-negative integer fd */
+                    wchar_t *end = NULL;
+                    errno = 0;
+                    int old_fd = fish_wcstoi(target.c_str(), &end, 10);
+                    if (old_fd < 0 || errno || *end)
+                    {
+                        errored = report_error(redirect_node,
+                                               _(L"Requested redirection to '%ls', which is not a valid file descriptor"),
+                                               target.c_str());
+                    }
+                    else
+                    {
+                        new_io.reset(new io_fd_t(source_fd, old_fd));
+                    }
+                }
+                break;
+            }
+
+            case TOK_REDIRECT_OUT:
+            case TOK_REDIRECT_APPEND:
+            case TOK_REDIRECT_IN:
+            case TOK_REDIRECT_NOCLOB:
+            {
+                int oflags = oflags_for_redirection_type(redirect_type);
+                io_file_t *new_io_file = new io_file_t(source_fd, target, oflags);
+                new_io.reset(new_io_file);
+                break;
+            }
+
+            default:
+            {
+                // Should be unreachable
+                fprintf(stderr, "Unexpected redirection type %ld. aborting.\n", (long)redirect_type);
+                PARSER_DIE();
+                break;
+            }
+        }
+
+        /* Append the new_io if we got one */
+        if (new_io.get() != NULL)
+        {
+            result.push_back(new_io);
+        }
+    }
+
+    /* Only hand the chain back if every redirection was valid */
+    if (out_chain && ! errored)
+    {
+        std::swap(*out_chain, result);
+    }
+    return ! errored;
+}
+
+parse_execution_result_t parse_execution_context_t::populate_boolean_process(job_t *job, process_t *proc, const parse_node_t &bool_statement)
+{
+    // Handle a boolean statement
+    bool skip_job = false;
+    assert(bool_statement.type == symbol_boolean_statement);
+    switch (bool_statement.production_idx)
+    {
+            // These magic numbers correspond to productions for boolean_statement
+        case 0:
+            // AND. Skip if the last job failed.
+            skip_job = (proc_get_last_status() != 0);
+            break;
+
+        case 1:
+            // OR. Skip if the last job succeeded. 
+ skip_job = (proc_get_last_status() == 0); + break; + + case 2: + // NOT. Negate it. + job_set_flag(job, JOB_NEGATE, !job_get_flag(job, JOB_NEGATE)); + break; + + default: + { + fprintf(stderr, "Unexpected production in boolean statement\n"); + PARSER_DIE(); + break; + } + } + + if (skip_job) + { + return parse_execution_skipped; + } + else + { + const parse_node_t &subject = *tree.get_child(bool_statement, 1, symbol_statement); + return this->populate_job_process(job, proc, subject); + } +} + +parse_execution_result_t parse_execution_context_t::populate_block_process(job_t *job, process_t *proc, const parse_node_t &statement_node) +{ + /* We handle block statements by creating INTERNAL_BLOCK_NODE, that will bounce back to us when it's time to execute them */ + assert(statement_node.type == symbol_block_statement || statement_node.type == symbol_if_statement || statement_node.type == symbol_switch_statement); + + /* The set of IO redirections that we construct for the process */ + io_chain_t process_io_chain; + bool errored = ! this->determine_io_chain(statement_node, &process_io_chain); + if (errored) + return parse_execution_errored; + + proc->type = INTERNAL_BLOCK_NODE; + proc->internal_block_node = this->get_offset(statement_node); + proc->set_io_chain(process_io_chain); + return parse_execution_success; +} + + +/* Fills in the process_t supplied by the caller from the given statement node, dispatching on the statement's specific type. It does not allocate the process_t; the caller keeps ownership of it. 
*/ +parse_execution_result_t parse_execution_context_t::populate_job_process(job_t *job, process_t *proc, const parse_node_t &statement_node) +{ + assert(statement_node.type == symbol_statement); + assert(statement_node.child_count == 1); + + // Get the "specific statement" which is boolean / block / if / switch / decorated + const parse_node_t &specific_statement = *get_child(statement_node, 0); + + parse_execution_result_t result = parse_execution_success; + + switch (specific_statement.type) + { + case symbol_boolean_statement: + { + result = this->populate_boolean_process(job, proc, specific_statement); + break; + } + + case symbol_block_statement: + case symbol_if_statement: + case symbol_switch_statement: + { + result = this->populate_block_process(job, proc, specific_statement); + break; + } + + case symbol_decorated_statement: + { + /* Get the plain statement. It will pull out the decoration itself */ + const parse_node_t &plain_statement = tree.find_child(specific_statement, symbol_plain_statement); + result = this->populate_plain_process(job, proc, plain_statement); + break; + } + + default: + fprintf(stderr, "'%ls' not handled by new parser yet\n", specific_statement.describe().c_str()); + PARSER_DIE(); + break; + } + + return result; +} + + +parse_execution_result_t parse_execution_context_t::populate_job_from_job_node(job_t *j, const parse_node_t &job_node, const block_t *associated_block) +{ + assert(job_node.type == symbol_job); + + /* Tell the job what its command is */ + j->set_command(get_source(job_node)); + + /* We are going to construct process_t structures for every statement in the job. Get the first statement. */ + const parse_node_t *statement_node = get_child(job_node, 0, symbol_statement); + assert(statement_node != NULL); + + parse_execution_result_t result = parse_execution_success; + + /* Create processes. Each one may fail. 
*/ + std::vector processes; + processes.push_back(new process_t()); + result = this->populate_job_process(j, processes.back(), *statement_node); + + /* Construct process_ts for job continuations (pipelines), by walking the list until we hit the terminal (empty) job continuation */ + const parse_node_t *job_cont = get_child(job_node, 1, symbol_job_continuation); + assert(job_cont != NULL); + while (result == parse_execution_success && job_cont->child_count > 0) + { + assert(job_cont->type == symbol_job_continuation); + + /* Handle the pipe, whose fd may not be the obvious stdout */ + const parse_node_t &pipe_node = *get_child(*job_cont, 0, parse_token_type_pipe); + int pipe_write_fd = fd_redirected_by_pipe(get_source(pipe_node)); + if (pipe_write_fd == -1) + { + result = report_error(pipe_node, ILLEGAL_FD_ERR_MSG, get_source(pipe_node).c_str()); + break; + } + processes.back()->pipe_write_fd = pipe_write_fd; + + /* Get the statement node and make a process from it */ + const parse_node_t *statement_node = get_child(*job_cont, 1, symbol_statement); + assert(statement_node != NULL); + + /* Store the new process (and maybe with an error) */ + processes.push_back(new process_t()); + result = this->populate_job_process(j, processes.back(), *statement_node); + + /* Get the next continuation */ + job_cont = get_child(*job_cont, 2, symbol_job_continuation); + assert(job_cont != NULL); + } + + /* Return what happened */ + if (result == parse_execution_success) + { + /* Link up the processes */ + assert(! 
processes.empty());
+        j->first_process = processes.at(0);
+        for (size_t i=1 ; i < processes.size(); i++)
+        {
+            processes.at(i-1)->next = processes.at(i);
+        }
+    }
+    else
+    {
+        /* Clean up processes */
+        for (size_t i=0; i < processes.size(); i++)
+        {
+            const process_t *proc = processes.at(i);
+            processes.at(i) = NULL;
+            delete proc;
+        }
+    }
+    return result;
+}
+
+/* Runs a single job node.
+   Returns parse_execution_cancelled if execution should be cancelled on entry, and parse_execution_errored if the terminal modes cannot be read in interactive mode.
+   A "simple block" job is run directly and its result returned.
+   Otherwise a job_t is created, populated and (if population succeeded) executed; on that path the function returns parse_execution_success even when population failed, since the error has already been reported. */
+parse_execution_result_t parse_execution_context_t::run_1_job(const parse_node_t &job_node, const block_t *associated_block)
+{
+    /* NOTE(review): result is never updated from pop_result below, so the job path always returns success - confirm this is intended */
+    parse_execution_result_t result = parse_execution_success;
+
+    bool log_it = false;
+    if (log_it)
+    {
+        fprintf(stderr, "%s: %ls\n", __FUNCTION__, get_source(job_node).c_str());
+    }
+
+
+    if (should_cancel_execution(associated_block))
+    {
+        return parse_execution_cancelled;
+    }
+
+    // Get terminal modes
+    struct termios tmodes = {};
+    if (get_is_interactive())
+    {
+        if (tcgetattr(STDIN_FILENO, &tmodes))
+        {
+            // need real error handling here
+            wperror(L"tcgetattr");
+            return parse_execution_errored;
+        }
+    }
+
+    /* Increment the eval_level for the duration of this command */
+    scoped_push saved_eval_level(&eval_level, eval_level + 1);
+
+    /* When we encounter a block construct (e.g. while loop) in the general case, we create a "block process" that has a pointer to its source. This allows us to handle block-level redirections. However, if there are no redirections, then we can just jump into the block directly, which is significantly faster. */
+    if (job_is_simple_block(job_node))
+    {
+        const parse_node_t &statement = *get_child(job_node, 0, symbol_statement);
+        const parse_node_t &specific_statement = *get_child(statement, 0);
+        assert(specific_statement_type_is_redirectable_block(specific_statement));
+        switch (specific_statement.type)
+        {
+            case symbol_block_statement:
+                return this->run_block_statement(specific_statement);
+
+            case symbol_if_statement:
+                return this->run_if_statement(specific_statement);
+
+            case symbol_switch_statement:
+                return this->run_switch_statement(specific_statement);
+
+            default:
+                /* Other types should be impossible due to the specific_statement_type_is_redirectable_block check */
+                PARSER_DIE();
+                break;
+        }
+    }
+
+    /* Profiling support */
+    long long start_time = 0, parse_time = 0, exec_time = 0;
+    const bool do_profile = profile;
+    profile_item_t *profile_item = NULL;
+    if (do_profile)
+    {
+        profile_item = new profile_item_t();
+        profile_item->skipped = 1;
+        profile_items.push_back(profile_item);
+        start_time = get_time();
+    }
+
+    job_t *j = new job_t(acquire_job_id(), block_io);
+    j->tmodes = tmodes;
+    job_set_flag(j, JOB_CONTROL,
+                 (job_control_mode==JOB_CONTROL_ALL) ||
+                 ((job_control_mode == JOB_CONTROL_INTERACTIVE) && (get_is_interactive())));
+
+    job_set_flag(j, JOB_FOREGROUND, 1);
+
+    job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL) \
+                 && (!is_subshell && !is_event));
+
+    job_set_flag(j, JOB_SKIP_NOTIFICATION, is_subshell \
+                 || is_block \
+                 || is_event \
+                 || (!get_is_interactive()));
+
+    /* Populate the job. This may fail for reasons like command_not_found. If this fails, an error will have been printed */
+    parse_execution_result_t pop_result = this->populate_job_from_job_node(j, job_node, associated_block);
+
+    /* Clean up the job on failure or cancellation */
+    bool populated_job = (pop_result == parse_execution_success);
+    if (! populated_job || this->should_cancel_execution(associated_block))
+    {
+        delete j;
+        j = NULL;
+        populated_job = false;
+    }
+
+
+    /* Store time it took to 'parse' the command */
+    if (do_profile)
+    {
+        parse_time = get_time();
+        /* j was deleted and set to NULL above if population failed or was cancelled, so only record the command text when there is still a job to ask */
+        if (j != NULL)
+        {
+            profile_item->cmd = j->command();
+        }
+        profile_item->skipped=parser->current_block()->skip;
+    }
+
+    if (populated_job)
+    {
+        /* Success. Give the job to the parser - it will clean it up. */
+        parser->job_add(j);
+        parser->current_block()->job = j;
+
+        /* Check to see if this contained any external commands */
+        bool job_contained_external_command = false;
+        for (const process_t *proc = j->first_process; proc != NULL; proc = proc->next)
+        {
+            if (proc->type == EXTERNAL)
+            {
+                job_contained_external_command = true;
+                break;
+            }
+        }
+
+        /* Actually execute the job */
+        exec_job(*this->parser, j);
+
+        /* Only external commands require a new fishd barrier */
+        if (!job_contained_external_command)
+        {
+            set_proc_had_barrier(false);
+        }
+    }
+
+    /* If the job was skipped, we pretend it ran anyways */
+    if (result == parse_execution_skipped)
+    {
+        result = parse_execution_success;
+    }
+
+    if (do_profile)
+    {
+        exec_time = get_time();
+        profile_item->level=eval_level;
+        profile_item->parse = (int)(parse_time-start_time);
+        profile_item->exec=(int)(exec_time-parse_time);
+        profile_item->skipped = ! populated_job;
+    }
+
+    /* Clean up jobs. */
+    job_reap(0);
+
+    /* All done */
+    return result;
+}
+
+parse_execution_result_t parse_execution_context_t::run_job_list(const parse_node_t &job_list_node, const block_t *associated_block)
+{
+    assert(job_list_node.type == symbol_job_list);
+
+    parse_execution_result_t result = parse_execution_success;
+    const parse_node_t *job_list = &job_list_node;
+    while (job_list != NULL && ! 
should_cancel_execution(associated_block)) + { + assert(job_list->type == symbol_job_list); + + // Try pulling out a job + const parse_node_t *job = tree.next_node_in_node_list(*job_list, symbol_job, &job_list); + + if (job != NULL) + { + result = this->run_1_job(*job, associated_block); + } + } + + /* Returns the last job executed */ + return result; +} + +parse_execution_result_t parse_execution_context_t::eval_node_at_offset(node_offset_t offset, const block_t *associated_block, const io_chain_t &io) +{ + bool log_it = false; + + /* Don't ever expect to have an empty tree if this is called */ + assert(! tree.empty()); + assert(offset < tree.size()); + + /* Apply this block IO for the duration of this function */ + scoped_push block_io_push(&block_io, io); + + const parse_node_t &node = tree.at(offset); + + if (log_it) + { + fprintf(stderr, "eval node: %ls\n", get_source(node).c_str()); + } + + /* Currently, we only expect to execute the top level job list, or a block node. Assert that. */ + assert(node.type == symbol_job_list || specific_statement_type_is_redirectable_block(node)); + + enum parse_execution_result_t status = parse_execution_success; + switch (node.type) + { + case symbol_job_list: + { + /* We should only get a job list if it's the very first node. 
This is because this is the entry point for both top-level execution (the first node) and INTERNAL_BLOCK_NODE execution (which does block statements, but never job lists) */ + assert(offset == 0); + wcstring func_name; + const parse_node_t *infinite_recursive_node = this->infinite_recursive_statement_in_job_list(node, &func_name); + if (infinite_recursive_node != NULL) + { + /* We have an infinite recursion */ + this->report_error(*infinite_recursive_node, INFINITE_FUNC_RECURSION_ERR_MSG, func_name.c_str()); + status = parse_execution_errored; + } + else + { + /* No infinite recursion */ + status = this->run_job_list(node, associated_block); + } + break; + } + + case symbol_block_statement: + status = this->run_block_statement(node); + break; + + case symbol_if_statement: + status = this->run_if_statement(node); + break; + + case symbol_switch_statement: + status = this->run_switch_statement(node); + break; + + default: + /* In principle, we could support other node types. However we never expect to be passed them - see above. */ + fprintf(stderr, "Unexpected node %ls found in %s\n", node.describe().c_str(), __FUNCTION__); + PARSER_DIE(); + break; + } + + return status; +} diff --git a/parse_execution.h b/parse_execution.h new file mode 100644 index 000000000..6c022cb2b --- /dev/null +++ b/parse_execution.h @@ -0,0 +1,114 @@ +/**\file parse_execution.h + + Provides the "linkage" between a parse_node_tree_t and actual execution structures (job_t, etc.). +*/ + +#ifndef FISH_PARSE_EXECUTION_H +#define FISH_PARSE_EXECUTION_H + +#include "config.h" +#include "util.h" +#include "parse_tree.h" +#include "proc.h" + +class job_t; +struct profile_item_t; +struct block_t; + +enum parse_execution_result_t +{ + /* The job was successfully executed (though it may have failed on its own). */ + parse_execution_success, + + /* The job did not execute due to some error (e.g. failed to wildcard expand). An error will have been printed and proc_last_status will have been set. 
*/ + parse_execution_errored, + + /* The job was cancelled (e.g. Ctrl-C) */ + parse_execution_cancelled, + + /* The job was skipped (e.g. due to a not-taken 'and' command). This is a special return allowed only from the populate functions, not the run functions. */ + parse_execution_skipped +}; + +class parse_execution_context_t +{ + private: + const parse_node_tree_t tree; + const wcstring src; + io_chain_t block_io; + parser_t * const parser; + //parse_error_list_t errors; + + int eval_level; + std::vector profile_items; + + /* No copying allowed */ + parse_execution_context_t(const parse_execution_context_t&); + parse_execution_context_t& operator=(const parse_execution_context_t&); + + /* Should I cancel? */ + bool should_cancel_execution(const block_t *block) const; + + /* Ways that we can stop executing a block. These are in a sort of ascending order of importance, e.g. `exit` should trump `break` */ + enum execution_cancellation_reason_t + { + execution_cancellation_none, + execution_cancellation_loop_control, + execution_cancellation_skip, + execution_cancellation_exit + }; + execution_cancellation_reason_t cancellation_reason(const block_t *block) const; + + /* Report an error at the given node. Returns a parse_execution_result_t (not a bool), which callers may assign directly to their own result. 
*/ + parse_execution_result_t report_error(const parse_node_t &node, const wchar_t *fmt, ...); + /* Wildcard error helper */ + parse_execution_result_t report_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard); + + /* Command not found support */ + void handle_command_not_found(const wcstring &cmd, const parse_node_t &statement_node, int err_code); + + /* Utilities */ + wcstring get_source(const parse_node_t &node) const; + const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + node_offset_t get_offset(const parse_node_t &node) const; + const parse_node_t *infinite_recursive_statement_in_job_list(const parse_node_t &job_list, wcstring *out_func_name) const; + + /* Indicates whether a job is a simple block (one block, no redirections) */ + bool job_is_simple_block(const parse_node_t &node) const; + + enum process_type_t process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const; + + /* These create process_t structures from statements */ + parse_execution_result_t populate_job_process(job_t *job, process_t *proc, const parse_node_t &statement_node); + parse_execution_result_t populate_boolean_process(job_t *job, process_t *proc, const parse_node_t &bool_statement); + parse_execution_result_t populate_plain_process(job_t *job, process_t *proc, const parse_node_t &statement); + parse_execution_result_t populate_block_process(job_t *job, process_t *proc, const parse_node_t &statement_node); + + /* These encapsulate the actual logic of various (block) statements. 
*/ + parse_execution_result_t run_block_statement(const parse_node_t &statement); + parse_execution_result_t run_for_statement(const parse_node_t &header, const parse_node_t &contents); + parse_execution_result_t run_if_statement(const parse_node_t &statement); + parse_execution_result_t run_switch_statement(const parse_node_t &statement); + parse_execution_result_t run_while_statement(const parse_node_t &header, const parse_node_t &contents); + parse_execution_result_t run_function_statement(const parse_node_t &header, const parse_node_t &contents); + parse_execution_result_t run_begin_statement(const parse_node_t &header, const parse_node_t &contents); + + wcstring_list_t determine_arguments(const parse_node_t &parent, const parse_node_t **out_unmatched_wildcard_node); + + /* Determines the IO chain. Returns true on success, false on error */ + bool determine_io_chain(const parse_node_t &statement, io_chain_t *out_chain); + + parse_execution_result_t run_1_job(const parse_node_t &job_node, const block_t *associated_block); + parse_execution_result_t run_job_list(const parse_node_t &job_list_node, const block_t *associated_block); + parse_execution_result_t populate_job_from_job_node(job_t *j, const parse_node_t &job_node, const block_t *associated_block); + + public: + parse_execution_context_t(const parse_node_tree_t &t, const wcstring &s, parser_t *p); + + /* Start executing at the given node offset. 
Returns a parse_execution_result_t describing the outcome (e.g. parse_execution_success or parse_execution_errored) */ + parse_execution_result_t eval_node_at_offset(node_offset_t offset, const block_t *associated_block, const io_chain_t &io); + +}; + + +#endif diff --git a/parse_productions.cpp b/parse_productions.cpp new file mode 100644 index 000000000..53a90a56a --- /dev/null +++ b/parse_productions.cpp @@ -0,0 +1,544 @@ +#include "parse_productions.h" + +using namespace parse_productions; +#define NO_PRODUCTION ((production_option_idx_t)(-1)) + +static bool production_is_empty(const production_t production) +{ + return production[0] == token_type_invalid; +} + +/* Empty productions are allowed but must be first. Validate that the given production is in the valid range, i.e. it is either not empty or there is a non-empty production after it */ +static bool production_is_valid(const production_options_t production_list, production_option_idx_t which) +{ + if (which < 0 || which >= MAX_PRODUCTIONS) + return false; + + bool nonempty_found = false; + for (int i=which; i < MAX_PRODUCTIONS; i++) + { + if (! 
production_is_empty(production_list[i])) + { + nonempty_found = true; + break; + } + } + return nonempty_found; +} + +#define PRODUCTIONS(sym) static const production_options_t productions_##sym +#define RESOLVE(sym) static production_option_idx_t resolve_##sym (const parse_token_t &token1, const parse_token_t &token2) +#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (const parse_token_t &input1, const parse_token_t &input2) { return 0; } + +#define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) + + +/* A job_list is a list of jobs, separated by semicolons or newlines */ +PRODUCTIONS(job_list) = +{ + {}, + {symbol_job, symbol_job_list}, + {parse_token_type_end, symbol_job_list} +}; + +RESOLVE(job_list) +{ + switch (token1.type) + { + case parse_token_type_string: + // some keywords are special + switch (token1.keyword) + { + case parse_keyword_end: + case parse_keyword_else: + case parse_keyword_case: + // End this job list + return 0; + + default: + // Normal string + return 1; + } + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + return 1; + + case parse_token_type_end: + // Empty line + return 2; + + case parse_token_type_terminate: + // no more commands, just transition to empty + return 0; + + default: + return NO_PRODUCTION; + } +} + +/* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). 
To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */
+
+PRODUCTIONS(job) =
+{
+    {symbol_statement, symbol_job_continuation}
+};
+RESOLVE_ONLY(job)
+
+PRODUCTIONS(job_continuation) =
+{
+    {},
+    {parse_token_type_pipe, symbol_statement, symbol_job_continuation}
+};
+RESOLVE(job_continuation)
+{
+    switch (token1.type)
+    {
+        case parse_token_type_pipe:
+            // Pipe, continuation
+            return 1;
+
+        default:
+            // Not a pipe, no job continuation
+            return 0;
+    }
+}
+
+/* A statement is a normal command, or an if / while / and etc */
+PRODUCTIONS(statement) =
+{
+    {symbol_boolean_statement},
+    {symbol_block_statement},
+    {symbol_if_statement},
+    {symbol_switch_statement},
+    {symbol_decorated_statement}
+};
+/* Picks the statement production from the next two tokens. The returned index refers to the list above: 0 = boolean, 1 = block, 2 = if, 3 = switch, 4 = decorated statement. Returns NO_PRODUCTION if no production applies. */
+RESOLVE(statement)
+{
+    // Go to decorated statements if the subsequent token looks like '--'
+    // If we are 'begin', then we expect to be invoked with no arguments. But if we are anything else, we require an argument, so do the same thing if the subsequent token is a line end.
+    if (token1.type == parse_token_type_string)
+    {
+        // If the next token looks like an option (starts with a dash), then parse it as a decorated statement
+        if (token2.has_dash_prefix)
+        {
+            return 4;
+        }
+
+        // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g. a "naked if".
+        bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end);
+        if (naked_invocation_invokes_help && (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate))
+        {
+            return 4;
+        }
+
+    }
+
+    switch (token1.type)
+    {
+        case parse_token_type_string:
+            switch (token1.keyword)
+            {
+                case parse_keyword_and:
+                case parse_keyword_or:
+                case parse_keyword_not:
+                    return 0;
+
+                case parse_keyword_for:
+                case parse_keyword_while:
+                case parse_keyword_function:
+                case parse_keyword_begin:
+                    return 1;
+
+                case parse_keyword_if:
+                    return 2;
+
+                case parse_keyword_else:
+                    return NO_PRODUCTION;
+
+                case parse_keyword_switch:
+                    return 3;
+
+                case parse_keyword_end:
+                    return NO_PRODUCTION;
+
+                    // 'in' is only special within a for_header
+                case parse_keyword_in:
+                case parse_keyword_none:
+                case parse_keyword_command:
+                case parse_keyword_builtin:
+                case parse_keyword_case:
+                    return 4;
+            }
+            break;
+
+        case parse_token_type_pipe:
+        case parse_token_type_redirection:
+        case parse_token_type_background:
+        case parse_token_type_terminate:
+            return NO_PRODUCTION;
+            //parse_error(L"statement", token);
+
+        default:
+            return NO_PRODUCTION;
+    }
+
+    /* Reached only if token1.keyword matched none of the cases in the inner switch. Fail to resolve explicitly rather than flow off the end of a non-void function. */
+    return NO_PRODUCTION;
+}
+
+PRODUCTIONS(if_statement) =
+{
+    {symbol_if_clause, symbol_else_clause, symbol_end_command, symbol_arguments_or_redirections_list}
+};
+RESOLVE_ONLY(if_statement)
+
+PRODUCTIONS(if_clause) =
+{
+    { KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list }
+};
+RESOLVE_ONLY(if_clause)
+
+PRODUCTIONS(else_clause) =
+{
+    { },
+    { KEYWORD(parse_keyword_else), symbol_else_continuation }
+};
+RESOLVE(else_clause)
+{
+    switch (token1.keyword)
+    {
+        case parse_keyword_else:
+            return 1;
+        default:
+            return 0;
+    }
+}
+
+PRODUCTIONS(else_continuation) =
+{
+    {symbol_if_clause, symbol_else_clause},
+    {parse_token_type_end, symbol_job_list}
+};
+RESOLVE(else_continuation)
+{
+    switch (token1.keyword)
+    {
+        case parse_keyword_if:
+            return 0;
+        default:
+            
return 1; + } +} + +PRODUCTIONS(switch_statement) = +{ + { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list} +}; +RESOLVE_ONLY(switch_statement) + +PRODUCTIONS(case_item_list) = +{ + {}, + {symbol_case_item, symbol_case_item_list}, + {parse_token_type_end, symbol_case_item_list} +}; +RESOLVE(case_item_list) +{ + if (token1.keyword == parse_keyword_case) return 1; + else if (token1.type == parse_token_type_end) return 2; //empty line + else return 0; +} + +PRODUCTIONS(case_item) = +{ + {KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list} +}; +RESOLVE_ONLY(case_item) + +PRODUCTIONS(argument_list) = +{ + {}, + {symbol_argument, symbol_argument_list} +}; +RESOLVE(argument_list) +{ + switch (token1.type) + { + case parse_token_type_string: + return 1; + default: + return 0; + } +} + +PRODUCTIONS(block_statement) = +{ + {symbol_block_header, parse_token_type_end, symbol_job_list, symbol_end_command, symbol_arguments_or_redirections_list} +}; +RESOLVE_ONLY(block_statement) + +PRODUCTIONS(block_header) = +{ + {symbol_for_header}, + {symbol_while_header}, + {symbol_function_header}, + {symbol_begin_header} +}; +RESOLVE(block_header) +{ + switch (token1.keyword) + { + case parse_keyword_for: + return 0; + case parse_keyword_while: + return 1; + case parse_keyword_function: + return 2; + case parse_keyword_begin: + return 3; + default: + return NO_PRODUCTION; + } +} + +PRODUCTIONS(for_header) = +{ + {KEYWORD(parse_keyword_for), parse_token_type_string, KEYWORD(parse_keyword_in), symbol_argument_list} +}; +RESOLVE_ONLY(for_header) + +PRODUCTIONS(while_header) = +{ + {KEYWORD(parse_keyword_while), symbol_job} +}; +RESOLVE_ONLY(while_header) + +PRODUCTIONS(begin_header) = +{ + {KEYWORD(parse_keyword_begin)} +}; +RESOLVE_ONLY(begin_header) + +PRODUCTIONS(function_header) = +{ + {KEYWORD(parse_keyword_function), symbol_argument, 
symbol_argument_list} +}; +RESOLVE_ONLY(function_header) + +/* A boolean statement is AND or OR or NOT */ +PRODUCTIONS(boolean_statement) = +{ + {KEYWORD(parse_keyword_and), symbol_statement}, + {KEYWORD(parse_keyword_or), symbol_statement}, + {KEYWORD(parse_keyword_not), symbol_statement} +}; +RESOLVE(boolean_statement) +{ + switch (token1.keyword) + { + case parse_keyword_and: + return 0; + case parse_keyword_or: + return 1; + case parse_keyword_not: + return 2; + default: + return NO_PRODUCTION; + } +} + +PRODUCTIONS(decorated_statement) = +{ + {symbol_plain_statement}, + {KEYWORD(parse_keyword_command), symbol_plain_statement}, + {KEYWORD(parse_keyword_builtin), symbol_plain_statement}, +}; +RESOLVE(decorated_statement) +{ + /* If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the second token is not a string, then this is a naked 'command' and we should execute it as undecorated. */ + if (token2.type != parse_token_type_string || token2.has_dash_prefix) + { + return 0; + } + + switch (token1.keyword) + { + default: + return 0; + case parse_keyword_command: + return 1; + case parse_keyword_builtin: + return 2; + } +} + +PRODUCTIONS(plain_statement) = +{ + {parse_token_type_string, symbol_arguments_or_redirections_list, symbol_optional_background} +}; +RESOLVE_ONLY(plain_statement) + +PRODUCTIONS(arguments_or_redirections_list) = +{ + {}, + {symbol_argument_or_redirection, symbol_arguments_or_redirections_list} +}; +RESOLVE(arguments_or_redirections_list) +{ + switch (token1.type) + { + case parse_token_type_string: + case parse_token_type_redirection: + return 1; + default: + return 0; + } +} + +PRODUCTIONS(argument_or_redirection) = +{ + {symbol_argument}, + {symbol_redirection} +}; +RESOLVE(argument_or_redirection) +{ + switch (token1.type) + { + case parse_token_type_string: + return 0; + case parse_token_type_redirection: + return 1; + default: + return NO_PRODUCTION; + } +} + +PRODUCTIONS(argument) = +{ + 
{parse_token_type_string} +}; +RESOLVE_ONLY(argument) + +PRODUCTIONS(redirection) = +{ + {parse_token_type_redirection, parse_token_type_string} +}; +RESOLVE_ONLY(redirection) + +PRODUCTIONS(optional_background) = +{ + {}, + { parse_token_type_background } +}; + +RESOLVE(optional_background) +{ + switch (token1.type) + { + case parse_token_type_background: + return 1; + default: + return 0; + } +} + +PRODUCTIONS(end_command) = +{ + {KEYWORD(parse_keyword_end)} +}; +RESOLVE_ONLY(end_command) + +#define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; +const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text) +{ + bool log_it = false; + if (log_it) + { + fprintf(stderr, "Resolving production for %ls with input token <%ls>\n", token_type_description(node_type).c_str(), input1.describe().c_str()); + } + + /* Fetch the list of productions and the function to resolve them */ + const production_options_t *production_list = NULL; + production_option_idx_t (*resolver)(const parse_token_t &input1, const parse_token_t &input2) = NULL; + switch (node_type) + { + TEST(job_list) + TEST(job) + TEST(statement) + TEST(job_continuation) + TEST(boolean_statement) + TEST(block_statement) + TEST(if_statement) + TEST(if_clause) + TEST(else_clause) + TEST(else_continuation) + TEST(switch_statement) + TEST(decorated_statement) + TEST(case_item_list) + TEST(case_item) + TEST(argument_list) + TEST(block_header) + TEST(for_header) + TEST(while_header) + TEST(begin_header) + TEST(function_header) + TEST(plain_statement) + TEST(arguments_or_redirections_list) + TEST(argument_or_redirection) + TEST(argument) + TEST(redirection) + TEST(optional_background) + TEST(end_command) + + case parse_token_type_string: + case parse_token_type_pipe: + case parse_token_type_redirection: + 
case parse_token_type_background: + case parse_token_type_end: + case parse_token_type_terminate: + fprintf(stderr, "Terminal token type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); + PARSER_DIE(); + break; + + case parse_special_type_parse_error: + case parse_special_type_tokenizer_error: + case parse_special_type_comment: + fprintf(stderr, "Special type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); + PARSER_DIE(); + break; + + + case token_type_invalid: + fprintf(stderr, "token_type_invalid passed to %s\n", __FUNCTION__); + PARSER_DIE(); + break; + + } + PARSE_ASSERT(production_list != NULL); + PARSE_ASSERT(resolver != NULL); + + const production_t *result = NULL; + production_option_idx_t which = resolver(input1, input2); + + if (log_it) + { + fprintf(stderr, "\tresolved to %u\n", (unsigned)which); + } + + + if (which == NO_PRODUCTION) + { + if (log_it) + { + fprintf(stderr, "Node type '%ls' has no production for input '%ls' (in %s)\n", token_type_description(node_type).c_str(), input1.describe().c_str(), __FUNCTION__); + } + result = NULL; + } + else + { + PARSE_ASSERT(production_is_valid(*production_list, which)); + result = &((*production_list)[which]); + } + *out_which_production = which; + return result; +} + diff --git a/parse_productions.h b/parse_productions.h new file mode 100644 index 000000000..df3ae9c96 --- /dev/null +++ b/parse_productions.h @@ -0,0 +1,71 @@ +/**\file parse_tree.h + + Programmatic representation of fish code. +*/ + +#ifndef FISH_PARSE_TREE_CONSTRUCTION_H +#define FISH_PARSE_TREE_CONSTRUCTION_H + +#include "parse_tree.h" +#include + +namespace parse_productions +{ + +#define MAX_PRODUCTIONS 5 +#define MAX_SYMBOLS_PER_PRODUCTION 6 + +typedef uint32_t production_tag_t; + +/* A production is an array of unsigned char. Symbols are encoded directly as their symbol value. Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. 
So essentially we glom together keywords and symbols. */ +typedef uint8_t production_element_t; + +/* An index into a production option list */ +typedef uint8_t production_option_idx_t; + +/* A production is an array of production elements */ +typedef production_element_t const production_t[MAX_SYMBOLS_PER_PRODUCTION]; + +/* A production options is an array of (possible) productions */ +typedef production_t production_options_t[MAX_PRODUCTIONS]; + +/* Resolve the type from a production element */ +inline parse_token_type_t production_element_type(production_element_t elem) +{ + if (elem > LAST_TOKEN_OR_SYMBOL) + { + return parse_token_type_string; + } + else + { + return static_cast(elem); + } +} + +/* Resolve the keyword from a production element */ +inline parse_keyword_t production_element_keyword(production_element_t elem) +{ + if (elem > LAST_TOKEN_OR_SYMBOL) + { + // First keyword is LAST_TOKEN_OR_SYMBOL + 1 + return static_cast(elem - LAST_TOKEN_OR_SYMBOL - 1); + } + else + { + return parse_keyword_none; + } +} + +/* Check if an element is valid */ +inline bool production_element_is_valid(production_element_t elem) +{ + return elem != token_type_invalid; +} + +/* Fetch a production. We are passed two input tokens. The first input token is guaranteed to not be invalid; the second token may be invalid if there's no more tokens. 
*/ +const production_t *production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text); + +} + + +#endif diff --git a/parse_tree.cpp b/parse_tree.cpp new file mode 100644 index 000000000..6bb7a3cd3 --- /dev/null +++ b/parse_tree.cpp @@ -0,0 +1,1492 @@ +#include "parse_productions.h" +#include "tokenizer.h" +#include "fallback.h" +#include "wutil.h" +#include "proc.h" +#include +#include + +using namespace parse_productions; + +static bool production_is_empty(const production_t *production) +{ + return (*production)[0] == token_type_invalid; +} + +/** Returns a string description of this parse error */ +wcstring parse_error_t::describe(const wcstring &src, bool skip_caret) const +{ + wcstring result = text; + if (! skip_caret && source_start < src.size() && source_start + source_length <= src.size()) + { + // Locate the beginning of this line of source + size_t line_start = 0; + + // Look for a newline prior to source_start. If we don't find one, start at the beginning of the string; otherwise start one past the newline. Note that source_start may itself point at a newline; we want to find the newline before it. + if (source_start > 0) + { + size_t newline = src.find_last_of(L'\n', source_start - 1); + if (newline != wcstring::npos) + { + line_start = newline + 1; + } + } + + // Look for the newline after the source range. If the source range itself includes a newline, that's the one we want, so start just before the end of the range + size_t last_char_in_range = (source_length == 0 ? 
source_start : source_start + source_length - 1); + size_t line_end = src.find(L'\n', last_char_in_range); + if (line_end == wcstring::npos) + { + line_end = src.size(); + } + + assert(line_end >= line_start); + assert(source_start >= line_start); + + // Don't include the caret and line if we're interactive this is the first line, because then it's obvious + bool skip_caret = (get_is_interactive() && source_start == 0); + + if (! skip_caret) + { + // Append the line of text. + if (! result.empty()) + { + result.push_back(L'\n'); + } + result.append(src, line_start, line_end - line_start); + + + // Append the caret line. The input source may include tabs; for that reason we construct a "caret line" that has tabs in corresponding positions + wcstring caret_space_line; + caret_space_line.reserve(source_start - line_start); + for (size_t i=line_start; i < source_start; i++) + { + wchar_t wc = src.at(i); + if (wc == L'\t') + { + caret_space_line.push_back(L'\t'); + } + else if (wc == L'\n') + { + /* It's possible that the source_start points at a newline itself. In that case, pretend it's a space. We only expect this to be at the end of the string. 
*/ + caret_space_line.push_back(L' '); + } + else + { + int width = fish_wcwidth(wc); + if (width > 0) + { + caret_space_line.append(static_cast(width), L' '); + } + } + } + result.push_back(L'\n'); + result.append(caret_space_line); + result.push_back(L'^'); + } + } + return result; +} + +wcstring parse_errors_description(const parse_error_list_t &errors, const wcstring &src, const wchar_t *prefix) +{ + wcstring target; + for (size_t i=0; i < errors.size(); i++) + { + if (i > 0) + { + target.push_back(L'\n'); + } + if (prefix != NULL) + { + target.append(prefix); + target.append(L": "); + } + target.append(errors.at(i).describe(src)); + } + return target; +} + +/** Returns a string description of the given token type */ +wcstring token_type_description(parse_token_type_t type) +{ + switch (type) + { + case token_type_invalid: + return L"invalid"; + + case symbol_job_list: + return L"job_list"; + case symbol_job: + return L"job"; + case symbol_job_continuation: + return L"job_continuation"; + + case symbol_statement: + return L"statement"; + case symbol_block_statement: + return L"block_statement"; + case symbol_block_header: + return L"block_header"; + case symbol_for_header: + return L"for_header"; + case symbol_while_header: + return L"while_header"; + case symbol_begin_header: + return L"begin_header"; + case symbol_function_header: + return L"function_header"; + + case symbol_if_statement: + return L"if_statement"; + case symbol_if_clause: + return L"if_clause"; + case symbol_else_clause: + return L"else_clause"; + case symbol_else_continuation: + return L"else_continuation"; + + case symbol_switch_statement: + return L"switch_statement"; + case symbol_case_item_list: + return L"case_item_list"; + case symbol_case_item: + return L"case_item"; + + case symbol_argument_list: + return L"argument_list"; + + case symbol_boolean_statement: + return L"boolean_statement"; + case symbol_decorated_statement: + return L"decorated_statement"; + case 
symbol_plain_statement: + return L"plain_statement"; + case symbol_arguments_or_redirections_list: + return L"arguments_or_redirections_list"; + case symbol_argument_or_redirection: + return L"argument_or_redirection"; + case symbol_argument: + return L"symbol_argument"; + case symbol_redirection: + return L"symbol_redirection"; + case symbol_optional_background: + return L"optional_background"; + case symbol_end_command: + return L"symbol_end_command"; + + + case parse_token_type_string: + return L"token_string"; + case parse_token_type_pipe: + return L"token_pipe"; + case parse_token_type_redirection: + return L"token_redirection"; + case parse_token_type_background: + return L"token_background"; + case parse_token_type_end: + return L"token_end"; + case parse_token_type_terminate: + return L"token_terminate"; + + + case parse_special_type_parse_error: + return L"parse_error"; + case parse_special_type_tokenizer_error: + return L"tokenizer_error"; + case parse_special_type_comment: + return L"comment"; + + } + return format_string(L"Unknown token type %ld", static_cast(type)); +} + +wcstring keyword_description(parse_keyword_t k) +{ + switch (k) + { + case parse_keyword_none: + return L"none"; + case parse_keyword_if: + return L"if"; + case parse_keyword_else: + return L"else"; + case parse_keyword_for: + return L"for"; + case parse_keyword_in: + return L"in"; + case parse_keyword_while: + return L"while"; + case parse_keyword_begin: + return L"begin"; + case parse_keyword_function: + return L"function"; + case parse_keyword_switch: + return L"switch"; + case parse_keyword_case: + return L"case"; + case parse_keyword_end: + return L"end"; + case parse_keyword_and: + return L"and"; + case parse_keyword_or: + return L"or"; + case parse_keyword_not: + return L"not"; + case parse_keyword_command: + return L"command"; + case parse_keyword_builtin: + return L"builtin"; + } + return format_string(L"Unknown keyword type %ld", static_cast(k)); +} + +static wcstring 
token_type_user_presentable_description(parse_token_type_t type, parse_keyword_t keyword) +{ + if (keyword != parse_keyword_none) + { + return format_string(L"keyword '%ls'", keyword_description(keyword).c_str()); + } + + switch (type) + { + /* Hackish. We only support the following types. */ + case symbol_statement: + return L"a command"; + + case parse_token_type_string: + return L"a string"; + + case parse_token_type_pipe: + return L"a pipe"; + + case parse_token_type_redirection: + return L"a redirection"; + + case parse_token_type_background: + return L"a '&'"; + + case parse_token_type_end: + return L"end of the statement"; + + default: + return format_string(L"a %ls", token_type_description(type).c_str()); + } +} + +/** Returns a string description of the given parse node */ +wcstring parse_node_t::describe(void) const +{ + wcstring result = token_type_description(type); + append_format(result, L" (prod %d)", this->production_idx); + return result; +} + + +/** Returns a string description of the given parse token */ +wcstring parse_token_t::describe() const +{ + wcstring result = token_type_description(type); + if (keyword != parse_keyword_none) + { + append_format(result, L" <%ls>", keyword_description(keyword).c_str()); + } + return result; +} + +/** A string description appropriate for presentation to the user */ +wcstring parse_token_t::user_presentable_description() const +{ + return token_type_user_presentable_description(type, keyword); +} + +/* Convert from tokenizer_t's token type to a parse_token_t type */ +static inline parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type) +{ + parse_token_type_t result = token_type_invalid; + switch (tokenizer_token_type) + { + case TOK_STRING: + result = parse_token_type_string; + break; + + case TOK_PIPE: + result = parse_token_type_pipe; + break; + + case TOK_END: + result = parse_token_type_end; + break; + + case TOK_BACKGROUND: + result = parse_token_type_background; 
+ break; + + case TOK_REDIRECT_OUT: + case TOK_REDIRECT_APPEND: + case TOK_REDIRECT_IN: + case TOK_REDIRECT_FD: + case TOK_REDIRECT_NOCLOB: + result = parse_token_type_redirection; + break; + + case TOK_ERROR: + result = parse_special_type_tokenizer_error; + break; + + case TOK_COMMENT: + result = parse_special_type_comment; + break; + + + default: + fprintf(stderr, "Bad token type %d passed to %s\n", (int)tokenizer_token_type, __FUNCTION__); + assert(0); + break; + } + return result; +} + +/* Helper function for dump_tree */ +static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, node_offset_t node_idx, size_t indent, wcstring *result, size_t *line, node_offset_t *inout_first_node_not_dumped) +{ + assert(node_idx < nodes.size()); + + // Update first_node_not_dumped + // This takes a bit of explanation. While it's true that a parse tree may be a "forest", its individual trees are "compact," meaning they are not interleaved. Thus we keep track of the largest node index as we descend a tree. One past the largest is the start of the next tree. 
+ if (*inout_first_node_not_dumped <= node_idx) + { + *inout_first_node_not_dumped = node_idx + 1; + } + + const parse_node_t &node = nodes.at(node_idx); + + const size_t spacesPerIndent = 2; + + // unindent statement lists by 1 to flatten them + if (node.type == symbol_job_list || node.type == symbol_arguments_or_redirections_list) + { + if (indent > 0) indent -= 1; + } + + append_format(*result, L"%2lu - %l2u ", *line, node_idx); + result->append(indent * spacesPerIndent, L' ');; + result->append(node.describe()); + if (node.child_count > 0) + { + append_format(*result, L" <%lu children>", node.child_count); + } + + if (node.has_source() && node.type == parse_token_type_string) + { + result->append(L": \""); + result->append(src, node.source_start, node.source_length); + result->append(L"\""); + } + + if (node.type != parse_token_type_string) + { + if (node.has_source()) + { + append_format(*result, L" [%ld, %ld]", (long)node.source_start, (long)node.source_length); + } + else + { + append_format(*result, L" [no src]", (long)node.source_start, (long)node.source_length); + } + } + + result->push_back(L'\n'); + ++*line; + for (size_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++) + { + dump_tree_recursive(nodes, src, child_idx, indent + 1, result, line, inout_first_node_not_dumped); + } +} + +/* Gives a debugging textual description of a parse tree. Note that this supports "parse forests" too. That is, our tree may not really be a tree, but instead a collection of trees. 
*/ +wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src) +{ + if (nodes.empty()) + return L"(empty!)"; + + node_offset_t first_node_not_dumped = 0; + size_t line = 0; + wcstring result; + while (first_node_not_dumped < nodes.size()) + { + if (first_node_not_dumped > 0) + { + result.append(L"---New Tree---\n"); + } + dump_tree_recursive(nodes, src, first_node_not_dumped, 0, &result, &line, &first_node_not_dumped); + } + return result; +} + +/* Struct representing elements of the symbol stack, used in the internal state of the LL parser */ +struct parse_stack_element_t +{ + enum parse_token_type_t type; + enum parse_keyword_t keyword; + node_offset_t node_idx; + + explicit parse_stack_element_t(parse_token_type_t t, node_offset_t idx) : type(t), keyword(parse_keyword_none), node_idx(idx) + { + } + + explicit parse_stack_element_t(production_element_t e, node_offset_t idx) : type(production_element_type(e)), keyword(production_element_keyword(e)), node_idx(idx) + { + } + + wcstring describe(void) const + { + wcstring result = token_type_description(type); + if (keyword != parse_keyword_none) + { + append_format(result, L" <%ls>", keyword_description(keyword).c_str()); + } + return result; + } + + /* Returns a name that we can show to the user, e.g. "a command" */ + wcstring user_presentable_description(void) const + { + return token_type_user_presentable_description(type, keyword); + } +}; + +/* The parser itself, private implementation of class parse_t. This is a hand-coded table-driven LL parser. Most hand-coded LL parsers are recursive descent, but recursive descent parsers are difficult to "pause", unlike table-driven parsers. */ +class parse_ll_t +{ + /* Traditional symbol stack of the LL parser */ + std::vector symbol_stack; + + /* Parser output. This is a parse tree, but stored in an array. 
*/ + parse_node_tree_t nodes; + + /* Whether we ran into a fatal error, including parse errors or tokenizer errors */ + bool fatal_errored; + + /* Whether we should collect error messages or not */ + bool should_generate_error_messages; + + /* List of errors we have encountered */ + parse_error_list_t errors; + + /* The symbol stack can contain terminal types or symbols. Symbols go on to do productions, but terminal types are just matched against input tokens. */ + bool top_node_handle_terminal_types(parse_token_t token); + + void parse_error(const wchar_t *expected, parse_token_t token); + void parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *format, ...); + void parse_error_failed_production(struct parse_stack_element_t &elem, parse_token_t token); + void parse_error_unbalancing_token(parse_token_t token); + void append_error_callout(wcstring &error_message, parse_token_t token); + + void dump_stack(void) const; + + // Get the node corresponding to the top element of the stack + parse_node_t &node_for_top_symbol() + { + PARSE_ASSERT(! symbol_stack.empty()); + const parse_stack_element_t &top_symbol = symbol_stack.back(); + PARSE_ASSERT(top_symbol.node_idx != -1); + PARSE_ASSERT(top_symbol.node_idx < nodes.size()); + return nodes.at(top_symbol.node_idx); + } + + parse_token_type_t stack_top_type() const + { + return symbol_stack.back().type; + } + + // Pop from the top of the symbol stack, then push the given production, updating node counts. Note that production_t has type "pointer to array" so some care is required. 
+ inline void symbol_stack_pop_push_production(const production_t *production) + { + bool logit = false; + if (logit) + { + size_t count = 0; + fprintf(stderr, "Applying production:\n"); + for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) + { + production_element_t elem = (*production)[i]; + if (production_element_is_valid(elem)) + { + parse_token_type_t type = production_element_type(elem); + parse_keyword_t keyword = production_element_keyword(elem); + fprintf(stderr, "\t%ls <%ls>\n", token_type_description(type).c_str(), keyword_description(keyword).c_str()); + count++; + } + } + if (! count) fprintf(stderr, "\t\n"); + } + + // Get the parent index. But we can't get the parent parse node yet, since it may be made invalid by adding children + const size_t parent_node_idx = symbol_stack.back().node_idx; + + // Add the children. Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) + const size_t child_start = nodes.size(); + size_t child_count = 0; + for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) + { + production_element_t elem = (*production)[i]; + if (!production_element_is_valid(elem)) + { + // All done, bail out + break; + } + else + { + // Generate the parse node. + parse_token_type_t child_type = production_element_type(elem); + parse_node_t child = parse_node_t(child_type); + child.parent = parent_node_idx; + nodes.push_back(child); + child_count++; + } + } + + // Update the parent + parse_node_t &parent_node = nodes.at(parent_node_idx); + + // Should have no children yet + PARSE_ASSERT(parent_node.child_count == 0); + + // Tell the node about its children + parent_node.child_start = child_start; + parent_node.child_count = child_count; + + // Replace the top of the stack with new stack elements corresponding to our new nodes. Note that these go in reverse order. 
+ symbol_stack.pop_back(); + symbol_stack.reserve(symbol_stack.size() + child_count); + size_t idx = child_count; + while (idx--) + { + production_element_t elem = (*production)[idx]; + PARSE_ASSERT(production_element_is_valid(elem)); + symbol_stack.push_back(parse_stack_element_t(elem, child_start + idx)); + } + } + + public: + + /* Constructor */ + parse_ll_t() : fatal_errored(false), should_generate_error_messages(true) + { + this->symbol_stack.reserve(16); + this->nodes.reserve(64); + this->reset_symbols_and_nodes(); + } + + /* Input */ + void accept_tokens(parse_token_t token1, parse_token_t token2); + + /* Report tokenizer errors */ + void report_tokenizer_error(parse_token_t token, const wchar_t *tok_error); + + /* Indicate if we hit a fatal error */ + bool has_fatal_error(void) const + { + return this->fatal_errored; + } + + /* Indicate whether we want to generate error messages */ + void set_should_generate_error_messages(bool flag) + { + this->should_generate_error_messages = flag; + } + + /* Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. This is called from the constructor */ + void reset_symbols(void); + + /* Clear the parse symbol stack and the node tree. Add a new job_list_t goal node. This is called from the constructor. */ + void reset_symbols_and_nodes(void); + + /* Once parsing is complete, determine the ranges of intermediate nodes */ + void determine_node_ranges(); + + /* Acquire output after parsing. 
This transfers directly from within self */ + void acquire_output(parse_node_tree_t *output, parse_error_list_t *errors); +}; + +void parse_ll_t::dump_stack(void) const +{ + // Walk backwards from the top, looking for parents + wcstring_list_t lines; + if (symbol_stack.empty()) + { + lines.push_back(L"(empty)"); + } + else + { + node_offset_t child = symbol_stack.back().node_idx; + node_offset_t cursor = child; + lines.push_back(nodes.at(cursor).describe()); + while (cursor--) + { + const parse_node_t &node = nodes.at(cursor); + if (node.child_start <= child && node.child_start + node.child_count > child) + { + lines.push_back(node.describe()); + child = cursor; + } + } + } + + fprintf(stderr, "Stack dump (%lu elements):\n", symbol_stack.size()); + for (size_t idx = 0; idx < lines.size(); idx++) + { + fprintf(stderr, " %ls\n", lines.at(idx).c_str()); + } +} + +// Give each node a source range equal to the union of the ranges of its children +// Terminal nodes already have source ranges (and no children) +// Since children always appear after their parents, we can implement this very simply by walking backwards +void parse_ll_t::determine_node_ranges(void) +{ + const size_t source_start_invalid = -1; + size_t idx = nodes.size(); + while (idx--) + { + parse_node_t *parent = &nodes.at(idx); + + // Skip nodes that already have a source range. These are terminal nodes. + if (parent->source_start != source_start_invalid) + continue; + + // Ok, this node needs a source range. Get all of its children, and then set its range. 
+ size_t min_start = source_start_invalid, max_end = 0; //note source_start_invalid is huge + for (node_offset_t i=0; i < parent->child_count; i++) + { + const parse_node_t &child = nodes.at(parent->child_offset(i)); + if (child.has_source()) + { + min_start = std::min(min_start, child.source_start); + max_end = std::max(max_end, child.source_start + child.source_length); + } + } + + if (min_start != source_start_invalid) + { + assert(max_end >= min_start); + parent->source_start = min_start; + parent->source_length = max_end - min_start; + } + } +} + +void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *errors) +{ + if (output != NULL) + { + std::swap(*output, this->nodes); + } + this->nodes.clear(); + + if (errors != NULL) + { + std::swap(*errors, this->errors); + } + this->errors.clear(); + this->symbol_stack.clear(); +} + +void parse_ll_t::parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *fmt, ...) +{ + this->fatal_errored = true; + if (this->should_generate_error_messages) + { + //this->dump_stack(); + parse_error_t err; + + va_list va; + va_start(va, fmt); + err.text = vformat_string(fmt, va); + err.code = code; + va_end(va); + + err.source_start = token.source_start; + err.source_length = token.source_length; + this->errors.push_back(err); + } +} + +// Unbalancing token. 
This includes 'else' or 'case' or 'end' outside of the appropriate block +// This essentially duplicates some logic from resolving the production for symbol_statement_list - yuck +void parse_ll_t::parse_error_unbalancing_token(parse_token_t token) +{ + this->fatal_errored = true; + if (this->should_generate_error_messages) + { + assert(token.type == parse_token_type_string); + assert(token.keyword == parse_keyword_end || token.keyword == parse_keyword_else || token.keyword == parse_keyword_case); + switch (token.keyword) + { + case parse_keyword_end: + this->parse_error(token, parse_error_unbalancing_end, L"'end' outside of a block"); + break; + + case parse_keyword_else: + this->parse_error(token, parse_error_unbalancing_else, L"'else' builtin not inside of if block"); + break; + + case parse_keyword_case: + this->parse_error(token, parse_error_unbalancing_case, L"'case' builtin not inside of switch block"); + break; + + default: + fprintf(stderr, "Unexpected token %ls passed to %s\n", token.describe().c_str(), __FUNCTION__); + PARSER_DIE(); + break; + } + } +} + +// This is a 'generic' parse error when we can't match the top of the stack element +void parse_ll_t::parse_error_failed_production(struct parse_stack_element_t &stack_elem, parse_token_t token) +{ + fatal_errored = true; + if (this->should_generate_error_messages) + { + bool done = false; + + /* Check for || */ + if (token.type == parse_token_type_pipe && token.source_start > 0) + { + /* Here we wanted a statement and instead got a pipe. See if this is a double pipe: foo || bar. If so, we have a special error message. */ + const parse_node_t *prev_pipe = nodes.find_node_matching_source_location(parse_token_type_pipe, token.source_start - 1, NULL); + if (prev_pipe != NULL) + { + /* The pipe of the previous job abuts our current token. So we have ||. */ + this->parse_error(token, parse_error_double_pipe, CMD_OR_ERR_MSG); + done = true; + } + } + + /* Check for && */ + if (! 
done && token.type == parse_token_type_background && token.source_start > 0) + { + /* Check to see if there was a previous token_background */ + const parse_node_t *prev_background = nodes.find_node_matching_source_location(parse_token_type_background, token.source_start - 1, NULL); + if (prev_background != NULL) + { + /* We have &&. */ + this->parse_error(token, parse_error_double_background, CMD_AND_ERR_MSG); + done = true; + } + } + + if (! done) + { + const wcstring expected = stack_elem.user_presentable_description(); + this->parse_error(expected.c_str(), token); + } + } +} + +void parse_ll_t::report_tokenizer_error(parse_token_t token, const wchar_t *tok_error) +{ + assert(tok_error != NULL); + this->parse_error(token, parse_error_tokenizer, L"%ls", tok_error); +} + +void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) +{ + fatal_errored = true; + if (this->should_generate_error_messages) + { + this->parse_error(token, parse_error_generic, L"Expected %ls, but instead found %ls", expected, token.user_presentable_description().c_str()); + } +} + +void parse_ll_t::reset_symbols(void) +{ + /* Add a new job_list node, and then reset our symbol list to point at it */ + node_offset_t where = nodes.size(); + nodes.push_back(parse_node_t(symbol_job_list)); + + symbol_stack.clear(); + symbol_stack.push_back(parse_stack_element_t(symbol_job_list, where)); // goal token + this->fatal_errored = false; +} + +/* Reset both symbols and nodes */ +void parse_ll_t::reset_symbols_and_nodes(void) +{ + nodes.clear(); + this->reset_symbols(); +} + +static bool type_is_terminal_type(parse_token_type_t type) +{ + switch (type) + { + case parse_token_type_string: + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + case parse_token_type_end: + case parse_token_type_terminate: + return true; + + default: + return false; + } +} + +bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token) +{ + 
PARSE_ASSERT(! symbol_stack.empty()); + PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); + bool handled = false; + parse_stack_element_t &stack_top = symbol_stack.back(); + if (type_is_terminal_type(stack_top.type)) + { + // The top of the stack is terminal. We are going to handle this (because we can't produce from a terminal type) + handled = true; + + // Now see if we actually matched + bool matched = false; + if (stack_top.type == token.type) + { + switch (stack_top.type) + { + case parse_token_type_string: + // We matched if the keywords match, or no keyword was required + matched = (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword); + break; + + default: + // For other types, we only require that the types match + matched = true; + break; + } + } + + if (matched) + { + // Success. Tell the node that it matched this token, and what its source range is + // In the parse phase, we only set source ranges for terminal types. We propagate ranges to parent nodes afterwards. + parse_node_t &node = node_for_top_symbol(); + node.source_start = token.source_start; + node.source_length = token.source_length; + } + else + { + // Failure + if (stack_top.type == parse_token_type_string && token.type == parse_token_type_string) + { + // Keyword failure. We should unify this with the 'matched' computation above. + assert(stack_top.keyword != parse_keyword_none && stack_top.keyword != token.keyword); + + // Check to see which keyword we got which was considered wrong + switch (token.keyword) + { + // Some keywords are only valid in certain contexts. If this cascaded all the way down through the outermost job_list, it was not in a valid context. 
+ case parse_keyword_case: + case parse_keyword_end: + case parse_keyword_else: + this->parse_error_unbalancing_token(token); + break; + + case parse_keyword_none: + { + // This is a random other string (not a keyword) + const wcstring expected = keyword_description(stack_top.keyword); + this->parse_error(token, parse_error_generic, L"Expected keyword '%ls'", expected.c_str()); + break; + } + + + default: + { + // Got a real keyword we can report + const wcstring actual = (token.keyword == parse_keyword_none ? token.describe() : keyword_description(token.keyword)); + const wcstring expected = keyword_description(stack_top.keyword); + this->parse_error(token, parse_error_generic, L"Expected keyword '%ls', instead got keyword '%ls'", expected.c_str(), actual.c_str()); + break; + } + } + } + else + { + const wcstring expected = stack_top.user_presentable_description(); + this->parse_error(expected.c_str(), token); + } + } + + // We handled the token, so pop the symbol stack + symbol_stack.pop_back(); + } + return handled; +} + +void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) +{ + bool logit = false; + if (logit) + { + fprintf(stderr, "Accept token %ls\n", token1.describe().c_str()); + } + PARSE_ASSERT(token1.type >= FIRST_PARSE_TOKEN_TYPE); + + bool consumed = false; + + // Handle special types specially. Note that these are the only types that can be pushed if the symbol stack is empty. + if (token1.type == parse_special_type_parse_error || token1.type == parse_special_type_tokenizer_error || token1.type == parse_special_type_comment) + { + parse_node_t err_node(token1.type); + err_node.source_start = token1.source_start; + err_node.source_length = token1.source_length; + nodes.push_back(err_node); + consumed = true; + + /* tokenizer errors are fatal */ + if (token1.type == parse_special_type_tokenizer_error) + this->fatal_errored = true; + } + + while (! consumed && ! this->fatal_errored) + { + PARSE_ASSERT(! 
symbol_stack.empty()); + + if (top_node_handle_terminal_types(token1)) + { + if (logit) + { + fprintf(stderr, "Consumed token %ls\n", token1.describe().c_str()); + } + consumed = true; + break; + } + + // top_node_match_token may indicate an error if our stack is empty + if (this->fatal_errored) + break; + + // Get the production for the top of the stack + parse_stack_element_t &stack_elem = symbol_stack.back(); + parse_node_t &node = nodes.at(stack_elem.node_idx); + const production_t *production = production_for_token(stack_elem.type, token1, token2, &node.production_idx, NULL /* error text */); + if (production == NULL) + { + parse_error_failed_production(stack_elem, token1); + // the above sets fatal_errored, which ends the loop + } + else + { + bool is_terminate = (token1.type == parse_token_type_terminate); + + // When a job_list encounters something like 'else', it returns an empty production to return control to the outer block. But if it's unbalanced, then we'll end up with an empty stack! So make sure that doesn't happen. This is the primary mechanism by which we detect e.g. unbalanced end. However, if we get a true terminate token, then we allow (expect) this to empty the stack + if (symbol_stack.size() == 1 && production_is_empty(production) && ! is_terminate) + { + this->parse_error_unbalancing_token(token1); + break; + } + + // Manipulate the symbol stack. + // Note that stack_elem is invalidated by popping the stack. + symbol_stack_pop_push_production(production); + + // Expect to not have an empty stack, unless this was the terminate type + // Note we may not have an empty stack with the terminate type (i.e. incomplete input) + assert(is_terminate || ! 
symbol_stack.empty()); + + if (symbol_stack.empty()) + { + break; + } + } + } +} + +static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) +{ + parse_keyword_t result = parse_keyword_none; + if (tok == TOK_STRING) + { + + const struct + { + const wchar_t *txt; + parse_keyword_t keyword; + } keywords[] = + { + {L"if", parse_keyword_if}, + {L"else", parse_keyword_else}, + {L"for", parse_keyword_for}, + {L"in", parse_keyword_in}, + {L"while", parse_keyword_while}, + {L"begin", parse_keyword_begin}, + {L"function", parse_keyword_function}, + {L"switch", parse_keyword_switch}, + {L"case", parse_keyword_case}, + {L"end", parse_keyword_end}, + {L"and", parse_keyword_and}, + {L"or", parse_keyword_or}, + {L"not", parse_keyword_not}, + {L"command", parse_keyword_command}, + {L"builtin", parse_keyword_builtin} + }; + + for (size_t i=0; i < sizeof keywords / sizeof *keywords; i++) + { + if (! wcscmp(keywords[i].txt, tok_txt)) + { + result = keywords[i].keyword; + break; + } + } + } + return result; +} + +/* Placeholder invalid token */ +static const parse_token_t kInvalidToken = {token_type_invalid, parse_keyword_none, false, -1, -1}; + +/* Terminal token */ +static const parse_token_t kTerminalToken = {parse_token_type_terminate, parse_keyword_none, false, -1, -1}; + +/* Return a new parse token, advancing the tokenizer */ +static inline parse_token_t next_parse_token(tokenizer_t *tok) +{ + if (! tok_has_next(tok)) + { + return kTerminalToken; + } + + token_type tok_type = static_cast(tok_last_type(tok)); + int tok_start = tok_get_pos(tok); + size_t tok_extent = tok_get_extent(tok); + assert(tok_extent < 10000000); //paranoia + const wchar_t *tok_txt = tok_last(tok); + + parse_token_t result; + + /* Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy, because it ignores quotes. This is the historical behavior. 
For example, `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard and it even starts to look like a feature. */ + result.type = parse_token_type_from_tokenizer_token(tok_type); + result.keyword = keyword_for_token(tok_type, tok_txt); + result.has_dash_prefix = (tok_txt[0] == L'-'); + result.source_start = (size_t)tok_start; + result.source_length = tok_extent; + + tok_next(tok); + return result; +} + +bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) +{ + parse_ll_t parser; + parser.set_should_generate_error_messages(errors != NULL); + + /* Construct the tokenizer */ + tok_flags_t tok_options = 0; + if (parse_flags & parse_flag_include_comments) + tok_options |= TOK_SHOW_COMMENTS; + + if (parse_flags & parse_flag_accept_incomplete_tokens) + tok_options |= TOK_ACCEPT_UNFINISHED; + + if (errors == NULL) + tok_options |= TOK_SQUASH_ERRORS; + + tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); + + /* We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our queue with an initial token at index 1. */ + parse_token_t queue[2] = {kInvalidToken, next_parse_token(&tok)}; + + /* Loop until we get a terminal token */ + do + { + /* Push a new token onto the queue */ + queue[0] = queue[1]; + queue[1] = next_parse_token(&tok); + + /* If we are leaving things unterminated, then don't pass parse_token_type_terminate */ + if (queue[0].type == parse_token_type_terminate && (parse_flags & parse_flag_leave_unterminated)) + { + break; + } + + /* Pass these two tokens. We know that queue[0] is valid; queue[1] may be invalid. */ + parser.accept_tokens(queue[0], queue[1]); + + /* Handle tokenizer errors. 
This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */ + if (queue[1].type == parse_special_type_tokenizer_error) + { + parser.report_tokenizer_error(queue[1], tok_last(&tok)); + } + + /* Handle errors */ + if (parser.has_fatal_error()) + { + if (parse_flags & parse_flag_continue_after_error) + { + /* Hack hack hack. Typically the parse error is due to the first token. However, if it's a tokenizer error, then has_fatal_error was set due to the check above; in that case the second token is what matters. */ + size_t error_token_idx = (queue[1].type == parse_special_type_tokenizer_error ? 1 : 0); + + /* Mark a special error token, and then keep going */ + const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, false, queue[error_token_idx].source_start, queue[error_token_idx].source_length}; + parser.accept_tokens(token, kInvalidToken); + parser.reset_symbols(); + } + else + { + /* Bail out */ + break; + } + } + + /* If this was the last token, then stop the loop */ + } while (queue[0].type != parse_token_type_terminate); + + + // Teach each node where its source range is + parser.determine_node_ranges(); + + // Acquire the output from the parser + parser.acquire_output(output, errors); + +#if 0 + //wcstring result = dump_tree(this->parser->nodes, str); + //fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); + fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", output->size(), sizeof(parse_node_t), output->size() * sizeof(parse_node_t)); +#endif + + // Indicate if we had a fatal error + return ! parser.has_fatal_error(); +} + +const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const +{ + const parse_node_t *result = NULL; + + /* We may get nodes with no children if we had an incomplete parse. 
Don't consider that an error */ + if (parent.child_count > 0) + { + PARSE_ASSERT(which < parent.child_count); + node_offset_t child_offset = parent.child_offset(which); + if (child_offset < this->size()) + { + result = &this->at(child_offset); + + /* If we are given an expected type, then the node must be null or that type */ + assert(expected_type == token_type_invalid || expected_type == result->type); + } + } + + return result; +} + +const parse_node_t &parse_node_tree_t::find_child(const parse_node_t &parent, parse_token_type_t type) const +{ + for (size_t i=0; i < parent.child_count; i++) + { + const parse_node_t *child = this->get_child(parent, i); + if (child->type == type) + { + return *child; + } + } + PARSE_ASSERT(0); + return *(parse_node_t *)(NULL); //unreachable +} + +const parse_node_t *parse_node_tree_t::get_parent(const parse_node_t &node, parse_token_type_t expected_type) const +{ + const parse_node_t *result = NULL; + if (node.parent != NODE_OFFSET_INVALID) + { + PARSE_ASSERT(node.parent < this->size()); + const parse_node_t &parent = this->at(node.parent); + if (expected_type == token_type_invalid || expected_type == parent.type) + { + // The type matches (or no type was requested) + result = &parent; + } + } + return result; +} + +const parse_node_t *parse_node_tree_t::get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const +{ + const parse_node_t *ancestor = &node; + while ((ancestor = this->get_parent(*ancestor))) + { + if (ancestor->type == desired_type) + { + break; + } + } + return ancestor; +} + +static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result, size_t max_count) +{ + if (result->size() < max_count) + { + if (parent.type == type) result->push_back(&parent); + for (size_t i=0; i < parent.child_count; i++) + { + const parse_node_t *child = tree.get_child(parent, i); + assert(child != NULL); + 
find_nodes_recursive(tree, *child, type, result, max_count); + } + } +} + +parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count) const +{ + parse_node_list_t result; + find_nodes_recursive(*this, parent, type, &result, max_count); + return result; +} + +/* Return true if the given node has the proposed ancestor as an ancestor (or is itself that ancestor) */ +static bool node_has_ancestor(const parse_node_tree_t &tree, const parse_node_t &node, const parse_node_t &proposed_ancestor) +{ + if (&node == &proposed_ancestor) + { + /* Found it */ + return true; + } + else if (node.parent == NODE_OFFSET_INVALID) + { + /* No more parents */ + return false; + } + else + { + /* Recurse to the parent */ + return node_has_ancestor(tree, tree.at(node.parent), proposed_ancestor); + } +} + +const parse_node_t *parse_node_tree_t::find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent) const +{ + const parse_node_t *result = NULL; + // Find nodes of the given type in the tree, working backwards + size_t idx = this->size(); + while (idx--) + { + const parse_node_t &node = this->at(idx); + if (node.type == type) + { + // Types match. Check if it has the right parent + if (parent == NULL || node_has_ancestor(*this, node, *parent)) + { + // Success + result = &node; + break; + } + } + } + return result; +} + +const parse_node_t *parse_node_tree_t::find_node_matching_source_location(parse_token_type_t type, size_t source_loc, const parse_node_t *parent) const +{ + const parse_node_t *result = NULL; + // Scan the tree front to back and return the first node of the given type whose source range contains (or ends at) source_loc; NULL if there is none + const size_t len = this->size(); + for (size_t idx=0; idx < len; idx++) + { + const parse_node_t &node = this->at(idx); + + /* Types must match */ + if (node.type != type) + continue; + + /* Must contain source location */ + if (! 
node.location_in_or_at_end_of_source_range(source_loc)) + continue; + + /* If a parent is given, the node must be underneath it (node_has_ancestor also accepts the node itself); skip nodes that are not */ + if (parent != NULL && ! node_has_ancestor(*this, node, *parent)) + continue; + + /* Found it */ + result = &node; + break; + } + return result; +} + + +bool parse_node_tree_t::argument_list_is_root(const parse_node_t &node) const +{ + bool result = true; + assert(node.type == symbol_argument_list || node.type == symbol_arguments_or_redirections_list); + const parse_node_t *parent = this->get_parent(node); + if (parent != NULL) + { + /* We have a parent - check to make sure it's not another list! */ + result = parent->type != symbol_arguments_or_redirections_list && parent->type != symbol_argument_list; + } + return result; +} + +enum parse_statement_decoration_t parse_node_tree_t::decoration_for_plain_statement(const parse_node_t &node) const +{ + assert(node.type == symbol_plain_statement); + enum parse_statement_decoration_t decoration = parse_statement_decoration_none; + const parse_node_t *decorated_statement = this->get_parent(node, symbol_decorated_statement); + if (decorated_statement != NULL) + { + decoration = static_cast(decorated_statement->production_idx); + } + return decoration; +} + +bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const +{ + bool result = false; + assert(node.type == symbol_plain_statement); + const parse_node_t *cmd_node = this->get_child(node, 0, parse_token_type_string); + if (cmd_node != NULL && cmd_node->has_source()) + { + out_cmd->assign(src, cmd_node->source_start, cmd_node->source_length); + result = true; + } + else + { + out_cmd->clear(); + } + return result; +} + +bool parse_node_tree_t::statement_is_in_pipeline(const parse_node_t &node, bool include_first) const +{ + // Moderately nasty hack! 
Walk up our ancestor chain and see if we are in a job_continuation. 
This checks if we are in the second or greater element in a pipeline; if we are the first element we treat this as false + // This accepts a few statement types + bool result = false; + const parse_node_t *ancestor = &node; + + // If we're given a plain statement, try to get its decorated statement parent + if (ancestor && ancestor->type == symbol_plain_statement) + ancestor = this->get_parent(*ancestor, symbol_decorated_statement); + if (ancestor) + ancestor = this->get_parent(*ancestor, symbol_statement); + if (ancestor) + ancestor = this->get_parent(*ancestor); + + if (ancestor) + { + if (ancestor->type == symbol_job_continuation) + { + // Second or more in a pipeline + result = true; + } + else if (ancestor->type == symbol_job && include_first) + { + // Check to see if we have a job continuation that's not empty + const parse_node_t *continuation = this->get_child(*ancestor, 1, symbol_job_continuation); + result = (continuation != NULL && continuation->child_count > 0); + } + } + + return result; +} + +enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, const wcstring &src, int *out_fd, wcstring *out_target) const +{ + assert(redirection_node.type == symbol_redirection); + enum token_type result = TOK_NONE; + const parse_node_t *redirection_primitive = this->get_child(redirection_node, 0, parse_token_type_redirection); //like 2> + const parse_node_t *redirection_target = this->get_child(redirection_node, 1, parse_token_type_string); //like &1 or file path + + if (redirection_primitive != NULL && redirection_primitive->has_source()) + { + result = redirection_type_for_string(redirection_primitive->get_source(src), out_fd); + } + if (out_target != NULL) + { + *out_target = redirection_target ? 
redirection_target->get_source(src) : L""; + } + return result; +} + +const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node) const +{ + const parse_node_t *result = NULL; + if (node.type == symbol_block_statement) + { + const parse_node_t *block_header = this->get_child(node, 0, symbol_block_header); + if (block_header != NULL) + { + result = this->get_child(*block_header, 0); + } + } + return result; +} + +parse_node_tree_t::parse_node_list_t parse_node_tree_t::specific_statements_for_job(const parse_node_t &job) const +{ + assert(job.type == symbol_job); + parse_node_list_t result; + + /* Initial statement (non-specific) */ + result.push_back(get_child(job, 0, symbol_statement)); + + /* Our cursor variable. Walk over the list of continuations. */ + const parse_node_t *continuation = get_child(job, 1, symbol_job_continuation); + while (continuation != NULL && continuation->child_count > 0) + { + result.push_back(get_child(*continuation, 1, symbol_statement)); + continuation = get_child(*continuation, 2, symbol_job_continuation); + } + + /* Result now contains a list of statements. But we want a list of specific statements e.g. symbol_switch_statement. So replace them in-place in the vector. */ + for (size_t i=0; i < result.size(); i++) + { + const parse_node_t *statement = result.at(i); + assert(statement->type == symbol_statement); + result.at(i) = this->get_child(*statement, 0); + } + + return result; +} + +const parse_node_t *parse_node_tree_t::next_node_in_node_list(const parse_node_t &node_list, parse_token_type_t entry_type, const parse_node_t **out_list_tail) const +{ + parse_token_type_t list_type = node_list.type; + + /* Paranoia - it doesn't make sense for a list type to contain itself */ + assert(list_type != entry_type); + + const parse_node_t *list_cursor = &node_list; + const parse_node_t *list_entry = NULL; + + /* Loop while we don't have an item but do have a list. 
Note that not every node in the list may contain an item that we care about - e.g. job_list contains blank lines as a production */ + while (list_entry == NULL && list_cursor != NULL) + { + const parse_node_t *next_cursor = NULL; + + /* Walk through the children */ + for (size_t i=0; i < list_cursor->child_count; i++) + { + const parse_node_t *child = this->get_child(*list_cursor, i); + if (child->type == entry_type) + { + /* This is the list entry */ + list_entry = child; + } + else if (child->type == list_type) + { + /* This is the next in the list */ + next_cursor = child; + } + } + /* Go to the next entry, even if it's NULL */ + list_cursor = next_cursor; + } + + /* Return what we got */ + assert(list_cursor == NULL || list_cursor->type == list_type); + assert(list_entry == NULL || list_entry->type == entry_type); + if (out_list_tail != NULL) + *out_list_tail = list_cursor; + return list_entry; +} diff --git a/parse_tree.h b/parse_tree.h new file mode 100644 index 000000000..f77b87811 --- /dev/null +++ b/parse_tree.h @@ -0,0 +1,271 @@ +/**\file parse_tree.h + + Programmatic representation of fish code. +*/ + +#ifndef FISH_PARSE_PRODUCTIONS_H +#define FISH_PARSE_PRODUCTIONS_H + +#include + +#include "config.h" +#include "util.h" +#include "common.h" +#include "tokenizer.h" +#include "parse_constants.h" +#include +#include + +class parse_node_t; +class parse_node_tree_t; +typedef size_t node_offset_t; +#define NODE_OFFSET_INVALID (static_cast(-1)) + +struct parse_error_t +{ + /** Text of the error */ + wcstring text; + + /** Code for the error */ + enum parse_error_code_t code; + + /** Offset and length of the token in the source code that triggered this error */ + size_t source_start; + size_t source_length; + + /** Return a string describing the error, suitable for presentation to the user. 
If skip_caret is false, the offending line with a caret is printed as well */ + wcstring describe(const wcstring &src, bool skip_caret = false) const; +}; +typedef std::vector parse_error_list_t; + +/* Returns a description of a list of parse errors */ +wcstring parse_errors_description(const parse_error_list_t &errors, const wcstring &src, const wchar_t *prefix = NULL); + +/** A struct representing the token type that we use internally */ +struct parse_token_t +{ + enum parse_token_type_t type; // The type of the token as represented by the parser + enum parse_keyword_t keyword; // Any keyword represented by this token + bool has_dash_prefix; // Hackish: whether the source contains a dash prefix + size_t source_start; + size_t source_length; + + wcstring describe() const; + wcstring user_presentable_description() const; +}; + + +enum +{ + parse_flag_none = 0, + + /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */ + parse_flag_continue_after_error = 1 << 0, + + /* Include comment tokens */ + parse_flag_include_comments = 1 << 1, + + /* Indicate that the tokenizer should accept incomplete tokens */ + parse_flag_accept_incomplete_tokens = 1 << 2, + + /* Indicate that the parser should not generate the terminate token, allowing an 'unfinished' tree where some nodes may have no productions. 
*/ + parse_flag_leave_unterminated = 1 << 3 + +}; +typedef unsigned int parse_tree_flags_t; + +wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src); + +wcstring token_type_description(parse_token_type_t type); +wcstring keyword_description(parse_keyword_t type); + +/** Class for nodes of a parse tree */ +class parse_node_t +{ +public: + + /* Type of the node */ + enum parse_token_type_t type; + + /* Start in the source code */ + size_t source_start; + + /* Length of our range in the source code */ + size_t source_length; + + /* Parent */ + node_offset_t parent; + + /* Children */ + node_offset_t child_start; + uint8_t child_count; + + /* Which production was used */ + uint8_t production_idx; + + /* Description */ + wcstring describe(void) const; + + /* Constructor */ + explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0), production_idx(-1) + { + } + + node_offset_t child_offset(node_offset_t which) const + { + PARSE_ASSERT(which < child_count); + return child_start + which; + } + + /* Indicate if this node has a range of source code associated with it */ + bool has_source() const + { + return source_start != (size_t)(-1); + } + + /* Gets source for the node, or the empty string if it has no source */ + wcstring get_source(const wcstring &str) const + { + if (! has_source()) + return wcstring(); + else + return wcstring(str, this->source_start, this->source_length); + } + + /* Returns whether the given location is within the source range or at its end */ + bool location_in_or_at_end_of_source_range(size_t loc) const + { + return has_source() && source_start <= loc && loc - source_start <= source_length; + } +}; + + +/* The parse tree itself */ +class parse_node_tree_t : public std::vector +{ +public: + + /* Get the node corresponding to a child of the given node, or NULL if there is no such child. 
If expected_type is provided, assert that the node has that type. + */ + const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + + /* Find the first direct child of the given node of the given type. asserts on failure + */ + const parse_node_t &find_child(const parse_node_t &parent, parse_token_type_t type) const; + + /* Get the node corresponding to the parent of the given node, or NULL if there is no such parent. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */ + const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const; + + /* Returns the first ancestor of the given type, or NULL. */ + const parse_node_t *get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const; + + /* Find all the nodes of a given type underneath a given node, up to max_count of them */ + typedef std::vector parse_node_list_t; + parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count = (size_t)(-1)) const; + + /* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */ + const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const; + + /* Finds a node containing the given source location. If 'parent' is not NULL, it must be an ancestor. 
*/ + const parse_node_t *find_node_matching_source_location(parse_token_type_t type, size_t source_loc, const parse_node_t *parent) const; + + /* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */ + bool argument_list_is_root(const parse_node_t &node) const; + + /* Utilities */ + + /* Given a plain statement, get the decoration (from the parent node), or none if there is no decoration */ + enum parse_statement_decoration_t decoration_for_plain_statement(const parse_node_t &node) const; + + /* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. */ + bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const; + + /* Given a plain statement, return true if the statement is part of a pipeline. If include_first is set, the first command in a pipeline is considered part of it; otherwise only the second or additional commands are */ + bool statement_is_in_pipeline(const parse_node_t &node, bool include_first) const; + + /* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */ + enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, wcstring *out_target) const; + + /* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */ + const parse_node_t *header_node_for_block_statement(const parse_node_t &node) const; + + /* Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return the next element of the given type in that list, and the tail (by reference). Returns NULL if we've exhausted the list. */ + const parse_node_t *next_node_in_node_list(const parse_node_t &node_list, parse_token_type_t item_type, const parse_node_t **list_tail) const; + + /* Given a job, return all of its statements. 
These are 'specific statements' (e.g. symbol_decorated_statement, not symbol_statement) */ + parse_node_list_t specific_statements_for_job(const parse_node_t &job) const; +}; + +/* The big entry point. Parse a string! */ +bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); + +/* Fish grammar: + +# A job_list is a list of jobs, separated by semicolons or newlines + + job_list = | + job job_list + job_list + +# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation + + job = statement job_continuation + job_continuation = | + statement job_continuation + +# A statement is a normal command, or an if / while / and etc + + statement = boolean_statement | block_statement | if_statement | switch_statement | decorated_statement + +# A block is a conditional, loop, or begin/end + + if_statement = if_clause else_clause end_command arguments_or_redirections_list + if_clause = job STATEMENT_TERMINATOR job_list + else_clause = | + else_continuation + else_continuation = if_clause else_clause | + STATEMENT_TERMINATOR job_list + + switch_statement = SWITCH STATEMENT_TERMINATOR case_item_list end_command arguments_or_redirections_list + case_item_list = | + case_item case_item_list | + case_item_list + + case_item = CASE argument_list STATEMENT_TERMINATOR job_list + + block_statement = block_header job_list end_command arguments_or_redirections_list + block_header = for_header | while_header | function_header | begin_header + for_header = FOR var_name IN argument_list + while_header = WHILE job + begin_header = BEGIN + +# Functions take arguments, and require at least one (the name). No redirections allowed. 
+ function_header = FUNCTION argument argument_list + +# A boolean statement is AND or OR or NOT + + boolean_statement = AND statement | OR statement | NOT statement + +# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" + + decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement + plain_statement = arguments_or_redirections_list optional_background + + argument_list = | argument argument_list + + arguments_or_redirections_list = | + argument_or_redirection arguments_or_redirections_list + argument_or_redirection = argument | redirection + argument = + + redirection = + + terminator = | + + optional_background = | + + end_command = END + +*/ + +#endif diff --git a/parse_util.cpp b/parse_util.cpp index 3cf407d15..491e47328 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -38,18 +38,13 @@ #include "env.h" #include "signal.h" #include "wildcard.h" +#include "parse_tree.h" +#include "parser.h" /** - Maximum number of autoloaded items opf a specific type to keep in - memory at a time. 
+ Error message for improper use of the exec builtin */ -#define AUTOLOAD_MAX 10 - -/** - Minimum time, in seconds, before an autoloaded item will be - unloaded -*/ -#define AUTOLOAD_MIN_AGE 60 +#define EXEC_ERR_MSG _(L"The '%ls' command can not be used in a pipeline") int parse_util_lineno(const wchar_t *str, size_t offset) { @@ -164,7 +159,7 @@ int parse_util_locate_cmdsubst(const wchar_t *in, wchar_t **begin, wchar_t **end CHECK(in, 0); - for (pos = (wchar_t *)in; *pos; pos++) + for (pos = const_cast(in); *pos; pos++) { if (prev != '\\') { @@ -240,6 +235,42 @@ int parse_util_locate_cmdsubst(const wchar_t *in, wchar_t **begin, wchar_t **end return 1; } +int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset, wcstring *out_contents, size_t *out_start, size_t *out_end, bool accept_incomplete) +{ + /* Clear the return values */ + out_contents->clear(); + *out_start = 0; + *out_end = str.size(); + + /* Nothing to do if the offset is at or past the end of the string. 
*/ + if (*inout_cursor_offset >= str.size()) + return 0; + + /* Defer to the wonky version */ + const wchar_t * const buff = str.c_str(); + const wchar_t * const valid_range_start = buff + *inout_cursor_offset, *valid_range_end = buff + str.size(); + wchar_t *cmdsub_begin = NULL, *cmdsub_end = NULL; + int ret = parse_util_locate_cmdsubst(valid_range_start, &cmdsub_begin, &cmdsub_end, accept_incomplete); + if (ret > 0) + { + /* The command substitutions must not be NULL and must be in the valid pointer range, and the end must be bigger than the beginning */ + assert(cmdsub_begin != NULL && cmdsub_begin >= valid_range_start && cmdsub_begin <= valid_range_end); + assert(cmdsub_end != NULL && cmdsub_end > cmdsub_begin && cmdsub_end >= valid_range_start && cmdsub_end <= valid_range_end); + + /* Assign the substring to the out_contents */ + const wchar_t *interior_begin = cmdsub_begin + 1; + out_contents->assign(interior_begin, cmdsub_end - interior_begin); + + /* Return the start and end */ + *out_start = cmdsub_begin - buff; + *out_end = cmdsub_end - buff; + + /* Update the inout_cursor_offset. Note this may cause it to exceed str.size(), though overflow is not likely */ + *inout_cursor_offset = 1 + *out_end; + } + return ret; +} + void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wchar_t **a, const wchar_t **b) { const wchar_t * const cursor = buff + cursor_pos; @@ -768,3 +799,335 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote) } return result; } + +/* We are given a parse tree, the index of a node within the tree, its indent, and a vector of indents the same size as the original source string. Set the indent corresponding to the node's source range, if appropriate. + + trailing_indent is the indent for nodes with unrealized source, i.e. if I type 'if false ' then we have an if node with an empty job list (without source) but we want the last line to be indented anyways. 
+ + switch statements also indent. + + max_visited_node_idx is the largest index we visited. +*/ +static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset_t node_idx, int node_indent, parse_token_type_t parent_type, std::vector *indents, int *trailing_indent, node_offset_t *max_visited_node_idx) +{ + /* Guard against incomplete trees */ + if (node_idx > tree.size()) + return; + + /* Update max_visited_node_idx */ + if (node_idx > *max_visited_node_idx) + *max_visited_node_idx = node_idx; + + /* We could implement this by utilizing the fish grammar. But there's an easy trick instead: almost everything that wraps a job list should be indented by 1. So just find all of the job lists. One exception is switch; the other exception is job_list itself: a job_list is a job and a job_list, and we want that child list to be indented the same as the parent. So just find all job_lists whose parent is not a job_list, and increment their indent by 1. */ + + const parse_node_t &node = tree.at(node_idx); + const parse_token_type_t node_type = node.type; + + /* Increment the indent if we are either a root job_list, or root case_item_list */ + const bool is_root_job_list = (node_type == symbol_job_list && parent_type != symbol_job_list); + const bool is_root_case_item_list = (node_type == symbol_case_item_list && parent_type != symbol_case_item_list); + if (is_root_job_list || is_root_case_item_list) + { + node_indent += 1; + } + + /* If we have source, store the trailing indent unconditionally. If we do not have source, store the trailing indent only if ours is bigger; this prevents the trailing "run" of terminal job lists from affecting the trailing indent. For example, code like this: + + if foo + + will be parsed as this: + + job_list + job + if_statement + job [if] + job_list [empty] + job_list [empty] + + There's two "terminal" job lists, and we want the innermost one. 
+ + Note we are relying on the fact that nodes are in the same order as the source, i.e. an in-order traversal of the node tree also traverses the source from beginning to end. + */ + if (node.has_source() || node_indent > *trailing_indent) + { + *trailing_indent = node_indent; + } + + + /* Store the indent into the indent array */ + if (node.has_source()) + { + assert(node.source_start < indents->size()); + indents->at(node.source_start) = node_indent; + } + + + /* Recurse into all our children */ + for (node_offset_t idx = 0; idx < node.child_count; idx++) + { + /* Note we pass our type to our child, which becomes its parent node type */ + compute_indents_recursive(tree, node.child_start + idx, node_indent, node_type, indents, trailing_indent, max_visited_node_idx); + } +} + +std::vector parse_util_compute_indents(const wcstring &src) +{ + /* Make a vector the same size as the input string, which contains the indents. Initialize them to -1. */ + const size_t src_size = src.size(); + std::vector indents(src_size, -1); + + /* Parse the string. We pass continue_after_error to produce a forest; the trailing indent of the last node we visited becomes the input indent of the next. I.e. in the case of 'switch foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it were a case item list */ + parse_node_tree_t tree; + parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */); + + /* Start indenting at the first node. If we have a parse error, we'll have to start indenting from the top again */ + node_offset_t start_node_idx = 0; + int last_trailing_indent = 0; + + while (start_node_idx < tree.size()) + { + /* The indent that we'll get for the last line */ + int trailing_indent = 0; + + /* Biggest offset we visited */ + node_offset_t max_visited_node_idx = 0; + + /* Invoke the recursive version. 
As a hack, pass job_list for the 'parent' token type, which will prevent the really-root job list from indenting */ + compute_indents_recursive(tree, start_node_idx, last_trailing_indent, symbol_job_list, &indents, &trailing_indent, &max_visited_node_idx); + + /* We may have more to indent. The trailing indent becomes our current indent. Start at the node after the last we visited. */ + last_trailing_indent = trailing_indent; + start_node_idx = max_visited_node_idx + 1; + } + + int last_indent = 0; + for (size_t i=0; ipush_back(error); + return true; +} + +/** + Returns 1 if the specified command is a builtin that may not be used in a pipeline +*/ +static int parser_is_pipe_forbidden(const wcstring &word) +{ + return contains(word, + L"exec", + L"case", + L"break", + L"return", + L"continue"); +} + +// Check if the first argument under the given node is --help +static bool first_argument_is_help(const parse_node_tree_t &node_tree, const parse_node_t &node, const wcstring &src) +{ + bool is_help = false; + const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument, 1); + if (! arg_nodes.empty()) + { + // Check the first argument only + const parse_node_t &arg = *arg_nodes.at(0); + const wcstring first_arg_src = arg.get_source(src); + is_help = parser_t::is_help(first_arg_src.c_str(), 3); + } + return is_help; +} + +parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors) +{ + parse_node_tree_t node_tree; + parse_error_list_t parse_errors; + + // Whether we encountered a parse error + bool errored = false; + + // Whether we encountered an unclosed block + // We detect this via an 'end_command' block without source + bool has_unclosed_block = false; + + // Parse the input string into a parse tree + // Some errors are detected here + bool parsed = parse_tree_from_string(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors); + if (! 
parsed) + { + errored = true; + } + + // Expand all commands + // Verify 'or' and 'and' not used inside pipelines + // Verify pipes via parser_is_pipe_forbidden + // Verify return only within a function + + if (! errored) + { + const size_t node_tree_size = node_tree.size(); + for (size_t i=0; i < node_tree_size; i++) + { + const parse_node_t &node = node_tree.at(i); + if (node.type == symbol_end_command && ! node.has_source()) + { + // an 'end' without source is an unclosed block + has_unclosed_block = true; + } + else if (node.type == symbol_boolean_statement) + { + // 'or' and 'and' can be in a pipeline, as long as they're first + // These numbers 0 and 1 correspond to productions for boolean_statement. This should be cleaned up. + bool is_and = (node.production_idx == 0), is_or = (node.production_idx == 1); + if ((is_and || is_or) && node_tree.statement_is_in_pipeline(node, false /* don't count first */)) + { + errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG, is_and ? L"and" : L"or"); + } + } + else if (node.type == symbol_plain_statement) + { + wcstring command; + if (node_tree.command_for_plain_statement(node, buff_src, &command)) + { + // Check that we can expand the command + if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) + { + errored = append_syntax_error(&parse_errors, node, ILLEGAL_CMD_ERR_MSG, command.c_str()); + } + + // Check that pipes are sound + if (! errored && parser_is_pipe_forbidden(command)) + { + // forbidden commands cannot be in a pipeline at all + if (node_tree.statement_is_in_pipeline(node, true /* count first */)) + { + errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG, command.c_str()); + } + } + + // Check that we don't return from outside a function + // But we allow it if it's 'return --help' + if (! 
errored && command == L"return") + { + const parse_node_t *ancestor = &node; + bool found_function = false; + while (ancestor != NULL) + { + const parse_node_t *possible_function_header = node_tree.header_node_for_block_statement(*ancestor); + if (possible_function_header != NULL && possible_function_header->type == symbol_function_header) + { + found_function = true; + break; + } + ancestor = node_tree.get_parent(*ancestor); + + } + if (! found_function && ! first_argument_is_help(node_tree, node, buff_src)) + { + errored = append_syntax_error(&parse_errors, node, INVALID_RETURN_ERR_MSG); + } + } + + // Check that we don't break or continue from outside a loop + if (! errored && (command == L"break" || command == L"continue")) + { + // Walk up until we hit a 'for' or 'while' loop. If we hit a function first, stop the search; we can't break an outer loop from inside a function. + // This is a little funny because we can't tell if it's a 'for' or 'while' loop from the ancestor alone; we need the header. That is, we hit a block_statement, and have to check its header. + bool found_loop = false, end_search = false; + const parse_node_t *ancestor = &node; + while (ancestor != NULL && ! end_search) + { + const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor); + if (loop_or_function_header != NULL) + { + switch (loop_or_function_header->type) + { + case symbol_while_header: + case symbol_for_header: + // this is a loop header, so we can break or continue + found_loop = true; + end_search = true; + break; + + case symbol_function_header: + // this is a function header, so we cannot break or continue. We stop our search here. + found_loop = false; + end_search = true; + break; + + default: + // most likely begin / end style block, which makes no difference + break; + } + } + ancestor = node_tree.get_parent(*ancestor); + } + + if (! found_loop && ! 
first_argument_is_help(node_tree, node, buff_src)) + { + errored = append_syntax_error(&parse_errors, node, (command == L"break" ? INVALID_BREAK_ERR_MSG : INVALID_CONTINUE_ERR_MSG)); + } + } + } + } + } + } + + parser_test_error_bits_t res = 0; + + if (errored) + res |= PARSER_TEST_ERROR; + + if (has_unclosed_block) + res |= PARSER_TEST_INCOMPLETE; + + if (out_errors) + { + out_errors->swap(parse_errors); + } + + return res; + +} diff --git a/parse_util.h b/parse_util.h index 24147e180..862e5a621 100644 --- a/parse_util.h +++ b/parse_util.h @@ -8,6 +8,7 @@ #define FISH_PARSE_UTIL_H #include "autoload.h" +#include "parse_tree.h" #include #include #include @@ -27,6 +28,25 @@ int parse_util_locate_cmdsubst(const wchar_t *in, wchar_t **end, bool accept_incomplete); +/** + Alternative API. Iterate over command substitutions. + + \param str the string to search for subshells + \param inout_cursor_offset On input, the location to begin the search. On output, either the end of the string, or just after the closed-paren. + \param out_contents On output, the contents of the command substitution + \param out_start On output, the offset of the start of the command substitution (open paren) + \param out_end On output, the offset of the end of the command substitution (close paren), or the end of the string if it was incomplete + \param accept_incomplete whether to permit missing closing parenthesis + \return -1 on syntax error, 0 if no subshells exist and 1 on success +*/ + +int parse_util_locate_cmdsubst_range(const wcstring &str, + size_t *inout_cursor_offset, + wcstring *out_contents, + size_t *out_start, + size_t *out_end, + bool accept_incomplete); + /** Find the beginning and end of the command substitution under the cursor. If no subshell is found, the entire string is returned. 
If @@ -140,5 +160,9 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_ */ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote); +/** Given a string, parse it as fish code and then return the indents. The return value has the same size as the string */ +std::vector parse_util_compute_indents(const wcstring &src); + +parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors = NULL); #endif diff --git a/parser.cpp b/parser.cpp index 62b45dcc3..ffcec02ad 100644 --- a/parser.cpp +++ b/parser.cpp @@ -44,11 +44,8 @@ The fish parser. Contains functions for parsing and evaluating code. #include "path.h" #include "signal.h" #include "complete.h" - -/** - Maximum number of function calls, i.e. recursion depth. -*/ -#define MAX_RECURSION_DEPTH 128 +#include "parse_tree.h" +#include "parse_execution.h" /** Error message for unknown builtin @@ -76,36 +73,14 @@ The fish parser. Contains functions for parsing and evaluating code. */ #define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.") -/** - Error message on reaching maximum recursion depth -*/ -#define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?") - /** Error message used when the end of a block can't be located */ #define BLOCK_END_ERR_MSG _( L"Could not locate end of block. 
The 'end' command is missing, misspelled or a ';' is missing.") -/** - Error message on reaching maximum number of block calls -*/ -#define BLOCK_ERR_MSG _( L"Maximum number of nested blocks reached.") - -/** - Error message when a non-string token is found when expecting a command name -*/ +/** Error message when a non-string token is found when expecting a command name */ #define CMD_ERR_MSG _( L"Expected a command name, got token of type '%ls'") -/** - Error message when a non-string token is found when expecting a command name -*/ -#define CMD_OR_ERR_MSG _( L"Expected a command name, got token of type '%ls'. Did you mean 'COMMAND; or COMMAND'? See the help section for the 'or' builtin command by typing 'help or'.") - -/** - Error message when a non-string token is found when expecting a command name -*/ -#define CMD_AND_ERR_MSG _( L"Expected a command name, got token of type '%ls'. Did you mean 'COMMAND; and COMMAND'? See the help section for the 'and' builtin command by typing 'help and'.") - /** Error message when encountering an illegal command name */ @@ -114,7 +89,7 @@ The fish parser. Contains functions for parsing and evaluating code. /** Error message when encountering an illegal file descriptor */ -#define ILLEGAL_FD_ERR_MSG _( L"Illegal file descriptor '%ls'") +#define ILLEGAL_FD_ERR_MSG _( L"Illegal file descriptor in redirection '%ls'") /** Error message for wildcards with no matches @@ -166,11 +141,6 @@ The fish parser. Contains functions for parsing and evaluating code. */ #define INVALID_REDIRECTION_ERR_MSG _( L"Encountered redirection when expecting a command name. Fish does not allow a redirection operation before a command.") -/** - Error for evaluating null pointer -*/ -#define EVAL_NULL_ERR_MSG _( L"Tried to evaluate null pointer." 
) - /** Error for evaluating in illegal scope */ @@ -316,18 +286,25 @@ static const struct block_lookup_entry block_lookup[]= static bool job_should_skip_elseif(const job_t *job, const block_t *current_block); +// Given a file path, return something nicer. Currently we just "unexpand" tildes. +static wcstring user_presentable_path(const wcstring &path) +{ + return replace_home_directory_with_tilde(path); +} + + parser_t::parser_t(enum parser_type_t type, bool errors) : parser_type(type), show_errors(errors), error_code(0), err_pos(0), + cancellation_requested(false), current_tokenizer(NULL), current_tokenizer_pos(0), job_start_pos(0), eval_level(-1), block_io(shared_ptr()) { - } /* A pointer to the principal parser (which is a static local) */ @@ -350,6 +327,8 @@ void parser_t::skip_all_blocks(void) /* Tell all blocks to skip */ if (s_principal_parser) { + s_principal_parser->cancellation_requested = true; + //write(2, "Cancelling blocks\n", strlen("Cancelling blocks\n")); for (size_t i=0; i < s_principal_parser->block_count(); i++) { @@ -425,6 +404,12 @@ void parser_t::pop_block() delete old; } +void parser_t::pop_block(const block_t *expected) +{ + assert(expected == this->current_block()); + this->pop_block(); +} + const wchar_t *parser_t::get_block_desc(int block) const { for (size_t i=0; block_lookup[i].desc; i++) @@ -460,20 +445,6 @@ block_t *parser_t::current_block() return block_stack.empty() ? NULL : block_stack.back(); } - -/** - Returns 1 if the specified command is a builtin that may not be used in a pipeline -*/ -static int parser_is_pipe_forbidden(const wcstring &word) -{ - return contains(word, - L"exec", - L"case", - L"break", - L"return", - L"continue"); -} - /** Search the text for the end of the current block */ @@ -575,19 +546,20 @@ void parser_t::allow_function() forbidden_function.pop_back(); } -void parser_t::error(int ec, int p, const wchar_t *str, ...) +void parser_t::error(int ec, size_t p, const wchar_t *str, ...) 
{ va_list va; CHECK(str,); error_code = ec; - err_pos = p; + + // note : p may be -1 + err_pos = static_cast(p); va_start(va, str); err_buff = vformat_string(str, va); va_end(va); - } /** @@ -742,21 +714,19 @@ void parser_t::print_errors_stderr() } -int parser_t::eval_args(const wchar_t *line, std::vector &args) +void parser_t::eval_args(const wchar_t *line, std::vector &args) { - expand_flags_t eflags = 0; if (! show_errors) eflags |= EXPAND_NO_DESCRIPTIONS; if (this->parser_type != PARSER_TYPE_GENERAL) eflags |= EXPAND_SKIP_CMDSUBST; - int do_loop=1; + bool do_loop=1; - CHECK(line, 1); -// CHECK( args, 1 ); + if (! line) return; - // PCA we need to suppress calling proc_push_interactive off of the main thread. I'm not sure exactly what it does. + // PCA we need to suppress calling proc_push_interactive off of the main thread. if (this->parser_type == PARSER_TYPE_GENERAL) proc_push_interactive(0); @@ -823,11 +793,9 @@ int parser_t::eval_args(const wchar_t *line, std::vector &args) if (this->parser_type == PARSER_TYPE_GENERAL) proc_pop_interactive(); - - return 1; } -void parser_t::stack_trace(size_t block_idx, wcstring &buff) +void parser_t::stack_trace(size_t block_idx, wcstring &buff) const { /* Check if we should end the recursion @@ -872,13 +840,13 @@ void parser_t::stack_trace(size_t block_idx, wcstring &buff) { const source_block_t *sb = static_cast(b); const wchar_t *source_dest = sb->source_file; - append_format(buff, _(L"in . 
(source) call of file '%ls',\n"), source_dest); + append_format(buff, _(L"from sourcing file %ls\n"), user_presentable_path(source_dest).c_str()); break; } case FUNCTION_CALL: { const function_block_t *fb = static_cast(b); - append_format(buff, _(L"in function '%ls',\n"), fb->name.c_str()); + append_format(buff, _(L"in function '%ls'\n"), fb->name.c_str()); break; } case SUBST: @@ -896,14 +864,14 @@ void parser_t::stack_trace(size_t block_idx, wcstring &buff) if (file) { append_format(buff, - _(L"\tcalled on line %d of file '%ls',\n"), + _(L"\tcalled on line %d of file %ls\n"), b->src_lineno, - file); + user_presentable_path(file).c_str()); } else { append_format(buff, - _(L"\tcalled on standard input,\n")); + _(L"\tcalled on standard input\n")); } if (b->type() == FUNCTION_CALL) @@ -989,10 +957,7 @@ int parser_t::line_number_of_character_at_offset(size_t idx) const const wchar_t *parser_t::current_filename() const { - /* We query a global array for the current file name, so it only makes sense to ask this on the principal parser. 
*/ ASSERT_IS_MAIN_THREAD(); - assert(this == &principal_parser()); - for (size_t i=0; i < this->block_count(); i++) { @@ -1003,7 +968,13 @@ const wchar_t *parser_t::current_filename() const return function_get_definition_file(fb->name); } } - return reader_current_filename(); + + /* We query a global array for the current file name, but only do that if we are the principal parser */ + if (this == &principal_parser()) + { + return reader_current_filename(); + } + return NULL; } /** @@ -1171,7 +1142,7 @@ const wchar_t *parser_t::get_buffer() const } -int parser_t::is_help(const wchar_t *s, int min_match) const +int parser_t::is_help(const wchar_t *s, int min_match) { CHECK(s, 0); @@ -1183,9 +1154,16 @@ int parser_t::is_help(const wchar_t *s, int min_match) const (len >= (size_t)min_match && (wcsncmp(L"--help", s, len) == 0)); } -job_t *parser_t::job_create() +void parser_t::job_add(job_t *job) { - job_t *res = new job_t(acquire_job_id(), this->block_io); + assert(job != NULL); + assert(job->first_process != NULL); + this->my_job_list.push_front(job); +} + +job_t *parser_t::job_create(const io_chain_t &io) +{ + job_t *res = new job_t(acquire_job_id(), io); this->my_job_list.push_front(res); job_set_flag(res, @@ -1215,7 +1193,7 @@ void parser_t::job_promote(job_t *job) { signal_block(); - job_list_t::iterator loc = std::find(my_job_list.begin(), my_job_list.end(), job); + job_list_t::iterator loc = std::find(my_job_list.begin(), my_job_list.end(), job); assert(loc != my_job_list.end()); /* Move the job to the beginning */ @@ -1329,6 +1307,7 @@ void parser_t::parse_job_argument_list(process_t *p, case TOK_BACKGROUND: { job_set_flag(j, JOB_FOREGROUND, 0); + // PCA note fall through, this is deliberate. 
The background modifier & terminates a command } case TOK_END: @@ -1636,17 +1615,6 @@ void parser_t::parse_job_argument_list(process_t *p, p->set_io_chain(process_io_chain); } -/* - static void print_block_stack( block_t *b ) - { - if( !b ) - return; - print_block_stack( b->outer ); - - debug( 0, L"Block type %ls, skip: %d", parser_get_block_desc( b->type ), b->skip ); - } -*/ - /** Fully parse a single job. Does not call exec on it, but any command substitutions in the job will be executed. @@ -1656,9 +1624,7 @@ void parser_t::parse_job_argument_list(process_t *p, f \return 1 on success, 0 on error */ -int parser_t::parse_job(process_t *p, - job_t *j, - tokenizer_t *tok) +int parser_t::parse_job(process_t *p, job_t *j, tokenizer_t *tok) { std::vector args; // The list that will become the argv array for the program int use_function = 1; // May functions be considered when checking what action this command represents @@ -1714,8 +1680,7 @@ int parser_t::parse_job(process_t *p, { error(SYNTAX_ERROR, tok_get_pos(tok), - CMD_OR_ERR_MSG, - tok_get_desc(tok_last_type(tok))); + CMD_OR_ERR_MSG); } else { @@ -1946,9 +1911,9 @@ int parser_t::parse_job(process_t *p, /* Check if we have reached the maximum recursion depth */ - if (forbidden_function.size() > MAX_RECURSION_DEPTH) + if (forbidden_function.size() > FISH_MAX_STACK_DEPTH) { - error(SYNTAX_ERROR, tok_get_pos(tok), OVERFLOW_RECURSION_ERR_MSG); + error(SYNTAX_ERROR, tok_get_pos(tok), CALL_STACK_LIMIT_EXCEEDED_ERR_MSG); } else { @@ -1956,7 +1921,7 @@ int parser_t::parse_job(process_t *p, } } } - args.push_back(completion_t(nxt)); + append_completion(args, nxt); } if (error_code == 0) @@ -1999,8 +1964,8 @@ int parser_t::parse_job(process_t *p, if (use_implicit_cd) { args.clear(); - args.push_back(completion_t(L"cd")); - args.push_back(completion_t(implicit_cd_path)); + append_completion(args, L"cd"); + append_completion(args, implicit_cd_path); /* If we have defined a wrapper around cd, use it, otherwise use the cd 
builtin */ if (use_function && function_exists(L"cd")) @@ -2381,7 +2346,7 @@ void parser_t::eval_job(tokenizer_t *tok) { case TOK_STRING: { - job_t *j = this->job_create(); + job_t *j = this->job_create(this->block_io); job_set_flag(j, JOB_FOREGROUND, 1); job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL)); job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL) \ @@ -2454,7 +2419,9 @@ void parser_t::eval_job(tokenizer_t *tok) { int was_builtin = 0; if (j->first_process->type==INTERNAL_BUILTIN && !j->first_process->next) + { was_builtin = 1; + } scoped_push tokenizer_pos_push(¤t_tokenizer_pos, job_begin_pos); exec_job(*this, j); @@ -2550,8 +2517,7 @@ void parser_t::eval_job(tokenizer_t *tok) { error(SYNTAX_ERROR, tok_get_pos(tok), - CMD_AND_ERR_MSG, - tok_get_desc(tok_last_type(tok))); + CMD_AND_ERR_MSG); } else { @@ -2586,12 +2552,115 @@ void parser_t::eval_job(tokenizer_t *tok) } job_reap(0); +} + +int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum block_type_t block_type) +{ + CHECK_BLOCK(1); + + if (block_type != TOP && block_type != SUBST) + { + debug(1, INVALID_SCOPE_ERR_MSG, parser_t::get_block_desc(block_type)); + bugreport(); + return 1; + } + + /* Parse the source into a tree, if we can */ + parse_node_tree_t tree; + if (! parse_tree_from_string(cmd, parse_flag_none, &tree, NULL)) + { + return 1; + } + + /* Append to the execution context stack */ + parse_execution_context_t *ctx = new parse_execution_context_t(tree, cmd, this); + execution_contexts.push_back(ctx); + + /* Execute the first node */ + int result = 1; + if (! tree.empty()) + { + result = this->eval_block_node(0, io, block_type); + } + + /* Clean up the execution context stack */ + assert(! execution_contexts.empty() && execution_contexts.back() == ctx); + execution_contexts.pop_back(); + delete ctx; + + return 0; +} + +int parser_t::eval_block_node(node_offset_t node_idx, const io_chain_t &io, enum block_type_t block_type) +{ + // Paranoia. 
It's a little frightening that we're given only a node_idx and we interpret this in the topmost execution context's tree. What happens if these were to be interleaved? Fortunately that cannot happen. + parse_execution_context_t *ctx = execution_contexts.back(); + assert(ctx != NULL); + + CHECK_BLOCK(1); + + /* Handle cancellation requests. If our block stack is currently empty, then we already did successfully cancel (or there was nothing to cancel); clear the flag. If our block stack is not empty, we are still in the process of cancelling; refuse to evaluate anything */ + if (this->cancellation_requested) + { + if (! block_stack.empty()) + { + return 1; + } + else + { + this->cancellation_requested = false; + } + } + + /* Only certain blocks are allowed */ + if ((block_type != TOP) && + (block_type != SUBST)) + { + debug(1, + INVALID_SCOPE_ERR_MSG, + parser_t::get_block_desc(block_type)); + bugreport(); + return 1; + } + + /* Not sure why we reap jobs here */ + job_reap(0); + + /* Start it up */ + const block_t * const start_current_block = current_block(); + block_t *scope_block = new scope_block_t(block_type); + this->push_block(scope_block); + int result = ctx->eval_node_at_offset(node_idx, scope_block, io); + + /* Clean up the block stack */ + this->pop_block(); + while (start_current_block != current_block()) + { + if (current_block() == NULL) + { + debug(0, + _(L"End of block mismatch. 
Program terminating.")); + bugreport(); + FATAL_EXIT(); + break; + } + this->pop_block(); + } + + /* Reap again */ + job_reap(0); + + return result; } -int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type_t block_type) +int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type) { - const wchar_t * const cmd = cmdStr.c_str(); + + if (parser_use_ast()) + return this->eval_new_parser(cmd_str, io, block_type); + + const wchar_t * const cmd = cmd_str.c_str(); size_t forbid_count; int code; const block_t *start_current_block = current_block(); @@ -2614,13 +2683,6 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type debug(4, L"eval: %ls", cmd); - if (!cmd) - { - debug(1, - EVAL_NULL_ERR_MSG); - bugreport(); - return 1; - } if ((block_type != TOP) && (block_type != SUBST)) @@ -2638,6 +2700,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type tokenizer_t local_tokenizer(cmd, 0); scoped_push tokenizer_push(¤t_tokenizer, &local_tokenizer); + scoped_push tokenizer_pos_push(¤t_tokenizer_pos, 0); error_code = 0; @@ -2646,7 +2709,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type while (tok_has_next(current_tokenizer) && !error_code && !sanity_check() && - !exit_status()) + !shell_is_exiting()) { this->eval_job(current_tokenizer); event_fire(NULL); @@ -2665,7 +2728,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type break; } - if ((!error_code) && (!exit_status()) && (!proc_get_last_status())) + if ((!error_code) && (!shell_is_exiting()) && (!proc_get_last_status())) { //debug( 2, L"Status %d\n", proc_get_last_status() ); @@ -2779,7 +2842,7 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha case 1: { - wchar_t *subst = wcsndup(paran_begin+1, paran_end-paran_begin-1); + const wcstring subst(paran_begin + 1, paran_end); wcstring tmp; tmp.append(arg_cpy, 
paran_begin - arg_cpy); @@ -2788,17 +2851,16 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha // debug( 1, L"%ls -> %ls %ls", arg_cpy, subst, tmp.buff ); - err |= parser_t::test(subst, 0, out, prefix); + parse_error_list_t errors; + err |= parse_util_detect_errors(subst, &errors); + if (out && ! errors.empty()) + { + out->append(parse_errors_description(errors, subst, prefix)); + } - free(subst); free(arg_cpy); arg_cpy = wcsdup(tmp.c_str()); - /* - Do _not_ call sb_destroy on this stringbuffer - it's - buffer is used as the new 'arg_cpy'. It is free'd at - the end of the loop. - */ break; } } @@ -2923,819 +2985,37 @@ struct block_info_t { int position; //tokenizer position block_type_t type; //type of the block - int indentation; //indentation associated with the block - - bool has_had_case; //if we are a switch, whether we've encountered a case }; -int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const wchar_t *prefix) +void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &errors, wcstring *output) const { - ASSERT_IS_MAIN_THREAD(); - - /* - Set to one if a command name has been given for the currently - parsed process specification - */ - int had_cmd=0; - int err=0; - int unfinished = 0; - - // These are very nearly stacks, but sometimes we have to inspect non-top elements (e.g. return) - std::vector block_infos; - int indentation_sum = 0; //sum of indentation in block_infos - int res = 0; - - /* - Set to 1 if the current command is inside a pipeline - */ - int is_pipeline = 0; - - /* - Set to one if the currently specified process can not be used inside a pipeline - */ - int forbid_pipeline = 0; - - /* - Set to one if an additional process specification is needed - */ - bool needs_cmd = false; - - /* - Counter on the number of arguments this function has encountered - so far. Is set to -1 when the count is unknown, i.e. 
after - encountering an argument that contains substitutions that can - expand to more/less arguemtns then 1. - */ - int arg_count=0; - - /* - The currently validated command. - */ - wcstring command; - bool has_command = false; - - CHECK(buff, 1); - - if (block_level) + assert(output != NULL); + if (! errors.empty()) { - size_t len = wcslen(buff); - for (size_t i=0; icurrent_filename(); + if (filename) { - block_level[i] = -1; + append_format(*output, _(L"fish: line %lu of %ls:\n"), which_line, user_presentable_path(filename).c_str()); } - + else + { + output->append(L"fish: "); + } + + // Don't include the caret if we're interactive, this is the first line of text, and our source is at its beginning, because then it's obvious + bool skip_caret = (get_is_interactive() && which_line == 1 && err.source_start == 0); + + output->append(err.describe(src, skip_caret)); + output->push_back(L'\n'); + + this->stack_trace(0, *output); } - - tokenizer_t tok(buff, 0); - - scoped_push tokenizer_push(¤t_tokenizer, &tok); - scoped_push tokenizer_pos_push(¤t_tokenizer_pos); - - for (;; tok_next(&tok)) - { - current_tokenizer_pos = tok_get_pos(&tok); - - int last_type = tok_last_type(&tok); - int end_of_cmd = 0; - - switch (last_type) - { - case TOK_STRING: - { - if (!had_cmd) - { - int mark = tok_get_pos(&tok); - had_cmd = 1; - arg_count=0; - - command = tok_last(&tok); - - // Pass SKIP_HOME_DIRECTORIES for https://github.com/fish-shell/fish-shell/issues/512 - has_command = expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_HOME_DIRECTORIES); - if (! 
has_command) - { - command = L""; - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - ILLEGAL_CMD_ERR_MSG, - tok_last(&tok)); - - print_errors(*out, prefix); - } - break; - } - - if (needs_cmd) - { - /* - end is not a valid command when a followup - command is needed, such as after 'and' or - 'while' - */ - if (contains(command, - L"end")) - { - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - COND_ERR_MSG); - - print_errors(*out, prefix); - } - } - - needs_cmd = false; - } - - /* - Decrement block count on end command - */ - if (command == L"end") - { - tok_next(&tok); - tok_set_pos(&tok, mark); - - /* Test that end is not used when not inside any block */ - if (block_infos.empty()) - { - err = 1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - INVALID_END_ERR_MSG); - print_errors(*out, prefix); - const wcstring h = builtin_help_get(*this, L"end"); - if (! h.empty()) - append_format(*out, L"%ls", h.c_str()); - } - } - else - { - indentation_sum -= block_infos.back().indentation; - block_infos.pop_back(); - - } - } - - /* - Store the block level. This needs to be done - _after_ checking for end commands, but _before_ - checking for block opening commands. - */ - if (block_level != NULL) - { - int indentation_adjust = 0; - if (command == L"else") - { - // if or else if goes back - indentation_adjust = -1; - } - else if (command == L"case") - { - if (! block_infos.empty() && block_infos.back().type == SWITCH) - { - // mark that we've encountered a case, and increase the indentation - // by doing this now, we avoid overly indenting the first case as the user types it - if (! 
block_infos.back().has_had_case) - { - block_infos.back().has_had_case = true; - block_infos.back().indentation += 1; - indentation_sum += 1; - } - // unindent this case - indentation_adjust = -1; - } - } - - block_level[tok_get_pos(&tok)] = indentation_sum + indentation_adjust; - } - - /* - Handle block commands - */ - if (parser_keywords_is_block(command)) - { - struct block_info_t info = {current_tokenizer_pos, parser_get_block_type(command), 1 /* indent */}; - block_infos.push_back(info); - indentation_sum += info.indentation; - tok_next(&tok); - tok_set_pos(&tok, mark); - } - - /* - If parser_keywords_is_subcommand is true, the command - accepts a second command as it's first - argument. If parser_skip_arguments is true, the - second argument is optional. - */ - if (parser_keywords_is_subcommand(command) && !parser_keywords_skip_arguments(command)) - { - needs_cmd = true; - had_cmd = 0; - } - - if (contains(command, - L"or", - L"and")) - { - /* - 'or' and 'and' can not be used inside pipelines - */ - if (is_pipeline) - { - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - EXEC_ERR_MSG); - - print_errors(*out, prefix); - - } - } - } - - /* - There are a lot of situations where pipelines - are forbidden, including when using the exec - builtin. 
- */ - if (parser_is_pipe_forbidden(command)) - { - if (is_pipeline) - { - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - EXEC_ERR_MSG); - - print_errors(*out, prefix); - - } - } - forbid_pipeline = 1; - } - - /* - Test that the case builtin is only used directly in a switch block - */ - if (command == L"case") - { - if (block_infos.empty() || block_infos.back().type != SWITCH) - { - err=1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - INVALID_CASE_ERR_MSG); - - print_errors(*out, prefix); - const wcstring h = builtin_help_get(*this, L"case"); - if (h.size()) - append_format(*out, L"%ls", h.c_str()); - } - } - } - - /* - Test that the return bultin is only used within function definitions - */ - if (command == L"return") - { - bool found_func = false; - size_t block_idx = block_infos.size(); - while (block_idx--) - { - if (block_infos.at(block_idx).type == FUNCTION_DEF) - { - found_func = true; - break; - } - } - - if (!found_func) - { - /* - Peek to see if the next argument is - --help, in which case we'll allow it to - show the help. 
- */ - - int old_pos = tok_get_pos(&tok); - int is_help = 0; - - tok_next(&tok); - if (tok_last_type(&tok) == TOK_STRING) - { - wcstring first_arg = tok_last(&tok); - if (expand_one(first_arg, EXPAND_SKIP_CMDSUBST) && parser_t::is_help(first_arg.c_str(), 3)) - { - is_help = 1; - } - } - - tok_set_pos(&tok, old_pos); - - if (!is_help) - { - err=1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - INVALID_RETURN_ERR_MSG); - print_errors(*out, prefix); - } - } - } - } - - - /* - Test that break and continue are only used within loop blocks - */ - if (contains(command, L"break", L"continue")) - { - bool found_loop = false; - size_t block_idx = block_infos.size(); - while (block_idx--) - { - block_type_t type = block_infos.at(block_idx).type; - if (type == WHILE || type == FOR) - { - found_loop = true; - break; - } - } - - if (!found_loop) - { - /* - Peek to see if the next argument is - --help, in which case we'll allow it to - show the help. - */ - - int old_pos = tok_get_pos(&tok); - int is_help = 0; - - tok_next(&tok); - if (tok_last_type(&tok) == TOK_STRING) - { - wcstring first_arg = tok_last(&tok); - if (expand_one(first_arg, EXPAND_SKIP_CMDSUBST) && parser_t::is_help(first_arg.c_str(), 3)) - { - is_help = 1; - } - } - - tok_set_pos(&tok, old_pos); - - if (!is_help) - { - err=1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - INVALID_LOOP_ERR_MSG); - print_errors(*out, prefix); - } - } - } - } - - /* - Test that else and else-if are only used directly in an if-block - */ - if (command == L"else") - { - if (block_infos.empty() || block_infos.back().type != IF) - { - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - INVALID_ELSE_ERR_MSG, - command.c_str()); - - print_errors(*out, prefix); - } - } - } - } - else - { - err |= parser_test_argument(tok_last(&tok), out, prefix, tok_get_pos(&tok)); - - /* If possible, keep track of number of supplied arguments */ - if (arg_count >= 0 && expand_is_clean(tok_last(&tok))) - { - 
arg_count++; - } - else - { - arg_count = -1; - } - - if (has_command) - { - - /* - Try to make sure the second argument to 'for' is 'in' - */ - if (command == L"for") - { - if (arg_count == 1) - { - - if (wcsvarname(tok_last(&tok))) - { - - err = 1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - BUILTIN_FOR_ERR_NAME, - L"for", - tok_last(&tok)); - - print_errors(*out, prefix); - } - } - - } - else if (arg_count == 2) - { - if (wcscmp(tok_last(&tok), L"in") != 0) - { - err = 1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - BUILTIN_FOR_ERR_IN, - L"for"); - - print_errors(*out, prefix); - } - } - } - } - else if (command == L"else") - { - if (arg_count == 1) - { - /* Any second argument must be "if" */ - if (wcscmp(tok_last(&tok), L"if") != 0) - { - err = 1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - BUILTIN_ELSEIF_ERR_ARGUMENT, - L"else"); - print_errors(*out, prefix); - } - } - else - { - /* Successfully detected "else if". Now we need a new command. 
*/ - needs_cmd = true; - had_cmd = false; - } - } - } - } - - } - - break; - } - - case TOK_REDIRECT_OUT: - case TOK_REDIRECT_IN: - case TOK_REDIRECT_APPEND: - case TOK_REDIRECT_FD: - case TOK_REDIRECT_NOCLOB: - { - if (!had_cmd) - { - err = 1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - INVALID_REDIRECTION_ERR_MSG); - print_errors(*out, prefix); - } - } - break; - } - - case TOK_END: - { - if (needs_cmd && !had_cmd) - { - err = 1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - CMD_ERR_MSG, - tok_get_desc(tok_last_type(&tok))); - print_errors(*out, prefix); - } - } - needs_cmd = false; - had_cmd = 0; - is_pipeline=0; - forbid_pipeline=0; - end_of_cmd = 1; - - break; - } - - case TOK_PIPE: - { - if (!had_cmd) - { - err=1; - if (out) - { - if (tok_get_pos(&tok)>0 && buff[tok_get_pos(&tok)-1] == L'|') - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - CMD_OR_ERR_MSG, - tok_get_desc(tok_last_type(&tok))); - - } - else - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - CMD_ERR_MSG, - tok_get_desc(tok_last_type(&tok))); - } - - print_errors(*out, prefix); - } - } - else if (forbid_pipeline) - { - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - EXEC_ERR_MSG); - - print_errors(*out, prefix); - } - } - else - { - needs_cmd = true; - is_pipeline=1; - had_cmd=0; - end_of_cmd = 1; - - } - break; - } - - case TOK_BACKGROUND: - { - if (!had_cmd) - { - err = 1; - if (out) - { - if (tok_get_pos(&tok)>0 && buff[tok_get_pos(&tok)-1] == L'&') - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - CMD_AND_ERR_MSG, - tok_get_desc(tok_last_type(&tok))); - - } - else - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - CMD_ERR_MSG, - tok_get_desc(tok_last_type(&tok))); - } - - print_errors(*out, prefix); - } - } - - had_cmd = 0; - end_of_cmd = 1; - - break; - } - - case TOK_ERROR: - default: - if (tok_get_error(&tok) == TOK_UNTERMINATED_QUOTE) - { - unfinished = 1; - } - else - { - // Only print errors once - if (out && ! 
err) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - TOK_ERR_MSG, - tok_last(&tok)); - - - print_errors(*out, prefix); - } - err = 1; - } - - break; - } - - if (end_of_cmd) - { - if (has_command && command == L"for") - { - if (arg_count >= 0 && arg_count < 2) - { - /* - Not enough arguments to the for builtin - */ - err = 1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - BUILTIN_FOR_ERR_COUNT, - L"for", - arg_count); - - print_errors(*out, prefix); - } - } - } - else if (has_command && command == L"else") - { - if (arg_count == 1) - { - /* If we have any arguments, we must have at least two...either "else" or "else if foo..." */ - err = true; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - BUILTIN_ELSEIF_ERR_COUNT, - L"else", - arg_count); - - print_errors(*out, prefix); - - } - } - } - - } - - if (!tok_has_next(&tok)) - break; - - } - - if (needs_cmd) - { - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - COND_ERR_MSG); - - print_errors(*out, prefix); - } - } - - - if (out != NULL && ! block_infos.empty()) - { - const wchar_t *cmd; - int bad_pos = block_infos.back().position; - block_type_t bad_type = block_infos.back().type; - - error(SYNTAX_ERROR, bad_pos, BLOCK_END_ERR_MSG); - - print_errors(*out, prefix); - - cmd = parser_get_block_command(bad_type); - if (cmd) - { - const wcstring h = builtin_help_get(*this, cmd); - if (h.size()) - { - append_format(*out, L"%ls", h.c_str()); - } - } - - - } - - /* - Fill in the unset block_level entries. Until now, only places - where the block level _changed_ have been filled out. This fills - in the rest. - */ - - if (block_level) - { - int last_level = 0; - size_t i, len = wcslen(buff); - for (i=0; i= 0) - { - last_level = block_level[i]; - /* - Make all whitespace before a token have the new - level. This avoid using the wrong indentation level - if a new line starts with whitespace. 
- */ - size_t prev_char_idx = i; - while (prev_char_idx--) - { - if (!wcschr(L" \n\t\r", buff[prev_char_idx])) - break; - block_level[prev_char_idx] = last_level; - } - } - block_level[i] = last_level; - } - - /* - Make all trailing whitespace have the block level that the - validator had at exit. This makes sure a new line is - correctly indented even if it is empty. - */ - int last_indent = block_infos.empty() ? 0 : block_infos.back().indentation; - size_t suffix_idx = len; - while (suffix_idx--) - { - if (!wcschr(L" \n\t\r", buff[suffix_idx])) - break; - block_level[suffix_idx] = last_indent; - } - } - - /* - Calculate exit status - */ - if (! block_infos.empty()) - unfinished = 1; - - if (err) - res |= PARSER_TEST_ERROR; - - if (unfinished) - res |= PARSER_TEST_INCOMPLETE; - - /* - Cleanup - */ - - error_code=0; - - - return res; - } block_t::block_t(block_type_t t) : @@ -3744,6 +3024,7 @@ block_t::block_t(block_type_t t) : skip(), had_command(), tok_pos(), + node_offset(NODE_OFFSET_INVALID), loop_status(), job(), src_filename(), @@ -3829,3 +3110,15 @@ breakpoint_block_t::breakpoint_block_t() : { } +bool parser_use_ast(void) +{ + env_var_t var = env_get_string(L"fish_new_parser"); + if (var.missing_or_empty()) + { + return 0; + } + else + { + return from_string(var); + } +} diff --git a/parser.h b/parser.h index 9b613d3c8..0c8c7334a 100644 --- a/parser.h +++ b/parser.h @@ -11,11 +11,9 @@ #include "util.h" #include "event.h" #include "function.h" +#include "parse_tree.h" #include -#define PARSER_TEST_ERROR 1 -#define PARSER_TEST_INCOMPLETE 2 - /** event_blockage_t represents a block on events of the specified type */ @@ -97,38 +95,19 @@ public: bool skip; /**< Whether execution of the commands in this block should be skipped */ bool had_command; /**< Set to non-zero once a command has been executed in this block */ int tok_pos; /**< The start index of the block */ + + node_offset_t node_offset; /* Offset of the node */ - /** - Status for the current loop block. 
Can be any of the values from the loop_status enum. - */ + /** Status for the current loop block. Can be any of the values from the loop_status enum. */ int loop_status; - /** - The job that is currently evaluated in the specified block. - */ + /** The job that is currently evaluated in the specified block. */ job_t *job; -#if 0 - union - { - int while_state; /**< True if the loop condition has not yet been evaluated*/ - wchar_t *for_variable; /**< Name of the variable to loop over */ - int if_state; /**< The state of the if block, can be one of IF_STATE_UNTESTED, IF_STATE_FALSE, IF_STATE_TRUE */ - wchar_t *switch_value; /**< The value to test in a switch block */ - const wchar_t *source_dest; /**< The name of the file to source*/ - event_t *event; /** execution_contexts; /** Description of last error */ wcstring err_buff; @@ -340,6 +327,7 @@ private: /* No copying allowed */ parser_t(const parser_t&); parser_t& operator=(const parser_t&); + void parse_job_argument_list(process_t *p, job_t *j, tokenizer_t *tok, std::vector&, bool); int parse_job(process_t *p, job_t *j, tokenizer_t *tok); @@ -350,7 +338,10 @@ private: void print_errors_stderr(); /** Create a job */ - job_t *job_create(); + job_t *job_create(const io_chain_t &io); + + /** Adds a job to the beginning of the job list. */ + void job_add(job_t *job); public: std::vector profile_items; @@ -389,11 +380,15 @@ public: \return 0 on success, 1 otherwise */ - int eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type_t block_type); - + int eval(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type); + int eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum block_type_t block_type); + + /** Evaluates a block node at the given node offset in the topmost execution context */ + int eval_block_node(node_offset_t node_idx, const io_chain_t &io, enum block_type_t block_type); + /** Evaluate line as a list of parameters, i.e. 
tokenize it and perform parameter expansion and cmdsubst execution on the tokens. - The output is inserted into output, and should be freed by the caller. + The output is inserted into output. \param line Line to evaluate \param output List to insert output to @@ -402,7 +397,7 @@ public: \param line Line to evaluate \param output List to insert output to */ - int eval_args(const wchar_t *line, std::vector &output); + void eval_args(const wchar_t *line, std::vector &output); /** Sets the current evaluation error. This function should only be used by libraries that are called by @@ -411,7 +406,7 @@ public: \param p The character offset at which the error occured \param str The printf-style error message filter */ - void error(int ec, int p, const wchar_t *str, ...); + void error(int ec, size_t p, const wchar_t *str, ...); /** Returns a string describing the current parser pisition in the format 'FILENAME (line LINE_NUMBER): LINE'. @@ -464,6 +459,9 @@ public: /** Remove the outermost block namespace */ void pop_block(); + + /** Remove the outermost block, asserting it's the given one */ + void pop_block(const block_t *b); /** Return a description of the given blocktype */ const wchar_t *get_block_desc(int block) const; @@ -492,7 +490,7 @@ public: \param out if non-null, any errors in the command will be filled out into this buffer \param prefix the prefix string to prepend to each error message written to the \c out buffer */ - int test(const wchar_t * buff, int *block_level = NULL, wcstring *out = NULL, const wchar_t *prefix = NULL); + void get_backtrace(const wcstring &src, const parse_error_list_t &errors, wcstring *output) const; /** Test if the specified string can be parsed as an argument list, @@ -529,7 +527,7 @@ public: \param s the string to test \param min_match is the minimum number of characters that must match in a long style option, i.e. the longest common prefix between --help and any other option. If less than 3, 3 will be assumed. 
*/ - int is_help(const wchar_t *s, int min_match) const; + static int is_help(const wchar_t *s, int min_match); /** Returns the file currently evaluated by the parser. This can be @@ -541,11 +539,14 @@ public: /** Write a stack trace starting at the specified block to the specified wcstring */ - void stack_trace(size_t block_idx, wcstring &buff); + void stack_trace(size_t block_idx, wcstring &buff) const; int get_block_type(const wchar_t *cmd) const; const wchar_t *get_block_command(int type) const; }; +/* Temporary */ +bool parser_use_ast(void); + #endif diff --git a/proc.cpp b/proc.cpp index a774d3597..4a1bfd5b5 100644 --- a/proc.cpp +++ b/proc.cpp @@ -136,7 +136,9 @@ static bool proc_had_barrier = false; int get_is_interactive(void) { ASSERT_IS_MAIN_THREAD(); - return is_interactive; + /* is_interactive is initialized to -1; ensure someone has popped/pushed it before then */ + assert(is_interactive >= 0); + return is_interactive > 0; } bool get_proc_had_barrier() @@ -515,7 +517,8 @@ static void handle_child_status(pid_t pid, int status) process_t::process_t() : argv_array(), argv0_narrow(), - type(0), + type(), + internal_block_node(NODE_OFFSET_INVALID), actual_cmd(), pid(0), pipe_write_fd(0), @@ -637,6 +640,9 @@ int job_reap(bool interactive) static int locked = 0; locked++; + + /* Preserve the exit status */ + const int saved_status = proc_get_last_status(); /* job_read may fire an event handler, we do not want to call @@ -752,6 +758,9 @@ int job_reap(bool interactive) if (found) fflush(stdout); + /* Restore the exit status. 
*/ + proc_set_last_status(saved_status); + locked = 0; return found; diff --git a/proc.h b/proc.h index b1661b801..510f549a2 100644 --- a/proc.h +++ b/proc.h @@ -20,6 +20,7 @@ #include "util.h" #include "io.h" #include "common.h" +#include "parse_tree.h" /** The status code use when a command was not found @@ -54,7 +55,7 @@ /** Types of processes */ -enum +enum process_type_t { /** A regular external command @@ -72,6 +73,10 @@ enum A block of commands */ INTERNAL_BLOCK, + + /** A block of commands, represented as a node */ + INTERNAL_BLOCK_NODE, + /** The exec builtin */ @@ -81,8 +86,7 @@ enum */ INTERNAL_BUFFER, -} -; +}; enum { @@ -151,8 +155,10 @@ public: INTERNAL_BUILTIN, \c INTERNAL_FUNCTION, \c INTERNAL_BLOCK, INTERNAL_EXEC, or INTERNAL_BUFFER */ - int type; - + enum process_type_t type; + + /* For internal block processes only, the node offset of the block */ + node_offset_t internal_block_node; /** Sets argv */ void set_argv(const wcstring_list_t &argv) @@ -505,18 +511,12 @@ void job_free(job_t* j); */ void job_promote(job_t *job); -/** - Create a new job. -*/ -job_t *job_create(); - /** Return the job with the specified job id. If id is 0 or less, return the last job used. */ job_t *job_get(job_id_t id); - /** Return the job with the specified pid. */ diff --git a/reader.cpp b/reader.cpp index b0cbd8a35..6cf77ae41 100644 --- a/reader.cpp +++ b/reader.cpp @@ -99,6 +99,7 @@ commence. 
#include "path.h" #include "parse_util.h" #include "parser_keywords.h" +#include "parse_tree.h" /** Maximum length of prefix string when printing completion @@ -518,7 +519,7 @@ wcstring combine_command_and_autosuggestion(const wcstring &cmdline, const wcstr static void reader_repaint() { // Update the indentation - parser_t::principal_parser().test(data->command_line.c_str(), &data->indents[0]); + data->indents = parse_util_compute_indents(data->command_line); // Combine the command and autosuggestion into one string wcstring full_line = combine_command_and_autosuggestion(data->command_line, data->autosuggestion); @@ -659,117 +660,55 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso const size_t subcmd_offset = cmdsub_begin - buff; const wcstring subcmd = wcstring(cmdsub_begin, cmdsub_end - cmdsub_begin); - const wchar_t *subcmd_cstr = subcmd.c_str(); - - /* Get the token containing the cursor */ - const wchar_t *subcmd_tok_begin = NULL, *subcmd_tok_end = NULL; - assert(cursor_pos >= subcmd_offset); - size_t subcmd_cursor_pos = cursor_pos - subcmd_offset; - parse_util_token_extent(subcmd_cstr, subcmd_cursor_pos, &subcmd_tok_begin, &subcmd_tok_end, NULL, NULL); - - /* Compute the offset of the token before the cursor within the subcmd */ - assert(subcmd_tok_begin >= subcmd_cstr); - assert(subcmd_tok_end >= subcmd_tok_begin); - const size_t subcmd_tok_begin_offset = subcmd_tok_begin - subcmd_cstr; - const size_t subcmd_tok_length = subcmd_tok_end - subcmd_tok_begin; - - /* Now parse the subcmd, looking for commands */ - bool had_cmd = false, previous_token_is_cmd = false; - tokenizer_t tok(subcmd_cstr, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); - for (; tok_has_next(&tok); tok_next(&tok)) + const size_t subcmd_cursor_pos = cursor_pos - subcmd_offset; + + /* Parse this subcmd */ + parse_node_tree_t parse_tree; + parse_tree_from_string(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); + 
+ /* Look for plain statements where the cursor is at the end of the command */ + const parse_node_t *matching_cmd_node = NULL; + const size_t len = parse_tree.size(); + for (size_t i=0; i < len; i++) { - size_t tok_pos = static_cast(tok_get_pos(&tok)); - if (tok_pos > subcmd_tok_begin_offset) + const parse_node_t &node = parse_tree.at(i); + + /* Only interested in plain statements with source */ + if (node.type != symbol_plain_statement || ! node.has_source()) + continue; + + /* Skip decorated statements */ + if (parse_tree.decoration_for_plain_statement(node) != parse_statement_decoration_none) + continue; + + /* Get the command node. Skip it if we can't or it has no source */ + const parse_node_t *cmd_node = parse_tree.get_child(node, 0, parse_token_type_string); + if (cmd_node == NULL || ! cmd_node->has_source()) + continue; + + /* Now see if its source range contains our cursor, including at the end */ + if (subcmd_cursor_pos >= cmd_node->source_start && subcmd_cursor_pos <= cmd_node->source_start + cmd_node->source_length) { - /* We've passed the token we're interested in */ + /* Success! */ + matching_cmd_node = cmd_node; break; } - - int last_type = tok_last_type(&tok); - - switch (last_type) - { - case TOK_STRING: - { - if (had_cmd) - { - /* Parameter to the command. */ - } - else - { - const wcstring potential_cmd = tok_last(&tok); - if (parser_keywords_is_subcommand(potential_cmd)) - { - if (potential_cmd == L"command" || potential_cmd == L"builtin") - { - /* 'command' and 'builtin' defeat abbreviation expansion. Skip this command. */ - had_cmd = true; - } - else - { - /* Other subcommand. Pretend it doesn't exist so that we can expand the following command */ - had_cmd = false; - } - } - else - { - /* It's a normal command */ - had_cmd = true; - if (tok_pos == subcmd_tok_begin_offset) - { - /* This is the token we care about! 
*/ - previous_token_is_cmd = true; - } - } - } - break; - } - - case TOK_REDIRECT_NOCLOB: - case TOK_REDIRECT_OUT: - case TOK_REDIRECT_IN: - case TOK_REDIRECT_APPEND: - case TOK_REDIRECT_FD: - { - if (!had_cmd) - { - break; - } - tok_next(&tok); - break; - } - - case TOK_PIPE: - case TOK_BACKGROUND: - case TOK_END: - { - had_cmd = false; - break; - } - - case TOK_COMMENT: - case TOK_ERROR: - default: - { - break; - } - } } - + + /* Now if we found a command node, expand it */ bool result = false; - if (previous_token_is_cmd) + if (matching_cmd_node != NULL) { - /* The token is a command. Try expanding it as an abbreviation. */ - const wcstring token = wcstring(subcmd, subcmd_tok_begin_offset, subcmd_tok_length); + assert(matching_cmd_node->type == parse_token_type_string); + const wcstring token = matching_cmd_node->get_source(subcmd); wcstring abbreviation; if (expand_abbreviation(token, &abbreviation)) { /* There was an abbreviation! Replace the token in the full command. Maintain the relative position of the cursor. 
*/ if (output != NULL) { - size_t cmd_tok_begin_offset = subcmd_tok_begin_offset + subcmd_offset; output->assign(cmdline); - output->replace(cmd_tok_begin_offset, subcmd_tok_length, abbreviation); + output->replace(subcmd_offset + matching_cmd_node->source_start, matching_cmd_node->source_length, abbreviation); } result = true; } @@ -1494,7 +1433,7 @@ struct autosuggestion_context_t { const completion_t &comp = completions.at(0); size_t cursor = this->cursor_pos; - this->autosuggestion = completion_apply_to_command_line(comp.completion.c_str(), comp.flags, this->search_string, &cursor, true /* append only */); + this->autosuggestion = completion_apply_to_command_line(comp.completion, comp.flags, this->search_string, &cursor, true /* append only */); return 1; } @@ -2129,11 +2068,9 @@ static void reader_interactive_destroy() void reader_sanity_check() { - if (get_is_interactive()) + /* Note: 'data' is non-null if we are interactive, except in the testing environment */ + if (get_is_interactive() && data != NULL) { - if (!data) - sanity_lose(); - if (!(data->buff_pos <= data->command_length())) sanity_lose(); @@ -2263,7 +2200,6 @@ static void handle_token_history(int forward, int reset) */ if (data->history_search.go_backwards()) { - wcstring item = data->history_search.current_string(); data->token_history_buff = data->history_search.current_string(); } current_pos = data->token_history_buff.size(); @@ -2533,28 +2469,26 @@ void reader_run_command(parser_t &parser, const wcstring &cmd) int reader_shell_test(const wchar_t *b) { - int res = parser_t::principal_parser().test(b); + assert(b != NULL); + wcstring bstr = b; + + /* Append a newline, to act as a statement terminator */ + bstr.push_back(L'\n'); + + parse_error_list_t errors; + int res = parse_util_detect_errors(bstr, &errors); if (res & PARSER_TEST_ERROR) { - wcstring sb; - - const int tmp[1] = {0}; - const int tmp2[1] = {0}; - const wcstring empty; - - s_write(&data->screen, - empty, - empty, - empty, - 0, - 
tmp, - tmp2, - 0); - - - parser_t::principal_parser().test(b, NULL, &sb, L"fish"); - fwprintf(stderr, L"%ls", sb.c_str()); + wcstring error_desc; + parser_t::principal_parser().get_backtrace(bstr, errors, &error_desc); + + // ensure we end with a newline. Also add an initial newline, because it's likely the user just hit enter and so there's junk on the current line + if (! string_suffixes_string(L"\n", error_desc)) + { + error_desc.push_back(L'\n'); + } + fwprintf(stderr, L"\n%ls", error_desc.c_str()); } return res; } @@ -2804,10 +2738,10 @@ static void reader_super_highlight_me_plenty(size_t match_highlight_pos) } -int exit_status() +bool shell_is_exiting() { if (get_is_interactive()) - return job_list_is_empty() && data->end_loop; + return job_list_is_empty() && data != NULL && data->end_loop; else return end_loop; } @@ -3058,6 +2992,7 @@ const wchar_t *reader_readline(void) is_interactive_read = 1; c=input_readch(); is_interactive_read = was_interactive_read; + //fprintf(stderr, "C: %lx\n", (long)c); if (((!wchar_private(c))) && (c>31) && (c != 127)) { @@ -3231,6 +3166,9 @@ const wchar_t *reader_readline(void) /* Figure out the extent of the token within the command substitution. Note we pass cmdsub_begin here, not buff */ const wchar_t *token_begin, *token_end; parse_util_token_extent(cmdsub_begin, data->buff_pos - (cmdsub_begin-buff), &token_begin, &token_end, 0, 0); + + /* Hack: the token may extend past the end of the command substitution, e.g. in (echo foo) the last token is 'foo)'. Don't let that happen. */ + if (token_end > cmdsub_end) token_end = cmdsub_end; /* Figure out how many steps to get from the current position to the end of the current token. 
*/ size_t end_of_token_offset = token_end - buff; @@ -3386,7 +3324,7 @@ const wchar_t *reader_readline(void) { //history_reset(); data->history_search.go_to_end(); - reader_set_buffer(data->search_buff.c_str(), data->search_buff.size()); + reader_set_buffer(data->search_buff, data->search_buff.size()); } else { @@ -3463,12 +3401,9 @@ const wchar_t *reader_readline(void) case 0: { /* Finished command, execute it. Don't add items that start with a leading space. */ - if (! data->command_line.empty() && data->command_line.at(0) != L' ') + if (data->history != NULL && ! data->command_line.empty() && data->command_line.at(0) != L' ') { - if (data->history != NULL) - { - data->history->add_with_file_detection(data->command_line); - } + data->history->add_with_file_detection(data->command_line); } finished=1; data->buff_pos=data->command_length(); @@ -3958,13 +3893,15 @@ static int read_ni(int fd, const io_chain_t &io) res = 1; } - wcstring sb; - if (! parser.test(str.c_str(), 0, &sb, L"fish")) + parse_error_list_t errors; + if (! parse_util_detect_errors(str, &errors)) { parser.eval(str, io, TOP); } else { + wcstring sb; + parser.get_backtrace(str, errors, &sb); fwprintf(stderr, L"%ls", sb.c_str()); res = 1; } diff --git a/reader.h b/reader.h index b954c1bea..e028e2f03 100644 --- a/reader.h +++ b/reader.h @@ -217,7 +217,7 @@ void reader_set_exit_on_interrupt(bool flag); /** Returns true if the shell is exiting, 0 otherwise. */ -int exit_status(); +bool shell_is_exiting(); /** The readers interrupt signal handler. Cancels all currently running blocks. 
diff --git a/tests/test1.in b/tests/test1.in index c180159c8..7f60a4dad 100644 --- a/tests/test1.in +++ b/tests/test1.in @@ -15,7 +15,7 @@ echo x-{1} echo x-{1,2} echo foo-{1,2{3,4}} -# Escpaed newlines +# Escaped newlines echo foo\ bar echo foo\ bar @@ -99,6 +99,12 @@ echo Test 5 $sta echo Test redirections begin ; echo output ; echo errput 1>&2 ; end 2>&1 | tee /tmp/tee_test.txt ; cat /tmp/tee_test.txt +# Verify that we can pipe something other than stdout +# The first line should be printed, since we output to stdout but pipe stderr to /dev/null +# The second line should not be printed, since we output to stderr and pipe it to /dev/null +begin ; echo is_stdout ; end 2>| cat > /dev/null +begin ; echo is_stderr 1>&2 ; end 2>| cat > /dev/null + # echo tests echo 'abc\ndef' diff --git a/tests/test1.out b/tests/test1.out index c6ecbb308..b3460cdde 100644 --- a/tests/test1.out +++ b/tests/test1.out @@ -23,6 +23,7 @@ errput output errput output +is_stdout abc\ndef abc def diff --git a/tests/test7.in b/tests/test7.in index 22f5d92c6..a3ae8360c 100644 --- a/tests/test7.in +++ b/tests/test7.in @@ -20,15 +20,6 @@ case one echo $status end -# Test that non-case tokens inside `switch` don't blow away status -# (why are these even allowed?) 
-false -switch one -true -case one - echo $status -end - #test contains -i echo test contains -i contains -i string a b c string d diff --git a/tests/test7.out b/tests/test7.out index fd3b8a701..bbe2ab1a5 100644 --- a/tests/test7.out +++ b/tests/test7.out @@ -3,7 +3,6 @@ 3 0 -1 1 test contains -i 4 diff --git a/tests/test9.in b/tests/test9.in index a38fbc7c1..e449a21dd 100644 --- a/tests/test9.in +++ b/tests/test9.in @@ -35,3 +35,39 @@ emit test3 foo bar # test empty argument emit + +echo "Test break and continue" +# This should output Ping once +for i in a b c + if not contains $i c ; continue ; end + echo Ping +end + +# This should output Pong not at all +for i in a b c + if not contains $i c ; break ; end + echo Pong +end + +# This should output Foop three times, and Boop not at all +set i a a a +while contains $i a + set -e i[-1] + echo Foop + continue + echo Boop +end + +# This should output Doop once +set i a a a +while contains $i a + set -e i[-1] + echo Doop + break + echo Darp +end + +# Test implicit cd. This should do nothing. +./ + +false diff --git a/tests/test9.out b/tests/test9.out index 8e19365cd..cf9054f8c 100644 --- a/tests/test9.out +++ b/tests/test9.out @@ -2,3 +2,9 @@ Testing that builtins can truncate files abc before:test1 received event test3 with args: foo bar +Test break and continue +Ping +Foop +Foop +Foop +Doop diff --git a/tokenizer.cpp b/tokenizer.cpp index 90c8b703f..4e2b402c6 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -14,7 +14,7 @@ segments. #include #include #include - +#include #include "fallback.h" #include "util.h" @@ -50,7 +50,7 @@ segments. /** Error string for when trying to pipe from fd 0 */ -#define PIPE_ERROR _( L"Can not use fd 0 as pipe output" ) +#define PIPE_ERROR _( L"Cannot use stdin (fd 0) as pipe output" ) /** Characters that separate tokens. They are ordered by frequency of occurrence to increase parsing speed. 
@@ -64,7 +64,6 @@ static const wchar_t *tok_desc[] = { N_(L"Tokenizer not yet initialized"), N_(L"Tokenizer error"), - N_(L"Invalid token"), N_(L"String"), N_(L"Pipe"), N_(L"End of command"), @@ -77,6 +76,8 @@ static const wchar_t *tok_desc[] = N_(L"Comment") }; + + /** Set the latest tokens string to be the specified error message */ @@ -95,16 +96,8 @@ int tok_get_error(tokenizer_t *tok) tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), last_quote(0), error(0), squash_errors(false), cached_lineno_offset(0), cached_lineno_count(0) { - - /* We can only generate error messages on the main thread due to wgettext() thread safety issues. */ - if (!(flags & TOK_SQUASH_ERRORS)) - { - ASSERT_IS_MAIN_THREAD(); - } - CHECK(b,); - this->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED); this->show_comments = !!(flags & TOK_SHOW_COMMENTS); this->squash_errors = !!(flags & TOK_SQUASH_ERRORS); @@ -435,65 +428,141 @@ static void read_comment(tokenizer_t *tok) tok->last_type = TOK_COMMENT; } -/** - Read a FD redirection. + + +/* Reads a redirection or an "fd pipe" (like 2>|) from a string. Returns how many characters were consumed. If zero, then this string was not a redirection. + + Also returns by reference the redirection mode, and the fd to redirection. If there is overflow, *out_fd is set to -1. */ -static void read_redirect(tokenizer_t *tok, int fd) +static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type *out_redirection_mode, int *out_fd) { + bool errored = false; + int fd = 0; enum token_type redirection_mode = TOK_NONE; - if ((*tok->buff == L'>') || - (*tok->buff == L'^')) + size_t idx = 0; + + /* Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like '>' or '^'. Try parsing out a number; if we did not get any digits then infer it from the first character. 
Watch out for overflow. */ + long long big_fd = 0; + for (; iswdigit(buff[idx]); idx++) { - tok->buff++; - if (*tok->buff == *(tok->buff-1)) + /* Note that it's important we consume all the digits here, even if it overflows. */ + if (big_fd <= INT_MAX) + big_fd = big_fd * 10 + (buff[idx] - L'0'); + } + + fd = (big_fd > INT_MAX ? -1 : static_cast(big_fd)); + + if (idx == 0) + { + /* We did not find a leading digit, so there's no explicit fd. Infer it from the type */ + switch (buff[idx]) { - tok->buff++; - redirection_mode = TOK_REDIRECT_APPEND; - } - else - { - redirection_mode = TOK_REDIRECT_OUT; - } - - if (*tok->buff == L'|') - { - if (fd == 0) - { - TOK_CALL_ERROR(tok, TOK_OTHER, PIPE_ERROR); - return; - } - tok->buff++; - tok->last_token = to_string(fd); - tok->last_type = TOK_PIPE; - return; + case L'>': fd = STDOUT_FILENO; break; + case L'<': fd = STDIN_FILENO; break; + case L'^': fd = STDERR_FILENO; break; + default: errored = true; break; } } - else if (*tok->buff == L'<') + + /* Either way we should have ended on the redirection character itself like '>' */ + wchar_t redirect_char = buff[idx++]; //note increment of idx + if (redirect_char == L'>' || redirect_char == L'^') + { + redirection_mode = TOK_REDIRECT_OUT; + if (buff[idx] == redirect_char) + { + /* Doubled up like ^^ or >>. That means append */ + redirection_mode = TOK_REDIRECT_APPEND; + idx++; + } + } + else if (redirect_char == L'<') { - tok->buff++; redirection_mode = TOK_REDIRECT_IN; } else { - TOK_CALL_ERROR(tok, TOK_OTHER, REDIRECT_ERROR); + /* Something else */ + errored = true; } + + /* Optional characters like & or ?, or the pipe char | */ + wchar_t opt_char = buff[idx]; + if (opt_char == L'&') + { + redirection_mode = TOK_REDIRECT_FD; + idx++; + } + else if (opt_char == L'?') + { + redirection_mode = TOK_REDIRECT_NOCLOB; + idx++; + } + else if (opt_char == L'|') + { + /* So the string looked like '2>|'. This is not a redirection - it's a pipe! That gets handled elsewhere. 
*/ + redirection_mode = TOK_PIPE; + idx++; + } + + /* Don't return valid-looking stuff on error */ + if (errored) + { + idx = 0; + redirection_mode = TOK_NONE; + } + + /* Return stuff */ + if (out_redirection_mode != NULL) + *out_redirection_mode = redirection_mode; + if (out_fd != NULL) + *out_fd = fd; + + return idx; +} - tok->last_token = to_string(fd); +enum token_type redirection_type_for_string(const wcstring &str, int *out_fd) +{ + enum token_type mode = TOK_NONE; + int fd = 0; + read_redirection_or_fd_pipe(str.c_str(), &mode, &fd); + /* Redirections only, no pipes */ + if (mode == TOK_PIPE || fd < 0) + mode = TOK_NONE; + if (out_fd != NULL) + *out_fd = fd; + return mode; +} - if (*tok->buff == L'&') +int fd_redirected_by_pipe(const wcstring &str) +{ + /* Hack for the common case */ + if (str == L"|") { - tok->buff++; - tok->last_type = TOK_REDIRECT_FD; + return STDOUT_FILENO; } - else if (*tok->buff == L'?') + + enum token_type mode = TOK_NONE; + int fd = 0; + read_redirection_or_fd_pipe(str.c_str(), &mode, &fd); + /* Pipes only */ + if (mode != TOK_PIPE || fd < 0) + fd = -1; + return fd; +} + +int oflags_for_redirection_type(enum token_type type) +{ + switch (type) { - tok->buff++; - tok->last_type = TOK_REDIRECT_NOCLOB; - } - else - { - tok->last_type = redirection_mode; + case TOK_REDIRECT_APPEND: return O_CREAT | O_APPEND | O_WRONLY; + case TOK_REDIRECT_OUT: return O_CREAT | O_WRONLY | O_TRUNC; + case TOK_REDIRECT_NOCLOB: return O_CREAT | O_EXCL | O_WRONLY; + case TOK_REDIRECT_IN: return O_RDONLY; + + default: + return -1; } } @@ -516,7 +585,7 @@ static bool my_iswspace(wchar_t c) const wchar_t *tok_get_desc(int type) { - if (type < 0 || (size_t)type >= sizeof(tok_desc)) + if (type < 0 || (size_t)type >= (sizeof tok_desc / sizeof *tok_desc)) { return _(L"Invalid token type"); } @@ -606,36 +675,56 @@ void tok_next(tokenizer_t *tok) break; case L'>': - read_redirect(tok, 1); - return; case L'<': - read_redirect(tok, 0); - return; case L'^': - 
read_redirect(tok, 2); - return; + { + /* There's some duplication with the code in the default case below. The key difference here is that we must never parse these as a string; a failed redirection is an error! */ + enum token_type mode = TOK_NONE; + int fd = -1; + size_t consumed = read_redirection_or_fd_pipe(tok->buff, &mode, &fd); + if (consumed == 0 || fd < 0) + { + TOK_CALL_ERROR(tok, TOK_OTHER, REDIRECT_ERROR); + } + else + { + tok->buff += consumed; + tok->last_type = mode; + tok->last_token = to_string(fd); + } + } + break; default: { + /* Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string */ + size_t consumed = 0; + enum token_type mode = TOK_NONE; + int fd = -1; if (iswdigit(*tok->buff)) + consumed = read_redirection_or_fd_pipe(tok->buff, &mode, &fd); + + if (consumed > 0) { - const wchar_t *orig = tok->buff; - int fd = 0; - while (iswdigit(*tok->buff)) - fd = (fd*10) + (*(tok->buff++) - L'0'); - - switch (*(tok->buff)) + /* It looks like a redirection or a pipe. But we don't support piping fd 0. Note that fd 0 may be -1, indicating overflow; but we don't treat that as a tokenizer error. */ + if (mode == TOK_PIPE && fd == 0) { - case L'^': - case L'>': - case L'<': - read_redirect(tok, fd); - return; + TOK_CALL_ERROR(tok, TOK_OTHER, PIPE_ERROR); + } + else + { + tok->buff += consumed; + tok->last_type = mode; + tok->last_token = to_string(fd); } - tok->buff = orig; } - read_string(tok); + else + { + /* Not a redirection or pipe, so just a stirng */ + read_string(tok); + } } + break; } @@ -693,13 +782,19 @@ wcstring tok_first(const wchar_t *str) return result; } -int tok_get_pos(tokenizer_t *tok) +int tok_get_pos(const tokenizer_t *tok) { CHECK(tok, 0); - return (int)tok->last_pos; } +size_t tok_get_extent(const tokenizer_t *tok) +{ + CHECK(tok, 0); + size_t current_pos = tok->buff - tok->orig_buff; + return current_pos > tok->last_pos ? 
current_pos - tok->last_pos : 0; +} + void tok_set_pos(tokenizer_t *tok, int pos) { diff --git a/tokenizer.h b/tokenizer.h index 027f2d6d7..c50aac99f 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -19,10 +19,9 @@ enum token_type { TOK_NONE, /**< Tokenizer not yet constructed */ TOK_ERROR, /**< Error reading token */ - TOK_INVALID,/**< Invalid token */ TOK_STRING,/**< String token */ TOK_PIPE,/**< Pipe token */ - TOK_END,/**< End token */ + TOK_END,/**< End token (semicolon or newline, not literal end) */ TOK_REDIRECT_OUT, /**< redirection token */ TOK_REDIRECT_APPEND,/**< redirection append token */ TOK_REDIRECT_IN,/**< input redirection token */ @@ -143,7 +142,10 @@ int tok_has_next(tokenizer_t *tok); /** Returns the position of the beginning of the current token in the original string */ -int tok_get_pos(tokenizer_t *tok); +int tok_get_pos(const tokenizer_t *tok); + +/** Returns the extent of the current token */ +size_t tok_get_extent(const tokenizer_t *tok); /** Returns the token type after the current one, without adjusting the position. Optionally returns the next string by reference. */ enum token_type tok_peek_next(tokenizer_t *tok, wcstring *out_next_string); @@ -185,6 +187,15 @@ const wchar_t *tok_get_desc(int type); */ int tok_get_error(tokenizer_t *tok); +/* Helper function to determine redirection type from a string, or TOK_NONE if the redirection is invalid. Also returns the fd by reference. 
*/ +enum token_type redirection_type_for_string(const wcstring &str, int *out_fd = NULL); + +/* Helper function to determine which fd is redirected by a pipe */ +int fd_redirected_by_pipe(const wcstring &str); + +/* Helper function to return oflags (as in open(2)) for a redirection type */ +int oflags_for_redirection_type(enum token_type type); + enum move_word_style_t { move_word_style_punctuation, //stop at punctuation diff --git a/wutil.cpp b/wutil.cpp index 3f70368e8..ffb4f2b22 100644 --- a/wutil.cpp +++ b/wutil.cpp @@ -476,7 +476,7 @@ const wchar_t *wgettext(const wchar_t *in) { cstring mbs_in = wcs2string(key); char *out = fish_gettext(mbs_in.c_str()); - val = new wcstring(format_string(L"%s", out)); + val = new wcstring(format_string(L"%s", out)); //note that this writes into the map! } errno = err; return val->c_str();