fish-shell/src/highlight.cpp
Kurtis Rader f2246dfb34 reduce number of Unicode private-use characters
This narrows the range of Unicode codepoints fish reserves for its own
use from U+E000 thru U+F8FE (6399 codepoints) to U+F600 thru U+F73F (320
codepoints). This is still not ideal since fish shouldn't be using any
Unicode private-use codepoints but it's a step in the right direction.

This partially addresses issue #2684.
2016-02-28 18:36:34 -08:00

1492 lines
54 KiB
C++

/** \file highlight.cpp
Functions for syntax highlighting
*/
#include "config.h" // IWYU pragma: keep
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
#include <wchar.h>
#include <algorithm>
#include <dirent.h>
#include <map>
#include <set>
#include <string>
#include "fallback.h"
#include "wutil.h"
#include "highlight.h"
#include "tokenizer.h"
#include "parse_util.h"
#include "parse_constants.h"
#include "builtin.h"
#include "function.h"
#include "env.h"
#include "expand.h"
#include "common.h"
#include "complete.h"
#include "output.h"
#include "wildcard.h"
#include "path.h"
#include "history.h"
#include "parse_tree.h"
#define CURSOR_POSITION_INVALID ((size_t)(-1))
/**
Number of elements in the highlight_var array
*/
#define VAR_COUNT ( sizeof(highlight_var)/sizeof(wchar_t *) )
/** The environment variables used to specify the color of different tokens. This matches the order in highlight_spec_t */
static const wchar_t * const highlight_var[] =
{
L"fish_color_normal",
L"fish_color_error",
L"fish_color_command",
L"fish_color_end",
L"fish_color_param",
L"fish_color_comment",
L"fish_color_match",
L"fish_color_search_match",
L"fish_color_operator",
L"fish_color_escape",
L"fish_color_quote",
L"fish_color_redirection",
L"fish_color_autosuggestion",
L"fish_color_selection",
L"fish_pager_color_prefix",
L"fish_pager_color_completion",
L"fish_pager_color_description",
L"fish_pager_color_progress",
L"fish_pager_color_secondary"
};
/* Cache mapping a path to whether its filesystem was found to be case insensitive. */
typedef std::map<wcstring, bool> case_sensitivity_cache_t;

/* Report whether the filesystem containing the given fd is case insensitive.
   The answer is memoized in case_sensitivity_cache, keyed by path.
   When _PC_CASE_SENSITIVE is not available we always answer "case sensitive" (false). */
bool fs_is_case_insensitive(const wcstring &path, int fd, case_sensitivity_cache_t &case_sensitivity_cache)
{
#ifdef _PC_CASE_SENSITIVE
    /* Cache hit: reuse the earlier answer */
    case_sensitivity_cache_t::iterator hit = case_sensitivity_cache.find(path);
    if (hit != case_sensitivity_cache.end())
    {
        return hit->second;
    }

    /* Cache miss: ask the system. -1 is an error (treated as case sensitive), 1 is case sensitive, 0 is case insensitive */
    const long answer = fpathconf(fd, _PC_CASE_SENSITIVE);
    const bool insensitive = (answer == 0);
    case_sensitivity_cache[path] = insensitive;
    return insensitive;
#else
    /* No way to ask, so assume case sensitive */
    return false;
#endif
}
/* Tests whether the specified string cpath is the prefix of anything we could cd to. directories is a list of possible parent directories (typically either the working directory, or the cdpath). This does I/O!
Hack: if out_suggested_cdpath is not NULL, it returns the autosuggestion for cd. This descends the deepest unique directory hierarchy.
We expect the path to already be unescaped.
*/
bool is_potential_path(const wcstring &potential_path_fragment, const wcstring_list_t &directories, path_flags_t flags)
{
ASSERT_IS_BACKGROUND_THREAD();
const bool require_dir = !!(flags & PATH_REQUIRE_DIR);
wcstring clean_potential_path_fragment;
int has_magic = 0;
bool result = false;
wcstring path_with_magic(potential_path_fragment);
if (flags & PATH_EXPAND_TILDE)
expand_tilde(path_with_magic);
// debug( 1, L"%ls -> %ls ->%ls", path, tilde, unescaped );
for (size_t i=0; i < path_with_magic.size(); i++)
{
wchar_t c = path_with_magic.at(i);
switch (c)
{
case PROCESS_EXPAND:
case VARIABLE_EXPAND:
case VARIABLE_EXPAND_SINGLE:
case BRACKET_BEGIN:
case BRACKET_END:
case BRACKET_SEP:
case ANY_CHAR:
case ANY_STRING:
case ANY_STRING_RECURSIVE:
{
has_magic = 1;
break;
}
case INTERNAL_SEPARATOR:
{
break;
}
default:
{
clean_potential_path_fragment.push_back(c);
break;
}
}
}
if (! has_magic && ! clean_potential_path_fragment.empty())
{
/* Don't test the same path multiple times, which can happen if the path is absolute and the CDPATH contains multiple entries */
std::set<wcstring> checked_paths;
/* Keep a cache of which paths / filesystems are case sensitive */
case_sensitivity_cache_t case_sensitivity_cache;
for (size_t wd_idx = 0; wd_idx < directories.size() && ! result; wd_idx++)
{
const wcstring &wd = directories.at(wd_idx);
const wcstring abs_path = path_apply_working_directory(clean_potential_path_fragment, wd);
/* Skip this if it's empty or we've already checked it */
if (abs_path.empty() || checked_paths.count(abs_path))
continue;
checked_paths.insert(abs_path);
/* If we end with a slash, then it must be a directory */
bool must_be_full_dir = abs_path.at(abs_path.size()-1) == L'/';
if (must_be_full_dir)
{
struct stat buf;
if (0 == wstat(abs_path, &buf) && S_ISDIR(buf.st_mode))
{
result = true;
}
}
else
{
/* We do not end with a slash; it does not have to be a directory */
DIR *dir = NULL;
const wcstring dir_name = wdirname(abs_path);
const wcstring filename_fragment = wbasename(abs_path);
if (dir_name == L"/" && filename_fragment == L"/")
{
/* cd ///.... No autosuggestion. */
result = true;
}
else if ((dir = wopendir(dir_name)))
{
// Check if we're case insensitive
const bool do_case_insensitive = fs_is_case_insensitive(dir_name, dirfd(dir), case_sensitivity_cache);
wcstring matched_file;
// We opened the dir_name; look for a string where the base name prefixes it
// Don't ask for the is_dir value unless we care, because it can cause extra filesystem access
wcstring ent;
bool is_dir = false;
while (wreaddir_resolving(dir, dir_name, ent, require_dir ? &is_dir : NULL))
{
// Maybe skip directories
if (require_dir && ! is_dir)
{
continue;
}
if (string_prefixes_string(filename_fragment, ent) ||
(do_case_insensitive && string_prefixes_string_case_insensitive(filename_fragment, ent)))
{
// We matched
matched_file = ent;
break;
}
}
closedir(dir);
/* We succeeded if we found a match */
result = ! matched_file.empty();
}
}
}
}
return result;
}
/* Given a string, return whether it prefixes a path that we could cd into. Return that path in out_path. Expects path to be unescaped. */
static bool is_potential_cd_path(const wcstring &path, const wcstring &working_directory, path_flags_t flags)
{
wcstring_list_t directories;
if (string_prefixes_string(L"./", path))
{
/* Ignore the CDPATH in this case; just use the working directory */
directories.push_back(working_directory);
}
else
{
/* Get the CDPATH */
env_var_t cdpath = env_get_string(L"CDPATH");
if (cdpath.missing_or_empty())
cdpath = L".";
/* Tokenize it into directories */
wcstokenizer tokenizer(cdpath, ARRAY_SEP_STR);
wcstring next_path;
while (tokenizer.next(next_path))
{
/* Ensure that we use the working directory for relative cdpaths like "." */
directories.push_back(path_apply_working_directory(next_path, working_directory));
}
}
/* Call is_potential_path with all of these directories */
bool result = is_potential_path(path, directories, flags | PATH_REQUIRE_DIR);
return result;
}
/* Given a plain statement node in a parse tree, fetch its command and expand it the way commands are expanded
   (command substitutions, variables and jobs are skipped). On success the expanded command is stored in
   *out_cmd and true is returned; otherwise *out_cmd is left untouched and false is returned. */
bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd)
{
    assert(plain_statement.type == symbol_plain_statement);

    /* Get the command text */
    wcstring cmd;
    if (! tree.command_for_plain_statement(plain_statement, src, &cmd))
        return false;

    /* Try expanding it. If we cannot, it's an error. */
    if (! expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS))
        return false;

    /* Success: hand the expanded string back to the caller */
    out_cmd->swap(cmd);
    return true;
}
/* Translate a highlight spec into a concrete color.
   The primary part of the spec selects an entry of highlight_var; if that variable is missing,
   fish_color_normal (entry 0) is used instead. The valid_path and force_underline modifiers are then applied.
   Returns the normal color if the primary index is out of range. */
rgb_color_t highlight_get_color(highlight_spec_t highlight, bool is_background)
{
    /* Get the primary variable */
    const size_t primary_idx = highlight_get_primary(highlight);
    if (primary_idx >= VAR_COUNT)
    {
        return rgb_color_t::normal();
    }

    /* If sloppy_background is set, then we look at the foreground color even if is_background is set */
    const bool treat_as_background = is_background && !(highlight & highlight_modifier_sloppy_background);

    env_var_t color_spec = env_get_string(highlight_var[primary_idx]);
    if (color_spec.missing())
        color_spec = env_get_string(highlight_var[0]);

    rgb_color_t color = rgb_color_t::normal();
    if (! color_spec.missing())
        color = parse_color(color_spec, treat_as_background);

    /* Handle modifiers. */
    if (highlight & highlight_modifier_valid_path)
    {
        env_var_t valid_path_spec = env_get_string(L"fish_color_valid_path");
        const wcstring valid_path_str = valid_path_spec.missing() ? L"" : valid_path_spec.c_str();
        rgb_color_t valid_path_color = parse_color(valid_path_str, is_background);

        if (! color.is_normal())
        {
            /* Keep the existing color, only inheriting bold / underline from the valid-path color */
            if (valid_path_color.is_bold())
                color.set_bold(true);
            if (valid_path_color.is_underline())
                color.set_underline(true);
        }
        else
        {
            color = valid_path_color;
        }
    }

    if (highlight & highlight_modifier_force_underline)
    {
        color.set_underline(true);
    }
    return color;
}
static bool has_expand_reserved(const wcstring &str)
{
bool result = false;
for (size_t i=0; i < str.size(); i++)
{
wchar_t wc = str.at(i);
if (wc >= EXPAND_RESERVED_BASE && wc <= EXPAND_RESERVED_END)
{
result = true;
break;
}
}
return result;
}
/* Parse a command line. On success, returns true and stores the expanded command of the last plain statement
   in *out_expanded_command. If that statement has a last argument, it is copied into *out_last_arg; otherwise
   *out_last_arg is left as the caller initialized it. This is used by autosuggestions. */
static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expanded_command, parse_node_t *out_last_arg)
{
    /* Parse the buffer, tolerating errors and incomplete tokens */
    parse_node_tree_t parse_tree;
    parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);

    /* Find the last statement */
    const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL);
    if (last_statement == NULL)
        return false;

    if (! plain_statement_get_expanded_command(buff, parse_tree, *last_statement, out_expanded_command))
        return false;

    /* We have the command. Now find the last argument of that statement, if there is one. */
    const parse_node_t *last_arg = parse_tree.find_last_node_of_type(symbol_argument, last_statement);
    if (last_arg != NULL)
    {
        *out_last_arg = *last_arg;
    }
    return true;
}
/* Decide whether a history item is acceptable as an autosuggestion.
   A "cd <arg>" item whose argument can be expanded is accepted only if the argument is not a help flag, names a
   directory we can cd to, and that directory is not the working directory itself. Any other item is accepted
   if its command is an executable, builtin or function, and all of its required paths are valid.
   Must be called on a background thread. */
bool autosuggest_validate_from_history(const history_item_t &item, file_detection_context_t &detector, const wcstring &working_directory, const env_vars_snapshot_t &vars)
{
    ASSERT_IS_BACKGROUND_THREAD();

    /* Parse the string */
    wcstring parsed_command;
    parse_node_t last_arg_node(token_type_invalid);
    if (! autosuggest_parse_command(item.str(), &parsed_command, &last_arg_node))
        return false;

    const bool is_cd_with_arg = (parsed_command == L"cd" && last_arg_node.type == symbol_argument && last_arg_node.has_source());
    if (is_cd_with_arg)
    {
        /* We can possibly handle this specially, provided the argument expands */
        wcstring dir = last_arg_node.get_source(item.str());
        if (expand_one(dir, EXPAND_SKIP_CMDSUBST))
        {
            /* Never suggest help invocations */
            if (string_prefixes_string(dir, L"--help") || string_prefixes_string(dir, L"-h"))
                return false;

            /* The target must be something we can actually cd into */
            wcstring path;
            if (! path_get_cdpath(dir, &path, working_directory.c_str(), vars))
                return false;

            /* Don't suggest the working directory as the path! */
            return ! paths_are_same_file(working_directory, path);
        }
    }

    /* Not handled specially: the command itself has to exist... */
    const bool cmd_ok = path_get_path(parsed_command, NULL) ||
                        builtin_exists(parsed_command) ||
                        function_exists_no_autoload(parsed_command, vars);
    if (! cmd_ok)
        return false;

    /* ...and every path the history item depends on must still be valid */
    const path_list_t &paths = item.get_required_paths();
    return paths.empty() || detector.paths_are_valid(paths);
}
/* Highlights the variable starting at 'in' (which must begin with '$'), writing colors through the 'colors'
   iterator, whose index 0 corresponds to in[0]. Returns the number of characters consumed. */
static size_t color_variable(const wchar_t *in, size_t in_len, std::vector<highlight_spec_t>::iterator colors)
{
    assert(in_len > 0);
    assert(in[0] == L'$');

    // Handle the initial run of $s. Each one is an operator if something variable-like follows it, else an error.
    size_t pos = 0;
    for (; in[pos] == '$'; pos++)
    {
        const wchar_t following = in[pos + 1];
        if (following == L'$' || wcsvarchr(following))
        {
            colors[pos] = highlight_spec_operator;
        }
        else
        {
            colors[pos] = highlight_spec_error;
        }
    }
    // We started at 0 and advanced once per dollar sign
    const size_t dollar_count = pos;

    // Handle a sequence of variable characters
    for (; wcsvarchr(in[pos]); pos++)
    {
        colors[pos] = highlight_spec_operator;
    }

    // Handle slices, at most one per dollar sign. Note that we currently don't do any validation of the slice's
    // contents, e.g. $foo[blah] will not show an error even though it's invalid.
    size_t slices_seen = 0;
    while (slices_seen < dollar_count && in[pos] == L'[')
    {
        wchar_t *slice_begin = NULL, *slice_end = NULL;
        const int located = parse_util_locate_slice(in + pos, &slice_begin, &slice_end, false);
        if (located == 0)
        {
            // not a slice
            break;
        }
        if (located != 1)
        {
            assert(located < 0);
            // Syntax error. Normally the entire token is colored red for us, but inside a double-quoted string
            // that doesn't happen. As such, color the variable + the slice start red. Coloring any more than
            // that looks bad, unless we're willing to try and detect where the double-quoted string ends.
            std::fill(colors, colors + pos + 1, (highlight_spec_t)highlight_spec_error);
            break;
        }

        // A well-formed slice: color just its brackets and continue after the closing one
        const size_t slice_begin_idx = slice_begin - in;
        const size_t slice_end_idx = slice_end - in;
        assert(slice_end_idx > slice_begin_idx);
        colors[slice_begin_idx] = highlight_spec_operator;
        colors[slice_end_idx] = highlight_spec_operator;
        pos = slice_end_idx + 1;
        slices_seen++;
    }
    return pos;
}
/* This function is a disaster badly in need of refactoring. It colors an argument, without regard to command substitutions.
buffstr is the argument text; colors is an iterator whose index 0 corresponds to buffstr[0], and must have room for buffstr.size() entries.
Every position is first set to highlight_spec_param, then a single pass over the text recolors escapes, operators, quotes and variables. The pass is a three-state machine: unquoted, single quoted, double quoted. */
static void color_argument_internal(const wcstring &buffstr, std::vector<highlight_spec_t>::iterator colors)
{
const size_t buff_len = buffstr.size();
std::fill(colors, colors + buff_len, (highlight_spec_t)highlight_spec_param);
enum {e_unquoted, e_single_quoted, e_double_quoted} mode = e_unquoted;
/* Brace nesting depth; only used to decide whether a comma is an operator */
int bracket_count=0;
for (size_t in_pos=0; in_pos < buff_len; in_pos++)
{
const wchar_t c = buffstr.at(in_pos);
switch (mode)
{
case e_unquoted:
{
if (c == L'\\')
{
/* A backslash escape. We color the half-open range [backslash_pos, fill_end) with fill_color; fill_end == backslash_pos means nothing gets colored. */
int fill_color = highlight_spec_escape; //may be set to highlight_error
const size_t backslash_pos = in_pos;
size_t fill_end = backslash_pos;
// Move to the escaped character
in_pos++;
const wchar_t escaped_char = (in_pos < buff_len ? buffstr.at(in_pos) : L'\0');
if (escaped_char == L'\0')
{
/* Nothing follows the backslash (or a NUL does): color the backslash as an error */
fill_end = in_pos;
fill_color = highlight_spec_error;
}
else if (wcschr(L"~%", escaped_char))
{
/* \~ and \% are only colored as escapes at the very start of the argument */
if (in_pos == 1)
{
fill_end = in_pos + 1;
}
}
else if (escaped_char == L',')
{
/* \, is only colored as an escape inside braces */
if (bracket_count)
{
fill_end = in_pos + 1;
}
}
else if (wcschr(L"abefnrtv*?$(){}[]'\"<>^ \\#;|&", escaped_char))
{
fill_end = in_pos + 1;
}
else if (wcschr(L"c", escaped_char))
{
// Like \ci. So highlight three characters
// NOTE(review): fill_end = in_pos + 1 colors only the backslash and the 'c' (two characters), not three as the comment above says - confirm which is intended
fill_end = in_pos + 1;
}
else if (wcschr(L"uUxX01234567", escaped_char))
{
/* Numeric escape: up to 'chars' digits in the given base, whose value must not exceed max_val */
long long res=0;
int chars=2;
int base=16;
wchar_t max_val = ASCII_MAX;
switch (escaped_char)
{
case L'u':
{
chars=4;
max_val = UCS2_MAX;
in_pos++;
break;
}
case L'U':
{
chars=8;
max_val = WCHAR_MAX;
in_pos++;
break;
}
case L'x':
{
in_pos++;
break;
}
case L'X':
{
max_val = BYTE_MAX;
in_pos++;
break;
}
default:
{
// a digit like \12
// Note in_pos is not advanced here: the escaped character is itself the first digit
base=8;
chars=3;
break;
}
}
// Consume
for (int i=0; i < chars && in_pos < buff_len; i++)
{
long d = convert_digit(buffstr.at(in_pos), base);
if (d < 0)
break;
res = (res * base) + d;
in_pos++;
}
//in_pos is now at the first character that could not be converted (or buff_len)
assert(in_pos >= backslash_pos && in_pos <= buff_len);
fill_end = in_pos;
// It's an error if we exceeded the max value
if (res > max_val)
fill_color = highlight_spec_error;
// Subtract one from in_pos, so that the increment in the loop will move to the next character
in_pos--;
}
assert(fill_end >= backslash_pos);
std::fill(colors + backslash_pos, colors + fill_end, fill_color);
}
else
{
// Not a backslash
switch (c)
{
case L'~':
case L'%':
{
/* Only an operator as the first character of the argument */
if (in_pos == 0)
{
colors[in_pos] = highlight_spec_operator;
}
break;
}
case L'$':
{
assert(in_pos < buff_len);
in_pos += color_variable(buffstr.c_str() + in_pos, buff_len - in_pos, colors + in_pos);
/* subtract one to account for the upcoming loop increment */
in_pos -= 1;
break;
}
case L'*':
case L'?':
case L'(':
case L')':
{
colors[in_pos] = highlight_spec_operator;
break;
}
case L'{':
{
colors[in_pos] = highlight_spec_operator;
bracket_count++;
break;
}
case L'}':
{
colors[in_pos] = highlight_spec_operator;
bracket_count--;
break;
}
case L',':
{
/* A comma is only an operator inside braces */
if (bracket_count > 0)
{
colors[in_pos] = highlight_spec_operator;
}
break;
}
case L'\'':
{
colors[in_pos] = highlight_spec_quote;
mode = e_single_quoted;
break;
}
case L'\"':
{
colors[in_pos] = highlight_spec_quote;
mode = e_double_quoted;
break;
}
}
}
break;
}
/*
Single quoted string, i.e 'foo'. Only \\ and \' are colored as escapes here.
*/
case e_single_quoted:
{
colors[in_pos] = highlight_spec_quote;
if (c == L'\\')
{
// backslash
if (in_pos + 1 < buff_len)
{
const wchar_t escaped_char = buffstr.at(in_pos + 1);
if (escaped_char == L'\\' || escaped_char == L'\'')
{
colors[in_pos] = highlight_spec_escape; //backslash
colors[in_pos + 1] = highlight_spec_escape; //escaped char
in_pos += 1; //skip over backslash
}
}
}
else if (c == L'\'')
{
/* Closing quote */
mode = e_unquoted;
}
break;
}
/*
Double quoted string, i.e. "foo". Backslash followed by one of \ " newline $ is colored as an escape, and variables are still colored.
*/
case e_double_quoted:
{
// slices are colored in advance, past `in_pos`, and we don't want to overwrite that
if (colors[in_pos] == highlight_spec_param)
{
colors[in_pos] = highlight_spec_quote;
}
switch (c)
{
case L'"':
{
/* Closing quote */
mode = e_unquoted;
break;
}
case L'\\':
{
// backslash
if (in_pos + 1 < buff_len)
{
const wchar_t escaped_char = buffstr.at(in_pos + 1);
if (wcschr(L"\\\"\n$", escaped_char))
{
colors[in_pos] = highlight_spec_escape; //backslash
colors[in_pos + 1] = highlight_spec_escape; //escaped char
in_pos += 1; //skip over backslash
}
}
break;
}
case L'$':
{
in_pos += color_variable(buffstr.c_str() + in_pos, buff_len - in_pos, colors + in_pos);
/* subtract one to account for the upcoming increment in the loop */
in_pos -= 1;
break;
}
}
break;
}
}
}
}
/* Syntax highlighter helper. Parses the given string once on construction, and computes a color for each of its characters in highlight(). */
class highlighter_t
{
/* The string we're highlighting. Note this is a reference member variable (to avoid copying)! We must not outlive this! */
const wcstring &buff;
/* Cursor position, or CURSOR_POSITION_INVALID if there is none */
const size_t cursor_pos;
/* Environment variables. Again, a reference member variable! */
const env_vars_snapshot_t &vars;
/* Whether it's OK to do I/O (e.g. filesystem checks on cd arguments and redirection targets) */
const bool io_ok;
/* Working directory */
const wcstring working_directory;
/* The resulting colors, one entry per character of buff */
typedef std::vector<highlight_spec_t> color_array_t;
color_array_t color_array;
/* The parse tree of the buff */
parse_node_tree_t parse_tree;
/* Color an argument */
void color_argument(const parse_node_t &node);
/* Color a redirection */
void color_redirection(const parse_node_t &node);
/* Color the arguments of the given node */
void color_arguments(const parse_node_t &list_node);
/* Color the redirections of the given node */
void color_redirections(const parse_node_t &list_node);
/* Color all the children of the command with the given type */
void color_children(const parse_node_t &parent, parse_token_type_t type, highlight_spec_t color);
/* Colors the source range of a node with a given color */
void color_node(const parse_node_t &node, highlight_spec_t color);
public:
/* Constructor. str and ev are stored by reference and must outlive this object; wd is copied. The color array is sized to match str. */
highlighter_t(const wcstring &str, size_t pos, const env_vars_snapshot_t &ev, const wcstring &wd, bool can_do_io) : buff(str), cursor_pos(pos), vars(ev), io_ok(can_do_io), working_directory(wd), color_array(str.size())
{
/* Parse the tree. We continue after errors and keep comments, so as much of the buffer as possible gets colored. */
parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL);
}
/* Perform highlighting, returning an array of colors */
const color_array_t &highlight();
};
void highlighter_t::color_node(const parse_node_t &node, highlight_spec_t color)
{
// Can only color nodes with valid source ranges
if (! node.has_source() || node.source_length == 0)
return;
// Fill the color array with our color in the corresponding range
size_t source_end = node.source_start + node.source_length;
assert(source_end >= node.source_start);
assert(source_end <= color_array.size());
std::fill(this->color_array.begin() + node.source_start, this->color_array.begin() + source_end, color);
}
/* node does not necessarily have type symbol_argument here */
void highlighter_t::color_argument(const parse_node_t &node)
{
if (! node.has_source())
return;
const wcstring arg_str = node.get_source(this->buff);
/* Get an iterator to the colors associated with the argument */
const size_t arg_start = node.source_start;
const color_array_t::iterator arg_colors = color_array.begin() + arg_start;
/* Color this argument without concern for command substitutions */
color_argument_internal(arg_str, arg_colors);
/* Now do command substitutions */
size_t cmdsub_cursor = 0, cmdsub_start = 0, cmdsub_end = 0;
wcstring cmdsub_contents;
while (parse_util_locate_cmdsubst_range(arg_str, &cmdsub_cursor, &cmdsub_contents, &cmdsub_start, &cmdsub_end, true /* accept incomplete */) > 0)
{
/* The cmdsub_start is the open paren. cmdsub_end is either the close paren or the end of the string. cmdsub_contents extends from one past cmdsub_start to cmdsub_end */
assert(cmdsub_end > cmdsub_start);
assert(cmdsub_end - cmdsub_start - 1 == cmdsub_contents.size());
/* Found a command substitution. Compute the position of the start and end of the cmdsub contents, within our overall src. */
const size_t arg_subcmd_start = arg_start + cmdsub_start, arg_subcmd_end = arg_start + cmdsub_end;
/* Highlight the parens. The open paren must exist; the closed paren may not if it was incomplete. */
assert(cmdsub_start < arg_str.size());
this->color_array.at(arg_subcmd_start) = highlight_spec_operator;
if (arg_subcmd_end < this->buff.size())
this->color_array.at(arg_subcmd_end) = highlight_spec_operator;
/* Compute the cursor's position within the cmdsub. We must be past the open paren (hence >) but can be at the end of the string or closed paren (hence <=) */
size_t cursor_subpos = CURSOR_POSITION_INVALID;
if (cursor_pos != CURSOR_POSITION_INVALID && cursor_pos > arg_subcmd_start && cursor_pos <= arg_subcmd_end)
{
/* The -1 because the cmdsub_contents does not include the open paren */
cursor_subpos = cursor_pos - arg_subcmd_start - 1;
}
/* Highlight it recursively. */
highlighter_t cmdsub_highlighter(cmdsub_contents, cursor_subpos, this->vars, this->working_directory, this->io_ok);
const color_array_t &subcolors = cmdsub_highlighter.highlight();
/* Copy out the subcolors back into our array */
assert(subcolors.size() == cmdsub_contents.size());
std::copy(subcolors.begin(), subcolors.end(), this->color_array.begin() + arg_subcmd_start + 1);
}
}
// Indicates whether the source range of the given node forms a potential path relative to the given
// working_directory. The node's source is unescaped and then handed to is_potential_path, with the working
// directory as the only candidate directory. Returns false if the node has no source or cannot be unescaped.
static bool node_is_potential_path(const wcstring &src, const parse_node_t &node, const wcstring &working_directory)
{
    if (! node.has_source())
        return false;

    wcstring token(src, node.source_start, node.source_length);
    if (! unescape_string_in_place(&token, UNESCAPE_SPECIAL))
        return false;

    /* Big hack: is_potential_path expects a tilde, but unescape_string gives us HOME_DIRECTORY. Put it back. */
    const bool starts_with_home = ! token.empty() && token.at(0) == HOME_DIRECTORY;
    if (starts_with_home)
        token.at(0) = L'~';

    const wcstring_list_t working_directory_list(1, working_directory);
    return is_potential_path(token, working_directory_list, PATH_EXPAND_TILDE);
}
// Color all of the arguments of the given command
void highlighter_t::color_arguments(const parse_node_t &list_node)
{
/* Hack: determine whether the parent is the cd command, so we can show errors for non-directories */
bool cmd_is_cd = false;
if (this->io_ok)
{
const parse_node_t *parent = this->parse_tree.get_parent(list_node, symbol_plain_statement);
if (parent != NULL)
{
wcstring cmd_str;
if (plain_statement_get_expanded_command(this->buff, this->parse_tree, *parent, &cmd_str))
{
cmd_is_cd = (cmd_str == L"cd");
}
}
}
/* Find all the arguments of this list */
const parse_node_tree_t::parse_node_list_t nodes = this->parse_tree.find_nodes(list_node, symbol_argument);
for (size_t i=0; i < nodes.size(); i++)
{
const parse_node_t *child = nodes.at(i);
assert(child != NULL && child->type == symbol_argument);
this->color_argument(*child);
if (cmd_is_cd)
{
/* Mark this as an error if it's not 'help' and not a valid cd path */
wcstring param = child->get_source(this->buff);
if (expand_one(param, EXPAND_SKIP_CMDSUBST))
{
bool is_help = string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h");
if (! is_help && this->io_ok && ! is_potential_cd_path(param, working_directory, PATH_EXPAND_TILDE))
{
this->color_node(*child, highlight_spec_error);
}
}
}
}
}
void highlighter_t::color_redirection(const parse_node_t &redirection_node)
{
assert(redirection_node.type == symbol_redirection);
if (! redirection_node.has_source())
return;
const parse_node_t *redirection_primitive = this->parse_tree.get_child(redirection_node, 0, parse_token_type_redirection); //like 2>
const parse_node_t *redirection_target = this->parse_tree.get_child(redirection_node, 1, parse_token_type_string); //like &1 or file path
if (redirection_primitive != NULL)
{
wcstring target;
const enum token_type redirect_type = this->parse_tree.type_for_redirection(redirection_node, this->buff, NULL, &target);
/* We may get a TOK_NONE redirection type, e.g. if the redirection is invalid */
this->color_node(*redirection_primitive, redirect_type == TOK_NONE ? highlight_spec_error : highlight_spec_redirection);
/* Check if the argument contains a command substitution. If so, highlight it as a param even though it's a command redirection, and don't try to do any other validation. */
if (parse_util_locate_cmdsubst(target.c_str(), NULL, NULL, true) != 0)
{
if (redirection_target != NULL)
{
this->color_argument(*redirection_target);
}
}
else
{
/* No command substitution, so we can highlight the target file or fd. For example, disallow redirections into a non-existent directory */
bool target_is_valid = true;
if (! this->io_ok)
{
/* I/O is disallowed, so we don't have much hope of catching anything but gross errors. Assume it's valid. */
target_is_valid = true;
}
else if (! expand_one(target, EXPAND_SKIP_CMDSUBST))
{
/* Could not be expanded */
target_is_valid = false;
}
else
{
/* Ok, we successfully expanded our target. Now verify that it works with this redirection. We will probably need it as a path (but not in the case of fd redirections). Note that the target is now unescaped. */
const wcstring target_path = path_apply_working_directory(target, this->working_directory);
switch (redirect_type)
{
case TOK_REDIRECT_FD:
{
/* target should be an fd. It must be all digits, and must not overflow. fish_wcstoi returns INT_MAX on overflow; we could instead check errno to disambiguiate this from a real INT_MAX fd, but instead we just disallow that. */
const wchar_t *target_cstr = target.c_str();
wchar_t *end = NULL;
int fd = fish_wcstoi(target_cstr, &end, 10);
/* The iswdigit check ensures there's no leading whitespace, the *end check ensures the entire string was consumed, and the numeric checks ensure the fd is at least zero and there was no overflow */
target_is_valid = (iswdigit(target_cstr[0]) && *end == L'\0' && fd >= 0 && fd < INT_MAX);
}
break;
case TOK_REDIRECT_IN:
{
/* Input redirections must have a readable non-directory */
struct stat buf = {};
target_is_valid = ! waccess(target_path, R_OK) && ! wstat(target_path, &buf) && ! S_ISDIR(buf.st_mode);
}
break;
case TOK_REDIRECT_OUT:
case TOK_REDIRECT_APPEND:
case TOK_REDIRECT_NOCLOB:
{
/* Test whether the file exists, and whether it's writable (possibly after creating it). access() returns failure if the file does not exist. */
bool file_exists = false, file_is_writable = false;
int err = 0;
struct stat buf = {};
if (wstat(target_path, &buf) < 0)
{
err = errno;
}
if (string_suffixes_string(L"/", target))
{
/* Redirections to things that are directories is definitely not allowed */
file_exists = false;
file_is_writable = false;
}
else if (err == 0)
{
/* No err. We can write to it if it's not a directory and we have permission */
file_exists = true;
file_is_writable = ! S_ISDIR(buf.st_mode) && ! waccess(target_path, W_OK);
}
else if (err == ENOENT)
{
/* File does not exist. Check if its parent directory is writable. */
wcstring parent = wdirname(target_path);
/* Ensure that the parent ends with the path separator. This will ensure that we get an error if the parent directory is not really a directory. */
if (! string_suffixes_string(L"/", parent))
parent.push_back(L'/');
/* Now the file is considered writable if the parent directory is writable */
file_exists = false;
file_is_writable = (0 == waccess(parent, W_OK));
}
else
{
/* Other errors we treat as not writable. This includes things like ENOTDIR. */
file_exists = false;
file_is_writable = false;
}
/* NOCLOB means that we must not overwrite files that exist */
target_is_valid = file_is_writable && !(file_exists && redirect_type == TOK_REDIRECT_NOCLOB);
}
break;
default:
/* We should not get here, since the node was marked as a redirection, but treat it as an error for paranoia */
target_is_valid = false;
break;
}
}
if (redirection_target != NULL)
{
this->color_node(*redirection_target, target_is_valid ? highlight_spec_redirection : highlight_spec_error);
}
}
}
}
// Color all of the redirections of the given command
void highlighter_t::color_redirections(const parse_node_t &list_node)
{
const parse_node_tree_t::parse_node_list_t nodes = this->parse_tree.find_nodes(list_node, symbol_redirection);
for (size_t i=0; i < nodes.size(); i++)
{
this->color_redirection(*nodes.at(i));
}
}
/* Apply the given color to each direct child of parent whose type equals the given type. Other children
   are left untouched. */
void highlighter_t::color_children(const parse_node_t &parent, parse_token_type_t type, highlight_spec_t color)
{
    node_offset_t idx = 0;
    while (idx < parent.child_count)
    {
        const parse_node_t *child = this->parse_tree.get_child(parent, idx);
        const bool type_matches = (child != NULL && child->type == type);
        if (type_matches)
        {
            this->color_node(*child, color);
        }
        idx++;
    }
}
/* Determine if a command is valid. The statement decoration restricts which kinds of command are considered:
   'command' and 'exec' only allow external commands, 'builtin' only allows builtins, and anything else allows
   builtins, functions, abbreviations, external commands and implicit cd. */
static bool command_is_valid(const wcstring &cmd, enum parse_statement_decoration_t decoration, const wcstring &working_directory, const env_vars_snapshot_t &vars)
{
    /* Classify the decoration */
    const bool external_only = (decoration == parse_statement_decoration_command || decoration == parse_statement_decoration_exec);
    const bool builtin_only = ! external_only && (decoration == parse_statement_decoration_builtin);
    const bool unrestricted = ! external_only && ! builtin_only;

    /* Work out which kinds of command we are willing to check */
    const bool builtin_ok = unrestricted || builtin_only;
    const bool function_ok = unrestricted;
    const bool abbreviation_ok = unrestricted;
    const bool command_ok = unrestricted || external_only;
    const bool implicit_cd_ok = unrestricted;

    /* Check them in order, stopping at the first kind that matches */
    return (builtin_ok && builtin_exists(cmd))
        || (function_ok && function_exists_no_autoload(cmd, vars))
        || (abbreviation_ok && expand_abbreviation(cmd, NULL))
        || (command_ok && path_get_path(cmd, NULL, vars))
        || (implicit_cd_ok && path_can_be_implicit_cd(cmd, NULL, working_directory.c_str(), vars));
}
/**
   Compute the highlight spec for every character of the buffer and return the color array.

   The work happens in two passes over the parse tree:
   1. Every node is visited and colored according to its type (keywords, commands, arguments,
      redirections, statement terminators, errors and comments).
   2. If I/O is permitted and the cursor lies within the buffer, the argument under the cursor
      is tagged with highlight_modifier_valid_path when it looks like a path.

   \return a reference to this highlighter's color array, which has one entry per character of buff
*/
const highlighter_t::color_array_t & highlighter_t::highlight()
{
    // If we are doing I/O, we must be in a background thread
    if (io_ok)
    {
        ASSERT_IS_BACKGROUND_THREAD();
    }

    const size_t length = buff.size();
    assert(this->buff.size() == this->color_array.size());

    /* Nothing to color in an empty buffer */
    if (length == 0)
        return color_array;

    /* Start out at zero */
    std::fill(this->color_array.begin(), this->color_array.end(), 0);

#if 0
    const wcstring dump = parse_dump_tree(parse_tree, buff);
    fprintf(stderr, "%ls\n", dump.c_str());
#endif

    /* Walk the node tree */
    for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter)
    {
        const parse_node_t &node = *iter;

        switch (node.type)
        {
            // Color direct string descendants, e.g. 'for' and 'in'.
            case symbol_while_header:
            case symbol_begin_header:
            case symbol_function_header:
            case symbol_if_clause:
            case symbol_else_clause:
            case symbol_case_item:
            case symbol_boolean_statement:
            case symbol_decorated_statement:
            case symbol_if_statement:
            {
                this->color_children(node, parse_token_type_string, highlight_spec_command);
            }
            break;

            case symbol_switch_statement:
            {
                const parse_node_t *literal_switch = this->parse_tree.get_child(node, 0, parse_token_type_string);
                const parse_node_t *switch_arg = this->parse_tree.get_child(node, 1, symbol_argument);

                /* Guard against a missing child, as the plain statement case does, rather than dereferencing NULL */
                if (literal_switch != NULL)
                    this->color_node(*literal_switch, highlight_spec_command);
                if (switch_arg != NULL)
                    this->color_node(*switch_arg, highlight_spec_param);
            }
            break;

            case symbol_for_header:
            {
                // Color the 'for' and 'in' as commands
                const parse_node_t *literal_for_node = this->parse_tree.get_child(node, 0, parse_token_type_string);
                const parse_node_t *literal_in_node = this->parse_tree.get_child(node, 2, parse_token_type_string);

                /* Guard against a missing child rather than dereferencing NULL */
                if (literal_for_node != NULL)
                    this->color_node(*literal_for_node, highlight_spec_command);
                if (literal_in_node != NULL)
                    this->color_node(*literal_in_node, highlight_spec_command);

                // Color the variable name as a parameter
                const parse_node_t *var_name_node = this->parse_tree.get_child(node, 1, parse_token_type_string);
                if (var_name_node != NULL)
                    this->color_argument(*var_name_node);
            }
            break;

            case parse_token_type_pipe:
            case parse_token_type_background:
            case parse_token_type_end:
            case symbol_optional_background:
            {
                this->color_node(node, highlight_spec_statement_terminator);
            }
            break;

            case symbol_plain_statement:
            {
                /* Get the decoration from the parent */
                enum parse_statement_decoration_t decoration = parse_tree.decoration_for_plain_statement(node);

                /* Color the command */
                const parse_node_t *cmd_node = parse_tree.get_child(node, 0, parse_token_type_string);
                if (cmd_node != NULL && cmd_node->has_source())
                {
                    bool is_valid_cmd = false;
                    if (! this->io_ok)
                    {
                        /* We cannot check if the command is invalid, so just assume it's valid */
                        is_valid_cmd = true;
                    }
                    else
                    {
                        /* Check to see if the command is valid */
                        wcstring cmd(buff, cmd_node->source_start, cmd_node->source_length);

                        /* Try expanding it. If we cannot, it's an error. */
                        bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS);
                        if (expanded && ! has_expand_reserved(cmd))
                        {
                            is_valid_cmd = command_is_valid(cmd, decoration, working_directory, vars);
                        }
                    }
                    this->color_node(*cmd_node, is_valid_cmd ? highlight_spec_command : highlight_spec_error);
                }
            }
            break;

            case symbol_arguments_or_redirections_list:
            case symbol_argument_list:
            {
                /* Only work on root lists, so that we don't re-color child lists */
                if (parse_tree.argument_list_is_root(node))
                {
                    this->color_arguments(node);
                    this->color_redirections(node);
                }
            }
            break;

            case symbol_end_command:
                this->color_node(node, highlight_spec_command);
                break;

            case parse_special_type_parse_error:
            case parse_special_type_tokenizer_error:
                this->color_node(node, highlight_spec_error);
                break;

            case parse_special_type_comment:
                this->color_node(node, highlight_spec_comment);
                break;

            default:
                break;
        }
    }

    if (this->io_ok && this->cursor_pos <= this->buff.size())
    {
        /* If the cursor is over an argument, and that argument is a valid path, underline it */
        for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter)
        {
            const parse_node_t &node = *iter;

            /* Must be an argument with source */
            if (node.type != symbol_argument || ! node.has_source())
                continue;

            /* See if this node contains the cursor. We check <= source_length so that, when backspacing (and the cursor is just beyond the last token), we may still underline it */
            if (this->cursor_pos >= node.source_start && this->cursor_pos - node.source_start <= node.source_length)
            {
                /* See if this is a valid path */
                if (node_is_potential_path(buff, node, working_directory))
                {
                    /* It is, underline it. */
                    for (size_t i=node.source_start; i < node.source_start + node.source_length; i++)
                    {
                        /* Don't color highlight_spec_error because it looks dorky. For example, trying to cd into a non-directory would show an underline and also red. */
                        if (highlight_get_primary(this->color_array.at(i)) != highlight_spec_error)
                        {
                            this->color_array.at(i) |= highlight_modifier_valid_path;
                        }
                    }
                }
            }
        }
    }

    return color_array;
}
void highlight_shell(const wcstring &buff, std::vector<highlight_spec_t> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
{
/* Do something sucky and get the current working directory on this background thread. This should really be passed in. */
const wcstring working_directory = env_get_pwd_slash();
/* Highlight it! */
highlighter_t highlighter(buff, pos, vars, working_directory, true /* can do IO */);
color = highlighter.highlight();
}
void highlight_shell_no_io(const wcstring &buff, std::vector<highlight_spec_t> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
{
/* Do something sucky and get the current working directory on this background thread. This should really be passed in. */
const wcstring working_directory = env_get_pwd_slash();
/* Highlight it! */
highlighter_t highlighter(buff, pos, vars, working_directory, false /* no IO allowed */);
color = highlighter.highlight();
}
/**
   Perform quote and parenthesis highlighting on the specified string.

   If the character at pos is a quote or a bracket, its partner is located and both are given the
   'match' background. If no partner is found, the character at pos is given the 'error' background.

   \param buffstr the text to examine
   \param color   per-character highlight specs, modified in place; must have the same size as buffstr
   \param pos     index of the character to match; nothing happens if it is not inside buffstr
*/
static void highlight_universal_internal(const wcstring &buffstr, std::vector<highlight_spec_t> &color, size_t pos)
{
    assert(buffstr.size() == color.size());
    if (pos >= buffstr.size())
        return;

    const wchar_t c = buffstr.at(pos);

    /*
       Highlight matching quotes
    */
    if (c == L'\'' || c == L'\"')
    {
        /* Stack of the positions of the currently open quotes; its size always equals level */
        std::vector<size_t> lst;
        int level = 0;
        wchar_t prev_q = 0;
        const wchar_t * const buff = buffstr.c_str();
        const wchar_t *str = buff;
        bool match_found = false;

        while (*str)
        {
            switch (*str)
            {
                case L'\\':
                    /* Skip the escaped character */
                    str++;
                    break;

                case L'\"':
                case L'\'':
                    if (level == 0)
                    {
                        level++;
                        lst.push_back(static_cast<size_t>(str - buff));
                        prev_q = *str;
                    }
                    else if (prev_q == *str)
                    {
                        /* Closing quote: pair it with the innermost open quote and pop that opener,
                           so that an enclosing quote is later paired with its own opener */
                        level--;
                        const size_t pos1 = lst.back();
                        const size_t pos2 = static_cast<size_t>(str - buff);
                        lst.pop_back();

                        if (pos1 == pos || pos2 == pos)
                        {
                            color.at(pos1) |= highlight_make_background(highlight_spec_match);
                            color.at(pos2) |= highlight_make_background(highlight_spec_match);
                            match_found = true;
                        }
                        prev_q = (*str == L'\"') ? L'\'' : L'\"';
                    }
                    else
                    {
                        level++;
                        lst.push_back(static_cast<size_t>(str - buff));
                        prev_q = *str;
                    }
                    break;
            }

            /* A trailing backslash may have advanced us onto the terminator */
            if (*str == L'\0')
                break;
            str++;
        }

        if (! match_found)
            color.at(pos) = highlight_make_background(highlight_spec_error);
    }

    /*
       Highlight matching parenthesis
    */
    /* wcschr treats the terminating NUL as part of the searched string, so an embedded NUL in
       buffstr must be excluded explicitly or we would read past the end of the literal below */
    const wchar_t *bracket = (c != L'\0') ? wcschr(L"()[]{}", c) : NULL;
    if (bracket != NULL)
    {
        /* Openers search forwards, closers search backwards; the partner is the adjacent character in the bracket table */
        const int step = wcschr(L"({[", c) ? 1 : -1;
        const wchar_t dec_char = *(bracket + step);
        const wchar_t inc_char = c;
        int level = 0;
        bool match_found = false;

        for (long i = static_cast<long>(pos); i >= 0 && (size_t)i < buffstr.size(); i += step)
        {
            const wchar_t test_char = buffstr.at(i);
            if (test_char == inc_char)
                level++;
            if (test_char == dec_char)
                level--;
            if (level == 0)
            {
                const size_t pos2 = static_cast<size_t>(i);
                color.at(pos) |= highlight_make_background(highlight_spec_match);
                color.at(pos2) |= highlight_make_background(highlight_spec_match);
                match_found = true;
                break;
            }
        }

        if (! match_found)
            color.at(pos) = highlight_make_background(highlight_spec_error);
    }
}
void highlight_universal(const wcstring &buff, std::vector<highlight_spec_t> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
{
assert(buff.size() == color.size());
std::fill(color.begin(), color.end(), 0);
highlight_universal_internal(buff, color, pos);
}