From 3981b644d68f6b6947b4a12810c2fa5e09da4e58 Mon Sep 17 00:00:00 2001 From: Kevin Ballard Date: Wed, 20 Aug 2014 21:19:08 -0700 Subject: [PATCH 1/6] Fix double expansions (`$$foo`) Double expansions of variables had the following issues: * `"$$foo"` threw an error no matter what the value of `$foo` was. * `set -l foo ''; echo $$foo` threw an error because of the expansion of `$foo` to `''`. With this change, double expansion always works properly. When double-expanding a multi-valued variable, in a double-quoted string the first word of the inner expansion is used for the outer expansion, and outside of a quoted string every word is used for the double-expansion in each of the arguments. > set -l foo bar baz > set -l bar one two > set -l baz three four > echo "$$foo" one two baz > echo $$foo one two three four --- expand.cpp | 57 ++++++++++++++++++++++++++++--------- expand.h | 5 ++++ tests/expansion.err | 0 tests/expansion.in | 64 ++++++++++++++++++++++++++++++++++++++++++ tests/expansion.out | 38 +++++++++++++++++++++++++ tests/expansion.status | 1 + tests/top.out | 1 + 7 files changed, 153 insertions(+), 13 deletions(-) create mode 100644 tests/expansion.err create mode 100644 tests/expansion.in create mode 100644 tests/expansion.out create mode 100644 tests/expansion.status diff --git a/expand.cpp b/expand.cpp index d8cf4a1ab..d75c65723 100644 --- a/expand.cpp +++ b/expand.cpp @@ -1086,10 +1086,15 @@ static int expand_variables_internal(parser_t &parser, wchar_t * const in, std:: while (1) { - if (!(in[stop_pos ])) + const wchar_t nc = in[stop_pos]; + if (!(nc)) break; - if (!(iswalnum(in[stop_pos]) || - (wcschr(L"_", in[stop_pos])!= 0))) + if (nc == VARIABLE_EXPAND_EMPTY) + { + stop_pos++; + break; + } + if (!(wcsvarchr(nc))) break; stop_pos++; @@ -1108,7 +1113,15 @@ static int expand_variables_internal(parser_t &parser, wchar_t * const in, std:: } var_tmp.append(in + start_pos, var_len); - env_var_t var_val = expand_var(var_tmp.c_str()); + env_var_t var_val; + if (var_len == 1 && var_tmp[0] == VARIABLE_EXPAND_EMPTY) + { + var_val = env_var_t::missing_var(); + } + else + { + var_val = expand_var(var_tmp.c_str()); + } if (! var_val.missing()) { @@ -1174,7 +1187,18 @@ static int expand_variables_internal(parser_t &parser, wchar_t * const in, std:: { in[i]=0; wcstring res = in; - res.push_back(INTERNAL_SEPARATOR); + if (i > 0) + { + if (in[i-1] != VARIABLE_EXPAND_SINGLE) + { + res.push_back(INTERNAL_SEPARATOR); + } + else if (var_item_list.empty() || var_item_list.front().empty()) + { + // first expansion is empty, but we need to recursively expand + res.push_back(VARIABLE_EXPAND_EMPTY); + } + } for (size_t j=0; j 0) - new_in.append(in, start_pos - 1); - - // at this point new_in.size() is start_pos - 1 - if (start_pos>1 && new_in[start_pos-2]!=VARIABLE_EXPAND) + if (i > 0) { - new_in.push_back(INTERNAL_SEPARATOR); + if (in[i-1] != VARIABLE_EXPAND) + { + new_in.push_back(INTERNAL_SEPARATOR); + } + else if (next.empty()) + { + new_in.push_back(VARIABLE_EXPAND_EMPTY); + } } new_in.append(next); new_in.append(in + stop_pos); @@ -1243,8 +1271,11 @@ static int expand_variables_internal(parser_t &parser, wchar_t * const in, std:: Expansion to single argument. */ wcstring res; - in[i] = 0; - res.append(in); + res.append(in, i); + if (i > 0 && in[i-1] == VARIABLE_EXPAND_SINGLE) + { + res.push_back(VARIABLE_EXPAND_EMPTY); + } res.append(in + stop_pos); is_ok &= expand_variables2(parser, res, out, i, errors); diff --git a/expand.h b/expand.h index 3956b1f16..14a4f4772 100644 --- a/expand.h +++ b/expand.h @@ -102,6 +102,11 @@ enum */ INTERNAL_SEPARATOR, + /** + Character representing an empty variable expansion. + Only used transitively while expanding variables. + */ + VARIABLE_EXPAND_EMPTY, } ; diff --git a/tests/expansion.err b/tests/expansion.err new file mode 100644 index 000000000..e69de29bb diff --git a/tests/expansion.in b/tests/expansion.in new file mode 100644 index 000000000..d362e7895 --- /dev/null +++ b/tests/expansion.in @@ -0,0 +1,64 @@ +# Test expansion of variables + +function show --description 'Prints argument count followed by arguments' + echo (count $argv) $argv +end + +set -l foo +show "$foo" +show $foo +show "prefix$foo" +show prefix$foo + +show "$$foo" +show $$foo +show "prefix$$foo" +show prefix$$foo + +set -l foo '' +show "$foo" +show $foo +show "prefix$foo" +show prefix$foo + +show "$$foo" +show $$foo +show "prefix$$foo" +show prefix$$foo + +set -l foo bar +set -l bar +show "$$foo" +show $$foo +show "prefix$$foo" +show prefix$$foo + +set -l bar baz +show "$$foo" +show $$foo +show "prefix$$foo" +show prefix$$foo + +set -l bar baz quux +show "$$foo" +show $$foo +show "prefix$$foo" +show prefix$$foo + +set -l foo bar fooer fooest +set -l fooer +set -l fooest +show "$$foo" +show $$foo +show "prefix$$foo" +show prefix$$foo + +set -l fooer '' +show $$foo +show prefix$$foo + +set -l foo bar '' fooest +show "$$foo" +show $$foo +show "prefix$$foo" +show prefix$$foo diff --git a/tests/expansion.out b/tests/expansion.out new file mode 100644 index 000000000..3e7631804 --- /dev/null +++ b/tests/expansion.out @@ -0,0 +1,38 @@ +1 +0 +1 prefix +0 +1 +0 +1 prefix +0 +1 +1 +1 prefix +1 prefix +1 +0 +1 prefix +0 +1 +0 +1 prefix +0 +1 baz +1 baz +1 prefixbaz +1 prefixbaz +1 baz quux +2 baz quux +1 prefixbaz quux +2 prefixbaz prefixquux +1 baz quux fooer fooest +2 baz quux +1 prefixbaz quux fooer fooest +2 prefixbaz prefixquux +3 baz quux +3 prefixbaz prefixquux prefix +1 baz quux fooest +2 baz quux +1 prefixbaz quux fooest +2 prefixbaz prefixquux diff --git a/tests/expansion.status b/tests/expansion.status new file mode 100644 index 000000000..573541ac9 --- /dev/null +++ b/tests/expansion.status @@ -0,0 +1 @@ +0 diff --git a/tests/top.out b/tests/top.out index 768526c66..acf52d424 100644 --- a/tests/top.out +++ b/tests/top.out @@ -1,4 +1,5 @@ Testing high level script functionality +File expansion.in tested ok File printf.in tested ok File test1.in tested ok File test2.in tested ok From cc49042294aceb3ab30396eb5731e4ac0cf601bc Mon Sep 17 00:00:00 2001 From: Kevin Ballard Date: Wed, 20 Aug 2014 22:01:24 -0700 Subject: [PATCH 2/6] Parse slices even for empty variables When a variable is parsed as being empty, parse out the slice and validate the indexes anyway, behaving for slicing purposes as if the variable had a single empty value. Besides providing errors when expected, this also fixes the following: set -l foo echo "$foo[1]" This used to print "[1]", now it properly prints nothing. --- expand.cpp | 35 ++++++++++++++++++++++++++++++++++- tests/expansion.err | 18 ++++++++++++++++++ tests/expansion.in | 12 ++++++++++++ tests/expansion.out | 4 ++++ 4 files changed, 68 insertions(+), 1 deletion(-) diff --git a/expand.cpp b/expand.cpp index d75c65723..7388cb18b 100644 --- a/expand.cpp +++ b/expand.cpp @@ -1155,10 +1155,10 @@ static int expand_variables_internal(parser_t &parser, wchar_t * const in, std:: for (size_t j=0; j var_item_list.size()) { + size_t var_src_pos = var_pos_list.at(j); /* The slice was parsed starting at stop_pos, so we have to add that to the error position */ append_syntax_error(errors, slice_start + var_src_pos, @@ -1255,6 +1255,39 @@ static int expand_variables_internal(parser_t &parser, wchar_t * const in, std:: } else { + // even with no value, we still need to parse out slice syntax + // Behave as though we had 1 value, so $foo[1] always works. + const size_t slice_start = stop_pos; + if (in[slice_start] == L'[') + { + wchar_t *slice_end; + + if (parse_slice(in + slice_start, &slice_end, var_idx_list, var_pos_list, 1)) + { + append_syntax_error(errors, + stop_pos, + L"Invalid index value"); + is_ok = 0; + return is_ok; + } + stop_pos = (slice_end-in); + + // validate that the parsed indexes are valid + for (size_t j=0; j Date: Wed, 20 Aug 2014 22:28:42 -0700 Subject: [PATCH 3/6] Highlight "$foo[1]" properly Preserve the highlighting applied to the slice brackets when coloring variables inside of double-quoted strings. --- highlight.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/highlight.cpp b/highlight.cpp index 8145e5f30..f560264c2 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -861,7 +861,11 @@ static void color_argument_internal(const wcstring &buffstr, std::vector Date: Wed, 20 Aug 2014 22:31:58 -0700 Subject: [PATCH 4/6] Fix highlighting of `"foo\"bar"` The backslash-escape wasn't being properly caught by the highlighter. Also remove the highlighting of `"\'"`, as `\'` is not a valid escape in double-quotes, and add highlighting for a backslash-escaped newline. --- highlight.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/highlight.cpp b/highlight.cpp index f560264c2..9d43f8e08 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -880,7 +880,7 @@ static void color_argument_internal(const wcstring &buffstr, std::vector Date: Wed, 20 Aug 2014 22:55:24 -0700 Subject: [PATCH 5/6] Color `"$foo[1"` as an error We can't color the whole argument as an error, since the tokenizer is responsible for that and doesn't care abou this case, but we can color the `$foo[` bit as an error. --- highlight.cpp | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/highlight.cpp b/highlight.cpp index 9d43f8e08..5369f3a5a 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -617,12 +617,26 @@ static size_t color_variable(const wchar_t *in, size_t in_len, std::vector slice_begin_idx); - colors[slice_begin_idx] = highlight_spec_operator; - colors[slice_end_idx] = highlight_spec_operator; + case 1: + { + size_t slice_begin_idx = slice_begin - in, slice_end_idx = slice_end - in; + assert(slice_end_idx > slice_begin_idx); + colors[slice_begin_idx] = highlight_spec_operator; + colors[slice_end_idx] = highlight_spec_operator; + break; + } + case -1: + { + // syntax error + // Normally the entire token is colored red for us, but inside a double-quoted string + // that doesn't happen. As such, color the variable + the slice start red. Coloring any + // more than that looks bad, unless we're willing to try and detect where the double-quoted + // string ends, and I'd rather not do that. + std::fill(colors, colors + idx + 1, (highlight_spec_t)highlight_spec_error); + break; + } } } return idx; From 2974025010cac7b7238c88c88216c1722e61f5c9 Mon Sep 17 00:00:00 2001 From: Kevin Ballard Date: Thu, 21 Aug 2014 00:26:14 -0700 Subject: [PATCH 6/6] Fix error span for invalid slice indexes The span now properly points at the token that was invalid, rather than the start of the slice. Also fix the span for `()[1]` and `()[d]`, which were previously reporting no source location at all. --- expand.cpp | 31 +++++++++++++++++++++---------- tests/expansion.err | 12 ++++++++++++ tests/expansion.in | 6 ++++++ 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/expand.cpp b/expand.cpp index 7388cb18b..5f96fb2f9 100644 --- a/expand.cpp +++ b/expand.cpp @@ -953,8 +953,11 @@ void expand_variable_error(parser_t &parser, const wcstring &token, size_t token /** Parse an array slicing specification + Returns 0 on success. + If a parse error occurs, returns the index of the bad token. + Note that 0 can never be a bad index because the string always starts with [. */ -static int parse_slice(const wchar_t *in, wchar_t **end_ptr, std::vector &idx, std::vector &source_positions, size_t array_size) +static size_t parse_slice(const wchar_t *in, wchar_t **end_ptr, std::vector &idx, std::vector &source_positions, size_t array_size) { wchar_t *end; @@ -981,7 +984,7 @@ static int parse_slice(const wchar_t *in, wchar_t **end_ptr, std::vector & tmp = wcstol(&in[pos], &end, 10); if ((errno) || (end == &in[pos])) { - return 1; + return pos; } // debug( 0, L"Push idx %d", tmp ); @@ -999,7 +1002,7 @@ static int parse_slice(const wchar_t *in, wchar_t **end_ptr, std::vector & long tmp1 = wcstol(&in[pos], &end, 10); if ((errno) || (end == &in[pos])) { - return 1; + return pos; } pos = end-in; @@ -1136,12 +1139,14 @@ static int expand_variables_internal(parser_t &parser, wchar_t * const in, std:: if (in[slice_start] == L'[') { wchar_t *slice_end; + size_t bad_pos; all_vars=0; - if (parse_slice(in + slice_start, &slice_end, var_idx_list, var_pos_list, var_item_list.size())) + bad_pos = parse_slice(in + slice_start, &slice_end, var_idx_list, var_pos_list, var_item_list.size()); + if (bad_pos != 0) { append_syntax_error(errors, - stop_pos, + stop_pos + bad_pos, L"Invalid index value"); is_ok = 0; break; @@ -1261,11 +1266,13 @@ static int expand_variables_internal(parser_t &parser, wchar_t * const in, std:: if (in[slice_start] == L'[') { wchar_t *slice_end; + size_t bad_pos; - if (parse_slice(in + slice_start, &slice_end, var_idx_list, var_pos_list, 1)) + bad_pos = parse_slice(in + slice_start, &slice_end, var_idx_list, var_pos_list, 1); + if (bad_pos != 0) { append_syntax_error(errors, - stop_pos, + stop_pos + bad_pos, L"Invalid index value"); is_ok = 0; return is_ok; @@ -1499,11 +1506,14 @@ static int expand_cmdsubst(parser_t &parser, const wcstring &input, std::vector< { std::vector slice_idx; std::vector slice_source_positions; + const wchar_t * const slice_begin = tail_begin; wchar_t *slice_end; + size_t bad_pos; - if (parse_slice(tail_begin, &slice_end, slice_idx, slice_source_positions, sub_res.size())) + bad_pos = parse_slice(slice_begin, &slice_end, slice_idx, slice_source_positions, sub_res.size()); + if (bad_pos != 0) { - append_syntax_error(errors, SOURCE_LOCATION_UNKNOWN, L"Invalid index value"); + append_syntax_error(errors, slice_begin - in + bad_pos, L"Invalid index value"); return 0; } else @@ -1515,8 +1525,9 @@ static int expand_cmdsubst(parser_t &parser, const wcstring &input, std::vector< long idx = slice_idx.at(i); if (idx < 1 || (size_t)idx > sub_res.size()) { + size_t pos = slice_source_positions.at(i); append_syntax_error(errors, - SOURCE_LOCATION_UNKNOWN, + slice_begin - in + pos, ARRAY_BOUNDS_ERR); return 0; } diff --git a/tests/expansion.err b/tests/expansion.err index 6346f23fb..e60c203d5 100644 --- a/tests/expansion.err +++ b/tests/expansion.err @@ -16,3 +16,15 @@ fish: show "$foo[2 1]" Array index out of bounds fish: show $foo[2 1] ^ +Invalid index value +fish: echo "$foo[d]" + ^ +Invalid index value +fish: echo $foo[d] + ^ +Array index out of bounds +fish: echo ()[1] + ^ +Invalid index value +fish: echo ()[d] + ^ diff --git a/tests/expansion.in b/tests/expansion.in index 8b48772ef..a22251272 100644 --- a/tests/expansion.in +++ b/tests/expansion.in @@ -74,3 +74,9 @@ show "$foo[1 2]" show $foo[1 2] show "$foo[2 1]" show $foo[2 1] + +echo "$foo[d]" +echo $foo[d] + +echo ()[1] +echo ()[d]