simplify and clarify valid identifiers

This is the first step in addressing issue #3965. It renames some of the
functions involved in validating variable and function names to clarify
their purpose. It also augments the documentation to make the rules for
such identifiers clearly documented.
This commit is contained in:
Kurtis Rader 2017-04-19 23:43:02 -07:00
parent 15e1f4349b
commit 275d658616
13 changed files with 72 additions and 90 deletions

View file

@ -667,11 +667,22 @@ Example:
If the current directory contains the files 'foo' and 'bar', the command `echo a(ls){1,2,3} ` will output 'abar1 abar2 abar3 afoo1 afoo2 afoo3'.
\section identifiers Shell variable and function names
The names given to shell objects such as variables and function names are known as "identifiers". Each type of identifier has rules that define the valid sequence of characters which compose the identifier.
A variable name cannot be empty. It can contain only letters, digits, and underscores. It may begin and end with any of those characters.
A function name cannot be empty. It may not begin with a hyphen ("-") and many contain a slash ("/"). All other characters, including a space, are valid.
A bind mode name (e.g., `bind -m abc ...`) is restricted to the rules for valid variable names.
\section variables Shell variables
Shell variables are named pieces of data, which can be created, deleted and their values changed and used by the user. Variables may optionally be "exported", so that a copy of the variable is available to any subprocesses the shell creates. An exported variable is referred to as an "environment variable".
To set a variable value, use the <a href="commands.html#set">`set` command</a>.
To set a variable value, use the <a href="commands.html#set">`set` command</a>. A variable name can not be empty and can contain only letters, digits, and underscores. It may begin and end with any of those characters.
Example:

View file

@ -88,20 +88,6 @@ static void builtin_append_format(wcstring &str, const wchar_t *fmt, ...) {
va_end(ap);
}
bool builtin_is_valid_varname(const wchar_t *varname, wcstring &errstr, const wchar_t *cmd) {
const wchar_t *invalid_char = wcsvarname(varname);
if (!invalid_char) {
return true;
}
if (*invalid_char == L'\0') {
builtin_append_format(errstr, BUILTIN_ERR_VARNAME_ZERO, cmd);
} else {
builtin_append_format(errstr, BUILTIN_ERR_VARCHAR, cmd, *invalid_char);
}
return false;
}
/// Counts the number of arguments in the specified null-terminated array
int builtin_count_args(const wchar_t *const *argv) {
int argc;
@ -1270,7 +1256,7 @@ static int builtin_functions(parser_t &parser, io_streams_t &streams, wchar_t **
return STATUS_BUILTIN_ERROR;
}
if (!wcsfuncname(new_func) || parser_keywords_is_reserved(new_func)) {
if (!valid_func_name(new_func) || parser_keywords_is_reserved(new_func)) {
streams.err.append_format(_(L"%ls: Illegal function name '%ls'\n"), argv[0],
new_func.c_str());
builtin_print_help(parser, streams, argv[0], streams.err);
@ -1597,7 +1583,7 @@ static int validate_function_name(int argc, const wchar_t *const *argv, wcstring
}
function_name = argv[1];
if (!wcsfuncname(function_name)) {
if (!valid_func_name(function_name)) {
append_format(*out_err, _(L"%ls: Illegal function name '%ls'"), cmd, function_name.c_str());
return STATUS_BUILTIN_ERROR;
}
@ -1681,7 +1667,7 @@ int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_lis
break;
}
case 'v': {
if (wcsvarname(w.woptarg)) {
if (!valid_var_name(w.woptarg)) {
append_format(*out_err, _(L"%ls: Invalid variable name '%ls'"), cmd, w.woptarg);
return STATUS_BUILTIN_ERROR;
}
@ -1751,7 +1737,7 @@ int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_lis
break;
}
case 'V': {
if (wcsvarname(w.woptarg)) {
if (!valid_var_name(w.woptarg)) {
append_format(*out_err, _(L"%ls: Invalid variable name '%ls'"), cmd, w.woptarg);
return STATUS_BUILTIN_ERROR;
}
@ -2293,10 +2279,9 @@ static int builtin_read(parser_t &parser, io_streams_t &streams, wchar_t **argv)
}
// Verify all variable names.
wcstring errstr;
for (int i = w.woptind; i < argc; i++) {
if (!builtin_is_valid_varname(argv[i], errstr, argv[0])) {
streams.err.append(errstr);
if (!valid_var_name(argv[i])) {
streams.err.append_format(BUILTIN_ERR_VARNAME, cmd, argv[i]);
builtin_print_help(parser, streams, argv[0], streams.err);
return STATUS_BUILTIN_ERROR;
}

View file

@ -57,13 +57,8 @@ enum { COMMAND_NOT_BUILTIN, BUILTIN_REGULAR, BUILTIN_FUNCTION };
#define BUILTIN_ERR_ARG_COUNT1 _(L"%ls: expected %d args, got %d\n")
#define BUILTIN_ERR_ARG_COUNT2 _(L"%ls: %ls expected %d args, got %d\n")
/// Error message for invalid character in variable name.
#define BUILTIN_ERR_VARCHAR \
_(L"%ls: Invalid character '%lc' in variable name. Only alphanumerical characters and " \
L"underscores are valid in a variable name.\n")
/// Error message for invalid (empty) variable name.
#define BUILTIN_ERR_VARNAME_ZERO _(L"%ls: Variable name can not be the empty string\n")
/// Error message for invalid variable name.
#define BUILTIN_ERR_VARNAME _(L"%ls: Variable name '%ls' is not valid. See `help identifiers`.\n")
/// Error message when too many arguments are supplied to a builtin.
#define BUILTIN_ERR_TOO_MANY_ARGUMENTS _(L"%ls: Too many arguments\n")
@ -110,7 +105,6 @@ int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_lis
void builtin_print_help(parser_t &parser, io_streams_t &streams, const wchar_t *cmd,
output_stream_t &b);
int builtin_count_args(const wchar_t *const *argv);
bool builtin_is_valid_varname(const wchar_t *varname, wcstring &errstr, const wchar_t *cmd);
void builtin_unknown_option(parser_t &parser, io_streams_t &streams, const wchar_t *cmd,
const wchar_t *opt);

View file

@ -315,6 +315,7 @@ static void print_variables(int include_values, int esc, bool shorten_ok, int sc
/// The set builtin creates, updates, and erases (removes, deletes) variables.
int builtin_set(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
wchar_t *cmd = argv[0];
wgetopter_t w;
// Variables used for parsing the argument list.
const struct woption long_options[] = {{L"export", no_argument, 0, 'x'},
@ -400,11 +401,11 @@ int builtin_set(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
break;
}
case 'h': {
builtin_print_help(parser, streams, argv[0], streams.out);
builtin_print_help(parser, streams, cmd, streams.out);
return 0;
}
case '?': {
builtin_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
builtin_unknown_option(parser, streams, cmd, argv[w.woptind - 1]);
return 1;
}
default: { break; }
@ -414,31 +415,31 @@ int builtin_set(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
// Ok, all arguments have been parsed, let's validate them. If we are checking the existance of
// a variable (-q) we can not also specify scope.
if (query && (erase || list)) {
streams.err.append_format(BUILTIN_ERR_COMBO, argv[0]);
streams.err.append_format(BUILTIN_ERR_COMBO, cmd);
builtin_print_help(parser, streams, argv[0], streams.err);
builtin_print_help(parser, streams, cmd, streams.err);
return 1;
}
// We can't both list and erase variables.
if (erase && list) {
streams.err.append_format(BUILTIN_ERR_COMBO, argv[0]);
streams.err.append_format(BUILTIN_ERR_COMBO, cmd);
builtin_print_help(parser, streams, argv[0], streams.err);
builtin_print_help(parser, streams, cmd, streams.err);
return 1;
}
// Variables can only have one scope.
if (local + global + universal > 1) {
streams.err.append_format(BUILTIN_ERR_GLOCAL, argv[0]);
builtin_print_help(parser, streams, argv[0], streams.err);
streams.err.append_format(BUILTIN_ERR_GLOCAL, cmd);
builtin_print_help(parser, streams, cmd, streams.err);
return 1;
}
// Variables can only have one export status.
if (exportv && unexport) {
streams.err.append_format(BUILTIN_ERR_EXPUNEXP, argv[0]);
builtin_print_help(parser, streams, argv[0], streams.err);
builtin_print_help(parser, streams, cmd, streams.err);
return 1;
}
@ -471,7 +472,7 @@ int builtin_set(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
if (!dest_str.missing()) tokenize_variable_array(dest_str, result);
if (!parse_index(indexes, arg, dest, result.size(), streams)) {
builtin_print_help(parser, streams, argv[0], streams.err);
builtin_print_help(parser, streams, cmd, streams.err);
retcode = 1;
break;
}
@ -501,9 +502,9 @@ int builtin_set(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
if (w.woptind == argc) {
// Print values of variables.
if (erase) {
streams.err.append_format(_(L"%ls: Erase needs a variable name\n"), argv[0]);
streams.err.append_format(_(L"%ls: Erase needs a variable name\n"), cmd);
builtin_print_help(parser, streams, argv[0], streams.err);
builtin_print_help(parser, streams, cmd, streams.err);
retcode = 1;
} else {
print_variables(1, 1, shorten_ok, scope, streams);
@ -521,8 +522,8 @@ int builtin_set(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
}
wcstring errstr;
if (!builtin_is_valid_varname(dest, errstr, argv[0])) {
streams.err.append(errstr);
if (!valid_var_name(dest)) {
streams.err.append_format(BUILTIN_ERR_VARNAME, cmd, dest);
builtin_print_help(parser, streams, argv[0], streams.err);
return STATUS_BUILTIN_ERROR;
}

View file

@ -1993,3 +1993,27 @@ void redirect_tty_output() {
show_stackframe(L'E', 99, 1);
abort();
}
/// Test if the given char is valid in a variable name.
bool valid_var_name_char(wchar_t chr) { return fish_iswalnum(chr) || chr == L'_'; }
/// Test if the given string is a valid variable name.
bool valid_var_name(const wchar_t *str) {
if (str[0] == L'\0') return false;
while (*str) {
if (!valid_var_name_char(*str)) return false;
str++;
}
return true;
}
/// Test if the given string is a valid variable name.
bool valid_var_name(const wcstring &str) { return valid_var_name(str.c_str()); }
/// Test if the string is a valid function name.
bool valid_func_name(const wcstring &str) {
if (str.size() == 0) return false;
if (str.at(0) == L'-') return false;
if (str.find_first_of(L'/') != wcstring::npos) return false;
return true;
}

View file

@ -1,7 +1,7 @@
// Prototypes for various functions, mostly string utilities, that are used by most parts of fish.
#ifndef FISH_COMMON_H
#define FISH_COMMON_H
#include "config.h"
#include "config.h" // IWYU pragma: keep
#include <errno.h>
#include <pthread.h>
@ -845,4 +845,9 @@ void redirect_tty_output();
#define DFLT_TERM_ROW_STR L"24"
void invalidate_termsize(bool invalidate_vars = false);
struct winsize get_current_winsize();
bool valid_var_name_char(wchar_t chr);
bool valid_var_name(const wchar_t *str);
bool valid_var_name(const wcstring &str);
bool valid_func_name(const wcstring &str);
#endif

View file

@ -1121,7 +1121,7 @@ bool completer_t::try_complete_variable(const wcstring &str) {
for (size_t in_pos = 0; in_pos < len; in_pos++) {
wchar_t c = str.at(in_pos);
if (!wcsvarchr(c)) {
if (!valid_var_name_char(c)) {
// This character cannot be in a variable, reset the dollar.
variable_start = -1;
}

View file

@ -228,7 +228,7 @@ static bool append_file_entry(fish_message_type_t type, const wcstring &key_in,
result->push_back(' ');
// Append variable name like "fish_color_cwd".
if (wcsvarname(key_in)) {
if (!valid_var_name(key_in)) {
debug(0, L"Illegal variable name: '%ls'", key_in.c_str());
success = false;
}

View file

@ -746,7 +746,7 @@ static int expand_variables(const wcstring &instr, std::vector<completion_t> *ou
stop_pos++;
break;
}
if (!wcsvarchr(nc)) break;
if (!valid_var_name_char(nc)) break;
stop_pos++;
}

View file

@ -393,7 +393,7 @@ static size_t color_variable(const wchar_t *in, size_t in_len,
while (in[idx] == '$') {
// Our color depends on the next char.
wchar_t next = in[idx + 1];
if (next == L'$' || wcsvarchr(next)) {
if (next == L'$' || valid_var_name_char(next)) {
colors[idx] = highlight_spec_operator;
} else {
colors[idx] = highlight_spec_error;
@ -403,7 +403,7 @@ static size_t color_variable(const wchar_t *in, size_t in_len,
}
// Handle a sequence of variable characters.
while (wcsvarchr(in[idx])) {
while (valid_var_name_char(in[idx])) {
colors[idx++] = highlight_spec_operator;
}

View file

@ -844,7 +844,7 @@ void parse_util_expand_variable_error(const wcstring &token, size_t global_token
if (closing_bracket != wcstring::npos) {
size_t var_start = dollar_pos + 2, var_end = closing_bracket;
var_name = wcstring(token, var_start, var_end - var_start);
looks_like_variable = wcsvarname(var_name) == NULL;
looks_like_variable = valid_var_name(var_name);
}
if (looks_like_variable) {
append_syntax_error(
@ -1026,7 +1026,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const parse_node_t
wchar_t next_char = idx + 1 < unesc_size ? unesc.at(idx + 1) : L'\0';
if (next_char != VARIABLE_EXPAND && next_char != VARIABLE_EXPAND_SINGLE &&
!wcsvarchr(next_char)) {
!valid_var_name_char(next_char)) {
err = 1;
if (out_errors) {
// We have something like $$$^.... Back up until we reach the first $.

View file

@ -474,40 +474,6 @@ int fish_iswgraph(wint_t wc) {
return iswgraph(wc);
}
/// Test if the given string is a valid variable name.
///
/// \return null if this is a valid name, and a pointer to the first invalid character otherwise.
const wchar_t *wcsvarname(const wchar_t *str) {
if (str[0] == L'\0') return str;
while (*str) {
if ((!fish_iswalnum(*str)) && (*str != L'_')) {
return str;
}
str++;
}
return NULL;
}
/// Test if the given string is a valid variable name.
///
/// \return null if this is a valid name, and a pointer to the first invalid character otherwise.
const wchar_t *wcsvarname(const wcstring &str) { return wcsvarname(str.c_str()); }
/// Test if the string is a valid function name.
///
/// \return true if it is valid else false.
bool wcsfuncname(const wcstring &str) {
if (str.size() == 0) return false;
if (str.at(0) == L'-') return false;
if (str.find_first_of(L'/') != wcstring::npos) return false;
return true;
}
/// Test if the given string is valid in a variable name.
///
/// \return true if this is a valid name, false otherwise.
bool wcsvarchr(wchar_t chr) { return fish_iswalnum(chr) || chr == L'_'; }
/// Convenience variants on fish_wcwswidth().
///
/// See fallback.h for the normal definitions.

View file

@ -110,10 +110,6 @@ int fish_iswalnum(wint_t wc);
int fish_iswalpha(wint_t wc);
int fish_iswgraph(wint_t wc);
const wchar_t *wcsvarname(const wchar_t *str);
const wchar_t *wcsvarname(const wcstring &str);
bool wcsfuncname(const wcstring &str);
bool wcsvarchr(wchar_t chr);
int fish_wcswidth(const wchar_t *str);
int fish_wcswidth(const wcstring &str);