// Functions used for implementing the test builtin. // // Implemented from scratch (yes, really) by way of IEEE 1003.1 as reference. #include "config.h" // IWYU pragma: keep #include #include #include #include #include #include #include #include #include #include "builtin.h" #include "common.h" #include "io.h" #include "wutil.h" // IWYU pragma: keep using std::unique_ptr; using std::move; int builtin_test(parser_t &parser, io_streams_t &streams, wchar_t **argv); namespace test_expressions { enum token_t { test_unknown, // arbitrary string test_bang, // "!", inverts sense test_filetype_b, // "-b", for block special files test_filetype_c, // "-c", for character special files test_filetype_d, // "-d", for directories test_filetype_e, // "-e", for files that exist test_filetype_f, // "-f", for for regular files test_filetype_G, // "-G", for check effective group id test_filetype_g, // "-g", for set-group-id test_filetype_h, // "-h", for symbolic links test_filetype_k, // "-k", for sticky bit test_filetype_L, // "-L", same as -h test_filetype_O, // "-O", for check effective user id test_filetype_p, // "-p", for FIFO test_filetype_S, // "-S", socket test_filesize_s, // "-s", size greater than zero test_filedesc_t, // "-t", whether the fd is associated with a terminal test_fileperm_r, // "-r", read permission test_fileperm_u, // "-u", whether file is setuid test_fileperm_w, // "-w", whether file write permission is allowed test_fileperm_x, // "-x", whether file execute/search is allowed test_string_n, // "-n", non-empty string test_string_z, // "-z", true if length of string is 0 test_string_equal, // "=", true if strings are identical test_string_not_equal, // "!=", true if strings are not identical test_number_equal, // "-eq", true if numbers are equal test_number_not_equal, // "-ne", true if numbers are not equal test_number_greater, // "-gt", true if first number is larger than second test_number_greater_equal, // "-ge", true if first number is at least second test_number_lesser, // "-lt", true if first number is smaller than second test_number_lesser_equal, // "-le", true if first number is at most second test_combine_and, // "-a", true if left and right are both true test_combine_or, // "-o", true if either left or right is true test_paren_open, // "(", open paren test_paren_close, // ")", close paren }; static bool binary_primary_evaluate(test_expressions::token_t token, const wcstring &left, const wcstring &right, wcstring_list_t &errors); static bool unary_primary_evaluate(test_expressions::token_t token, const wcstring &arg, wcstring_list_t &errors); enum { UNARY_PRIMARY = 1 << 0, BINARY_PRIMARY = 1 << 1 }; static const struct token_info_t { token_t tok; const wchar_t *string; unsigned int flags; } token_infos[] = {{test_unknown, L"", 0}, {test_bang, L"!", 0}, {test_filetype_b, L"-b", UNARY_PRIMARY}, {test_filetype_c, L"-c", UNARY_PRIMARY}, {test_filetype_d, L"-d", UNARY_PRIMARY}, {test_filetype_e, L"-e", UNARY_PRIMARY}, {test_filetype_f, L"-f", UNARY_PRIMARY}, {test_filetype_G, L"-G", UNARY_PRIMARY}, {test_filetype_g, L"-g", UNARY_PRIMARY}, {test_filetype_h, L"-h", UNARY_PRIMARY}, {test_filetype_k, L"-k", UNARY_PRIMARY}, {test_filetype_L, L"-L", UNARY_PRIMARY}, {test_filetype_O, L"-O", UNARY_PRIMARY}, {test_filetype_p, L"-p", UNARY_PRIMARY}, {test_filetype_S, L"-S", UNARY_PRIMARY}, {test_filesize_s, L"-s", UNARY_PRIMARY}, {test_filedesc_t, L"-t", UNARY_PRIMARY}, {test_fileperm_r, L"-r", UNARY_PRIMARY}, {test_fileperm_u, L"-u", UNARY_PRIMARY}, {test_fileperm_w, L"-w", UNARY_PRIMARY}, {test_fileperm_x, L"-x", UNARY_PRIMARY}, {test_string_n, L"-n", UNARY_PRIMARY}, {test_string_z, L"-z", UNARY_PRIMARY}, {test_string_equal, L"=", BINARY_PRIMARY}, {test_string_not_equal, L"!=", BINARY_PRIMARY}, {test_number_equal, L"-eq", BINARY_PRIMARY}, {test_number_not_equal, L"-ne", BINARY_PRIMARY}, {test_number_greater, L"-gt", BINARY_PRIMARY}, {test_number_greater_equal, L"-ge", BINARY_PRIMARY}, {test_number_lesser, L"-lt", BINARY_PRIMARY}, {test_number_lesser_equal, L"-le", BINARY_PRIMARY}, {test_combine_and, L"-a", 0}, {test_combine_or, L"-o", 0}, {test_paren_open, L"(", 0}, {test_paren_close, L")", 0}}; const token_info_t *token_for_string(const wcstring &str) { for (size_t i = 0; i < sizeof token_infos / sizeof *token_infos; i++) { if (str == token_infos[i].string) { return &token_infos[i]; } } return &token_infos[0]; // unknown } // Grammar. // // = // // = and/or | // // // = bang | // // // = arg | // arg arg | // '(' ')' class expression; class test_parser { private: wcstring_list_t strings; wcstring_list_t errors; unique_ptr error(const wchar_t *fmt, ...); void add_error(const wchar_t *fmt, ...); const wcstring &arg(unsigned int idx) { return strings.at(idx); } public: explicit test_parser(const wcstring_list_t &val) : strings(val) {} unique_ptr parse_expression(unsigned int start, unsigned int end); unique_ptr parse_3_arg_expression(unsigned int start, unsigned int end); unique_ptr parse_4_arg_expression(unsigned int start, unsigned int end); unique_ptr parse_combining_expression(unsigned int start, unsigned int end); unique_ptr parse_unary_expression(unsigned int start, unsigned int end); unique_ptr parse_primary(unsigned int start, unsigned int end); unique_ptr parse_parenthentical(unsigned int start, unsigned int end); unique_ptr parse_unary_primary(unsigned int start, unsigned int end); unique_ptr parse_binary_primary(unsigned int start, unsigned int end); unique_ptr parse_just_a_string(unsigned int start, unsigned int end); static unique_ptr parse_args(const wcstring_list_t &args, wcstring &err, wchar_t *program_name); }; struct range_t { unsigned int start; unsigned int end; range_t(unsigned s, unsigned e) : start(s), end(e) {} }; /// Base class for expressions. class expression { protected: expression(token_t what, range_t where) : token(what), range(where) {} public: const token_t token; range_t range; virtual ~expression() {} /// Evaluate returns true if the expression is true (i.e. STATUS_CMD_OK). virtual bool evaluate(wcstring_list_t &errors) = 0; }; /// Single argument like -n foo or "just a string". class unary_primary : public expression { public: wcstring arg; unary_primary(token_t tok, range_t where, const wcstring &what) : expression(tok, where), arg(what) {} bool evaluate(wcstring_list_t &errors); }; /// Two argument primary like foo != bar. class binary_primary : public expression { public: wcstring arg_left; wcstring arg_right; binary_primary(token_t tok, range_t where, const wcstring &left, const wcstring &right) : expression(tok, where), arg_left(left), arg_right(right) {} bool evaluate(wcstring_list_t &errors); }; /// Unary operator like bang. class unary_operator : public expression { public: unique_ptr subject; unary_operator(token_t tok, range_t where, unique_ptr exp) : expression(tok, where), subject(move(exp)) {} bool evaluate(wcstring_list_t &errors); }; /// Combining expression. Contains a list of AND or OR expressions. It takes more than two so that /// we don't have to worry about precedence in the parser. class combining_expression : public expression { public: const std::vector> subjects; const std::vector combiners; combining_expression(token_t tok, range_t where, std::vector> exprs, const std::vector &combs) : expression(tok, where), subjects(std::move(exprs)), combiners(std::move(combs)) { // We should have one more subject than combiner. assert(subjects.size() == combiners.size() + 1); } virtual ~combining_expression() {} bool evaluate(wcstring_list_t &errors); }; /// Parenthetical expression. class parenthetical_expression : public expression { public: unique_ptr contents; parenthetical_expression(token_t tok, range_t where, unique_ptr expr) : expression(tok, where), contents(move(expr)) {} virtual bool evaluate(wcstring_list_t &errors); }; void test_parser::add_error(const wchar_t *fmt, ...) { assert(fmt != NULL); va_list va; va_start(va, fmt); this->errors.push_back(vformat_string(fmt, va)); va_end(va); } unique_ptr test_parser::error(const wchar_t *fmt, ...) { assert(fmt != NULL); va_list va; va_start(va, fmt); this->errors.push_back(vformat_string(fmt, va)); va_end(va); return NULL; } unique_ptr test_parser::parse_unary_expression(unsigned int start, unsigned int end) { if (start >= end) { return error(L"Missing argument at index %u", start); } token_t tok = token_for_string(arg(start))->tok; if (tok == test_bang) { unique_ptr subject(parse_unary_expression(start + 1, end)); if (subject.get()) { return make_unique(tok, range_t(start, subject->range.end), move(subject)); } return NULL; } return parse_primary(start, end); } /// Parse a combining expression (AND, OR). unique_ptr test_parser::parse_combining_expression(unsigned int start, unsigned int end) { if (start >= end) return NULL; std::vector> subjects; std::vector combiners; unsigned int idx = start; bool first = true; while (idx < end) { if (!first) { // This is not the first expression, so we expect a combiner. token_t combiner = token_for_string(arg(idx))->tok; if (combiner != test_combine_and && combiner != test_combine_or) { /* Not a combiner, we're done */ this->errors.insert( this->errors.begin(), format_string(L"Expected a combining operator like '-a' at index %u", idx)); break; } combiners.push_back(combiner); idx++; } // Parse another expression. unique_ptr expr = parse_unary_expression(idx, end); if (!expr) { add_error(L"Missing argument at index %u", idx); if (!first) { // Clean up the dangling combiner, since it never got its right hand expression. combiners.pop_back(); } break; } // Go to the end of this expression. idx = expr->range.end; subjects.push_back(move(expr)); first = false; } if (subjects.empty()) { return NULL; // no subjects } // Our new expression takes ownership of all expressions we created. The token we pass is // irrelevant. return make_unique(test_combine_and, range_t(start, idx), move(subjects), move(combiners)); } unique_ptr test_parser::parse_unary_primary(unsigned int start, unsigned int end) { // We need two arguments. if (start >= end) { return error(L"Missing argument at index %u", start); } if (start + 1 >= end) { return error(L"Missing argument at index %u", start + 1); } // All our unary primaries are prefix, so the operator is at start. const token_info_t *info = token_for_string(arg(start)); if (!(info->flags & UNARY_PRIMARY)) return NULL; return make_unique(info->tok, range_t(start, start + 2), arg(start + 1)); } unique_ptr test_parser::parse_just_a_string(unsigned int start, unsigned int end) { // Handle a string as a unary primary that is not a token of any other type. e.g. 'test foo -a // bar' should evaluate to true We handle this with a unary primary of test_string_n. // We need one argument. if (start >= end) { return error(L"Missing argument at index %u", start); } const token_info_t *info = token_for_string(arg(start)); if (info->tok != test_unknown) { return error(L"Unexpected argument type at index %u", start); } // This is hackish; a nicer way to implement this would be with a "just a string" expression // type. return make_unique(test_string_n, range_t(start, start + 1), arg(start)); } unique_ptr test_parser::parse_binary_primary(unsigned int start, unsigned int end) { // We need three arguments. for (unsigned int idx = start; idx < start + 3; idx++) { if (idx >= end) { return error(L"Missing argument at index %u", idx); } } // All our binary primaries are infix, so the operator is at start + 1. const token_info_t *info = token_for_string(arg(start + 1)); if (!(info->flags & BINARY_PRIMARY)) return NULL; return make_unique(info->tok, range_t(start, start + 3), arg(start), arg(start + 2)); } unique_ptr test_parser::parse_parenthentical(unsigned int start, unsigned int end) { // We need at least three arguments: open paren, argument, close paren. if (start + 3 >= end) return NULL; // Must start with an open expression. const token_info_t *open_paren = token_for_string(arg(start)); if (open_paren->tok != test_paren_open) return NULL; // Parse a subexpression. unique_ptr subexpr = parse_expression(start + 1, end); if (!subexpr) return NULL; // Parse a close paren. unsigned close_index = subexpr->range.end; assert(close_index <= end); if (close_index == end) { return error(L"Missing close paren at index %u", close_index); } const token_info_t *close_paren = token_for_string(arg(close_index)); if (close_paren->tok != test_paren_close) { return error(L"Expected close paren at index %u", close_index); } // Success. return make_unique(test_paren_open, range_t(start, close_index + 1), move(subexpr)); } unique_ptr test_parser::parse_primary(unsigned int start, unsigned int end) { if (start >= end) { return error(L"Missing argument at index %u", start); } unique_ptr expr = NULL; if (!expr) expr = parse_parenthentical(start, end); if (!expr) expr = parse_unary_primary(start, end); if (!expr) expr = parse_binary_primary(start, end); if (!expr) expr = parse_just_a_string(start, end); return expr; } // See IEEE 1003.1 breakdown of the behavior for different parameter counts. unique_ptr test_parser::parse_3_arg_expression(unsigned int start, unsigned int end) { assert(end - start == 3); unique_ptr result = NULL; const token_info_t *center_token = token_for_string(arg(start + 1)); if (center_token->flags & BINARY_PRIMARY) { result = parse_binary_primary(start, end); } else if (center_token->tok == test_combine_and || center_token->tok == test_combine_or) { unique_ptr left(parse_unary_expression(start, start + 1)); unique_ptr right(parse_unary_expression(start + 2, start + 3)); if (left.get() && right.get()) { // Transfer ownership to the vector of subjects. std::vector combiners = {center_token->tok}; std::vector> subjects; subjects.push_back(move(left)); subjects.push_back(move(right)); result = make_unique(center_token->tok, range_t(start, end), move(subjects), move(combiners)); } } else { result = parse_unary_expression(start, end); } return result; } unique_ptr test_parser::parse_4_arg_expression(unsigned int start, unsigned int end) { assert(end - start == 4); unique_ptr result = NULL; token_t first_token = token_for_string(arg(start))->tok; if (first_token == test_bang) { unique_ptr subject(parse_3_arg_expression(start + 1, end)); if (subject.get()) { result = make_unique(first_token, range_t(start, subject->range.end), move(subject)); } } else if (first_token == test_paren_open) { result = parse_parenthentical(start, end); } else { result = parse_combining_expression(start, end); } return result; } unique_ptr test_parser::parse_expression(unsigned int start, unsigned int end) { if (start >= end) { return error(L"Missing argument at index %u", start); } unsigned int argc = end - start; switch (argc) { case 0: { DIE("argc should not be zero"); // should have been caught by the above test break; } case 1: { return error(L"Missing argument at index %u", start + 1); } case 2: { return parse_unary_expression(start, end); } case 3: { return parse_3_arg_expression(start, end); } case 4: { return parse_4_arg_expression(start, end); } default: { return parse_combining_expression(start, end); } } } unique_ptr test_parser::parse_args(const wcstring_list_t &args, wcstring &err, wchar_t *program_name) { // Empty list and one-arg list should be handled by caller. assert(args.size() > 1); test_parser parser(args); unique_ptr result = parser.parse_expression(0, (unsigned int)args.size()); // Handle errors. // For now we only show the first error. if (!parser.errors.empty()) { err.append(program_name); err.append(L": "); err.append(parser.errors.at(0)); err.push_back(L'\n'); } if (result) { // It's also an error if there are any unused arguments. This is not detected by // parse_expression(). assert(result->range.end <= args.size()); if (result->range.end < args.size()) { if (err.empty()) { append_format(err, L"%ls: unexpected argument at index %lu: '%ls'\n", program_name, (unsigned long)result->range.end, args.at(result->range.end).c_str()); } result.reset(NULL); } } return result; } bool unary_primary::evaluate(wcstring_list_t &errors) { return unary_primary_evaluate(token, arg, errors); } bool binary_primary::evaluate(wcstring_list_t &errors) { return binary_primary_evaluate(token, arg_left, arg_right, errors); } bool unary_operator::evaluate(wcstring_list_t &errors) { if (token == test_bang) { assert(subject.get()); return !subject->evaluate(errors); } errors.push_back(format_string(L"Unknown token type in %s", __func__)); return false; } bool combining_expression::evaluate(wcstring_list_t &errors) { if (token == test_combine_and || token == test_combine_or) { assert(!subjects.empty()); //!OCLINT(multiple unary operator) assert(combiners.size() + 1 == subjects.size()); // One-element case. if (subjects.size() == 1) return subjects.at(0)->evaluate(errors); // Evaluate our lists, remembering that AND has higher precedence than OR. We can // visualize this as a sequence of OR expressions of AND expressions. size_t idx = 0, max = subjects.size(); bool or_result = false; while (idx < max) { if (or_result) { // short circuit break; } // Evaluate a stream of AND starting at given subject index. It may only have one // element. bool and_result = true; for (; idx < max; idx++) { // Evaluate it, short-circuiting. and_result = and_result && subjects.at(idx)->evaluate(errors); // If the combiner at this index (which corresponding to how we combine with the // next subject) is not AND, then exit the loop. if (idx + 1 < max && combiners.at(idx) != test_combine_and) { idx++; break; } } // OR it in. or_result = or_result || and_result; } return or_result; } errors.push_back(format_string(L"Unknown token type in %s", __func__)); return STATUS_INVALID_ARGS; } bool parenthetical_expression::evaluate(wcstring_list_t &errors) { return contents->evaluate(errors); } // IEEE 1003.1 says nothing about what it means for two strings to be "algebraically equal". For // example, should we interpret 0x10 as 0, 10, or 16? Here we use only base 10 and use wcstoll, // which allows for leading + and -, and whitespace. This is consistent, albeit a bit more lenient // since we allow trailing whitespace, with other implementations such as bash. static bool parse_number(const wcstring &arg, long long *out, wcstring_list_t &errors) { *out = fish_wcstoll(arg.c_str()); if (errno) { errors.push_back(format_string(_(L"invalid integer '%ls'"), arg.c_str())); } return !errno; } static bool binary_primary_evaluate(test_expressions::token_t token, const wcstring &left, const wcstring &right, wcstring_list_t &errors) { using namespace test_expressions; long long left_num, right_num; switch (token) { case test_string_equal: { return left == right; } case test_string_not_equal: { return left != right; } case test_number_equal: { return parse_number(left, &left_num, errors) && parse_number(right, &right_num, errors) && left_num == right_num; } case test_number_not_equal: { return parse_number(left, &left_num, errors) && parse_number(right, &right_num, errors) && left_num != right_num; } case test_number_greater: { return parse_number(left, &left_num, errors) && parse_number(right, &right_num, errors) && left_num > right_num; } case test_number_greater_equal: { return parse_number(left, &left_num, errors) && parse_number(right, &right_num, errors) && left_num >= right_num; } case test_number_lesser: { return parse_number(left, &left_num, errors) && parse_number(right, &right_num, errors) && left_num < right_num; } case test_number_lesser_equal: { return parse_number(left, &left_num, errors) && parse_number(right, &right_num, errors) && left_num <= right_num; } default: { errors.push_back(format_string(L"Unknown token type in %s", __func__)); return false; } } } static bool unary_primary_evaluate(test_expressions::token_t token, const wcstring &arg, wcstring_list_t &errors) { using namespace test_expressions; struct stat buf; long long num; switch (token) { case test_filetype_b: { // "-b", for block special files return !wstat(arg, &buf) && S_ISBLK(buf.st_mode); } case test_filetype_c: { // "-c", for character special files return !wstat(arg, &buf) && S_ISCHR(buf.st_mode); } case test_filetype_d: { // "-d", for directories return !wstat(arg, &buf) && S_ISDIR(buf.st_mode); } case test_filetype_e: { // "-e", for files that exist return !wstat(arg, &buf); } case test_filetype_f: { // "-f", for for regular files return !wstat(arg, &buf) && S_ISREG(buf.st_mode); } case test_filetype_G: { // "-G", for check effective group id return !wstat(arg, &buf) && getegid() == buf.st_gid; } case test_filetype_g: { // "-g", for set-group-id return !wstat(arg, &buf) && (S_ISGID & buf.st_mode); } case test_filetype_h: // "-h", for symbolic links case test_filetype_L: { // "-L", same as -h return !lwstat(arg, &buf) && S_ISLNK(buf.st_mode); } case test_filetype_k: { // "-k", for sticky bit #ifdef S_ISVTX return !lwstat(arg, &buf) && buf.st_mode & S_ISVTX; #else return false; #endif } case test_filetype_O: { // "-O", for check effective user id return !wstat(arg, &buf) && geteuid() == buf.st_uid; } case test_filetype_p: { // "-p", for FIFO return !wstat(arg, &buf) && S_ISFIFO(buf.st_mode); } case test_filetype_S: { // "-S", socket return !wstat(arg, &buf) && S_ISSOCK(buf.st_mode); } case test_filesize_s: { // "-s", size greater than zero return !wstat(arg, &buf) && buf.st_size > 0; } case test_filedesc_t: { // "-t", whether the fd is associated with a terminal return parse_number(arg, &num, errors) && num == (int)num && isatty((int)num); } case test_fileperm_r: { // "-r", read permission return !waccess(arg, R_OK); } case test_fileperm_u: { // "-u", whether file is setuid return !wstat(arg, &buf) && (S_ISUID & buf.st_mode); } case test_fileperm_w: { // "-w", whether file write permission is allowed return !waccess(arg, W_OK); } case test_fileperm_x: { // "-x", whether file execute/search is allowed return !waccess(arg, X_OK); } case test_string_n: { // "-n", non-empty string return !arg.empty(); } case test_string_z: { // "-z", true if length of string is 0 return arg.empty(); } default: { errors.push_back(format_string(L"Unknown token type in %s", __func__)); return false; } } } }; // namespace test_expressions /// Evaluate a conditional expression given the arguments. If fromtest is set, the caller is the /// test or [ builtin; with the pointer giving the name of the command. for POSIX conformance this /// supports a more limited range of functionality. /// /// Return status is the final shell status, i.e. 0 for true, 1 for false and 2 for error. int builtin_test(parser_t &parser, io_streams_t &streams, wchar_t **argv) { UNUSED(parser); using namespace test_expressions; // The first argument should be the name of the command ('test'). if (!argv[0]) return STATUS_INVALID_ARGS; // Whether we are invoked with bracket '[' or not. wchar_t *program_name = argv[0]; const bool is_bracket = !wcscmp(program_name, L"["); size_t argc = 0; while (argv[argc + 1]) argc++; // If we're bracket, the last argument ought to be ]; we ignore it. Note that argc is the number // of arguments after the command name; thus argv[argc] is the last argument. if (is_bracket) { if (!wcscmp(argv[argc], L"]")) { // Ignore the closing bracket from now on. argc--; } else { streams.err.append(L"[: the last argument must be ']'\n"); return STATUS_INVALID_ARGS; } } // Collect the arguments into a list. const wcstring_list_t args(argv + 1, argv + 1 + argc); if (argc == 0) { return STATUS_CMD_ERROR; // Per 1003.1, exit false. } else if (argc == 1) { // Per 1003.1, exit true if the arg is non-empty. return args.at(0).empty() ? STATUS_CMD_ERROR : STATUS_CMD_OK; } // Try parsing wcstring err; unique_ptr expr = test_parser::parse_args(args, err, program_name); if (!expr) { #if 0 streams.err.append(L"Oops! test was given args:\n"); for (size_t i=0; i < argc; i++) { streams.err.append_format(L"\t%ls\n", args.at(i).c_str()); } streams.err.append_format(L"and returned parse error: %ls\n", err.c_str()); #endif streams.err.append(err); return STATUS_CMD_ERROR; } wcstring_list_t eval_errors; bool result = expr->evaluate(eval_errors); if (!eval_errors.empty() && !should_suppress_stderr_for_tests()) { streams.err.append(L"test returned eval errors:\n"); for (size_t i = 0; i < eval_errors.size(); i++) { streams.err.append_format(L"\t%ls\n", eval_errors.at(i).c_str()); } } return result ? STATUS_CMD_OK : STATUS_CMD_ERROR; }