some fixes from review

- string_get_arg_stdin(): simplify and don't discard the argument when
  the trailing newline is absent
- fix calls to pcre2 for patterns that can match the empty string, e.g. string match -r -a 'a*' 'b'
- correct the test for whether args come from stdin
This commit is contained in:
Michael Steed 2015-08-27 08:19:23 -06:00
parent 45b777e4dc
commit 2ecd24f795
2 changed files with 38 additions and 54 deletions

View file

@ -34,62 +34,50 @@ static void string_unknown_option(parser_t &parser, const wchar_t *subcmd, const
builtin_print_help(parser, L"string", stderr_buffer); builtin_print_help(parser, L"string", stderr_buffer);
} }
// Decide whether the `string` builtin should take its arguments from stdin
// instead of from argv. Returns true when builtin_stdin is not the process's
// standard input descriptor, or when it is but is not attached to a terminal.
static bool string_args_from_stdin()
{
    if (builtin_stdin != STDIN_FILENO)
    {
        // A different descriptor than STDIN_FILENO: no need to probe for a tty.
        return true;
    }
    return !isatty(builtin_stdin);
}
static const wchar_t *string_get_arg_stdin() static const wchar_t *string_get_arg_stdin()
{ {
static wcstring arg; static wcstring warg;
arg.clear();
bool eof = false;
bool gotarg = false;
std::string arg;
for (;;) for (;;)
{ {
wchar_t wch = L'\0'; char ch = '\0';
mbstate_t state = {}; int rc = read_blocked(builtin_stdin, &ch, 1);
for (;;)
if (rc < 0)
{ {
char ch = '\0'; // failure
if (read_blocked(builtin_stdin, &ch, 1) <= 0) return 0;
}
if (rc == 0)
{
// eof
if (arg.empty())
{ {
eof = true; return 0;
break;
} }
else else
{ {
size_t n = mbrtowc(&wch, &ch, 1, &state); break;
if (n == size_t(-1))
{
// Invalid multibyte sequence: start over
memset(&state, 0, sizeof(state));
}
else if (n == size_t(-2))
{
// Incomplete sequence: continue reading
}
else
{
// Got a complete char (could be L'\0')
break;
}
} }
} }
if (eof) if (ch == '\n')
{ {
break; break;
} }
if (wch == L'\n') arg += ch;
{
gotarg = true;
break;
}
arg += wch;
} }
return gotarg ? arg.c_str() : 0; warg = str2wcstring(arg.c_str(), arg.size());
return warg.c_str();
} }
static const wchar_t *string_get_arg_argv(int *argidx, wchar_t **argv) static const wchar_t *string_get_arg_argv(int *argidx, wchar_t **argv)
@ -97,15 +85,15 @@ static const wchar_t *string_get_arg_argv(int *argidx, wchar_t **argv)
return (argv && argv[*argidx]) ? argv[(*argidx)++] : 0; return (argv && argv[*argidx]) ? argv[(*argidx)++] : 0;
} }
static inline const wchar_t *string_get_arg(int *argidx, wchar_t **argv) static const wchar_t *string_get_arg(int *argidx, wchar_t **argv)
{ {
if (isatty(builtin_stdin)) if (string_args_from_stdin())
{ {
return string_get_arg_argv(argidx, argv); return string_get_arg_stdin();
} }
else else
{ {
return string_get_arg_stdin(); return string_get_arg_argv(argidx, argv);
} }
} }
@ -144,7 +132,7 @@ static int string_escape(parser_t &parser, int argc, wchar_t **argv)
} }
int i = w.woptind; int i = w.woptind;
if (!isatty(builtin_stdin) && argc > i) if (string_args_from_stdin() && argc > i)
{ {
string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
return BUILTIN_STRING_ERROR; return BUILTIN_STRING_ERROR;
@ -204,7 +192,7 @@ static int string_join(parser_t &parser, int argc, wchar_t **argv)
return BUILTIN_STRING_ERROR; return BUILTIN_STRING_ERROR;
} }
if (!isatty(builtin_stdin) && argc > i) if (string_args_from_stdin() && argc > i)
{ {
string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
return BUILTIN_STRING_ERROR; return BUILTIN_STRING_ERROR;
@ -265,7 +253,7 @@ static int string_length(parser_t &parser, int argc, wchar_t **argv)
} }
int i = w.woptind; int i = w.woptind;
if (!isatty(builtin_stdin) && argc > i) if (string_args_from_stdin() && argc > i)
{ {
string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
return BUILTIN_STRING_ERROR; return BUILTIN_STRING_ERROR;
@ -602,11 +590,6 @@ public:
{ {
uint32_t options = 0; uint32_t options = 0;
PCRE2_SIZE offset = ovector[1]; // Start at end of previous match PCRE2_SIZE offset = ovector[1]; // Start at end of previous match
PCRE2_SIZE old_offset = pcre2_get_startchar(regex.match);
if (offset <= old_offset)
{
offset = old_offset + 1;
}
if (ovector[0] == ovector[1]) if (ovector[0] == ovector[1])
{ {
@ -702,7 +685,7 @@ static int string_match(parser_t &parser, int argc, wchar_t **argv)
return BUILTIN_STRING_ERROR; return BUILTIN_STRING_ERROR;
} }
if (!isatty(builtin_stdin) && argc > i) if (string_args_from_stdin() && argc > i)
{ {
string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
return BUILTIN_STRING_ERROR; return BUILTIN_STRING_ERROR;
@ -984,7 +967,7 @@ static int string_replace(parser_t &parser, int argc, wchar_t **argv)
return BUILTIN_STRING_ERROR; return BUILTIN_STRING_ERROR;
} }
if (!isatty(builtin_stdin) && argc > i) if (string_args_from_stdin() && argc > i)
{ {
string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
return BUILTIN_STRING_ERROR; return BUILTIN_STRING_ERROR;
@ -1082,7 +1065,7 @@ static int string_split(parser_t &parser, int argc, wchar_t **argv)
return BUILTIN_STRING_ERROR; return BUILTIN_STRING_ERROR;
} }
if (!isatty(builtin_stdin) && argc > i) if (string_args_from_stdin() && argc > i)
{ {
string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
return BUILTIN_STRING_ERROR; return BUILTIN_STRING_ERROR;
@ -1262,7 +1245,7 @@ static int string_sub(parser_t &parser, int argc, wchar_t **argv)
} }
int i = w.woptind; int i = w.woptind;
if (!isatty(builtin_stdin) && argc > i) if (string_args_from_stdin() && argc > i)
{ {
string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
return BUILTIN_STRING_ERROR; return BUILTIN_STRING_ERROR;
@ -1365,7 +1348,7 @@ static int string_trim(parser_t &parser, int argc, wchar_t **argv)
} }
int i = w.woptind; int i = w.woptind;
if (!isatty(builtin_stdin) && argc > i) if (string_args_from_stdin() && argc > i)
{ {
string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]); string_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
return BUILTIN_STRING_ERROR; return BUILTIN_STRING_ERROR;

View file

@ -4162,6 +4162,7 @@ static void test_string(void)
{ {L"string", L"match", L"-r", L"-n", L"(a)(b)", L"abab", 0}, 0, L"1 2\n1 1\n2 1\n" }, { {L"string", L"match", L"-r", L"-n", L"(a)(b)", L"abab", 0}, 0, L"1 2\n1 1\n2 1\n" },
{ {L"string", L"match", L"-r", L"-n", L"-a", L"(a)(b)", L"abab", 0}, 0, L"1 2\n1 1\n2 1\n3 2\n3 1\n4 1\n" }, { {L"string", L"match", L"-r", L"-n", L"-a", L"(a)(b)", L"abab", 0}, 0, L"1 2\n1 1\n2 1\n3 2\n3 1\n4 1\n" },
{ {L"string", L"match", L"-r", L"*", L"", 0}, 2, L"" }, { {L"string", L"match", L"-r", L"*", L"", 0}, 2, L"" },
{ {L"string", L"match", L"-r", L"-a", L"a*", L"b", 0}, 0, L"\n\n" },
{ {L"string", L"match", L"-r", L"foo\\Kbar", L"foobar", 0}, 0, L"bar\n" }, { {L"string", L"match", L"-r", L"foo\\Kbar", L"foobar", 0}, 0, L"bar\n" },
{ {L"string", L"match", L"-r", L"(foo)\\Kbar", L"foobar", 0}, 0, L"bar\nfoo\n" }, { {L"string", L"match", L"-r", L"(foo)\\Kbar", L"foobar", 0}, 0, L"bar\nfoo\n" },
{ {L"string", L"match", L"-r", L"(?=ab\\K)", L"ab", 0}, 0, L"\n" }, { {L"string", L"match", L"-r", L"(?=ab\\K)", L"ab", 0}, 0, L"\n" },