Implement new read --null flag

The `--null` flag to `read` makes it split incoming lines on NUL instead
of newlines. This is intended for processing the output of a command
that uses NUL separators (such as `find -print0`).

Fixes #1694.
This commit is contained in:
Kevin Ballard 2014-09-21 19:18:56 -07:00
parent f889ad0fda
commit 8f8c4cdd17
8 changed files with 192 additions and 43 deletions

View file

@ -91,7 +91,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \
signal.o io.o parse_util.o common.o screen.o path.o autoload.o \
parser_keywords.o iothread.o color.o postfork.o \
builtin_test.o parse_tree.o parse_productions.o parse_execution.o \
pager.o utf8.o fish_version.o
pager.o utf8.o fish_version.o wcstringutil.o
FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \
parser_keywords.o wutil.o tokenizer.o fish_version.o
@ -843,9 +843,10 @@ builtin.o: io.h function.h event.h complete.h proc.h parse_tree.h tokenizer.h
builtin.o: parse_constants.h parser.h reader.h highlight.h env.h color.h
builtin.o: wgetopt.h sanity.h wildcard.h expand.h input_common.h input.h
builtin.o: intern.h exec.h parse_util.h autoload.h lru.h parser_keywords.h
builtin.o: path.h history.h builtin_set.cpp builtin_commandline.cpp
builtin.o: builtin_complete.cpp builtin_ulimit.cpp builtin_jobs.cpp
builtin.o: builtin_set_color.cpp output.h screen.h builtin_printf.cpp
builtin.o: path.h history.h wcstringutil.h builtin_set.cpp
builtin.o: builtin_commandline.cpp builtin_complete.cpp builtin_ulimit.cpp
builtin.o: builtin_jobs.cpp builtin_set_color.cpp output.h screen.h
builtin.o: builtin_printf.cpp
builtin_commandline.o: config.h signal.h fallback.h util.h wutil.h common.h
builtin_commandline.o: builtin.h io.h wgetopt.h reader.h complete.h
builtin_commandline.o: highlight.h env.h color.h proc.h parse_tree.h
@ -860,6 +861,7 @@ builtin_jobs.o: config.h fallback.h signal.h util.h wutil.h common.h
builtin_jobs.o: builtin.h io.h proc.h parse_tree.h tokenizer.h
builtin_jobs.o: parse_constants.h parser.h event.h function.h wgetopt.h
builtin_printf.o: common.h util.h
builtin_scripts.o: builtin_scripts.h
builtin_set.o: config.h signal.h fallback.h util.h wutil.h common.h builtin.h
builtin_set.o: io.h env.h expand.h parse_constants.h wgetopt.h proc.h
builtin_set.o: parse_tree.h tokenizer.h parser.h event.h function.h
@ -911,6 +913,7 @@ fish_tests.o: highlight.h env.h color.h builtin.h function.h event.h
fish_tests.o: autoload.h lru.h wutil.h expand.h parser.h output.h screen.h
fish_tests.o: exec.h path.h history.h iothread.h postfork.h parse_util.h
fish_tests.o: pager.h input.h input_common.h utf8.h env_universal_common.h
fish_tests.o: wcstringutil.h
fish_version.o: fish_version.h
function.o: config.h signal.h wutil.h common.h util.h fallback.h function.h
function.o: event.h proc.h io.h parse_tree.h tokenizer.h parse_constants.h
@ -930,7 +933,7 @@ input.o: complete.h highlight.h env.h color.h proc.h parse_tree.h tokenizer.h
input.o: parse_constants.h sanity.h input_common.h input.h parser.h event.h
input.o: function.h expand.h output.h screen.h intern.h
input_common.o: config.h fallback.h signal.h util.h common.h wutil.h
input_common.o: input_common.h iothread.h
input_common.o: input_common.h env_universal_common.h env.h iothread.h
intern.o: config.h fallback.h signal.h util.h wutil.h common.h intern.h
io.o: config.h fallback.h signal.h util.h wutil.h common.h exec.h proc.h io.h
io.o: parse_tree.h tokenizer.h parse_constants.h
@ -993,6 +996,7 @@ signal.o: parse_tree.h tokenizer.h parse_constants.h
tokenizer.o: config.h fallback.h signal.h util.h wutil.h common.h tokenizer.h
utf8.o: utf8.h
util.o: config.h fallback.h signal.h util.h common.h wutil.h
wcstringutil.o: config.h wcstringutil.h common.h util.h
wgetopt.o: config.h wgetopt.h wutil.h common.h util.h fallback.h signal.h
wildcard.o: config.h fallback.h signal.h util.h wutil.h common.h complete.h
wildcard.o: wildcard.h expand.h parse_constants.h reader.h io.h highlight.h

View file

@ -65,6 +65,7 @@
#include "path.h"
#include "history.h"
#include "parse_tree.h"
#include "wcstringutil.h"
/**
The default prompt for the read command
@ -2305,10 +2306,9 @@ static int builtin_random(parser_t &parser, wchar_t **argv)
*/
static int builtin_read(parser_t &parser, wchar_t **argv)
{
wchar_t *buff=0;
wcstring buff;
int i, argc = builtin_count_args(argv);
int place = ENV_USER;
wchar_t *nxt;
const wchar_t *prompt = DEFAULT_READ_PROMPT;
const wchar_t *commandline = L"";
int exit_res=STATUS_BUILTIN_OK;
@ -2317,6 +2317,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
wchar_t *end;
int shell = 0;
int array = 0;
bool split_null = false;
woptind=0;
@ -2369,6 +2370,10 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
L"array", no_argument, 0, 'a'
}
,
{
L"null", no_argument, 0, 'z'
}
,
{
L"help", no_argument, 0, 'h'
}
@ -2383,7 +2388,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
int opt = wgetopt_long(argc,
argv,
L"xglUup:c:hm:n:sa",
L"xglUup:c:hm:n:saz",
long_options,
&opt_index);
if (opt == -1)
@ -2468,6 +2473,10 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
array = 1;
break;
case L'z':
split_null = true;
break;
case 'h':
builtin_print_help(parser, argv[0], stdout_buffer);
return STATUS_BUILTIN_OK;
@ -2541,7 +2550,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
/*
Check if we should read interactively using \c reader_readline()
*/
if (isatty(0) && builtin_stdin == 0)
if (isatty(0) && builtin_stdin == 0 && !split_null)
{
const wchar_t *line;
@ -2572,13 +2581,11 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
// note: we're deliberately throwing away the tail of the commandline.
// It shouldn't be unread because it was produced with `commandline -i`,
// not typed.
buff = (wchar_t *)malloc(((size_t)nchars + 1) * sizeof(wchar_t));
wmemcpy(buff, line, (size_t)nchars);
buff[nchars] = 0;
buff = wcstring(line, nchars);
}
else
{
buff = wcsdup(line);
buff = wcstring(line);
}
}
else
@ -2591,7 +2598,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
{
int eof=0;
wcstring sb;
buff.clear();
while (1)
{
@ -2621,7 +2628,6 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
case (size_t)(-2):
break;
case 0:
eof=1;
finished = 1;
break;
@ -2635,44 +2641,43 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
if (eof)
break;
if (res == L'\n')
if (!split_null && res == L'\n')
break;
sb.push_back(res);
if (split_null && res == L'\0')
break;
if (0 < nchars && (size_t)nchars <= sb.size())
buff.push_back(res);
if (0 < nchars && (size_t)nchars <= buff.size())
{
break;
}
}
if (sb.size() < 2 && eof)
if (buff.size() < 2 && eof)
{
exit_res = 1;
}
buff = wcsdup(sb.c_str());
}
if (i != argc && !exit_res)
{
wchar_t *state;
env_var_t ifs = env_get_string(L"IFS");
if (ifs.missing_or_empty())
{
/* Every character is a separate token */
size_t bufflen = wcslen(buff);
size_t bufflen = buff.size();
if (array)
{
if (bufflen > 0)
{
wcstring chars(bufflen+(bufflen-1), ARRAY_SEP);
for (size_t j=0; j<bufflen; ++j)
wcstring::iterator out = chars.begin();
for (wcstring::const_iterator it = buff.begin(), end = buff.end(); it != end; ++it)
{
chars[j*2] = buff[j];
*out = *it;
out += 2;
}
env_set(argv[i], chars.c_str(), place);
}
@ -2686,14 +2691,15 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
size_t j = 0;
for (; i+1 < argc; ++i)
{
if (j < bufflen) {
wchar_t buffer[2] = {buff[j], 0};
if (j < bufflen)
{
wchar_t buffer[2] = {buff[j++], 0};
env_set(argv[i], buffer, place);
}
else {
else
{
env_set(argv[i], L"", place);
}
if (j < bufflen) ++j;
}
if (i < argc) env_set(argv[i], &buff[j], place);
}
@ -2701,34 +2707,32 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
else if (array)
{
wcstring tokens;
tokens.reserve(wcslen(buff));
tokens.reserve(buff.size());
bool empty = true;
for (nxt = wcstok(buff, ifs.c_str(), &state); nxt != 0; nxt = wcstok(0, ifs.c_str(), &state))
for (wcstring_range loc = wcstring_tok(buff, ifs); loc.first != wcstring::npos; loc = wcstring_tok(buff, ifs, loc))
{
if (! tokens.empty()) tokens.push_back(ARRAY_SEP);
tokens.append(nxt);
if (!empty) tokens.push_back(ARRAY_SEP);
tokens.append(buff, loc.first, loc.second);
empty = false;
}
env_set(argv[i], empty ? NULL : tokens.c_str(), place);
}
else
{
nxt = wcstok(buff, (i<argc-1)?ifs.c_str():L"", &state);
wcstring_range loc = wcstring_range(0,0);
while (i<argc)
{
env_set(argv[i], nxt != 0 ? nxt: L"", place);
loc = wcstring_tok(buff, (i+1<argc) ? ifs : L"", loc);
env_set(argv[i], loc.first == wcstring::npos ? L"" : &buff.c_str()[loc.first], place);
i++;
if (nxt != 0)
nxt = wcstok(0, (i<argc-1)?ifs.c_str():L"", &state);
++i;
}
}
}
free(buff);
return exit_res;
}

View file

@ -33,6 +33,8 @@ The following options are available:
- `-a` or `--array` stores the result as an array.
- `-z` or `--null` reads up to NUL instead of newline. Disables interactive mode.
`read` reads a single line of input from stdin, breaks it into tokens based on the `IFS` shell variable, and then assigns one token to each variable specified in `VARIABLES`. If there are more tokens than variables, the complete remainder is assigned to the last variable. As a special case, if `IFS` is set to the empty string, each character of the input is considered a separate token.
If `-a` or `--array` is provided, only one variable name is allowed and the tokens are stored as an array in this variable.

View file

@ -65,6 +65,7 @@
#include "input.h"
#include "utf8.h"
#include "env_universal_common.h"
#include "wcstringutil.h"
static const char * const * s_arguments;
static int s_test_run_count = 0;
@ -3629,6 +3630,37 @@ static void test_highlighting(void)
}
}
static void test_wcstring_tok(void)
{
say(L"Testing wcstring_tok");
wcstring buff = L"hello world";
wcstring needle = L" \t\n";
wcstring_range loc = wcstring_tok(buff, needle);
if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"hello")
{
err(L"Wrong results from first wcstring_tok(): {%zu, %zu}", loc.first, loc.second);
}
loc = wcstring_tok(buff, needle, loc);
if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"world")
{
err(L"Wrong results from second wcstring_tok(): {%zu, %zu}", loc.first, loc.second);
}
loc = wcstring_tok(buff, needle, loc);
if (loc.first != wcstring::npos)
{
err(L"Wrong results from third wcstring_tok(): {%zu, %zu}", loc.first, loc.second);
}
buff = L"hello world";
loc = wcstring_tok(buff, needle);
// loc is "hello" again
loc = wcstring_tok(buff, L"", loc);
if (loc.first == wcstring::npos || buff.substr(loc.first, loc.second) != L"world")
{
err(L"Wrong results from wcstring_tok with empty needle: {%zu, %zu}", loc.first, loc.second);
}
}
/**
Main test
*/
@ -3709,6 +3741,7 @@ int main(int argc, char **argv)
if (should_test_function("autosuggestion_ignores")) test_autosuggestion_ignores();
if (should_test_function("autosuggestion_combining")) test_autosuggestion_combining();
if (should_test_function("autosuggest_suggest_special")) test_autosuggest_suggest_special();
if (should_test_function("wcstring_tok")) test_wcstring_tok();
if (should_test_function("history")) history_tests_t::test_history();
if (should_test_function("history_merge")) history_tests_t::test_history_merge();
if (should_test_function("history_races")) history_tests_t::test_history_races();

View file

@ -1,3 +1,4 @@
# vim: set filetype=fish:
#
# Test read builtin and IFS
#
@ -35,6 +36,8 @@ echo '' | read -l one two
print_vars one two
echo 'test' | read -l one two three
print_vars one two three
echo 'foo bar baz' | read -l one two three
print_vars one two three
echo
set -l IFS
@ -91,3 +94,25 @@ echo $foo
echo $bar
echo 'test' | read -n 1 foo
echo $foo
# read -z tests
echo
echo '# read -z tests'
echo -n 'testing' | read -lz foo
echo $foo
echo -n 'test ing' | read -lz foo
echo $foo
echo 'newline' | read -lz foo
echo $foo
echo -n 'test ing' | read -lz foo bar
print_vars foo bar
echo -ne 'test\0ing' | read -lz foo bar
print_vars foo bar
echo -ne 'foo\nbar' | read -lz foo bar
print_vars foo bar
echo -ne 'foo\nbar\0baz\nquux' | while read -lza foo
print_vars foo
end
true

View file

@ -16,6 +16,7 @@ two
1 ''
1 '' 1 ''
1 'test' 1 '' 1 ''
1 'foo' 1 'bar' 1 ' baz'
1 'hello'
1 'h' 1 'ello'
@ -42,3 +43,14 @@ test
tes
tin
t
# read -z tests
testing
test ing
newline
1 'test' 1 'ing'
1 'test' 1 ''
1 'foo' 1 'bar'
2 'foo' 'bar'
2 'baz' 'quux'

40
wcstringutil.cpp Normal file
View file

@ -0,0 +1,40 @@
/** \file wcstringutil.cpp
Helper functions for working with wcstring
*/
#include "config.h"
#include "wcstringutil.h"
typedef wcstring::size_type size_type;
/**
   Find the next token of \c str, where tokens are separated by any of the
   characters in \c needle. \c last is the range returned by the previous
   call; the default of (0,0) starts at the beginning of the string.

   When a separator follows the token, that separator character in \c str is
   overwritten with NUL and (pos, count) is returned. When the token runs to
   the end of the string, or \c needle is empty, (pos, npos) is returned.
   When nothing is left, (npos, npos) is returned.
*/
wcstring_range wcstring_tok(wcstring& str, const wcstring &needle, wcstring_range last)
{
    const wcstring_range done = std::make_pair(wcstring::npos, wcstring::npos);

    /* A previous result without a usable position or count was the last one */
    if (last.first == wcstring::npos || last.second == wcstring::npos)
    {
        return done;
    }

    /* Resume just past the previous token */
    size_type start = last.first + last.second;
    if (start == wcstring::npos)
    {
        return done;
    }
    /* A start of 0 means this is the initial call; otherwise step over the
       separator that terminated the previous token */
    if (start != 0)
    {
        ++start;
    }
    if (start >= str.size())
    {
        return done;
    }

    /* Without separators, everything that remains is a single final token */
    if (needle.empty())
    {
        return std::make_pair(start, wcstring::npos);
    }

    /* Skip leading separators */
    const size_type token_begin = str.find_first_not_of(needle, start);
    if (token_begin == wcstring::npos)
    {
        return done;
    }

    const size_type token_end = str.find_first_of(needle, token_begin);
    if (token_end == wcstring::npos)
    {
        /* The token extends to the end of the string */
        return std::make_pair(token_begin, wcstring::npos);
    }

    /* Terminate the token in place, as wcstok() would */
    str[token_end] = L'\0';
    return std::make_pair(token_begin, token_end - token_begin);
}

29
wcstringutil.h Normal file
View file

@ -0,0 +1,29 @@
/** \file wcstringutil.h
Helper functions for working with wcstring
*/
#ifndef FISH_WCSTRINGUTIL_H
#define FISH_WCSTRINGUTIL_H
#include <utility>
#include "common.h"
/**
typedef that represents a range in a wcstring.
The first element is the location, the second is the count.
*/
typedef std::pair<wcstring::size_type, wcstring::size_type> wcstring_range;
/**
   wcstring equivalent of wcstok(). Supports NUL.
   For convenience and wcstok() compatibility, the first character of each
   token separator is replaced with NUL. This means \c str is modified in
   place.

   \param str the string to tokenize; the separator character immediately
          following a returned token is overwritten with NUL
   \param needle the set of separator characters. Leading separators are
          skipped before a token. If empty, the whole remainder of the string
          (from the resume position) is returned as a single final token.
   \param last the range returned by the previous call. The default of (0,0)
          starts tokenizing at the beginning of \c str. Scanning resumes one
          character past the end of this range, i.e. after the separator that
          was replaced with NUL.

   Returns a pair of (pos, count).
   Returns (npos, npos) when it's done.
   Returns (pos, npos) when the token is already known to be the final token.
   Passing a range whose count is npos as \c last returns (npos, npos).
   Note that the final token may not necessarily return (pos, npos); for
   example, a token followed only by trailing separators is returned as
   (pos, count), and the following call returns (npos, npos).
*/
wcstring_range wcstring_tok(wcstring& str, const wcstring &needle, wcstring_range last = wcstring_range(0,0));
#endif