[string] Chunk reads

Profiling with callgrind revealed that about 60% of the time in a `something | string match` call
was actually spent in `string_get_arg_stdin()`,
because it was calling `read` one byte at a time.

This makes it read in chunks similar to builtin read.

This increases performance for `getent hosts | string match -v '0.0.0.0*'` from about 300ms to about 30ms (i.e. 90%).
At that point it's _actually_ quicker than `grep`.

To improve performance even more, we'd have to cut down on str2wcstring.

Fixes #4604.
This commit is contained in:
Fabian Homborg 2017-12-18 17:26:33 +01:00
parent ec2b38053e
commit 2de38ef7bf

View file

@ -38,6 +38,11 @@ class parser_t;
#define STRING_ERR_MISSING _(L"%ls: Expected argument\n")
// How many bytes we read() at once.
// Bash uses 128 here, so we do too (see READ_CHUNK_SIZE).
// This should be about the size of a line.
#define STRING_CHUNK_SIZE 128
static void string_error(io_streams_t &streams, const wchar_t *fmt, ...) {
streams.err.append(L"string ");
va_list va;
@ -58,30 +63,36 @@ static bool string_args_from_stdin(const io_streams_t &streams) {
}
static const wchar_t *string_get_arg_stdin(wcstring *storage, const io_streams_t &streams) {
std::string arg;
for (;;) {
char ch = '\0';
long rc = read_blocked(streams.stdin_fd, &ch, 1);
// We might read more than a line - store the rest in a static buffer.
static std::string buffer;
if (rc < 0) { // failure
// Read in chunks from fd until buffer has a line.
size_t pos;
while ((pos = buffer.find('\n')) == std::string::npos) {
char buf[STRING_CHUNK_SIZE];
int n = read_blocked(streams.stdin_fd, buf, STRING_CHUNK_SIZE);
if (n == 0) {
// If we still have buffer contents, flush them,
// in case there was no trailing '\n'.
if (buffer.empty()) return NULL;
*storage = str2wcstring(buffer);
buffer.clear();
return storage->c_str();
}
if (n == -1) {
// Some error happened. We can't do anything about it,
// so ignore it.
// (read_blocked already retries for EAGAIN and EINTR)
*storage = str2wcstring(buffer);
buffer.clear();
return NULL;
}
if (rc == 0) { // EOF
if (arg.empty()) {
return NULL;
}
break;
buffer.append(buf, n);
}
if (ch == '\n') {
break;
}
arg += ch;
}
*storage = str2wcstring(arg);
// Split the buffer on the '\n' and return the first part.
*storage = str2wcstring(buffer.c_str(), pos);
buffer.erase(0, pos + 1);
return storage->c_str();
}