[string] Chunk reads

Profiling with callgrind revealed that about 60% of the time in a `something | string match` call
was actually spent in `string_get_arg_stdin()`,
because it was calling `read` one byte at a time.

This makes it read in chunks similar to builtin read.

This increases performance for `getent hosts | string match -v '0.0.0.0*'` from about 300ms to about 30ms (i.e. a runtime reduction of about 90%).
At that point it's _actually_ quicker than `grep`.

To improve performance even more, we'd have to cut down on str2wcstring.

Fixes #4604.
This commit is contained in:
Fabian Homborg 2017-12-18 17:26:33 +01:00
parent ec2b38053e
commit 2de38ef7bf

View file

@ -38,6 +38,11 @@ class parser_t;
#define STRING_ERR_MISSING _(L"%ls: Expected argument\n") #define STRING_ERR_MISSING _(L"%ls: Expected argument\n")
// How many bytes we read() at once when collecting `string` arguments from stdin.
// Bash uses 128 here, so we do too (see READ_CHUNK_SIZE).
// This should be about the size of a line.
#define STRING_CHUNK_SIZE 128
static void string_error(io_streams_t &streams, const wchar_t *fmt, ...) { static void string_error(io_streams_t &streams, const wchar_t *fmt, ...) {
streams.err.append(L"string "); streams.err.append(L"string ");
va_list va; va_list va;
@ -58,30 +63,36 @@ static bool string_args_from_stdin(const io_streams_t &streams) {
} }
/// Fetch the next newline-delimited argument from the stdin fd of \p streams.
///
/// Input is pulled in with read_blocked() in pieces of up to STRING_CHUNK_SIZE bytes rather than
/// one byte per call. Anything read past the first '\n' is kept for the following call.
///
/// \param storage receives the converted line, without its trailing '\n'. The returned pointer
///        points into this string, so it is only valid while \p storage is alive and unmodified.
/// \param streams provides the file descriptor to read from (streams.stdin_fd).
/// \return storage->c_str() when a line (or a final unterminated remainder) is available;
///         NULL on EOF with nothing pending, or when reading fails.
///
/// NOTE(review): the carry-over buffer is a single function-local static. It is not keyed by fd,
/// so if a caller stops consuming before EOF the unread remainder is still there on the next call,
/// whatever fd that call passes. Fixing this needs the buffer to live with the caller.
static const wchar_t *string_get_arg_stdin(wcstring *storage, const io_streams_t &streams) {
    // We might read more than a line - store the rest in a static buffer.
    static std::string buffer;

    // Read in chunks from fd until buffer has a line.
    size_t pos;
    while ((pos = buffer.find('\n')) == std::string::npos) {
        char buf[STRING_CHUNK_SIZE];
        // Keep the result as a long: storing it in an int would narrow the value.
        long n = read_blocked(streams.stdin_fd, buf, sizeof buf);
        if (n < 0) {
            // Some error happened. We can't do anything about it, so give up on this input.
            // (read_blocked already retries for EAGAIN and EINTR)
            // Checking for any negative value (not just -1) guarantees we never reach the
            // append below with a negative length, which would convert to a huge size_t.
            // Pending bytes are discarded; there is no point converting them because the
            // caller only gets NULL back.
            buffer.clear();
            return NULL;
        }
        if (n == 0) {
            // EOF. If we still have buffer contents, flush them,
            // in case there was no trailing '\n'.
            if (buffer.empty()) return NULL;
            *storage = str2wcstring(buffer);
            buffer.clear();
            return storage->c_str();
        }
        buffer.append(buf, static_cast<size_t>(n));
    }

    // Split the buffer on the '\n' and return the first part.
    *storage = str2wcstring(buffer.c_str(), pos);
    // Drop the returned line together with its '\n'; the rest waits for the next call.
    buffer.erase(0, pos + 1);
    return storage->c_str();
}