From 2de38ef7bfd8569a6876d8b8ffbe748eec1b3c43 Mon Sep 17 00:00:00 2001 From: Fabian Homborg Date: Mon, 18 Dec 2017 17:26:33 +0100 Subject: [PATCH] [string] Chunk reads Profiling with callgrind revealed that about 60% of the time in a `something | string match` call was actually spent in `string_get_arg_stdin()`, because it was calling `read` one byte at a time. This makes it read in chunks similar to builtin read. This increases performance for `getent hosts | string match -v '0.0.0.0*'` from about 300ms to about 30ms (i.e. 90%). At that point it's _actually_ quicker than `grep`. To improve performance even more, we'd have to cut down on str2wcstring. Fixes #4604. --- src/builtin_string.cpp | 49 ++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp index c1c5211a8..8b553e079 100644 --- a/src/builtin_string.cpp +++ b/src/builtin_string.cpp @@ -38,6 +38,11 @@ class parser_t; #define STRING_ERR_MISSING _(L"%ls: Expected argument\n") +// How many bytes we read() at once. +// Bash uses 128 here, so we do too (see READ_CHUNK_SIZE). +// This should be about the size of a line. +#define STRING_CHUNK_SIZE 128 + static void string_error(io_streams_t &streams, const wchar_t *fmt, ...) { streams.err.append(L"string "); va_list va; @@ -58,30 +63,36 @@ static bool string_args_from_stdin(const io_streams_t &streams) { } static const wchar_t *string_get_arg_stdin(wcstring *storage, const io_streams_t &streams) { - std::string arg; - for (;;) { - char ch = '\0'; - long rc = read_blocked(streams.stdin_fd, &ch, 1); + // We might read more than a line - store the rest in a static buffer. + static std::string buffer; - if (rc < 0) { // failure + // Read in chunks from fd until buffer has a line. + size_t pos; + while ((pos = buffer.find('\n')) == std::string::npos) { + char buf[STRING_CHUNK_SIZE]; + int n = read_blocked(streams.stdin_fd, buf, STRING_CHUNK_SIZE); + if (n == 0) { + // If we still have buffer contents, flush them, + // in case there was no trailing '\n'. + if (buffer.empty()) return NULL; + *storage = str2wcstring(buffer); + buffer.clear(); + return storage->c_str(); + } + if (n == -1) { + // Some error happened. We can't do anything about it, + // so ignore it. + // (read_blocked already retries for EAGAIN and EINTR) + *storage = str2wcstring(buffer); + buffer.clear(); return NULL; } - - if (rc == 0) { // EOF - if (arg.empty()) { - return NULL; - } - break; - } - - if (ch == '\n') { - break; - } - - arg += ch; + buffer.append(buf, n); } - *storage = str2wcstring(arg); + // Split the buffer on the '\n' and return the first part. + *storage = str2wcstring(buffer.c_str(), pos); + buffer.erase(0, pos + 1); return storage->c_str(); }