mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-27 20:25:12 +00:00
[string] Chunk reads
Profiling with callgrind revealed that about 60% of the time in a `something | string match` call was actually spent in `string_get_arg_stdin()`, because it was calling `read` one byte at a time. This makes it read in chunks similar to builtin read. This increases performance for `getent hosts | string match -v '0.0.0.0*'` from about 300ms to about 30ms (i.e. 90%). At that point it's _actually_ quicker than `grep`. To improve performance even more, we'd have to cut down on str2wcstring. Fixes #4604.
This commit is contained in:
parent
ec2b38053e
commit
2de38ef7bf
1 changed files with 30 additions and 19 deletions
|
@ -38,6 +38,11 @@ class parser_t;
|
||||||
|
|
||||||
#define STRING_ERR_MISSING _(L"%ls: Expected argument\n")
|
#define STRING_ERR_MISSING _(L"%ls: Expected argument\n")
|
||||||
|
|
||||||
|
// How many bytes we read() at once.
|
||||||
|
// Bash uses 128 here, so we do too (see READ_CHUNK_SIZE).
|
||||||
|
// This should be about the size of a line.
|
||||||
|
#define STRING_CHUNK_SIZE 128
|
||||||
|
|
||||||
static void string_error(io_streams_t &streams, const wchar_t *fmt, ...) {
|
static void string_error(io_streams_t &streams, const wchar_t *fmt, ...) {
|
||||||
streams.err.append(L"string ");
|
streams.err.append(L"string ");
|
||||||
va_list va;
|
va_list va;
|
||||||
|
@ -58,30 +63,36 @@ static bool string_args_from_stdin(const io_streams_t &streams) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static const wchar_t *string_get_arg_stdin(wcstring *storage, const io_streams_t &streams) {
|
static const wchar_t *string_get_arg_stdin(wcstring *storage, const io_streams_t &streams) {
|
||||||
std::string arg;
|
// We might read more than a line - store the rest in a static buffer.
|
||||||
for (;;) {
|
static std::string buffer;
|
||||||
char ch = '\0';
|
|
||||||
long rc = read_blocked(streams.stdin_fd, &ch, 1);
|
|
||||||
|
|
||||||
if (rc < 0) { // failure
|
// Read in chunks from fd until buffer has a line.
|
||||||
|
size_t pos;
|
||||||
|
while ((pos = buffer.find('\n')) == std::string::npos) {
|
||||||
|
char buf[STRING_CHUNK_SIZE];
|
||||||
|
int n = read_blocked(streams.stdin_fd, buf, STRING_CHUNK_SIZE);
|
||||||
|
if (n == 0) {
|
||||||
|
// If we still have buffer contents, flush them,
|
||||||
|
// in case there was no trailing '\n'.
|
||||||
|
if (buffer.empty()) return NULL;
|
||||||
|
*storage = str2wcstring(buffer);
|
||||||
|
buffer.clear();
|
||||||
|
return storage->c_str();
|
||||||
|
}
|
||||||
|
if (n == -1) {
|
||||||
|
// Some error happened. We can't do anything about it,
|
||||||
|
// so ignore it.
|
||||||
|
// (read_blocked already retries for EAGAIN and EINTR)
|
||||||
|
*storage = str2wcstring(buffer);
|
||||||
|
buffer.clear();
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
buffer.append(buf, n);
|
||||||
if (rc == 0) { // EOF
|
|
||||||
if (arg.empty()) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ch == '\n') {
|
// Split the buffer on the '\n' and return the first part.
|
||||||
break;
|
*storage = str2wcstring(buffer.c_str(), pos);
|
||||||
}
|
buffer.erase(0, pos + 1);
|
||||||
|
|
||||||
arg += ch;
|
|
||||||
}
|
|
||||||
|
|
||||||
*storage = str2wcstring(arg);
|
|
||||||
return storage->c_str();
|
return storage->c_str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue