mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-15 22:44:01 +00:00
string repeat: Don't allocate repeated string all at once (#9124)
* string repeat: Don't allocate repeated string all at once This used to allocate one string and fill it with the necessary repetitions, which could be a very very large string. Now, it instead uses one buffer and fills it to a chunk size, and then writes that. This fixes: 1. We no longer crash with too large max/count values. Before they caused a bad_alloc because we tried to fill all RAM. 2. We no longer fill all RAM if given a big-but-not-too-big value. You could've caused fish to eat *most* of your RAM here. 3. It can start writing almost immediately, instead of waiting potentially minutes to start. Performance is about the same to slightly faster overall.
This commit is contained in:
parent
6128b58be6
commit
eac808a819
2 changed files with 92 additions and 37 deletions
|
@ -1493,29 +1493,6 @@ static int string_collect(parser_t &parser, io_streams_t &streams, int argc, con
|
||||||
return appended > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
|
return appended > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function to abstract the repeat logic from string_repeat
|
|
||||||
// returns the to_repeat string, repeated count times.
|
|
||||||
static wcstring wcsrepeat(const wcstring &to_repeat, size_t count) {
|
|
||||||
wcstring repeated;
|
|
||||||
repeated.reserve(to_repeat.length() * count);
|
|
||||||
|
|
||||||
for (size_t j = 0; j < count; j++) {
|
|
||||||
repeated += to_repeat;
|
|
||||||
}
|
|
||||||
|
|
||||||
return repeated;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper function to abstract the repeat until logic from string_repeat
|
|
||||||
// returns the to_repeat string, repeated until max char has been reached.
|
|
||||||
static wcstring wcsrepeat_until(const wcstring &to_repeat, size_t max) {
|
|
||||||
if (to_repeat.length() == 0) return {};
|
|
||||||
size_t count = max / to_repeat.length();
|
|
||||||
size_t mod = max % to_repeat.length();
|
|
||||||
|
|
||||||
return wcsrepeat(to_repeat, count) + to_repeat.substr(0, mod);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int string_repeat(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
|
static int string_repeat(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
|
||||||
options_t opts;
|
options_t opts;
|
||||||
opts.count_valid = true;
|
opts.count_valid = true;
|
||||||
|
@ -1525,32 +1502,84 @@ static int string_repeat(parser_t &parser, io_streams_t &streams, int argc, cons
|
||||||
int optind;
|
int optind;
|
||||||
int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
|
int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
|
||||||
if (retval != STATUS_CMD_OK) return retval;
|
if (retval != STATUS_CMD_OK) return retval;
|
||||||
|
if (opts.max == 0 && opts.count == 0) {
|
||||||
|
// XXX: This used to be allowed, but returned 1.
|
||||||
|
// Keep it that way for now instead of adding an error.
|
||||||
|
// streams.err.append(L"Count or max must be greater than zero");
|
||||||
|
return STATUS_CMD_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
bool all_empty = true;
|
bool all_empty = true;
|
||||||
bool first = true;
|
bool first = true;
|
||||||
|
|
||||||
arg_iterator_t aiter(argv, optind, streams);
|
arg_iterator_t aiter(argv, optind, streams);
|
||||||
while (const wcstring *word = aiter.nextstr()) {
|
while (const wcstring *word = aiter.nextstr()) {
|
||||||
if (!first && !opts.quiet) {
|
// If the string is empty, there is nothing to repeat.
|
||||||
streams.out.append(L'\n');
|
if (word->empty()) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
first = false;
|
|
||||||
const bool limit_repeat =
|
|
||||||
(opts.max > 0 && word->length() * opts.count > static_cast<size_t>(opts.max)) ||
|
|
||||||
!opts.count;
|
|
||||||
const wcstring repeated =
|
|
||||||
limit_repeat ? wcsrepeat_until(*word, opts.max) : wcsrepeat(*word, opts.count);
|
|
||||||
if (!repeated.empty()) {
|
|
||||||
all_empty = false;
|
all_empty = false;
|
||||||
if (opts.quiet) {
|
if (opts.quiet) {
|
||||||
// Early out if we can - see #7495.
|
// Early out if we can - see #7495.
|
||||||
return STATUS_CMD_OK;
|
return STATUS_CMD_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!first && !opts.quiet) {
|
||||||
|
streams.out.append(L'\n');
|
||||||
|
}
|
||||||
|
first = false;
|
||||||
|
|
||||||
|
auto &w = *word;
|
||||||
|
|
||||||
|
// The maximum size of the string is either the "max" characters,
|
||||||
|
// or it's the "count" repetitions, whichever ends up lower.
|
||||||
|
size_t max = opts.max;
|
||||||
|
if (max == 0 || (opts.count > 0 && w.length() * opts.count < max)) {
|
||||||
|
max = w.length() * opts.count;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Append if not quiet.
|
// Reserve a string to avoid writing constantly.
|
||||||
if (!opts.quiet) {
|
// The 1500 here is a total gluteal extraction, but 500 seems to perform slightly worse.
|
||||||
streams.out.append(repeated);
|
const size_t chunk_size = 1500;
|
||||||
|
// The + word length is so we don't have to hit the chunk size exactly,
|
||||||
|
// which would require us to restart in the middle of the string.
|
||||||
|
// E.g. imagine repeating "12345678". The first chunk is hit after a last "1234",
|
||||||
|
// so we would then have to restart by appending "5678", which requires a substring.
|
||||||
|
// So let's not bother.
|
||||||
|
//
|
||||||
|
// Unless of course we don't even print the entire word, in which case we just need max.
|
||||||
|
wcstring chunk;
|
||||||
|
chunk.reserve(std::min(chunk_size + w.length(), max));
|
||||||
|
|
||||||
|
for (size_t i = max; i > 0; i -= w.length()) {
|
||||||
|
// Build up the chunk.
|
||||||
|
if (i >= w.length()) {
|
||||||
|
chunk.append(w);
|
||||||
|
} else {
|
||||||
|
chunk.append(w.substr(0, i));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (chunk.length() >= chunk_size) {
|
||||||
|
// We hit the chunk size, write it repeatedly until we can't anymore.
|
||||||
|
streams.out.append(chunk);
|
||||||
|
while (i >= chunk.length()) {
|
||||||
|
streams.out.append(chunk);
|
||||||
|
// We can easily be asked to write *a lot* of data,
|
||||||
|
// so we need to check every so often if the pipe has been closed.
|
||||||
|
// If we didn't, running `string repeat -n LARGENUMBER foo | pv`
|
||||||
|
// and pressing ctrl-c seems to hang.
|
||||||
|
if (streams.out.flush_and_check_error() != STATUS_CMD_OK) {
|
||||||
|
return STATUS_CMD_ERROR;
|
||||||
|
}
|
||||||
|
i -= chunk.length();
|
||||||
|
}
|
||||||
|
chunk.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Flush the remainder.
|
||||||
|
if (!chunk.empty()) {
|
||||||
|
streams.out.append(chunk);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -504,6 +504,32 @@ string repeat -n3 ""
|
||||||
or echo string repeat empty string failed
|
or echo string repeat empty string failed
|
||||||
# CHECK: string repeat empty string failed
|
# CHECK: string repeat empty string failed
|
||||||
|
|
||||||
|
# See that we hit the expected length
|
||||||
|
# First with "max", i.e. maximum number of characters
|
||||||
|
string repeat -m 5000 aab | string length
|
||||||
|
# CHECK: 5000
|
||||||
|
string repeat -m 5000 ab | string length
|
||||||
|
# CHECK: 5000
|
||||||
|
string repeat -m 5000 a | string length
|
||||||
|
# CHECK: 5000
|
||||||
|
string repeat -m 17 aab | string length
|
||||||
|
# CHECK: 17
|
||||||
|
string repeat -m 17 ab | string length
|
||||||
|
# CHECK: 17
|
||||||
|
string repeat -m 17 a | string length
|
||||||
|
# CHECK: 17
|
||||||
|
# Then with "count", i.e. number of repetitions.
|
||||||
|
# (these are count * length long)
|
||||||
|
string repeat -n 17 aab | string length
|
||||||
|
# CHECK: 51
|
||||||
|
string repeat -n 17 ab | string length
|
||||||
|
# CHECK: 34
|
||||||
|
string repeat -n 17 a | string length
|
||||||
|
# CHECK: 17
|
||||||
|
# And a more tricksy case with a long string that we truncate.
|
||||||
|
string repeat -m 5 (string repeat -n 500000 aaaaaaaaaaaaaaaaaa) | string length
|
||||||
|
# CHECK: 5
|
||||||
|
|
||||||
# Test equivalent matches with/without the --entire, --regex, and --invert flags.
|
# Test equivalent matches with/without the --entire, --regex, and --invert flags.
|
||||||
string match -e x abc dxf xyz jkx x z
|
string match -e x abc dxf xyz jkx x z
|
||||||
or echo exit 1
|
or echo exit 1
|
||||||
|
|
Loading…
Reference in a new issue