Correctly handle multiple chunks from read_in_chunks

read_in_chunks does not clear the intermediate string 'str' between iterations, so every chunk has all preceding chunks prepended to it. A secondary issue is that it calls str2wcstring() on each intermediate chunk, which may split multi-byte sequences. The conversion needs to be deferred until all chunks have been read. Test added. Fixes #3756.
2024-12-26 12:53:13 +00:00 · 2017-01-21 12:43:20 -08:00 · 2017-01-21 12:43:20 -08:00 · 5a5a6a6968
commit 5a5a6a6968
parent f7c133da00
3 changed files with 23 additions and 11 deletions
--- a/src/builtin.cpp
+++ b/src/builtin.cpp
@@ -2024,20 +2024,19 @@ static int read_in_chunks(int fd, wcstring &buff, bool split_null) {
            break;
        }
-        long i;
+        const char *end = std::find(inbuf, inbuf + bytes_read, split_null ? L'\0' : L'\n');
-        for (i = 0; i < bytes_read && !finished; i++) {
+        long bytes_consumed = end - inbuf; // note: must be signed for use in lseek
-            if ((!split_null && inbuf[i] == L'\n') || (split_null && inbuf[i] == L'\0')) {
+        assert(bytes_consumed <= bytes_read);
-                finished = true;
+        str.append(inbuf, bytes_consumed);
-            } else {
+        if (bytes_consumed < bytes_read) {
-                str.push_back(inbuf[i]);
+            // We found a splitter
-            }
+            // +1 because we need to treat the splitter as consumed, but not append it to the string
-        }
+            CHECK(lseek(fd, bytes_consumed - bytes_read + 1, SEEK_CUR) != -1, STATUS_BUILTIN_ERROR)
-        buff += str2wcstring(str);
+            finished = true;
        if (i < bytes_read) {
            CHECK(lseek(fd, i - bytes_read, SEEK_CUR) != -1, STATUS_BUILTIN_ERROR)
        }
    }
    buff = str2wcstring(str);
    if (buff.empty() && eof) {
        exit_res = STATUS_BUILTIN_ERROR;
    }
--- a/tests/read.in
+++ b/tests/read.in
@@ -117,4 +117,14 @@ echo -ne 'foo\nbar\0baz\nquux' | while read -lza foo
    print_vars foo
 end
 echo
 echo '# chunked read tests'
 set -l path /tmp/fish_chunked_read_test.txt
 set -l longstr (seq 1024 | string join ',')
 echo -n $longstr > $path
 read -l longstr2 < $path
 test "$longstr" = "$longstr2"
 and echo "Chunked reads test pass"
 or echo "Chunked reads test failure: long strings don't match!"
 true
--- a/tests/read.out
+++ b/tests/read.out
@@ -55,3 +55,6 @@ newline
 1 'foo' 1 'bar'
 2 'foo' 'bar'
 2 'baz' 'quux'
 # chunked read tests
 Chunked reads test pass