diff --git a/src/builtins/string.cpp b/src/builtins/string.cpp index c4c1a138c..f24e9db28 100644 --- a/src/builtins/string.cpp +++ b/src/builtins/string.cpp @@ -79,6 +79,11 @@ class arg_iterator_t { // Backing storage for the next() string. wcstring storage_; const io_streams_t &streams_; + // If set, we have consumed all of stdin and its last line is missing a newline character. + // This is an edge case -- we expect text input, which is conventionally terminated by a + // newline character. But if it isn't, we use this to avoid creating one out of thin air, + // to not corrupt input data. + bool missing_trailing_newline = false; /// Reads the next argument from stdin, returning true if an argument was produced and false if /// not. On true, the string is stored in storage_. @@ -94,6 +99,7 @@ class arg_iterator_t { // If we still have buffer contents, flush them, // in case there was no trailing sep. if (buffer_.empty()) return false; + missing_trailing_newline = true; storage_ = str2wcstring(buffer_); buffer_.clear(); return true; @@ -131,6 +137,11 @@ class arg_iterator_t { return nullptr; } } + + /// Returns true if we should add a newline after printing output for the current item. + /// This is only ever false in an edge case, namely after we have consumed stdin and the + /// last line is missing a trailing newline. + bool want_newline() const { return !missing_trailing_newline; } }; // This is used by the string subcommands to communicate with the option parser which flags are @@ -688,7 +699,9 @@ static int string_escape(parser_t &parser, io_streams_t &streams, int argc, cons arg_iterator_t aiter(argv, optind, streams); while (const wcstring *arg = aiter.nextstr()) { streams.out.append(escape_string(*arg, flags, opts.escape_style)); - streams.out.append(L'\n'); + if (aiter.want_newline()) { + streams.out.append(L'\n'); + } nesc++; } @@ -713,7 +726,9 @@ static int string_unescape(parser_t &parser, io_streams_t &streams, int argc, wcstring result; if (unescape_string(*arg, &result, flags, opts.escape_style)) { streams.out.append(result); - streams.out.append(L'\n'); + if (aiter.want_newline()) { + streams.out.append(L'\n'); + } nesc++; } } @@ -745,7 +760,11 @@ static int string_join_maybe0(parser_t &parser, io_streams_t &streams, int argc, nargs++; } if (nargs > 0 && !opts.quiet) { - streams.out.push_back(is_join0 ? L'\0' : L'\n'); + if (is_join0) { + streams.out.push_back(L'\0'); + } else if (aiter.want_newline()) { + streams.out.push_back(L'\n'); + } } return nargs > 1 ? STATUS_CMD_OK : STATUS_CMD_ERROR; @@ -1323,7 +1342,9 @@ static int string_pad(parser_t &parser, io_streams_t &streams, int argc, const w padded.append(pad, opts.char_to_pad); } } - padded.push_back(L'\n'); + if (aiter_width.want_newline()) { + padded.push_back(L'\n'); + } streams.out.append(padded); } @@ -1343,7 +1364,7 @@ class string_replacer_t { virtual ~string_replacer_t() = default; int replace_count() const { return total_replaced; } - virtual bool replace_matches(const wcstring &arg) = 0; + virtual bool replace_matches(const wcstring &arg, bool want_newline) = 0; }; class literal_replacer_t : public string_replacer_t { @@ -1360,7 +1381,7 @@ class literal_replacer_t : public string_replacer_t { patlen(pattern.length()) {} ~literal_replacer_t() override = default; - bool replace_matches(const wcstring &arg) override; + bool replace_matches(const wcstring &arg, bool want_newline) override; }; static maybe_t interpret_escapes(const wcstring &arg) { @@ -1400,12 +1421,12 @@ class regex_replacer_t : public string_replacer_t { } } - bool replace_matches(const wcstring &arg) override; + bool replace_matches(const wcstring &arg, bool want_newline) override; }; /// A return value of true means all is well (even if no replacements were performed), false /// indicates an unrecoverable error. -bool literal_replacer_t::replace_matches(const wcstring &arg) { +bool literal_replacer_t::replace_matches(const wcstring &arg, bool want_newline) { wcstring result; bool replacement_occurred = false; @@ -1432,7 +1453,9 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) { if (!opts.quiet && (!opts.filter || replacement_occurred)) { streams.out.append(result); - streams.out.append(L'\n'); + if (want_newline) { + streams.out.append(L'\n'); + } } return true; @@ -1440,7 +1463,7 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) { /// A return value of true means all is well (even if no replacements were performed), false /// indicates an unrecoverable error. -bool regex_replacer_t::replace_matches(const wcstring &arg) { +bool regex_replacer_t::replace_matches(const wcstring &arg, bool want_newline) { if (!regex.code) return false; // pcre2_compile() failed if (!replacement) return false; // replacement was an invalid string @@ -1488,7 +1511,9 @@ bool regex_replacer_t::replace_matches(const wcstring &arg) { bool replacement_occurred = pcre2_rc > 0; if (!opts.quiet && (!opts.filter || replacement_occurred)) { streams.out.append(outstr); - streams.out.append(L'\n'); + if (want_newline) { + streams.out.append(L'\n'); + } } total_replaced += pcre2_rc; } @@ -1520,7 +1545,7 @@ static int string_replace(parser_t &parser, io_streams_t &streams, int argc, con arg_iterator_t aiter(argv, optind, streams); while (const wcstring *arg = aiter.nextstr()) { - if (!replacer->replace_matches(*arg)) return STATUS_INVALID_ARGS; + if (!replacer->replace_matches(*arg, aiter.want_newline())) return STATUS_INVALID_ARGS; if (opts.quiet && replacer->replace_count() > 0) return STATUS_CMD_OK; } @@ -1601,12 +1626,12 @@ static int string_split_maybe0(parser_t &parser, io_streams_t &streams, int argc for (const auto &field : opts.fields) { if (field - 1 < (long)splits.size()) { streams.out.append_with_separation(splits.at(field - 1), - separation_type_t::explicitly); + separation_type_t::explicitly, true); } } } else { for (const wcstring &split : splits) { - streams.out.append_with_separation(split, separation_type_t::explicitly); + streams.out.append_with_separation(split, separation_type_t::explicitly, true); } } } @@ -1641,7 +1666,8 @@ static int string_collect(parser_t &parser, io_streams_t &streams, int argc, con len -= 1; } } - streams.out.append_with_separation(s, len, separation_type_t::explicitly); + streams.out.append_with_separation(s, len, separation_type_t::explicitly, + aiter.want_newline()); appended += len; } @@ -1650,7 +1676,9 @@ static int string_collect(parser_t &parser, io_streams_t &streams, int argc, con // echo (true | string collect --allow-empty)"bar" // prints "bar". if (opts.allow_empty && appended == 0) { - streams.out.append_with_separation(L"", 0, separation_type_t::explicitly); + streams.out.append_with_separation( + L"", 0, separation_type_t::explicitly, + true /* historical behavior is to always print a newline */); } return appended > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR; @@ -1718,7 +1746,7 @@ static int string_repeat(parser_t &parser, io_streams_t &streams, int argc, cons } // Historical behavior is to never append a newline if all strings were empty. - if (!opts.quiet && !opts.no_newline && !all_empty) { + if (!opts.quiet && !opts.no_newline && !all_empty && aiter.want_newline()) { streams.out.append(L'\n'); } @@ -1780,7 +1808,9 @@ static int string_sub(parser_t &parser, io_streams_t &streams, int argc, const w // Note that std::string permits count to extend past end of string. if (!opts.quiet) { streams.out.append(s->substr(pos, count)); - streams.out.append(L'\n'); + if (aiter.want_newline()) { + streams.out.append(L'\n'); + } } nsub++; if (opts.quiet) return STATUS_CMD_OK; @@ -1823,7 +1853,9 @@ static int string_trim(parser_t &parser, io_streams_t &streams, int argc, const ntrim += arg->size() - (end - begin); if (!opts.quiet) { streams.out.append(wcstring(*arg, begin, end - begin)); - streams.out.append(L'\n'); + if (aiter.want_newline()) { + streams.out.append(L'\n'); + } } else if (ntrim > 0) { return STATUS_CMD_OK; } @@ -1849,7 +1881,9 @@ static int string_transform(parser_t &parser, io_streams_t &streams, int argc, c if (transformed != *arg) n_transformed++; if (!opts.quiet) { streams.out.append(transformed); - streams.out.append(L'\n'); + if (aiter.want_newline()) { + streams.out.append(L'\n'); + } } else if (n_transformed > 0) { return STATUS_CMD_OK; } diff --git a/src/io.cpp b/src/io.cpp index 63565bae1..9e86ebebc 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -309,13 +309,14 @@ shared_ptr io_chain_t::io_for_fd(int fd) const { void output_stream_t::append_narrow_buffer(separated_buffer_t buffer) { for (const auto &rhs_elem : buffer.elements()) { - append_with_separation(str2wcstring(rhs_elem.contents), rhs_elem.separation); + append_with_separation(str2wcstring(rhs_elem.contents), rhs_elem.separation, false); } } -void output_stream_t::append_with_separation(const wchar_t *s, size_t len, separation_type_t type) { +void output_stream_t::append_with_separation(const wchar_t *s, size_t len, separation_type_t type, + bool want_newline) { append(s, len); - if (type == separation_type_t::explicitly) { + if (type == separation_type_t::explicitly && want_newline) { append(L'\n'); } } @@ -352,7 +353,8 @@ void buffered_output_stream_t::append(const wchar_t *s, size_t amt) { } void buffered_output_stream_t::append_with_separation(const wchar_t *s, size_t len, - separation_type_t type) { + separation_type_t type, bool want_newline) { + UNUSED(want_newline); buffer_->append(wcs2string(s, len), type); } diff --git a/src/io.h b/src/io.h index 14ed800c2..55247f9a8 100644 --- a/src/io.h +++ b/src/io.h @@ -365,11 +365,14 @@ class output_stream_t : noncopyable_t, nonmovable_t { virtual int flush_and_check_error(); /// An optional override point. This is for explicit separation. - virtual void append_with_separation(const wchar_t *s, size_t len, separation_type_t type); + /// \param want_newline this is true if the output item should be ended with a newline. This + /// is only relevant if we are printing the output to a stream, + virtual void append_with_separation(const wchar_t *s, size_t len, separation_type_t type, + bool want_newline); /// The following are all convenience overrides. - void append_with_separation(const wcstring &s, separation_type_t type) { - append_with_separation(s.data(), s.size(), type); + void append_with_separation(const wcstring &s, separation_type_t type, bool want_newline) { + append_with_separation(s.data(), s.size(), type, want_newline); } /// Append a string. @@ -443,7 +446,8 @@ class buffered_output_stream_t final : public output_stream_t { } void append(const wchar_t *s, size_t amt) override; - void append_with_separation(const wchar_t *s, size_t len, separation_type_t type) override; + void append_with_separation(const wchar_t *s, size_t len, separation_type_t type, + bool want_newline) override; int flush_and_check_error() override; private: diff --git a/tests/checks/expansion.fish b/tests/checks/expansion.fish index 0cff88d14..15435161f 100644 --- a/tests/checks/expansion.fish +++ b/tests/checks/expansion.fish @@ -40,6 +40,7 @@ echo \'{ hello , world }\' for phrase in {good\,, beautiful ,morning} echo -n "$phrase " end | string trim +echo for phrase in {goodbye\,,\ cruel\ ,world\n} echo -n $phrase end diff --git a/tests/checks/string.fish b/tests/checks/string.fish index 7ffe2d427..afade5ca9 100644 --- a/tests/checks/string.fish +++ b/tests/checks/string.fish @@ -601,12 +601,12 @@ and echo uppercasing a uppercase string did not fail as expected # 'Check NUL' # Note: We do `string escape` at the end to make a `\0` literal visible. -printf 'a\0b' | string escape -printf 'a\0c' | string match -e a | string escape -printf 'a\0d' | string split '' | string escape -printf 'a\0b' | string match -r '.*b$' | string escape -printf 'a\0b' | string replace b g | string escape -printf 'a\0b' | string replace -r b g | string escape +printf 'a\0b\n' | string escape +printf 'a\0c\n' | string match -e a | string escape +printf 'a\0d\n' | string split '' | string escape +printf 'a\0b\n' | string match -r '.*b$' | string escape +printf 'a\0b\n' | string replace b g | string escape +printf 'a\0b\n' | string replace -r b g | string escape # TODO: These do not yet work! # printf 'a\0b' | string match '*b' | string escape # CHECK: a\x00b @@ -797,3 +797,12 @@ echo "foo1x foo2x foo3x" | string match -arg 'foo(\d)x' # CHECK: 1 # CHECK: 2 # CHECK: 3 + +# Most subcommands preserve missing newline (#3847). +echo -n abc | string upper +echo '' +# CHECK: ABC +printf \< +printf my-password | string replace -ra . \* +printf \>\n +# CHECK: <***********>