Merge pull request #8473 from krobelus/string-preserve-missing-newline

builtin string: don't print final newline if it's missing from stdin
This commit is contained in:
Fabian Homborg 2022-03-13 11:22:18 +01:00 committed by GitHub
commit 9172ab5983
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 84 additions and 34 deletions

View file

@ -79,6 +79,11 @@ class arg_iterator_t {
// Backing storage for the next() string. // Backing storage for the next() string.
wcstring storage_; wcstring storage_;
const io_streams_t &streams_; const io_streams_t &streams_;
// If set, we have consumed all of stdin and its last line is missing a newline character.
// This is an edge case -- we expect text input, which is conventionally terminated by a
// newline character. But if it isn't, we use this to avoid creating one out of thin air,
// to not corrupt input data.
bool missing_trailing_newline = false;
/// Reads the next argument from stdin, returning true if an argument was produced and false if /// Reads the next argument from stdin, returning true if an argument was produced and false if
/// not. On true, the string is stored in storage_. /// not. On true, the string is stored in storage_.
@ -94,6 +99,7 @@ class arg_iterator_t {
// If we still have buffer contents, flush them, // If we still have buffer contents, flush them,
// in case there was no trailing sep. // in case there was no trailing sep.
if (buffer_.empty()) return false; if (buffer_.empty()) return false;
missing_trailing_newline = true;
storage_ = str2wcstring(buffer_); storage_ = str2wcstring(buffer_);
buffer_.clear(); buffer_.clear();
return true; return true;
@ -131,6 +137,11 @@ class arg_iterator_t {
return nullptr; return nullptr;
} }
} }
/// Returns true if we should add a newline after printing output for the current item.
/// This is only ever false in an edge case, namely after we have consumed stdin and the
/// last line is missing a trailing newline.
bool want_newline() const { return !missing_trailing_newline; }
}; };
// This is used by the string subcommands to communicate with the option parser which flags are // This is used by the string subcommands to communicate with the option parser which flags are
@ -688,7 +699,9 @@ static int string_escape(parser_t &parser, io_streams_t &streams, int argc, cons
arg_iterator_t aiter(argv, optind, streams); arg_iterator_t aiter(argv, optind, streams);
while (const wcstring *arg = aiter.nextstr()) { while (const wcstring *arg = aiter.nextstr()) {
streams.out.append(escape_string(*arg, flags, opts.escape_style)); streams.out.append(escape_string(*arg, flags, opts.escape_style));
streams.out.append(L'\n'); if (aiter.want_newline()) {
streams.out.append(L'\n');
}
nesc++; nesc++;
} }
@ -713,7 +726,9 @@ static int string_unescape(parser_t &parser, io_streams_t &streams, int argc,
wcstring result; wcstring result;
if (unescape_string(*arg, &result, flags, opts.escape_style)) { if (unescape_string(*arg, &result, flags, opts.escape_style)) {
streams.out.append(result); streams.out.append(result);
streams.out.append(L'\n'); if (aiter.want_newline()) {
streams.out.append(L'\n');
}
nesc++; nesc++;
} }
} }
@ -745,7 +760,11 @@ static int string_join_maybe0(parser_t &parser, io_streams_t &streams, int argc,
nargs++; nargs++;
} }
if (nargs > 0 && !opts.quiet) { if (nargs > 0 && !opts.quiet) {
streams.out.push_back(is_join0 ? L'\0' : L'\n'); if (is_join0) {
streams.out.push_back(L'\0');
} else if (aiter.want_newline()) {
streams.out.push_back(L'\n');
}
} }
return nargs > 1 ? STATUS_CMD_OK : STATUS_CMD_ERROR; return nargs > 1 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
@ -1323,7 +1342,9 @@ static int string_pad(parser_t &parser, io_streams_t &streams, int argc, const w
padded.append(pad, opts.char_to_pad); padded.append(pad, opts.char_to_pad);
} }
} }
padded.push_back(L'\n'); if (aiter_width.want_newline()) {
padded.push_back(L'\n');
}
streams.out.append(padded); streams.out.append(padded);
} }
@ -1343,7 +1364,7 @@ class string_replacer_t {
virtual ~string_replacer_t() = default; virtual ~string_replacer_t() = default;
int replace_count() const { return total_replaced; } int replace_count() const { return total_replaced; }
virtual bool replace_matches(const wcstring &arg) = 0; virtual bool replace_matches(const wcstring &arg, bool want_newline) = 0;
}; };
class literal_replacer_t : public string_replacer_t { class literal_replacer_t : public string_replacer_t {
@ -1360,7 +1381,7 @@ class literal_replacer_t : public string_replacer_t {
patlen(pattern.length()) {} patlen(pattern.length()) {}
~literal_replacer_t() override = default; ~literal_replacer_t() override = default;
bool replace_matches(const wcstring &arg) override; bool replace_matches(const wcstring &arg, bool want_newline) override;
}; };
static maybe_t<wcstring> interpret_escapes(const wcstring &arg) { static maybe_t<wcstring> interpret_escapes(const wcstring &arg) {
@ -1400,12 +1421,12 @@ class regex_replacer_t : public string_replacer_t {
} }
} }
bool replace_matches(const wcstring &arg) override; bool replace_matches(const wcstring &arg, bool want_newline) override;
}; };
/// A return value of true means all is well (even if no replacements were performed), false /// A return value of true means all is well (even if no replacements were performed), false
/// indicates an unrecoverable error. /// indicates an unrecoverable error.
bool literal_replacer_t::replace_matches(const wcstring &arg) { bool literal_replacer_t::replace_matches(const wcstring &arg, bool want_newline) {
wcstring result; wcstring result;
bool replacement_occurred = false; bool replacement_occurred = false;
@ -1432,7 +1453,9 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) {
if (!opts.quiet && (!opts.filter || replacement_occurred)) { if (!opts.quiet && (!opts.filter || replacement_occurred)) {
streams.out.append(result); streams.out.append(result);
streams.out.append(L'\n'); if (want_newline) {
streams.out.append(L'\n');
}
} }
return true; return true;
@ -1440,7 +1463,7 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) {
/// A return value of true means all is well (even if no replacements were performed), false /// A return value of true means all is well (even if no replacements were performed), false
/// indicates an unrecoverable error. /// indicates an unrecoverable error.
bool regex_replacer_t::replace_matches(const wcstring &arg) { bool regex_replacer_t::replace_matches(const wcstring &arg, bool want_newline) {
if (!regex.code) return false; // pcre2_compile() failed if (!regex.code) return false; // pcre2_compile() failed
if (!replacement) return false; // replacement was an invalid string if (!replacement) return false; // replacement was an invalid string
@ -1488,7 +1511,9 @@ bool regex_replacer_t::replace_matches(const wcstring &arg) {
bool replacement_occurred = pcre2_rc > 0; bool replacement_occurred = pcre2_rc > 0;
if (!opts.quiet && (!opts.filter || replacement_occurred)) { if (!opts.quiet && (!opts.filter || replacement_occurred)) {
streams.out.append(outstr); streams.out.append(outstr);
streams.out.append(L'\n'); if (want_newline) {
streams.out.append(L'\n');
}
} }
total_replaced += pcre2_rc; total_replaced += pcre2_rc;
} }
@ -1520,7 +1545,7 @@ static int string_replace(parser_t &parser, io_streams_t &streams, int argc, con
arg_iterator_t aiter(argv, optind, streams); arg_iterator_t aiter(argv, optind, streams);
while (const wcstring *arg = aiter.nextstr()) { while (const wcstring *arg = aiter.nextstr()) {
if (!replacer->replace_matches(*arg)) return STATUS_INVALID_ARGS; if (!replacer->replace_matches(*arg, aiter.want_newline())) return STATUS_INVALID_ARGS;
if (opts.quiet && replacer->replace_count() > 0) return STATUS_CMD_OK; if (opts.quiet && replacer->replace_count() > 0) return STATUS_CMD_OK;
} }
@ -1601,12 +1626,12 @@ static int string_split_maybe0(parser_t &parser, io_streams_t &streams, int argc
for (const auto &field : opts.fields) { for (const auto &field : opts.fields) {
if (field - 1 < (long)splits.size()) { if (field - 1 < (long)splits.size()) {
streams.out.append_with_separation(splits.at(field - 1), streams.out.append_with_separation(splits.at(field - 1),
separation_type_t::explicitly); separation_type_t::explicitly, true);
} }
} }
} else { } else {
for (const wcstring &split : splits) { for (const wcstring &split : splits) {
streams.out.append_with_separation(split, separation_type_t::explicitly); streams.out.append_with_separation(split, separation_type_t::explicitly, true);
} }
} }
} }
@ -1641,7 +1666,8 @@ static int string_collect(parser_t &parser, io_streams_t &streams, int argc, con
len -= 1; len -= 1;
} }
} }
streams.out.append_with_separation(s, len, separation_type_t::explicitly); streams.out.append_with_separation(s, len, separation_type_t::explicitly,
aiter.want_newline());
appended += len; appended += len;
} }
@ -1650,7 +1676,9 @@ static int string_collect(parser_t &parser, io_streams_t &streams, int argc, con
// echo (true | string collect --allow-empty)"bar" // echo (true | string collect --allow-empty)"bar"
// prints "bar". // prints "bar".
if (opts.allow_empty && appended == 0) { if (opts.allow_empty && appended == 0) {
streams.out.append_with_separation(L"", 0, separation_type_t::explicitly); streams.out.append_with_separation(
L"", 0, separation_type_t::explicitly,
true /* historical behavior is to always print a newline */);
} }
return appended > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR; return appended > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
@ -1718,7 +1746,7 @@ static int string_repeat(parser_t &parser, io_streams_t &streams, int argc, cons
} }
// Historical behavior is to never append a newline if all strings were empty. // Historical behavior is to never append a newline if all strings were empty.
if (!opts.quiet && !opts.no_newline && !all_empty) { if (!opts.quiet && !opts.no_newline && !all_empty && aiter.want_newline()) {
streams.out.append(L'\n'); streams.out.append(L'\n');
} }
@ -1780,7 +1808,9 @@ static int string_sub(parser_t &parser, io_streams_t &streams, int argc, const w
// Note that std::string permits count to extend past end of string. // Note that std::string permits count to extend past end of string.
if (!opts.quiet) { if (!opts.quiet) {
streams.out.append(s->substr(pos, count)); streams.out.append(s->substr(pos, count));
streams.out.append(L'\n'); if (aiter.want_newline()) {
streams.out.append(L'\n');
}
} }
nsub++; nsub++;
if (opts.quiet) return STATUS_CMD_OK; if (opts.quiet) return STATUS_CMD_OK;
@ -1823,7 +1853,9 @@ static int string_trim(parser_t &parser, io_streams_t &streams, int argc, const
ntrim += arg->size() - (end - begin); ntrim += arg->size() - (end - begin);
if (!opts.quiet) { if (!opts.quiet) {
streams.out.append(wcstring(*arg, begin, end - begin)); streams.out.append(wcstring(*arg, begin, end - begin));
streams.out.append(L'\n'); if (aiter.want_newline()) {
streams.out.append(L'\n');
}
} else if (ntrim > 0) { } else if (ntrim > 0) {
return STATUS_CMD_OK; return STATUS_CMD_OK;
} }
@ -1849,7 +1881,9 @@ static int string_transform(parser_t &parser, io_streams_t &streams, int argc, c
if (transformed != *arg) n_transformed++; if (transformed != *arg) n_transformed++;
if (!opts.quiet) { if (!opts.quiet) {
streams.out.append(transformed); streams.out.append(transformed);
streams.out.append(L'\n'); if (aiter.want_newline()) {
streams.out.append(L'\n');
}
} else if (n_transformed > 0) { } else if (n_transformed > 0) {
return STATUS_CMD_OK; return STATUS_CMD_OK;
} }

View file

@ -309,13 +309,14 @@ shared_ptr<const io_data_t> io_chain_t::io_for_fd(int fd) const {
void output_stream_t::append_narrow_buffer(separated_buffer_t buffer) { void output_stream_t::append_narrow_buffer(separated_buffer_t buffer) {
for (const auto &rhs_elem : buffer.elements()) { for (const auto &rhs_elem : buffer.elements()) {
append_with_separation(str2wcstring(rhs_elem.contents), rhs_elem.separation); append_with_separation(str2wcstring(rhs_elem.contents), rhs_elem.separation, false);
} }
} }
void output_stream_t::append_with_separation(const wchar_t *s, size_t len, separation_type_t type) { void output_stream_t::append_with_separation(const wchar_t *s, size_t len, separation_type_t type,
bool want_newline) {
append(s, len); append(s, len);
if (type == separation_type_t::explicitly) { if (type == separation_type_t::explicitly && want_newline) {
append(L'\n'); append(L'\n');
} }
} }
@ -352,7 +353,8 @@ void buffered_output_stream_t::append(const wchar_t *s, size_t amt) {
} }
void buffered_output_stream_t::append_with_separation(const wchar_t *s, size_t len, void buffered_output_stream_t::append_with_separation(const wchar_t *s, size_t len,
separation_type_t type) { separation_type_t type, bool want_newline) {
UNUSED(want_newline);
buffer_->append(wcs2string(s, len), type); buffer_->append(wcs2string(s, len), type);
} }

View file

@ -365,11 +365,14 @@ class output_stream_t : noncopyable_t, nonmovable_t {
virtual int flush_and_check_error(); virtual int flush_and_check_error();
/// An optional override point. This is for explicit separation. /// An optional override point. This is for explicit separation.
virtual void append_with_separation(const wchar_t *s, size_t len, separation_type_t type); /// \param want_newline this is true if the output item should be ended with a newline. This
/// is only relevant if we are printing the output to a stream,
virtual void append_with_separation(const wchar_t *s, size_t len, separation_type_t type,
bool want_newline);
/// The following are all convenience overrides. /// The following are all convenience overrides.
void append_with_separation(const wcstring &s, separation_type_t type) { void append_with_separation(const wcstring &s, separation_type_t type, bool want_newline) {
append_with_separation(s.data(), s.size(), type); append_with_separation(s.data(), s.size(), type, want_newline);
} }
/// Append a string. /// Append a string.
@ -443,7 +446,8 @@ class buffered_output_stream_t final : public output_stream_t {
} }
void append(const wchar_t *s, size_t amt) override; void append(const wchar_t *s, size_t amt) override;
void append_with_separation(const wchar_t *s, size_t len, separation_type_t type) override; void append_with_separation(const wchar_t *s, size_t len, separation_type_t type,
bool want_newline) override;
int flush_and_check_error() override; int flush_and_check_error() override;
private: private:

View file

@ -40,6 +40,7 @@ echo \'{ hello , world }\'
for phrase in {good\,, beautiful ,morning} for phrase in {good\,, beautiful ,morning}
echo -n "$phrase " echo -n "$phrase "
end | string trim end | string trim
echo
for phrase in {goodbye\,,\ cruel\ ,world\n} for phrase in {goodbye\,,\ cruel\ ,world\n}
echo -n $phrase echo -n $phrase
end end

View file

@ -601,12 +601,12 @@ and echo uppercasing a uppercase string did not fail as expected
# 'Check NUL' # 'Check NUL'
# Note: We do `string escape` at the end to make a `\0` literal visible. # Note: We do `string escape` at the end to make a `\0` literal visible.
printf 'a\0b' | string escape printf 'a\0b\n' | string escape
printf 'a\0c' | string match -e a | string escape printf 'a\0c\n' | string match -e a | string escape
printf 'a\0d' | string split '' | string escape printf 'a\0d\n' | string split '' | string escape
printf 'a\0b' | string match -r '.*b$' | string escape printf 'a\0b\n' | string match -r '.*b$' | string escape
printf 'a\0b' | string replace b g | string escape printf 'a\0b\n' | string replace b g | string escape
printf 'a\0b' | string replace -r b g | string escape printf 'a\0b\n' | string replace -r b g | string escape
# TODO: These do not yet work! # TODO: These do not yet work!
# printf 'a\0b' | string match '*b' | string escape # printf 'a\0b' | string match '*b' | string escape
# CHECK: a\x00b # CHECK: a\x00b
@ -797,3 +797,12 @@ echo "foo1x foo2x foo3x" | string match -arg 'foo(\d)x'
# CHECK: 1 # CHECK: 1
# CHECK: 2 # CHECK: 2
# CHECK: 3 # CHECK: 3
# Most subcommands preserve missing newline (#3847).
echo -n abc | string upper
echo '<eol>'
# CHECK: ABC<eol>
printf \<
printf my-password | string replace -ra . \*
printf \>\n
# CHECK: <***********>