Merge pull request #8473 from krobelus/string-preserve-missing-newline

builtin string: don't print final newline if it's missing from stdin
This commit is contained in:
Fabian Homborg 2022-03-13 11:22:18 +01:00 committed by GitHub
commit 9172ab5983
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 84 additions and 34 deletions

View file

@ -79,6 +79,11 @@ class arg_iterator_t {
// Backing storage for the next() string.
wcstring storage_;
const io_streams_t &streams_;
// If set, we have consumed all of stdin and its last line is missing a newline character.
// This is an edge case -- we expect text input, which is conventionally terminated by a
// newline character. But if it isn't, we use this to avoid creating one out of thin air,
// to not corrupt input data.
bool missing_trailing_newline = false;
/// Reads the next argument from stdin, returning true if an argument was produced and false if
/// not. On true, the string is stored in storage_.
@ -94,6 +99,7 @@ class arg_iterator_t {
// If we still have buffer contents, flush them,
// in case there was no trailing sep.
if (buffer_.empty()) return false;
missing_trailing_newline = true;
storage_ = str2wcstring(buffer_);
buffer_.clear();
return true;
@ -131,6 +137,11 @@ class arg_iterator_t {
return nullptr;
}
}
/// Returns true if we should add a newline after printing output for the current item.
/// This is only ever false in an edge case, namely after we have consumed stdin and the
/// last line is missing a trailing newline.
bool want_newline() const { return !missing_trailing_newline; }
};
// This is used by the string subcommands to communicate with the option parser which flags are
@ -688,7 +699,9 @@ static int string_escape(parser_t &parser, io_streams_t &streams, int argc, cons
arg_iterator_t aiter(argv, optind, streams);
while (const wcstring *arg = aiter.nextstr()) {
streams.out.append(escape_string(*arg, flags, opts.escape_style));
if (aiter.want_newline()) {
streams.out.append(L'\n');
}
nesc++;
}
@ -713,7 +726,9 @@ static int string_unescape(parser_t &parser, io_streams_t &streams, int argc,
wcstring result;
if (unescape_string(*arg, &result, flags, opts.escape_style)) {
streams.out.append(result);
if (aiter.want_newline()) {
streams.out.append(L'\n');
}
nesc++;
}
}
@ -745,7 +760,11 @@ static int string_join_maybe0(parser_t &parser, io_streams_t &streams, int argc,
nargs++;
}
if (nargs > 0 && !opts.quiet) {
streams.out.push_back(is_join0 ? L'\0' : L'\n');
if (is_join0) {
streams.out.push_back(L'\0');
} else if (aiter.want_newline()) {
streams.out.push_back(L'\n');
}
}
return nargs > 1 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
@ -1323,7 +1342,9 @@ static int string_pad(parser_t &parser, io_streams_t &streams, int argc, const w
padded.append(pad, opts.char_to_pad);
}
}
if (aiter_width.want_newline()) {
padded.push_back(L'\n');
}
streams.out.append(padded);
}
@ -1343,7 +1364,7 @@ class string_replacer_t {
virtual ~string_replacer_t() = default;
int replace_count() const { return total_replaced; }
virtual bool replace_matches(const wcstring &arg) = 0;
virtual bool replace_matches(const wcstring &arg, bool want_newline) = 0;
};
class literal_replacer_t : public string_replacer_t {
@ -1360,7 +1381,7 @@ class literal_replacer_t : public string_replacer_t {
patlen(pattern.length()) {}
~literal_replacer_t() override = default;
bool replace_matches(const wcstring &arg) override;
bool replace_matches(const wcstring &arg, bool want_newline) override;
};
static maybe_t<wcstring> interpret_escapes(const wcstring &arg) {
@ -1400,12 +1421,12 @@ class regex_replacer_t : public string_replacer_t {
}
}
bool replace_matches(const wcstring &arg) override;
bool replace_matches(const wcstring &arg, bool want_newline) override;
};
/// A return value of true means all is well (even if no replacements were performed), false
/// indicates an unrecoverable error.
bool literal_replacer_t::replace_matches(const wcstring &arg) {
bool literal_replacer_t::replace_matches(const wcstring &arg, bool want_newline) {
wcstring result;
bool replacement_occurred = false;
@ -1432,15 +1453,17 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) {
if (!opts.quiet && (!opts.filter || replacement_occurred)) {
streams.out.append(result);
if (want_newline) {
streams.out.append(L'\n');
}
}
return true;
}
/// A return value of true means all is well (even if no replacements were performed), false
/// indicates an unrecoverable error.
bool regex_replacer_t::replace_matches(const wcstring &arg) {
bool regex_replacer_t::replace_matches(const wcstring &arg, bool want_newline) {
if (!regex.code) return false; // pcre2_compile() failed
if (!replacement) return false; // replacement was an invalid string
@ -1488,8 +1511,10 @@ bool regex_replacer_t::replace_matches(const wcstring &arg) {
bool replacement_occurred = pcre2_rc > 0;
if (!opts.quiet && (!opts.filter || replacement_occurred)) {
streams.out.append(outstr);
if (want_newline) {
streams.out.append(L'\n');
}
}
total_replaced += pcre2_rc;
}
@ -1520,7 +1545,7 @@ static int string_replace(parser_t &parser, io_streams_t &streams, int argc, con
arg_iterator_t aiter(argv, optind, streams);
while (const wcstring *arg = aiter.nextstr()) {
if (!replacer->replace_matches(*arg)) return STATUS_INVALID_ARGS;
if (!replacer->replace_matches(*arg, aiter.want_newline())) return STATUS_INVALID_ARGS;
if (opts.quiet && replacer->replace_count() > 0) return STATUS_CMD_OK;
}
@ -1601,12 +1626,12 @@ static int string_split_maybe0(parser_t &parser, io_streams_t &streams, int argc
for (const auto &field : opts.fields) {
if (field - 1 < (long)splits.size()) {
streams.out.append_with_separation(splits.at(field - 1),
separation_type_t::explicitly);
separation_type_t::explicitly, true);
}
}
} else {
for (const wcstring &split : splits) {
streams.out.append_with_separation(split, separation_type_t::explicitly);
streams.out.append_with_separation(split, separation_type_t::explicitly, true);
}
}
}
@ -1641,7 +1666,8 @@ static int string_collect(parser_t &parser, io_streams_t &streams, int argc, con
len -= 1;
}
}
streams.out.append_with_separation(s, len, separation_type_t::explicitly);
streams.out.append_with_separation(s, len, separation_type_t::explicitly,
aiter.want_newline());
appended += len;
}
@ -1650,7 +1676,9 @@ static int string_collect(parser_t &parser, io_streams_t &streams, int argc, con
// echo (true | string collect --allow-empty)"bar"
// prints "bar".
if (opts.allow_empty && appended == 0) {
streams.out.append_with_separation(L"", 0, separation_type_t::explicitly);
streams.out.append_with_separation(
L"", 0, separation_type_t::explicitly,
true /* historical behavior is to always print a newline */);
}
return appended > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
@ -1718,7 +1746,7 @@ static int string_repeat(parser_t &parser, io_streams_t &streams, int argc, cons
}
// Historical behavior is to never append a newline if all strings were empty.
if (!opts.quiet && !opts.no_newline && !all_empty) {
if (!opts.quiet && !opts.no_newline && !all_empty && aiter.want_newline()) {
streams.out.append(L'\n');
}
@ -1780,8 +1808,10 @@ static int string_sub(parser_t &parser, io_streams_t &streams, int argc, const w
// Note that std::string permits count to extend past end of string.
if (!opts.quiet) {
streams.out.append(s->substr(pos, count));
if (aiter.want_newline()) {
streams.out.append(L'\n');
}
}
nsub++;
if (opts.quiet) return STATUS_CMD_OK;
}
@ -1823,7 +1853,9 @@ static int string_trim(parser_t &parser, io_streams_t &streams, int argc, const
ntrim += arg->size() - (end - begin);
if (!opts.quiet) {
streams.out.append(wcstring(*arg, begin, end - begin));
if (aiter.want_newline()) {
streams.out.append(L'\n');
}
} else if (ntrim > 0) {
return STATUS_CMD_OK;
}
@ -1849,7 +1881,9 @@ static int string_transform(parser_t &parser, io_streams_t &streams, int argc, c
if (transformed != *arg) n_transformed++;
if (!opts.quiet) {
streams.out.append(transformed);
if (aiter.want_newline()) {
streams.out.append(L'\n');
}
} else if (n_transformed > 0) {
return STATUS_CMD_OK;
}

View file

@ -309,13 +309,14 @@ shared_ptr<const io_data_t> io_chain_t::io_for_fd(int fd) const {
void output_stream_t::append_narrow_buffer(separated_buffer_t buffer) {
for (const auto &rhs_elem : buffer.elements()) {
append_with_separation(str2wcstring(rhs_elem.contents), rhs_elem.separation);
append_with_separation(str2wcstring(rhs_elem.contents), rhs_elem.separation, false);
}
}
void output_stream_t::append_with_separation(const wchar_t *s, size_t len, separation_type_t type) {
void output_stream_t::append_with_separation(const wchar_t *s, size_t len, separation_type_t type,
bool want_newline) {
append(s, len);
if (type == separation_type_t::explicitly) {
if (type == separation_type_t::explicitly && want_newline) {
append(L'\n');
}
}
@ -352,7 +353,8 @@ void buffered_output_stream_t::append(const wchar_t *s, size_t amt) {
}
void buffered_output_stream_t::append_with_separation(const wchar_t *s, size_t len,
separation_type_t type) {
separation_type_t type, bool want_newline) {
UNUSED(want_newline);
buffer_->append(wcs2string(s, len), type);
}

View file

@ -365,11 +365,14 @@ class output_stream_t : noncopyable_t, nonmovable_t {
virtual int flush_and_check_error();
/// An optional override point. This is for explicit separation.
virtual void append_with_separation(const wchar_t *s, size_t len, separation_type_t type);
/// \param want_newline this is true if the output item should be ended with a newline. This
/// is only relevant if we are printing the output to a stream,
virtual void append_with_separation(const wchar_t *s, size_t len, separation_type_t type,
bool want_newline);
/// The following are all convenience overrides.
void append_with_separation(const wcstring &s, separation_type_t type) {
append_with_separation(s.data(), s.size(), type);
void append_with_separation(const wcstring &s, separation_type_t type, bool want_newline) {
append_with_separation(s.data(), s.size(), type, want_newline);
}
/// Append a string.
@ -443,7 +446,8 @@ class buffered_output_stream_t final : public output_stream_t {
}
void append(const wchar_t *s, size_t amt) override;
void append_with_separation(const wchar_t *s, size_t len, separation_type_t type) override;
void append_with_separation(const wchar_t *s, size_t len, separation_type_t type,
bool want_newline) override;
int flush_and_check_error() override;
private:

View file

@ -40,6 +40,7 @@ echo \'{ hello , world }\'
for phrase in {good\,, beautiful ,morning}
echo -n "$phrase "
end | string trim
echo
for phrase in {goodbye\,,\ cruel\ ,world\n}
echo -n $phrase
end

View file

@ -601,12 +601,12 @@ and echo uppercasing a uppercase string did not fail as expected
# 'Check NUL'
# Note: We do `string escape` at the end to make a `\0` literal visible.
printf 'a\0b' | string escape
printf 'a\0c' | string match -e a | string escape
printf 'a\0d' | string split '' | string escape
printf 'a\0b' | string match -r '.*b$' | string escape
printf 'a\0b' | string replace b g | string escape
printf 'a\0b' | string replace -r b g | string escape
printf 'a\0b\n' | string escape
printf 'a\0c\n' | string match -e a | string escape
printf 'a\0d\n' | string split '' | string escape
printf 'a\0b\n' | string match -r '.*b$' | string escape
printf 'a\0b\n' | string replace b g | string escape
printf 'a\0b\n' | string replace -r b g | string escape
# TODO: These do not yet work!
# printf 'a\0b' | string match '*b' | string escape
# CHECK: a\x00b
@ -797,3 +797,12 @@ echo "foo1x foo2x foo3x" | string match -arg 'foo(\d)x'
# CHECK: 1
# CHECK: 2
# CHECK: 3
# Most subcommands preserve missing newline (#3847).
echo -n abc | string upper
echo '<eol>'
# CHECK: ABC<eol>
printf \<
printf my-password | string replace -ra . \*
printf \>\n
# CHECK: <***********>