Fix behaviour in the presence of non-visible width

Padding with an unprintable character is now disallowed, like it was for other
zero-length characters.

`string shorten` now ignores escape sequences and non-printable characters
when calculating the visible width of the ellipsis used (except for `\b`,
which is treated as a width of -1).
Previously `fish_wcswidth` returned a length of -1 when the ellipsis-str
contained any non-printable character, causing the command to poentially
print a larger width than expected.

This also fixes an integer overflows in `string shorten`'s
`max` and `max2`, when the cumulative sum of character widths turned negative
(e.g. with any non-printable characters, or `\b` after the changes above).
The overflow potentially caused strings containing non-printable characters
to be truncated.

This adds test that verify the fixed behaviour.
This commit is contained in:
Henrik Hørlück Berg 2023-07-13 04:17:19 +02:00 committed by Peter Ammon
parent 20be990fd9
commit 6dd2cd2b20
6 changed files with 151 additions and 59 deletions

View file

@ -51,6 +51,8 @@ Other improvements
- A bug that prevented certain executables from being offered in tab-completions when root has been fixed (:issue:`9639`).
- Builin `jobs` will print commands with non-printable chars escaped (:issue:`9808`)
- An integer overflow in `string repeat` leading to a near-infinite loop has been fixed (:issue:`9899`).
- `string shorten` behaves better in the presence of non-printable characters, including fixing an integer overflow that shortened strings more than intended. (:issue:`9854`)
- `string pad` no longer allows non-printable characters as padding. (:issue:`9854`)
For distributors
----------------

View file

@ -259,7 +259,7 @@ enum Direction {
Right,
}
pub(self) fn width_without_escapes(ins: &wstr, start_pos: usize) -> i32 {
pub(self) fn width_without_escapes(ins: &wstr, start_pos: usize) -> usize {
let mut width: i32 = 0;
for c in ins[start_pos..].chars() {
let w = fish_wcwidth_visible(c);
@ -287,8 +287,9 @@ pub(self) fn width_without_escapes(ins: &wstr, start_pos: usize) -> i32 {
pos += 1;
}
}
return width;
// we subtracted less than we added
debug_assert!(width >= 0, "line has negative width");
return width as usize;
}
pub(self) fn escape_code_length(code: &wstr) -> Option<usize> {

View file

@ -41,8 +41,8 @@ impl StringSubCommand<'_> for Length {
// Carriage-return returns us to the beginning. The longest substring without
// carriage-return determines the overall width.
for reset in split_string(&line, '\r') {
let n = width_without_escapes(&reset, 0) as usize;
max = max.max(n);
let n = width_without_escapes(&reset, 0);
max = usize::max(max, n);
}
if max > 0 {
nnonempty += 1;

View file

@ -1,11 +1,12 @@
use std::borrow::Cow;
use super::*;
use crate::wutil::{fish_wcstol, fish_wcswidth};
use crate::fallback::fish_wcwidth;
use crate::wutil::fish_wcstol;
pub struct Pad {
char_to_pad: char,
pad_char_width: i32,
pad_char_width: usize,
pad_from: Direction,
width: usize,
}
@ -33,25 +34,23 @@ impl StringSubCommand<'_> for Pad {
fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> {
match c {
'c' => {
let arg = arg.expect("option -c requires an argument");
if arg.len() != 1 {
let [pad_char] = arg.unwrap().as_char_slice() else {
return Err(invalid_args!(
"%ls: Padding should be a character '%ls'\n",
name,
Some(arg)
arg
));
}
let pad_char_width = fish_wcswidth(arg.slice_to(1));
// can we ever have negative width?
if pad_char_width == 0 {
};
let pad_char_width = fish_wcwidth(*pad_char);
if pad_char_width <= 0 {
return Err(invalid_args!(
"%ls: Invalid padding character of width zero '%ls'\n",
name,
Some(arg)
arg
));
}
self.pad_char_width = pad_char_width;
self.char_to_pad = arg.char_at(0);
self.pad_char_width = pad_char_width as usize;
self.char_to_pad = *pad_char;
}
'r' => self.pad_from = Direction::Right,
'w' => {
@ -71,8 +70,8 @@ impl StringSubCommand<'_> for Pad {
optind: &mut usize,
args: &[&'args wstr],
) -> Option<libc::c_int> {
let mut max_width = 0i32;
let mut inputs: Vec<(Cow<'args, wstr>, i32)> = Vec::new();
let mut max_width = 0usize;
let mut inputs: Vec<(Cow<'args, wstr>, usize)> = Vec::new();
let mut print_newline = true;
for (arg, want_newline) in Arguments::new(args, optind, streams) {
@ -82,7 +81,7 @@ impl StringSubCommand<'_> for Pad {
print_newline = want_newline;
}
let pad_width = max_width.max(self.width as i32);
let pad_width = max_width.max(self.width);
for (input, width) in inputs {
use std::iter::repeat;
@ -91,14 +90,14 @@ impl StringSubCommand<'_> for Pad {
let remaining_width = (pad_width - width) % self.pad_char_width;
let mut padded: WString = match self.pad_from {
Direction::Left => repeat(self.char_to_pad)
.take(pad as usize)
.chain(repeat(' ').take(remaining_width as usize))
.take(pad)
.chain(repeat(' ').take(remaining_width))
.chain(input.chars())
.collect(),
Direction::Right => input
.chars()
.chain(repeat(' ').take(remaining_width as usize))
.chain(repeat(self.char_to_pad).take(pad as usize))
.chain(repeat(' ').take(remaining_width))
.chain(repeat(self.char_to_pad).take(pad))
.collect(),
};

View file

@ -1,25 +1,26 @@
use super::*;
use crate::common::get_ellipsis_str;
use crate::fallback::fish_wcwidth;
use crate::wcstringutil::split_string;
use crate::wutil::{fish_wcstol, fish_wcswidth};
use crate::wutil::fish_wcstol;
pub struct Shorten<'args> {
chars_to_shorten: &'args wstr,
ellipsis: &'args wstr,
ellipsis_width: usize,
max: Option<usize>,
no_newline: bool,
quiet: bool,
direction: Direction,
shorten_from: Direction,
}
impl Default for Shorten<'_> {
fn default() -> Self {
Self {
chars_to_shorten: get_ellipsis_str(),
ellipsis: get_ellipsis_str(),
ellipsis_width: width_without_escapes(get_ellipsis_str(), 0),
max: None,
no_newline: false,
quiet: false,
direction: Direction::Right,
shorten_from: Direction::Right,
}
}
}
@ -42,7 +43,10 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
arg: Option<&'args wstr>,
) -> Result<(), StringError> {
match c {
'c' => self.chars_to_shorten = arg.expect("option --char requires an argument"),
'c' => {
self.ellipsis = arg.unwrap();
self.ellipsis_width = width_without_escapes(self.ellipsis, 0);
}
'm' => {
self.max = Some(
fish_wcstol(arg.unwrap())?
@ -51,7 +55,7 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
)
}
'N' => self.no_newline = true,
'l' => self.direction = Direction::Left,
'l' => self.shorten_from = Direction::Left,
'q' => self.quiet = true,
_ => return Err(StringError::UnknownOption),
}
@ -67,7 +71,6 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
) -> Option<libc::c_int> {
let mut min_width = usize::MAX;
let mut inputs = Vec::new();
let mut ell = self.chars_to_shorten;
let iter = Arguments::new(args, optind, streams);
@ -93,23 +96,23 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
// or we handle the lines separately.
let mut splits = split_string(&arg, '\n').into_iter();
if self.no_newline && splits.len() > 1 {
let mut s = match self.direction {
let mut s = match self.shorten_from {
Direction::Right => splits.next(),
Direction::Left => splits.last(),
}
.unwrap();
s.push_utfstr(ell);
s.push_utfstr(self.ellipsis);
let width = width_without_escapes(&s, 0);
if width > 0 && (width as usize) < min_width {
min_width = width as usize;
if width > 0 && width < min_width {
min_width = width;
}
inputs.push(s);
} else {
for s in splits {
let width = width_without_escapes(&s, 0);
if width > 0 && (width as usize) < min_width {
min_width = width as usize;
if width > 0 && width < min_width {
min_width = width;
}
inputs.push(s);
}
@ -118,18 +121,12 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
let ourmax: usize = self.max.unwrap_or(min_width);
// TODO: Can we have negative width
let ell_width: i32 = {
let w = fish_wcswidth(ell);
if w > ourmax as i32 {
let (ell, ell_width) = if self.ellipsis_width > ourmax {
// If we can't even print our ellipsis, we substitute nothing,
// truncating instead.
ell = L!("");
0
(L!(""), 0)
} else {
w
}
(self.ellipsis, self.ellipsis_width)
};
let mut nsub = 0usize;
@ -153,7 +150,7 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
let mut pos = 0usize;
let mut max = 0usize;
// Collect how much of the string we can use without going over the maximum.
if self.direction == Direction::Left {
if self.shorten_from == Direction::Left {
// Our strategy for keeping from the end.
// This is rather unoptimized - actually going *backwards* from the end
// is extremely tricky because we would have to subtract escapes again.
@ -165,7 +162,7 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
// If we're at the beginning and it fits, we sits.
//
// Otherwise we require it to fit the ellipsis
if (w <= ourmax as i32 && pos == 0) || (w + ell_width <= ourmax as i32) {
if (w <= ourmax && pos == 0) || (w + ell_width <= ourmax) {
out = line.slice_from(pos);
break;
}
@ -191,23 +188,24 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
streams.out.append1('\n');
continue;
} else {
/* Direction::Right */
/* shorten the right side */
// Going from the left.
// This is somewhat easier.
while max <= ourmax && pos < line.len() {
pos += skip_escapes(&line, pos);
let w = fish_wcwidth(line.char_at(pos));
if w <= 0 || max + w as usize + ell_width as usize <= ourmax {
let w = fish_wcwidth_visible(line.char_at(pos)) as isize;
if w <= 0 || max + w as usize + ell_width <= ourmax {
// If it still fits, even if it is the last, we add it.
max += w as usize;
max = max.saturating_add_signed(w);
pos += 1;
} else {
// We're at the limit, so see if the entire string fits.
let mut max2: usize = max + w as usize;
let mut max2 = max + w as usize;
let mut pos2 = pos + 1;
while pos2 < line.len() {
pos2 += skip_escapes(&line, pos2);
max2 += fish_wcwidth(line.char_at(pos2)) as usize;
let w = fish_wcwidth_visible(line.char_at(pos2)) as isize;
max2 = max2.saturating_add_signed(w);
pos2 += 1;
}

View file

@ -91,6 +91,10 @@ string pad -c_ --width 5 longer-than-width-param x
string pad -c ab -w4 .
# CHECKERR: string pad: Padding should be a character 'ab'
# nonprintable characters does not make sense
string pad -c \u07 .
# CHECKERR: string pad: Invalid padding character of width zero {{'\a'}}
# Visible length. Let's start off simple, colors are ignored:
string length --visible (set_color red)abc
# CHECK: 3
@ -945,6 +949,37 @@ string shorten foo foobar
# CHECK: foo
# CHECK: fo…
# pad with a bell, it has zero width, that's fine
string shorten -c \a foo foobar | string escape
# CHECK: foo
# CHECK: foo\cg
string shorten -c \aw foo foobar | string escape
# CHECK: foo
# CHECK: fo\cgw
# backspace is fine!
string shorten -c \b foo foobar | string escape
# CHECK: foo
# CHECK: foo\b
string shorten -c \ba foo foobar | string escape
# CHECK: foo
# CHECK: fo\ba
string shorten -c cool\b\b\b\b foo foobar | string escape
# CHECK: foo
# CHECK: foocool\b\b\b\b
string shorten -c cool\b\b\b\b\b foo foobar | string escape
# CHECK: foo
# negative width ellipsis is fine
# CHECK: foocool\b\b\b\b\b
string shorten -c \a\aXX foo foobar | string escape
# CHECK: foo
# CHECK: f\cg\cgXX
# A weird case - our minimum width here is 1,
# so everything that goes over the width becomes "x"
for i in (seq 1 10)
@ -995,6 +1030,11 @@ string shorten -m6 (set_color blue)s(set_color red)t(set_color --bold brwhite)ri
# Note that red sequence that we still pass on because it's width 0.
# CHECK: \e\[34ms\e\[31mt\e\[1m\e\[37mrin\e\[31m…
# See that colors aren't counted in ellipsis
string shorten -c (set_color blue)s(set_color red)t(set_color --bold brwhite)rin(set_color red)g -m 8 abcdefghijklmno | string escape
# Renders like "abstring" in colors
# CHECK: ab\e\[34ms\e\[31mt\e\[1m\e\[37mrin\e\[31mg
set -l str (set_color blue)s(set_color red)t(set_color --bold brwhite)rin(set_color red)g(set_color yellow)-shorten
for i in (seq 1 (string length -V -- $str))
set -l len (string shorten -m$i -- $str | string length -V)
@ -1042,3 +1082,55 @@ string shorten -m0 foo bar asodjsaoidj
# CHECK: foo
# CHECK: bar
# CHECK: asodjsaoidj
# backspaces are weird
string shorten abc ab abcdef(string repeat -n 6 \b) | string escape
# CHECK: a…
# CHECK: ab
# this line has length zero, since backspace removes it all
# CHECK: abcdef\b\b\b\b\b\b
# due to an integer overflow this might truncate the third backspaced one, it should not
string shorten abc ab abcdef(string repeat -n 7 \b) | string escape
# CHECK: a…
# CHECK: ab
# this line has length zero, since backspace removes it all
# CHECK: abcdef\b\b\b\b\b\b\b
# due to an integer overflow this might truncate
string shorten abc \bab ab abcdef | string escape
# CHECK: a…
# backspace does not contribute length at the start
# CHECK: \bab
# CHECK: ab
# CHECK: a…
string shorten abc \babc ab abcdef | string escape
# CHECK: a…
# CHECK: \ba…
# CHECK: ab
# CHECK: a…
# non-printable-escape-chars (in this case bell)
string shorten abc ab abcdef(string repeat -n 6 \a) | string escape
# CHECK: a…
# CHECK: ab
# CHECK: a…
string shorten abc ab abcdef(string repeat -n 7 \a) | string escape
# CHECK: a…
# CHECK: ab
# CHECK: a…
string shorten abc \aab ab abcdef | string escape
# CHECK: a…
# non-printables have length 0
# CHECK: \cgab
# CHECK: ab
# CHECK: a…
string shorten abc \aabc ab abcdef | string escape
# CHECK: a…
# CHECK: \cga…
# CHECK: ab
# CHECK: a…