Fix behaviour in the presence of non-visible width

Padding with an unprintable character is now disallowed, like it was for other
zero-length characters.

`string shorten` now ignores escape sequences and non-printable characters
when calculating the visible width of the ellipsis used (except for `\b`,
which is treated as a width of -1).
Previously `fish_wcswidth` returned a length of -1 when the ellipsis-str
contained any non-printable character, causing the command to poentially
print a larger width than expected.

This also fixes an integer overflows in `string shorten`'s
`max` and `max2`, when the cumulative sum of character widths turned negative
(e.g. with any non-printable characters, or `\b` after the changes above).
The overflow potentially caused strings containing non-printable characters
to be truncated.

This adds test that verify the fixed behaviour.
This commit is contained in:
Henrik Hørlück Berg 2023-07-13 04:17:19 +02:00 committed by Peter Ammon
parent 20be990fd9
commit 6dd2cd2b20
6 changed files with 151 additions and 59 deletions

View file

@ -51,6 +51,8 @@ Other improvements
- A bug that prevented certain executables from being offered in tab-completions when root has been fixed (:issue:`9639`). - A bug that prevented certain executables from being offered in tab-completions when root has been fixed (:issue:`9639`).
- Builin `jobs` will print commands with non-printable chars escaped (:issue:`9808`) - Builin `jobs` will print commands with non-printable chars escaped (:issue:`9808`)
- An integer overflow in `string repeat` leading to a near-infinite loop has been fixed (:issue:`9899`). - An integer overflow in `string repeat` leading to a near-infinite loop has been fixed (:issue:`9899`).
- `string shorten` behaves better in the presence of non-printable characters, including fixing an integer overflow that shortened strings more than intended. (:issue:`9854`)
- `string pad` no longer allows non-printable characters as padding. (:issue:`9854`)
For distributors For distributors
---------------- ----------------

View file

@ -259,7 +259,7 @@ enum Direction {
Right, Right,
} }
pub(self) fn width_without_escapes(ins: &wstr, start_pos: usize) -> i32 { pub(self) fn width_without_escapes(ins: &wstr, start_pos: usize) -> usize {
let mut width: i32 = 0; let mut width: i32 = 0;
for c in ins[start_pos..].chars() { for c in ins[start_pos..].chars() {
let w = fish_wcwidth_visible(c); let w = fish_wcwidth_visible(c);
@ -287,8 +287,9 @@ pub(self) fn width_without_escapes(ins: &wstr, start_pos: usize) -> i32 {
pos += 1; pos += 1;
} }
} }
// we subtracted less than we added
return width; debug_assert!(width >= 0, "line has negative width");
return width as usize;
} }
pub(self) fn escape_code_length(code: &wstr) -> Option<usize> { pub(self) fn escape_code_length(code: &wstr) -> Option<usize> {

View file

@ -41,8 +41,8 @@ impl StringSubCommand<'_> for Length {
// Carriage-return returns us to the beginning. The longest substring without // Carriage-return returns us to the beginning. The longest substring without
// carriage-return determines the overall width. // carriage-return determines the overall width.
for reset in split_string(&line, '\r') { for reset in split_string(&line, '\r') {
let n = width_without_escapes(&reset, 0) as usize; let n = width_without_escapes(&reset, 0);
max = max.max(n); max = usize::max(max, n);
} }
if max > 0 { if max > 0 {
nnonempty += 1; nnonempty += 1;

View file

@ -1,11 +1,12 @@
use std::borrow::Cow; use std::borrow::Cow;
use super::*; use super::*;
use crate::wutil::{fish_wcstol, fish_wcswidth}; use crate::fallback::fish_wcwidth;
use crate::wutil::fish_wcstol;
pub struct Pad { pub struct Pad {
char_to_pad: char, char_to_pad: char,
pad_char_width: i32, pad_char_width: usize,
pad_from: Direction, pad_from: Direction,
width: usize, width: usize,
} }
@ -33,25 +34,23 @@ impl StringSubCommand<'_> for Pad {
fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> { fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> {
match c { match c {
'c' => { 'c' => {
let arg = arg.expect("option -c requires an argument"); let [pad_char] = arg.unwrap().as_char_slice() else {
if arg.len() != 1 {
return Err(invalid_args!( return Err(invalid_args!(
"%ls: Padding should be a character '%ls'\n", "%ls: Padding should be a character '%ls'\n",
name, name,
Some(arg) arg
)); ));
} };
let pad_char_width = fish_wcswidth(arg.slice_to(1)); let pad_char_width = fish_wcwidth(*pad_char);
// can we ever have negative width? if pad_char_width <= 0 {
if pad_char_width == 0 {
return Err(invalid_args!( return Err(invalid_args!(
"%ls: Invalid padding character of width zero '%ls'\n", "%ls: Invalid padding character of width zero '%ls'\n",
name, name,
Some(arg) arg
)); ));
} }
self.pad_char_width = pad_char_width; self.pad_char_width = pad_char_width as usize;
self.char_to_pad = arg.char_at(0); self.char_to_pad = *pad_char;
} }
'r' => self.pad_from = Direction::Right, 'r' => self.pad_from = Direction::Right,
'w' => { 'w' => {
@ -71,8 +70,8 @@ impl StringSubCommand<'_> for Pad {
optind: &mut usize, optind: &mut usize,
args: &[&'args wstr], args: &[&'args wstr],
) -> Option<libc::c_int> { ) -> Option<libc::c_int> {
let mut max_width = 0i32; let mut max_width = 0usize;
let mut inputs: Vec<(Cow<'args, wstr>, i32)> = Vec::new(); let mut inputs: Vec<(Cow<'args, wstr>, usize)> = Vec::new();
let mut print_newline = true; let mut print_newline = true;
for (arg, want_newline) in Arguments::new(args, optind, streams) { for (arg, want_newline) in Arguments::new(args, optind, streams) {
@ -82,7 +81,7 @@ impl StringSubCommand<'_> for Pad {
print_newline = want_newline; print_newline = want_newline;
} }
let pad_width = max_width.max(self.width as i32); let pad_width = max_width.max(self.width);
for (input, width) in inputs { for (input, width) in inputs {
use std::iter::repeat; use std::iter::repeat;
@ -91,14 +90,14 @@ impl StringSubCommand<'_> for Pad {
let remaining_width = (pad_width - width) % self.pad_char_width; let remaining_width = (pad_width - width) % self.pad_char_width;
let mut padded: WString = match self.pad_from { let mut padded: WString = match self.pad_from {
Direction::Left => repeat(self.char_to_pad) Direction::Left => repeat(self.char_to_pad)
.take(pad as usize) .take(pad)
.chain(repeat(' ').take(remaining_width as usize)) .chain(repeat(' ').take(remaining_width))
.chain(input.chars()) .chain(input.chars())
.collect(), .collect(),
Direction::Right => input Direction::Right => input
.chars() .chars()
.chain(repeat(' ').take(remaining_width as usize)) .chain(repeat(' ').take(remaining_width))
.chain(repeat(self.char_to_pad).take(pad as usize)) .chain(repeat(self.char_to_pad).take(pad))
.collect(), .collect(),
}; };

View file

@ -1,25 +1,26 @@
use super::*; use super::*;
use crate::common::get_ellipsis_str; use crate::common::get_ellipsis_str;
use crate::fallback::fish_wcwidth;
use crate::wcstringutil::split_string; use crate::wcstringutil::split_string;
use crate::wutil::{fish_wcstol, fish_wcswidth}; use crate::wutil::fish_wcstol;
pub struct Shorten<'args> { pub struct Shorten<'args> {
chars_to_shorten: &'args wstr, ellipsis: &'args wstr,
ellipsis_width: usize,
max: Option<usize>, max: Option<usize>,
no_newline: bool, no_newline: bool,
quiet: bool, quiet: bool,
direction: Direction, shorten_from: Direction,
} }
impl Default for Shorten<'_> { impl Default for Shorten<'_> {
fn default() -> Self { fn default() -> Self {
Self { Self {
chars_to_shorten: get_ellipsis_str(), ellipsis: get_ellipsis_str(),
ellipsis_width: width_without_escapes(get_ellipsis_str(), 0),
max: None, max: None,
no_newline: false, no_newline: false,
quiet: false, quiet: false,
direction: Direction::Right, shorten_from: Direction::Right,
} }
} }
} }
@ -42,7 +43,10 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
arg: Option<&'args wstr>, arg: Option<&'args wstr>,
) -> Result<(), StringError> { ) -> Result<(), StringError> {
match c { match c {
'c' => self.chars_to_shorten = arg.expect("option --char requires an argument"), 'c' => {
self.ellipsis = arg.unwrap();
self.ellipsis_width = width_without_escapes(self.ellipsis, 0);
}
'm' => { 'm' => {
self.max = Some( self.max = Some(
fish_wcstol(arg.unwrap())? fish_wcstol(arg.unwrap())?
@ -51,7 +55,7 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
) )
} }
'N' => self.no_newline = true, 'N' => self.no_newline = true,
'l' => self.direction = Direction::Left, 'l' => self.shorten_from = Direction::Left,
'q' => self.quiet = true, 'q' => self.quiet = true,
_ => return Err(StringError::UnknownOption), _ => return Err(StringError::UnknownOption),
} }
@ -67,7 +71,6 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
) -> Option<libc::c_int> { ) -> Option<libc::c_int> {
let mut min_width = usize::MAX; let mut min_width = usize::MAX;
let mut inputs = Vec::new(); let mut inputs = Vec::new();
let mut ell = self.chars_to_shorten;
let iter = Arguments::new(args, optind, streams); let iter = Arguments::new(args, optind, streams);
@ -93,23 +96,23 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
// or we handle the lines separately. // or we handle the lines separately.
let mut splits = split_string(&arg, '\n').into_iter(); let mut splits = split_string(&arg, '\n').into_iter();
if self.no_newline && splits.len() > 1 { if self.no_newline && splits.len() > 1 {
let mut s = match self.direction { let mut s = match self.shorten_from {
Direction::Right => splits.next(), Direction::Right => splits.next(),
Direction::Left => splits.last(), Direction::Left => splits.last(),
} }
.unwrap(); .unwrap();
s.push_utfstr(ell); s.push_utfstr(self.ellipsis);
let width = width_without_escapes(&s, 0); let width = width_without_escapes(&s, 0);
if width > 0 && (width as usize) < min_width { if width > 0 && width < min_width {
min_width = width as usize; min_width = width;
} }
inputs.push(s); inputs.push(s);
} else { } else {
for s in splits { for s in splits {
let width = width_without_escapes(&s, 0); let width = width_without_escapes(&s, 0);
if width > 0 && (width as usize) < min_width { if width > 0 && width < min_width {
min_width = width as usize; min_width = width;
} }
inputs.push(s); inputs.push(s);
} }
@ -118,18 +121,12 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
let ourmax: usize = self.max.unwrap_or(min_width); let ourmax: usize = self.max.unwrap_or(min_width);
// TODO: Can we have negative width let (ell, ell_width) = if self.ellipsis_width > ourmax {
// If we can't even print our ellipsis, we substitute nothing,
let ell_width: i32 = { // truncating instead.
let w = fish_wcswidth(ell); (L!(""), 0)
if w > ourmax as i32 { } else {
// If we can't even print our ellipsis, we substitute nothing, (self.ellipsis, self.ellipsis_width)
// truncating instead.
ell = L!("");
0
} else {
w
}
}; };
let mut nsub = 0usize; let mut nsub = 0usize;
@ -153,7 +150,7 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
let mut pos = 0usize; let mut pos = 0usize;
let mut max = 0usize; let mut max = 0usize;
// Collect how much of the string we can use without going over the maximum. // Collect how much of the string we can use without going over the maximum.
if self.direction == Direction::Left { if self.shorten_from == Direction::Left {
// Our strategy for keeping from the end. // Our strategy for keeping from the end.
// This is rather unoptimized - actually going *backwards* from the end // This is rather unoptimized - actually going *backwards* from the end
// is extremely tricky because we would have to subtract escapes again. // is extremely tricky because we would have to subtract escapes again.
@ -165,7 +162,7 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
// If we're at the beginning and it fits, we sits. // If we're at the beginning and it fits, we sits.
// //
// Otherwise we require it to fit the ellipsis // Otherwise we require it to fit the ellipsis
if (w <= ourmax as i32 && pos == 0) || (w + ell_width <= ourmax as i32) { if (w <= ourmax && pos == 0) || (w + ell_width <= ourmax) {
out = line.slice_from(pos); out = line.slice_from(pos);
break; break;
} }
@ -191,23 +188,24 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> {
streams.out.append1('\n'); streams.out.append1('\n');
continue; continue;
} else { } else {
/* Direction::Right */ /* shorten the right side */
// Going from the left. // Going from the left.
// This is somewhat easier. // This is somewhat easier.
while max <= ourmax && pos < line.len() { while max <= ourmax && pos < line.len() {
pos += skip_escapes(&line, pos); pos += skip_escapes(&line, pos);
let w = fish_wcwidth(line.char_at(pos)); let w = fish_wcwidth_visible(line.char_at(pos)) as isize;
if w <= 0 || max + w as usize + ell_width as usize <= ourmax { if w <= 0 || max + w as usize + ell_width <= ourmax {
// If it still fits, even if it is the last, we add it. // If it still fits, even if it is the last, we add it.
max += w as usize; max = max.saturating_add_signed(w);
pos += 1; pos += 1;
} else { } else {
// We're at the limit, so see if the entire string fits. // We're at the limit, so see if the entire string fits.
let mut max2: usize = max + w as usize; let mut max2 = max + w as usize;
let mut pos2 = pos + 1; let mut pos2 = pos + 1;
while pos2 < line.len() { while pos2 < line.len() {
pos2 += skip_escapes(&line, pos2); pos2 += skip_escapes(&line, pos2);
max2 += fish_wcwidth(line.char_at(pos2)) as usize; let w = fish_wcwidth_visible(line.char_at(pos2)) as isize;
max2 = max2.saturating_add_signed(w);
pos2 += 1; pos2 += 1;
} }

View file

@ -91,6 +91,10 @@ string pad -c_ --width 5 longer-than-width-param x
string pad -c ab -w4 . string pad -c ab -w4 .
# CHECKERR: string pad: Padding should be a character 'ab' # CHECKERR: string pad: Padding should be a character 'ab'
# nonprintable characters does not make sense
string pad -c \u07 .
# CHECKERR: string pad: Invalid padding character of width zero {{'\a'}}
# Visible length. Let's start off simple, colors are ignored: # Visible length. Let's start off simple, colors are ignored:
string length --visible (set_color red)abc string length --visible (set_color red)abc
# CHECK: 3 # CHECK: 3
@ -945,6 +949,37 @@ string shorten foo foobar
# CHECK: foo # CHECK: foo
# CHECK: fo… # CHECK: fo…
# pad with a bell, it has zero width, that's fine
string shorten -c \a foo foobar | string escape
# CHECK: foo
# CHECK: foo\cg
string shorten -c \aw foo foobar | string escape
# CHECK: foo
# CHECK: fo\cgw
# backspace is fine!
string shorten -c \b foo foobar | string escape
# CHECK: foo
# CHECK: foo\b
string shorten -c \ba foo foobar | string escape
# CHECK: foo
# CHECK: fo\ba
string shorten -c cool\b\b\b\b foo foobar | string escape
# CHECK: foo
# CHECK: foocool\b\b\b\b
string shorten -c cool\b\b\b\b\b foo foobar | string escape
# CHECK: foo
# negative width ellipsis is fine
# CHECK: foocool\b\b\b\b\b
string shorten -c \a\aXX foo foobar | string escape
# CHECK: foo
# CHECK: f\cg\cgXX
# A weird case - our minimum width here is 1, # A weird case - our minimum width here is 1,
# so everything that goes over the width becomes "x" # so everything that goes over the width becomes "x"
for i in (seq 1 10) for i in (seq 1 10)
@ -995,6 +1030,11 @@ string shorten -m6 (set_color blue)s(set_color red)t(set_color --bold brwhite)ri
# Note that red sequence that we still pass on because it's width 0. # Note that red sequence that we still pass on because it's width 0.
# CHECK: \e\[34ms\e\[31mt\e\[1m\e\[37mrin\e\[31m… # CHECK: \e\[34ms\e\[31mt\e\[1m\e\[37mrin\e\[31m…
# See that colors aren't counted in ellipsis
string shorten -c (set_color blue)s(set_color red)t(set_color --bold brwhite)rin(set_color red)g -m 8 abcdefghijklmno | string escape
# Renders like "abstring" in colors
# CHECK: ab\e\[34ms\e\[31mt\e\[1m\e\[37mrin\e\[31mg
set -l str (set_color blue)s(set_color red)t(set_color --bold brwhite)rin(set_color red)g(set_color yellow)-shorten set -l str (set_color blue)s(set_color red)t(set_color --bold brwhite)rin(set_color red)g(set_color yellow)-shorten
for i in (seq 1 (string length -V -- $str)) for i in (seq 1 (string length -V -- $str))
set -l len (string shorten -m$i -- $str | string length -V) set -l len (string shorten -m$i -- $str | string length -V)
@ -1042,3 +1082,55 @@ string shorten -m0 foo bar asodjsaoidj
# CHECK: foo # CHECK: foo
# CHECK: bar # CHECK: bar
# CHECK: asodjsaoidj # CHECK: asodjsaoidj
# backspaces are weird
string shorten abc ab abcdef(string repeat -n 6 \b) | string escape
# CHECK: a…
# CHECK: ab
# this line has length zero, since backspace removes it all
# CHECK: abcdef\b\b\b\b\b\b
# due to an integer overflow this might truncate the third backspaced one, it should not
string shorten abc ab abcdef(string repeat -n 7 \b) | string escape
# CHECK: a…
# CHECK: ab
# this line has length zero, since backspace removes it all
# CHECK: abcdef\b\b\b\b\b\b\b
# due to an integer overflow this might truncate
string shorten abc \bab ab abcdef | string escape
# CHECK: a…
# backspace does not contribute length at the start
# CHECK: \bab
# CHECK: ab
# CHECK: a…
string shorten abc \babc ab abcdef | string escape
# CHECK: a…
# CHECK: \ba…
# CHECK: ab
# CHECK: a…
# non-printable-escape-chars (in this case bell)
string shorten abc ab abcdef(string repeat -n 6 \a) | string escape
# CHECK: a…
# CHECK: ab
# CHECK: a…
string shorten abc ab abcdef(string repeat -n 7 \a) | string escape
# CHECK: a…
# CHECK: ab
# CHECK: a…
string shorten abc \aab ab abcdef | string escape
# CHECK: a…
# non-printables have length 0
# CHECK: \cgab
# CHECK: ab
# CHECK: a…
string shorten abc \aabc ab abcdef | string escape
# CHECK: a…
# CHECK: \cga…
# CHECK: ab
# CHECK: a…