diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 417c47b94..9d11cfdba 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -51,6 +51,8 @@ Other improvements - A bug that prevented certain executables from being offered in tab-completions when root has been fixed (:issue:`9639`). - Builin `jobs` will print commands with non-printable chars escaped (:issue:`9808`) - An integer overflow in `string repeat` leading to a near-infinite loop has been fixed (:issue:`9899`). +- `string shorten` behaves better in the presence of non-printable characters, including fixing an integer overflow that shortened strings more than intended. (:issue:`9854`) +- `string pad` no longer allows non-printable characters as padding. (:issue:`9854`) For distributors ---------------- diff --git a/fish-rust/src/builtins/string.rs b/fish-rust/src/builtins/string.rs index 67491049f..0583d0f7a 100644 --- a/fish-rust/src/builtins/string.rs +++ b/fish-rust/src/builtins/string.rs @@ -259,7 +259,7 @@ enum Direction { Right, } -pub(self) fn width_without_escapes(ins: &wstr, start_pos: usize) -> i32 { +pub(self) fn width_without_escapes(ins: &wstr, start_pos: usize) -> usize { let mut width: i32 = 0; for c in ins[start_pos..].chars() { let w = fish_wcwidth_visible(c); @@ -287,8 +287,9 @@ pub(self) fn width_without_escapes(ins: &wstr, start_pos: usize) -> i32 { pos += 1; } } - - return width; + // we subtracted less than we added + debug_assert!(width >= 0, "line has negative width"); + return width as usize; } pub(self) fn escape_code_length(code: &wstr) -> Option { diff --git a/fish-rust/src/builtins/string/length.rs b/fish-rust/src/builtins/string/length.rs index d400658c4..6c53a2e0d 100644 --- a/fish-rust/src/builtins/string/length.rs +++ b/fish-rust/src/builtins/string/length.rs @@ -41,8 +41,8 @@ impl StringSubCommand<'_> for Length { // Carriage-return returns us to the beginning. The longest substring without // carriage-return determines the overall width. for reset in split_string(&line, '\r') { - let n = width_without_escapes(&reset, 0) as usize; - max = max.max(n); + let n = width_without_escapes(&reset, 0); + max = usize::max(max, n); } if max > 0 { nnonempty += 1; diff --git a/fish-rust/src/builtins/string/pad.rs b/fish-rust/src/builtins/string/pad.rs index 7b2ad761d..3238f042c 100644 --- a/fish-rust/src/builtins/string/pad.rs +++ b/fish-rust/src/builtins/string/pad.rs @@ -1,11 +1,12 @@ use std::borrow::Cow; use super::*; -use crate::wutil::{fish_wcstol, fish_wcswidth}; +use crate::fallback::fish_wcwidth; +use crate::wutil::fish_wcstol; pub struct Pad { char_to_pad: char, - pad_char_width: i32, + pad_char_width: usize, pad_from: Direction, width: usize, } @@ -33,25 +34,23 @@ impl StringSubCommand<'_> for Pad { fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> { match c { 'c' => { - let arg = arg.expect("option -c requires an argument"); - if arg.len() != 1 { + let [pad_char] = arg.unwrap().as_char_slice() else { return Err(invalid_args!( "%ls: Padding should be a character '%ls'\n", name, - Some(arg) + arg )); - } - let pad_char_width = fish_wcswidth(arg.slice_to(1)); - // can we ever have negative width? - if pad_char_width == 0 { + }; + let pad_char_width = fish_wcwidth(*pad_char); + if pad_char_width <= 0 { return Err(invalid_args!( "%ls: Invalid padding character of width zero '%ls'\n", name, - Some(arg) + arg )); } - self.pad_char_width = pad_char_width; - self.char_to_pad = arg.char_at(0); + self.pad_char_width = pad_char_width as usize; + self.char_to_pad = *pad_char; } 'r' => self.pad_from = Direction::Right, 'w' => { @@ -71,8 +70,8 @@ impl StringSubCommand<'_> for Pad { optind: &mut usize, args: &[&'args wstr], ) -> Option { - let mut max_width = 0i32; - let mut inputs: Vec<(Cow<'args, wstr>, i32)> = Vec::new(); + let mut max_width = 0usize; + let mut inputs: Vec<(Cow<'args, wstr>, usize)> = Vec::new(); let mut print_newline = true; for (arg, want_newline) in Arguments::new(args, optind, streams) { @@ -82,7 +81,7 @@ impl StringSubCommand<'_> for Pad { print_newline = want_newline; } - let pad_width = max_width.max(self.width as i32); + let pad_width = max_width.max(self.width); for (input, width) in inputs { use std::iter::repeat; @@ -91,14 +90,14 @@ impl StringSubCommand<'_> for Pad { let remaining_width = (pad_width - width) % self.pad_char_width; let mut padded: WString = match self.pad_from { Direction::Left => repeat(self.char_to_pad) - .take(pad as usize) - .chain(repeat(' ').take(remaining_width as usize)) + .take(pad) + .chain(repeat(' ').take(remaining_width)) .chain(input.chars()) .collect(), Direction::Right => input .chars() - .chain(repeat(' ').take(remaining_width as usize)) - .chain(repeat(self.char_to_pad).take(pad as usize)) + .chain(repeat(' ').take(remaining_width)) + .chain(repeat(self.char_to_pad).take(pad)) .collect(), }; diff --git a/fish-rust/src/builtins/string/shorten.rs b/fish-rust/src/builtins/string/shorten.rs index 0c46ddc97..163b7383b 100644 --- a/fish-rust/src/builtins/string/shorten.rs +++ b/fish-rust/src/builtins/string/shorten.rs @@ -1,25 +1,26 @@ use super::*; use crate::common::get_ellipsis_str; -use crate::fallback::fish_wcwidth; use crate::wcstringutil::split_string; -use crate::wutil::{fish_wcstol, fish_wcswidth}; +use crate::wutil::fish_wcstol; pub struct Shorten<'args> { - chars_to_shorten: &'args wstr, + ellipsis: &'args wstr, + ellipsis_width: usize, max: Option, no_newline: bool, quiet: bool, - direction: Direction, + shorten_from: Direction, } impl Default for Shorten<'_> { fn default() -> Self { Self { - chars_to_shorten: get_ellipsis_str(), + ellipsis: get_ellipsis_str(), + ellipsis_width: width_without_escapes(get_ellipsis_str(), 0), max: None, no_newline: false, quiet: false, - direction: Direction::Right, + shorten_from: Direction::Right, } } } @@ -42,7 +43,10 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> { arg: Option<&'args wstr>, ) -> Result<(), StringError> { match c { - 'c' => self.chars_to_shorten = arg.expect("option --char requires an argument"), + 'c' => { + self.ellipsis = arg.unwrap(); + self.ellipsis_width = width_without_escapes(self.ellipsis, 0); + } 'm' => { self.max = Some( fish_wcstol(arg.unwrap())? @@ -51,7 +55,7 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> { ) } 'N' => self.no_newline = true, - 'l' => self.direction = Direction::Left, + 'l' => self.shorten_from = Direction::Left, 'q' => self.quiet = true, _ => return Err(StringError::UnknownOption), } @@ -67,7 +71,6 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> { ) -> Option { let mut min_width = usize::MAX; let mut inputs = Vec::new(); - let mut ell = self.chars_to_shorten; let iter = Arguments::new(args, optind, streams); @@ -93,23 +96,23 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> { // or we handle the lines separately. let mut splits = split_string(&arg, '\n').into_iter(); if self.no_newline && splits.len() > 1 { - let mut s = match self.direction { + let mut s = match self.shorten_from { Direction::Right => splits.next(), Direction::Left => splits.last(), } .unwrap(); - s.push_utfstr(ell); + s.push_utfstr(self.ellipsis); let width = width_without_escapes(&s, 0); - if width > 0 && (width as usize) < min_width { - min_width = width as usize; + if width > 0 && width < min_width { + min_width = width; } inputs.push(s); } else { for s in splits { let width = width_without_escapes(&s, 0); - if width > 0 && (width as usize) < min_width { - min_width = width as usize; + if width > 0 && width < min_width { + min_width = width; } inputs.push(s); } @@ -118,18 +121,12 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> { let ourmax: usize = self.max.unwrap_or(min_width); - // TODO: Can we have negative width - - let ell_width: i32 = { - let w = fish_wcswidth(ell); - if w > ourmax as i32 { - // If we can't even print our ellipsis, we substitute nothing, - // truncating instead. - ell = L!(""); - 0 - } else { - w - } + let (ell, ell_width) = if self.ellipsis_width > ourmax { + // If we can't even print our ellipsis, we substitute nothing, + // truncating instead. + (L!(""), 0) + } else { + (self.ellipsis, self.ellipsis_width) }; let mut nsub = 0usize; @@ -153,7 +150,7 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> { let mut pos = 0usize; let mut max = 0usize; // Collect how much of the string we can use without going over the maximum. - if self.direction == Direction::Left { + if self.shorten_from == Direction::Left { // Our strategy for keeping from the end. // This is rather unoptimized - actually going *backwards* from the end // is extremely tricky because we would have to subtract escapes again. @@ -165,7 +162,7 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> { // If we're at the beginning and it fits, we sits. // // Otherwise we require it to fit the ellipsis - if (w <= ourmax as i32 && pos == 0) || (w + ell_width <= ourmax as i32) { + if (w <= ourmax && pos == 0) || (w + ell_width <= ourmax) { out = line.slice_from(pos); break; } @@ -191,23 +188,24 @@ impl<'args> StringSubCommand<'args> for Shorten<'args> { streams.out.append1('\n'); continue; } else { - /* Direction::Right */ + /* shorten the right side */ // Going from the left. // This is somewhat easier. while max <= ourmax && pos < line.len() { pos += skip_escapes(&line, pos); - let w = fish_wcwidth(line.char_at(pos)); - if w <= 0 || max + w as usize + ell_width as usize <= ourmax { + let w = fish_wcwidth_visible(line.char_at(pos)) as isize; + if w <= 0 || max + w as usize + ell_width <= ourmax { // If it still fits, even if it is the last, we add it. - max += w as usize; + max = max.saturating_add_signed(w); pos += 1; } else { // We're at the limit, so see if the entire string fits. - let mut max2: usize = max + w as usize; + let mut max2 = max + w as usize; let mut pos2 = pos + 1; while pos2 < line.len() { pos2 += skip_escapes(&line, pos2); - max2 += fish_wcwidth(line.char_at(pos2)) as usize; + let w = fish_wcwidth_visible(line.char_at(pos2)) as isize; + max2 = max2.saturating_add_signed(w); pos2 += 1; } diff --git a/tests/checks/string.fish b/tests/checks/string.fish index ee9f1bc0e..871052247 100644 --- a/tests/checks/string.fish +++ b/tests/checks/string.fish @@ -91,6 +91,10 @@ string pad -c_ --width 5 longer-than-width-param x string pad -c ab -w4 . # CHECKERR: string pad: Padding should be a character 'ab' +# nonprintable characters does not make sense +string pad -c \u07 . +# CHECKERR: string pad: Invalid padding character of width zero {{'\a'}} + # Visible length. Let's start off simple, colors are ignored: string length --visible (set_color red)abc # CHECK: 3 @@ -945,6 +949,37 @@ string shorten foo foobar # CHECK: foo # CHECK: fo… +# pad with a bell, it has zero width, that's fine +string shorten -c \a foo foobar | string escape +# CHECK: foo +# CHECK: foo\cg + +string shorten -c \aw foo foobar | string escape +# CHECK: foo +# CHECK: fo\cgw + +# backspace is fine! +string shorten -c \b foo foobar | string escape +# CHECK: foo +# CHECK: foo\b + +string shorten -c \ba foo foobar | string escape +# CHECK: foo +# CHECK: fo\ba + +string shorten -c cool\b\b\b\b foo foobar | string escape +# CHECK: foo +# CHECK: foocool\b\b\b\b + +string shorten -c cool\b\b\b\b\b foo foobar | string escape +# CHECK: foo +# negative width ellipsis is fine +# CHECK: foocool\b\b\b\b\b + +string shorten -c \a\aXX foo foobar | string escape +# CHECK: foo +# CHECK: f\cg\cgXX + # A weird case - our minimum width here is 1, # so everything that goes over the width becomes "x" for i in (seq 1 10) @@ -995,6 +1030,11 @@ string shorten -m6 (set_color blue)s(set_color red)t(set_color --bold brwhite)ri # Note that red sequence that we still pass on because it's width 0. # CHECK: \e\[34ms\e\[31mt\e\[1m\e\[37mrin\e\[31m… +# See that colors aren't counted in ellipsis +string shorten -c (set_color blue)s(set_color red)t(set_color --bold brwhite)rin(set_color red)g -m 8 abcdefghijklmno | string escape +# Renders like "abstring" in colors +# CHECK: ab\e\[34ms\e\[31mt\e\[1m\e\[37mrin\e\[31mg + set -l str (set_color blue)s(set_color red)t(set_color --bold brwhite)rin(set_color red)g(set_color yellow)-shorten for i in (seq 1 (string length -V -- $str)) set -l len (string shorten -m$i -- $str | string length -V) @@ -1042,3 +1082,55 @@ string shorten -m0 foo bar asodjsaoidj # CHECK: foo # CHECK: bar # CHECK: asodjsaoidj + +# backspaces are weird +string shorten abc ab abcdef(string repeat -n 6 \b) | string escape +# CHECK: a… +# CHECK: ab +# this line has length zero, since backspace removes it all +# CHECK: abcdef\b\b\b\b\b\b + +# due to an integer overflow this might truncate the third backspaced one, it should not +string shorten abc ab abcdef(string repeat -n 7 \b) | string escape +# CHECK: a… +# CHECK: ab +# this line has length zero, since backspace removes it all +# CHECK: abcdef\b\b\b\b\b\b\b + +# due to an integer overflow this might truncate +string shorten abc \bab ab abcdef | string escape +# CHECK: a… +# backspace does not contribute length at the start +# CHECK: \bab +# CHECK: ab +# CHECK: a… + +string shorten abc \babc ab abcdef | string escape +# CHECK: a… +# CHECK: \ba… +# CHECK: ab +# CHECK: a… + +# non-printable-escape-chars (in this case bell) +string shorten abc ab abcdef(string repeat -n 6 \a) | string escape +# CHECK: a… +# CHECK: ab +# CHECK: a… + +string shorten abc ab abcdef(string repeat -n 7 \a) | string escape +# CHECK: a… +# CHECK: ab +# CHECK: a… + +string shorten abc \aab ab abcdef | string escape +# CHECK: a… +# non-printables have length 0 +# CHECK: \cgab +# CHECK: ab +# CHECK: a… + +string shorten abc \aabc ab abcdef | string escape +# CHECK: a… +# CHECK: \cga… +# CHECK: ab +# CHECK: a…