Rationalize fish_wcstoi/d and friends

Historically fish has used the functions `fish_wcstol`, `fish_wcstoi`, and
`fish_wcstoul` (and some long long variants) for most integer conversions.
These have semantics that are deliberately different from the libc
functions, such as consuming trailing whitespace, and disallowing `-` in
unsigned versions.

fish has started to drift away from these semantics; some divergence from
C++ has crept in.

Rename the existing `fish_wcs*` functions in Rust to remove the fish
prefix, to express that they attempt to mirror libc semantics; then
introduce `fish_` wrappers which are ported from C++. Also fix some
miscellaneous bugs which have crept in, such as missing range checks.
This commit is contained in:
ridiculousfish 2023-05-14 17:25:55 -07:00
parent 364f8223b2
commit 60d439ab22
9 changed files with 211 additions and 101 deletions

View file

@ -93,24 +93,25 @@ pub fn bg(parser: &mut parser_t, streams: &mut io_streams_t, args: &mut [&wstr])
}
// The user specified at least one job to be backgrounded.
let mut pids: Vec<libc::pid_t> = Vec::new();
// If one argument is not a valid pid (i.e. integer >= 0), fail without backgrounding anything,
// but still print errors for all of them.
let mut retval = STATUS_CMD_OK;
let pids: Vec<i64> = args[opts.optind..]
.iter()
.map(|&arg| {
fish_wcstoi(arg).unwrap_or_else(|_| {
streams.err.append(wgettext_fmt!(
"%ls: '%ls' is not a valid job specifier\n",
cmd,
arg
));
retval = STATUS_INVALID_ARGS;
0
})
})
.collect();
let mut retval: Option<i32> = STATUS_CMD_OK;
for arg in &args[opts.optind..] {
let pid = fish_wcstoi(arg);
#[allow(clippy::unnecessary_unwrap)]
if pid.is_err() || pid.unwrap() < 0 {
streams.err.append(wgettext_fmt!(
"%ls: '%ls' is not a valid job specifier\n",
cmd,
arg
));
retval = STATUS_INVALID_ARGS;
} else {
pids.push(pid.unwrap());
}
}
if retval != STATUS_CMD_OK {
return retval;
@ -122,7 +123,7 @@ pub fn bg(parser: &mut parser_t, streams: &mut io_streams_t, args: &mut [&wstr])
let mut job_pos = 0;
let job = unsafe {
parser
.job_get_from_pid1(pid, Pin::new(&mut job_pos))
.job_get_from_pid1(autocxx::c_int(pid), Pin::new(&mut job_pos))
.as_ref()
};

View file

@ -59,35 +59,41 @@ fn parse_cmd_opts(
let optarg = w.woptarg.unwrap();
have_scale = true;
// "max" is the special value that tells us to pick the maximum scale.
opts.scale = if optarg == "max"L {
15
} else if let Ok(base) = fish_wcstoi(optarg) {
base
if optarg == "max" {
opts.scale = 15;
} else {
streams.err.append(wgettext_fmt!(
"%ls: %ls: invalid base value\n",
cmd,
optarg
));
return Err(STATUS_INVALID_ARGS);
};
let scale = fish_wcstoi(optarg);
if scale.is_err() || scale.unwrap() < 0 || scale.unwrap() > 15 {
streams.err.append(wgettext_fmt!(
"%ls: %ls: invalid base value\n",
cmd,
optarg
));
return Err(STATUS_INVALID_ARGS);
}
// We know the value is in the range [0, 15]
opts.scale = scale.unwrap() as usize;
}
}
'b' => {
let optarg = w.woptarg.unwrap();
opts.base = if optarg == "hex"L {
16
} else if optarg == "octal"L {
8
} else if let Ok(base) = fish_wcstoi(optarg) {
base
if optarg == "hex" {
opts.base = 16;
} else if optarg == "octal" {
opts.base = 8;
} else {
streams.err.append(wgettext_fmt!(
"%ls: %ls: invalid base value\n",
cmd,
optarg
));
return Err(STATUS_INVALID_ARGS);
};
let base = fish_wcstoi(optarg);
if base.is_err() || (base.unwrap() != 8 && base.unwrap() != 16) {
streams.err.append(wgettext_fmt!(
"%ls: %ls: invalid base value\n",
cmd,
optarg
));
return Err(STATUS_INVALID_ARGS);
}
// We know the value is 8 or 16.
opts.base = base.unwrap() as usize;
}
}
'h' => {
opts.print_help = true;

View file

@ -59,7 +59,7 @@ use crate::wchar::{encode_byte_to_char, wstr, WExt, WString, L};
use crate::wutil::errors::Error;
use crate::wutil::gettext::{wgettext, wgettext_fmt};
use crate::wutil::wcstod::wcstod;
use crate::wutil::wcstoi::{fish_wcstoi_partial, Options as WcstoiOpts};
use crate::wutil::wcstoi::{wcstoi_partial, Options as WcstoiOpts};
use crate::wutil::{sprintf, wstr_offset_in};
use printf_compat::args::ToArg;
use printf_compat::printf::sprintf_locale;
@ -129,7 +129,7 @@ impl RawStringToScalarType for i64 {
end: &mut &'a wstr,
) -> Result<Self, Error> {
let mut consumed = 0;
let res = fish_wcstoi_partial(s, WcstoiOpts::default(), &mut consumed);
let res = wcstoi_partial(s, WcstoiOpts::default(), &mut consumed);
*end = s.slice_from(consumed);
res
}
@ -142,7 +142,7 @@ impl RawStringToScalarType for u64 {
end: &mut &'a wstr,
) -> Result<Self, Error> {
let mut consumed = 0;
let res = fish_wcstoi_partial(
let res = wcstoi_partial(
s,
WcstoiOpts {
wrap_negatives: true,

View file

@ -7,12 +7,10 @@ use crate::builtins::shared::{
use crate::ffi::parser_t;
use crate::wchar::{wstr, L};
use crate::wgetopt::{wgetopter_t, wopt, woption, woption_argument_t};
use crate::wutil::{self, fish_wcstoi_opts, sprintf, wgettext_fmt, Options as WcstoiOptions};
use num_traits::PrimInt;
use crate::wutil::{self, fish_wcstol, fish_wcstoul, sprintf, wgettext_fmt};
use once_cell::sync::Lazy;
use rand::rngs::SmallRng;
use rand::{Rng, SeedableRng};
use std::default::Default;
use std::sync::Mutex;
static RNG: Lazy<Mutex<SmallRng>> = Lazy::new(|| Mutex::new(SmallRng::from_entropy()));
@ -69,22 +67,21 @@ pub fn random(
.append(sprintf!(L!("%ls\n"), argv[i + 1 + rand]));
return STATUS_CMD_OK;
}
fn parse<T: PrimInt>(
streams: &mut io_streams_t,
cmd: &wstr,
num: &wstr,
) -> Result<T, wutil::Error> {
let res = fish_wcstoi_opts(
num,
WcstoiOptions {
consume_all: true,
..Default::default()
},
);
fn parse_ll(streams: &mut io_streams_t, cmd: &wstr, num: &wstr) -> Result<i64, wutil::Error> {
let res = fish_wcstol(num);
if res.is_err() {
streams
.err
.append(wgettext_fmt!("%ls: %ls: invalid integer\n", cmd, num,));
.append(wgettext_fmt!("%ls: %ls: invalid integer\n", cmd, num));
}
return res;
}
fn parse_ull(streams: &mut io_streams_t, cmd: &wstr, num: &wstr) -> Result<u64, wutil::Error> {
let res = fish_wcstoul(num);
if res.is_err() {
streams
.err
.append(wgettext_fmt!("%ls: %ls: invalid integer\n", cmd, num));
}
return res;
}
@ -95,7 +92,7 @@ pub fn random(
}
1 => {
// Seed the engine persistently
let num = parse::<i64>(streams, cmd, argv[i]);
let num = parse_ll(streams, cmd, argv[i]);
match num {
Err(_) => return STATUS_INVALID_ARGS,
Ok(x) => {
@ -107,25 +104,25 @@ pub fn random(
}
2 => {
// start is first, end is second
match parse::<i64>(streams, cmd, argv[i]) {
match parse_ll(streams, cmd, argv[i]) {
Err(_) => return STATUS_INVALID_ARGS,
Ok(x) => start = x,
}
match parse::<i64>(streams, cmd, argv[i + 1]) {
match parse_ll(streams, cmd, argv[i + 1]) {
Err(_) => return STATUS_INVALID_ARGS,
Ok(x) => end = x,
}
}
3 => {
// start, step, end
match parse::<i64>(streams, cmd, argv[i]) {
match parse_ll(streams, cmd, argv[i]) {
Err(_) => return STATUS_INVALID_ARGS,
Ok(x) => start = x,
}
// start, step, end
match parse::<u64>(streams, cmd, argv[i + 1]) {
match parse_ull(streams, cmd, argv[i + 1]) {
Err(_) => return STATUS_INVALID_ARGS,
Ok(0) => {
streams
@ -136,7 +133,7 @@ pub fn random(
Ok(x) => step = x,
}
match parse::<i64>(streams, cmd, argv[i + 2]) {
match parse_ll(streams, cmd, argv[i + 2]) {
Err(_) => return STATUS_INVALID_ARGS,
Ok(x) => end = x,
}

View file

@ -11,7 +11,7 @@ use crate::threads::{is_forked_child, is_main_thread};
use crate::wchar::{widestrs, wstr, WExt, WString, L};
use crate::wchar_ext::ToWString;
use crate::wchar_ffi::{WCharFromFFI, WCharToFFI};
use crate::wutil::{fish_wcstoi_opts, sprintf, Options};
use crate::wutil::{fish_wcstol_radix, sprintf};
use autocxx::WithinUniquePtr;
use cxx::UniquePtr;
@ -95,12 +95,7 @@ fn set_umask(list_val: &Vec<WString>) -> EnvStackSetResult {
if list_val.len() != 1 || list_val[0].is_empty() {
return EnvStackSetResult::ENV_INVALID;
}
let opts = Options {
wrap_negatives: false,
consume_all: false,
mradix: Some(8),
};
let Ok(mask) = fish_wcstoi_opts(&list_val[0], opts) else {
let Ok(mask) = fish_wcstol_radix(&list_val[0], 8) else {
return EnvStackSetResult::ENV_INVALID;
};
@ -114,7 +109,7 @@ fn set_umask(list_val: &Vec<WString>) -> EnvStackSetResult {
}
// Do not actually create a umask variable. On env_stack_t::get() it will be calculated.
// SAFETY: umask cannot fail.
unsafe { libc::umask(mask) };
unsafe { libc::umask(mask as libc::mode_t) };
EnvStackSetResult::ENV_OK
}

View file

@ -40,17 +40,15 @@ static TTY_TERMSIZE_GEN_COUNT: AtomicU32 = AtomicU32::new(0);
/// Convert an environment variable to an int, or return a default value.
/// The int must be >0 and <USHRT_MAX (from struct winsize).
fn var_to_int_or(var: Option<WString>, default: isize) -> isize {
match var {
Some(s) => {
let proposed = fish_wcstoi(&s);
if let Ok(proposed) = proposed {
proposed
} else {
default
if var.is_some() && !var.as_ref().unwrap().is_empty() {
#[allow(clippy::unnecessary_unwrap)]
if let Ok(proposed) = fish_wcstoi(&var.unwrap()) {
if proposed > 0 && proposed <= u16::MAX as i32 {
return proposed as isize;
}
}
None => default,
}
default
}
/// \return a termsize from ioctl, or None on error or if not supported.

View file

@ -1,5 +1,6 @@
pub use super::errors::Error;
use crate::wchar::IntoCharIter;
use crate::wchar::{wstr, IntoCharIter};
use crate::wchar_ext::WExt;
use num_traits::{NumCast, PrimInt};
use std::default::Default;
use std::iter::{Fuse, Peekable};
@ -64,7 +65,7 @@ fn parse_radix<Iter: Iterator<Item = char>>(
error_if_negative: bool,
) -> Result<ParseResult, Error> {
if let Some(r) = mradix {
assert!((2..=36).contains(&r), "fish_parse_radix: invalid radix {r}");
assert!((2..=36).contains(&r), "parse_radix: invalid radix {r}");
}
// Construct a CharsIterator to keep track of how many we consume.
@ -161,7 +162,7 @@ fn parse_radix<Iter: Iterator<Item = char>>(
}
/// Parse some iterator over Chars into some Integer type, optionally with a radix.
fn fish_wcstoi_impl<Int, Chars>(
fn wcstoi_impl<Int, Chars>(
src: Chars,
options: Options,
out_consumed: &mut usize,
@ -171,7 +172,7 @@ where
Int: PrimInt,
{
let bits = Int::zero().count_zeros();
assert!(bits <= 64, "fish_wcstoi: Int must be <= 64 bits");
assert!(bits <= 64, "wcstoi: Int must be <= 64 bits");
let signed = Int::min_value() < Int::zero();
let Options {
@ -228,22 +229,22 @@ where
/// - Leading whitespace is skipped.
/// - 0 means octal, 0x means hex
/// - Leading + is supported.
pub fn fish_wcstoi<Int, Chars>(src: Chars) -> Result<Int, Error>
pub fn wcstoi<Int, Chars>(src: Chars) -> Result<Int, Error>
where
Chars: IntoCharIter,
Int: PrimInt,
{
fish_wcstoi_impl(src.chars(), Default::default(), &mut 0)
wcstoi_impl(src.chars(), Default::default(), &mut 0)
}
/// Convert the given wide string to an integer using the given radix.
/// Leading whitespace is skipped.
pub fn fish_wcstoi_opts<Int, Chars>(src: Chars, options: Options) -> Result<Int, Error>
pub fn wcstoi_opts<Int, Chars>(src: Chars, options: Options) -> Result<Int, Error>
where
Chars: IntoCharIter,
Int: PrimInt,
{
fish_wcstoi_impl(src.chars(), options, &mut 0)
wcstoi_impl(src.chars(), options, &mut 0)
}
/// Convert the given wide string to an integer.
@ -252,7 +253,7 @@ where
/// - 0 means octal, 0x means hex
/// - Leading + is supported.
/// The number of consumed characters is returned in out_consumed.
pub fn fish_wcstoi_partial<Int, Chars>(
pub fn wcstoi_partial<Int, Chars>(
src: Chars,
options: Options,
out_consumed: &mut usize,
@ -261,23 +262,93 @@ where
Chars: IntoCharIter,
Int: PrimInt,
{
fish_wcstoi_impl(src.chars(), options, out_consumed)
wcstoi_impl(src.chars(), options, out_consumed)
}
/// A historic "enhanced" version of wcstol.
/// Leading whitespace is ignored (per wcstol).
/// Trailing whitespace is also ignored.
/// Trailing characters other than whitespace are errors.
pub fn fish_wcstol_radix(mut src: &wstr, radix: u32) -> Result<i64, Error> {
// Unlike wcstol, we do not infer the radix.
assert!(radix > 0, "radix cannot be 0");
let options: Options = Options {
mradix: Some(radix),
..Default::default()
};
let mut consumed = 0;
let result = wcstoi_partial(src, options, &mut consumed)?;
// Skip trailing whitespace, erroring if we encounter a non-whitespace character.
src = src.slice_from(consumed);
while !src.is_empty() && src.char_at(0).is_whitespace() {
src = src.slice_from(1);
}
if !src.is_empty() {
return Err(Error::InvalidChar);
}
Ok(result)
}
/// Variant of fish_wcstol_radix which assumes base 10.
pub fn fish_wcstol(src: &wstr) -> Result<i64, Error> {
fish_wcstol_radix(src, 10)
}
/// Variant of fish_wcstol for ints, erroring if it does not fit.
pub fn fish_wcstoi(src: &wstr) -> Result<i32, Error> {
let res = fish_wcstol(src)?;
if let Ok(val) = res.try_into() {
Ok(val)
} else {
Err(Error::Overflow)
}
}
/// Historic "enhanced" version of wcstoul.
/// Leading minus is considered invalid.
/// Leading whitespace is ignored (per wcstoul).
/// Trailing whitespace is also ignored.
pub fn fish_wcstoul(mut src: &wstr) -> Result<u64, Error> {
// Skip leading whitespace.
while !src.is_empty() && src.char_at(0).is_whitespace() {
src = src.slice_from(1);
}
// Disallow minus as the first character to avoid questionable wrap-around.
if src.is_empty() || src.char_at(0) == '-' {
return Err(Error::InvalidChar);
}
let options: Options = Options {
mradix: Some(10),
..Default::default()
};
let mut consumed = 0;
let result = wcstoi_partial(src, options, &mut consumed)?;
// Skip trailling whitespace.
src = src.slice_from(consumed);
while !src.is_empty() && src.char_at(0).is_whitespace() {
src = src.slice_from(1);
}
if !src.is_empty() {
return Err(Error::InvalidChar);
}
Ok(result)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::wchar::L;
fn test_min_max<Int: PrimInt + std::fmt::Display + std::fmt::Debug>(min: Int, max: Int) {
assert_eq!(fish_wcstoi(min.to_string().chars()), Ok(min));
assert_eq!(fish_wcstoi(max.to_string().chars()), Ok(max));
assert_eq!(wcstoi(min.to_string().chars()), Ok(min));
assert_eq!(wcstoi(max.to_string().chars()), Ok(max));
}
#[test]
fn test_signed() {
let run1 = |s: &str| -> Result<i32, Error> { fish_wcstoi(s.chars()) };
let run1 = |s: &str| -> Result<i32, Error> { wcstoi(s.chars()) };
let run1_rad = |s: &str, radix: u32| -> Result<i32, Error> {
fish_wcstoi_opts(
wcstoi_opts(
s.chars(),
Options {
mradix: Some(radix),
@ -335,7 +406,7 @@ mod tests {
}
let run1 = |s: &str| -> Result<u64, Error> {
fish_wcstoi_opts(
wcstoi_opts(
s.chars(),
Options {
wrap_negatives: true,
@ -344,7 +415,7 @@ mod tests {
)
};
let run1_rad = |s: &str, radix: u32| -> Result<u64, Error> {
fish_wcstoi_opts(
wcstoi_opts(
s.chars(),
Options {
wrap_negatives: true,
@ -394,7 +465,7 @@ mod tests {
std::u64::MAX - x + 1
}
let run1 = |s: &str, opts: Options| -> Result<u64, Error> { fish_wcstoi_opts(s, opts) };
let run1 = |s: &str, opts: Options| -> Result<u64, Error> { wcstoi_opts(s, opts) };
let mut opts = Options::default();
assert_eq!(run1("-123", opts), Err(Error::InvalidChar));
assert_eq!(run1("-0x123", opts), Err(Error::InvalidChar));
@ -410,7 +481,7 @@ mod tests {
fn test_partial() {
let run1 = |s: &str| -> (i32, usize) {
let mut consumed = 0;
let res = fish_wcstoi_partial(s, Default::default(), &mut consumed)
let res = wcstoi_partial(s, Default::default(), &mut consumed)
.expect("Should have parsed an int");
(res, consumed)
};
@ -430,4 +501,46 @@ mod tests {
assert_eq!(run1("0x"), (0, 1));
assert_eq!(run1("0xx"), (0, 1));
}
#[test]
fn test_fish_wcstol() {
assert_eq!(fish_wcstol(L!("0")), Ok(0));
assert_eq!(fish_wcstol(L!("10")), Ok(10));
assert_eq!(fish_wcstol(L!(" 10")), Ok(10));
assert_eq!(fish_wcstol(L!(" 10 ")), Ok(10));
assert_eq!(fish_wcstol(L!("-10")), Ok(-10));
assert_eq!(fish_wcstol(L!(" +10 ")), Ok(10));
assert_eq!(fish_wcstol(L!("10foo")), Err(Error::InvalidChar));
assert_eq!(fish_wcstol(L!("10.5")), Err(Error::InvalidChar));
assert_eq!(fish_wcstol(L!("10 x ")), Err(Error::InvalidChar));
}
#[test]
fn test_fish_wcstoi() {
assert_eq!(fish_wcstoi(L!("0")), Ok(0));
assert_eq!(fish_wcstoi(L!("10")), Ok(10));
assert_eq!(fish_wcstoi(L!(" 10")), Ok(10));
assert_eq!(fish_wcstoi(L!(" 10 ")), Ok(10));
assert_eq!(fish_wcstoi(L!("-10")), Ok(-10));
assert_eq!(fish_wcstoi(L!(" +10 ")), Ok(10));
assert_eq!(fish_wcstoi(L!(" 2147483647 ")), Ok(2147483647));
assert_eq!(fish_wcstoi(L!(" 2147483648 ")), Err(Error::Overflow));
assert_eq!(fish_wcstoi(L!(" -2147483647 ")), Ok(-2147483647));
assert_eq!(fish_wcstoi(L!(" -2147483648 ")), Ok(-2147483648));
assert_eq!(fish_wcstoi(L!(" -2147483649 ")), Err(Error::Overflow));
assert_eq!(fish_wcstoi(L!("10foo")), Err(Error::InvalidChar));
assert_eq!(fish_wcstoi(L!("10.5")), Err(Error::InvalidChar));
}
#[test]
fn test_fish_wcstoul() {
assert_eq!(fish_wcstoul(L!("0")), Ok(0));
assert_eq!(fish_wcstoul(L!("10")), Ok(10));
assert_eq!(fish_wcstoul(L!(" +10")), Ok(10));
assert_eq!(fish_wcstoul(L!(" -10")), Err(Error::InvalidChar));
assert_eq!(fish_wcstoul(L!(" 10 ")), Ok(10));
assert_eq!(fish_wcstoul(L!("10foo")), Err(Error::InvalidChar));
assert_eq!(fish_wcstoul(L!("10.5")), Err(Error::InvalidChar));
assert_eq!(fish_wcstoul(L!("18446744073709551615")), Ok(u64::MAX));
}
}

View file

@ -501,7 +501,7 @@ job_t *parser_t::job_get_from_pid(pid_t pid) const {
return job_get_from_pid(pid, job_pos);
}
job_t *parser_t::job_get_from_pid(int64_t pid, size_t &job_pos) const {
job_t *parser_t::job_get_from_pid(int pid, size_t &job_pos) const {
for (auto it = job_list.begin(); it != job_list.end(); ++it) {
for (const process_ptr_t &p : (*it)->processes) {
if (p->pid == pid) {

View file

@ -449,7 +449,7 @@ class parser_t : public std::enable_shared_from_this<parser_t> {
job_t *job_get_from_pid(pid_t pid) const;
/// Returns the job and position with the given pid.
job_t *job_get_from_pid(int64_t pid, size_t &job_pos) const;
job_t *job_get_from_pid(int pid, size_t &job_pos) const;
/// Returns a new profile item if profiling is active. The caller should fill it in.
/// The parser_t will deallocate it.