Fix "coreutils manpage base64" bug

This commit is contained in:
Andrew Liebenow 2024-10-05 08:17:10 -05:00
parent 9fa405fad6
commit 32e1c54c78
10 changed files with 103 additions and 91 deletions

1
Cargo.lock generated
View file

@ -2532,6 +2532,7 @@ dependencies = [
name = "uu_base64"
version = "0.0.27"
dependencies = [
"clap",
"uu_base32",
"uucore",
]

View file

@ -3,13 +3,11 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use std::io::{stdin, Read};
pub mod base_common;
use clap::Command;
use uucore::{encoding::Format, error::UResult, help_about, help_usage};
pub mod base_common;
const ABOUT: &str = help_about!("base32.md");
const USAGE: &str = help_usage!("base32.md");
@ -17,20 +15,11 @@ const USAGE: &str = help_usage!("base32.md");
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let format = Format::Base32;
let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
// Create a reference to stdin so we can return a locked stdin from
// parse_base_cmd_args
let stdin_raw = stdin();
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
let mut input = base_common::get_input(&config)?;
base_common::handle_input(
&mut input,
format,
config.wrap_cols,
config.ignore_garbage,
config.decode,
)
base_common::handle_input(&mut input, format, config)
}
pub fn uu_app() -> Command {

View file

@ -7,11 +7,11 @@
use clap::{crate_version, Arg, ArgAction, Command};
use std::fs::File;
use std::io::{self, ErrorKind, Read, Stdin};
use std::path::Path;
use std::io::{self, ErrorKind, Read};
use std::path::{Path, PathBuf};
use uucore::display::Quotable;
use uucore::encoding::{
for_fast_encode::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER},
for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER},
Format, Z85Wrapper, BASE2LSBF, BASE2MSBF,
};
use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode};
@ -31,7 +31,7 @@ pub struct Config {
pub decode: bool,
pub ignore_garbage: bool,
pub wrap_cols: Option<usize>,
pub to_read: Option<String>,
pub to_read: Option<PathBuf>,
}
pub mod options {
@ -43,9 +43,10 @@ pub mod options {
impl Config {
pub fn from(options: &clap::ArgMatches) -> UResult<Self> {
let file: Option<String> = match options.get_many::<String>(options::FILE) {
let to_read = match options.get_many::<String>(options::FILE) {
Some(mut values) => {
let name = values.next().unwrap();
if let Some(extra_op) = values.next() {
return Err(UUsageError::new(
BASE_CMD_PARSE_ERROR,
@ -56,19 +57,22 @@ impl Config {
if name == "-" {
None
} else {
if !Path::exists(Path::new(name)) {
let path = Path::new(name);
if !path.exists() {
return Err(USimpleError::new(
BASE_CMD_PARSE_ERROR,
format!("{}: No such file or directory", name.maybe_quote()),
format!("{}: No such file or directory", path.maybe_quote()),
));
}
Some(name.clone())
Some(path.to_owned())
}
}
None => None,
};
let cols = options
let wrap_cols = options
.get_one::<String>(options::WRAP)
.map(|num| {
num.parse::<usize>().map_err(|_| {
@ -83,8 +87,8 @@ impl Config {
Ok(Self {
decode: options.get_flag(options::DECODE),
ignore_garbage: options.get_flag(options::IGNORE_GARBAGE),
wrap_cols: cols,
to_read: file,
wrap_cols,
to_read,
})
}
}
@ -139,42 +143,43 @@ pub fn base_app(about: &'static str, usage: &str) -> Command {
)
}
pub fn get_input<'a>(config: &Config, stdin_ref: &'a Stdin) -> UResult<Box<dyn Read + 'a>> {
pub fn get_input(config: &Config) -> UResult<Box<dyn Read>> {
match &config.to_read {
Some(name) => {
Some(path_buf) => {
// Do not buffer input, because buffering is handled by `fast_decode` and `fast_encode`
let file_buf =
File::open(Path::new(name)).map_err_context(|| name.maybe_quote().to_string())?;
Ok(Box::new(file_buf))
let file =
File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?;
Ok(Box::new(file))
}
None => {
let stdin_lock = io::stdin().lock();
Ok(Box::new(stdin_lock))
}
None => Ok(Box::new(stdin_ref.lock())),
}
}
pub fn handle_input<R: Read>(
input: &mut R,
format: Format,
wrap: Option<usize>,
ignore_garbage: bool,
decode: bool,
) -> UResult<()> {
pub fn handle_input<R: Read>(input: &mut R, format: Format, config: Config) -> UResult<()> {
let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format);
let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref();
let mut stdout_lock = io::stdout().lock();
if decode {
if config.decode {
fast_decode::fast_decode(
input,
&mut stdout_lock,
supports_fast_decode_and_encode.as_ref(),
ignore_garbage,
supports_fast_decode_and_encode_ref,
config.ignore_garbage,
)
} else {
fast_encode::fast_encode(
input,
&mut stdout_lock,
supports_fast_decode_and_encode.as_ref(),
wrap,
supports_fast_decode_and_encode_ref,
config.wrap_cols,
)
}
}
@ -423,15 +428,15 @@ pub mod fast_encode {
};
// Start of buffers
// Data that was read from stdin
// Data that was read from `input`
let mut input_buffer = vec![0; INPUT_BUFFER_SIZE];
assert!(!input_buffer.is_empty());
// Data that was read from stdin but has not been encoded yet
// Data that was read from `input` but has not been encoded yet
let mut leftover_buffer = VecDeque::<u8>::new();
// Encoded data that needs to be written to output
// Encoded data that needs to be written to `output`
let mut encoded_buffer = VecDeque::<u8>::new();
// End of buffers
@ -469,7 +474,7 @@ pub mod fast_encode {
assert!(leftover_buffer.len() < encode_in_chunks_of_size);
// Write all data in `encoded_buffer` to output
// Write all data in `encoded_buffer` to `output`
write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, false)?;
}
Err(er) => {
@ -511,7 +516,7 @@ pub mod fast_decode {
// Start of helper functions
fn alphabet_to_table(alphabet: &[u8], ignore_garbage: bool) -> [bool; 256] {
// If "ignore_garbage" is enabled, all characters outside the alphabet are ignored
// If `ignore_garbage` is enabled, all characters outside the alphabet are ignored
// If it is not enabled, only '\n' and '\r' are ignored
if ignore_garbage {
// Note: "false" here
@ -618,12 +623,12 @@ pub mod fast_decode {
assert!(decode_in_chunks_of_size > 0);
// Note that it's not worth using "data-encoding"'s ignore functionality if "ignore_garbage" is true, because
// Note that it's not worth using "data-encoding"'s ignore functionality if `ignore_garbage` is true, because
// "data-encoding"'s ignore functionality cannot discard non-ASCII bytes. The data has to be filtered before
// passing it to "data-encoding", so there is no point in doing any filtering in "data-encoding". This also
// allows execution to stay on the happy path in "data-encoding":
// https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L754-L756
// Update: it is not even worth it to use "data-encoding"'s ignore functionality when "ignore_garbage" is
// It is also not worth using "data-encoding"'s ignore functionality when `ignore_garbage` is
// false.
// Note that the alphabet constants above already include the padding characters
// TODO
@ -631,18 +636,18 @@ pub mod fast_decode {
let table = alphabet_to_table(alphabet, ignore_garbage);
// Start of buffers
// Data that was read from stdin
// Data that was read from `input`
let mut input_buffer = vec![0; INPUT_BUFFER_SIZE];
assert!(!input_buffer.is_empty());
// Data that was read from stdin but has not been decoded yet
// Data that was read from `input` but has not been decoded yet
let mut leftover_buffer = Vec::<u8>::new();
// Decoded data that needs to be written to `output`
let mut decoded_buffer = Vec::<u8>::new();
// Buffer that will be used when "ignore_garbage" is true, and the chunk read from "input" contains garbage
// Buffer that will be used when `ignore_garbage` is true, and the chunk read from `input` contains garbage
// data
let mut non_garbage_buffer = Vec::<u8>::new();
// End of buffers

View file

@ -17,6 +17,7 @@ readme.workspace = true
path = "src/base64.rs"
[dependencies]
clap = { workspace = true }
uucore = { workspace = true, features = ["encoding"] }
uu_base32 = { workspace = true }

View file

@ -3,13 +3,10 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use clap::Command;
use uu_base32::base_common;
pub use uu_base32::uu_app;
use uucore::{encoding::Format, error::UResult, help_about, help_usage};
use std::io::{stdin, Read};
const ABOUT: &str = help_about!("base64.md");
const USAGE: &str = help_usage!("base64.md");
@ -17,18 +14,13 @@ const USAGE: &str = help_usage!("base64.md");
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let format = Format::Base64;
let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
// Create a reference to stdin so we can return a locked stdin from
// parse_base_cmd_args
let stdin_raw = stdin();
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
let mut input = base_common::get_input(&config)?;
base_common::handle_input(
&mut input,
format,
config.wrap_cols,
config.ignore_garbage,
config.decode,
)
base_common::handle_input(&mut input, format, config)
}
pub fn uu_app() -> Command {
base_common::base_app(ABOUT, USAGE)
}

View file

@ -13,7 +13,7 @@ use a benchmarking tool like [hyperfine][0].
hyperfine currently does not measure maximum memory usage. Memory usage can be benchmarked using [poop][2], or
[toybox][3]'s "time" subcommand (both are Linux only).
Next, build the `basenc` binary using the release profile:
Build the `basenc` binary using the release profile:
```Shell
cargo build --package uu_basenc --profile release

View file

@ -3,19 +3,15 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//spell-checker:ignore (args) lsbf msbf
// spell-checker:ignore lsbf msbf
use clap::{Arg, ArgAction, Command};
use uu_base32::base_common::{self, Config, BASE_CMD_PARSE_ERROR};
use uucore::error::UClapError;
use uucore::{
encoding::Format,
error::{UResult, UUsageError},
};
use std::io::{stdin, Read};
use uucore::error::UClapError;
use uucore::{help_about, help_usage};
const ABOUT: &str = help_about!("basenc.md");
@ -81,16 +77,8 @@ fn parse_cmd_args(args: impl uucore::Args) -> UResult<(Config, Format)> {
#[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let (config, format) = parse_cmd_args(args)?;
// Create a reference to stdin so we can return a locked stdin from
// parse_base_cmd_args
let stdin_raw = stdin();
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
base_common::handle_input(
&mut input,
format,
config.wrap_cols,
config.ignore_garbage,
config.decode,
)
let mut input = base_common::get_input(&config)?;
base_common::handle_input(&mut input, format, config)
}

View file

@ -11,8 +11,8 @@ use data_encoding::Encoding;
use data_encoding_macro::new_encoding;
use std::collections::VecDeque;
// Re-export for the faster encoding logic
pub mod for_fast_encode {
// Re-export for the faster decoding/encoding logic
pub mod for_base_common {
pub use data_encoding::*;
}

View file

@ -185,3 +185,38 @@ cyBvdmVyIHRoZSBsYXp5IGRvZy4=
// cSpell:enable
);
}
// Prevent regression to:
//
// coreutils manpage base64 | rg --fixed-strings -- 'base32'
// The data are encoded as described for the base32 alphabet in RFC 4648.
// to the bytes of the formal base32 alphabet. Use \-\-ignore\-garbage
// The data are encoded as described for the base32 alphabet in RFC 4648.
// to the bytes of the formal base32 alphabet. Use \-\-ignore\-garbage
#[test]
fn test_manpage() {
use std::process::{Command, Stdio};
let test_scenario = TestScenario::new("");
let child = Command::new(test_scenario.bin_path)
.arg("manpage")
.arg("base64")
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.unwrap();
let output = child.wait_with_output().unwrap();
assert_eq!(output.status.code().unwrap(), 0);
assert!(output.stderr.is_empty());
let stdout_str = std::str::from_utf8(&output.stdout).unwrap();
assert!(stdout_str.contains("base64 alphabet"));
assert!(!stdout_str.to_ascii_lowercase().contains("base32"));
}

View file

@ -3,7 +3,8 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//spell-checker: ignore (encodings) lsbf msbf
// spell-checker: ignore (encodings) lsbf msbf
use crate::common::util::TestScenario;
#[test]