mirror of
https://github.com/uutils/coreutils
synced 2025-01-19 00:24:13 +00:00
Fix "coreutils manpage base64" bug
This commit is contained in:
parent
9fa405fad6
commit
32e1c54c78
10 changed files with 103 additions and 91 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -2532,6 +2532,7 @@ dependencies = [
|
|||
name = "uu_base64"
|
||||
version = "0.0.27"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"uu_base32",
|
||||
"uucore",
|
||||
]
|
||||
|
|
|
@ -3,13 +3,11 @@
|
|||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use std::io::{stdin, Read};
|
||||
pub mod base_common;
|
||||
|
||||
use clap::Command;
|
||||
use uucore::{encoding::Format, error::UResult, help_about, help_usage};
|
||||
|
||||
pub mod base_common;
|
||||
|
||||
const ABOUT: &str = help_about!("base32.md");
|
||||
const USAGE: &str = help_usage!("base32.md");
|
||||
|
||||
|
@ -17,20 +15,11 @@ const USAGE: &str = help_usage!("base32.md");
|
|||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||
let format = Format::Base32;
|
||||
|
||||
let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
|
||||
let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
|
||||
|
||||
// Create a reference to stdin so we can return a locked stdin from
|
||||
// parse_base_cmd_args
|
||||
let stdin_raw = stdin();
|
||||
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
|
||||
let mut input = base_common::get_input(&config)?;
|
||||
|
||||
base_common::handle_input(
|
||||
&mut input,
|
||||
format,
|
||||
config.wrap_cols,
|
||||
config.ignore_garbage,
|
||||
config.decode,
|
||||
)
|
||||
base_common::handle_input(&mut input, format, config)
|
||||
}
|
||||
|
||||
pub fn uu_app() -> Command {
|
||||
|
|
|
@ -7,11 +7,11 @@
|
|||
|
||||
use clap::{crate_version, Arg, ArgAction, Command};
|
||||
use std::fs::File;
|
||||
use std::io::{self, ErrorKind, Read, Stdin};
|
||||
use std::path::Path;
|
||||
use std::io::{self, ErrorKind, Read};
|
||||
use std::path::{Path, PathBuf};
|
||||
use uucore::display::Quotable;
|
||||
use uucore::encoding::{
|
||||
for_fast_encode::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER},
|
||||
for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER},
|
||||
Format, Z85Wrapper, BASE2LSBF, BASE2MSBF,
|
||||
};
|
||||
use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode};
|
||||
|
@ -31,7 +31,7 @@ pub struct Config {
|
|||
pub decode: bool,
|
||||
pub ignore_garbage: bool,
|
||||
pub wrap_cols: Option<usize>,
|
||||
pub to_read: Option<String>,
|
||||
pub to_read: Option<PathBuf>,
|
||||
}
|
||||
|
||||
pub mod options {
|
||||
|
@ -43,9 +43,10 @@ pub mod options {
|
|||
|
||||
impl Config {
|
||||
pub fn from(options: &clap::ArgMatches) -> UResult<Self> {
|
||||
let file: Option<String> = match options.get_many::<String>(options::FILE) {
|
||||
let to_read = match options.get_many::<String>(options::FILE) {
|
||||
Some(mut values) => {
|
||||
let name = values.next().unwrap();
|
||||
|
||||
if let Some(extra_op) = values.next() {
|
||||
return Err(UUsageError::new(
|
||||
BASE_CMD_PARSE_ERROR,
|
||||
|
@ -56,19 +57,22 @@ impl Config {
|
|||
if name == "-" {
|
||||
None
|
||||
} else {
|
||||
if !Path::exists(Path::new(name)) {
|
||||
let path = Path::new(name);
|
||||
|
||||
if !path.exists() {
|
||||
return Err(USimpleError::new(
|
||||
BASE_CMD_PARSE_ERROR,
|
||||
format!("{}: No such file or directory", name.maybe_quote()),
|
||||
format!("{}: No such file or directory", path.maybe_quote()),
|
||||
));
|
||||
}
|
||||
Some(name.clone())
|
||||
|
||||
Some(path.to_owned())
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let cols = options
|
||||
let wrap_cols = options
|
||||
.get_one::<String>(options::WRAP)
|
||||
.map(|num| {
|
||||
num.parse::<usize>().map_err(|_| {
|
||||
|
@ -83,8 +87,8 @@ impl Config {
|
|||
Ok(Self {
|
||||
decode: options.get_flag(options::DECODE),
|
||||
ignore_garbage: options.get_flag(options::IGNORE_GARBAGE),
|
||||
wrap_cols: cols,
|
||||
to_read: file,
|
||||
wrap_cols,
|
||||
to_read,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -139,42 +143,43 @@ pub fn base_app(about: &'static str, usage: &str) -> Command {
|
|||
)
|
||||
}
|
||||
|
||||
pub fn get_input<'a>(config: &Config, stdin_ref: &'a Stdin) -> UResult<Box<dyn Read + 'a>> {
|
||||
pub fn get_input(config: &Config) -> UResult<Box<dyn Read>> {
|
||||
match &config.to_read {
|
||||
Some(name) => {
|
||||
Some(path_buf) => {
|
||||
// Do not buffer input, because buffering is handled by `fast_decode` and `fast_encode`
|
||||
let file_buf =
|
||||
File::open(Path::new(name)).map_err_context(|| name.maybe_quote().to_string())?;
|
||||
Ok(Box::new(file_buf))
|
||||
let file =
|
||||
File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?;
|
||||
|
||||
Ok(Box::new(file))
|
||||
}
|
||||
None => {
|
||||
let stdin_lock = io::stdin().lock();
|
||||
|
||||
Ok(Box::new(stdin_lock))
|
||||
}
|
||||
None => Ok(Box::new(stdin_ref.lock())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn handle_input<R: Read>(
|
||||
input: &mut R,
|
||||
format: Format,
|
||||
wrap: Option<usize>,
|
||||
ignore_garbage: bool,
|
||||
decode: bool,
|
||||
) -> UResult<()> {
|
||||
pub fn handle_input<R: Read>(input: &mut R, format: Format, config: Config) -> UResult<()> {
|
||||
let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format);
|
||||
|
||||
let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref();
|
||||
|
||||
let mut stdout_lock = io::stdout().lock();
|
||||
|
||||
if decode {
|
||||
if config.decode {
|
||||
fast_decode::fast_decode(
|
||||
input,
|
||||
&mut stdout_lock,
|
||||
supports_fast_decode_and_encode.as_ref(),
|
||||
ignore_garbage,
|
||||
supports_fast_decode_and_encode_ref,
|
||||
config.ignore_garbage,
|
||||
)
|
||||
} else {
|
||||
fast_encode::fast_encode(
|
||||
input,
|
||||
&mut stdout_lock,
|
||||
supports_fast_decode_and_encode.as_ref(),
|
||||
wrap,
|
||||
supports_fast_decode_and_encode_ref,
|
||||
config.wrap_cols,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -423,15 +428,15 @@ pub mod fast_encode {
|
|||
};
|
||||
|
||||
// Start of buffers
|
||||
// Data that was read from stdin
|
||||
// Data that was read from `input`
|
||||
let mut input_buffer = vec![0; INPUT_BUFFER_SIZE];
|
||||
|
||||
assert!(!input_buffer.is_empty());
|
||||
|
||||
// Data that was read from stdin but has not been encoded yet
|
||||
// Data that was read from `input` but has not been encoded yet
|
||||
let mut leftover_buffer = VecDeque::<u8>::new();
|
||||
|
||||
// Encoded data that needs to be written to output
|
||||
// Encoded data that needs to be written to `output`
|
||||
let mut encoded_buffer = VecDeque::<u8>::new();
|
||||
// End of buffers
|
||||
|
||||
|
@ -469,7 +474,7 @@ pub mod fast_encode {
|
|||
|
||||
assert!(leftover_buffer.len() < encode_in_chunks_of_size);
|
||||
|
||||
// Write all data in `encoded_buffer` to output
|
||||
// Write all data in `encoded_buffer` to `output`
|
||||
write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, false)?;
|
||||
}
|
||||
Err(er) => {
|
||||
|
@ -511,7 +516,7 @@ pub mod fast_decode {
|
|||
|
||||
// Start of helper functions
|
||||
fn alphabet_to_table(alphabet: &[u8], ignore_garbage: bool) -> [bool; 256] {
|
||||
// If "ignore_garbage" is enabled, all characters outside the alphabet are ignored
|
||||
// If `ignore_garbage` is enabled, all characters outside the alphabet are ignored
|
||||
// If it is not enabled, only '\n' and '\r' are ignored
|
||||
if ignore_garbage {
|
||||
// Note: "false" here
|
||||
|
@ -618,12 +623,12 @@ pub mod fast_decode {
|
|||
|
||||
assert!(decode_in_chunks_of_size > 0);
|
||||
|
||||
// Note that it's not worth using "data-encoding"'s ignore functionality if "ignore_garbage" is true, because
|
||||
// Note that it's not worth using "data-encoding"'s ignore functionality if `ignore_garbage` is true, because
|
||||
// "data-encoding"'s ignore functionality cannot discard non-ASCII bytes. The data has to be filtered before
|
||||
// passing it to "data-encoding", so there is no point in doing any filtering in "data-encoding". This also
|
||||
// allows execution to stay on the happy path in "data-encoding":
|
||||
// https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L754-L756
|
||||
// Update: it is not even worth it to use "data-encoding"'s ignore functionality when "ignore_garbage" is
|
||||
// It is also not worth using "data-encoding"'s ignore functionality when `ignore_garbage` is
|
||||
// false.
|
||||
// Note that the alphabet constants above already include the padding characters
|
||||
// TODO
|
||||
|
@ -631,18 +636,18 @@ pub mod fast_decode {
|
|||
let table = alphabet_to_table(alphabet, ignore_garbage);
|
||||
|
||||
// Start of buffers
|
||||
// Data that was read from stdin
|
||||
// Data that was read from `input`
|
||||
let mut input_buffer = vec![0; INPUT_BUFFER_SIZE];
|
||||
|
||||
assert!(!input_buffer.is_empty());
|
||||
|
||||
// Data that was read from stdin but has not been decoded yet
|
||||
// Data that was read from `input` but has not been decoded yet
|
||||
let mut leftover_buffer = Vec::<u8>::new();
|
||||
|
||||
// Decoded data that needs to be written to `output`
|
||||
let mut decoded_buffer = Vec::<u8>::new();
|
||||
|
||||
// Buffer that will be used when "ignore_garbage" is true, and the chunk read from "input" contains garbage
|
||||
// Buffer that will be used when `ignore_garbage` is true, and the chunk read from `input` contains garbage
|
||||
// data
|
||||
let mut non_garbage_buffer = Vec::<u8>::new();
|
||||
// End of buffers
|
||||
|
|
|
@ -17,6 +17,7 @@ readme.workspace = true
|
|||
path = "src/base64.rs"
|
||||
|
||||
[dependencies]
|
||||
clap = { workspace = true }
|
||||
uucore = { workspace = true, features = ["encoding"] }
|
||||
uu_base32 = { workspace = true }
|
||||
|
||||
|
|
|
@ -3,13 +3,10 @@
|
|||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use clap::Command;
|
||||
use uu_base32::base_common;
|
||||
pub use uu_base32::uu_app;
|
||||
|
||||
use uucore::{encoding::Format, error::UResult, help_about, help_usage};
|
||||
|
||||
use std::io::{stdin, Read};
|
||||
|
||||
const ABOUT: &str = help_about!("base64.md");
|
||||
const USAGE: &str = help_usage!("base64.md");
|
||||
|
||||
|
@ -17,18 +14,13 @@ const USAGE: &str = help_usage!("base64.md");
|
|||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||
let format = Format::Base64;
|
||||
|
||||
let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
|
||||
let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
|
||||
|
||||
// Create a reference to stdin so we can return a locked stdin from
|
||||
// parse_base_cmd_args
|
||||
let stdin_raw = stdin();
|
||||
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
|
||||
let mut input = base_common::get_input(&config)?;
|
||||
|
||||
base_common::handle_input(
|
||||
&mut input,
|
||||
format,
|
||||
config.wrap_cols,
|
||||
config.ignore_garbage,
|
||||
config.decode,
|
||||
)
|
||||
base_common::handle_input(&mut input, format, config)
|
||||
}
|
||||
|
||||
pub fn uu_app() -> Command {
|
||||
base_common::base_app(ABOUT, USAGE)
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ use a benchmarking tool like [hyperfine][0].
|
|||
hyperfine currently does not measure maximum memory usage. Memory usage can be benchmarked using [poop][2], or
|
||||
[toybox][3]'s "time" subcommand (both are Linux only).
|
||||
|
||||
Next, build the `basenc` binary using the release profile:
|
||||
Build the `basenc` binary using the release profile:
|
||||
|
||||
```Shell
|
||||
cargo build --package uu_basenc --profile release
|
||||
|
|
|
@ -3,19 +3,15 @@
|
|||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
//spell-checker:ignore (args) lsbf msbf
|
||||
// spell-checker:ignore lsbf msbf
|
||||
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
use uu_base32::base_common::{self, Config, BASE_CMD_PARSE_ERROR};
|
||||
|
||||
use uucore::error::UClapError;
|
||||
use uucore::{
|
||||
encoding::Format,
|
||||
error::{UResult, UUsageError},
|
||||
};
|
||||
|
||||
use std::io::{stdin, Read};
|
||||
use uucore::error::UClapError;
|
||||
|
||||
use uucore::{help_about, help_usage};
|
||||
|
||||
const ABOUT: &str = help_about!("basenc.md");
|
||||
|
@ -81,16 +77,8 @@ fn parse_cmd_args(args: impl uucore::Args) -> UResult<(Config, Format)> {
|
|||
#[uucore::main]
|
||||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||
let (config, format) = parse_cmd_args(args)?;
|
||||
// Create a reference to stdin so we can return a locked stdin from
|
||||
// parse_base_cmd_args
|
||||
let stdin_raw = stdin();
|
||||
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
|
||||
|
||||
base_common::handle_input(
|
||||
&mut input,
|
||||
format,
|
||||
config.wrap_cols,
|
||||
config.ignore_garbage,
|
||||
config.decode,
|
||||
)
|
||||
let mut input = base_common::get_input(&config)?;
|
||||
|
||||
base_common::handle_input(&mut input, format, config)
|
||||
}
|
||||
|
|
|
@ -11,8 +11,8 @@ use data_encoding::Encoding;
|
|||
use data_encoding_macro::new_encoding;
|
||||
use std::collections::VecDeque;
|
||||
|
||||
// Re-export for the faster encoding logic
|
||||
pub mod for_fast_encode {
|
||||
// Re-export for the faster decoding/encoding logic
|
||||
pub mod for_base_common {
|
||||
pub use data_encoding::*;
|
||||
}
|
||||
|
||||
|
|
|
@ -185,3 +185,38 @@ cyBvdmVyIHRoZSBsYXp5IGRvZy4=
|
|||
// cSpell:enable
|
||||
);
|
||||
}
|
||||
|
||||
// Prevent regression to:
|
||||
//
|
||||
// ❯ coreutils manpage base64 | rg --fixed-strings -- 'base32'
|
||||
// The data are encoded as described for the base32 alphabet in RFC 4648.
|
||||
// to the bytes of the formal base32 alphabet. Use \-\-ignore\-garbage
|
||||
// The data are encoded as described for the base32 alphabet in RFC 4648.
|
||||
// to the bytes of the formal base32 alphabet. Use \-\-ignore\-garbage
|
||||
#[test]
|
||||
fn test_manpage() {
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
let test_scenario = TestScenario::new("");
|
||||
|
||||
let child = Command::new(test_scenario.bin_path)
|
||||
.arg("manpage")
|
||||
.arg("base64")
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn()
|
||||
.unwrap();
|
||||
|
||||
let output = child.wait_with_output().unwrap();
|
||||
|
||||
assert_eq!(output.status.code().unwrap(), 0);
|
||||
|
||||
assert!(output.stderr.is_empty());
|
||||
|
||||
let stdout_str = std::str::from_utf8(&output.stdout).unwrap();
|
||||
|
||||
assert!(stdout_str.contains("base64 alphabet"));
|
||||
|
||||
assert!(!stdout_str.to_ascii_lowercase().contains("base32"));
|
||||
}
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
//spell-checker: ignore (encodings) lsbf msbf
|
||||
// spell-checker: ignore (encodings) lsbf msbf
|
||||
|
||||
use crate::common::util::TestScenario;
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Reference in a new issue