basenc: add utility

basenc is a brand-new GNU core utility (added less than 3 years ago!),
which enables some more encodings.
This commit is contained in:
Michael Debertol 2021-08-05 00:28:14 +02:00
parent 158a8a387b
commit b8c383e210
12 changed files with 290 additions and 34 deletions

View file

@ -68,6 +68,7 @@ splitn
trunc
# * uutils
basenc
chcon
chgrp
chmod

43
Cargo.lock generated
View file

@ -308,6 +308,7 @@ dependencies = [
"uu_base32",
"uu_base64",
"uu_basename",
"uu_basenc",
"uu_cat",
"uu_chgrp",
"uu_chmod",
@ -595,9 +596,29 @@ checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9"
[[package]]
name = "data-encoding"
version = "2.1.2"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4f47ca1860a761136924ddd2422ba77b2ea54fe8cc75b9040804a0d9d32ad97"
checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57"
[[package]]
name = "data-encoding-macro"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86927b7cd2fe88fa698b87404b287ab98d1a0063a34071d92e575b72d3029aca"
dependencies = [
"data-encoding",
"data-encoding-macro-internal",
]
[[package]]
name = "data-encoding-macro-internal"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5bbed42daaa95e780b60a50546aa345b8413a1e46f9a40a12907d3598f038db"
dependencies = [
"data-encoding",
"syn",
]
[[package]]
name = "diff"
@ -1959,6 +1980,16 @@ dependencies = [
"uucore_procs",
]
[[package]]
name = "uu_basenc"
version = "0.0.7"
dependencies = [
"clap",
"uu_base32",
"uucore",
"uucore_procs",
]
[[package]]
name = "uu_cat"
version = "0.0.7"
@ -2990,6 +3021,7 @@ version = "0.0.9"
dependencies = [
"clap",
"data-encoding",
"data-encoding-macro",
"dns-lookup",
"dunce",
"getopts",
@ -3002,6 +3034,7 @@ dependencies = [
"time",
"wild",
"winapi 0.3.9",
"z85",
]
[[package]]
@ -3129,3 +3162,9 @@ checksum = "244c3741f4240ef46274860397c7c74e50eb23624996930e484c16679633a54c"
dependencies = [
"libc",
]
[[package]]
name = "z85"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ac8b56e4f9906a4ef5412875e9ce448364023335cec645fd457ecf51d4f2781"

View file

@ -35,6 +35,7 @@ feat_common_core = [
"base32",
"base64",
"basename",
"basenc",
"cat",
"cksum",
"comm",
@ -245,6 +246,7 @@ arch = { optional=true, version="0.0.7", package="uu_arch", path="src/uu/arc
base32 = { optional=true, version="0.0.7", package="uu_base32", path="src/uu/base32" }
base64 = { optional=true, version="0.0.7", package="uu_base64", path="src/uu/base64" }
basename = { optional=true, version="0.0.7", package="uu_basename", path="src/uu/basename" }
basenc = { optional=true, version="0.0.7", package="uu_basenc", path="src/uu/basenc" }
cat = { optional=true, version="0.0.7", package="uu_cat", path="src/uu/cat" }
chgrp = { optional=true, version="0.0.7", package="uu_chgrp", path="src/uu/chgrp" }
chmod = { optional=true, version="0.0.7", package="uu_chmod", path="src/uu/chmod" }

View file

@ -369,20 +369,21 @@ To contribute to uutils, please see [CONTRIBUTING](CONTRIBUTING.md).
| base32 | date | runcon |
| base64 | dd | stty |
| basename | df | |
| cat | expr | |
| chgrp | install | |
| chmod | join | |
| chown | ls | |
| chroot | more | |
| cksum | numfmt | |
| comm | od (`--strings` and 128-bit data types missing) |
| csplit | pr | |
| cut | printf | |
| dircolors | sort | |
| dirname | split | |
| du | tac | |
| echo | tail | |
| env | test | |
| basenc | expr | |
| cat | install | |
| chgrp | join | |
| chmod | ls | |
| chown | more | |
| chroot | numfmt | |
| cksum | od (`--strings` and 128-bit data types missing) |
| comm | pr | |
| csplit | printf | |
| cut | sort | |
| dircolors | split | |
| dirname | tac | |
| du | tail | |
| echo | test | |
| env | | |
| expand | | |
| factor | | |
| false | | |

View file

@ -34,7 +34,7 @@ pub mod options {
}
impl Config {
fn from(app_name: &str, options: clap::ArgMatches) -> Result<Config, String> {
pub fn from(app_name: &str, options: &clap::ArgMatches) -> Result<Config, String> {
let file: Option<String> = match options.values_of(options::FILE) {
Some(mut values) => {
let name = values.next().unwrap();
@ -85,7 +85,7 @@ pub fn parse_base_cmd_args(
let arg_list = args
.collect_str(InvalidEncodingHandling::ConvertLossy)
.accept_any();
Config::from(name, app.get_matches_from(arg_list))
Config::from(name, &app.get_matches_from(arg_list))
}
pub fn base_app<'a>(name: &str, version: &'a str, about: &'a str) -> App<'static, 'a> {
@ -145,8 +145,18 @@ pub fn handle_input<R: Read>(
}
if !decode {
let encoded = data.encode();
wrap_print(&data, encoded);
match data.encode() {
Ok(s) => {
wrap_print(&data, s);
}
Err(_) => {
eprintln!(
"{}: error: invalid input (length must be multiple of 4 characters)",
name
);
exit!(1)
}
}
} else {
match data.decode() {
Ok(s) => {

25
src/uu/basenc/Cargo.toml Normal file
View file

@ -0,0 +1,25 @@
[package]
name = "uu_basenc"
version = "0.0.7"
authors = ["uutils developers"]
license = "MIT"
description = "basenc ~ (uutils) decode/encode input"
homepage = "https://github.com/uutils/coreutils"
repository = "https://github.com/uutils/coreutils/tree/master/src/uu/basenc"
keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"]
categories = ["command-line-utilities"]
edition = "2018"
[lib]
path = "src/basenc.rs"
[dependencies]
clap = { version = "2.33", features = ["wrap_help"] }
uucore = { version=">=0.0.9", package="uucore", path="../../uucore", features = ["encoding"] }
uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" }
uu_base32 = { version=">=0.0.6", package="uu_base32", path="../base32"}
[[bin]]
name = "basenc"
path = "src/main.rs"

View file

@ -0,0 +1,95 @@
// This file is part of the uutils coreutils package.
//
// (c) Jordy Dickinson <jordy.dickinson@gmail.com>
// (c) Jian Zeng <anonymousknight96@gmail.com>
//
// For the full copyright and license information, please view the LICENSE file
// that was distributed with this source code.
//spell-checker:ignore (args) lsbf msbf
#[macro_use]
extern crate uucore;
use clap::{crate_version, App, Arg};
use uu_base32::base_common::{self, Config};
use uucore::{encoding::Format, InvalidEncodingHandling};
use std::io::{stdin, Read};
/// `about` text handed to `base_common::base_app` when the clap app is built in `uu_app`.
static ABOUT: &str = "
With no FILE, or when FILE is -, read standard input.
When decoding, the input may contain newlines in addition to the bytes of
the formal alphabet. Use --ignore-garbage to attempt to recover
from any other non-alphabet bytes in the encoded stream.
";
/// Exit code used by `parse_cmd_args` when `Config::from` rejects the parsed arguments.
static BASE_CMD_PARSE_ERROR: i32 = 1;
/// Long option names paired with the `Format` each one selects.
///
/// `uu_app` registers one `--<name>` flag per entry. `parse_cmd_args` walks the table
/// in this order and uses the first entry whose flag is present on the command line.
const ENCODINGS: &[(&str, Format)] = &[
("base64", Format::Base64),
("base64url", Format::Base64Url),
("base32", Format::Base32),
("base32hex", Format::Base32Hex),
("base16", Format::Base16),
("base2lsbf", Format::Base2Lsbf),
("base2msbf", Format::Base2Msbf),
("z85", Format::Z85),
// common abbreviations. TODO: once we have clap 3.0 we can use `AppSettings::InferLongArgs` to get all abbreviations automatically
("base2l", Format::Base2Lsbf),
("base2m", Format::Base2Msbf),
];
/// Builds the one-line usage string, prefixed with the executable name.
fn get_usage() -> String {
    let program = executable!();
    format!("{0} [OPTION]... [FILE]", program)
}
/// Builds the clap application for `basenc`.
///
/// Starts from the app produced by `base_common::base_app` and adds one long flag
/// for every entry of `ENCODINGS`, using the entry's name both as the argument
/// name and as the long option.
pub fn uu_app() -> App<'static, 'static> {
    let base = base_common::base_app(executable!(), crate_version!(), ABOUT);
    ENCODINGS.iter().fold(base, |app, &(flag, _)| {
        app.arg(Arg::with_name(flag).long(flag))
    })
}
/// Parses the command line into the common `Config` and the selected `Format`.
///
/// The encoding is the first entry of `ENCODINGS` (in table order) whose flag is
/// present. When none is given, a usage error is shown and the process exits
/// with status 1. A `Config::from` failure is reported through `crash!` with
/// `BASE_CMD_PARSE_ERROR`.
fn parse_cmd_args(args: impl uucore::Args) -> (Config, Format) {
    let usage = get_usage();
    let arg_list = args
        .collect_str(InvalidEncodingHandling::ConvertLossy)
        .accept_any();
    let matches = uu_app().usage(usage.as_str()).get_matches_from(arg_list);

    // The encoding is resolved before the shared options, so a missing encoding
    // is reported first.
    let selected = ENCODINGS
        .iter()
        .find(|candidate| matches.is_present(candidate.0));
    let format = match selected {
        Some(&(_, format)) => format,
        None => {
            show_usage_error!("missing encoding type");
            std::process::exit(1)
        }
    };

    let config = match Config::from("basenc", &matches) {
        Ok(config) => config,
        Err(message) => crash!(BASE_CMD_PARSE_ERROR, "{}", message),
    };

    (config, format)
}
/// Entry point of the `basenc` utility.
///
/// Parses the arguments, opens the input via `base_common::get_input`, and hands
/// it to `base_common::handle_input` together with the chosen format and options.
/// Returns 0 once `handle_input` gives control back.
pub fn uumain(args: impl uucore::Args) -> i32 {
    let (config, format) = parse_cmd_args(args);
    let name = executable!();

    // The stdin handle is owned by this function and only lent to `get_input`,
    // so it stays alive for as long as the boxed reader is in use.
    let stdin_handle = stdin();
    let mut reader: Box<dyn Read> = base_common::get_input(&config, &stdin_handle);

    base_common::handle_input(
        &mut reader,
        format,
        config.wrap_cols,
        config.ignore_garbage,
        config.decode,
        name,
    );

    0
}

View file

@ -0,0 +1 @@
uucore_procs::main!(uu_basenc);

View file

@ -27,7 +27,9 @@ nix = { version="<= 0.13", optional=true }
platform-info = { version="<= 0.1", optional=true }
time = { version="<= 0.1.43", optional=true }
# * "problem" dependencies (pinned)
data-encoding = { version="~2.1", optional=true } ## data-encoding: require v2.1; but v2.2.0 breaks the build for MinSRV v1.31.0
data-encoding = { version="2.1", optional=true }
data-encoding-macro = { version="0.1.12", optional=true }
z85 = { version="3.0.3", optional=true }
libc = { version="0.2.15, <= 0.2.85", optional=true } ## libc: initial utmp support added in v0.2.15; but v0.2.68 breaks the build for MinSRV v1.31.0
[dev-dependencies]
@ -43,7 +45,7 @@ termion = "1.5"
[features]
default = []
# * non-default features
encoding = ["data-encoding", "thiserror"]
encoding = ["data-encoding", "data-encoding-macro", "z85", "thiserror"]
entries = ["libc"]
fs = ["libc"]
fsext = ["libc", "time"]

View file

@ -5,45 +5,95 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (strings) ABCDEFGHIJKLMNOPQRSTUVWXYZ
// spell-checker:ignore (strings) ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUV
// spell-checker:ignore (encodings) lsbf msbf hexupper
extern crate data_encoding;
use self::data_encoding::{DecodeError, BASE32, BASE64};
use data_encoding::{self, BASE32, BASE64};
use std::io::{self, Read, Write};
use data_encoding::{Encoding, BASE32HEX, BASE64URL, HEXUPPER};
use data_encoding_macro::new_encoding;
#[cfg(feature = "thiserror")]
use thiserror::Error;
#[derive(Debug, Error)]
pub enum EncodingError {
pub enum DecodeError {
#[error("{}", _0)]
Decode(#[from] DecodeError),
Decode(#[from] data_encoding::DecodeError),
#[error("{}", _0)]
DecodeZ85(#[from] z85::DecodeError),
#[error("{}", _0)]
Io(#[from] io::Error),
}
pub type DecodeResult = Result<Vec<u8>, EncodingError>;
/// Failure reported by `encode`.
pub enum EncodeError {
/// The Z85 input length was not a multiple of 4 bytes.
Z85InputLenNotMultipleOf4,
}
pub type DecodeResult = Result<Vec<u8>, DecodeError>;
#[derive(Clone, Copy)]
pub enum Format {
Base32,
Base64,
Base64Url,
Base32,
Base32Hex,
Base16,
Base2Lsbf,
Base2Msbf,
Z85,
}
use self::Format::*;
pub fn encode(f: Format, input: &[u8]) -> String {
match f {
const BASE2LSBF: Encoding = new_encoding! {
symbols: "01",
bit_order: LeastSignificantFirst,
};
const BASE2MSBF: Encoding = new_encoding! {
symbols: "01",
bit_order: MostSignificantFirst,
};
pub fn encode(f: Format, input: &[u8]) -> Result<String, EncodeError> {
Ok(match f {
Base32 => BASE32.encode(input),
Base64 => BASE64.encode(input),
}
Base64Url => BASE64URL.encode(input),
Base32Hex => BASE32HEX.encode(input),
Base16 => HEXUPPER.encode(input),
Base2Lsbf => BASE2LSBF.encode(input),
Base2Msbf => BASE2MSBF.encode(input),
Z85 => {
// According to the spec we should not accept inputs whose len is not a multiple of 4.
// However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them.
if input.len() % 4 != 0 {
return Err(EncodeError::Z85InputLenNotMultipleOf4);
} else {
z85::encode(input)
}
}
})
}
/// Decodes `input` according to format `f` and returns the raw bytes.
///
/// Errors from the underlying decoders are converted into this module's
/// `DecodeError` via `?`. Z85 input that starts with `#` is rejected up front
/// with `InvalidByte(0, b'#')`.
pub fn decode(f: Format, input: &[u8]) -> DecodeResult {
    let decoded = match f {
        Base32 => BASE32.decode(input)?,
        Base64 => BASE64.decode(input)?,
        Base64Url => BASE64URL.decode(input)?,
        Base32Hex => BASE32HEX.decode(input)?,
        Base16 => HEXUPPER.decode(input)?,
        Base2Lsbf => BASE2LSBF.decode(input)?,
        Base2Msbf => BASE2MSBF.decode(input)?,
        Z85 => {
            // The z85 crate implements a padded encoding marked by a leading '#',
            // which is otherwise not allowed; refuse such input ourselves.
            if input.first() == Some(&b'#') {
                return Err(z85::DecodeError::InvalidByte(0, b'#').into());
            }
            z85::decode(input)?
        }
    };
    Ok(decoded)
}
@ -65,6 +115,12 @@ impl<R: Read> Data<R> {
alphabet: match format {
Base32 => b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=",
Base64 => b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/",
Base64Url => b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-",
Base32Hex => b"0123456789ABCDEFGHIJKLMNOPQRSTUV=",
Base16 => b"0123456789ABCDEF",
Base2Lsbf => b"01",
Base2Msbf => b"01",
Z85 => b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#",
},
}
}
@ -90,7 +146,7 @@ impl<R: Read> Data<R> {
decode(self.format, &buf)
}
pub fn encode(&mut self) -> String {
pub fn encode(&mut self) -> Result<String, EncodeError> {
let mut buf: Vec<u8> = vec![];
self.input.read_to_end(&mut buf).unwrap();
encode(self.format, buf.as_slice())

View file

@ -0,0 +1,16 @@
use crate::common::util::*;
#[test]
fn test_z85_not_padded() {
    // The z85 crate deviates from the standard in some cases; basenc has to catch
    // those inputs itself and report them as invalid.

    // Decoding: input made of '#' characters must be refused.
    let decode_args = ["--z85", "-d"];
    let hash_only_input = "##########";
    let decode_error = "basenc: error: invalid input";
    new_ucmd!()
        .args(&decode_args)
        .pipe_in(hash_only_input)
        .fails()
        .stderr_only(decode_error);

    // Encoding: input whose length is not a multiple of 4 must be refused.
    let encode_args = ["--z85"];
    let three_byte_input = "123";
    let encode_error = "basenc: error: invalid input (length must be multiple of 4 characters)";
    new_ucmd!()
        .args(&encode_args)
        .pipe_in(three_byte_input)
        .fails()
        .stderr_only(encode_error);
}

View file

@ -123,6 +123,14 @@ test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}"
# When decoding an invalid base32/64 string, gnu writes everything it was able to decode until
# it hit the decode error, while we don't write anything if the input is invalid.
sed -i "s/\(baddecode.*OUT=>\"\).*\"/\1\"/g" tests/misc/base64.pl
sed -i "s/\(\(b2[ml]_[69]\|b32h_[56]\|z85_8\|z85_35\).*OUT=>\)[^}]*\(.*\)/\1\"\"\3/g" tests/misc/basenc.pl
# add "error: " to the expected error message
sed -i "s/\$prog: invalid input/\$prog: error: invalid input/g" tests/misc/basenc.pl
# basenc: swap out error message for unexpected arg
sed -i "s/ {ERR=>\"\$prog: foobar\\\\n\" \. \$try_help }/ {ERR=>\"error: Found argument '--foobar' which wasn't expected, or isn't valid in this context\n\nUSAGE:\n basenc [OPTION]... [FILE]\n\nFor more information try --help\n\"}]/" tests/misc/basenc.pl
sed -i "s/ {ERR_SUBST=>\"s\/(unrecognized|unknown) option \[-' \]\*foobar\[' \]\*\/foobar\/\"}],//" tests/misc/basenc.pl
# Remove the check whether a util was built. Otherwise tests against utils like "arch" are not run.
sed -i "s|require_built_ |# require_built_ |g" init.cfg