From 1c8df122d70770b7d4f65a8c909492b9b3e3f376 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sat, 22 Jan 2022 21:01:16 -0500 Subject: [PATCH] dd: block/unblock on ebcdic/ascii conversions Update `dd` so that the conversion `conv=ascii` implies `conv=unblock` and, symmetrically, the conversion `conv=ebcdic` implies `conv=block`. --- src/uu/dd/src/parseargs.rs | 27 ++++++++++++++++++++++ src/uu/dd/src/parseargs/unit_tests.rs | 2 ++ tests/by-util/test_dd.rs | 33 +++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) diff --git a/src/uu/dd/src/parseargs.rs b/src/uu/dd/src/parseargs.rs index 06cdeff25..fb3327822 100644 --- a/src/uu/dd/src/parseargs.rs +++ b/src/uu/dd/src/parseargs.rs @@ -444,6 +444,20 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result ‘ascii’ + // > + // > Convert EBCDIC to ASCII, using the conversion + // > table specified by POSIX. This provides a 1:1 + // > translation for all 256 bytes. This implies + // > ‘conv=unblock’; input is converted to ASCII + // > before trailing spaces are deleted. + // + // -- https://www.gnu.org/software/coreutils/manual/html_node/dd-invocation.html + if cbs.is_some() { + iconvflags.unblock = cbs; + } } } ConvFlag::FmtAtoE => { @@ -451,6 +465,19 @@ pub fn parse_conv_flag_input(matches: &Matches) -> Result ‘ebcdic’ + // > + // > Convert ASCII to EBCDIC. This is the inverse + // > of the ‘ascii’ conversion. This implies + // > ‘conv=block’; trailing spaces are added before + // > being converted to EBCDIC. 
+ // + // -- https://www.gnu.org/software/coreutils/manual/html_node/dd-invocation.html + if cbs.is_some() { + iconvflags.block = cbs; + } } } ConvFlag::FmtAtoI => { diff --git a/src/uu/dd/src/parseargs/unit_tests.rs b/src/uu/dd/src/parseargs/unit_tests.rs index 3ee949805..c74439159 100644 --- a/src/uu/dd/src/parseargs/unit_tests.rs +++ b/src/uu/dd/src/parseargs/unit_tests.rs @@ -157,6 +157,7 @@ fn test_all_top_level_args_no_leading_dashes() { assert_eq!( IConvFlags { ctable: Some(&EBCDIC_TO_ASCII_LCASE_TO_UCASE), + unblock: Some(1), // because ascii implies unblock ..IConvFlags::default() }, parse_conv_flag_input(&matches).unwrap() @@ -241,6 +242,7 @@ fn test_all_top_level_args_with_leading_dashes() { assert_eq!( IConvFlags { ctable: Some(&EBCDIC_TO_ASCII_LCASE_TO_UCASE), + unblock: Some(1), // because ascii implies unblock ..IConvFlags::default() }, parse_conv_flag_input(&matches).unwrap() diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs index dd4204e2e..43a59808a 100644 --- a/tests/by-util/test_dd.rs +++ b/tests/by-util/test_dd.rs @@ -559,5 +559,38 @@ fn test_unicode_filenames() { ); } +#[test] +fn test_conv_ascii_implies_unblock() { + // 0x40 = 0o100 = 64, which gets converted to ' ' + // 0xc1 = 0o301 = 193, which gets converted to 'A' + // + // `conv=ascii` implies `conv=unblock`, which means trailing spaces + // are stripped and a newline is appended at the end of each + // block. + // + // `cbs=4` means use a conversion block size of 4 bytes per block. + new_ucmd!() + .args(&["conv=ascii", "cbs=4"]) + .pipe_in(b"\x40\xc1\x40\xc1\x40\xc1\x40\x40".to_vec()) + .succeeds() + .stdout_is(" A A\n A\n"); +} + +#[test] +fn test_conv_ebcdic_implies_block() { + // 0x40 = 0o100 = 64, which is the result of converting from ' ' + // 0xc1 = 0o301 = 193, which is the result of converting from 'A' + // + // `conv=ebcdic` implies `conv=block`, which means trailing spaces + // are added to pad each block. 
+ // + // `cbs=4` means use a conversion block size of 4 bytes per block. + new_ucmd!() + .args(&["conv=ebcdic", "cbs=4"]) + .pipe_in(" A A\n A\n") + .succeeds() + .stdout_is_bytes(b"\x40\xc1\x40\xc1\x40\xc1\x40\x40"); +} + // conv=[ascii,ebcdic,ibm], conv=[ucase,lcase], conv=[block,unblock], conv=sync // TODO: Move conv tests from unit test module