Merge pull request #2902 from jtracey/join-non-unicode-sep

join: add support for non-unicode field separators
This commit is contained in:
Terts Diepraam 2022-01-31 21:54:56 +01:00 committed by GitHub
commit 7fc82cd376
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 3 deletions

View file

@ -14,6 +14,8 @@ use clap::{crate_version, App, AppSettings, Arg};
use std::cmp::Ordering; use std::cmp::Ordering;
use std::fs::File; use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, Split, Stdin, Write}; use std::io::{stdin, stdout, BufRead, BufReader, Split, Stdin, Write};
#[cfg(unix)]
use std::os::unix::ffi::OsStrExt;
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{set_exit_code, UResult, USimpleError}; use uucore::error::{set_exit_code, UResult, USimpleError};
@ -532,8 +534,19 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
settings.key1 = get_field_number(keys, key1)?; settings.key1 = get_field_number(keys, key1)?;
settings.key2 = get_field_number(keys, key2)?; settings.key2 = get_field_number(keys, key2)?;
if let Some(value_str) = matches.value_of("t") { if let Some(value_os) = matches.value_of_os("t") {
let value = value_str.as_bytes(); #[cfg(unix)]
let value = value_os.as_bytes();
#[cfg(not(unix))]
let value = match value_os.to_str() {
Some(value) => value.as_bytes(),
None => {
return Err(USimpleError::new(
1,
"unprintable field separators are only supported on unix-like platforms",
))
}
};
settings.separator = match value.len() { settings.separator = match value.len() {
0 => Sep::Line, 0 => Sep::Line,
1 => Sep::Char(value[0]), 1 => Sep::Char(value[0]),
@ -541,7 +554,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
_ => { _ => {
return Err(USimpleError::new( return Err(USimpleError::new(
1, 1,
format!("multi-character tab {}", value_str), format!("multi-character tab {}", value_os.to_string_lossy()),
)) ))
} }
}; };
@ -655,6 +668,7 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2",
.short('t') .short('t')
.takes_value(true) .takes_value(true)
.value_name("CHAR") .value_name("CHAR")
.allow_invalid_utf8(true)
.help("use CHAR as input and output field separator"), .help("use CHAR as input and output field separator"),
) )
.arg( .arg(

View file

@ -1,6 +1,10 @@
// spell-checker:ignore (words) autoformat // spell-checker:ignore (words) autoformat
use crate::common::util::*; use crate::common::util::*;
#[cfg(unix)]
use std::{ffi::OsStr, os::unix::ffi::OsStrExt};
#[cfg(windows)]
use std::{ffi::OsString, os::windows::ffi::OsStringExt};
#[test] #[test]
fn empty_files() { fn empty_files() {
@ -385,6 +389,32 @@ fn non_unicode() {
.arg("non-unicode_2.bin") .arg("non-unicode_2.bin")
.succeeds() .succeeds()
.stdout_only_fixture("non-unicode.expected"); .stdout_only_fixture("non-unicode.expected");
#[cfg(unix)]
{
let invalid_utf8: u8 = 167;
new_ucmd!()
.arg("-t")
.arg(OsStr::from_bytes(&[invalid_utf8]))
.arg("non-unicode_1.bin")
.arg("non-unicode_2.bin")
.succeeds()
.stdout_only_fixture("non-unicode_sep.expected");
}
#[cfg(windows)]
{
let invalid_utf16: OsString = OsStringExt::from_wide(&[0xD800]);
new_ucmd!()
.arg("-t")
.arg(&invalid_utf16)
.arg("non-unicode_1.bin")
.arg("non-unicode_2.bin")
.fails()
.stderr_is(
"join: unprintable field separators are only supported on unix-like platforms",
);
}
} }
#[test] #[test]

Binary file not shown.