Merge pull request #5252 from zhitkoff/split-gnu-test-fail.sh

split: implement remaining -n variants and pass GNU tests/split/fail.sh
2024-12-14 15:22:38 +00:00 · 2023-09-07 17:15:09 +02:00 · 2023-09-07 17:15:09 +02:00 · 80f8eb6724
commit 80f8eb6724
parent 9d8656c0fa 3f065eed8a
3 changed files with 947 additions and 59 deletions
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@ -13,14 +13,16 @@ use crate::filenames::FilenameIterator;
 use crate::filenames::SuffixType;
 use clap::{crate_version, parser::ValueSource, Arg, ArgAction, ArgMatches, Command};
 use std::env;
+use std::ffi::OsString;
 use std::fmt;
 use std::fs::{metadata, File};
 use std::io;
 use std::io::{stdin, BufRead, BufReader, BufWriter, ErrorKind, Read, Write};
 use std::path::Path;
+use std::u64;
 use uucore::display::Quotable;
 use uucore::error::{FromIo, UIoError, UResult, USimpleError, UUsageError};
-use uucore::parse_size::{parse_size, ParseSizeError};
+use uucore::parse_size::{parse_size, parse_size_max, ParseSizeError};
 use uucore::uio_error;
 use uucore::{format_usage, help_about, help_section, help_usage};

@ -52,14 +54,175 @@ const AFTER_HELP: &str = help_section!("after help", "split.md");

 #[uucore::main]
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
+    let (args, obs_lines) = handle_obsolete(args);
+
    let matches = uu_app().try_get_matches_from(args)?;
-    match Settings::from(&matches) {
+
+    match Settings::from(&matches, &obs_lines) {
        Ok(settings) => split(&settings),
        Err(e) if e.requires_usage() => Err(UUsageError::new(1, format!("{e}"))),
        Err(e) => Err(USimpleError::new(1, format!("{e}"))),
    }
 }

+/// Pull the obsolete "lines" shorthand (if any) out of the raw arguments,
+/// following GNU `split` behavior. For example (and similar):
+/// `split -22 file` stands for `split -l 22 file`
+/// `split -2de file` stands for `split -l 2 -d -e file`
+/// `split -x300e file` stands for `split -x -l 300 -e file`
+/// `split -x300e -22 file` stands for `split -x -e -l 22 file` (last obsolete lines option wins)
+///
+/// Returns the arguments with the shorthand stripped, paired with the
+/// extracted lines value (`None` when no shorthand was present).
+fn handle_obsolete(args: impl uucore::Args) -> (Vec<OsString>, Option<String>) {
+    let mut obs_lines = None;
+    // State carried from one argument to the next: did the previous
+    // argument open an option whose value is still to come?
+    let mut long_opt_awaits_value = false;
+    let mut short_opt_awaits_value = false;
+
+    let mut filtered_args = Vec::new();
+    for os_slice in args {
+        let kept = filter_args(
+            os_slice,
+            &mut obs_lines,
+            &mut long_opt_awaits_value,
+            &mut short_opt_awaits_value,
+        );
+        // `None` means the whole argument was the shorthand and is dropped
+        if let Some(arg) = kept {
+            filtered_args.push(arg);
+        }
+    }
+
+    (filtered_args, obs_lines)
+}
+
+/// Helper function to [`handle_obsolete`]
+/// Processes a single argument: strips the obsolete lines value from it
+/// (storing the value in `obs_lines`) and updates the "preceding option
+/// requires a value" flags for the next argument.
+///
+/// Returns the argument to keep, or `None` when nothing is left of it
+/// after the obsolete lines value has been removed.
+fn filter_args(
+    os_slice: OsString,
+    obs_lines: &mut Option<String>,
+    preceding_long_opt_req_value: &mut bool,
+    preceding_short_opt_req_value: &mut bool,
+) -> Option<OsString> {
+    if let Some(slice) = os_slice.to_str() {
+        let may_hold_obs_lines = should_extract_obs_lines(
+            slice,
+            preceding_long_opt_req_value,
+            preceding_short_opt_req_value,
+        );
+        let kept = if may_hold_obs_lines {
+            // a short option string that may carry
+            // an obsolete lines value inside it
+            handle_extract_obs_lines(slice, obs_lines)
+        } else {
+            // not a short option at all, or a short option
+            // that cannot carry an obsolete lines value
+            Some(OsString::from(slice))
+        };
+        // the flags are updated only after the current slice was classified
+        handle_preceding_options(
+            slice,
+            preceding_long_opt_req_value,
+            preceding_short_opt_req_value,
+        );
+        kept
+    } else {
+        // The argument is not valid UTF-8:
+        // leave it untouched and hand it over as-is.
+        // Any failure this causes later on is deliberately not handled here
+        // and is left to clap.
+        Some(os_slice)
+    }
+}
+
+/// Helper function to [`filter_args`]
+/// Decides whether `slice` is a genuine short option string (as opposed to
+/// a hyphen prefixed value of the preceding option) that may contain
+/// an obsolete lines value.
+fn should_extract_obs_lines(
+    slice: &str,
+    preceding_long_opt_req_value: &bool,
+    preceding_short_opt_req_value: &bool,
+) -> bool {
+    // the slice is the value of the option in front of it
+    if *preceding_long_opt_req_value || *preceding_short_opt_req_value {
+        return false;
+    }
+    // neither a positional argument nor a long option qualifies
+    if !slice.starts_with('-') || slice.starts_with("--") {
+        return false;
+    }
+    // short options that take a value of their own:
+    // whatever follows them in the slice is not an obsolete lines value
+    let value_taking_prefixes = ["-a", "-b", "-C", "-l", "-n"];
+    !value_taking_prefixes
+        .iter()
+        .any(|prefix| slice.starts_with(*prefix))
+}
+
+/// Helper function to [`filter_args`]
+/// Splits `slice` into the obsolete lines value (the first run of ASCII
+/// digits) and everything else.
+///
+/// When digits were found they are stored in `obs_lines` and the rest of
+/// the slice is returned, or `None` if fewer than two characters remain.
+/// When no digits were found the slice is returned unchanged and
+/// `obs_lines` is left alone.
+fn handle_extract_obs_lines(slice: &str, obs_lines: &mut Option<String>) -> Option<OsString> {
+    let mut digits = String::new();
+    let mut remainder = String::new();
+    let mut digits_finished = false;
+
+    for c in slice.chars() {
+        if c.is_ascii_digit() && !digits_finished {
+            digits.push(c);
+        } else {
+            // In a scenario like '-x200a4' only '200' is the lines value:
+            // the first non-digit seen after at least one digit was collected
+            // ends the extraction, so the trailing '4' stays in the slice
+            if !digits.is_empty() {
+                digits_finished = true;
+            }
+            remainder.push(c);
+        }
+    }
+
+    if digits.is_empty() {
+        // nothing to extract, keep the argument as it was
+        return Some(OsString::from(slice));
+    }
+
+    *obs_lines = Some(digits);
+    if remainder.chars().nth(1).is_some() {
+        // other short options surrounded the lines value,
+        // i.e. '-xd100' or '-100de' is now '-xd' or '-de'
+        Some(OsString::from(remainder))
+    } else {
+        // the argument consisted of the lines value only
+        None
+    }
+}
+
+/// Helper function to [`filter_args`]
+/// Captures if current slice is a preceding option
+/// that requires value
+///
+/// On entry both flags describe the slice processed *before* `slice`;
+/// on exit they describe `slice` itself, i.e. whether the slice that comes
+/// next has to be treated as an option value.
+fn handle_preceding_options(
+    slice: &str,
+    preceding_long_opt_req_value: &mut bool,
+    preceding_short_opt_req_value: &mut bool,
+) {
+    // The previous slice was an option still waiting for its value, so the
+    // current slice IS that value, whatever it looks like (it may well start
+    // with '-'). A value never requires a value of its own: reset both flags.
+    // Without this, a hyphen prefixed value after a long option
+    // (i.e. `--additional-suffix -300 -22 file`) left the long option flag set,
+    // and the obsolete lines value that followed (`-22`) was not extracted.
+    if *preceding_long_opt_req_value || *preceding_short_opt_req_value {
+        *preceding_long_opt_req_value = false;
+        *preceding_short_opt_req_value = false;
+        return;
+    }
+    // capture if current slice is a preceding long option that requires value and does not use '=' to assign that value
+    // following slice should be treated as value for this option
+    // even if it starts with '-' (which would be treated as hyphen prefixed value)
+    *preceding_long_opt_req_value = match slice.strip_prefix("--") {
+        Some(name) => {
+            name == OPT_BYTES
+                || name == OPT_LINE_BYTES
+                || name == OPT_LINES
+                || name == OPT_ADDITIONAL_SUFFIX
+                || name == OPT_FILTER
+                || name == OPT_NUMBER
+                || name == OPT_SUFFIX_LENGTH
+        }
+        None => false,
+    };
+    // capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace)
+    // following slice should be treated as value for this option
+    // even if it starts with '-' (which would be treated as hyphen prefixed value)
+    // NOTE: a slice that does not start with '-' matches neither check,
+    // so both flags end up reset for it without any extra handling
+    *preceding_short_opt_req_value =
+        slice == "-b" || slice == "-C" || slice == "-l" || slice == "-n" || slice == "-a";
+}
+
 pub fn uu_app() -> Command {
    Command::new(uucore::util_name())
        .version(crate_version!())
@ -72,6 +235,7 @@ pub fn uu_app() -> Command {
            Arg::new(OPT_BYTES)
                .short('b')
                .long(OPT_BYTES)
+                .allow_hyphen_values(true)
                .value_name("SIZE")
                .help("put SIZE bytes per output file"),
        )
@ -79,14 +243,15 @@ pub fn uu_app() -> Command {
            Arg::new(OPT_LINE_BYTES)
                .short('C')
                .long(OPT_LINE_BYTES)
+                .allow_hyphen_values(true)
                .value_name("SIZE")
-                .default_value("2")
                .help("put at most SIZE bytes of lines per output file"),
        )
        .arg(
            Arg::new(OPT_LINES)
                .short('l')
                .long(OPT_LINES)
+                .allow_hyphen_values(true)
                .value_name("NUMBER")
                .default_value("1000")
                .help("put NUMBER lines/records per output file"),
@ -95,6 +260,7 @@ pub fn uu_app() -> Command {
            Arg::new(OPT_NUMBER)
                .short('n')
                .long(OPT_NUMBER)
+                .allow_hyphen_values(true)
                .value_name("CHUNKS")
                .help("generate CHUNKS output files; see explanation below"),
        )
@ -102,6 +268,7 @@ pub fn uu_app() -> Command {
        .arg(
            Arg::new(OPT_ADDITIONAL_SUFFIX)
                .long(OPT_ADDITIONAL_SUFFIX)
+                .allow_hyphen_values(true)
                .value_name("SUFFIX")
                .default_value("")
                .help("additional SUFFIX to append to output file names"),
@ -109,6 +276,7 @@ pub fn uu_app() -> Command {
        .arg(
            Arg::new(OPT_FILTER)
                .long(OPT_FILTER)
+                .allow_hyphen_values(true)
                .value_name("COMMAND")
                .value_hint(clap::ValueHint::CommandName)
                .help(
@ -178,9 +346,10 @@ pub fn uu_app() -> Command {
            Arg::new(OPT_SUFFIX_LENGTH)
                .short('a')
                .long(OPT_SUFFIX_LENGTH)
+                .allow_hyphen_values(true)
                .value_name("N")
                .default_value(OPT_DEFAULT_SUFFIX_LENGTH)
-                .help("use suffixes of fixed length N. 0 implies dynamic length."),
+                .help("use suffixes of fixed length N. 0 implies dynamic length, starting with 2"),
        )
        .arg(
            Arg::new(OPT_VERBOSE)
@ -217,6 +386,10 @@ enum NumberType {
    /// Split into a specific number of chunks by byte.
    Bytes(u64),

+    /// Split into a specific number of chunks by byte
+    /// but output only the *k*th chunk.
+    KthBytes(u64, u64),
+
    /// Split into a specific number of chunks by line (approximately).
    Lines(u64),

@ -237,6 +410,7 @@ impl NumberType {
    fn num_chunks(&self) -> u64 {
        match self {
            Self::Bytes(n) => *n,
+            Self::KthBytes(_, n) => *n,
            Self::Lines(n) => *n,
            Self::KthLines(_, n) => *n,
            Self::RoundRobin(n) => *n,
@ -255,6 +429,7 @@ enum NumberTypeError {
    ///
    /// ```ignore
    /// -n N
+    /// -n K/N
    /// -n l/N
    /// -n l/K/N
    /// -n r/N
@ -265,9 +440,12 @@ enum NumberTypeError {
    /// The chunk number was invalid.
    ///
    /// This can happen if the value of `K` in any of the following
-    /// command-line options is not a positive integer:
+    /// command-line options is not a positive integer
+    /// or if `K` is 0
+    /// or if `K` is greater than `N`:
    ///
    /// ```ignore
+    /// -n K/N
    /// -n l/K/N
    /// -n r/K/N
    /// ```
@ -281,6 +459,7 @@ impl NumberType {
    ///
    /// ```ignore
    /// "N"
+    /// "K/N"
    /// "l/N"
    /// "l/K/N"
    /// "r/N"
@ -292,15 +471,20 @@ impl NumberType {
    ///
    /// # Errors
    ///
-    /// If the string is not one of the valid number types, if `K` is
-    /// not a nonnegative integer, or if `N` is not a positive
-    /// integer, then this function returns [`NumberTypeError`].
+    /// If the string is not one of the valid number types,
+    /// if `K` is not a nonnegative integer,
+    /// or if `K` is 0,
+    /// or if `N` is not a positive integer,
+    /// or if `K` is greater than `N`
+    /// then this function returns [`NumberTypeError`].
    fn from(s: &str) -> Result<Self, NumberTypeError> {
+        fn is_invalid_chunk(chunk_number: u64, num_chunks: u64) -> bool {
+            chunk_number > num_chunks || chunk_number == 0
+        }
        let parts: Vec<&str> = s.split('/').collect();
        match &parts[..] {
            [n_str] => {
-                let num_chunks = n_str
-                    .parse()
+                let num_chunks = parse_size(n_str)
                    .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
                if num_chunks > 0 {
                    Ok(Self::Bytes(num_chunks))
@ -308,34 +492,44 @@ impl NumberType {
                    Err(NumberTypeError::NumberOfChunks(s.to_string()))
                }
            }
+            [k_str, n_str] if !k_str.starts_with('l') && !k_str.starts_with('r') => {
+                let num_chunks = parse_size(n_str)
+                    .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
+                let chunk_number = parse_size(k_str)
+                    .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?;
+                if is_invalid_chunk(chunk_number, num_chunks) {
+                    return Err(NumberTypeError::ChunkNumber(k_str.to_string()));
+                }
+                Ok(Self::KthBytes(chunk_number, num_chunks))
+            }
            ["l", n_str] => {
-                let num_chunks = n_str
-                    .parse()
+                let num_chunks = parse_size(n_str)
                    .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
                Ok(Self::Lines(num_chunks))
            }
            ["l", k_str, n_str] => {
-                let num_chunks = n_str
-                    .parse()
+                let num_chunks = parse_size(n_str)
                    .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
-                let chunk_number = k_str
-                    .parse()
+                let chunk_number = parse_size(k_str)
                    .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?;
+                if is_invalid_chunk(chunk_number, num_chunks) {
+                    return Err(NumberTypeError::ChunkNumber(k_str.to_string()));
+                }
                Ok(Self::KthLines(chunk_number, num_chunks))
            }
            ["r", n_str] => {
-                let num_chunks = n_str
-                    .parse()
+                let num_chunks = parse_size(n_str)
                    .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
                Ok(Self::RoundRobin(num_chunks))
            }
            ["r", k_str, n_str] => {
-                let num_chunks = n_str
-                    .parse()
+                let num_chunks = parse_size(n_str)
                    .map_err(|_| NumberTypeError::NumberOfChunks(n_str.to_string()))?;
-                let chunk_number = k_str
-                    .parse()
+                let chunk_number = parse_size(k_str)
                    .map_err(|_| NumberTypeError::ChunkNumber(k_str.to_string()))?;
+                if is_invalid_chunk(chunk_number, num_chunks) {
+                    return Err(NumberTypeError::ChunkNumber(k_str.to_string()));
+                }
                Ok(Self::KthRoundRobin(chunk_number, num_chunks))
            }
            _ => Err(NumberTypeError::NumberOfChunks(s.to_string())),
@ -395,7 +589,7 @@ impl fmt::Display for StrategyError {

 impl Strategy {
    /// Parse a strategy from the command-line arguments.
-    fn from(matches: &ArgMatches) -> Result<Self, StrategyError> {
+    fn from(matches: &ArgMatches, obs_lines: &Option<String>) -> Result<Self, StrategyError> {
        fn get_and_parse(
            matches: &ArgMatches,
            option: &str,
@ -403,7 +597,7 @@ impl Strategy {
            error: fn(ParseSizeError) -> StrategyError,
        ) -> Result<Strategy, StrategyError> {
            let s = matches.get_one::<String>(option).unwrap();
-            let n = parse_size(s).map_err(error)?;
+            let n = parse_size_max(s).map_err(error)?;
            if n > 0 {
                Ok(strategy(n))
            } else {
@ -413,28 +607,40 @@ impl Strategy {
        // Check that the user is not specifying more than one strategy.
        //
        // Note: right now, this exact behavior cannot be handled by
-        // `ArgGroup` since `ArgGroup` considers a default value `Arg`
-        // as "defined".
+        // overrides_with_all() due to obsolete lines value option
        match (
+            obs_lines,
            matches.value_source(OPT_LINES) == Some(ValueSource::CommandLine),
            matches.value_source(OPT_BYTES) == Some(ValueSource::CommandLine),
            matches.value_source(OPT_LINE_BYTES) == Some(ValueSource::CommandLine),
            matches.value_source(OPT_NUMBER) == Some(ValueSource::CommandLine),
        ) {
-            (false, false, false, false) => Ok(Self::Lines(1000)),
-            (true, false, false, false) => {
+            (Some(v), false, false, false, false) => {
+                let v = parse_size_max(v).map_err(|_| {
+                    StrategyError::Lines(ParseSizeError::ParseFailure(v.to_string()))
+                })?;
+                if v > 0 {
+                    Ok(Self::Lines(v))
+                } else {
+                    Err(StrategyError::Lines(ParseSizeError::ParseFailure(
+                        v.to_string(),
+                    )))
+                }
+            }
+            (None, false, false, false, false) => Ok(Self::Lines(1000)),
+            (None, true, false, false, false) => {
                get_and_parse(matches, OPT_LINES, Self::Lines, StrategyError::Lines)
            }
-            (false, true, false, false) => {
+            (None, false, true, false, false) => {
                get_and_parse(matches, OPT_BYTES, Self::Bytes, StrategyError::Bytes)
            }
-            (false, false, true, false) => get_and_parse(
+            (None, false, false, true, false) => get_and_parse(
                matches,
                OPT_LINE_BYTES,
                Self::LineBytes,
                StrategyError::Bytes,
            ),
-            (false, false, false, true) => {
+            (None, false, false, false, true) => {
                let s = matches.get_one::<String>(OPT_NUMBER).unwrap();
                let number_type = NumberType::from(s).map_err(StrategyError::NumberType)?;
                Ok(Self::Number(number_type))
@ -553,7 +759,7 @@ impl fmt::Display for SettingsError {

 impl Settings {
    /// Parse a strategy from the command-line arguments.
-    fn from(matches: &ArgMatches) -> Result<Self, SettingsError> {
+    fn from(matches: &ArgMatches, obs_lines: &Option<String>) -> Result<Self, SettingsError> {
        let additional_suffix = matches
            .get_one::<String>(OPT_ADDITIONAL_SUFFIX)
            .unwrap()
@ -561,7 +767,7 @@ impl Settings {
        if additional_suffix.contains('/') {
            return Err(SettingsError::SuffixContainsSeparator(additional_suffix));
        }
-        let strategy = Strategy::from(matches).map_err(SettingsError::Strategy)?;
+        let strategy = Strategy::from(matches, obs_lines).map_err(SettingsError::Strategy)?;
        let (suffix_type, suffix_start) = suffix_type_from(matches)?;
        let suffix_length_str = matches.get_one::<String>(OPT_SUFFIX_LENGTH).unwrap();
        let suffix_length: usize = suffix_length_str
@ -1089,7 +1295,7 @@ where
    // If we would have written zero chunks of output, then terminate
    // immediately. This happens on `split -e -n 3 /dev/null`, for
    // example.
-    if num_chunks == 0 {
+    if num_chunks == 0 || num_bytes == 0 {
        return Ok(());
    }

@ -1144,6 +1350,93 @@ where
    }
 }

+/// Write the k-th byte chunk of a file to stdout.
+///
+/// This function is like [`split_into_n_chunks_by_byte`], except that no
+/// output files are created: only the chunk selected by `chunk_number`
+/// (counted from 1) is produced, and it is written to stdout.
+///
+/// # Errors
+///
+/// This function returns an error if the size of the input cannot be
+/// determined, or if there is a problem reading from `reader` or writing
+/// to stdout.
+fn kth_chunks_by_byte<R>(
+    settings: &Settings,
+    reader: &mut R,
+    chunk_number: u64,
+    num_chunks: u64,
+) -> UResult<()>
+where
+    R: BufRead,
+{
+    // The chunk size is derived from the size of the input file in bytes.
+    //
+    // NOTE: the `elide_empty_files` parameter is ignored here,
+    // since no files are generated and the output goes to stdout.
+    // When more chunks are requested than there are bytes in the file,
+    // the chunks past the end of the input come out empty.
+    let input_meta = metadata(&settings.input).map_err(|_| {
+        USimpleError::new(1, format!("{}: cannot determine file size", settings.input))
+    })?;
+
+    let total_bytes = input_meta.len();
+    // Nothing can be written for an empty input, so stop right away.
+    if total_bytes == 0 {
+        return Ok(());
+    }
+
+    // Write to stdout instead of to a file.
+    let stdout = std::io::stdout();
+    let mut writer = stdout.lock();
+
+    let chunk_size = (total_bytes / (num_chunks)).max(1);
+    let mut remaining: usize = total_bytes.try_into().unwrap();
+
+    let mut current = 1;
+    while remaining > 0 {
+        let mut buf: Vec<u8> = Vec::new();
+        // Every chunk holds `chunk_size` bytes, except the last one:
+        // it takes all the bytes that are left, so that nothing is lost
+        // when the input size is not evenly divisible by `num_chunks`.
+        let limit = if current == num_chunks {
+            remaining.try_into().unwrap()
+        } else {
+            chunk_size
+        };
+        match reader.by_ref().take(limit).read_to_end(&mut buf) {
+            Ok(n_bytes) => remaining -= n_bytes,
+            Err(error) => {
+                return Err(USimpleError::new(
+                    1,
+                    format!("{}: cannot read from input : {}", settings.input, error),
+                ));
+            }
+        }
+        // Chunks in front of the requested one are read and discarded.
+        if current == chunk_number {
+            writer.write_all(&buf)?;
+            break;
+        }
+        current += 1;
+    }
+    Ok(())
+}
+
 /// Split a file into a specific number of chunks by line.
 ///
 /// This function always creates one output file for each chunk, even
@ -1319,6 +1612,50 @@ where
    Ok(())
 }

+/// Print a single chunk of a file to stdout, where lines are assigned
+/// to chunks via round-robin.
+///
+/// This function is like [`kth_chunk_by_line`], as it writes to stdout
+/// and prints one chunk only. It is also like
+/// [`split_into_n_chunks_by_line_round_robin`], as lines are distributed
+/// over the chunks in round robin fashion.
+///
+/// `chunk_number` is zero-based here: a line is printed when its
+/// zero-based index modulo `num_chunks` equals `chunk_number`.
+/// Each printed line is followed by a `\n`.
+///
+/// # Errors
+///
+/// This function returns an error if there is a problem reading from
+/// `reader` or writing to stdout.
+///
+/// # See also
+///
+/// * [`split_into_n_chunks_by_line_round_robin`], which splits its input in the
+///   same way, but writes each chunk to its own file.
+fn kth_chunk_by_line_round_robin<R>(
+    _settings: &Settings,
+    reader: &mut R,
+    chunk_number: u64,
+    num_chunks: u64,
+) -> UResult<()>
+where
+    R: BufRead,
+{
+    // Write to stdout instead of to a file.
+    let stdout = std::io::stdout();
+    let mut writer = stdout.lock();
+
+    let modulus: usize = num_chunks.try_into().unwrap();
+    let wanted: usize = chunk_number.try_into().unwrap();
+    for (line_index, line_result) in reader.lines().enumerate() {
+        // a read error aborts the run, even on a line of another chunk
+        let line = line_result?;
+        if (line_index % modulus) != wanted {
+            continue;
+        }
+        writer.write_all(line.as_bytes())?;
+        writer.write_all(b"\n")?;
+    }
+    Ok(())
+}
+
 fn split(settings: &Settings) -> UResult<()> {
    let mut reader = BufReader::new(if settings.input == "-" {
        Box::new(stdin()) as Box<dyn Read>
@ -1336,6 +1673,9 @@ fn split(settings: &Settings) -> UResult<()> {
        Strategy::Number(NumberType::Bytes(num_chunks)) => {
            split_into_n_chunks_by_byte(settings, &mut reader, num_chunks)
        }
+        Strategy::Number(NumberType::KthBytes(chunk_number, num_chunks)) => {
+            kth_chunks_by_byte(settings, &mut reader, chunk_number, num_chunks)
+        }
        Strategy::Number(NumberType::Lines(num_chunks)) => {
            split_into_n_chunks_by_line(settings, &mut reader, num_chunks)
        }
@ -1348,7 +1688,12 @@ fn split(settings: &Settings) -> UResult<()> {
        Strategy::Number(NumberType::RoundRobin(num_chunks)) => {
            split_into_n_chunks_by_line_round_robin(settings, &mut reader, num_chunks)
        }
-        Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")),
+        Strategy::Number(NumberType::KthRoundRobin(chunk_number, num_chunks)) => {
+            // The chunk number is given as a 1-indexed number, but it
+            // is a little easier to deal with a 0-indexed number.
+            let chunk_number = chunk_number - 1;
+            kth_chunk_by_line_round_robin(settings, &mut reader, chunk_number, num_chunks)
+        }
        Strategy::Lines(chunk_size) => {
            let mut writer = LineChunkWriter::new(chunk_size, settings)?;
            match std::io::copy(&mut reader, &mut writer) {
@ -1451,6 +1796,18 @@ mod tests {
            NumberType::from("l/abc/456").unwrap_err(),
            NumberTypeError::ChunkNumber("abc".to_string())
        );
+        assert_eq!(
+            NumberType::from("l/456/123").unwrap_err(),
+            NumberTypeError::ChunkNumber("456".to_string())
+        );
+        assert_eq!(
+            NumberType::from("r/456/123").unwrap_err(),
+            NumberTypeError::ChunkNumber("456".to_string())
+        );
+        assert_eq!(
+            NumberType::from("456/123").unwrap_err(),
+            NumberTypeError::ChunkNumber("456".to_string())
+        );
        // In GNU split, the number of chunks get precedence:
        //
        //     $ split -n l/abc/xyz
@ -1486,6 +1843,7 @@ mod tests {
    #[test]
    fn test_number_type_num_chunks() {
        assert_eq!(NumberType::from("123").unwrap().num_chunks(), 123);
+        assert_eq!(NumberType::from("123/456").unwrap().num_chunks(), 456);
        assert_eq!(NumberType::from("l/123").unwrap().num_chunks(), 123);
        assert_eq!(NumberType::from("l/123/456").unwrap().num_chunks(), 456);
        assert_eq!(NumberType::from("r/123").unwrap().num_chunks(), 123);
--- a/src/uucore/src/lib/parser/parse_size.rs
+++ b/src/uucore/src/lib/parser/parse_size.rs
@ -7,6 +7,7 @@

 use std::error::Error;
 use std::fmt;
+use std::num::IntErrorKind;

 use crate::display::Quotable;

@ -201,8 +202,10 @@ impl<'parser> Parser<'parser> {
        radix: u32,
        original_size: &str,
    ) -> Result<u64, ParseSizeError> {
-        u64::from_str_radix(numeric_string, radix)
-            .map_err(|_| ParseSizeError::ParseFailure(original_size.to_string()))
+        u64::from_str_radix(numeric_string, radix).map_err(|e| match e.kind() {
+            IntErrorKind::PosOverflow => ParseSizeError::size_too_big(original_size),
+            _ => ParseSizeError::ParseFailure(original_size.to_string()),
+        })
    }
 }

@ -232,6 +235,23 @@ pub fn parse_size(size: &str) -> Result<u64, ParseSizeError> {
    Parser::default().parse(size)
 }

+/// Same as [`parse_size`], except that an overflow is not an error:
+/// a [`ParseSizeError::SizeTooBig`] failure is turned into `Ok(u64::MAX)`.
+/// Any other result, success or error, is passed through unchanged.
+/// GNU lib/coreutils include similar functionality
+/// and GNU test suite checks this behavior for some utils
+pub fn parse_size_max(size: &str) -> Result<u64, ParseSizeError> {
+    match Parser::default().parse(size) {
+        Err(ParseSizeError::SizeTooBig(_)) => Ok(u64::MAX),
+        outcome => outcome,
+    }
+}
+
 #[derive(Debug, PartialEq, Eq)]
 pub enum ParseSizeError {
    InvalidSuffix(String), // Suffix
@ -392,6 +412,14 @@ mod tests {
        );
    }

+    #[test]
+    #[cfg(not(target_pointer_width = "128"))]
+    fn overflow_to_max_x64() {
+        // each input is expected to overflow and come back as u64::MAX
+        let too_big_sizes = ["18446744073709551616", "10000000000000000000000", "1Y"];
+        for too_big in too_big_sizes.iter() {
+            assert_eq!(Ok(u64::MAX), parse_size_max(too_big));
+        }
+    }
+
    #[test]
    fn invalid_suffix() {
        let test_strings = ["5mib", "1eb", "1H"];
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@ -2,7 +2,7 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
-// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen
+// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc

 use crate::common::util::{AtPath, TestScenario};
 use rand::{thread_rng, Rng, SeedableRng};
@ -170,6 +170,22 @@ fn test_split_str_prefixed_chunks_by_bytes() {
    assert_eq!(glob.collate(), at.read_bytes(name));
 }

+/// Test the short bytes option with its value glued to it (`-b1000`)
+#[test]
+fn test_split_by_bytes_short_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let input = "split_by_bytes_short_concatenated_with_value";
+    RandomFile::new(&at, input).add_bytes(10000);
+    ucmd.args(&["-b1000", input]).succeeds();
+
+    // 10000 bytes in pieces of 1000 bytes: ten files of 1000 bytes each
+    let chunks = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(chunks.count(), 10);
+    for chunk in chunks.collect() {
+        let chunk_len = chunks.directory.metadata(&chunk).len();
+        assert_eq!(chunk_len, 1000);
+    }
+    // glued back together the pieces are the input again
+    assert_eq!(chunks.collate(), at.read_bytes(input));
+}
+
 // This is designed to test what happens when the desired part size is not a
 // multiple of the buffer size and we hopefully don't overshoot the desired part
 // size.
@ -238,6 +254,18 @@ fn test_additional_suffix_no_slash() {
        .usage_error("invalid suffix 'a/b', contains directory separator");
 }

+/// Test that a value starting with a hyphen (`-300`) is accepted
+/// as the additional suffix and ends up in the output file names
+#[test]
+fn test_split_additional_suffix_hyphen_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let input = "split_additional_suffix";
+    RandomFile::new(&at, input).add_lines(2000);
+    ucmd.args(&["--additional-suffix", "-300", input]).succeeds();
+
+    // two output files are expected, each one ending in `-300`
+    let chunks = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]-300$");
+    assert_eq!(chunks.count(), 2);
+    assert_eq!(chunks.collate(), at.read_bytes(input));
+}
+
 // note: the test_filter* tests below are unix-only
 // windows support has been waived for now because of the difficulty of getting
 // the `cmd` call right
@ -312,12 +340,292 @@ fn test_split_lines_number() {
        .succeeds()
        .no_stderr()
        .no_stdout();
+    scene
+        .ucmd()
+        .args(&["--lines", "0", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: invalid number of lines: 0\n");
+    scene
+        .ucmd()
+        .args(&["-0", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: invalid number of lines: 0\n");
    scene
        .ucmd()
        .args(&["--lines", "2fb", "file"])
        .fails()
        .code_is(1)
        .stderr_only("split: invalid number of lines: '2fb'\n");
+    scene
+        .ucmd()
+        .args(&["--lines", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: invalid number of lines: 'file'\n");
+}
+
+/// Test short lines option with value concatenated
+#[test]
+fn test_split_lines_short_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "split_num_prefixed_chunks_by_lines";
+    RandomFile::new(&at, name).add_lines(10000);
+    ucmd.args(&["-l1000", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 10);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
+/// Test for obsolete lines option standalone
+#[test]
+fn test_split_obs_lines_standalone() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "obs-lines-standalone";
+    RandomFile::new(&at, name).add_lines(4);
+    ucmd.args(&["-2", name]).succeeds().no_stderr().no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
+/// Test for obsolete lines option standalone overflow
+#[test]
+fn test_split_obs_lines_standalone_overflow() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "obs-lines-standalone";
+    RandomFile::new(&at, name).add_lines(4);
+    ucmd.args(&["-99999999999999999991", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 1);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
+/// Test for obsolete lines option as part of invalid combined short options
+#[test]
+fn test_split_obs_lines_within_invalid_combined_shorts() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+
+    scene
+        .ucmd()
+        .args(&["-2fb", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("error: unexpected argument '-f' found\n");
+}
+
+/// Test for obsolete lines option as part of combined short options
+#[test]
+fn test_split_obs_lines_within_combined_shorts() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    let name = "obs-lines-within-shorts";
+    RandomFile::new(&at, name).add_lines(400);
+
+    scene
+        .ucmd()
+        .args(&["-x200de", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x\d\d$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name))
+}
+
+/// Test for obsolete lines option as part of combined short options with trailing suffix length with value
+#[test]
+fn test_split_obs_lines_within_combined_shorts_tailing_suffix_length() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "obs-lines-combined-shorts-tailing-suffix-length";
+    RandomFile::new(&at, name).add_lines(1000);
+    ucmd.args(&["-d200a4", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x\d\d\d\d$");
+    assert_eq!(glob.count(), 5);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
+/// Test for obsolete lines option at the start of combined short options
+#[test]
+fn test_split_obs_lines_starts_combined_shorts() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    let name = "obs-lines-starts-shorts";
+    RandomFile::new(&at, name).add_lines(400);
+
+    scene
+        .ucmd()
+        .args(&["-200xd", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x\d\d$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name))
+}
+
+/// Test for using both obsolete lines (standalone) option and short/long lines option simultaneously
+#[test]
+fn test_split_both_lines_and_obs_lines_standalone() {
+    // This test ensures that combining the lines option ('-l' or '--lines', with a value) with a
+    // standalone obsolete lines option (e.g. '-2') fails with "cannot split in more than one way".
+    // The hyphen-prefixed-value misuse case is covered by test_split_obs_lines_as_other_option_value.
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+
+    scene
+        .ucmd()
+        .args(&["-l", "2", "-2", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: cannot split in more than one way\n");
+    scene
+        .ucmd()
+        .args(&["--lines", "2", "-2", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: cannot split in more than one way\n");
+}
+
+/// Test for using obsolete lines option incorrectly, so it is treated as a hyphen prefixed value of other option
+#[test]
+fn test_split_obs_lines_as_other_option_value() {
+    // This test will ensure that:
+    // if obsolete lines option is used incorrectly and treated as a hyphen prefixed value of other option - it fails
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+
+    scene
+        .ucmd()
+        .args(&["--lines", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of lines: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["-l", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of lines: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["-a", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid suffix length: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["--suffix-length", "-d200e", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid suffix length: '-d200e'\n");
+    scene
+        .ucmd()
+        .args(&["-C", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of bytes: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["--line-bytes", "-x200a4", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of bytes: '-x200a4'\n");
+    scene
+        .ucmd()
+        .args(&["-b", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of bytes: '-200'\n");
+    scene
+        .ucmd()
+        .args(&["--bytes", "-200xd", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of bytes: '-200xd'\n");
+    scene
+        .ucmd()
+        .args(&["-n", "-200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of chunks: -200\n");
+    scene
+        .ucmd()
+        .args(&["--number", "-e200", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: invalid number of chunks: -e200\n");
+}
+
+/// Test for using more than one obsolete lines option (standalone)
+/// last one wins
+#[test]
+fn test_split_multiple_obs_lines_standalone() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    let name = "multiple-obs-lines";
+    RandomFile::new(&at, name).add_lines(400);
+
+    scene
+        .ucmd()
+        .args(&["-3000", "-200", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name))
+}
+
+/// Test for using more than one obsolete lines option within combined shorts
+/// last one wins
+#[test]
+fn test_split_multiple_obs_lines_within_combined() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    let name = "multiple-obs-lines";
+    RandomFile::new(&at, name).add_lines(400);
+
+    scene
+        .ucmd()
+        .args(&["-d5000x", "-e200d", name])
+        .succeeds()
+        .no_stderr()
+        .no_stdout();
+    let glob = Glob::new(&at, ".", r"x\d\d$");
+    assert_eq!(glob.count(), 2);
+    assert_eq!(glob.collate(), at.read_bytes(name))
+}
+
+/// Test for using an obsolete lines option within combined shorts together with the conflicting -n option
+#[test]
+fn test_split_obs_lines_within_combined_with_number() {
+    let scene = TestScenario::new(util_name!());
+    let at = &scene.fixtures;
+    at.touch("file");
+
+    scene
+        .ucmd()
+        .args(&["-3dxen", "4", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: cannot split in more than one way\n");
+    scene
+        .ucmd()
+        .args(&["-dxe30n", "4", "file"])
+        .fails()
+        .code_is(1)
+        .stderr_contains("split: cannot split in more than one way\n");
 }

 #[test]
@ -327,14 +635,6 @@ fn test_split_invalid_bytes_size() {
        .fails()
        .code_is(1)
        .stderr_only("split: invalid number of bytes: '1024R'\n");
-    #[cfg(not(target_pointer_width = "128"))]
-    new_ucmd!()
-        .args(&["-b", "1Y"])
-        .fails()
-        .code_is(1)
-        .stderr_only(
-            "split: invalid number of bytes: '1Y': Value too large for defined data type\n",
-        );
    #[cfg(target_pointer_width = "32")]
    {
        let sizes = ["1000G", "10T"];
@ -345,19 +645,29 @@ fn test_split_invalid_bytes_size() {
 }

 #[test]
+fn test_split_overflow_bytes_size() {
+    #[cfg(not(target_pointer_width = "128"))]
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "test_split_overflow_bytes_size";
+    RandomFile::new(&at, name).add_bytes(1000);
+    ucmd.args(&["-b", "1Y", name]).succeeds();
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 1);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
+#[test]
+#[cfg(target_pointer_width = "32")]
 fn test_split_chunks_num_chunks_oversized_32() {
-    #[cfg(target_pointer_width = "32")]
-    {
    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;
    at.touch("file");
    scene
        .ucmd()
-            .args(&["--number", "5000000000", "file"])
+        .args(&["--number", "5000000000", "sixhundredfiftyonebytes.txt"])
        .fails()
        .code_is(1)
        .stderr_only("split: Number of chunks too big\n");
-    }
 }

 #[test]
@ -369,6 +679,15 @@ fn test_split_stdin_num_chunks() {
        .stderr_only("split: -: cannot determine file size\n");
 }

+#[test]
+fn test_split_stdin_num_kth_chunk() {
+    new_ucmd!()
+        .args(&["--number=1/2"])
+        .fails()
+        .code_is(1)
+        .stderr_only("split: -: cannot determine file size\n");
+}
+
 fn file_read(at: &AtPath, filename: &str) -> String {
    let mut s = String::new();
    at.open(filename).read_to_string(&mut s).unwrap();
@ -471,7 +790,7 @@ creating file 'xaf'
 }

 #[test]
-fn test_number() {
+fn test_number_n() {
    let (at, mut ucmd) = at_and_ucmd!();
    let file_read = |f| {
        let mut s = String::new();
@ -484,6 +803,98 @@ fn test_number() {
    assert_eq!(file_read("xac"), "klmno");
    assert_eq!(file_read("xad"), "pqrst");
    assert_eq!(file_read("xae"), "uvwxyz\n");
+    #[cfg(unix)]
+    new_ucmd!()
+        .args(&["--number=100", "/dev/null"])
+        .succeeds()
+        .stdout_only("");
+}
+
+#[test]
+fn test_number_kth_of_n() {
+    new_ucmd!()
+        .args(&["--number=3/5", "asciilowercase.txt"])
+        .succeeds()
+        .stdout_only("klmno");
+    new_ucmd!()
+        .args(&["--number=5/5", "asciilowercase.txt"])
+        .succeeds()
+        .stdout_only("uvwxyz\n");
+    new_ucmd!()
+        .args(&["-e", "--number=99/100", "asciilowercase.txt"])
+        .succeeds()
+        .stdout_only("");
+    #[cfg(unix)]
+    new_ucmd!()
+        .args(&["--number=3/10", "/dev/null"])
+        .succeeds()
+        .stdout_only("");
+    #[cfg(target_pointer_width = "64")]
+    new_ucmd!()
+        .args(&[
+            "--number=r/9223372036854775807/18446744073709551615",
+            "asciilowercase.txt",
+        ])
+        .succeeds()
+        .stdout_only("");
+    new_ucmd!()
+        .args(&["--number=0/5", "asciilowercase.txt"])
+        .fails()
+        .stderr_contains("split: invalid chunk number: 0");
+    new_ucmd!()
+        .args(&["--number=10/5", "asciilowercase.txt"])
+        .fails()
+        .stderr_contains("split: invalid chunk number: 10");
+    #[cfg(target_pointer_width = "64")]
+    new_ucmd!()
+        .args(&[
+            "--number=9223372036854775807/18446744073709551616",
+            "asciilowercase.txt",
+        ])
+        .fails()
+        .stderr_contains("split: invalid number of chunks: 18446744073709551616");
+}
+
+#[test]
+fn test_number_kth_of_n_round_robin() {
+    new_ucmd!()
+        .args(&["--number", "r/2/3", "fivelines.txt"])
+        .succeeds()
+        .stdout_only("2\n5\n");
+    new_ucmd!()
+        .args(&["--number", "r/1/4", "fivelines.txt"])
+        .succeeds()
+        .stdout_only("1\n5\n");
+    new_ucmd!()
+        .args(&["-e", "--number", "r/7/7", "fivelines.txt"])
+        .succeeds()
+        .stdout_only("");
+    #[cfg(target_pointer_width = "64")]
+    new_ucmd!()
+        .args(&[
+            "--number",
+            "r/9223372036854775807/18446744073709551615",
+            "fivelines.txt",
+        ])
+        .succeeds()
+        .stdout_only("");
+    #[cfg(target_pointer_width = "64")]
+    new_ucmd!()
+        .args(&[
+            "--number",
+            "r/9223372036854775807/18446744073709551616",
+            "fivelines.txt",
+        ])
+        .fails()
+        .stderr_contains("split: invalid number of chunks: 18446744073709551616");
+    new_ucmd!()
+        .args(&["--number", "r/0/3", "fivelines.txt"])
+        .fails()
+        .stderr_contains("split: invalid chunk number: 0");
+    new_ucmd!()
+        .args(&["--number", "r/10/3", "fivelines.txt"])
+        .fails()
+        .stderr_contains("split: invalid chunk number: 10");
 }

 #[test]
@ -524,6 +935,19 @@ fn test_invalid_suffix_length() {
        .stderr_contains("invalid suffix length: 'xyz'");
 }

+/// Test short suffix length option with value concatenated
+#[test]
+fn test_split_suffix_length_short_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "split_num_prefixed_chunks_by_lines";
+    RandomFile::new(&at, name).add_lines(10000);
+    ucmd.args(&["-a4", name]).succeeds();
+
+    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]][[:alpha:]][[:alpha:]]$");
+    assert_eq!(glob.count(), 10);
+    assert_eq!(glob.collate(), at.read_bytes(name));
+}
+
 #[test]
 fn test_include_newlines() {
    let (at, mut ucmd) = at_and_ucmd!();
@ -542,6 +966,19 @@ fn test_include_newlines() {
    assert_eq!(s, "5\n");
 }

+/// Test short number of chunks option concatenated with value
+#[test]
+fn test_split_number_chunks_short_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-n3", "threebytes.txt"])
+        .succeeds()
+        .no_stdout()
+        .no_stderr();
+    assert_eq!(at.read("xaa"), "a");
+    assert_eq!(at.read("xab"), "b");
+    assert_eq!(at.read("xac"), "c");
+}
+
 #[test]
 fn test_allow_empty_files() {
    let (at, mut ucmd) = at_and_ucmd!();
@ -616,6 +1053,25 @@ fn test_line_bytes() {
    assert_eq!(at.read("xad"), "ee\n");
 }

+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_line_bytes_overflow() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-C", "18446744073709551616", "letters.txt"])
+        .succeeds();
+    assert_eq!(at.read("xaa"), "aaaaaaaaa\nbbbb\ncccc\ndd\nee\n");
+}
+
+#[test]
+fn test_line_bytes_concatenated_with_value() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-C8", "letters.txt"]).succeeds();
+    assert_eq!(at.read("xaa"), "aaaaaaaa");
+    assert_eq!(at.read("xab"), "a\nbbbb\n");
+    assert_eq!(at.read("xac"), "cccc\ndd\n");
+    assert_eq!(at.read("xad"), "ee\n");
+}
+
 #[test]
 fn test_line_bytes_no_final_newline() {
    let (at, mut ucmd) = at_and_ucmd!();
@ -970,3 +1426,49 @@ fn test_split_invalid_input() {
        .no_stdout()
        .stderr_contains("split: invalid number of chunks: 0");
 }
+
+/// Test for invalid (non UTF-8) bytes in the arguments - unix
+/// clap is expected to fail with an "invalid UTF-8" error
+#[test]
+#[cfg(unix)]
+fn test_split_non_utf8_argument_unix() {
+    use std::ffi::OsStr;
+    use std::os::unix::ffi::OsStrExt;
+
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "test_split_non_utf8_argument";
+    let opt = OsStr::from_bytes("--additional-suffix".as_bytes());
+    RandomFile::new(&at, name).add_lines(2000);
+    // Here, the values 0x66 and 0x6f correspond to 'f' and 'o'
+    // respectively. The value 0x80 is a lone continuation byte, invalid
+    // in a UTF-8 sequence.
+    let opt_value = [0x66, 0x6f, 0x80, 0x6f];
+    let opt_value = OsStr::from_bytes(&opt_value[..]);
+    let name = OsStr::from_bytes(name.as_bytes());
+    ucmd.args(&[opt, opt_value, name])
+        .fails()
+        .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
+}
+
+/// Test for invalid Unicode (a lone UTF-16 surrogate, not convertible to UTF-8) in the arguments - windows
+/// clap is expected to fail with an "invalid UTF-8" error
+#[test]
+#[cfg(windows)]
+fn test_split_non_utf8_argument_windows() {
+    use std::ffi::OsString;
+    use std::os::windows::ffi::OsStringExt;
+
+    let (at, mut ucmd) = at_and_ucmd!();
+    let name = "test_split_non_utf8_argument";
+    let opt = OsString::from("--additional-suffix");
+    RandomFile::new(&at, name).add_lines(2000);
+    // Here the values 0x0066 and 0x006f correspond to 'f' and 'o'
+    // respectively. The value 0xD800 is a lone surrogate half, invalid
+    // in a UTF-16 sequence.
+    let opt_value = [0x0066, 0x006f, 0xD800, 0x006f];
+    let opt_value = OsString::from_wide(&opt_value[..]);
+    let name = OsString::from(name);
+    ucmd.args(&[opt, opt_value, name])
+        .fails()
+        .stderr_contains("error: invalid UTF-8 was detected in one or more arguments");
+}