mirror of
https://github.com/uutils/coreutils
synced 2024-11-16 01:38:04 +00:00
Merge pull request #2537 from jfinkels/expand-improve-tabs-argument-2
expand: expand support for --tabs arguments
This commit is contained in:
commit
1eb3b62c46
2 changed files with 286 additions and 27 deletions
|
@ -36,28 +36,86 @@ fn get_usage() -> String {
|
|||
format!("{0} [OPTION]... [FILE]...", executable!())
|
||||
}
|
||||
|
||||
fn tabstops_parse(s: String) -> Vec<usize> {
|
||||
let words = s.split(',');
|
||||
/// The mode to use when replacing tabs beyond the last one specified in
|
||||
/// the `--tabs` argument.
|
||||
enum RemainingMode {
|
||||
None,
|
||||
Slash,
|
||||
Plus,
|
||||
}
|
||||
|
||||
let nums = words
|
||||
.map(|sn| {
|
||||
sn.parse::<usize>()
|
||||
.unwrap_or_else(|_| crash!(1, "{}\n", "tab size contains invalid character(s)"))
|
||||
})
|
||||
.collect::<Vec<usize>>();
|
||||
/// Decide whether the character is either a space or a comma.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust,ignore
|
||||
/// assert!(is_space_or_comma(' '));
|
||||
/// assert!(is_space_or_comma(','));
|
||||
/// assert!(!is_space_or_comma('a'));
|
||||
/// ```
|
||||
fn is_space_or_comma(c: char) -> bool {
|
||||
c == ' ' || c == ','
|
||||
}
|
||||
|
||||
if nums.iter().any(|&n| n == 0) {
|
||||
crash!(1, "{}\n", "tab size cannot be 0");
|
||||
/// Parse a list of tabstops from a `--tabs` argument.
|
||||
///
|
||||
/// This function returns both the vector of numbers appearing in the
|
||||
/// comma- or space-separated list, and also an optional mode, specified
|
||||
/// by either a "/" or a "+" character appearing before the final number
|
||||
/// in the list. This mode defines the strategy to use for computing the
|
||||
/// number of spaces to use for columns beyond the end of the tab stop
|
||||
/// list specified here.
|
||||
fn tabstops_parse(s: String) -> (RemainingMode, Vec<usize>) {
|
||||
// Leading commas and spaces are ignored.
|
||||
let s = s.trim_start_matches(is_space_or_comma);
|
||||
|
||||
// If there were only commas and spaces in the string, just use the
|
||||
// default tabstops.
|
||||
if s.is_empty() {
|
||||
return (RemainingMode::None, vec![DEFAULT_TABSTOP]);
|
||||
}
|
||||
|
||||
if let (false, _) = nums
|
||||
.iter()
|
||||
.fold((true, 0), |(acc, last), &n| (acc && last <= n, n))
|
||||
{
|
||||
crash!(1, "{}\n", "tab sizes must be ascending");
|
||||
}
|
||||
let mut nums = vec![];
|
||||
let mut remaining_mode = RemainingMode::None;
|
||||
for word in s.split(is_space_or_comma) {
|
||||
let bytes = word.as_bytes();
|
||||
for i in 0..bytes.len() {
|
||||
match bytes[i] {
|
||||
b'+' => {
|
||||
remaining_mode = RemainingMode::Plus;
|
||||
}
|
||||
b'/' => {
|
||||
remaining_mode = RemainingMode::Slash;
|
||||
}
|
||||
_ => {
|
||||
// Parse a number from the byte sequence.
|
||||
let num = from_utf8(&bytes[i..]).unwrap().parse::<usize>().unwrap();
|
||||
|
||||
nums
|
||||
// Tab size must be positive.
|
||||
if num == 0 {
|
||||
crash!(1, "{}\n", "tab size cannot be 0");
|
||||
}
|
||||
|
||||
// Tab sizes must be ascending.
|
||||
if let Some(last_stop) = nums.last() {
|
||||
if *last_stop >= num {
|
||||
crash!(1, "tab sizes must be ascending");
|
||||
}
|
||||
}
|
||||
|
||||
// Append this tab stop to the list of all tabstops.
|
||||
nums.push(num);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// If no numbers could be parsed (for example, if `s` were "+,+,+"),
|
||||
// then just use the default tabstops.
|
||||
if nums.is_empty() {
|
||||
nums = vec![DEFAULT_TABSTOP];
|
||||
}
|
||||
(remaining_mode, nums)
|
||||
}
|
||||
|
||||
struct Options {
|
||||
|
@ -66,13 +124,17 @@ struct Options {
|
|||
tspaces: String,
|
||||
iflag: bool,
|
||||
uflag: bool,
|
||||
|
||||
/// Strategy for expanding tabs for columns beyond those specified
|
||||
/// in `tabstops`.
|
||||
remaining_mode: RemainingMode,
|
||||
}
|
||||
|
||||
impl Options {
|
||||
fn new(matches: &ArgMatches) -> Options {
|
||||
let tabstops = match matches.value_of(options::TABS) {
|
||||
let (remaining_mode, tabstops) = match matches.value_of(options::TABS) {
|
||||
Some(s) => tabstops_parse(s.to_string()),
|
||||
None => vec![DEFAULT_TABSTOP],
|
||||
None => (RemainingMode::None, vec![DEFAULT_TABSTOP]),
|
||||
};
|
||||
|
||||
let iflag = matches.is_present(options::INITIAL);
|
||||
|
@ -102,6 +164,7 @@ impl Options {
|
|||
tspaces,
|
||||
iflag,
|
||||
uflag,
|
||||
remaining_mode,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -159,13 +222,41 @@ fn open(path: String) -> BufReader<Box<dyn Read + 'static>> {
|
|||
}
|
||||
}
|
||||
|
||||
fn next_tabstop(tabstops: &[usize], col: usize) -> usize {
|
||||
if tabstops.len() == 1 {
|
||||
tabstops[0] - col % tabstops[0]
|
||||
} else {
|
||||
match tabstops.iter().find(|&&t| t > col) {
|
||||
/// Compute the number of spaces to the next tabstop.
|
||||
///
|
||||
/// `tabstops` is the sequence of tabstop locations.
|
||||
///
|
||||
/// `col` is the index of the current cursor in the line being written.
|
||||
///
|
||||
/// If `remaining_mode` is [`RemainingMode::Plus`], then the last entry
|
||||
/// in the `tabstops` slice is interpreted as a relative number of
|
||||
/// spaces, which this function will return for every input value of
|
||||
/// `col` beyond the end of the second-to-last element of `tabstops`.
|
||||
///
|
||||
/// If `remaining_mode` is [`RemainingMode::Slash`], then the last entry
|
||||
/// in the `tabstops` slice is interpreted as a tab width: for every
|
||||
/// input value of `col` beyond the second-to-last element of `tabstops`,
|
||||
/// this function returns the number of spaces to its next multiple.
|
||||
fn next_tabstop(tabstops: &[usize], col: usize, remaining_mode: &RemainingMode) -> usize {
|
||||
let num_tabstops = tabstops.len();
|
||||
match remaining_mode {
|
||||
RemainingMode::Plus => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) {
|
||||
Some(t) => t - col,
|
||||
None => 1,
|
||||
None => tabstops[num_tabstops - 1] - 1,
|
||||
},
|
||||
RemainingMode::Slash => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) {
|
||||
Some(t) => t - col,
|
||||
None => tabstops[num_tabstops - 1] - col % tabstops[num_tabstops - 1],
|
||||
},
|
||||
RemainingMode::None => {
|
||||
if num_tabstops == 1 {
|
||||
tabstops[0] - col % tabstops[0]
|
||||
} else {
|
||||
match tabstops.iter().find(|&&t| t > col) {
|
||||
Some(t) => t - col,
|
||||
None => 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -232,12 +323,16 @@ fn expand(options: Options) {
|
|||
match ctype {
|
||||
Tab => {
|
||||
// figure out how many spaces to the next tabstop
|
||||
let nts = next_tabstop(ts, col);
|
||||
let nts = next_tabstop(ts, col, &options.remaining_mode);
|
||||
col += nts;
|
||||
|
||||
// now dump out either spaces if we're expanding, or a literal tab if we're not
|
||||
if init || !options.iflag {
|
||||
safe_unwrap!(output.write_all(options.tspaces[..nts].as_bytes()));
|
||||
if nts <= options.tspaces.len() {
|
||||
safe_unwrap!(output.write_all(options.tspaces[..nts].as_bytes()));
|
||||
} else {
|
||||
safe_unwrap!(output.write_all(" ".repeat(nts).as_bytes()));
|
||||
};
|
||||
} else {
|
||||
safe_unwrap!(output.write_all(&buf[byte..byte + nbytes]));
|
||||
}
|
||||
|
@ -269,3 +364,30 @@ fn expand(options: Options) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::next_tabstop;
|
||||
use super::RemainingMode;
|
||||
|
||||
#[test]
|
||||
fn test_next_tabstop_remaining_mode_none() {
|
||||
assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::None), 1);
|
||||
assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::None), 2);
|
||||
assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::None), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_next_tabstop_remaining_mode_plus() {
|
||||
assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::Plus), 1);
|
||||
assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::Plus), 4);
|
||||
assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::Plus), 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_next_tabstop_remaining_mode_slash() {
|
||||
assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::Slash), 1);
|
||||
assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::Slash), 2);
|
||||
assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::Slash), 4);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,3 +53,140 @@ fn test_with_multiple_files() {
|
|||
.stdout_contains(" return")
|
||||
.stdout_contains(" ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_space_separated_list() {
|
||||
new_ucmd!()
|
||||
.args(&["--tabs", "3 6 9"])
|
||||
.pipe_in("a\tb\tc\td\te")
|
||||
.succeeds()
|
||||
.stdout_is("a b c d e");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_mixed_style_list() {
|
||||
new_ucmd!()
|
||||
.args(&["--tabs", ", 3,6 9"])
|
||||
.pipe_in("a\tb\tc\td\te")
|
||||
.succeeds()
|
||||
.stdout_is("a b c d e");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_empty_string() {
|
||||
new_ucmd!()
|
||||
.args(&["--tabs", ""])
|
||||
.pipe_in("a\tb\tc")
|
||||
.succeeds()
|
||||
.stdout_is("a b c");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_comma_only() {
|
||||
new_ucmd!()
|
||||
.args(&["--tabs", ","])
|
||||
.pipe_in("a\tb\tc")
|
||||
.succeeds()
|
||||
.stdout_is("a b c");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_space_only() {
|
||||
new_ucmd!()
|
||||
.args(&["--tabs", " "])
|
||||
.pipe_in("a\tb\tc")
|
||||
.succeeds()
|
||||
.stdout_is("a b c");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_slash() {
|
||||
new_ucmd!()
|
||||
.args(&["--tabs", "/"])
|
||||
.pipe_in("a\tb\tc")
|
||||
.succeeds()
|
||||
.stdout_is("a b c");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_plus() {
|
||||
new_ucmd!()
|
||||
.args(&["--tabs", "+"])
|
||||
.pipe_in("a\tb\tc")
|
||||
.succeeds()
|
||||
.stdout_is("a b c");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_trailing_slash() {
|
||||
new_ucmd!()
|
||||
.arg("--tabs=1,/5")
|
||||
.pipe_in("\ta\tb\tc")
|
||||
.succeeds()
|
||||
// 0 1
|
||||
// 01234567890
|
||||
.stdout_is(" a b c");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_trailing_slash_long_columns() {
|
||||
new_ucmd!()
|
||||
.arg("--tabs=1,/3")
|
||||
.pipe_in("\taaaa\tbbbb\tcccc")
|
||||
.succeeds()
|
||||
// 0 1
|
||||
// 01234567890123456
|
||||
.stdout_is(" aaaa bbbb cccc");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_trailing_plus() {
|
||||
new_ucmd!()
|
||||
.arg("--tabs=1,+5")
|
||||
.pipe_in("\ta\tb\tc")
|
||||
.succeeds()
|
||||
// 0 1
|
||||
// 012345678901
|
||||
.stdout_is(" a b c");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_trailing_plus_long_columns() {
|
||||
new_ucmd!()
|
||||
.arg("--tabs=1,+3")
|
||||
.pipe_in("\taaaa\tbbbb\tcccc")
|
||||
.succeeds()
|
||||
// 0 1
|
||||
// 012345678901234567
|
||||
.stdout_is(" aaaa bbbb cccc");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_must_be_ascending() {
|
||||
new_ucmd!()
|
||||
.arg("--tabs=1,1")
|
||||
.fails()
|
||||
.stderr_contains("tab sizes must be ascending");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_keep_last_trailing_specifier() {
|
||||
// If there are multiple trailing specifiers, use only the last one
|
||||
// before the number.
|
||||
new_ucmd!()
|
||||
.arg("--tabs=1,+/+/5")
|
||||
.pipe_in("\ta\tb\tc")
|
||||
.succeeds()
|
||||
// 0 1
|
||||
// 01234567890
|
||||
.stdout_is(" a b c");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tabs_comma_separated_no_numbers() {
|
||||
new_ucmd!()
|
||||
.arg("--tabs=+,/,+,/")
|
||||
.pipe_in("\ta\tb\tc")
|
||||
.succeeds()
|
||||
.stdout_is(" a b c");
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue