Merge pull request #2537 from jfinkels/expand-improve-tabs-argument-2

expand: expand support for --tabs arguments
This commit is contained in:
Sylvestre Ledru 2021-07-31 10:33:08 +02:00 committed by GitHub
commit 1eb3b62c46
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 286 additions and 27 deletions

View file

@ -36,28 +36,86 @@ fn get_usage() -> String {
format!("{0} [OPTION]... [FILE]...", executable!())
}
fn tabstops_parse(s: String) -> Vec<usize> {
let words = s.split(',');
/// The mode to use when replacing tabs beyond the last one specified in
/// the `--tabs` argument.
enum RemainingMode {
None,
Slash,
Plus,
}
let nums = words
.map(|sn| {
sn.parse::<usize>()
.unwrap_or_else(|_| crash!(1, "{}\n", "tab size contains invalid character(s)"))
})
.collect::<Vec<usize>>();
/// Decide whether the character is either a space or a comma.
///
/// # Examples
///
/// ```rust,ignore
/// assert!(is_space_or_comma(' '))
/// assert!(is_space_or_comma(','))
/// assert!(!is_space_or_comma('a'))
/// ```
fn is_space_or_comma(c: char) -> bool {
c == ' ' || c == ','
}
if nums.iter().any(|&n| n == 0) {
crash!(1, "{}\n", "tab size cannot be 0");
/// Parse a list of tabstops from a `--tabs` argument.
///
/// This function returns both the vector of numbers appearing in the
/// comma- or space-separated list, and also an optional mode, specified
/// by either a "/" or a "+" character appearing before the final number
/// in the list. This mode defines the strategy to use for computing the
/// number of spaces to use for columns beyond the end of the tab stop
/// list specified here.
fn tabstops_parse(s: String) -> (RemainingMode, Vec<usize>) {
// Leading commas and spaces are ignored.
let s = s.trim_start_matches(is_space_or_comma);
// If there were only commas and spaces in the string, just use the
// default tabstops.
if s.is_empty() {
return (RemainingMode::None, vec![DEFAULT_TABSTOP]);
}
if let (false, _) = nums
.iter()
.fold((true, 0), |(acc, last), &n| (acc && last <= n, n))
{
crash!(1, "{}\n", "tab sizes must be ascending");
}
let mut nums = vec![];
let mut remaining_mode = RemainingMode::None;
for word in s.split(is_space_or_comma) {
let bytes = word.as_bytes();
for i in 0..bytes.len() {
match bytes[i] {
b'+' => {
remaining_mode = RemainingMode::Plus;
}
b'/' => {
remaining_mode = RemainingMode::Slash;
}
_ => {
// Parse a number from the byte sequence.
let num = from_utf8(&bytes[i..]).unwrap().parse::<usize>().unwrap();
nums
// Tab size must be positive.
if num == 0 {
crash!(1, "{}\n", "tab size cannot be 0");
}
// Tab sizes must be ascending.
if let Some(last_stop) = nums.last() {
if *last_stop >= num {
crash!(1, "tab sizes must be ascending");
}
}
// Append this tab stop to the list of all tabstops.
nums.push(num);
break;
}
}
}
}
// If no numbers could be parsed (for example, if `s` were "+,+,+"),
// then just use the default tabstops.
if nums.is_empty() {
nums = vec![DEFAULT_TABSTOP];
}
(remaining_mode, nums)
}
struct Options {
@ -66,13 +124,17 @@ struct Options {
tspaces: String,
iflag: bool,
uflag: bool,
/// Strategy for expanding tabs for columns beyond those specified
/// in `tabstops`.
remaining_mode: RemainingMode,
}
impl Options {
fn new(matches: &ArgMatches) -> Options {
let tabstops = match matches.value_of(options::TABS) {
let (remaining_mode, tabstops) = match matches.value_of(options::TABS) {
Some(s) => tabstops_parse(s.to_string()),
None => vec![DEFAULT_TABSTOP],
None => (RemainingMode::None, vec![DEFAULT_TABSTOP]),
};
let iflag = matches.is_present(options::INITIAL);
@ -102,6 +164,7 @@ impl Options {
tspaces,
iflag,
uflag,
remaining_mode,
}
}
}
@ -159,13 +222,41 @@ fn open(path: String) -> BufReader<Box<dyn Read + 'static>> {
}
}
fn next_tabstop(tabstops: &[usize], col: usize) -> usize {
if tabstops.len() == 1 {
tabstops[0] - col % tabstops[0]
} else {
match tabstops.iter().find(|&&t| t > col) {
/// Compute the number of spaces to the next tabstop.
///
/// `tabstops` is the sequence of tabstop locations.
///
/// `col` is the index of the current cursor in the line being written.
///
/// If `remaining_mode` is [`RemainingMode::Plus`], then the last entry
/// in the `tabstops` slice is interpreted as a relative number of
/// spaces, which this function will return for every input value of
/// `col` beyond the end of the second-to-last element of `tabstops`.
///
/// If `remaining_mode` is [`RemainingMode::Plus`], then the last entry
/// in the `tabstops` slice is interpreted as a relative number of
/// spaces, which this function will return for every input value of
/// `col` beyond the end of the second-to-last element of `tabstops`.
fn next_tabstop(tabstops: &[usize], col: usize, remaining_mode: &RemainingMode) -> usize {
let num_tabstops = tabstops.len();
match remaining_mode {
RemainingMode::Plus => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) {
Some(t) => t - col,
None => 1,
None => tabstops[num_tabstops - 1] - 1,
},
RemainingMode::Slash => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) {
Some(t) => t - col,
None => tabstops[num_tabstops - 1] - col % tabstops[num_tabstops - 1],
},
RemainingMode::None => {
if num_tabstops == 1 {
tabstops[0] - col % tabstops[0]
} else {
match tabstops.iter().find(|&&t| t > col) {
Some(t) => t - col,
None => 1,
}
}
}
}
}
@ -232,12 +323,16 @@ fn expand(options: Options) {
match ctype {
Tab => {
// figure out how many spaces to the next tabstop
let nts = next_tabstop(ts, col);
let nts = next_tabstop(ts, col, &options.remaining_mode);
col += nts;
// now dump out either spaces if we're expanding, or a literal tab if we're not
if init || !options.iflag {
safe_unwrap!(output.write_all(options.tspaces[..nts].as_bytes()));
if nts <= options.tspaces.len() {
safe_unwrap!(output.write_all(options.tspaces[..nts].as_bytes()));
} else {
safe_unwrap!(output.write_all(" ".repeat(nts).as_bytes()));
};
} else {
safe_unwrap!(output.write_all(&buf[byte..byte + nbytes]));
}
@ -269,3 +364,30 @@ fn expand(options: Options) {
}
}
}
#[cfg(test)]
mod tests {
use super::next_tabstop;
use super::RemainingMode;
#[test]
fn test_next_tabstop_remaining_mode_none() {
assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::None), 1);
assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::None), 2);
assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::None), 1);
}
#[test]
fn test_next_tabstop_remaining_mode_plus() {
assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::Plus), 1);
assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::Plus), 4);
assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::Plus), 4);
}
#[test]
fn test_next_tabstop_remaining_mode_slash() {
assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::Slash), 1);
assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::Slash), 2);
assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::Slash), 4);
}
}

View file

@ -53,3 +53,140 @@ fn test_with_multiple_files() {
.stdout_contains(" return")
.stdout_contains(" ");
}
#[test]
fn test_tabs_space_separated_list() {
new_ucmd!()
.args(&["--tabs", "3 6 9"])
.pipe_in("a\tb\tc\td\te")
.succeeds()
.stdout_is("a b c d e");
}
#[test]
fn test_tabs_mixed_style_list() {
new_ucmd!()
.args(&["--tabs", ", 3,6 9"])
.pipe_in("a\tb\tc\td\te")
.succeeds()
.stdout_is("a b c d e");
}
#[test]
fn test_tabs_empty_string() {
new_ucmd!()
.args(&["--tabs", ""])
.pipe_in("a\tb\tc")
.succeeds()
.stdout_is("a b c");
}
#[test]
fn test_tabs_comma_only() {
new_ucmd!()
.args(&["--tabs", ","])
.pipe_in("a\tb\tc")
.succeeds()
.stdout_is("a b c");
}
#[test]
fn test_tabs_space_only() {
new_ucmd!()
.args(&["--tabs", " "])
.pipe_in("a\tb\tc")
.succeeds()
.stdout_is("a b c");
}
#[test]
fn test_tabs_slash() {
new_ucmd!()
.args(&["--tabs", "/"])
.pipe_in("a\tb\tc")
.succeeds()
.stdout_is("a b c");
}
#[test]
fn test_tabs_plus() {
new_ucmd!()
.args(&["--tabs", "+"])
.pipe_in("a\tb\tc")
.succeeds()
.stdout_is("a b c");
}
#[test]
fn test_tabs_trailing_slash() {
new_ucmd!()
.arg("--tabs=1,/5")
.pipe_in("\ta\tb\tc")
.succeeds()
// 0 1
// 01234567890
.stdout_is(" a b c");
}
#[test]
fn test_tabs_trailing_slash_long_columns() {
new_ucmd!()
.arg("--tabs=1,/3")
.pipe_in("\taaaa\tbbbb\tcccc")
.succeeds()
// 0 1
// 01234567890123456
.stdout_is(" aaaa bbbb cccc");
}
#[test]
fn test_tabs_trailing_plus() {
new_ucmd!()
.arg("--tabs=1,+5")
.pipe_in("\ta\tb\tc")
.succeeds()
// 0 1
// 012345678901
.stdout_is(" a b c");
}
#[test]
fn test_tabs_trailing_plus_long_columns() {
new_ucmd!()
.arg("--tabs=1,+3")
.pipe_in("\taaaa\tbbbb\tcccc")
.succeeds()
// 0 1
// 012345678901234567
.stdout_is(" aaaa bbbb cccc");
}
#[test]
fn test_tabs_must_be_ascending() {
new_ucmd!()
.arg("--tabs=1,1")
.fails()
.stderr_contains("tab sizes must be ascending");
}
#[test]
fn test_tabs_keep_last_trailing_specifier() {
// If there are multiple trailing specifiers, use only the last one
// before the number.
new_ucmd!()
.arg("--tabs=1,+/+/5")
.pipe_in("\ta\tb\tc")
.succeeds()
// 0 1
// 01234567890
.stdout_is(" a b c");
}
#[test]
fn test_tabs_comma_separated_no_numbers() {
new_ucmd!()
.arg("--tabs=+,/,+,/")
.pipe_in("\ta\tb\tc")
.succeeds()
.stdout_is(" a b c");
}