Merge pull request #2098 from miDeb/sort-trailing-separator

sort: fix tokenization for trailing separators
This commit is contained in:
Sylvestre Ledru 2021-04-24 10:00:20 +02:00 committed by GitHub
commit 372d08c341
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 30 additions and 13 deletions

View file

@ -351,20 +351,18 @@ fn tokenize_default(line: &str) -> Vec<Field> {
/// Split `line` into fields at every occurrence of `separator`.
///
/// The separators themselves are not included in the fields. Every field is
/// returned as a byte range into `line`, so it can be used to slice `line`
/// directly.
///
/// A field is emitted for the text in front of each separator (possibly
/// empty), plus one for any non-empty text after the last separator. A
/// trailing separator therefore does not create an extra empty field, and an
/// empty `line` yields no fields at all.
fn tokenize_with_separator(line: &str, separator: char) -> Vec<Field> {
    let mut tokens = Vec::new();
    let mut start = 0;
    for (sep_idx, sep) in line.match_indices(separator) {
        tokens.push(start..sep_idx);
        // Skip the whole separator. Its UTF-8 encoding may be longer than one
        // byte, so advancing by 1 could land in the middle of a character.
        start = sep_idx + sep.len();
    }
    // Whatever follows the last separator is the final field; nothing is
    // added when the line ends with a separator (or is empty).
    if start < line.len() {
        tokens.push(start..line.len());
    }
    tokens
}
@ -1383,4 +1381,14 @@ mod tests {
vec![0..0, 1..1, 2..2, 3..9, 10..18,]
);
}
#[test]
fn test_tokenize_fields_trailing_custom_separator() {
    // Each case pairs an input line with the field ranges expected when it is
    // tokenized with 'a' as the custom separator. All inputs end with the
    // separator, and none of them expects a field after that last separator.
    let cases = vec![
        ("a", vec![0..0]),
        ("aa", vec![0..0, 1..1]),
        ("..a..a", vec![0..2, 3..5]),
    ];
    for (line, expected) in cases {
        assert_eq!(tokenize(line, Some('a')), expected);
    }
}
}

View file

@ -581,3 +581,12 @@ fn test_check_silent() {
.fails()
.stdout_is("");
}
#[test]
fn test_trailing_separator() {
    // The first line ends with the separator `x`. The expected output is
    // identical to the input, i.e. the two lines must keep their order when
    // sorted on the first field only (presumably because the key `aa` sorts
    // before `aaa` -- confirm against the key-selection code).
    let lines = "aax\naaa\n";
    new_ucmd!()
        .args(&["-t", "x", "-k", "1,1"])
        .pipe_in(lines)
        .succeeds()
        .stdout_is(lines);
}