nl: fix output of numbering styles

This commit is contained in:
Daniel Hofstetter 2023-08-03 16:12:29 +02:00
parent 9be4a89260
commit 271606ddfa
2 changed files with 208 additions and 137 deletions

View file

@ -44,7 +44,7 @@ impl Default for Settings {
fn default() -> Self {
Self {
header_numbering: NumberingStyle::None,
body_numbering: NumberingStyle::All,
body_numbering: NumberingStyle::NonEmpty,
footer_numbering: NumberingStyle::None,
section_delimiter: ['\\', ':'],
starting_line_number: 1,
@ -271,146 +271,70 @@ pub fn uu_app() -> Command {
}
// nl implements the main functionality for an individual buffer.
//
// Reads `reader` line by line and prints every line to stdout. A line that
// is recognised as a section delimiter is printed as an empty line and
// switches the numbering style to the header, body or footer setting; any
// other line is printed with a formatted line number when the active style
// selects it. Returns an error when a line cannot be read.
//
// NOTE(review): this listing appears to interleave the removed and the added
// lines of a diff without +/- markers: two loops over `reader.lines()` are
// opened but only one is closed before `Ok(())`, and two independent
// numbering mechanisms (`line_filter` and `current_numbering_style`) coexist.
// Confirm against the actual file before relying on any single statement.
#[allow(clippy::cognitive_complexity)]
fn nl<T: Read>(reader: &mut BufReader<T>, settings: &Settings) -> UResult<()> {
// Placeholder pattern (zero or one character) that matches every line; it
// is used whenever the body style carries no regex of its own.
let regexp: regex::Regex = regex::Regex::new(r".?").unwrap();
let mut current_numbering_style = &settings.body_numbering;
let mut line_no = settings.starting_line_number;
let mut empty_line_count: u64 = 0;
// Initially, we use the body's line counting settings
let mut regex_filter = match settings.body_numbering {
NumberingStyle::Regex(ref re) => re,
_ => &regexp,
};
// NOTE(review): the initial filter is always `pass_regex`; the other filter
// functions are only installed after a section delimiter has been seen.
let mut line_filter: fn(&str, &regex::Regex) -> bool = pass_regex;
for l in reader.lines() {
let mut l = l.map_err_context(|| "could not read line".to_string())?;
// Sanitize the string. We want to print the newline ourselves.
// (`BufRead::lines` already strips the line terminator, so this check is
// not expected to fire.)
if l.ends_with('\n') {
l.pop();
}
// Next we iterate through the individual chars to see if this
// is one of the special lines starting a new "section" in the
// document.
let line = l;
// `odd` selects which of the two delimiter characters is expected next.
let mut odd = false;
// matched_groups counts how many copies of section_delimiter
// this string consists of (0 if there's anything else)
let mut matched_groups = 0u8;
for c in line.chars() {
// If this is a newline character, the loop should end.
if c == '\n' {
break;
}
// If we have already seen three groups (corresponding to
// a header) or the current char does not form part of
// a new group, then this line is not a segment indicator.
if matched_groups >= 3 || settings.section_delimiter[usize::from(odd)] != c {
matched_groups = 0;
break;
}
if odd {
// We have seen a new group and count it.
matched_groups += 1;
}
odd = !odd;
}
// Length of the current run of empty lines; reset by any non-empty line.
let mut consecutive_empty_lines = 0;
for line in reader.lines() {
let line = line.map_err_context(|| "could not read line".to_string())?;
// See how many groups we matched. That will tell us if this is
// a line starting a new segment, and the number of groups
// indicates what type of segment.
if matched_groups > 0 {
// The current line is a section delimiter, so we output
// a blank line.
println!();
// However the line does not count as a blank line, so we
// reset the counter used for --join-blank-lines.
empty_line_count = 0;
// The inner match picks the numbering style of the section that
// starts here; the outer match installs the matching line filter.
match *match matched_groups {
3 => {
// This is a header, so we may need to reset the
// line number
if settings.renumber {
line_no = settings.starting_line_number;
}
&settings.header_numbering
}
1 => &settings.footer_numbering,
// The only option left is 2, but rust wants
// a catch-all here.
_ => &settings.body_numbering,
} {
NumberingStyle::All => {
line_filter = pass_all;
}
NumberingStyle::NonEmpty => {
line_filter = pass_nonempty;
}
NumberingStyle::None => {
line_filter = pass_none;
}
NumberingStyle::Regex(ref re) => {
line_filter = pass_regex;
regex_filter = re;
}
}
continue;
}
// From this point on we format and print a "regular" line.
if line.is_empty() {
// The line is empty, which means that we have to care
// about the --join-blank-lines parameter.
empty_line_count += 1;
consecutive_empty_lines += 1;
} else {
// This saves us from having to check for an empty string
// in the next selector.
empty_line_count = 0;
consecutive_empty_lines = 0;
};
// FIXME section delimiters are hardcoded and settings.section_delimiter is ignored
// because --section-delimiter is not correctly implemented yet
let _ = settings.section_delimiter; // XXX suppress "field never read" warning
// A line consisting solely of one of the hardcoded delimiters starts a new
// section and selects that section's numbering style.
let new_numbering_style = match line.as_str() {
"\\:\\:\\:" => Some(&settings.header_numbering),
"\\:\\:" => Some(&settings.body_numbering),
"\\:" => Some(&settings.footer_numbering),
_ => None,
};
if let Some(new_style) = new_numbering_style {
current_numbering_style = new_style;
// Every section delimiter restarts numbering at the configured start value.
// NOTE(review): unlike the header branch above, this reset does not consult
// `settings.renumber`.
line_no = settings.starting_line_number;
// The delimiter line itself is printed as an empty line.
println!();
} else {
let is_line_numbered = match current_numbering_style {
// consider $join_blank_lines consecutive empty lines to be one logical line
// for numbering, and only number the last one
// NOTE(review): integer `%` panics on a zero divisor — presumably
// `join_blank_lines == 0` is rejected before this point; verify.
NumberingStyle::All
if line.is_empty()
&& consecutive_empty_lines % settings.join_blank_lines != 0 =>
{
false
}
NumberingStyle::All => true,
NumberingStyle::NonEmpty => !line.is_empty(),
NumberingStyle::None => false,
NumberingStyle::Regex(re) => re.is_match(&line),
};
if is_line_numbered {
println!(
"{}{}{}",
settings
.number_format
.format(line_no, settings.number_width),
settings.number_separator,
line
);
// update line number for the potential next line
line_no += settings.line_increment;
} else {
// Unnumbered line: prefix it with `number_width + 1` spaces.
let spaces = " ".repeat(settings.number_width + 1);
println!("{spaces}{line}");
}
}
if !line_filter(&line, regex_filter)
|| (empty_line_count > 0 && empty_line_count < settings.join_blank_lines)
{
// No number is printed for this line. Either we did not
// want to print one in the first place, or it is a blank
// line but we are still collecting more blank lines via
// the option --join-blank-lines.
println!("{line}");
continue;
}
// If we make it here, then either we are printing a non-empty
// line or assigning a line number to an empty line. Either
// way, start counting empties from zero once more.
empty_line_count = 0;
// A line number is to be printed.
println!(
"{}{}{}",
settings
.number_format
.format(line_no, settings.number_width),
settings.number_separator,
line
);
// Now update the line number for the (potential) next
// line.
line_no += settings.line_increment;
}
Ok(())
}
fn pass_regex(line: &str, re: &regex::Regex) -> bool {
re.is_match(line)
}
/// Line filter that accepts every line containing at least one character.
/// The regex argument exists only to fit the shared filter signature and is
/// ignored.
fn pass_nonempty(text: &str, _pattern: &regex::Regex) -> bool {
    let is_blank = text.is_empty();
    !is_blank
}
/// Line filter that rejects every line; both arguments are ignored.
fn pass_none(_text: &str, _pattern: &regex::Regex) -> bool {
    false
}
/// Line filter that accepts every line; both arguments are ignored.
fn pass_all(_text: &str, _pattern: &regex::Regex) -> bool {
    true
}
#[cfg(test)]
mod test {
use super::*;

View file

@ -52,15 +52,15 @@ fn test_sections_and_styles() {
for (fixture, output) in [
(
"section.txt",
"\nHEADER1\nHEADER2\n\n1 |BODY1\n2 \
|BODY2\n\nFOOTER1\nFOOTER2\n\nNEXTHEADER1\nNEXTHEADER2\n\n1 \
|NEXTBODY1\n2 |NEXTBODY2\n\nNEXTFOOTER1\nNEXTFOOTER2\n",
"\n HEADER1\n HEADER2\n\n1 |BODY1\n2 \
|BODY2\n\n FOOTER1\n FOOTER2\n\n NEXTHEADER1\n NEXTHEADER2\n\n1 \
|NEXTBODY1\n2 |NEXTBODY2\n\n NEXTFOOTER1\n NEXTFOOTER2\n",
),
(
"joinblanklines.txt",
"1 |Nonempty\n2 |Nonempty\n3 |Followed by 10x empty\n\n\n\n\n4 \
|\n\n\n\n\n5 |\n6 |Followed by 5x empty\n\n\n\n\n7 |\n8 \
|Followed by 4x empty\n\n\n\n\n9 |Nonempty\n10 |Nonempty\n11 \
"1 |Nonempty\n2 |Nonempty\n3 |Followed by 10x empty\n \n \n \n \n4 \
|\n \n \n \n \n5 |\n6 |Followed by 5x empty\n \n \n \n \n7 |\n8 \
|Followed by 4x empty\n \n \n \n \n9 |Nonempty\n10 |Nonempty\n11 \
|Nonempty.\n",
),
] {
@ -257,6 +257,25 @@ fn test_invalid_line_increment() {
}
}
/// With `--body-numbering=a` and a join-blank-lines value of 3, six empty
/// input lines are expected to produce a number on every third line only.
#[test]
fn test_join_blank_lines() {
    // NOTE(review): runs of spaces in the expected text may have been
    // collapsed in this listing; confirm the padding against the number width.
    let expected = concat!(
        " \n",
        " \n",
        " 1\t\n",
        " \n",
        " \n",
        " 2\t\n",
    );
    let six_empty_lines = "\n\n\n\n\n\n";

    // Short and long spelling of the option must behave the same.
    for flag in ["-l3", "--join-blank-lines=3"] {
        new_ucmd!()
            .arg(flag)
            .arg("--body-numbering=a")
            .pipe_in(six_empty_lines)
            .succeeds()
            .stdout_is(expected);
    }
}
#[test]
fn test_join_blank_lines_zero() {
for arg in ["-l0", "--join-blank-lines=0"] {
@ -275,3 +294,131 @@ fn test_invalid_join_blank_lines() {
.stderr_contains("invalid value 'invalid'");
}
}
/// Without any options, the two non-empty lines are expected to be numbered
/// and the empty line between them left unnumbered.
#[test]
fn test_default_body_numbering() {
    // NOTE(review): runs of spaces in the expected text may have been
    // collapsed in this listing; confirm the padding against the number width.
    let input = "a\n\nb";
    let expected = " 1\ta\n \n 2\tb\n";

    new_ucmd!().pipe_in(input).succeeds().stdout_is(expected);
}
/// Body style `a` with no section delimiter in the input: all three lines,
/// including the empty one, are expected to be numbered.
#[test]
fn test_body_numbering_all_lines_without_delimiter() {
    // NOTE(review): runs of spaces in the expected text may have been
    // collapsed in this listing; confirm the padding against the number width.
    let expected = " 1\ta\n 2\t\n 3\tb\n";

    for flag in ["-ba", "--body-numbering=a"] {
        new_ucmd!()
            .arg(flag)
            .pipe_in("a\n\nb")
            .succeeds()
            .stdout_is(expected);
    }
}
/// Body style `n` with no section delimiter in the input: no line is
/// expected to receive a number.
#[test]
fn test_body_numbering_no_lines_without_delimiter() {
    // NOTE(review): runs of spaces in the expected text may have been
    // collapsed in this listing; confirm the padding against the number width.
    let expected = " a\n \n b\n";

    for flag in ["-bn", "--body-numbering=n"] {
        new_ucmd!()
            .arg(flag)
            .pipe_in("a\n\nb")
            .succeeds()
            .stdout_is(expected);
    }
}
/// Body style `t` with no section delimiter in the input: only the two
/// non-empty lines are expected to be numbered.
#[test]
fn test_body_numbering_non_empty_lines_without_delimiter() {
    // NOTE(review): runs of spaces in the expected text may have been
    // collapsed in this listing; confirm the padding against the number width.
    let expected = " 1\ta\n \n 2\tb\n";

    for flag in ["-bt", "--body-numbering=t"] {
        new_ucmd!()
            .arg(flag)
            .pipe_in("a\n\nb")
            .succeeds()
            .stdout_is(expected);
    }
}
/// Body style `p^[ac]` with no section delimiter in the input: only the
/// lines matching the pattern (`a` and `c`) are expected to be numbered.
#[test]
fn test_body_numbering_matched_lines_without_delimiter() {
    // NOTE(review): runs of spaces in the expected text may have been
    // collapsed in this listing; confirm the padding against the number width.
    let expected = " 1\ta\n b\n 2\tc\n";

    for flag in ["-bp^[ac]", "--body-numbering=p^[ac]"] {
        new_ucmd!()
            .arg(flag)
            .pipe_in("a\nb\nc")
            .succeeds()
            .stdout_is(expected);
    }
}
/// Style `a` applied to the header, body and footer section in turn: after
/// the section delimiter (printed as an empty line) all three lines are
/// expected to be numbered.
#[test]
fn test_numbering_all_lines() {
    // NOTE(review): runs of spaces in the expected text may have been
    // collapsed in this listing; confirm the padding against the number width.
    let expected = "\n 1\ta\n 2\t\n 3\tb\n";

    // Each section delimiter paired with the short and long option that
    // configures that section.
    let cases = [
        ("\\:\\:\\:\n", ["-ha", "--header-numbering=a"]),
        ("\\:\\:\n", ["-ba", "--body-numbering=a"]),
        ("\\:\n", ["-fa", "--footer-numbering=a"]),
    ];

    for (section_marker, flags) in cases {
        let input = format!("{section_marker}a\n\nb");
        for flag in flags {
            new_ucmd!()
                .arg(flag)
                .pipe_in(input.as_str())
                .succeeds()
                .stdout_is(expected);
        }
    }
}
/// Style `n` applied to the header, body and footer section in turn: after
/// the section delimiter (printed as an empty line) no line is expected to
/// receive a number.
#[test]
fn test_numbering_no_lines() {
    // NOTE(review): runs of spaces in the expected text may have been
    // collapsed in this listing; confirm the padding against the number width.
    let expected = "\n a\n \n b\n";

    // Each section delimiter paired with the short and long option that
    // configures that section.
    let cases = [
        ("\\:\\:\\:\n", ["-hn", "--header-numbering=n"]),
        ("\\:\\:\n", ["-bn", "--body-numbering=n"]),
        ("\\:\n", ["-fn", "--footer-numbering=n"]),
    ];

    for (section_marker, flags) in cases {
        let input = format!("{section_marker}a\n\nb");
        for flag in flags {
            new_ucmd!()
                .arg(flag)
                .pipe_in(input.as_str())
                .succeeds()
                .stdout_is(expected);
        }
    }
}
/// Style `t` applied to the header, body and footer section in turn: after
/// the section delimiter (printed as an empty line) only the two non-empty
/// lines are expected to be numbered.
#[test]
fn test_numbering_non_empty_lines() {
    // NOTE(review): runs of spaces in the expected text may have been
    // collapsed in this listing; confirm the padding against the number width.
    let expected = "\n 1\ta\n \n 2\tb\n";

    // Each section delimiter paired with the short and long option that
    // configures that section.
    let cases = [
        ("\\:\\:\\:\n", ["-ht", "--header-numbering=t"]),
        ("\\:\\:\n", ["-bt", "--body-numbering=t"]),
        ("\\:\n", ["-ft", "--footer-numbering=t"]),
    ];

    for (section_marker, flags) in cases {
        let input = format!("{section_marker}a\n\nb");
        for flag in flags {
            new_ucmd!()
                .arg(flag)
                .pipe_in(input.as_str())
                .succeeds()
                .stdout_is(expected);
        }
    }
}
/// Style `p^[ac]` applied to the header, body and footer section in turn:
/// after the section delimiter (printed as an empty line) only the lines
/// matching the pattern (`a` and `c`) are expected to be numbered.
#[test]
fn test_numbering_matched_lines() {
    // NOTE(review): runs of spaces in the expected text may have been
    // collapsed in this listing; confirm the padding against the number width.
    let expected = "\n 1\ta\n b\n 2\tc\n";

    // Each section delimiter paired with the short and long option that
    // configures that section.
    let cases = [
        ("\\:\\:\\:\n", ["-hp^[ac]", "--header-numbering=p^[ac]"]),
        ("\\:\\:\n", ["-bp^[ac]", "--body-numbering=p^[ac]"]),
        ("\\:\n", ["-fp^[ac]", "--footer-numbering=p^[ac]"]),
    ];

    for (section_marker, flags) in cases {
        let input = format!("{section_marker}a\nb\nc");
        for flag in flags {
            new_ucmd!()
                .arg(flag)
                .pipe_in(input.as_str())
                .succeeds()
                .stdout_is(expected);
        }
    }
}