mirror of
https://github.com/uutils/coreutils
synced 2024-12-14 07:12:44 +00:00
fmt: clean up some small bits
This commit is contained in:
parent
14a8e8a452
commit
01c32a5220
2 changed files with 132 additions and 137 deletions
|
@ -46,7 +46,7 @@ pub fn break_lines(
|
|||
ostream: &mut BufWriter<Stdout>,
|
||||
) -> std::io::Result<()> {
|
||||
// indent
|
||||
let p_indent = &para.indent_str[..];
|
||||
let p_indent = &para.indent_str;
|
||||
let p_indent_len = para.indent_len;
|
||||
|
||||
// words
|
||||
|
@ -55,14 +55,12 @@ pub fn break_lines(
|
|||
|
||||
// the first word will *always* appear on the first line
|
||||
// make sure of this here
|
||||
let (w, w_len) = match p_words_words.next() {
|
||||
Some(winfo) => (winfo.word, winfo.word_nchars),
|
||||
None => {
|
||||
return ostream.write_all(b"\n");
|
||||
}
|
||||
let Some(winfo) = p_words_words.next() else {
|
||||
return ostream.write_all(b"\n");
|
||||
};
|
||||
|
||||
// print the init, if it exists, and get its length
|
||||
let p_init_len = w_len
|
||||
let p_init_len = winfo.word_nchars
|
||||
+ if opts.crown || opts.tagged {
|
||||
// handle "init" portion
|
||||
ostream.write_all(para.init_str.as_bytes())?;
|
||||
|
@ -75,8 +73,9 @@ pub fn break_lines(
|
|||
// except that mail headers get no indent at all
|
||||
0
|
||||
};
|
||||
|
||||
// write first word after writing init
|
||||
ostream.write_all(w.as_bytes())?;
|
||||
ostream.write_all(winfo.word.as_bytes())?;
|
||||
|
||||
// does this paragraph require uniform spacing?
|
||||
let uniform = para.mail_header || opts.uniform;
|
||||
|
@ -103,15 +102,16 @@ fn break_simple<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
|
|||
mut iter: T,
|
||||
args: &mut BreakArgs<'a>,
|
||||
) -> std::io::Result<()> {
|
||||
iter.try_fold((args.init_len, false), |l, winfo| {
|
||||
accum_words_simple(args, l, winfo)
|
||||
iter.try_fold((args.init_len, false), |(l, prev_punct), winfo| {
|
||||
accum_words_simple(args, l, prev_punct, winfo)
|
||||
})?;
|
||||
args.ostream.write_all(b"\n")
|
||||
}
|
||||
|
||||
fn accum_words_simple<'a>(
|
||||
args: &mut BreakArgs<'a>,
|
||||
(l, prev_punct): (usize, bool),
|
||||
l: usize,
|
||||
prev_punct: bool,
|
||||
winfo: &'a WordInfo<'a>,
|
||||
) -> std::io::Result<(usize, bool)> {
|
||||
// compute the length of this word, considering how tabs will expand at this position on the line
|
||||
|
@ -233,14 +233,14 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
|
|||
linebreak: None,
|
||||
break_before: false,
|
||||
demerits: 0,
|
||||
prev_rat: 0.0f32,
|
||||
prev_rat: 0.0,
|
||||
length: args.init_len,
|
||||
fresh: false,
|
||||
}];
|
||||
// this vec holds the current active linebreaks; next_ holds the breaks that will be active for
|
||||
// the next word
|
||||
let active_breaks = &mut vec![0];
|
||||
let next_active_breaks = &mut vec![];
|
||||
let mut active_breaks = vec![0];
|
||||
let mut next_active_breaks = vec![];
|
||||
|
||||
let stretch = (args.opts.width - args.opts.goal) as isize;
|
||||
let minlength = args.opts.goal - stretch as usize;
|
||||
|
@ -248,10 +248,7 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
|
|||
let mut is_sentence_start = false;
|
||||
let mut least_demerits = 0;
|
||||
loop {
|
||||
let w = match iter.next() {
|
||||
None => break,
|
||||
Some(w) => w,
|
||||
};
|
||||
let Some(w) = iter.next() else { break };
|
||||
|
||||
// if this is the last word, we don't add additional demerits for this break
|
||||
let (is_last_word, is_sentence_end) = match iter.peek() {
|
||||
|
@ -358,13 +355,13 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
|
|||
least_demerits = cmp::max(ld_next, 0);
|
||||
}
|
||||
// swap in new list of active breaks
|
||||
mem::swap(active_breaks, next_active_breaks);
|
||||
mem::swap(&mut active_breaks, &mut next_active_breaks);
|
||||
// If this was the last word in a sentence, the next one must be the first in the next.
|
||||
is_sentence_start = is_sentence_end;
|
||||
}
|
||||
|
||||
// return the best path
|
||||
build_best_path(&linebreaks, active_breaks)
|
||||
build_best_path(&linebreaks, &active_breaks)
|
||||
}
|
||||
|
||||
fn build_best_path<'a>(paths: &[LineBreak<'a>], active: &[usize]) -> Vec<(&'a WordInfo<'a>, bool)> {
|
||||
|
|
|
@ -52,18 +52,22 @@ impl Line {
|
|||
}
|
||||
}
|
||||
|
||||
// each line's prefix has to be considered to know whether to merge it with
|
||||
// the next line or not
|
||||
/// Each line's prefix has to be considered to know whether to merge it with
|
||||
/// the next line or not
|
||||
#[derive(Debug)]
|
||||
pub struct FileLine {
|
||||
line: String,
|
||||
indent_end: usize, // the end of the indent, always the start of the text
|
||||
pfxind_end: usize, // the end of the PREFIX's indent, that is, the spaces before the prefix
|
||||
indent_len: usize, // display length of indent taking into account tabs
|
||||
prefix_len: usize, // PREFIX indent length taking into account tabs
|
||||
/// The end of the indent, always the start of the text
|
||||
indent_end: usize,
|
||||
/// The end of the PREFIX's indent, that is, the spaces before the prefix
|
||||
pfxind_end: usize,
|
||||
/// Display length of indent taking into account tabs
|
||||
indent_len: usize,
|
||||
/// PREFIX indent length taking into account tabs
|
||||
prefix_len: usize,
|
||||
}
|
||||
|
||||
// iterator that produces a stream of Lines from a file
|
||||
/// Iterator that produces a stream of Lines from a file
|
||||
pub struct FileLines<'a> {
|
||||
opts: &'a FmtOptions,
|
||||
lines: Lines<&'a mut FileOrStdReader>,
|
||||
|
@ -74,7 +78,7 @@ impl<'a> FileLines<'a> {
|
|||
FileLines { opts, lines }
|
||||
}
|
||||
|
||||
// returns true if this line should be formatted
|
||||
/// returns true if this line should be formatted
|
||||
fn match_prefix(&self, line: &str) -> (bool, usize) {
|
||||
if !self.opts.use_prefix {
|
||||
return (true, 0);
|
||||
|
@ -83,7 +87,7 @@ impl<'a> FileLines<'a> {
|
|||
FileLines::match_prefix_generic(&self.opts.prefix[..], line, self.opts.xprefix)
|
||||
}
|
||||
|
||||
// returns true if this line should be formatted
|
||||
/// returns true if this line should be formatted
|
||||
fn match_anti_prefix(&self, line: &str) -> bool {
|
||||
if !self.opts.use_anti_prefix {
|
||||
return true;
|
||||
|
@ -148,13 +152,7 @@ impl<'a> Iterator for FileLines<'a> {
|
|||
type Item = Line;
|
||||
|
||||
fn next(&mut self) -> Option<Line> {
|
||||
let n = match self.lines.next() {
|
||||
Some(t) => match t {
|
||||
Ok(tt) => tt,
|
||||
Err(_) => return None,
|
||||
},
|
||||
None => return None,
|
||||
};
|
||||
let n = self.lines.next()?.ok()?;
|
||||
|
||||
// if this line is entirely whitespace,
|
||||
// emit a blank line
|
||||
|
@ -205,24 +203,33 @@ impl<'a> Iterator for FileLines<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// a paragraph : a collection of FileLines that are to be formatted
|
||||
// plus info about the paragraph's indentation
|
||||
// (but we only retain the String from the FileLine; the other info
|
||||
// is only there to help us in deciding how to merge lines into Paragraphs
|
||||
/// A paragraph : a collection of FileLines that are to be formatted
|
||||
/// plus info about the paragraph's indentation
|
||||
///
|
||||
/// We only retain the String from the FileLine; the other info
|
||||
/// is only there to help us in deciding how to merge lines into Paragraphs
|
||||
#[derive(Debug)]
|
||||
pub struct Paragraph {
|
||||
lines: Vec<String>, // the lines of the file
|
||||
pub init_str: String, // string representing the init, that is, the first line's indent
|
||||
pub init_len: usize, // printable length of the init string considering TABWIDTH
|
||||
init_end: usize, // byte location of end of init in first line String
|
||||
pub indent_str: String, // string representing indent
|
||||
pub indent_len: usize, // length of above
|
||||
indent_end: usize, // byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward)
|
||||
pub mail_header: bool, // we need to know if this is a mail header because we do word splitting differently in that case
|
||||
/// the lines of the file
|
||||
lines: Vec<String>,
|
||||
/// string representing the init, that is, the first line's indent
|
||||
pub init_str: String,
|
||||
/// printable length of the init string considering TABWIDTH
|
||||
pub init_len: usize,
|
||||
/// byte location of end of init in first line String
|
||||
init_end: usize,
|
||||
/// string representing indent
|
||||
pub indent_str: String,
|
||||
/// length of above
|
||||
pub indent_len: usize,
|
||||
/// byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward)
|
||||
indent_end: usize,
|
||||
/// we need to know if this is a mail header because we do word splitting differently in that case
|
||||
pub mail_header: bool,
|
||||
}
|
||||
|
||||
// an iterator producing a stream of paragraphs from a stream of lines
|
||||
// given a set of options.
|
||||
/// An iterator producing a stream of paragraphs from a stream of lines
|
||||
/// given a set of options.
|
||||
pub struct ParagraphStream<'a> {
|
||||
lines: Peekable<FileLines<'a>>,
|
||||
next_mail: bool,
|
||||
|
@ -240,7 +247,7 @@ impl<'a> ParagraphStream<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// detect RFC822 mail header
|
||||
/// Detect RFC822 mail header
|
||||
fn is_mail_header(line: &FileLine) -> bool {
|
||||
// a mail header begins with either "From " (envelope sender line)
|
||||
// or with a sequence of printable ASCII chars (33 to 126, inclusive,
|
||||
|
@ -276,12 +283,9 @@ impl<'a> Iterator for ParagraphStream<'a> {
|
|||
#[allow(clippy::cognitive_complexity)]
|
||||
fn next(&mut self) -> Option<Result<Paragraph, String>> {
|
||||
// return a NoFormatLine in an Err; it should immediately be output
|
||||
let noformat = match self.lines.peek() {
|
||||
None => return None,
|
||||
Some(l) => match *l {
|
||||
Line::FormatLine(_) => false,
|
||||
Line::NoFormatLine(_, _) => true,
|
||||
},
|
||||
let noformat = match self.lines.peek()? {
|
||||
Line::FormatLine(_) => false,
|
||||
Line::NoFormatLine(_, _) => true,
|
||||
};
|
||||
|
||||
// found a NoFormatLine, immediately dump it out
|
||||
|
@ -305,95 +309,89 @@ impl<'a> Iterator for ParagraphStream<'a> {
|
|||
let mut in_mail = false;
|
||||
let mut second_done = false; // for when we use crown or tagged mode
|
||||
loop {
|
||||
{
|
||||
// peek ahead
|
||||
// need to explicitly force fl out of scope before we can call self.lines.next()
|
||||
let fl = match self.lines.peek() {
|
||||
None => break,
|
||||
Some(l) => match *l {
|
||||
Line::FormatLine(ref x) => x,
|
||||
Line::NoFormatLine(..) => break,
|
||||
},
|
||||
};
|
||||
// peek ahead
|
||||
// need to explicitly force fl out of scope before we can call self.lines.next()
|
||||
let Some(Line::FormatLine(fl)) = self.lines.peek() else {
|
||||
break;
|
||||
};
|
||||
|
||||
if p_lines.is_empty() {
|
||||
// first time through the loop, get things set up
|
||||
// detect mail header
|
||||
if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) {
|
||||
in_mail = true;
|
||||
// there can't be any indent or pfxind because otherwise is_mail_header
|
||||
// would fail since there cannot be any whitespace before the colon in a
|
||||
// valid header field
|
||||
indent_str.push_str("  ");
|
||||
indent_len = 2;
|
||||
if p_lines.is_empty() {
|
||||
// first time through the loop, get things set up
|
||||
// detect mail header
|
||||
if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) {
|
||||
in_mail = true;
|
||||
// there can't be any indent or pfxind because otherwise is_mail_header
|
||||
// would fail since there cannot be any whitespace before the colon in a
|
||||
// valid header field
|
||||
indent_str.push_str("  ");
|
||||
indent_len = 2;
|
||||
} else {
|
||||
if self.opts.crown || self.opts.tagged {
|
||||
init_str.push_str(&fl.line[..fl.indent_end]);
|
||||
init_len = fl.indent_len;
|
||||
init_end = fl.indent_end;
|
||||
} else {
|
||||
if self.opts.crown || self.opts.tagged {
|
||||
init_str.push_str(&fl.line[..fl.indent_end]);
|
||||
init_len = fl.indent_len;
|
||||
init_end = fl.indent_end;
|
||||
} else {
|
||||
second_done = true;
|
||||
}
|
||||
|
||||
// these will be overwritten in the 2nd line of crown or tagged mode, but
|
||||
// we are not guaranteed to get to the 2nd line, e.g., if the next line
|
||||
// is a NoFormatLine or None. Thus, we set sane defaults the 1st time around
|
||||
indent_str.push_str(&fl.line[..fl.indent_end]);
|
||||
indent_len = fl.indent_len;
|
||||
indent_end = fl.indent_end;
|
||||
|
||||
// save these to check for matching lines
|
||||
prefix_len = fl.prefix_len;
|
||||
pfxind_end = fl.pfxind_end;
|
||||
|
||||
// in tagged mode, add 4 spaces of additional indenting by default
|
||||
// (gnu fmt's behavior is different: it seems to find the closest column to
|
||||
// indent_end that is divisible by 3. But honestly that behavior seems
|
||||
// pretty arbitrary.
|
||||
// Perhaps a better default would be 1 TABWIDTH? But ugh that's so big.
|
||||
if self.opts.tagged {
|
||||
indent_str.push_str("    ");
|
||||
indent_len += 4;
|
||||
}
|
||||
}
|
||||
} else if in_mail {
|
||||
// lines following mail headers must begin with spaces
|
||||
if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) {
|
||||
break; // this line does not begin with spaces
|
||||
}
|
||||
} else if !second_done {
|
||||
// now we have enough info to handle crown margin and tagged mode
|
||||
|
||||
// in both crown and tagged modes we require that prefix_len is the same
|
||||
if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end {
|
||||
break;
|
||||
second_done = true;
|
||||
}
|
||||
|
||||
// in tagged mode, indent has to be *different* on following lines
|
||||
if self.opts.tagged
|
||||
&& indent_len - 4 == fl.indent_len
|
||||
&& indent_end == fl.indent_end
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// this is part of the same paragraph, get the indent info from this line
|
||||
indent_str.clear();
|
||||
// these will be overwritten in the 2nd line of crown or tagged mode, but
|
||||
// we are not guaranteed to get to the 2nd line, e.g., if the next line
|
||||
// is a NoFormatLine or None. Thus, we set sane defaults the 1st time around
|
||||
indent_str.push_str(&fl.line[..fl.indent_end]);
|
||||
indent_len = fl.indent_len;
|
||||
indent_end = fl.indent_end;
|
||||
|
||||
second_done = true;
|
||||
} else {
|
||||
// detect mismatch
|
||||
if indent_end != fl.indent_end
|
||||
|| pfxind_end != fl.pfxind_end
|
||||
|| indent_len != fl.indent_len
|
||||
|| prefix_len != fl.prefix_len
|
||||
{
|
||||
break;
|
||||
// save these to check for matching lines
|
||||
prefix_len = fl.prefix_len;
|
||||
pfxind_end = fl.pfxind_end;
|
||||
|
||||
// in tagged mode, add 4 spaces of additional indenting by default
|
||||
// (gnu fmt's behavior is different: it seems to find the closest column to
|
||||
// indent_end that is divisible by 3. But honestly that behavior seems
|
||||
// pretty arbitrary.
|
||||
// Perhaps a better default would be 1 TABWIDTH? But ugh that's so big.
|
||||
if self.opts.tagged {
|
||||
indent_str.push_str("    ");
|
||||
indent_len += 4;
|
||||
}
|
||||
}
|
||||
} else if in_mail {
|
||||
// lines following mail headers must begin with spaces
|
||||
if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) {
|
||||
break; // this line does not begin with spaces
|
||||
}
|
||||
} else if !second_done {
|
||||
// now we have enough info to handle crown margin and tagged mode
|
||||
|
||||
// in both crown and tagged modes we require that prefix_len is the same
|
||||
if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end {
|
||||
break;
|
||||
}
|
||||
|
||||
// in tagged mode, indent has to be *different* on following lines
|
||||
if self.opts.tagged
|
||||
&& indent_len - 4 == fl.indent_len
|
||||
&& indent_end == fl.indent_end
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// this is part of the same paragraph, get the indent info from this line
|
||||
indent_str.clear();
|
||||
indent_str.push_str(&fl.line[..fl.indent_end]);
|
||||
indent_len = fl.indent_len;
|
||||
indent_end = fl.indent_end;
|
||||
|
||||
second_done = true;
|
||||
} else {
|
||||
// detect mismatch
|
||||
if indent_end != fl.indent_end
|
||||
|| pfxind_end != fl.pfxind_end
|
||||
|| indent_len != fl.indent_len
|
||||
|| prefix_len != fl.prefix_len
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
p_lines.push(self.lines.next().unwrap().get_formatline().line);
|
||||
|
@ -429,7 +427,7 @@ pub struct ParaWords<'a> {
|
|||
}
|
||||
|
||||
impl<'a> ParaWords<'a> {
|
||||
pub fn new<'b>(opts: &'b FmtOptions, para: &'b Paragraph) -> ParaWords<'b> {
|
||||
pub fn new(opts: &'a FmtOptions, para: &'a Paragraph) -> Self {
|
||||
let mut pw = ParaWords {
|
||||
opts,
|
||||
para,
|
||||
|
|
Loading…
Reference in a new issue