fmt: clean up some small bits

This commit is contained in:
Terts Diepraam 2023-11-28 11:40:33 +01:00
parent 14a8e8a452
commit 01c32a5220
2 changed files with 132 additions and 137 deletions

View file

@ -46,7 +46,7 @@ pub fn break_lines(
ostream: &mut BufWriter<Stdout>,
) -> std::io::Result<()> {
// indent
let p_indent = &para.indent_str[..];
let p_indent = &para.indent_str;
let p_indent_len = para.indent_len;
// words
@ -55,14 +55,12 @@ pub fn break_lines(
// the first word will *always* appear on the first line
// make sure of this here
let (w, w_len) = match p_words_words.next() {
Some(winfo) => (winfo.word, winfo.word_nchars),
None => {
return ostream.write_all(b"\n");
}
let Some(winfo) = p_words_words.next() else {
return ostream.write_all(b"\n");
};
// print the init, if it exists, and get its length
let p_init_len = w_len
let p_init_len = winfo.word_nchars
+ if opts.crown || opts.tagged {
// handle "init" portion
ostream.write_all(para.init_str.as_bytes())?;
@ -75,8 +73,9 @@ pub fn break_lines(
// except that mail headers get no indent at all
0
};
// write first word after writing init
ostream.write_all(w.as_bytes())?;
ostream.write_all(winfo.word.as_bytes())?;
// does this paragraph require uniform spacing?
let uniform = para.mail_header || opts.uniform;
@ -103,15 +102,16 @@ fn break_simple<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
mut iter: T,
args: &mut BreakArgs<'a>,
) -> std::io::Result<()> {
iter.try_fold((args.init_len, false), |l, winfo| {
accum_words_simple(args, l, winfo)
iter.try_fold((args.init_len, false), |(l, prev_punct), winfo| {
accum_words_simple(args, l, prev_punct, winfo)
})?;
args.ostream.write_all(b"\n")
}
fn accum_words_simple<'a>(
args: &mut BreakArgs<'a>,
(l, prev_punct): (usize, bool),
l: usize,
prev_punct: bool,
winfo: &'a WordInfo<'a>,
) -> std::io::Result<(usize, bool)> {
// compute the length of this word, considering how tabs will expand at this position on the line
@ -233,14 +233,14 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
linebreak: None,
break_before: false,
demerits: 0,
prev_rat: 0.0f32,
prev_rat: 0.0,
length: args.init_len,
fresh: false,
}];
// this vec holds the current active linebreaks; next_ holds the breaks that will be active for
// the next word
let active_breaks = &mut vec![0];
let next_active_breaks = &mut vec![];
let mut active_breaks = vec![0];
let mut next_active_breaks = vec![];
let stretch = (args.opts.width - args.opts.goal) as isize;
let minlength = args.opts.goal - stretch as usize;
@ -248,10 +248,7 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
let mut is_sentence_start = false;
let mut least_demerits = 0;
loop {
let w = match iter.next() {
None => break,
Some(w) => w,
};
let Some(w) = iter.next() else { break };
// if this is the last word, we don't add additional demerits for this break
let (is_last_word, is_sentence_end) = match iter.peek() {
@ -358,13 +355,13 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
least_demerits = cmp::max(ld_next, 0);
}
// swap in new list of active breaks
mem::swap(active_breaks, next_active_breaks);
mem::swap(&mut active_breaks, &mut next_active_breaks);
// If this was the last word in a sentence, the next one must be the first in the next.
is_sentence_start = is_sentence_end;
}
// return the best path
build_best_path(&linebreaks, active_breaks)
build_best_path(&linebreaks, &active_breaks)
}
fn build_best_path<'a>(paths: &[LineBreak<'a>], active: &[usize]) -> Vec<(&'a WordInfo<'a>, bool)> {

View file

@ -52,18 +52,22 @@ impl Line {
}
}
// each line's prefix has to be considered to know whether to merge it with
// the next line or not
/// Each line's prefix has to be considered to know whether to merge it with
/// the next line or not
#[derive(Debug)]
pub struct FileLine {
line: String,
indent_end: usize, // the end of the indent, always the start of the text
pfxind_end: usize, // the end of the PREFIX's indent, that is, the spaces before the prefix
indent_len: usize, // display length of indent taking into account tabs
prefix_len: usize, // PREFIX indent length taking into account tabs
/// The end of the indent, always the start of the text
indent_end: usize,
/// The end of the PREFIX's indent, that is, the spaces before the prefix
pfxind_end: usize,
/// Display length of indent taking into account tabs
indent_len: usize,
/// PREFIX indent length taking into account tabs
prefix_len: usize,
}
// iterator that produces a stream of Lines from a file
/// Iterator that produces a stream of Lines from a file
pub struct FileLines<'a> {
opts: &'a FmtOptions,
lines: Lines<&'a mut FileOrStdReader>,
@ -74,7 +78,7 @@ impl<'a> FileLines<'a> {
FileLines { opts, lines }
}
// returns true if this line should be formatted
/// Returns `(should_format, n)`: whether this line should be formatted, plus a `usize` produced by the prefix match (`0` when `use_prefix` is off)
fn match_prefix(&self, line: &str) -> (bool, usize) {
if !self.opts.use_prefix {
return (true, 0);
@ -83,7 +87,7 @@ impl<'a> FileLines<'a> {
FileLines::match_prefix_generic(&self.opts.prefix[..], line, self.opts.xprefix)
}
// returns true if this line should be formatted
/// returns true if this line should be formatted
fn match_anti_prefix(&self, line: &str) -> bool {
if !self.opts.use_anti_prefix {
return true;
@ -148,13 +152,7 @@ impl<'a> Iterator for FileLines<'a> {
type Item = Line;
fn next(&mut self) -> Option<Line> {
let n = match self.lines.next() {
Some(t) => match t {
Ok(tt) => tt,
Err(_) => return None,
},
None => return None,
};
let n = self.lines.next()?.ok()?;
// if this line is entirely whitespace,
// emit a blank line
@ -205,24 +203,33 @@ impl<'a> Iterator for FileLines<'a> {
}
}
// a paragraph : a collection of FileLines that are to be formatted
// plus info about the paragraph's indentation
// (but we only retain the String from the FileLine; the other info
// is only there to help us in deciding how to merge lines into Paragraphs
/// A paragraph: a collection of FileLines that are to be formatted
/// plus info about the paragraph's indentation
///
/// We only retain the String from the FileLine; the other info
/// is only there to help us in deciding how to merge lines into Paragraphs
#[derive(Debug)]
pub struct Paragraph {
lines: Vec<String>, // the lines of the file
pub init_str: String, // string representing the init, that is, the first line's indent
pub init_len: usize, // printable length of the init string considering TABWIDTH
init_end: usize, // byte location of end of init in first line String
pub indent_str: String, // string representing indent
pub indent_len: usize, // length of above
indent_end: usize, // byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward)
pub mail_header: bool, // we need to know if this is a mail header because we do word splitting differently in that case
/// the lines of the file
lines: Vec<String>,
/// string representing the init, that is, the first line's indent
pub init_str: String,
/// printable length of the init string considering TABWIDTH
pub init_len: usize,
/// byte location of end of init in first line String
init_end: usize,
/// string representing indent
pub indent_str: String,
/// display length of `indent_str`, taking tabs into account
pub indent_len: usize,
/// byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward)
indent_end: usize,
/// we need to know if this is a mail header because we do word splitting differently in that case
pub mail_header: bool,
}
// an iterator producing a stream of paragraphs from a stream of lines
// given a set of options.
/// An iterator producing a stream of paragraphs from a stream of lines
/// given a set of options.
pub struct ParagraphStream<'a> {
lines: Peekable<FileLines<'a>>,
next_mail: bool,
@ -240,7 +247,7 @@ impl<'a> ParagraphStream<'a> {
}
}
// detect RFC822 mail header
/// Detect RFC822 mail header
fn is_mail_header(line: &FileLine) -> bool {
// a mail header begins with either "From " (envelope sender line)
// or with a sequence of printable ASCII chars (33 to 126, inclusive,
@ -276,12 +283,9 @@ impl<'a> Iterator for ParagraphStream<'a> {
#[allow(clippy::cognitive_complexity)]
fn next(&mut self) -> Option<Result<Paragraph, String>> {
// return a NoFormatLine in an Err; it should immediately be output
let noformat = match self.lines.peek() {
None => return None,
Some(l) => match *l {
Line::FormatLine(_) => false,
Line::NoFormatLine(_, _) => true,
},
let noformat = match self.lines.peek()? {
Line::FormatLine(_) => false,
Line::NoFormatLine(_, _) => true,
};
// found a NoFormatLine, immediately dump it out
@ -305,95 +309,89 @@ impl<'a> Iterator for ParagraphStream<'a> {
let mut in_mail = false;
let mut second_done = false; // for when we use crown or tagged mode
loop {
{
// peek ahead
// need to explicitly force fl out of scope before we can call self.lines.next()
let fl = match self.lines.peek() {
None => break,
Some(l) => match *l {
Line::FormatLine(ref x) => x,
Line::NoFormatLine(..) => break,
},
};
// peek ahead
// `fl` borrows from self.lines, so it must not be used after the call to self.lines.next() below
let Some(Line::FormatLine(fl)) = self.lines.peek() else {
break;
};
if p_lines.is_empty() {
// first time through the loop, get things set up
// detect mail header
if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) {
in_mail = true;
// there can't be any indent or pfxind because otherwise is_mail_header
// would fail since there cannot be any whitespace before the colon in a
// valid header field
indent_str.push_str(" ");
indent_len = 2;
if p_lines.is_empty() {
// first time through the loop, get things set up
// detect mail header
if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) {
in_mail = true;
// there can't be any indent or pfxind because otherwise is_mail_header
// would fail since there cannot be any whitespace before the colon in a
// valid header field
indent_str.push_str(" ");
indent_len = 2;
} else {
if self.opts.crown || self.opts.tagged {
init_str.push_str(&fl.line[..fl.indent_end]);
init_len = fl.indent_len;
init_end = fl.indent_end;
} else {
if self.opts.crown || self.opts.tagged {
init_str.push_str(&fl.line[..fl.indent_end]);
init_len = fl.indent_len;
init_end = fl.indent_end;
} else {
second_done = true;
}
// these will be overwritten in the 2nd line of crown or tagged mode, but
// we are not guaranteed to get to the 2nd line, e.g., if the next line
// is a NoFormatLine or None. Thus, we set sane defaults the 1st time around
indent_str.push_str(&fl.line[..fl.indent_end]);
indent_len = fl.indent_len;
indent_end = fl.indent_end;
// save these to check for matching lines
prefix_len = fl.prefix_len;
pfxind_end = fl.pfxind_end;
// in tagged mode, add 4 spaces of additional indenting by default
// (gnu fmt's behavior is different: it seems to find the closest column to
// indent_end that is divisible by 3. But honestly that behavior seems
// pretty arbitrary.
// Perhaps a better default would be 1 TABWIDTH? But ugh that's so big.
if self.opts.tagged {
indent_str.push_str(" ");
indent_len += 4;
}
}
} else if in_mail {
// lines following mail headers must begin with spaces
if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) {
break; // this line does not begin with spaces
}
} else if !second_done {
// now we have enough info to handle crown margin and tagged mode
// in both crown and tagged modes we require that prefix_len is the same
if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end {
break;
second_done = true;
}
// in tagged mode, indent has to be *different* on following lines
if self.opts.tagged
&& indent_len - 4 == fl.indent_len
&& indent_end == fl.indent_end
{
break;
}
// this is part of the same paragraph, get the indent info from this line
indent_str.clear();
// these will be overwritten in the 2nd line of crown or tagged mode, but
// we are not guaranteed to get to the 2nd line, e.g., if the next line
// is a NoFormatLine or None. Thus, we set sane defaults the 1st time around
indent_str.push_str(&fl.line[..fl.indent_end]);
indent_len = fl.indent_len;
indent_end = fl.indent_end;
second_done = true;
} else {
// detect mismatch
if indent_end != fl.indent_end
|| pfxind_end != fl.pfxind_end
|| indent_len != fl.indent_len
|| prefix_len != fl.prefix_len
{
break;
// save these to check for matching lines
prefix_len = fl.prefix_len;
pfxind_end = fl.pfxind_end;
// in tagged mode, add 4 spaces of additional indenting by default
// (gnu fmt's behavior is different: it seems to find the closest column to
// indent_end that is divisible by 3. But honestly that behavior seems
// pretty arbitrary.
// Perhaps a better default would be 1 TABWIDTH? But ugh that's so big.
if self.opts.tagged {
indent_str.push_str(" ");
indent_len += 4;
}
}
} else if in_mail {
// lines following mail headers must begin with spaces
if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) {
break; // this line does not begin with spaces
}
} else if !second_done {
// now we have enough info to handle crown margin and tagged mode
// in both crown and tagged modes we require that prefix_len is the same
if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end {
break;
}
// in tagged mode, indent has to be *different* on following lines
if self.opts.tagged
&& indent_len - 4 == fl.indent_len
&& indent_end == fl.indent_end
{
break;
}
// this is part of the same paragraph, get the indent info from this line
indent_str.clear();
indent_str.push_str(&fl.line[..fl.indent_end]);
indent_len = fl.indent_len;
indent_end = fl.indent_end;
second_done = true;
} else {
// detect mismatch
if indent_end != fl.indent_end
|| pfxind_end != fl.pfxind_end
|| indent_len != fl.indent_len
|| prefix_len != fl.prefix_len
{
break;
}
}
p_lines.push(self.lines.next().unwrap().get_formatline().line);
@ -429,7 +427,7 @@ pub struct ParaWords<'a> {
}
impl<'a> ParaWords<'a> {
pub fn new<'b>(opts: &'b FmtOptions, para: &'b Paragraph) -> ParaWords<'b> {
pub fn new(opts: &'a FmtOptions, para: &'a Paragraph) -> Self {
let mut pw = ParaWords {
opts,
para,