coreutils/fmt/linebreak.rs

/*
 * This file is part of `fmt` from the uutils coreutils package.
 *
 * (c) kwantam <kwantam@gmail.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

use FmtOptions;
use parasplit::{Paragraph, ParaWords, WordInfo};

// Bundle of state handed to the line-breaking routines in this file, so that
// it can be passed around as a single argument.
struct BreakArgs<'a> {
    // formatting options; this file reads `tabwidth` and `width` from it
    opts       : &'a FmtOptions,
    // length of what has already been written on the first line
    // (first-line prefix plus the first word)
    init_len   : uint,
    // string written at the start of every line after a break
    indent_str : &'a str,
    // length accounted for `indent_str` when starting a new line
    indent_len : uint,
    // true when spacing between words is generated here (one or two spaces)
    // instead of relying on the whitespace carried by each word
    uniform    : bool,
    // destination for the formatted output
    ostream    : &'a mut Box<Writer>
}

impl<'a> BreakArgs<'a> {
    // Width consumed, starting at column `posn`, by `pre` columns followed by
    // a tab (which advances to the next multiple of `opts.tabwidth`) followed
    // by `post` more columns.
    //
    // Callers in this file pass WordInfo::before_tab / after_tab as `pre` /
    // `post` and the current line length as `posn`.
    // NOTE(review): divides by `opts.tabwidth`; presumably validated to be
    // nonzero where the options are parsed -- confirm.
    #[inline(always)]
    fn compute_width(&self, pre: uint, post: uint, posn: uint) -> uint {
        let tabwidth = self.opts.tabwidth;
        // first tab stop strictly beyond the column reached after `pre`
        let next_stop = ((pre + posn) / tabwidth + 1) * tabwidth;
        post + next_stop - posn
    }
}

// Write paragraph `para` to `ostream`: emit the first-line prefix and the
// first word here, then hand the remaining words to the greedy breaker
// (break_simple). An empty paragraph produces a single newline.
pub fn break_lines(para: &Paragraph, opts: &FmtOptions, ostream: &mut Box<Writer>) {
    // indent used for every line after the first
    let indent_str = para.indent_str.as_slice();
    let indent_len = para.indent_len;

    // iterator over the words of this paragraph
    let para_words = ParaWords::new(opts, para);
    let mut words = para_words.words();

    // the first word is unconditionally placed on the first line, so take it
    // off the iterator before any breaking decisions are made
    let (first_word, first_len) = match words.next() {
        Some(winfo) => (winfo.word, winfo.word_nchars),
        None => {
            silent_unwrap!(ostream.write_char('\n'));
            return;
        }
    };

    // write whatever precedes the first word and remember how long it is
    let prefix_len =
        if opts.crown || opts.tagged {
            // crown / tagged paragraphs carry a separate "init" prefix
            silent_unwrap!(ostream.write(para.init_str.as_bytes()));
            para.init_len
        } else if para.mail_header {
            // mail headers are written with no indent at all
            0
        } else {
            // otherwise the first line gets the ordinary indent
            silent_unwrap!(ostream.write(indent_str.as_bytes()));
            indent_len
        };

    // the first word goes out only after the prefix
    silent_unwrap!(ostream.write(first_word.as_bytes()));

    let mut break_args = BreakArgs {
        opts       : opts,
        init_len   : first_len + prefix_len,
        indent_str : indent_str,
        indent_len : indent_len,
        // mail headers always use uniform spacing, other text only on request
        uniform    : para.mail_header || opts.uniform,
        ostream    : ostream
    };

    break_simple(&mut words, &mut break_args);
}

/*
 * break_simple is the greedy ("tight") breaker: each word is appended to the
 * current line unless that would exceed the maximum width, in which case a
 * newline and the indent are written first. A final newline ends the paragraph.
 *
 * The caller must already have written the beginning of the first line and
 * must pass its displayed length as args.init_len.
 */
fn break_simple<'a,T: Iterator<&'a WordInfo<'a>>>(iter: &'a mut T, args: &mut BreakArgs<'a>) {
    // fold state: (current line length, whether the previous word ended in punctuation)
    let start = (args.init_len, false);
    iter.fold(start, |state, winfo| accum_words_simple(args, state, winfo));
    silent_unwrap!(args.ostream.write_char('\n'));
}

// Fold step of the greedy breaker: write one word to args.ostream, breaking
// the line first if the word does not fit, and return the new fold state.
//
// The state is (l, prev_punct): `l` is the length of the current output line
// so far and `prev_punct` is the `ends_punct` flag of the previous word. The
// returned tuple has the same shape, updated for `winfo`.
fn accum_words_simple<'a>(args: &mut BreakArgs<'a>, (l, prev_punct): (uint, bool), winfo: &'a WordInfo<'a>) -> (uint, bool) {
    // compute the length of this word, considering how tabs will expand at this position on the line
    let wlen = winfo.word_nchars +
        match winfo.before_tab {
            Some(pre) => args.compute_width(pre, winfo.after_tab, l),
            None => winfo.after_tab
        };

    // spacing that has to be generated here: two spaces at a sentence start
    // (or when a new input line follows punctuation), one space otherwise,
    // and none when neither uniform spacing nor a new input line applies
    let splen =
        if args.uniform || winfo.new_line {
            if winfo.sentence_start || (winfo.new_line && prev_punct) { 2 }
            else { 1 }
        } else {
            0
        };

    if l + wlen + splen > args.opts.width {
        // the word does not fit: start a new line with the indent and write
        // the word from word_start on, i.e. without what precedes it
        let wtrim = winfo.word.slice_from(winfo.word_start);
        silent_unwrap!(args.ostream.write_char('\n'));
        silent_unwrap!(args.ostream.write(args.indent_str.as_bytes()));
        silent_unwrap!(args.ostream.write(wtrim.as_bytes()));
        // line lengths are tracked in characters (see wlen above), so use the
        // word's character count here; wtrim.len() would count bytes and
        // overestimate the length of any non-ASCII word
        (args.indent_len + winfo.word_nchars, winfo.ends_punct)
    } else {
        // the word fits: emit the computed spacing, then the word as stored
        if splen == 2 { silent_unwrap!(args.ostream.write("  ".as_bytes())); }
        else if splen == 1 { silent_unwrap!(args.ostream.write_char(' ')) }
        silent_unwrap!(args.ostream.write(winfo.word.as_bytes()));
        (l + wlen + splen, winfo.ends_punct)
    }
}

// Link from a LineBreak to what came before it.
// Not used yet (hence allow(dead_code)).
// NOTE(review): looks like scaffolding for the planned Knuth-Plass breaker -- confirm.
#[allow(dead_code)]
enum PreviousBreak<'a> {
    // no earlier break: this is the start of the paragraph
    ParaStart,
    // reference to another LineBreak (presumably the preceding one)
    PrevBreak(&'a LineBreak<'a>)
}

// A candidate line break together with its cost. Not used yet (hence
// allow(dead_code)); the comparison impls below order values by `demerits`.
#[allow(dead_code)]
struct LineBreak<'a> {
    // what precedes this break: paragraph start or another LineBreak
    prev       : PreviousBreak<'a>,
    // presumably the word after which the line is broken -- TODO confirm
    breakafter : &'a str,
    // cost of this break; more demerits is treated as worse by the impls below
    demerits   : uint
}

// two LineBreaks are considered equal when their demerits are equal;
// the other fields do not take part in the comparison
#[allow(dead_code)]
impl<'a> PartialEq for LineBreak<'a> {
    fn eq(&self, other: &LineBreak) -> bool {
        let (mine, theirs) = (self.demerits, other.demerits);
        mine == theirs
    }
}

// NOTE the ordering is inverted on purpose: a break is "less than" another
// when it is worse, i.e., when it carries more demerits
#[allow(dead_code)]
impl<'a> PartialOrd for LineBreak<'a> {
    fn lt(&self, other: &LineBreak) -> bool {
        other.demerits < self.demerits
    }
}

// Ord requires Eq, so provide the (empty) marker impl;
// equality itself is defined by the PartialEq impl on demerits
#[allow(dead_code)]
impl<'a> Eq for LineBreak<'a> {}

// NOTE the ordering is reversed here as well, matching `lt`:
// the break with fewer demerits compares as Greater
#[allow(dead_code)]
impl<'a> Ord for LineBreak<'a> {
    fn cmp(&self, other: &LineBreak) -> Ordering {
        let mine = self.demerits;
        // comparing other against self (not self against other) flips the order
        other.demerits.cmp(&mine)
    }
}
initial release of working fmt Note: for now, this version does not use Knuth-Plass, but everything else is in place with "greedy" breaking. All options (should) work, and performance is nearly on par with GNU fmt. Squashed commit of the following local commits: commit ebc12f5e7d19d351ada9273ec0c42d66d3730431 commit 125fdabcb2a32de161c7a8b76c3e766a40ff9f76 commit dadd62acc093b5bd4bc94ad4f8a499d2663a7097 commit e436fdaade3876e92020c61a736eba54eb5ca0cf commit bbc4f4f6ad749753efe9b2df871ddb257f33de4b commit 12bc4ecb0c56c0d43515a111e9129a4bfaf36531 commit 2e693553ed9af59c53ee13026d19c9f82f2973fc commit 9b15a130148d62dd6a1d2765848ddc4daf30c649 commit ea335eb2869afcc94709345118fab3fb2e612954 Merge: ee92573 23cc41d commit 23cc41d188cb3134c04872fd77acb331d86a64ea commit 2fa7c48133001d86da39feda04d870ff67e88400 commit eb71558ee46654b568adf167f194cb854bbf7056 commit c8baabc0b86d831b5741fa496c312134db652c55 commit ee4fab44b216c1d9c7dcdcdc29ca587c76284834 commit c5444416a531ae1341dddbfd528e4a3ee5f106bf commit e1177d47941654b8834d18599c80065943a26159 commit c7fb30e2ff32313974f99d34ba4735be064b0cc5 commit 99a9406bc6fff33fc64c190356e48f443312a6c4 commit 3d244d62c9b60b579f2e5b723da6389a5dbc8805 commit 2d4f09cb2ff83664730edba209ec129abdcf1403 commit 947c32b72bff8d50e362555ec21a6b848d5fec9f commit 8556d2a3467651ee7833ad800876af35a7dd5db7 commit a2e4bc3dc45e5f39b402e6fdd3e19edcea6d3c34 Merge: 0308884 439e65d commit 03088844f1fd2faca6c3471230730136dd140f35 commit ac80d888649dd1311fdaa68400ea45d52b2e23ab commit c1d6b36acb7038e14d5b3e1fb6a44614a3351f96 commit 6539b102593aa9d9570df8be99ca1a1bf01ea1f4 commit 439e65d3331936e00fa89a4b2f88c343b9e28c5b commit fac27de7c4918bc5cf1a1ac1a43550236ba8af4d commit 365989c5bbe5c2289648f6efbc3c9388388e30a0 commit 3dd71364cce9aaaa773fc88eb206aba31aa61390 2014-06-19 00:43:26 +00:00			`/*`
			* This file is part of `fmt` from the uutils coreutils package.
			`*`
			`* (c) kwantam <kwantam@gmail.com>`
			`*`
			`* For the full copyright and license information, please view the LICENSE`
			`* file that was distributed with this source code.`
			`*/`

fmt: correct tab support, better formatting In brief: - Lines no longer end with trailing whitespace. - fixed length calculation when tabs are present - word splitting gives more info to the line breaking process, which should be useful for K-P - code cleanup here and there - K-P is not implemented yet. That's next. There is some dead code in linebreak.rs that forms the basis for K-P. - Performance has regressed somewhat; we're now about 60% slower than GNU fmt (formerly about 20%), but we are basically on par with OpenBSD fmt. - addressed comments from Arcterus on PR This is a squash of the following local commits: 1feceb0 - address comments from Arcterus on PR b36aa90 - use word_nchars rather than w.len() for first word f44a629 - proper tab handling 4f57593 - added tab analysis info to WordInfo 211f4a5 - pass WordInfo by ref 80e14b9 - overhaul word splitting apparatus d29f2e6 - tidy up the breaking by passing arg struct pointer d8020df - lines blank save for prefix act as par separators 8bd7f1e - fixed tab behavior in -u a2387f7 - cleaner prefix handling ; cleanup ; prep for K-P 2014-06-19 21:18:29 +00:00			`use FmtOptions;`
			`use parasplit::{Paragraph, ParaWords, WordInfo};`

			`struct BreakArgs<'a> {`
			`opts : &'a FmtOptions,`
			`init_len : uint,`
			`indent_str : &'a str,`
			`indent_len : uint,`
			`uniform : bool,`
			`ostream : &'a mut Box<Writer>`
			`}`

			`impl<'a> BreakArgs<'a> {`
			`#[inline(always)]`
			`fn compute_width(&self, pre: uint, post: uint, posn: uint) -> uint {`
			`post + ((pre + posn) / self.opts.tabwidth + 1) * self.opts.tabwidth - posn`
			`}`
			`}`

			`pub fn break_lines(para: &Paragraph, opts: &FmtOptions, ostream: &mut Box<Writer>) {`
			`// indent`
			`let pIndent = para.indent_str.as_slice();`
			`let pIndentLen = para.indent_len;`

			`// words`
			`let pWords = ParaWords::new(opts, para);`
			`let mut pWords_words = pWords.words();`

			`// the first word will always appear on the first line`
			`// make sure of this here`
			`let (w, w_len) = match pWords_words.next() {`
			`Some(winfo) => (winfo.word, winfo.word_nchars),`
			`None => {`
			`silent_unwrap!(ostream.write_char('\n'));`
			`return;`
			`}`
			`};`
			`// print the init, if it exists, and get its length`
			`let pInitLen = w_len +`
			`if opts.crown \|\| opts.tagged {`
			`// handle "init" portion`
			`silent_unwrap!(ostream.write(para.init_str.as_bytes()));`
			`para.init_len`
			`} else if !para.mail_header {`
			`// for non-(crown, tagged) that's the same as a normal indent`
			`silent_unwrap!(ostream.write(pIndent.as_bytes()));`
			`pIndentLen`
initial release of working fmt Note: for now, this version does not use Knuth-Plass, but everything else is in place with "greedy" breaking. All options (should) work, and performance is nearly on par with GNU fmt. Squashed commit of the following local commits: commit ebc12f5e7d19d351ada9273ec0c42d66d3730431 commit 125fdabcb2a32de161c7a8b76c3e766a40ff9f76 commit dadd62acc093b5bd4bc94ad4f8a499d2663a7097 commit e436fdaade3876e92020c61a736eba54eb5ca0cf commit bbc4f4f6ad749753efe9b2df871ddb257f33de4b commit 12bc4ecb0c56c0d43515a111e9129a4bfaf36531 commit 2e693553ed9af59c53ee13026d19c9f82f2973fc commit 9b15a130148d62dd6a1d2765848ddc4daf30c649 commit ea335eb2869afcc94709345118fab3fb2e612954 Merge: ee92573 23cc41d commit 23cc41d188cb3134c04872fd77acb331d86a64ea commit 2fa7c48133001d86da39feda04d870ff67e88400 commit eb71558ee46654b568adf167f194cb854bbf7056 commit c8baabc0b86d831b5741fa496c312134db652c55 commit ee4fab44b216c1d9c7dcdcdc29ca587c76284834 commit c5444416a531ae1341dddbfd528e4a3ee5f106bf commit e1177d47941654b8834d18599c80065943a26159 commit c7fb30e2ff32313974f99d34ba4735be064b0cc5 commit 99a9406bc6fff33fc64c190356e48f443312a6c4 commit 3d244d62c9b60b579f2e5b723da6389a5dbc8805 commit 2d4f09cb2ff83664730edba209ec129abdcf1403 commit 947c32b72bff8d50e362555ec21a6b848d5fec9f commit 8556d2a3467651ee7833ad800876af35a7dd5db7 commit a2e4bc3dc45e5f39b402e6fdd3e19edcea6d3c34 Merge: 0308884 439e65d commit 03088844f1fd2faca6c3471230730136dd140f35 commit ac80d888649dd1311fdaa68400ea45d52b2e23ab commit c1d6b36acb7038e14d5b3e1fb6a44614a3351f96 commit 6539b102593aa9d9570df8be99ca1a1bf01ea1f4 commit 439e65d3331936e00fa89a4b2f88c343b9e28c5b commit fac27de7c4918bc5cf1a1ac1a43550236ba8af4d commit 365989c5bbe5c2289648f6efbc3c9388388e30a0 commit 3dd71364cce9aaaa773fc88eb206aba31aa61390 2014-06-19 00:43:26 +00:00			`} else {`
fmt: correct tab support, better formatting In brief: - Lines no longer end with trailing whitespace. - fixed length calculation when tabs are present - word splitting gives more info to the line breaking process, which should be useful for K-P - code cleanup here and there - K-P is not implemented yet. That's next. There is some dead code in linebreak.rs that forms the basis for K-P. - Performance has regressed somewhat; we're now about 60% slower than GNU fmt (formerly about 20%), but we are basically on par with OpenBSD fmt. - addressed comments from Arcterus on PR This is a squash of the following local commits: 1feceb0 - address comments from Arcterus on PR b36aa90 - use word_nchars rather than w.len() for first word f44a629 - proper tab handling 4f57593 - added tab analysis info to WordInfo 211f4a5 - pass WordInfo by ref 80e14b9 - overhaul word splitting apparatus d29f2e6 - tidy up the breaking by passing arg struct pointer d8020df - lines blank save for prefix act as par separators 8bd7f1e - fixed tab behavior in -u a2387f7 - cleaner prefix handling ; cleanup ; prep for K-P 2014-06-19 21:18:29 +00:00			`// except that mail headers get no indent at all`
			`0`
initial release of working fmt Note: for now, this version does not use Knuth-Plass, but everything else is in place with "greedy" breaking. All options (should) work, and performance is nearly on par with GNU fmt. Squashed commit of the following local commits: commit ebc12f5e7d19d351ada9273ec0c42d66d3730431 commit 125fdabcb2a32de161c7a8b76c3e766a40ff9f76 commit dadd62acc093b5bd4bc94ad4f8a499d2663a7097 commit e436fdaade3876e92020c61a736eba54eb5ca0cf commit bbc4f4f6ad749753efe9b2df871ddb257f33de4b commit 12bc4ecb0c56c0d43515a111e9129a4bfaf36531 commit 2e693553ed9af59c53ee13026d19c9f82f2973fc commit 9b15a130148d62dd6a1d2765848ddc4daf30c649 commit ea335eb2869afcc94709345118fab3fb2e612954 Merge: ee92573 23cc41d commit 23cc41d188cb3134c04872fd77acb331d86a64ea commit 2fa7c48133001d86da39feda04d870ff67e88400 commit eb71558ee46654b568adf167f194cb854bbf7056 commit c8baabc0b86d831b5741fa496c312134db652c55 commit ee4fab44b216c1d9c7dcdcdc29ca587c76284834 commit c5444416a531ae1341dddbfd528e4a3ee5f106bf commit e1177d47941654b8834d18599c80065943a26159 commit c7fb30e2ff32313974f99d34ba4735be064b0cc5 commit 99a9406bc6fff33fc64c190356e48f443312a6c4 commit 3d244d62c9b60b579f2e5b723da6389a5dbc8805 commit 2d4f09cb2ff83664730edba209ec129abdcf1403 commit 947c32b72bff8d50e362555ec21a6b848d5fec9f commit 8556d2a3467651ee7833ad800876af35a7dd5db7 commit a2e4bc3dc45e5f39b402e6fdd3e19edcea6d3c34 Merge: 0308884 439e65d commit 03088844f1fd2faca6c3471230730136dd140f35 commit ac80d888649dd1311fdaa68400ea45d52b2e23ab commit c1d6b36acb7038e14d5b3e1fb6a44614a3351f96 commit 6539b102593aa9d9570df8be99ca1a1bf01ea1f4 commit 439e65d3331936e00fa89a4b2f88c343b9e28c5b commit fac27de7c4918bc5cf1a1ac1a43550236ba8af4d commit 365989c5bbe5c2289648f6efbc3c9388388e30a0 commit 3dd71364cce9aaaa773fc88eb206aba31aa61390 2014-06-19 00:43:26 +00:00			`};`
fmt: correct tab support, better formatting In brief: - Lines no longer end with trailing whitespace. - fixed length calculation when tabs are present - word splitting gives more info to the line breaking process, which should be useful for K-P - code cleanup here and there - K-P is not implemented yet. That's next. There is some dead code in linebreak.rs that forms the basis for K-P. - Performance has regressed somewhat; we're now about 60% slower than GNU fmt (formerly about 20%), but we are basically on par with OpenBSD fmt. - addressed comments from Arcterus on PR This is a squash of the following local commits: 1feceb0 - address comments from Arcterus on PR b36aa90 - use word_nchars rather than w.len() for first word f44a629 - proper tab handling 4f57593 - added tab analysis info to WordInfo 211f4a5 - pass WordInfo by ref 80e14b9 - overhaul word splitting apparatus d29f2e6 - tidy up the breaking by passing arg struct pointer d8020df - lines blank save for prefix act as par separators 8bd7f1e - fixed tab behavior in -u a2387f7 - cleaner prefix handling ; cleanup ; prep for K-P 2014-06-19 21:18:29 +00:00			`// write first word after writing init`
initial release of working fmt Note: for now, this version does not use Knuth-Plass, but everything else is in place with "greedy" breaking. All options (should) work, and performance is nearly on par with GNU fmt. Squashed commit of the following local commits: commit ebc12f5e7d19d351ada9273ec0c42d66d3730431 commit 125fdabcb2a32de161c7a8b76c3e766a40ff9f76 commit dadd62acc093b5bd4bc94ad4f8a499d2663a7097 commit e436fdaade3876e92020c61a736eba54eb5ca0cf commit bbc4f4f6ad749753efe9b2df871ddb257f33de4b commit 12bc4ecb0c56c0d43515a111e9129a4bfaf36531 commit 2e693553ed9af59c53ee13026d19c9f82f2973fc commit 9b15a130148d62dd6a1d2765848ddc4daf30c649 commit ea335eb2869afcc94709345118fab3fb2e612954 Merge: ee92573 23cc41d commit 23cc41d188cb3134c04872fd77acb331d86a64ea commit 2fa7c48133001d86da39feda04d870ff67e88400 commit eb71558ee46654b568adf167f194cb854bbf7056 commit c8baabc0b86d831b5741fa496c312134db652c55 commit ee4fab44b216c1d9c7dcdcdc29ca587c76284834 commit c5444416a531ae1341dddbfd528e4a3ee5f106bf commit e1177d47941654b8834d18599c80065943a26159 commit c7fb30e2ff32313974f99d34ba4735be064b0cc5 commit 99a9406bc6fff33fc64c190356e48f443312a6c4 commit 3d244d62c9b60b579f2e5b723da6389a5dbc8805 commit 2d4f09cb2ff83664730edba209ec129abdcf1403 commit 947c32b72bff8d50e362555ec21a6b848d5fec9f commit 8556d2a3467651ee7833ad800876af35a7dd5db7 commit a2e4bc3dc45e5f39b402e6fdd3e19edcea6d3c34 Merge: 0308884 439e65d commit 03088844f1fd2faca6c3471230730136dd140f35 commit ac80d888649dd1311fdaa68400ea45d52b2e23ab commit c1d6b36acb7038e14d5b3e1fb6a44614a3351f96 commit 6539b102593aa9d9570df8be99ca1a1bf01ea1f4 commit 439e65d3331936e00fa89a4b2f88c343b9e28c5b commit fac27de7c4918bc5cf1a1ac1a43550236ba8af4d commit 365989c5bbe5c2289648f6efbc3c9388388e30a0 commit 3dd71364cce9aaaa773fc88eb206aba31aa61390 2014-06-19 00:43:26 +00:00			`silent_unwrap!(ostream.write(w.as_bytes()));`
fmt: correct tab support, better formatting In brief: - Lines no longer end with trailing whitespace. - fixed length calculation when tabs are present - word splitting gives more info to the line breaking process, which should be useful for K-P - code cleanup here and there - K-P is not implemented yet. That's next. There is some dead code in linebreak.rs that forms the basis for K-P. - Performance has regressed somewhat; we're now about 60% slower than GNU fmt (formerly about 20%), but we are basically on par with OpenBSD fmt. - addressed comments from Arcterus on PR This is a squash of the following local commits: 1feceb0 - address comments from Arcterus on PR b36aa90 - use word_nchars rather than w.len() for first word f44a629 - proper tab handling 4f57593 - added tab analysis info to WordInfo 211f4a5 - pass WordInfo by ref 80e14b9 - overhaul word splitting apparatus d29f2e6 - tidy up the breaking by passing arg struct pointer d8020df - lines blank save for prefix act as par separators 8bd7f1e - fixed tab behavior in -u a2387f7 - cleaner prefix handling ; cleanup ; prep for K-P 2014-06-19 21:18:29 +00:00
			`// does this paragraph require uniform spacing?`
			`let uniform = para.mail_header \|\| opts.uniform;`

			`let mut break_args = BreakArgs {`
			`opts : opts,`
			`init_len : pInitLen,`
			`indent_str : pIndent,`
			`indent_len : pIndentLen,`
			`uniform : uniform,`
			`ostream : ostream`
			`};`

			`break_simple(&mut pWords_words, &mut break_args);`
initial release of working fmt Note: for now, this version does not use Knuth-Plass, but everything else is in place with "greedy" breaking. All options (should) work, and performance is nearly on par with GNU fmt. Squashed commit of the following local commits: commit ebc12f5e7d19d351ada9273ec0c42d66d3730431 commit 125fdabcb2a32de161c7a8b76c3e766a40ff9f76 commit dadd62acc093b5bd4bc94ad4f8a499d2663a7097 commit e436fdaade3876e92020c61a736eba54eb5ca0cf commit bbc4f4f6ad749753efe9b2df871ddb257f33de4b commit 12bc4ecb0c56c0d43515a111e9129a4bfaf36531 commit 2e693553ed9af59c53ee13026d19c9f82f2973fc commit 9b15a130148d62dd6a1d2765848ddc4daf30c649 commit ea335eb2869afcc94709345118fab3fb2e612954 Merge: ee92573 23cc41d commit 23cc41d188cb3134c04872fd77acb331d86a64ea commit 2fa7c48133001d86da39feda04d870ff67e88400 commit eb71558ee46654b568adf167f194cb854bbf7056 commit c8baabc0b86d831b5741fa496c312134db652c55 commit ee4fab44b216c1d9c7dcdcdc29ca587c76284834 commit c5444416a531ae1341dddbfd528e4a3ee5f106bf commit e1177d47941654b8834d18599c80065943a26159 commit c7fb30e2ff32313974f99d34ba4735be064b0cc5 commit 99a9406bc6fff33fc64c190356e48f443312a6c4 commit 3d244d62c9b60b579f2e5b723da6389a5dbc8805 commit 2d4f09cb2ff83664730edba209ec129abdcf1403 commit 947c32b72bff8d50e362555ec21a6b848d5fec9f commit 8556d2a3467651ee7833ad800876af35a7dd5db7 commit a2e4bc3dc45e5f39b402e6fdd3e19edcea6d3c34 Merge: 0308884 439e65d commit 03088844f1fd2faca6c3471230730136dd140f35 commit ac80d888649dd1311fdaa68400ea45d52b2e23ab commit c1d6b36acb7038e14d5b3e1fb6a44614a3351f96 commit 6539b102593aa9d9570df8be99ca1a1bf01ea1f4 commit 439e65d3331936e00fa89a4b2f88c343b9e28c5b commit fac27de7c4918bc5cf1a1ac1a43550236ba8af4d commit 365989c5bbe5c2289648f6efbc3c9388388e30a0 commit 3dd71364cce9aaaa773fc88eb206aba31aa61390 2014-06-19 00:43:26 +00:00			`}`
fmt: correct tab support, better formatting In brief: - Lines no longer end with trailing whitespace. - fixed length calculation when tabs are present - word splitting gives more info to the line breaking process, which should be useful for K-P - code cleanup here and there - K-P is not implemented yet. That's next. There is some dead code in linebreak.rs that forms the basis for K-P. - Performance has regressed somewhat; we're now about 60% slower than GNU fmt (formerly about 20%), but we are basically on par with OpenBSD fmt. - addressed comments from Arcterus on PR This is a squash of the following local commits: 1feceb0 - address comments from Arcterus on PR b36aa90 - use word_nchars rather than w.len() for first word f44a629 - proper tab handling 4f57593 - added tab analysis info to WordInfo 211f4a5 - pass WordInfo by ref 80e14b9 - overhaul word splitting apparatus d29f2e6 - tidy up the breaking by passing arg struct pointer d8020df - lines blank save for prefix act as par separators 8bd7f1e - fixed tab behavior in -u a2387f7 - cleaner prefix handling ; cleanup ; prep for K-P 2014-06-19 21:18:29 +00:00
			`/*`
			`* break_simple implements the "tight" breaking algorithm: print words until`
			`* maxlength would be exceeded, then print a linebreak and indent and continue.`
			`* Note that any first line indent should already have been printed before`
			`* calling this function, and the displayed length of said indent passed as`
			`* args.init_len`
			`*/`
			`fn break_simple<'a,T: Iterator<&'a WordInfo<'a>>>(iter: &'a mut T, args: &mut BreakArgs<'a>) {`
			`iter.fold((args.init_len, false), \|l, winfo\| accum_words_simple(args, l, winfo));`
			`silent_unwrap!(args.ostream.write_char('\n'));`
			`}`

			`fn accum_words_simple<'a>(args: &mut BreakArgs<'a>, (l, prev_punct): (uint, bool), winfo: &'a WordInfo<'a>) -> (uint, bool) {`
			`// compute the length of this word, considering how tabs will expand at this position on the line`
			`let wlen = winfo.word_nchars +`
			`if winfo.before_tab.is_some() {`
			`args.compute_width(winfo.before_tab.unwrap(), winfo.after_tab, l)`
			`} else {`
			`winfo.after_tab`
			`};`

			`let splen =`
			`if args.uniform \|\| winfo.new_line {`
			`if winfo.sentence_start \|\| (winfo.new_line && prev_punct) { 2 }`
			`else { 1 }`
			`} else {`
			`0`
			`};`

			`if l + wlen + splen > args.opts.width {`
			`let wtrim = winfo.word.slice_from(winfo.word_start);`
			`silent_unwrap!(args.ostream.write_char('\n'));`
			`silent_unwrap!(args.ostream.write(args.indent_str.as_bytes()));`
			`silent_unwrap!(args.ostream.write(wtrim.as_bytes()));`
			`(args.indent_len + wtrim.len(), winfo.ends_punct)`
			`} else {`
			`if splen == 2 { silent_unwrap!(args.ostream.write(" ".as_bytes())); }`
			`else if splen == 1 { silent_unwrap!(args.ostream.write_char(' ')) }`
			`silent_unwrap!(args.ostream.write(winfo.word.as_bytes()));`
			`(l + wlen + splen, winfo.ends_punct)`
			`}`
			`}`

			`#[allow(dead_code)]`
			`enum PreviousBreak<'a> {`
			`ParaStart,`
			`PrevBreak(&'a LineBreak<'a>)`
			`}`

			`#[allow(dead_code)]`
			`struct LineBreak<'a> {`
			`prev : PreviousBreak<'a>,`
			`breakafter : &'a str,`
			`demerits : uint`
			`}`

			`// when comparing two LineBreaks, compare their demerits`
			`#[allow(dead_code)]`
			`impl<'a> PartialEq for LineBreak<'a> {`
			`fn eq(&self, other: &LineBreak) -> bool {`
			`self.demerits == other.demerits`
			`}`
			`}`

			`// NOTE "less than" in this case means "worse", i.e., more demerits`
			`#[allow(dead_code)]`
			`impl<'a> PartialOrd for LineBreak<'a> {`
			`fn lt(&self, other: &LineBreak) -> bool {`
			`self.demerits > other.demerits`
			`}`
			`}`

			`// we have to satisfy Eq to implement Ord`
			`#[allow(dead_code)]`
			`impl<'a> Eq for LineBreak<'a> {}`

			`// NOTE again here we reverse the ordering:`
			`// if other has more demerits, self is Greater`
			`#[allow(dead_code)]`
			`impl<'a> Ord for LineBreak<'a> {`
			`fn cmp(&self, other: &LineBreak) -> Ordering {`
			`other.demerits.cmp(&self.demerits)`
			`}`
			`}`