coreutils/src/ptx/ptx.rs

#![crate_name = "ptx"]
#![feature(convert, slice_chars, vec_push_all)]
/*
* This file is part of the uutils coreutils package.
*
* (c) Dorota Kapturkiewicz <dokaptur@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
extern crate aho_corasick;
extern crate getopts;
extern crate memchr;
extern crate regex_syntax;
extern crate regex;
use std::collections::{HashMap, HashSet, BTreeSet};
use std::default::Default;
use std::fs::File;
use getopts::{Options, Matches};
use std::io::{stdin, stdout, BufReader, BufWriter, BufRead, Read, Write};
use regex::Regex;
use std::cmp;
#[path = "../common/util.rs"]
#[macro_use]
mod util;
static NAME: &'static str = "ptx";
static VERSION: &'static str = "1.0.0";
#[derive(Debug)]
enum OutFormat {
Dumb,
Roff,
Tex,
}
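// Runtime configuration built from the command-line options. The defaults
// below (72-column lines, 3-column gap, "xx" macro, "/" truncation string,
// GNU extensions on) match the GNU ptx defaults.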
#[derive(Debug)]
struct Config {
format : OutFormat,
gnu_ext : bool,
auto_ref : bool,
input_ref : bool,
right_ref : bool,
ignore_case : bool,
macro_name : String,
trunc_str : String,
context_regex : String,
line_width : usize,
gap_size : usize,
}
impl Default for Config {
fn default() -> Config {
Config {
format : OutFormat::Dumb,
gnu_ext : true,
auto_ref : false,
input_ref : false,
right_ref : false,
ignore_case : false,
macro_name : "xx".to_string(),
trunc_str : "/".to_string(),
context_regex : "\\w+".to_string(),
line_width : 72,
gap_size : 3
}
}
}
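// Reads a newline-separated word list from the file named by the given
// option; used for the -o (only) and -i (ignore) filter files.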
fn read_word_filter_file(matches: &Matches, option: &str) -> HashSet<String> {
let filename = matches.opt_str(option).expect("parsing options failed!");
let reader = BufReader::new(crash_if_err!(1, File::open(filename)));
let mut words: HashSet<String> = HashSet::new();
for word in reader.lines() {
words.insert(crash_if_err!(1, word));
}
words
}
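// Keyword filter assembled from the -o/-i word lists and the regular
// expression used to find keywords in each line (-W, or a default that
// depends on whether GNU extensions are enabled). -b is rejected as
// unimplemented.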
#[derive(Debug)]
struct WordFilter {
only_specified: bool,
ignore_specified: bool,
only_set: HashSet<String>,
ignore_set: HashSet<String>,
word_regex: String,
}
impl WordFilter {
fn new(matches: &Matches, config: &Config) -> WordFilter {
let (o, oset): (bool, HashSet<String>) =
if matches.opt_present("o") {
(true, read_word_filter_file(matches, "o"))
} else {
(false, HashSet::new())
};
let (i, iset): (bool, HashSet<String>) =
if matches.opt_present("i") {
(true, read_word_filter_file(matches, "i"))
} else {
(false, HashSet::new())
};
if matches.opt_present("b") {
crash!(1, "-b not implemented yet");
}
let reg =
if matches.opt_present("W") {
matches.opt_str("W").expect("parsing options failed!")
} else if config.gnu_ext {
"\\w+".to_string()
} else {
"[^ \t\n]+".to_string()
};
WordFilter {
only_specified: o,
ignore_specified: i,
only_set: oset,
ignore_set: iset,
word_regex: reg
}
}
}
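// One occurrence of a keyword: the word itself plus enough location data to
// rebuild its context. The derived Ord compares fields in declaration order,
// so a BTreeSet of WordRefs sorts by word first, which is exactly the
// permuted-index ordering. Note that position/position_end are byte offsets
// from the regex but are later used as char indices, so non-ASCII input is
// not handled exactly.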
#[derive(Debug, PartialOrd, PartialEq, Eq, Ord)]
struct WordRef {
word: String,
global_line_nr: usize,
local_line_nr: usize,
position: usize,
position_end: usize,
filename: String,
}
fn print_version() {
println!("{} {}", NAME, VERSION);
}
fn print_usage(opts: &Options) {
let brief = "Usage: ptx [OPTION]... [INPUT]... (without -G) or: \
ptx -G [OPTION]... [INPUT [OUTPUT]] \n Output a permuted index, \
including context, of the words in the input files. \n\n Mandatory \
arguments to long options are mandatory for short options too.";
    let explanation = "With no FILE, or when FILE is -, read standard input. \
                       Default is '-F /'.";
    println!("{}\n{}", opts.usage(&brief), explanation);
}
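// Translates the parsed options into a Config. Only the traditional
// (System V) mode is supported, so the absence of -G, as well as -S, is
// rejected here.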
fn get_config(matches: &Matches) -> Config {
let mut config: Config = Default::default();
let err_msg = "parsing options failed";
if matches.opt_present("G") {
config.gnu_ext = false;
config.format = OutFormat::Roff;
config.context_regex = "[^ \t\n]+".to_string();
} else {
crash!(1, "GNU extensions not implemented yet");
}
if matches.opt_present("S") {
crash!(1, "-S not implemented yet");
}
config.auto_ref = matches.opt_present("A");
config.input_ref = matches.opt_present("r");
    config.right_ref = matches.opt_present("R");
config.ignore_case = matches.opt_present("f");
if matches.opt_present("M") {
config.macro_name =
matches.opt_str("M").expect(err_msg).to_string();
}
if matches.opt_present("F") {
config.trunc_str =
matches.opt_str("F").expect(err_msg).to_string();
}
if matches.opt_present("w") {
let width_str = matches.opt_str("w").expect(err_msg);
config.line_width = crash_if_err!(
1, usize::from_str_radix(width_str.as_str(), 10));
}
if matches.opt_present("g") {
let gap_str = matches.opt_str("g").expect(err_msg);
config.gap_size = crash_if_err!(
1, usize::from_str_radix(gap_str.as_str(), 10));
}
if matches.opt_present("O") {
config.format = OutFormat::Roff;
}
if matches.opt_present("T") {
config.format = OutFormat::Tex;
}
config
}
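// Reads every input file ("-" meaning stdin) into memory, mapping each file
// name to its lines plus the number of lines read from earlier files so that
// global line numbers can be computed. Without GNU extensions only the first
// input file is read.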
fn read_input(input_files: Vec<&str>, config: &Config) ->
HashMap<String, (Vec<String>, usize)> {
let mut file_map : HashMap<String, (Vec<String>, usize)> =
HashMap::new();
let mut files = Vec::new();
if input_files.is_empty() {
files.push("-");
} else {
if config.gnu_ext {
files.push_all(input_files.as_slice());
} else {
files.push(input_files[0]);
}
}
let mut lines_so_far: usize = 0;
for filename in files {
let reader: BufReader<Box<Read>> = BufReader::new(
if filename == "-" {
Box::new(stdin())
} else {
let file = crash_if_err!(1, File::open(filename));
Box::new(file)
});
let lines: Vec<String> = reader.lines().map(|x| crash_if_err!(1, x))
.collect();
let size = lines.len();
file_map.insert(filename.to_string(), (lines, lines_so_far));
        lines_so_far += size;
}
file_map
}
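// Scans every line of every file and collects one WordRef per keyword match,
// applying the -o/-i filters and -f case folding, and skipping the leading
// reference field when -r is given. The BTreeSet keeps the refs sorted.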
fn create_word_set(config: &Config, filter: &WordFilter,
file_map: &HashMap<String, (Vec<String>, usize)>)->
BTreeSet<WordRef> {
let reg = Regex::new(filter.word_regex.as_str()).unwrap();
let ref_reg = Regex::new(config.context_regex.as_str()).unwrap();
let mut word_set: BTreeSet<WordRef> = BTreeSet::new();
for (file, lines) in file_map.iter() {
let mut count: usize = 0;
let offs = lines.1;
for line in (lines.0).iter() {
// if -r, exclude reference from word set
let (ref_beg, ref_end) = match ref_reg.find(line) {
Some(x) => x,
None => (0,0)
};
// match words with given regex
for (beg, end) in reg.find_iter(line) {
if config.input_ref && ((beg, end) == (ref_beg, ref_end)) {
continue;
}
let mut word = line.slice_chars(beg, end).to_string();
if filter.only_specified &&
!(filter.only_set.contains(&word)) {
continue;
}
if filter.ignore_specified &&
filter.ignore_set.contains(&word) {
continue;
}
if config.ignore_case {
word = word.to_lowercase();
}
word_set.insert(WordRef{
word: word,
filename: String::from(file.as_str()),
global_line_nr: offs + count,
local_line_nr: count,
position: beg,
position_end: end
});
}
count += 1;
}
}
word_set
}
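// Builds the reference string for one entry: "file:line" with -A, the line's
// leading reference field with -r, or an empty string otherwise.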
fn get_reference(config: &Config, word_ref: &WordRef, line: &String) ->
String {
if config.auto_ref {
format!("{}:{}", word_ref.filename, word_ref.local_line_nr + 1)
} else if config.input_ref {
let reg = Regex::new(config.context_regex.as_str()).unwrap();
let (beg, end) = match reg.find(line) {
Some(x) => x,
None => (0,0)
};
format!("{}", line.slice_chars(beg, end))
} else {
String::new()
}
}
fn assert_str_integrity(s: &Vec<char>, beg: usize, end: usize) {
assert!(beg <= end);
assert!(end <= s.len());
}
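// The helpers below operate on char vectors with [beg, end) index pairs:
// trim_broken_word_left/right drop a word that the window boundary cut in
// half, and trim_idx strips whitespace from both ends of the window.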
fn trim_broken_word_left(s: &Vec<char>, beg: usize, end: usize) -> usize {
assert_str_integrity(s, beg, end);
if beg == end || beg == 0 || s[beg].is_whitespace() ||
s[beg-1].is_whitespace() {
return beg;
}
let mut b = beg;
while b < end && !s[b].is_whitespace() {
b += 1;
}
b
}
fn trim_broken_word_right(s: &Vec<char>, beg: usize, end: usize) -> usize {
assert_str_integrity(s, beg, end);
if beg == end || end == s.len() || s[end-1].is_whitespace() ||
s[end].is_whitespace() {
return end;
}
let mut e = end;
while beg < e && !s[e-1].is_whitespace() {
e -= 1;
}
e
}
fn trim_idx(s: &Vec<char>, beg: usize, end: usize) -> (usize, usize) {
assert_str_integrity(s, beg, end);
let mut b = beg;
let mut e = end;
while b < e && s[b].is_whitespace() {
b += 1;
}
while b < e && s[e-1].is_whitespace() {
e -= 1;
}
(b,e)
}
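// Splits the context around a keyword into the four chunks of one output
// entry, returned as (tail, before, after, head):
//   before - text immediately left of the keyword, clipped to half the
//            output width without breaking a word;
//   after  - text immediately right of it, clipped to the other half minus
//            the keyword itself;
//   tail   - continuation of the line to the right of `after`, filling the
//            width that `before` left unused;
//   head   - continuation of the line to the left of `before`, filling the
//            width that `after` left unused.
// The -F truncation string marks the spots where context was cut off. Byte
// lengths (String::len) and char indices are mixed here, so the width
// accounting is exact only for single-byte characters.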
fn get_output_chunks(all_before: &String, keyword: &String, all_after: &String,
config: &Config) -> (String, String, String, String) {
assert!(all_before.trim() == all_before.as_str());
assert!(keyword.trim() == keyword.as_str());
assert!(all_after.trim() == all_after.as_str());
let mut head = String::new();
let mut before = String::new();
let mut after = String::new();
let mut tail = String::new();
let half_line_size = cmp::max((config.line_width/2) as isize -
(2*config.trunc_str.len()) as isize, 0) as usize;
let max_after_size = cmp::max(half_line_size as isize -
keyword.len() as isize - 1, 0) as usize;
let max_before_size = half_line_size;
let all_before_vec: Vec<char> = all_before.chars().collect();
let all_after_vec: Vec<char> = all_after.chars().collect();
// get before
let mut bb_tmp =
cmp::max(all_before.len() as isize - max_before_size as isize, 0) as usize;
bb_tmp = trim_broken_word_left(&all_before_vec, bb_tmp, all_before.len());
let (before_beg, before_end) =
trim_idx(&all_before_vec, bb_tmp, all_before.len());
before.push_str(all_before.slice_chars(before_beg, before_end));
assert!(max_before_size >= before.len());
// get after
let mut ae_tmp = cmp::min(max_after_size, all_after.len());
ae_tmp = trim_broken_word_right(&all_after_vec, 0, ae_tmp);
let (after_beg, after_end) = trim_idx(&all_after_vec, 0, ae_tmp);
after.push_str(all_after.slice_chars(after_beg, after_end));
assert!(max_after_size >= after.len());
// get tail
let max_tail_size = max_before_size - before.len();
let (tb, _) = trim_idx(&all_after_vec, after_end, all_after.len());
let mut te_tmp = cmp::min(tb + max_tail_size, all_after.len());
te_tmp = trim_broken_word_right(&all_after_vec, tb, te_tmp);
let (tail_beg, tail_end) = trim_idx(&all_after_vec, tb, te_tmp);
tail.push_str(all_after.slice_chars(tail_beg, tail_end));
// get head
let max_head_size = max_after_size - after.len();
let (_, he) = trim_idx(&all_before_vec, 0, before_beg);
let mut hb_tmp =
cmp::max(he as isize - max_head_size as isize, 0) as usize;
hb_tmp = trim_broken_word_left(&all_before_vec, hb_tmp, he);
let (head_beg, head_end) = trim_idx(&all_before_vec, hb_tmp, he);
head.push_str(all_before.slice_chars(head_beg, head_end));
// put right context truncation string if needed
if after_end != all_after.len() && tail_beg == tail_end {
after.push_str(config.trunc_str.as_str());
} else if after_end != all_after.len() && tail_end != all_after.len() {
tail.push_str(config.trunc_str.as_str());
}
// put left context truncation string if needed
if before_beg != 0 && head_beg == head_end {
before = format!("{}{}", config.trunc_str, before);
} else if before_beg != 0 && head_beg != 0 {
head = format!("{}{}", config.trunc_str, head);
}
// add space before "after" if needed
if after.len() > 0 {
after = format!(" {}", after);
}
(tail, before, after, head)
}
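// TeX output helpers: tex_mapper escapes a single character for TeX, and
// adjust_tex_str normalises every whitespace character of a chunk to a plain
// space, trims it and escapes it character by character.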
fn tex_mapper(x: char) -> String {
match x {
'\\' => "\\backslash{}".to_string(),
'$' | '%' | '#' | '&' | '_' => format!("\\{}", x),
'}' | '{' => format!("$\\{}$", x),
_ => x.to_string()
}
}
fn adjust_tex_str(context: &str) -> String {
let ws_reg = Regex::new(r"[\t\n\v\f\r ]").unwrap();
let mut fix: String = ws_reg.replace_all(context, " ").trim().to_string();
let mapped_chunks: Vec<String> = fix.chars().map(tex_mapper).collect();
fix = mapped_chunks.join("");
fix
}
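// Renders one index entry as a TeX macro call of the form
//   \xx {tail}{before}{keyword}{after}{head}
// (with the -M macro name in place of "xx"), plus a trailing {reference}
// group when -A or -r is in effect. With -r the reference field is first
// stripped from the left context.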
fn format_tex_line(config: &Config, word_ref: &WordRef, line: &String,
reference: &String) -> String {
let mut output = String::new();
output.push_str(&format!("\\{} ", config.macro_name));
let all_before = if config.input_ref {
let before = line.slice_chars(0, word_ref.position);
adjust_tex_str(before.trim().trim_left_matches(reference))
} else {
adjust_tex_str(line.slice_chars(0, word_ref.position))
};
let keyword = adjust_tex_str(
line.slice_chars(word_ref.position, word_ref.position_end));
let all_after = adjust_tex_str(
line.slice_chars(word_ref.position_end, line.len()));
let (tail, before, after, head) =
        get_output_chunks(&all_before, &keyword, &all_after, config);
output.push_str(format!("{5}{0}{6}{5}{1}{6}{5}{2}{6}{5}{3}{6}{5}{4}{6}",
tail, before, keyword, after, head, "{", "}").as_str());
if config.auto_ref || config.input_ref {
output.push_str(
&format!("{}{}{}", "{", adjust_tex_str(&reference), "}"));
}
output
}
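// Roff counterpart of adjust_tex_str: converts tabs, newlines and other
// control whitespace to spaces, doubles embedded double quotes (roff's
// escape for a quote inside a quoted argument) and trims the result.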
fn adjust_roff_str(context: &str) -> String {
let ws_reg = Regex::new(r"[\t\n\v\f\r]").unwrap();
ws_reg.replace_all(context, " ").replace("\"", "\"\"").trim().to_string()
}
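// Renders one index entry as a roff macro call of the form
//   .xx "tail" "before" "keyword after" "head"
// with an extra quoted reference argument when -A or -r is in effect.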
fn format_roff_line(config: &Config, word_ref: &WordRef, line: &str,
reference: &str) -> String {
let mut output = String::new();
output.push_str(&format!(".{}", config.macro_name));
let all_before = if config.input_ref {
let before = line.slice_chars(0, word_ref.position);
adjust_roff_str(before.trim().trim_left_matches(reference))
} else {
adjust_roff_str(line.slice_chars(0, word_ref.position))
};
let keyword = adjust_roff_str(
line.slice_chars(word_ref.position, word_ref.position_end));
let all_after = adjust_roff_str(
line.slice_chars(word_ref.position_end, line.len()));
let (tail, before, after, head) =
        get_output_chunks(&all_before, &keyword, &all_after, config);
output.push_str(format!(" \"{}\" \"{}\" \"{}{}\" \"{}\"",
tail, before, keyword, after, head).as_str());
if config.auto_ref || config.input_ref {
output.push_str(&format!(" \"{}\"", adjust_roff_str(&reference)));
}
output
}
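// Writes every collected WordRef to the output file ("-" meaning stdout),
// formatting each entry with the roff or TeX formatter. The Dumb format only
// exists with GNU extensions, which get_config has already rejected.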
fn write_traditional_output(config: &Config,
file_map: &HashMap<String, (Vec<String>,usize)>,
words: &BTreeSet<WordRef>, output_filename: &str) {
let mut writer: BufWriter<Box<Write>> = BufWriter::new(
if output_filename == "-" {
Box::new(stdout())
} else {
let file = crash_if_err!(1, File::create(output_filename));
Box::new(file)
});
for word_ref in words.iter() {
let file_map_value : &(Vec<String>, usize) =
file_map.get(&(word_ref.filename))
.expect("Missing file in file map");
let (ref lines, _) = *(file_map_value);
let reference =
get_reference(config, word_ref, &lines[word_ref.local_line_nr]);
let output_line: String = match config.format {
OutFormat::Tex => format_tex_line(
config, word_ref, &lines[word_ref.local_line_nr], &reference),
OutFormat::Roff => format_roff_line(
config, word_ref, &lines[word_ref.local_line_nr], &reference),
OutFormat::Dumb => crash!(
1, "There is no dumb format with GNU extensions disabled")
};
crash_if_err!(1, writeln!(writer, "{}", output_line));
}
}
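// Entry point: parses the options, builds the Config and WordFilter, reads
// the inputs, collects the sorted keyword set and writes the permuted index,
// returning the process exit code.
//
// Illustrative invocation (the file name is hypothetical):
//   ptx -G -A -w 40 input.txt
// reads input.txt in traditional (System V) mode and prints roff ".xx"
// entries limited to roughly 40 columns, each carrying an automatically
// generated "input.txt:LINE" reference.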
pub fn uumain(args: Vec<String>) -> i32 {
let mut opts = Options::new();
opts.optflag("A", "auto-reference",
"output automatically generated references");
opts.optflag("G", "traditional", "behave more like System V 'ptx'");
opts.optopt("F", "flag-truncation",
"use STRING for flagging line truncations", "STRING");
opts.optopt("M", "macro-name", "macro name to use instead of 'xx'",
"STRING");
opts.optflag("O", "format=roff", "generate output as roff directives");
opts.optflag("R", "right-side-refs",
"put references at right, not counted in -w");
opts.optopt("S", "sentence-regexp", "for end of lines or end of sentences",
"REGEXP");
opts.optflag("T", "format=tex", "generate output as TeX directives");
opts.optopt("W", "word-regexp", "use REGEXP to match each keyword",
"REGEXP");
opts.optopt("b", "break-file", "word break characters in this FILE",
"FILE");
opts.optflag("f", "ignore-case",
"fold lower case to upper case for sorting");
opts.optopt("g", "gap-size", "gap size in columns between output fields",
"NUMBER");
opts.optopt("i", "ignore-file", "read ignore word list from FILE", "FILE");
opts.optopt("o", "only-file", "read only word list from this FILE",
"FILE");
opts.optflag("r", "references", "first field of each line is a reference");
opts.optopt("w", "width", "output width in columns, reference excluded",
"NUMBER");
opts.optflag("", "help", "display this help and exit");
opts.optflag("", "version", "output version information and exit");
let matches = return_if_err!(1, opts.parse(&args[1..]));
if matches.opt_present("help") {
print_usage(&opts);
return 0;
}
if matches.opt_present("version") {
print_version();
return 0;
}
let config = get_config(&matches);
let word_filter = WordFilter::new(&matches, &config);
let file_map =
read_input(matches.free.iter().map(|x| x.as_str()).collect(), &config);
let word_set = create_word_set(&config, &word_filter, &file_map);
let output_file = if !config.gnu_ext && matches.free.len() == 2 {
matches.free[1].as_str()
} else {
"-"
};
write_traditional_output(&config, &file_map, &word_set, output_file);
0
}