2015-12-08 02:42:08 +00:00
|
|
|
#![crate_name = "uu_ptx"]
|
2015-01-27 15:37:07 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This file is part of the uutils coreutils package.
|
|
|
|
*
|
|
|
|
* (c) Dorota Kapturkiewicz <dokaptur@gmail.com>
|
|
|
|
*
|
|
|
|
* For the full copyright and license information, please view the LICENSE
|
|
|
|
* file that was distributed with this source code.
|
|
|
|
*/
|
2015-06-24 03:00:00 +00:00
|
|
|
|
|
|
|
extern crate aho_corasick;
|
2015-01-27 15:37:07 +00:00
|
|
|
extern crate getopts;
|
2015-06-24 03:00:00 +00:00
|
|
|
extern crate memchr;
|
2015-01-27 15:37:07 +00:00
|
|
|
extern crate regex;
|
2018-03-12 08:20:58 +00:00
|
|
|
extern crate regex_syntax;
|
2015-08-12 04:01:10 +00:00
|
|
|
|
2015-11-24 01:00:51 +00:00
|
|
|
#[macro_use]
|
|
|
|
extern crate uucore;
|
|
|
|
|
2018-03-12 08:20:58 +00:00
|
|
|
use getopts::{Matches, Options};
|
2015-11-24 01:00:51 +00:00
|
|
|
use regex::Regex;
|
|
|
|
use std::cmp;
|
2018-03-12 08:20:58 +00:00
|
|
|
use std::collections::{BTreeSet, HashMap, HashSet};
|
2015-01-27 15:37:07 +00:00
|
|
|
use std::default::Default;
|
|
|
|
use std::fs::File;
|
2018-03-12 08:20:58 +00:00
|
|
|
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
|
2015-01-27 15:37:07 +00:00
|
|
|
|
2018-09-04 12:33:36 +00:00
|
|
|
static NAME: &str = "ptx";
|
|
|
|
static VERSION: &str = env!("CARGO_PKG_VERSION");
|
2015-01-27 15:37:07 +00:00
|
|
|
|
|
|
|
/// Output layout used when rendering index lines.
#[derive(Debug)]
enum OutFormat {
    /// Default value of `Config::format`; `write_traditional_output` aborts when it sees it.
    Dumb,
    /// Selected by `-G` or `-O`; lines are rendered by `format_roff_line`.
    Roff,
    /// Selected by `-T`; lines are rendered by `format_tex_line`.
    Tex,
}
|
|
|
|
|
|
|
|
/// Run-time settings assembled by `get_config` from the parsed command line.
#[derive(Debug)]
struct Config {
    /// Which renderer `write_traditional_output` dispatches to.
    format: OutFormat,
    /// `true` unless `-G` was given; controls the default word regex and
    /// how many input files `read_input` consumes.
    gnu_ext: bool,
    /// `-A`: references are generated as `filename:line`.
    auto_ref: bool,
    /// `-r`: the first match of `context_regex` on each line is its reference.
    input_ref: bool,
    /// `-R`; not read anywhere else in the visible code.
    right_ref: bool,
    /// `-f`: keywords are lowercased before being stored for sorting.
    ignore_case: bool,
    /// `-M`: macro name emitted at the start of every roff/TeX line.
    macro_name: String,
    /// `-F`: marker added to a chunk that was truncated.
    trunc_str: String,
    /// Regex used to locate the reference field of a line.
    context_regex: String,
    /// `-w`: output width; half of it bounds each side of the keyword.
    line_width: usize,
    /// `-g`; parsed but not read anywhere else in the visible code.
    gap_size: usize,
}
|
|
|
|
|
|
|
|
impl Default for Config {
|
|
|
|
fn default() -> Config {
|
|
|
|
Config {
|
2018-03-12 08:20:58 +00:00
|
|
|
format: OutFormat::Dumb,
|
|
|
|
gnu_ext: true,
|
|
|
|
auto_ref: false,
|
|
|
|
input_ref: false,
|
|
|
|
right_ref: false,
|
|
|
|
ignore_case: false,
|
|
|
|
macro_name: "xx".to_owned(),
|
|
|
|
trunc_str: "/".to_owned(),
|
|
|
|
context_regex: "\\w+".to_owned(),
|
|
|
|
line_width: 72,
|
|
|
|
gap_size: 3,
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn read_word_filter_file(matches: &Matches, option: &str) -> HashSet<String> {
|
|
|
|
let filename = matches.opt_str(option).expect("parsing options failed!");
|
|
|
|
let reader = BufReader::new(crash_if_err!(1, File::open(filename)));
|
|
|
|
let mut words: HashSet<String> = HashSet::new();
|
|
|
|
for word in reader.lines() {
|
|
|
|
words.insert(crash_if_err!(1, word));
|
|
|
|
}
|
|
|
|
words
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Keyword selection rules built by `WordFilter::new` and applied in `create_word_set`.
#[derive(Debug)]
struct WordFilter {
    /// `true` when `-o` was given: only words in `only_set` are indexed.
    only_specified: bool,
    /// `true` when `-i` was given: words in `ignore_set` are skipped.
    ignore_specified: bool,
    /// Words read from the `-o` file (empty when `-o` is absent).
    only_set: HashSet<String>,
    /// Words read from the `-i` file (empty when `-i` is absent).
    ignore_set: HashSet<String>,
    /// Regex source used to find keywords on each input line.
    word_regex: String,
}
|
|
|
|
|
|
|
|
impl WordFilter {
|
2018-03-12 08:20:58 +00:00
|
|
|
fn new(matches: &Matches, config: &Config) -> WordFilter {
|
|
|
|
let (o, oset): (bool, HashSet<String>) = if matches.opt_present("o") {
|
|
|
|
(true, read_word_filter_file(matches, "o"))
|
|
|
|
} else {
|
|
|
|
(false, HashSet::new())
|
|
|
|
};
|
|
|
|
let (i, iset): (bool, HashSet<String>) = if matches.opt_present("i") {
|
|
|
|
(true, read_word_filter_file(matches, "i"))
|
|
|
|
} else {
|
|
|
|
(false, HashSet::new())
|
|
|
|
};
|
2015-01-27 15:37:07 +00:00
|
|
|
if matches.opt_present("b") {
|
|
|
|
crash!(1, "-b not implemented yet");
|
|
|
|
}
|
2018-03-12 08:20:58 +00:00
|
|
|
let reg = if matches.opt_present("W") {
|
|
|
|
matches.opt_str("W").expect("parsing options failed!")
|
|
|
|
} else if config.gnu_ext {
|
|
|
|
"\\w+".to_owned()
|
|
|
|
} else {
|
|
|
|
"[^ \t\n]+".to_owned()
|
|
|
|
};
|
2015-08-12 04:01:10 +00:00
|
|
|
WordFilter {
|
|
|
|
only_specified: o,
|
|
|
|
ignore_specified: i,
|
|
|
|
only_set: oset,
|
2015-01-27 15:37:07 +00:00
|
|
|
ignore_set: iset,
|
2018-03-12 08:20:58 +00:00
|
|
|
word_regex: reg,
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// One keyword occurrence in the input.
///
/// The derived `Ord` compares fields in declaration order, so a
/// `BTreeSet<WordRef>` is sorted by `word` first, then by line numbers,
/// positions and finally `filename`. Do not reorder the fields.
#[derive(Debug, PartialOrd, PartialEq, Eq, Ord)]
struct WordRef {
    /// The keyword text (lowercased when `Config::ignore_case` is set).
    word: String,
    /// Line index counted across all input files (file offset + local index).
    global_line_nr: usize,
    /// Zero-based line index inside the file the word came from.
    local_line_nr: usize,
    /// Byte offset of the keyword's start within its line (regex match start).
    position: usize,
    /// Byte offset just past the keyword within its line (regex match end).
    position_end: usize,
    /// Key of the originating file in the file map.
    filename: String,
}
|
|
|
|
|
|
|
|
fn print_version() {
|
2015-05-30 07:34:23 +00:00
|
|
|
println!("{} {}", NAME, VERSION);
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn print_usage(opts: &Options) {
|
|
|
|
let brief = "Usage: ptx [OPTION]... [INPUT]... (without -G) or: \
|
2018-03-12 08:20:58 +00:00
|
|
|
ptx -G [OPTION]... [INPUT [OUTPUT]] \n Output a permuted index, \
|
|
|
|
including context, of the words in the input files. \n\n Mandatory \
|
|
|
|
arguments to long options are mandatory for short options too.";
|
2015-01-27 15:37:07 +00:00
|
|
|
let explaination = "With no FILE, or when FILE is -, read standard input. \
|
2018-03-12 08:20:58 +00:00
|
|
|
Default is '-F /'.";
|
2015-01-27 15:37:07 +00:00
|
|
|
println!("{}\n{}", opts.usage(&brief), explaination);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn get_config(matches: &Matches) -> Config {
|
|
|
|
let mut config: Config = Default::default();
|
|
|
|
let err_msg = "parsing options failed";
|
|
|
|
if matches.opt_present("G") {
|
|
|
|
config.gnu_ext = false;
|
|
|
|
config.format = OutFormat::Roff;
|
2016-01-05 19:42:52 +00:00
|
|
|
config.context_regex = "[^ \t\n]+".to_owned();
|
2015-01-27 15:37:07 +00:00
|
|
|
} else {
|
|
|
|
crash!(1, "GNU extensions not implemented yet");
|
|
|
|
}
|
|
|
|
if matches.opt_present("S") {
|
|
|
|
crash!(1, "-S not implemented yet");
|
|
|
|
}
|
|
|
|
config.auto_ref = matches.opt_present("A");
|
|
|
|
config.input_ref = matches.opt_present("r");
|
|
|
|
config.right_ref &= matches.opt_present("R");
|
2015-08-12 04:01:10 +00:00
|
|
|
config.ignore_case = matches.opt_present("f");
|
2015-01-27 15:37:07 +00:00
|
|
|
if matches.opt_present("M") {
|
2018-03-12 08:20:58 +00:00
|
|
|
config.macro_name = matches.opt_str("M").expect(err_msg);
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
|
|
|
if matches.opt_present("F") {
|
2018-03-12 08:20:58 +00:00
|
|
|
config.trunc_str = matches.opt_str("F").expect(err_msg);
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
|
|
|
if matches.opt_present("w") {
|
|
|
|
let width_str = matches.opt_str("w").expect(err_msg);
|
2018-03-12 08:20:58 +00:00
|
|
|
config.line_width = crash_if_err!(1, usize::from_str_radix(&width_str, 10));
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
|
|
|
if matches.opt_present("g") {
|
|
|
|
let gap_str = matches.opt_str("g").expect(err_msg);
|
2018-03-12 08:20:58 +00:00
|
|
|
config.gap_size = crash_if_err!(1, usize::from_str_radix(&gap_str, 10));
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
|
|
|
if matches.opt_present("O") {
|
|
|
|
config.format = OutFormat::Roff;
|
|
|
|
}
|
|
|
|
if matches.opt_present("T") {
|
|
|
|
config.format = OutFormat::Tex;
|
|
|
|
}
|
|
|
|
config
|
|
|
|
}
|
|
|
|
|
2018-03-12 08:20:58 +00:00
|
|
|
fn read_input(input_files: &[String], config: &Config) -> HashMap<String, (Vec<String>, usize)> {
|
|
|
|
let mut file_map: HashMap<String, (Vec<String>, usize)> = HashMap::new();
|
2015-01-27 15:37:07 +00:00
|
|
|
let mut files = Vec::new();
|
|
|
|
if input_files.is_empty() {
|
|
|
|
files.push("-");
|
|
|
|
} else {
|
|
|
|
if config.gnu_ext {
|
2015-08-12 04:01:10 +00:00
|
|
|
for file in input_files {
|
|
|
|
files.push(&file);
|
|
|
|
}
|
2015-01-27 15:37:07 +00:00
|
|
|
} else {
|
2015-08-12 04:01:10 +00:00
|
|
|
files.push(&input_files[0]);
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
let mut lines_so_far: usize = 0;
|
|
|
|
for filename in files {
|
2019-10-01 16:34:26 +00:00
|
|
|
let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
|
2018-03-12 08:20:58 +00:00
|
|
|
Box::new(stdin())
|
|
|
|
} else {
|
|
|
|
let file = crash_if_err!(1, File::open(filename));
|
|
|
|
Box::new(file)
|
|
|
|
});
|
|
|
|
let lines: Vec<String> = reader.lines().map(|x| crash_if_err!(1, x)).collect();
|
2015-01-27 15:37:07 +00:00
|
|
|
let size = lines.len();
|
2016-01-05 19:42:52 +00:00
|
|
|
file_map.insert(filename.to_owned(), (lines, lines_so_far));
|
2015-01-27 15:37:07 +00:00
|
|
|
lines_so_far += size
|
|
|
|
}
|
|
|
|
file_map
|
|
|
|
}
|
|
|
|
|
2018-03-12 08:20:58 +00:00
|
|
|
fn create_word_set(
|
|
|
|
config: &Config,
|
|
|
|
filter: &WordFilter,
|
|
|
|
file_map: &HashMap<String, (Vec<String>, usize)>,
|
|
|
|
) -> BTreeSet<WordRef> {
|
2015-08-12 04:01:10 +00:00
|
|
|
let reg = Regex::new(&filter.word_regex).unwrap();
|
|
|
|
let ref_reg = Regex::new(&config.context_regex).unwrap();
|
2015-01-27 15:37:07 +00:00
|
|
|
let mut word_set: BTreeSet<WordRef> = BTreeSet::new();
|
|
|
|
for (file, lines) in file_map.iter() {
|
|
|
|
let mut count: usize = 0;
|
|
|
|
let offs = lines.1;
|
2016-01-05 19:42:52 +00:00
|
|
|
for line in &lines.0 {
|
2015-01-27 15:37:07 +00:00
|
|
|
// if -r, exclude reference from word set
|
|
|
|
let (ref_beg, ref_end) = match ref_reg.find(line) {
|
2016-12-02 22:06:21 +00:00
|
|
|
Some(x) => (x.start(), x.end()),
|
2018-03-12 08:20:58 +00:00
|
|
|
None => (0, 0),
|
2015-01-27 15:37:07 +00:00
|
|
|
};
|
|
|
|
// match words with given regex
|
2016-12-02 22:06:21 +00:00
|
|
|
for mat in reg.find_iter(line) {
|
|
|
|
let (beg, end) = (mat.start(), mat.end());
|
2015-01-27 15:37:07 +00:00
|
|
|
if config.input_ref && ((beg, end) == (ref_beg, ref_end)) {
|
|
|
|
continue;
|
|
|
|
}
|
2018-03-12 08:20:58 +00:00
|
|
|
let mut word = line[beg..end].to_owned();
|
|
|
|
if filter.only_specified && !(filter.only_set.contains(&word)) {
|
2015-01-27 15:37:07 +00:00
|
|
|
continue;
|
|
|
|
}
|
2018-03-12 08:20:58 +00:00
|
|
|
if filter.ignore_specified && filter.ignore_set.contains(&word) {
|
2015-01-27 15:37:07 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if config.ignore_case {
|
|
|
|
word = word.to_lowercase();
|
|
|
|
}
|
2018-03-12 08:20:58 +00:00
|
|
|
word_set.insert(WordRef {
|
2015-01-27 15:37:07 +00:00
|
|
|
word: word,
|
2015-08-12 04:01:10 +00:00
|
|
|
filename: String::from(file.clone()),
|
2015-01-27 15:37:07 +00:00
|
|
|
global_line_nr: offs + count,
|
|
|
|
local_line_nr: count,
|
|
|
|
position: beg,
|
2018-03-12 08:20:58 +00:00
|
|
|
position_end: end,
|
2015-01-27 15:37:07 +00:00
|
|
|
});
|
|
|
|
}
|
|
|
|
count += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
word_set
|
|
|
|
}
|
|
|
|
|
2018-03-12 08:20:58 +00:00
|
|
|
fn get_reference(config: &Config, word_ref: &WordRef, line: &str) -> String {
|
2015-01-27 15:37:07 +00:00
|
|
|
if config.auto_ref {
|
|
|
|
format!("{}:{}", word_ref.filename, word_ref.local_line_nr + 1)
|
|
|
|
} else if config.input_ref {
|
2015-08-12 04:01:10 +00:00
|
|
|
let reg = Regex::new(&config.context_regex).unwrap();
|
2015-01-27 15:37:07 +00:00
|
|
|
let (beg, end) = match reg.find(line) {
|
2016-12-02 22:06:21 +00:00
|
|
|
Some(x) => (x.start(), x.end()),
|
2018-03-12 08:20:58 +00:00
|
|
|
None => (0, 0),
|
2015-01-27 15:37:07 +00:00
|
|
|
};
|
2019-12-28 17:36:33 +00:00
|
|
|
line[beg..end].to_string()
|
2015-01-27 15:37:07 +00:00
|
|
|
} else {
|
|
|
|
String::new()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-05 19:42:52 +00:00
|
|
|
/// Panics unless `beg..end` is a valid range into `s`
/// (`beg <= end` and `end <= s.len()`).
///
/// Called by the trimming helpers before they index into `s`.
fn assert_str_integrity(s: &[char], beg: usize, end: usize) {
    assert!(beg <= end);
    assert!(end <= s.len());
}
|
|
|
|
|
2016-01-05 19:42:52 +00:00
|
|
|
fn trim_broken_word_left(s: &[char], beg: usize, end: usize) -> usize {
|
2015-01-27 15:37:07 +00:00
|
|
|
assert_str_integrity(s, beg, end);
|
2018-03-12 08:20:58 +00:00
|
|
|
if beg == end || beg == 0 || s[beg].is_whitespace() || s[beg - 1].is_whitespace() {
|
2015-01-27 15:37:07 +00:00
|
|
|
return beg;
|
|
|
|
}
|
|
|
|
let mut b = beg;
|
|
|
|
while b < end && !s[b].is_whitespace() {
|
|
|
|
b += 1;
|
|
|
|
}
|
|
|
|
b
|
|
|
|
}
|
|
|
|
|
2016-01-05 19:42:52 +00:00
|
|
|
fn trim_broken_word_right(s: &[char], beg: usize, end: usize) -> usize {
|
2015-01-27 15:37:07 +00:00
|
|
|
assert_str_integrity(s, beg, end);
|
2018-03-12 08:20:58 +00:00
|
|
|
if beg == end || end == s.len() || s[end - 1].is_whitespace() || s[end].is_whitespace() {
|
2015-01-27 15:37:07 +00:00
|
|
|
return end;
|
|
|
|
}
|
|
|
|
let mut e = end;
|
2018-03-12 08:20:58 +00:00
|
|
|
while beg < e && !s[e - 1].is_whitespace() {
|
2015-01-27 15:37:07 +00:00
|
|
|
e -= 1;
|
|
|
|
}
|
|
|
|
e
|
|
|
|
}
|
|
|
|
|
2016-01-05 19:42:52 +00:00
|
|
|
fn trim_idx(s: &[char], beg: usize, end: usize) -> (usize, usize) {
|
2015-01-27 15:37:07 +00:00
|
|
|
assert_str_integrity(s, beg, end);
|
|
|
|
let mut b = beg;
|
|
|
|
let mut e = end;
|
|
|
|
while b < e && s[b].is_whitespace() {
|
|
|
|
b += 1;
|
|
|
|
}
|
2018-03-12 08:20:58 +00:00
|
|
|
while b < e && s[e - 1].is_whitespace() {
|
2015-01-27 15:37:07 +00:00
|
|
|
e -= 1;
|
|
|
|
}
|
2018-03-12 08:20:58 +00:00
|
|
|
(b, e)
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
|
|
|
|
2018-03-12 08:20:58 +00:00
|
|
|
fn get_output_chunks(
|
|
|
|
all_before: &str,
|
|
|
|
keyword: &str,
|
|
|
|
all_after: &str,
|
|
|
|
config: &Config,
|
|
|
|
) -> (String, String, String, String) {
|
2016-01-06 15:20:59 +00:00
|
|
|
assert_eq!(all_before.trim(), all_before);
|
|
|
|
assert_eq!(keyword.trim(), keyword);
|
|
|
|
assert_eq!(all_after.trim(), all_after);
|
2015-01-27 15:37:07 +00:00
|
|
|
let mut head = String::new();
|
|
|
|
let mut before = String::new();
|
|
|
|
let mut after = String::new();
|
|
|
|
let mut tail = String::new();
|
2015-08-12 04:01:10 +00:00
|
|
|
|
2018-03-12 08:20:58 +00:00
|
|
|
let half_line_size = cmp::max(
|
|
|
|
(config.line_width / 2) as isize - (2 * config.trunc_str.len()) as isize,
|
|
|
|
0,
|
|
|
|
) as usize;
|
|
|
|
let max_after_size = cmp::max(half_line_size as isize - keyword.len() as isize - 1, 0) as usize;
|
2015-08-12 04:01:10 +00:00
|
|
|
let max_before_size = half_line_size;
|
2015-01-27 15:37:07 +00:00
|
|
|
let all_before_vec: Vec<char> = all_before.chars().collect();
|
|
|
|
let all_after_vec: Vec<char> = all_after.chars().collect();
|
2015-08-12 04:01:10 +00:00
|
|
|
|
2015-01-27 15:37:07 +00:00
|
|
|
// get before
|
2018-03-12 08:20:58 +00:00
|
|
|
let mut bb_tmp = cmp::max(all_before.len() as isize - max_before_size as isize, 0) as usize;
|
2015-01-27 15:37:07 +00:00
|
|
|
bb_tmp = trim_broken_word_left(&all_before_vec, bb_tmp, all_before.len());
|
2018-03-12 08:20:58 +00:00
|
|
|
let (before_beg, before_end) = trim_idx(&all_before_vec, bb_tmp, all_before.len());
|
|
|
|
before.push_str(&all_before[before_beg..before_end]);
|
2015-01-27 15:37:07 +00:00
|
|
|
assert!(max_before_size >= before.len());
|
2015-08-12 04:01:10 +00:00
|
|
|
|
2015-01-27 15:37:07 +00:00
|
|
|
// get after
|
|
|
|
let mut ae_tmp = cmp::min(max_after_size, all_after.len());
|
|
|
|
ae_tmp = trim_broken_word_right(&all_after_vec, 0, ae_tmp);
|
|
|
|
let (after_beg, after_end) = trim_idx(&all_after_vec, 0, ae_tmp);
|
2018-03-12 08:20:58 +00:00
|
|
|
after.push_str(&all_after[after_beg..after_end]);
|
2015-08-12 04:01:10 +00:00
|
|
|
assert!(max_after_size >= after.len());
|
|
|
|
|
2015-01-27 15:37:07 +00:00
|
|
|
// get tail
|
|
|
|
let max_tail_size = max_before_size - before.len();
|
|
|
|
let (tb, _) = trim_idx(&all_after_vec, after_end, all_after.len());
|
2015-08-12 04:01:10 +00:00
|
|
|
let mut te_tmp = cmp::min(tb + max_tail_size, all_after.len());
|
2015-01-27 15:37:07 +00:00
|
|
|
te_tmp = trim_broken_word_right(&all_after_vec, tb, te_tmp);
|
|
|
|
let (tail_beg, tail_end) = trim_idx(&all_after_vec, tb, te_tmp);
|
2018-03-12 08:20:58 +00:00
|
|
|
tail.push_str(&all_after[tail_beg..tail_end]);
|
2015-08-12 04:01:10 +00:00
|
|
|
|
2015-01-27 15:37:07 +00:00
|
|
|
// get head
|
|
|
|
let max_head_size = max_after_size - after.len();
|
|
|
|
let (_, he) = trim_idx(&all_before_vec, 0, before_beg);
|
2018-03-12 08:20:58 +00:00
|
|
|
let mut hb_tmp = cmp::max(he as isize - max_head_size as isize, 0) as usize;
|
2015-01-27 15:37:07 +00:00
|
|
|
hb_tmp = trim_broken_word_left(&all_before_vec, hb_tmp, he);
|
|
|
|
let (head_beg, head_end) = trim_idx(&all_before_vec, hb_tmp, he);
|
2018-03-12 08:20:58 +00:00
|
|
|
head.push_str(&all_before[head_beg..head_end]);
|
2015-08-12 04:01:10 +00:00
|
|
|
|
2015-01-27 15:37:07 +00:00
|
|
|
// put right context truncation string if needed
|
|
|
|
if after_end != all_after.len() && tail_beg == tail_end {
|
2015-08-12 04:01:10 +00:00
|
|
|
after.push_str(&config.trunc_str);
|
2015-01-27 15:37:07 +00:00
|
|
|
} else if after_end != all_after.len() && tail_end != all_after.len() {
|
2015-08-12 04:01:10 +00:00
|
|
|
tail.push_str(&config.trunc_str);
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
2015-08-12 04:01:10 +00:00
|
|
|
|
2015-01-27 15:37:07 +00:00
|
|
|
// put left context truncation string if needed
|
|
|
|
if before_beg != 0 && head_beg == head_end {
|
|
|
|
before = format!("{}{}", config.trunc_str, before);
|
|
|
|
} else if before_beg != 0 && head_beg != 0 {
|
|
|
|
head = format!("{}{}", config.trunc_str, head);
|
|
|
|
}
|
2015-08-12 04:01:10 +00:00
|
|
|
|
2015-01-27 15:37:07 +00:00
|
|
|
// add space before "after" if needed
|
2016-01-05 19:42:52 +00:00
|
|
|
if !after.is_empty() {
|
2015-01-27 15:37:07 +00:00
|
|
|
after = format!(" {}", after);
|
|
|
|
}
|
2015-08-12 04:01:10 +00:00
|
|
|
|
2015-01-27 15:37:07 +00:00
|
|
|
(tail, before, after, head)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Escapes a single character for TeX output.
///
/// * `\` becomes `\backslash{}`
/// * `$ % # & _` are prefixed with a backslash
/// * `{` and `}` become `$\{$` and `$\}$`
/// * every other character is returned as is
fn tex_mapper(x: char) -> String {
    let mut escaped = String::new();
    match x {
        '\\' => escaped.push_str("\\backslash{}"),
        '$' | '%' | '#' | '&' | '_' => {
            escaped.push('\\');
            escaped.push(x);
        }
        '{' | '}' => {
            escaped.push_str("$\\");
            escaped.push(x);
            escaped.push('$');
        }
        other => escaped.push(other),
    }
    escaped
}
|
|
|
|
|
|
|
|
fn adjust_tex_str(context: &str) -> String {
|
|
|
|
let ws_reg = Regex::new(r"[\t\n\v\f\r ]").unwrap();
|
2016-01-05 19:42:52 +00:00
|
|
|
let mut fix: String = ws_reg.replace_all(context, " ").trim().to_owned();
|
2015-01-27 15:37:07 +00:00
|
|
|
let mapped_chunks: Vec<String> = fix.chars().map(tex_mapper).collect();
|
2015-08-26 16:50:07 +00:00
|
|
|
fix = mapped_chunks.join("");
|
2015-01-27 15:37:07 +00:00
|
|
|
fix
|
|
|
|
}
|
|
|
|
|
2018-03-12 08:20:58 +00:00
|
|
|
fn format_tex_line(config: &Config, word_ref: &WordRef, line: &str, reference: &str) -> String {
|
2015-01-27 15:37:07 +00:00
|
|
|
let mut output = String::new();
|
|
|
|
output.push_str(&format!("\\{} ", config.macro_name));
|
|
|
|
let all_before = if config.input_ref {
|
2018-03-12 08:20:58 +00:00
|
|
|
let before = &line[0..word_ref.position];
|
2019-04-28 10:49:18 +00:00
|
|
|
adjust_tex_str(before.trim().trim_start_matches(reference))
|
2015-01-27 15:37:07 +00:00
|
|
|
} else {
|
2018-03-12 08:20:58 +00:00
|
|
|
adjust_tex_str(&line[0..word_ref.position])
|
2015-01-27 15:37:07 +00:00
|
|
|
};
|
2018-03-12 08:20:58 +00:00
|
|
|
let keyword = adjust_tex_str(&line[word_ref.position..word_ref.position_end]);
|
|
|
|
let all_after = adjust_tex_str(&line[word_ref.position_end..line.len()]);
|
|
|
|
let (tail, before, after, head) = get_output_chunks(&all_before, &keyword, &all_after, &config);
|
|
|
|
output.push_str(&format!(
|
|
|
|
"{5}{0}{6}{5}{1}{6}{5}{2}{6}{5}{3}{6}{5}{4}{6}",
|
|
|
|
tail, before, keyword, after, head, "{", "}"
|
|
|
|
));
|
2015-01-27 15:37:07 +00:00
|
|
|
if config.auto_ref || config.input_ref {
|
2018-03-12 08:20:58 +00:00
|
|
|
output.push_str(&format!("{}{}{}", "{", adjust_tex_str(&reference), "}"));
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
|
|
|
output
|
|
|
|
}
|
|
|
|
|
|
|
|
fn adjust_roff_str(context: &str) -> String {
|
|
|
|
let ws_reg = Regex::new(r"[\t\n\v\f\r]").unwrap();
|
2018-03-12 08:20:58 +00:00
|
|
|
ws_reg
|
|
|
|
.replace_all(context, " ")
|
|
|
|
.replace("\"", "\"\"")
|
|
|
|
.trim()
|
|
|
|
.to_owned()
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
|
|
|
|
2018-03-12 08:20:58 +00:00
|
|
|
fn format_roff_line(config: &Config, word_ref: &WordRef, line: &str, reference: &str) -> String {
|
2015-01-27 15:37:07 +00:00
|
|
|
let mut output = String::new();
|
|
|
|
output.push_str(&format!(".{}", config.macro_name));
|
|
|
|
let all_before = if config.input_ref {
|
2018-03-12 08:20:58 +00:00
|
|
|
let before = &line[0..word_ref.position];
|
2019-04-28 10:49:18 +00:00
|
|
|
adjust_roff_str(before.trim().trim_start_matches(reference))
|
2015-01-27 15:37:07 +00:00
|
|
|
} else {
|
2018-03-12 08:20:58 +00:00
|
|
|
adjust_roff_str(&line[0..word_ref.position])
|
2015-01-27 15:37:07 +00:00
|
|
|
};
|
2018-03-12 08:20:58 +00:00
|
|
|
let keyword = adjust_roff_str(&line[word_ref.position..word_ref.position_end]);
|
|
|
|
let all_after = adjust_roff_str(&line[word_ref.position_end..line.len()]);
|
|
|
|
let (tail, before, after, head) = get_output_chunks(&all_before, &keyword, &all_after, &config);
|
|
|
|
output.push_str(&format!(
|
|
|
|
" \"{}\" \"{}\" \"{}{}\" \"{}\"",
|
|
|
|
tail, before, keyword, after, head
|
|
|
|
));
|
2015-01-27 15:37:07 +00:00
|
|
|
if config.auto_ref || config.input_ref {
|
2015-05-30 07:34:23 +00:00
|
|
|
output.push_str(&format!(" \"{}\"", adjust_roff_str(&reference)));
|
2015-01-27 15:37:07 +00:00
|
|
|
}
|
|
|
|
output
|
|
|
|
}
|
|
|
|
|
2018-03-12 08:20:58 +00:00
|
|
|
fn write_traditional_output(
|
|
|
|
config: &Config,
|
|
|
|
file_map: &HashMap<String, (Vec<String>, usize)>,
|
|
|
|
words: &BTreeSet<WordRef>,
|
|
|
|
output_filename: &str,
|
|
|
|
) {
|
2019-10-01 16:34:26 +00:00
|
|
|
let mut writer: BufWriter<Box<dyn Write>> = BufWriter::new(if output_filename == "-" {
|
2015-01-27 15:37:07 +00:00
|
|
|
Box::new(stdout())
|
|
|
|
} else {
|
|
|
|
let file = crash_if_err!(1, File::create(output_filename));
|
|
|
|
Box::new(file)
|
|
|
|
});
|
|
|
|
for word_ref in words.iter() {
|
2018-03-12 08:20:58 +00:00
|
|
|
let file_map_value: &(Vec<String>, usize) = file_map
|
|
|
|
.get(&(word_ref.filename))
|
|
|
|
.expect("Missing file in file map");
|
2015-01-27 15:37:07 +00:00
|
|
|
let (ref lines, _) = *(file_map_value);
|
2018-03-12 08:20:58 +00:00
|
|
|
let reference = get_reference(config, word_ref, &lines[word_ref.local_line_nr]);
|
2015-01-27 15:37:07 +00:00
|
|
|
let output_line: String = match config.format {
|
2018-03-12 08:20:58 +00:00
|
|
|
OutFormat::Tex => {
|
|
|
|
format_tex_line(config, word_ref, &lines[word_ref.local_line_nr], &reference)
|
|
|
|
}
|
|
|
|
OutFormat::Roff => {
|
|
|
|
format_roff_line(config, word_ref, &lines[word_ref.local_line_nr], &reference)
|
|
|
|
}
|
|
|
|
OutFormat::Dumb => crash!(1, "There is no dumb format with GNU extensions disabled"),
|
2015-01-27 15:37:07 +00:00
|
|
|
};
|
|
|
|
crash_if_err!(1, writeln!(writer, "{}", output_line));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Entry point of `ptx`: parses `args` (skipping `args[0]`), builds the
/// configuration and word filter, reads the input, and writes the index.
///
/// Returns 0 on success and after `--help` / `--version`. An option parsing
/// failure is handled by `return_if_err!(1, ..)`.
pub fn uumain(args: Vec<String>) -> i32 {
    // Option table; the registration order is the order shown in the usage text.
    let mut opts = Options::new();
    opts.optflag(
        "A",
        "auto-reference",
        "output automatically generated references",
    );
    opts.optflag("G", "traditional", "behave more like System V 'ptx'");
    opts.optopt(
        "F",
        "flag-truncation",
        "use STRING for flagging line truncations",
        "STRING",
    );
    opts.optopt(
        "M",
        "macro-name",
        "macro name to use instead of 'xx'",
        "STRING",
    );
    opts.optflag("O", "format=roff", "generate output as roff directives");
    opts.optflag(
        "R",
        "right-side-refs",
        "put references at right, not counted in -w",
    );
    // Accepted by the parser but rejected as unimplemented in `get_config`.
    opts.optopt(
        "S",
        "sentence-regexp",
        "for end of lines or end of sentences",
        "REGEXP",
    );
    opts.optflag("T", "format=tex", "generate output as TeX directives");
    opts.optopt(
        "W",
        "word-regexp",
        "use REGEXP to match each keyword",
        "REGEXP",
    );
    // Accepted by the parser but rejected as unimplemented in `WordFilter::new`.
    opts.optopt(
        "b",
        "break-file",
        "word break characters in this FILE",
        "FILE",
    );
    // NOTE(review): the help text says "upper case", but `create_word_set`
    // lowercases keywords when this flag is set — confirm which is intended.
    opts.optflag(
        "f",
        "ignore-case",
        "fold lower case to upper case for sorting",
    );
    opts.optopt(
        "g",
        "gap-size",
        "gap size in columns between output fields",
        "NUMBER",
    );
    opts.optopt(
        "i",
        "ignore-file",
        "read ignore word list from FILE",
        "FILE",
    );
    opts.optopt(
        "o",
        "only-file",
        "read only word list from this FILE",
        "FILE",
    );
    opts.optflag("r", "references", "first field of each line is a reference");
    opts.optopt(
        "w",
        "width",
        "output width in columns, reference excluded",
        "NUMBER",
    );
    opts.optflag("", "help", "display this help and exit");
    opts.optflag("", "version", "output version information and exit");

    let matches = return_if_err!(1, opts.parse(&args[1..]));

    // Informational options short-circuit before any input is touched.
    if matches.opt_present("help") {
        print_usage(&opts);
        return 0;
    }
    if matches.opt_present("version") {
        print_version();
        return 0;
    }
    let config = get_config(&matches);
    let word_filter = WordFilter::new(&matches, &config);
    let file_map = read_input(&matches.free, &config);
    let word_set = create_word_set(&config, &word_filter, &file_map);
    // In traditional mode with exactly two free arguments the second one is
    // the output file; in every other case the index goes to stdout.
    let output_file = if !config.gnu_ext && matches.free.len() == 2 {
        matches.free[1].clone()
    } else {
        "-".to_owned()
    };
    write_traditional_output(&config, &file_map, &word_set, &output_file);
    0
}
|