pr: refactor common iterator between pr and mpr

pr: remove fill lines in mpr
Authored by tilakpatidar on 2019-01-14 20:29:20 +05:30; committed by Max Semenik
parent aefc2eb540
commit a7def9386b


@@ -20,7 +20,6 @@ use chrono::offset::Local;
 use chrono::DateTime;
 use getopts::{HasArg, Occur};
 use getopts::{Matches, Options};
-use itertools::structs::Batching;
 use itertools::structs::KMergeBy;
 use itertools::{GroupBy, Itertools};
 use quick_error::ResultExt;
@@ -28,8 +27,7 @@ use regex::Regex;
 use std::convert::From;
 use std::fs::{metadata, File, Metadata};
 use std::io::{stderr, stdin, stdout, BufRead, BufReader, Lines, Read, Stdin, Stdout, Write};
-use std::iter::FlatMap;
-use std::iter::{Enumerate, Map, SkipWhile, TakeWhile};
+use std::iter::{FlatMap, Map};
 use std::num::ParseIntError;
 #[cfg(unix)]
 use std::os::unix::fs::FileTypeExt;
@@ -873,27 +871,49 @@ fn split_lines_if_form_feed(file_content: Result<String, IOError>) -> Vec<FileLine> {
 }
 
 fn pr(path: &String, options: &OutputOptions) -> Result<i32, PrError> {
-    let start_page: &usize = &options.start_page;
+    let start_page: usize = options.start_page;
     let start_line_number: usize = get_start_line_number(options);
-    let last_page: Option<&usize> = options.end_page.as_ref();
+    let last_page: Option<usize> = options.end_page;
     let lines_needed_per_page: usize = lines_to_read_for_page(options);
-    let pages: Batching<
-        Map<Map<Enumerate<FlatMap<Map<Lines<BufReader<Box<Read>>>, _>, _, _>>, _>, _>,
-        _,
-    > = BufReader::with_capacity(READ_BUFFER_SIZE, open(path)?)
-        .lines()
+    let lines: Lines<BufReader<Box<Read>>> =
+        BufReader::with_capacity(READ_BUFFER_SIZE, open(path)?).lines();
+
+    let pages: Box<Iterator<Item = (usize, Vec<FileLine>)>> = read_stream_and_create_pages(
+        lines,
+        start_line_number,
+        lines_needed_per_page,
+        start_page,
+        last_page,
+        0,
+    );
+
+    for page_with_page_number in pages {
+        let page_number = page_with_page_number.0 + 1;
+        let page = page_with_page_number.1;
+        print_page(&page, options, &page_number)?;
+    }
+
+    return Ok(0);
+}
+
+fn read_stream_and_create_pages(
+    lines: Lines<BufReader<Box<Read>>>,
+    start_line_number: usize,
+    lines_needed_per_page: usize,
+    start_page: usize,
+    last_page: Option<usize>,
+    file_id: usize,
+) -> Box<Iterator<Item = (usize, Vec<FileLine>)>> {
+    return Box::new(
+        lines
             .map(split_lines_if_form_feed)
             .flat_map(|i: Vec<FileLine>| i)
            .enumerate()
-            .map(|i: (usize, FileLine)| FileLine {
-                line_number: i.0,
+            .map(move |i: (usize, FileLine)| FileLine {
+                line_number: i.0 + start_line_number,
+                file_id,
                 ..i.1
-            })
-            .map(|file_line: FileLine| FileLine {
-                line_number: file_line.line_number + start_line_number,
-                ..file_line
-            }) // get display line number with line content
-            .batching(|it| {
+            }) // Add line number and file_id
+            .batching(move |it| {
                 let mut first_page: Vec<FileLine> = Vec::new();
                 let mut page_with_lines: Vec<Vec<FileLine>> = Vec::new();
                 for line in it {
@@ -919,26 +939,29 @@ fn pr(path: &String, options: &OutputOptions) -> Result<i32, PrError> {
                 }
                 page_with_lines.push(first_page);
                 return Some(page_with_lines);
-            });
-
-    let mut page_number = 1;
-    for page_set in pages {
-        for page in page_set {
-            print_page(&page, options, &page_number, &start_page, &last_page)?;
-            page_number += 1;
-        }
-    }
-
-    return Ok(0);
+            }) // Create set of pages as form feeds could lead to empty pages
+            .flat_map(|x| x) // Flatten to pages from page sets
+            .enumerate() // Assign page number
+            .skip_while(move |x: &(usize, Vec<FileLine>)| {
+                // Skip the not needed pages
+                let current_page = x.0 + 1;
+                return current_page < start_page;
+            })
+            .take_while(move |x: &(usize, Vec<FileLine>)| {
+                // Take only the required pages
+                let current_page = x.0 + 1;
+                return current_page >= start_page
+                    && (last_page.is_none() || current_page <= last_page.unwrap());
+            }),
+    );
 }
 
 fn mpr(paths: &Vec<String>, options: &OutputOptions) -> Result<i32, PrError> {
     let nfiles = paths.len();
     let lines_needed_per_page: usize = lines_to_read_for_page(options);
-    let lines_needed_per_page_f64: f64 = lines_needed_per_page as f64;
-    let start_page: &usize = &options.start_page;
-    let last_page: Option<&usize> = options.end_page.as_ref();
-    let start_line_index_of_start_page = (start_page - 1) * lines_needed_per_page;
+    let start_page: usize = options.start_page;
+    let last_page: Option<usize> = options.end_page;
 
     // Check if files exists
     for path in paths {
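Note for context: the page-building pipeline introduced above leans on the batching adaptor from the itertools crate plus enumerate/skip_while/take_while for the page range. A minimal standalone sketch of that shape follows; it is not part of this change, and pages_in_range, page_len, first and last are illustrative stand-ins for read_stream_and_create_pages, lines_needed_per_page, start_page and end_page.

    // Illustrative sketch only; assumes the itertools crate is a dependency.
    use itertools::Itertools;

    fn pages_in_range(
        lines: impl Iterator<Item = String>,
        page_len: usize,
        first: usize,
        last: Option<usize>,
    ) -> impl Iterator<Item = (usize, Vec<String>)> {
        lines
            .batching(move |it| {
                // Pull at most page_len lines; returning None ends the page stream.
                let mut page = Vec::with_capacity(page_len);
                for _ in 0..page_len {
                    match it.next() {
                        Some(line) => page.push(line),
                        None => break,
                    }
                }
                if page.is_empty() {
                    None
                } else {
                    Some(page)
                }
            })
            .enumerate() // 0-based page index
            .skip_while(move |(i, _)| i + 1 < first) // drop pages before the first wanted page
            .take_while(move |(i, _)| last.map_or(true, |lp| i + 1 <= lp)) // stop after the last one
    }

    fn main() {
        let lines = (1..=10).map(|n| format!("line {}", n));
        // Three-line pages, keep only pages 2 and 3.
        for (i, page) in pages_in_range(lines, 3, 2, Some(3)) {
            println!("page {}: {:?}", i + 1, page);
        }
    }

Running this prints pages 2 and 3 of a ten-line input split into three-line pages, which is the same shape of result pr now gets from read_stream_and_create_pages.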
@@ -947,46 +970,37 @@ fn mpr(paths: &Vec<String>, options: &OutputOptions) -> Result<i32, PrError> {
     let file_line_groups: GroupBy<
         usize,
-        KMergeBy<
-            Map<TakeWhile<SkipWhile<Map<Enumerate<Lines<BufReader<Box<Read>>>>, _>, _>, _>, _>,
-            _,
-        >,
+        KMergeBy<FlatMap<Map<Box<Iterator<Item = (usize, Vec<FileLine>)>>, _>, _, _>, _>,
         _,
     > = paths
         .into_iter()
         .enumerate()
         .map(|indexed_path: (usize, &String)| {
             let start_line_number: usize = get_start_line_number(options);
-            BufReader::with_capacity(READ_BUFFER_SIZE, open(indexed_path.1).unwrap())
-                .lines()
-                .enumerate()
-                .map(move |i: (usize, Result<String, IOError>)| FileLine {
-                    file_id: indexed_path.0,
-                    line_number: i.0,
-                    line_content: i.1,
-                    ..FileLine::default()
-                })
-                .skip_while(move |file_line: &FileLine| {
-                    // Skip the initial lines if not in page range
-                    file_line.line_number < (start_line_index_of_start_page)
-                })
-                .take_while(move |file_line: &FileLine| {
-                    // Only read the file until provided last page reached
-                    last_page
-                        .map(|lp| file_line.line_number < ((*lp) * lines_needed_per_page))
-                        .unwrap_or(true)
-                })
-                .map(move |file_line: FileLine| {
-                    let page_number = ((file_line.line_number + 2 - start_line_number) as f64
-                        / (lines_needed_per_page_f64))
-                        .ceil() as usize;
-                    FileLine {
-                        line_number: file_line.line_number + start_line_number,
+            let lines =
+                BufReader::with_capacity(READ_BUFFER_SIZE, open(indexed_path.1).unwrap()).lines();
+
+            read_stream_and_create_pages(
+                lines,
+                start_line_number,
+                lines_needed_per_page,
+                start_page,
+                last_page,
+                indexed_path.0,
+            )
+            .map(move |x: (usize, Vec<FileLine>)| {
+                let file_line = x.1;
+                let page_number = x.0 + 1;
+                file_line
+                    .into_iter()
+                    .map(|fl| FileLine {
                         page_number,
-                        group_key: page_number * nfiles + file_line.file_id,
-                        ..file_line
-                    }
-                }) // get display line number with line content
+                        group_key: page_number * nfiles + fl.file_id,
+                        ..fl
+                    })
+                    .collect()
+            })
+            .flat_map(|x: Vec<FileLine>| x)
         })
         .kmerge_by(|a: &FileLine, b: &FileLine| {
             if a.group_key == b.group_key {
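A rough sketch of the merge step above, assuming two already-paginated files and a trimmed-down stand-in for FileLine (illustrative only, and using the group_by-era itertools API this file already depends on): kmerge_by merges the per-file streams in ascending group_key order, and group_by then yields one group per (page, file) pair, so the files' pages come out interleaved page by page.

    use itertools::Itertools;

    // Trimmed-down stand-in for the FileLine fields that matter here.
    struct Line {
        file_id: usize,
        page_number: usize,
        group_key: usize,
        text: String,
    }

    fn main() {
        let nfiles = 2;
        // Two files, each already split into numbered pages of lines.
        let files: Vec<Vec<(usize, Vec<&str>)>> = vec![
            vec![(1, vec!["a1", "a2"]), (2, vec!["a3"])], // file 0
            vec![(1, vec!["b1"]), (2, vec!["b2", "b3"])], // file 1
        ];

        let groups = files
            .into_iter()
            .enumerate()
            .map(move |(file_id, pages)| {
                pages.into_iter().flat_map(move |(page_number, lines)| {
                    lines.into_iter().map(move |text| Line {
                        file_id,
                        page_number,
                        // One key per (page, file); ordering by it interleaves
                        // the files' pages in page order.
                        group_key: page_number * nfiles + file_id,
                        text: text.to_string(),
                    })
                })
            })
            // Merge the per-file streams in ascending group_key order.
            .kmerge_by(|a: &Line, b: &Line| a.group_key < b.group_key)
            // One group per (page, file) pair.
            .group_by(|line: &Line| line.group_key);

        for (key, group) in groups.into_iter() {
            let texts: Vec<String> = group.map(|l| l.text).collect();
            println!("group_key {}: {:?}", key, texts);
        }
    }

With two files this prints keys 2, 3, 4, 5, i.e. file 0's page 1, file 1's page 1, file 0's page 2, file 1's page 2, which is the interleaving mpr relies on when it prints merged pages.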
@@ -997,9 +1011,10 @@ fn mpr(paths: &Vec<String>, options: &OutputOptions) -> Result<i32, PrError> {
         })
         .group_by(|file_line: &FileLine| file_line.group_key);
 
-    let mut lines: Vec<FileLine> = Vec::new();
     let start_page: &usize = &options.start_page;
+    let mut lines: Vec<FileLine> = Vec::new();
     let mut page_counter: usize = *start_page;
 
     for (_key, file_line_group) in file_line_groups.into_iter() {
         for file_line in file_line_group {
             if file_line.line_content.is_err() {
@@ -1007,87 +1022,24 @@ fn mpr(paths: &Vec<String>, options: &OutputOptions) -> Result<i32, PrError> {
             }
             let new_page_number = file_line.page_number;
             if page_counter != new_page_number {
-                fill_missing_lines(&mut lines, lines_needed_per_page, &nfiles, page_counter);
-                print_page(&lines, options, &page_counter, &start_page, &last_page)?;
+                print_page(&lines, options, &page_counter)?;
                 lines = Vec::new();
-                page_counter = new_page_number;
             }
             lines.push(file_line);
+            page_counter = new_page_number;
         }
     }
 
-    fill_missing_lines(&mut lines, lines_needed_per_page, &nfiles, page_counter);
-    print_page(&lines, options, &page_counter, &start_page, &last_page)?;
+    print_page(&lines, options, &page_counter)?;
 
     return Ok(0);
 }
 
-fn fill_missing_lines(
-    lines: &mut Vec<FileLine>,
-    lines_per_file: usize,
-    nfiles: &usize,
-    page_number: usize,
-) {
-    let init_line_number = (page_number - 1) * lines_per_file + 1;
-    let mut file_id_counter: usize = 0;
-    let mut line_number_counter: usize = init_line_number;
-    let mut lines_processed_per_file: usize = 0;
-
-    for mut i in 0..lines_per_file * nfiles {
-        let file_id = lines
-            .get(i)
-            .map(|i: &FileLine| i.file_id)
-            .unwrap_or(file_id_counter);
-        let line_number = lines.get(i).map(|i: &FileLine| i.line_number).unwrap_or(1);
-        if lines_processed_per_file == lines_per_file {
-            line_number_counter = init_line_number;
-            file_id_counter += 1;
-            lines_processed_per_file = 0;
-        }
-        if file_id != file_id_counter {
-            // Insert missing file_ids
-            lines.insert(
-                i,
-                FileLine {
-                    file_id: file_id_counter,
-                    line_number: line_number_counter,
-                    line_content: Ok("".to_string()),
-                    page_number,
-                    ..FileLine::default()
-                },
-            );
-            line_number_counter += 1;
-        } else if line_number < line_number_counter {
-            // Insert missing lines for a file_id
-            line_number_counter += 1;
-            lines.insert(
-                i,
-                FileLine {
-                    file_id,
-                    line_number: line_number_counter,
-                    line_content: Ok("".to_string()),
-                    page_number,
-                    ..FileLine::default()
-                },
-            );
-        } else {
-            line_number_counter = line_number;
-        }
-        lines_processed_per_file += 1;
-    }
-}
-
 fn print_page(
     lines: &Vec<FileLine>,
     options: &OutputOptions,
     page: &usize,
-    start_page: &usize,
-    last_page: &Option<&usize>,
 ) -> Result<usize, IOError> {
-    if (last_page.is_some() && page > last_page.unwrap()) || page < start_page {
-        return Ok(0);
-    }
     let page_separator = options.page_separator_char.as_bytes();
     let header: Vec<String> = header_content(options, page);
     let trailer_content: Vec<String> = trailer_content(options);
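The printing loop in the hunk above is a flush-on-key-change accumulation: lines are collected until the page number changes, the finished page is printed, and a final flush after the loop handles the last page. A tiny generic sketch of the same pattern, with hypothetical names and plain strings instead of FileLine:

    // Hypothetical sketch: accumulate items that share a key, emit the batch
    // when the key changes, and flush the last batch after the loop.
    fn flush_on_change(items: Vec<(usize, &str)>) {
        let mut batch: Vec<&str> = Vec::new();
        let mut current_key: Option<usize> = None;
        for (key, item) in items {
            if current_key.is_some() && current_key != Some(key) {
                println!("page {}: {:?}", current_key.unwrap(), batch);
                batch = Vec::new();
            }
            batch.push(item);
            current_key = Some(key);
        }
        if let Some(key) = current_key {
            println!("page {}: {:?}", key, batch); // final flush, like the trailing print_page
        }
    }

    fn main() {
        flush_on_change(vec![(1, "a1"), (1, "b1"), (2, "a2"), (2, "b2")]);
    }

Because the page range is already filtered inside read_stream_and_create_pages, print_page no longer needs start_page and last_page or its own early-return guard, which is what the signature change above removes.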
@@ -1159,16 +1111,19 @@ fn write_columns(
         options.page_width
     };
 
+    let offset_spaces: &usize = &options.offset_spaces;
+    let mut lines_printed = 0;
+    let is_number_mode = options.number.is_some();
+    let feed_line_present = options.form_feed_used;
+    let spaces = " ".repeat(*offset_spaces);
+    let mut not_found_break = false;
+
+    if options.merge_files_print.is_none() {
         let across_mode = options
             .column_mode_options
             .as_ref()
             .map(|i| i.across_mode)
             .unwrap_or(false);
-    let offset_spaces: &usize = &options.offset_spaces;
-    let mut lines_printed = 0;
-    let is_number_mode = options.number.is_some();
 
         let fetch_indexes: Vec<Vec<usize>> = if across_mode {
             (0..content_lines_per_page)
                 .map(|a| (0..columns).map(|i| a * columns + i).collect())
@@ -1182,9 +1137,6 @@ fn write_columns(
             })
             .collect()
         };
-    let feed_line_present = options.form_feed_used;
-    let spaces = " ".repeat(*offset_spaces);
-    let mut not_found_break = false;
 
         for fetch_index in fetch_indexes {
             let indexes = fetch_index.len();
             for i in 0..indexes {
@@ -1220,6 +1172,62 @@ fn write_columns(
                 out.write(line_separator)?;
             }
         }
+    } else {
+        let mut index: usize = 0;
+        let mut batches: Vec<Vec<&FileLine>> = Vec::new();
+        for col in 0..columns {
+            let mut batch: Vec<&FileLine> = vec![];
+            for i in index..lines.len() {
+                let line = lines.get(i).unwrap();
+                if line.file_id != col {
+                    break;
+                }
+                batch.push(line);
+                index += 1;
+            }
+            batches.push(batch);
+        }
+
+        let blank_line = &&FileLine::default();
+        for _i in 0..content_lines_per_page {
+            for col_index in 0..columns {
+                let col: Option<&Vec<&FileLine>> = batches.get(col_index);
+                let file_line = if col.is_some() {
+                    let opt_file_line: Option<&&FileLine> = col.unwrap().get(_i);
+                    opt_file_line.unwrap_or(blank_line)
+                } else {
+                    blank_line
+                };
+
+                let trimmed_line: String = format!(
+                    "{}{}",
+                    spaces,
+                    get_line_for_printing(
+                        file_line,
+                        &number_width,
+                        &number_separator,
+                        columns,
+                        is_number_mode,
+                        &options.merge_files_print,
+                        &col_index,
+                        line_width,
+                    )
+                );
+
+                out.write(trimmed_line.as_bytes())?;
+
+                if (col_index + 1) != columns && !options.join_lines {
+                    out.write(col_sep.as_bytes())?;
+                }
+
+                lines_printed += 1;
+            }
+            if feed_line_present {
+                break;
+            } else {
+                out.write(line_separator)?;
+            }
+        }
+    }
 
     Ok(lines_printed)
 }
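A condensed sketch of the column layout used by the new merge branch above, with hypothetical names and plain strings instead of FileLine, and with the form-feed early break omitted: split the flat, file-ordered line list into one batch per column by file_id, then print row by row, padding columns that run out of lines with a blank cell. This padding at print time is what lets the commit drop fill_missing_lines entirely.

    // Illustrative stand-in for the merge_files_print branch of write_columns.
    fn print_merged(lines: &[(usize, &str)], columns: usize, rows_per_page: usize) {
        // One batch of lines per column (file), taken in file_id order.
        let mut batches: Vec<Vec<&str>> = vec![Vec::new(); columns];
        let mut index = 0;
        for col in 0..columns {
            while index < lines.len() && lines[index].0 == col {
                batches[col].push(lines[index].1);
                index += 1;
            }
        }

        // Print row by row; a column with no line for this row gets a blank cell.
        for row in 0..rows_per_page {
            let cells: Vec<&str> = (0..columns)
                .map(|col| batches[col].get(row).copied().unwrap_or(""))
                .collect();
            println!("{}", cells.join(" | "));
        }
    }

    fn main() {
        // file 0 contributes three lines, file 1 only two.
        let lines = [(0, "a1"), (0, "a2"), (0, "a3"), (1, "b1"), (1, "b2")];
        print_merged(&lines, 2, 3);
    }

The example prints three rows with the second column blank on the last row, mirroring how a shorter file now simply produces empty cells instead of pre-inserted filler FileLines.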