Improve performance and add profiling tooling

Add tags, todo, *.bench, *.svg and bench_output to .gitignore

Add test/bench.sh script. bench.sh runs the `bench` command on each
test/long*txt file with range 3:5 and saves the output to a file for
comparing performance across file sizes.

Inline printing in get_choice_slice

Change BufWriter<..stdout..> to BufWriter<T>

Add MockStdout for testing printing

Add more reverse range tests

Simplify word finding with a more uniform bounds check.

Add Makefile for generating flamegraphs

Redefine Choice struct as a start and end integer

Improve algorithm for finding words to print

Settle exclusivity at Config construction time

Add tests for nonexistent field_seps

Add regression test for preceding separator

Use handle.write instead of write! macro for tremendous speed up
This commit is contained in:
Ryan Geary 2020-02-24 23:59:51 -05:00
parent c729ad3f00
commit 389c29822b
6 changed files with 326 additions and 161 deletions

7
.gitignore vendored
View file

@ -1,2 +1,9 @@
/target
**/*.rs.bk
tags
todo
bench_output/
**/*.bench
**/*.svg
test/long*txt
perf.data*

11
Makefile Normal file
View file

@ -0,0 +1,11 @@
# Profiling helpers: build the release binary, sample it with perf while it
# processes a large input, and render the recorded stacks as a flamegraph SVG.
# Requires perf, c++filt and the FlameGraph scripts (stackcollapse-perf.pl,
# stackcollapse-recursive.pl, flamegraph.pl) to be on PATH.

# Profile the current working tree; result goes to flamegraphs/working.svg.
flamegraph: release
	mkdir -p flamegraphs
	perf record --call-graph dwarf,16384 -e cpu-clock -F 997 target/release/choose -i test/long_long_long_long.txt 3:5
	perf script | stackcollapse-perf.pl | stackcollapse-recursive.pl | c++filt | flamegraph.pl > flamegraphs/working.svg

# Same profile, but the SVG is named after the abbreviated hash of HEAD so
# flamegraphs from different commits can be kept side by side.
flamegraph_commit: release
	mkdir -p flamegraphs
	perf record --call-graph dwarf,16384 -e cpu-clock -F 997 target/release/choose -i test/long_long_long_long.txt 3:5
	perf script | stackcollapse-perf.pl | stackcollapse-recursive.pl | c++filt | flamegraph.pl > flamegraphs/`git log -n 1 --pretty=format:"%h"`.svg

# None of these targets produce a file named after themselves, so mark them
# all phony to keep a stray file of the same name from masking them.
.PHONY: flamegraph flamegraph_commit release
release:
	cargo build --release

View file

@ -1,89 +1,74 @@
use crate::io::{BufWriter, Write};
use std::convert::TryInto;
use crate::config::Config;
pub type Range = (Option<u32>, Option<u32>);
use crate::io::{BufWriter, Write};
#[derive(Debug)]
pub enum Choice {
Field(u32),
FieldRange(Range),
/// A field selection stored as a pair of field indices.
///
/// A single field `n` is stored with `start == end == n`. When `end` is
/// smaller than `start`, `is_reverse_range` reports the selection as a
/// reverse range.
// NOTE(review): both bounds appear to be inclusive once `Config` has applied
// the exclusive-range adjustment; confirm against `Config::new`.
pub struct Choice {
/// Index the selection starts at.
pub start: usize,
/// Index the selection ends at.
pub end: usize,
}
impl Choice {
pub fn print_choice(
pub fn new(start: usize, end: usize) -> Self {
Choice { start, end }
}
pub fn print_choice<WriterType: Write>(
&self,
line: &String,
config: &Config,
handle: &mut BufWriter<std::io::StdoutLock>,
handle: &mut BufWriter<WriterType>,
) {
write!(handle, "{}", self.get_choice_slice(line, config).join(" "));
}
pub fn is_reverse_range(&self) -> bool {
match self {
Choice::Field(_) => false,
Choice::FieldRange(r) => match r {
(Some(start), Some(end)) => end < start,
_ => false,
},
}
}
fn get_choice_slice<'a>(&self, line: &'a String, config: &Config) -> Vec<&'a str> {
let words = config
.separator
.split(line)
.into_iter()
.filter(|s| !s.is_empty())
.enumerate();
let mut slices = match self {
Choice::Field(i) => words
.filter(|x| x.0 == *i as usize)
.map(|x| x.1)
.collect::<Vec<&str>>(),
Choice::FieldRange(r) => match r {
(None, None) => words.map(|x| x.1).collect::<Vec<&str>>(),
(Some(start), None) => words
.filter(|x| x.0 >= (*start).try_into().unwrap())
.map(|x| x.1)
.collect::<Vec<&str>>(),
(None, Some(end)) => {
let e: usize = if config.opt.exclusive {
(end - 1).try_into().unwrap()
} else {
(*end).try_into().unwrap()
};
words
.filter(|x| x.0 <= e)
.map(|x| x.1)
.collect::<Vec<&str>>()
}
(Some(start), Some(end)) => {
let e: usize = if config.opt.exclusive {
(end - 1).try_into().unwrap()
} else {
(*end).try_into().unwrap()
};
words
.filter(|x| {
(x.0 <= e && x.0 >= (*start).try_into().unwrap())
|| self.is_reverse_range()
&& (x.0 >= e && x.0 <= (*start).try_into().unwrap())
})
.map(|x| x.1)
.collect::<Vec<&str>>()
}
},
};
let mut line_iter = config.separator.split(line).filter(|s| !s.is_empty());
if self.is_reverse_range() {
slices.reverse();
}
if self.end > 0 {
line_iter.nth(self.end - 1);
}
return slices;
let mut stack = Vec::new();
for i in 0..=(self.start - self.end) {
match line_iter.next() {
Some(s) => stack.push(s),
None => break,
}
if self.start <= self.end + i {
break;
}
}
loop {
match stack.pop() {
Some(s) => Choice::write_bytes(handle, s.as_bytes()),
None => break,
}
}
} else {
if self.start > 0 {
line_iter.nth(self.start - 1);
}
for i in 0..=(self.end - self.start) {
match line_iter.next() {
Some(s) => Choice::write_bytes(handle, s.as_bytes()),
None => break,
};
if self.end <= self.start + i {
break;
}
}
}
}
/// Writes `b` to `handle`, followed by a single space separator.
///
/// `write_all` is used instead of `write` because `write` is allowed to
/// accept only a prefix of the slice. `BufWriter` forwards slices that are
/// at least as large as its internal buffer straight to the inner writer,
/// so a long field could otherwise be silently truncated.
///
/// # Panics
///
/// Panics if the underlying writer reports an I/O error.
fn write_bytes<WriterType: Write>(handle: &mut BufWriter<WriterType>, b: &[u8]) {
    handle.write_all(b).unwrap();
    handle.write_all(b" ").unwrap();
}
/// Reports whether this choice runs backwards, i.e. its `start` bound lies
/// strictly after its `end` bound.
#[cfg_attr(feature = "flame_it", flame)]
pub fn is_reverse_range(&self) -> bool {
    self.start > self.end
}
}
@ -92,6 +77,7 @@ mod tests {
use crate::config::{Config, Opt};
use std::ffi::OsString;
use std::io::{self, BufWriter, Write};
use structopt::StructOpt;
impl Config {
@ -104,128 +90,293 @@ mod tests {
}
}
mod get_choice_slice_tests {
/// In-memory stand-in for stdout that records everything written to it, so
/// tests can assert on the text a printing routine produced.
struct MockStdout {
    /// Text accumulated from every `write` call so far.
    pub buffer: String,
}

impl MockStdout {
    /// Creates a mock with an empty capture buffer.
    fn new() -> Self {
        MockStdout {
            buffer: String::new(),
        }
    }

    /// Flushes `b`, recovers the wrapped mock, and returns the captured text
    /// with trailing whitespace removed (printing leaves a trailing
    /// separator that the tests do not care about).
    ///
    /// # Panics
    ///
    /// Panics if the `BufWriter` cannot be flushed into the mock.
    fn str_from_buf_writer(b: BufWriter<MockStdout>) -> String {
        let mock = match b.into_inner() {
            Ok(mock) => mock,
            Err(_) => panic!("Failed to access BufWriter inner writer"),
        };
        mock.buffer.trim_end().to_string()
    }
}

impl Write for MockStdout {
    /// Appends `buf` to the capture buffer, decoding it as UTF-8.
    ///
    /// Casting each byte to `char` would turn every byte of a multi-byte
    /// sequence into its own Latin-1 code point and corrupt non-ASCII
    /// output. Bytes that are not valid UTF-8 are replaced with U+FFFD.
    /// A multi-byte character split across two `write` calls is not
    /// reassembled; that only happens once the data outgrows the
    /// `BufWriter` buffer, far beyond what the tests write.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.buffer.push_str(&String::from_utf8_lossy(buf));
        Ok(buf.len())
    }

    /// Nothing to flush: every write lands in `buffer` immediately.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
mod print_choice_tests {
use super::*;
#[test]
fn print_0() {
let config = Config::from_iter(vec!["choose", "0"]);
let mut handle = BufWriter::new(MockStdout::new());
config.opt.choice[0].print_choice(
&String::from("rust is pretty cool"),
&config,
&mut handle,
);
assert_eq!(
vec!["rust"],
config.opt.choice[0]
.get_choice_slice(&String::from("rust is pretty cool"), &config)
String::from("rust"),
MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_after_end() {
let config = Config::from_iter(vec!["choose", "10"]);
assert_eq!(
Vec::<&str>::new(),
config.opt.choice[0]
.get_choice_slice(&String::from("rust is pretty cool"), &config)
let mut handle = BufWriter::new(MockStdout::new());
config.opt.choice[0].print_choice(
&String::from("rust is pretty cool"),
&config,
&mut handle,
);
assert_eq!(String::new(), MockStdout::str_from_buf_writer(handle));
}
#[test]
fn print_out_of_order() {
let config = Config::from_iter(vec!["choose", "3", "1"]);
assert_eq!(
vec!["cool"],
config.opt.choice[0]
.get_choice_slice(&String::from("rust is pretty cool"), &config)
let mut handle = BufWriter::new(MockStdout::new());
let mut handle1 = BufWriter::new(MockStdout::new());
config.opt.choice[0].print_choice(
&String::from("rust is pretty cool"),
&config,
&mut handle,
);
assert_eq!(
vec!["is"],
config.opt.choice[1]
.get_choice_slice(&String::from("rust is pretty cool"), &config)
String::from("cool"),
MockStdout::str_from_buf_writer(handle)
);
config.opt.choice[1].print_choice(
&String::from("rust is pretty cool"),
&config,
&mut handle1,
);
assert_eq!(String::from("is"), MockStdout::str_from_buf_writer(handle1));
}
#[test]
fn print_1_to_3_exclusive() {
let config = Config::from_iter(vec!["choose", "1:3", "-x"]);
let mut handle = BufWriter::new(MockStdout::new());
config.opt.choice[0].print_choice(
&String::from("rust is pretty cool"),
&config,
&mut handle,
);
assert_eq!(
vec!["is", "pretty"],
config.opt.choice[0]
.get_choice_slice(&String::from("rust is pretty cool"), &config)
String::from("is pretty"),
MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_1_to_3() {
let config = Config::from_iter(vec!["choose", "1:3"]);
let mut handle = BufWriter::new(MockStdout::new());
config.opt.choice[0].print_choice(
&String::from("rust is pretty cool"),
&config,
&mut handle,
);
assert_eq!(
vec!["is", "pretty", "cool"],
config.opt.choice[0]
.get_choice_slice(&String::from("rust is pretty cool"), &config)
String::from("is pretty cool"),
MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_1_to_3_separated_by_hashtag() {
let config = Config::from_iter(vec!["choose", "1:3", "-f", "#"]);
let mut handle = BufWriter::new(MockStdout::new());
config.opt.choice[0].print_choice(
&String::from("rust#is#pretty#cool"),
&config,
&mut handle,
);
assert_eq!(
vec!["is", "pretty", "cool"],
config.opt.choice[0]
.get_choice_slice(&String::from("rust#is#pretty#cool"), &config)
String::from("is pretty cool"),
MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_1_to_3_separated_by_varying_multiple_hashtag_exclusive() {
let config = Config::from_iter(vec!["choose", "1:3", "-f", "#", "-x"]);
let mut handle = BufWriter::new(MockStdout::new());
config.opt.choice[0].print_choice(
&String::from("rust##is###pretty####cool"),
&config,
&mut handle,
);
assert_eq!(
vec!["is", "pretty"],
config.opt.choice[0]
.get_choice_slice(&String::from("rust##is###pretty####cool"), &config)
String::from("is pretty"),
MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_1_to_3_separated_by_varying_multiple_hashtag() {
let config = Config::from_iter(vec!["choose", "1:3", "-f", "#"]);
let mut handle = BufWriter::new(MockStdout::new());
config.opt.choice[0].print_choice(
&String::from("rust##is###pretty####cool"),
&config,
&mut handle,
);
assert_eq!(
vec!["is", "pretty", "cool"],
config.opt.choice[0]
.get_choice_slice(&String::from("rust##is###pretty####cool"), &config)
String::from("is pretty cool"),
MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_1_to_3_separated_by_regex_group_vowels_exclusive() {
let config = Config::from_iter(vec!["choose", "1:3", "-f", "[aeiou]", "-x"]);
let mut handle = BufWriter::new(MockStdout::new());
config.opt.choice[0].print_choice(
&String::from("the quick brown fox jumped over the lazy dog"),
&config,
&mut handle,
);
assert_eq!(
vec![" q", "ck br"],
config.opt.choice[0].get_choice_slice(
&String::from("the quick brown fox jumped over the lazy dog"),
&config
)
String::from(" q ck br"),
MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_1_to_3_separated_by_regex_group_vowels() {
let config = Config::from_iter(vec!["choose", "1:3", "-f", "[aeiou]"]);
let mut handle = BufWriter::new(MockStdout::new());
config.opt.choice[0].print_choice(
&String::from("the quick brown fox jumped over the lazy dog"),
&config,
&mut handle,
);
assert_eq!(
vec![" q", "ck br", "wn f"],
config.opt.choice[0].get_choice_slice(
&String::from("the quick brown fox jumped over the lazy dog"),
&config
)
String::from(" q ck br wn f"),
MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_3_to_1() {
let config = Config::from_iter(vec!["choose", "3:1"]);
let mut handle = BufWriter::new(MockStdout::new());
config.opt.choice[0].print_choice(
&String::from("rust lang is pretty darn cool"),
&config,
&mut handle,
);
assert_eq!(
vec!["pretty", "is", "lang"],
config.opt.choice[0]
.get_choice_slice(&String::from("rust lang is pretty darn cool"), &config)
String::from("pretty is lang"),
MockStdout::str_from_buf_writer(handle)
);
}
/// A reverse range given together with `-x` prints "is lang" for this line.
#[test]
fn print_3_to_1_exclusive() {
    let config = Config::from_iter(vec!["choose", "3:1", "-x"]);
    let line = String::from("rust lang is pretty darn cool");
    let mut writer = BufWriter::new(MockStdout::new());
    let choice = &config.opt.choice[0];
    choice.print_choice(&line, &config, &mut writer);
    let printed = MockStdout::str_from_buf_writer(writer);
    assert_eq!(String::from("is lang"), printed);
}
/// With a separator that never occurs in the line, the range `1:3` prints
/// nothing.
#[test]
fn print_1_to_3_nonexistant_field_separator() {
    let config = Config::from_iter(vec!["choose", "1:3", "-f", "#"]);
    let line = String::from("rust lang is pretty darn cool");
    let mut writer = BufWriter::new(MockStdout::new());
    let choice = &config.opt.choice[0];
    choice.print_choice(&line, &config, &mut writer);
    let printed = MockStdout::str_from_buf_writer(writer);
    assert_eq!(String::from(""), printed);
}
/// With a separator that never occurs in the line, field `0` is the entire
/// line.
#[test]
fn print_0_nonexistant_field_separator() {
    let config = Config::from_iter(vec!["choose", "0", "-f", "#"]);
    let line = String::from("rust lang is pretty darn cool");
    let mut writer = BufWriter::new(MockStdout::new());
    let choice = &config.opt.choice[0];
    choice.print_choice(&line, &config, &mut writer);
    let printed = MockStdout::str_from_buf_writer(writer);
    assert_eq!(String::from("rust lang is pretty darn cool"), printed);
}
/// With a separator that never occurs in the line, the range `0:3` prints
/// the entire line once.
#[test]
fn print_0_to_3_nonexistant_field_separator() {
    let config = Config::from_iter(vec!["choose", "0:3", "-f", "#"]);
    let line = String::from("rust lang is pretty darn cool");
    let mut writer = BufWriter::new(MockStdout::new());
    let choice = &config.opt.choice[0];
    choice.print_choice(&line, &config, &mut writer);
    let printed = MockStdout::str_from_buf_writer(writer);
    assert_eq!(String::from("rust lang is pretty darn cool"), printed);
}
/// Regression test: a line that begins with a separator still yields "rust"
/// for field `0`.
#[test]
fn print_0_with_preceding_separator() {
    let config = Config::from_iter(vec!["choose", "0"]);
    let line = String::from(" rust lang is pretty darn cool");
    let mut writer = BufWriter::new(MockStdout::new());
    let choice = &config.opt.choice[0];
    choice.print_choice(&line, &config, &mut writer);
    let printed = MockStdout::str_from_buf_writer(writer);
    assert_eq!(String::from("rust"), printed);
}
}
mod is_reverse_range_tests {
@ -260,7 +411,5 @@ mod tests {
let config = Config::from_iter(vec!["choose", "4:2"]);
assert_eq!(true, config.opt.choice[0].is_reverse_range());
}
}
}

View file

@ -38,7 +38,17 @@ pub struct Config {
}
impl Config {
pub fn new(opt: Opt) -> Self {
pub fn new(mut opt: Opt) -> Self {
if opt.exclusive {
for mut choice in &mut opt.choice {
if choice.is_reverse_range() {
choice.start = choice.start - 1;
} else {
choice.end = choice.end - 1;
}
}
}
let separator = Regex::new(match &opt.field_separator {
Some(s) => s,
None => "[[:space:]]",
@ -57,7 +67,7 @@ impl Config {
let cap = match re.captures_iter(src).next() {
Some(v) => v,
None => match src.parse() {
Ok(x) => return Ok(Choice::Field(x)),
Ok(x) => return Ok(Choice::new(x, x)),
Err(_) => {
eprintln!("failed to parse choice argument: {}", src);
// Exit code of 2 means failed to parse choice argument
@ -67,10 +77,10 @@ impl Config {
};
let start = if cap[1].is_empty() {
None
usize::min_value()
} else {
match cap[1].parse() {
Ok(x) => Some(x),
Ok(x) => x,
Err(_) => {
eprintln!("failed to parse range start: {}", &cap[1]);
process::exit(2);
@ -79,10 +89,10 @@ impl Config {
};
let end = if cap[2].is_empty() {
None
usize::max_value()
} else {
match cap[2].parse() {
Ok(x) => Some(x),
Ok(x) => x,
Err(_) => {
eprintln!("failed to parse range end: {}", &cap[2]);
process::exit(2);
@ -90,7 +100,7 @@ impl Config {
}
};
return Ok(Choice::FieldRange((start, end)));
return Ok(Choice::new(start, end));
}
}
@ -102,62 +112,41 @@ mod tests {
use super::*;
#[test]
fn parse_single_choice() {
fn parse_single_choice_start() {
let result = Config::parse_choice("6").unwrap();
assert_eq!(
6,
match result {
Choice::Field(x) => x,
_ => panic!(),
}
)
assert_eq!(6, result.start)
}
/// A bare field number parses to a choice whose `end` is that number.
#[test]
fn parse_single_choice_end() {
    let choice = Config::parse_choice("6").unwrap();
    let end = choice.end;
    assert_eq!(6, end);
}
#[test]
fn parse_none_started_range() {
let result = Config::parse_choice(":5").unwrap();
assert_eq!(
(None, Some(5)),
match result {
Choice::FieldRange(x) => x,
_ => panic!(),
}
)
assert_eq!((usize::min_value(), 5), (result.start, result.end))
}
#[test]
fn parse_none_terminated_range() {
let result = Config::parse_choice("5:").unwrap();
assert_eq!(
(Some(5), None),
match result {
Choice::FieldRange(x) => x,
_ => panic!(),
}
)
assert_eq!((5, usize::max_value()), (result.start, result.end))
}
#[test]
fn parse_full_range() {
let result = Config::parse_choice("5:7").unwrap();
assert_eq!(
(Some(5), Some(7)),
match result {
Choice::FieldRange(x) => x,
_ => panic!(),
}
)
assert_eq!((5, 7), (result.start, result.end))
}
#[test]
fn parse_beginning_to_end_range() {
let result = Config::parse_choice(":").unwrap();
assert_eq!(
(None, None),
match result {
Choice::FieldRange(x) => x,
_ => panic!(),
}
(usize::min_value(), usize::max_value()),
(result.start, result.end)
)
}
@ -173,5 +162,4 @@ mod tests {
//assert!(Config::parse_choice("d:i").is_err());
//}
}
}

View file

@ -28,7 +28,7 @@ fn main() {
for choice in &config.opt.choice {
choice.print_choice(&l, &config, &mut handle);
}
writeln!(handle, "");
handle.write(b"\n").unwrap();
}
Err(e) => println!("ERROR: {}", e),
}

10
test/bench.sh Executable file
View file

@ -0,0 +1,10 @@
#!/bin/bash
# Benchmark the release build of `choose` against the test/long*txt inputs.
#
# Usage: test/bench.sh [PREFIX]
#
# Runs `bench` on `choose 3:5` for up to five input files (reverse-sorted by
# name) and stores each report in bench_output/<PREFIX><index>.bench so runs
# can be compared across input sizes.

# Always benchmark an up-to-date binary; a failed build would otherwise leave
# a stale (or missing) executable to be measured.
cargo build --release || exit 1

output="bench_output"
prefix="${1:-}"
mkdir -p "$output"

# Read one path per line so file names are never word-split or glob-expanded.
inputs=()
while IFS= read -r file; do
    inputs+=("$file")
done < <(find test -name "long*txt" | sort -r)

# Benchmark at most five inputs, and never index past the files that exist.
count=${#inputs[@]}
if (( count > 5 )); then
    count=5
fi

for (( i = 0; i < count; i++ ))
do
    echo "${inputs[$i]}"
    # `bench` takes the whole command line as a single string argument.
    bench "target/release/choose 3:5 -i ${inputs[$i]}" > "$output/$prefix$i.bench"
done