Extract messy tree handling out of profiling code

This commit is contained in:
Aleksey Kladov 2020-04-25 21:04:04 +02:00
parent 0f099ead88
commit bd9ede0ec9
6 changed files with 119 additions and 150 deletions

1
Cargo.lock generated
View file

@ -1139,6 +1139,7 @@ dependencies = [
"jemalloc-ctl", "jemalloc-ctl",
"jemallocator", "jemallocator",
"once_cell", "once_cell",
"ra_arena",
] ]
[[package]] [[package]]

View file

@ -96,6 +96,9 @@ impl<T> Arena<T> {
pub const fn new() -> Arena<T> { pub const fn new() -> Arena<T> {
Arena { data: Vec::new() } Arena { data: Vec::new() }
} }
pub fn clear(&mut self) {
self.data.clear();
}
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.data.len() self.data.len()

View file

@ -9,6 +9,7 @@ publish = false
doctest = false doctest = false
[dependencies] [dependencies]
ra_arena = { path = "../ra_arena" }
once_cell = "1.3.1" once_cell = "1.3.1"
backtrace = { version = "0.3.44", optional = true } backtrace = { version = "0.3.44", optional = true }

View file

@ -1,4 +1,5 @@
//! Simple hierarchical profiler //! Simple hierarchical profiler
use once_cell::sync::Lazy;
use std::{ use std::{
cell::RefCell, cell::RefCell,
collections::{BTreeMap, HashSet}, collections::{BTreeMap, HashSet},
@ -10,7 +11,7 @@ use std::{
time::{Duration, Instant}, time::{Duration, Instant},
}; };
use once_cell::sync::Lazy; use crate::tree::{Idx, Tree};
/// Filtering syntax /// Filtering syntax
/// env RA_PROFILE=* // dump everything /// env RA_PROFILE=* // dump everything
@ -138,12 +139,12 @@ impl Filter {
struct ProfileStack { struct ProfileStack {
starts: Vec<Instant>, starts: Vec<Instant>,
messages: Vec<Message>,
filter: Filter, filter: Filter,
messages: Tree<Message>,
} }
#[derive(Default)]
struct Message { struct Message {
level: usize,
duration: Duration, duration: Duration,
label: Label, label: Label,
detail: Option<String>, detail: Option<String>,
@ -151,7 +152,7 @@ struct Message {
impl ProfileStack { impl ProfileStack {
fn new() -> ProfileStack { fn new() -> ProfileStack {
ProfileStack { starts: Vec::new(), messages: Vec::new(), filter: Default::default() } ProfileStack { starts: Vec::new(), messages: Tree::default(), filter: Default::default() }
} }
fn push(&mut self, label: Label) -> bool { fn push(&mut self, label: Label) -> bool {
@ -171,6 +172,7 @@ impl ProfileStack {
} }
self.starts.push(Instant::now()); self.starts.push(Instant::now());
self.messages.start();
true true
} }
@ -178,7 +180,7 @@ impl ProfileStack {
let start = self.starts.pop().unwrap(); let start = self.starts.pop().unwrap();
let duration = start.elapsed(); let duration = start.elapsed();
let level = self.starts.len(); let level = self.starts.len();
self.messages.push(Message { level, duration, label, detail }); self.messages.finish(Message { duration, label, detail });
if level == 0 { if level == 0 {
let longer_than = self.filter.longer_than; let longer_than = self.filter.longer_than;
// Convert to millis for comparison to avoid problems with rounding // Convert to millis for comparison to avoid problems with rounding
@ -186,7 +188,9 @@ impl ProfileStack {
// `duration` is just a few nanos). // `duration` is just a few nanos).
if duration.as_millis() > longer_than.as_millis() { if duration.as_millis() > longer_than.as_millis() {
let stderr = stderr(); let stderr = stderr();
print(&self.messages, longer_than, &mut stderr.lock()); if let Some(root) = self.messages.root() {
print(&self.messages, root, 0, longer_than, &mut stderr.lock());
}
} }
self.messages.clear(); self.messages.clear();
assert!(self.starts.is_empty()) assert!(self.starts.is_empty())
@ -194,50 +198,38 @@ impl ProfileStack {
} }
} }
fn print(msgs: &[Message], longer_than: Duration, out: &mut impl Write) { fn print(
if msgs.is_empty() { tree: &Tree<Message>,
return; curr: Idx<Message>,
} level: u32,
let children_map = idx_to_children(msgs);
let root_idx = msgs.len() - 1;
print_for_idx(root_idx, &children_map, msgs, longer_than, out);
}
fn print_for_idx(
current_idx: usize,
children_map: &[Vec<usize>],
msgs: &[Message],
longer_than: Duration, longer_than: Duration,
out: &mut impl Write, out: &mut impl Write,
) { ) {
let current = &msgs[current_idx]; let current_indent = " ".repeat(level as usize);
let current_indent = " ".repeat(current.level); let detail = tree[curr].detail.as_ref().map(|it| format!(" @ {}", it)).unwrap_or_default();
let detail = current.detail.as_ref().map(|it| format!(" @ {}", it)).unwrap_or_default();
writeln!( writeln!(
out, out,
"{}{:5}ms - {}{}", "{}{:5}ms - {}{}",
current_indent, current_indent,
current.duration.as_millis(), tree[curr].duration.as_millis(),
current.label, tree[curr].label,
detail, detail,
) )
.expect("printing profiling info"); .expect("printing profiling info");
let longer_than_millis = longer_than.as_millis();
let children_indices = &children_map[current_idx];
let mut accounted_for = Duration::default(); let mut accounted_for = Duration::default();
let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output. let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
for child in tree.children(curr) {
accounted_for += tree[child].duration;
for child_idx in children_indices.iter() { if tree[child].duration.as_millis() > longer_than.as_millis() {
let child = &msgs[*child_idx]; print(tree, child, level + 1, longer_than, out)
if child.duration.as_millis() > longer_than_millis {
print_for_idx(*child_idx, children_map, msgs, longer_than, out);
} else { } else {
let pair = short_children.entry(child.label).or_insert((Duration::default(), 0)); let (total_duration, cnt) =
pair.0 += child.duration; short_children.entry(tree[child].label).or_insert((Duration::default(), 0));
pair.1 += 1; *total_duration += tree[child].duration;
*cnt += 1;
} }
accounted_for += child.duration;
} }
for (child_msg, (duration, count)) in short_children.iter() { for (child_msg, (duration, count)) in short_children.iter() {
@ -246,122 +238,9 @@ fn print_for_idx(
.expect("printing profiling info"); .expect("printing profiling info");
} }
let unaccounted_millis = (current.duration - accounted_for).as_millis(); let unaccounted = tree[curr].duration - accounted_for;
if !children_indices.is_empty() if tree.children(curr).next().is_some() && unaccounted > longer_than {
&& unaccounted_millis > 0 writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted.as_millis())
&& unaccounted_millis > longer_than_millis
{
writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted_millis)
.expect("printing profiling info"); .expect("printing profiling info");
} }
} }
/// Returns a mapping from an index in the `msgs` to the vector with the indices of its children.
///
/// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish.
/// In other words, a postorder of the call graph. In particular, the root is the last element of
/// `msgs`.
fn idx_to_children(msgs: &[Message]) -> Vec<Vec<usize>> {
// Initialize with the index of the root; `msgs` and `ancestors` should be never empty.
assert!(!msgs.is_empty());
let mut ancestors = vec![msgs.len() - 1];
let mut result: Vec<Vec<usize>> = vec![vec![]; msgs.len()];
for (idx, msg) in msgs[..msgs.len() - 1].iter().enumerate().rev() {
// We need to find the parent of the current message, i.e., the last ancestor that has a
// level lower than the current message.
while msgs[*ancestors.last().unwrap()].level >= msg.level {
ancestors.pop();
}
result[*ancestors.last().unwrap()].push(idx);
ancestors.push(idx);
}
// Note that above we visited all children from the last to the first one. Let's reverse vectors
// to get the more natural order where the first element is the first child.
for vec in result.iter_mut() {
vec.reverse();
}
result
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_basic_profile() {
let s = vec!["profile1".to_string(), "profile2".to_string()];
let f = Filter::new(2, s, Duration::new(0, 0));
set_filter(f);
profiling_function1();
}
fn profiling_function1() {
let _p = profile("profile1");
profiling_function2();
}
fn profiling_function2() {
let _p = profile("profile2");
}
#[test]
fn test_longer_than() {
let mut result = vec![];
let msgs = vec![
Message { level: 1, duration: Duration::from_nanos(3), label: "bar", detail: None },
Message { level: 1, duration: Duration::from_nanos(2), label: "bar", detail: None },
Message { level: 0, duration: Duration::from_millis(1), label: "foo", detail: None },
];
print(&msgs, Duration::from_millis(0), &mut result);
// The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed
// when printing.
assert_eq!(
std::str::from_utf8(&result).unwrap(),
" 1ms - foo\n 0ms - bar (2 calls)\n"
);
}
#[test]
fn test_unaccounted_for_topmost() {
let mut result = vec![];
let msgs = vec![
Message { level: 1, duration: Duration::from_millis(2), label: "bar", detail: None },
Message { level: 0, duration: Duration::from_millis(5), label: "foo", detail: None },
];
print(&msgs, Duration::from_millis(0), &mut result);
assert_eq!(
std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
vec![
" 5ms - foo",
" 2ms - bar",
" 3ms - ???",
// Dummy comment to improve formatting
]
);
}
#[test]
fn test_unaccounted_for_multiple_levels() {
let mut result = vec![];
let msgs = vec![
Message { level: 2, duration: Duration::from_millis(3), label: "baz", detail: None },
Message { level: 1, duration: Duration::from_millis(5), label: "bar", detail: None },
Message { level: 2, duration: Duration::from_millis(2), label: "baz", detail: None },
Message { level: 1, duration: Duration::from_millis(4), label: "bar", detail: None },
Message { level: 0, duration: Duration::from_millis(9), label: "foo", detail: None },
];
print(&msgs, Duration::from_millis(0), &mut result);
assert_eq!(
std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
vec![
" 9ms - foo",
" 5ms - bar",
" 3ms - baz",
" 2ms - ???",
" 4ms - bar",
" 2ms - baz",
" 2ms - ???",
]
);
}
}

View file

@ -4,6 +4,7 @@ mod memory_usage;
#[cfg(feature = "cpu_profiler")] #[cfg(feature = "cpu_profiler")]
mod google_cpu_profiler; mod google_cpu_profiler;
mod hprof; mod hprof;
mod tree;
use std::cell::RefCell; use std::cell::RefCell;

View file

@ -0,0 +1,84 @@
//! A simple tree implementation which tries to not allocate all over the place.
use std::ops;
use ra_arena::Arena;
#[derive(Default)]
pub struct Tree<T> {
nodes: Arena<Node<T>>,
current_path: Vec<(Idx<T>, Option<Idx<T>>)>,
}
pub type Idx<T> = ra_arena::Idx<Node<T>>;
impl<T> Tree<T> {
pub fn start(&mut self)
where
T: Default,
{
let me = self.nodes.alloc(Node::new(T::default()));
if let Some((parent, last_child)) = self.current_path.last_mut() {
let slot = match *last_child {
Some(last_child) => &mut self.nodes[last_child].next_sibling,
None => &mut self.nodes[*parent].first_child,
};
let prev = slot.replace(me);
assert!(prev.is_none());
*last_child = Some(me);
}
self.current_path.push((me, None));
}
pub fn finish(&mut self, data: T) {
let (me, _last_child) = self.current_path.pop().unwrap();
self.nodes[me].data = data;
}
pub fn root(&self) -> Option<Idx<T>> {
self.nodes.iter().next().map(|(idx, _)| idx)
}
pub fn children(&self, idx: Idx<T>) -> impl Iterator<Item = Idx<T>> + '_ {
NodeIter { nodes: &self.nodes, next: self.nodes[idx].first_child }
}
pub fn clear(&mut self) {
self.nodes.clear();
self.current_path.clear();
}
}
impl<T> ops::Index<Idx<T>> for Tree<T> {
type Output = T;
fn index(&self, index: Idx<T>) -> &T {
&self.nodes[index].data
}
}
pub struct Node<T> {
data: T,
first_child: Option<Idx<T>>,
next_sibling: Option<Idx<T>>,
}
impl<T> Node<T> {
fn new(data: T) -> Node<T> {
Node { data, first_child: None, next_sibling: None }
}
}
struct NodeIter<'a, T> {
nodes: &'a Arena<Node<T>>,
next: Option<Idx<T>>,
}
impl<'a, T> Iterator for NodeIter<'a, T> {
type Item = Idx<T>;
fn next(&mut self) -> Option<Idx<T>> {
self.next.map(|next| {
self.next = self.nodes[next].next_sibling;
next
})
}
}