10877: feat: make hightlighting linear r=matklad a=matklad

In https://youtu.be/qvIZZf5dmTE, we've noticed that AstIdMap does a
linear lookup when going from SyntaxNode to Id. This leads to
accidentally quadratic overall performance. Replace linear lookup with a
O(1) hashmap lookup.

Future work: don't duplicate `SyntaxNodePtr` in `AstIdMap` and switch to
"call site dependency injection" style storage (eg, store a
`HashSet<ErasedFileAstId>`).

See the explanation of the work here on YouTube :-)

As you can see from then benchmark results, this doesn't actually make analysis stats fastre. I am a bit mystified as to why this is happening to be honest. 

Baseline
```
Database loaded:     598.40ms, 304minstr, 118mb (metadata 390.57ms, 21minstr, 841kb; build 111.31ms, 8764kinstr, -214kb)
  crates: 39, mods: 824, decls: 18647, fns: 13910
Item Collection:     9.70s, 75ginstr, 377mb
  exprs: 382426, ??ty: 387 (0%), ?ty: 285 (0%), !ty: 145
Inference:           43.16s, 342ginstr, 641mb
Total:               52.86s, 417ginstr, 1018mb
```

This PR:
```
Database loaded:     626.34ms, 304minstr, 118mb (metadata 416.26ms, 21minstr, 841kb; build 113.67ms, 8750kinstr, -209kb)
  crates: 39, mods: 824, decls: 18647, fns: 13910
Item Collection:     10.16s, 75ginstr, 389mb
  exprs: 382426, ??ty: 387 (0%), ?ty: 285 (0%), !ty: 145
Inference:           44.51s, 342ginstr, 644mb
Total:               54.67s, 417ginstr, 1034mb
```

I think we probably should merge the first commit here, but not the second. 

Co-authored-by: Aleksey Kladov <aleksey.kladov@gmail.com>
This commit is contained in:
bors[bot] 2021-12-11 14:49:29 +00:00 committed by GitHub
commit 9946def7e2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 40 additions and 4 deletions

1
Cargo.lock generated
View file

@ -525,6 +525,7 @@ dependencies = [
"cfg", "cfg",
"cov-mark", "cov-mark",
"either", "either",
"hashbrown",
"itertools", "itertools",
"la-arena", "la-arena",
"limit", "limit",

View file

@ -16,6 +16,7 @@ either = "1.5.3"
rustc-hash = "1.0.0" rustc-hash = "1.0.0"
la-arena = { version = "0.3.0", path = "../../lib/arena" } la-arena = { version = "0.3.0", path = "../../lib/arena" }
itertools = "0.10.0" itertools = "0.10.0"
hashbrown = { version = "0.11", features = ["inline-more"], default-features = false }
base_db = { path = "../base_db", version = "0.0.0" } base_db = { path = "../base_db", version = "0.0.0" }
cfg = { path = "../cfg", version = "0.0.0" } cfg = { path = "../cfg", version = "0.0.0" }

View file

@ -8,12 +8,13 @@
use std::{ use std::{
any::type_name, any::type_name,
fmt, fmt,
hash::{Hash, Hasher}, hash::{BuildHasher, BuildHasherDefault, Hash, Hasher},
marker::PhantomData, marker::PhantomData,
}; };
use la_arena::{Arena, Idx}; use la_arena::{Arena, Idx};
use profile::Count; use profile::Count;
use rustc_hash::FxHasher;
use syntax::{ast, match_ast, AstNode, AstPtr, SyntaxNode, SyntaxNodePtr}; use syntax::{ast, match_ast, AstNode, AstPtr, SyntaxNode, SyntaxNodePtr};
/// `AstId` points to an AST node in a specific file. /// `AstId` points to an AST node in a specific file.
@ -60,12 +61,28 @@ impl<N: AstNode> FileAstId<N> {
type ErasedFileAstId = Idx<SyntaxNodePtr>; type ErasedFileAstId = Idx<SyntaxNodePtr>;
/// Maps items' `SyntaxNode`s to `ErasedFileAstId`s and back. /// Maps items' `SyntaxNode`s to `ErasedFileAstId`s and back.
#[derive(Debug, PartialEq, Eq, Default)] #[derive(Default)]
pub struct AstIdMap { pub struct AstIdMap {
/// Maps stable id to unstable ptr.
arena: Arena<SyntaxNodePtr>, arena: Arena<SyntaxNodePtr>,
/// Reverse: map ptr to id.
map: hashbrown::HashMap<Idx<SyntaxNodePtr>, (), ()>,
_c: Count<Self>, _c: Count<Self>,
} }
impl fmt::Debug for AstIdMap {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("AstIdMap").field("arena", &self.arena).finish()
}
}
impl PartialEq for AstIdMap {
fn eq(&self, other: &Self) -> bool {
self.arena == other.arena
}
}
impl Eq for AstIdMap {}
impl AstIdMap { impl AstIdMap {
pub(crate) fn from_source(node: &SyntaxNode) -> AstIdMap { pub(crate) fn from_source(node: &SyntaxNode) -> AstIdMap {
assert!(node.parent().is_none()); assert!(node.parent().is_none());
@ -89,6 +106,16 @@ impl AstIdMap {
} }
} }
}); });
res.map = hashbrown::HashMap::with_capacity_and_hasher(res.arena.len(), ());
for (idx, ptr) in res.arena.iter() {
let hash = hash_ptr(ptr);
match res.map.raw_entry_mut().from_hash(hash, |idx2| *idx2 == idx) {
hashbrown::hash_map::RawEntryMut::Occupied(_) => unreachable!(),
hashbrown::hash_map::RawEntryMut::Vacant(entry) => {
entry.insert_with_hasher(hash, idx, (), |&idx| hash_ptr(&res.arena[idx]));
}
}
}
res res
} }
@ -98,8 +125,9 @@ impl AstIdMap {
} }
fn erased_ast_id(&self, item: &SyntaxNode) -> ErasedFileAstId { fn erased_ast_id(&self, item: &SyntaxNode) -> ErasedFileAstId {
let ptr = SyntaxNodePtr::new(item); let ptr = SyntaxNodePtr::new(item);
match self.arena.iter().find(|(_id, i)| **i == ptr) { let hash = hash_ptr(&ptr);
Some((it, _)) => it, match self.map.raw_entry().from_hash(hash, |&idx| self.arena[idx] == ptr) {
Some((&idx, &())) => idx,
None => panic!( None => panic!(
"Can't find {:?} in AstIdMap:\n{:?}", "Can't find {:?} in AstIdMap:\n{:?}",
item, item,
@ -117,6 +145,12 @@ impl AstIdMap {
} }
} }
fn hash_ptr(ptr: &SyntaxNodePtr) -> u64 {
let mut hasher = BuildHasherDefault::<FxHasher>::default().build_hasher();
ptr.hash(&mut hasher);
hasher.finish()
}
/// Walks the subtree in bdfs order, calling `f` for each node. What is bdfs /// Walks the subtree in bdfs order, calling `f` for each node. What is bdfs
/// order? It is a mix of breadth-first and depth first orders. Nodes for which /// order? It is a mix of breadth-first and depth first orders. Nodes for which
/// `f` returns true are visited breadth-first, all the other nodes are explored /// `f` returns true are visited breadth-first, all the other nodes are explored