From 63f500b0ee55443a52f078060004c911a7532a14 Mon Sep 17 00:00:00 2001 From: David Lattimore Date: Wed, 22 Jul 2020 14:01:21 +1000 Subject: [PATCH] SSR: Use Definition::find_usages to speed up matching. When the search pattern contains a path, this substantially speeds up finding matches, especially if the path references a private item. --- crates/ra_ide_db/src/defs.rs | 37 ++++++---- crates/ra_ide_db/src/search.rs | 4 + crates/ra_ssr/src/lib.rs | 3 +- crates/ra_ssr/src/resolving.rs | 8 +- crates/ra_ssr/src/search.rs | 131 +++++++++++++++++++++++++++++++-- 5 files changed, 158 insertions(+), 25 deletions(-) diff --git a/crates/ra_ide_db/src/defs.rs b/crates/ra_ide_db/src/defs.rs index e06b189a00..f391a8e432 100644 --- a/crates/ra_ide_db/src/defs.rs +++ b/crates/ra_ide_db/src/defs.rs @@ -290,20 +290,25 @@ pub fn classify_name_ref( let path = name_ref.syntax().ancestors().find_map(ast::Path::cast)?; let resolved = sema.resolve_path(&path)?; - let res = match resolved { - PathResolution::Def(def) => Definition::ModuleDef(def), - PathResolution::AssocItem(item) => { - let def = match item { - hir::AssocItem::Function(it) => it.into(), - hir::AssocItem::Const(it) => it.into(), - hir::AssocItem::TypeAlias(it) => it.into(), - }; - Definition::ModuleDef(def) - } - PathResolution::Local(local) => Definition::Local(local), - PathResolution::TypeParam(par) => Definition::TypeParam(par), - PathResolution::Macro(def) => Definition::Macro(def), - PathResolution::SelfType(impl_def) => Definition::SelfType(impl_def), - }; - Some(NameRefClass::Definition(res)) + Some(NameRefClass::Definition(resolved.into())) +} + +impl From for Definition { + fn from(path_resolution: PathResolution) -> Self { + match path_resolution { + PathResolution::Def(def) => Definition::ModuleDef(def), + PathResolution::AssocItem(item) => { + let def = match item { + hir::AssocItem::Function(it) => it.into(), + hir::AssocItem::Const(it) => it.into(), + hir::AssocItem::TypeAlias(it) => it.into(), + }; + Definition::ModuleDef(def) + } + PathResolution::Local(local) => Definition::Local(local), + PathResolution::TypeParam(par) => Definition::TypeParam(par), + PathResolution::Macro(def) => Definition::Macro(def), + PathResolution::SelfType(impl_def) => Definition::SelfType(impl_def), + } + } } diff --git a/crates/ra_ide_db/src/search.rs b/crates/ra_ide_db/src/search.rs index 81553150b5..a7cae37b0c 100644 --- a/crates/ra_ide_db/src/search.rs +++ b/crates/ra_ide_db/src/search.rs @@ -60,6 +60,10 @@ impl SearchScope { SearchScope::new(std::iter::once((file, None)).collect()) } + pub fn files(files: &[FileId]) -> SearchScope { + SearchScope::new(files.iter().map(|f| (*f, None)).collect()) + } + pub fn intersection(&self, other: &SearchScope) -> SearchScope { let (mut small, mut large) = (&self.entries, &other.entries); if small.len() > large.len() { diff --git a/crates/ra_ssr/src/lib.rs b/crates/ra_ssr/src/lib.rs index 286619f59a..5a71d4f31a 100644 --- a/crates/ra_ssr/src/lib.rs +++ b/crates/ra_ssr/src/lib.rs @@ -151,8 +151,9 @@ impl<'db> MatchFinder<'db> { /// Returns matches for all added rules. pub fn matches(&self) -> SsrMatches { let mut matches = Vec::new(); + let mut usage_cache = search::UsageCache::default(); for rule in &self.rules { - self.find_matches_for_rule(rule, &mut matches); + self.find_matches_for_rule(rule, &mut usage_cache, &mut matches); } nester::nest_and_remove_collisions(matches, &self.sema) } diff --git a/crates/ra_ssr/src/resolving.rs b/crates/ra_ssr/src/resolving.rs index e9d0521117..63fd217ce1 100644 --- a/crates/ra_ssr/src/resolving.rs +++ b/crates/ra_ssr/src/resolving.rs @@ -22,6 +22,7 @@ pub(crate) struct ResolvedPattern { pub(crate) struct ResolvedPath { pub(crate) resolution: hir::PathResolution, + pub(crate) depth: u32, } impl ResolvedRule { @@ -62,7 +63,7 @@ struct Resolver<'a, 'db> { impl Resolver<'_, '_> { fn resolve_pattern_tree(&self, pattern: SyntaxNode) -> Result { let mut resolved_paths = FxHashMap::default(); - self.resolve(pattern.clone(), &mut resolved_paths)?; + self.resolve(pattern.clone(), 0, &mut resolved_paths)?; Ok(ResolvedPattern { node: pattern, resolved_paths, @@ -73,6 +74,7 @@ impl Resolver<'_, '_> { fn resolve( &self, node: SyntaxNode, + depth: u32, resolved_paths: &mut FxHashMap, ) -> Result<(), SsrError> { use ra_syntax::ast::AstNode; @@ -86,12 +88,12 @@ impl Resolver<'_, '_> { let resolution = self .resolve_path(&path) .ok_or_else(|| error!("Failed to resolve path `{}`", node.text()))?; - resolved_paths.insert(node, ResolvedPath { resolution }); + resolved_paths.insert(node, ResolvedPath { resolution, depth }); return Ok(()); } } for node in node.children() { - self.resolve(node, resolved_paths)?; + self.resolve(node, depth + 1, resolved_paths)?; } Ok(()) } diff --git a/crates/ra_ssr/src/search.rs b/crates/ra_ssr/src/search.rs index ccc2d544ab..9a4e35e96d 100644 --- a/crates/ra_ssr/src/search.rs +++ b/crates/ra_ssr/src/search.rs @@ -1,17 +1,106 @@ //! Searching for matches. -use crate::{matching, resolving::ResolvedRule, Match, MatchFinder}; +use crate::{ + matching, + resolving::{ResolvedPath, ResolvedPattern, ResolvedRule}, + Match, MatchFinder, +}; use ra_db::FileRange; +use ra_ide_db::{ + defs::Definition, + search::{Reference, SearchScope}, +}; use ra_syntax::{ast, AstNode, SyntaxNode}; +/// A cache for the results of find_usages. This is for when we have multiple patterns that have the +/// same path. e.g. if the pattern was `foo::Bar` that can parse as a path, an expression, a type +/// and as a pattern. In each, the usages of `foo::Bar` are the same and we'd like to avoid finding +/// them more than once. +#[derive(Default)] +pub(crate) struct UsageCache { + usages: Vec<(Definition, Vec)>, +} + impl<'db> MatchFinder<'db> { /// Adds all matches for `rule` to `matches_out`. Matches may overlap in ways that make /// replacement impossible, so further processing is required in order to properly nest matches /// and remove overlapping matches. This is done in the `nesting` module. - pub(crate) fn find_matches_for_rule(&self, rule: &ResolvedRule, matches_out: &mut Vec) { - // FIXME: Use resolved paths in the pattern to find places to search instead of always - // scanning every node. - self.slow_scan(rule, matches_out); + pub(crate) fn find_matches_for_rule( + &self, + rule: &ResolvedRule, + usage_cache: &mut UsageCache, + matches_out: &mut Vec, + ) { + if pick_path_for_usages(&rule.pattern).is_none() { + self.slow_scan(rule, matches_out); + return; + } + self.find_matches_for_pattern_tree(rule, &rule.pattern, usage_cache, matches_out); + } + + fn find_matches_for_pattern_tree( + &self, + rule: &ResolvedRule, + pattern: &ResolvedPattern, + usage_cache: &mut UsageCache, + matches_out: &mut Vec, + ) { + if let Some(first_path) = pick_path_for_usages(pattern) { + let definition: Definition = first_path.resolution.clone().into(); + for reference in self.find_usages(usage_cache, definition) { + let file = self.sema.parse(reference.file_range.file_id); + if let Some(path) = self.sema.find_node_at_offset_with_descend::( + file.syntax(), + reference.file_range.range.start(), + ) { + if let Some(node_to_match) = self + .sema + .ancestors_with_macros(path.syntax().clone()) + .skip(first_path.depth as usize) + .next() + { + if let Ok(m) = + matching::get_match(false, rule, &node_to_match, &None, &self.sema) + { + matches_out.push(m); + } + } + } + } + } + } + + fn find_usages<'a>( + &self, + usage_cache: &'a mut UsageCache, + definition: Definition, + ) -> &'a [Reference] { + // Logically if a lookup succeeds we should just return it. Unfortunately returning it would + // extend the lifetime of the borrow, then we wouldn't be able to do the insertion on a + // cache miss. This is a limitation of NLL and is fixed with Polonius. For now we do two + // lookups in the case of a cache hit. + if usage_cache.find(&definition).is_none() { + let usages = definition.find_usages(&self.sema, Some(self.search_scope())); + usage_cache.usages.push((definition, usages)); + return &usage_cache.usages.last().unwrap().1; + } + usage_cache.find(&definition).unwrap() + } + + /// Returns the scope within which we want to search. We don't want un unrestricted search + /// scope, since we don't want to find references in external dependencies. + fn search_scope(&self) -> SearchScope { + // FIXME: We should ideally have a test that checks that we edit local roots and not library + // roots. This probably would require some changes to fixtures, since currently everything + // seems to get put into a single source root. + use ra_db::SourceDatabaseExt; + use ra_ide_db::symbol_index::SymbolsDatabase; + let mut files = Vec::new(); + for &root in self.sema.db.local_roots().iter() { + let sr = self.sema.db.source_root(root); + files.extend(sr.iter()); + } + SearchScope::files(&files) } fn slow_scan(&self, rule: &ResolvedRule, matches_out: &mut Vec) { @@ -59,3 +148,35 @@ impl<'db> MatchFinder<'db> { } } } + +impl UsageCache { + fn find(&mut self, definition: &Definition) -> Option<&[Reference]> { + // We expect a very small number of cache entries (generally 1), so a linear scan should be + // fast enough and avoids the need to implement Hash for Definition. + for (d, refs) in &self.usages { + if d == definition { + return Some(refs); + } + } + None + } +} + +/// Returns a path that's suitable for path resolution. We exclude builtin types, since they aren't +/// something that we can find references to. We then somewhat arbitrarily pick the path that is the +/// longest as this is hopefully more likely to be less common, making it faster to find. +fn pick_path_for_usages(pattern: &ResolvedPattern) -> Option<&ResolvedPath> { + // FIXME: Take the scope of the resolved path into account. e.g. if there are any paths that are + // private to the current module, then we definitely would want to pick them over say a path + // from std. Possibly we should go further than this and intersect the search scopes for all + // resolved paths then search only in that scope. + pattern + .resolved_paths + .iter() + .filter(|(_, p)| { + !matches!(p.resolution, hir::PathResolution::Def(hir::ModuleDef::BuiltinType(_))) + }) + .map(|(node, resolved)| (node.text().len(), resolved)) + .max_by(|(a, _), (b, _)| a.cmp(b)) + .map(|(_, resolved)| resolved) +}