diff --git a/crates/ide-db/src/search.rs b/crates/ide-db/src/search.rs
index 197c327ee4..494ff82490 100644
--- a/crates/ide-db/src/search.rs
+++ b/crates/ide-db/src/search.rs
@@ -4,19 +4,24 @@
 //! get a super-set of matches. Then, we confirm each match using precise
 //! name resolution.

-use std::cell::LazyCell;
 use std::mem;
+use std::{cell::LazyCell, cmp::Reverse};

 use base_db::{salsa::Database, SourceDatabase, SourceRootDatabase};
 use hir::{
-    sym, AsAssocItem, DefWithBody, FileRange, HasAttrs, HasSource, HirFileIdExt, InFile,
-    InRealFile, ModuleSource, PathResolution, Semantics, Visibility,
+    sym, Adt, AsAssocItem, DefWithBody, FileRange, FileRangeWrapper, HasAttrs, HasContainer,
+    HasSource, HirFileIdExt, InFile, InFileWrapper, InRealFile, ItemContainer, ModuleSource,
+    PathResolution, Semantics, Visibility,
 };
 use memchr::memmem::Finder;
 use parser::SyntaxKind;
-use rustc_hash::FxHashMap;
+use rustc_hash::{FxHashMap, FxHashSet};
 use span::EditionedFileId;
-use syntax::{ast, match_ast, AstNode, AstToken, SyntaxElement, TextRange, TextSize, ToSmolStr};
+use syntax::{
+    ast::{self, HasName},
+    match_ast, AstNode, AstToken, SmolStr, SyntaxElement, SyntaxNode, TextRange, TextSize,
+    ToSmolStr,
+};
 use triomphe::Arc;

 use crate::{
@@ -442,6 +447,398 @@ impl<'a> FindUsages<'a> {
         res
     }

+    fn scope_files<'b>(
+        db: &'b RootDatabase,
+        scope: &'b SearchScope,
+    ) -> impl Iterator<Item = (Arc<str>, EditionedFileId, TextRange)> + 'b {
+        scope.entries.iter().map(|(&file_id, &search_range)| {
+            let text = db.file_text(file_id.file_id());
+            let search_range =
+                search_range.unwrap_or_else(|| TextRange::up_to(TextSize::of(&*text)));
+
+            (text, file_id, search_range)
+        })
+    }
+
+    fn match_indices<'b>(
+        text: &'b str,
+        finder: &'b Finder<'b>,
+        search_range: TextRange,
+    ) -> impl Iterator<Item = TextSize> + 'b {
+        finder.find_iter(text.as_bytes()).filter_map(move |idx| {
+            let offset: TextSize = idx.try_into().unwrap();
+            if !search_range.contains_inclusive(offset) {
+                return None;
+            }
+            // If this is not a word boundary, that means this is only part of an identifier,
+            // so it can't be what we're looking for.
+            // This speeds up short identifiers significantly.
+            if text[..idx]
+                .chars()
+                .next_back()
+                .is_some_and(|ch| matches!(ch, 'A'..='Z' | 'a'..='z' | '_'))
+                || text[idx + finder.needle().len()..]
+                    .chars()
+                    .next()
+                    .is_some_and(|ch| matches!(ch, 'A'..='Z' | 'a'..='z' | '_' | '0'..='9'))
+            {
+                return None;
+            }
+            Some(offset)
+        })
+    }
+
+    fn find_nodes<'b>(
+        sema: &'b Semantics<'_, RootDatabase>,
+        name: &str,
+        node: &syntax::SyntaxNode,
+        offset: TextSize,
+    ) -> impl Iterator<Item = SyntaxNode> + 'b {
+        node.token_at_offset(offset)
+            .find(|it| {
+                // `name` is stripped of raw ident prefix. See the comment on name retrieval below.
+                it.text().trim_start_matches("r#") == name
+            })
+            .into_iter()
+            .flat_map(move |token| {
+                sema.descend_into_macros_exact_if_in_macro(token)
+                    .into_iter()
+                    .filter_map(|it| it.parent())
+            })
+    }
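To make the word-boundary filter in `match_indices` concrete, here is a standalone sketch (not part of the patch) that applies the same accept/reject rule on top of `str::match_indices` from std instead of the `memchr` `Finder`; the helper name `identifier_hits` and the sample text are made up for the example.

```rust
// Standalone sketch: the same word-boundary filter `match_indices` applies,
// expressed over `str::match_indices` instead of a memchr Finder.
fn identifier_hits(text: &str, needle: &str) -> Vec<usize> {
    text.match_indices(needle)
        .filter(|&(idx, _)| {
            // Reject hits whose previous character continues an identifier...
            let prev_ok = !text[..idx]
                .chars()
                .next_back()
                .is_some_and(|ch| matches!(ch, 'A'..='Z' | 'a'..='z' | '_'));
            // ...and hits whose next character continues an identifier.
            let next_ok = !text[idx + needle.len()..]
                .chars()
                .next()
                .is_some_and(|ch| matches!(ch, 'A'..='Z' | 'a'..='z' | '_' | '0'..='9'));
            prev_ok && next_ok
        })
        .map(|(idx, _)| idx)
        .collect()
}

fn main() {
    let text = "renew(); new(); Foo::new(); newly()";
    // Only the stand-alone `new` occurrences survive the boundary check.
    assert_eq!(identifier_hits(text, "new"), vec![9, 21]);
}
```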
+
+    /// Performs a special fast search for associated functions. This is mainly intended
+    /// to speed up `new()`, which can take a long time.
+    ///
+    /// The trick is that instead of searching for `func_name` we search for
+    /// `TypeThatContainsContainerName::func_name`. We cannot search for exactly that (not even in
+    /// tokens), because `ContainerName` may be aliased.
+    /// Instead, we perform a textual search for `ContainerName`. Then, we look for all cases where
+    /// `ContainerName` may be aliased (that includes `use ContainerName as Xyz` and
+    /// `type Xyz = ContainerName`). We collect a list of all possible aliases of `ContainerName`.
+    /// The list can have false positives (because there may be multiple types named `ContainerName`),
+    /// but it cannot have false negatives. Then, we look for `TypeThatContainsContainerNameOrAnyAlias::func_name`.
+    /// Matches found this way have a high chance of being actual hits (of course, we still need to
+    /// verify each of them).
+    ///
+    /// Returns true if the search was completed.
+    // FIXME: Extend this to other cases, such as associated types/consts/enum variants (note those can be `use`d).
+    fn short_associated_function_fast_search(
+        &self,
+        sink: &mut dyn FnMut(EditionedFileId, FileReference) -> bool,
+        search_scope: &SearchScope,
+        name: &str,
+    ) -> bool {
+        let _p = tracing::info_span!("short_associated_function_fast_search").entered();
+
+        let container = (|| {
+            let Definition::Function(function) = self.def else {
+                return None;
+            };
+            if function.has_self_param(self.sema.db) {
+                return None;
+            }
+            match function.container(self.sema.db) {
+                // Only freestanding `impl`s qualify; methods from traits
+                // can be called from within subtraits and bounds.
+                ItemContainer::Impl(impl_) => {
+                    let has_trait = impl_.trait_(self.sema.db).is_some();
+                    if has_trait {
+                        return None;
+                    }
+                    let adt = impl_.self_ty(self.sema.db).as_adt()?;
+                    Some(adt)
+                }
+                _ => None,
+            }
+        })();
+        let Some(container) = container else {
+            return false;
+        };
+
+        fn has_any_name(node: &SyntaxNode, mut predicate: impl FnMut(&str) -> bool) -> bool {
+            node.descendants().any(|node| {
+                match_ast! {
+                    match node {
+                        ast::Name(it) => predicate(it.text().trim_start_matches("r#")),
+                        ast::NameRef(it) => predicate(it.text().trim_start_matches("r#")),
+                        _ => false
+                    }
+                }
+            })
+        }
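Before the alias collection below, a small standalone example (not part of the patch) of the three spellings it has to chase; `Renamed`, `Aliased`, and `build` are illustrative names, and only `Foo::new` mirrors the scenario described in the doc comment above.

```rust
// Standalone sketch: three ways `Foo::new` can be reached without the call site
// spelling out `Foo`.
struct Foo;
impl Foo {
    fn new() -> Foo {
        Foo
    }
}

use self::Foo as Renamed; // `use ... as ...` rename
type Aliased = Foo; // type alias

impl Foo {
    fn build() -> Foo {
        Self::new() // `Self` inside an impl of our type
    }
}

fn main() {
    // `Renamed::new()` and `Aliased::new()` never mention the token `Foo`;
    // `Self::new()` inside `build` is the third spelling the search must handle.
    let _ = (Renamed::new(), Aliased::new(), Foo::build());
}
```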
+
+        // This is a fixpoint algorithm with O(number of aliases) iterations, but most types have no or few aliases,
+        // so this should stay fast.
+        //
+        /// Returns `(aliases, ranges_where_Self_can_refer_to_our_type)`.
+        fn collect_possible_aliases(
+            sema: &Semantics<'_, RootDatabase>,
+            container: Adt,
+        ) -> Option<(FxHashSet<SmolStr>, Vec<FileRangeWrapper<EditionedFileId>>)> {
+            fn insert_type_alias(
+                db: &RootDatabase,
+                to_process: &mut Vec<(SmolStr, SearchScope)>,
+                alias_name: &str,
+                def: Definition,
+            ) {
+                let alias = alias_name.trim_start_matches("r#").to_smolstr();
+                tracing::debug!("found alias: {alias}");
+                to_process.push((alias, def.search_scope(db)));
+            }
+
+            let _p = tracing::info_span!("collect_possible_aliases").entered();
+
+            let db = sema.db;
+            let container_name = container.name(db).unescaped().display(db).to_smolstr();
+            let search_scope = Definition::from(container).search_scope(db);
+            let mut seen = FxHashSet::default();
+            let mut completed = FxHashSet::default();
+            let mut to_process = vec![(container_name, search_scope)];
+            let mut is_possibly_self = Vec::new();
+            let mut total_files_searched = 0;
+
+            while let Some((current_to_process, current_to_process_search_scope)) = to_process.pop()
+            {
+                let is_alias = |alias: &ast::TypeAlias| {
+                    let def = sema.to_def(alias)?;
+                    let ty = def.ty(db);
+                    let is_alias = ty.as_adt()? == container;
+                    is_alias.then_some(def)
+                };
+
+                let finder = Finder::new(current_to_process.as_bytes());
+                for (file_text, file_id, search_range) in
+                    FindUsages::scope_files(db, &current_to_process_search_scope)
+                {
+                    let tree = LazyCell::new(move || sema.parse(file_id).syntax().clone());
+
+                    for offset in FindUsages::match_indices(&file_text, &finder, search_range) {
+                        let usages =
+                            FindUsages::find_nodes(sema, &current_to_process, &tree, offset)
+                                .filter(|it| {
+                                    matches!(it.kind(), SyntaxKind::NAME | SyntaxKind::NAME_REF)
+                                });
+                        for usage in usages {
+                            if let Some(alias) = usage.parent().and_then(|it| {
+                                let path = ast::PathSegment::cast(it)?.parent_path();
+                                let use_tree = ast::UseTree::cast(path.syntax().parent()?)?;
+                                use_tree.rename()?.name()
+                            }) {
+                                if seen.insert(InFileWrapper::new(
+                                    file_id,
+                                    alias.syntax().text_range(),
+                                )) {
+                                    tracing::debug!("found alias: {alias}");
+                                    // FIXME: `use`s have no easy way to determine their search scope, but they are rare.
+                                    to_process.push((
+                                        alias.text().to_smolstr(),
+                                        current_to_process_search_scope.clone(),
+                                    ));
+                                }
+                            } else if let Some(alias) =
+                                usage.ancestors().find_map(ast::TypeAlias::cast)
+                            {
+                                if let Some(name) = alias.name() {
+                                    if seen.insert(InFileWrapper::new(
+                                        file_id,
+                                        name.syntax().text_range(),
+                                    )) {
+                                        if let Some(def) = is_alias(&alias) {
+                                            insert_type_alias(
+                                                sema.db,
+                                                &mut to_process,
+                                                name.text().as_str(),
+                                                def.into(),
+                                            );
+                                        }
+                                    }
+                                }
+                            }
+
+                            // We need to account for `Self`. It can only refer to our type inside an impl.
+                            let impl_ = 'impl_: {
+                                for ancestor in usage.ancestors() {
+                                    if let Some(parent) = ancestor.parent() {
+                                        if let Some(parent) = ast::Impl::cast(parent) {
+                                            // Only if the GENERIC_PARAM_LIST is directly under impl, otherwise it may be in the self ty.
+                                            if matches!(
+                                                ancestor.kind(),
+                                                SyntaxKind::ASSOC_ITEM_LIST
+                                                    | SyntaxKind::WHERE_CLAUSE
+                                                    | SyntaxKind::GENERIC_PARAM_LIST
+                                            ) {
+                                                break;
+                                            }
+                                            if parent
+                                                .trait_()
+                                                .is_some_and(|trait_| *trait_.syntax() == ancestor)
+                                            {
+                                                break;
+                                            }
+
+                                            // Otherwise, found an impl where its self ty may be our type.
+                                            break 'impl_ Some(parent);
+                                        }
+                                    }
+                                }
+                                None
+                            };
+                            (|| {
+                                let impl_ = impl_?;
+                                is_possibly_self.push(sema.original_range(impl_.syntax()));
+                                let assoc_items = impl_.assoc_item_list()?;
+                                let type_aliases = assoc_items
+                                    .syntax()
+                                    .descendants()
+                                    .filter_map(ast::TypeAlias::cast);
+                                for type_alias in type_aliases {
+                                    let Some(ty) = type_alias.ty() else { continue };
+                                    let Some(name) = type_alias.name() else { continue };
+                                    let contains_self = ty
+                                        .syntax()
+                                        .descendants_with_tokens()
+                                        .any(|node| node.kind() == SyntaxKind::SELF_TYPE_KW);
+                                    if !contains_self {
+                                        continue;
+                                    }
+                                    if seen.insert(InFileWrapper::new(
+                                        file_id,
+                                        name.syntax().text_range(),
+                                    )) {
+                                        if let Some(def) = is_alias(&type_alias) {
+                                            insert_type_alias(
+                                                sema.db,
+                                                &mut to_process,
+                                                name.text().as_str(),
+                                                def.into(),
+                                            );
+                                        }
+                                    }
+                                }
+                                Some(())
+                            })();
+                        }
+                    }
+                }
+
+                completed.insert(current_to_process);
+
+                total_files_searched += current_to_process_search_scope.entries.len();
+                // FIXME: Maybe this needs to be relative to the project size, or at least to the initial search scope?
+                if total_files_searched > 20_000 && completed.len() > 100 {
+                    // This case is extremely unlikely (even searching for `Vec::new()` on rust-analyzer does not enter
+                    // here - it searches less than 10,000 files, and it does so in five seconds), but if we get here,
+                    // we are at risk of entering an almost-infinite loop of growing the aliases list. So just stop and
+                    // let normal search handle this case.
+                    tracing::info!(aliases_count = %completed.len(), "too many aliases; leaving fast path");
+                    return None;
+                }
+            }
+
+            // Impls can contain each other, so we need to deduplicate their ranges.
+            is_possibly_self.sort_unstable_by_key(|position| {
+                (position.file_id, position.range.start(), Reverse(position.range.end()))
+            });
+            is_possibly_self.dedup_by(|pos2, pos1| {
+                pos1.file_id == pos2.file_id
+                    && pos1.range.start() <= pos2.range.start()
+                    && pos1.range.end() >= pos2.range.end()
+            });
+
+            tracing::info!(aliases_count = %completed.len(), "aliases search completed");
+
+            Some((completed, is_possibly_self))
+        }
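The containment dedup at the end of `collect_possible_aliases` can be tried in isolation. This sketch (not part of the patch) runs the same sort-by-`(start, Reverse(end))` plus `dedup_by` trick on plain `(start, end)` pairs; `dedup_nested` is a made-up name.

```rust
use std::cmp::Reverse;

// Standalone sketch: sorting by (start, Reverse(end)) puts an enclosing range
// immediately before anything nested inside it, so `dedup_by` can drop the
// nested ranges in a single pass.
fn dedup_nested(mut ranges: Vec<(u32, u32)>) -> Vec<(u32, u32)> {
    ranges.sort_unstable_by_key(|&(start, end)| (start, Reverse(end)));
    // `dedup_by` removes the later element when the closure returns true;
    // `prev` is the element kept so far, `next` is the candidate after it.
    ranges.dedup_by(|next, prev| prev.0 <= next.0 && prev.1 >= next.1);
    ranges
}

fn main() {
    // An impl nested inside another impl's body (as in the overlapping-self test)
    // collapses into the outer range.
    let ranges = vec![(10, 100), (40, 60), (10, 100), (120, 150)];
    assert_eq!(dedup_nested(ranges), vec![(10, 100), (120, 150)]);
}
```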
+
+        fn search(
+            this: &FindUsages<'_>,
+            finder: &Finder<'_>,
+            name: &str,
+            files: impl Iterator<Item = (Arc<str>, EditionedFileId, TextRange)>,
+            mut container_predicate: impl FnMut(
+                &SyntaxNode,
+                InFileWrapper<EditionedFileId, TextRange>,
+            ) -> bool,
+            sink: &mut dyn FnMut(EditionedFileId, FileReference) -> bool,
+        ) {
+            for (file_text, file_id, search_range) in files {
+                let tree = LazyCell::new(move || this.sema.parse(file_id).syntax().clone());
+
+                for offset in FindUsages::match_indices(&file_text, finder, search_range) {
+                    let usages = FindUsages::find_nodes(this.sema, name, &tree, offset)
+                        .filter_map(ast::NameRef::cast);
+                    for usage in usages {
+                        let found_usage = usage
+                            .syntax()
+                            .parent()
+                            .and_then(ast::PathSegment::cast)
+                            .map(|path_segment| {
+                                container_predicate(
+                                    path_segment.parent_path().syntax(),
+                                    InFileWrapper::new(file_id, usage.syntax().text_range()),
+                                )
+                            })
+                            .unwrap_or(false);
+                        if found_usage {
+                            this.found_name_ref(&usage, sink);
+                        }
+                    }
+                }
+            }
+        }
+
+        let Some((container_possible_aliases, is_possibly_self)) =
+            collect_possible_aliases(self.sema, container)
+        else {
+            return false;
+        };
+
+        // FIXME: If Rust ever gains the ability to `use Struct::method`, we'll also need to account for free
+        // functions.
+        let finder = Finder::new(name.as_bytes());
+        // The search for `Self` may return duplicate results with `ContainerName`, so deduplicate them.
+        let mut self_positions = FxHashSet::default();
+        tracing::info_span!("Self_search").in_scope(|| {
+            search(
+                self,
+                &finder,
+                name,
+                is_possibly_self.into_iter().map(|position| {
+                    (
+                        self.sema.db.file_text(position.file_id.file_id()),
+                        position.file_id,
+                        position.range,
+                    )
+                }),
+                |path, name_position| {
+                    let has_self = path
+                        .descendants_with_tokens()
+                        .any(|node| node.kind() == SyntaxKind::SELF_TYPE_KW);
+                    if has_self {
+                        self_positions.insert(name_position);
+                    }
+                    has_self
+                },
+                sink,
+            )
+        });
+        tracing::info_span!("aliases_search").in_scope(|| {
+            search(
+                self,
+                &finder,
+                name,
+                FindUsages::scope_files(self.sema.db, search_scope),
+                |path, name_position| {
+                    has_any_name(path, |name| container_possible_aliases.contains(name))
+                        && !self_positions.contains(&name_position)
+                },
+                sink,
+            )
+        });
+
+        true
+    }
+
     pub fn search(&self, sink: &mut dyn FnMut(EditionedFileId, FileReference) -> bool) {
         let _p = tracing::info_span!("FindUsages:search").entered();
         let sema = self.sema;
@@ -488,77 +885,23 @@ impl<'a> FindUsages<'a> {
             Some(s) => s.as_str(),
             None => return,
         };
+
+        // FIXME: This should probably depend on the number of results (specifically, the number of false results).
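The lines that follow gate the fast path on the name length. As a standalone illustration of that control flow (not part of the patch), the sketch below uses a hypothetical `SHORT_NAME_LEN` constant standing in for the literal `7`:

```rust
// Standalone sketch of the gating logic: try the fast path only for short names,
// and fall back to the normal textual search whenever it declines.
fn find_usages_sketch(name: &str, fast_path: impl Fn(&str) -> bool, slow_path: impl Fn(&str)) {
    // Longer names are cheap to search for textually because they produce few false hits.
    const SHORT_NAME_LEN: usize = 7;
    if name.len() <= SHORT_NAME_LEN && fast_path(name) {
        // The fast path ran to completion and already reported every reference.
        return;
    }
    // Either the name is long or the fast path bailed out (returned false).
    slow_path(name);
}

fn main() {
    find_usages_sketch("new", |_| true, |_| unreachable!());
    find_usages_sketch("a_rather_long_name", |_| true, |n| println!("slow search for {n}"));
}
```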
+ if name.len() <= 7 && self.short_associated_function_fast_search(sink, &search_scope, name) + { + return; + } + let finder = &Finder::new(name); let include_self_kw_refs = self.include_self_kw_refs.as_ref().map(|ty| (ty, Finder::new("Self"))); - // for<'a> |text: &'a str, name: &'a str, search_range: TextRange| -> impl Iterator + 'a { ... } - fn match_indices<'a>( - text: &'a str, - finder: &'a Finder<'a>, - search_range: TextRange, - ) -> impl Iterator + 'a { - finder.find_iter(text.as_bytes()).filter_map(move |idx| { - let offset: TextSize = idx.try_into().unwrap(); - if !search_range.contains_inclusive(offset) { - return None; - } - // If this is not a word boundary, that means this is only part of an identifier, - // so it can't be what we're looking for. - // This speeds up short identifiers significantly. - if text[..idx] - .chars() - .next_back() - .is_some_and(|ch| matches!(ch, 'A'..='Z' | 'a'..='z' | '_')) - || text[idx + finder.needle().len()..] - .chars() - .next() - .is_some_and(|ch| matches!(ch, 'A'..='Z' | 'a'..='z' | '_' | '0'..='9')) - { - return None; - } - Some(offset) - }) - } - - // for<'a> |scope: &'a SearchScope| -> impl Iterator, EditionedFileId, TextRange)> + 'a { ... } - fn scope_files<'a>( - sema: &'a Semantics<'_, RootDatabase>, - scope: &'a SearchScope, - ) -> impl Iterator, EditionedFileId, TextRange)> + 'a { - scope.entries.iter().map(|(&file_id, &search_range)| { - let text = sema.db.file_text(file_id.file_id()); - let search_range = - search_range.unwrap_or_else(|| TextRange::up_to(TextSize::of(&*text))); - - (text, file_id, search_range) - }) - } - - let find_nodes = move |name: &str, node: &syntax::SyntaxNode, offset: TextSize| { - node.token_at_offset(offset) - .find(|it| { - // `name` is stripped of raw ident prefix. See the comment on name retrieval above. - it.text().trim_start_matches("r#") == name - }) - .into_iter() - .flat_map(move |token| { - // FIXME: There should be optimization potential here - // Currently we try to descend everything we find which - // means we call `Semantics::descend_into_macros` on - // every textual hit. That function is notoriously - // expensive even for things that do not get down mapped - // into macros. 
- sema.descend_into_macros_exact(token).into_iter().filter_map(|it| it.parent()) - }) - }; - - for (text, file_id, search_range) in scope_files(sema, &search_scope) { + for (text, file_id, search_range) in Self::scope_files(sema.db, &search_scope) { self.sema.db.unwind_if_cancelled(); let tree = LazyCell::new(move || sema.parse(file_id).syntax().clone()); // Search for occurrences of the items name - for offset in match_indices(&text, finder, search_range) { + for offset in Self::match_indices(&text, finder, search_range) { tree.token_at_offset(offset).for_each(|token| { let Some(str_token) = ast::String::cast(token.clone()) else { return }; if let Some((range, nameres)) = @@ -568,7 +911,9 @@ impl<'a> FindUsages<'a> { } }); - for name in find_nodes(name, &tree, offset).filter_map(ast::NameLike::cast) { + for name in + Self::find_nodes(sema, name, &tree, offset).filter_map(ast::NameLike::cast) + { if match name { ast::NameLike::NameRef(name_ref) => self.found_name_ref(&name_ref, sink), ast::NameLike::Name(name) => self.found_name(&name, sink), @@ -580,8 +925,9 @@ impl<'a> FindUsages<'a> { } // Search for occurrences of the `Self` referring to our type if let Some((self_ty, finder)) = &include_self_kw_refs { - for offset in match_indices(&text, finder, search_range) { - for name_ref in find_nodes("Self", &tree, offset).filter_map(ast::NameRef::cast) + for offset in Self::match_indices(&text, finder, search_range) { + for name_ref in + Self::find_nodes(sema, "Self", &tree, offset).filter_map(ast::NameRef::cast) { if self.found_self_ty_name_ref(self_ty, &name_ref, sink) { return; @@ -599,13 +945,13 @@ impl<'a> FindUsages<'a> { let is_crate_root = module.is_crate_root().then(|| Finder::new("crate")); let finder = &Finder::new("super"); - for (text, file_id, search_range) in scope_files(sema, &scope) { + for (text, file_id, search_range) in Self::scope_files(sema.db, &scope) { self.sema.db.unwind_if_cancelled(); let tree = LazyCell::new(move || sema.parse(file_id).syntax().clone()); - for offset in match_indices(&text, finder, search_range) { - for name_ref in - find_nodes("super", &tree, offset).filter_map(ast::NameRef::cast) + for offset in Self::match_indices(&text, finder, search_range) { + for name_ref in Self::find_nodes(sema, "super", &tree, offset) + .filter_map(ast::NameRef::cast) { if self.found_name_ref(&name_ref, sink) { return; @@ -613,9 +959,9 @@ impl<'a> FindUsages<'a> { } } if let Some(finder) = &is_crate_root { - for offset in match_indices(&text, finder, search_range) { - for name_ref in - find_nodes("crate", &tree, offset).filter_map(ast::NameRef::cast) + for offset in Self::match_indices(&text, finder, search_range) { + for name_ref in Self::find_nodes(sema, "crate", &tree, offset) + .filter_map(ast::NameRef::cast) { if self.found_name_ref(&name_ref, sink) { return; @@ -656,8 +1002,9 @@ impl<'a> FindUsages<'a> { let tree = LazyCell::new(|| sema.parse(file_id).syntax().clone()); let finder = &Finder::new("self"); - for offset in match_indices(&text, finder, search_range) { - for name_ref in find_nodes("self", &tree, offset).filter_map(ast::NameRef::cast) + for offset in Self::match_indices(&text, finder, search_range) { + for name_ref in + Self::find_nodes(sema, "self", &tree, offset).filter_map(ast::NameRef::cast) { if self.found_self_module_name_ref(&name_ref, sink) { return; diff --git a/crates/ide/src/references.rs b/crates/ide/src/references.rs index 55afcb59ba..43c04b20dd 100644 --- a/crates/ide/src/references.rs +++ b/crates/ide/src/references.rs @@ -2510,4 +2510,234 @@ 
fn main() { "#]], ) } + + // The following are tests for short_associated_function_fast_search() in crates/ide-db/src/search.rs, because find all references + // use `FindUsages` and I found it easy to test it here. + + #[test] + fn goto_ref_on_short_associated_function() { + check( + r#" +struct Foo; +impl Foo { + fn new$0() {} +} + +fn bar() { + Foo::new(); +} +fn baz() { + Foo::new; +} + "#, + expect![[r#" + new Function FileId(0) 27..38 30..33 + + FileId(0) 62..65 + FileId(0) 91..94 + "#]], + ); + } + + #[test] + fn goto_ref_on_short_associated_function_with_aliases() { + check( + r#" +//- /lib.rs +mod a; +mod b; + +struct Foo; +impl Foo { + fn new$0() {} +} + +fn bar() { + b::c::Baz::new(); +} + +//- /a.rs +use crate::Foo as Bar; + +fn baz() { Bar::new(); } +fn quux() { ::Assoc::new(); } + +//- /b.rs +pub(crate) mod c; + +pub(crate) struct Other; +pub(crate) trait Trait { + type Assoc; +} +impl Trait for Other { + type Assoc = super::Foo; +} + +//- /b/c.rs +type Itself = T; +pub(in super::super) type Baz = Itself; + "#, + expect![[r#" + new Function FileId(0) 42..53 45..48 + + FileId(0) 83..86 + FileId(1) 40..43 + FileId(1) 106..109 + "#]], + ); + } + + #[test] + fn goto_ref_on_short_associated_function_self_works() { + check( + r#" +//- /lib.rs +mod module; + +struct Foo; +impl Foo { + fn new$0() {} + fn bar() { Self::new(); } +} +trait Trait { + type Assoc; + fn baz(); +} +impl Trait for Foo { + type Assoc = Self; + fn baz() { Self::new(); } +} + +//- /module.rs +impl super::Foo { + fn quux() { Self::new(); } +} +fn foo() { ::Assoc::new(); } + "#, + expect![[r#" + new Function FileId(0) 40..51 43..46 + + FileId(0) 73..76 + FileId(0) 195..198 + FileId(1) 40..43 + FileId(1) 99..102 + "#]], + ); + } + + #[test] + fn goto_ref_on_short_associated_function_overlapping_self_ranges() { + check( + r#" +struct Foo; +impl Foo { + fn new$0() {} + fn bar() { + Self::new(); + impl Foo { + fn baz() { Self::new(); } + } + } +} + "#, + expect![[r#" + new Function FileId(0) 27..38 30..33 + + FileId(0) 68..71 + FileId(0) 123..126 + "#]], + ); + } + + #[test] + fn goto_ref_on_short_associated_function_no_direct_self_but_path_contains_self() { + check( + r#" +struct Foo; +impl Foo { + fn new$0() {} +} +trait Trait { + type Assoc; +} +impl Trait for (A, B) { + type Assoc = B; +} +impl Foo { + fn bar() { + <((), Foo) as Trait>::Assoc::new(); + <((), Self) as Trait>::Assoc::new(); + } +} + "#, + expect![[r#" + new Function FileId(0) 27..38 30..33 + + FileId(0) 188..191 + FileId(0) 233..236 + "#]], + ); + } + + // Checks that we can circumvent our fast path logic using complicated type level functions. + // This mainly exists as a documentation. I don't believe it is fixable. + // Usages search is not 100% accurate anyway; we miss macros. 
+ #[test] + fn goto_ref_on_short_associated_function_complicated_type_magic_can_confuse_our_logic() { + check( + r#" +struct Foo; +impl Foo { + fn new$0() {} +} + +struct ChoiceA; +struct ChoiceB; +trait Choice { + type Choose; +} +impl Choice for ChoiceA { + type Choose = A; +} +impl Choice for ChoiceB { + type Choose = B; +} +type Choose = ::Choose; + +fn bar() { + Choose::<(), ChoiceB>::new(); +} + "#, + expect![[r#" + new Function FileId(0) 27..38 30..33 + + (no references) + "#]], + ); + } + + #[test] + fn goto_ref_on_short_associated_function_same_path_mention_alias_and_self() { + check( + r#" +struct Foo; +impl Foo { + fn new$0() {} +} + +type IgnoreFirst = B; + +impl Foo { + fn bar() { + >::new(); + } +} + "#, + expect![[r#" + new Function FileId(0) 27..38 30..33 + + FileId(0) 131..134 + "#]], + ); + } }
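The preceding test documents the opposite case, where the container is only reachable through an associated-type projection and the textual fast path intentionally reports nothing. For the final test, here is a compilable, standalone restatement of the shape it exercises (not part of the patch); `build` is an illustrative name, while `Foo`, `IgnoreFirst`, and the qualified call mirror the fixture.

```rust
// Standalone sketch: the path segment the reference hangs off is a type-level
// alias that discards its first parameter, so the fast path has to chase the
// alias and also reconcile the `Self` mention in the same path.
struct Foo;
impl Foo {
    fn new() -> Foo {
        Foo
    }
}

// A type-level function that ignores its first parameter.
type IgnoreFirst<A, B> = B;

impl Foo {
    fn build() -> Foo {
        // Mentions both `Self` and `Foo`, but the usage is reached through the alias
        // segment, which is why `Self` hits are deduplicated against alias hits.
        <IgnoreFirst<Self, Foo>>::new()
    }
}

fn main() {
    let _ = Foo::build();
}
```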