Auto merge of #17372 - Veykril:parallel-diagnostics, r=Veykril

feat: Compute native diagnostics in parallel
bors 2024-06-10 07:34:36 +00:00
commit 72dfbe95de
17 changed files with 149 additions and 61 deletions


@ -162,7 +162,11 @@ xshell = "0.2.5"
dashmap = { version = "=5.5.3", features = ["raw-api"] }
[workspace.lints.rust]
-rust_2018_idioms = "warn"
+bare_trait_objects = "warn"
+elided_lifetimes_in_paths = "warn"
+ellipsis_inclusive_range_patterns = "warn"
+explicit_outlives_requirements = "warn"
+unused_extern_crates = "warn"
unused_lifetimes = "warn"
unreachable_pub = "warn"
semicolon_in_expressions_from_macros = "warn"


@ -246,6 +246,7 @@ impl TyBuilder<()> {
/// - yield type of coroutine ([`Coroutine::Yield`](std::ops::Coroutine::Yield))
/// - return type of coroutine ([`Coroutine::Return`](std::ops::Coroutine::Return))
/// - generic parameters in scope on `parent`
+///
/// in this order.
///
/// This method prepopulates the builder with placeholder substitution of `parent`, so you


@ -898,20 +898,19 @@ pub enum Rvalue {
Cast(CastKind, Operand, Ty),
// FIXME link to `pointer::offset` when it hits stable.
-/// * `Offset` has the same semantics as `pointer::offset`, except that the second
-/// parameter may be a `usize` as well.
-/// * The comparison operations accept `bool`s, `char`s, signed or unsigned integers, floats,
-/// raw pointers, or function pointers and return a `bool`. The types of the operands must be
-/// matching, up to the usual caveat of the lifetimes in function pointers.
-/// * Left and right shift operations accept signed or unsigned integers not necessarily of the
-/// same type and return a value of the same type as their LHS. Like in Rust, the RHS is
-/// truncated as needed.
-/// * The `Bit*` operations accept signed integers, unsigned integers, or bools with matching
-/// types and return a value of that type.
-/// * The remaining operations accept signed integers, unsigned integers, or floats with
-/// matching types and return a value of that type.
+// /// * `Offset` has the same semantics as `pointer::offset`, except that the second
+// /// parameter may be a `usize` as well.
+// /// * The comparison operations accept `bool`s, `char`s, signed or unsigned integers, floats,
+// /// raw pointers, or function pointers and return a `bool`. The types of the operands must be
+// /// matching, up to the usual caveat of the lifetimes in function pointers.
+// /// * Left and right shift operations accept signed or unsigned integers not necessarily of the
+// /// same type and return a value of the same type as their LHS. Like in Rust, the RHS is
+// /// truncated as needed.
+// /// * The `Bit*` operations accept signed integers, unsigned integers, or bools with matching
+// /// types and return a value of that type.
+// /// * The remaining operations accept signed integers, unsigned integers, or floats with
+// /// matching types and return a value of that type.
//BinaryOp(BinOp, Box<(Operand, Operand)>),
/// Same as `BinaryOp`, but yields `(T, bool)` with a `bool` indicating an error condition.
///
/// When overflow checking is disabled and we are generating run-time code, the error condition
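The doc comments above describe the typed contract of MIR binary operations. As a quick illustration of the `(T, bool)` shape that `CheckedBinaryOp` yields, the standard library's `overflowing_*` operations behave the same way; this is plain std Rust, not the MIR evaluator itself:

```rust
fn main() {
    // `CheckedBinaryOp` yields `(T, bool)`; std's overflowing ops share the shape.
    let (sum, overflowed) = i32::MAX.overflowing_add(1);
    assert!(overflowed); // the error condition is flagged instead of panicking
    assert_eq!(sum, i32::MIN); // the wrapped result

    // Shifts return the type of their LHS; the RHS is truncated as needed.
    let (shifted, out_of_range) = 1u8.overflowing_shl(9); // 9 % 8 == 1
    assert_eq!(shifted, 2);
    assert!(out_of_range);
}
```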


@ -5,6 +5,7 @@
//! * `defs` - Set of items in scope at term search target location
//! * `lookup` - Lookup table for types
//! * `should_continue` - Function that indicates when to stop iterating
+//!
//! And they return iterator that yields type trees that unify with the `goal` type.
use std::iter;
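The module doc above defines the contract every term-search tactic follows. A minimal sketch of that shape, with hypothetical stand-in types (the real tactics take a richer semantic context and a lookup table):

```rust
// Hypothetical stand-in for the real term-search candidate type.
#[derive(Clone)]
struct TypeTree(String);

// A tactic receives the shared inputs and lazily yields candidates that
// unify with the goal; `should_continue` lets the caller cut iteration short.
fn trivial_tactic<'a>(
    defs: &'a [TypeTree],
    should_continue: &'a dyn Fn() -> bool,
) -> impl Iterator<Item = TypeTree> + 'a {
    defs.iter().take_while(move |_| should_continue()).cloned()
}
```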


@ -393,9 +393,9 @@ impl FunctionBuilder {
/// The rule for whether we focus a return type or not (and thus focus the function body),
/// is rather simple:
/// * If we could *not* infer what the return type should be, focus it (so the user can fill-in
-/// the correct return type).
+///   the correct return type).
/// * If we could infer the return type, don't focus it (and thus focus the function body) so the
-/// user can change the `todo!` function body.
+///   user can change the `todo!` function body.
fn make_return_type(
ctx: &AssistContext<'_>,
expr: &ast::Expr,
@ -918,9 +918,9 @@ fn filter_generic_params(ctx: &AssistContext<'_>, node: SyntaxNode) -> Option<hi
/// Say we have a trait bound `Struct<T>: Trait<U>`. Given `necessary_params`, when is it relevant
/// and when not? Some observations:
/// - When `necessary_params` contains `T`, it's likely that we want this bound, but now we have
-/// an extra param to consider: `U`.
+///   an extra param to consider: `U`.
/// - On the other hand, when `necessary_params` contains `U` (but not `T`), then it's unlikely
-/// that we want this bound because it doesn't really constrain `U`.
+///   that we want this bound because it doesn't really constrain `U`.
///
/// (FIXME?: The latter clause might be overstating. We may want to include the bound if the self
/// type does *not* include generic params at all - like `Option<i32>: From<U>`)
@ -928,7 +928,7 @@ fn filter_generic_params(ctx: &AssistContext<'_>, node: SyntaxNode) -> Option<hi
/// Can we make this a bit more formal? Let's define "dependency" between generic parameters and
/// trait bounds:
/// - A generic parameter `T` depends on a trait bound if `T` appears in the self type (i.e. left
-/// part) of the bound.
+///   part) of the bound.
/// - A trait bound depends on a generic parameter `T` if `T` appears in the bound.
///
/// Using the notion, what we want is all the bounds that params in `necessary_params`
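A toy sketch of the dependency closure this doc comment defines: taking a bound pulls in every param it mentions, which may in turn make further bounds relevant, so we iterate to a fixed point. Single-character params and the `Bound` record are hypothetical stand-ins for the real `hir` types:

```rust
use std::collections::HashSet;

// Toy model of a trait bound: the params in its self type (left of `:`)
// and all params mentioned anywhere in the bound.
struct Bound {
    self_ty_params: HashSet<char>,
    mentioned: HashSet<char>,
}

// Collect the indices of every bound that `necessary` transitively depends on.
fn relevant_bounds(bounds: &[Bound], necessary: &HashSet<char>) -> Vec<usize> {
    let mut params = necessary.clone();
    let mut picked: Vec<usize> = Vec::new();
    loop {
        let mut changed = false;
        for (i, b) in bounds.iter().enumerate() {
            // A bound is relevant once any known param appears in its self type.
            if !picked.contains(&i) && b.self_ty_params.iter().any(|p| params.contains(p)) {
                picked.push(i);
                for &p in &b.mentioned {
                    changed |= params.insert(p);
                }
                changed = true;
            }
        }
        if !changed {
            break;
        }
    }
    picked
}
```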


@ -368,7 +368,7 @@ fn inline(
_ => None,
})
.for_each(|usage| {
-ted::replace(usage, &this());
+ted::replace(usage, this());
});
}
}
@ -483,7 +483,7 @@ fn inline(
cov_mark::hit!(inline_call_inline_direct_field);
field.replace_expr(replacement.clone_for_update());
} else {
-ted::replace(usage.syntax(), &replacement.syntax().clone_for_update());
+ted::replace(usage.syntax(), replacement.syntax().clone_for_update());
}
};


@ -452,6 +452,7 @@ pub(crate) struct CompletionContext<'a> {
/// - crate-root
/// - mod foo
/// - mod bar
+///
/// Here depth will be 2
pub(crate) depth_from_crate_root: usize,
}


@ -15,7 +15,7 @@ use ide_db::{
};
use itertools::Itertools;
use proc_macro_api::{MacroDylib, ProcMacroServer};
-use project_model::{CargoConfig, PackageRoot, ProjectManifest, ProjectWorkspace};
+use project_model::{CargoConfig, ManifestPath, PackageRoot, ProjectManifest, ProjectWorkspace};
use span::Span;
use tracing::instrument;
use vfs::{file_set::FileSetConfig, loader::Handle, AbsPath, AbsPathBuf, VfsPath};
@ -238,6 +238,19 @@ impl ProjectFolders {
fsc.add_file_set(file_set_roots)
}
+// register the workspace manifest as well, note that this currently causes duplicates for
+// non-virtual cargo workspaces! We ought to fix that
+for manifest in workspaces.iter().filter_map(|ws| ws.manifest().map(ManifestPath::as_ref)) {
+let file_set_roots: Vec<VfsPath> = vec![VfsPath::from(manifest.to_owned())];
+let entry = vfs::loader::Entry::Files(vec![manifest.to_owned()]);
+res.watch.push(res.load.len());
+res.load.push(entry);
+local_filesets.push(fsc.len() as u64);
+fsc.add_file_set(file_set_roots)
+}
let fsc = fsc.build();
res.source_root_config = SourceRootConfig { fsc, local_filesets };


@ -93,6 +93,7 @@ fn read_section<'a>(dylib_binary: &'a [u8], section_name: &str) -> io::Result<&'
/// means bytes from here (including this sequence) are compressed in
/// snappy compression format. Version info is inside here, so decompress
/// this.
+///
/// The bytes you get after decompressing the snappy format portion have the
/// following layout:
/// * [b'r',b'u',b's',b't',0,0,0,5] are the first 8 bytes (again)
@ -102,6 +103,7 @@ fn read_section<'a>(dylib_binary: &'a [u8], section_name: &str) -> io::Result<&'
/// for the version string's utf8 bytes
/// * [version string bytes encoded in utf8] <- GET THIS BOI
/// * [some more bytes that we don't really care about but are still there] :-)
+///
/// Check this issue for more about the bytes layout:
/// <https://github.com/rust-lang/rust-analyzer/issues/6174>
pub fn read_version(dylib_path: &AbsPath) -> io::Result<String> {
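A hedged sketch of parsing the decompressed payload laid out above. The version-string length is modeled here as a single byte, which is an assumption made for brevity; see the linked issue for the authoritative layout:

```rust
use std::io;

// Sketch only: parse the decompressed `.rustc` payload per the doc above.
// ASSUMPTION: the length field is one byte; the real encoding may differ.
fn parse_version(decompressed: &[u8]) -> io::Result<String> {
    const MAGIC: &[u8; 8] = &[b'r', b'u', b's', b't', 0, 0, 0, 5];
    let rest = decompressed
        .strip_prefix(MAGIC)
        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "bad magic"))?;
    let (&len, rest) = rest
        .split_first()
        .ok_or_else(|| io::Error::new(io::ErrorKind::UnexpectedEof, "missing length"))?;
    let version = rest
        .get(..len as usize)
        .ok_or_else(|| io::Error::new(io::ErrorKind::UnexpectedEof, "truncated version"))?;
    String::from_utf8(version.to_vec())
        .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "version is not utf8"))
}
```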


@ -167,6 +167,11 @@ impl ProjectJson {
&self.project_root
}
+/// Returns the path to the project's manifest file, if it exists.
+pub fn manifest(&self) -> Option<&ManifestPath> {
+self.manifest.as_ref()
+}
/// Returns the path to the project's manifest or root folder, if no manifest exists.
pub fn manifest_or_root(&self) -> &AbsPath {
self.manifest.as_ref().map_or(&self.project_root, |manifest| manifest.as_ref())


@ -527,6 +527,16 @@ impl ProjectWorkspace {
}
}
+pub fn manifest(&self) -> Option<&ManifestPath> {
+match &self.kind {
+ProjectWorkspaceKind::Cargo { cargo, .. } => Some(cargo.manifest_path()),
+ProjectWorkspaceKind::Json(project) => project.manifest(),
+ProjectWorkspaceKind::DetachedFile { cargo, .. } => {
+Some(cargo.as_ref()?.0.manifest_path())
+}
+}
+}
pub fn find_sysroot_proc_macro_srv(&self) -> anyhow::Result<AbsPathBuf> {
self.sysroot.discover_proc_macro_srv()
}


@ -8,6 +8,7 @@ use ide_db::FxHashMap;
use itertools::Itertools;
use nohash_hasher::{IntMap, IntSet};
use rustc_hash::FxHashSet;
+use stdx::iter_eq_by;
use triomphe::Arc;
use crate::{global_state::GlobalStateSnapshot, lsp, lsp_ext};
@ -22,14 +23,21 @@ pub struct DiagnosticsMapConfig {
pub check_ignore: FxHashSet<String>,
}
+pub(crate) type DiagnosticsGeneration = usize;
#[derive(Debug, Default, Clone)]
pub(crate) struct DiagnosticCollection {
// FIXME: should be IntMap<FileId, Vec<ra_id::Diagnostic>>
-pub(crate) native: IntMap<FileId, Vec<lsp_types::Diagnostic>>,
+pub(crate) native: IntMap<FileId, (DiagnosticsGeneration, Vec<lsp_types::Diagnostic>)>,
// FIXME: should be Vec<flycheck::Diagnostic>
pub(crate) check: IntMap<usize, IntMap<FileId, Vec<lsp_types::Diagnostic>>>,
pub(crate) check_fixes: CheckFixes,
changes: IntSet<FileId>,
+/// Counter for supplying a new generation number for diagnostics.
+/// This is used to keep track of when to clear the diagnostics for a given file, as we compute
+/// diagnostics on multiple worker threads simultaneously, which may result in multiple
+/// diagnostics updates for the same file within a single generation (due to macros affecting
+/// multiple files).
+generation: DiagnosticsGeneration,
}
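A toy model of how that generation number is used when worker results arrive, with `String` standing in for `lsp_types::Diagnostic` and a plain `HashMap` for the `IntMap` (the real `set_native_diagnostics` below also sorts and skips no-op updates):

```rust
use std::collections::{hash_map::Entry, HashMap};

type Generation = usize;

// Results from a newer generation replace a file's diagnostics; results from
// the same generation are merged, since several workers may report for one
// file (macros can produce diagnostics outside a worker's slice).
fn store(
    native: &mut HashMap<u32, (Generation, Vec<String>)>,
    generation: Generation,
    file: u32,
    diags: Vec<String>,
) {
    match native.entry(file) {
        Entry::Occupied(mut entry) => {
            let (old_gen, existing) = entry.get_mut();
            if *old_gen < generation || generation == 0 {
                *old_gen = generation; // newer generation: replace
                *existing = diags;
            } else {
                existing.extend(diags); // same generation: merge
            }
        }
        Entry::Vacant(entry) => {
            entry.insert((generation, diags));
        }
    }
}
```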
#[derive(Debug, Clone)]
@ -82,21 +90,31 @@ impl DiagnosticCollection {
pub(crate) fn set_native_diagnostics(
&mut self,
+generation: DiagnosticsGeneration,
file_id: FileId,
-diagnostics: Vec<lsp_types::Diagnostic>,
+mut diagnostics: Vec<lsp_types::Diagnostic>,
) {
-if let Some(existing_diagnostics) = self.native.get(&file_id) {
+diagnostics.sort_by_key(|it| (it.range.start, it.range.end));
+if let Some((old_gen, existing_diagnostics)) = self.native.get_mut(&file_id) {
if existing_diagnostics.len() == diagnostics.len()
-&& diagnostics
-.iter()
-.zip(existing_diagnostics)
-.all(|(new, existing)| are_diagnostics_equal(new, existing))
+&& iter_eq_by(&diagnostics, &*existing_diagnostics, |new, existing| {
+are_diagnostics_equal(new, existing)
+})
{
// don't signal an update if the diagnostics are the same
return;
}
+if *old_gen < generation || generation == 0 {
+self.native.insert(file_id, (generation, diagnostics));
+} else {
+existing_diagnostics.extend(diagnostics);
+// FIXME: Doing the merge step of a merge sort here would be a bit more performant
+// but eh
+existing_diagnostics.sort_by_key(|it| (it.range.start, it.range.end))
+}
+} else {
+self.native.insert(file_id, (generation, diagnostics));
+}
-self.native.insert(file_id, diagnostics);
self.changes.insert(file_id);
}
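`iter_eq_by` comes from the `stdx` helper crate; its behavior is essentially that of the unstable `Iterator::eq_by`. A sketch of the semantics the call site above relies on (the `stdx` version may differ in details):

```rust
// Compare two iterables element-wise with a custom predicate, including
// their lengths: unequal lengths compare as not equal.
fn iter_eq_by<A, B, F>(a: A, b: B, mut eq: F) -> bool
where
    A: IntoIterator,
    B: IntoIterator,
    F: FnMut(A::Item, B::Item) -> bool,
{
    let mut a = a.into_iter();
    let mut b = b.into_iter();
    loop {
        match (a.next(), b.next()) {
            (None, None) => return true,
            (Some(x), Some(y)) if eq(x, y) => {}
            _ => return false,
        }
    }
}
```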
@ -104,7 +122,7 @@ impl DiagnosticCollection {
&self,
file_id: FileId,
) -> impl Iterator<Item = &lsp_types::Diagnostic> {
-let native = self.native.get(&file_id).into_iter().flatten();
+let native = self.native.get(&file_id).into_iter().flat_map(|(_, d)| d);
let check = self.check.values().filter_map(move |it| it.get(&file_id)).flatten();
native.chain(check)
}
@ -115,6 +133,11 @@ impl DiagnosticCollection {
}
Some(mem::take(&mut self.changes))
}
+pub(crate) fn next_generation(&mut self) -> usize {
+self.generation += 1;
+self.generation
+}
}
fn are_diagnostics_equal(left: &lsp_types::Diagnostic, right: &lsp_types::Diagnostic) -> bool {
@ -126,7 +149,8 @@ fn are_diagnostics_equal(left: &lsp_types::Diagnostic, right: &lsp_types::Diagno
pub(crate) fn fetch_native_diagnostics(
snapshot: GlobalStateSnapshot,
-subscriptions: Vec<FileId>,
+subscriptions: std::sync::Arc<[FileId]>,
+slice: std::ops::Range<usize>,
) -> Vec<(FileId, Vec<lsp_types::Diagnostic>)> {
let _p = tracing::info_span!("fetch_native_diagnostics").entered();
let _ctx = stdx::panic_context::enter("fetch_native_diagnostics".to_owned());
@ -149,7 +173,7 @@ pub(crate) fn fetch_native_diagnostics(
// the diagnostics produced may point to different files not requested by the concrete request,
// put those into here and filter later
let mut odd_ones = Vec::new();
-let mut diagnostics = subscriptions
+let mut diagnostics = subscriptions[slice]
.iter()
.copied()
.filter_map(|file_id| {


@ -163,7 +163,9 @@ pub(crate) struct GlobalStateSnapshot {
pub(crate) semantic_tokens_cache: Arc<Mutex<FxHashMap<Url, SemanticTokens>>>,
vfs: Arc<RwLock<(vfs::Vfs, IntMap<FileId, LineEndings>)>>,
pub(crate) workspaces: Arc<Vec<ProjectWorkspace>>,
-// used to signal semantic highlighting to fall back to syntax based highlighting until proc-macros have been loaded
+// used to signal semantic highlighting to fall back to syntax based highlighting until
+// proc-macros have been loaded
+// FIXME: Can we derive this from somewhere else?
pub(crate) proc_macros_loaded: bool,
pub(crate) flycheck: Arc<[FlycheckHandle]>,
}


@ -3,6 +3,7 @@
use std::{
fmt,
+ops::Div as _,
time::{Duration, Instant},
};
@ -17,7 +18,7 @@ use vfs::FileId;
use crate::{
config::Config,
-diagnostics::fetch_native_diagnostics,
+diagnostics::{fetch_native_diagnostics, DiagnosticsGeneration},
dispatch::{NotificationDispatcher, RequestDispatcher},
global_state::{file_id_to_url, url_to_file_id, GlobalState},
hack_recover_crate_name,
@ -87,7 +88,7 @@ pub(crate) enum Task {
Response(lsp_server::Response),
ClientNotification(lsp_ext::UnindexedProjectParams),
Retry(lsp_server::Request),
-Diagnostics(Vec<(FileId, Vec<lsp_types::Diagnostic>)>),
+Diagnostics(DiagnosticsGeneration, Vec<(FileId, Vec<lsp_types::Diagnostic>)>),
DiscoverTest(lsp_ext::DiscoverTestResults),
PrimeCaches(PrimeCachesProgress),
FetchWorkspace(ProjectWorkspaceProgress),
@ -479,7 +480,7 @@ impl GlobalState {
fn update_diagnostics(&mut self) {
let db = self.analysis_host.raw_database();
-// spawn a task per subscription?
+let generation = self.diagnostics.next_generation();
let subscriptions = {
let vfs = &self.vfs.read().0;
self.mem_docs
@ -494,16 +495,37 @@ impl GlobalState {
// forever if we emitted them here.
!db.source_root(source_root).is_library
})
-.collect::<Vec<_>>()
+.collect::<std::sync::Arc<_>>()
};
tracing::trace!("updating notifications for {:?}", subscriptions);
+// Split up the work on multiple threads, but we don't wanna fill the entire task pool with
+// diagnostic tasks, so we limit the number of tasks to a quarter of the total thread pool.
+let max_tasks = self.config.main_loop_num_threads().div(4).max(1);
+let chunk_length = subscriptions.len() / max_tasks;
+let remainder = subscriptions.len() % max_tasks;
-// Diagnostics are triggered by the user typing
-// so we run them on a latency sensitive thread.
-self.task_pool.handle.spawn(ThreadIntent::LatencySensitive, {
-let snapshot = self.snapshot();
-move || Task::Diagnostics(fetch_native_diagnostics(snapshot, subscriptions))
-});
+let mut start = 0;
+for task_idx in 0..max_tasks {
+let extra = if task_idx < remainder { 1 } else { 0 };
+let end = start + chunk_length + extra;
+let slice = start..end;
+if slice.is_empty() {
+break;
+}
+// Diagnostics are triggered by the user typing
+// so we run them on a latency sensitive thread.
+self.task_pool.handle.spawn(ThreadIntent::LatencySensitive, {
+let snapshot = self.snapshot();
+let subscriptions = subscriptions.clone();
+move || {
+Task::Diagnostics(
+generation,
+fetch_native_diagnostics(snapshot, subscriptions, slice),
+)
+}
+});
+start = end;
+}
}
fn update_tests(&mut self) {
@ -590,9 +612,9 @@ impl GlobalState {
// Only retry requests that haven't been cancelled. Otherwise we do unnecessary work.
Task::Retry(req) if !self.is_completed(&req) => self.on_request(req),
Task::Retry(_) => (),
-Task::Diagnostics(diagnostics_per_file) => {
+Task::Diagnostics(generation, diagnostics_per_file) => {
for (file_id, diagnostics) in diagnostics_per_file {
-self.diagnostics.set_native_diagnostics(file_id, diagnostics)
+self.diagnostics.set_native_diagnostics(generation, file_id, diagnostics)
}
}
Task::PrimeCaches(progress) => match progress {
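The splitting arithmetic in `update_diagnostics` above is worth seeing in isolation: `len / max_tasks` items per slice, the first `len % max_tasks` slices take one extra item, and no empty task is ever spawned. A standalone sketch of just that partitioning:

```rust
// Partition `len` items into at most `max_tasks` contiguous, non-empty ranges.
fn chunks(len: usize, max_tasks: usize) -> Vec<std::ops::Range<usize>> {
    let chunk_length = len / max_tasks;
    let remainder = len % max_tasks;
    let mut slices = Vec::new();
    let mut start = 0;
    for task_idx in 0..max_tasks {
        let extra = if task_idx < remainder { 1 } else { 0 };
        let end = start + chunk_length + extra;
        if start == end {
            break; // more tasks than items: stop spawning
        }
        slices.push(start..end);
        start = end;
    }
    slices
}

fn main() {
    // 10 files over 4 workers -> 3, 3, 2, 2
    assert_eq!(chunks(10, 4), vec![0..3, 3..6, 6..8, 8..10]);
    // 2 files over 4 workers -> only two (non-empty) tasks are spawned
    assert_eq!(chunks(2, 4), vec![0..1, 1..2]);
}
```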


@ -440,15 +440,19 @@ impl GlobalState {
}
if let FilesWatcher::Client = self.config.files().watcher {
-let filter =
-self.workspaces.iter().flat_map(|ws| ws.to_roots()).filter(|it| it.is_local);
+let filter = self
+.workspaces
+.iter()
+.flat_map(|ws| ws.to_roots())
+.filter(|it| it.is_local)
+.map(|it| it.include);
let mut watchers: Vec<FileSystemWatcher> =
if self.config.did_change_watched_files_relative_pattern_support() {
// When relative patterns are supported by the client, prefer using them
filter
-.flat_map(|root| {
-root.include.into_iter().flat_map(|base| {
+.flat_map(|include| {
+include.into_iter().flat_map(|base| {
[
(base.clone(), "**/*.rs"),
(base.clone(), "**/Cargo.{lock,toml}"),
@ -471,8 +475,8 @@ impl GlobalState {
} else {
// When they're not, integrate the base to make them into absolute patterns
filter
-.flat_map(|root| {
-root.include.into_iter().flat_map(|base| {
+.flat_map(|include| {
+include.into_iter().flat_map(|base| {
[
format!("{base}/**/*.rs"),
format!("{base}/**/Cargo.{{toml,lock}}"),
@ -488,13 +492,14 @@ impl GlobalState {
};
watchers.extend(
-iter::once(self.config.user_config_path().to_string())
-.chain(iter::once(self.config.root_ratoml_path().to_string()))
+iter::once(self.config.user_config_path().as_path())
+.chain(iter::once(self.config.root_ratoml_path().as_path()))
+.chain(self.workspaces.iter().map(|ws| ws.manifest().map(ManifestPath::as_ref)))
+.flatten()
.map(|glob_pattern| lsp_types::FileSystemWatcher {
-glob_pattern: lsp_types::GlobPattern::String(glob_pattern),
+glob_pattern: lsp_types::GlobPattern::String(glob_pattern.to_string()),
kind: None,
-})
-.collect::<Vec<FileSystemWatcher>>(),
+}),
);
let registration_options =


@ -282,7 +282,7 @@ impl Vfs {
/// Returns the id associated with `path`
///
/// - If `path` does not exists in the `Vfs`, allocate a new id for it, associated with a
-/// deleted file;
+///   deleted file;
/// - Else, returns `path`'s id.
///
/// Does not record a change.
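A hypothetical sketch of the get-or-allocate contract documented above, with string keys and `u32` ids standing in for `VfsPath` and `FileId`:

```rust
use std::collections::HashMap;

// Unknown paths are interned immediately (conceptually "deleted" until real
// contents arrive), so callers always get a stable id back.
#[derive(Default)]
struct Interner {
    ids: HashMap<String, u32>,
}

impl Interner {
    fn file_id(&mut self, path: String) -> u32 {
        let next = self.ids.len() as u32;
        *self.ids.entry(path).or_insert(next)
    }
}
```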


@ -384,8 +384,7 @@ impl VirtualPath {
///
/// # Returns
/// - `None` if `self` ends with `"//"`.
-/// - `Some((name, None))` if `self`'s base contains no `.`, or only one `.` at
-/// the start.
+/// - `Some((name, None))` if `self`'s base contains no `.`, or only one `.` at the start.
/// - `Some((name, Some(extension))` else.
///
/// # Note
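A sketch matching the three documented cases, as a hypothetical free function over the base name rather than the actual `VirtualPath` method:

```rust
// Split a path's base name into (name, extension) per the doc above.
fn name_and_extension(base: &str) -> Option<(&str, Option<&str>)> {
    if base.is_empty() {
        return None; // the path ended with "//", so the base name is empty
    }
    match base.rsplit_once('.') {
        // no `.`, or only a leading `.` (e.g. ".gitignore"): no extension
        None => Some((base, None)),
        Some(("", _)) => Some((base, None)),
        Some((name, extension)) => Some((name, Some(extension))),
    }
}
```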