Auto merge of #16307 - Veykril:vfs2, r=lnicola

internal: VFS no longer stores all source files in memory

Turns out there is no need to keep the files around. We either upload them to salsa once processed, or we need to keep them around for the `DidChangeTextDocumentNotification`, but that notification is only valid for opened documents, so instead we can just keep the files around in the `MemDocs`!

Fixes https://github.com/rust-lang/rust-analyzer/issues/16301
This commit is contained in:
bors 2024-01-08 16:56:44 +00:00
commit c1c242180e
9 changed files with 130 additions and 143 deletions

View file

@ -331,9 +331,8 @@ fn load_crate_graph(
}
let changes = vfs.take_changes();
for file in changes {
if file.exists() {
let contents = vfs.file_contents(file.file_id);
if let Ok(text) = std::str::from_utf8(contents) {
if let vfs::Change::Create(v) | vfs::Change::Modify(v) = file.change {
if let Ok(text) = std::str::from_utf8(&v) {
analysis_change.change_file(file.file_id, Some(text.into()))
}
}

View file

@ -3,7 +3,7 @@
//!
//! Each tick provides an immutable snapshot of the state as `WorldSnapshot`.
use std::time::Instant;
use std::{collections::hash_map::Entry, time::Instant};
use crossbeam_channel::{unbounded, Receiver, Sender};
use flycheck::FlycheckHandle;
@ -21,7 +21,7 @@ use proc_macro_api::ProcMacroServer;
use project_model::{CargoWorkspace, ProjectWorkspace, Target, WorkspaceBuildScripts};
use rustc_hash::{FxHashMap, FxHashSet};
use triomphe::Arc;
use vfs::{AnchoredPathBuf, Vfs};
use vfs::{AnchoredPathBuf, ChangedFile, Vfs};
use crate::{
config::{Config, ConfigError},
@ -217,8 +217,8 @@ impl GlobalState {
pub(crate) fn process_changes(&mut self) -> bool {
let _p = profile::span("GlobalState::process_changes");
let mut file_changes = FxHashMap::default();
let (change, changed_files, workspace_structure_change) = {
let mut file_changes = FxHashMap::<_, (bool, ChangedFile)>::default();
let (change, modified_files, workspace_structure_change) = {
let mut change = Change::new();
let mut guard = self.vfs.write();
let changed_files = guard.0.take_changes();
@ -233,64 +233,63 @@ impl GlobalState {
// id that is followed by a delete we actually skip observing the file text from the
// earlier event, to avoid problems later on.
for changed_file in changed_files {
use vfs::ChangeKind::*;
file_changes
.entry(changed_file.file_id)
.and_modify(|(change, just_created)| {
// None -> Delete => keep
// Create -> Delete => collapse
//
match (change, just_created, changed_file.change_kind) {
use vfs::Change::*;
match file_changes.entry(changed_file.file_id) {
Entry::Occupied(mut o) => {
let (just_created, change) = o.get_mut();
match (&mut change.change, just_created, changed_file.change) {
// latter `Delete` wins
(change, _, Delete) => *change = Delete,
// merge `Create` with `Create` or `Modify`
(Create, _, Create | Modify) => {}
(Create(prev), _, Create(new) | Modify(new)) => *prev = new,
// collapse identical `Modify`es
(Modify, _, Modify) => {}
(Modify(prev), _, Modify(new)) => *prev = new,
// equivalent to `Modify`
(change @ Delete, just_created, Create) => {
*change = Modify;
(change @ Delete, just_created, Create(new)) => {
*change = Modify(new);
*just_created = true;
}
// shouldn't occur, but collapse into `Create`
(change @ Delete, just_created, Modify) => {
*change = Create;
(change @ Delete, just_created, Modify(new)) => {
*change = Create(new);
*just_created = true;
}
// shouldn't occur, but collapse into `Modify`
(Modify, _, Create) => {}
(Modify(prev), _, Create(new)) => *prev = new,
}
})
.or_insert((
changed_file.change_kind,
matches!(changed_file.change_kind, Create),
));
}
Entry::Vacant(v) => {
_ = v.insert((matches!(&changed_file.change, Create(_)), changed_file))
}
}
}
let changed_files: Vec<_> = file_changes
.into_iter()
.filter(|(_, (change_kind, just_created))| {
!matches!((change_kind, just_created), (vfs::ChangeKind::Delete, true))
.filter(|(_, (just_created, change))| {
!(*just_created && matches!(change.change, vfs::Change::Delete))
})
.map(|(file_id, (change_kind, _))| vfs::ChangedFile { file_id, change_kind })
.map(|(file_id, (_, change))| vfs::ChangedFile { file_id, ..change })
.collect();
let mut workspace_structure_change = None;
// A file was added or deleted
let mut has_structure_changes = false;
let mut bytes = vec![];
for file in &changed_files {
let mut modified_files = vec![];
for file in changed_files {
let vfs_path = &vfs.file_path(file.file_id);
if let Some(path) = vfs_path.as_path() {
let path = path.to_path_buf();
if reload::should_refresh_for_change(&path, file.change_kind) {
if reload::should_refresh_for_change(&path, file.kind()) {
workspace_structure_change = Some((path.clone(), false));
}
if file.is_created_or_deleted() {
has_structure_changes = true;
workspace_structure_change =
Some((path, self.crate_graph_file_dependencies.contains(vfs_path)));
} else {
modified_files.push(file.file_id);
}
}
@ -299,10 +298,8 @@ impl GlobalState {
self.diagnostics.clear_native_for(file.file_id);
}
let text = if file.exists() {
let bytes = vfs.file_contents(file.file_id).to_vec();
String::from_utf8(bytes).ok().and_then(|text| {
let text = if let vfs::Change::Create(v) | vfs::Change::Modify(v) = file.change {
String::from_utf8(v).ok().and_then(|text| {
// FIXME: Consider doing normalization in the `vfs` instead? That allows
// getting rid of some locking
let (text, line_endings) = LineEndings::normalize(text);
@ -327,11 +324,10 @@ impl GlobalState {
let roots = self.source_root_config.partition(vfs);
change.set_roots(roots);
}
(change, changed_files, workspace_structure_change)
(change, modified_files, workspace_structure_change)
};
self.analysis_host.apply_change(change);
{
let raw_database = self.analysis_host.raw_database();
// FIXME: ideally we should only trigger a workspace fetch for non-library changes
@ -343,13 +339,12 @@ impl GlobalState {
force_crate_graph_reload,
);
}
self.proc_macro_changed =
changed_files.iter().filter(|file| !file.is_created_or_deleted()).any(|file| {
let crates = raw_database.relevant_crates(file.file_id);
let crate_graph = raw_database.crate_graph();
self.proc_macro_changed = modified_files.into_iter().any(|file_id| {
let crates = raw_database.relevant_crates(file_id);
let crate_graph = raw_database.crate_graph();
crates.iter().any(|&krate| crate_graph[krate].is_proc_macro)
});
crates.iter().any(|&krate| crate_graph[krate].is_proc_macro)
});
}
true
@ -494,10 +489,6 @@ impl GlobalStateSnapshot {
})
}
pub(crate) fn vfs_memory_usage(&self) -> usize {
self.vfs_read().memory_usage()
}
pub(crate) fn file_exists(&self, file_id: FileId) -> bool {
self.vfs.read().0.exists(file_id)
}

View file

@ -59,7 +59,13 @@ pub(crate) fn handle_did_open_text_document(
if let Ok(path) = from_proto::vfs_path(&params.text_document.uri) {
let already_exists = state
.mem_docs
.insert(path.clone(), DocumentData::new(params.text_document.version))
.insert(
path.clone(),
DocumentData::new(
params.text_document.version,
params.text_document.text.clone().into_bytes(),
),
)
.is_err();
if already_exists {
tracing::error!("duplicate DidOpenTextDocument: {}", path);
@ -76,11 +82,12 @@ pub(crate) fn handle_did_change_text_document(
let _p = profile::span("handle_did_change_text_document");
if let Ok(path) = from_proto::vfs_path(&params.text_document.uri) {
match state.mem_docs.get_mut(&path) {
let data = match state.mem_docs.get_mut(&path) {
Some(doc) => {
// The version passed in DidChangeTextDocument is the version after all edits are applied
// so we should apply it before the vfs is notified.
doc.version = params.text_document.version;
&mut doc.data
}
None => {
tracing::error!("unexpected DidChangeTextDocument: {}", path);
@ -88,16 +95,14 @@ pub(crate) fn handle_did_change_text_document(
}
};
let text = apply_document_changes(
let new_contents = apply_document_changes(
state.config.position_encoding(),
|| {
let vfs = &state.vfs.read().0;
let file_id = vfs.file_id(&path).unwrap();
std::str::from_utf8(vfs.file_contents(file_id)).unwrap().into()
},
std::str::from_utf8(data).unwrap(),
params.content_changes,
);
state.vfs.write().0.set_file_contents(path, Some(text.into_bytes()));
)
.into_bytes();
*data = new_contents.clone();
state.vfs.write().0.set_file_contents(path, Some(new_contents));
}
Ok(())
}

View file

@ -103,7 +103,6 @@ pub(crate) fn handle_analyzer_status(
.collect::<Vec<&AbsPath>>()
);
}
format_to!(buf, "\nVfs memory usage: {}\n", profile::Bytes::new(snap.vfs_memory_usage() as _));
buf.push_str("\nAnalysis:\n");
buf.push_str(
&snap

View file

@ -168,7 +168,7 @@ impl GlobalState {
pub(crate) fn apply_document_changes(
encoding: PositionEncoding,
file_contents: impl FnOnce() -> String,
file_contents: &str,
mut content_changes: Vec<lsp_types::TextDocumentContentChangeEvent>,
) -> String {
// If at least one of the changes is a full document change, use the last
@ -179,7 +179,7 @@ pub(crate) fn apply_document_changes(
let text = mem::take(&mut content_changes[idx].text);
(text, &content_changes[idx + 1..])
}
None => (file_contents(), &content_changes[..]),
None => (file_contents.to_owned(), &content_changes[..]),
};
if content_changes.is_empty() {
return text;
@ -276,11 +276,11 @@ mod tests {
}
let encoding = PositionEncoding::Wide(WideEncoding::Utf16);
let text = apply_document_changes(encoding, || String::new(), vec![]);
let text = apply_document_changes(encoding, "", vec![]);
assert_eq!(text, "");
let text = apply_document_changes(
encoding,
|| text,
&text,
vec![TextDocumentContentChangeEvent {
range: None,
range_length: None,
@ -288,49 +288,46 @@ mod tests {
}],
);
assert_eq!(text, "the");
let text = apply_document_changes(encoding, || text, c![0, 3; 0, 3 => " quick"]);
let text = apply_document_changes(encoding, &text, c![0, 3; 0, 3 => " quick"]);
assert_eq!(text, "the quick");
let text =
apply_document_changes(encoding, || text, c![0, 0; 0, 4 => "", 0, 5; 0, 5 => " foxes"]);
apply_document_changes(encoding, &text, c![0, 0; 0, 4 => "", 0, 5; 0, 5 => " foxes"]);
assert_eq!(text, "quick foxes");
let text = apply_document_changes(encoding, || text, c![0, 11; 0, 11 => "\ndream"]);
let text = apply_document_changes(encoding, &text, c![0, 11; 0, 11 => "\ndream"]);
assert_eq!(text, "quick foxes\ndream");
let text = apply_document_changes(encoding, || text, c![1, 0; 1, 0 => "have "]);
let text = apply_document_changes(encoding, &text, c![1, 0; 1, 0 => "have "]);
assert_eq!(text, "quick foxes\nhave dream");
let text = apply_document_changes(
encoding,
|| text,
&text,
c![0, 0; 0, 0 => "the ", 1, 4; 1, 4 => " quiet", 1, 16; 1, 16 => "s\n"],
);
assert_eq!(text, "the quick foxes\nhave quiet dreams\n");
let text = apply_document_changes(
encoding,
|| text,
c![0, 15; 0, 15 => "\n", 2, 17; 2, 17 => "\n"],
);
let text =
apply_document_changes(encoding, &text, c![0, 15; 0, 15 => "\n", 2, 17; 2, 17 => "\n"]);
assert_eq!(text, "the quick foxes\n\nhave quiet dreams\n\n");
let text = apply_document_changes(
encoding,
|| text,
&text,
c![1, 0; 1, 0 => "DREAM", 2, 0; 2, 0 => "they ", 3, 0; 3, 0 => "DON'T THEY?"],
);
assert_eq!(text, "the quick foxes\nDREAM\nthey have quiet dreams\nDON'T THEY?\n");
let text =
apply_document_changes(encoding, || text, c![0, 10; 1, 5 => "", 2, 0; 2, 12 => ""]);
apply_document_changes(encoding, &text, c![0, 10; 1, 5 => "", 2, 0; 2, 12 => ""]);
assert_eq!(text, "the quick \nthey have quiet dreams\n");
let text = String::from("❤️");
let text = apply_document_changes(encoding, || text, c![0, 0; 0, 0 => "a"]);
let text = apply_document_changes(encoding, &text, c![0, 0; 0, 0 => "a"]);
assert_eq!(text, "a❤");
let text = String::from("a\nb");
let text =
apply_document_changes(encoding, || text, c![0, 1; 1, 0 => "\nțc", 0, 1; 1, 1 => "d"]);
apply_document_changes(encoding, &text, c![0, 1; 1, 0 => "\nțc", 0, 1; 1, 1 => "d"]);
assert_eq!(text, "adcb");
let text = String::from("a\nb");
let text =
apply_document_changes(encoding, || text, c![0, 1; 1, 0 => "ț\nc", 0, 2; 0, 2 => "c"]);
apply_document_changes(encoding, &text, c![0, 1; 1, 0 => "ț\nc", 0, 2; 0, 2 => "c"]);
assert_eq!(text, "ațc\ncb");
}

View file

@ -576,6 +576,8 @@ impl GlobalState {
let vfs = &mut self.vfs.write().0;
for (path, contents) in files {
let path = VfsPath::from(path);
// if the file is in mem docs, it's managed by the client via notifications
// so only set it if its not in there
if !self.mem_docs.contains(&path) {
vfs.set_file_contents(path, contents);
}

View file

@ -62,10 +62,11 @@ impl MemDocs {
#[derive(Debug, Clone)]
pub(crate) struct DocumentData {
pub(crate) version: i32,
pub(crate) data: Vec<u8>,
}
impl DocumentData {
pub(crate) fn new(version: i32) -> Self {
DocumentData { version }
pub(crate) fn new(version: i32, data: Vec<u8>) -> Self {
DocumentData { version, data }
}
}

View file

@ -503,10 +503,9 @@ impl GlobalState {
match vfs.file_id(&vfs_path) {
Some(file_id) => Some(file_id),
None => {
if !self.mem_docs.contains(&vfs_path) {
let contents = loader.handle.load_sync(path);
vfs.set_file_contents(vfs_path.clone(), contents);
}
// FIXME: Consider not loading this here?
let contents = loader.handle.load_sync(path);
vfs.set_file_contents(vfs_path.clone(), contents);
vfs.file_id(&vfs_path)
}
}

View file

@ -1,8 +1,8 @@
//! # Virtual File System
//!
//! VFS stores all files read by rust-analyzer. Reading file contents from VFS
//! always returns the same contents, unless VFS was explicitly modified with
//! [`set_file_contents`]. All changes to VFS are logged, and can be retrieved via
//! VFS records all file changes pushed to it via [`set_file_contents`].
//! As such it only ever stores changes, not the actual content of a file at any given moment.
//! All file changes are logged, and can be retrieved via
//! [`take_changes`] method. The pack of changes is then pushed to `salsa` and
//! triggers incremental recomputation.
//!
@ -84,40 +84,65 @@ impl FileId {
/// safe because `FileId` is a newtype of `u32`
impl nohash_hasher::IsEnabled for FileId {}
/// Storage for all files read by rust-analyzer.
/// Storage for all file changes and the file id to path mapping.
///
/// For more information see the [crate-level](crate) documentation.
#[derive(Default)]
pub struct Vfs {
interner: PathInterner,
data: Vec<Option<Vec<u8>>>,
data: Vec<FileState>,
changes: Vec<ChangedFile>,
}
#[derive(Copy, PartialEq, PartialOrd, Clone)]
pub enum FileState {
Exists,
Deleted,
}
/// Changed file in the [`Vfs`].
#[derive(Debug)]
pub struct ChangedFile {
/// Id of the changed file
pub file_id: FileId,
/// Kind of change
pub change_kind: ChangeKind,
pub change: Change,
}
impl ChangedFile {
/// Returns `true` if the change is not [`Delete`](ChangeKind::Delete).
pub fn exists(&self) -> bool {
self.change_kind != ChangeKind::Delete
!matches!(self.change, Change::Delete)
}
/// Returns `true` if the change is [`Create`](ChangeKind::Create) or
/// [`Delete`](ChangeKind::Delete).
/// [`Delete`](Change::Delete).
pub fn is_created_or_deleted(&self) -> bool {
matches!(self.change_kind, ChangeKind::Create | ChangeKind::Delete)
matches!(self.change, Change::Create(_) | Change::Delete)
}
pub fn kind(&self) -> ChangeKind {
match self.change {
Change::Create(_) => ChangeKind::Create,
Change::Modify(_) => ChangeKind::Modify,
Change::Delete => ChangeKind::Delete,
}
}
}
/// Kind of [file change](ChangedFile).
#[derive(Eq, PartialEq, Copy, Clone, Debug)]
#[derive(Eq, PartialEq, Debug)]
pub enum Change {
/// The file was (re-)created
Create(Vec<u8>),
/// The file was modified
Modify(Vec<u8>),
/// The file was deleted
Delete,
}
/// Kind of [file change](ChangedFile).
#[derive(Eq, PartialEq, Debug)]
pub enum ChangeKind {
/// The file was (re-)created
Create,
@ -130,7 +155,7 @@ pub enum ChangeKind {
impl Vfs {
/// Id of the given path if it exists in the `Vfs` and is not deleted.
pub fn file_id(&self, path: &VfsPath) -> Option<FileId> {
self.interner.get(path).filter(|&it| self.get(it).is_some())
self.interner.get(path).filter(|&it| matches!(self.get(it), FileState::Exists))
}
/// File path corresponding to the given `file_id`.
@ -142,28 +167,13 @@ impl Vfs {
self.interner.lookup(file_id).clone()
}
/// File content corresponding to the given `file_id`.
///
/// # Panics
///
/// Panics if the id is not present in the `Vfs`, or if the corresponding file is
/// deleted.
pub fn file_contents(&self, file_id: FileId) -> &[u8] {
self.get(file_id).as_deref().unwrap()
}
/// Returns the overall memory usage for the stored files.
pub fn memory_usage(&self) -> usize {
self.data.iter().flatten().map(|d| d.capacity()).sum()
}
/// Returns an iterator over the stored ids and their corresponding paths.
///
/// This will skip deleted files.
pub fn iter(&self) -> impl Iterator<Item = (FileId, &VfsPath)> + '_ {
(0..self.data.len())
.map(|it| FileId(it as u32))
.filter(move |&file_id| self.get(file_id).is_some())
.filter(move |&file_id| matches!(self.get(file_id), FileState::Exists))
.map(move |file_id| {
let path = self.interner.lookup(file_id);
(file_id, path)
@ -176,28 +186,21 @@ impl Vfs {
///
/// If the path does not currently exists in the `Vfs`, allocates a new
/// [`FileId`] for it.
pub fn set_file_contents(&mut self, path: VfsPath, mut contents: Option<Vec<u8>>) -> bool {
pub fn set_file_contents(&mut self, path: VfsPath, contents: Option<Vec<u8>>) -> bool {
let file_id = self.alloc_file_id(path);
let change_kind = match (self.get(file_id), &contents) {
(None, None) => return false,
(Some(old), Some(new)) if old == new => return false,
(None, Some(_)) => ChangeKind::Create,
(Some(_), None) => ChangeKind::Delete,
(Some(_), Some(_)) => ChangeKind::Modify,
let change_kind = match (self.get(file_id), contents) {
(FileState::Deleted, None) => return false,
(FileState::Deleted, Some(v)) => Change::Create(v),
(FileState::Exists, None) => Change::Delete,
(FileState::Exists, Some(v)) => Change::Modify(v),
};
if let Some(contents) = &mut contents {
contents.shrink_to_fit();
}
*self.get_mut(file_id) = contents;
self.changes.push(ChangedFile { file_id, change_kind });
let changed_file = ChangedFile { file_id, change: change_kind };
self.data[file_id.0 as usize] =
if changed_file.exists() { FileState::Exists } else { FileState::Deleted };
self.changes.push(changed_file);
true
}
/// Returns `true` if the `Vfs` contains [changes](ChangedFile).
pub fn has_changes(&self) -> bool {
!self.changes.is_empty()
}
/// Drain and returns all the changes in the `Vfs`.
pub fn take_changes(&mut self) -> Vec<ChangedFile> {
mem::take(&mut self.changes)
@ -205,7 +208,7 @@ impl Vfs {
/// Provides a panic-less way to verify file_id validity.
pub fn exists(&self, file_id: FileId) -> bool {
self.get(file_id).is_some()
matches!(self.get(file_id), FileState::Exists)
}
/// Returns the id associated with `path`
@ -219,26 +222,17 @@ impl Vfs {
let file_id = self.interner.intern(path);
let idx = file_id.0 as usize;
let len = self.data.len().max(idx + 1);
self.data.resize_with(len, || None);
self.data.resize(len, FileState::Deleted);
file_id
}
/// Returns the content associated with the given `file_id`.
/// Returns the status of the file associated with the given `file_id`.
///
/// # Panics
///
/// Panics if no file is associated to that id.
fn get(&self, file_id: FileId) -> &Option<Vec<u8>> {
&self.data[file_id.0 as usize]
}
/// Mutably returns the content associated with the given `file_id`.
///
/// # Panics
///
/// Panics if no file is associated to that id.
fn get_mut(&mut self, file_id: FileId) -> &mut Option<Vec<u8>> {
&mut self.data[file_id.0 as usize]
fn get(&self, file_id: FileId) -> FileState {
self.data[file_id.0 as usize]
}
}