From c002322bde06a73c8cfa02cd1cbe33cf225da47b Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 15 Jun 2020 13:29:07 +0200 Subject: [PATCH] New VFS API --- Cargo.lock | 12 +++ crates/vfs/Cargo.toml | 14 ++++ crates/vfs/src/file_set.rs | 99 ++++++++++++++++++++++ crates/vfs/src/lib.rs | 138 +++++++++++++++++++++++++++++++ crates/vfs/src/loader.rs | 69 ++++++++++++++++ crates/vfs/src/path_interner.rs | 31 +++++++ crates/vfs/src/vfs_path.rs | 49 +++++++++++ crates/vfs/src/walkdir_loader.rs | 108 ++++++++++++++++++++++++ 8 files changed, 520 insertions(+) create mode 100644 crates/vfs/Cargo.toml create mode 100644 crates/vfs/src/file_set.rs create mode 100644 crates/vfs/src/lib.rs create mode 100644 crates/vfs/src/loader.rs create mode 100644 crates/vfs/src/path_interner.rs create mode 100644 crates/vfs/src/vfs_path.rs create mode 100644 crates/vfs/src/walkdir_loader.rs diff --git a/Cargo.lock b/Cargo.lock index 5848e61c7e..6d83c92761 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1729,6 +1729,18 @@ dependencies = [ "serde", ] +[[package]] +name = "vfs" +version = "0.1.0" +dependencies = [ + "crossbeam-channel", + "globset", + "jod-thread", + "paths", + "rustc-hash", + "walkdir", +] + [[package]] name = "walkdir" version = "2.3.1" diff --git a/crates/vfs/Cargo.toml b/crates/vfs/Cargo.toml new file mode 100644 index 0000000000..c03e6363b8 --- /dev/null +++ b/crates/vfs/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "vfs" +version = "0.1.0" +authors = ["rust-analyzer developers"] +edition = "2018" + +[dependencies] +rustc-hash = "1.0" +jod-thread = "0.1.0" +walkdir = "2.3.1" +globset = "0.4.5" +crossbeam-channel = "0.4.0" + +paths = { path = "../paths" } diff --git a/crates/vfs/src/file_set.rs b/crates/vfs/src/file_set.rs new file mode 100644 index 0000000000..7dc721f7e4 --- /dev/null +++ b/crates/vfs/src/file_set.rs @@ -0,0 +1,99 @@ +//! Partitions a list of files into disjoint subsets. +//! +//! Files which do not belong to any explicitly configured `FileSet` belong to +//! the default `FileSet`. +use std::{cmp, fmt, iter}; + +use paths::AbsPathBuf; +use rustc_hash::FxHashMap; + +use crate::{FileId, Vfs, VfsPath}; + +#[derive(Default, Clone, Eq, PartialEq)] +pub struct FileSet { + files: FxHashMap, + paths: FxHashMap, +} + +impl FileSet { + pub fn resolve_path(&self, anchor: FileId, path: &str) -> Option { + let mut base = self.paths[&anchor].clone(); + base.pop(); + let path = base.join(path); + let res = self.files.get(&path).copied(); + res + } + pub fn insert(&mut self, file_id: FileId, path: VfsPath) { + self.files.insert(path.clone(), file_id); + self.paths.insert(file_id, path); + } + pub fn iter(&self) -> impl Iterator + '_ { + self.paths.keys().copied() + } +} + +impl fmt::Debug for FileSet { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FileSet").field("n_files", &self.files.len()).finish() + } +} + +#[derive(Debug)] +pub struct FileSetConfig { + n_file_sets: usize, + roots: Vec<(AbsPathBuf, usize)>, +} + +impl FileSetConfig { + pub fn builder() -> FileSetConfigBuilder { + FileSetConfigBuilder::default() + } + pub fn partition(&self, vfs: &Vfs) -> Vec { + let mut res = vec![FileSet::default(); self.len()]; + for (file_id, path) in vfs.iter() { + let root = self.classify(&path); + res[root].insert(file_id, path) + } + res + } + fn len(&self) -> usize { + self.n_file_sets + } + fn classify(&self, path: &VfsPath) -> usize { + for (root, idx) in self.roots.iter() { + if let Some(path) = path.as_path() { + if path.starts_with(root) { + return *idx; + } + } + } + self.len() - 1 + } +} + +pub struct FileSetConfigBuilder { + roots: Vec>, +} + +impl Default for FileSetConfigBuilder { + fn default() -> Self { + FileSetConfigBuilder { roots: Vec::new() } + } +} + +impl FileSetConfigBuilder { + pub fn add_file_set(&mut self, roots: Vec) { + self.roots.push(roots) + } + pub fn build(self) -> FileSetConfig { + let n_file_sets = self.roots.len() + 1; + let mut roots: Vec<(AbsPathBuf, usize)> = self + .roots + .into_iter() + .enumerate() + .flat_map(|(i, paths)| paths.into_iter().zip(iter::repeat(i))) + .collect(); + roots.sort_by_key(|(path, _)| cmp::Reverse(path.to_string_lossy().len())); + FileSetConfig { n_file_sets, roots } + } +} diff --git a/crates/vfs/src/lib.rs b/crates/vfs/src/lib.rs new file mode 100644 index 0000000000..75ce61cf9d --- /dev/null +++ b/crates/vfs/src/lib.rs @@ -0,0 +1,138 @@ +//! # Virtual File System +//! +//! VFS stores all files read by rust-analyzer. Reading file contents from VFS +//! always returns the same contents, unless VFS was explicitly modified with +//! `set_file_contents`. All changes to VFS are logged, and can be retrieved via +//! `take_changes` method. The pack of changes is then pushed to `salsa` and +//! triggers incremental recomputation. +//! +//! Files in VFS are identified with `FileId`s -- interned paths. The notion of +//! the path, `VfsPath` is somewhat abstract: at the moment, it is represented +//! as an `std::path::PathBuf` internally, but this is an implementation detail. +//! +//! VFS doesn't do IO or file watching itself. For that, see the `loader` +//! module. `loader::Handle` is an object-safe trait which abstracts both file +//! loading and file watching. `Handle` is dynamically configured with a set of +//! directory entries which should be scanned and watched. `Handle` then +//! asynchronously pushes file changes. Directory entries are configured in +//! free-form via list of globs, it's up to the `Handle` to interpret the globs +//! in any specific way. +//! +//! A simple `WalkdirLoaderHandle` is provided, which doesn't implement watching +//! and just scans the directory using walkdir. +//! +//! VFS stores a flat list of files. `FileSet` can partition this list of files +//! into disjoint sets of files. Traversal-like operations (including getting +//! the neighbor file by the relative path) are handled by the `FileSet`. +//! `FileSet`s are also pushed to salsa and cause it to re-check `mod foo;` +//! declarations when files are created or deleted. +//! +//! `file_set::FileSet` and `loader::Entry` play similar, but different roles. +//! Both specify the "set of paths/files", one is geared towards file watching, +//! the other towards salsa changes. In particular, single `file_set::FileSet` +//! may correspond to several `loader::Entry`. For example, a crate from +//! crates.io which uses code generation would have two `Entries` -- for sources +//! in `~/.cargo`, and for generated code in `./target/debug/build`. It will +//! have a single `FileSet` which unions the two sources. +mod vfs_path; +mod path_interner; +pub mod file_set; +pub mod loader; +pub mod walkdir_loader; + +use std::{fmt, mem}; + +use crate::path_interner::PathInterner; + +pub use crate::vfs_path::VfsPath; +pub use paths::{AbsPath, AbsPathBuf}; + +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct FileId(pub u32); + +#[derive(Default)] +pub struct Vfs { + interner: PathInterner, + data: Vec>>, + changes: Vec, +} + +pub struct ChangedFile { + pub file_id: FileId, + pub change_kind: ChangeKind, +} + +impl ChangedFile { + pub fn exists(&self) -> bool { + self.change_kind != ChangeKind::Delete + } + pub fn is_created_or_deleted(&self) -> bool { + matches!(self.change_kind, ChangeKind::Create | ChangeKind::Delete) + } +} + +#[derive(Eq, PartialEq)] +pub enum ChangeKind { + Create, + Modify, + Delete, +} + +impl Vfs { + pub fn file_id(&self, path: &VfsPath) -> Option { + self.interner.get(path).filter(|&it| self.get(it).is_some()) + } + pub fn file_path(&self, file_id: FileId) -> VfsPath { + self.interner.lookup(file_id).clone() + } + pub fn file_contents(&self, file_id: FileId) -> &[u8] { + self.get(file_id).as_deref().unwrap() + } + pub fn iter(&self) -> impl Iterator + '_ { + (0..self.data.len()) + .map(|it| FileId(it as u32)) + .filter(move |&file_id| self.get(file_id).is_some()) + .map(move |file_id| { + let path = self.interner.lookup(file_id).clone(); + (file_id, path) + }) + } + pub fn set_file_contents(&mut self, path: VfsPath, contents: Option>) { + let file_id = self.alloc_file_id(path); + let change_kind = match (&self.get(file_id), &contents) { + (None, None) => return, + (None, Some(_)) => ChangeKind::Create, + (Some(_), None) => ChangeKind::Delete, + (Some(old), Some(new)) if old == new => return, + (Some(_), Some(_)) => ChangeKind::Modify, + }; + + *self.get_mut(file_id) = contents; + self.changes.push(ChangedFile { file_id, change_kind }) + } + pub fn has_changes(&self) -> bool { + !self.changes.is_empty() + } + pub fn take_changes(&mut self) -> Vec { + mem::take(&mut self.changes) + } + fn alloc_file_id(&mut self, path: VfsPath) -> FileId { + let file_id = self.interner.intern(path); + let idx = file_id.0 as usize; + let len = self.data.len().max(idx + 1); + self.data.resize_with(len, || None); + file_id + } + fn get(&self, file_id: FileId) -> &Option> { + &self.data[file_id.0 as usize] + } + fn get_mut(&mut self, file_id: FileId) -> &mut Option> { + &mut self.data[file_id.0 as usize] + } +} + +impl fmt::Debug for Vfs { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Vfs").field("n_files", &self.data.len()).finish() + } +} diff --git a/crates/vfs/src/loader.rs b/crates/vfs/src/loader.rs new file mode 100644 index 0000000000..5a0ca68f38 --- /dev/null +++ b/crates/vfs/src/loader.rs @@ -0,0 +1,69 @@ +//! Object safe interface for file watching and reading. +use std::fmt; + +use paths::AbsPathBuf; + +pub enum Entry { + Files(Vec), + Directory { path: AbsPathBuf, globs: Vec }, +} + +pub struct Config { + pub load: Vec, + pub watch: Vec, +} + +pub enum Message { + DidSwitchConfig { n_entries: usize }, + DidLoadAllEntries, + Loaded { files: Vec<(AbsPathBuf, Option>)> }, +} + +pub type Sender = Box; + +pub trait Handle: fmt::Debug { + fn spawn(sender: Sender) -> Self + where + Self: Sized; + fn set_config(&mut self, config: Config); + fn invalidate(&mut self, path: AbsPathBuf); + fn load_sync(&mut self, path: &AbsPathBuf) -> Option>; +} + +impl Entry { + pub fn rs_files_recursively(base: AbsPathBuf) -> Entry { + Entry::Directory { path: base, globs: globs(&["*.rs"]) } + } + pub fn local_cargo_package(base: AbsPathBuf) -> Entry { + Entry::Directory { path: base, globs: globs(&["*.rs", "!/target/"]) } + } + pub fn cargo_package_dependency(base: AbsPathBuf) -> Entry { + Entry::Directory { + path: base, + globs: globs(&["*.rs", "!/tests/", "!/examples/", "!/benches/"]), + } + } +} + +fn globs(globs: &[&str]) -> Vec { + globs.iter().map(|it| it.to_string()).collect() +} + +impl fmt::Debug for Message { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Message::Loaded { files } => { + f.debug_struct("Loaded").field("n_files", &files.len()).finish() + } + Message::DidSwitchConfig { n_entries } => { + f.debug_struct("DidSwitchConfig").field("n_entries", n_entries).finish() + } + Message::DidLoadAllEntries => f.debug_struct("DidLoadAllEntries").finish(), + } + } +} + +#[test] +fn handle_is_object_safe() { + fn _assert(_: &dyn Handle) {} +} diff --git a/crates/vfs/src/path_interner.rs b/crates/vfs/src/path_interner.rs new file mode 100644 index 0000000000..4f70d61e80 --- /dev/null +++ b/crates/vfs/src/path_interner.rs @@ -0,0 +1,31 @@ +//! Maps paths to compact integer ids. We don't care about clearings paths which +//! no longer exist -- the assumption is total size of paths we ever look at is +//! not too big. +use rustc_hash::FxHashMap; + +use crate::{FileId, VfsPath}; + +#[derive(Default)] +pub(crate) struct PathInterner { + map: FxHashMap, + vec: Vec, +} + +impl PathInterner { + pub(crate) fn get(&self, path: &VfsPath) -> Option { + self.map.get(path).copied() + } + pub(crate) fn intern(&mut self, path: VfsPath) -> FileId { + if let Some(id) = self.get(&path) { + return id; + } + let id = FileId(self.vec.len() as u32); + self.map.insert(path.clone(), id); + self.vec.push(path); + id + } + + pub(crate) fn lookup(&self, id: FileId) -> &VfsPath { + &self.vec[id.0 as usize] + } +} diff --git a/crates/vfs/src/vfs_path.rs b/crates/vfs/src/vfs_path.rs new file mode 100644 index 0000000000..de5dc0bf34 --- /dev/null +++ b/crates/vfs/src/vfs_path.rs @@ -0,0 +1,49 @@ +//! Abstract-ish representation of paths for VFS. +use std::fmt; + +use paths::{AbsPath, AbsPathBuf}; + +/// Long-term, we want to support files which do not reside in the file-system, +/// so we treat VfsPaths as opaque identifiers. +#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct VfsPath(VfsPathRepr); + +impl VfsPath { + pub fn as_path(&self) -> Option<&AbsPath> { + match &self.0 { + VfsPathRepr::PathBuf(it) => Some(it.as_path()), + } + } + pub fn join(&self, path: &str) -> VfsPath { + match &self.0 { + VfsPathRepr::PathBuf(it) => { + let res = it.join(path).normalize(); + VfsPath(VfsPathRepr::PathBuf(res)) + } + } + } + pub fn pop(&mut self) -> bool { + match &mut self.0 { + VfsPathRepr::PathBuf(it) => it.pop(), + } + } +} + +#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +enum VfsPathRepr { + PathBuf(AbsPathBuf), +} + +impl From for VfsPath { + fn from(v: AbsPathBuf) -> Self { + VfsPath(VfsPathRepr::PathBuf(v)) + } +} + +impl fmt::Display for VfsPath { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.0 { + VfsPathRepr::PathBuf(it) => fmt::Display::fmt(&it.display(), f), + } + } +} diff --git a/crates/vfs/src/walkdir_loader.rs b/crates/vfs/src/walkdir_loader.rs new file mode 100644 index 0000000000..13e59e3f34 --- /dev/null +++ b/crates/vfs/src/walkdir_loader.rs @@ -0,0 +1,108 @@ +//! A walkdir-based implementation of `loader::Handle`, which doesn't try to +//! watch files. +use std::convert::TryFrom; + +use globset::{Glob, GlobSetBuilder}; +use paths::{AbsPath, AbsPathBuf}; +use walkdir::WalkDir; + +use crate::loader; + +#[derive(Debug)] +pub struct WalkdirLoaderHandle { + // Relative order of fields below is significant. + sender: crossbeam_channel::Sender, + _thread: jod_thread::JoinHandle, +} + +enum Message { + Config(loader::Config), + Invalidate(AbsPathBuf), +} + +impl loader::Handle for WalkdirLoaderHandle { + fn spawn(sender: loader::Sender) -> WalkdirLoaderHandle { + let actor = WalkdirLoaderActor { sender }; + let (sender, receiver) = crossbeam_channel::unbounded::(); + let thread = jod_thread::spawn(move || actor.run(receiver)); + WalkdirLoaderHandle { sender, _thread: thread } + } + fn set_config(&mut self, config: loader::Config) { + self.sender.send(Message::Config(config)).unwrap() + } + fn invalidate(&mut self, path: AbsPathBuf) { + self.sender.send(Message::Invalidate(path)).unwrap(); + } + fn load_sync(&mut self, path: &AbsPathBuf) -> Option> { + read(path) + } +} + +struct WalkdirLoaderActor { + sender: loader::Sender, +} + +impl WalkdirLoaderActor { + fn run(mut self, receiver: crossbeam_channel::Receiver) { + for msg in receiver { + match msg { + Message::Config(config) => { + self.send(loader::Message::DidSwitchConfig { n_entries: config.load.len() }); + for entry in config.load.into_iter() { + let files = self.load_entry(entry); + self.send(loader::Message::Loaded { files }); + } + drop(config.watch); + self.send(loader::Message::DidLoadAllEntries); + } + Message::Invalidate(path) => { + let contents = read(path.as_path()); + let files = vec![(path, contents)]; + self.send(loader::Message::Loaded { files }); + } + } + } + } + fn load_entry(&mut self, entry: loader::Entry) -> Vec<(AbsPathBuf, Option>)> { + match entry { + loader::Entry::Files(files) => files + .into_iter() + .map(|file| { + let contents = read(file.as_path()); + (file, contents) + }) + .collect::>(), + loader::Entry::Directory { path, globs } => { + let globset = { + let mut builder = GlobSetBuilder::new(); + for glob in &globs { + builder.add(Glob::new(glob).unwrap()); + } + builder.build().unwrap() + }; + + let files = WalkDir::new(path) + .into_iter() + .filter_map(|it| it.ok()) + .filter(|it| it.file_type().is_file()) + .map(|it| it.into_path()) + .map(|it| AbsPathBuf::try_from(it).unwrap()) + .filter(|it| globset.is_match(&it)); + + files + .map(|file| { + let contents = read(file.as_path()); + (file, contents) + }) + .collect() + } + } + } + fn send(&mut self, msg: loader::Message) { + (self.sender)(msg) + } +} + +fn read(path: &AbsPath) -> Option> { + std::fs::read(path).ok() +}