New VFS API

This commit is contained in:
Aleksey Kladov 2020-06-15 13:29:07 +02:00
parent db6100dbaa
commit c002322bde
8 changed files with 520 additions and 0 deletions

12
Cargo.lock generated
View file

@ -1729,6 +1729,18 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "vfs"
version = "0.1.0"
dependencies = [
"crossbeam-channel",
"globset",
"jod-thread",
"paths",
"rustc-hash",
"walkdir",
]
[[package]] [[package]]
name = "walkdir" name = "walkdir"
version = "2.3.1" version = "2.3.1"

14
crates/vfs/Cargo.toml Normal file
View file

@ -0,0 +1,14 @@
[package]
name = "vfs"
version = "0.1.0"
authors = ["rust-analyzer developers"]
edition = "2018"
[dependencies]
rustc-hash = "1.0"
jod-thread = "0.1.0"
walkdir = "2.3.1"
globset = "0.4.5"
crossbeam-channel = "0.4.0"
paths = { path = "../paths" }

View file

@ -0,0 +1,99 @@
//! Partitions a list of files into disjoint subsets.
//!
//! Files which do not belong to any explicitly configured `FileSet` belong to
//! the default `FileSet`.
use std::{cmp, fmt, iter};
use paths::AbsPathBuf;
use rustc_hash::FxHashMap;
use crate::{FileId, Vfs, VfsPath};
/// One subset of the files known to the VFS, with lookup in both directions
/// between a file's `FileId` and its `VfsPath`.
#[derive(Default, Clone, Eq, PartialEq)]
pub struct FileSet {
// Path -> id; consulted by `resolve_path` to find a file by its path.
files: FxHashMap<VfsPath, FileId>,
// Id -> path; consulted by `resolve_path` to find the anchor's location.
paths: FxHashMap<FileId, VfsPath>,
}
impl FileSet {
    /// Resolves `path` relative to the directory that contains `anchor`.
    ///
    /// Returns `None` when `anchor` is not a member of this set, or when no
    /// file of this set has the resulting path.
    pub fn resolve_path(&self, anchor: FileId, path: &str) -> Option<FileId> {
        // An anchor from outside this set is reported as "not found" instead
        // of panicking on a missing map key.
        let mut base = self.paths.get(&anchor)?.clone();
        // Drop the anchor's own last component so that `path` is joined onto
        // its parent.
        base.pop();
        let path = base.join(path);
        self.files.get(&path).copied()
    }

    /// Records `file_id` <-> `path` in both lookup tables.
    pub fn insert(&mut self, file_id: FileId, path: VfsPath) {
        self.files.insert(path.clone(), file_id);
        self.paths.insert(file_id, path);
    }

    /// Iterates over the ids of all files in this set, in unspecified order.
    pub fn iter(&self) -> impl Iterator<Item = FileId> + '_ {
        self.paths.keys().copied()
    }
}
impl fmt::Debug for FileSet {
    /// Prints only the number of files rather than dumping both maps.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let n_files = self.files.len();
        let mut out = f.debug_struct("FileSet");
        out.field("n_files", &n_files);
        out.finish()
    }
}
/// Describes how `partition` splits the files of a `Vfs` into `FileSet`s.
#[derive(Debug)]
pub struct FileSetConfig {
// Number of sets `partition` returns. The last index is the default set used
// for files that match no root.
n_file_sets: usize,
// `(root, set index)` pairs; `classify` returns the index of the first root
// that is a prefix of the file's path.
roots: Vec<(AbsPathBuf, usize)>,
}
impl FileSetConfig {
    /// Returns an empty builder for a `FileSetConfig`.
    pub fn builder() -> FileSetConfigBuilder {
        Default::default()
    }

    /// Distributes every file currently in `vfs` over `self.len()` file sets.
    ///
    /// The returned vector is indexed by the value `classify` yields for a
    /// path; its last element is the default set.
    pub fn partition(&self, vfs: &Vfs) -> Vec<FileSet> {
        let mut file_sets = vec![FileSet::default(); self.len()];
        vfs.iter().for_each(|(file_id, path)| {
            let idx = self.classify(&path);
            file_sets[idx].insert(file_id, path);
        });
        file_sets
    }

    /// Number of file sets, including the default one.
    fn len(&self) -> usize {
        self.n_file_sets
    }

    /// Returns the index of the set `path` belongs to: the index attached to
    /// the first root that prefixes it, or the last index when there is none.
    fn classify(&self, path: &VfsPath) -> usize {
        path.as_path()
            .and_then(|abs| self.roots.iter().find(|(root, _)| abs.starts_with(root)))
            .map(|(_, idx)| *idx)
            .unwrap_or_else(|| self.len() - 1)
    }
}
/// Accumulates groups of root directories; each group becomes one file set
/// of the resulting `FileSetConfig`. The default value holds no groups.
#[derive(Default)]
pub struct FileSetConfigBuilder {
    // `roots[i]` lists the root directories of the i-th file set.
    roots: Vec<Vec<AbsPathBuf>>,
}
impl FileSetConfigBuilder {
    /// Registers one more file set made of everything under `roots`.
    ///
    /// Sets are numbered in the order they are added, starting from zero.
    pub fn add_file_set(&mut self, roots: Vec<AbsPathBuf>) {
        self.roots.push(roots);
    }

    /// Finishes the configuration.
    ///
    /// The result has one set more than was added: the extra, last one is the
    /// default set for files outside every root.
    pub fn build(self) -> FileSetConfig {
        let n_file_sets = self.roots.len() + 1;

        // Flatten the groups into `(root, set index)` pairs.
        let mut roots: Vec<(AbsPathBuf, usize)> = Vec::new();
        for (idx, group) in self.roots.into_iter().enumerate() {
            roots.extend(group.into_iter().zip(iter::repeat(idx)));
        }

        // Longest paths first, so that a root nested inside another root is
        // tried before the outer one.
        roots.sort_by_key(|(root, _)| cmp::Reverse(root.to_string_lossy().len()));

        FileSetConfig { n_file_sets, roots }
    }
}

138
crates/vfs/src/lib.rs Normal file
View file

@ -0,0 +1,138 @@
//! # Virtual File System
//!
//! VFS stores all files read by rust-analyzer. Reading file contents from VFS
//! always returns the same contents, unless VFS was explicitly modified with
//! `set_file_contents`. All changes to VFS are logged, and can be retrieved via
//! `take_changes` method. The pack of changes is then pushed to `salsa` and
//! triggers incremental recomputation.
//!
//! Files in VFS are identified with `FileId`s -- interned paths. The notion of
//! the path, `VfsPath` is somewhat abstract: at the moment, it is represented
//! as an `std::path::PathBuf` internally, but this is an implementation detail.
//!
//! VFS doesn't do IO or file watching itself. For that, see the `loader`
//! module. `loader::Handle` is an object-safe trait which abstracts both file
//! loading and file watching. `Handle` is dynamically configured with a set of
//! directory entries which should be scanned and watched. `Handle` then
//! asynchronously pushes file changes. Directory entries are configured in
//! free-form via list of globs, it's up to the `Handle` to interpret the globs
//! in any specific way.
//!
//! A simple `WalkdirLoaderHandle` is provided, which doesn't implement watching
//! and just scans the directory using walkdir.
//!
//! VFS stores a flat list of files. `FileSet` can partition this list of files
//! into disjoint sets of files. Traversal-like operations (including getting
//! the neighbor file by the relative path) are handled by the `FileSet`.
//! `FileSet`s are also pushed to salsa and cause it to re-check `mod foo;`
//! declarations when files are created or deleted.
//!
//! `file_set::FileSet` and `loader::Entry` play similar, but different roles.
//! Both specify the "set of paths/files", one is geared towards file watching,
//! the other towards salsa changes. In particular, single `file_set::FileSet`
//! may correspond to several `loader::Entry`. For example, a crate from
//! crates.io which uses code generation would have two `Entries` -- for sources
//! in `~/.cargo`, and for generated code in `./target/debug/build`. It will
//! have a single `FileSet` which unions the two sources.
mod vfs_path;
mod path_interner;
pub mod file_set;
pub mod loader;
pub mod walkdir_loader;
use std::{fmt, mem};
use crate::path_interner::PathInterner;
pub use crate::vfs_path::VfsPath;
pub use paths::{AbsPath, AbsPathBuf};
/// Compact handle to a file known to the VFS. The wrapped integer is used as
/// an index into per-file tables.
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct FileId(pub u32);
/// In-memory store of file contents plus a log of the changes made to it.
#[derive(Default)]
pub struct Vfs {
// Maps paths to `FileId`s and back.
interner: PathInterner,
// Contents indexed by `FileId.0`; `None` means the file currently has no
// contents (never set, or deleted).
data: Vec<Option<Vec<u8>>>,
// Changes recorded since the last `take_changes` call.
changes: Vec<ChangedFile>,
}
/// One entry of the change log kept by `Vfs`.
pub struct ChangedFile {
/// The file that changed.
pub file_id: FileId,
/// Whether the file was created, modified or deleted.
pub change_kind: ChangeKind,
}
impl ChangedFile {
    /// `true` unless this change deleted the file.
    pub fn exists(&self) -> bool {
        !matches!(self.change_kind, ChangeKind::Delete)
    }

    /// `true` for changes that add or remove a file, `false` for a
    /// modification of an existing one.
    pub fn is_created_or_deleted(&self) -> bool {
        match self.change_kind {
            ChangeKind::Create | ChangeKind::Delete => true,
            ChangeKind::Modify => false,
        }
    }
}
/// What happened to a file in a single `ChangedFile` record.
///
/// The type is a plain tag, so it is `Copy` and can be debug-printed.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ChangeKind {
    /// The file had no contents before and has contents now.
    Create,
    /// The file had contents before and has different contents now.
    Modify,
    /// The file had contents before and has none now.
    Delete,
}
impl Vfs {
    /// Returns the id of `path`, provided the file currently has contents.
    pub fn file_id(&self, path: &VfsPath) -> Option<FileId> {
        let file_id = self.interner.get(path)?;
        self.get(file_id).as_ref().map(|_| file_id)
    }

    /// Returns the path `file_id` was interned from.
    pub fn file_path(&self, file_id: FileId) -> VfsPath {
        let path = self.interner.lookup(file_id);
        path.clone()
    }

    /// Returns the current contents of `file_id`.
    ///
    /// # Panics
    ///
    /// Panics if the file currently has no contents.
    pub fn file_contents(&self, file_id: FileId) -> &[u8] {
        self.get(file_id).as_ref().map(|bytes| bytes.as_slice()).unwrap()
    }

    /// Iterates over all files that currently have contents, in increasing
    /// `FileId` order.
    pub fn iter(&self) -> impl Iterator<Item = (FileId, VfsPath)> + '_ {
        (0..self.data.len()).filter_map(move |idx| {
            let file_id = FileId(idx as u32);
            if self.get(file_id).is_none() {
                return None;
            }
            Some((file_id, self.interner.lookup(file_id).clone()))
        })
    }

    /// Sets the contents of `path`; `None` removes them.
    ///
    /// A `ChangedFile` is logged only if the stored state actually changes:
    /// writing identical bytes, or deleting a file that has no contents, is a
    /// no-op.
    pub fn set_file_contents(&mut self, path: VfsPath, contents: Option<Vec<u8>>) {
        let file_id = self.alloc_file_id(path);
        let slot = self.get_mut(file_id);
        let change_kind = match (slot.as_ref(), contents.as_ref()) {
            (None, None) => return,
            (Some(old), Some(new)) if old == new => return,
            (None, Some(_)) => ChangeKind::Create,
            (Some(_), None) => ChangeKind::Delete,
            (Some(_), Some(_)) => ChangeKind::Modify,
        };
        *slot = contents;
        self.changes.push(ChangedFile { file_id, change_kind });
    }

    /// `true` if changes were logged since the last `take_changes`.
    pub fn has_changes(&self) -> bool {
        !self.changes.is_empty()
    }

    /// Returns the logged changes and leaves the log empty.
    pub fn take_changes(&mut self) -> Vec<ChangedFile> {
        mem::take(&mut self.changes)
    }

    /// Interns `path` and makes sure `data` has a slot for the resulting id.
    fn alloc_file_id(&mut self, path: VfsPath) -> FileId {
        let file_id = self.interner.intern(path);
        let idx = file_id.0 as usize;
        if self.data.len() <= idx {
            self.data.resize_with(idx + 1, || None);
        }
        file_id
    }

    /// Contents slot of `file_id`; panics if no slot was allocated for it.
    fn get(&self, file_id: FileId) -> &Option<Vec<u8>> {
        let idx = file_id.0 as usize;
        &self.data[idx]
    }

    /// Mutable contents slot of `file_id`; panics if no slot was allocated.
    fn get_mut(&mut self, file_id: FileId) -> &mut Option<Vec<u8>> {
        let idx = file_id.0 as usize;
        &mut self.data[idx]
    }
}
impl fmt::Debug for Vfs {
    /// Prints only the number of allocated file slots, not the contents.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let n_files = self.data.len();
        let mut out = f.debug_struct("Vfs");
        out.field("n_files", &n_files);
        out.finish()
    }
}

69
crates/vfs/src/loader.rs Normal file
View file

@ -0,0 +1,69 @@
//! Object safe interface for file watching and reading.
use std::fmt;
use paths::AbsPathBuf;
/// A group of paths a loader is asked to load.
pub enum Entry {
/// An explicit list of files.
Files(Vec<AbsPathBuf>),
/// The files under directory `path`, selected by the glob strings in `globs`.
Directory { path: AbsPathBuf, globs: Vec<String> },
}
/// Configuration passed to `Handle::set_config`.
pub struct Config {
/// Entries whose files should be loaded.
pub load: Vec<Entry>,
/// NOTE(review): presumably indices into `load` naming the entries that
/// should also be watched -- confirm against the implementations.
pub watch: Vec<usize>,
}
/// Notifications a loader passes to its `Sender` callback.
pub enum Message {
/// A new `Config` was picked up; `n_entries` is the number of entries in its
/// `load` list.
DidSwitchConfig { n_entries: usize },
/// Every entry of the current config has been reported.
DidLoadAllEntries,
/// A batch of files together with the contents that were read for them.
/// NOTE(review): `None` presumably means the file could not be read --
/// confirm against the implementations.
Loaded { files: Vec<(AbsPathBuf, Option<Vec<u8>>)> },
}
/// Callback through which a loader reports `Message`s; it is `Send` so that it
/// can be moved to another thread.
pub type Sender = Box<dyn Fn(Message) + Send>;
/// Interface to a file loader. All methods except `spawn` can be called
/// through `dyn Handle`.
pub trait Handle: fmt::Debug {
/// Creates the loader; `sender` is the callback it reports `Message`s to.
/// The `Self: Sized` bound keeps this constructor out of the trait object.
fn spawn(sender: Sender) -> Self
where
Self: Sized;
/// Replaces the loader's configuration with `config`.
fn set_config(&mut self, config: Config);
/// Tells the loader that the file at `path` should be read again.
fn invalidate(&mut self, path: AbsPathBuf);
/// Reads the file at `path` and returns its contents directly to the caller
/// instead of going through the `Sender` callback.
fn load_sync(&mut self, path: &AbsPathBuf) -> Option<Vec<u8>>;
}
impl Entry {
    /// Directory entry for `base` with the single glob `*.rs`.
    pub fn rs_files_recursively(base: AbsPathBuf) -> Entry {
        let patterns = ["*.rs"];
        Entry::Directory { path: base, globs: globs(&patterns) }
    }

    /// Directory entry for a local package rooted at `base`: `*.rs` plus the
    /// pattern `!/target/`.
    pub fn local_cargo_package(base: AbsPathBuf) -> Entry {
        let patterns = ["*.rs", "!/target/"];
        Entry::Directory { path: base, globs: globs(&patterns) }
    }

    /// Directory entry for a dependency package rooted at `base`: `*.rs` plus
    /// the patterns `!/tests/`, `!/examples/` and `!/benches/`.
    pub fn cargo_package_dependency(base: AbsPathBuf) -> Entry {
        let patterns = ["*.rs", "!/tests/", "!/examples/", "!/benches/"];
        Entry::Directory { path: base, globs: globs(&patterns) }
    }
}
/// Copies a slice of borrowed glob patterns into owned `String`s, keeping
/// their order.
fn globs(globs: &[&str]) -> Vec<String> {
    let mut owned = Vec::with_capacity(globs.len());
    for &pattern in globs {
        owned.push(String::from(pattern));
    }
    owned
}
impl fmt::Debug for Message {
    /// Prints the variant name and, where present, a count; file contents are
    /// never dumped.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (name, count) = match self {
            Message::Loaded { files } => ("Loaded", Some(("n_files", files.len()))),
            Message::DidSwitchConfig { n_entries } => {
                ("DidSwitchConfig", Some(("n_entries", *n_entries)))
            }
            Message::DidLoadAllEntries => ("DidLoadAllEntries", None),
        };
        let mut out = f.debug_struct(name);
        if let Some((label, value)) = count {
            out.field(label, &value);
        }
        out.finish()
    }
}
#[test]
fn handle_is_object_safe() {
    // Compile-time check only: naming `dyn Handle` is rejected by the compiler
    // unless the trait is object safe.
    let _: Option<&dyn Handle> = None;
}

View file

@ -0,0 +1,31 @@
//! Maps paths to compact integer ids. We don't care about clearing paths which
//! no longer exist -- the assumption is total size of paths we ever look at is
//! not too big.
use rustc_hash::FxHashMap;
use crate::{FileId, VfsPath};
/// Two-way table between `VfsPath`s and the `FileId`s assigned to them.
#[derive(Default)]
pub(crate) struct PathInterner {
// Path -> id.
map: FxHashMap<VfsPath, FileId>,
// Id -> path: the path of `FileId(i)` is stored at index `i`.
vec: Vec<VfsPath>,
}
impl PathInterner {
    /// Returns the id of `path` if it has been interned before.
    pub(crate) fn get(&self, path: &VfsPath) -> Option<FileId> {
        self.map.get(path).copied()
    }

    /// Returns the id of `path`, assigning the next free id on first sight.
    pub(crate) fn intern(&mut self, path: VfsPath) -> FileId {
        match self.get(&path) {
            Some(existing) => existing,
            None => {
                // Ids are handed out sequentially: the new id is the index
                // the path is about to occupy in `vec`.
                let fresh = FileId(self.vec.len() as u32);
                self.map.insert(path.clone(), fresh);
                self.vec.push(path);
                fresh
            }
        }
    }

    /// Returns the path behind `id`; panics if `id` was never handed out by
    /// this interner.
    pub(crate) fn lookup(&self, id: FileId) -> &VfsPath {
        let idx = id.0 as usize;
        &self.vec[idx]
    }
}

View file

@ -0,0 +1,49 @@
//! Abstract-ish representation of paths for VFS.
use std::fmt;
use paths::{AbsPath, AbsPathBuf};
/// Path of a file as seen by the VFS.
///
/// Long-term, we want to support files which do not reside in the file-system,
/// so we treat VfsPaths as opaque identifiers.
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct VfsPath(VfsPathRepr);
impl VfsPath {
    /// Returns the underlying file-system path. The result is an `Option` so
    /// that the signature can accommodate paths without one; the only current
    /// representation always yields `Some`.
    pub fn as_path(&self) -> Option<&AbsPath> {
        let VfsPathRepr::PathBuf(abs) = &self.0;
        Some(abs.as_path())
    }

    /// Returns a new path made by joining `path` onto `self` and normalizing
    /// the result.
    pub fn join(&self, path: &str) -> VfsPath {
        let VfsPathRepr::PathBuf(base) = &self.0;
        let joined = base.join(path).normalize();
        VfsPath(VfsPathRepr::PathBuf(joined))
    }

    /// Removes the last component in place and returns whatever the
    /// underlying `AbsPathBuf::pop` reports.
    pub fn pop(&mut self) -> bool {
        let VfsPathRepr::PathBuf(abs) = &mut self.0;
        abs.pop()
    }
}
/// Private representation behind `VfsPath`; kept as an enum although an
/// absolute file-system path is currently the only variant.
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
enum VfsPathRepr {
PathBuf(AbsPathBuf),
}
impl From<AbsPathBuf> for VfsPath {
fn from(v: AbsPathBuf) -> Self {
VfsPath(VfsPathRepr::PathBuf(v))
}
}
impl fmt::Display for VfsPath {
    /// Formats the path through the `display()` adapter of the underlying
    /// path, forwarding the caller's formatter unchanged.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let VfsPathRepr::PathBuf(abs) = &self.0;
        let shown = abs.display();
        fmt::Display::fmt(&shown, f)
    }
}

View file

@ -0,0 +1,108 @@
//! A walkdir-based implementation of `loader::Handle`, which doesn't try to
//! watch files.
use std::convert::TryFrom;
use globset::{Glob, GlobSetBuilder};
use paths::{AbsPath, AbsPathBuf};
use walkdir::WalkDir;
use crate::loader;
/// Handle to a background thread that loads files with `walkdir`; requests are
/// sent to that thread over a channel.
#[derive(Debug)]
pub struct WalkdirLoaderHandle {
// Relative order of fields below is significant.
// Rust drops fields in declaration order, so `sender` goes away before
// `_thread`.
// NOTE(review): presumably dropping `_thread` joins the worker, which only
// returns once the channel is disconnected -- confirm against jod-thread and
// crossbeam-channel.
sender: crossbeam_channel::Sender<Message>,
_thread: jod_thread::JoinHandle,
}
/// Requests sent from `WalkdirLoaderHandle` to its worker thread.
enum Message {
// Load all entries of a new configuration.
Config(loader::Config),
// Re-read a single file.
Invalidate(AbsPathBuf),
}
impl loader::Handle for WalkdirLoaderHandle {
    /// Starts the worker thread; results are reported through `sender`.
    fn spawn(sender: loader::Sender) -> WalkdirLoaderHandle {
        let (requests_tx, requests_rx) = crossbeam_channel::unbounded::<Message>();
        let actor = WalkdirLoaderActor { sender };
        let worker = jod_thread::spawn(move || actor.run(requests_rx));
        WalkdirLoaderHandle { sender: requests_tx, _thread: worker }
    }

    /// Queues `config` for the worker; panics if the request cannot be sent.
    fn set_config(&mut self, config: loader::Config) {
        let request = Message::Config(config);
        self.sender.send(request).unwrap()
    }

    /// Queues a re-read of `path`; panics if the request cannot be sent.
    fn invalidate(&mut self, path: AbsPathBuf) {
        let request = Message::Invalidate(path);
        self.sender.send(request).unwrap();
    }

    /// Reads `path` on the calling thread, bypassing the worker.
    fn load_sync(&mut self, path: &AbsPathBuf) -> Option<Vec<u8>> {
        read(path.as_path())
    }
}
/// State owned by the worker thread of `WalkdirLoaderHandle`.
struct WalkdirLoaderActor {
// Callback used to report `loader::Message`s.
sender: loader::Sender,
}
impl WalkdirLoaderActor {
    /// Worker loop: handles requests until `receiver` stops yielding them.
    fn run(mut self, receiver: crossbeam_channel::Receiver<Message>) {
        for msg in receiver {
            match msg {
                Message::Config(config) => self.apply_config(config),
                Message::Invalidate(path) => self.reload(path),
            }
        }
    }

    /// Loads every entry of `config`, reporting `DidSwitchConfig` first, one
    /// `Loaded` per entry, and `DidLoadAllEntries` last.
    fn apply_config(&mut self, config: loader::Config) {
        let loader::Config { load, watch } = config;
        self.send(loader::Message::DidSwitchConfig { n_entries: load.len() });
        for entry in load {
            let files = self.load_entry(entry);
            self.send(loader::Message::Loaded { files });
        }
        // This loader does not watch files, so the watch list is discarded.
        drop(watch);
        self.send(loader::Message::DidLoadAllEntries);
    }

    /// Re-reads a single file and reports it as a one-element `Loaded` batch.
    fn reload(&mut self, path: AbsPathBuf) {
        let contents = read(path.as_path());
        self.send(loader::Message::Loaded { files: vec![(path, contents)] });
    }

    /// Reads all files described by `entry` and pairs each path with its
    /// contents (`None` when reading failed).
    fn load_entry(&mut self, entry: loader::Entry) -> Vec<(AbsPathBuf, Option<Vec<u8>>)> {
        fn with_contents(file: AbsPathBuf) -> (AbsPathBuf, Option<Vec<u8>>) {
            let contents = read(file.as_path());
            (file, contents)
        }

        match entry {
            loader::Entry::Files(files) => files.into_iter().map(with_contents).collect(),
            loader::Entry::Directory { path, globs } => {
                // NOTE(review): every glob is handed to `globset` verbatim and
                // a file is kept if any glob matches. The `loader::Entry`
                // constructors pass patterns such as "!/target/"; confirm
                // that `globset` treats a leading `!` as an exclusion,
                // otherwise those patterns filter nothing.
                let mut builder = GlobSetBuilder::new();
                globs.iter().for_each(|glob| {
                    builder.add(Glob::new(glob).unwrap());
                });
                let globset = builder.build().unwrap();

                // Entries that fail to be walked are skipped silently.
                WalkDir::new(path)
                    .into_iter()
                    .filter_map(|entry| {
                        let entry = entry.ok()?;
                        if !entry.file_type().is_file() {
                            return None;
                        }
                        let file = AbsPathBuf::try_from(entry.into_path()).unwrap();
                        if globset.is_match(&file) {
                            Some(file)
                        } else {
                            None
                        }
                    })
                    .map(with_contents)
                    .collect()
            }
        }
    }

    /// Passes `msg` to the callback supplied at spawn time.
    fn send(&mut self, msg: loader::Message) {
        let callback = &self.sender;
        callback(msg)
    }
}
/// Reads the whole file at `path`; any I/O error is reported as `None`.
fn read(path: &AbsPath) -> Option<Vec<u8>> {
    match std::fs::read(path) {
        Ok(bytes) => Some(bytes),
        Err(_) => None,
    }
}