Speed up VFS::partition

The task of the `partition` function is to bin the flat list of paths into
disjoint filesets. Ideally, it should be incremental -- each new file
should be added to a specific fileset.

However, preliminary measurements show that it is actually fast enough
if we just optimize this to use a binary search instead of a linear
scan.
This commit is contained in:
Aleksey Kladov 2020-06-19 15:07:32 +02:00
parent 902a9c6da7
commit b9f3c5d585
4 changed files with 30 additions and 11 deletions

View file

@ -2,7 +2,7 @@
//! relative paths. //! relative paths.
use std::{ use std::{
convert::{TryFrom, TryInto}, convert::{TryFrom, TryInto},
ops, io, ops,
path::{Component, Path, PathBuf}, path::{Component, Path, PathBuf},
}; };
@ -46,6 +46,9 @@ impl TryFrom<&str> for AbsPathBuf {
} }
impl AbsPathBuf { impl AbsPathBuf {
pub fn canonicalized(path: &Path) -> io::Result<AbsPathBuf> {
path.canonicalize().map(|it| AbsPathBuf::try_from(it).unwrap())
}
pub fn as_path(&self) -> &AbsPath { pub fn as_path(&self) -> &AbsPath {
AbsPath::new_unchecked(self.0.as_path()) AbsPath::new_unchecked(self.0.as_path())
} }

View file

@ -1,5 +1,4 @@
//! Missing batteries for standard libraries. //! Missing batteries for standard libraries.
use std::{cell::Cell, fmt, time::Instant}; use std::{cell::Cell, fmt, time::Instant};
#[inline(always)] #[inline(always)]

View file

@ -2,7 +2,7 @@
//! //!
//! Files which do not belong to any explicitly configured `FileSet` belong to //! Files which do not belong to any explicitly configured `FileSet` belong to
//! the default `FileSet`. //! the default `FileSet`.
use std::{cmp, fmt, iter}; use std::{fmt, iter};
use paths::AbsPathBuf; use paths::AbsPathBuf;
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
@ -44,6 +44,12 @@ pub struct FileSetConfig {
roots: Vec<(AbsPathBuf, usize)>, roots: Vec<(AbsPathBuf, usize)>,
} }
impl Default for FileSetConfig {
fn default() -> Self {
FileSetConfig::builder().build()
}
}
impl FileSetConfig { impl FileSetConfig {
pub fn builder() -> FileSetConfigBuilder { pub fn builder() -> FileSetConfigBuilder {
FileSetConfigBuilder::default() FileSetConfigBuilder::default()
@ -60,16 +66,21 @@ impl FileSetConfig {
self.n_file_sets self.n_file_sets
} }
fn classify(&self, path: &VfsPath) -> usize { fn classify(&self, path: &VfsPath) -> usize {
for (root, idx) in self.roots.iter() { let path = match path.as_path() {
if let Some(path) = path.as_path() { Some(it) => it,
if path.starts_with(root) { None => return self.len() - 1,
return *idx; };
} let idx = match self.roots.binary_search_by(|(p, _)| p.as_path().cmp(path)) {
} Ok(it) => it,
} Err(it) => it.saturating_sub(1),
};
if path.starts_with(&self.roots[idx].0) {
self.roots[idx].1
} else {
self.len() - 1 self.len() - 1
} }
} }
}
pub struct FileSetConfigBuilder { pub struct FileSetConfigBuilder {
roots: Vec<Vec<AbsPathBuf>>, roots: Vec<Vec<AbsPathBuf>>,
@ -82,6 +93,9 @@ impl Default for FileSetConfigBuilder {
} }
impl FileSetConfigBuilder { impl FileSetConfigBuilder {
pub fn len(&self) -> usize {
self.roots.len()
}
pub fn add_file_set(&mut self, roots: Vec<AbsPathBuf>) { pub fn add_file_set(&mut self, roots: Vec<AbsPathBuf>) {
self.roots.push(roots) self.roots.push(roots)
} }
@ -93,7 +107,7 @@ impl FileSetConfigBuilder {
.enumerate() .enumerate()
.flat_map(|(i, paths)| paths.into_iter().zip(iter::repeat(i))) .flat_map(|(i, paths)| paths.into_iter().zip(iter::repeat(i)))
.collect(); .collect();
roots.sort_by_key(|(path, _)| cmp::Reverse(path.to_string_lossy().len())); roots.sort();
FileSetConfig { n_file_sets, roots } FileSetConfig { n_file_sets, roots }
} }
} }

View file

@ -79,6 +79,9 @@ pub enum ChangeKind {
} }
impl Vfs { impl Vfs {
pub fn len(&self) -> usize {
self.data.len()
}
pub fn file_id(&self, path: &VfsPath) -> Option<FileId> { pub fn file_id(&self, path: &VfsPath) -> Option<FileId> {
self.interner.get(path).filter(|&it| self.get(it).is_some()) self.interner.get(path).filter(|&it| self.get(it).is_some())
} }