2020-06-15 11:29:07 +00:00
|
|
|
//! Partitions a list of files into disjoint subsets.
//!
//! Files which do not belong to any explicitly configured `FileSet` belong to
//! the default `FileSet`.
|
2020-07-07 20:53:12 +00:00
|
|
|
use std::fmt;
|
2020-06-15 11:29:07 +00:00
|
|
|
|
2020-07-07 20:53:12 +00:00
|
|
|
use fst::{IntoStreamer, Streamer};
|
2023-05-04 23:28:15 +00:00
|
|
|
use nohash_hasher::IntMap;
|
2020-06-15 11:29:07 +00:00
|
|
|
use rustc_hash::FxHashMap;
|
|
|
|
|
2020-12-09 15:41:35 +00:00
|
|
|
use crate::{AnchoredPath, FileId, Vfs, VfsPath};
|
2020-06-15 11:29:07 +00:00
|
|
|
|
2021-01-12 16:22:57 +00:00
|
|
|
/// A set of [`VfsPath`]s identified by [`FileId`]s.
|
2020-06-15 11:29:07 +00:00
|
|
|
#[derive(Default, Clone, Eq, PartialEq)]
|
|
|
|
pub struct FileSet {
|
|
|
|
files: FxHashMap<VfsPath, FileId>,
|
2023-05-04 23:28:15 +00:00
|
|
|
paths: IntMap<FileId, VfsPath>,
|
2020-06-15 11:29:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
impl FileSet {
|
2021-01-12 16:22:57 +00:00
|
|
|
/// Returns the number of stored paths.
|
2020-07-07 15:38:02 +00:00
|
|
|
pub fn len(&self) -> usize {
|
|
|
|
self.files.len()
|
|
|
|
}
|
2021-01-12 16:22:57 +00:00
|
|
|
|
|
|
|
/// Get the id of the file corresponding to `path`.
|
|
|
|
///
|
|
|
|
/// If either `path`'s [`anchor`](AnchoredPath::anchor) or the resolved path is not in
|
|
|
|
/// the set, returns [`None`].
|
2020-12-09 15:41:35 +00:00
|
|
|
pub fn resolve_path(&self, path: AnchoredPath<'_>) -> Option<FileId> {
|
|
|
|
let mut base = self.paths[&path.anchor].clone();
|
2020-06-15 11:29:07 +00:00
|
|
|
base.pop();
|
2020-12-09 15:41:35 +00:00
|
|
|
let path = base.join(path.path)?;
|
2020-08-28 18:28:30 +00:00
|
|
|
self.files.get(&path).copied()
|
|
|
|
}
|
2020-09-03 20:18:23 +00:00
|
|
|
|
2021-01-12 16:22:57 +00:00
|
|
|
/// Get the id corresponding to `path` if it exists in the set.
|
2020-09-07 13:17:50 +00:00
|
|
|
pub fn file_for_path(&self, path: &VfsPath) -> Option<&FileId> {
|
|
|
|
self.files.get(path)
|
2020-09-05 22:41:18 +00:00
|
|
|
}
|
|
|
|
|
2021-01-12 16:22:57 +00:00
|
|
|
/// Get the path corresponding to `file` if it exists in the set.
|
2020-09-07 13:17:50 +00:00
|
|
|
pub fn path_for_file(&self, file: &FileId) -> Option<&VfsPath> {
|
|
|
|
self.paths.get(file)
|
2020-06-15 11:29:07 +00:00
|
|
|
}
|
2020-09-05 22:41:18 +00:00
|
|
|
|
2021-01-12 16:22:57 +00:00
|
|
|
/// Insert the `file_id, path` pair into the set.
|
|
|
|
///
|
|
|
|
/// # Note
|
|
|
|
/// Multiple [`FileId`] can be mapped to the same [`VfsPath`], and vice-versa.
|
2020-06-15 11:29:07 +00:00
|
|
|
pub fn insert(&mut self, file_id: FileId, path: VfsPath) {
|
|
|
|
self.files.insert(path.clone(), file_id);
|
|
|
|
self.paths.insert(file_id, path);
|
|
|
|
}
|
2020-09-07 13:17:50 +00:00
|
|
|
|
2021-01-12 16:22:57 +00:00
|
|
|
/// Iterate over this set's ids.
|
2020-06-15 11:29:07 +00:00
|
|
|
pub fn iter(&self) -> impl Iterator<Item = FileId> + '_ {
|
|
|
|
self.paths.keys().copied()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl fmt::Debug for FileSet {
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
|
|
f.debug_struct("FileSet").field("n_files", &self.files.len()).finish()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-12 16:22:57 +00:00
|
|
|
/// This contains path prefixes to partition a [`Vfs`] into [`FileSet`]s.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
/// ```rust
|
|
|
|
/// # use vfs::{file_set::FileSetConfigBuilder, VfsPath, Vfs};
|
|
|
|
/// let mut builder = FileSetConfigBuilder::default();
|
|
|
|
/// builder.add_file_set(vec![VfsPath::new_virtual_path("/src".to_string())]);
|
|
|
|
/// let config = builder.build();
|
|
|
|
/// let mut file_system = Vfs::default();
|
|
|
|
/// file_system.set_file_contents(VfsPath::new_virtual_path("/src/main.rs".to_string()), Some(vec![]));
|
|
|
|
/// file_system.set_file_contents(VfsPath::new_virtual_path("/src/lib.rs".to_string()), Some(vec![]));
|
|
|
|
/// file_system.set_file_contents(VfsPath::new_virtual_path("/build.rs".to_string()), Some(vec![]));
|
|
|
|
/// // contains the sets :
|
|
|
|
/// // { "/src/main.rs", "/src/lib.rs" }
|
|
|
|
/// // { "build.rs" }
|
|
|
|
/// let sets = config.partition(&file_system);
|
|
|
|
/// ```
|
2020-06-15 11:29:07 +00:00
|
|
|
#[derive(Debug)]
|
|
|
|
pub struct FileSetConfig {
|
2021-01-12 16:41:45 +00:00
|
|
|
/// Number of sets that `self` can partition a [`Vfs`] into.
|
|
|
|
///
|
|
|
|
/// This should be the number of sets in `self.map` + 1 for files that don't fit in any
|
|
|
|
/// defined set.
|
2020-06-15 11:29:07 +00:00
|
|
|
n_file_sets: usize,
|
2021-01-12 16:41:45 +00:00
|
|
|
/// Map from encoded paths to the set they belong to.
|
2020-07-07 20:53:12 +00:00
|
|
|
map: fst::Map<Vec<u8>>,
|
2020-06-15 11:29:07 +00:00
|
|
|
}
|
|
|
|
|
2020-06-19 13:07:32 +00:00
|
|
|
impl Default for FileSetConfig {
|
|
|
|
fn default() -> Self {
|
|
|
|
FileSetConfig::builder().build()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-15 11:29:07 +00:00
|
|
|
impl FileSetConfig {
|
2021-01-12 16:22:57 +00:00
|
|
|
/// Returns a builder for `FileSetConfig`.
|
2020-06-15 11:29:07 +00:00
|
|
|
pub fn builder() -> FileSetConfigBuilder {
|
|
|
|
FileSetConfigBuilder::default()
|
|
|
|
}
|
2021-01-12 16:22:57 +00:00
|
|
|
|
|
|
|
/// Partition `vfs` into `FileSet`s.
|
|
|
|
///
|
|
|
|
/// Creates a new [`FileSet`] for every set of prefixes in `self`.
|
2020-06-15 11:29:07 +00:00
|
|
|
pub fn partition(&self, vfs: &Vfs) -> Vec<FileSet> {
|
2020-07-07 20:53:12 +00:00
|
|
|
let mut scratch_space = Vec::new();
|
2020-06-15 11:29:07 +00:00
|
|
|
let mut res = vec![FileSet::default(); self.len()];
|
|
|
|
for (file_id, path) in vfs.iter() {
|
2021-06-13 03:54:16 +00:00
|
|
|
let root = self.classify(path, &mut scratch_space);
|
2021-10-03 12:39:43 +00:00
|
|
|
res[root].insert(file_id, path.clone());
|
2020-06-15 11:29:07 +00:00
|
|
|
}
|
|
|
|
res
|
|
|
|
}
|
2021-01-12 16:41:45 +00:00
|
|
|
|
|
|
|
/// Number of sets that `self` can partition a [`Vfs`] into.
|
2020-06-15 11:29:07 +00:00
|
|
|
fn len(&self) -> usize {
|
|
|
|
self.n_file_sets
|
|
|
|
}
|
2021-01-12 16:41:45 +00:00
|
|
|
|
2024-02-26 13:35:54 +00:00
|
|
|
/// Get the lexicographically ordered vector of the underlying map.
|
|
|
|
pub fn roots(&self) -> Vec<(Vec<u8>, u64)> {
|
2024-03-04 21:58:49 +00:00
|
|
|
self.map.stream().into_byte_vec()
|
2024-02-26 13:35:54 +00:00
|
|
|
}
|
|
|
|
|
2021-01-12 16:41:45 +00:00
|
|
|
/// Returns the set index for the given `path`.
|
|
|
|
///
|
|
|
|
/// `scratch_space` is used as a buffer and will be entirely replaced.
|
2020-07-07 20:53:12 +00:00
|
|
|
fn classify(&self, path: &VfsPath, scratch_space: &mut Vec<u8>) -> usize {
|
2024-04-05 22:40:43 +00:00
|
|
|
// `path` is a file, but r-a only cares about the containing directory. We don't
|
|
|
|
// want `/foo/bar_baz.rs` to be attributed to source root directory `/foo/bar`.
|
|
|
|
let path = path.parent().unwrap_or_else(|| path.clone());
|
|
|
|
|
2020-07-07 20:53:12 +00:00
|
|
|
scratch_space.clear();
|
|
|
|
path.encode(scratch_space);
|
|
|
|
let automaton = PrefixOf::new(scratch_space.as_slice());
|
|
|
|
let mut longest_prefix = self.len() - 1;
|
|
|
|
let mut stream = self.map.search(automaton).into_stream();
|
|
|
|
while let Some((_, v)) = stream.next() {
|
|
|
|
longest_prefix = v as usize;
|
|
|
|
}
|
|
|
|
longest_prefix
|
2020-06-15 11:29:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-12 16:22:57 +00:00
|
|
|
/// Builder for [`FileSetConfig`].
|
2022-12-23 08:07:42 +00:00
|
|
|
#[derive(Default)]
|
2020-06-15 11:29:07 +00:00
|
|
|
pub struct FileSetConfigBuilder {
|
2020-06-11 09:04:09 +00:00
|
|
|
roots: Vec<Vec<VfsPath>>,
|
2020-06-15 11:29:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
impl FileSetConfigBuilder {
|
2021-01-12 16:22:57 +00:00
|
|
|
/// Returns the number of sets currently held.
|
2020-06-19 13:07:32 +00:00
|
|
|
pub fn len(&self) -> usize {
|
|
|
|
self.roots.len()
|
|
|
|
}
|
2021-01-12 16:22:57 +00:00
|
|
|
|
|
|
|
/// Add a new set of paths prefixes.
|
2020-06-11 09:04:09 +00:00
|
|
|
pub fn add_file_set(&mut self, roots: Vec<VfsPath>) {
|
2021-10-03 12:39:43 +00:00
|
|
|
self.roots.push(roots);
|
2020-06-15 11:29:07 +00:00
|
|
|
}
|
2021-01-12 16:22:57 +00:00
|
|
|
|
|
|
|
/// Build the `FileSetConfig`.
|
2020-06-15 11:29:07 +00:00
|
|
|
pub fn build(self) -> FileSetConfig {
|
|
|
|
let n_file_sets = self.roots.len() + 1;
|
2020-07-07 20:53:12 +00:00
|
|
|
let map = {
|
|
|
|
let mut entries = Vec::new();
|
|
|
|
for (i, paths) in self.roots.into_iter().enumerate() {
|
|
|
|
for p in paths {
|
|
|
|
let mut buf = Vec::new();
|
|
|
|
p.encode(&mut buf);
|
|
|
|
entries.push((buf, i as u64));
|
|
|
|
}
|
2020-07-07 15:38:02 +00:00
|
|
|
}
|
2020-07-07 20:53:12 +00:00
|
|
|
entries.sort();
|
|
|
|
entries.dedup_by(|(a, _), (b, _)| a == b);
|
|
|
|
fst::Map::from_iter(entries).unwrap()
|
|
|
|
};
|
|
|
|
FileSetConfig { n_file_sets, map }
|
2020-07-07 15:38:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-12 16:41:45 +00:00
|
|
|
/// Implements [`fst::Automaton`]
///
/// It matches every input that is a prefix of `prefix_of`, including the empty input and
/// `prefix_of` itself. Inputs that diverge from `prefix_of`, or that are longer than it,
/// do not match.
struct PrefixOf<'a> {
    /// The byte string whose prefixes are accepted.
    prefix_of: &'a [u8],
}
|
|
|
|
|
2020-07-07 20:53:12 +00:00
|
|
|
impl<'a> PrefixOf<'a> {
|
2021-01-12 16:41:45 +00:00
|
|
|
/// Creates a new `PrefixOf` from the given slice.
|
2020-07-07 20:53:12 +00:00
|
|
|
fn new(prefix_of: &'a [u8]) -> Self {
|
|
|
|
Self { prefix_of }
|
2020-07-07 15:38:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-07 20:53:12 +00:00
|
|
|
impl fst::Automaton for PrefixOf<'_> {
    /// Number of input bytes matched so far, or `usize::MAX` once a byte has mismatched.
    type State = usize;

    /// Starts with no byte matched.
    fn start(&self) -> usize {
        0
    }

    /// Every state other than the failure state is a match.
    fn is_match(&self, state: &usize) -> bool {
        *state != usize::MAX
    }

    /// Only the failure state can never lead to a match.
    fn can_match(&self, state: &usize) -> bool {
        *state != usize::MAX
    }

    /// Advances by one when `byte` is the next byte of `prefix_of`; otherwise moves to the
    /// failure state. The failure state is out of bounds for `prefix_of`, so it is never
    /// left again.
    fn accept(&self, state: &usize, byte: u8) -> usize {
        match self.prefix_of.get(*state) {
            Some(&expected) if expected == byte => *state + 1,
            _ => usize::MAX,
        }
    }
}
|
|
|
|
|
2020-07-26 09:05:28 +00:00
|
|
|
#[cfg(test)]
|
2020-11-30 15:10:12 +00:00
|
|
|
mod tests;
|