Refactor source cache into CachedFile struct (#12240)

# Description
Get rid of two parallel `Vec`s in `StateDelta` and `EngineState`, that
also duplicated span information. Use a struct with documenting fields.

Also use `Arc<str>` and `Arc<[u8]>` for the allocations as they are
never modified and cloned often (see #12229 for the first improvement).
This also makes the representation more compact as no capacity is
necessary.

# User-Facing Changes
API breakage on `EngineState`/`StateWorkingSet`/`StateDelta` that should
not really affect plugin authors.
This commit is contained in:
Stefan Holderbach 2024-03-20 19:43:50 +01:00 committed by GitHub
parent 63335e99ae
commit ec528c0626
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 133 additions and 104 deletions

View file

@ -1,7 +1,7 @@
use crate::completions::{Completer, CompletionOptions, MatchAlgorithm, SortBy};
use nu_parser::FlatShape;
use nu_protocol::{
engine::{EngineState, StateWorkingSet},
engine::{CachedFile, EngineState, StateWorkingSet},
Span,
};
use reedline::Suggestion;
@ -229,8 +229,9 @@ pub fn find_non_whitespace_index(contents: &[u8], start: usize) -> usize {
}
}
pub fn is_passthrough_command(working_set_file_contents: &[(Arc<Vec<u8>>, usize, usize)]) -> bool {
for (contents, _, _) in working_set_file_contents {
pub fn is_passthrough_command(working_set_file_contents: &[CachedFile]) -> bool {
for cached_file in working_set_file_contents {
let contents = &cached_file.content;
let last_pipe_pos_rev = contents.iter().rev().position(|x| x == &b'|');
let last_pipe_pos = last_pipe_pos_rev.map(|x| contents.len() - x).unwrap_or(0);
@ -295,7 +296,7 @@ mod command_completions_tests {
let input = ele.0.as_bytes();
let mut engine_state = EngineState::new();
engine_state.add_file("test.nu".into(), vec![]);
engine_state.add_file("test.nu".into(), Arc::new([]));
let delta = {
let mut working_set = StateWorkingSet::new(&engine_state);

View file

@ -94,8 +94,8 @@ fn gather_env_vars(
let span_offset = engine_state.next_span_start();
engine_state.add_file(
"Host Environment Variables".to_string(),
fake_env_file.as_bytes().to_vec(),
"Host Environment Variables".into(),
fake_env_file.as_bytes().into(),
);
let (tokens, _) = lex(fake_env_file.as_bytes(), span_offset, &[], &[], true);

View file

@ -43,13 +43,15 @@ impl Command for ViewFiles {
) -> Result<PipelineData, ShellError> {
let mut records = vec![];
for (file, start, end) in engine_state.files() {
for file in engine_state.files() {
let start = file.covered_span.start;
let end = file.covered_span.end;
records.push(Value::record(
record! {
"filename" => Value::string(&**file, call.head),
"start" => Value::int(*start as i64, call.head),
"end" => Value::int(*end as i64, call.head),
"size" => Value::int(*end as i64 - *start as i64, call.head),
"filename" => Value::string(&*file.name, call.head),
"start" => Value::int(start as i64, call.head),
"end" => Value::int(end as i64, call.head),
"size" => Value::int(end as i64 - start as i64, call.head),
},
call.head,
));

View file

@ -284,11 +284,15 @@ impl LanguageServer {
if let Some(block_id) = working_set.get_decl(decl_id).get_block_id() {
let block = working_set.get_block(block_id);
if let Some(span) = &block.span {
for (file_path, file_start, file_end) in working_set.files() {
if span.start >= *file_start && span.start < *file_end {
for cached_file in working_set.files() {
if cached_file.covered_span.contains(span.start) {
return Some(GotoDefinitionResponse::Scalar(Location {
uri: Url::from_file_path(&**file_path).ok()?,
range: Self::span_to_range(span, file, *file_start),
uri: Url::from_file_path(&*cached_file.name).ok()?,
range: Self::span_to_range(
span,
file,
cached_file.covered_span.start,
),
}));
}
}
@ -297,9 +301,10 @@ impl LanguageServer {
}
Id::Variable(var_id) => {
let var = working_set.get_variable(var_id);
for (_, file_start, file_end) in working_set.files() {
if var.declaration_span.start >= *file_start
&& var.declaration_span.start < *file_end
for cached_file in working_set.files() {
if cached_file
.covered_span
.contains(var.declaration_span.start)
{
return Some(GotoDefinitionResponse::Scalar(Location {
uri: params
@ -307,7 +312,11 @@ impl LanguageServer {
.text_document
.uri
.clone(),
range: Self::span_to_range(&var.declaration_span, file, *file_start),
range: Self::span_to_range(
&var.declaration_span,
file,
cached_file.covered_span.start,
),
}));
}
}

View file

@ -0,0 +1,15 @@
use crate::Span;
use std::sync::Arc;
/// Unit of cached source code
#[derive(Clone)]
pub struct CachedFile {
// Use Arcs of slice types for more compact representation (capacity less)
// Could possibly become an `Arc<PathBuf>`
/// The file name with which the code is associated (also includes REPL input)
pub name: Arc<str>,
/// Source code as raw bytes
pub content: Arc<[u8]>,
/// global span coordinates that are covered by this [`CachedFile`]
pub covered_span: Span,
}

View file

@ -1,6 +1,7 @@
use fancy_regex::Regex;
use lru::LruCache;
use super::cached_file::CachedFile;
use super::{usage::build_usage, usage::Usage, StateDelta};
use super::{
Command, EnvVars, OverlayFrame, ScopeFrame, Stack, Variable, Visibility, DEFAULT_OVERLAY_NAME,
@ -74,8 +75,7 @@ impl Clone for IsDebugging {
/// but they also rely on using IDs rather than full definitions.
#[derive(Clone)]
pub struct EngineState {
files: Vec<(Arc<String>, usize, usize)>,
file_contents: Vec<(Arc<Vec<u8>>, usize, usize)>,
files: Vec<CachedFile>,
pub(super) virtual_paths: Vec<(String, VirtualPath)>,
vars: Vec<Variable>,
decls: Arc<Vec<Box<dyn Command + 'static>>>,
@ -119,7 +119,6 @@ impl EngineState {
pub fn new() -> Self {
Self {
files: vec![],
file_contents: vec![],
virtual_paths: vec![],
vars: vec![
Variable::new(Span::new(0, 0), Type::Any, false),
@ -183,7 +182,6 @@ impl EngineState {
pub fn merge_delta(&mut self, mut delta: StateDelta) -> Result<(), ShellError> {
// Take the mutable reference and extend the permanent state from the working set
self.files.extend(delta.files);
self.file_contents.extend(delta.file_contents);
self.virtual_paths.extend(delta.virtual_paths);
self.vars.extend(delta.vars);
self.blocks.extend(delta.blocks);
@ -628,8 +626,8 @@ impl EngineState {
}
pub fn print_contents(&self) {
for (contents, _, _) in self.file_contents.iter() {
let string = String::from_utf8_lossy(contents);
for cached_file in self.files.iter() {
let string = String::from_utf8_lossy(&cached_file.content);
println!("{string}");
}
}
@ -747,9 +745,10 @@ impl EngineState {
}
pub fn get_span_contents(&self, span: Span) -> &[u8] {
for (contents, start, finish) in &self.file_contents {
if span.start >= *start && span.end <= *finish {
return &contents[(span.start - start)..(span.end - start)];
for file in &self.files {
if file.covered_span.contains_span(span) {
return &file.content
[(span.start - file.covered_span.start)..(span.end - file.covered_span.start)];
}
}
&[0u8; 0]
@ -909,26 +908,28 @@ impl EngineState {
}
pub fn next_span_start(&self) -> usize {
if let Some((_, _, last)) = self.file_contents.last() {
*last
if let Some(cached_file) = self.files.last() {
cached_file.covered_span.end
} else {
0
}
}
pub fn files(&self) -> impl Iterator<Item = &(Arc<String>, usize, usize)> {
pub fn files(&self) -> impl Iterator<Item = &CachedFile> {
self.files.iter()
}
pub fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
pub fn add_file(&mut self, filename: Arc<str>, content: Arc<[u8]>) -> FileId {
let next_span_start = self.next_span_start();
let next_span_end = next_span_start + contents.len();
let next_span_end = next_span_start + content.len();
self.file_contents
.push((Arc::new(contents), next_span_start, next_span_end));
let covered_span = Span::new(next_span_start, next_span_end);
self.files
.push((Arc::new(filename), next_span_start, next_span_end));
self.files.push(CachedFile {
name: filename,
content,
covered_span,
});
self.num_files() - 1
}
@ -968,8 +969,9 @@ impl EngineState {
.unwrap_or_default()
}
pub fn get_file_contents(&self) -> &[(Arc<Vec<u8>>, usize, usize)] {
&self.file_contents
// TODO: see if we can completely get rid of this
pub fn get_file_contents(&self) -> &[CachedFile] {
&self.files
}
pub fn get_startup_time(&self) -> i64 {
@ -1043,7 +1045,7 @@ mod engine_state_tests {
#[test]
fn add_file_gives_id_including_parent() {
let mut engine_state = EngineState::new();
let parent_id = engine_state.add_file("test.nu".into(), vec![]);
let parent_id = engine_state.add_file("test.nu".into(), Arc::new([]));
let mut working_set = StateWorkingSet::new(&engine_state);
let working_set_id = working_set.add_file("child.nu".into(), &[]);
@ -1055,7 +1057,7 @@ mod engine_state_tests {
#[test]
fn merge_states() -> Result<(), ShellError> {
let mut engine_state = EngineState::new();
engine_state.add_file("test.nu".into(), vec![]);
engine_state.add_file("test.nu".into(), Arc::new([]));
let delta = {
let mut working_set = StateWorkingSet::new(&engine_state);
@ -1066,8 +1068,8 @@ mod engine_state_tests {
engine_state.merge_delta(delta)?;
assert_eq!(engine_state.num_files(), 2);
assert_eq!(&*engine_state.files[0].0, "test.nu");
assert_eq!(&*engine_state.files[1].0, "child.nu");
assert_eq!(&*engine_state.files[0].name, "test.nu");
assert_eq!(&*engine_state.files[1].name, "child.nu");
Ok(())
}

View file

@ -1,3 +1,4 @@
mod cached_file;
mod call_info;
mod capture_block;
mod command;
@ -11,6 +12,8 @@ mod stdio;
mod usage;
mod variable;
pub use cached_file::CachedFile;
pub use call_info::*;
pub use capture_block::*;
pub use command::*;

View file

@ -1,3 +1,4 @@
use super::cached_file::CachedFile;
use super::{usage::Usage, Command, EngineState, OverlayFrame, ScopeFrame, Variable, VirtualPath};
use crate::ast::Block;
use crate::Module;
@ -12,8 +13,7 @@ use crate::RegisteredPlugin;
/// can be applied to the global state to update it to contain both previous state and the state held
/// within the delta.
pub struct StateDelta {
pub(super) files: Vec<(Arc<String>, usize, usize)>,
pub(crate) file_contents: Vec<(Arc<Vec<u8>>, usize, usize)>,
pub(super) files: Vec<CachedFile>,
pub(super) virtual_paths: Vec<(String, VirtualPath)>,
pub(super) vars: Vec<Variable>, // indexed by VarId
pub(super) decls: Vec<Box<dyn Command>>, // indexed by DeclId
@ -38,7 +38,6 @@ impl StateDelta {
StateDelta {
files: vec![],
file_contents: vec![],
virtual_paths: vec![],
vars: vec![],
decls: vec![],
@ -131,7 +130,7 @@ impl StateDelta {
self.scope.pop();
}
pub fn get_file_contents(&self) -> &[(Arc<Vec<u8>>, usize, usize)] {
&self.file_contents
pub fn get_file_contents(&self) -> &[CachedFile] {
&self.files
}
}

View file

@ -1,3 +1,4 @@
use super::cached_file::CachedFile;
use super::{
usage::build_usage, Command, EngineState, OverlayFrame, StateDelta, Variable, VirtualPath,
Visibility, PWD_ENV,
@ -285,8 +286,8 @@ impl<'a> StateWorkingSet<'a> {
pub fn next_span_start(&self) -> usize {
let permanent_span_start = self.permanent_state.next_span_start();
if let Some((_, _, last)) = self.delta.file_contents.last() {
*last
if let Some(cached_file) = self.delta.files.last() {
cached_file.covered_span.end
} else {
permanent_span_start
}
@ -296,21 +297,22 @@ impl<'a> StateWorkingSet<'a> {
self.permanent_state.next_span_start()
}
pub fn files(&'a self) -> impl Iterator<Item = &(Arc<String>, usize, usize)> {
pub fn files(&self) -> impl Iterator<Item = &CachedFile> {
self.permanent_state.files().chain(self.delta.files.iter())
}
pub fn get_contents_of_file(&self, file_id: usize) -> Option<&[u8]> {
for (id, (contents, _, _)) in self.delta.file_contents.iter().enumerate() {
if self.permanent_state.num_files() + id == file_id {
return Some(contents);
}
pub fn get_contents_of_file(&self, file_id: FileId) -> Option<&[u8]> {
if let Some(cached_file) = self.permanent_state.get_file_contents().get(file_id) {
return Some(&cached_file.content);
}
for (id, (contents, _, _)) in self.permanent_state.get_file_contents().iter().enumerate() {
if id == file_id {
return Some(contents);
}
// The index subtraction will not underflow, if we hit the permanent state first.
// Check if you try reordering for locality
if let Some(cached_file) = self
.delta
.get_file_contents()
.get(file_id - self.permanent_state.num_files())
{
return Some(&cached_file.content);
}
None
@ -319,27 +321,22 @@ impl<'a> StateWorkingSet<'a> {
#[must_use]
pub fn add_file(&mut self, filename: String, contents: &[u8]) -> FileId {
// First, look for the file to see if we already have it
for (idx, (fname, file_start, file_end)) in self.files().enumerate() {
if **fname == filename {
let prev_contents = self.get_span_contents(Span::new(*file_start, *file_end));
if prev_contents == contents {
return idx;
}
for (idx, cached_file) in self.files().enumerate() {
if *cached_file.name == filename && &*cached_file.content == contents {
return idx;
}
}
let next_span_start = self.next_span_start();
let next_span_end = next_span_start + contents.len();
self.delta.file_contents.push((
Arc::new(contents.to_vec()),
next_span_start,
next_span_end,
));
let covered_span = Span::new(next_span_start, next_span_end);
self.delta
.files
.push((Arc::new(filename), next_span_start, next_span_end));
self.delta.files.push(CachedFile {
name: filename.into(),
content: contents.into(),
covered_span,
});
self.num_files() - 1
}
@ -352,35 +349,31 @@ impl<'a> StateWorkingSet<'a> {
}
pub fn get_span_for_filename(&self, filename: &str) -> Option<Span> {
let (file_id, ..) = self
.files()
.enumerate()
.find(|(_, (fname, _, _))| **fname == filename)?;
let file_id = self.files().position(|file| &*file.name == filename)?;
Some(self.get_span_for_file(file_id))
}
pub fn get_span_for_file(&self, file_id: usize) -> Span {
/// Panics:
/// On invalid `FileId`
///
/// Use with care
pub fn get_span_for_file(&self, file_id: FileId) -> Span {
let result = self
.files()
.nth(file_id)
.expect("internal error: could not find source for previously parsed file");
Span::new(result.1, result.2)
result.covered_span
}
pub fn get_span_contents(&self, span: Span) -> &[u8] {
let permanent_end = self.permanent_state.next_span_start();
if permanent_end <= span.start {
for (contents, start, finish) in &self.delta.file_contents {
if (span.start >= *start) && (span.end <= *finish) {
let begin = span.start - start;
let mut end = span.end - start;
if begin > end {
end = *finish - permanent_end;
}
return &contents[begin..end];
for cached_file in &self.delta.files {
if cached_file.covered_span.contains_span(span) {
return &cached_file.content[span.start - cached_file.covered_span.start
..span.end - cached_file.covered_span.start];
}
}
}
@ -1033,19 +1026,24 @@ impl<'a> miette::SourceCode for &StateWorkingSet<'a> {
let finding_span = "Finding span in StateWorkingSet";
dbg!(finding_span, span);
}
for (filename, start, end) in self.files() {
for cached_file in self.files() {
let (filename, start, end) = (
&cached_file.name,
cached_file.covered_span.start,
cached_file.covered_span.end,
);
if debugging {
dbg!(&filename, start, end);
}
if span.offset() >= *start && span.offset() + span.len() <= *end {
if span.offset() >= start && span.offset() + span.len() <= end {
if debugging {
let found_file = "Found matching file";
dbg!(found_file);
}
let our_span = Span::new(*start, *end);
let our_span = cached_file.covered_span;
// We need to move to a local span because we're only reading
// the specific file contents via self.get_span_contents.
let local_span = (span.offset() - *start, span.len()).into();
let local_span = (span.offset() - start, span.len()).into();
if debugging {
dbg!(&local_span);
}
@ -1066,7 +1064,7 @@ impl<'a> miette::SourceCode for &StateWorkingSet<'a> {
}
let data = span_contents.data();
if **filename == "<cli>" {
if &**filename == "<cli>" {
if debugging {
let success_cli = "Successfully read CLI span";
dbg!(success_cli, String::from_utf8_lossy(data));
@ -1084,7 +1082,7 @@ impl<'a> miette::SourceCode for &StateWorkingSet<'a> {
dbg!(success_file);
}
return Ok(Box::new(miette::MietteSpanContents::new_named(
(**filename).clone(),
(**filename).to_owned(),
data,
retranslated,
span_contents.line(),

View file

@ -160,14 +160,14 @@ pub fn goto_def(engine_state: &mut EngineState, file_path: &str, location: &Valu
let block = working_set.get_block(block_id);
if let Some(span) = &block.span {
for file in working_set.files() {
if span.start >= file.1 && span.start < file.2 {
if file.covered_span.contains(span.start) {
println!(
"{}",
json!(
{
"file": &**file.0,
"start": span.start - file.1,
"end": span.end - file.1
"file": &*file.name,
"start": span.start - file.covered_span.start,
"end": span.end - file.covered_span.start,
}
)
);
@ -180,14 +180,14 @@ pub fn goto_def(engine_state: &mut EngineState, file_path: &str, location: &Valu
Some((Id::Variable(var_id), ..)) => {
let var = working_set.get_variable(var_id);
for file in working_set.files() {
if var.declaration_span.start >= file.1 && var.declaration_span.start < file.2 {
if file.covered_span.contains(var.declaration_span.start) {
println!(
"{}",
json!(
{
"file": &**file.0,
"start": var.declaration_span.start - file.1,
"end": var.declaration_span.end - file.1
"file": &*file.name,
"start": var.declaration_span.start - file.covered_span.start,
"end": var.declaration_span.end - file.covered_span.start,
}
)
);