From 80e0cd4e006735b7cbf84dc713da253a0a339708 Mon Sep 17 00:00:00 2001 From: JT Date: Sat, 3 Jul 2021 15:11:24 +1200 Subject: [PATCH] Revert "Removed file_id in Span, compact file sources" --- src/lex.rs | 39 ++++++++++++++++++--------- src/lite_parse.rs | 8 ++++-- src/main.rs | 9 +------ src/parser.rs | 59 +++++++++++++++++++++++++++++------------ src/parser_state.rs | 64 ++++++++++++++++++--------------------------- src/span.rs | 15 ++++++++--- 6 files changed, 114 insertions(+), 80 deletions(-) diff --git a/src/lex.rs b/src/lex.rs index b10f67055f..84f91f8cc0 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -51,7 +51,11 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool { && (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#') } -pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option) { +pub fn lex_item( + input: &[u8], + curr_offset: &mut usize, + file_id: usize, +) -> (Span, Option) { // This variable tracks the starting character of a string literal, so that // we remain inside the string literal lexer mode until we encounter the // closing quote. @@ -133,7 +137,7 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option (Span, Option (Vec, Option) { @@ -193,7 +198,7 @@ pub fn lex( curr_offset += 1; output.push(Token::new( TokenContents::Item, - Span::new(span_offset + prev_idx, span_offset + idx + 1), + Span::new(span_offset + prev_idx, span_offset + idx + 1, file_id), )); continue; } @@ -202,7 +207,7 @@ pub fn lex( // Otherwise, it's just a regular `|` token. output.push(Token::new( TokenContents::Pipe, - Span::new(span_offset + idx, span_offset + idx + 1), + Span::new(span_offset + idx, span_offset + idx + 1, file_id), )); is_complete = false; } else if c == b';' { @@ -212,13 +217,14 @@ pub fn lex( error = Some(ParseError::ExtraTokens(Span::new( curr_offset, curr_offset + 1, + file_id, ))); } let idx = curr_offset; curr_offset += 1; output.push(Token::new( TokenContents::Semicolon, - Span::new(idx, idx + 1), + Span::new(idx, idx + 1, file_id), )); } else if c == b'\n' || c == b'\r' { // If the next character is a newline, we're looking at an EOL (end of line) token. @@ -226,7 +232,10 @@ pub fn lex( let idx = curr_offset; curr_offset += 1; if lex_mode == LexMode::Normal { - output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1))); + output.push(Token::new( + TokenContents::Eol, + Span::new(idx, idx + 1, file_id), + )); } } else if c == b'#' { // If the next character is `#`, we're at the beginning of a line @@ -238,7 +247,7 @@ pub fn lex( if *input == b'\n' { output.push(Token::new( TokenContents::Comment, - Span::new(start, curr_offset), + Span::new(start, curr_offset, file_id), )); start = curr_offset; @@ -248,7 +257,7 @@ pub fn lex( if start != curr_offset { output.push(Token::new( TokenContents::Comment, - Span::new(start, curr_offset), + Span::new(start, curr_offset, file_id), )); } } else if c == b' ' || c == b'\t' { @@ -257,7 +266,7 @@ pub fn lex( } else { // Otherwise, try to consume an unclassified token. - let (span, err) = lex_item(input, &mut curr_offset); + let (span, err) = lex_item(input, &mut curr_offset, file_id); if error.is_none() { error = err; } @@ -276,7 +285,7 @@ mod lex_tests { fn lex_basic() { let file = b"let x = 4"; - let output = lex(file, 0, LexMode::Normal); + let output = lex(file, 0, 0, LexMode::Normal); assert!(output.1.is_none()); } @@ -285,12 +294,16 @@ mod lex_tests { fn lex_newline() { let file = b"let x = 300\nlet y = 500;"; - let output = lex(file, 0, LexMode::Normal); + let output = lex(file, 0, 0, LexMode::Normal); println!("{:#?}", output.0); assert!(output.0.contains(&Token { contents: TokenContents::Eol, - span: Span { start: 11, end: 12 } + span: Span { + start: 11, + end: 12, + file_id: 0 + } })); } @@ -298,7 +311,7 @@ mod lex_tests { fn lex_empty() { let file = b""; - let output = lex(file, 0, LexMode::Normal); + let output = lex(file, 0, 0, LexMode::Normal); assert!(output.0.is_empty()); assert!(output.1.is_none()); diff --git a/src/lite_parse.rs b/src/lite_parse.rs index 9e3e15a3d9..a3f2d9e0cb 100644 --- a/src/lite_parse.rs +++ b/src/lite_parse.rs @@ -128,7 +128,7 @@ mod tests { use crate::{lex, lite_parse, LiteBlock, ParseError, Span}; fn lite_parse_helper(input: &[u8]) -> Result { - let (output, err) = lex(input, 0, crate::LexMode::Normal); + let (output, err) = lex(input, 0, 0, crate::LexMode::Normal); if let Some(err) = err { return Err(err); } @@ -194,7 +194,11 @@ mod tests { assert_eq!(lite_block.block[0].commands[0].comments.len(), 1); assert_eq!( lite_block.block[0].commands[0].comments[0], - Span { start: 21, end: 39 } + Span { + start: 21, + end: 39, + file_id: 0 + } ); assert_eq!(lite_block.block[0].commands[0].parts.len(), 3); diff --git a/src/main.rs b/src/main.rs index e0e2b085d7..4c39ef36e0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,17 +8,10 @@ fn main() -> std::io::Result<()> { working_set.add_decl((b"foo").to_vec(), sig); let file = std::fs::read(&path)?; - let (output, err) = working_set.parse_file(&path, file); + let (output, err) = working_set.parse_file(&path, &file); //let (output, err) = working_set.parse_source(path.as_bytes()); println!("{}", output.len()); println!("error: {:?}", err); - // println!("{}", size_of::()); - - // let mut buffer = String::new(); - // let stdin = std::io::stdin(); - // let mut handle = stdin.lock(); - - // handle.read_to_string(&mut buffer)?; Ok(()) } else { diff --git a/src/parser.rs b/src/parser.rs index 7baa5a2d78..07b010f446 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -109,6 +109,7 @@ pub enum Expr { #[derive(Debug, Clone)] pub struct Expression { expr: Expr, + ty: Type, span: Span, } impl Expression { @@ -116,7 +117,7 @@ impl Expression { Expression { expr: Expr::Garbage, span, - //ty: Type::Unknown, + ty: Type::Unknown, } } pub fn precedence(&self) -> usize { @@ -263,12 +264,13 @@ fn span(spans: &[Span]) -> Span { if length == 0 { Span::unknown() - } else if length == 1 { + } else if length == 1 || spans[0].file_id != spans[length - 1].file_id { spans[0] } else { Span { start: spans[0].start, end: spans[length - 1].end, + file_id: spans[0].file_id, } } } @@ -340,6 +342,7 @@ impl ParserWorkingSet { let short_flag_span = Span { start: orig.start + 1 + short_flag.0, end: orig.start + 1 + short_flag.0 + 1, + file_id: orig.file_id, }; if let Some(flag) = sig.get_short_flag(short_flag_char) { // If we require an arg and are in a batch of short flags, error @@ -416,7 +419,7 @@ impl ParserWorkingSet { ( Expression { expr: Expr::Call(Box::new(call)), - //ty: Type::Unknown, + ty: Type::Unknown, span: span(spans), }, error, @@ -432,6 +435,7 @@ impl ParserWorkingSet { ( Expression { expr: Expr::Int(v), + ty: Type::Int, span, }, None, @@ -447,6 +451,7 @@ impl ParserWorkingSet { ( Expression { expr: Expr::Int(v), + ty: Type::Int, span, }, None, @@ -462,6 +467,7 @@ impl ParserWorkingSet { ( Expression { expr: Expr::Int(v), + ty: Type::Int, span, }, None, @@ -476,6 +482,7 @@ impl ParserWorkingSet { ( Expression { expr: Expr::Int(x), + ty: Type::Int, span, }, None, @@ -503,9 +510,14 @@ impl ParserWorkingSet { let bytes = self.get_span_contents(span); if let Some(var_id) = self.find_variable(bytes) { + let ty = *self + .get_variable(var_id) + .expect("internal error: invalid VarId"); + ( Expression { expr: Expr::Var(var_id), + ty, span, }, None, @@ -535,16 +547,21 @@ impl ParserWorkingSet { Span { start: end, end: end + 1, + file_id: span.file_id, }, )) }); } - let span = Span { start, end }; + let span = Span { + start, + end, + file_id: span.file_id, + }; - let source = self.get_span_contents(span); + let source = self.get_file_contents(span.file_id); - let (output, err) = lex(&source, start, crate::LexMode::Normal); + let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal); error = error.or(err); let (output, err) = lite_parse(&output); @@ -556,6 +573,7 @@ impl ParserWorkingSet { ( Expression { expr: Expr::Subexpression(Box::new(output)), + ty: Type::Unknown, span, }, error, @@ -581,16 +599,21 @@ impl ParserWorkingSet { Span { start: end, end: end + 1, + file_id: span.file_id, }, )) }); } - let span = Span { start, end }; + let span = Span { + start, + end, + file_id: span.file_id, + }; - let source = &self.file_contents[..end]; + let source = self.get_file_contents(span.file_id); - let (output, err) = lex(&source, start, crate::LexMode::Normal); + let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal); error = error.or(err); let (output, err) = lite_parse(&output); @@ -604,6 +627,7 @@ impl ParserWorkingSet { ( Expression { expr: Expr::Block(Box::new(output)), + ty: Type::Unknown, span, }, error, @@ -714,6 +738,7 @@ impl ParserWorkingSet { ( Expression { expr: Expr::Operator(operator), + ty: Type::Unknown, span, }, None, @@ -778,6 +803,7 @@ impl ParserWorkingSet { expr_stack.push(Expression { expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)), span: op_span, + ty: Type::Unknown, }); } } @@ -803,6 +829,7 @@ impl ParserWorkingSet { let binary_op_span = span(&[lhs.span, rhs.span]); expr_stack.push(Expression { expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)), + ty: Type::Unknown, span: binary_op_span, }); } @@ -862,7 +889,7 @@ impl ParserWorkingSet { error = error.or(err); let var_name: Vec<_> = self.get_span_contents(spans[1]).into(); - let var_id = self.add_variable(var_name, Type::Unknown); + let var_id = self.add_variable(var_name, expression.ty); (Statement::VarDecl(VarDecl { var_id, expression }), error) } else { @@ -901,13 +928,13 @@ impl ParserWorkingSet { (block, error) } - pub fn parse_file(&mut self, fname: &str, contents: Vec) -> (Block, Option) { + pub fn parse_file(&mut self, fname: &str, contents: &[u8]) -> (Block, Option) { let mut error = None; - let (output, err) = lex(&contents, 0, crate::LexMode::Normal); - error = error.or(err); + let file_id = self.add_file(fname.into(), contents.into()); - self.add_file(fname.into(), contents); + let (output, err) = lex(contents, file_id, 0, crate::LexMode::Normal); + error = error.or(err); let (output, err) = lite_parse(&output); error = error.or(err); @@ -921,9 +948,9 @@ impl ParserWorkingSet { pub fn parse_source(&mut self, source: &[u8]) -> (Block, Option) { let mut error = None; - self.add_file("source".into(), source.into()); + let file_id = self.add_file("source".into(), source.into()); - let (output, err) = lex(source, 0, crate::LexMode::Normal); + let (output, err) = lex(source, file_id, 0, crate::LexMode::Normal); error = error.or(err); let (output, err) = lite_parse(&output); diff --git a/src/parser_state.rs b/src/parser_state.rs index e37260736e..bd0dcfb41f 100644 --- a/src/parser_state.rs +++ b/src/parser_state.rs @@ -2,8 +2,7 @@ use crate::{Signature, Span}; use std::{collections::HashMap, sync::Arc}; pub struct ParserState { - files: Vec<(String, usize, usize)>, - file_contents: Vec, + files: Vec<(String, Vec)>, vars: Vec, decls: Vec, } @@ -42,7 +41,6 @@ impl ParserState { pub fn new() -> Self { Self { files: vec![], - file_contents: vec![], vars: vec![], decls: vec![], } @@ -55,7 +53,6 @@ impl ParserState { // Take the mutable reference and extend the permanent state from the working set if let Some(this) = std::sync::Arc::::get_mut(this) { this.files.extend(working_set.files); - this.file_contents.extend(working_set.file_contents); this.decls.extend(working_set.decls); this.vars.extend(working_set.vars); @@ -85,27 +82,20 @@ impl ParserState { self.decls.get(decl_id) } - pub fn next_span_start(&self) -> usize { - self.file_contents.len() - } - #[allow(unused)] pub(crate) fn add_file(&mut self, filename: String, contents: Vec) -> usize { - let next_span_start = self.next_span_start(); - - self.file_contents.extend(&contents); - - let next_span_end = self.next_span_start(); - - self.files.push((filename, next_span_start, next_span_end)); + self.files.push((filename, contents)); self.num_files() - 1 } + + pub(crate) fn get_file_contents(&self, idx: usize) -> &[u8] { + &self.files[idx].1 + } } pub struct ParserWorkingSet { - files: Vec<(String, usize, usize)>, - pub(crate) file_contents: Vec, + files: Vec<(String, Vec)>, vars: Vec, // indexed by VarId decls: Vec, // indexed by DeclId permanent_state: Option>, @@ -116,7 +106,6 @@ impl ParserWorkingSet { pub fn new(permanent_state: Option>) -> Self { Self { files: vec![], - file_contents: vec![], vars: vec![], decls: vec![], permanent_state, @@ -148,36 +137,35 @@ impl ParserWorkingSet { decl_id } - pub fn next_span_start(&self) -> usize { - if let Some(permanent_state) = &self.permanent_state { - permanent_state.next_span_start() + self.file_contents.len() - } else { - self.file_contents.len() - } - } - pub fn add_file(&mut self, filename: String, contents: Vec) -> usize { - let next_span_start = self.next_span_start(); - - self.file_contents.extend(&contents); - - let next_span_end = self.next_span_start(); - - self.files.push((filename, next_span_start, next_span_end)); + self.files.push((filename, contents)); self.num_files() - 1 } pub fn get_span_contents(&self, span: Span) -> &[u8] { if let Some(permanent_state) = &self.permanent_state { - let permanent_end = permanent_state.next_span_start(); - if permanent_end <= span.start { - &self.file_contents[(span.start - permanent_end)..(span.end - permanent_end)] + let num_permanent_files = permanent_state.num_files(); + if span.file_id < num_permanent_files { + &permanent_state.get_file_contents(span.file_id)[span.start..span.end] } else { - &permanent_state.file_contents[span.start..span.end] + &self.files[span.file_id - num_permanent_files].1[span.start..span.end] } } else { - &self.file_contents[span.start..span.end] + &self.files[span.file_id].1[span.start..span.end] + } + } + + pub fn get_file_contents(&self, file_id: usize) -> &[u8] { + if let Some(permanent_state) = &self.permanent_state { + let num_permanent_files = permanent_state.num_files(); + if file_id < num_permanent_files { + &permanent_state.get_file_contents(file_id) + } else { + &self.files[file_id - num_permanent_files].1 + } + } else { + &self.files[file_id].1 } } diff --git a/src/span.rs b/src/span.rs index 4d436245d0..8c3f8664e4 100644 --- a/src/span.rs +++ b/src/span.rs @@ -2,14 +2,23 @@ pub struct Span { pub start: usize, pub end: usize, + pub file_id: usize, } impl Span { - pub fn new(start: usize, end: usize) -> Span { - Span { start, end } + pub fn new(start: usize, end: usize, file_id: usize) -> Span { + Span { + start, + end, + file_id, + } } pub fn unknown() -> Span { - Span { start: 0, end: 0 } + Span { + start: usize::MAX, + end: usize::MAX, + file_id: usize::MAX, + } } }