Merge pull request #2 from jonathandturner/revert-1-checkpiont

Revert "Removed file_id in Span, compact file sources"
This commit is contained in:
JT 2021-07-03 15:34:17 +12:00 committed by GitHub
commit 03ce896f6f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 114 additions and 80 deletions

View file

@ -51,7 +51,11 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool {
&& (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#') && (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#')
} }
pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseError>) { pub fn lex_item(
input: &[u8],
curr_offset: &mut usize,
file_id: usize,
) -> (Span, Option<ParseError>) {
// This variable tracks the starting character of a string literal, so that // This variable tracks the starting character of a string literal, so that
// we remain inside the string literal lexer mode until we encounter the // we remain inside the string literal lexer mode until we encounter the
// closing quote. // closing quote.
@ -133,7 +137,7 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
*curr_offset += 1; *curr_offset += 1;
} }
let span = Span::new(token_start, *curr_offset); let span = Span::new(token_start, *curr_offset, file_id);
// If there are still unclosed opening delimiters, close them and add // If there are still unclosed opening delimiters, close them and add
// synthetic closing characters to the accumulated token. // synthetic closing characters to the accumulated token.
@ -167,6 +171,7 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
pub fn lex( pub fn lex(
input: &[u8], input: &[u8],
file_id: usize,
span_offset: usize, span_offset: usize,
lex_mode: LexMode, lex_mode: LexMode,
) -> (Vec<Token>, Option<ParseError>) { ) -> (Vec<Token>, Option<ParseError>) {
@ -193,7 +198,7 @@ pub fn lex(
curr_offset += 1; curr_offset += 1;
output.push(Token::new( output.push(Token::new(
TokenContents::Item, TokenContents::Item,
Span::new(span_offset + prev_idx, span_offset + idx + 1), Span::new(span_offset + prev_idx, span_offset + idx + 1, file_id),
)); ));
continue; continue;
} }
@ -202,7 +207,7 @@ pub fn lex(
// Otherwise, it's just a regular `|` token. // Otherwise, it's just a regular `|` token.
output.push(Token::new( output.push(Token::new(
TokenContents::Pipe, TokenContents::Pipe,
Span::new(span_offset + idx, span_offset + idx + 1), Span::new(span_offset + idx, span_offset + idx + 1, file_id),
)); ));
is_complete = false; is_complete = false;
} else if c == b';' { } else if c == b';' {
@ -212,13 +217,14 @@ pub fn lex(
error = Some(ParseError::ExtraTokens(Span::new( error = Some(ParseError::ExtraTokens(Span::new(
curr_offset, curr_offset,
curr_offset + 1, curr_offset + 1,
file_id,
))); )));
} }
let idx = curr_offset; let idx = curr_offset;
curr_offset += 1; curr_offset += 1;
output.push(Token::new( output.push(Token::new(
TokenContents::Semicolon, TokenContents::Semicolon,
Span::new(idx, idx + 1), Span::new(idx, idx + 1, file_id),
)); ));
} else if c == b'\n' || c == b'\r' { } else if c == b'\n' || c == b'\r' {
// If the next character is a newline, we're looking at an EOL (end of line) token. // If the next character is a newline, we're looking at an EOL (end of line) token.
@ -226,7 +232,10 @@ pub fn lex(
let idx = curr_offset; let idx = curr_offset;
curr_offset += 1; curr_offset += 1;
if lex_mode == LexMode::Normal { if lex_mode == LexMode::Normal {
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1))); output.push(Token::new(
TokenContents::Eol,
Span::new(idx, idx + 1, file_id),
));
} }
} else if c == b'#' { } else if c == b'#' {
// If the next character is `#`, we're at the beginning of a line // If the next character is `#`, we're at the beginning of a line
@ -238,7 +247,7 @@ pub fn lex(
if *input == b'\n' { if *input == b'\n' {
output.push(Token::new( output.push(Token::new(
TokenContents::Comment, TokenContents::Comment,
Span::new(start, curr_offset), Span::new(start, curr_offset, file_id),
)); ));
start = curr_offset; start = curr_offset;
@ -248,7 +257,7 @@ pub fn lex(
if start != curr_offset { if start != curr_offset {
output.push(Token::new( output.push(Token::new(
TokenContents::Comment, TokenContents::Comment,
Span::new(start, curr_offset), Span::new(start, curr_offset, file_id),
)); ));
} }
} else if c == b' ' || c == b'\t' { } else if c == b' ' || c == b'\t' {
@ -257,7 +266,7 @@ pub fn lex(
} else { } else {
// Otherwise, try to consume an unclassified token. // Otherwise, try to consume an unclassified token.
let (span, err) = lex_item(input, &mut curr_offset); let (span, err) = lex_item(input, &mut curr_offset, file_id);
if error.is_none() { if error.is_none() {
error = err; error = err;
} }
@ -276,7 +285,7 @@ mod lex_tests {
fn lex_basic() { fn lex_basic() {
let file = b"let x = 4"; let file = b"let x = 4";
let output = lex(file, 0, LexMode::Normal); let output = lex(file, 0, 0, LexMode::Normal);
assert!(output.1.is_none()); assert!(output.1.is_none());
} }
@ -285,12 +294,16 @@ mod lex_tests {
fn lex_newline() { fn lex_newline() {
let file = b"let x = 300\nlet y = 500;"; let file = b"let x = 300\nlet y = 500;";
let output = lex(file, 0, LexMode::Normal); let output = lex(file, 0, 0, LexMode::Normal);
println!("{:#?}", output.0); println!("{:#?}", output.0);
assert!(output.0.contains(&Token { assert!(output.0.contains(&Token {
contents: TokenContents::Eol, contents: TokenContents::Eol,
span: Span { start: 11, end: 12 } span: Span {
start: 11,
end: 12,
file_id: 0
}
})); }));
} }
@ -298,7 +311,7 @@ mod lex_tests {
fn lex_empty() { fn lex_empty() {
let file = b""; let file = b"";
let output = lex(file, 0, LexMode::Normal); let output = lex(file, 0, 0, LexMode::Normal);
assert!(output.0.is_empty()); assert!(output.0.is_empty());
assert!(output.1.is_none()); assert!(output.1.is_none());

View file

@ -128,7 +128,7 @@ mod tests {
use crate::{lex, lite_parse, LiteBlock, ParseError, Span}; use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> { fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
let (output, err) = lex(input, 0, crate::LexMode::Normal); let (output, err) = lex(input, 0, 0, crate::LexMode::Normal);
if let Some(err) = err { if let Some(err) = err {
return Err(err); return Err(err);
} }
@ -194,7 +194,11 @@ mod tests {
assert_eq!(lite_block.block[0].commands[0].comments.len(), 1); assert_eq!(lite_block.block[0].commands[0].comments.len(), 1);
assert_eq!( assert_eq!(
lite_block.block[0].commands[0].comments[0], lite_block.block[0].commands[0].comments[0],
Span { start: 21, end: 39 } Span {
start: 21,
end: 39,
file_id: 0
}
); );
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3); assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);

View file

@ -8,17 +8,10 @@ fn main() -> std::io::Result<()> {
working_set.add_decl((b"foo").to_vec(), sig); working_set.add_decl((b"foo").to_vec(), sig);
let file = std::fs::read(&path)?; let file = std::fs::read(&path)?;
let (output, err) = working_set.parse_file(&path, file); let (output, err) = working_set.parse_file(&path, &file);
//let (output, err) = working_set.parse_source(path.as_bytes()); //let (output, err) = working_set.parse_source(path.as_bytes());
println!("{}", output.len()); println!("{}", output.len());
println!("error: {:?}", err); println!("error: {:?}", err);
// println!("{}", size_of::<Statement>());
// let mut buffer = String::new();
// let stdin = std::io::stdin();
// let mut handle = stdin.lock();
// handle.read_to_string(&mut buffer)?;
Ok(()) Ok(())
} else { } else {

View file

@ -109,6 +109,7 @@ pub enum Expr {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Expression { pub struct Expression {
expr: Expr, expr: Expr,
ty: Type,
span: Span, span: Span,
} }
impl Expression { impl Expression {
@ -116,7 +117,7 @@ impl Expression {
Expression { Expression {
expr: Expr::Garbage, expr: Expr::Garbage,
span, span,
//ty: Type::Unknown, ty: Type::Unknown,
} }
} }
pub fn precedence(&self) -> usize { pub fn precedence(&self) -> usize {
@ -263,12 +264,13 @@ fn span(spans: &[Span]) -> Span {
if length == 0 { if length == 0 {
Span::unknown() Span::unknown()
} else if length == 1 { } else if length == 1 || spans[0].file_id != spans[length - 1].file_id {
spans[0] spans[0]
} else { } else {
Span { Span {
start: spans[0].start, start: spans[0].start,
end: spans[length - 1].end, end: spans[length - 1].end,
file_id: spans[0].file_id,
} }
} }
} }
@ -340,6 +342,7 @@ impl ParserWorkingSet {
let short_flag_span = Span { let short_flag_span = Span {
start: orig.start + 1 + short_flag.0, start: orig.start + 1 + short_flag.0,
end: orig.start + 1 + short_flag.0 + 1, end: orig.start + 1 + short_flag.0 + 1,
file_id: orig.file_id,
}; };
if let Some(flag) = sig.get_short_flag(short_flag_char) { if let Some(flag) = sig.get_short_flag(short_flag_char) {
// If we require an arg and are in a batch of short flags, error // If we require an arg and are in a batch of short flags, error
@ -416,7 +419,7 @@ impl ParserWorkingSet {
( (
Expression { Expression {
expr: Expr::Call(Box::new(call)), expr: Expr::Call(Box::new(call)),
//ty: Type::Unknown, ty: Type::Unknown,
span: span(spans), span: span(spans),
}, },
error, error,
@ -432,6 +435,7 @@ impl ParserWorkingSet {
( (
Expression { Expression {
expr: Expr::Int(v), expr: Expr::Int(v),
ty: Type::Int,
span, span,
}, },
None, None,
@ -447,6 +451,7 @@ impl ParserWorkingSet {
( (
Expression { Expression {
expr: Expr::Int(v), expr: Expr::Int(v),
ty: Type::Int,
span, span,
}, },
None, None,
@ -462,6 +467,7 @@ impl ParserWorkingSet {
( (
Expression { Expression {
expr: Expr::Int(v), expr: Expr::Int(v),
ty: Type::Int,
span, span,
}, },
None, None,
@ -476,6 +482,7 @@ impl ParserWorkingSet {
( (
Expression { Expression {
expr: Expr::Int(x), expr: Expr::Int(x),
ty: Type::Int,
span, span,
}, },
None, None,
@ -503,9 +510,14 @@ impl ParserWorkingSet {
let bytes = self.get_span_contents(span); let bytes = self.get_span_contents(span);
if let Some(var_id) = self.find_variable(bytes) { if let Some(var_id) = self.find_variable(bytes) {
let ty = *self
.get_variable(var_id)
.expect("internal error: invalid VarId");
( (
Expression { Expression {
expr: Expr::Var(var_id), expr: Expr::Var(var_id),
ty,
span, span,
}, },
None, None,
@ -535,16 +547,21 @@ impl ParserWorkingSet {
Span { Span {
start: end, start: end,
end: end + 1, end: end + 1,
file_id: span.file_id,
}, },
)) ))
}); });
} }
let span = Span { start, end }; let span = Span {
start,
end,
file_id: span.file_id,
};
let source = self.get_span_contents(span); let source = self.get_file_contents(span.file_id);
let (output, err) = lex(&source, start, crate::LexMode::Normal); let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal);
error = error.or(err); error = error.or(err);
let (output, err) = lite_parse(&output); let (output, err) = lite_parse(&output);
@ -556,6 +573,7 @@ impl ParserWorkingSet {
( (
Expression { Expression {
expr: Expr::Subexpression(Box::new(output)), expr: Expr::Subexpression(Box::new(output)),
ty: Type::Unknown,
span, span,
}, },
error, error,
@ -581,16 +599,21 @@ impl ParserWorkingSet {
Span { Span {
start: end, start: end,
end: end + 1, end: end + 1,
file_id: span.file_id,
}, },
)) ))
}); });
} }
let span = Span { start, end }; let span = Span {
start,
end,
file_id: span.file_id,
};
let source = &self.file_contents[..end]; let source = self.get_file_contents(span.file_id);
let (output, err) = lex(&source, start, crate::LexMode::Normal); let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal);
error = error.or(err); error = error.or(err);
let (output, err) = lite_parse(&output); let (output, err) = lite_parse(&output);
@ -604,6 +627,7 @@ impl ParserWorkingSet {
( (
Expression { Expression {
expr: Expr::Block(Box::new(output)), expr: Expr::Block(Box::new(output)),
ty: Type::Unknown,
span, span,
}, },
error, error,
@ -714,6 +738,7 @@ impl ParserWorkingSet {
( (
Expression { Expression {
expr: Expr::Operator(operator), expr: Expr::Operator(operator),
ty: Type::Unknown,
span, span,
}, },
None, None,
@ -778,6 +803,7 @@ impl ParserWorkingSet {
expr_stack.push(Expression { expr_stack.push(Expression {
expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)), expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
span: op_span, span: op_span,
ty: Type::Unknown,
}); });
} }
} }
@ -803,6 +829,7 @@ impl ParserWorkingSet {
let binary_op_span = span(&[lhs.span, rhs.span]); let binary_op_span = span(&[lhs.span, rhs.span]);
expr_stack.push(Expression { expr_stack.push(Expression {
expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)), expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
ty: Type::Unknown,
span: binary_op_span, span: binary_op_span,
}); });
} }
@ -862,7 +889,7 @@ impl ParserWorkingSet {
error = error.or(err); error = error.or(err);
let var_name: Vec<_> = self.get_span_contents(spans[1]).into(); let var_name: Vec<_> = self.get_span_contents(spans[1]).into();
let var_id = self.add_variable(var_name, Type::Unknown); let var_id = self.add_variable(var_name, expression.ty);
(Statement::VarDecl(VarDecl { var_id, expression }), error) (Statement::VarDecl(VarDecl { var_id, expression }), error)
} else { } else {
@ -901,13 +928,13 @@ impl ParserWorkingSet {
(block, error) (block, error)
} }
pub fn parse_file(&mut self, fname: &str, contents: Vec<u8>) -> (Block, Option<ParseError>) { pub fn parse_file(&mut self, fname: &str, contents: &[u8]) -> (Block, Option<ParseError>) {
let mut error = None; let mut error = None;
let (output, err) = lex(&contents, 0, crate::LexMode::Normal); let file_id = self.add_file(fname.into(), contents.into());
error = error.or(err);
self.add_file(fname.into(), contents); let (output, err) = lex(contents, file_id, 0, crate::LexMode::Normal);
error = error.or(err);
let (output, err) = lite_parse(&output); let (output, err) = lite_parse(&output);
error = error.or(err); error = error.or(err);
@ -921,9 +948,9 @@ impl ParserWorkingSet {
pub fn parse_source(&mut self, source: &[u8]) -> (Block, Option<ParseError>) { pub fn parse_source(&mut self, source: &[u8]) -> (Block, Option<ParseError>) {
let mut error = None; let mut error = None;
self.add_file("source".into(), source.into()); let file_id = self.add_file("source".into(), source.into());
let (output, err) = lex(source, 0, crate::LexMode::Normal); let (output, err) = lex(source, file_id, 0, crate::LexMode::Normal);
error = error.or(err); error = error.or(err);
let (output, err) = lite_parse(&output); let (output, err) = lite_parse(&output);

View file

@ -2,8 +2,7 @@ use crate::{Signature, Span};
use std::{collections::HashMap, sync::Arc}; use std::{collections::HashMap, sync::Arc};
pub struct ParserState { pub struct ParserState {
files: Vec<(String, usize, usize)>, files: Vec<(String, Vec<u8>)>,
file_contents: Vec<u8>,
vars: Vec<Type>, vars: Vec<Type>,
decls: Vec<Signature>, decls: Vec<Signature>,
} }
@ -42,7 +41,6 @@ impl ParserState {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
files: vec![], files: vec![],
file_contents: vec![],
vars: vec![], vars: vec![],
decls: vec![], decls: vec![],
} }
@ -55,7 +53,6 @@ impl ParserState {
// Take the mutable reference and extend the permanent state from the working set // Take the mutable reference and extend the permanent state from the working set
if let Some(this) = std::sync::Arc::<ParserState>::get_mut(this) { if let Some(this) = std::sync::Arc::<ParserState>::get_mut(this) {
this.files.extend(working_set.files); this.files.extend(working_set.files);
this.file_contents.extend(working_set.file_contents);
this.decls.extend(working_set.decls); this.decls.extend(working_set.decls);
this.vars.extend(working_set.vars); this.vars.extend(working_set.vars);
@ -85,27 +82,20 @@ impl ParserState {
self.decls.get(decl_id) self.decls.get(decl_id)
} }
pub fn next_span_start(&self) -> usize {
self.file_contents.len()
}
#[allow(unused)] #[allow(unused)]
pub(crate) fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize { pub(crate) fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
let next_span_start = self.next_span_start(); self.files.push((filename, contents));
self.file_contents.extend(&contents);
let next_span_end = self.next_span_start();
self.files.push((filename, next_span_start, next_span_end));
self.num_files() - 1 self.num_files() - 1
} }
pub(crate) fn get_file_contents(&self, idx: usize) -> &[u8] {
&self.files[idx].1
}
} }
pub struct ParserWorkingSet { pub struct ParserWorkingSet {
files: Vec<(String, usize, usize)>, files: Vec<(String, Vec<u8>)>,
pub(crate) file_contents: Vec<u8>,
vars: Vec<Type>, // indexed by VarId vars: Vec<Type>, // indexed by VarId
decls: Vec<Signature>, // indexed by DeclId decls: Vec<Signature>, // indexed by DeclId
permanent_state: Option<Arc<ParserState>>, permanent_state: Option<Arc<ParserState>>,
@ -116,7 +106,6 @@ impl ParserWorkingSet {
pub fn new(permanent_state: Option<Arc<ParserState>>) -> Self { pub fn new(permanent_state: Option<Arc<ParserState>>) -> Self {
Self { Self {
files: vec![], files: vec![],
file_contents: vec![],
vars: vec![], vars: vec![],
decls: vec![], decls: vec![],
permanent_state, permanent_state,
@ -148,36 +137,35 @@ impl ParserWorkingSet {
decl_id decl_id
} }
pub fn next_span_start(&self) -> usize {
if let Some(permanent_state) = &self.permanent_state {
permanent_state.next_span_start() + self.file_contents.len()
} else {
self.file_contents.len()
}
}
pub fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize { pub fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
let next_span_start = self.next_span_start(); self.files.push((filename, contents));
self.file_contents.extend(&contents);
let next_span_end = self.next_span_start();
self.files.push((filename, next_span_start, next_span_end));
self.num_files() - 1 self.num_files() - 1
} }
pub fn get_span_contents(&self, span: Span) -> &[u8] { pub fn get_span_contents(&self, span: Span) -> &[u8] {
if let Some(permanent_state) = &self.permanent_state { if let Some(permanent_state) = &self.permanent_state {
let permanent_end = permanent_state.next_span_start(); let num_permanent_files = permanent_state.num_files();
if permanent_end <= span.start { if span.file_id < num_permanent_files {
&self.file_contents[(span.start - permanent_end)..(span.end - permanent_end)] &permanent_state.get_file_contents(span.file_id)[span.start..span.end]
} else { } else {
&permanent_state.file_contents[span.start..span.end] &self.files[span.file_id - num_permanent_files].1[span.start..span.end]
} }
} else { } else {
&self.file_contents[span.start..span.end] &self.files[span.file_id].1[span.start..span.end]
}
}
pub fn get_file_contents(&self, file_id: usize) -> &[u8] {
if let Some(permanent_state) = &self.permanent_state {
let num_permanent_files = permanent_state.num_files();
if file_id < num_permanent_files {
&permanent_state.get_file_contents(file_id)
} else {
&self.files[file_id - num_permanent_files].1
}
} else {
&self.files[file_id].1
} }
} }

View file

@ -2,14 +2,23 @@
pub struct Span { pub struct Span {
pub start: usize, pub start: usize,
pub end: usize, pub end: usize,
pub file_id: usize,
} }
impl Span { impl Span {
pub fn new(start: usize, end: usize) -> Span { pub fn new(start: usize, end: usize, file_id: usize) -> Span {
Span { start, end } Span {
start,
end,
file_id,
}
} }
pub fn unknown() -> Span { pub fn unknown() -> Span {
Span { start: 0, end: 0 } Span {
start: usize::MAX,
end: usize::MAX,
file_id: usize::MAX,
}
} }
} }