mirror of
https://github.com/nushell/nushell
synced 2024-12-28 05:53:09 +00:00
Merge pull request #1 from jonathandturner/checkpiont
Removed file_id in Span, compact file sources
This commit is contained in:
commit
049477a9bd
6 changed files with 82 additions and 116 deletions
39
src/lex.rs
39
src/lex.rs
|
@ -51,11 +51,7 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool {
|
|||
&& (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#')
|
||||
}
|
||||
|
||||
pub fn lex_item(
|
||||
input: &[u8],
|
||||
curr_offset: &mut usize,
|
||||
file_id: usize,
|
||||
) -> (Span, Option<ParseError>) {
|
||||
pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseError>) {
|
||||
// This variable tracks the starting character of a string literal, so that
|
||||
// we remain inside the string literal lexer mode until we encounter the
|
||||
// closing quote.
|
||||
|
@ -137,7 +133,7 @@ pub fn lex_item(
|
|||
*curr_offset += 1;
|
||||
}
|
||||
|
||||
let span = Span::new(token_start, *curr_offset, file_id);
|
||||
let span = Span::new(token_start, *curr_offset);
|
||||
|
||||
// If there are still unclosed opening delimiters, close them and add
|
||||
// synthetic closing characters to the accumulated token.
|
||||
|
@ -171,7 +167,6 @@ pub fn lex_item(
|
|||
|
||||
pub fn lex(
|
||||
input: &[u8],
|
||||
file_id: usize,
|
||||
span_offset: usize,
|
||||
lex_mode: LexMode,
|
||||
) -> (Vec<Token>, Option<ParseError>) {
|
||||
|
@ -198,7 +193,7 @@ pub fn lex(
|
|||
curr_offset += 1;
|
||||
output.push(Token::new(
|
||||
TokenContents::Item,
|
||||
Span::new(span_offset + prev_idx, span_offset + idx + 1, file_id),
|
||||
Span::new(span_offset + prev_idx, span_offset + idx + 1),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
@ -207,7 +202,7 @@ pub fn lex(
|
|||
// Otherwise, it's just a regular `|` token.
|
||||
output.push(Token::new(
|
||||
TokenContents::Pipe,
|
||||
Span::new(span_offset + idx, span_offset + idx + 1, file_id),
|
||||
Span::new(span_offset + idx, span_offset + idx + 1),
|
||||
));
|
||||
is_complete = false;
|
||||
} else if c == b';' {
|
||||
|
@ -217,14 +212,13 @@ pub fn lex(
|
|||
error = Some(ParseError::ExtraTokens(Span::new(
|
||||
curr_offset,
|
||||
curr_offset + 1,
|
||||
file_id,
|
||||
)));
|
||||
}
|
||||
let idx = curr_offset;
|
||||
curr_offset += 1;
|
||||
output.push(Token::new(
|
||||
TokenContents::Semicolon,
|
||||
Span::new(idx, idx + 1, file_id),
|
||||
Span::new(idx, idx + 1),
|
||||
));
|
||||
} else if c == b'\n' || c == b'\r' {
|
||||
// If the next character is a newline, we're looking at an EOL (end of line) token.
|
||||
|
@ -232,10 +226,7 @@ pub fn lex(
|
|||
let idx = curr_offset;
|
||||
curr_offset += 1;
|
||||
if lex_mode == LexMode::Normal {
|
||||
output.push(Token::new(
|
||||
TokenContents::Eol,
|
||||
Span::new(idx, idx + 1, file_id),
|
||||
));
|
||||
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
|
||||
}
|
||||
} else if c == b'#' {
|
||||
// If the next character is `#`, we're at the beginning of a line
|
||||
|
@ -247,7 +238,7 @@ pub fn lex(
|
|||
if *input == b'\n' {
|
||||
output.push(Token::new(
|
||||
TokenContents::Comment,
|
||||
Span::new(start, curr_offset, file_id),
|
||||
Span::new(start, curr_offset),
|
||||
));
|
||||
start = curr_offset;
|
||||
|
||||
|
@ -257,7 +248,7 @@ pub fn lex(
|
|||
if start != curr_offset {
|
||||
output.push(Token::new(
|
||||
TokenContents::Comment,
|
||||
Span::new(start, curr_offset, file_id),
|
||||
Span::new(start, curr_offset),
|
||||
));
|
||||
}
|
||||
} else if c == b' ' || c == b'\t' {
|
||||
|
@ -266,7 +257,7 @@ pub fn lex(
|
|||
} else {
|
||||
// Otherwise, try to consume an unclassified token.
|
||||
|
||||
let (span, err) = lex_item(input, &mut curr_offset, file_id);
|
||||
let (span, err) = lex_item(input, &mut curr_offset);
|
||||
if error.is_none() {
|
||||
error = err;
|
||||
}
|
||||
|
@ -285,7 +276,7 @@ mod lex_tests {
|
|||
fn lex_basic() {
|
||||
let file = b"let x = 4";
|
||||
|
||||
let output = lex(file, 0, 0, LexMode::Normal);
|
||||
let output = lex(file, 0, LexMode::Normal);
|
||||
|
||||
assert!(output.1.is_none());
|
||||
}
|
||||
|
@ -294,16 +285,12 @@ mod lex_tests {
|
|||
fn lex_newline() {
|
||||
let file = b"let x = 300\nlet y = 500;";
|
||||
|
||||
let output = lex(file, 0, 0, LexMode::Normal);
|
||||
let output = lex(file, 0, LexMode::Normal);
|
||||
|
||||
println!("{:#?}", output.0);
|
||||
assert!(output.0.contains(&Token {
|
||||
contents: TokenContents::Eol,
|
||||
span: Span {
|
||||
start: 11,
|
||||
end: 12,
|
||||
file_id: 0
|
||||
}
|
||||
span: Span { start: 11, end: 12 }
|
||||
}));
|
||||
}
|
||||
|
||||
|
@ -311,7 +298,7 @@ mod lex_tests {
|
|||
fn lex_empty() {
|
||||
let file = b"";
|
||||
|
||||
let output = lex(file, 0, 0, LexMode::Normal);
|
||||
let output = lex(file, 0, LexMode::Normal);
|
||||
|
||||
assert!(output.0.is_empty());
|
||||
assert!(output.1.is_none());
|
||||
|
|
|
@ -128,7 +128,7 @@ mod tests {
|
|||
use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
|
||||
|
||||
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
|
||||
let (output, err) = lex(input, 0, 0, crate::LexMode::Normal);
|
||||
let (output, err) = lex(input, 0, crate::LexMode::Normal);
|
||||
if let Some(err) = err {
|
||||
return Err(err);
|
||||
}
|
||||
|
@ -194,11 +194,7 @@ mod tests {
|
|||
assert_eq!(lite_block.block[0].commands[0].comments.len(), 1);
|
||||
assert_eq!(
|
||||
lite_block.block[0].commands[0].comments[0],
|
||||
Span {
|
||||
start: 21,
|
||||
end: 39,
|
||||
file_id: 0
|
||||
}
|
||||
Span { start: 21, end: 39 }
|
||||
);
|
||||
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);
|
||||
|
||||
|
|
|
@ -8,10 +8,17 @@ fn main() -> std::io::Result<()> {
|
|||
working_set.add_decl((b"foo").to_vec(), sig);
|
||||
|
||||
let file = std::fs::read(&path)?;
|
||||
let (output, err) = working_set.parse_file(&path, &file);
|
||||
let (output, err) = working_set.parse_file(&path, file);
|
||||
//let (output, err) = working_set.parse_source(path.as_bytes());
|
||||
println!("{}", output.len());
|
||||
println!("error: {:?}", err);
|
||||
// println!("{}", size_of::<Statement>());
|
||||
|
||||
// let mut buffer = String::new();
|
||||
// let stdin = std::io::stdin();
|
||||
// let mut handle = stdin.lock();
|
||||
|
||||
// handle.read_to_string(&mut buffer)?;
|
||||
|
||||
Ok(())
|
||||
} else {
|
||||
|
|
|
@ -109,7 +109,6 @@ pub enum Expr {
|
|||
#[derive(Debug, Clone)]
|
||||
pub struct Expression {
|
||||
expr: Expr,
|
||||
ty: Type,
|
||||
span: Span,
|
||||
}
|
||||
impl Expression {
|
||||
|
@ -117,7 +116,7 @@ impl Expression {
|
|||
Expression {
|
||||
expr: Expr::Garbage,
|
||||
span,
|
||||
ty: Type::Unknown,
|
||||
//ty: Type::Unknown,
|
||||
}
|
||||
}
|
||||
pub fn precedence(&self) -> usize {
|
||||
|
@ -264,13 +263,12 @@ fn span(spans: &[Span]) -> Span {
|
|||
|
||||
if length == 0 {
|
||||
Span::unknown()
|
||||
} else if length == 1 || spans[0].file_id != spans[length - 1].file_id {
|
||||
} else if length == 1 {
|
||||
spans[0]
|
||||
} else {
|
||||
Span {
|
||||
start: spans[0].start,
|
||||
end: spans[length - 1].end,
|
||||
file_id: spans[0].file_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -342,7 +340,6 @@ impl ParserWorkingSet {
|
|||
let short_flag_span = Span {
|
||||
start: orig.start + 1 + short_flag.0,
|
||||
end: orig.start + 1 + short_flag.0 + 1,
|
||||
file_id: orig.file_id,
|
||||
};
|
||||
if let Some(flag) = sig.get_short_flag(short_flag_char) {
|
||||
// If we require an arg and are in a batch of short flags, error
|
||||
|
@ -419,7 +416,7 @@ impl ParserWorkingSet {
|
|||
(
|
||||
Expression {
|
||||
expr: Expr::Call(Box::new(call)),
|
||||
ty: Type::Unknown,
|
||||
//ty: Type::Unknown,
|
||||
span: span(spans),
|
||||
},
|
||||
error,
|
||||
|
@ -435,7 +432,6 @@ impl ParserWorkingSet {
|
|||
(
|
||||
Expression {
|
||||
expr: Expr::Int(v),
|
||||
ty: Type::Int,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
|
@ -451,7 +447,6 @@ impl ParserWorkingSet {
|
|||
(
|
||||
Expression {
|
||||
expr: Expr::Int(v),
|
||||
ty: Type::Int,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
|
@ -467,7 +462,6 @@ impl ParserWorkingSet {
|
|||
(
|
||||
Expression {
|
||||
expr: Expr::Int(v),
|
||||
ty: Type::Int,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
|
@ -482,7 +476,6 @@ impl ParserWorkingSet {
|
|||
(
|
||||
Expression {
|
||||
expr: Expr::Int(x),
|
||||
ty: Type::Int,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
|
@ -510,14 +503,9 @@ impl ParserWorkingSet {
|
|||
let bytes = self.get_span_contents(span);
|
||||
|
||||
if let Some(var_id) = self.find_variable(bytes) {
|
||||
let ty = *self
|
||||
.get_variable(var_id)
|
||||
.expect("internal error: invalid VarId");
|
||||
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::Var(var_id),
|
||||
ty,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
|
@ -547,21 +535,16 @@ impl ParserWorkingSet {
|
|||
Span {
|
||||
start: end,
|
||||
end: end + 1,
|
||||
file_id: span.file_id,
|
||||
},
|
||||
))
|
||||
});
|
||||
}
|
||||
|
||||
let span = Span {
|
||||
start,
|
||||
end,
|
||||
file_id: span.file_id,
|
||||
};
|
||||
let span = Span { start, end };
|
||||
|
||||
let source = self.get_file_contents(span.file_id);
|
||||
let source = self.get_span_contents(span);
|
||||
|
||||
let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal);
|
||||
let (output, err) = lex(&source, start, crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
|
@ -573,7 +556,6 @@ impl ParserWorkingSet {
|
|||
(
|
||||
Expression {
|
||||
expr: Expr::Subexpression(Box::new(output)),
|
||||
ty: Type::Unknown,
|
||||
span,
|
||||
},
|
||||
error,
|
||||
|
@ -599,21 +581,16 @@ impl ParserWorkingSet {
|
|||
Span {
|
||||
start: end,
|
||||
end: end + 1,
|
||||
file_id: span.file_id,
|
||||
},
|
||||
))
|
||||
});
|
||||
}
|
||||
|
||||
let span = Span {
|
||||
start,
|
||||
end,
|
||||
file_id: span.file_id,
|
||||
};
|
||||
let span = Span { start, end };
|
||||
|
||||
let source = self.get_file_contents(span.file_id);
|
||||
let source = &self.file_contents[..end];
|
||||
|
||||
let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal);
|
||||
let (output, err) = lex(&source, start, crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
|
@ -627,7 +604,6 @@ impl ParserWorkingSet {
|
|||
(
|
||||
Expression {
|
||||
expr: Expr::Block(Box::new(output)),
|
||||
ty: Type::Unknown,
|
||||
span,
|
||||
},
|
||||
error,
|
||||
|
@ -738,7 +714,6 @@ impl ParserWorkingSet {
|
|||
(
|
||||
Expression {
|
||||
expr: Expr::Operator(operator),
|
||||
ty: Type::Unknown,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
|
@ -803,7 +778,6 @@ impl ParserWorkingSet {
|
|||
expr_stack.push(Expression {
|
||||
expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
|
||||
span: op_span,
|
||||
ty: Type::Unknown,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -829,7 +803,6 @@ impl ParserWorkingSet {
|
|||
let binary_op_span = span(&[lhs.span, rhs.span]);
|
||||
expr_stack.push(Expression {
|
||||
expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
|
||||
ty: Type::Unknown,
|
||||
span: binary_op_span,
|
||||
});
|
||||
}
|
||||
|
@ -889,7 +862,7 @@ impl ParserWorkingSet {
|
|||
error = error.or(err);
|
||||
|
||||
let var_name: Vec<_> = self.get_span_contents(spans[1]).into();
|
||||
let var_id = self.add_variable(var_name, expression.ty);
|
||||
let var_id = self.add_variable(var_name, Type::Unknown);
|
||||
|
||||
(Statement::VarDecl(VarDecl { var_id, expression }), error)
|
||||
} else {
|
||||
|
@ -928,14 +901,14 @@ impl ParserWorkingSet {
|
|||
(block, error)
|
||||
}
|
||||
|
||||
pub fn parse_file(&mut self, fname: &str, contents: &[u8]) -> (Block, Option<ParseError>) {
|
||||
pub fn parse_file(&mut self, fname: &str, contents: Vec<u8>) -> (Block, Option<ParseError>) {
|
||||
let mut error = None;
|
||||
|
||||
let file_id = self.add_file(fname.into(), contents.into());
|
||||
|
||||
let (output, err) = lex(contents, file_id, 0, crate::LexMode::Normal);
|
||||
let (output, err) = lex(&contents, 0, crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
|
||||
self.add_file(fname.into(), contents);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
error = error.or(err);
|
||||
|
||||
|
@ -948,9 +921,9 @@ impl ParserWorkingSet {
|
|||
pub fn parse_source(&mut self, source: &[u8]) -> (Block, Option<ParseError>) {
|
||||
let mut error = None;
|
||||
|
||||
let file_id = self.add_file("source".into(), source.into());
|
||||
self.add_file("source".into(), source.into());
|
||||
|
||||
let (output, err) = lex(source, file_id, 0, crate::LexMode::Normal);
|
||||
let (output, err) = lex(source, 0, crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
|
|
|
@ -2,7 +2,8 @@ use crate::{Signature, Span};
|
|||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
pub struct ParserState {
|
||||
files: Vec<(String, Vec<u8>)>,
|
||||
files: Vec<(String, usize, usize)>,
|
||||
file_contents: Vec<u8>,
|
||||
vars: Vec<Type>,
|
||||
decls: Vec<Signature>,
|
||||
}
|
||||
|
@ -41,6 +42,7 @@ impl ParserState {
|
|||
pub fn new() -> Self {
|
||||
Self {
|
||||
files: vec![],
|
||||
file_contents: vec![],
|
||||
vars: vec![],
|
||||
decls: vec![],
|
||||
}
|
||||
|
@ -53,6 +55,7 @@ impl ParserState {
|
|||
// Take the mutable reference and extend the permanent state from the working set
|
||||
if let Some(this) = std::sync::Arc::<ParserState>::get_mut(this) {
|
||||
this.files.extend(working_set.files);
|
||||
this.file_contents.extend(working_set.file_contents);
|
||||
this.decls.extend(working_set.decls);
|
||||
this.vars.extend(working_set.vars);
|
||||
|
||||
|
@ -82,20 +85,27 @@ impl ParserState {
|
|||
self.decls.get(decl_id)
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub(crate) fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
|
||||
self.files.push((filename, contents));
|
||||
|
||||
self.num_files() - 1
|
||||
pub fn next_span_start(&self) -> usize {
|
||||
self.file_contents.len()
|
||||
}
|
||||
|
||||
pub(crate) fn get_file_contents(&self, idx: usize) -> &[u8] {
|
||||
&self.files[idx].1
|
||||
#[allow(unused)]
|
||||
pub(crate) fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
|
||||
let next_span_start = self.next_span_start();
|
||||
|
||||
self.file_contents.extend(&contents);
|
||||
|
||||
let next_span_end = self.next_span_start();
|
||||
|
||||
self.files.push((filename, next_span_start, next_span_end));
|
||||
|
||||
self.num_files() - 1
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ParserWorkingSet {
|
||||
files: Vec<(String, Vec<u8>)>,
|
||||
files: Vec<(String, usize, usize)>,
|
||||
pub(crate) file_contents: Vec<u8>,
|
||||
vars: Vec<Type>, // indexed by VarId
|
||||
decls: Vec<Signature>, // indexed by DeclId
|
||||
permanent_state: Option<Arc<ParserState>>,
|
||||
|
@ -106,6 +116,7 @@ impl ParserWorkingSet {
|
|||
pub fn new(permanent_state: Option<Arc<ParserState>>) -> Self {
|
||||
Self {
|
||||
files: vec![],
|
||||
file_contents: vec![],
|
||||
vars: vec![],
|
||||
decls: vec![],
|
||||
permanent_state,
|
||||
|
@ -137,35 +148,36 @@ impl ParserWorkingSet {
|
|||
decl_id
|
||||
}
|
||||
|
||||
pub fn next_span_start(&self) -> usize {
|
||||
if let Some(permanent_state) = &self.permanent_state {
|
||||
permanent_state.next_span_start() + self.file_contents.len()
|
||||
} else {
|
||||
self.file_contents.len()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
|
||||
self.files.push((filename, contents));
|
||||
let next_span_start = self.next_span_start();
|
||||
|
||||
self.file_contents.extend(&contents);
|
||||
|
||||
let next_span_end = self.next_span_start();
|
||||
|
||||
self.files.push((filename, next_span_start, next_span_end));
|
||||
|
||||
self.num_files() - 1
|
||||
}
|
||||
|
||||
pub fn get_span_contents(&self, span: Span) -> &[u8] {
|
||||
if let Some(permanent_state) = &self.permanent_state {
|
||||
let num_permanent_files = permanent_state.num_files();
|
||||
if span.file_id < num_permanent_files {
|
||||
&permanent_state.get_file_contents(span.file_id)[span.start..span.end]
|
||||
let permanent_end = permanent_state.next_span_start();
|
||||
if permanent_end <= span.start {
|
||||
&self.file_contents[(span.start - permanent_end)..(span.end - permanent_end)]
|
||||
} else {
|
||||
&self.files[span.file_id - num_permanent_files].1[span.start..span.end]
|
||||
&permanent_state.file_contents[span.start..span.end]
|
||||
}
|
||||
} else {
|
||||
&self.files[span.file_id].1[span.start..span.end]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_file_contents(&self, file_id: usize) -> &[u8] {
|
||||
if let Some(permanent_state) = &self.permanent_state {
|
||||
let num_permanent_files = permanent_state.num_files();
|
||||
if file_id < num_permanent_files {
|
||||
&permanent_state.get_file_contents(file_id)
|
||||
} else {
|
||||
&self.files[file_id - num_permanent_files].1
|
||||
}
|
||||
} else {
|
||||
&self.files[file_id].1
|
||||
&self.file_contents[span.start..span.end]
|
||||
}
|
||||
}
|
||||
|
||||
|
|
15
src/span.rs
15
src/span.rs
|
@ -2,23 +2,14 @@
|
|||
pub struct Span {
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
pub file_id: usize,
|
||||
}
|
||||
|
||||
impl Span {
|
||||
pub fn new(start: usize, end: usize, file_id: usize) -> Span {
|
||||
Span {
|
||||
start,
|
||||
end,
|
||||
file_id,
|
||||
}
|
||||
pub fn new(start: usize, end: usize) -> Span {
|
||||
Span { start, end }
|
||||
}
|
||||
|
||||
pub fn unknown() -> Span {
|
||||
Span {
|
||||
start: usize::MAX,
|
||||
end: usize::MAX,
|
||||
file_id: usize::MAX,
|
||||
}
|
||||
Span { start: 0, end: 0 }
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue