mirror of
https://github.com/nushell/nushell
synced 2025-01-15 22:54:16 +00:00
Add list parsing
This commit is contained in:
parent
666bee61f7
commit
04a6a4f860
3 changed files with 109 additions and 15 deletions
34
src/lex.rs
34
src/lex.rs
|
@ -38,20 +38,32 @@ impl BlockKind {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Selects which characters the lexer folds into plain whitespace.
///
/// The mode is `Copy` so it can be handed down by value from `lex` to
/// `lex_item` and on to `is_item_terminator`.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum LexMode {
    /// Default behaviour: commas are ordinary item characters and a newline
    /// produces an `Eol` token.
    Normal,
    /// A `,` outside of any nested delimiter ends the current item and is
    /// skipped like a space or tab. Newlines still produce `Eol` tokens.
    CommaIsSpace,
    /// Newlines are consumed without emitting an `Eol` token.
    NewlineIsSpace,
}
|
||||||
|
|
||||||
// A baseline token is terminated if it's not nested inside of a paired
|
// A baseline token is terminated if it's not nested inside of a paired
|
||||||
// delimiter and the next character is one of: `|`, `;`, `#` or any
|
// delimiter and the next character is one of: `|`, `;`, `#` or any
|
||||||
// whitespace.
|
// whitespace.
|
||||||
fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool {
|
fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: LexMode) -> bool {
|
||||||
block_level.is_empty()
|
block_level.is_empty()
|
||||||
&& (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#')
|
&& (c == b' '
|
||||||
|
|| c == b'\t'
|
||||||
|
|| c == b'\n'
|
||||||
|
|| c == b'|'
|
||||||
|
|| c == b';'
|
||||||
|
|| c == b'#'
|
||||||
|
|| (c == b',' && lex_mode == LexMode::CommaIsSpace))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseError>) {
|
pub fn lex_item(
|
||||||
|
input: &[u8],
|
||||||
|
curr_offset: &mut usize,
|
||||||
|
lex_mode: LexMode,
|
||||||
|
) -> (Span, Option<ParseError>) {
|
||||||
// This variable tracks the starting character of a string literal, so that
|
// This variable tracks the starting character of a string literal, so that
|
||||||
// we remain inside the string literal lexer mode until we encounter the
|
// we remain inside the string literal lexer mode until we encounter the
|
||||||
// closing quote.
|
// closing quote.
|
||||||
|
@ -85,17 +97,17 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
|
||||||
quote_start = None;
|
quote_start = None;
|
||||||
}
|
}
|
||||||
} else if c == b'#' {
|
} else if c == b'#' {
|
||||||
if is_item_terminator(&block_level, c) {
|
if is_item_terminator(&block_level, c, lex_mode) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
in_comment = true;
|
in_comment = true;
|
||||||
} else if c == b'\n' {
|
} else if c == b'\n' {
|
||||||
in_comment = false;
|
in_comment = false;
|
||||||
if is_item_terminator(&block_level, c) {
|
if is_item_terminator(&block_level, c, lex_mode) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if in_comment {
|
} else if in_comment {
|
||||||
if is_item_terminator(&block_level, c) {
|
if is_item_terminator(&block_level, c, lex_mode) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if c == b'\'' || c == b'"' {
|
} else if c == b'\'' || c == b'"' {
|
||||||
|
@ -126,7 +138,7 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
|
||||||
if let Some(BlockKind::Paren) = block_level.last() {
|
if let Some(BlockKind::Paren) = block_level.last() {
|
||||||
let _ = block_level.pop();
|
let _ = block_level.pop();
|
||||||
}
|
}
|
||||||
} else if is_item_terminator(&block_level, c) {
|
} else if is_item_terminator(&block_level, c, lex_mode) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -225,7 +237,7 @@ pub fn lex(
|
||||||
|
|
||||||
let idx = curr_offset;
|
let idx = curr_offset;
|
||||||
curr_offset += 1;
|
curr_offset += 1;
|
||||||
if lex_mode == LexMode::Normal {
|
if lex_mode != LexMode::NewlineIsSpace {
|
||||||
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
|
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
|
||||||
}
|
}
|
||||||
} else if c == b'#' {
|
} else if c == b'#' {
|
||||||
|
@ -251,13 +263,13 @@ pub fn lex(
|
||||||
Span::new(start, curr_offset),
|
Span::new(start, curr_offset),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
} else if c == b' ' || c == b'\t' {
|
} else if c == b' ' || c == b'\t' || (c == b',' && lex_mode == LexMode::CommaIsSpace) {
|
||||||
// If the next character is non-newline whitespace, skip it.
|
// If the next character is non-newline whitespace, skip it.
|
||||||
curr_offset += 1;
|
curr_offset += 1;
|
||||||
} else {
|
} else {
|
||||||
// Otherwise, try to consume an unclassified token.
|
// Otherwise, try to consume an unclassified token.
|
||||||
|
|
||||||
let (span, err) = lex_item(input, &mut curr_offset);
|
let (span, err) = lex_item(input, &mut curr_offset, lex_mode);
|
||||||
if error.is_none() {
|
if error.is_none() {
|
||||||
error = err;
|
error = err;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,10 +7,10 @@ fn main() -> std::io::Result<()> {
|
||||||
let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j'));
|
let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j'));
|
||||||
working_set.add_decl((b"foo").to_vec(), sig);
|
working_set.add_decl((b"foo").to_vec(), sig);
|
||||||
|
|
||||||
let file = std::fs::read(&path)?;
|
//let file = std::fs::read(&path)?;
|
||||||
let (output, err) = working_set.parse_file(&path, file);
|
//let (output, err) = working_set.parse_file(&path, file);
|
||||||
//let (output, err) = working_set.parse_source(path.as_bytes());
|
let (output, err) = working_set.parse_source(path.as_bytes());
|
||||||
println!("{}", output.len());
|
println!("{:#?}", output);
|
||||||
println!("error: {:?}", err);
|
println!("error: {:?}", err);
|
||||||
// println!("{}", size_of::<Statement>());
|
// println!("{}", size_of::<Statement>());
|
||||||
|
|
||||||
|
|
|
@ -103,6 +103,7 @@ pub enum Expr {
|
||||||
BinaryOp(Box<Expression>, Box<Expression>, Box<Expression>), //lhs, op, rhs
|
BinaryOp(Box<Expression>, Box<Expression>, Box<Expression>), //lhs, op, rhs
|
||||||
Subexpression(Box<Block>),
|
Subexpression(Box<Block>),
|
||||||
Block(Box<Block>),
|
Block(Box<Block>),
|
||||||
|
List(Vec<Expression>),
|
||||||
Garbage,
|
Garbage,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -562,6 +563,78 @@ impl ParserWorkingSet {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn parse_table_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) {
|
||||||
|
let bytes = self.get_span_contents(span);
|
||||||
|
let mut error = None;
|
||||||
|
|
||||||
|
let mut start = span.start;
|
||||||
|
let mut end = span.end;
|
||||||
|
|
||||||
|
if bytes.starts_with(b"[") {
|
||||||
|
start += 1;
|
||||||
|
}
|
||||||
|
if bytes.ends_with(b"]") {
|
||||||
|
end -= 1;
|
||||||
|
} else {
|
||||||
|
error = error.or_else(|| {
|
||||||
|
Some(ParseError::Unclosed(
|
||||||
|
"]".into(),
|
||||||
|
Span {
|
||||||
|
start: end,
|
||||||
|
end: end + 1,
|
||||||
|
},
|
||||||
|
))
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let span = Span { start, end };
|
||||||
|
|
||||||
|
let source = &self.file_contents[..end];
|
||||||
|
|
||||||
|
let (output, err) = lex(&source, start, crate::LexMode::CommaIsSpace);
|
||||||
|
error = error.or(err);
|
||||||
|
|
||||||
|
let (output, err) = lite_parse(&output);
|
||||||
|
error = error.or(err);
|
||||||
|
|
||||||
|
println!("{:?}", output.block);
|
||||||
|
|
||||||
|
match output.block.len() {
|
||||||
|
0 => (
|
||||||
|
Expression {
|
||||||
|
expr: Expr::List(vec![]),
|
||||||
|
span,
|
||||||
|
},
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
1 => {
|
||||||
|
// List
|
||||||
|
|
||||||
|
let mut args = vec![];
|
||||||
|
for arg in &output.block[0].commands {
|
||||||
|
for part in &arg.parts {
|
||||||
|
let (arg, err) = self.parse_arg(*part, SyntaxShape::Any);
|
||||||
|
error = error.or(err);
|
||||||
|
|
||||||
|
args.push(arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(
|
||||||
|
Expression {
|
||||||
|
expr: Expr::List(args),
|
||||||
|
span,
|
||||||
|
},
|
||||||
|
error,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
_ => (
|
||||||
|
garbage(span),
|
||||||
|
Some(ParseError::Mismatch("table".into(), span)),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn parse_block_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) {
|
pub fn parse_block_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) {
|
||||||
let bytes = self.get_span_contents(span);
|
let bytes = self.get_span_contents(span);
|
||||||
let mut error = None;
|
let mut error = None;
|
||||||
|
@ -629,6 +702,15 @@ impl ParserWorkingSet {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return self.parse_block_expression(span);
|
return self.parse_block_expression(span);
|
||||||
|
} else if bytes.starts_with(b"[") {
|
||||||
|
if shape != SyntaxShape::Table && shape != SyntaxShape::Any {
|
||||||
|
// FIXME: need better errors
|
||||||
|
return (
|
||||||
|
garbage(span),
|
||||||
|
Some(ParseError::Mismatch("not a table".into(), span)),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return self.parse_table_expression(span);
|
||||||
}
|
}
|
||||||
|
|
||||||
match shape {
|
match shape {
|
||||||
|
|
Loading…
Reference in a new issue