Add list parsing

This commit is contained in:
JT 2021-07-06 10:58:56 +12:00
parent 666bee61f7
commit 04a6a4f860
3 changed files with 109 additions and 15 deletions

View file

@ -38,20 +38,32 @@ impl BlockKind {
}
}
#[derive(PartialEq, Eq, Debug)]
/// Selects how the lexer treats commas and newlines.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum LexMode {
/// Default mode: `,` is not treated as a separator and a newline emits
/// an `Eol` token.
Normal,
/// A `,` outside of paired delimiters terminates the current item and is
/// skipped between items like a space; a newline still emits an `Eol` token.
CommaIsSpace,
/// A newline does not emit an `Eol` token.
NewlineIsSpace,
}
// A baseline token is terminated if it's not nested inside of a paired
// delimiter and the next character is one of: `|`, `;`, `#`, any
// whitespace, or `,` when lexing in `LexMode::CommaIsSpace`.
fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool {
fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: LexMode) -> bool {
block_level.is_empty()
&& (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#')
&& (c == b' '
|| c == b'\t'
|| c == b'\n'
|| c == b'|'
|| c == b';'
|| c == b'#'
|| (c == b',' && lex_mode == LexMode::CommaIsSpace))
}
pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseError>) {
pub fn lex_item(
input: &[u8],
curr_offset: &mut usize,
lex_mode: LexMode,
) -> (Span, Option<ParseError>) {
// This variable tracks the starting character of a string literal, so that
// we remain inside the string literal lexer mode until we encounter the
// closing quote.
@ -85,17 +97,17 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
quote_start = None;
}
} else if c == b'#' {
if is_item_terminator(&block_level, c) {
if is_item_terminator(&block_level, c, lex_mode) {
break;
}
in_comment = true;
} else if c == b'\n' {
in_comment = false;
if is_item_terminator(&block_level, c) {
if is_item_terminator(&block_level, c, lex_mode) {
break;
}
} else if in_comment {
if is_item_terminator(&block_level, c) {
if is_item_terminator(&block_level, c, lex_mode) {
break;
}
} else if c == b'\'' || c == b'"' {
@ -126,7 +138,7 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
if let Some(BlockKind::Paren) = block_level.last() {
let _ = block_level.pop();
}
} else if is_item_terminator(&block_level, c) {
} else if is_item_terminator(&block_level, c, lex_mode) {
break;
}
@ -225,7 +237,7 @@ pub fn lex(
let idx = curr_offset;
curr_offset += 1;
if lex_mode == LexMode::Normal {
if lex_mode != LexMode::NewlineIsSpace {
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
}
} else if c == b'#' {
@ -251,13 +263,13 @@ pub fn lex(
Span::new(start, curr_offset),
));
}
} else if c == b' ' || c == b'\t' {
} else if c == b' ' || c == b'\t' || (c == b',' && lex_mode == LexMode::CommaIsSpace) {
// If the next character is non-newline whitespace (or a comma in `CommaIsSpace` mode), skip it.
curr_offset += 1;
} else {
// Otherwise, try to consume an unclassified token.
let (span, err) = lex_item(input, &mut curr_offset);
let (span, err) = lex_item(input, &mut curr_offset, lex_mode);
if error.is_none() {
error = err;
}

View file

@ -7,10 +7,10 @@ fn main() -> std::io::Result<()> {
let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j'));
working_set.add_decl((b"foo").to_vec(), sig);
let file = std::fs::read(&path)?;
let (output, err) = working_set.parse_file(&path, file);
//let (output, err) = working_set.parse_source(path.as_bytes());
println!("{}", output.len());
//let file = std::fs::read(&path)?;
//let (output, err) = working_set.parse_file(&path, file);
let (output, err) = working_set.parse_source(path.as_bytes());
println!("{:#?}", output);
println!("error: {:?}", err);
// println!("{}", size_of::<Statement>());

View file

@ -103,6 +103,7 @@ pub enum Expr {
BinaryOp(Box<Expression>, Box<Expression>, Box<Expression>), //lhs, op, rhs
Subexpression(Box<Block>),
Block(Box<Block>),
List(Vec<Expression>),
Garbage,
}
@ -562,6 +563,78 @@ impl ParserWorkingSet {
)
}
pub fn parse_table_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) {
let bytes = self.get_span_contents(span);
let mut error = None;
let mut start = span.start;
let mut end = span.end;
if bytes.starts_with(b"[") {
start += 1;
}
if bytes.ends_with(b"]") {
end -= 1;
} else {
error = error.or_else(|| {
Some(ParseError::Unclosed(
"]".into(),
Span {
start: end,
end: end + 1,
},
))
});
}
let span = Span { start, end };
let source = &self.file_contents[..end];
let (output, err) = lex(&source, start, crate::LexMode::CommaIsSpace);
error = error.or(err);
let (output, err) = lite_parse(&output);
error = error.or(err);
println!("{:?}", output.block);
match output.block.len() {
0 => (
Expression {
expr: Expr::List(vec![]),
span,
},
None,
),
1 => {
// List
let mut args = vec![];
for arg in &output.block[0].commands {
for part in &arg.parts {
let (arg, err) = self.parse_arg(*part, SyntaxShape::Any);
error = error.or(err);
args.push(arg);
}
}
(
Expression {
expr: Expr::List(args),
span,
},
error,
)
}
_ => (
garbage(span),
Some(ParseError::Mismatch("table".into(), span)),
),
}
}
pub fn parse_block_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) {
let bytes = self.get_span_contents(span);
let mut error = None;
@ -629,6 +702,15 @@ impl ParserWorkingSet {
);
}
return self.parse_block_expression(span);
} else if bytes.starts_with(b"[") {
if shape != SyntaxShape::Table && shape != SyntaxShape::Any {
// FIXME: need better errors
return (
garbage(span),
Some(ParseError::Mismatch("not a table".into(), span)),
);
}
return self.parse_table_expression(span);
}
match shape {