Add list parsing

This commit is contained in:
JT 2021-07-06 10:58:56 +12:00
parent 666bee61f7
commit 04a6a4f860
3 changed files with 109 additions and 15 deletions

View file

@ -38,20 +38,32 @@ impl BlockKind {
} }
} }
/// Selects which extra characters the lexer treats like whitespace.
#[derive(PartialEq, Eq, Debug)] #[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum LexMode { pub enum LexMode {
// Neither of the special cases below applies.
Normal, Normal,
// `,` terminates an item in `is_item_terminator` and is skipped like a
// space or tab by `lex`.
CommaIsSpace,
// `lex` does not push an `Eol` token for a newline in this mode.
NewlineIsSpace,
} }
// A baseline token is terminated if it's not nested inside of a paired // A baseline token is terminated if it's not nested inside of a paired
// delimiter and the next character is one of: `|`, `;`, `#` or any // delimiter and the next character is one of: `|`, `;`, `#` or any
// whitespace. // whitespace.
// Returns `true` only when `block_level` is empty (no paired delimiter is
// currently open) and `c` is one of the terminating bytes listed below.
// When `lex_mode` is `LexMode::CommaIsSpace`, `,` is a terminator as well.
fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool { fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: LexMode) -> bool {
block_level.is_empty() block_level.is_empty()
&& (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#') && (c == b' '
|| c == b'\t'
|| c == b'\n'
|| c == b'|'
|| c == b';'
|| c == b'#'
|| (c == b',' && lex_mode == LexMode::CommaIsSpace))
} }
pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseError>) { pub fn lex_item(
input: &[u8],
curr_offset: &mut usize,
lex_mode: LexMode,
) -> (Span, Option<ParseError>) {
// This variable tracks the starting character of a string literal, so that // This variable tracks the starting character of a string literal, so that
// we remain inside the string literal lexer mode until we encounter the // we remain inside the string literal lexer mode until we encounter the
// closing quote. // closing quote.
@ -85,17 +97,17 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
quote_start = None; quote_start = None;
} }
} else if c == b'#' { } else if c == b'#' {
if is_item_terminator(&block_level, c) { if is_item_terminator(&block_level, c, lex_mode) {
break; break;
} }
in_comment = true; in_comment = true;
} else if c == b'\n' { } else if c == b'\n' {
in_comment = false; in_comment = false;
if is_item_terminator(&block_level, c) { if is_item_terminator(&block_level, c, lex_mode) {
break; break;
} }
} else if in_comment { } else if in_comment {
if is_item_terminator(&block_level, c) { if is_item_terminator(&block_level, c, lex_mode) {
break; break;
} }
} else if c == b'\'' || c == b'"' { } else if c == b'\'' || c == b'"' {
@ -126,7 +138,7 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
if let Some(BlockKind::Paren) = block_level.last() { if let Some(BlockKind::Paren) = block_level.last() {
let _ = block_level.pop(); let _ = block_level.pop();
} }
} else if is_item_terminator(&block_level, c) { } else if is_item_terminator(&block_level, c, lex_mode) {
break; break;
} }
@ -225,7 +237,7 @@ pub fn lex(
let idx = curr_offset; let idx = curr_offset;
curr_offset += 1; curr_offset += 1;
if lex_mode == LexMode::Normal { if lex_mode != LexMode::NewlineIsSpace {
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1))); output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
} }
} else if c == b'#' { } else if c == b'#' {
@ -251,13 +263,13 @@ pub fn lex(
Span::new(start, curr_offset), Span::new(start, curr_offset),
)); ));
} }
} else if c == b' ' || c == b'\t' { } else if c == b' ' || c == b'\t' || (c == b',' && lex_mode == LexMode::CommaIsSpace) {
// If the next character is non-newline whitespace, skip it. // If the next character is non-newline whitespace, skip it.
curr_offset += 1; curr_offset += 1;
} else { } else {
// Otherwise, try to consume an unclassified token. // Otherwise, try to consume an unclassified token.
let (span, err) = lex_item(input, &mut curr_offset); let (span, err) = lex_item(input, &mut curr_offset, lex_mode);
if error.is_none() { if error.is_none() {
error = err; error = err;
} }

View file

@ -7,10 +7,10 @@ fn main() -> std::io::Result<()> {
let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j')); let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j'));
working_set.add_decl((b"foo").to_vec(), sig); working_set.add_decl((b"foo").to_vec(), sig);
let file = std::fs::read(&path)?; //let file = std::fs::read(&path)?;
let (output, err) = working_set.parse_file(&path, file); //let (output, err) = working_set.parse_file(&path, file);
//let (output, err) = working_set.parse_source(path.as_bytes()); let (output, err) = working_set.parse_source(path.as_bytes());
println!("{}", output.len()); println!("{:#?}", output);
println!("error: {:?}", err); println!("error: {:?}", err);
// println!("{}", size_of::<Statement>()); // println!("{}", size_of::<Statement>());

View file

@ -103,6 +103,7 @@ pub enum Expr {
BinaryOp(Box<Expression>, Box<Expression>, Box<Expression>), //lhs, op, rhs BinaryOp(Box<Expression>, Box<Expression>, Box<Expression>), //lhs, op, rhs
Subexpression(Box<Block>), Subexpression(Box<Block>),
Block(Box<Block>), Block(Box<Block>),
List(Vec<Expression>),
Garbage, Garbage,
} }
@ -562,6 +563,78 @@ impl ParserWorkingSet {
) )
} }
pub fn parse_table_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) {
let bytes = self.get_span_contents(span);
let mut error = None;
let mut start = span.start;
let mut end = span.end;
if bytes.starts_with(b"[") {
start += 1;
}
if bytes.ends_with(b"]") {
end -= 1;
} else {
error = error.or_else(|| {
Some(ParseError::Unclosed(
"]".into(),
Span {
start: end,
end: end + 1,
},
))
});
}
let span = Span { start, end };
let source = &self.file_contents[..end];
let (output, err) = lex(&source, start, crate::LexMode::CommaIsSpace);
error = error.or(err);
let (output, err) = lite_parse(&output);
error = error.or(err);
println!("{:?}", output.block);
match output.block.len() {
0 => (
Expression {
expr: Expr::List(vec![]),
span,
},
None,
),
1 => {
// List
let mut args = vec![];
for arg in &output.block[0].commands {
for part in &arg.parts {
let (arg, err) = self.parse_arg(*part, SyntaxShape::Any);
error = error.or(err);
args.push(arg);
}
}
(
Expression {
expr: Expr::List(args),
span,
},
error,
)
}
_ => (
garbage(span),
Some(ParseError::Mismatch("table".into(), span)),
),
}
}
pub fn parse_block_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) { pub fn parse_block_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) {
let bytes = self.get_span_contents(span); let bytes = self.get_span_contents(span);
let mut error = None; let mut error = None;
@ -629,6 +702,15 @@ impl ParserWorkingSet {
); );
} }
return self.parse_block_expression(span); return self.parse_block_expression(span);
} else if bytes.starts_with(b"[") {
if shape != SyntaxShape::Table && shape != SyntaxShape::Any {
// FIXME: need better errors
return (
garbage(span),
Some(ParseError::Mismatch("not a table".into(), span)),
);
}
return self.parse_table_expression(span);
} }
match shape { match shape {