From bf1a23afcfe4c960471160c23e24d65e2bf3b73e Mon Sep 17 00:00:00 2001 From: JT Date: Tue, 6 Jul 2021 13:48:45 +1200 Subject: [PATCH] Add table parsing --- src/lex.rs | 12 ++++++--- src/parser.rs | 71 ++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 73 insertions(+), 10 deletions(-) diff --git a/src/lex.rs b/src/lex.rs index 8f7fb841f7..47220aa13b 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -43,6 +43,7 @@ pub enum LexMode { Normal, CommaIsSpace, NewlineIsSpace, + CommaAndNewlineIsSpace, } // A baseline token is terminated if it's not nested inside of a paired @@ -56,7 +57,8 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: LexMode) -> bo || c == b'|' || c == b';' || c == b'#' - || (c == b',' && lex_mode == LexMode::CommaIsSpace)) + || (c == b',' && lex_mode == LexMode::CommaIsSpace) + || (c == b',' && lex_mode == LexMode::CommaAndNewlineIsSpace)) } pub fn lex_item( @@ -237,7 +239,7 @@ pub fn lex( let idx = curr_offset; curr_offset += 1; - if lex_mode != LexMode::NewlineIsSpace { + if lex_mode != LexMode::NewlineIsSpace && lex_mode != LexMode::CommaAndNewlineIsSpace { output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1))); } } else if c == b'#' { @@ -263,7 +265,11 @@ pub fn lex( Span::new(start, curr_offset), )); } - } else if c == b' ' || c == b'\t' || (c == b',' && lex_mode == LexMode::CommaIsSpace) { + } else if c == b' ' + || c == b'\t' + || (c == b',' && lex_mode == LexMode::CommaIsSpace) + || (c == b',' && lex_mode == LexMode::CommaAndNewlineIsSpace) + { // If the next character is non-newline whitespace, skip it. curr_offset += 1; } else { diff --git a/src/parser.rs b/src/parser.rs index 627e7616ea..3bbf0b5bbf 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -104,6 +104,8 @@ pub enum Expr { Subexpression(Box), Block(Box), List(Vec), + Table(Vec, Vec>), + String(String), // FIXME: improve this in the future? Garbage, } @@ -563,6 +565,25 @@ impl ParserWorkingSet { ) } + pub fn parse_string(&mut self, span: Span) -> (Expression, Option) { + let bytes = self.get_span_contents(span); + + if let Ok(token) = String::from_utf8(bytes.into()) { + ( + Expression { + expr: Expr::String(token), + span, + }, + None, + ) + } else { + ( + garbage(span), + Some(ParseError::Mismatch("string".into(), span)), + ) + } + } + pub fn parse_table_expression(&mut self, span: Span) -> (Expression, Option) { let bytes = self.get_span_contents(span); let mut error = None; @@ -591,14 +612,12 @@ impl ParserWorkingSet { let source = &self.file_contents[..end]; - let (output, err) = lex(&source, start, crate::LexMode::CommaIsSpace); + let (output, err) = lex(&source, start, crate::LexMode::CommaAndNewlineIsSpace); error = error.or(err); let (output, err) = lite_parse(&output); error = error.or(err); - println!("{:?}", output.block); - match output.block.len() { 0 => ( Expression { @@ -628,10 +647,42 @@ impl ParserWorkingSet { error, ) } - _ => ( - garbage(span), - Some(ParseError::Mismatch("table".into(), span)), - ), + _ => { + let mut table_headers = vec![]; + + let (headers, err) = + self.parse_arg(output.block[0].commands[0].parts[0], SyntaxShape::Table); + error = error.or(err); + + if let Expression { + expr: Expr::List(headers), + .. + } = headers + { + table_headers = headers; + } + + let mut rows = vec![]; + for part in &output.block[1].commands[0].parts { + let (values, err) = self.parse_arg(*part, SyntaxShape::Table); + error = error.or(err); + if let Expression { + expr: Expr::List(values), + .. + } = values + { + rows.push(values); + } + } + + ( + Expression { + expr: Expr::Table(table_headers, rows), + span, + }, + error, + ) + } } } @@ -644,6 +695,11 @@ impl ParserWorkingSet { if bytes.starts_with(b"{") { start += 1; + } else { + return ( + garbage(span), + Some(ParseError::Mismatch("block".into(), span)), + ); } if bytes.ends_with(b"}") { end -= 1; @@ -734,6 +790,7 @@ impl ParserWorkingSet { ) } } + SyntaxShape::String => self.parse_string(span), SyntaxShape::Block => self.parse_block_expression(span), SyntaxShape::Any => { let shapes = vec![