From bf1a23afcfe4c960471160c23e24d65e2bf3b73e Mon Sep 17 00:00:00 2001
From: JT <jonathan.d.turner@gmail.com>
Date: Tue, 6 Jul 2021 13:48:45 +1200
Subject: [PATCH] Add table parsing

---
 src/lex.rs    | 12 ++++++---
 src/parser.rs | 71 ++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 73 insertions(+), 10 deletions(-)
diff --git a/src/lex.rs b/src/lex.rs
index 8f7fb841f7..47220aa13b 100644
--- a/src/lex.rs
+++ b/src/lex.rs
@@ -43,6 +43,7 @@ pub enum LexMode {
     Normal,
     CommaIsSpace,
     NewlineIsSpace,
+    CommaAndNewlineIsSpace,
 }
 
 // A baseline token is terminated if it's not nested inside of a paired
@@ -56,7 +57,8 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: LexMode) -> bo
             || c == b'|'
             || c == b';'
             || c == b'#'
-            || (c == b',' && lex_mode == LexMode::CommaIsSpace))
+            || (c == b',' && lex_mode == LexMode::CommaIsSpace)
+            || (c == b',' && lex_mode == LexMode::CommaAndNewlineIsSpace))
 }
 
 pub fn lex_item(
@@ -237,7 +239,7 @@ pub fn lex(
 
             let idx = curr_offset;
             curr_offset += 1;
-            if lex_mode != LexMode::NewlineIsSpace {
+            if lex_mode != LexMode::NewlineIsSpace && lex_mode != LexMode::CommaAndNewlineIsSpace {
                 output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
             }
         } else if c == b'#' {
@@ -263,7 +265,11 @@ pub fn lex(
                     Span::new(start, curr_offset),
                 ));
             }
-        } else if c == b' ' || c == b'\t' || (c == b',' && lex_mode == LexMode::CommaIsSpace) {
+        } else if c == b' '
+            || c == b'\t'
+            || (c == b',' && lex_mode == LexMode::CommaIsSpace)
+            || (c == b',' && lex_mode == LexMode::CommaAndNewlineIsSpace)
+        {
             // If the next character is non-newline whitespace, skip it.
             curr_offset += 1;
         } else {
diff --git a/src/parser.rs b/src/parser.rs
index 627e7616ea..3bbf0b5bbf 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -104,6 +104,8 @@ pub enum Expr {
     Subexpression(Box<Block>),
     Block(Box<Block>),
     List(Vec<Expression>),
+    Table(Vec<Expression>, Vec<Vec<Expression>>),
+    String(String), // FIXME: improve this in the future?
     Garbage,
 }
 
@@ -563,6 +565,25 @@ impl ParserWorkingSet {
         )
     }
 
+    /// Builds a string expression from the source bytes covered by `span`.
+    ///
+    /// The bytes are used verbatim. If they are not valid UTF-8, a garbage
+    /// expression is returned together with a "string" mismatch error.
+    pub fn parse_string(&mut self, span: Span) -> (Expression, Option<ParseError>) {
+        let contents = self.get_span_contents(span);
+
+        match String::from_utf8(contents.into()) {
+            Ok(text) => {
+                let expr = Expr::String(text);
+                (Expression { expr, span }, None)
+            }
+            Err(_) => (
+                garbage(span),
+                Some(ParseError::Mismatch("string".into(), span)),
+            ),
+        }
+    }
+
     pub fn parse_table_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) {
         let bytes = self.get_span_contents(span);
         let mut error = None;
@@ -591,14 +612,12 @@ impl ParserWorkingSet {
 
         let source = &self.file_contents[..end];
 
-        let (output, err) = lex(&source, start, crate::LexMode::CommaIsSpace);
+        let (output, err) = lex(&source, start, crate::LexMode::CommaAndNewlineIsSpace);
         error = error.or(err);
 
         let (output, err) = lite_parse(&output);
         error = error.or(err);
 
-        println!("{:?}", output.block);
-
         match output.block.len() {
             0 => (
                 Expression {
@@ -628,10 +647,42 @@ impl ParserWorkingSet {
                     error,
                 )
             }
-            _ => (
-                garbage(span),
-                Some(ParseError::Mismatch("table".into(), span)),
-            ),
+            _ => {
+                let mut table_headers = vec![];
+
+                let (headers, err) =
+                    self.parse_arg(output.block[0].commands[0].parts[0], SyntaxShape::Table);
+                error = error.or(err);
+
+                if let Expression {
+                    expr: Expr::List(headers),
+                    ..
+                } = headers
+                {
+                    table_headers = headers;
+                }
+
+                let mut rows = vec![];
+                for part in &output.block[1].commands[0].parts {
+                    let (values, err) = self.parse_arg(*part, SyntaxShape::Table);
+                    error = error.or(err);
+                    if let Expression {
+                        expr: Expr::List(values),
+                        ..
+                    } = values
+                    {
+                        rows.push(values);
+                    }
+                }
+
+                (
+                    Expression {
+                        expr: Expr::Table(table_headers, rows),
+                        span,
+                    },
+                    error,
+                )
+            }
         }
     }
 
@@ -644,6 +695,11 @@ impl ParserWorkingSet {
 
         if bytes.starts_with(b"{") {
             start += 1;
+        } else {
+            return (
+                garbage(span),
+                Some(ParseError::Mismatch("block".into(), span)),
+            );
         }
         if bytes.ends_with(b"}") {
             end -= 1;
@@ -734,6 +790,7 @@ impl ParserWorkingSet {
                     )
                 }
             }
+            SyntaxShape::String => self.parse_string(span),
             SyntaxShape::Block => self.parse_block_expression(span),
             SyntaxShape::Any => {
                 let shapes = vec![