Add list parsing

2025-01-15 22:54:16 +00:00 · 2021-07-06 10:58:56 +12:00 · 2021-07-06 10:58:56 +12:00 · 04a6a4f860
commit 04a6a4f860
parent 666bee61f7
3 changed files with 109 additions and 15 deletions
--- a/src/lex.rs
+++ b/src/lex.rs
@ -38,20 +38,32 @@ impl BlockKind {
    }
 }
-#[derive(PartialEq, Eq, Debug)]
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
 pub enum LexMode {
    Normal,
    CommaIsSpace,
    NewlineIsSpace,
 }
 // A baseline token is terminated if it's not nested inside of a paired
 // delimiter and the next character is one of: `|`, `;`, `#`, any
 // whitespace, or (only in `LexMode::CommaIsSpace`) `,`.
-fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool {
+fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: LexMode) -> bool {
    block_level.is_empty()
-        && (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#')
+        && (c == b' '
            || c == b'\t'
            || c == b'\n'
            || c == b'|'
            || c == b';'
            || c == b'#'
            || (c == b',' && lex_mode == LexMode::CommaIsSpace))
 }
-pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseError>) {
+pub fn lex_item(
    input: &[u8],
    curr_offset: &mut usize,
    lex_mode: LexMode,
 ) -> (Span, Option<ParseError>) {
    // This variable tracks the starting character of a string literal, so that
    // we remain inside the string literal lexer mode until we encounter the
    // closing quote.
@ -85,17 +97,17 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
                quote_start = None;
            }
        } else if c == b'#' {
-            if is_item_terminator(&block_level, c) {
+            if is_item_terminator(&block_level, c, lex_mode) {
                break;
            }
            in_comment = true;
        } else if c == b'\n' {
            in_comment = false;
-            if is_item_terminator(&block_level, c) {
+            if is_item_terminator(&block_level, c, lex_mode) {
                break;
            }
        } else if in_comment {
-            if is_item_terminator(&block_level, c) {
+            if is_item_terminator(&block_level, c, lex_mode) {
                break;
            }
        } else if c == b'\'' || c == b'"' {
@ -126,7 +138,7 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
            if let Some(BlockKind::Paren) = block_level.last() {
                let _ = block_level.pop();
            }
-        } else if is_item_terminator(&block_level, c) {
+        } else if is_item_terminator(&block_level, c, lex_mode) {
            break;
        }
@ -225,7 +237,7 @@ pub fn lex(
            let idx = curr_offset;
            curr_offset += 1;
-            if lex_mode == LexMode::Normal {
+            if lex_mode != LexMode::NewlineIsSpace {
                output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
            }
        } else if c == b'#' {
@ -251,13 +263,13 @@ pub fn lex(
                    Span::new(start, curr_offset),
                ));
            }
-        } else if c == b' ' || c == b'\t' {
+        } else if c == b' ' || c == b'\t' || (c == b',' && lex_mode == LexMode::CommaIsSpace) {
            // If the next character is non-newline whitespace, skip it.
            curr_offset += 1;
        } else {
            // Otherwise, try to consume an unclassified token.
-            let (span, err) = lex_item(input, &mut curr_offset);
+            let (span, err) = lex_item(input, &mut curr_offset, lex_mode);
            if error.is_none() {
                error = err;
            }
--- a/src/main.rs
+++ b/src/main.rs
@ -7,10 +7,10 @@ fn main() -> std::io::Result<()> {
        let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j'));
        working_set.add_decl((b"foo").to_vec(), sig);
-        let file = std::fs::read(&path)?;
+        //let file = std::fs::read(&path)?;
-        let (output, err) = working_set.parse_file(&path, file);
+        //let (output, err) = working_set.parse_file(&path, file);
-        //let (output, err) = working_set.parse_source(path.as_bytes());
+        let (output, err) = working_set.parse_source(path.as_bytes());
-        println!("{}", output.len());
+        println!("{:#?}", output);
        println!("error: {:?}", err);
        // println!("{}", size_of::<Statement>());
--- a/src/parser.rs
+++ b/src/parser.rs
@ -103,6 +103,7 @@ pub enum Expr {
    BinaryOp(Box<Expression>, Box<Expression>, Box<Expression>), //lhs, op, rhs
    Subexpression(Box<Block>),
    Block(Box<Block>),
    List(Vec<Expression>),
    Garbage,
 }
@ -562,6 +563,78 @@ impl ParserWorkingSet {
        )
    }
    pub fn parse_table_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) {
        let bytes = self.get_span_contents(span);
        let mut error = None;
        let mut start = span.start;
        let mut end = span.end;
        if bytes.starts_with(b"[") {
            start += 1;
        }
        if bytes.ends_with(b"]") {
            end -= 1;
        } else {
            error = error.or_else(|| {
                Some(ParseError::Unclosed(
                    "]".into(),
                    Span {
                        start: end,
                        end: end + 1,
                    },
                ))
            });
        }
        let span = Span { start, end };
        let source = &self.file_contents[..end];
        let (output, err) = lex(&source, start, crate::LexMode::CommaIsSpace);
        error = error.or(err);
        let (output, err) = lite_parse(&output);
        error = error.or(err);
        println!("{:?}", output.block);
        match output.block.len() {
            0 => (
                Expression {
                    expr: Expr::List(vec![]),
                    span,
                },
                None,
            ),
            1 => {
                // List
                let mut args = vec![];
                for arg in &output.block[0].commands {
                    for part in &arg.parts {
                        let (arg, err) = self.parse_arg(*part, SyntaxShape::Any);
                        error = error.or(err);
                        args.push(arg);
                    }
                }
                (
                    Expression {
                        expr: Expr::List(args),
                        span,
                    },
                    error,
                )
            }
            _ => (
                garbage(span),
                Some(ParseError::Mismatch("table".into(), span)),
            ),
        }
    }
    pub fn parse_block_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) {
        let bytes = self.get_span_contents(span);
        let mut error = None;
@ -629,6 +702,15 @@ impl ParserWorkingSet {
                );
            }
            return self.parse_block_expression(span);
        } else if bytes.starts_with(b"[") {
            if shape != SyntaxShape::Table && shape != SyntaxShape::Any {
                // FIXME: need better errors
                return (
                    garbage(span),
                    Some(ParseError::Mismatch("not a table".into(), span)),
                );
            }
            return self.parse_table_expression(span);
        }
        match shape {