diff --git a/src/lex.rs b/src/lex.rs index 7f4b661e5a..886a81ea20 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -38,35 +38,15 @@ impl BlockKind { } } -#[derive(PartialEq, Eq, Debug, Clone)] -pub enum LexMode { - Normal, - Custom { - whitespace: Vec, - special: Vec, - }, -} - -impl LexMode { - pub fn whitespace_contains(&self, b: u8) -> bool { - match self { - LexMode::Custom { ref whitespace, .. } => whitespace.contains(&b), - _ => false, - } - } - - pub fn special_contains(&self, b: u8) -> bool { - match self { - LexMode::Custom { ref special, .. } => special.contains(&b), - _ => false, - } - } -} - // A baseline token is terminated if it's not nested inside of a paired // delimiter and the next character is one of: `|`, `;`, `#` or any // whitespace. -fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> bool { +fn is_item_terminator( + block_level: &[BlockKind], + c: u8, + additional_whitespace: &[u8], + special_tokens: &[u8], +) -> bool { block_level.is_empty() && (c == b' ' || c == b'\t' @@ -74,26 +54,23 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> b || c == b'|' || c == b';' || c == b'#' - || lex_mode.whitespace_contains(c) - || lex_mode.special_contains(c)) + || additional_whitespace.contains(&c) + || special_tokens.contains(&c)) } // A special token is one that is a byte that stands alone as its own token. For example // when parsing a signature you may want to have `:` be able to separate tokens and also // to be handled as its own token to notify you you're about to parse a type in the example // `foo:bar` -fn is_special_item(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> bool { - block_level.is_empty() - && (match lex_mode { - LexMode::Custom { special, .. } => special.contains(&c), - _ => false, - }) +fn is_special_item(block_level: &[BlockKind], c: u8, special_tokens: &[u8]) -> bool { + block_level.is_empty() && special_tokens.contains(&c) } pub fn lex_item( input: &[u8], curr_offset: &mut usize, - lex_mode: &LexMode, + additional_whitespace: &[u8], + special_tokens: &[u8], ) -> (Span, Option) { // This variable tracks the starting character of a string literal, so that // we remain inside the string literal lexer mode until we encounter the @@ -128,20 +105,20 @@ pub fn lex_item( quote_start = None; } } else if c == b'#' { - if is_item_terminator(&block_level, c, &lex_mode) { + if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) { break; } in_comment = true; } else if c == b'\n' { in_comment = false; - if is_item_terminator(&block_level, c, &lex_mode) { + if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) { break; } } else if in_comment { - if is_item_terminator(&block_level, c, &lex_mode) { + if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) { break; } - } else if is_special_item(&block_level, c, &lex_mode) && token_start == *curr_offset { + } else if is_special_item(&block_level, c, special_tokens) && token_start == *curr_offset { *curr_offset += 1; break; } else if c == b'\'' || c == b'"' { @@ -172,7 +149,7 @@ pub fn lex_item( if let Some(BlockKind::Paren) = block_level.last() { let _ = block_level.pop(); } - } else if is_item_terminator(&block_level, c, &lex_mode) { + } else if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) { break; } @@ -214,7 +191,8 @@ pub fn lex_item( pub fn lex( input: &[u8], span_offset: usize, - lex_mode: &LexMode, + additional_whitespace: &[u8], + special_tokens: &[u8], ) -> (Vec, Option) { let mut error = None; @@ -271,7 +249,7 @@ pub fn lex( let idx = curr_offset; curr_offset += 1; - if !lex_mode.whitespace_contains(c) { + if !additional_whitespace.contains(&c) { output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1))); } } else if c == b'#' { @@ -297,13 +275,18 @@ pub fn lex( Span::new(start, curr_offset), )); } - } else if c == b' ' || c == b'\t' || lex_mode.whitespace_contains(c) { + } else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) { // If the next character is non-newline whitespace, skip it. curr_offset += 1; } else { // Otherwise, try to consume an unclassified token. - let (span, err) = lex_item(input, &mut curr_offset, &lex_mode); + let (span, err) = lex_item( + input, + &mut curr_offset, + additional_whitespace, + special_tokens, + ); if error.is_none() { error = err; } @@ -322,7 +305,7 @@ mod lex_tests { fn lex_basic() { let file = b"let x = 4"; - let output = lex(file, 0, &LexMode::Normal); + let output = lex(file, 0, &[], &[]); assert!(output.1.is_none()); } @@ -331,7 +314,7 @@ mod lex_tests { fn lex_newline() { let file = b"let x = 300\nlet y = 500;"; - let output = lex(file, 0, &LexMode::Normal); + let output = lex(file, 0, &[], &[]); println!("{:#?}", output.0); assert!(output.0.contains(&Token { @@ -344,7 +327,7 @@ mod lex_tests { fn lex_empty() { let file = b""; - let output = lex(file, 0, &LexMode::Normal); + let output = lex(file, 0, &[], &[]); assert!(output.0.is_empty()); assert!(output.1.is_none()); diff --git a/src/lib.rs b/src/lib.rs index 26380f90ac..a884dab7eb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ mod span; pub use declaration::Declaration; pub use eval::Engine; -pub use lex::{lex, LexMode, Token, TokenContents}; +pub use lex::{lex, Token, TokenContents}; pub use lite_parse::{lite_parse, LiteBlock, LiteCommand, LiteStatement}; pub use parse_error::ParseError; pub use parser::{ diff --git a/src/lite_parse.rs b/src/lite_parse.rs index 6a28638391..7590b5eb19 100644 --- a/src/lite_parse.rs +++ b/src/lite_parse.rs @@ -128,7 +128,7 @@ mod tests { use crate::{lex, lite_parse, LiteBlock, ParseError, Span}; fn lite_parse_helper(input: &[u8]) -> Result { - let (output, err) = lex(input, 0, &crate::LexMode::Normal); + let (output, err) = lex(input, 0, &[], &[]); if let Some(err) = err { return Err(err); } diff --git a/src/parser.rs b/src/parser.rs index 38977c4719..4b38e9df02 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -798,7 +798,7 @@ impl ParserWorkingSet { let source = self.get_span_contents(span); - let (output, err) = lex(&source, start, &crate::LexMode::Normal); + let (output, err) = lex(&source, start, &[], &[]); error = error.or(err); let (output, err) = lite_parse(&output); @@ -958,14 +958,7 @@ impl ParserWorkingSet { let span = Span { start, end }; let source = &self.file_contents[..span.end]; - let (output, err) = lex( - &source, - span.start, - &crate::LexMode::Custom { - whitespace: vec![b'\n', b','], - special: vec![b':', b'?'], - }, - ); + let (output, err) = lex(&source, span.start, &[b'\n', b','], &[b':', b'?']); error = error.or(err); let mut args: Vec = vec![]; @@ -1086,14 +1079,7 @@ impl ParserWorkingSet { let span = Span { start, end }; let source = &self.file_contents[..span.end]; - let (output, err) = lex( - &source, - span.start, - &crate::LexMode::Custom { - whitespace: vec![b'\n', b','], - special: vec![], - }, - ); + let (output, err) = lex(&source, span.start, &[b'\n', b','], &[]); error = error.or(err); let (output, err) = lite_parse(&output); @@ -1157,14 +1143,7 @@ impl ParserWorkingSet { let source = &self.file_contents[..end]; - let (output, err) = lex( - &source, - start, - &crate::LexMode::Custom { - whitespace: vec![b'\n', b','], - special: vec![], - }, - ); + let (output, err) = lex(&source, start, &[b'\n', b','], &[]); error = error.or(err); let (output, err) = lite_parse(&output); @@ -1254,7 +1233,7 @@ impl ParserWorkingSet { let source = &self.file_contents[..end]; - let (output, err) = lex(&source, start, &crate::LexMode::Normal); + let (output, err) = lex(&source, start, &[], &[]); error = error.or(err); let (output, err) = lite_parse(&output); @@ -1718,7 +1697,7 @@ impl ParserWorkingSet { pub fn parse_file(&mut self, fname: &str, contents: Vec) -> (Block, Option) { let mut error = None; - let (output, err) = lex(&contents, 0, &crate::LexMode::Normal); + let (output, err) = lex(&contents, 0, &[], &[]); error = error.or(err); self.add_file(fname.into(), contents); @@ -1737,7 +1716,7 @@ impl ParserWorkingSet { self.add_file("source".into(), source.into()); - let (output, err) = lex(source, 0, &crate::LexMode::Normal); + let (output, err) = lex(source, 0, &[], &[]); error = error.or(err); let (output, err) = lite_parse(&output);