Document lexer (#2865)

* Update dependencies * Document the lexer and lightly improve its names The bulk of this pull request adds a substantial amount of new inline documentation for the lexer. Along the way, I made a few minor changes to the names in the lexer, most of which were internal. The main change that affects other files is renaming `group` to `block`, since the function is actually parsing a block (a list of groups). * Fix rustfmt * Update lock Co-authored-by: Jonathan Turner <jonathandturner@users.noreply.github.com> Co-authored-by: Jonathan Turner <jonathan.d.turner@gmail.com>
2024-12-27 13:33:16 +00:00 · 2021-01-06 19:03:00 -08:00 · 2021-01-06 19:03:00 -08:00 · f410fb6689
commit f410fb6689
parent eb62fd466e
12 changed files with 262 additions and 121 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -282,7 +282,7 @@ dependencies = [
 "memchr",
 "num_cpus",
 "once_cell",
- "pin-project-lite 0.2.0",
+ "pin-project-lite 0.2.1",
 "pin-utils",
 "slab 0.4.2",
 "wasm-bindgen-futures 0.4.19",
@ -801,9 +801,9 @@ dependencies = [

 [[package]]
 name = "const_fn"
-version = "0.4.4"
+version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd51eab21ab4fd6a3bf889e2d0958c0a6e3a61ad04260325e919e652a2a62826"
+checksum = "28b9d6de7f49e22cf97ad17fc4036ece69300032f45f78f30b4a4482cdc3f4a6"

 [[package]]
 name = "constant_time_eq"
@ -1742,7 +1742,7 @@ dependencies = [
 "futures-io",
 "memchr",
 "parking",
- "pin-project-lite 0.2.0",
+ "pin-project-lite 0.2.1",
 "waker-fn",
 ]

@ -1813,7 +1813,7 @@ dependencies = [
 "futures-sink",
 "futures-task",
 "memchr",
- "pin-project 1.0.2",
+ "pin-project 1.0.3",
 "pin-utils",
 "proc-macro-hack",
 "proc-macro-nested",
@ -2322,7 +2322,7 @@ dependencies = [
 "httparse",
 "httpdate",
 "itoa",
- "pin-project 1.0.2",
+ "pin-project 1.0.3",
 "socket2",
 "tokio 0.2.24",
 "tower-service",
@ -4016,11 +4016,11 @@ dependencies = [

 [[package]]
 name = "pin-project"
-version = "1.0.2"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ccc2237c2c489783abd8c4c80e5450fc0e98644555b1364da68cc29aa151ca7"
+checksum = "5a83804639aad6ba65345661744708855f9fbcb71176ea8d28d05aeb11d975e7"
 dependencies = [
- "pin-project-internal 1.0.2",
+ "pin-project-internal 1.0.3",
 ]

 [[package]]
@ -4036,9 +4036,9 @@ dependencies = [

 [[package]]
 name = "pin-project-internal"
-version = "1.0.2"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8e8d2bf0b23038a4424865103a4df472855692821aab4e4f5c3312d461d9e5f"
+checksum = "b7bcc46b8f73443d15bc1c5fecbb315718491fa9187fa483f0e359323cde8b3a"
 dependencies = [
 "proc-macro2",
 "quote",
@ -4053,9 +4053,9 @@ checksum = "c917123afa01924fc84bb20c4c03f004d9c38e5127e3c039bbf7f4b9c76a2f6b"

 [[package]]
 name = "pin-project-lite"
-version = "0.2.0"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b063f57ec186e6140e2b8b6921e5f1bd89c7356dda5b33acc5401203ca6131c"
+checksum = "e36743d754ccdf9954c2e352ce2d4b106e024c814f6499c2dadff80da9a442d8"

 [[package]]
 name = "pin-utils"
@ -4210,9 +4210,9 @@ dependencies = [

 [[package]]
 name = "ptree"
-version = "0.3.0"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "828735579562f9be5e3a605016076cc47d7da3c29bf40aa44da28f161cb7f3c0"
+checksum = "95fd400102d16e4e90e0735c0eb1808ae569a4e62fb8e65a7d1e700611cae6ae"
 dependencies = [
 "ansi_term 0.12.1",
 "atty",
@ -4547,7 +4547,7 @@ dependencies = [
 "mime_guess",
 "native-tls",
 "percent-encoding 2.1.0",
- "pin-project-lite 0.2.0",
+ "pin-project-lite 0.2.1",
 "serde 1.0.118",
 "serde_urlencoded 0.7.0",
 "tokio 0.2.24",
@ -4851,7 +4851,7 @@ version = "0.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6"
 dependencies = [
- "semver-parser 0.10.1",
+ "semver-parser 0.10.2",
 ]

 [[package]]
@ -4862,9 +4862,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"

 [[package]]
 name = "semver-parser"
-version = "0.10.1"
+version = "0.10.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42ef146c2ad5e5f4b037cd6ce2ebb775401729b19a82040c1beac9d36c7d1428"
+checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7"
 dependencies = [
 "pest",
 ]
@ -4985,9 +4985,9 @@ dependencies = [

 [[package]]
 name = "serde_yaml"
-version = "0.8.14"
+version = "0.8.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7baae0a99f1a324984bcdc5f0718384c1f69775f1c7eec8b859b71b443e3fd7"
+checksum = "971be8f6e4d4a47163b405a3df70d14359186f9ab0f3a3ec37df144ca1ce089f"
 dependencies = [
 "dtoa",
 "linked-hash-map 0.5.3",
@ -5787,7 +5787,7 @@ checksum = "9f47026cdc4080c07e49b37087de021820269d996f581aac150ef9e5583eefe3"
 dependencies = [
 "cfg-if 1.0.0",
 "log 0.4.11",
- "pin-project-lite 0.2.0",
+ "pin-project-lite 0.2.1",
 "tracing-core",
 ]

--- a/crates/nu-cli/src/cli.rs
+++ b/crates/nu-cli/src/cli.rs
@ -408,7 +408,7 @@ mod tests {
    #[quickcheck]
    fn quickcheck_parse(data: String) -> bool {
        let (tokens, err) = nu_parser::lex(&data, 0);
-        let (lite_block, err2) = nu_parser::group(tokens);
+        let (lite_block, err2) = nu_parser::block(tokens);
        if err.is_none() && err2.is_none() {
            let context = crate::evaluation_context::EvaluationContext::basic().unwrap();
            let _ = nu_parser::classify_block(&lite_block, &context.scope);
--- a/crates/nu-cli/src/completion/engine.rs
+++ b/crates/nu-cli/src/completion/engine.rs
@ -256,7 +256,7 @@ pub fn completion_location(line: &str, block: &Block, pos: usize) -> Vec<Complet
 mod tests {
    use super::*;

-    use nu_parser::{classify_block, group, lex, ParserScope};
+    use nu_parser::{block, classify_block, lex, ParserScope};
    use nu_protocol::{Signature, SyntaxShape};

    #[derive(Clone, Debug)]
@ -307,7 +307,7 @@ mod tests {
            pos: usize,
        ) -> Vec<LocationType> {
            let (tokens, _) = lex(line, 0);
-            let (lite_block, _) = group(tokens);
+            let (lite_block, _) = block(tokens);

            scope.enter_scope();
            let (block, _) = classify_block(&lite_block, scope);
--- a/crates/nu-cli/src/examples.rs
+++ b/crates/nu-cli/src/examples.rs
@ -207,7 +207,7 @@ fn parse_line(line: &str, ctx: &EvaluationContext) -> Result<ClassifiedBlock, Sh
    if let Some(err) = err {
        return Err(err.into());
    }
-    let (lite_result, err) = nu_parser::group(lite_result);
+    let (lite_result, err) = nu_parser::block(lite_result);
    if let Some(err) = err {
        return Err(err.into());
    }
--- a/crates/nu-cli/src/shell/helper.rs
+++ b/crates/nu-cli/src/shell/helper.rs
@ -123,7 +123,7 @@ impl rustyline::validate::Validator for NuValidator {
            }
        }

-        let (_, err) = nu_parser::group(tokens);
+        let (_, err) = nu_parser::block(tokens);

        if let Some(err) = err {
            if let nu_errors::ParseErrorReason::Eof { .. } = err.reason() {
--- a/crates/nu-errors/Cargo.toml
+++ b/crates/nu-errors/Cargo.toml
@ -17,14 +17,14 @@ bigdecimal = {version = "0.2.0", features = ["serde"]}
 codespan-reporting = {version = "0.11.0", features = ["serialization"]}
 derive-new = "0.5.8"
 getset = "0.1.1"
-num-bigint = {version = "0.3.0", features = ["serde"]}
-num-traits = "0.2.12"
-serde = {version = "1.0.115", features = ["derive"]}
+num-bigint = {version = "0.3.1", features = ["serde"]}
+num-traits = "0.2.14"
+serde = {version = "1.0.118", features = ["derive"]}

 # implement conversions
 glob = "0.3.0"
-serde_json = "1.0.57"
-serde_yaml = "0.8.13"
-toml = "0.5.6"
+serde_json = "1.0.61"
+serde_yaml = "0.8.15"
+toml = "0.5.8"

 [build-dependencies]
--- a/crates/nu-parser/Cargo.toml
+++ b/crates/nu-parser/Cargo.toml
@ -12,12 +12,12 @@ version = "0.25.1"
 bigdecimal = {version = "0.2.0", features = ["serde"]}
 codespan-reporting = "0.11.0"
 derive-new = "0.5.8"
-indexmap = {version = "1.6.0", features = ["serde-1"]}
+indexmap = {version = "1.6.1", features = ["serde-1"]}
 log = "0.4.11"
-num-bigint = {version = "0.3.0", features = ["serde"]}
-num-traits = "0.2.12"
-serde = "1.0.115"
-shellexpand = "2.0.0"
+num-bigint = {version = "0.3.1", features = ["serde"]}
+num-traits = "0.2.14"
+serde = "1.0.118"
+shellexpand = "2.1.0"

 nu-errors = {version = "0.25.1", path = "../nu-errors"}
 nu-protocol = {version = "0.25.1", path = "../nu-protocol"}
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@ -20,12 +20,21 @@ impl Token {

 #[derive(Debug)]
 pub enum TokenContents {
-    Bare(String),
+    /// A baseline token is an atomic chunk of source code. This means that the
+    /// token contains the entirety of string literals, as well as the entirety
+    /// of sections delimited by paired delimiters.
+    ///
+    /// For example, if the token begins with `{`, the baseline token continues
+    /// until the closing `}` (after taking comments and string literals into
+    /// consideration).
+    Baseline(String),
    Pipe,
    Semicolon,
    EOL,
 }

+/// A `LiteCommand` is a list of words that will get meaning when processed by
+/// the parser.
 #[derive(Debug, Clone)]
 pub struct LiteCommand {
    pub parts: Vec<Spanned<String>>,
@ -39,6 +48,11 @@ impl LiteCommand {
    pub fn is_empty(&self) -> bool {
        self.parts.is_empty()
    }
+
+    pub fn has_content(&self) -> bool {
+        !self.is_empty()
+    }
+
    pub fn push(&mut self, item: Spanned<String>) {
        self.parts.push(item)
    }
@ -60,6 +74,7 @@ impl LiteCommand {
    }
 }

+/// A `LitePipeline` is a series of `LiteCommand`s, separated by `|`.
 #[derive(Debug, Clone)]
 pub struct LitePipeline {
    pub commands: Vec<LiteCommand>,
@ -75,12 +90,19 @@ impl LitePipeline {
    pub fn new() -> Self {
        Self { commands: vec![] }
    }
+
    pub fn is_empty(&self) -> bool {
        self.commands.is_empty()
    }
+
+    pub fn has_content(&self) -> bool {
+        !self.commands.is_empty()
+    }
+
    pub fn push(&mut self, item: LiteCommand) {
        self.commands.push(item)
    }
+
    pub(crate) fn span(&self) -> Span {
        let start = if !self.commands.is_empty() {
            self.commands[0].span().start()
@ -96,6 +118,7 @@ impl LitePipeline {
    }
 }

+/// A `LiteGroup` is a series of `LitePipeline`s, separated by `;`.
 #[derive(Debug, Clone)]
 pub struct LiteGroup {
    pub pipelines: Vec<LitePipeline>,
@ -111,12 +134,19 @@ impl LiteGroup {
    pub fn new() -> Self {
        Self { pipelines: vec![] }
    }
+
    pub fn is_empty(&self) -> bool {
        self.pipelines.is_empty()
    }
+
+    pub fn has_content(&self) -> bool {
+        !self.pipelines.is_empty()
+    }
+
    pub fn push(&mut self, item: LitePipeline) {
        self.pipelines.push(item)
    }
+
    pub fn is_comment(&self) -> bool {
        if !self.is_empty()
            && !self.pipelines[0].is_empty()
@ -128,6 +158,7 @@ impl LiteGroup {
            false
        }
    }
+
    #[cfg(test)]
    pub(crate) fn span(&self) -> Span {
        let start = if !self.pipelines.is_empty() {
@ -144,6 +175,7 @@ impl LiteGroup {
    }
 }

+/// A `LiteBlock` is a series of `LiteGroup`s, separated by newlines.
 #[derive(Debug, Clone)]
 pub struct LiteBlock {
    pub block: Vec<LiteGroup>,
@ -153,12 +185,15 @@ impl LiteBlock {
    pub fn new(block: Vec<LiteGroup>) -> Self {
        Self { block }
    }
+
    pub fn is_empty(&self) -> bool {
        self.block.is_empty()
    }
+
    pub fn push(&mut self, item: LiteGroup) {
        self.block.push(item)
    }
+
    #[cfg(test)]
    pub(crate) fn span(&self) -> Span {
        let start = if !self.block.is_empty() {
@ -173,29 +208,6 @@ impl LiteBlock {
            Span::new(start, 0)
        }
    }
-    pub fn head(&self) -> Option<Spanned<String>> {
-        if let Some(group) = self.block.get(0) {
-            if let Some(pipeline) = group.pipelines.get(0) {
-                if let Some(command) = pipeline.commands.get(0) {
-                    if let Some(head) = command.parts.get(0) {
-                        return Some(head.clone());
-                    }
-                }
-            }
-        }
-        None
-    }
-    pub fn remove_head(&mut self) {
-        if let Some(group) = self.block.get_mut(0) {
-            if let Some(pipeline) = group.pipelines.get_mut(0) {
-                if let Some(command) = pipeline.commands.get_mut(0) {
-                    if !command.parts.is_empty() {
-                        command.parts.remove(0);
-                    }
-                }
-            }
-        }
-    }
 }

 #[derive(Clone, Copy)]
@ -205,9 +217,9 @@ enum BlockKind {
    SquareBracket,
 }

-impl From<BlockKind> for char {
-    fn from(bk: BlockKind) -> char {
-        match bk {
+impl BlockKind {
+    fn closing(self) -> char {
+        match self {
            BlockKind::Paren => ')',
            BlockKind::SquareBracket => ']',
            BlockKind::CurlyBracket => '}',
@ -215,93 +227,143 @@ impl From<BlockKind> for char {
    }
 }

-/// Finds the extents of a bare (un-classified) token, returning the string with its associated span,
-/// along with any parse error that was discovered along the way.
-/// Bare tokens are unparsed content separated by spaces or a command separator (like pipe or semicolon)
-/// Bare tokens may be surrounded by quotes (single, double, or backtick) or braces (square, paren, curly)
-pub fn bare(src: &mut Input, span_offset: usize) -> (Spanned<String>, Option<ParseError>) {
-    let mut bare = String::new();
+/// Finds the extents of a baseline token, returning the string with its
+/// associated span, along with any parse error that was discovered along the
+/// way.
+///
+/// Baseline tokens are unparsed content separated by spaces or a command
+/// separator (like pipe or semicolon). Baseline tokens may be surrounded by
+/// quotes (single, double, or backtick) or braces (square, paren, curly).
+///
+/// Baseline tokens may be further processed based on the needs of the syntax
+/// shape that encounters them. They are still lightly lexed. For example, if a
+/// baseline token begins with `{`, the entire token will continue until the
+/// closing `}`, taking comments into consideration.
+pub fn baseline(src: &mut Input, span_offset: usize) -> (Spanned<String>, Option<ParseError>) {
+    let mut token_contents = String::new();
    let start_offset = if let Some((pos, _)) = src.peek() {
        *pos
    } else {
        0
    };

-    let mut inside_quote: Option<char> = None;
+    // This variable tracks the starting character of a string literal, so that
+    // we remain inside the string literal lexer mode until we encounter the
+    // closing quote.
+    let mut quote_start: Option<char> = None;
+
+    // This Vec tracks paired delimiters
    let mut block_level: Vec<BlockKind> = vec![];

+    // A baseline token is terminated if it's not nested inside of a paired
+    // delimiter and the next character is one of: `|`, `;`, `#` or any
+    // whitespace.
+    fn is_termination(block_level: &[BlockKind], c: char) -> bool {
+        block_level.is_empty() && (c.is_whitespace() || c == '|' || c == ';' || c == '#')
+    }
+
+    // The process of slurping up a baseline token repeats:
+    //
+    // - String literal, which begins with `'`, `"` or `\``, and continues until
+    //   the same character is encountered again.
+    // - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until
+    //   the matching closing delimiter is found, skipping comments and string
+    //   literals.
+    // - When not nested inside of a delimiter pair, when a terminating
+    //   character (whitespace, `|`, `;` or `#`) is encountered, the baseline
+    //   token is done.
+    // - Otherwise, accumulate the character into the current baseline token.
    while let Some((_, c)) = src.peek() {
        let c = *c;
-        if inside_quote.is_some() {
-            if Some(c) == inside_quote {
-                inside_quote = None;
+
+        if quote_start.is_some() {
+            // If we encountered the closing quote character for the current
+            // string, we're done with the current string.
+            if Some(c) == quote_start {
+                quote_start = None;
            }
        } else if c == '\'' || c == '"' || c == '`' {
-            inside_quote = Some(c);
+            // We encountered the opening quote of a string literal.
+            quote_start = Some(c);
        } else if c == '[' {
+            // We encountered an opening `[` delimiter.
            block_level.push(BlockKind::SquareBracket);
        } else if c == ']' {
+            // We encountered a closing `]` delimiter. Pop off the opening `[`
+            // delimiter.
            if let Some(BlockKind::SquareBracket) = block_level.last() {
                let _ = block_level.pop();
            }
        } else if c == '{' {
+            // We encountered an opening `{` delimiter.
            block_level.push(BlockKind::CurlyBracket);
        } else if c == '}' {
+            // We encountered a closing `}` delimiter. Pop off the opening `{`.
            if let Some(BlockKind::CurlyBracket) = block_level.last() {
                let _ = block_level.pop();
            }
        } else if c == '(' {
+            // We encountered an opening `(` delimiter.
            block_level.push(BlockKind::Paren);
        } else if c == ')' {
+            // We encountered a closing `)` delimiter. Pop off the opening `(`.
            if let Some(BlockKind::Paren) = block_level.last() {
                let _ = block_level.pop();
            }
-        } else if block_level.is_empty() && (c.is_whitespace() || c == '|' || c == ';' || c == '#')
-        {
+        } else if is_termination(&block_level, c) {
            break;
        }
-        bare.push(c);
+
+        // Otherwise, accumulate the character into the current token.
+        token_contents.push(c);
+
+        // Consume the character.
        let _ = src.next();
    }

    let span = Span::new(
        start_offset + span_offset,
-        start_offset + span_offset + bare.len(),
+        start_offset + span_offset + token_contents.len(),
    );

+    // If there are still unclosed opening delimiters, close them and add
+    // synthetic closing characters to the accumulated token.
    if let Some(block) = block_level.last() {
-        let delim: char = (*block).into();
+        let delim: char = (*block).closing();
        let cause = ParseError::unexpected_eof(delim.to_string(), span);

        while let Some(bk) = block_level.pop() {
-            bare.push(bk.into());
+            token_contents.push(bk.closing());
        }

-        return (bare.spanned(span), Some(cause));
+        return (token_contents.spanned(span), Some(cause));
    }

-    if let Some(delimiter) = inside_quote {
+    if let Some(delimiter) = quote_start {
        // The non-lite parse trims quotes on both sides, so we add the expected quote so that
        // anyone wanting to consume this partial parse (e.g., completions) will be able to get
        // correct information from the non-lite parse.
-        bare.push(delimiter);
+        token_contents.push(delimiter);

        return (
-            bare.spanned(span),
+            token_contents.spanned(span),
            Some(ParseError::unexpected_eof(delimiter.to_string(), span)),
        );
    }

-    if bare.is_empty() {
+    // If we didn't accumulate any characters, it's an unexpected error.
+    if token_contents.is_empty() {
        return (
-            bare.spanned(span),
+            token_contents.spanned(span),
            Some(ParseError::unexpected_eof("command".to_string(), span)),
        );
    }

-    (bare.spanned(span), None)
+    (token_contents.spanned(span), None)
 }

+/// We encountered a `#` character. Keep consuming characters until we encounter
+/// a newline character (but don't consume it).
 fn skip_comment(input: &mut Input) {
    while let Some((_, c)) = input.peek() {
        if *c == '\n' || *c == '\r' {
@ -311,39 +373,75 @@ fn skip_comment(input: &mut Input) {
    }
 }

-pub fn group(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
+/// Try to parse a list of tokens into a block.
+pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
+    // Accumulate chunks of tokens into groups.
    let mut groups = vec![];
+
+    // The current group
    let mut group = LiteGroup::new();
+
+    // The current pipeline
    let mut pipeline = LitePipeline::new();
+
+    // The current command
    let mut command = LiteCommand::new();

    let mut prev_token: Option<Token> = None;
+
+    // The parsing process repeats:
+    //
+    // - newline (`\n` or `\r`)
+    // - pipes (`|`)
+    // - semicolon
    for token in tokens {
        match &token.contents {
            TokenContents::EOL => {
+                // We encountered a newline character. If the last token on the
+                // current line is a `|`, continue the current group on the next
+                // line. Otherwise, close up the current group by rolling up the
+                // current command into the current pipeline, and then roll up
+                // the current pipeline into the group.
+
+                // If the last token on the current line is a `|`, the group
+                // continues on the next line.
                if let Some(prev) = &prev_token {
                    if let TokenContents::Pipe = prev.contents {
                        continue;
                    }
                }
-                if !command.is_empty() {
+
+                // If we have an open command, push it into the current
+                // pipeline.
+                if command.has_content() {
                    pipeline.push(command);
                    command = LiteCommand::new();
                }
-                if !pipeline.is_empty() {
+
+                // If we have an open pipeline, push it into the current group.
+                if pipeline.has_content() {
                    group.push(pipeline);
                    pipeline = LitePipeline::new();
                }
-                if !group.is_empty() {
+
+                // If we have an open group, accumulate it into `groups`.
+                if group.has_content() {
                    groups.push(group);
                    group = LiteGroup::new();
                }
            }
            TokenContents::Pipe => {
-                if !command.is_empty() {
+                // We encountered a pipe (`|`) character, which terminates a
+                // command.
+
+                // If the current command has content, accumulate it into
+                // the current pipeline and start a new command.
+                if command.has_content() {
                    pipeline.push(command);
                    command = LiteCommand::new();
                } else {
+                    // If the current command doesn't have content, return an
+                    // error that indicates that the `|` was unexpected.
                    return (
                        LiteBlock::new(groups),
                        Some(ParseError::extra_tokens(
@ -353,31 +451,49 @@ pub fn group(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
                }
            }
            TokenContents::Semicolon => {
-                if !command.is_empty() {
+                // We encountered a semicolon (`;`) character, which terminates
+                // a pipeline.
+
+                // If the current command has content, accumulate it into the
+                // current pipeline and start a new command.
+                if command.has_content() {
                    pipeline.push(command);
                    command = LiteCommand::new();
                }
-                if !pipeline.is_empty() {
+
+                // If the current pipeline has content, accumulate it into the
+                // current group and start a new pipeline.
+                if pipeline.has_content() {
                    group.push(pipeline);
                    pipeline = LitePipeline::new();
                }
            }
-            TokenContents::Bare(bare) => {
+            TokenContents::Baseline(bare) => {
+                // We encountered an unclassified token. Accumulate it into
+                // the current command as a string.
+
                command.push(bare.to_string().spanned(token.span));
            }
        }
        prev_token = Some(token);
    }
-    if !command.is_empty() {
+
+    // If the current command has content, accumulate it into the current pipeline.
+    if command.has_content() {
        pipeline.push(command);
    }
-    if !pipeline.is_empty() {
+
+    // If the current pipeline has content, accumulate it into the current group.
+    if pipeline.has_content() {
        group.push(pipeline);
    }
-    if !group.is_empty() {
+
+    // If the current group has content, accumulate it into the list of groups.
+    if group.has_content() {
        groups.push(group);
    }

+    // Return a new LiteBlock with the accumulated list of groups.
    (LiteBlock::new(groups), None)
 }

@ -385,35 +501,51 @@ pub fn group(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
 /// semicolons, pipes, etc from external bare values (values that haven't been classified further)
 /// Takes in a string and and offset, which is used to offset the spans created (for when this function is used to parse inner strings)
 pub fn lex(input: &str, span_offset: usize) -> (Vec<Token>, Option<ParseError>) {
+    // Break the input slice into an iterator of Unicode characters.
    let mut char_indices = input.char_indices().peekable();
    let mut error = None;

    let mut output = vec![];
    let mut is_complete = true;

+    // The lexing process repeats. One character of lookahead is sufficient to decide what to do next.
+    //
+    // - `|`: the token is either `|` token or a `||` token
+    // - `;`: the token is a semicolon
+    // - `\n` or `\r`: the token is an EOL (end of line) token
+    // - other whitespace: ignored
+    // - `#`: the token starts a line comment, which contains all of the subsequent characters until the next EOL
+    // - anything else: the start of a baseline token, which is consumed by `baseline`
    while let Some((idx, c)) = char_indices.peek() {
        if *c == '|' {
+            // If the next character is `|`, it's either `|` or `||`.
+
            let idx = *idx;
            let prev_idx = idx;
            let _ = char_indices.next();
+
+            // If the next character is `|`, we're looking at a `||`.
            if let Some((idx, c)) = char_indices.peek() {
                if *c == '|' {
-                    // we have '||' instead of '|'
                    let idx = *idx;
                    let _ = char_indices.next();
                    output.push(Token::new(
-                        TokenContents::Bare("||".into()),
+                        TokenContents::Baseline("||".into()),
                        Span::new(span_offset + prev_idx, span_offset + idx + 1),
                    ));
                    continue;
                }
            }
+
+            // Otherwise, it's just a regular `|` token.
            output.push(Token::new(
                TokenContents::Pipe,
                Span::new(span_offset + idx, span_offset + idx + 1),
            ));
            is_complete = false;
        } else if *c == ';' {
+            // If the next character is a `;`, we're looking at a semicolon token.
+
            if !is_complete && error.is_none() {
                error = Some(ParseError::extra_tokens(
                    ";".to_string().spanned(Span::new(*idx, idx + 1)),
@ -426,6 +558,8 @@ pub fn lex(input: &str, span_offset: usize) -> (Vec<Token>, Option<ParseError>)
                Span::new(span_offset + idx, span_offset + idx + 1),
            ));
        } else if *c == '\n' || *c == '\r' {
+            // If the next character is a newline, we're looking at an EOL (end of line) token.
+
            let idx = *idx;
            let _ = char_indices.next();
            output.push(Token::new(
@ -433,17 +567,24 @@ pub fn lex(input: &str, span_offset: usize) -> (Vec<Token>, Option<ParseError>)
                Span::new(span_offset + idx, span_offset + idx + 1),
            ));
        } else if *c == '#' {
+            // If the next character is `#`, we're at the beginning of a line
+            // comment. The comment continues until the next newline.
+
            skip_comment(&mut char_indices);
        } else if c.is_whitespace() {
+            // If the next character is non-newline whitespace, skip it.
+
            let _ = char_indices.next();
        } else {
-            let (result, err) = bare(&mut char_indices, span_offset);
+            // Otherwise, try to consume an unclassified token.
+
+            let (result, err) = baseline(&mut char_indices, span_offset);
            if error.is_none() {
                error = err;
            }
            is_complete = true;
            let Spanned { item, span } = result;
-            output.push(Token::new(TokenContents::Bare(item), span));
+            output.push(Token::new(TokenContents::Baseline(item), span));
        }
    }

@ -605,7 +746,7 @@ mod tests {
        fn pipeline() {
            let (result, err) = lex("cmd1 | cmd2 ; deploy", 0);
            assert!(err.is_none());
-            let (result, err) = group(result);
+            let (result, err) = block(result);
            assert!(err.is_none());
            assert_eq!(result.span(), span(0, 20));
            assert_eq!(result.block[0].pipelines[0].span(), span(0, 11));
@ -616,7 +757,7 @@ mod tests {
        fn simple_1() {
            let (result, err) = lex("foo", 0);
            assert!(err.is_none());
-            let (result, err) = group(result);
+            let (result, err) = block(result);
            assert!(err.is_none());
            assert_eq!(result.block.len(), 1);
            assert_eq!(result.block[0].pipelines.len(), 1);
@ -632,7 +773,7 @@ mod tests {
        fn simple_offset() {
            let (result, err) = lex("foo", 10);
            assert!(err.is_none());
-            let (result, err) = group(result);
+            let (result, err) = block(result);
            assert!(err.is_none());
            assert_eq!(result.block[0].pipelines.len(), 1);
            assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
@ -647,7 +788,7 @@ mod tests {
        fn incomplete_result() {
            let (result, err) = lex("my_command \"foo' --test", 10);
            assert!(matches!(err.unwrap().reason(), nu_errors::ParseErrorReason::Eof { .. }));
-            let (result, _) = group(result);
+            let (result, _) = block(result);

            assert_eq!(result.block.len(), 1);
            assert_eq!(result.block[0].pipelines.len(), 1);
--- a/crates/nu-parser/src/lib.rs
+++ b/crates/nu-parser/src/lib.rs
@ -6,7 +6,7 @@ mod scope;
 mod shapes;
 mod signature;

-pub use lex::{group, lex, LiteBlock, LiteCommand, LiteGroup, LitePipeline};
+pub use lex::{block, lex, LiteBlock, LiteCommand, LiteGroup, LitePipeline};
 pub use parse::{classify_block, garbage, parse, parse_full_column_path, parse_math_expression};
 pub use path::expand_ndots;
 pub use scope::ParserScope;
--- a/crates/nu-parser/src/parse.rs
+++ b/crates/nu-parser/src/parse.rs
@ -13,7 +13,7 @@ use nu_source::{Span, Spanned, SpannedItem};
 use num_bigint::BigInt;

 //use crate::errors::{ParseError, ParseResult};
-use crate::lex::{group, lex, LiteBlock, LiteCommand, LitePipeline};
+use crate::lex::{block, lex, LiteBlock, LiteCommand, LitePipeline};
 use crate::path::expand_path;
 use crate::scope::ParserScope;
 use bigdecimal::BigDecimal;
@ -393,7 +393,7 @@ fn parse_invocation(
    if err.is_some() {
        return (garbage(lite_arg.span), err);
    };
-    let (lite_block, err) = group(tokens);
+    let (lite_block, err) = block(tokens);
    if err.is_some() {
        return (garbage(lite_arg.span), err);
    };
@ -719,7 +719,7 @@ fn parse_table(
        return (garbage(lite_inner.span()), err);
    }

-    let (lite_header, err) = group(tokens);
+    let (lite_header, err) = block(tokens);
    if err.is_some() {
        return (garbage(lite_inner.span()), err);
    }
@ -742,7 +742,7 @@ fn parse_table(
        if err.is_some() {
            return (garbage(arg.span), err);
        }
-        let (lite_cell, err) = group(tokens);
+        let (lite_cell, err) = block(tokens);
        if err.is_some() {
            return (garbage(arg.span), err);
        }
@ -873,7 +873,7 @@ fn parse_arg(
                        return (garbage(lite_arg.span), err);
                    }

-                    let (lite_block, err) = group(tokens);
+                    let (lite_block, err) = block(tokens);
                    if err.is_some() {
                        return (garbage(lite_arg.span), err);
                    }
@ -928,7 +928,7 @@ fn parse_arg(
                        return (garbage(lite_arg.span), err);
                    }

-                    let (lite_block, err) = group(tokens);
+                    let (lite_block, err) = block(tokens);
                    if err.is_some() {
                        return (garbage(lite_arg.span), err);
                    }
@ -1156,7 +1156,7 @@ fn parse_parenthesized_expression(
                return (garbage(lite_arg.span), err);
            }

-            let (lite_block, err) = group(tokens);
+            let (lite_block, err) = block(tokens);
            if err.is_some() {
                return (garbage(lite_arg.span), err);
            }
@ -2178,7 +2178,7 @@ fn parse_definition(call: &LiteCommand, scope: &dyn ParserScope) -> Option<Parse
                if err.is_some() {
                    return err;
                };
-                let (lite_block, err) = group(tokens);
+                let (lite_block, err) = block(tokens);
                if err.is_some() {
                    return err;
                };
@ -2338,7 +2338,7 @@ pub fn parse(
    if error.is_some() {
        return (Block::basic(), error);
    }
-    let (lite_block, error) = group(output);
+    let (lite_block, error) = block(output);
    if error.is_some() {
        return (Block::basic(), error);
    }
--- a/crates/nu-protocol/Cargo.toml
+++ b/crates/nu-protocol/Cargo.toml
@ -15,7 +15,7 @@ byte-unit = "4.0.9"
 chrono = {version = "0.4.15", features = ["serde"]}
 derive-new = "0.5.8"
 getset = "0.1.1"
-indexmap = {version = "1.6.0", features = ["serde-1"]}
+indexmap = {version = "1.6.1", features = ["serde-1"]}
 log = "0.4.11"
 nu-errors = {path = "../nu-errors", version = "0.25.1"}
 nu-source = {path = "../nu-source", version = "0.25.1"}
--- a/crates/nu-source/Cargo.toml
+++ b/crates/nu-source/Cargo.toml
@ -13,7 +13,7 @@ doctest = false
 derive-new = "0.5.8"
 getset = "0.1.1"
 pretty = "0.5.2"
-serde = {version = "1.0.115", features = ["derive"]}
-termcolor = "1.1.0"
+serde = {version = "1.0.118", features = ["derive"]}
+termcolor = "1.1.2"

 [build-dependencies]