Add && and || support to tokenizer

This commit is contained in:
ridiculousfish 2018-03-01 12:56:15 -08:00
parent a5dd96558f
commit 8ded041352
5 changed files with 56 additions and 45 deletions

View file

@ -535,10 +535,14 @@ static void test_tokenizer() {
const wchar_t *str =
L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells "
L"){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect "
L"&&& ||| "
L"&& || & |"
L"Compress_Newlines\n \n\t\n \nInto_Just_One";
const int types[] = {TOK_STRING, TOK_REDIRECT, TOK_STRING, TOK_REDIRECT, TOK_STRING,
TOK_STRING, TOK_STRING, TOK_REDIRECT, TOK_REDIRECT, TOK_STRING,
TOK_STRING, TOK_END, TOK_STRING};
const int types[] = {TOK_STRING, TOK_REDIRECT, TOK_STRING, TOK_REDIRECT, TOK_STRING,
TOK_STRING, TOK_STRING, TOK_REDIRECT, TOK_REDIRECT, TOK_STRING,
TOK_ANDAND, TOK_BACKGROUND, TOK_OROR, TOK_PIPE, TOK_ANDAND,
TOK_OROR, TOK_BACKGROUND, TOK_PIPE, TOK_STRING, TOK_END,
TOK_STRING};
say(L"Test correct tokenization");

View file

@ -1773,6 +1773,10 @@ static bool should_import_bash_history_line(const std::string &line) {
if (line.find("((") != std::string::npos) return false;
if (line.find("))") != std::string::npos) return false;
// Temporarily skip lines with && and ||
if (line.find("&&") != std::string::npos) return false;
if (line.find("||") != std::string::npos) return false;
// Skip lines that end with a backslash. We do not handle multiline commands from bash history.
if (line.back() == '\\') return false;

View file

@ -213,43 +213,32 @@ wcstring parse_token_t::user_presentable_description() const {
/// Convert from tokenizer_t's token type to a parse_token_t type.
///
/// Note: this span in SOURCE is a diff rendering whose +/- markers were lost,
/// leaving the pre- and post-commit bodies concatenated (duplicate case labels,
/// a dead `return result;`). This is the reconstructed post-commit function:
/// a flat switch that returns directly from each case.
static inline parse_token_type_t parse_token_type_from_tokenizer_token(
    enum token_type tokenizer_token_type) {
    switch (tokenizer_token_type) {
        case TOK_NONE:
            // TOK_NONE is a sentinel, never a real token; reaching here is a bug.
            DIE("TOK_NONE passed to parse_token_type_from_tokenizer_token");
            return token_type_invalid;
        case TOK_STRING:
            return parse_token_type_string;
        case TOK_PIPE:
            return parse_token_type_pipe;
        case TOK_ANDAND:
        case TOK_OROR:
            // Temporary while && and || support is brought up.
            return parse_special_type_comment;
        case TOK_END:
            return parse_token_type_end;
        case TOK_BACKGROUND:
            return parse_token_type_background;
        case TOK_REDIRECT:
            return parse_token_type_redirection;
        case TOK_ERROR:
            return parse_special_type_tokenizer_error;
        case TOK_COMMENT:
            return parse_special_type_comment;
    }
    // Unreachable if every enumerator is handled above; keeps the compiler happy
    // and catches out-of-range values passed through a cast.
    debug(0, "Bad token type %d passed to %s", (int)tokenizer_token_type, __FUNCTION__);
    DIE("bad token type");
    return token_type_invalid;
}
/// Helper function for parse_dump_tree().

View file

@ -528,16 +528,28 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
break;
}
case L'&': {
result.type = TOK_BACKGROUND;
result.length = 1;
this->buff++;
if (this->buff[1] == L'&') {
result.type = TOK_ANDAND;
result.length = 2;
this->buff += 2;
} else {
result.type = TOK_BACKGROUND;
result.length = 1;
this->buff++;
}
break;
}
case L'|': {
result.type = TOK_PIPE;
result.redirected_fd = 1;
result.length = 1;
this->buff++;
if (this->buff[1] == L'|') {
result.type = TOK_OROR;
result.length = 2;
this->buff += 2;
} else {
result.type = TOK_PIPE;
result.redirected_fd = 1;
result.length = 1;
this->buff++;
}
break;
}
case L'>':

View file

@ -14,6 +14,8 @@ enum token_type {
TOK_ERROR, /// Error reading token
TOK_STRING, /// String token
TOK_PIPE, /// Pipe token
TOK_ANDAND, /// && token
TOK_OROR, /// || token
TOK_END, /// End token (semicolon or newline, not literal end)
TOK_REDIRECT, /// redirection token
TOK_BACKGROUND, /// send job to bg token