diff --git a/test/to-parse.zsh b/test/to-parse.zsh new file mode 100644 index 0000000..5381a94 --- /dev/null +++ b/test/to-parse.zsh @@ -0,0 +1,823 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2010-2016 zsh-syntax-highlighting contributors +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this list of conditions +# and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this list of +# conditions and the following disclaimer in the documentation and/or other materials provided +# with the distribution. +# * Neither the name of the zsh-syntax-highlighting contributors nor the names of its contributors +# may be used to endorse or promote products derived from this software without specific prior +# written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ------------------------------------------------------------------------------------------------- +# -*- mode: zsh; sh-indentation: 2; indent-tabs-mode: nil; sh-basic-offset: 2; -*- +# vim: ft=zsh sw=2 ts=2 et +# ------------------------------------------------------------------------------------------------- + +typeset -gA __hsmw_highlight_main__command_type_cache + +# Define default styles. +typeset -gA HSMW_HIGHLIGHT_STYLES +: ${HSMW_HIGHLIGHT_STYLES[default]:=none} +: ${HSMW_HIGHLIGHT_STYLES[unknown-token]:=fg=red,bold} +: ${HSMW_HIGHLIGHT_STYLES[reserved-word]:=fg=yellow} +: ${HSMW_HIGHLIGHT_STYLES[alias]:=fg=green} +: ${HSMW_HIGHLIGHT_STYLES[suffix-alias]:=fg=green} +: ${HSMW_HIGHLIGHT_STYLES[builtin]:=fg=green} +: ${HSMW_HIGHLIGHT_STYLES[function]:=fg=green} +: ${HSMW_HIGHLIGHT_STYLES[command]:=fg=green} +: ${HSMW_HIGHLIGHT_STYLES[precommand]:=fg=green} +: ${HSMW_HIGHLIGHT_STYLES[commandseparator]:=none} +: ${HSMW_HIGHLIGHT_STYLES[hashed-command]:=fg=green} +: ${HSMW_HIGHLIGHT_STYLES[path]:=fg=magenta} +: ${HSMW_HIGHLIGHT_STYLES[path_pathseparator]:=} +: ${HSMW_HIGHLIGHT_STYLES[path_prefix]:=fg=magenta} +: ${HSMW_HIGHLIGHT_STYLES[path_prefix_pathseparator]:=} +: ${HSMW_HIGHLIGHT_STYLES[globbing]:=fg=blue,bold} +: ${HSMW_HIGHLIGHT_STYLES[history-expansion]:=fg=blue,bold} +: ${HSMW_HIGHLIGHT_STYLES[single-hyphen-option]:=none} +: ${HSMW_HIGHLIGHT_STYLES[double-hyphen-option]:=none} +: ${HSMW_HIGHLIGHT_STYLES[back-quoted-argument]:=none} +: ${HSMW_HIGHLIGHT_STYLES[single-quoted-argument]:=fg=yellow} +: ${HSMW_HIGHLIGHT_STYLES[double-quoted-argument]:=fg=yellow} +: ${HSMW_HIGHLIGHT_STYLES[dollar-quoted-argument]:=fg=yellow} +: ${HSMW_HIGHLIGHT_STYLES[dollar-double-quoted-argument]:=fg=cyan} +: ${HSMW_HIGHLIGHT_STYLES[back-double-quoted-argument]:=fg=cyan} +: ${HSMW_HIGHLIGHT_STYLES[back-dollar-quoted-argument]:=fg=cyan} +: ${HSMW_HIGHLIGHT_STYLES[assign]:=none} +: ${HSMW_HIGHLIGHT_STYLES[redirection]:=none} +: ${HSMW_HIGHLIGHT_STYLES[comment]:=fg=black,bold} 

# Classify a command word.
#
# Consults the special arrays of the zsh/parameter module when that module is
# loaded (no fork needed); otherwise, or when those lookups are inconclusive,
# parses the output of 'type -w' run in a command substitution.
#
# $1: the word to classify
#
# The classification is left in REPLY. Results are memoised in
# __hsmw_highlight_main__command_type_cache when that parameter exists.
-hsmw-highlight-main-type() {
  # Fast path: reuse a non-empty answer from an earlier call.
  if (( $+__hsmw_highlight_main__command_type_cache )); then
    REPLY=$__hsmw_highlight_main__command_type_cache[(e)$1]
    [[ -n "$REPLY" ]] && return
  fi

  # Options requested by the caller (e.g. PATH_DIRS) must be in effect for
  # the lookups below.
  (( $#options_to_set )) && setopt localoptions $options_to_set

  # REPLY being *unset* (not merely empty) means "not decided yet".
  unset REPLY
  if zmodload -e zsh/parameter; then
    if (( $+aliases[(e)$1] )); then
      REPLY=alias
    elif (( $+saliases[(e)${1##*.}] )); then
      REPLY='suffix alias'
    elif (( $reswords[(Ie)$1] )); then
      REPLY=reserved
    elif (( $+functions[(e)$1] )); then
      REPLY=function
    elif (( $+builtins[(e)$1] )); then
      REPLY=builtin
    elif (( $+commands[(e)$1] )); then
      REPLY=command
    # zsh 5.2 and older have a bug whereby running 'type -w ./sudo' implicitly
    # runs 'hash ./sudo=/usr/local/bin/./sudo' (assuming /usr/local/bin/sudo
    # exists and is in $PATH).  Avoid triggering the bug, at the expense of
    # falling through to the $() below, incurring a fork.  (Issue #354.)
    #
    # The second disjunct mimics the isrelative() C call from the zsh bug.
    elif { [[ $1 != */* ]] || is-at-least 5.3 } &&
         ! builtin type -w -- $1 >/dev/null 2>&1; then
      REPLY=none
    fi
  fi

  # Still undecided: ask 'type -w' and keep what follows the "name: " prefix.
  (( $+REPLY )) || REPLY="${$(LC_ALL=C builtin type -w -- $1 2>/dev/null)#*: }"

  if (( $+__hsmw_highlight_main__command_type_cache )); then
    __hsmw_highlight_main__command_type_cache[(e)$1]=$REPLY
  fi
}

# Test whether a token is a redirection operator.
#
# $1: the token to examine
#
# Returns zero when it is one, non-zero otherwise.
-hsmw-highlight-is-redirection() {
  # A redirection operator token:
  # - starts with an optional single-digit number;
  # - then, has a '<' or '>' character;
  # - is not a process substitution [<(...) or >(...)].
  [[ $1 == (<0-9>|)(\<|\>)* && $1 != (\<|\>)$'\x28'* ]]
}

# Resolve alias.
+# +# Takes a single argument. +# +# The result will be stored in REPLY. +-hsmw-highlight-resolve-alias() { + if zmodload -e zsh/parameter; then + REPLY=${aliases[$arg]} + else + REPLY="${"$(alias -- $arg)"#*=}" + fi +} + +# Check that the top of $braces_stack has the expected value. If it does, set +# the style according to $2; otherwise, set style=unknown-token. +# +# $1: character expected to be at the top of $braces_stack +# $2: assignment to execute it if matches +-hsmw-highlight-stack-pop() { + if [[ $braces_stack[1] == $1 ]]; then + braces_stack=${braces_stack:1} + eval "$2" + else + style=unknown-token + fi +} + +# Main syntax highlighting function. +-hsmw-highlight-process() +{ + ## Before we even 'emulate -L', we must test a few options that would reset. + if [[ -o interactive_comments ]]; then + local interactive_comments= # set to empty + fi + if [[ -o ignore_braces ]] || eval '[[ -o ignore_close_braces ]] 2>/dev/null'; then + local right_brace_is_recognised_everywhere=false + else + local right_brace_is_recognised_everywhere=true + fi + if [[ -o path_dirs ]]; then + integer path_dirs_was_set=1 + else + integer path_dirs_was_set=0 + fi + if [[ -o multi_func_def ]]; then + integer multi_func_def=1 + else + integer multi_func_def=0 + fi + emulate -L zsh + setopt localoptions extendedglob bareglobqual + + ## Variable declarations and initializations + local start_pos=0 end_pos highlight_glob=true arg style + local in_array_assignment=false # true between 'a=(' and the matching ')' + typeset -a __HSMW_HIGHLIGHT_TOKENS_COMMANDSEPARATOR + typeset -a __HSMW_HIGHLIGHT_TOKENS_PRECOMMANDS + typeset -a __HSMW_HIGHLIGHT_TOKENS_CONTROL_FLOW + local -a options_to_set # used in callees + local buf="$1" + integer len="${#buf}" + integer pure_buf_len=len # historical, was $#BUFFER, i.e. len without $PREBUFFER; used e.g. 
in *_check_path + + local braces_stack # "R" for round, "Q" for square, "Y" for curly + + if (( path_dirs_was_set )); then + options_to_set+=( PATH_DIRS ) + fi + unset path_dirs_was_set + + __HSMW_HIGHLIGHT_TOKENS_COMMANDSEPARATOR=( + '|' '||' ';' '&' '&&' + '|&' + '&!' '&|' + # ### 'case' syntax, but followed by a pattern, not by a command + # ';;' ';&' ';|' + ) + __HSMW_HIGHLIGHT_TOKENS_PRECOMMANDS=( + 'builtin' 'command' 'exec' 'nocorrect' 'noglob' + 'pkexec' # immune to #121 because it's usually not passed --option flags + ) + + # Tokens that, at (naively-determined) "command position", are followed by + # a de jure command position. All of these are reserved words. + __HSMW_HIGHLIGHT_TOKENS_CONTROL_FLOW=( + $'\x7b' # block + $'\x28' # subshell + '()' # anonymous function + 'while' + 'until' + 'if' + 'then' + 'elif' + 'else' + 'do' + 'time' + 'coproc' + '!' # reserved word; unrelated to $histchars[1] + ) + + local -a match mbegin mend + + # State machine + # + # The states are: + # - :start: Command word + # - :sudo_opt: A leading-dash option to sudo (such as "-u" or "-i") + # - :sudo_arg: The argument to a sudo leading-dash option that takes one, + # when given as a separate word; i.e., "foo" in "-u foo" (two + # words) but not in "-ufoo" (one word). + # - :regular: "Not a command word", and command delimiters are permitted. + # Mainly used to detect premature termination of commands. + # - :always: The word 'always' in the «{ foo } always { bar }» syntax. + # + # When the kind of a word is not yet known, $this_word / $next_word may contain + # multiple states. For example, after "sudo -i", the next word may be either + # another --flag or a command name, hence the state would include both :start: + # and :sudo_opt:. + # + # The tokens are always added with both leading and trailing colons to serve as + # word delimiters (an improvised array); [[ $x == *:foo:* ]] and x=${x//:foo:/} + # will DTRT regardless of how many elements or repetitions $x has.. 
+ # + # Handling of redirections: upon seeing a redirection token, we must stall + # the current state --- that is, the value of $this_word --- for two iterations + # (one for the redirection operator, one for the word following it representing + # the redirection target). Therefore, we set $in_redirection to 2 upon seeing a + # redirection operator, decrement it each iteration, and stall the current state + # when it is non-zero. Thus, upon reaching the next word (the one that follows + # the redirection operator and target), $this_word will still contain values + # appropriate for the word immediately following the word that preceded the + # redirection operator. + # + # The "the previous word was a redirection operator" state is not communicated + # to the next iteration via $next_word/$this_word as usual, but via + # $in_redirection. The value of $next_word from the iteration that processed + # the operator is discarded. + # + local this_word=':start:' next_word + integer in_redirection + # Processing buffer + local proc_buf="$buf" + for arg in ${interactive_comments-${(z)buf}} \ + ${interactive_comments+${(zZ+c+)buf}}; do + # Initialize $next_word. + if (( in_redirection )); then + (( --in_redirection )) + fi + if (( in_redirection == 0 )); then + # Initialize $next_word to its default value. + next_word=':regular:' + else + # Stall $next_word. + fi + + # Initialize per-"simple command" [zshmisc(1)] variables: + # + # $already_added (see next paragraph) + # $style how to highlight $arg + # $in_array_assignment boolean flag for "between '(' and ')' of array assignment" + # $highlight_glob boolean flag for "'noglob' is in effect" + # + # $already_added is set to 1 to disable adding an entry to region_highlight + # for this iteration. Currently, that is done for "" and $'' strings, + # which add the entry early so escape sequences within the string override + # the string's color. 
+ integer already_added=0 + style=unknown-token + if [[ $this_word == *':start:'* ]]; then + in_array_assignment=false + if [[ $arg == 'noglob' ]]; then + highlight_glob=false + fi + fi + + # Compute the new $start_pos and $end_pos, skipping over whitespace in $buf. + if [[ $arg == ';' ]] ; then + # We're looking for either a semicolon or a newline, whichever comes + # first. Both of these are rendered as a ";" (SEPER) by the ${(z)..} + # flag. + # + # We can't use the (Z+n+) flag because that elides the end-of-command + # token altogether, so 'echo foo\necho bar' (two commands) becomes + # indistinguishable from 'echo foo echo bar' (one command with three + # words for arguments). + local needle=$'[;\n]' + integer offset=$(( ${proc_buf[(i)$needle]} - 1 )) + (( start_pos += offset )) + (( end_pos = start_pos + $#arg )) + else + # The line was: + # + # integer offset=$(((len-start_pos)-${#${proc_buf##([[:space:]]|\\[[:space:]])#}})) + # + # - len-start_pos is length of current proc_buf; basically: initial length minus where + # we are, and proc_buf is chopped to the "where we are" (compare the "previous value + # of start_pos" below, and the len-(start_pos-offset) = len-start_pos+offset) + # - what's after main minus sign is: length of proc_buf without spaces at the beginning + # - so what the line actually did, was computing length of the spaces! + # - this can be done via (#b) flag, like below + if [[ "$proc_buf" = (#b)(#s)(([[:space:]]|\\[[:space:]])##)* ]]; then + # The first, outer parenthesis + integer offset="${#match[1]}" + else + integer offset=0 + fi + ((start_pos+=offset)) + ((end_pos=$start_pos+${#arg})) + fi + + # Compute the new $proc_buf. We advance it + # (chop off characters from the beginning) + # beyond what end_pos points to, by skipping + # as many characters as end_pos was advanced. + # + # end_pos was advanced by $offset (via start_pos) + # and by $#arg. Note the `start_pos=$end_pos` + # below. + # + # As for the [,len]. 
We could use [,len-start_pos+offset] + # here, but to make it easier on eyes, we use len and + # rely on the fact that Zsh simply handles that. The + # length of proc_buf is len-start_pos+offset because + # we're chopping it to match current start_pos, so its + # length matches the previous value of start_pos. + # + # Why [,-1] is slower than [,length] isn't clear. + proc_buf="${proc_buf[offset + $#arg + 1,len]}" + + # Handle the INTERACTIVE_COMMENTS option. + # + # We use the (Z+c+) flag so the entire comment is presented as one token in $arg. + if [[ -n ${interactive_comments+'set'} && $arg[1] == $histchars[3] ]]; then + if [[ $this_word == *(':regular:'|':start:')* ]]; then + style=comment + else + style=unknown-token # prematurely terminated + fi + -hsmw-add-highlight $start_pos $end_pos $style + already_added=1 + continue + fi + + # Analyse the current word. + if -hsmw-highlight-is-redirection $arg ; then + # A '<' or '>', possibly followed by a digit + in_redirection=2 + fi + + # Special-case the first word after 'sudo'. + if (( ! in_redirection )); then + if [[ $this_word == *':sudo_opt:'* ]] && [[ $arg != -* ]]; then + this_word=${this_word//:sudo_opt:/} + fi + fi + + # Parse the sudo command line + if (( ! in_redirection )); then + if [[ $this_word == *':sudo_opt:'* ]]; then + case "$arg" in + # Flag that requires an argument + '-'[Cgprtu]) this_word=${this_word//:start:/}; + next_word=':sudo_arg:';; + # This prevents misbehavior with sudo -u -otherargument + '-'*) this_word=${this_word//:start:/}; + next_word+=':start:'; + next_word+=':sudo_opt:';; + *) ;; + esac + elif [[ $this_word == *':sudo_arg:'* ]]; then + next_word+=':sudo_opt:' + next_word+=':start:' + fi + fi + + # The Great Fork: is this a command word? Is this a non-command word? 
+ if [[ $this_word == *':always:'* && $arg == 'always' ]]; then + # try-always construct + style=reserved-word # de facto a reserved word, although not de jure + next_word=':start:' + elif [[ $this_word == *':start:'* ]] && (( in_redirection == 0 )); then # $arg is the command word + if [[ -n ${(M)__HSMW_HIGHLIGHT_TOKENS_PRECOMMANDS:#"$arg"} ]]; then + style=precommand + elif [[ "$arg" = "sudo" ]]; then + style=precommand + next_word=${next_word//:regular:/} + next_word+=':sudo_opt:' + next_word+=':start:' + else + -hsmw-highlight-expand-path $arg + local expanded_arg="$REPLY" + -hsmw-highlight-main-type ${expanded_arg} + local res="$REPLY" + () { + # Special-case: command word is '$foo', like that, without braces or anything. + # + # That's not entirely correct --- if the parameter's value happens to be a reserved + # word, the parameter expansion will be highlighted as a reserved word --- but that + # incorrectness is outweighed by the usability improvement of permitting the use of + # parameters that refer to commands, functions, and builtins. + local -a match mbegin mend + local MATCH; integer MBEGIN MEND + if [[ $res == none ]] && (( ${+parameters} )) && + [[ ${arg[1]} == \$ ]] && [[ ${arg:1} = ([[:alpha:]_][[:alnum:]_]#|[[:digit:]]##) ]] && + (( ${+parameters[${MATCH}]} )) + then + -hsmw-highlight-main-type ${(P)MATCH} + res=$REPLY + fi + } + case $res in + reserved) # reserved word + style=reserved-word + if [[ $arg == $'\x7b' ]]; then + braces_stack='Y'"$braces_stack" + elif [[ $arg == $'\x7d' ]]; then + # We're at command word, so no need to check $right_brace_is_recognised_everywhere + -hsmw-highlight-stack-pop 'Y' style=reserved-word + if [[ $style == reserved-word ]]; then + next_word+=':always:' + fi + fi + ;; + 'suffix alias') style=suffix-alias;; + alias) () { + integer insane_alias + case $arg in + # Issue #263: aliases with '=' on their LHS. 
+ # + # There are three cases: + # + # - Unsupported, breaks 'alias -L' output, but invokable: + ('='*) :;; + # - Unsupported, not invokable: + (*'='*) insane_alias=1;; + # - The common case: + (*) :;; + esac + if (( insane_alias )); then + style=unknown-token + else + style=alias + -hsmw-highlight-resolve-alias $arg + local alias_target="$REPLY" + [[ -n ${(M)__HSMW_HIGHLIGHT_TOKENS_PRECOMMANDS:#"$alias_target"} && -z ${(M)__HSMW_HIGHLIGHT_TOKENS_PRECOMMANDS:#"$arg"} ]] && __HSMW_HIGHLIGHT_TOKENS_PRECOMMANDS+=($arg) + fi + } + ;; + builtin) style=builtin;; + function) style=function;; + command) style=command;; + hashed) style=hashed-command;; + none) if -hsmw-highlight-check-assign; then + style=assign + if [[ $arg[-1] == '(' ]]; then + in_array_assignment=true + else + # assignment to a scalar parameter. + # (For array assignments, the command doesn't start until the ")" token.) + next_word+=':start:' + fi + elif [[ $arg[0,1] = $histchars[0,1] ]] && (( $#arg[0,2] == 2 )); then + style=history-expansion + elif [[ $arg[0,1] == $histchars[2,2] ]]; then + style=history-expansion + elif [[ -n ${(M)__HSMW_HIGHLIGHT_TOKENS_COMMANDSEPARATOR:#"$arg"} ]]; then + if [[ $this_word == *':regular:'* ]]; then + # This highlights empty commands (semicolon follows nothing) as an error. + # Zsh accepts them, though. + style=commandseparator + else + style=unknown-token + fi + elif (( in_redirection == 2 )); then + style=redirection + elif [[ $arg[1,2] == '((' ]]; then + # Arithmetic evaluation. + # + # Note: prior to zsh-5.1.1-52-g4bed2cf (workers/36669), the ${(z)...} + # splitter would only output the '((' token if the matching '))' had + # been typed. Therefore, under those versions of zsh, BUFFER="(( 42" + # would be highlighted as an error until the matching "))" are typed. + # + # We highlight just the opening parentheses, as a reserved word; this + # is how [[ ... ]] is highlighted, too. 
+ style=reserved-word + -hsmw-add-highlight $start_pos $((start_pos + 2)) $style + already_added=1 + if [[ $arg[-2,-1] == '))' ]]; then + -hsmw-add-highlight $((end_pos - 2)) $end_pos $style + already_added=1 + fi + elif [[ $arg == '()' ]]; then + # anonymous function + style=reserved-word + elif [[ $arg == $'\x28' ]]; then + # subshell + style=reserved-word + braces_stack='R'"$braces_stack" + else + if -hsmw-highlight-check-path; then + style=$REPLY + else + style=unknown-token + fi + fi + ;; + *) -hsmw-add-highlight $start_pos $end_pos commandtypefromthefuture-$res + already_added=1 + ;; + esac + fi + fi + if (( ! already_added )) && [[ $style == unknown-token ]] && # not handled by the 'command word' codepath + { (( in_redirection )) || [[ $this_word == *':regular:'* ]] || [[ $this_word == *':sudo_opt:'* ]] || [[ $this_word == *':sudo_arg:'* ]] } + then # $arg is a non-command word + case $arg in + $'\x29') # subshell or end of array assignment + if $in_array_assignment; then + style=assign + in_array_assignment=false + next_word+=':start:' + else + -hsmw-highlight-stack-pop 'R' style=reserved-word + fi;; + $'\x28\x29') # possibly a function definition + if (( multi_func_def )) || false # TODO: or if the previous word was a command word + then + next_word+=':start:' + fi + style=reserved-word + ;; + $'\x7d') # right brace + # + # Parsing rule: # { + # + # Additionally, `tt(})' is recognized in any position if neither the + # tt(IGNORE_BRACES) option nor the tt(IGNORE_CLOSE_BRACES) option is set.""" + if $right_brace_is_recognised_everywhere; then + -hsmw-highlight-stack-pop 'Y' style=reserved-word + if [[ $style == reserved-word ]]; then + next_word+=':always:' + fi + else + # Fall through to the catchall case at the end. 
+ fi + ;| + '--'*) style=double-hyphen-option;; + '-'*) style=single-hyphen-option;; + "'"*) style=single-quoted-argument;; + '"'*) style=double-quoted-argument + -hsmw-add-highlight $start_pos $end_pos $style + -hsmw-highlight-string + already_added=1 + ;; + \$\'*) style=dollar-quoted-argument + -hsmw-add-highlight $start_pos $end_pos $style + -hsmw-highlight-dollar-string + already_added=1 + ;; + '`'*) style=back-quoted-argument;; + [*?]*|*[^\\][*?]*) + $highlight_glob && style=globbing || style=default;; + *) if false; then + elif [[ $arg = $'\x7d' ]] && $right_brace_is_recognised_everywhere; then + # was handled by the $'\x7d' case above + elif [[ $arg[0,1] = $histchars[0,1] ]] && (( $#arg[0,2] == 2 )); then + style=history-expansion + elif [[ -n ${(M)__HSMW_HIGHLIGHT_TOKENS_COMMANDSEPARATOR:#"$arg"} ]]; then + if [[ $this_word == *':regular:'* ]]; then + style=commandseparator + else + style=unknown-token + fi + elif (( in_redirection == 2 )); then + style=redirection + else + if -hsmw-highlight-check-path; then + style=$REPLY + else + style=default + fi + fi + ;; + esac + fi + if ! (( already_added )); then + -hsmw-add-highlight $start_pos $end_pos $style + [[ $style == path || $style == path_prefix ]] && -hsmw-highlight-path-separators + fi + if [[ -n ${(M)__HSMW_HIGHLIGHT_TOKENS_COMMANDSEPARATOR:#"$arg"} ]]; then + if [[ $arg == ';' ]] && $in_array_assignment; then + # literal newline inside an array assignment + next_word=':regular:' + else + next_word=':start:' + highlight_glob=true + fi + elif + [[ -n ${(M)__HSMW_HIGHLIGHT_TOKENS_CONTROL_FLOW:#"$arg"} && $this_word == *':start:'* ]] || + [[ -n ${(M)__HSMW_HIGHLIGHT_TOKENS_PRECOMMANDS:#"$arg"} && $this_word == *':start:'* ]]; then + next_word=':start:' + elif [[ $arg == "repeat" && $this_word == *':start:'* ]]; then + # skip the repeat-count word + in_redirection=2 + # The redirection mechanism assumes $this_word describes the word + # following the redirection. Make it so. 
+ # + # That word can be a command word with shortloops (`repeat 2 ls`) + # or a command separator (`repeat 2; ls` or `repeat 2; do ls; done`). + # + # The repeat-count word will be handled like a redirection target. + this_word=':start::regular:' + fi + start_pos=$end_pos + if (( in_redirection == 0 )); then + # This is the default/common codepath. + this_word=$next_word + else + # Stall $this_word. + fi + done +} + +# Check if $arg is variable assignment +-hsmw-highlight-check-assign() +{ + setopt localoptions extended_glob + [[ $arg == [[:alpha:]_][[:alnum:]_]#(|\[*\])(|[+])=* ]] || + [[ $arg == [0-9]##(|[+])=* ]] +} + +-hsmw-highlight-path-separators() +{ + local pos style_pathsep + style_pathsep=${style}_pathseparator + [[ -z "$HSMW_HIGHLIGHT_STYLES[$style_pathsep]" || "$HSMW_HIGHLIGHT_STYLES[$style]" == "$HSMW_HIGHLIGHT_STYLES[$style_pathsep]" ]] && return 0 + for (( pos = start_pos; $pos <= end_pos; pos++ )) ; do + if [[ ${buf[pos]} == "/" ]]; then + -hsmw-add-highlight $((pos - 1)) $pos $style_pathsep + fi + done +} + +# Check if $arg is a path. +# If yes, return 0 and in $REPLY the style to use. +# Else, return non-zero (and the contents of $REPLY is undefined). +-hsmw-highlight-check-path() +{ + -hsmw-highlight-expand-path $arg; + local expanded_path="$REPLY" + + REPLY=path + + [[ -z $expanded_path ]] && return 1 + [[ -L $expanded_path ]] && return 0 + [[ -e $expanded_path ]] && return 0 + + # Search the path in CDPATH + local cdpath_dir + for cdpath_dir in $cdpath ; do + [[ -e "$cdpath_dir/$expanded_path" ]] && return 0 + done + + # If dirname($arg) doesn't exist, neither does $arg. + [[ ! -d ${expanded_path:h} ]] && return 1 + + # If this word ends the buffer, check if it's the prefix of a valid path. + if [[ ${buf[1]} != "-" && $pure_buf_len == $end_pos ]]; then + local -a tmp + tmp=( ${expanded_path}*(N) ) + (( $#tmp > 0 )) && REPLY=path_prefix && return 0 + fi + + # It's not a path. 
+ return 1 +} + +# Highlight special chars inside double-quoted strings +-hsmw-highlight-string() +{ + setopt localoptions noksharrays + local -a match mbegin mend + local MATCH; integer MBEGIN MEND + local i j k style + # Starting quote is at 1, so start parsing at offset 2 in the string. + for (( i = 2 ; i < end_pos - start_pos ; i += 1 )) ; do + (( j = i + start_pos - 1 )) + (( k = j + 1 )) + case "$arg[$i]" in + '$' ) style=dollar-double-quoted-argument + # Look for an alphanumeric parameter name. + if [[ ${arg:$i} = ([[:alpha:]_][[:alnum:]_]#|[[:digit:]]##) ]] ; then + (( k += $#MATCH )) # highlight the parameter name + (( i += $#MATCH )) # skip past it + elif [[ ${arg:$i} = [{]([[:alpha:]_][[:alnum:]_]#|[[:digit:]]##)[}]* ]] ; then + (( k += $#MATCH )) # highlight the parameter name and braces + (( i += $#MATCH )) # skip past it + else + continue + fi + ;; + "\\") style=back-double-quoted-argument + if [[ \\\`\"\$ == *$arg[$i+1]* ]]; then + (( k += 1 )) # Color following char too. + (( i += 1 )) # Skip parsing the escaped char. + else + continue + fi + ;; + *) continue ;; + + esac + -hsmw-add-highlight $j $k $style + done +} + +# Highlight special chars inside dollar-quoted strings +-hsmw-highlight-dollar-string() +{ + setopt localoptions noksharrays + local -a match mbegin mend + local MATCH; integer MBEGIN MEND + local i j k style + local AA + integer c + # Starting dollar-quote is at 1:2, so start parsing at offset 3 in the string. 
+ for (( i = 3 ; i < end_pos - start_pos ; i += 1 )) ; do + (( j = i + start_pos - 1 )) + (( k = j + 1 )) + case "$arg[$i]" in + "\\") style=back-dollar-quoted-argument + for (( c = i + 1 ; c <= end_pos - start_pos ; c += 1 )); do + [[ "$arg[$c]" != ([0-9xXuUa-fA-F]) ]] && break + done + AA=$arg[$i+1,$c-1] + # Matching for HEX and OCT values like \0xA6, \xA6 or \012 + if [[ "$AA" =~ "^(x|X)[0-9a-fA-F]{1,2}" + || "$AA" =~ "^[0-7]{1,3}" + || "$AA" =~ "^u[0-9a-fA-F]{1,4}" + || "$AA" =~ "^U[0-9a-fA-F]{1,8}" + ]]; then + (( k += $#MATCH )) + (( i += $#MATCH )) + else + if (( $#arg > $i+1 )) && [[ $arg[$i+1] == [xXuU] ]]; then + # \x not followed by hex digits is probably an error + style=unknown-token + fi + (( k += 1 )) # Color following char too. + (( i += 1 )) # Skip parsing the escaped char. + fi + ;; + *) continue ;; + + esac + -hsmw-add-highlight $j $k $style + done +} + +# Called with a single positional argument. +# Perform filename expansion (tilde expansion) on the argument and set $REPLY to the expanded value. +# Does not perform filename generation (globbing). +-hsmw-highlight-expand-path() +{ + (( $# == 1 )) || print -r -- >&2 "hsmw-highlight: BUG: -hsmw-highlight-expand-path: called without argument" + + # The $~1 syntax normally performs filename generation, but not when it's on the right-hand side of ${x:=y}. 
+ setopt localoptions nonomatch + unset REPLY + : ${REPLY:=${(Q)~1}} +} + +# ------------------------------------------------------------------------------------------------- +# Main highlighter initialization +# ------------------------------------------------------------------------------------------------- + +-hsmw-highlight-init() { + __hsmw_highlight_main__command_type_cache=() +} + +-hsmw-add-highlight() +{ + local -i start end + local highlight + start=$1 + end=$2 + shift 2 + for highlight; do + if (( $+HSMW_HIGHLIGHT_STYLES[$highlight] )); then + reply+=("$start $end $HSMW_HIGHLIGHT_STYLES[$highlight]") + break + fi + done +} + +__HSMW_MH_SOURCED=1 + +# vim:ft=zsh