#! @sed@ -f
#.
# A Doxygen filter for building Fish's lexicon, for documentation bling.
#.
# Written specially for Fish, the shell for the 90's, in sed, the state of the
# art text processor from the 70's. Who's sed? sed's dead, baby, sed's dead.*
# by Mark Griffiths <mark@thebespokepixel.com> *but quite portable
#.
# Finds \fish..\endfish blocks in documentation source files and enhances
# markup. Requires that the four character word 'classes' declared here are
# added to Doxyfiles as aliases i.e.:
#.
# Enhance for HTML Help pages (Doxyfile.user)…
# ALIASES = "fish=\htmlonly[block] \n<pre class=\"fish\">"
# ALIASES += "fish{1}=\htmlonly[block] \n<pre class=\"fish \1\">"
# ALIASES += "endfish=</pre>\endhtmlonly \n"
#.
# ALIASES += "blah{1}=<span class=\"comment\">\1</span>"
# ALIASES += "bltn{1}=<span class=\"command\">\1</span>" and so on...
#.
# And simplify for man pages (Doxyfile.help)…
# ALIASES = "fish=<pre>"
# ALIASES += "fish{1}=<pre>"
# ALIASES += "endfish=</pre>"
#.
# ALIASES += "blah{1}=\1"
# ALIASES += "bltn{1}=<em>\1</em>"...
#.
# It's meant to only ever be run once, during make, as Doxygen's 'INPUT
# FILTER', though can be run interactively by passing a file in via stdin. It
# won't respond to arguments.
#.
# It's most easily tested by passing test strings into the compiled script.
# The line must start with a backslash-prefixed \fish, because that is what
# the range address below matches:
#.
# printf '%s\n' '\fish Line to test' | ./fish_lexicon_filter
#.
# The, at times, archaic looking regex is down to ensuring portable sed BREs
#.
# This code is licensed with fish under the GPL 2.0.
#.
# Pattern flow control for scanning doc.h
# Only lines from one containing \fish up to one containing \endfish are
# handled here; everything else hits the bare 'b' below and is printed as-is.
/\\fish/,/\\endfish/ {
    # Open \fish block, firstly if it's on its own line
    # (with or without a {class} argument): print it untouched.
    /^\\fish$/b
    /^\\fish{[^}]*}$/b
    # Then if it's inline. Remove and process immediately...
    /^\\fish.*$/ {
        # Catch @ symbol
        # (swapped for the @at placeholder so it cannot be confused with the
        # @xxxx{...} markup this script generates)
        s/@/@at/g
        # Catch & symbol
        # (only when the next character is not a lowercase letter)
        s/&\([^a-z]\)/@amp\1/g
        # Catch {{ & }} symbols
        s/{{/@curlyL/g
        s/}}/@curlyR/g
        # Drop the inline \fish / \endfish markers themselves.
        s/^\\fish//
        s/\\endfish//
        b html
    }
    # Output blank lines
    /^$/b
    # Inside \fish block. Process...
    /\\endfish/!{
        # Catch @ symbol
        s/@/@at/g
        # Catch & symbol
        s/&\([^a-z]\)/@amp\1/g
        # Catch {{ & }} symbols
        s/{{/@curlyL/g
        s/}}/@curlyR/g
        # Preprocess HTML and HTML-like formatting
        /<[^>]*>/ {
            b html
        }
        # Process the rest
        b process
    }
    # End block
    /\\endfish/b
}
#.
# This is not the pattern we're looking for
b
#.
# Process any HTML tags.
# Structured to reduce sed's greediness.
# Each s command has no g flag, so it rewrites one tag per pass; 't html'
# repeats the pass for as long as any substitution succeeded.
:html
# Spans
# Opening tag becomes '@span{value,' / '@spcl{value,'. The opening quote may be
# ' or ", but the closing quote matched here is always ".
s|<span style=['"]\([^'"][^'"]*\)">|@span{\1,|
s|<span class=['"]\([^'"][^'"]*\)">|@spcl{\1,|
s|</span>|}|
#.
# Bold
s|<b>|@bold{|
s|<b [^>]*>|@bold{|
s|</b>|}|
#.
# Strong (synonymous with bold here: both map to @bold)
s|<strong>|@bold{|
s|<strong [^>]*>|@bold{|
s|</strong>|}|
#.
# EMPHasis
s|<em>|@emph{|
s|<em [^>]*>|@emph{|
s|</em>|}|
#.
# Italic (synonymous with emphasis)
s|<i>|@emph{|
s|<i [^>]*>|@emph{|
s|</i>|}|
#.
# UNDeRline
s|<u>|@undr{|
s|<u [^>]*>|@undr{|
s|</u>|}|
# Backslash (when escaping output)
s|<bs>|@bksl{|
s|</bs>|}|
t html
#.
# Some handy non-standard extensions
# autoSuGgeSTion
s|<s>|@sgst{|
s|<s [^>]*>|@sgst{|
s|</s>|}|
#.
# MaTCH
s|<m>|@mtch{|
s|<m [^>]*>|@mtch{|
s|</m>|}|
#.
# SearchMaTCh
s|<sm>|@smtc{|
s|<sm [^>]*>|@smtc{|
s|</sm>|}|
#.
# ERrOR
s|<eror>|@eror{|
s|<eror [^>]*>|@eror{|
s|</eror>|}|
#.
# AsIs - protect from auto-formatting
s|<asis>|@asis{|
s|</asis>|}|
#.
# OUTPut - protect from auto-formatting
s|<outp>|@outp{|
s|</outp>|}|
t html
#.
# Clean other unhandled html
# Any remaining <tag ...>text</tag> pair with no nested tag is reduced to its
# text content.
s|<\([A-Za-z][A-Za-z]*\)[^>]*>\([^<]*\)</\1>|\2|
t html
#.
# Start processing entities
# (the :html section falls through to here once no tag substitution applies)
:process
# Output:
# Line marked as output pass through
/@outp/ {
    b
}
# Comments:
# Capture full line comments
/^\( *\)#\(.*\)$/ {
    # Assume any line starting with a # is complete
    # The empty regex reuses the address above; the bare 't' then ends
    # processing of this line.
    s//\1@blah{\2}/
    t
}
# Match sub-line comments
# Skipped when the line contains '#' followed by three hex digits.
# presumably so colour codes such as #fff are not taken for comments - confirm
# The comment is moved onto its own line in the pattern space, introduced by
# '<' and with a trailing backslash left at each cut point.
/#[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]/ ! {
    s/#\(.*$\)/\\\
<@blah{#\1}\
/
}
#.
# Protected entities These shouldn't allow nested structure, so we move them
# to a marked, new line for a future extract/process/insert action.
#.
# AsIs block - resists formatting.
s/@asis{\(.*\)}/\\\
<@asis{\1}\
/g
#.
# Manual <span>
# As with the other protected entities here, the match is replaced by
# backslash + newline + '<' + entity + backslash + newline, so each entity
# sits on its own '<'-prefixed line inside the pattern space.
s/@span{\(.*\)}/\\\
<@span{\1}\
/g
#.
# String Literals
s/"\([^"]*\)"/\\\
<@dblq{\1}\
/g
s/'\([^']*\)'/\\\
<@sglq{\1}\
/g
#.
# AutoSuggestions.
# (no g flag: only the first @sgst{...} on the line is moved)
s/@sgst{\([^}]*\)}/\\\
<@sgst{\1}\
/
#.
# Command/Function options
# Short options
# Must follow '[', '(' or a space, and be followed by a character that is
# neither a letter nor '}'.
s/\([[( ]\)-\([A-Za-z][A-Za-z]*\)\([^A-Za-z}]\)/\1\\\
<@opts{-\2}\
\3/g
#.
# Long options
s/\([[( ]\)--\([A-Za-z][A-Za-z0-9=_-]*\)\([^A-Za-z0-9=_-]*\)/\1\\\
<@opts{--\2}\
\3/g
#.
# Prompt
# '~>_' becomes a @prmt entity wrapping a @path{~} entity; a bare '>_' at the
# start of the line becomes a plain @prmt marker.
s/~>_/\\\
<@prmt{\
<@path{~}\
}/
s/^>_/@prmt/
#.
# Cursor
#.
# '___' at end of line is a bare cursor; otherwise the cursor wraps the
# single character that follows it.
s/___$/@curs/
s/___\(.\)/\\\
<@curs{\1}\
/
#.
# Escaped Options
s/ \\\([A-Za-z0-9][A-Za-z0-9]*\) / @bksl{\1} /g
#.
# Trailing Backslash
s/ \\$/ @bksl{ }/
#.
# Paths
# If a quoted string or span entity already contains '~' or '/', skip the
# path markup below and go straight to the protect subroutine.
/\n<@dblq[^}]*[~/]/b protect
/\n<@sglq[^}]*[~/]/b protect
/\n<@span[^}]*[~/]/b protect
#.
# Normal Directory
# 'mkdir ' gets a ':' prefix on its argument so that it is picked up by the
# ':' path rule below. Backslash-escaped spaces inside a path are temporarily
# replaced with '=' (restored later under "Restore Paths").
s|mkdir |mkdir :|
s|\([~/:][/]*[.A-Za-z_0-9*/-]*\)\\ |\1=|g
s| \([~/][/]*[.A-Za-z_0-9*/=-]*\)| \\\
<@path{\1}\
|g
s| \(:[/]*[.A-Za-z_0-9*/=-]*\)| \\\
<@path{\1}\
|g
t protect
#.
# Dot Relative Directory (no spaces in path)
s| \(./[A-Za-z_0-9/-]*\)| \\\
<@path{\1}\
|g
b protect
#.
# Tidy up. Merge back 'pure' entities from hold space.
# NOTE(review): no active 'b tidy' is visible in this file (only a commented
# '# b tidy' near the end); presumably the build appends the generated lexicon
# followed by 'b tidy' - confirm against the Makefile.
:tidy
#.
# Convert loose text to arguments
s/ \([a-zA-Z0-9+%*.-][{},a-zA-Z0-9%*._/?!=-]*\)/ @args{\1}/g
#.
# Or when tight to a newline
s|\n\([a-zA-Z0-9+%*.-][{},a-zA-Z0-9%*._/?!-]*\)|\
@args{\1}|g
#.
# Or when tight to the beginning
s|^\([a-zA-Z][{},a-zA-Z0-9%*._/?!-]*\)|@args{\1}|g
#.
# Pick up loose text after markup.
# (text directly following '}' or ')' and ending at ';', end of line or @EOL)
s/\([})]\)\([a-zA-Z0-9+%*.,][,a-zA-Z0-9%*._/?!-]*\);/\1@args{\2};/g
s/\([})]\)\([a-zA-Z0-9+%*.,][,a-zA-Z0-9%*._/?!-]*\)$/\1@args{\2}/g
s/\([})]\)\([a-zA-Z0-9+%*.,][,a-zA-Z0-9%*._/?!-]*\)@EOL/\1@args{\2}/g
#.
# Uncomment the following 2 lines (ss) to log the pattern buffer.
# NOTE: these two lines are currently active, so the pattern buffer is written
# to lexicon.log for every line that reaches this point.
s/^.*$/Pattern : &/w lexicon.log
s/^Pattern : //
#.
# Uncomment the following 4 lines (xssx) to log the hold buffer.
# NOTE: these four lines are currently active as well.
x
s/^.*$/HoldBufr: &/w lexicon.log
s/^HoldBufr: //
x
#.
# Tack the hold space to the end of the pattern buffer.
G
#.
# Uncomment the following two lines (ss) to log the buffer join.
# NOTE: this logging pair is currently active and writes to lexicon.log.
s/^.*$/Joined : &/w lexicon.log
s/^Joined : //
#.
# Iterate over alternate lines, matching '<' to '\'
# Each pass takes one backslash + newline cut point and the next '<@entity'
# found after it, and splices that entity back in at the cut point. 't join'
# repeats until no pairing is left.
:join
s,\([^\\ ]*\)\\\n\([^<]*\)<\(@[^}]*[}\\]\),\1\3\2,
t join
# Clean up stray new lines
s/\n//g
# Clean up past @EOL
s/@EOL.*$//g
#.
# Uncomment the following two lines (ss) to log the buffer before 'cleaning'.
# NOTE: this logging pair is currently active and writes to lexicon.log.
s/^.*$/PreClean: &/w lexicon.log
s/^PreClean: //
# Clean up special cases
#.
# Inside a comment (@blah), turn quote entities back into literal quotes and
# unwrap one other four-letter entity, keeping only its content.
/@blah/{
    s/\(blah{[^@]*\)@sglq{\([^}]*\)}/\1'\2'/
    s/\(blah{[^@]*\)@dblq{\([^}]*\)}/\1"\2"/
    s/\(blah{[^@]*\)@....{\([^}]*\)}/\1\2/
}
# Inside a double-quoted string, repeatedly unwrap nested entities whose
# four-letter code does not end in 'q'.
/@dblq/{
    :cleandblq
    s/\(dblq{[^@}<]*\)[<]*@...[^q]{\([^}]*\)}/\1\2/
    t cleandblq
}
# Same for single-quoted strings.
/@sglq/{
    :cleansglq
    s/\(sglq{[^@}<]*\)[<]*@...[^q]{\([^}]*\)}/\1\2/
    t cleansglq
}
# A variable name that was also tagged as builtin/function/command/argument
# is unwrapped so that only the @vars markup remains.
/@vars/{
    :cleanvars
    s/\(vars{@optr{$}[^@}]*\)@bltn{\([^}]*\)}/\1\2/
    s/\(vars{@optr{$}[^@}]*\)@func{\([^}]*\)}/\1\2/
    s/\(vars{@optr{$}[^@}]*\)@cmnd{\([^}]*\)}/\1\2/
    s/\(vars{@optr{$}[^@}]*\)@args{\([^}]*\)}/\1\2/
    t cleanvars
}
# Unwrap entities nested inside a redirection. The last rule pulls a @path
# that follows the redirection (after optional spaces) inside the @redr braces.
/@redr/{
    :cleanredr
    s/\(redr{[^@}]*\)@bltn{\([^}]*\)}/\1\2/
    s/\(redr{[^@}]*\)@func{\([^}]*\)}/\1\2/
    s/\(redr{[^@}]*\)@cmnd{\([^}]*\)}/\1\2/
    s/\(redr{[^@}]*\)@fsfo{\([^}]*\)}/\1\2/
    s/\(redr{[^}]*\)}\( *\)@path{\([^}]*\)/\1\2\3/
    t cleanredr
}
# Inside an autosuggestion that starts with a cursor, unwrap every nested
# entity so the suggestion text carries a single style.
/@sgst/{
    s/@sgst{<@/@sgst{@/
    :cleansgst
    s/\(sgst{@curs{.}[^@]*\)@bltn{\([^}]*\)}/\1\2/
    s/\(sgst{@curs{.}[^@]*\)@func{\([^}]*\)}/\1\2/
    s/\(sgst{@curs{.}[^@]*\)@cmnd{\([^}]*\)}/\1\2/
    s/\(sgst{@curs{.}[^@]*\)@opts{\([^}]*\)}/\1\2/
    s/\(sgst{@curs{.}[^@]*\)@path{\([^}]*\)}/\1\2/
    s/\(sgst{@curs{.}[^@]*\)@args{\([^}]*\)}/\1\2/
    s/\(sgst{@curs{.}[^@]*\)@fsfo{\([^}]*\)}/\1\2/
    t cleansgst
}
# A file name keeps only the @fsfo markup.
/@fsfo/{
    :cleanfsfo
    s/\(fsfo{[^@}]*\)@bltn{\([^}]*\)}/\1\2/
    s/\(fsfo{[^@}]*\)@func{\([^}]*\)}/\1\2/
    s/\(fsfo{[^@}]*\)@cmnd{\([^}]*\)}/\1\2/
    t cleanfsfo
}
# Remove the leftover '<' marker in front of a path nested in a prompt.
/@prmt{/{
    s/@prmt{<@path/@prmt{@path/
}
#.
# Restore Paths
# Undo the temporary encodings applied before path markup: '=' goes back to a
# space (in @fsfo) or a backslash-escaped space (in @path), and the ':' prefix
# is dropped. An empty @path{} is removed.
/@fsfo/ {
    s/\(@fsfo{[^=]*\)=/\1 /
}
/@path/ {
    :cleanpath
    s/\(@path{[^:]*\):/\1/
    s/\(@path{[^=]*\)=/\1\\ /
    t cleanpath
    s/@path{}//
}
#.
# Finally, restructure to follow Fish's command [arguments] semantics.
# Find the initial command, and change any others to arguments, up to a |, ( or ;
# Assumes that a valid line will start with either a builtin, a function or a binary.
#.
# 'if' and 'for' seem to be special cases
#.
# How it works: anything that should stay a command is renamed to a temporary
# @xbln / @xfnc / @xcmd code. Further down, every remaining @bltn / @func /
# @cmnd is demoted to @args and the temporary codes are renamed back.
#.
# Uncomment the following two lines (ss) to log the buffer before semantic conversion.
# NOTE: this logging pair is currently active and writes to lexicon.log.
s/^.*$/PreArgs : &/w lexicon.log
s/^PreArgs : //
#.
# Find initial commands/functions/binaries
#.
# Store prmt, if present
#.
# The leading @prmt (plus trailing spaces) is parked in the hold space and
# stripped from the pattern space, so the '^' anchored rules below see the
# command itself.
/@prmt/ {
    h
    s/^\(@prmt *\).*$/\1/
    x
    s/^@prmt *//
}
#.
# Special case for optional commands
s/@args{\[@bltn/@args{[@xbln/g
# Special case for one-line 'if' statements
# The empty regex in 's//.../' reuses the block's address pattern. The other
# rules have no g flag, so only the first occurrence of each is kept.
/@bltn{if}/ {
    s//@xbln{if}/
    s/@bltn{set}/@xbln{set}/
    s/@bltn{not}/@xbln{not}/
    s/@bltn{else}/@xbln{else}/
    s/@bltn{contains}/@xbln{contains}/
    s/@bltn{test}/@xbln{test}/
    s/@bltn{end}/@xbln{end}/
    s/@cmnd{grep}/@xcmd{grep}/
}
# one-line 'for' statements
/@bltn{for}/ {
    s//@xbln{for}/
    s/@args{in}/@xbln{in}/
}
# one-line 'begin' statements
/@bltn{begin}/ {
    s//@xbln{begin}/
    s/@bltn{end}/@xbln{end}/
}
# one-line 'break' statements
/@bltn{break}/ {
    s//@xbln{break}/
    s/@bltn{end}/@xbln{end}/
}
# one-line 'continue' statements
/@bltn{continue}/ {
    s//@xbln{continue}/
    s/@bltn{end}/@xbln{end}/
}
# one-line 'switch' statements
/@bltn{switch}/ {
    s//@xbln{switch}/
    s/@bltn{case}/@xbln{case}/
    s/@bltn{end}/@xbln{end}/
}
# one-line 'function' statements
/@bltn{function}/ {
    s//@xbln{function}/
    s/@bltn{return}/@xbln{return}/
    s/@bltn{end}/@xbln{end}/
}
# one-line 'bind' statements - special input functions
# A tagged word immediately followed by '-more-words' is merged into a single
# argument.
/@bltn{bind}/ {
    s//@xbln{bind}/
    s/@....{\([a-z]*\)}\(-[a-z-]*\)/@args{\1\2}/
}
# one-line 'builtin' statements
# Both 'builtin' and the command word that follows it stay commands.
s/@bltn{builtin} @bltn/@xbln{builtin} @xbln/g
s/@bltn{builtin} @cmnd/@xbln{builtin} @xcmd/g
s/@bltn{builtin} @func/@xbln{builtin} @xfnc/g
#.
# one-line 'command' statements
s/@bltn{command} @bltn/@xbln{command} @xbln/g
s/@bltn{command} @cmnd/@xbln{command} @xcmd/g
s/@bltn{command} @func/@xbln{command} @xfnc/g
#.
# one-line 'and/or' statements
# Both the conjunction and the command word that follows it stay commands.
s/@bltn{and} @bltn/@xbln{and} @xbln/g
s/@bltn{and} @cmnd/@xbln{and} @xcmd/g
s/@bltn{and} @func/@xbln{and} @xfnc/g
s/@bltn{or} @bltn/@xbln{or} @xbln/g
s/@bltn{or} @cmnd/@xbln{or} @xcmd/g
s/@bltn{or} @func/@xbln{or} @xfnc/g
#.
# A command/builtin/function stays a command when it is first on the line
# (after optional spaces), or directly follows ';', '(', ')' or a pipe.
s/^\( *\)@cmnd/\1@xcmd/
s/\( *[;()] *\)@cmnd/\1@xcmd/g
s/\( *@redr{|} *\)@cmnd/\1@xcmd/g
s/^\( *\)@bltn/\1@xbln/
s/\( *[;()] *\)@bltn/\1@xbln/g
s/\( *@redr{|} *\)@bltn/\1@xbln/g
s/^\( *\)@func/\1@xfnc/
s/\( *[;()] *\)@func/\1@xfnc/g
s/\( *@redr{|} *\)@func/\1@xfnc/g
# Backslash escapes between spaces are arguments; a command name nested in a
# backslash escape is unwrapped.
s/ @bksl{\([^}]*\)} / @args{@bksl{\1}} /g
s/ @bksl{@bltn{\([^}]*\)}/ @args{@bksl{\1}/g
s/ @bksl{@func{\([^}]*\)}/ @args{@bksl{\1}/g
s/ @bksl{@cmnd{\([^}]*\)}/ @args{@bksl{\1}/g
# Everything still tagged as a command at this point is really an argument.
s/@bltn/@args/g
s/@func/@args/g
s/@cmnd/@args/g
#.
# NOTE: this logging pair is active and writes to lexicon.log.
s/^.*$/PostArgs: &/w lexicon.log
s/^PostArgs: //
#.
# Rename the temporary @x... codes back to their real names.
s/xbln/bltn/g
s/xfnc/func/g
s/xcmd/cmnd/g
# Re-attach a prompt that was parked in the hold space: if the hold space
# starts with @prmt, append the processed line to it and join the two;
# otherwise swap straight back.
x
/^@prmt/ {
    G
    s/^@prmt \n/@prmt /
}
/^@prmt/ ! {
    x
}
#.
# Mark up sensitive character entities.
#.
# The output lands inside <pre> (via \htmlonly for the HTML docs), so literal
# markup characters must be emitted as entities. '\&' in a replacement is a
# literal ampersand, since a bare '&' would mean "the matched text". The
# previous replacements (\<, \>, \&, \{, \}) wrote the raw characters, which
# made the first two rules no-ops and let '&', '{' and '}' through unescaped.
s/</\&lt;/g
s/>/\&gt;/g
s/@amp/\&amp;/g
s/@curlyL/\&#123;/g
s/@curlyR/\&#125;/g
s/@at/@atat{ }/g
#.
# Final post processing
# ';' after an entity becomes a @redr{;} separator.
s/};\([^]]\)/}@redr{;}\1/g
s/};$/}@redr{;}/
s/@sglq{}/''/
# Loose square brackets are folded into @args.
s/ \[\([@(]\)/ @args{[}\1/g
s/ \[\([A-Z]*\) / @args{[\1} /g
s/@args{\([a-zA-Z0-9_.]*\)}\]/@args{\1]}/g
s/@args{\([a-zA-Z0-9_.]*\)}: /@args{\1:} /g
# Anything echo prints that looks like a file name is just an argument.
s/@bltn{echo} @fsfo/@bltn{echo} @args/g
s/@bltn{echo}\([a-zA-Z0-9.@{} _-]*\)@fsfo/@bltn{echo}\1@args/g
s/ \] / @args{]} /g
s/ \]$/ @args{]}/g
s/\]}\]$/]]}/
# Parentheses are operators; escaped parentheses keep their backslash markup.
s/\\\([()]\)/@optr{@bksl{\1}}/g
s/\([()]\)/@optr{\1}/g
s/\\\\\([cdgnstwx?]\)/@bksl{\1}/g
s/\\n/@bksl{n}/
s/%\([diouxXfgGeEsbmy]\)/@pcnt{\1}/g
s/ \\$//
#.
# Uncomment the following two lines (ss) to log the final output, sent to Doxygen.
# NOTE: these lines are active. Two newlines are appended for the log entry
# and removed again straight afterwards.
s/^.*$/Output : &\
\
/w lexicon.log
s/^Output : //
s/\n\n$//
#.
# Lines are reassembled, so branch to end
b
# === Main End ===
#.
#.
# === Subroutines ===
# Branched to when content requires.
#.
# Move protected content to hold space and mark up other entities.
:protect
# Add an 'End of Line' marker
s/$/@EOL/
# NOTE: this logging pair is active and writes to lexicon.log.
s/^.*$/Input : &/w lexicon.log
s/^Input : //
# Keep a full copy (text plus '<@entity' lines) in the hold space.
h
# Clear out any content that has already been marked up, to prevent further
# markup on words that should be left alone.
#.
# Each pass removes one newline + '<@entity' line from the pattern space and
# trims what follows a trailing backslash-space; repeats until nothing changes.
:patternflush
s/\n<@[^}]*[}\\]//
s/\\ [^\\]*$/\\/
t patternflush
s/\n$//g
#.
# Swap the pattern and hold buffers and remove unmarked lines and extra
# characters. Basically the inverse of the 'patternflush' action, with
# additional trailing characters stripped.
x
# Skipped when the buffer is a single unterminated '<@...' entity.
/^<@[^}]*$/ ! {
    s/[^\<]*//
    s/^ *\\\n//g
    s/\n *\\//g
    s/\n@EOL//g
    s/[()] \\//g
    s/^[^\<][^@][^\\]*//
    s/\n[]|;) ][^\\]*\\//
    s/\n[]|;) a-zA-Z0-9-][^\\]*$//
    s/\n[]|;)}]\\//
    s/\n[]|;)}]\n//
    s/\n[]|;)}]$//
    s/[()]$//
    s/}@curs/}/
    s/\n@curs$//
    s/\n[^\<@][^\\]*\\//
    s/\n[^\<@][^\\]*//
    s/^\\//
    s/\n$//g
}
# Replace the first backslash + newline with a plain newline.
s/\\\n/\
/
s/< \n//
s/^[a-z][a-z]* \n//
#.
# Swap the buffers back.
# (pattern space: unprotected text to mark up; hold space: protected entities)
x
#.
# A special case. Tidy up after performing command substitution.
# Redirectors
# Pipes, a trailing '&', '>&N' style duplications and '<' / '>' / '^'
# redirections (with optional leading fd number and target) become @redr.
s/\([^{|] *\)|/\1@redr{|}/g
s/\&@EOL$/@redr{@amp}@EOL/g
s/@amp@EOL$/@redr{@amp}@EOL/g
s/\([<>]\)@amp\([0-9]\)/@redr{\1@amp\2}/g
s/\([^{&] *\)&[^@a-z]/\1@redr{\&}/g
s/\([^{<>^] *\)\([0-9]* *[<>^][<>^]*[^@][a-zA-Z0-9./_-]*\)/\1@redr{\2}/g
# Move a cut-point backslash from inside a closing brace to just after it.
s/\\}/}\\/g
#.
# Now we can add in 'unsafe' entities that would be too greedy.
# Arrays
s/[[][0-9$a-zA-Z_;. -]*]/@args{&}/g
#.
# Declared Variables
# The leading '$' (one or more) is tagged as an operator inside @vars.
s/\($[$]*\)\([A-Za-z_0-9][A-Za-z_0-9]*\)/@vars{@optr{\1}\2}/g
#.
# Files
# 'name.ext' shaped words; skipped on lines containing the @at placeholder.
# presumably to leave e-mail-like text alone - confirm
/@at/ ! {
    s/\([A-Za-z0-9_*-][A-Za-z0-9_*-]*\.[a-z0-9*][a-z0-9*]*\)/@fsfo{\1}/g
}
#.
#### This section is built in the Makefile. Just some formatting examples. #####
#.
# Fish builtin (bltn) <- 4 character code that has a Doxygen alias counterpart
# template : s/[[:<:]]function[[:>:]]/@bltn{&}/
#.
# s,[[:<:]]function[[:>:]],@bltn{function},g
# s,[[:<:]]begin[[:>:]],@bltn{begin},g
# ...
#.
# Fish functions (func)
# Populated by 'public' functions' filename.
#.
# s,[[:<:]]fish_pwd[[:>:]],@func{fish_pwd},g
# s,[[:<:]]fish_prompt[[:>:]],@func{fish_prompt},g
# ...
#.
# Shell Command (cmnd)
# Populated from completion filenames
#.
# s,[[:<:]]seq[[:>:]],@cmnd{seq},g
# s,[[:<:]]rm[[:>:]],@cmnd{rm},g
# ...
#.
# Color Variable (clrv)
# Populated from __fish_config_interactive.fish
# Allows fish's 'special' color variables to be identified
#.
# s,[[:<:]]fish_color_normal[[:>:]],@clrv{fish_color_normal},g
# s,[[:<:]]fish_color_command[[:>:]],@clrv{fish_color_command},g
#.
# Once all of the commands/functions/variables/special's have been marked up,
# branch back to tidy up and collapse the pattern/hold buffers back to a
# single line.
#.
# b tidy
#.
#.
# Below is a special section that adds vocabulary to the lexicon during 'make'.
# As the lexicon is written into the output lexicon_filter, portability is
# automatically handled.
#.
#.!# cmnd whoami
#.!# cmnd mkdir
#.!# cmnd basename
#.!# bltn sleep
#.!# args in