fish-shell/lexicon_filter.in
2014-09-03 14:43:26 +01:00

478 lines
10 KiB
Text

#! @sed@ -f
#.
# A Doxygen filter for building Fish's lexicon, for documentation bling.
#.
# Written specially for Fish, the shell for the 90's, in sed, the state of the
# art text processor from the 70's. Who's sed? sed's dead, baby, sed's dead.*
# by Mark Griffiths <mark@thebespokepixel.com> *but quite portable
#.
# Finds /fish../endfish blocks in documentation source files and enhances
# markup. Requires that the four character word 'classes' declared here are
# added to Doxyfiles as aliases i.e.:
#.
# Enhance for HTML Help pages (Doxyfile.user)…
# ALIASES = "fish=\htmlonly[block] \n<pre class=\"fish\">"
# ALIASES += "fish{1}=\htmlonly[block] \n<pre class=\"fish \1\">"
# ALIASES += "endfish=</pre>\endhtmlonly \n"
#.
# ALIASES += "blah{1}=<span class=\"comment\">\1</span>"
# ALIASES += "cmnd{1}=<span class=\"command\">\1</span>" and so on...
#.
# And simplify for man pages (Doxyfile.help)…
# ALIASES = "fish=<pre>"
# ALIASES += "fish{1}=<pre>"
# ALIASES += "endfish=</pre>"
#.
# ALIASES += "blah{1}=\1"
# ALIASES += "cmnd{1}=<em>\1</em>"...
#.
# It's meant to only ever be run once, during make, as Doxygen's 'INPUT
# FILTER', though it can be run interactively by passing a file in via stdin.
# It won't respond to arguments.
#.
# It's most easily tested by passing test strings into the compiled script:
#.
# echo "/fish Line to test" | ./fish_lexicon_filter
#.
# The, at times, archaic-looking regex is down to ensuring portable sed BREs.
#.
# Licensed under whatever terms are most compatible with Fish's GPLv2 license,
# basically free to use/reuse/redistribute/laugh at/be inspired by. Don't
# pretend it's your code unless you've spent more late nights on it than me but
# if it saves you a late night, do what you can to help rebalance karma. If it
# doesn't work or breaks something, it's your fault for using it: if it seems
# to work it's more likely a hallucination than anything based in reality.
#.
# Pattern flow control for scanning doc.h
/\\fish/,/\\endfish/ {
    # Dispatcher for lines between a \fish marker and the next \endfish.
    #.
    # Opening marker on its own line, with or without a {class} argument:
    # emit it untouched ('b' without a label jumps to the end of the script).
    /^\\fish$/b
    /^\\fish{[^}]*}$/b
    # Opening marker with content on the same line. Strip the markers and
    # process the remainder immediately.
    /^\\fish.*$/ {
        # Hide every literal @ behind the ((d)) placeholder so it cannot be
        # mistaken for markup. This must be the same token the :entities step
        # turns back into @ (s/((d))/@/g), and it must be applied globally so
        # that no later @ on the line slips through.
        s/@/((d))/g
        s/^\\fish//
        s/\\endfish//
        b html
    }
    # Output blank lines unchanged.
    /^$/b
    # Any other line inside the block, except the closing marker.
    /\\endfish/!{
        # Hide every literal @ (same placeholder as the inline case above).
        s/@/((d))/g
        # Lines carrying HTML or HTML-like tags are preprocessed first...
        /<[^>]*>/ {
            b html
        }
        # ...everything else goes straight to entity markup.
        b process
    }
    # Closing marker: emit untouched.
    /\\endfish/b
}
#.
# This is not the pattern we're looking for
b
#.
# Process any HTML tags.
# Structured to reduce sed's greediness.
:html
    # Convert HTML-like tags to @xxxx{...} markup. Each rule rewrites only the
    # first match on the line; the 't html' commands loop back until a pass
    # makes no further substitution.
    #.
    # Spans. The opening quote may be ' or ", so either is accepted as the
    # closing quote too (previously only " closed the attribute, which left
    # single-quoted attributes unconverted).
    s|<span style=['"]\([^'"][^'"]*\)['"]>|@span{\1,|
    s|<span class=['"]\([^'"][^'"]*\)['"]>|@spcl{\1,|
    s|</span>|}|
    #.
    # Bold
    s|<b>|@bold{|
    s|<b [^>]*>|@bold{|
    s|</b>|}|
    #.
    # Strong (mapped to the same markup as bold)
    s|<strong>|@bold{|
    s|<strong [^>]*>|@bold{|
    s|</strong>|}|
    #.
    # EMPHasis
    s|<em>|@emph{|
    s|<em [^>]*>|@emph{|
    s|</em>|}|
    #.
    # Italic (synonymous with emphasis)
    s|<i>|@emph{|
    s|<i [^>]*>|@emph{|
    s|</i>|}|
    #.
    # UNDeRline
    s|<u>|@undr{|
    s|<u [^>]*>|@undr{|
    s|</u>|}|
t html
    #.
    # Some handy non-standard extensions
    # autoSuGgeSTion
    s|<s>|@sgst{|
    s|<s [^>]*>|@sgst{|
    s|</s>|}|
    #.
    # MaTCH
    s|<m>|@mtch{|
    s|<m [^>]*>|@mtch{|
    s|</m>|}|
    #.
    # SearchMaTCh
    s|<sm>|@smtc{|
    s|<sm [^>]*>|@smtc{|
    s|</sm>|}|
    #.
    # ERrOR
    s|<error>|@eror{|
    s|<error [^>]*>|@eror{|
    s|</error>|}|
    #.
    # AsIs - protect from auto-formatting
    s|<asis>|@asis{|
    s|</asis>|}|
    #.
    # OUTPut - protect from auto-formatting
    s|<outp>|@outp{|
    s|</outp>|}|
t html
    #.
    # Clean other unhandled html: drop a matched <tag ...>text</tag> pair and
    # keep only the enclosed text.
    s|<\([A-Za-z][A-Za-z]*\)[^>]*>\([^<]*\)</\1>|\2|
t html
#.
# Start processing entities
:process
# Mark up the entities on one line of fish code. Entities that must not be
# marked up any further are "protected": the entity is replaced in the text by
# a backslash followed by a newline, and the entity itself is placed on a new
# line of the pattern space prefixed with '<', followed by another newline.
# Every path through this section ends with a branch to 'protect' or to the
# end of the script.
#.
# Output:
# Lines marked as output pass through ('b' without a label jumps to the end of
# the script).
/@outp/ {
b
}
# Comments:
# Capture full line comments
/^\( *\)#\(.*\)$/ {
# Assume any line starting with a # is complete. The empty regex in s//.../
# reuses the address pattern above; 't' without a label then jumps to the end
# of the script because the substitution succeeded.
s//\1@blah{\2}/
t
}
# Match sub-line comments. Skipped when the line contains a # followed by
# three hex digits (presumably a colour value such as #ff0 - confirm).
/#[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]/ ! {
s/#\(.*$\)/\\\
<@blah{#\1}\
/
}
#.
# Protected entities. These shouldn't allow nested structure, so we move them
# to a marked, new line for a future extract/process/insert action.
#.
# AsIs block - resists formatting.
# NOTE(review): \(.*\) is greedy, so with several @asis{...} on one line the
# match runs to the last '}' on the line - confirm that is acceptable.
s/@asis{\(.*\)}/\\\
<@asis{\1}\
/g
#.
# Manual <span> (same greedy match as @asis above).
s/@span{\(.*\)}/\\\
<@span{\1}\
/g
#.
# String Literals: double-quoted, then single-quoted. The quotes themselves
# are dropped and replaced by the @dblq{...} / @sglq{...} wrapper.
s/"\([^"]*\)"/\\\
<@dblq{\1}\
/g
s/'\([^']*\)'/\\\
<@sglq{\1}\
/g
#.
# AutoSuggestions (first occurrence on the line only; no g flag).
s/@sgst{\([^}]*\)}/\\\
<@sgst{\1}\
/
#.
# Command/Function options
# Short options: a dash, one letter, then a character that is neither a letter
# nor '}'. That trailing character is kept, after the protected entity.
s/-\([A-Za-z]\)\([^A-Za-z}]\)/\\\
<@opts{-\1}\
\2/g
#.
# Long options: two dashes, a letter, then letters, digits, '=', '_' or '-'.
s/--\([A-Za-z][A-Za-z0-9=_-]*\)\([^A-Za-z0-9=_-]*\)/\\\
<@opts{--\1}\
\2/g
#.
# Prompt: '~>_' anywhere becomes a protected @prmt{~}; '>_' at the start of
# the line becomes a bare @prmt.
s/~>_/\\\
<@prmt{~}\
/
s/^>_/@prmt/
#.
# Cursor: '__' at the end of the line becomes a bare @curs; '__' followed by a
# character becomes a protected @curs{c} holding that character.
#.
s/__$/@curs/
s/__\(.\)/\\\
<@curs{\1}\
/
#.
# Paths
# If a protected string literal or span already contains '~' or '/', skip the
# path markup below and go straight to 'protect'.
/\n<@dblq[^}]*[~/]/b protect
/\n<@sglq[^}]*[~/]/b protect
/\n<@span[^}]*[~/]/b protect
#.
# Normal Directory
# Prefix the argument of mkdir with ':' so that the ' :' rule further down can
# recognise it as a path even without a leading '~' or '/'.
s|mkdir |mkdir :|
# Inside something that looks like a path, turn an escaped space ('\ ') into
# '=' so the path stays a single word during markup.
s|\([~/:][/]*[.A-Za-z_0-9/-]*\)\\ |\1=|g
s|\([~/][/]*[.A-Za-z_0-9/=-]*\)|\\\
<@path{\1}\
|g
# NOTE(review): 't' tests whether ANY substitution has succeeded since the
# last input line was read or the last 't' was taken, not just the one
# directly above, so this branch is also taken when only an earlier rule
# matched - confirm that is intended.
t protect
s| \(:[/]*[.A-Za-z_0-9/=-]*\)|\\\
<@path{\1}\
|g
t protect
#.
# Dot Relative Directory (no spaces in path)
# NOTE(review): the '.' before '/' is unescaped, so it matches any character,
# not only a literal dot - confirm whether '\./' was intended.
s| *\(./[A-Za-z_0-9/-]*\)| \\\
<@path{\1}\
|g
b protect
#.
# Tidy up. Merge back 'pure' entities from hold space.
:tidy
    # Reassemble the marked-up text and the protected entities held in the
    # hold space into a single output line, then apply the final clean-ups.
    #.
    # Debug logging. Every logging group in this section is disabled by
    # default so that a normal run writes no log files; all of them use the
    # same file name. Each group wraps the buffer in a tag, writes it out and
    # strips the tag again, so enabling one does not change the output.
    #.
    # Uncomment the following 2 lines (ss) to log the pattern buffer.
    #   s/^.*$/PATT: &/w lexicon.log
    #   s/^PATT: //
    #.
    # Uncomment the following 4 lines (xssx) to log the hold buffer.
    #   x
    #   s/^.*$/HOLD: &/w lexicon.log
    #   s/^HOLD: //
    #   x
    #.
    # Tack the hold space to the end of the pattern buffer (G appends a
    # newline followed by the hold space).
    G
    #.
    # Uncomment the following 2 lines (ss) to log the buffer join.
    #   s/^.*$/JOIN: &/w lexicon.log
    #   s/^JOIN: //
    #.
    # Iterate over alternate lines, matching '<' to '\': each backslash-newline
    # marker in the text is replaced by the next '<@...' entity found after it,
    # and whatever lay between the two is moved behind the entity.
    :join
    s,\([^\\ ]*\)\\\n\([^<]*\)<\(@[^}]*[}\\]\),\1\3\2,
    t join
    # Clean up stray new lines
    s/\n//g
    #.
    # Uncomment the following 2 lines (ss) to log the cleaned buffer.
    #   s/^.*$/PCLN: &/w lexicon.log
    #   s/^PCLN: //
    #.
    # Clean up special cases
    #.
    # Comments: turn a nested quote entity back into its literal quotes and
    # unwrap one other nested four-letter entity.
    /@blah/{
        s/\(blah{[^@]*\)@sglq{\([^}]*\)}/\1'\2'/
        s/\(blah{[^@]*\)@dblq{\([^}]*\)}/\1"\2"/
        s/\(blah{[^@]*\)@....{\([^}]*\)}/\1\2/
    }
    # Redirections: unwrap command/function/binary/file entities nested inside
    # a redirection and pull a following @path into it, repeating until
    # nothing changes.
    /@redr/{
        :cleanredr
        s/\(redr{[^@}]*\)@cmnd{\([^}]*\)}/\1\2/
        s/\(redr{[^@}]*\)@func{\([^}]*\)}/\1\2/
        s/\(redr{[^@}]*\)@sbin{\([^}]*\)}/\1\2/
        s/\(redr{[^@}]*\)@fsfo{\([^}]*\)}/\1\2/
        s/\(redr{[^}]*\)}\( *\)@path{\([^}]*\)/\1\2\3/
        t cleanredr
    }
    # Autosuggestions that start with a cursor: unwrap nested entities.
    /@sgst/ {
        :cleansgst
        s/\(sgst{@curs{.}[^@]*\)@cmnd{\([^}]*\)}/\1\2/
        s/\(sgst{@curs{.}[^@]*\)@sbin{\([^}]*\)}/\1\2/
        s/\(sgst{@curs{.}[^@]*\)@path{\([^}]*\)}/\1\2/
        t cleansgst
    }
    # Files: unwrap nested command/function/binary entities.
    /@fsfo/{
        :cleanfsfo
        s/\(fsfo{[^@}]*\)@cmnd{\([^}]*\)}/\1\2/
        s/\(fsfo{[^@}]*\)@func{\([^}]*\)}/\1\2/
        s/\(fsfo{[^@}]*\)@sbin{\([^}]*\)}/\1\2/
        t cleanfsfo
    }
    #.
    # Restore Paths: undo the ':' and '=' stand-ins used during path markup
    # ('=' becomes a plain space in files and an escaped space in paths), and
    # drop any @path{} that ends up empty.
    /@fsfo/ {
        s/\(@fsfo{[^=]*\)=/\1 /
    }
    /@path/ {
        :cleanpath
        s/\(@path{[^:]*\):/\1/
        s/\(@path{[^=]*\)=/\1\\ /
        t cleanpath
        s/@path{}//
    }
    #.
    # Finally, restructure to follow Fish's command [arguments] semantics.
    # Find the initial command, and change any others to arguments, up to a |, ( or ;
    # Assumes that a valid line will start with either a builtin, a function or a binary.
    #.
    # 'if' and 'for' seem to be special cases
    #.
    # Uncomment the following 2 lines (ss) to log the buffer before restructuring.
    #   s/^.*$/PREQ: &/w lexicon.log
    #   s/^PREQ: //
    #.
    # Find initial commands/functions/binaries
    #.
    # Store prmt, if present: the leading '@prmt' plus following spaces is
    # parked in the hold space and removed from the line being restructured.
    #.
    /@prmt/ {
        h
        s/^\(@prmt *\).*$/\1/
        x
        s/^@prmt *//
    }
    #.
    # Temporarily rename an entity in command position (start of line, after
    # ';', '(' or ')', or after a pipe redirection) to an x-prefixed name...
    s/^\( *\)@sbin/\1@xbin/
    s/\( *[;()] *\)@sbin/\1@xbin/
    s/\( *@redr{|} *\)@sbin/\1@xbin/
    s/^\( *\)@cmnd/\1@xcmd/
    s/\( *[;()] *\)@cmnd/\1@xcmd/
    s/\( *@redr{|} *\)@cmnd/\1@xcmd/
    s/^\( *\)@func/\1@xfnc/
    s/\( *[;()] *\)@func/\1@xfnc/
    s/\( *@redr{|} *\)@func/\1@xfnc/
    # ...demote every remaining command/function/binary to an argument...
    s/@cmnd/@args/g
    s/@func/@args/g
    s/@sbin/@args/g
    #.
    # Uncomment the following 2 lines (ss) to log the buffer after demotion.
    #   s/^.*$/PSTQ: &/w lexicon.log
    #   s/^PSTQ: //
    #.
    # ...and give the command-position entities their real names back.
    s/xcmd/cmnd/g
    s/xfnc/func/g
    s/xbin/sbin/g
    # Look at the hold space: if it holds a stored prompt, append the
    # restructured line to it and join the two; otherwise swap straight back.
    x
    /^@prmt/ {
        G
        s/^@prmt \n/@prmt /
    }
    /^@prmt/ ! {
        x
    }
    #.
    # Mark up sensitive character entities and turn the ((d)) placeholder back
    # into a literal @.
    #.
    :entities
    s/</\&lt;/g
    s/>/\&gt;/g
    s/((d))/@/g
    #.
    # Uncomment the following 2 lines (ss) to log the final output, sent to Doxygen.
    #   s/^.*$/OUT : &/w lexicon.log
    #   s/^OUT : //
    #.
    # Lines are reassembled, so branch to end
    b
# === Main End ===
#.
#.
# === Subroutines ===
# Branched to when content requires.
#.
# Move protected content to hold space and mark up other entities.
:protect
# Split the work between the two buffers: the pattern space keeps the text
# that may still be marked up, the hold space keeps the protected entities.
# There is no branch at the end of this section, so execution continues with
# whatever follows it in the script.
#.
# Start by copying the whole pattern space into the hold space.
h
# Clear out any content that has already been marked up, to prevent further
# markup on words that should be left alone.
#.
# Each pass removes one protected '<@...' line from the pattern space and
# reduces a trailing backslash-space run to a single backslash; the loop
# repeats while a substitution succeeds.
:patternflush
s/\n<@[^}]*[}\\]//
s/\\ [^\\]*$/\\/
t patternflush
s/\n$//g
#.
# Swap the pattern and hold buffers and remove unmarked lines and extra
# characters. Basically the inverse of the 'patternflush' action, with
# additional trailing characters stripped.
x
# NOTE(review): '\<' outside a bracket expression is not defined for POSIX
# BREs, and GNU sed reads it as a start-of-word assertion rather than a
# literal '<' - confirm what this address is meant to match. Inside the
# bracket expressions below, '\' and '<' are simply two literal characters.
/^\<@[^}]*$/ ! {
s/[^\<]*//
s/^ *\\\n//
s/[()] \\//
s/\n *\\//
s/^[^\<][^@][^\\]*//
s/\n[]|;) ][^\\]*\\//
# NOTE(review): the range 'A-z' also covers the characters that sort between
# 'Z' and 'a' ('[', '\', ']', '^', '_', '`' in ASCII); 'A-Z' may have been
# intended - confirm before changing.
s/\n[]|;) a-zA-z0-9-][^\\]*$//
s/\n[]|;)}]\\//
s/\n[]|;)}]\n//
s/\n[]|;)}]$//
s/[()]$//
s/}@curs/}/
s/\n@curs$//
s/\n[^\<@][^\\]*\\//
s/\n[^\<@][^\\]*//
s/^\\//
s/\n$//g
# Turn the first remaining backslash-newline pair into a plain newline.
s/\\\n/\
/
}
#.
# Swap the buffers back: the pattern space is the text to mark up again, the
# hold space now carries the protected entities.
x
#.
# A special case. Tidy up after commands.
# Redirectors
# A '|' not directly preceded (ignoring spaces) by '{' or '|'.
s/\([^{|] *\)|/\1@redr{|}/g
# A '&' at the end of the line, or followed by a character outside a-z.
s/&$/@redr{\&amp;}/
# NOTE(review): the character matched by [^a-z] is not captured, so it is
# dropped from the output - confirm that is intended.
s/\([^{&] *\)&[^a-z]/\1@redr{\&amp;}/g
# Optional digits, then a run of '<', '>' or '^', then a target word.
s/\([^{<>^] *\)\([0-9]* *[<>^][<>^]*[^@][a-zA-Z0-9._-]*\)/\1@redr{\2}/g
# Move a backslash marker that ended up just inside a closing brace to just
# outside it.
s/\\}/}\\/g
#.
# Now we can add in 'unsafe' entities that would be too greedy.
# Declared Variables: one or more '$' or '%' followed by a name; the sigils go
# into a nested @optr{}.
s/\([$%][$%]*\)\([A-Za-z_0-9][A-Za-z_0-9]*\)/@vars{@optr{\1}\2}/g
#.
# Files: a word, a dot and an extension, when not directly preceded by '@'.
s/\([^@]\)\([A-Za-z0-9_-][A-Za-z0-9_-]*\.[a-z0-9*][a-z0-9*]*\)/\1@fsfo{\2}/g
#.
# Fold Files into Paths
# NOTE(review): the replacement reproduces exactly what was matched, so this
# substitution currently leaves the text unchanged - confirm what folding was
# meant to do.
s/\(@path{[^}]*\)}@fsfo/\1}@fsfo/
#.
:commands
#.
#### This section is built in the Makefile. Just some formatting examples. #####
#.
# In this file the section holds only the label above and commented-out
# examples of the rules that are expected here; nothing in this file branches
# to the label by name, so it is reached by falling through from the section
# before it.
#.
# fish commands (cmnd) <- 4 character code that has a Doxygen alias counterpart
# template : s/[[:<:]]function[[:>:]]/@cmnd{&}/
#.
# s,[[:<:]]function[[:>:]],@cmnd{function},g
# s,[[:<:]]begin[[:>:]],@cmnd{begin},g
# ...
#.
# fish functions (func)
# Populated by 'public' functions' filename.
#.
# s,[[:<:]]fish_pwd[[:>:]],@func{fish_pwd},g
# s,[[:<:]]fish_prompt[[:>:]],@func{fish_prompt},g
# ...
#.
# Shell Binary (sbin)
# Populated from completion filenames
#.
# s,[[:<:]]seq[[:>:]],@sbin{seq},g
# s,[[:<:]]rm[[:>:]],@sbin{rm},g
# ...
#.
# Color Variable (clrv)
# Populated from __fish_config_interactive.fish
# Allows fish's 'special' color variables to be identified
#.
# s,[[:<:]]fish_color_normal[[:>:]],@clrv{fish_color_normal},g
# s,[[:<:]]fish_color_command[[:>:]],@clrv{fish_color_command},g
#.
# Once all of the commands/functions/variables/specials have been marked up,
# branch back to tidy up and collapse the pattern/hold buffers back to a
# single line. The branch is left commented out in this template (presumably
# the Makefile appends a live one after the generated rules - confirm).
#.
# b tidy