fish-shell/share/tools/deroff.py
2012-11-18 11:23:22 +01:00

1101 lines
32 KiB
Python
Executable file
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Deroff.py, ported to Python from the venerable deroff.c """
import sys, re, string
IS_PY3 = sys.version_info[0] >= 3
class Deroffer:
g_specs_specletter = {
# Output composed latin1 letters
'-D': '\320',
'Sd': '\360',
'Tp': '\376',
'TP': '\336',
'AE': '\306',
'ae': '\346',
'OE': "OE",
'oe': "oe",
':a': '\344',
':A': '\304',
':e': '\353',
':E': '\313',
':i': '\357',
':I': '\317',
':o': '\366',
':O': '\326',
':u': '\374',
':U': '\334',
':y': '\377',
'ss': '\337',
'\'A': '\301',
'\'E': '\311',
'\'I': '\315',
'\'O': '\323',
'\'U': '\332',
'\'Y': '\335',
'\'a': '\341',
'\'e': '\351',
'\'i': '\355',
'\'o': '\363',
'\'u': '\372',
'\'y': '\375',
'^A': '\302',
'^E': '\312',
'^I': '\316',
'^O': '\324',
'^U': '\333',
'^a': '\342',
'^e': '\352',
'^i': '\356',
'^o': '\364',
'^u': '\373',
'`A': '\300',
'`E': '\310',
'`I': '\314',
'`O': '\322',
'`U': '\331',
'`a': '\340',
'`e': '\350',
'`i': '\354',
'`o': '\362',
'`u': '\371',
'~A': '\303',
'~N': '\321',
'~O': '\325',
'~a': '\343',
'~n': '\361',
'~o': '\365',
',C': '\307',
',c': '\347',
'/l': "/l",
'/L': "/L",
'/o': '\370',
'/O': '\330',
'oA': '\305',
'oa': '\345',
# Ligatures
'fi': 'fi',
'ff': 'ff',
'fl': 'fl',
'Fi': 'ffi',
'Ff': 'fff',
'Fl': 'ffl'
}
g_specs = {
'mi': '-',
'en': '-',
'hy': '-',
'em': "--",
'lq': "\"", # PCA: This used to be left and right smart quotes, but they look dumb
'rq': "\"", # So just use ordinary double quotes
'Bq': ",,",
'oq': '`',
'cq': '\'',
'aq': '\'',
'dq': '"',
'or': '|',
'at': '@',
'sh': '#',
'Eu': '\244',
'eu': '\244',
'Do': '$',
'ct': '\242',
'Fo': '\253',
'Fc': '\273',
'fo': '<',
'fc': '>',
'r!': '\241',
'r?': '\277',
'Of': '\252',
'Om': '\272',
'pc': '\267',
'S1': '\271',
'S2': '\262',
'S3': '\263',
'<-': "<-",
'->': "->",
'<>': "<->",
'ua': '^',
'da': 'v',
'lA': "<=",
'rA': "=>",
'hA': "<=>",
'uA': "^^",
'dA': "vv",
'ba': '|',
'bb': '|',
'br': '|',
'bv': '|',
'ru': '_',
'ul': '_',
'ci': 'O',
'bu': 'o',
'co': '\251',
'rg': '\256',
'tm': "(TM)",
'dd': "||",
'dg': '|',
'ps': '\266',
'sc': '\247',
'de': '\260',
'%0': "0/00",
'14': '\274',
'12': '\275',
'34': '\276',
'f/': '/',
'sl': '/',
'rs': '\\',
'sq': "[]",
'fm': '\'',
'ha': '^',
'ti': '~',
'lB': '[',
'rB': ']',
'lC': '{',
'rC': '}',
'la': '<',
'ra': '>',
'lh': "<=",
'rh': "=>",
'tf': "therefore",
'~~': "~~",
'~=': "~=",
'!=': "!=",
'**': '*',
'+-': '\261',
'<=': "<=",
'==': "==",
'=~': "=~",
'>=': ">=",
'AN': "\\/",
'OR': "/\\",
'no': '\254',
'te': "there exists",
'fa': "for all",
'Ah': "aleph",
'Im': "imaginary",
'Re': "real",
'if': "infinity",
'md': "\267",
'mo': "member of",
'mu': '\327',
'nm': "not member of",
'pl': '+',
'eq': '=',
'pt': "oc",
'pp': "perpendicular",
'sb': "(=",
'sp': "=)",
'ib': "(-",
'ip': "-)",
'ap': '~',
'is': 'I',
'sr': "root",
'pd': 'd',
'c*': "(x)",
'c+': "(+)",
'ca': "cap",
'cu': 'U',
'di': '\367',
'gr': 'V',
'es': "{}",
'CR': "_|",
'st': "such that",
'/_': "/_",
'lz': "<>",
'an': '-',
# Output Greek
'*A': "Alpha",
'*B': "Beta",
'*C': "Xi",
'*D': "Delta",
'*E': "Epsilon",
'*F': "Phi",
'*G': "Gamma",
'*H': "Theta",
'*I': "Iota",
'*K': "Kappa",
'*L': "Lambda",
'*M': "Mu",
'*N': "Nu",
'*O': "Omicron",
'*P': "Pi",
'*Q': "Psi",
'*R': "Rho",
'*S': "Sigma",
'*T': "Tau",
'*U': "Upsilon",
'*W': "Omega",
'*X': "Chi",
'*Y': "Eta",
'*Z': "Zeta",
'*a': "alpha",
'*b': "beta",
'*c': "xi",
'*d': "delta",
'*e': "epsilon",
'*f': "phi",
'+f': "phi",
'*g': "gamma",
'*h': "theta",
'+h': "theta",
'*i': "iota",
'*k': "kappa",
'*l': "lambda",
'*m': "\265",
'*n': "nu",
'*o': "omicron",
'*p': "pi",
'+p': "omega",
'*q': "psi",
'*r': "rho",
'*s': "sigma",
'*t': "tau",
'*u': "upsilon",
'*w': "omega",
'*x': "chi",
'*y': "eta",
'*z': "zeta",
'ts': "sigma",
}
g_re_word = re.compile(r'[a-zA-Z_]+') # equivalent to the word() method
g_re_number = re.compile(r'[+-]?\d+') # equivalent to the number() method
g_re_esc_char = re.compile(r"""([a-zA-Z_]) | # Word
([+-]?\d) | # Number
\\ # Backslash (for escape seq)
""", re.VERBOSE)
g_re_not_backslash_or_whitespace = re.compile(r'[^ \t\n\r\f\v\\]+') # Match a sequence of not backslash or whitespace
g_re_newline_collapse = re.compile(r'\n{3,}')
g_re_font = re.compile(r"""\\f( # Starts with backslash f
(\(\S{2}) | # Open paren, then two printable chars
(\[\S*?\]) | # Open bracket, zero or more printable characters, then close bracket
\S) # Any printable character
""", re.VERBOSE)
# This gets filled in in __init__ below
g_macro_dict = False
def __init__(self):
self.reg_table = {}
self.tr_from = ''
self.tr_to = ''
self.tr = ''
self.nls = 2
self.specletter = False
self.refer = False
self.macro = 0
self.nobody = False
self.inlist = False
self.inheader = False
self.pic = False
self.tbl = False
self.tblstate = 0
self.tblTab = ''
self.eqn = False
self.skipheaders = False
self.skiplists = False
self.ignore_sonx = False
self.output = []
self.name = ''
self.OPTIONS = 0
self.FORMAT = 1
self.DATA = 2
# words is uninteresting and should be treated as false
if not Deroffer.g_macro_dict:
Deroffer.g_macro_dict = {
'SH': Deroffer.macro_sh,
'SS': Deroffer.macro_ss_ip,
'IP': Deroffer.macro_ss_ip,
'H ': Deroffer.macro_ss_ip,
'I ': Deroffer.macro_i_ir,
'IR': Deroffer.macro_i_ir,
'IB': Deroffer.macro_i_ir,
'B ': Deroffer.macro_i_ir,
'BR': Deroffer.macro_i_ir,
'BI': Deroffer.macro_i_ir,
'R ': Deroffer.macro_i_ir,
'RB': Deroffer.macro_i_ir,
'RI': Deroffer.macro_i_ir,
'AB': Deroffer.macro_i_ir,
'Nm': Deroffer.macro_Nm,
'] ': Deroffer.macro_close_bracket,
'PS': Deroffer.macro_ps,
'PE': Deroffer.macro_pe,
'TS': Deroffer.macro_ts,
'T&': Deroffer.macro_t_and,
'TE': Deroffer.macro_te,
'EQ': Deroffer.macro_eq,
'EN': Deroffer.macro_en,
'R1': Deroffer.macro_r1,
'R2': Deroffer.macro_r2,
'de': Deroffer.macro_de,
'BL': Deroffer.macro_bl_vl,
'VL': Deroffer.macro_bl_vl,
'AL': Deroffer.macro_bl_vl,
'LB': Deroffer.macro_bl_vl,
'RL': Deroffer.macro_bl_vl,
'ML': Deroffer.macro_bl_vl,
'DL': Deroffer.macro_bl_vl,
'BV': Deroffer.macro_bv,
'LE': Deroffer.macro_le,
'LP': Deroffer.macro_lp_pp,
'PP': Deroffer.macro_lp_pp,
'P\n': Deroffer.macro_lp_pp,
'ds': Deroffer.macro_ds,
'so': Deroffer.macro_so_nx,
'nx': Deroffer.macro_so_nx,
'tr': Deroffer.macro_tr,
'sp': Deroffer.macro_sp
}
def flush_output(self, where):
if where:
where.write(self.get_output())
self.output[:] = []
def get_output(self):
res = ''.join(self.output)
clean_res = Deroffer.g_re_newline_collapse.sub('\n', res)
return clean_res
def putchar(self, c):
self.output.append(c)
return c
# This gets swapped in in place of condputs the first time tr gets modified
def condputs_tr(self, str):
special = self.pic or self.eqn or self.refer or self.macro or (self.skiplists and self.inlist) or (self.skipheaders and self.inheader)
if not special:
self.output.append(str.translate(self.tr))
def condputs(self, str):
special = self.pic or self.eqn or self.refer or self.macro or (self.skiplists and self.inlist) or (self.skipheaders and self.inheader)
if not special:
self.output.append(str)
def str_at(self, idx):
return self.s[idx:idx+1]
def skip_char(self, amt=1):
self.s = self.s[amt:]
def skip_leading_whitespace(self):
self.s = self.s.lstrip()
def is_white(self, idx):
# Note this returns false for empty strings (idx >= len(self.s))
return self.s[idx:idx+1].isspace()
def str_eq(offset, other, len):
return self.s[offset:offset+len] == other[:len]
def prch(self, idx):
# Note that this return False for the empty string (idx >= len(self.s))
ch = self.s[idx:idx+1]
return ch not in ' \t\n'
def font(self):
match = Deroffer.g_re_font.match(self.s)
if not match: return False
self.skip_char(match.end())
return True
def font2(self):
if self.s[0:2] == '\\f':
c = self.str_at(2)
if c == '(' and self.prch(3) and self.prch(4):
self.skip_char(5)
return True
elif c == '[':
self.skip_char(2)
while self.prch(0) and self.str_at(0) != ']': self.skip_char()
if self.str_at(0) == ']': self.skip_char()
elif self.prch(2):
self.skip_char(3)
return True
return False
def comment(self):
# Here we require that the string start with \"
while self.str_at(0) and self.str_at(0) != '\n': self.skip_char()
return True
def numreq(self):
# We require that the string starts with backslash
if self.str_at(1) in 'hvwud' and self.str_at(2) == '\'':
self.macro += 1
self.skip_char(3)
while self.str_at(0) != '\'' and self.esc_char():
pass # Weird
if self.str_at(0) == '\'':
self.skip_char()
self.macro -= 1
return True
return False
def var(self):
reg = ''
s0s1 = self.s[0:2]
if s0s1 == '\\n':
if self.s[3:5] == 'dy':
self.skip_char(5)
return True
elif self.str_at(2) == '(' and self.prch(3) and self.prch(4):
self.skip_char(5)
return True
elif self.str_at(2) == '[' and self.prch(3):
self.skip_char(3)
while self.str_at(0) and self.str_at(0) != ']':
self.skip_char()
return True
elif self.prch(2):
self.skip_char(3)
return True
elif s0s1 == '\\*':
if self.str_at(2) == '(' and self.prch(3) and self.prch(4):
reg = self.s[3:5]
self.skip_char(5)
elif self.str_at(2) == '[' and self.prch(3):
self.skip_char(3)
while self.str_at(0) and self.str_at(0) != ']':
reg = reg + self.str_at(0)
self.skip_char()
if self.s[0:1] == ']':
self.skip_char()
else:
return False
elif self.prch(2):
reg = self.str_at(2)
self.skip_char(3)
else:
return False
if reg in self.reg_table:
old_s = self.s
self.s = self.reg_table[reg]
self.text_arg()
return True
return False
def size(self):
# We require that the string starts with \s
if self.digit(2) or (self.str_at(2) in '-+' and self.digit(3)):
self.skip_char(3)
while self.digit(0): self.skip_char()
return True
return False
def spec(self):
self.specletter = False
if self.s[0:2] == '\\(' and self.prch(2) and self.prch(3):
key = self.s[2:4]
if key in Deroffer.g_specs_specletter:
self.condputs(Deroffer.g_specs_specletter[key])
self.specletter = True
elif key in Deroffer.g_specs:
self.condputs(Deroffer.g_specs[key])
self.skip_char(4)
return True
elif self.s.startswith('\\%'):
self.specletter = True
self.skip_char(2)
return True
else:
return False
def esc(self):
# We require that the string start with backslash
c = self.s[1:2]
if not c: return False
if c in 'eE':
self.condputs('\\')
elif c in 't':
self.condputs('\t')
elif c in '0~':
self.condputs(' ')
elif c in '|^&:':
pass
else:
self.condputs(c)
self.skip_char(2)
return True
def word(self):
got_something = False
while True:
match = Deroffer.g_re_word.match(self.s)
if not match: break
got_something = True
self.condputs(match.group(0))
self.skip_char(match.end(0))
# Consume all specials
while self.spec():
if not self.specletter: break
return got_something
def text(self):
while True:
idx = self.s.find('\\')
if idx == -1:
self.condputs(self.s)
self.s = ''
break
else:
self.condputs(self.s[:idx])
self.skip_char(idx)
if not self.esc_char_backslash():
self.condputs(self.str_at(0))
self.skip_char()
return True
def letter(self, idx):
ch = self.str_at(idx)
return ch.isalpha() or ch == '_' # underscore is used in C identifiers
def digit(self, idx):
ch = self.str_at(idx)
return ch.isdigit()
def number(self):
match = Deroffer.g_re_number.match(self.s)
if not match:
return False
else:
self.condputs(match.group(0))
self.skip_char(match.end())
return True
def esc_char_backslash(self):
# Like esc_char, but we know the string starts with a backslash
c = self.s[1:2]
if c == '"':
return self.comment()
elif c == 'f':
return self.font()
elif c == 's':
return self.size()
elif c in 'hvwud':
return self.numreq()
elif c in 'n*':
return self.var()
elif c == '(':
return self.spec()
else:
return self.esc()
def esc_char(self):
if self.s[0:1] == '\\':
return self.esc_char_backslash()
return self.word() or self.number()
def quoted_arg(self):
if self.str_at(0) == '"':
self.skip_char()
while self.s and self.str_at(0) != '"':
if not self.esc_char():
if self.s:
self.condputs(self.str_at(0))
self.skip_char()
return True
else:
return False
def text_arg(self):
# PCA: The deroff.c textArg() disallowed quotes at the start of an argument
# I'm not sure if this was a bug or not
got_something = False
while True:
match = Deroffer.g_re_not_backslash_or_whitespace.match(self.s)
if match:
# Output the characters in the match
self.condputs(match.group(0))
self.skip_char(match.end(0))
got_something = True
# Next is either an escape, or whitespace, or the end
# If it's the whitespace or the end, we're done
if not self.s or self.is_white(0):
return got_something
# Try an escape
if not self.esc_char():
# Some busted escape? Just output it
self.condputs(self.str_at(0))
self.skip_char()
got_something = True
def text_arg2(self):
if not self.esc_char():
if self.s and not self.is_white(0):
self.condputs(self.str_at(0))
self.skip_char()
else:
return False
while True:
if not self.esc_char():
if self.s and not self.is_white(0):
self.condputs(self.str_at(0))
self.skip_char()
else:
return True
# Macro functions
def macro_sh(self):
for header_str in [' SYNOPSIS', ' "SYNOPSIS', ' BERSICHT', ' "BERSICHT']:
if self.s[2:].startswith(header_str):
self.inheader = True
break
else:
# Did not find a header string
self.inheader = False
self.nobody = True
def macro_ss_ip(self):
self.nobody = True
return False
def macro_i_ir(self):
pass
return False
def macro_Nm(self):
if self.s == 'Nm\n':
self.condputs(self.name)
else:
self.name = self.s[3:].strip() + ' '
return True
def macro_close_bracket(self):
self.refer = False
return False
def macro_ps(self):
if self.is_white(2): self.pic = True
self.condputs('\n')
return True
def macro_pe(self):
if self.is_white(2): self.pic = False
self.condputs('\n')
return True
def macro_ts(self):
if self.is_white(2): self.tbl, self.tblstate = True, self.OPTIONS
self.condputs('\n')
return True
def macro_t_and(self):
if self.is_white(2): self.tbl, self.tblstate = True, self.FORMAT
self.condputs('\n')
return True
def macro_te(self):
if self.is_white(2): self.tbl = False
self.condputs('\n')
return True
def macro_eq(self):
if self.is_white(2): self.eqn = True
self.condputs('\n')
return True
def macro_en(self):
if self.is_white(2): self.eqn = False
self.condputs('\n')
return True
def macro_r1(self):
if self.is_white(2): self.refer2 = True
self.condputs('\n')
return True
def macro_r2(self):
if self.is_white(2): self.refer2 = False
self.condputs('\n')
return True
def macro_de(self):
macro=True
self.condputs('\n')
return True
def macro_bl_vl(self):
if self.is_white(2): self.inlist = True
self.condputs('\n')
return True
def macro_bv(self):
if self.str_at(2) == 'L' and self.white(self.str_at(3)): self.inlist = True
self.condputs('\n')
return True
def macro_le(self):
if self.is_white(2): self.inlist = False
self.condputs('\n')
return True
def macro_lp_pp(self):
self.condputs('\n')
return True
def macro_ds(self):
self.skip_char(2)
self.skip_leading_whitespace()
if self.str_at(0):
# Split at whitespace
comps = self.s.split(None, 2)
if len(comps) is 2:
name, value = comps
value = value.rstrip()
self.reg_table[name] = value
self.condputs('\n')
return True
def macro_so_nx(self):
# We always ignore include directives
# deroff.c for some reason allowed this to fall through to the 'tr' case
# I think that was just a bug so I won't replicate it
return True
def macro_tr(self):
self.skip_char(2)
self.skip_leading_whitespace()
while self.s and self.str_at(0) != '\n':
c = self.str_at(0)
ns = self.str_at(1)
self.skip_char(2)
if not ns or ns == '\n': ns = ' '
self.tr_from += c
self.tr_to += ns
# Update our table, then swap in the slower tr-savvy condputs
try: #Python2
self.tr = string.maketrans(self.tr_from, self.tr_to)
except AttributeError: #Python3
self.tr = "".maketrans(self.tr_from, self.tr_to)
self.condputs = self.condputs_tr
return True
def macro_sp(self):
self.condputs('\n')
return True
def macro_other(self):
self.condputs('\n')
return True
def request_or_macro(self):
# s[0] is period or open single quote
self.skip_char()
s0 = self.s[1:2]
if s0 == '\\':
if self.str_at(1) == '"':
self.condputs('\n')
return True
else:
pass
elif s0 == '[':
self.refer = True
self.condputs('\n')
return True
elif s0 == ']':
self.refer = False
self.skip_char()
return self.text()
elif s0 == '.':
self.macro = False
self.condputs('\n')
return True
self.nobody = False
s0s1 = self.s[0:2]
macro_func = Deroffer.g_macro_dict.get(s0s1, Deroffer.macro_other)
if macro_func(self):
return True
if self.skipheaders and self.nobody: return True
self.skip_leading_whitespace()
while self.s and not self.is_white(0): self.skip_char()
self.skip_leading_whitespace()
while True:
if not self.quoted_arg() and not self.text_arg():
if self.s:
self.condputs(self.str_at(0))
self.skip_char()
else:
return True
def request_or_macro2(self):
self.skip_char()
s0 = self.s[0:1]
if s0 == '\\':
if self.str_at(1) == '"':
self.condputs('\n')
return True
else:
pass
elif s0 == '[':
self.refer = True
self.condputs('\n')
return True
elif s0 == ']':
self.refer = False
self.skip_char()
return self.text()
elif s0 == '.':
self.macro = False
self.condputs('\n')
return True
self.nobody = False
s0s1 = self.s[0:2]
if s0s1 == 'SH':
for header_str in [' SYNOPSIS', ' "SYNOPSIS', ' BERSICHT', ' "BERSICHT']:
if self.s[2:].startswith(header_str):
self.inheader = True
break
else:
# Did not find a header string
self.inheader = False
self.nobody = True
elif s0s1 in ['SS', 'IP', 'H ']:
self.nobody = True
elif s0s1 in ['I ', 'IR', 'IB', 'B ', 'BR', 'BI', 'R ', 'RB', 'RI', 'AB']:
pass
elif s0s1 in ['] ']:
self.refer = False
elif s0s1 in ['PS']:
if self.is_white(2): self.pic = True
self.condputs('\n')
return True
elif s0s1 in ['PE']:
if self.is_white(2): self.pic = False
self.condputs('\n')
return True
elif s0s1 in ['TS']:
if self.is_white(2): self.tbl, self.tblstate = True, self.OPTIONS
self.condputs('\n')
return True
elif s0s1 in ['T&']:
if self.is_white(2): self.tbl, self.tblstate = True, self.FORMAT
self.condputs('\n')
return True
elif s0s1 in ['TE']:
if self.is_white(2): self.tbl = False
self.condputs('\n')
return True
elif s0s1 in ['EQ']:
if self.is_white(2): self.eqn = True
self.condputs('\n')
return True
elif s0s1 in ['EN']:
if self.is_white(2): self.eqn = False
self.condputs('\n')
return True
elif s0s1 in ['R1']:
if self.is_white(2): self.refer2 = True
self.condputs('\n')
return True
elif s0s1 in ['R2']:
if self.is_white(2): self.refer2 = False
self.condputs('\n')
return True
elif s0s1 in ['de']:
macro=True
self.condputs('\n')
return True
elif s0s1 in ['BL', 'VL', 'AL', 'LB', 'RL', 'ML', 'DL']:
if self.is_white(2): self.inlist = True
self.condputs('\n')
return True
elif s0s1 in ['BV']:
if self.str_at(2) == 'L' and self.white(self.str_at(3)): self.inlist = True
self.condputs('\n')
return True
elif s0s1 in ['LE']:
if self.is_white(2): self.inlist = False
self.condputs('\n')
return True
elif s0s1 in ['LP', 'PP', 'P\n']:
self.condputs('\n')
return True
elif s0s1 in ['ds']:
self.skip_char(2)
self.skip_leading_whitespace()
if self.str_at(0):
# Split at whitespace
comps = self.s.split(None, 2)
if len(comps) is 2:
name, value = comps
value = value.rstrip()
self.reg_table[name] = value
self.condputs('\n')
return True
elif s0s1 in ['so', 'nx']:
# We always ignore include directives
# deroff.c for some reason allowed this to fall through to the 'tr' case
# I think that was just a bug so I won't replicate it
return True
elif s0s1 in ['tr']:
self.skip_char(2)
self.skip_leading_whitespace()
while self.s and self.str_at(0) != '\n':
c = self.str_at(0)
ns = self.str_at(1)
self.skip_char(2)
if not ns or ns == '\n': ns = ' '
self.tr_from += c
self.tr_to += ns
# Update our table, then swap in the slower tr-savvy condputs
try: #Python2
self.tr = string.maketrans(self.tr_from, self.tr_to)
except AttributeError: #Python3
self.tr = "".maketrans(self.tr_from, self.tr_to)
self.condputs = self.condputs_tr
return True
elif s0s1 in ['sp']:
self.condputs('\n')
return True
else:
self.condputs('\n')
return True
if self.skipheaders and self.nobody: return True
self.skip_leading_whitespace()
while self.s and not self.is_white(0): self.skip_char()
self.skip_leading_whitespace()
while True:
if not self.quoted_arg() and not self.text_arg():
if self.s:
self.condputs(self.str_at(0))
self.skip_char()
else:
return True
def do_tbl(self):
if self.tblstate == self.OPTIONS:
while self.s and self.str_at(0) != ';' and self.str_at(0) != '\n':
self.skip_leading_whitespace()
if not self.str_at(0).isalpha():
# deroff.c has a bug where it can loop forever here...we try to work around it
self.skip_char()
else: # Parse option
option = self.s
arg = ''
idx = 0
while option[idx:idx+1].isalpha():
idx += 1
if option[idx:idx+1] == '(':
option = option[:idx]
self.s = self.s[idx+1:]
arg = self.s
else:
self.s = ''
if arg:
idx = arg.find(')')
if idx != -1:
arg = arg[:idx]
self.s = self.s[idx+1:]
else:
#self.skip_char()
pass
if option.lower() == 'tab':
self.tblTab = arg[0:1]
self.tblstate = self.FORMAT
self.condputs('\n')
elif self.tblstate == self.FORMAT:
while self.s and self.str_at(0) != '.' and self.str_at(0) != '\n':
self.skip_leading_whitespace()
if self.str_at(0): self.skip_char()
if self.str_at(0) == '.': self.tblstate = self.DATA
self.condputs('\n')
elif self.tblstate == self.DATA:
if self.tblTab:
self.s = self.s.replace(self.tblTab, '\t')
self.text()
return True
def do_line(self):
if self.s[0:1] in ".'":
if not self.request_or_macro(): return False
elif self.tbl:
self.do_tbl()
else:
self.text()
return True
def deroff(self, str):
lines = str.split('\n')
for line in lines:
self.s = line + '\n'
if not self.do_line():
break
#self.putchar('\n')
def deroff_files(files):
for arg in files:
sys.stderr.write(arg + '\n')
if arg.endswith('.gz'):
f = gzip.open(arg, 'r')
str = f.read()
if IS_PY3: str = str.decode('latin-1')
else:
f = open(arg, 'r')
str = f.read()
d = Deroffer()
d.deroff(str)
d.flush_output(sys.stdout)
f.close()
if __name__ == "__main__":
import gzip
paths = sys.argv[1:]
if True:
deroff_files(paths)
else:
import cProfile, profile, pstats
profile.run('deroff_files(paths)', 'fooprof')
p = pstats.Stats('fooprof')
p.sort_stats('time').print_stats(100)
#p.sort_stats('calls').print_callers(.5, 'startswith')