Merge pull request #3218 from floam/manparser

Minor manpage completion utility improvements
This commit is contained in:
Aaron Gyes 2016-07-10 10:32:45 -07:00
commit 013506dfd5
2 changed files with 26 additions and 49 deletions

View file

@ -155,12 +155,12 @@ def built_command(options, description):
if not fish_options: return if not fish_options: return
# Here's what we'll use to truncate if necessary # Here's what we'll use to truncate if necessary
max_description_width = 63 max_description_width = 78
if IS_PY3: if IS_PY3:
truncation_suffix = ' [See Man Page]' truncation_suffix = ''
else: else:
ELLIPSIS_CODE_POINT = 0x2026 ELLIPSIS_CODE_POINT = 0x2026
truncation_suffix = unichr(ELLIPSIS_CODE_POINT) + unicode(' [See Man Page]') truncation_suffix = unichr(ELLIPSIS_CODE_POINT)
# Try to include as many whole sentences as will fit # Try to include as many whole sentences as will fit
# Clean up some probably bogus escapes in the process # Clean up some probably bogus escapes in the process
@ -221,16 +221,13 @@ def remove_groff_formatting(data):
data = data.replace("\f","") data = data.replace("\f","")
return data return data
class ManParser: class ManParser(object):
def is_my_type(self, manpage): def is_my_type(self, manpage):
return False return False
def parse_man_page(self, manpage): def parse_man_page(self, manpage):
return False return False
def name(self):
return "no-name"
class Type1ManParser(ManParser): class Type1ManParser(ManParser):
def is_my_type(self, manpage): def is_my_type(self, manpage):
# print manpage # print manpage
@ -250,7 +247,7 @@ class Type1ManParser(ManParser):
options_parts_regex = re.compile("\.PP(.*?)\.RE", re.DOTALL) options_parts_regex = re.compile("\.PP(.*?)\.RE", re.DOTALL)
options_matched = re.search(options_parts_regex, options_section) options_matched = re.search(options_parts_regex, options_section)
# print options_matched # print options_matched
add_diagnostic('Command is ' + CMDNAME) add_diagnostic("Command is %r" % CMDNAME)
if options_matched == None: if options_matched == None:
add_diagnostic('Unable to find options') add_diagnostic('Unable to find options')
@ -272,7 +269,7 @@ class Type1ManParser(ManParser):
optionName = data[0].strip() optionName = data[0].strip()
if ( optionName.find("-") == -1): if ( optionName.find("-") == -1):
add_diagnostic(optionName + " doesn't contain - ") add_diagnostic("%r doesn't contain '-' " % optionName)
else: else:
optionName = unquote_double_quotes(optionName) optionName = unquote_double_quotes(optionName)
optionName = unquote_single_quotes(optionName) optionName = unquote_single_quotes(optionName)
@ -287,7 +284,7 @@ class Type1ManParser(ManParser):
options_matched = re.search(options_parts_regex, options_section) options_matched = re.search(options_parts_regex, options_section)
def fallback(self, options_section): def fallback(self, options_section):
add_diagnostic('Falling Back') add_diagnostic('Trying fallback')
options_parts_regex = re.compile("\.TP( \d+)?(.*?)\.TP", re.DOTALL) options_parts_regex = re.compile("\.TP( \d+)?(.*?)\.TP", re.DOTALL)
options_matched = re.search(options_parts_regex, options_section) options_matched = re.search(options_parts_regex, options_section)
if options_matched == None: if options_matched == None:
@ -301,7 +298,7 @@ class Type1ManParser(ManParser):
if (len(data)>1 and len(data[1].strip())>0): # and len(data[1])<400): if (len(data)>1 and len(data[1].strip())>0): # and len(data[1])<400):
optionName = data[0].strip() optionName = data[0].strip()
if ( optionName.find("-") == -1): if ( optionName.find("-") == -1):
add_diagnostic(optionName + "doesn't contains -") add_diagnostic("%r doesn't contain '-'" % optionName)
else: else:
optionName = unquote_double_quotes(optionName) optionName = unquote_double_quotes(optionName)
optionName = unquote_single_quotes(optionName) optionName = unquote_single_quotes(optionName)
@ -316,7 +313,7 @@ class Type1ManParser(ManParser):
return True return True
def fallback2(self, options_section): def fallback2(self, options_section):
add_diagnostic('Falling Back2') add_diagnostic('Trying last chance fallback')
ix_remover_regex = re.compile("\.IX.*") ix_remover_regex = re.compile("\.IX.*")
trailing_num_regex = re.compile('\\d+$') trailing_num_regex = re.compile('\\d+$')
options_parts_regex = re.compile("\.IP (.*?)\.IP", re.DOTALL) options_parts_regex = re.compile("\.IP (.*?)\.IP", re.DOTALL)
@ -324,7 +321,7 @@ class Type1ManParser(ManParser):
options_section = re.sub(ix_remover_regex, "", options_section) options_section = re.sub(ix_remover_regex, "", options_section)
options_matched = re.search(options_parts_regex, options_section) options_matched = re.search(options_parts_regex, options_section)
if options_matched == None: if options_matched == None:
add_diagnostic('Still not found2') add_diagnostic('Still (still!) not found')
return False return False
while options_matched != None: while options_matched != None:
data = options_matched.group(1) data = options_matched.group(1)
@ -336,7 +333,7 @@ class Type1ManParser(ManParser):
optionName = re.sub(trailing_num_regex, "", data[0].strip()) optionName = re.sub(trailing_num_regex, "", data[0].strip())
if ('-' not in optionName): if ('-' not in optionName):
add_diagnostic(optionName + " doesn't contain -") add_diagnostic("%r doesn't contain '-'" % optionName)
else: else:
optionName = optionName.strip() optionName = optionName.strip()
optionName = unquote_double_quotes(optionName) optionName = unquote_double_quotes(optionName)
@ -351,10 +348,6 @@ class Type1ManParser(ManParser):
options_matched = re.search(options_parts_regex, options_section) options_matched = re.search(options_parts_regex, options_section)
return True return True
def name(self):
return "Type1"
class Type2ManParser(ManParser): class Type2ManParser(ManParser):
def is_my_type(self, manpage): def is_my_type(self, manpage):
options_section_matched = compile_and_search("\.SH OPTIONS(.*?)", manpage) options_section_matched = compile_and_search("\.SH OPTIONS(.*?)", manpage)
@ -370,12 +363,12 @@ class Type2ManParser(ManParser):
options_section = options_section_matched.group(1) options_section = options_section_matched.group(1)
options_parts_regex = re.compile("\.[I|T]P( \d+(\.\d)?i?)?(.*?)\.[I|T]P", re.DOTALL) options_parts_regex = re.compile("\.[I|T]P( \d+(\.\d)?i?)?(.*?)\.([I|T]P|UNINDENT)", re.DOTALL)
options_matched = re.search(options_parts_regex, options_section) options_matched = re.search(options_parts_regex, options_section)
add_diagnostic('Command is ' + CMDNAME) add_diagnostic('Command is %r' % CMDNAME)
if options_matched == None: if options_matched == None:
add_diagnostic(self.name() + ': Unable to find options') add_diagnostic("%r: Unable to find options" % self)
return False return False
while (options_matched != None): while (options_matched != None):
@ -389,7 +382,7 @@ class Type2ManParser(ManParser):
if (len(data)>1 and len(data[1].strip())>0): # and len(data[1])<400): if (len(data)>1 and len(data[1].strip())>0): # and len(data[1])<400):
optionName = data[0].strip() optionName = data[0].strip()
if '-' not in optionName: if '-' not in optionName:
add_diagnostic(optionName + " doesn't contain -") add_diagnostic("%r doesn't contain '-'" % optionName)
else: else:
optionName = unquote_double_quotes(optionName) optionName = unquote_double_quotes(optionName)
optionName = unquote_single_quotes(optionName) optionName = unquote_single_quotes(optionName)
@ -401,9 +394,6 @@ class Type2ManParser(ManParser):
options_section = options_section[options_matched.end()-3:] options_section = options_section[options_matched.end()-3:]
options_matched = re.search(options_parts_regex, options_section) options_matched = re.search(options_parts_regex, options_section)
def name(self):
return "Type2"
class Type3ManParser(ManParser): class Type3ManParser(ManParser):
def is_my_type(self, manpage): def is_my_type(self, manpage):
@ -421,7 +411,7 @@ class Type3ManParser(ManParser):
options_section = options_section_matched.group(1) options_section = options_section_matched.group(1)
options_parts_regex = re.compile("\.TP(.*?)\.TP", re.DOTALL) options_parts_regex = re.compile("\.TP(.*?)\.TP", re.DOTALL)
options_matched = re.search(options_parts_regex, options_section) options_matched = re.search(options_parts_regex, options_section)
add_diagnostic('Command is ' + CMDNAME) add_diagnostic("Command is %r" % CMDNAME)
if options_matched == None: if options_matched == None:
add_diagnostic('Unable to find options section') add_diagnostic('Unable to find options section')
@ -437,7 +427,7 @@ class Type3ManParser(ManParser):
if (len(data)>1): # and len(data[1])<400): if (len(data)>1): # and len(data[1])<400):
optionName = data[0].strip() optionName = data[0].strip()
if ( optionName.find("-") == -1): if ( optionName.find("-") == -1):
add_diagnostic(optionName + "doesn't contain -") add_diagnostic("%r doesn't contain '-'" % optionName)
else: else:
optionName = unquote_double_quotes(optionName) optionName = unquote_double_quotes(optionName)
optionName = unquote_single_quotes(optionName) optionName = unquote_single_quotes(optionName)
@ -452,10 +442,6 @@ class Type3ManParser(ManParser):
options_matched = re.search(options_parts_regex, options_section) options_matched = re.search(options_parts_regex, options_section)
def name(self):
return "Type3"
class Type4ManParser(ManParser): class Type4ManParser(ManParser):
def is_my_type(self, manpage): def is_my_type(self, manpage):
options_section_matched = compile_and_search("\.SH FUNCTION LETTERS(.*?)", manpage) options_section_matched = compile_and_search("\.SH FUNCTION LETTERS(.*?)", manpage)
@ -472,7 +458,7 @@ class Type4ManParser(ManParser):
options_section = options_section_matched.group(1) options_section = options_section_matched.group(1)
options_parts_regex = re.compile("\.TP(.*?)\.TP", re.DOTALL) options_parts_regex = re.compile("\.TP(.*?)\.TP", re.DOTALL)
options_matched = re.search(options_parts_regex, options_section) options_matched = re.search(options_parts_regex, options_section)
add_diagnostic('Command is ' + CMDNAME) add_diagnostic("Command is %r" % CMDNAME)
if options_matched == None: if options_matched == None:
print >> sys.stderr, "Unable to find options section" print >> sys.stderr, "Unable to find options section"
@ -488,7 +474,7 @@ class Type4ManParser(ManParser):
if (len(data)>1): # and len(data[1])<400): if (len(data)>1): # and len(data[1])<400):
optionName = data[0].strip() optionName = data[0].strip()
if ( optionName.find("-") == -1): if ( optionName.find("-") == -1):
add_diagnostic(optionName + " doesn't contain - ") add_diagnostic("%r doesn't contain '-' " % optionName)
else: else:
optionName = unquote_double_quotes(optionName) optionName = unquote_double_quotes(optionName)
optionName = unquote_single_quotes(optionName) optionName = unquote_single_quotes(optionName)
@ -504,9 +490,6 @@ class Type4ManParser(ManParser):
return True return True
def name(self):
return "Type4"
class TypeDarwinManParser(ManParser): class TypeDarwinManParser(ManParser):
def is_my_type(self, manpage): def is_my_type(self, manpage):
options_section_matched = compile_and_search("\.S[hH] DESCRIPTION", manpage) options_section_matched = compile_and_search("\.S[hH] DESCRIPTION", manpage)
@ -602,9 +585,6 @@ class TypeDarwinManParser(ManParser):
return got_something return got_something
def name(self):
return "Darwin man parser"
class TypeDeroffManParser(ManParser): class TypeDeroffManParser(ManParser):
def is_my_type(self, manpage): def is_my_type(self, manpage):
@ -657,9 +637,6 @@ class TypeDeroffManParser(ManParser):
return got_something return got_something
def name(self):
return "Deroffing man parser"
# Return whether the file at the given path is overwritable # Return whether the file at the given path is overwritable
# Raises IOError if it cannot be opened # Raises IOError if it cannot be opened
def file_is_overwritable(path): def file_is_overwritable(path):
@ -771,8 +748,7 @@ def parse_manpage_at_path(manpage_path, output_directory):
add_diagnostic(manpage_path + ": Not supported") add_diagnostic(manpage_path + ": Not supported")
else: else:
for parser in parsersToTry: for parser in parsersToTry:
parser_name = parser.name() add_diagnostic('Trying %s' % parser.__class__.__name__)
add_diagnostic('Trying parser ' + parser_name)
diagnostic_indent += 1 diagnostic_indent += 1
success = parser.parse_man_page(manpage) success = parser.parse_man_page(manpage)
diagnostic_indent -= 1 diagnostic_indent -= 1
@ -780,7 +756,7 @@ def parse_manpage_at_path(manpage_path, output_directory):
if not built_command_output: if not built_command_output:
success = False success = False
if success: if success:
PARSER_INFO.setdefault(parser_name, []).append(CMDNAME) PARSER_INFO.setdefault(parser.__class__.__name__, []).append(CMDNAME)
break break
if success: if success:
@ -798,7 +774,8 @@ def parse_manpage_at_path(manpage_path, output_directory):
# Output the magic word Autogenerated so we can tell if we can overwrite this # Output the magic word Autogenerated so we can tell if we can overwrite this
built_command_output.insert(1, "# Autogenerated from man page " + manpage_path) built_command_output.insert(1, "# Autogenerated from man page " + manpage_path)
built_command_output.insert(2, "# using " + parser_name) # built_command_output.insert(2, "# using " + parser.__class__.__name__) # XXX MISATTRIBUTES THE CULPABLE PARSER! Was really using Type2 but reporting TypeDeroffManParser
for line in built_command_output: for line in built_command_output:
output_file.write(line) output_file.write(line)
output_file.write('\n') output_file.write('\n')
@ -807,7 +784,7 @@ def parse_manpage_at_path(manpage_path, output_directory):
if output_file != sys.stdout: if output_file != sys.stdout:
output_file.close() output_file.close()
else: else:
parser_names = ', '.join(p.name() for p in parsersToTry) parser_names = ', '.join(p.__class__.__name__ for p in parsersToTry)
#add_diagnostic('%s contains no options or is unparsable' % manpage_path, BRIEF_VERBOSE) #add_diagnostic('%s contains no options or is unparsable' % manpage_path, BRIEF_VERBOSE)
add_diagnostic('%s contains no options or is unparsable (tried parser %s)' % (manpage_path, parser_names), BRIEF_VERBOSE) add_diagnostic('%s contains no options or is unparsable (tried parser %s)' % (manpage_path, parser_names), BRIEF_VERBOSE)

View file

@ -94,8 +94,8 @@ class Deroffer:
'en': '-', 'en': '-',
'hy': '-', 'hy': '-',
'em': "--", 'em': "--",
'lq': "\"", # PCA: This used to be left and right smart quotes, but they look dumb 'lq': '',
'rq': "\"", # So just use ordinary double quotes 'rq': '',
'Bq': ",,", 'Bq': ",,",
'oq': '`', 'oq': '`',
'cq': '\'', 'cq': '\'',