#!/usr/bin/env python3 import sys import os import re import importlib import inspect import enum def PrintUsage(): print(""" USAGE: pydown.py prefix.md infile.py outfile.md """) return 1 def Fail(message): print('FATAL(pydown):', message) sys.exit(1) def LoadModule(inPythonFileName): dir = os.path.dirname(inPythonFileName) if not dir: dir = os.getcwd() sys.path.append(dir) modname = os.path.basename(inPythonFileName) if modname.endswith('.py'): modname = modname[:-3] # chop off '.py' module = importlib.import_module(modname) return module def HtmlEscape(text): text = text.replace('&', '&') text = text.replace('<', '<') text = text.replace('>', '>') return text def SymbolLink(name): # Special case: Search() and related functions have return type = "Time or `None`" m = re.match(r'^\s*([a-zA-Z0-9_]+)\s+or\s+`None`\s*$', name) if m: return SymbolLink(m.group(1)) + ' or `None`' if 'a' <= name[0] <= 'z': # Assume built-in Python identifier, so do not link return '`{0}`'.format(name) # Other links look like `StateVector[]`. We need to link to StateVector, but exclude the []. m = re.match(r'^\s*([a-zA-Z0-9_]+(\.[a-zA-Z0-9_]+)*)([^a-zA-Z0-9_\s]+)', name) if m: return SymbolLink(m.group(1)) + '`' + m.group(3) + '`' # [`astro_time_t`](#astro_time_t) return '[`{0}`](#{0})'.format(name) def FixText(s): # Expand "#Body" to "[`Body`](#Body)". # Tricky: also need to find "#GravitySimulator.Update", # but NOT "#GravitySimulator. Blah blah...". return re.sub(r'#([A-Z][A-Za-z0-9_]*(\.[A-Z][A-Za-z0-9_]*)*)', r'[`\1`](#\1)', s) class ParmInfo: def __init__(self, name, type): self.name = name self.type = type self.description = '' def AppendDescriptionLine(self, line): self.description += line.strip() + ' ' class DocInfo: def __init__(self, doc): self.description = '' self.parameters = [] self.attributes = [] self.enumValues = [] self.returnType = None self.returns = '' lines = doc.split('\n') # First line is boldfaced if followed by blank line. if len(lines) >= 2 and lines[0].strip() != '' and lines[1].strip() == '': self.summary = lines[0] lines = lines[2:] else: self.summary = '' currentAttr = None currentParm = None mode = '' for line in lines: if re.match(r'^\-+$', line): continue if line in ['Parameters', 'Returns', 'Example', 'Examples', 'Attributes', 'Values']: mode = line continue if line.strip() == '': if mode == 'code': self.description += '```\n' mode = '' continue if mode == 'Parameters': currentParm = self.ProcessParmAttrLine(line, currentParm, self.parameters) elif mode == 'Attributes': currentAttr = self.ProcessParmAttrLine(line, currentAttr, self.attributes) elif mode == 'Returns': if line.startswith(' '): self.returns += line.strip() + '\n' else: self.returnType = line.strip() elif mode == 'Example' or mode == 'Examples': pass elif mode == 'Values': self.ProcessEnumValue(line) elif mode == '': if re.match(r'^\s*>>>', line): mode = 'code' self.description += '```\n' self.description += line + '\n' elif mode == 'code': self.description += line + '\n' else: raise Exception('Unknown mode = "{}"'.format(mode)) if mode == 'code': self.description += '```\n' def ProcessEnumValue(self, line): m = re.match(r'^\s*([A-Za-z][A-Za-z0-9_]+)\s*:\s*(.*)$', line) if not m: raise Exception('Invalid enum documentation: "{}"'.format(line)) pair = (m.group(1), m.group(2).strip()) self.enumValues.append(pair) def ProcessParmAttrLine(self, line, item, itemlist): if line.startswith(' '): # The first line of description, or another line of description. item.AppendDescriptionLine(line) else: # name : type token = line.split(':') if len(token) != 2: raise Exception('Expected name:type but found: "{}"'.format(line)) item = ParmInfo(token[0].strip(), token[1].strip()) itemlist.append(item) return item def Table(self, itemlist, tag): md = '' if itemlist: md += '| Type | {} | Description |\n'.format(tag) md += '| --- | --- | --- |\n' for p in itemlist: if not p.type: raise Exception('Symbol "{}" has missing type declaration.'.format(p.name)) md += '| {} | {} | {} |\n'.format(SymbolLink(p.type), '`' + p.name + '`', FixText(p.description.strip())) md += '\n' return md def EnumTable(self): md = '' if self.enumValues: md += '| Value | Description |\n' md += '| --- | --- |\n' for (name, desc) in self.enumValues: md += '| {} | {} |\n'.format('`' + name + '`', desc) return md def Markdown(self): md = '\n' if self.summary: md += '**' + FixText(self.summary) + '**\n\n' if self.description: md += FixText(self.description) + '\n\n' md += self.Table(self.parameters, 'Parameter') md += self.Table(self.attributes, 'Attribute') md += self.EnumTable() if self.returns or self.returnType: md += '\n**Returns**' if self.returnType: md += ': ' + SymbolLink(self.returnType) md += '\n' md += self.returns + '\n' md += '\n' md += '\n' return md def VerifyEnum(self, members): defs = set(name for (name, _) in self.enumValues) if defs != members: print('Actual enums: [' + ', '.join(members) + ']') print('Documented enums: [' + ', '.join(defs) + ']') raise Exception('Documented enums do not match actual enums.') def MdSignature(sig): # Convert the type signature from inspect.signature() into a string. text = str(sig) # Now that we have type hints, we get inconsistent return values from # inspect.signature() for functions that return either a given type value or None. # On older Pythons we see "Optional[astronomy.Time]". # On newer Pythons we see "Union[astronomy.Time, NoneType]". # This causes unit test failures on GitHub Actions when I check in changes. # I prefer the older syntax because it's clearer what my intention is. text = re.sub(r'Union\[([A-Za-z_][A-Za-z_0-9\.]*),\s*NoneType\]', r'Optional[\1]', text) # Escape square brackets as in Optional[X] to Optional\[X\] for Markdown safety. text = text.replace('[', r'\[').replace(']', r'\]') # Convert 'astronomy.X' to a link to the type X. text = re.sub(r'\bastronomy\.([A-Za-z_][A-Za-z_0-9]*)', lambda m : SymbolLink(m.group(1)), text) # Replace quoted forward declarations 'X' with links to the type X. text = re.sub(r"'([A-Za-z_][A-Za-z_0-9]*)'", lambda m : SymbolLink(m.group(1)), text) # Escape characters as needed for Markdown/HTML. text = HtmlEscape(text) # Replace clumsy '->' symbols with HTML right-arrow character. text = text.replace('->', '→') return text def MdFunction(func, parent=None): md = '' doc = inspect.getdoc(func) if doc: if doc.startswith('Initialize self.'): # Special case: skip trivial constructors that have no documentation. return '' sig = inspect.signature(func) md += '\n' if parent: name = parent.__name__ + '.' + func.__name__ else: name = func.__name__ md += '---\n' md += '\n' md += '\n'.format(name) md += '### ' + name + MdSignature(sig) + '\n' info = DocInfo(doc) md += info.Markdown() md += '\n' else: Fail('No documentation for function ' + func.__name__) return md def MdClass(c): md = '' doc = inspect.getdoc(c) if doc: # Do not document the placeholder type `Any` if c.__name__ == 'Any': return '' md += '\n' md += '---\n' md += '\n' md += '\n'.format(c.__name__) md += '### class ' + c.__name__ + '\n' info = DocInfo(doc) md += info.Markdown() md += '\n' func_md = '' for name, obj in inspect.getmembers(c): if name == '__init__': func_md += MdFunction(obj, parent=c) for name, obj in inspect.getmembers(c): if not name.startswith('_'): func_md += MdFunction(obj, parent=c) if func_md: md += '#### member functions\n\n' + func_md else: Fail('No documentation for class ' + c.__name__) return md def MdEnumType(c): md = '' doc = inspect.getdoc(c) if doc: md += '\n' md += '---\n' md += '\n' md += '\n'.format(c.__name__) md += '### enum ' + c.__name__ + '\n' info = DocInfo(doc) info.VerifyEnum(set(c.__members__)) md += info.Markdown() md += '\n' else: Fail('No documentation for enumeration class ' + c.__name__) return md def MdErrType(c): md = '' doc = inspect.getdoc(c) if doc: md += '\n' md += '---\n' md += '\n' md += '\n'.format(c.__name__) md += '### ' + c.__name__ + '\n' info = DocInfo(doc) md += info.Markdown() md += '\n' else: Fail('No documentation for exception class ' + c.__name__) return md def Markdown(module, const_md, const_set): md = '' funclist = [] classlist = [] enumlist = [] errlist = [] for name, obj in inspect.getmembers(module): if not name.startswith('_'): if inspect.isfunction(obj): funclist.append(obj) elif inspect.isclass(obj): if issubclass(obj, enum.Enum): enumlist.append(obj) elif issubclass(obj, Exception): errlist.append(obj) else: classlist.append(obj) elif inspect.ismodule(obj): pass # ignore other modules pulled in else: # Assume this is a global constant. Fail if not documented # using my custom "#" documentation text. if name not in const_set: Fail('Undocumented symbol: ' + name) md += '---\n' md += '\n' md += '\n' md += '## Constants\n' md += 'The following numeric constants are exported by the `astronomy` module.\n' md += 'They may be of use for unit conversion.\n' md += 'Note: For the other supported programming languages, Astronomy Engine defines\n' md += 'helper constants `DEG2RAD` and `RAD2DEG` to convert between angular degrees and radians.\n' md += 'However, because Python defines the [angular conversion functions](https://docs.python.org/3/library/math.html#angular-conversion)\n' md += '`math.degrees()` and `math.radians()`, they are not needed in the Python version.\n' md += '\n' md += const_md md += '---\n' md += '\n' md += '\n' md += '## Classes\n' md += '\n' for c in classlist: md += MdClass(c) md += '---\n' md += '\n' md += '\n' md += '## Enumerated Types\n' md += '\n' for c in enumlist: md += MdEnumType(c) md += '---\n' md += '\n' md += '\n' md += '## Error Types\n' md += '\n' for c in errlist: md += MdErrType(c) md += '---\n' md += '\n' md += '\n' md += '## Functions\n' md += '\n' for func in funclist: md += MdFunction(func) # Remove extraneous blank lines. # We never need more than 2 consecutive newline characters. md = re.sub('\n{3,}', '\n\n', md) return md def ConstantsMd(inPythonFileName): documentedSymbolSet = set() md = '' clist = [] with open(inPythonFileName) as infile: for line in infile: # Consider symbols like Union defined in lines like the following: # "from typing import List, Optional, Union, Callable" tokens = line.split() if len(tokens) > 3 and tokens[0] == 'from' and tokens[2] == 'import': tokens = re.findall(r'[A-Za-z_][A-Za-z0-9_]*', line) if len(tokens) > 3 and tokens[0] == 'from' and tokens[2] == 'import': for name in tokens[3:]: documentedSymbolSet.add(name) continue # Consider symbols like KM_PER_AU defined in lines like the following: # "KM_PER_AU = 1.4959787069098932e+8 # The number of kilometers per astronomical unit." parts = line.split('#') if len(parts) == 2: code = parts[0].strip() doc = parts[1].strip() tokens = code.split() if len(tokens) >= 3 and tokens[1] == '=': # Reformat the code to remove extraneous spaces. codeText = ' '.join(tokens).strip() symbol = tokens[0] clist.append((symbol, codeText, doc)) documentedSymbolSet.add(symbol) continue for (symbol, code, doc) in sorted(clist): md += '\n---\n\n' md += '\n'.format(symbol) md += '### `{}`\n\n'.format(code) md += '**{}**\n\n'.format(doc) return md, documentedSymbolSet def main(): if len(sys.argv) != 4: return PrintUsage() prefixFileName = sys.argv[1] inPythonFileName = sys.argv[2] outMarkdownFileName = sys.argv[3] # Delete output file before we begin. # That way, if anything goes wrong, it won't exist, # and thus the error becomes conspicuous to scripts/tools. if os.access(outMarkdownFileName, os.F_OK): os.remove(outMarkdownFileName) # Load the prefix text. with open(prefixFileName, 'rt') as infile: prefix = infile.read() module = LoadModule(inPythonFileName) const_md, const_set = ConstantsMd(inPythonFileName) md = Markdown(module, const_md, const_set) with open(outMarkdownFileName, 'wt', encoding='utf-8') as outfile: outfile.write(prefix) outfile.write(md) return 0 if __name__ == '__main__': sys.exit(main())