Files
astronomy/generate/pydown/pydown.py
Don Cross d3a2596e4c Exclude Any type from Python documentation.
Now that we use Python type hints, I discovered that
Python 3.11.2 includes the placeholder type `Any`
in the metadata reflected by `importlib`.
In order to generate consistent documentation with
older versions of Python, I exclude `Any` if it is present.

This change affects only the documentation and has
no effect on the Astronomy Engine package itself.
2023-03-22 12:23:10 -04:00

447 lines
15 KiB
Python
Executable File

#!/usr/bin/env python3
import sys
import os
import re
import importlib
import inspect
import enum
def PrintUsage():
    """Print the command-line usage text.

    Returns
    -------
    int
        Always 1, so the caller can use the result as a failing exit code.
    """
    usage = "\nUSAGE: pydown.py prefix.md infile.py outfile.md\n"
    print(usage)
    return 1
def Fail(message):
    """Print a fatal error message and terminate the process with exit status 1."""
    print('FATAL(pydown): {}'.format(message))
    raise SystemExit(1)
def LoadModule(inPythonFileName):
    """Import the Python source file at the given path and return the module.

    The file's directory (or the current working directory when the path has
    no directory part) is appended to `sys.path`, and the module is then
    imported by its base name with any trailing '.py' removed.
    """
    folder, filename = os.path.split(inPythonFileName)
    sys.path.append(folder or os.getcwd())
    # chop off '.py' so that only the importable module name remains
    modname = filename[:-3] if filename.endswith('.py') else filename
    return importlib.import_module(modname)
def HtmlEscape(text):
    """Escape the characters '&', '<' and '>' so the text is safe inside HTML/Markdown.

    Parameters
    ----------
    text : str
        The raw text to escape.

    Returns
    -------
    str
        The text with '&' replaced by '&amp;', '<' by '&lt;' and '>' by '&gt;'.
    """
    # The ampersand must be handled first; otherwise the ampersands introduced
    # by the '&lt;' and '&gt;' replacements below would be escaped a second time.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    return text
def SymbolLink(name):
    """Render a type name as Markdown, linking it to its anchor where appropriate.

    Names that start with a lowercase ASCII letter are assumed to be built-in
    Python identifiers and are rendered as inline code without a link.
    Any other name becomes a link of the form [`Name`](#Name).
    """
    # Special case: Search() and related functions have return type = "Time or `None`"
    optional = re.match(r'^\s*([a-zA-Z0-9_]+)\s+or\s+`None`\s*$', name)
    if optional is not None:
        return SymbolLink(optional.group(1)) + ' or `None`'

    first = name[0]
    if 'a' <= first <= 'z':
        # Built-in Python identifier: inline code only, no link.
        return '`' + name + '`'

    # Names such as `StateVector[]` link to StateVector; the trailing
    # punctuation is kept as inline code outside the link.
    decorated = re.match(r'^\s*([a-zA-Z0-9_]+(\.[a-zA-Z0-9_]+)*)([^a-zA-Z0-9_\s]+)', name)
    if decorated is not None:
        base, suffix = decorated.group(1), decorated.group(3)
        return SymbolLink(base) + '`' + suffix + '`'

    return '[`{0}`](#{0})'.format(name)
def FixText(s):
    """Turn '#Name' references in documentation text into Markdown links.

    "#Body" becomes "[`Body`](#Body)". Dotted references such as
    "#GravitySimulator.Update" are linked as a whole, while a period that
    merely ends a sentence ("#GravitySimulator. Blah blah...") is left
    outside the link, because every dotted component must start with an
    uppercase letter.
    """
    reference = re.compile(r'#([A-Z][A-Za-z0-9_]*(\.[A-Z][A-Za-z0-9_]*)*)')
    return reference.sub(r'[`\1`](#\1)', s)
class ParmInfo:
    """One documented parameter or attribute: its name, type text and description."""

    def __init__(self, name, type):
        self.name = name
        self.type = type
        # Built up incrementally by AppendDescriptionLine.
        self.description = ''

    def AppendDescriptionLine(self, line):
        """Append one description line, trimmed and followed by a single space."""
        self.description = '{}{} '.format(self.description, line.strip())
class DocInfo:
    """Parsed form of a single docstring, renderable as Markdown.

    The constructor splits the docstring into a summary line, free-form
    description text (with `>>>` examples wrapped in code fences), and the
    contents of the sections introduced by a heading line: 'Parameters',
    'Attributes', 'Returns', 'Values', 'Example' and 'Examples'.
    Lines consisting only of dashes (heading underlines) are ignored,
    and the contents of 'Example'/'Examples' sections are discarded.
    """

    # A line exactly equal to one of these starts the section of that name.
    _SECTION_NAMES = ('Parameters', 'Returns', 'Example', 'Examples', 'Attributes', 'Values')

    def __init__(self, doc):
        self.description = ''
        self.parameters = []    # ParmInfo entries from the 'Parameters' section
        self.attributes = []    # ParmInfo entries from the 'Attributes' section
        self.enumValues = []    # (name, description) pairs from the 'Values' section
        self.returnType = None
        self.returns = ''
        lines = doc.split('\n')
        # First line is boldfaced if followed by blank line.
        if len(lines) >= 2 and lines[0].strip() != '' and lines[1].strip() == '':
            self.summary = lines[0]
            lines = lines[2:]
        else:
            self.summary = ''
        currentAttr = None
        currentParm = None
        mode = ''
        for line in lines:
            if re.match(r'^\-+$', line):
                continue
            if line in self._SECTION_NAMES:
                if mode == 'code':
                    # A heading that directly follows an example must still
                    # close the fence, or the rest of the output would be
                    # swallowed by the code block.
                    self.description += '```\n'
                mode = line
                continue
            if line.strip() == '':
                # A blank line ends a code example and any current section.
                if mode == 'code':
                    self.description += '```\n'
                mode = ''
                continue
            if mode == 'Parameters':
                currentParm = self.ProcessParmAttrLine(line, currentParm, self.parameters)
            elif mode == 'Attributes':
                currentAttr = self.ProcessParmAttrLine(line, currentAttr, self.attributes)
            elif mode == 'Returns':
                # Indented lines describe the return value; an unindented line names its type.
                if line.startswith(' '):
                    self.returns += line.strip() + '\n'
                else:
                    self.returnType = line.strip()
            elif mode == 'Example' or mode == 'Examples':
                pass
            elif mode == 'Values':
                self.ProcessEnumValue(line)
            elif mode == '':
                if re.match(r'^\s*>>>', line):
                    mode = 'code'
                    self.description += '```\n'
                self.description += line + '\n'
            elif mode == 'code':
                self.description += line + '\n'
            else:
                raise Exception('Unknown mode = "{}"'.format(mode))
        if mode == 'code':
            self.description += '```\n'

    def ProcessEnumValue(self, line):
        """Parse one 'NAME : description' line of a 'Values' section.

        Raises
        ------
        Exception
            If the line does not have the form 'NAME : description'.
        """
        # The name may be a single letter, hence '*' rather than '+'.
        m = re.match(r'^\s*([A-Za-z][A-Za-z0-9_]*)\s*:\s*(.*)$', line)
        if not m:
            raise Exception('Invalid enum documentation: "{}"'.format(line))
        pair = (m.group(1), m.group(2).strip())
        self.enumValues.append(pair)

    def ProcessParmAttrLine(self, line, item, itemlist):
        """Consume one line of a 'Parameters' or 'Attributes' section.

        An indented line extends the description of `item`, the entry
        currently being built. An unindented line must be 'name : type';
        it starts a new entry, which is appended to `itemlist`.

        Returns
        -------
        ParmInfo
            The entry that subsequent description lines belong to.

        Raises
        ------
        Exception
            If a description line appears before any 'name : type' line,
            or an unindented line does not contain exactly one colon.
        """
        if line.startswith(' '):
            # The first line of description, or another line of description.
            if item is None:
                raise Exception('Description line has no preceding name:type line: "{}"'.format(line))
            item.AppendDescriptionLine(line)
        else:
            # name : type
            token = line.split(':')
            if len(token) != 2:
                raise Exception('Expected name:type but found: "{}"'.format(line))
            item = ParmInfo(token[0].strip(), token[1].strip())
            itemlist.append(item)
        return item

    def Table(self, itemlist, tag):
        """Render parameters or attributes as a Markdown table.

        `tag` is the heading of the name column. Returns an empty string
        when `itemlist` is empty. Raises Exception if an entry has no type.
        """
        md = ''
        if itemlist:
            md += '| Type | {} | Description |\n'.format(tag)
            md += '| --- | --- | --- |\n'
            for p in itemlist:
                if not p.type:
                    raise Exception('Symbol "{}" has missing type declaration.'.format(p.name))
                md += '| {} | {} | {} |\n'.format(SymbolLink(p.type), '`' + p.name + '`', FixText(p.description.strip()))
            md += '\n'
        return md

    def EnumTable(self):
        """Render the documented enum values as a Markdown table ('' if there are none)."""
        md = ''
        if self.enumValues:
            md += '| Value | Description |\n'
            md += '| --- | --- |\n'
            for (name, desc) in self.enumValues:
                md += '| {} | {} |\n'.format('`' + name + '`', desc)
        return md

    def Markdown(self):
        """Render the whole parsed docstring as Markdown text."""
        md = '\n'
        if self.summary:
            md += '**' + FixText(self.summary) + '**\n\n'
        if self.description:
            md += FixText(self.description) + '\n\n'
        md += self.Table(self.parameters, 'Parameter')
        md += self.Table(self.attributes, 'Attribute')
        md += self.EnumTable()
        if self.returns or self.returnType:
            md += '\n**Returns**'
            if self.returnType:
                md += ': ' + SymbolLink(self.returnType)
            md += '\n'
            md += self.returns + '\n'
            md += '\n'
        md += '\n'
        return md

    def VerifyEnum(self, members):
        """Check that the documented enum value names equal the set `members`.

        On a mismatch both name lists are printed (sorted, so the diagnostic
        is reproducible from run to run) and an Exception is raised.
        """
        defs = set(name for (name, _) in self.enumValues)
        if defs != members:
            print('Actual enums: [' + ', '.join(sorted(members)) + ']')
            print('Documented enums: [' + ', '.join(sorted(defs)) + ']')
            raise Exception('Documented enums do not match actual enums.')
def MdSignature(sig):
    """Convert a signature from inspect.signature() into Markdown/HTML text."""
    rendered = str(sig)

    # With type hints, functions that return either a value or None are
    # reported inconsistently: older Pythons give "Optional[astronomy.Time]",
    # newer ones "Union[astronomy.Time, NoneType]". Normalize to the older,
    # clearer spelling so the generated documentation does not depend on
    # the Python version.
    rendered = re.sub(
        r'Union\[([A-Za-z_][A-Za-z_0-9\.]*),\s*NoneType\]',
        r'Optional[\1]',
        rendered)

    # Escape square brackets as in Optional[X] to Optional\[X\] for Markdown safety.
    rendered = rendered.replace('[', r'\[')
    rendered = rendered.replace(']', r'\]')

    def link(match):
        return SymbolLink(match.group(1))

    # 'astronomy.X' becomes a link to the type X ...
    rendered = re.sub(r'\bastronomy\.([A-Za-z_][A-Za-z_0-9]*)', link, rendered)
    # ... and so does a quoted forward declaration 'X'.
    rendered = re.sub(r"'([A-Za-z_][A-Za-z_0-9]*)'", link, rendered)

    # Escape for Markdown/HTML, then swap the clumsy '->' for an HTML right arrow.
    return HtmlEscape(rendered).replace('-&gt;', '&#8594;')
def MdFunction(func, parent=None):
    """Render the Markdown section for one function or member function.

    When `parent` is given, the heading and anchor use 'Parent.name'.
    Functions without a docstring are a fatal error. Constructors whose
    docstring begins with 'Initialize self.' produce an empty string.
    """
    doc = inspect.getdoc(func)
    if not doc:
        Fail('No documentation for function ' + func.__name__)
        return ''
    if doc.startswith('Initialize self.'):
        # Special case: skip trivial constructors that have no documentation.
        return ''

    sig = inspect.signature(func)
    name = func.__name__
    if parent:
        name = parent.__name__ + '.' + name

    pieces = [
        '\n',
        '---\n',
        '\n',
        '<a name="{}"></a>\n'.format(name),
        '### ' + name + MdSignature(sig) + '\n',
    ]
    pieces.append(DocInfo(doc).Markdown())
    pieces.append('\n')
    return ''.join(pieces)
def MdClass(c):
    """Render the Markdown section for a class, followed by its member functions.

    The constructor `__init__` is listed first, then every member whose name
    does not start with an underscore. A class without a docstring is a
    fatal error. The placeholder type `Any` produces an empty string.
    """
    doc = inspect.getdoc(c)
    if not doc:
        Fail('No documentation for class ' + c.__name__)
        return ''
    # Do not document the placeholder type `Any`
    if c.__name__ == 'Any':
        return ''

    md = ''.join([
        '\n',
        '---\n',
        '\n',
        '<a name="{}"></a>\n'.format(c.__name__),
        '### class ' + c.__name__ + '\n',
    ])
    md += DocInfo(doc).Markdown()
    md += '\n'

    members = inspect.getmembers(c)
    constructors = [obj for name, obj in members if name == '__init__']
    public = [obj for name, obj in members if not name.startswith('_')]
    func_md = ''.join(MdFunction(obj, parent=c) for obj in constructors + public)
    if func_md:
        md += '#### member functions\n\n' + func_md
    return md
def MdEnumType(c):
    """Render the Markdown section for an enumeration class.

    The documented values are checked against the enumeration's actual
    member names before rendering. An enumeration without a docstring is a
    fatal error.
    """
    doc = inspect.getdoc(c)
    if not doc:
        Fail('No documentation for enumeration class ' + c.__name__)
        return ''

    header = ''.join([
        '\n',
        '---\n',
        '\n',
        '<a name="{}"></a>\n'.format(c.__name__),
        '### enum ' + c.__name__ + '\n',
    ])
    info = DocInfo(doc)
    info.VerifyEnum(set(c.__members__))
    return header + info.Markdown() + '\n'
def MdErrType(c):
    """Render the Markdown section for an exception class.

    An exception class without a docstring is a fatal error.
    """
    doc = inspect.getdoc(c)
    if not doc:
        Fail('No documentation for exception class ' + c.__name__)
        return ''

    header = ''.join([
        '\n',
        '---\n',
        '\n',
        '<a name="{}"></a>\n'.format(c.__name__),
        '### ' + c.__name__ + '\n',
    ])
    return header + DocInfo(doc).Markdown() + '\n'
def Markdown(module, const_md, const_set):
    """Generate the Markdown reference text for every public symbol of a module.

    Public members (names not starting with an underscore) are sorted into
    functions, enumerations, exception classes and other classes. Imported
    modules are ignored. Anything else is treated as a global constant and
    must be listed in `const_set`, otherwise generation fails.
    `const_md` is inserted verbatim as the body of the Constants section.
    """
    functions, classes, enums, errors = [], [], [], []
    for name, obj in inspect.getmembers(module):
        if name.startswith('_'):
            continue
        if inspect.isfunction(obj):
            functions.append(obj)
        elif inspect.isclass(obj):
            if issubclass(obj, enum.Enum):
                bucket = enums
            elif issubclass(obj, Exception):
                bucket = errors
            else:
                bucket = classes
            bucket.append(obj)
        elif inspect.ismodule(obj):
            continue    # ignore other modules pulled in
        elif name not in const_set:
            # Assume this is a global constant. Fail if not documented
            # using the custom "#<const>" documentation text.
            Fail('Undocumented symbol: ' + name)

    parts = [
        '---\n',
        '\n',
        '<a name="constants"></a>\n',
        '## Constants\n',
        'The following numeric constants are exported by the `astronomy` module.\n',
        'They may be of use for unit conversion.\n',
        'Note: For the other supported programming languages, Astronomy Engine defines\n',
        'helper constants `DEG2RAD` and `RAD2DEG` to convert between angular degrees and radians.\n',
        'However, because Python defines the [angular conversion functions](https://docs.python.org/3/library/math.html#angular-conversion)\n',
        '`math.degrees()` and `math.radians()`, they are not needed in the Python version.\n',
        '\n',
        const_md,
    ]

    parts += ['---\n', '\n', '<a name="classes"></a>\n', '## Classes\n', '\n']
    parts.extend(MdClass(c) for c in classes)

    parts += ['---\n', '\n', '<a name="enumerations"></a>\n', '## Enumerated Types\n', '\n']
    parts.extend(MdEnumType(c) for c in enums)

    parts += ['---\n', '\n', '<a name="errors"></a>\n', '## Error Types\n', '\n']
    parts.extend(MdErrType(c) for c in errors)

    parts += ['---\n', '\n', '<a name="functions"></a>\n', '## Functions\n', '\n']
    parts.extend(MdFunction(func) for func in functions)

    # Remove extraneous blank lines.
    # We never need more than 2 consecutive newline characters.
    return re.sub('\n{3,}', '\n\n', ''.join(parts))
def ConstantsMd(inPythonFileName):
    """Scan a Python source file for documented constants and imported names.

    Two kinds of lines are recognized:

    * "from MODULE import A, B, ..." -- every identifier after `import` is
      recorded as a known symbol. MODULE may be dotted or relative.
    * "NAME = value  #<const> description" -- NAME is recorded as a known
      symbol and a Markdown entry is generated for it.

    Parameters
    ----------
    inPythonFileName : str
        Path of the Python source file to scan. It is read as UTF-8.

    Returns
    -------
    tuple
        (md, documentedSymbolSet): the Markdown text for all documented
        constants, sorted by symbol, and the set of all recognized names.
    """
    documentedSymbolSet = set()
    clist = []
    # Read as UTF-8 regardless of the platform's locale default.
    with open(inPythonFileName, encoding='utf-8') as infile:
        for line in infile:
            # Consider symbols like Union defined in lines like the following:
            # "from typing import List, Optional, Union, Callable"
            tokens = line.split()
            if len(tokens) > 3 and tokens[0] == 'from' and tokens[2] == 'import':
                # Only the text after 'import' is searched for identifiers, so
                # the names are found whatever the module spelling is
                # (e.g. "os.path" or ".").
                imported = ' '.join(tokens[3:])
                documentedSymbolSet.update(re.findall(r'[A-Za-z_][A-Za-z0-9_]*', imported))
                continue
            # Consider symbols like KM_PER_AU defined in lines like the following:
            # "KM_PER_AU = 1.4959787069098932e+8 #<const> The number of kilometers per astronomical unit."
            parts = line.split('#<const>')
            if len(parts) == 2:
                code = parts[0].strip()
                doc = parts[1].strip()
                tokens = code.split()
                if len(tokens) >= 3 and tokens[1] == '=':
                    # Reformat the code to remove extraneous spaces.
                    codeText = ' '.join(tokens).strip()
                    symbol = tokens[0]
                    clist.append((symbol, codeText, doc))
                    documentedSymbolSet.add(symbol)
    entries = []
    for (symbol, code, doc) in sorted(clist):
        entries.append('\n---\n\n')
        entries.append('<a name="{}"></a>\n'.format(symbol))
        entries.append('### `{}`\n\n'.format(code))
        entries.append('**{}**\n\n'.format(doc))
    return ''.join(entries), documentedSymbolSet
def main():
    """Command-line entry point: pydown.py prefix.md infile.py outfile.md

    Writes the prefix file's text followed by the Markdown generated from
    the Python module `infile.py` to the output file.

    Returns
    -------
    int
        0 on success, or 1 when the wrong number of arguments was supplied.
    """
    if len(sys.argv) != 4:
        return PrintUsage()
    prefixFileName, inPythonFileName, outMarkdownFileName = sys.argv[1:4]
    # Delete output file before we begin.
    # That way, if anything goes wrong, it won't exist,
    # and thus the error becomes conspicuous to scripts/tools.
    try:
        os.remove(outMarkdownFileName)
    except FileNotFoundError:
        pass    # nothing to delete
    # Load the prefix text. It is read as UTF-8 to match the encoding
    # used for writing the output below.
    with open(prefixFileName, 'rt', encoding='utf-8') as infile:
        prefix = infile.read()
    module = LoadModule(inPythonFileName)
    const_md, const_set = ConstantsMd(inPythonFileName)
    md = Markdown(module, const_md, const_set)
    with open(outMarkdownFileName, 'wt', encoding='utf-8') as outfile:
        outfile.write(prefix)
        outfile.write(md)
    return 0
# Run main() only when executed as a script; its return value is the exit status.
if __name__ == '__main__':
    raise SystemExit(main())