More man processing improvements

- Support the commonmark library in addition to cmarkgfm.
- Remove github-flavor from the markup.
- A few more html style improvements.
This commit is contained in:
Wayne Davison
2020-06-09 15:21:38 -07:00
parent 68c865c9e6
commit 03fc62ad2f
5 changed files with 82 additions and 49 deletions

View File

@@ -126,6 +126,10 @@ Protocol: 31 (unchanged)
- Silenced some annoying warnings about major()|minor() due to the autoconf
include-file check not being smart enough.
- Converted the man pages from yodl to markdown. They are now processed via a
simple python3 script using the cmarkgfm OR commonmark library. This should
make it easier for packaging rsync, since yodl has gotten obscure.
- Improved some configure checks to work better with strict C99 compilers.
- The `--debug=FOO` options are no longer auto-forwarded to the server side,

84
md2man
View File

@@ -1,15 +1,15 @@
#!/usr/bin/python3
# This script takes a manpage written in github-flavored markdown and turns it
# into a html web page and a nroff man page. The input file must have the name
# of the program and the section in the format: NAME.NUM.md. The output files
# are written into the current directory named NAME.NUM.html and NAME.NUM. The
# input format has one extra extension: if a numbered list starts at 0, it is
# turned into a description list. The dl's dt tag is taken from the contents of
# the first tag inside the li, which is usually a p tag or a code tag. The
# cmarkgfm lib is used to transform the input file into html. The html.parser
# is used as a state machine that both tweaks the html and outputs the nroff
# data based on the html tags.
# This script takes a manpage written in markdown and turns it into an html web
# page and a nroff man page. The input file must have the name of the program
# and the section in this format: NAME.NUM.md. The output files are written
# into the current directory named NAME.NUM.html and NAME.NUM. The input
# format has one extra extension: if a numbered list starts at 0, it is turned
# into a description list. The dl's dt tag is taken from the contents of the
# first tag inside the li, which is usually a p, code, or strong tag. The
# cmarkgfm or commonmark lib is used to transform the input file into html.
# The html.parser is used as a state machine that both tweaks the html and
# outputs the nroff data based on the html tags.
#
# Copyright (C) 2020 Wayne Davison
#
@@ -23,16 +23,26 @@ CONSUMES_TXT = set('h1 h2 p li pre'.split())
HTML_START = """\
<html><head>
<title>%s</title>
<link href="https://fonts.googleapis.com/css2?family=Roboto&display=swap" rel="stylesheet">
<link href="https://fonts.googleapis.com/css2?family=Roboto&family=Roboto+Mono&display=swap" rel="stylesheet">
<style>
body {
max-width: 40em;
max-width: 50em;
margin: auto;
font-size: 1.2em;
}
body, b, strong, u {
font-family: 'Roboto', sans-serif;
}
code {
font-family: 'Roboto Mono', monospace;
font-weight: bold;
}
pre code {
display: block;
font-weight: normal;
}
blockquote pre code {
background: #eee;
background: #f1f1f1;
}
dd p:first-of-type {
margin-block-start: 0em;
@@ -47,7 +57,7 @@ HTML_END = """\
"""
MAN_START = r"""
.TH "%s" "%s" "%s" "" ""
.TH "%s" "%s" "%s" "%s" "User Commands"
""".lstrip()
MAN_END = """\
@@ -57,6 +67,8 @@ NORM_FONT = ('\1', r"\fP")
BOLD_FONT = ('\2', r"\fB")
ULIN_FONT = ('\3', r"\fI")
md_parser = None
def main():
fi = re.match(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+)\.(?P<sect>\d+))\.md)$', args.mdfile)
if not fi:
@@ -93,10 +105,14 @@ def main():
break
with open(fi.fn, 'r', encoding='utf-8') as fh:
txt = re.sub(r'@VERSION@', env_subs['VERSION'], fh.read())
txt = re.sub(r'@LIBDIR@', env_subs['libdir'], txt)
fi.html_in = cmarkgfm.github_flavored_markdown_to_html(txt)
txt = None
txt = fh.read()
txt = re.sub(r'@VERSION@', env_subs['VERSION'], txt)
txt = re.sub(r'@LIBDIR@', env_subs['libdir'], txt)
fi.html_in = md_parser(txt)
txt = None
fi.man_headings = (fi.prog, fi.sect, fi.date, fi.prog + ' ' + env_subs['VERSION'])
HtmlToManPage(fi)
@@ -109,6 +125,13 @@ def main():
with open(fn, 'w', encoding='utf-8') as fh:
fh.write(txt)
def html_via_cmarkgfm(txt):
    """Return the html produced by cmarkgfm.markdown_to_html() for txt."""
    html = cmarkgfm.markdown_to_html(txt)
    return html
def html_via_commonmark(txt):
    """Return the html produced by the commonmark library for txt.

    The text is parsed with commonmark.Parser and the resulting tree is
    rendered with commonmark.HtmlRenderer.
    """
    renderer = commonmark.HtmlRenderer()
    tree = commonmark.Parser().parse(txt)
    return renderer.render(tree)
class HtmlToManPage(HTMLParser):
def __init__(self, fi):
@@ -122,7 +145,7 @@ class HtmlToManPage(HTMLParser):
dt_from = None,
in_pre = False,
html_out = [ HTML_START % fi.title ],
man_out = [ MAN_START % (fi.prog, fi.sect, fi.date) ],
man_out = [ MAN_START % fi.man_headings ],
txt = '',
)
@@ -171,9 +194,10 @@ class HtmlToManPage(HTMLParser):
st.man_out.append(st.p_macro + ".nf\n")
elif tag == 'code' and not st.in_pre:
st.txt += BOLD_FONT[0]
elif tag == 'strong' or tag == 'bold':
elif tag == 'strong' or tag == 'b':
st.txt += BOLD_FONT[0]
elif tag == 'i' or tag == 'em':
elif tag == 'em' or tag == 'i':
tag = 'u' # Change it into underline to be more like the man page
st.txt += ULIN_FONT[0]
elif tag == 'ol':
start = 1
@@ -236,11 +260,12 @@ class HtmlToManPage(HTMLParser):
st.in_pre = False
st.man_out.append(manify(txt) + "\n.fi\n")
elif tag == 'code' and not st.in_pre:
add_to_txt = NORM_FONT[0]
elif tag == 'strong' or tag == 'bold':
add_to_txt = NORM_FONT[0]
elif tag == 'i' or tag == 'em':
add_to_txt = NORM_FONT[0]
add_to_txt = NORM_FONT[0]
elif tag == 'strong' or tag == 'b':
add_to_txt = NORM_FONT[0]
elif tag == 'em' or tag == 'i':
tag = 'u' # Change it into underline to be more like the man page
add_to_txt = NORM_FONT[0]
elif tag == 'ol' or tag == 'ul':
if st.list_state.pop() == 'dl':
tag = 'dl'
@@ -315,7 +340,12 @@ if __name__ == '__main__':
try:
import cmarkgfm
md_parser = html_via_cmarkgfm
except:
die("The cmarkgfm library is not available for python3.")
try:
import commonmark
md_parser = html_via_commonmark
except:
die("Failed to find cmarkgfm or commonmark for python3.")
main()

View File

@@ -68,7 +68,7 @@ The ssl helper scripts are affected by the following environment variables:
# BUGS
Please report bugs! See the web site at http://rsync.samba.org/
Please report bugs! See the web site at <http://rsync.samba.org/>.
# VERSION
@@ -79,7 +79,7 @@ This man page is current for version @VERSION@ of rsync.
rsync is distributed under the GNU General Public License. See the file
COPYING for details.
A WEB site is available at http://rsync.samba.org/ . The site includes an
A web site is available at <http://rsync.samba.org/>. The site includes an
FAQ-O-Matic which may cover questions unanswered by this manual page.
# AUTHOR
@@ -87,4 +87,4 @@ FAQ-O-Matic which may cover questions unanswered by this manual page.
This manpage was written by Wayne Davison.
Mailing lists for support and development are available at
http://lists.samba.org .
<http://lists.samba.org/>.

View File

@@ -3854,7 +3854,7 @@ values
see also the comments on the `--delete` option
Please report bugs! See the web site at http://rsync.samba.org/
Please report bugs! See the web site at <http://rsync.samba.org/>.
# VERSION
@@ -3874,17 +3874,16 @@ that can be used with a restricted ssh login.
rsync is distributed under the GNU General Public License. See the file
COPYING for details.
A WEB site is available at http://rsync.samba.org/ . The
site includes an FAQ-O-Matic which may cover questions unanswered by this
manual page.
A web site is available at <http://rsync.samba.org/>. The site includes an
FAQ-O-Matic which may cover questions unanswered by this manual page.
The primary ftp site for rsync is ftp://rsync.samba.org/pub/rsync .
The primary ftp site for rsync is <ftp://rsync.samba.org/pub/rsync>.
We would be delighted to hear from you if you like this program.
Please contact the mailing-list at rsync@lists.samba.org.
We would be delighted to hear from you if you like this program. Please
contact the mailing-list at <rsync@lists.samba.org>.
This program uses the excellent zlib compression library written by
Jean-loup Gailly and Mark Adler.
This program uses the excellent zlib compression library written by Jean-loup
Gailly and Mark Adler.
# THANKS
@@ -3892,14 +3891,14 @@ Special thanks go out to: John Van Essen, Matt McCutchen, Wesley W. Terpstra,
David Dykstra, Jos Backus, Sebastian Krahmer, Martin Pool, and our
gone-but-not-forgotten compadre, J.W. Schultz.
Thanks also to Richard Brent, Brendan Mackay, Bill Waite, Stephen Rothwell
and David Bell. I've probably missed some people, my apologies if I have.
Thanks also to Richard Brent, Brendan Mackay, Bill Waite, Stephen Rothwell and
David Bell. I've probably missed some people; my apologies if I have.
# AUTHOR
rsync was originally written by Andrew Tridgell and Paul Mackerras.
Many people have later contributed to it. It is currently maintained
by Wayne Davison.
rsync was originally written by Andrew Tridgell and Paul Mackerras. Many
people have later contributed to it. It is currently maintained by Wayne
Davison.
Mailing lists for support and development are available at
http://lists.samba.org .
<http://lists.samba.org/>.

View File

@@ -1092,7 +1092,7 @@ The /etc/rsyncd.secrets file would look something like this:
# BUGS
Please report bugs! The rsync bug tracking system is online at
http://rsync.samba.org/ .
<http://rsync.samba.org/>.
# VERSION
@@ -1103,9 +1103,9 @@ This man page is current for version @VERSION@ of rsync.
rsync is distributed under the GNU General Public License. See the file
COPYING for details.
The primary ftp site for rsync is ftp://rsync.samba.org/pub/rsync .
The primary ftp site for rsync is <ftp://rsync.samba.org/pub/rsync>.
A WEB site is available at http://rsync.samba.org/ .
A web site is available at <http://rsync.samba.org/>.
We would be delighted to hear from you if you like this program.
@@ -1123,4 +1123,4 @@ rsync was written by Andrew Tridgell and Paul Mackerras. Many people have
later contributed to it.
Mailing lists for support and development are available at
http://lists.samba.org .
<http://lists.samba.org/>.