# py2html Module (py2html.py)


#!/usr/local/bin/python

"""Pretty-print Python code to colorized, hyperlinked html.

In python, do:
    py2html.convert_files(['file1.py', 'file2.py', ...])
From the shell, do:
    python py2html.py *.py"""

import re, string, time

## RE for a Python identifier
id = r'[a-zA-Z_][a-zA-Z_0-9]*'
## Backreference strings for regex groups 1-4, for building re.sub templates
g1, g2, g3, g4 = (r'\1', r'\2', r'\3', r'\4')
def b(text):
    """Return text wrapped in an HTML <b> (bold) element."""
    template = '<b>%s</b>'
    return template % text
def i(text):
    """Return text wrapped in an HTML <i> (italic) element."""
    template = '<i>%s</i>'
    return template % text
def color(rgb, text):
    """Return text wrapped in a <font> element whose color attribute is rgb."""
    template = '<font color="%s">%s</font>'
    fields = (rgb, text)
    return template % fields
def link(url, anchor):
    """Return an HTML <a> element pointing at url with anchor as its text."""
    template = '<a href="%s">%s</a>'
    fields = (url, anchor)
    return template % fields
def hilite(text, bg="ffff00"):
    """Return text in a bold element with background color bg (default ffff00)."""
    template = '<b style="background-color:%s">%s</b>'
    fields = (bg, text)
    return template % fields

def modulelink(module, baseurl=''):
    """Return an HTML link to the page for module.

    If module + '.py' is listed in the global local_files, the link is
    baseurl + module + '.html'; otherwise baseurl is replaced by the
    python.org library-reference prefix.
    """
    is_local = (module + '.py') in local_files
    if not is_local:
        baseurl = 'http://www.python.org/doc/current/lib/module-'
    target = baseurl + module + '.html'
    return link(target, module)

def importer(m):
    """Regex replacement callback: hyperlink a comma-separated module list.

    Group 2 of the match m holds text such as 'utils, math, re'; each name
    is stripped and passed through modulelink().  Groups 1 and 3 are kept
    unchanged around the resulting ', '-joined links.
    """
    prefix, names, suffix = m.group(1), m.group(2), m.group(3)
    links = []
    for name in names.split(','):
        links.append(modulelink(name.strip()))
    return prefix + ', '.join(links) + suffix

def find1(regex, str):
    """Return the first re.findall result for regex in str, or '&nbsp;' if none."""
    matches = re.findall(regex, str)
    if matches:
        return matches[0]
    return '&nbsp;'

def convert_files(filenames, local_filenames=None, tblfile='python.html'):
    """Convert files of python code to colorized HTML.

    For every name in filenames the source is read, trailing whitespace is
    stripped from each line, every (pattern, repl) pair in the module-level
    ``replacements`` list is applied in order with re.sub, and the result is
    written, wrapped in header() and footer(), to the same path with its last
    three characters (normally '.py') replaced by '.html'.

    Args:
        filenames: paths of the source files to convert.
        local_filenames: names regarded as local modules when imports are
            hyperlinked (stored in the global ``local_files``); defaults to
            filenames.
        tblfile: path of an existing HTML file whose
            ``<!-- table --> ... </table>`` section is replaced by a summary
            table (chapter, module, line count, description).  Pass a false
            value to skip the summary.

    Raises:
        IOError / OSError: if a source file or tblfile cannot be opened;
            tblfile is read before it is rewritten, so it must already exist.
    """
    global local_files
    import functools  # local import: only needed to adapt num_cmp for sorting

    local_files = local_filenames or filenames
    summary_table = {}
    for f in filenames:
        # Context managers close the files promptly instead of leaking them.
        with open(f) as source:
            fulltext = '\n'.join([line.rstrip() for line in source])
        text = fulltext
        for (pattern, repl) in replacements:
            text = re.sub(pattern, repl, text)
        text = '%s<pre>%s</pre>%s' % (header(f), text, footer(f))
        # One base name for both the output file and the summary link, so the
        # link always points at the file that was actually written.
        base = f[:-3]
        with open(base + '.html', 'w') as out:
            out.write(text)
        if tblfile:
            ch = find1(r'Chapters?\s+([^ \)"]*)', fulltext)
            lines = fulltext.count('\n')
            desc = find1(r'"""(.*)\n', fulltext).replace('"""', '')
            summary_table.setdefault(ch, []).append((base, lines, desc))
    if tblfile:
        totallines = 0
        tbl = ["<!-- table -->",
               "<tr><th>Chapter<th>Module<th>Lines<th>Description"]
        fmt = "<tr><td align=right>%s<td>%s<td align=right>%s<td>%s"
        # cmp_to_key keeps num_cmp's ordering; list.sort(cmpfunc) is
        # Python 2 only.
        items = sorted(summary_table.items(),
                       key=functools.cmp_to_key(num_cmp))
        for (ch, entries) in items:
            for (module, lines, desc) in entries:
                totallines += lines
                tbl.append(fmt % (ch, link(module + '.html', module),
                                  lines, desc))
        tbl += [fmt % ('', '', totallines, ''), "</table>"]
        table_html = '\n'.join(tbl)
        with open(tblfile) as table_in:
            old = table_in.read()
        # A function replacement inserts table_html verbatim; as a template
        # string, any backslash in a description would be treated as an
        # escape or group reference.
        new = re.sub("(?s)<!-- table -->(.*)</table>",
                     lambda match: table_html, old, count=1)
        with open(tblfile, 'w') as table_out:
            table_out.write(new)

def num_cmp(x, y):
    """Three-way compare two sequences by the chapter label in element 0.

    A label containing digits is ordered by the integer value of its first
    run of digits (so '10' sorts after '9', and '3-4' equals '3').  Labels
    without digits are compared as strings and sort after every numbered
    label; None sorts first.  This is the ordering Python 2's cmp() gave for
    mixed None/int/str values, computed without relying on cmp() or on
    cross-type comparison, neither of which exists in Python 3.

    Args:
        x, y: sequences whose first item is a chapter label (str or None).

    Returns:
        -1, 0 or 1, suitable for functools.cmp_to_key.
    """
    def sort_key(label):
        digits = re.findall('[0-9]+', label or '')
        if digits:
            return (1, int(digits[0]), '')
        if label is None:
            return (0, 0, '')
        # Type-tagged so numbered and unnumbered labels never compare directly.
        return (2, 0, label)
    left, right = sort_key(x[0]), sort_key(y[0])
    return (left > right) - (left < right)

### Above is general (more or less); below is specific to my files.

def header(file):
    """Return the opening HTML for file's page: title, heading and a rule.

    The module name is file without its last three characters; the heading
    shows it highlighted, followed by a link to file itself.
    """
    module = file[:-3]
    template = """<html><head><title>%s Module</title></head>
    <body bgcolor=ffffff><h1>%s Module (<a href="%s">%s</a>)</h1><hr>"""
    fields = (module, hilite(module), file, file)
    return template % fields

def footer(file):
    """Return the closing HTML block of copyright and reference links.

    file is interpolated as the fragment of the docex-log.html
    "Example Output" link.
    """
    template = """<p><hr><i>
<br> #
<a href="http://www.norvig.com/license.html">Copyright</a>:
<a href="http://www.norvig.com">Peter Norvig</a>, 2002.
<br> #
<a href="http://www.cs.berkeley.edu/~russell/aima.html">AIMA</a>:
<a href="python.html">Python Code</a>,
<a href="docex-log.html#%s">Example Output</a>.
<br> #
<a href="http://www.python.org">Python.org</a>:
<a href="http://www.python.org/doc/current/tut/tut.html">Tutorial</a>,
<a href="http://www.python.org/doc/current/ref/ref.html">Language Ref</a>,
<a href="http://www.python.org/doc/current/lib/lib.html">Libraries</a>.
</i>"""
    return template % file

def comment(text):
    """Return text rendered in green italics (green <font> inside <i>)."""
    green = color("green", text)
    return i(green)

# Ordered (pattern, replacement) pairs that convert_files applies one after
# another with re.sub.  A replacement is either a template string built from
# the g1..g4 backreferences or a function of the match object (importer).
# Order matters: each pattern runs on the output of the previous ones.
replacements = [
    # Escape HTML metacharacters; '&' goes first so the entities produced
    # for '<' and '>' are not escaped a second time.
    (r'&', '&amp;'),
    (r'<', '&lt;'),
    (r'>', '&gt;'),
    # A line-initial run of '#' followed by 10+ '#'/'_' characters (a
    # separator line) becomes a horizontal rule.
    (r'(?ms)^#+[#_]{10,} *\n', '<hr>'),
    # Single- or double-quoted strings: rendered via comment().
    (r"""('[^']*?'|"[^"]*?")""", comment(g1)),
    # Triple-quoted strings (may span lines): rendered via comment().
    (r'(?s)(""".*?"""|' + r"'''.*?''')", comment(g1)),
    # '#' through end of line: colored cc33cc.
    (r'(#.*)', color("cc33cc", g1)),
    # Names at the start of a line followed by ' = ': highlighted.
    (r'(?m)(^[a-zA-Z][a-zA-Z_0-9, ]+)(\s+=\s+)', hilite(g1) + g2),
    # 'def name' at the start of a line: highlighted as a whole.
    (r'(?m)(^def\s+%s)' % id, hilite(g1)),
    # Indented 'def': keyword in bold, name highlighted in ffff99.
    (r'(?m)(^\s+def)(\s+)(%s)' % id, b(g1) + g2 + hilite(g3, "ffff99")),
    # 'class Name' (any indentation): highlighted, leading whitespace kept.
    (r'(?m)(^\s*)(class)(\s+)(%s)' % id, g1 + hilite(g2+g3+g4)),
    # Import statements: module names are turned into links by importer().
    (r'(from\s+)([a-z]+)(\s+import)', importer),
    (r'(import\s+)([a-z, ]+)(\s|\n|$|,)', importer),
    ]

if __name__ == '__main__':
    import sys, glob
    # Treat every command-line argument as a glob pattern and convert all
    # of the files the patterns match.
    files = []
    for arg in sys.argv[1:]:
        files += glob.glob(arg)
    convert_files(files)

## ENHANCEMENTS:
## Can get confused with """ and '''; not a problem in practice.
## Maybe each def c or class c should have an <a name="filename-c"> tag,
## and then we should create an index of those anchors.



# Copyright: Peter Norvig, 2002.
# AIMA: Python Code, Example Output.
# Python.org: Tutorial, Language Ref, Libraries.