#!/usr/local/bin/python
"""Pretty-print Python code to colorized, hyperlinked html.
In python, do:
py2html.convert_files(['file1.py', 'file2.py', ...])
From the shell, do:
python py2html.py *.py"""
import functools
import os
import re
import string
import time
# Regular expression source matching a single Python identifier.
id = r'[a-zA-Z_][a-zA-Z_0-9]*'
# Backreferences to groups 1-4, used to build re.sub replacement strings.
g1, g2, g3, g4 = r'\1', r'\2', r'\3', r'\4'
def b(text):
    """Return text wrapped in an HTML bold tag."""
    markup = '<b>%s</b>'
    return markup % text
def i(text):
    """Return text wrapped in an HTML italic tag."""
    markup = '<i>%s</i>'
    return markup % text
def color(rgb, text):
    """Return text wrapped in a <font> tag whose color attribute is rgb."""
    markup = '<font color="%s">%s</font>'
    return markup % (rgb, text)
def link(url, anchor):
    """Return an HTML hyperlink to url whose visible text is anchor."""
    markup = '<a href="%s">%s</a>'
    return markup % (url, anchor)
def hilite(text, bg="ffff00"):
    """Return text wrapped in a bold tag with a colored background.

    bg is inserted verbatim as the CSS background-color value; it defaults
    to "ffff00".
    """
    return '<b style="background-color:%s">%s</b>' % (bg, text)
def modulelink(module, baseurl=''):
    """Hyperlink to a module, either locally or on python.org.

    A module counts as local when module + '.py' is listed in the global
    local_files (set by convert_files); its link is then
    baseurl + module + '.html'.  Any other module is linked to the
    python.org library reference and the baseurl argument is ignored.
    """
    # local_files only exists once convert_files() has run; fall back to
    # "nothing is local" so this helper can also be called on its own.
    known = globals().get('local_files') or ()
    if module + '.py' not in known:
        # NOTE(review): legacy python.org URL scheme -- confirm it still resolves.
        baseurl = 'http://www.python.org/doc/current/lib/module-'
    return link(baseurl + module + '.html', module)
def importer(m):
    """Turn text such as 'utils, math, re' into a string of HTML links.

    m is a regex match object with three groups: the leading text, the
    comma-separated module names, and the trailing text.  Groups 1 and 3
    are returned unchanged around the linked names; each name is stripped
    of surrounding whitespace, passed to modulelink, and the results are
    re-joined with ', '.
    """
    modules = [modulelink(mod.strip()) for mod in m.group(2).split(',')]
    return m.group(1) + ', '.join(modules) + m.group(3)
def find1(regex, str):
    """Return the first re.findall(regex, str) result, or ' ' if none.

    With a single capturing group in regex the result is that group's text.
    The one-space default keeps callers working with a string either way.
    """
    matches = re.findall(regex, str)
    if matches:
        return matches[0]
    return ' '
def convert_files(filenames, local_filenames=None, tblfile='python.html'):
    """Convert files of python code to colorized HTML.

    Each file in filenames is read, every (pattern, repl) pair of the
    module-level replacements list is applied in order with re.sub, and the
    result is wrapped as header(f) + <pre>...</pre> + footer(f) and written
    next to the source with its extension replaced by '.html'.

    local_filenames -- names that modulelink() treats as local modules
        (stored in the global local_files); defaults to filenames.
    tblfile -- HTML file whose '<!-- table --> ... </table>' span is
        replaced by a summary table (chapter, module, line count, first
        docstring line).  Pass a false value to skip the summary.  The file
        must already exist; open() raises otherwise.
    """
    global local_files
    local_files = local_filenames or filenames
    summary_table = {}
    for f in filenames:
        with open(f) as source:
            # Drop trailing whitespace (including the newline) on each line.
            stripped = [line.rstrip() for line in source]
        fulltext = '\n'.join(stripped)
        text = fulltext
        for (pattern, repl) in replacements:
            text = re.sub(pattern, repl, text)
        text = '%s<pre>%s</pre>%s' % (header(f), text, footer(f))
        # splitext removes only the real extension, unlike f[:-3] or
        # f.replace('.py', ''), which mangle names that do not simply end
        # in '.py'.
        module = os.path.splitext(f)[0]
        with open(module + '.html', 'w') as out:
            out.write(text)
        if tblfile:
            ch = find1(r'Chapters?\s+([^ \)"]*)', fulltext)
            desc = find1(r'"""(.*)\n', fulltext).replace('"""', '')
            # len(stripped) is the true line count; counting '\n' in the
            # joined text would be one short.
            summary_table.setdefault(ch, []).append(
                (module, len(stripped), desc))
    if tblfile:
        _write_summary_table(tblfile, summary_table)


def _write_summary_table(tblfile, summary_table):
    """Replace the '<!-- table --> ... </table>' span of tblfile with fresh rows."""
    totallines = 0
    tbl = ["<!-- table -->",
           "<tr><th>Chapter<th>Module<th>Lines<th>Description"]
    fmt = "<tr><td align=right>%s<td>%s<td align=right>%s<td>%s"
    # sorted() with cmp_to_key works on Python 2.7 and 3; list.sort(cmp)
    # is Python 2 only.
    items = sorted(summary_table.items(), key=functools.cmp_to_key(num_cmp))
    for (ch, entries) in items:
        for (module, lines, desc) in entries:
            totallines += lines
            tbl.append(fmt % (ch, link(module + '.html', module), lines, desc))
    tbl += [fmt % ('', '', totallines, ''), "</table>"]
    table_html = '\n'.join(tbl)
    with open(tblfile) as existing:
        old = existing.read()
    # A function replacement inserts table_html literally; a string
    # replacement would have its backslashes interpreted by re.sub.
    new = re.sub("(?s)<!-- table -->(.*)</table>",
                 lambda match: table_html, old, count=1)
    with open(tblfile, 'w') as out:
        out.write(new)
def num_cmp(x, y):
    """Compare two (label, ...) sequences by the first number in the label.

    Returns -1, 0 or 1.  A label containing digits is ordered by the integer
    value of its first digit run.  Labels without digits are ordered as
    plain strings and sort after every numbered label; a None label sorts
    first.  This reproduces the ordering Python 2's cmp() gave for mixed
    None/int/str values, without relying on cmp() or on cross-type
    comparison, neither of which exists in Python 3.
    """
    def sort_key(item):
        label = item[0]
        nums = re.findall('[0-9]+', label or '')
        if nums:
            return (1, int(nums[0]), '')
        if label is None:
            return (0, 0, '')
        return (2, 0, label)
    a, b = sort_key(x), sort_key(y)
    return (a > b) - (a < b)
### Above is general (more or less); below is specific to my files.
def header(file):
    """Return the opening HTML (title and <h1> heading) for file's page.

    The module name is file without its extension.  The heading shows the
    highlighted module name followed by a link to the source file itself.
    """
    # splitext instead of file[:-3], so extensions that are not exactly
    # three characters are handled too.
    module = os.path.splitext(file)[0]
    return ("""<html><head><title>%s Module</title></head>
<body bgcolor=ffffff><h1>%s Module (<a href="%s">%s</a>)</h1><hr>"""
            % (module, hilite(module), file, file))
def footer(file):
    """Return the closing HTML block of links for file's page.

    file is used only as the fragment of the docex-log.html link.
    """
    return """<p><hr><i>
<br> #
<a href="http://www.norvig.com/license.html">Copyright</a>:
<a href="http://www.norvig.com">Peter Norvig</a>, 2002.
<br> #
<a href="http://www.cs.berkeley.edu/~russell/aima.html">AIMA</a>:
<a href="python.html">Python Code</a>,
<a href="docex-log.html#%s">Example Output</a>.
<br> #
<a href="http://www.python.org">Python.org</a>:
<a href="http://www.python.org/doc/current/tut/tut.html">Tutorial</a>,
<a href="http://www.python.org/doc/current/ref/ref.html">Language Ref</a>,
<a href="http://www.python.org/doc/current/lib/lib.html">Libraries</a>.
</i>""" % file
def comment(text):
    """Return text rendered as green italics."""
    green = color("green", text)
    return i(green)
# Ordered (pattern, replacement) pairs that convert_files applies with re.sub.
# Order matters: later rules run on the output of earlier ones.
replacements = [
    # Escape HTML metacharacters before any markup is inserted; '&' goes
    # first so the entities produced for '<' and '>' are not escaped again.
    (r'&', '&amp;'),
    (r'<', '&lt;'),
    (r'>', '&gt;'),
    # A line of '#' followed by ten or more '#'/'_' becomes a rule.
    (r'(?ms)^#+[#_]{10,} *\n', '<hr>'),
    # Single- or double-quoted strings.
    (r"""('[^']*?'|"[^"]*?")""", comment(g1)),
    # Triple-quoted strings, which may span lines.
    (r'(?s)(""".*?"""|' + r"'''.*?''')", comment(g1)),
    # '#' comments, to end of line.
    (r'(#.*)', color("cc33cc", g1)),
    # Names assigned at the start of a line.
    (r'(?m)(^[a-zA-Z][a-zA-Z_0-9, ]+)(\s+=\s+)', hilite(g1) + g2),
    # 'def name' at the start of a line.
    (r'(?m)(^def\s+%s)' % id, hilite(g1)),
    # Indented 'def name': bold keyword, paler highlight on the name.
    (r'(?m)(^\s+def)(\s+)(%s)' % id, b(g1) + g2 + hilite(g3, "ffff99")),
    # 'class Name', at any indentation.
    (r'(?m)(^\s*)(class)(\s+)(%s)' % id, g1 + hilite(g2+g3+g4)),
    # Module names in import statements become hyperlinks.
    (r'(from\s+)([a-z]+)(\s+import)', importer),
    (r'(import\s+)([a-z, ]+)(\s|\n|$|,)', importer),
    ]
if __name__ == '__main__':
    # Command-line use: expand each argument with glob (so wildcards work
    # even when the shell has not expanded them) and convert every match.
    import sys, glob
    files = []
    for arg in sys.argv[1:]:
        files.extend(glob.glob(arg))
    convert_files(files)
## ENHANCEMENTS:
## Can get confused with """ and '''; not a problem in practice.
## Maybe each def c or class c should have a <a name="filename-c"> tag,
## and then we should create an index
## Page footer text (residue of the rendered HTML footer), kept as comments:
##   Copyright: Peter Norvig, 2002.
##   AIMA: Python Code, Example Output.
##   Python.org: Tutorial, Language Ref, Libraries.