Source Code: %(file)s
%(source)s
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
LobsterBlog
~~~~~~~~~~~
HTML5 Technical Article Beautifier
:Copyright: Copyright (c) 2010 J.A. Roberts Tunney
:License: GPL v2 or later
:Modified: 2010-08-14
:URL:
%(source)s
`` blocks
"""
try:
import pygments
import pygments.lexers as pyglex
import pygments.formatters as pygfmt
except ImportError, exc:
logging.warning('syntax highlighting: %r' % (exc))
return root
formatter = pygfmt.HtmlFormatter()
for tag in root.xpath('//pre'):
env['line'] = tag.sourceline
info = tag.attrib.get('class')
if not info:
continue
if 'lobsterblog-dedent' in info:
code = etree.tostring(tag, method="text", encoding="utf8")
code = code.decode('utf8')
code = textwrap.dedent(code).strip()
tag.text = code
match = re.search(r'lobsterblog-syntax--(?P\S+)', info)
if match:
env['lang'] = match.group('lang')
try:
lexer = pyglex.get_lexer_by_name(env['lang'])
except pyglex.ClassNotFound:
logging.warning(
"%(file)s:%(line)d: Could not find Pygments lexer "
"for language %(lang)r. Please see: "
"http://pygments.org/docs/lexers/" % env)
else:
code = etree.tostring(tag, method="text", encoding="utf8")
code = code.decode('utf8')
code = textwrap.dedent(code).strip()
high = pygments.highlight(code, lexer, formatter)
for child in tag:
tag.remove(child)
tag.text = ''
xml = etree.fromstring(high)
for child in xml.xpath('//pre')[0]:
tag.append(child)
return root
def image_sizes(root):
    """Adds width/height attributes to ``<img>`` tags if not present

    Every ``//img`` element under ``root`` that lacks a ``width`` or a
    ``height`` attribute has its ``src`` resolved through ``htmlpath()``
    and opened with PIL; the measured size is then written to both
    attributes.  Any failure is logged as a warning and that tag is left
    as it was.

    Returns ``root``.  When PIL cannot be imported a warning is logged
    and ``root`` is returned unmodified.
    """
    try:
        from PIL import Image
    except ImportError as exc:
        logging.warning('image sizes: %r' % (exc))
        return root
    for tag in root.xpath('//img'):
        env['line'] = tag.sourceline
        if 'width' in tag.attrib and 'height' in tag.attrib:
            continue
        try:
            path = htmlpath(tag.attrib.get('src'))
            # Raise explicitly instead of using ``assert`` so the check
            # is still performed when Python runs with ``-O``.
            if not os.path.exists(path):
                raise IOError("file not found: " + path)
        except Exception as exc:
            logging.warning("%(file)s:%(line)d: bad img src: "
                            % env + str(exc))
            continue
        try:
            f = Image.open(path)
        except Exception as exc:
            logging.warning("PIL.open(%r) failed: %s" % (path, exc))
            continue
        width, height = f.size
        tag.attrib['width'] = str(width)
        tag.attrib['height'] = str(height)
        logging.info("Image %s dimensions are %d x %d"
                     % (path, width, height))
    return root
def latex(root):
"""Replaces ``<latex>`` tags with generated images

For each ``//latex`` element the tag source is placed in ``env`` and,
when ``should_regenerate_image()`` says so, written through
``latex_template`` to a ``.tex`` file and rendered by running ``latex``
followed by ``dvipng``.  The tag is finally replaced in its parent by
the element built in ``a``, which wraps ``img`` and links to the
highlighted source page.  Returns ``root``.
"""
# NOTE(review): indentation was lost in this copy of the file, so the
# extent of this ``with`` block cannot be read off the code.  The files
# below are opened by bare name while env['srcfile'] / env['imgfile']
# carry the 'media/img/tex/' prefix -- confirm against upstream which
# statements are meant to run inside the changed directory.
with chdir('media/img/tex'):
# n numbers the figures; it feeds both the file name and the alt/title.
n = 0
for tag in root.xpath('//latex'):
n = n + 1
name = "%s.%d" % (env['slug'], n)
env['line'] = tag.sourceline
env['source'] = get_tag_source(tag)
env['srcfile'] = "media/img/tex/%s.tex" % (name)
env['imgfile'] = "media/img/tex/%s.png" % (name)
if should_regenerate_image():
logging.info("%(file)s:%(line)d: LaTeX "
"%(srcfile)s -> %(imgfile)s" % env)
f = codecs.open(name + '.tex', 'w', 'utf8')
f.write(latex_template % env)
f.close()
# Both commands discard their console output; on a latex failure the
# .log file is appended to the warning instead.
cmd = "latex %s.tex >/dev/null 2>&1" % (name)
if os.system(cmd) != 0:
logging.warning("Failed to run: " + cmd + "\n" +
slurp(name + '.log'))
continue
cmd = ("dvipng -bg transparent -o %s.png %s.dvi "
">/dev/null 2>&1") % (name, name)
if os.system(cmd) != 0:
logging.warning("Failed to run: " + cmd)
continue
# Intermediate files are removed once the .png exists.
os.unlink(name + '.log')
os.unlink(name + '.dvi')
env['extra_sources'].append(env['srcfile'])
image_was_regenerated()
# NOTE(review): the literal below spans two lines and holds no markup in
# this copy; presumably an element such as an <img> tag was stripped.
# Restore it from the upstream source before running.
img = etree.fromstring('
')
img.attrib['src'] = '/' + env['imgfile']
img.attrib['alt'] = 'LaTeX Figure #%d' % (n)
img.attrib['title'] = 'LaTeX Figure #%d' % (n)
# Attributes written on the original tag override the defaults above.
for k, v in tag.attrib.items():
img.attrib[k] = v
# NOTE(review): empty literal -- the markup for the wrapping element
# (presumably an <a> tag, given the href below) appears to be lost.
a = etree.fromstring('')
a.tail = tag.tail
# NOTE(review): unlike matplotlib()/gnuplot()/graphviz(), no '/' is
# prepended to this href -- confirm whether that is intentional.
a.attrib['href'] = os.path.relpath(
highlight_html_filename(env['srcfile']), 'html')
a.append(img)
parent = tag.getparent()
parent.replace(tag, a)
return root
def python(root):
"""Executes ``<python>`` and ``<py>`` tags, replacing them with output

The source of a ``<python>`` tag is run with ``exec`` while
``sys.stdout`` is redirected, and whatever it printed becomes the
output.  The source of a ``<py>`` tag is passed to ``eval`` and the
resulting value becomes the output.  Output that is not already an
element is parsed with ``etree.fromstring``.  The replacement element
inherits the tail and the attributes of the tag it replaces.  Tags
whose code or output fails are logged and skipped.  Returns ``root``.
"""
# NOTE(review): exec/eval run code taken from the document itself; only
# process documents from a trusted author.
for tag in root.xpath("//*[name()='python' or name()='py']"):
env['line'] = tag.sourceline
code = get_tag_source(tag).strip()
if tag.tag == 'python':
# Swap in a StringIO so that print output of the snippet is captured.
sys.stdout, oldstdout = StringIO.StringIO(), sys.stdout
try:
exec code
except:
logging.exception("python exec() failed:\n\n%s\n\n"
"output:\n\n%s", code, sys.stdout.getvalue())
continue
finally:
# Runs on success and on failure, so the real stdout always comes back.
output = sys.stdout.getvalue()
sys.stdout = oldstdout
elif tag.tag == 'py':
try:
output = eval(code)
except:
logging.exception("python eval(%r) failed", code)
continue
if not etree.iselement(output):
try:
try:
output = etree.fromstring(output)
except:
# NOTE(review): the format string below is a bare '%s' in this copy;
# presumably it wrapped the text in an element whose markup was
# stripped.  Restore it from the upstream source.
output = etree.fromstring('%s' % (output))
except Exception, exc:
logging.error("bad xml output: %s\n\n%s", exc, output)
continue
output.tail = tag.tail
for k, v in tag.attrib.items():
output.attrib[k] = v
parent = tag.getparent()
parent.replace(tag, output)
return root
def toc(root):
"""Generates a table of contents as a bulleted list.
This works on a per article basis by scanning ``<h2>`` through
``<h6>`` tags. Headers containing a ``notoc`` attribute
will be ignored (the attribute itself is removed). If header tags do
not contain an ``id`` attribute one will be added by slugifying the
text within the header. Every ``<toc>`` tag is replaced by a new
element that receives the tag's attributes and children followed by
the generated list.
For example (the markup of this example was lost in this copy of the
file; only its text remains)::
Open Source Is Happy
Table of Contents
Section 1
Section 2
Section 2.1
Section 2.1.1
Section 2.1.2
Section 2.2
Section 3
Section 4
"""
# NOTE(review): several etree.fromstring() literals in this function
# are empty or split across lines because their markup was stripped
# from this copy.  They must be restored from the upstream source; the
# variable names (toc, li, a, ul, div) hint at the intended elements.
for art in root.xpath('//article'):
# NOTE(review): in lxml a path starting with '//' is evaluated against
# the whole document even when called on an element -- confirm whether
# './/toc' was intended for the per-article behaviour described above.
toctags = list(art.xpath('//toc'))
if not toctags:
continue
toc = etree.fromstring('
')
# ul_stack holds the list currently open at each nesting depth;
# lev is the header level that the top of the stack corresponds to.
ul_stack = [toc]
last_li = toc
lev = 2
for tag in art.xpath(r'''
//*[name()='h2' or name()='h3' or name()='h4' or
name()='h5' or name()='h6']
'''):
env['line'] = tag.sourceline
if 'notoc' in tag.attrib:
del tag.attrib['notoc']
else:
# text = etree.tostring(tag, method="text", encoding="utf8")
text = get_tag_text(tag)
# The digit of the tag name ('h3' -> 3) is the header level.
curlev = int(tag.tag[1])
if 'id' not in tag.attrib:
tag.attrib['id'] = slugify(text)
# NOTE(review): 'ΒΆ' looks like a mis-decoded pilcrow sign whose
# surrounding element markup was stripped.
pilcrow = etree.fromstring('ΒΆ')
pilcrow.attrib['href'] = '#' + tag.attrib['id']
tag.append(pilcrow)
li = etree.fromstring('
')
a = etree.fromstring('')
a.text = text
a.attrib['href'] = '#' + tag.attrib['id']
li.append(a)
# Going deeper: open one nested list per level skipped.
while curlev > lev:
ul = etree.fromstring('
')
ul_stack.append(ul)
last_li.append(ul)
last_li = ul
lev += 1
# Coming back up: close lists until the stack matches the header level.
while curlev < lev:
ul_stack.pop()
if ul_stack[-1].getchildren():
last_li = ul_stack[-1].getchildren()[-1]
else:
last_li = ul_stack[-1]
lev -= 1
ul_stack[-1].append(li)
last_li = li
for tag in toctags:
div = etree.fromstring('')
for k, v in tag.attrib.items():
div.attrib[k] = v
for child in tag.getchildren():
div.append(child)
div.append(toc)
tag.getparent().replace(tag, div)
return root
def matplotlib(root):
    """Replaces ``<matplotlib>`` tags with generated images

    For each ``//matplotlib`` element the tag source is placed in
    ``env``; when ``should_regenerate_image()`` asks for it the code
    produced by ``matplotlib_template`` is executed and then saved to
    ``env['srcfile']``.  The tag is replaced by a link to the highlighted
    source page wrapping an ``img`` that points at ``env['imgfile']``.

    Returns ``root``.  When numpy or matplotlib cannot be imported a
    warning is logged and ``root`` is returned unmodified.

    NOTE(review): this copy of the file had lost its indentation and the
    markup inside the two ``etree.fromstring`` literals; the nesting and
    the ``<img/>`` / ``<a/>`` literals below are reconstructed from how
    the elements are used -- confirm against the upstream source.
    """
    try:
        # Imported only to find out early whether plotting is possible.
        import numpy
        import matplotlib
    except ImportError as exc:
        logging.warning('graph matplotlib: %r' % (exc))
        return root
    n = 0
    for tag in root.xpath('//matplotlib'):
        n = n + 1
        name = "%s.%d" % (env['slug'], n)
        env['line'] = tag.sourceline
        env['source'] = get_tag_source(tag)
        env['srcfile'] = "media/img/graph/%s.py" % (name)
        env['imgfile'] = "media/img/graph/%s.png" % (name)
        if should_regenerate_image():
            logging.info("%(file)s:%(line)d: Matplotlib "
                         "%(srcfile)s -> %(imgfile)s" % env)
            code = matplotlib_template % env
            exec(code)
            # Close the handle deterministically instead of leaving it
            # to the garbage collector.
            with codecs.open(env['srcfile'], 'w', 'utf8') as f:
                f.write(code)
            env['extra_sources'].append(env['srcfile'])
            image_was_regenerated()
        img = etree.fromstring('<img/>')
        img.tag = 'img'
        img.attrib['src'] = '/' + env['imgfile']
        img.attrib['alt'] = 'Graph #%d' % (n)
        img.attrib['title'] = 'Graph #%d' % (n)
        # Copy the attributes of the ORIGINAL tag (as latex() does) so
        # the author can override alt/title or add a class.  The old
        # loop iterated img.attrib onto itself and copied nothing.
        for k, v in tag.attrib.items():
            img.attrib[k] = v
        a = etree.fromstring('<a/>')
        a.tail = tag.tail
        a.attrib['href'] = '/' + os.path.relpath(
            highlight_html_filename(env['srcfile']), 'html')
        a.append(img)
        parent = tag.getparent()
        parent.replace(tag, a)
    return root
def gnuplot(root):
    """Replaces ``<gnuplot>`` tags with generated images

    For each ``//gnuplot`` element the tag source is placed in ``env``;
    when ``should_regenerate_image()`` asks for it the script produced by
    ``gnuplot_template`` is written to ``env['srcfile']`` and run with
    the ``gnuplot`` command.  A tag whose script fails is logged and left
    in place.  Otherwise the tag is replaced by a link to the highlighted
    source page wrapping an ``img`` that points at ``env['imgfile']``.

    Returns ``root``.

    NOTE(review): this copy of the file had lost its indentation and the
    markup inside the two ``etree.fromstring`` literals; the nesting and
    the ``<img/>`` / ``<a/>`` literals below are reconstructed from how
    the elements are used -- confirm against the upstream source.
    """
    n = 0
    for tag in root.xpath('//gnuplot'):
        n = n + 1
        name = "%s.%d" % (env['slug'], n)
        env['line'] = tag.sourceline
        env['source'] = get_tag_source(tag)
        env['srcfile'] = "media/img/plot/%s.gnuplot" % (name)
        env['imgfile'] = "media/img/plot/%s.png" % (name)
        if should_regenerate_image():
            logging.info(
                "%(file)s:%(line)d: Gnuplot %(srcfile)s -> %(imgfile)s" % env)
            code = gnuplot_template % env
            # ``with`` closes the script even if the write fails.
            with codecs.open(env['srcfile'], 'w', 'utf8') as f:
                f.write(code)
            cmd = 'gnuplot ' + env['srcfile']
            if os.system(cmd) != 0:
                logging.warning("Failed to run: " + cmd)
                continue
            env['extra_sources'].append(env['srcfile'])
            image_was_regenerated()
        img = etree.fromstring('<img/>')
        img.tag = 'img'
        img.attrib['src'] = '/' + env['imgfile']
        img.attrib['alt'] = 'Plot #%d' % (n)
        img.attrib['title'] = 'Plot #%d' % (n)
        # Copy the attributes of the ORIGINAL tag (as latex() does) so
        # the author can override alt/title or add a class.  The old
        # loop iterated img.attrib onto itself and copied nothing.
        for k, v in tag.attrib.items():
            img.attrib[k] = v
        a = etree.fromstring('<a/>')
        a.tail = tag.tail
        a.attrib['href'] = '/' + os.path.relpath(
            highlight_html_filename(env['srcfile']), 'html')
        a.append(img)
        parent = tag.getparent()
        parent.replace(tag, a)
    return root
def graphviz(root):
    """Replaces ``<graphviz>`` tags with generated images

    The optional ``type`` attribute of the tag selects the layout program
    (``dot`` by default); tags naming an unknown program are reported and
    left in place.  For the others the tag source is placed in ``env``
    and, when ``should_regenerate_image()`` asks for it, the text
    produced by ``graphviz_template`` is written to ``env['srcfile']``
    and rendered to ``env['imgfile']``.  The tag is then replaced by a
    link to the highlighted source page wrapping an ``img``.

    Returns ``root``.

    NOTE(review): this copy of the file had lost its indentation and the
    markup inside the two ``etree.fromstring`` literals; the nesting and
    the ``<img/>`` / ``<a/>`` literals below are reconstructed from how
    the elements are used -- confirm against the upstream source.
    """
    types = ('dot', 'circo', 'neato', 'twopi', 'fdp')
    n = 0
    for tag in root.xpath("//graphviz"):
        n = n + 1
        # Set the line first so the message below reports THIS tag and
        # not whichever element was processed last.
        env['line'] = tag.sourceline
        cmd = tag.attrib.get('type', 'dot')
        if cmd not in types:
            logging.info(
                "%(file)s:%(line)d: " % env +
                "Bad GraphViz Type %r.  Try: %s" % (cmd, ', '.join(types)))
            continue
        name = "%s.%d" % (env['slug'], n)
        env['source'] = get_tag_source(tag)
        env['srcfile'] = "media/img/graphviz/%s.dot" % (name)
        env['imgfile'] = "media/img/graphviz/%s.png" % (name)
        if should_regenerate_image():
            logging.info(
                "%(file)s:%(line)d: GraphViz %(srcfile)s -> %(imgfile)s" % env)
            code = graphviz_template % env
            # ``with`` closes the file even if the write fails.
            with codecs.open(env['srcfile'], 'w', 'utf8') as f:
                f.write(code)
            cmd = '%s -Tpng -o %s %s' % (cmd, env['imgfile'], env['srcfile'])
            if os.system(cmd) != 0:
                logging.warning("Failed to run: " + cmd)
                continue
            env['extra_sources'].append(env['srcfile'])
            image_was_regenerated()
        img = etree.fromstring('<img/>')
        img.tag = 'img'
        img.attrib['src'] = '/' + env['imgfile']
        img.attrib['alt'] = 'Plot #%d' % (n)
        img.attrib['title'] = 'Plot #%d' % (n)
        # Copy the attributes of the ORIGINAL tag (as latex() does); the
        # old loop iterated img.attrib onto itself and copied nothing.
        # ``type`` only selects the layout program above, so it is not
        # carried over to the image.
        for k, v in tag.attrib.items():
            if k != 'type':
                img.attrib[k] = v
        a = etree.fromstring('<a/>')
        a.tail = tag.tail
        a.attrib['href'] = '/' + os.path.relpath(
            highlight_html_filename(env['srcfile']), 'html')
        a.append(img)
        parent = tag.getparent()
        parent.replace(tag, a)
    return root
def head_tags(root):
"""Fills in missing ``<head>`` children from the document body

Does nothing when the document has no ``//head``.  When ``//head/title``
is absent and an ``//h1`` exists, an element carrying the whitespace
normalised text of the first ``h1`` is appended to the head.  When
``//article/summary/p`` exists, an element whose ``content`` attribute
holds the normalised text of the first such paragraph is appended as
well.  Returns ``root``.
"""
if not root.xpath('//head'):
return root
head = root.xpath('//head')[0]
if not root.xpath('//head/title'):
h1text = root.xpath('//h1')
if h1text:
# NOTE(review): the literal is a single space in this copy; presumably
# a title element whose markup was stripped.  Restore from upstream.
tag = etree.fromstring(' ')
# Runs of whitespace (including newlines) collapse to one space.
tag.text = re.sub(r'\s+', ' ', get_tag_text(h1text[0])).strip()
head.append(tag)
sumtext = root.xpath('//article/summary/p')
if sumtext:
# NOTE(review): empty literal -- presumably a meta element (only its
# ``content`` attribute is set here); its markup, including any name
# attribute, was stripped from this copy.  Restore from upstream.
tag = etree.fromstring('')
tag.attrib['content'] = \
re.sub(r'\s+', ' ', get_tag_text(sumtext[0])).strip()
head.append(tag)
#
return root
def xml_to_html(fin, fout):
    """Transforms certain elements inside HTML5 document

    Everything readable from ``fin`` is parsed with the module's
    ``xmlparser``, handed through each transformation stage in order and
    finally serialised to ``fout``.  A syntax error while parsing, or an
    ``OhNo`` raised by a stage, is logged against ``env['file']`` and
    nothing is written.
    """
    try:
        tree = etree.XML(fin.read(), xmlparser)
    except etree.XMLSyntaxError as exc:
        logging.error("%s: %s" % (env['file'], exc))
        return
    # Order matters: tags that generate content run before the passes
    # that inspect the finished document.
    stages = (python, latex, matplotlib, gnuplot, graphviz,
              image_sizes, syntax_highlight, toc, head_tags)
    try:
        # Published so that code run by the stages can query the tree.
        env['root'] = tree
        for stage in stages:
            tree = stage(tree)
    except OhNo as exc:
        logging.error("%s: %s" % (env['file'], exc))
        return
    if tree.tag == 'html':
        # NOTE(review): only a newline is emitted here in this copy of
        # the file; a doctype line may have been stripped -- confirm.
        fout.write("\n")
    # A <frag> root is only a container: write its children, not itself.
    if tree.tag == 'frag':
        nodes = tree
    else:
        nodes = [tree]
    for node in nodes:
        fout.write(etree.tostring(node, pretty_print=True, encoding="utf8"))
def source_to_html(fin, fout):
"""Turns a source code file into an HTML5 document

The content of ``fin`` is read with ``slurp()``, its language guessed
from ``env['file']`` and the data, and the result of filling
``source_page_template`` from ``env`` is written to ``fout``.  When
pygments cannot be imported a warning is logged and nothing is written.
"""
try:
import pygments
import pygments.lexers as pyglex
import pygments.formatters as pygfmt
except ImportError, exc:
logging.warning('source to html: %r' % (exc))
return
data = slurp(fin)
lang = guess_language(env['file'], data)
# XML input is looked up under the 'html' lexer name.
if lang == 'xml':
lang = 'html'
try:
lexer = pyglex.get_lexer_by_name(lang)
except pyglex.ClassNotFound:
# No lexer: fall back to the escaped, unhighlighted text.
logging.warning("No pygments lexer '%s'" % (lang))
env['source'] = esc(data)
else:
formatter = pygfmt.HtmlFormatter(nobackground=True)
# NOTE(review): with the highlight call below commented out, ``data``
# is still the raw file content when it is parsed as XML further down,
# and ``lexer`` / ``formatter`` are unused -- this looks like
# work in progress; confirm the intended flow against upstream.
# data = pygments.highlight(data, lexer, formatter)
sl = []
# NOTE(review): debugging output that goes to stdout.
print type(data)
frag = etree.XML(data, xmlparser)
# NOTE(review): the result of this second parse is discarded.
etree.fromstring(data.encode('utf8'))
# Collect the serialised children of every <pre> as the page body.
for pre in frag.xpath('//pre'):
for tag in pre:
sl.append(etree.tostring(tag, encoding='utf8'))
env['source'] = "".join(sl)
fout.write(source_page_template % env)
def db_connect():
    """Open the checksum database and make sure its schema exists

    Checksums live in sqlite so that a zillion image files need not be
    regenerated on every run.  The connection to ``.lobsterblog.sqlite``
    is stored in the module-level ``db``.  A failure while applying
    ``db_schema`` is logged as a warning and not raised.
    """
    global db
    from sqlite3 import connect
    db = connect('.lobsterblog.sqlite')
    try:
        db.cursor().execute(db_schema)
        db.commit()
    except Exception as exc:
        logging.warning("failed to build sqlite schema: %s" % exc)
def should_regenerate_image():
    """Tell whether the image described by ``env`` has to be rendered

    Returns ``True`` when ``env['imgfile']`` does not exist, when the
    ``imghash`` table has no row matching that file name together with
    the MD5 checksum of ``env['source']``, or when the lookup fails (the
    failure is logged as a warning).  Returns ``False`` only when a
    matching row is found.
    """
    if not os.path.exists(env['imgfile']):
        return True
    if not db:
        db_connect()
    try:
        source = env['source']
        # md5 needs bytes.  Hashing a text string with non-ASCII
        # characters used to raise, and the handler below then forced a
        # regeneration on every run.  ASCII input hashes as before.
        if not isinstance(source, bytes):
            source = source.encode('utf8')
        ck = hashlib.md5(source).hexdigest()
        q = "select count(*) from imghash where filename = ? and checksum = ?"
        matches = db.cursor().execute(q, [env['imgfile'], ck]).fetchone()[0]
        return not matches
    except Exception as exc:
        logging.warning("failed to check db: %s" % (exc))
        return True
def image_was_regenerated():
    """Record the checksum of the source that produced ``env['imgfile']``

    Any existing ``imghash`` row for the file is deleted and a new one
    holding the MD5 checksum of ``env['source']`` is inserted.  A
    failure is logged as a warning and not raised.
    """
    if not db:
        db_connect()
    try:
        source = env['source']
        # md5 needs bytes.  Hashing a text string with non-ASCII
        # characters used to raise, so the checksum was never stored.
        # ASCII input hashes as before.
        if not isinstance(source, bytes):
            source = source.encode('utf8')
        ck = hashlib.md5(source).hexdigest()
        c = db.cursor()
        c.execute("delete from imghash where filename = ?", [env['imgfile']])
        c.execute("insert into imghash values (?, ?)", [env['imgfile'], ck])
        db.commit()
    except Exception as exc:
        logging.warning("failed to update db: %s" % (exc))
def xpath(query, path=None):
    """Run an XPath query to extract document content

    Presumably meant to be called from the code inside ``<py>`` /
    ``<python>`` tags (the tag name was lost from this docstring in this
    copy of the file -- confirm).

    ``query`` is evaluated against the document parsed from ``path``
    when one is given, otherwise against ``env['root']``.  Returns
    whatever the ``xpath()`` method of that root returns.

    For example::
    xpath('//article/summary')[0]
    xpath('//article/summary', '/article.xml')[0]
    """
    if path:
        # Close the file as soon as it is read instead of leaking the
        # handle until garbage collection.
        with open(path) as f:
            root = etree.XML(f.read(), xmlparser)
    else:
        root = env['root']
    return root.xpath(query)
def esc(unsafe):
    """Escape HTML

    Same as PHP's ``htmlspecialchars()`` function: every character that
    has an entry in ``html_entities`` is swapped for that entry, all
    others pass through.  The joined result is returned encoded as
    UTF-8.
    """
    pieces = [html_entities[ch] if ch in html_entities else ch
              for ch in unsafe]
    return ''.join(pieces).encode('utf8')
def main(args):
    """Build the HTML output for the files named in ``args``

    Two passes are made.  First every ``.xml`` / ``.rss`` file is run
    through ``xml_to_html()`` into ``html/`` (``.xml`` becomes ``.html``
    and ``/`` becomes ``_`` in the output name).  Then every input file,
    plus every source file the first pass registered in
    ``env['extra_sources']``, is run through ``source_to_html()``.  In
    both passes a target is rebuilt only when it is missing or older
    than its source.
    """
    from os.path import relpath, exists, getmtime

    def outdated():
        # True when the target is missing or older than its source.
        return (not exists(env['newfile']) or
                getmtime(env['file']) > getmtime(env['newfile']))

    def convert(converter):
        # Render env['file'] into memory first, then write env['newfile'];
        # ``with`` closes both handles instead of leaking them.
        logging.info('Source Code %(file)s -> %(newfile)s' % env)
        ss = StringIO.StringIO()
        with open(env['file']) as fin:
            converter(fin, ss)
        with open(env['newfile'], 'w') as fout:
            fout.write(ss.getvalue())

    for directory in ('html', 'media', 'media/img', 'media/img/tex',
                      'media/img/plot', 'media/img/graph',
                      'media/img/graphviz'):
        os.system('mkdir -p ' + directory)
    files = [relpath(fn) for fn in args]
    env['extra_sources'] = []
    for fn in files:
        if not fn.endswith(('.xml', '.rss')):
            continue
        env['file'] = fn
        env['slug'] = slugify(fn)
        env['newfile'] = 'html/' + \
            fn.replace('.xml', '.html').replace('/', '_')
        if outdated():
            convert(xml_to_html)
    # The first pass may have added generated sources to extra_sources.
    for fn in set(files + env['extra_sources']):
        env['file'] = fn
        env['slug'] = slugify(fn)
        env['newfile'] = highlight_html_filename(fn)
        if outdated():
            convert(source_to_html)
# Script entry point: run the module's doctests, switch logging to
# DEBUG and process the files named on the command line once.
if __name__ == '__main__':
import doctest
doctest.testmod()
logging.basicConfig(level=logging.DEBUG)
main(sys.argv[1:])
# Disabled variant that re-runs main() every half second until
# interrupted with Ctrl-C:
# try:
# while True:
# main(sys.argv[1:])
# time.sleep(0.5)
# except KeyboardInterrupt:
# pass