我使用 Markdown 写博客已经有段时间了,但是一直以来有个小小的问题:对于代码块,Markdown 生成的是一个 <pre> 标签里套一个 <code> 标签。给代码缩进四个空格还好,用 Vim 的列编辑就行了(> 操作不行,因为空行不会被缩进),可是删除这些 <code> 标签并加上相应的语言标识很烦。于是有了以下 Python 代码,使用的是 Python 版的 markdown,支持使用 ~~~~ 作为代码分隔符,并可以在分隔符后面指定语言和标题,如:
~~~~python|这是 Python 代码
print('Hello Python!')
~~~~
将会被翻译为
<pre class="brush: python;" title="这是 Python 代码">print('Hello Python!')
</pre>
程序如下:
#!/usr/bin/env python3
# vim:fileencoding=utf-8
import sys
from itertools import takewhile
import markdown
from lxml.html import fromstring, tostring
def parseAttr(s):
    '''Parse the attribute text that follows an opening ``~~~~`` fence.

    ``s`` has the form ``lang|title|collapse``; trailing fields may be
    omitted.  Every field is stripped of surrounding whitespace.

    Returns a three-item list ``[lang, title, collapse]``: ``lang`` falls
    back to ``'plain'`` when empty, ``title`` falls back to ``''``, and
    ``collapse`` is a bool that is true for any non-empty third field.

    Raises ValueError if more than three ``|``-separated fields are given.
    '''
    fields = [field.strip() for field in s.split('|')]
    if len(fields) > 3:
        raise ValueError('Too many attributes')
    # str.split() always yields at least one item, so the only remaining
    # work is padding the missing trailing fields with empty strings.
    fields += [''] * (3 - len(fields))
    lang, title, collapse = fields
    return [lang or 'plain', title, bool(collapse)]
def analyseAttrs(text):
    '''Strip the fence attributes out of ``text`` and collect them.

    Attributes are defined like this:
    ~~~~lang|title|collapse
    In place of ``collapse``, anything not empty is considered true.

    Each opening fence line (one starting with four or more ``~``) is
    replaced by its bare run of tildes, and the text after the tildes is
    parsed with ``parseAttr``.  All other lines are kept unchanged.

    Returns a tuple ``(text, attrs)``: the rewritten text and the list of
    parsed attribute lists, one per opening fence, in document order.
    '''
    fence = None  # the opening run of tildes while inside a fenced block
    lines = []
    attrs = []
    for line in text.split('\n'):
        if line.startswith('~~~~'):
            if fence is None:
                rest = line.lstrip('~')
                fence = line[:len(line) - len(rest)]
                attrs.append(parseAttr(rest))
                line = fence
            elif line.startswith(fence):
                # A tilde run at least as long as the opening one ends the
                # block; a shorter run is treated as ordinary content.
                fence = None
        lines.append(line)
    return '\n'.join(lines), attrs
def applyAttrs(html, attrs):
    '''Rewrite every ``<pre><code>`` block in ``html`` as a bare ``<pre>``.

    ``attrs`` is a list of ``[lang, title, collapse]`` items consumed in
    order: the i-th ``<pre>/<code>`` element found takes ``attrs[i]``.
    NOTE(review): a ``<pre><code>`` that did not come from a ``~~~~``
    fence would shift this pairing -- confirm the input only uses fences.

    Each matched ``<pre>`` keeps only the text of its ``<code>`` child,
    gets ``class="brush: LANG;"`` (with `` collapse: true;`` appended when
    collapse is set) and, when the title is non-empty, a ``title``
    attribute.

    Returns the serialized HTML fragment followed by a newline.
    '''
    # Imported locally because the module only imports fromstring/tostring.
    from lxml.html import fragment_fromstring

    # Always parse under an explicit <div>.  fromstring() hands back the
    # element itself when the input is a single top-level element, in which
    # case slicing off a presumed <div> wrapper would corrupt the output.
    doc = fragment_fromstring(html, create_parent='div')
    for i, code in enumerate(doc.xpath('//pre/code')):
        pre = code.getparent()
        text = code.text
        # Drop the <code> child and keep its text directly inside <pre>.
        del pre[:]
        pre.text = text
        attr = attrs[i]
        classes = 'brush: %s;' % attr[0]
        if attr[2]:
            classes += ' collapse: true;'
        pre.set('class', classes)
        if attr[1]:
            pre.set('title', attr[1])
    serialized = tostring(doc, encoding=str)
    # Strip the wrapper created above.
    return serialized[len('<div>'):-len('</div>')] + '\n'
def main():
    '''Read Markdown from stdin and write the converted HTML to stdout.'''
    text = sys.stdin.read()
    text, attrs = analyseAttrs(text)
    # Extensions must be passed by keyword: Python-Markdown 3.0 removed
    # support for positional arguments after ``text``.
    out = markdown.markdown(text, extensions=['fenced_code'])
    out = applyAttrs(out, attrs)
    sys.stdout.write(out)


if __name__ == '__main__':
    main()
