我使用 Markdown 写博客已经有段时间了,但是一直以来有个小小的问题:对于代码块,markdown 生成的是一个 `<pre>` 标签里套一个 `<code>` 标签。缩进四个空格还好,用 Vim 的列编辑就行了(`>` 操作不行,因为空行不会被缩进),可是删除这些 `<code>` 标签并加上相应的语言标识很烦。于是有了以下 Python 代码,使用的是 Python 版的 markdown,支持使用 `~~~~` 作为代码分隔符,如:
~~~~python|这是 Python 代码
print('Hello Python!')
~~~~
将会被翻译为
<pre class="brush: python;" title="这是 Python 代码">print('Hello Python!')
</pre>
程序如下:
#!/usr/bin/env python3
# vim:fileencoding=utf-8
"""Render Markdown from stdin to HTML with attributed ``<pre>`` code blocks.

Fenced code blocks may carry attributes on the opening fence::

    ~~~~lang|title|collapse

Each resulting ``<pre><code>...</code></pre>`` is flattened to a single
``<pre class="brush: lang;" title="title">`` element.
"""

import sys
from itertools import takewhile


def parseAttr(s):
    """Parse the attribute string that follows an opening ``~~~~`` fence.

    Args:
        s: Text of the form ``lang|title|collapse``; trailing fields may be
            omitted and every field is stripped of surrounding whitespace.

    Returns:
        A three-item list ``[lang, title, collapse]``. ``lang`` falls back
        to ``'plain'`` when empty, ``title`` defaults to ``''`` and
        ``collapse`` is ``True`` iff the third field is non-empty.

    Raises:
        ValueError: If more than three ``|``-separated fields are given.
    """
    fields = [part.strip() for part in s.split('|')]
    if len(fields) > 3:
        raise ValueError('Too many attributes')
    # str.split always yields at least one item; pad the missing fields with
    # empty strings so they pick up their defaults below.
    fields.extend([''] * (3 - len(fields)))
    lang, title, collapse = fields
    return [lang or 'plain', title, bool(collapse)]


def analyseAttrs(text):
    """Strip fence attributes from *text* and collect them in order.

    Attributes are defined like this::

        ~~~~lang|title|collapse

    In place of ``collapse``, anything not empty is considered true.

    Args:
        text: Markdown source.

    Returns:
        A ``(text, attrs)`` tuple: the source with every opening fence
        reduced to bare tildes, and one ``parseAttr`` result per fenced
        block, in document order.

    Raises:
        ValueError: Propagated from ``parseAttr`` for a malformed fence.
    """
    incode = False  # False outside a block, else the opening fence length
    tildas = ''     # the bare opening fence of the block being scanned
    lines = []
    attrs = []
    for line in text.split('\n'):
        if line.startswith('~~~~'):
            if not incode:
                incode = sum(1 for _ in takewhile(lambda ch: ch == '~', line))
                attrs.append(parseAttr(line.lstrip('~')))
                line = tildas = '~' * incode
            elif line.startswith(tildas):
                # Only a fence at least as long as the opener closes the
                # block; shorter tilde runs are part of the code.
                incode = False
        lines.append(line)
    return '\n'.join(lines), attrs


def applyAttrs(html, attrs):
    """Rewrite ``<pre><code>`` blocks in *html* using the collected *attrs*.

    Args:
        html: HTML fragment produced by the Markdown renderer.
        attrs: ``[lang, title, collapse]`` lists as returned by
            ``analyseAttrs``; the i-th entry is applied to the i-th
            ``<pre><code>`` element.

    Returns:
        The rewritten HTML fragment followed by a newline.

    Raises:
        IndexError: If *html* contains more ``<pre><code>`` elements than
            there are entries in *attrs*.
    """
    # Imported here so the text-only helpers above remain importable without
    # the third-party lxml package installed.
    from lxml.html import fromstring, tostring

    # Wrap the fragment explicitly: fromstring() only adds a <div> wrapper
    # when there are several top-level elements, and returns a lone element
    # unwrapped, which would make the [5:-6] slice below corrupt the output.
    doc = fromstring('<div>%s</div>' % html)
    # NOTE(review): indented (non-fenced) code blocks also render as
    # <pre><code> but contribute no attrs entry, so mixing both styles can
    # misalign or exhaust ``attrs`` -- confirm inputs use fences only.
    for i, code in enumerate(doc.xpath('//pre/code')):
        pre = code.getparent()
        text = code.text
        del pre[:]          # drop the <code> child ...
        pre.text = text     # ... and hoist its text into <pre>
        lang, title, collapse = attrs[i]
        css = 'brush: %s;' % lang
        if collapse:
            css += ' collapse: true;'
        pre.set('class', css)
        if title:
            pre.set('title', title)
    # Strip the '<div>' / '</div>' wrapper added above.
    return tostring(doc, encoding=str)[5:-6] + '\n'


def main():
    """Read Markdown from stdin and write the converted HTML to stdout."""
    # Imported here so the text-only helpers above remain importable without
    # the third-party markdown package installed.
    import markdown

    text = sys.stdin.read()
    text, attrs = analyseAttrs(text)
    # Extensions must be passed by keyword: Python-Markdown 3.x no longer
    # accepts them positionally.
    out = markdown.markdown(text, extensions=['fenced_code'])
    out = applyAttrs(out, attrs)
    sys.stdout.write(out)


if __name__ == '__main__':
    main()