我使用 Markdown 写博客已经有段时间了,但是一直以来有个小小的问题:对于代码块,markdown 生成的是一个 `<pre>` 标签里套一个 `<code>` 标签。缩进四个空格还好,用 Vim 的列编辑就行了(`>` 操作不行,因为空行不会被缩进),可是删除这些 `<code>` 标签并加上相应的语言标识很烦。于是有了以下 Python 代码,使用的是 Python 版的 markdown,支持使用 `~~~~` 作为代码分隔符,如:
    ~~~~python|这是 Python 代码
    print('Hello Python!')
    ~~~~
将会被翻译为
    <pre class="brush: python;" title="这是 Python 代码">print('Hello Python!')
    </pre>
程序如下:
#!/usr/bin/env python3
# vim:fileencoding=utf-8
"""Render Markdown from stdin as HTML, flattening code blocks to bare <pre>.

Opening fences are written as ``~~~~lang|title|collapse``.  Every
``<pre><code>`` pair in the rendered HTML is replaced by a single ``<pre>``
whose ``class`` is ``brush: lang;`` (plus ``collapse: true;`` when
requested) and whose ``title`` is the given title, if any.
"""

import sys

# Attributes used when a fence (or a code block) carries none.
_DEFAULT_LANG = 'plain'
_FENCE_PREFIX = '~~~~'


def parseAttr(s):
    """Parse the ``lang|title|collapse`` text that follows an opening fence.

    Args:
        s: The fence line with its leading tildes already removed.

    Returns:
        A three-item list ``[lang, title, collapse]``.  ``lang`` falls back
        to ``'plain'`` when empty, ``title`` to ``''``, and ``collapse`` is
        a bool that is true when the third field is non-empty after
        stripping surrounding whitespace.

    Raises:
        ValueError: If more than three ``|``-separated fields are given.
    """
    fields = [field.strip() for field in s.split('|')]
    if len(fields) > 3:
        raise ValueError('Too many attributes')
    # str.split always yields at least one field, so padding is all we need.
    fields += [''] * (3 - len(fields))
    lang, title, collapse = fields
    return [lang or _DEFAULT_LANG, title, bool(collapse)]


def analyseAttrs(text):
    """Strip the attributes off every opening fence and collect them.

    Attributes are defined like this::

        ~~~~lang|title|collapse

    In place of ``collapse``, anything not empty is considered true.

    Args:
        text: Markdown source.

    Returns:
        A tuple ``(text, attrs)``: ``text`` is the source with each opening
        fence reduced to just its tildes, and ``attrs`` holds one
        ``parseAttr`` result per opening fence, in document order.

    Raises:
        ValueError: If an opening fence has more than three attributes.
    """
    fence = None  # tildes of the block currently open; None outside a block
    lines = []
    attrs = []
    for line in text.split('\n'):
        if line.startswith(_FENCE_PREFIX):
            if fence is None:
                rest = line.lstrip('~')
                fence = line[:len(line) - len(rest)]
                attrs.append(parseAttr(rest))
                # Leave only the tildes so the Markdown fence stays valid.
                line = fence
            elif line.startswith(fence):
                # A closing fence must be at least as long as the opening
                # one; shorter tilde runs are part of the code itself.
                fence = None
        lines.append(line)
    return '\n'.join(lines), attrs


def applyAttrs(html, attrs):
    """Replace each ``<pre><code>`` pair with an attributed ``<pre>``.

    Args:
        html: HTML fragment produced by Markdown.
        attrs: ``[lang, title, collapse]`` lists as returned by
            ``analyseAttrs``, matched to the code blocks by position.

    Returns:
        The rewritten HTML fragment followed by a newline.
    """
    # Imported here so the text-only helpers work without lxml installed.
    from lxml.html import fragment_fromstring, tostring

    # fromstring() hands back a lone top-level element unwrapped (and wraps
    # inline-only content in <span>), so stripping a fixed "<div>" from its
    # output corrupts such fragments.  Create the <div> wrapper explicitly.
    doc = fragment_fromstring(html, create_parent='div')
    for i, code in enumerate(doc.xpath('.//pre/code')):
        pre = code.getparent()
        text = code.text
        del pre[:]
        pre.text = text
        # Indented (non-fenced) code blocks also become <pre><code> but have
        # no entry in attrs; fall back to the defaults instead of crashing.
        # NOTE: matching is purely positional, so an indented block placed
        # before a fenced one still shifts the attributes.
        if i < len(attrs):
            lang, title, collapse = attrs[i]
        else:
            lang, title, collapse = _DEFAULT_LANG, '', False
        css = 'brush: %s;' % lang
        if collapse:
            css += ' collapse: true;'
        pre.set('class', css)
        if title:
            pre.set('title', title)
    wrapped = tostring(doc, encoding=str)
    return wrapped[len('<div>'):-len('</div>')] + '\n'


def main():
    """Read Markdown from stdin and write the converted HTML to stdout."""
    import markdown

    text, attrs = analyseAttrs(sys.stdin.read())
    # Python-Markdown 3.x accepts ``extensions`` as a keyword argument only.
    out = markdown.markdown(text, extensions=['fenced_code'])
    sys.stdout.write(applyAttrs(out, attrs))


if __name__ == '__main__':
    main()