Suǒwèi de 「pīnyīn shēngdiào shùzì zhuǎn zìfú」 jiùshì bǎ 「pīnyīn + shùzì biǎoshì de shēngdiào」 zhuǎnchéng Unicode zìfú biǎoshì.
所谓的「拼音声调数字转字符」就是把「拼音+数字表示的声调」转成 Unicode 字符表示,目的是做成 fcitx 的拼音输入插件,以方便输入上一段那样带声调的拼音内容。
算法是参考别人的,把所有带声调的音节后缀穷举出来再转换,简单暴力好用。我改写的 Python 3 版在 github/winterpy 上,支持大写和「ü」。
为了在 fcitx 中使用,我又改写了一个 Lua 版本,代码如下:
#!/usr/bin/env lua
-- Convert pinyin written with trailing tone numbers (e.g. "pin1yin1") into
-- pinyin written with Unicode tone-marked vowels (e.g. "pīnyīn").
--
-- The conversion works in three passes over each line:
--   1. move the tone digit in front of a final n / ng / r   (an1  -> a1n)
--   2. move the tone digit between the vowels of ai/ao/ei/ou (ai1 -> a1i)
--   3. replace every vowel+digit pair by the accented vowel  (a1   -> ā)
-- Finally any remaining 'v' is written as 'ü'.

-- definitions
local gsub = string.gsub
local _strupper = string.upper

-- map (final) consonant+tone to tone+consonant
mapConstTone2ToneConst = {
  n1 = '1n', n2 = '2n', n3 = '3n', n4 = '4n',
  ng1 = '1ng', ng2 = '2ng', ng3 = '3ng', ng4 = '4ng',
  r1 = '1r', r2 = '2r', r3 = '3r', r4 = '4r',
}

-- map vowel+vowel+tone to vowel+tone+vowel
mapVowelVowelTone2VowelToneVowel = {
  ai1 = 'a1i', ai2 = 'a2i', ai3 = 'a3i', ai4 = 'a4i',
  ao1 = 'a1o', ao2 = 'a2o', ao3 = 'a3o', ao4 = 'a4o',
  ei1 = 'e1i', ei2 = 'e2i', ei3 = 'e3i', ei4 = 'e4i',
  ou1 = 'o1u', ou2 = 'o2u', ou3 = 'o3u', ou4 = 'o4u',
}

-- map vowel-number combination to unicode ('v' stands for 'ü')
mapVowelTone2Unicode = {
  a1 = 'ā', a2 = 'á', a3 = 'ǎ', a4 = 'à',
  e1 = 'ē', e2 = 'é', e3 = 'ě', e4 = 'è',
  i1 = 'ī', i2 = 'í', i3 = 'ǐ', i4 = 'ì',
  o1 = 'ō', o2 = 'ó', o3 = 'ǒ', o4 = 'ò',
  u1 = 'ū', u2 = 'ú', u3 = 'ǔ', u4 = 'ù',
  -- first tone is 'ǖ' (macron), not 'ǜ' (grave, fourth tone)
  v1 = 'ǖ', v2 = 'ǘ', v3 = 'ǚ', v4 = 'ǜ',
}

-- Uppercase forms of the accented vowels. string.upper only handles
-- single-byte letters in the C locale, so these multi-byte characters need
-- an explicit table. Built once instead of on every strupper() call.
local upperSpecials = {
  ['ā'] = 'Ā', ['á'] = 'Á', ['ǎ'] = 'Ǎ', ['à'] = 'À',
  ['ē'] = 'Ē', ['é'] = 'É', ['ě'] = 'Ě', ['è'] = 'È',
  ['ī'] = 'Ī', ['í'] = 'Í', ['ǐ'] = 'Ǐ', ['ì'] = 'Ì',
  ['ō'] = 'Ō', ['ó'] = 'Ó', ['ǒ'] = 'Ǒ', ['ò'] = 'Ò',
  ['ū'] = 'Ū', ['ú'] = 'Ú', ['ǔ'] = 'Ǔ', ['ù'] = 'Ù',
  ['ǖ'] = 'Ǖ', ['ǘ'] = 'Ǘ', ['ǚ'] = 'Ǚ', ['ǜ'] = 'Ǜ',
}

-- Uppercase `c`. If `c` is exactly one of the accented vowels above, its
-- uppercase counterpart is returned; any other string goes through
-- string.upper unchanged in behaviour.
function strupper(c)
  return upperSpecials[c] or _strupper(c)
end

-- Build a Lua pattern matching `key` regardless of letter case,
-- e.g. 'ng1' -> '[nN][gG]1'. Keys contain only ASCII letters and digits.
local function caseInsensitivePattern(key)
  -- parentheses drop gsub's second return value (the match count)
  return (gsub(key, '%a', function (ch)
    return '[' .. ch:lower() .. ch:upper() .. ']'
  end))
end

-- Apply one of the "move the tone digit forward" tables to `line`.
-- The letters of each match keep their original case, so capitalised
-- syllables such as "Ao4" or "Ai4" are reordered too ("A4o", "A4i").
local function moveToneForward(line, map)
  for from, to in pairs(map) do
    -- position the digit must end up at, taken from the table value
    local tonePos = to:find('%d')
    line = gsub(line, caseInsensitivePattern(from), function (m)
      -- m is letters followed by one tone digit; re-insert the digit
      -- at tonePos and keep the letters in their original order and case
      return m:sub(1, tonePos - 1) .. m:sub(-1) .. m:sub(tonePos, -2)
    end)
  end
  return line
end

-- Convert every tone-numbered pinyin syllable in `lineIn` to its
-- tone-marked form and return the converted string (a single value).
function ConvertPinyinToneNumbers(lineIn)
  -- first transform: tone digit in front of final n / ng / r
  local lineOut = moveToneForward(lineIn, mapConstTone2ToneConst)
  -- second transform: tone digit between the vowels of ai / ao / ei / ou
  lineOut = moveToneForward(lineOut, mapVowelVowelTone2VowelToneVowel)
  -- third transform: vowel+digit -> accented vowel, lower and upper case
  for x, y in pairs(mapVowelTone2Unicode) do
    lineOut = gsub(gsub(lineOut, x, y), strupper(x), strupper(y))
  end
  -- any 'v' left (toneless ü) is written as a plain 'ü';
  -- assigning to a local discards gsub's match count
  lineOut = gsub(gsub(lineOut, 'v', 'ü'), 'V', 'Ü')
  return lineOut
end

-- Filter: read lines from stdin, print each converted line to stdout.
local function main()
  for lineIn in io.stdin:lines() do
    print(ConvertPinyinToneNumbers(lineIn))
  end
end

main()
很可惜的是,fcitx 的 Lua 模块目前不支持屏蔽数字键选字,所以暂无法在 fcitx 中使用。