Suǒwèide 「pīnyīn shēngdiào shùzì zhuǎn zìfú」 jiùshì bǎ 「pīnyīn+shùzì biǎoshì de shēngdiào」 zhuǎnchéng Unicode zìfú biǎoshì.
所谓的「拼音声调数字转字符」就是把「拼音+数字表示的声调」转成 Unicode 字符表示,为的是做成 fcitx 的拼音输入插件,以方便输入上段的内容。
算法是参考别人的,把所有带声调的音节后缀穷举出来再转换,简单暴力好用。我改写的 Python 3 版在 github/winterpy 上,支持大写和「ü」。
为了在 fcitx 中使用,我又改写了一 Lua 版本,代码如下:
#!/usr/bin/env lua
-- http://www.robertyu.com/wikiperdido/Pinyin%20Parser%20for%20MoinMoin
-- definitions
-- For the pinyin tone rules (which vowel?), see
-- http://www.pinyin.info/rules/where.html
-- NOTE: despite its name, `strsub` is bound to string.gsub (global pattern
-- substitution), not to string.sub.
local strsub = string.gsub
-- Reference to the standard library's string.upper, kept under a private name.
local _strupper = string.upper
-- Lookup table keyed by a final consonant followed by its tone digit; the
-- value is the same text with the digit moved in front of the consonant
-- (for example "ng3" becomes "3ng").
mapConstTone2ToneConst = {
  ["n1"] = "1n",   ["n2"] = "2n",   ["n3"] = "3n",   ["n4"] = "4n",
  ["ng1"] = "1ng", ["ng2"] = "2ng", ["ng3"] = "3ng", ["ng4"] = "4ng",
  ["r1"] = "1r",   ["r2"] = "2r",   ["r3"] = "3r",   ["r4"] = "4r",
}
-- Lookup table keyed by a two-vowel group followed by its tone digit; the
-- value is the same text with the digit moved between the two vowels, i.e.
-- directly after the first one (for example "ao4" becomes "a4o").
mapVowelVowelTone2VowelToneVowel = {
  ["ai1"] = "a1i", ["ai2"] = "a2i", ["ai3"] = "a3i", ["ai4"] = "a4i",
  ["ao1"] = "a1o", ["ao2"] = "a2o", ["ao3"] = "a3o", ["ao4"] = "a4o",
  ["ei1"] = "e1i", ["ei2"] = "e2i", ["ei3"] = "e3i", ["ei4"] = "e4i",
  ["ou1"] = "o1u", ["ou2"] = "o2u", ["ou3"] = "o3u", ["ou4"] = "o4u",
}
-- Lookup table keyed by a vowel letter followed by a tone digit (1-4); the
-- value is the single precomposed Unicode letter carrying that tone mark.
-- The key letter "v" yields the tone-marked forms of "ü".
--
-- Tone digit -> diacritic: 1 = macron, 2 = acute, 3 = caron, 4 = grave.
-- v1 therefore has to be 'ǖ' (U+01D6); it used to be 'ǜ', a copy of v4, which
-- made first-tone "ü" syllables come out with a fourth-tone mark.
mapVowelTone2Unicode = {
  a1 = 'ā',
  a2 = 'á',
  a3 = 'ǎ',
  a4 = 'à',
  e1 = 'ē',
  e2 = 'é',
  e3 = 'ě',
  e4 = 'è',
  i1 = 'ī',
  i2 = 'í',
  i3 = 'ǐ',
  i4 = 'ì',
  o1 = 'ō',
  o2 = 'ó',
  o3 = 'ǒ',
  o4 = 'ò',
  u1 = 'ū',
  u2 = 'ú',
  u3 = 'ǔ',
  u4 = 'ù',
  v1 = 'ǖ',
  v2 = 'ǘ',
  v3 = 'ǚ',
  v4 = 'ǜ',
}
-- Upper-case forms of the tone-marked lower-case vowels, keyed by the
-- lower-case letter.  Built once at load time instead of on every call.
-- Every tone of "ü" needs its own entry: the first-tone pair 'ǖ' -> 'Ǖ' was
-- previously missing because the 'ǜ' key had been written twice.
local upperOfAccented = {
  ['ā'] = 'Ā',
  ['á'] = 'Á',
  ['ǎ'] = 'Ǎ',
  ['à'] = 'À',
  ['ē'] = 'Ē',
  ['é'] = 'É',
  ['ě'] = 'Ě',
  ['è'] = 'È',
  ['ī'] = 'Ī',
  ['í'] = 'Í',
  ['ǐ'] = 'Ǐ',
  ['ì'] = 'Ì',
  ['ō'] = 'Ō',
  ['ó'] = 'Ó',
  ['ǒ'] = 'Ǒ',
  ['ò'] = 'Ò',
  ['ū'] = 'Ū',
  ['ú'] = 'Ú',
  ['ǔ'] = 'Ǔ',
  ['ù'] = 'Ù',
  ['ǖ'] = 'Ǖ',
  ['ǘ'] = 'Ǘ',
  ['ǚ'] = 'Ǚ',
  ['ǜ'] = 'Ǜ',
}

--- Upper-case a string, with explicit support for tone-marked pinyin vowels.
-- @param c  the string to convert; either exactly one tone-marked lower-case
--           vowel (looked up in the table above) or any other string
-- @return   the table entry when `c` is one of the listed vowels, otherwise
--           the result of string.upper(c)
function strupper(c)
  local special = upperOfAccented[c]
  if special then
    return special
  end
  -- Anything that is not a listed tone-marked vowel goes through the standard
  -- library.
  return string.upper(c)
end
--- Convert numbered pinyin tones in a line of text to tone-marked letters.
--
-- The conversion runs in four passes over the whole line:
--   1. every key of mapConstTone2ToneConst is replaced by its value;
--   2. every key of mapVowelVowelTone2VowelToneVowel is replaced by its value;
--   3. every key of mapVowelTone2Unicode is replaced by its value;
--   4. every remaining "v" / "V" is replaced by "ü" / "Ü".
-- Passes 1-3 handle both the lower-case key and its ALL-CAPS spelling (via
-- strupper).  Pass 2 additionally handles the Capitalised spelling, so that a
-- syllable such as "Ai4" has its digit moved behind the "A"; without this the
-- digit stayed behind the "i" and pass 3 marked the wrong vowel.
--
-- @param lineIn  string containing pinyin with tone digits
-- @return        the converted string, and nothing else: string.gsub's match
--                count is deliberately not passed on to the caller
function ConvertPinyinToneNumbers(lineIn)
  local gsub = string.gsub

  -- Replace `from` by `to`, then do the same for their upper-case spellings.
  local function replaceBothCases(text, from, to)
    text = gsub(text, from, to)
    text = gsub(text, strupper(from), strupper(to))
    return text
  end

  -- Upper-case only the first byte; used solely on the ASCII keys/values of
  -- the vowel-pair table, where one byte is one letter.
  local function capitalize(s)
    return string.upper(string.sub(s, 1, 1)) .. string.sub(s, 2)
  end

  local lineOut = lineIn

  -- first transform
  for x, y in pairs(mapConstTone2ToneConst) do
    lineOut = replaceBothCases(lineOut, x, y)
  end

  -- second transform
  for x, y in pairs(mapVowelVowelTone2VowelToneVowel) do
    lineOut = replaceBothCases(lineOut, x, y)
    lineOut = gsub(lineOut, capitalize(x), capitalize(y))
  end

  -- third transform
  for x, y in pairs(mapVowelTone2Unicode) do
    lineOut = replaceBothCases(lineOut, x, y)
  end

  -- Assigning to a variable keeps only gsub's first result (the string).
  lineOut = gsub(lineOut, 'v', 'ü')
  lineOut = gsub(lineOut, 'V', 'Ü')
  return lineOut
end
-- Filter entry point: read standard input one line at a time, convert each
-- line with ConvertPinyinToneNumbers and print the result.
local function main()
  local input = io.stdin
  local rawLine = input:read()
  while rawLine ~= nil do
    -- Going through a local keeps only the first value returned by the
    -- converter, so exactly one value is printed per line.
    local converted = ConvertPinyinToneNumbers(rawLine)
    print(converted)
    rawLine = input:read()
  end
end
main()
很可惜的是,fcitx 的 Lua 模块目前不支持屏蔽数字键选字,所以暂无法在 fcitx 中使用。