diff --git a/.gitignore b/.gitignore
index 80c9c10a6..213f335e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -340,3 +340,8 @@ appcast.*.xml
 *.tar.gz
 .vscode/
 Microsoft.AI.DirectML
+
+.example/
+
+.example/**/target/
+
diff --git a/OpenUtau.Plugin.Builtin/ChineseToJapanesePhonemizer.cs b/OpenUtau.Plugin.Builtin/ChineseToJapanesePhonemizer.cs
new file mode 100644
index 000000000..077b169f3
--- /dev/null
+++ b/OpenUtau.Plugin.Builtin/ChineseToJapanesePhonemizer.cs
@@ -0,0 +1,204 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Reflection;
+using System.Text;
+using OpenUtau.Api;
+using OpenUtau.Core.Ustx;
+using Serilog;
+using WanaKanaNet;
+
+namespace OpenUtau.Plugin.Builtin {
+    /// <summary>
+    /// Cross-lingual phonemizer that converts Chinese pinyin lyrics to Japanese romaji.
+    /// Uses an embedded weighted mapping table (pinyin_zh_to_ja.txt) to split each Chinese
+    /// syllable into one or more Japanese morae by weight ratio. Every sub-phoneme after
+    /// the first is started early by a fixed overlap for smoother transitions.
+    ///
+    /// For CV (standalone) voicebanks whose OTO aliases are in kana, romaji is
+    /// automatically converted to hiragana.
+    /// </summary>
+    [Phonemizer("Chinese to Japanese Phonemizer", "ZH to JA", language: "ZH")]
+    public class ChineseToJapanesePhonemizer : Phonemizer {
+
+        private USinger? singer;
+        private Dictionary<string, WeightedScheme[]> mapping = null!;
+        private bool? useKana; // null=undetected, true=hiragana, false=romaji
+
+        /// <summary>(ratio, romaji) pair used in weighted mapping.</summary>
+        private readonly record struct WeightedOption(int Ratio, string Romaji);
+
+        /// <summary>One scheme = an array of weighted romaji options.</summary>
+        private readonly record struct WeightedScheme(WeightedOption[] Options);
+
+        /// <summary>How far, in milliseconds, a non-first sub-phoneme is pulled ahead of its slice.</summary>
+        private const double OverlapMs = 80;
+
+        /// <summary>Ticks per beat used for the ms→tick conversion and as the fallback note length.</summary>
+        private const int TicksPerBeat = 480;
+
+        public ChineseToJapanesePhonemizer() {
+            try {
+                LoadMapping();
+            } catch (Exception e) {
+                // Keep the phonemizer usable (pure pass-through) when the table cannot be read.
+                Log.Error(e, "Failed to load pinyin mapping");
+                mapping = new Dictionary<string, WeightedScheme[]>();
+            }
+        }
+
+        // ── mapping loader ───────────────────────────────────────────
+
+        /// <summary>
+        /// Fills <see cref="mapping"/> from the embedded resource pinyin_zh_to_ja.txt.
+        /// A usable line has the form "pinyin;scheme_scheme..." where each scheme is a
+        /// comma-separated list of "ratio.romaji" tokens (e.g. "ai;7.a,3.i").
+        /// Blank lines, lines starting with '#', lines without ';' and malformed
+        /// schemes are skipped. If the resource is missing the mapping stays empty.
+        /// </summary>
+        private void LoadMapping() {
+            mapping = new Dictionary<string, WeightedScheme[]>();
+            var assembly = Assembly.GetExecutingAssembly();
+            using var stream = assembly.GetManifestResourceStream(
+                "OpenUtau.Plugin.Builtin.Data.pinyin_zh_to_ja.txt");
+            if (stream == null) {
+                Log.Error("Embedded resource pinyin_zh_to_ja.txt not found");
+                return;
+            }
+            using var reader = new StreamReader(stream, Encoding.UTF8);
+
+            string? line;
+            while ((line = reader.ReadLine()) != null) {
+                line = line.Trim();
+                if (line.Length == 0 || line[0] == '#' || !line.Contains(';'))
+                    continue;
+
+                var parts = line.Split(';', 2);
+                if (parts.Length != 2) continue;
+
+                string pinyin = parts[0].Trim();
+                if (pinyin.Length == 0) continue;
+
+                // Each scheme separated by '_'
+                var schemes = new List<WeightedScheme>();
+                foreach (var schemeStr in parts[1].Trim().Split('_')) {
+                    if (TryParseScheme(schemeStr, out var scheme))
+                        schemes.Add(scheme);
+                }
+
+                // A later line for the same pinyin replaces the earlier entry.
+                if (schemes.Count > 0)
+                    mapping[pinyin] = schemes.ToArray();
+            }
+        }
+
+        /// <summary>
+        /// Parses one scheme ("ratio.romaji,ratio.romaji,...").
+        /// Returns false when any token lacks a '.', has a non-positive or
+        /// non-numeric ratio, or has an empty romaji part.
+        /// </summary>
+        private static bool TryParseScheme(string schemeStr, out WeightedScheme scheme) {
+            scheme = default;
+            var opts = new List<WeightedOption>();
+            foreach (var token in schemeStr.Split(',')) {
+                int dot = token.IndexOf('.');
+                if (dot <= 0) return false;
+                // The table is machine-readable data: parse with the invariant culture.
+                if (!int.TryParse(token.AsSpan(0, dot),
+                        System.Globalization.NumberStyles.Integer,
+                        System.Globalization.CultureInfo.InvariantCulture,
+                        out int ratio) || ratio <= 0)
+                    return false;
+                string romaji = token.Substring(dot + 1).Trim();
+                if (romaji.Length == 0) return false;
+                opts.Add(new WeightedOption(ratio, romaji));
+            }
+            if (opts.Count == 0) return false;
+            scheme = new WeightedScheme(opts.ToArray());
+            return true;
+        }
+
+        // ── Phonemizer API ───────────────────────────────────────────
+
+        public override void SetSinger(USinger singer) {
+            this.singer = singer;
+            useKana = null; // re-detect on next use
+        }
+
+        /// <summary>
+        /// Maps the lyric of <c>notes[0]</c> to one or more phonemes.
+        /// "?"-prefixed lyrics, extension notes ("+", "+~", "+*"), "R" and "-" are
+        /// passed through. A lyric without a table entry is passed through after the
+        /// kana conversion attempt. Otherwise the first scheme of the entry is used and
+        /// the summed duration of <paramref name="notes"/> is divided by weight ratio.
+        /// </summary>
+        public override Result Process(Note[] notes, Note? prev, Note? next,
+            Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) {
+
+            var note = notes[0];
+            string lyric = note.lyric.Normalize();
+
+            // Forced alias (? prefix)
+            if (lyric.Length > 0 && lyric[0] == '?')
+                return MakeSimpleResult(lyric.Substring(1));
+
+            // Extension note / rest / breath / tail
+            if (lyric == "+"
+                || lyric.StartsWith("+~", StringComparison.Ordinal)
+                || lyric.StartsWith("+*", StringComparison.Ordinal)
+                || lyric == "R" || lyric == "-")
+                return MakeSimpleResult(lyric);
+
+            // Look up mapping → use first scheme (index 0)
+            if (!mapping.TryGetValue(lyric, out var schemes) || schemes.Length == 0) {
+                // No mapping – pass through (with kana conversion attempt)
+                return MakeSimpleResult(ConvertToVoicebankAlias(lyric, note.tone));
+            }
+
+            var scheme = schemes[0].Options;
+            int totalRatio = scheme.Sum(o => o.Ratio);
+            int totalDuration = notes.Sum(n => n.duration);
+            if (totalDuration <= 0) totalDuration = TicksPerBeat;
+
+            int overlapTicks = GetOverlapTicks(note.position);
+
+            var phonemes = new List<Phoneme>(scheme.Length);
+            int sliceStart = 0;   // un-shifted start of the current slice
+            int lastPosition = 0; // position actually emitted for the previous phoneme
+
+            for (int i = 0; i < scheme.Length; i++) {
+                var opt = scheme[i];
+                // long arithmetic so a very long note cannot overflow the product
+                int sliceLength = (int)((long)totalDuration * opt.Ratio / totalRatio);
+                if (sliceLength <= 0) sliceLength = 1;
+
+                int position = sliceStart;
+                if (i > 0) {
+                    // Pull non-first phonemes ahead for continuity, but never to or before
+                    // the previous phoneme: on short notes the raw shift would produce
+                    // negative / out-of-order positions.
+                    position = Math.Max(lastPosition + 1, sliceStart - overlapTicks);
+                }
+
+                phonemes.Add(new Phoneme {
+                    phoneme = ConvertToVoicebankAlias(opt.Romaji, note.tone),
+                    position = position,
+                });
+
+                lastPosition = position;
+                sliceStart += sliceLength;
+            }
+
+            return new Result { phonemes = phonemes.ToArray() };
+        }
+
+        // ── helpers ──────────────────────────────────────────────────
+
+        /// <summary>
+        /// Converts <see cref="OverlapMs"/> to ticks using the bpm reported by
+        /// <c>timeAxis</c> at <paramref name="tick"/>. Returns 0 when the bpm is
+        /// not a positive number.
+        /// </summary>
+        private int GetOverlapTicks(int tick) {
+            double bpm = timeAxis.GetBpmAtTick(tick);
+            if (!(bpm > 0)) return 0; // also rejects NaN
+            double msPerTick = 60000.0 / (bpm * TicksPerBeat);
+            return Math.Max(0, (int)(OverlapMs / msPerTick));
+        }
+
+        /// <summary>
+        /// Detects the voicebank format once by probing for "あ" in the OTO.
+        /// If "あ" exists → hiragana mode; otherwise → romaji mode.
+        /// </summary>
+        private void DetectFormat() {
+            useKana = false;
+            if (singer == null || !singer.Found) return;
+            if (singer.TryGetMappedOto("あ", 60, out _))
+                useKana = true;
+        }
+
+        /// <summary>
+        /// Converts romaji to the voicebank's preferred format.
+        /// Hiragana mode: WanaKana.ToHiragana(). Romaji mode: pass through.
+        /// Without a usable singer, or when the conversion throws, the input is
+        /// returned unchanged. <paramref name="tone"/> is currently not used.
+        /// </summary>
+        private string ConvertToVoicebankAlias(string romaji, int tone) {
+            if (singer == null || !singer.Found)
+                return romaji;
+            if (useKana == null)
+                DetectFormat();
+            if (useKana == true) {
+                try {
+                    return WanaKana.ToHiragana(romaji);
+                } catch (Exception e) {
+                    // Best effort: fall back to the romaji alias, but leave a trace.
+                    Log.Debug(e, "Kana conversion failed for {Romaji}", romaji);
+                }
+            }
+            return romaji;
+        }
+
+        public override string ToString() => "[ZH to JA] Chinese to Japanese Phonemizer";
+    }
+}
diff --git a/OpenUtau.Plugin.Builtin/ChineseToJapaneseVCVPhonemizer.cs b/OpenUtau.Plugin.Builtin/ChineseToJapaneseVCVPhonemizer.cs
new file mode 100644
index 000000000..953aa5e38
--- /dev/null
+++ b/OpenUtau.Plugin.Builtin/ChineseToJapaneseVCVPhonemizer.cs
@@ -0,0 +1,253 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Reflection;
+using System.Text;
+using OpenUtau.Api;
+using OpenUtau.Core.Ustx;
+using Serilog;
+using WanaKanaNet;
+
+namespace OpenUtau.Plugin.Builtin {
+    ///
+    /// Cross-lingual phonemizer that converts Chinese pinyin lyrics to
+    /// Japanese VCV (renzokuon / continuous-sound) aliases. 
+    ///
+    /// Unlike the CV version ("ZH to JA") which outputs standalone romaji,
+    /// this phonemizer links adjacent phonemes by prepending the previous
+    /// vowel, forming the characteristic VCV transition:
+    ///   phrase start  → "- tsu" / "- a"
+    ///   between notes → "u shi" / "a n"
+    ///   within a note → "u a" / "a o"
+    ///
+    /// This is designed for VCV (continuous) Japanese voicebanks.
+    ///
+    [Phonemizer("Chinese to Japanese VCV Phonemizer", "ZH to JA VCV", language: "ZH")]
+    public class ChineseToJapaneseVCVPhonemizer : Phonemizer {
+
+        private USinger? singer;
+        private Dictionary<string, WeightedScheme[]> mapping = null!;
+        private bool? useKana; // null=undetected, true=hiragana, false=romaji
+
+        /// <summary>(ratio, romaji) pair used in weighted mapping.</summary>
+        private readonly record struct WeightedOption(int Ratio, string Romaji);
+
+        /// <summary>One scheme = an array of weighted romaji options.</summary>
+        private readonly record struct WeightedScheme(WeightedOption[] Options);
+
+        /// <summary>How far, in milliseconds, a non-first sub-phoneme is pulled ahead of its slice.</summary>
+        private const double OverlapMs = 80;
+
+        /// <summary>Ticks per beat used for the ms→tick conversion and as the fallback note length.</summary>
+        private const int TicksPerBeat = 480;
+
+        private const string VcvPad = " "; // separator between prev-vowel and current romaji
+        private const string PhraseStart = "-"; // link symbol used when there is no previous vowel
+
+        /// <summary>Kana VCV aliases probed to decide between hiragana and romaji mode.</summary>
+        private static readonly string[] KanaProbeAliases = { "o あ", "a あ", "- あ" };
+
+        // ── ctor ─────────────────────────────────────────────────────
+
+        public ChineseToJapaneseVCVPhonemizer() {
+            try {
+                LoadMapping();
+            } catch (Exception e) {
+                // Keep the phonemizer usable (pure pass-through) when the table cannot be read.
+                Log.Error(e, "Failed to load pinyin mapping");
+                mapping = new Dictionary<string, WeightedScheme[]>();
+            }
+        }
+
+        // ── mapping loader (same source as ChineseToJapanesePhonemizer)
+
+        /// <summary>
+        /// Fills <see cref="mapping"/> from the embedded resource pinyin_zh_to_ja.txt.
+        /// A usable line has the form "pinyin;scheme_scheme..." where each scheme is a
+        /// comma-separated list of "ratio.romaji" tokens (e.g. "ai;7.a,3.i").
+        /// Blank lines, lines starting with '#', lines without ';' and malformed
+        /// schemes are skipped. If the resource is missing the mapping stays empty.
+        /// </summary>
+        private void LoadMapping() {
+            mapping = new Dictionary<string, WeightedScheme[]>();
+            var assembly = Assembly.GetExecutingAssembly();
+            using var stream = assembly.GetManifestResourceStream(
+                "OpenUtau.Plugin.Builtin.Data.pinyin_zh_to_ja.txt");
+            if (stream == null) {
+                Log.Error("Embedded resource pinyin_zh_to_ja.txt not found");
+                return;
+            }
+            using var reader = new StreamReader(stream, Encoding.UTF8);
+
+            string? line;
+            while ((line = reader.ReadLine()) != null) {
+                line = line.Trim();
+                if (line.Length == 0 || line[0] == '#' || !line.Contains(';'))
+                    continue;
+                var parts = line.Split(';', 2);
+                if (parts.Length != 2) continue;
+                string pinyin = parts[0].Trim();
+                if (pinyin.Length == 0) continue;
+
+                var schemes = new List<WeightedScheme>();
+                foreach (var schemeStr in parts[1].Trim().Split('_')) {
+                    if (TryParseScheme(schemeStr, out var scheme))
+                        schemes.Add(scheme);
+                }
+                // A later line for the same pinyin replaces the earlier entry.
+                if (schemes.Count > 0)
+                    mapping[pinyin] = schemes.ToArray();
+            }
+        }
+
+        /// <summary>
+        /// Parses one scheme ("ratio.romaji,ratio.romaji,...").
+        /// Returns false when any token lacks a '.', has a non-positive or
+        /// non-numeric ratio, or has an empty romaji part.
+        /// </summary>
+        private static bool TryParseScheme(string schemeStr, out WeightedScheme scheme) {
+            scheme = default;
+            var opts = new List<WeightedOption>();
+            foreach (var token in schemeStr.Split(',')) {
+                int dot = token.IndexOf('.');
+                if (dot <= 0) return false;
+                // The table is machine-readable data: parse with the invariant culture.
+                if (!int.TryParse(token.AsSpan(0, dot),
+                        System.Globalization.NumberStyles.Integer,
+                        System.Globalization.CultureInfo.InvariantCulture,
+                        out int ratio) || ratio <= 0)
+                    return false;
+                string romaji = token.Substring(dot + 1).Trim();
+                if (romaji.Length == 0) return false;
+                opts.Add(new WeightedOption(ratio, romaji));
+            }
+            if (opts.Count == 0) return false;
+            scheme = new WeightedScheme(opts.ToArray());
+            return true;
+        }
+
+        // ── Phonemizer API ───────────────────────────────────────────
+
+        public override void SetSinger(USinger singer) {
+            this.singer = singer;
+            useKana = null; // re-detect on next use
+        }
+
+        /// <summary>
+        /// Maps the lyric of <c>notes[0]</c> to one or more VCV aliases.
+        /// "?"-prefixed lyrics, extension notes ("+", "+~", "+*"), "R" and "-" are
+        /// passed through. Every other alias is prefixed with a linking vowel: the
+        /// last vowel of <paramref name="prevNeighbour"/> (or "-" when none can be
+        /// determined) for the first sub-phoneme, and the vowel of the preceding
+        /// sub-phoneme for the rest.
+        /// </summary>
+        public override Result Process(Note[] notes, Note? prev, Note? next,
+            Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) {
+
+            var note = notes[0];
+            string lyric = note.lyric.Normalize();
+
+            // Forced alias
+            if (lyric.Length > 0 && lyric[0] == '?')
+                return MakeSimpleResult(lyric.Substring(1));
+
+            // Extension / rest / breath
+            if (lyric == "+"
+                || lyric.StartsWith("+~", StringComparison.Ordinal)
+                || lyric.StartsWith("+*", StringComparison.Ordinal)
+                || lyric == "R" || lyric == "-")
+                return MakeSimpleResult(lyric);
+
+            // ── Determine the linking vowel ──────────────────────────
+            // For the FIRST sub-phoneme: use the previous note's last vowel,
+            // or "-" if this is the start of a phrase.
+            string linkVowel = GetLastVowelOfNote(prevNeighbour) ?? PhraseStart;
+
+            // ── Look up the mapping (first scheme only) ──────────────
+            if (!mapping.TryGetValue(lyric, out var schemes) || schemes.Length == 0) {
+                // No mapping – pass through with VCV prefix
+                var fallback = ConvertToVoicebankAlias(lyric, note.tone);
+                return MakeSimpleResult(linkVowel + VcvPad + fallback);
+            }
+
+            WeightedOption[] scheme = schemes[0].Options;
+            int totalRatio = scheme.Sum(o => o.Ratio);
+            int totalDuration = notes.Sum(n => n.duration);
+            if (totalDuration <= 0) totalDuration = TicksPerBeat;
+
+            int overlapTicks = GetOverlapTicks(note.position);
+
+            // ── Build phonemes ───────────────────────────────────────
+            var phonemes = new List<Phoneme>(scheme.Length);
+            int sliceStart = 0;   // un-shifted start of the current slice
+            int lastPosition = 0; // position actually emitted for the previous phoneme
+
+            for (int i = 0; i < scheme.Length; i++) {
+                var opt = scheme[i];
+                // long arithmetic so a very long note cannot overflow the product
+                int sliceLength = (int)((long)totalDuration * opt.Ratio / totalRatio);
+                if (sliceLength <= 0) sliceLength = 1;
+
+                // First sub-phoneme → linked from previous note or phrase start;
+                // subsequent sub-phonemes → linked from the previous sub-phoneme.
+                string prefix = i == 0 ? linkVowel : ExtractVowel(scheme[i - 1].Romaji);
+                string alias = prefix + VcvPad + ConvertToVoicebankAlias(opt.Romaji, note.tone);
+
+                int position = sliceStart;
+                if (i > 0) {
+                    // Pull non-first phonemes ahead for continuity, but never to or before
+                    // the previous phoneme: on short notes the raw shift would produce
+                    // negative / out-of-order positions.
+                    position = Math.Max(lastPosition + 1, sliceStart - overlapTicks);
+                }
+
+                phonemes.Add(new Phoneme {
+                    phoneme = alias,
+                    position = position,
+                });
+
+                lastPosition = position;
+                sliceStart += sliceLength;
+            }
+
+            return new Result { phonemes = phonemes.ToArray() };
+        }
+
+        // ── helpers ──────────────────────────────────────────────────
+
+        /// <summary>
+        /// Converts <see cref="OverlapMs"/> to ticks using the bpm reported by
+        /// <c>timeAxis</c> at <paramref name="tick"/>. Returns 0 when the bpm is
+        /// not a positive number.
+        /// </summary>
+        private int GetOverlapTicks(int tick) {
+            double bpm = timeAxis.GetBpmAtTick(tick);
+            if (!(bpm > 0)) return 0; // also rejects NaN
+            double msPerTick = 60000.0 / (bpm * TicksPerBeat);
+            return Math.Max(0, (int)(OverlapMs / msPerTick));
+        }
+
+        /// <summary>
+        /// Re-computes the last sub-phoneme vowel of the previous note
+        /// by looking up its lyric in the mapping table (a leading "?" is
+        /// stripped before the lookup).
+        /// Returns null if there is no previous note, the lyric is empty,
+        /// "R" or "-", or the lookup fails.
+        /// </summary>
+        private string? GetLastVowelOfNote(Note? prevNote) {
+            if (prevNote == null) return null;
+
+            string lyric = prevNote.Value.lyric.Normalize();
+            if (string.IsNullOrEmpty(lyric) || lyric == "R" || lyric == "-")
+                return null;
+            if (lyric[0] == '?')
+                lyric = lyric.Substring(1);
+
+            if (!mapping.TryGetValue(lyric, out var schemes) || schemes.Length == 0)
+                return null;
+
+            var opts = schemes[0].Options;
+            if (opts.Length == 0) return null;
+
+            return ExtractVowel(opts[^1].Romaji); // last sub-phoneme's vowel
+        }
+
+        /// <summary>
+        /// Extracts the vowel from a Japanese romaji syllable.
+        /// For CV syllables (ka, tsu, shi, kya) the vowel is the last character.
+        /// "n" is treated as a syllabic nasal and is returned as-is.
+        /// An empty or null input yields "a".
+        /// </summary>
+        private static string ExtractVowel(string romaji) {
+            if (string.IsNullOrEmpty(romaji)) return "a";
+            return romaji[^1].ToString();
+        }
+
+        /// <summary>
+        /// Detects the voicebank format once by probing the OTO for a kana VCV alias.
+        /// VCV kana banks use the form "vowel + space + kana" (e.g. "o あ", "a か").
+        /// If any probe alias exists → hiragana mode; otherwise → romaji mode.
+        /// </summary>
+        private void DetectFormat() {
+            useKana = false;
+            if (singer == null || !singer.Found) return;
+            foreach (var probe in KanaProbeAliases) {
+                if (singer.TryGetMappedOto(probe, 60, out _)) {
+                    useKana = true;
+                    return;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Converts romaji to the voicebank's preferred format.
+        /// Hiragana mode: WanaKana.ToHiragana(). Romaji mode: pass through.
+        /// Without a usable singer, or when the conversion throws, the input is
+        /// returned unchanged. <paramref name="tone"/> is currently not used.
+        /// </summary>
+        private string ConvertToVoicebankAlias(string romaji, int tone) {
+            if (singer == null || !singer.Found)
+                return romaji;
+            if (useKana == null)
+                DetectFormat();
+            if (useKana == true) {
+                try {
+                    return WanaKana.ToHiragana(romaji);
+                } catch (Exception e) {
+                    // Best effort: fall back to the romaji alias, but leave a trace.
+                    Log.Debug(e, "Kana conversion failed for {Romaji}", romaji);
+                }
+            }
+            return romaji;
+        }
+
+        public override string ToString() => "[ZH to JA VCV] Chinese to Japanese VCV Phonemizer";
+    }
+}
diff --git a/OpenUtau.Plugin.Builtin/Data/arpabet_to_pinyin_enhanced.txt b/OpenUtau.Plugin.Builtin/Data/arpabet_to_pinyin_enhanced.txt
new file mode 100644
index 000000000..3d93cb9b1
--- /dev/null
+++ b/OpenUtau.Plugin.Builtin/Data/arpabet_to_pinyin_enhanced.txt
@@ -0,0 +1,72 @@
+# Enhanced ARPAbet → Chinese pinyin mapping table (two-dimensional)
+# ── Consonant → Chinese initial (shengmu) ──
+# Format: C:ARPABET=initial1|initial2|...
+# Used as the onset of a syllable. Multiple options are ranked by preference.
+C:B=b
+C:CH=ch
+C:D=d
+C:DH=z|zh
+C:F=f
+C:G=g
+C:HH=h
+C:JH=j
+C:K=k
+C:L=l
+C:M=m
+C:N=n
+C:NG=n
+C:P=p
+C:R=r
+C:S=s
+C:SH=sh
+C:T=t
+C:TH=s|c
+C:V=w
+C:W=w
+C:Y=y
+C:Z=z
+C:ZH=zh
+
+# ── Vowel → Chinese final (yunmu) ──
+# Format: V:ARPABET=final1|final2|...
+# First option is the default. 
Multiple finals cover diphthong variations. +V:AA=a +V:AE=a|ai|ei +V:AH=a|e +V:AO=o|ao|ou +V:AW=ao +V:AY=ai|ei +V:EH=e|ei|ai|a +V:ER=e|er|a +V:EY=ei|e|ai +V:IH=i|ei +V:IY=i|ei +V:OW=ou|o|u +V:OY=o_i|ou_yi +V:UH=u|ou +V:UW=u|ou + +# ── Valid Chinese syllables (common subset used for validation) ── +# Syllables that don't validate are replaced with the phonetically closest valid one. +S:ba,bai,ban,bang,bao,bei,ben,beng,bi,bian,biao,bie,bin,bing,bo,bu +S:ca,cai,can,cang,cao,ce,cen,ceng,cha,chai,chan,chang,chao,che,chen,cheng,chi,chong,chou,chu,chuai,chuan,chuang,chui,chun,chuo,ci,cong,cou,cu,cuan,cui,cun,cuo +S:da,dai,dan,dang,dao,de,den,dei,deng,di,dian,diao,die,ding,diu,dong,dou,du,duan,dui,dun,duo +S:e,ei,en,eng,er +S:fa,fan,fang,fei,fen,feng,fo,fou,fu +S:ga,gai,gan,gang,gao,ge,gei,gen,geng,gong,gou,gu,gua,guai,guan,guang,gui,gun,guo +S:ha,hai,han,hang,hao,he,hei,hen,heng,hong,hou,hu,hua,huai,huan,huang,hui,hun,huo +S:ji,jia,jian,jiang,jiao,jie,jin,jing,jiong,jiu,ju,juan,jue,jun +S:ka,kai,kan,kang,kao,ke,kei,ken,keng,kong,kou,ku,kua,kuai,kuan,kuang,kui,kun,kuo +S:la,lai,lan,lang,lao,le,lei,leng,li,lia,lian,liang,liao,lie,lin,ling,liu,long,lou,lu,luan,lun,luo,lv,lve +S:ma,mai,man,mang,mao,me,mei,men,meng,mi,mian,miao,mie,min,ming,miu,mo,mou,mu +S:na,nai,nan,nang,nao,ne,nei,nen,neng,ni,nian,niang,niao,nie,nin,ning,niu,nong,nou,nu,nuan,nuo,nv,nve +S:o,ou +S:pa,pai,pan,pang,pao,pei,pen,peng,pi,pian,piao,pie,pin,ping,po,pou,pu +S:qi,qia,qian,qiang,qiao,qie,qin,qing,qiong,qiu,qu,quan,que,qun +S:ran,rang,rao,re,ren,reng,ri,rong,rou,ru,ruan,rui,run,ruo +S:sa,sai,san,sang,sao,se,sen,seng,sha,shai,shan,shang,shao,she,shei,shen,sheng,shi,shou,shu,shua,shuai,shuan,shuang,shui,shun,shuo,si,song,sou,su,suan,sui,sun,suo +S:ta,tai,tan,tang,tao,te,tei,teng,ti,tian,tiao,tie,ting,tong,tou,tu,tuan,tui,tun,tuo +S:wa,wai,wan,wang,wei,wen,weng,wo,wu +S:xi,xia,xian,xiang,xiao,xie,xin,xing,xiong,xiu,xu,xuan,xue,xun +S:ya,yan,yang,yao,ye,yi,yin,ying,yong,you,yu,yuan,yue,yun 
+S:za,zai,zan,zang,zao,ze,zei,zen,zeng,zha,zhai,zhan,zhang,zhao,zhe,zhei,zhen,zheng,zhi,zhong,zhou,zhu,zhua,zhuai,zhuan,zhuang,zhui,zhun,zhuo,zi,zong,zou,zu,zuan,zui,zun,zuo diff --git a/OpenUtau.Plugin.Builtin/Data/pinyin_zh_to_ja.txt b/OpenUtau.Plugin.Builtin/Data/pinyin_zh_to_ja.txt new file mode 100755 index 000000000..abdd5413c --- /dev/null +++ b/OpenUtau.Plugin.Builtin/Data/pinyin_zh_to_ja.txt @@ -0,0 +1,395 @@ +a;10.a +ai;7.a,3.i +an;7.a,3.n +ang;7.a,3.n +ao;3.a,7.o +ba;10.ba +bai;7.ba,3.i +ban;7.ba,3.n +bang;7.ba,3.n +bao;7.ba,3.o +bei;10.be +ben;7.be,3.n +beng;7.be,3.n +bi;10.bi +bian;7.bya,3.n +biao;7.bya,3.o +bie;10.bye +bin;7.bi,3.n +bing;7.bi,3.n +bo;10.bo +bu;10.bu +ca;10.cha +cai;7.cha,3.i +can;7.cha,3.n +cang;7.cha,3.n +cao;3.tsu,4.a,3.o_6.cha,4.o +ce;10.cha +cen;7.che,3.n +ceng;7.che,3.n +cha;10.cha +chai;7.cha,3.i +chan;7.cha,3.n +chang;7.cha,3.n +chao;6.cha,4.o +che;7.che,3.n +chen;7.che,3.n +cheng;7.che,3.n +chi;10.chi +chong;7.cho,3.n +chou;7.cho,3.u +chu;10.chu +chua;5.chu,5.a +chuai;7.chu,3.a +chuan;7.cha,3.n +chuang;7.chu,3.a +chui;7.chu,3.e +chun;7.chu,3.n +chuo;7.chu,3.o +ci;10.chi +cong;3.chu,7.o +cou;3.cho,7.u +cu;10.tsu +cuan;7.tsa,3.n +cui;3.tsu,7.i +cun;3.tsu,7.n +cuo;3.tsu,7.o +da;10.da +dai;7.da,3.i +dan;7.da,3.n +dang;7.da,3.n +dao;7.da,3.o +de;10.de +dei;10.de +deng;6.de,4.n +deng;6.de,4.n +di;10.di +dia;5.di,5.a +dian;7.de,3.n +diao;4.di,3.a,3.o +die;10.de +ding;7.di,3.n +diu;7.di,3.u +dong;7.do,3.n +dou;7.do,3.u +du;10.du +duan;4.du,3.a,3.n +dui;5.du,5.i +dun;7.du,3.n +duo;7.du,3.o +e;10.a +ei;10.e +en;7.a,3.n +eng;7.a,3.n +er;10.o +fa;10.fa +fan;7.fa,3.n +fang;7.fa,3.n +fei;10.fe +fen;7.fa,3.n +feng;7.fo,3.n +fo;10.fo +fou;7.fo,3.u +fu;10.fu +ga;10.ga +gai;7.ga,3.i +gan;7.ga,3.n +gang;7.ga,3.n +gao;7.ga,3.o +ge;10.ga +gei;10.ge +gen;7.go,3.n +geng;7.go,3.n +gong;7.go,3.n +gou;7.go,3.u +gu;10.gu +gua;3.gu,7.a +guai;3.gu,7.a,3.i +guan;3.gu,4.a,3.n +guang;3.gu,4.a,3.n +gui;3.gu,7.e +gun;7.gu,3.n +guo;3.gu,7.o +ha;10.ha 
+hai;7.ha,3.i +han;7.ha,3.n +hang;7.ha,3.n +hao;7.ha,3.o +he;10.ha +hei;10.he +hen;7.ha,3.n +heng;7.ha,3.n +hong;7.ho,3.n +hou;7.ho,3.u +hu;10.hu +hua;3.hu,7.a +huai;3.ha,7.i +huan;3.hu,4.a,3.n +huang;3.hu,4.a,3.n +hui;3.hu,7.e +hun;3.hu,7.n +huo;3.hu,7.o +i;10.i +ji;10.ji +jia;10.ja +jian;7.je,3.n +jiang;3.ji,4.a,3.n +jiao;7.ja,3.o +jie;10.je +jin;7.ji,3.n +jing;7.ji,3.n +jiong;7.jo,3.n +jiu;3.ji,7.o_10.ju +ju;10.ju +juan;3.ju,7.a +jue;3.ju,7.e +jun;3.ju,7.n +ka;10.ka +kai;7.ka,3.i +kan;7.ka,3.n +kang;7.ka,3.n +kao;7.ka,3.o +ke;10.ke_10.ko +kei;5.ke,5.i +ken;7.ke,3.n +keng;7.ke,3.n +kong;7.ko,3.n +kou;7.ko,3.u +ku;10.ku +kua;3.ku,7.a +kuai;4.ku,3.a,3.i +kuan;4.ku,3.a,3.n +kuang;3.ku,4.a,3.n +kui;7.ku,3.i +kun;7.ku,3.n +kuo;7.ku,3.o +la;10.ra +lai;7.ra,3.i +lan;7.ra,3.n +lang;7.ra,3.n +lao;7.ra,3.o +le;10.ra +lei;10.re +leng;7.ro,3.n +li;10.ri +lia;3.ri,7.a +lian;7.ra,3.n +liang;3.ri,4.a,3.n +liao;3.ri,4.a,3.o +lie;10.re +lin;7.ri,3.n +ling;7.ri,3.n +liu;10.ru +long;7.ro,3.n +lou;7.ro,3.u +lu;10.ru +lv;10.ru +luan;3.ru,4.a,3.n +lue;3.ru,7.e +lve;3.ru,7.e,3.a +luo;7.ru,3.o +ma;10.ma +mai;7.ma,3.i +man;7.ma,3.n +mang;7.ma,3.n +mao;7.ma,3.o +me;10.mo +mei;10.me +men;7.me,3.n +meng;7.mo,3.n +mi;10.mi +mian;3.mi,4.a,3.n +miao;7.ma,3.o +mie;10.me +min;7.mi,3.n +ming;7.mi,3.n +miu;3.mi,7.u +mo;10.mo +mou;7.mo,3.u +mu;10.mu +na;10.na +ni;10.ni +nai;7.na,3.e +nan;7.na,3.n +nang;7.na,3.n +nao;7.na,3.o +ne;10.na +nei;10.ne +nen;7.no,3.n +neng;7.no,3.n +nian;5.nya,5.n +niao;5.nya,5.o +nuan;3.nyu,3.a,4.n +o;10.o +ong;10.n +pa;10.pa +pai;5.pa,5.i_5.pa,5.e +pan;6.pa,4.n +pang;7.pa,3.n +pao;7.pa,3.o +pei;10.pe +pen;7.pa,3.n +peng;7.pa,3.n +pi;10.pi +pian;3.pi,4.a,3.n +piao;3.pi,4.a,3.o +pie;10.pe +pin;7.pi,3.n +ping;7.pi,3.n +po;10.po +pou;7.po,3.u +pu;10.pu +qi;10.chi +qia;10.kya +qian;7.kya,3.n +qiang;7.kya,3.n +qiao;7.kya,3.o +qie;10.kye +qin;7.chi,3.n +qing;7.chi,3.n +qiong;7.kyo,3.n +qiu;10.kyu +qu;10.kyu +quan;3.kya,7.n +que;10.kye +qun;3.kyu,7.n +ran;7.ra,3.n +rang;7.ra,3.n 
+rao;7.ra,3.o +re;10.ra +ren;7.re,3.n +reng;7.re,3.n +ri;10.ri +rong;7.ro,3.n +rou;7.ro,3.u +ru;10.ru +ruan;3.ru,4.a,3.n +rui;3.ru,7.i +run;3.ru,7.n +ruo;7.ru,3.o +sa;10.sa +sai;7.sa,3.i +san;7.sa,3.n +sang;7.sa,3.n +sao;7.sa,3.o +se;10.se +sen;7.se,3.n +seng;7.se,3.n +sha;10.sha +shai;7.sha,3.i +shan;7.sha,3.n +shang;7.sha,3.n +shao;7.sha,3.o +she;10.sho +shei;7.she,3.i +shen;7.she,3.n +sheng;7.she,3.n +shi;10.shi +shou;7.sho,3.u +shu;10.shu +shua;3.shu,7.a +shuai;3.shu,3.a,4.i +shuan;3.shu,4.a,3.n +shuang;3.shu,4.a,3.n +shui;3.shu,7.i +shun;3.shu,7.n +shuo;3.sha,7.o +si;10.si +song;7.so,3.n +sou;7.so,3.u +su;10.su +suan;4.su,3.a,3.n +sui;3.su,7.i +sun;3.su,7.n +suo;10.so +ta;10.ta +tai;7.ta,3.i +tan;7.ta,3.n +tang;7.ta,3.n +tao;7.ta,3.o +te;10.te_10.to +tei;5.te,5.i +teng;7.te,3.n_7.to,3.n +ti;10.ti +tian;3.ti,4.a,3.n +tiao;3.ti,4.a,3.o +tie;10.te +ting;7.ti,3.n +tong;7.to,3.n +tou;7.to,3.u +tu;10.tu +tuan;3.tu,4.a,3.n +tui;3.tu,7.e +tun;3.tu,7.n +tuo;3.tu,7.o +u;10.u +wa;10.wa +wai;7.wa,3.i +wan;7.wa,3.n +wang;7.wa,3.n +wei;10.we +wen;7.wo,3.n +weng;7.wo,3.n +wo;10.wo +wu;10.u +xi;10.si +xia;10.sha +xian;3.si,4.a,3.n_3.si,7.a +xiang;3.si,4.a,3.n +xiao;3.sa,7.o_3.si,3.a,4.o +xie;10.se +xin;3.si,7.n +xing;3.si,7.n +xiong;3.si,4.o,3.n +xiu;10.su +xu;10.su +xuan;3.su,7.a +xue;3.su,7.e_3.si,3.u,4.e +xun;3.su,7.n +ya;10.ya +yan;7.ya,3.n +yang;7.ya,3.n +yao;7.ya,3.o +ye;10.ye +yi;10.i +yin;7.i,3.n +ying;7.i,3.n +yong;7.yo,3.n +you;10.yo +yu;10.yu +yuan;3.yu,7.e +yue;3.yu,7.e +yun;3.yu,7.n +za;10.za +zai;7.za,3.i +zan;7.za,3.n +zang;7.za,3.n +zao;7.za,3.o +ze;10.ze +zei;7.ze,3.i +zen;7.ze,3.n +zeng;7.ze,3.n +zha;10.za +zhai;7.za,3.i +zhan;7.za,3.n +zhang;7.za,3.n +zhao;7.za,3.o +zhe;10.zo +zhen;7.ze,3.n +zheng;7.ze,3.n +zhi;10.zi +zhong;10.zo +zhou;7.zo,3.u +zhu;10.zu +zhua;3.zu,7.a +zhuai;3.zu,4.a,3.i +zhuan;3.zu,4.a,3.n +zhuang;3.zu,4.a,3.n +zhui;3.zu,7.e +zhun;3.zu,7.n +zhuo;3.zu,7.o +zi;10.ji +zong;7.zo,3.n +zou;7.zo,3.u +zu;10.zu +zuo;5.zu,5.wo \ No newline at end 
of file diff --git a/OpenUtau.Plugin.Builtin/EnglishToChinesePhonemizer.cs b/OpenUtau.Plugin.Builtin/EnglishToChinesePhonemizer.cs new file mode 100644 index 000000000..051e8164e --- /dev/null +++ b/OpenUtau.Plugin.Builtin/EnglishToChinesePhonemizer.cs @@ -0,0 +1,528 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Text; +using OpenUtau.Api; +using OpenUtau.Core.G2p; +using OpenUtau.Core.Ustx; +using Serilog; + +namespace OpenUtau.Plugin.Builtin { + /// + /// Cross-lingual phonemizer that converts English lyrics to Chinese pinyin + /// using a two-dimensional consonant×vowel mapping: + /// 1. CMUdict → ARPAbet phonemes + /// 2. Parse ARPAbet into (onset, nucleus) syllable pairs + /// 3. Consonant initial × vowel final → Chinese syllable + /// 4. Validate against known Chinese syllables; fall back gracefully + /// 5. Merge bare finals with preceding syllables + /// + /// Example: "hello" → [HH,AH, L,OW] → [ha, lou] + /// + [Phonemizer("English to Chinese Phonemizer", "EN to ZH", language: "EN")] + public class EnglishToChinesePhonemizer : Phonemizer { + + // ── mapping tables ──────────────────────────────────────────── + + private USinger? singer; + + /// ARPAbet consonant → Chinese initials (shengmu), ranked by preference. + private Dictionary consonantInitials = null!; + + /// ARPAbet vowel → Chinese finals (yunmu), ranked by preference. + private Dictionary vowelFinals = null!; + + /// Set of valid Chinese syllables for validation / fallback. + private HashSet validSyllables = null!; + + /// Cache: final part of a pinyin string. + private Dictionary pinyinFinalCache = new(); + + private ArpabetG2p? 
arpabetG2p; + + // ── ARPAbet vowel set ───────────────────────────────────────── + + private static readonly HashSet ArpabetVowelSet = new() { + "AA","AE","AH","AO","AW","AY", + "EH","ER","EY", + "IH","IY", + "OW","OY", + "UH","UW" + }; + + // ── Chinese initial consonants (shengmu), longest-match first ─ + private static readonly string[] ChineseInitials = { + "zh","ch","sh", + "b","p","m","f","d","t","n","l", + "g","k","h","j","q","x", + "r","z","c","s", + "y","w" + }; + + // ── ctor ───────────────────────────────────────────────────── + + public EnglishToChinesePhonemizer() { + try { + LoadMappingTables(); + arpabetG2p = new ArpabetG2p(); + } catch (Exception e) { + Log.Error(e, "Failed to initialize English→Chinese phonemizer"); + consonantInitials = new Dictionary(); + vowelFinals = new Dictionary(); + validSyllables = new HashSet(); + } + } + + // ── mapping loader ─────────────────────────────────────────── + + private void LoadMappingTables() { + consonantInitials = new Dictionary(); + vowelFinals = new Dictionary(); + validSyllables = new HashSet(); + + var assembly = Assembly.GetExecutingAssembly(); + using var stream = assembly.GetManifestResourceStream( + "OpenUtau.Plugin.Builtin.Data.arpabet_to_pinyin_enhanced.txt"); + if (stream == null) { + Log.Error("Embedded resource arpabet_to_pinyin_enhanced.txt not found"); + return; + } + using var reader = new StreamReader(stream, Encoding.UTF8); + + string? 
line; + while ((line = reader.ReadLine()) != null) { + line = line.Trim(); + if (line.Length == 0 || line[0] == '#') continue; + + if (line.StartsWith("C:") || line.StartsWith("V:")) { + // Format: C:ARPABET=initial1|initial2 or V:ARPABET=final1|final2 + int eq = line.IndexOf('='); + if (eq < 3) continue; // at least "C:X=" or "V:X=" + string key = line.Substring(2, eq - 2).Trim(); // between "C:" / "V:" and "=" + string body = line.Substring(eq + 1).Trim(); + var values = body.Split('|') + .Select(s => s.Trim()).Where(s => s.Length > 0).ToArray(); + if (values.Length == 0) continue; + + if (line[0] == 'C') + consonantInitials[key] = values; + else + vowelFinals[key] = values; + + } else if (line.StartsWith("S:")) { + // Format: S:syl1,syl2,... + string body = line.Substring(2).Trim(); + foreach (var syl in body.Split(',')) { + var s = syl.Trim(); + if (s.Length > 0) validSyllables.Add(s); + } + } + } + } + + // ── Phonemizer API ─────────────────────────────────────────── + + public override void SetSinger(USinger singer) { + this.singer = singer; + } + + public override Result Process(Note[] notes, Note? prev, Note? next, + Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) { + + var note = notes[0]; + string lyric = note.lyric.Normalize(); + + // Forced alias + if (lyric.Length > 0 && lyric[0] == '?') + return MakeSimpleResult(lyric.Substring(1)); + + // Extension / rest / breath + if (lyric == "+" || lyric.StartsWith("+~") || lyric.StartsWith("+*")) + return MakeSimpleResult(lyric); + if (lyric == "R") return MakeSimpleResult("R"); + if (lyric == "-") return MakeSimpleResult("SP"); + + // Phonetic hint bypass + if (!string.IsNullOrEmpty(note.phoneticHint)) { + var hintPhonemes = note.phoneticHint.Split() + .Where(s => s.Length > 0) + .Select(s => TryMapPinyinToOto(s, note.tone)) + .ToArray(); + return DistributePhonemes(notes, hintPhonemes); + } + + // ── Stage 1: CMUdict → ARPAbet ───────────────────────── + string[]? 
arpa = arpabetG2p?.Query(lyric.ToLowerInvariant()) + ?? arpabetG2p?.Query(lyric); + + if (arpa == null || arpa.Length == 0) { + return DistributePhonemes(notes, new[] { + TryMapPinyinToOto(lyric.ToLowerInvariant(), note.tone) + }); + } + + // Normalise to uppercase + arpa = arpa.Select(p => p.ToUpperInvariant()).ToArray(); + + // ── Stage 2: syllable-based ARPAbet → Pinyin ──────────── + var rawPinyins = SyllableMap(arpa); + + // ── Stage 3a: merge bare vowels with same final ───────── + var merged = MergeByFinal(rawPinyins); + + // ── Stage 3b: merge overlapping finals (la+ai→lai) ────── + merged = MergeByFinalOverlap(merged, note.tone); + + // ── Stage 4: OTO lookup & distribute ──────────────────── + var mapped = merged.Select(p => TryMapPinyinToOto(p, note.tone)).ToArray(); + return DistributePhonemes(notes, mapped); + } + + // ── Stage 2: syllable-based mapping ───────────────────────── + + /// + /// Parses the ARPAbet sequence into (onset, nucleus) syllable pairs. + /// Each vowel is a syllable nucleus; consonants before it form + /// the onset; trailing consonants after the last vowel become + /// standalone coda syllables. + /// + /// For each pair: initial = consonant_mapping[onset] × vowel_mapping[nucleus] + /// The combination is validated against the known-Chinese-syllable set. 
+ /// + private string[] SyllableMap(string[] arpa) { + // 1) Find vowel positions + var vowelIdx = new List(); + for (int i = 0; i < arpa.Length; i++) { + if (ArpabetVowelSet.Contains(arpa[i])) + vowelIdx.Add(i); + } + if (vowelIdx.Count == 0) { + // No vowels – map each consonant directly as a standalone + return arpa.Select(a => MapConsonantStandalone(a)).ToArray(); + } + + var result = new List(); + + // 2) First syllable: onset = everything before first vowel + int firstV = vowelIdx[0]; + if (firstV > 0) { + // onset consonants → Chinese initial + string initials = MapOnsetCluster(arpa.Take(firstV)); + string finals = MapNucleusVowel(arpa[firstV], initials); + string syllable = MakeSyllable(initials, finals); + result.Add(syllable); + } else { + // Syllable starts with a vowel (no onset consonant) + result.Add(MapNucleusVowel(arpa[0], "")); + } + + // 3) Remaining syllables + for (int vi = 1; vi < vowelIdx.Count; vi++) { + int prevV = vowelIdx[vi - 1]; + int thisV = vowelIdx[vi]; + int onsetStart = prevV + 1; + int onsetCount = thisV - onsetStart; + + if (onsetCount > 0) { + string initials = MapOnsetCluster(arpa.Skip(onsetStart).Take(onsetCount)); + string finals = MapNucleusVowel(arpa[thisV], initials); + result.Add(MakeSyllable(initials, finals)); + } else { + // Back-to-back vowels + result.Add(MapNucleusVowel(arpa[thisV], "")); + } + } + + // 4) Coda: consonants after the last vowel + // Nasal codas (NG, N, M) try to fold into the last syllable's final. + int lastV = vowelIdx[^1]; + if (lastV + 1 < arpa.Length) { + string codaArpa = arpa[lastV + 1]; + if (IsNasalCoda(codaArpa)) { + // Try to absorb the nasal into the preceding syllable + string? 
folded = FoldNasalIntoSyllable(result[^1], codaArpa); + if (folded != null) { + result[^1] = folded; + // Skip this coda; process remaining codas normally + for (int i = lastV + 2; i < arpa.Length; i++) + result.Add(MapConsonantStandalone(arpa[i])); + return result.ToArray(); + } + } + } + // Normal coda processing + for (int i = lastV + 1; i < arpa.Length; i++) { + result.Add(MapConsonantStandalone(arpa[i])); + } + + return result.ToArray(); + } + + // ── onset / nucleus mappers ────────────────────────────────── + + /// Maps a single onset consonant to its Chinese initial. + private string MapOnsetConsonant(string arpaConsonant) { + if (consonantInitials.TryGetValue(arpaConsonant, out var initials) && initials.Length > 0) + return initials[0]; + return arpaConsonant.ToLowerInvariant(); // fallback + } + + /// + /// Maps an onset consonant cluster to a Chinese initial. + /// For a single consonant, uses the consonant→initial map directly. + /// For clusters (e.g. "S T" in "stop"), takes the first consonant + /// as the primary initial (clusters don't exist in Chinese). + /// + private string MapOnsetCluster(IEnumerable consonants) { + var list = consonants.ToList(); + if (list.Count == 0) return ""; + // Use the primary (first) consonant of the cluster + return MapOnsetConsonant(list[0]); + } + + /// + /// Maps a nucleus vowel to a Chinese final. + /// When a preceding initial is known, prefers a final that + /// forms a valid syllable with it. 
///
private string MapNucleusVowel(string arpaVowel, string precedingInitial) {
    // Unknown vowel: fall back to the lower-cased ARPAbet symbol.
    if (!vowelFinals.TryGetValue(arpaVowel, out var options) || options.Length == 0) {
        return arpaVowel.ToLowerInvariant();
    }

    // With a known initial, the first final that forms a valid syllable
    // with it wins.
    if (!string.IsNullOrEmpty(precedingInitial)) {
        for (int k = 0; k < options.Length; k++) {
            if (IsValidSyllable(precedingInitial + options[k])) {
                return options[k];
            }
        }
    }

    // Otherwise the first entry is the default final.
    return options[0];
}

/// <summary>
/// Maps a coda consonant to a standalone Chinese syllable by pairing each
/// of its initials with a fixed list of filler finals and taking the first
/// valid combination. Consonants without a table entry are returned
/// lower-cased.
/// </summary>
private string MapConsonantStandalone(string arpaConsonant) {
    if (!consonantInitials.TryGetValue(arpaConsonant, out var initials) || initials.Length == 0) {
        return arpaConsonant.ToLowerInvariant();
    }

    // Tried in this order for every initial before moving to the next one.
    string[] fillers = { "e", "a", "u", "i", "o", "ou", "ei" };
    string? match = initials
        .SelectMany(init => fillers.Select(filler => init + filler))
        .FirstOrDefault(IsValidSyllable);

    // Fallback: just the initial + "e" (most neutral)
    return match ?? initials[0] + "e";
}

/// <summary>
/// Builds a Chinese syllable from initial + final. If the combination is
/// not a valid syllable but the final alone is, the final alone is
/// returned; if neither is valid the raw combination is returned as a
/// last resort.
/// </summary>
private string MakeSyllable(string initial, string final) {
    string joined = initial + final;
    if (!IsValidSyllable(joined) && IsValidSyllable(final)) {
        return final;
    }
    return joined;
}

/// <summary>Case-insensitive membership test against the valid-syllable set.</summary>
private bool IsValidSyllable(string pinyin) => validSyllables.Contains(pinyin.ToLowerInvariant());

/// <summary>True for the ARPAbet nasals NG, N and M.</summary>
private static bool IsNasalCoda(string arpa) => arpa switch {
    "NG" or "N" or "M" => true,
    _ => false
};

/// <summary>
/// Tries to fold a nasal coda into the preceding syllable.
/// Candidates are checked in this order and the first valid one is returned:
/// the syllable plus the nasal itself, the syllable plus the other of
/// "n"/"ng", then the syllable's initial with a replacement final
/// ("ong" for NG; "an" then "en" for N).
/// Returns null when nothing is valid or the symbol is not NG, N or M.
/// </summary>
private string? FoldNasalIntoSyllable(string syllable, string nasalArpa) {
    string? suffix = nasalArpa switch {
        "NG" => "ng",
        "N" => "n",
        "M" => "m", // "m" coda is rare in Chinese; in practice it falls through to "n"/"ng" below
        _ => null
    };
    if (suffix is null) {
        return null;
    }

    string init = GetInitial(syllable);
    string stem = init + GetFinal(syllable);

    // 1) the nasal as written
    var attempts = new List<string> { stem + suffix };

    // 2) alternate nasals (e.g., N might fold better as "ng")
    attempts.AddRange(new[] { "n", "ng" }
        .Where(alt => alt != suffix)
        .Select(alt => stem + alt));

    // 3) replace the final entirely with a known nasal final,
    //    e.g., AO→"o" + NG → "ong" is valid
    switch (nasalArpa) {
        case "NG":
            attempts.Add(init + "ong");
            break;
        case "N":
            attempts.Add(init + "an");
            attempts.Add(init + "en");
            break;
    }

    // List<T>.Find yields null when no candidate is valid.
    return attempts.Find(IsValidSyllable);
}

// ── Stage 3a: merge bare vowels with same final ──────────────

///
/// Absorbs consecutive bare-vowel entries into the preceding
/// syllable when they share the same final.
/// Example: [ha, a, lou, ou] → [ha, lou]
///
private string[] MergeByFinal(string[] pinyins) {
    if (pinyins.Length <= 1) {
        return pinyins;
    }

    var merged = new List<string> { pinyins[0] };
    for (int i = 1; i < pinyins.Length; i++) {
        string prev = merged[^1];
        string curr = pinyins[i];

        // A bare vowel that repeats the previous syllable's final is only
        // the tail of that syllable, so it is dropped rather than emitted.
        bool absorbed = GetFinal(prev) == GetFinal(curr) && !HasChineseInitial(curr);
        if (!absorbed) {
            merged.Add(curr);
        }
    }

    return merged.ToArray();
}

// ── Stage 3b: merge overlapping finals ──────────────────────

/// <summary>
/// When prev-final matches the START of curr AND curr has no
/// initial consonant, fuse them: prev-initial + curr → one syllable.
/// Only fuses when the result exists in the singer's OTO.
/// Example: la + ai → lai
/// </summary>
/// <param name="pinyins">Syllables to scan; not modified.</param>
/// <param name="tone">Tone passed to the singer's OTO lookup.</param>
/// <returns>A new array with fused syllables, or the input itself when it has at most one entry.</returns>
private string[] MergeByFinalOverlap(string[] pinyins, int tone) {
    if (pinyins.Length <= 1) {
        return pinyins;
    }

    var merged = new List<string>(pinyins);

    for (int i = 1; i < merged.Count; i++) {
        string prev = merged[i - 1];
        string curr = merged[i];
        string prevFinal = GetFinal(prev);

        // Ordinal: these are pinyin identifiers, not linguistic text, so the
        // prefix test must not depend on the current culture's collation.
        bool overlaps = !HasChineseInitial(curr) && HasChineseInitial(prev)
            && prevFinal.Length > 0
            && curr.StartsWith(prevFinal, System.StringComparison.Ordinal);
        if (!overlaps) {
            continue;
        }

        string candidate = GetInitial(prev) + curr;
        if (singer != null && singer.Found
            && singer.TryGetMappedOto(candidate, tone, out _)) {
            merged[i - 1] = candidate;
            merged.RemoveAt(i);
            // Re-examine the fused syllable against the entry that slid into slot i.
            i--;
        }
    }

    return merged.ToArray();
}

// ── Stage 4: distribute phonemes ─────────────────────────────

/// <summary>
/// Spreads the syllables evenly over the combined duration of
/// <paramref name="notes"/>. When the duration does not divide evenly,
/// the leading phonemes each get one extra unit. A non-positive total
/// duration is replaced by 480, and an empty syllable list yields
/// <c>MakeSimpleResult("")</c>.
/// </summary>
private Result DistributePhonemes(Note[] notes, string[] syllables) {
    int totalDuration = notes.Sum(n => n.duration);
    if (totalDuration <= 0) {
        totalDuration = 480;
    }
    if (syllables.Length == 0) {
        return MakeSimpleResult("");
    }

    int count = syllables.Length;
    int baseLen = totalDuration / count;
    int remainder = totalDuration % count;

    var phonemes = new Phoneme[count];
    int pos = 0;
    for (int i = 0; i < count; i++) {
        phonemes[i] = new Phoneme {
            phoneme = syllables[i],
            position = pos,
        };
        // The first `remainder` phonemes absorb the leftover units.
        pos += baseLen + (i < remainder ? 1 : 0);
    }

    return new Result { phonemes = phonemes };
}

// ── helpers ──────────────────────────────────────────────────

/// <summary>
/// Chinese initials ordered longest first, so a longer initial is always
/// tested before any shorter initial it starts with. Sorted once on first
/// use instead of on every split.
/// NOTE(review): assumes ChineseInitials is not modified after start-up — confirm.
/// </summary>
private static readonly System.Lazy<string[]> InitialsLongestFirst =
    new System.Lazy<string[]>(() => ChineseInitials.OrderByDescending(i => i.Length).ToArray());

/// <summary>
/// Splits a pinyin syllable into initial and final. The syllable is
/// lower-cased first; an initial only counts when something follows it.
/// </summary>
/// <param name="pinyin">Syllable to split; null or empty gives two empty parts.</param>
/// <param name="initial">The matched initial, or "" when there is none.</param>
/// <param name="final">The remainder after the initial, or the whole lower-cased syllable when there is none.</param>
/// <returns>True when an initial was found.</returns>
private static bool TrySplitPinyin(string pinyin, out string initial, out string final) {
    initial = "";
    if (string.IsNullOrEmpty(pinyin)) {
        final = "";
        return false;
    }

    string lowered = pinyin.ToLowerInvariant();
    foreach (var init in InitialsLongestFirst.Value) {
        if (lowered.Length > init.Length
            && lowered.StartsWith(init, System.StringComparison.Ordinal)) {
            initial = init;
            final = lowered.Substring(init.Length);
            return true;
        }
    }

    final = lowered;
    return false;
}

/// <summary>Cached wrapper around <see cref="ComputeFinal"/>, keyed by the syllable as given.</summary>
private string GetFinal(string pinyin) {
    if (pinyinFinalCache.TryGetValue(pinyin, out var cached)) {
        return cached;
    }
    string final = ComputeFinal(pinyin);
    pinyinFinalCache[pinyin] = final;
    return final;
}

/// <summary>
/// Returns the final of a syllable: the part after its initial, or the whole
/// lower-cased syllable when it has no initial ("" for null or empty input).
/// </summary>
private static string ComputeFinal(string pinyin) {
    TrySplitPinyin(pinyin, out _, out var final);
    return final;
}

/// <summary>True when the syllable starts with a Chinese initial that is followed by a final.</summary>
private static bool HasChineseInitial(string pinyin) => TrySplitPinyin(pinyin, out _, out _);

/// <summary>Returns the syllable's initial, or "" when it has none.</summary>
private static string GetInitial(string pinyin) {
    TrySplitPinyin(pinyin, out var initial, out _);
    return initial;
}

/// <summary>
/// Picks the alias to use for a syllable: the syllable itself when the
/// singer maps it, otherwise the result of
/// <c>ArpabetG2p.RemoveTailDigits</c> when that differs and is mapped,
/// otherwise the syllable unchanged. Without a usable singer the
/// syllable is returned as is.
/// </summary>
private string TryMapPinyinToOto(string pinyin, int tone) {
    if (singer == null || !singer.Found) {
        return pinyin;
    }
    if (singer.TryGetMappedOto(pinyin, tone, out _)) {
        return pinyin;
    }
    string stripped = ArpabetG2p.RemoveTailDigits(pinyin);
    if (stripped != pinyin && singer.TryGetMappedOto(stripped, tone, out _)) {
        return stripped;
    }
    return pinyin;
}

public override string ToString() => "[EN to ZH] English to Chinese Phonemizer";
} // class
} // namespace
using System;
using System.Collections.Generic;
using System.Linq;
using OpenUtau.Api;
using OpenUtau.Core.Ustx;
using Serilog;

namespace OpenUtau.Plugin.Builtin {
    /// <summary>
    /// Japanese CVVC phonemizer. For each note it emits the note's own alias
    /// and, when the following note starts with a consonant and the voicebank
    /// has a matching "vowel consonant" alias, a second transition phoneme
    /// placed near the end of the note.
    /// </summary>
    // NOTE(review): the patch adds this file under a path ending in "(复制 1).cs"
    // ("copy 1"). If JapaneseCVVCPhonemizer.cs already exists in this project,
    // this declares the same type twice in OpenUtau.Plugin.Builtin — confirm the
    // copy is intentional before merging.
    [Phonemizer("Japanese CVVC Phonemizer (legacy)", "JA CVVC", "TUBS",language:"JA")]
    public class JapaneseCVVCPhonemizer : Phonemizer {
        // Lyrics that are a bare vowel (or ん/ン).
        static readonly string[] plainVowels = { "あ", "い", "う", "え", "お", "を", "ん", "ン" };

        // Lyrics that carry no vowel of their own (breaths, rests, bare consonants).
        static readonly string[] nonVowels = {
            "息","吸","R","-","k","ky","g","gy",
            "s","sh","z","j","t","ch","ty","ts",
            "d","dy","n","ny","h","hy","f","b",
            "by","p","py","m","my","y","r","4",
            "ry","w","v","ng","l","・","B", "H",
        };

        // "vowel=kana,kana,…": the vowel each kana ends in.
        static readonly string[] vowels = {
            "a=ぁ,あ,か,が,さ,ざ,た,だ,な,は,ば,ぱ,ま,ゃ,や,ら,わ,ァ,ア,カ,ガ,サ,ザ,タ,ダ,ナ,ハ,バ,パ,マ,ャ,ヤ,ラ,ワ",
            "e=ぇ,え,け,げ,せ,ぜ,て,で,ね,へ,べ,ぺ,め,れ,ゑ,ェ,エ,ケ,ゲ,セ,ゼ,テ,デ,ネ,ヘ,ベ,ペ,メ,レ,ヱ",
            "i=ぃ,い,き,ぎ,し,じ,ち,ぢ,に,ひ,び,ぴ,み,り,ゐ,ィ,イ,キ,ギ,シ,ジ,チ,ヂ,ニ,ヒ,ビ,ピ,ミ,リ,ヰ",
            "o=ぉ,お,こ,ご,そ,ぞ,と,ど,の,ほ,ぼ,ぽ,も,ょ,よ,ろ,を,ォ,オ,コ,ゴ,ソ,ゾ,ト,ド,ノ,ホ,ボ,ポ,モ,ョ,ヨ,ロ,ヲ",
            "n=ん",
            "u=ぅ,う,く,ぐ,す,ず,つ,づ,ぬ,ふ,ぶ,ぷ,む,ゅ,ゆ,る,ゥ,ウ,ク,グ,ス,ズ,ツ,ヅ,ヌ,フ,ブ,プ,ム,ュ,ユ,ル,ヴ",
            "N=ン",
            "・=・",
        };

        // "consonant=kana,kana,…": the consonant each lyric prefix starts with.
        static readonly string[] consonants = {
            "ch=ち,ちぇ,ちゃ,ちゅ,ちょ",
            "gy=ぎ,ぎぇ,ぎゃ,ぎゅ,ぎょ",
            "ts=つ,つぁ,つぃ,つぇ,つぉ",
            "ty=てぃ,てぇ,てゃ,てゅ,てょ",
            "py=ぴ,ぴぇ,ぴゃ,ぴゅ,ぴょ",
            "ry=り,りぇ,りゃ,りゅ,りょ",
            "ly=リ,リェ,リャ,リュ,リョ",
            "ny=に,にぇ,にゃ,にゅ,にょ",
            "r=ら,る,るぃ,れ,ろ",
            "hy=ひ,ひぇ,ひゃ,ひゅ,ひょ",
            "dy=でぃ,でぇ,でゃ,でゅ,でょ",
            "by=び,びぇ,びゃ,びゅ,びょ",
            "b=ば,ぶ,ぶぃ,べ,ぼ",
            "d=だ,で,ど,どぃ,どぅ",
            "g=が,ぐ,ぐぃ,げ,ご",
            "f=ふ,ふぁ,ふぃ,ふぇ,ふぉ",
            "h=は,はぃ,へ,ほ,ほぅ",
            "k=か,く,くぃ,け,こ",
            "j=じ,じぇ,じゃ,じゅ,じょ,ぢ,ぢぇ,ぢゃ,ぢゅ,ぢょ",
            "m=ま,む,むぃ,め,も",
            "n=な,ぬ,ぬぃ,ね,の",
            "p=ぱ,ぷ,ぷぃ,ぺ,ぽ",
            "s=さ,す,すぃ,せ,そ",
            "sh=し,しぇ,しゃ,しゅ,しょ",
            "t=た,て,と,とぃ,とぅ",
            "v=ヴ,ヴぁ,ヴぃ,ヴぅ,ヴぇ,ヴぉ",
            "ky=き,きぇ,きゃ,きゅ,きょ",
            "w=うぃ,うぅ,うぇ,うぉ,わ,ゐ,ゑ,を,ヰ,ヱ",
            "y=いぃ,いぇ,や,ゆ,よ",
            "z=ざ,ず,ずぃ,ぜ,ぞ",
            "dz=づ,づぃ",
            "my=み,みぇ,みゃ,みゅ,みょ",
            "ng=ガ,ギ,グ,ゲ,ゴ,ギェ,ギャ,ギュ,ギョ,カ゜,キ゜,ク゜,ケ゜,コ゜,キ゜ェ,キ゜ャ,キ゜ュ,キ゜ョ",
            "l=ラ,ル,レ,ロ",
            "・=・あ,・い,・う,・え,・お,・ん,・を,・ン",
        };

        // in case voicebank is missing certain symbols
        static readonly string[] substitution = {
            "ty,ch,ts=t", "j,dy=d", "gy=g", "ky=k", "py=p", "ny=n", "ry=r", "my=m", "hy,f=h", "by,v=b", "dz=z", "l=r", "ly=l"
        };

        static readonly Dictionary<string, string> vowelLookup;
        static readonly Dictionary<string, string> consonantLookup;
        static readonly Dictionary<string, string> substituteLookup;

        static JapaneseCVVCPhonemizer() {
            vowelLookup = BuildLookup(vowels, keysOnRight: true);
            consonantLookup = BuildLookup(consonants, keysOnRight: true);
            substituteLookup = BuildLookup(substitution, keysOnRight: false);
        }

        /// <summary>
        /// Expands "left=right" table lines into a dictionary. One side is a
        /// comma-separated list of keys and the other is the value shared by
        /// all of them; <paramref name="keysOnRight"/> says which is which.
        /// A key that occurs twice throws, as Dictionary.Add does.
        /// </summary>
        private static Dictionary<string, string> BuildLookup(string[] table, bool keysOnRight) {
            var lookup = new Dictionary<string, string>();
            foreach (var line in table) {
                var sides = line.Split('=');
                string keyList = keysOnRight ? sides[1] : sides[0];
                string value = keysOnRight ? sides[0] : sides[1];
                foreach (var key in keyList.Split(',')) {
                    lookup.Add(key, value);
                }
            }
            return lookup;
        }

        // Store singer in field, will try reading presamp.ini later
        private USinger singer;
        public override void SetSinger(USinger singer) => this.singer = singer;

        /// <summary>
        /// Looks up every alias in <paramref name="input"/> for the note, using
        /// the phoneme attributes at <paramref name="attrIndex"/>. The alias
        /// with the attribute's alternate suffix is tried first, the plain
        /// alias second. Hits are returned in input order.
        /// </summary>
        private List<UOto> CollectOtos(string[] input, Note note, int attrIndex, out string color) {
            var attr = note.phonemeAttributes?.FirstOrDefault(a => a.index == attrIndex) ?? default;
            int tone = note.tone + attr.toneShift;

            var hits = new List<UOto>();
            foreach (string alias in input) {
                if (singer.TryGetMappedOto(alias + attr.alternate, tone, attr.voiceColor, out var withAlternate)) {
                    hits.Add(withAlternate);
                } else if (singer.TryGetMappedOto(alias, tone, attr.voiceColor, out var plain)) {
                    hits.Add(plain);
                }
            }

            color = attr.voiceColor ?? "";
            return hits;
        }

        // make it quicker to check multiple oto occurrences at once rather than spamming if else if
        // Prefers a hit matching the voice color, but accepts the first hit otherwise.
        private bool checkOtoUntilHit(string[] input, Note note, out UOto oto) {
            oto = default;
            var hits = CollectOtos(input, note, 0, out var color);
            if (hits.Count == 0) {
                return false;
            }
            oto = hits.FirstOrDefault(o => o.IsColorMatch(color)) ?? hits.First();
            return true;
        }

        // checking VCs
        // when VC does not exist, it will not be inserted
        // Unlike checkOtoUntilHit, only a hit matching the voice color counts.
        private bool checkOtoUntilHitVc(string[] input, Note note, out UOto oto) {
            var hits = CollectOtos(input, note, 1, out var color);
            oto = hits.FirstOrDefault(o => o.IsColorMatch(color));
            return oto != null;
        }

        /// <summary>Wraps one alias in a single-phoneme result.</summary>
        private static Result Solo(string alias) {
            return new Result {
                phonemes = new Phoneme[] {
                    new Phoneme {
                        phoneme = alias,
                    }
                },
            };
        }

        /// <summary>
        /// Resolves the alias for the first note of <paramref name="notes"/>
        /// and, when possible, appends a VC transition into the next note.
        /// A phonetic hint on the note replaces its lyric; a phonetic hint on
        /// the next note suppresses the VC.
        /// </summary>
        public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) {
            var note = notes[0];
            bool hasHint = !string.IsNullOrEmpty(note.phoneticHint);
            var currentLyric = note.lyric.Normalize();
            if (hasHint) {
                currentLyric = note.phoneticHint.Normalize();
            }
            var originalLyric = currentLyric;
            var cfLyric = $"* {currentLyric}";

            // Aliases to try for this note, most specific first.
            string[] candidates;
            if (hasHint) {
                // Not convert VCV
                candidates = new[] { currentLyric };
            } else if (prevNeighbour == null) {
                // Use "- V" or "- CV" if present in voicebank
                candidates = new[] { $"- {currentLyric}", currentLyric };
            } else if (plainVowels.Contains(currentLyric) || nonVowels.Contains(currentLyric)) {
                // Current note is VV: needs the vowel the previous note ended on.
                var prevLyric = prevNeighbour.Value.lyric.Normalize();
                if (!string.IsNullOrEmpty(prevNeighbour.Value.phoneticHint)) {
                    prevLyric = prevNeighbour.Value.phoneticHint.Normalize();
                }
                candidates = vowelLookup.TryGetValue(prevLyric.LastOrDefault().ToString(), out var prevVowel)
                    ? new[] { $"{prevVowel} {currentLyric}", cfLyric, currentLyric }
                    : Array.Empty<string>(); // unknown previous vowel: lyric stays untouched
            } else {
                candidates = new[] { cfLyric, currentLyric };
            }
            if (candidates.Length > 0 && checkOtoUntilHit(candidates, note, out var cvOto)) {
                currentLyric = cvOto.Alias;
            }

            // No next neighbor, or the next note is driven by a phonetic hint: no VC.
            if (nextNeighbour == null || !string.IsNullOrEmpty(nextNeighbour.Value.phoneticHint)) {
                return Solo(currentLyric);
            }
            var nextNote = nextNeighbour.Value;
            var nextLyric = nextNote.lyric.Normalize();

            // Check if next note is a vowel and does not require VC
            if (nextLyric.Length == 1 && plainVowels.Contains(nextLyric)) {
                return Solo(currentLyric);
            }

            // Get vowel from current note (empty when the last character is unknown)
            if (!vowelLookup.TryGetValue(originalLyric.LastOrDefault().ToString(), out var vowel)) {
                vowel = "";
            }

            // Get consonant from next note: first character, then first two characters
            bool hasConsonant = consonantLookup.TryGetValue(nextLyric.FirstOrDefault().ToString(), out var consonant)
                || (nextLyric.Length >= 2 && consonantLookup.TryGetValue(nextLyric.Substring(0, 2), out consonant));
            if (!hasConsonant || consonant == "") {
                return Solo(currentLyric);
            }

            // Second slot holds the substitute-consonant alias, or stays "" when there is none.
            var vcCandidates = new[] { $"{vowel} {consonant}", "" };
            if (substituteLookup.TryGetValue(consonant, out var substitute)) {
                vcCandidates[1] = $"{vowel} {substitute}";
            }
            if (!checkOtoUntilHitVc(vcCandidates, note, out var vcOto)) {
                return Solo(currentLyric);
            }

            int totalDuration = notes.Sum(n => n.duration);
            int vcLength = 120; // used when the next note's oto cannot be found
            var nextAttr = nextNote.phonemeAttributes?.FirstOrDefault(a => a.index == 0) ?? default;
            if (singer.TryGetMappedOto(nextLyric, nextNote.tone + nextAttr.toneShift, nextAttr.voiceColor, out var nextOto)) {
                // If overlap is a negative value, vcLength is longer than Preutter
                vcLength = MsToTick(nextOto.Overlap < 0
                    ? nextOto.Preutter - nextOto.Overlap
                    : nextOto.Preutter);
            }
            // vcLength depends on the Vel of the next note; capped at half of this note
            vcLength = Convert.ToInt32(Math.Min(totalDuration / 2, vcLength * (nextAttr.consonantStretchRatio ?? 1)));

            return new Result {
                phonemes = new Phoneme[] {
                    new Phoneme() {
                        phoneme = currentLyric,
                    },
                    new Phoneme() {
                        phoneme = vcOto.Alias,
                        position = totalDuration - vcLength,
                    }
                },
            };
        }
    }
}