【问题标题】:How to get pronunciation phonemes corresponding to a word using C#?如何使用 C# 获取与单词对应的发音音素?
【发布时间】:2018-09-06 05:58:11
【问题描述】:

我先说我是 C# 编程的新手。我正在开发一个使用 C# 结合 SAPI v5.4 (speechlib) 以编程方式修改 Windows 语音词典的应用程序。到目前为止一切都运行良好,但我需要更深入地了解字符串在合成(发声)时是如何解释的。

我的理解是,在 SAPI 5.4 中,单词被分解为音素表示(phoneme representations),并且我在使用音素正确“训练”单词发音方面取得了一些成功。我还知道我可以手动将单词添加到 Windows 语音识别词典中,提供录音,然后提取单词的发音(音素)……但这很麻烦。探索默认情况下如何合成单词也很有用,即在没有我输入的情况下(例如合成器如何解释“dolphins”?)。

从编码的角度来看,这是我目前所得到的:

using System;
using System.Speech.Synthesis;

namespace SpeechTest
{
    /// <summary>
    /// Question snippet: speaks a single word through the default audio device.
    /// The step that retrieves the word's pronunciation is still a placeholder,
    /// so this snippet does not compile as written.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Configures a <see cref="SpeechSynthesizer"/>, speaks <c>"dolphins"</c>,
        /// then waits for the user to press Enter before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Set up the speech synthesizer (Volume = 100, Rate = -2)
            SpeechSynthesizer synthesizer = new SpeechSynthesizer();
            synthesizer.Volume = 100;
            synthesizer.Rate = -2;

            // Route the synthesized audio to the default audio device
            synthesizer.SetOutputToDefaultAudioDevice();

            // Initialize string to store word of interest (not in the speech dictionary)
            string myWord = "dolphins";

            // Speak the word of interest (Speak is the synchronous call)
            synthesizer.Speak(myWord);

            // Retrieve pronunciation of myWord
            // NOTE(review): the right-hand side below is an unfinished placeholder;
            // the statement is incomplete until real retrieval code is supplied.
            string myPronunciation = // *some code here*

            Console.WriteLine("Press any key to exit...");
            Console.ReadLine();
        }
    }
}

【问题讨论】:

    标签: c# text-to-speech microsoft-speech-api


    【解决方案1】:

    感谢 Casey Chesnut 的出色工作,我已经弄清楚如何确定给定字符串的 IPA 音素。现在我只需要弄清楚如何将 IPA 音素转换为 SAPI 符号,但这是另一个单独的话题(请参阅此处,了解如何从文本字符串中获取 SAPI 音素)。

    using System;
    using System.Collections.ObjectModel;
    using System.ComponentModel;
    using System.IO;
    using System.Speech.Recognition;
    using System.Speech.Synthesis;
    using System.Windows.Forms;
    
    namespace SpeechTest
    {
        /// <summary>
        /// Finds the pronunciation the speech engine uses for a piece of text by
        /// synthesizing the text into an in-memory wave stream and then running a
        /// recognizer, constrained to that same text, over the generated audio.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Looks up the pronunciation of a sample word and shows it in a message box.
            /// </summary>
            /// <param name="args">Command-line arguments (not used).</param>
            static void Main(string[] args)
            {
                string MyText = "dolphins"; // Word (or words) of interest
                string MyPronunciation = GetPronunciationFromText(MyText.Trim()); // Pronunciation reported for MyText
                MessageBox.Show(MyText + " = " + MyPronunciation); // Output MyText and MyPronunciation
            }

            /// <summary>
            /// Most recent pronunciation string captured from the recognizer. It is reset
            /// at the start of each <see cref="GetPronunciationFromText"/> call and is
            /// also written by the hypothesis/rejection event handlers.
            /// </summary>
            public static string recoPhonemes;

            /// <summary>
            /// Synthesizes <paramref name="MyWord"/> to a wave stream, recognizes that
            /// audio with a grammar containing only <paramref name="MyWord"/>, and
            /// returns the pronunciation attached to the recognized words.
            /// </summary>
            /// <param name="MyWord">Text to synthesize and then recognize.</param>
            /// <returns>
            /// The concatenated pronunciation of the recognized words. When recognition
            /// yields no final result, the value last stored in <see cref="recoPhonemes"/>
            /// by an event handler is returned (empty if no handler fired).
            /// </returns>
            public static string GetPronunciationFromText(string MyWord)
            {
                // This is a trick to figure out the phonemes used by the synthesis engine.

                // Step 1: text -> wav (kept entirely in memory)
                using (MemoryStream audioStream = new MemoryStream())
                using (SpeechSynthesizer synth = new SpeechSynthesizer())
                {
                    synth.SetOutputToWaveStream(audioStream);
                    synth.Speak(MyWord);
                    synth.SetOutputToNull(); // detach the synthesizer from the stream before reading it
                    audioStream.Position = 0; // rewind so the recognizer reads from the start

                    // Step 2: wav -> text (to obtain the recognizer's phonemes)
                    recoPhonemes = String.Empty;
                    GrammarBuilder gb = new GrammarBuilder(MyWord);
                    Grammar g = new Grammar(gb); // TODO the hard letters to recognize are 'g' and 'e'

                    // The engine is IDisposable; release it as soon as recognition finishes.
                    using (SpeechRecognitionEngine reco = new SpeechRecognitionEngine())
                    {
                        reco.SpeechHypothesized += new EventHandler<SpeechHypothesizedEventArgs>(reco_SpeechHypothesized);
                        reco.SpeechRecognitionRejected += new EventHandler<SpeechRecognitionRejectedEventArgs>(reco_SpeechRecognitionRejected);
                        reco.UnloadAllGrammars(); // only use the one-word grammar
                        reco.LoadGrammar(g);
                        reco.SetInputToWaveStream(audioStream);
                        RecognitionResult rr = reco.Recognize();
                        reco.SetInputToNull();
                        if (rr != null)
                        {
                            recoPhonemes = StringFromWordArray(rr.Words, WordType.Pronunciation);
                        }
                    }

                    return recoPhonemes;
                }
            }

            /// <summary>
            /// Joins one representation of every recognized word into a single string,
            /// applying each word's display attributes for spacing.
            /// </summary>
            /// <param name="words">Recognized words to concatenate.</param>
            /// <param name="type">Which representation of each word to emit.</param>
            /// <returns>The concatenated text; empty when <paramref name="words"/> has no items.</returns>
            /// <exception cref="InvalidEnumArgumentException">
            /// <paramref name="type"/> is not a defined <see cref="WordType"/> value
            /// (raised while processing the first word).
            /// </exception>
            public static string StringFromWordArray(ReadOnlyCollection<RecognizedWordUnit> words, WordType type)
            {
                System.Text.StringBuilder text = new System.Text.StringBuilder();
                foreach (RecognizedWordUnit word in words)
                {
                    string wordText = "";
                    if (type == WordType.Text || type == WordType.Normalized)
                    {
                        wordText = word.Text;
                    }
                    else if (type == WordType.Lexical)
                    {
                        wordText = word.LexicalForm;
                    }
                    else if (type == WordType.Pronunciation)
                    {
                        wordText = word.Pronunciation;
                    }
                    else
                    {
                        // "{0}" is the composite-format placeholder; a malformed placeholder
                        // would make String.Format itself throw FormatException instead.
                        throw new InvalidEnumArgumentException(String.Format("{0}: is not a valid input", type));
                    }

                    // Apply the display attributes: append trailing spaces first, then trim.
                    if ((word.DisplayAttributes & DisplayAttributes.OneTrailingSpace) != 0)
                    {
                        wordText += " ";
                    }
                    if ((word.DisplayAttributes & DisplayAttributes.TwoTrailingSpaces) != 0)
                    {
                        wordText += "  ";
                    }
                    if ((word.DisplayAttributes & DisplayAttributes.ConsumeLeadingSpaces) != 0)
                    {
                        wordText = wordText.TrimStart();
                    }
                    if ((word.DisplayAttributes & DisplayAttributes.ZeroTrailingSpaces) != 0)
                    {
                        wordText = wordText.TrimEnd();
                    }

                    text.Append(wordText);
                }
                return text.ToString();
            }

            /// <summary>
            /// Stores the pronunciation of the current recognition hypothesis in
            /// <see cref="recoPhonemes"/>.
            /// </summary>
            public static void reco_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
            {
                recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
            }

            /// <summary>
            /// Stores the pronunciation of a rejected recognition result in
            /// <see cref="recoPhonemes"/>.
            /// </summary>
            public static void reco_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
            {
                recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
            }

        }

        /// <summary>
        /// Selects which representation of a recognized word
        /// <see cref="Program.StringFromWordArray"/> emits.
        /// </summary>
        public enum WordType
        {
            /// <summary>The word's <c>Text</c> property.</summary>
            Text,
            /// <summary>Alias of <see cref="Text"/> (same underlying value).</summary>
            Normalized = Text,
            /// <summary>The word's <c>LexicalForm</c> property.</summary>
            Lexical,
            /// <summary>The word's <c>Pronunciation</c> property.</summary>
            Pronunciation
        }
    }
    
    // Credit for method of retrieving IPA pronunciation from a string goes to Casey Chesnut (http://www.mperfect.net/speechSamples/)
    

    【讨论】:

      猜你喜欢
      • 2014-07-28
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2019-03-15
      相关资源
      最近更新 更多