语音合成和语音识别

1、语音合成

　　　　预先准备工作：

　　　　安装所需的依赖包

　　　　1：pip install baidu-aip

　　　　2：在百度 AI 开放平台（https://ai.baidu.com/）创建语音合成应用，获取代码中要用到的 APP_ID、API_KEY 和 SECRET_KEY

 1 from aip import AipSpeech
 2 
 3 """ 你的 APPID AK SK """
 4 APP_ID = \'14940739\'
 5 API_KEY = \'xCnr5K8ESsmOVaA5bl5ot5QY\'
 6 SECRET_KEY = \'4wH7W92hPUp8V7ogY4BZzV2pcZ3nC8LH \'
 7 
 8 client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
 9 
10 
11 result  = client.synthesis(\'你好百度\', \'zh\', 1, {
12     \'vol\': 5,
13     \'per\':5
14 })
15 
16 # 识别正确返回语音二进制 错误则返回dict 参照下面错误码
17 if not isinstance(result, dict):
18     with open(\'auido.mp3\', \'wb\') as f:
19         f.write(result)

代码实例

2、语音识别

　　预先准备工作：

　　1、安装转码工具

　　1.FFmpeg:

　　链接：https://pan.baidu.com/s/1jonSAa_TG2XuaJEy3iTmHg

　　密码：w6hk

　　转码命令（将音频转为 16000 Hz 采样率、单声道、16 位小端的 PCM 原始数据）：ffmpeg -y -i audio.wav -acodec pcm_s16le -f s16le -ac 1 -ar 16000 audio.pcm

 1 from aip import AipSpeech
 2 import  os
 3 """ 你的 APPID AK SK """
 4 APP_ID = \'14940739\'
 5 API_KEY = \'xCnr5K8ESsmOVaA5bl5ot5QY\'
 6 SECRET_KEY = \'4wH7W92hPUp8V7ogY4BZzV2pcZ3nC8LH \'
 7 
 8 client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
 9 
10 # contemp=f"ffmpeg -y  -i audio.wav  -acodec pcm_s16le -f s16le -ac 1 -ar 16000 audio.pcm"
11 
12 # 读取文件
13 def get_file_content(filePath):
14     os.system(f"ffmpeg -y  -i {filePath}  -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm")
15     with open(f"{filePath}.pcm", \'rb\') as fp:
16         return fp.read()
17 
18 # 识别本地文件
19 res=client.asr(get_file_content(\'auido.mp3\'), \'pcm\', 16000, {
20     \'dev_pid\': 1536,
21 })
22 print(res.get("result")[0])

代码实例

3、语音对话

 1 from aip import AipSpeech
 2 import os
 3 """ 你的 APPID AK SK """
 4 APP_ID = \'14940739\'
 5 API_KEY = \'xCnr5K8ESsmOVaA5bl5ot5QY\'
 6 SECRET_KEY = \'4wH7W92hPUp8V7ogY4BZzV2pcZ3nC8LH \'
 7 
 8 client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
 9 
10 def get_file_content(filename):
11     # os.system(f"ffmpeg -y  -i {filename}  -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filename}.pcm")
12     # with open(f"{filename}.pcm", \'rb\') as fp:
13     with open("sd.pcm", \'rb\') as fp:
14         res = client.asr(fp.read(), \'pcm\', 16000, {
15             \'dev_pid\': 1536,
16         })
17         return  res.get("result")[0]
18 
19 def  synthesis(text):
20     result = client.synthesis(text, \'zh\', 1, {
21         "spd": 4,
22         \'vol\': 5,
23         "pit": 8,
24         "per": 4
25     })
26 
27     # 识别正确返回语音二进制 错误则返回dict 参照下面错误码
28     if not isinstance(result, dict):
29         with open(\'auido.mp3\', \'wb\') as f:
30             f.write(result)
31     os.system("auido.mp3")
32 
33 text=get_file_content("auido.mp3")
34 if "傻屌" in text:
35     synthesis("我挺好的")
36 else:
37     synthesis(f"你刚才是不是说,{text}")

代码实例