【发布时间】:2019-10-14 10:38:36
【问题描述】:
我刚接触语音识别话题
我正在开发一个项目,在该项目中我可以在工作区中找到 Pepper 机器人的一些用例。与 Pepper 一起玩,我们发现了它的语音识别能力的一些问题。我发现我们可以尝试将它与外部引擎连接起来,我从 GCP 中选择了 DialogFlow。 也因为我发现了一些与该服务的集成。 我使用了来自project的代码
import traceback
from naoqi import qi
# [START dialogflow_detect_intent_streaming]
def detect_intent_stream(project_id, session_id, audio_file_path,
language_code,ip):
"""Returns the result of detect intent with streaming audio as input.
Using the same `session_id` between requests allows continuation
of the conversation."""
import dialogflow_v2 as dialogflow
session_client = dialogflow.SessionsClient()
# Note: hard coding audio_encoding and sample_rate_hertz for simplicity.
audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16
sample_rate_hertz = 44100
session_path = session_client.session_path(project_id, session_id)
print('Session path: {}\n'.format(session_path))
def request_generator(audio_config, audio_file_path):
query_input = dialogflow.types.QueryInput(audio_config=audio_config)
# The first request contains the configuration.
yield dialogflow.types.StreamingDetectIntentRequest(
session=session_path, query_input=query_input)
# Here we are reading small chunks of audio data from a local
# audio file. In practice these chunks should come from
# an audio input device.
try:
with open(audio_file_path, 'rb') as audio_file:
while True:
chunk = audio_file.read(4096)
print(chunk)
if not chunk:
break
except:
traceback.print_exc()
# The later requests contains audio data.
yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
audio_config = dialogflow.types.InputAudioConfig(audio_encoding=audio_encoding, language_code=language_code, sample_rate_hertz=sample_rate_hertz)
requests = request_generator(audio_config, audio_file_path)
responses = session_client.streaming_detect_intent(requests)
print('=' * 20)
for response in responses:
print('Intermediate transcript: "{}".'.format(response.recognition_result.transcript))
# Note: The result from the last response is the final transcript along
# with the detected content.
query_result = response.query_result
session = qi.Session()
try:
tts = session.service("ALTextToSpeech")
tts.say(query_result.fulfillment_text)
except:
traceback.print_exc()
raise Exception("session.connect failed.")
finally:
session.close()
print('=' * 20)
print('Query text: {}'.format(query_result.query_text))
print('Detected intent: {} (confidence: {})\n'.format(
query_result.intent.display_name,
query_result.intent_detection_confidence))
print('Fulfillment text: {}\n'.format(
query_result.fulfillment_text))
# [END dialogflow_detect_intent_streaming]
已编辑:(添加pepper_recorder.py 代码) 下面的代码从 Pepper 获取任何声音,并仅以期望峰值发送到对话流检测_intent_stream
import StringIO
from Queue import Queue
from naoqi import ALModule, ALProxy
import numpy as np
import time
import logging
import uuid
import traceback
from detect_intent_stream import detect_intent_stream
LISTEN_RETRIES = 10
DIALOG_FLOW_GCP_PROJECT_ID = "XXXXXXXXXXXXXXXXXXXX"
class SoundProcessingModule(ALModule):
def __init__( self, name, ip, stop_recognition):
try:
ALModule.__init__( self, name );
except Exception as e:
logging.error(str(e))
pass
print("connected")
self.ip = ip
self.BIND_PYTHON( name, "processRemote")
self.ALAudioDevice = ALProxy("ALAudioDevice", self.ip, 9559)
self.framesCount=0
self.count = LISTEN_RETRIES
self.recordingInProgress = False
self.stopRecognition = stop_recognition
self.uuid = uuid.uuid4()
self.previous_sound_data = None
def startProcessing(self):
"""init sound processing, set microphone and stream rate"""
print("startProcessing")
self.ALAudioDevice.setClientPreferences(self.getName(), 16000, 4, 0)
self.ALAudioDevice.subscribe(self.getName())
while not self.stopRecognition.is_set():
time.sleep(1)
self.ALAudioDevice.unsubscribe(self.getName())
def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer):
"""audio stream callback method with simple silence detection"""
self.framesCount = self.framesCount + 1
sound_data_interlaced = np.fromstring(str(inputBuffer), dtype=np.int16)
sound_data = np.reshape(sound_data_interlaced, (nbOfChannels, nbOfSamplesByChannel), 'F')
peak_value = np.max(sound_data)
# detect sound
if peak_value > 10000:
print("Peak:", peak_value)
self.count = LISTEN_RETRIES
if not self.recordingInProgress:
self.startRecording(self.previous_sound_data)
# if there is no sound for a few seconds we end the current recording and start audio processing
if self.count <= 0 and self.recordingInProgress:
self.stopRecording()
# if recording is in progress we save the sound to an in-memory file
if self.recordingInProgress:
self.count -= 1
self.previous_data = sound_data
self.procssingQueue.put(sound_data[0].tostring())
self.outfile.write(sound_data[0].tostring())
def startRecording(self, previous_sound_data):
"""init a in memory file object and save the last raw sound buffer to it."""
self.outfile = StringIO.StringIO()
self.procssingQueue = Queue()
self.recordingInProgress = True
if not previous_sound_data is None:
self.procssingQueue.put(previous_sound_data[0].tostring())
self.outfile.write(previous_sound_data[0].tostring())
print("start recording")
def stopRecording(self):
"""saves the recording to memory"""
print("stopped recording")
self.previous_sound_data = None
self.outfile.seek(0)
try:
detect_intent_stream(DIALOG_FLOW_GCP_PROJECT_ID, self.uuid,
self.outfile, "en-US", self.ip)
except:
traceback.print_exc()
self.recordingInProgress = False
我已连接到机器人,它正在监听,但每次它开始录制时,我都会在控制台中看到类似的内容
('峰值:',14023)
开始录制
停止录制
会话路径:XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
=====================
然后出错...
Traceback(最近一次调用最后一次):
> File
> "C:\Users\marwloda\PycharmProjects\Pepper\scripts\pepper_recorder.py",
> line 83, in stopRecording self.outfile, "en-US", self.ip) File
> "C:\Users\marwloda\PycharmProjects\Pepper\scripts\detect_intent_stream.py",
> line 76, in detect_intent_stream for response in responses: File
> "C:\Users\marwloda\PycharmProjects\Pepper\venv\lib\site-packages\google\api_core\grpc_helpers.py",
> line 81, in next
> six.raise_from(exceptions.from_grpc_error(exc), exc) File "C:\Users\marwloda\PycharmProjects\Pepper\venv\lib\site-packages\six.py",
> line 737, in raise_from
> raise value Unknown: None Exception iterating requests!
如果从机器人录制的音频文件为空。 但是我已经打印了一些语音记录数据并产生了一些原始的不可读的字符串
为了证明我与 API 有某种联系 - 当我打开 GCP 控制台 API 和服务时,我看到了 view
什么可能导致此错误? 我应该从哪里寻找原因?
【问题讨论】:
-
嗨,Marek 请同时提供pepper_recorder.py,以便我们尝试重现您的问题
标签: python speech-recognition dialogflow-es grpc pepper