【发布时间】:2018-07-02 15:19:21
【问题描述】:
我在我的 Qt C++ 应用程序中使用 Google 的 Speech-To-Text API。
Google's C++ documentation 有帮助,但在一定程度上。
在我下面的代码中,如果我取消注释
std::this_thread::sleep_for(std::chrono::seconds(1));
语音识别工作正常,但不正确 - 它跳过了一些单词。但是没有这条线,它根本不起作用。我认为这是因为 MicrophoneThreadMain() 的 while 循环与 start_speech_to_text() 的 while 循环相冲突。但我不确定。
我希望这两个函数同时运行,没有中断,也没有延迟。 我尝试使用 QThreads 和 Signal and Slots 但无法正常工作。
speech_to_text.cpp
#include "speechtotext.h"
using google::cloud::speech::v1::StreamingRecognitionConfig;
using google::cloud::speech::v1::RecognitionConfig;
using google::cloud::speech::v1::Speech;
using google::cloud::speech::v1::StreamingRecognizeRequest;
using google::cloud::speech::v1::StreamingRecognizeResponse;
SpeechToText::SpeechToText(QObject *parent) : QObject(parent)
{
}
void SpeechToText::initialize()
{
QAudioFormat qtFormat;
// Get default audio input device
QAudioDeviceInfo qtInfo = QAudioDeviceInfo::defaultInputDevice();
// Set the audio format settings
qtFormat.setCodec("audio/pcm");
qtFormat.setByteOrder(QAudioFormat::Endian::LittleEndian);
qtFormat.setChannelCount(1);
qtFormat.setSampleRate(16000);
qtFormat.setSampleSize(16);
qtFormat.setSampleType(QAudioFormat::SignedInt);
// Check whether the format is supported
if (!qtInfo.isFormatSupported(qtFormat)) {
qWarning() << "Default format is not supported";
exit(3);
}
// Instantiate QAudioInput with the settings
audioInput = new QAudioInput(qtFormat);
// Start receiving data from audio input
ioDevice = audioInput->start();
emit finished_initializing();
}
void SpeechToText::MicrophoneThreadMain(grpc::ClientReaderWriterInterface<StreamingRecognizeRequest,
StreamingRecognizeResponse> *streamer)
{
StreamingRecognizeRequest request;
std::size_t size_read;
while(true)
{
audioDataBuffer.append(ioDevice->readAll());
size_read = audioDataBuffer.size();
// And write the chunk to the stream.
request.set_audio_content(&audioDataBuffer.data()[0], size_read);
std::cout << "Sending " << size_read / 1024 << "k bytes." << std::endl;
streamer->Write(request);
//std::this_thread::sleep_for(std::chrono::seconds(1));
}
}
void SpeechToText::start_speech_to_text()
{
StreamingRecognizeRequest request;
auto *streaming_config = request.mutable_streaming_config();
RecognitionConfig *recognition_config = new RecognitionConfig();
recognition_config->set_language_code("en-US");
recognition_config->set_sample_rate_hertz(16000);
recognition_config->set_encoding(RecognitionConfig::LINEAR16);
streaming_config->set_allocated_config(recognition_config);
// Create a Speech Stub connected to the speech service.
auto creds = grpc::GoogleDefaultCredentials();
auto channel = grpc::CreateChannel("speech.googleapis.com", creds);
std::unique_ptr<Speech::Stub> speech(Speech::NewStub(channel));
// Begin a stream.
grpc::ClientContext context;
auto streamer = speech->StreamingRecognize(&context);
// Write the first request, containing the config only.
streaming_config->set_interim_results(true);
streamer->Write(request);
// The microphone thread writes the audio content.
std::thread microphone_thread(&SpeechToText::MicrophoneThreadMain, this, streamer.get());
// Read responses.
StreamingRecognizeResponse response;
while (streamer->Read(&response)) // Returns false when no more to read.
{
// Dump the transcript of all the results.
for (int r = 0; r < response.results_size(); ++r)
{
auto result = response.results(r);
std::cout << "Result stability: " << result.stability() << std::endl;
for (int a = 0; a < result.alternatives_size(); ++a)
{
auto alternative = result.alternatives(a);
std::cout << alternative.confidence() << "\t"
<< alternative.transcript() << std::endl;
}
}
}
grpc::Status status = streamer->Finish();
microphone_thread.join();
if (!status.ok()) {
// Report the RPC failure.
qDebug() << "error RPC";
std::cerr << status.error_message() << std::endl;
}
}
speech_to_text.h
#ifndef SPEECHTOTEXT_H
#define SPEECHTOTEXT_H
#include <QObject>
#include <QDebug>
#include <QThread>
#include <thread>
#include <chrono>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <functional>
#include <QtMultimedia>
#include <QtMultimedia/QAudioInput>
#include <QAudioDeviceInfo>
#include <QAudioFormat>
#include <QIODevice>
#include <QtConcurrent>
#include <QMutex>
#include <grpc++/grpc++.h>
#include "google/cloud/speech/v1/cloud_speech.grpc.pb.h"
using google::cloud::speech::v1::StreamingRecognitionConfig;
using google::cloud::speech::v1::RecognitionConfig;
using google::cloud::speech::v1::Speech;
using google::cloud::speech::v1::StreamingRecognizeRequest;
using google::cloud::speech::v1::StreamingRecognizeResponse;
class SpeechToText : public QObject
{
Q_OBJECT
public:
explicit SpeechToText(QObject *parent = nullptr);
signals:
void finished_initializing();
void finished_speech_to_text(QString);
public slots:
void initialize();
void start_speech_to_text();
private:
void MicrophoneThreadMain(grpc::ClientReaderWriterInterface<StreamingRecognizeRequest,
StreamingRecognizeResponse> *);
QAudioInput *audioInput;
QIODevice *ioDevice;
QByteArray audioDataBuffer;
};
#endif // SPEECHTOTEXT_H
你知道如何解决这个问题吗?
【问题讨论】:
-
另外,您提供的链接中的谷歌代码在写入之后调用
WritesDone,当没有什么可写的时候。你确定你总是在写东西,即 size_read 永远不会 0 吗? -
如果您丢失了一些单词,可能是因为音频缓冲区太小:也许使用带有“足够大缓冲区”的 setBuffer 会有所帮助,例如:forum.qt.io/topic/71129/voip-qtcpsoket-audio-streaming/5
-
@iMajuscule 感谢您的信息。 GDPR 的答案似乎不那么骇人听闻,所以我会先研究一下。
标签: c++ multithreading qt speech-recognition google-speech-api