【问题标题】:Android speech recognition without a time-out?Android语音识别没有超时?
【发布时间】:2015-12-13 17:12:22
【问题描述】:

我正在使用 android 语音识别并编写了一些代码来识别口语。请看下面的代码。

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import android.app.Activity;
import android.content.Intent;
import android.os.Bundle;
import android.speech.tts.TextToSpeech;
import android.text.Html;
import android.util.Log;
import android.view.View;
import android.widget.AdapterView;
import android.widget.ArrayAdapter;
import android.widget.CompoundButton;
import android.widget.CompoundButton.OnCheckedChangeListener;
import android.widget.ListView;
import android.widget.ProgressBar;
import android.widget.TextView;
import android.widget.Toast;
import android.widget.ToggleButton;

/**
 * Activity that drives Android's {@link SpeechRecognizer} from a toggle button and shows the
 * best recognition hypothesis in a text view.
 *
 * <p>Switching the toggle on tears down any previous recognizer, builds a fresh one and starts
 * listening; switching it off stops listening. When the recognizer reports an error the toggle
 * is clicked programmatically so that a new session starts (the "keep listening" workaround),
 * except for errors that a restart cannot fix.
 *
 * <p>All recognizer calls are made from the listener callbacks and UI callbacks of this
 * activity; no additional threads are started here.
 */
public class MainActivity extends Activity {

    /** Tag used for every log line written by this activity. */
    private static final String LOG_TAG = "VoiceRecognitionActivity";

    private TextView returnedText;
    private ToggleButton toggleButton;
    private ProgressBar progressBar;
    /** Current recognizer, or {@code null} once it has been released. */
    private SpeechRecognizer speech = null;
    private Intent recognizerIntent;
    private ListView wordList;

    private List<String> adapterList = new ArrayList<String>();
    ArrayAdapter<String> adapter;

    /**
     * Looks up the views, wires the list adapter and the toggle listener, and prepares a
     * recognizer that already has its listener and intent configured.
     *
     * @param savedInstanceState state bundle passed through to {@link Activity#onCreate}
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        returnedText = (TextView) findViewById(R.id.textView1);
        progressBar = (ProgressBar) findViewById(R.id.progressBar1);
        toggleButton = (ToggleButton) findViewById(R.id.toggleButton1);
        wordList = (ListView) findViewById(R.id.word_list);

        adapter = new ArrayAdapter<String>(this, android.R.layout.simple_list_item_checked, adapterList);
        adapter.setNotifyOnChange(true);
        wordList.setAdapter(adapter);
        progressBar.setVisibility(View.INVISIBLE);

        // Build the recognizer through createRecog() so it never exists without a listener
        // and a matching recognizerIntent.
        createRecog();
        toggleButton.setOnCheckedChangeListener(new ButtonListener());
    }

    /**
     * Releases the recognizer when the activity goes away so that the connection to the
     * recognition service is not leaked.
     */
    @Override
    protected void onDestroy() {
        releaseRecognizer();
        super.onDestroy();
    }

    /**
     * Reacts to the toggle button: checked starts a fresh recognition session, unchecked stops
     * the current one. {@link #run()} is invoked synchronously from the change callback.
     */
    private class ButtonListener implements OnCheckedChangeListener, Runnable {
        boolean isChecked;

        @Override
        public void onCheckedChanged(CompoundButton buttonView, boolean isChecked) {
            this.isChecked = isChecked;
            this.run();
        }

        @Override
        public void run() {
            if (isChecked) {
                // Start from a clean recognizer each time; the previous one is cancelled
                // and destroyed before the replacement is created.
                releaseRecognizer();
                createRecog();
                progressBar.setVisibility(View.VISIBLE);
                progressBar.setIndeterminate(true);
                speech.startListening(recognizerIntent);
                adapter.clear();

                returnedText.setText("");
            } else {
                progressBar.setIndeterminate(false);
                progressBar.setVisibility(View.INVISIBLE);
                if (speech != null) {
                    speech.stopListening();
                }
            }
        }
    }

    /** Receives recognizer callbacks, updates the progress bar and forwards hypotheses. */
    private class RecognitionListenerClass implements RecognitionListener {

        @Override
        public void onBeginningOfSpeech() {
            Log.i(LOG_TAG, "onBeginningOfSpeech");
            progressBar.setIndeterminate(false);
            progressBar.setMax(10);
            wordList.computeScroll();
        }

        @Override
        public void onBufferReceived(byte[] buffer) {
            // Log the size; concatenating the array itself only prints its identity string.
            Log.i(LOG_TAG, "onBufferReceived: " + (buffer == null ? 0 : buffer.length) + " bytes");
        }

        @Override
        public void onEndOfSpeech() {
            Log.i(LOG_TAG, "onEndOfSpeech");
            progressBar.setIndeterminate(true);
            toggleButton.setChecked(false);
        }

        /**
         * Shows the error text and restarts recognition by clicking the toggle, unless the
         * error is a missing permission, where restarting would only fail again and loop.
         *
         * @param errorCode one of the {@code SpeechRecognizer.ERROR_*} codes
         */
        @Override
        public void onError(int errorCode) {
            String errorMessage = getErrorText(errorCode);
            Log.d(LOG_TAG, "FAILED " + errorMessage);
            returnedText.setText(errorMessage);
            toggleButton.setChecked(false);

            if (errorCode == SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS) {
                // Leave the toggle off and the message visible for the user.
                return;
            }

            // Button is unchecked at this point, so the click checks it and starts a new session.
            toggleButton.performClick();
        }

        @Override
        public void onEvent(int arg0, Bundle arg1) {
            Log.i(LOG_TAG, "onEvent");
        }

        @Override
        public void onPartialResults(Bundle arg0) {
            Log.i(LOG_TAG, "onPartialResults");
            deliverResults(arg0);
        }

        @Override
        public void onReadyForSpeech(Bundle arg0) {
            Log.i(LOG_TAG, "onReadyForSpeech");
        }

        @Override
        public void onResults(Bundle results) {
            // Final hypotheses are shown the same way as partial ones instead of being dropped.
            Log.i(LOG_TAG, "onResults");
            deliverResults(results);
        }

        @Override
        public void onRmsChanged(float rmsdB) {
            Log.i(LOG_TAG, "onRmsChanged: " + rmsdB);
            progressBar.setProgress((int) rmsdB);
        }
    }

    /**
     * Extracts hypotheses and confidence scores from a recognizer bundle and forwards them.
     *
     * @param bundle bundle handed to a results callback; a {@code null} bundle is ignored
     */
    private void deliverResults(Bundle bundle) {
        if (bundle == null) {
            return;
        }
        final ArrayList<String> matches = bundle.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
        final float[] scores = bundle.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES);
        receiveWhatWasHeard(matches, scores);
    }

    /**
     * Logs and displays the first hypothesis.
     *
     * @param matches recognition hypotheses; may be {@code null} or empty, in which case
     *                nothing is displayed
     * @param scores  confidence scores for {@code matches}; currently unused
     */
    private void receiveWhatWasHeard(ArrayList<String> matches, float[] scores) {
        if (matches == null || matches.isEmpty()) {
            Log.i(LOG_TAG, "no recognition hypotheses");
            return;
        }
        String best = matches.get(0);
        Log.i(LOG_TAG, best);
        returnedText.setText(best);
    }

    /**
     * Creates a new recognizer with its listener attached and rebuilds the recognition intent
     * (English, web-search language model, up to 3 results, partial results enabled).
     */
    private void createRecog() {
        speech = SpeechRecognizer.createSpeechRecognizer(this);
        speech.setRecognitionListener(new RecognitionListenerClass());
        recognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        // EXTRA_LANGUAGE is the request extra; EXTRA_LANGUAGE_PREFERENCE is only a key in the
        // result of ACTION_GET_LANGUAGE_DETAILS.
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE,
                "en");
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE,
                this.getPackageName());
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3);
        // Undocumented extra kept from the original code — NOTE(review): confirm the
        // installed recognizer still honours it.
        recognizerIntent.putExtra("android.speech.extra.DICTATION_MODE", true);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true);
    }

    /** Cancels and destroys the current recognizer, if any, and clears the reference. */
    private void releaseRecognizer() {
        if (speech == null) {
            return;
        }
        speech.cancel();
        speech.destroy();
        speech = null;
    }

    @Override
    public void onResume() {
        super.onResume();
    }

    @Override
    protected void onPause() {
        super.onPause();
    }

    /**
     * Maps a {@code SpeechRecognizer.ERROR_*} code to a short English description.
     *
     * @param errorCode error code passed to {@code RecognitionListener.onError}
     * @return the description, or a generic "please try again" text for unknown codes
     */
    public String getErrorText(int errorCode) {
        String message;
        switch (errorCode) {
            case SpeechRecognizer.ERROR_AUDIO:
                message = "Audio recording error";
                break;
            case SpeechRecognizer.ERROR_CLIENT:
                message = "Client side error";
                break;
            case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                message = "Insufficient permissions";
                break;
            case SpeechRecognizer.ERROR_NETWORK:
                message = "Network error";
                break;
            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                message = "Network timeout";
                break;
            case SpeechRecognizer.ERROR_NO_MATCH:
                message = "No match";
                break;
            case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                message = "RecognitionService busy";
                break;
            case SpeechRecognizer.ERROR_SERVER:
                message = "error from server";
                break;
            case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                message = "No speech input";
                break;
            default:
                message = "Didn't understand, please try again.";
                break;
        }

        return message;
    }

}

但是我需要让它持续运行,而我一直没能做到。在语音结束时重新启动识别也许是个办法,但重启时会发出一声提示音,而且加载需要 1-3 秒,这意味着这段时间内的一部分语音会丢失。

是的,我知道 Google 已经提到他们的系统不是用于持续识别;但有几个人以前做过变通方法。不幸的是,这些变通方法现在似乎不起作用,因为它们是用相当旧的 API 编写的。如果旧版本可以实现,那么新版本当然更有可能。

那么,关于如何实现这一连续识别任务的任何想法?我正在使用 API 15。

【问题讨论】:

标签: java android speech-recognition voice-recognition


【解决方案1】:

阅读here 了解背景

this demo 是连续识别的,它使用了 pultz 博客评论区中提到的 Chromium 源码

全双工 Google API 在 Android 上工作正常。据我所知,它仍然受到速率限制,因此无法用于生产应用。

Google 全双工的 stdout sample 使用 curl 命令行

IBM watson 具有支持连续模式的全双工和生产就绪 API。您将不得不深入研究文档详细信息,但一般的 cli 示例是 here

关于 Watson API 的 continuous(连续模式)参数,请查看 here

【讨论】:

  • 看起来不错。我也在考虑重新启动监听器并把“就绪”提示音静音。有进展会告诉你
猜你喜欢
  • 2016-11-04
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2021-10-20
  • 1970-01-01
  • 1970-01-01
相关资源
最近更新 更多