如何使用 opus 对音频数据进行编码和解码？答案

【问题标题】：How to encode and decode audio data with opus?如何使用 opus 对音频数据进行编码和解码？
【发布时间】：2019-01-09 08:12:23
【问题描述】：

我正在开发语音聊天功能，需要压缩音频数据。我通过 Qt 框架录制和播放音频数据。如果不经压缩直接录制并播放音频数据，一切正常；但如果先压缩、再解压缩然后播放，我只能听到爆裂声（噼啪声）。

编辑：我查看了演示代码并尝试照着使用。现在能听到一些声音，但卡顿非常严重。如果我把 pcm_bytes 的大小增加到例如 40000，听起来会好一些，但我的声音仍然有卡顿和爆裂声。

这是相关的那一行（位于 audioinput.cpp 的底部）：

speaker->write((const char*)pcm_bytes,3840);

codecopus.cpp：

#include "codecopus.h"

// Constructs a codec wrapper with no encoder or decoder yet.
// Both handles start as nullptr so that the destructor is safe even when
// initEncoder()/initDecoder() were never called (or failed).
CodecOpus::CodecOpus()
{
    decoderState = nullptr;
    encoderState = nullptr;
}

// Creates the Opus decoder for the given sampling rate and channel count.
// On failure the reason is reported on stderr and decoderState is left as
// nullptr instead of silently continuing with an unusable decoder.
void CodecOpus::initDecoder(opus_int32 samplingRate, int channels) //decoder
{
    int error = OPUS_OK;
    decoderState = opus_decoder_create(samplingRate,channels,&error);
    if(error != OPUS_OK || decoderState == nullptr){
        std::cerr << "Failed to create Opus Decoder: " << opus_strerror(error) << std::endl;
        decoderState = nullptr;
        return;
    }
    std::cout << "Created Opus Decoder struct" << std::endl;
}

// Creates the Opus encoder (VoIP application profile) for the given sampling
// rate and channel count, then requests a 64 kbit/s bitrate.
// The creation result is checked BEFORE the bitrate is configured: previously
// the creation error was overwritten by the opus_encoder_ctl() result and
// ctl was invoked even if creation had failed.
void CodecOpus::initEncoder(opus_int32 samplingRate, int channels) // Encoder
{
    int error = OPUS_OK;
    encoderState = opus_encoder_create(samplingRate,channels,OPUS_APPLICATION_VOIP,&error);
    if(error != OPUS_OK || encoderState == nullptr){
        std::cerr << "Failed to create Opus Encoder: " << opus_strerror(error) << std::endl;
        encoderState = nullptr;
        return;
    }

    error = opus_encoder_ctl(encoderState,OPUS_SET_BITRATE(64000));
    if(error != OPUS_OK){
        // The encoder itself is usable; only the bitrate request was rejected.
        std::cerr << "Failed to set Opus Encoder bitrate: " << opus_strerror(error) << std::endl;
    }
    std::cout << "Created Opus Encoder struct" << std::endl;
}

// Encodes one frame of 16-bit PCM with the encoder held by this object.
//   pcm          - input samples
//   frameSize    - forwarded as opus_encode()'s frame_size argument
//   data         - destination buffer for the compressed packet
//   maxDataBytes - capacity of `data`
// Returns whatever opus_encode() returns, unchanged (per the Opus API: the
// packet length in bytes, or a negative error code).
opus_int32 CodecOpus::encodeData(const opus_int16 *pcm, int frameSize, unsigned char *data, opus_int32 maxDataBytes) //Encoder
{
    return opus_encode(encoderState, pcm, frameSize, data, maxDataBytes);
}

// Decodes one Opus packet into 16-bit PCM with the decoder held by this object.
//   data          - compressed packet
//   numberOfBytes - length of the packet
//   pcm           - destination buffer for decoded samples
//   frameSizeInSec- forwarded as opus_decode()'s frame_size argument.
//                   NOTE(review): despite the name, the Opus API defines that
//                   argument in samples per channel, not seconds.
// Returns opus_decode()'s result unchanged (per the Opus API: decoded samples
// per channel, or a negative error code).
int CodecOpus::decodeData(const unsigned char *data, opus_int32 numberOfBytes,opus_int16* pcm,int frameSizeInSec) //Decoder
{
    // Last argument 0: forward-error-correction decoding is not requested.
    return opus_decode(decoderState, data, numberOfBytes, pcm, frameSizeInSec, 0);
}

// Releases the decoder first, then the encoder.
CodecOpus::~CodecOpus()
{
    opus_decoder_destroy(decoderState);
    opus_encoder_destroy(encoderState);
}

audioinput.h：

#ifndef AUDIOINPUT_H
#define AUDIOINPUT_H
#include <QAudioFormat>
#include <iostream>
#include <QAudioInput>
#include <QAudioOutput>
#include <thread>
#include "codecopus.h"
#include "QDebug"
// Captures audio from the default input device, runs it through an Opus
// encode/decode round trip and plays the result on the default output device.
class AudioInput : public QObject
{
    Q_OBJECT

public:
    AudioInput();
    ~AudioInput();

    // Starts the capture and playback devices.
    void startRecording();
    // Triggers startRecording() asynchronously.
    void CreateNewAudioThread();

private:

    CodecOpus opus;

    // Compressed packet buffer; its size is passed to the encoder as the
    // maximum packet length.
    unsigned char cbits[4000] = {};
    // One input frame: 960 samples per channel * 2 channels.
    // (Was 960*2*sizeof(opus_int16) elements, i.e. twice as large as any
    // code in this class ever uses.)
    opus_int16 in[960*2] = {};
    // Decoder output: up to 5760 samples per channel * 2 channels.
    opus_int16 out[5760*2] = {};

    // Non-owning view onto raw PCM bytes.
    unsigned char *pcm_bytes = nullptr;

    // Maximum number of samples per channel handed to the decoder.
    int MAX_FRAME_SIZE = 0;

    QAudioFormat audioFormat;
    QAudioInput *audioInput = nullptr;
    QIODevice *mic = nullptr;
    QByteArray data;
    int micFrameSize = 0;


    QAudioOutput *audioOutput = nullptr;
    QIODevice *speaker = nullptr;
    QAudioFormat speakerAudioFormat;

public slots:
    // Connected to the audio input's notify() signal.
    void OnAudioNotfiy();
};

#endif // AUDIOINPUT_H

audioinput.cpp：

#include "audioinput.h"

// Configures 48 kHz / stereo / 16-bit signed little-endian PCM for both the
// microphone and the speaker, falls back to the nearest supported format of
// each device, creates the audio devices and the Opus encoder/decoder, and
// hooks the input's notify() signal to OnAudioNotfiy().
//
// pcm_bytes starts as nullptr: it is re-pointed at the capture buffer before
// every use, so the buffer that used to be allocated here was only leaked.
AudioInput::AudioInput() : pcm_bytes(nullptr), audioFormat()
{
    audioFormat.setSampleRate(48000);
    audioFormat.setChannelCount(2);
    audioFormat.setSampleSize(16);
    audioFormat.setSampleType(QAudioFormat::SignedInt);
    audioFormat.setByteOrder(QAudioFormat::LittleEndian);
    audioFormat.setCodec("audio/pcm");


    speakerAudioFormat.setSampleRate(48000);
    speakerAudioFormat.setChannelCount(2);
    speakerAudioFormat.setSampleSize(16);
    speakerAudioFormat.setSampleType(QAudioFormat::SignedInt);
    speakerAudioFormat.setByteOrder(QAudioFormat::LittleEndian);
    speakerAudioFormat.setCodec("audio/pcm");

    QAudioDeviceInfo info = QAudioDeviceInfo::defaultInputDevice();
    if(!info.isFormatSupported(audioFormat)){
        std::cout << "Mic Format not supported!" << std::endl;
        audioFormat = info.nearestFormat(audioFormat);
    }
    QAudioDeviceInfo speakerInfo = QAudioDeviceInfo::defaultOutputDevice();
    if(!speakerInfo.isFormatSupported(speakerAudioFormat)){
        std::cout << "Speaker Format is not supported!" << std::endl;
        // Ask the OUTPUT device for its nearest format (this used to query
        // the input device by mistake).
        speakerAudioFormat = speakerInfo.nearestFormat(speakerAudioFormat);

    }
    std::cout << speakerAudioFormat.sampleRate() << audioFormat.sampleRate() << speakerAudioFormat.channelCount() << audioFormat.channelCount() << std::endl;
    audioInput = new QAudioInput(audioFormat);
    audioOutput = new QAudioOutput(speakerAudioFormat);
    // Request a notify() roughly every 20 ms, i.e. once per Opus frame.
    audioInput->setNotifyInterval(20);
    // Samples per channel in 20 ms at the negotiated sample rate.
    micFrameSize = (audioFormat.sampleRate()/1000)*20;

    opus.initEncoder(audioFormat.sampleRate(),audioFormat.channelCount());
    opus.initDecoder(speakerAudioFormat.sampleRate(),speakerAudioFormat.channelCount());

    // Upper bound (samples per channel) passed to the decoder.
    MAX_FRAME_SIZE = 6*960;



    connect(audioInput,SIGNAL(notify()),this,SLOT(OnAudioNotfiy()));
}

// Stops and releases the audio devices created in the constructor.
// mic/speaker are the QIODevices handed out by start() and are not deleted
// here; pcm_bytes is a non-owning view and is not deleted either.
AudioInput::~AudioInput()
{
    if(audioInput){
        audioInput->stop();
        delete audioInput;
        audioInput = nullptr;
    }
    if(audioOutput){
        audioOutput->stop();
        delete audioOutput;
        audioOutput = nullptr;
    }
    mic = nullptr;
    speaker = nullptr;
}

// Starts capture, then playback, keeping the QIODevice returned by each
// start() call for later reads (mic) and writes (speaker).
void AudioInput::startRecording()
{
    mic = audioInput->start();
    speaker = audioOutput->start();

    std::cout << "Recording started!" << std::endl;
}


void AudioInput::CreateNewAudioThread()
{
    std::thread t1(&AudioInput::startRecording,this);
    t1.detach();
}





void AudioInput::OnAudioNotfiy()
{
    data = mic->readAll();


    std::cout << "data size" <<data.size() << std::endl;
    if(data.size() > 0){
    pcm_bytes = reinterpret_cast<unsigned char*>(data.data());

//convert

    for(int i=0;i<2*960;i++){ //TODO HARDCODED
        in[i]=pcm_bytes[2*i+1]<<8|pcm_bytes[2*i];
    }
    opus_int32 compressedBytes = opus.encodeData(in,960,cbits,4000);

    opus_int32 decompressedBytes = opus.decodeData(cbits,compressedBytes,out,MAX_FRAME_SIZE);

    for(int i = 0; i<2*decompressedBytes;i++) //TODO HARDCODED
    {
        pcm_bytes[2*i]=out[i]&0xFF;
        pcm_bytes[2*i+1]=(out[i]>>8)&0xFF;
    }


    speaker->write((const char*)pcm_bytes,3840);
}

}

【问题讨论】：

你能解决这个问题吗？我有兴趣找到类似问题的解决方案。如果你解决了，请告诉我。

标签： c++ qt audio opus

【解决方案1】：

我原本准备了一个很长的回答，说明您把 opus.decodeData 的返回值误解成了字节数，而正确的解释应该是“每个通道解码出的样本数”。但从后面的字节转换代码来看，您似乎已经考虑到了这一点，所以我不确定错误究竟出在哪里。

总的来说，我认为您把 unsigned char <-> int16 之间的转换做得比实际需要的更复杂。您应该可以直接把音频缓冲区传给 opus（或直接从 opus 接收），只需就地把缓冲区指针重新解释（reinterpret）为所需的类型，而不必手动做位运算在不同缓冲区之间转换和复制。音频设备给您的应该是 little-endian 数据；如果字节序不匹配，可以用下面这个简单的字节交换例程：

// Swaps the two bytes of every 16-bit sample in place: for each sample index
// c, data[2*c] and data[2*c + 1] trade places.
// `data` and `numSamples` come from the surrounding code (not shown here).
for (int c = 0; c < numSamples; c++)
{
    unsigned char tmp = data[2 * c];
    data[2 * c] = data[2 * c + 1];
    data[2 * c + 1] = tmp;
}

虽然这里没有贴出来，但我假设您另有代码保证一次只从麦克风取用 960 个样本，并把其余样本保留在缓冲区中供下一帧使用，否则您会丢失数据。

这并不重要，但您也可以将 cbits 中的 4000 替换为 1275，这是最大的 opus 数据包大小。

【讨论】：

【解决方案2】：

1) 您只编码了 960 字节，而缓冲区要大得多。您必须把缓冲区分成若干大小相等的部分，再逐个传给编码器。每一部分的大小可以是 120、240、480、960、1920 或 2880。

2) 在 char 数组与 opus_int16 数组之间相互转换时，请使用 qFromLittleEndian()/qToLittleEndian() 函数或类型转换。这样可以避免出现爆裂声和音质下降。

例子：

void voice::slot_read_audio_input()
{

    //    Audio settings:
    //    Sample Rate=48000
    //    Sample Size=16
    //    Channel Count=1
    //    Byte Order=Little Endian
    //    Sample Type= UnSignedInt

    //    Encoder settings:
    //    Sample Rate=48000
    //    Channel Count=1
    //    OPUS_APPLICATION_VOIP

    //    Decoder settings:
    //    Sample Rate=48000
    //    Channel Count=1

    QByteArray audio_buffer;//mic
    QByteArray output_audio_buffer;//speaker

    int const OPUS_INT_SIZE=2;//sizeof(opus_int16)
    int const FRAME_SIZE=960;
    int const MAX_FRAME_SIZE=1276;
    int FRAME_COUNT=3840/FRAME_SIZE/OPUS_INT_SIZE;// 3840 is a sample size= voice_input->bytesReady;

    opus_int16 input_frame[FRAME_SIZE] = {};
    opus_int16 output_frame[FRAME_SIZE] = {};
    unsigned char compressed_frame[MAX_FRAME_SIZE] = {};
    unsigned char decompressed_frame[FRAME_SIZE*OPUS_INT_SIZE] = {};

    audio_buffer.resize(voice_input->bytesReady());   
    output_audio_buffer.resize(FRAME_SIZE*OPUS_INT_SIZE);

    input->read(audio_buffer.data(),audio_buffer.size());

    for(int i=0;i<FRAME_COUNT;i++)
    {
        //    convert from LittleEndian
        for(int j=0;j<FRAME_SIZE;j++)
        {
            input_frame[j]=qFromLittleEndian<opus_int16>(audio_buffer.data()+j*OPUS_INT_SIZE);
            //    or use this:
            //    input_frame[j]=static_cast<short>(static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j+1))<<8|static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j)));
        }

        opus_int32 compressedBytes = opus_encode(enc, input_frame,FRAME_SIZE,compressed_frame,MAX_FRAME_SIZE);
        opus_int32 decompressedBytes = opus_decode(dec,compressed_frame,compressedBytes,output_frame,FRAME_SIZE,0);

        //    conver to LittleEndian
        for(int j = 0; j<decompressedBytes;j++)
        {
            qToLittleEndian(output_frame[j],output_audio_buffer.data()+j*OPUS_INT_SIZE);
            //    or use this:
            //    decompressed_frame[OPUS_INT_SIZE*j]=output_frame[j]&0xFF;
            //    decompressed_frame[OPUS_INT_SIZE*j+1]=(output_frame[j]>>8)&0xFF;
        }

        audio_buffer.remove(0,FRAME_SIZE*OPUS_INT_SIZE);
        output->write(output_audio_buffer,FRAME_SIZE*OPUS_INT_SIZE);
        //    or use this:
        //    output->write(reinterpret_cast<char*>(decompressed_frame),FRAME_SIZE*OPUS_INT_SIZE);
    }
}

【讨论】：