录音并识别琴键
Imports NAudio.Wave
Imports MathNet.Numerics.IntegralTransforms
Imports System.Numerics
Imports TensorFlow
Imports System.IO
''' <summary>
''' Main form: records microphone audio, extracts filter-bank features per
''' 2048-sample block and asks the TensorFlow model which piano key was played.
''' Also contains helpers to export training features and to test on WAV files.
''' </summary>
Public Class Form1
    ' Layout of one analysed block. 2048 samples = 128 ms at 16 kHz, which is
    ' exactly one NAudio buffer with the settings used in Button1_Click.
    Private Const BlockSamples As Integer = 2048
    Private Const FrameLength As Integer = 512      ' samples per analysis frame
    Private Const FrameShift As Integer = 256       ' hop between frames
    Private Const CoeffsPerFrame As Integer = 26    ' values returned by MFCC.MFCC per frame
    ' (2048 \ 256 - 1) * 26 = 7 * 26 = 182 inputs expected by the model.
    Private Const FeatureLength As Integer = (BlockSamples \ FrameShift - 1) * CoeffsPerFrame
    ' Width of the one-hot label vector written by Button7_Click.
    Private Const ClassCount As Integer = 8

    ' Recording device and working buffers.
    Dim wav As New WaveInEvent
    Dim WavData16(BlockSamples - 1) As Int16
    Dim WavDataDb(BlockSamples - 1) As Single       ' currently unused
    Dim mfcc As New MFCC
    Dim piano As New TensorflowPiano

    Private handlersAttached As Boolean = False
    Private isRecording As Boolean = False

    ''' <summary>Starts microphone capture (16 kHz, 16 bit, mono).</summary>
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        ' A second click while recording would register the callback again and
        ' make StartRecording throw, so it is ignored.
        If isRecording Then Return

        ' Buffer size in bytes = sample rate * milliseconds * bytes per sample / 1000.
        wav.BufferMilliseconds = 128
        wav.NumberOfBuffers = 6
        wav.WaveFormat = New WaveFormat(16000, 16, 1)

        If Not handlersAttached Then
            AddHandler wav.DataAvailable, AddressOf waveIn_DataAvailable
            AddHandler wav.RecordingStopped, AddressOf waveIn_RecordingStopped
            handlersAttached = True
        End If

        wav.StartRecording()
        isRecording = True
    End Sub

    ''' <summary>Allows recording to be restarted after the device stops.</summary>
    Private Sub waveIn_RecordingStopped(sender As Object, e As StoppedEventArgs)
        isRecording = False
    End Sub

    ''' <summary>
    ''' Recording callback: classifies the delivered block and shows the class
    ''' index when its score exceeds 0.9.
    ''' </summary>
    Private Sub waveIn_DataAvailable(sender As Object, e As WaveInEventArgs)
        ' Never copy more than the sample buffer can hold.
        Dim byteCount As Integer = Math.Min(e.BytesRecorded, WavData16.Length * 2)
        Buffer.BlockCopy(e.Buffer, 0, WavData16, 0, byteCount)

        ' A short buffer must not leave samples of the previous block behind.
        Dim sampleCount As Integer = byteCount \ 2
        If sampleCount < WavData16.Length Then
            Array.Clear(WavData16, sampleCount, WavData16.Length - sampleCount)
        End If

        Dim scores() As Single = piano.Detect(ToModelInput(WavTMfcc(WavData16)))
        Dim best As Integer = IndexOfLargest(scores)
        If scores(best) > 0.9 Then
            ' The callback is not running on the UI thread; marshal the update.
            Me.Invoke(New ShowText(AddressOf ShowTxt), best.ToString)
        End If
    End Sub

    Public Delegate Sub ShowText(txt As String)

    ''' <summary>Displays <paramref name="txt"/> in Label1.</summary>
    Public Sub ShowTxt(txt As String)
        Label1.Text = txt
    End Sub

    ''' <summary>
    ''' Returns the index of the largest score. All entries are examined, so
    ''' every class the model outputs can be reported.
    ''' </summary>
    Private Shared Function IndexOfLargest(scores() As Single) As Integer
        Dim best As Integer = 0
        For i As Integer = 1 To scores.Length - 1
            If scores(i) > scores(best) Then
                best = i
            End If
        Next
        Return best
    End Function

    ''' <summary>
    ''' Packs the first 182 feature values into the 1 x 182 array passed to
    ''' TensorflowPiano.Detect.
    ''' </summary>
    Private Shared Function ToModelInput(features() As Single) As Single(,)
        Dim input(0, FeatureLength - 1) As Single
        For i As Integer = 0 To FeatureLength - 1
            input(0, i) = features(i)
        Next
        Return input
    End Function

    ''' <summary>
    ''' Splits <paramref name="data"/> into frames of 512 samples with a hop of
    ''' 256, windows and transforms each frame, and concatenates the 26 values
    ''' returned per frame.
    ''' </summary>
    ''' <param name="data">16-bit PCM samples.</param>
    ''' <returns>
    ''' (data.Length \ 256 - 1) * 26 values; 182 for a 2048-sample block.
    ''' Empty when the input is shorter than one frame.
    ''' </returns>
    Public Function WavTMfcc(data() As Int16) As Single()
        ' Integer division: with "/" the frame count became fractional for
        ' lengths that are not a multiple of 256, which mis-sized the result
        ' and could read past the end of the input.
        Dim frameCount As Integer = Math.Max(0, data.Length \ FrameShift - 1)
        Dim mfccs(frameCount * CoeffsPerFrame - 1) As Single
        Dim Frame(FrameLength - 1) As Single
        For i As Integer = 0 To frameCount - 1
            ' Array.Copy widens Int16 to Single while copying.
            Array.Copy(data, i * FrameShift, Frame, 0, FrameLength)
            mfcc.Hamming_window(Frame)
            Dim fft As Complex() = mfcc.FFT(Frame)
            Dim rs As Single() = mfcc.MFCC(fft)
            Array.Copy(rs, 0, mfccs, i * CoeffsPerFrame, CoeffsPerFrame)
        Next
        Return mfccs
    End Function

    ''' <summary>
    ''' Exports training data: for every WAV file below d:\piano writes one line
    ''' with the features of its first 2048 samples and one line with a one-hot
    ''' label taken from the first character of the file name.
    ''' </summary>
    Private Sub Button7_Click(sender As Object, e As EventArgs) Handles Button7.Click
        Dim invariant = System.Globalization.CultureInfo.InvariantCulture
        Dim sourceDir As New DirectoryInfo("d:\piano")
        Using txtwrite As New StreamWriter("d:\mfcc.txt", False)
            For Each wavFile As FileInfo In sourceDir.GetFiles("*.wav", SearchOption.AllDirectories)
                ' Val returns 0 for a non-digit, so such files are labelled 0.
                Dim label As Integer = CInt(Val(Mid(wavFile.Name, 1, 1)))
                If label < 0 OrElse label >= ClassCount Then
                    Debug.Print("skipped (label out of range): " & wavFile.FullName)
                    Continue For
                End If

                Dim bt(BlockSamples * 2 - 1) As Byte
                Dim it16(BlockSamples - 1) As Int16
                Using wf As New WaveFileReader(wavFile.FullName)
                    ' Shorter files leave the remainder of the block at zero.
                    wf.Read(bt, 0, bt.Length)
                End Using
                Buffer.BlockCopy(bt, 0, it16, 0, bt.Length)

                ' Invariant culture keeps "." as decimal separator so that the
                ' comma-separated line stays parseable on every locale.
                Dim mfccs = WavTMfcc(it16)
                Dim StrMfcc = String.Join(",", Array.ConvertAll(mfccs, Function(v) v.ToString(invariant)))

                Dim Ans(ClassCount - 1) As Integer
                Ans(label) = 1
                Dim StrAns = String.Join(",", Ans)

                txtwrite.WriteLine(StrMfcc)
                txtwrite.WriteLine(StrAns)
            Next
        End Using
    End Sub

    ''' <summary>
    ''' Voice activity detection using an exponential average of the signal
    ''' energy: v = 0.1 * x^2 + (1 - 0.1) * v.
    ''' </summary>
    ''' <param name="data">16-bit PCM samples.</param>
    ''' <returns>
    ''' One Point per detected region; X is the start index, Y the end index.
    ''' A region that is still active at the end of the data is not reported.
    ''' </returns>
    Public Function VAD(data As Int16()) As List(Of Point)
        Dim Belta As Single = 0.1
        Dim Sum As UInt64
        Dim StartP As Integer
        Dim Status As Boolean = False
        Dim WaveArea As New List(Of Point)
        Dim startLevel As Double = Math.Pow(1024, 2)
        Dim stopLevel As Double = Math.Pow(100, 2)
        For i = 0 To data.Length - 1
            Sum = Belta * Math.Pow(data(i), 2) + (1 - Belta) * Sum
            ' Start of sound.
            If Sum > startLevel AndAlso Not Status Then
                Status = True
                StartP = i
            End If
            ' End of sound.
            If Status AndAlso Sum < stopLevel Then
                WaveArea.Add(New Point(StartP, i))
                Status = False
            End If
        Next
        Return WaveArea
    End Function

    ''' <summary>Reads all 16-bit samples of a WAV file and closes the file.</summary>
    Private Shared Function ReadAllSamples(fileName As String) As Int16()
        Using wf As New WaveFileReader(fileName)
            Dim sampleCount As Integer = CInt(wf.Length \ 2)
            Dim bytes(sampleCount * 2 - 1) As Byte
            Dim samples(sampleCount - 1) As Int16
            Dim total As Integer = 0
            ' Read may return fewer bytes than requested.
            While total < bytes.Length
                Dim got As Integer = wf.Read(bytes, total, bytes.Length - total)
                If got <= 0 Then Exit While
                total += got
            End While
            Buffer.BlockCopy(bytes, 0, samples, 0, bytes.Length)
            Return samples
        End Using
    End Function

    ''' <summary>Runs VAD over d:\d00.wav; the result is not used further here.</summary>
    Private Sub Button8_Click(sender As Object, e As EventArgs) Handles Button8.Click
        VAD(ReadAllSamples("d:\d00.wav"))
    End Sub

    ''' <summary>
    ''' Classifies d:\testpiano.wav block by block and prints "index:score"
    ''' for every complete 2048-sample block.
    ''' </summary>
    Private Sub Button9_Click(sender As Object, e As EventArgs) Handles Button9.Click
        Dim it16 As Int16() = ReadAllSamples("d:\testpiano.wav")
        Dim frame(BlockSamples - 1) As Int16
        For k As Integer = 0 To it16.Length \ BlockSamples - 1
            Array.Copy(it16, k * BlockSamples, frame, 0, BlockSamples)
            Dim scores() As Single = piano.Detect(ToModelInput(WavTMfcc(frame)))
            Dim best As Integer = IndexOfLargest(scores)
            Debug.Print(best & ":" & scores(best))
        Next
    End Sub

    Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
    End Sub
End Class
在vb中对训练数据的MFCC进行读取,保存为TXT文件;在PYTHON文件中进行训练
import tensorflow as tf
import librosa
import numpy as np
from tensorflow.python.framework.graph_util import convert_variables_to_constants
def _parse_row(line):
    """Turn one comma-separated line of numbers into a 1-D float array."""
    # Plain float parsing instead of eval(): nothing from the file is executed
    # and values such as "NaN" are still accepted.
    return np.array([float(tok) for tok in line.split(",")])


# The file alternates between a feature line (182 values) and a one-hot label
# line (8 values). Blank lines are ignored.
xdata = []
ydata = []
with open("d:/mfcc.txt", "r") as f:
    rows = [line.strip() for line in f if line.strip()]
for feature_line, label_line in zip(rows[0::2], rows[1::2]):
    xdata.append(_parse_row(feature_line))
    ydata.append(_parse_row(label_line))

# Single-layer softmax classifier: 182 inputs -> 8 classes.
x = tf.placeholder("float32", [None, 182], name='input')
w = tf.Variable(tf.truncated_normal([182, 8], stddev=0.1))
b = tf.Variable(tf.truncated_normal(shape=[8], stddev=0.1, dtype=tf.float32))
y = tf.nn.softmax(tf.matmul(x, w) + b, name='out')
y_ = tf.placeholder("float32", [None, 8])
# Clip before the log so a saturated softmax (y == 0) cannot produce NaN.
loss = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
train = tf.train.GradientDescentOptimizer(1e-5).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
        sess.run(train, feed_dict={x: xdata, y_: ydata})
        if i % 100 == 0:
            current_loss = sess.run(loss, feed_dict={x: xdata, y_: ydata})
            # Report the loss next to the step number (it used to print i twice).
            print("step %d,%g" % (i, current_loss))
            print(sess.run(y_, feed_dict={y_: ydata}))
            print(current_loss)
            print(sess.run(y, feed_dict={x: xdata}))
    # Freeze the variables into constants and save the graph in .pb format.
    graph = convert_variables_to_constants(sess, sess.graph_def, ['out'])
    tf.train.write_graph(graph, 'd:/', 'piano.pb', as_text=False)
#保存训练参数
在VB中使用训练好的模型
Imports System.IO
Imports System.Numerics
Imports TensorFlow
' Requires the TensorFlowSharp NuGet package: Install-Package TensorFlowSharp
''' <summary>
''' Loads the frozen graph d:\piano.pb and runs it on feature arrays.
''' Dispose the instance to release the native session and graph.
''' </summary>
Public Class TensorflowPiano
    Implements IDisposable

    Private ReadOnly graph As TFGraph
    Private ReadOnly session As TFSession
    Private disposed As Boolean = False

    ''' <summary>Loads the model and opens a session on it.</summary>
    Public Sub New()
        Dim model As Byte() = File.ReadAllBytes("d:\piano.pb")
        graph = New TFGraph()
        graph.Import(model, "")
        session = New TFSession(graph)
    End Sub

    ''' <summary>
    ''' Feeds <paramref name="Data"/> to the node "input" and fetches the node "out".
    ''' </summary>
    ''' <param name="Data">Feature rows; only the result for row 0 is returned.</param>
    ''' <returns>The first row of the fetched tensor.</returns>
    Public Function Detect(Data(,) As Single) As Single()
        ' All per-call state is local: Detect is invoked from the recording
        ' callback as well as from UI handlers, so shared fields could be
        ' overwritten by a concurrent call.
        Dim runner As TFSession.Runner = session.GetRunner()
        runner.AddInput(graph("input")(0), Data).Fetch(graph("out")(0))
        Dim outputs As TFTensor() = runner.Run()
        Try
            ' GetValue(True) yields a jagged array; element 0 is the first row.
            Dim rows As Object = outputs(0).GetValue(True)
            Dim firstRow As Single() = rows(0)
            Return firstRow
        Finally
            ' The values were copied into managed arrays above, so the native
            ' tensors can be released right away instead of waiting for the GC.
            For Each tensor As TFTensor In outputs
                tensor.Dispose()
            Next
        End Try
    End Function

    ''' <summary>Releases the session and the graph. Safe to call repeatedly.</summary>
    Public Sub Dispose() Implements IDisposable.Dispose
        If disposed Then Return
        disposed = True
        session.Dispose()
        graph.Dispose()
    End Sub
End Class
声音特征提取:
Imports System.Numerics
Imports MathNet.Numerics.IntegralTransforms
''' <summary>
''' Mel filter-bank feature extraction for 16 kHz audio: Hamming window, FFT
''' and log filter-bank energies.
''' </summary>
Public Class MFCC
    Private Const SampleRateHz As Integer = 16000   ' sampling frequency
    Private Const FilterCount As Integer = 40       ' mel filters laid out over 0 .. SampleRateHz / 2

    ''' <summary>
    ''' Triangular filter weights; row = filter, column = FFT bin.
    ''' Rows 0 .. MFCCNum are filled; MFCC() reads rows 0 .. MFCCNum - 1.
    ''' </summary>
    Public H As Double(,)
    Private MFCCNum As Integer
    Private FrameSize As Integer    ' frame length, 512 by default

    ''' <summary>Builds the filter bank.</summary>
    ''' <param name="framesize">Number of samples per frame (FFT length).</param>
    ''' <param name="MFCCNum">Number of values returned per frame; at most 39.</param>
    Public Sub New(Optional framesize As Integer = 512, Optional MFCCNum As Integer = 26)
        ' Filter i uses edges i, i + 1 and i + 2 and rows 0 .. MFCCNum are built,
        ' so MFCCNum + 2 must stay inside the FilterCount + 2 edge table.
        If MFCCNum < 0 OrElse MFCCNum > FilterCount - 1 Then
            Throw New ArgumentOutOfRangeException("MFCCNum", "MFCCNum must be between 0 and " & (FilterCount - 1))
        End If

        Me.MFCCNum = MFCCNum
        Me.FrameSize = framesize
        Dim half As Integer = Me.FrameSize \ 2
        H = New Double(MFCCNum, half) {}

        ' Filter edges are equally spaced on the mel scale between the lowest
        ' and the highest frequency (adjust freMin to raise the lower limit).
        Dim freMax As Double = SampleRateHz / 2
        Dim freMin As Double = 0
        Dim melFremax As Double = 1125 * Math.Log(1 + freMax / 700)
        Dim melFremin As Double = 1125 * Math.Log(1 + freMin / 700)
        Dim melStep As Double = (melFremax - melFremin) / (FilterCount + 1)

        Dim filter_points(FilterCount + 1) As Integer
        For i As Integer = 0 To FilterCount + 1
            Dim mel As Double = melFremin + melStep * i
            ' Mel back to Hz, then Hz to FFT bin index.
            Dim hz As Double = 700 * (Math.Exp(mel / 1125) - 1)
            filter_points(i) = CInt(Math.Floor((Me.FrameSize + 1) * hz / SampleRateHz))
        Next

        ' Triangular filters: rising from lo to mid, falling from mid to hi.
        For i As Integer = 0 To MFCCNum
            Dim lo As Integer = filter_points(i)
            Dim mid As Integer = filter_points(i + 1)
            Dim hi As Integer = filter_points(i + 2)
            For j As Integer = 0 To half - 1
                If j < lo OrElse j > hi Then
                    H(i, j) = 0
                ElseIf j >= mid Then
                    ' With small frame sizes neighbouring edges can fall on the
                    ' same bin; the peak weight is 1 rather than 0 / 0.
                    If hi = mid Then
                        H(i, j) = 1
                    Else
                        H(i, j) = CDbl(hi - j) / (hi - mid)
                    End If
                Else
                    ' lo <= j < mid, hence mid - lo > 0.
                    H(i, j) = CDbl(j - lo) / (mid - lo)
                End If
            Next
        Next
    End Sub

    ''' <summary>Applies a Hamming window to <paramref name="WaveData"/> in place.</summary>
    Public Sub Hamming_window(WaveData() As Single)
        Dim len As Integer = WaveData.Length
        Dim omega As Single = 2.0 * Math.PI / len
        For j As Integer = 0 To len - 1
            WaveData(j) = (0.54 - 0.46 * Math.Cos(omega * (j))) * WaveData(j)
        Next
    End Sub

    ''' <summary>Returns the forward Fourier transform of <paramref name="WaveData"/>.</summary>
    Public Function FFT(WaveData() As Single) As Complex()
        Dim FFT_Complex(WaveData.Length - 1) As Complex
        For i = 0 To WaveData.Length - 1
            FFT_Complex(i) = WaveData(i)
        Next
        MathNet.Numerics.IntegralTransforms.Fourier.Forward(FFT_Complex, FourierOptions.Matlab)
        Return FFT_Complex
    End Function

    ''' <summary>
    ''' Returns the natural log of the mel filter-bank energies of one frame.
    ''' </summary>
    ''' <param name="fft">Spectrum of the frame; bins 0 .. FrameSize \ 2 - 1 are used.</param>
    ''' <returns>MFCCNum values. An energy of exactly 0 is returned as 0.</returns>
    ''' <remarks>
    ''' No DCT is applied. Earlier code computed a DCT of these values but
    ''' discarded it and returned the log energies; that unused computation has
    ''' been removed. Switching to DCT output changes every feature value and
    ''' requires retraining any model built on the current output.
    ''' </remarks>
    Public Function MFCC(fft() As Complex) As Single()
        Dim half As Integer = Me.FrameSize \ 2
        Dim S As Single() = New Single(MFCCNum - 1) {}
        For i As Integer = 0 To MFCCNum - 1
            ' Power spectrum weighted by filter i.
            For j As Integer = 0 To half - 1
                S(i) = S(i) + Math.Pow(fft(j).Magnitude, 2) * H(i, j)
            Next
            If S(i) <> 0 Then
                S(i) = Math.Log(S(i), Math.E)
            End If
        Next
        Return S
    End Function
End Class