【发布时间】:2018-04-10 11:15:39
【问题描述】:
以下代码使用音频文件在tensorflow 中创建特征矩阵:
import tensorflow as tf
directory = "audio_dataset/*.wav"
filenames = tf.train.match_filenames_once(directory)
init = (tf.global_variables_initializer(), tf.local_variables_initializer())
count_num_files = tf.size(filenames)
filename_queue = tf.train.string_input_producer(filenames)
reader = tf.WholeFileReader()
filename, file_contents = reader.read(filename_queue)
with tf.Session() as sess:
sess.run(init)
num_files = sess.run(count_num_files)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
for i in range(num_files):
audio_file = sess.run(filename)
print(audio_file)
这是一个将音频从时域转换为频域的工具包:
from bregman.suite import *
chromo = tf.placeholder(tf.float32)
max_freqs = tf.argmax(chromo, 0)
def get_next_chromogram(sess):
audio_file = sess.run(filename)
F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
return F.X
def extract_feature_vector(sess, chromo_data):
num_features, num_samples = np.shape(chromo_data)
freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})
hist, bins = np.histogram(freq_vals, bins=range(num_features + 1))
return hist.astype(float) / num_samples
def get_dataset(sess):
num_files = sess.run(count_num_files)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
xs = []
for _ in range(num_files):
chromo_data = get_next_chromogram(sess)
x = [extract_feature_vector(sess, chromo_data)]
x = np.matrix(x)
if len(xs) == 0:
xs = x
else:
xs = np.vstack((xs, x))
return xs
这会将数据聚集在两个质心周围:
k = 2
max_iterations = 100
def initial_cluster_centroids(X, k):
return X[0:k, :]
def assign_cluster(X, centroids):
expanded_vectors = tf.expand_dims(X, 0)
expanded_centroids = tf.expand_dims(centroids, 1)
distances = tf.reduce_sum(tf.square(tf.subtract(expanded_vectors, expanded_centroids)), 2)
mins = tf.argmin(distances, 0)
return mins
def recompute_centroids(X, Y):
sums = tf.unsorted_segment_sum(X, Y, k)
counts = tf.unsorted_segment_sum(tf.ones_like(X), Y, k)
return sums / counts
with tf.Session() as sess:
sess.run(init)
X = get_dataset(sess)
centroids = initial_cluster_centroids(X, k)
i, converged = 0, False
while not converged and i < max_iterations:
i += 1
Y = assign_cluster(X, centroids)
centroids = sess.run(recompute_centroids(X, Y))
print(centroids)
但我得到以下回溯:
Traceback (most recent call last):
File "components.py", line 776, in <module>
X = get_dataset(sess)
File "ccomponents.py", line 745, in get_dataset
chromo_data = get_next_chromogram(sess)
File "coffee_components.py", line 728, in get_next_chromogram
F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features.py", line 143, in __init__
Features.__init__(self, arg, feature_params)
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 70, in __init__
self.extract()
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 213, in extract
self.extract_funs.get(f, self._extract_error)()
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 711, in _chroma
if not self._cqft():
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 588, in _cqft
self._make_log_freq_map()
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 353, in _make_log_freq_map
mxnorm = P.empty(self._cqtN) # Normalization coefficients
TypeError: 'float' object cannot be interpreted as an index
就我而言,range是int而不是float。
有人可以在这里指出错误吗?
【问题讨论】:
-
range在哪里?它不在堆栈跟踪中。这似乎是在抱怨X = get_dataset(sess)行。 -
是的,
get_dataset(sess)是一个使用 (range()) 进行迭代的函数(见上文)。通常这个错误是指你在range中使用了float,但我不确定。 -
也许您可以检查
audio_file在get_next_chromogram()中的值?这是传递给Chromagram()的唯一非整数。 -
@Antimony
audio_file将路径('AUDIO VALUE', 'audio_dataset/Wicked.wav')打印到 wav。文件 -
什么是
AUDIO_VALUE?也许它应该只引用路径?我不太确定Chromagram()接受什么数据类型。
标签: python tensorflow