【发布时间】:2021-03-30 22:47:38
【问题描述】:
我正在构建一个模型,使用简单的孪生网络(Siamese network)来区分两个指纹(dataset),但即使训练了 400 个 epoch,损失也没有下降:损失一直停留在 6000,准确率也完全没有提高。我使用 triplet loss 来训练模型,损失函数的代码如下:
def triplet_loss(y_true, y_pred, alpha=0.2):
    """Hinge-style triplet loss built on squared Euclidean distances.

    Evaluates ``sum(max(d(a, p) - d(a, n) + alpha, 0))`` where ``d`` is the
    squared difference summed over the last axis.

    Args:
        y_true: Not used by the computation.
        y_pred: Indexable value; items 0, 1 and 2 are taken as the anchor,
            positive and negative embeddings respectively.
        alpha: Margin added to the distance gap before clamping at zero.

    Returns:
        The clamped terms reduced to a single sum.

    NOTE(review): ``y_pred[i]`` indexes the leading axis. If Keras invokes
    this loss once per model output with a single ``(batch, dim)`` tensor,
    items 0-2 would be the first three samples of the batch rather than
    anchor/positive/negative -- confirm what is actually passed in.
    """
    def squared_distance(left, right):
        # Sum of squared element-wise differences along the last axis.
        return tf.reduce_sum((left - right) ** 2, axis=-1)

    anchor = y_pred[0]
    positive = y_pred[1]
    negative = y_pred[2]

    gap = squared_distance(anchor, positive) - squared_distance(anchor, negative)
    margin = tf.constant(alpha)
    floor = tf.constant(0.0)

    # Triplets already separated by more than the margin contribute zero.
    hinge = tf.maximum(gap + margin, floor)
    return tf.reduce_sum(hinge)
模型如下:
def model(input_shape):
    """Build a three-input network that embeds every input with one shared CNN.

    Args:
        input_shape: Shape of a single input sample; passed to each ``Input``
            and to the first ``Conv2D`` layer.

    Returns:
        A ``Model`` whose inputs are named ``anchor_input``,
        ``positive_input`` and ``negative_input`` and whose outputs are the
        three corresponding embeddings, in that order.
    """
    input_names = ('anchor_input', 'positive_input', 'negative_input')
    triplet_inputs = [Input(input_shape, name=name) for name in input_names]

    # A single Sequential instance is applied to all three inputs below, so
    # the three branches use the same layers.
    embedder = Sequential([
        Conv2D(128, (7, 7), activation='relu', input_shape=input_shape),
        MaxPooling2D(),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(),
        Conv2D(256, (3, 3), activation='relu'),
        Flatten(),
        Dense(4096, activation='relu'),
        Dense(128),
        # L2-normalise the 128-d output along its last axis.
        Lambda(lambda x: K.l2_normalize(x, axis=-1)),
    ])

    embeddings = [embedder(tensor) for tensor in triplet_inputs]
    return Model(inputs=triplet_inputs, outputs=embeddings)
我使用了不同类型的优化器来训练模型,但损失并没有减少。
# Candidate optimizers. Only `adam_o` is passed to compile() below; `sgd_o`
# and `ada` are not referenced anywhere else in the visible code.
adam_o = Adam(0.01)
sgd_o = SGD(0.1, momentum=0.1, nesterov=True)
ada = Adagrad(0.01)

# Build the three-branch network for inputs of shape (3, 96, 96) and attach
# the triplet loss to it.
model_a = model((3, 96, 96))
model_a.compile(
    optimizer=adam_o,
    loss=triplet_loss,
    metrics=['accuracy'],
)
我正在使用生成器来训练模型。生成器是:
def get_triple(real_id, data_ids, dic_data, dic_real):
    """Endlessly yield random ``[anchor, positive, negative]`` image triplets.

    Args:
        real_id: Candidate anchor ids; one is drawn at random per triplet.
        data_ids: Ids from which the negative id is drawn, excluding the
            anchor id chosen for that triplet.
        dic_data: Maps an id to a collection of image paths; one path is
            drawn at random for the positive and one for the negative.
        dic_real: Maps an id to a sequence whose first element is the
            anchor image path.

    Yields:
        A three-item list of arrays, each produced by reading the image with
        OpenCV, resizing to 96x96, transposing with axes (2, 0, 1), dividing
        by 255.0 and rounding to 6 decimals.
    """
    def load(path):
        # Read -> resize to 96x96 -> transpose axes (2, 0, 1) -> scale by
        # 1/255 -> round to 6 decimals.
        pixels = cv2.resize(cv2.imread(path), (96, 96))
        transposed = np.transpose(pixels, (2, 0, 1))
        return np.around(transposed / 255.0, decimals=6)

    while True:
        anc_id = np.random.choice(real_id)
        # The negative must come from a different id than the anchor.
        other_ids = [candidate for candidate in data_ids if candidate != anc_id]
        neg_id = np.random.choice(other_ids)

        anc_path = dic_real[anc_id][0]
        pos_path = np.random.choice(dic_data[anc_id])
        neg_path = np.random.choice(dic_data[neg_id])

        yield [load(anc_path), load(pos_path), load(neg_path)]
def batch_generator_RN(batch_size, real_id, ids, dic_data, dic_real):
    """Endlessly yield ``(inputs, targets)`` batches of image triplets.

    Each batch is assembled from ``batch_size`` triplets drawn from
    ``get_triple``.

    Args:
        batch_size: Number of triplets per batch.
        real_id: Forwarded to ``get_triple`` as the anchor id pool.
        ids: Forwarded to ``get_triple`` as the id pool for negatives.
        dic_data: Forwarded to ``get_triple``.
        dic_real: Forwarded to ``get_triple``.

    Yields:
        A tuple ``(x_data, [y_val, y_val, y_val])``. ``x_data`` is a dict
        keyed by ``'anchor_input'``, ``'positive_input'`` and
        ``'negative_input'``, each holding a ``(batch_size, 3, 96, 96)``
        array. ``y_val`` is an all-zero ``(batch_size, 2, 1)`` array.
    """
    triplet_generator = get_triple(real_id, ids, dic_data, dic_real)

    # The target array is never written to after creation, so one instance
    # can safely be shared across every batch.
    y_val = np.zeros((batch_size, 2, 1))

    while True:
        # Allocate fresh arrays for every batch. Reusing one set of buffers
        # would overwrite a batch that a consumer (e.g. a prefetch queue) is
        # still holding a reference to.
        anchors = np.empty((batch_size, 3, 96, 96))
        positives = np.empty((batch_size, 3, 96, 96))
        negatives = np.empty((batch_size, 3, 96, 96))

        for i in range(batch_size):
            anchors[i], positives[i], negatives[i] = next(triplet_generator)

        x_data = {
            'anchor_input': anchors,
            'positive_input': positives,
            'negative_input': negatives,
        }
        yield (x_data, [y_val, y_val, y_val])
【问题讨论】:
标签: python tensorflow keras