【发布时间】:2017-01-02 15:29:58
【问题描述】:
在 26700 次迭代后,训练准确率意外地从 1 下降到 0.06。代码来自 tensorflow 的在线文档,我只是将过滤器大小从 5x5 修改为 3x3,迭代从 20000 到 100000,批量大小从 50 到 100。任何人都可以解释一下吗? 可能和AdamOptimizer有关,因为如果改成GradientDesentOptimizer,56200次迭代都不会发生。但我不确定。 GradientDesentOptimizer也有这个问题。
step 26400, training accuracy 1, loss 0.00202696
step 26500, training accuracy 1, loss 0.0750173
step 26600, training accuracy 1, loss 0.0790716
step 26700, training accuracy 1, loss 0.0136688
step 26800, training accuracy 0.06, loss nan
step 26900, training accuracy 0.03, loss nan
step 27000, training accuracy 0.12, loss nan
step 27100, training accuracy 0.08, loss nan
python代码:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
def weight_varible(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
print("Download Done!")
sess = tf.InteractiveSession()
# paras
W_conv1 = weight_varible([3, 3, 1, 32])
b_conv1 = bias_variable([32])
# conv layer-1
x = tf.placeholder(tf.float32, [None, 784])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
# conv layer-2
W_conv2 = weight_varible([3, 3, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
# full connection
W_fc1 = weight_varible([7 * 7 * 64, 1204])
b_fc1 = bias_variable([1204])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# output layer: softmax
W_fc2 = weight_varible([1204, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
y_ = tf.placeholder(tf.float32, [None, 10])
# model training
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.arg_max(y_conv, 1), tf.arg_max(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
saver = tf.train.Saver()
sess.run(tf.initialize_all_variables())
for i in range(100000):
batch = mnist.train.next_batch(100)
if i % 10 == 0:
train_accuacy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
train_cross_entropy = cross_entropy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
print("step %d, training accuracy %g, loss %g"%(i, train_accuacy, train_cross_entropy))
train_step.run(feed_dict = {x: batch[0], y_: batch[1], keep_prob: 0.5})
# accuacy on test
save_path = saver.save(sess, "./mnist.model")
#saver.restore(sess,"./mnist.model")
print("Model saved in file: %s" % save_path)
print("test accuracy %g"%(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})))
【问题讨论】:
标签: tensorflow conv-neural-network gradient-descent mnist