【问题标题】:variable sharing in Tensorflow(TensorFlow 中的变量共享)
【发布时间】:2017-12-02 14:46:31
【问题描述】:

我第一次尝试实现连体(Siamese)网络,对变量共享没有任何经验。我不知道为什么会出现这个错误:“变量 conv2/W 不存在,或者不是用 tf.get_variable() 创建的。您是想在 VarScope 中设置 reuse=tf.AUTO_REUSE 吗?”感谢任何帮助。

from __future__ import division, print_function, absolute_import

import tensorflow as tf

import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression

# Data loading and preprocessing
import tflearn.datasets.mnist as mnist
# Load the dataset with one-hot labels, then reshape both image arrays
# to [-1, 28, 28, 1].
X, Y, testX, testY = mnist.load_data(one_hot=True)
X, testX = (images.reshape([-1, 28, 28, 1]) for images in (X, testX))


def tower_network(reuse=True):
    """Build one tower of the network and return its output tensor.

    The tower is an input layer of shape (None, 28, 28, 1), three conv
    layers (32, 64 and 128 filters), a max-pool layer, a 512-unit fully
    connected layer and a dropout layer. The conv and fully connected
    layers use the fixed scopes 'conv1', 'conv2', 'conv3' and 'fc1'.

    Args:
        reuse: Forwarded unchanged as the `reuse` argument of every
            scoped layer.

    Returns:
        The output of the final dropout layer.
    """
    net = tflearn.input_data(shape=(None, 28, 28, 1))

    # The conv layers differ only in filter count and scope name.
    for n_filters, scope_name in ((32, 'conv1'), (64, 'conv2'), (128, 'conv3')):
        net = tflearn.conv_2d(net, n_filters, 1, activation='relu',
                              reuse=reuse, scope=scope_name)

    net = tflearn.max_pool_2d(net, 2, strides=2)
    net = tflearn.fully_connected(net, 512, activation='relu',
                                  reuse=reuse, scope='fc1')
    return tflearn.dropout(net, 0.5)


def similarity_network(net1, net2):
    """Merge two tower outputs and classify the pair.

    The inputs are concatenated along axis 1, passed through two
    (2048-unit relu fully connected + dropout) stages, and finished with a
    2-unit softmax layer.

    Args:
        net1: Output of the first tower.
        net2: Output of the second tower.

    Returns:
        The output of the final softmax layer.
    """
    num_classes = 2
    hidden = tflearn.merge([net1, net2], mode='concat', axis=1, name='Merge')

    # Two identical fully connected + dropout stages.
    for _ in range(2):
        hidden = tflearn.fully_connected(hidden, 2048, activation='relu')
        hidden = tflearn.dropout(hidden, 0.5)

    return tflearn.fully_connected(hidden, num_classes, activation='softmax')




# Build the two towers. The first call relies on the default reuse=True.
net1 = tower_network()
net2 = tower_network(reuse=True)

# Similarity head over both tower outputs, wrapped in the regression layer.
# (Alternative loss tried by the author: loss='hinge_loss'.)
network = tflearn.regression(
    similarity_network(net1, net2),
    optimizer='sgd',
    loss='categorical_crossentropy',
    learning_rate=0.02
)

# Training
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit(
    {'input': X},
    {'target': Y},
    n_epoch=20,
    validation_set=({'input': testX}, {'target': testY}),
    snapshot_step=100,
    show_metric=True,
    run_id='convnet_mnist'
)

【问题讨论】:

    标签: python tensorflow tensorboard tflearn


    【解决方案1】:

    在 net1 = tower_network() 这一行中,参数 reuse 取其默认值 True。这会导致 TensorFlow 尝试重用同名的变量,但此时该变量尚不存在。

    用 net1 = tower_network(reuse=False) 替换该行应该可以解决问题。

    from __future__ import division, print_function, absolute_import
    
    import tensorflow as tf
    
    import tflearn
    from tflearn.layers.core import input_data, dropout, fully_connected
    from tflearn.layers.conv import conv_2d, max_pool_2d
    from tflearn.layers.normalization import local_response_normalization
    from tflearn.layers.estimator import regression
    
    # Data loading and preprocessing
    import tflearn.datasets.mnist as mnist
    # Load the dataset with one-hot labels, then reshape both image arrays
    # to [-1, 28, 28, 1].
    X, Y, testX, testY = mnist.load_data(one_hot=True)
    X, testX = (images.reshape([-1, 28, 28, 1]) for images in (X, testX))
    
    
    def tower_network(reuse=True):
        """Build one tower of the network and return its output tensor.

        The tower is an input layer of shape (None, 28, 28, 1), three conv
        layers (32, 64 and 128 filters), a max-pool layer, a 512-unit fully
        connected layer and a dropout layer. The conv and fully connected
        layers use the fixed scopes 'conv1', 'conv2', 'conv3' and 'fc1'.

        Args:
            reuse: Forwarded unchanged as the `reuse` argument of every
                scoped layer.

        Returns:
            The output of the final dropout layer.
        """
        net = tflearn.input_data(shape=(None, 28, 28, 1))

        # The conv layers differ only in filter count and scope name.
        for n_filters, scope_name in ((32, 'conv1'), (64, 'conv2'), (128, 'conv3')):
            net = tflearn.conv_2d(net, n_filters, 1, activation='relu',
                                  reuse=reuse, scope=scope_name)

        net = tflearn.max_pool_2d(net, 2, strides=2)
        net = tflearn.fully_connected(net, 512, activation='relu',
                                      reuse=reuse, scope='fc1')
        return tflearn.dropout(net, 0.5)
    
    
    def similarity_network(net1, net2):
        """Merge two tower outputs and classify the pair.

        The inputs are concatenated along axis 1, passed through two
        (2048-unit relu fully connected + dropout) stages, and finished with
        a 2-unit softmax layer.

        Args:
            net1: Output of the first tower.
            net2: Output of the second tower.

        Returns:
            The output of the final softmax layer.
        """
        num_classes = 2
        hidden = tflearn.merge([net1, net2], mode='concat', axis=1, name='Merge')

        # Two identical fully connected + dropout stages.
        for _ in range(2):
            hidden = tflearn.fully_connected(hidden, 2048, activation='relu')
            hidden = tflearn.dropout(hidden, 0.5)

        return tflearn.fully_connected(hidden, num_classes, activation='softmax')
    
    
    
    
    # Start from an empty default graph. If this script is re-run in the same
    # process (e.g. a notebook), variables from the previous run are still in
    # the graph and the reuse=False call below fails with
    # "Variable conv1/W already exists".
    tf.reset_default_graph()

    # The first tower creates the shared variables (reuse=False); the second
    # tower reuses them (reuse=True), so both towers share weights.
    net1 = tower_network(reuse=False)
    net2 = tower_network(reuse=True)

    # Similarity head over both tower outputs.
    network = similarity_network(net1, net2)
    # Output layer. (Alternative loss tried by the author: loss='hinge_loss'.)
    network = tflearn.regression(network, optimizer='sgd',
                                 loss='categorical_crossentropy',
                                 learning_rate=0.02)

    # Training
    # NOTE(review): no layer in this listing is given the name 'input' or
    # 'target', so these feed-dict keys are expected to fail at fit() time.
    # That is a separate issue from variable sharing -- confirm and fix
    # separately.
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit({'input': X}, {'target': Y}, n_epoch=20,
              validation_set=({'input': testX}, {'target': testY}),
              snapshot_step=100, show_metric=True, run_id='convnet_mnist')
    

    这样修改之后仍然会报错:feed 字典中使用了名为 “input” 的键,但代码的其他地方并没有定义这个名字的变量;不过这是另一个问题。

    【讨论】:

    • 设置 net1 = tower_network(reuse=False) 会导致同样的错误:变量 conv1/W 已经存在,不允许创建。您是想在 VarScope 中设置 reuse=True 或 reuse=tf.AUTO_REUSE 吗?最初定义于:
    • 我编辑了答案并添加了修改后的源文件。该文件解决了关于变量“conv1/W”的问题。还有一些其他问题需要您解决。另请注意,“conv/W 已存在”的错误消息与“conv/W 不存在”的错误消息不同。
    • 也许我需要进一步解释一下:您需要在第一次调用 tower_network 时将 reuse 设置为 False,这样第一次调用才会创建变量。之后再调用 tower_network 并将 reuse 设置为 True 时,将使用这些已有的变量,而不会尝试创建任何新变量。
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2018-08-07
    • 1970-01-01
    • 2016-08-24
    • 2017-01-23
    • 2021-06-22
    相关资源
    最近更新 更多