TensorFlow 中的变量共享答案

【问题标题】：variable sharing in Tensorflow（TensorFlow 中的变量共享）
【发布时间】：2017-12-02 14:46:31
【问题描述】：

我第一次尝试实现孪生网络（Siamese network），没有任何变量共享方面的经验。我不知道为什么会出现这个错误：“变量 conv2/W 不存在，或者不是用 tf.get_variable() 创建的。您是否想在 VarScope 中设置 reuse=tf.AUTO_REUSE？”任何帮助都不胜感激。

from __future__ import division, print_function, absolute_import

import tensorflow as tf

import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression

# Data loading and preprocessing
import tflearn.datasets.mnist as mnist
# Load MNIST with one-hot encoded labels (one_hot=True).
X, Y, testX, testY = mnist.load_data(one_hot=True)
# Reshape the images to (batch, 28, 28, 1), the shape declared by the
# input layer in tower_network below.
X = X.reshape([-1, 28, 28, 1])
testX = testX.reshape([-1, 28, 28, 1])


def tower_network(reuse = True):
    """Build one tower (branch) of the siamese network.

    The convolutional and fully connected layers are created under fixed
    scope names ('conv1', 'conv2', 'conv3', 'fc1') so that several towers
    can share the same variables.

    Args:
        reuse: Forwarded to every scoped layer. NOTE: with the default of
            True the layers try to reuse variables that must already exist,
            so the very first tower has to be built with reuse=False;
            otherwise TensorFlow reports that the variable does not exist.

    Returns:
        The output of the tower's final dropout layer.
    """
    network = tflearn.input_data(shape=(None,28,28,1))
    # Three stacked convolutions (32, 64, 128 filters, filter size 1),
    # each in its own named scope.
    network = tflearn.conv_2d(network, 32,1, activation='relu',reuse=reuse, scope='conv1')
    network = tflearn.conv_2d(network, 64,1, activation='relu',reuse=reuse, scope='conv2')
    network = tflearn.conv_2d(network, 128,1, activation='relu',reuse=reuse, scope='conv3')

    network = tflearn.max_pool_2d(network, 2, strides=2)

    network = tflearn.fully_connected(network, 512, activation='relu',reuse=reuse, scope='fc1')

    network = tflearn.dropout(network, 0.5)
    return network


def similarity_network( net1, net2):
    """Combine two tower outputs and classify the pair.

    Args:
        net1: Output of the first tower.
        net2: Output of the second tower.

    Returns:
        The softmax output layer with `num_classes` (2) units.
    """
    num_classes = 2
    network = tflearn.merge([net1,net2], mode='concat', axis=1, name='Merge') # merge net1 and net2 networks
    # fully connected layers
    # (no scope/reuse here: these layers are built once, on top of both towers)
    network = tflearn.fully_connected(network, 2048, activation='relu')
    network = tflearn.dropout(network, 0.5)
    network = tflearn.fully_connected(network, 2048, activation='relu')
    network = tflearn.dropout(network, 0.5)
    # softmax layers
    network = tflearn.fully_connected(network, num_classes, activation='softmax')
    return network




# NOTE: this first call relies on the default reuse=True, so the layers try
# to reuse variables that have not been created yet. This is the call that
# triggers the "variable ... does not exist" error reported in the question.
net1 = tower_network()
net2 = tower_network(reuse=True)

#similarity network
network = similarity_network( net1, net2)
#output layer
#network = tflearn.regression(network, optimizer='sgd', loss='hinge_loss', learning_rate=0.02)
network = tflearn.regression(network, optimizer='sgd', loss='categorical_crossentropy', learning_rate=0.02)

# Training
model = tflearn.DNN(network, tensorboard_verbose=0)
# NOTE(review): no layer in this listing is created with name='input' or
# name='target', so these feed-dict keys look unresolvable -- confirm.
model.fit({'input': X}, {'target': Y}, n_epoch=20,
           validation_set=({'input': testX}, {'target': testY}),
snapshot_step=100, show_metric=True, run_id='convnet_mnist')

【问题讨论】：

标签： python tensorflow tensorboard tflearn

【解决方案1】：

在net1 = tower_network() 中，参数reuse 设置为其默认值，即True。这会导致 tensorflow 尝试重用同名的变量，但该变量尚不存在。

用net1 = tower_network(reuse=False) 替换该行应该可以解决问题。

from __future__ import division, print_function, absolute_import

import tensorflow as tf

import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression

# Data loading and preprocessing
import tflearn.datasets.mnist as mnist

# MNIST with one-hot encoded labels.
X, Y, testX, testY = mnist.load_data(one_hot=True)
# Give both image sets the (batch, 28, 28, 1) layout declared by the
# input layer in tower_network.
X, testX = (images.reshape([-1, 28, 28, 1]) for images in (X, testX))


def tower_network(reuse=True):
    """Build one branch of the siamese network.

    Every trainable layer lives in a fixed, named scope ('conv1', 'conv2',
    'conv3', 'fc1'), which is what lets a second branch share the weights
    of the first one.

    Args:
        reuse: Passed to each scoped layer. The branch that is built first
            must be called with reuse=False so the variables get created;
            later branches pass reuse=True to share them.

    Returns:
        The output of the branch's final dropout layer.
    """
    tower = tflearn.input_data(shape=(None, 28, 28, 1))

    # Stack of convolutions with filter size 1; only the filter count and
    # the scope name differ between the layers.
    for n_filters, scope_name in ((32, 'conv1'), (64, 'conv2'), (128, 'conv3')):
        tower = tflearn.conv_2d(tower, n_filters, 1, activation='relu',
                                reuse=reuse, scope=scope_name)

    tower = tflearn.max_pool_2d(tower, 2, strides=2)
    tower = tflearn.fully_connected(tower, 512, activation='relu',
                                    reuse=reuse, scope='fc1')
    return tflearn.dropout(tower, 0.5)


def similarity_network(net1, net2):
    """Join two tower outputs and classify the pair.

    Args:
        net1: Output of the first tower.
        net2: Output of the second tower.

    Returns:
        A softmax layer with two output units.
    """
    num_classes = 2

    # Concatenate the two tower outputs along axis 1.
    joined = tflearn.merge([net1, net2], mode='concat', axis=1, name='Merge')

    # Two identical fully connected + dropout stages.
    for _ in range(2):
        joined = tflearn.fully_connected(joined, 2048, activation='relu')
        joined = tflearn.dropout(joined, 0.5)

    # Softmax output layer.
    return tflearn.fully_connected(joined, num_classes, activation='softmax')




# The first tower creates the shared variables (reuse=False); the second
# tower picks the same variables up again (reuse=True).
net1 = tower_network(reuse=False)
net2 = tower_network(reuse=True)

# Similarity network on top of both towers
network = similarity_network(net1, net2)

# Output layer
# (alternative that was tried: loss='hinge_loss' instead of cross-entropy)
network = tflearn.regression(
    network,
    optimizer='sgd',
    loss='categorical_crossentropy',
    learning_rate=0.02
)

# Training
model = tflearn.DNN(network, tensorboard_verbose=0)
# NOTE(review): no layer in this listing is created with name='input' or
# name='target', so these feed-dict keys are expected not to resolve. That
# is a separate problem from the variable-sharing fix shown here.
model.fit(
    {'input': X},
    {'target': Y},
    n_epoch=20,
    validation_set=({'input': testX}, {'target': testY}),
    snapshot_step=100,
    show_metric=True,
    run_id='convnet_mnist'
)

这段代码仍然会报错：feed dict 中引用了名为 “input” 的变量，但代码的其他地方并没有定义它——不过这是另一个问题。

【讨论】：

设置 net1 = tower_network (reuse = False ) 导致相同的错误：变量 conv1/W 已经存在，不允许。您的意思是在 VarScope 中设置 reuse=True 或 reuse=tf.AUTO_REUSE 吗？最初定义于：
我编辑了答案并添加了修改后的源文件。该文件解决了关于变量“conv1/W”的问题。还有一些其他问题需要您解决。另请注意，“conv/W 已存在”的错误消息与“conv/W 不存在”的错误消息不同。
也许我需要进一步解释一下：您需要在第一次调用 tower_network 时将 reuse 设置为 False，这样第一次调用就会创建变量。之后再调用 tower_network 并将 reuse 设置为 True 时，会直接使用这些已有的变量，而不会尝试创建任何新变量。