使用 Lasagne 做回归时让维度正确匹配（答案）

【问题标题】：Getting dimensions right for regression with lasagne（使用 Lasagne 做回归时让维度正确匹配）
【发布时间】：2016-08-04 17:31:05
【问题描述】：

我正在尝试训练一个输出值范围为 -1.0..1.0 的网络。目前只有六个特征，都是浮点数。我在让类型和形状对齐时遇到了很大的麻烦。到目前为止，我的代码如下：

#!/usr/bin/env python3

import lasagne
import numpy as np
import sys
import theano
import theano.tensor as T

infilename = sys.argv[1]  # data file path, taken from the first command-line argument
split_size = 500  # rows before this index form the training set, the rest the test set
epochs = 100  # number of passes over the training rows
theano.config.exception_verbosity = 'high'  # ask Theano for detailed error reports

examples = np.genfromtxt(infilename, delimiter=' ')  # parse the space-delimited numeric file

np.random.shuffle(examples)  # shuffle in place before the train/test split
examples = examples.reshape(-1, 7)  # force 7 columns per row

train, test = examples[:split_size,:], examples[split_size:,:]

# input and target: column 0 is the target, the remaining six columns are the features
train_y = train[:,0]
train_X = train[:,1:]

test_y = test[:,0]
test_X = test[:,1:]

input_var = T.matrix()  # symbolic 2-D network input
target_var = T.vector()  # symbolic 1-D target


def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield successive ``(inputs, targets)`` minibatches of ``batchsize`` rows.

    Only full batches are produced: trailing rows that do not fill a whole
    batch are dropped.  When ``shuffle`` is true the rows are visited in a
    random order drawn with ``np.random.shuffle``; otherwise contiguous
    slices are taken in their original order.
    """
    assert len(inputs) == len(targets)
    n_rows = len(inputs)
    order = None
    if shuffle:
        order = np.arange(n_rows)
        np.random.shuffle(order)
    for lo in range(0, n_rows - batchsize + 1, batchsize):
        hi = lo + batchsize
        # Index with the permuted row ids when shuffling, a plain slice otherwise.
        rows = order[lo:hi] if shuffle else slice(lo, hi)
        yield inputs[rows], targets[rows]

# nn structure: 6 input features -> 10 tanh units -> 1 output unit
from lasagne.nonlinearities import tanh, softmax, leaky_rectify  # leaky_rectify is imported but not used below
net = lasagne.layers.InputLayer(shape=(None, 6), input_var=input_var)
net = lasagne.layers.DenseLayer(net, num_units=10, nonlinearity=tanh)
net = lasagne.layers.DenseLayer(net, num_units=1, nonlinearity=softmax)  # NOTE(review): softmax on a single unit; output shape is (batch, 1)

prediction = lasagne.layers.get_output(net)
loss = lasagne.objectives.aggregate(prediction, target_var)  # NOTE(review): aggregate()'s second argument is `weights`, so target_var is multiplied with prediction, not compared against it
loss = loss.mean() + 1e-4 * lasagne.regularization.regularize_network_params(net, lasagne.regularization.l2)  # mean loss plus L2 penalty scaled by 1e-4

# parameter update expressions
params = lasagne.layers.get_all_params(net, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate = 0.02, momentum=0.9)

# training function: returns the loss and applies the parameter updates on each call
train_fn = theano.function([input_var, target_var], loss, updates=updates)

for epoch in range(epochs):
    loss = 0  # rebinds `loss` to a Python number; train_fn was already compiled from the symbolic loss
    for input_batch, target_batch in iterate_minibatches(train_X, train_y, 50, shuffle=True):
        print('input', input_batch.shape)
        print('target', target_batch.shape)
        loss += train_fn(input_batch, target_batch)
    print('epoch', epoch, 'loss', loss / len(training_data))  # NOTE(review): training_data is not defined in this script

test_prediction = lasagne.layers.get_output(network, deterministic=True)  # NOTE(review): the model is bound to `net`; `network` is not defined in this script
predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1))  # NOTE(review): argmax over axis 1 of a single-unit output is always 0
print('predicted score for first test input', predict_fn(test_X[0]))  # NOTE(review): test_X[0] is 1-D while input_var is a T.matrix

print(net_output)  # NOTE(review): net_output is not defined in this script

输入数据是一个 7 列的浮点数文件，以空格分隔。以下是一些示例行：

-0.4361711835021444 0.9926778242677824 1.0 0.0 0.0 0.0 0.0
1.0 0.9817294281729428 1.0 1.7142857142857142 0.0 0.42857142857142855 1.7142857142857142
-0.4356014580801944 0.9956764295676429 1.0 0.0 0.0 0.0 0.0
1.0 1.0 3.0 0.0 0.0 4.0 1.0
-0.4361977186311787 0.9925383542538354 1.0 0.0 0.0 0.0 0.0
-0.46511627906976744 1.0 0.5 0.0 0.0 0.0 0.0
-0.4347826086956522 1.0 1.0 0.0 0.0 0.0 0.0
-0.4378224895429426 0.9840306834030683 1.0 0.0 0.0 0.0 0.0
-0.4377155764476054 0.9845885634588564 1.0 0.0 0.0 0.0 0.0
1.0 1.0 1.0 1.0 0.0 2.0 0.0

这段代码在很大程度上基于 Lasagne 的参考示例。出现的错误是：

/usr/local/lib/python3.5/dist-packages/theano/tensor/signal/downsample.py:6: UserWarning: downsample module has been moved to the theano.tensor.signal.pool module.
  "downsample module has been moved to the theano.tensor.signal.pool module.")
input (50, 6)
target (50,)
Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/theano/compile/function_module.py", line 859, in __call__
    outputs = self.fn()
ValueError: Input dimension mis-match. (input[0].shape[1] = 1, input[1].shape[1] = 50)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "./nn_cluster.py", line 66, in <module>
    loss += train_fn(input_batch, target_batch)
  File "/usr/local/lib/python3.5/dist-packages/theano/compile/function_module.py", line 871, in __call__
    storage_map=getattr(self.fn, 'storage_map', None))
  File "/usr/local/lib/python3.5/dist-packages/theano/gof/link.py", line 314, in raise_with_op
    reraise(exc_type, exc_value, exc_trace)
  File "/usr/lib/python3/dist-packages/six.py", line 685, in reraise
    raise value.with_traceback(tb)
  File "/usr/local/lib/python3.5/dist-packages/theano/compile/function_module.py", line 859, in __call__
    outputs = self.fn()
ValueError: Input dimension mis-match. (input[0].shape[1] = 1, input[1].shape[1] = 50)
Apply node that caused the error: Elemwise{Mul}[(0, 0)](SoftmaxWithBias.0, InplaceDimShuffle{x,0}.0)
Toposort index: 21
Inputs types: [TensorType(float64, matrix), TensorType(float64, row)]
Inputs shapes: [(50, 1), (1, 50)]
Inputs strides: [(8, 8), (400, 8)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[Sum{acc_dtype=float64}(Elemwise{Mul}[(0, 0)].0)]]

Debugprint of the apply node: 
Elemwise{Mul}[(0, 0)] [id A] <TensorType(float64, matrix)> ''   
 |SoftmaxWithBias [id B] <TensorType(float64, matrix)> ''   
 | |Dot22 [id C] <TensorType(float64, matrix)> ''   
 | | |Elemwise{Composite{tanh((i0 + i1))}}[(0, 0)] [id D] <TensorType(float64, matrix)> ''   
 | | | |Dot22 [id E] <TensorType(float64, matrix)> ''   
 | | | | |<TensorType(float64, matrix)> [id F] <TensorType(float64, matrix)>
 | | | | |W [id G] <TensorType(float64, matrix)>
 | | | |InplaceDimShuffle{x,0} [id H] <TensorType(float64, row)> ''   
 | | |   |b [id I] <TensorType(float64, vector)>
 | | |W [id J] <TensorType(float64, matrix)>
 | |b [id K] <TensorType(float64, vector)>
 |InplaceDimShuffle{x,0} [id L] <TensorType(float64, row)> ''   
   |<TensorType(float64, vector)> [id M] <TensorType(float64, vector)>

Storage map footprint:
 - Elemwise{Composite{tanh((i0 + i1))}}[(0, 0)].0, Shape: (50, 10), ElemSize: 8 Byte(s), TotalSize: 4000 Byte(s)
 - <TensorType(float64, matrix)>, Input, Shape: (50, 6), ElemSize: 8 Byte(s), TotalSize: 2400 Byte(s)
 - W, Shared Input, Shape: (6, 10), ElemSize: 8 Byte(s), TotalSize: 480 Byte(s)
 - <TensorType(float64, matrix)>, Shared Input, Shape: (6, 10), ElemSize: 8 Byte(s), TotalSize: 480 Byte(s)
 - SoftmaxWithBias.0, Shape: (50, 1), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s)
 - InplaceDimShuffle{x,0}.0, Shape: (1, 50), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s)
 - SoftmaxGrad.0, Shape: (50, 1), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s)
 - <TensorType(float64, vector)>, Input, Shape: (50,), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s)
 - W, Shared Input, Shape: (10, 1), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
 - b, Shared Input, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
 - <TensorType(float64, vector)>, Shared Input, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
 - <TensorType(float64, matrix)>, Shared Input, Shape: (10, 1), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
 - TensorConstant{0.02}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - b, Shared Input, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - TensorConstant{0.0001}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{(1, 1) of 0.9}, Shape: (1, 1), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - TensorConstant{4.00000000..000001e-06}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{(1,) of 0.02}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - Constant{0}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Subtensor{int64}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{(1,) of 0.9}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - Constant{1}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Subtensor{int64}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{(1, 1) of 1.0}, Shape: (1, 1), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - <TensorType(float64, vector)>, Shared Input, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 TotalSize: 8984.0 Byte(s) 0.000 GB
 TotalSize inputs: 4168.0 Byte(s) 0.000 GB

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.

使用 lasagne.objectives.squared_error 时会引发类似的异常。有什么想法吗？我无法确定问题是否出在数据的形状上，也不确定这是否是使用目标函数的正确方式。

【问题讨论】：

标签： python numpy theano lasagne

【解决方案1】：

我复制了你的代码和输入数据，做了一些修改后可以正常运行，没有报错。

代码：

import lasagne
import numpy as np
import sys
import theano
import theano.tensor as T

# Data file: use the first command-line argument when one is given, otherwise
# fall back to the sample file used for this answer.
infilename = sys.argv[1] if len(sys.argv) > 1 else 'tt_lasagne.input'
split_size = 500  # rows before this index form the training set, the rest the test set
epochs = 100  # number of passes over the training rows
theano.config.exception_verbosity = 'high'

examples = np.genfromtxt(infilename, delimiter=' ')

np.random.shuffle(examples)
# T.matrix() / T.vector() below take their dtype from theano.config.floatX,
# while genfromtxt yields float64; cast once here so the compiled function
# accepts the batches whatever floatX is set to.
examples = examples.reshape(-1, 7).astype(theano.config.floatX)

# Slicing past the end is safe: with fewer than split_size rows everything
# goes to `train` and `test` is empty.
train, test = examples[:split_size,:], examples[split_size:,:]

# input and target: column 0 is the regression target, columns 1-6 the features
train_y = train[:,0]
train_X = train[:,1:]

test_y = test[:,0]
test_X = test[:,1:]

input_var = T.matrix()  # symbolic 2-D network input
target_var = T.vector()  # symbolic 1-D target


def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Generate ``(inputs, targets)`` pairs, ``batchsize`` rows at a time.

    Rows left over after the last full batch are not yielded.  With
    ``shuffle`` set, a random permutation of the row indices (via
    ``np.random.shuffle``) decides which rows go into each batch; without
    it, consecutive slices are returned.
    """
    assert len(inputs) == len(targets)
    if shuffle:
        permutation = np.arange(len(inputs))
        np.random.shuffle(permutation)

        def select(begin, end):
            # Row ids for this batch, taken from the shuffled permutation.
            return permutation[begin:end]
    else:
        # Unshuffled batches are plain contiguous slices.
        select = slice
    for begin in range(0, len(inputs) - batchsize + 1, batchsize):
        window = select(begin, begin + batchsize)
        yield inputs[window], targets[window]

# nn structure: 6 input features -> 10 tanh units -> 1 output unit
from lasagne.nonlinearities import tanh, softmax, leaky_rectify
net = lasagne.layers.InputLayer(shape=(None, 6), input_var=input_var)
net = lasagne.layers.DenseLayer(net, num_units=10, nonlinearity=tanh)
# The target is a real value in [-1, 1], so squash the single output with
# tanh. softmax normalises across units, so on one unit it is always 1.0.
net = lasagne.layers.DenseLayer(net, num_units=1, nonlinearity=tanh)

# The network output has shape (batch, 1) but target_var is a vector of shape
# (batch,). Flatten the prediction so the element-wise loss lines up one to one
# instead of failing (or broadcasting) on mismatched shapes.
prediction = lasagne.layers.get_output(net).flatten()
# squared_error is the per-example regression loss. aggregate() only averages
# an already computed loss; its second argument is a weight, not the target.
loss = lasagne.objectives.squared_error(prediction, target_var)
loss = loss.mean() + 1e-4 * lasagne.regularization.regularize_network_params(net, lasagne.regularization.l2)

# parameter update expressions
params = lasagne.layers.get_all_params(net, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate = 0.02, momentum=0.9)

# training function: returns the loss and applies the parameter updates on each call
train_fn = theano.function([input_var, target_var], loss, updates=updates)

# iterate_minibatches only yields full batches, so cap the batch size at the
# number of training rows; otherwise a small data file trains on nothing.
batch_size = min(50, len(train_X))
if batch_size == 0:
    raise ValueError('no training rows were loaded')

for epoch in range(epochs):
    # Separate accumulator so the symbolic `loss` expression is not rebound.
    epoch_loss = 0.0
    n_batches = 0
    for input_batch, target_batch in iterate_minibatches(train_X, train_y, batch_size, shuffle=True):
        print('input', input_batch.shape)
        print('target', target_batch.shape)
        epoch_loss += train_fn(input_batch, target_batch)
        n_batches += 1
    # Each train_fn call already returns a batch mean, so average over batches.
    print('epoch', epoch, 'loss', epoch_loss / n_batches)

# Prediction: output the regression value itself (argmax over a single unit
# would always be 0).
test_prediction = lasagne.layers.get_output(net, deterministic=True)
predict_fn = theano.function([input_var], test_prediction.flatten())
if len(test_X):
    # input_var is a matrix, so pass a 1-row slice rather than a 1-D row.
    print('predicted score for first test input', predict_fn(test_X[:1])[0])

tt_lasagne.input

-0.4361711835021444 0.9926778242677824 1.0 0.0 0.0 0.0 0.0
1.0 0.9817294281729428 1.0 1.7142857142857142 0.0 0.42857142857142855 1.7142857142857142
-0.4356014580801944 0.9956764295676429 1.0 0.0 0.0 0.0 0.0
1.0 1.0 3.0 0.0 0.0 4.0 1.0
-0.4361977186311787 0.9925383542538354 1.0 0.0 0.0 0.0 0.0
-0.46511627906976744 1.0 0.5 0.0 0.0 0.0 0.0
-0.4347826086956522 1.0 1.0 0.0 0.0 0.0 0.0
-0.4378224895429426 0.9840306834030683 1.0 0.0 0.0 0.0 0.0
-0.4377155764476054 0.9845885634588564 1.0 0.0 0.0 0.0 0.0
1.0 1.0 1.0 1.0 0.0 2.0 0.0

【讨论】：