【发布时间】:2017-05-24 05:56:51
【问题描述】:
我已经用 cntk (python) 实现了“异或问题”。
目前它只是偶尔能解决问题。怎样才能实现一个更可靠的网络?
我猜只有当初始随机权重恰好接近最优解时,问题才会被解决。我试过用 binary_cross_entropy 作为损失函数,但没有改善;也试过用 tanh 作为非线性函数,同样不起作用。我还尝试了 learning_rate、minibatch_size 和 num_minibatches_to_train 的许多不同组合。请帮忙。
谢谢
# -*- coding: utf-8 -*-
import numpy as np
from cntk import *
import random
import pandas as pd
input_dim = 2
output_dim = 1
def generate_random_data_sample(sample_size, feature_dim, num_classes):
    """Return ``sample_size`` XOR examples as ``(features, labels)``.

    Despite the name, the data is not random: the four XOR patterns are
    emitted in a fixed repeating order, (1,1)->0, (0,0)->0, (1,0)->1,
    (0,1)->1, so sample ``i`` is always pattern ``i % 4``.

    Args:
        sample_size: Number of examples to produce. Zero or a negative value
            yields empty arrays.
        feature_dim: Unused; kept so existing callers keep working.
        num_classes: Unused; kept so existing callers keep working.

    Returns:
        A tuple ``(X, Y)`` of ``float32`` arrays with shapes
        ``(sample_size, 2)`` and ``(sample_size, 1)``. The second axis is
        preserved even when no samples are requested.
    """
    xor_inputs = np.array([[1, 1], [0, 0], [1, 0], [0, 1]], dtype=np.float32)
    xor_targets = np.array([[0], [0], [1], [1]], dtype=np.float32)
    # An explicit integer dtype keeps the index array valid when it is empty,
    # so the result is (0, 2) / (0, 1) instead of a shapeless (0,) array.
    order = np.array([i % 4 for i in range(sample_size)], dtype=np.intp)
    return xor_inputs[order], xor_targets[order]
def linear_layer(input_var, output_dim, scale=10):
    """Build the affine expression ``bias + input_var * weight``.

    Args:
        input_var: Node whose ``shape[0]`` gives the number of inputs.
        output_dim: Number of output units.
        scale: Value passed as ``scale`` to the ``uniform`` initializer used
            for the weight parameter.

    Returns:
        The sum of the bias parameter and ``times(input_var, weight)``.
    """
    n_inputs = input_var.shape[0]
    w = parameter(shape=(n_inputs, output_dim), init=uniform(scale=scale))
    # The bias is created without an explicit initializer.
    b = parameter(shape=output_dim)
    projected = times(input_var, w)
    return b + projected
def dense_layer(input_var, output_dim, nonlinearity, scale=10):
    """Apply ``nonlinearity`` to a linear layer built on ``input_var``.

    Args:
        input_var: Node feeding the layer.
        output_dim: Number of output units.
        nonlinearity: Callable applied to the linear layer's output.
        scale: Forwarded to ``linear_layer`` as its initializer scale.

    Returns:
        Whatever ``nonlinearity`` returns for the linear layer's output.
    """
    pre_activation = linear_layer(input_var, output_dim, scale=scale)
    activated = nonlinearity(pre_activation)
    return activated
# --- Network definition and trainer setup ---------------------------------
# NOTE(review): `input`, `sigmoid`, `squared_error`, `learning_rate_schedule`,
# `UnitType`, `sgd` and `Trainer` presumably all come from the
# `from cntk import *` star import (which would shadow the builtin `input`)
# -- confirm.

# Input node for the two XOR operands.
feature = input(input_dim, np.float32)
# Hidden layer: 2 sigmoid units.
# NOTE(review): an initializer scale of 10 may push the sigmoids into
# saturation and could explain the unreliable convergence -- consider
# trying a smaller scale; confirm experimentally.
h1 = dense_layer(feature, 2, sigmoid,scale=10)
# Output layer: `output_dim` sigmoid units on top of the hidden layer.
z = dense_layer(h1, output_dim, sigmoid,scale=10)
# Input node for the target value.
label=input(1,np.float32)
# The same squared-error expression is used both as the training loss and
# as the reported evaluation metric.
loss = squared_error(z,label)
eval_error = squared_error(z,label)
# Learning rate of 0.5, specified per minibatch (UnitType.minibatch).
learning_rate = 0.5
lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
# SGD learner over all parameters of the output node `z`.
learner = sgd(z.parameters, lr_schedule)
trainer = Trainer(z, (loss, eval_error), [learner])
def print_training_progress(trainer, mb, frequency, verbose=1):
    """Report the trainer's latest minibatch statistics every ``frequency`` steps.

    Args:
        trainer: Object exposing ``previous_minibatch_loss_average`` and
            ``previous_minibatch_evaluation_average``.
        mb: Index of the minibatch that was just processed.
        frequency: Statistics are read only when ``mb % frequency == 0``.
        verbose: When truthy, the statistics are also printed.

    Returns:
        ``(mb, training_loss, eval_error)``. On steps that are not a multiple
        of ``frequency`` both statistics are the string ``"NA"``.
    """
    # Off-schedule step: nothing is read from the trainer or printed.
    if mb % frequency != 0:
        return mb, "NA", "NA"

    loss_avg = trainer.previous_minibatch_loss_average
    eval_avg = trainer.previous_minibatch_evaluation_average
    if verbose:
        report = "Minibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}".format(mb, loss_avg, eval_avg)
        print(report)
    return mb, loss_avg, eval_avg
# --- Training loop and result report ---------------------------------------
# Training hyper-parameters.
minibatch_size = 800
num_minibatches_to_train = 2000
training_progress_output_freq = 50

# generate_random_data_sample is deterministic, so every minibatch is the
# same array; build it once instead of on each iteration. This also keeps
# `features`/`labels` defined for the report below even if the loop runs
# zero times.
features, labels = generate_random_data_sample(minibatch_size, input_dim, output_dim)

for i in range(num_minibatches_to_train):
    trainer.train_minibatch({feature: features, label: labels})
    # The returned tuple is deliberately not unpacked: binding it to `loss`
    # would overwrite the module-level loss node with a float or "NA".
    print_training_progress(trainer, i, training_progress_output_freq, verbose=1)

# Evaluate the trained network on the training batch and show one row per
# distinct (query, test, pred) combination, ordered by expected label.
out = z
result = out.eval({feature: features})
a = pd.DataFrame(data=dict(
    query=[str(int(x[0])) + str(int(x[1])) for x in features],
    test=[int(l[0]) for l in labels],
    pred=[l[0] for l in result]))
print(a[["query", "test", "pred"]].drop_duplicates().sort_values(by="test"))
【问题讨论】:
标签: python neural-network cntk