【发布时间】:2020-10-11 20:40:58
【问题描述】:
在 YouTube 频道“Coding Train”系列视频的帮助下,我终于第一次编写了一个神经网络(我一直想做这件事),“唯一”的区别是我是用 C 而不是 JS 编写的。
我试图模拟一个 XOR:结构是两个输入节点、两个隐藏节点和一个输出节点。经过训练,我发现它不能正常工作。
已经看过这篇文章和其他几篇文章,但它们对我没有帮助:XOR Neural Network sometimes outputs 0.5
这是我的训练数据:
Training data:
IN | OUT
00 | 0
01 | 1
10 | 1
11 | 0
我对它进行了多轮训练,每轮训练超过 10000 次,学习率从 0.5 到 0.01 不等,并期望得到相应的结果。下表列出了在不同训练轮次之后最常见的输出;无论我选择什么学习率,结果都是如此。
一轮训练(training session)= 多次单独的训练迭代
Actual output after training (OUTn is the nth training session):
IN | OUT1 | OUT2 | OUT3 | OUT4 | OUT5
00 | 0.01 | 0.01 | 0.01 | 0.66 | 0.01
01 | 0.99 | 0.99 | 0.50 | 0.66 | 0.66
10 | 0.99 | 0.50 | 0.99 | 0.66 | 0.66
11 | 0.01 | 0.50 | 0.50 | 0.66 | 0.66
大多数时候它会输出一些非常奇怪的结果。经过几个小时的排查,我仍然无法弄清楚错误在哪里。也许读到这里的人能找到问题所在?
以下是代码。
我定义了一个 GETRANDOM 宏,并用结构体定义了我的网络,以便可以轻松地传递、修改和返回它:
// Expands to a pseudo-random double in [-1, 1]: rand() is first scaled to
// [0, 1] by dividing by RAND_MAX, then stretched by 2 and shifted down by 1.
// Every expansion calls rand() once.
#define GETRANDOM ( -1.0 + 2.0 * ( (double)rand() / RAND_MAX ) )
/*
 * State of a fully connected network with a single hidden layer.
 *
 * The three *_nodes counts are filled in by the caller; setup_network()
 * then allocates every array below according to those counts.
 */
typedef struct sNetwork
{
    /* layer sizes */
    int input_nodes;
    int hidden_nodes;
    int output_nodes;

    /* per-node values; expected_values holds one target per output node */
    double *input_values;
    double *hidden_values;
    double *output_values;
    double *expected_values;

    /* per-node error terms written by train() */
    double *hidden_error;
    double *output_error;

    /* one bias per hidden node / per output node */
    double *bias_h;
    double *bias_o;

    /* weights_ih[input][hidden] and weights_ho[hidden][output] */
    double **weights_ih;
    double **weights_ho;
} tNetwork;
为此我还写了一个设置函数:
tNetwork* setup_network(tNetwork* tNet)
{
// general error check
if(tNet == NULL)
{
return NULL;
}
if((*tNet).input_nodes == 0 || (*tNet).hidden_nodes == 0 || (*tNet).output_nodes == 0)
{
return NULL;
}
// based on the defined size, set up the weights
// set up the input to hidden weights
(*tNet).weights_ih = (double**)malloc((*tNet).input_nodes * sizeof(double*));
for(int i = 0; i < (*tNet).input_nodes; i++)
{
(*tNet).weights_ih[i] = (double*)malloc((*tNet).hidden_nodes * sizeof(double));
for(int j = 0; j < (*tNet).hidden_nodes; j++)
{
(*tNet).weights_ih[i][j] = GETRANDOM;
}
}
// set up the hidden to output weights
(*tNet).weights_ho = (double**)malloc((*tNet).hidden_nodes * sizeof(double*));
for(int i = 0; i < (*tNet).hidden_nodes; i++)
{
(*tNet).weights_ho[i] = (double*)malloc((*tNet).output_nodes * sizeof(double));
for(int j = 0; j < (*tNet).output_nodes; j++)
{
(*tNet).weights_ho[i][j] = GETRANDOM;
}
}
// set up the bias
// set up hidden bias and value
(*tNet).bias_h = (double*)malloc((*tNet).hidden_nodes * sizeof(double));
for(int i = 0; i < (*tNet).hidden_nodes; i++)
{
(*tNet).bias_h[i] = GETRANDOM;
}
// set up the output bias and value
(*tNet).bias_o = (double*)malloc((*tNet).output_nodes * sizeof(double));
for(int i = 0; i < (*tNet).output_nodes; i++)
{
(*tNet).bias_o[i] = GETRANDOM;
}
// set up the values
(*tNet).hidden_values = (double*)malloc((*tNet).hidden_nodes * sizeof(double));
(*tNet).output_values = (double*)malloc((*tNet).output_nodes * sizeof(double));
(*tNet).input_values = (double*)malloc((*tNet).input_nodes * sizeof(double));
(*tNet).expected_values = (double*)malloc((*tNet).output_nodes * sizeof(double));
// set up the error stuff
(*tNet).hidden_error = (double*)malloc((*tNet).hidden_nodes * sizeof(double));
(*tNet).output_error = (double*)malloc((*tNet).output_nodes * sizeof(double));
return tNet;
}
sigmoid 函数:
double sigmoid(double x)
{
return 1 / (1 + exp(-x));
}
// Computes x * (1 - x). train() passes node values that have already been
// run through sigmoid(), for which this product is the sigmoid's slope.
double dsigmoid(double x)
{
    const double complement = 1 - x;
    return x * complement;
}
然后我编写了前馈函数:
/*
 * Forward pass: reads tNet->input_values, fills hidden_values and then
 * output_values, and returns tNet.
 *
 * Each node value is sigmoid(bias + sum of (previous-layer value * weight)).
 */
tNetwork* feed_forward(tNetwork* tNet)
{
    const int n_in = tNet->input_nodes;
    const int n_hid = tNet->hidden_nodes;
    const int n_out = tNet->output_nodes;

    // input -> hidden
    for (int h = 0; h < n_hid; h++)
    {
        double sum = tNet->bias_h[h]; // start from the bias
        for (int in = 0; in < n_in; in++)
        {
            sum += tNet->input_values[in] * tNet->weights_ih[in][h];
        }
        tNet->hidden_values[h] = sigmoid(sum);
    }

    // hidden -> output
    for (int o = 0; o < n_out; o++)
    {
        double sum = tNet->bias_o[o]; // start from the bias
        for (int h = 0; h < n_hid; h++)
        {
            sum += tNet->hidden_values[h] * tNet->weights_ho[h][o];
        }
        tNet->output_values[o] = sigmoid(sum);
    }

    return tNet;
}
之后的train函数:
/*
 * Run one training step on the sample currently stored in
 * tNet->input_values / tNet->expected_values.
 *
 * Performs a forward pass, then one gradient-descent update of all weights
 * and biases for the squared error between expected_values and
 * output_values, scaled by learning_rate.
 *
 * After the call, output_error[i] holds (expected - output) for each output
 * node and hidden_error[j] holds the error signal propagated back to hidden
 * node j.
 *
 * Returns tNet, or NULL if tNet is NULL.
 */
tNetwork* train(tNetwork* tNet, double learning_rate)
{
    if (tNet == NULL)
    {
        return NULL;
    }

    // forward pass so that hidden_values / output_values match the inputs
    tNet = feed_forward(tNet);

    // raw output error
    for (int o = 0; o < tNet->output_nodes; o++)
    {
        tNet->output_error[o] = tNet->expected_values[o] - tNet->output_values[o];
    }

    // Propagate the error to the hidden layer. This must happen before
    // weights_ho is modified below. The value sent back through each weight
    // is the output *delta* (error times the sigmoid slope at that output),
    // not the raw error; otherwise the hidden update is not the gradient of
    // the squared error.
    for (int h = 0; h < tNet->hidden_nodes; h++)
    {
        double propagated = 0.0;
        for (int o = 0; o < tNet->output_nodes; o++)
        {
            const double delta_out = tNet->output_error[o] * dsigmoid(tNet->output_values[o]);
            propagated += tNet->weights_ho[h][o] * delta_out;
        }
        tNet->hidden_error[h] = propagated;
    }

    // update output biases and hidden->output weights
    for (int o = 0; o < tNet->output_nodes; o++)
    {
        const double step = learning_rate * tNet->output_error[o] * dsigmoid(tNet->output_values[o]);
        tNet->bias_o[o] += step;
        for (int h = 0; h < tNet->hidden_nodes; h++)
        {
            tNet->weights_ho[h][o] += step * tNet->hidden_values[h];
        }
    }

    // update hidden biases and input->hidden weights
    for (int h = 0; h < tNet->hidden_nodes; h++)
    {
        const double step = learning_rate * tNet->hidden_error[h] * dsigmoid(tNet->hidden_values[h]);
        tNet->bias_h[h] += step;
        for (int in = 0; in < tNet->input_nodes; in++)
        {
            tNet->weights_ih[in][h] += step * tNet->input_values[in];
        }
    }

    return tNet;
}
最后,在我的 main 函数中,我这样做了:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
int main(void)
{
// initialize
srand(time(NULL));
// create neural network
tNetwork* network = (tNetwork*)malloc(sizeof(tNetwork));
// set up the properties of the network and initialize it
network->input_nodes = 2;
network->hidden_nodes = 2;
network->output_nodes = 1;
network = setup_network(network);
// train
for(int i = 0; i < 50000; i++)
{
switch(rand() % 4)
{
case 0:
// train #1
network->input_values[0] = 0;
network->input_values[1] = 0;
network->expected_values[0] = 0;
network = train(network, 0.1);
break;
case 1:
// train #2
network->input_values[0] = 1;
network->input_values[1] = 0;
network->expected_values[0] = 1;
network = train(network, 0.1);
break;
case 2:
// train #3
network->input_values[0] = 0;
network->input_values[1] = 1;
network->expected_values[0] = 1;
network = train(network, 0.1);
break;
case 3:
// train #4
network->input_values[0] = 1;
network->input_values[1] = 1;
network->expected_values[0] = 0;
network = train(network, 0.1);
break;
default:
break;
}
}
// check the functionality
network->input_values[0] = 0;
network->input_values[1] = 0;
network = feed_forward(network);
printf("%f\n", network->output_values[0]);
network->input_values[0] = 0;
network->input_values[1] = 1;
network = feed_forward(network);
printf("%f\n", network->output_values[0]);
network->input_values[0] = 1;
network->input_values[1] = 0;
network = feed_forward(network);
printf("%f\n", network->output_values[0]);
network->input_values[0] = 1;
network->input_values[1] = 1;
network = feed_forward(network);
printf("%f\n", network->output_values[0]);
return 0;
}
如果有人真的读到这里,我印象深刻,如果发现并解释了任何错误,我非常感谢,提前感谢!!
【问题讨论】:
-
你好,我看了一下你的主要方法和训练好的模型输出......它可能很好。在大多数情况下,神经网络永远不会给出准确的答案,即它会给出概率。那么为什么不在最后(输出)层使用 softmax 激活呢?那么激活度最高的节点就是你的答案
-
在您当前的情况下,当输出神经元小于 0.5 时,表示为 0,否则表示为 1
-
@mettleap 您好,感谢您的回复。当然,我想试试。但首先,我必须弄清楚 softmax 到底是什么...... :D(如前所述,这是我写的第一个 nn-algo,但我真的想更深入地研究这个话题)
-
你能给出第 100 轮训练之后的输出吗?在一轮训练中,网络必须看到所有可能的输入及其对应的输出,然后从中学习。而在您目前编写的训练过程中,网络每次只看到一个输入-输出对。也许让 for 循环多运行一段时间会有所帮助。所以 5 轮可能是不够的,因为神经网络可能永远看不到某些情况
-
我睡了一觉回来,看看有没有新的答案。只是为了澄清一个可能的误解:(在我的代码中)我把一轮训练定义为 10000 次训练迭代。而且我在多次运行程序时(每次运行训练 10000 次)得到了不同的输出。也许您把一轮训练理解成了对网络的单次训练?或者您的意思是我应该把网络训练 100 x 10000 次?
标签: c neural-network