- #1
Superposed_Cat
- 388
- 5
I am trying for the first time in 5 years, to make a n- layer, m-width backprop neural network from scratch, My issue is, I've tried training it on XOR, where it returns 0.48 for any of the inputs instead of 1 for half of them and 0 for the other half, if you give it a dataset where the outputs range from 0.6 to 0.99 with the average being 0.7, it will return 0.7 for all, I feel like my loop structure and math is correct, I'm failing to see my problem, any help appreciated.
full code: https://www.mediafire.com/file/f58ia4kj4hmhz99/ConsoleApp3.rar/file
output:
enter image description here
backprop:
feedforward:
training:
initialization:
Any help appreciated.
full code: https://www.mediafire.com/file/f58ia4kj4hmhz99/ConsoleApp3.rar/file
output:
enter image description here
backprop:
C#:
public void bp(double[] x)
{
for (int i = 0; i < outputs.Length; i++)
{
outputs[i].e = -(outputs[i].a - x[i]) *
sig_dx(outputs[i].a);
for (int j = 0; j < width; j++)
{
outputs[i].w[j] += outputs[i].e * n[n.GetLength(0) - 1, j].a;
}
}
for (int j = 0; j < width; j++)
{
double sum = 0;
for (int k = 0; k < outputs.Length; k++)
{
sum += outputs[k].e * sig_dx(n[n.GetLength(0)-1,j].a) *
outputs[k].w[j];
}
n[n.GetLength(0)-1, j].e = sum;
}
/*for (int i = layers - 1; i > 0; i--)
{
for (int j = 0; j < width; j++)
{
for (int k = 0; k < width; k++)
{
n[i, j].w[k] += n[i, j].e //* sig_dx(n[i, j].a)
* n[i - 1, k].a;
}
}
}*/
for (int i = layers - 2; i >= 0; i--)
{
for (int j = 0; j < width; j++)
{
double sum = 0;
for (int k = 0; k < width; k++)
{
sum += n[i + 1, k].e * sig_dx(n[i, j].a) *
n[i + 1, k].w[j];
}
n[i, j].e = sum;
}
}
//
for (int j = 0; j < width; j++)
{
double sum = 0;
for (int k = 0; k < width; k++)
{
sum += n[1, k].e * sig_dx(n[0, j].a) *
n[1, k].w[j];
}
n[0, j].e = sum;
}
for (int j = 0; j < width; j++)
{
for (int k = 0; k < inputs.Length; k++)
{
n[0, j].w[k] += n[0, j].e //* sig_dx(n[i, j].a)
* inputs[k];
}
}
}
C#:
public void ff(double[] x)
{
inputs = x;
for (int j = 0; j < width; j++)
{
double sum = 0;
for (int k = 0; k < x.Length; k++)
{
sum += n[0, j].w[k] * x[k];
}
n[0, j].a = sig(sum);
}
for (int i = 1; i < layers; i++)
{
for(int j = 0; j < width; j++)
{
double sum = 0;
for(int k = 0; k < width; k++)
{
sum += n[i, j].w[k] * n[i - 1, k].a;
}
n[i, j].a = sig(sum);
}
}
for (int j = 0; j < outputs.Length; j++)
{
double sum = 0;
for (int k = 0; k < width; k++)
{
sum += n[n.GetLength(0)-1, k].a * outputs[j].w[k];
}
outputs[j].a = sig(sum);
}
}
C#:
var data2 =
new double[][][] {
new double[][]{new double[] { 0, 0 }, new double[] { 0 } },
new double[][]{new double[] { 1, 0 }, new double[] { 1 } },
new double[][]{new double[] { 0, 1 }, new double[] { 1 } },
new double[][]{new double[] { 1, 1 }, new double[] { 0 } }
};
net n = new net(2, 1, 4, 3);
for (int t = 0; t < 1000; t++)
{
for (int i = 0; i < data2.Length; i++)
{
n.ff(data2[i][0]);
n.bp(data2[i][1]);
}
}
Console.WriteLine("done");
for (int i = 0; i < data2.Length; i++)
{
n.ff(data2[i][0]);//new double[] { d,1 });
Console.WriteLine(n.outputs[0].a);
}
C#:
public class node
{
public double a;
public double e;
public double[] w;
Random r = new Random();
public node(int pl)
{
a = 0;
e = 10;
w = new double[pl];
for(int i = 0; i < pl; i++)
{
w[i] = r.NextDouble();
}
}
}
public class net
{
public node[,] n;
public node[] outputs;
double[] inputs;
int layers;
int width;
public net(int inp,int outp,int layers,int width)
{
this.width = width;
this.layers= layers;
outputs = new node[outp];
for(int i = 0; i < outp; i++)
{
outputs[i] = new node(width);
}
n = new node[layers,width];
for (int j = 0; j < width; j++)
{
n[0, j] = new node(inp);
}
for (int i = 1; i < layers; i++)
{
for(int j = 0; j < width; j++)
{
n[i, j] = new node(width);
}
}
}
double sig(double x)
{
return 1.0 / (1.0 + Math.Exp(-x));
}
double sig_dx(double x)
{
return x * (1.0 - x);
}