cra*_*s84 5 c# backpropagation neural-network
2014年6月6日更新:我已经更新了问题,改为尝试求解一个非线性方程。正如你们许多人所指出的,解决非线性问题并不需要额外的复杂性(隐藏层、sigmoid函数等)。
此外,我意识到除了神经网络之外,我甚至可以使用其他方法来解决这样的非线性问题.我不是要编写最有效的代码或最少量的代码.这纯粹是为了让我更好地学习神经网络.
我已经创建了自己的反向传播神经网络实现.
在训练解决简单的XOR操作时,它工作正常.
但是现在我想调整它并训练它来解决Y = X*X + B类型的公式,但是我没有得到预期的结果.训练后,网络无法计算出正确的答案.神经网络是否非常适合解决这样的代数方程?我意识到我的例子是微不足道的我只是想了解更多关于神经网络及其能力的知识.
我的隐藏层使用 sigmoid 激活函数,我的输出层使用恒等函数(identity function)。
如果你能分析我的代码并指出任何错误,我将不胜感激.
这是我的完整代码(C#.NET):
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace NeuralNetwork
{
class Program
{
    /// <summary>
    /// Trains the network on samples of y = x * x + 10 for x in [0, 14],
    /// then prints predictions for a few inputs (x = 20 lies outside the
    /// training range, so the network must extrapolate there).
    /// </summary>
    static void Main(string[] args)
    {
        Console.WriteLine("Training Network...");
        var network = new NeuralNetwork(1, 5, 1);
        for (int i = 0; i < 100000; i++)
        {
            int x = i % 15;           // cycle through the 15 training inputs
            int y = x * x + 10;       // target value for this input
            network.Train(x);         // forward pass
            network.BackPropagate(y); // gradient step toward the target
        }
        // NOTE(review): with an identity output unit, unnormalized targets
        // (10..206) and a learn rate of 0.9, training easily diverges to
        // huge values or NaN; normalizing x and y to [0, 1] and lowering
        // the learn rate makes it stable.
        //Below should output 10
        Console.WriteLine("0 * 0 + 10 = " + network.Compute(0)[0]);
        //Below should output 110
        Console.WriteLine("10 * 10 + 10 = " + network.Compute(10)[0]);
        //Below should output 410
        Console.WriteLine("20 * 20 + 10 = " + network.Compute(20)[0]);
    }
}
/// <summary>
/// A three-layer (input / hidden / output) feed-forward network trained
/// with plain backpropagation. Hidden neurons use a sigmoid activation,
/// output neurons use the identity function.
/// </summary>
public class NeuralNetwork
{
    public double LearnRate { get; set; }
    public double Momentum { get; set; }
    public List<Neuron> InputLayer { get; set; }
    public List<Neuron> HiddenLayer { get; set; }
    public List<Neuron> OutputLayer { get; set; }
    static Random random = new Random();

    /// <summary>
    /// Builds a fully connected network with the given layer sizes.
    /// Weights and biases start at random values in (-1, 1).
    /// </summary>
    public NeuralNetwork(int inputSize, int hiddenSize, int outputSize)
    {
        LearnRate = .9;
        Momentum = .04;
        InputLayer = new List<Neuron>();
        HiddenLayer = new List<Neuron>();
        OutputLayer = new List<Neuron>();
        for (int n = 0; n < inputSize; n++)
        {
            InputLayer.Add(new Neuron());
        }
        for (int n = 0; n < hiddenSize; n++)
        {
            HiddenLayer.Add(new Neuron(InputLayer)); // connect to every input neuron
        }
        for (int n = 0; n < outputSize; n++)
        {
            OutputLayer.Add(new Neuron(HiddenLayer)); // connect to every hidden neuron
        }
    }

    /// <summary>
    /// Runs a forward pass: loads <paramref name="inputs"/> into the input
    /// layer, then recomputes hidden and output neuron values in order.
    /// </summary>
    public void Train(params double[] inputs)
    {
        for (int n = 0; n < InputLayer.Count; n++)
        {
            InputLayer[n].Value = inputs[n];
        }
        foreach (var neuron in HiddenLayer)
        {
            neuron.CalculateValue();
        }
        foreach (var neuron in OutputLayer)
        {
            neuron.CalculateValue();
        }
    }

    /// <summary>Forward pass plus collection of the output-layer values.</summary>
    public double[] Compute(params double[] inputs)
    {
        Train(inputs);
        var outputs = new double[OutputLayer.Count];
        for (int n = 0; n < OutputLayer.Count; n++)
        {
            outputs[n] = OutputLayer[n].Value;
        }
        return outputs;
    }

    /// <summary>Sum of absolute errors across the output layer.</summary>
    public double CalculateError(params double[] targets)
    {
        double total = 0.0;
        for (int n = 0; n < OutputLayer.Count; n++)
        {
            total += Math.Abs(OutputLayer[n].CalculateError(targets[n]));
        }
        return total;
    }

    /// <summary>
    /// One backpropagation step: output gradients first, then hidden
    /// gradients (using the pre-update weights), then weight updates.
    /// </summary>
    public void BackPropagate(params double[] targets)
    {
        for (int n = 0; n < OutputLayer.Count; n++)
        {
            OutputLayer[n].CalculateGradient(targets[n]);
        }
        foreach (var neuron in HiddenLayer)
        {
            neuron.CalculateGradient();
        }
        foreach (var neuron in HiddenLayer)
        {
            neuron.UpdateWeights(LearnRate, Momentum);
        }
        foreach (var neuron in OutputLayer)
        {
            neuron.UpdateWeights(LearnRate, Momentum);
        }
    }

    /// <summary>Uniform random value in (-1, 1) for weight/bias init.</summary>
    public static double NextRandom()
    {
        return 2 * random.NextDouble() - 1;
    }

    /// <summary>Logistic sigmoid, clamped to avoid overflow in Exp.</summary>
    public static double SigmoidFunction(double x)
    {
        if (x < -45.0)
        {
            return 0.0;
        }
        if (x > 45.0)
        {
            return 1.0;
        }
        return 1.0 / (1.0 + Math.Exp(-x));
    }

    /// <summary>Sigmoid derivative expressed in terms of the output f.</summary>
    public static double SigmoidDerivative(double f)
    {
        return f * (1 - f);
    }

    /// <summary>Hyperbolic tangent, clamped for large magnitudes.</summary>
    public static double HyperTanFunction(double x)
    {
        if (x < -10.0)
        {
            return -1.0;
        }
        if (x > 10.0)
        {
            return 1.0;
        }
        return Math.Tanh(x);
    }

    /// <summary>Tanh derivative expressed in terms of the output f.</summary>
    public static double HyperTanDerivative(double f)
    {
        return (1 - f) * (1 + f);
    }

    /// <summary>Identity activation, used by the output layer.</summary>
    public static double IdentityFunction(double x)
    {
        return x;
    }

    /// <summary>Derivative of the identity function is the constant 1.</summary>
    public static double IdentityDerivative()
    {
        return 1;
    }
}
/// <summary>
/// A single neuron. Its role (input / hidden / output) is derived from
/// which synapse lists are non-empty, and that role selects the
/// activation: sigmoid for hidden neurons, identity otherwise.
/// </summary>
public class Neuron
{
    public bool IsInput { get { return InputSynapses.Count == 0; } }
    public bool IsHidden { get { return InputSynapses.Count != 0 && OutputSynapses.Count != 0; } }
    public bool IsOutput { get { return OutputSynapses.Count == 0; } }
    public List<Synapse> InputSynapses { get; set; }
    public List<Synapse> OutputSynapses { get; set; }
    public double Bias { get; set; }
    public double BiasDelta { get; set; }
    public double Gradient { get; set; }
    public double Value { get; set; }

    /// <summary>Creates an unconnected neuron with a random bias.</summary>
    public Neuron()
    {
        InputSynapses = new List<Synapse>();
        OutputSynapses = new List<Synapse>();
        Bias = NeuralNetwork.NextRandom();
    }

    /// <summary>
    /// Creates a neuron fully connected to <paramref name="inputNeurons"/>;
    /// each new synapse is registered on both endpoints.
    /// </summary>
    public Neuron(List<Neuron> inputNeurons) : this()
    {
        foreach (var source in inputNeurons)
        {
            var link = new Synapse(source, this);
            source.OutputSynapses.Add(link);
            InputSynapses.Add(link);
        }
    }

    /// <summary>
    /// Recomputes Value from the weighted sum of upstream values plus
    /// bias, passed through this neuron's activation function.
    /// </summary>
    public virtual double CalculateValue()
    {
        double weightedSum = 0.0;
        foreach (var link in InputSynapses)
        {
            weightedSum += link.Weight * link.InputNeuron.Value;
        }
        weightedSum += Bias;
        Value = IsHidden
            ? NeuralNetwork.SigmoidFunction(weightedSum)
            : NeuralNetwork.IdentityFunction(weightedSum);
        return Value;
    }

    /// <summary>Activation derivative evaluated at the current Value.</summary>
    public virtual double CalculateDerivative()
    {
        if (IsHidden)
        {
            return NeuralNetwork.SigmoidDerivative(Value);
        }
        return NeuralNetwork.IdentityDerivative();
    }

    /// <summary>Signed error (target minus current output).</summary>
    public double CalculateError(double target)
    {
        return target - Value;
    }

    /// <summary>Gradient for an output neuron, given its target.</summary>
    public double CalculateGradient(double target)
    {
        return Gradient = CalculateError(target) * CalculateDerivative();
    }

    /// <summary>
    /// Gradient for a hidden neuron: downstream gradients weighted by the
    /// connecting synapses, scaled by this neuron's activation derivative.
    /// </summary>
    public double CalculateGradient()
    {
        double downstream = 0.0;
        foreach (var link in OutputSynapses)
        {
            downstream += link.OutputNeuron.Gradient * link.Weight;
        }
        return Gradient = downstream * CalculateDerivative();
    }

    /// <summary>
    /// Applies one gradient step with momentum to the bias and to every
    /// incoming synapse weight.
    /// </summary>
    public void UpdateWeights(double learnRate, double momentum)
    {
        var previousBiasDelta = BiasDelta;
        BiasDelta = learnRate * Gradient; // bias behaves like a weight on a constant 1 input
        Bias += BiasDelta + momentum * previousBiasDelta;
        foreach (var link in InputSynapses)
        {
            var previousWeightDelta = link.WeightDelta;
            link.WeightDelta = learnRate * Gradient * link.InputNeuron.Value;
            link.Weight += link.WeightDelta + momentum * previousWeightDelta;
        }
    }
}
/// <summary>
/// A directed, weighted connection between two neurons. The weight is
/// initialized randomly; WeightDelta stores the previous update so the
/// momentum term can reuse it.
/// </summary>
public class Synapse
{
    public Neuron InputNeuron { get; set; }
    public Neuron OutputNeuron { get; set; }
    public double Weight { get; set; }
    public double WeightDelta { get; set; }

    /// <summary>Connects <paramref name="inputNeuron"/> to <paramref name="outputNeuron"/>.</summary>
    public Synapse(Neuron inputNeuron, Neuron outputNeuron)
    {
        Weight = NeuralNetwork.NextRandom(); // start in (-1, 1)
        InputNeuron = inputNeuron;
        OutputNeuron = outputNeuron;
    }
}
}
Run Code Online (Sandbox Code Playgroud)
您使用 sigmoid 作为输出函数,其取值范围是 [0, 1],但您的目标值超出了这个范围(0 到 MAX_INT),我认为这就是您得到 NaN 的根本原因。我更新了您的代码,并尝试把数值规范化到 [0, 1] 范围内,这样我就能得到符合预期的输出。
我想我已经接近真相了,我不确定为什么这个答案被否决。
// NOTE(review): the pasted answer code below was mangled into a single
// escaped line; reconstructed here with normal formatting, preserving the
// answer's changes: learn rate 0.2, 3 hidden neurons, normalized inputs,
// bias and momentum terms commented out.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace NeuralNetwork
{
    class Program
    {
        static void Main(string[] args)
        {
            Console.WriteLine("Training Network...");

            Random r = new Random();
            var network = new NeuralNetwork(1, 3, 1);
            for (int k = 0; k < 60; k++)
            {
                for (int i = 0; i < 1000; i++)
                {
                    double x = i / 1000.0;// r.Next();
                    double y = 3 * x;
                    network.Train(x);
                    network.BackPropagate(y);
                }
                double output = network.Compute(0.2)[0];
                Console.WriteLine(output);
            }
            //Below should output 10, but instead outputs either a very large number or NaN
            /* double output = network.Compute(3)[0];
            Console.WriteLine(output);*/
        }
    }

    public class NeuralNetwork
    {
        public double LearnRate { get; set; }
        public double Momentum { get; set; }
        public List<Neuron> InputLayer { get; set; }
        public List<Neuron> HiddenLayer { get; set; }
        public List<Neuron> OutputLayer { get; set; }
        static Random random = new Random();

        public NeuralNetwork(int inputSize, int hiddenSize, int outputSize)
        {
            LearnRate = .2;
            Momentum = .04;
            InputLayer = new List<Neuron>();
            HiddenLayer = new List<Neuron>();
            OutputLayer = new List<Neuron>();

            for (int i = 0; i < inputSize; i++)
                InputLayer.Add(new Neuron());

            for (int i = 0; i < hiddenSize; i++)
                HiddenLayer.Add(new Neuron(InputLayer));

            for (int i = 0; i < outputSize; i++)
                OutputLayer.Add(new Neuron(HiddenLayer));
        }

        public void Train(params double[] inputs)
        {
            int i = 0;
            InputLayer.ForEach(a => a.Value = inputs[i++]);
            HiddenLayer.ForEach(a => a.CalculateValue());
            OutputLayer.ForEach(a => a.CalculateValue());
        }

        public double[] Compute(params double[] inputs)
        {
            Train(inputs);
            return OutputLayer.Select(a => a.Value).ToArray();
        }

        public double CalculateError(params double[] targets)
        {
            int i = 0;
            return OutputLayer.Sum(a => Math.Abs(a.CalculateError(targets[i++])));
        }

        public void BackPropagate(params double[] targets)
        {
            int i = 0;
            OutputLayer.ForEach(a => a.CalculateGradient(targets[i++]));
            HiddenLayer.ForEach(a => a.CalculateGradient());
            HiddenLayer.ForEach(a => a.UpdateWeights(LearnRate, Momentum));
            OutputLayer.ForEach(a => a.UpdateWeights(LearnRate, Momentum));
        }

        public static double NextRandom()
        {
            return 2 * random.NextDouble() - 1;
        }

        public static double SigmoidFunction(double x)
        {
            if (x < -45.0)
            {
                return 0.0;
            }
            else if (x > 45.0)
            {
                return 1.0;
            }
            return 1.0 / (1.0 + Math.Exp(-x));
        }

        public static double SigmoidDerivative(double f)
        {
            return f * (1 - f);
        }

        public static double HyperTanFunction(double x)
        {
            if (x < -10.0) return -1.0;
            else if (x > 10.0) return 1.0;
            else return Math.Tanh(x);
        }

        public static double HyperTanDerivative(double f)
        {
            return (1 - f) * (1 + f);
        }

        public static double IdentityFunction(double x)
        {
            return x;
        }

        public static double IdentityDerivative()
        {
            return 1;
        }
    }

    public class Neuron
    {
        public bool IsInput { get { return InputSynapses.Count == 0; } }
        public bool IsHidden { get { return InputSynapses.Count != 0 && OutputSynapses.Count != 0; } }
        public bool IsOutput { get { return OutputSynapses.Count == 0; } }
        public List<Synapse> InputSynapses { get; set; }
        public List<Synapse> OutputSynapses { get; set; }
        public double Bias { get; set; }
        public double BiasDelta { get; set; }
        public double Gradient { get; set; }
        public double Value { get; set; }

        public Neuron()
        {
            InputSynapses = new List<Synapse>();
            OutputSynapses = new List<Synapse>();
            Bias = NeuralNetwork.NextRandom();
        }

        public Neuron(List<Neuron> inputNeurons)
            : this()
        {
            foreach (var inputNeuron in inputNeurons)
            {
                var synapse = new Synapse(inputNeuron, this);
                inputNeuron.OutputSynapses.Add(synapse);
                InputSynapses.Add(synapse);
            }
        }

        public virtual double CalculateValue()
        {
            var d = InputSynapses.Sum(a => a.Weight * a.InputNeuron.Value);// + Bias;
            return Value = IsHidden ? NeuralNetwork.SigmoidFunction(d) : NeuralNetwork.IdentityFunction(d);
        }

        public virtual double CalculateDerivative()
        {
            var d = Value;
            return IsHidden ? NeuralNetwork.SigmoidDerivative(d) : NeuralNetwork.IdentityDerivative();
        }

        public double CalculateError(double target)
        {
            return target - Value;
        }

        public double CalculateGradient(double target)
        {
            return Gradient = CalculateError(target) * CalculateDerivative();
        }

        public double CalculateGradient()
        {
            return Gradient = OutputSynapses.Sum(a => a.OutputNeuron.Gradient * a.Weight) * CalculateDerivative();
        }

        public void UpdateWeights(double learnRate, double momentum)
        {
            var prevDelta = BiasDelta;
            BiasDelta = learnRate * Gradient; // * 1
            Bias += BiasDelta + momentum * prevDelta;

            foreach (var s in InputSynapses)
            {
                prevDelta = s.WeightDelta;
                s.WeightDelta = learnRate * Gradient * s.InputNeuron.Value;
                s.Weight += s.WeightDelta; //;+ momentum * prevDelta;
            }
        }
    }

    public class Synapse
    {
        public Neuron InputNeuron { get; set; }
        public Neuron OutputNeuron { get; set; }
        public double Weight { get; set; }
        public double WeightDelta { get; set; }

        public Synapse(Neuron inputNeuron, Neuron outputNeuron)
        {
            InputNeuron = inputNeuron;
            OutputNeuron = outputNeuron;
            Weight = NeuralNetwork.NextRandom();
        }
    }
}