试图用Python编写我自己的神经网络

use*_*430 10 python machine-learning neural-network

上学期我参加了由Ng教授讲授的斯坦福大学的在线机器学习课程.http://www.ml-class.org/course/auth/welcome我认为它非常有用.为了更好地了解/理解神经网络,我尝试在python中编写自己的神经网络.这里是:

import numpy

class NN:

    def __init__(self, sl):

        #sl = number of units (not counting bias unit) in layer l
        self.sl = sl
        self.layers = len(sl)

        #Create weights
        self.weights = []
        for idx in range(1, self.layers):
            self.weights.append(numpy.matrix(numpy.random.rand(self.sl[idx-1]+1, self.sl[idx])/5))

        self.cost = []

    def update(self, input):

        if input.shape[1] != self.sl[0]:
            raise ValueError, 'The first layer must have a node for every feature'

        self.z = []
        self.a = []

        #Input activations.  I'm expecting inputs as numpy matrix (Examples x Featrues) 
        self.a.append(numpy.hstack((numpy.ones((input.shape[0], 1)), input)))#Set inputs ai + bias unit

        #Hidden activations
        for weight in self.weights:         
            self.z.append(self.a[-1]*weight)
            self.a.append(numpy.hstack((numpy.ones((self.z[-1].shape[0], 1)), numpy.tanh(self.z[-1])))) #tanh is a fancy sigmoid

        #Output activation
        self.a[-1] = self.z[-1] #Not logistic regression thus no sigmoid function
        del self.z[-1]

    def backPropagate(self, targets, lamda):

        m = float(targets.shape[0]) #m is number of examples

        #Calculate cost
        Cost = -1/m*sum(numpy.power(self.a[-1] - targets, 2))
        for weight in self.weights:
            Cost = Cost + lamda/(2*m)*numpy.power(weight[1:, :], 2).sum()
        self.cost.append(abs(float(Cost)))

        #Calculate error for each layer
        delta = []
        delta.append(self.a[-1] - targets)
        for idx in range(1, self.layers-1): #No delta for the input layer because it is the input
            weight = self.weights[-idx][1:, :] #Ignore bias unit
            dsigmoid = numpy.multiply(self.a[-(idx+1)][:,1:], 1-self.a[-(idx+1)][:,1:]) #dsigmoid is a(l).*(1-a(l))
            delta.append(numpy.multiply(delta[-1]*weight.T, dsigmoid)) #Ignore Regularization

        Delta = []
        for idx in range(self.layers-1):
            Delta.append(self.a[idx].T*delta[-(idx+1)])

        self.weight_gradient = []
        for idx in range(len(Delta)):
            self.weight_gradient.append(numpy.nan_to_num(1/m*Delta[idx] + numpy.vstack((numpy.zeros((1, self.weights[idx].shape[1])), lamda/m*self.weights[idx][1:, :]))))

    def train(self, input, targets, alpha, lamda, iterations = 1000):

        #alpha: learning rate
        #lamda: regularization term

        for i in range(iterations):
            self.update(input)
            self.backPropagate(targets, lamda)
            self.weights = [self.weights[idx] - alpha*self.weight_gradient[idx] for idx in range(len(self.weights))]

    def predict(self, input):

        self.update(input)
        return self.a[-1]
Run Code Online (Sandbox Code Playgroud)

但它不起作用=(.检查成本与迭代我可以看到成本中的一个小点,A的预测都是一样的.有人能帮助我理解为什么我的神经网络没有收敛吗?

谢谢,抱歉代码的数量(也许有人会发现它很有用).

更新:

我没有使用随机数据,而是从UCI机器学习库获得了一些结构化数据.特定数据集是葡萄牙东北地区森林火灾的烧毁区域,使用气象和其他数据:http://archive.ics.uci.edu/ml/datasets/Forest+Fires我修改了数据,以便天数和月数是数字:https://docs.google.com/spreadsheet/ccc?key = 0Am3oTptaLsExdC1PeXl1eTczRnRNejl3QUo5RjNLVVE

data = numpy.loadtxt(open('FF-data.csv', 'rb'), delimiter = ',', skiprows = 1)
features = data[:,0:11]
targets = numpy.matrix(data[:,12]).T

nfeatures = (features-features.mean(axis=0))/features.std(axis=0)

n = NN([11, 10, 1]) #The class takes the list of how many nodes in each layer
n.train(nfeatures, targets, 0.003, 0.0)

import matplotlib.pyplot
matplotlib.pyplot.subplot(221)
matplotlib.pyplot.plot(n.cost)
matplotlib.pyplot.title('Cost vs. Iteration')

matplotlib.pyplot.subplot(222)
matplotlib.pyplot.scatter(n.predict(nfeatures), targets)
matplotlib.pyplot.title('Data vs. Predicted')

matplotlib.pyplot.savefig('Report.png', format = 'png')
matplotlib.pyplot.close()
Run Code Online (Sandbox Code Playgroud)

为什么成本低于4000左右,为什么Data Vs. 预测没有任何趋势?您可以在此处查看图表:https://docs.google.com/open?id = 0B23oTptaLsExMTQ0OTAxNWEtYjE2NS00MjA5LTg1MjMtNDBhYjVmMTFhZDhm

Son*_*nya 7

(对不起,我没有足够的代表来添加评论,所以我会继续发布答案.)

是的,看起来确实很奇怪.但是,如果在训练后生成新的矩阵B:

B = numpy.random.rand(5, 4)/5
Targets = B*X
print n.predict(B)
print B*X
Run Code Online (Sandbox Code Playgroud)

它会正常工作(大部分时间 - 有时它仍会给出平均值(目标)作为答案).注意:我在使用100个功能时只使用了4个功能.

另外,我认为在数据集的50个元素上运行5000次迭代对你没什么好处.您通常应尽量使用尽可能多的训练数据 - 在这里您可以根据需要使用,但您使用的示例甚至比您的功能更少.

这很有趣,我会考虑更多:)我正在使用你的网络一个更简单的例子 - 因为输入我提供了两个数字,并期望它们的总和作为输出.它的工作或多或少都没问题.


use*_*430 4

由于多种原因,神经网络无法对森林火灾数据进行训练https://docs.google.com/spreadsheet/ccc?key=0Am3oTptaLsExdC1PeXl1eTczRnRNejl3QUo5RjNLVVE 。

首先,numpy.tanh() sigmoid 函数的行为不符合预期。代码应从以下位置更改:

self.a.append(numpy.hstack((numpy.ones((self.z[-1].shape[0], 1)),numpy.tanh(self.z[-1])))) #tanh is a fancy sigmoid
Run Code Online (Sandbox Code Playgroud)

到:

self.a.append(numpy.hstack((numpy.ones((self.z[-1].shape[0], 1)), 1/(1+numpy.exp(-self.z[-1])))))
Run Code Online (Sandbox Code Playgroud)

第二个 numpy 和 matplotlib 表现不佳。numpy 矩阵似乎是向后绘制的。这可以通过使用matrix.tolist()来修复。代码更改自:

matplotlib.pyplot.scatter(n.predict(nfeatures), targets)
Run Code Online (Sandbox Code Playgroud)

到:

matplotlib.pyplot.scatter(n.predict(nfeatures).tolist(), targets.tolist())
Run Code Online (Sandbox Code Playgroud)

最后,节点数量应约为示例大小的 10%。使用 50 个节点而不是 10 个节点会更好。

下面发布了工作神经网络代码,其中包含一个新函数 autoparam,它试图找到最佳学习率和正则化常数。您可以在此处查看森林火灾成本与迭代以及数据与预测的图表:https://docs.google.com/open ?id=0B23oTptaLsExMWQ4ZWM1ODYtZDMzMC00M2VkLWI1OWUtYzg3NzgxNWYyMTIy

谢谢阅读!我希望我的神经网络可以帮助人们。

import numpy

class NN:

    def __init__(self, sl):

        #sl = number of units (not counting bias unit) in layer l
        self.sl = sl
        self.layers = len(sl)

        #Create weights
        self.weights = []
        for idx in range(1, self.layers):
            self.weights.append(numpy.matrix(numpy.random.rand(self.sl[idx-1]+1, self.sl[idx]))/5)

        self.cost = []

    def update(self, input):

        if input.shape[1] != self.sl[0]:
            raise ValueError, 'The first layer must have a node for every feature'

        self.z = []
        self.a = []

        #Input activations.  Expected inputs as numpy matrix (Examples x Featrues) 
        self.a.append(numpy.hstack((numpy.ones((input.shape[0], 1)), input)))#Set inputs ai + bias unit

        #Hidden activations
        for weight in self.weights: 
            self.z.append(self.a[-1]*weight)
            self.a.append(numpy.hstack((numpy.ones((self.z[-1].shape[0], 1)), 1/(1+numpy.exp(-self.z[-1]))))) #sigmoid

        #Output activation
        self.a[-1] = self.z[-1] #Not logistic regression thus no sigmoid function
        del self.z[-1]

    def backPropagate(self, targets, lamda):

        m = float(targets.shape[0]) #m is number of examples

        #Calculate cost
        Cost = -1/m*sum(numpy.power(self.a[-1] - targets, 2))
        for weight in self.weights:
            Cost = Cost + lamda/(2*m)*numpy.power(weight[1:, :], 2).sum()
        self.cost.append(abs(float(Cost)))

        #Calculate error for each layer
        delta = []
        delta.append(self.a[-1] - targets)
        for idx in range(1, self.layers-1): #No delta for the input layer because it is the input
            weight = self.weights[-idx][1:, :] #Ignore bias unit
            dsigmoid = numpy.multiply(self.a[-(idx+1)][:,1:], 1-self.a[-(idx+1)][:,1:]) #dsigmoid is a(l).*(1-a(l))
            delta.append(numpy.multiply(delta[-1]*weight.T, dsigmoid)) #Ignore Regularization

        Delta = []
        for idx in range(self.layers-1):
            Delta.append(self.a[idx].T*delta[-(idx+1)])

        self.weight_gradient = []
        for idx in range(len(Delta)):
            self.weight_gradient.append(numpy.nan_to_num(1/m*Delta[idx] + numpy.vstack((numpy.zeros((1, self.weights[idx].shape[1])), lamda/m*self.weights[idx][1:, :]))))

    def train(self, input, targets, alpha, lamda, iterations = 1000):

        #alpha: learning rate
        #lamda: regularization term

        for i in range(iterations):
            self.update(input)
            self.backPropagate(targets, lamda)
            self.weights = [self.weights[idx] - alpha*self.weight_gradient[idx] for idx in range(len(self.weights))]

    def autoparam(self, data, alpha = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3], lamda = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]):

        #data: numpy matrix with targets in last column
        #alpha: learning rate
        #lamda: regularization term

        #Create training, cross validation, and test sets
        while 1:
            try:
                numpy.seterr(invalid = 'raise')
                numpy.random.shuffle(data) #Shuffle data
                training_set = data[0:data.shape[0]/10*6, 0:-1]
                self.ntraining_set = (training_set-training_set.mean(axis=0))/training_set.std(axis=0)
                self.training_tgt = numpy.matrix(data[0:data.shape[0]/10*6, -1]).T

                cv_set = data[data.shape[0]/10*6:data.shape[0]/10*8, 0:-1]
                self.ncv_set = (cv_set-cv_set.mean(axis=0))/cv_set.std(axis=0)
                self.cv_tgt = numpy.matrix(data[data.shape[0]/10*6:data.shape[0]/10*8, -1]).T

                test_set = data[data.shape[0]/10*8:, 0:-1]
                self.ntest_set = (test_set-test_set.mean(axis=0))/test_set.std(axis=0)
                self.test_tgt = numpy.matrix(data[data.shape[0]/10*8:, -1]).T

                break

            except FloatingPointError:
                pass

        numpy.seterr(invalid = 'warn')
        cost = 999999
        for i in alpha:
            for j in lamda:
                self.__init__(self.sl)
                self.train(self.ntraining_set, self.training_tgt, i, j, 2000)
                current_cost = 1/float(cv_set.shape[0])*sum(numpy.square(self.predict(self.ncv_set) - self.cv_tgt)).tolist()[0][0]
                print current_cost
                if current_cost < cost:
                    cost = current_cost
                    self.learning_rate = i
                    self.regularization = j
        self.__init__(self.sl)

    def predict(self, input):

        self.update(input)
        return self.a[-1]
Run Code Online (Sandbox Code Playgroud)

加载数据、绘图等...

data = numpy.loadtxt(open('FF-data.csv', 'rb'), delimiter = ',', skiprows = 1)#Load
numpy.random.shuffle(data)

features = data[:,0:11]
nfeatures = (features-features.mean(axis=0))/features.std(axis=0)
targets = numpy.matrix(data[:, 12]).T

n = NN([11, 50, 1])

n.train(nfeatures, targets, 0.07, 0.0, 2000)

import matplotlib.pyplot
matplotlib.pyplot.subplot(221)
matplotlib.pyplot.plot(n.cost)
matplotlib.pyplot.title('Cost vs. Iteration')

matplotlib.pyplot.subplot(222)
matplotlib.pyplot.scatter(n.predict(nfeatures).tolist(), targets.tolist())
matplotlib.pyplot.plot(targets.tolist(), targets.tolist(), c = 'r')
matplotlib.pyplot.title('Data vs. Predicted')

matplotlib.pyplot.savefig('Report.png', format = 'png')
matplotlib.pyplot.close()
Run Code Online (Sandbox Code Playgroud)