A Python Implementation of the Neural Network Assignment from Stanford's Machine Learning Course

I have recently been working through Stanford's machine learning course and implemented the ex4 assignment in Python myself; I'm sharing it here for reference and study. Suggestions for improvement or discussion are welcome.
  • The whole network is wrapped in a class.
  • Any number of hidden layers can be configured through the parameters (see the sketch after this list).
  • scipy's minimize function is used to compute theta. (I tried several algorithms without really knowing how to tune their parameters; the BFGS algorithm returned a MemoryError.)
  • Would implementing the optimization loop by hand, instead of calling minimize, be less efficient? I haven't tested this yet; if anyone does, please let me know.
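
The second point above boils down to choosing the shapes of the weight matrices. As a minimal sketch (build_theta_set is my own hypothetical helper, not part of the class below), a theta_set for an arbitrary list of hidden-layer sizes could be built like this, assuming the inputs already include their bias column:

    import numpy as np

    def build_theta_set(input_size_with_bias, hidden_sizes, num_labels):
        # Each theta has shape (units_out, units_in_including_bias).
        in_sizes = [input_size_with_bias] + [h + 1 for h in hidden_sizes]
        out_sizes = hidden_sizes + [num_labels]
        return [np.zeros((o, i)) for i, o in zip(in_sizes, out_sizes)]

For example, build_theta_set(401, [80, 25], 10) reproduces the three-layer shapes used in the main section at the bottom of the listing. The full implementation follows:
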
    # -*- coding: utf-8 -*-
    import numpy as np
    import scipy.io as sio
    import scipy.optimize as op
    import threading
    import datetime
    
    class NeuralNetwork:
        @property
        def ThetaSet(self):
            return self.__theta_set
        
        @property
        def ResultMsg(self):
            return self.__resultMsg
    
        def __init__(self):
            np.random.seed(1)
            self.trainingState = "Not Started"
            self.__inputs = np.array([])
            self.__outputs = np.array([])
            self.__num_labels = 0
            self.__theta_set = []
            self.__regular = 0
            self.__iterations = 0
            self.__minimize_method = 'L-BFGS-B'
            self.__minimize_disp = False
            self.__labels = []
            self.__resultMsg = None
    
        def sigmoid(self, z):
            return 1 / (1 + np.exp(-z))
    
        def sigmoidGradient(self, a):
            # Note: expects an activation a = sigmoid(z), so that
            # g'(z) = a * (1 - a).
            return a * (1 - a)
    
        def unrollTheta(self, theta_set):
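            # Flatten the per-layer weight matrices into one 1-D vector,
            # the parameter format that scipy.optimize.minimize expects.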
            v = []
            for t in theta_set:
                v.extend(t.flatten().tolist())
            return np.array(v)
    
        def obtainTheta(self, theta):
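            # Inverse of unrollTheta: slice the flat parameter vector back
            # into matrices shaped like the current theta set.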
            theta_set = []
            pos1 = 0
            pos2 = 0
            for t in self.__theta_set:
                pos2 += t.size
                tmp = theta[pos1 : pos2]
                tmp = tmp.reshape(t.shape)
                theta_set.append(tmp)
                pos1 = pos2
            return theta_set
    
        def costFunction(self, nn_params, inputs, outputs):
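            # Returns (J, grad): the regularized cross-entropy cost and the
            # gradient, unrolled into a flat vector for scipy.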
            # Set up parameters
            theta_set = self.obtainTheta(nn_params)
            m = np.size(inputs, 0)
            J = 0
            layers = len(theta_set)
            theta_grad = []
            for i in range(layers):
                theta_grad.append(np.zeros(theta_set[i].shape))
    
            # Forward propagation to calculate the cost
            aval = []
            aval.append(inputs)
            
            for i in range(layers):
                a = self.sigmoid(aval[-1].dot(theta_set[i].T))
                a = np.column_stack((np.ones((m, 1)), a))
                aval.append(a)
    
            
            # Calculate the cost
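            # J = -(1/m) * sum( y*log(h) + (1-y)*log(1-h) )
            #     + (lambda/(2*m)) * sum(theta[:, 1:] ** 2)
            # where lambda is self.__regular and the bias column of each
            # theta is excluded from the regularization term.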
            jval = np.sum((-1) * outputs * np.log(aval[-1][:, 1:]) - (1 - outputs) * np.log(1 - aval[-1][:, 1:])) / m
            rval = 0
            for x in theta_set:
                rval += np.sum(np.power(x[:,1:],2))
            
            rval = rval * self.__regular/(2*m)
            J = jval + rval
    
            # Implement the backward propagation to compute the gradients
            deltas = []
            # Deltas are built in backward order, i.e. the output-layer delta
            # is computed first.
            for i in range(layers):
                if i == 0:
                    # For the cross-entropy cost the output-layer delta is
                    # simply h - y; no sigmoid-gradient factor is needed here.
                    deltas.append(aval[-1][:, 1:] - outputs)
                else:
                    # Hidden layers: propagate the previous delta through the
                    # weights (bias column dropped) and scale by g'(a).
                    d = deltas[-1].dot(theta_set[layers - i][:, 1:])
                    deltas.append(d * self.sigmoidGradient(aval[layers - i][:, 1:]))
    
            # Note that aval is in forward order while deltas is in backward
            # order, so deltas[layers-1-i] pairs with the activations in aval[i].
            for i in range(layers):
                theta_grad[i] = deltas[layers - 1 - i].T.dot(aval[i])
    
            theta_regular = []
            for t in theta_set:
                theta_regular.append(np.column_stack((np.zeros((np.size(t,0),1)),t[:,1:])))
            
            for i in range(layers):
                theta_grad[i] = (1/m) * theta_grad[i] + (self.__regular * theta_regular[i])/m
    
            grad = self.unrollTheta(theta_grad)
            return (J, grad)
    
        def minimize(self, initial_theta):
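            # Run scipy.optimize.minimize on costFunction; jac=True tells it
            # that costFunction returns (cost, gradient) in one call.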
            
            # See more options from https://docs.scipy.org/doc/scipy/reference/optimize.html
    
            opts = {'maxiter':self.__iterations, 'disp':self.__minimize_disp}
            if self.__minimize_method == "TNC":
                opts['maxCGit'] = 0
                opts['stepmx'] = 500
            elif self.__minimize_method == 'L-BFGS-B':
                opts['eps'] = 1e-8
            else:
                pass
    
            Result = op.minimize(fun = self.costFunction,
                         x0 = initial_theta,
                         args = (self.__inputs, self.__outputs),
                         method = self.__minimize_method,
                         jac = True,
                         options= opts)
    
            optimal_theta = Result.x

            self.__resultMsg = "Iterations = " + str(Result.nit)
            self.__resultMsg += "\n" + str(Result.message)

            self.__theta_set = self.obtainTheta(optimal_theta)

        def train(self, training_set_inputs, training_set_outputs,
                  number_of_labels, labels, theta_set, regular,
                  training_iterations, minimize_method, minimize_disp):
            self.__inputs = training_set_inputs
            self.__outputs = training_set_outputs
            self.__num_labels = number_of_labels
            self.__labels = labels
            self.__theta_set = theta_set
            self.__regular = regular
            self.__iterations = training_iterations
            self.__minimize_method = minimize_method
            self.__minimize_disp = minimize_disp

            if self.__checkInput():
                self.trainingState = "Training"
                print(".....Initialize theta with random weights between (-1, 1)")
                for i in range(len(self.__theta_set)):
                    rd = 2 * np.random.random(self.__theta_set[i].shape) - 1
                    self.__theta_set[i] = rd
                initial_theta = self.unrollTheta(self.__theta_set)
                print(".....Minimize the cost function in a new thread.")
                # Sanity check: evaluate the cost once at the initial theta
                J, g = self.costFunction(initial_theta, self.__inputs, self.__outputs)
                print("Cost = %f" % J)
                # join() blocks until the optimization finishes, so the thread
                # adds no real concurrency here.
                t = threading.Thread(target=self.minimize, args=(initial_theta,))
                t.daemon = True
                t.start()
                t.join()
                self.trainingState = "Done"
            else:
                print(".....Change the parameters and try again.")

        def predict(self, predict_input):
            h = None
            m = np.size(predict_input, 0)
            for t in self.__theta_set:
                if h is None:
                    # The inputs are expected to carry their bias column already.
                    h = self.sigmoid(predict_input.dot(t.T))
                else:
                    h = np.column_stack((np.ones((m, 1)), h))
                    h = self.sigmoid(h.dot(t.T))
            label = np.zeros((m, 1)) - 1
            for i in range(m):
                label[i] = self.__labels[np.nanargmax(h[i, :])]
            return label

        def getAccuracy(self, inputs, outputs):
            acc = 0.0
            if self.__checkInput():
                m = np.size(inputs, 0)
                h = None
                for t in self.__theta_set:
                    if h is None:
                        h = self.sigmoid(inputs.dot(t.T))
                    else:
                        h = np.column_stack((np.ones((m, 1)), h))
                        h = self.sigmoid(h.dot(t.T))
                check = (np.argmax(h, 1) == np.argmax(outputs, 1))
                acc = np.sum(check) / np.size(check, 0)
            else:
                print("Cannot calculate the accuracy.")
            return acc

        def printInfo(self):
            print(".....Inputs size:" + str(self.__inputs.shape))
            print(".....Outputs size:" + str(self.__outputs.shape))
            print(".....Labels:" + str(self.__labels))
            print(".....Number of layers:" + str(len(self.__theta_set)))
            print(".....Theta Set:\n" + "\n".join([str(t.shape) for t in self.__theta_set]))
            print(".....Regularization parameter:" + str(self.__regular))
            print(".....Iterations:" + str(self.__iterations))
            if self.trainingState == "Done":
                print(".....Training Result:\n" + str(self.ResultMsg) +
                      "\nAccuracy: " + str(self.getAccuracy(self.__inputs, self.__outputs)))
            else:
                print(".....Training State:" + self.trainingState)

        def __checkInput(self):
            flag = False
            if self.__inputs.size < 4 or self.__inputs.ndim != 2:
                # Error 1001
                self.__reportError(1001)
            elif (self.__outputs.ndim != 2
                  or np.size(self.__outputs, 1) != self.__num_labels
                  or np.size(self.__outputs, 0) != np.size(self.__inputs, 0)):
                # Error 1002
                self.__reportError(1002)
            elif (self.__num_labels != len(self.__labels)
                  or len(self.__labels) < 3
                  or len(self.__labels) != len(list(set(self.__labels)))):
                # Error 1003
                self.__reportError(1003)
            elif (len(self.__theta_set) == 0
                  or np.size(self.__theta_set[0], 1) != np.size(self.__inputs, 1)
                  or bool(sum([np.size(self.__theta_set[t], 0) != (np.size(self.__theta_set[t + 1], 1) - 1)
                               for t in range(len(self.__theta_set) - 1)]))
                  or np.size(self.__theta_set[-1], 0) != np.size(self.__outputs, 1)):
                # Error 1004
                self.__reportError(1004)
            elif self.__regular is None or self.__iterations < 3:
                # Error 1005
                self.__reportError(1005)
            else:
                flag = True
            return flag

        def __reportError(self, errorCode, msg=""):
            errors = {
                1001: "Inputs should be a non-empty 2-D array.",
                1002: "Outputs should be a 2-D array with len(Y) = label_size "
                      "and len(X) = len(Inputs), and all values 0 or 1 "
                      "(exactly one item = 1 per row).",
                1003: "Values in the Labels array should be unique and "
                      "len(Labels) > 2.",
                1004: "For the first theta, size(theta(0),1) = size(inputs,1); "
                      "size(theta(i),0) = size(theta(i+1),1) - 1; and for the "
                      "last one, size(theta(end),0) = len(labels).",
                1005: "Regularization should not be empty and training "
                      "iterations should be larger than 2."
            }
            try:
                print(".....%d: %s\n\t %s\n" % (errorCode, errors[errorCode], msg))
            except KeyError:
                print(".....Error code is undefined.")

    # Main
    if __name__ == '__main__':
        # Initialize training set inputs and outputs
        matfn = "D:\\MachineLearning\\machine-learning-ex4\\ex4\\ex4data1.mat"
        data = sio.loadmat(matfn)
        inputs = data["X"]
        y = data["y"]
        m = np.size(inputs, 0)
        inputs = np.column_stack((np.ones((m, 1)), inputs))
        labels = list(set(y.flatten()))
        labels_size = len(labels)
        outputs = np.zeros((np.size(inputs, 0), labels_size))
        for i in range(m):
            outputs[i, labels.index(y[i, 0])] = 1

        # Initialize theta set (2 layers)
        # layer1_size = np.size(inputs, 1)
        # layer2_size = 25
        # theta2 = np.zeros((labels_size, layer2_size + 1))
        # theta1 = np.zeros((layer2_size, layer1_size))
        # theta_set = [theta1, theta2]

        # Initialize theta set (3 layers)
        layer1_size = np.size(inputs, 1)
        layer2_size = 80
        layer3_size = 25
        theta3 = np.zeros((labels_size, layer3_size + 1))
        theta2 = np.zeros((layer3_size, layer2_size + 1))
        theta1 = np.zeros((layer2_size, layer1_size))
        theta_set = [theta1, theta2, theta3]

        # Set parameters
        iterations = 100
        regular = 1
        # minimize_method = 'TNC'
        minimize_method = 'L-BFGS-B'  # Fastest of the methods tried
        minimize_disp = False

        # Train the data set
        t_start = datetime.datetime.now()
        neuralNetwork = NeuralNetwork()
        neuralNetwork.train(inputs, outputs, labels_size, labels, theta_set,
                            regular, iterations, minimize_method, minimize_disp)

        # Print result
        neuralNetwork.printInfo()
        t_end = datetime.datetime.now()
        print("Total seconds: %f" % (t_end - t_start).total_seconds())

        # Test result for 3 layers
        # Accuracy: 0.9826
        # Total seconds: 6.638665