スタンフォード大学 機械学習コース:ニューラルネットワーク課題(ex4)の Python 実装
12402 ワード
最近、スタンフォード大学の機械学習コースを受講し、ex4 の課題を自分で Python で実装しました。皆さんの学習の参考になればと思い、共有します。改善の提案や議論を歓迎します。実装の要点は次のとおりです。(1) 処理はクラスとしてまとめてあります。(2) パラメータによって複数の隠れ層(hidden layers)を定義できます。(3) theta の計算には scipy の minimize 関数を使用しています(複数のアルゴリズムを試しましたが、パラメータの調整方法がよく分からず、BFGS アルゴリズムでは MemoryError が返されました)。minimize 関数を使わずに自分で反復ループを実装した場合、効率は比較的低くなるのでしょうか。これはまだテストしていないので、もし試せる方がいれば教えてください。
# -*- coding: utf-8 -*-
import numpy as np
import scipy.io as sio
import scipy.optimize as op
import threading
import datetime
class NerualNetwork():
    """Fully connected sigmoid network trained with ``scipy.optimize.minimize``.

    The network is described by a list of weight matrices (``theta_set``).
    Matrix ``i`` has shape ``(units_out, units_in + 1)``; its first column
    multiplies the bias unit.  The inputs passed to ``train`` must already
    contain the bias column, which is why the first matrix has exactly as many
    columns as the input array.
    """

    @property
    def ThetaSet(self):
        """List of weight matrices currently held by the network."""
        return self.__theta_set

    @property
    def ResultMsg(self):
        """Summary text of the last optimisation run, or None if none finished."""
        return self.__resultMsg

    def __init__(self):
        # Fixed seed so that the random weight initialisation is repeatable.
        np.random.seed(1)
        self.trainingState = "Not Started"
        self.__inputs = np.array([])
        self.__outputs = np.array([])
        self.__num__labels = np.array([])
        self.__theta_set = []
        self.__regular = 0
        self.__iterations = 0
        self.__minimize_method = 'L-BFGS-B'
        self.__minimize_disp = False
        self.__labels = []
        self.__resultMsg = None

    def sigmoid(self, z):
        """Return the element-wise logistic function ``1 / (1 + exp(-z))``."""
        return 1 / (1 + np.exp(-z))

    def sigmoidGradient(self, z):
        """Return ``z * (1 - z)``.

        This is the sigmoid derivative expressed in terms of the sigmoid
        *output*, so callers must pass activations, not pre-activations.
        """
        return z * (1 - z)

    def unrollTheta(self, theta_set):
        """Flatten a list of matrices into a single 1-D array (row-major)."""
        if len(theta_set) == 0:
            return np.array([])
        return np.concatenate([np.asarray(t).flatten() for t in theta_set])

    def obtainTheta(self, theta):
        """Split a flat parameter vector back into a list of matrices.

        The shapes are taken from the matrices currently stored on the
        instance, so ``theta`` must have been produced from a list with the
        same layout.
        """
        theta_set = []
        start = 0
        for t in self.__theta_set:
            stop = start + t.size
            theta_set.append(theta[start:stop].reshape(t.shape))
            start = stop
        return theta_set

    def __hypothesis(self, inputs):
        """Forward-propagate ``inputs`` and return the output activations."""
        m = np.size(inputs, 0)
        h = None
        for t in self.__theta_set:
            if h is None:
                # The first layer consumes the inputs as given (bias included).
                h = self.sigmoid(inputs.dot(t.T))
            else:
                h = np.column_stack((np.ones((m, 1)), h))
                h = self.sigmoid(h.dot(t.T))
        return h

    def costFunction(self, nn_params, inputs, outputs):
        """Return the regularised cross-entropy cost and its gradient.

        Parameters
        ----------
        nn_params : 1-D array holding all weights (see ``unrollTheta``).
        inputs    : 2-D array, one sample per row, bias column included.
        outputs   : 2-D one-hot array, one sample per row.

        Returns
        -------
        (J, grad) : the scalar cost and the gradient as a flat array laid out
        like ``nn_params``.
        """
        theta_set = self.obtainTheta(nn_params)
        m = np.size(inputs, 0)
        layers = len(theta_set)

        # Forward propagation.  aval[i] is the (bias-augmented) input of
        # theta_set[i]; aval[-1][:, 1:] is the network output.
        aval = [inputs]
        for theta in theta_set:
            a = self.sigmoid(aval[-1].dot(theta.T))
            aval.append(np.column_stack((np.ones((m, 1)), a)))
        h = aval[-1][:, 1:]

        # Cross-entropy cost plus L2 penalty on the non-bias weights.
        jval = np.sum(-outputs * np.log(h) - (1 - outputs) * np.log(1 - h)) / m
        rval = sum(np.sum(np.power(t[:, 1:], 2)) for t in theta_set)
        rval = rval * self.__regular / (2 * m)
        J = jval + rval

        # Backward propagation.  For a sigmoid output with cross-entropy cost
        # the output delta is simply (h - y): no sigmoid-derivative factor.
        # Each earlier delta is built from the *delta* of the following layer,
        # so the derivative factors accumulate correctly for any depth.
        theta_grad = [None] * layers
        delta = h - outputs
        for i in range(layers - 1, -1, -1):
            theta_grad[i] = delta.T.dot(aval[i])
            if i > 0:
                delta = (delta.dot(theta_set[i][:, 1:])
                         * self.sigmoidGradient(aval[i][:, 1:]))

        # Average over samples and add the regularisation term (bias column
        # of every matrix is not regularised).
        for i, t in enumerate(theta_set):
            reg = np.column_stack((np.zeros((np.size(t, 0), 1)), t[:, 1:]))
            theta_grad[i] = theta_grad[i] / m + (self.__regular * reg) / m

        grad = self.unrollTheta(theta_grad)
        return (J, grad)

    def minimize(self, initial_theta):
        """Run the configured optimiser and store the resulting weights.

        On completion ``ThetaSet`` holds the optimised matrices and
        ``ResultMsg`` the iteration count and solver message.
        """
        # See more options from https://docs.scipy.org/doc/scipy/reference/optimize.html
        opts = {'maxiter': self.__iterations, 'disp': self.__minimize_disp}
        if self.__minimize_method == "TNC":
            opts['maxCGit'] = 0
            opts['stepmx'] = 500
        elif self.__minimize_method == 'L-BFGS-B':
            opts['eps'] = 1e-8
        # jac=True: costFunction returns (cost, gradient) in one call.
        Result = op.minimize(fun=self.costFunction,
                             x0=initial_theta,
                             args=(self.__inputs, self.__outputs),
                             method=self.__minimize_method,
                             jac=True,
                             options=opts)
        optimal_theta = Result.x
        self.__resultMsg = "Iterations = " + str(Result.nit)
        self.__resultMsg += "\n" + str(Result.message)
        self.__theta_set = self.obtainTheta(optimal_theta)

    def train(self, training_set_inputs, training_set_outputs,
              number_of_labels, labels, theta_set, regular,
              training_iterations, minimize_method, minimize_disp):
        """Validate the configuration, randomise the weights and optimise them.

        ``theta_set`` is used only for its length and matrix shapes; its
        values are replaced by random weights in (-1, 1).  If validation
        fails, an error is printed and no training takes place.
        """
        self.__inputs = training_set_inputs
        self.__outputs = training_set_outputs
        self.__num__labels = number_of_labels
        self.__labels = labels
        # Work on a copy so the caller's list is not overwritten below.
        self.__theta_set = list(theta_set)
        self.__regular = regular
        self.__iterations = training_iterations
        self.__minimize_method = minimize_method
        self.__minimize_disp = minimize_disp
        if not self.__checkInput():
            print(".....Change the parameters and try again.")
            return

        self.trainingState = "Training"
        print(".....Initialize theta with random weights between(-1, 1)")
        for i in range(len(self.__theta_set)):
            self.__theta_set[i] = (
                2 * np.random.random(self.__theta_set[i].shape) - 1)
        initial_theta = self.unrollTheta(self.__theta_set)
        print(".....Minimize the cost function in a new thread.")
        # This line is for test
        J, g = self.costFunction(initial_theta, self.__inputs, self.__outputs)
        print("Cost = %f" % J)
        # minimize() sets the result message only when it finishes, so a None
        # message after join() means the worker thread raised.
        self.__resultMsg = None
        t = threading.Thread(target=self.minimize, args=(initial_theta,),
                             daemon=True)
        t.start()
        t.join()
        self.trainingState = (
            "Done" if self.__resultMsg is not None else "Failed")

    def predict(self, predict_input):
        """Return an ``(m, 1)`` array with the predicted label of each row.

        The label is the entry of the configured label list at the index of
        the largest output activation.
        """
        m = np.size(predict_input, 0)
        h = self.__hypothesis(predict_input)
        label = np.zeros((m, 1)) - 1
        for i in range(m):
            label[i] = self.__labels[np.nanargmax(h[i, :])]
        return label

    def getAccuracy(self, inputs, outputs):
        """Return the fraction of rows whose arg-max output matches ``outputs``.

        Returns 0.0 (after printing a message) when the stored training
        configuration does not pass validation.
        """
        acc = 0.0
        if self.__checkInput():
            h = self.__hypothesis(inputs)
            check = (np.argmax(h, 1) == np.argmax(outputs, 1))
            acc = np.sum(check) / np.size(check, 0)
        else:
            print("Cannot to calculate the accuracy.")
        return acc

    def printInfo(self):
        """Print the stored configuration and, once trained, the result."""
        print(".....Inputs size:" + str(self.__inputs.shape))
        print(".....Outputs size:" + str(self.__outputs.shape))
        print(".....Labels:" + str(self.__labels))
        # len() rather than np.size(): the matrices have different shapes and
        # cannot be packed into one array.
        print(".....Number of layers:" + str(len(self.__theta_set)))
        print(".....Theta Set:\n"
              + "\n".join([str(t.shape) for t in self.__theta_set]))
        print(".....Regularization parameter:" + str(self.__regular))
        print(".....Iterations:" + str(self.__iterations))
        if self.trainingState == "Done":
            print(".....Training Result:\n" +
                  str(self.ResultMsg) +
                  "\nAccuracy: " +
                  str(self.getAccuracy(self.__inputs, self.__outputs)))
        else:
            print(".....Training State:" + self.trainingState)

    def __checkInput(self):
        """Return True when the stored configuration is consistent.

        The first failing rule is reported through ``__reportError`` and
        False is returned.
        """
        thetas = self.__theta_set
        if self.__inputs.size < 4 or self.__inputs.ndim != 2:
            self.__reportError(1001)
        elif (self.__outputs.ndim != 2
              or np.size(self.__outputs, 1) != self.__num__labels
              or np.size(self.__outputs, 0) != np.size(self.__inputs, 0)):
            self.__reportError(1002)
        elif (self.__num__labels != len(self.__labels)
              or len(self.__labels) < 3
              or len(self.__labels) != len(list(set(self.__labels)))):
            self.__reportError(1003)
        elif (len(thetas) == 0
              or np.size(thetas[0], 1) != np.size(self.__inputs, 1)
              # Rows of one matrix + bias unit = columns of the next one.
              or any(np.size(thetas[k], 0) != np.size(thetas[k + 1], 1) - 1
                     for k in range(len(thetas) - 1))
              or np.size(thetas[-1], 0) != np.size(self.__outputs, 1)):
            self.__reportError(1004)
        elif self.__regular is None or self.__iterations < 3:
            self.__reportError(1005)
        else:
            return True
        return False

    def __reportError(self, errorCode, msg=""):
        """Print the message registered for ``errorCode`` followed by ``msg``."""
        errors = {
            1001: "Inputs should be a non-empty 2-D array.",
            1002: ("Outputs should be a 2-D array with len(Y) = label_size, "
                   "and len(X)=len(Inputs), and all values are 0 or 1 "
                   "(only one item=1)."),
            1003: ("Values in the Labels array should be unique and "
                   "Len(Labels)>2 "),
            # Sign corrected to "-1" so the text matches the actual check.
            1004: ("For the first theta size(thata(0),1)=size(inputs,1), "
                   "size(thata(i),0) = size(thata(i+1),1)-1, "
                   "for the last one size(theta(end),0)=Len(labels)"),
            1005: ("Regularization should not be empty and "
                   "training iterations should larger than 2."),
        }
        try:
            print(".....%d: %s\n\t %s\n" % (errorCode, errors[errorCode], msg))
        except KeyError:
            print(".....Error code is undefined.")
# Main
if __name__ == '__main__':
    # Load the training set: X holds one sample per row, y the class labels.
    matfn = "D:\\MachineLearning\\machine-learning-ex4\\ex4\\ex4data1.mat"
    data = sio.loadmat(matfn)
    inputs = data["X"]
    y = data["y"]
    m = np.size(inputs, 0)
    # Prepend the bias column; the network expects it to be part of the inputs.
    inputs = np.column_stack((np.ones((m, 1)), inputs))
    # Sorted so that the label -> output-column mapping is the same on every
    # run (plain set iteration order is not guaranteed).
    labels = sorted(set(y.flatten()))
    labels_size = len(labels)
    # One-hot encode the labels.
    outputs = np.zeros((np.size(inputs, 0), labels_size))
    for i in range(0, m):
        outputs[i, labels.index(y[i, 0])] = 1

    # Theta set for a single hidden layer (alternative configuration):
    # layer1_size = np.size(inputs, 1)
    # layer2_size = 25
    # theta2 = np.zeros((labels_size, layer2_size + 1))
    # theta1 = np.zeros((layer2_size, layer1_size))
    # theta_set = [theta1, theta2]

    # Theta set with two hidden layers (80 and 25 units).  Only the shapes
    # matter here; the values are re-initialised randomly by train().
    layer1_size = np.size(inputs, 1)
    layer2_size = 80
    layer3_size = 25
    theta3 = np.zeros((labels_size, layer3_size + 1))
    theta2 = np.zeros((layer3_size, layer2_size + 1))
    theta1 = np.zeros((layer2_size, layer1_size))
    theta_set = [theta1, theta2, theta3]

    # Set parameters
    iterations = 100
    regular = 1
    # minimize_method = 'TNC'
    minimize_method = 'L-BFGS-B'
    minimize_disp = False

    # Train the data set
    t_start = datetime.datetime.now()
    nerualNetwork = NerualNetwork()
    nerualNetwork.train(inputs, outputs, labels_size, labels, theta_set,
                        regular, iterations, minimize_method, minimize_disp)

    # Print result
    nerualNetwork.printInfo()
    t_end = datetime.datetime.now()
    print("Total seconds: %f" % (t_end - t_start).total_seconds())
    # Figures reported by the original author for this configuration:
    # Accuracy: 0.9826
    # Total seconds: 6.638665