主線1.2 FMアルゴリズムのPython実装
アルゴリズムの原理については、ブログ記事「主線1.1 FMアルゴリズムの原理」の詳しい解説を参照してください。本稿では直接コードを示し、関連する説明はすべてコード中のコメントに記載しています。
# coding:UTF-8
from __future__ import division
from math import exp
from numpy import *
from random import normalvariate #
from datetime import datetime
trainData = 'E://data//diabetes_train.txt'  # path to the training set (whitespace-delimited, label first)
testData = 'E://data//diabetes_test.txt'  # path to the held-out test set, same format
featureNum = 8  # number of feature columns per sample (the leading label column excluded)
def loadDataSet(data, num_features=None):
    """Load a whitespace-delimited data file into feature rows and labels.

    Each non-empty line is: label (0 or 1) followed by the feature values.

    Args:
        data: path of the file to read.
        num_features: how many feature columns to read per line; defaults to
            the module-level ``featureNum`` for backward compatibility.

    Returns:
        (dataMat, labelMat): list of float feature rows, and list of labels
        remapped from {0, 1} to {-1, +1} (the convention the FM trainer uses).
    """
    if num_features is None:
        num_features = featureNum
    dataMat = []
    labelMat = []
    # FIX: context manager so the file handle is closed even on error
    # (the original opened the file and never closed it).
    with open(data) as fr:
        for line in fr:
            currLine = line.strip().split()
            if not currLine:  # skip blank lines instead of crashing on [0]
                continue
            # columns 1..num_features are the features
            lineArr = [float(currLine[i + 1]) for i in range(num_features)]
            dataMat.append(lineArr)
            # map label 0/1 -> -1/+1
            labelMat.append(float(currLine[0]) * 2 - 1)
    return dataMat, labelMat
def sigmoid(inx):
    """Numerically stable logistic sigmoid: 1 / (1 + e^-x).

    FIX: the naive form ``1.0 / (1 + exp(-inx))`` overflows ``math.exp`` for
    inx below roughly -709.  Split on the sign so exp() is only ever called
    with a non-positive argument; both branches are mathematically equal.
    """
    if inx >= 0:
        return 1.0 / (1 + exp(-inx))
    z = exp(inx)  # safe: inx < 0, so z is in (0, 1)
    return z / (1 + z)
def stocGradAscent(dataMatrix, classLabels, k, iter):
    """Train a Factorization Machine by stochastic gradient descent.

    Args:
        dataMatrix: numpy matrix of shape (m, n), one sample per row.
        classLabels: sequence of m labels in {-1, +1}.
        k: dimensionality of the latent factor vectors.
        iter: number of passes over the training data.

    Returns:
        (w_0, w, v): bias scalar, linear weights of shape (n, 1),
        and the (n, k) latent factor matrix.
    """
    m, n = shape(dataMatrix)
    alpha = 0.01  # learning rate
    w = zeros((n, 1))  # linear-term weights
    w_0 = 0.  # bias term
    # BUG FIX: the original did ``normalvariate(0, 0.2) * ones((n, k))``,
    # which draws ONE random number and broadcasts it, so every latent
    # factor started identical and the pairwise interactions could never
    # differentiate features.  Draw each entry independently instead.
    v = array([[normalvariate(0, 0.2) for _ in range(k)] for _ in range(n)])
    for it in range(iter):
        print(it)  # progress: current epoch (repaired broken Py2 print)
        for x in range(m):
            # sum_f (sum_i v_if x_i) as a (1, k) row
            inter_1 = dataMatrix[x] * v
            # sum_i v_if^2 x_i^2 per factor f
            inter_2 = multiply(dataMatrix[x], dataMatrix[x]) * multiply(v, v)
            # FM pairwise term: ((sum v x)^2 - sum v^2 x^2) / 2
            interaction = sum(multiply(inter_1, inter_1) - inter_2) / 2.
            p = w_0 + dataMatrix[x] * w + interaction  # raw model output
            # derivative of the logit loss w.r.t. the output
            # (sigmoid inlined so the block is self-contained)
            loss = 1.0 / (1 + exp(-classLabels[x] * p[0, 0])) - 1
            print(loss)
            w_0 = w_0 - alpha * loss * classLabels[x]
            for i in range(n):
                if dataMatrix[x, i] != 0:  # zero features contribute no gradient
                    w[i, 0] = w[i, 0] - alpha * loss * classLabels[x] * dataMatrix[x, i]
                    for j in range(k):
                        # NOTE: inter_1 is not refreshed within the sample
                        # update, matching the original implementation.
                        v[i, j] = v[i, j] - alpha * loss * classLabels[x] * (
                            dataMatrix[x, i] * inter_1[0, j] - v[i, j] * dataMatrix[x, i] * dataMatrix[x, i])
    return w_0, w, v
def getAccuracy(dataMatrix, classLabels, w_0, w, v):
    """Return the classification ERROR RATE of an FM model on a data set.

    Args:
        dataMatrix: numpy matrix (m, n) of samples.
        classLabels: sequence of m labels in {-1, +1}.
        w_0, w, v: bias, (n, 1) linear weights, (n, k) latent factors,
            as returned by stocGradAscent.

    Returns:
        float in [0, 1]: fraction of samples whose 0.5-thresholded sigmoid
        score disagrees with the label.
    """
    m, n = shape(dataMatrix)
    allItem = 0
    error = 0
    result = []
    for x in range(m):
        allItem += 1
        inter_1 = dataMatrix[x] * v
        inter_2 = multiply(dataMatrix[x], dataMatrix[x]) * multiply(v, v)
        # FM pairwise term: ((sum v x)^2 - sum v^2 x^2) / 2
        interaction = sum(multiply(inter_1, inter_1) - inter_2) / 2.
        p = w_0 + dataMatrix[x] * w + interaction  # raw model output
        # probability of the +1 class (sigmoid inlined for self-containment)
        pre = 1.0 / (1 + exp(-p[0, 0]))
        result.append(pre)
        # threshold at 0.5 and count disagreements with the {-1,+1} label
        # (removed the original's dead ``else: continue`` branch)
        if pre < 0.5 and classLabels[x] == 1.0:
            error += 1
        elif pre >= 0.5 and classLabels[x] == -1.0:
            error += 1
    print(result)  # repaired broken Py2 print of all predicted scores
    return float(error) / allItem
if __name__ == '__main__':
    # Load the training and held-out test sets.
    dataTrain, labelTrain = loadDataSet(trainData)
    dataTest, labelTest = loadDataSet(testData)
    date_startTrain = datetime.now()  # training start timestamp
    # FIX: every ``print`` below was a Python-2 statement split in half by
    # the paste (printing nothing under Python 3), and the label strings had
    # been stripped to blanks — repaired into working print() calls.
    print("start training")
    # 20 latent factors, 200 passes over the training data.
    w_0, w, v = stocGradAscent(mat(dataTrain), labelTrain, 20, 200)
    print("training accuracy: %f" % (1 - getAccuracy(mat(dataTrain), labelTrain, w_0, w, v)))
    date_endTrain = datetime.now()
    print("training time: %s" % (date_endTrain - date_startTrain))
    print("start testing")
    print("test accuracy: %f" % (1 - getAccuracy(mat(dataTest), labelTest, w_0, w, v)))