『統計的学習方法』第4章 最尤推定によるナイーブベイズ分類法 例題4.1 コード実践
8948 ワード
『統計的学習方法』第4章 最尤推定によるナイーブベイズ分類法の例題4.1のコード実践(ベイズ推定による方法を確認したい場合は、私の別の記事を参照してください: http://blog.csdn.net/grinandbearit/article/details/79045143)
コードは以下の通りです
コードは以下の通りです
#-*- coding:utf-8 -*-
from numpy import *
# Load the training data of Example 4.1; note that every feature value is returned as a string.
def loadDataSet():
    """Return the training set of Example 4.1 as ``(samples, labels)``.

    Returns:
        samples: list of 15 two-element lists ``[x1, x2]``.  Both feature
            values are strings (``'1'``..``'3'`` and ``'S'``/``'M'``/``'L'``).
        labels: list of 15 class labels, each ``1`` or ``-1``; ``labels[i]``
            belongs to ``samples[i]``.
    """
    firstFeature = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]
    secondFeature = ['S', 'M', 'M', 'S', 'S', 'S', 'M', 'M', 'L', 'L',
                     'L', 'M', 'M', 'L', 'L']
    labels = [-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1]
    # The numeric feature is converted to str explicitly so that all feature
    # values share one type; test samples built with a mixed int/str numpy
    # array come out as strings too and must compare equal to these.
    samples = [[str(x1), x2] for x1, x2 in zip(firstFeature, secondFeature)]
    return samples, labels
# Compute the prior probability of every label and the list of distinct labels.
def calc_label(labels):
    """Compute the prior probability of each class label.

    Args:
        labels: sized iterable of hashable class labels, one per sample.

    Returns:
        A tuple ``(labelRate, uniqueLabel)`` where ``labelRate`` maps each
        distinct label to its relative frequency in ``labels`` and
        ``uniqueLabel`` is the list of distinct labels.  Empty input gives
        ``({}, [])``.
    """
    # Function-scope import keeps the block self-contained.
    from collections import Counter

    m = len(labels)
    counts = Counter(labels)  # one pass instead of labels.count() per label
    # A list (not a set) is returned so that callers can index into it.
    uniqueLabel = list(set(labels))
    labelRate = {}
    for label in uniqueLabel:
        labelRate[label] = counts[label] / float(m)
    return labelRate, uniqueLabel
# Build the vocabulary: the list of distinct feature values that occur in the data set.
def calcVocaulary(dataset):
    """Collect the distinct feature values that occur in ``dataset``.

    Args:
        dataset: iterable of samples, each an iterable of hashable values.

    Returns:
        List of the distinct values in order of first appearance, so the
        result is reproducible between runs (set iteration order is not
        when string hashing is randomised).
    """
    seen = set()
    voca = []
    for content in dataset:
        for word in content:
            if word not in seen:
                seen.add(word)
                voca.append(word)
    return voca
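# Encode one sample as a vector over the vocabulary: each position counts how often that value occurs in the sample.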
def calcVector(voca,vector):
    """Encode a sample as a count vector over the vocabulary.

    Args:
        voca: list of vocabulary entries; position ``i`` of the result
            corresponds to ``voca[i]``.
        vector: iterable of feature values of one sample.

    Returns:
        A float numpy array of length ``len(voca)``; entry ``i`` is the
        number of times ``voca[i]`` occurs in ``vector``.  Values that are
        not in ``voca`` are ignored.
    """
    counts = zeros(len(voca))
    for word in vector:
        # A single lookup replaces the original "in" test followed by index().
        try:
            position = voca.index(word)
        except ValueError:
            continue
        counts[position] += 1
    return counts
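# Train the model: return a dict keyed by label whose value is the vector of per-class relative frequencies of every vocabulary entry.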
def Bayes(dataset,labels,uniqueLabel,voca):
    """Estimate the per-class conditional frequencies of every vocabulary entry.

    For each label, the count vectors (see ``calcVector``) of all samples
    carrying that label are summed and divided by the number of such
    samples.  No smoothing is applied.

    Args:
        dataset: sequence of samples, each an iterable of feature values.
        labels: sequence of class labels, ``labels[j]`` for ``dataset[j]``.
        uniqueLabel: iterable of the distinct labels to build vectors for.
        voca: vocabulary list defining the vector positions.

    Returns:
        Dict mapping each label in ``uniqueLabel`` to a float numpy array of
        length ``len(voca)``.  A label with no matching sample maps to an
        all-zero vector (instead of the NaN vector a 0/0 division gives).

    Raises:
        ValueError: if ``dataset`` and ``labels`` differ in length.
    """
    if len(dataset) != len(labels):
        raise ValueError("dataset and labels must have the same length")
    trainVecDict = {}
    for label in uniqueLabel:
        labelVector = zeros(len(voca))
        sampleCount = 0
        for sample, sampleLabel in zip(dataset, labels):
            if sampleLabel == label:
                # Accumulate the counts of every sample of this class.
                labelVector += calcVector(voca, sample)
                sampleCount += 1
        if sampleCount:
            # Turn counts into relative frequencies within the class.
            labelVector /= float(sampleCount)
        trainVecDict[label] = labelVector
    return trainVecDict
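# Classify a test sample: return the label whose prior times the product of the conditional frequencies is largest.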
def testFunction(testArray,voca,trainVecDict,labelRate):
result = -1;maxRate = -inf
for key in trainVecDict:
singleLabelRate=1.0
for word in testArray:
singleLabelRate *= trainVecDict[key][voca.index(word)] #
if singleLabelRate*labelRate[key] > maxRate:
result = key;maxRate =singleLabelRate*labelRate[key]
return result
# Driver: train on the data of Example 4.1 and classify the sample (2, 'S').
dataSet,labels=loadDataSet()
labelRate,uniqueLabel=calc_label(labels)
voca=calcVocaulary(dataSet)
# Single-argument print(...) gives the same output on Python 2 and Python 3.
print(voca)
trainVecDict=Bayes(dataSet,labels,uniqueLabel,voca)
# array() turns the mixed int/str values into strings, matching the vocabulary.
testArray=array([2,'S'])
print(labelRate)
print(trainVecDict)
print(testFunction(testArray,voca,trainVecDict,labelRate))
実行結果:
['1', 'S', '2', 'M', '3', 'L']
{1: 0.6, -1: 0.4}
{1: array([ 0.22222222, 0.11111111, 0.33333333, 0.44444444, 0.44444444,
0.44444444]), -1: array([ 0.5 , 0.5 , 0.33333333, 0.33333333, 0.16666667,
0.16666667])}
-1
made by zcl at CUMT
I know I can because I have a heart that beats