python SVM(サポートベクトルマシン)の実装


from sklearn import svm
import numpy as np
from sklearn.metrics import accuracy_score
_SEPARATOR = '--------------------------------------------------------------------'
_DIAGNOSIS_PATH = './AcuteInflammations/diagnosis.data'


def _read_rows(txtname, delimiter, skip_header=False):
    """Return the non-blank lines of *txtname*, each split on *delimiter*.

    When *skip_header* is true the first physical line is dropped before
    blank lines are filtered out.
    """
    # The context manager guarantees the handle is closed, even on error.
    with open(txtname) as handle:
        lines = handle.readlines()
    if skip_header:
        lines = lines[1:]
    # Blank lines (typically a trailing one) carry no record; skipping them
    # avoids ragged feature rows and index/parse errors further down.
    return [line.rstrip('\n').split(delimiter) for line in lines if line.strip()]


def _split_step(rate):
    """Return the stride N such that every N-th row goes to the test set.

    Raises:
        ValueError: if *rate* is outside ``0 <= rate < 1``.
    """
    if not 0 <= rate < 1:
        raise ValueError('rate must satisfy 0 <= rate < 1, got %r' % (rate,))
    # Round before truncating: 1 / (1 - 0.95) evaluates to 19.99999999999998
    # in floating point, which a bare int() would turn into 19 instead of 20.
    return int(round(1 / (1 - rate), 9))


def _partition(rows, rate):
    """Split *rows* into ``(train_rows, test_rows)``.

    Rows at indices 0, step, 2*step, ... form the test set; all remaining
    rows, in their original order, form the training set.
    """
    step = _split_step(rate)
    test_rows = rows[::step]
    train_rows = [row for index, row in enumerate(rows) if index % step]
    return train_rows, test_rows


def _encode_feature(value):
    """Map 'yes' to 1 and 'no' to 0; parse any other field as a float."""
    if value == 'yes':
        return 1
    if value == 'no':
        return 0
    return float(value)


def loadSplitDataSet1(txtname: str, rate: float, k: int):
    """Load a tab-separated yes/no data file and split it into train/test.

    The first line of the file is discarded. In every remaining non-blank
    line, all fields except the last two are features ('yes' -> 1,
    'no' -> 0, anything else parsed as float) and the field at index ``-k``
    is the label ('yes' -> 1, anything else -> 0).

    Args:
        txtname: path of the file to read.
        rate: training fraction; every ``int(1 / (1 - rate))``-th row,
            starting with the first, goes to the test set.
        k: which field counted from the end holds the label (1 selects the
            last field, 2 the one before it).

    Returns:
        ``(trainData, testData, trainLabel, testLabel)`` as NumPy arrays.

    Raises:
        ValueError: if *rate* is outside ``0 <= rate < 1`` or a feature
            field cannot be parsed as a float.
    """
    rows = _read_rows(txtname, '\t', skip_header=True)
    train_rows, test_rows = _partition(rows, rate)

    def features(subset):
        return [[_encode_feature(field) for field in row[:-2]] for row in subset]

    def labels(subset):
        return [1 if row[-k] == 'yes' else 0 for row in subset]

    return (
        np.array(features(train_rows)),
        np.array(features(test_rows)),
        np.array(labels(train_rows)),
        np.array(labels(test_rows)),
    )


def loadSplitDataSet2(txtname: str, rate: float, k: int):
    """Load a comma-separated numeric data file and split it into train/test.

    The first field of every non-blank line is an integer class label and
    the remaining fields are float features. Rows whose label equals *k*
    are dropped before splitting.

    Args:
        txtname: path of the file to read.
        rate: training fraction; every ``int(1 / (1 - rate))``-th kept row,
            starting with the first, goes to the test set.
        k: class label to exclude from the data.

    Returns:
        ``(trainData, testData, trainLabel, testLabel)`` as plain lists.

    Raises:
        ValueError: if *rate* is outside ``0 <= rate < 1`` or a field
            cannot be parsed as a number.
    """
    rows = [row for row in _read_rows(txtname, ',') if int(row[0]) != k]
    train_rows, test_rows = _partition(rows, rate)

    def features(subset):
        return [[float(field) for field in row[1:]] for row in subset]

    def labels(subset):
        return [int(row[0]) for row in subset]

    return (
        features(train_rows),
        features(test_rows),
        labels(train_rows),
        labels(test_rows),
    )


def _fit_and_predict(model, data_path, k):
    """Fit *model* on the training split for label column *k*.

    Returns ``(testLabel, predicted)`` for the held-out split.
    """
    # loadSplitDataSet2('./wine.data', 0.8, k) can be swapped in here to run
    # the same experiment on the wine data instead.
    train_data, test_data, train_label, test_label = loadSplitDataSet1(
        data_path, 0.8, k)
    model.fit(train_data, train_label)
    return test_label, model.predict(test_data)


def _print_report(title, test_label, predicted):
    """Print the true labels, the predictions and the resulting accuracy."""
    print(title)
    print('testLabel:', test_label)
    print('predicteLabel:', predicted)
    print('accuracy_score:', accuracy_score(test_label, predicted))
    print(_SEPARATOR)


if __name__ == "__main__":
    print('kernel=rbf')
    # Other kernels accepted by SVC include 'linear' and 'poly'.
    model = svm.SVC(kernel='rbf', gamma='auto')

    test_label, predicted = _fit_and_predict(model, _DIAGNOSIS_PATH, 1)
    print(_SEPARATOR)
    _print_report('Nephritis of renal pelvis origin :', test_label, predicted)

    test_label, predicted = _fit_and_predict(model, _DIAGNOSIS_PATH, 2)
    _print_report('Inflammation of urinary bladder:', test_label, predicted)