# 交差検証でKNNモデルのパラメータを調整する (Tuning KNN model parameters with cross-validation)
# 1682 ワード
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
#
def inspect_data(file_root):
    """Load a CSV file and print a quick overview of its contents.

    Prints the ``DataFrame.info()`` summary, the number of rows and
    columns, and the first rows as returned by ``DataFrame.head()``.

    Args:
        file_root: Location of the CSV file; passed unchanged to
            ``pandas.read_csv``.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    dataframe = pd.read_csv(file_root)
    print(" :")
    # info() writes its report itself and returns None; wrapping it in
    # print() would emit an extra "None" line after the summary.
    dataframe.info()
    print(" %i ,%i " % (dataframe.shape[0], dataframe.shape[1]))
    print(" :")
    print(dataframe.head())
    return dataframe
#
def processing_missing_data(dataframe):
    """Remove rows that contain missing values.

    Args:
        dataframe: The ``pandas.DataFrame`` to clean. It is not modified.

    Returns:
        A new DataFrame without the rows holding any null value, or the
        input object itself when it contains no nulls.
    """
    if dataframe.isnull().values.any():
        # Rows with nulls are dropped; filling them instead (for example
        # with ``dataframe.fillna(0)``) would be the alternative strategy.
        dataframe = dataframe.dropna()
    return dataframe
#
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the data set (hard-coded, machine-specific path).
dataframe = pd.read_csv("H:/pythonfigure/voice.csv")

# Drop rows with missing values.
dataframe = processing_missing_data(dataframe)

# Encode the string labels as integers: "male" -> 1, "female" -> 0.
dataframe = dataframe.replace({"male": 1, "female": 0})

# Features are every column but the last; the last column is the target.
# ``DataFrame.ix`` was removed in pandas 1.0, so use positional ``iloc``.
x = dataframe.iloc[:, :-1]
# Depending on the pandas version, replace() may leave the column with
# object dtype; cast so the classifier receives an integer target.
y = dataframe.iloc[:, -1].astype(int)

# Standardize the features (zero mean, unit variance per column).
# NOTE(review): scaling happens before the train/test split, so the
# held-out rows contribute to the scaling statistics. Kept as in the
# original; fit a scaler on the training split only to avoid the leak.
x = preprocessing.scale(x)

# Hold out one third of the rows for the final evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=1 / 3., random_state=5
)

# 10-fold cross-validation on the training split for k = 1..30.
k_range = range(1, 31)
cv_score = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, x_train, y_train, cv=10, scoring="accuracy")
    score_mean = scores.mean()
    cv_score.append(score_mean)
    print(k, score_mean)

# Look the best k up in k_range rather than assuming the range starts at 1.
best_k = k_range[int(np.argmax(cv_score))]
print(" k %i" % (best_k))

# Mean cross-validation accuracy as a function of k.
plt.plot(k_range, cv_score)
plt.xlabel("k")
plt.ylabel("score")
plt.show()

# Refit with the selected k on the full training split and report the
# accuracy on the held-out test split.
knn_model = KNeighborsClassifier(n_neighbors=best_k)
knn_model.fit(x_train, y_train)
print(" :", knn_model.score(x_test, y_test))