交差検証を使ってScikit-learn Iris datasetを処理するSVMとロジスティック回帰の比較をする
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
# Load the Iris dataset and keep only the first two features
# (sepal length, sepal width) so decision boundaries can be drawn in 2-D.
# BUG FIX: the original also bound X_w/y_w to the exact same arrays;
# those duplicates were never used anywhere below and have been removed.
iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target
プロットする
# Scatter the first two classes in feature space (class 0 red, class 1 blue).
# BUG FIX: removed the `%matplotlib inline` line — it is an IPython/Jupyter
# magic, not valid Python syntax, and breaks the file when run as a script.
X_0 = X[y == 0]
X_1 = X[y == 1]
plt.figure(figsize=(10, 7))
plt.scatter(X_0[:, 0], X_0[:, 1], color='red')
plt.scatter(X_1[:, 0], X_1[:, 1], color='blue')
交差検証をする場合、データを2回splitする(分ける)
from sklearn.model_selection import train_test_split
# First split: hold out 25% of the data as the final test set,
# stratified so class proportions are preserved across the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=7, stratify=y)
# Second split: carve a validation chunk out of the training portion,
# giving five data chunks in total across the two splits.
# NOTE(review): unlike the first split this one is not stratified — confirm intentional.
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(
    X_train, y_train, test_size=0.25, random_state=7)
図にするとこんな感じになる
つまり、2回の分割によってデータチャンクは5つになっている。
↓ SVMとロジスティック回帰をする。
from sklearn.svm import SVC
# Train a linear-kernel SVM and a logistic-regression model on the same
# inner training split so the later comparison is like-for-like.
# SVC.fit returns the estimator itself, so the call can be chained.
svm_inst = SVC(kernel='linear', random_state=7).fit(X_train_2, y_train_2)
from sklearn.linear_model import LogisticRegression
lr_clf = LogisticRegression(random_state=7)
lr_clf.fit(X_train_2, y_train_2)
accuracy_scoreを計算する
from sklearn.metrics import accuracy_score
# Report each model's accuracy on the inner hold-out chunk.
# Predictions are computed inline since they are not reused afterwards.
print("Accuracy of SVC:", accuracy_score(y_test_2, svm_inst.predict(X_test_2)))
print("Accuracy of LR:", accuracy_score(y_test_2, lr_clf.predict(X_test_2)))
これをさらに、交差検証を使って再度算出する。
まずは、決定境界を可視化するためのグリッド(テスト点の集合)を作成する。
from itertools import product
# Bounds of each feature, spanning the prediction grid.
# (min/max are exactly what np.percentile at q=[0, 100] returns.)
xmin, xmax = X[:, 0].min(), X[:, 0].max()
ymin, ymax = X[:, 1].min(), X[:, 1].max()
# 50x50 Cartesian grid over the feature ranges; indexing='ij' makes the
# row-major raveled order match itertools.product (x varies slowest).
grid_x, grid_y = np.meshgrid(
    np.linspace(xmin, xmax), np.linspace(ymin, ymax), indexing='ij')
test_points = np.column_stack((grid_x.ravel(), grid_y.ravel()))
# Left panel: logistic-regression predictions over the grid, with the
# true class-0/class-1 points overlaid.
# BUG FIX: removed the `%matplotlib inline` magic (invalid Python in a script)
# and deleted dead commented-out code.
test_preds = lr_clf.predict(test_points)
X_0 = X[y == 0]
X_1 = X[y == 1]
fig = plt.figure(figsize=(24, 7))
ax1 = fig.add_subplot(1, 2, 1)
# Predicted class 0 shades 'tomato'; classes 1 and 2 both shade 'aqua'.
colors = np.array(['tomato', 'aqua', 'aqua'])
ax1.scatter(test_points[:, 0], test_points[:, 1], color=colors[test_preds], alpha=0.25)
ax1.scatter(X_0[:, 0], X_0[:, 1], color='red')
ax1.scatter(X_1[:, 0], X_1[:, 1], color='blue')
# BUG FIX: this panel shows logistic-regression predictions, but was titled "SVM".
plt.title("Logistic Regression")
# Right panel: SVM predictions over the same grid.
test_preds = svm_inst.predict(test_points)
X_0 = X[y == 0]
X_1 = X[y == 1]
# BUG FIX: the original opened a second 24x7 figure and still placed the
# axes in slot (1, 2, 1); the intent is clearly the second slot of the
# existing two-column figure, so reuse `fig` and use slot (1, 2, 2).
ax2 = fig.add_subplot(1, 2, 2)
ax2.scatter(test_points[:, 0], test_points[:, 1], color=colors[test_preds], alpha=0.25)
ax2.scatter(X_0[:, 0], X_0[:, 1], color='red')
ax2.scatter(X_1[:, 0], X_1[:, 1], color='blue')
# BUG FIX: these are SVM predictions, but the panel was titled "Linear Regression"
# (also a misnomer — the other model is *logistic* regression).
plt.title("SVM")
交差検証をする。
from sklearn.model_selection import cross_val_score
# 4-fold cross-validation of the SVM on the inner training split.
# NOTE(review): the logistic-regression run below uses cv=10; unequal fold
# counts make the two mean scores not directly comparable — confirm intentional.
svc_scores = cross_val_score(svm_inst, X_train_2, y_train_2, cv=4)
svc_mean, svc_std = svc_scores.mean(), svc_scores.std()
print("Average SVC scores: ", svc_mean)
print("Standard Deviation of SVC scores: ", svc_std)
Average SVC scores: 0.7857142857142857
Standard Deviation of SVC scores: 0.07142857142857144
# 10-fold cross-validation of the logistic-regression model on the same split.
lr_scores = cross_val_score(lr_clf, X_train_2, y_train_2, cv=10)
lr_mean, lr_std = lr_scores.mean(), lr_scores.std()
print("Average LR scores: ", lr_mean)
print("Standard Deviation of LR scores: ", lr_std)
Average LR scores: 0.7944444444444445
Standard Deviation of LR scores: 0.1254313544986182
Author And Source
この問題について(交差検証を使ってScikit-learn Iris datasetを処理するSVMとロジスティック回帰の比較をする)、我々は、より多くの情報をここで見つけました: https://qiita.com/Ruo_Ando/items/1ea6de326eee69690f7b 。著者帰属: 元の著者の情報は、元のURLに含まれています。著作権は原作者に属する。
Content is automatically searched and collected through network algorithms . If there is a violation . Please contact us . We will adjust (correct author information ,or delete content ) as soon as possible .