sklearnのtrain_test_split(データ分割)、preprocessing(正規化)、cross_val_score(クロス検証)
2247 ワード
1、train_test_split(データ分割):
2、preprocessing(正規化)
3、cross_val_score(クロス検証)
import numpy as np
from sklearn import datasets, svm
from sklearn.model_selection import train_test_split
# Build a small toy dataset: 10 samples with 4 integer features in [0, 100)
# and one class label in {0, 1, 2} per sample.
X = np.random.randint(0, 100, (10, 4))
y = np.random.randint(0, 3, 10)
y.sort()  # sort the labels in place so equal classes are grouped together
print('X:')
print(X)
print('y:')
print(y)

# Hold out one third of the samples for testing. train_test_split shuffles
# the samples before splitting by default; fixing random_state makes that
# shuffle (and therefore the split) reproducible for a given X and y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1/3, random_state=7)
print('Training set:')
print(X_train)
print(y_train)
print('Test set:')
print(X_test)
print(y_test)
2、preprocessing(正規化)
from sklearn import preprocessing

# Three feature columns on very different scales (tens, units, tens of
# thousands), stacked side by side into a 5x3 matrix.
x1 = np.random.randint(1, 100, 5).reshape(5, 1)
x2 = np.random.randint(1, 10, 5).reshape(5, 1)
x3 = np.random.randint(1, 100000, 5).reshape(5, 1)
X = np.concatenate([x1, x2, x3], axis=1)
print(X)
# scale() standardizes each column to zero mean and unit variance.
print(preprocessing.scale(X))

# Show the effect of scaling on a classifier.
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
# `%matplotlib inline` is an IPython/Jupyter magic, not Python syntax.
# Run it in a notebook cell if inline plots are wanted; it must stay
# commented out when this code is executed as a plain script.
# %matplotlib inline
X, y = make_classification(n_samples=300, n_features=2, n_redundant=0,
                           n_informative=2, random_state=25,
                           n_clusters_per_class=1, scale=100)
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.show()

# Baseline: train and evaluate an SVM on the raw (unscaled) features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3)
svm_classifier = svm.SVC()
svm_classifier.fit(X_train, y_train)
# Print the score explicitly: a bare expression is only displayed when it
# is the last statement of a notebook cell.
print(svm_classifier.score(X_test, y_test))

# Same experiment after standardizing X.
# NOTE: scaling the full dataset before splitting lets test-set statistics
# influence the transform. That is fine for this demonstration; in real use,
# fit the scaler on the training split only.
X = preprocessing.scale(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3)
svm_classifier = svm.SVC()
svm_classifier.fit(X_train, y_train)
print(svm_classifier.score(X_test, y_test))
3、cross_val_score(クロス検証)
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

iris = datasets.load_iris()
X = iris.data
y = iris.target
# NOTE(review): train_size=1/3 keeps only one third of the data for
# training, whereas the earlier sections use test_size=1/3 - confirm that
# this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=1/3, random_state=7)

# Evaluate K = 1..30 with 10-fold cross-validation on the training split and
# record the mean accuracy for each K.
k_range = range(1, 31)
cv_scores = []
for n in k_range:
    knn = KNeighborsClassifier(n)
    # 'accuracy' suits classification; for a regression model a scorer such
    # as 'neg_mean_squared_error' would be used instead.
    scores = cross_val_score(knn, X_train, y_train, cv=10, scoring='accuracy')
    cv_scores.append(scores.mean())

plt.plot(k_range, cv_scores)
plt.xlabel('K')
plt.ylabel('Accuracy')
plt.show()

# Refit with the chosen K and evaluate on the held-out test split.
# NOTE(review): K=8 is hard-coded, presumably read off the plot above -
# verify it against the curve from your own run.
best_knn = KNeighborsClassifier(n_neighbors=8)
best_knn.fit(X_train, y_train)
print(best_knn.score(X_test, y_test))
print(best_knn.predict(X_test))