sklearnのtrain_test_split(データ分割)、preprocessing(正規化)、cross_val_score(クロス検証)

2247 ワード

1、train_test_split(データ分割):
import numpy as np  # needed by the np.random calls below; it was never imported
from sklearn.model_selection import train_test_split

# Toy data set: 10 samples x 4 random integer features, labels drawn from {0, 1, 2}.
X=np.random.randint(0,100,(10,4))
y=np.random.randint(0,3,10)
y.sort()  # sort the labels in place so the shuffling done by the split is easy to see

# Show the full feature matrix and label vector before splitting.
print(' :')
print(X)
print(' :')
print(y)

# Split into training and test subsets, holding out about one third for testing.
# random_state fixes the shuffle, so the same rows are selected on every run.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=1/3,random_state=7)

# Training subset (features, then labels).
print(' :')
print(X_train)
print(y_train)
# Test subset (features, then labels).
print(' :')
print(X_test)
print(y_test)

2、preprocessing(正規化)
import numpy as np  # needed by the np.random calls below; it was never imported
from sklearn import preprocessing

# Three single-column features whose value ranges differ by orders of magnitude.
x1=np.random.randint(1,100,5).reshape(5,1)
x2=np.random.randint(1,10,5).reshape(5,1)
x3=np.random.randint(1,100000,5).reshape(5,1)

# Stack the columns side by side into one (5, 3) feature matrix.
X=np.concatenate([x1,x2,x3],axis=1)

# Raw values: the third column dwarfs the other two.
print(X)

# scale() standardizes each column to zero mean and unit variance,
# which puts all three features on a comparable scale.
print(preprocessing.scale(X))

#  scale 
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
%matplotlib inline

X, y = make_classification(n_samples=300, n_features=2, n_redundant=0, n_informative=2, random_state=25, n_clusters_per_class=1, scale=100)

plt.scatter(X[:,0], X[:,1], c=y)
plt.show()

# Compare SVC test accuracy before and after standardizing the features.
from sklearn import svm  # svm.SVC is used below, but `svm` was never imported

# Baseline: fit on the raw, unscaled features.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=1/3,random_state=7)
svm_classifier=svm.SVC()
svm_classifier.fit(X_train,y_train)
print(svm_classifier.score(X_test,y_test))

# Standardize every feature column to zero mean and unit variance.
# NOTE: scaling before splitting lets test-set statistics leak into training;
# that is acceptable for this demo, but real pipelines should fit a
# StandardScaler on the training split only.
X=preprocessing.scale(X)

# Reusing the same random_state selects the same rows as the baseline split,
# so any change in the score comes from scaling rather than from a different split.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=1/3,random_state=7)
svm_classifier=svm.SVC()
svm_classifier.fit(X_train,y_train)
print(svm_classifier.score(X_test,y_test))

3、cross_val_score(クロス検証)
from sklearn import datasets  # datasets.load_iris is used below, but was never imported
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

iris=datasets.load_iris()
X=iris.data
y=iris.target

# Hold out one third of the samples for the final test, as in the earlier sections.
# (This previously passed train_size=1/3, which trained on only a third of the data.)
X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=1/3,random_state=7)

# Evaluate K = 1..30 with 10-fold cross-validation on the training split only,
# so the test split stays untouched until the final evaluation.
k_range=range(1,31)
cv_scores=[]
for n in k_range:
    knn=KNeighborsClassifier(n_neighbors=n)
    # Mean accuracy over the folds is the usual metric for classification.
    scores=cross_val_score(knn,X_train,y_train,cv=10,scoring='accuracy')
    # For a regression model, use scoring='neg_mean_squared_error' instead.
    cv_scores.append(scores.mean())

# Plot mean cross-validated accuracy against K.
plt.plot(k_range,cv_scores)
plt.xlabel('K')
plt.ylabel('Accuracy')
plt.show()

# Pick the K with the highest mean CV accuracy instead of hard-coding a value
# read off the plot; on ties, the smallest such K is chosen.
best_k=k_range[cv_scores.index(max(cv_scores))]
print('best K:',best_k)

# Refit on the whole training split with the chosen K and evaluate on the test split.
best_knn=KNeighborsClassifier(n_neighbors=best_k)
best_knn.fit(X_train,y_train)
print(best_knn.score(X_test,y_test))
print(best_knn.predict(X_test))