Logistic Regression


1. データの探索


Rの組み込みデータセット「iris」を用い、Logistic Regression分類器を適用する前にデータの基本構造を確認する。
ターゲット変数はSpeciesとし、「setosa」と「non setosa」の2クラスに分ける。
# Collapse the three iris species into a binary target: rows that are
# "setosa" keep that label, every other species becomes "non setosa".
# The recoded column is stored back into iris as a factor.
iris$Species <- factor(
  ifelse(as.character(iris$Species) == "setosa", "setosa", "non setosa")
)

2. Logistic Regressionモデルの適合とエラー率の算出

# Reproducible hold-out split: with a fixed seed, draw 100 of the 150 row
# indices for training; the rows that were not drawn form the test set.
set.seed(150)
train_sample <- sample.int(150, 100)
str(train_sample)

iris_train <- iris[train_sample, ]
iris_test <- iris[-train_sample, ]

# Class proportions of Species in each partition.
prop.table(table(iris_train$Species))
prop.table(table(iris_test$Species))


# Fit the logistic regression model: Species is regressed on every other
# column of the training set using the binomial family.
iris_model_logistic <- glm(
  Species ~ .,
  family = binomial,
  data = iris_train
)
summary(iris_model_logistic)

# Stepwise model selection with step()'s default settings, followed by a
# summary of the selected model.
iris_model_logistic_step <- step(iris_model_logistic)
summary(iris_model_logistic_step)


# Predicted probabilities for the test set (type = "response" returns values
# on the probability scale).
iris_logistic_pred <- predict(
  iris_model_logistic, iris_test,
  type = "response"
)

# Cross-check: without `type`, predict() returns the linear predictor
# (log-odds); applying the inverse logit reproduces the probabilities above.
# plogis() is used instead of exp(x) / (1 + exp(x)) because the latter
# evaluates to NaN (Inf / Inf) once exp(x) overflows.
iris_logistic_tmp <- predict(iris_model_logistic, iris_test)
plogis(iris_logistic_tmp)

# Convert probabilities to class labels at a 0.5 cut-off. The labels reuse
# the class names of the recoded Species column so that predictions can be
# compared with the actual values directly. glm() models the non-first factor
# level as "success", so a high probability means "setosa" -- this assumes
# the levels are ordered ("non setosa", "setosa"); verify if the recoding
# step changes.
iris_logistic_pred <- factor(
  ifelse(iris_logistic_pred > 0.5, "setosa", "non setosa"),
  levels = c("non setosa", "setosa")
)

# Compute the test error: cross-tabulate the actual species against the
# predicted labels. Chi-square, column and row proportions are switched off.
# CrossTable() with these arguments matches gmodels::CrossTable(); no
# library(gmodels) call precedes this point in the file, so the package is
# attached here to keep the script runnable in a fresh session.
library(gmodels)
CrossTable(iris_test$Species, iris_logistic_pred,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c('actual Species', 'predicted Species'))

# accuracy : 1.0, error rate = 0

# Logistic Regression ROC, AUC

Logistic Regressionを実行した結果、精度=1.0、エラー率=0となりました.

3. ROC, AUC

library(ROCR)

# ROC / AUC on the test set.
# The probabilities are recomputed here because iris_logistic_pred was
# overwritten with class labels earlier in the script.
iris_logistic_pred <- predict(
  iris_model_logistic, iris_test,
  type = "response"
)

# ROCR prediction object pairing predicted probabilities with actual labels.
pred <- prediction(iris_logistic_pred, iris_test$Species)

# ROC curve: true positive rate against false positive rate.
irislg_roc <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(irislg_roc, col = "red", lty = 1, lwd = 3, main = "ROC curve")

# Area under the ROC curve. The value is stored in the y.values slot of the
# performance object; the original line here was garbled into an invalid
# expression and has been restored to the slot access.
iris_auc <- performance(pred, measure = "auc")
unlist(iris_auc@y.values)

Logistic Regressionモデルのエラー率は0であり、ROC曲線は上図のように左上の角を通る直角の形となり、AUC=1である。