#####10 #####
# 。 Bagging
#AdaBoost, ,
# 。
#####10.2.2 #####
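#1. The bagging() function
#bagging(formula,data,mfinal=100,control)
# formula: the model formula, e.g. y~x1+x2+x3; data: the training data frame;
# mfinal: the number of trees to grow (iterations), 100 by default;
# control: options passed to rpart() for the base trees (see rpart.control()).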
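#2. The boosting() function
# boosting() implements the AdaBoost algorithm:
#boosting(formula,data,boos=TRUE,mfinal=100,coeflearn='Breiman',control)
# formula, data, mfinal and control have the same meaning as in bagging();
# boos: if TRUE (the default), each iteration draws a bootstrap sample of the
#   training set using the current observation weights; if FALSE, every
#   observation is used together with its weight;
# coeflearn: how the weight-update coefficient alpha is computed; the default
#   is "Breiman", i.e. alpha = (1/2) * ln((1 - err) / err); the alternatives
#   are "Freund" and "Zhu".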
#####10.2.3 #####
# The data used here is the Bank Marketing data set from the
# UCI Machine Learning Repository.
# It contains 16 input variables and 1 target variable (y).
# Source: http://archive.ics.uci.edu/ml/datasets/Bank+Marketing
# Load the Bank Marketing data (semicolon-separated text file with a header row).
# NOTE(review): setwd() with a machine-specific path makes the script
# non-portable; it is kept because later code may rely on the working directory.
setwd("E://books/ R /bank")
# stringsAsFactors = TRUE is required on R >= 4.0.0, where read.csv() keeps text
# columns as character by default: adabag's bagging()/boosting() expect the
# class variable y to be a factor, and summary() is only informative for factors.
data <- read.csv("bank.csv", header = TRUE, sep = ";", stringsAsFactors = TRUE)
dim(data)      # number of rows and columns
head(data)     # first few records
summary(data)  # per-column summary
#
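# age       client age, from 17 to 87
# job       type of job: admin.; unknown; unemployed; management; housemaid;
#           entrepreneur; student; blue-collar; self-employed; retired;
#           technician; services
# marital   marital status: married; divorced; single
# education unknown; secondary; primary; tertiary
# default   has credit in default: yes; no
# balance   account balance, from -3313 to 71188
# housing   has a housing loan: yes; no
# loan      has a personal loan: yes; no
# contact   contact type: unknown; telephone; cellular
# day       day of the month of the last contact, from 1 to 31
# month     month of the last contact: jan; feb; ... dec
# duration  duration of the last contact, from 4 to 3025 (seconds)
# campaign  number of contacts during this campaign, from 1 to 50
# pdays     days since the client was last contacted in a previous campaign:
#           -1 (not previously contacted); otherwise 1 to 871
# previous  number of contacts before this campaign, from 0 to 25
# y         target: did the client subscribe to a term deposit: yes; no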
# 1/4
# Randomly hold out one quarter of the rows as the test set; the remaining
# three quarters form the training set.
set.seed(1234)  # fixed seed so the random split can be reproduced
n_obs <- nrow(data)
sub <- sample.int(n_obs, round(n_obs / 4))  # row indices of the test set
length(sub)                                 # size of the test set
# Select the training rows by positive index: data[-sub, ] would silently
# return zero rows if sub happened to be empty.
data_train <- data[setdiff(seq_len(n_obs), sub), ]
data_test <- data[sub, ]
dim(data_train)
dim(data_test)
#####10.3 #####
# bagging boosting
#####10.3.1 Bagging #####
library(adabag)
library(rpart)
# 1. Fit a Bagging model on data_train
# Build an ensemble of 5 trees with bagging(), using every other column as predictor.
bag <- bagging(y ~ ., data = data_train, mfinal = 5)
names(bag)                    # components of the fitted object
bag[["formula"]]              # formula the model was fitted with
bag[["trees"]][2]             # second tree of the ensemble
bag[["votes"]][105:115, ]     # votes for observations 105 to 115
bag[["prob"]][105:115, ]      # vote proportions for observations 105 to 115
bag[["class"]][105:115]       # predicted class for observations 105 to 115
# One column per tree: the bootstrap sample drawn for each of the 5 trees.
bag[["samples"]][105:115, ]   # rows 105 to 115 of the bootstrap sample matrix
bag[["importance"]]           # relative importance of each input variable
# Same model, but limit the depth of every tree through the rpart control options.
bag1 <- bagging(
  y ~ .,
  data = data_train,
  mfinal = 5,
  control = rpart.control(maxdepth = 3)
)
bag1[["trees"]][2]            # second tree of the depth-limited ensemble
# 2. Predict the test set data_test with the fitted Bagging model
pre_bag <- predict(bag, newdata = data_test)
names(pre_bag)                # components of the prediction object
pre_bag[["votes"]][1:10, ]    # votes for the first 10 test observations
pre_bag[["prob"]][1:10, ]     # vote proportions for the first 10 test observations
pre_bag[["class"]][1:10]      # predicted class for the first 10 test observations
# The prediction object also carries a confusion matrix and the overall error.
pre_bag[["confusion"]]        # confusion matrix on the test set
pre_bag[["error"]]            # overall test error rate
# The "yes" and "no" classes are not equally frequent, so besides the overall
# error the error rate is also computed separately for each class.
sub_minor <- which(data_test[["y"]] == "yes")  # test rows whose true class is "yes"
sub_major <- which(data_test[["y"]] == "no")   # test rows whose true class is "no"
length(sub_minor)
length(sub_major)
# Overall test error of the Bagging model
err_bag <- sum(pre_bag[["class"]] != data_test[["y"]]) / nrow(data_test)
# Error on the "yes" class: err_minor_bag
err_minor_bag <- sum(
  pre_bag[["class"]][sub_minor] != data_test[["y"]][sub_minor]
) / length(sub_minor)
# Error on the "no" class: err_major_bag
err_major_bag <- sum(
  pre_bag[["class"]][sub_major] != data_test[["y"]][sub_major]
) / length(sub_major)
err_bag
err_minor_bag
err_major_bag
# Figures recorded from an earlier run: 0.637 and 0.0382 (presumably the "yes"
# and "no" class error rates, in that order).
#####10.3.2 Adaboost #####
# Fit an AdaBoost model with 5 iterations and predict the test set.
boo <- boosting(y ~ ., data = data_train, mfinal = 5)
pre_boo <- predict(boo, newdata = data_test)
# Overall test error of the AdaBoost model
err_boo <- sum(pre_boo[["class"]] != data_test[["y"]]) / nrow(data_test)
# Error on the "yes" class: err_minor_boo
err_minor_boo <- sum(
  pre_boo[["class"]][sub_minor] != data_test[["y"]][sub_minor]
) / length(sub_minor)
# Error on the "no" class: err_major_boo
err_major_boo <- sum(
  pre_boo[["class"]][sub_major] != data_test[["y"]][sub_major]
) / length(sub_major)
err_boo
err_minor_boo
err_major_boo
# Compare these three rates with err_bag, err_minor_bag and err_major_bag.