# 10. Ensemble Learning


#####10 Ensemble Learning#####
#This chapter introduces ensemble learning, in which a number of base
#classifiers are combined into a single, stronger classifier. The two classic
#algorithms, Bagging and AdaBoost, are described and then applied to real data in R.

#####10.2.2 Core Functions#####
#1. The bagging() function
#bagging(formula,data,mfinal=100,control)
#formula is the model formula, of the form y~x1+x2+x3; data is the training
#data set; mfinal is the number of iterations, i.e. the number of trees in the
#ensemble, with a default of 100; control takes the same options as rpart()
#and governs how each individual classification tree is grown.
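#A minimal illustrative call (not from the original text), using the built-in
#iris data so that it runs on its own; mfinal=10 and maxdepth=2 are arbitrary
#values chosen only for the illustration.
library(adabag)
library(rpart)
toy_bag = bagging(Species~., data=iris, mfinal=10,
                  control=rpart.control(maxdepth=2))
predict(toy_bag, newdata=iris)$error   # resubstitution error rate of the toy ensemble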

#2. The boosting() function
#The boosting() function implements the AdaBoost algorithm and is used in much
#the same way as bagging():
#boosting(formula,data,boos=TRUE,mfinal=100,coeflearn='Breiman',control)
#Here formula, data, mfinal and control have the same meaning as in bagging()
#and are not repeated. boos determines how the observation weights are used in
#each iteration: if TRUE (the default), a bootstrap sample of the training set
#is drawn in each iteration using the current observation weights; if FALSE,
#every observation is used with its weight in every iteration. coeflearn
#specifies how the weight-update coefficient alpha is computed: the default
#'Breiman' uses alpha=1/2*ln((1-err)/err), and the alternatives are "Freund"
#and "Zhu".
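#A matching illustrative call for boosting(), again on the iris data and
#assuming adabag has already been loaded as in the previous sketch;
#coeflearn="Freund" is used here only to show the alternative coefficient.
toy_boo = boosting(Species~., data=iris, boos=TRUE, mfinal=10, coeflearn="Freund")
toy_boo$weights                        # alpha coefficient of each of the 10 trees
predict(toy_boo, newdata=iris)$error   # resubstitution error rate of the toy ensemble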

#####10.2.3 The Data Set#####
#The Bank Marketing data set from the UCI Machine Learning Repository is used.
#The data come from the telephone marketing campaigns of a Portuguese bank, in
#which clients were contacted about subscribing to a term deposit. The set
#contains 16 input variables and 1 output variable, y, which records whether
#the client subscribed to a term deposit.
#http://archive.ics.uci.edu/ml/datasets/Bank+Marketing

setwd("E://books/     R    /bank")      # set the working directory
data=read.csv("bank.csv", header=TRUE, sep=";") # read the bank.csv data file
dim(data)
head(data)
summary(data)
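#A practical note not in the original text: bagging() and boosting() require
#the response y to be a factor. From R 4.0 onwards read.csv() no longer turns
#character columns into factors by default, so on a recent installation the
#categorical columns may need an explicit conversion such as the following.
if (any(sapply(data, is.character))) {
  data[] = lapply(data, function(col) if (is.character(col)) as.factor(col) else col)
}
str(data)   # y and the other categorical variables should now be factors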

#Meaning and range of each variable:
# age        age of the client: 17 to 87
# job        type of job: admin., unknown, unemployed, management, housemaid,
#            entrepreneur, student, blue-collar, self-employed, retired,
#            technician, services
# marital    marital status: married, divorced, single
# education  education level: unknown, secondary, primary, tertiary
# default    has credit in default: yes, no
# balance    average yearly balance (in euros): -3313 to 71188
# housing    has a housing loan: yes, no
# loan       has a personal loan: yes, no
# contact    contact communication type: unknown, telephone, cellular
# day        last contact day of the month: 1 to 31
# month      last contact month of the year: jan, feb, ..., dec
# duration   last contact duration (in seconds): 4 to 3025
# campaign   number of contacts during this campaign: 1 to 50
# pdays      days since the client was last contacted in a previous campaign:
#            -1 means never previously contacted; otherwise 1 to 871
# previous   number of contacts before this campaign: 0 to 25
# y          whether the client subscribed to a term deposit: yes, no
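#The class distribution of y is worth checking at this point: the severe
#imbalance between "no" and "yes" is what motivates the per-class error
#analysis carried out later in the chapter.
table(data$y)              # counts of "no" and "yes"
prop.table(table(data$y))  # proportions of the two classes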

#Randomly set aside about 1/4 of the observations as the test set
sub=sample(1:nrow(data), round(nrow(data)/4)) # draw a random sample of row indices of data
length(sub)                                   # number of sampled indices, i.e. the size of the test set
data_train=data[-sub,]                        # rows not in sub form the training set
data_test=data[sub,]                          # rows in sub form the test set
dim(data_train);dim(data_test)                # sizes of the training and test sets
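#The split above is random, so the exact figures reported below will vary from
#run to run. To reproduce a particular split, fix the random seed before the
#call to sample(); the seed value below is arbitrary.
# set.seed(1234)
# sub = sample(1:nrow(data), round(nrow(data)/4))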

#####10.3 Case Study#####
#Below, the bagging and boosting algorithms are applied to the bank marketing
#data to build classification models and assess their predictive performance.

#####10.3.1 The Bagging Algorithm#####
library(adabag)
library(rpart)

#1. Build a Bagging model on the training set data_train
bag = bagging(y~.,data_train,mfinal=5)  # call bagging(); the ensemble contains mfinal=5 trees
names(bag)                            # components stored in the bag object
bag$formula                           # the model formula used by bag
bag$trees[2]                          # the second decision tree in the ensemble
bag$votes[105:115,]                   # votes of the 5 trees for observations 105 to 115
bag$prob[105:115,]                    # class probabilities (vote proportions) for observations 105 to 115
bag$class[105:115]                    # predicted classes for observations 105 to 115
#The bootstrap samples drawn to grow each of the 5 trees
bag$samples[105:115,]                 # rows 105 to 115 of the bootstrap-sample matrix (one column per tree)
#The importance component measures the relative importance of each input variable in the ensemble
bag$importance                        # relative importance of the input variables
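#A quick visual summary of the importance scores (a sketch; recent versions of
#adabag also provide importanceplot(bag) for the same purpose):
barplot(sort(bag$importance, decreasing=TRUE), las=2, cex.names=0.7,
        main="Bagging: relative variable importance")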

#Use the maxdepth option of the control argument to limit the depth of each tree
bag1=bagging(y~.,data_train,mfinal=5,control=rpart.control(maxdepth=3))# each tree is now at most 3 levels deep
bag1$trees[2]                         # inspect the second (now much shallower) tree

#2. Assess the predictive performance of the model on the test set data_test
pre_bag=predict(bag,data_test) # predict the class of every test observation with bag; store the result in pre_bag
names(pre_bag)                 # components of the prediction object
pre_bag$votes[1:10,]           # votes for the first 10 test observations
pre_bag$prob[1:10,]            # class probabilities for the first 10 test observations
pre_bag$class[1:10]            # predicted classes for the first 10 test observations
#The confusion and error components give the confusion matrix and the overall
#error rate directly:
pre_bag$confusion              # confusion matrix on the test set
pre_bag$error                  # overall error rate on the test set
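#As a check, the error component is simply the misclassification rate implied
#by the confusion matrix and can be reproduced by hand:
1-sum(diag(pre_bag$confusion))/sum(pre_bag$confusion)  # should equal pre_bag$error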

#The "yes" observations are far fewer than the "no" observations, so the class
#distribution is highly imbalanced; the error rate is therefore also computed
#separately for each class below.
sub_minor=which(data_test$y=="yes") # indices of the test observations whose true class is "yes" (minority class)
sub_major=which(data_test$y=="no")  # indices of the test observations whose true class is "no" (majority class)
length(sub_minor);length(sub_major) # number of test observations in each class
#Compute the overall error rate and the error rate within each class
err_bag=sum(pre_bag$class!=data_test$y)/nrow(data_test)# overall error rate on the test set
err_minor_bag=sum(pre_bag$class[sub_minor]!=data_test$y[sub_minor])/length(sub_minor)
                                    # error rate on the minority class "yes", stored in err_minor_bag
err_major_bag=sum(pre_bag$class[sub_major]!=data_test$y[sub_major])/length(sub_major)
                                    # error rate on the majority class "no", stored in err_major_bag
err_bag;err_minor_bag;err_major_bag
#The error rate on the minority class "yes" is about 0.637, while that on the
#majority class "no" is only about 0.0382: the model predicts the rare class poorly.
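#The figures above come from a single random train/test split. Assuming the
#installed version of adabag also provides bagging.cv() (and boosting.cv()), a
#v-fold cross-validated error estimate can be sketched as follows; mfinal is
#kept at 5 only to limit the running time.
bag_cv = bagging.cv(y~., data_train, v=10, mfinal=5)
bag_cv$confusion               # cross-validated confusion matrix
bag_cv$error                   # cross-validated overall error rate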

#####10.3.2 The Adaboost Algorithm#####
boo=boosting(y~.,data_train,mfinal=5)   # build an Adaboost model with 5 iterations
pre_boo=predict(boo,data_test)          # predict the test set with the Adaboost model
err_boo=sum(pre_boo$class!=data_test$y)/nrow(data_test)# overall error rate on the test set
err_minor_boo=sum(pre_boo$class[sub_minor]!=data_test$y[sub_minor])/length(sub_minor)
# error rate on the minority class "yes", stored in err_minor_boo
err_major_boo=sum(pre_boo$class[sub_major]!=data_test$y[sub_major])/length(sub_major)
# error rate on the majority class "no", stored in err_major_boo
err_boo;err_minor_boo;err_major_boo
#Compare these error rates with those of the Bagging model in 10.3.1.
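#To make that comparison easier, the six error rates can be gathered into a
#small table (a convenience step, not part of the original text):
data.frame(model=c("Bagging","Adaboost"),
           overall=c(err_bag,err_boo),
           minority_yes=c(err_minor_bag,err_minor_boo),
           majority_no=c(err_major_bag,err_major_boo))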