Ensemble

library(adabag)

## Warning: package 'adabag' was built under R version 3.6.1

## Loading required package: rpart

## Loading required package: caret

## Loading required package: lattice

## Loading required package: ggplot2

## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang

## Loading required package: foreach

## Loading required package: doParallel

## Warning: package 'doParallel' was built under R version 3.6.1

## Loading required package: iterators

## Loading required package: parallel

library(caret)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(randomForest)

## Warning: package 'randomForest' was built under R version 3.6.1

## randomForest 4.6-14

## Type rfNews() to see new features/changes/bug fixes.

## 
## Attaching package: 'randomForest'

## The following object is masked from 'package:dplyr':
## 
##     combine

## The following object is masked from 'package:ggplot2':
## 
##     margin

library(rpart)

# Load the Universal Bank data.
# stringsAsFactors is set explicitly: since R 4.0.0 read.csv() reads strings
# as character by default, but the classifiers and confusionMatrix() calls
# below need Personal.Loan to be a factor.
dataset <- read.csv("UniversalBank_MD.csv", header = TRUE,
                    stringsAsFactors = TRUE)
# Keep only columns 3, 4 and 10 (two predictors plus the response;
# presumably column 10 is Personal.Loan -- verify against the CSV).
dataset <- dplyr::select(dataset, c(3, 4, 10))
dataframe <- dataset

# Reproducible 60/40 split into training and validation rows, by row name.
set.seed(1)
train.rows <- sample(row.names(dataframe), nrow(dataframe) * 0.6)
dataframet <- dataframe[train.rows, ]
# Everything not drawn for training becomes the validation set.
valid.rows <- setdiff(row.names(dataframe), train.rows)
dataframev <- dataframe[valid.rows, ]

# Bagging ----
# Fit a bagged ensemble on the training rows, predict the validation rows,
# and cross-tabulate predicted vs. actual Personal.Loan.
modelbg <- bagging(Personal.Loan ~ ., data = dataframet)
predictionbg <- predict(modelbg, newdata = dataframev, type = "class")
confusionMatrix(
  data = factor(predictionbg$class),
  reference = dataframev$Personal.Loan
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Avail_A Avail_N
##    Avail_A      48      25
##    Avail_N     157    1770
##                                           
##                Accuracy : 0.909           
##                  95% CI : (0.8955, 0.9212)
##     No Information Rate : 0.8975          
##     P-Value [Acc > NIR] : 0.04677         
##                                           
##                   Kappa : 0.3081          
##                                           
##  Mcnemar's Test P-Value : < 2e-16         
##                                           
##             Sensitivity : 0.2341          
##             Specificity : 0.9861          
##          Pos Pred Value : 0.6575          
##          Neg Pred Value : 0.9185          
##              Prevalence : 0.1025          
##          Detection Rate : 0.0240          
##    Detection Prevalence : 0.0365          
##       Balanced Accuracy : 0.6101          
##                                           
##        'Positive' Class : Avail_A         
##

# Boosting ----
# Fit a boosted ensemble on the training rows, predict the validation rows,
# and cross-tabulate predicted vs. actual Personal.Loan.
modelboost <- boosting(Personal.Loan ~ ., data = dataframet)
predictionboost <- predict(modelboost, newdata = dataframev, type = "class")
confusionMatrix(
  data = as.factor(predictionboost$class),
  reference = dataframev$Personal.Loan
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Avail_A Avail_N
##    Avail_A      70      69
##    Avail_N     135    1726
##                                           
##                Accuracy : 0.898           
##                  95% CI : (0.8839, 0.9109)
##     No Information Rate : 0.8975          
##     P-Value [Acc > NIR] : 0.4892          
##                                           
##                   Kappa : 0.3534          
##                                           
##  Mcnemar's Test P-Value : 5.341e-06       
##                                           
##             Sensitivity : 0.3415          
##             Specificity : 0.9616          
##          Pos Pred Value : 0.5036          
##          Neg Pred Value : 0.9275          
##              Prevalence : 0.1025          
##          Detection Rate : 0.0350          
##    Detection Prevalence : 0.0695          
##       Balanced Accuracy : 0.6515          
##                                           
##        'Positive' Class : Avail_A         
##

# Random forest ----
# Fit a random forest on the training rows, predict the validation rows,
# and cross-tabulate predicted vs. actual Personal.Loan.
modelrf <- randomForest(Personal.Loan ~ ., data = dataframet)
predictionrf <- predict(modelrf, newdata = dataframev, type = "class")
confusionMatrix(
  data = as.factor(predictionrf),
  reference = dataframev$Personal.Loan
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Avail_A Avail_N
##    Avail_A      65      75
##    Avail_N     140    1720
##                                           
##                Accuracy : 0.8925          
##                  95% CI : (0.8781, 0.9057)
##     No Information Rate : 0.8975          
##     P-Value [Acc > NIR] : 0.7817          
##                                           
##                   Kappa : 0.3203          
##                                           
##  Mcnemar's Test P-Value : 1.273e-05       
##                                           
##             Sensitivity : 0.3171          
##             Specificity : 0.9582          
##          Pos Pred Value : 0.4643          
##          Neg Pred Value : 0.9247          
##              Prevalence : 0.1025          
##          Detection Rate : 0.0325          
##    Detection Prevalence : 0.0700          
##       Balanced Accuracy : 0.6376          
##                                           
##        'Positive' Class : Avail_A         
##

# Single classification tree (rpart) ----
# Fit one tree on the training rows, predict class labels for the validation
# rows, and cross-tabulate predicted vs. actual Personal.Loan.
modelrpart <- rpart(Personal.Loan ~ ., data = dataframet)
predictionrpart <- predict(modelrpart, newdata = dataframev, type = "class")
confusionMatrix(
  data = as.factor(predictionrpart),
  reference = dataframev$Personal.Loan
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Avail_A Avail_N
##    Avail_A      43      22
##    Avail_N     162    1773
##                                           
##                Accuracy : 0.908           
##                  95% CI : (0.8945, 0.9203)
##     No Information Rate : 0.8975          
##     P-Value [Acc > NIR] : 0.06367         
##                                           
##                   Kappa : 0.2831          
##                                           
##  Mcnemar's Test P-Value : < 2e-16         
##                                           
##             Sensitivity : 0.2098          
##             Specificity : 0.9877          
##          Pos Pred Value : 0.6615          
##          Neg Pred Value : 0.9163          
##              Prevalence : 0.1025          
##          Detection Rate : 0.0215          
##    Detection Prevalence : 0.0325          
##       Balanced Accuracy : 0.5987          
##                                           
##        'Positive' Class : Avail_A         
##

# Linear discriminant analysis ----
# Fit LDA on the training rows (MASS is called by namespace, not attached),
# predict the validation rows, and cross-tabulate predicted vs. actual
# Personal.Loan using the `class` component of the prediction.
modellda <- MASS::lda(Personal.Loan ~ ., data = dataframet)
predictionlda <- predict(modellda, newdata = dataframev)
confusionMatrix(
  data = predictionlda$class,
  reference = dataframev$Personal.Loan
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Avail_A Avail_N
##    Avail_A      97      91
##    Avail_N     108    1704
##                                           
##                Accuracy : 0.9005          
##                  95% CI : (0.8865, 0.9133)
##     No Information Rate : 0.8975          
##     P-Value [Acc > NIR] : 0.3456          
##                                           
##                   Kappa : 0.4386          
##                                           
##  Mcnemar's Test P-Value : 0.2567          
##                                           
##             Sensitivity : 0.4732          
##             Specificity : 0.9493          
##          Pos Pred Value : 0.5160          
##          Neg Pred Value : 0.9404          
##              Prevalence : 0.1025          
##          Detection Rate : 0.0485          
##    Detection Prevalence : 0.0940          
##       Balanced Accuracy : 0.7112          
##                                           
##        'Positive' Class : Avail_A         
##

# Logistic regression ----
# Fit a binomial GLM on the training rows and score the validation rows with
# predicted probabilities (type = "response").
modellr <- glm(Personal.Loan ~ ., data = dataframet, family = "binomial")
predictionlr <- predict(modellr, newdata = dataframev, type = "response")

# For a factor response glm() treats the first level as "failure", so the
# fitted probability is that of the second level. Deriving the labels from
# the response levels keeps the 0.5 cut-off mapping correct without
# hard-coding label strings, and pinning `levels` keeps both classes in the
# predicted factor even when every probability falls on one side of 0.5.
lr_levels <- levels(dataframet$Personal.Loan)
confusionMatrix(
  data = factor(
    ifelse(predictionlr > 0.5, lr_levels[2], lr_levels[1]),
    levels = lr_levels
  ),
  reference = dataframev$Personal.Loan
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Avail_A Avail_N
##    Avail_A      65      48
##    Avail_N     140    1747
##                                           
##                Accuracy : 0.906           
##                  95% CI : (0.8924, 0.9184)
##     No Information Rate : 0.8975          
##     P-Value [Acc > NIR] : 0.111           
##                                           
##                   Kappa : 0.3624          
##                                           
##  Mcnemar's Test P-Value : 3.204e-11       
##                                           
##             Sensitivity : 0.3171          
##             Specificity : 0.9733          
##          Pos Pred Value : 0.5752          
##          Neg Pred Value : 0.9258          
##              Prevalence : 0.1025          
##          Detection Rate : 0.0325          
##    Detection Prevalence : 0.0565          
##       Balanced Accuracy : 0.6452          
##                                           
##        'Positive' Class : Avail_A         
##

Ensemble

Kartikeya Bolar

17/08/2019