library(adabag)
## Warning: package 'adabag' was built under R version 3.6.1
## Loading required package: rpart
## Loading required package: caret
## Loading required package: lattice
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## Loading required package: foreach
## Loading required package: doParallel
## Warning: package 'doParallel' was built under R version 3.6.1
## Loading required package: iterators
## Loading required package: parallel
library(caret)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(randomForest)
## Warning: package 'randomForest' was built under R version 3.6.1
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
library(rpart)
# Load the Universal Bank data and split it 60/40 into training and
# validation sets.
#
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 read.csv() leaves
# text columns as character by default, but the classifiers and the
# confusionMatrix() calls below need Personal.Loan to be a factor.
dataset <- read.csv("UniversalBank_MD.csv", header = TRUE,
                    stringsAsFactors = TRUE)
# Keep only columns 3, 4 and 10 by position. Personal.Loan (the response used
# by every model below) must be one of them.
# NOTE(review): positional selection silently picks the wrong columns if the
# CSV layout changes -- consider selecting by name.
dataset <- dplyr::select(dataset, c(3, 4, 10))
dataframe <- dataset

# Fix the RNG state so the split is reproducible. No other set.seed() call is
# visible in this section, so later random draws continue from this state.
set.seed(1)
# Draw 60% of the row names for training; every remaining row is validation.
train.rows <- sample(row.names(dataframe), nrow(dataframe) * 0.6)
dataframet <- dataframe[train.rows, ]
valid.rows <- setdiff(row.names(dataframe), train.rows)
dataframev <- dataframe[valid.rows, ]
# Bagging
# Fit a bagged ensemble on the training set, using every other column as a
# predictor of Personal.Loan, then score the validation set.
modelbg <- bagging(Personal.Loan ~ ., data = dataframet)
predictionbg <- predict(modelbg, dataframev, type = "class")
# Build the predicted factor with the reference's levels rather than letting
# factor() infer them from the predictions: if only one class were ever
# predicted, the inferred factor would have a single level and would no
# longer line up with the reference.
confusionMatrix(
  factor(predictionbg$class, levels = levels(dataframev$Personal.Loan)),
  dataframev$Personal.Loan
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Avail_A Avail_N
## Avail_A 48 25
## Avail_N 157 1770
##
## Accuracy : 0.909
## 95% CI : (0.8955, 0.9212)
## No Information Rate : 0.8975
## P-Value [Acc > NIR] : 0.04677
##
## Kappa : 0.3081
##
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.2341
## Specificity : 0.9861
## Pos Pred Value : 0.6575
## Neg Pred Value : 0.9185
## Prevalence : 0.1025
## Detection Rate : 0.0240
## Detection Prevalence : 0.0365
## Balanced Accuracy : 0.6101
##
## 'Positive' Class : Avail_A
##
# Boosting
# Fit a boosted ensemble on the training set, using every other column as a
# predictor of Personal.Loan, then score the validation set.
modelboost <- boosting(Personal.Loan ~ ., data = dataframet)
predictionboost <- predict(modelboost, dataframev, type = "class")
# Build the predicted factor with the reference's levels rather than letting
# as.factor() infer them from the predictions: if only one class were ever
# predicted, the inferred factor would have a single level and would no
# longer line up with the reference.
confusionMatrix(
  factor(predictionboost$class, levels = levels(dataframev$Personal.Loan)),
  dataframev$Personal.Loan
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Avail_A Avail_N
## Avail_A 70 69
## Avail_N 135 1726
##
## Accuracy : 0.898
## 95% CI : (0.8839, 0.9109)
## No Information Rate : 0.8975
## P-Value [Acc > NIR] : 0.4892
##
## Kappa : 0.3534
##
## Mcnemar's Test P-Value : 5.341e-06
##
## Sensitivity : 0.3415
## Specificity : 0.9616
## Pos Pred Value : 0.5036
## Neg Pred Value : 0.9275
## Prevalence : 0.1025
## Detection Rate : 0.0350
## Detection Prevalence : 0.0695
## Balanced Accuracy : 0.6515
##
## 'Positive' Class : Avail_A
##
# Random forest
# Train on the training partition with all remaining columns as predictors,
# predict class labels for the validation partition, and tabulate them
# against the observed Personal.Loan values.
modelrf <- randomForest(Personal.Loan ~ ., data = dataframet)
predictionrf <- predict(
  object = modelrf,
  newdata = dataframev,
  type = "class"
)
confusionMatrix(
  data = as.factor(predictionrf),
  reference = dataframev$Personal.Loan
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Avail_A Avail_N
## Avail_A 65 75
## Avail_N 140 1720
##
## Accuracy : 0.8925
## 95% CI : (0.8781, 0.9057)
## No Information Rate : 0.8975
## P-Value [Acc > NIR] : 0.7817
##
## Kappa : 0.3203
##
## Mcnemar's Test P-Value : 1.273e-05
##
## Sensitivity : 0.3171
## Specificity : 0.9582
## Pos Pred Value : 0.4643
## Neg Pred Value : 0.9247
## Prevalence : 0.1025
## Detection Rate : 0.0325
## Detection Prevalence : 0.0700
## Balanced Accuracy : 0.6376
##
## 'Positive' Class : Avail_A
##
# Single tree (rpart)
# Train one tree on the training partition, predict class labels for the
# validation partition, and tabulate them against the observed
# Personal.Loan values.
modelrpart <- rpart(formula = Personal.Loan ~ ., data = dataframet)
predictionrpart <- predict(
  object = modelrpart,
  newdata = dataframev,
  type = "class"
)
confusionMatrix(
  data = as.factor(predictionrpart),
  reference = dataframev$Personal.Loan
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Avail_A Avail_N
## Avail_A 43 22
## Avail_N 162 1773
##
## Accuracy : 0.908
## 95% CI : (0.8945, 0.9203)
## No Information Rate : 0.8975
## P-Value [Acc > NIR] : 0.06367
##
## Kappa : 0.2831
##
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.2098
## Specificity : 0.9877
## Pos Pred Value : 0.6615
## Neg Pred Value : 0.9163
## Prevalence : 0.1025
## Detection Rate : 0.0215
## Detection Prevalence : 0.0325
## Balanced Accuracy : 0.5987
##
## 'Positive' Class : Avail_A
##
# Linear discriminant analysis (MASS::lda)
# Fit on the training partition and score the validation partition. The
# predicted labels are read from the `class` element of the predict() result.
modellda <- MASS::lda(Personal.Loan ~ ., data = dataframet)
predictionlda <- predict(object = modellda, newdata = dataframev)
confusionMatrix(
  data = predictionlda$class,
  reference = dataframev$Personal.Loan
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Avail_A Avail_N
## Avail_A 97 91
## Avail_N 108 1704
##
## Accuracy : 0.9005
## 95% CI : (0.8865, 0.9133)
## No Information Rate : 0.8975
## P-Value [Acc > NIR] : 0.3456
##
## Kappa : 0.4386
##
## Mcnemar's Test P-Value : 0.2567
##
## Sensitivity : 0.4732
## Specificity : 0.9493
## Pos Pred Value : 0.5160
## Neg Pred Value : 0.9404
## Prevalence : 0.1025
## Detection Rate : 0.0485
## Detection Prevalence : 0.0940
## Balanced Accuracy : 0.7112
##
## 'Positive' Class : Avail_A
##
# Logistic regression
# Fit a binomial GLM on the training set and obtain predicted probabilities
# for the validation set.
modellr <- glm(Personal.Loan ~ ., data = dataframet, family = "binomial")
predictionlr <- predict(modellr, dataframev, type = "response")
# With a factor response, glm() treats the first level as "failure" and the
# rest as "success", so the fitted probability refers to the second level.
# The mapping below therefore relies on the level order Avail_A, Avail_N
# (as shown in the confusion matrices): probability > 0.5 means "Avail_N".
# Levels are pinned to the reference's levels so the predicted factor keeps
# both classes even if every probability falls on one side of the cutoff.
confusionMatrix(
  factor(
    ifelse(predictionlr > 0.5, "Avail_N", "Avail_A"),
    levels = levels(dataframev$Personal.Loan)
  ),
  dataframev$Personal.Loan
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Avail_A Avail_N
## Avail_A 65 48
## Avail_N 140 1747
##
## Accuracy : 0.906
## 95% CI : (0.8924, 0.9184)
## No Information Rate : 0.8975
## P-Value [Acc > NIR] : 0.111
##
## Kappa : 0.3624
##
## Mcnemar's Test P-Value : 3.204e-11
##
## Sensitivity : 0.3171
## Specificity : 0.9733
## Pos Pred Value : 0.5752
## Neg Pred Value : 0.9258
## Prevalence : 0.1025
## Detection Rate : 0.0325
## Detection Prevalence : 0.0565
## Balanced Accuracy : 0.6452
##
## 'Positive' Class : Avail_A
##