library(caret)
# data partition: split the rows of df 80/20 with caret's
# createDataPartition(), sampling on the outcome column df$Default
set.seed(2341)  # fixed seed so the split is reproducible
trainIndex <- createDataPartition(df$Default, p = 0.80, list = FALSE)
# 80% training data
train.df <- df[trainIndex, ]
# 20% testing data (every row not selected for training)
test.df <- df[-trainIndex, ]
# report the dimensions of both partitions
dim(train.df)
dim(test.df)
## [1] 23681 9
## [1] 5920 9
# recode the training outcome: "1" becomes "Yes", any other value becomes "No".
# Stored as a factor with explicit levels so the class order ("No", "Yes")
# is fixed here rather than left to implicit coercion later on.
train.df$Default <- factor(
  ifelse(train.df$Default == "1", "Yes", "No"),
  levels = c("No", "Yes")
)
# fit a bagged CART classifier (caret method "treebag") on the training data
set.seed(123)  # fixed seed so the fit is reproducible
ModelBagging <- train(
  Default ~ CreditLimit + Male + Education + MaritalStatus + Age +
    BillOutstanding + LastPayment,
  data = train.df,
  method = "treebag",
  # NOTE(review): trctrl is not defined in this part of the script; it is
  # presumably a trainControl() object created earlier - confirm
  trControl = trctrl,
  # the remaining arguments are not named train() parameters; they are
  # handed on through `...`
  nbagg = 50,
  parms = list(split = "gini"),
  importance = TRUE
)
# print the fitted model summary
ModelBagging
## Bagged CART
##
## 23681 samples
## 7 predictor
## 2 classes: 'No', 'Yes'
##
## No pre-processing
## Resampling: None
# predicted class probabilities for the test data (columns "No" and "Yes").
# NOTE(review): PredBagModel was used below without a visible assignment;
# it is derived here from ModelBagging so the plot can run - confirm this
# matches the originally intended object.
PredBagModel <- predict(ModelBagging, test.df, type = "prob")
# plot of the predicted probability of default, one point per test row
plot(PredBagModel$Yes,
     main = "Scatterplot of Probabilities of default (test data)",
     xlab = "Customer ID",
     ylab = "Predicted Probability of default")
# class labels shared by predictions and observed outcomes; fixing the levels
# keeps the cross-tabulation 2 x 2 even when one class is never predicted
ClassLevels <- c("No", "Yes")
# taking the cut-off probability 50%: above it the customer is labelled "Yes"
Predicted <- factor(ifelse(PredBagModel$Yes > 0.50, "Yes", "No"),
                    levels = ClassLevels)
# observed outcome of the test data, recoded to the same labels
Actual <- factor(ifelse(test.df$Default == "1", "Yes", "No"),
                 levels = ClassLevels)
# making confusion matrix, with "Yes" (default) as the positive class
cm <- confusionMatrix(table(Predicted, Actual), positive = "Yes")
cm
## Confusion Matrix and Statistics
##
## Actual
## Predicted No Yes
## No 4293 1104
## Yes 306 217
##
## Accuracy : 0.7618
## 95% CI : (0.7508, 0.7726)
## No Information Rate : 0.7769
## P-Value [Acc > NIR] : 0.9972
##
## Kappa : 0.1245
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.16427
## Specificity : 0.93346
## Pos Pred Value : 0.41491
## Neg Pred Value : 0.79544
## Prevalence : 0.22314
## Detection Rate : 0.03666
## Detection Prevalence : 0.08834
## Balanced Accuracy : 0.54887
##
## 'Positive' Class : Yes
##
# loading the package
library(ROCR)
# predicted class probabilities for the test data
BagProbabilities <- predict(ModelBagging, test.df, type = "prob")
# build the ROCR prediction object from the "Yes" probability, selected by
# column name rather than by position, against the observed outcome
BagPrediction <- prediction(BagProbabilities[["Yes"]], test.df$Default)
# true positive rate against false positive rate
Bagperformance <- performance(BagPrediction, "tpr", "fpr")
# plotting ROC curve
plot(Bagperformance, main = "ROC Curve", col = 2, lwd = 2)
# dotted diagonal reference line (intercept 0, slope 1)
abline(a = 0, b = 1, lwd = 2, lty = 3, col = "black")
library(ROCR)
# area under the ROC curve: compute the "auc" measure, then pull the value
# out of the first element of the performance object's y.values slot
AucPerformance <- performance(BagPrediction, measure = "auc")
aucBag <- slot(AucPerformance, "y.values")[[1]]
aucBag
## [1] 0.632259