library(caret)
# ---- Data partition ----
# Fix the RNG seed so the train/test split is reproducible.
set.seed(2341)
# Sample 80% of the row indices based on the outcome column `default`;
# list = FALSE returns the indices in a form usable for row subsetting.
trainIndex <- createDataPartition(df$default, p = 0.80, list = FALSE)
# 80% training data
train.df <- df[trainIndex, ]
# 20% testing data (every row not selected for training)
test.df <- df[-trainIndex, ]
# Sanity check: dimensions of both partitions
dim(train.df)
## [1] 8001 4
dim(test.df)
## [1] 1999 4
# ---- Bagged CART model ----
# Training control: fit once on the whole training set with no resampling
# (consistent with the "Resampling: None" line in the printed summary below).
trctrl <- trainControl(method = "none")
# Seed set immediately before training so the bootstrap samples are reproducible.
set.seed(123)
# fitting bagged CART classification model (caret method "treebag")
BaggingModel <- train(
  default ~ .,
  data = train.df,
  method = "treebag",
  nbagg = 50, # passed through train()'s ... to the underlying bagging fit
  trControl = trctrl
)
# model summary
BaggingModel
## Bagged CART
##
## 8001 samples
## 3 predictor
## 2 classes: 'No', 'Yes'
##
## No pre-processing
## Resampling: None
# ---- Test-set predictions and confusion matrix ----
# Predicted class probabilities for the test data (columns "No" and "Yes").
# This must exist before it is plotted or thresholded below.
PredBagProb <- predict(BaggingModel, test.df, type = "prob")
# plot of probabilities
plot(
  PredBagProb$Yes,
  main = "Scatterplot of Probabilities of default (test data)",
  xlab = "Customer ID",
  ylab = "Predicted Probability of default"
)
# taking the cut-off probability 50%
PredBagModel <- ifelse(PredBagProb$Yes > 0.50, "Yes", "No")
# saving predicted vector as factor
Pred <- as.factor(PredBagModel)
# ordering the vectors so that "Yes" is the first level in both,
# which puts the positive class first in the printed table
Predicted <- ordered(Pred, levels = c("Yes", "No"))
Actual <- ordered(test.df$default, levels = c("Yes", "No"))
# making confusion matrix
cm <- confusionMatrix(data = Predicted, reference = Actual, positive = "Yes")
cm
## Confusion Matrix and Statistics
##
## Reference
## Prediction Yes No
## Yes 29 16
## No 37 1917
##
## Accuracy : 0.9735
## 95% CI : (0.9655, 0.9801)
## No Information Rate : 0.967
## P-Value [Acc > NIR] : 0.05520
##
## Kappa : 0.5094
##
## Mcnemar's Test P-Value : 0.00601
##
## Sensitivity : 0.43939
## Specificity : 0.99172
## Pos Pred Value : 0.64444
## Neg Pred Value : 0.98106
## Prevalence : 0.03302
## Detection Rate : 0.01451
## Detection Prevalence : 0.02251
## Balanced Accuracy : 0.71556
##
## 'Positive' Class : Yes
##
# ---- ROC curve and AUC ----
# ROCR supplies prediction() and performance(); it has to be attached before
# they are called.
library(ROCR)
## Warning: package 'ROCR' was built under R version 4.0.4
# Class probabilities for the test data; the "Yes" column is selected by name
# so the result does not depend on column position.
BagProb <- predict(BaggingModel, test.df, type = "prob")
BagPrediction <- prediction(BagProb[["Yes"]], test.df$default)
# True-positive rate against false-positive rate
Bagperformance <- performance(BagPrediction, "tpr", "fpr")
# plotting ROC curve
plot(Bagperformance, main = "ROC Curve", col = 2, lwd = 2)
# dotted diagonal reference line (intercept 0, slope 1)
abline(a = 0, b = 1, lwd = 2, lty = 3, col = "black")
# area under curve
aucBag <- performance(BagPrediction, measure = "auc")
# performance() returns an S4 object; the AUC is the first element of y.values
aucBag <- aucBag@y.values[[1]]
aucBag
## [1] 0.9061123