LOADING DATA INTO THE R ENVIRONMENT
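
The loading and partitioning code is not echoed in this section. Below is a minimal, hypothetical sketch of the step, assuming a CSV source and the trainData.dt / testData.dt names used later; the reported sizes (23,681 training rows and 5,920 test rows) correspond to an 80/20 split.

library(caret)

# hypothetical file name; the true data source is not shown in this section
LoanData <- read.csv("LoanData.csv", stringsAsFactors = TRUE)
LoanData$Default <- factor(LoanData$Default, levels = c("No", "Yes"))

# assumed seed; an 80/20 split reproduces the sample sizes reported below
set.seed(1234)
inTrain      <- createDataPartition(LoanData$Default, p = 0.8, list = FALSE)
trainData.dt <- LoanData[inTrain, ]    # 23,681 rows used for training
testData.dt  <- LoanData[-inTrain, ]   # 5,920 rows held out for testing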

TRAINING THE BOOSTING TREE MODEL

Running the Training Model
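
The model is fit with caret's "xgbTree" method under 10-fold cross-validation. A sketch, under assumptions, of the call that produces the summary below; the seed is an assumption, trainData.dt is the training split from above, and the tuning grid in the output is caret's default grid for xgbTree.

library(caret)

set.seed(1234)   # assumed seed
ModelXGboost <- train(Default ~ .,                # Default is the binary outcome
                      data      = trainData.dt,   # training split (assumed name)
                      method    = "xgbTree",      # xgboost via caret
                      trControl = trainControl(method = "cv", number = 10))
ModelXGboost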

## eXtreme Gradient Boosting 
## 
## 23681 samples
##     7 predictor
##     2 classes: 'No', 'Yes' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 21313, 21314, 21314, 21313, 21312, 21312, ... 
## Resampling results across tuning parameters:
## 
##   eta  max_depth  colsample_bytree  subsample  nrounds  Accuracy   Kappa
##   0.3  1          0.6               0.50        50      0.7779654  0.05215425
##   0.3  1          0.6               0.50       100      0.7778390  0.06383431
##   0.3  1          0.6               0.50       150      0.7782612  0.06823095
##   0.3  1          0.6               0.75        50      0.7774588  0.04133710
##   0.3  1          0.6               0.75       100      0.7786833  0.07017065
##   0.3  1          0.6               0.75       150      0.7780078  0.06825885
##   0.3  1          0.6               1.00        50      0.7778811  0.02171847
##   0.3  1          0.6               1.00       100      0.7785146  0.06503546
##   0.3  1          0.6               1.00       150      0.7782191  0.06556696
##   0.3  1          0.8               0.50        50      0.7778391  0.05511830
##   0.3  1          0.8               0.50       100      0.7780503  0.06320222
##   0.3  1          0.8               0.50       150      0.7786834  0.07088069
##   0.3  1          0.8               0.75        50      0.7775012  0.04289436
##   0.3  1          0.8               0.75       100      0.7777967  0.06615456
##   0.3  1          0.8               0.75       150      0.7778811  0.06653417
##   0.3  1          0.8               1.00        50      0.7777122  0.02260535
##   0.3  1          0.8               1.00       100      0.7780502  0.06187452
##   0.3  1          0.8               1.00       150      0.7783879  0.06626162
##   0.3  2          0.6               0.50        50      0.7783877  0.06702150
##   0.3  2          0.6               0.50       100      0.7785566  0.07608836
##   0.3  2          0.6               0.50       150      0.7796546  0.08797840
##   0.3  2          0.6               0.75        50      0.7788945  0.06753951
##   0.3  2          0.6               0.75       100      0.7789790  0.07592400
##   0.3  2          0.6               0.75       150      0.7794012  0.08236265
##   0.3  2          0.6               1.00        50      0.7789790  0.06514723
##   0.3  2          0.6               1.00       100      0.7794435  0.07794471
##   0.3  2          0.6               1.00       150      0.7797389  0.08342203
##   0.3  2          0.8               0.50        50      0.7799079  0.07597178
##   0.3  2          0.8               0.50       100      0.7799925  0.08082709
##   0.3  2          0.8               0.50       150      0.7791060  0.08440120
##   0.3  2          0.8               0.75        50      0.7794436  0.07269380
##   0.3  2          0.8               0.75       100      0.7796125  0.08095632
##   0.3  2          0.8               0.75       150      0.7804992  0.08938600
##   0.3  2          0.8               1.00        50      0.7795702  0.06688971
##   0.3  2          0.8               1.00       100      0.7807103  0.08231574
##   0.3  2          0.8               1.00       150      0.7811748  0.09060890
##   0.3  3          0.6               0.50        50      0.7797392  0.08312853
##   0.3  3          0.6               0.50       100      0.7785991  0.08807302
##   0.3  3          0.6               0.50       150      0.7774589  0.09372481
##   0.3  3          0.6               0.75        50      0.7807102  0.08750676
##   0.3  3          0.6               0.75       100      0.7801616  0.09548614
##   0.3  3          0.6               0.75       150      0.7802457  0.10123761
##   0.3  3          0.6               1.00        50      0.7801193  0.08467924
##   0.3  3          0.6               1.00       100      0.7796125  0.09145568
##   0.3  3          0.6               1.00       150      0.7792750  0.09093150
##   0.3  3          0.8               0.50        50      0.7789790  0.08481403
##   0.3  3          0.8               0.50       100      0.7786834  0.09358372
##   0.3  3          0.8               0.50       150      0.7781768  0.10131228
##   0.3  3          0.8               0.75        50      0.7801191  0.08641733
##   0.3  3          0.8               0.75       100      0.7797393  0.09409560
##   0.3  3          0.8               0.75       150      0.7788101  0.09811641
##   0.3  3          0.8               1.00        50      0.7807106  0.08789884
##   0.3  3          0.8               1.00       100      0.7799925  0.09326249
##   0.3  3          0.8               1.00       150      0.7793589  0.09439176
##   0.4  1          0.6               0.50        50      0.7780923  0.05504424
##   0.4  1          0.6               0.50       100      0.7771212  0.06279036
##   0.4  1          0.6               0.50       150      0.7770366  0.06844911
##   0.4  1          0.6               0.75        50      0.7779656  0.05608456
##   0.4  1          0.6               0.75       100      0.7781767  0.06711174
##   0.4  1          0.6               0.75       150      0.7782611  0.06893537
##   0.4  1          0.6               1.00        50      0.7779234  0.04756445
##   0.4  1          0.6               1.00       100      0.7782189  0.06552158
##   0.4  1          0.6               1.00       150      0.7783033  0.06850041
##   0.4  1          0.8               0.50        50      0.7785568  0.06386828
##   0.4  1          0.8               0.50       100      0.7773320  0.06360168
##   0.4  1          0.8               0.50       150      0.7773320  0.06428661
##   0.4  1          0.8               0.75        50      0.7776278  0.05726900
##   0.4  1          0.8               0.75       100      0.7779235  0.06677700
##   0.4  1          0.8               0.75       150      0.7773745  0.06627247
##   0.4  1          0.8               1.00        50      0.7776279  0.04957321
##   0.4  1          0.8               1.00       100      0.7783034  0.06588960
##   0.4  1          0.8               1.00       150      0.7781344  0.06684371
##   0.4  2          0.6               0.50        50      0.7793164  0.06921649
##   0.4  2          0.6               0.50       100      0.7784722  0.07656174
##   0.4  2          0.6               0.50       150      0.7797391  0.09109936
##   0.4  2          0.6               0.75        50      0.7798660  0.08367121
##   0.4  2          0.6               0.75       100      0.7792749  0.08404956
##   0.4  2          0.6               0.75       150      0.7794857  0.08887245
##   0.4  2          0.6               1.00        50      0.7799924  0.07977519
##   0.4  2          0.6               1.00       100      0.7803726  0.08651224
##   0.4  2          0.6               1.00       150      0.7798660  0.08915767
##   0.4  2          0.8               0.50        50      0.7777968  0.06914630
##   0.4  2          0.8               0.50       100      0.7789367  0.08456212
##   0.4  2          0.8               0.50       150      0.7791479  0.09514198
##   0.4  2          0.8               0.75        50      0.7797814  0.08081818
##   0.4  2          0.8               0.75       100      0.7794857  0.08464022
##   0.4  2          0.8               0.75       150      0.7798659  0.09210846
##   0.4  2          0.8               1.00        50      0.7802880  0.07731593
##   0.4  2          0.8               1.00       100      0.7796549  0.08607558
##   0.4  2          0.8               1.00       150      0.7806257  0.09543951
##   0.4  3          0.6               0.50        50      0.7793590  0.09205747
##   0.4  3          0.6               0.50       100      0.7771634  0.09561430
##   0.4  3          0.6               0.50       150      0.7751366  0.10153557
##   0.4  3          0.6               0.75        50      0.7793592  0.08614713
##   0.4  3          0.6               0.75       100      0.7788101  0.09325227
##   0.4  3          0.6               0.75       150      0.7764878  0.09420404
##   0.4  3          0.6               1.00        50      0.7800350  0.08444604
##   0.4  3          0.6               1.00       100      0.7788528  0.08937415
##   0.4  3          0.6               1.00       150      0.7784726  0.09450598
##   0.4  3          0.8               0.50        50      0.7789367  0.09372866
##   0.4  3          0.8               0.50       100      0.7765298  0.09863778
##   0.4  3          0.8               0.50       150      0.7760226  0.10782700
##   0.4  3          0.8               0.75        50      0.7792323  0.08619952
##   0.4  3          0.8               0.75       100      0.7774164  0.09320778
##   0.4  3          0.8               0.75       150      0.7767831  0.09638328
##   0.4  3          0.8               1.00        50      0.7796549  0.08926219
##   0.4  3          0.8               1.00       100      0.7793166  0.09660444
##   0.4  3          0.8               1.00       150      0.7783035  0.09794811
## 
## Tuning parameter 'gamma' was held constant at a value of 0
## 
## Tuning parameter 'min_child_weight' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were nrounds = 150, max_depth = 2,
##  eta = 0.3, gamma = 0, colsample_bytree = 0.8, min_child_weight = 1
##  and subsample = 1.

Variable Importance in Boosting Model
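
A short sketch of how the variable importance for this model can be extracted and plotted with caret's varImp(), which works directly on a fitted train object:

ImpVar <- varImp(ModelXGboost)   # scaled importance of the 7 predictors
ImpVar
plot(ImpVar, main = "Variable Importance in Boosting Model")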

TESTING THE BOOSTING MODEL

Confusion Matrix at 50% Cut-Off Probability
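
A minimal sketch (assuming testData.dt and its Default outcome column from the data-preparation step) of how the 50% cut-off matrix below is obtained with caret:

# class probabilities on the test set, then hard classes at the 0.5 cut-off
PredBoosting <- predict(ModelXGboost, testData.dt, type = "prob")
Predicted <- ordered(ifelse(PredBoosting$Yes > 0.5, "Yes", "No"),
                     levels = c("Yes", "No"))
Actual    <- ordered(testData.dt$Default, levels = c("Yes", "No"))
confusionMatrix(data = Predicted, reference = Actual, positive = "Yes")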

## Confusion Matrix and Statistics
## 
##          Actual
## Predicted  Yes   No
##       Yes   92   85
##       No  1229 4514
##                                           
##                Accuracy : 0.778           
##                  95% CI : (0.7672, 0.7886)
##     No Information Rate : 0.7769          
##     P-Value [Acc > NIR] : 0.4207          
##                                           
##                   Kappa : 0.074           
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.06964         
##             Specificity : 0.98152         
##          Pos Pred Value : 0.51977         
##          Neg Pred Value : 0.78600         
##              Prevalence : 0.22314         
##          Detection Rate : 0.01554         
##    Detection Prevalence : 0.02990         
##       Balanced Accuracy : 0.52558         
##                                           
##        'Positive' Class : Yes             
## 
##          Actual
## Predicted  Yes   No
##       Yes  1.6  1.4
##       No  20.8 76.2
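
The second table restates the same matrix as percentages of all 5,920 test cases (for example, 92 correctly flagged defaults out of 5,920 is about 1.6%). Reusing the objects from the sketch above, one way to reproduce it:

round(100 * prop.table(table(Predicted, Actual)), 1)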

Performance Metrics at Different Cut-Off Probabilities

library(dplyr)
library(caret)    # provides confusionMatrix()

# predict class probabilities on the test set once, outside the loop
PredBoosting <- predict(ModelXGboost, testData.dt, type = "prob")

# function returning the confusion-matrix metrics for a given cut-off probability
CmFn <- function(cutoff) {
      # hard classes at the given cut-off, ordered "Yes" before "No"
      Predicted <- ordered(ifelse(PredBoosting$Yes > cutoff, "Yes", "No"),
                           levels = c("Yes", "No"))
      Actual    <- ordered(testData.dt$Default, levels = c("Yes", "No"))

      # confusionMatrix() from the caret package
      cm1 <- confusionMatrix(data = Predicted, reference = Actual, positive = "Yes")

      # extract accuracy, sensitivity, specificity, and kappa ([[ ]] drops names)
      data.frame(Accuracy    = cm1$overall[["Accuracy"]],
                 Sensitivity = cm1$byClass[["Sensitivity"]],
                 Specificity = cm1$byClass[["Specificity"]],
                 Kappa       = cm1$overall[["Kappa"]])
}

# sequence of cut-off probabilities
cutoff1 <- seq(0.1, 0.9, by = 0.05)
# apply CmFn() to every cut-off and stack the rows into one table
tab2 <- lapply(cutoff1, CmFn)
tab3 <- bind_rows(tab2)                 # dplyr::bind_rows replaces a manual rbind
tab4 <- cbind(cutoff = cutoff1, tab3)
tab4
##    cutoff  Accuracy  Sensitivity Specificity       Kappa
## 1    0.10 0.3476351 0.9591218774   0.1719939 0.065142252
## 2    0.15 0.4543919 0.8796366389   0.3322461 0.118661636
## 3    0.20 0.5451014 0.7645722937   0.4820613 0.158228479
## 4    0.25 0.6547297 0.5715367146   0.6786258 0.200765861
## 5    0.30 0.7376689 0.3686601060   0.8436617 0.219119949
## 6    0.35 0.7673986 0.2278576836   0.9223744 0.182956736
## 7    0.40 0.7765203 0.1566994701   0.9545553 0.147192150
## 8    0.45 0.7800676 0.1127933384   0.9717330 0.117579379
## 9    0.50 0.7780405 0.0696442089   0.9815177 0.074000842
## 10   0.55 0.7788851 0.0257380772   0.9952164 0.031810540
## 11   0.60 0.7770270 0.0060560182   0.9984779 0.007000454
## 12   0.65 0.7773649 0.0022710068   1.0000000 0.003524069
## 13   0.70 0.7770270 0.0007570023   1.0000000 0.001175674
## 14   0.75 0.7768581 0.0000000000   1.0000000 0.000000000
## 15   0.80 0.7768581 0.0000000000   1.0000000 0.000000000
## 16   0.85 0.7768581 0.0000000000   1.0000000 0.000000000
## 17   0.90 0.7768581 0.0000000000   1.0000000 0.000000000
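
As the table shows, sensitivity falls steeply as the cut-off rises: above roughly 0.70 the model classifies every case as "No", so sensitivity is zero and accuracy settles at the "No" prevalence of 0.7769 (the no-information rate). Kappa peaks near a cut-off of 0.30, where the model gives up about four points of accuracy in exchange for a sensitivity of roughly 0.37; if identifying defaulters matters more than raw accuracy, an operating point in that region is the more useful choice.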