library(randomForest)
library(rpart)
library(readr)
library(caret)
library(data.table)
library(scales)
library(ROCR)
library(ineq)
library(knitr)
# Load the cross-sell data set into a plain data.frame.
# NOTE(review): the path is an absolute, machine-specific Windows location, so
# this only runs where that file exists — consider a project-relative path.
fulldata <- as.data.frame(
  readr::read_csv("D:/PG Business Analytics/DM/Group Assignment/PL_XSELL.csv")
)
Parsed with column specification:
cols(
  .default = col_integer(),
  CUST_ID = col_character(),
  GENDER = col_character(),
  BALANCE = col_double(),
  OCCUPATION = col_character(),
  AGE_BKT = col_character(),
  ACC_TYPE = col_character(),
  ACC_OP_DATE = col_character(),
  AMT_NET_DR = col_double(),
  AMT_L_DR = col_double(),
  AVG_AMT_PER_ATM_TXN = col_double(),
  AVG_AMT_PER_CSH_WDL_TXN = col_double(),
  AVG_AMT_PER_CHQ_TXN = col_double(),
  AVG_AMT_PER_NET_TXN = col_double(),
  AVG_AMT_PER_MOB_TXN = col_double(),
  random = col_double()
)
See spec(...) for full column specifications.
head(fulldata)
summary(fulldata)
   CUST_ID              TARGET            AGE           GENDER             BALANCE       
 Length:20000       Min.   :0.0000   Min.   :21.00   Length:20000       Min.   :      0  
 Class :character   1st Qu.:0.0000   1st Qu.:30.00   Class :character   1st Qu.:  64754  
 Mode  :character   Median :0.0000   Median :38.00   Mode  :character   Median : 231676  
                    Mean   :0.1256   Mean   :38.42                      Mean   : 511362  
                    3rd Qu.:0.0000   3rd Qu.:46.00                      3rd Qu.: 653877  
                    Max.   :1.0000   Max.   :55.00                      Max.   :8360431  
  OCCUPATION          AGE_BKT               SCR        HOLDING_PERIOD    ACC_TYPE        
 Length:20000       Length:20000       Min.   :100.0   Min.   : 1.00   Length:20000      
 Class :character   Class :character   1st Qu.:227.0   1st Qu.: 7.00   Class :character  
 Mode  :character   Mode  :character   Median :364.0   Median :15.00   Mode  :character  
                                       Mean   :440.2   Mean   :14.96                     
                                       3rd Qu.:644.0   3rd Qu.:22.00                     
                                       Max.   :999.0   Max.   :31.00                     
 ACC_OP_DATE        LEN_OF_RLTN_IN_MNTH NO_OF_L_CR_TXNS NO_OF_L_DR_TXNS  TOT_NO_OF_L_TXNS
 Length:20000       Min.   : 29.0       Min.   : 0.00   Min.   : 0.000   Min.   :  0.00  
 Class :character   1st Qu.: 79.0       1st Qu.: 6.00   1st Qu.: 2.000   1st Qu.:  9.00  
 Mode  :character   Median :125.0       Median :10.00   Median : 5.000   Median : 14.00  
                    Mean   :125.2       Mean   :12.35   Mean   : 6.634   Mean   : 18.98  
                    3rd Qu.:172.0       3rd Qu.:14.00   3rd Qu.: 7.000   3rd Qu.: 21.00  
                    Max.   :221.0       Max.   :75.00   Max.   :74.000   Max.   :149.00  
 NO_OF_BR_CSH_WDL_DR_TXNS NO_OF_ATM_DR_TXNS NO_OF_NET_DR_TXNS NO_OF_MOB_DR_TXNS
 Min.   : 0.000           Min.   : 0.000    Min.   : 0.000    Min.   : 0.0000  
 1st Qu.: 1.000           1st Qu.: 0.000    1st Qu.: 0.000    1st Qu.: 0.0000  
 Median : 1.000           Median : 1.000    Median : 0.000    Median : 0.0000  
 Mean   : 1.883           Mean   : 1.029    Mean   : 1.172    Mean   : 0.4118  
 3rd Qu.: 2.000           3rd Qu.: 1.000    3rd Qu.: 1.000    3rd Qu.: 0.0000  
 Max.   :15.000           Max.   :25.000    Max.   :22.000    Max.   :25.0000  
 NO_OF_CHQ_DR_TXNS   FLG_HAS_CC       AMT_ATM_DR     AMT_BR_CSH_WDL_DR   AMT_CHQ_DR     
 Min.   : 0.000    Min.   :0.0000   Min.   :     0   Min.   :     0    Min.   :      0  
 1st Qu.: 0.000    1st Qu.:0.0000   1st Qu.:     0   1st Qu.:  2990    1st Qu.:      0  
 Median : 2.000    Median :0.0000   Median :  6900   Median :340150    Median :  23840  
 Mean   : 2.138    Mean   :0.3054   Mean   : 10990   Mean   :378475    Mean   : 124520  
 3rd Qu.: 4.000    3rd Qu.:1.0000   3rd Qu.: 15800   3rd Qu.:674675    3rd Qu.:  72470  
 Max.   :15.000    Max.   :1.0000   Max.   :199300   Max.   :999930    Max.   :4928640  
   AMT_NET_DR       AMT_MOB_DR        AMT_L_DR       FLG_HAS_ANY_CHGS AMT_OTH_BK_ATM_USG_CHGS
 Min.   :     0   Min.   :     0   Min.   :      0   Min.   :0.0000   Min.   :  0.000        
 1st Qu.:     0   1st Qu.:     0   1st Qu.: 237936   1st Qu.:0.0000   1st Qu.:  0.000        
 Median :     0   Median :     0   Median : 695115   Median :0.0000   Median :  0.000        
 Mean   :237308   Mean   : 22425   Mean   : 773717   Mean   :0.1106   Mean   :  1.099        
 3rd Qu.:473971   3rd Qu.:     0   3rd Qu.:1078927   3rd Qu.:0.0000   3rd Qu.:  0.000        
 Max.   :999854   Max.   :199667   Max.   :6514921   Max.   :1.0000   Max.   :250.000        
 AMT_MIN_BAL_NMC_CHGS NO_OF_IW_CHQ_BNC_TXNS NO_OF_OW_CHQ_BNC_TXNS AVG_AMT_PER_ATM_TXN
 Min.   :  0.000      Min.   :0.00000       Min.   :0.0000        Min.   :    0      
 1st Qu.:  0.000      1st Qu.:0.00000       1st Qu.:0.0000        1st Qu.:    0      
 Median :  0.000      Median :0.00000       Median :0.0000        Median : 6000      
 Mean   :  1.292      Mean   :0.04275       Mean   :0.0444        Mean   : 7409      
 3rd Qu.:  0.000      3rd Qu.:0.00000       3rd Qu.:0.0000        3rd Qu.:13500      
 Max.   :170.000      Max.   :2.00000       Max.   :2.0000        Max.   :25000      
 AVG_AMT_PER_CSH_WDL_TXN AVG_AMT_PER_CHQ_TXN AVG_AMT_PER_NET_TXN AVG_AMT_PER_MOB_TXN
 Min.   :     0          Min.   :     0      Min.   :     0      Min.   :     0     
 1st Qu.:  1266          1st Qu.:     0      1st Qu.:     0      1st Qu.:     0     
 Median :147095          Median :  8645      Median :     0      Median :     0     
 Mean   :242237          Mean   : 25093      Mean   :179059      Mean   : 20304     
 3rd Qu.:385000          3rd Qu.: 28605      3rd Qu.:257699      3rd Qu.:     0     
 Max.   :999640          Max.   :537842      Max.   :999854      Max.   :199667     
 FLG_HAS_NOMINEE  FLG_HAS_OLD_LOAN     random         
 Min.   :0.0000   Min.   :0.0000   Min.   :0.0000114  
 1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.2481866  
 Median :1.0000   Median :0.0000   Median :0.5061214  
 Mean   :0.9012   Mean   :0.4929   Mean   :0.5019330  
 3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.7535712  
 Max.   :1.0000   Max.   :1.0000   Max.   :0.9999471  
# Build the modelling frame by dropping the identifier and the character-typed
# columns. They are removed by NAME (not by position) so the selection does not
# silently change if the column order of the CSV changes. These are the columns
# that sit at positions 1, 4, 6, 7, 10 and 11 in the file as loaded here.
d1 <- fulldata[, setdiff(
  names(fulldata),
  c("CUST_ID", "GENDER", "OCCUPATION", "AGE_BKT", "ACC_TYPE", "ACC_OP_DATE")
)]
summary(d1)
     TARGET            AGE           BALANCE             SCR        HOLDING_PERIOD 
 Min.   :0.0000   Min.   :21.00   Min.   :      0   Min.   :100.0   Min.   : 1.00  
 1st Qu.:0.0000   1st Qu.:30.00   1st Qu.:  64754   1st Qu.:227.0   1st Qu.: 7.00  
 Median :0.0000   Median :38.00   Median : 231676   Median :364.0   Median :15.00  
 Mean   :0.1256   Mean   :38.42   Mean   : 511362   Mean   :440.2   Mean   :14.96  
 3rd Qu.:0.0000   3rd Qu.:46.00   3rd Qu.: 653877   3rd Qu.:644.0   3rd Qu.:22.00  
 Max.   :1.0000   Max.   :55.00   Max.   :8360431   Max.   :999.0   Max.   :31.00  
 LEN_OF_RLTN_IN_MNTH NO_OF_L_CR_TXNS NO_OF_L_DR_TXNS  TOT_NO_OF_L_TXNS NO_OF_BR_CSH_WDL_DR_TXNS
 Min.   : 29.0       Min.   : 0.00   Min.   : 0.000   Min.   :  0.00   Min.   : 0.000          
 1st Qu.: 79.0       1st Qu.: 6.00   1st Qu.: 2.000   1st Qu.:  9.00   1st Qu.: 1.000          
 Median :125.0       Median :10.00   Median : 5.000   Median : 14.00   Median : 1.000          
 Mean   :125.2       Mean   :12.35   Mean   : 6.634   Mean   : 18.98   Mean   : 1.883          
 3rd Qu.:172.0       3rd Qu.:14.00   3rd Qu.: 7.000   3rd Qu.: 21.00   3rd Qu.: 2.000          
 Max.   :221.0       Max.   :75.00   Max.   :74.000   Max.   :149.00   Max.   :15.000          
 NO_OF_ATM_DR_TXNS NO_OF_NET_DR_TXNS NO_OF_MOB_DR_TXNS NO_OF_CHQ_DR_TXNS   FLG_HAS_CC    
 Min.   : 0.000    Min.   : 0.000    Min.   : 0.0000   Min.   : 0.000    Min.   :0.0000  
 1st Qu.: 0.000    1st Qu.: 0.000    1st Qu.: 0.0000   1st Qu.: 0.000    1st Qu.:0.0000  
 Median : 1.000    Median : 0.000    Median : 0.0000   Median : 2.000    Median :0.0000  
 Mean   : 1.029    Mean   : 1.172    Mean   : 0.4118   Mean   : 2.138    Mean   :0.3054  
 3rd Qu.: 1.000    3rd Qu.: 1.000    3rd Qu.: 0.0000   3rd Qu.: 4.000    3rd Qu.:1.0000  
 Max.   :25.000    Max.   :22.000    Max.   :25.0000   Max.   :15.000    Max.   :1.0000  
   AMT_ATM_DR     AMT_BR_CSH_WDL_DR   AMT_CHQ_DR        AMT_NET_DR       AMT_MOB_DR    
 Min.   :     0   Min.   :     0    Min.   :      0   Min.   :     0   Min.   :     0  
 1st Qu.:     0   1st Qu.:  2990    1st Qu.:      0   1st Qu.:     0   1st Qu.:     0  
 Median :  6900   Median :340150    Median :  23840   Median :     0   Median :     0  
 Mean   : 10990   Mean   :378475    Mean   : 124520   Mean   :237308   Mean   : 22425  
 3rd Qu.: 15800   3rd Qu.:674675    3rd Qu.:  72470   3rd Qu.:473971   3rd Qu.:     0  
 Max.   :199300   Max.   :999930    Max.   :4928640   Max.   :999854   Max.   :199667  
    AMT_L_DR       FLG_HAS_ANY_CHGS AMT_OTH_BK_ATM_USG_CHGS AMT_MIN_BAL_NMC_CHGS
 Min.   :      0   Min.   :0.0000   Min.   :  0.000         Min.   :  0.000     
 1st Qu.: 237936   1st Qu.:0.0000   1st Qu.:  0.000         1st Qu.:  0.000     
 Median : 695115   Median :0.0000   Median :  0.000         Median :  0.000     
 Mean   : 773717   Mean   :0.1106   Mean   :  1.099         Mean   :  1.292     
 3rd Qu.:1078927   3rd Qu.:0.0000   3rd Qu.:  0.000         3rd Qu.:  0.000     
 Max.   :6514921   Max.   :1.0000   Max.   :250.000         Max.   :170.000     
 NO_OF_IW_CHQ_BNC_TXNS NO_OF_OW_CHQ_BNC_TXNS AVG_AMT_PER_ATM_TXN AVG_AMT_PER_CSH_WDL_TXN
 Min.   :0.00000       Min.   :0.0000        Min.   :    0       Min.   :     0         
 1st Qu.:0.00000       1st Qu.:0.0000        1st Qu.:    0       1st Qu.:  1266         
 Median :0.00000       Median :0.0000        Median : 6000       Median :147095         
 Mean   :0.04275       Mean   :0.0444        Mean   : 7409       Mean   :242237         
 3rd Qu.:0.00000       3rd Qu.:0.0000        3rd Qu.:13500       3rd Qu.:385000         
 Max.   :2.00000       Max.   :2.0000        Max.   :25000       Max.   :999640         
 AVG_AMT_PER_CHQ_TXN AVG_AMT_PER_NET_TXN AVG_AMT_PER_MOB_TXN FLG_HAS_NOMINEE  FLG_HAS_OLD_LOAN
 Min.   :     0      Min.   :     0      Min.   :     0      Min.   :0.0000   Min.   :0.0000  
 1st Qu.:     0      1st Qu.:     0      1st Qu.:     0      1st Qu.:1.0000   1st Qu.:0.0000  
 Median :  8645      Median :     0      Median :     0      Median :1.0000   Median :0.0000  
 Mean   : 25093      Mean   :179059      Mean   : 20304      Mean   :0.9012   Mean   :0.4929  
 3rd Qu.: 28605      3rd Qu.:257699      3rd Qu.:     0      3rd Qu.:1.0000   3rd Qu.:1.0000  
 Max.   :537842      Max.   :999854      Max.   :199667      Max.   :1.0000   Max.   :1.0000  
     random         
 Min.   :0.0000114  
 1st Qu.:0.2481866  
 Median :0.5061214  
 Mean   :0.5019330  
 3rd Qu.:0.7535712  
 Max.   :0.9999471  
str(d1)
'data.frame':   20000 obs. of  34 variables:
 $ TARGET                  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ AGE                     : int  27 47 40 53 36 42 30 53 42 30 ...
 $ BALANCE                 : num  3384 287489 18217 71720 1671623 ...
 $ SCR                     : int  776 324 603 196 167 493 479 562 105 170 ...
 $ HOLDING_PERIOD          : int  30 28 2 13 24 26 14 25 15 13 ...
 $ LEN_OF_RLTN_IN_MNTH     : int  146 104 61 107 185 192 177 99 88 111 ...
 $ NO_OF_L_CR_TXNS         : int  7 8 10 36 20 5 6 14 18 14 ...
 $ NO_OF_L_DR_TXNS         : int  3 2 5 14 1 2 6 3 14 8 ...
 $ TOT_NO_OF_L_TXNS        : int  10 10 15 50 21 7 12 17 32 22 ...
 $ NO_OF_BR_CSH_WDL_DR_TXNS: int  0 0 1 4 1 1 0 3 6 3 ...
 $ NO_OF_ATM_DR_TXNS       : int  1 1 1 2 0 1 1 0 2 1 ...
 $ NO_OF_NET_DR_TXNS       : int  2 1 1 3 0 0 1 0 4 0 ...
 $ NO_OF_MOB_DR_TXNS       : int  0 0 0 1 0 0 0 0 1 0 ...
 $ NO_OF_CHQ_DR_TXNS       : int  0 0 2 4 0 0 4 0 1 4 ...
 $ FLG_HAS_CC              : int  0 0 0 0 0 1 0 0 1 0 ...
 $ AMT_ATM_DR              : int  13100 6600 11200 26100 0 18500 6200 0 35400 18000 ...
 $ AMT_BR_CSH_WDL_DR       : int  0 0 561120 673590 808480 379310 0 945160 198430 869880 ...
 $ AMT_CHQ_DR              : int  0 0 49320 60780 0 0 10580 0 51490 32610 ...
 $ AMT_NET_DR              : num  973557 799813 997570 741506 0 ...
 $ AMT_MOB_DR              : int  0 0 0 71388 0 0 0 0 170332 0 ...
 $ AMT_L_DR                : num  986657 806413 1619210 1573364 808480 ...
 $ FLG_HAS_ANY_CHGS        : int  0 1 1 0 0 0 1 0 0 0 ...
 $ AMT_OTH_BK_ATM_USG_CHGS : int  0 0 0 0 0 0 0 0 0 0 ...
 $ AMT_MIN_BAL_NMC_CHGS    : int  0 0 0 0 0 0 0 0 0 0 ...
 $ NO_OF_IW_CHQ_BNC_TXNS   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ NO_OF_OW_CHQ_BNC_TXNS   : int  0 0 1 0 0 0 0 0 0 0 ...
 $ AVG_AMT_PER_ATM_TXN     : num  13100 6600 11200 13050 0 ...
 $ AVG_AMT_PER_CSH_WDL_TXN : num  0 0 561120 168398 808480 ...
 $ AVG_AMT_PER_CHQ_TXN     : num  0 0 24660 15195 0 ...
 $ AVG_AMT_PER_NET_TXN     : num  486779 799813 997570 247169 0 ...
 $ AVG_AMT_PER_MOB_TXN     : num  0 0 0 71388 0 ...
 $ FLG_HAS_NOMINEE         : int  1 1 1 1 1 1 0 1 1 0 ...
 $ FLG_HAS_OLD_LOAN        : int  1 0 1 0 0 1 1 1 1 0 ...
 $ random                  : num  1.14e-05 1.11e-04 1.20e-04 1.37e-04 1.74e-04 ...
# Numeric 0/1 gender indicators taken from the raw GENDER column (which was
# dropped from d1). A GENDER value that is neither "M" nor "F" gives 0 in both
# columns; a missing GENDER gives NA in both.
d1$MALE <- as.numeric(fulldata$GENDER == "M")
d1$FEMALE <- as.numeric(fulldata$GENDER == "F")
# Reproducible 60/40 train/test split.
# - set.seed() fixes the random partition so the split (and everything fitted
#   on it) can be re-created on a later run.
# - The outcome is passed as a factor so createDataPartition() samples within
#   each class, keeping the responder rate the same in both partitions.
set.seed(1234)
trainInd <- createDataPartition(as.factor(d1$TARGET), p = 0.6, list = FALSE)
train <- d1[trainInd, ]
test <- d1[-trainInd, ]
nrow(train)
[1] 12000
nrow(test)
[1] 8000
# Baseline random forest on the training partition.
# TARGET is stored as a 0/1 integer; it is converted to a factor in the formula
# so that randomForest() grows a CLASSIFICATION forest. With the integer
# response it fits a regression forest instead, and RF1$err.rate is NULL.
RF1 <- randomForest(as.factor(TARGET) ~ ., data = train,
                    ntree = 101, mtry = 24, nodesize = 30,
                    importance = TRUE)
The response has five or fewer unique values.  Are you sure you want to do regression?
RF1

Call:
 randomForest(formula = TARGET ~ ., data = train, ntree = 101,      mtry = 24, nodesize = 30, importance = TRUE) 
               Type of random forest: regression
                     Number of trees: 101
No. of variables tried at each split: 24

          Mean of squared residuals: 0.06246178
                    % Var explained: 43.09
RF1$err.rate
NULL
# Variable-importance matrix of RF1, rounded to two decimals and displayed
# sorted by its second column, largest value first.
impVar <- round(randomForest::importance(RF1), digits = 2)
impVar[order(impVar[, 2], decreasing = TRUE), ]
                         %IncMSE IncNodePurity
BALANCE                    39.35         78.95
SCR                        49.52         75.29
LEN_OF_RLTN_IN_MNTH        29.34         60.98
HOLDING_PERIOD             32.40         48.40
AMT_L_DR                   19.27         46.72
AGE                        25.27         44.59
NO_OF_L_CR_TXNS            17.82         43.17
TOT_NO_OF_L_TXNS           16.07         42.13
AMT_BR_CSH_WDL_DR          17.34         41.09
AVG_AMT_PER_CSH_WDL_TXN    16.76         36.53
AVG_AMT_PER_ATM_TXN        12.10         32.98
AMT_ATM_DR                 14.68         30.75
AVG_AMT_PER_CHQ_TXN        16.00         29.09
AMT_CHQ_DR                 19.29         27.93
random                      0.38         27.86
AVG_AMT_PER_NET_TXN        13.27         24.81
AMT_NET_DR                 13.48         23.99
NO_OF_L_DR_TXNS            13.21         23.54
AVG_AMT_PER_MOB_TXN         9.32         15.94
NO_OF_BR_CSH_WDL_DR_TXNS   14.09         15.05
AMT_MOB_DR                 10.42         13.73
NO_OF_ATM_DR_TXNS          11.14         12.70
FLG_HAS_CC                 21.49         11.99
NO_OF_CHQ_DR_TXNS           6.68         10.86
NO_OF_IW_CHQ_BNC_TXNS       7.63          5.17
FLG_HAS_ANY_CHGS            9.59          4.29
FEMALE                      4.43          4.11
FLG_HAS_OLD_LOAN            8.25          3.54
NO_OF_OW_CHQ_BNC_TXNS       7.44          3.42
NO_OF_NET_DR_TXNS           4.57          3.36
FLG_HAS_NOMINEE             8.10          3.06
MALE                        3.61          2.42
NO_OF_MOB_DR_TXNS           6.60          1.48
AMT_MIN_BAL_NMC_CHGS        1.31          0.39
AMT_OTH_BK_ATM_USG_CHGS     0.39          0.20
# Tune mtry by out-of-bag error (doubling/halving from mtryStart) and, because
# doBest = TRUE, return the forest fitted with the best mtry found.
# The search is run on the TRAINING partition only: fitting on the full data
# set would let the model see the hold-out rows it is scored on afterwards,
# making the test-set evaluation optimistic.
# Predictor columns are taken from names(d1) so that any extra columns added to
# `train` (e.g. by re-running later chunks) cannot leak into the model.
# NOTE(review): the `random` column is among the predictors; its summary looks
# like uniform noise — confirm it is meant to be a feature.
tRF <- tuneRF(x = train[, setdiff(names(d1), "TARGET")],
              y = as.factor(train$TARGET),
              mtryStart = 6,
              ntreeTry = 101,
              stepFactor = 2,
              improve = 0.001,
              trace = TRUE,
              plot = TRUE,
              doBest = TRUE,
              nodesize = 150,
              importance = FALSE
)
mtry = 6  OOB error = 12.23% 
Searching left ...
mtry = 3    OOB error = 12.51% 
-0.02289452 0.001 
Searching right ...
mtry = 12   OOB error = 12.04% 
0.01553557 0.001 
mtry = 24   OOB error = 11.9% 
0.01121262 0.001 
mtry = 35   OOB error = 11.89% 
0.001259975 0.001 

tRF$importance
                         MeanDecreaseGini
AGE                           51.15217871
BALANCE                      105.85473208
SCR                          111.28328187
HOLDING_PERIOD                84.71283598
LEN_OF_RLTN_IN_MNTH           54.48171361
NO_OF_L_CR_TXNS               90.12853969
NO_OF_L_DR_TXNS               62.26459737
TOT_NO_OF_L_TXNS              71.31838314
NO_OF_BR_CSH_WDL_DR_TXNS      13.86315737
NO_OF_ATM_DR_TXNS             23.41492708
NO_OF_NET_DR_TXNS              2.83366850
NO_OF_MOB_DR_TXNS              1.50919255
NO_OF_CHQ_DR_TXNS              9.50798669
FLG_HAS_CC                    37.54258242
AMT_ATM_DR                    36.15623700
AMT_BR_CSH_WDL_DR             47.34324876
AMT_CHQ_DR                    30.46642054
AMT_NET_DR                    20.35351794
AMT_MOB_DR                    12.07468864
AMT_L_DR                      58.40845407
FLG_HAS_ANY_CHGS               3.13020658
AMT_OTH_BK_ATM_USG_CHGS        0.02871932
AMT_MIN_BAL_NMC_CHGS           0.36726935
NO_OF_IW_CHQ_BNC_TXNS          7.02775670
NO_OF_OW_CHQ_BNC_TXNS          3.74806518
AVG_AMT_PER_ATM_TXN           34.10274749
AVG_AMT_PER_CSH_WDL_TXN       35.37585185
AVG_AMT_PER_CHQ_TXN           32.94370566
AVG_AMT_PER_NET_TXN           24.80776069
AVG_AMT_PER_MOB_TXN           21.97662447
FLG_HAS_NOMINEE                2.05622171
FLG_HAS_OLD_LOAN               1.80081032
random                        16.07435530
MALE                           2.12797638
FEMALE                         4.21555805
# Score the training rows with the tuned forest: the predicted class, and the
# per-class probability matrix (stored as a matrix column of `train`).
train$predict.class <- predict(tRF, newdata = train, type = "class")
train$predict.score <- predict(tRF, newdata = train, type = "prob")
head(train)
class(train$predict.score)
[1] "matrix" "votes" 
#' Assign each score to a decile bucket (1 = lowest scores, 10 = highest).
#'
#' Bucket boundaries are the 10%, 20%, ..., 90% quantiles of `x`. A value gets
#' bucket k when it is below the k-th boundary but not below any earlier one,
#' and bucket 10 when it is at or above the 90% boundary. Tied boundaries
#' (common when many scores are identical) leave some buckets empty.
#'
#' @param x Numeric vector of scores. `NA`s are ignored when computing the
#'   boundaries and yield `NA` in the result.
#' @return Numeric vector of the same length as `x` with values in 1..10
#'   (all `NA` when `x` has no non-missing values).
decile <- function(x) {
  # No usable data: there are no quantiles to compute.
  if (length(x) == 0L || all(is.na(x))) {
    return(rep(NA_real_, length(x)))
  }
  cutoffs <- quantile(x, probs = seq_len(9L) / 10, na.rm = TRUE, names = FALSE)
  # findInterval() counts the boundaries that are <= x; adding 1 turns that
  # count into the bucket number.
  findInterval(x, cutoffs) + 1
}
# Decile-wise rank-ordering table for the training partition.
# Each row is bucketed by its score in the second column of predict.score;
# decile() must be available in the session.
train$deciles <- decile(train$predict.score[, 2])
tmp_DT <- data.table(train)
# Per-decile counts, highest decile first. This aggregation has to run: without
# it `rank` still refers to base::rank and every `rank$...` line below fails.
rank <- tmp_DT[, list(
  cnt = length(TARGET),
  cnt_resp = sum(TARGET),
  cnt_non_resp = sum(TARGET == 0)),
  by = deciles][order(-deciles)]
# Response rate, cumulative counts/shares, and the KS gap per decile.
rank$rrate <- round(rank$cnt_resp / rank$cnt, 2)
rank$cum_resp <- cumsum(rank$cnt_resp)
rank$cum_non_resp <- cumsum(rank$cnt_non_resp)
rank$cum_rel_resp <- round(rank$cum_resp / sum(rank$cnt_resp), 2)
rank$cum_rel_non_resp <- round(rank$cum_non_resp / sum(rank$cnt_non_resp), 2)
rank$ks <- abs(rank$cum_rel_resp - rank$cum_rel_non_resp)
# Format the rate columns as percentage strings for display (ks stays numeric).
rank$rrate <- percent(rank$rrate)
rank$cum_rel_resp <- percent(rank$cum_rel_resp)
rank$cum_rel_non_resp <- percent(rank$cum_rel_non_resp)
rank$ks
[1] 0.52 0.73 0.73 0.64 0.54 0.42 0.30 0.00
train_rank<-as.data.frame(rank)
kable(train_rank)
deciles cnt cnt_resp cnt_non_resp rrate cum_resp cum_non_resp cum_rel_resp cum_rel_non_resp ks
10 1209 841 368 70% 841 368 56% 4% 0.52
9 1199 431 768 36% 1272 1136 84% 11% 0.73
8 1311 152 1159 12% 1424 2295 95% 22% 0.73
7 1304 56 1248 4% 1480 3543 98% 34% 0.64
6 1236 18 1218 1% 1498 4761 99% 45% 0.54
5 1359 5 1354 0% 1503 6115 100% 58% 0.42
4 1237 3 1234 0% 1506 7349 100% 70% 0.30
3 3145 0 3145 0% 1506 10494 100% 100% 0.00
sum(train$TARGET) / nrow(train)
[1] 0.1255
# ROC curve on the training partition (scores from the second column of
# predict.score against the observed TARGET), and the KS statistic as the
# largest gap between true-positive and false-positive rate along the curve.
pred <- prediction(predictions = train$predict.score[, 2],
                   labels = train$TARGET)
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf)

KS <- max(perf@y.values[[1]] - perf@x.values[[1]])
KS
[1] 0.7492918
# Area under the ROC curve for the training-partition predictions, extracted
# from the ROCR performance object as a plain number.
auc <- as.numeric(performance(pred, measure = "auc")@y.values)
auc
[1] 0.9436159
# Gini inequality coefficient of the predicted scores themselves.
# NOTE(review): this measures how unevenly the scores are distributed; it is
# not the model Gini commonly reported as 2 * AUC - 1 — confirm which is meant.
gini <- ineq(train$predict.score[, 2], type = "Gini")
gini
[1] 0.7866774
# Confusion matrix on the training partition: observed TARGET (rows) against
# the predicted class (columns).
table(TARGET = train$TARGET, predict.class = train$predict.class)
      predict.class
TARGET     0     1
     0 10488     6
     1  1363   143
# Score the hold-out partition with the tuned forest and bucket the rows by
# their score in the second column of predict.score.
test$predict.class <- predict(tRF, newdata = test, type = "class")
test$predict.score <- predict(tRF, newdata = test, type = "prob")
test$deciles <- decile(test$predict.score[, 2])

# Per-decile counts for the hold-out rows, highest decile first.
tmp_DT <- data.table(test)
h_rank <- tmp_DT[, .(
  cnt = .N,
  cnt_resp = sum(TARGET),
  cnt_non_resp = sum(TARGET == 0)
), by = deciles][order(-deciles)]

# Response rate, cumulative counts/shares, and the KS gap per decile.
h_rank$rrate <- with(h_rank, round(cnt_resp / cnt, 2))
h_rank$cum_resp <- cumsum(h_rank$cnt_resp)
h_rank$cum_non_resp <- cumsum(h_rank$cnt_non_resp)
h_rank$cum_rel_resp <- with(h_rank, round(cum_resp / sum(cnt_resp), 2))
h_rank$cum_rel_non_resp <- with(h_rank, round(cum_non_resp / sum(cnt_non_resp), 2))
h_rank$ks <- with(h_rank, abs(cum_rel_resp - cum_rel_non_resp))

# Format the rate columns as percentage strings, then render the table.
h_rank$rrate <- percent(h_rank$rrate)
h_rank$cum_rel_resp <- percent(h_rank$cum_rel_resp)
h_rank$cum_rel_non_resp <- percent(h_rank$cum_rel_non_resp)
test_rank <- as.data.frame(h_rank)
kable(test_rank)
deciles cnt cnt_resp cnt_non_resp rrate cum_resp cum_non_resp cum_rel_resp cum_rel_non_resp ks
10 807 536 271 66% 536 271 53% 4% 0.49
9 800 291 509 36% 827 780 82% 11% 0.71
8 853 110 743 13% 937 1523 93% 22% 0.71
7 864 50 814 6% 987 2337 98% 33% 0.65
6 809 16 793 2% 1003 3130 100% 45% 0.55
5 985 3 982 0% 1006 4112 100% 59% 0.41
4 863 0 863 0% 1006 4975 100% 71% 0.29
3 2019 0 2019 0% 1006 6994 100% 100% 0.00
LS0tDQp0aXRsZTogIkRhdGEgTWluaW5nIC0gUmFuZG9tRm9yZXN0IG1vZGVsIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQpsaWJyYXJ5KHJhbmRvbUZvcmVzdCkNCmxpYnJhcnkocnBhcnQpDQpsaWJyYXJ5KHJlYWRyKQ0KbGlicmFyeShjYXJldCkNCmxpYnJhcnkoZGF0YS50YWJsZSkNCmxpYnJhcnkoc2NhbGVzKQ0KbGlicmFyeShST0NSKQ0KbGlicmFyeShpbmVxKQ0KbGlicmFyeShrbml0cikNCmBgYA0KDQoNCmBgYHtyfQ0KDQpmdWxsZGF0YSA8LSBhcy5kYXRhLmZyYW1lKHJlYWRfY3N2KCJEOi9QRyBCdXNpbmVzcyBBbmFseXRpY3MvRE0vR3JvdXAgQXNzaWdubWVudC9QTF9YU0VMTC5jc3YiKSkNCmhlYWQoZnVsbGRhdGEpDQpzdW1tYXJ5KGZ1bGxkYXRhKQ0KDQpgYGANCg0KDQpgYGB7cn0NCmQxPC1mdWxsZGF0YVssLWMoMSw0LDYsNywxMCwxMSldDQpzdW1tYXJ5KGQxKQ0Kc3RyKGQxKQ0KYGBgDQoNCg0KYGBge3J9DQpkMSRNQUxFPC1pZmVsc2UoZnVsbGRhdGEkR0VOREVSPT0nTScsMSwwKQ0KZDEkRkVNQUxFPC1pZmVsc2UoZnVsbGRhdGEkR0VOREVSPT0nRicsMSwwKQ0KYGBgDQoNCg0KYGBge3J9DQp0cmFpbkluZDwtY3JlYXRlRGF0YVBhcnRpdGlvbihkMSRUQVJHRVQscD0uNixsaXN0PUZBTFNFKQ0KDQp0cmFpbjwtZDFbdHJhaW5JbmQsXQ0KdGVzdDwtZDFbLXRyYWluSW5kLF0NCm5yb3codHJhaW4pDQpucm93KHRlc3QpDQpgYGANCg0KDQpgYGB7cn0NClJGMSA8LSByYW5kb21Gb3Jlc3QoVEFSR0VUIH4gLiwgZGF0YSA9IHRyYWluLCANCiAgICAgICAgICAgICAgICAgICBudHJlZT0xMDEsIG10cnkgPSAyNCwgbm9kZXNpemUgPSAzMCwNCiAgICAgICAgICAgICAgICAgICBpbXBvcnRhbmNlPVRSVUUpDQoNCg0KUkYxDQpgYGANCg0KDQpgYGB7cn0NClJGMSRlcnIucmF0ZQ0KYGBgDQoNCg0KYGBge3J9DQppbXBWYXIgPC0gcm91bmQocmFuZG9tRm9yZXN0OjppbXBvcnRhbmNlKFJGMSksIDIpDQppbXBWYXJbb3JkZXIoaW1wVmFyWywyXSwgZGVjcmVhc2luZz1UUlVFKSxdDQpgYGANCg0KDQpgYGB7cn0NCnRSRiA8LSB0dW5lUkYoeCA9IGQxWywtYygxKV0sIA0KICAgICAgICAgICAgICB5PWFzLmZhY3RvcihkMSRUQVJHRVQpLA0KICAgICAgICAgICAgICBtdHJ5U3RhcnQgPSA2LCANCiAgICAgICAgICAgICAgbnRyZWVUcnk9MTAxLCANCiAgICAgICAgICAgICAgc3RlcEZhY3RvciA9IDIsIA0KICAgICAgICAgICAgICBpbXByb3ZlID0gMC4wMDEsIA0KICAgICAgICAgICAgICB0cmFjZT1UUlVFLCANCiAgICAgICAgICAgICAgcGxvdCA9IFRSVUUsDQogICAgICAgICAgICAgIGRvQmVzdCA9IFRSVUUsDQogICAgICAgICAgICAgIG5vZGVzaXplID0gMTUwLCANCiAgICAgICAgICAgICAgaW1wb3J0YW5jZT1GQUxTRQ0KKQ0KDQp0UkYkaW1wb3J0YW5jZQ0KYGBgDQoNCg0KYGBge3J9DQp0cmFpbiRwcmVkaWN0LmNsYXNzIDwtIHByZWRpY3QodFJGLCB0
cmFpbiwgdHlwZT0iY2xhc3MiKQ0KdHJhaW4kcHJlZGljdC5zY29yZSA8LSBwcmVkaWN0KHRSRiwgdHJhaW4sIHR5cGU9InByb2IiKQ0KaGVhZCh0cmFpbikNCmNsYXNzKHRyYWluJHByZWRpY3Quc2NvcmUpDQpgYGANCg0KDQpgYGB7cn0NCiMgZGVjaWxlIDwtIGZ1bmN0aW9uKHgpew0KIyAgIGRlY2lsZXMgPC0gdmVjdG9yKGxlbmd0aD0xMCkNCiMgICBmb3IgKGkgaW4gc2VxKDAuMSwxLC4xKSl7DQojICAgICBkZWNpbGVzW2kqMTBdIDwtIHF1YW50aWxlKHgsIGksIG5hLnJtPVQpDQojICAgfQ0KIyAgIHJldHVybiAoDQojICAgICBpZmVsc2UoeDxkZWNpbGVzWzFdLCAxLA0KIyAgICAgICAgICAgIGlmZWxzZSh4PGRlY2lsZXNbMl0sIDIsDQojICAgICAgICAgICAgICAgICAgIGlmZWxzZSh4PGRlY2lsZXNbM10sIDMsDQojICAgICAgICAgICAgICAgICAgICAgICAgICBpZmVsc2UoeDxkZWNpbGVzWzRdLCA0LA0KIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmZWxzZSh4PGRlY2lsZXNbNV0sIDUsDQojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmZWxzZSh4PGRlY2lsZXNbNl0sIDYsDQojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBpZmVsc2UoeDxkZWNpbGVzWzddLCA3LA0KIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmZWxzZSh4PGRlY2lsZXNbOF0sIDgsDQojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmZWxzZSh4PGRlY2lsZXNbOV0sIDksIDEwDQojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICkpKSkpKSkpKSkNCiMgfQ0KdHJhaW4kZGVjaWxlcyA8LSBkZWNpbGUodHJhaW4kcHJlZGljdC5zY29yZVssMl0pDQpgYGANCg0KDQpgYGB7cn0NCnRtcF9EVCA9IGRhdGEudGFibGUodHJhaW4pDQojIHJhbmsgPC0gdG1wX0RUWywgbGlzdCgNCiMgICBjbnQgPSBsZW5ndGgoVEFSR0VUKSwgDQojICAgY250X3Jlc3AgPSBzdW0oVEFSR0VUKSwgDQojICAgY250X25vbl9yZXNwID0gc3VtKFRBUkdFVCA9PSAwKSkgLCANCiMgICBieT1kZWNpbGVzXVtvcmRlcigtZGVjaWxlcyldDQpyYW5rJHJyYXRlIDwtIHJvdW5kIChyYW5rJGNudF9yZXNwIC8gcmFuayRjbnQsMik7DQpyYW5rJGN1bV9yZXNwIDwtIGN1bXN1bShyYW5rJGNudF9yZXNwKQ0KcmFuayRjdW1fbm9uX3Jlc3AgPC0gY3Vtc3VtKHJhbmskY250X25vbl9yZXNwKQ0KcmFuayRjdW1fcmVsX3Jlc3AgPC0gcm91bmQocmFuayRjdW1fcmVzcCAvIHN1bShyYW5rJGNudF9yZXNwKSwyKTsNCnJhbmskY3VtX3JlbF9ub25fcmVzcCA8LSByb3VuZChyYW5rJGN1bV9ub25fcmVzcCAvIHN1bShyYW5rJGNudF9ub25fcmVzcCksMik7DQpyYW5rJGtzIDwtIGFicyhyYW5rJGN1bV9yZWxfcmVzcCAtIHJhbmskY3VtX3JlbF9ub25f
cmVzcCk7DQpgYGANCg0KYGBge3J9DQpyYW5rJHJyYXRlIDwtIHBlcmNlbnQocmFuayRycmF0ZSkNCnJhbmskY3VtX3JlbF9yZXNwIDwtIHBlcmNlbnQocmFuayRjdW1fcmVsX3Jlc3ApDQpyYW5rJGN1bV9yZWxfbm9uX3Jlc3AgPC0gcGVyY2VudChyYW5rJGN1bV9yZWxfbm9uX3Jlc3ApDQpyYW5rJGtzDQpgYGANCg0KYGBge3J9DQp0cmFpbl9yYW5rPC1hcy5kYXRhLmZyYW1lKHJhbmspDQprYWJsZSh0cmFpbl9yYW5rKQ0KYGBgDQoNCmBgYHtyfQ0Kc3VtKHRyYWluJFRBUkdFVCkgLyBucm93KHRyYWluKQ0KYGBgDQoNCmBgYHtyfQ0KcHJlZCA8LSBwcmVkaWN0aW9uKHRyYWluJHByZWRpY3Quc2NvcmVbLDJdLCB0cmFpbiRUQVJHRVQpDQpwZXJmIDwtIHBlcmZvcm1hbmNlKHByZWQsICJ0cHIiLCAiZnByIikNCnBsb3QocGVyZikNCktTIDwtIG1heChhdHRyKHBlcmYsICd5LnZhbHVlcycpW1sxXV0tYXR0cihwZXJmLCAneC52YWx1ZXMnKVtbMV1dKQ0KS1MNCmBgYA0KDQoNCmBgYHtyfQ0KYXVjIDwtIHBlcmZvcm1hbmNlKHByZWQsImF1YyIpOyANCmF1YyA8LSBhcy5udW1lcmljKGF1Y0B5LnZhbHVlcykNCmF1Yw0KYGBgDQoNCg0KYGBge3J9DQpnaW5pID0gaW5lcSh0cmFpbiRwcmVkaWN0LnNjb3JlWywyXSwgdHlwZT0iR2luaSIpDQpnaW5pDQpgYGANCg0KYGBge3J9DQp3aXRoKHRyYWluLCB0YWJsZShUQVJHRVQsIHByZWRpY3QuY2xhc3MpKQ0KYGBgDQoNCg0KYGBge3J9DQp0ZXN0JHByZWRpY3QuY2xhc3MgPC0gcHJlZGljdCh0UkYsIHRlc3QsIHR5cGU9ImNsYXNzIikNCnRlc3QkcHJlZGljdC5zY29yZSA8LSBwcmVkaWN0KHRSRiwgdGVzdCwgdHlwZT0icHJvYiIpDQoNCnRlc3QkZGVjaWxlcyA8LSBkZWNpbGUodGVzdCRwcmVkaWN0LnNjb3JlWywyXSkNCg0KdG1wX0RUID0gZGF0YS50YWJsZSh0ZXN0KQ0KaF9yYW5rIDwtIHRtcF9EVFssIGxpc3QoDQogIGNudCA9IGxlbmd0aChUQVJHRVQpLCANCiAgY250X3Jlc3AgPSBzdW0oVEFSR0VUKSwgDQogIGNudF9ub25fcmVzcCA9IHN1bShUQVJHRVQgPT0gMCkpICwgDQogIGJ5PWRlY2lsZXNdW29yZGVyKC1kZWNpbGVzKV0NCmhfcmFuayRycmF0ZSA8LSByb3VuZCAoaF9yYW5rJGNudF9yZXNwIC8gaF9yYW5rJGNudCwyKTsNCmhfcmFuayRjdW1fcmVzcCA8LSBjdW1zdW0oaF9yYW5rJGNudF9yZXNwKQ0KaF9yYW5rJGN1bV9ub25fcmVzcCA8LSBjdW1zdW0oaF9yYW5rJGNudF9ub25fcmVzcCkNCmhfcmFuayRjdW1fcmVsX3Jlc3AgPC0gcm91bmQoaF9yYW5rJGN1bV9yZXNwIC8gc3VtKGhfcmFuayRjbnRfcmVzcCksMik7DQpoX3JhbmskY3VtX3JlbF9ub25fcmVzcCA8LSByb3VuZChoX3JhbmskY3VtX25vbl9yZXNwIC8gc3VtKGhfcmFuayRjbnRfbm9uX3Jlc3ApLDIpOw0KaF9yYW5rJGtzIDwtIGFicyhoX3JhbmskY3VtX3JlbF9yZXNwIC0gaF9yYW5rJGN1bV9yZWxfbm9uX3Jlc3ApOw0KDQpoX3JhbmskcnJhdGUgPC0gcGVyY2VudChoX3JhbmskcnJhdGUpDQpoX3JhbmskY3VtX3JlbF9y
ZXNwIDwtIHBlcmNlbnQoaF9yYW5rJGN1bV9yZWxfcmVzcCkNCmhfcmFuayRjdW1fcmVsX25vbl9yZXNwIDwtIHBlcmNlbnQoaF9yYW5rJGN1bV9yZWxfbm9uX3Jlc3ApDQpgYGANCg0KYGBge3J9DQp0ZXN0X3Jhbms8LWFzLmRhdGEuZnJhbWUoaF9yYW5rKQ0Ka2FibGUodGVzdF9yYW5rKQ0KYGBgDQoNCg0K