This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

########READING CSV###################################################

# Show the current working directory (recorded output kept below).
getwd()
## [1] "D:/GREAT LAKES/DATA MINING"
# NOTE(review): setwd() with an absolute path ties the notebook to a single
# machine. It is kept because other chunks may resolve files relative to this
# directory - consider a project-relative path instead.
setwd("D:/GREAT LAKES/DATA MINING/Neural Network")


# Read the comma-separated assignment data; the first row holds column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
data_full <- read.table("GROUP ASSIGNMENT QUESTION.csv", sep = ",",
                        header = TRUE)




#######################################################################
library(neuralnet)
library(caret)
## Warning: package 'caret' was built under R version 3.4.3
## Loading required package: lattice
## Loading required package: ggplot2
library(scales)
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
##############Splitting data############################################

## Development sample size: 80% of the rows, rounded down
smp_size <- floor(nrow(data_full) * 0.8)

## Fix the RNG seed so that the partition is reproducible
set.seed(4)
## Draw the row numbers of the development sample without replacement
train_ind <- sample.int(nrow(data_full), size = smp_size)

## Selected rows form the development sample, all remaining rows the holdout
Dev_sample <- data_full[train_ind, ]
Hold_sample <- data_full[-train_ind, ]



# Response rate = sum of the TARGET flag divided by the number of rows
with(Dev_sample, sum(TARGET) / length(TARGET))
## [1] 0.1270625
with(Hold_sample, sum(TARGET) / length(TARGET))
## [1] 0.11975
# Response rate for the development sample - 12.7%
# Response rate for the holdout sample - 11.97%
#################################################################################

# Converting categorical variables into indicator (dummy) columns

# Append one 0/1 indicator column per level of each variable named in `vars`.
#   df        - data frame to extend
#   vars      - character vector of categorical column names present in `df`
#   reference - data frame whose values define the level set of each variable
#               (defaults to `df` itself)
# Returns `df` with the indicator columns added on the right, in `vars` order.
add_dummy_columns <- function(df, vars, reference = df) {
  for (col in vars) {
    # Take the levels from `reference` so that every sample receives the same
    # indicator columns, even when a rare level is absent from `df`.
    encoded <- df[col]
    encoded[[col]] <- factor(encoded[[col]],
                             levels = levels(factor(reference[[col]])))
    # reformulate(col, intercept = FALSE) builds `~ <col> - 1`, i.e. one
    # column per level and no intercept column.
    dummies <- model.matrix(reformulate(col, intercept = FALSE),
                            data = encoded)
    # data.frame() applies make.names() to the new columns, so the level
    # "SELF-EMP" yields the column name "OCCUPATIONSELF.EMP".
    df <- data.frame(df, dummies)
  }
  df
}

categorical_vars <- c("OCCUPATION", "GENDER", "ACC_TYPE")

# Both samples are encoded against the levels found in the full data set.
Dev_sample <- add_dummy_columns(Dev_sample, categorical_vars,
                                reference = data_full)
Hold_sample <- add_dummy_columns(Hold_sample, categorical_vars,
                                 reference = data_full)


#I have ignored the Cust ID, Account Opening Date & Random variables
#Added the categorical variables as indicator (dummy) matrix columns

# List the column names (with their positions) of the development sample
colnames(Dev_sample)
##  [1] "CUST_ID"                  "TARGET"                  
##  [3] "AGE"                      "GENDER"                  
##  [5] "BALANCE"                  "OCCUPATION"              
##  [7] "AGE_BKT"                  "SCR"                     
##  [9] "HOLDING_PERIOD"           "ACC_TYPE"                
## [11] "ACC_OP_DATE"              "LEN_OF_RLTN_IN_MNTH"     
## [13] "NO_OF_L_CR_TXNS"          "NO_OF_L_DR_TXNS"         
## [15] "TOT_NO_OF_L_TXNS"         "NO_OF_BR_CSH_WDL_DR_TXNS"
## [17] "NO_OF_ATM_DR_TXNS"        "NO_OF_NET_DR_TXNS"       
## [19] "NO_OF_MOB_DR_TXNS"        "NO_OF_CHQ_DR_TXNS"       
## [21] "FLG_HAS_CC"               "AMT_ATM_DR"              
## [23] "AMT_BR_CSH_WDL_DR"        "AMT_CHQ_DR"              
## [25] "AMT_NET_DR"               "AMT_MOB_DR"              
## [27] "AMT_L_DR"                 "FLG_HAS_ANY_CHGS"        
## [29] "AMT_OTH_BK_ATM_USG_CHGS"  "AMT_MIN_BAL_NMC_CHGS"    
## [31] "NO_OF_IW_CHQ_BNC_TXNS"    "NO_OF_OW_CHQ_BNC_TXNS"   
## [33] "AVG_AMT_PER_ATM_TXN"      "AVG_AMT_PER_CSH_WDL_TXN" 
## [35] "AVG_AMT_PER_CHQ_TXN"      "AVG_AMT_PER_NET_TXN"     
## [37] "AVG_AMT_PER_MOB_TXN"      "FLG_HAS_NOMINEE"         
## [39] "FLG_HAS_OLD_LOAN"         "random"                  
## [41] "OCCUPATIONPROF"           "OCCUPATIONSAL"           
## [43] "OCCUPATIONSELF.EMP"       "OCCUPATIONSENP"          
## [45] "GENDERF"                  "GENDERM"                 
## [47] "GENDERO"                  "ACC_TYPECA"              
## [49] "ACC_TYPESA"
# Row counts of the development and holdout samples
vapply(list(Dev_sample, Hold_sample), nrow, integer(1))
## [1] 16000  4000
# Column types of the development sample
str(Dev_sample)
## 'data.frame':    16000 obs. of  49 variables:
##  $ CUST_ID                 : Factor w/ 20000 levels "C1","C10","C100",..: 19195 11898 1333 17586 6294 11789 13868 17602 4492 19475 ...
##  $ TARGET                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ AGE                     : int  54 25 53 55 32 29 30 51 55 34 ...
##  $ GENDER                  : Factor w/ 3 levels "F","M","O": 2 2 2 2 2 2 1 1 2 1 ...
##  $ BALANCE                 : num  58222 98779 4653 53553 98450 ...
##  $ OCCUPATION              : Factor w/ 4 levels "PROF","SAL","SELF-EMP",..: 1 1 3 3 1 2 2 4 1 2 ...
##  $ AGE_BKT                 : Factor w/ 7 levels "<25",">50","26-30",..: 2 1 2 2 4 3 3 2 2 4 ...
##  $ SCR                     : int  398 272 537 272 305 172 423 571 662 222 ...
##  $ HOLDING_PERIOD          : int  21 21 22 29 8 24 30 23 16 31 ...
##  $ ACC_TYPE                : Factor w/ 2 levels "CA","SA": 2 2 2 2 2 2 1 1 2 1 ...
##  $ ACC_OP_DATE             : Factor w/ 4869 levels "01-01-2000","01-01-2001",..: 2544 3196 263 2109 2360 2987 2842 1053 3803 1107 ...
##  $ LEN_OF_RLTN_IN_MNTH     : int  126 146 208 127 90 39 41 179 156 166 ...
##  $ NO_OF_L_CR_TXNS         : int  8 12 7 2 8 0 35 11 13 11 ...
##  $ NO_OF_L_DR_TXNS         : int  4 3 3 3 5 2 3 2 7 1 ...
##  $ TOT_NO_OF_L_TXNS        : int  12 15 10 5 13 2 38 13 20 12 ...
##  $ NO_OF_BR_CSH_WDL_DR_TXNS: int  3 0 3 1 0 1 3 0 2 1 ...
##  $ NO_OF_ATM_DR_TXNS       : int  1 2 0 1 1 0 0 2 1 0 ...
##  $ NO_OF_NET_DR_TXNS       : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ NO_OF_MOB_DR_TXNS       : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ NO_OF_CHQ_DR_TXNS       : int  0 0 0 1 4 0 0 0 4 0 ...
##  $ FLG_HAS_CC              : int  0 0 0 0 0 1 0 0 0 1 ...
##  $ AMT_ATM_DR              : int  2000 2200 0 19300 13200 0 0 40700 3000 0 ...
##  $ AMT_BR_CSH_WDL_DR       : int  488730 0 87470 761670 0 339790 778200 0 188060 420060 ...
##  $ AMT_CHQ_DR              : int  0 0 0 66460 25630 0 0 0 78350 0 ...
##  $ AMT_NET_DR              : num  0 136078 0 0 0 ...
##  $ AMT_MOB_DR              : int  0 0 0 0 0 159106 0 0 0 0 ...
##  $ AMT_L_DR                : num  490730 138278 87470 847430 38830 ...
##  $ FLG_HAS_ANY_CHGS        : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ AMT_OTH_BK_ATM_USG_CHGS : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ AMT_MIN_BAL_NMC_CHGS    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NO_OF_IW_CHQ_BNC_TXNS   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NO_OF_OW_CHQ_BNC_TXNS   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ AVG_AMT_PER_ATM_TXN     : num  2000 1100 0 19300 13200 ...
##  $ AVG_AMT_PER_CSH_WDL_TXN : num  162910 0 29157 761670 0 ...
##  $ AVG_AMT_PER_CHQ_TXN     : num  0 0 0 66460 6408 ...
##  $ AVG_AMT_PER_NET_TXN     : num  0 136078 0 0 0 ...
##  $ AVG_AMT_PER_MOB_TXN     : num  0 0 0 0 0 ...
##  $ FLG_HAS_NOMINEE         : int  1 1 1 1 1 0 1 1 1 1 ...
##  $ FLG_HAS_OLD_LOAN        : int  1 0 1 1 1 0 1 0 1 0 ...
##  $ random                  : num  0.5912 0.0085 0.2945 0.2772 0.816 ...
##  $ OCCUPATIONPROF          : num  1 1 0 0 1 0 0 0 1 0 ...
##  $ OCCUPATIONSAL           : num  0 0 0 0 0 1 1 0 0 1 ...
##  $ OCCUPATIONSELF.EMP      : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ OCCUPATIONSENP          : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ GENDERF                 : num  0 0 0 0 0 0 1 1 0 1 ...
##  $ GENDERM                 : num  1 1 1 1 1 1 0 0 1 0 ...
##  $ GENDERO                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ACC_TYPECA              : num  0 0 0 0 0 0 1 1 0 1 ...
##  $ ACC_TYPESA              : num  1 1 1 1 1 1 0 0 1 0 ...
# Column names again, as a position lookup for the predictor selection below
names(Dev_sample)
##  [1] "CUST_ID"                  "TARGET"                  
##  [3] "AGE"                      "GENDER"                  
##  [5] "BALANCE"                  "OCCUPATION"              
##  [7] "AGE_BKT"                  "SCR"                     
##  [9] "HOLDING_PERIOD"           "ACC_TYPE"                
## [11] "ACC_OP_DATE"              "LEN_OF_RLTN_IN_MNTH"     
## [13] "NO_OF_L_CR_TXNS"          "NO_OF_L_DR_TXNS"         
## [15] "TOT_NO_OF_L_TXNS"         "NO_OF_BR_CSH_WDL_DR_TXNS"
## [17] "NO_OF_ATM_DR_TXNS"        "NO_OF_NET_DR_TXNS"       
## [19] "NO_OF_MOB_DR_TXNS"        "NO_OF_CHQ_DR_TXNS"       
## [21] "FLG_HAS_CC"               "AMT_ATM_DR"              
## [23] "AMT_BR_CSH_WDL_DR"        "AMT_CHQ_DR"              
## [25] "AMT_NET_DR"               "AMT_MOB_DR"              
## [27] "AMT_L_DR"                 "FLG_HAS_ANY_CHGS"        
## [29] "AMT_OTH_BK_ATM_USG_CHGS"  "AMT_MIN_BAL_NMC_CHGS"    
## [31] "NO_OF_IW_CHQ_BNC_TXNS"    "NO_OF_OW_CHQ_BNC_TXNS"   
## [33] "AVG_AMT_PER_ATM_TXN"      "AVG_AMT_PER_CSH_WDL_TXN" 
## [35] "AVG_AMT_PER_CHQ_TXN"      "AVG_AMT_PER_NET_TXN"     
## [37] "AVG_AMT_PER_MOB_TXN"      "FLG_HAS_NOMINEE"         
## [39] "FLG_HAS_OLD_LOAN"         "random"                  
## [41] "OCCUPATIONPROF"           "OCCUPATIONSAL"           
## [43] "OCCUPATIONSELF.EMP"       "OCCUPATIONSENP"          
## [45] "GENDERF"                  "GENDERM"                 
## [47] "GENDERO"                  "ACC_TYPECA"              
## [49] "ACC_TYPESA"
# Creating the predictor subset.
# Columns are excluded by name instead of by position, so the selection does
# not silently shift if the column order of the input ever changes:
#   CUST_ID, ACC_OP_DATE, random  - ignored variables
#   TARGET                        - the response, re-attached below
#   GENDER, OCCUPATION, ACC_TYPE  - represented by their indicator columns
#   AGE_BKT                       - categorical column left out of the model
excluded_cols <- c("CUST_ID", "TARGET", "GENDER", "OCCUPATION", "AGE_BKT",
                   "ACC_TYPE", "ACC_OP_DATE", "random")
x <- Dev_sample[!names(Dev_sample) %in% excluded_cols]

# Scaling the development predictors: scale() centres each column and divides
# it by its standard deviation. The result is a matrix that keeps the values
# used in its "scaled:center" and "scaled:scale" attributes.
rf.Dev_sample <- scale(x)
# Attaching the (unscaled) target as the first column of the scaled data
rf.devscaled <- cbind(Dev_sample["TARGET"], rf.Dev_sample)

# View() opens the interactive data viewer, which may fail when the notebook
# is rendered or run as a script, so it is only called interactively.
if (interactive()) {
  View(rf.devscaled)
}
# Column types of the scaled modelling data
str(rf.devscaled)
## 'data.frame':    16000 obs. of  42 variables:
##  $ TARGET                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ AGE                     : num  1.634 -1.393 1.529 1.738 -0.662 ...
##  $ BALANCE                 : num  -0.625 -0.568 -0.699 -0.631 -0.569 ...
##  $ SCR                     : num  -0.17 -0.674 0.386 -0.674 -0.542 ...
##  $ HOLDING_PERIOD          : num  0.696 0.696 0.812 1.622 -0.809 ...
##  $ LEN_OF_RLTN_IN_MNTH     : num  0.0157 0.3794 1.5071 0.0339 -0.6391 ...
##  $ NO_OF_L_CR_TXNS         : num  -0.3586 -0.0287 -0.4411 -0.8535 -0.3586 ...
##  $ NO_OF_L_DR_TXNS         : num  -0.342 -0.471 -0.471 -0.471 -0.212 ...
##  $ TOT_NO_OF_L_TXNS        : num  -0.393 -0.224 -0.505 -0.787 -0.336 ...
##  $ NO_OF_BR_CSH_WDL_DR_TXNS: num  0.492 -0.835 0.492 -0.392 -0.835 ...
##  $ NO_OF_ATM_DR_TXNS       : num  -0.021 0.638 -0.68 -0.021 -0.021 ...
##  $ NO_OF_NET_DR_TXNS       : num  -0.4872 -0.0727 -0.4872 -0.4872 -0.4872 ...
##  $ NO_OF_MOB_DR_TXNS       : num  -0.204 -0.204 -0.204 -0.204 -0.204 ...
##  $ NO_OF_CHQ_DR_TXNS       : num  -0.895 -0.895 -0.895 -0.474 0.789 ...
##  $ FLG_HAS_CC              : num  -0.661 -0.661 -0.661 -0.661 -0.661 ...
##  $ AMT_ATM_DR              : num  -0.588 -0.575 -0.719 0.543 0.144 ...
##  $ AMT_BR_CSH_WDL_DR       : num  0.331 -1.144 -0.88 1.155 -1.144 ...
##  $ AMT_CHQ_DR              : num  -0.283 -0.283 -0.283 -0.129 -0.224 ...
##  $ AMT_NET_DR              : num  -0.746 -0.32 -0.746 -0.746 -0.746 ...
##  $ AMT_MOB_DR              : num  -0.451 -0.451 -0.451 -0.451 -0.451 ...
##  $ AMT_L_DR                : num  -0.399 -0.897 -0.969 0.105 -1.038 ...
##  $ FLG_HAS_ANY_CHGS        : num  -0.353 2.833 -0.353 -0.353 -0.353 ...
##  $ AMT_OTH_BK_ATM_USG_CHGS : num  -0.078 -0.078 -0.078 -0.078 -0.078 ...
##  $ AMT_MIN_BAL_NMC_CHGS    : num  -0.0887 -0.0887 -0.0887 -0.0887 -0.0887 ...
##  $ NO_OF_IW_CHQ_BNC_TXNS   : num  -0.211 -0.211 -0.211 -0.211 -0.211 ...
##  $ NO_OF_OW_CHQ_BNC_TXNS   : num  -0.217 -0.217 -0.217 -0.217 -0.217 ...
##  $ AVG_AMT_PER_ATM_TXN     : num  -0.754 -0.878 -1.031 1.647 0.8 ...
##  $ AVG_AMT_PER_CSH_WDL_TXN : num  -0.296 -0.899 -0.791 1.923 -0.899 ...
##  $ AVG_AMT_PER_CHQ_TXN     : num  -0.506 -0.506 -0.506 0.848 -0.376 ...
##  $ AVG_AMT_PER_NET_TXN     : num  -0.637 -0.155 -0.637 -0.637 -0.637 ...
##  $ AVG_AMT_PER_MOB_TXN     : num  -0.434 -0.434 -0.434 -0.434 -0.434 ...
##  $ FLG_HAS_NOMINEE         : num  0.332 0.332 0.332 0.332 0.332 ...
##  $ FLG_HAS_OLD_LOAN        : num  1.018 -0.982 1.018 1.018 1.018 ...
##  $ OCCUPATIONPROF          : num  1.619 1.619 -0.618 -0.618 1.619 ...
##  $ OCCUPATIONSAL           : num  -0.64 -0.64 -0.64 -0.64 -0.64 ...
##  $ OCCUPATIONSELF.EMP      : num  -0.464 -0.464 2.157 2.157 -0.464 ...
##  $ OCCUPATIONSENP          : num  -0.587 -0.587 -0.587 -0.587 -0.587 ...
##  $ GENDERF                 : num  -0.609 -0.609 -0.609 -0.609 -0.609 ...
##  $ GENDERM                 : num  0.624 0.624 0.624 0.624 0.624 ...
##  $ GENDERO                 : num  -0.0976 -0.0976 -0.0976 -0.0976 -0.0976 ...
##  $ ACC_TYPECA              : num  -0.518 -0.518 -0.518 -0.518 -0.518 ...
##  $ ACC_TYPESA              : num  0.518 0.518 0.518 0.518 0.518 ...
#all are numeric variables
############################  Model creation###################################
# Open the randomForest help page; skipped outside interactive sessions,
# where there is nobody to read it.
if (interactive()) {
  help("randomForest")
}
## starting httpd help server ...
##  done
# ntree    - just give an approximate number
# mtry     - approximately sqrt of the number of variables
# nodesize - about 1% of the number of observations

# First forest: TARGET is converted to a factor so that a classification
# forest is grown; every other column of rf.devscaled is a predictor.
aforest <- randomForest(
  as.factor(TARGET) ~ .,
  data = rf.devscaled,
  ntree = 101,
  mtry = 7,
  nodesize = 150,
  importance = TRUE
)
# Error rates against the number of trees grown
plot(aforest)

#OOB error is reducing up to about 20 trees, then it is flat
#Therefore, the optimal number of trees is somewhere between 10 and 25
#Let's now look at the OOB error in tabular form

# List the component names and the class of the fitted forest object
attributes(aforest)
## $names
##  [1] "call"            "type"            "predicted"      
##  [4] "err.rate"        "confusion"       "votes"          
##  [7] "oob.times"       "classes"         "importance"     
## [10] "importanceSD"    "localImportance" "proximity"      
## [13] "ntree"           "mtry"            "forest"         
## [16] "y"               "test"            "inbag"          
## [19] "terms"          
## 
## $class
## [1] "randomForest.formula" "randomForest"
# Error-rate matrix: one row per tree, columns OOB, class 0 and class 1
aforest[["err.rate"]]
##              OOB            0         1
##   [1,] 0.1326844 2.424242e-02 0.8812416
##   [2,] 0.1293641 2.141328e-02 0.8743842
##   [3,] 0.1271257 1.821824e-02 0.8822751
##   [4,] 0.1273701 1.582575e-02 0.9003538
##   [5,] 0.1282123 1.471173e-02 0.9111355
##   [6,] 0.1280528 1.299694e-02 0.9176285
##   [7,] 0.1274644 1.058674e-02 0.9289366
##   [8,] 0.1279739 9.158851e-03 0.9436620
##   [9,] 0.1267373 7.416024e-03 0.9460809
##  [10,] 0.1264585 5.925712e-03 0.9533961
##  [11,] 0.1263515 5.328725e-03 0.9579416
##  [12,] 0.1267093 5.245760e-03 0.9610069
##  [13,] 0.1255872 4.161883e-03 0.9595860
##  [14,] 0.1259936 3.441600e-03 0.9679803
##  [15,] 0.1262901 3.009890e-03 0.9724545
##  [16,] 0.1253673 2.220630e-03 0.9709788
##  [17,] 0.1253595 2.220471e-03 0.9709788
##  [18,] 0.1245936 1.432357e-03 0.9704870
##  [19,] 0.1248828 1.432049e-03 0.9729464
##  [20,] 0.1239452 9.308320e-04 0.9690113
##  [21,] 0.1243828 8.592296e-04 0.9729464
##  [22,] 0.1248203 8.592296e-04 0.9763896
##  [23,] 0.1250078 6.444222e-04 0.9793409
##  [24,] 0.1248828 6.444222e-04 0.9783571
##  [25,] 0.1244375 4.295840e-04 0.9763896
##  [26,] 0.1245000 2.147920e-04 0.9783571
##  [27,] 0.1243125 2.147920e-04 0.9768815
##  [28,] 0.1245625 3.579867e-04 0.9778652
##  [29,] 0.1248750 4.295840e-04 0.9798328
##  [30,] 0.1248750 2.863893e-04 0.9808165
##  [31,] 0.1246875 2.863893e-04 0.9793409
##  [32,] 0.1245000 2.147920e-04 0.9783571
##  [33,] 0.1245625 2.147920e-04 0.9788490
##  [34,] 0.1243750 2.147920e-04 0.9773733
##  [35,] 0.1245625 2.147920e-04 0.9788490
##  [36,] 0.1246250 2.147920e-04 0.9793409
##  [37,] 0.1246875 1.431947e-04 0.9803246
##  [38,] 0.1248125 1.431947e-04 0.9813084
##  [39,] 0.1248125 1.431947e-04 0.9813084
##  [40,] 0.1247500 1.431947e-04 0.9808165
##  [41,] 0.1248125 1.431947e-04 0.9813084
##  [42,] 0.1248750 1.431947e-04 0.9818003
##  [43,] 0.1249375 1.431947e-04 0.9822922
##  [44,] 0.1251875 1.431947e-04 0.9842597
##  [45,] 0.1251250 7.159734e-05 0.9842597
##  [46,] 0.1255000 1.431947e-04 0.9867191
##  [47,] 0.1253125 1.431947e-04 0.9852435
##  [48,] 0.1252500 2.147920e-04 0.9842597
##  [49,] 0.1251250 7.159734e-05 0.9842597
##  [50,] 0.1251250 1.431947e-04 0.9837678
##  [51,] 0.1251875 7.159734e-05 0.9847516
##  [52,] 0.1253750 7.159734e-05 0.9862273
##  [53,] 0.1253750 7.159734e-05 0.9862273
##  [54,] 0.1253125 7.159734e-05 0.9857354
##  [55,] 0.1252500 7.159734e-05 0.9852435
##  [56,] 0.1254375 7.159734e-05 0.9867191
##  [57,] 0.1253750 7.159734e-05 0.9862273
##  [58,] 0.1254375 7.159734e-05 0.9867191
##  [59,] 0.1255000 7.159734e-05 0.9872110
##  [60,] 0.1255625 7.159734e-05 0.9877029
##  [61,] 0.1255625 7.159734e-05 0.9877029
##  [62,] 0.1255000 7.159734e-05 0.9872110
##  [63,] 0.1253750 0.000000e+00 0.9867191
##  [64,] 0.1255000 7.159734e-05 0.9872110
##  [65,] 0.1255625 0.000000e+00 0.9881948
##  [66,] 0.1254375 0.000000e+00 0.9872110
##  [67,] 0.1255000 0.000000e+00 0.9877029
##  [68,] 0.1255625 0.000000e+00 0.9881948
##  [69,] 0.1256250 0.000000e+00 0.9886867
##  [70,] 0.1256250 0.000000e+00 0.9886867
##  [71,] 0.1255625 0.000000e+00 0.9881948
##  [72,] 0.1256875 0.000000e+00 0.9891786
##  [73,] 0.1256875 0.000000e+00 0.9891786
##  [74,] 0.1256250 0.000000e+00 0.9886867
##  [75,] 0.1256250 0.000000e+00 0.9886867
##  [76,] 0.1256875 0.000000e+00 0.9891786
##  [77,] 0.1257500 0.000000e+00 0.9896704
##  [78,] 0.1257500 0.000000e+00 0.9896704
##  [79,] 0.1258750 7.159734e-05 0.9901623
##  [80,] 0.1259375 7.159734e-05 0.9906542
##  [81,] 0.1259375 0.000000e+00 0.9911461
##  [82,] 0.1259375 0.000000e+00 0.9911461
##  [83,] 0.1258750 0.000000e+00 0.9906542
##  [84,] 0.1258750 0.000000e+00 0.9906542
##  [85,] 0.1258750 0.000000e+00 0.9906542
##  [86,] 0.1259375 0.000000e+00 0.9911461
##  [87,] 0.1258750 0.000000e+00 0.9906542
##  [88,] 0.1259375 0.000000e+00 0.9911461
##  [89,] 0.1258750 0.000000e+00 0.9906542
##  [90,] 0.1258750 0.000000e+00 0.9906542
##  [91,] 0.1258125 0.000000e+00 0.9901623
##  [92,] 0.1258125 0.000000e+00 0.9901623
##  [93,] 0.1258750 0.000000e+00 0.9906542
##  [94,] 0.1258750 0.000000e+00 0.9906542
##  [95,] 0.1258750 0.000000e+00 0.9906542
##  [96,] 0.1260000 0.000000e+00 0.9916380
##  [97,] 0.1260625 0.000000e+00 0.9921299
##  [98,] 0.1258750 0.000000e+00 0.9906542
##  [99,] 0.1259375 0.000000e+00 0.9911461
## [100,] 0.1258750 0.000000e+00 0.9906542
## [101,] 0.1258125 0.000000e+00 0.9901623
# Print the model summary: call, OOB error estimate and confusion matrix
print(aforest)
## 
## Call:
##  randomForest(formula = as.factor(TARGET) ~ ., data = rf.devscaled,      ntree = 101, mtry = 7, nodesize = 150, importance = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 101
## No. of variables tried at each split: 7
## 
##         OOB estimate of  error rate: 12.58%
## Confusion matrix:
##       0  1 class.error
## 0 13967  0   0.0000000
## 1  2013 20   0.9901623
#Out of Bag error is 12.58%.
#We can use the tuneRF function to identify the optimum mtry

## Tuning Random Forest

# tuneRF() takes the predictors (x) and the response (y) separately, so the
# dependent variable (TARGET, column 1 of rf.devscaled) is removed from x.
# mtryStart is roughly the square root of the number of variables.
# doBest = TRUE makes tuneRF() return a forest fitted with the best mtry
# found; nodesize and importance are passed on to randomForest().
# TRUE is spelled out because T is an ordinary variable that can be reassigned.

tRF <- tuneRF(
  x = rf.devscaled[, -1],
  y = as.factor(rf.devscaled$TARGET),
  mtryStart = 7,
  ntreeTry = 101,
  stepFactor = 1.5,
  improve = 0.001,
  trace = TRUE,
  plot = TRUE,
  doBest = TRUE,
  nodesize = 150,
  importance = TRUE
)
## mtry = 7  OOB error = 12.57% 
## Searching left ...
## mtry = 5     OOB error = 12.68% 
## -0.008453506 0.001 
## Searching right ...
## mtry = 10    OOB error = 12.46% 
## 0.008950771 0.001 
## mtry = 15    OOB error = 12.24% 
## 0.01705971 0.001 
## mtry = 22    OOB error = 12.11% 
## 0.01123022 0.001 
## mtry = 33    OOB error = 12.09% 
## 0.001032525 0.001 
## mtry = 41    OOB error = 12.09% 
## 0 0.001

#OOB error is lowest (12.09%) at mtry = 33 and mtry = 41 (41 = all predictors)
#Therefore, we will take mtry as 41.

# Importance matrix stored on the forest returned by tuneRF()
tRF[["importance"]]
##                                      0             1 MeanDecreaseAccuracy
## AGE                       6.335827e-04  3.756007e-03         1.030114e-03
## BALANCE                   1.903333e-03  2.643940e-02         5.015197e-03
## SCR                       2.544291e-03  2.371110e-02         5.230994e-03
## HOLDING_PERIOD            1.228188e-02  1.707297e-02         1.289275e-02
## LEN_OF_RLTN_IN_MNTH       1.022907e-03  5.603383e-03         1.603964e-03
## NO_OF_L_CR_TXNS           2.640135e-02 -1.103298e-02         2.163542e-02
## NO_OF_L_DR_TXNS           7.672277e-02 -2.151887e-02         6.424058e-02
## TOT_NO_OF_L_TXNS          4.453127e-02 -3.361879e-02         3.460826e-02
## NO_OF_BR_CSH_WDL_DR_TXNS  1.055151e-03  1.162451e-03         1.069983e-03
## NO_OF_ATM_DR_TXNS         2.120783e-02 -1.145247e-02         1.704674e-02
## NO_OF_NET_DR_TXNS         5.252337e-04 -8.315427e-05         4.477746e-04
## NO_OF_MOB_DR_TXNS         1.848496e-04 -1.278774e-04         1.457270e-04
## NO_OF_CHQ_DR_TXNS         4.170308e-03 -1.377150e-03         3.460624e-03
## FLG_HAS_CC                2.112033e-03  2.066809e-02         4.466828e-03
## AMT_ATM_DR                8.787073e-03 -1.123408e-03         7.520541e-03
## AMT_BR_CSH_WDL_DR         4.679923e-03  1.808252e-03         4.310720e-03
## AMT_CHQ_DR                6.459411e-03  2.529663e-04         5.673287e-03
## AMT_NET_DR                2.104578e-03  4.011741e-05         1.841814e-03
## AMT_MOB_DR                1.162179e-03  2.704481e-04         1.047068e-03
## AMT_L_DR                  1.826242e-02 -7.972873e-03         1.492881e-02
## FLG_HAS_ANY_CHGS          4.038065e-05  1.976710e-04         6.012969e-05
## AMT_OTH_BK_ATM_USG_CHGS  -3.889896e-06  2.176646e-05        -6.793510e-07
## AMT_MIN_BAL_NMC_CHGS      1.364304e-05  1.001183e-04         2.448842e-05
## NO_OF_IW_CHQ_BNC_TXNS     9.557092e-05  6.597972e-04         1.671386e-04
## NO_OF_OW_CHQ_BNC_TXNS     3.796836e-05  5.683274e-04         1.048230e-04
## AVG_AMT_PER_ATM_TXN       7.082396e-03 -3.614369e-04         6.129520e-03
## AVG_AMT_PER_CSH_WDL_TXN   4.193803e-03  1.945881e-05         3.661726e-03
## AVG_AMT_PER_CHQ_TXN       8.936356e-03 -3.588654e-03         7.344762e-03
## AVG_AMT_PER_NET_TXN       2.270607e-03  7.813902e-04         2.079548e-03
## AVG_AMT_PER_MOB_TXN       1.072694e-03  2.850963e-03         1.298298e-03
## FLG_HAS_NOMINEE          -4.702877e-06  1.073480e-04         9.818572e-06
## FLG_HAS_OLD_LOAN          3.552343e-06  6.255142e-05         1.093303e-05
## OCCUPATIONPROF            2.599109e-05  1.528876e-04         4.210797e-05
## OCCUPATIONSAL             2.090056e-04  1.865403e-04         2.062378e-04
## OCCUPATIONSELF.EMP        1.971545e-03  2.425944e-02         4.798473e-03
## OCCUPATIONSENP            1.173029e-04  2.794916e-04         1.381685e-04
## GENDERF                   4.371696e-04 -1.049532e-04         3.693833e-04
## GENDERM                   1.900514e-04  2.365190e-04         1.966502e-04
## GENDERO                   6.648829e-04  3.566780e-03         1.033832e-03
## ACC_TYPECA                6.738917e-04 -2.396671e-04         5.569701e-04
## ACC_TYPESA                6.801632e-04 -1.578754e-04         5.731777e-04
##                          MeanDecreaseGini
## AGE                            22.9532468
## BALANCE                        79.9821038
## SCR                            82.0994967
## HOLDING_PERIOD                 71.0150761
## LEN_OF_RLTN_IN_MNTH            39.4859097
## NO_OF_L_CR_TXNS                62.8620815
## NO_OF_L_DR_TXNS                45.2085057
## TOT_NO_OF_L_TXNS               59.3031735
## NO_OF_BR_CSH_WDL_DR_TXNS        9.9014735
## NO_OF_ATM_DR_TXNS              15.0360908
## NO_OF_NET_DR_TXNS               2.0770474
## NO_OF_MOB_DR_TXNS               0.8441493
## NO_OF_CHQ_DR_TXNS               7.9720741
## FLG_HAS_CC                     33.9288445
## AMT_ATM_DR                     24.3519615
## AMT_BR_CSH_WDL_DR              27.4298134
## AMT_CHQ_DR                     23.6986932
## AMT_NET_DR                     13.5264140
## AMT_MOB_DR                      9.2343786
## AMT_L_DR                       34.3059443
## FLG_HAS_ANY_CHGS                2.6267636
## AMT_OTH_BK_ATM_USG_CHGS         0.1193202
## AMT_MIN_BAL_NMC_CHGS            0.4946426
## NO_OF_IW_CHQ_BNC_TXNS           3.7377585
## NO_OF_OW_CHQ_BNC_TXNS           3.4973976
## AVG_AMT_PER_ATM_TXN            24.0949226
## AVG_AMT_PER_CSH_WDL_TXN        21.7537011
## AVG_AMT_PER_CHQ_TXN            20.2357889
## AVG_AMT_PER_NET_TXN            16.3109029
## AVG_AMT_PER_MOB_TXN            17.5997959
## FLG_HAS_NOMINEE                 1.2832635
## FLG_HAS_OLD_LOAN                1.4894957
## OCCUPATIONPROF                  1.8949551
## OCCUPATIONSAL                   3.1911938
## OCCUPATIONSELF.EMP             44.7451733
## OCCUPATIONSENP                  1.6516624
## GENDERF                         2.8893774
## GENDERM                         2.2009936
## GENDERO                        12.5172943
## ACC_TYPECA                      2.0209313
## ACC_TYPESA                      1.8953312
## Variable importance of the first forest (aforest), rounded to two decimals
impVar <- round(randomForest::importance(aforest), digits = 2)
# Rank the variables by overall mean decrease in accuracy (third column)
impVar[order(impVar[, "MeanDecreaseAccuracy"], decreasing = TRUE), ]
##                              0     1 MeanDecreaseAccuracy MeanDecreaseGini
## SCR                       8.09 11.84                12.06            56.69
## TOT_NO_OF_L_TXNS          9.59 -4.15                10.33            46.51
## NO_OF_L_CR_TXNS           9.52 -3.24                10.04            51.77
## OCCUPATIONSELF.EMP        7.57  9.49                 9.87            31.66
## AMT_L_DR                  9.22 -2.82                 9.80            30.82
## BALANCE                   5.40 10.65                 9.72            47.66
## FLG_HAS_CC                6.56 10.67                 9.46            28.68
## HOLDING_PERIOD            7.09  8.49                 8.99            54.85
## NO_OF_L_DR_TXNS           7.77 -3.30                 7.96            33.90
## GENDERO                   5.80  6.30                 7.15             8.92
## LEN_OF_RLTN_IN_MNTH       5.26  6.43                 7.05            25.44
## AMT_ATM_DR                6.05 -0.84                 6.35            21.55
## NO_OF_ATM_DR_TXNS         5.89 -5.54                 5.86            10.73
## AVG_AMT_PER_ATM_TXN       5.55 -2.68                 5.68            26.59
## AVG_AMT_PER_CSH_WDL_TXN   4.85  2.10                 5.59            20.20
## OCCUPATIONSAL             4.44  4.24                 5.45             5.43
## AGE                       3.49  5.62                 5.33            17.23
## NO_OF_CHQ_DR_TXNS         4.91  0.23                 5.22            11.57
## AVG_AMT_PER_CHQ_TXN       4.78 -1.90                 5.22            16.93
## AMT_CHQ_DR                4.87 -1.45                 5.17            19.18
## AMT_BR_CSH_WDL_DR         3.80  4.52                 5.10            21.90
## AMT_NET_DR                3.91 -1.04                 4.18            12.88
## AMT_MOB_DR                3.71 -1.16                 4.18            12.19
## NO_OF_BR_CSH_WDL_DR_TXNS  3.40  2.43                 4.16             9.74
## AVG_AMT_PER_MOB_TXN       2.55  1.82                 3.46            14.54
## NO_OF_IW_CHQ_BNC_TXNS     1.98  2.48                 3.37             1.99
## NO_OF_NET_DR_TXNS         2.83 -0.33                 3.09             4.67
## ACC_TYPECA                2.51 -0.65                 2.80             2.63
## AVG_AMT_PER_NET_TXN       2.53 -0.09                 2.76            14.24
## OCCUPATIONPROF            2.07  1.41                 2.63             1.94
## FLG_HAS_OLD_LOAN          1.81  2.19                 2.30             2.44
## FLG_HAS_ANY_CHGS          1.05  2.71                 2.21             2.61
## AMT_MIN_BAL_NMC_CHGS      0.24  3.98                 2.04             1.93
## OCCUPATIONSENP            1.61 -0.62                 2.00             1.50
## NO_OF_OW_CHQ_BNC_TXNS     0.25  2.92                 1.99             2.86
## NO_OF_MOB_DR_TXNS         1.81 -1.48                 1.79             1.99
## ACC_TYPESA                1.35 -0.76                 1.51             2.43
## GENDERF                   1.41  0.07                 1.49             4.51
## GENDERM                   1.21  0.52                 1.40             2.49
## FLG_HAS_NOMINEE           0.44  2.62                 1.20             1.13
## AMT_OTH_BK_ATM_USG_CHGS  -1.16  1.00                -0.69             0.25
# Rank the variables by their importance for class "1" (second column)
impVar[order(impVar[, "1"], decreasing = TRUE), ]
##                              0     1 MeanDecreaseAccuracy MeanDecreaseGini
## SCR                       8.09 11.84                12.06            56.69
## FLG_HAS_CC                6.56 10.67                 9.46            28.68
## BALANCE                   5.40 10.65                 9.72            47.66
## OCCUPATIONSELF.EMP        7.57  9.49                 9.87            31.66
## HOLDING_PERIOD            7.09  8.49                 8.99            54.85
## LEN_OF_RLTN_IN_MNTH       5.26  6.43                 7.05            25.44
## GENDERO                   5.80  6.30                 7.15             8.92
## AGE                       3.49  5.62                 5.33            17.23
## AMT_BR_CSH_WDL_DR         3.80  4.52                 5.10            21.90
## OCCUPATIONSAL             4.44  4.24                 5.45             5.43
## AMT_MIN_BAL_NMC_CHGS      0.24  3.98                 2.04             1.93
## NO_OF_OW_CHQ_BNC_TXNS     0.25  2.92                 1.99             2.86
## FLG_HAS_ANY_CHGS          1.05  2.71                 2.21             2.61
## FLG_HAS_NOMINEE           0.44  2.62                 1.20             1.13
## NO_OF_IW_CHQ_BNC_TXNS     1.98  2.48                 3.37             1.99
## NO_OF_BR_CSH_WDL_DR_TXNS  3.40  2.43                 4.16             9.74
## FLG_HAS_OLD_LOAN          1.81  2.19                 2.30             2.44
## AVG_AMT_PER_CSH_WDL_TXN   4.85  2.10                 5.59            20.20
## AVG_AMT_PER_MOB_TXN       2.55  1.82                 3.46            14.54
## OCCUPATIONPROF            2.07  1.41                 2.63             1.94
## AMT_OTH_BK_ATM_USG_CHGS  -1.16  1.00                -0.69             0.25
## GENDERM                   1.21  0.52                 1.40             2.49
## NO_OF_CHQ_DR_TXNS         4.91  0.23                 5.22            11.57
## GENDERF                   1.41  0.07                 1.49             4.51
## AVG_AMT_PER_NET_TXN       2.53 -0.09                 2.76            14.24
## NO_OF_NET_DR_TXNS         2.83 -0.33                 3.09             4.67
## OCCUPATIONSENP            1.61 -0.62                 2.00             1.50
## ACC_TYPECA                2.51 -0.65                 2.80             2.63
## ACC_TYPESA                1.35 -0.76                 1.51             2.43
## AMT_ATM_DR                6.05 -0.84                 6.35            21.55
## AMT_NET_DR                3.91 -1.04                 4.18            12.88
## AMT_MOB_DR                3.71 -1.16                 4.18            12.19
## AMT_CHQ_DR                4.87 -1.45                 5.17            19.18
## NO_OF_MOB_DR_TXNS         1.81 -1.48                 1.79             1.99
## AVG_AMT_PER_CHQ_TXN       4.78 -1.90                 5.22            16.93
## AVG_AMT_PER_ATM_TXN       5.55 -2.68                 5.68            26.59
## AMT_L_DR                  9.22 -2.82                 9.80            30.82
## NO_OF_L_CR_TXNS           9.52 -3.24                10.04            51.77
## NO_OF_L_DR_TXNS           7.77 -3.30                 7.96            33.90
## TOT_NO_OF_L_TXNS          9.59 -4.15                10.33            46.51
## NO_OF_ATM_DR_TXNS         5.89 -5.54                 5.86            10.73
###############################################

# Creating the model with mtry = 41
# NOTE(review): nodesize is 100 here, whereas the first forest and the tuneRF
# run used 150 - confirm that this change is intended.
aforest1 <- randomForest(
  as.factor(TARGET) ~ .,
  data = rf.devscaled,
  ntree = 101,
  mtry = 41,
  nodesize = 100,
  importance = TRUE
)

# Model summary: call, OOB error estimate and confusion matrix
print(aforest1)
## 
## Call:
##  randomForest(formula = as.factor(TARGET) ~ ., data = rf.devscaled,      ntree = 101, mtry = 41, nodesize = 100, importance = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 101
## No. of variables tried at each split: 41
## 
##         OOB estimate of  error rate: 11.66%
## Confusion matrix:
##       0   1 class.error
## 0 13911  56 0.004009451
## 1  1809 224 0.889818003
# Error rates of the refitted forest against the number of trees grown
plot(aforest1)

# Checking the error rate: one row per tree, columns OOB, class 0 and class 1
aforest1[["err.rate"]]
##              OOB           0         1
##   [1,] 0.1316498 0.041545894 0.7411765
##   [2,] 0.1359233 0.045094094 0.7474104
##   [3,] 0.1324235 0.038877299 0.7713731
##   [4,] 0.1310324 0.035069886 0.7873696
##   [5,] 0.1281572 0.031961297 0.7866450
##   [6,] 0.1276255 0.028652200 0.8061812
##   [7,] 0.1257080 0.025956590 0.8101331
##   [8,] 0.1233237 0.023240421 0.8078471
##   [9,] 0.1228550 0.022795135 0.8087868
##  [10,] 0.1228137 0.021416685 0.8179563
##  [11,] 0.1209185 0.018810811 0.8222772
##  [12,] 0.1190282 0.017329402 0.8184965
##  [13,] 0.1208647 0.017082974 0.8338264
##  [14,] 0.1185051 0.015491645 0.8257016
##  [15,] 0.1180629 0.013259748 0.8380108
##  [16,] 0.1185667 0.013109822 0.8430118
##  [17,] 0.1178641 0.012105150 0.8444882
##  [18,] 0.1178567 0.011030728 0.8514511
##  [19,] 0.1182796 0.011387238 0.8524348
##  [20,] 0.1170771 0.009308987 0.8573537
##  [21,] 0.1170698 0.009379923 0.8568618
##  [22,] 0.1175073 0.008807103 0.8642400
##  [23,] 0.1173823 0.008091078 0.8681751
##  [24,] 0.1172573 0.007876271 0.8686670
##  [25,] 0.1175698 0.008019476 0.8701426
##  [26,] 0.1173750 0.007517720 0.8721102
##  [27,] 0.1173750 0.007446123 0.8726021
##  [28,] 0.1175625 0.007302928 0.8750615
##  [29,] 0.1168750 0.006801747 0.8730939
##  [30,] 0.1170000 0.007302928 0.8706345
##  [31,] 0.1172500 0.006873344 0.8755534
##  [32,] 0.1178125 0.007088136 0.8785047
##  [33,] 0.1175625 0.006730150 0.8789966
##  [34,] 0.1175000 0.006515358 0.8799803
##  [35,] 0.1179375 0.006443760 0.8839154
##  [36,] 0.1178750 0.006873344 0.8804722
##  [37,] 0.1176875 0.006443760 0.8819479
##  [38,] 0.1173125 0.006300566 0.8799803
##  [39,] 0.1175625 0.006014176 0.8839154
##  [40,] 0.1175000 0.006515358 0.8799803
##  [41,] 0.1171250 0.006228968 0.8789966
##  [42,] 0.1171250 0.006157371 0.8794884
##  [43,] 0.1172500 0.006443760 0.8785047
##  [44,] 0.1172500 0.006300566 0.8794884
##  [45,] 0.1171250 0.005942579 0.8809641
##  [46,] 0.1171875 0.006085774 0.8804722
##  [47,] 0.1175000 0.006515358 0.8799803
##  [48,] 0.1170625 0.005870982 0.8809641
##  [49,] 0.1171250 0.005799384 0.8819479
##  [50,] 0.1176250 0.005727787 0.8863748
##  [51,] 0.1172500 0.005584592 0.8844073
##  [52,] 0.1170625 0.005369800 0.8844073
##  [53,] 0.1172500 0.005727787 0.8834235
##  [54,] 0.1170000 0.005512995 0.8829316
##  [55,] 0.1166250 0.005369800 0.8809641
##  [56,] 0.1168125 0.005441398 0.8819479
##  [57,] 0.1166250 0.005441398 0.8804722
##  [58,] 0.1168125 0.005369800 0.8824397
##  [59,] 0.1169375 0.005083411 0.8853910
##  [60,] 0.1170000 0.005083411 0.8858829
##  [61,] 0.1166875 0.004940216 0.8844073
##  [62,] 0.1167500 0.004725424 0.8863748
##  [63,] 0.1168125 0.004940216 0.8853910
##  [64,] 0.1166875 0.004868619 0.8848992
##  [65,] 0.1171875 0.004940216 0.8883424
##  [66,] 0.1164375 0.004797022 0.8834235
##  [67,] 0.1167500 0.004940216 0.8848992
##  [68,] 0.1168125 0.004868619 0.8858829
##  [69,] 0.1170625 0.004653827 0.8893261
##  [70,] 0.1166250 0.004868619 0.8844073
##  [71,] 0.1162500 0.004510632 0.8839154
##  [72,] 0.1166250 0.004582230 0.8863748
##  [73,] 0.1166250 0.004725424 0.8853910
##  [74,] 0.1166250 0.004582230 0.8863748
##  [75,] 0.1167500 0.004725424 0.8863748
##  [76,] 0.1166250 0.004367438 0.8878505
##  [77,] 0.1163750 0.004295840 0.8863748
##  [78,] 0.1161250 0.004081048 0.8858829
##  [79,] 0.1166875 0.004439035 0.8878505
##  [80,] 0.1164375 0.004510632 0.8853910
##  [81,] 0.1167500 0.004439035 0.8883424
##  [82,] 0.1167500 0.004510632 0.8878505
##  [83,] 0.1166875 0.004653827 0.8863748
##  [84,] 0.1165000 0.004582230 0.8853910
##  [85,] 0.1163750 0.004152646 0.8873586
##  [86,] 0.1165625 0.004653827 0.8853910
##  [87,] 0.1165625 0.004367438 0.8873586
##  [88,] 0.1163125 0.004152646 0.8868667
##  [89,] 0.1164375 0.004081048 0.8883424
##  [90,] 0.1165000 0.004367438 0.8868667
##  [91,] 0.1161875 0.004295840 0.8848992
##  [92,] 0.1160000 0.004081048 0.8848992
##  [93,] 0.1161250 0.004009451 0.8863748
##  [94,] 0.1161875 0.003937854 0.8873586
##  [95,] 0.1165000 0.004152646 0.8883424
##  [96,] 0.1163750 0.004081048 0.8878505
##  [97,] 0.1165625 0.004081048 0.8893261
##  [98,] 0.1163750 0.003937854 0.8888342
##  [99,] 0.1163125 0.003866256 0.8888342
## [100,] 0.1163125 0.003866256 0.8888342
## [101,] 0.1165625 0.004009451 0.8898180
## deciling code


# Score the development rows with the fitted forest: hard class labels first,
# then the per-class probability matrix.
# NOTE(review): passing the training rows as newdata gives in-sample rather
# than out-of-bag predictions, so dev-sample metrics built on these columns
# look better than the OOB estimate printed by print(aforest1) -- confirm this
# is intended.
rf.devscaled$predict.class <- predict(
  aforest1,
  newdata = rf.devscaled,
  type = "class"
)

rf.devscaled$predict.score <- predict(
  aforest1,
  newdata = rf.devscaled,
  type = "prob"
)

# Assign each value of a numeric vector to a decile bucket numbered 1 to 10.
#
# Cut points are the 10th, 20th, ..., 90th percentiles of x (missing values
# are ignored when computing them). A value strictly below the first cut point
# gets bucket 1; a value at or above the ninth gets bucket 10; a value equal to
# a cut point lands in the higher bucket. Heavily tied input can therefore
# leave some buckets empty.
#
# Args:
#   x: numeric vector of scores.
# Returns:
#   Numeric vector the same length as x holding bucket numbers 1-10; NA where
#   x is NA, and all NA when x has no non-missing values.
decile <- function(x) {
  # Exact probabilities 1/10 ... 9/10, so no vector is indexed by the
  # floating-point products of seq(0.1, 1, 0.1) * 10.
  cuts <- quantile(x, probs = seq_len(9) / 10, na.rm = TRUE, names = FALSE)

  if (anyNA(cuts)) {
    # No usable data means no cut points, so nothing can be bucketed.
    return(rep(NA_real_, length(x)))
  }

  # findInterval() counts the cut points that are <= x, which is the bucket
  # number minus one. cummax() guarantees the non-decreasing order that
  # findInterval() requires and matches a first-match comparison chain.
  findInterval(x, cummax(cuts)) + 1
}


# Bucket every development row by column 2 of the probability matrix
# (the class "1" probability)
rf.devscaled$deciles <- with(rf.devscaled, decile(predict.score[, 2]))

# Class balance of the development sample
summary(as.factor(rf.devscaled[["TARGET"]]))
##     0     1 
## 13967  2033
## deciling

library(data.table)

# Per-decile response table for the development sample, highest decile first:
# row count, responders (TARGET summed) and non-responders (TARGET == 0).
tmp_DT <- data.table(rf.devscaled)
rank <- tmp_DT[, list(
  cnt = length(TARGET),
  cnt_resp = sum(TARGET),
  cnt_non_resp = sum(TARGET == 0)),
  by = deciles][order(-deciles)]
rank$rrate <- round(rank$cnt_resp / rank$cnt, 2)
rank$cum_resp <- cumsum(rank$cnt_resp)
rank$cum_non_resp <- cumsum(rank$cnt_non_resp)
rank$cum_rel_resp <- round(rank$cum_resp / sum(rank$cnt_resp), 2)
rank$cum_rel_non_resp <- round(rank$cum_non_resp / sum(rank$cnt_non_resp), 2)
# KS is taken from the unrounded cumulative shares: subtracting the 2-decimal
# display values can move the statistic by up to 0.01.
rank$ks <- round(
  abs(rank$cum_resp / sum(rank$cnt_resp) -
        rank$cum_non_resp / sum(rank$cnt_non_resp)),
  4
)

# Display formatting only: the share columns become percentage strings
rank$rrate <- percent(rank$rrate)
rank$cum_rel_resp <- percent(rank$cum_rel_resp)
rank$cum_rel_non_resp <- percent(rank$cum_rel_non_resp)

View(rank)

# KS score was recorded as 0.76 when computed from the 2-decimal shares; the
# ROCR KS on the same scores is 0.75. Re-run to refresh this figure.
###################################################################################

######################Hold out data prediction####################################

# Build the hold-out scoring frame: drop columns 1, 2, 4, 6, 7, 10, 11 and 40,
# standardise what is left, and put column 2 of Hold_sample back in front.
# NOTE(review): scale() centres and scales with the hold-out sample's own
# means and SDs; confirm whether the development sample's parameters should be
# reused so both samples sit on the scale the forest was trained on.
y <- Hold_sample[-c(1, 2, 4, 6, 7, 10, 11, 40)]


y.scaled <- scale(y)
rf.holdscaled <- cbind(Hold_sample[2], y.scaled)
View(rf.holdscaled)

# Score the hold-out rows, then bucket them by column 2 of the probability
# matrix
rf.holdscaled$predict.class <- predict(
  aforest1,
  newdata = rf.holdscaled,
  type = "class"
)
rf.holdscaled$predict.score <- predict(
  aforest1,
  newdata = rf.holdscaled,
  type = "prob"
)
rf.holdscaled$deciles <- decile(rf.holdscaled$predict.score[, 2])


# Per-decile response table for the hold-out sample, highest decile first:
# row count, responders (TARGET summed) and non-responders (TARGET == 0).
tmp_DT <- data.table(rf.holdscaled)
h_rank <- tmp_DT[, list(
  cnt = length(TARGET),
  cnt_resp = sum(TARGET),
  cnt_non_resp = sum(TARGET == 0)),
  by = deciles][order(-deciles)]
h_rank$rrate <- round(h_rank$cnt_resp / h_rank$cnt, 2)
h_rank$cum_resp <- cumsum(h_rank$cnt_resp)
h_rank$cum_non_resp <- cumsum(h_rank$cnt_non_resp)
h_rank$cum_rel_resp <- round(h_rank$cum_resp / sum(h_rank$cnt_resp), 2)
h_rank$cum_rel_non_resp <- round(
  h_rank$cum_non_resp / sum(h_rank$cnt_non_resp),
  2
)
# KS is taken from the unrounded cumulative shares: subtracting the 2-decimal
# display values can move the statistic by up to 0.01.
h_rank$ks <- round(
  abs(h_rank$cum_resp / sum(h_rank$cnt_resp) -
        h_rank$cum_non_resp / sum(h_rank$cnt_non_resp)),
  4
)


library(scales)
# Display formatting only: the share columns become percentage strings
h_rank$rrate <- percent(h_rank$rrate)
h_rank$cum_rel_resp <- percent(h_rank$cum_rel_resp)
h_rank$cum_rel_non_resp <- percent(h_rank$cum_rel_non_resp)

View(h_rank)
########################################################################################

######################################Other performance parameters#######################
library(ROCR)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
## 
## Attaching package: 'ROCR'
## The following object is masked from 'package:neuralnet':
## 
##     prediction
# ROC curve (true-positive rate against false-positive rate) for the
# development-sample scores
pred <- prediction(
  predictions = rf.devscaled$predict.score[, 2],
  labels = rf.devscaled$TARGET
)
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf)

# KS statistic: largest gap between the TPR and FPR values along the curve
KS <- max(perf@y.values[[1]] - perf@x.values[[1]])
KS
## [1] 0.7500795
## Area Under Curve
# The "auc" measure stores its single value in the y.values slot
auc <- as.numeric(performance(pred, measure = "auc")@y.values)
auc
## [1] 0.9421148
## Gini Coefficient
library(ineq)
# Gini index of the predicted scores (column 2 of the probability matrix), as
# computed by ineq().
# NOTE(review): this measures how unequal the score distribution is; the model
# Gini usually reported next to AUC is 2 * AUC - 1 -- confirm which is meant.
gini <- ineq(rf.devscaled$predict.score[, 2], type = "Gini")
gini
## [1] 0.7808436
## Classification Error
# Confusion matrix on the development sample: actual TARGET in rows,
# predicted class in columns
table(
  TARGET = rf.devscaled$TARGET,
  predict.class = rf.devscaled$predict.class
)
##       predict.class
## TARGET     0     1
##      0 13943    24
##      1  1761   272
##################################################################################

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.