This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
########READING CSV###################################################
getwd()
## [1] "D:/GREAT LAKES/DATA MINING"
# NOTE(review): hard-coded, machine-specific working directory; the relative
# file name below is resolved against it.
setwd("D:/GREAT LAKES/DATA MINING/Neural Network")
# Read the comma-separated assignment data (first row holds column names).
# stringsAsFactors = TRUE keeps text columns as factors on every R version
# (the default changed to FALSE in R 4.0); the factor columns are what the
# recorded str() output further down shows.
data_full <- read.table("GROUP ASSIGNMENT QUESTION.csv", sep = ",",
                        header = TRUE, stringsAsFactors = TRUE)
#######################################################################
library(neuralnet)
library(caret)
## Warning: package 'caret' was built under R version 3.4.3
## Loading required package: lattice
## Loading required package: ggplot2
library(scales)
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
############## Splitting data ##########################################
# Development sample = 80% of the rows, hold-out sample = the remaining 20%.
n_total <- nrow(data_full)
smp_size <- floor(0.8 * n_total)
# Fix the seed so the partition is reproducible.
set.seed(4)
train_ind <- sample.int(n_total, size = smp_size)
Dev_sample <- data_full[train_ind, ]
Hold_sample <- data_full[-train_ind, ]
# Share of TARGET == 1 rows (response rate) in each partition.
with(Dev_sample, sum(TARGET) / length(TARGET))
## [1] 0.1270625
with(Hold_sample, sum(TARGET) / length(TARGET))
## [1] 0.11975
# Response rate for the development sample: 12.7%
# Response rate for the hold-out sample:    11.97%
#################################################################################
# One-hot encode the categorical columns OCCUPATION, GENDER and ACC_TYPE.
# `~ 0 + VAR` drops the intercept, so every level gets its own 0/1 column;
# data.frame() then appends those columns (making their names syntactic).

# --- development sample ---
occ.matrix <- model.matrix(~ 0 + OCCUPATION, data = Dev_sample)
Dev_sample <- data.frame(Dev_sample, occ.matrix)
Gender.matrix <- model.matrix(~ 0 + GENDER, data = Dev_sample)
Dev_sample <- data.frame(Dev_sample, Gender.matrix)
acc.matrix <- model.matrix(~ 0 + ACC_TYPE, data = Dev_sample)
Dev_sample <- data.frame(Dev_sample, acc.matrix)

# --- hold-out sample (same encoding; the helper matrices are overwritten) ---
occ.matrix <- model.matrix(~ 0 + OCCUPATION, data = Hold_sample)
Hold_sample <- data.frame(Hold_sample, occ.matrix)
Gender.matrix <- model.matrix(~ 0 + GENDER, data = Hold_sample)
Hold_sample <- data.frame(Hold_sample, Gender.matrix)
acc.matrix <- model.matrix(~ 0 + ACC_TYPE, data = Hold_sample)
Hold_sample <- data.frame(Hold_sample, acc.matrix)
#I have ignored cust ID, Acct Opening data & Random variables
#added the categrical variables as matrix variables
names(Dev_sample)
## [1] "CUST_ID" "TARGET"
## [3] "AGE" "GENDER"
## [5] "BALANCE" "OCCUPATION"
## [7] "AGE_BKT" "SCR"
## [9] "HOLDING_PERIOD" "ACC_TYPE"
## [11] "ACC_OP_DATE" "LEN_OF_RLTN_IN_MNTH"
## [13] "NO_OF_L_CR_TXNS" "NO_OF_L_DR_TXNS"
## [15] "TOT_NO_OF_L_TXNS" "NO_OF_BR_CSH_WDL_DR_TXNS"
## [17] "NO_OF_ATM_DR_TXNS" "NO_OF_NET_DR_TXNS"
## [19] "NO_OF_MOB_DR_TXNS" "NO_OF_CHQ_DR_TXNS"
## [21] "FLG_HAS_CC" "AMT_ATM_DR"
## [23] "AMT_BR_CSH_WDL_DR" "AMT_CHQ_DR"
## [25] "AMT_NET_DR" "AMT_MOB_DR"
## [27] "AMT_L_DR" "FLG_HAS_ANY_CHGS"
## [29] "AMT_OTH_BK_ATM_USG_CHGS" "AMT_MIN_BAL_NMC_CHGS"
## [31] "NO_OF_IW_CHQ_BNC_TXNS" "NO_OF_OW_CHQ_BNC_TXNS"
## [33] "AVG_AMT_PER_ATM_TXN" "AVG_AMT_PER_CSH_WDL_TXN"
## [35] "AVG_AMT_PER_CHQ_TXN" "AVG_AMT_PER_NET_TXN"
## [37] "AVG_AMT_PER_MOB_TXN" "FLG_HAS_NOMINEE"
## [39] "FLG_HAS_OLD_LOAN" "random"
## [41] "OCCUPATIONPROF" "OCCUPATIONSAL"
## [43] "OCCUPATIONSELF.EMP" "OCCUPATIONSENP"
## [45] "GENDERF" "GENDERM"
## [47] "GENDERO" "ACC_TYPECA"
## [49] "ACC_TYPESA"
c(nrow(Dev_sample), nrow(Hold_sample))
## [1] 16000 4000
str(Dev_sample)
## 'data.frame': 16000 obs. of 49 variables:
## $ CUST_ID : Factor w/ 20000 levels "C1","C10","C100",..: 19195 11898 1333 17586 6294 11789 13868 17602 4492 19475 ...
## $ TARGET : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AGE : int 54 25 53 55 32 29 30 51 55 34 ...
## $ GENDER : Factor w/ 3 levels "F","M","O": 2 2 2 2 2 2 1 1 2 1 ...
## $ BALANCE : num 58222 98779 4653 53553 98450 ...
## $ OCCUPATION : Factor w/ 4 levels "PROF","SAL","SELF-EMP",..: 1 1 3 3 1 2 2 4 1 2 ...
## $ AGE_BKT : Factor w/ 7 levels "<25",">50","26-30",..: 2 1 2 2 4 3 3 2 2 4 ...
## $ SCR : int 398 272 537 272 305 172 423 571 662 222 ...
## $ HOLDING_PERIOD : int 21 21 22 29 8 24 30 23 16 31 ...
## $ ACC_TYPE : Factor w/ 2 levels "CA","SA": 2 2 2 2 2 2 1 1 2 1 ...
## $ ACC_OP_DATE : Factor w/ 4869 levels "01-01-2000","01-01-2001",..: 2544 3196 263 2109 2360 2987 2842 1053 3803 1107 ...
## $ LEN_OF_RLTN_IN_MNTH : int 126 146 208 127 90 39 41 179 156 166 ...
## $ NO_OF_L_CR_TXNS : int 8 12 7 2 8 0 35 11 13 11 ...
## $ NO_OF_L_DR_TXNS : int 4 3 3 3 5 2 3 2 7 1 ...
## $ TOT_NO_OF_L_TXNS : int 12 15 10 5 13 2 38 13 20 12 ...
## $ NO_OF_BR_CSH_WDL_DR_TXNS: int 3 0 3 1 0 1 3 0 2 1 ...
## $ NO_OF_ATM_DR_TXNS : int 1 2 0 1 1 0 0 2 1 0 ...
## $ NO_OF_NET_DR_TXNS : int 0 1 0 0 0 0 0 0 0 0 ...
## $ NO_OF_MOB_DR_TXNS : int 0 0 0 0 0 1 0 0 0 0 ...
## $ NO_OF_CHQ_DR_TXNS : int 0 0 0 1 4 0 0 0 4 0 ...
## $ FLG_HAS_CC : int 0 0 0 0 0 1 0 0 0 1 ...
## $ AMT_ATM_DR : int 2000 2200 0 19300 13200 0 0 40700 3000 0 ...
## $ AMT_BR_CSH_WDL_DR : int 488730 0 87470 761670 0 339790 778200 0 188060 420060 ...
## $ AMT_CHQ_DR : int 0 0 0 66460 25630 0 0 0 78350 0 ...
## $ AMT_NET_DR : num 0 136078 0 0 0 ...
## $ AMT_MOB_DR : int 0 0 0 0 0 159106 0 0 0 0 ...
## $ AMT_L_DR : num 490730 138278 87470 847430 38830 ...
## $ FLG_HAS_ANY_CHGS : int 0 1 0 0 0 0 0 0 0 0 ...
## $ AMT_OTH_BK_ATM_USG_CHGS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AMT_MIN_BAL_NMC_CHGS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NO_OF_IW_CHQ_BNC_TXNS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NO_OF_OW_CHQ_BNC_TXNS : int 0 0 0 0 0 0 0 0 1 0 ...
## $ AVG_AMT_PER_ATM_TXN : num 2000 1100 0 19300 13200 ...
## $ AVG_AMT_PER_CSH_WDL_TXN : num 162910 0 29157 761670 0 ...
## $ AVG_AMT_PER_CHQ_TXN : num 0 0 0 66460 6408 ...
## $ AVG_AMT_PER_NET_TXN : num 0 136078 0 0 0 ...
## $ AVG_AMT_PER_MOB_TXN : num 0 0 0 0 0 ...
## $ FLG_HAS_NOMINEE : int 1 1 1 1 1 0 1 1 1 1 ...
## $ FLG_HAS_OLD_LOAN : int 1 0 1 1 1 0 1 0 1 0 ...
## $ random : num 0.5912 0.0085 0.2945 0.2772 0.816 ...
## $ OCCUPATIONPROF : num 1 1 0 0 1 0 0 0 1 0 ...
## $ OCCUPATIONSAL : num 0 0 0 0 0 1 1 0 0 1 ...
## $ OCCUPATIONSELF.EMP : num 0 0 1 1 0 0 0 0 0 0 ...
## $ OCCUPATIONSENP : num 0 0 0 0 0 0 0 1 0 0 ...
## $ GENDERF : num 0 0 0 0 0 0 1 1 0 1 ...
## $ GENDERM : num 1 1 1 1 1 1 0 0 1 0 ...
## $ GENDERO : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ACC_TYPECA : num 0 0 0 0 0 0 1 1 0 1 ...
## $ ACC_TYPESA : num 1 1 1 1 1 1 0 0 1 0 ...
colnames(Dev_sample)
## [1] "CUST_ID" "TARGET"
## [3] "AGE" "GENDER"
## [5] "BALANCE" "OCCUPATION"
## [7] "AGE_BKT" "SCR"
## [9] "HOLDING_PERIOD" "ACC_TYPE"
## [11] "ACC_OP_DATE" "LEN_OF_RLTN_IN_MNTH"
## [13] "NO_OF_L_CR_TXNS" "NO_OF_L_DR_TXNS"
## [15] "TOT_NO_OF_L_TXNS" "NO_OF_BR_CSH_WDL_DR_TXNS"
## [17] "NO_OF_ATM_DR_TXNS" "NO_OF_NET_DR_TXNS"
## [19] "NO_OF_MOB_DR_TXNS" "NO_OF_CHQ_DR_TXNS"
## [21] "FLG_HAS_CC" "AMT_ATM_DR"
## [23] "AMT_BR_CSH_WDL_DR" "AMT_CHQ_DR"
## [25] "AMT_NET_DR" "AMT_MOB_DR"
## [27] "AMT_L_DR" "FLG_HAS_ANY_CHGS"
## [29] "AMT_OTH_BK_ATM_USG_CHGS" "AMT_MIN_BAL_NMC_CHGS"
## [31] "NO_OF_IW_CHQ_BNC_TXNS" "NO_OF_OW_CHQ_BNC_TXNS"
## [33] "AVG_AMT_PER_ATM_TXN" "AVG_AMT_PER_CSH_WDL_TXN"
## [35] "AVG_AMT_PER_CHQ_TXN" "AVG_AMT_PER_NET_TXN"
## [37] "AVG_AMT_PER_MOB_TXN" "FLG_HAS_NOMINEE"
## [39] "FLG_HAS_OLD_LOAN" "random"
## [41] "OCCUPATIONPROF" "OCCUPATIONSAL"
## [43] "OCCUPATIONSELF.EMP" "OCCUPATIONSENP"
## [45] "GENDERF" "GENDERM"
## [47] "GENDERO" "ACC_TYPECA"
## [49] "ACC_TYPESA"
# Build the predictor set by dropping columns by NAME rather than by position,
# so the selection survives any change in column order. The dropped columns
# are the ones at positions 1, 2, 4, 6, 7, 10, 11 and 40 in the listing above:
# the customer ID, the target, the raw categorical columns, the
# account-opening date and the `random` column.
drop_cols <- c("CUST_ID", "TARGET", "GENDER", "OCCUPATION", "AGE_BKT",
               "ACC_TYPE", "ACC_OP_DATE", "random")
x <- Dev_sample[, !(names(Dev_sample) %in% drop_cols), drop = FALSE]
# Standardise every predictor (mean 0, sd 1). scale() returns a matrix that
# carries the centring/scaling values in its "scaled:center" and
# "scaled:scale" attributes.
rf.Dev_sample <- scale(x)
# Put the (unscaled) target back in front of the scaled predictors.
rf.devscaled <- cbind(Dev_sample["TARGET"], rf.Dev_sample)
View(rf.devscaled)
str(rf.devscaled)
## 'data.frame': 16000 obs. of 42 variables:
## $ TARGET : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AGE : num 1.634 -1.393 1.529 1.738 -0.662 ...
## $ BALANCE : num -0.625 -0.568 -0.699 -0.631 -0.569 ...
## $ SCR : num -0.17 -0.674 0.386 -0.674 -0.542 ...
## $ HOLDING_PERIOD : num 0.696 0.696 0.812 1.622 -0.809 ...
## $ LEN_OF_RLTN_IN_MNTH : num 0.0157 0.3794 1.5071 0.0339 -0.6391 ...
## $ NO_OF_L_CR_TXNS : num -0.3586 -0.0287 -0.4411 -0.8535 -0.3586 ...
## $ NO_OF_L_DR_TXNS : num -0.342 -0.471 -0.471 -0.471 -0.212 ...
## $ TOT_NO_OF_L_TXNS : num -0.393 -0.224 -0.505 -0.787 -0.336 ...
## $ NO_OF_BR_CSH_WDL_DR_TXNS: num 0.492 -0.835 0.492 -0.392 -0.835 ...
## $ NO_OF_ATM_DR_TXNS : num -0.021 0.638 -0.68 -0.021 -0.021 ...
## $ NO_OF_NET_DR_TXNS : num -0.4872 -0.0727 -0.4872 -0.4872 -0.4872 ...
## $ NO_OF_MOB_DR_TXNS : num -0.204 -0.204 -0.204 -0.204 -0.204 ...
## $ NO_OF_CHQ_DR_TXNS : num -0.895 -0.895 -0.895 -0.474 0.789 ...
## $ FLG_HAS_CC : num -0.661 -0.661 -0.661 -0.661 -0.661 ...
## $ AMT_ATM_DR : num -0.588 -0.575 -0.719 0.543 0.144 ...
## $ AMT_BR_CSH_WDL_DR : num 0.331 -1.144 -0.88 1.155 -1.144 ...
## $ AMT_CHQ_DR : num -0.283 -0.283 -0.283 -0.129 -0.224 ...
## $ AMT_NET_DR : num -0.746 -0.32 -0.746 -0.746 -0.746 ...
## $ AMT_MOB_DR : num -0.451 -0.451 -0.451 -0.451 -0.451 ...
## $ AMT_L_DR : num -0.399 -0.897 -0.969 0.105 -1.038 ...
## $ FLG_HAS_ANY_CHGS : num -0.353 2.833 -0.353 -0.353 -0.353 ...
## $ AMT_OTH_BK_ATM_USG_CHGS : num -0.078 -0.078 -0.078 -0.078 -0.078 ...
## $ AMT_MIN_BAL_NMC_CHGS : num -0.0887 -0.0887 -0.0887 -0.0887 -0.0887 ...
## $ NO_OF_IW_CHQ_BNC_TXNS : num -0.211 -0.211 -0.211 -0.211 -0.211 ...
## $ NO_OF_OW_CHQ_BNC_TXNS : num -0.217 -0.217 -0.217 -0.217 -0.217 ...
## $ AVG_AMT_PER_ATM_TXN : num -0.754 -0.878 -1.031 1.647 0.8 ...
## $ AVG_AMT_PER_CSH_WDL_TXN : num -0.296 -0.899 -0.791 1.923 -0.899 ...
## $ AVG_AMT_PER_CHQ_TXN : num -0.506 -0.506 -0.506 0.848 -0.376 ...
## $ AVG_AMT_PER_NET_TXN : num -0.637 -0.155 -0.637 -0.637 -0.637 ...
## $ AVG_AMT_PER_MOB_TXN : num -0.434 -0.434 -0.434 -0.434 -0.434 ...
## $ FLG_HAS_NOMINEE : num 0.332 0.332 0.332 0.332 0.332 ...
## $ FLG_HAS_OLD_LOAN : num 1.018 -0.982 1.018 1.018 1.018 ...
## $ OCCUPATIONPROF : num 1.619 1.619 -0.618 -0.618 1.619 ...
## $ OCCUPATIONSAL : num -0.64 -0.64 -0.64 -0.64 -0.64 ...
## $ OCCUPATIONSELF.EMP : num -0.464 -0.464 2.157 2.157 -0.464 ...
## $ OCCUPATIONSENP : num -0.587 -0.587 -0.587 -0.587 -0.587 ...
## $ GENDERF : num -0.609 -0.609 -0.609 -0.609 -0.609 ...
## $ GENDERM : num 0.624 0.624 0.624 0.624 0.624 ...
## $ GENDERO : num -0.0976 -0.0976 -0.0976 -0.0976 -0.0976 ...
## $ ACC_TYPECA : num -0.518 -0.518 -0.518 -0.518 -0.518 ...
## $ ACC_TYPESA : num 0.518 0.518 0.518 0.518 0.518 ...
#all are numeric variables
############################ Model creation###################################
?randomForest
## starting httpd help server ...
## done
# Hyper-parameter choices for the first forest:
#   ntree    - a rough starting number of trees
#   mtry     - approximately sqrt(number of variables)
#   nodesize - approximately 1% of the number of observations
aforest <- randomForest(
  as.factor(TARGET) ~ .,
  data = rf.devscaled,
  ntree = 101,
  mtry = 7,
  nodesize = 150,
  importance = TRUE
)
plot(aforest)
# The OOB error falls until about 20 trees and is flat after that,
# so the optimal number of trees is somewhere between 10 and 25.
# Next, look at the OOB error in tabular form.
attributes(aforest)
## $names
## [1] "call" "type" "predicted"
## [4] "err.rate" "confusion" "votes"
## [7] "oob.times" "classes" "importance"
## [10] "importanceSD" "localImportance" "proximity"
## [13] "ntree" "mtry" "forest"
## [16] "y" "test" "inbag"
## [19] "terms"
##
## $class
## [1] "randomForest.formula" "randomForest"
aforest$err.rate
## OOB 0 1
## [1,] 0.1326844 2.424242e-02 0.8812416
## [2,] 0.1293641 2.141328e-02 0.8743842
## [3,] 0.1271257 1.821824e-02 0.8822751
## [4,] 0.1273701 1.582575e-02 0.9003538
## [5,] 0.1282123 1.471173e-02 0.9111355
## [6,] 0.1280528 1.299694e-02 0.9176285
## [7,] 0.1274644 1.058674e-02 0.9289366
## [8,] 0.1279739 9.158851e-03 0.9436620
## [9,] 0.1267373 7.416024e-03 0.9460809
## [10,] 0.1264585 5.925712e-03 0.9533961
## [11,] 0.1263515 5.328725e-03 0.9579416
## [12,] 0.1267093 5.245760e-03 0.9610069
## [13,] 0.1255872 4.161883e-03 0.9595860
## [14,] 0.1259936 3.441600e-03 0.9679803
## [15,] 0.1262901 3.009890e-03 0.9724545
## [16,] 0.1253673 2.220630e-03 0.9709788
## [17,] 0.1253595 2.220471e-03 0.9709788
## [18,] 0.1245936 1.432357e-03 0.9704870
## [19,] 0.1248828 1.432049e-03 0.9729464
## [20,] 0.1239452 9.308320e-04 0.9690113
## [21,] 0.1243828 8.592296e-04 0.9729464
## [22,] 0.1248203 8.592296e-04 0.9763896
## [23,] 0.1250078 6.444222e-04 0.9793409
## [24,] 0.1248828 6.444222e-04 0.9783571
## [25,] 0.1244375 4.295840e-04 0.9763896
## [26,] 0.1245000 2.147920e-04 0.9783571
## [27,] 0.1243125 2.147920e-04 0.9768815
## [28,] 0.1245625 3.579867e-04 0.9778652
## [29,] 0.1248750 4.295840e-04 0.9798328
## [30,] 0.1248750 2.863893e-04 0.9808165
## [31,] 0.1246875 2.863893e-04 0.9793409
## [32,] 0.1245000 2.147920e-04 0.9783571
## [33,] 0.1245625 2.147920e-04 0.9788490
## [34,] 0.1243750 2.147920e-04 0.9773733
## [35,] 0.1245625 2.147920e-04 0.9788490
## [36,] 0.1246250 2.147920e-04 0.9793409
## [37,] 0.1246875 1.431947e-04 0.9803246
## [38,] 0.1248125 1.431947e-04 0.9813084
## [39,] 0.1248125 1.431947e-04 0.9813084
## [40,] 0.1247500 1.431947e-04 0.9808165
## [41,] 0.1248125 1.431947e-04 0.9813084
## [42,] 0.1248750 1.431947e-04 0.9818003
## [43,] 0.1249375 1.431947e-04 0.9822922
## [44,] 0.1251875 1.431947e-04 0.9842597
## [45,] 0.1251250 7.159734e-05 0.9842597
## [46,] 0.1255000 1.431947e-04 0.9867191
## [47,] 0.1253125 1.431947e-04 0.9852435
## [48,] 0.1252500 2.147920e-04 0.9842597
## [49,] 0.1251250 7.159734e-05 0.9842597
## [50,] 0.1251250 1.431947e-04 0.9837678
## [51,] 0.1251875 7.159734e-05 0.9847516
## [52,] 0.1253750 7.159734e-05 0.9862273
## [53,] 0.1253750 7.159734e-05 0.9862273
## [54,] 0.1253125 7.159734e-05 0.9857354
## [55,] 0.1252500 7.159734e-05 0.9852435
## [56,] 0.1254375 7.159734e-05 0.9867191
## [57,] 0.1253750 7.159734e-05 0.9862273
## [58,] 0.1254375 7.159734e-05 0.9867191
## [59,] 0.1255000 7.159734e-05 0.9872110
## [60,] 0.1255625 7.159734e-05 0.9877029
## [61,] 0.1255625 7.159734e-05 0.9877029
## [62,] 0.1255000 7.159734e-05 0.9872110
## [63,] 0.1253750 0.000000e+00 0.9867191
## [64,] 0.1255000 7.159734e-05 0.9872110
## [65,] 0.1255625 0.000000e+00 0.9881948
## [66,] 0.1254375 0.000000e+00 0.9872110
## [67,] 0.1255000 0.000000e+00 0.9877029
## [68,] 0.1255625 0.000000e+00 0.9881948
## [69,] 0.1256250 0.000000e+00 0.9886867
## [70,] 0.1256250 0.000000e+00 0.9886867
## [71,] 0.1255625 0.000000e+00 0.9881948
## [72,] 0.1256875 0.000000e+00 0.9891786
## [73,] 0.1256875 0.000000e+00 0.9891786
## [74,] 0.1256250 0.000000e+00 0.9886867
## [75,] 0.1256250 0.000000e+00 0.9886867
## [76,] 0.1256875 0.000000e+00 0.9891786
## [77,] 0.1257500 0.000000e+00 0.9896704
## [78,] 0.1257500 0.000000e+00 0.9896704
## [79,] 0.1258750 7.159734e-05 0.9901623
## [80,] 0.1259375 7.159734e-05 0.9906542
## [81,] 0.1259375 0.000000e+00 0.9911461
## [82,] 0.1259375 0.000000e+00 0.9911461
## [83,] 0.1258750 0.000000e+00 0.9906542
## [84,] 0.1258750 0.000000e+00 0.9906542
## [85,] 0.1258750 0.000000e+00 0.9906542
## [86,] 0.1259375 0.000000e+00 0.9911461
## [87,] 0.1258750 0.000000e+00 0.9906542
## [88,] 0.1259375 0.000000e+00 0.9911461
## [89,] 0.1258750 0.000000e+00 0.9906542
## [90,] 0.1258750 0.000000e+00 0.9906542
## [91,] 0.1258125 0.000000e+00 0.9901623
## [92,] 0.1258125 0.000000e+00 0.9901623
## [93,] 0.1258750 0.000000e+00 0.9906542
## [94,] 0.1258750 0.000000e+00 0.9906542
## [95,] 0.1258750 0.000000e+00 0.9906542
## [96,] 0.1260000 0.000000e+00 0.9916380
## [97,] 0.1260625 0.000000e+00 0.9921299
## [98,] 0.1258750 0.000000e+00 0.9906542
## [99,] 0.1259375 0.000000e+00 0.9911461
## [100,] 0.1258750 0.000000e+00 0.9906542
## [101,] 0.1258125 0.000000e+00 0.9901623
print(aforest)
##
## Call:
## randomForest(formula = as.factor(TARGET) ~ ., data = rf.devscaled, ntree = 101, mtry = 7, nodesize = 150, importance = TRUE)
## Type of random forest: classification
## Number of trees: 101
## No. of variables tried at each split: 7
##
## OOB estimate of error rate: 12.58%
## Confusion matrix:
## 0 1 class.error
## 0 13967 0 0.0000000
## 1 2013 20 0.9901623
# The out-of-bag error of the forest above is 12.58% (see print(aforest)).
# tuneRF() can be used to search for the mtry with the lowest OOB error.
## Tuning Random Forest
# tuneRF() takes the predictors (x) and the response (y) separately, so the
# dependent variable is removed from x by name.
# mtryStart is about sqrt(number of variables); each step multiplies or
# divides mtry by stepFactor and continues while the relative OOB improvement
# exceeds `improve`. doBest = TRUE makes tuneRF() return a forest fitted with
# the best mtry found, so tRF is a randomForest object.
tRF <- tuneRF(
  x = rf.devscaled[, setdiff(names(rf.devscaled), "TARGET")],
  y = as.factor(rf.devscaled$TARGET),
  mtryStart = 7,
  ntreeTry = 101,
  stepFactor = 1.5,
  improve = 0.001,
  trace = TRUE,
  plot = TRUE,
  doBest = TRUE,
  nodesize = 150,
  importance = TRUE
)
## mtry = 7 OOB error = 12.57%
## Searching left ...
## mtry = 5 OOB error = 12.68%
## -0.008453506 0.001
## Searching right ...
## mtry = 10 OOB error = 12.46%
## 0.008950771 0.001
## mtry = 15 OOB error = 12.24%
## 0.01705971 0.001
## mtry = 22 OOB error = 12.11%
## 0.01123022 0.001
## mtry = 33 OOB error = 12.09%
## 0.001032525 0.001
## mtry = 41 OOB error = 12.09%
## 0 0.001
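#mtry = 41 gives the lowest OOB error in the search above (12.09%, tied with mtry = 33),
#therefore, we will take mtry as 41. Note that 41 is the full number of predictors, so every split considers all variables.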
tRF$importance
## 0 1 MeanDecreaseAccuracy
## AGE 6.335827e-04 3.756007e-03 1.030114e-03
## BALANCE 1.903333e-03 2.643940e-02 5.015197e-03
## SCR 2.544291e-03 2.371110e-02 5.230994e-03
## HOLDING_PERIOD 1.228188e-02 1.707297e-02 1.289275e-02
## LEN_OF_RLTN_IN_MNTH 1.022907e-03 5.603383e-03 1.603964e-03
## NO_OF_L_CR_TXNS 2.640135e-02 -1.103298e-02 2.163542e-02
## NO_OF_L_DR_TXNS 7.672277e-02 -2.151887e-02 6.424058e-02
## TOT_NO_OF_L_TXNS 4.453127e-02 -3.361879e-02 3.460826e-02
## NO_OF_BR_CSH_WDL_DR_TXNS 1.055151e-03 1.162451e-03 1.069983e-03
## NO_OF_ATM_DR_TXNS 2.120783e-02 -1.145247e-02 1.704674e-02
## NO_OF_NET_DR_TXNS 5.252337e-04 -8.315427e-05 4.477746e-04
## NO_OF_MOB_DR_TXNS 1.848496e-04 -1.278774e-04 1.457270e-04
## NO_OF_CHQ_DR_TXNS 4.170308e-03 -1.377150e-03 3.460624e-03
## FLG_HAS_CC 2.112033e-03 2.066809e-02 4.466828e-03
## AMT_ATM_DR 8.787073e-03 -1.123408e-03 7.520541e-03
## AMT_BR_CSH_WDL_DR 4.679923e-03 1.808252e-03 4.310720e-03
## AMT_CHQ_DR 6.459411e-03 2.529663e-04 5.673287e-03
## AMT_NET_DR 2.104578e-03 4.011741e-05 1.841814e-03
## AMT_MOB_DR 1.162179e-03 2.704481e-04 1.047068e-03
## AMT_L_DR 1.826242e-02 -7.972873e-03 1.492881e-02
## FLG_HAS_ANY_CHGS 4.038065e-05 1.976710e-04 6.012969e-05
## AMT_OTH_BK_ATM_USG_CHGS -3.889896e-06 2.176646e-05 -6.793510e-07
## AMT_MIN_BAL_NMC_CHGS 1.364304e-05 1.001183e-04 2.448842e-05
## NO_OF_IW_CHQ_BNC_TXNS 9.557092e-05 6.597972e-04 1.671386e-04
## NO_OF_OW_CHQ_BNC_TXNS 3.796836e-05 5.683274e-04 1.048230e-04
## AVG_AMT_PER_ATM_TXN 7.082396e-03 -3.614369e-04 6.129520e-03
## AVG_AMT_PER_CSH_WDL_TXN 4.193803e-03 1.945881e-05 3.661726e-03
## AVG_AMT_PER_CHQ_TXN 8.936356e-03 -3.588654e-03 7.344762e-03
## AVG_AMT_PER_NET_TXN 2.270607e-03 7.813902e-04 2.079548e-03
## AVG_AMT_PER_MOB_TXN 1.072694e-03 2.850963e-03 1.298298e-03
## FLG_HAS_NOMINEE -4.702877e-06 1.073480e-04 9.818572e-06
## FLG_HAS_OLD_LOAN 3.552343e-06 6.255142e-05 1.093303e-05
## OCCUPATIONPROF 2.599109e-05 1.528876e-04 4.210797e-05
## OCCUPATIONSAL 2.090056e-04 1.865403e-04 2.062378e-04
## OCCUPATIONSELF.EMP 1.971545e-03 2.425944e-02 4.798473e-03
## OCCUPATIONSENP 1.173029e-04 2.794916e-04 1.381685e-04
## GENDERF 4.371696e-04 -1.049532e-04 3.693833e-04
## GENDERM 1.900514e-04 2.365190e-04 1.966502e-04
## GENDERO 6.648829e-04 3.566780e-03 1.033832e-03
## ACC_TYPECA 6.738917e-04 -2.396671e-04 5.569701e-04
## ACC_TYPESA 6.801632e-04 -1.578754e-04 5.731777e-04
## MeanDecreaseGini
## AGE 22.9532468
## BALANCE 79.9821038
## SCR 82.0994967
## HOLDING_PERIOD 71.0150761
## LEN_OF_RLTN_IN_MNTH 39.4859097
## NO_OF_L_CR_TXNS 62.8620815
## NO_OF_L_DR_TXNS 45.2085057
## TOT_NO_OF_L_TXNS 59.3031735
## NO_OF_BR_CSH_WDL_DR_TXNS 9.9014735
## NO_OF_ATM_DR_TXNS 15.0360908
## NO_OF_NET_DR_TXNS 2.0770474
## NO_OF_MOB_DR_TXNS 0.8441493
## NO_OF_CHQ_DR_TXNS 7.9720741
## FLG_HAS_CC 33.9288445
## AMT_ATM_DR 24.3519615
## AMT_BR_CSH_WDL_DR 27.4298134
## AMT_CHQ_DR 23.6986932
## AMT_NET_DR 13.5264140
## AMT_MOB_DR 9.2343786
## AMT_L_DR 34.3059443
## FLG_HAS_ANY_CHGS 2.6267636
## AMT_OTH_BK_ATM_USG_CHGS 0.1193202
## AMT_MIN_BAL_NMC_CHGS 0.4946426
## NO_OF_IW_CHQ_BNC_TXNS 3.7377585
## NO_OF_OW_CHQ_BNC_TXNS 3.4973976
## AVG_AMT_PER_ATM_TXN 24.0949226
## AVG_AMT_PER_CSH_WDL_TXN 21.7537011
## AVG_AMT_PER_CHQ_TXN 20.2357889
## AVG_AMT_PER_NET_TXN 16.3109029
## AVG_AMT_PER_MOB_TXN 17.5997959
## FLG_HAS_NOMINEE 1.2832635
## FLG_HAS_OLD_LOAN 1.4894957
## OCCUPATIONPROF 1.8949551
## OCCUPATIONSAL 3.1911938
## OCCUPATIONSELF.EMP 44.7451733
## OCCUPATIONSENP 1.6516624
## GENDERF 2.8893774
## GENDERM 2.2009936
## GENDERO 12.5172943
## ACC_TYPECA 2.0209313
## ACC_TYPESA 1.8953312
## Variable importance of the first forest (aforest), rounded to 2 decimals.
impVar <- round(randomForest::importance(aforest), digits = 2)
# Rank variables by the third column, the overall mean decrease in accuracy.
by_accuracy <- order(impVar[, "MeanDecreaseAccuracy"], decreasing = TRUE)
impVar[by_accuracy, ]
## 0 1 MeanDecreaseAccuracy MeanDecreaseGini
## SCR 8.09 11.84 12.06 56.69
## TOT_NO_OF_L_TXNS 9.59 -4.15 10.33 46.51
## NO_OF_L_CR_TXNS 9.52 -3.24 10.04 51.77
## OCCUPATIONSELF.EMP 7.57 9.49 9.87 31.66
## AMT_L_DR 9.22 -2.82 9.80 30.82
## BALANCE 5.40 10.65 9.72 47.66
## FLG_HAS_CC 6.56 10.67 9.46 28.68
## HOLDING_PERIOD 7.09 8.49 8.99 54.85
## NO_OF_L_DR_TXNS 7.77 -3.30 7.96 33.90
## GENDERO 5.80 6.30 7.15 8.92
## LEN_OF_RLTN_IN_MNTH 5.26 6.43 7.05 25.44
## AMT_ATM_DR 6.05 -0.84 6.35 21.55
## NO_OF_ATM_DR_TXNS 5.89 -5.54 5.86 10.73
## AVG_AMT_PER_ATM_TXN 5.55 -2.68 5.68 26.59
## AVG_AMT_PER_CSH_WDL_TXN 4.85 2.10 5.59 20.20
## OCCUPATIONSAL 4.44 4.24 5.45 5.43
## AGE 3.49 5.62 5.33 17.23
## NO_OF_CHQ_DR_TXNS 4.91 0.23 5.22 11.57
## AVG_AMT_PER_CHQ_TXN 4.78 -1.90 5.22 16.93
## AMT_CHQ_DR 4.87 -1.45 5.17 19.18
## AMT_BR_CSH_WDL_DR 3.80 4.52 5.10 21.90
## AMT_NET_DR 3.91 -1.04 4.18 12.88
## AMT_MOB_DR 3.71 -1.16 4.18 12.19
## NO_OF_BR_CSH_WDL_DR_TXNS 3.40 2.43 4.16 9.74
## AVG_AMT_PER_MOB_TXN 2.55 1.82 3.46 14.54
## NO_OF_IW_CHQ_BNC_TXNS 1.98 2.48 3.37 1.99
## NO_OF_NET_DR_TXNS 2.83 -0.33 3.09 4.67
## ACC_TYPECA 2.51 -0.65 2.80 2.63
## AVG_AMT_PER_NET_TXN 2.53 -0.09 2.76 14.24
## OCCUPATIONPROF 2.07 1.41 2.63 1.94
## FLG_HAS_OLD_LOAN 1.81 2.19 2.30 2.44
## FLG_HAS_ANY_CHGS 1.05 2.71 2.21 2.61
## AMT_MIN_BAL_NMC_CHGS 0.24 3.98 2.04 1.93
## OCCUPATIONSENP 1.61 -0.62 2.00 1.50
## NO_OF_OW_CHQ_BNC_TXNS 0.25 2.92 1.99 2.86
## NO_OF_MOB_DR_TXNS 1.81 -1.48 1.79 1.99
## ACC_TYPESA 1.35 -0.76 1.51 2.43
## GENDERF 1.41 0.07 1.49 4.51
## GENDERM 1.21 0.52 1.40 2.49
## FLG_HAS_NOMINEE 0.44 2.62 1.20 1.13
## AMT_OTH_BK_ATM_USG_CHGS -1.16 1.00 -0.69 0.25
# Same table, ranked by the second column: importance for class "1".
by_class1 <- order(impVar[, "1"], decreasing = TRUE)
impVar[by_class1, ]
## 0 1 MeanDecreaseAccuracy MeanDecreaseGini
## SCR 8.09 11.84 12.06 56.69
## FLG_HAS_CC 6.56 10.67 9.46 28.68
## BALANCE 5.40 10.65 9.72 47.66
## OCCUPATIONSELF.EMP 7.57 9.49 9.87 31.66
## HOLDING_PERIOD 7.09 8.49 8.99 54.85
## LEN_OF_RLTN_IN_MNTH 5.26 6.43 7.05 25.44
## GENDERO 5.80 6.30 7.15 8.92
## AGE 3.49 5.62 5.33 17.23
## AMT_BR_CSH_WDL_DR 3.80 4.52 5.10 21.90
## OCCUPATIONSAL 4.44 4.24 5.45 5.43
## AMT_MIN_BAL_NMC_CHGS 0.24 3.98 2.04 1.93
## NO_OF_OW_CHQ_BNC_TXNS 0.25 2.92 1.99 2.86
## FLG_HAS_ANY_CHGS 1.05 2.71 2.21 2.61
## FLG_HAS_NOMINEE 0.44 2.62 1.20 1.13
## NO_OF_IW_CHQ_BNC_TXNS 1.98 2.48 3.37 1.99
## NO_OF_BR_CSH_WDL_DR_TXNS 3.40 2.43 4.16 9.74
## FLG_HAS_OLD_LOAN 1.81 2.19 2.30 2.44
## AVG_AMT_PER_CSH_WDL_TXN 4.85 2.10 5.59 20.20
## AVG_AMT_PER_MOB_TXN 2.55 1.82 3.46 14.54
## OCCUPATIONPROF 2.07 1.41 2.63 1.94
## AMT_OTH_BK_ATM_USG_CHGS -1.16 1.00 -0.69 0.25
## GENDERM 1.21 0.52 1.40 2.49
## NO_OF_CHQ_DR_TXNS 4.91 0.23 5.22 11.57
## GENDERF 1.41 0.07 1.49 4.51
## AVG_AMT_PER_NET_TXN 2.53 -0.09 2.76 14.24
## NO_OF_NET_DR_TXNS 2.83 -0.33 3.09 4.67
## OCCUPATIONSENP 1.61 -0.62 2.00 1.50
## ACC_TYPECA 2.51 -0.65 2.80 2.63
## ACC_TYPESA 1.35 -0.76 1.51 2.43
## AMT_ATM_DR 6.05 -0.84 6.35 21.55
## AMT_NET_DR 3.91 -1.04 4.18 12.88
## AMT_MOB_DR 3.71 -1.16 4.18 12.19
## AMT_CHQ_DR 4.87 -1.45 5.17 19.18
## NO_OF_MOB_DR_TXNS 1.81 -1.48 1.79 1.99
## AVG_AMT_PER_CHQ_TXN 4.78 -1.90 5.22 16.93
## AVG_AMT_PER_ATM_TXN 5.55 -2.68 5.68 26.59
## AMT_L_DR 9.22 -2.82 9.80 30.82
## NO_OF_L_CR_TXNS 9.52 -3.24 10.04 51.77
## NO_OF_L_DR_TXNS 7.77 -3.30 7.96 33.90
## TOT_NO_OF_L_TXNS 9.59 -4.15 10.33 46.51
## NO_OF_ATM_DR_TXNS 5.89 -5.54 5.86 10.73
###############################################
# Refit the forest with mtry = 41.
# NOTE(review): nodesize is 100 here but 150 in the first forest and in the
# tuneRF() call -- confirm the change is intended.
aforest1 <- randomForest(
  as.factor(TARGET) ~ .,
  data = rf.devscaled,
  ntree = 101,
  mtry = 41,
  nodesize = 100,
  importance = TRUE
)
print(aforest1)
##
## Call:
## randomForest(formula = as.factor(TARGET) ~ ., data = rf.devscaled, ntree = 101, mtry = 41, nodesize = 100, importance = TRUE)
## Type of random forest: classification
## Number of trees: 101
## No. of variables tried at each split: 41
##
## OOB estimate of error rate: 11.66%
## Confusion matrix:
## 0 1 class.error
## 0 13911 56 0.004009451
## 1 1809 224 0.889818003
plot(aforest1)
#checking error rate
aforest1$err.rate
## OOB 0 1
## [1,] 0.1316498 0.041545894 0.7411765
## [2,] 0.1359233 0.045094094 0.7474104
## [3,] 0.1324235 0.038877299 0.7713731
## [4,] 0.1310324 0.035069886 0.7873696
## [5,] 0.1281572 0.031961297 0.7866450
## [6,] 0.1276255 0.028652200 0.8061812
## [7,] 0.1257080 0.025956590 0.8101331
## [8,] 0.1233237 0.023240421 0.8078471
## [9,] 0.1228550 0.022795135 0.8087868
## [10,] 0.1228137 0.021416685 0.8179563
## [11,] 0.1209185 0.018810811 0.8222772
## [12,] 0.1190282 0.017329402 0.8184965
## [13,] 0.1208647 0.017082974 0.8338264
## [14,] 0.1185051 0.015491645 0.8257016
## [15,] 0.1180629 0.013259748 0.8380108
## [16,] 0.1185667 0.013109822 0.8430118
## [17,] 0.1178641 0.012105150 0.8444882
## [18,] 0.1178567 0.011030728 0.8514511
## [19,] 0.1182796 0.011387238 0.8524348
## [20,] 0.1170771 0.009308987 0.8573537
## [21,] 0.1170698 0.009379923 0.8568618
## [22,] 0.1175073 0.008807103 0.8642400
## [23,] 0.1173823 0.008091078 0.8681751
## [24,] 0.1172573 0.007876271 0.8686670
## [25,] 0.1175698 0.008019476 0.8701426
## [26,] 0.1173750 0.007517720 0.8721102
## [27,] 0.1173750 0.007446123 0.8726021
## [28,] 0.1175625 0.007302928 0.8750615
## [29,] 0.1168750 0.006801747 0.8730939
## [30,] 0.1170000 0.007302928 0.8706345
## [31,] 0.1172500 0.006873344 0.8755534
## [32,] 0.1178125 0.007088136 0.8785047
## [33,] 0.1175625 0.006730150 0.8789966
## [34,] 0.1175000 0.006515358 0.8799803
## [35,] 0.1179375 0.006443760 0.8839154
## [36,] 0.1178750 0.006873344 0.8804722
## [37,] 0.1176875 0.006443760 0.8819479
## [38,] 0.1173125 0.006300566 0.8799803
## [39,] 0.1175625 0.006014176 0.8839154
## [40,] 0.1175000 0.006515358 0.8799803
## [41,] 0.1171250 0.006228968 0.8789966
## [42,] 0.1171250 0.006157371 0.8794884
## [43,] 0.1172500 0.006443760 0.8785047
## [44,] 0.1172500 0.006300566 0.8794884
## [45,] 0.1171250 0.005942579 0.8809641
## [46,] 0.1171875 0.006085774 0.8804722
## [47,] 0.1175000 0.006515358 0.8799803
## [48,] 0.1170625 0.005870982 0.8809641
## [49,] 0.1171250 0.005799384 0.8819479
## [50,] 0.1176250 0.005727787 0.8863748
## [51,] 0.1172500 0.005584592 0.8844073
## [52,] 0.1170625 0.005369800 0.8844073
## [53,] 0.1172500 0.005727787 0.8834235
## [54,] 0.1170000 0.005512995 0.8829316
## [55,] 0.1166250 0.005369800 0.8809641
## [56,] 0.1168125 0.005441398 0.8819479
## [57,] 0.1166250 0.005441398 0.8804722
## [58,] 0.1168125 0.005369800 0.8824397
## [59,] 0.1169375 0.005083411 0.8853910
## [60,] 0.1170000 0.005083411 0.8858829
## [61,] 0.1166875 0.004940216 0.8844073
## [62,] 0.1167500 0.004725424 0.8863748
## [63,] 0.1168125 0.004940216 0.8853910
## [64,] 0.1166875 0.004868619 0.8848992
## [65,] 0.1171875 0.004940216 0.8883424
## [66,] 0.1164375 0.004797022 0.8834235
## [67,] 0.1167500 0.004940216 0.8848992
## [68,] 0.1168125 0.004868619 0.8858829
## [69,] 0.1170625 0.004653827 0.8893261
## [70,] 0.1166250 0.004868619 0.8844073
## [71,] 0.1162500 0.004510632 0.8839154
## [72,] 0.1166250 0.004582230 0.8863748
## [73,] 0.1166250 0.004725424 0.8853910
## [74,] 0.1166250 0.004582230 0.8863748
## [75,] 0.1167500 0.004725424 0.8863748
## [76,] 0.1166250 0.004367438 0.8878505
## [77,] 0.1163750 0.004295840 0.8863748
## [78,] 0.1161250 0.004081048 0.8858829
## [79,] 0.1166875 0.004439035 0.8878505
## [80,] 0.1164375 0.004510632 0.8853910
## [81,] 0.1167500 0.004439035 0.8883424
## [82,] 0.1167500 0.004510632 0.8878505
## [83,] 0.1166875 0.004653827 0.8863748
## [84,] 0.1165000 0.004582230 0.8853910
## [85,] 0.1163750 0.004152646 0.8873586
## [86,] 0.1165625 0.004653827 0.8853910
## [87,] 0.1165625 0.004367438 0.8873586
## [88,] 0.1163125 0.004152646 0.8868667
## [89,] 0.1164375 0.004081048 0.8883424
## [90,] 0.1165000 0.004367438 0.8868667
## [91,] 0.1161875 0.004295840 0.8848992
## [92,] 0.1160000 0.004081048 0.8848992
## [93,] 0.1161250 0.004009451 0.8863748
## [94,] 0.1161875 0.003937854 0.8873586
## [95,] 0.1165000 0.004152646 0.8883424
## [96,] 0.1163750 0.004081048 0.8878505
## [97,] 0.1165625 0.004081048 0.8893261
## [98,] 0.1163750 0.003937854 0.8888342
## [99,] 0.1163125 0.003866256 0.8888342
## [100,] 0.1163125 0.003866256 0.8888342
## [101,] 0.1165625 0.004009451 0.8898180
## deciling code
# Score the development sample with the final forest. These are predictions on
# the same rows the forest was fitted on (not the out-of-bag predictions).
# predict.class holds the predicted label; predict.score holds the matrix of
# class probabilities, whose second column is used later as the score.
rf.devscaled[["predict.class"]] <- predict(
  object = aforest1,
  newdata = rf.devscaled,
  type = "class"
)
rf.devscaled[["predict.score"]] <- predict(
  object = aforest1,
  newdata = rf.devscaled,
  type = "prob"
)
#' Assign each value of a numeric vector to a decile bucket (1-10).
#'
#' The cut points are the 10th, 20th, ..., 90th percentiles of `x` (missing
#' values ignored). A value gets bucket k when it is below the k-th cut point
#' but not below any earlier one; values at or above the 90th percentile get
#' bucket 10. When cut points coincide (many tied scores) some buckets are
#' simply left empty.
#'
#' @param x Numeric vector of scores.
#' @return Numeric vector the same length as `x` with values in 1..10; NA
#'   where `x` is NA, and all NA when `x` has no non-missing values.
decile <- function(x){
  # All nine cut points in one vectorised call (no fractional indexing).
  cuts <- quantile(x, probs = seq(0.1, 0.9, by = 0.1), na.rm = TRUE,
                   names = FALSE)
  # No non-missing data: the cut points are undefined, so are the buckets.
  if (anyNA(cuts)) {
    return(rep(NA_real_, length(x)))
  }
  # findInterval() needs non-decreasing breaks; cummax() guards against
  # rounding noise and matches "first cut point that x is below".
  cuts <- cummax(cuts)
  # Number of cut points <= x, plus one, is the bucket.
  findInterval(x, cuts) + 1
}
# Bucket the development sample by its class-"1" probability (second column
# of predict.score).
rf.devscaled$deciles <- decile(rf.devscaled$predict.score[, 2])
summary(as.factor(rf.devscaled$TARGET))
## 0 1
## 13967 2033
## deciling
??data.table
library(data.table)
tmp_DT <- data.table(rf.devscaled)
# One row per decile, highest decile first: row count, responders
# (TARGET == 1) and non-responders (TARGET == 0).
rank <- tmp_DT[, .(
  cnt = .N,
  cnt_resp = sum(TARGET),
  cnt_non_resp = sum(TARGET == 0)
), by = deciles][order(-deciles)]
# Response rate per decile, rounded to 2 decimals.
rank[, rrate := round(cnt_resp / cnt, 2)]
# Running totals from the top decile downwards.
rank[, cum_resp := cumsum(cnt_resp)]
rank[, cum_non_resp := cumsum(cnt_non_resp)]
# Cumulative shares of all responders / non-responders (rounded to 2 dp).
rank[, cum_rel_resp := round(cum_resp / sum(cnt_resp), 2)]
rank[, cum_rel_non_resp := round(cum_non_resp / sum(cnt_non_resp), 2)]
# KS per decile: gap between the two (rounded) cumulative shares.
rank[, ks := abs(cum_rel_resp - cum_rel_non_resp)]
# Format the rate columns as percentage strings for display.
rank[, rrate := percent(rrate)]
rank[, cum_rel_resp := percent(cum_rel_resp)]
rank[, cum_rel_non_resp := percent(cum_rel_non_resp)]
View(rank)
#KS score is 0.76
###################################################################################
######################Hold out data prediction####################################
# Take exactly the predictor columns the forest was trained on, by name and
# in training order (the column names of the scaled development matrix).
y <- Hold_sample[, colnames(rf.Dev_sample), drop = FALSE]
# Standardise the hold-out data with the DEVELOPMENT-sample centres and
# scales that scale() stored as attributes on rf.Dev_sample. Re-estimating
# the mean/sd on the hold-out rows would put them on a different scale from
# the one the forest's split thresholds were learned on.
y.scaled <- scale(y,
                  center = attr(rf.Dev_sample, "scaled:center"),
                  scale = attr(rf.Dev_sample, "scaled:scale"))
rf.holdscaled <- cbind(Hold_sample["TARGET"], y.scaled)
View(rf.holdscaled)
# Predicted label and class-probability matrix from the final forest.
rf.holdscaled$predict.class <- predict(aforest1, rf.holdscaled, type = "class")
rf.holdscaled$predict.score <- predict(aforest1, rf.holdscaled, type = "prob")
# Decile buckets of the class-"1" probability; the cut points are computed
# from the hold-out scores themselves.
rf.holdscaled$deciles <- decile(rf.holdscaled$predict.score[, 2])
tmp_DT <- data.table(rf.holdscaled)
# One row per decile, highest decile first: row count, responders
# (TARGET == 1) and non-responders (TARGET == 0).
h_rank <- tmp_DT[, list(
  cnt = length(TARGET),
  cnt_resp = sum(TARGET),
  cnt_non_resp = sum(TARGET == 0)),
  by = deciles][order(-deciles)]
# Response rate, running totals and cumulative shares, computed the same way
# as for the development-sample table.
h_rank$rrate <- round(h_rank$cnt_resp / h_rank$cnt, 2)
h_rank$cum_resp <- cumsum(h_rank$cnt_resp)
h_rank$cum_non_resp <- cumsum(h_rank$cnt_non_resp)
h_rank$cum_rel_resp <- round(h_rank$cum_resp / sum(h_rank$cnt_resp), 2)
h_rank$cum_rel_non_resp <- round(h_rank$cum_non_resp / sum(h_rank$cnt_non_resp), 2)
# KS per decile: gap between the two (rounded) cumulative shares.
h_rank$ks <- abs(h_rank$cum_rel_resp - h_rank$cum_rel_non_resp)
library(scales)
# Format the rate columns as percentage strings for display.
h_rank$rrate <- percent(h_rank$rrate)
h_rank$cum_rel_resp <- percent(h_rank$cum_rel_resp)
h_rank$cum_rel_non_resp <- percent(h_rank$cum_rel_non_resp)
View(h_rank)
########################################################################################
######################################Other performance parameters#######################
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
##
## Attaching package: 'ROCR'
## The following object is masked from 'package:neuralnet':
##
## prediction
# ROC-based metrics for the development sample, using the class-"1"
# probability as the score. The scores come from predicting on the rows the
# forest was fitted on. ROCR:: is spelled out because neuralnet also exports a
# function called prediction(), so the bare name depends on load order.
pred <- ROCR::prediction(rf.devscaled$predict.score[, 2], rf.devscaled$TARGET)
perf <- ROCR::performance(pred, "tpr", "fpr")
plot(perf)
# KS statistic: largest gap between true-positive and false-positive rate.
KS <- max(perf@y.values[[1]] - perf@x.values[[1]])
KS
## [1] 0.7500795
## Area Under Curve
auc <- as.numeric(ROCR::performance(pred, "auc")@y.values)
auc
## [1] 0.9421148
## Gini Coefficient
library(ineq)
# NOTE: ineq(..., type = "Gini") measures how unequally the predicted scores
# themselves are distributed; it does not use TARGET. Kept under its original
# name so the recorded value below still applies.
gini <- ineq(rf.devscaled$predict.score[, 2], type = "Gini")
gini
## [1] 0.7808436
# Gini coefficient of the classifier (discriminatory power): 2 * AUC - 1.
gini_model <- 2 * auc - 1
gini_model
## Classification Error
# Confusion matrix: actual TARGET (rows) vs predicted class (columns).
with(rf.devscaled, table(TARGET, predict.class))
## predict.class
## TARGET 0 1
## 0 13943 24
## 1 1761 272
##################################################################################
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.