This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
## Point R at the folder that holds the assignment data.
## NOTE(review): this absolute path is machine-specific; adjust it before
## running the notebook on another computer.
setwd("D:/GREAT LAKES/DATA MINING/Neural Network")
getwd()
## [1] "D:/GREAT LAKES/DATA MINING/Neural Network"
## Clear the workspace so objects from earlier sessions cannot leak in.
rm(list = ls())
## Any model needs a Training (development) and a Testing (holdout) dataset.
## Here the full file is read once and then split by simple random sampling:
## 75% of the rows go to development, the remaining 25% to holdout.
## stringsAsFactors = TRUE is spelled out because the default differs between
## R versions; the categorical columns must be factors so that both samples
## keep the full set of levels after the split.
datafull <- read.table(
  "GROUP ASSIGNMENT QUESTION.csv",
  sep = ",",
  header = TRUE,
  stringsAsFactors = TRUE
)
## 75% of the sample size
smp_size <- floor(0.75 * nrow(datafull))
## set the seed to make your partition reproducible
set.seed(147)
train_ind <- sample(seq_len(nrow(datafull)), size = smp_size)
nn.dev <- datafull[train_ind, ]
nn.holdout <- datafull[-train_ind, ]
## Column names of the development sample
colnames(nn.dev)
## [1] "CUST_ID" "TARGET"
## [3] "AGE" "GENDER"
## [5] "BALANCE" "OCCUPATION"
## [7] "AGE_BKT" "SCR"
## [9] "HOLDING_PERIOD" "ACC_TYPE"
## [11] "ACC_OP_DATE" "LEN_OF_RLTN_IN_MNTH"
## [13] "NO_OF_L_CR_TXNS" "NO_OF_L_DR_TXNS"
## [15] "TOT_NO_OF_L_TXNS" "NO_OF_BR_CSH_WDL_DR_TXNS"
## [17] "NO_OF_ATM_DR_TXNS" "NO_OF_NET_DR_TXNS"
## [19] "NO_OF_MOB_DR_TXNS" "NO_OF_CHQ_DR_TXNS"
## [21] "FLG_HAS_CC" "AMT_ATM_DR"
## [23] "AMT_BR_CSH_WDL_DR" "AMT_CHQ_DR"
## [25] "AMT_NET_DR" "AMT_MOB_DR"
## [27] "AMT_L_DR" "FLG_HAS_ANY_CHGS"
## [29] "AMT_OTH_BK_ATM_USG_CHGS" "AMT_MIN_BAL_NMC_CHGS"
## [31] "NO_OF_IW_CHQ_BNC_TXNS" "NO_OF_OW_CHQ_BNC_TXNS"
## [33] "AVG_AMT_PER_ATM_TXN" "AVG_AMT_PER_CSH_WDL_TXN"
## [35] "AVG_AMT_PER_CHQ_TXN" "AVG_AMT_PER_NET_TXN"
## [37] "AVG_AMT_PER_MOB_TXN" "FLG_HAS_NOMINEE"
## [39] "FLG_HAS_OLD_LOAN" "random"
## One-hot encode the categorical predictors OCCUPATION, GENDER and ACC_TYPE.
## The "- 1" removes the intercept, so every level gets its own 0/1 column.
## data.frame() makes the new names syntactic, e.g. level "SELF-EMP" becomes
## column OCCUPATIONSELF.EMP.

## Development sample: build the three indicator matrices, then append them.
occ.matrix <- model.matrix(~ OCCUPATION - 1, data = nn.dev)
Gender.matrix <- model.matrix(~ GENDER - 1, data = nn.dev)
acc.matrix <- model.matrix(~ ACC_TYPE - 1, data = nn.dev)
nn.dev <- data.frame(nn.dev, occ.matrix, Gender.matrix, acc.matrix)

## Holdout sample: same encoding (the helper matrices are overwritten).
occ.matrix <- model.matrix(~ OCCUPATION - 1, data = nn.holdout)
Gender.matrix <- model.matrix(~ GENDER - 1, data = nn.holdout)
acc.matrix <- model.matrix(~ ACC_TYPE - 1, data = nn.holdout)
nn.holdout <- data.frame(nn.holdout, occ.matrix, Gender.matrix, acc.matrix)

## Column names of the development sample after encoding
names(nn.dev)
## [1] "CUST_ID" "TARGET"
## [3] "AGE" "GENDER"
## [5] "BALANCE" "OCCUPATION"
## [7] "AGE_BKT" "SCR"
## [9] "HOLDING_PERIOD" "ACC_TYPE"
## [11] "ACC_OP_DATE" "LEN_OF_RLTN_IN_MNTH"
## [13] "NO_OF_L_CR_TXNS" "NO_OF_L_DR_TXNS"
## [15] "TOT_NO_OF_L_TXNS" "NO_OF_BR_CSH_WDL_DR_TXNS"
## [17] "NO_OF_ATM_DR_TXNS" "NO_OF_NET_DR_TXNS"
## [19] "NO_OF_MOB_DR_TXNS" "NO_OF_CHQ_DR_TXNS"
## [21] "FLG_HAS_CC" "AMT_ATM_DR"
## [23] "AMT_BR_CSH_WDL_DR" "AMT_CHQ_DR"
## [25] "AMT_NET_DR" "AMT_MOB_DR"
## [27] "AMT_L_DR" "FLG_HAS_ANY_CHGS"
## [29] "AMT_OTH_BK_ATM_USG_CHGS" "AMT_MIN_BAL_NMC_CHGS"
## [31] "NO_OF_IW_CHQ_BNC_TXNS" "NO_OF_OW_CHQ_BNC_TXNS"
## [33] "AVG_AMT_PER_ATM_TXN" "AVG_AMT_PER_CSH_WDL_TXN"
## [35] "AVG_AMT_PER_CHQ_TXN" "AVG_AMT_PER_NET_TXN"
## [37] "AVG_AMT_PER_MOB_TXN" "FLG_HAS_NOMINEE"
## [39] "FLG_HAS_OLD_LOAN" "random"
## [41] "OCCUPATIONPROF" "OCCUPATIONSAL"
## [43] "OCCUPATIONSELF.EMP" "OCCUPATIONSENP"
## [45] "GENDERF" "GENDERM"
## [47] "GENDERO" "ACC_TYPECA"
## [49] "ACC_TYPESA"
## Number of rows in the development and the holdout sample
c(nrow(nn.dev), nrow(nn.holdout))
## [1] 15000 5000
## Structure of the development sample, including the new indicator columns
str(nn.dev)
## 'data.frame': 15000 obs. of 49 variables:
## $ CUST_ID : Factor w/ 20000 levels "C1","C10","C100",..: 5882 790 15541 13461 6311 13224 11733 8944 14871 12466 ...
## $ TARGET : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AGE : int 32 53 26 27 33 42 21 40 32 51 ...
## $ GENDER : Factor w/ 3 levels "F","M","O": 2 2 1 1 2 2 1 2 2 1 ...
## $ BALANCE : num 357791 37277 477579 44337 512466 ...
## $ OCCUPATION : Factor w/ 4 levels "PROF","SAL","SELF-EMP",..: 4 2 3 4 3 1 3 2 4 1 ...
## $ AGE_BKT : Factor w/ 7 levels "<25",">50","26-30",..: 4 2 3 3 4 6 1 5 4 2 ...
## $ SCR : int 569 924 258 677 997 306 158 208 551 199 ...
## $ HOLDING_PERIOD : int 5 18 15 18 6 25 28 8 29 22 ...
## $ ACC_TYPE : Factor w/ 2 levels "CA","SA": 2 2 2 1 2 2 2 2 2 2 ...
## $ ACC_OP_DATE : Factor w/ 4869 levels "01-01-2000","01-01-2001",..: 4108 1804 1586 4573 1802 1303 1401 4477 507 731 ...
## $ LEN_OF_RLTN_IN_MNTH : int 143 164 52 153 188 153 69 165 146 205 ...
## $ NO_OF_L_CR_TXNS : int 20 2 0 28 4 11 4 19 0 13 ...
## $ NO_OF_L_DR_TXNS : int 16 2 0 1 3 1 2 15 3 2 ...
## $ TOT_NO_OF_L_TXNS : int 36 4 0 29 7 12 6 34 3 15 ...
## $ NO_OF_BR_CSH_WDL_DR_TXNS: int 7 0 0 0 1 0 2 4 2 0 ...
## $ NO_OF_ATM_DR_TXNS : int 2 0 0 0 0 0 0 2 1 1 ...
## $ NO_OF_NET_DR_TXNS : int 0 0 0 0 0 1 0 4 0 1 ...
## $ NO_OF_MOB_DR_TXNS : int 1 0 0 1 0 0 0 1 0 0 ...
## $ NO_OF_CHQ_DR_TXNS : int 6 2 0 0 2 0 0 4 0 0 ...
## $ FLG_HAS_CC : int 1 0 0 0 0 1 0 0 0 0 ...
## $ AMT_ATM_DR : int 35300 0 0 0 0 0 0 12600 14500 3700 ...
## $ AMT_BR_CSH_WDL_DR : int 829690 0 0 0 247490 0 230870 178400 20680 0 ...
## $ AMT_CHQ_DR : int 29840 46150 0 0 43690 0 0 54450 0 0 ...
## $ AMT_NET_DR : num 0 0 0 0 0 ...
## $ AMT_MOB_DR : int 17951 0 0 134914 0 0 0 38818 0 0 ...
## $ AMT_L_DR : num 912781 46150 0 134914 291180 ...
## $ FLG_HAS_ANY_CHGS : int 0 0 0 0 0 0 0 1 0 0 ...
## $ AMT_OTH_BK_ATM_USG_CHGS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AMT_MIN_BAL_NMC_CHGS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NO_OF_IW_CHQ_BNC_TXNS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NO_OF_OW_CHQ_BNC_TXNS : int 0 0 0 0 0 0 0 1 0 0 ...
## $ AVG_AMT_PER_ATM_TXN : num 17650 0 0 0 0 ...
## $ AVG_AMT_PER_CSH_WDL_TXN : num 118527 0 0 0 247490 ...
## $ AVG_AMT_PER_CHQ_TXN : num 4973 23075 0 0 21845 ...
## $ AVG_AMT_PER_NET_TXN : num 0 0 0 0 0 ...
## $ AVG_AMT_PER_MOB_TXN : num 17951 0 0 134914 0 ...
## $ FLG_HAS_NOMINEE : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FLG_HAS_OLD_LOAN : int 0 0 1 0 1 1 1 0 0 0 ...
## $ random : num 0.599 0.568 0.388 0.983 0.291 ...
## $ OCCUPATIONPROF : num 0 0 0 0 0 1 0 0 0 1 ...
## $ OCCUPATIONSAL : num 0 1 0 0 0 0 0 1 0 0 ...
## $ OCCUPATIONSELF.EMP : num 0 0 1 0 1 0 1 0 0 0 ...
## $ OCCUPATIONSENP : num 1 0 0 1 0 0 0 0 1 0 ...
## $ GENDERF : num 0 0 1 1 0 0 1 0 0 1 ...
## $ GENDERM : num 1 1 0 0 1 1 0 1 1 0 ...
## $ GENDERO : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ACC_TYPECA : num 0 0 0 1 0 0 0 0 0 0 ...
## $ ACC_TYPESA : num 1 1 1 0 1 1 1 1 1 1 ...
## Response Rate: share of rows with TARGET == 1, first for the development
## sample and then for the holdout sample (the two should be similar).
with(nn.dev, sum(TARGET) / length(TARGET))
## [1] 0.1259333
with(nn.holdout, sum(TARGET) / length(TARGET))
## [1] 0.1246
## Installing the Neural Net package;
## If already installed do not run the below step
##install.packages("neuralnet")
library(neuralnet)
?"neuralnet"
## starting httpd help server ... done
## List the development-sample columns again before choosing the model inputs
names(nn.dev)
## [1] "CUST_ID" "TARGET"
## [3] "AGE" "GENDER"
## [5] "BALANCE" "OCCUPATION"
## [7] "AGE_BKT" "SCR"
## [9] "HOLDING_PERIOD" "ACC_TYPE"
## [11] "ACC_OP_DATE" "LEN_OF_RLTN_IN_MNTH"
## [13] "NO_OF_L_CR_TXNS" "NO_OF_L_DR_TXNS"
## [15] "TOT_NO_OF_L_TXNS" "NO_OF_BR_CSH_WDL_DR_TXNS"
## [17] "NO_OF_ATM_DR_TXNS" "NO_OF_NET_DR_TXNS"
## [19] "NO_OF_MOB_DR_TXNS" "NO_OF_CHQ_DR_TXNS"
## [21] "FLG_HAS_CC" "AMT_ATM_DR"
## [23] "AMT_BR_CSH_WDL_DR" "AMT_CHQ_DR"
## [25] "AMT_NET_DR" "AMT_MOB_DR"
## [27] "AMT_L_DR" "FLG_HAS_ANY_CHGS"
## [29] "AMT_OTH_BK_ATM_USG_CHGS" "AMT_MIN_BAL_NMC_CHGS"
## [31] "NO_OF_IW_CHQ_BNC_TXNS" "NO_OF_OW_CHQ_BNC_TXNS"
## [33] "AVG_AMT_PER_ATM_TXN" "AVG_AMT_PER_CSH_WDL_TXN"
## [35] "AVG_AMT_PER_CHQ_TXN" "AVG_AMT_PER_NET_TXN"
## [37] "AVG_AMT_PER_MOB_TXN" "FLG_HAS_NOMINEE"
## [39] "FLG_HAS_OLD_LOAN" "random"
## [41] "OCCUPATIONPROF" "OCCUPATIONSAL"
## [43] "OCCUPATIONSELF.EMP" "OCCUPATIONSENP"
## [45] "GENDERF" "GENDERM"
## [47] "GENDERO" "ACC_TYPECA"
## [49] "ACC_TYPESA"
## colnames() returns the same names as names() for a data frame, so this
## listing repeats the previous one
colnames(nn.dev)
## [1] "CUST_ID" "TARGET"
## [3] "AGE" "GENDER"
## [5] "BALANCE" "OCCUPATION"
## [7] "AGE_BKT" "SCR"
## [9] "HOLDING_PERIOD" "ACC_TYPE"
## [11] "ACC_OP_DATE" "LEN_OF_RLTN_IN_MNTH"
## [13] "NO_OF_L_CR_TXNS" "NO_OF_L_DR_TXNS"
## [15] "TOT_NO_OF_L_TXNS" "NO_OF_BR_CSH_WDL_DR_TXNS"
## [17] "NO_OF_ATM_DR_TXNS" "NO_OF_NET_DR_TXNS"
## [19] "NO_OF_MOB_DR_TXNS" "NO_OF_CHQ_DR_TXNS"
## [21] "FLG_HAS_CC" "AMT_ATM_DR"
## [23] "AMT_BR_CSH_WDL_DR" "AMT_CHQ_DR"
## [25] "AMT_NET_DR" "AMT_MOB_DR"
## [27] "AMT_L_DR" "FLG_HAS_ANY_CHGS"
## [29] "AMT_OTH_BK_ATM_USG_CHGS" "AMT_MIN_BAL_NMC_CHGS"
## [31] "NO_OF_IW_CHQ_BNC_TXNS" "NO_OF_OW_CHQ_BNC_TXNS"
## [33] "AVG_AMT_PER_ATM_TXN" "AVG_AMT_PER_CSH_WDL_TXN"
## [35] "AVG_AMT_PER_CHQ_TXN" "AVG_AMT_PER_NET_TXN"
## [37] "AVG_AMT_PER_MOB_TXN" "FLG_HAS_NOMINEE"
## [39] "FLG_HAS_OLD_LOAN" "random"
## [41] "OCCUPATIONPROF" "OCCUPATIONSAL"
## [43] "OCCUPATIONSELF.EMP" "OCCUPATIONSENP"
## [45] "GENDERF" "GENDERM"
## [47] "GENDERO" "ACC_TYPECA"
## [49] "ACC_TYPESA"
## Column types of the development sample: the factor columns shown here
## cannot be fed to the network directly
str(nn.dev)
## 'data.frame': 15000 obs. of 49 variables:
## $ CUST_ID : Factor w/ 20000 levels "C1","C10","C100",..: 5882 790 15541 13461 6311 13224 11733 8944 14871 12466 ...
## $ TARGET : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AGE : int 32 53 26 27 33 42 21 40 32 51 ...
## $ GENDER : Factor w/ 3 levels "F","M","O": 2 2 1 1 2 2 1 2 2 1 ...
## $ BALANCE : num 357791 37277 477579 44337 512466 ...
## $ OCCUPATION : Factor w/ 4 levels "PROF","SAL","SELF-EMP",..: 4 2 3 4 3 1 3 2 4 1 ...
## $ AGE_BKT : Factor w/ 7 levels "<25",">50","26-30",..: 4 2 3 3 4 6 1 5 4 2 ...
## $ SCR : int 569 924 258 677 997 306 158 208 551 199 ...
## $ HOLDING_PERIOD : int 5 18 15 18 6 25 28 8 29 22 ...
## $ ACC_TYPE : Factor w/ 2 levels "CA","SA": 2 2 2 1 2 2 2 2 2 2 ...
## $ ACC_OP_DATE : Factor w/ 4869 levels "01-01-2000","01-01-2001",..: 4108 1804 1586 4573 1802 1303 1401 4477 507 731 ...
## $ LEN_OF_RLTN_IN_MNTH : int 143 164 52 153 188 153 69 165 146 205 ...
## $ NO_OF_L_CR_TXNS : int 20 2 0 28 4 11 4 19 0 13 ...
## $ NO_OF_L_DR_TXNS : int 16 2 0 1 3 1 2 15 3 2 ...
## $ TOT_NO_OF_L_TXNS : int 36 4 0 29 7 12 6 34 3 15 ...
## $ NO_OF_BR_CSH_WDL_DR_TXNS: int 7 0 0 0 1 0 2 4 2 0 ...
## $ NO_OF_ATM_DR_TXNS : int 2 0 0 0 0 0 0 2 1 1 ...
## $ NO_OF_NET_DR_TXNS : int 0 0 0 0 0 1 0 4 0 1 ...
## $ NO_OF_MOB_DR_TXNS : int 1 0 0 1 0 0 0 1 0 0 ...
## $ NO_OF_CHQ_DR_TXNS : int 6 2 0 0 2 0 0 4 0 0 ...
## $ FLG_HAS_CC : int 1 0 0 0 0 1 0 0 0 0 ...
## $ AMT_ATM_DR : int 35300 0 0 0 0 0 0 12600 14500 3700 ...
## $ AMT_BR_CSH_WDL_DR : int 829690 0 0 0 247490 0 230870 178400 20680 0 ...
## $ AMT_CHQ_DR : int 29840 46150 0 0 43690 0 0 54450 0 0 ...
## $ AMT_NET_DR : num 0 0 0 0 0 ...
## $ AMT_MOB_DR : int 17951 0 0 134914 0 0 0 38818 0 0 ...
## $ AMT_L_DR : num 912781 46150 0 134914 291180 ...
## $ FLG_HAS_ANY_CHGS : int 0 0 0 0 0 0 0 1 0 0 ...
## $ AMT_OTH_BK_ATM_USG_CHGS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AMT_MIN_BAL_NMC_CHGS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NO_OF_IW_CHQ_BNC_TXNS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NO_OF_OW_CHQ_BNC_TXNS : int 0 0 0 0 0 0 0 1 0 0 ...
## $ AVG_AMT_PER_ATM_TXN : num 17650 0 0 0 0 ...
## $ AVG_AMT_PER_CSH_WDL_TXN : num 118527 0 0 0 247490 ...
## $ AVG_AMT_PER_CHQ_TXN : num 4973 23075 0 0 21845 ...
## $ AVG_AMT_PER_NET_TXN : num 0 0 0 0 0 ...
## $ AVG_AMT_PER_MOB_TXN : num 17951 0 0 134914 0 ...
## $ FLG_HAS_NOMINEE : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FLG_HAS_OLD_LOAN : int 0 0 1 0 1 1 1 0 0 0 ...
## $ random : num 0.599 0.568 0.388 0.983 0.291 ...
## $ OCCUPATIONPROF : num 0 0 0 0 0 1 0 0 0 1 ...
## $ OCCUPATIONSAL : num 0 1 0 0 0 0 0 1 0 0 ...
## $ OCCUPATIONSELF.EMP : num 0 0 1 0 1 0 1 0 0 0 ...
## $ OCCUPATIONSENP : num 1 0 0 1 0 0 0 0 1 0 ...
## $ GENDERF : num 0 0 1 1 0 0 1 0 0 1 ...
## $ GENDERM : num 1 1 0 0 1 1 0 1 1 0 ...
## $ GENDERO : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ACC_TYPECA : num 0 0 0 1 0 0 0 0 0 0 ...
## $ ACC_TYPESA : num 1 1 1 0 1 1 1 1 1 1 ...
## build the neural net model by scaling the variables
## Create the predictor subset by dropping, by name rather than by column
## position, everything that must not enter the network: the customer id,
## the response, the raw categorical columns, the account-opening date and
## the `random` column. OCCUPATION, GENDER and ACC_TYPE remain represented
## by their 0/1 indicator columns; AGE_BKT has no indicator columns.
drop.cols <- c("CUST_ID", "TARGET", "GENDER", "OCCUPATION", "AGE_BKT",
               "ACC_TYPE", "ACC_OP_DATE", "random")
x <- nn.dev[, !(names(nn.dev) %in% drop.cols), drop = FALSE]
## Runs the examples from the subset() help page; the airquality / state.x77
## output that follows comes from this call and is unrelated to the model.
example(subset)
##
## subset> subset(airquality, Temp > 80, select = c(Ozone, Temp))
## Ozone Temp
## 29 45 81
## 35 NA 84
## 36 NA 85
## 38 29 82
## 39 NA 87
## 40 71 90
## 41 39 87
## 42 NA 93
## 43 NA 92
## 44 23 82
## 61 NA 83
## 62 135 84
## 63 49 85
## 64 32 81
## 65 NA 84
## 66 64 83
## 67 40 83
## 68 77 88
## 69 97 92
## 70 97 92
## 71 85 89
## 72 NA 82
## 74 27 81
## 75 NA 91
## 77 48 81
## 78 35 82
## 79 61 84
## 80 79 87
## 81 63 85
## 83 NA 81
## 84 NA 82
## 85 80 86
## 86 108 85
## 87 20 82
## 88 52 86
## 89 82 88
## 90 50 86
## 91 64 83
## 92 59 81
## 93 39 81
## 94 9 81
## 95 16 82
## 96 78 86
## 97 35 85
## 98 66 87
## 99 122 89
## 100 89 90
## 101 110 90
## 102 NA 92
## 103 NA 86
## 104 44 86
## 105 28 82
## 117 168 81
## 118 73 86
## 119 NA 88
## 120 76 97
## 121 118 94
## 122 84 96
## 123 85 94
## 124 96 91
## 125 78 92
## 126 73 93
## 127 91 93
## 128 47 87
## 129 32 84
## 134 44 81
## 143 16 82
## 146 36 81
##
## subset> subset(airquality, Day == 1, select = -Temp)
## Ozone Solar.R Wind Month Day
## 1 41 190 7.4 5 1
## 32 NA 286 8.6 6 1
## 62 135 269 4.1 7 1
## 93 39 83 6.9 8 1
## 124 96 167 6.9 9 1
##
## subset> subset(airquality, select = Ozone:Wind)
## Ozone Solar.R Wind
## 1 41 190 7.4
## 2 36 118 8.0
## 3 12 149 12.6
## 4 18 313 11.5
## 5 NA NA 14.3
## 6 28 NA 14.9
## 7 23 299 8.6
## 8 19 99 13.8
## 9 8 19 20.1
## 10 NA 194 8.6
## 11 7 NA 6.9
## 12 16 256 9.7
## 13 11 290 9.2
## 14 14 274 10.9
## 15 18 65 13.2
## 16 14 334 11.5
## 17 34 307 12.0
## 18 6 78 18.4
## 19 30 322 11.5
## 20 11 44 9.7
## 21 1 8 9.7
## 22 11 320 16.6
## 23 4 25 9.7
## 24 32 92 12.0
## 25 NA 66 16.6
## 26 NA 266 14.9
## 27 NA NA 8.0
## 28 23 13 12.0
## 29 45 252 14.9
## 30 115 223 5.7
## 31 37 279 7.4
## 32 NA 286 8.6
## 33 NA 287 9.7
## 34 NA 242 16.1
## 35 NA 186 9.2
## 36 NA 220 8.6
## 37 NA 264 14.3
## 38 29 127 9.7
## 39 NA 273 6.9
## 40 71 291 13.8
## 41 39 323 11.5
## 42 NA 259 10.9
## 43 NA 250 9.2
## 44 23 148 8.0
## 45 NA 332 13.8
## 46 NA 322 11.5
## 47 21 191 14.9
## 48 37 284 20.7
## 49 20 37 9.2
## 50 12 120 11.5
## 51 13 137 10.3
## 52 NA 150 6.3
## 53 NA 59 1.7
## 54 NA 91 4.6
## 55 NA 250 6.3
## 56 NA 135 8.0
## 57 NA 127 8.0
## 58 NA 47 10.3
## 59 NA 98 11.5
## 60 NA 31 14.9
## 61 NA 138 8.0
## 62 135 269 4.1
## 63 49 248 9.2
## 64 32 236 9.2
## 65 NA 101 10.9
## 66 64 175 4.6
## 67 40 314 10.9
## 68 77 276 5.1
## 69 97 267 6.3
## 70 97 272 5.7
## 71 85 175 7.4
## 72 NA 139 8.6
## 73 10 264 14.3
## 74 27 175 14.9
## 75 NA 291 14.9
## 76 7 48 14.3
## 77 48 260 6.9
## 78 35 274 10.3
## 79 61 285 6.3
## 80 79 187 5.1
## 81 63 220 11.5
## 82 16 7 6.9
## 83 NA 258 9.7
## 84 NA 295 11.5
## 85 80 294 8.6
## 86 108 223 8.0
## 87 20 81 8.6
## 88 52 82 12.0
## 89 82 213 7.4
## 90 50 275 7.4
## 91 64 253 7.4
## 92 59 254 9.2
## 93 39 83 6.9
## 94 9 24 13.8
## 95 16 77 7.4
## 96 78 NA 6.9
## 97 35 NA 7.4
## 98 66 NA 4.6
## 99 122 255 4.0
## 100 89 229 10.3
## 101 110 207 8.0
## 102 NA 222 8.6
## 103 NA 137 11.5
## 104 44 192 11.5
## 105 28 273 11.5
## 106 65 157 9.7
## 107 NA 64 11.5
## 108 22 71 10.3
## 109 59 51 6.3
## 110 23 115 7.4
## 111 31 244 10.9
## 112 44 190 10.3
## 113 21 259 15.5
## 114 9 36 14.3
## 115 NA 255 12.6
## 116 45 212 9.7
## 117 168 238 3.4
## 118 73 215 8.0
## 119 NA 153 5.7
## 120 76 203 9.7
## 121 118 225 2.3
## 122 84 237 6.3
## 123 85 188 6.3
## 124 96 167 6.9
## 125 78 197 5.1
## 126 73 183 2.8
## 127 91 189 4.6
## 128 47 95 7.4
## 129 32 92 15.5
## 130 20 252 10.9
## 131 23 220 10.3
## 132 21 230 10.9
## 133 24 259 9.7
## 134 44 236 14.9
## 135 21 259 15.5
## 136 28 238 6.3
## 137 9 24 10.9
## 138 13 112 11.5
## 139 46 237 6.9
## 140 18 224 13.8
## 141 13 27 10.3
## 142 24 238 10.3
## 143 16 201 8.0
## 144 13 238 12.6
## 145 23 14 9.2
## 146 36 139 10.3
## 147 7 49 10.3
## 148 14 20 16.6
## 149 30 193 6.9
## 150 NA 145 13.2
## 151 14 191 14.3
## 152 18 131 8.0
## 153 20 223 11.5
##
## subset> with(airquality, subset(Ozone, Temp > 80))
## [1] 45 NA NA 29 NA 71 39 NA NA 23 NA 135 49 32 NA 64 40
## [18] 77 97 97 85 NA 27 NA 48 35 61 79 63 NA NA 80 108 20
## [35] 52 82 50 64 59 39 9 16 78 35 66 122 89 110 NA NA 44
## [52] 28 168 73 NA 76 118 84 85 96 78 73 91 47 32 44 16 36
##
## subset> ## sometimes requiring a logical 'subset' argument is a nuisance
## subset> nm <- rownames(state.x77)
##
## subset> start_with_M <- nm %in% grep("^M", nm, value = TRUE)
##
## subset> subset(state.x77, start_with_M, Illiteracy:Murder)
## Illiteracy Life Exp Murder
## Maine 0.7 70.39 2.7
## Maryland 0.9 70.22 8.5
## Massachusetts 1.1 71.83 3.3
## Michigan 0.9 70.63 11.1
## Minnesota 0.6 72.96 2.3
## Mississippi 2.4 68.09 12.5
## Missouri 0.8 70.69 9.3
## Montana 0.6 70.56 5.0
##
## subset> # but in recent versions of R this can simply be
## subset> subset(state.x77, grepl("^M", nm), Illiteracy:Murder)
## Illiteracy Life Exp Murder
## Maine 0.7 70.39 2.7
## Maryland 0.9 70.22 8.5
## Massachusetts 1.1 71.83 3.3
## Michigan 0.9 70.63 11.1
## Minnesota 0.6 72.96 2.3
## Mississippi 2.4 68.09 12.5
## Missouri 0.8 70.69 9.3
## Montana 0.6 70.56 5.0
## Standardise every predictor (centre on its mean, divide by its standard
## deviation) and put the unscaled TARGET column in front of the result.
nn.devscaled <- cbind(nn.dev[2], scale(x))
## Open the scaled data in the data viewer, then print its structure
View(nn.devscaled)
str(nn.devscaled)
## 'data.frame': 15000 obs. of 42 variables:
## $ TARGET : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AGE : num -0.672 1.519 -1.298 -1.194 -0.568 ...
## $ BALANCE : num -0.21584 -0.6545 -0.0519 -0.64484 -0.00415 ...
## $ SCR : num 0.507 1.922 -0.733 0.937 2.213 ...
## $ HOLDING_PERIOD : num -1.15724 0.34122 -0.00458 0.34122 -1.04197 ...
## $ LEN_OF_RLTN_IN_MNTH : num 0.324 0.706 -1.333 0.506 1.143 ...
## $ NO_OF_L_CR_TXNS : num 0.628 -0.849 -1.013 1.285 -0.685 ...
## $ NO_OF_L_DR_TXNS : num 1.22 -0.597 -0.856 -0.726 -0.467 ...
## $ TOT_NO_OF_L_TXNS : num 0.958 -0.838 -1.063 0.565 -0.67 ...
## $ NO_OF_BR_CSH_WDL_DR_TXNS: num 2.307 -0.835 -0.835 -0.835 -0.386 ...
## $ NO_OF_ATM_DR_TXNS : num 0.636 -0.673 -0.673 -0.673 -0.673 ...
## $ NO_OF_NET_DR_TXNS : num -0.483 -0.483 -0.483 -0.483 -0.483 ...
## $ NO_OF_MOB_DR_TXNS : num 0.286 -0.204 -0.204 0.286 -0.204 ...
## $ NO_OF_CHQ_DR_TXNS : num 1.617 -0.054 -0.89 -0.89 -0.054 ...
## $ FLG_HAS_CC : num 1.505 -0.665 -0.665 -0.665 -0.665 ...
## $ AMT_ATM_DR : num 1.579 -0.709 -0.709 -0.709 -0.709 ...
## $ AMT_BR_CSH_WDL_DR : num 1.361 -1.139 -1.139 -1.139 -0.393 ...
## $ AMT_CHQ_DR : num -0.214 -0.178 -0.28 -0.28 -0.183 ...
## $ AMT_NET_DR : num -0.742 -0.742 -0.742 -0.742 -0.742 ...
## $ AMT_MOB_DR : num -0.0895 -0.4529 -0.4529 2.2785 -0.4529 ...
## $ AMT_L_DR : num 0.192 -1.008 -1.072 -0.885 -0.669 ...
## $ FLG_HAS_ANY_CHGS : num -0.353 -0.353 -0.353 -0.353 -0.353 ...
## $ AMT_OTH_BK_ATM_USG_CHGS : num -0.079 -0.079 -0.079 -0.079 -0.079 ...
## $ AMT_MIN_BAL_NMC_CHGS : num -0.089 -0.089 -0.089 -0.089 -0.089 ...
## $ NO_OF_IW_CHQ_BNC_TXNS : num -0.209 -0.209 -0.209 -0.209 -0.209 ...
## $ NO_OF_OW_CHQ_BNC_TXNS : num -0.215 -0.215 -0.215 -0.215 -0.215 ...
## $ AVG_AMT_PER_ATM_TXN : num 1.43 -1.02 -1.02 -1.02 -1.02 ...
## $ AVG_AMT_PER_CSH_WDL_TXN : num -0.4594 -0.9007 -0.9007 -0.9007 0.0207 ...
## $ AVG_AMT_PER_CHQ_TXN : num -0.4049 -0.044 -0.5041 -0.5041 -0.0685 ...
## $ AVG_AMT_PER_NET_TXN : num -0.635 -0.635 -0.635 -0.635 -0.635 ...
## $ AVG_AMT_PER_MOB_TXN : num -0.0499 -0.4363 -0.4363 2.4676 -0.4363 ...
## $ FLG_HAS_NOMINEE : num 0.335 0.335 0.335 0.335 0.335 ...
## $ FLG_HAS_OLD_LOAN : num -0.987 -0.987 1.013 -0.987 1.013 ...
## $ OCCUPATIONPROF : num -0.608 -0.608 -0.608 -0.608 -0.608 ...
## $ OCCUPATIONSAL : num -0.646 1.548 -0.646 -0.646 -0.646 ...
## $ OCCUPATIONSELF.EMP : num -0.466 -0.466 2.145 -0.466 2.145 ...
## $ OCCUPATIONSENP : num 1.699 -0.589 -0.589 1.699 -0.589 ...
## $ GENDERF : num -0.609 -0.609 1.642 1.642 -0.609 ...
## $ GENDERM : num 0.624 0.624 -1.603 -1.603 0.624 ...
## $ GENDERO : num -0.0981 -0.0981 -0.0981 -0.0981 -0.0981 ...
## $ ACC_TYPECA : num -0.52 -0.52 -0.52 1.92 -0.52 ...
## $ ACC_TYPESA : num 0.52 0.52 0.52 -1.92 0.52 ...
## Build the model formula TARGET ~ <every other column of nn.devscaled>.
## The predictor list is derived from the column names, so the formula stays
## correct if the number or order of columns changes.
predictors <- setdiff(colnames(nn.devscaled), "TARGET")
cn <- paste(predictors, collapse = ' + ')
fo <- as.formula(paste('TARGET', '~', cn)) # define the formula
fo
## TARGET ~ AGE + BALANCE + SCR + HOLDING_PERIOD + LEN_OF_RLTN_IN_MNTH +
## NO_OF_L_CR_TXNS + NO_OF_L_DR_TXNS + TOT_NO_OF_L_TXNS + NO_OF_BR_CSH_WDL_DR_TXNS +
## NO_OF_ATM_DR_TXNS + NO_OF_NET_DR_TXNS + NO_OF_MOB_DR_TXNS +
## NO_OF_CHQ_DR_TXNS + FLG_HAS_CC + AMT_ATM_DR + AMT_BR_CSH_WDL_DR +
## AMT_CHQ_DR + AMT_NET_DR + AMT_MOB_DR + AMT_L_DR + FLG_HAS_ANY_CHGS +
## AMT_OTH_BK_ATM_USG_CHGS + AMT_MIN_BAL_NMC_CHGS + NO_OF_IW_CHQ_BNC_TXNS +
## NO_OF_OW_CHQ_BNC_TXNS + AVG_AMT_PER_ATM_TXN + AVG_AMT_PER_CSH_WDL_TXN +
## AVG_AMT_PER_CHQ_TXN + AVG_AMT_PER_NET_TXN + AVG_AMT_PER_MOB_TXN +
## FLG_HAS_NOMINEE + FLG_HAS_OLD_LOAN + OCCUPATIONPROF + OCCUPATIONSAL +
## OCCUPATIONSELF.EMP + OCCUPATIONSENP + GENDERF + GENDERM +
## GENDERO + ACC_TYPECA + ACC_TYPESA
## Structure of the scaled model data: TARGET plus 41 numeric predictors
str(nn.devscaled)
## 'data.frame': 15000 obs. of 42 variables:
## $ TARGET : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AGE : num -0.672 1.519 -1.298 -1.194 -0.568 ...
## $ BALANCE : num -0.21584 -0.6545 -0.0519 -0.64484 -0.00415 ...
## $ SCR : num 0.507 1.922 -0.733 0.937 2.213 ...
## $ HOLDING_PERIOD : num -1.15724 0.34122 -0.00458 0.34122 -1.04197 ...
## $ LEN_OF_RLTN_IN_MNTH : num 0.324 0.706 -1.333 0.506 1.143 ...
## $ NO_OF_L_CR_TXNS : num 0.628 -0.849 -1.013 1.285 -0.685 ...
## $ NO_OF_L_DR_TXNS : num 1.22 -0.597 -0.856 -0.726 -0.467 ...
## $ TOT_NO_OF_L_TXNS : num 0.958 -0.838 -1.063 0.565 -0.67 ...
## $ NO_OF_BR_CSH_WDL_DR_TXNS: num 2.307 -0.835 -0.835 -0.835 -0.386 ...
## $ NO_OF_ATM_DR_TXNS : num 0.636 -0.673 -0.673 -0.673 -0.673 ...
## $ NO_OF_NET_DR_TXNS : num -0.483 -0.483 -0.483 -0.483 -0.483 ...
## $ NO_OF_MOB_DR_TXNS : num 0.286 -0.204 -0.204 0.286 -0.204 ...
## $ NO_OF_CHQ_DR_TXNS : num 1.617 -0.054 -0.89 -0.89 -0.054 ...
## $ FLG_HAS_CC : num 1.505 -0.665 -0.665 -0.665 -0.665 ...
## $ AMT_ATM_DR : num 1.579 -0.709 -0.709 -0.709 -0.709 ...
## $ AMT_BR_CSH_WDL_DR : num 1.361 -1.139 -1.139 -1.139 -0.393 ...
## $ AMT_CHQ_DR : num -0.214 -0.178 -0.28 -0.28 -0.183 ...
## $ AMT_NET_DR : num -0.742 -0.742 -0.742 -0.742 -0.742 ...
## $ AMT_MOB_DR : num -0.0895 -0.4529 -0.4529 2.2785 -0.4529 ...
## $ AMT_L_DR : num 0.192 -1.008 -1.072 -0.885 -0.669 ...
## $ FLG_HAS_ANY_CHGS : num -0.353 -0.353 -0.353 -0.353 -0.353 ...
## $ AMT_OTH_BK_ATM_USG_CHGS : num -0.079 -0.079 -0.079 -0.079 -0.079 ...
## $ AMT_MIN_BAL_NMC_CHGS : num -0.089 -0.089 -0.089 -0.089 -0.089 ...
## $ NO_OF_IW_CHQ_BNC_TXNS : num -0.209 -0.209 -0.209 -0.209 -0.209 ...
## $ NO_OF_OW_CHQ_BNC_TXNS : num -0.215 -0.215 -0.215 -0.215 -0.215 ...
## $ AVG_AMT_PER_ATM_TXN : num 1.43 -1.02 -1.02 -1.02 -1.02 ...
## $ AVG_AMT_PER_CSH_WDL_TXN : num -0.4594 -0.9007 -0.9007 -0.9007 0.0207 ...
## $ AVG_AMT_PER_CHQ_TXN : num -0.4049 -0.044 -0.5041 -0.5041 -0.0685 ...
## $ AVG_AMT_PER_NET_TXN : num -0.635 -0.635 -0.635 -0.635 -0.635 ...
## $ AVG_AMT_PER_MOB_TXN : num -0.0499 -0.4363 -0.4363 2.4676 -0.4363 ...
## $ FLG_HAS_NOMINEE : num 0.335 0.335 0.335 0.335 0.335 ...
## $ FLG_HAS_OLD_LOAN : num -0.987 -0.987 1.013 -0.987 1.013 ...
## $ OCCUPATIONPROF : num -0.608 -0.608 -0.608 -0.608 -0.608 ...
## $ OCCUPATIONSAL : num -0.646 1.548 -0.646 -0.646 -0.646 ...
## $ OCCUPATIONSELF.EMP : num -0.466 -0.466 2.145 -0.466 2.145 ...
## $ OCCUPATIONSENP : num 1.699 -0.589 -0.589 1.699 -0.589 ...
## $ GENDERF : num -0.609 -0.609 1.642 1.642 -0.609 ...
## $ GENDERM : num 0.624 0.624 -1.603 -1.603 0.624 ...
## $ GENDERO : num -0.0981 -0.0981 -0.0981 -0.0981 -0.0981 ...
## $ ACC_TYPECA : num -0.52 -0.52 -0.52 1.92 -0.52 ...
## $ ACC_TYPESA : num 0.52 0.52 0.52 -1.92 0.52 ...
## Fit a feed-forward network on the standardised development data:
## one hidden layer with 6 neurons, sum-of-squared-errors as error function,
## and the activation function also applied to the output neuron
## (linear.output = FALSE). Training stops when the threshold of 0.1 is
## reached or after at most 2000 steps; progress is printed every 10 steps.
nn2 <- neuralnet(
  formula = fo,
  data = nn.devscaled,
  hidden = 6,
  linear.output = FALSE,
  err.fct = "sse",
  threshold = 0.1,
  stepmax = 2000,
  lifesign = "full",
  lifesign.step = 10
)
## hidden: 6 thresh: 0.1 rep: 1/1 steps: 10 min thresh: 21.22515357
## 20 min thresh: 5.486991145
## 30 min thresh: 5.486991145
## 40 min thresh: 5.113033639
## 50 min thresh: 3.602604686
## 60 min thresh: 3.602604686
## 70 min thresh: 3.602604686
## 80 min thresh: 3.018602058
## 90 min thresh: 2.955307515
## 100 min thresh: 2.955307515
## 110 min thresh: 2.955307515
## 120 min thresh: 2.8494937
## 130 min thresh: 2.8494937
## 140 min thresh: 1.894810883
## 150 min thresh: 1.894810883
## 160 min thresh: 1.87764536
## 170 min thresh: 1.443458277
## 180 min thresh: 1.319207091
## 190 min thresh: 1.223014749
## 200 min thresh: 1.223014749
## 210 min thresh: 0.9845342857
## 220 min thresh: 0.9845342857
## 230 min thresh: 0.8335323055
## 240 min thresh: 0.7792614739
## 250 min thresh: 0.7792614739
## 260 min thresh: 0.7459294348
## 270 min thresh: 0.7459294348
## 280 min thresh: 0.7459294348
## 290 min thresh: 0.6742613947
## 300 min thresh: 0.6742613947
## 310 min thresh: 0.6742613947
## 320 min thresh: 0.6742613947
## 330 min thresh: 0.6495852547
## 340 min thresh: 0.5651036251
## 350 min thresh: 0.5651036251
## 360 min thresh: 0.5501872237
## 370 min thresh: 0.5501872237
## 380 min thresh: 0.5008992495
## 390 min thresh: 0.5008992495
## 400 min thresh: 0.5008992495
## 410 min thresh: 0.5008992495
## 420 min thresh: 0.5008992495
## 430 min thresh: 0.5008992495
## 440 min thresh: 0.5008992495
## 450 min thresh: 0.5008992495
## 460 min thresh: 0.5008992495
## 470 min thresh: 0.5008992495
## 480 min thresh: 0.5008992495
## 490 min thresh: 0.4918360775
## 500 min thresh: 0.4918360775
## 510 min thresh: 0.4918360775
## 520 min thresh: 0.4918360775
## 530 min thresh: 0.4918360775
## 540 min thresh: 0.4918360775
## 550 min thresh: 0.4918360775
## 560 min thresh: 0.4918360775
## 570 min thresh: 0.4763025563
## 580 min thresh: 0.3270519883
## 590 min thresh: 0.3270519883
## 600 min thresh: 0.2873194742
## 610 min thresh: 0.282294847
## 620 min thresh: 0.282294847
## 630 min thresh: 0.282294847
## 640 min thresh: 0.2820989703
## 650 min thresh: 0.2820989703
## 660 min thresh: 0.2820989703
## 670 min thresh: 0.2820989703
## 680 min thresh: 0.2820989703
## 690 min thresh: 0.2820989703
## 700 min thresh: 0.2820989703
## 710 min thresh: 0.2820989703
## 720 min thresh: 0.2820989703
## 730 min thresh: 0.2820989703
## 740 min thresh: 0.2820989703
## 750 min thresh: 0.2512066741
## 760 min thresh: 0.2512066741
## 770 min thresh: 0.2512066741
## 780 min thresh: 0.2512066741
## 790 min thresh: 0.2512066741
## 800 min thresh: 0.2512066741
## 810 min thresh: 0.2512066741
## 820 min thresh: 0.2512066741
## 830 min thresh: 0.2512066741
## 840 min thresh: 0.2512066741
## 850 min thresh: 0.2512066741
## 860 min thresh: 0.2512066741
## 870 min thresh: 0.2512066741
## 880 min thresh: 0.2512066741
## 890 min thresh: 0.2512066741
## 900 min thresh: 0.2512066741
## 910 min thresh: 0.2440842996
## 920 min thresh: 0.2440842996
## 930 min thresh: 0.2440842996
## 940 min thresh: 0.1879317412
## 950 min thresh: 0.1879317412
## 960 min thresh: 0.1710145124
## 970 min thresh: 0.1710145124
## 980 min thresh: 0.1710145124
## 990 min thresh: 0.1710145124
## 1000 min thresh: 0.1710145124
## 1010 min thresh: 0.1710145124
## 1020 min thresh: 0.1710145124
## 1030 min thresh: 0.1710145124
## 1040 min thresh: 0.1710145124
## 1050 min thresh: 0.1638523711
## 1060 min thresh: 0.1368024068
## 1070 min thresh: 0.1368024068
## 1080 min thresh: 0.1368024068
## 1090 min thresh: 0.1102316126
## 1100 min thresh: 0.1102316126
## 1110 min thresh: 0.1102316126
## 1120 min thresh: 0.1102316126
## 1130 min thresh: 0.1102316126
## 1140 min thresh: 0.1102316126
## 1150 min thresh: 0.1102316126
## 1160 min thresh: 0.1102316126
## 1170 min thresh: 0.1102316126
## 1180 min thresh: 0.1102316126
## 1190 min thresh: 0.1102316126
## 1200 min thresh: 0.1102316126
## 1210 min thresh: 0.1102316126
## 1220 min thresh: 0.1102316126
## 1230 min thresh: 0.1102316126
## 1240 min thresh: 0.1102316126
## 1250 min thresh: 0.1102316126
## 1260 min thresh: 0.1102316126
## 1270 min thresh: 0.1102316126
## 1280 min thresh: 0.1102316126
## 1290 min thresh: 0.1102316126
## 1300 min thresh: 0.1102316126
## 1310 min thresh: 0.1102316126
## 1320 min thresh: 0.1102316126
## 1330 min thresh: 0.1102316126
## 1340 min thresh: 0.1102316126
## 1350 min thresh: 0.1059522154
## 1360 min thresh: 0.1059522154
## 1370 min thresh: 0.1059522154
## 1380 min thresh: 0.1059522154
## 1390 min thresh: 0.1059522154
## 1400 min thresh: 0.1059522154
## 1410 error: 659.42699 time: 26.84 secs
## Draw the fitted network
plot(nn2)
## List the components stored in the fitted object and its class
attributes(nn2)
## $names
## [1] "call" "response" "covariate"
## [4] "model.list" "err.fct" "act.fct"
## [7] "linear.output" "data" "net.result"
## [10] "weights" "startweights" "generalized.weights"
## [13] "result.matrix"
##
## $class
## [1] "nn"
## Attach the fitted values of the network to the development sample.
## net.result[[1]] is a one-column matrix, so Prob is stored as a matrix.
nn.dev$Prob <- nn2$net.result[[1]]
## Spread of the estimated probabilities at selected percentiles
quantile(
  nn.dev$Prob,
  probs = c(0, 1, 5, 10, 25, 50, 75, 90, 95, 99, 100) / 100
)
## 0% 1% 5% 10%
## 0.0004477242608 0.0004744909457 0.0027194773936 0.0042439091581
## 25% 50% 75% 90%
## 0.0155409115983 0.0564753541359 0.1182105603749 0.3426845266539
## 95% 99% 100%
## 0.4374282222438 0.7677390075998 0.9596859704383
## Histogram of the estimated probabilities on the development sample
hist(nn.dev$Prob)
## deciling code
## Assign every score to a decile bucket (1 = lowest scores, 10 = highest).
##
## x       : numeric vector (or matrix) of scores. NAs are ignored when the
##           cut points are computed and stay NA in the result.
## returns : numeric object with the same length and attributes (names, dim)
##           as x, holding the bucket number 1..10. A score equal to a cut
##           point falls into the higher bucket.
decile <- function(x) {
  # Only the 10%, ..., 90% quantiles act as cut points; the 100% quantile
  # (the maximum) is never needed.
  cuts <- quantile(x, probs = seq(0.1, 0.9, 0.1), na.rm = TRUE, names = FALSE)
  # Bucket = 1 + number of cut points the score has reached. Comparing against
  # each cut point keeps the attributes of x and propagates NA.
  1 + Reduce(`+`, lapply(cuts, function(q) x >= q))
}
## deciling
## Search the installed help pages for "decile" (interactive lookup only)
??decile
## Bucket every development-sample score into its decile
nn.dev$deciles <- decile(nn.dev$Prob)
## Help lookup for decile(); it is defined in this notebook and has no help
## page, which is what the message below reports
?decile
## No documentation for 'decile' in specified packages and libraries:
## you could try '??decile'
## Check how the probabilities are stored
class(nn.dev$Prob)
## [1] "matrix"
## Ranking code
## Decile-wise rank-ordering table for the development sample: number of
## customers, responders and non-responders per decile (highest decile
## first), response rate, cumulative counts and shares, and the KS statistic.
##install.packages("data.table")
library(data.table)
tmp_DT <- data.table(nn.dev)
rank <- tmp_DT[, list(
  cnt = length(TARGET),
  cnt_resp = sum(TARGET),
  cnt_non_resp = sum(TARGET == 0)),
  by = deciles][order(-deciles)]
rank$rrate <- round(rank$cnt_resp / rank$cnt, 2)
rank$cum_resp <- cumsum(rank$cnt_resp)
rank$cum_non_resp <- cumsum(rank$cnt_non_resp)
## Compute KS from the unrounded cumulative shares; rounding the shares to two
## decimals first would distort the statistic by up to a percentage point.
rank$cum_rel_resp <- rank$cum_resp / sum(rank$cnt_resp)
rank$cum_rel_non_resp <- rank$cum_non_resp / sum(rank$cnt_non_resp)
rank$ks <- round(abs(rank$cum_rel_resp - rank$cum_rel_non_resp), 4)
## Round the shares only now, for display
rank$cum_rel_resp <- round(rank$cum_rel_resp, 2)
rank$cum_rel_non_resp <- round(rank$cum_rel_non_resp, 2)
library(scales)
## Format the rate columns as percentage strings
rank$rrate <- percent(rank$rrate)
rank$cum_rel_resp <- percent(rank$cum_rel_resp)
rank$cum_rel_non_resp <- percent(rank$cum_rel_non_resp)
View(rank)
## Assigning the 0 / 1 class with a probability cut-off of 0.5
nn.dev$Class <- ifelse(nn.dev$Prob > 0.5, 1, 0)
## Cross-tabulate the actual TARGET (rows) against the assigned class (columns)
table(TARGET = nn.dev$TARGET, as.factor(nn.dev$Class))
##
## TARGET 0 1
## 0 13000 111
## 1 1530 359
## We can use the confusionMatrix function of the caret package
##install.packages("caret")
library(caret)
## Warning: package 'caret' was built under R version 3.4.3
## Loading required package: lattice
## Loading required package: ggplot2
## confusionMatrix() takes the predicted classes first (`data`) and the true
## classes second (`reference`), both as factors with the same levels.
## NOTE: the output recorded below was produced with the two arguments
## swapped, so its "Prediction" rows are the actual TARGET and its "Reference"
## columns the assigned Class; the class-wise statistics shown there are
## therefore transposed. Re-run this chunk to refresh the output.
confusionMatrix(
  data = factor(nn.dev$Class, levels = c(0, 1)),
  reference = factor(nn.dev$TARGET, levels = c(0, 1))
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 13000 111
## 1 1530 359
##
## Accuracy : 0.8906
## 95% CI : (0.8854955, 0.8955524)
## No Information Rate : 0.9686667
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.2676143
## Mcnemar's Test P-Value : <0.0000000000000002
##
## Sensitivity : 0.8947006
## Specificity : 0.7638298
## Pos Pred Value : 0.9915338
## Neg Pred Value : 0.1900476
## Prevalence : 0.9686667
## Detection Rate : 0.8666667
## Detection Prevalence : 0.8740667
## Balanced Accuracy : 0.8292652
##
## 'Positive' Class : 0
##
## Error Computation
## Half the sum of squared differences between the actual TARGET and the
## estimated probability on the development sample. The column is named
## TARGET; `nn.dev$Target` does not exist, so the sum was taken over nothing
## and the result was always 0.
sum((nn.dev$TARGET - nn.dev$Prob)^2)/2
## Expected to match the training error printed by neuralnet above
## (error: 659.42699) -- re-run to confirm.
## Other Model Performance Measures
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
##
##
## Attaching package: 'ROCR'
##
## The following object is masked from 'package:neuralnet':
##
## prediction
## ROC-based performance measures on the development sample.
## prediction() is the ROCR function here (it masks neuralnet's).
pred <- prediction(nn.dev$Prob, nn.dev$TARGET)
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf)
## KS: largest gap between true-positive and false-positive rate on the curve
KS <- max(perf@y.values[[1]] - perf@x.values[[1]])
## Area under the ROC curve
auc <- as.numeric(performance(pred, "auc")@y.values)
library(ineq)
## NOTE(review): ineq() only receives the predicted scores, so this is the
## Gini inequality coefficient of the score distribution; it does not use
## TARGET. If the model Gini (2 * AUC - 1) was intended, confirm and change.
gini <- ineq(nn.dev$Prob, type = "Gini")
auc
## [1] 0.7739889753
KS
## [1] 0.428657189
gini
## [1] 0.6306458561
## Scoring another dataset using the Neural Net Model Object
## To score we will use the compute function
## First list the holdout columns; they include the same indicator columns
## that were added to the development sample
colnames(nn.holdout)
## [1] "CUST_ID" "TARGET"
## [3] "AGE" "GENDER"
## [5] "BALANCE" "OCCUPATION"
## [7] "AGE_BKT" "SCR"
## [9] "HOLDING_PERIOD" "ACC_TYPE"
## [11] "ACC_OP_DATE" "LEN_OF_RLTN_IN_MNTH"
## [13] "NO_OF_L_CR_TXNS" "NO_OF_L_DR_TXNS"
## [15] "TOT_NO_OF_L_TXNS" "NO_OF_BR_CSH_WDL_DR_TXNS"
## [17] "NO_OF_ATM_DR_TXNS" "NO_OF_NET_DR_TXNS"
## [19] "NO_OF_MOB_DR_TXNS" "NO_OF_CHQ_DR_TXNS"
## [21] "FLG_HAS_CC" "AMT_ATM_DR"
## [23] "AMT_BR_CSH_WDL_DR" "AMT_CHQ_DR"
## [25] "AMT_NET_DR" "AMT_MOB_DR"
## [27] "AMT_L_DR" "FLG_HAS_ANY_CHGS"
## [29] "AMT_OTH_BK_ATM_USG_CHGS" "AMT_MIN_BAL_NMC_CHGS"
## [31] "NO_OF_IW_CHQ_BNC_TXNS" "NO_OF_OW_CHQ_BNC_TXNS"
## [33] "AVG_AMT_PER_ATM_TXN" "AVG_AMT_PER_CSH_WDL_TXN"
## [35] "AVG_AMT_PER_CHQ_TXN" "AVG_AMT_PER_NET_TXN"
## [37] "AVG_AMT_PER_MOB_TXN" "FLG_HAS_NOMINEE"
## [39] "FLG_HAS_OLD_LOAN" "random"
## [41] "OCCUPATIONPROF" "OCCUPATIONSAL"
## [43] "OCCUPATIONSELF.EMP" "OCCUPATIONSENP"
## [45] "GENDERF" "GENDERM"
## [47] "GENDERO" "ACC_TYPECA"
## [49] "ACC_TYPESA"
## Score the holdout sample with the network trained on the development data
?compute
## Take exactly the predictor columns the network was trained on, by name and
## in the same order as in `x`.
y <- nn.holdout[, names(x), drop = FALSE]
## Standardise the holdout with the development sample's means and standard
## deviations. The network weights were fitted to inputs on that scale, so
## re-centring the holdout on its own statistics would feed the model
## differently scaled values.
y.scaled <- scale(
  y,
  center = colMeans(x, na.rm = TRUE),
  scale = vapply(x, sd, numeric(1), na.rm = TRUE)
)
nn.holdscaled <- cbind(nn.holdout[1], y.scaled)
compute.output <- compute(nn2, y.scaled)
nn.holdout$Predict.score <- compute.output$net.result
## NOTE: the quantiles recorded below were produced when the holdout was
## scaled with its own mean/sd; re-run to refresh them.
quantile(nn.holdout$Predict.score, c(0,1,5,10,25,50,75,90,95,99,100)/100)
## 0% 1% 5% 10%
## 0.0004477270869 0.0004683543919 0.0027312665844 0.0043306504713
## 25% 50% 75% 90%
## 0.0161451172037 0.0591331457638 0.1177008147969 0.3476430277130
## 95% 99% 100%
## 0.4361521321458 0.7632038401552 0.8996005751058
## Decile-wise rank-ordering table for the holdout sample, built the same way
## as the development table: counts per decile (highest decile first),
## response rate, cumulative counts and shares, and the KS statistic.
nn.holdout$deciles <- decile(as.numeric(nn.holdout$Predict.score))
library(data.table)
tmp_DT <- data.table(nn.holdout)
h_rank <- tmp_DT[, list(
  cnt = length(TARGET),
  cnt_resp = sum(TARGET),
  cnt_non_resp = sum(TARGET == 0)),
  by = deciles][order(-deciles)]
h_rank$rrate <- round(h_rank$cnt_resp / h_rank$cnt, 2)
h_rank$cum_resp <- cumsum(h_rank$cnt_resp)
h_rank$cum_non_resp <- cumsum(h_rank$cnt_non_resp)
## Compute KS from the unrounded cumulative shares; rounding the shares to two
## decimals first would distort the statistic by up to a percentage point.
h_rank$cum_rel_resp <- h_rank$cum_resp / sum(h_rank$cnt_resp)
h_rank$cum_rel_non_resp <- h_rank$cum_non_resp / sum(h_rank$cnt_non_resp)
h_rank$ks <- round(abs(h_rank$cum_rel_resp - h_rank$cum_rel_non_resp), 4)
## Round the shares only now, for display
h_rank$cum_rel_resp <- round(h_rank$cum_rel_resp, 2)
h_rank$cum_rel_non_resp <- round(h_rank$cum_rel_non_resp, 2)
library(scales)
## Format the rate columns as percentage strings
h_rank$rrate <- percent(h_rank$rrate)
h_rank$cum_rel_resp <- percent(h_rank$cum_rel_resp)
h_rank$cum_rel_non_resp <- percent(h_rank$cum_rel_non_resp)
View(h_rank)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.