library(randomForest)
library(rpart)
library(readr)
library(caret)
library(data.table)
library(scales)
library(ROCR)
library(ineq)
library(knitr)
# Load the cross-sell dataset and convert the readr result to a plain data.frame.
# NOTE(review): this is an absolute path on a local Windows drive; it must be
# adjusted before the notebook can run on another machine.
fulldata <- as.data.frame(
  read_csv(file = "D:/PG Business Analytics/DM/Group Assignment/PL_XSELL.csv")
)
Parsed with column specification:
cols(
.default = col_integer(),
CUST_ID = col_character(),
GENDER = col_character(),
BALANCE = col_double(),
OCCUPATION = col_character(),
AGE_BKT = col_character(),
ACC_TYPE = col_character(),
ACC_OP_DATE = col_character(),
AMT_NET_DR = col_double(),
AMT_L_DR = col_double(),
AVG_AMT_PER_ATM_TXN = col_double(),
AVG_AMT_PER_CSH_WDL_TXN = col_double(),
AVG_AMT_PER_CHQ_TXN = col_double(),
AVG_AMT_PER_NET_TXN = col_double(),
AVG_AMT_PER_MOB_TXN = col_double(),
random = col_double()
)
See spec(...) for full column specifications.
head(fulldata)
summary(fulldata)
CUST_ID TARGET AGE GENDER BALANCE
Length:20000 Min. :0.0000 Min. :21.00 Length:20000 Min. : 0
Class :character 1st Qu.:0.0000 1st Qu.:30.00 Class :character 1st Qu.: 64754
Mode :character Median :0.0000 Median :38.00 Mode :character Median : 231676
Mean :0.1256 Mean :38.42 Mean : 511362
3rd Qu.:0.0000 3rd Qu.:46.00 3rd Qu.: 653877
Max. :1.0000 Max. :55.00 Max. :8360431
OCCUPATION AGE_BKT SCR HOLDING_PERIOD ACC_TYPE
Length:20000 Length:20000 Min. :100.0 Min. : 1.00 Length:20000
Class :character Class :character 1st Qu.:227.0 1st Qu.: 7.00 Class :character
Mode :character Mode :character Median :364.0 Median :15.00 Mode :character
Mean :440.2 Mean :14.96
3rd Qu.:644.0 3rd Qu.:22.00
Max. :999.0 Max. :31.00
ACC_OP_DATE LEN_OF_RLTN_IN_MNTH NO_OF_L_CR_TXNS NO_OF_L_DR_TXNS TOT_NO_OF_L_TXNS
Length:20000 Min. : 29.0 Min. : 0.00 Min. : 0.000 Min. : 0.00
Class :character 1st Qu.: 79.0 1st Qu.: 6.00 1st Qu.: 2.000 1st Qu.: 9.00
Mode :character Median :125.0 Median :10.00 Median : 5.000 Median : 14.00
Mean :125.2 Mean :12.35 Mean : 6.634 Mean : 18.98
3rd Qu.:172.0 3rd Qu.:14.00 3rd Qu.: 7.000 3rd Qu.: 21.00
Max. :221.0 Max. :75.00 Max. :74.000 Max. :149.00
NO_OF_BR_CSH_WDL_DR_TXNS NO_OF_ATM_DR_TXNS NO_OF_NET_DR_TXNS NO_OF_MOB_DR_TXNS
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.0000
1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.0000
Median : 1.000 Median : 1.000 Median : 0.000 Median : 0.0000
Mean : 1.883 Mean : 1.029 Mean : 1.172 Mean : 0.4118
3rd Qu.: 2.000 3rd Qu.: 1.000 3rd Qu.: 1.000 3rd Qu.: 0.0000
Max. :15.000 Max. :25.000 Max. :22.000 Max. :25.0000
NO_OF_CHQ_DR_TXNS FLG_HAS_CC AMT_ATM_DR AMT_BR_CSH_WDL_DR AMT_CHQ_DR
Min. : 0.000 Min. :0.0000 Min. : 0 Min. : 0 Min. : 0
1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.: 0 1st Qu.: 2990 1st Qu.: 0
Median : 2.000 Median :0.0000 Median : 6900 Median :340150 Median : 23840
Mean : 2.138 Mean :0.3054 Mean : 10990 Mean :378475 Mean : 124520
3rd Qu.: 4.000 3rd Qu.:1.0000 3rd Qu.: 15800 3rd Qu.:674675 3rd Qu.: 72470
Max. :15.000 Max. :1.0000 Max. :199300 Max. :999930 Max. :4928640
AMT_NET_DR AMT_MOB_DR AMT_L_DR FLG_HAS_ANY_CHGS AMT_OTH_BK_ATM_USG_CHGS
Min. : 0 Min. : 0 Min. : 0 Min. :0.0000 Min. : 0.000
1st Qu.: 0 1st Qu.: 0 1st Qu.: 237936 1st Qu.:0.0000 1st Qu.: 0.000
Median : 0 Median : 0 Median : 695115 Median :0.0000 Median : 0.000
Mean :237308 Mean : 22425 Mean : 773717 Mean :0.1106 Mean : 1.099
3rd Qu.:473971 3rd Qu.: 0 3rd Qu.:1078927 3rd Qu.:0.0000 3rd Qu.: 0.000
Max. :999854 Max. :199667 Max. :6514921 Max. :1.0000 Max. :250.000
AMT_MIN_BAL_NMC_CHGS NO_OF_IW_CHQ_BNC_TXNS NO_OF_OW_CHQ_BNC_TXNS AVG_AMT_PER_ATM_TXN
Min. : 0.000 Min. :0.00000 Min. :0.0000 Min. : 0
1st Qu.: 0.000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.: 0
Median : 0.000 Median :0.00000 Median :0.0000 Median : 6000
Mean : 1.292 Mean :0.04275 Mean :0.0444 Mean : 7409
3rd Qu.: 0.000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:13500
Max. :170.000 Max. :2.00000 Max. :2.0000 Max. :25000
AVG_AMT_PER_CSH_WDL_TXN AVG_AMT_PER_CHQ_TXN AVG_AMT_PER_NET_TXN AVG_AMT_PER_MOB_TXN
Min. : 0 Min. : 0 Min. : 0 Min. : 0
1st Qu.: 1266 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0
Median :147095 Median : 8645 Median : 0 Median : 0
Mean :242237 Mean : 25093 Mean :179059 Mean : 20304
3rd Qu.:385000 3rd Qu.: 28605 3rd Qu.:257699 3rd Qu.: 0
Max. :999640 Max. :537842 Max. :999854 Max. :199667
FLG_HAS_NOMINEE FLG_HAS_OLD_LOAN random
Min. :0.0000 Min. :0.0000 Min. :0.0000114
1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.2481866
Median :1.0000 Median :0.0000 Median :0.5061214
Mean :0.9012 Mean :0.4929 Mean :0.5019330
3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.7535712
Max. :1.0000 Max. :1.0000 Max. :0.9999471
# Build the modelling frame by removing the customer id and the character
# columns (these are columns 1, 4, 6, 7, 10 and 11 of the raw file).
d1 <- fulldata[, !(names(fulldata) %in% c(
  "CUST_ID", "GENDER", "OCCUPATION", "AGE_BKT", "ACC_TYPE", "ACC_OP_DATE"
))]
summary(d1)
TARGET AGE BALANCE SCR HOLDING_PERIOD
Min. :0.0000 Min. :21.00 Min. : 0 Min. :100.0 Min. : 1.00
1st Qu.:0.0000 1st Qu.:30.00 1st Qu.: 64754 1st Qu.:227.0 1st Qu.: 7.00
Median :0.0000 Median :38.00 Median : 231676 Median :364.0 Median :15.00
Mean :0.1256 Mean :38.42 Mean : 511362 Mean :440.2 Mean :14.96
3rd Qu.:0.0000 3rd Qu.:46.00 3rd Qu.: 653877 3rd Qu.:644.0 3rd Qu.:22.00
Max. :1.0000 Max. :55.00 Max. :8360431 Max. :999.0 Max. :31.00
LEN_OF_RLTN_IN_MNTH NO_OF_L_CR_TXNS NO_OF_L_DR_TXNS TOT_NO_OF_L_TXNS NO_OF_BR_CSH_WDL_DR_TXNS
Min. : 29.0 Min. : 0.00 Min. : 0.000 Min. : 0.00 Min. : 0.000
1st Qu.: 79.0 1st Qu.: 6.00 1st Qu.: 2.000 1st Qu.: 9.00 1st Qu.: 1.000
Median :125.0 Median :10.00 Median : 5.000 Median : 14.00 Median : 1.000
Mean :125.2 Mean :12.35 Mean : 6.634 Mean : 18.98 Mean : 1.883
3rd Qu.:172.0 3rd Qu.:14.00 3rd Qu.: 7.000 3rd Qu.: 21.00 3rd Qu.: 2.000
Max. :221.0 Max. :75.00 Max. :74.000 Max. :149.00 Max. :15.000
NO_OF_ATM_DR_TXNS NO_OF_NET_DR_TXNS NO_OF_MOB_DR_TXNS NO_OF_CHQ_DR_TXNS FLG_HAS_CC
Min. : 0.000 Min. : 0.000 Min. : 0.0000 Min. : 0.000 Min. :0.0000
1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.:0.0000
Median : 1.000 Median : 0.000 Median : 0.0000 Median : 2.000 Median :0.0000
Mean : 1.029 Mean : 1.172 Mean : 0.4118 Mean : 2.138 Mean :0.3054
3rd Qu.: 1.000 3rd Qu.: 1.000 3rd Qu.: 0.0000 3rd Qu.: 4.000 3rd Qu.:1.0000
Max. :25.000 Max. :22.000 Max. :25.0000 Max. :15.000 Max. :1.0000
AMT_ATM_DR AMT_BR_CSH_WDL_DR AMT_CHQ_DR AMT_NET_DR AMT_MOB_DR
Min. : 0 Min. : 0 Min. : 0 Min. : 0 Min. : 0
1st Qu.: 0 1st Qu.: 2990 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0
Median : 6900 Median :340150 Median : 23840 Median : 0 Median : 0
Mean : 10990 Mean :378475 Mean : 124520 Mean :237308 Mean : 22425
3rd Qu.: 15800 3rd Qu.:674675 3rd Qu.: 72470 3rd Qu.:473971 3rd Qu.: 0
Max. :199300 Max. :999930 Max. :4928640 Max. :999854 Max. :199667
AMT_L_DR FLG_HAS_ANY_CHGS AMT_OTH_BK_ATM_USG_CHGS AMT_MIN_BAL_NMC_CHGS
Min. : 0 Min. :0.0000 Min. : 0.000 Min. : 0.000
1st Qu.: 237936 1st Qu.:0.0000 1st Qu.: 0.000 1st Qu.: 0.000
Median : 695115 Median :0.0000 Median : 0.000 Median : 0.000
Mean : 773717 Mean :0.1106 Mean : 1.099 Mean : 1.292
3rd Qu.:1078927 3rd Qu.:0.0000 3rd Qu.: 0.000 3rd Qu.: 0.000
Max. :6514921 Max. :1.0000 Max. :250.000 Max. :170.000
NO_OF_IW_CHQ_BNC_TXNS NO_OF_OW_CHQ_BNC_TXNS AVG_AMT_PER_ATM_TXN AVG_AMT_PER_CSH_WDL_TXN
Min. :0.00000 Min. :0.0000 Min. : 0 Min. : 0
1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.: 0 1st Qu.: 1266
Median :0.00000 Median :0.0000 Median : 6000 Median :147095
Mean :0.04275 Mean :0.0444 Mean : 7409 Mean :242237
3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:13500 3rd Qu.:385000
Max. :2.00000 Max. :2.0000 Max. :25000 Max. :999640
AVG_AMT_PER_CHQ_TXN AVG_AMT_PER_NET_TXN AVG_AMT_PER_MOB_TXN FLG_HAS_NOMINEE FLG_HAS_OLD_LOAN
Min. : 0 Min. : 0 Min. : 0 Min. :0.0000 Min. :0.0000
1st Qu.: 0 1st Qu.: 0 1st Qu.: 0 1st Qu.:1.0000 1st Qu.:0.0000
Median : 8645 Median : 0 Median : 0 Median :1.0000 Median :0.0000
Mean : 25093 Mean :179059 Mean : 20304 Mean :0.9012 Mean :0.4929
3rd Qu.: 28605 3rd Qu.:257699 3rd Qu.: 0 3rd Qu.:1.0000 3rd Qu.:1.0000
Max. :537842 Max. :999854 Max. :199667 Max. :1.0000 Max. :1.0000
random
Min. :0.0000114
1st Qu.:0.2481866
Median :0.5061214
Mean :0.5019330
3rd Qu.:0.7535712
Max. :0.9999471
str(d1)
'data.frame': 20000 obs. of 34 variables:
$ TARGET : int 0 0 0 0 0 0 0 0 0 0 ...
$ AGE : int 27 47 40 53 36 42 30 53 42 30 ...
$ BALANCE : num 3384 287489 18217 71720 1671623 ...
$ SCR : int 776 324 603 196 167 493 479 562 105 170 ...
$ HOLDING_PERIOD : int 30 28 2 13 24 26 14 25 15 13 ...
$ LEN_OF_RLTN_IN_MNTH : int 146 104 61 107 185 192 177 99 88 111 ...
$ NO_OF_L_CR_TXNS : int 7 8 10 36 20 5 6 14 18 14 ...
$ NO_OF_L_DR_TXNS : int 3 2 5 14 1 2 6 3 14 8 ...
$ TOT_NO_OF_L_TXNS : int 10 10 15 50 21 7 12 17 32 22 ...
$ NO_OF_BR_CSH_WDL_DR_TXNS: int 0 0 1 4 1 1 0 3 6 3 ...
$ NO_OF_ATM_DR_TXNS : int 1 1 1 2 0 1 1 0 2 1 ...
$ NO_OF_NET_DR_TXNS : int 2 1 1 3 0 0 1 0 4 0 ...
$ NO_OF_MOB_DR_TXNS : int 0 0 0 1 0 0 0 0 1 0 ...
$ NO_OF_CHQ_DR_TXNS : int 0 0 2 4 0 0 4 0 1 4 ...
$ FLG_HAS_CC : int 0 0 0 0 0 1 0 0 1 0 ...
$ AMT_ATM_DR : int 13100 6600 11200 26100 0 18500 6200 0 35400 18000 ...
$ AMT_BR_CSH_WDL_DR : int 0 0 561120 673590 808480 379310 0 945160 198430 869880 ...
$ AMT_CHQ_DR : int 0 0 49320 60780 0 0 10580 0 51490 32610 ...
$ AMT_NET_DR : num 973557 799813 997570 741506 0 ...
$ AMT_MOB_DR : int 0 0 0 71388 0 0 0 0 170332 0 ...
$ AMT_L_DR : num 986657 806413 1619210 1573364 808480 ...
$ FLG_HAS_ANY_CHGS : int 0 1 1 0 0 0 1 0 0 0 ...
$ AMT_OTH_BK_ATM_USG_CHGS : int 0 0 0 0 0 0 0 0 0 0 ...
$ AMT_MIN_BAL_NMC_CHGS : int 0 0 0 0 0 0 0 0 0 0 ...
$ NO_OF_IW_CHQ_BNC_TXNS : int 0 0 0 0 0 0 0 0 0 0 ...
$ NO_OF_OW_CHQ_BNC_TXNS : int 0 0 1 0 0 0 0 0 0 0 ...
$ AVG_AMT_PER_ATM_TXN : num 13100 6600 11200 13050 0 ...
$ AVG_AMT_PER_CSH_WDL_TXN : num 0 0 561120 168398 808480 ...
$ AVG_AMT_PER_CHQ_TXN : num 0 0 24660 15195 0 ...
$ AVG_AMT_PER_NET_TXN : num 486779 799813 997570 247169 0 ...
$ AVG_AMT_PER_MOB_TXN : num 0 0 0 71388 0 ...
$ FLG_HAS_NOMINEE : int 1 1 1 1 1 1 0 1 1 0 ...
$ FLG_HAS_OLD_LOAN : int 1 0 1 0 0 1 1 1 1 0 ...
$ random : num 1.14e-05 1.11e-04 1.20e-04 1.37e-04 1.74e-04 ...
# Encode gender as two 0/1 indicator columns; any non-missing GENDER value
# other than "M"/"F" yields 0 in both columns.
d1$MALE <- ifelse(fulldata$GENDER == "M", 1, 0)
d1$FEMALE <- ifelse(fulldata$GENDER == "F", 1, 0)

# Fix the RNG seed so the 60/40 split (and everything fitted on it) can be
# reproduced when the notebook is re-run.
set.seed(1234)

# Partition on the class label as a factor so that caret samples within each
# class; for a numeric outcome it groups by percentiles instead, which is not
# a per-class stratification for a 0/1 target.
trainInd <- createDataPartition(factor(d1$TARGET), p = 0.6, list = FALSE)

train <- d1[trainInd, ]
test <- d1[-trainInd, ]
nrow(train)
[1] 12000
nrow(test)
[1] 8000
# Baseline random forest on the training partition.
# TARGET is stored as integer 0/1; passing it unchanged makes randomForest
# fit a regression forest (it warns about this and leaves RF1$err.rate NULL).
# Supplying the response as a factor yields a classification forest with an
# OOB error rate.
set.seed(1234)
RF1 <- randomForest(
  x = train[, setdiff(names(train), "TARGET")],
  y = factor(train$TARGET),
  ntree = 101, mtry = 24, nodesize = 30,
  importance = TRUE
)
The response has five or fewer unique values. Are you sure you want to do regression?
RF1
Call:
randomForest(formula = TARGET ~ ., data = train, ntree = 101, mtry = 24, nodesize = 30, importance = TRUE)
Type of random forest: regression
Number of trees: 101
No. of variables tried at each split: 24
Mean of squared residuals: 0.06246178
% Var explained: 43.09
RF1$err.rate
NULL
# Round the importance measures of RF1 to two decimals and list the variables
# from most to least important according to the last measure column.
impVar <- round(randomForest::importance(RF1), digits = 2)
impVar[order(impVar[, ncol(impVar)], decreasing = TRUE), ]
%IncMSE IncNodePurity
BALANCE 39.35 78.95
SCR 49.52 75.29
LEN_OF_RLTN_IN_MNTH 29.34 60.98
HOLDING_PERIOD 32.40 48.40
AMT_L_DR 19.27 46.72
AGE 25.27 44.59
NO_OF_L_CR_TXNS 17.82 43.17
TOT_NO_OF_L_TXNS 16.07 42.13
AMT_BR_CSH_WDL_DR 17.34 41.09
AVG_AMT_PER_CSH_WDL_TXN 16.76 36.53
AVG_AMT_PER_ATM_TXN 12.10 32.98
AMT_ATM_DR 14.68 30.75
AVG_AMT_PER_CHQ_TXN 16.00 29.09
AMT_CHQ_DR 19.29 27.93
random 0.38 27.86
AVG_AMT_PER_NET_TXN 13.27 24.81
AMT_NET_DR 13.48 23.99
NO_OF_L_DR_TXNS 13.21 23.54
AVG_AMT_PER_MOB_TXN 9.32 15.94
NO_OF_BR_CSH_WDL_DR_TXNS 14.09 15.05
AMT_MOB_DR 10.42 13.73
NO_OF_ATM_DR_TXNS 11.14 12.70
FLG_HAS_CC 21.49 11.99
NO_OF_CHQ_DR_TXNS 6.68 10.86
NO_OF_IW_CHQ_BNC_TXNS 7.63 5.17
FLG_HAS_ANY_CHGS 9.59 4.29
FEMALE 4.43 4.11
FLG_HAS_OLD_LOAN 8.25 3.54
NO_OF_OW_CHQ_BNC_TXNS 7.44 3.42
NO_OF_NET_DR_TXNS 4.57 3.36
FLG_HAS_NOMINEE 8.10 3.06
MALE 3.61 2.42
NO_OF_MOB_DR_TXNS 6.60 1.48
AMT_MIN_BAL_NMC_CHGS 1.31 0.39
AMT_OTH_BK_ATM_USG_CHGS 0.39 0.20
# Search for the mtry value with the lowest OOB error and, because
# doBest = TRUE, return the forest fitted with that value.
# Only the training partition is passed in: fitting on all of d1 would let the
# model see the rows that are later scored as `test`, which would make the
# hold-out metrics meaningless. Predictor names are taken from d1 so that any
# extra columns added to `train` later are never used as features.
set.seed(1234)
tRF <- tuneRF(
  x = train[, setdiff(names(d1), "TARGET")],
  y = as.factor(train$TARGET),
  mtryStart = 6,
  ntreeTry = 101,
  stepFactor = 2,
  improve = 0.001,
  trace = TRUE,
  plot = TRUE,
  doBest = TRUE,
  nodesize = 150,
  importance = FALSE
)
mtry = 6 OOB error = 12.23%
Searching left ...
mtry = 3 OOB error = 12.51%
-0.02289452 0.001
Searching right ...
mtry = 12 OOB error = 12.04%
0.01553557 0.001
mtry = 24 OOB error = 11.9%
0.01121262 0.001
mtry = 35 OOB error = 11.89%
0.001259975 0.001

tRF$importance
MeanDecreaseGini
AGE 51.15217871
BALANCE 105.85473208
SCR 111.28328187
HOLDING_PERIOD 84.71283598
LEN_OF_RLTN_IN_MNTH 54.48171361
NO_OF_L_CR_TXNS 90.12853969
NO_OF_L_DR_TXNS 62.26459737
TOT_NO_OF_L_TXNS 71.31838314
NO_OF_BR_CSH_WDL_DR_TXNS 13.86315737
NO_OF_ATM_DR_TXNS 23.41492708
NO_OF_NET_DR_TXNS 2.83366850
NO_OF_MOB_DR_TXNS 1.50919255
NO_OF_CHQ_DR_TXNS 9.50798669
FLG_HAS_CC 37.54258242
AMT_ATM_DR 36.15623700
AMT_BR_CSH_WDL_DR 47.34324876
AMT_CHQ_DR 30.46642054
AMT_NET_DR 20.35351794
AMT_MOB_DR 12.07468864
AMT_L_DR 58.40845407
FLG_HAS_ANY_CHGS 3.13020658
AMT_OTH_BK_ATM_USG_CHGS 0.02871932
AMT_MIN_BAL_NMC_CHGS 0.36726935
NO_OF_IW_CHQ_BNC_TXNS 7.02775670
NO_OF_OW_CHQ_BNC_TXNS 3.74806518
AVG_AMT_PER_ATM_TXN 34.10274749
AVG_AMT_PER_CSH_WDL_TXN 35.37585185
AVG_AMT_PER_CHQ_TXN 32.94370566
AVG_AMT_PER_NET_TXN 24.80776069
AVG_AMT_PER_MOB_TXN 21.97662447
FLG_HAS_NOMINEE 2.05622171
FLG_HAS_OLD_LOAN 1.80081032
random 16.07435530
MALE 2.12797638
FEMALE 4.21555805
# Score the training partition with the tuned forest: the predicted class
# label, and the per-class scores (a matrix, stored as one column of `train`).
train$predict.class <- predict(object = tRF, newdata = train, type = "class")
train$predict.score <- predict(object = tRF, newdata = train, type = "prob")
head(train)
class(train$predict.score)
[1] "matrix" "votes"
# Assign each score to a decile bucket (1 = lowest scores, 10 = highest).
#
# x: numeric vector of scores. NA values are ignored when the cut points are
#    computed and are returned as NA.
# Returns an integer vector of the same length as x. A score lands in bucket k
# when it is below the k-th decile cut point but not below the (k-1)-th, so
# when several cut points coincide (many tied scores) the lower buckets stay
# empty.
decile <- function(x) {
  cut_points <- quantile(x, probs = seq(0.1, 0.9, by = 0.1), na.rm = TRUE)
  # findInterval() counts the cut points that are <= x; adding one gives the
  # 1..10 bucket number.
  findInterval(x, cut_points) + 1L
}

# Bucket the training rows by their class-1 score.
train$deciles <- decile(train$predict.score[, 2])

# Per-decile counts of customers, responders and non-responders, best decile
# first. The table is named `rank` because later chunks refer to it by that
# name; note that this masks base::rank in the session.
tmp_DT <- data.table(train)
rank <- tmp_DT[, list(
  cnt = length(TARGET),
  cnt_resp = sum(TARGET),
  cnt_non_resp = sum(TARGET == 0)),
  by = deciles][order(-deciles)]

# Response rate and cumulative capture of responders / non-responders.
rank$rrate <- round(rank$cnt_resp / rank$cnt, 2)
rank$cum_resp <- cumsum(rank$cnt_resp)
rank$cum_non_resp <- cumsum(rank$cnt_non_resp)
rank$cum_rel_resp <- round(rank$cum_resp / sum(rank$cnt_resp), 2)
rank$cum_rel_non_resp <- round(rank$cum_non_resp / sum(rank$cnt_non_resp), 2)

# KS per decile: gap between the two cumulative shares. It is computed before
# the shares are turned into percentage strings below.
rank$ks <- abs(rank$cum_rel_resp - rank$cum_rel_non_resp)

# Format the rate columns for display.
rank$rrate <- percent(rank$rrate)
rank$cum_rel_resp <- percent(rank$cum_rel_resp)
rank$cum_rel_non_resp <- percent(rank$cum_rel_non_resp)
rank$ks
[1] 0.52 0.73 0.73 0.64 0.54 0.42 0.30 0.00
# Convert the training rank-order table to a plain data.frame and render it.
train_rank <- as.data.frame(rank)
kable(x = train_rank)
10 |
1209 |
841 |
368 |
70% |
841 |
368 |
56% |
4% |
0.52 |
9 |
1199 |
431 |
768 |
36% |
1272 |
1136 |
84% |
11% |
0.73 |
8 |
1311 |
152 |
1159 |
12% |
1424 |
2295 |
95% |
22% |
0.73 |
7 |
1304 |
56 |
1248 |
4% |
1480 |
3543 |
98% |
34% |
0.64 |
6 |
1236 |
18 |
1218 |
1% |
1498 |
4761 |
99% |
45% |
0.54 |
5 |
1359 |
5 |
1354 |
0% |
1503 |
6115 |
100% |
58% |
0.42 |
4 |
1237 |
3 |
1234 |
0% |
1506 |
7349 |
100% |
70% |
0.30 |
3 |
3145 |
0 |
3145 |
0% |
1506 |
10494 |
100% |
100% |
0.00 |
# Share of responders (TARGET == 1) in the training partition.
mean(train$TARGET)
[1] 0.1255
# ROC curve of the class-1 score against the actual label on the training rows.
pred <- prediction(predictions = train$predict.score[, 2], labels = train$TARGET)
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf)

# KS statistic: the largest gap between the true-positive and false-positive
# rates along the curve.
KS <- max(perf@y.values[[1]] - perf@x.values[[1]])
KS
[1] 0.7492918
# Area under the ROC curve for the training scores, as a plain number.
auc <- as.numeric(performance(pred, measure = "auc")@y.values)
auc
[1] 0.9436159
# Gini index of the predicted class-1 scores, as computed by ineq().
# NOTE(review): this describes how unequally the scores themselves are
# distributed; it is not derived from the ROC curve. Confirm that this is the
# "Gini" the report intends.
gini <- ineq(train$predict.score[, 2], type = "Gini")
gini
[1] 0.7866774
# Confusion matrix on the training rows: actual TARGET in rows, predicted
# class in columns.
table(TARGET = train$TARGET, predict.class = train$predict.class)
predict.class
TARGET 0 1
0 10488 6
1 1363 143
# Score the hold-out rows with the tuned forest and bucket them by class-1
# score, using the same decile() helper as for the training rows.
test$predict.class <- predict(object = tRF, newdata = test, type = "class")
test$predict.score <- predict(object = tRF, newdata = test, type = "prob")
test$deciles <- decile(test$predict.score[, 2])

# Per-decile counts of customers, responders and non-responders, best decile
# first.
tmp_DT <- data.table(test)
h_rank <- tmp_DT[, list(
  cnt = length(TARGET),
  cnt_resp = sum(TARGET),
  cnt_non_resp = sum(TARGET == 0)),
  by = deciles][order(-deciles)]

# Response rate, cumulative counts and cumulative shares, added by reference.
h_rank[, rrate := round(cnt_resp / cnt, 2)]
h_rank[, `:=`(
  cum_resp = cumsum(cnt_resp),
  cum_non_resp = cumsum(cnt_non_resp)
)]
h_rank[, `:=`(
  cum_rel_resp = round(cum_resp / sum(cnt_resp), 2),
  cum_rel_non_resp = round(cum_non_resp / sum(cnt_non_resp), 2)
)]

# KS per decile, computed while the shares are still numeric.
h_rank[, ks := abs(cum_rel_resp - cum_rel_non_resp)]

# Turn the rate columns into percentage strings for display.
h_rank[, `:=`(
  rrate = percent(rrate),
  cum_rel_resp = percent(cum_rel_resp),
  cum_rel_non_resp = percent(cum_rel_non_resp)
)]

test_rank <- as.data.frame(h_rank)
kable(test_rank)
10 |
807 |
536 |
271 |
66% |
536 |
271 |
53% |
4% |
0.49 |
9 |
800 |
291 |
509 |
36% |
827 |
780 |
82% |
11% |
0.71 |
8 |
853 |
110 |
743 |
13% |
937 |
1523 |
93% |
22% |
0.71 |
7 |
864 |
50 |
814 |
6% |
987 |
2337 |
98% |
33% |
0.65 |
6 |
809 |
16 |
793 |
2% |
1003 |
3130 |
100% |
45% |
0.55 |
5 |
985 |
3 |
982 |
0% |
1006 |
4112 |
100% |
59% |
0.41 |
4 |
863 |
0 |
863 |
0% |
1006 |
4975 |
100% |
71% |
0.29 |
3 |
2019 |
0 |
2019 |
0% |
1006 |
6994 |
100% |
100% |
0.00 |
LS0tDQp0aXRsZTogIkRhdGEgTWluaW5nIC0gUmFuZG9tRm9yZXN0IG1vZGVsIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQpsaWJyYXJ5KHJhbmRvbUZvcmVzdCkNCmxpYnJhcnkocnBhcnQpDQpsaWJyYXJ5KHJlYWRyKQ0KbGlicmFyeShjYXJldCkNCmxpYnJhcnkoZGF0YS50YWJsZSkNCmxpYnJhcnkoc2NhbGVzKQ0KbGlicmFyeShST0NSKQ0KbGlicmFyeShpbmVxKQ0KbGlicmFyeShrbml0cikNCmBgYA0KDQoNCmBgYHtyfQ0KDQpmdWxsZGF0YSA8LSBhcy5kYXRhLmZyYW1lKHJlYWRfY3N2KCJEOi9QRyBCdXNpbmVzcyBBbmFseXRpY3MvRE0vR3JvdXAgQXNzaWdubWVudC9QTF9YU0VMTC5jc3YiKSkNCmhlYWQoZnVsbGRhdGEpDQpzdW1tYXJ5KGZ1bGxkYXRhKQ0KDQpgYGANCg0KDQpgYGB7cn0NCmQxPC1mdWxsZGF0YVssLWMoMSw0LDYsNywxMCwxMSldDQpzdW1tYXJ5KGQxKQ0Kc3RyKGQxKQ0KYGBgDQoNCg0KYGBge3J9DQpkMSRNQUxFPC1pZmVsc2UoZnVsbGRhdGEkR0VOREVSPT0nTScsMSwwKQ0KZDEkRkVNQUxFPC1pZmVsc2UoZnVsbGRhdGEkR0VOREVSPT0nRicsMSwwKQ0KYGBgDQoNCg0KYGBge3J9DQp0cmFpbkluZDwtY3JlYXRlRGF0YVBhcnRpdGlvbihkMSRUQVJHRVQscD0uNixsaXN0PUZBTFNFKQ0KDQp0cmFpbjwtZDFbdHJhaW5JbmQsXQ0KdGVzdDwtZDFbLXRyYWluSW5kLF0NCm5yb3codHJhaW4pDQpucm93KHRlc3QpDQpgYGANCg0KDQpgYGB7cn0NClJGMSA8LSByYW5kb21Gb3Jlc3QoVEFSR0VUIH4gLiwgZGF0YSA9IHRyYWluLCANCiAgICAgICAgICAgICAgICAgICBudHJlZT0xMDEsIG10cnkgPSAyNCwgbm9kZXNpemUgPSAzMCwNCiAgICAgICAgICAgICAgICAgICBpbXBvcnRhbmNlPVRSVUUpDQoNCg0KUkYxDQpgYGANCg0KDQpgYGB7cn0NClJGMSRlcnIucmF0ZQ0KYGBgDQoNCg0KYGBge3J9DQppbXBWYXIgPC0gcm91bmQocmFuZG9tRm9yZXN0OjppbXBvcnRhbmNlKFJGMSksIDIpDQppbXBWYXJbb3JkZXIoaW1wVmFyWywyXSwgZGVjcmVhc2luZz1UUlVFKSxdDQpgYGANCg0KDQpgYGB7cn0NCnRSRiA8LSB0dW5lUkYoeCA9IGQxWywtYygxKV0sIA0KICAgICAgICAgICAgICB5PWFzLmZhY3RvcihkMSRUQVJHRVQpLA0KICAgICAgICAgICAgICBtdHJ5U3RhcnQgPSA2LCANCiAgICAgICAgICAgICAgbnRyZWVUcnk9MTAxLCANCiAgICAgICAgICAgICAgc3RlcEZhY3RvciA9IDIsIA0KICAgICAgICAgICAgICBpbXByb3ZlID0gMC4wMDEsIA0KICAgICAgICAgICAgICB0cmFjZT1UUlVFLCANCiAgICAgICAgICAgICAgcGxvdCA9IFRSVUUsDQogICAgICAgICAgICAgIGRvQmVzdCA9IFRSVUUsDQogICAgICAgICAgICAgIG5vZGVzaXplID0gMTUwLCANCiAgICAgICAgICAgICAgaW1wb3J0YW5jZT1GQUxTRQ0KKQ0KDQp0UkYkaW1wb3J0YW5jZQ0KYGBgDQoNCg0KYGBge3J9DQp0cmFpbiRwcmVkaWN0LmNsYXNzIDwtIHByZWRpY3QodFJGLCB0
cmFpbiwgdHlwZT0iY2xhc3MiKQ0KdHJhaW4kcHJlZGljdC5zY29yZSA8LSBwcmVkaWN0KHRSRiwgdHJhaW4sIHR5cGU9InByb2IiKQ0KaGVhZCh0cmFpbikNCmNsYXNzKHRyYWluJHByZWRpY3Quc2NvcmUpDQpgYGANCg0KDQpgYGB7cn0NCiMgZGVjaWxlIDwtIGZ1bmN0aW9uKHgpew0KIyAgIGRlY2lsZXMgPC0gdmVjdG9yKGxlbmd0aD0xMCkNCiMgICBmb3IgKGkgaW4gc2VxKDAuMSwxLC4xKSl7DQojICAgICBkZWNpbGVzW2kqMTBdIDwtIHF1YW50aWxlKHgsIGksIG5hLnJtPVQpDQojICAgfQ0KIyAgIHJldHVybiAoDQojICAgICBpZmVsc2UoeDxkZWNpbGVzWzFdLCAxLA0KIyAgICAgICAgICAgIGlmZWxzZSh4PGRlY2lsZXNbMl0sIDIsDQojICAgICAgICAgICAgICAgICAgIGlmZWxzZSh4PGRlY2lsZXNbM10sIDMsDQojICAgICAgICAgICAgICAgICAgICAgICAgICBpZmVsc2UoeDxkZWNpbGVzWzRdLCA0LA0KIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmZWxzZSh4PGRlY2lsZXNbNV0sIDUsDQojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmZWxzZSh4PGRlY2lsZXNbNl0sIDYsDQojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBpZmVsc2UoeDxkZWNpbGVzWzddLCA3LA0KIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmZWxzZSh4PGRlY2lsZXNbOF0sIDgsDQojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmZWxzZSh4PGRlY2lsZXNbOV0sIDksIDEwDQojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICkpKSkpKSkpKSkNCiMgfQ0KdHJhaW4kZGVjaWxlcyA8LSBkZWNpbGUodHJhaW4kcHJlZGljdC5zY29yZVssMl0pDQpgYGANCg0KDQpgYGB7cn0NCnRtcF9EVCA9IGRhdGEudGFibGUodHJhaW4pDQojIHJhbmsgPC0gdG1wX0RUWywgbGlzdCgNCiMgICBjbnQgPSBsZW5ndGgoVEFSR0VUKSwgDQojICAgY250X3Jlc3AgPSBzdW0oVEFSR0VUKSwgDQojICAgY250X25vbl9yZXNwID0gc3VtKFRBUkdFVCA9PSAwKSkgLCANCiMgICBieT1kZWNpbGVzXVtvcmRlcigtZGVjaWxlcyldDQpyYW5rJHJyYXRlIDwtIHJvdW5kIChyYW5rJGNudF9yZXNwIC8gcmFuayRjbnQsMik7DQpyYW5rJGN1bV9yZXNwIDwtIGN1bXN1bShyYW5rJGNudF9yZXNwKQ0KcmFuayRjdW1fbm9uX3Jlc3AgPC0gY3Vtc3VtKHJhbmskY250X25vbl9yZXNwKQ0KcmFuayRjdW1fcmVsX3Jlc3AgPC0gcm91bmQocmFuayRjdW1fcmVzcCAvIHN1bShyYW5rJGNudF9yZXNwKSwyKTsNCnJhbmskY3VtX3JlbF9ub25fcmVzcCA8LSByb3VuZChyYW5rJGN1bV9ub25fcmVzcCAvIHN1bShyYW5rJGNudF9ub25fcmVzcCksMik7DQpyYW5rJGtzIDwtIGFicyhyYW5rJGN1bV9yZWxfcmVzcCAtIHJhbmskY3VtX3JlbF9ub25f
cmVzcCk7DQpgYGANCg0KYGBge3J9DQpyYW5rJHJyYXRlIDwtIHBlcmNlbnQocmFuayRycmF0ZSkNCnJhbmskY3VtX3JlbF9yZXNwIDwtIHBlcmNlbnQocmFuayRjdW1fcmVsX3Jlc3ApDQpyYW5rJGN1bV9yZWxfbm9uX3Jlc3AgPC0gcGVyY2VudChyYW5rJGN1bV9yZWxfbm9uX3Jlc3ApDQpyYW5rJGtzDQpgYGANCg0KYGBge3J9DQp0cmFpbl9yYW5rPC1hcy5kYXRhLmZyYW1lKHJhbmspDQprYWJsZSh0cmFpbl9yYW5rKQ0KYGBgDQoNCmBgYHtyfQ0Kc3VtKHRyYWluJFRBUkdFVCkgLyBucm93KHRyYWluKQ0KYGBgDQoNCmBgYHtyfQ0KcHJlZCA8LSBwcmVkaWN0aW9uKHRyYWluJHByZWRpY3Quc2NvcmVbLDJdLCB0cmFpbiRUQVJHRVQpDQpwZXJmIDwtIHBlcmZvcm1hbmNlKHByZWQsICJ0cHIiLCAiZnByIikNCnBsb3QocGVyZikNCktTIDwtIG1heChhdHRyKHBlcmYsICd5LnZhbHVlcycpW1sxXV0tYXR0cihwZXJmLCAneC52YWx1ZXMnKVtbMV1dKQ0KS1MNCmBgYA0KDQoNCmBgYHtyfQ0KYXVjIDwtIHBlcmZvcm1hbmNlKHByZWQsImF1YyIpOyANCmF1YyA8LSBhcy5udW1lcmljKGF1Y0B5LnZhbHVlcykNCmF1Yw0KYGBgDQoNCg0KYGBge3J9DQpnaW5pID0gaW5lcSh0cmFpbiRwcmVkaWN0LnNjb3JlWywyXSwgdHlwZT0iR2luaSIpDQpnaW5pDQpgYGANCg0KYGBge3J9DQp3aXRoKHRyYWluLCB0YWJsZShUQVJHRVQsIHByZWRpY3QuY2xhc3MpKQ0KYGBgDQoNCg0KYGBge3J9DQp0ZXN0JHByZWRpY3QuY2xhc3MgPC0gcHJlZGljdCh0UkYsIHRlc3QsIHR5cGU9ImNsYXNzIikNCnRlc3QkcHJlZGljdC5zY29yZSA8LSBwcmVkaWN0KHRSRiwgdGVzdCwgdHlwZT0icHJvYiIpDQoNCnRlc3QkZGVjaWxlcyA8LSBkZWNpbGUodGVzdCRwcmVkaWN0LnNjb3JlWywyXSkNCg0KdG1wX0RUID0gZGF0YS50YWJsZSh0ZXN0KQ0KaF9yYW5rIDwtIHRtcF9EVFssIGxpc3QoDQogIGNudCA9IGxlbmd0aChUQVJHRVQpLCANCiAgY250X3Jlc3AgPSBzdW0oVEFSR0VUKSwgDQogIGNudF9ub25fcmVzcCA9IHN1bShUQVJHRVQgPT0gMCkpICwgDQogIGJ5PWRlY2lsZXNdW29yZGVyKC1kZWNpbGVzKV0NCmhfcmFuayRycmF0ZSA8LSByb3VuZCAoaF9yYW5rJGNudF9yZXNwIC8gaF9yYW5rJGNudCwyKTsNCmhfcmFuayRjdW1fcmVzcCA8LSBjdW1zdW0oaF9yYW5rJGNudF9yZXNwKQ0KaF9yYW5rJGN1bV9ub25fcmVzcCA8LSBjdW1zdW0oaF9yYW5rJGNudF9ub25fcmVzcCkNCmhfcmFuayRjdW1fcmVsX3Jlc3AgPC0gcm91bmQoaF9yYW5rJGN1bV9yZXNwIC8gc3VtKGhfcmFuayRjbnRfcmVzcCksMik7DQpoX3JhbmskY3VtX3JlbF9ub25fcmVzcCA8LSByb3VuZChoX3JhbmskY3VtX25vbl9yZXNwIC8gc3VtKGhfcmFuayRjbnRfbm9uX3Jlc3ApLDIpOw0KaF9yYW5rJGtzIDwtIGFicyhoX3JhbmskY3VtX3JlbF9yZXNwIC0gaF9yYW5rJGN1bV9yZWxfbm9uX3Jlc3ApOw0KDQpoX3JhbmskcnJhdGUgPC0gcGVyY2VudChoX3JhbmskcnJhdGUpDQpoX3JhbmskY3VtX3JlbF9y
ZXNwIDwtIHBlcmNlbnQoaF9yYW5rJGN1bV9yZWxfcmVzcCkNCmhfcmFuayRjdW1fcmVsX25vbl9yZXNwIDwtIHBlcmNlbnQoaF9yYW5rJGN1bV9yZWxfbm9uX3Jlc3ApDQpgYGANCg0KYGBge3J9DQp0ZXN0X3Jhbms8LWFzLmRhdGEuZnJhbWUoaF9yYW5rKQ0Ka2FibGUodGVzdF9yYW5rKQ0KYGBgDQoNCg0K