Assignment 15
mydata <- read.csv("C:\\Users\\RISHI RAHUL\\Desktop\\DS\\4 Logistic\\Assignment\\bank_data.csv")
colnames(mydata)
## [1] "age" "default" "balance"
## [4] "housing" "loan" "duration"
## [7] "campaign" "pdays" "previous"
## [10] "poutfailure" "poutother" "poutsuccess"
## [13] "poutunknown" "con_cellular" "con_telephone"
## [16] "con_unknown" "divorced" "married"
## [19] "single" "joadmin." "joblue.collar"
## [22] "joentrepreneur" "johousemaid" "jomanagement"
## [25] "joretired" "joself.employed" "joservices"
## [28] "jostudent" "jotechnician" "jounemployed"
## [31] "jounknown" "y"
summary(mydata)
## age default balance housing
## Min. :18.00 Min. :0.00000 Min. : -8019 Min. :0.0000
## 1st Qu.:33.00 1st Qu.:0.00000 1st Qu.: 72 1st Qu.:0.0000
## Median :39.00 Median :0.00000 Median : 448 Median :1.0000
## Mean :40.94 Mean :0.01803 Mean : 1362 Mean :0.5558
## 3rd Qu.:48.00 3rd Qu.:0.00000 3rd Qu.: 1428 3rd Qu.:1.0000
## Max. :95.00 Max. :1.00000 Max. :102127 Max. :1.0000
## loan duration campaign pdays
## Min. :0.0000 Min. : 0.0 Min. : 1.000 Min. : -1.0
## 1st Qu.:0.0000 1st Qu.: 103.0 1st Qu.: 1.000 1st Qu.: -1.0
## Median :0.0000 Median : 180.0 Median : 2.000 Median : -1.0
## Mean :0.1602 Mean : 258.2 Mean : 2.764 Mean : 40.2
## 3rd Qu.:0.0000 3rd Qu.: 319.0 3rd Qu.: 3.000 3rd Qu.: -1.0
## Max. :1.0000 Max. :4918.0 Max. :63.000 Max. :871.0
## previous poutfailure poutother poutsuccess
## Min. : 0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.: 0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median : 0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean : 0.5803 Mean :0.1084 Mean :0.0407 Mean :0.03342
## 3rd Qu.: 0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :275.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## poutunknown con_cellular con_telephone con_unknown
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.000
## Median :1.0000 Median :1.0000 Median :0.00000 Median :0.000
## Mean :0.8175 Mean :0.6477 Mean :0.06428 Mean :0.288
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.000
## divorced married single joadmin.
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.1152 Mean :0.6019 Mean :0.2829 Mean :0.1144
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## joblue.collar joentrepreneur johousemaid jomanagement
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.2153 Mean :0.03289 Mean :0.02743 Mean :0.2092
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## joretired joself.employed joservices jostudent
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.05008 Mean :0.03493 Mean :0.09188 Mean :0.02075
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## jotechnician jounemployed jounknown y
## Min. :0.000 Min. :0.00000 Min. :0.00000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000
## Median :0.000 Median :0.00000 Median :0.00000 Median :0.000
## Mean :0.168 Mean :0.02882 Mean :0.00637 Mean :0.117
## 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000
## Max. :1.000 Max. :1.00000 Max. :1.00000 Max. :1.000
dim(mydata)
## [1] 45211 32
model <- glm(y~.,data = mydata,family = binomial)
summary(model)
##
## Call:
## glm(formula = y ~ ., family = binomial, data = mydata)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.6748 -0.4060 -0.2731 -0.1625 3.4400
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.855e+00 2.447e-01 -15.754 < 2e-16 ***
## age 3.567e-04 2.123e-03 0.168 0.866617
## default -1.822e-01 1.613e-01 -1.130 0.258582
## balance 1.872e-05 4.848e-06 3.860 0.000113 ***
## housing -7.756e-01 3.953e-02 -19.618 < 2e-16 ***
## loan -5.720e-01 5.811e-02 -9.843 < 2e-16 ***
## duration 4.048e-03 6.264e-05 64.619 < 2e-16 ***
## campaign -1.093e-01 9.905e-03 -11.037 < 2e-16 ***
## pdays 1.441e-04 3.006e-04 0.479 0.631590
## previous 1.042e-02 6.435e-03 1.620 0.105281
## poutfailure 2.563e-01 9.038e-02 2.835 0.004576 **
## poutother 4.985e-01 1.028e-01 4.849 1.24e-06 ***
## poutsuccess 2.565e+00 8.318e-02 30.836 < 2e-16 ***
## poutunknown NA NA NA NA
## con_cellular 1.166e+00 5.762e-02 20.232 < 2e-16 ***
## con_telephone 1.067e+00 8.825e-02 12.093 < 2e-16 ***
## con_unknown NA NA NA NA
## divorced -1.762e-01 6.530e-02 -2.697 0.006986 **
## married -3.394e-01 4.448e-02 -7.631 2.32e-14 ***
## single NA NA NA NA
## joadmin. 2.965e-01 2.263e-01 1.310 0.190041
## joblue.collar -1.671e-01 2.256e-01 -0.741 0.458886
## joentrepreneur -1.073e-01 2.463e-01 -0.436 0.663056
## johousemaid -2.776e-01 2.495e-01 -1.113 0.265815
## jomanagement 2.380e-01 2.232e-01 1.067 0.286143
## joretired 6.315e-01 2.301e-01 2.745 0.006052 **
## joself.employed 1.962e-02 2.392e-01 0.082 0.934620
## joservices -3.688e-02 2.301e-01 -0.160 0.872676
## jostudent 8.087e-01 2.395e-01 3.377 0.000734 ***
## jotechnician 6.383e-02 2.247e-01 0.284 0.776342
## jounemployed 8.167e-02 2.397e-01 0.341 0.733322
## jounknown NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 32631 on 45210 degrees of freedom
## Residual deviance: 22640 on 45183 degrees of freedom
## AIC: 22696
##
## Number of Fisher Scoring iterations: 6
mydata <- mydata[,-c(13,16,19,31)]
model2 <- glm(y~.,data = mydata)
summary(model2)
##
## Call:
## glm(formula = y ~ ., data = mydata)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.29345 -0.11582 -0.04883 0.01842 1.06743
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.755e-02 1.776e-02 -1.552 0.120766
## age 1.741e-04 1.570e-04 1.109 0.267383
## default -1.037e-02 9.753e-03 -1.063 0.287572
## balance 1.959e-06 4.303e-07 4.552 5.33e-06 ***
## housing -5.666e-02 2.844e-03 -19.925 < 2e-16 ***
## loan -3.314e-02 3.565e-03 -9.296 < 2e-16 ***
## duration 4.733e-04 5.038e-06 93.953 < 2e-16 ***
## campaign -2.083e-03 4.219e-04 -4.936 8.02e-07 ***
## pdays -2.589e-05 2.726e-05 -0.950 0.342170
## previous 1.213e-03 6.651e-04 1.824 0.068120 .
## poutfailure 2.984e-02 8.145e-03 3.664 0.000248 ***
## poutother 5.788e-02 9.544e-03 6.064 1.34e-09 ***
## poutsuccess 4.753e-01 8.896e-03 53.426 < 2e-16 ***
## con_cellular 5.555e-02 3.137e-03 17.705 < 2e-16 ***
## con_telephone 4.941e-02 5.830e-03 8.474 < 2e-16 ***
## divorced -1.531e-02 4.850e-03 -3.156 0.001601 **
## married -2.668e-02 3.305e-03 -8.074 7.00e-16 ***
## joadmin. 2.297e-02 1.672e-02 1.374 0.169376
## joblue.collar -4.980e-03 1.652e-02 -0.301 0.763053
## joentrepreneur -3.181e-03 1.774e-02 -0.179 0.857741
## johousemaid -1.559e-02 1.796e-02 -0.868 0.385371
## jomanagement 2.133e-02 1.649e-02 1.294 0.195809
## joretired 6.599e-02 1.730e-02 3.814 0.000137 ***
## joself.employed 2.451e-03 1.764e-02 0.139 0.889468
## joservices 1.654e-03 1.683e-02 0.098 0.921715
## jostudent 1.132e-01 1.876e-02 6.034 1.61e-09 ***
## jotechnician 5.941e-03 1.656e-02 0.359 0.719819
## jounemployed 1.554e-02 1.791e-02 0.868 0.385450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.07511589)
##
## Null deviance: 4670.3 on 45210 degrees of freedom
## Residual deviance: 3394.0 on 45183 degrees of freedom
## AIC: 11294
##
## Number of Fisher Scoring iterations: 2
library(MASS)
#stepAIC(model)
FinalModel <- glm(y ~ balance + housing + loan + duration + campaign + (1/sqrt(previous)) + poutfailure + poutother + poutsuccess + con_cellular + con_telephone + divorced + married + joadmin. + joblue.collar + johousemaid + jomanagement + joretired + jostudent, family = binomial,data = mydata)
summary(FinalModel)
##
## Call:
## glm(formula = y ~ balance + housing + loan + duration + campaign +
## (1/sqrt(previous)) + poutfailure + poutother + poutsuccess +
## con_cellular + con_telephone + divorced + married + joadmin. +
## joblue.collar + johousemaid + jomanagement + joretired +
## jostudent, family = binomial, data = mydata)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.6659 -0.4061 -0.2734 -0.1628 3.4004
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.823e+00 7.525e-02 -50.801 < 2e-16 ***
## balance 1.903e-05 4.811e-06 3.954 7.67e-05 ***
## housing -7.763e-01 3.862e-02 -20.101 < 2e-16 ***
## loan -5.816e-01 5.782e-02 -10.058 < 2e-16 ***
## duration 4.047e-03 6.259e-05 64.649 < 2e-16 ***
## campaign -1.088e-01 9.881e-03 -11.011 < 2e-16 ***
## poutfailure 3.206e-01 5.432e-02 5.901 3.60e-09 ***
## poutother 5.715e-01 7.554e-02 7.565 3.87e-14 ***
## poutsuccess 2.625e+00 6.370e-02 41.219 < 2e-16 ***
## con_cellular 1.170e+00 5.753e-02 20.330 < 2e-16 ***
## con_telephone 1.074e+00 8.756e-02 12.267 < 2e-16 ***
## divorced -1.770e-01 6.116e-02 -2.895 0.003792 **
## married -3.403e-01 4.053e-02 -8.396 < 2e-16 ***
## joadmin. 2.743e-01 5.870e-02 4.674 2.96e-06 ***
## joblue.collar -1.897e-01 5.562e-02 -3.410 0.000649 ***
## johousemaid -3.006e-01 1.228e-01 -2.447 0.014395 *
## jomanagement 2.149e-01 4.694e-02 4.579 4.68e-06 ***
## joretired 6.150e-01 7.148e-02 8.604 < 2e-16 ***
## jostudent 7.789e-01 9.494e-02 8.204 2.32e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 32631 on 45210 degrees of freedom
## Residual deviance: 22648 on 45192 degrees of freedom
## AIC: 22686
##
## Number of Fisher Scoring iterations: 6
#vif(FinalModel)
exp(coef(FinalModel))
## (Intercept) balance housing loan duration
## 0.02186255 1.00001903 0.46012374 0.55901143 1.00405472
## campaign poutfailure poutother poutsuccess con_cellular
## 0.89690644 1.37794412 1.77086670 13.81123294 3.22091301
## con_telephone divorced married joadmin. joblue.collar
## 2.92712022 0.83774286 0.71154805 1.31565109 0.82721637
## johousemaid jomanagement joretired jostudent
## 0.74038523 1.23975840 1.84967292 2.17908489
prob <- predict(FinalModel,type=c("response"),mydata)
prob<-as.data.frame(prob)
final <- cbind(prob,mydata)
confusion <- table(prob > 0.5,mydata$y)
table(prob > 0.5)
##
## FALSE TRUE
## 42598 2613
confusion
##
## 0 1
## FALSE 39017 3581
## TRUE 905 1708
Accuracy <- sum(diag(confusion)/sum(confusion))
Accuracy
## [1] 0.9007764
#install.packages("ROCR")
library(ROCR)
## Warning: package 'ROCR' was built under R version 3.5.1
## Loading required package: gplots
## Warning: package 'gplots' was built under R version 3.5.1
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
rocrpred<-prediction(prob,mydata$y)
rocrperf<-performance(rocrpred,'tpr','fpr')
plot(rocrperf,colorize=T,text.adj=c(-0.2,1.0))
