## Load the Package
library(tidyverse)
library(xlsx)
library(VIF)
## Set the directory
getwd()
## [1] "C:/Users/SK/Desktop/SK/NUS EBA/Semester 3/Advanced Customer Analytics/Week 2"
setwd("C:/Users/SK/Desktop/SK/NUS EBA/Semester 3/Advanced Customer Analytics/Week 2")
## Read and Explore the Data
campaign <- read.csv("bank-additional.csv")
str(campaign)
## 'data.frame':    41188 obs. of  21 variables:
##  $ age           : int  56 57 37 40 56 45 59 41 24 25 ...
##  $ job           : Factor w/ 12 levels "admin.","blue-collar",..: 4 8 8 1 8 8 1 2 10 8 ...
##  $ marital       : Factor w/ 4 levels "divorced","married",..: 2 2 2 2 2 2 2 2 3 3 ...
##  $ education     : Factor w/ 8 levels "basic.4y","basic.6y",..: 1 4 4 2 4 3 6 8 6 4 ...
##  $ default       : Factor w/ 3 levels "no","unknown",..: 1 2 1 1 1 2 1 2 1 1 ...
##  $ housing       : Factor w/ 3 levels "no","unknown",..: 1 1 3 1 1 1 1 1 3 3 ...
##  $ loan          : Factor w/ 3 levels "no","unknown",..: 1 1 1 1 3 1 1 1 1 1 ...
##  $ contact       : Factor w/ 2 levels "cellular","telephone": 2 2 2 2 2 2 2 2 2 2 ...
##  $ month         : Factor w/ 10 levels "apr","aug","dec",..: 7 7 7 7 7 7 7 7 7 7 ...
##  $ day_of_week   : Factor w/ 5 levels "fri","mon","thu",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ duration      : int  261 149 226 151 307 198 139 217 380 50 ...
##  $ campaign      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ pdays         : int  999 999 999 999 999 999 999 999 999 999 ...
##  $ previous      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ poutcome      : Factor w/ 3 levels "failure","nonexistent",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ emp.var.rate  : num  1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 ...
##  $ cons.price.idx: num  94 94 94 94 94 ...
##  $ cons.conf.idx : num  -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 ...
##  $ euribor3m     : num  4.86 4.86 4.86 4.86 4.86 ...
##  $ nr.employed   : num  5191 5191 5191 5191 5191 ...
##  $ y             : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
summary(campaign)
##       age                 job            marital     
##  Min.   :17.00   admin.     :10422   divorced: 4612  
##  1st Qu.:32.00   blue-collar: 9254   married :24928  
##  Median :38.00   technician : 6743   single  :11568  
##  Mean   :40.02   services   : 3969   unknown :   80  
##  3rd Qu.:47.00   management : 2924                   
##  Max.   :98.00   retired    : 1720                   
##                  (Other)    : 6156                   
##                education        default         housing     
##  university.degree  :12168   no     :32588   no     :18622  
##  high.school        : 9515   unknown: 8597   unknown:  990  
##  basic.9y           : 6045   yes    :    3   yes    :21576  
##  professional.course: 5243                                  
##  basic.4y           : 4176                                  
##  basic.6y           : 2292                                  
##  (Other)            : 1749                                  
##       loan            contact          month       day_of_week
##  no     :33950   cellular :26144   may    :13769   fri:7827   
##  unknown:  990   telephone:15044   jul    : 7174   mon:8514   
##  yes    : 6248                     aug    : 6178   thu:8623   
##                                    jun    : 5318   tue:8090   
##                                    nov    : 4101   wed:8134   
##                                    apr    : 2632              
##                                    (Other): 2016              
##     duration         campaign          pdays          previous    
##  Min.   :   0.0   Min.   : 1.000   Min.   :  0.0   Min.   :0.000  
##  1st Qu.: 102.0   1st Qu.: 1.000   1st Qu.:999.0   1st Qu.:0.000  
##  Median : 180.0   Median : 2.000   Median :999.0   Median :0.000  
##  Mean   : 258.3   Mean   : 2.568   Mean   :962.5   Mean   :0.173  
##  3rd Qu.: 319.0   3rd Qu.: 3.000   3rd Qu.:999.0   3rd Qu.:0.000  
##  Max.   :4918.0   Max.   :56.000   Max.   :999.0   Max.   :7.000  
##                                                                   
##         poutcome      emp.var.rate      cons.price.idx  cons.conf.idx  
##  failure    : 4252   Min.   :-3.40000   Min.   :92.20   Min.   :-50.8  
##  nonexistent:35563   1st Qu.:-1.80000   1st Qu.:93.08   1st Qu.:-42.7  
##  success    : 1373   Median : 1.10000   Median :93.75   Median :-41.8  
##                      Mean   : 0.08189   Mean   :93.58   Mean   :-40.5  
##                      3rd Qu.: 1.40000   3rd Qu.:93.99   3rd Qu.:-36.4  
##                      Max.   : 1.40000   Max.   :94.77   Max.   :-26.9  
##                                                                        
##    euribor3m      nr.employed     y        
##  Min.   :0.634   Min.   :4964   no :36548  
##  1st Qu.:1.344   1st Qu.:5099   yes: 4640  
##  Median :4.857   Median :5191              
##  Mean   :3.621   Mean   :5167              
##  3rd Qu.:4.961   3rd Qu.:5228              
##  Max.   :5.045   Max.   :5228              
## 
head(campaign)
##   age       job marital   education default housing loan   contact month
## 1  56 housemaid married    basic.4y      no      no   no telephone   may
## 2  57  services married high.school unknown      no   no telephone   may
## 3  37  services married high.school      no     yes   no telephone   may
## 4  40    admin. married    basic.6y      no      no   no telephone   may
## 5  56  services married high.school      no      no  yes telephone   may
## 6  45  services married    basic.9y unknown      no   no telephone   may
##   day_of_week duration campaign pdays previous    poutcome emp.var.rate
## 1         mon      261        1   999        0 nonexistent          1.1
## 2         mon      149        1   999        0 nonexistent          1.1
## 3         mon      226        1   999        0 nonexistent          1.1
## 4         mon      151        1   999        0 nonexistent          1.1
## 5         mon      307        1   999        0 nonexistent          1.1
## 6         mon      198        1   999        0 nonexistent          1.1
##   cons.price.idx cons.conf.idx euribor3m nr.employed  y
## 1         93.994         -36.4     4.857        5191 no
## 2         93.994         -36.4     4.857        5191 no
## 3         93.994         -36.4     4.857        5191 no
## 4         93.994         -36.4     4.857        5191 no
## 5         93.994         -36.4     4.857        5191 no
## 6         93.994         -36.4     4.857        5191 no
## duration variable is dropped due to useful for prediction
## Explore the Categorical Variables
colnames(campaign)
##  [1] "age"            "job"            "marital"        "education"     
##  [5] "default"        "housing"        "loan"           "contact"       
##  [9] "month"          "day_of_week"    "duration"       "campaign"      
## [13] "pdays"          "previous"       "poutcome"       "emp.var.rate"  
## [17] "cons.price.idx" "cons.conf.idx"  "euribor3m"      "nr.employed"   
## [21] "y"
cat_var = c('job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'day_of_week', 'poutcome', 'y')

for (col in cat_var) { 
  x <- table(campaign[,col])
  barplot(x, main = col)
}

## Marital, default, housing, loan, job and education have 'unknown' data which unable ignore
## Assume the job and education are closely related
table(campaign$job)
## 
##        admin.   blue-collar  entrepreneur     housemaid    management 
##         10422          9254          1456          1060          2924 
##       retired self-employed      services       student    technician 
##          1720          1421          3969           875          6743 
##    unemployed       unknown 
##          1014           330
table(campaign$education)
## 
##            basic.4y            basic.6y            basic.9y 
##                4176                2292                6045 
##         high.school          illiterate professional.course 
##                9515                  18                5243 
##   university.degree             unknown 
##               12168                1731
## if the age > 60, replace the job from 'unknown' to 'retired'
campaign$job[campaign$age >60 & campaign$job == 'unknown'] <- 'retired'
## if job is management, replace the education from 'unknown' to 'university.degree'
campaign$education[campaign$job == 'management' & campaign$education == 'unknown'] <- 'university.degree'
## if job is service, replace the education from 'unknown' to 'high.school'
campaign$education[campaign$job == 'service' & campaign$education == 'unknown'] <- 'high.school'
## if job is housemaid, replace the education from 'unknown' to 'basic.4y'
campaign$education[campaign$job == 'housemaid' & campaign$education == 'unknown'] <- 'basic.4y'
## if education is 'basic.4y', replace the job from 'unknown' to 'blue-collar'
campaign$job[campaign$education == 'basic.4y' & campaign$job == 'unknown'] <- 'blue-collar'
## if education is 'basic.6y', replace the job from 'unknown' to 'blue-collar'
campaign$job[campaign$education == 'basic.6y' & campaign$job == 'unknown'] <- 'blue-collar'
## if education is 'basic.9y', replace the job from 'unknown' to 'blue-collar'
campaign$job[campaign$education == 'basic.9y' & campaign$job == 'unknown'] <- 'blue-collar'
## if education is 'professional.course', replace the job from 'unknown' to 'blue-collar'
campaign$job[campaign$education == 'professional' & campaign$job == 'unknown'] <- 'technician'
table(campaign$job)
## 
##        admin.   blue-collar  entrepreneur     housemaid    management 
##         10422          9355          1456          1060          2924 
##       retired self-employed      services       student    technician 
##          1741          1421          3969           875          6743 
##    unemployed       unknown 
##          1014           208
## Check the cross tabulation between housing and marital
library(gmodels)
CrossTable(campaign$marital, campaign$housing)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  41188 
## 
##  
##                  | campaign$housing 
## campaign$marital |        no |   unknown |       yes | Row Total | 
## -----------------|-----------|-----------|-----------|-----------|
##         divorced |      2092 |       121 |      2399 |      4612 | 
##                  |     0.022 |     0.929 |     0.119 |           | 
##                  |     0.454 |     0.026 |     0.520 |     0.112 | 
##                  |     0.112 |     0.122 |     0.111 |           | 
##                  |     0.051 |     0.003 |     0.058 |           | 
## -----------------|-----------|-----------|-----------|-----------|
##          married |     11389 |       588 |     12951 |     24928 | 
##                  |     1.246 |     0.208 |     0.882 |           | 
##                  |     0.457 |     0.024 |     0.520 |     0.605 | 
##                  |     0.612 |     0.594 |     0.600 |           | 
##                  |     0.277 |     0.014 |     0.314 |           | 
## -----------------|-----------|-----------|-----------|-----------|
##           single |      5097 |       280 |      6191 |     11568 | 
##                  |     3.390 |     0.014 |     2.840 |           | 
##                  |     0.441 |     0.024 |     0.535 |     0.281 | 
##                  |     0.274 |     0.283 |     0.287 |           | 
##                  |     0.124 |     0.007 |     0.150 |           | 
## -----------------|-----------|-----------|-----------|-----------|
##          unknown |        44 |         1 |        35 |        80 | 
##                  |     1.695 |     0.443 |     1.138 |           | 
##                  |     0.550 |     0.012 |     0.438 |     0.002 | 
##                  |     0.002 |     0.001 |     0.002 |           | 
##                  |     0.001 |     0.000 |     0.001 |           | 
## -----------------|-----------|-----------|-----------|-----------|
##     Column Total |     18622 |       990 |     21576 |     41188 | 
##                  |     0.452 |     0.024 |     0.524 |           | 
## -----------------|-----------|-----------|-----------|-----------|
## 
## 
## Assume majority married will get housing
table(campaign$housing)
## 
##      no unknown     yes 
##   18622     990   21576
campaign$housing[campaign$marital == 'married' & campaign$housing == 'unknown'] <- 'yes'
table(campaign$housing)
## 
##      no unknown     yes 
##   18622     402   22164
## Split Train and test set by 80:20
## set intital seed
set.seed(123)
## Create a boolean flag to split data
library(caTools)
splitData = sample.split(campaign$y, SplitRatio = 0.8)
## Train set
train_set = campaign[splitData, ]
nrow(train_set)/nrow(campaign)
## [1] 0.7999903
## Test set
test_set = campaign[!splitData,]
nrow(test_set)/nrow(campaign)
## [1] 0.2000097
## Data Imbalance - 'no' (29238) subscribe is must higher than 'yes' (3712)
library(ROSE)
table(train_set$y)
## 
##    no   yes 
## 29238  3712
Train_balance <- ovun.sample(y ~ ., data = train_set, N=nrow(train_set), method = 'both', p = 0.5, seed=1)$data
table(Train_balance$y)
## 
##    no   yes 
## 16583 16367
## Create the Logistic Modelling
## Since only concentrate on customer features, campaign features need to exclude in the modelling
colnames(Train_balance)
##  [1] "age"            "job"            "marital"        "education"     
##  [5] "default"        "housing"        "loan"           "contact"       
##  [9] "month"          "day_of_week"    "duration"       "campaign"      
## [13] "pdays"          "previous"       "poutcome"       "emp.var.rate"  
## [17] "cons.price.idx" "cons.conf.idx"  "euribor3m"      "nr.employed"   
## [21] "y"
model1 <- glm(y ~ age+job+marital+education+default+loan+housing+contact+pdays+previous+poutcome, data = Train_balance , family = binomial)
summary(model1)
## 
## Call:
## glm(formula = y ~ age + job + marital + education + default + 
##     loan + housing + contact + pdays + previous + poutcome, family = binomial, 
##     data = Train_balance)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9373  -1.0468  -0.5623   1.1193   2.0623  
## 
## Coefficients:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   9.181e-01  2.703e-01   3.397 0.000683 ***
## age                           9.395e-03  1.465e-03   6.413 1.43e-10 ***
## jobblue-collar               -2.783e-01  4.629e-02  -6.013 1.82e-09 ***
## jobentrepreneur              -2.017e-01  7.080e-02  -2.849 0.004389 ** 
## jobhousemaid                 -2.801e-01  8.516e-02  -3.289 0.001006 ** 
## jobmanagement                -1.529e-01  5.089e-02  -3.004 0.002663 ** 
## jobretired                    6.981e-01  7.003e-02   9.968  < 2e-16 ***
## jobself-employed             -2.145e-01  6.869e-02  -3.122 0.001796 ** 
## jobservices                  -1.850e-01  5.034e-02  -3.675 0.000238 ***
## jobstudent                    9.941e-01  8.055e-02  12.341  < 2e-16 ***
## jobtechnician                -1.121e-01  4.190e-02  -2.675 0.007466 ** 
## jobunemployed                 1.133e-01  7.839e-02   1.445 0.148454    
## jobunknown                   -1.246e-01  1.705e-01  -0.731 0.464945    
## maritalmarried                2.204e-01  4.080e-02   5.403 6.55e-08 ***
## maritalsingle                 4.308e-01  4.609e-02   9.346  < 2e-16 ***
## maritalunknown                3.915e-01  2.796e-01   1.400 0.161388    
## educationbasic.6y             1.792e-01  6.923e-02   2.589 0.009638 ** 
## educationbasic.9y             3.523e-02  5.559e-02   0.634 0.526228    
## educationhigh.school          6.176e-02  5.524e-02   1.118 0.263570    
## educationilliterate           2.106e+00  6.216e-01   3.389 0.000702 ***
## educationprofessional.course  7.066e-02  6.090e-02   1.160 0.245939    
## educationuniversity.degree    1.878e-01  5.610e-02   3.348 0.000815 ***
## educationunknown              2.234e-01  7.718e-02   2.894 0.003804 ** 
## defaultunknown               -6.295e-01  3.555e-02 -17.709  < 2e-16 ***
## defaultyes                   -1.060e+01  8.448e+01  -0.126 0.900120    
## loanunknown                   8.258e-02  9.719e-02   0.850 0.395500    
## loanyes                      -1.319e-01  3.392e-02  -3.887 0.000102 ***
## housingunknown               -3.080e-01  1.620e-01  -1.901 0.057344 .  
## housingyes                   -1.126e-02  2.471e-02  -0.456 0.648562    
## contacttelephone             -7.887e-01  2.815e-02 -28.021  < 2e-16 ***
## pdays                        -1.608e-03  2.161e-04  -7.438 1.02e-13 ***
## previous                      1.956e-01  6.389e-02   3.062 0.002197 ** 
## poutcomenonexistent           1.058e-01  8.195e-02   1.290 0.196890    
## poutcomesuccess               8.700e-01  2.174e-01   4.001 6.30e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 45677  on 32949  degrees of freedom
## Residual deviance: 39376  on 32916  degrees of freedom
## AIC: 39444
## 
## Number of Fisher Scoring iterations: 9
library(car)
vif(model1)
##                GVIF Df GVIF^(1/(2*Df))
## age        1.872105  1        1.368249
## job        4.905744 11        1.074968
## marital    1.417892  3        1.059922
## education  3.406711  7        1.091501
## default    1.088225  2        1.021362
## loan       1.601970  2        1.125029
## housing    1.632628  2        1.130373
## contact    1.058853  1        1.029006
## pdays      9.252287  1        3.041757
## previous   6.667708  1        2.582191
## poutcome  37.771617  2        2.479085
## poutcome is higher GVIF. Need to remove this variable
model2 <- glm(y ~ age+job+marital+education+default+loan+housing+contact+pdays+previous, data = Train_balance , family = binomial)
summary(model2)
## 
## Call:
## glm(formula = y ~ age + job + marital + education + default + 
##     loan + housing + contact + pdays + previous, family = binomial, 
##     data = Train_balance)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8501  -1.0467  -0.5619   1.1191   2.0631  
## 
## Coefficients:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   1.841e+00  1.269e-01  14.501  < 2e-16 ***
## age                           9.402e-03  1.464e-03   6.421 1.36e-10 ***
## jobblue-collar               -2.791e-01  4.628e-02  -6.031 1.63e-09 ***
## jobentrepreneur              -2.009e-01  7.075e-02  -2.840 0.004518 ** 
## jobhousemaid                 -2.794e-01  8.512e-02  -3.282 0.001031 ** 
## jobmanagement                -1.525e-01  5.088e-02  -2.997 0.002724 ** 
## jobretired                    7.007e-01  7.000e-02  10.010  < 2e-16 ***
## jobself-employed             -2.140e-01  6.868e-02  -3.116 0.001831 ** 
## jobservices                  -1.856e-01  5.034e-02  -3.686 0.000228 ***
## jobstudent                    9.919e-01  8.057e-02  12.312  < 2e-16 ***
## jobtechnician                -1.122e-01  4.191e-02  -2.676 0.007444 ** 
## jobunemployed                 1.115e-01  7.837e-02   1.423 0.154748    
## jobunknown                   -1.269e-01  1.706e-01  -0.744 0.456868    
## maritalmarried                2.197e-01  4.077e-02   5.388 7.14e-08 ***
## maritalsingle                 4.308e-01  4.606e-02   9.352  < 2e-16 ***
## maritalunknown                3.981e-01  2.788e-01   1.428 0.153374    
## educationbasic.6y             1.787e-01  6.923e-02   2.582 0.009831 ** 
## educationbasic.9y             3.523e-02  5.559e-02   0.634 0.526226    
## educationhigh.school          6.088e-02  5.522e-02   1.103 0.270237    
## educationilliterate           2.105e+00  6.216e-01   3.387 0.000708 ***
## educationprofessional.course  7.067e-02  6.089e-02   1.161 0.245819    
## educationuniversity.degree    1.882e-01  5.607e-02   3.357 0.000788 ***
## educationunknown              2.260e-01  7.712e-02   2.930 0.003385 ** 
## defaultunknown               -6.301e-01  3.555e-02 -17.724  < 2e-16 ***
## defaultyes                   -1.060e+01  8.448e+01  -0.125 0.900129    
## loanunknown                   8.215e-02  9.716e-02   0.845 0.397834    
## loanyes                      -1.328e-01  3.393e-02  -3.914 9.09e-05 ***
## housingunknown               -3.060e-01  1.619e-01  -1.890 0.058777 .  
## housingyes                   -1.114e-02  2.471e-02  -0.451 0.651955    
## contacttelephone             -7.882e-01  2.804e-02 -28.104  < 2e-16 ***
## pdays                        -2.426e-03  8.203e-05 -29.574  < 2e-16 ***
## previous                      1.089e-01  2.901e-02   3.756 0.000173 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 45677  on 32949  degrees of freedom
## Residual deviance: 39391  on 32918  degrees of freedom
## AIC: 39455
## 
## Number of Fisher Scoring iterations: 9
vif(model2)
##               GVIF Df GVIF^(1/(2*Df))
## age       1.872616  1        1.368436
## job       4.896951 11        1.074881
## marital   1.417353  3        1.059855
## education 3.399982  7        1.091346
## default   1.087940  2        1.021295
## loan      1.602438  2        1.125111
## housing   1.633748  2        1.130567
## contact   1.050878  1        1.025123
## pdays     1.340462  1        1.157783
## previous  1.380166  1        1.174804
## drop housing as not significant
model3 <-glm(y ~ age+job+marital+education+default+loan+contact+pdays+previous, data = Train_balance , family = binomial)
summary(model3)
## 
## Call:
## glm(formula = y ~ age + job + marital + education + default + 
##     loan + contact + pdays + previous, family = binomial, data = Train_balance)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8524  -1.0471  -0.5626   1.1187   2.0669  
## 
## Coefficients:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   1.833e+00  1.264e-01  14.502  < 2e-16 ***
## age                           9.327e-03  1.463e-03   6.373 1.85e-10 ***
## jobblue-collar               -2.776e-01  4.627e-02  -5.999 1.98e-09 ***
## jobentrepreneur              -2.007e-01  7.073e-02  -2.838 0.004543 ** 
## jobhousemaid                 -2.775e-01  8.512e-02  -3.260 0.001114 ** 
## jobmanagement                -1.517e-01  5.087e-02  -2.983 0.002859 ** 
## jobretired                    7.048e-01  6.997e-02  10.073  < 2e-16 ***
## jobself-employed             -2.150e-01  6.867e-02  -3.131 0.001739 ** 
## jobservices                  -1.846e-01  5.034e-02  -3.667 0.000245 ***
## jobstudent                    9.903e-01  8.056e-02  12.293  < 2e-16 ***
## jobtechnician                -1.104e-01  4.189e-02  -2.637 0.008377 ** 
## jobunemployed                 1.122e-01  7.835e-02   1.432 0.152151    
## jobunknown                   -1.243e-01  1.706e-01  -0.729 0.466132    
## maritalmarried                2.262e-01  4.059e-02   5.574 2.49e-08 ***
## maritalsingle                 4.298e-01  4.604e-02   9.335  < 2e-16 ***
## maritalunknown                4.017e-01  2.788e-01   1.441 0.149637    
## educationbasic.6y             1.789e-01  6.922e-02   2.584 0.009760 ** 
## educationbasic.9y             3.505e-02  5.559e-02   0.631 0.528345    
## educationhigh.school          6.122e-02  5.521e-02   1.109 0.267536    
## educationilliterate           2.103e+00  6.216e-01   3.384 0.000715 ***
## educationprofessional.course  6.952e-02  6.087e-02   1.142 0.253434    
## educationuniversity.degree    1.895e-01  5.606e-02   3.381 0.000723 ***
## educationunknown              2.257e-01  7.712e-02   2.926 0.003429 ** 
## defaultunknown               -6.295e-01  3.555e-02 -17.709  < 2e-16 ***
## defaultyes                   -1.061e+01  8.448e+01  -0.126 0.900067    
## loanunknown                  -3.068e-02  7.717e-02  -0.398 0.690888    
## loanyes                      -1.334e-01  3.388e-02  -3.937 8.24e-05 ***
## contacttelephone             -7.874e-01  2.796e-02 -28.159  < 2e-16 ***
## pdays                        -2.426e-03  8.203e-05 -29.573  < 2e-16 ***
## previous                      1.088e-01  2.900e-02   3.753 0.000175 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 45677  on 32949  degrees of freedom
## Residual deviance: 39394  on 32920  degrees of freedom
## AIC: 39454
## 
## Number of Fisher Scoring iterations: 9
vif(model3)
##               GVIF Df GVIF^(1/(2*Df))
## age       1.870606  1        1.367701
## job       4.883839 11        1.074750
## marital   1.385968  3        1.055907
## education 3.396783  7        1.091273
## default   1.087859  2        1.021276
## loan      1.004740  2        1.001183
## contact   1.044824  1        1.022166
## pdays     1.340687  1        1.157880
## previous  1.380318  1        1.174869
## test it on train set
trainPredict = predict(model3, newdata = train_set, type = 'response')
## Set threshold 70% for subcriber
p_class = ifelse(trainPredict >0.7, 'yes', 'no')

matrix_table = table(train_set$y, p_class)
matrix_table
##      p_class
##          no   yes
##   no  28122  1116
##   yes  2645  1067
## Accuracy
accuracy = sum(diag(matrix_table))/sum(matrix_table)
round(accuracy, 3)*100
## [1] 88.6
## Lift curve

library(ROCR)
pred = prediction(trainPredict, train_set$y)
perf = performance(pred, "lift", "rpp")
plot(perf, main = "lift curve", xlab = 'Proportion of Customers (sort prob)')

## test it on test set
testPredict = predict(model3, newdata = test_set, type = 'response')
p_class = ifelse(testPredict > 0.7, 'yes', 'no')

matrix_table = table(test_set$y, p_class)
matrix_table
##      p_class
##         no  yes
##   no  7029  281
##   yes  660  268
accuracy = sum(diag(matrix_table))/sum(matrix_table)
round(accuracy,3)*100
## [1] 88.6
## Model can achieve 88.6% of accuracy which is very high.
## This model will be use as scoring for campaign data.
scoring = predict(model3, newdata = campaign, type = 'response')
## Sort the highest probability of subcribe customer based on model prediction first
score_sort <- sort(scoring, decreasing = TRUE, na.last = TRUE)
## Marketing can decide to select the bin 1 for 1st priority for target phone call campaign
## Assume marketing budget allow to spend on first 500 customers for bin 1
## The first target 500 customers which have highest probability to subscribe (bin 1)
score_sort[1:500]
##     37356     39727     41175     40470     39326     40894     40561 
## 0.9929294 0.9852576 0.9849662 0.9843630 0.9841378 0.9839739 0.9836442 
##     39640     39592     39656     40330     39187     40745     39505 
## 0.9835752 0.9832921 0.9828894 0.9828878 0.9825450 0.9825352 0.9825338 
##     37717     38915     40365     40369     39866     39698     40485 
## 0.9824045 0.9823950 0.9823792 0.9822527 0.9818196 0.9816380 0.9815531 
##     38775     41032     39009     40930     37748     40323     40662 
## 0.9815429 0.9814441 0.9814125 0.9812925 0.9812735 0.9812666 0.9809283 
##     40239     40352     39262     40118     40347     39349     38968 
## 0.9808254 0.9808254 0.9807514 0.9807478 0.9806880 0.9805492 0.9804702 
##     39327     37101     41109     40401     28219     39627     38129 
## 0.9804550 0.9803243 0.9802716 0.9800744 0.9800563 0.9800489 0.9800196 
##     40833     39709     38814     37129     39259     38756     38835 
## 0.9798392 0.9798347 0.9797457 0.9795853 0.9795423 0.9795172 0.9794944 
##     40068     38978     41043     40279     40515     40967     39611 
## 0.9793815 0.9793707 0.9793546 0.9793339 0.9792622 0.9792457 0.9791087 
##     39337     37141     37496     41179     39341     39720     40380 
## 0.9790724 0.9790667 0.9790635 0.9790341 0.9788224 0.9787375 0.9786066 
##     38749     37462     40451     39148     39981     38826     39620 
## 0.9785437 0.9785199 0.9784553 0.9784311 0.9784286 0.9783178 0.9782143 
##     39894     38762     39566     40560     40952     40838     37494 
## 0.9782006 0.9781924 0.9781490 0.9781034 0.9780823 0.9780806 0.9780637 
##     37765     38913     27833     40162     37764     38805     37277 
## 0.9780513 0.9780112 0.9778494 0.9778301 0.9776774 0.9776191 0.9776100 
##     40471     38704     41099     41046     40639     40211     35974 
## 0.9775862 0.9774817 0.9774804 0.9774786 0.9774038 0.9773186 0.9773138 
##     39181     37861     35857     39590     40455     37917     39728 
## 0.9772630 0.9772436 0.9772229 0.9770981 0.9770600 0.9769816 0.9769100 
##     37885     40732     39582     39577     40374     38813     39589 
## 0.9768911 0.9768474 0.9768251 0.9768174 0.9768174 0.9767967 0.9767701 
##     36246     40064     39059     39062     40735     37971     39066 
## 0.9767674 0.9767299 0.9767258 0.9767258 0.9766917 0.9766329 0.9766312 
##     40301     37864     40372     39983     39920     39942     37820 
## 0.9765882 0.9765503 0.9765194 0.9764778 0.9763817 0.9763702 0.9763506 
##     37517     38275     38977     40727     38728     38180     40359 
## 0.9762957 0.9762635 0.9762510 0.9762072 0.9761267 0.9760307 0.9760134 
##     36461     40643     35873     40845     36173     41059     37934 
## 0.9759903 0.9758605 0.9758119 0.9757942 0.9757701 0.9756227 0.9756220 
##     38833     40969     31670     40202     39585     39677     40113 
## 0.9754659 0.9753908 0.9753394 0.9753392 0.9753160 0.9752990 0.9751494 
##     40260     40837     40366     37559     41021     39228     40333 
## 0.9750933 0.9750380 0.9748664 0.9747815 0.9746427 0.9745949 0.9745827 
##     37955     39216     36369     36024     38841     39881     40467 
## 0.9745651 0.9745348 0.9744112 0.9743701 0.9743583 0.9742291 0.9742102 
##     38902     41054     37394     38072     39663     39506     39087 
## 0.9741953 0.9741776 0.9741622 0.9740987 0.9740987 0.9740805 0.9740678 
##     38315     37528     37876     40687     40574     37598     36000 
## 0.9740479 0.9739420 0.9739244 0.9738269 0.9738186 0.9737822 0.9737789 
##     38271     38446     40742     40729     40587     39185     37027 
## 0.9737566 0.9737566 0.9736489 0.9736054 0.9735924 0.9735722 0.9735603 
##     40800     38727     38492     40896     40414     39892     38742 
## 0.9735527 0.9734630 0.9734363 0.9732764 0.9732510 0.9732287 0.9732125 
##     30424     38458     38317     39154     38931     39858     38034 
## 0.9731184 0.9730796 0.9729714 0.9728342 0.9728093 0.9727384 0.9727305 
##     40700     37694     37285     37302     38087     41102     40943 
## 0.9726777 0.9726487 0.9725319 0.9725319 0.9724861 0.9723995 0.9722406 
##     39734     40760     40274     37238     38765     39517     38016 
## 0.9721666 0.9721604 0.9721582 0.9721398 0.9721185 0.9719486 0.9717843 
##     29499     40557     40336     41013     38588     39521     38844 
## 0.9717796 0.9717619 0.9717512 0.9716114 0.9715227 0.9713986 0.9713530 
##     40703     41006     40459     37804     38916     39472     40508 
## 0.9711626 0.9711210 0.9710020 0.9708583 0.9708572 0.9708471 0.9708252 
##     40951     40698     38540     37780     41023     40000     38149 
## 0.9707891 0.9707383 0.9706225 0.9706158 0.9706011 0.9705922 0.9705641 
##     37926     40419     37327     39136     39303     39743     40736 
## 0.9705348 0.9702669 0.9702264 0.9701820 0.9701739 0.9700860 0.9700860 
##     38811     39135     38867     38723     37806     40337     39615 
## 0.9700461 0.9699329 0.9698621 0.9697906 0.9697541 0.9697089 0.9697013 
##     40289     40422     38433     38416     40377     39977     40719 
## 0.9696106 0.9695526 0.9694490 0.9693127 0.9692664 0.9692022 0.9691475 
##     40172     36780     39940     40984     38576     29156     37359 
## 0.9691062 0.9689225 0.9689115 0.9688797 0.9686290 0.9685552 0.9684607 
##     38432     30137     37739     38893     39448     40263     40164 
## 0.9683044 0.9679812 0.9678707 0.9678311 0.9677196 0.9676468 0.9675070 
##     40079     38488     40576     37567     41027     30139     30341 
## 0.9674382 0.9672969 0.9672333 0.9670253 0.9670016 0.9668300 0.9668227 
##     38784     38832     39691     39675     40292     36296     39125 
## 0.9666491 0.9666491 0.9665130 0.9664002 0.9662215 0.9661399 0.9660885 
##     39499     36957     40914     29504     36941     39651     37676 
## 0.9659034 0.9658613 0.9657214 0.9656604 0.9650046 0.9648482 0.9646062 
##     37184     41093     39474     37846     38453     39379     41091 
## 0.9638985 0.9637610 0.9636734 0.9631620 0.9630768 0.9628193 0.9628068 
##     37582     40680     40443     40277     40974     39667     39292 
## 0.9618324 0.9617792 0.9617412 0.9608836 0.9608836 0.9605868 0.9603376 
##     41127     37170     39540     40389     40126     39838     39606 
## 0.9601718 0.9599958 0.9597035 0.9595541 0.9595160 0.9589399 0.9583997 
##     39106     40715     40392     41082     39646     40453     40381 
## 0.9581655 0.9580221 0.9579594 0.9578869 0.9578563 0.9577789 0.9576130 
##     40396     40023     39359     32383     40025     39976     39221 
## 0.9575414 0.9574723 0.9574303 0.9573649 0.9572743 0.9572480 0.9561152 
##     39315     40887     41011     37828     39390     39657     39035 
## 0.9561152 0.9560549 0.9560292 0.9558482 0.9556910 0.9554664 0.9551566 
##     37322     39332     40175     40597     39612     30150     39756 
## 0.9551345 0.9550323 0.9550142 0.9549099 0.9548371 0.9548060 0.9547650 
##     40106     39335     36721     39649     41115     40500     39398 
## 0.9547339 0.9547026 0.9547012 0.9546764 0.9545713 0.9545230 0.9544490 
##     39284     39661     40846     38506     39847     38054     37443 
## 0.9544340 0.9544340 0.9544340 0.9543624 0.9542990 0.9542390 0.9541444 
##     39829     40592     39639     41031     40942     40699     39956 
## 0.9541164 0.9541156 0.9540965 0.9540266 0.9539679 0.9538759 0.9537601 
##     40614     40418     38777     40817     40120     41045     39975 
## 0.9537064 0.9536158 0.9535333 0.9534942 0.9534654 0.9532664 0.9532190 
##     37916     40178     40390     39659     39078     41064     40644 
## 0.9530860 0.9530853 0.9530262 0.9530052 0.9529907 0.9529474 0.9528758 
##     37421     39930     40258     40031     38288     39644     40247 
## 0.9527898 0.9527459 0.9527168 0.9526412 0.9526116 0.9525968 0.9525479 
##     36237     41029     39664     38928     39665     40448     41125 
## 0.9525440 0.9525138 0.9522351 0.9521510 0.9521247 0.9520545 0.9520151 
##     40222     38035     38918     38047     38781     40016     40856 
## 0.9517395 0.9517243 0.9516976 0.9516509 0.9516509 0.9515970 0.9515127 
##     41004     35835     40343     38693     38782     40066     41034 
## 0.9513894 0.9513628 0.9512825 0.9512571 0.9512025 0.9510577 0.9509944 
##     40373     39624     37284     39273     39263     40710     39843 
## 0.9509672 0.9509294 0.9508906 0.9508812 0.9508756 0.9508583 0.9508321 
##     40657     36429     30312     40767     39576     39397     40876 
## 0.9508228 0.9508161 0.9508113 0.9507881 0.9507709 0.9507677 0.9507334 
##     39225     37626     40383     39571     39608     39322     38683 
## 0.9507119 0.9505887 0.9502504 0.9499810 0.9499810 0.9499487 0.9499433 
##     40309     36589     40045     37308     39908     39287     39296 
## 0.9499319 0.9499052 0.9499052 0.9498656 0.9497536 0.9497002 0.9496341 
##     29511     40159     38205     38807     40959     27992     40173 
## 0.9495711 0.9495578 0.9494984 0.9494595 0.9493345 0.9492262 0.9491898 
##     40787     39595     40829     40603     39855     41110     39739 
## 0.9491610 0.9491092 0.9490708 0.9490604 0.9490558 0.9490400 0.9490101 
##     38717     37591     37246     40795     37770     38776     41038 
## 0.9489381 0.9488941 0.9488926 0.9488743 0.9487736 0.9487565 0.9486568 
##     38848     38839     39604     40116     40911     37255     36225 
## 0.9486163 0.9486101 0.9485240 0.9485181 0.9484015 0.9483995 0.9483295 
##     39393     39824     40481     39681     40792     38999     30409 
## 0.9482006 0.9481650 0.9481428 0.9479037 0.9478717 0.9478195 0.9476558 
##     40382     39645     38279     30645     40361     40473     40518 
## 0.9475978 0.9474329 0.9474129 0.9473702 0.9473227 0.9472024 0.9471768 
##     40971     41098     39064     40814     41025     39912     37988 
## 0.9471743 0.9470999 0.9470924 0.9470440 0.9470410 0.9468420 0.9467861 
##     40596     40634     40027     40303     24911     39160     39945 
## 0.9467505 0.9467430 0.9467388 0.9467341 0.9467008 0.9466987 0.9466811 
##     37440     38711     38716     40488     39003     38824     37552 
## 0.9466638 0.9466603 0.9466307 0.9466307 0.9466041 0.9465798 0.9465377 
##     40748     26749     39715     41048     37709     37759     38799 
## 0.9465195 0.9464936 0.9464889 0.9464768 0.9463898 0.9463494 0.9462749 
##     37688     40567     37753     39853     38783     39222     37197 
## 0.9461623 0.9461588 0.9461169 0.9461009 0.9460193 0.9460193 0.9460031 
##     37692     39831     32318 
## 0.9459146 0.9459146 0.9457516