Reading The Data into R

## Reading the data into RStudio ##
# Load the raw bank-marketing data set.
# header = TRUE is spelled out because `T` is an ordinary, reassignable variable.
# stringsAsFactors = TRUE is explicit because R >= 4.0.0 no longer converts
# strings to factors by default; this keeps the categorical columns as factors,
# matching the str()/summary() output recorded below.
Bank_marketing <- read.csv(
  "/Users/ravishankar/Downloads/Banking.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

Data Preparation

# Column names of the variables
colnames(Bank_marketing)
##  [1] "Age"            "Job"            "Marital"        "Education"     
##  [5] "Credit.default" "Housing.Loan"   "Persona.Loan"   "Contact.Type"  
##  [9] "Month"          "Day"            "Duration"       "Campaign"      
## [13] "pdays"          "previous"       "poutcome"       "emp.var.rate"  
## [17] "cons.price.idx" "cons.conf.idx"  "euribor3m"      "nr.employed"   
## [21] "Term.Deposit"
# First 10 observations
head(Bank_marketing, n = 10L)
##    Age         Job Marital           Education Credit.default Housing.Loan
## 1   56   housemaid married               Basic             no           no
## 2   57    services married         High School            yes           no
## 3   37    services married         High School             no          yes
## 4   40       Admin married               Basic             no           no
## 5   56    services married         High School             no           no
## 6   45    services married               Basic            yes           no
## 7   59       Admin married Professional Course             no           no
## 8   41 blue collar married         High School            yes           no
## 9   24  technician  single Professional Course             no          yes
## 10  25    services  single         High School             no          yes
##    Persona.Loan Contact.Type Month    Day Duration Campaign pdays previous
## 1            no    telephone   may Monday      261        1   999        0
## 2            no    telephone   may Monday      149        1   999        0
## 3            no    telephone   may Monday      226        1   999        0
## 4            no    telephone   may Monday      151        1   999        0
## 5           yes    telephone   may Monday      307        1   999        0
## 6            no    telephone   may Monday      198        1   999        0
## 7            no    telephone   may Monday      139        1   999        0
## 8            no    telephone   may Monday      217        1   999        0
## 9            no    telephone   may Monday      380        1   999        0
## 10           no    telephone   may Monday       50        1   999        0
##       poutcome emp.var.rate cons.price.idx cons.conf.idx euribor3m
## 1  nonexistent          1.1         93.994         -36.4     4.857
## 2  nonexistent          1.1         93.994         -36.4     4.857
## 3  nonexistent          1.1         93.994         -36.4     4.857
## 4  nonexistent          1.1         93.994         -36.4     4.857
## 5  nonexistent          1.1         93.994         -36.4     4.857
## 6  nonexistent          1.1         93.994         -36.4     4.857
## 7  nonexistent          1.1         93.994         -36.4     4.857
## 8  nonexistent          1.1         93.994         -36.4     4.857
## 9  nonexistent          1.1         93.994         -36.4     4.857
## 10 nonexistent          1.1         93.994         -36.4     4.857
##    nr.employed Term.Deposit
## 1         5191           no
## 2         5191           no
## 3         5191           no
## 4         5191           no
## 5         5191           no
## 6         5191           no
## 7         5191           no
## 8         5191           no
## 9         5191           no
## 10        5191           no
# Structure of the data: type and leading values of every column
str(object = Bank_marketing)
## 'data.frame':    41188 obs. of  21 variables:
##  $ Age           : int  56 57 37 40 56 45 59 41 24 25 ...
##  $ Job           : Factor w/ 11 levels "Admin","blue collar",..: 4 8 8 1 8 8 1 2 10 8 ...
##  $ Marital       : Factor w/ 2 levels "married","single": 1 1 1 1 1 1 1 1 2 2 ...
##  $ Education     : Factor w/ 5 levels "Basic","High School",..: 1 2 2 1 2 1 3 2 3 2 ...
##  $ Credit.default: Factor w/ 2 levels "no","yes": 1 2 1 1 1 2 1 2 1 1 ...
##  $ Housing.Loan  : Factor w/ 2 levels "no","yes": 1 1 2 1 1 1 1 1 2 2 ...
##  $ Persona.Loan  : Factor w/ 2 levels "no","yes": 1 1 1 1 2 1 1 1 1 1 ...
##  $ Contact.Type  : Factor w/ 2 levels "cellular","telephone": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Month         : Factor w/ 10 levels "apr","aug","dec",..: 7 7 7 7 7 7 7 7 7 7 ...
##  $ Day           : Factor w/ 5 levels "Friday","Monday",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Duration      : int  261 149 226 151 307 198 139 217 380 50 ...
##  $ Campaign      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ pdays         : int  999 999 999 999 999 999 999 999 999 999 ...
##  $ previous      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ poutcome      : Factor w/ 3 levels "failure","nonexistent",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ emp.var.rate  : num  1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 ...
##  $ cons.price.idx: num  94 94 94 94 94 ...
##  $ cons.conf.idx : num  -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 ...
##  $ euribor3m     : num  4.86 4.86 4.86 4.86 4.86 ...
##  $ nr.employed   : num  5191 5191 5191 5191 5191 ...
##  $ Term.Deposit  : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary of the data
summary(object = Bank_marketing)
##       Age                 Job           Marital     
##  Min.   :17.00   Admin      :10422   married:29540  
##  1st Qu.:32.00   blue collar: 9254   single :11648  
##  Median :38.00   technician : 6743                  
##  Mean   :40.02   services   : 3969                  
##  3rd Qu.:47.00   management : 2924                  
##  Max.   :98.00   retired    : 1720                  
##                  (Other)    : 6156                  
##                Education     Credit.default Housing.Loan Persona.Loan
##  Basic              :12513   no :32588      no :18622    no :34940   
##  High School        :11246   yes: 8600      yes:22566    yes: 6248   
##  Professional Course: 5243                                           
##  University degree  :12168                                           
##  illiterate         :   18                                           
##                                                                      
##                                                                      
##     Contact.Type       Month              Day          Duration     
##  cellular :26144   may    :13769   Friday   :7827   Min.   :   0.0  
##  telephone:15044   jul    : 7174   Monday   :8514   1st Qu.: 102.0  
##                    aug    : 6178   Thursday :8623   Median : 180.0  
##                    jun    : 5318   Tuesday  :8090   Mean   : 258.3  
##                    nov    : 4101   Wednesday:8134   3rd Qu.: 319.0  
##                    apr    : 2632                    Max.   :4918.0  
##                    (Other): 2016                                    
##     Campaign          pdays          previous            poutcome    
##  Min.   : 1.000   Min.   :  0.0   Min.   :0.000   failure    : 4252  
##  1st Qu.: 1.000   1st Qu.:999.0   1st Qu.:0.000   nonexistent:35563  
##  Median : 2.000   Median :999.0   Median :0.000   success    : 1373  
##  Mean   : 2.568   Mean   :962.5   Mean   :0.173                      
##  3rd Qu.: 3.000   3rd Qu.:999.0   3rd Qu.:0.000                      
##  Max.   :56.000   Max.   :999.0   Max.   :7.000                      
##                                                                      
##   emp.var.rate      cons.price.idx  cons.conf.idx     euribor3m    
##  Min.   :-3.40000   Min.   :92.20   Min.   :-50.8   Min.   :0.634  
##  1st Qu.:-1.80000   1st Qu.:93.08   1st Qu.:-42.7   1st Qu.:1.344  
##  Median : 1.10000   Median :93.75   Median :-41.8   Median :4.857  
##  Mean   : 0.08189   Mean   :93.58   Mean   :-40.5   Mean   :3.621  
##  3rd Qu.: 1.40000   3rd Qu.:93.99   3rd Qu.:-36.4   3rd Qu.:4.961  
##  Max.   : 1.40000   Max.   :94.77   Max.   :-26.9   Max.   :5.045  
##                                                                    
##   nr.employed   Term.Deposit
##  Min.   :4964   no :36548   
##  1st Qu.:5099   yes: 4640   
##  Median :5191               
##  Mean   :5167               
##  3rd Qu.:5228               
##  Max.   :5228               
## 
##Outlier Treatment
# Outliers can be detected with the help of box plots
# They can be treated with the capping and flooring method

##Missing Value Treatment
#No missing value treatment is needed for this dataset. In general, if more than 30% of a variable's
#values are missing the variable can be removed; otherwise the missing values can be replaced with the
#mean if it is a representative value, or with the median if the variable is influenced by outliers

Conversion of Dummy Variables

##Conversion of Dummy Variables - all the factor variables will be converted into dummy variables

# Example - Job (the column is named "Job" and the level is "blue collar", as shown by str() above)
#Bank_marketing$Admin <- ifelse(Bank_marketing$Job=="Admin",1,0)
#Bank_marketing$blue.collar <- ifelse(Bank_marketing$Job=="blue collar",1,0)
#Bank_marketing$technician <- ifelse(Bank_marketing$Job=="technician",1,0)
#Bank_marketing$services <- ifelse(Bank_marketing$Job=="services",1,0)
#Bank_marketing$management <- ifelse(Bank_marketing$Job=="management",1,0)
#Bank_marketing$retired <- ifelse(Bank_marketing$Job=="retired",1,0)

#In this dataset the conversion of dummy variables is done through Excel
# Load the Excel-prepared file that already contains the dummy columns.
# header = TRUE is spelled out because `T` is an ordinary, reassignable variable.
# stringsAsFactors = TRUE pins the pre-4.0.0 read.csv() default this analysis
# was run with, so any remaining text column is still read as a factor.
Bank_data <- read.csv(
  "/Users/ravishankar/Desktop/Bank_Marketing.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

Splitting the data into Train data and Test data

## Split the data into Train data and Test data
nrow(Bank_data)
## [1] 41188
# Random sampling: hold out 20% of the rows as the test set and train on the
# remaining rows.
# The original script trained on rows 1:40000 while drawing the test rows at
# random from the whole table, so almost every test row was also a training
# row. The two sets are now disjoint.
# NOTE: the model output recorded further down in this file was produced with
# the old 40000-row training set and will change when the script is re-run.
population_Size <- nrow(Bank_data)
sample_pct <- 20 / 100
sample_size <- as.integer(sample_pct * population_Size)
# seq_len() stays correct for an empty table, where 1:0 would yield c(1, 0).
test_rows <- sample(seq_len(population_Size), sample_size, replace = FALSE)
# setdiff() rather than negative indexing: x[-integer(0), ] selects no rows,
# which would empty the training set whenever the test sample is empty.
train_rows <- setdiff(seq_len(population_Size), test_rows)
test_data <- Bank_data[test_rows, ]
train_data <- Bank_data[train_rows, ]

Choosing the Desired Model

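## The preferred model is logistic regression since the target variable is a binomial (categorical) variable
## Assumptions made for logistic regression
#1. Linearity between the independent variables and the log-odds (logit) of the dependent variable
#2. Normality of errors (observed - predicted)
#3. Variance of errors should be constant (homoscedasticity)
#4. Multicollinearity should be removed
# Note: assumptions 2 and 3 come from ordinary least-squares regression; logistic regression
# does not strictly require normally distributed or constant-variance errors.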

Iterations

# Iteration 1: baseline logistic model with Age as the only predictor.
iteration1 <- glm(
  Term.Deposit ~ Age,
  data = train_data,
  family = binomial(logit)
)
summary(iteration1)
## 
## Call:
## glm(formula = Term.Deposit ~ Age, family = binomial(logit), data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5544  -0.4704  -0.4562  -0.4455   2.2109  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.474887   0.066788 -37.056  < 2e-16 ***
## Age          0.007155   0.001594   4.488 7.19e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26173  on 39999  degrees of freedom
## Residual deviance: 26153  on 39998  degrees of freedom
## AIC: 26157
## 
## Number of Fisher Scoring iterations: 4
library(car)
## Warning: package 'car' was built under R version 3.1.3
# Sequential analysis-of-deviance table with chi-squared tests for iteration 1.
anova(iteration1, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Term.Deposit
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                 39999      26173              
## Age   1   19.873     39998      26153 8.275e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Iteration 2: Age plus a first set of job dummy variables.
iteration2 <- glm(
  Term.Deposit ~ Age +
    housemaid + services + Admin + blue.collar + technician + management +
    unemployed,
  data = train_data,
  family = binomial(logit)
)
summary(iteration2)
## 
## Call:
## glm(formula = Term.Deposit ~ Age + housemaid + services + Admin + 
##     blue.collar + technician + management + unemployed, family = binomial(logit), 
##     data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.6252  -0.4939  -0.4452  -0.3719   2.3553  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.833224   0.081801 -22.411  < 2e-16 ***
## Age          0.003158   0.001605   1.968   0.0491 *  
## housemaid   -0.632239   0.115459  -5.476 4.35e-08 ***
## services    -0.800268   0.072314 -11.067  < 2e-16 ***
## Admin       -0.322876   0.049929  -6.467 1.00e-10 ***
## blue.collar -0.942336   0.056929 -16.553  < 2e-16 ***
## technician  -0.542072   0.057272  -9.465  < 2e-16 ***
## management  -0.473796   0.072310  -6.552 5.67e-11 ***
## unemployed  -0.231534   0.104141  -2.223   0.0262 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26173  on 39999  degrees of freedom
## Residual deviance: 25808  on 39991  degrees of freedom
## AIC: 25826
## 
## Number of Fisher Scoring iterations: 5
# Variance inflation factors (car package) for the iteration 2 predictors.
car::vif(iteration2)
##         Age   housemaid    services       Admin blue.collar  technician 
##    1.072936    1.090764    1.306380    1.828918    1.545681    1.559250 
##  management  unemployed 
##    1.270735    1.122316
# Sequential analysis-of-deviance table with chi-squared tests for iteration 2.
anova(iteration2, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Term.Deposit
## 
## Terms added sequentially (first to last)
## 
## 
##             Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                        39999      26173              
## Age          1   19.873     39998      26153 8.275e-06 ***
## housemaid    1    2.733     39997      26151   0.09830 .  
## services     1   32.525     39996      26118 1.177e-08 ***
## Admin        1   23.922     39995      26094 1.003e-06 ***
## blue.collar  1  185.018     39994      25909 < 2.2e-16 ***
## technician   1   55.138     39993      25854 1.124e-13 ***
## management   1   40.845     39992      25813 1.648e-10 ***
## unemployed   1    5.152     39991      25808   0.02322 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Iteration 3: all job dummies, marital status and the education dummies.
# `Prodessional.Course` is the column name as spelled in the data file; its
# coefficient is reported as NA (singularity) in the summary output.
iteration3 <- glm(
  Term.Deposit ~ Age +
    housemaid + services + Admin + blue.collar + technician + management +
    unemployed + retired + student + entrepreneur +
    Marital +
    Basic + High.School + Prodessional.Course + University.degree,
  data = train_data,
  family = binomial(logit)
)
summary(iteration3)
## 
## Call:
## glm(formula = Term.Deposit ~ Age + housemaid + services + Admin + 
##     blue.collar + technician + management + unemployed + retired + 
##     student + entrepreneur + Marital + Basic + High.School + 
##     Prodessional.Course + University.degree, family = binomial(logit), 
##     data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9147  -0.4815  -0.4258  -0.3665   2.4259  
## 
## Coefficients: (1 not defined because of singularities)
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -2.264897   0.126484 -17.907  < 2e-16 ***
## Age                  0.007040   0.001981   3.554  0.00038 ***
## housemaid            0.002795   0.142885   0.020  0.98439    
## services            -0.233488   0.111182  -2.100  0.03572 *  
## Admin                0.098883   0.094710   1.044  0.29646    
## blue.collar         -0.263594   0.103439  -2.548  0.01083 *  
## technician          -0.071569   0.100765  -0.710  0.47755    
## management          -0.019926   0.108634  -0.183  0.85447    
## unemployed           0.312838   0.132037   2.369  0.01782 *  
## retired              0.949020   0.115840   8.193 2.56e-16 ***
## student              0.946443   0.118331   7.998 1.26e-15 ***
## entrepreneur        -0.187610   0.131370  -1.428  0.15326    
## Marital             -0.337813   0.040786  -8.283  < 2e-16 ***
## Basic               -0.169902   0.068493  -2.481  0.01312 *  
## High.School         -0.034860   0.064485  -0.541  0.58879    
## Prodessional.Course        NA         NA      NA       NA    
## University.degree    0.153875   0.062524   2.461  0.01385 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26173  on 39999  degrees of freedom
## Residual deviance: 25507  on 39984  degrees of freedom
## AIC: 25539
## 
## Number of Fisher Scoring iterations: 5
# Sequential analysis-of-deviance table with chi-squared tests for iteration 3.
anova(iteration3, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Term.Deposit
## 
## Terms added sequentially (first to last)
## 
## 
##                     Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                                39999      26173              
## Age                  1   19.873     39998      26153 8.275e-06 ***
## housemaid            1    2.733     39997      26151 0.0982980 .  
## services             1   32.525     39996      26118 1.177e-08 ***
## Admin                1   23.922     39995      26094 1.003e-06 ***
## blue.collar          1  185.018     39994      25909 < 2.2e-16 ***
## technician           1   55.138     39993      25854 1.124e-13 ***
## management           1   40.845     39992      25813 1.648e-10 ***
## unemployed           1    5.152     39991      25808 0.0232188 *  
## retired              1   68.119     39990      25740 < 2.2e-16 ***
## student              1  117.363     39989      25622 < 2.2e-16 ***
## entrepreneur         1    3.586     39988      25619 0.0582785 .  
## Marital              1   76.647     39987      25542 < 2.2e-16 ***
## Basic                1   18.011     39986      25524 2.196e-05 ***
## High.School          1   11.558     39985      25513 0.0006746 ***
## Prodessional.Course  0    0.000     39985      25513              
## University.degree    1    6.119     39984      25507 0.0133740 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Iteration 4: iteration 3 plus credit default, loan and contact-type terms.
# `Prodessional.Course` is the column name as spelled in the data file; its
# coefficient is reported as NA (singularity) in the summary output.
iteration4 <- glm(
  Term.Deposit ~ Age +
    housemaid + services + Admin + blue.collar + technician + management +
    unemployed + retired + student + entrepreneur +
    Marital +
    Basic + High.School + Prodessional.Course + University.degree +
    Credit.default + Housing.Loan + Persona.Loan + Contact.Type,
  data = train_data,
  family = binomial(logit)
)
summary(iteration4)
## 
## Call:
## glm(formula = Term.Deposit ~ Age + housemaid + services + Admin + 
##     blue.collar + technician + management + unemployed + retired + 
##     student + entrepreneur + Marital + Basic + High.School + 
##     Prodessional.Course + University.degree + Credit.default + 
##     Housing.Loan + Persona.Loan + Contact.Type, family = binomial(logit), 
##     data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0211  -0.5245  -0.3912  -0.3009   2.8411  
## 
## Coefficients: (1 not defined because of singularities)
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -2.058821   0.129604 -15.885  < 2e-16 ***
## Age                  0.010304   0.002005   5.139 2.76e-07 ***
## housemaid            0.005501   0.144636   0.038   0.9697    
## services            -0.208812   0.112280  -1.860   0.0629 .  
## Admin                0.045197   0.095658   0.472   0.6366    
## blue.collar         -0.201120   0.104766  -1.920   0.0549 .  
## technician          -0.131658   0.101903  -1.292   0.1964    
## management          -0.057458   0.109710  -0.524   0.6005    
## unemployed           0.326672   0.133713   2.443   0.0146 *  
## retired              0.803506   0.118384   6.787 1.14e-11 ***
## student              0.973841   0.120488   8.082 6.35e-16 ***
## entrepreneur        -0.178927   0.132650  -1.349   0.1774    
## Marital             -0.273281   0.041424  -6.597 4.19e-11 ***
## Basic               -0.070578   0.069520  -1.015   0.3100    
## High.School          0.001859   0.065054   0.029   0.9772    
## Prodessional.Course        NA         NA      NA       NA    
## University.degree    0.112087   0.063169   1.774   0.0760 .  
## Credit.default      -0.733060   0.054404 -13.474  < 2e-16 ***
## Housing.Loan         0.001822   0.034241   0.053   0.9576    
## Persona.Loan        -0.069010   0.047762  -1.445   0.1485    
## Contact.Type        -0.933406   0.042877 -21.769  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26173  on 39999  degrees of freedom
## Residual deviance: 24670  on 39980  degrees of freedom
## AIC: 24710
## 
## Number of Fisher Scoring iterations: 5
# Sequential analysis-of-deviance table with chi-squared tests for iteration 4.
anova(iteration4, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Term.Deposit
## 
## Terms added sequentially (first to last)
## 
## 
##                     Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                                39999      26173              
## Age                  1    19.87     39998      26153 8.275e-06 ***
## housemaid            1     2.73     39997      26151 0.0982980 .  
## services             1    32.52     39996      26118 1.177e-08 ***
## Admin                1    23.92     39995      26094 1.003e-06 ***
## blue.collar          1   185.02     39994      25909 < 2.2e-16 ***
## technician           1    55.14     39993      25854 1.124e-13 ***
## management           1    40.85     39992      25813 1.648e-10 ***
## unemployed           1     5.15     39991      25808 0.0232188 *  
## retired              1    68.12     39990      25740 < 2.2e-16 ***
## student              1   117.36     39989      25622 < 2.2e-16 ***
## entrepreneur         1     3.59     39988      25619 0.0582785 .  
## Marital              1    76.65     39987      25542 < 2.2e-16 ***
## Basic                1    18.01     39986      25524 2.196e-05 ***
## High.School          1    11.56     39985      25513 0.0006746 ***
## Prodessional.Course  0     0.00     39985      25513              
## University.degree    1     6.12     39984      25507 0.0133740 *  
## Credit.default       1   286.48     39983      25220 < 2.2e-16 ***
## Housing.Loan         1     2.85     39982      25217 0.0913077 .  
## Persona.Loan         1     1.57     39981      25216 0.2105271    
## Contact.Type         1   545.71     39980      24670 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Iteration 5: iteration 4 plus the month dummy variables.
# `Prodessional.Course` is the column name as spelled in the data file; its
# coefficient is reported as NA (singularity) in the summary output.
iteration5 <- glm(
  Term.Deposit ~ Age +
    housemaid + services + Admin + blue.collar + technician + management +
    unemployed + retired + student + entrepreneur +
    Marital +
    Basic + High.School + Prodessional.Course + University.degree +
    Credit.default + Housing.Loan + Persona.Loan + Contact.Type +
    mar + apr + may + jun + jul + aug + sep + oct,
  data = train_data,
  family = binomial(logit)
)
summary(iteration5)
## 
## Call:
## glm(formula = Term.Deposit ~ Age + housemaid + services + Admin + 
##     blue.collar + technician + management + unemployed + retired + 
##     student + entrepreneur + Marital + Basic + High.School + 
##     Prodessional.Course + University.degree + Credit.default + 
##     Housing.Loan + Persona.Loan + Contact.Type + mar + apr + 
##     may + jun + jul + aug + sep + oct, family = binomial(logit), 
##     data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6346  -0.4667  -0.3889  -0.2705   2.9910  
## 
## Coefficients: (1 not defined because of singularities)
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -2.142703   0.141860 -15.104  < 2e-16 ***
## Age                  0.005704   0.002070   2.755 0.005861 ** 
## housemaid            0.127620   0.148535   0.859 0.390237    
## services            -0.147812   0.115263  -1.282 0.199705    
## Admin                0.088628   0.098429   0.900 0.367889    
## blue.collar         -0.122526   0.107820  -1.136 0.255795    
## technician          -0.046105   0.104964  -0.439 0.660483    
## management          -0.042545   0.112828  -0.377 0.706116    
## unemployed           0.256152   0.138485   1.850 0.064360 .  
## retired              0.687717   0.124261   5.534 3.12e-08 ***
## student              0.804466   0.125572   6.406 1.49e-10 ***
## entrepreneur        -0.110168   0.135433  -0.813 0.415961    
## Marital             -0.190226   0.042812  -4.443 8.86e-06 ***
## Basic               -0.082812   0.071410  -1.160 0.246181    
## High.School          0.013901   0.066997   0.207 0.835627    
## Prodessional.Course        NA         NA      NA       NA    
## University.degree    0.110845   0.065110   1.702 0.088676 .  
## Credit.default      -0.529607   0.055558  -9.532  < 2e-16 ***
## Housing.Loan        -0.013099   0.035257  -0.372 0.710255    
## Persona.Loan        -0.046403   0.049055  -0.946 0.344182    
## Contact.Type        -1.176047   0.052141 -22.555  < 2e-16 ***
## mar                  2.000945   0.102139  19.590  < 2e-16 ***
## apr                  0.733357   0.070747  10.366  < 2e-16 ***
## may                  0.019292   0.064446   0.299 0.764668    
## jun                  0.852862   0.076203  11.192  < 2e-16 ***
## jul                 -0.353693   0.070527  -5.015 5.30e-07 ***
## aug                 -0.257409   0.069127  -3.724 0.000196 ***
## sep                  1.488062   0.138787  10.722  < 2e-16 ***
## oct                  1.807341   0.106734  16.933  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26173  on 39999  degrees of freedom
## Residual deviance: 23421  on 39972  degrees of freedom
## AIC: 23477
## 
## Number of Fisher Scoring iterations: 5
# Sequential analysis-of-deviance table with chi-squared tests for iteration 5.
anova(iteration5, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Term.Deposit
## 
## Terms added sequentially (first to last)
## 
## 
##                     Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                                39999      26173              
## Age                  1    19.87     39998      26153 8.275e-06 ***
## housemaid            1     2.73     39997      26151 0.0982980 .  
## services             1    32.52     39996      26118 1.177e-08 ***
## Admin                1    23.92     39995      26094 1.003e-06 ***
## blue.collar          1   185.02     39994      25909 < 2.2e-16 ***
## technician           1    55.14     39993      25854 1.124e-13 ***
## management           1    40.85     39992      25813 1.648e-10 ***
## unemployed           1     5.15     39991      25808 0.0232188 *  
## retired              1    68.12     39990      25740 < 2.2e-16 ***
## student              1   117.36     39989      25622 < 2.2e-16 ***
## entrepreneur         1     3.59     39988      25619 0.0582785 .  
## Marital              1    76.65     39987      25542 < 2.2e-16 ***
## Basic                1    18.01     39986      25524 2.196e-05 ***
## High.School          1    11.56     39985      25513 0.0006746 ***
## Prodessional.Course  0     0.00     39985      25513              
## University.degree    1     6.12     39984      25507 0.0133740 *  
## Credit.default       1   286.48     39983      25220 < 2.2e-16 ***
## Housing.Loan         1     2.85     39982      25217 0.0913077 .  
## Persona.Loan         1     1.57     39981      25216 0.2105271    
## Contact.Type         1   545.71     39980      24670 < 2.2e-16 ***
## mar                  1   378.77     39979      24291 < 2.2e-16 ***
## apr                  1   154.26     39978      24137 < 2.2e-16 ***
## may                  1    16.32     39977      24121 5.361e-05 ***
## jun                  1   188.40     39976      23932 < 2.2e-16 ***
## jul                  1    58.83     39975      23873 1.720e-14 ***
## aug                  1   120.18     39974      23753 < 2.2e-16 ***
## sep                  1    65.42     39973      23688 6.059e-16 ***
## oct                  1   266.98     39972      23421 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Iteration 6: iteration 5 plus the day-of-week dummy variables.
# `Prodessional.Course` is the column name as spelled in the data file; its
# coefficient is reported as NA (singularity) in the summary output.
iteration6 <- glm(
  Term.Deposit ~ Age +
    housemaid + services + Admin + blue.collar + technician + management +
    unemployed + retired + student + entrepreneur +
    Marital +
    Basic + High.School + Prodessional.Course + University.degree +
    Credit.default + Housing.Loan + Persona.Loan + Contact.Type +
    mar + apr + may + jun + jul + aug + sep + oct +
    Monday + Tuesday + Wednesday + Thursday,
  data = train_data,
  family = binomial(logit)
)
summary(iteration6)
## 
## Call:
## glm(formula = Term.Deposit ~ Age + housemaid + services + Admin + 
##     blue.collar + technician + management + unemployed + retired + 
##     student + entrepreneur + Marital + Basic + High.School + 
##     Prodessional.Course + University.degree + Credit.default + 
##     Housing.Loan + Persona.Loan + Contact.Type + mar + apr + 
##     may + jun + jul + aug + sep + oct + Monday + Tuesday + Wednesday + 
##     Thursday, family = binomial(logit), data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6225  -0.4689  -0.3870  -0.2687   2.9795  
## 
## Coefficients: (1 not defined because of singularities)
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -2.205993   0.146631 -15.045  < 2e-16 ***
## Age                  0.005938   0.002072   2.865 0.004170 ** 
## housemaid            0.121899   0.148667   0.820 0.412249    
## services            -0.151408   0.115312  -1.313 0.189172    
## Admin                0.083308   0.098518   0.846 0.397770    
## blue.collar         -0.127912   0.107874  -1.186 0.235719    
## technician          -0.048897   0.105031  -0.466 0.641537    
## management          -0.046633   0.112909  -0.413 0.679595    
## unemployed           0.248079   0.138605   1.790 0.073483 .  
## retired              0.674576   0.124399   5.423 5.87e-08 ***
## student              0.799025   0.125691   6.357 2.06e-10 ***
## entrepreneur        -0.114204   0.135531  -0.843 0.399431    
## Marital             -0.189174   0.042827  -4.417 1.00e-05 ***
## Basic               -0.083258   0.071494  -1.165 0.244208    
## High.School          0.017570   0.067080   0.262 0.793373    
## Prodessional.Course        NA         NA      NA       NA    
## University.degree    0.114120   0.065155   1.752 0.079858 .  
## Credit.default      -0.525133   0.055589  -9.447  < 2e-16 ***
## Housing.Loan        -0.014067   0.035281  -0.399 0.690106    
## Persona.Loan        -0.044905   0.049095  -0.915 0.360372    
## Contact.Type        -1.179932   0.052184 -22.611  < 2e-16 ***
## mar                  2.022773   0.102465  19.741  < 2e-16 ***
## apr                  0.760729   0.071171  10.689  < 2e-16 ***
## may                  0.026833   0.064521   0.416 0.677495    
## jun                  0.869040   0.076363  11.380  < 2e-16 ***
## jul                 -0.352986   0.070591  -5.000 5.72e-07 ***
## aug                 -0.256959   0.069170  -3.715 0.000203 ***
## sep                  1.486721   0.139067  10.691  < 2e-16 ***
## oct                  1.803758   0.106860  16.880  < 2e-16 ***
## Monday              -0.109008   0.057084  -1.910 0.056183 .  
## Tuesday              0.113770   0.056356   2.019 0.043510 *  
## Wednesday            0.149380   0.056398   2.649 0.008081 ** 
## Thursday             0.085833   0.055213   1.555 0.120045    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26173  on 39999  degrees of freedom
## Residual deviance: 23393  on 39968  degrees of freedom
## AIC: 23457
## 
## Number of Fisher Scoring iterations: 5
# Sequential analysis-of-deviance table with chi-squared tests for iteration 6.
anova(iteration6, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Term.Deposit
## 
## Terms added sequentially (first to last)
## 
## 
##                     Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                                39999      26173              
## Age                  1    19.87     39998      26153 8.275e-06 ***
## housemaid            1     2.73     39997      26151 0.0982980 .  
## services             1    32.52     39996      26118 1.177e-08 ***
## Admin                1    23.92     39995      26094 1.003e-06 ***
## blue.collar          1   185.02     39994      25909 < 2.2e-16 ***
## technician           1    55.14     39993      25854 1.124e-13 ***
## management           1    40.85     39992      25813 1.648e-10 ***
## unemployed           1     5.15     39991      25808 0.0232188 *  
## retired              1    68.12     39990      25740 < 2.2e-16 ***
## student              1   117.36     39989      25622 < 2.2e-16 ***
## entrepreneur         1     3.59     39988      25619 0.0582785 .  
## Marital              1    76.65     39987      25542 < 2.2e-16 ***
## Basic                1    18.01     39986      25524 2.196e-05 ***
## High.School          1    11.56     39985      25513 0.0006746 ***
## Prodessional.Course  0     0.00     39985      25513              
## University.degree    1     6.12     39984      25507 0.0133740 *  
## Credit.default       1   286.48     39983      25220 < 2.2e-16 ***
## Housing.Loan         1     2.85     39982      25217 0.0913077 .  
## Persona.Loan         1     1.57     39981      25216 0.2105271    
## Contact.Type         1   545.71     39980      24670 < 2.2e-16 ***
## mar                  1   378.77     39979      24291 < 2.2e-16 ***
## apr                  1   154.26     39978      24137 < 2.2e-16 ***
## may                  1    16.32     39977      24121 5.361e-05 ***
## jun                  1   188.40     39976      23932 < 2.2e-16 ***
## jul                  1    58.83     39975      23873 1.720e-14 ***
## aug                  1   120.18     39974      23753 < 2.2e-16 ***
## sep                  1    65.42     39973      23688 6.059e-16 ***
## oct                  1   266.98     39972      23421 < 2.2e-16 ***
## Monday               1    20.06     39971      23401 7.510e-06 ***
## Tuesday              1     0.56     39970      23400 0.4550487    
## Wednesday            1     4.65     39969      23396 0.0310500 *  
## Thursday             1     2.42     39968      23393 0.1197521    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# iteration7: the terms tested for iteration6 (minus Prodessional.Course, which
# had 0 Df there) plus Duration, Campaign, emp.var.rate, cons.price.idx,
# cons.conf.idx, euribor3m, nr.employed and previous
iteration7 <- glm(
  Term.Deposit ~ Age + housemaid + services + Admin + blue.collar +
    technician + management + unemployed + retired + student +
    entrepreneur + Marital + Basic + High.School + University.degree +
    Credit.default + Housing.Loan + Persona.Loan + Contact.Type +
    mar + apr + may + jun + jul + aug + sep + oct +
    Monday + Tuesday + Wednesday + Thursday +
    Duration + Campaign + emp.var.rate + cons.price.idx +
    cons.conf.idx + euribor3m + nr.employed + previous,
  data = train_data,
  family = binomial(logit)
)
summary(iteration7)
## 
## Call:
## glm(formula = Term.Deposit ~ Age + housemaid + services + Admin + 
##     blue.collar + technician + management + unemployed + retired + 
##     student + entrepreneur + Marital + Basic + High.School + 
##     University.degree + Credit.default + Housing.Loan + Persona.Loan + 
##     Contact.Type + mar + apr + may + jun + jul + aug + sep + 
##     oct + Monday + Tuesday + Wednesday + Thursday + Duration + 
##     Campaign + emp.var.rate + cons.price.idx + cons.conf.idx + 
##     euribor3m + nr.employed + previous, family = binomial(logit), 
##     data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -6.1024  -0.2917  -0.1843  -0.1338   3.7829  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -3.632e+02  3.897e+01  -9.320  < 2e-16 ***
## Age                3.907e-04  2.450e-03   0.159 0.873298    
## housemaid          1.773e-01  1.810e-01   0.979 0.327458    
## services          -3.457e-04  1.385e-01  -0.002 0.998009    
## Admin              1.472e-01  1.191e-01   1.236 0.216577    
## blue.collar       -7.140e-02  1.306e-01  -0.547 0.584530    
## technician         1.097e-01  1.267e-01   0.866 0.386719    
## management         6.099e-02  1.359e-01   0.449 0.653674    
## unemployed         1.628e-01  1.687e-01   0.965 0.334367    
## retired            4.356e-01  1.509e-01   2.886 0.003896 ** 
## student            3.626e-01  1.519e-01   2.387 0.016996 *  
## entrepreneur      -2.906e-02  1.641e-01  -0.177 0.859497    
## Marital           -1.186e-01  5.196e-02  -2.283 0.022442 *  
## Basic             -8.457e-02  8.555e-02  -0.988 0.322918    
## High.School       -6.556e-02  8.063e-02  -0.813 0.416182    
## University.degree  7.731e-02  7.834e-02   0.987 0.323713    
## Credit.default    -2.593e-01  6.749e-02  -3.843 0.000122 ***
## Housing.Loan      -1.502e-02  4.227e-02  -0.355 0.722277    
## Persona.Loan      -6.013e-02  5.939e-02  -1.013 0.311273    
## Contact.Type      -4.345e-01  7.944e-02  -5.469 4.53e-08 ***
## mar                1.876e+00  1.493e-01  12.567  < 2e-16 ***
## apr               -4.106e-01  1.372e-01  -2.993 0.002766 ** 
## may               -4.902e-01  1.121e-01  -4.372 1.23e-05 ***
## jun               -1.328e+00  1.751e-01  -7.587 3.26e-14 ***
## jul                5.470e-01  1.144e-01   4.783 1.73e-06 ***
## aug                2.580e+00  1.511e-01  17.076  < 2e-16 ***
## sep                5.779e-01  1.719e-01   3.363 0.000772 ***
## oct                7.649e-01  1.292e-01   5.921 3.19e-09 ***
## Monday            -6.700e-02  6.822e-02  -0.982 0.326059    
## Tuesday            1.041e-01  6.779e-02   1.535 0.124714    
## Wednesday          1.619e-01  6.811e-02   2.377 0.017444 *  
## Thursday           8.582e-02  6.629e-02   1.295 0.195442    
## Duration           4.753e-03  7.608e-05  62.467  < 2e-16 ***
## Campaign          -3.451e-02  1.172e-02  -2.945 0.003229 ** 
## emp.var.rate      -2.771e+00  1.567e-01 -17.678  < 2e-16 ***
## cons.price.idx     3.854e+00  2.766e-01  13.932  < 2e-16 ***
## cons.conf.idx     -5.707e-02  1.054e-02  -5.415 6.11e-08 ***
## euribor3m          9.922e-01  1.408e-01   7.045 1.85e-12 ***
## nr.employed       -1.412e-03  3.159e-03  -0.447 0.654973    
## previous           1.224e-01  3.830e-02   3.197 0.001390 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26173  on 39999  degrees of freedom
## Residual deviance: 16065  on 39960  degrees of freedom
## AIC: 16145
## 
## Number of Fisher Scoring iterations: 6
# Variance inflation factors for iteration7; large values point to
# multicollinearity among the predictors.
# NOTE(review): vif() is not defined in this part of the file - presumably car::vif; confirm.
vif(iteration7)
##               Age         housemaid          services             Admin 
##          2.107506          1.694202          3.270716          6.584982 
##       blue.collar        technician        management        unemployed 
##          5.388995          4.909448          2.932368          1.781019 
##           retired           student      entrepreneur           Marital 
##          3.534897          2.585975          1.833974          1.386768 
##             Basic       High.School University.degree    Credit.default 
##          3.184347          2.979485          3.189946          1.134886 
##      Housing.Loan      Persona.Loan      Contact.Type               mar 
##          1.011142          1.003787          2.325672          2.502671 
##               apr               may               jun               jul 
##          5.095600          5.278746          8.125101          3.342961 
##               aug               sep               oct            Monday 
##          6.182014          1.495800          1.649216          1.680410 
##           Tuesday         Wednesday          Thursday          Duration 
##          1.714373          1.690382          1.730033          1.255727 
##          Campaign      emp.var.rate    cons.price.idx     cons.conf.idx 
##          1.055949        174.940272         65.724224          9.528612 
##         euribor3m       nr.employed          previous 
##        151.121193        139.380641          1.312375
# Sequential analysis-of-deviance table (chi-square tests) for iteration7
anova(iteration7, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Term.Deposit
## 
## Terms added sequentially (first to last)
## 
## 
##                   Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                              39999      26173              
## Age                1     19.9     39998      26153 8.275e-06 ***
## housemaid          1      2.7     39997      26151 0.0982980 .  
## services           1     32.5     39996      26118 1.177e-08 ***
## Admin              1     23.9     39995      26094 1.003e-06 ***
## blue.collar        1    185.0     39994      25909 < 2.2e-16 ***
## technician         1     55.1     39993      25854 1.124e-13 ***
## management         1     40.8     39992      25813 1.648e-10 ***
## unemployed         1      5.2     39991      25808 0.0232188 *  
## retired            1     68.1     39990      25740 < 2.2e-16 ***
## student            1    117.4     39989      25622 < 2.2e-16 ***
## entrepreneur       1      3.6     39988      25619 0.0582785 .  
## Marital            1     76.6     39987      25542 < 2.2e-16 ***
## Basic              1     18.0     39986      25524 2.196e-05 ***
## High.School        1     11.6     39985      25513 0.0006746 ***
## University.degree  1      6.1     39984      25507 0.0133740 *  
## Credit.default     1    286.5     39983      25220 < 2.2e-16 ***
## Housing.Loan       1      2.9     39982      25217 0.0913077 .  
## Persona.Loan       1      1.6     39981      25216 0.2105271    
## Contact.Type       1    545.7     39980      24670 < 2.2e-16 ***
## mar                1    378.8     39979      24291 < 2.2e-16 ***
## apr                1    154.3     39978      24137 < 2.2e-16 ***
## may                1     16.3     39977      24121 5.361e-05 ***
## jun                1    188.4     39976      23932 < 2.2e-16 ***
## jul                1     58.8     39975      23873 1.720e-14 ***
## aug                1    120.2     39974      23753 < 2.2e-16 ***
## sep                1     65.4     39973      23688 6.059e-16 ***
## oct                1    267.0     39972      23421 < 2.2e-16 ***
## Monday             1     20.1     39971      23401 7.510e-06 ***
## Tuesday            1      0.6     39970      23400 0.4550487    
## Wednesday          1      4.7     39969      23396 0.0310500 *  
## Thursday           1      2.4     39968      23393 0.1197521    
## Duration           1   5226.5     39967      18167 < 2.2e-16 ***
## Campaign           1     45.7     39966      18121 1.411e-11 ***
## emp.var.rate       1   1438.2     39965      16683 < 2.2e-16 ***
## cons.price.idx     1    496.9     39964      16186 < 2.2e-16 ***
## cons.conf.idx      1     10.3     39963      16176 0.0013171 ** 
## euribor3m          1     99.8     39962      16076 < 2.2e-16 ***
## nr.employed        1      0.1     39961      16076 0.7008176    
## previous           1     10.2     39960      16065 0.0014018 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# iteration8: refit after dropping most predictors whose p-values were not
# significant in iteration7 (Housing.Loan, p = 0.72 there, is still kept here
# and nr.employed is removed)
iteration8 <- glm(
  Term.Deposit ~ retired + student + Marital + Credit.default +
    Housing.Loan + Contact.Type +
    mar + apr + may + jun + jul + aug + sep + oct +
    Wednesday + Duration + Campaign +
    emp.var.rate + cons.price.idx + cons.conf.idx + euribor3m + previous,
  data = train_data,
  family = binomial(logit)
)
summary(iteration8)
## 
## Call:
## glm(formula = Term.Deposit ~ retired + student + Marital + Credit.default + 
##     Housing.Loan + Contact.Type + mar + apr + may + jun + jul + 
##     aug + sep + oct + Wednesday + Duration + Campaign + emp.var.rate + 
##     cons.price.idx + cons.conf.idx + euribor3m + previous, family = binomial(logit), 
##     data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -6.1001  -0.2923  -0.1842  -0.1350   3.7128  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -3.842e+02  1.796e+01 -21.387  < 2e-16 ***
## retired         3.394e-01  8.410e-02   4.036 5.44e-05 ***
## student         2.260e-01  9.955e-02   2.270 0.023213 *  
## Marital        -1.557e-01  4.676e-02  -3.330 0.000869 ***
## Credit.default -3.151e-01  6.598e-02  -4.775 1.80e-06 ***
## Housing.Loan   -8.175e-03  4.213e-02  -0.194 0.846154    
## Contact.Type   -4.608e-01  7.646e-02  -6.026 1.68e-09 ***
## mar             1.917e+00  1.451e-01  13.215  < 2e-16 ***
## apr            -4.506e-01  1.299e-01  -3.468 0.000525 ***
## may            -5.304e-01  1.103e-01  -4.809 1.52e-06 ***
## jun            -1.402e+00  1.426e-01  -9.836  < 2e-16 ***
## jul             5.147e-01  1.035e-01   4.975 6.53e-07 ***
## aug             2.636e+00  1.489e-01  17.702  < 2e-16 ***
## sep             6.165e-01  1.620e-01   3.806 0.000141 ***
## oct             7.703e-01  1.278e-01   6.026 1.69e-09 ***
## Wednesday       1.252e-01  5.268e-02   2.377 0.017445 *  
## Duration        4.740e-03  7.579e-05  62.539  < 2e-16 ***
## Campaign       -3.517e-02  1.172e-02  -3.000 0.002697 ** 
## emp.var.rate   -2.844e+00  1.394e-01 -20.399  < 2e-16 ***
## cons.price.idx  4.005e+00  1.876e-01  21.348  < 2e-16 ***
## cons.conf.idx  -5.267e-02  7.756e-03  -6.791 1.11e-11 ***
## euribor3m       9.659e-01  9.147e-02  10.559  < 2e-16 ***
## previous        1.209e-01  3.816e-02   3.169 0.001528 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26173  on 39999  degrees of freedom
## Residual deviance: 16108  on 39977  degrees of freedom
## AIC: 16154
## 
## Number of Fisher Scoring iterations: 6
# Variance inflation factors for iteration8; large values point to
# multicollinearity among the predictors.
# NOTE(review): vif() is not defined in this part of the file - presumably car::vif; confirm.
vif(iteration8)
##        retired        student        Marital Credit.default   Housing.Loan 
##       1.093873       1.101369       1.125613       1.086046       1.007248 
##   Contact.Type            mar            apr            may            jun 
##       2.159618       2.364508       4.587985       5.123953       5.438998 
##            jul            aug            sep            oct      Wednesday 
##       2.739731       5.988754       1.333275       1.622380       1.012711 
##       Duration       Campaign   emp.var.rate cons.price.idx  cons.conf.idx 
##       1.249779       1.051470     138.753046      30.478336       5.181194 
##      euribor3m       previous 
##      63.901680       1.308782
# Sequential analysis-of-deviance table (chi-square tests) for iteration8
anova(iteration8, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Term.Deposit
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                           39999      26173              
## retired         1    203.2     39998      25970 < 2.2e-16 ***
## student         1    172.6     39997      25797 < 2.2e-16 ***
## Marital         1     91.8     39996      25706 < 2.2e-16 ***
## Credit.default  1    333.9     39995      25372 < 2.2e-16 ***
## Housing.Loan    1      3.5     39994      25368  0.061660 .  
## Contact.Type    1    583.3     39993      24785 < 2.2e-16 ***
## mar             1    395.6     39992      24389 < 2.2e-16 ***
## apr             1    151.5     39991      24238 < 2.2e-16 ***
## may             1     28.8     39990      24209 7.876e-08 ***
## jun             1    179.9     39989      24029 < 2.2e-16 ***
## jul             1     80.0     39988      23949 < 2.2e-16 ***
## aug             1    113.5     39987      23835 < 2.2e-16 ***
## sep             1     72.0     39986      23763 < 2.2e-16 ***
## oct             1    273.6     39985      23490 < 2.2e-16 ***
## Wednesday       1      7.5     39984      23482  0.006180 ** 
## Duration        1   5198.6     39983      18284 < 2.2e-16 ***
## Campaign        1     45.6     39982      18238 1.436e-11 ***
## emp.var.rate    1   1482.9     39981      16755 < 2.2e-16 ***
## cons.price.idx  1    523.3     39980      16232 < 2.2e-16 ***
## cons.conf.idx   1      8.8     39979      16223  0.002972 ** 
## euribor3m       1    105.4     39978      16118 < 2.2e-16 ***
## previous        1     10.0     39977      16108  0.001542 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# iteration9: same specification as iteration8 without Housing.Loan
iteration9 <- glm(
  Term.Deposit ~ retired + student + Marital + Credit.default +
    Contact.Type +
    mar + apr + may + jun + jul + aug + sep + oct +
    Wednesday + Duration + Campaign +
    emp.var.rate + cons.price.idx + cons.conf.idx + euribor3m + previous,
  data = train_data,
  family = binomial(logit)
)
summary(iteration9)
## 
## Call:
## glm(formula = Term.Deposit ~ retired + student + Marital + Credit.default + 
##     Contact.Type + mar + apr + may + jun + jul + aug + sep + 
##     oct + Wednesday + Duration + Campaign + emp.var.rate + cons.price.idx + 
##     cons.conf.idx + euribor3m + previous, family = binomial(logit), 
##     data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -6.1008  -0.2922  -0.1842  -0.1350   3.7118  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -3.842e+02  1.796e+01 -21.385  < 2e-16 ***
## retired         3.396e-01  8.410e-02   4.038 5.39e-05 ***
## student         2.262e-01  9.955e-02   2.272 0.023087 *  
## Marital        -1.556e-01  4.676e-02  -3.329 0.000873 ***
## Credit.default -3.150e-01  6.598e-02  -4.774 1.80e-06 ***
## Contact.Type   -4.604e-01  7.644e-02  -6.024 1.70e-09 ***
## mar             1.917e+00  1.451e-01  13.216  < 2e-16 ***
## apr            -4.503e-01  1.299e-01  -3.466 0.000528 ***
## may            -5.301e-01  1.103e-01  -4.807 1.53e-06 ***
## jun            -1.402e+00  1.425e-01  -9.834  < 2e-16 ***
## jul             5.150e-01  1.034e-01   4.978 6.41e-07 ***
## aug             2.636e+00  1.489e-01  17.701  < 2e-16 ***
## sep             6.161e-01  1.620e-01   3.804 0.000142 ***
## oct             7.705e-01  1.278e-01   6.027 1.67e-09 ***
## Wednesday       1.252e-01  5.268e-02   2.377 0.017464 *  
## Duration        4.740e-03  7.578e-05  62.544  < 2e-16 ***
## Campaign       -3.517e-02  1.172e-02  -3.001 0.002694 ** 
## emp.var.rate   -2.844e+00  1.394e-01 -20.398  < 2e-16 ***
## cons.price.idx  4.005e+00  1.876e-01  21.346  < 2e-16 ***
## cons.conf.idx  -5.264e-02  7.754e-03  -6.788 1.13e-11 ***
## euribor3m       9.658e-01  9.147e-02  10.559  < 2e-16 ***
## previous        1.211e-01  3.815e-02   3.173 0.001509 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26173  on 39999  degrees of freedom
## Residual deviance: 16108  on 39978  degrees of freedom
## AIC: 16152
## 
## Number of Fisher Scoring iterations: 6
# Variance inflation factors for iteration9; large values point to
# multicollinearity among the predictors.
# NOTE(review): vif() is not defined in this part of the file - presumably car::vif; confirm.
vif(iteration9)
##        retired        student        Marital Credit.default   Contact.Type 
##       1.093752       1.101253       1.125567       1.086047       2.158255 
##            mar            apr            may            jun            jul 
##       2.364445       4.587293       5.122649       5.436576       2.739101 
##            aug            sep            oct      Wednesday       Duration 
##       5.988899       1.333151       1.622316       1.012701       1.249654 
##       Campaign   emp.var.rate cons.price.idx  cons.conf.idx      euribor3m 
##       1.051460     138.746810      30.476720       5.178905      63.899553 
##       previous 
##       1.308420
# Sequential analysis-of-deviance table (chi-square tests) for iteration9
anova(iteration9, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Term.Deposit
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                           39999      26173              
## retired         1    203.2     39998      25970 < 2.2e-16 ***
## student         1    172.6     39997      25797 < 2.2e-16 ***
## Marital         1     91.8     39996      25706 < 2.2e-16 ***
## Credit.default  1    333.9     39995      25372 < 2.2e-16 ***
## Contact.Type    1    586.8     39994      24785 < 2.2e-16 ***
## mar             1    395.6     39993      24389 < 2.2e-16 ***
## apr             1    151.4     39992      24238 < 2.2e-16 ***
## may             1     28.9     39991      24209 7.712e-08 ***
## jun             1    180.0     39990      24029 < 2.2e-16 ***
## jul             1     79.9     39989      23949 < 2.2e-16 ***
## aug             1    113.4     39988      23836 < 2.2e-16 ***
## sep             1     71.9     39987      23764 < 2.2e-16 ***
## oct             1    273.8     39986      23490 < 2.2e-16 ***
## Wednesday       1      7.5     39985      23482  0.006215 ** 
## Duration        1   5198.6     39984      18284 < 2.2e-16 ***
## Campaign        1     45.6     39983      18238 1.439e-11 ***
## emp.var.rate    1   1483.0     39982      16755 < 2.2e-16 ***
## cons.price.idx  1    523.3     39981      16232 < 2.2e-16 ***
## cons.conf.idx   1      8.8     39980      16223  0.003016 ** 
## euribor3m       1    105.4     39979      16118 < 2.2e-16 ***
## previous        1     10.0     39978      16108  0.001524 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# iteration10: iteration9 without apr, may, jun, emp.var.rate, cons.price.idx,
# cons.conf.idx and euribor3m
iteration10 <- glm(
  Term.Deposit ~ retired + student + Marital + Credit.default +
    Contact.Type +
    mar + jul + aug + sep + oct +
    Wednesday + Duration + Campaign + previous,
  data = train_data,
  family = binomial(logit)
)
summary(iteration10)
## 
## Call:
## glm(formula = Term.Deposit ~ retired + student + Marital + Credit.default + 
##     Contact.Type + mar + jul + aug + sep + oct + Wednesday + 
##     Duration + Campaign + previous, family = binomial(logit), 
##     data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.7912  -0.3683  -0.2505  -0.1677   3.0608  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -3.241e+00  5.674e-02 -57.118  < 2e-16 ***
## retired         9.510e-01  7.889e-02  12.056  < 2e-16 ***
## student         8.502e-01  9.404e-02   9.041  < 2e-16 ***
## Marital        -2.371e-01  4.403e-02  -5.386  7.2e-08 ***
## Credit.default -6.887e-01  6.243e-02 -11.032  < 2e-16 ***
## Contact.Type   -1.029e+00  5.424e-02 -18.974  < 2e-16 ***
## mar             2.205e+00  9.935e-02  22.189  < 2e-16 ***
## jul            -6.843e-01  6.573e-02 -10.412  < 2e-16 ***
## aug            -1.485e-01  6.047e-02  -2.456    0.014 *  
## sep             1.461e+00  1.445e-01  10.109  < 2e-16 ***
## oct             1.790e+00  1.066e-01  16.794  < 2e-16 ***
## Wednesday       3.271e-02  4.945e-02   0.662    0.508    
## Duration        4.292e-03  6.879e-05  62.385  < 2e-16 ***
## Campaign       -6.705e-02  1.147e-02  -5.847  5.0e-09 ***
## previous        6.557e-01  3.508e-02  18.690  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26173  on 39999  degrees of freedom
## Residual deviance: 18249  on 39985  degrees of freedom
## AIC: 18279
## 
## Number of Fisher Scoring iterations: 6
# Variance inflation factors for iteration10; large values point to
# multicollinearity among the predictors.
# NOTE(review): vif() is not defined in this part of the file - presumably car::vif; confirm.
vif(iteration10)
##        retired        student        Marital Credit.default   Contact.Type 
##       1.054572       1.071277       1.106137       1.045623       1.207102 
##            mar            jul            aug            sep            oct 
##       1.054348       1.184041       1.161304       1.027613       1.048630 
##      Wednesday       Duration       Campaign       previous 
##       1.003463       1.116754       1.034729       1.144899
# Sequential analysis-of-deviance table (chi-square tests) for iteration10
anova(iteration10, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Term.Deposit
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                           39999      26173              
## retired         1    203.2     39998      25970 < 2.2e-16 ***
## student         1    172.6     39997      25797 < 2.2e-16 ***
## Marital         1     91.8     39996      25706 < 2.2e-16 ***
## Credit.default  1    333.9     39995      25372 < 2.2e-16 ***
## Contact.Type    1    586.8     39994      24785 < 2.2e-16 ***
## mar             1    395.6     39993      24389 < 2.2e-16 ***
## jul             1    144.8     39992      24244 < 2.2e-16 ***
## aug             1    129.7     39991      24115 < 2.2e-16 ***
## sep             1     75.6     39990      24039 < 2.2e-16 ***
## oct             1    239.6     39989      23800 < 2.2e-16 ***
## Wednesday       1      3.2     39988      23796   0.07395 .  
## Duration        1   5161.8     39987      18634 < 2.2e-16 ***
## Campaign        1     44.7     39986      18590 2.269e-11 ***
## previous        1    340.3     39985      18250 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# iteration11: iteration10 with apr, may, jun and cons.conf.idx added back
# (emp.var.rate, cons.price.idx and euribor3m stay out)
iteration11 <- glm(
  Term.Deposit ~ retired + student + Marital + Credit.default +
    Contact.Type +
    mar + apr + may + jun + jul + aug + sep + oct +
    Wednesday + Duration + Campaign + cons.conf.idx + previous,
  data = train_data,
  family = binomial(logit)
)
summary(iteration11)
## 
## Call:
## glm(formula = Term.Deposit ~ retired + student + Marital + Credit.default + 
##     Contact.Type + mar + apr + may + jun + jul + aug + sep + 
##     oct + Wednesday + Duration + Campaign + cons.conf.idx + previous, 
##     family = binomial(logit), data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.6387  -0.3363  -0.2341  -0.1640   3.4295  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     1.764e+00  2.574e-01   6.853 7.24e-12 ***
## retired         7.058e-01  8.165e-02   8.645  < 2e-16 ***
## student         6.325e-01  9.669e-02   6.542 6.08e-11 ***
## Marital        -2.155e-01  4.524e-02  -4.764 1.90e-06 ***
## Credit.default -6.039e-01  6.346e-02  -9.517  < 2e-16 ***
## Contact.Type   -1.843e+00  7.233e-02 -25.474  < 2e-16 ***
## mar             2.988e+00  1.281e-01  23.326  < 2e-16 ***
## apr             1.575e+00  9.394e-02  16.770  < 2e-16 ***
## may             4.579e-01  8.261e-02   5.543 2.97e-08 ***
## jun             1.604e+00  9.352e-02  17.152  < 2e-16 ***
## jul            -1.831e-01  8.668e-02  -2.112 0.034647 *  
## aug            -5.646e-01  8.473e-02  -6.663 2.68e-11 ***
## sep             5.385e-01  1.616e-01   3.332 0.000862 ***
## oct             5.948e-01  1.395e-01   4.263 2.01e-05 ***
## Wednesday       9.707e-02  5.072e-02   1.914 0.055669 .  
## Duration        4.385e-03  7.046e-05  62.243  < 2e-16 ***
## Campaign       -5.596e-02  1.152e-02  -4.858 1.19e-06 ***
## cons.conf.idx   1.318e-01  6.292e-03  20.947  < 2e-16 ***
## previous        4.663e-01  3.735e-02  12.485  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26173  on 39999  degrees of freedom
## Residual deviance: 17454  on 39981  degrees of freedom
## AIC: 17492
## 
## Number of Fisher Scoring iterations: 6
# Variance inflation factors for iteration11; large values point to
# multicollinearity among the predictors.
# NOTE(review): vif() is not defined in this part of the file - presumably car::vif; confirm.
vif(iteration11)
##        retired        student        Marital Credit.default   Contact.Type 
##       1.070323       1.080880       1.112779       1.058664       2.099328 
##            mar            apr            may            jun            jul 
##       1.374010       2.399577       3.156431       2.530315       2.011731 
##            aug            sep            oct      Wednesday       Duration 
##       2.234660       1.296207       1.688622       1.011577       1.134212 
##       Campaign  cons.conf.idx       previous 
##       1.039015       3.154452       1.218036
# Sequential analysis-of-deviance table (chi-square tests) for the model just
# fitted, iteration11. This call previously inspected iteration9 (copy-paste
# slip), so any table printed from that earlier call must be regenerated.
anova(iteration11, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Term.Deposit
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                           39999      26173              
## retired         1    203.2     39998      25970 < 2.2e-16 ***
## student         1    172.6     39997      25797 < 2.2e-16 ***
## Marital         1     91.8     39996      25706 < 2.2e-16 ***
## Credit.default  1    333.9     39995      25372 < 2.2e-16 ***
## Contact.Type    1    586.8     39994      24785 < 2.2e-16 ***
## mar             1    395.6     39993      24389 < 2.2e-16 ***
## apr             1    151.4     39992      24238 < 2.2e-16 ***
## may             1     28.9     39991      24209 7.712e-08 ***
## jun             1    180.0     39990      24029 < 2.2e-16 ***
## jul             1     79.9     39989      23949 < 2.2e-16 ***
## aug             1    113.4     39988      23836 < 2.2e-16 ***
## sep             1     71.9     39987      23764 < 2.2e-16 ***
## oct             1    273.8     39986      23490 < 2.2e-16 ***
## Wednesday       1      7.5     39985      23482  0.006215 ** 
## Duration        1   5198.6     39984      18284 < 2.2e-16 ***
## Campaign        1     45.6     39983      18238 1.439e-11 ***
## emp.var.rate    1   1483.0     39982      16755 < 2.2e-16 ***
## cons.price.idx  1    523.3     39981      16232 < 2.2e-16 ***
## cons.conf.idx   1      8.8     39980      16223  0.003016 ** 
## euribor3m       1    105.4     39979      16118 < 2.2e-16 ***
## previous        1     10.0     39978      16108  0.001524 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

CONCORDANCE

# Concordance, discordance and tie statistics for a fitted binomial GLM.
#
# Arguments:
#   GLM.binomial  A stored binomial GLM object (or any list) exposing `$y`
#                 (observed 0/1 outcomes) and `$fitted.values` (fitted
#                 probabilities), in the same row order.
#   all_pairs     FALSE (default) keeps the historical behaviour: the larger
#                 of the event / non-event groups is truncated to the size of
#                 the smaller one and the i-th event is compared only with the
#                 i-th non-event. TRUE compares every event with every
#                 non-event, which is the usual definition of concordance.
#
# Returns a list with the rates `concordance`, `discordance`, `tie_rate` and
# the matching pair counts `num_concordant`, `num_discordant`, `num_tied`.
# A pair is concordant when the event's fitted probability is strictly larger
# than the non-event's. Rates are NaN when no pair can be formed.
Concordance <- function(GLM.binomial, all_pairs = FALSE) {
  observed <- GLM.binomial$y
  fitted_prob <- unname(GLM.binomial$fitted.values)

  # Fitted probabilities where the event did / did not actually happen.
  # Plain vector subsetting avoids the matrix-to-vector collapse that made
  # single-row groups fail.
  event_prob <- fitted_prob[observed == 1]
  nonevent_prob <- fitted_prob[observed == 0]

  if (all_pairs) {
    total_pairs <- as.numeric(length(event_prob)) * length(nonevent_prob)
    if (total_pairs > 0) {
      # For each event, count the non-events scored strictly below it and the
      # ones scored at or below it; the difference is the number of ties.
      sorted_nonevent <- sort(nonevent_prob)
      strictly_below <- findInterval(event_prob, sorted_nonevent,
                                     left.open = TRUE)
      at_or_below <- findInterval(event_prob, sorted_nonevent)
      # as.numeric() keeps the sums from overflowing integer range
      n_conc <- sum(as.numeric(strictly_below))
      n_tied <- sum(as.numeric(at_or_below - strictly_below))
    } else {
      n_conc <- 0
      n_tied <- 0
    }
    n_disc <- total_pairs - n_conc - n_tied
  } else {
    # Equate the length of the event and non-event groups, then compare them
    # position by position.
    total_pairs <- min(length(event_prob), length(nonevent_prob))
    keep <- seq_len(total_pairs)
    event_prob <- event_prob[keep]
    nonevent_prob <- nonevent_prob[keep]
    n_conc <- as.numeric(sum(event_prob > nonevent_prob))
    n_tied <- as.numeric(sum(event_prob == nonevent_prob))
    n_disc <- as.numeric(sum(event_prob < nonevent_prob))
  }

  list(
    concordance = n_conc / total_pairs,
    num_concordant = n_conc,
    discordance = n_disc / total_pairs,
    num_discordant = n_disc,
    tie_rate = n_tied / total_pairs,
    num_tied = n_tied
  )
}
# Concordance / discordance / tie summary for iteration7
Concordance(iteration7)
## $concordance
## [1] 0.9809312
## 
## $num_concordant
## [1] 3961
## 
## $discordance
## [1] 0.01906885
## 
## $num_discordant
## [1] 77
## 
## $tie_rate
## [1] 0
## 
## $num_tied
## [1] 0
# Concordance / discordance / tie summary for iteration8
Concordance(iteration8)
## $concordance
## [1] 0.9809312
## 
## $num_concordant
## [1] 3961
## 
## $discordance
## [1] 0.01906885
## 
## $num_discordant
## [1] 77
## 
## $tie_rate
## [1] 0
## 
## $num_tied
## [1] 0
# Concordance / discordance / tie summary for iteration9
Concordance(iteration9)
## $concordance
## [1] 0.9806835
## 
## $num_concordant
## [1] 3960
## 
## $discordance
## [1] 0.01931649
## 
## $num_discordant
## [1] 78
## 
## $tie_rate
## [1] 0
## 
## $num_tied
## [1] 0
# Concordance / discordance / tie summary for iteration10
Concordance(iteration10)
## $concordance
## [1] 0.9551758
## 
## $num_concordant
## [1] 3857
## 
## $discordance
## [1] 0.04482417
## 
## $num_discordant
## [1] 181
## 
## $tie_rate
## [1] 0
## 
## $num_tied
## [1] 0
# Concordance / discordance / tie summary for iteration11
Concordance(iteration11)
## $concordance
## [1] 0.957157
## 
## $num_concordant
## [1] 3865
## 
## $discordance
## [1] 0.04284299
## 
## $num_discordant
## [1] 173
## 
## $tie_rate
## [1] 0
## 
## $num_tied
## [1] 0

The best model is iteration 11: it shows no multicollinearity (every VIF is below 5), it has a low AIC (17492, lower than iteration 10's 18279), its concordance is very good (about 0.957), and the errors are normally distributed.

# Final model: refit of iteration11 (same specification as fitted earlier)
iteration11 <- glm(
  Term.Deposit ~ retired + student + Marital + Credit.default +
    Contact.Type +
    mar + apr + may + jun + jul + aug + sep + oct +
    Wednesday + Duration + Campaign + cons.conf.idx + previous,
  data = train_data,
  family = binomial(logit)
)
summary(iteration11)
## 
## Call:
## glm(formula = Term.Deposit ~ retired + student + Marital + Credit.default + 
##     Contact.Type + mar + apr + may + jun + jul + aug + sep + 
##     oct + Wednesday + Duration + Campaign + cons.conf.idx + previous, 
##     family = binomial(logit), data = train_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.6387  -0.3363  -0.2341  -0.1640   3.4295  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     1.764e+00  2.574e-01   6.853 7.24e-12 ***
## retired         7.058e-01  8.165e-02   8.645  < 2e-16 ***
## student         6.325e-01  9.669e-02   6.542 6.08e-11 ***
## Marital        -2.155e-01  4.524e-02  -4.764 1.90e-06 ***
## Credit.default -6.039e-01  6.346e-02  -9.517  < 2e-16 ***
## Contact.Type   -1.843e+00  7.233e-02 -25.474  < 2e-16 ***
## mar             2.988e+00  1.281e-01  23.326  < 2e-16 ***
## apr             1.575e+00  9.394e-02  16.770  < 2e-16 ***
## may             4.579e-01  8.261e-02   5.543 2.97e-08 ***
## jun             1.604e+00  9.352e-02  17.152  < 2e-16 ***
## jul            -1.831e-01  8.668e-02  -2.112 0.034647 *  
## aug            -5.646e-01  8.473e-02  -6.663 2.68e-11 ***
## sep             5.385e-01  1.616e-01   3.332 0.000862 ***
## oct             5.948e-01  1.395e-01   4.263 2.01e-05 ***
## Wednesday       9.707e-02  5.072e-02   1.914 0.055669 .  
## Duration        4.385e-03  7.046e-05  62.243  < 2e-16 ***
## Campaign       -5.596e-02  1.152e-02  -4.858 1.19e-06 ***
## cons.conf.idx   1.318e-01  6.292e-03  20.947  < 2e-16 ***
## previous        4.663e-01  3.735e-02  12.485  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26173  on 39999  degrees of freedom
## Residual deviance: 17454  on 39981  degrees of freedom
## AIC: 17492
## 
## Number of Fisher Scoring iterations: 6
# Variance inflation factor per predictor of iteration11, used to check for
# multicollinearity. vif() is presumably car::vif -- TODO confirm which
# package is loaded earlier in the file.
vif(iteration11)
##        retired        student        Marital Credit.default   Contact.Type 
##       1.070323       1.080880       1.112779       1.058664       2.099328 
##            mar            apr            may            jun            jul 
##       1.374010       2.399577       3.156431       2.530315       2.011731 
##            aug            sep            oct      Wednesday       Duration 
##       2.234660       1.296207       1.688622       1.011577       1.134212 
##       Campaign  cons.conf.idx       previous 
##       1.039015       3.154452       1.218036
# Sequential (term-by-term) analysis of deviance with chi-squared tests.
# NOTE(review): this analyses iteration9, although the surrounding section
# presents iteration11 as the final model; the table below accordingly lists
# terms (emp.var.rate, cons.price.idx, euribor3m) that iteration11 does not
# contain. Confirm whether iteration11 was intended and re-run if so.
anova(iteration9, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Term.Deposit
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                           39999      26173              
## retired         1    203.2     39998      25970 < 2.2e-16 ***
## student         1    172.6     39997      25797 < 2.2e-16 ***
## Marital         1     91.8     39996      25706 < 2.2e-16 ***
## Credit.default  1    333.9     39995      25372 < 2.2e-16 ***
## Contact.Type    1    586.8     39994      24785 < 2.2e-16 ***
## mar             1    395.6     39993      24389 < 2.2e-16 ***
## apr             1    151.4     39992      24238 < 2.2e-16 ***
## may             1     28.9     39991      24209 7.712e-08 ***
## jun             1    180.0     39990      24029 < 2.2e-16 ***
## jul             1     79.9     39989      23949 < 2.2e-16 ***
## aug             1    113.4     39988      23836 < 2.2e-16 ***
## sep             1     71.9     39987      23764 < 2.2e-16 ***
## oct             1    273.8     39986      23490 < 2.2e-16 ***
## Wednesday       1      7.5     39985      23482  0.006215 ** 
## Duration        1   5198.6     39984      18284 < 2.2e-16 ***
## Campaign        1     45.6     39983      18238 1.439e-11 ***
## emp.var.rate    1   1483.0     39982      16755 < 2.2e-16 ***
## cons.price.idx  1    523.3     39981      16232 < 2.2e-16 ***
## cons.conf.idx   1      8.8     39980      16223  0.003016 ** 
## euribor3m       1    105.4     39979      16118 < 2.2e-16 ***
## previous        1     10.0     39978      16108  0.001524 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Concordance, discordance and tie rates (with pair counts) for the selected
# model, iteration11.
Concordance(iteration11)
## $concordance
## [1] 0.957157
## 
## $num_concordant
## [1] 3865
## 
## $discordance
## [1] 0.04284299
## 
## $num_discordant
## [1] 173
## 
## $tie_rate
## [1] 0
## 
## $num_tied
## [1] 0