## Loading required package: carData
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units

Read Dataset

##Load the cell phone churn dataset; read.csv treats the first row as the header by default
cellphonedata <- read.csv(file = "Dataset_Cellphone.csv")
##Show the column types and the first few values of each column
str(cellphonedata)
## 'data.frame':    3333 obs. of  11 variables:
##  $ Churn          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ AccountWeeks   : int  128 107 137 84 75 118 121 147 117 141 ...
##  $ ContractRenewal: int  1 1 1 0 0 0 1 0 1 0 ...
##  $ DataPlan       : int  1 1 0 0 0 0 1 0 0 1 ...
##  $ DataUsage      : num  2.7 3.7 0 0 0 0 2.03 0 0.19 3.02 ...
##  $ CustServCalls  : int  1 1 0 2 3 0 3 0 1 0 ...
##  $ DayMins        : num  265 162 243 299 167 ...
##  $ DayCalls       : int  110 123 114 71 113 98 88 79 97 84 ...
##  $ MonthlyCharge  : num  89 82 52 57 41 57 87.3 36 63.9 93.2 ...
##  $ OverageFee     : num  9.87 9.78 6.06 3.1 7.42 ...
##  $ RoamMins       : num  10 13.7 12.2 6.6 10.1 6.3 7.5 7.1 8.7 11.2 ...
##Convert the dependent variable (Churn) into a factor.
##The conversions of ContractRenewal and DataPlan below are commented out,
##so those two predictors stay as 0/1 integers.
cellphonedata[["Churn"]] <- factor(cellphonedata[["Churn"]])
##cellphonedata$ContractRenewal<-factor(cellphonedata$ContractRenewal)
##cellphonedata$DataPlan<-factor(cellphonedata$DataPlan)
##Split Data into Train and test
library(caret)
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
##Fix the RNG seed so the train/test partition is reproducible
set.seed(101)
##Draw 70% of the rows for training, sampling on the Churn outcome;
##list = FALSE returns the selected row indices rather than a list
spindex <- createDataPartition(y = cellphonedata$Churn, p = 0.7, list = FALSE)
cellphonetrain <- cellphonedata[spindex, ]
cellphonetest <- cellphonedata[-spindex, ]
##Logistic regression of Churn on every other column of the training set
LogRegModel <- glm(
  formula = Churn ~ .,
  family = binomial(link = "logit"),
  data = cellphonetrain
)
summary(LogRegModel)
## 
## Call:
## glm(formula = Churn ~ ., family = binomial(link = "logit"), data = cellphonetrain)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0101  -0.5135  -0.3485  -0.2039   3.0493  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -6.456862   0.662712  -9.743  < 2e-16 ***
## AccountWeeks     0.001063   0.001667   0.638  0.52370    
## ContractRenewal -1.919653   0.171185 -11.214  < 2e-16 ***
## DataPlan        -1.424391   0.672065  -2.119  0.03405 *  
## DataUsage       -0.397500   2.307736  -0.172  0.86324    
## CustServCalls    0.509929   0.046259  11.023  < 2e-16 ***
## DayMins          0.003815   0.038976   0.098  0.92203    
## DayCalls         0.006983   0.003265   2.139  0.03246 *  
## MonthlyCharge    0.051165   0.229109   0.223  0.82328    
## OverageFee       0.063334   0.390369   0.162  0.87112    
## RoamMins         0.078190   0.026144   2.991  0.00278 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1934.3  on 2333  degrees of freedom
## Residual deviance: 1532.5  on 2323  degrees of freedom
## AIC: 1554.5
## 
## Number of Fisher Scoring iterations: 6
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
##Likelihood ratio test of the fitted model
##NOTE(review): with a single model, lrtest should compare it against the
##intercept-only model - confirm against the lmtest documentation
lrtest(object = LogRegModel)
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
##Pseudo R-squared measures (McFadden, r2ML, r2CU) and log-likelihoods for the fitted model
pR2(object = LogRegModel)
##          llh      llhNull           G2     McFadden         r2ML 
## -766.2572201 -967.1400908  401.7657413    0.2077081    0.1581354 
##         r2CU 
##    0.2806800
##Predict the churn probability for the hold-out set.
##predict() looks up the model's predictors in newdata by name, so the whole
##test frame is passed instead of the position-based slice [, 2:11], which
##would silently break if columns were reordered or added.
predictprob <- predict(LogRegModel, newdata = cellphonetest, type = "response")
##Classify as churn (1) when the predicted probability exceeds 0.5.
##The levels are taken from the observed Churn factor so both classes are
##always present, even when every observation is predicted as one class.
predictedresponse <- factor(
  ifelse(predictprob > 0.5, 1, 0),
  levels = levels(cellphonetest$Churn)
)
##Confusion Matrix
##NOTE: the positive class reported here is "0" (no churn), so Sensitivity
##describes non-churners; pass positive = "1" to report on churners instead.
confusionMatrix(predictedresponse, cellphonetest$Churn)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 840 115
##          1  15  29
##                                           
##                Accuracy : 0.8699          
##                  95% CI : (0.8474, 0.8901)
##     No Information Rate : 0.8559          
##     P-Value [Acc > NIR] : 0.1109          
##                                           
##                   Kappa : 0.2585          
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.9825          
##             Specificity : 0.2014          
##          Pos Pred Value : 0.8796          
##          Neg Pred Value : 0.6591          
##              Prevalence : 0.8559          
##          Detection Rate : 0.8408          
##    Detection Prevalence : 0.9560          
##       Balanced Accuracy : 0.5919          
##                                           
##        'Positive' Class : 0               
## 
##Exponentiate the logit coefficients: each predictor's value is the
##multiplicative change in the odds of churn per unit increase, and the
##intercept's value is the baseline odds
oddModel <- exp(coef(object = LogRegModel))
print(oddModel)
##     (Intercept)    AccountWeeks ContractRenewal        DataPlan 
##     0.001569714     1.001063383     0.146657862     0.240654971 
##       DataUsage   CustServCalls         DayMins        DayCalls 
##     0.671998121     1.665172875     1.003821928     1.007007090 
##   MonthlyCharge      OverageFee        RoamMins 
##     1.052497001     1.065382434     1.081327662
##Save the exponentiated coefficients to a CSV file in the working directory
write.csv(x = oddModel, file = "CellPhoneOdds.csv")