BINARY LOGISTIC REGRESSION

library(readr)
# Load the telecom churn data set. The absolute path is machine-specific and
# has to be adjusted when the script is run elsewhere.
telecom_churn <- read_csv(
  file = "C:/Users/USER/Desktop/telecom_churn.csv"
)
## Rows: 3333 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (11): Churn, AccountWeeks, ContractRenewal, DataPlan, DataUsage, CustSer...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Work on a copy of the imported data and convert the 0/1 churn indicator
# into a two-level factor so it can serve as the binary response in glm().
churn <- telecom_churn
churn[["Churn"]] <- as.factor(churn[["Churn"]])
# List the available column names.
names(churn)
##  [1] "Churn"           "AccountWeeks"    "ContractRenewal" "DataPlan"       
##  [5] "DataUsage"       "CustServCalls"   "DayMins"         "DayCalls"       
##  [9] "MonthlyCharge"   "OverageFee"      "RoamMins"
# Inspect the structure: the type and first few values of every column.
str(object = churn)
## spc_tbl_ [3,333 × 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Churn          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ AccountWeeks   : num [1:3333] 128 107 137 84 75 118 121 147 117 141 ...
##  $ ContractRenewal: num [1:3333] 1 1 1 0 0 0 1 0 1 0 ...
##  $ DataPlan       : num [1:3333] 1 1 0 0 0 0 1 0 0 1 ...
##  $ DataUsage      : num [1:3333] 2.7 3.7 0 0 0 0 2.03 0 0.19 3.02 ...
##  $ CustServCalls  : num [1:3333] 1 1 0 2 3 0 3 0 1 0 ...
##  $ DayMins        : num [1:3333] 265 162 243 299 167 ...
##  $ DayCalls       : num [1:3333] 110 123 114 71 113 98 88 79 97 84 ...
##  $ MonthlyCharge  : num [1:3333] 89 82 52 57 41 57 87.3 36 63.9 93.2 ...
##  $ OverageFee     : num [1:3333] 9.87 9.78 6.06 3.1 7.42 ...
##  $ RoamMins       : num [1:3333] 10 13.7 12.2 6.6 10.1 6.3 7.5 7.1 8.7 11.2 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Churn = col_double(),
##   ..   AccountWeeks = col_double(),
##   ..   ContractRenewal = col_double(),
##   ..   DataPlan = col_double(),
##   ..   DataUsage = col_double(),
##   ..   CustServCalls = col_double(),
##   ..   DayMins = col_double(),
##   ..   DayCalls = col_double(),
##   ..   MonthlyCharge = col_double(),
##   ..   OverageFee = col_double(),
##   ..   RoamMins = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Preview the first six rows of the prepared data.
head(churn, n = 6L)
## # A tibble: 6 × 11
##   Churn AccountWeeks ContractRenewal DataPlan DataUsage CustServCalls DayMins
##   <fct>        <dbl>           <dbl>    <dbl>     <dbl>         <dbl>   <dbl>
## 1 0              128               1        1       2.7             1    265.
## 2 0              107               1        1       3.7             1    162.
## 3 0              137               1        0       0               0    243.
## 4 0               84               0        0       0               2    299.
## 5 0               75               0        0       0               3    167.
## 6 0              118               0        0       0               0    223.
## # ℹ 4 more variables: DayCalls <dbl>, MonthlyCharge <dbl>, OverageFee <dbl>,
## #   RoamMins <dbl>

DATA PARTITIONING

# Randomly assign every row to group 1 (training, about 80%) or group 2
# (test, about 20%). The RNG seed is fixed first so that the split -- and
# every number derived from it -- is identical on each run. The outputs
# printed in this document came from an unseeded run, so they will not match
# exactly.
set.seed(1234)
ind <- sample(2, nrow(churn), replace = TRUE, prob = c(0.8, 0.2))
train <- churn[ind == 1, ]
test <- churn[ind == 2, ]

modelling

# Fit a binary logistic regression of churn on monthly charge and overage
# fee. The model is fitted on the training partition only: fitting on the
# full `churn` data would let the held-out test rows influence the
# coefficients and make the test-set error optimistic. The summary printed
# below was produced by a fit on the full data, so its numbers will differ.
mymodel <- glm(
  Churn ~ MonthlyCharge + OverageFee,
  data = train,
  family = "binomial"
)
summary(mymodel)
## 
## Call:
## glm(formula = Churn ~ MonthlyCharge + OverageFee, family = "binomial", 
##     data = churn)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.192084   0.246627 -12.943  < 2e-16 ***
## MonthlyCharge  0.008515   0.003046   2.795  0.00519 ** 
## OverageFee     0.090140   0.020466   4.404 1.06e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2758.3  on 3332  degrees of freedom
## Residual deviance: 2721.7  on 3330  degrees of freedom
## AIC: 2727.7
## 
## Number of Fisher Scoring iterations: 4

PREDICTION

# Predicted churn probabilities for the training rows; type = "response"
# puts the predictions on the probability scale.
p1 <- predict(mymodel, newdata = train, type = "response")
# Look at the first few predicted probabilities.
head(p1)
##          1          2          3          4          5          6 
## 0.17586805 0.16626793 0.15283956 0.08162781 0.25667352 0.19813963
# Show the first six training rows for comparison with the predictions.
head(train, n = 6L)
## # A tibble: 6 × 11
##   Churn AccountWeeks ContractRenewal DataPlan DataUsage CustServCalls DayMins
##   <fct>        <dbl>           <dbl>    <dbl>     <dbl>         <dbl>   <dbl>
## 1 0              128               1        1      2.7              1    265.
## 2 0              107               1        1      3.7              1    162.
## 3 0              118               0        0      0                0    223.
## 4 0              147               0        0      0                0    157 
## 5 0              117               1        0      0.19             1    184.
## 6 0              141               0        1      3.02             0    259.
## # ℹ 4 more variables: DayCalls <dbl>, MonthlyCharge <dbl>, OverageFee <dbl>,
## #   RoamMins <dbl>

class predictions and confusion matrix (training data)

# Classify a training row as churn (1) when its predicted probability is
# above 0.5 and as no churn (0) otherwise, then count the predicted classes.
pred1 <- ifelse(p1 > 0.5, 1, 0)
table(pred1)
## pred1
##    0 
## 2667
# Cross-tabulate predicted against actual class on the training data.
# Fixing the predicted levels to 0 and 1 keeps the table 2 x 2 even when the
# model predicts only one class; on a 1 x 2 table diag() returns just the
# first cell, which is the wrong cell whenever the single predicted class
# is 1.
tab1 <- table(
  predict = factor(pred1, levels = c(0, 1)),
  actual = train$Churn
)

misclassification error (training data)

# Training misclassification rate: one minus the share of observations on
# the diagonal of the confusion table. This assumes the rows and columns of
# tab1 list the classes in the same order.
1 - sum(diag(tab1)) / sum(tab1)
## [1] 0.1473566

test data

# Score the held-out test rows: predicted probabilities, then class labels
# using the same 0.5 cut-off as for the training data.
p2 <- predict(mymodel, newdata = test, type = "response")
pred2 <- ifelse(p2 > 0.5, 1, 0)
# Confusion table for the test data. The predicted levels are fixed to 0 and
# 1 so the table stays 2 x 2 even if only one class is ever predicted (the
# table printed below came from the earlier version and shows only the row
# for class 0); diag() needs the square table to pick the correct cells.
tab2 <- table(
  predicted = factor(pred2, levels = c(0, 1)),
  actual = test$Churn
)
tab2
##          actual
## predicted   0   1
##         0 576  90

misclassification error

# Test misclassification rate: one minus the share of observations on the
# diagonal of the confusion table. This assumes the rows and columns of
# tab2 list the classes in the same order.
1 - sum(diag(tab2)) / sum(tab2)
## [1] 0.1351351