BINARY LOGISTIC REGRESSION
library(readr)
# Load the raw telecom churn data from CSV into a tibble via readr.
# NOTE(review): the path is an absolute, machine-specific location.
telecom_churn <- read_csv(
  file = "C:/Users/USER/Desktop/telecom_churn.csv"
)
## Rows: 3333 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (11): Churn, AccountWeeks, ContractRenewal, DataPlan, DataUsage, CustSer...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Work on a copy so the raw import stays untouched.
churn <- telecom_churn
# glm() with a binomial family needs a categorical response, so turn the
# numeric Churn column into a factor.
churn[["Churn"]] <- as.factor(churn[["Churn"]])
# List the available column names.
names(churn)
## [1] "Churn" "AccountWeeks" "ContractRenewal" "DataPlan"
## [5] "DataUsage" "CustServCalls" "DayMins" "DayCalls"
## [9] "MonthlyCharge" "OverageFee" "RoamMins"
# Inspect column types and the first few values of every variable.
str(object = churn)
## spc_tbl_ [3,333 × 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Churn : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ AccountWeeks : num [1:3333] 128 107 137 84 75 118 121 147 117 141 ...
## $ ContractRenewal: num [1:3333] 1 1 1 0 0 0 1 0 1 0 ...
## $ DataPlan : num [1:3333] 1 1 0 0 0 0 1 0 0 1 ...
## $ DataUsage : num [1:3333] 2.7 3.7 0 0 0 0 2.03 0 0.19 3.02 ...
## $ CustServCalls : num [1:3333] 1 1 0 2 3 0 3 0 1 0 ...
## $ DayMins : num [1:3333] 265 162 243 299 167 ...
## $ DayCalls : num [1:3333] 110 123 114 71 113 98 88 79 97 84 ...
## $ MonthlyCharge : num [1:3333] 89 82 52 57 41 57 87.3 36 63.9 93.2 ...
## $ OverageFee : num [1:3333] 9.87 9.78 6.06 3.1 7.42 ...
## $ RoamMins : num [1:3333] 10 13.7 12.2 6.6 10.1 6.3 7.5 7.1 8.7 11.2 ...
## - attr(*, "spec")=
## .. cols(
## .. Churn = col_double(),
## .. AccountWeeks = col_double(),
## .. ContractRenewal = col_double(),
## .. DataPlan = col_double(),
## .. DataUsage = col_double(),
## .. CustServCalls = col_double(),
## .. DayMins = col_double(),
## .. DayCalls = col_double(),
## .. MonthlyCharge = col_double(),
## .. OverageFee = col_double(),
## .. RoamMins = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Preview the first six rows of the prepared data.
head(churn, n = 6L)
## # A tibble: 6 × 11
## Churn AccountWeeks ContractRenewal DataPlan DataUsage CustServCalls DayMins
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 128 1 1 2.7 1 265.
## 2 0 107 1 1 3.7 1 162.
## 3 0 137 1 0 0 0 243.
## 4 0 84 0 0 0 2 299.
## 5 0 75 0 0 0 3 167.
## 6 0 118 0 0 0 0 223.
## # ℹ 4 more variables: DayCalls <dbl>, MonthlyCharge <dbl>, OverageFee <dbl>,
## # RoamMins <dbl>
DATA PARTITIONING
# Fix the RNG seed so the train/test split (and every number derived from
# it) is reproducible from run to run.
# NOTE: outputs recorded in this document were produced without a fixed
# seed; re-run to refresh them.
set.seed(123)
# Assign each row to partition 1 (about 80%, training) or partition 2
# (about 20%, testing). The draw is independent per row, so the split
# sizes are approximate rather than exact.
ind <- sample(2, nrow(churn), replace = TRUE, prob = c(0.8, 0.2))
train <- churn[ind == 1, ]
test <- churn[ind == 2, ]
modelling
# Fit the binary logistic regression on the TRAINING partition only.
# Fitting on the full `churn` data would let the held-out `test` rows
# influence the coefficients and make the test-set error optimistic.
# NOTE: the summary recorded below came from a fit on the full data;
# re-run to refresh it.
mymodel <- glm(
  Churn ~ MonthlyCharge + OverageFee,
  data = train,
  family = "binomial"
)
# Coefficient estimates, standard errors, and deviance statistics.
summary(mymodel)
##
## Call:
## glm(formula = Churn ~ MonthlyCharge + OverageFee, family = "binomial",
## data = churn)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.192084 0.246627 -12.943 < 2e-16 ***
## MonthlyCharge 0.008515 0.003046 2.795 0.00519 **
## OverageFee 0.090140 0.020466 4.404 1.06e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2758.3 on 3332 degrees of freedom
## Residual deviance: 2721.7 on 3330 degrees of freedom
## AIC: 2727.7
##
## Number of Fisher Scoring iterations: 4
PREDICTION
# Predicted probabilities for the training rows; type = "response" puts
# them on the probability scale rather than the log-odds scale.
p1 <- predict(mymodel, newdata = train, type = "response")
# Show the first six fitted probabilities.
head(p1, n = 6L)
## 1 2 3 4 5 6
## 0.17586805 0.16626793 0.15283956 0.08162781 0.25667352 0.19813963
# Preview the first six training rows alongside the probabilities above.
head(train, n = 6L)
## # A tibble: 6 × 11
## Churn AccountWeeks ContractRenewal DataPlan DataUsage CustServCalls DayMins
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 128 1 1 2.7 1 265.
## 2 0 107 1 1 3.7 1 162.
## 3 0 118 0 0 0 0 223.
## 4 0 147 0 0 0 0 157
## 5 0 117 1 0 0.19 1 184.
## 6 0 141 0 1 3.02 0 259.
## # ℹ 4 more variables: DayCalls <dbl>, MonthlyCharge <dbl>, OverageFee <dbl>,
## # RoamMins <dbl>
accuracy
# Turn the training probabilities into hard 0/1 labels using a 0.5 cutoff.
pred1 <- ifelse(test = p1 > 0.5, yes = 1, no = 0)
# Count how many rows fall into each predicted class.
table(pred1)
## pred1
## 0
## 2667
# Confusion matrix for the training data (rows = predicted, cols = actual).
# Predicted labels are wrapped in a factor with both levels so the table
# is always 2x2, even when the model predicts a single class. With a
# non-square table, diag() no longer picks out the correctly classified
# counts and any error rate computed from it is wrong.
tab1 <- table(
  predict = factor(pred1, levels = c(0, 1)),
  actual = train$Churn
)
misclassification error (training data)
# Training misclassification rate: one minus the share of counts on the
# diagonal of the confusion matrix.
# NOTE(review): assumes tab1 is square with predicted and actual classes
# in the same order - confirm both classes appear among the predictions.
1 - (sum(diag(tab1)) / sum(tab1))
## [1] 0.1473566
test data
# Predicted probabilities for the held-out test rows.
p2 <- predict(mymodel, newdata = test, type = "response")
# Hard 0/1 labels using the same 0.5 cutoff as for the training data.
pred2 <- ifelse(p2 > 0.5, 1, 0)
# Confusion matrix for the test data (rows = predicted, cols = actual).
# Forcing both levels keeps the table 2x2 even when only one class is
# predicted, so diag() picks out the correctly classified counts.
# NOTE: the table recorded below shows the earlier single-row result;
# re-run to refresh it.
tab2 <- table(
  predicted = factor(pred2, levels = c(0, 1)),
  actual = test$Churn
)
tab2
## actual
## predicted 0 1
## 0 576 90
misclassification error
# Test misclassification rate: one minus the share of counts on the
# diagonal of the confusion matrix.
# NOTE(review): assumes tab2 is square with predicted and actual classes
# in the same order - confirm both classes appear among the predictions.
1 - (sum(diag(tab2)) / sum(tab2))
## [1] 0.1351351