Demo LBB Classification-1

Ahmad Husain Abdullah

2019-02-21

Data Import

library(tidyverse)
# Read the Telco churn CSV into a tibble and print a compact column overview.
datachurn <- readr::read_csv(file = "data_input/TelcoChurn.csv")
dplyr::glimpse(datachurn)
## Observations: 7,043
## Variables: 21
## $ customerID       <chr> "7590-VHVEG", "5575-GNVDE", "3668-QPYBK", "77...
## $ gender           <chr> "Female", "Male", "Male", "Male", "Female", "...
## $ SeniorCitizen    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ Partner          <chr> "Yes", "No", "No", "No", "No", "No", "No", "N...
## $ Dependents       <chr> "No", "No", "No", "No", "No", "No", "Yes", "N...
## $ tenure           <int> 1, 34, 2, 45, 2, 8, 22, 10, 28, 62, 13, 16, 5...
## $ PhoneService     <chr> "No", "Yes", "Yes", "No", "Yes", "Yes", "Yes"...
## $ MultipleLines    <chr> "No phone service", "No", "No", "No phone ser...
## $ InternetService  <chr> "DSL", "DSL", "DSL", "DSL", "Fiber optic", "F...
## $ OnlineSecurity   <chr> "No", "Yes", "Yes", "Yes", "No", "No", "No", ...
## $ OnlineBackup     <chr> "Yes", "No", "Yes", "No", "No", "No", "Yes", ...
## $ DeviceProtection <chr> "No", "Yes", "No", "Yes", "No", "Yes", "No", ...
## $ TechSupport      <chr> "No", "No", "No", "Yes", "No", "No", "No", "N...
## $ StreamingTV      <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "...
## $ StreamingMovies  <chr> "No", "No", "No", "No", "No", "Yes", "No", "N...
## $ Contract         <chr> "Month-to-month", "One year", "Month-to-month...
## $ PaperlessBilling <chr> "Yes", "No", "Yes", "No", "Yes", "Yes", "Yes"...
## $ PaymentMethod    <chr> "Electronic check", "Mailed check", "Mailed c...
## $ MonthlyCharges   <dbl> 29.85, 56.95, 53.85, 42.30, 70.70, 99.65, 89....
## $ TotalCharges     <dbl> 29.85, 1889.50, 108.15, 1840.75, 151.65, 820....
## $ Churn            <chr> "No", "No", "Yes", "No", "Yes", "Yes", "No", ...

Beberapa informasi penting Data:

SeniorCitizen : Informasi apakah pelanggan termasuk warga senior (1) atau bukan (0). Partner : Informasi apakah pelanggan punya pasangan atau tidak. tenure : Informasi lama waktu sejak pelanggan mulai berlangganan. Churn : Informasi apakah pelanggan churn (berhenti berlangganan) atau tidak.

Data Manipulation

# Prepare the columns for modelling:
#   * drop the customerID column,
#   * convert every character column to a factor,
#   * recode the 0/1 SeniorCitizen flag as a "No"/"Yes" factor.
datachurn <- dplyr::select(datachurn, -customerID)
datachurn <- mutate_if(datachurn, is.character, as.factor)
datachurn$SeniorCitizen <- factor(
  datachurn$SeniorCitizen,
  levels = c(0, 1),
  labels = c("No", "Yes")
)
glimpse(datachurn)
## Observations: 7,043
## Variables: 20
## $ gender           <fct> Female, Male, Male, Male, Female, Female, Mal...
## $ SeniorCitizen    <fct> No, No, No, No, No, No, No, No, No, No, No, N...
## $ Partner          <fct> Yes, No, No, No, No, No, No, No, Yes, No, Yes...
## $ Dependents       <fct> No, No, No, No, No, No, Yes, No, No, Yes, Yes...
## $ tenure           <int> 1, 34, 2, 45, 2, 8, 22, 10, 28, 62, 13, 16, 5...
## $ PhoneService     <fct> No, Yes, Yes, No, Yes, Yes, Yes, No, Yes, Yes...
## $ MultipleLines    <fct> No phone service, No, No, No phone service, N...
## $ InternetService  <fct> DSL, DSL, DSL, DSL, Fiber optic, Fiber optic,...
## $ OnlineSecurity   <fct> No, Yes, Yes, Yes, No, No, No, Yes, No, Yes, ...
## $ OnlineBackup     <fct> Yes, No, Yes, No, No, No, Yes, No, No, Yes, N...
## $ DeviceProtection <fct> No, Yes, No, Yes, No, Yes, No, No, Yes, No, N...
## $ TechSupport      <fct> No, No, No, Yes, No, No, No, No, Yes, No, No,...
## $ StreamingTV      <fct> No, No, No, No, No, Yes, Yes, No, Yes, No, No...
## $ StreamingMovies  <fct> No, No, No, No, No, Yes, No, No, Yes, No, No,...
## $ Contract         <fct> Month-to-month, One year, Month-to-month, One...
## $ PaperlessBilling <fct> Yes, No, Yes, No, Yes, Yes, Yes, No, Yes, No,...
## $ PaymentMethod    <fct> Electronic check, Mailed check, Mailed check,...
## $ MonthlyCharges   <dbl> 29.85, 56.95, 53.85, 42.30, 70.70, 99.65, 89....
## $ TotalCharges     <dbl> 29.85, 1889.50, 108.15, 1840.75, 151.65, 820....
## $ Churn            <fct> No, No, Yes, No, Yes, Yes, No, No, Yes, No, N...
# Count the missing values in each column.
datachurn %>%
  is.na() %>%
  colSums()
##           gender    SeniorCitizen          Partner       Dependents 
##                0                0                0                0 
##           tenure     PhoneService    MultipleLines  InternetService 
##                0                0                0                0 
##   OnlineSecurity     OnlineBackup DeviceProtection      TechSupport 
##                0                0                0                0 
##      StreamingTV  StreamingMovies         Contract PaperlessBilling 
##                0                0                0                0 
##    PaymentMethod   MonthlyCharges     TotalCharges            Churn 
##                0                0               11                0
# Drop the rows that contain missing values, then fold the
# "No phone service" / "No internet service" levels of the add-on service
# columns into a plain "No".
datachurn <- datachurn %>%
  na.omit() %>%
  mutate(
    MultipleLines = plyr::revalue(MultipleLines, c("No phone service" = "No"))
  ) %>%
  mutate_at(
    vars(
      OnlineSecurity, OnlineBackup, DeviceProtection,
      TechSupport, StreamingTV, StreamingMovies
    ),
    plyr::revalue,
    replace = c("No internet service" = "No")
  )

Exploratory Data Analysis

Modelling

# Class balance of the target: share of customers in each Churn level.
prop.table(table(datachurn[["Churn"]]))
## 
##       No      Yes 
## 0.734215 0.265785
# 70/30 train/test split with a fixed seed, then fit a logistic regression of
# Churn on all remaining columns of the training set.
set.seed(417)
intrain <- sample(x = nrow(datachurn), size = nrow(datachurn) * 0.7)
churn.test <- datachurn[-intrain, ]
churn.train <- datachurn[intrain, ]
model <- glm(Churn ~ ., data = churn.train, family = "binomial")
summary(model)
## 
## Call:
## glm(formula = Churn ~ ., family = "binomial", data = churn.train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9319  -0.6847  -0.3058   0.7427   3.2348  
## 
## Coefficients:
##                                        Estimate Std. Error z value
## (Intercept)                           8.695e-01  9.745e-01   0.892
## genderMale                           -2.653e-02  7.697e-02  -0.345
## SeniorCitizenYes                      2.701e-01  9.980e-02   2.706
## PartnerYes                            2.174e-02  9.283e-02   0.234
## DependentsYes                        -1.241e-01  1.064e-01  -1.166
## tenure                               -4.639e-02  7.081e-03  -6.552
## PhoneServiceYes                       2.153e-01  7.740e-01   0.278
## MultipleLinesYes                      4.026e-01  2.110e-01   1.907
## InternetServiceFiber optic            1.719e+00  9.516e-01   1.806
## InternetServiceNo                    -1.718e+00  9.637e-01  -1.783
## OnlineSecurityYes                    -2.888e-01  2.131e-01  -1.355
## OnlineBackupYes                      -2.232e-03  2.086e-01  -0.011
## DeviceProtectionYes                   1.086e-01  2.100e-01   0.517
## TechSupportYes                       -2.113e-01  2.137e-01  -0.989
## StreamingTVYes                        5.903e-01  3.908e-01   1.510
## StreamingMoviesYes                    5.800e-01  3.903e-01   1.486
## ContractOne year                     -6.897e-01  1.276e-01  -5.407
## ContractTwo year                     -1.254e+00  2.027e-01  -6.183
## PaperlessBillingYes                   2.767e-01  8.778e-02   3.152
## PaymentMethodCredit card (automatic) -4.563e-02  1.357e-01  -0.336
## PaymentMethodElectronic check         2.597e-01  1.130e-01   2.297
## PaymentMethodMailed check            -8.074e-02  1.360e-01  -0.594
## MonthlyCharges                       -3.562e-02  3.791e-02  -0.940
## TotalCharges                          1.763e-04  8.083e-05   2.181
##                                      Pr(>|z|)    
## (Intercept)                           0.37223    
## genderMale                            0.73037    
## SeniorCitizenYes                      0.00680 ** 
## PartnerYes                            0.81487    
## DependentsYes                         0.24354    
## tenure                               5.69e-11 ***
## PhoneServiceYes                       0.78090    
## MultipleLinesYes                      0.05646 .  
## InternetServiceFiber optic            0.07086 .  
## InternetServiceNo                     0.07457 .  
## OnlineSecurityYes                     0.17527    
## OnlineBackupYes                       0.99146    
## DeviceProtectionYes                   0.60515    
## TechSupportYes                        0.32289    
## StreamingTVYes                        0.13095    
## StreamingMoviesYes                    0.13730    
## ContractOne year                     6.43e-08 ***
## ContractTwo year                     6.27e-10 ***
## PaperlessBillingYes                   0.00162 ** 
## PaymentMethodCredit card (automatic)  0.73664    
## PaymentMethodElectronic check         0.02160 *  
## PaymentMethodMailed check             0.55257    
## MonthlyCharges                        0.34740    
## TotalCharges                          0.02916 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 5709.6  on 4921  degrees of freedom
## Residual deviance: 4149.1  on 4898  degrees of freedom
## AIC: 4197.1
## 
## Number of Fisher Scoring iterations: 6
# Backward stepwise selection by AIC, starting from the full model.
# NOTE: attaching MASS after the tidyverse masks dplyr::select(), so select()
# has to be called as dplyr::select() from here on.
library(MASS)
model2 <- stepAIC(object = model, direction = "backward")
## Start:  AIC=4197.09
## Churn ~ gender + SeniorCitizen + Partner + Dependents + tenure + 
##     PhoneService + MultipleLines + InternetService + OnlineSecurity + 
##     OnlineBackup + DeviceProtection + TechSupport + StreamingTV + 
##     StreamingMovies + Contract + PaperlessBilling + PaymentMethod + 
##     MonthlyCharges + TotalCharges
## 
##                    Df Deviance    AIC
## - OnlineBackup      1   4149.1 4195.1
## - Partner           1   4149.1 4195.1
## - PhoneService      1   4149.2 4195.2
## - gender            1   4149.2 4195.2
## - DeviceProtection  1   4149.4 4195.4
## - MonthlyCharges    1   4150.0 4196.0
## - TechSupport       1   4150.1 4196.1
## - InternetService   2   4152.4 4196.4
## - Dependents        1   4150.5 4196.5
## - OnlineSecurity    1   4150.9 4196.9
## <none>                  4149.1 4197.1
## - StreamingMovies   1   4151.3 4197.3
## - StreamingTV       1   4151.4 4197.4
## - MultipleLines     1   4152.7 4198.7
## - TotalCharges      1   4154.0 4200.0
## - SeniorCitizen     1   4156.4 4202.4
## - PaymentMethod     3   4162.9 4204.9
## - PaperlessBilling  1   4159.1 4205.1
## - tenure            1   4197.9 4243.9
## - Contract          2   4204.6 4248.6
## 
## Step:  AIC=4195.09
## Churn ~ gender + SeniorCitizen + Partner + Dependents + tenure + 
##     PhoneService + MultipleLines + InternetService + OnlineSecurity + 
##     DeviceProtection + TechSupport + StreamingTV + StreamingMovies + 
##     Contract + PaperlessBilling + PaymentMethod + MonthlyCharges + 
##     TotalCharges
## 
##                    Df Deviance    AIC
## - Partner           1   4149.1 4193.1
## - gender            1   4149.2 4193.2
## - PhoneService      1   4149.5 4193.5
## - DeviceProtection  1   4149.9 4193.9
## - Dependents        1   4150.5 4194.5
## <none>                  4149.1 4195.1
## - TechSupport       1   4151.7 4195.7
## - MonthlyCharges    1   4153.8 4197.8
## - TotalCharges      1   4154.0 4198.0
## - OnlineSecurity    1   4154.0 4198.0
## - SeniorCitizen     1   4156.4 4200.4
## - StreamingMovies   1   4158.6 4202.6
## - StreamingTV       1   4158.7 4202.7
## - PaymentMethod     3   4162.9 4202.9
## - PaperlessBilling  1   4159.1 4203.1
## - MultipleLines     1   4159.6 4203.6
## - InternetService   2   4166.0 4208.0
## - tenure            1   4197.9 4241.9
## - Contract          2   4204.6 4246.6
## 
## Step:  AIC=4193.14
## Churn ~ gender + SeniorCitizen + Dependents + tenure + PhoneService + 
##     MultipleLines + InternetService + OnlineSecurity + DeviceProtection + 
##     TechSupport + StreamingTV + StreamingMovies + Contract + 
##     PaperlessBilling + PaymentMethod + MonthlyCharges + TotalCharges
## 
##                    Df Deviance    AIC
## - gender            1   4149.3 4191.3
## - PhoneService      1   4149.5 4191.5
## - DeviceProtection  1   4149.9 4191.9
## - Dependents        1   4150.5 4192.5
## <none>                  4149.1 4193.1
## - TechSupport       1   4151.7 4193.7
## - MonthlyCharges    1   4153.9 4195.9
## - TotalCharges      1   4154.0 4196.0
## - OnlineSecurity    1   4154.1 4196.1
## - SeniorCitizen     1   4156.7 4198.7
## - StreamingMovies   1   4158.7 4200.7
## - StreamingTV       1   4158.8 4200.8
## - PaymentMethod     3   4163.0 4201.0
## - PaperlessBilling  1   4159.1 4201.1
## - MultipleLines     1   4159.7 4201.7
## - InternetService   2   4166.1 4206.1
## - tenure            1   4198.1 4240.1
## - Contract          2   4204.7 4244.7
## 
## Step:  AIC=4191.26
## Churn ~ SeniorCitizen + Dependents + tenure + PhoneService + 
##     MultipleLines + InternetService + OnlineSecurity + DeviceProtection + 
##     TechSupport + StreamingTV + StreamingMovies + Contract + 
##     PaperlessBilling + PaymentMethod + MonthlyCharges + TotalCharges
## 
##                    Df Deviance    AIC
## - PhoneService      1   4149.6 4189.6
## - DeviceProtection  1   4150.1 4190.1
## - Dependents        1   4150.7 4190.7
## <none>                  4149.3 4191.3
## - TechSupport       1   4151.8 4191.8
## - MonthlyCharges    1   4154.0 4194.0
## - OnlineSecurity    1   4154.2 4194.2
## - TotalCharges      1   4154.2 4194.2
## - SeniorCitizen     1   4156.8 4196.8
## - StreamingMovies   1   4158.8 4198.8
## - StreamingTV       1   4158.9 4198.9
## - PaymentMethod     3   4163.1 4199.1
## - PaperlessBilling  1   4159.3 4199.3
## - MultipleLines     1   4159.8 4199.8
## - InternetService   2   4166.2 4204.2
## - tenure            1   4198.3 4238.3
## - Contract          2   4204.9 4242.9
## 
## Step:  AIC=4189.64
## Churn ~ SeniorCitizen + Dependents + tenure + MultipleLines + 
##     InternetService + OnlineSecurity + DeviceProtection + TechSupport + 
##     StreamingTV + StreamingMovies + Contract + PaperlessBilling + 
##     PaymentMethod + MonthlyCharges + TotalCharges
## 
##                    Df Deviance    AIC
## - DeviceProtection  1   4150.1 4188.1
## - Dependents        1   4151.1 4189.1
## <none>                  4149.6 4189.6
## - TotalCharges      1   4154.4 4192.4
## - TechSupport       1   4155.1 4193.1
## - SeniorCitizen     1   4157.1 4195.1
## - OnlineSecurity    1   4159.4 4197.4
## - PaymentMethod     3   4163.4 4197.4
## - PaperlessBilling  1   4159.6 4197.6
## - MultipleLines     1   4161.6 4199.6
## - MonthlyCharges    1   4163.3 4201.3
## - StreamingMovies   1   4167.7 4205.7
## - StreamingTV       1   4167.8 4205.8
## - tenure            1   4199.0 4237.0
## - Contract          2   4205.1 4241.1
## - InternetService   2   4206.9 4242.9
## 
## Step:  AIC=4188.06
## Churn ~ SeniorCitizen + Dependents + tenure + MultipleLines + 
##     InternetService + OnlineSecurity + TechSupport + StreamingTV + 
##     StreamingMovies + Contract + PaperlessBilling + PaymentMethod + 
##     MonthlyCharges + TotalCharges
## 
##                    Df Deviance    AIC
## - Dependents        1   4151.5 4187.5
## <none>                  4150.1 4188.1
## - TotalCharges      1   4154.9 4190.9
## - TechSupport       1   4155.8 4191.8
## - SeniorCitizen     1   4157.6 4193.6
## - PaymentMethod     3   4163.9 4195.9
## - PaperlessBilling  1   4160.0 4196.0
## - OnlineSecurity    1   4160.7 4196.7
## - MultipleLines     1   4161.6 4197.6
## - MonthlyCharges    1   4163.6 4199.6
## - StreamingMovies   1   4167.7 4203.7
## - StreamingTV       1   4167.8 4203.8
## - tenure            1   4199.3 4235.3
## - Contract          2   4205.1 4239.1
## - InternetService   2   4208.7 4242.7
## 
## Step:  AIC=4187.5
## Churn ~ SeniorCitizen + tenure + MultipleLines + InternetService + 
##     OnlineSecurity + TechSupport + StreamingTV + StreamingMovies + 
##     Contract + PaperlessBilling + PaymentMethod + MonthlyCharges + 
##     TotalCharges
## 
##                    Df Deviance    AIC
## <none>                  4151.5 4187.5
## - TotalCharges      1   4156.6 4190.6
## - TechSupport       1   4157.1 4191.1
## - SeniorCitizen     1   4160.3 4194.3
## - PaymentMethod     3   4165.4 4195.4
## - PaperlessBilling  1   4161.5 4195.5
## - OnlineSecurity    1   4162.1 4196.1
## - MultipleLines     1   4163.2 4197.2
## - MonthlyCharges    1   4165.4 4199.4
## - StreamingTV       1   4169.3 4203.3
## - StreamingMovies   1   4169.5 4203.5
## - tenure            1   4202.0 4236.0
## - Contract          2   4208.6 4240.6
## - InternetService   2   4211.0 4243.0
# Coefficient table and fit statistics of the model kept by the stepwise search.
print(summary(model2))
## 
## Call:
## glm(formula = Churn ~ SeniorCitizen + tenure + MultipleLines + 
##     InternetService + OnlineSecurity + TechSupport + StreamingTV + 
##     StreamingMovies + Contract + PaperlessBilling + PaymentMethod + 
##     MonthlyCharges + TotalCharges, family = "binomial", data = churn.train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9442  -0.6819  -0.3089   0.7487   3.2497  
## 
## Coefficients:
##                                        Estimate Std. Error z value
## (Intercept)                           0.5874515  0.3295296   1.783
## SeniorCitizenYes                      0.2916756  0.0980222   2.976
## tenure                               -0.0468290  0.0070355  -6.656
## MultipleLinesYes                      0.3532324  0.1035597   3.411
## InternetServiceFiber optic            1.4816303  0.2277475   6.506
## InternetServiceNo                    -1.4598222  0.2097082  -6.961
## OnlineSecurityYes                    -0.3414423  0.1050979  -3.249
## TechSupportYes                       -0.2563492  0.1082908  -2.367
## StreamingTVYes                        0.4937503  0.1173653   4.207
## StreamingMoviesYes                    0.4872998  0.1151793   4.231
## ContractOne year                     -0.6933437  0.1269226  -5.463
## ContractTwo year                     -1.2604038  0.2018241  -6.245
## PaperlessBillingYes                   0.2773031  0.0876190   3.165
## PaymentMethodCredit card (automatic) -0.0474705  0.1355079  -0.350
## PaymentMethodElectronic check         0.2601407  0.1129409   2.303
## PaymentMethodMailed check            -0.0818137  0.1356872  -0.603
## MonthlyCharges                       -0.0255788  0.0068645  -3.726
## TotalCharges                          0.0001795  0.0000805   2.230
##                                      Pr(>|z|)    
## (Intercept)                          0.074636 .  
## SeniorCitizenYes                     0.002924 ** 
## tenure                               2.81e-11 ***
## MultipleLinesYes                     0.000647 ***
## InternetServiceFiber optic           7.74e-11 ***
## InternetServiceNo                    3.37e-12 ***
## OnlineSecurityYes                    0.001159 ** 
## TechSupportYes                       0.017922 *  
## StreamingTVYes                       2.59e-05 ***
## StreamingMoviesYes                   2.33e-05 ***
## ContractOne year                     4.69e-08 ***
## ContractTwo year                     4.24e-10 ***
## PaperlessBillingYes                  0.001551 ** 
## PaymentMethodCredit card (automatic) 0.726102    
## PaymentMethodElectronic check        0.021260 *  
## PaymentMethodMailed check            0.546537    
## MonthlyCharges                       0.000194 ***
## TotalCharges                         0.025760 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 5709.6  on 4921  degrees of freedom
## Residual deviance: 4151.5  on 4904  degrees of freedom
## AIC: 4187.5
## 
## Number of Fisher Scoring iterations: 6
# Predicted churn probability for each test-set customer (the Churn column is
# removed from the data handed to predict), followed by a density plot of
# those probabilities.
churn.test$prob_churn <- predict(
  model2,
  newdata = dplyr::select(churn.test, -Churn),
  type = "response"
)
ggplot(data = churn.test, mapping = aes(x = prob_churn)) +
  geom_density(lwd = 1) +
  theme_minimal()

# Label a customer as a churner when the predicted probability exceeds 0.5,
# then show the first six predictions next to the observed labels.
churn.test <- churn.test %>%
  mutate(pred_churn = factor(ifelse(prob_churn > 0.5, "Yes", "No")))
churn.test %>%
  dplyr::select(pred_churn, Churn) %>%
  head(n = 6)
## # A tibble: 6 x 2
##   pred_churn Churn
##   <fct>      <fct>
## 1 Yes        Yes  
## 2 Yes        Yes  
## 3 No         No   
## 4 No         Yes  
## 5 No         No   
## 6 No         No
library(caret)
# Confusion matrix of the 0.5-cutoff predictions against the observed labels,
# with "Yes" as the positive class.
conf <- confusionMatrix(
  data = churn.test$pred_churn,
  reference = churn.test$Churn,
  positive = "Yes"
)
print(conf)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   No  Yes
##        No  1401  256
##        Yes  153  300
##                                           
##                Accuracy : 0.8062          
##                  95% CI : (0.7886, 0.8228)
##     No Information Rate : 0.7365          
##     P-Value [Acc > NIR] : 3.900e-14       
##                                           
##                   Kappa : 0.469           
##  Mcnemar's Test P-Value : 4.569e-07       
##                                           
##             Sensitivity : 0.5396          
##             Specificity : 0.9015          
##          Pos Pred Value : 0.6623          
##          Neg Pred Value : 0.8455          
##              Prevalence : 0.2635          
##          Detection Rate : 0.1422          
##    Detection Prevalence : 0.2147          
##       Balanced Accuracy : 0.7206          
##                                           
##        'Positive' Class : Yes             
## 
# tuning cutoff

#' Classification metrics at a given probability cutoff
#'
#' Labels every probability `>= cutoff` as `postarget` and the rest as
#' `negtarget`, compares those labels with `ref` through
#' `caret::confusionMatrix()` and returns four metrics.
#'
#' @param cutoff Probability threshold.
#' @param prob Numeric vector of predicted probabilities.
#' @param ref Observed classes (a factor whose levels are `negtarget` and
#'   `postarget`).
#' @param postarget Label of the positive class.
#' @param negtarget Label of the negative class.
#' @return A 1 x 4 numeric matrix with columns "recall", "accuracy",
#'   "precicion" and "specificity" (the misspelt third name is kept unchanged
#'   so that existing callers indexing by name keep working).
performa <- function(cutoff, prob, ref, postarget, negtarget) {
  # Pin the levels of the predicted factor. Without this, a cutoff that puts
  # every observation in a single class produces a one-level factor that no
  # longer matches the levels of `ref`.
  class_levels <- if (is.factor(ref)) levels(ref) else c(negtarget, postarget)
  predicted <- factor(
    ifelse(prob >= cutoff, postarget, negtarget),
    levels = class_levels
  )

  conf <- caret::confusionMatrix(predicted, ref, positive = postarget)

  # Look the metrics up by name instead of by position in the result vectors.
  metrics <- c(
    conf$byClass[["Sensitivity"]],
    conf$overall[["Accuracy"]],
    conf$byClass[["Pos Pred Value"]],
    conf$byClass[["Specificity"]]
  )

  matrix(
    metrics,
    nrow = 1,
    dimnames = list(
      NULL,
      c("recall", "accuracy", "precicion", "specificity")
    )
  )
}

# Evaluate performa() on the test set over a grid of 100 cutoffs between 0.01
# and 0.80. Row i of `result` holds, in this order, recall, accuracy,
# precision and specificity for cutoff co[i].
co <- seq(0.01, 0.80, length.out = 100)

# Size the result from the grid itself so the two cannot drift apart.
result <- matrix(0, nrow = length(co), ncol = 4)

for (i in seq_along(co)) {
  result[i, ] <- performa(
    cutoff = co[i],
    prob = churn.test$prob_churn,
    ref = churn.test$Churn,
    postarget = "Yes",
    negtarget = "No"
  )
}

# Plot the four metrics against the cutoff to visualise the trade-off.
# tibble() replaces the deprecated data_frame(), and the metric columns are
# gathered by name rather than by position.
tibble(
  Recall = result[, 1],
  Accuracy = result[, 2],
  Precision = result[, 3],
  Specificity = result[, 4],
  Cutoff = co
) %>%
  gather(
    key = "performa", value = "value",
    Recall, Accuracy, Precision, Specificity
  ) %>%
  ggplot(aes(x = Cutoff, y = value, col = performa)) +
  geom_line(lwd = 1.5) +
  # Colours are assigned to the metrics in alphabetical order:
  # Accuracy, Precision, Recall, Specificity.
  scale_color_manual(values = c("darkred", "darkgreen", "orange", "blue")) +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1, 0.1)) +
  labs(title = "Tradeoff model performance") +
  theme_minimal() +
  theme(
    legend.position = "top",
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank()
  )
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.

Model Interpretation

# Odds ratios: exponentiated coefficients of the selected model
model2 %>%
  coef() %>%
  exp() %>%
  data.frame()
##                                              .
## (Intercept)                          1.7993968
## SeniorCitizenYes                     1.3386687
## tenure                               0.9542506
## MultipleLinesYes                     1.4236620
## InternetServiceFiber optic           4.4001135
## InternetServiceNo                    0.2322776
## OnlineSecurityYes                    0.7107445
## TechSupportYes                       0.7738717
## StreamingTVYes                       1.6384494
## StreamingMoviesYes                   1.6279146
## ContractOne year                     0.4999018
## ContractTwo year                     0.2835395
## PaperlessBillingYes                  1.3195663
## PaymentMethodCredit card (automatic) 0.9536386
## PaymentMethodElectronic check        1.2971125
## PaymentMethodMailed check            0.9214436
## MonthlyCharges                       0.9747456
## TotalCharges                         1.0001795
# Level order of Contract; the first level is the baseline that the Contract
# coefficients of the model are compared against.
levels(datachurn[["Contract"]])
## [1] "Month-to-month" "One year"       "Two year"

interpretasi:

  1. Odds Ratio ContractOne year = 0,50 < 1. Artinya, dengan variabel lain dianggap konstan, odds (bukan probabilitas) churn pelanggan dengan kontrak satu tahun adalah 0,50 kali odds pelanggan dengan kontrak bulanan (Month-to-month), atau sekitar 50% lebih kecil.
# Re-classify the test set with a 15% probability cutoff and store the
# resulting confusion matrix ("Yes" is the positive class).
predict_15 <- factor(ifelse(churn.test$prob_churn <= 0.15, "No", "Yes"))
perf_logistic <- confusionMatrix(
  data = predict_15,
  reference = churn.test$Churn,
  positive = "Yes"
)

KNN

# One-hot encode datachurn: one 0/1 column per factor level, numeric columns
# kept as they are. Then inspect the structure of the result.
dmy <- datachurn %>%
  dummyVars(" ~ .", data = .) %>%
  predict(newdata = datachurn) %>%
  data.frame()
str(dmy)
## 'data.frame':    7032 obs. of  41 variables:
##  $ gender.Female                          : num  1 0 0 0 1 1 0 1 1 0 ...
##  $ gender.Male                            : num  0 1 1 1 0 0 1 0 0 1 ...
##  $ SeniorCitizen.No                       : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ SeniorCitizen.Yes                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Partner.No                             : num  0 1 1 1 1 1 1 1 0 1 ...
##  $ Partner.Yes                            : num  1 0 0 0 0 0 0 0 1 0 ...
##  $ Dependents.No                          : num  1 1 1 1 1 1 0 1 1 0 ...
##  $ Dependents.Yes                         : num  0 0 0 0 0 0 1 0 0 1 ...
##  $ tenure                                 : num  1 34 2 45 2 8 22 10 28 62 ...
##  $ PhoneService.No                        : num  1 0 0 1 0 0 0 1 0 0 ...
##  $ PhoneService.Yes                       : num  0 1 1 0 1 1 1 0 1 1 ...
##  $ MultipleLines.No                       : num  1 1 1 1 1 0 0 1 0 1 ...
##  $ MultipleLines.Yes                      : num  0 0 0 0 0 1 1 0 1 0 ...
##  $ InternetService.DSL                    : num  1 1 1 1 0 0 0 1 0 1 ...
##  $ InternetService.Fiber.optic            : num  0 0 0 0 1 1 1 0 1 0 ...
##  $ InternetService.No                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ OnlineSecurity.No                      : num  1 0 0 0 1 1 1 0 1 0 ...
##  $ OnlineSecurity.Yes                     : num  0 1 1 1 0 0 0 1 0 1 ...
##  $ OnlineBackup.No                        : num  0 1 0 1 1 1 0 1 1 0 ...
##  $ OnlineBackup.Yes                       : num  1 0 1 0 0 0 1 0 0 1 ...
##  $ DeviceProtection.No                    : num  1 0 1 0 1 0 1 1 0 1 ...
##  $ DeviceProtection.Yes                   : num  0 1 0 1 0 1 0 0 1 0 ...
##  $ TechSupport.No                         : num  1 1 1 0 1 1 1 1 0 1 ...
##  $ TechSupport.Yes                        : num  0 0 0 1 0 0 0 0 1 0 ...
##  $ StreamingTV.No                         : num  1 1 1 1 1 0 0 1 0 1 ...
##  $ StreamingTV.Yes                        : num  0 0 0 0 0 1 1 0 1 0 ...
##  $ StreamingMovies.No                     : num  1 1 1 1 1 0 1 1 0 1 ...
##  $ StreamingMovies.Yes                    : num  0 0 0 0 0 1 0 0 1 0 ...
##  $ Contract.Month.to.month                : num  1 0 1 0 1 1 1 1 1 0 ...
##  $ Contract.One.year                      : num  0 1 0 1 0 0 0 0 0 1 ...
##  $ Contract.Two.year                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ PaperlessBilling.No                    : num  0 1 0 1 0 0 0 1 0 1 ...
##  $ PaperlessBilling.Yes                   : num  1 0 1 0 1 1 1 0 1 0 ...
##  $ PaymentMethod.Bank.transfer..automatic.: num  0 0 0 1 0 0 0 0 0 1 ...
##  $ PaymentMethod.Credit.card..automatic.  : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ PaymentMethod.Electronic.check         : num  1 0 0 0 1 1 0 0 1 0 ...
##  $ PaymentMethod.Mailed.check             : num  0 1 1 0 0 0 0 1 0 0 ...
##  $ MonthlyCharges                         : num  29.9 57 53.9 42.3 70.7 ...
##  $ TotalCharges                           : num  29.9 1889.5 108.2 1840.8 151.7 ...
##  $ Churn.No                               : num  1 1 0 1 0 0 1 1 0 1 ...
##  $ Churn.Yes                              : num  0 0 1 0 1 1 0 0 1 0 ...
# Drop the redundant complement column of the two-level dummy pairs (and
# Churn.No, which leaves Churn.Yes as the 0/1 target), then list what remains.
# NOTE(review): StreamingTV.No and StreamingMovies.No are not dropped, while
# the original list named DeviceProtection.No twice - possibly a copy-paste
# slip. Dropping them would shift column positions, so check any
# position-based indexing of `dmy` before changing this list.
dmy[c(
  "gender.Female",
  "SeniorCitizen.No",
  "Partner.No",
  "Dependents.No",
  "PhoneService.No",
  "MultipleLines.No",
  "OnlineSecurity.No",
  "OnlineBackup.No",
  "DeviceProtection.No",
  "TechSupport.No",
  "PaperlessBilling.No",
  "Churn.No"
)] <- NULL
names(dmy)
##  [1] "gender.Male"                            
##  [2] "SeniorCitizen.Yes"                      
##  [3] "Partner.Yes"                            
##  [4] "Dependents.Yes"                         
##  [5] "tenure"                                 
##  [6] "PhoneService.Yes"                       
##  [7] "MultipleLines.Yes"                      
##  [8] "InternetService.DSL"                    
##  [9] "InternetService.Fiber.optic"            
## [10] "InternetService.No"                     
## [11] "OnlineSecurity.Yes"                     
## [12] "OnlineBackup.Yes"                       
## [13] "DeviceProtection.Yes"                   
## [14] "TechSupport.Yes"                        
## [15] "StreamingTV.No"                         
## [16] "StreamingTV.Yes"                        
## [17] "StreamingMovies.No"                     
## [18] "StreamingMovies.Yes"                    
## [19] "Contract.Month.to.month"                
## [20] "Contract.One.year"                      
## [21] "Contract.Two.year"                      
## [22] "PaperlessBilling.Yes"                   
## [23] "PaymentMethod.Bank.transfer..automatic."
## [24] "PaymentMethod.Credit.card..automatic."  
## [25] "PaymentMethod.Electronic.check"         
## [26] "PaymentMethod.Mailed.check"             
## [27] "MonthlyCharges"                         
## [28] "TotalCharges"                           
## [29] "Churn.Yes"
# k-nearest-neighbours on the dummy-encoded data, reusing the row split
# (`intrain`) of the logistic model.

# Pick predictors by name rather than by position. Besides the target, the
# complement dummies StreamingTV.No / StreamingMovies.No are excluded: they
# carry the same information as their .Yes columns and would count those two
# features twice in the distance. setdiff() ignores names that are absent.
feature_cols <- setdiff(
  names(dmy),
  c("Churn.Yes", "StreamingTV.No", "StreamingMovies.No")
)
dmy_train <- dmy[intrain, feature_cols]
dmy_test <- dmy[-intrain, feature_cols]
dmy_train_label <- dmy[intrain, "Churn.Yes"]
dmy_test_label <- dmy[-intrain, "Churn.Yes"]

# knn() uses Euclidean distance, so unscaled columns with a large range
# (TotalCharges, MonthlyCharges, tenure) would dominate the 0/1 dummies.
# Standardise with the training-set mean and sd, and apply the same
# transformation to the test set so no test information leaks into it.
train_center <- colMeans(dmy_train)
train_scale <- vapply(dmy_train, sd, numeric(1))
train_scale[train_scale == 0] <- 1 # constant column: avoid dividing by zero
dmy_train <- as.data.frame(
  scale(dmy_train, center = train_center, scale = train_scale)
)
dmy_test <- as.data.frame(
  scale(dmy_test, center = train_center, scale = train_scale)
)

pred_knn <- class::knn(
  train = dmy_train,
  test = dmy_test,
  cl = dmy_train_label,
  k = 70
)

# Fix both factors to the same "0"/"1" levels; "1" (Churn.Yes == 1) is the
# positive class.
perf_knn <- confusionMatrix(
  data = factor(pred_knn, levels = c("0", "1")),
  reference = factor(dmy_test_label, levels = c("0", "1")),
  positive = "1"
)

Model Evaluation Logistic Regression vs KNN

# One-row summaries (accuracy, recall, precision) of both classifiers.
# tibble() replaces the deprecated data_frame(), and the metrics are looked up
# by name: recall is caret's "Sensitivity", precision its "Pos Pred Value".
eval_logit <- tibble(
  Accuracy = perf_logistic$overall[["Accuracy"]],
  Recall = perf_logistic$byClass[["Sensitivity"]],
  Precision = perf_logistic$byClass[["Pos Pred Value"]]
)

eval_knn <- tibble(
  Accuracy = perf_knn$overall[["Accuracy"]],
  Recall = perf_knn$byClass[["Sensitivity"]],
  Precision = perf_knn$byClass[["Pos Pred Value"]]
)
#Model Evaluation Logistic Regression
eval_logit
## # A tibble: 1 x 3
##   Accuracy Recall Precision
##      <dbl>  <dbl>     <dbl>
## 1    0.684  0.921     0.451
# Model evaluation: KNN
print(eval_knn)
## # A tibble: 1 x 3
##   Accuracy Recall Precision
##      <dbl>  <dbl>     <dbl>
## 1    0.788  0.302     0.740