Data Import
library(tidyverse)
# Read the Telco churn data set from the project's data_input folder and
# print a compact overview of every column and its parsed type.
datachurn <- read_csv("data_input/TelcoChurn.csv")
glimpse(datachurn)
## Observations: 7,043
## Variables: 21
## $ customerID <chr> "7590-VHVEG", "5575-GNVDE", "3668-QPYBK", "77...
## $ gender <chr> "Female", "Male", "Male", "Male", "Female", "...
## $ SeniorCitizen <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ Partner <chr> "Yes", "No", "No", "No", "No", "No", "No", "N...
## $ Dependents <chr> "No", "No", "No", "No", "No", "No", "Yes", "N...
## $ tenure <int> 1, 34, 2, 45, 2, 8, 22, 10, 28, 62, 13, 16, 5...
## $ PhoneService <chr> "No", "Yes", "Yes", "No", "Yes", "Yes", "Yes"...
## $ MultipleLines <chr> "No phone service", "No", "No", "No phone ser...
## $ InternetService <chr> "DSL", "DSL", "DSL", "DSL", "Fiber optic", "F...
## $ OnlineSecurity <chr> "No", "Yes", "Yes", "Yes", "No", "No", "No", ...
## $ OnlineBackup <chr> "Yes", "No", "Yes", "No", "No", "No", "Yes", ...
## $ DeviceProtection <chr> "No", "Yes", "No", "Yes", "No", "Yes", "No", ...
## $ TechSupport <chr> "No", "No", "No", "Yes", "No", "No", "No", "N...
## $ StreamingTV <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "...
## $ StreamingMovies <chr> "No", "No", "No", "No", "No", "Yes", "No", "N...
## $ Contract <chr> "Month-to-month", "One year", "Month-to-month...
## $ PaperlessBilling <chr> "Yes", "No", "Yes", "No", "Yes", "Yes", "Yes"...
## $ PaymentMethod <chr> "Electronic check", "Mailed check", "Mailed c...
## $ MonthlyCharges <dbl> 29.85, 56.95, 53.85, 42.30, 70.70, 99.65, 89....
## $ TotalCharges <dbl> 29.85, 1889.50, 108.15, 1840.75, 151.65, 820....
## $ Churn <chr> "No", "No", "Yes", "No", "Yes", "Yes", "No", ...
Beberapa informasi penting Data:
- SeniorCitizen : Informasi apakah pelanggan termasuk warga senior (1) atau bukan (0).
- Partner : Informasi apakah pelanggan memiliki pasangan atau tidak.
- tenure : Informasi lama waktu pelanggan telah berlangganan.
- Churn : Informasi apakah pelanggan churn (berhenti berlangganan) atau tidak.
Data Manipulation
# Prepare column types for modelling:
#   1. drop the customer identifier (not a predictor),
#   2. turn every text column into a factor,
#   3. recode the 0/1 SeniorCitizen flag as a No/Yes factor.
datachurn <- dplyr::select(datachurn, -customerID)
datachurn <- mutate_if(datachurn, is.character, as.factor)
datachurn$SeniorCitizen <- factor(
  datachurn$SeniorCitizen,
  levels = c(0, 1),
  labels = c("No", "Yes")
)

glimpse(datachurn)
## Observations: 7,043
## Variables: 20
## $ gender <fct> Female, Male, Male, Male, Female, Female, Mal...
## $ SeniorCitizen <fct> No, No, No, No, No, No, No, No, No, No, No, N...
## $ Partner <fct> Yes, No, No, No, No, No, No, No, Yes, No, Yes...
## $ Dependents <fct> No, No, No, No, No, No, Yes, No, No, Yes, Yes...
## $ tenure <int> 1, 34, 2, 45, 2, 8, 22, 10, 28, 62, 13, 16, 5...
## $ PhoneService <fct> No, Yes, Yes, No, Yes, Yes, Yes, No, Yes, Yes...
## $ MultipleLines <fct> No phone service, No, No, No phone service, N...
## $ InternetService <fct> DSL, DSL, DSL, DSL, Fiber optic, Fiber optic,...
## $ OnlineSecurity <fct> No, Yes, Yes, Yes, No, No, No, Yes, No, Yes, ...
## $ OnlineBackup <fct> Yes, No, Yes, No, No, No, Yes, No, No, Yes, N...
## $ DeviceProtection <fct> No, Yes, No, Yes, No, Yes, No, No, Yes, No, N...
## $ TechSupport <fct> No, No, No, Yes, No, No, No, No, Yes, No, No,...
## $ StreamingTV <fct> No, No, No, No, No, Yes, Yes, No, Yes, No, No...
## $ StreamingMovies <fct> No, No, No, No, No, Yes, No, No, Yes, No, No,...
## $ Contract <fct> Month-to-month, One year, Month-to-month, One...
## $ PaperlessBilling <fct> Yes, No, Yes, No, Yes, Yes, Yes, No, Yes, No,...
## $ PaymentMethod <fct> Electronic check, Mailed check, Mailed check,...
## $ MonthlyCharges <dbl> 29.85, 56.95, 53.85, 42.30, 70.70, 99.65, 89....
## $ TotalCharges <dbl> 29.85, 1889.50, 108.15, 1840.75, 151.65, 820....
## $ Churn <fct> No, No, Yes, No, Yes, Yes, No, No, Yes, No, N...
# Number of missing values in each column.
colSums(is.na(datachurn))
## gender SeniorCitizen Partner Dependents
## 0 0 0 0
## tenure PhoneService MultipleLines InternetService
## 0 0 0 0
## OnlineSecurity OnlineBackup DeviceProtection TechSupport
## 0 0 0 0
## StreamingTV StreamingMovies Contract PaperlessBilling
## 0 0 0 0
## PaymentMethod MonthlyCharges TotalCharges Churn
## 0 0 11 0
# Drop the rows that contain missing values.
datachurn <- na.omit(datachurn)

# Collapse the "no service" categories into a plain "No" so that every
# add-on column becomes a two-level Yes/No factor.
datachurn <- datachurn %>%
  mutate(
    MultipleLines = plyr::revalue(MultipleLines, c("No phone service" = "No"))
  ) %>%
  mutate_at(
    vars(
      OnlineSecurity, OnlineBackup, DeviceProtection,
      TechSupport, StreamingTV, StreamingMovies
    ),
    plyr::revalue,
    replace = c("No internet service" = "No")
  )
Exploratory Data Analysis
Modelling
# Share of each Churn class, to check how imbalanced the target is.
prop.table(table(datachurn$Churn))
##
## No Yes
## 0.734215 0.265785
# Reproducible random split: 70% of the rows go to training, the rest to test.
set.seed(417)
n_obs <- nrow(datachurn)
intrain <- sample(n_obs, n_obs * 0.7)

churn.train <- datachurn[intrain, ]
churn.test <- datachurn[-intrain, ]

# Full logistic regression: Churn against every remaining column.
model <- glm(formula = Churn ~ ., family = "binomial", data = churn.train)
summary(model)
##
## Call:
## glm(formula = Churn ~ ., family = "binomial", data = churn.train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9319 -0.6847 -0.3058 0.7427 3.2348
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 8.695e-01 9.745e-01 0.892
## genderMale -2.653e-02 7.697e-02 -0.345
## SeniorCitizenYes 2.701e-01 9.980e-02 2.706
## PartnerYes 2.174e-02 9.283e-02 0.234
## DependentsYes -1.241e-01 1.064e-01 -1.166
## tenure -4.639e-02 7.081e-03 -6.552
## PhoneServiceYes 2.153e-01 7.740e-01 0.278
## MultipleLinesYes 4.026e-01 2.110e-01 1.907
## InternetServiceFiber optic 1.719e+00 9.516e-01 1.806
## InternetServiceNo -1.718e+00 9.637e-01 -1.783
## OnlineSecurityYes -2.888e-01 2.131e-01 -1.355
## OnlineBackupYes -2.232e-03 2.086e-01 -0.011
## DeviceProtectionYes 1.086e-01 2.100e-01 0.517
## TechSupportYes -2.113e-01 2.137e-01 -0.989
## StreamingTVYes 5.903e-01 3.908e-01 1.510
## StreamingMoviesYes 5.800e-01 3.903e-01 1.486
## ContractOne year -6.897e-01 1.276e-01 -5.407
## ContractTwo year -1.254e+00 2.027e-01 -6.183
## PaperlessBillingYes 2.767e-01 8.778e-02 3.152
## PaymentMethodCredit card (automatic) -4.563e-02 1.357e-01 -0.336
## PaymentMethodElectronic check 2.597e-01 1.130e-01 2.297
## PaymentMethodMailed check -8.074e-02 1.360e-01 -0.594
## MonthlyCharges -3.562e-02 3.791e-02 -0.940
## TotalCharges 1.763e-04 8.083e-05 2.181
## Pr(>|z|)
## (Intercept) 0.37223
## genderMale 0.73037
## SeniorCitizenYes 0.00680 **
## PartnerYes 0.81487
## DependentsYes 0.24354
## tenure 5.69e-11 ***
## PhoneServiceYes 0.78090
## MultipleLinesYes 0.05646 .
## InternetServiceFiber optic 0.07086 .
## InternetServiceNo 0.07457 .
## OnlineSecurityYes 0.17527
## OnlineBackupYes 0.99146
## DeviceProtectionYes 0.60515
## TechSupportYes 0.32289
## StreamingTVYes 0.13095
## StreamingMoviesYes 0.13730
## ContractOne year 6.43e-08 ***
## ContractTwo year 6.27e-10 ***
## PaperlessBillingYes 0.00162 **
## PaymentMethodCredit card (automatic) 0.73664
## PaymentMethodElectronic check 0.02160 *
## PaymentMethodMailed check 0.55257
## MonthlyCharges 0.34740
## TotalCharges 0.02916 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 5709.6 on 4921 degrees of freedom
## Residual deviance: 4149.1 on 4898 degrees of freedom
## AIC: 4197.1
##
## Number of Fisher Scoring iterations: 6
# Backward stepwise selection by AIC, starting from the full model.
# NOTE: attaching MASS masks dplyr::select(); qualify select() with dplyr::
# in any code that runs after this point.
library(MASS)
model2 <- stepAIC(model, direction="backward")
## Start: AIC=4197.09
## Churn ~ gender + SeniorCitizen + Partner + Dependents + tenure +
## PhoneService + MultipleLines + InternetService + OnlineSecurity +
## OnlineBackup + DeviceProtection + TechSupport + StreamingTV +
## StreamingMovies + Contract + PaperlessBilling + PaymentMethod +
## MonthlyCharges + TotalCharges
##
## Df Deviance AIC
## - OnlineBackup 1 4149.1 4195.1
## - Partner 1 4149.1 4195.1
## - PhoneService 1 4149.2 4195.2
## - gender 1 4149.2 4195.2
## - DeviceProtection 1 4149.4 4195.4
## - MonthlyCharges 1 4150.0 4196.0
## - TechSupport 1 4150.1 4196.1
## - InternetService 2 4152.4 4196.4
## - Dependents 1 4150.5 4196.5
## - OnlineSecurity 1 4150.9 4196.9
## <none> 4149.1 4197.1
## - StreamingMovies 1 4151.3 4197.3
## - StreamingTV 1 4151.4 4197.4
## - MultipleLines 1 4152.7 4198.7
## - TotalCharges 1 4154.0 4200.0
## - SeniorCitizen 1 4156.4 4202.4
## - PaymentMethod 3 4162.9 4204.9
## - PaperlessBilling 1 4159.1 4205.1
## - tenure 1 4197.9 4243.9
## - Contract 2 4204.6 4248.6
##
## Step: AIC=4195.09
## Churn ~ gender + SeniorCitizen + Partner + Dependents + tenure +
## PhoneService + MultipleLines + InternetService + OnlineSecurity +
## DeviceProtection + TechSupport + StreamingTV + StreamingMovies +
## Contract + PaperlessBilling + PaymentMethod + MonthlyCharges +
## TotalCharges
##
## Df Deviance AIC
## - Partner 1 4149.1 4193.1
## - gender 1 4149.2 4193.2
## - PhoneService 1 4149.5 4193.5
## - DeviceProtection 1 4149.9 4193.9
## - Dependents 1 4150.5 4194.5
## <none> 4149.1 4195.1
## - TechSupport 1 4151.7 4195.7
## - MonthlyCharges 1 4153.8 4197.8
## - TotalCharges 1 4154.0 4198.0
## - OnlineSecurity 1 4154.0 4198.0
## - SeniorCitizen 1 4156.4 4200.4
## - StreamingMovies 1 4158.6 4202.6
## - StreamingTV 1 4158.7 4202.7
## - PaymentMethod 3 4162.9 4202.9
## - PaperlessBilling 1 4159.1 4203.1
## - MultipleLines 1 4159.6 4203.6
## - InternetService 2 4166.0 4208.0
## - tenure 1 4197.9 4241.9
## - Contract 2 4204.6 4246.6
##
## Step: AIC=4193.14
## Churn ~ gender + SeniorCitizen + Dependents + tenure + PhoneService +
## MultipleLines + InternetService + OnlineSecurity + DeviceProtection +
## TechSupport + StreamingTV + StreamingMovies + Contract +
## PaperlessBilling + PaymentMethod + MonthlyCharges + TotalCharges
##
## Df Deviance AIC
## - gender 1 4149.3 4191.3
## - PhoneService 1 4149.5 4191.5
## - DeviceProtection 1 4149.9 4191.9
## - Dependents 1 4150.5 4192.5
## <none> 4149.1 4193.1
## - TechSupport 1 4151.7 4193.7
## - MonthlyCharges 1 4153.9 4195.9
## - TotalCharges 1 4154.0 4196.0
## - OnlineSecurity 1 4154.1 4196.1
## - SeniorCitizen 1 4156.7 4198.7
## - StreamingMovies 1 4158.7 4200.7
## - StreamingTV 1 4158.8 4200.8
## - PaymentMethod 3 4163.0 4201.0
## - PaperlessBilling 1 4159.1 4201.1
## - MultipleLines 1 4159.7 4201.7
## - InternetService 2 4166.1 4206.1
## - tenure 1 4198.1 4240.1
## - Contract 2 4204.7 4244.7
##
## Step: AIC=4191.26
## Churn ~ SeniorCitizen + Dependents + tenure + PhoneService +
## MultipleLines + InternetService + OnlineSecurity + DeviceProtection +
## TechSupport + StreamingTV + StreamingMovies + Contract +
## PaperlessBilling + PaymentMethod + MonthlyCharges + TotalCharges
##
## Df Deviance AIC
## - PhoneService 1 4149.6 4189.6
## - DeviceProtection 1 4150.1 4190.1
## - Dependents 1 4150.7 4190.7
## <none> 4149.3 4191.3
## - TechSupport 1 4151.8 4191.8
## - MonthlyCharges 1 4154.0 4194.0
## - OnlineSecurity 1 4154.2 4194.2
## - TotalCharges 1 4154.2 4194.2
## - SeniorCitizen 1 4156.8 4196.8
## - StreamingMovies 1 4158.8 4198.8
## - StreamingTV 1 4158.9 4198.9
## - PaymentMethod 3 4163.1 4199.1
## - PaperlessBilling 1 4159.3 4199.3
## - MultipleLines 1 4159.8 4199.8
## - InternetService 2 4166.2 4204.2
## - tenure 1 4198.3 4238.3
## - Contract 2 4204.9 4242.9
##
## Step: AIC=4189.64
## Churn ~ SeniorCitizen + Dependents + tenure + MultipleLines +
## InternetService + OnlineSecurity + DeviceProtection + TechSupport +
## StreamingTV + StreamingMovies + Contract + PaperlessBilling +
## PaymentMethod + MonthlyCharges + TotalCharges
##
## Df Deviance AIC
## - DeviceProtection 1 4150.1 4188.1
## - Dependents 1 4151.1 4189.1
## <none> 4149.6 4189.6
## - TotalCharges 1 4154.4 4192.4
## - TechSupport 1 4155.1 4193.1
## - SeniorCitizen 1 4157.1 4195.1
## - OnlineSecurity 1 4159.4 4197.4
## - PaymentMethod 3 4163.4 4197.4
## - PaperlessBilling 1 4159.6 4197.6
## - MultipleLines 1 4161.6 4199.6
## - MonthlyCharges 1 4163.3 4201.3
## - StreamingMovies 1 4167.7 4205.7
## - StreamingTV 1 4167.8 4205.8
## - tenure 1 4199.0 4237.0
## - Contract 2 4205.1 4241.1
## - InternetService 2 4206.9 4242.9
##
## Step: AIC=4188.06
## Churn ~ SeniorCitizen + Dependents + tenure + MultipleLines +
## InternetService + OnlineSecurity + TechSupport + StreamingTV +
## StreamingMovies + Contract + PaperlessBilling + PaymentMethod +
## MonthlyCharges + TotalCharges
##
## Df Deviance AIC
## - Dependents 1 4151.5 4187.5
## <none> 4150.1 4188.1
## - TotalCharges 1 4154.9 4190.9
## - TechSupport 1 4155.8 4191.8
## - SeniorCitizen 1 4157.6 4193.6
## - PaymentMethod 3 4163.9 4195.9
## - PaperlessBilling 1 4160.0 4196.0
## - OnlineSecurity 1 4160.7 4196.7
## - MultipleLines 1 4161.6 4197.6
## - MonthlyCharges 1 4163.6 4199.6
## - StreamingMovies 1 4167.7 4203.7
## - StreamingTV 1 4167.8 4203.8
## - tenure 1 4199.3 4235.3
## - Contract 2 4205.1 4239.1
## - InternetService 2 4208.7 4242.7
##
## Step: AIC=4187.5
## Churn ~ SeniorCitizen + tenure + MultipleLines + InternetService +
## OnlineSecurity + TechSupport + StreamingTV + StreamingMovies +
## Contract + PaperlessBilling + PaymentMethod + MonthlyCharges +
## TotalCharges
##
## Df Deviance AIC
## <none> 4151.5 4187.5
## - TotalCharges 1 4156.6 4190.6
## - TechSupport 1 4157.1 4191.1
## - SeniorCitizen 1 4160.3 4194.3
## - PaymentMethod 3 4165.4 4195.4
## - PaperlessBilling 1 4161.5 4195.5
## - OnlineSecurity 1 4162.1 4196.1
## - MultipleLines 1 4163.2 4197.2
## - MonthlyCharges 1 4165.4 4199.4
## - StreamingTV 1 4169.3 4203.3
## - StreamingMovies 1 4169.5 4203.5
## - tenure 1 4202.0 4236.0
## - Contract 2 4208.6 4240.6
## - InternetService 2 4211.0 4243.0
# Coefficient table and fit statistics of the model kept by stepAIC().
summary(model2)
##
## Call:
## glm(formula = Churn ~ SeniorCitizen + tenure + MultipleLines +
## InternetService + OnlineSecurity + TechSupport + StreamingTV +
## StreamingMovies + Contract + PaperlessBilling + PaymentMethod +
## MonthlyCharges + TotalCharges, family = "binomial", data = churn.train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9442 -0.6819 -0.3089 0.7487 3.2497
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 0.5874515 0.3295296 1.783
## SeniorCitizenYes 0.2916756 0.0980222 2.976
## tenure -0.0468290 0.0070355 -6.656
## MultipleLinesYes 0.3532324 0.1035597 3.411
## InternetServiceFiber optic 1.4816303 0.2277475 6.506
## InternetServiceNo -1.4598222 0.2097082 -6.961
## OnlineSecurityYes -0.3414423 0.1050979 -3.249
## TechSupportYes -0.2563492 0.1082908 -2.367
## StreamingTVYes 0.4937503 0.1173653 4.207
## StreamingMoviesYes 0.4872998 0.1151793 4.231
## ContractOne year -0.6933437 0.1269226 -5.463
## ContractTwo year -1.2604038 0.2018241 -6.245
## PaperlessBillingYes 0.2773031 0.0876190 3.165
## PaymentMethodCredit card (automatic) -0.0474705 0.1355079 -0.350
## PaymentMethodElectronic check 0.2601407 0.1129409 2.303
## PaymentMethodMailed check -0.0818137 0.1356872 -0.603
## MonthlyCharges -0.0255788 0.0068645 -3.726
## TotalCharges 0.0001795 0.0000805 2.230
## Pr(>|z|)
## (Intercept) 0.074636 .
## SeniorCitizenYes 0.002924 **
## tenure 2.81e-11 ***
## MultipleLinesYes 0.000647 ***
## InternetServiceFiber optic 7.74e-11 ***
## InternetServiceNo 3.37e-12 ***
## OnlineSecurityYes 0.001159 **
## TechSupportYes 0.017922 *
## StreamingTVYes 2.59e-05 ***
## StreamingMoviesYes 2.33e-05 ***
## ContractOne year 4.69e-08 ***
## ContractTwo year 4.24e-10 ***
## PaperlessBillingYes 0.001551 **
## PaymentMethodCredit card (automatic) 0.726102
## PaymentMethodElectronic check 0.021260 *
## PaymentMethodMailed check 0.546537
## MonthlyCharges 0.000194 ***
## TotalCharges 0.025760 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 5709.6 on 4921 degrees of freedom
## Residual deviance: 4151.5 on 4904 degrees of freedom
## AIC: 4187.5
##
## Number of Fisher Scoring iterations: 6
# Score the hold-out set with the stepwise model. The response column is
# removed by name before predicting.
test_predictors <- churn.test[, setdiff(names(churn.test), "Churn")]
churn.test$prob_churn <- predict(
  model2,
  newdata = test_predictors,
  type = "response"
)

# Distribution of the predicted churn probabilities.
ggplot(churn.test, aes(x = prob_churn)) +
  geom_density(lwd = 1) +
  theme_minimal()

# Turn probabilities into class labels with the default 0.5 cutoff and
# compare the first few predictions with the observed class.
above_cutoff <- churn.test$prob_churn > 0.5
churn.test$pred_churn <- factor(ifelse(above_cutoff, "Yes", "No"))
head(churn.test[, c("pred_churn", "Churn")], 6)
## # A tibble: 6 x 2
## pred_churn Churn
## <fct> <fct>
## 1 Yes Yes
## 2 Yes Yes
## 3 No No
## 4 No Yes
## 5 No No
## 6 No No
# Confusion matrix of the 0.5-cutoff predictions against the observed class,
# treating "Yes" (churned) as the positive class.
library(caret)
conf <- confusionMatrix(churn.test$pred_churn , churn.test$Churn, positive = "Yes")
conf
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 1401 256
## Yes 153 300
##
## Accuracy : 0.8062
## 95% CI : (0.7886, 0.8228)
## No Information Rate : 0.7365
## P-Value [Acc > NIR] : 3.900e-14
##
## Kappa : 0.469
## Mcnemar's Test P-Value : 4.569e-07
##
## Sensitivity : 0.5396
## Specificity : 0.9015
## Pos Pred Value : 0.6623
## Neg Pred Value : 0.8455
## Prevalence : 0.2635
## Detection Rate : 0.1422
## Detection Prevalence : 0.2147
## Balanced Accuracy : 0.7206
##
## 'Positive' Class : Yes
##
# tuning cutoff
#' Classification metrics at a given probability cutoff
#'
#' @param cutoff Probability threshold; observations with `prob >= cutoff`
#'   are labelled `postarget`, all others `negtarget`.
#' @param prob Numeric vector of predicted probabilities.
#' @param ref Factor of observed classes, used as the reference in
#'   `caret::confusionMatrix()`.
#' @param postarget Label of the positive class.
#' @param negtarget Label of the negative class.
#' @return A 1 x 4 numeric matrix with columns "recall", "accuracy",
#'   "precicion" and "specificity" (column spelling kept unchanged for
#'   backward compatibility).
performa <- function(cutoff, prob, ref, postarget, negtarget) {
  # Fix the level set up front: at extreme cutoffs every observation falls in
  # a single class and factor() would otherwise drop the unused level.
  predicted <- factor(
    ifelse(prob >= cutoff, postarget, negtarget),
    levels = levels(ref)
  )
  conf <- caret::confusionMatrix(predicted, ref, positive = postarget)

  # Look the metrics up by name rather than by position in the result vectors.
  rec <- conf$byClass[["Sensitivity"]]
  acc <- conf$overall[["Accuracy"]]
  prec <- conf$byClass[["Pos Pred Value"]]
  spec <- conf$byClass[["Specificity"]]

  matrix(
    c(rec, acc, prec, spec),
    nrow = 1,
    dimnames = list(NULL, c("recall", "accuracy", "precicion", "specificity"))
  )
}
# Evaluate recall, accuracy, precision and specificity on a grid of
# 100 cutoffs between 0.01 and 0.80.
co <- seq(0.01, 0.80, length.out = 100)
result <- matrix(0, nrow = length(co), ncol = 4)
for (i in seq_along(co)) {
  result[i, ] <- performa(
    cutoff = co[i],
    prob = churn.test$prob_churn,
    ref = churn.test$Churn,
    postarget = "Yes",
    negtarget = "No"
  )
}

# Plot each metric as a function of the cutoff.
# tibble() replaces the deprecated data_frame().
tibble(
  "Recall" = result[, 1],
  "Accuracy" = result[, 2],
  "Precision" = result[, 3],
  "Specificity" = result[, 4],
  "Cutoff" = co
) %>%
  gather(key = "performa", value = "value", 1:4) %>%
  ggplot(aes(x = Cutoff, y = value, col = performa)) +
  geom_line(lwd = 1.5) +
  scale_color_manual(values = c("darkred", "darkgreen", "orange", "blue")) +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1, 0.1)) +
  labs(title = "Tradeoff model perfomance") +
  theme_minimal() +
  theme(
    legend.position = "top",
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank()
  )
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
Model Interpretation
# Odds ratios for all coefficients: exponentiate the log-odds estimates of
# the stepwise model and show them as a one-column data frame.
exp(model2$coefficients) %>%
data.frame()
## .
## (Intercept) 1.7993968
## SeniorCitizenYes 1.3386687
## tenure 0.9542506
## MultipleLinesYes 1.4236620
## InternetServiceFiber optic 4.4001135
## InternetServiceNo 0.2322776
## OnlineSecurityYes 0.7107445
## TechSupportYes 0.7738717
## StreamingTVYes 1.6384494
## StreamingMoviesYes 1.6279146
## ContractOne year 0.4999018
## ContractTwo year 0.2835395
## PaperlessBillingYes 1.3195663
## PaymentMethodCredit card (automatic) 0.9536386
## PaymentMethodElectronic check 1.2971125
## PaymentMethodMailed check 0.9214436
## MonthlyCharges 0.9747456
## TotalCharges 1.0001795
# The first level listed is the reference category for the Contract
# coefficients in the model.
levels(datachurn$Contract)
## [1] "Month-to-month" "One year" "Two year"
interpretasi:
- Odds Ratio ContractOne year = 0,50 < 1. Artinya odds pelanggan dengan kontrak satu tahun untuk churn sekitar 0,50 kali (sekitar 50% lebih kecil) dibandingkan odds pelanggan dengan kontrak bulanan (Month-to-month), dengan asumsi variabel lain tetap.
# Re-classify the test set with a 15% probability cutoff and evaluate it,
# treating "Yes" as the positive class.
labels_15 <- ifelse(churn.test$prob_churn > 0.15, "Yes", "No")
predict_15 <- factor(labels_15)
perf_logistic <- confusionMatrix(
  data = predict_15,
  reference = churn.test$Churn,
  positive = "Yes"
)
KNN
# One-hot encode every factor column. The encoder is built first (reusing the
# name `dmy`), then replaced by the encoded data. data.frame() is kept on
# purpose: it converts level names such as "Fiber optic" into syntactic
# column names.
dmy <- dummyVars(" ~ .", data = datachurn)
encoded <- predict(dmy, newdata = datachurn)
dmy <- data.frame(encoded)
str(dmy)
## 'data.frame': 7032 obs. of 41 variables:
## $ gender.Female : num 1 0 0 0 1 1 0 1 1 0 ...
## $ gender.Male : num 0 1 1 1 0 0 1 0 0 1 ...
## $ SeniorCitizen.No : num 1 1 1 1 1 1 1 1 1 1 ...
## $ SeniorCitizen.Yes : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Partner.No : num 0 1 1 1 1 1 1 1 0 1 ...
## $ Partner.Yes : num 1 0 0 0 0 0 0 0 1 0 ...
## $ Dependents.No : num 1 1 1 1 1 1 0 1 1 0 ...
## $ Dependents.Yes : num 0 0 0 0 0 0 1 0 0 1 ...
## $ tenure : num 1 34 2 45 2 8 22 10 28 62 ...
## $ PhoneService.No : num 1 0 0 1 0 0 0 1 0 0 ...
## $ PhoneService.Yes : num 0 1 1 0 1 1 1 0 1 1 ...
## $ MultipleLines.No : num 1 1 1 1 1 0 0 1 0 1 ...
## $ MultipleLines.Yes : num 0 0 0 0 0 1 1 0 1 0 ...
## $ InternetService.DSL : num 1 1 1 1 0 0 0 1 0 1 ...
## $ InternetService.Fiber.optic : num 0 0 0 0 1 1 1 0 1 0 ...
## $ InternetService.No : num 0 0 0 0 0 0 0 0 0 0 ...
## $ OnlineSecurity.No : num 1 0 0 0 1 1 1 0 1 0 ...
## $ OnlineSecurity.Yes : num 0 1 1 1 0 0 0 1 0 1 ...
## $ OnlineBackup.No : num 0 1 0 1 1 1 0 1 1 0 ...
## $ OnlineBackup.Yes : num 1 0 1 0 0 0 1 0 0 1 ...
## $ DeviceProtection.No : num 1 0 1 0 1 0 1 1 0 1 ...
## $ DeviceProtection.Yes : num 0 1 0 1 0 1 0 0 1 0 ...
## $ TechSupport.No : num 1 1 1 0 1 1 1 1 0 1 ...
## $ TechSupport.Yes : num 0 0 0 1 0 0 0 0 1 0 ...
## $ StreamingTV.No : num 1 1 1 1 1 0 0 1 0 1 ...
## $ StreamingTV.Yes : num 0 0 0 0 0 1 1 0 1 0 ...
## $ StreamingMovies.No : num 1 1 1 1 1 0 1 1 0 1 ...
## $ StreamingMovies.Yes : num 0 0 0 0 0 1 0 0 1 0 ...
## $ Contract.Month.to.month : num 1 0 1 0 1 1 1 1 1 0 ...
## $ Contract.One.year : num 0 1 0 1 0 0 0 0 0 1 ...
## $ Contract.Two.year : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PaperlessBilling.No : num 0 1 0 1 0 0 0 1 0 1 ...
## $ PaperlessBilling.Yes : num 1 0 1 0 1 1 1 0 1 0 ...
## $ PaymentMethod.Bank.transfer..automatic.: num 0 0 0 1 0 0 0 0 0 1 ...
## $ PaymentMethod.Credit.card..automatic. : num 0 0 0 0 0 0 1 0 0 0 ...
## $ PaymentMethod.Electronic.check : num 1 0 0 0 1 1 0 0 1 0 ...
## $ PaymentMethod.Mailed.check : num 0 1 1 0 0 0 0 1 0 0 ...
## $ MonthlyCharges : num 29.9 57 53.9 42.3 70.7 ...
## $ TotalCharges : num 29.9 1889.5 108.2 1840.8 151.7 ...
## $ Churn.No : num 1 1 0 1 0 0 1 1 0 1 ...
## $ Churn.Yes : num 0 0 1 0 1 1 0 0 1 0 ...
# For two-level factors keep only one indicator column; the dropped column is
# fully determined by the one that stays.
# NOTE(review): StreamingTV.No and StreamingMovies.No are not in this list and
# therefore remain in the data. Removing them would shift the column positions
# used when the KNN training and test sets are built - confirm the intent.
redundant_cols <- c(
  "gender.Female",
  "SeniorCitizen.No",
  "Partner.No",
  "Dependents.No",
  "PhoneService.No",
  "MultipleLines.No",
  "OnlineSecurity.No",
  "OnlineBackup.No",
  "DeviceProtection.No",
  "TechSupport.No",
  "PaperlessBilling.No",
  "Churn.No"
)
dmy <- dmy[setdiff(names(dmy), redundant_cols)]
names(dmy)
## [1] "gender.Male"
## [2] "SeniorCitizen.Yes"
## [3] "Partner.Yes"
## [4] "Dependents.Yes"
## [5] "tenure"
## [6] "PhoneService.Yes"
## [7] "MultipleLines.Yes"
## [8] "InternetService.DSL"
## [9] "InternetService.Fiber.optic"
## [10] "InternetService.No"
## [11] "OnlineSecurity.Yes"
## [12] "OnlineBackup.Yes"
## [13] "DeviceProtection.Yes"
## [14] "TechSupport.Yes"
## [15] "StreamingTV.No"
## [16] "StreamingTV.Yes"
## [17] "StreamingMovies.No"
## [18] "StreamingMovies.Yes"
## [19] "Contract.Month.to.month"
## [20] "Contract.One.year"
## [21] "Contract.Two.year"
## [22] "PaperlessBilling.Yes"
## [23] "PaymentMethod.Bank.transfer..automatic."
## [24] "PaymentMethod.Credit.card..automatic."
## [25] "PaymentMethod.Electronic.check"
## [26] "PaymentMethod.Mailed.check"
## [27] "MonthlyCharges"
## [28] "TotalCharges"
## [29] "Churn.Yes"
# Build the KNN inputs using the same train/test row split as the logistic
# model. Predictors and label are selected by name, so the split stays
# correct if the set of dummy columns changes.
label_col <- "Churn.Yes"
feature_cols <- setdiff(names(dmy), label_col)

dmy_train <- dmy[intrain, feature_cols]
dmy_test <- dmy[-intrain, feature_cols]
dmy_train_label <- dmy[intrain, label_col]
dmy_test_label <- dmy[-intrain, label_col]

# NOTE(review): the predictors are passed to knn() unscaled, so tenure,
# MonthlyCharges and TotalCharges (much larger ranges than the 0/1 dummies)
# dominate the distance. Consider standardising them with training-set
# statistics.
pred_knn <- class::knn(
  train = dmy_train,
  test = dmy_test,
  cl = dmy_train_label,
  k = 70
)

# The label is coded 0/1, so the positive (churn) class is "1".
perf_knn <- confusionMatrix(
  as.factor(pred_knn),
  as.factor(dmy_test_label),
  positive = "1"
)
Model Evaluation Logistic Regression vs KNN
# One-row summaries of accuracy, recall (sensitivity) and precision
# (positive predictive value) for each model. Metrics are looked up by name,
# and tibble() replaces the deprecated data_frame().
eval_logit <- tibble(
  Accuracy = perf_logistic$overall["Accuracy"],
  Recall = perf_logistic$byClass["Sensitivity"],
  Precision = perf_logistic$byClass["Pos Pred Value"]
)
eval_knn <- tibble(
  Accuracy = perf_knn$overall["Accuracy"],
  Recall = perf_knn$byClass["Sensitivity"],
  Precision = perf_knn$byClass["Pos Pred Value"]
)
# Model evaluation: logistic regression at the 15% cutoff
eval_logit
## # A tibble: 1 x 3
## Accuracy Recall Precision
## <dbl> <dbl> <dbl>
## 1 0.684 0.921 0.451
# Model evaluation: KNN (k = 70)
eval_knn
## # A tibble: 1 x 3
## Accuracy Recall Precision
## <dbl> <dbl> <dbl>
## 1 0.788 0.302 0.740