#(1)
# Load the raw churn data set from the lab folder.
telco <- read.csv(file = "/Users/jianingjin/Desktop/IEMS_304/lab3/churn.csv")
#(2)
# Flag the rows that have no missing value in any column.
is_complete <- complete.cases(telco)
# Dimensions (rows, columns) of the INCOMPLETE subset.
dim(telco[!is_complete, ])
## [1] 11 23
# Keep only the fully observed rows.
telco <- telco[is_complete, ]
#(3)
# Overall class balance of the response variable.
barplot(height = table(telco$Churn))

#(4)
# Stacked bars: each bar is one level of the demographic variable and the
# segments are the Churn counts within it (legend entries come from the
# row names of the contingency table, i.e. the Churn levels).
counts1 <- table(telco$Churn, telco$gender)
barplot(counts1,
        main = "Churn x Gender Breakdown",
        legend.text = rownames(counts1))

counts2 <- table(telco$Churn, telco$SeniorCitizen)
barplot(counts2,
        main = "Churn x SeniorCitizen Breakdown",
        legend.text = rownames(counts2))

counts3 <- table(telco$Churn, telco$Partner)
barplot(counts3,
        main = "Churn x Partner Breakdown",
        legend.text = rownames(counts3))

counts4 <- table(telco$Churn, telco$Dependents)
barplot(counts4,
        main = "Churn x Dependents Breakdown",
        legend.text = rownames(counts4))

#(5)
# Distribution of each numeric account variable, split by churn status.
# The formula interface is the boxplot() method that actually consumes
# `data =`; it draws the Churn groups side by side on one shared axis so they
# can be compared directly, and each plot is titled so the figures can be
# told apart.
boxplot(tenure ~ Churn, data = telco,
        main = "tenure by Churn", xlab = "Churn", ylab = "tenure")

boxplot(MonthlyCharges ~ Churn, data = telco,
        main = "MonthlyCharges by Churn", xlab = "Churn",
        ylab = "MonthlyCharges")

boxplot(TotalCharges ~ Churn, data = telco,
        main = "TotalCharges by Churn", xlab = "Churn",
        ylab = "TotalCharges")

#(6)
# Pairwise Pearson correlation matrix of the numeric columns.
cor(
  telco[, c("tenure", "TotalCharges", "MonthlyCharges", "Metric1", "Metric2")],
  method = "pearson"
)
##                    tenure TotalCharges MonthlyCharges     Metric1     Metric2
## tenure         1.00000000   0.82588046     0.24686177  0.09403654  0.09295148
## TotalCharges   0.82588046   1.00000000     0.65106480  0.05024178  0.05811042
## MonthlyCharges 0.24686177   0.65106480     1.00000000 -0.04478265 -0.05898434
## Metric1        0.09403654   0.05024178    -0.04478265  1.00000000 -0.28637676
## Metric2        0.09295148   0.05811042    -0.05898434 -0.28637676  1.00000000
#(8)
# Collapse the two "no service" labels into a plain "No" wherever they occur
# in the data frame, using one cell-wise logical mask for both labels.
telco[
  telco == "No internet service" | telco == "No phone service"
] <- "No"
#(9)
# Positions of the numeric predictors in `telco`. The model summary further
# down lists them as tenure, MonthlyCharges, TotalCharges, Metric1, Metric2.
# NOTE(review): positional indices break silently if the CSV layout changes.
num_columns <- c(6, 19, 20, 21, 22)
# lapply() always returns a list of columns, whereas sapply()'s return type
# depends on its input.
telco[num_columns] <- lapply(telco[num_columns], as.numeric)
# Standardise every numeric predictor (centre to mean 0, scale to sd 1).
telco_int <- data.frame(scale(telco[, num_columns]))
# Interaction feature: product of the two standardised metrics, standardised
# again. scale() returns a one-column matrix carrying "scaled:*" attributes;
# as.numeric() stores it as a plain numeric column instead of a matrix column.
telco_int$Metric3 <- as.numeric(scale(telco_int$Metric1 * telco_int$Metric2))
#(10)
# Bucket tenure into six 12-unit bands labelled as years.
# cut() performs every comparison on the numeric column. Assigning labels
# into a copy of the column one range at a time turns that copy into
# character after the first assignment, so the later range tests would be
# string comparisons that only work by accident for two-digit values.
# Intervals are right-closed: (0,12], (12,24], ..., (60,72]. Values outside
# (0, 72] become NA.
tenure_breaks <- seq(0, 72, by = 12)
tenure_labels <- c("0-1 year", "1-2 years", "2-3 years",
                   "3-4 years", "4-5 years", "5-6 years")
# cut() already returns a factor, so R treats tenure_bin as categorical.
telco$tenure_bin <- cut(telco$tenure,
                        breaks = tenure_breaks,
                        labels = tenure_labels,
                        right = TRUE)
telco$tenure <- NULL # delete tenure col
#(11)
# Drop six columns by position, leaving the columns to be dummy-encoded.
# NOTE(review): positions are counted after `tenure` was removed above;
# presumably these are the ID column, SeniorCitizen and the numeric columns
# already held in telco_int - verify against names(telco).
telco_cat <- telco[, -c(1, 3, 18, 19, 20, 21)]
# Dummy-encode every remaining column and drop the first indicator of each
# (the reference level). Two-level columns collapse to a single 0/1 vector;
# columns with more levels contribute one indicator per non-reference level.
dummy <- data.frame(lapply(
  telco_cat,
  function(x) {
    # The argument must stay named `x`: model.matrix() derives the indicator
    # column names from it (e.g. "InternetService.xFiber.optic").
    indicators <- data.frame(model.matrix(~ x - 1, data = telco_cat))
    indicators[, -1]
  }
))
head(dummy)
#(12)
# Bind the scaled numeric features and the dummy columns side by side.
telco_final <- cbind(telco_int, dummy)
head(telco_final)
#(13)
library("caTools")
# Fix the RNG seed so the same train/validation split (and therefore the same
# fitted models) is obtained on every run.
set.seed(304)
# Logical vector: TRUE marks the 70% of rows assigned to training;
# sample.split() draws the split from the Churn labels.
indices <- sample.split(telco_final$Churn, SplitRatio = 0.7)
train <- telco_final[indices, ]
validation <- telco_final[!indices, ]
#(14)
# Full logistic regression: Churn against every other column of `train`.
model_1 <- glm(
  formula = Churn ~ .,
  family = "binomial",
  data = train
)
summary(model_1)
## 
## Call:
## glm(formula = Churn ~ ., family = "binomial", data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5181  -0.4280  -0.1320   0.3764   3.3318  
## 
## Coefficients:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                            -4.106374   1.844399  -2.226 0.025987
## tenure                                 -2.050141   0.362671  -5.653 1.58e-08
## MonthlyCharges                         -0.786807   1.343843  -0.585 0.558218
## TotalCharges                            0.197650   0.214953   0.920 0.357833
## Metric1                                -4.465428   0.392325 -11.382  < 2e-16
## Metric2                                -4.613900   0.400213 -11.529  < 2e-16
## Metric3                                -3.503151   0.418394  -8.373  < 2e-16
## gender                                  0.026103   0.091467   0.285 0.775348
## Partner                                -0.030441   0.108432  -0.281 0.778911
## Dependents                             -0.136498   0.121511  -1.123 0.261295
## PhoneService                            0.204914   0.912857   0.224 0.822387
## MultipleLines                           0.496041   0.244614   2.028 0.042575
## InternetService.xFiber.optic            1.393152   1.123976   1.239 0.215166
## InternetService.xNo                    -1.832604   1.136227  -1.613 0.106769
## OnlineSecurity                         -0.217723   0.250985  -0.867 0.385681
## OnlineBackup                           -0.059202   0.247458  -0.239 0.810919
## DeviceProtection                        0.196041   0.245684   0.798 0.424905
## TechSupport                            -0.162778   0.253941  -0.641 0.521516
## StreamingTV                             0.554662   0.459960   1.206 0.227860
## StreamingMovies                         0.316017   0.458680   0.689 0.490841
## Contract.xOne.year                     -0.792021   0.148519  -5.333 9.67e-08
## Contract.xTwo.year                     -1.429175   0.227452  -6.283 3.31e-10
## PaperlessBilling                        0.300649   0.104780   2.869 0.004113
## PaymentMethod.xCredit.card..automatic.  0.002717   0.156078   0.017 0.986114
## PaymentMethod.xElectronic.check         0.345495   0.133386   2.590 0.009592
## PaymentMethod.xMailed.check            -0.012204   0.164695  -0.074 0.940928
## tenure_bin.x1.2.years                   0.071067   0.219276   0.324 0.745862
## tenure_bin.x2.3.years                   0.637574   0.363273   1.755 0.079245
## tenure_bin.x3.4.years                   1.706499   0.513716   3.322 0.000894
## tenure_bin.x4.5.years                   2.108221   0.664811   3.171 0.001518
## tenure_bin.x5.6.years                   2.783326   0.829165   3.357 0.000789
##                                           
## (Intercept)                            *  
## tenure                                 ***
## MonthlyCharges                            
## TotalCharges                              
## Metric1                                ***
## Metric2                                ***
## Metric3                                ***
## gender                                    
## Partner                                   
## Dependents                                
## PhoneService                              
## MultipleLines                          *  
## InternetService.xFiber.optic              
## InternetService.xNo                       
## OnlineSecurity                            
## OnlineBackup                              
## DeviceProtection                          
## TechSupport                               
## StreamingTV                               
## StreamingMovies                           
## Contract.xOne.year                     ***
## Contract.xTwo.year                     ***
## PaperlessBilling                       ** 
## PaymentMethod.xCredit.card..automatic.    
## PaymentMethod.xElectronic.check        ** 
## PaymentMethod.xMailed.check               
## tenure_bin.x1.2.years                     
## tenure_bin.x2.3.years                  .  
## tenure_bin.x3.4.years                  ***
## tenure_bin.x4.5.years                  ** 
## tenure_bin.x5.6.years                  ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 5699.5  on 4921  degrees of freedom
## Residual deviance: 3042.9  on 4891  degrees of freedom
## AIC: 3104.9
## 
## Number of Fisher Scoring iterations: 7
library("MASS")
# Stepwise selection by AIC starting from the full model; with
# direction = "both", terms may be dropped and re-added at each step.
model_2 <- stepAIC(model_1, direction = "both")
## Start:  AIC=3104.9
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 + 
##     Metric3 + gender + Partner + Dependents + PhoneService + 
##     MultipleLines + InternetService.xFiber.optic + InternetService.xNo + 
##     OnlineSecurity + OnlineBackup + DeviceProtection + TechSupport + 
##     StreamingTV + StreamingMovies + Contract.xOne.year + Contract.xTwo.year + 
##     PaperlessBilling + PaymentMethod.xCredit.card..automatic. + 
##     PaymentMethod.xElectronic.check + PaymentMethod.xMailed.check + 
##     tenure_bin.x1.2.years + tenure_bin.x2.3.years + tenure_bin.x3.4.years + 
##     tenure_bin.x4.5.years + tenure_bin.x5.6.years
## 
##                                          Df Deviance    AIC
## - PaymentMethod.xCredit.card..automatic.  1   3042.9 3102.9
## - PaymentMethod.xMailed.check             1   3042.9 3102.9
## - PhoneService                            1   3042.9 3102.9
## - OnlineBackup                            1   3043.0 3103.0
## - Partner                                 1   3043.0 3103.0
## - gender                                  1   3043.0 3103.0
## - tenure_bin.x1.2.years                   1   3043.0 3103.0
## - MonthlyCharges                          1   3043.2 3103.2
## - TechSupport                             1   3043.3 3103.3
## - StreamingMovies                         1   3043.4 3103.4
## - DeviceProtection                        1   3043.5 3103.5
## - OnlineSecurity                          1   3043.6 3103.6
## - TotalCharges                            1   3043.8 3103.8
## - Dependents                              1   3044.2 3104.2
## - StreamingTV                             1   3044.4 3104.4
## - InternetService.xFiber.optic            1   3044.4 3104.4
## <none>                                        3042.9 3104.9
## - InternetService.xNo                     1   3045.5 3105.5
## - tenure_bin.x2.3.years                   1   3046.0 3106.0
## - MultipleLines                           1   3047.0 3107.0
## - PaymentMethod.xElectronic.check         1   3049.6 3109.6
## - PaperlessBilling                        1   3051.1 3111.1
## - tenure_bin.x4.5.years                   1   3053.0 3113.0
## - tenure_bin.x3.4.years                   1   3054.0 3114.0
## - tenure_bin.x5.6.years                   1   3054.2 3114.2
## - Contract.xOne.year                      1   3072.4 3132.4
## - tenure                                  1   3075.6 3135.6
## - Contract.xTwo.year                      1   3087.0 3147.0
## - Metric3                                 1   3123.6 3183.6
## - Metric1                                 1   3215.6 3275.6
## - Metric2                                 1   3219.6 3279.6
## 
## Step:  AIC=3102.9
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 + 
##     Metric3 + gender + Partner + Dependents + PhoneService + 
##     MultipleLines + InternetService.xFiber.optic + InternetService.xNo + 
##     OnlineSecurity + OnlineBackup + DeviceProtection + TechSupport + 
##     StreamingTV + StreamingMovies + Contract.xOne.year + Contract.xTwo.year + 
##     PaperlessBilling + PaymentMethod.xElectronic.check + PaymentMethod.xMailed.check + 
##     tenure_bin.x1.2.years + tenure_bin.x2.3.years + tenure_bin.x3.4.years + 
##     tenure_bin.x4.5.years + tenure_bin.x5.6.years
## 
##                                          Df Deviance    AIC
## - PaymentMethod.xMailed.check             1   3042.9 3100.9
## - PhoneService                            1   3042.9 3100.9
## - OnlineBackup                            1   3043.0 3101.0
## - Partner                                 1   3043.0 3101.0
## - gender                                  1   3043.0 3101.0
## - tenure_bin.x1.2.years                   1   3043.0 3101.0
## - MonthlyCharges                          1   3043.2 3101.2
## - TechSupport                             1   3043.3 3101.3
## - StreamingMovies                         1   3043.4 3101.4
## - DeviceProtection                        1   3043.5 3101.5
## - OnlineSecurity                          1   3043.6 3101.6
## - TotalCharges                            1   3043.8 3101.8
## - Dependents                              1   3044.2 3102.2
## - StreamingTV                             1   3044.4 3102.4
## - InternetService.xFiber.optic            1   3044.4 3102.4
## <none>                                        3042.9 3102.9
## - InternetService.xNo                     1   3045.5 3103.5
## - tenure_bin.x2.3.years                   1   3046.0 3104.0
## + PaymentMethod.xCredit.card..automatic.  1   3042.9 3104.9
## - MultipleLines                           1   3047.0 3105.0
## - PaperlessBilling                        1   3051.1 3109.1
## - tenure_bin.x4.5.years                   1   3053.0 3111.0
## - PaymentMethod.xElectronic.check         1   3053.1 3111.1
## - tenure_bin.x3.4.years                   1   3054.0 3112.0
## - tenure_bin.x5.6.years                   1   3054.2 3112.2
## - Contract.xOne.year                      1   3072.4 3130.4
## - tenure                                  1   3075.6 3133.6
## - Contract.xTwo.year                      1   3087.0 3145.0
## - Metric3                                 1   3123.6 3181.6
## - Metric1                                 1   3215.6 3273.6
## - Metric2                                 1   3219.6 3277.6
## 
## Step:  AIC=3100.91
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 + 
##     Metric3 + gender + Partner + Dependents + PhoneService + 
##     MultipleLines + InternetService.xFiber.optic + InternetService.xNo + 
##     OnlineSecurity + OnlineBackup + DeviceProtection + TechSupport + 
##     StreamingTV + StreamingMovies + Contract.xOne.year + Contract.xTwo.year + 
##     PaperlessBilling + PaymentMethod.xElectronic.check + tenure_bin.x1.2.years + 
##     tenure_bin.x2.3.years + tenure_bin.x3.4.years + tenure_bin.x4.5.years + 
##     tenure_bin.x5.6.years
## 
##                                          Df Deviance    AIC
## - PhoneService                            1   3043.0 3099.0
## - OnlineBackup                            1   3043.0 3099.0
## - Partner                                 1   3043.0 3099.0
## - gender                                  1   3043.0 3099.0
## - tenure_bin.x1.2.years                   1   3043.0 3099.0
## - MonthlyCharges                          1   3043.2 3099.2
## - TechSupport                             1   3043.3 3099.3
## - StreamingMovies                         1   3043.4 3099.4
## - DeviceProtection                        1   3043.5 3099.5
## - OnlineSecurity                          1   3043.7 3099.7
## - TotalCharges                            1   3043.8 3099.8
## - Dependents                              1   3044.2 3100.2
## - StreamingTV                             1   3044.4 3100.4
## - InternetService.xFiber.optic            1   3044.4 3100.4
## <none>                                        3042.9 3100.9
## - InternetService.xNo                     1   3045.5 3101.5
## - tenure_bin.x2.3.years                   1   3046.0 3102.0
## + PaymentMethod.xMailed.check             1   3042.9 3102.9
## + PaymentMethod.xCredit.card..automatic.  1   3042.9 3102.9
## - MultipleLines                           1   3047.0 3103.0
## - PaperlessBilling                        1   3051.2 3107.2
## - tenure_bin.x4.5.years                   1   3053.0 3109.0
## - tenure_bin.x3.4.years                   1   3054.0 3110.0
## - tenure_bin.x5.6.years                   1   3054.2 3110.2
## - PaymentMethod.xElectronic.check         1   3055.5 3111.5
## - Contract.xOne.year                      1   3072.4 3128.4
## - tenure                                  1   3075.9 3131.9
## - Contract.xTwo.year                      1   3087.0 3143.0
## - Metric3                                 1   3123.7 3179.7
## - Metric1                                 1   3215.8 3271.8
## - Metric2                                 1   3219.7 3275.7
## 
## Step:  AIC=3098.96
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 + 
##     Metric3 + gender + Partner + Dependents + MultipleLines + 
##     InternetService.xFiber.optic + InternetService.xNo + OnlineSecurity + 
##     OnlineBackup + DeviceProtection + TechSupport + StreamingTV + 
##     StreamingMovies + Contract.xOne.year + Contract.xTwo.year + 
##     PaperlessBilling + PaymentMethod.xElectronic.check + tenure_bin.x1.2.years + 
##     tenure_bin.x2.3.years + tenure_bin.x3.4.years + tenure_bin.x4.5.years + 
##     tenure_bin.x5.6.years
## 
##                                          Df Deviance    AIC
## - gender                                  1   3043.0 3097.0
## - Partner                                 1   3043.0 3097.0
## - tenure_bin.x1.2.years                   1   3043.1 3097.1
## - TotalCharges                            1   3043.8 3097.8
## - OnlineBackup                            1   3043.8 3097.8
## - Dependents                              1   3044.2 3098.2
## - DeviceProtection                        1   3044.5 3098.5
## <none>                                        3043.0 3099.0
## - StreamingMovies                         1   3045.4 3099.4
## - TechSupport                             1   3045.7 3099.7
## - tenure_bin.x2.3.years                   1   3046.0 3100.0
## - MonthlyCharges                          1   3046.3 3100.3
## + PhoneService                            1   3042.9 3100.9
## + PaymentMethod.xMailed.check             1   3042.9 3100.9
## + PaymentMethod.xCredit.card..automatic.  1   3043.0 3101.0
## - OnlineSecurity                          1   3047.4 3101.4
## - PaperlessBilling                        1   3051.3 3105.3
## - tenure_bin.x4.5.years                   1   3053.0 3107.0
## - StreamingTV                             1   3053.7 3107.7
## - tenure_bin.x3.4.years                   1   3054.0 3108.0
## - tenure_bin.x5.6.years                   1   3054.2 3108.2
## - MultipleLines                           1   3055.3 3109.3
## - PaymentMethod.xElectronic.check         1   3055.5 3109.5
## - InternetService.xFiber.optic            1   3058.2 3112.2
## - Contract.xOne.year                      1   3072.5 3126.5
## - tenure                                  1   3075.9 3129.9
## - InternetService.xNo                     1   3084.2 3138.2
## - Contract.xTwo.year                      1   3087.0 3141.0
## - Metric3                                 1   3123.8 3177.8
## - Metric1                                 1   3215.9 3269.9
## - Metric2                                 1   3219.8 3273.8
## 
## Step:  AIC=3097.03
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 + 
##     Metric3 + Partner + Dependents + MultipleLines + InternetService.xFiber.optic + 
##     InternetService.xNo + OnlineSecurity + OnlineBackup + DeviceProtection + 
##     TechSupport + StreamingTV + StreamingMovies + Contract.xOne.year + 
##     Contract.xTwo.year + PaperlessBilling + PaymentMethod.xElectronic.check + 
##     tenure_bin.x1.2.years + tenure_bin.x2.3.years + tenure_bin.x3.4.years + 
##     tenure_bin.x4.5.years + tenure_bin.x5.6.years
## 
##                                          Df Deviance    AIC
## - Partner                                 1   3043.1 3095.1
## - tenure_bin.x1.2.years                   1   3043.1 3095.1
## - TotalCharges                            1   3043.9 3095.9
## - OnlineBackup                            1   3043.9 3095.9
## - Dependents                              1   3044.3 3096.3
## - DeviceProtection                        1   3044.6 3096.6
## <none>                                        3043.0 3097.0
## - StreamingMovies                         1   3045.5 3097.5
## - TechSupport                             1   3045.8 3097.8
## - tenure_bin.x2.3.years                   1   3046.1 3098.1
## - MonthlyCharges                          1   3046.4 3098.4
## + gender                                  1   3043.0 3099.0
## + PhoneService                            1   3043.0 3099.0
## + PaymentMethod.xMailed.check             1   3043.0 3099.0
## + PaymentMethod.xCredit.card..automatic.  1   3043.0 3099.0
## - OnlineSecurity                          1   3047.5 3099.5
## - PaperlessBilling                        1   3051.4 3103.4
## - tenure_bin.x4.5.years                   1   3053.0 3105.0
## - StreamingTV                             1   3053.7 3105.7
## - tenure_bin.x3.4.years                   1   3054.0 3106.0
## - tenure_bin.x5.6.years                   1   3054.3 3106.3
## - MultipleLines                           1   3055.4 3107.4
## - PaymentMethod.xElectronic.check         1   3055.5 3107.5
## - InternetService.xFiber.optic            1   3058.2 3110.2
## - Contract.xOne.year                      1   3072.5 3124.5
## - tenure                                  1   3075.9 3127.9
## - InternetService.xNo                     1   3084.3 3136.3
## - Contract.xTwo.year                      1   3087.1 3139.1
## - Metric3                                 1   3123.8 3175.8
## - Metric1                                 1   3215.9 3267.9
## - Metric2                                 1   3219.9 3271.9
## 
## Step:  AIC=3095.11
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 + 
##     Metric3 + Dependents + MultipleLines + InternetService.xFiber.optic + 
##     InternetService.xNo + OnlineSecurity + OnlineBackup + DeviceProtection + 
##     TechSupport + StreamingTV + StreamingMovies + Contract.xOne.year + 
##     Contract.xTwo.year + PaperlessBilling + PaymentMethod.xElectronic.check + 
##     tenure_bin.x1.2.years + tenure_bin.x2.3.years + tenure_bin.x3.4.years + 
##     tenure_bin.x4.5.years + tenure_bin.x5.6.years
## 
##                                          Df Deviance    AIC
## - tenure_bin.x1.2.years                   1   3043.2 3093.2
## - TotalCharges                            1   3044.0 3094.0
## - OnlineBackup                            1   3044.0 3094.0
## - DeviceProtection                        1   3044.6 3094.6
## - Dependents                              1   3044.9 3094.9
## <none>                                        3043.1 3095.1
## - StreamingMovies                         1   3045.5 3095.5
## - TechSupport                             1   3045.9 3095.9
## - tenure_bin.x2.3.years                   1   3046.2 3096.2
## - MonthlyCharges                          1   3046.4 3096.4
## + Partner                                 1   3043.0 3097.0
## + gender                                  1   3043.0 3097.0
## + PhoneService                            1   3043.1 3097.1
## + PaymentMethod.xMailed.check             1   3043.1 3097.1
## + PaymentMethod.xCredit.card..automatic.  1   3043.1 3097.1
## - OnlineSecurity                          1   3047.7 3097.7
## - PaperlessBilling                        1   3051.4 3101.4
## - tenure_bin.x4.5.years                   1   3053.2 3103.2
## - StreamingTV                             1   3053.8 3103.8
## - tenure_bin.x3.4.years                   1   3054.2 3104.2
## - tenure_bin.x5.6.years                   1   3054.4 3104.4
## - MultipleLines                           1   3055.4 3105.4
## - PaymentMethod.xElectronic.check         1   3055.6 3105.6
## - InternetService.xFiber.optic            1   3058.2 3108.2
## - Contract.xOne.year                      1   3072.6 3122.6
## - tenure                                  1   3076.5 3126.5
## - InternetService.xNo                     1   3084.3 3134.3
## - Contract.xTwo.year                      1   3087.1 3137.1
## - Metric3                                 1   3123.9 3173.9
## - Metric1                                 1   3216.0 3266.0
## - Metric2                                 1   3220.1 3270.1
## 
## Step:  AIC=3093.2
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 + 
##     Metric3 + Dependents + MultipleLines + InternetService.xFiber.optic + 
##     InternetService.xNo + OnlineSecurity + OnlineBackup + DeviceProtection + 
##     TechSupport + StreamingTV + StreamingMovies + Contract.xOne.year + 
##     Contract.xTwo.year + PaperlessBilling + PaymentMethod.xElectronic.check + 
##     tenure_bin.x2.3.years + tenure_bin.x3.4.years + tenure_bin.x4.5.years + 
##     tenure_bin.x5.6.years
## 
##                                          Df Deviance    AIC
## - TotalCharges                            1   3044.0 3092.0
## - OnlineBackup                            1   3044.1 3092.1
## - DeviceProtection                        1   3044.7 3092.7
## - Dependents                              1   3045.0 3093.0
## <none>                                        3043.2 3093.2
## - StreamingMovies                         1   3045.6 3093.6
## - TechSupport                             1   3046.0 3094.0
## - MonthlyCharges                          1   3046.5 3094.5
## + tenure_bin.x1.2.years                   1   3043.1 3095.1
## + Partner                                 1   3043.1 3095.1
## + gender                                  1   3043.1 3095.1
## + PhoneService                            1   3043.2 3095.2
## + PaymentMethod.xMailed.check             1   3043.2 3095.2
## + PaymentMethod.xCredit.card..automatic.  1   3043.2 3095.2
## - OnlineSecurity                          1   3047.8 3095.8
## - tenure_bin.x2.3.years                   1   3049.1 3097.1
## - PaperlessBilling                        1   3051.5 3099.5
## - StreamingTV                             1   3053.9 3101.9
## - MultipleLines                           1   3055.6 3103.6
## - PaymentMethod.xElectronic.check         1   3055.7 3103.7
## - InternetService.xFiber.optic            1   3058.3 3106.3
## - tenure_bin.x4.5.years                   1   3066.0 3114.0
## - tenure_bin.x5.6.years                   1   3068.0 3116.0
## - tenure_bin.x3.4.years                   1   3068.8 3116.8
## - Contract.xOne.year                      1   3072.8 3120.8
## - InternetService.xNo                     1   3084.4 3132.4
## - Contract.xTwo.year                      1   3087.9 3135.9
## - tenure                                  1   3114.2 3162.2
## - Metric3                                 1   3124.0 3172.0
## - Metric1                                 1   3216.0 3264.0
## - Metric2                                 1   3220.1 3268.1
## 
## Step:  AIC=3092.02
## Churn ~ tenure + MonthlyCharges + Metric1 + Metric2 + Metric3 + 
##     Dependents + MultipleLines + InternetService.xFiber.optic + 
##     InternetService.xNo + OnlineSecurity + OnlineBackup + DeviceProtection + 
##     TechSupport + StreamingTV + StreamingMovies + Contract.xOne.year + 
##     Contract.xTwo.year + PaperlessBilling + PaymentMethod.xElectronic.check + 
##     tenure_bin.x2.3.years + tenure_bin.x3.4.years + tenure_bin.x4.5.years + 
##     tenure_bin.x5.6.years
## 
##                                          Df Deviance    AIC
## - OnlineBackup                            1   3044.8 3090.8
## - DeviceProtection                        1   3045.7 3091.7
## - Dependents                              1   3045.9 3091.9
## <none>                                        3044.0 3092.0
## - StreamingMovies                         1   3046.6 3092.6
## - TechSupport                             1   3046.6 3092.6
## - MonthlyCharges                          1   3046.8 3092.8
## + TotalCharges                            1   3043.2 3093.2
## + Partner                                 1   3043.9 3093.9
## + gender                                  1   3043.9 3093.9
## + tenure_bin.x1.2.years                   1   3044.0 3094.0
## + PhoneService                            1   3044.0 3094.0
## + PaymentMethod.xCredit.card..automatic.  1   3044.0 3094.0
## + PaymentMethod.xMailed.check             1   3044.0 3094.0
## - OnlineSecurity                          1   3048.4 3094.4
## - tenure_bin.x2.3.years                   1   3050.3 3096.3
## - PaperlessBilling                        1   3052.3 3098.3
## - StreamingTV                             1   3055.1 3101.1
## - PaymentMethod.xElectronic.check         1   3056.3 3102.3
## - MultipleLines                           1   3056.9 3102.9
## - InternetService.xFiber.optic            1   3059.3 3105.3
## - tenure_bin.x4.5.years                   1   3070.1 3116.1
## - tenure_bin.x3.4.years                   1   3071.7 3117.7
## - Contract.xOne.year                      1   3074.0 3120.0
## - tenure_bin.x5.6.years                   1   3075.0 3121.0
## - InternetService.xNo                     1   3084.4 3130.4
## - Contract.xTwo.year                      1   3090.7 3136.7
## - Metric3                                 1   3125.5 3171.5
## - tenure                                  1   3128.4 3174.4
## - Metric1                                 1   3218.0 3264.0
## - Metric2                                 1   3221.8 3267.8
## 
## Step:  AIC=3090.77
## Churn ~ tenure + MonthlyCharges + Metric1 + Metric2 + Metric3 + 
##     Dependents + MultipleLines + InternetService.xFiber.optic + 
##     InternetService.xNo + OnlineSecurity + DeviceProtection + 
##     TechSupport + StreamingTV + StreamingMovies + Contract.xOne.year + 
##     Contract.xTwo.year + PaperlessBilling + PaymentMethod.xElectronic.check + 
##     tenure_bin.x2.3.years + tenure_bin.x3.4.years + tenure_bin.x4.5.years + 
##     tenure_bin.x5.6.years
## 
##                                          Df Deviance    AIC
## - Dependents                              1   3046.7 3090.7
## <none>                                        3044.8 3090.8
## - DeviceProtection                        1   3046.8 3090.8
## - TechSupport                             1   3047.1 3091.1
## + OnlineBackup                            1   3044.0 3092.0
## + PhoneService                            1   3044.0 3092.0
## + TotalCharges                            1   3044.1 3092.1
## - StreamingMovies                         1   3048.2 3092.2
## + Partner                                 1   3044.7 3092.7
## + gender                                  1   3044.7 3092.7
## + tenure_bin.x1.2.years                   1   3044.7 3092.7
## - OnlineSecurity                          1   3048.8 3092.8
## + PaymentMethod.xMailed.check             1   3044.8 3092.8
## + PaymentMethod.xCredit.card..automatic.  1   3044.8 3092.8
## - MonthlyCharges                          1   3049.3 3093.3
## - tenure_bin.x2.3.years                   1   3051.0 3095.0
## - PaperlessBilling                        1   3052.9 3096.9
## - PaymentMethod.xElectronic.check         1   3057.1 3101.1
## - StreamingTV                             1   3057.8 3101.8
## - MultipleLines                           1   3059.3 3103.3
## - InternetService.xFiber.optic            1   3064.8 3108.8
## - tenure_bin.x4.5.years                   1   3070.7 3114.7
## - tenure_bin.x3.4.years                   1   3072.3 3116.3
## - Contract.xOne.year                      1   3074.7 3118.7
## - tenure_bin.x5.6.years                   1   3075.5 3119.5
## - InternetService.xNo                     1   3087.7 3131.7
## - Contract.xTwo.year                      1   3091.3 3135.3
## - Metric3                                 1   3126.3 3170.3
## - tenure                                  1   3131.0 3175.0
## - Metric1                                 1   3218.7 3262.7
## - Metric2                                 1   3222.6 3266.6
## 
## Step:  AIC=3090.69
## Churn ~ tenure + MonthlyCharges + Metric1 + Metric2 + Metric3 + 
##     MultipleLines + InternetService.xFiber.optic + InternetService.xNo + 
##     OnlineSecurity + DeviceProtection + TechSupport + StreamingTV + 
##     StreamingMovies + Contract.xOne.year + Contract.xTwo.year + 
##     PaperlessBilling + PaymentMethod.xElectronic.check + tenure_bin.x2.3.years + 
##     tenure_bin.x3.4.years + tenure_bin.x4.5.years + tenure_bin.x5.6.years
## 
##                                          Df Deviance    AIC
## <none>                                        3046.7 3090.7
## - DeviceProtection                        1   3048.7 3090.7
## + Dependents                              1   3044.8 3090.8
## - TechSupport                             1   3048.9 3090.9
## + OnlineBackup                            1   3045.9 3091.9
## + Partner                                 1   3046.0 3092.0
## + PhoneService                            1   3046.0 3092.0
## + TotalCharges                            1   3046.0 3092.0
## - StreamingMovies                         1   3050.4 3092.4
## + gender                                  1   3046.6 3092.6
## + tenure_bin.x1.2.years                   1   3046.6 3092.6
## + PaymentMethod.xMailed.check             1   3046.7 3092.7
## + PaymentMethod.xCredit.card..automatic.  1   3046.7 3092.7
## - OnlineSecurity                          1   3050.8 3092.8
## - MonthlyCharges                          1   3051.5 3093.5
## - tenure_bin.x2.3.years                   1   3053.0 3095.0
## - PaperlessBilling                        1   3055.2 3097.2
## - PaymentMethod.xElectronic.check         1   3059.1 3101.1
## - StreamingTV                             1   3059.7 3101.7
## - MultipleLines                           1   3061.6 3103.6
## - InternetService.xFiber.optic            1   3067.7 3109.7
## - tenure_bin.x4.5.years                   1   3073.4 3115.4
## - tenure_bin.x3.4.years                   1   3075.0 3117.0
## - Contract.xOne.year                      1   3077.6 3119.6
## - tenure_bin.x5.6.years                   1   3078.1 3120.1
## - InternetService.xNo                     1   3090.5 3132.5
## - Contract.xTwo.year                      1   3094.7 3136.7
## - Metric3                                 1   3127.9 3169.9
## - tenure                                  1   3134.8 3176.8
## - Metric1                                 1   3220.3 3262.3
## - Metric2                                 1   3224.0 3266.0
# Print the coefficient table and fit statistics for model_2. The Call echoed
# below has the same formula as the last "Step:" of the AIC trace above.
summary(model_2)
## 
## Call:
## glm(formula = Churn ~ tenure + MonthlyCharges + Metric1 + Metric2 + 
##     Metric3 + MultipleLines + InternetService.xFiber.optic + 
##     InternetService.xNo + OnlineSecurity + DeviceProtection + 
##     TechSupport + StreamingTV + StreamingMovies + Contract.xOne.year + 
##     Contract.xTwo.year + PaperlessBilling + PaymentMethod.xElectronic.check + 
##     tenure_bin.x2.3.years + tenure_bin.x3.4.years + tenure_bin.x4.5.years + 
##     tenure_bin.x5.6.years, family = "binomial", data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5153  -0.4301  -0.1359   0.3697   3.3050  
## 
## Coefficients:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     -3.82960    0.33655 -11.379  < 2e-16 ***
## tenure                          -1.88775    0.20596  -9.166  < 2e-16 ***
## MonthlyCharges                  -0.52712    0.24047  -2.192 0.028378 *  
## Metric1                         -4.47945    0.39226 -11.420  < 2e-16 ***
## Metric2                         -4.62730    0.40035 -11.558  < 2e-16 ***
## Metric3                         -3.51513    0.41822  -8.405  < 2e-16 ***
## MultipleLines                    0.48299    0.12579   3.840 0.000123 ***
## InternetService.xFiber.optic     1.26064    0.27625   4.563 5.03e-06 ***
## InternetService.xNo             -1.58459    0.24044  -6.590 4.39e-11 ***
## OnlineSecurity                  -0.25112    0.12434  -2.020 0.043426 *  
## DeviceProtection                 0.16805    0.11878   1.415 0.157112    
## TechSupport                     -0.18732    0.12656  -1.480 0.138865    
## StreamingTV                      0.48848    0.13582   3.597 0.000322 ***
## StreamingMovies                  0.25993    0.13515   1.923 0.054450 .  
## Contract.xOne.year              -0.80670    0.14778  -5.459 4.79e-08 ***
## Contract.xTwo.year              -1.46742    0.22478  -6.528 6.66e-11 ***
## PaperlessBilling                 0.30374    0.10448   2.907 0.003647 ** 
## PaymentMethod.xElectronic.check  0.34677    0.09796   3.540 0.000400 ***
## tenure_bin.x2.3.years            0.56197    0.22326   2.517 0.011831 *  
## tenure_bin.x3.4.years            1.63111    0.30889   5.280 1.29e-07 ***
## tenure_bin.x4.5.years            2.04041    0.39743   5.134 2.84e-07 ***
## tenure_bin.x5.6.years            2.73858    0.49299   5.555 2.78e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 5699.5  on 4921  degrees of freedom
## Residual deviance: 3046.7  on 4900  degrees of freedom
## AIC: 3090.7
## 
## Number of Fisher Scoring iterations: 7
#(15)
# Hand-trimmed logistic regression fitted on `train`. Compared with the formula
# echoed for model_2, six terms are left out: OnlineSecurity, DeviceProtection,
# TechSupport, StreamingMovies, PaymentMethod.xElectronic.check and
# tenure_bin.x2.3.years. Term order is kept as written so the coefficient
# table prints in the same order.
model_3 <- glm(
  formula = Churn ~
    # tenure, charges and Metric* terms
    tenure + MonthlyCharges + Metric1 + Metric2 + Metric3 +
    # Contract.* terms
    Contract.xOne.year + Contract.xTwo.year +
    # tenure_bin.* terms
    tenure_bin.x3.4.years + tenure_bin.x4.5.years + tenure_bin.x5.6.years +
    # remaining service / billing terms
    MultipleLines + InternetService.xFiber.optic + InternetService.xNo +
    StreamingTV + PaperlessBilling,
  family = "binomial",
  data = train
)
# Coefficient table, deviances and AIC for the trimmed model.
summary(model_3)
## 
## Call:
## glm(formula = Churn ~ tenure + MonthlyCharges + Metric1 + Metric2 + 
##     Metric3 + Contract.xOne.year + Contract.xTwo.year + tenure_bin.x3.4.years + 
##     tenure_bin.x4.5.years + tenure_bin.x5.6.years + MultipleLines + 
##     InternetService.xFiber.optic + InternetService.xNo + StreamingTV + 
##     PaperlessBilling, family = "binomial", data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4670  -0.4375  -0.1378   0.3852   3.3259  
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   -3.3601     0.2086 -16.108  < 2e-16 ***
## tenure                        -1.5417     0.1362 -11.321  < 2e-16 ***
## MonthlyCharges                -0.4908     0.1683  -2.916  0.00354 ** 
## Metric1                       -4.4514     0.3893 -11.435  < 2e-16 ***
## Metric2                       -4.6025     0.3975 -11.579  < 2e-16 ***
## Metric3                       -3.4777     0.4153  -8.374  < 2e-16 ***
## Contract.xOne.year            -0.8630     0.1454  -5.935 2.94e-09 ***
## Contract.xTwo.year            -1.6600     0.2203  -7.536 4.84e-14 ***
## tenure_bin.x3.4.years          1.0929     0.2145   5.094 3.51e-07 ***
## tenure_bin.x4.5.years          1.3460     0.2662   5.056 4.29e-07 ***
## tenure_bin.x5.6.years          1.8531     0.3246   5.709 1.14e-08 ***
## MultipleLines                  0.4938     0.1168   4.227 2.37e-05 ***
## InternetService.xFiber.optic   1.3821     0.2076   6.657 2.79e-11 ***
## InternetService.xNo           -1.5479     0.2229  -6.944 3.80e-12 ***
## StreamingTV                    0.5943     0.1292   4.600 4.22e-06 ***
## PaperlessBilling               0.3395     0.1033   3.287  0.00101 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 5699.5  on 4921  degrees of freedom
## Residual deviance: 3085.1  on 4906  degrees of freedom
## AIC: 3117.1
## 
## Number of Fisher Scoring iterations: 7
# model_3 is carried forward as the final logistic model.
final_model <- model_3
#(17)
# Predicted churn probabilities (type = "response") for the rows of
# `validation`.
pred <- predict(final_model, type = "response", newdata = validation)
summary(pred)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000006 0.0114100 0.1126668 0.2821273 0.5364083 0.9844745
# Classify at a 0.5 cut-off. The factor levels are fixed explicitly so the
# confusion matrix is always 2 x 2 with "No" before "Yes", even if every
# prediction (or every actual value) falls into a single class; with levels
# inferred from the data, table() would silently drop the empty row/column.
pred_churn <- factor(
  ifelse(pred >= 0.5, "Yes", "No"),
  levels = c("No", "Yes")
)
# validation$Churn equal to 1 is labelled "Yes", anything else "No".
actual_churn <- factor(
  ifelse(validation$Churn == 1, "Yes", "No"),
  levels = c("No", "Yes")
)
# Confusion matrix: rows = actual class, columns = predicted class.
performance <- table(actual_churn, pred_churn)
performance
##             pred_churn
## actual_churn   No  Yes
##          No  1390  159
##          Yes  153  408
##             pred_churn
## actual_churn   No  Yes
##          No  1390  159
##          Yes  153  408
#(18)
# Metrics are read from the `performance` confusion matrix (rows = actual,
# columns = predicted, "Yes" = churn is the positive class) instead of from
# hand-typed counts. The previously typed counts did not match the matrix and
# were internally inconsistent (337 in one place, 377 in another), so the
# reported figures were wrong and went stale on every re-run.
# Indexing by name fails loudly if a class is missing from the table.
accuracy <- (performance["No", "No"] + performance["Yes", "Yes"]) /
  sum(performance)
# Sensitivity: share of actual "Yes" rows predicted "Yes".
Sensitivity <- performance["Yes", "Yes"] / sum(performance["Yes", ])
# Specificity: share of actual "No" rows predicted "No".
Specificity <- performance["No", "No"] / sum(performance["No", ])
# The values echoed below correspond to the counts 1390 / 159 / 153 / 408.
accuracy
## [1] 0.8521327
Sensitivity
## [1] 0.7272727
Specificity
## [1] 0.8973531
#(19)
# Same evaluation as the 0.5 cut-off, but classifying as "Yes" from a
# predicted probability of 0.45 upwards.
# Factor levels are fixed so the confusion matrix is always 2 x 2 in the
# order "No", "Yes", even when one class is absent.
pred_churn2 <- factor(
  ifelse(pred >= 0.45, "Yes", "No"),
  levels = c("No", "Yes")
)
actual_churn2 <- factor(
  ifelse(validation$Churn == 1, "Yes", "No"),
  levels = c("No", "Yes")
)
# Confusion matrix: rows = actual class, columns = predicted class.
performance2 <- table(actual_churn2, pred_churn2)
performance2
##              pred_churn2
## actual_churn2   No  Yes
##           No  1366  183
##           Yes  131  430
# Metrics are computed from `performance2` rather than from hand-typed counts;
# the previously typed counts (1393 / 156 / 157 / 404) did not match the
# matrix printed above. Indexing by name fails loudly if a class is missing.
accuracy2 <- (performance2["No", "No"] + performance2["Yes", "Yes"]) /
  sum(performance2)
# Sensitivity: share of actual "Yes" rows predicted "Yes".
Sensitivity2 <- performance2["Yes", "Yes"] / sum(performance2["Yes", ])
# Specificity: share of actual "No" rows predicted "No".
Specificity2 <- performance2["No", "No"] / sum(performance2["No", ])
# The values echoed below correspond to the counts 1366 / 183 / 131 / 430.
accuracy2
## [1] 0.8511848
Sensitivity2
## [1] 0.7664884
Specificity2
## [1] 0.8818593
#(20)
# library() stops immediately if the package is not installed; require() would
# only return FALSE with a warning and the failure would surface later as
# "could not find function tree".
library(tree)
# Fit a tree for Churn on every other column of `train`.
# NOTE(review): the summary below reports a *regression* tree, i.e. Churn is
# numeric here, so predictions are numeric values rather than class labels.
# Convert Churn to a factor first if a classification tree is intended.
model_tree <- tree(Churn ~ ., data = train)
summary(model_tree)
## 
## Regression tree:
## tree(formula = Churn ~ ., data = train)
## Variables actually used in tree construction:
## [1] "Metric3"                      "tenure"                      
## [3] "Contract.xTwo.year"           "Contract.xOne.year"          
## [5] "InternetService.xFiber.optic"
## Number of terminal nodes:  8 
## Residual mean deviance:  0.1102 = 541.7 / 4914 
## Distribution of residuals:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.77460 -0.08134 -0.03223  0.00000  0.22540  0.96780
# Draw the tree and label its splits; pretty = 0 is passed through to text()
# unchanged.
plot(model_tree)
text(model_tree, pretty = 0)

# Tree predictions for the rows of `validation` (type = "vector").
pred2 <- predict(model_tree, type = "vector", newdata = validation)
# Threshold the predictions at 0.5. Factor levels are fixed so the confusion
# matrix is always 2 x 2 in the order "No", "Yes", even if the tree predicts
# only one class; with inferred levels table() would drop the empty column.
pred_churn3 <- factor(
  ifelse(pred2 >= 0.5, "Yes", "No"),
  levels = c("No", "Yes")
)
actual_churn3 <- factor(
  ifelse(validation$Churn == 1, "Yes", "No"),
  levels = c("No", "Yes")
)
# Confusion matrix: rows = actual class, columns = predicted class.
performance3 <- table(actual_churn3, pred_churn3)
performance3
##              pred_churn3
## actual_churn3   No  Yes
##           No  1416  133
##           Yes  183  378