#(1)
# Load the customer churn data set from the lab directory.
telco <- read.csv("/Users/jianingjin/Desktop/IEMS_304/lab3/churn.csv")
#(2)
# Flag every row that has at least one missing value.
has_missing <- !complete.cases(telco)
# Dimensions (rows, columns) of the incomplete subset.
dim(telco[has_missing, ])
## [1] 11 23
# Keep only the fully observed rows for the rest of the analysis.
telco <- telco[!has_missing, ]
#(3)
# Overall class balance: number of customers in each Churn category.
churn_counts <- table(telco$Churn)
barplot(churn_counts)

#(4)
# Stacked bar charts: one bar per level of the demographic variable (table
# columns), split by Churn (table rows, which also label the legend).
counts1 <- table(telco$Churn, telco$gender)
barplot(counts1,
        main = "Churn x Gender Breakdown",
        legend.text = rownames(counts1))

counts2 <- table(telco$Churn, telco$SeniorCitizen)
barplot(counts2,
        main = "Churn x SeniorCitizen Breakdown",
        legend.text = rownames(counts2))

counts3 <- table(telco$Churn, telco$Partner)
barplot(counts3,
        main = "Churn x Partner Breakdown",
        legend.text = rownames(counts3))

counts4 <- table(telco$Churn, telco$Dependents)
barplot(counts4,
        main = "Churn x Dependents Breakdown",
        legend.text = rownames(counts4))

#(5)
# Compare the distribution of each numeric variable between the two Churn
# groups. The formula interface draws one box per Churn level on a shared
# axis, so the groups can be compared directly. `data =` only has an effect
# with this interface; with a plain vector as first argument it is not used
# to look up variables.
boxplot(tenure ~ Churn, data = telco,
        main = "tenure by Churn", xlab = "Churn", ylab = "tenure")

boxplot(MonthlyCharges ~ Churn, data = telco,
        main = "MonthlyCharges by Churn",
        xlab = "Churn", ylab = "MonthlyCharges")

boxplot(TotalCharges ~ Churn, data = telco,
        main = "TotalCharges by Churn",
        xlab = "Churn", ylab = "TotalCharges")

#(6)
# Pairwise correlation matrix of the five numeric customer measures.
cor(telco[c(
  "tenure", "TotalCharges", "MonthlyCharges", "Metric1", "Metric2"
)])
## tenure TotalCharges MonthlyCharges Metric1 Metric2
## tenure 1.00000000 0.82588046 0.24686177 0.09403654 0.09295148
## TotalCharges 0.82588046 1.00000000 0.65106480 0.05024178 0.05811042
## MonthlyCharges 0.24686177 0.65106480 1.00000000 -0.04478265 -0.05898434
## Metric1 0.09403654 0.05024178 -0.04478265 1.00000000 -0.28637676
## Metric2 0.09295148 0.05811042 -0.05898434 -0.28637676 1.00000000
#(8)
# Collapse the two "service not available" labels into a plain "No" wherever
# they occur in the data frame.
no_service <- telco == "No internet service" | telco == "No phone service"
telco[no_service] <- "No"
#(9)
# Column positions of the numeric predictors. Judging by the order of the
# first terms in the fitted model output, these are tenure, MonthlyCharges,
# TotalCharges, Metric1 and Metric2.
num_columns <- c(6, 19, 20, 21, 22)
# lapply() always returns one vector per column, so the assignment back into
# the data frame does not depend on sapply()'s simplification rules.
telco[num_columns] <- lapply(telco[num_columns], as.numeric)
# Standardise each numeric predictor to mean 0 and standard deviation 1.
telco_int <- data.frame(scale(telco[num_columns]))
# Interaction of the two standardised metrics, itself standardised. scale()
# returns a one-column matrix with attributes; as.numeric() stores it as a
# plain numeric column like the others.
telco_int$Metric3 <- as.numeric(scale(telco_int$Metric1 * telco_int$Metric2))
#(10)
# Bin tenure into 12-unit bands labelled as years. cut() does every comparison
# numerically in a single pass; assigning string labels into the column one
# range at a time turns it into character, after which the remaining range
# tests are string comparisons. Intervals are right-closed, i.e. (0, 12],
# (12, 24], ..., (60, 72]; values outside (0, 72] become NA.
telco$tenure_bin <- cut(
  telco$tenure,
  breaks = seq(0, 72, by = 12),
  labels = c("0-1 year", "1-2 years", "2-3 years",
             "3-4 years", "4-5 years", "5-6 years")
)
# cut() already returns a factor, so R treats tenure_bin as categorical.
telco$tenure <- NULL # delete tenure col
#(11)
# Categorical part of the data: drop columns 1, 3 and 18-21 by position
# (presumably the ID column and the numeric columns already handled in
# telco_int -- TODO confirm against the CSV header).
telco_cat <- telco[, -c(1, 3, 18, 19, 20, 21)]
# For every remaining column build the full set of 0/1 indicator columns
# (no intercept) and discard the first level as the reference category.
# Two-level columns therefore collapse to a single 0/1 vector.
dummy <- data.frame(sapply(telco_cat, function(x) {
  indicators <- data.frame(model.matrix(~ x - 1, data = telco_cat))
  indicators[, -1]
}))
head(dummy)
#(12)
# Combine the scaled numeric predictors and the dummy-coded categorical
# columns side by side.
telco_final <- cbind(telco_int, dummy)
head(telco_final)
#(13)
library("caTools")
# Fix the RNG seed so the random split -- and every model, confusion matrix
# and metric derived from it -- is reproducible from run to run.
set.seed(304)
# TRUE marks the rows assigned to the training set (ratio 0.7); the split is
# made on the Churn labels.
indices <- sample.split(telco_final$Churn, SplitRatio = 0.7)
train <- telco_final[indices, ]
validation <- telco_final[!indices, ]
#(14)
# Full logistic regression: Churn against every other column of the training
# set.
model_1 <- glm(
  formula = Churn ~ .,
  family = "binomial",
  data = train
)
summary(model_1)
##
## Call:
## glm(formula = Churn ~ ., family = "binomial", data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5181 -0.4280 -0.1320 0.3764 3.3318
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.106374 1.844399 -2.226 0.025987
## tenure -2.050141 0.362671 -5.653 1.58e-08
## MonthlyCharges -0.786807 1.343843 -0.585 0.558218
## TotalCharges 0.197650 0.214953 0.920 0.357833
## Metric1 -4.465428 0.392325 -11.382 < 2e-16
## Metric2 -4.613900 0.400213 -11.529 < 2e-16
## Metric3 -3.503151 0.418394 -8.373 < 2e-16
## gender 0.026103 0.091467 0.285 0.775348
## Partner -0.030441 0.108432 -0.281 0.778911
## Dependents -0.136498 0.121511 -1.123 0.261295
## PhoneService 0.204914 0.912857 0.224 0.822387
## MultipleLines 0.496041 0.244614 2.028 0.042575
## InternetService.xFiber.optic 1.393152 1.123976 1.239 0.215166
## InternetService.xNo -1.832604 1.136227 -1.613 0.106769
## OnlineSecurity -0.217723 0.250985 -0.867 0.385681
## OnlineBackup -0.059202 0.247458 -0.239 0.810919
## DeviceProtection 0.196041 0.245684 0.798 0.424905
## TechSupport -0.162778 0.253941 -0.641 0.521516
## StreamingTV 0.554662 0.459960 1.206 0.227860
## StreamingMovies 0.316017 0.458680 0.689 0.490841
## Contract.xOne.year -0.792021 0.148519 -5.333 9.67e-08
## Contract.xTwo.year -1.429175 0.227452 -6.283 3.31e-10
## PaperlessBilling 0.300649 0.104780 2.869 0.004113
## PaymentMethod.xCredit.card..automatic. 0.002717 0.156078 0.017 0.986114
## PaymentMethod.xElectronic.check 0.345495 0.133386 2.590 0.009592
## PaymentMethod.xMailed.check -0.012204 0.164695 -0.074 0.940928
## tenure_bin.x1.2.years 0.071067 0.219276 0.324 0.745862
## tenure_bin.x2.3.years 0.637574 0.363273 1.755 0.079245
## tenure_bin.x3.4.years 1.706499 0.513716 3.322 0.000894
## tenure_bin.x4.5.years 2.108221 0.664811 3.171 0.001518
## tenure_bin.x5.6.years 2.783326 0.829165 3.357 0.000789
##
## (Intercept) *
## tenure ***
## MonthlyCharges
## TotalCharges
## Metric1 ***
## Metric2 ***
## Metric3 ***
## gender
## Partner
## Dependents
## PhoneService
## MultipleLines *
## InternetService.xFiber.optic
## InternetService.xNo
## OnlineSecurity
## OnlineBackup
## DeviceProtection
## TechSupport
## StreamingTV
## StreamingMovies
## Contract.xOne.year ***
## Contract.xTwo.year ***
## PaperlessBilling **
## PaymentMethod.xCredit.card..automatic.
## PaymentMethod.xElectronic.check **
## PaymentMethod.xMailed.check
## tenure_bin.x1.2.years
## tenure_bin.x2.3.years .
## tenure_bin.x3.4.years ***
## tenure_bin.x4.5.years **
## tenure_bin.x5.6.years ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 5699.5 on 4921 degrees of freedom
## Residual deviance: 3042.9 on 4891 degrees of freedom
## AIC: 3104.9
##
## Number of Fisher Scoring iterations: 7
library(MASS)
# Stepwise AIC selection starting from the full model; terms may be dropped
# and re-added at each step.
model_2 <- stepAIC(object = model_1, direction = "both")
## Start: AIC=3104.9
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 +
## Metric3 + gender + Partner + Dependents + PhoneService +
## MultipleLines + InternetService.xFiber.optic + InternetService.xNo +
## OnlineSecurity + OnlineBackup + DeviceProtection + TechSupport +
## StreamingTV + StreamingMovies + Contract.xOne.year + Contract.xTwo.year +
## PaperlessBilling + PaymentMethod.xCredit.card..automatic. +
## PaymentMethod.xElectronic.check + PaymentMethod.xMailed.check +
## tenure_bin.x1.2.years + tenure_bin.x2.3.years + tenure_bin.x3.4.years +
## tenure_bin.x4.5.years + tenure_bin.x5.6.years
##
## Df Deviance AIC
## - PaymentMethod.xCredit.card..automatic. 1 3042.9 3102.9
## - PaymentMethod.xMailed.check 1 3042.9 3102.9
## - PhoneService 1 3042.9 3102.9
## - OnlineBackup 1 3043.0 3103.0
## - Partner 1 3043.0 3103.0
## - gender 1 3043.0 3103.0
## - tenure_bin.x1.2.years 1 3043.0 3103.0
## - MonthlyCharges 1 3043.2 3103.2
## - TechSupport 1 3043.3 3103.3
## - StreamingMovies 1 3043.4 3103.4
## - DeviceProtection 1 3043.5 3103.5
## - OnlineSecurity 1 3043.6 3103.6
## - TotalCharges 1 3043.8 3103.8
## - Dependents 1 3044.2 3104.2
## - StreamingTV 1 3044.4 3104.4
## - InternetService.xFiber.optic 1 3044.4 3104.4
## <none> 3042.9 3104.9
## - InternetService.xNo 1 3045.5 3105.5
## - tenure_bin.x2.3.years 1 3046.0 3106.0
## - MultipleLines 1 3047.0 3107.0
## - PaymentMethod.xElectronic.check 1 3049.6 3109.6
## - PaperlessBilling 1 3051.1 3111.1
## - tenure_bin.x4.5.years 1 3053.0 3113.0
## - tenure_bin.x3.4.years 1 3054.0 3114.0
## - tenure_bin.x5.6.years 1 3054.2 3114.2
## - Contract.xOne.year 1 3072.4 3132.4
## - tenure 1 3075.6 3135.6
## - Contract.xTwo.year 1 3087.0 3147.0
## - Metric3 1 3123.6 3183.6
## - Metric1 1 3215.6 3275.6
## - Metric2 1 3219.6 3279.6
##
## Step: AIC=3102.9
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 +
## Metric3 + gender + Partner + Dependents + PhoneService +
## MultipleLines + InternetService.xFiber.optic + InternetService.xNo +
## OnlineSecurity + OnlineBackup + DeviceProtection + TechSupport +
## StreamingTV + StreamingMovies + Contract.xOne.year + Contract.xTwo.year +
## PaperlessBilling + PaymentMethod.xElectronic.check + PaymentMethod.xMailed.check +
## tenure_bin.x1.2.years + tenure_bin.x2.3.years + tenure_bin.x3.4.years +
## tenure_bin.x4.5.years + tenure_bin.x5.6.years
##
## Df Deviance AIC
## - PaymentMethod.xMailed.check 1 3042.9 3100.9
## - PhoneService 1 3042.9 3100.9
## - OnlineBackup 1 3043.0 3101.0
## - Partner 1 3043.0 3101.0
## - gender 1 3043.0 3101.0
## - tenure_bin.x1.2.years 1 3043.0 3101.0
## - MonthlyCharges 1 3043.2 3101.2
## - TechSupport 1 3043.3 3101.3
## - StreamingMovies 1 3043.4 3101.4
## - DeviceProtection 1 3043.5 3101.5
## - OnlineSecurity 1 3043.6 3101.6
## - TotalCharges 1 3043.8 3101.8
## - Dependents 1 3044.2 3102.2
## - StreamingTV 1 3044.4 3102.4
## - InternetService.xFiber.optic 1 3044.4 3102.4
## <none> 3042.9 3102.9
## - InternetService.xNo 1 3045.5 3103.5
## - tenure_bin.x2.3.years 1 3046.0 3104.0
## + PaymentMethod.xCredit.card..automatic. 1 3042.9 3104.9
## - MultipleLines 1 3047.0 3105.0
## - PaperlessBilling 1 3051.1 3109.1
## - tenure_bin.x4.5.years 1 3053.0 3111.0
## - PaymentMethod.xElectronic.check 1 3053.1 3111.1
## - tenure_bin.x3.4.years 1 3054.0 3112.0
## - tenure_bin.x5.6.years 1 3054.2 3112.2
## - Contract.xOne.year 1 3072.4 3130.4
## - tenure 1 3075.6 3133.6
## - Contract.xTwo.year 1 3087.0 3145.0
## - Metric3 1 3123.6 3181.6
## - Metric1 1 3215.6 3273.6
## - Metric2 1 3219.6 3277.6
##
## Step: AIC=3100.91
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 +
## Metric3 + gender + Partner + Dependents + PhoneService +
## MultipleLines + InternetService.xFiber.optic + InternetService.xNo +
## OnlineSecurity + OnlineBackup + DeviceProtection + TechSupport +
## StreamingTV + StreamingMovies + Contract.xOne.year + Contract.xTwo.year +
## PaperlessBilling + PaymentMethod.xElectronic.check + tenure_bin.x1.2.years +
## tenure_bin.x2.3.years + tenure_bin.x3.4.years + tenure_bin.x4.5.years +
## tenure_bin.x5.6.years
##
## Df Deviance AIC
## - PhoneService 1 3043.0 3099.0
## - OnlineBackup 1 3043.0 3099.0
## - Partner 1 3043.0 3099.0
## - gender 1 3043.0 3099.0
## - tenure_bin.x1.2.years 1 3043.0 3099.0
## - MonthlyCharges 1 3043.2 3099.2
## - TechSupport 1 3043.3 3099.3
## - StreamingMovies 1 3043.4 3099.4
## - DeviceProtection 1 3043.5 3099.5
## - OnlineSecurity 1 3043.7 3099.7
## - TotalCharges 1 3043.8 3099.8
## - Dependents 1 3044.2 3100.2
## - StreamingTV 1 3044.4 3100.4
## - InternetService.xFiber.optic 1 3044.4 3100.4
## <none> 3042.9 3100.9
## - InternetService.xNo 1 3045.5 3101.5
## - tenure_bin.x2.3.years 1 3046.0 3102.0
## + PaymentMethod.xMailed.check 1 3042.9 3102.9
## + PaymentMethod.xCredit.card..automatic. 1 3042.9 3102.9
## - MultipleLines 1 3047.0 3103.0
## - PaperlessBilling 1 3051.2 3107.2
## - tenure_bin.x4.5.years 1 3053.0 3109.0
## - tenure_bin.x3.4.years 1 3054.0 3110.0
## - tenure_bin.x5.6.years 1 3054.2 3110.2
## - PaymentMethod.xElectronic.check 1 3055.5 3111.5
## - Contract.xOne.year 1 3072.4 3128.4
## - tenure 1 3075.9 3131.9
## - Contract.xTwo.year 1 3087.0 3143.0
## - Metric3 1 3123.7 3179.7
## - Metric1 1 3215.8 3271.8
## - Metric2 1 3219.7 3275.7
##
## Step: AIC=3098.96
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 +
## Metric3 + gender + Partner + Dependents + MultipleLines +
## InternetService.xFiber.optic + InternetService.xNo + OnlineSecurity +
## OnlineBackup + DeviceProtection + TechSupport + StreamingTV +
## StreamingMovies + Contract.xOne.year + Contract.xTwo.year +
## PaperlessBilling + PaymentMethod.xElectronic.check + tenure_bin.x1.2.years +
## tenure_bin.x2.3.years + tenure_bin.x3.4.years + tenure_bin.x4.5.years +
## tenure_bin.x5.6.years
##
## Df Deviance AIC
## - gender 1 3043.0 3097.0
## - Partner 1 3043.0 3097.0
## - tenure_bin.x1.2.years 1 3043.1 3097.1
## - TotalCharges 1 3043.8 3097.8
## - OnlineBackup 1 3043.8 3097.8
## - Dependents 1 3044.2 3098.2
## - DeviceProtection 1 3044.5 3098.5
## <none> 3043.0 3099.0
## - StreamingMovies 1 3045.4 3099.4
## - TechSupport 1 3045.7 3099.7
## - tenure_bin.x2.3.years 1 3046.0 3100.0
## - MonthlyCharges 1 3046.3 3100.3
## + PhoneService 1 3042.9 3100.9
## + PaymentMethod.xMailed.check 1 3042.9 3100.9
## + PaymentMethod.xCredit.card..automatic. 1 3043.0 3101.0
## - OnlineSecurity 1 3047.4 3101.4
## - PaperlessBilling 1 3051.3 3105.3
## - tenure_bin.x4.5.years 1 3053.0 3107.0
## - StreamingTV 1 3053.7 3107.7
## - tenure_bin.x3.4.years 1 3054.0 3108.0
## - tenure_bin.x5.6.years 1 3054.2 3108.2
## - MultipleLines 1 3055.3 3109.3
## - PaymentMethod.xElectronic.check 1 3055.5 3109.5
## - InternetService.xFiber.optic 1 3058.2 3112.2
## - Contract.xOne.year 1 3072.5 3126.5
## - tenure 1 3075.9 3129.9
## - InternetService.xNo 1 3084.2 3138.2
## - Contract.xTwo.year 1 3087.0 3141.0
## - Metric3 1 3123.8 3177.8
## - Metric1 1 3215.9 3269.9
## - Metric2 1 3219.8 3273.8
##
## Step: AIC=3097.03
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 +
## Metric3 + Partner + Dependents + MultipleLines + InternetService.xFiber.optic +
## InternetService.xNo + OnlineSecurity + OnlineBackup + DeviceProtection +
## TechSupport + StreamingTV + StreamingMovies + Contract.xOne.year +
## Contract.xTwo.year + PaperlessBilling + PaymentMethod.xElectronic.check +
## tenure_bin.x1.2.years + tenure_bin.x2.3.years + tenure_bin.x3.4.years +
## tenure_bin.x4.5.years + tenure_bin.x5.6.years
##
## Df Deviance AIC
## - Partner 1 3043.1 3095.1
## - tenure_bin.x1.2.years 1 3043.1 3095.1
## - TotalCharges 1 3043.9 3095.9
## - OnlineBackup 1 3043.9 3095.9
## - Dependents 1 3044.3 3096.3
## - DeviceProtection 1 3044.6 3096.6
## <none> 3043.0 3097.0
## - StreamingMovies 1 3045.5 3097.5
## - TechSupport 1 3045.8 3097.8
## - tenure_bin.x2.3.years 1 3046.1 3098.1
## - MonthlyCharges 1 3046.4 3098.4
## + gender 1 3043.0 3099.0
## + PhoneService 1 3043.0 3099.0
## + PaymentMethod.xMailed.check 1 3043.0 3099.0
## + PaymentMethod.xCredit.card..automatic. 1 3043.0 3099.0
## - OnlineSecurity 1 3047.5 3099.5
## - PaperlessBilling 1 3051.4 3103.4
## - tenure_bin.x4.5.years 1 3053.0 3105.0
## - StreamingTV 1 3053.7 3105.7
## - tenure_bin.x3.4.years 1 3054.0 3106.0
## - tenure_bin.x5.6.years 1 3054.3 3106.3
## - MultipleLines 1 3055.4 3107.4
## - PaymentMethod.xElectronic.check 1 3055.5 3107.5
## - InternetService.xFiber.optic 1 3058.2 3110.2
## - Contract.xOne.year 1 3072.5 3124.5
## - tenure 1 3075.9 3127.9
## - InternetService.xNo 1 3084.3 3136.3
## - Contract.xTwo.year 1 3087.1 3139.1
## - Metric3 1 3123.8 3175.8
## - Metric1 1 3215.9 3267.9
## - Metric2 1 3219.9 3271.9
##
## Step: AIC=3095.11
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 +
## Metric3 + Dependents + MultipleLines + InternetService.xFiber.optic +
## InternetService.xNo + OnlineSecurity + OnlineBackup + DeviceProtection +
## TechSupport + StreamingTV + StreamingMovies + Contract.xOne.year +
## Contract.xTwo.year + PaperlessBilling + PaymentMethod.xElectronic.check +
## tenure_bin.x1.2.years + tenure_bin.x2.3.years + tenure_bin.x3.4.years +
## tenure_bin.x4.5.years + tenure_bin.x5.6.years
##
## Df Deviance AIC
## - tenure_bin.x1.2.years 1 3043.2 3093.2
## - TotalCharges 1 3044.0 3094.0
## - OnlineBackup 1 3044.0 3094.0
## - DeviceProtection 1 3044.6 3094.6
## - Dependents 1 3044.9 3094.9
## <none> 3043.1 3095.1
## - StreamingMovies 1 3045.5 3095.5
## - TechSupport 1 3045.9 3095.9
## - tenure_bin.x2.3.years 1 3046.2 3096.2
## - MonthlyCharges 1 3046.4 3096.4
## + Partner 1 3043.0 3097.0
## + gender 1 3043.0 3097.0
## + PhoneService 1 3043.1 3097.1
## + PaymentMethod.xMailed.check 1 3043.1 3097.1
## + PaymentMethod.xCredit.card..automatic. 1 3043.1 3097.1
## - OnlineSecurity 1 3047.7 3097.7
## - PaperlessBilling 1 3051.4 3101.4
## - tenure_bin.x4.5.years 1 3053.2 3103.2
## - StreamingTV 1 3053.8 3103.8
## - tenure_bin.x3.4.years 1 3054.2 3104.2
## - tenure_bin.x5.6.years 1 3054.4 3104.4
## - MultipleLines 1 3055.4 3105.4
## - PaymentMethod.xElectronic.check 1 3055.6 3105.6
## - InternetService.xFiber.optic 1 3058.2 3108.2
## - Contract.xOne.year 1 3072.6 3122.6
## - tenure 1 3076.5 3126.5
## - InternetService.xNo 1 3084.3 3134.3
## - Contract.xTwo.year 1 3087.1 3137.1
## - Metric3 1 3123.9 3173.9
## - Metric1 1 3216.0 3266.0
## - Metric2 1 3220.1 3270.1
##
## Step: AIC=3093.2
## Churn ~ tenure + MonthlyCharges + TotalCharges + Metric1 + Metric2 +
## Metric3 + Dependents + MultipleLines + InternetService.xFiber.optic +
## InternetService.xNo + OnlineSecurity + OnlineBackup + DeviceProtection +
## TechSupport + StreamingTV + StreamingMovies + Contract.xOne.year +
## Contract.xTwo.year + PaperlessBilling + PaymentMethod.xElectronic.check +
## tenure_bin.x2.3.years + tenure_bin.x3.4.years + tenure_bin.x4.5.years +
## tenure_bin.x5.6.years
##
## Df Deviance AIC
## - TotalCharges 1 3044.0 3092.0
## - OnlineBackup 1 3044.1 3092.1
## - DeviceProtection 1 3044.7 3092.7
## - Dependents 1 3045.0 3093.0
## <none> 3043.2 3093.2
## - StreamingMovies 1 3045.6 3093.6
## - TechSupport 1 3046.0 3094.0
## - MonthlyCharges 1 3046.5 3094.5
## + tenure_bin.x1.2.years 1 3043.1 3095.1
## + Partner 1 3043.1 3095.1
## + gender 1 3043.1 3095.1
## + PhoneService 1 3043.2 3095.2
## + PaymentMethod.xMailed.check 1 3043.2 3095.2
## + PaymentMethod.xCredit.card..automatic. 1 3043.2 3095.2
## - OnlineSecurity 1 3047.8 3095.8
## - tenure_bin.x2.3.years 1 3049.1 3097.1
## - PaperlessBilling 1 3051.5 3099.5
## - StreamingTV 1 3053.9 3101.9
## - MultipleLines 1 3055.6 3103.6
## - PaymentMethod.xElectronic.check 1 3055.7 3103.7
## - InternetService.xFiber.optic 1 3058.3 3106.3
## - tenure_bin.x4.5.years 1 3066.0 3114.0
## - tenure_bin.x5.6.years 1 3068.0 3116.0
## - tenure_bin.x3.4.years 1 3068.8 3116.8
## - Contract.xOne.year 1 3072.8 3120.8
## - InternetService.xNo 1 3084.4 3132.4
## - Contract.xTwo.year 1 3087.9 3135.9
## - tenure 1 3114.2 3162.2
## - Metric3 1 3124.0 3172.0
## - Metric1 1 3216.0 3264.0
## - Metric2 1 3220.1 3268.1
##
## Step: AIC=3092.02
## Churn ~ tenure + MonthlyCharges + Metric1 + Metric2 + Metric3 +
## Dependents + MultipleLines + InternetService.xFiber.optic +
## InternetService.xNo + OnlineSecurity + OnlineBackup + DeviceProtection +
## TechSupport + StreamingTV + StreamingMovies + Contract.xOne.year +
## Contract.xTwo.year + PaperlessBilling + PaymentMethod.xElectronic.check +
## tenure_bin.x2.3.years + tenure_bin.x3.4.years + tenure_bin.x4.5.years +
## tenure_bin.x5.6.years
##
## Df Deviance AIC
## - OnlineBackup 1 3044.8 3090.8
## - DeviceProtection 1 3045.7 3091.7
## - Dependents 1 3045.9 3091.9
## <none> 3044.0 3092.0
## - StreamingMovies 1 3046.6 3092.6
## - TechSupport 1 3046.6 3092.6
## - MonthlyCharges 1 3046.8 3092.8
## + TotalCharges 1 3043.2 3093.2
## + Partner 1 3043.9 3093.9
## + gender 1 3043.9 3093.9
## + tenure_bin.x1.2.years 1 3044.0 3094.0
## + PhoneService 1 3044.0 3094.0
## + PaymentMethod.xCredit.card..automatic. 1 3044.0 3094.0
## + PaymentMethod.xMailed.check 1 3044.0 3094.0
## - OnlineSecurity 1 3048.4 3094.4
## - tenure_bin.x2.3.years 1 3050.3 3096.3
## - PaperlessBilling 1 3052.3 3098.3
## - StreamingTV 1 3055.1 3101.1
## - PaymentMethod.xElectronic.check 1 3056.3 3102.3
## - MultipleLines 1 3056.9 3102.9
## - InternetService.xFiber.optic 1 3059.3 3105.3
## - tenure_bin.x4.5.years 1 3070.1 3116.1
## - tenure_bin.x3.4.years 1 3071.7 3117.7
## - Contract.xOne.year 1 3074.0 3120.0
## - tenure_bin.x5.6.years 1 3075.0 3121.0
## - InternetService.xNo 1 3084.4 3130.4
## - Contract.xTwo.year 1 3090.7 3136.7
## - Metric3 1 3125.5 3171.5
## - tenure 1 3128.4 3174.4
## - Metric1 1 3218.0 3264.0
## - Metric2 1 3221.8 3267.8
##
## Step: AIC=3090.77
## Churn ~ tenure + MonthlyCharges + Metric1 + Metric2 + Metric3 +
## Dependents + MultipleLines + InternetService.xFiber.optic +
## InternetService.xNo + OnlineSecurity + DeviceProtection +
## TechSupport + StreamingTV + StreamingMovies + Contract.xOne.year +
## Contract.xTwo.year + PaperlessBilling + PaymentMethod.xElectronic.check +
## tenure_bin.x2.3.years + tenure_bin.x3.4.years + tenure_bin.x4.5.years +
## tenure_bin.x5.6.years
##
## Df Deviance AIC
## - Dependents 1 3046.7 3090.7
## <none> 3044.8 3090.8
## - DeviceProtection 1 3046.8 3090.8
## - TechSupport 1 3047.1 3091.1
## + OnlineBackup 1 3044.0 3092.0
## + PhoneService 1 3044.0 3092.0
## + TotalCharges 1 3044.1 3092.1
## - StreamingMovies 1 3048.2 3092.2
## + Partner 1 3044.7 3092.7
## + gender 1 3044.7 3092.7
## + tenure_bin.x1.2.years 1 3044.7 3092.7
## - OnlineSecurity 1 3048.8 3092.8
## + PaymentMethod.xMailed.check 1 3044.8 3092.8
## + PaymentMethod.xCredit.card..automatic. 1 3044.8 3092.8
## - MonthlyCharges 1 3049.3 3093.3
## - tenure_bin.x2.3.years 1 3051.0 3095.0
## - PaperlessBilling 1 3052.9 3096.9
## - PaymentMethod.xElectronic.check 1 3057.1 3101.1
## - StreamingTV 1 3057.8 3101.8
## - MultipleLines 1 3059.3 3103.3
## - InternetService.xFiber.optic 1 3064.8 3108.8
## - tenure_bin.x4.5.years 1 3070.7 3114.7
## - tenure_bin.x3.4.years 1 3072.3 3116.3
## - Contract.xOne.year 1 3074.7 3118.7
## - tenure_bin.x5.6.years 1 3075.5 3119.5
## - InternetService.xNo 1 3087.7 3131.7
## - Contract.xTwo.year 1 3091.3 3135.3
## - Metric3 1 3126.3 3170.3
## - tenure 1 3131.0 3175.0
## - Metric1 1 3218.7 3262.7
## - Metric2 1 3222.6 3266.6
##
## Step: AIC=3090.69
## Churn ~ tenure + MonthlyCharges + Metric1 + Metric2 + Metric3 +
## MultipleLines + InternetService.xFiber.optic + InternetService.xNo +
## OnlineSecurity + DeviceProtection + TechSupport + StreamingTV +
## StreamingMovies + Contract.xOne.year + Contract.xTwo.year +
## PaperlessBilling + PaymentMethod.xElectronic.check + tenure_bin.x2.3.years +
## tenure_bin.x3.4.years + tenure_bin.x4.5.years + tenure_bin.x5.6.years
##
## Df Deviance AIC
## <none> 3046.7 3090.7
## - DeviceProtection 1 3048.7 3090.7
## + Dependents 1 3044.8 3090.8
## - TechSupport 1 3048.9 3090.9
## + OnlineBackup 1 3045.9 3091.9
## + Partner 1 3046.0 3092.0
## + PhoneService 1 3046.0 3092.0
## + TotalCharges 1 3046.0 3092.0
## - StreamingMovies 1 3050.4 3092.4
## + gender 1 3046.6 3092.6
## + tenure_bin.x1.2.years 1 3046.6 3092.6
## + PaymentMethod.xMailed.check 1 3046.7 3092.7
## + PaymentMethod.xCredit.card..automatic. 1 3046.7 3092.7
## - OnlineSecurity 1 3050.8 3092.8
## - MonthlyCharges 1 3051.5 3093.5
## - tenure_bin.x2.3.years 1 3053.0 3095.0
## - PaperlessBilling 1 3055.2 3097.2
## - PaymentMethod.xElectronic.check 1 3059.1 3101.1
## - StreamingTV 1 3059.7 3101.7
## - MultipleLines 1 3061.6 3103.6
## - InternetService.xFiber.optic 1 3067.7 3109.7
## - tenure_bin.x4.5.years 1 3073.4 3115.4
## - tenure_bin.x3.4.years 1 3075.0 3117.0
## - Contract.xOne.year 1 3077.6 3119.6
## - tenure_bin.x5.6.years 1 3078.1 3120.1
## - InternetService.xNo 1 3090.5 3132.5
## - Contract.xTwo.year 1 3094.7 3136.7
## - Metric3 1 3127.9 3169.9
## - tenure 1 3134.8 3176.8
## - Metric1 1 3220.3 3262.3
## - Metric2 1 3224.0 3266.0
# Coefficient table and fit statistics of the model chosen by stepAIC().
summary(model_2)
##
## Call:
## glm(formula = Churn ~ tenure + MonthlyCharges + Metric1 + Metric2 +
## Metric3 + MultipleLines + InternetService.xFiber.optic +
## InternetService.xNo + OnlineSecurity + DeviceProtection +
## TechSupport + StreamingTV + StreamingMovies + Contract.xOne.year +
## Contract.xTwo.year + PaperlessBilling + PaymentMethod.xElectronic.check +
## tenure_bin.x2.3.years + tenure_bin.x3.4.years + tenure_bin.x4.5.years +
## tenure_bin.x5.6.years, family = "binomial", data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5153 -0.4301 -0.1359 0.3697 3.3050
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.82960 0.33655 -11.379 < 2e-16 ***
## tenure -1.88775 0.20596 -9.166 < 2e-16 ***
## MonthlyCharges -0.52712 0.24047 -2.192 0.028378 *
## Metric1 -4.47945 0.39226 -11.420 < 2e-16 ***
## Metric2 -4.62730 0.40035 -11.558 < 2e-16 ***
## Metric3 -3.51513 0.41822 -8.405 < 2e-16 ***
## MultipleLines 0.48299 0.12579 3.840 0.000123 ***
## InternetService.xFiber.optic 1.26064 0.27625 4.563 5.03e-06 ***
## InternetService.xNo -1.58459 0.24044 -6.590 4.39e-11 ***
## OnlineSecurity -0.25112 0.12434 -2.020 0.043426 *
## DeviceProtection 0.16805 0.11878 1.415 0.157112
## TechSupport -0.18732 0.12656 -1.480 0.138865
## StreamingTV 0.48848 0.13582 3.597 0.000322 ***
## StreamingMovies 0.25993 0.13515 1.923 0.054450 .
## Contract.xOne.year -0.80670 0.14778 -5.459 4.79e-08 ***
## Contract.xTwo.year -1.46742 0.22478 -6.528 6.66e-11 ***
## PaperlessBilling 0.30374 0.10448 2.907 0.003647 **
## PaymentMethod.xElectronic.check 0.34677 0.09796 3.540 0.000400 ***
## tenure_bin.x2.3.years 0.56197 0.22326 2.517 0.011831 *
## tenure_bin.x3.4.years 1.63111 0.30889 5.280 1.29e-07 ***
## tenure_bin.x4.5.years 2.04041 0.39743 5.134 2.84e-07 ***
## tenure_bin.x5.6.years 2.73858 0.49299 5.555 2.78e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 5699.5 on 4921 degrees of freedom
## Residual deviance: 3046.7 on 4900 degrees of freedom
## AIC: 3090.7
##
## Number of Fisher Scoring iterations: 7
#(15)
# Reduced logistic regression on a hand-picked subset of the predictors kept
# by the stepwise search.
model_3 <- glm(
  formula = Churn ~ tenure + MonthlyCharges + Metric1 + Metric2 + Metric3 +
    Contract.xOne.year + Contract.xTwo.year +
    tenure_bin.x3.4.years + tenure_bin.x4.5.years + tenure_bin.x5.6.years +
    MultipleLines +
    InternetService.xFiber.optic + InternetService.xNo +
    StreamingTV + PaperlessBilling,
  data = train,
  family = "binomial"
)
summary(model_3)
##
## Call:
## glm(formula = Churn ~ tenure + MonthlyCharges + Metric1 + Metric2 +
## Metric3 + Contract.xOne.year + Contract.xTwo.year + tenure_bin.x3.4.years +
## tenure_bin.x4.5.years + tenure_bin.x5.6.years + MultipleLines +
## InternetService.xFiber.optic + InternetService.xNo + StreamingTV +
## PaperlessBilling, family = "binomial", data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4670 -0.4375 -0.1378 0.3852 3.3259
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.3601 0.2086 -16.108 < 2e-16 ***
## tenure -1.5417 0.1362 -11.321 < 2e-16 ***
## MonthlyCharges -0.4908 0.1683 -2.916 0.00354 **
## Metric1 -4.4514 0.3893 -11.435 < 2e-16 ***
## Metric2 -4.6025 0.3975 -11.579 < 2e-16 ***
## Metric3 -3.4777 0.4153 -8.374 < 2e-16 ***
## Contract.xOne.year -0.8630 0.1454 -5.935 2.94e-09 ***
## Contract.xTwo.year -1.6600 0.2203 -7.536 4.84e-14 ***
## tenure_bin.x3.4.years 1.0929 0.2145 5.094 3.51e-07 ***
## tenure_bin.x4.5.years 1.3460 0.2662 5.056 4.29e-07 ***
## tenure_bin.x5.6.years 1.8531 0.3246 5.709 1.14e-08 ***
## MultipleLines 0.4938 0.1168 4.227 2.37e-05 ***
## InternetService.xFiber.optic 1.3821 0.2076 6.657 2.79e-11 ***
## InternetService.xNo -1.5479 0.2229 -6.944 3.80e-12 ***
## StreamingTV 0.5943 0.1292 4.600 4.22e-06 ***
## PaperlessBilling 0.3395 0.1033 3.287 0.00101 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 5699.5 on 4921 degrees of freedom
## Residual deviance: 3085.1 on 4906 degrees of freedom
## AIC: 3117.1
##
## Number of Fisher Scoring iterations: 7
final_model <- model_3
#(17)
# Predicted churn probabilities for the held-out validation rows.
pred <- predict(final_model, type = "response", newdata = validation)
summary(pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000006 0.0114100 0.1126668 0.2821273 0.5364083 0.9844745
# Classify at a 0.5 cut-off. Both factors carry explicit levels so the
# confusion matrix is always 2 x 2, even if one class is never predicted.
pred_churn <- factor(ifelse(pred >= 0.5, "Yes", "No"),
                     levels = c("No", "Yes"))
actual_churn <- factor(ifelse(validation$Churn == 1, "Yes", "No"),
                       levels = c("No", "Yes"))
# Rows: actual class; columns: predicted class.
performance <- table(actual_churn, pred_churn)
performance
## pred_churn
## actual_churn No Yes
## No 1390 159
## Yes 153 408
#(18)
# Compute the metrics from the confusion matrix itself (rows = actual,
# columns = predicted) instead of retyping its counts, so they cannot drift
# out of sync with the table when the split or the model changes.
# Accuracy: share of all validation rows on the diagonal.
accuracy <- sum(diag(performance)) / sum(performance)
# Sensitivity: share of actual "Yes" rows predicted "Yes".
Sensitivity <- performance["Yes", "Yes"] / sum(performance["Yes", ])
# Specificity: share of actual "No" rows predicted "No".
Specificity <- performance["No", "No"] / sum(performance["No", ])
# The values shown below correspond to the confusion matrix printed above.
accuracy
## [1] 0.8521327
Sensitivity
## [1] 0.7272727
Specificity
## [1] 0.8973531
#(19)
# Repeat the classification with a lower cut-off of 0.45. Explicit factor
# levels keep the confusion matrix 2 x 2 even if one class is never predicted.
pred_churn2 <- factor(ifelse(pred >= 0.45, "Yes", "No"),
                      levels = c("No", "Yes"))
actual_churn2 <- factor(ifelse(validation$Churn == 1, "Yes", "No"),
                        levels = c("No", "Yes"))
# Rows: actual class; columns: predicted class.
performance2 <- table(actual_churn2, pred_churn2)
performance2
## pred_churn2
## actual_churn2 No Yes
## No 1366 183
## Yes 131 430
# Metrics are read from performance2 rather than retyped, so they always
# agree with the table; the values shown below correspond to the matrix
# printed above.
accuracy2 <- sum(diag(performance2)) / sum(performance2)
Sensitivity2 <- performance2["Yes", "Yes"] / sum(performance2["Yes", ])
Specificity2 <- performance2["No", "No"] / sum(performance2["No", ])
accuracy2
## [1] 0.8511848
Sensitivity2
## [1] 0.7664884
Specificity2
## [1] 0.8818593
#(20)
# library() stops with an error if the package is missing; require() would
# only return FALSE and let the script fail later at the tree() call.
library(tree)
# Churn is a numeric 0/1 column here, so tree() fits a regression tree whose
# fitted values are later thresholded like probabilities.
model_tree <- tree(Churn ~ ., data = train)
summary(model_tree)
##
## Regression tree:
## tree(formula = Churn ~ ., data = train)
## Variables actually used in tree construction:
## [1] "Metric3" "tenure"
## [3] "Contract.xTwo.year" "Contract.xOne.year"
## [5] "InternetService.xFiber.optic"
## Number of terminal nodes: 8
## Residual mean deviance: 0.1102 = 541.7 / 4914
## Distribution of residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.77460 -0.08134 -0.03223 0.00000 0.22540 0.96780
# Draw the fitted tree and label its splits and leaves.
plot(model_tree)
text(model_tree, pretty = 0)

# Fitted values of the tree on the validation rows, classified at a 0.5
# cut-off.
pred2 <- predict(model_tree, type = "vector", newdata = validation)
# Explicit levels keep the confusion matrix 2 x 2 even if the tree never
# predicts one of the classes.
pred_churn3 <- factor(ifelse(pred2 >= 0.5, "Yes", "No"),
                      levels = c("No", "Yes"))
actual_churn3 <- factor(ifelse(validation$Churn == 1, "Yes", "No"),
                        levels = c("No", "Yes"))
# Rows: actual class; columns: predicted class.
performance3 <- table(actual_churn3, pred_churn3)
performance3
## pred_churn3
## actual_churn3 No Yes
## No 1416 133
## Yes 183 378