# Load the raw telecom export; empty fields are read in as NA.
churn_data <- read.csv("telecom.csv", na.strings = "")
# Preview the first rows of the imported data.
head(churn_data)
## customerID gender SeniorCitizen Partner Dependents tenure PhoneService
## 1 7590-VHVEG Female 0 Yes No 1 No
## 2 5575-GNVDE Male 0 No No 34 Yes
## 3 3668-QPYBK Male 0 No No 2 Yes
## 4 7795-CFOCW Male 0 No No 45 No
## 5 9237-HQITU Female 0 No No 2 Yes
## 6 9305-CDSKC Female 0 No No 8 Yes
## MultipleLines InternetService OnlineSecurity OnlineBackup DeviceProtection
## 1 No phone service DSL No Yes No
## 2 No DSL Yes No Yes
## 3 No DSL Yes Yes No
## 4 No phone service DSL Yes No Yes
## 5 No Fiber optic No No No
## 6 Yes Fiber optic No No Yes
## TechSupport StreamingTV StreamingMovies Contract PaperlessBilling
## 1 No No No Month-to-month Yes
## 2 No No No One year No
## 3 No No No Month-to-month Yes
## 4 Yes No No One year No
## 5 No No No Month-to-month Yes
## 6 No Yes Yes Month-to-month Yes
## PaymentMethod Monthly.Charges Total.Charges Churn
## 1 Electronic check $29.85 $29.85 <NA>
## 2 Mailed check $56.95 $1,889.50 No
## 3 Mailed check $53.85 $108.15 Yes
## 4 Bank transfer (automatic) $42.30 $1,840.75 No
## 5 Electronic check $70.70 $151.65 Yes
## 6 Electronic check $99.65 $820.50 Yes
# Keep only the customer identifier, the two charge columns, and the churn
# flag; every other column is left behind in churn_data.
churn_cleaned <- churn_data[
  ,
  c("customerID", "Monthly.Charges", "Total.Charges", "Churn"),
  drop = FALSE
]
# renaming columns
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
# Tidy the selected columns in one pass:
#   1. replace the dots in the charge column names with underscores,
#   2. drop rows (not columns) whose Churn value is missing,
#   3. remove exact duplicate rows.
churn_cleaned <- churn_cleaned %>%
  rename(
    Monthly_Charges = Monthly.Charges,
    Total_Charges = Total.Charges
  ) %>%
  drop_na(Churn) %>%
  distinct()
# Both charge columns arrive as text such as "$1,889.50"; strip the "$" and
# "," characters and convert what is left to numeric.
churn_cleaned[c("Monthly_Charges", "Total_Charges")] <- lapply(
  churn_cleaned[c("Monthly_Charges", "Total_Charges")],
  function(charge) as.numeric(str_replace_all(charge, "[$,]", ""))
)
# Preview the cleaned data.
head(churn_cleaned)
## customerID Monthly_Charges Total_Charges Churn
## 1 5575-GNVDE 56.95 1889.50 No
## 2 3668-QPYBK 53.85 108.15 Yes
## 3 7795-CFOCW 42.30 1840.75 No
## 4 9237-HQITU 70.70 151.65 Yes
## 5 9305-CDSKC 99.65 820.50 Yes
## 6 6713-OKOMC 29.75 301.90 No
# Share of customers who churned: rows with Churn == "Yes" divided by all
# rows. The stored value is a fraction (0 to 1), not multiplied by 100.
percentage_churn <-
  sum(churn_cleaned$Churn == "Yes", na.rm = TRUE) / nrow(churn_cleaned)
25% of people have churned
# Mean Monthly_Charges over the rows whose Churn value is "Yes".
# which() keeps only rows where the comparison is TRUE.
monthly_charges_churned <- with(
  churn_cleaned,
  mean(Monthly_Charges[which(Churn == "Yes")])
)
The average monthly charge for someone who has churned is $75.8
# Mean Monthly_Charges over the rows whose Churn value is "No".
# which() keeps only rows where the comparison is TRUE.
monthly_charges_stayed <- with(
  churn_cleaned,
  mean(Monthly_Charges[which(Churn == "No")])
)
The average monthly charge for someone who stayed is $65.04
# Median of Total_Charges across all rows of the cleaned data
# (churned and retained customers together).
total_charges_median <- median(churn_cleaned[["Total_Charges"]])
The median of all total charges is $1425.5