Question 1: Read telecom.csv data

# Load the raw telecom export; fields that are empty strings are read as NA
churn_data <- read.csv("telecom.csv", na.strings = "")

# Preview the first six rows of the raw data
head(churn_data, n = 6L)
##   customerID gender SeniorCitizen Partner Dependents tenure PhoneService
## 1 7590-VHVEG Female             0     Yes         No      1           No
## 2 5575-GNVDE   Male             0      No         No     34          Yes
## 3 3668-QPYBK   Male             0      No         No      2          Yes
## 4 7795-CFOCW   Male             0      No         No     45           No
## 5 9237-HQITU Female             0      No         No      2          Yes
## 6 9305-CDSKC Female             0      No         No      8          Yes
##      MultipleLines InternetService OnlineSecurity OnlineBackup DeviceProtection
## 1 No phone service             DSL             No          Yes               No
## 2               No             DSL            Yes           No              Yes
## 3               No             DSL            Yes          Yes               No
## 4 No phone service             DSL            Yes           No              Yes
## 5               No     Fiber optic             No           No               No
## 6              Yes     Fiber optic             No           No              Yes
##   TechSupport StreamingTV StreamingMovies       Contract PaperlessBilling
## 1          No          No              No Month-to-month              Yes
## 2          No          No              No       One year               No
## 3          No          No              No Month-to-month              Yes
## 4         Yes          No              No       One year               No
## 5          No          No              No Month-to-month              Yes
## 6          No         Yes             Yes Month-to-month              Yes
##               PaymentMethod Monthly.Charges Total.Charges Churn
## 1          Electronic check         $29.85         $29.85  <NA>
## 2              Mailed check         $56.95     $1,889.50     No
## 3              Mailed check         $53.85       $108.15    Yes
## 4 Bank transfer (automatic)         $42.30     $1,840.75     No
## 5          Electronic check         $70.70       $151.65    Yes
## 6          Electronic check         $99.65       $820.50    Yes

Question 2: Clean the churn_data as follows

Part i:

# Keep only the customer identifier, the two charge columns and the churn flag
churn_cleaned <- churn_data[
  ,
  c("customerID", "Monthly.Charges", "Total.Charges", "Churn"),
  drop = FALSE
]

Part ii:

# renaming columns
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
# Swap the dotted column names for underscore-separated ones
churn_cleaned <- churn_cleaned |>
  rename(
    Monthly_Charges = Monthly.Charges,
    Total_Charges = Total.Charges
  )

Part iii:

# Filtering rows: discard every row whose Churn value is missing
churn_cleaned <- churn_cleaned |>
  drop_na(Churn)

Part iv:

# Collapse rows that are identical across all remaining columns
churn_cleaned <- churn_cleaned |>
  distinct()

Part v:

# Reformatting columns: the charge columns hold text such as "$1,889.50".
# Remove every "$" and "," and convert what is left to numeric.

# Monthly_Charges
churn_cleaned$Monthly_Charges <- churn_cleaned$Monthly_Charges |>
  str_replace_all("[$,]", "") |>
  as.numeric()

# Total_Charges
churn_cleaned$Total_Charges <- churn_cleaned$Total_Charges |>
  str_replace_all("[$,]", "") |>
  as.numeric()

# Preview the cleaned data
head(churn_cleaned, n = 6L)
##   customerID Monthly_Charges Total_Charges Churn
## 1 5575-GNVDE           56.95       1889.50    No
## 2 3668-QPYBK           53.85        108.15   Yes
## 3 7795-CFOCW           42.30       1840.75    No
## 4 9237-HQITU           70.70        151.65   Yes
## 5 9305-CDSKC           99.65        820.50   Yes
## 6 6713-OKOMC           29.75        301.90    No

Question 3:

# Share of customers who churned: rows with Churn == "Yes" divided by all rows.
# The stored value is a fraction between 0 and 1, not yet scaled to 0-100.
percentage_churn <-
  sum(churn_cleaned$Churn == "Yes", na.rm = TRUE) / nrow(churn_cleaned)

25% of people have churned

Question 4:

# Mean monthly charge over the rows where Churn is "Yes"
# (which() keeps only rows where the comparison is TRUE)
monthly_charges_churned <- mean(
  churn_cleaned$Monthly_Charges[which(churn_cleaned$Churn == "Yes")]
)

The average monthly charge for someone who has churned is $75.8

Question 5:

# Mean monthly charge over the rows where Churn is "No"
# (which() keeps only rows where the comparison is TRUE)
monthly_charges_stayed <- mean(
  churn_cleaned$Monthly_Charges[which(churn_cleaned$Churn == "No")]
)

The average monthly charge for someone who stayed is $65.04

Question 6:

# Median of Total_Charges across every row of the cleaned data
total_charges_median <- median(churn_cleaned[["Total_Charges"]])

The median of all total charges is $1425.5