<- read.csv("telecom_data_dictionary.csv", stringsAsFactors = FALSE)
telecom_data_dictionary <- read.csv("telecom_customer_churn.csv", stringsAsFactors = FALSE)
telecom_customer_churn <- read.csv("telecom_zipcode_population.csv", stringsAsFactors = FALSE) telecom_zipcode_population
Day 4 HW
names(telecom_customer_churn)
[1] "Customer.ID" "Gender"
[3] "Age" "Married"
[5] "Number.of.Dependents" "City"
[7] "Zip.code" "Latitude"
[9] "Longitude" "Number.of.Referrals"
[11] "Tenure.in.Months" "Offer"
[13] "Phone.Service" "Avg.Monthly.Long.Distance.Charges"
[15] "Multiple.Lines" "Internet.Service"
[17] "Internet.Type" "Avg.Monthly.GB.Download"
[19] "Online.Security" "Online.Backup"
[21] "Device.Protection.Plan" "Premium.Tech.Support"
[23] "Streaming.TV" "Streaming.Movies"
[25] "Streaming.Music" "Unlimited.Data"
[27] "Contract" "Paperless.Billing"
[29] "Payment.Method" "Monthly.Charge"
[31] "Total.Charges" "Total.Refunds"
[33] "Total.Extra.Data.Charges" "Total.Long.Distance.Charges"
[35] "Total.Revenue" "Customer.Status"
[37] "Churn.Category" "Churn.Reason"
names(telecom_zipcode_population)
[1] "Zip.Code" "Population"
<- intersect(
common_key names(telecom_customer_churn),
names(telecom_zipcode_population)
)
<- merge(
telecom_merged x = telecom_customer_churn,
y = telecom_zipcode_population,
by = common_key,
all.x = TRUE
)
summary(telecom_merged)
Customer.ID Gender Age Married
Length:11768853 Length:11768853 Min. :19.00 Length:11768853
Class :character Class :character 1st Qu.:32.00 Class :character
Mode :character Mode :character Median :46.00 Mode :character
Mean :46.51
3rd Qu.:60.00
Max. :80.00
Number.of.Dependents City Zip.code Latitude
Min. :0.0000 Length:11768853 Min. :90001 Min. :32.56
1st Qu.:0.0000 Class :character 1st Qu.:92101 1st Qu.:33.99
Median :0.0000 Mode :character Median :93518 Median :36.21
Mean :0.4687 Mean :93486 Mean :36.20
3rd Qu.:0.0000 3rd Qu.:95329 3rd Qu.:38.16
Max. :9.0000 Max. :96150 Max. :41.96
Longitude Number.of.Referrals Tenure.in.Months Offer
Min. :-124.3 Min. : 0.000 Min. : 1.00 Length:11768853
1st Qu.:-121.8 1st Qu.: 0.000 1st Qu.: 9.00 Class :character
Median :-119.6 Median : 0.000 Median :29.00 Mode :character
Mean :-119.8 Mean : 1.952 Mean :32.39
3rd Qu.:-118.0 3rd Qu.: 3.000 3rd Qu.:55.00
Max. :-114.2 Max. :11.000 Max. :72.00
Phone.Service Avg.Monthly.Long.Distance.Charges Multiple.Lines
Length:11768853 Min. : 1.01 Length:11768853
Class :character 1st Qu.:13.05 Class :character
Mode :character Median :25.69 Mode :character
Mean :25.42
3rd Qu.:37.68
Max. :49.99
NA's :1139622
Internet.Service Internet.Type Avg.Monthly.GB.Download
Length:11768853 Length:11768853 Min. : 2.00
Class :character Class :character 1st Qu.:13.00
Mode :character Mode :character Median :21.00
Mean :26.19
3rd Qu.:30.00
Max. :85.00
NA's :2549946
Online.Security Online.Backup Device.Protection.Plan
Length:11768853 Length:11768853 Length:11768853
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
Premium.Tech.Support Streaming.TV Streaming.Movies Streaming.Music
Length:11768853 Length:11768853 Length:11768853 Length:11768853
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
Unlimited.Data Contract Paperless.Billing Payment.Method
Length:11768853 Length:11768853 Length:11768853 Length:11768853
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
Monthly.Charge Total.Charges Total.Refunds Total.Extra.Data.Charges
Min. :-10.00 Min. : 18.8 Min. : 0.000 Min. : 0.000
1st Qu.: 30.40 1st Qu.: 400.0 1st Qu.: 0.000 1st Qu.: 0.000
Median : 70.05 Median :1394.5 Median : 0.000 Median : 0.000
Mean : 63.60 Mean :2280.4 Mean : 1.962 Mean : 6.861
3rd Qu.: 89.75 3rd Qu.:3789.2 3rd Qu.: 0.000 3rd Qu.: 0.000
Max. :118.75 Max. :8684.8 Max. :49.790 Max. :150.000
Total.Long.Distance.Charges Total.Revenue Customer.Status
Min. : 0.0 Min. : 21.36 Length:11768853
1st Qu.: 70.5 1st Qu.: 605.47 Class :character
Median : 401.4 Median : 2108.64 Mode :character
Mean : 749.1 Mean : 3034.38
3rd Qu.:1191.5 3rd Qu.: 4801.93
Max. :3564.7 Max. :11979.34
Churn.Category Churn.Reason Zip.Code Population
Length:11768853 Length:11768853 Min. :90001 Min. : 11
Class :character Class :character 1st Qu.:92268 1st Qu.: 1785
Mode :character Mode :character Median :93664 Median : 14239
Mean :93679 Mean : 20276
3rd Qu.:95409 3rd Qu.: 32984
Max. :96161 Max. :105285
<- telecom_merged %>%
avg_table group_by(Zip.Code) %>%
summarise(
Avg_Population = mean(Population, na.rm = TRUE),
Avg_Age = mean(Age, na.rm = TRUE)
%>%
) arrange(Zip.Code)
print(avg_table)
# A tibble: 1,671 × 3
Zip.Code Avg_Population Avg_Age
<int> <dbl> <dbl>
1 90001 54492 46.5
2 90002 44586 46.5
3 90003 58198 46.5
4 90004 67852 46.5
5 90005 43019 46.5
6 90006 62784 46.5
7 90007 45025 46.5
8 90008 30852 46.5
9 90010 1957 46.5
10 90011 101215 46.5
# ℹ 1,661 more rows