remove(list = ls())
library(psych)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
remove(list = ls())
library(psych)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
<- read.csv("~/Downloads/telecom_customer_churn.csv")
telecom_customer_churn
<- read.csv("~/Downloads/telecom_zipcode_population.csv") telecom_zipcode_population
?uniquelength(unique(telecom_customer_churn$Customer.ID))
[1] 7043
length(unique(telecom_zipcode_population$Zip.Code))
[1] 1671
The unique ID for telecom_customer_churn
is Customer.ID
The unique ID for telecom_zipcode_population
is Zip.Code
?merge<-
telecom_customer_zipcode merge(x = telecom_zipcode_population, # unique
y = telecom_customer_churn, # many
by.x = "Zip.Code",
by.y = "Zip.code") # one to many
remove(telecom_customer_churn)
remove(telecom_zipcode_population)
?describedescribe(telecom_customer_zipcode)
vars n mean sd median trimmed
Zip.Code 1 7043 93486.07 1856.77 93518.00 93571.63
Population 2 7043 22139.60 21152.39 17554.00 19503.96
Customer.ID* 3 7043 3522.00 2033.28 3522.00 3522.00
Gender* 4 7043 1.50 0.50 2.00 1.51
Age 5 7043 46.51 16.75 46.00 46.05
Married* 6 7043 1.48 0.50 1.00 1.48
Number.of.Dependents 7 7043 0.47 0.96 0.00 0.23
City* 8 7043 589.04 307.80 605.00 597.55
Latitude 9 7043 36.20 2.47 36.21 36.06
Longitude 10 7043 -119.76 2.15 -119.60 -119.73
Number.of.Referrals 11 7043 1.95 3.00 0.00 1.32
Tenure.in.Months 12 7043 32.39 24.54 29.00 31.44
Offer* 13 7043 2.40 1.83 1.00 2.12
Phone.Service* 14 7043 1.90 0.30 2.00 2.00
Avg.Monthly.Long.Distance.Charges 15 6361 25.42 14.20 25.69 25.41
Multiple.Lines* 16 7043 2.33 0.64 2.00 2.40
Internet.Service* 17 7043 1.78 0.41 2.00 1.85
Internet.Type* 18 7043 2.88 1.18 3.00 2.97
Avg.Monthly.GB.Download 19 5517 26.19 19.59 21.00 23.57
Online.Security* 20 7043 2.07 0.71 2.00 2.09
Online.Backup* 21 7043 2.13 0.74 2.00 2.16
Device.Protection.Plan* 22 7043 2.13 0.74 2.00 2.16
Premium.Tech.Support* 23 7043 2.07 0.71 2.00 2.09
Streaming.TV* 24 7043 2.17 0.76 2.00 2.21
Streaming.Movies* 25 7043 2.17 0.76 2.00 2.21
Streaming.Music* 26 7043 2.14 0.74 2.00 2.17
Unlimited.Data* 27 7043 2.46 0.83 3.00 2.57
Contract* 28 7043 1.75 0.85 1.00 1.69
Paperless.Billing* 29 7043 1.59 0.49 2.00 1.62
Payment.Method* 30 7043 1.50 0.60 1.00 1.43
Monthly.Charge 31 7043 63.60 31.20 70.05 64.06
Total.Charges 32 7043 2280.38 2266.22 1394.55 1966.73
Total.Refunds 33 7043 1.96 7.90 0.00 0.00
Total.Extra.Data.Charges 34 7043 6.86 25.10 0.00 0.04
Total.Long.Distance.Charges 35 7043 749.10 846.66 401.44 605.63
Total.Revenue 36 7043 3034.38 2865.20 2108.64 2660.75
Customer.Status* 37 7043 2.40 0.88 3.00 2.51
Churn.Category* 38 7043 1.67 1.28 1.00 1.36
Churn.Reason* 39 7043 2.96 4.42 1.00 1.71
mad min max range skew
Zip.Code 2379.57 90001.00 96150.00 6149.00 -0.21
Population 23471.04 11.00 105285.00 105274.00 0.91
Customer.ID* 2610.86 1.00 7043.00 7042.00 0.00
Gender* 0.00 1.00 2.00 1.00 -0.02
Age 20.76 19.00 80.00 61.00 0.16
Married* 0.00 1.00 2.00 1.00 0.07
Number.of.Dependents 0.00 0.00 9.00 9.00 2.11
City* 375.10 1.00 1106.00 1105.00 -0.21
Latitude 3.22 32.56 41.96 9.41 0.31
Longitude 2.74 -124.30 -114.19 10.11 -0.09
Number.of.Referrals 0.00 0.00 11.00 11.00 1.45
Tenure.in.Months 32.62 1.00 72.00 71.00 0.24
Offer* 0.00 1.00 6.00 5.00 0.92
Phone.Service* 0.00 1.00 2.00 1.00 -2.73
Avg.Monthly.Long.Distance.Charges 18.22 1.01 49.99 48.98 0.00
Multiple.Lines* 1.48 1.00 3.00 2.00 -0.42
Internet.Service* 0.00 1.00 2.00 1.00 -1.38
Internet.Type* 1.48 1.00 4.00 3.00 -0.55
Avg.Monthly.GB.Download 13.34 2.00 85.00 83.00 1.18
Online.Security* 1.48 1.00 3.00 2.00 -0.10
Online.Backup* 1.48 1.00 3.00 2.00 -0.21
Device.Protection.Plan* 1.48 1.00 3.00 2.00 -0.21
Premium.Tech.Support* 1.48 1.00 3.00 2.00 -0.11
Streaming.TV* 1.48 1.00 3.00 2.00 -0.29
Streaming.Movies* 1.48 1.00 3.00 2.00 -0.30
Streaming.Music* 1.48 1.00 3.00 2.00 -0.22
Unlimited.Data* 0.00 1.00 3.00 2.00 -1.02
Contract* 0.00 1.00 3.00 2.00 0.49
Paperless.Billing* 0.00 1.00 2.00 1.00 -0.38
Payment.Method* 0.00 1.00 3.00 2.00 0.76
Monthly.Charge 36.32 -10.00 118.75 128.75 -0.28
Total.Charges 1808.40 18.80 8684.80 8666.00 0.96
Total.Refunds 0.00 0.00 49.79 49.79 4.33
Total.Extra.Data.Charges 0.00 0.00 150.00 150.00 4.09
Total.Long.Distance.Charges 566.53 0.00 3564.72 3564.72 1.24
Total.Revenue 2620.66 21.36 11979.34 11957.98 0.92
Customer.Status* 0.00 1.00 3.00 2.00 -0.88
Churn.Category* 0.00 1.00 6.00 5.00 1.91
Churn.Reason* 0.00 1.00 21.00 20.00 2.65
kurtosis se
Zip.Code -1.17 22.12
Population 0.32 252.05
Customer.ID* -1.20 24.23
Gender* -2.00 0.01
Age -1.00 0.20
Married* -2.00 0.01
Number.of.Dependents 4.44 0.01
City* -1.13 3.67
Latitude -1.16 0.03
Longitude -1.19 0.03
Number.of.Referrals 0.72 0.04
Tenure.in.Months -1.39 0.29
Offer* -0.70 0.02
Phone.Service* 5.43 0.00
Avg.Monthly.Long.Distance.Charges -1.21 0.18
Multiple.Lines* -0.71 0.01
Internet.Service* -0.11 0.00
Internet.Type* -1.24 0.01
Avg.Monthly.GB.Download 0.63 0.26
Online.Security* -0.99 0.01
Online.Backup* -1.15 0.01
Device.Protection.Plan* -1.15 0.01
Premium.Tech.Support* -1.01 0.01
Streaming.TV* -1.21 0.01
Streaming.Movies* -1.21 0.01
Streaming.Music* -1.16 0.01
Unlimited.Data* -0.76 0.01
Contract* -1.44 0.01
Paperless.Billing* -1.86 0.01
Payment.Method* -0.40 0.01
Monthly.Charge -1.13 0.37
Total.Charges -0.23 27.00
Total.Refunds 18.33 0.09
Total.Extra.Data.Charges 16.44 0.30
Total.Long.Distance.Charges 0.64 10.09
Total.Revenue -0.20 34.14
Customer.Status* -1.12 0.01
Churn.Category* 2.76 0.02
Churn.Reason* 6.36 0.05
?dplyr
?summarise
?group_by
<- telecom_customer_zipcode
df
<- df |>
df_summary group_by(Zip.Code) |>
summarise(Avg_Population = mean(Population, na.rm = TRUE),
Avg_Age = median(Age, na.rm = TRUE)
)
df_summary
# A tibble: 1,626 × 3
Zip.Code Avg_Population Avg_Age
<int> <dbl> <dbl>
1 90001 54492 51
2 90002 44586 51
3 90003 58198 49
4 90004 67852 44
5 90005 43019 42.5
6 90006 62784 41
7 90007 45025 42
8 90008 30852 42
9 90010 1957 26
10 90011 101215 30
# ℹ 1,616 more rows