Day 4 Assignment

Author

Moon C

Setup

remove(list = ls())
library(psych)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

Import Data

telecom_customer_churn <- read.csv("~/Downloads/telecom_customer_churn.csv")

telecom_zipcode_population <- read.csv("~/Downloads/telecom_zipcode_population.csv")

Unique ID

?unique
length(unique(telecom_customer_churn$Customer.ID))
[1] 7043
length(unique(telecom_zipcode_population$Zip.Code))
[1] 1671
  • The unique ID for telecom_customer_churn is Customer.ID

  • The unique ID for telecom_zipcode_population is Zip.Code

Merge Data

?merge
telecom_customer_zipcode <- 
  merge(x = telecom_zipcode_population,    # unique
        y = telecom_customer_churn,        # many
     by.x = "Zip.Code", 
     by.y = "Zip.code")                    # one to many

remove(telecom_customer_churn)
remove(telecom_zipcode_population)

Summary Stats

?describe
describe(telecom_customer_zipcode)
                                  vars    n     mean       sd   median  trimmed
Zip.Code                             1 7043 93486.07  1856.77 93518.00 93571.63
Population                           2 7043 22139.60 21152.39 17554.00 19503.96
Customer.ID*                         3 7043  3522.00  2033.28  3522.00  3522.00
Gender*                              4 7043     1.50     0.50     2.00     1.51
Age                                  5 7043    46.51    16.75    46.00    46.05
Married*                             6 7043     1.48     0.50     1.00     1.48
Number.of.Dependents                 7 7043     0.47     0.96     0.00     0.23
City*                                8 7043   589.04   307.80   605.00   597.55
Latitude                             9 7043    36.20     2.47    36.21    36.06
Longitude                           10 7043  -119.76     2.15  -119.60  -119.73
Number.of.Referrals                 11 7043     1.95     3.00     0.00     1.32
Tenure.in.Months                    12 7043    32.39    24.54    29.00    31.44
Offer*                              13 7043     2.40     1.83     1.00     2.12
Phone.Service*                      14 7043     1.90     0.30     2.00     2.00
Avg.Monthly.Long.Distance.Charges   15 6361    25.42    14.20    25.69    25.41
Multiple.Lines*                     16 7043     2.33     0.64     2.00     2.40
Internet.Service*                   17 7043     1.78     0.41     2.00     1.85
Internet.Type*                      18 7043     2.88     1.18     3.00     2.97
Avg.Monthly.GB.Download             19 5517    26.19    19.59    21.00    23.57
Online.Security*                    20 7043     2.07     0.71     2.00     2.09
Online.Backup*                      21 7043     2.13     0.74     2.00     2.16
Device.Protection.Plan*             22 7043     2.13     0.74     2.00     2.16
Premium.Tech.Support*               23 7043     2.07     0.71     2.00     2.09
Streaming.TV*                       24 7043     2.17     0.76     2.00     2.21
Streaming.Movies*                   25 7043     2.17     0.76     2.00     2.21
Streaming.Music*                    26 7043     2.14     0.74     2.00     2.17
Unlimited.Data*                     27 7043     2.46     0.83     3.00     2.57
Contract*                           28 7043     1.75     0.85     1.00     1.69
Paperless.Billing*                  29 7043     1.59     0.49     2.00     1.62
Payment.Method*                     30 7043     1.50     0.60     1.00     1.43
Monthly.Charge                      31 7043    63.60    31.20    70.05    64.06
Total.Charges                       32 7043  2280.38  2266.22  1394.55  1966.73
Total.Refunds                       33 7043     1.96     7.90     0.00     0.00
Total.Extra.Data.Charges            34 7043     6.86    25.10     0.00     0.04
Total.Long.Distance.Charges         35 7043   749.10   846.66   401.44   605.63
Total.Revenue                       36 7043  3034.38  2865.20  2108.64  2660.75
Customer.Status*                    37 7043     2.40     0.88     3.00     2.51
Churn.Category*                     38 7043     1.67     1.28     1.00     1.36
Churn.Reason*                       39 7043     2.96     4.42     1.00     1.71
                                       mad      min       max     range  skew
Zip.Code                           2379.57 90001.00  96150.00   6149.00 -0.21
Population                        23471.04    11.00 105285.00 105274.00  0.91
Customer.ID*                       2610.86     1.00   7043.00   7042.00  0.00
Gender*                               0.00     1.00      2.00      1.00 -0.02
Age                                  20.76    19.00     80.00     61.00  0.16
Married*                              0.00     1.00      2.00      1.00  0.07
Number.of.Dependents                  0.00     0.00      9.00      9.00  2.11
City*                               375.10     1.00   1106.00   1105.00 -0.21
Latitude                              3.22    32.56     41.96      9.41  0.31
Longitude                             2.74  -124.30   -114.19     10.11 -0.09
Number.of.Referrals                   0.00     0.00     11.00     11.00  1.45
Tenure.in.Months                     32.62     1.00     72.00     71.00  0.24
Offer*                                0.00     1.00      6.00      5.00  0.92
Phone.Service*                        0.00     1.00      2.00      1.00 -2.73
Avg.Monthly.Long.Distance.Charges    18.22     1.01     49.99     48.98  0.00
Multiple.Lines*                       1.48     1.00      3.00      2.00 -0.42
Internet.Service*                     0.00     1.00      2.00      1.00 -1.38
Internet.Type*                        1.48     1.00      4.00      3.00 -0.55
Avg.Monthly.GB.Download              13.34     2.00     85.00     83.00  1.18
Online.Security*                      1.48     1.00      3.00      2.00 -0.10
Online.Backup*                        1.48     1.00      3.00      2.00 -0.21
Device.Protection.Plan*               1.48     1.00      3.00      2.00 -0.21
Premium.Tech.Support*                 1.48     1.00      3.00      2.00 -0.11
Streaming.TV*                         1.48     1.00      3.00      2.00 -0.29
Streaming.Movies*                     1.48     1.00      3.00      2.00 -0.30
Streaming.Music*                      1.48     1.00      3.00      2.00 -0.22
Unlimited.Data*                       0.00     1.00      3.00      2.00 -1.02
Contract*                             0.00     1.00      3.00      2.00  0.49
Paperless.Billing*                    0.00     1.00      2.00      1.00 -0.38
Payment.Method*                       0.00     1.00      3.00      2.00  0.76
Monthly.Charge                       36.32   -10.00    118.75    128.75 -0.28
Total.Charges                      1808.40    18.80   8684.80   8666.00  0.96
Total.Refunds                         0.00     0.00     49.79     49.79  4.33
Total.Extra.Data.Charges              0.00     0.00    150.00    150.00  4.09
Total.Long.Distance.Charges         566.53     0.00   3564.72   3564.72  1.24
Total.Revenue                      2620.66    21.36  11979.34  11957.98  0.92
Customer.Status*                      0.00     1.00      3.00      2.00 -0.88
Churn.Category*                       0.00     1.00      6.00      5.00  1.91
Churn.Reason*                         0.00     1.00     21.00     20.00  2.65
                                  kurtosis     se
Zip.Code                             -1.17  22.12
Population                            0.32 252.05
Customer.ID*                         -1.20  24.23
Gender*                              -2.00   0.01
Age                                  -1.00   0.20
Married*                             -2.00   0.01
Number.of.Dependents                  4.44   0.01
City*                                -1.13   3.67
Latitude                             -1.16   0.03
Longitude                            -1.19   0.03
Number.of.Referrals                   0.72   0.04
Tenure.in.Months                     -1.39   0.29
Offer*                               -0.70   0.02
Phone.Service*                        5.43   0.00
Avg.Monthly.Long.Distance.Charges    -1.21   0.18
Multiple.Lines*                      -0.71   0.01
Internet.Service*                    -0.11   0.00
Internet.Type*                       -1.24   0.01
Avg.Monthly.GB.Download               0.63   0.26
Online.Security*                     -0.99   0.01
Online.Backup*                       -1.15   0.01
Device.Protection.Plan*              -1.15   0.01
Premium.Tech.Support*                -1.01   0.01
Streaming.TV*                        -1.21   0.01
Streaming.Movies*                    -1.21   0.01
Streaming.Music*                     -1.16   0.01
Unlimited.Data*                      -0.76   0.01
Contract*                            -1.44   0.01
Paperless.Billing*                   -1.86   0.01
Payment.Method*                      -0.40   0.01
Monthly.Charge                       -1.13   0.37
Total.Charges                        -0.23  27.00
Total.Refunds                        18.33   0.09
Total.Extra.Data.Charges             16.44   0.30
Total.Long.Distance.Charges           0.64  10.09
Total.Revenue                        -0.20  34.14
Customer.Status*                     -1.12   0.01
Churn.Category*                       2.76   0.02
Churn.Reason*                         6.36   0.05

Table

?dplyr
?summarise
?group_by

df <- telecom_customer_zipcode

df_summary <- df |>
  group_by(Zip.Code) |>
  summarise(Avg_Population = mean(Population, na.rm = TRUE),
            Avg_Age = median(Age, na.rm = TRUE)
            )

df_summary
# A tibble: 1,626 × 3
   Zip.Code Avg_Population Avg_Age
      <int>          <dbl>   <dbl>
 1    90001          54492    51  
 2    90002          44586    51  
 3    90003          58198    49  
 4    90004          67852    44  
 5    90005          43019    42.5
 6    90006          62784    41  
 7    90007          45025    42  
 8    90008          30852    42  
 9    90010           1957    26  
10    90011         101215    30  
# ℹ 1,616 more rows