day4 discussion

Author

RL

Set Up

rm(list = ls())
cat("\f")
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(stargazer)

Please cite as: 
 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
library(visdat)
library(psych)

Import Data

telecom_zipcode_population <- read.csv("C:/Users/rache/Downloads/telecom_zipcode_population.csv")

telecom_customer_churn <- read.csv("C:/Users/rache/Downloads/telecom_customer_churn.csv")
  • Zip.Code is unique to telecom_zipcode_population data frame

  • Zip.Code is not unique to telecom_customer_churn data frame

One to Many merge

telecom_customer_zipcode <-
  merge(x=telecom_zipcode_population,
        y= telecom_customer_churn,
        by.x="Zip.Code",        #data formatted diff.
        by.y="Zip.code")

rm(telecom_customer_churn)
rm(telecom_zipcode_population)

Summary Stats

describe(telecom_customer_zipcode)   # psych package
                                  vars    n     mean       sd   median  trimmed
Zip.Code                             1 7043 93486.07  1856.77 93518.00 93571.63
Population                           2 7043 22139.60 21152.39 17554.00 19503.96
Customer.ID*                         3 7043  3522.00  2033.28  3522.00  3522.00
Gender*                              4 7043     1.50     0.50     2.00     1.51
Age                                  5 7043    46.51    16.75    46.00    46.05
Married*                             6 7043     1.48     0.50     1.00     1.48
Number.of.Dependents                 7 7043     0.47     0.96     0.00     0.23
City*                                8 7043   589.04   307.80   605.00   597.55
Latitude                             9 7043    36.20     2.47    36.21    36.06
Longitude                           10 7043  -119.76     2.15  -119.60  -119.73
Number.of.Referrals                 11 7043     1.95     3.00     0.00     1.32
Tenure.in.Months                    12 7043    32.39    24.54    29.00    31.44
Offer*                              13 7043     2.40     1.83     1.00     2.12
Phone.Service*                      14 7043     1.90     0.30     2.00     2.00
Avg.Monthly.Long.Distance.Charges   15 6361    25.42    14.20    25.69    25.41
Multiple.Lines*                     16 7043     2.33     0.64     2.00     2.40
Internet.Service*                   17 7043     1.78     0.41     2.00     1.85
Internet.Type*                      18 7043     2.88     1.18     3.00     2.97
Avg.Monthly.GB.Download             19 5517    26.19    19.59    21.00    23.57
Online.Security*                    20 7043     2.07     0.71     2.00     2.09
Online.Backup*                      21 7043     2.13     0.74     2.00     2.16
Device.Protection.Plan*             22 7043     2.13     0.74     2.00     2.16
Premium.Tech.Support*               23 7043     2.07     0.71     2.00     2.09
Streaming.TV*                       24 7043     2.17     0.76     2.00     2.21
Streaming.Movies*                   25 7043     2.17     0.76     2.00     2.21
Streaming.Music*                    26 7043     2.14     0.74     2.00     2.17
Unlimited.Data*                     27 7043     2.46     0.83     3.00     2.57
Contract*                           28 7043     1.75     0.85     1.00     1.69
Paperless.Billing*                  29 7043     1.59     0.49     2.00     1.62
Payment.Method*                     30 7043     1.50     0.60     1.00     1.43
Monthly.Charge                      31 7043    63.60    31.20    70.05    64.06
Total.Charges                       32 7043  2280.38  2266.22  1394.55  1966.73
Total.Refunds                       33 7043     1.96     7.90     0.00     0.00
Total.Extra.Data.Charges            34 7043     6.86    25.10     0.00     0.04
Total.Long.Distance.Charges         35 7043   749.10   846.66   401.44   605.63
Total.Revenue                       36 7043  3034.38  2865.20  2108.64  2660.75
Customer.Status*                    37 7043     2.40     0.88     3.00     2.51
Churn.Category*                     38 7043     1.67     1.28     1.00     1.36
Churn.Reason*                       39 7043     2.96     4.42     1.00     1.71
                                       mad      min       max     range  skew
Zip.Code                           2379.57 90001.00  96150.00   6149.00 -0.21
Population                        23471.04    11.00 105285.00 105274.00  0.91
Customer.ID*                       2610.86     1.00   7043.00   7042.00  0.00
Gender*                               0.00     1.00      2.00      1.00 -0.02
Age                                  20.76    19.00     80.00     61.00  0.16
Married*                              0.00     1.00      2.00      1.00  0.07
Number.of.Dependents                  0.00     0.00      9.00      9.00  2.11
City*                               375.10     1.00   1106.00   1105.00 -0.21
Latitude                              3.22    32.56     41.96      9.41  0.31
Longitude                             2.74  -124.30   -114.19     10.11 -0.09
Number.of.Referrals                   0.00     0.00     11.00     11.00  1.45
Tenure.in.Months                     32.62     1.00     72.00     71.00  0.24
Offer*                                0.00     1.00      6.00      5.00  0.92
Phone.Service*                        0.00     1.00      2.00      1.00 -2.73
Avg.Monthly.Long.Distance.Charges    18.22     1.01     49.99     48.98  0.00
Multiple.Lines*                       1.48     1.00      3.00      2.00 -0.42
Internet.Service*                     0.00     1.00      2.00      1.00 -1.38
Internet.Type*                        1.48     1.00      4.00      3.00 -0.55
Avg.Monthly.GB.Download              13.34     2.00     85.00     83.00  1.18
Online.Security*                      1.48     1.00      3.00      2.00 -0.10
Online.Backup*                        1.48     1.00      3.00      2.00 -0.21
Device.Protection.Plan*               1.48     1.00      3.00      2.00 -0.21
Premium.Tech.Support*                 1.48     1.00      3.00      2.00 -0.11
Streaming.TV*                         1.48     1.00      3.00      2.00 -0.29
Streaming.Movies*                     1.48     1.00      3.00      2.00 -0.30
Streaming.Music*                      1.48     1.00      3.00      2.00 -0.22
Unlimited.Data*                       0.00     1.00      3.00      2.00 -1.02
Contract*                             0.00     1.00      3.00      2.00  0.49
Paperless.Billing*                    0.00     1.00      2.00      1.00 -0.38
Payment.Method*                       0.00     1.00      3.00      2.00  0.76
Monthly.Charge                       36.32   -10.00    118.75    128.75 -0.28
Total.Charges                      1808.40    18.80   8684.80   8666.00  0.96
Total.Refunds                         0.00     0.00     49.79     49.79  4.33
Total.Extra.Data.Charges              0.00     0.00    150.00    150.00  4.09
Total.Long.Distance.Charges         566.53     0.00   3564.72   3564.72  1.24
Total.Revenue                      2620.66    21.36  11979.34  11957.98  0.92
Customer.Status*                      0.00     1.00      3.00      2.00 -0.88
Churn.Category*                       0.00     1.00      6.00      5.00  1.91
Churn.Reason*                         0.00     1.00     21.00     20.00  2.65
                                  kurtosis     se
Zip.Code                             -1.17  22.12
Population                            0.32 252.05
Customer.ID*                         -1.20  24.23
Gender*                              -2.00   0.01
Age                                  -1.00   0.20
Married*                             -2.00   0.01
Number.of.Dependents                  4.44   0.01
City*                                -1.13   3.67
Latitude                             -1.16   0.03
Longitude                            -1.19   0.03
Number.of.Referrals                   0.72   0.04
Tenure.in.Months                     -1.39   0.29
Offer*                               -0.70   0.02
Phone.Service*                        5.43   0.00
Avg.Monthly.Long.Distance.Charges    -1.21   0.18
Multiple.Lines*                      -0.71   0.01
Internet.Service*                    -0.11   0.00
Internet.Type*                       -1.24   0.01
Avg.Monthly.GB.Download               0.63   0.26
Online.Security*                     -0.99   0.01
Online.Backup*                       -1.15   0.01
Device.Protection.Plan*              -1.15   0.01
Premium.Tech.Support*                -1.01   0.01
Streaming.TV*                        -1.21   0.01
Streaming.Movies*                    -1.21   0.01
Streaming.Music*                     -1.16   0.01
Unlimited.Data*                      -0.76   0.01
Contract*                            -1.44   0.01
Paperless.Billing*                   -1.86   0.01
Payment.Method*                      -0.40   0.01
Monthly.Charge                       -1.13   0.37
Total.Charges                        -0.23  27.00
Total.Refunds                        18.33   0.09
Total.Extra.Data.Charges             16.44   0.30
Total.Long.Distance.Charges           0.64  10.09
Total.Revenue                        -0.20  34.14
Customer.Status*                     -1.12   0.01
Churn.Category*                       2.76   0.02
Churn.Reason*                         6.36   0.05
stargazer(telecom_customer_zipcode,  # stargazer package
          type = "text")

=================================================================================
Statistic                           N      Mean     St. Dev.    Min       Max    
---------------------------------------------------------------------------------
Zip.Code                          7,043 93,486.070 1,856.768   90,001    96,150  
Population                        7,043 22,139.600 21,152.390    11     105,285  
Age                               7,043   46.510     16.750      19        80    
Number.of.Dependents              7,043   0.469      0.963       0         9     
Latitude                          7,043   36.197     2.469     32.556    41.962  
Longitude                         7,043  -119.757    2.154    -124.301  -114.193 
Number.of.Referrals               7,043   1.952      3.001       0         11    
Tenure.in.Months                  7,043   32.387     24.542      1         72    
Avg.Monthly.Long.Distance.Charges 6,361   25.421     14.200    1.010     49.990  
Avg.Monthly.GB.Download           5,517   26.190     19.587      2         85    
Monthly.Charge                    7,043   63.596     31.205   -10.000   118.750  
Total.Charges                     7,043 2,280.381  2,266.220   18.800  8,684.800 
Total.Refunds                     7,043   1.962      7.903     0.000     49.790  
Total.Extra.Data.Charges          7,043   6.861      25.105      0        150    
Total.Long.Distance.Charges       7,043  749.099    846.660    0.000   3,564.720 
Total.Revenue                     7,043 3,034.379  2,865.205   21.360  11,979.340
---------------------------------------------------------------------------------
summary(telecom_customer_zipcode)    # base R 
    Zip.Code       Population     Customer.ID           Gender         
 Min.   :90001   Min.   :    11   Length:7043        Length:7043       
 1st Qu.:92101   1st Qu.:  2344   Class :character   Class :character  
 Median :93518   Median : 17554   Mode  :character   Mode  :character  
 Mean   :93486   Mean   : 22140                                        
 3rd Qu.:95329   3rd Qu.: 36125                                        
 Max.   :96150   Max.   :105285                                        
                                                                       
      Age          Married          Number.of.Dependents     City          
 Min.   :19.00   Length:7043        Min.   :0.0000       Length:7043       
 1st Qu.:32.00   Class :character   1st Qu.:0.0000       Class :character  
 Median :46.00   Mode  :character   Median :0.0000       Mode  :character  
 Mean   :46.51                      Mean   :0.4687                         
 3rd Qu.:60.00                      3rd Qu.:0.0000                         
 Max.   :80.00                      Max.   :9.0000                         
                                                                           
    Latitude       Longitude      Number.of.Referrals Tenure.in.Months
 Min.   :32.56   Min.   :-124.3   Min.   : 0.000      Min.   : 1.00   
 1st Qu.:33.99   1st Qu.:-121.8   1st Qu.: 0.000      1st Qu.: 9.00   
 Median :36.21   Median :-119.6   Median : 0.000      Median :29.00   
 Mean   :36.20   Mean   :-119.8   Mean   : 1.952      Mean   :32.39   
 3rd Qu.:38.16   3rd Qu.:-118.0   3rd Qu.: 3.000      3rd Qu.:55.00   
 Max.   :41.96   Max.   :-114.2   Max.   :11.000      Max.   :72.00   
                                                                      
    Offer           Phone.Service      Avg.Monthly.Long.Distance.Charges
 Length:7043        Length:7043        Min.   : 1.01                    
 Class :character   Class :character   1st Qu.:13.05                    
 Mode  :character   Mode  :character   Median :25.69                    
                                       Mean   :25.42                    
                                       3rd Qu.:37.68                    
                                       Max.   :49.99                    
                                       NA's   :682                      
 Multiple.Lines     Internet.Service   Internet.Type     
 Length:7043        Length:7043        Length:7043       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
 Avg.Monthly.GB.Download Online.Security    Online.Backup     
 Min.   : 2.00           Length:7043        Length:7043       
 1st Qu.:13.00           Class :character   Class :character  
 Median :21.00           Mode  :character   Mode  :character  
 Mean   :26.19                                                
 3rd Qu.:30.00                                                
 Max.   :85.00                                                
 NA's   :1526                                                 
 Device.Protection.Plan Premium.Tech.Support Streaming.TV      
 Length:7043            Length:7043          Length:7043       
 Class :character       Class :character     Class :character  
 Mode  :character       Mode  :character     Mode  :character  
                                                               
                                                               
                                                               
                                                               
 Streaming.Movies   Streaming.Music    Unlimited.Data       Contract        
 Length:7043        Length:7043        Length:7043        Length:7043       
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
                                                                            
 Paperless.Billing  Payment.Method     Monthly.Charge   Total.Charges   
 Length:7043        Length:7043        Min.   :-10.00   Min.   :  18.8  
 Class :character   Class :character   1st Qu.: 30.40   1st Qu.: 400.1  
 Mode  :character   Mode  :character   Median : 70.05   Median :1394.5  
                                       Mean   : 63.60   Mean   :2280.4  
                                       3rd Qu.: 89.75   3rd Qu.:3786.6  
                                       Max.   :118.75   Max.   :8684.8  
                                                                        
 Total.Refunds    Total.Extra.Data.Charges Total.Long.Distance.Charges
 Min.   : 0.000   Min.   :  0.000          Min.   :   0.00            
 1st Qu.: 0.000   1st Qu.:  0.000          1st Qu.:  70.55            
 Median : 0.000   Median :  0.000          Median : 401.44            
 Mean   : 1.962   Mean   :  6.861          Mean   : 749.10            
 3rd Qu.: 0.000   3rd Qu.:  0.000          3rd Qu.:1191.10            
 Max.   :49.790   Max.   :150.000          Max.   :3564.72            
                                                                      
 Total.Revenue      Customer.Status    Churn.Category     Churn.Reason      
 Min.   :   21.36   Length:7043        Length:7043        Length:7043       
 1st Qu.:  605.61   Class :character   Class :character   Class :character  
 Median : 2108.64   Mode  :character   Mode  :character   Mode  :character  
 Mean   : 3034.38                                                           
 3rd Qu.: 4801.15                                                           
 Max.   :11979.34                                                           
                                                                            

Table

library(dplyr)


df <- telecom_customer_zipcode

df_summary <- df |>
  group_by(Zip.Code) |>
  summarise(
    Avg_Population = mean(Population, na.rm = TRUE),
    Avg_Age = median(Age, na.rm = TRUE)
  )

df_summary
# A tibble: 1,626 × 3
   Zip.Code Avg_Population Avg_Age
      <int>          <dbl>   <dbl>
 1    90001          54492    51  
 2    90002          44586    51  
 3    90003          58198    49  
 4    90004          67852    44  
 5    90005          43019    42.5
 6    90006          62784    41  
 7    90007          45025    42  
 8    90008          30852    42  
 9    90010           1957    26  
10    90011         101215    30  
# ℹ 1,616 more rows
?summarise
starting httpd help server ... done