Day 4 discussion

Author

josh hong

# Load the three telecom CSV files (data dictionary, customer churn,
# zip-code population) from the local Downloads folder. Each file is read once.
dict <- read.csv("~/Downloads/telecom_data_dictionary.csv")
churn <- read.csv("~/Downloads/telecom_customer_churn.csv")
zipcode <- read.csv("~/Downloads/telecom_zipcode_population.csv")

# Count the distinct zip codes that appear in the churn data.
length(unique(churn[["Zip.code"]]))

[1] 1626

# Count the distinct zip codes that appear in the zip-code population data.
length(unique(zipcode[["Zip.Code"]]))

[1] 1671

# Join population data to customers on zip code. The key column is spelled
# "Zip.Code" in `zipcode` and "Zip.code" in `churn`, hence by.x / by.y.
# merge() keeps only rows whose key appears in both tables (all = FALSE default).
zipcode_churn <- merge(
  x = zipcode,
  y = churn,
  by.x = "Zip.Code",
  by.y = "Zip.code"
)

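The merge was one-to-many: each zip code in the population table can match several customers in the churn table (1,626 zip codes spread across 7,043 merged rows).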

library(stargazer)


Please cite as:

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.

 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

# Print a plain-text summary-statistics table for the merged data.
stargazer(zipcode_churn, type = "text")


=================================================================================
Statistic                           N      Mean     St. Dev.    Min       Max    
---------------------------------------------------------------------------------
Zip.Code                          7,043 93,486.070 1,856.768   90,001    96,150  
Population                        7,043 22,139.600 21,152.390    11     105,285  
Age                               7,043   46.510     16.750      19        80    
Number.of.Dependents              7,043   0.469      0.963       0         9     
Latitude                          7,043   36.197     2.469     32.556    41.962  
Longitude                         7,043  -119.757    2.154    -124.301  -114.193 
Number.of.Referrals               7,043   1.952      3.001       0         11    
Tenure.in.Months                  7,043   32.387     24.542      1         72    
Avg.Monthly.Long.Distance.Charges 6,361   25.421     14.200    1.010     49.990  
Avg.Monthly.GB.Download           5,517   26.190     19.587      2         85    
Monthly.Charge                    7,043   63.596     31.205   -10.000   118.750  
Total.Charges                     7,043 2,280.381  2,266.220   18.800  8,684.800 
Total.Refunds                     7,043   1.962      7.903     0.000     49.790  
Total.Extra.Data.Charges          7,043   6.861      25.105      0        150    
Total.Long.Distance.Charges       7,043  749.099    846.660    0.000   3,564.720 
Total.Revenue                     7,043 3,034.379  2,865.205   21.360  11,979.340
---------------------------------------------------------------------------------

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

# Per-zip-code summary: mean population and median customer age.
zipcode_churn |>
  group_by(Zip.Code) |>
  summarise(
    Avg_Population = mean(Population, na.rm = TRUE),
    # Named for the statistic actually computed (a median, not a mean).
    Median_Age = median(Age, na.rm = TRUE)
  )

# A tibble: 1,626 × 3
   Zip.Code Avg_Population Avg_Age
      <int>          <dbl>   <dbl>
 1    90001          54492    51  
 2    90002          44586    51  
 3    90003          58198    49  
 4    90004          67852    44  
 5    90005          43019    42.5
 6    90006          62784    41  
 7    90007          45025    42  
 8    90008          30852    42  
 9    90010           1957    26  
10    90011         101215    30  
# ℹ 1,616 more rows