# Read the zip-code population table from the local Downloads folder.
zipcode <- read.csv("~/Downloads/telecom_zipcode_population.csv")
Day 4 discussion
# Load the three CSV inputs used in this discussion. Each object is named
# after the file it is read from.
dict <- read.csv("~/Downloads/telecom_data_dictionary.csv")
churn <- read.csv("~/Downloads/telecom_customer_churn.csv")
zipcode <- read.csv("~/Downloads/telecom_zipcode_population.csv")
# Number of distinct zip codes that appear in the churn data.
# NOTE(review): zip codes appear to repeat across churn rows (the merged
# table has 7,043 rows), so this is not a unique key in churn -- confirm.
length(unique(churn[["Zip.code"]]))
[1] 1626
# Number of distinct zip codes in the zip-code population table.
length(unique(zipcode[["Zip.Code"]]))
[1] 1671
# Join the zip-code table (x) to the churn table (y) on zip code. The key
# column is spelled differently in the two tables ("Zip.Code" vs "Zip.code"),
# hence the separate by.x / by.y arguments. merge() defaults to an inner
# join, so only zip codes present in both tables are kept.
zipcode_churn <- merge(
  x = zipcode,
  y = churn,
  by.x = "Zip.Code",
  by.y = "Zip.code"
)
The merge was one-to-many: one row per zip code in `zipcode` matched to many rows in `churn`.
library(stargazer)
Please cite as:
Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Print a plain-text summary-statistics table for the merged data.
stargazer(zipcode_churn, type = "text")
=================================================================================
Statistic N Mean St. Dev. Min Max
---------------------------------------------------------------------------------
Zip.Code 7,043 93,486.070 1,856.768 90,001 96,150
Population 7,043 22,139.600 21,152.390 11 105,285
Age 7,043 46.510 16.750 19 80
Number.of.Dependents 7,043 0.469 0.963 0 9
Latitude 7,043 36.197 2.469 32.556 41.962
Longitude 7,043 -119.757 2.154 -124.301 -114.193
Number.of.Referrals 7,043 1.952 3.001 0 11
Tenure.in.Months 7,043 32.387 24.542 1 72
Avg.Monthly.Long.Distance.Charges 6,361 25.421 14.200 1.010 49.990
Avg.Monthly.GB.Download 5,517 26.190 19.587 2 85
Monthly.Charge 7,043 63.596 31.205 -10.000 118.750
Total.Charges 7,043 2,280.381 2,266.220 18.800 8,684.800
Total.Refunds 7,043 1.962 7.903 0.000 49.790
Total.Extra.Data.Charges 7,043 6.861 25.105 0 150
Total.Long.Distance.Charges 7,043 749.099 846.660 0.000 3,564.720
Total.Revenue 7,043 3,034.379 2,865.205 21.360 11,979.340
---------------------------------------------------------------------------------
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Per-zip-code summary of the merged data: mean population and median age.
# NOTE: Avg_Age is computed with median(), not mean(); the column name is
# kept as-is so the printed output is unchanged.
zipcode_churn |>
  group_by(Zip.Code) |>
  summarise(
    Avg_Population = mean(Population, na.rm = TRUE),
    Avg_Age = median(Age, na.rm = TRUE)
  )
# A tibble: 1,626 × 3
Zip.Code Avg_Population Avg_Age
<int> <dbl> <dbl>
1 90001 54492 51
2 90002 44586 51
3 90003 58198 49
4 90004 67852 44
5 90005 43019 42.5
6 90006 62784 41
7 90007 45025 42
8 90008 30852 42
9 90010 1957 26
10 90011 101215 30
# ℹ 1,616 more rows