# Load the customer churn data and the zip-code population lookup table.
# Start from a fresh R session for a clean workspace; rm(list = ls()) is
# deliberately not called here because it only hides leftover-state problems.
customer_churn <- read.csv("~/Downloads/telecom_customer_churn.csv")
zipcode_pop <- read.csv("~/Downloads/telecom_zipcode_population.csv")

# Inspect the merge key in both tables.
# Column names are case-sensitive: `$Zip.code` does not exist, so `$` silently
# returns NULL (and length(unique(NULL)) is 0). The column is `Zip.Code`.
unique(customer_churn$Zip.Code)
length(unique(customer_churn$Zip.Code))
unique(zipcode_pop$Zip.Code)
length(unique(zipcode_pop$Zip.Code))

# Character copies of the key from each table.
cc_ID <- as.character(customer_churn$Zip.Code)
zp_ID <- as.character(zipcode_pop$Zip.Code)
# NOTE(review): if the two files have different row counts, cbind() recycles
# the shorter vector, so rows of `text` are not matched pairs -- confirm this
# side-by-side view is still wanted.
text <- cbind(cc_ID, zp_ID)

# Attach the zip-code level columns to every customer row (see ?merge).
# merge() defaults to all = FALSE, so only zip codes present in both tables
# are kept in `combined`.
combined <- merge(
  x = customer_churn,
  y = zipcode_pop,
  by = "Zip.Code"
)
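Zip.Code is the merge key. It identifies one row per zip code in zipcode_pop, but it repeats in customer_churn because several customers can share a zip code.
This is therefore a many-to-one merge: many customer rows are matched to one zip-code row.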
library(psych)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Print a plain-text table of summary statistics (mean, median, standard
# deviation, min, max) for the merged data. Run ?stargazer in the console for
# the full list of options; the help call is kept out of the script because it
# is an interactive lookup, not part of the analysis.
# NOTE(review): the recorded output shows a minimum Monthly.Charge of -10,
# which looks like a data-quality issue -- confirm before modelling.
stargazer(
  combined,
  type = "text",
  summary.stat = c("Mean", "Median", "Sd", "Min", "Max")
)
##
## =====================================================================================
## Statistic Mean Median St. Dev. Min Max
## -------------------------------------------------------------------------------------
## Zip.Code 93,486.070 93,518 1,856.768 90,001 96,150
## Age 46.510 46 16.750 19 80
## Number.of.Dependents 0.469 0 0.963 0 9
## Latitude 36.197 36.205 2.469 32.556 41.962
## Longitude -119.757 -119.595 2.154 -124.301 -114.193
## Number.of.Referrals 1.952 0 3.001 0 11
## Tenure.in.Months 32.387 29 24.542 1 72
## Avg.Monthly.Long.Distance.Charges 25.421 25.690 14.200 1.010 49.990
## Avg.Monthly.GB.Download 26.190 21 19.587 2 85
## Monthly.Charge 63.596 70.050 31.205 -10.000 118.750
## Total.Charges 2,280.381 1,394.550 2,266.220 18.800 8,684.800
## Total.Refunds 1.962 0.000 7.903 0.000 49.790
## Total.Extra.Data.Charges 6.861 0 25.105 0 150
## Total.Long.Distance.Charges 749.099 401.440 846.660 0.000 3,564.720
## Total.Revenue 3,034.379 2,108.640 2,865.205 21.360 11,979.340
## Population 22,139.600 17,554 21,152.390 11 105,285
## -------------------------------------------------------------------------------------
Then, try to create a table that lists 3 variables: the zip code, the average age, and the average population for each zip code.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Warm-up check of dplyr::mutate() on the built-in iris data: append a `sepal`
# column equal to Sepal.Length + Sepal.Width and print the full result.
iris |>
  dplyr::mutate(sepal = Sepal.Length + Sepal.Width)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species sepal
## 1 5.1 3.5 1.4 0.2 setosa 8.6
## 2 4.9 3.0 1.4 0.2 setosa 7.9
## 3 4.7 3.2 1.3 0.2 setosa 7.9
## 4 4.6 3.1 1.5 0.2 setosa 7.7
## 5 5.0 3.6 1.4 0.2 setosa 8.6
## 6 5.4 3.9 1.7 0.4 setosa 9.3
## 7 4.6 3.4 1.4 0.3 setosa 8.0
## 8 5.0 3.4 1.5 0.2 setosa 8.4
## 9 4.4 2.9 1.4 0.2 setosa 7.3
## 10 4.9 3.1 1.5 0.1 setosa 8.0
## 11 5.4 3.7 1.5 0.2 setosa 9.1
## 12 4.8 3.4 1.6 0.2 setosa 8.2
## 13 4.8 3.0 1.4 0.1 setosa 7.8
## 14 4.3 3.0 1.1 0.1 setosa 7.3
## 15 5.8 4.0 1.2 0.2 setosa 9.8
## 16 5.7 4.4 1.5 0.4 setosa 10.1
## 17 5.4 3.9 1.3 0.4 setosa 9.3
## 18 5.1 3.5 1.4 0.3 setosa 8.6
## 19 5.7 3.8 1.7 0.3 setosa 9.5
## 20 5.1 3.8 1.5 0.3 setosa 8.9
## 21 5.4 3.4 1.7 0.2 setosa 8.8
## 22 5.1 3.7 1.5 0.4 setosa 8.8
## 23 4.6 3.6 1.0 0.2 setosa 8.2
## 24 5.1 3.3 1.7 0.5 setosa 8.4
## 25 4.8 3.4 1.9 0.2 setosa 8.2
## 26 5.0 3.0 1.6 0.2 setosa 8.0
## 27 5.0 3.4 1.6 0.4 setosa 8.4
## 28 5.2 3.5 1.5 0.2 setosa 8.7
## 29 5.2 3.4 1.4 0.2 setosa 8.6
## 30 4.7 3.2 1.6 0.2 setosa 7.9
## 31 4.8 3.1 1.6 0.2 setosa 7.9
## 32 5.4 3.4 1.5 0.4 setosa 8.8
## 33 5.2 4.1 1.5 0.1 setosa 9.3
## 34 5.5 4.2 1.4 0.2 setosa 9.7
## 35 4.9 3.1 1.5 0.2 setosa 8.0
## 36 5.0 3.2 1.2 0.2 setosa 8.2
## 37 5.5 3.5 1.3 0.2 setosa 9.0
## 38 4.9 3.6 1.4 0.1 setosa 8.5
## 39 4.4 3.0 1.3 0.2 setosa 7.4
## 40 5.1 3.4 1.5 0.2 setosa 8.5
## 41 5.0 3.5 1.3 0.3 setosa 8.5
## 42 4.5 2.3 1.3 0.3 setosa 6.8
## 43 4.4 3.2 1.3 0.2 setosa 7.6
## 44 5.0 3.5 1.6 0.6 setosa 8.5
## 45 5.1 3.8 1.9 0.4 setosa 8.9
## 46 4.8 3.0 1.4 0.3 setosa 7.8
## 47 5.1 3.8 1.6 0.2 setosa 8.9
## 48 4.6 3.2 1.4 0.2 setosa 7.8
## 49 5.3 3.7 1.5 0.2 setosa 9.0
## 50 5.0 3.3 1.4 0.2 setosa 8.3
## 51 7.0 3.2 4.7 1.4 versicolor 10.2
## 52 6.4 3.2 4.5 1.5 versicolor 9.6
## 53 6.9 3.1 4.9 1.5 versicolor 10.0
## 54 5.5 2.3 4.0 1.3 versicolor 7.8
## 55 6.5 2.8 4.6 1.5 versicolor 9.3
## 56 5.7 2.8 4.5 1.3 versicolor 8.5
## 57 6.3 3.3 4.7 1.6 versicolor 9.6
## 58 4.9 2.4 3.3 1.0 versicolor 7.3
## 59 6.6 2.9 4.6 1.3 versicolor 9.5
## 60 5.2 2.7 3.9 1.4 versicolor 7.9
## 61 5.0 2.0 3.5 1.0 versicolor 7.0
## 62 5.9 3.0 4.2 1.5 versicolor 8.9
## 63 6.0 2.2 4.0 1.0 versicolor 8.2
## 64 6.1 2.9 4.7 1.4 versicolor 9.0
## 65 5.6 2.9 3.6 1.3 versicolor 8.5
## 66 6.7 3.1 4.4 1.4 versicolor 9.8
## 67 5.6 3.0 4.5 1.5 versicolor 8.6
## 68 5.8 2.7 4.1 1.0 versicolor 8.5
## 69 6.2 2.2 4.5 1.5 versicolor 8.4
## 70 5.6 2.5 3.9 1.1 versicolor 8.1
## 71 5.9 3.2 4.8 1.8 versicolor 9.1
## 72 6.1 2.8 4.0 1.3 versicolor 8.9
## 73 6.3 2.5 4.9 1.5 versicolor 8.8
## 74 6.1 2.8 4.7 1.2 versicolor 8.9
## 75 6.4 2.9 4.3 1.3 versicolor 9.3
## 76 6.6 3.0 4.4 1.4 versicolor 9.6
## 77 6.8 2.8 4.8 1.4 versicolor 9.6
## 78 6.7 3.0 5.0 1.7 versicolor 9.7
## 79 6.0 2.9 4.5 1.5 versicolor 8.9
## 80 5.7 2.6 3.5 1.0 versicolor 8.3
## 81 5.5 2.4 3.8 1.1 versicolor 7.9
## 82 5.5 2.4 3.7 1.0 versicolor 7.9
## 83 5.8 2.7 3.9 1.2 versicolor 8.5
## 84 6.0 2.7 5.1 1.6 versicolor 8.7
## 85 5.4 3.0 4.5 1.5 versicolor 8.4
## 86 6.0 3.4 4.5 1.6 versicolor 9.4
## 87 6.7 3.1 4.7 1.5 versicolor 9.8
## 88 6.3 2.3 4.4 1.3 versicolor 8.6
## 89 5.6 3.0 4.1 1.3 versicolor 8.6
## 90 5.5 2.5 4.0 1.3 versicolor 8.0
## 91 5.5 2.6 4.4 1.2 versicolor 8.1
## 92 6.1 3.0 4.6 1.4 versicolor 9.1
## 93 5.8 2.6 4.0 1.2 versicolor 8.4
## 94 5.0 2.3 3.3 1.0 versicolor 7.3
## 95 5.6 2.7 4.2 1.3 versicolor 8.3
## 96 5.7 3.0 4.2 1.2 versicolor 8.7
## 97 5.7 2.9 4.2 1.3 versicolor 8.6
## 98 6.2 2.9 4.3 1.3 versicolor 9.1
## 99 5.1 2.5 3.0 1.1 versicolor 7.6
## 100 5.7 2.8 4.1 1.3 versicolor 8.5
## 101 6.3 3.3 6.0 2.5 virginica 9.6
## 102 5.8 2.7 5.1 1.9 virginica 8.5
## 103 7.1 3.0 5.9 2.1 virginica 10.1
## 104 6.3 2.9 5.6 1.8 virginica 9.2
## 105 6.5 3.0 5.8 2.2 virginica 9.5
## 106 7.6 3.0 6.6 2.1 virginica 10.6
## 107 4.9 2.5 4.5 1.7 virginica 7.4
## 108 7.3 2.9 6.3 1.8 virginica 10.2
## 109 6.7 2.5 5.8 1.8 virginica 9.2
## 110 7.2 3.6 6.1 2.5 virginica 10.8
## 111 6.5 3.2 5.1 2.0 virginica 9.7
## 112 6.4 2.7 5.3 1.9 virginica 9.1
## 113 6.8 3.0 5.5 2.1 virginica 9.8
## 114 5.7 2.5 5.0 2.0 virginica 8.2
## 115 5.8 2.8 5.1 2.4 virginica 8.6
## 116 6.4 3.2 5.3 2.3 virginica 9.6
## 117 6.5 3.0 5.5 1.8 virginica 9.5
## 118 7.7 3.8 6.7 2.2 virginica 11.5
## 119 7.7 2.6 6.9 2.3 virginica 10.3
## 120 6.0 2.2 5.0 1.5 virginica 8.2
## 121 6.9 3.2 5.7 2.3 virginica 10.1
## 122 5.6 2.8 4.9 2.0 virginica 8.4
## 123 7.7 2.8 6.7 2.0 virginica 10.5
## 124 6.3 2.7 4.9 1.8 virginica 9.0
## 125 6.7 3.3 5.7 2.1 virginica 10.0
## 126 7.2 3.2 6.0 1.8 virginica 10.4
## 127 6.2 2.8 4.8 1.8 virginica 9.0
## 128 6.1 3.0 4.9 1.8 virginica 9.1
## 129 6.4 2.8 5.6 2.1 virginica 9.2
## 130 7.2 3.0 5.8 1.6 virginica 10.2
## 131 7.4 2.8 6.1 1.9 virginica 10.2
## 132 7.9 3.8 6.4 2.0 virginica 11.7
## 133 6.4 2.8 5.6 2.2 virginica 9.2
## 134 6.3 2.8 5.1 1.5 virginica 9.1
## 135 6.1 2.6 5.6 1.4 virginica 8.7
## 136 7.7 3.0 6.1 2.3 virginica 10.7
## 137 6.3 3.4 5.6 2.4 virginica 9.7
## 138 6.4 3.1 5.5 1.8 virginica 9.5
## 139 6.0 3.0 4.8 1.8 virginica 9.0
## 140 6.9 3.1 5.4 2.1 virginica 10.0
## 141 6.7 3.1 5.6 2.4 virginica 9.8
## 142 6.9 3.1 5.1 2.3 virginica 10.0
## 143 5.8 2.7 5.1 1.9 virginica 8.5
## 144 6.8 3.2 5.9 2.3 virginica 10.0
## 145 6.7 3.3 5.7 2.5 virginica 10.0
## 146 6.7 3.0 5.2 2.3 virginica 9.7
## 147 6.3 2.5 5.0 1.9 virginica 8.8
## 148 6.5 3.0 5.2 2.0 virginica 9.5
## 149 6.2 3.4 5.4 2.3 virginica 9.6
## 150 5.9 3.0 5.1 1.8 virginica 8.9
# Append `age`, the mean of Age taken over the whole merged table. No grouping
# is applied, so every row receives the same value (this is not a per-zip-code
# average).
combined <- combined |>
  dplyr::mutate(age = mean(Age))

# Preview the first rows of the built-in iris data.
iris |>
  head()
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Build one row per zip code with the average population and the average
# customer age. Avg_Age uses mean(), matching its name and the task statement.
# The dplyr verbs are namespace-qualified so this chunk also runs when dplyr
# has not been attached in the knitting session, which is what triggers
# "could not find function" errors for summarise()/summarize().
# NOTE: the printed preview that follows this chunk was produced when Avg_Age
# was a median; re-knit the document to refresh it.
custom_table <- combined |>
  dplyr::group_by(Zip.Code) |>
  dplyr::summarise(
    Avg_Population = mean(Population, na.rm = TRUE),
    Avg_Age = mean(Age, na.rm = TRUE)
  )
head(custom_table)
## # A tibble: 6 × 3
## Zip.Code Avg_Population Avg_Age
## <int> <dbl> <dbl>
## 1 90001 54492 51
## 2 90002 44586 51
## 3 90003 58198 49
## 4 90004 67852 44
## 5 90005 43019 42.5
## 6 90006 62784 41
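The function ran in the console and created the table, but I received this error when knitting the document and could not figure out how to fix it so that it would save:
Error in summarize():
! could not find function “summarize”
Quitting from lines 66-73 [unnamed-chunk-5] (Day4_HW.Rmd)
Execution halted
Likely cause: knitting runs the .Rmd in a fresh R session, so packages loaded only in the console are not available. Put library(dplyr) in a chunk that runs before lines 66-73 of Day4_HW.Rmd, or call the function as dplyr::summarize().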