library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidycensus)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(pander)
library(knitr)
library(pander)
library(rmarkdown)
census_api_key("75836456df67458839c36492f8b736a89591e1c1", install = TRUE, overwrite = TRUE)
## Your original .Renviron will be backed up and stored in your R HOME directory if needed.
## Your API key has been stored in your .Renviron and can be accessed by Sys.getenv("CENSUS_API_KEY").
## To use now, restart R or run `readRenviron("~/.Renviron")`
## [1] "75836456df67458839c36492f8b736a89591e1c1"
acs_variable_list.2020 <- load_variables(2020, #year
"acs5", #five year ACS estimates
cache = TRUE)
acs_variable_list.2016 <- load_variables(2016, #year
"acs5", #five year ACS estimates
cache = TRUE)
acs_vars <- c("B01001_001E", # ACS total Pop estimate
"B25002_001E", # Estimate of total housing units
"B25002_003E", # Number of vacant housing units
"B19013_001E", # Median HH Income ($)
"B02001_002E", # People describing themselves as "white alone"
"B06009_006E") # Total graduate or professional degree
acsTractsPHL.2020 <- get_acs(geography = "tract",
year = 2020,
variables = acs_vars,
geometry = FALSE,
state = "PA",
county = "Philadelphia",
output = "wide")
## Getting data from the 2016-2020 5-year ACS
acsTractsPHL.2020 <- acsTractsPHL.2020 %>%
dplyr::select (GEOID, NAME, all_of(acs_vars))
acsTractsPHL.2020 <- acsTractsPHL.2020 %>%
rename (total_pop.2020 = B01001_001E,
total_HU.2020 = B25002_001E,
total_vacant.2020 = B25002_003E,
med_HH_Income.2020 = B19013_001E,
total_White.2020 = B02001_002E,
total_GradDeg.2020 = B06009_006E)
acsTractsPHL.2020 <- acsTractsPHL.2020 %>%
mutate(vacancyPct.2020 = total_vacant.2020/total_HU.2020,
pctWhite.2020 = total_White.2020/total_pop.2020)
acsTractsPHL.2016 <- get_acs(geography = "tract",
year = 2016,
variables = acs_vars,
geometry = FALSE,
state = "PA",
county = "Philadelphia",
output = "wide") %>%
dplyr::select (GEOID, NAME, all_of(acs_vars)) %>%
rename (total_pop.2016 = B01001_001E,
total_HU.2016 = B25002_001E,
total_vacant.2016 = B25002_003E,
med_HH_Income.2016 = B19013_001E,
total_White.2016 = B02001_002E,
total_GradDeg.2016 = B06009_006E) %>%
mutate(vacancyPct.2016 = total_vacant.2016/total_HU.2016,
pctWhite.2016 = total_White.2016/total_pop.2016)
## Getting data from the 2012-2016 5-year ACS
allACS <- left_join(acsTractsPHL.2016, acsTractsPHL.2020,
by= c("GEOID"))
allACS <- allACS %>%
mutate(change_med_HH_Income = med_HH_Income.2020 - (med_HH_Income.2016 * 1.08),
change_Grad_Degree_Pct = (total_GradDeg.2020/total_pop.2020)-(total_GradDeg.2016/total_pop.2016))
allACS <- allACS %>%
mutate(change_vacancyPct = (vacancyPct.2020 - vacancyPct.2016),
pct_change_pop = ((total_pop.2020 - total_pop.2016) / total_pop.2016) * 100,
pct_GradDeg_2020 = (total_GradDeg.2020 / total_pop.2020) * 100)
print(allACS)
## # A tibble: 384 × 24
## GEOID NAME.x total_pop.2016 total_HU.2016 total_vacant.2016
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 42101024900 Census Tract 249,… 3171 1381 286
## 2 42101025800 Census Tract 258,… 1718 800 72
## 3 42101026500 Census Tract 265,… 5106 1907 337
## 4 42101026700 Census Tract 267,… 6147 2834 380
## 5 42101026800 Census Tract 268,… 4554 2086 209
## 6 42101031900 Census Tract 319,… 4938 1890 214
## 7 42101033000 Census Tract 330,… 7231 2855 238
## 8 42101034000 Census Tract 340,… 2957 1114 74
## 9 42101034600 Census Tract 346,… 2664 1302 87
## 10 42101034801 Census Tract 348.… 4465 1978 114
## # ℹ 374 more rows
## # ℹ 19 more variables: med_HH_Income.2016 <dbl>, total_White.2016 <dbl>,
## # total_GradDeg.2016 <dbl>, vacancyPct.2016 <dbl>, pctWhite.2016 <dbl>,
## # NAME.y <chr>, total_pop.2020 <dbl>, total_HU.2020 <dbl>,
## # total_vacant.2020 <dbl>, med_HH_Income.2020 <dbl>, total_White.2020 <dbl>,
## # total_GradDeg.2020 <dbl>, vacancyPct.2020 <dbl>, pctWhite.2020 <dbl>,
## # change_med_HH_Income <dbl>, change_Grad_Degree_Pct <dbl>, …
mean(allACS$change_med_HH_Income, na.rm = TRUE)
## [1] 6012.472
median(allACS$change_med_HH_Income, na.rm = TRUE)
## [1] 4873.76
hist(allACS$change_med_HH_Income)
ggplot(allACS) +
geom_histogram(aes(change_med_HH_Income)) +
theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 31 rows containing non-finite outside the scale range
## (`stat_bin()`).
ggplot(allACS)+
geom_histogram(aes(change_med_HH_Income), binwidth = 5000)+
labs(
title = "Change in Philadelphia HH median income by tract, 2016-2020",
caption = "Data: US Census Bureau, ACS 5-year estimates",
x="Change in Med HH Income (2020 dollars)",
y="Number of tracts") +
theme_minimal()
summaryTable <- allACS %>%
summarize(mean_change_HH_Income = mean(change_med_HH_Income, na.rm = TRUE),
med_change_HH_Income = median(change_med_HH_Income, na.rm = TRUE))
pander(summaryTable)
| mean_change_HH_Income | med_change_HH_Income |
|---|---|
| 6012 | 4874 |
myTracts <- c("42101023500",
"42101023600",
"42101023700",
"42101025300",
"42101025400",
"42101025500",
"42101025600",
"42101038800")
allACS <- allACS %>%
mutate(mtAiry = ifelse(GEOID %in% myTracts, "MT AIRY", "REST OF PHILADELPHIA"))
summaryTable2 <- allACS %>%
group_by(mtAiry) %>%
summarize(mean_change_HH_Income = mean(change_med_HH_Income, na.rm = TRUE),
med_change_HH_Income = median(change_med_HH_Income, na.rm = TRUE))
pander(summaryTable2)
| mtAiry | mean_change_HH_Income | med_change_HH_Income |
|---|---|---|
| MT AIRY | 6945 | 3115 |
| REST OF PHILADELPHIA | 5991 | 5073 |
ggplot(allACS)+
geom_histogram(aes(change_med_HH_Income),
binwidth = 5000)+
labs(
title = "Change in Philadelphia HH median income by tract, 2016-2020",
caption = "Data: US Census Bureau, ACS 5-year estimates",
x="Change in Med HH Income (2020 dollars)",
y="Number of tracts")+
facet_wrap(~mtAiry, scales = "free") +
theme_minimal()
ggplot(allACS)+
geom_point(aes(x =med_HH_Income.2016 * 1.08,
y = med_HH_Income.2020,
color = mtAiry))+
geom_abline(intercept = 0, slope = 1)+
labs(
title = "2020 Median HH Income as a Function of 2016 Median HH Income",
subtitle = "All figures in 2020 dollars",
caption = "Data: US Census Bureau, ACS 5-year estimates",
x="Med HH Income 2016 ($)",
y="Med HH Income 2020 ($)") +
theme_minimal()
## Warning: Removed 31 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggplot(allACS)+
geom_point(aes(x = 100* pctWhite.2020,
y = med_HH_Income.2020,
color = mtAiry))+
geom_smooth(aes(x = 100* pctWhite.2020,
y = med_HH_Income.2020),
method = "lm", se = FALSE)+
labs(
title = "2020 Median HH Income as a Function of Pct White",
subtitle = "All figures in 2020 dollars",
caption = "Data: US Census Bureau, ACS 5-year estimates",
x="Pct. Residents Identifying as 'White Only'",
y="Med HH Income 2020 ($)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 31 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 31 rows containing missing values or values outside the scale range
## (`geom_point()`).
acsTractsPHL.2020.sf <- get_acs(geography = "tract",
year = 2020,
variables = acs_vars,
geometry = TRUE,
state = "PA",
county = "Philadelphia",
output = "wide") %>%
dplyr::select (GEOID, NAME, all_of(acs_vars)) %>%
rename (total_pop.2020 = B01001_001E,
total_HU.2020 = B25002_001E,
total_vacant.2020 = B25002_003E,
med_HH_Income.2020 = B19013_001E,
total_White.2020 = B02001_002E,
total_GradDeg.2020 = B06009_006E) %>%
mutate(vacancyPct.2020 = total_vacant.2020/total_HU.2020,
pctWhite.2020 = total_White.2020/total_pop.2020) %>%
mutate(mtAiry = ifelse(GEOID %in% myTracts, "MT AIRY", "REST OF PHILADELPHIA")) %>%
st_as_sf(crs = 4326) # Turn shp into sf object and project as WGS84
## Getting data from the 2016-2020 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## | | | 0% | |= | 1% | |= | 2% | |== | 3% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 9% | |======= | 10% | |========= | 13% | |=========== | 15% | |=========== | 16% | |============ | 18% | |============== | 20% | |=============== | 22% | |================ | 23% | |=================== | 28% | |====================== | 32% | |========================== | 37% | |=========================== | 39% | |============================== | 43% | |=============================== | 44% | |================================= | 47% | |=================================== | 50% | |==================================== | 51% | |====================================== | 54% | |====================================== | 55% | |======================================== | 57% | |=========================================== | 62% | |============================================= | 64% | |============================================== | 65% | |================================================ | 68% | |=================================================== | 73% | |===================================================== | 75% | |======================================================= | 79% | |========================================================= | 82% | |============================================================ | 85% | |============================================================= | 87% | |=============================================================== | 91% | |=================================================================== | 96% | |===================================================================== | 99% | |======================================================================| 100%
ggplot()+
geom_sf(data = acsTractsPHL.2020.sf, aes(fill = pctWhite.2020),
color = "transparent")+
geom_sf(data = acsTractsPHL.2020.sf %>%
filter(mtAiry == "MT AIRY") %>%
st_union(),
color = "white",
fill = "transparent")+
labs(
title = "Percentage of those identifying as 'white only' by tract",
subtitle = "",
caption = "Data: US Census Bureau, ACS 5-year estimates") +
theme_void()
ggplot(allACS, aes(x = total_vacant.2016, y = total_vacant.2020, color = mtAiry)) +
geom_point(size = 2) +
scale_color_manual(values = c("#6495ED", "#8B4513"), name = "Mt. Airy") +
labs(
x = "Vacant Housing Units in 2016",
y = "Vacant Housing Units in 2020",
title = "Change in Vacant Housing Units from 2016 to 2020"
) +
theme_minimal()
## Warning: Removed 21 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Calculate vacancy percentage for 2016
acsTractsPHL.2016 <- acsTractsPHL.2016 %>%
mutate(vacancyPct.2016 = total_vacant.2016 / total_HU.2016 * 100)
acsTractsPHL.2016.sf <- get_acs(geography = "tract",
year = 2016,
variables = acs_vars,
geometry = TRUE,
state = "PA",
county = "Philadelphia",
output = "wide") %>%
dplyr::select (GEOID, NAME, all_of(acs_vars)) %>%
rename (total_pop.2016 = B01001_001E,
total_HU.2016 = B25002_001E,
total_vacant.2016 = B25002_003E,
med_HH_Income.2016 = B19013_001E,
total_White.2016 = B02001_002E,
total_GradDeg.2016 = B06009_006E) %>%
mutate(vacancyPct.2016 = total_vacant.2016/total_HU.2016,
pctWhite.2016 = total_White.2016/total_pop.2016) %>%
mutate(mtAiry = ifelse(GEOID %in% myTracts, "MT AIRY", "REST OF PHILADELPHIA")) %>%
st_as_sf(crs = 4326) # Turn shp into sf object and project as WGS84
## Getting data from the 2012-2016 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## | | | 0% | |== | 2% | |===== | 6% | |======== | 12% | |=========== | 16% | |=============== | 21% | |=================== | 27% | |====================== | 31% | |========================== | 36% | |============================ | 40% | |============================= | 42% | |================================= | 47% | |==================================== | 52% | |======================================= | 56% | |======================================== | 57% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |============================================= | 64% | |================================================= | 70% | |==================================================== | 74% | |======================================================== | 80% | |======================================================== | 81% | |=========================================================== | 85% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================== | 95% | |======================================================================| 100%
ggplot() +
geom_sf(data = acsTractsPHL.2016.sf,
aes(fill = vacancyPct.2016), color = "transparent") +
geom_sf(data = acsTractsPHL.2016.sf %>%
filter(mtAiry == "MT AIRY") %>%
st_union(),
color = "yellow",
fill = "transparent") +
scale_fill_viridis_c(option = "viridis",
name = "Percentage of Vacant Homes") +
labs(
title = "Housing Vacancy Rates by Tract in Philadelphia (2016)",
subtitle = "",
caption = "Data Sources: US Census Bureau, ACS 5-year estimates"
) +
theme_void()
### The above map highlights the vacancy rates across Philadelphia
tracts in the year of 2016. Mt. Airy is outlined in yellow. The analysis
implies that Mt. Airy has lower vacancy rates compared to other areas in
the city, which could reflect its relatively stable housing market.
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(dplyr)
summary_table <- acsTractsPHL.2020.sf %>%
st_drop_geometry() %>%
group_by(mtAiry) %>%
summarise(mean_vacant_units = mean(total_vacant.2020, na.rm = TRUE))
# Summarize with a kable table
kable(summary_table, caption = "Mean Number of Vacant Housing Units per Tract (2020)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| mtAiry | mean_vacant_units |
|---|---|
| MT AIRY | 156.6250 |
| REST OF PHILADELPHIA | 186.8575 |