Setup
Load Packages
library(tidyverse)
library(tidycensus)
library(sf)
library(pander)
library(knitr)
library(pander)
library(rmarkdown)
Access Census API Key
census_api_key("75836456df67458839c36492f8b736a89591e1c1", install = TRUE, overwrite = TRUE)
Load census data dictionaries
acs_variable_list.2020 <- load_variables(2020, #year
"acs5", #five year ACS estimates
cache = TRUE)
acs_variable_list.2016 <- load_variables(2016, #year
"acs5", #five year ACS estimates
cache = TRUE)
Downloading Data from Tidycensus
Create a vector of census variables
acs_vars <- c("B01001_001E", # ACS total Pop estimate
"B25002_001E", # Estimate of total housing units
"B25002_003E", # Number of vacant housing units
"B19013_001E", # Median HH Income ($)
"B02001_002E", # People describing themselves as "white alone"
"B06009_006E") # Total graduate or professional degree
Call the Census API to get tract level data for 2020 for all of
Philadelphia
acsTractsPHL.2020 <- get_acs(geography = "tract",
year = 2020,
variables = acs_vars,
geometry = FALSE,
state = "PA",
county = "Philadelphia",
output = "wide")
Wrangling Data with dplyr
Mutating, selecting and renaming variables
acsTractsPHL.2020 <- acsTractsPHL.2020 %>%
dplyr::select (GEOID, NAME, all_of(acs_vars))
acsTractsPHL.2020 <- acsTractsPHL.2020 %>%
rename (total_pop.2020 = B01001_001E,
total_HU.2020 = B25002_001E,
total_vacant.2020 = B25002_003E,
med_HH_Income.2020 = B19013_001E,
total_White.2020 = B02001_002E,
total_GradDeg.2020 = B06009_006E)
acsTractsPHL.2020 <- acsTractsPHL.2020 %>%
mutate(vacancyPct.2020 = total_vacant.2020/total_HU.2020,
pctWhite.2020 = total_White.2020/total_pop.2020)
acsTractsPHL.2016 <- get_acs(geography = "tract",
year = 2016,
variables = acs_vars,
geometry = FALSE,
state = "PA",
county = "Philadelphia",
output = "wide") %>%
dplyr::select (GEOID, NAME, all_of(acs_vars)) %>%
rename (total_pop.2016 = B01001_001E,
total_HU.2016 = B25002_001E,
total_vacant.2016 = B25002_003E,
med_HH_Income.2016 = B19013_001E,
total_White.2016 = B02001_002E,
total_GradDeg.2016 = B06009_006E) %>%
mutate(vacancyPct.2016 = total_vacant.2016/total_HU.2016,
pctWhite.2016 = total_White.2016/total_pop.2016)
Joining data
allACS <- left_join(acsTractsPHL.2016, acsTractsPHL.2020,
by= c("GEOID"))
Doing column math using mutate
allACS <- allACS %>%
mutate(change_med_HH_Income = med_HH_Income.2020 - (med_HH_Income.2016 * 1.08),
change_Grad_Degree_Pct = (total_GradDeg.2020/total_pop.2020)-(total_GradDeg.2016/total_pop.2016))
Exercise: Creating Three New Variables With the Mutate Function
allACS <- allACS %>%
mutate(change_vacancyPct = (vacancyPct.2020 - vacancyPct.2016),
pct_change_pop = ((total_pop.2020 - total_pop.2016) / total_pop.2016) * 100,
pct_GradDeg_2020 = (total_GradDeg.2020 / total_pop.2020) * 100)
Summarizing Census Data
ggplot(allACS)+
geom_histogram(aes(change_med_HH_Income), binwidth = 5000)+
labs(
title = "Change in Philadelphia HH median income by tract, 2016-2020",
caption = "Data: US Census Bureau, ACS 5-year estimates",
x="Change in Med HH Income (2020 dollars)",
y="Number of tracts") +
theme_minimal()

Making a summary table
summaryTable <- allACS %>%
summarize(mean_change_HH_Income = mean(change_med_HH_Income, na.rm = TRUE),
med_change_HH_Income = median(change_med_HH_Income, na.rm = TRUE))
pander(summaryTable)
Comparing geographies
myTracts <- c("42101023500",
"42101023600",
"42101023700",
"42101025300",
"42101025400",
"42101025500",
"42101025600",
"42101038800")
allACS <- allACS %>%
mutate(mtAiry = ifelse(GEOID %in% myTracts, "MT AIRY", "REST OF PHILADELPHIA"))
summaryTable2 <- allACS %>%
group_by(mtAiry) %>%
summarize(mean_change_HH_Income = mean(change_med_HH_Income, na.rm = TRUE),
med_change_HH_Income = median(change_med_HH_Income, na.rm = TRUE))
pander(summaryTable2)
| MT AIRY |
6945 |
3115 |
| REST OF PHILADELPHIA |
5991 |
5073 |
Graphic comparisons Using ggplot2
ggplot(allACS)+
geom_histogram(aes(change_med_HH_Income),
binwidth = 5000)+
labs(
title = "Change in Philadelphia HH median income by tract, 2016-2020",
caption = "Data: US Census Bureau, ACS 5-year estimates",
x="Change in Med HH Income (2020 dollars)",
y="Number of tracts")+
facet_wrap(~mtAiry, scales = "free") +
theme_minimal()

ggplot(allACS)+
geom_point(aes(x =med_HH_Income.2016 * 1.08,
y = med_HH_Income.2020,
color = mtAiry))+
geom_abline(intercept = 0, slope = 1)+
labs(
title = "2020 Median HH Income as a Function of 2016 Median HH Income",
subtitle = "All figures in 2020 dollars",
caption = "Data: US Census Bureau, ACS 5-year estimates",
x="Med HH Income 2016 ($)",
y="Med HH Income 2020 ($)") +
theme_minimal()

ggplot(allACS)+
geom_point(aes(x = 100* pctWhite.2020,
y = med_HH_Income.2020,
color = mtAiry))+
geom_smooth(aes(x = 100* pctWhite.2020,
y = med_HH_Income.2020),
method = "lm", se = FALSE)+
labs(
title = "2020 Median HH Income as a Function of Pct White",
subtitle = "All figures in 2020 dollars",
caption = "Data: US Census Bureau, ACS 5-year estimates",
x="Pct. Residents Identifying as 'White Only'",
y="Med HH Income 2020 ($)") +
theme_minimal()

Spatial Data and Tidycensus
represent the boundary of Mt. Airy
ggplot()+
geom_sf(data = acsTractsPHL.2020.sf, aes(fill = pctWhite.2020),
color = "transparent")+
geom_sf(data = acsTractsPHL.2020.sf %>%
filter(mtAiry == "MT AIRY") %>%
st_union(),
color = "white",
fill = "transparent")+
labs(
title = "Percentage of those identifying as 'white only' by tract",
subtitle = "",
caption = "Data: US Census Bureau, ACS 5-year estimates") +
theme_void()

Homework Assignment
Q1: Create a ggplot: Change in Vacant Housing Units (2016-2020)
ggplot(allACS, aes(x = total_vacant.2016, y = total_vacant.2020, color = mtAiry)) +
geom_point(size = 2) +
scale_color_manual(values = c("#6495ED", "#8B4513"), name = "Neighborhood") +
labs(
x = "Vacant Housing Units in 2016",
y = "Vacant Housing Units in 2020",
title = "Change in Vacant Housing Units from 2016 to 2020"
) +
theme_minimal()

The above plot shows a comparison of vacant housing units in
Philadelphia’s tracts between 2016 and 2020. with Mt. Airy tracts
highlighted in blue, indicating how vacancy trends vary across the
city.
Q2: Create a map: Housing Vacancy Rates by Tract in Philadelphia
(2016)
ggplot() +
geom_sf(data = acsTractsPHL.2016.sf,
aes(fill = vacancyPct.2016), color = "transparent") +
geom_sf(data = acsTractsPHL.2016.sf %>%
filter(mtAiry == "MT AIRY") %>%
st_union(),
color = "yellow",
fill = "transparent") +
scale_fill_viridis_c(option = "viridis",
name = "Percentage of Vacant Homes") +
labs(
title = "Housing Vacancy Rates by Tract in Philadelphia (2016)",
subtitle = "",
caption = "Data Sources: US Census Bureau, ACS 5-year estimates"
) +
theme_void()

The above map highlights the vacancy rates across Philadelphia
tracts in the year of 2016. Mt. Airy is outlined in yellow. The analysis
implies that Mt. Airy has lower vacancy rates compared to other areas in
the city, which could reflect its relatively stable housing market.
Q3: a summary of mean number of vacant housing units per tract for
Mt. Airy vs. the rest Philadelphia as a whole in 2020
library(kableExtra)
library(dplyr)
summary_table <- acsTractsPHL.2020.sf %>%
st_drop_geometry() %>%
group_by(mtAiry) %>%
summarise(mean_vacant_units = mean(total_vacant.2020, na.rm = TRUE)) %>%
rename(Area = mtAiry)
# Summarize with a kable table
kable(summary_table, caption = "Mean Number of Vacant Housing Units per Tract (2020)") %>%
kable_styling(full_width = F)
Mean Number of Vacant Housing Units per Tract (2020)
|
Area
|
mean_vacant_units
|
|
MT AIRY
|
156.6250
|
|
REST OF PHILADELPHIA
|
186.8575
|