Setup

Load Packages

library(tidyverse)
library(tidycensus)
library(sf)
library(pander)
library(knitr)
library(pander)
library(rmarkdown)

Access Census API Key

census_api_key("75836456df67458839c36492f8b736a89591e1c1", install = TRUE, overwrite = TRUE)

Load census data dictionaries

acs_variable_list.2020 <- load_variables(2020, #year
                                         "acs5", #five year ACS estimates
                                         cache = TRUE)

acs_variable_list.2016 <- load_variables(2016, #year
                                         "acs5", #five year ACS estimates
                                         cache = TRUE)

Downloading Data from Tidycensus

Create a vector of census variables

acs_vars <- c("B01001_001E", # ACS total Pop estimate
              "B25002_001E", # Estimate of total housing units
              "B25002_003E", # Number of vacant housing units
              "B19013_001E", # Median HH Income ($)
              "B02001_002E", # People describing themselves as "white alone"
              "B06009_006E") # Total graduate or professional degree

Call the Census API to get tract level data for 2020 for all of Philadelphia

acsTractsPHL.2020 <- get_acs(geography = "tract",
                             year = 2020, 
                             variables = acs_vars, 
                             geometry = FALSE, 
                             state = "PA", 
                             county = "Philadelphia", 
                             output = "wide") 

Wrangling Data with dplyr

Mutating, selecting and renaming variables

acsTractsPHL.2020 <- acsTractsPHL.2020 %>%
  dplyr::select (GEOID, NAME, all_of(acs_vars))

acsTractsPHL.2020 <- acsTractsPHL.2020 %>%
  rename (total_pop.2020 = B01001_001E,
          total_HU.2020 = B25002_001E,
          total_vacant.2020 = B25002_003E,
          med_HH_Income.2020 = B19013_001E,
          total_White.2020 = B02001_002E,
          total_GradDeg.2020 = B06009_006E)

acsTractsPHL.2020 <- acsTractsPHL.2020 %>%
  mutate(vacancyPct.2020 = total_vacant.2020/total_HU.2020,
         pctWhite.2020   = total_White.2020/total_pop.2020)
acsTractsPHL.2016 <- get_acs(geography = "tract",
                             year = 2016, 
                             variables = acs_vars,
                             geometry = FALSE,
                             state = "PA", 
                             county = "Philadelphia",
                             output = "wide") %>%
  dplyr::select (GEOID, NAME, all_of(acs_vars)) %>% 
  rename (total_pop.2016 = B01001_001E,
          total_HU.2016 = B25002_001E,
          total_vacant.2016 = B25002_003E,
          med_HH_Income.2016 = B19013_001E,
          total_White.2016 = B02001_002E,
          total_GradDeg.2016 = B06009_006E) %>%
  mutate(vacancyPct.2016 = total_vacant.2016/total_HU.2016,
         pctWhite.2016 = total_White.2016/total_pop.2016)

Joining data

allACS <- left_join(acsTractsPHL.2016, acsTractsPHL.2020,
                    by= c("GEOID"))

Doing column math using mutate

allACS <- allACS %>%
  mutate(change_med_HH_Income = med_HH_Income.2020 - (med_HH_Income.2016 * 1.08), 
         change_Grad_Degree_Pct = (total_GradDeg.2020/total_pop.2020)-(total_GradDeg.2016/total_pop.2016))

Exercise: Creating Three New Variables With the Mutate Function

allACS <- allACS %>%
  mutate(change_vacancyPct = (vacancyPct.2020 - vacancyPct.2016),
         pct_change_pop = ((total_pop.2020 - total_pop.2016) / total_pop.2016) * 100,
         pct_GradDeg_2020 = (total_GradDeg.2020 / total_pop.2020) * 100)

Summarizing Census Data

ggplot(allACS)+
  geom_histogram(aes(change_med_HH_Income), binwidth = 5000)+
  labs(
    title = "Change in Philadelphia HH median income by tract, 2016-2020",
    caption = "Data: US Census Bureau, ACS 5-year estimates",
    x="Change in Med HH Income (2020 dollars)", 
       y="Number of tracts") +
  theme_minimal()

Making a summary table

summaryTable <- allACS %>%
  summarize(mean_change_HH_Income = mean(change_med_HH_Income, na.rm = TRUE),
            med_change_HH_Income = median(change_med_HH_Income, na.rm = TRUE))

pander(summaryTable)
mean_change_HH_Income med_change_HH_Income
6012 4874

Comparing geographies

myTracts <- c("42101023500", 
              "42101023600", 
              "42101023700", 
              "42101025300", 
              "42101025400",
              "42101025500", 
              "42101025600", 
              "42101038800")

allACS <- allACS %>%
  mutate(mtAiry = ifelse(GEOID %in% myTracts, "MT AIRY", "REST OF PHILADELPHIA"))
summaryTable2 <- allACS %>%
  group_by(mtAiry) %>%
  summarize(mean_change_HH_Income = mean(change_med_HH_Income, na.rm = TRUE),
            med_change_HH_Income = median(change_med_HH_Income, na.rm = TRUE))

pander(summaryTable2)
mtAiry mean_change_HH_Income med_change_HH_Income
MT AIRY 6945 3115
REST OF PHILADELPHIA 5991 5073

Graphic comparisons Using ggplot2

ggplot(allACS)+
  geom_histogram(aes(change_med_HH_Income),
                 binwidth = 5000)+
  labs(
    title = "Change in Philadelphia HH median income by tract, 2016-2020",
    caption = "Data: US Census Bureau, ACS 5-year estimates",
    x="Change in Med HH Income (2020 dollars)", 
       y="Number of tracts")+
  facet_wrap(~mtAiry, scales = "free") + 
  theme_minimal()

ggplot(allACS)+
  geom_point(aes(x =med_HH_Income.2016 * 1.08, 
                 y = med_HH_Income.2020,
                 color = mtAiry))+
  geom_abline(intercept = 0, slope = 1)+
  labs(
    title = "2020 Median HH Income as a Function of 2016 Median HH Income",
    subtitle = "All figures in 2020 dollars",
    caption = "Data: US Census Bureau, ACS 5-year estimates",
    x="Med HH Income 2016 ($)", 
    y="Med HH Income 2020 ($)") +
  theme_minimal()

ggplot(allACS)+
  geom_point(aes(x = 100* pctWhite.2020, 
                 y = med_HH_Income.2020,
                 color = mtAiry))+
  geom_smooth(aes(x = 100* pctWhite.2020, 
                  y = med_HH_Income.2020), 
              method = "lm", se = FALSE)+
  labs(
    title = "2020 Median HH Income as a Function of Pct White",
    subtitle = "All figures in 2020 dollars",
    caption = "Data: US Census Bureau, ACS 5-year estimates",
    x="Pct. Residents Identifying as 'White Only'", 
    y="Med HH Income 2020 ($)") +
  theme_minimal()

Spatial Data and Tidycensus

represent the boundary of Mt. Airy

ggplot()+
  geom_sf(data = acsTractsPHL.2020.sf, aes(fill = pctWhite.2020),
          color = "transparent")+
  geom_sf(data = acsTractsPHL.2020.sf %>%
            filter(mtAiry == "MT AIRY") %>%
            st_union(),
          color = "white",
          fill = "transparent")+
  labs(
    title = "Percentage of those identifying as 'white only' by tract",
    subtitle = "",
    caption = "Data: US Census Bureau, ACS 5-year estimates") +
  theme_void()

Homework Assignment

Q1: Create a ggplot: Change in Vacant Housing Units (2016-2020)

ggplot(allACS, aes(x = total_vacant.2016, y = total_vacant.2020, color = mtAiry)) +
  geom_point(size = 2) +
  scale_color_manual(values = c("#6495ED", "#8B4513"), name = "Neighborhood") +
  labs(
    x = "Vacant Housing Units in 2016",
    y = "Vacant Housing Units in 2020",
    title = "Change in Vacant Housing Units from 2016 to 2020"
  ) +
  theme_minimal()

Q2: Create a map: Housing Vacancy Rates by Tract in Philadelphia (2016)

ggplot() +
  geom_sf(data = acsTractsPHL.2016.sf, 
          aes(fill = vacancyPct.2016), color = "transparent") +
  geom_sf(data = acsTractsPHL.2016.sf %>%
            filter(mtAiry == "MT AIRY") %>%
            st_union(),
          color = "yellow", 
          fill = "transparent") +
  scale_fill_viridis_c(option = "viridis", 
                       name = "Percentage of Vacant Homes") +  
  labs(
    title = "Housing Vacancy Rates by Tract in Philadelphia (2016)",
    subtitle = "",
    caption = "Data Sources: US Census Bureau, ACS 5-year estimates"
  ) +
  theme_void()

The above map highlights the vacancy rates across Philadelphia tracts in the year of 2016. Mt. Airy is outlined in yellow. The analysis implies that Mt. Airy has lower vacancy rates compared to other areas in the city, which could reflect its relatively stable housing market.

Q3: a summary of mean number of vacant housing units per tract for Mt. Airy vs. the rest Philadelphia as a whole in 2020

library(kableExtra)
library(dplyr)
summary_table <- acsTractsPHL.2020.sf %>%
  st_drop_geometry() %>%
  group_by(mtAiry) %>%
  summarise(mean_vacant_units = mean(total_vacant.2020, na.rm = TRUE)) %>% 
  rename(Area = mtAiry)

# Summarize with a kable table
kable(summary_table, caption = "Mean Number of Vacant Housing Units per Tract (2020)") %>%
  kable_styling(full_width = F)
Mean Number of Vacant Housing Units per Tract (2020)
Area mean_vacant_units
MT AIRY 156.6250
REST OF PHILADELPHIA 186.8575