Lab 5 - 11.S939

Author
Affiliation

Dustin Michels

MIT

Published

November 26, 2024

# load libraries
library(tidyverse)
library(sf)
library(tidycensus)
library(tigris)
library(gt)

# set options
# turn on the tigris cache option so boundary files requested through tigris
# can be reused between runs instead of being downloaded again
options(tigris_use_cache = TRUE)

Part 1: Racial composition of Chicago, by tract

Get demographic data

Use the Census API to download racial data from the 2018-2022 5-year ACS for Cook County.

  • B03002_003 = White (non-Hispanic)
  • B03002_004 = Black (non-Hispanic)
  • B03002_012 = Hispanic
# ACS table B03002 variable codes, keyed by the short column names used for
# the rest of the analysis
race_vars <- c(
  totpop   = "B03002_001",
  white    = "B03002_003",
  black    = "B03002_004",
  hispanic = "B03002_012"
)

# pull tract-level estimates (with geometry) for Cook County, IL from the
# 2022 5-year ACS; wide output gives one row per tract and one
# estimate/margin column pair per variable
acs_2022 <- get_acs(
  geography = "tract",
  variables = race_vars,
  state = "IL",
  county = "Cook",
  year = 2022,
  survey = "acs5",
  output = "wide",
  geometry = TRUE
)

# Remove the margin-of-error columns and strip the trailing "E" from the
# estimate columns, touching only the columns generated from `race_vars`.
# Restricting the columns matters for two reasons:
#   * an unrestricted rename_with(~ str_remove(., "E$")) also hits the tract
#     `NAME` column and silently turns it into `NAM`;
#   * ends_with("M") ignores case by default, so it could drop any unrelated
#     column whose name happens to end in "m"/"M".
acs_2022 <- acs_2022 |>
  select(-all_of(paste0(names(race_vars), "M"))) |>
  rename_with(
    \(nm) str_remove(nm, "E$"),
    .cols = all_of(paste0(names(race_vars), "E"))
  )

Determine which tracts are in the City of Chicago

Download the boundary of the City of Chicago

# Chicago's boundary, taken from the Illinois places layer
chi_bnd <- filter(places(state = "IL"), NAME == "Chicago")

# reproject both layers to EPSG:3435 so they share a CRS before intersecting
acs_2022 <- acs_2022 |> st_transform(crs = 3435)
chi_bnd <- chi_bnd |> st_transform(crs = 3435)

# clip the tracts to the city boundary, then keep only the tract columns
# (GEOID through hispanic) and discard the attributes carried over from
# the boundary layer
acs_2022_chi <- st_intersection(acs_2022, chi_bnd) |>
  select(GEOID:hispanic)

Calculate the proportion of White, Black, and Hispanic residents in each tract

# reshape to one row per tract and racial group, then express each group's
# count as a share of the tract's total population
acs_2022_chi <- acs_2022_chi |>
  pivot_longer(
    cols = all_of(c("white", "black", "hispanic")),
    names_to = "racial_group",
    values_to = "group_pop"
  ) |>
  mutate(group_proportion = group_pop / totpop)

Create map and analyze

Create faceted map showing the percentage distribution of the non-Hispanic White, non-Hispanic Black, and Hispanic populations across census tracts in the City of Chicago

# one map panel per racial group, tracts shaded by that group's share of the
# tract population (shown as a percentage)
ggplot(data = acs_2022_chi) +
  geom_sf(mapping = aes(fill = 100 * group_proportion)) +
  facet_wrap(vars(racial_group)) +
  scale_fill_viridis_c() +
  labs(fill = "Percent of Total Population") +
  theme_void()
Figure 1: Racial composition comparison of Chicago. For each tract, the percentage of the population that is White (non-Hispanic), Black (non-Hispanic), and Hispanic is shown.

The racial segregation of the city is quite striking. In the south, the majority of the population is Black; in the north, the majority of the population is White; and in the west, the majority of the population is Hispanic.

We see a similar partition in Figure 2, which highlights population change. The south of Chicago (majority Black) tends to have a shrinking population, while the north and west tend to have a growing population. The far north shows little change. This might suggest that the racial composition of the city is changing, becoming less Black and more White and Hispanic.

Part 2 - Changing populations in Cook County

Load LTDB data

Load and clean two years of historical LTDB population data (1970 and 2020)

# read the two LTDB full-count files, keeping only the Cook County, IL rows:
# the 1970 file is filtered on its county/state columns, the 2020 file on
# the "17031" prefix of its tract ID
F_1970 <- read_csv("data/LTDB_Std_1970_fullcount.csv") |>
  filter(county == "Cook County", state == "IL")
F_2020 <- read_csv("data/ltdb_std_2020_fullcount.csv") |>
  filter(startsWith(as.character(TRTID2010), "17031"))

# keep just the tract ID and population count from each year, under common
# names, and store the ID as character so both tables join on the same type
S_1970 <- F_1970 |>
  select(GEOID = TRTID10, POP1970 = POP70) |>
  mutate(across(GEOID, as.character))
S_2020 <- F_2020 |>
  select(GEOID = TRTID2010, POP2020 = pop20) |>
  mutate(across(GEOID, as.character))

# combine the two years; the right join keeps every 2020 tract and leaves
# POP1970 as NA where the 1970 table has no matching GEOID
pop_data <- right_join(S_1970, S_2020, by = "GEOID")

# relative population change 1970 -> 2020, bucketed at +/-10%; tracts whose
# change cannot be computed (NA) are labelled "Uncategorized"
pop_data <- pop_data |>
  mutate(
    change = (POP2020 - POP1970) / POP1970,
    status = case_when(
      change > 0.10 ~ "1 - Growing",
      change < -0.10 ~ "3 - Declining",
      is.na(change) ~ "Uncategorized",
      TRUE ~ "2 - Low Change"
    )
  )

Combine LTDB data with current geography

# Tract geometry to attach to the LTDB counts.
#
# The LTDB standard files are keyed by 2010 tract IDs (TRTID10 / TRTID2010),
# whereas the 2018-2022 ACS is published on 2020 tract boundaries. Joining on
# GEOID against year = 2022 geometry therefore drops or mismatches every
# tract that was split, merged, or renumbered for 2020. The 2015-2019 ACS is
# the last 5-year release on 2010 tract boundaries, so request that vintage.
#
# The object keeps its original name (F_2022) because the join that follows
# refers to it. The variable requested is irrelevant: only GEOID and the
# geometry are kept. The stray trailing comma in the call is also removed.
F_2022 <- get_acs(
  geography = "tract",
  state = "IL",
  county = "Cook",
  variables = "B01001_001",
  year = 2019,
  survey = "acs5",
  geometry = TRUE
) |>
  select(GEOID)

# attach tract geometry by GEOID (only tracts present in both tables are
# kept), then convert the joined table to an sf object so it can be mapped
pop_data_geo <- st_as_sf(inner_join(pop_data, F_2022, by = "GEOID"))

Create map

# choropleth of tract growth status with the Chicago boundary drawn on top
ggplot(data = pop_data_geo) +
  geom_sf(mapping = aes(fill = status)) +
  # city outline: no fill, so the tract colors underneath stay visible
  geom_sf(
    data = chi_bnd,
    fill = NA,
    color = "yellow",
    linewidth = 0.5
  ) +
  scale_fill_manual(
    values = c(
      "1 - Growing" = "#2ca25f",
      "2 - Low Change" = "#99d8c9",
      "3 - Declining" = "#e5f5f9",
      "Uncategorized" = "black"
    )
  ) +
  labs(fill = "Population Change") +
  theme_void()
Figure 2: Population change in Cook County, 1970-2020. The boundary of Chicago is highlighted in yellow.

Conclusion

You can see clear population growth in several areas of Cook County outside of the city, suggesting some suburbanization. To make this map clearer, I think it would be interesting to combine tracts with their neighbors somehow, reducing the number of polygons and making larger trends (potentially) easier to see.