This notebook presents a census tract-level analysis of race in New York City using data from the 2020 Decennial Census. The analysis examines the racial composition of New York City, processing the data to calculate percentages for various racial groups. The results are presented through a summary table and maps, offering insights into the spatial distribution of racial demographics across the five boroughs of NYC.
library(tidyverse)
library(tidycensus)
library(sf)
library(scales)
library(viridis)
library(RColorBrewer)
library(plotly)
library(ggplot2)
library(knitr)
# NOTE(review): this line originally began with the stray words
# "tidycensus package." -- the tail of a prose sentence that had been fused
# onto the code -- which made the statement unparseable. They are removed from
# the code and recorded here.
#
# Borough boundary polygons, drawn on top of each tract map for spatial
# reference.
# NOTE(review): the path below is absolute and machine-specific; point it at
# your local copy of "Borough Boundaries.geojson" (ideally through a
# project-relative path) before running.
boros <- st_read("C:/Users/arooj/OneDrive/Desktop/class 1/part2-20241014T141218Z-001/part2/data/raw/geo/Borough Boundaries.geojson")
# Download tract-level counts, with tract geometries, for the five NYC
# counties from the 2020 Decennial Census.
#
# The variables come from table P2 (Hispanic or Latino, and not Hispanic or
# Latino by race). `output = "wide"` yields one row per tract and one column
# per named variable below.
raw_data_race <- get_decennial(
  geography = "tract",
  variables = c(
    total_population   = "P2_001N",
    hispanic_or_latino = "P2_002N",
    black_alone        = "P2_006N",
    asian_alone        = "P2_008N",
    white_alone        = "P2_005N"
  ),
  state = "NY",
  # County names of the five boroughs: Manhattan, Brooklyn, Queens,
  # the Bronx, and Staten Island, respectively.
  county = c("New York", "Kings", "Queens", "Bronx", "Richmond"),
  year = 2020,
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  geometry = TRUE,
  output = "wide"
)
# Convert each group's count into a share of the tract's total population,
# rounded to three decimals. Tracts with zero population produce NaN (0 / 0);
# those are excluded later by the is.na() filters and na.rm = TRUE.
data_race <- mutate(
  raw_data_race,
  pct_hispanic_latino = round(hispanic_or_latino / total_population, digits = 3),
  pct_black           = round(black_alone / total_population, digits = 3),
  pct_asian           = round(asian_alone / total_population, digits = 3),
  pct_white           = round(white_alone / total_population, digits = 3)
)
# Split the tract label in NAME into its leading "tract" and "county" parts so
# the data can be grouped by county (borough).
#
# NAME is split on ", "; only the first two pieces are kept. `extra = "drop"`
# makes that explicit and avoids the per-row "additional pieces discarded"
# warning that separate() raises when NAME has more parts than `into` names
# (e.g. a trailing state name).
data_race_summary <- data_race |>
  separate(
    NAME,
    into = c("tract", "county"),
    sep = ", ",
    extra = "drop"
  )
# County-level summary: for every county, the mean of the tract-level shares
# of each group, formatted as a percentage string with one decimal place.
#
# Note that this is an unweighted mean across tracts (each tract counts
# equally regardless of population), not the county-wide population share.
# Geometry is dropped first so the result is a plain table.
summary_table <- summarise(
  group_by(st_drop_geometry(data_race_summary), county),
  Avg_Percent_hispanic_latino = percent(
    mean(pct_hispanic_latino, na.rm = TRUE),
    accuracy = 0.1
  ),
  Avg_Percent_black = percent(
    mean(pct_black, na.rm = TRUE),
    accuracy = 0.1
  ),
  Avg_Percent_asian = percent(
    mean(pct_asian, na.rm = TRUE),
    accuracy = 0.1
  ),
  Avg_Percent_white = percent(
    mean(pct_white, na.rm = TRUE),
    accuracy = 0.1
  )
)

# Render the summary as a table in the knitted document.
kable(summary_table)
| county | Avg_Percent_hispanic_latino | Avg_Percent_black | Avg_Percent_asian | Avg_Percent_white |
|---|---|---|---|---|
| Bronx County | 52.3% | 28.6% | 4.5% | 10.5% |
| Kings County | 18.5% | 26.9% | 13.3% | 35.3% |
| New York County | 22.7% | 12.6% | 13.4% | 46.3% |
| Queens County | 27.0% | 18.0% | 25.0% | 22.9% |
| Richmond County | 20.7% | 10.3% | 11.6% | 53.6% |
Below are maps showing the spatial distribution of each racial category by census tract. Borough boundaries are included for spatial reference.
# Choropleth of the Hispanic or Latino share by census tract, with borough
# outlines on top, converted to an interactive plotly widget.
hispanic_map <- ggplot() +
  geom_sf(
    # Tracts with a missing share are dropped so they get no hover tooltip.
    data = data_race |>
      filter(!is.na(pct_hispanic_latino)),
    mapping = aes(
      fill = pct_hispanic_latino,
      # `text` is not drawn by ggplot2; ggplotly() reads it for the tooltip.
      # The formatter is called as scales::percent(); the original
      # `scales=percent(...)` was a typo that only worked because paste0()
      # ignores argument names.
      text = paste0(
        NAME, ":",
        "<br>Percent Hispanic or Latino: ",
        scales::percent(pct_hispanic_latino, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  scale_fill_distiller(
    palette = "Reds",
    direction = 1,
    na.value = "transparent",
    name = "Percent Hispanic or Latino",
    labels = percent_format(accuracy = 1)
  ) +
  theme_void() +
  labs(
    title = "Percent Hispanic or Latino in NYC Census Tracts (2020)",
    caption = "Source: U.S. Census Bureau, 2020 Decennial Census"
  ) +
  # Borough boundaries for context: outline only, no fill.
  geom_sf(
    data = boros,
    color = "black", fill = NA, lwd = 0.2
  )

# Show only the custom `text` aesthetic in the hover tooltip.
ggplotly(hispanic_map, tooltip = "text")
# Choropleth of the Black-alone (not Hispanic or Latino) share by census
# tract, with borough outlines on top, converted to an interactive plotly
# widget.
black_map <- ggplot() +
  geom_sf(
    # Tracts with a missing share are dropped so they get no hover tooltip.
    data = data_race |>
      filter(!is.na(pct_black)),
    mapping = aes(
      fill = pct_black,
      # `text` is not drawn by ggplot2; ggplotly() reads it for the tooltip.
      # The formatter is called as scales::percent(); the original
      # `scales=percent(...)` was a typo that only worked because paste0()
      # ignores argument names.
      text = paste0(
        NAME, ":",
        "<br>Percent Black-alone, not Hispanic or Latino: ",
        scales::percent(pct_black, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  scale_fill_distiller(
    palette = "Blues",
    direction = 1,
    na.value = "transparent",
    name = "Percent Black-alone, not Hispanic or Latino",
    labels = percent_format(accuracy = 1)
  ) +
  theme_void() +
  labs(
    title = "Percent Black-alone, not Hispanic or Latino in NYC Census Tracts (2020)",
    caption = "Source: U.S. Census Bureau, 2020 Decennial Census"
  ) +
  # Borough boundaries for context: outline only, no fill.
  geom_sf(
    data = boros,
    color = "black", fill = NA, lwd = 0.2
  )

# Show only the custom `text` aesthetic in the hover tooltip.
ggplotly(black_map, tooltip = "text")
# Map for Percent Asian-alone, not Hispanic or Latino
#
# Choropleth of the share by census tract, with borough outlines on top,
# converted to an interactive plotly widget.
asian_map <- ggplot() +
  geom_sf(
    # Tracts with a missing share are dropped so they get no hover tooltip.
    data = data_race |>
      filter(!is.na(pct_asian)),
    mapping = aes(
      fill = pct_asian,
      # `text` is not drawn by ggplot2; ggplotly() reads it for the tooltip.
      # The formatter is called as scales::percent(); the original
      # `scales=percent(...)` was a typo that only worked because paste0()
      # ignores argument names.
      text = paste0(
        NAME, ":",
        "<br>Percent Asian-alone, not Hispanic or Latino: ",
        scales::percent(pct_asian, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  scale_fill_distiller(
    palette = "Greens",
    direction = 1,
    na.value = "transparent",
    name = "Percent Asian-alone, not Hispanic or Latino",
    labels = percent_format(accuracy = 1)
  ) +
  theme_void() +
  labs(
    title = "Percent Asian-alone, not Hispanic or Latino in NYC Census Tracts (2020)",
    caption = "Source: U.S. Census Bureau, 2020 Decennial Census"
  ) +
  # Borough boundaries for context: outline only, no fill.
  geom_sf(
    data = boros,
    color = "black", fill = NA, lwd = 0.2
  )

# Show only the custom `text` aesthetic in the hover tooltip.
ggplotly(asian_map, tooltip = "text")
# Choropleth of the White-alone (not Hispanic or Latino) share by census
# tract, with borough outlines on top, converted to an interactive plotly
# widget.
white_map <- ggplot() +
  geom_sf(
    # Tracts with a missing share are dropped so they get no hover tooltip.
    data = data_race |>
      filter(!is.na(pct_white)),
    mapping = aes(
      fill = pct_white,
      # `text` is not drawn by ggplot2; ggplotly() reads it for the tooltip.
      # The formatter is called as scales::percent(); the original
      # `scales=percent(...)` was a typo that only worked because paste0()
      # ignores argument names.
      text = paste0(
        NAME, ":",
        "<br>Percent White-alone, not Hispanic or Latino: ",
        scales::percent(pct_white, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  scale_fill_distiller(
    palette = "Purples",
    direction = 1,
    na.value = "transparent",
    name = "Percent White-alone, not Hispanic or Latino",
    labels = percent_format(accuracy = 1)
  ) +
  theme_void() +
  labs(
    title = "Percent White-alone, not Hispanic or Latino in NYC Census Tracts (2020)",
    caption = "Source: U.S. Census Bureau, 2020 Decennial Census"
  ) +
  # Borough boundaries for context: outline only, no fill.
  geom_sf(
    data = boros,
    color = "black", fill = NA, lwd = 0.2
  )

# Show only the custom `text` aesthetic in the hover tooltip.
ggplotly(white_map, tooltip = "text")