Description

This notebook presents a census tract-level analysis of race in New York City using data from the 2020 Decennial Census. The analysis examines the racial composition of New York City, processing the data to calculate percentages for various racial groups. The results are presented through a summary table and maps, offering insights into the spatial distribution of racial demographics across the five boroughs of NYC.

library(tidyverse)
library(tidycensus)
library(sf)
library(scales)
library(viridis)
library(RColorBrewer)
library(plotly)
library(ggplot2)
library(knitr)

Methods

  1. Data Source: The analysis uses the 2020 Decennial Census data, downloaded using the tidycensus package.
  2. Geographic Scope: The data is at the census tract level for New York City, covering the five boroughs (New York, Kings, Queens, Bronx, and Richmond Counties).
  3. Variables Created:
    • Percent Hispanic or Latino
    • Percent Black-alone, not Hispanic or Latino
    • Percent Asian-alone, not Hispanic or Latino
    • Percent White-alone, not Hispanic or Latino
  4. Visualization:
    • Maps are generated for each racial category using distinct color gradients.
    • Borough boundaries are overlaid for additional spatial context.
  5. Summary Table:
    • Displays, for each county, the unweighted mean of the tract-level percentages for each racial category, formatted as percentages.
# Borough boundary polygons, used as an outline layer on the tract maps.
# NOTE(review): this is an absolute path on the author's machine; anyone else
# running the notebook must point it at their own copy of the file.
boros_path <- "C:/Users/arooj/OneDrive/Desktop/class 1/part2-20241014T141218Z-001/part2/data/raw/geo/Borough Boundaries.geojson"
boros <- st_read(boros_path)

# Download tract-level 2020 Decennial Census counts, with tract geometries,
# for the five NYC counties. `output = "wide"` gives one row per tract with
# one column per named variable below.
raw_data_race <- get_decennial(
  geography = "tract",
  # The names on the left become the column names used in the rest of the
  # notebook. Per the Methods section, the race counts are the
  # "not Hispanic or Latino" categories.
  variables = c(
    total_population = "P2_001N",
    hispanic_or_latino = "P2_002N",
    black_alone = "P2_006N",
    asian_alone = "P2_008N",
    white_alone = "P2_005N"
  ),
  state = "NY",
  county = c("New York", "Kings", "Queens", "Bronx", "Richmond"),
  year = 2020,
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  geometry = TRUE,
  output = "wide"
)


# Proportion of `total` made up by `count`, rounded to three decimals.
# A tract with a total of 0 and a count of 0 gives NaN (0 / 0), which
# is.na() treats as missing.
share_of_total <- function(count, total) {
  round(count / total, 3)
}

# Add each group's share of the tract's total population as a proportion.
data_race <- raw_data_race |>
  mutate(
    pct_hispanic_latino = share_of_total(hispanic_or_latino, total_population),
    pct_black = share_of_total(black_alone, total_population),
    pct_asian = share_of_total(asian_alone, total_population),
    pct_white = share_of_total(white_alone, total_population)
  )

Results

Summary Table: Percent of Each Racial Category by County

# Split the tract label in NAME into its tract and county parts so the
# tracts can be grouped by county.
# NAME contains more comma-separated pieces than the two kept here;
# `extra = "drop"` discards the remainder explicitly, so separate() no
# longer warns about discarded pieces on every row.
data_race_summary <- data_race |>
  separate(
    NAME,
    into = c("tract", "county"),
    sep = ", ",
    extra = "drop"
  )

# County-level summary: for each county, the mean of the tract-level shares
# (missing values ignored), formatted as a percentage with one decimal.
# This is an unweighted mean across tracts, not a population-weighted share.
summary_table <- data_race_summary |>
  st_drop_geometry() |>
  group_by(county) |>
  summarise(
    across(
      c(pct_hispanic_latino, pct_black, pct_asian, pct_white),
      \(share) percent(mean(share, na.rm = TRUE), accuracy = 0.1)
    )
  ) |>
  # pct_<group> -> Avg_Percent_<group>
  rename_with(
    \(col) sub("^pct_", "Avg_Percent_", col),
    .cols = starts_with("pct_")
  )

kable(summary_table)
county Avg_Percent_hispanic_latino Avg_Percent_black Avg_Percent_asian Avg_Percent_white
Bronx County 52.3% 28.6% 4.5% 10.5%
Kings County 18.5% 26.9% 13.3% 35.3%
New York County 22.7% 12.6% 13.4% 46.3%
Queens County 27.0% 18.0% 25.0% 22.9%
Richmond County 20.7% 10.3% 11.6% 53.6%

Maps: Percent of Each Racial Category by Census Tract

Below are maps showing the spatial distribution of each racial category by census tract. Borough boundaries are included for spatial reference.

# Choropleth of the Hispanic or Latino share by census tract, with borough
# outlines on top, rendered as an interactive plotly widget.
hispanic_map <- ggplot() +
  geom_sf(
    # Drop tracts with a missing share before building the interactive layer.
    data = data_race |>
      filter(!is.na(pct_hispanic_latino)),
    mapping = aes(
      fill = pct_hispanic_latino,
      # `text` is passed through so ggplotly() can use it as the hover
      # tooltip (see `tooltip = "text"` below).
      text = paste0(
        NAME, ":",
        "<br>Percent Hispanic or Latino: ",
        # Was `scales=percent(...)`: a typo for the namespace operator that
        # only worked because paste0() ignores argument names.
        scales::percent(pct_hispanic_latino, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  scale_fill_distiller(
    palette = "Reds",
    direction = 1,
    na.value = "transparent",
    name = "Percent Hispanic or Latino",
    labels = percent_format(accuracy = 1)
  ) +
  theme_void() +
  labs(
    title = "Percent Hispanic or Latino in NYC Census Tracts (2020)",
    caption = "Source: U.S. Census Bureau, 2020 Decennial Census"
  ) +
  # Borough boundaries for context, drawn as unfilled outlines.
  geom_sf(
    data = boros,
    color = "black", fill = NA, lwd = 0.2
  )


ggplotly(hispanic_map, tooltip = "text")
# Choropleth of the Black-alone, not Hispanic or Latino share by census
# tract, with borough outlines on top, rendered as an interactive plotly
# widget.
black_map <- ggplot() +
  geom_sf(
    # Drop tracts with a missing share before building the interactive layer.
    data = data_race |>
      filter(!is.na(pct_black)),
    mapping = aes(
      fill = pct_black,
      # `text` is passed through so ggplotly() can use it as the hover
      # tooltip (see `tooltip = "text"` below).
      text = paste0(
        NAME, ":",
        "<br>Percent Black-alone, not Hispanic or Latino: ",
        # Was `scales=percent(...)`: a typo for the namespace operator that
        # only worked because paste0() ignores argument names.
        scales::percent(pct_black, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  scale_fill_distiller(
    palette = "Blues",
    direction = 1,
    na.value = "transparent",
    name = "Percent Black-alone, not Hispanic or Latino",
    labels = percent_format(accuracy = 1)
  ) +
  theme_void() +
  labs(
    title = "Percent Black-alone, not Hispanic or Latino in NYC Census Tracts (2020)",
    caption = "Source: U.S. Census Bureau, 2020 Decennial Census"
  ) +
  # Borough boundaries for context, drawn as unfilled outlines.
  geom_sf(
    data = boros,
    color = "black", fill = NA, lwd = 0.2
  )

ggplotly(black_map, tooltip = "text")
# Map for Percent Asian-alone, not Hispanic or Latino: choropleth by census
# tract, with borough outlines on top, rendered as an interactive plotly
# widget.
asian_map <- ggplot() +
  geom_sf(
    # Drop tracts with a missing share before building the interactive layer.
    data = data_race |>
      filter(!is.na(pct_asian)),
    mapping = aes(
      fill = pct_asian,
      # `text` is passed through so ggplotly() can use it as the hover
      # tooltip (see `tooltip = "text"` below).
      text = paste0(
        NAME, ":",
        "<br>Percent Asian-alone, not Hispanic or Latino: ",
        # Was `scales=percent(...)`: a typo for the namespace operator that
        # only worked because paste0() ignores argument names.
        scales::percent(pct_asian, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  scale_fill_distiller(
    palette = "Greens",
    direction = 1,
    na.value = "transparent",
    name = "Percent Asian-alone, not Hispanic or Latino",
    labels = percent_format(accuracy = 1)
  ) +
  theme_void() +
  labs(
    title = "Percent Asian-alone, not Hispanic or Latino in NYC Census Tracts (2020)",
    caption = "Source: U.S. Census Bureau, 2020 Decennial Census"
  ) +
  # Borough boundaries for context, drawn as unfilled outlines.
  geom_sf(
    data = boros,
    color = "black", fill = NA, lwd = 0.2
  )

ggplotly(asian_map, tooltip = "text")
# Choropleth of the White-alone, not Hispanic or Latino share by census
# tract, with borough outlines on top, rendered as an interactive plotly
# widget.
white_map <- ggplot() +
  geom_sf(
    # Drop tracts with a missing share before building the interactive layer.
    data = data_race |>
      filter(!is.na(pct_white)),
    mapping = aes(
      fill = pct_white,
      # `text` is passed through so ggplotly() can use it as the hover
      # tooltip (see `tooltip = "text"` below).
      text = paste0(
        NAME, ":",
        "<br>Percent White-alone, not Hispanic or Latino: ",
        # Was `scales=percent(...)`: a typo for the namespace operator that
        # only worked because paste0() ignores argument names.
        scales::percent(pct_white, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  scale_fill_distiller(
    palette = "Purples",
    direction = 1,
    na.value = "transparent",
    name = "Percent White-alone, not Hispanic or Latino",
    labels = percent_format(accuracy = 1)
  ) +
  theme_void() +
  labs(
    title = "Percent White-alone, not Hispanic or Latino in NYC Census Tracts (2020)",
    caption = "Source: U.S. Census Bureau, 2020 Decennial Census"
  ) +
  # Borough boundaries for context, drawn as unfilled outlines.
  geom_sf(
    data = boros,
    color = "black", fill = NA, lwd = 0.2
  )

ggplotly(white_map, tooltip = "text")

Description of Results

  1. The Bronx has the highest average percentage of Hispanic or Latino residents, followed by Queens.
  2. Richmond County (Staten Island) has the highest concentration of White-alone (not Hispanic or Latino) residents.
  3. Kings County (Brooklyn) shows a racially diverse composition, with significant average percentages of White-alone (35.3%), Black-alone (26.9%), Hispanic or Latino (18.5%), and Asian-alone (13.3%) residents.
  4. The maps reveal spatial clustering of racial groups. For example, areas with high Asian-alone populations are concentrated in Queens, while Black-alone populations are more prominent in central Brooklyn.