Description

This notebook presents a census tract-level analysis of race in New York City using data from the 2020 Decennial Census. The analysis examines the racial composition of New York City, processing the data to calculate percentages for various racial groups. The results are presented through a summary table and maps, offering insights into the spatial distribution of racial demographics across the five boroughs of NYC.

library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.2
library(tidycensus)
library(sf)
library(scales)
library(viridis)
library(RColorBrewer)
library(plotly)
## Warning: package 'plotly' was built under R version 4.4.2
library(ggplot2)

Methods

  1. Data Source: The analysis uses the 2020 Decennial Census data, downloaded using the tidycensus package.
  2. Geographic Scope: The data is at the census tract level for New York City, covering the five boroughs (New York, Kings, Queens, Bronx, and Richmond Counties).
  3. Variables Created:
    • Percent Hispanic or Latino
    • Percent Black-alone, not Hispanic or Latino
    • Percent Asian-alone, not Hispanic or Latino
    • Percent White-alone, not Hispanic or Latino
  4. Visualization:
    • Maps are generated for each racial category using distinct color gradients.
    • Borough boundaries are overlaid for additional spatial context.
  5. Summary Table:
    • Displays the average percentage of each racial category by county, formatted as percentages with one decimal place.
# Borough outlines, overlaid on the tract maps for spatial context.
# NOTE(review): this is an absolute path on one user's machine; anyone else
# running the notebook has to point it at their own copy of the file.
boros <- st_read(
  dsn = "C:/Users/arooj/OneDrive/Desktop/class 1/part2-20241014T141218Z-001/part2/data/raw/geo/Borough Boundaries.geojson"
)

# Download tract-level 2020 Decennial Census counts, with tract geometries,
# for the five NYC counties. `output = "wide"` gives one row per tract and one
# column per named variable below.
raw_data_race <- get_decennial(
  geography = "tract",
  variables = c(
    total_population = "P2_001N",
    hispanic_or_latino = "P2_002N",
    black_alone = "P2_006N",
    asian_alone = "P2_008N",
    white_alone = "P2_005N"
  ),
  state = "NY",
  county = c("New York", "Kings", "Queens", "Bronx", "Richmond"),
  year = 2020,
  # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
  geometry = TRUE,
  output = "wide"
)
## Warning: • You have not set a Census API key. Users without a key are limited to 500
## queries per day and may experience performance limitations.
## ℹ For best results, get a Census API key at
## http://api.census.gov/data/key_signup.html and then supply the key to the
## `census_api_key()` function to use it throughout your tidycensus session.
## This warning is displayed once per session.
# Turn the raw counts into each group's share of the tract's total population,
# rounded to three decimals. A tract whose total population is zero produces
# NaN here rather than a number.
data_race <- mutate(
  raw_data_race,
  pct_hispanic_latino = round(x = hispanic_or_latino / total_population, digits = 3),
  pct_black = round(x = black_alone / total_population, digits = 3),
  pct_asian = round(x = asian_alone / total_population, digits = 3),
  pct_white = round(x = white_alone / total_population, digits = 3)
)

Results

Summary Table: Percent of Each Racial Category by County

# Split NAME on ", " so the county can be used as a grouping key.
# Naming only two pieces made separate() discard the remainder of NAME and
# warn on every row; the third piece is now kept in its own column.
# NOTE(review): the third piece is assumed to be the state name — confirm
# against the NAME values returned by get_decennial().
data_race_summary <- data_race |>
  separate(NAME, into = c("tract", "county", "state"), sep = ", ")
# Format the mean of a vector of shares as a percentage string with one
# decimal place, ignoring missing values.
mean_share_label <- function(shares) {
  percent(mean(shares, na.rm = TRUE), accuracy = 0.1)
}

# One row per county. Each value is the unweighted mean of the tract-level
# shares in that county, not the share of the county's total population.
summary_table <- data_race_summary |>
  st_drop_geometry() |>
  group_by(county) |>
  summarise(
    Avg_Percent_hispanic_latino = mean_share_label(pct_hispanic_latino),
    Avg_Percent_black = mean_share_label(pct_black),
    Avg_Percent_asian = mean_share_label(pct_asian),
    Avg_Percent_white = mean_share_label(pct_white)
  )
print(summary_table)

Maps: Percent of Each Racial Category by Census Tract

Below are maps showing the spatial distribution of each racial category by census tract. Borough boundaries are included for spatial reference.

# Build a tract-level choropleth for one share column, with borough outlines
# drawn on top. The four maps below differ only in column, label and palette,
# so the shared plot definition lives here.
#
# tracts     sf data frame holding a NAME column and the share column
# share_col  name of the share column, as a string (e.g. "pct_black")
# label      group label; shown as "Percent <label>" in the tooltip, the
#            legend title and the plot title
# palette    palette name passed to scale_fill_distiller()
# outlines   sf layer drawn over the tracts for spatial context
#
# Returns a ggplot object.
make_race_map <- function(tracts, share_col, label, palette, outlines) {
  # Tracts with a missing share are removed before plotting; the share is
  # copied to a fixed column name so aes() can refer to it directly.
  mapped <- tracts |>
    filter(!is.na(.data[[share_col]])) |>
    mutate(map_share = .data[[share_col]])
  percent_label <- paste0("Percent ", label)

  ggplot() +
    geom_sf(data = mapped,
            # `text` is not an aesthetic geom_sf knows (ggplot2 warns about
            # it); it is supplied for ggplotly(tooltip = "text").
            mapping = aes(fill = map_share,
                          text = paste0(NAME, ":",
                                        "<br>", percent_label, ": ",
                                        scales::percent(map_share, accuracy = 1))),
            color = "transparent") +
    scale_fill_distiller(palette = palette,
                         direction = 1,
                         na.value = "transparent",
                         name = percent_label,
                         labels = percent_format(accuracy = 1)) +
    theme_void() +
    labs(title = paste0(percent_label, " in NYC Census Tracts (2020)"),
         caption = "Source: U.S. Census Bureau, 2020 Decennial Census") +
    geom_sf(data = outlines,  # Borough boundaries for context
            color = "black", fill = NA, lwd = 0.2)
}

# Map for Percent Hispanic or Latino
hispanic_map <- make_race_map(data_race, "pct_hispanic_latino",
                              "Hispanic or Latino", "Reds", boros)
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
ggplotly(hispanic_map, tooltip = "text")
# Map for Percent Black-alone, not Hispanic or Latino
black_map <- make_race_map(data_race, "pct_black",
                           "Black-alone, not Hispanic or Latino", "Blues", boros)
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
ggplotly(black_map, tooltip = "text")
# Map for Percent Asian-alone, not Hispanic or Latino
asian_map <- make_race_map(data_race, "pct_asian",
                           "Asian-alone, not Hispanic or Latino", "Greens", boros)
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
ggplotly(asian_map, tooltip = "text")
# Map for Percent White-alone, not Hispanic or Latino
white_map <- make_race_map(data_race, "pct_white",
                           "White-alone, not Hispanic or Latino", "Purples", boros)
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
ggplotly(white_map, tooltip = "text")

Description of Results

  1. The Bronx has the highest average percentage of Hispanic or Latino residents, followed by Queens.
  2. Richmond County (Staten Island) has the highest concentration of White-alone (not Hispanic or Latino) residents.
  3. Kings County (Brooklyn) shows a racially diverse composition, with significant percentages of Hispanic or Latino, Black-alone, and Asian-alone residents.
  4. The maps reveal spatial clustering of racial groups. For example, areas with high Asian-alone populations are concentrated in Queens, while Black-alone populations are more prominent in central Brooklyn.