Script

I started, as always, by loading the libraries I would use.

library(tidyverse)
library(tidycensus)
library(sf)
library(scales)
library(viridis)

I looked through the variables from the 2020 decennial census and selected the ones I would need. I then created a dataframe of these variables in New York state at the census tract level.

# Variable dictionary for the 2020 decennial "pl" dataset, used to look up
# the variable codes requested below. cache = TRUE asks tidycensus to cache
# the table so repeated runs can reuse it.
pl_2020 <- load_variables(2020, "pl", cache = TRUE)


# Tract-level counts for all of New York State from the 2020 decennial census.
# output = "wide" gives one row per tract with one column per named variable;
# geometry = TRUE attaches the tract polygons so the result can be mapped.
race_nyc_raw <- get_decennial(
  geography = "tract",
  variables = c(
    total_pop = "P1_001N",
    total_hisp = "P2_002N",
    total_white = "P2_005N",
    total_black = "P2_006N",
    total_asian = "P2_008N"
  ),
  state = "NY",
  year = 2020,
  output = "wide",
  geometry = TRUE
)

I created a new dataframe, filtering to the counties in NYC and calculating percents. This will be used to map this data later.

# The five counties that are filtered for as New York City.
nyc_counties <- c(
  "Kings County", "Queens County", "Bronx County",
  "Richmond County", "New York County"
)

# Share of `part` in `total`, rounded to 3 decimals.
# A 0 / 0 division (a tract with no population) yields NaN, which is
# converted to NA so those tracts are treated as missing rather than numeric.
pct_of_total <- function(part, total) {
  share <- round(part / total, 3)
  share[is.nan(share)] <- NA
  share
}

# NYC tracts only, with each group's share of the tract's total population.
race_nyc_tracts <- race_nyc_raw %>%
  # NAME is split on ", " into its tract, county and state parts.
  separate(NAME, into = c("tract", "county", "state"), sep = ", ") %>%
  filter(county %in% nyc_counties) %>%
  mutate(
    pct_hisp = pct_of_total(total_hisp, total_pop),
    pct_white = pct_of_total(total_white, total_pop),
    pct_black = pct_of_total(total_black, total_pop),
    pct_asian = pct_of_total(total_asian, total_pop)
  )

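I imported a GeoJSON file of NYC’s borough boundaries and transformed both it and the dataframe above to EPSG:2263 (NAD83 / New York Long Island, in feet), so that both layers share one projected coordinate system suited to NYC. This will make the coming maps clearer.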

# Borough boundary polygons read from a local GeoJSON file.
# NOTE(review): this is an absolute, machine-specific path; the script will
# only run as-is on a machine where the file exists at this location.
boros <- st_read(
  "C:/Users/likms/Desktop/DUE/methods1/main_data/raw/geo/Borough Boundaries.geojson",
  quiet = TRUE
)

# Reproject the tract data and the borough boundaries to EPSG:2263 so both
# layers use the same coordinate reference system when drawn together.
race_nyc_tracts_2263 <- race_nyc_tracts %>% st_transform(crs = 2263)
boros_2263 <- boros %>% st_transform(crs = 2263)

I calculated summary statistics. I recalculated percentages at this point. I believe this is a more accurate way to calculate percentages at the county level, rather than averaging the tracts within each county.

# County-level summary table.
# Tract counts are summed within each county and the percentages are then
# recomputed from those totals, rather than averaging tract-level percentages.
# The geometry is dropped first so the result is a plain table.
nyc_race_boro_stats <- race_nyc_tracts_2263 %>%
  st_drop_geometry() %>%
  group_by(county) %>%
  # `county` is the grouping column, so summarise() already returns it;
  # it does not need to be re-created here.
  summarise(
    `Total Population` = sum(total_pop, na.rm = TRUE),
    `Total Hispanic or Latino` = sum(total_hisp, na.rm = TRUE),
    `Total White` = sum(total_white, na.rm = TRUE),
    `Total Black` = sum(total_black, na.rm = TRUE),
    `Total Asian` = sum(total_asian, na.rm = TRUE)
  ) %>%
  # percent() formats the shares as whole-percent labels for display.
  mutate(
    `Percent Hispanic or Latino` =
      percent(`Total Hispanic or Latino` / `Total Population`, accuracy = 1L),
    `Percent White` =
      percent(`Total White` / `Total Population`, accuracy = 1L),
    `Percent Black` =
      percent(`Total Black` / `Total Population`, accuracy = 1L),
    `Percent Asian` =
      percent(`Total Asian` / `Total Population`, accuracy = 1L)
  )

nyc_race_boro_stats

I mapped percent Hispanic or Latino, White, Black, and Asian, each in a different color, at the census tract level, for all of NYC. I also added black borders around each Borough.

# Borough outlines drawn on top of every map; built once and reused.
# Only the boroughs named in the filter are outlined.
boro_outlines <- boros_2263 %>%
  filter(boro_name %in% c(
    "Queens", "Brooklyn", "Manhattan", "Staten Island", "Bronx"
  ))

# Draw a tract-level choropleth of one percentage column for all of NYC.
#
# pct_col:     name (string) of the column in race_nyc_tracts_2263 to map,
#              e.g. "pct_hisp".
# group_label: group name used in the legend title and plot title,
#              e.g. "Hispanic or Latino".
# palette:     name of the ColorBrewer palette passed to
#              scale_fill_distiller(), e.g. "Blues".
#
# Returns a ggplot object; calling the function at top level prints the map.
map_tract_pct <- function(pct_col, group_label, palette) {
  ggplot() +
    geom_sf(
      data = race_nyc_tracts_2263,
      # .data[[pct_col]] looks the fill column up by its string name.
      mapping = aes(fill = .data[[pct_col]]),
      color = "#ffffff",
      lwd = 0
    ) +
    theme_void() +
    scale_fill_distiller(
      direction = 1,
      palette = palette,
      # Tracts with an NA percentage are left unfilled.
      na.value = "transparent",
      name = paste("Percent", group_label),
      labels = percent_format(accuracy = 1L)
    ) +
    labs(
      title = paste("Percent", group_label, "NYC Census Tracts"),
      caption = "Source: 2020 Decennial Census"
    ) +
    # This makes a black border around each borough in the filter.
    geom_sf(data = boro_outlines, color = "black", fill = NA, lwd = .5)
}

map_tract_pct("pct_hisp", "Hispanic or Latino", "Blues")

map_tract_pct("pct_white", "White", "Greens")

map_tract_pct("pct_black", "Black", "Purples")

map_tract_pct("pct_asian", "Asian", "Reds")

Methods

I began by picking variables from the 2020 Decennial Census. I chose the following variables:

- P1_001N: Total population
- P2_002N: Total population identifying as Hispanic or Latino
- P2_005N: Total population identifying as White alone, not Hispanic or Latino
- P2_006N: Total population identifying as Black alone, not Hispanic or Latino
- P2_008N: Total population identifying as Asian alone, not Hispanic or Latino

I filtered this data to the counties in New York City.

I calculated the percent of people in each census tract identifying as Hispanic or Latino, White alone, Black alone, and Asian alone, by mutating to divide the respective variable by the total population.

When making summary statistics, I summed the populations of each census tract within each county to find the total populations of each county and the total population of people identifying as only Hispanic or Latino, only White, only Black, and only Asian. I removed NA’s at this point using na.rm = TRUE. I then calculated the percentages for each race at the county level by dividing the total populations identifying as each race alone in each county by the total population of that county.

Lastly, I mapped the percentages of people identifying as each race alone at the census tract level, on four maps, using ggplot.

Results

The Borough with the highest population is Brooklyn at 2,736,074 people, although Queens is not far behind at 2,405,464 people. Staten Island has the smallest population at 495,747, of which 277,981 or 56% identify as White alone. The Bronx has an unusually high percentage of Hispanic or Latino people at 806,463 people, or 55% of its 1,472,654 total population. The Bronx is also only 5% Asian, 9% White, and 28% Black.

Note: Some isolated areas appear to have high percentages of certain races. These areas appear as large, surprisingly dark spots on the maps. This is because these are mostly public areas, such as parks and airports, that don’t have many residents, so a handful of people can produce a very high percentage for a single group.