Aim

Analysis of race in New York City from the decennial census at the census-tract level.

Data

· Variables: Decennial Census · Borough Boundaries: NYC Open Data · Neighborhood Tabulation Areas: NYC Planning

Import Libraries needed:

library(tidyverse)
library(tidycensus)
library(sf)
library(scales)
library(viridis)

Decennial Census Variables

# Codebook of the 2020 redistricting ("pl") file, kept for looking up
# variable codes. cache = TRUE asks tidycensus to reuse a stored copy.
pl_2020 <- load_variables(2020, "pl", cache = TRUE)

# PL variables requested below (names follow the rename step further down):
#   P1_001N total population       P2_002N Hispanic or Latino
#   P2_005N White alone            P2_006N Black alone
#   P2_008N Asian alone
pl_vars <- c("P1_001N", "P2_002N", "P2_005N", "P2_006N", "P2_008N")

Raw Race NYC Data Frame at the census-tract level.

# Tract-level 2020 counts for the requested PL variables, for the whole of
# state 36, one column per variable ("wide"), with tract geometry attached.
# Rows are sorted by tract NAME.
raw_race_nyc <- get_decennial(
  geography = "tract",
  variables = pl_vars,
  state = 36,
  year = 2020,
  output = "wide",
  geometry = TRUE
) %>%
  arrange(NAME)

Race NYC Data Frame: To select only the five boroughs, I separate the NAME field into tract, county, and state columns and filter by the five NYC counties. I then rename the columns, calculate the shares listed below, and convert any NaN results (tracts with zero population) to NA: · Percent Hispanic or Latino · Percent Black-alone, not Hispanic or Latino · Percent Asian-alone, not Hispanic or Latino · Percent White-alone, not Hispanic or Latino

# Counties that make up the five boroughs of New York City.
nyc_counties <- c(
  "Queens County", "Bronx County", "Kings County",
  "Richmond County", "New York County"
)

# Share of `total` made up by `part`, rounded to three decimals.
# 0 / 0 (a tract with no population) gives NaN, which is turned into NA.
share_of_total <- function(part, total) {
  share <- round(part / total, 3)
  ifelse(is.nan(share), NA, share)
}

# NYC tracts only, with readable column names and the share of each group
# in the tract's total population (values are proportions between 0 and 1).
race_nyc <- raw_race_nyc %>%
  # NAME looks like "<tract>, <county>, <state>"; split it to filter on county.
  separate(NAME, into = c("tract", "county", "state"), sep = ", ") %>%
  filter(county %in% nyc_counties) %>%
  rename(
    tot_pop = P1_001N,
    his_or_lat = P2_002N,
    white_alone = P2_005N,
    black_alone = P2_006N,
    asian_alone = P2_008N
  ) %>%
  mutate(
    per_his_or_lat = share_of_total(his_or_lat, tot_pop),
    per_white_alone = share_of_total(white_alone, tot_pop),
    per_black_alone = share_of_total(black_alone, tot_pop),
    per_asian_alone = share_of_total(asian_alone, tot_pop)
  )

NYC Open Data: Borough Boundaries

# Borough boundary polygons, read from a local GeoJSON copy.
# quiet = TRUE suppresses the driver/layer summary st_read() normally prints.
boros <- st_read(
  dsn = "~/Desktop/Fall 2022/methodos1/main_data/raw/geo/Borough Boundaries.geojson",
  quiet = TRUE
)

NYC Planning: Neighborhood Tabulation Areas

# Neighborhood Tabulation Area polygons, read from a local shapefile.
nabes <- st_read(
  dsn = "~/Desktop/Fall 2022/methodos1/main_data/raw/geo/nynta2020_22b/nynta2020.shp",
  quiet = TRUE
)

# Reproject the tract data and the borough boundaries to EPSG:2263 so the
# two layers share one coordinate reference system.
race_nyc_2263 <- race_nyc %>% st_transform(crs = 2263)
boros_2263 <- boros %>% st_transform(crs = 2263)

SELECT THE FIELDS FROM NTA

# Keep only the NTA attributes used downstream: borough name, NTA code and
# NTA name. (BoroName was previously listed twice; once is enough.)
nabes_selected <- nabes %>%
  select(BoroName, NTA2020, NTAName)

SPATIAL JOIN

# Attach NTA attributes to every tract. left = TRUE keeps tracts with no
# matching NTA, and largest = TRUE assigns each tract the single NTA it
# overlaps most, so the result has one row per tract.
race_nyc_nabes <- race_nyc_2263 %>%
  st_join(
    # The NTA layer is never reprojected elsewhere; align it with the tract
    # CRS here so the join cannot fail on a CRS mismatch.
    st_transform(nabes_selected, st_crs(race_nyc_2263)),
    left = TRUE,
    join = st_intersects,
    largest = TRUE
  )

Plot NYC maps

The maps overlay the borough boundaries; I remove the census-tract outlines (lwd = 0) and use the tract data joined with the neighborhoods.

Map the percentage of each racial category in NYC.

· plot: NYC map % White alone

# Choropleth of the White-alone share per census tract, with borough outlines.
ggplot() +
  # Tract polygons filled by share; zero-width outlines so only the fill shows.
  geom_sf(
    data = race_nyc_nabes,
    mapping = aes(fill = per_white_alone),
    color = "#ffffff",
    lwd = 0
  ) +
  # Thin black borough outlines drawn on top of the tract fill.
  geom_sf(
    data = boros_2263 %>%
      filter(
        boro_name %in%
          c("Brooklyn", "Bronx", "Manhattan", "Staten Island", "Queens")
      ),
    color = "black",
    fill = NA,
    lwd = .1
  ) +
  # Light-to-dark red scale; tracts with NA share are left transparent.
  scale_fill_distiller(
    name = "Percent White alone (%)",
    palette = "Reds",
    direction = 1,
    breaks = c(0, .2, .4, .6, .8, 1),
    labels = percent_format(accuracy = 1L),
    na.value = "transparent"
  ) +
  labs(
    title = "NYC, White alone by Census Tract",
    caption = "Source: 2020 Decennial Census"
  ) +
  theme_void()

· plot: NYC map % Hispanic or Latino

# Choropleth of the Hispanic or Latino share per census tract, with borough
# outlines.
ggplot() +
  # Tract polygons filled by share; zero-width outlines so only the fill shows.
  geom_sf(
    data = race_nyc_nabes,
    mapping = aes(fill = per_his_or_lat),
    color = "#ffffff",
    lwd = 0
  ) +
  # Thin black borough outlines drawn on top of the tract fill.
  geom_sf(
    data = boros_2263 %>%
      filter(
        boro_name %in%
          c("Brooklyn", "Bronx", "Manhattan", "Staten Island", "Queens")
      ),
    color = "black",
    fill = NA,
    lwd = .1
  ) +
  # The palette is now named explicitly, like the other three maps. "Blues"
  # is the sequential palette the scale falls back to when none is given,
  # so the colours are unchanged.
  scale_fill_distiller(
    name = "Percent Hispanic or Latino (%)",
    palette = "Blues",
    direction = 1,
    breaks = c(0, .2, .4, .6, .8, 1),
    labels = percent_format(accuracy = 1L),
    na.value = "transparent"
  ) +
  labs(
    title = "NYC, Hispanic or Latino by Census Tract",
    caption = "Source: 2020 Decennial Census"
  ) +
  theme_void()

· plot: NYC map % Black alone

# Choropleth of the Black-alone share per census tract, with borough outlines.
ggplot() +
  # Tract polygons filled by share; zero-width outlines so only the fill shows.
  geom_sf(
    data = race_nyc_nabes,
    mapping = aes(fill = per_black_alone),
    color = "#ffffff",
    lwd = 0
  ) +
  # Thin black borough outlines drawn on top of the tract fill.
  geom_sf(
    data = boros_2263 %>%
      filter(
        boro_name %in%
          c("Brooklyn", "Bronx", "Manhattan", "Staten Island", "Queens")
      ),
    color = "black",
    fill = NA,
    lwd = .1
  ) +
  # Light-to-dark green scale; tracts with NA share are left transparent.
  scale_fill_distiller(
    name = "Percent Black alone (%)",
    palette = "Greens",
    direction = 1,
    breaks = c(0, .2, .4, .6, .8, 1),
    labels = percent_format(accuracy = 1L),
    na.value = "transparent"
  ) +
  labs(
    title = "NYC, Black alone by Census Tract",
    caption = "Source: 2020 Decennial Census"
  ) +
  theme_void()

· plot: NYC map % Asian alone

# Choropleth of the Asian-alone share per census tract, with borough outlines.
ggplot() +
  # Tract polygons filled by share; zero-width outlines so only the fill shows.
  geom_sf(
    data = race_nyc_nabes,
    mapping = aes(fill = per_asian_alone),
    color = "#ffffff",
    lwd = 0
  ) +
  # Thin black borough outlines drawn on top of the tract fill.
  geom_sf(
    data = boros_2263 %>%
      filter(
        boro_name %in%
          c("Brooklyn", "Bronx", "Manhattan", "Staten Island", "Queens")
      ),
    color = "black",
    fill = NA,
    lwd = .1
  ) +
  # Light-to-dark purple scale; tracts with NA share are left transparent.
  scale_fill_distiller(
    name = "Percent Asian alone (%)",
    palette = "Purples",
    direction = 1,
    breaks = c(0, .2, .4, .6, .8, 1),
    labels = percent_format(accuracy = 1L),
    na.value = "transparent"
  ) +
  labs(
    title = "NYC, Asian alone by Census Tract",
    caption = "Source: 2020 Decennial Census"
  ) +
  theme_void()

I find the maps of each racial category's distribution across NYC very powerful: they support the conclusion that the city is spatially segregated by race.

Summary statistics

Calculated summary statistics for the five Boroughs.

# Borough-level summary: population totals per group and each group's share
# of the borough's total population.
st_drop_geometry(race_nyc_nabes) %>%
  group_by(county) %>%
  summarise(
    # Borough label comes from the first tract's joined NTA record.
    # NOTE(review): this would be NA if that tract had no NTA match - confirm.
    Borough = first(BoroName),
    `Est. Total Population` = sum(tot_pop),
    `Est. Total Hispanic or Latino Population` = sum(his_or_lat),
    `Est. Total White Alone Population` = sum(white_alone),
    `Est. Total Black Alone Population` = sum(black_alone),
    `Est. Total Asian Alone Population` = sum(asian_alone)
  ) %>%
  # Percentages go into their own "Est. Percent ..." columns. Previously the
  # White, Black and Asian percentages overwrote the matching
  # "Est. Total ..." count columns, so those counts were lost.
  mutate(
    `Est. Percent Hispanic or Latino Population` = percent(
      `Est. Total Hispanic or Latino Population` / `Est. Total Population`,
      accuracy = 1L
    ),
    `Est. Percent White Alone Population` = percent(
      `Est. Total White Alone Population` / `Est. Total Population`,
      accuracy = 1L
    ),
    `Est. Percent Black Alone Population` = percent(
      `Est. Total Black Alone Population` / `Est. Total Population`,
      accuracy = 1L
    ),
    `Est. Percent Asian Alone Population` = percent(
      `Est. Total Asian Alone Population` / `Est. Total Population`,
      accuracy = 1L
    )
  )