Mapping the proportion of different races in New York.

library(tidyverse)
library(tidycensus)
library(sf)
library(scales)
library(viridis)
library(ggplot2)
library(plotly)

Methods

Census tract boundaries and data are from the 2016-2020 5-year American Community Survey, accessed with the tidycensus R package. Census data includes:

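the number of people in each group mapped below: non-Hispanic White, non-Hispanic Black, non-Hispanic Asian, and Hispanic or Latino (table B03002, HISPANIC OR LATINO ORIGIN BY RACE)
the total population of each census tract (B03002_001), used as the denominator for the percentages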

Borough boundaries are represented with a GeoJSON file that was downloaded from NYC Open Data

Get Data

# Load the variable dictionary for the 2020 ACS 5-year estimates so variable
# codes can be looked up; `cache = TRUE` asks tidycensus to cache the table.
acs201620 <- load_variables(2020, "acs5", cache = TRUE)

# Import tract-level counts from table B03002 (HISPANIC OR LATINO ORIGIN BY
# RACE) for the five New York City counties, with tract geometry attached.
# `output = "wide"` yields one row per tract; the estimate columns used
# downstream carry an "E" suffix (e.g. `whiteE`, `race_popE`).
raw_race <- get_acs(
  geography = "tract",
  variables = c(
    race_pop = "B03002_001",
    white = "B03002_003",
    asian = "B03002_006",
    black = "B03002_004",
    hispanic = "B03002_012"
  ),
  state = "NY",
  county = c("Queens", "New York", "Richmond", "Bronx", "Kings"),
  geometry = TRUE,
  year = 2020,
  output = "wide"
)

## Warning: • You have not set a Census API key. Users without a key are limited to 500
## queries per day and may experience performance limitations.
## ℹ For best results, get a Census API key at
## http://api.census.gov/data/key_signup.html and then supply the key to the
## `census_api_key()` function to use it throughout your tidycensus session.
## This warning is displayed once per session.

# Derive each group's share of the tract's total population.
race <- mutate(
  raw_race,
  pct_white = whiteE / race_popE,
  pct_asian = asianE / race_popE,
  pct_black = blackE / race_popE,
  pct_hispanic = hispanicE / race_popE
)

Check Data

# Inspect the distribution of each percentage column (including the number
# of NAs) to decide how the values should be mapped.
walk(
  c("pct_white", "pct_asian", "pct_black", "pct_hispanic"),
  \(pct_col) print(summary(race[[pct_col]]))
)

# Outcome of the NA check above: the NAs come from tracts whose total
# population (race_pop) is zero or very small. To show trends and proportions
# reliably, only tracts with more than 1000 people are kept.

# Drop the low-population tracts first, then compute the group shares on the
# remaining rows (the shares are per-row, so the result is the same).
race <- raw_race |>
  filter(race_popE > 1000) |>
  mutate(
    pct_white = whiteE / race_popE,
    pct_asian = asianE / race_popE,
    pct_black = blackE / race_popE,
    pct_hispanic = hispanicE / race_popE
  )

Bring in Shapefile Map Data

Download the shapefile of the 2020 Neighborhood Tabulation Areas from NYC Planning
Unzip and save to your part2/data/raw/geo folder

https://www.nyc.gov/site/planning/data-maps/open-data/census-download-metadata.page

Download the geojson of the NYC Borough Boundaries from NYC Open Data
Move it to your part2/data/raw/geo folder

https://data.cityofnewyork.us/City-Government/Borough-Boundaries/tqmj-j8zm

# Borough boundaries (GeoJSON from NYC Open Data).
boros <-
  "~/Downloads/Methods 1_R Learning/part2/data/raw/geo/BoroughBoundaries.geojson" |>
  st_read()

# 2020 Neighborhood Tabulation Areas for NYC (shapefile).
nabes <-
  "~/Downloads/Methods 1_R Learning/part2/data/raw/geo/nynta2020.shp" |>
  st_read()

Map Borough - white

# Choropleth of the share of each census tract's population that is
# non-Hispanic White, with neighborhood (NTA) and borough outlines on top.
white_city_map <- ggplot() +
  geom_sf(
    data = race,
    # `text` is not a geom_sf aesthetic (ggplot2 warns that it is ignored);
    # it is supplied so ggplotly(tooltip = "text") can use it for hover labels.
    mapping = aes(
      fill = pct_white,
      text = paste0(NAME, ":",
                    "<br>Percent White Only Population:",
                    percent(pct_white, accuracy = 1))
    ),
    color = "transparent"
  ) +
  theme_void() +
  scale_fill_distiller(
    breaks = c(0, .2, .4, .6, .8, 1),
    direction = 1,
    na.value = "#fafafa", # alternative: "transparent"
    name = "Percent White Only, non-Hispanic (%)",
    labels = percent_format(accuracy = 1L)
  ) +
  labs(
    title = "New York City, Population by Race at Census Tract Level",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  # Use %in% for set membership. `==` against a length-5 vector recycles the
  # vector row by row, silently dropping most features and raising a
  # "longer object length is not a multiple of shorter object length" warning.
  # NOTE(review): "Richmond" is the county name used in the get_acs() call;
  # confirm the borough files do not label it "Staten Island", otherwise that
  # borough's outlines are filtered out here.
  geom_sf(
    data = nabes |>
      filter(BoroName %in% c("Brooklyn", "Queens", "Manhattan", "Richmond", "Bronx")),
    color = "#4D84BB", fill = NA, lwd = 0.1
  ) +
  geom_sf(
    data = boros |>
      filter(boro_name %in% c("Brooklyn", "Queens", "Manhattan", "Richmond", "Bronx")),
    color = "#244C95", fill = NA, lwd = .25
  )

## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text

## Warning: There was 1 warning in `stopifnot()`.
## ℹ In argument: `==...`.
## Caused by warning in `BoroName == c("Brooklyn", "Queens", "Manhattan", "Richmond", "Bronx")`:
## ! longer object length is not a multiple of shorter object length

# Convert the static map to an interactive plotly widget, using the `text`
# aesthetic as the tooltip.
white_city_map |>
  ggplotly(tooltip = "text")

Map Borough - asian

# Choropleth of the share of each census tract's population that is
# non-Hispanic Asian, with neighborhood (NTA) and borough outlines on top.
asian_city_map <- ggplot() +
  # Layer 1: tract polygons filled by percentage. `text` is only consumed by
  # ggplotly(tooltip = "text"); ggplot2 itself warns that it is ignored.
  geom_sf(
    data = race,
    mapping = aes(
      fill = pct_asian,
      text = paste0(
        NAME, ":",
        "<br>Percent Asian Only Population:",
        percent(pct_asian, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  # Layer 2: thin neighborhood outlines.
  geom_sf(
    data = filter(
      nabes,
      BoroName %in% c("Brooklyn", 'Queens', 'Manhattan', 'Richmond', 'Bronx')
    ),
    color = "#4D84BB",
    fill = NA,
    lwd = 0.1
  ) +
  # Layer 3: heavier borough outlines.
  geom_sf(
    data = filter(
      boros,
      boro_name %in% c("Brooklyn", 'Queens', 'Manhattan', 'Richmond', 'Bronx')
    ),
    color = "#244C95",
    fill = NA,
    lwd = .25
  ) +
  # Non-layer components; their position in the chain does not affect drawing.
  scale_fill_distiller(
    breaks = c(0, .2, .4, .6, .8, 1),
    direction = 1,
    na.value = "#fafafa", # alternative: "transparent"
    name = "Percent Asian Only, non-Hispanic (%)",
    labels = percent_format(accuracy = 1L)
  ) +
  labs(
    title = "New York City, Population by Race at Census Tract Level",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  theme_void()

## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text

# Convert the static map to an interactive plotly widget, using the `text`
# aesthetic as the tooltip.
asian_city_map |>
  ggplotly(tooltip = "text")

Map Borough - black

# Choropleth of the share of each census tract's population that is
# non-Hispanic Black, with neighborhood (NTA) and borough outlines on top.
black_city_map <- ggplot() +
  # Layer 1: tract polygons filled by percentage. `text` is only consumed by
  # ggplotly(tooltip = "text"); ggplot2 itself warns that it is ignored.
  geom_sf(
    data = race,
    mapping = aes(
      fill = pct_black,
      text = paste0(
        NAME, ":",
        "<br>Percent Black Only Population:",
        percent(pct_black, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  # Layer 2: thin neighborhood outlines.
  geom_sf(
    data = filter(
      nabes,
      BoroName %in% c("Brooklyn", 'Queens', 'Manhattan', 'Richmond', 'Bronx')
    ),
    color = "#4D84BB",
    fill = NA,
    lwd = 0.1
  ) +
  # Layer 3: heavier borough outlines.
  geom_sf(
    data = filter(
      boros,
      boro_name %in% c("Brooklyn", 'Queens', 'Manhattan', 'Richmond', 'Bronx')
    ),
    color = "#244C95",
    fill = NA,
    lwd = .25
  ) +
  # Non-layer components; their position in the chain does not affect drawing.
  scale_fill_distiller(
    breaks = c(0, .2, .4, .6, .8, 1),
    direction = 1,
    na.value = "#fafafa", # alternative: "transparent"
    name = "Percent Black Only, non-Hispanic (%)",
    labels = percent_format(accuracy = 1L)
  ) +
  labs(
    title = "New York City, Population by Race at Census Tract Level",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  theme_void()

## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text

# Convert the static map to an interactive plotly widget, using the `text`
# aesthetic as the tooltip.
black_city_map |>
  ggplotly(tooltip = "text")

Map Borough - hispanic

# Choropleth of the share of each census tract's population that is Hispanic
# or Latino, with neighborhood (NTA) and borough outlines on top.
hispanic_city_map <- ggplot() +
  geom_sf(
    data = race,
    # `text` is not a geom_sf aesthetic (ggplot2 warns that it is ignored);
    # it is supplied so ggplotly(tooltip = "text") can use it for hover labels.
    mapping = aes(
      fill = pct_hispanic,
      text = paste0(NAME, ":",
                    "<br>Percent Hispanic Only Population:",
                    percent(pct_hispanic, accuracy = 1))
    ),
    color = "transparent"
  ) +
  theme_void() +
  scale_fill_distiller(
    breaks = c(0, .2, .4, .6, .8, 1),
    direction = 1,
    na.value = "#fafafa", # alternative: "transparent"
    # The legend previously read "Hispanic Only, non-Hispanic", which
    # contradicts itself; this layer maps the Hispanic or Latino share.
    name = "Percent Hispanic or Latino (%)",
    labels = percent_format(accuracy = 1L)
  ) +
  labs(
    title = "New York City, Population by Race at Census Tract Level",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  # Use %in% for set membership. `==` against a length-5 vector recycles the
  # vector row by row, silently dropping most features and raising a
  # "longer object length is not a multiple of shorter object length" warning.
  # NOTE(review): "Richmond" is the county name used in the get_acs() call;
  # confirm the borough files do not label it "Staten Island", otherwise that
  # borough's outlines are filtered out here.
  geom_sf(
    data = nabes |>
      filter(BoroName %in% c("Brooklyn", "Queens", "Manhattan", "Richmond", "Bronx")),
    color = "#4D84BB", fill = NA, lwd = 0.1
  ) +
  geom_sf(
    data = boros |>
      filter(boro_name %in% c("Brooklyn", "Queens", "Manhattan", "Richmond", "Bronx")),
    color = "#244C95", fill = NA, lwd = .25
  )

## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text

## Warning: There was 1 warning in `stopifnot()`.
## ℹ In argument: `==...`.
## Caused by warning in `BoroName == c("Brooklyn", "Queens", "Manhattan", "Richmond", "Bronx")`:
## ! longer object length is not a multiple of shorter object length

# Convert the static map to an interactive plotly widget, using the `text`
# aesthetic as the tooltip.
hispanic_city_map |>
  ggplotly(tooltip = "text")

RACE in New York City