library(tidyverse)
library(tidycensus)
library(sf)
library(scales)
library(viridis)
library(ggplot2)
library(plotly)
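Census tract boundaries and data are from the 2016-2020 5-year American Community Survey, accessed with the tidycensus R package. Census data includes the total population and the non-Hispanic White, non-Hispanic Black, non-Hispanic Asian, and Hispanic or Latino population of each tract (table B03002).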
Borough boundaries are represented with a shapefile that was downloaded from NYC Open Data.
# Load the variable dictionary for the 2016-2020 5-year ACS so that variable
# codes (e.g. "B03002_003") can be looked up by label.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
acs201620 <- load_variables(2020, "acs5", cache = TRUE)
# Import tract-level counts from ACS table B03002 (Hispanic or Latino origin
# by race) for the five NYC counties, together with tract geometries.
# Note: this is NOT the ancestry table B04006.
# `output = "wide"` returns one row per tract with an estimate column per
# variable, suffixed with `E` (e.g. `whiteE`); the code below uses those names.
raw_race <- get_acs(
  geography = "tract",
  variables = c(
    race_pop = "B03002_001", # total population
    white = "B03002_003",    # White alone, not Hispanic or Latino
    asian = "B03002_006",    # Asian alone, not Hispanic or Latino
    black = "B03002_004",    # Black alone, not Hispanic or Latino
    hispanic = "B03002_012"  # Hispanic or Latino (any race)
  ),
  state = "NY",
  county = c("Queens", "New York", "Richmond", "Bronx", "Kings"),
  geometry = TRUE,
  year = 2020,
  output = "wide"
)
## Warning: • You have not set a Census API key. Users without a key are limited to 500
## queries per day and may experience performance limitations.
## ℹ For best results, get a Census API key at
## http://api.census.gov/data/key_signup.html and then supply the key to the
## `census_api_key()` function to use it throughout your tidycensus session.
## This warning is displayed once per session.
# Express each group's estimate as a share of the tract's total population.
# Tracts with a zero total produce NaN here; they are inspected just below.
race <- mutate(
  raw_race,
  pct_white = whiteE / race_popE,
  pct_asian = asianE / race_popE,
  pct_black = blackE / race_popE,
  pct_hispanic = hispanicE / race_popE
)
# Inspect the distribution of every share column to decide how to map it,
# and see how many missing values each one contains.
for (pct_col in c("pct_white", "pct_asian", "pct_black", "pct_hispanic")) {
  print(summary(race[[pct_col]]))
}
## Outcome of the NA check:
### the missing shares come from tracts whose total population (race_pop) is
### zero or very small, so only tracts with more than 1000 residents are kept;
### the remaining tracts are the ones used to show trends and proportions.
## Drop the sparsely populated tracts first, then compute the shares.
race <- raw_race |>
  filter(race_popE > 1000) |> # keep tracts with more than 1000 residents
  mutate(
    pct_white = whiteE / race_popE,
    pct_asian = asianE / race_popE,
    pct_black = blackE / race_popE,
    pct_hispanic = hispanicE / race_popE
  )
Download the shapefile of the 2020 Neighborhood Tabulation Areas from NYC Planning
Unzip and save to your part2/data/raw/geo folder
https://www.nyc.gov/site/planning/data-maps/open-data/census-download-metadata.page
Download the geojson of the NYC Borough Boundaries from NYC Open Data
Move it to your part2/data/raw/geo folder
https://data.cityofnewyork.us/City-Government/Borough-Boundaries/tqmj-j8zm
## Locations of the two boundary layers downloaded in the steps above
boros_path <- "~/Downloads/Methods 1_R Learning/part2/data/raw/geo/BoroughBoundaries.geojson"
nabes_path <- "~/Downloads/Methods 1_R Learning/part2/data/raw/geo/nynta2020.shp"

## Borough outlines (NYC Open Data)
boros <- st_read(boros_path)

## 2020 Neighborhood Tabulation Areas for NYC
nabes <- st_read(nabes_path)
# Choropleth of the non-Hispanic White share per census tract, overlaid with
# neighborhood (NTA) and borough outlines.
white_city_map <- ggplot() +
  geom_sf(
    data = race,
    # `text` is not a ggplot2 aesthetic (ggplot2 warns that it is ignored);
    # it is consumed later by ggplotly(tooltip = "text").
    mapping = aes(
      fill = pct_white,
      text = paste0(
        NAME, ":",
        "<br>Percent White Only Population:",
        percent(pct_white, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  theme_void() +
  scale_fill_distiller(
    breaks = c(0, .2, .4, .6, .8, 1),
    direction = 1,
    na.value = "#fafafa", # alternative: "transparent"
    name = "Percent White Only, non-Hispanic (%)",
    labels = percent_format(accuracy = 1L)
  ) +
  labs(
    title = "New York City, Population by Race at Census Tract Level",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  # Set membership must be tested with %in%: `==` recycles the borough vector
  # against the column row by row and silently drops most features.
  # NOTE(review): the NYC layers appear to label Richmond County as
  # "Staten Island"; both spellings are accepted so its outlines are drawn.
  # Confirm with unique(nabes$BoroName) / unique(boros$boro_name).
  geom_sf(
    data = nabes |>
      filter(BoroName %in% c(
        "Brooklyn", "Queens", "Manhattan", "Staten Island", "Richmond", "Bronx"
      )),
    color = "#4D84BB", fill = NA, lwd = 0.1
  ) +
  geom_sf(
    data = boros |>
      filter(boro_name %in% c(
        "Brooklyn", "Queens", "Manhattan", "Staten Island", "Richmond", "Bronx"
      )),
    color = "#244C95", fill = NA, lwd = .25
  )
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
## Warning: There was 1 warning in `stopifnot()`.
## ℹ In argument: `==...`.
## Caused by warning in `BoroName == c("Brooklyn", "Queens", "Manhattan", "Richmond", "Bronx")`:
## ! longer object length is not a multiple of shorter object length
# Render the map as an interactive widget; the tooltip shows the `text` aesthetic.
white_city_map |> ggplotly(tooltip = "text")
# Choropleth of the non-Hispanic Asian share per census tract, overlaid with
# neighborhood (NTA) and borough outlines.
asian_city_map <- ggplot() +
  geom_sf(
    data = race,
    # `text` is not a ggplot2 aesthetic (ggplot2 warns that it is ignored);
    # it is consumed later by ggplotly(tooltip = "text").
    mapping = aes(
      fill = pct_asian,
      text = paste0(
        NAME, ":",
        "<br>Percent Asian Only Population:",
        percent(pct_asian, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  theme_void() +
  scale_fill_distiller(
    breaks = c(0, .2, .4, .6, .8, 1),
    direction = 1,
    na.value = "#fafafa", # alternative: "transparent"
    name = "Percent Asian Only, non-Hispanic (%)",
    labels = percent_format(accuracy = 1L)
  ) +
  labs(
    title = "New York City, Population by Race at Census Tract Level",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  # NOTE(review): the NYC layers appear to label Richmond County as
  # "Staten Island"; both spellings are accepted so its outlines are drawn.
  # Confirm with unique(nabes$BoroName) / unique(boros$boro_name).
  geom_sf(
    data = nabes |>
      filter(BoroName %in% c(
        "Brooklyn", "Queens", "Manhattan", "Staten Island", "Richmond", "Bronx"
      )),
    color = "#4D84BB", fill = NA, lwd = 0.1
  ) +
  geom_sf(
    data = boros |>
      filter(boro_name %in% c(
        "Brooklyn", "Queens", "Manhattan", "Staten Island", "Richmond", "Bronx"
      )),
    color = "#244C95", fill = NA, lwd = .25
  )
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
# Render the map as an interactive widget; the tooltip shows the `text` aesthetic.
asian_city_map |> ggplotly(tooltip = "text")
# Choropleth of the non-Hispanic Black share per census tract, overlaid with
# neighborhood (NTA) and borough outlines.
black_city_map <- ggplot() +
  geom_sf(
    data = race,
    # `text` is not a ggplot2 aesthetic (ggplot2 warns that it is ignored);
    # it is consumed later by ggplotly(tooltip = "text").
    mapping = aes(
      fill = pct_black,
      text = paste0(
        NAME, ":",
        "<br>Percent Black Only Population:",
        percent(pct_black, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  theme_void() +
  scale_fill_distiller(
    breaks = c(0, .2, .4, .6, .8, 1),
    direction = 1,
    na.value = "#fafafa", # alternative: "transparent"
    name = "Percent Black Only, non-Hispanic (%)",
    labels = percent_format(accuracy = 1L)
  ) +
  labs(
    title = "New York City, Population by Race at Census Tract Level",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  # NOTE(review): the NYC layers appear to label Richmond County as
  # "Staten Island"; both spellings are accepted so its outlines are drawn.
  # Confirm with unique(nabes$BoroName) / unique(boros$boro_name).
  geom_sf(
    data = nabes |>
      filter(BoroName %in% c(
        "Brooklyn", "Queens", "Manhattan", "Staten Island", "Richmond", "Bronx"
      )),
    color = "#4D84BB", fill = NA, lwd = 0.1
  ) +
  geom_sf(
    data = boros |>
      filter(boro_name %in% c(
        "Brooklyn", "Queens", "Manhattan", "Staten Island", "Richmond", "Bronx"
      )),
    color = "#244C95", fill = NA, lwd = .25
  )
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
# Render the map as an interactive widget; the tooltip shows the `text` aesthetic.
black_city_map |> ggplotly(tooltip = "text")
# Choropleth of the Hispanic or Latino share per census tract, overlaid with
# neighborhood (NTA) and borough outlines.
hispanic_city_map <- ggplot() +
  geom_sf(
    data = race,
    # `text` is not a ggplot2 aesthetic (ggplot2 warns that it is ignored);
    # it is consumed later by ggplotly(tooltip = "text").
    mapping = aes(
      fill = pct_hispanic,
      text = paste0(
        NAME, ":",
        "<br>Percent Hispanic Only Population:",
        percent(pct_hispanic, accuracy = 1)
      )
    ),
    color = "transparent"
  ) +
  theme_void() +
  scale_fill_distiller(
    breaks = c(0, .2, .4, .6, .8, 1),
    direction = 1,
    na.value = "#fafafa", # alternative: "transparent"
    # The previous title ("Hispanic Only, non-Hispanic") contradicted itself.
    name = "Percent Hispanic or Latino (%)",
    labels = percent_format(accuracy = 1L)
  ) +
  labs(
    title = "New York City, Population by Race at Census Tract Level",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  # Set membership must be tested with %in%: `==` recycles the borough vector
  # against the column row by row and silently drops most features.
  # NOTE(review): the NYC layers appear to label Richmond County as
  # "Staten Island"; both spellings are accepted so its outlines are drawn.
  # Confirm with unique(nabes$BoroName) / unique(boros$boro_name).
  geom_sf(
    data = nabes |>
      filter(BoroName %in% c(
        "Brooklyn", "Queens", "Manhattan", "Staten Island", "Richmond", "Bronx"
      )),
    color = "#4D84BB", fill = NA, lwd = 0.1
  ) +
  geom_sf(
    data = boros |>
      filter(boro_name %in% c(
        "Brooklyn", "Queens", "Manhattan", "Staten Island", "Richmond", "Bronx"
      )),
    color = "#244C95", fill = NA, lwd = .25
  )
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
## Warning: There was 1 warning in `stopifnot()`.
## ℹ In argument: `==...`.
## Caused by warning in `BoroName == c("Brooklyn", "Queens", "Manhattan", "Richmond", "Bronx")`:
## ! longer object length is not a multiple of shorter object length
# Render the map as an interactive widget; the tooltip shows the `text` aesthetic.
hispanic_city_map |> ggplotly(tooltip = "text")