Analysis of race in New York City from the decennial census at the census-tract level.
· Variables: Decennial Census · Borough Boundaries: NYC Open Data · Neighborhood Tabulation Areas: NYC Planning
Import Libraries needed:
library(tidyverse)
library(tidycensus)
library(sf)
library(scales)
library(viridis)
# Variable dictionary for the 2020 "pl" dataset, kept for looking up variable
# codes; cache = TRUE stores it locally so later runs can reuse it.
pl_2020 <- load_variables(2020, "pl", cache = TRUE)

# Variable codes requested from the Census API. They are renamed further down
# to tot_pop, his_or_lat, white_alone, black_alone and asian_alone.
pl_vars <- c("P1_001N", "P2_002N", "P2_005N", "P2_006N", "P2_008N")
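Raw race data frame at the census-tract level. The request covers all of New York State (state = 36); it is filtered down to the five NYC boroughs in the next step.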
# Download the 2020 decennial counts for every census tract in New York State
# (FIPS 36). output = "wide" gives one column per requested variable and
# geometry = TRUE attaches the tract geometry. Rows are sorted by NAME.
raw_race_nyc <- get_decennial(
  geography = "tract",
  variables = pl_vars,
  state = 36,
  year = 2020,
  output = "wide",
  geometry = TRUE
) %>%
  arrange(NAME)
Race NYC Data Frame: To select only the five boroughs, I split the NAME column into separate tract, county, and state columns and then filter to the five NYC counties. I then rename the columns, convert NaN values to NA, and calculate: · Percent Hispanic or Latino · Percent Black-alone, not Hispanic or Latino · Percent Asian-alone, not Hispanic or Latino · Percent White-alone, not Hispanic or Latino
# Build the NYC-only tract table:
#   1. split NAME ("tract, county, state") into three columns;
#   2. keep the five counties that make up New York City;
#   3. give the census variables readable names;
#   4. add per_* columns holding each group's share of the tract population,
#      rounded to 3 decimals. Tracts with zero population produce 0/0 = NaN,
#      which is replaced by NA.
race_nyc <- raw_race_nyc %>%
  separate(NAME, into = c("tract", "county", "state"), sep = ", ") %>%
  filter(
    county %in% c(
      "Queens County",
      "Bronx County",
      "Kings County",
      "Richmond County",
      "New York County"
    )
  ) %>%
  rename(
    tot_pop = P1_001N,
    his_or_lat = P2_002N,
    white_alone = P2_005N,
    black_alone = P2_006N,
    asian_alone = P2_008N
  ) %>%
  mutate(
    across(
      c(his_or_lat, white_alone, black_alone, asian_alone),
      function(count) {
        share <- round(count / tot_pop, 3)
        replace(share, is.nan(share), NA)
      },
      .names = "per_{.col}"
    )
  )
# Read the borough boundaries and the neighborhood (NTA) polygons from local
# files; quiet = TRUE suppresses st_read()'s console summary.
boros <- st_read(
  dsn = "~/Desktop/Fall 2022/methodos1/main_data/raw/geo/Borough Boundaries.geojson",
  quiet = TRUE
)
nabes <- st_read(
  dsn = "~/Desktop/Fall 2022/methodos1/main_data/raw/geo/nynta2020_22b/nynta2020.shp",
  quiet = TRUE
)

# Reproject the boroughs and the tracts to EPSG:2263 so both layers share one
# coordinate reference system.
boros_2263 <- boros %>% st_transform(crs = 2263)
race_nyc_2263 <- race_nyc %>% st_transform(crs = 2263)
SELECT THE FIELDS FROM NTA
# Keep only the NTA fields needed after the join: borough name, NTA code and
# NTA name (sf keeps the geometry column automatically).
# NOTE(review): the original listed BoroName twice; if a different field was
# intended as the second entry, add it here.
nabes_selected <- nabes %>%
  select(BoroName, NTA2020, NTAName)
SPATIAL JOIN
# Attach neighborhood (NTA) attributes to every tract.
# st_join() needs both layers in the same CRS, so the NTA layer is reprojected
# to the tracts' CRS first; this leaves it effectively unchanged when the CRSs
# already match.
# left = TRUE keeps every tract even when no NTA matches; largest = TRUE gives
# each tract the attributes of the NTA it overlaps the most.
race_nyc_nabes <- race_nyc_2263 %>%
  st_join(
    st_transform(nabes_selected, st_crs(race_nyc_2263)),
    left = TRUE,
    join = st_intersects,
    largest = TRUE
  )
With the borough boundaries drawn on top, I remove the census-tract outlines (lwd = 0) and add the neighborhoods.
Map the different percentage of each racial category in NYC.
· plot: NYC map % White alone
# Choropleth of the White-alone (not Hispanic or Latino) share by census
# tract, with the borough boundaries drawn on top.
ggplot() +
  # Tracts filled by share; lwd = 0 hides the tract outlines.
  geom_sf(
    data = race_nyc_nabes,
    mapping = aes(fill = per_white_alone),
    color = "#ffffff",
    lwd = 0
  ) +
  # Borough boundaries: thin black outlines, no fill.
  geom_sf(
    data = boros_2263 %>%
      filter(
        boro_name %in%
          c("Brooklyn", "Bronx", "Manhattan", "Staten Island", "Queens")
      ),
    color = "black",
    fill = NA,
    lwd = .1
  ) +
  # Light-to-dark red scale; tracts with NA share are left transparent.
  scale_fill_distiller(
    name = "Percent White alone (%)",
    palette = "Reds",
    direction = 1,
    breaks = c(0, .2, .4, .6, .8, 1),
    labels = percent_format(accuracy = 1L),
    na.value = "transparent"
  ) +
  labs(
    title = "NYC, White alone by Census Tract",
    caption = "Source: 2020 Decennial Census"
  ) +
  theme_void()
· plot: NYC map % Hispanic or Latino
# Choropleth of the Hispanic or Latino share by census tract, with the
# borough boundaries drawn on top.
ggplot() +
  # Tracts filled by share; lwd = 0 hides the tract outlines.
  geom_sf(
    data = race_nyc_nabes,
    mapping = aes(fill = per_his_or_lat),
    color = "#ffffff",
    lwd = 0
  ) +
  # Borough boundaries: thin black outlines, no fill.
  geom_sf(
    data = boros_2263 %>%
      filter(
        boro_name %in%
          c("Brooklyn", "Bronx", "Manhattan", "Staten Island", "Queens")
      ),
    color = "black",
    fill = NA,
    lwd = .1
  ) +
  # No palette is given here, so the scale's default palette applies;
  # tracts with NA share are left transparent.
  scale_fill_distiller(
    name = "Percent Hispanic or Latino (%)",
    direction = 1,
    breaks = c(0, .2, .4, .6, .8, 1),
    labels = percent_format(accuracy = 1L),
    na.value = "transparent"
  ) +
  labs(
    title = "NYC, Hispanic or Latino by Census Tract",
    caption = "Source: 2020 Decennial Census"
  ) +
  theme_void()
· plot: NYC map % Black alone
# Choropleth of the Black-alone (not Hispanic or Latino) share by census
# tract, with the borough boundaries drawn on top.
ggplot() +
  # Tracts filled by share; lwd = 0 hides the tract outlines.
  geom_sf(
    data = race_nyc_nabes,
    mapping = aes(fill = per_black_alone),
    color = "#ffffff",
    lwd = 0
  ) +
  # Borough boundaries: thin black outlines, no fill.
  geom_sf(
    data = boros_2263 %>%
      filter(
        boro_name %in%
          c("Brooklyn", "Bronx", "Manhattan", "Staten Island", "Queens")
      ),
    color = "black",
    fill = NA,
    lwd = .1
  ) +
  # Light-to-dark green scale; tracts with NA share are left transparent.
  scale_fill_distiller(
    name = "Percent Black alone (%)",
    palette = "Greens",
    direction = 1,
    breaks = c(0, .2, .4, .6, .8, 1),
    labels = percent_format(accuracy = 1L),
    na.value = "transparent"
  ) +
  labs(
    title = "NYC, Black alone by Census Tract",
    caption = "Source: 2020 Decennial Census"
  ) +
  theme_void()
· plot: NYC map % Asian alone
# Choropleth of the Asian-alone (not Hispanic or Latino) share by census
# tract, with the borough boundaries drawn on top.
ggplot() +
  # Tracts filled by share; lwd = 0 hides the tract outlines.
  geom_sf(
    data = race_nyc_nabes,
    mapping = aes(fill = per_asian_alone),
    color = "#ffffff",
    lwd = 0
  ) +
  # Borough boundaries: thin black outlines, no fill.
  geom_sf(
    data = boros_2263 %>%
      filter(
        boro_name %in%
          c("Brooklyn", "Bronx", "Manhattan", "Staten Island", "Queens")
      ),
    color = "black",
    fill = NA,
    lwd = .1
  ) +
  # Light-to-dark purple scale; tracts with NA share are left transparent.
  scale_fill_distiller(
    name = "Percent Asian alone (%)",
    palette = "Purples",
    direction = 1,
    breaks = c(0, .2, .4, .6, .8, 1),
    labels = percent_format(accuracy = 1L),
    na.value = "transparent"
  ) +
  labs(
    title = "NYC, Asian alone by Census Tract",
    caption = "Source: 2020 Decennial Census"
  ) +
  theme_void()
I find mapping the distribution of each racial category across NYC very powerful. These maps support the conclusion that the city is spatially segregated by race.
Calculate summary statistics for the five boroughs.
# Borough-level summary table: total population plus, for each group, the
# total count and its share of the borough population.
# The geometry is dropped first so the result is a plain table.
st_drop_geometry(race_nyc_nabes) %>%
  group_by(county) %>%
  summarise(
    # Borough label taken from the first tract's joined NTA record.
    Borough = first(BoroName),
    `Est. Total Population` = sum(tot_pop),
    `Est. Total Hispanic or Latino Population` = sum(his_or_lat),
    `Est. Total White Alone Population` = sum(white_alone),
    `Est. Total Black Alone Population` = sum(black_alone),
    `Est. Total Asian Alone Population` = sum(asian_alone)
  ) %>%
  # Shares go into their own "Est. Percent ..." columns. Previously the
  # White/Black/Asian shares were assigned to the "Est. Total ..." names,
  # overwriting the counts with percent strings.
  mutate(
    `Est. Percent Hispanic or Latino Population` = percent(
      `Est. Total Hispanic or Latino Population` / `Est. Total Population`,
      accuracy = 1L
    ),
    `Est. Percent White Alone Population` = percent(
      `Est. Total White Alone Population` / `Est. Total Population`,
      accuracy = 1L
    ),
    `Est. Percent Black Alone Population` = percent(
      `Est. Total Black Alone Population` / `Est. Total Population`,
      accuracy = 1L
    ),
    `Est. Percent Asian Alone Population` = percent(
      `Est. Total Asian Alone Population` / `Est. Total Population`,
      accuracy = 1L
    )
  )