Aim

The purpose of the product is to:

Show the percentage of the population in New York County belonging to each of four racial and ethnic groups, based on 2020 ACS data. The four groups chosen were: “White alone”, “Black alone”, “Asian alone”, and “Hispanic or Latino”.

Data frames, histograms, and ggplots were used.

Steps

The first data frames were created to show the total estimated and median population for each race. Following this, a second set of data frames was produced to find the percentage of each race in New York County.

library(tidycensus)
library(tidyverse)
library(sf)
library(viridis)
library(scales)
library(RColorBrewer)

“Black only”

# Tract-level estimates of "Black or African American alone" (B02001_003) and
# total population (B02001_001) for New York County, with tract geometry.
# `year` is pinned explicitly: the default survey year of get_acs() differs
# between tidycensus releases, while this report cites the 2016-2020 5-year
# ACS.
black_pop_nyc_raw <- get_acs(
  geography = "tract",
  variables = c(black = "B02001_003", total_pop = "B02001_001"),
  state = "NY",
  county = "New York County",
  year = 2020,
  survey = "acs5",
  output = "wide",
  geometry = TRUE
)

# Share of each tract's population that is Black alone, on a 0-1 scale.
# Tracts with zero population yield NaN (0 / 0).
black_pop <- black_pop_nyc_raw |>
  mutate(percent_black = blackE / total_popE)

“White only”

# Tract-level estimates of "White alone" (B02001_002) and total population
# (B02001_001) for New York County, with tract geometry.
# `year` is pinned explicitly: the default survey year of get_acs() differs
# between tidycensus releases, while this report cites the 2016-2020 5-year
# ACS.
white_pop_nyc_raw <- get_acs(
  geography = "tract",
  variables = c(white = "B02001_002", total_pop = "B02001_001"),
  state = "NY",
  county = "New York County",
  year = 2020,
  survey = "acs5",
  output = "wide",
  geometry = TRUE
)

# Share of each tract's population that is White alone, on a 0-1 scale.
# Tracts with zero population yield NaN (0 / 0).
white_pop <- white_pop_nyc_raw |>
  mutate(percent_white = whiteE / total_popE)

“Asian only”

# Tract-level estimates of "Asian alone" (B02001_005) and total population
# (B02001_001) for New York County, with tract geometry.
# `year` is pinned explicitly: the default survey year of get_acs() differs
# between tidycensus releases, while this report cites the 2016-2020 5-year
# ACS.
asian_pop_nyc_raw <- get_acs(
  geography = "tract",
  variables = c(asian = "B02001_005", total_pop = "B02001_001"),
  state = "NY",
  county = "New York County",
  year = 2020,
  survey = "acs5",
  output = "wide",
  geometry = TRUE
)

# Share of each tract's population that is Asian alone, on a 0-1 scale.
# Tracts with zero population yield NaN (0 / 0).
asian_pop <- asian_pop_nyc_raw |>
  mutate(percent_asian = asianE / total_popE)

“Hispanic or Latino”

# Tract-level estimates of "Hispanic or Latino" (B03002_012) and total
# population for New York County, with tract geometry.
# The denominator is taken from the same table as the numerator (B03002_001)
# so both estimates share one universe; the output column is still named
# `total_popE`.
# `year` is pinned explicitly: the default survey year of get_acs() differs
# between tidycensus releases, while this report cites the 2016-2020 5-year
# ACS.
hispanic_nyc_raw <- get_acs(
  geography = "tract",
  variables = c(hispanic = "B03002_012", total_pop = "B03002_001"),
  state = "NY",
  county = "New York County",
  year = 2020,
  survey = "acs5",
  output = "wide",
  geometry = TRUE
)

# Share of each tract's population that is Hispanic or Latino (any race), on
# a 0-1 scale. Tracts with zero population yield NaN (0 / 0).
hispanic_pop <- hispanic_nyc_raw |>
  mutate(percent_hispanic = hispanicE / total_popE)

Steps (cont.)

After this, we remove the NAs from the data, then make a map for each race to compare against the data found in the data frames.

# Recompute the Black share per tract and add `ct_black`: the same ratio with
# NaN (0 / 0 in unpopulated tracts) replaced by NA.
black_pop <- black_pop_nyc_raw |>
  mutate(
    percent_black = blackE / total_popE,
    ct_black = if_else(is.nan(percent_black), NA_real_, percent_black)
  )

# Choropleth of the Black share by census tract, binned in 10-point steps.
# The fill uses the cleaned `ct_black` column (previously computed but never
# plotted) so unpopulated tracts are drawn in the `na.value` colour.
ggplot(
  data = black_pop,
  mapping = aes(fill = ct_black)
) +
  geom_sf() +
  theme_void() +
  scale_fill_fermenter(
    breaks = c(0, .10, .20, .30, .40, .50, .60, .70, .80, .90),
    palette = "Blues",
    direction = 1,
    na.value = "#fafafa",
    name = "Percentage of Black Race (%)",
    labels = percent_format(accuracy = 1)
  ) +
  labs(
    title = "Black Percentage of Population in New York County",
    caption = "Source: American Community Survey, 2020"
  )

# Recompute the White share per tract and add `ct_white`: the same ratio with
# NaN (0 / 0 in unpopulated tracts) replaced by NA.
white_pop <- white_pop_nyc_raw |>
  mutate(
    percent_white = whiteE / total_popE,
    ct_white = if_else(is.nan(percent_white), NA_real_, percent_white)
  )

# Choropleth of the White share by census tract, binned in 10-point steps.
# The fill uses the cleaned `ct_white` column (previously computed but never
# plotted) so unpopulated tracts are drawn in the `na.value` colour.
ggplot(
  data = white_pop,
  mapping = aes(fill = ct_white)
) +
  geom_sf() +
  theme_void() +
  scale_fill_fermenter(
    breaks = c(0, .10, .20, .30, .40, .50, .60, .70, .80, .90),
    palette = "BuPu",
    direction = 1,
    na.value = "#fafafa",
    name = "Percentage of White Race (%)",
    labels = percent_format(accuracy = 1)
  ) +
  labs(
    title = "White Percentage of Population in New York County",
    caption = "Source: American Community Survey, 2020"
  )

# Recompute the Asian share per tract and add `ct_asian`: the same ratio with
# NaN (0 / 0 in unpopulated tracts) replaced by NA.
asian_pop <- asian_pop_nyc_raw |>
  mutate(
    percent_asian = asianE / total_popE,
    ct_asian = if_else(is.nan(percent_asian), NA_real_, percent_asian)
  )

# Choropleth of the Asian share by census tract, binned in 10-point steps.
# The fill uses the cleaned `ct_asian` column (previously computed but never
# plotted) so unpopulated tracts are drawn in the `na.value` colour.
ggplot(
  data = asian_pop,
  mapping = aes(fill = ct_asian)
) +
  geom_sf() +
  theme_void() +
  scale_fill_fermenter(
    breaks = c(0, .10, .20, .30, .40, .50, .60, .70, .80, .90),
    palette = "BuGn",
    direction = 1,
    na.value = "#fafafa",
    name = "Percentage of Asian Race (%)",
    labels = percent_format(accuracy = 1)
  ) +
  labs(
    title = "Asian Percentage of Population in New York County",
    caption = "Source: American Community Survey, 2020"
  )

# Recompute the Hispanic or Latino share per tract and add `ct_hispanic`: the
# same ratio with NaN (0 / 0 in unpopulated tracts) replaced by NA.
hispanic_pop <- hispanic_nyc_raw |>
  mutate(
    percent_hispanic = hispanicE / total_popE,
    ct_hispanic = if_else(
      is.nan(percent_hispanic), NA_real_, percent_hispanic
    )
  )

# Choropleth of the Hispanic or Latino share by census tract, binned in
# 10-point steps. The fill uses the cleaned `ct_hispanic` column (previously
# computed but never plotted) so unpopulated tracts are drawn in the
# `na.value` colour.
ggplot(
  data = hispanic_pop,
  mapping = aes(fill = ct_hispanic)
) +
  geom_sf() +
  theme_void() +
  scale_fill_fermenter(
    breaks = c(0, .10, .20, .30, .40, .50, .60, .70, .80, .90),
    palette = "OrRd",
    direction = 1,
    na.value = "#fafafa",
    name = "Percentage of Hispanic or Latino Race (%)",
    labels = percent_format(accuracy = 1)
  ) +
  labs(
    title = "Hispanic or Latino % of Population in New York County",
    caption = "Source: American Community Survey, 2020"
  )

Methods

Borough boundaries are represented with a shapefile that was downloaded from NYC Open Data.

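Census tract boundaries and data are from the 2016-2020 5-year American Community Survey (ACS), accessed with the tidycensus R package. Census data include the total population (B02001_001) and the estimated number of residents who are White alone (B02001_002), Black or African American alone (B02001_003), Asian alone (B02001_005), and Hispanic or Latino (B03002_012).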

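The proportion of people of each race within each census tract was calculated by dividing the race estimate by the tract's total population. Census tracts with no residents have no defined proportion; they were treated as missing (NA) and are drawn in a neutral colour on the maps.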

Results

Looking at the maps created, we see that visually the “White only” race holds a large majority of the population in New York County. However, the other three groups analyzed seem to congregate in specific areas of the island. The “Black only” race holds its highest percentage in the northeastern part of Manhattan (East and Central Harlem), while the “Asian only” race is most prevalent in Chinatown and the Lower East Side. The “Hispanic or Latino” group has its largest pocket of population in Washington Heights but also spreads to Lower Manhattan and Harlem.

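According to summary statistics, the groups analyzed in this study make up the following percentages in New York County: “White only” equates to 54% of the population, “Black only” equates to 14% of the population, “Asian only” equates to 12% of the population, and “Hispanic or Latino” equates to 26% of the population. These percentages sum to more than 100% because the ACS records Hispanic or Latino origin separately from race, so Hispanic or Latino residents are also counted in one of the race categories.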