R Notebook

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

Aim

Understand the Hispanic or Latino origin of the population in Queens, particularly in Corona.

Data

- Variables: American Community Survey
- Borough Boundaries: NYC Open Data
- Neighborhood Tabulation Areas: NYC Planning

Plot the data

These plots show, for each census tract in Queens, NY, the percentage of the population that is of Mexican, Ecuadorian, or Colombian origin.

library(tidyverse)
library(tidycensus)
library(sf)
library(scales)
library(viridis)

American Community Survey: Variables

# Load the variable dictionary for the 2020 ACS 5-year dataset.
# `cache = TRUE` stores the dictionary locally so later calls can reuse it.
# (`TRUE` is spelled out because `T` is an ordinary, reassignable variable.)
acs201620 <- load_variables(2020, "acs5", cache = TRUE)
# Download tract-level estimates from ACS table B03001 for Queens County, NY
# (2020 5-year release), together with the tract geometries.
#
# `output = "wide"` returns one row per tract; every named variable below
# becomes a pair of columns with suffixes `E` (estimate) and `M` (margin of
# error), e.g. `mex_popE` / `mex_popM`.
raw_his_lat_origin <- get_acs(
  geography = "tract",
  variables = c(
    total_pop_his_lat_origin = "B03001_001", # table total
    pop_not_his_lat_origin = "B03001_002",   # not Hispanic or Latino
    pop_his_lat_origin = "B03001_003",       # Hispanic or Latino
    mex_pop = "B03001_004",                  # Mexican
    ecu_pop = "B03001_021",                  # Ecuadorian
    col_pop = "B03001_020"                   # Colombian
  ),
  state = "NY",
  county = "Queens",
  geometry = TRUE, # spelled out: `T` is a reassignable variable
  year = 2020,
  output = "wide"
)

# Share of each tract's table total (`total_pop_his_lat_originE`) that is of
# Mexican, Ecuadorian, or Colombian origin. Values are proportions, not
# percentages multiplied by 100.
his_lat_origin <- mutate(
  raw_his_lat_origin,
  pct_mex_his_lat_origin = mex_popE / total_pop_his_lat_originE,
  pct_ecu_his_lat_origin = ecu_popE / total_pop_his_lat_originE,
  pct_col_his_lat_origin = col_popE / total_pop_his_lat_originE
)

Queens maps

# Choropleth of Queens census tracts shaded by the Mexican-origin share.
ggplot() +
  geom_sf(
    data = his_lat_origin,
    mapping = aes(fill = pct_mex_his_lat_origin),
    lwd = 0 # zero-width tract borders
  ) +
  scale_fill_distiller(
    name = "Percent Mexican Population Origin (%)",
    breaks = c(0, .1, .2, .3, .4, .5),
    labels = percent_format(accuracy = 1L),
    direction = 1,
    na.value = "#fafafa" # fill used for tracts with a missing share
  ) +
  labs(
    title = "Queens, Mexican Population Origin by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  theme_void()

# Choropleth of Queens census tracts shaded by the Ecuadorian-origin share.
ggplot() +
  geom_sf(
    data = his_lat_origin,
    mapping = aes(fill = pct_ecu_his_lat_origin),
    lwd = 0 # zero-width tract borders
  ) +
  theme_void() +
  scale_fill_distiller(
    breaks = c(0, .1, .2, .3, .4, .5),
    direction = 1,
    na.value = "#fafafa", # fill used for tracts with a missing share
    # Legend title previously read "Ecuatorian"; spelling now matches the
    # plot title.
    name = "Percent Ecuadorian Population Origin (%)",
    labels = percent_format(accuracy = 1L)
  ) +
  labs(
    title = "Queens, Ecuadorian Population Origin by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  )

# Choropleth of Queens census tracts shaded by the Colombian-origin share.
# Legend breaks stop at 25%, unlike the 50% used for the other two groups.
ggplot() +
  geom_sf(
    data = his_lat_origin,
    mapping = aes(fill = pct_col_his_lat_origin),
    lwd = 0 # zero-width tract borders
  ) +
  scale_fill_distiller(
    name = "Percent Colombian Population Origin (%)",
    breaks = c(0, .05, .1, .15, .2, .25),
    labels = percent_format(accuracy = 1L),
    direction = 1,
    na.value = "#fafafa" # fill used for tracts with a missing share
  ) +
  labs(
    title = "Queens, Colombian Population Origin by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  theme_void()

NYC Open Data: Borough Boundaries

# Read the borough boundary polygons from a local GeoJSON file.
# NOTE(review): the path is specific to one machine — adjust before running.
boros <- st_read(
  dsn = "~/Desktop/Fall 2022/methodos1/main_data/raw/geo/Borough Boundaries.geojson",
  quiet = TRUE
)

NYC Planning: Neighborhood Tabulation Areas

# Read the 2020 Neighborhood Tabulation Area polygons from a local shapefile.
# NOTE(review): the path is specific to one machine — adjust before running.
nabes <- st_read(
  dsn = "~/Desktop/Fall 2022/methodos1/main_data/raw/geo/nynta2020_22b/nynta2020.shp",
  quiet = TRUE
)

# Reproject the tract data to EPSG:2263 ahead of the spatial join.
# NOTE(review): presumably this matches the CRS of `nabes` — confirm with
# st_crs(nabes).
his_lat_origin_2263 <- his_lat_origin %>%
  st_transform(crs = 2263)

Select fields

# Keep only the NTA attributes needed after the join: borough name, NTA code,
# and NTA name. `BoroName` was previously listed twice; the duplicate was
# redundant and has been removed.
nabes_selected <- nabes %>%
  select(BoroName, NTA2020, NTAName)

Spatial Join

# Attach NTA attributes to every census tract.
#   left = TRUE     keeps tracts that intersect no NTA (their NTA fields are NA)
#   largest = TRUE  assigns each tract the single NTA it overlaps the most,
#                   so a tract is never duplicated across neighborhoods
his_lat_origin_nabes <- st_join(
  his_lat_origin_2263,
  nabes_selected,
  join = st_intersects,
  left = TRUE,
  largest = TRUE
)

# Restrict to tracts assigned to the three Corona-area NTAs.
his_lat_origin_corona <- filter(
  his_lat_origin_nabes,
  NTAName %in% c("Corona", "North Corona", "Flushing Meadows-Corona Park")
)

Corona maps

# Corona-area tracts shaded by the Mexican-origin share, with the Corona and
# North Corona NTA boundaries drawn on top in gray. The tract data also
# includes Flushing Meadows-Corona Park, which is not outlined.
ggplot() +
  geom_sf(
    data = his_lat_origin_corona,
    mapping = aes(fill = pct_mex_his_lat_origin),
    color = "#ffffff",
    lwd = 0
  ) +
  geom_sf(
    data = filter(nabes, NTAName %in% c("Corona", "North Corona")),
    color = "gray",
    fill = NA,
    lwd = 0.25
  ) +
  scale_fill_distiller(
    name = "Percent Mexican Population Origin (%)",
    breaks = c(0, .05, .1, .15, .2, .25, .3, .35, .4, .45, .5),
    labels = percent_format(accuracy = 1L),
    direction = 1,
    na.value = "transparent"
  ) +
  labs(
    title = "Corona Queens, Percent Mexican Population Origin by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  theme_void()

# Corona-area tracts shaded by the Ecuadorian-origin share, with the Corona
# and North Corona NTA boundaries drawn on top in gray. The tract data also
# includes Flushing Meadows-Corona Park, which is not outlined.
ggplot() +
  geom_sf(
    data = his_lat_origin_corona,
    mapping = aes(fill = pct_ecu_his_lat_origin),
    color = "#ffffff",
    lwd = 0
  ) +
  geom_sf(
    data = filter(nabes, NTAName %in% c("Corona", "North Corona")),
    color = "gray",
    fill = NA,
    lwd = 0.25
  ) +
  scale_fill_distiller(
    name = "Percent Ecuadorian Population Origin (%)",
    breaks = c(0, .1, .2, .3, .4, .5),
    labels = percent_format(accuracy = 1L),
    direction = 1,
    na.value = "transparent"
  ) +
  labs(
    title = "Corona Queens, Percent Ecuadorian Population Origin by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  theme_void()

# Corona-area tracts shaded by the Colombian-origin share, with the Corona
# and North Corona NTA boundaries drawn on top in gray. The tract data also
# includes Flushing Meadows-Corona Park, which is not outlined.
# Legend breaks stop at 10% for this group.
ggplot() +
  geom_sf(
    data = his_lat_origin_corona,
    mapping = aes(fill = pct_col_his_lat_origin),
    color = "#ffffff",
    lwd = 0
  ) +
  geom_sf(
    data = filter(nabes, NTAName %in% c("Corona", "North Corona")),
    color = "gray",
    fill = NA,
    lwd = 0.25
  ) +
  scale_fill_distiller(
    name = "Percent Colombian Population Origin (%)",
    breaks = c(0, .02, .04, .06, .08, 0.1),
    labels = percent_format(accuracy = 1L),
    direction = 1,
    na.value = "transparent"
  ) +
  labs(
    title = "Corona Queens, Percent Colombian Population Origin by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  ) +
  theme_void()

Queens Statistics

Compare the percentage of the population of Hispanic or Latino origin across the neighborhoods of Queens, along with the percentages of the Mexican, Ecuadorian, and Colombian populations.

# Neighborhood-level summary table: sum the tract estimates within each NTA
# and express each group as a share of the NTA's total population.
#
# - Geometry is dropped first so the result is a plain table.
# - `NTAName` is the grouping key, so summarise() carries it into the output
#   automatically; it does not need to be re-created inside summarise().
# - Total population is the sum of the Hispanic/Latino and the
#   not-Hispanic/Latino estimates.
# - NTAs with zero estimated population are removed before the shares are
#   formatted, so no 0/0 shares are computed.
# - The percent columns are formatted character strings (via percent()), not
#   numbers.
his_lat_origin_nabes %>%
  st_drop_geometry() %>%
  group_by(NTAName) %>%
  summarise(
    `Est. Total Population` =
      sum(pop_his_lat_originE) + sum(pop_not_his_lat_originE),
    `Est. Total Population His or Lat Origin` = sum(pop_his_lat_originE),
    `Est. Total Mexican Population` = sum(mex_popE),
    `Est. Total Ecuadorian Population` = sum(ecu_popE),
    `Est. Total Colombian Population` = sum(col_popE)
  ) %>%
  filter(`Est. Total Population` != 0) %>%
  mutate(
    `Est. Percent His or Lat Origin` = percent(
      `Est. Total Population His or Lat Origin` / `Est. Total Population`,
      accuracy = 1L
    ),
    `Est. Percent Mexican Population` = percent(
      `Est. Total Mexican Population` / `Est. Total Population`,
      accuracy = 1L
    ),
    `Est. Percent Ecuadorian Population` = percent(
      `Est. Total Ecuadorian Population` / `Est. Total Population`,
      accuracy = 1L
    ),
    `Est. Percent Colombian Population` = percent(
      `Est. Total Colombian Population` / `Est. Total Population`,
      accuracy = 1L
    )
  )

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.