Ingestion and Transformation of Data

# Load the tract layer (geometry plus attribute fields) from the GeoPackage.
NYCTracts <- st_read(dsn = "Data/NYCTractsWData.gpkg")
## Reading layer `nyc_tracts_complete' from data source 
##   `C:\Users\student\OneDrive\.HUNTER\[4] SPRING 26\GTECH38520\work\project\Data\NYCTractsWData.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 2324 features and 78 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913141.2 ymin: 120096.3 xmax: 1067338 ymax: 272752.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
# Reproject the tracts from the State Plane CRS reported above to EPSG:4326,
# the same CRS that is assigned to the garden points.
NYCTracts <- st_transform(NYCTracts, crs = 4326)
# Load the 2020 Neighborhood Tabulation Area (NTA) polygons from the shapefile.
NYCNTA <- st_read(dsn = "Data/nynta2020_26a/nynta2020.shp")
## Reading layer `nynta2020' from data source 
##   `C:\Users\student\OneDrive\.HUNTER\[4] SPRING 26\GTECH38520\work\project\Data\nynta2020_26a\nynta2020.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 262 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913175.1 ymin: 120128.4 xmax: 1067383 ymax: 272844.3
## Projected CRS: NAD83 / New York Long Island (ftUS)
# Reproject the NTA polygons to EPSG:4326 so they share a CRS with the
# garden points created below.
NYCNTA <- st_transform(NYCNTA, 4326)

# Load the community-garden table; point coordinates are read from its
# Longitude / Latitude columns.
NYCGardens <- read.csv("Data/communitygardens.csv")

# st_as_sf() refuses missing coordinates, so drop rows lacking EITHER
# coordinate (not just Latitude) before building the point layer.
NYCGardensclean <- NYCGardens %>% drop_na(Latitude, Longitude)

# `crs = 4326` already tags the points with their CRS, so no separate
# st_set_crs() call is needed afterwards.
NYCGardenssf <- st_as_sf(
  NYCGardensclean,
  coords = c("Longitude", "Latitude"),
  crs = 4326
)

Determination of Gentrification

Following the NYU Furman Center's definition, two questions are asked of each tract: was it below the citywide median income in 2000, and did it experience rent growth higher than the city as a whole?

I used the following algorithm:

Running this classification showed that most gentrifying areas were in North Brooklyn and West Queens, while Upper Manhattan, the Bronx, and South Brooklyn were neither gentrifying nor high income.

# Citywide reference values used as classification thresholds.
nyc_median_income_2000 <- 38293    # median income of NYC (2000)
nyc_rent_change_2000_2022 <- 1074  # citywide average rent increase, 2000-2022

# Classify every tract into one of three typologies:
#   "High-Income"     - 2000 median income above the citywide median
#   "Gentrifying"     - not high-income, rent rose more than the citywide average
#   "Not Gentrifying" - not high-income, rent rose no more than the average
# A tract is left NA when the value needed to decide its class is missing,
# which matches what the nested ifelse() formulation produced.
NYCTractsGentrifying <- NYCTracts %>%
  mutate(Gentrifying = case_when(
    # Income unknown: no determination possible.
    is.na(Median_Income_2000) ~ NA_character_,
    # Was the median income of the tract above the median income of NYC?
    Median_Income_2000 > nyc_median_income_2000 ~ "High-Income",
    # Not high-income, but rent change unknown: no determination possible.
    is.na(Median_Rent_Change_2000_2022) ~ NA_character_,
    # Did rent increase more than the citywide average during 2000 to 2022?
    Median_Rent_Change_2000_2022 > nyc_rent_change_2000_2022 ~ "Gentrifying",
    TRUE ~ "Not Gentrifying"
  ))

# NYCTracts was already reprojected to EPSG:4326 after loading and mutate()
# keeps the CRS, so no further st_transform() is required here.

# Categorical palette over the three gentrification classes.
# NOTE(review): colorFactor() appears to assign colors to the sorted levels
# of a character domain (Gentrifying, High-Income, Not Gentrifying) - confirm
# against the rendered legend.
pal <- colorFactor(c("#900", "#00a", "#090"), domain = NYCTractsGentrifying$Gentrifying)

# One popup string per tract showing its classification.
p_popup <- paste0("<strong>Gentrification Status: </strong>",
                  NYCTractsGentrifying$Gentrifying)

# Interactive map of tracts colored by gentrification status.
leaflet(NYCTractsGentrifying) %>%
  # Three selectable base maps.
  addTiles(group = "OSM (default)") %>%
  addProviderTiles(providers$CartoDB.Positron, group = "Positron (minimal)") %>%
  addProviderTiles(providers$Esri.WorldImagery, group = "World Imagery (satellite)") %>%
  addPolygons(
    stroke = FALSE,                # no polygon borders
    fillColor = ~pal(Gentrifying), # formula is evaluated against the map data
    fillOpacity = 0.7,
    smoothFactor = 0.5,            # polygon simplification per zoom level
    popup = p_popup
  ) %>%
  # With `pal` + `values`, the legend entries are derived from the palette, so
  # no `labels` argument is passed (it would be overwritten anyway).
  addLegend(
    "bottomright",
    pal = pal,
    values = ~Gentrifying,
    opacity = 1,
    title = 'Gentrification Status'
  ) %>%
  # Layers control: lets the reader switch between the base maps.
  addLayersControl(
    baseGroups = c(
      "OSM (default)",
      "Positron (minimal)",
      "World Imagery (satellite)"
    )
  )

Overlay of Gardens on Gentrification Data

As you can see, Community Gardens tend to concentrate in areas that are Gentrifying or Not Gentrifying, and do not necessarily concentrate in areas that were already high income.

# Categorical palette over the three gentrification classes.
pal <- colorFactor(c("#900", "#00a", "#090"), domain = NYCTractsGentrifying$Gentrifying)

# One popup string per tract showing its classification.
p_popup <- paste0("<strong>Gentrification Status: </strong>",
                  NYCTractsGentrifying$Gentrifying)

# Tracts colored by gentrification status with community gardens drawn on top.
# The map is created without default data; each layer supplies its own.
leaflet() %>%
  # Three selectable base maps.
  addTiles(group = "OSM (default)") %>%
  addProviderTiles(providers$CartoDB.Positron, group = "Positron (minimal)") %>%
  addProviderTiles(providers$Esri.WorldImagery, group = "World Imagery (satellite)") %>%
  addPolygons(
    data = NYCTractsGentrifying,
    stroke = FALSE,                # no polygon borders
    fillColor = ~pal(Gentrifying), # evaluated against this layer's data
    fillOpacity = 0.7,
    smoothFactor = 0.5,
    popup = p_popup
  ) %>%
  # The map has no default data, so the legend values are passed explicitly.
  # `labels` is omitted: with `pal` + `values` the entries come from the palette.
  addLegend(
    "bottomright",
    pal = pal,
    values = NYCTractsGentrifying$Gentrifying,
    opacity = 1,
    title = 'Community Gardens and Gentrification Status <br> <p style="color:darkgreen;">Dark Green indicates a community garden.</p>'
  ) %>%
  # Gardens as small filled dots; with stroke disabled, `color` only sets the fill.
  addCircleMarkers(
    data = NYCGardenssf,
    radius = 2,
    stroke = FALSE,
    fillOpacity = 1,
    color = "darkgreen",
    popup = ~Garden.Name
  ) %>%
  # Layers control: lets the reader switch between the base maps.
  addLayersControl(
    baseGroups = c(
      "OSM (default)",
      "Positron (minimal)",
      "World Imagery (satellite)"
    )
  )

Acreage of Community Gardens by NTA

The pattern continues if you look at Neighborhood Tabulation Areas. NTAs with a lot of non-high income tracts tend to also have high land use by Community Gardens.

# For every NTA polygon, the row indices of the garden points intersecting it.
# Computed once and reused for both the acreage sum and the garden count.
nta_garden_idx <- st_intersects(NYCNTA, NYCGardenssf)

GardensNTA <- NYCNTA %>%
  mutate(
    # How many acres in a Neighborhood is used by Community Gardens?
    # (assumes the Size column is garden area in acres - TODO confirm)
    # sum() over an empty selection is 0, so NTAs without gardens need no
    # special case; vapply() guarantees a numeric vector even for zero rows.
    total_size = vapply(
      nta_garden_idx,
      function(idx) sum(NYCGardenssf$Size[idx], na.rm = TRUE),
      numeric(1)
    ),
    # How many community gardens are in a particular neighborhood?
    point_count = lengths(nta_garden_idx)
  )

# Continuous green ramp scaled to the range of garden acreage across NTAs.
pal <- colorNumeric(
  palette = "Greens",
  domain = GardensNTA$total_size)

# One popup string per NTA: its name and total garden acreage.
p_popup <- paste0("<strong>Neighborhood Tabulation Area: </strong>",
                  GardensNTA$NTAName, "<br>  <strong>Acres: </strong>", GardensNTA$total_size)

# Choropleth of community-garden acreage by NTA.
leaflet(GardensNTA) %>%
  # Three selectable base maps.
  addTiles(group = "OSM (default)") %>%
  addProviderTiles(providers$CartoDB.Positron, group = "Positron (minimal)") %>%
  addProviderTiles(providers$Esri.WorldImagery, group = "World Imagery (satellite)") %>%
  addPolygons(
    stroke = TRUE,                # thin grey outline so NTA borders stay visible
    weight = 0.1,
    color = "grey50",
    fillColor = ~pal(total_size), # formula is evaluated against the map data
    fillOpacity = 0.7,
    smoothFactor = 0.5,
    popup = p_popup
  ) %>%
  # With `pal` + `values`, the legend breaks and labels are derived from the
  # palette, so no `labels` argument is passed (it would be overwritten anyway).
  addLegend(
    "bottomright",
    pal = pal,
    values = ~total_size,
    opacity = 1,
    title = 'Acres of Community Gardens'
  ) %>%
  # Layers control: lets the reader switch between the base maps.
  addLayersControl(
    baseGroups = c(
      "OSM (default)",
      "Positron (minimal)",
      "World Imagery (satellite)"
    )
  )

Acreage of Community Gardens by Typology

If one looks at a boxplot comparing the three typologies, Gentrifying and Non-Gentrifying neighborhoods are far more similar to each other than to High-Income neighborhoods, suggesting that the prevalence of community gardens is unique to communities that have already experienced low income. Gentrifying tracts tend to see less variety in the land used by community gardens compared to non-gentrifying tracts, suggesting that having a certain amount of land area dedicated to community gardens may have an effect, as gentrifying tracts have a higher median acreage of community gardens and a smaller minimum and fourth quartile. However, there needs to be more investigation into this question.

# For every tract, the row indices of the garden points intersecting it.
# Computed once and reused for both the acreage sum and the garden count.
tract_garden_idx <- st_intersects(NYCTractsGentrifying, NYCGardenssf)

GentrifyingGardens <- NYCTractsGentrifying %>%
  mutate(
    # How many acres in a tract is used by Community Gardens?
    # (assumes the Size column is garden area in acres - TODO confirm)
    # sum() over an empty selection is 0, so tracts without gardens need no
    # special case; vapply() guarantees a numeric vector even for zero rows.
    total_size = vapply(
      tract_garden_idx,
      function(idx) sum(NYCGardenssf$Size[idx], na.rm = TRUE),
      numeric(1)
    ),
    # How many community gardens are in a particular tract?
    point_count = lengths(tract_garden_idx)
  )

# Keep only the tracts that have both a gentrification determination and a
# garden-size total; rows with NA in either column are dropped.
GentrifyingGardens_noNA <- filter(
  GentrifyingGardens,
  !is.na(Gentrifying),
  !is.na(total_size)
)

# Summary of garden acreage per gentrification class: median, lower and upper
# quartile, number of tracts, and the largest per-tract total.
# NOTE(review): GentrifyingGardens_noNA looks like an sf object here; if so,
# summarise() presumably also merges each group's geometries - confirm whether
# the geometry column is wanted in this table.
stats_data <- summarise(
  group_by(GentrifyingGardens_noNA, Gentrifying),
  median = median(total_size, na.rm = TRUE),
  q25 = quantile(total_size, probs = 0.25, na.rm = TRUE),
  q75 = quantile(total_size, probs = 0.75, na.rm = TRUE),
  n = n(),
  max_y = max(total_size, na.rm = TRUE)
)

# A log10 axis cannot display zero: tracts with no garden acreage become -Inf
# and are dropped from the boxes. Exclude them explicitly so the plotted data
# and the annotations describe the same set of tracts.
gardens_plotted <- GentrifyingGardens_noNA %>%
  filter(total_size > 0)

# Per-class label data: number of plotted tracts and the largest acreage
# (used to place the "n =" label near the top of each box). Geometry is
# dropped because only the attribute values are needed for text labels.
box_labels <- gardens_plotted %>%
  st_drop_geometry() %>%
  group_by(Gentrifying) %>%
  summarise(
    n = n(),
    max_y = max(total_size)
  )

# Boxplot of garden acreage per tract, by gentrification class.
ggplot(gardens_plotted,
       aes(x = Gentrifying, y = total_size, fill = Gentrifying)) +
  geom_boxplot(alpha = 0.7, outlier.size = 1.5) +
  geom_text(data = box_labels,
            aes(x = Gentrifying, y = max_y * 0.95,
                label = paste("n =", n)),
            vjust = -0.5, size = 4) +
  # Same class colors as the leaflet maps.
  scale_fill_manual(values = c("Gentrifying" = "#900",
                               "Not Gentrifying" = "#090",
                               "High-Income" = "#00a")) +
  # Logarithmic scale because garden acreage per tract is strongly skewed.
  scale_y_log10() +
  theme_minimal() +
  labs(title = "Land used by Community Gardens by Tract",
       subtitle = paste("Tracts with garden acreage > 0:",
                        nrow(gardens_plotted), "of",
                        nrow(GentrifyingGardens_noNA)),
       x = "Tract Classification",
       y = "Total Size (acres)") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        legend.position = "none")