R Notebook - Asthma & Air Pollution NYC, South Bronx Focus

Add code chunk display options

load library

library(tidyverse)
library(sf)
library(scales)
library(viridis)
library(ggplot2)
library(plotly)
library(RSocrata)
library(knitr)
library(DT)
library(dplyr)
options(tigris_use_cache = TRUE)
options(scipen = 999)
library(tidycensus)
library(RColorBrewer)

read in data on air pollution (nitrogen dioxide and pm2.5) & uhf42 (united hospital fund neighborhoods) spatial files

data <- read.socrata("https://data.cityofnewyork.us/resource/c3uy-2p5r.csv")


uhf42_raw <- st_read("~/Documents/F23/Methods 1/methods1/part2/data/raw/UHF_42_DOHMH_2009.shp")

## Reading layer `UHF_42_DOHMH_2009' from data source 
##   `/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/UHF_42_DOHMH_2009.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 43 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913090.8 ymin: 120053.5 xmax: 1067310 ymax: 272932
## Projected CRS: NAD83 / New York Long Island (ftUS)

filter to a time frame that all south bronx neighborhoods are accounted for. select time frame ‘2018-19’ to ensure impacts of covid on transport patterns don’t sway data. ensure all uhf42 neighborhoods spelt the same before making the join.

data_2018_19 <- data|>
  filter(time_period== "Winter 2018-19")|>
  filter(geo_type_name == "UHF42")|>
  mutate(across('geo_place_name', str_replace, 'Fordham - Bronx Pk', 'Fordham - Bronx Park'),
         across('geo_place_name', str_replace, 'Crotona -Tremont', 'Crotona - Tremont'),
         across('geo_place_name', str_replace, 'Washington Heights', 'Washington Heights - Inwood'),
         across('geo_place_name', str_replace, 'Rockaways', 'Rockaway'),
         across('geo_place_name', str_replace, 'Downtown - Heights - Slope', 'Downtown  - Heights - Slope'),
         across('geo_place_name', str_replace, 'Greenwich Village - SoHo', 'Greenwich Village - Soho'),
         across('geo_place_name', str_replace, 'Gramercy Park - Murray Hill', 'Gramercy Park -  Murray Hill'))

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across("geo_place_name", str_replace, "Fordham - Bronx Pk",
##   "Fordham - Bronx Park")`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))

join air pollution data to uhf42 spatial data so it can be mapped

uhf42 <- uhf42_raw|>
  rename(geo_place_name = UHF_NEIGH)|>
  left_join(data_2018_19, by = "geo_place_name")|>
  select(BOROUGH, geo_place_name, time_period, name, measure_info, measure, data_value, geometry)

read in neighborhood and borough data in case needed

boros <- st_read("/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/geo/Borough_Boundaries.geojson")

## Reading layer `Borough_Boundaries' from data source 
##   `/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/geo/Borough_Boundaries.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 5 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS 84

neighbs <- st_read("/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/geo/nynta2020_23c/nynta2020.shp")

## Reading layer `nynta2020' from data source 
##   `/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/geo/nynta2020_23c/nynta2020.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 262 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913175.1 ymin: 120128.4 xmax: 1067383 ymax: 272844.3
## Projected CRS: NAD83 / New York Long Island (ftUS)

separate no2 and pm2.5 data by creating a separate dataframe for each

uhf42_pm25 <- uhf42|>
  filter(name == "Fine particles (PM 2.5)")|>
  rename(`mean PM 2.5 mcg/m3` = data_value)|>
  select(BOROUGH, geo_place_name, time_period,`mean PM 2.5 mcg/m3`, geometry)

uhf42_no2 <- uhf42|>
  filter(name == "Nitrogen dioxide (NO2)")|>
  rename(`mean no2 ppb` = data_value)|>
  select(BOROUGH, geo_place_name, time_period,`mean no2 ppb`, geometry)

make a map for each pollutant and it’s concentration accross nyc

ggplot(data = uhf42_pm25, mapping = aes(fill = `mean PM 2.5 mcg/m3`)) +
  geom_sf(lwd=0.1) +
  theme_void()+
  scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
                       palette = "OrRd",
                       direction = 1,
                       name="mean pm2.5 particulate matter") +
  labs(
    title = "NYC, PM 2.5 Pollution per UHF42",
    caption = "NYC Open Data 2018-19")

ggplot(data = uhf42_no2, mapping = aes(fill = `mean no2 ppb`)) +
  geom_sf(lwd=0.1) +
  theme_void()+
  scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
                       palette = "OrRd",
                       direction = 1,
                       name="mean no2 parts per billion") +
  labs(
    title = "NYC, Nitrogen Dioxide Pollution per UHF42",
    caption = "NYC Open Data 2018-19")

I am surprised to see that both concentrations are highest in Chelsea-Clinton & Gramercy Park - Murray Hill.

To focus more on the Bronx, I filter the dataframe so it is only for uhg42 neighborhoods in the Bronx. I then separate pm2.5 and no2 data into two separate data frames. I then drop the geometry of one so I can rejoin them as one data frame with separate columns for pm2.5 and no2.

#make a bronx specific table for air pollution
uhf42_bronx <- uhf42|>
  filter(BOROUGH == "Bronx")

#split into pm2.5 and no2
uhf42_bronx_pm25 <- uhf42|>
  filter(name == "Fine particles (PM 2.5)")|>
  rename(`mean PM 2.5 mcg/m3` = data_value)|>
  select(BOROUGH, geo_place_name, time_period,`mean PM 2.5 mcg/m3`, geometry)|>
  filter(BOROUGH == "Bronx")


uhf42_bronx_no2 <- uhf42|>
  filter(name == "Nitrogen dioxide (NO2)")|>
  rename(`mean no2 ppb` = data_value)|>
  select(BOROUGH, geo_place_name, time_period,`mean no2 ppb`, geometry)|>
  filter(BOROUGH == "Bronx")

#drop geometry of pm2.5 data to join no2 and pm25 together in one table
uhf42_bronx_pm25_stats <- st_drop_geometry(uhf42_bronx_pm25)|>
  select(geo_place_name, `mean PM 2.5 mcg/m3`)

#new table with no2 and pm25 data for bronx side by side
uhf42_bronx_no2_pm25 <- uhf42_bronx_no2|>
  left_join(uhf42_bronx_pm25_stats, by = "geo_place_name")

I make two separate maps showing pm2.5 and no2 pollution respectively across the Bronx.

#map of pm2.25 pollution in bronx
ggplot(data = uhf42_bronx_pm25, mapping = aes(fill = `mean PM 2.5 mcg/m3`)) +
  geom_sf(lwd=0.1) +
  theme_void()+
  scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
                       palette = "OrRd",
                       direction = 1,
                       name="mean pm2.5 particulate matter") +
  labs(
    title = "Bronx, PM 2.5 Pollution per UHF42",
    caption = "NYC Open Data 2018-19")

#map of no2 pollution in bronx
ggplot(data = uhf42_bronx_no2, mapping = aes(fill = `mean no2 ppb`)) +
  geom_sf(lwd=0.1) +
  theme_void()+
  scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
                       palette = "OrRd",
                       direction = 1,
                       name="mean no2 parts per billion") +
  labs(
    title = "Bronx, Nitrogen Dioxide Pollution per UHF42",
    caption = "NYC Open Data 2018-19")

Read in Asthma data to observe the relationship between pollution and asthma rates. Please note that asthma data is from 2019, while pollution data is from winter 2018-2019.

read_csv("/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/NYC_EH_Data Portal_Asthma_ed_UHF42.csv")

## # A tibble: 42 × 8
##     Time GeoTypeDesc GeoID GeoRank Geography              Age-adjusted rate pe…¹
##    <dbl> <chr>       <dbl>   <dbl> <chr>                                   <dbl>
##  1  2020 UHF 42        101       4 Kingsbridge - Riverda…                   25.4
##  2  2020 UHF 42        102       4 Northeast Bronx                          67  
##  3  2020 UHF 42        103       4 Fordham - Bronx Pk                       75.6
##  4  2020 UHF 42        104       4 Pelham - Throgs Neck                     65.6
##  5  2020 UHF 42        105       4 Crotona -Tremont                        129. 
##  6  2020 UHF 42        106       4 High Bridge - Morrisa…                  150. 
##  7  2020 UHF 42        107       4 Hunts Point - Mott Ha…                  150. 
##  8  2020 UHF 42        201       4 Greenpoint                               25  
##  9  2020 UHF 42        202       4 Downtown - Heights - …                   35.7
## 10  2020 UHF 42        203       4 Bedford Stuyvesant - …                   90.5
## # ℹ 32 more rows
## # ℹ abbreviated name: ¹`Age-adjusted rate per 10,000`
## # ℹ 2 more variables: `Estimated annual rate per 10,000` <dbl>, Number <dbl>

asthma_ed_raw <- read_csv("/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/NYC_EH_Data Portal_Asthma_ed_UHF42.csv")

ensure all uhf42 neighborhoods spelt the same before making the join, then join asthma data to pollution data.

asthma_ed_uhf42 <- asthma_ed_raw|>
  rename(geo_place_name = Geography)|>
  mutate(across('geo_place_name', str_replace, 'Fordham - Bronx Pk', 'Fordham - Bronx Park'),
         across('geo_place_name', str_replace, 'Crotona -Tremont', 'Crotona - Tremont'),
         across('geo_place_name', str_replace, 'Washington Heights', 'Washington Heights - Inwood'),
         across('geo_place_name', str_replace, 'Rockaways', 'Rockaway'),
         across('geo_place_name', str_replace, 'Downtown - Heights - Slope', 'Downtown  - Heights - Slope'),
         across('geo_place_name', str_replace, 'Greenwich Village - SoHo', 'Greenwich Village - Soho'),
         across('geo_place_name', str_replace, 'Gramercy Park - Murray Hill', 'Gramercy Park -  Murray Hill'))

#join asthma data to pollution data 
uhf42_asthma_pollution <- uhf42|>
  left_join(asthma_ed_uhf42, by = "geo_place_name")

Create a map to show asthma rates across nyc

ggplot(data = uhf42_asthma_pollution, mapping = aes(fill = `Estimated annual rate per 10,000`)) +
  geom_sf(lwd=0.1) +
  theme_void()+
  scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
                       palette = "YlOrRd",
                       direction = 1,
                       name="Asthma Emergency Visits, Estimated annual rate per 10,000") +
  labs(
    title = "Emergency Visits for Asthma per 10,000 Adults",
    caption = "Source: NYC EH Data Portal Asthma Emergency Department 2019")

Create a dataframe combining data on asthma and pollution in the South Bronx. Use this data to make a map that shows asthma rates and includes information on pollution for each uhf42.

uhf42_asthma_pollution_bronx <- uhf42_bronx_no2_pm25|>
  left_join(asthma_ed_uhf42, by = "geo_place_name")

#create a map showing asthma rates in the bronx 
ggplot(data = uhf42_asthma_pollution_bronx, mapping = aes(fill = `Estimated annual rate per 10,000`)) +
  geom_sf(lwd=0.1) +
  theme_void()+
  scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
                       palette = "YlOrRd",
                       direction = 1,
                       name="Asthma Emergency Visits, Estimated annual rate per 10,000") +
  labs(
    title = "Bronx, Emergency Visits for Asthma & Air Pollution Concentration",
    caption = "Sources: NYC EH Data Portal Asthma Emergency Department 2019 & 
    NYC Open Data Air Pollution 2018-19")

#add tool tip
ufh42_asthma_pollution_bronx_mapped <- ggplot()+
  geom_sf(data = uhf42_asthma_pollution_bronx, 
  mapping = aes(fill = `Estimated annual rate per 10,000`,
                text = paste0(geo_place_name, ":",
                              "<br>Annual ED visit rate per 10k:",
                              (`Estimated annual rate per 10,000`),
                              "<br>N02 Levels:",
                              (`mean no2 ppb`),
                              "<br>PM2.5 Levels:",
                              (`mean PM 2.5 mcg/m3`))),
  color = "transparent") +
  theme_void()+
  scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
                       palette = "YlOrRd",
                       direction = 1,
                       name="Asthma Emergency Visits, Estimated annual rate per 10,000") +
  labs(
    title = "Bronx, Emergency Visits for Asthma per 10,000 Adults",
    caption = "Sources: NYC EH Data Portal Asthma Emergency Department 2019 & 
    NYC Open Data Air Pollution 2018-19")

ggplotly(ufh42_asthma_pollution_bronx_mapped, tooltip = "text")

Using the above tool tip, one can observe the positive correlation between pollution concentration and asthma rates. I attempt to show this correlation on a scatter plot below.

bronx_asthma_pollution_scatter <- ggplot(data = uhf42_asthma_pollution_bronx, aes(x = `pollution rate`, y = `Estimated annual rate per 10,000`))+
                     geom_point(aes(x = `mean PM 2.5 mcg/m3`, y = `Estimated annual rate per 10,000`), colour=alpha('red', 0.75))+
                     geom_point(aes(x = `mean no2 ppb`, y = `Estimated annual rate per 10,000`), colour=alpha('blue', 0.75))+
  labs(x = "Red = PM 2.5 mcg/m3 / Blue = N02 ppb", y = "ED visits per 10k",
       title = "Asthma + Pollution Bronx",
       caption = "Sources: Open Data NY (data.ny.gov)") +
  theme_classic()

bronx_asthma_pollution_scatter