Add code chunk display options
load library
library(tidyverse)
library(sf)
library(scales)
library(viridis)
library(ggplot2)
library(plotly)
library(RSocrata)
library(knitr)
library(DT)
library(dplyr)
options(tigris_use_cache = TRUE)
options(scipen = 999)
library(tidycensus)
library(RColorBrewer)
read in data on air pollution (nitrogen dioxide and pm2.5) & uhf42 (united hospital fund neighborhoods) spatial files
data <- read.socrata("https://data.cityofnewyork.us/resource/c3uy-2p5r.csv")
uhf42_raw <- st_read("~/Documents/F23/Methods 1/methods1/part2/data/raw/UHF_42_DOHMH_2009.shp")
## Reading layer `UHF_42_DOHMH_2009' from data source
## `/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/UHF_42_DOHMH_2009.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 43 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 913090.8 ymin: 120053.5 xmax: 1067310 ymax: 272932
## Projected CRS: NAD83 / New York Long Island (ftUS)
filter to a time frame that all south bronx neighborhoods are accounted for. select time frame ‘2018-19’ to ensure impacts of covid on transport patterns don’t sway data. ensure all uhf42 neighborhoods spelt the same before making the join.
data_2018_19 <- data|>
filter(time_period== "Winter 2018-19")|>
filter(geo_type_name == "UHF42")|>
mutate(across('geo_place_name', str_replace, 'Fordham - Bronx Pk', 'Fordham - Bronx Park'),
across('geo_place_name', str_replace, 'Crotona -Tremont', 'Crotona - Tremont'),
across('geo_place_name', str_replace, 'Washington Heights', 'Washington Heights - Inwood'),
across('geo_place_name', str_replace, 'Rockaways', 'Rockaway'),
across('geo_place_name', str_replace, 'Downtown - Heights - Slope', 'Downtown - Heights - Slope'),
across('geo_place_name', str_replace, 'Greenwich Village - SoHo', 'Greenwich Village - Soho'),
across('geo_place_name', str_replace, 'Gramercy Park - Murray Hill', 'Gramercy Park - Murray Hill'))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across("geo_place_name", str_replace, "Fordham - Bronx Pk",
## "Fordham - Bronx Park")`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
join air pollution data to uhf42 spatial data so it can be mapped
uhf42 <- uhf42_raw|>
rename(geo_place_name = UHF_NEIGH)|>
left_join(data_2018_19, by = "geo_place_name")|>
select(BOROUGH, geo_place_name, time_period, name, measure_info, measure, data_value, geometry)
read in neighborhood and borough data in case needed
boros <- st_read("/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/geo/Borough_Boundaries.geojson")
## Reading layer `Borough_Boundaries' from data source
## `/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/geo/Borough_Boundaries.geojson'
## using driver `GeoJSON'
## Simple feature collection with 5 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS 84
neighbs <- st_read("/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/geo/nynta2020_23c/nynta2020.shp")
## Reading layer `nynta2020' from data source
## `/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/geo/nynta2020_23c/nynta2020.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 262 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 913175.1 ymin: 120128.4 xmax: 1067383 ymax: 272844.3
## Projected CRS: NAD83 / New York Long Island (ftUS)
separate no2 and pm2.5 data by creating a separate dataframe for each
uhf42_pm25 <- uhf42|>
filter(name == "Fine particles (PM 2.5)")|>
rename(`mean PM 2.5 mcg/m3` = data_value)|>
select(BOROUGH, geo_place_name, time_period,`mean PM 2.5 mcg/m3`, geometry)
uhf42_no2 <- uhf42|>
filter(name == "Nitrogen dioxide (NO2)")|>
rename(`mean no2 ppb` = data_value)|>
select(BOROUGH, geo_place_name, time_period,`mean no2 ppb`, geometry)
make a map for each pollutant and it’s concentration accross nyc
ggplot(data = uhf42_pm25, mapping = aes(fill = `mean PM 2.5 mcg/m3`)) +
geom_sf(lwd=0.1) +
theme_void()+
scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
palette = "OrRd",
direction = 1,
name="mean pm2.5 particulate matter") +
labs(
title = "NYC, PM 2.5 Pollution per UHF42",
caption = "NYC Open Data 2018-19")
ggplot(data = uhf42_no2, mapping = aes(fill = `mean no2 ppb`)) +
geom_sf(lwd=0.1) +
theme_void()+
scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
palette = "OrRd",
direction = 1,
name="mean no2 parts per billion") +
labs(
title = "NYC, Nitrogen Dioxide Pollution per UHF42",
caption = "NYC Open Data 2018-19")
I am surprised to see that both concentrations are highest in Chelsea-Clinton & Gramercy Park - Murray Hill.
To focus more on the Bronx, I filter the dataframe so it is only for uhg42 neighborhoods in the Bronx. I then separate pm2.5 and no2 data into two separate data frames. I then drop the geometry of one so I can rejoin them as one data frame with separate columns for pm2.5 and no2.
#make a bronx specific table for air pollution
uhf42_bronx <- uhf42|>
filter(BOROUGH == "Bronx")
#split into pm2.5 and no2
uhf42_bronx_pm25 <- uhf42|>
filter(name == "Fine particles (PM 2.5)")|>
rename(`mean PM 2.5 mcg/m3` = data_value)|>
select(BOROUGH, geo_place_name, time_period,`mean PM 2.5 mcg/m3`, geometry)|>
filter(BOROUGH == "Bronx")
uhf42_bronx_no2 <- uhf42|>
filter(name == "Nitrogen dioxide (NO2)")|>
rename(`mean no2 ppb` = data_value)|>
select(BOROUGH, geo_place_name, time_period,`mean no2 ppb`, geometry)|>
filter(BOROUGH == "Bronx")
#drop geometry of pm2.5 data to join no2 and pm25 together in one table
uhf42_bronx_pm25_stats <- st_drop_geometry(uhf42_bronx_pm25)|>
select(geo_place_name, `mean PM 2.5 mcg/m3`)
#new table with no2 and pm25 data for bronx side by side
uhf42_bronx_no2_pm25 <- uhf42_bronx_no2|>
left_join(uhf42_bronx_pm25_stats, by = "geo_place_name")
I make two separate maps showing pm2.5 and no2 pollution respectively across the Bronx.
#map of pm2.25 pollution in bronx
ggplot(data = uhf42_bronx_pm25, mapping = aes(fill = `mean PM 2.5 mcg/m3`)) +
geom_sf(lwd=0.1) +
theme_void()+
scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
palette = "OrRd",
direction = 1,
name="mean pm2.5 particulate matter") +
labs(
title = "Bronx, PM 2.5 Pollution per UHF42",
caption = "NYC Open Data 2018-19")
#map of no2 pollution in bronx
ggplot(data = uhf42_bronx_no2, mapping = aes(fill = `mean no2 ppb`)) +
geom_sf(lwd=0.1) +
theme_void()+
scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
palette = "OrRd",
direction = 1,
name="mean no2 parts per billion") +
labs(
title = "Bronx, Nitrogen Dioxide Pollution per UHF42",
caption = "NYC Open Data 2018-19")
Read in Asthma data to observe the relationship between pollution and asthma rates. Please note that asthma data is from 2019, while pollution data is from winter 2018-2019.
read_csv("/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/NYC_EH_Data Portal_Asthma_ed_UHF42.csv")
## # A tibble: 42 × 8
## Time GeoTypeDesc GeoID GeoRank Geography Age-adjusted rate pe…¹
## <dbl> <chr> <dbl> <dbl> <chr> <dbl>
## 1 2020 UHF 42 101 4 Kingsbridge - Riverda… 25.4
## 2 2020 UHF 42 102 4 Northeast Bronx 67
## 3 2020 UHF 42 103 4 Fordham - Bronx Pk 75.6
## 4 2020 UHF 42 104 4 Pelham - Throgs Neck 65.6
## 5 2020 UHF 42 105 4 Crotona -Tremont 129.
## 6 2020 UHF 42 106 4 High Bridge - Morrisa… 150.
## 7 2020 UHF 42 107 4 Hunts Point - Mott Ha… 150.
## 8 2020 UHF 42 201 4 Greenpoint 25
## 9 2020 UHF 42 202 4 Downtown - Heights - … 35.7
## 10 2020 UHF 42 203 4 Bedford Stuyvesant - … 90.5
## # ℹ 32 more rows
## # ℹ abbreviated name: ¹​`Age-adjusted rate per 10,000`
## # ℹ 2 more variables: `Estimated annual rate per 10,000` <dbl>, Number <dbl>
asthma_ed_raw <- read_csv("/Users/izzygroenewegen/Documents/F23/Methods 1/methods1/part2/data/raw/NYC_EH_Data Portal_Asthma_ed_UHF42.csv")
ensure all uhf42 neighborhoods spelt the same before making the join, then join asthma data to pollution data.
asthma_ed_uhf42 <- asthma_ed_raw|>
rename(geo_place_name = Geography)|>
mutate(across('geo_place_name', str_replace, 'Fordham - Bronx Pk', 'Fordham - Bronx Park'),
across('geo_place_name', str_replace, 'Crotona -Tremont', 'Crotona - Tremont'),
across('geo_place_name', str_replace, 'Washington Heights', 'Washington Heights - Inwood'),
across('geo_place_name', str_replace, 'Rockaways', 'Rockaway'),
across('geo_place_name', str_replace, 'Downtown - Heights - Slope', 'Downtown - Heights - Slope'),
across('geo_place_name', str_replace, 'Greenwich Village - SoHo', 'Greenwich Village - Soho'),
across('geo_place_name', str_replace, 'Gramercy Park - Murray Hill', 'Gramercy Park - Murray Hill'))
#join asthma data to pollution data
uhf42_asthma_pollution <- uhf42|>
left_join(asthma_ed_uhf42, by = "geo_place_name")
Create a map to show asthma rates across nyc
ggplot(data = uhf42_asthma_pollution, mapping = aes(fill = `Estimated annual rate per 10,000`)) +
geom_sf(lwd=0.1) +
theme_void()+
scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
palette = "YlOrRd",
direction = 1,
name="Asthma Emergency Visits, Estimated annual rate per 10,000") +
labs(
title = "Emergency Visits for Asthma per 10,000 Adults",
caption = "Source: NYC EH Data Portal Asthma Emergency Department 2019")
Create a dataframe combining data on asthma and pollution in the South Bronx. Use this data to make a map that shows asthma rates and includes information on pollution for each uhf42.
uhf42_asthma_pollution_bronx <- uhf42_bronx_no2_pm25|>
left_join(asthma_ed_uhf42, by = "geo_place_name")
#create a map showing asthma rates in the bronx
ggplot(data = uhf42_asthma_pollution_bronx, mapping = aes(fill = `Estimated annual rate per 10,000`)) +
geom_sf(lwd=0.1) +
theme_void()+
scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
palette = "YlOrRd",
direction = 1,
name="Asthma Emergency Visits, Estimated annual rate per 10,000") +
labs(
title = "Bronx, Emergency Visits for Asthma & Air Pollution Concentration",
caption = "Sources: NYC EH Data Portal Asthma Emergency Department 2019 &
NYC Open Data Air Pollution 2018-19")
#add tool tip
ufh42_asthma_pollution_bronx_mapped <- ggplot()+
geom_sf(data = uhf42_asthma_pollution_bronx,
mapping = aes(fill = `Estimated annual rate per 10,000`,
text = paste0(geo_place_name, ":",
"<br>Annual ED visit rate per 10k:",
(`Estimated annual rate per 10,000`),
"<br>N02 Levels:",
(`mean no2 ppb`),
"<br>PM2.5 Levels:",
(`mean PM 2.5 mcg/m3`))),
color = "transparent") +
theme_void()+
scale_fill_distiller(breaks=c(0,.2,.4,.6,.8,1),
palette = "YlOrRd",
direction = 1,
name="Asthma Emergency Visits, Estimated annual rate per 10,000") +
labs(
title = "Bronx, Emergency Visits for Asthma per 10,000 Adults",
caption = "Sources: NYC EH Data Portal Asthma Emergency Department 2019 &
NYC Open Data Air Pollution 2018-19")
ggplotly(ufh42_asthma_pollution_bronx_mapped, tooltip = "text")
Using the above tool tip, one can observe the positive correlation between pollution concentration and asthma rates. I attempt to show this correlation on a scatter plot below.
bronx_asthma_pollution_scatter <- ggplot(data = uhf42_asthma_pollution_bronx, aes(x = `pollution rate`, y = `Estimated annual rate per 10,000`))+
geom_point(aes(x = `mean PM 2.5 mcg/m3`, y = `Estimated annual rate per 10,000`), colour=alpha('red', 0.75))+
geom_point(aes(x = `mean no2 ppb`, y = `Estimated annual rate per 10,000`), colour=alpha('blue', 0.75))+
labs(x = "Red = PM 2.5 mcg/m3 / Blue = N02 ppb", y = "ED visits per 10k",
title = "Asthma + Pollution Bronx",
caption = "Sources: Open Data NY (data.ny.gov)") +
theme_classic()
bronx_asthma_pollution_scatter