create a pm2.5 mean average table from the raster data
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(ggplot2)
library(terra)
## terra 1.9.1
##
## Attaching package: 'terra'
##
## The following object is masked from 'package:tidyr':
##
## extract
library(exactextractr)
library(dplyr)
census <- sf::st_read('/Users/areeba/Desktop/tl_2024_us_zcta520/tl_2024_us_zcta520.shp')
## Reading layer `tl_2024_us_zcta520' from data source
## `/Users/areeba/Desktop/tl_2024_us_zcta520/tl_2024_us_zcta520.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 33791 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -176.6967 ymin: -14.37378 xmax: 145.8305 ymax: 71.34132
## Geodetic CRS: NAD83
nyc_zip_codes <- c(10001:10282, 10451:10475, 11201:11256, 11004:11005, 11101:11106, 11354:11375, 11377:11386, 11411:11436, 11691:11697, 10301:10314)
nyc_zip <- census %>% filter(as.numeric(ZCTA5CE20) %in% nyc_zip_codes)
nrow(nyc_zip)
## [1] 208
pm2.5 <- rast("/Users/areeba/Desktop/V6GL02.04.CNNPM25.NA.202301-202312.nc")
pm2.5_by_zipcode <- nyc_zip %>% st_transform(crs(pm2.5)) %>% mutate(pm2.5_mean = round(exact_extract(pm2.5, ., "mean"), 2))%>% st_drop_geometry() %>% select(zip5 = ZCTA5CE20, pm2.5_mean)
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
write.csv(pm2.5_by_zipcode, "/Users/areeba/Desktop/pm2.5_avg.csv", row.names = FALSE)
read the ems file and filter the data by 2023 and create a column incident count which groups the ems asthma related calls and counts them by zipcode
ems <- read_csv("/Users/areeba/Desktop/EMS_Incident_Dispatch_Data_20260501.csv")
## Rows: 289900 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (19): INCIDENT_DATETIME, INITIAL_CALL_TYPE, FINAL_CALL_TYPE, FIRST_ASSIG...
## dbl (9): CAD_INCIDENT_ID, INITIAL_SEVERITY_LEVEL_CODE, FINAL_SEVERITY_LEVEL...
## num (3): DISPATCH_RESPONSE_SECONDS_QY, INCIDENT_RESPONSE_SECONDS_QY, INCIDE...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ems_2023 <- ems %>% filter(startsWith(as.character(CAD_INCIDENT_ID), "23")) %>% group_by(ZIPCODE) %>% summarise(incident_count = n(), .groups = "drop")
read the census data by an API call and get certain categories
library(tidycensus)
census_api_key("04f69c8500e02b83fbd20a1d7f3df65e6a8c78e0", overwrite = TRUE)
## To install your API key for use in future sessions, run this function with `install = TRUE`.
census_2023 <- get_acs(geography = "zcta", variables=c(
total_population = "B01003_001",
med_income = "B19013_001",
poverty_rate = "S1701_C03_001",
white_pop = "B02001_002",
black_pop = "B02001_003",
asian_pop = "B02001_005"
), year = 2023, output = "wide")
## Getting data from the 2019-2023 5-year ACS
## Fetching data by table type ("B/C", "S", "DP") and combining the result.
names(census_2023)
## [1] "GEOID" "NAME" "total_populationE"
## [4] "total_populationM" "med_incomeE" "med_incomeM"
## [7] "white_popE" "white_popM" "black_popE"
## [10] "black_popM" "asian_popE" "asian_popM"
## [13] "poverty_rateE" "poverty_rateM"
nyc_zip_codes <- c(10001:10282, 10451:10475, 11201:11256, 11004:11005, 11101:11106, 11354:11375, 11377:11386, 11411:11436, 11691:11697, 10301:10314)
census_2023 <- census_2023 %>% mutate(zipcode = gsub("ZCTA5 ", "", NAME)) %>% filter(zipcode %in% as.character(nyc_zip_codes)) %>% select(-GEOID, -NAME)
join the ems, census and pm2.5 data by zipcode
ems_2023 <- ems_2023 %>% mutate(ZIPCODE = as.character(ZIPCODE))
ems_census <- census_2023 %>% left_join(ems_2023, by = c("zipcode" = "ZIPCODE"))
ems_census_pm25 <- ems_census %>% left_join(pm2.5_by_zipcode, by = c("zipcode" = "zip5"))
ems_census_pm25 <- ems_census_pm25 %>% mutate(med_income_pct = round(percent_rank(med_incomeE)*100, 1))
ggplot for PM2.5 mean for 2023
nyc_map <- nyc_zip %>% mutate(ZCTA5CE20 = as.character(ZCTA5CE20)) %>% left_join(ems_census_pm25, by = c("ZCTA5CE20" = "zipcode"))
ggplot(nyc_map) + geom_sf(aes(fill = pm2.5_mean), color = "white", size = 0.2) + scale_fill_gradient(low = "lightyellow", high = "darkred", name = "PM2.5 (µg/m³)")+ labs(title = "PM2.5 levels by zipcode (2023 mean)") + theme_void()
ggplot for PM2.5 mean for 2023
ggplot(nyc_map) + geom_sf(aes(fill = incident_count), color = "white", size = 0.2) + scale_fill_gradient(low = "lightyellow", high = "darkred", name = "EMS Asthma related incidents")+ labs(title = "EMS Asthma related incidents by zipcode (2023)") + theme_void()
ggplot for median income percentile for 2023
ggplot(nyc_map) + geom_sf(aes(fill = med_income_pct), color = "white", size = 0.2) + scale_fill_gradient(low = "lightyellow", high = "darkred", name = "Median income percentile")+ labs(title = "Median income percentile by zipcode (2023)") + theme_void()
pm2.5 and EMS incidents scatter plot
ggplot(ems_census_pm25, aes(x=pm2.5_mean, y = incident_count)) +
geom_point(alpha = 0.6, color = "skyblue")+
geom_smooth(method = "lm", color = "dodgerblue")+
labs(title = "Average pm2.5 vs Asthma related EMS incidents", x = "Mean PM2.5 (µg/m³)", y = "EMS Asthma related incidents")+ theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 30 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 30 rows containing missing values or values outside the scale range
## (`geom_point()`).
median income and EMS incidents scatter plot
ggplot(ems_census_pm25, aes(x=med_income_pct, y = incident_count)) +
geom_point(alpha = 0.6, color = "skyblue")+
geom_smooth(method = "lm", color = "dodgerblue")+
labs(title = "Median Income percentile vs Asthma related EMS incidents", x = "Median income percentile", y = "EMS Asthma related incidents")+ theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 34 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 34 rows containing missing values or values outside the scale range
## (`geom_point()`).
median income and pm2.5 scatter plot
ggplot(ems_census_pm25, aes(x=med_income_pct, y = pm2.5_mean)) +
geom_point(alpha = 0.6, color = "skyblue")+
geom_smooth(method = "lm", color = "dodgerblue")+
labs(title = "Median Income percentile vs Mean PM2.5", x = "Median income percentile", y = "Mean PM2.5")+ theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).
create a web interactive map which shows the asthma related ems calls, pm2.5 and median income percentile
library(leaflet)
library(sf)
nyc_map <- nyc_map %>% st_transform(crs = 4326)
pal_pm25 <- colorNumeric(palette = "YlOrRd", domain = nyc_map$pm2.5_mean)
pal_income <- colorNumeric(palette = "YlOrRd", domain = nyc_map$med_income_pct)
pal_incidents <- colorNumeric(palette = "YlOrRd", domain = nyc_map$incident_count)
leaflet(nyc_map) %>% addTiles() %>% addPolygons(group = "PM2.5",
fillColor = ~pal_pm25(pm2.5_mean),
fillOpacity = 0.7,
color = "pink", weight = 1,
popup = ~paste0(
"<b>ZIP Code:</b> ", ZCTA5CE20, "<br>",
"<b>PM2.5 Mean:</b> ", pm2.5_mean, "<br>"
)
)%>%
addLegend(
pal = pal_pm25,
values = ~pm2.5_mean,
title = "PM2.5 Average",
position = "bottomright",
group = "PM2.5"
) %>%
addPolygons(group = "Median Income",fillColor = ~pal_income(med_income_pct),
fillOpacity = 0.7,
color = "pink", weight = 1,
popup = ~paste0(
"<b>ZIP Code:</b> ", ZCTA5CE20, "<br>",
"<b>Median Income Percentile:</b> ", med_income_pct, "<br>"
)
)%>%
addLegend(
pal = pal_income,
values = ~med_income_pct,
title = "Median Income Percentile",
position = "bottomright",
group = "Median Income"
) %>%
addPolygons(group = "EMS Incidents",
fillColor = ~pal_incidents(incident_count),
fillOpacity = 0.7,
color = "pink", weight = 1,
popup = ~paste0(
"<b>ZIP Code:</b> ", ZCTA5CE20, "<br>",
"<b>EMS Asthma Related Incidents:</b> ", incident_count, "<br>"
)
)%>%
addLegend(
pal = pal_incidents,
values = ~incident_count,
title = "EMS Asthma Related Incidents",
position = "bottomright",
group = "EMS Incidents"
) %>%
addLayersControl(overlayGroups = c("PM2.5", "Median Income", "EMS Incidents"),
options = layersControlOptions(collapsed = FALSE)) %>%
hideGroup(c("PM2.5", "Median Income", "EMS Incidents"))
create a regression model to check the relationship between my variables
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
reg_model <- lm(incident_count~med_income_pct + pm2.5_mean, data = ems_census_pm25)
stargazer(reg_model,
type = "text",
title = "Linear Regression: EMS incident by PM2.5 and median income",
covariate.labels = c("Median Income Percentile", "PM2.5 Mean"),
dep.var.labels = "EMS Asthma Related Incidents",
ci = TRUE,
ci.level = 0.95,
single.row = TRUE)
##
## Linear Regression: EMS incident by PM2.5 and median income
## ========================================================
## Dependent variable:
## -------------------------------
## EMS Asthma Related Incidents
## --------------------------------------------------------
## Median Income Percentile -0.818*** (-0.971, -0.665)
## PM2.5 Mean 17.720*** (12.150, 23.289)
## Constant -116.730*** (-179.027, -54.433)
## --------------------------------------------------------
## Observations 174
## R2 0.469
## Adjusted R2 0.463
## Residual Std. Error 29.368 (df = 171)
## F Statistic 75.541*** (df = 2; 171)
## ========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01