Baltimore NMMAPS

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
balt621 <- read_csv("/Users/superclumsy/Library/CloudStorage/GoogleDrive-nguyenkhoiquan@gmail.com/My Drive/01 JHU MPH/140.621 Statistical Methods I/Problem Set/04/balt621.csv")
## Rows: 6940 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): date, season
## dbl (3): death, pm10, tempf
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Stratify mortality data into 4 groups

balt621 %>%
  group_by(season) %>%
  summarize(n_pm10 = sum(!is.na(pm10)), mean_pm10 = mean(pm10, na.rm=TRUE), 
            n_mortality = sum(!is.na(death)), mean_mortality = mean(death))
## # A tibble: 4 × 5
##   season n_pm10 mean_pm10 n_mortality mean_mortality
##   <chr>   <int>     <dbl>       <int>          <dbl>
## 1 Autumn    559    -2.97         1748           19.2
## 2 Spring    563     0.740        1729           18.2
## 3 Summer    571     4.58         1748           17.7
## 4 Winter    551    -2.93         1715           20.8

Stratify each season into 5 strata

Spring

pm10spring <- filter(balt621, season=="Spring")
quintiles = quantile(pm10spring$pm10, c(0,.2,.4,.6,.8,1), na.rm=TRUE)
pm10spring$pm_group = cut(pm10spring$pm10, breaks=quintiles, labels=1:5)
table(pm10spring$pm_group)
## 
##   1   2   3   4   5 
## 112 112 113 112 113

Summer

pm10summer <- filter(balt621, season=="Summer")
quintiles = quantile(pm10summer$pm10, c(0,.2,.4,.6,.8,1), na.rm=TRUE)
pm10summer$pm_group = cut(pm10summer$pm10, breaks=quintiles, labels=1:5)
table(pm10summer$pm_group)
## 
##   1   2   3   4   5 
## 114 114 114 114 114

Autumn

pm10autumn <- filter(balt621, season=="Autumn")
quintiles = quantile(pm10autumn$pm10, c(0,.2,.4,.6,.8,1), na.rm=TRUE)
pm10autumn$pm_group = cut(pm10autumn$pm10, breaks=quintiles, labels=1:5)
table(pm10autumn$pm_group)
## 
##   1   2   3   4   5 
## 111 112 111 112 112

Winter

pm10winter <- filter(balt621, season=="Winter")
quintiles = quantile(pm10winter$pm10, c(0,.2,.4,.6,.8,1), na.rm=TRUE)
pm10winter$pm_group = cut(pm10winter$pm10, breaks=quintiles, labels=1:5)
table(pm10winter$pm_group)
## 
##   1   2   3   4   5 
## 110 110 110 110 110

Calculating mean mortality

Spring

pm10spring %>% filter(pm_group==1) %>% 
  summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
##    mean    sd     n
##   <dbl> <dbl> <int>
## 1  18.6  4.19   112
pm10spring %>% filter(pm_group==5) %>% 
  summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
##    mean    sd     n
##   <dbl> <dbl> <int>
## 1  19.0  5.01   113

Summer

pm10summer %>% filter(pm_group==1) %>% 
  summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
##    mean    sd     n
##   <dbl> <dbl> <int>
## 1  17.6  4.15   114
pm10summer %>% filter(pm_group==5) %>% 
  summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
##    mean    sd     n
##   <dbl> <dbl> <int>
## 1  19.9  5.68   114

Autumn

pm10autumn %>% filter(pm_group==1) %>% 
  summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
##    mean    sd     n
##   <dbl> <dbl> <int>
## 1  19.8  5.38   111
pm10autumn %>% filter(pm_group==5) %>% 
  summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
##    mean    sd     n
##   <dbl> <dbl> <int>
## 1  20.5  5.43   112

Winter

pm10winter %>% filter(pm_group==1) %>% 
  summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
##    mean    sd     n
##   <dbl> <dbl> <int>
## 1  22.2  5.30   110
pm10winter %>% filter(pm_group==5) %>% 
  summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
##    mean    sd     n
##   <dbl> <dbl> <int>
## 1  21.4  5.35   110

Run t-test for mortality of lowest and highest pollution days

Spring

pm10spring.15 = pm10spring %>% filter(pm_group==1 | pm_group==5)
t.test(death ~ pm_group, data=pm10spring.15, var.equal=FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  death by pm_group
## t = -0.71099, df = 216.92, p-value = 0.4779
## alternative hypothesis: true difference in means between group 1 and group 5 is not equal to 0
## 95 percent confidence interval:
##  -1.6506036  0.7754455
## sample estimates:
## mean in group 1 mean in group 5 
##        18.55357        18.99115

Summer

pm10summer.15 = pm10summer %>% filter(pm_group==1 | pm_group==5)
t.test(death ~ pm_group, data=pm10summer.15, var.equal=FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  death by pm_group
## t = -3.4876, df = 207.04, p-value = 0.0005951
## alternative hypothesis: true difference in means between group 1 and group 5 is not equal to 0
## 95 percent confidence interval:
##  -3.5974049 -0.9990863
## sample estimates:
## mean in group 1 mean in group 5 
##        17.63158        19.92982

Autumn

pm10autumn.15 = pm10autumn %>% filter(pm_group==1 | pm_group==5)
t.test(death ~ pm_group, data=pm10autumn.15, var.equal=FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  death by pm_group
## t = -0.9771, df = 221, p-value = 0.3296
## alternative hypothesis: true difference in means between group 1 and group 5 is not equal to 0
## 95 percent confidence interval:
##  -2.1328719  0.7189401
## sample estimates:
## mean in group 1 mean in group 5 
##        19.81982        20.52679

Winter

pm10winter.15 = pm10winter %>% filter(pm_group==1 | pm_group==5)
t.test(death ~ pm_group, data=pm10winter.15, var.equal=FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  death by pm_group
## t = 1.2272, df = 217.98, p-value = 0.2211
## alternative hypothesis: true difference in means between group 1 and group 5 is not equal to 0
## 95 percent confidence interval:
##  -0.5344249  2.2980613
## sample estimates:
## mean in group 1 mean in group 5 
##        22.23636        21.35455