Baltimore NMMAPS
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
balt621 <- read_csv("/Users/superclumsy/Library/CloudStorage/GoogleDrive-nguyenkhoiquan@gmail.com/My Drive/01 JHU MPH/140.621 Statistical Methods I/Problem Set/04/balt621.csv")
## Rows: 6940 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): date, season
## dbl (3): death, pm10, tempf
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Stratify mortality data into 4 groups
balt621 %>%
group_by(season) %>%
summarize(n_pm10 = sum(!is.na(pm10)), mean_pm10 = mean(pm10, na.rm=TRUE),
n_mortality = sum(!is.na(death)), mean_mortality = mean(death))
## # A tibble: 4 × 5
## season n_pm10 mean_pm10 n_mortality mean_mortality
## <chr> <int> <dbl> <int> <dbl>
## 1 Autumn 559 -2.97 1748 19.2
## 2 Spring 563 0.740 1729 18.2
## 3 Summer 571 4.58 1748 17.7
## 4 Winter 551 -2.93 1715 20.8
Stratify each season into 5 strata
Spring
pm10spring <- filter(balt621, season=="Spring")
quintiles = quantile(pm10spring$pm10, c(0,.2,.4,.6,.8,1), na.rm=TRUE)
pm10spring$pm_group = cut(pm10spring$pm10, breaks=quintiles, labels=1:5)
table(pm10spring$pm_group)
##
## 1 2 3 4 5
## 112 112 113 112 113
Summer
pm10summer <- filter(balt621, season=="Summer")
quintiles = quantile(pm10summer$pm10, c(0,.2,.4,.6,.8,1), na.rm=TRUE)
pm10summer$pm_group = cut(pm10summer$pm10, breaks=quintiles, labels=1:5)
table(pm10summer$pm_group)
##
## 1 2 3 4 5
## 114 114 114 114 114
Autumn
pm10autumn <- filter(balt621, season=="Autumn")
quintiles = quantile(pm10autumn$pm10, c(0,.2,.4,.6,.8,1), na.rm=TRUE)
pm10autumn$pm_group = cut(pm10autumn$pm10, breaks=quintiles, labels=1:5)
table(pm10autumn$pm_group)
##
## 1 2 3 4 5
## 111 112 111 112 112
Winter
pm10winter <- filter(balt621, season=="Winter")
quintiles = quantile(pm10winter$pm10, c(0,.2,.4,.6,.8,1), na.rm=TRUE)
pm10winter$pm_group = cut(pm10winter$pm10, breaks=quintiles, labels=1:5)
table(pm10winter$pm_group)
##
## 1 2 3 4 5
## 110 110 110 110 110
Calculating mean mortality
Spring
pm10spring %>% filter(pm_group==1) %>%
summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
## mean sd n
## <dbl> <dbl> <int>
## 1 18.6 4.19 112
pm10spring %>% filter(pm_group==5) %>%
summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
## mean sd n
## <dbl> <dbl> <int>
## 1 19.0 5.01 113
Summer
pm10summer %>% filter(pm_group==1) %>%
summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
## mean sd n
## <dbl> <dbl> <int>
## 1 17.6 4.15 114
pm10summer %>% filter(pm_group==5) %>%
summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
## mean sd n
## <dbl> <dbl> <int>
## 1 19.9 5.68 114
Autumn
pm10autumn %>% filter(pm_group==1) %>%
summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
## mean sd n
## <dbl> <dbl> <int>
## 1 19.8 5.38 111
pm10autumn %>% filter(pm_group==5) %>%
summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
## mean sd n
## <dbl> <dbl> <int>
## 1 20.5 5.43 112
Winter
pm10winter %>% filter(pm_group==1) %>%
summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
## mean sd n
## <dbl> <dbl> <int>
## 1 22.2 5.30 110
pm10winter %>% filter(pm_group==5) %>%
summarize(mean=mean(death), sd=sd(death), n=n())
## # A tibble: 1 × 3
## mean sd n
## <dbl> <dbl> <int>
## 1 21.4 5.35 110
Run t-test for mortality of lowest and highest pollution days
Spring
pm10spring.15 = pm10spring %>% filter(pm_group==1 | pm_group==5)
t.test(death ~ pm_group, data=pm10spring.15, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: death by pm_group
## t = -0.71099, df = 216.92, p-value = 0.4779
## alternative hypothesis: true difference in means between group 1 and group 5 is not equal to 0
## 95 percent confidence interval:
## -1.6506036 0.7754455
## sample estimates:
## mean in group 1 mean in group 5
## 18.55357 18.99115
Summer
pm10summer.15 = pm10summer %>% filter(pm_group==1 | pm_group==5)
t.test(death ~ pm_group, data=pm10summer.15, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: death by pm_group
## t = -3.4876, df = 207.04, p-value = 0.0005951
## alternative hypothesis: true difference in means between group 1 and group 5 is not equal to 0
## 95 percent confidence interval:
## -3.5974049 -0.9990863
## sample estimates:
## mean in group 1 mean in group 5
## 17.63158 19.92982
Autumn
pm10autumn.15 = pm10autumn %>% filter(pm_group==1 | pm_group==5)
t.test(death ~ pm_group, data=pm10autumn.15, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: death by pm_group
## t = -0.9771, df = 221, p-value = 0.3296
## alternative hypothesis: true difference in means between group 1 and group 5 is not equal to 0
## 95 percent confidence interval:
## -2.1328719 0.7189401
## sample estimates:
## mean in group 1 mean in group 5
## 19.81982 20.52679
Winter
pm10winter.15 = pm10winter %>% filter(pm_group==1 | pm_group==5)
t.test(death ~ pm_group, data=pm10winter.15, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: death by pm_group
## t = 1.2272, df = 217.98, p-value = 0.2211
## alternative hypothesis: true difference in means between group 1 and group 5 is not equal to 0
## 95 percent confidence interval:
## -0.5344249 2.2980613
## sample estimates:
## mean in group 1 mean in group 5
## 22.23636 21.35455