--- title: "AP Summary" author: "Logan Piepmeier" date: "`r Sys.Date()`" output: html_document --- ```{r setup, include = FALSE, cache = FALSE} ### knitr setup only knitr::opts_chunk$set(echo = FALSE) knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file()) ``` ```{r initialize, include = FALSE} library(targets) library(data.table) library(ggplot2) library(skimr) library(dplyr) library(tidyr) if(!file.exists('data/ap_for_summary.rds')){ ap <- rbindlist( # external cohorts list( NHS = unique(setDT(tar_read(analytic_dataset_nhs))[ , .(h_id, end_mo, pm25, no2, o3)]), NHS2 = unique(setDT(tar_read(analytic_dataset_nhs2))[ , .(h_id, end_mo, pm25, no2, o3)]), HPFS = unique(setDT(tar_read(analytic_dataset_hpfs))[ , .(h_id, end_mo, pm25, no2, o3)]), WHI_CT = unique(setDT(tar_read(analytic_dataset_whi_ct))[ , .(h_id, end_mo, pm25, no2, o3)]), WHI_OS = unique(setDT(tar_read(analytic_dataset_whi))[ , .(h_id, end_mo, pm25, no2, o3)]) ), idcol = 'h_cohort', use.names = TRUE, fill = TRUE ) ap <- rbind(ap, tar_read(ap_avgs_t1_harvardtime)) # internal cohorts ap <- rbind(ap, tar_read(ap_avgs_t2_harvardtime)) saveRDS(ap, 'data/ap_for_summary.rds') }else{ap <- readRDS('data/ap_for_summary.rds')} ``` ## Pollutant Summaries ```{r plot_pm, echo = FALSE, warning = FALSE} ggplot(ap) + geom_boxplot(aes(h_cohort, pm25)) + ggtitle('PM2.5 Residential Concentrations') + xlab('Cohort') + ylab('PM2.5 (μg per cubic meter)') + theme_minimal() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) ``` ```{r plot_no2, echo = FALSE, warning = FALSE} ggplot(ap) + geom_boxplot(aes(h_cohort, no2)) + xlab('Cohort') + ylab('NO2 (ppb)') + ggtitle('NO2 Residential Concentrations') + theme_minimal() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) ``` ```{r plot_o3, echo = FALSE, warning = FALSE} ggplot(ap) + geom_boxplot(aes(h_cohort, o3)) + xlab('Cohort') + ylab('O3 (ppb)') + ggtitle('Ozone Residential Concentrations') + theme_minimal() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) ``` ### By Cohort ```{r} ap %>% pivot_longer(-c(h_cohort, h_id, end_mo), names_to = 'poll', values_to = 'conc') %>% group_by(poll, h_cohort) %>% skim_without_charts(conc) ``` ### Overall ```{r} ap %>% pivot_longer(-c(h_cohort, h_id, end_mo), names_to = 'poll', values_to = 'conc') %>% group_by(poll) %>% skim_without_charts(conc) ``` ## Correlations ### By Cohort ```{r} ap %>% group_by(h_cohort) %>% summarize( o3_no2 = cor(o3, no2, use = 'pairwise')**2, no2_o3 = cor(no2, o3, use = 'pairwise')**2, pm25_no2 = cor(pm25, no2, use = 'pairwise')**2, no2_pm25 = cor(no2, pm25, use = 'pairwise')**2, pm25_o3 = cor(pm25, o3, use = 'pairwise')**2, o3_pm25 = cor(o3, pm25, use = 'pairwise')**2 ) ``` ### Overall ```{r} ap %>% summarize( o3_no2 = cor(o3, no2, use = 'pairwise')**2, no2_o3 = cor(no2, o3, use = 'pairwise')**2, pm25_no2 = cor(pm25, no2, use = 'pairwise')**2, no2_pm25 = cor(no2, pm25, use = 'pairwise')**2, pm25_o3 = cor(pm25, o3, use = 'pairwise')**2, o3_pm25 = cor(o3, pm25, use = 'pairwise')**2 ) ```