This document, including figures and code, is hereby released under a Creative Commons 3.0 Attribution license. It is an R Markdown document. Markdown is a simple formatting syntax for authoring web pages. In RStudio, when you click the Knit HTML button a web page will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
csv_file <- system.file("data", "AQDMRS-Imperial-2013.csv", package = "Imperial")
stopifnot(file.exists(csv_file))
The contents below are a quick workup of some data I grabbed through the EPA AirData web interface. I used my own airdata R package to parse the data, which I've stored locally, but you can use good old read.csv or data.table::fread and just munge the timestamps yourself instead of using my airdata::read.DMCSV function.
library(airdata)
library(lubridate)
library(reshape2)
# Generalizable across the AQDMRS interface
param_codes <- c(`42101`="CO_ppm", `42602`="NO2_ppb", `44201`="O3_ppb", `81102`="PM10_ugm3", `88101`="PM25_ugm3", `42401`="SO2_ppb", `14129`="PbLC_ugm3", `12128`="PbSTP_ugm3")
# Specific to Imperial County
site_codes <- c(`0005`="CalexicoHS", `1003`="ElCentro", `0007`="Brawley/Main", `4003`="Westmorland", `4004`="Niland/English")
EPA_data <- mutate(
airdata::read.DMCSV(csv_file),
local_time = lubridate::with_tz(GMT, "PST8PDT"),
month = lubridate::month(local_time, label=TRUE, abbr=TRUE),
hour = lubridate::hour(local_time),
day = lubridate::yday(local_time),
parameter = revalue(parameter, param_codes),
site = revalue(site, site_codes)
)
Monthly averages, tabulated by parameter (with the help of dplyr):
non_missing <- function (data) {
subset(data, !is.na(value))
}
PM_data <- mutate(
subset(non_missing(EPA_data), parameter %in% c("PM25_ugm3", "PM10_ugm3")),
parameter = factor(parameter) # drop unused levels
)
month_x_site <- function (data, ...) {
dcast(data, site ~ month, fun.aggregate=mean, margins=c("site", "month"), ...)
}
options(digits=2, width=125)
by(PM_data, PM_data$parameter, month_x_site)
## PM_data$parameter: PM10_ugm3
## site Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov (all)
## 1 CalexicoHS 44 32.4 58 61 73 56 49 33 59 59 51 52
## 2 Brawley/Main 58 17.5 26 62 103 41 41 32 71 35 26 47
## 3 ElCentro 21 12.0 30 33 46 43 36 27 61 33 24 34
## 4 Westmorland 44 10.8 29 54 87 46 43 35 82 31 24 45
## 5 Niland/English 35 8.6 25 39 64 41 38 34 72 30 18 37
## 6 (all) 40 16.4 34 49 74 46 41 32 69 38 29 43
## ---------------------------------------------------------------------------------------------
## PM_data$parameter: PM25_ugm3
## site Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec (all)
## 1 CalexicoHS 9.7 8.7 12.7 16.3 15.2 17.7 13.9 9.6 12.3 12.8 12.9 17.9 13.4
## 2 Brawley/Main 8.4 4.2 6.5 6.4 9.4 9.4 8.5 7.3 6.6 5.7 6.5 7.5 7.2
## 3 ElCentro 7.4 5.0 5.7 5.8 7.2 12.3 8.7 7.0 6.5 5.7 6.3 6.9 7.1
## 4 (all) 8.7 6.6 8.1 11.1 11.7 14.3 10.9 8.2 9.0 8.8 9.2 11.8 10.0
… and by site:
month_x_parameter <- function (data, ...) {
dcast(data, parameter ~ month, fun.aggregate=mean, margins=c("month"), ...)
}
by(PM_data, PM_data$site, month_x_parameter)
## PM_data$site: CalexicoHS
## parameter Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec (all)
## 1 PM10_ugm3 44.2 32.4 58 61 73 56 49 33.0 59 59 51 NaN 52
## 2 PM25_ugm3 9.7 8.7 13 16 15 18 14 9.6 12 13 13 18 13
## ---------------------------------------------------------------------------------------------
## PM_data$site: Brawley/Main
## parameter Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec (all)
## 1 PM10_ugm3 58.4 17.5 25.8 61.8 102.6 41.2 40.8 31.8 71.0 35.0 26.2 NaN 47.3
## 2 PM25_ugm3 8.4 4.2 6.5 6.4 9.4 9.4 8.5 7.3 6.6 5.7 6.5 7.5 7.2
## ---------------------------------------------------------------------------------------------
## PM_data$site: ElCentro
## parameter Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec (all)
## 1 PM10_ugm3 20.8 12 30.0 33.4 45.6 43 36.4 27 61.4 33.3 24.3 NaN 33.8
## 2 PM25_ugm3 7.4 5 5.7 5.8 7.2 12 8.7 7 6.5 5.7 6.3 6.9 7.1
## ---------------------------------------------------------------------------------------------
## PM_data$site: Westmorland
## parameter Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov (all)
## 1 PM10_ugm3 44 11 29 54 87 46 43 35 82 31 24 45
## ---------------------------------------------------------------------------------------------
## PM_data$site: Niland/English
## parameter Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov (all)
## 1 PM10_ugm3 35 8.6 25 39 64 41 38 34 72 30 18 37
Here is a timeseries of PM10 (note, Dec 2013 data were not available at the time of this writing):
library(ggplot2)
library(scales)
theme_update(legend.position="bottom", plot.title=element_text(size=rel(1.1), face="bold", vjust=1.75))
PM10_data <- subset(PM_data, parameter=="PM10_ugm3")
fig_title <- ggtitle("Imperial County data from AQDMRS (U.S. EPA)")
scale_PM10 <- function (...) {
scale_y_continuous(str_c(..., " PM10, µg/m3"))
}
scale_month_continuous <- function (...) {
scale_x_datetime("", breaks="1 month", labels=date_format("%b\n%Y"), ...)
}
fig_PM10_timeseries <- ggplot(PM10_data, aes(local_time, value)) + fig_title +
scale_PM10("24 h") + scale_month_continuous(limits=range(PM_data$local_time, na.rm=TRUE)) +
geom_line(aes(linetype=site), alpha=0.8)
show(fig_PM10_timeseries)
… and the same figure showing exceedances in relation to the 24 h PM10 standard (150 µg/m3, not to be exceeded more than once per year on average over three years):
PM10_standard_24h <- 150
PM10_exceedances <- subset(PM10_data, value >= PM10_standard_24h)
fig_PM10_exceedances <- fig_PM10_timeseries +
geom_hline(yintercept=PM10_standard_24h, color="red") +
geom_text(aes(label=site), size=4, data=PM10_exceedances, hjust=-0.1) +
geom_point(data=PM10_exceedances)
show(fig_PM10_exceedances)
… as well a barchart for comparing monthly averages of PM10 and PM2.5 (error bars represent ± 1 SE):
library(dplyr)
common_sites <- c("CalexicoHS", "Brawley/Main", "ElCentro") # sites reporting both PM10 and PM25
scale_month_ordered <- scale_x_discrete("")
monthly_data <- non_missing(PM_data) %.% group_by(site, parameter, month)
stderr <- function (x) sd(x) / sqrt(length(x))
monthly_stats <- monthly_data %.% summarise(mean=mean(value), SE=stderr(value), n=length(value))
fig_PM_chart <- ggplot(subset(monthly_stats, site %in% common_sites), aes(month, mean, group=site)) +
fig_title + scale_month_ordered + scale_y_continuous("Monthly averages, µg/m3") +
geom_bar(aes(fill=site), position=position_dodge(width=0.8), stat="identity") +
geom_errorbar(aes(ymin=mean-SE, ymax=mean+SE), width=0.25, position=position_dodge(width=0.8)) +
facet_grid(parameter ~ ., scales="free_y")
show(fig_PM_chart)
If you're interested in reproducing this, best performance may be obtained with the following packages:
sessionInfo()
## R version 3.0.2 (2013-09-25)
## Platform: x86_64-apple-darwin10.8.0 (64-bit)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] graphics grDevices utils datasets stats methods base
##
## other attached packages:
## [1] dplyr_0.1.2.0.99 reshape2_1.2.2 lubridate_1.3.0 airdata_0.2 plyr_1.8 httr_0.2 stringr_0.6.2
## [8] fasttime_1.0-0 data.table_1.8.9 knitr_1.2 holstius_0.2 scales_0.2.3 ggplot2_0.9.3.1
##
## loaded via a namespace (and not attached):
## [1] assertthat_0.1 colorspace_1.2-2 dichromat_2.0-0 digest_0.6.3 evaluate_0.4.3 formatR_0.7
## [7] grid_3.0.2 gtable_0.1.2 labeling_0.1 MASS_7.3-29 munsell_0.4 proto_0.3-10
## [13] RColorBrewer_1.0-5 Rcpp_0.11.0 RCurl_1.95-4.1 tools_3.0.2