Imperial County: PM10 and PM2.5

This document, including figures and code, is hereby released under a Creative Commons 3.0 Attribution license. It is an R Markdown document. Markdown is a simple formatting syntax for authoring web pages. In RStudio, when you click the Knit HTML button a web page will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

csv_file <- system.file("data", "AQDMRS-Imperial-2013.csv", package = "Imperial")
stopifnot(file.exists(csv_file))

The contents below are a quick workup of some data I grabbed through the EPA AirData web interface. I used my own airdata R package to parse the data, which I've stored locally, but you can use good old read.csv or data.table::fread and just munge the timestamps yourself instead of using my airdata::read.DMCSV function.

library(airdata)
library(lubridate)
library(reshape2)

# Generalizable across the AQDMRS interface
param_codes <- c(`42101`="CO_ppm", `42602`="NO2_ppb", `44201`="O3_ppb", `81102`="PM10_ugm3", `88101`="PM25_ugm3", `42401`="SO2_ppb", `14129`="PbLC_ugm3", `12128`="PbSTP_ugm3")

# Specific to Imperial County
site_codes <- c(`0005`="CalexicoHS", `1003`="ElCentro", `0007`="Brawley/Main", `4003`="Westmorland", `4004`="Niland/English")

EPA_data <- mutate(
  airdata::read.DMCSV(csv_file), 
  local_time = lubridate::with_tz(GMT, "PST8PDT"),
  month = lubridate::month(local_time, label=TRUE, abbr=TRUE),
  hour = lubridate::hour(local_time),
  day = lubridate::yday(local_time),
  parameter = revalue(parameter, param_codes),
  site = revalue(site, site_codes)
)

Data

Monthly averages, tabulated by parameter (with the help of dplyr):

non_missing <- function (data) {
  subset(data, !is.na(value))
}
PM_data <- mutate(
  subset(non_missing(EPA_data), parameter %in% c("PM25_ugm3", "PM10_ugm3")),
  parameter = factor(parameter)  # drop unused levels
)
month_x_site <- function (data, ...) {
  dcast(data, site ~ month, fun.aggregate=mean, margins=c("site", "month"), ...)
}
options(digits=2, width=125)
by(PM_data, PM_data$parameter, month_x_site)
## PM_data$parameter: PM10_ugm3
##             site Jan  Feb Mar Apr May Jun Jul Aug Sep Oct Nov (all)
## 1     CalexicoHS  44 32.4  58  61  73  56  49  33  59  59  51    52
## 2   Brawley/Main  58 17.5  26  62 103  41  41  32  71  35  26    47
## 3       ElCentro  21 12.0  30  33  46  43  36  27  61  33  24    34
## 4    Westmorland  44 10.8  29  54  87  46  43  35  82  31  24    45
## 5 Niland/English  35  8.6  25  39  64  41  38  34  72  30  18    37
## 6          (all)  40 16.4  34  49  74  46  41  32  69  38  29    43
## --------------------------------------------------------------------------------------------- 
## PM_data$parameter: PM25_ugm3
##           site Jan Feb  Mar  Apr  May  Jun  Jul Aug  Sep  Oct  Nov  Dec (all)
## 1   CalexicoHS 9.7 8.7 12.7 16.3 15.2 17.7 13.9 9.6 12.3 12.8 12.9 17.9  13.4
## 2 Brawley/Main 8.4 4.2  6.5  6.4  9.4  9.4  8.5 7.3  6.6  5.7  6.5  7.5   7.2
## 3     ElCentro 7.4 5.0  5.7  5.8  7.2 12.3  8.7 7.0  6.5  5.7  6.3  6.9   7.1
## 4        (all) 8.7 6.6  8.1 11.1 11.7 14.3 10.9 8.2  9.0  8.8  9.2 11.8  10.0

… and by site:

month_x_parameter <- function (data, ...) {
  dcast(data, parameter ~ month, fun.aggregate=mean, margins=c("month"), ...)
}
by(PM_data, PM_data$site, month_x_parameter)
## PM_data$site: CalexicoHS
##   parameter  Jan  Feb Mar Apr May Jun Jul  Aug Sep Oct Nov Dec (all)
## 1 PM10_ugm3 44.2 32.4  58  61  73  56  49 33.0  59  59  51 NaN    52
## 2 PM25_ugm3  9.7  8.7  13  16  15  18  14  9.6  12  13  13  18    13
## --------------------------------------------------------------------------------------------- 
## PM_data$site: Brawley/Main
##   parameter  Jan  Feb  Mar  Apr   May  Jun  Jul  Aug  Sep  Oct  Nov Dec (all)
## 1 PM10_ugm3 58.4 17.5 25.8 61.8 102.6 41.2 40.8 31.8 71.0 35.0 26.2 NaN  47.3
## 2 PM25_ugm3  8.4  4.2  6.5  6.4   9.4  9.4  8.5  7.3  6.6  5.7  6.5 7.5   7.2
## --------------------------------------------------------------------------------------------- 
## PM_data$site: ElCentro
##   parameter  Jan Feb  Mar  Apr  May Jun  Jul Aug  Sep  Oct  Nov Dec (all)
## 1 PM10_ugm3 20.8  12 30.0 33.4 45.6  43 36.4  27 61.4 33.3 24.3 NaN  33.8
## 2 PM25_ugm3  7.4   5  5.7  5.8  7.2  12  8.7   7  6.5  5.7  6.3 6.9   7.1
## --------------------------------------------------------------------------------------------- 
## PM_data$site: Westmorland
##   parameter Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov (all)
## 1 PM10_ugm3  44  11  29  54  87  46  43  35  82  31  24    45
## --------------------------------------------------------------------------------------------- 
## PM_data$site: Niland/English
##   parameter Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov (all)
## 1 PM10_ugm3  35 8.6  25  39  64  41  38  34  72  30  18    37

Figures

Here is a timeseries of PM10 (note, Dec 2013 data were not available at the time of this writing):

library(ggplot2)
library(scales)
theme_update(legend.position="bottom", plot.title=element_text(size=rel(1.1), face="bold", vjust=1.75))
PM10_data <- subset(PM_data, parameter=="PM10_ugm3")
fig_title <- ggtitle("Imperial County data from AQDMRS (U.S. EPA)")
scale_PM10 <- function (...) {
  scale_y_continuous(str_c(..., " PM10, µg/m3"))
}
scale_month_continuous <- function (...) {
  scale_x_datetime("", breaks="1 month", labels=date_format("%b\n%Y"), ...)
}
fig_PM10_timeseries <- ggplot(PM10_data, aes(local_time, value)) + fig_title +
  scale_PM10("24 h") + scale_month_continuous(limits=range(PM_data$local_time, na.rm=TRUE)) +
  geom_line(aes(linetype=site), alpha=0.8)
show(fig_PM10_timeseries)

plot of chunk fig_PM10_timeseries

… and the same figure showing exceedances in relation to the 24 h PM10 standard (150 µg/m3, not to be exceeded more than once per year on average over three years):

PM10_standard_24h <- 150
PM10_exceedances <- subset(PM10_data, value >= PM10_standard_24h) 
fig_PM10_exceedances <- fig_PM10_timeseries + 
  geom_hline(yintercept=PM10_standard_24h, color="red") + 
  geom_text(aes(label=site), size=4, data=PM10_exceedances, hjust=-0.1) + 
  geom_point(data=PM10_exceedances)
show(fig_PM10_exceedances)

plot of chunk fig_PM10_exceedances

… as well a barchart for comparing monthly averages of PM10 and PM2.5 (error bars represent ± 1 SE):

library(dplyr)
common_sites <- c("CalexicoHS", "Brawley/Main", "ElCentro")  # sites reporting both PM10 and PM25
scale_month_ordered <- scale_x_discrete("")
monthly_data <- non_missing(PM_data) %.% group_by(site, parameter, month)
stderr <- function (x) sd(x) / sqrt(length(x))
monthly_stats <- monthly_data %.% summarise(mean=mean(value), SE=stderr(value), n=length(value))
fig_PM_chart <- ggplot(subset(monthly_stats, site %in% common_sites), aes(month, mean, group=site)) + 
  fig_title + scale_month_ordered + scale_y_continuous("Monthly averages, µg/m3") +
  geom_bar(aes(fill=site), position=position_dodge(width=0.8), stat="identity") + 
  geom_errorbar(aes(ymin=mean-SE, ymax=mean+SE), width=0.25, position=position_dodge(width=0.8)) + 
  facet_grid(parameter ~ ., scales="free_y")
show(fig_PM_chart)

plot of chunk fig_PM_chart

Epilogue

If you're interested in reproducing this, best performance may be obtained with the following packages:

sessionInfo()
## R version 3.0.2 (2013-09-25)
## Platform: x86_64-apple-darwin10.8.0 (64-bit)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] graphics  grDevices utils     datasets  stats     methods   base     
## 
## other attached packages:
##  [1] dplyr_0.1.2.0.99 reshape2_1.2.2   lubridate_1.3.0  airdata_0.2      plyr_1.8         httr_0.2         stringr_0.6.2   
##  [8] fasttime_1.0-0   data.table_1.8.9 knitr_1.2        holstius_0.2     scales_0.2.3     ggplot2_0.9.3.1 
## 
## loaded via a namespace (and not attached):
##  [1] assertthat_0.1     colorspace_1.2-2   dichromat_2.0-0    digest_0.6.3       evaluate_0.4.3     formatR_0.7       
##  [7] grid_3.0.2         gtable_0.1.2       labeling_0.1       MASS_7.3-29        munsell_0.4        proto_0.3-10      
## [13] RColorBrewer_1.0-5 Rcpp_0.11.0        RCurl_1.95-4.1     tools_3.0.2