# Load libraries ####
library(readr)
library(openair)
library(ggplot2)
# Quiet down warnings
oldw <- getOption("warn")
options(warn = -1)
# Paths and constants ####
# ODIN data folder
data_folder <- "~/data/ODIN_data/Tasmania/"
# Load data ####
odin_data <- read_delim(paste0(data_folder,"colocation_post.txt"),
"\t",
escape_double = FALSE,
col_types = cols(DateTime = col_datetime(format = "%Y-%m-%d %H:%M"),
OutRH = col_double(), PM10_EPA = col_double(),
PM25_EPA = col_double(), PM25_H1in = col_double(),
Rain = col_double(), RainRate = col_double(),
WD = col_double()), trim_ws = TRUE)
odin_data$date <- odin_data$DateTime
odin_data$DateTime <- NULL
# Move the data to 1 hour averages for analysis
odin_data.1hr <- timeAverage(odin_data,avg.time = '1 hour')
# Extract only EPA PM2.5 data
epa_data <- odin_data.1hr[,c(1:10)]
# Extract only ODIN data
odin_only_data <- odin_data.1hr[,c(1,(11:17))]
# A cross correlation analysis points to ODIN data being 1 hour behind of EPA data
ccf(odin_data.1hr$PM25_EPA,odin_data.1hr$PM25_H1in,na.action = na.omit)

# Correct the ODIN data
odin_only_data$date <- odin_only_data$date + 3600
odin_data.1hr.fixed <- merge(epa_data,odin_only_data,by = 'date',all = TRUE)
# Check the cross correlation analysis
ccf(odin_data.1hr.fixed$PM25_EPA,odin_data.1hr.fixed$PM25_H1in,na.action = na.omit)

# Plot summaries
timePlot(odin_data.1hr.fixed,
pollutant = names(odin_data.1hr.fixed)[10:17],
avg.time = '1 hour',
group = TRUE)

serials <- names(odin_data.1hr.fixed)[11:17]
correction.coeffs <- data.frame(serialn=names(odin_data.1hr.fixed)[11:17])
correction.coeffs$PM25_EPA.slope <- NA
correction.coeffs$PM25_EPA.intrcpt <- NA
correction.coeffs$PM25_EPA.rsqrd <- NA
nfiles <- length(serials)
# Compare ODIN data against EPA data
for (i in (1:nfiles)){
c_idx <- which(correction.coeffs$serialn==serials[i])
od_idx <- which(names(odin_data.1hr.fixed)==serials[i])
# PM2.5
pm <- 'PM25_EPA'
# Correction factors
summary(eval(parse(text=paste0("correction_fit <- lm(data=odin_data.1hr.fixed, ",pm," ~`",serials[i],"`)"))))
sum_fit <- summary(correction_fit)
slope <- sprintf("%0.2f",correction_fit$coefficients[2])
intrcpt <- sprintf("%0.2f",correction_fit$coefficients[1])
rsq <- sprintf("%0.2f",sum_fit$r.squared)
eval(parse(text = paste0("correction.coeffs$",pm,".slope[c_idx] <- correction_fit$coefficients[2]")))
eval(parse(text = paste0("correction.coeffs$",pm,".intrcpt[c_idx] <- correction_fit$coefficients[1]")))
eval(parse(text = paste0("correction.coeffs$",pm,".rsqrd[c_idx] <- sum_fit$r.squared")))
eval(parse(text=paste0("odin_data.1hr.fixed$",serials[i],".fitted <- predict(correction_fit,odin_data.1hr.fixed)")))
timePlot(odin_data.1hr.fixed,
pollutant = c(pm,serials[i],paste0(serials[i],".fitted")),
group = TRUE,
avg.time = '1 hour',
main = paste0(serials[i],
' (R2 = ',rsq,')\n',
slope,' * ODIN + ',
intrcpt))
}







knitr::kable(correction.coeffs[,c('serialn',
'PM25_EPA.rsqrd',
'PM25_EPA.slope',
'PM25_EPA.intrcpt')], digits = 2)
| PM25_H1in |
0.87 |
0.65 |
-1.63 |
| PM25_H1out |
0.82 |
0.81 |
-0.61 |
| PM25_H2out |
0.85 |
0.56 |
-0.51 |
| PM25_H3in |
0.87 |
0.45 |
0.45 |
| PM25_H3out |
0.81 |
0.86 |
0.38 |
| PM25_H4in |
0.76 |
1.04 |
0.30 |
| PM25_H4out |
0.82 |
0.94 |
0.00 |
readr::write_csv(correction.coeffs[,c('serialn',
'PM25_EPA.rsqrd',
'PM25_EPA.slope',
'PM25_EPA.intrcpt')],path = paste0(data_folder,'odin_corrections_pm2.5.csv'))
timePlot(odin_data.1hr.fixed,
pollutant = names(odin_data.1hr.fixed)[c(18:24)],
avg.time = '1 hour',
group = TRUE)

timePlot(odin_data.1hr.fixed,
pollutant = names(odin_data.1hr.fixed)[c(10,18:24)],
avg.time = '1 hour',
group = TRUE)

#re-enable warnings to continue the session
options(warn = oldw)