Setup
AQC <- function(x) {
return(x >= 50)
}
# std.err <- function(x) sd(x)/sqrt(length(x))
calculate.errors <- function(y, y.hat) {
SE <- (y-y.hat)^2
MSE <- mean(SE)
RMSE <- sqrt(MSE)
RMSE.sd <- sd(SE) * RMSE / MSE
RAE <- abs(y-y.hat)/y
MRAE <- mean(RAE)
MRAE.sd <- sd(RAE)
AQCE <- abs(AQC(y)-AQC(y.hat))
MAQCE <- mean(AQCE)
MAQCE.sd <- sd(AQCE)
c(RMSE, RMSE.sd, MRAE, MRAE.sd, MAQCE, MAQCE.sd)
}
Build Models
# Load the ODIN and ECan measurements (stored in current directory)
data <- readRDS("all_measurements.rds")
data <- subset(data, !is.na(odin.109.pm10))
data <- subset(data, !is.na(pm10))
# Set start and end times for initial and final colocations
init.start.time <- as.POSIXct('07/11/2016 00:00',format = '%m/%d/%Y %H:%M',tz='Etc/GMT-12')
init.end.time <- as.POSIXct('07/26/2016 00:00',format = '%m/%d/%Y %H:%M',tz='Etc/GMT-12')
final.start.time <- as.POSIXct('10/03/2016 00:00',format = '%m/%d/%Y %H:%M',tz='Etc/GMT-12')
final.end.time <- as.POSIXct('10/19/2016 00:00',format = '%m/%d/%Y %H:%M',tz='Etc/GMT-12')
# Get initial and final datasets
data.init <- data[init.start.time < data$date & data$date < init.end.time, ]
data.final <- data[final.start.time < data$date & data$date < final.end.time, ]
# Check the lengths
nrow(data.init)
## [1] 1876
nrow(data.final)
## [1] 2275
# Generate formula
odin.vars <- paste0('odin.109.',c('pm10','temp','rh'))
eval(parse(text=paste0('form <- pm10 ~ ', paste(odin.vars,collapse='+'))))
# Generate linear models
init.lm <- lm(form, data=data.init)
final.lm <- lm(form, data=data.final)
both.lm <- lm(form, data=rbind(data.init,data.final))
# Extract coefficients
init.coefs <- (summary(init.lm)$coefficients)[, "Estimate"]
final.coefs <- (summary(final.lm)$coefficients)[, "Estimate"]
both.coefs <- (summary(both.lm)$coefficients)[, "Estimate"]
Define Time-Dependent Models
# Generate the initial model
init.model <- function(line) {
answer <- init.coefs['(Intercept)']
for (var in odin.vars) {
answer <- answer + init.coefs[var] * line[var]
}
return(answer)
}
# Generate the final model
final.model <- function(line) {
answer <- final.coefs['(Intercept)']
for (var in odin.vars) {
answer <- answer + final.coefs[var] * line[var]
}
return(answer)
}
# Generate a single model using both colocation periods
both.model <- function(line) {
answer <- both.coefs['(Intercept)']
for (var in odin.vars) {
answer <- answer + both.coefs[var] * line[var]
}
return(answer)
}
# Create a time dependent weighted-average model
data.start <- data$date[1]
data.end <- data$date[nrow(data)]
weighted.model <- function(line) {
init.y <- init.model(line)
final.y <- final.model(line)
# The proportion of the way through the duration of the experiment
prop <- as.numeric(line$date - data.start) / as.numeric(data.end - data.start)
init.y * (1-prop) + final.y * prop
}
# Create a time dependent parametric-interpolation model
interpolate.coef <- function(line, var) {
x.0 <- data.start
x.1 <- data.end
x <- line$date
y.0 <- init.coefs[var]
y.1 <- final.coefs[var]
y.0 + as.numeric(x-x.0) * (y.1-y.0) / as.numeric(x.1 - x.0)
}
interpolation.model <- function(line) {
answer <- interpolate.coef(line, '(Intercept)')
for (var in odin.vars) {
answer <- answer + interpolate.coef(line, var) * line[var]
}
return(answer)
}
Test TD-Models
# Function for testing models
test.models <- function() {
models <- list(init.model, final.model, both.model,
weighted.model, interpolation.model)
names(models) <- c("Initial Model", "Final Model", "Both model",
"Weighted Average Model", "Parameter Interpolation Model")
results <- data.frame(model=names(models))
results[, 2:7] <- 0
names(results)[2:7] <- c('RMSE', 'RMSE.sd', 'MRAE', 'MRAE.sd', 'MAQCE', 'MAQCE.sd')
for (i in 1:length(models)) {
y.hat <- c(0)
y <- data[, 'pm10']
model <- models[[i]]
for (j in 1:nrow(data)) {
line <- data[j, ]
y.hat <- c(y.hat, as.numeric(model(line)))
}
y.hat <- as.vector(y.hat[2:length(y.hat)])
results[i, 2:7] <- calculate.errors(y, y.hat)
}
results
}
test.models()
## model RMSE RMSE.sd MRAE MRAE.sd
## 1 Initial Model 9.014027 37.33669 0.4369932 4.103933
## 2 Final Model 7.907665 24.83887 0.3825579 3.890614
## 3 Both model 8.651640 34.88786 0.4041621 3.864177
## 4 Weighted Average Model 8.218265 31.76697 0.4086551 4.082448
## 5 Parameter Interpolation Model 8.218265 31.76697 0.4086551 4.082448
## MAQCE MAQCE.sd
## 1 0.02417405 0.1535940
## 2 0.02324428 0.1506831
## 3 0.02212856 0.1471062
## 4 0.02132275 0.1444624
## 5 0.02132275 0.1444624