library(RPostgreSQL)
## Loading required package: DBI
library(rpart)
library(reshape2)
library(openair)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Set working directory
setwd("c:\\Users\\Hammo\\Documents\\smoke emissions project\\repo")
# Database login details (not in GitHub repo)
load("access.Rda")
# Connect to database
p <- dbDriver("PostgreSQL")
con <- dbConnect(p,
user=access$user,
password=access$pwd,
host='penap-data.dyndns.org',
dbname='cona',
port=5432)
# Load all PM2.5 data from ODINs in June 2017
siteid = 18 # ecan site
odins.fresh <- dbGetQuery(con," SELECT d.recordtime AT TIME ZONE 'NZT' AS date,
s.name as label, i.serialn as serialn,
d.value::numeric as val
FROM data.fixed_data as d, admin.sensor as s,
admin.instrument as i
WHERE s.id = d.sensorid AND
s.instrumentid = i.id AND
i.name = 'ODIN-SD-3' AND
d.siteid = 18 AND
(d.recordtime BETWEEN '2017-06-01 00:00' AND
'2017-07-01 00:00') AND
(s.name = 'PM2.5' OR s.name = 'Temperature')
ORDER BY date;")
serialns <- unique(odins.fresh$serialn)
odins.list <- list()
for (n in serialns) {
odin.n.long <- odins.fresh[which(odins.fresh$serialn==n),]
odin.n <- dcast(odin.n.long, date~label, value.var='val')
odin.n$temp <- odin.n$Temperature
odin.n$Temperature <- NULL
odin.n$pm2.5 <- odin.n$PM2.5
odin.n$PM2.5 <- NULL
odin.name <- paste0('odin.',substr(n,6,8)) # change ODIN-404 to odin.404
odins.list[[odin.name]] <- odin.n
}
# Load ECan data (stored locally)
ecan <- read.csv('EcanJune2017.csv',stringsAsFactors=FALSE)
names(ecan) <- c('date','wind.max','wind.speed','wind.direction','co','pm10',
'pm2.5','pm.course','temp.2m','temp.6m','temp.ground')
ecan$date <- as.POSIXct(ecan$date,format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
# There are some NA's in PM2.5. Set these to zero.
ecan$pm2.5[which(is.na(ecan$pm2.5))] <- 0
merge.odin <- function(data,odin.1m,odin.name='odin.404') {
# merge an.odin with data using data$temp
# data.date is taken as the official date
# odin.1m has measurements from each 1m
best.cor <- -Inf
best.i <- NA
best.offset <- NA
# To merge ODIN (1 min sample rate) with ECan (10 min sample rate)
# we need to average ODIN over ten minutes. Thus need to try starting
# averages at 1m, at 2m, ...
for (i in 1:9) {
odin.10m <- timeAverage(odin.1m[i:nrow(odin.1m),],avg.time='10 min')
base.offset <- which(data$date > min(odin.1m$date) - 60*60)[1] # first odin measurement minus one hour
result <- align.odin.10m(data$temp[base.offset:nrow(data)],odin.10m$temp)
offset <- result[1]
correlation <- result[2]
if (correlation > best.cor) {
best.cor <- correlation
best.i <- i
best.offset <- offset + base.offset
}
}
# Do the merge
odin.10m <- timeAverage(odin.1m[best.i:nrow(odin.1m),], avg.time='10 min')
time.diff <- odin.10m$date[1] - data$date[best.offset]
odin.10m$date <- data$date[best.offset:(best.offset+nrow(odin.10m)-1)]
# rename the odin vars so that the have the serialn
for (var.name in names(odin.10m)) {
if (var.name=='date')
next
new.var.name <- paste0(var.name,'.',odin.name)
odin.10m[,new.var.name] <- odin.10m[,var.name]
odin.10m[,var.name] <- NULL
}
# print out the time difference
print(odin.name)
print(time.diff)
merge(data,odin.10m,by='date')
}
align.odin.10m <- function(ecan.series,odin.series,
corr.plot=FALSE,var.name="") {
lag_test=ccf(ecan.series,
odin.series,
na.action=na.pass,
lag.max=200,
plot=corr.plot,
type='correlation',
ylab='Correlation',
main=paste(var.name,'correlation as function of clock lag'))
offset <- lag_test$lag[which.max(lag_test$acf)]
correlation <- max(lag_test$acf)
c(offset,correlation)
}
# Merge each of the ODINs
row.names <- names(odins.list)
col.names <- c('temp','pm2.5')
best.offsets <- matrix(NA,nrow=length(row.names),ncol=length(col.names),
dimnames=list(row.names,col.names))
data <- ecan
data$temp <- data$temp.2m
for (odin.name in names(odins.list)) {
odin.data <- odins.list[[odin.name]]
data <- merge.odin(data,odin.data,odin.name)
# Make sure the fit is right
for (var.name in col.names) {
odin.var.name <- paste0(var.name,'.',odin.name)
eval(parse(text=paste0('offset <- align.odin.10m(data$',var.name,
',','data$',odin.var.name,
',corr.plot=TRUE,var.name="',
odin.var.name,'")[1]')))
best.offsets[odin.name,var.name] <- offset
}
}
## [1] "odin.123"
## Time difference of -7 mins
## [1] "odin.102"
## Time difference of -8 mins
## [1] "odin.121"
## Time difference of -8 mins
## [1] "odin.106"
## Time difference of -8 mins
## [1] "odin.122"
## Time difference of -8 mins
## [1] "odin.107"
## Time difference of -8 mins
## [1] "odin.101"
## Time difference of -8 mins
## [1] "odin.116"
## Time difference of -8 mins
## [1] "odin.115"
## Time difference of -8 mins
## [1] "odin.120"
## Time difference of -7 mins
## [1] "odin.100"
## Time difference of -17 mins
## [1] "odin.109"
## Time difference of 6 mins
# Should be all zeros (more or less)
best.offsets
## temp pm2.5
## odin.123 0 0
## odin.102 0 1
## odin.121 0 1
## odin.106 0 0
## odin.122 0 0
## odin.107 0 1
## odin.101 0 0
## odin.116 0 2
## odin.115 0 1
## odin.120 0 1
## odin.100 0 1
## odin.109 0 3
Here we develop a model based on ODIN-109
Here we look at how well the ODIN-109 model can predict TEOM measurements based on data from other oDINs.