All the ODINs

library(RPostgreSQL)
## Loading required package: DBI
library(rpart)
library(reshape2)
library(openair)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
# Set working directory
setwd("c:\\Users\\Hammo\\Documents\\smoke emissions project\\repo")
# Database login details (not in GitHub repo)
load("access.Rda")
# Connect to database
p <- dbDriver("PostgreSQL")
con <- dbConnect(p,
                 user=access$user,
                 password=access$pwd,
                 host='penap-data.dyndns.org',
                 dbname='cona',
                 port=5432)

# Load all PM2.5 data from ODINs in June 2017
siteid = 18 # ecan site
odins.fresh <- dbGetQuery(con," SELECT d.recordtime AT TIME ZONE 'NZT' AS date,
                         s.name as label, i.serialn as serialn, 
                         d.value::numeric as val
                         FROM data.fixed_data as d,  admin.sensor as s,
                              admin.instrument as i
                         WHERE s.id = d.sensorid AND
                         s.instrumentid = i.id AND
                         i.name = 'ODIN-SD-3' AND
                         d.siteid = 18 AND
                         (d.recordtime BETWEEN '2017-06-01 00:00' AND
                         '2017-07-01 00:00') AND
                         (s.name = 'PM2.5' OR s.name = 'Temperature')
                         ORDER BY date;")
serialns <- unique(odins.fresh$serialn)
odins.list <- list()
for (n in serialns) {
  odin.n.long <- odins.fresh[which(odins.fresh$serialn==n),]
  odin.n <- dcast(odin.n.long, date~label, value.var='val')
  odin.n$temp <- odin.n$Temperature
  odin.n$Temperature <- NULL
  odin.n$pm2.5 <- odin.n$PM2.5
  odin.n$PM2.5 <- NULL
  odin.name <- paste0('odin.',substr(n,6,8)) # change ODIN-404 to odin.404
  odins.list[[odin.name]] <- odin.n
}

# Load ECan data (stored locally)
ecan <- read.csv('EcanJune2017.csv',stringsAsFactors=FALSE)
names(ecan) <- c('date','wind.max','wind.speed','wind.direction','co','pm10',
                 'pm2.5','pm.course','temp.2m','temp.6m','temp.ground')
ecan$date <- as.POSIXct(ecan$date,format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
# There are some NA's in PM2.5. Set these to zero.
ecan$pm2.5[which(is.na(ecan$pm2.5))] <- 0

Synchronizing Clocks

merge.odin <- function(data,odin.1m,odin.name='odin.404') {
  # merge an.odin with data using data$temp
  # data.date is taken as the official date
  # odin.1m has measurements from each 1m
  best.cor <- -Inf
  best.i <- NA
  best.offset <- NA
  # To merge ODIN (1 min sample rate) with ECan (10 min sample rate)
  # we need to average ODIN over ten minutes. Thus need to try starting
  # averages at 1m, at 2m, ...
  for (i in 1:9) {
    odin.10m <- timeAverage(odin.1m[i:nrow(odin.1m),],avg.time='10 min')
    base.offset <- which(data$date > min(odin.1m$date) - 60*60)[1] # first odin measurement minus one hour
    result <- align.odin.10m(data$temp[base.offset:nrow(data)],odin.10m$temp)
    offset <- result[1]
    correlation <- result[2]
    if (correlation > best.cor) {
      best.cor <- correlation
      best.i <- i
      best.offset <- offset + base.offset
    }
  }
  # Do the merge
  odin.10m <- timeAverage(odin.1m[best.i:nrow(odin.1m),], avg.time='10 min')
  time.diff <- odin.10m$date[1] - data$date[best.offset]
  odin.10m$date <- data$date[best.offset:(best.offset+nrow(odin.10m)-1)]
  # rename the odin vars so that the have the serialn
  for (var.name in names(odin.10m)) {
    if (var.name=='date')
      next
    new.var.name <- paste0(var.name,'.',odin.name)
    odin.10m[,new.var.name] <- odin.10m[,var.name]
    odin.10m[,var.name] <- NULL
  }
  # print out the time difference
  print(odin.name)
  print(time.diff)
  merge(data,odin.10m,by='date') 
}

align.odin.10m <- function(ecan.series,odin.series,
                           corr.plot=FALSE,var.name="") {
  lag_test=ccf(ecan.series,
               odin.series,
               na.action=na.pass,
               lag.max=200,
               plot=corr.plot,
               type='correlation',
               ylab='Correlation',
               main=paste(var.name,'correlation as function of clock lag'))
  offset <- lag_test$lag[which.max(lag_test$acf)]
  correlation <- max(lag_test$acf)
  c(offset,correlation)
}

# Merge each of the ODINs
row.names <- names(odins.list)
col.names <- c('temp','pm2.5')
best.offsets <- matrix(NA,nrow=length(row.names),ncol=length(col.names),
                       dimnames=list(row.names,col.names))
data <- ecan
data$temp <- data$temp.2m
for (odin.name in names(odins.list)) {
  odin.data <- odins.list[[odin.name]]
  data <- merge.odin(data,odin.data,odin.name)
  # Make sure the fit is right
  for (var.name in col.names) {
    odin.var.name <- paste0(var.name,'.',odin.name)
    eval(parse(text=paste0('offset <- align.odin.10m(data$',var.name,
                           ',','data$',odin.var.name,
                           ',corr.plot=TRUE,var.name="',
                           odin.var.name,'")[1]')))
    best.offsets[odin.name,var.name] <- offset
    
  }
}
## [1] "odin.123"
## Time difference of -7 mins

## [1] "odin.102"
## Time difference of -8 mins

## [1] "odin.121"
## Time difference of -8 mins

## [1] "odin.106"
## Time difference of -8 mins

## [1] "odin.122"
## Time difference of -8 mins

## [1] "odin.107"
## Time difference of -8 mins

## [1] "odin.101"
## Time difference of -8 mins

## [1] "odin.116"
## Time difference of -8 mins

## [1] "odin.115"
## Time difference of -8 mins

## [1] "odin.120"
## Time difference of -7 mins

## [1] "odin.100"
## Time difference of -17 mins

## [1] "odin.109"
## Time difference of 6 mins

# Should be all zeros (more or less)
best.offsets
##          temp pm2.5
## odin.123    0     0
## odin.102    0     1
## odin.121    0     1
## odin.106    0     0
## odin.122    0     0
## odin.107    0     1
## odin.101    0     0
## odin.116    0     2
## odin.115    0     1
## odin.120    0     1
## odin.100    0     1
## odin.109    0     3

Develop the First Model

Here we develop a model based on ODIN-109

Transfer to other ODINs

Here we look at how well the ODIN-109 model can predict TEOM measurements based on data from other oDINs.