Load data

library(RPostgreSQL)
## Loading required package: DBI
library(rpart)
library(reshape2)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ggplot2)

# Set working directory
setwd("\\\\files.auckland.ac.nz\\MyHome\\Documents")

# Database login details (not in GitHub repo)
load("access.Rda")

# Connect to database
p <- dbDriver("PostgreSQL")
con <- dbConnect(p,
                 user=access$user,
                 password=access$pwd,
                 host='penap-data.dyndns.org',
                 dbname='cona',
                 port=5432)
# Load ECan data (stored locally)
ecan <- read.csv('RangioraWinter2016.csv',stringsAsFactors=FALSE)
names(ecan) <- c('date','time','wind.speed','wind.dir','wind.dir.std','wind.speed.std',
                 'wind.max','co','temp.ground','temp.2m','temp.6m','pm10',
                 'pm2.5','pm.course')
ecan$date <- as.POSIXct(ecan$date,format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
ecan$time <- NULL # this is included in date
# Load ODIN measurements from June 2017
siteid = 18 # ecan site
odin.raw <-dbGetQuery(con,"SELECT d.recordtime AT TIME ZONE 'NZST' AS date,
                            i.serialn as serial, s.name as label,
                            d.value::numeric as val
                           FROM data.fixed_data as d,  
                            admin.sensor as s, admin.instrument as i
                           WHERE s.id = d.sensorid
                            AND s.instrumentid = i.id
                            AND i.name = 'ODIN-SD-3'
                            AND d.siteid = 18
                            AND (d.recordtime BETWEEN '2016-01-01 00:00 NZST'
                             AND '2017-01-01 00:00 NZST')
                            AND NOT (d.recordtime BETWEEN '2016-09-25 02:00 NZST'
                             AND '2016-09-25 03:00 NZST')
                            AND (s.name = 'PM2.5'
                             OR s.name = 'PM10'
                             OR s.name = 'Temperature'
                             OR s.name = 'RH')
                           ORDER BY date;") 

# 2016-09-25 02:01 to 02:59 is excluded because this is when the NZST to NZDT change occurs.
# Including this leads to truncation of dates at "day".

odin.raw$date <- trunc(odin.raw$date,'min') # get rid of the seconds
odin.raw$date <- as.POSIXct(odin.raw$date) # the above converted it to POSIXlt

# this helps somehow
ds.time <- as.POSIXct('09/25/2016 02:00',format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
odin.raw[odin.raw$date > ds.time, 'date'] <- odin.raw[odin.raw$date > ds.time, 'date'] + 60*60
# Create a data frame for each ODIN
serials <- unique(odin.raw$serial)
odins <- list()
for (i in 1:length(serials)) {
  this.odin <- dcast(odin.raw[odin.raw$serial==serials[i],], 
                     date~label, value.var='val', fun.aggregate=mean)
  # Make column names easier to type
  names(this.odin) <- tolower(names(this.odin))
  names(this.odin)[which(names(this.odin)=="temperature")] <- "temp"
  odins[[i]] <- this.odin
}
# change ODIN serials from ODIN-101 to odin.101
# the hyphens cause problems by being interpreted as minuses
tidy.serial <- function(serialn) paste0('odin.',substring(serialn,6,8))
names(odins) <- sapply(serials,FUN=tidy.serial)
best.offsets <- data.frame(odin=NA,pm2.5=NA,pm10=NA,temp=NA)
# correlation between measurements with zero offset
corr0 <- data.frame(odin=NA,pm2.5=NA,pm10=NA,temp=NA,no=NA) 

for (j in 1:length(odins)) {
  odin <- odins[[j]]
  # make sure that there is a odin entry for each minute
  dates <- data.frame(date=seq(odin$date[1],odin$date[nrow(odin)],by='mins'))
  odin <- merge(odin,dates,by="date",all=TRUE)
  if (nrow(odin) != nrow(dates))
    print("Oh noes! Gaps in ODIN data!")
  # if there are gaps merge "dates" with odin
  
  # change the ODIN measurements from 1 min averages to 1 hour averages
  odin.zoo <- zoo( odin[,2:ncol(odin)] ) # remove date because rollapply only works on numerics
  odin.roll.apply <- rollapply(odin.zoo,width=60,by=1,FUN=mean,align="left",na.rm=TRUE)
  # NB I don't know what the 'align' parameter does
  # Throw out data for which there is not a complete hour window
  odin.avgs <- odin[1:nrow(odin.roll.apply),]
  odin.avgs[,2:ncol(odin)] <- odin.roll.apply
  # NB if there are gaps in the data then this will be screwed up
  odin.avgs$date <- odin.avgs$date + 60*60 # take timestamps at the end of the hour average following ECan convention
  
  # Find best correlation between ECan and ODIN
  
  offsets <- -120:120
  blank <- rep(NA,length(offset))
  corr.df <- data.frame(offset=offsets,temp=blank,pm2.5=blank,pm10=blank)
  for (i in 1:nrow(corr.df)) {
    offset <- corr.df[i,'offset']
    odin.i <- odin.avgs
    odin.i$date <- odin.i$date + offset*60
    names(odin.i) <- c('date',paste0(names(odin.i),'.odin')[2:ncol(odin)])
    data.i <- merge(odin.i,ecan,by='date',all=FALSE)
    corr.df[i,'temp'] <- cor(data.i$temp.2m,data.i$temp.odin,
                             method="pearson",use="complete.obs")
    corr.df[i,'pm2.5'] <- cor(data.i$pm2.5,data.i$pm2.5.odin,
                             method="pearson",use="complete.obs")
    corr.df[i,'pm10'] <- cor(data.i$pm10,data.i$pm10.odin,
                             method="pearson",use="complete.obs")
  }
  
  odin.name <- names(odins)[j]
  best.offsets[j,'odin'] <- odin.name
  best.offsets[j,'temp'] <- corr.df[which.max(corr.df[,'temp']),'offset']
  best.offsets[j,'pm2.5'] <- corr.df[which.max(corr.df[,'pm2.5']),'offset']
  best.offsets[j,'pm10'] <- corr.df[which.max(corr.df[,'pm10']),'offset']
  
  print(best.offsets[j,])
  
  no <- length(which(!is.na(data.i$pm2.5.odin)))
  
  corr0[j,'odin'] <- odin.name
  corr0[j,'temp'] <- corr.df[corr.df$offset==0,'temp']
  corr0[j,'pm2.5'] <- corr.df[corr.df$offset==0,'pm2.5']
  corr0[j,'pm10'] <- corr.df[corr.df$offset==0,'pm10']
  corr0[j,'no'] <- no
  
  print(corr0[j,])
  
  print(paste("Number of colocated data points",no))
  
  corr.melt <- melt(corr.df,id='offset')
  the.plot <- ggplot(corr.melt) + 
                geom_line(aes(x=offset,y=value,colour=variable)) +
                scale_colour_manual("Quantity",values=c("red","green","blue"),
                                    labels=c("Temp",expression(PM[2.5]),
                                             expression(PM[10]))) + 
                ylab("Pearson Correlation Coefficient") +
                xlab("Offset (minutes)") +
                theme(legend.position = c(0.5, 0.2),plot.title = element_text(hjust = 0.5)) +
                ggtitle(paste0("Correlation Between ECan and ODIN-",
                              substr(odin.name,6,8)," Measurements"))
  print(the.plot)
  
  best.offset <- corr.df[which.max(corr.df[,'pm2.5']),'offset'] 

  odin.bo <- odin.avgs # bo = best offset
  odin.bo$date <- odin.bo$date + best.offset*60
  names(odin.bo) <- c('date',paste0(names(odin.bo),'.',names(odins)[j])[2:ncol(odin)])
}
##       odin pm2.5 pm10 temp
## 1 odin.102    14   11  -11
##       odin     pm2.5      pm10      temp   no
## 1 odin.102 0.9146071 0.8819723 0.9617869 6835
## [1] "Number of colocated data points 6835"

##       odin pm2.5 pm10 temp
## 2 odin.103    14   10  -19
##       odin     pm2.5      pm10      temp   no
## 2 odin.103 0.9430904 0.9428132 0.9298409 1891
## [1] "Number of colocated data points 1891"

##       odin pm2.5 pm10 temp
## 3 odin.115    11    9  -18
##       odin     pm2.5      pm10      temp   no
## 3 odin.115 0.9158897 0.8700058 0.9649036 6582
## [1] "Number of colocated data points 6582"

##       odin pm2.5 pm10 temp
## 4 odin.105    11    7  -22
##       odin     pm2.5      pm10      temp   no
## 4 odin.105 0.8838205 0.8538734 0.9753616 6795
## [1] "Number of colocated data points 6795"

##       odin pm2.5 pm10 temp
## 5 odin.107     8    5  -20
##       odin     pm2.5      pm10      temp   no
## 5 odin.107 0.9231427 0.8783852 0.9721451 6314
## [1] "Number of colocated data points 6314"

##       odin pm2.5 pm10 temp
## 6 odin.108    15   10  -15
##       odin     pm2.5      pm10      temp   no
## 6 odin.108 0.9211281 0.9169186 0.9667915 3787
## [1] "Number of colocated data points 3787"

##       odin pm2.5 pm10 temp
## 7 odin.114     9    7   -9
##       odin     pm2.5      pm10      temp   no
## 7 odin.114 0.9326945 0.8958748 0.9631179 3030
## [1] "Number of colocated data points 3030"

##       odin pm2.5 pm10 temp
## 8 odin.111    11    7  -19
##       odin     pm2.5      pm10      temp   no
## 8 odin.111 0.9392077 0.9330807 0.9616866 1547
## [1] "Number of colocated data points 1547"

##       odin pm2.5 pm10 temp
## 9 odin.113     9    8  -19
##       odin     pm2.5      pm10      temp   no
## 9 odin.113 0.9058122 0.8468821 0.9710828 6795
## [1] "Number of colocated data points 6795"

##        odin pm2.5 pm10 temp
## 10 odin.109    10    9  -14
##        odin     pm2.5      pm10      temp    no
## 10 odin.109 0.9121186 0.8721985 0.9743062 16341
## [1] "Number of colocated data points 16341"

##        odin pm2.5 pm10 temp
## 11 odin.112    14   10  -10
##        odin     pm2.5      pm10      temp   no
## 11 odin.112 0.7569478 0.6577005 0.9707741 8263
## [1] "Number of colocated data points 8263"

##        odin pm2.5 pm10 temp
## 12 odin.106    15   16  -14
##        odin     pm2.5     pm10      temp   no
## 12 odin.106 0.7519309 0.643659 0.9673419 2370
## [1] "Number of colocated data points 2370"

##        odin pm2.5 pm10 temp
## 13 odin.117    15    7  -20
##        odin     pm2.5      pm10      temp   no
## 13 odin.117 0.7827743 0.5965291 0.9669879 4936
## [1] "Number of colocated data points 4936"

##        odin pm2.5 pm10 temp
## 14 odin.101    13   13  -16
##        odin    pm2.5     pm10      temp   no
## 14 odin.101 0.815526 0.625987 0.9687175 4912
## [1] "Number of colocated data points 4912"

##        odin pm2.5 pm10 temp
## 15 odin.110    15   12  -11
##        odin     pm2.5      pm10      temp   no
## 15 odin.110 0.6966228 0.5195689 0.9704972 4912
## [1] "Number of colocated data points 4912"

##        odin pm2.5 pm10 temp
## 16 odin.104    12    2   -4
##        odin     pm2.5      pm10      temp   no
## 16 odin.104 0.8181087 0.6556272 0.9692645 4510
## [1] "Number of colocated data points 4510"

##        odin pm2.5 pm10 temp
## 17 odin.100    14    6  -10
##        odin     pm2.5      pm10      temp   no
## 17 odin.100 0.6541987 0.3224695 0.9719693 2466
## [1] "Number of colocated data points 2466"

best.offsets
##        odin pm2.5 pm10 temp
## 1  odin.102    14   11  -11
## 2  odin.103    14   10  -19
## 3  odin.115    11    9  -18
## 4  odin.105    11    7  -22
## 5  odin.107     8    5  -20
## 6  odin.108    15   10  -15
## 7  odin.114     9    7   -9
## 8  odin.111    11    7  -19
## 9  odin.113     9    8  -19
## 10 odin.109    10    9  -14
## 11 odin.112    14   10  -10
## 12 odin.106    15   16  -14
## 13 odin.117    15    7  -20
## 14 odin.101    13   13  -16
## 15 odin.110    15   12  -11
## 16 odin.104    12    2   -4
## 17 odin.100    14    6  -10
summary(best.offsets$pm2.5)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    8.00   11.00   13.00   12.35   14.00   15.00
summary(best.offsets$pm10)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   7.000   9.000   8.765  10.000  16.000
summary(best.offsets$temp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -22.00  -19.00  -15.00  -14.76  -11.00   -4.00
corr0
##        odin     pm2.5      pm10      temp    no
## 1  odin.102 0.9146071 0.8819723 0.9617869  6835
## 2  odin.103 0.9430904 0.9428132 0.9298409  1891
## 3  odin.115 0.9158897 0.8700058 0.9649036  6582
## 4  odin.105 0.8838205 0.8538734 0.9753616  6795
## 5  odin.107 0.9231427 0.8783852 0.9721451  6314
## 6  odin.108 0.9211281 0.9169186 0.9667915  3787
## 7  odin.114 0.9326945 0.8958748 0.9631179  3030
## 8  odin.111 0.9392077 0.9330807 0.9616866  1547
## 9  odin.113 0.9058122 0.8468821 0.9710828  6795
## 10 odin.109 0.9121186 0.8721985 0.9743062 16341
## 11 odin.112 0.7569478 0.6577005 0.9707741  8263
## 12 odin.106 0.7519309 0.6436590 0.9673419  2370
## 13 odin.117 0.7827743 0.5965291 0.9669879  4936
## 14 odin.101 0.8155260 0.6259870 0.9687175  4912
## 15 odin.110 0.6966228 0.5195689 0.9704972  4912
## 16 odin.104 0.8181087 0.6556272 0.9692645  4510
## 17 odin.100 0.6541987 0.3224695 0.9719693  2466