Load data
library(RPostgreSQL)
## Loading required package: DBI
library(rpart)
library(reshape2)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggplot2)
# Set working directory
setwd("\\\\files.auckland.ac.nz\\MyHome\\Documents")
# Database login details (not in GitHub repo)
load("access.Rda")
# Connect to database
p <- dbDriver("PostgreSQL")
con <- dbConnect(p,
user=access$user,
password=access$pwd,
host='penap-data.dyndns.org',
dbname='cona',
port=5432)
# Load ECan data (stored locally)
ecan <- read.csv('RangioraWinter2016.csv',stringsAsFactors=FALSE)
names(ecan) <- c('date','time','wind.speed','wind.dir','wind.dir.std','wind.speed.std',
'wind.max','co','temp.ground','temp.2m','temp.6m','pm10',
'pm2.5','pm.course')
ecan$date <- as.POSIXct(ecan$date,format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
ecan$time <- NULL # this is included in date
# Load ODIN measurements from June 2017
siteid = 18 # ecan site
odin.raw <-dbGetQuery(con,"SELECT d.recordtime AT TIME ZONE 'NZST' AS date,
i.serialn as serial, s.name as label,
d.value::numeric as val
FROM data.fixed_data as d,
admin.sensor as s, admin.instrument as i
WHERE s.id = d.sensorid
AND s.instrumentid = i.id
AND i.name = 'ODIN-SD-3'
AND d.siteid = 18
AND (d.recordtime BETWEEN '2016-01-01 00:00 NZST'
AND '2017-01-01 00:00 NZST')
AND NOT (d.recordtime BETWEEN '2016-09-25 02:00 NZST'
AND '2016-09-25 03:00 NZST')
AND (s.name = 'PM2.5'
OR s.name = 'PM10'
OR s.name = 'Temperature'
OR s.name = 'RH')
ORDER BY date;")
# 2016-09-25 02:01 to 02:59 is excluded because this is when the NZST to NZDT change occurs.
# Including this leads to truncation of dates at "day".
odin.raw$date <- trunc(odin.raw$date,'min') # get rid of the seconds
odin.raw$date <- as.POSIXct(odin.raw$date) # the above converted it to POSIXlt
# this helps somehow
ds.time <- as.POSIXct('09/25/2016 02:00',format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
odin.raw[odin.raw$date > ds.time, 'date'] <- odin.raw[odin.raw$date > ds.time, 'date'] + 60*60
# Create a data frame for each ODIN
serials <- unique(odin.raw$serial)
odins <- list()
for (i in 1:length(serials)) {
this.odin <- dcast(odin.raw[odin.raw$serial==serials[i],],
date~label, value.var='val', fun.aggregate=mean)
# Make column names easier to type
names(this.odin) <- tolower(names(this.odin))
names(this.odin)[which(names(this.odin)=="temperature")] <- "temp"
odins[[i]] <- this.odin
}
# change ODIN serials from ODIN-101 to odin.101
# the hyphens cause problems by being interpreted as minuses
tidy.serial <- function(serialn) paste0('odin.',substring(serialn,6,8))
names(odins) <- sapply(serials,FUN=tidy.serial)
best.offsets <- data.frame(odin=NA,pm2.5=NA,pm10=NA,temp=NA)
# correlation between measurements with zero offset
corr0 <- data.frame(odin=NA,pm2.5=NA,pm10=NA,temp=NA,no=NA)
for (j in 1:length(odins)) {
odin <- odins[[j]]
# make sure that there is a odin entry for each minute
dates <- data.frame(date=seq(odin$date[1],odin$date[nrow(odin)],by='mins'))
odin <- merge(odin,dates,by="date",all=TRUE)
if (nrow(odin) != nrow(dates))
print("Oh noes! Gaps in ODIN data!")
# if there are gaps merge "dates" with odin
# change the ODIN measurements from 1 min averages to 1 hour averages
odin.zoo <- zoo( odin[,2:ncol(odin)] ) # remove date because rollapply only works on numerics
odin.roll.apply <- rollapply(odin.zoo,width=60,by=1,FUN=mean,align="left",na.rm=TRUE)
# NB I don't know what the 'align' parameter does
# Throw out data for which there is not a complete hour window
odin.avgs <- odin[1:nrow(odin.roll.apply),]
odin.avgs[,2:ncol(odin)] <- odin.roll.apply
# NB if there are gaps in the data then this will be screwed up
odin.avgs$date <- odin.avgs$date + 60*60 # take timestamps at the end of the hour average following ECan convention
# Find best correlation between ECan and ODIN
offsets <- -120:120
blank <- rep(NA,length(offset))
corr.df <- data.frame(offset=offsets,temp=blank,pm2.5=blank,pm10=blank)
for (i in 1:nrow(corr.df)) {
offset <- corr.df[i,'offset']
odin.i <- odin.avgs
odin.i$date <- odin.i$date + offset*60
names(odin.i) <- c('date',paste0(names(odin.i),'.odin')[2:ncol(odin)])
data.i <- merge(odin.i,ecan,by='date',all=FALSE)
corr.df[i,'temp'] <- cor(data.i$temp.2m,data.i$temp.odin,
method="pearson",use="complete.obs")
corr.df[i,'pm2.5'] <- cor(data.i$pm2.5,data.i$pm2.5.odin,
method="pearson",use="complete.obs")
corr.df[i,'pm10'] <- cor(data.i$pm10,data.i$pm10.odin,
method="pearson",use="complete.obs")
}
odin.name <- names(odins)[j]
best.offsets[j,'odin'] <- odin.name
best.offsets[j,'temp'] <- corr.df[which.max(corr.df[,'temp']),'offset']
best.offsets[j,'pm2.5'] <- corr.df[which.max(corr.df[,'pm2.5']),'offset']
best.offsets[j,'pm10'] <- corr.df[which.max(corr.df[,'pm10']),'offset']
print(best.offsets[j,])
no <- length(which(!is.na(data.i$pm2.5.odin)))
corr0[j,'odin'] <- odin.name
corr0[j,'temp'] <- corr.df[corr.df$offset==0,'temp']
corr0[j,'pm2.5'] <- corr.df[corr.df$offset==0,'pm2.5']
corr0[j,'pm10'] <- corr.df[corr.df$offset==0,'pm10']
corr0[j,'no'] <- no
print(corr0[j,])
print(paste("Number of colocated data points",no))
corr.melt <- melt(corr.df,id='offset')
the.plot <- ggplot(corr.melt) +
geom_line(aes(x=offset,y=value,colour=variable)) +
scale_colour_manual("Quantity",values=c("red","green","blue"),
labels=c("Temp",expression(PM[2.5]),
expression(PM[10]))) +
ylab("Pearson Correlation Coefficient") +
xlab("Offset (minutes)") +
theme(legend.position = c(0.5, 0.2),plot.title = element_text(hjust = 0.5)) +
ggtitle(paste0("Correlation Between ECan and ODIN-",
substr(odin.name,6,8)," Measurements"))
print(the.plot)
best.offset <- corr.df[which.max(corr.df[,'pm2.5']),'offset']
odin.bo <- odin.avgs # bo = best offset
odin.bo$date <- odin.bo$date + best.offset*60
names(odin.bo) <- c('date',paste0(names(odin.bo),'.',names(odins)[j])[2:ncol(odin)])
}
## odin pm2.5 pm10 temp
## 1 odin.102 14 11 -11
## odin pm2.5 pm10 temp no
## 1 odin.102 0.9146071 0.8819723 0.9617869 6835
## [1] "Number of colocated data points 6835"

## odin pm2.5 pm10 temp
## 2 odin.103 14 10 -19
## odin pm2.5 pm10 temp no
## 2 odin.103 0.9430904 0.9428132 0.9298409 1891
## [1] "Number of colocated data points 1891"

## odin pm2.5 pm10 temp
## 3 odin.115 11 9 -18
## odin pm2.5 pm10 temp no
## 3 odin.115 0.9158897 0.8700058 0.9649036 6582
## [1] "Number of colocated data points 6582"

## odin pm2.5 pm10 temp
## 4 odin.105 11 7 -22
## odin pm2.5 pm10 temp no
## 4 odin.105 0.8838205 0.8538734 0.9753616 6795
## [1] "Number of colocated data points 6795"

## odin pm2.5 pm10 temp
## 5 odin.107 8 5 -20
## odin pm2.5 pm10 temp no
## 5 odin.107 0.9231427 0.8783852 0.9721451 6314
## [1] "Number of colocated data points 6314"

## odin pm2.5 pm10 temp
## 6 odin.108 15 10 -15
## odin pm2.5 pm10 temp no
## 6 odin.108 0.9211281 0.9169186 0.9667915 3787
## [1] "Number of colocated data points 3787"

## odin pm2.5 pm10 temp
## 7 odin.114 9 7 -9
## odin pm2.5 pm10 temp no
## 7 odin.114 0.9326945 0.8958748 0.9631179 3030
## [1] "Number of colocated data points 3030"

## odin pm2.5 pm10 temp
## 8 odin.111 11 7 -19
## odin pm2.5 pm10 temp no
## 8 odin.111 0.9392077 0.9330807 0.9616866 1547
## [1] "Number of colocated data points 1547"

## odin pm2.5 pm10 temp
## 9 odin.113 9 8 -19
## odin pm2.5 pm10 temp no
## 9 odin.113 0.9058122 0.8468821 0.9710828 6795
## [1] "Number of colocated data points 6795"

## odin pm2.5 pm10 temp
## 10 odin.109 10 9 -14
## odin pm2.5 pm10 temp no
## 10 odin.109 0.9121186 0.8721985 0.9743062 16341
## [1] "Number of colocated data points 16341"

## odin pm2.5 pm10 temp
## 11 odin.112 14 10 -10
## odin pm2.5 pm10 temp no
## 11 odin.112 0.7569478 0.6577005 0.9707741 8263
## [1] "Number of colocated data points 8263"

## odin pm2.5 pm10 temp
## 12 odin.106 15 16 -14
## odin pm2.5 pm10 temp no
## 12 odin.106 0.7519309 0.643659 0.9673419 2370
## [1] "Number of colocated data points 2370"

## odin pm2.5 pm10 temp
## 13 odin.117 15 7 -20
## odin pm2.5 pm10 temp no
## 13 odin.117 0.7827743 0.5965291 0.9669879 4936
## [1] "Number of colocated data points 4936"

## odin pm2.5 pm10 temp
## 14 odin.101 13 13 -16
## odin pm2.5 pm10 temp no
## 14 odin.101 0.815526 0.625987 0.9687175 4912
## [1] "Number of colocated data points 4912"

## odin pm2.5 pm10 temp
## 15 odin.110 15 12 -11
## odin pm2.5 pm10 temp no
## 15 odin.110 0.6966228 0.5195689 0.9704972 4912
## [1] "Number of colocated data points 4912"

## odin pm2.5 pm10 temp
## 16 odin.104 12 2 -4
## odin pm2.5 pm10 temp no
## 16 odin.104 0.8181087 0.6556272 0.9692645 4510
## [1] "Number of colocated data points 4510"

## odin pm2.5 pm10 temp
## 17 odin.100 14 6 -10
## odin pm2.5 pm10 temp no
## 17 odin.100 0.6541987 0.3224695 0.9719693 2466
## [1] "Number of colocated data points 2466"

best.offsets
## odin pm2.5 pm10 temp
## 1 odin.102 14 11 -11
## 2 odin.103 14 10 -19
## 3 odin.115 11 9 -18
## 4 odin.105 11 7 -22
## 5 odin.107 8 5 -20
## 6 odin.108 15 10 -15
## 7 odin.114 9 7 -9
## 8 odin.111 11 7 -19
## 9 odin.113 9 8 -19
## 10 odin.109 10 9 -14
## 11 odin.112 14 10 -10
## 12 odin.106 15 16 -14
## 13 odin.117 15 7 -20
## 14 odin.101 13 13 -16
## 15 odin.110 15 12 -11
## 16 odin.104 12 2 -4
## 17 odin.100 14 6 -10
summary(best.offsets$pm2.5)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.00 11.00 13.00 12.35 14.00 15.00
summary(best.offsets$pm10)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 7.000 9.000 8.765 10.000 16.000
summary(best.offsets$temp)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -22.00 -19.00 -15.00 -14.76 -11.00 -4.00
corr0
## odin pm2.5 pm10 temp no
## 1 odin.102 0.9146071 0.8819723 0.9617869 6835
## 2 odin.103 0.9430904 0.9428132 0.9298409 1891
## 3 odin.115 0.9158897 0.8700058 0.9649036 6582
## 4 odin.105 0.8838205 0.8538734 0.9753616 6795
## 5 odin.107 0.9231427 0.8783852 0.9721451 6314
## 6 odin.108 0.9211281 0.9169186 0.9667915 3787
## 7 odin.114 0.9326945 0.8958748 0.9631179 3030
## 8 odin.111 0.9392077 0.9330807 0.9616866 1547
## 9 odin.113 0.9058122 0.8468821 0.9710828 6795
## 10 odin.109 0.9121186 0.8721985 0.9743062 16341
## 11 odin.112 0.7569478 0.6577005 0.9707741 8263
## 12 odin.106 0.7519309 0.6436590 0.9673419 2370
## 13 odin.117 0.7827743 0.5965291 0.9669879 4936
## 14 odin.101 0.8155260 0.6259870 0.9687175 4912
## 15 odin.110 0.6966228 0.5195689 0.9704972 4912
## 16 odin.104 0.8181087 0.6556272 0.9692645 4510
## 17 odin.100 0.6541987 0.3224695 0.9719693 2466