Load data
library(RPostgreSQL)
## Loading required package: DBI
library(rpart)
library(reshape2)
# library(openair)
# library(lubridate)
library(zoo)
## Warning: package 'zoo' was built under R version 3.4.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggplot2)
# Set working directory
setwd("c:\\Users\\Hammo\\Documents\\smoke emissions project\\repo")
# Database login details (not in GitHub repo)
load("access.Rda")
# Connect to database
p <- dbDriver("PostgreSQL")
con <- dbConnect(p,
user=access$user,
password=access$pwd,
host='penap-data.dyndns.org',
dbname='cona',
port=5432)
# Load ECan data (stored locally)
ecan <- read.csv('RangioraWinter2016.csv',stringsAsFactors=FALSE)
names(ecan) <- c('date','time','wind.speed','wind.dir','wind.dir.std','wind.speed.std',
'wind.max','co','temp.ground','temp.2m','temp.6m','pm10',
'pm2.5','pm.course')
ecan$date <- as.POSIXct(ecan$date,format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
# ecan$date <- ecan$date - 60*60
ecan$time <- NULL # this is included in date
# There are some NA's in PM2.5. Set these to zero.
# ecan$pm2.5[which(is.na(ecan$pm2.5))] <- 0
head(ecan)
## date wind.speed wind.dir wind.dir.std wind.speed.std
## 1 2016-04-01 00:10:00 0.72 353.9 43.0 0.24
## 2 2016-04-01 00:20:00 0.71 319.6 19.7 0.27
## 3 2016-04-01 00:30:00 0.63 305.4 4.0 0.36
## 4 2016-04-01 00:40:00 1.39 302.9 11.5 0.33
## 5 2016-04-01 00:50:00 0.91 312.4 21.3 0.45
## 6 2016-04-01 01:00:00 1.12 311.6 15.0 0.42
## wind.max co temp.ground temp.2m temp.6m pm10 pm2.5 pm.course
## 1 1.35 0.16 -0.01 11.12 11.13 12.54 4.61 7.93
## 2 1.35 0.15 0.03 11.08 11.06 13.05 5.92 7.13
## 3 1.55 0.14 0.06 11.05 11.00 13.91 6.98 6.92
## 4 2.25 0.10 0.08 11.04 10.95 14.46 6.18 8.28
## 5 2.25 0.10 0.10 10.99 10.90 15.17 5.59 9.58
## 6 2.25 0.09 0.09 10.99 10.90 15.56 6.12 9.45
odin.raw <-dbGetQuery(con,"SELECT d.recordtime AT TIME ZONE 'NZST' AS date,
i.serialn as serial, s.name as label,
d.value::numeric as val
FROM data.fixed_data as d,
admin.sensor as s, admin.instrument as i
WHERE s.id = d.sensorid
AND s.instrumentid = i.id
AND i.name = 'ODIN-SD-3'
AND d.siteid = 18
AND (i.serialn = 'ODIN-109')
AND (d.recordtime BETWEEN '2016-09-25 01:50 NZST'
AND '2016-09-25 03:10 NZST')
AND NOT (d.recordtime BETWEEN '2016-09-25 02:00 NZST'
AND '2016-09-25 03:00 NZST')
AND (s.name = 'PM2.5'
OR s.name = 'PM10'
OR s.name = 'Temperature'
OR s.name = 'RH')
ORDER BY date;")
odin.raw$date <- trunc(odin.raw$date,'min') # get rid of the seconds
odin.raw$date <- as.POSIXct(odin.raw$date) # the above converted it to POSIXlt
odin <- dcast(odin.raw, date~label, value.var='val', fun.aggregate=mean)
x <- odin$date
min <- x[2] - x[1]
y <- ecan[ecan$date >= x[1]-10*min & ecan$date <= x[length(x)]+10*min,'date']
x
## [1] "2016-09-25 01:50:00 NZST" "2016-09-25 01:51:00 NZST"
## [3] "2016-09-25 01:52:00 NZST" "2016-09-25 01:53:00 NZST"
## [5] "2016-09-25 01:54:00 NZST" "2016-09-25 01:55:00 NZST"
## [7] "2016-09-25 01:56:00 NZST" "2016-09-25 01:57:00 NZST"
## [9] "2016-09-25 01:58:00 NZST" "2016-09-25 01:59:00 NZST"
## [11] "2016-09-25 03:00:00 NZDT" "2016-09-25 03:01:00 NZDT"
## [13] "2016-09-25 03:02:00 NZDT" "2016-09-25 03:03:00 NZDT"
## [15] "2016-09-25 03:04:00 NZDT" "2016-09-25 03:05:00 NZDT"
## [17] "2016-09-25 03:06:00 NZDT" "2016-09-25 03:07:00 NZDT"
## [19] "2016-09-25 03:08:00 NZDT" "2016-09-25 03:09:00 NZDT"
y
## [1] "2016-09-25 01:40:00 +12" "2016-09-25 01:50:00 +12"
## [3] "2016-09-25 02:00:00 +12" "2016-09-25 02:10:00 +12"
i <- x[11] # 3:00 NZDT
j <- y[3] # 2:00 GMT+12
i == j # TRUE
## [1] TRUE
# everything seems fine
# Load ODIN measurements from June 2017
siteid = 18 # ecan site
odin.raw <-dbGetQuery(con,"SELECT d.recordtime AT TIME ZONE 'NZST' AS date,
i.serialn as serial, s.name as label,
d.value::numeric as val
FROM data.fixed_data as d,
admin.sensor as s, admin.instrument as i
WHERE s.id = d.sensorid
AND s.instrumentid = i.id
AND i.name = 'ODIN-SD-3'
AND d.siteid = 18
AND (i.serialn = 'ODIN-109')
AND (d.recordtime BETWEEN '2016-01-01 00:00 NZST'
AND '2017-01-01 00:00 NZST')
AND NOT (d.recordtime BETWEEN '2016-09-25 02:00 NZST'
AND '2016-09-25 03:00 NZST')
AND (s.name = 'PM2.5'
OR s.name = 'PM10'
OR s.name = 'Temperature'
OR s.name = 'RH')
ORDER BY date;")
# Time periods including timestamps between 2016-09-25 02:01 and 02:59 (daylight savings)
# daylight savings should also have weirdness in 3 april (3am went to 2am)
# data starts in
# will truncate timestamps at 'day'.
odin.raw$date <- trunc(odin.raw$date,'min') # get rid of the seconds
odin.raw$date <- as.POSIXct(odin.raw$date) # the above converted it to POSIXlt
# Create a data frame for each ODIN
serials <- unique(odin.raw$serial)
odins <- list()
for (i in 1:length(serials)) {
this.odin <- dcast(odin.raw[odin.raw$serial==serials[i],],
date~label, value.var='val', fun.aggregate=mean)
# Make column names easier to type
names(this.odin) <- tolower(names(this.odin))
names(this.odin)[which(names(this.odin)=="temperature")] <- "temp"
odins[[i]] <- this.odin
}
# change ODIN serials from ODIN-101 to odin.101
# the hyphens cause problems by being interpreted as minuses
tidy.serial <- function(serialn) paste0('odin.',substring(serialn,6,8))
names(odins) <- sapply(serials,FUN=tidy.serial)
odin <- odins[[1]]
# change the ODIN measurements from 1 min averages to 1 hour averages
# make sure that there is a odin entry for each minute
dates <- data.frame(date=seq(odin$date[1],odin$date[nrow(odin)],by='mins'))
odin <- merge(odin,dates,by="date",all=TRUE)
if (nrow(odin) != nrow(dates))
print("Oh noes! Gaps in ODIN data!")
# if there are gaps merge "dates" with odin
odin.zoo <- zoo( odin[,2:ncol(odin)] ) # remove date because rollapply only works on numerics
odin.roll.apply <- rollapply(odin.zoo,width=60,by=1,FUN=mean,align="left",na.rm=TRUE)
# NB I don't know what the 'align' parameter does
# Throw out data for which there is not a complete hour window
odin.avgs <- odin[1:nrow(odin.roll.apply),]
odin.avgs[,2:ncol(odin)] <- odin.roll.apply
# NB if there are gaps in the data then this will be screwed up
odin.avgs$date <- odin.avgs$date + 60*60 # take timestamps at the end of the hour average following ECan convention
# split into months
odin <- odin.avgs
odin$datel <- as.POSIXlt(odin$date)
months <- unique(odin$datel$mon)
# Find best correlation between ECan and ODIN
offsets <- -60:60 # 0:120
blank <- rep(NA,length(offset))
for (month in months) {
odin.month <- odin[odin$datel$mon==month,]
corr.df <- data.frame(offset=offsets,temp=blank,pm2.5=blank,pm10=blank)
for (i in 1:nrow(corr.df)) {
offset <- corr.df[i,'offset']
odin.i <- odin.month
odin.i$date <- odin.i$date + offset*60
names(odin.i) <- c('date',paste0(names(odin.i),'.odin.109')[2:ncol(odin)])
data.i <- merge(odin.i,ecan,by='date',all=FALSE)
corr.df[i,'temp'] <- cor(data.i$temp.2m,data.i$temp.odin.109,
method="pearson",use="complete.obs")
corr.df[i,'pm2.5'] <- cor(data.i$pm2.5,data.i$pm2.5.odin.109,
method="pearson",use="complete.obs")
corr.df[i,'pm10'] <- cor(data.i$pm10,data.i$pm10.odin.109,
method="pearson",use="complete.obs")
}
corr.melt <- melt(corr.df,id='offset')
plt <- ggplot(corr.melt) +
geom_line(aes(x=offset,y=value,colour=variable)) +
scale_colour_manual(values=c("red","green","blue")) +
ylab("Pearson Correlation") +
xlab("ODIN-109 Clock Offset (m)") +
ggtitle(paste("Correlation Between ECan and ODIN Measurements in month",month+1))
print(plt)
}




