library(RPostgreSQL)
## Loading required package: DBI
library(rpart)
library(reshape2)
# library(openair)
# library(lubridate)
library(zoo)
## Warning: package 'zoo' was built under R version 3.4.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggplot2)
# Set working directory
setwd("c:\\Users\\Hammo\\Documents\\smoke emissions project\\repo")
# Database login details (not in GitHub repo)
load("access.Rda")
# Connect to database
p <- dbDriver("PostgreSQL")
con <- dbConnect(p,
user=access$user,
password=access$pwd,
host='penap-data.dyndns.org',
dbname='cona',
port=5432)
# Load ECan data (stored locally)
ecan <- read.csv('RangioraWinter2016.csv',stringsAsFactors=FALSE)
names(ecan) <- c('date','time','wind.speed','wind.dir','wind.dir.std','wind.speed.std',
'wind.max','co','temp.ground','temp.2m','temp.6m','pm10',
'pm2.5','pm.course')
ecan$date <- as.POSIXct(ecan$date,format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
# ecan$date <- ecan$date - 60*60
ecan$time <- NULL # this is included in date
# There are some NA's in PM2.5. Set these to zero.
# ecan$pm2.5[which(is.na(ecan$pm2.5))] <- 0
head(ecan)
## date wind.speed wind.dir wind.dir.std wind.speed.std
## 1 2016-04-01 00:10:00 0.72 353.9 43.0 0.24
## 2 2016-04-01 00:20:00 0.71 319.6 19.7 0.27
## 3 2016-04-01 00:30:00 0.63 305.4 4.0 0.36
## 4 2016-04-01 00:40:00 1.39 302.9 11.5 0.33
## 5 2016-04-01 00:50:00 0.91 312.4 21.3 0.45
## 6 2016-04-01 01:00:00 1.12 311.6 15.0 0.42
## wind.max co temp.ground temp.2m temp.6m pm10 pm2.5 pm.course
## 1 1.35 0.16 -0.01 11.12 11.13 12.54 4.61 7.93
## 2 1.35 0.15 0.03 11.08 11.06 13.05 5.92 7.13
## 3 1.55 0.14 0.06 11.05 11.00 13.91 6.98 6.92
## 4 2.25 0.10 0.08 11.04 10.95 14.46 6.18 8.28
## 5 2.25 0.10 0.10 10.99 10.90 15.17 5.59 9.58
## 6 2.25 0.09 0.09 10.99 10.90 15.56 6.12 9.45
odin.raw <-dbGetQuery(con,"SELECT d.recordtime AT TIME ZONE 'NZST' AS date,
i.serialn as serial, s.name as label,
d.value::numeric as val
FROM data.fixed_data as d,
admin.sensor as s, admin.instrument as i
WHERE s.id = d.sensorid
AND s.instrumentid = i.id
AND i.name = 'ODIN-SD-3'
AND d.siteid = 18
AND (i.serialn = 'ODIN-109')
AND (d.recordtime BETWEEN '2016-09-10 00:00 NZST'
AND '2016-10-10 00:00 NZST')
AND NOT (d.recordtime BETWEEN '2016-09-25 02:00 NZST'
AND '2016-09-25 03:00 NZST')
AND (s.name = 'PM2.5'
OR s.name = 'PM10'
OR s.name = 'Temperature'
OR s.name = 'RH')
ORDER BY date;")
odin.raw$date <- trunc(odin.raw$date,'min') # get rid of the seconds
odin.raw$date <- as.POSIXct(odin.raw$date) # the above converted it to POSIXlt
odin <- dcast(odin.raw, date~label, value.var='val', fun.aggregate=mean)
# Make column names easier to type
names(odin) <- tolower(names(odin))
names(odin)[which(names(odin)=="temperature")] <- "temp"
dates <- data.frame(date=seq(odin$date[1],odin$date[nrow(odin)],by='mins'))
odin.thing <- merge(odin,dates,by="date",all=TRUE)
if (nrow(odin) != nrow(dates))
print("Oh noes! Gaps in ODIN data!")
## [1] "Oh noes! Gaps in ODIN data!"
odin.zoo <- zoo( odin.thing[,2:ncol(odin.thing)] ) # remove date because rollapply only works on numerics
odin.roll.apply <- rollapply(odin.zoo,width=60,by=1,FUN=mean,align="left",na.rm=TRUE)
odin.avgs <- odin.thing[1:nrow(odin.roll.apply),]
odin.avgs[,2:ncol(odin)] <- odin.roll.apply
odin.avgs[,'date0'] <- odin.avgs[,'date'] # the 'real' date
odin.avgs$date <- odin.avgs$date + 60*60
odin <- odin.avgs
ds.time <- as.POSIXct('09/25/2016 02:00',format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
window <- 60*60*24*7
# odin.before <- odin[odin$date<ds.time,]
# odin.after <- odin[odin$date>ds.time,]
offsets <- -60:120 # 0:120
blank <- rep(NA,length(offset))
offset.diagram <- function() {
best.offsets <- data.frame()
window.slides <- seq(-window,window,60*60*6)
for (j in 1:length(window.slides)) {
corr.df <- data.frame(offset=offsets,temp=blank,pm2.5=blank,pm10=blank)
slide <- window.slides[j]
odin.j <- odin[odin$date > ds.time-window/2+slide & odin$date < ds.time+window/2+slide,]
for (i in 1:nrow(corr.df)) {
offset <- corr.df[i,'offset']
odin.i <- odin.j
odin.i$date <- odin.i$date + offset*60
names(odin.i) <- c('date',paste0(names(odin.i),'.odin.109')[2:ncol(odin)])
data.i <- merge(odin.i,ecan,by='date',all=FALSE)
corr.df[i,'temp'] <- cor(data.i$temp.2m,data.i$temp.odin.109,
method="pearson",use="complete.obs")
corr.df[i,'pm2.5'] <- cor(data.i$pm2.5,data.i$pm2.5.odin.109,
method="pearson",use="complete.obs")
corr.df[i,'pm10'] <- cor(data.i$pm10,data.i$pm10.odin.109,
method="pearson",use="complete.obs")
}
corr.melt <- melt(corr.df,id='offset')
# plt <- ggplot(corr.melt) +
# geom_line(aes(x=offset,y=value,colour=variable)) +
# scale_colour_manual(values=c("red","green","blue")) +
# ylab("Pearson Correlation") +
# xlab("ODIN-109 Clock Offset (m)") +
# ggtitle(paste("Correlation Between ECan and ODIN Measurements"))
# print(plt)
best.offsets[j,'date'] <- odin.j[1,'date']
best.offsets[j,'pm2.5'] <- corr.df[which.max(corr.df[,'pm2.5']),'offset']
best.offsets[j,'pm10'] <- corr.df[which.max(corr.df[,'pm10']),'offset']
best.offsets[j,'temp'] <- corr.df[which.max(corr.df[,'temp']),'offset']
}
off.melt <- melt(best.offsets,id='date')
plt <- ggplot(off.melt) +
geom_line(aes(x=date,y=value,colour=variable)) +
scale_colour_manual(values=c("red","green","blue")) +
ylab("Best Offset (m)") +
xlab("Date") +
ggtitle(paste("Best Offsets with Moving Window around DST transition"))
print(plt)
}
offset.diagram()
Try adding an hour to ODIN dates after daylight savings.
odin[odin$date > ds.time, 'date'] <- odin[odin$date > ds.time, 'date'] + 60*60
offset.diagram()