Load data
library(RPostgreSQL)
## Loading required package: DBI
library(rpart)
library(reshape2)
library(zoo)
## Warning: package 'zoo' was built under R version 3.4.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggplot2)
library(MASS)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.4.3
# Set working directory
setwd("c:\\Users\\Hammo\\Documents\\smoke emissions project\\code_final")
# Database login details (not in GitHub repo)
load("access.Rda")
# Connect to database
p <- dbDriver("PostgreSQL")
con <- dbConnect(p,
user=access$user,
password=access$pwd,
host='penap-data.dyndns.org',
dbname='cona',
port=5432)
# Load ECan data (stored locally)
ecan <- read.csv('RangioraWinter2016.csv',stringsAsFactors=FALSE)
names(ecan) <- c('date','time','wind.speed','wind.dir','wind.dir.std','wind.speed.std',
'wind.max','co','temp.ground','temp.2m','temp.6m','pm10',
'pm2.5','pm.course')
ecan$date <- as.POSIXct(ecan$date,format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
ecan$time <- NULL # this is included in date
# ECan site has siteid=18
odin.raw <-dbGetQuery(con,"SELECT d.recordtime AT TIME ZONE 'NZST' AS date,
i.serialn as serial, s.name as label,
d.value::numeric as val
FROM data.fixed_data as d,
admin.sensor as s, admin.instrument as i
WHERE s.id = d.sensorid
AND s.instrumentid = i.id
AND i.name = 'ODIN-SD-3'
AND d.siteid = 18
AND (i.serialn = 'ODIN-109'
OR i.serialn = 'ODIN-102'
OR i.serialn = 'ODIN-105'
OR i.serialn = 'ODIN-107'
OR i.serialn = 'ODIN-108'
OR i.serialn = 'ODIN-109'
OR i.serialn = 'ODIN-113'
OR i.serialn = 'ODIN-115')
AND ((d.recordtime BETWEEN '2016-07-11 00:00 NZST'
AND '2016-07-26 00:00 NZST')
OR (d.recordtime BETWEEN '2016-10-03 00:00 NZST'
AND '2016-10-19 00:00 NZST'))
AND NOT (d.recordtime BETWEEN '2016-09-25 02:00 NZST'
AND '2016-09-25 03:00 NZST')
AND (s.name = 'PM2.5'
OR s.name = 'Temperature'
OR s.name = 'RH')
ORDER BY date;")
# 02:01 to 02:59 on 2016-09-25 is excluded because this is when the switch to NZST occurs.
# Without the exclusion clause the timestamps will truncate at 'day'.
# Truncate seconds from timestamps
odin.raw$date <- trunc(odin.raw$date,'min')
odin.raw$date <- as.POSIXct(odin.raw$date) # the above converted it to POSIXlt
# Fix daylight savings bug
ds.time <- as.POSIXct('09/25/2016 02:00',format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
odin.raw[odin.raw$date > ds.time, 'date'] <-
odin.raw[odin.raw$date > ds.time, 'date'] + 60*60
Preprocessing
serials <- unique(odin.raw$serial)
odins <- list()
for (i in 1:length(serials)) {
this.odin <- dcast(odin.raw[odin.raw$serial==serials[i],],
date~label, value.var='val', fun.aggregate=mean)
# Make column names easier to type
names(this.odin) <- tolower(names(this.odin))
names(this.odin)[which(names(this.odin)=="temperature")] <- "temp"
odins[[i]] <- this.odin
}
# Change ODIN serials from ODIN-x to odin.x
tidy.serial <- function(serialn) paste0('odin.',substring(serialn,6,8))
names(odins) <- sapply(serials,FUN=tidy.serial)
data <- ecan
for (j in 1:length(odins)) {
odin <- odins[[j]]
# Make sure that there is a odin entry for each minute
dates <- data.frame(date=seq(odin$date[1],odin$date[nrow(odin)],by='mins'))
odin <- merge(odin,dates,by="date",all=TRUE)
# Change the ODIN measurements from 1 min averages to 1 hour averages
odin.zoo <- zoo( odin[,2:ncol(odin)] )
odin.roll.apply <- rollapply(odin.zoo,width=60,by=1,FUN=mean,align="left",na.rm=TRUE)
odin.avgs <- odin[1:nrow(odin.roll.apply),]
odin.avgs[,2:ncol(odin)] <- odin.roll.apply
# Take timestamps at the end of the hour average following ECan convention
odin.avgs$date <- odin.avgs$date + 60*60
# Purge NaNs
odin.avgs <- odin.avgs[!is.nan(odin.avgs$pm2.5),]
odin.avgs <- odin.avgs[!is.nan(odin.avgs$rh),]
odin.avgs <- odin.avgs[!is.nan(odin.avgs$temp),]
names(odin.avgs) <- c('date',paste0(names(odin.avgs),'.',names(odins)[j])[2:ncol(odin)])
data <- merge(data,odin.avgs,by='date',all=FALSE)
}
Build Models
# september is a good place to split
split.time <- as.POSIXct('09/01/2016 00:00',format = '%m/%d/%Y %H:%M',tz='Etc/GMT-12')
# Initial colocation dataset
data.init <- data[data$date < split.time,]
nrow(data.init)
## [1] 1627
data.init$date[nrow(data.init)] - data.init$date[1]
## Time difference of 11.29167 days
# Final colocation dataset
data.fin <- data[data$date > split.time,]
nrow(data.fin)
## [1] 1923
data.fin$date[nrow(data.fin)] - data.fin$date[1]
## Time difference of 13.34722 days
# Check the length
nrow(data.init)
## [1] 1627
nrow(data.fin)
## [1] 1923
coefs.init <- data.frame(odin.n=NA,cnst=NA,pm2.5=NA,temp=NA,rh=NA,
cnst.std=NA,pm2.5.std=NA,temp.std=NA,rh.std=NA)
coefs.fin <- data.frame(odin.n=NA,cnst=NA,pm2.5=NA,temp=NA,rh=NA,
cnst.std=NA,pm2.5.std=NA,temp.std=NA,rh.std=NA)
for (i in 1:length(odins)) {
odin.name <- names(odins)[i]
odin.vars <- paste0(c('pm2.5','temp','rh'),'.',odin.name)
eval(parse(text=paste0('form <- pm2.5 ~ ', paste(odin.vars,collapse='+'))))
# Initial coefficients
this.init.coefs <- summary(rlm(form,data=data.init))$coefficients
coefs.init[i,'odin.n'] <- as.integer(substring(odin.name,6,8)) - 0.1
coefs.init[i,2:9] <- this.init.coefs[1:8]
# Final coefficients
this.fin.coefs <- summary(rlm(form,data=data.fin))$coefficients
coefs.fin[i,'odin.n'] <- as.integer(substring(odin.name,6,8)) + 0.1
coefs.fin[i,2:9] <- this.fin.coefs[1:8]
}
coefs.init
## odin.n cnst pm2.5 temp rh cnst.std pm2.5.std
## 1 101.9 5.385112 1.274892 -0.3731924 -0.05342281 1.0769485 0.010191425
## 2 114.9 3.053476 1.318904 -0.2757516 -0.03236068 0.8229067 0.008937167
## 3 104.9 6.771927 1.254425 -0.5558927 -0.05936919 1.2784072 0.012369187
## 4 106.9 10.183507 1.241222 -0.4025078 -0.10945085 0.6794777 0.009174396
## 5 107.9 3.376590 1.327997 -0.3395274 -0.02275101 1.0729985 0.010845325
## 6 112.9 10.507258 1.244782 -0.3472956 -0.13126911 1.3523739 0.010200203
## 7 108.9 10.263123 1.254841 -0.3455526 -0.13522022 0.9095272 0.008923287
## temp.std rh.std
## 1 0.02643081 0.016665638
## 2 0.02042690 0.013685608
## 3 0.03957089 0.014596203
## 4 0.02284619 0.007998736
## 5 0.02644621 0.016604718
## 6 0.03350578 0.017154031
## 7 0.02545377 0.012958829
coefs.fin
## odin.n cnst pm2.5 temp rh cnst.std pm2.5.std
## 1 102.1 2.453794 1.207059 -0.05443423 -0.04665073 0.4376679 0.01233895
## 2 115.1 2.906617 1.040813 -0.06858186 -0.04011833 0.4811796 0.01340476
## 3 105.1 3.542073 1.472806 -0.16302941 -0.04400750 0.4922109 0.01814309
## 4 107.1 4.824349 1.032089 -0.13519127 -0.04720401 0.3387323 0.01109148
## 5 108.1 3.177196 1.088191 -0.08911648 -0.04123586 0.4158096 0.01140521
## 6 113.1 1.673053 1.262171 -0.04389513 -0.03522526 0.5172140 0.01584501
## 7 109.1 4.108633 1.010685 -0.10276929 -0.05187981 0.3848702 0.01058287
## temp.std rh.std
## 1 0.01039767 0.005904185
## 2 0.01231195 0.006544463
## 3 0.01503848 0.004658749
## 4 0.01040366 0.003572953
## 5 0.01088112 0.005178867
## 6 0.01323102 0.006024191
## 7 0.01176577 0.004634177
# Percentage changes in each coefficient
coefs.init.mat <- matrix(c(coefs.init$cnst,coefs.init$pm2.5,
coefs.init$temp,coefs.init$rh),
nrow=nrow(coefs.init),ncol=4,
dimnames=list(serials,c('Constant','PM2.5','Temperature','RH')))
coefs.fin.mat <- matrix(c(coefs.fin$cnst,coefs.fin$pm2.5,
coefs.fin$temp,coefs.fin$rh),
nrow=nrow(coefs.fin),ncol=4,
dimnames=list(serials,c('Constant','PM2.5','Temperature','RH')))
coefs.change <- (coefs.fin.mat - coefs.init.mat)/coefs.init.mat * 100
round(coefs.change,2)
## Constant PM2.5 Temperature RH
## ODIN-102 -54.43 -5.32 -85.41 -12.68
## ODIN-115 -4.81 -21.08 -75.13 23.97
## ODIN-105 -47.69 17.41 -70.67 -25.87
## ODIN-107 -52.63 -16.85 -66.41 -56.87
## ODIN-108 -5.91 -18.06 -73.75 81.25
## ODIN-113 -84.08 1.40 -87.36 -73.17
## ODIN-109 -59.97 -19.46 -70.26 -61.63
# Mean percentage change for each coefficient across ODINs
round(colMeans(coefs.change),2)
## Constant PM2.5 Temperature RH
## -44.22 -8.85 -75.57 -17.86
Plot Coefficients
theme_set(theme_gray(base_size = 16))
# Prepare for plotting
coefs.init$time <- rep('init',nrow(coefs.init))
coefs.fin$time <- rep('fin',nrow(coefs.fin))
coefs <- rbind(coefs.init,coefs.fin)
# Plot constant terms
ggplot(coefs, aes(x=odin.n,y=cnst,colour=time)) +
geom_errorbar(aes(ymin=cnst-cnst.std*2, ymax=cnst+cnst.std*2)) +
geom_point() +
scale_colour_manual(values=c("red","blue"),name="Colocation",
labels=c(fin="Final",init="Initial")) +
ylab(expression("Constant")) +
xlab("ODIN Serial") +
ggtitle("Model Parameters for Initial and Final 2016 Colocation")

# Plot pm2.5.odin.x coefficients
ggplot(coefs, aes(x=odin.n,y=pm2.5,colour=time)) +
geom_errorbar(aes(ymin=pm2.5-pm2.5.std*2, ymax=pm2.5+pm2.5.std*2)) +
geom_point() +
scale_colour_manual(values=c("red","blue"),name="Colocation",
labels=c(fin="Final",init="Initial")) +
ylab(expression(PM[2.5]*"-ODIN Coefficient ("*mu*g^-1*m^3*")")) +
xlab("ODIN Serial") +
ggtitle("Model Parameters for Initial and Final 2016 Colocation")

# Plot rh.odin.x coefficients
ggplot(coefs, aes(x=odin.n,y=rh,colour=time)) +
geom_errorbar(aes(ymin=rh-rh.std*2, ymax=rh+rh.std*2)) +
geom_point() +
scale_colour_manual(values=c("red","blue"),name="Colocation",
labels=c(fin="Final",init="Initial")) +
ggtitle("Model Parameters for Initial and Final 2016 Colocation") +
ylab("RH-ODIN Coefficient (percentage"~point^-1*")") +
xlab("ODIN Serial")

# Plot temp.odin.x coefficients
ggplot(coefs, aes(x=odin.n,y=temp,colour=time)) +
geom_errorbar(aes(ymin=temp-temp.std*2, ymax=temp+temp.std*2)) +
geom_point() +
scale_colour_manual(values=c("red","blue"),name="Colocation",
labels=c(fin="Final",init="Initial")) +
ylab(expression("Temp-ODIN Coefficient ("*degree*C^-1*")")) +
xlab("ODIN Serial") +
ggtitle("Model Parameters for Initial and Final 2016 Colocation")

coefs.init <- data.frame(odin.n=NA,cnst=NA,pm2.5=NA,temp=NA,rh=NA,
cnst.std=NA,pm2.5.std=NA,temp.std=NA,rh.std=NA)
coefs.fin <- data.frame(odin.n=NA,cnst=NA,pm2.5=NA,temp=NA,rh=NA,
cnst.std=NA,pm2.5.std=NA,temp.std=NA,rh.std=NA)
for (i in 1:length(odins)) {
odin.name <- names(odins)[i]
odin.vars <- paste0(c('pm2.5','temp','rh'),'.',odin.name)
eval(parse(text=paste0('form <- pm2.5 ~ ', paste(odin.vars,collapse='+'))))
bin.breaks <- seq(-10,100,length.out=50)
pm.bin.init <- cut(data.init[, odin.vars[1]], breaks=bin.breaks)
pm.bin.fin <- cut(data.fin[, odin.vars[1]], breaks=bin.breaks)
# The number of occurances in each bin
pm.num.bin.init <- summary(pm.bin.init)
pm.num.bin.fin <- summary(pm.bin.fin)
# The density of pm concentratinos from the two samples (binned)
pm.dist.init <- summary(pm.bin.init)/sum(pm.num.bin.init)
pm.dist.fin <- summary(pm.bin.fin)/sum(pm.num.bin.fin)
# The desired distribution (binned)
#pm.dist.out <- data.frame(pm=pm.dist.1)# * pm.dist.2)
# Calculate weights
weights.fin <- pm.dist.init[pm.bin.fin]
weights.init <- pm.dist.fin[pm.bin.init]
# Initial coefficients
this.init.coefs <- summary(rlm(form,data=data.init,weights=weights.init))$coefficients
coefs.init[i,'odin.n'] <- as.integer(substring(odin.name,6,8)) - 0.1
coefs.init[i,2:9] <- this.init.coefs[1:8]
# Final coefficients
this.fin.coefs <- summary(rlm(form,data=data.fin,weights=weights.fin))$coefficients
coefs.fin[i,'odin.n'] <- as.integer(substring(odin.name,6,8)) + 0.1
coefs.fin[i,2:9] <- this.fin.coefs[1:8]
}
theme_set(theme_gray(base_size = 16))
# Prepare for plotting
coefs.init$time <- rep('init',nrow(coefs.init))
coefs.fin$time <- rep('fin',nrow(coefs.fin))
coefs <- rbind(coefs.init,coefs.fin)
# Plot constant terms
ggplot(coefs, aes(x=odin.n,y=cnst,colour=time)) +
geom_errorbar(aes(ymin=cnst-cnst.std*2, ymax=cnst+cnst.std*2)) +
geom_point() +
scale_colour_manual(values=c("red","blue"),name="Colocation",
labels=c(fin="Final",init="Initial")) +
ylab(expression("Constant")) +
xlab("ODIN Serial") +
ggtitle("Model Parameters for Initial and Final 2016 Colocation")

# Plot pm2.5.odin.x coefficients
ggplot(coefs, aes(x=odin.n,y=pm2.5,colour=time)) +
geom_errorbar(aes(ymin=pm2.5-pm2.5.std*2, ymax=pm2.5+pm2.5.std*2)) +
geom_point() +
scale_colour_manual(values=c("red","blue"),name="Colocation",
labels=c(fin="Final",init="Initial")) +
ylab(expression(PM[2.5]*"-ODIN Coefficient ("*mu*g^-1*m^3*")")) +
xlab("ODIN Serial") +
ggtitle("Model Parameters for Initial and Final 2016 Colocation")

# Plot rh.odin.x coefficients
ggplot(coefs, aes(x=odin.n,y=rh,colour=time)) +
geom_errorbar(aes(ymin=rh-rh.std*2, ymax=rh+rh.std*2)) +
geom_point() +
scale_colour_manual(values=c("red","blue"),name="Colocation",
labels=c(fin="Final",init="Initial")) +
ggtitle("Model Parameters for Initial and Final 2016 Colocation") +
ylab("RH-ODIN Coefficient (percentage"~point^-1*")") +
xlab("ODIN Serial")

# Plot temp.odin.x coefficients
ggplot(coefs, aes(x=odin.n,y=temp,colour=time)) +
geom_errorbar(aes(ymin=temp-temp.std*2, ymax=temp+temp.std*2)) +
geom_point() +
scale_colour_manual(values=c("red","blue"),name="Colocation",
labels=c(fin="Final",init="Initial")) +
ylab(expression("Temp-ODIN Coefficient ("*degree*C^-1*")")) +
xlab("ODIN Serial") +
ggtitle("Model Parameters for Initial and Final 2016 Colocation")
