library(RPostgreSQL)
## Loading required package: DBI
library(rpart)
library(reshape2)
library(zoo)
## Warning: package 'zoo' was built under R version 3.4.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ggplot2)
library(mblm)
library(MASS)
library(rpart)

# Set working directory
setwd("c:\\Users\\Hammo\\Documents\\smoke emissions project\\repo")

# Database login details (not in GitHub repo)
load("access.Rda")

# Connect to database
p <- dbDriver("PostgreSQL")
con <- dbConnect(p,
                 user=access$user,
                 password=access$pwd,
                 host='penap-data.dyndns.org',
                 dbname='cona',
                 port=5432)
# Load ECan data (stored locally)
ecan <- read.csv('RangioraWinter2016.csv',stringsAsFactors=FALSE)
names(ecan) <- c('date','time','wind.speed','wind.dir','wind.dir.std','wind.speed.std',
                 'wind.max','co','temp.ground','temp.2m','temp.6m','pm10',
                 'pm2.5','pm.course')
ecan$date <- as.POSIXct(ecan$date,format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
ecan$time <- NULL # this is included in date
# Load ODIN data
# Siteid = 18 is ECan site
odin.raw <-dbGetQuery(con,"SELECT d.recordtime AT TIME ZONE 'NZST' AS date,
                            i.serialn as serial, s.name as label,
                            d.value::numeric as val
                           FROM data.fixed_data as d,  
                            admin.sensor as s, admin.instrument as i
                           WHERE s.id = d.sensorid
                            AND s.instrumentid = i.id
                            AND i.name = 'ODIN-SD-3'
                            AND d.siteid = 18
                            AND (i.serialn = 'ODIN-109')
                            AND (d.recordtime BETWEEN '2016-01-01 00:00 NZST'
                             AND '2017-01-01 00:00 NZST')
                            AND NOT (d.recordtime BETWEEN '2016-09-25 02:00 NZST'
                             AND '2016-09-25 03:00 NZST')
                            AND (s.name = 'PM2.5'
                             OR s.name = 'Temperature'
                             OR s.name = 'RH')
                           ORDER BY date;") 

# Time periods including timestamps between 2016-09-25 02:01 and 02:59 
# will truncate timestamps at 'day'.

# Get rid of the seconds
odin.raw$date <- trunc(odin.raw$date,'min')
# The above converted it to POSIXlt
odin.raw$date <- as.POSIXct(odin.raw$date)

# Fix daylight savings bug
ds.time <- as.POSIXct('09/25/2016 02:00',format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
odin.raw[odin.raw$date > ds.time, 'date'] <- odin.raw[odin.raw$date > ds.time, 'date'] + 60*60
# Get data for one ODIN in wide format
odin <- dcast(odin.raw, date~label, value.var='val', fun.aggregate=mean)

# Make column names easier to type
names(odin) <- tolower(names(odin))
names(odin)[which(names(odin)=="temperature")] <- "temp"

# Change columns from "temp"" to "temp.odin.100", etc
serial <- '109'
names(odin) <- paste0(names(odin),'.odin.',serial)

# The 'date.odin.100' back to just 'date'
date.odin.100 <- paste0('date.odin.',serial)
odin[,'date'] <- odin[,date.odin.100]
odin[,date.odin.100] <- NULL

# Make sure that there is a odin entry for each minute
dates <- data.frame(date=seq(odin$date[1],odin$date[nrow(odin)],by='mins'))
odin <- merge(odin,dates,by="date",all=TRUE)

# Change the ODIN measurements from 1 min averages to 1 hour averages
date.col <- which(names(odin)=='date')
odin.zoo <- zoo( odin[,-date.col] ) # remove date because rollapply only works on numerics
odin.zoo <- rollapply(odin.zoo,width=60,by=1,FUN=mean,align="left",na.rm=TRUE)
# NB I don't know what the 'align' parameter does
odin <- odin[1:nrow(odin.zoo),]
odin[,-date.col] <- odin.zoo
# Take timestamps at the end of the hour average following ECan convention
odin$date <- odin$date + 60*60

# Merge with the rest of the data
data <- merge(ecan,odin,by='date',all=FALSE)
ggplot(data,aes(pm2.5.odin.109,pm2.5)) +
  geom_point(alpha=0.05) + 
  ggtitle(expression(PM[2.5]~"(as Measured by ODIN-SD 109) vs"~PM[2.5])) +
  xlab("RH (as measured by ODIN-SD 109)") +
  ylab(expression(PM[2.5] ~ (mu*'g'~m^-3)))
## Warning: Removed 409 rows containing missing values (geom_point).

ggplot(data,aes(temp.odin.109,pm2.5)) +
  geom_point(alpha=0.05) +
  ggtitle(expression('Temperature (as Measured by ODIN-SD 109) vs'~PM[2.5])) +
  xlab(expression("Temperature (" * degree * "C)")) +
  ylab(expression(PM[2.5] ~ (mu*'g'~m^-3)))
## Warning: Removed 409 rows containing missing values (geom_point).

ggplot(data,aes(rh.odin.109,pm2.5)) +
  geom_point(alpha=0.05) +
  ggtitle(expression('RH (as Measured by ODIN-SD 109) vs'~PM[2.5])) +
  xlab("RH (%)") +
  ylab(expression(PM[2.5] ~ (mu*'g'~m^-3)))
## Warning: Removed 409 rows containing missing values (geom_point).