# Initialise environment
# Fix the random seed. Nothing in the visible part of this script draws random
# numbers, so this presumably only matters to the sourced loader or to code
# outside this view (TODO confirm).
set.seed(1)
library(openair)
## Loading required package: maps
## 
##  # maps v3.1: updated 'world': all lakes moved to separate new #
##  # 'lakes' database. Type '?world' or 'news(package="maps")'.  #
# NOTE(review): reshape2 is not referenced in the visible code; presumably the
# sourced loader needs it -- confirm before removing.
library(reshape2)


# Load the data. `raw_data` is used below without being created in this file,
# so it is presumably defined by the sourced loader. The columns used later are
# `date`, `raw_temp`, `temp` and `no2`.
# Alternative loader, currently disabled:
# source('./load_data_SPEC_RAW.R')
source('./load_data_SPEC_Thingiverse.R')
# Reverse-engineer the temperature conversion ----
# Plot the reported temperature against the raw reading to check that the two
# are linearly related.
scatterPlot(raw_data, x = "raw_temp", y = "temp")

# Regress reported temperature on the raw reading using hourly averages, then
# print the summary of the fit.
temp_lm <- lm(
  temp ~ raw_temp,
  data = timeAverage(raw_data, avg.time = "1 hour")
)
summary(temp_lm)
## 
## Call:
## lm(formula = temp ~ raw_temp, data = timeAverage(raw_data, avg.time = "1 hour"))
## 
## Residuals:
##      359      360      361      362      363      364 
## -0.13009  0.15325  0.05909  0.16464  0.03795 -0.28483 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.087e+01  5.823e+00  -7.019 0.002170 ** 
## raw_temp     2.435e-03  2.241e-04  10.867 0.000407 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1959 on 4 degrees of freedom
##   (358 observations deleted due to missingness)
## Multiple R-squared:  0.9672, Adjusted R-squared:  0.959 
## F-statistic: 118.1 on 1 and 4 DF,  p-value: 0.0004071

# Rebuild temperature for every row of raw_data from the fitted line
# (intercept + slope * raw reading).
raw_data[["temperature"]] <-
  coef(temp_lm)[1] + coef(temp_lm)[2] * raw_data$raw_temp

# Then replace it with the previously calculated coefficients. This assignment
# overwrites the column computed from the fresh fit just above, so these fixed
# values are the ones every later step sees.
raw_data[["temperature"]] <- 0.002684298 * raw_data$raw_temp - 47.439351359
# Split the record into two measurement periods. Rows between the two cut-offs
# (09:00 to 11:00 on 2017-05-03) fall into neither subset.
# NOTE(review): as.POSIXct() without `tz` uses the session time zone, so the
# cut-offs shift if the session zone differs from the one `date` is recorded
# in -- confirm and pass `tz` explicitly if needed.
# Lab data (outdoor line)
lab_data <- subset(raw_data, subset = date < as.POSIXct("2017-05-03 09:00"))
# Indoor data
indoor_data <- subset(raw_data, subset = date > as.POSIXct("2017-05-03 11:00"))

# Shift NO2 upwards by 1.1 * |minimum| so that a negative minimum ends up
# strictly positive. na.rm = TRUE is required: without it a single missing
# reading makes min() return NA, which would turn the whole column into NA.
# NOTE(review): if the minimum is exactly 0 the offset is 0 and zeros remain.
lab_data$no2 <- lab_data$no2 +
  abs(1.1 * min(lab_data$no2, na.rm = TRUE))
indoor_data$no2 <- indoor_data$no2 +
  abs(1.1 * min(indoor_data$no2, na.rm = TRUE))

# Time series of temperature and NO2 on a shared panel, averaged to 10 minutes
# and normalised by the mean, drawn once for each subset.
timePlot(
  mydata = lab_data,
  pollutant = c("temperature", "no2"),
  avg.time = "10 min",
  normalise = "mean",
  group = TRUE,
  main = "Lab data"
)

timePlot(
  mydata = indoor_data,
  pollutant = c("temperature", "no2"),
  avg.time = "10 min",
  normalise = "mean",
  group = TRUE,
  main = "Indoor data"
)

# Temporal variation of NO2 for each subset: first on its own, then normalised
# alongside temperature. The normalised plots use the reconstructed
# `temperature` column, as every other plot in this script does, rather than
# the sparsely populated reported `temp` column.
timeVariation(lab_data, pollutant = "no2")

timeVariation(
  lab_data,
  pollutant = c("no2", "temperature"),
  normalise = TRUE
)

timeVariation(indoor_data, pollutant = "no2")

timeVariation(
  indoor_data,
  pollutant = c("no2", "temperature"),
  normalise = TRUE
)

# NO2 against reconstructed temperature, one scatter plot per subset.
scatterPlot(
  mydata = lab_data,
  x = "temperature",
  y = "no2",
  main = "Lab data"
)

scatterPlot(
  mydata = indoor_data,
  x = "temperature",
  y = "no2",
  main = "Indoor data"
)