# Take data from HVS site for ODIN and Clarity and compare the readings

##### Load relevant packages #####
library(readr)
library(reshape2)
library(ggplot2)
library(RJSONIO)
library(curl)
## Using libcurl 7.79.1 with OpenSSL/1.1.1n-fips
## 
## Attaching package: 'curl'
## The following object is masked from 'package:readr':
## 
##     parse_date
library(base64enc)
library(openair)
library(stringi)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Get ODIN data ####

# load() restores the objects stored in the .RData file into the workspace;
# the statements below rely on one of them being called `all_data`.
load("./all_data.RData")

# Keep eight columns (picked by position) and relabel them with an ".odin"
# suffix so they remain distinguishable once the Clarity columns are merged in.
# NOTE(review): the position -> name mapping cannot be checked from this
# script alone -- confirm it against the column order of the saved object.
all_data <- setNames(
  all_data[, c(11, 2, 3, 4, 6, 9, 10, 13)],
  c(
    "date",
    "PM1.odin",
    "PM2.5.odin",
    "PM10.odin",
    "CO2.odin",
    "T.odin",
    "RH.odin",
    "odinid"
  )
)
# Get Clarity data ####

# Read the Clarity CSV export. When this script was last rendered, readr
# reported 1733 rows and 20 columns for this file: 1 character column
# (Device ID), 11 double, 7 logical and 1 date-time column (Time [UTC+00:00]).
clarity_data <- read_csv(
  file = "~/repositories/r-repo-for-work/data/clarity_HVS.csv"
)

# Keep eight columns (picked by position) and relabel them with a ".clarity"
# suffix; the time column is called `date` to match the ODIN table, which is
# the key used for the merge further down.
# NOTE(review): presumably column 1 is the Device ID and column 2 the UTC
# timestamp -- confirm the position -> name mapping against spec().
clarity_data <- setNames(
  clarity_data[, c(1, 2, 4, 7, 10, 13, 15, 16)],
  c(
    "clarityid",
    "date",
    "PM1.clarity",
    "PM2.5.clarity",
    "PM10.clarity",
    "NO2.clarity",
    "T.clarity",
    "RH.clarity"
  )
)

# Merge the datasets
# Full outer join on the shared `date` column: rows from either table are kept
# even when the other table has no record with the same timestamp, in which
# case the columns coming from the missing side are filled with NA.
all_data <- merge(
  x = all_data,
  y = clarity_data,
  by = "date",
  all.x = TRUE,
  all.y = TRUE
)


# Plot data ####

# Averaging period passed to every plot below via `avg.time`.
avg_plot <- "1 hour"

# PM size fractions to compare, in the order the plots are drawn.
pm_fractions <- c("PM10", "PM2.5", "PM1")

# Time series: one plot per size fraction, with the Clarity and ODIN series
# drawn together in the same panel (group = TRUE).
for (pm in pm_fractions) {
  timePlot(
    all_data,
    pollutant = paste0(pm, c(".clarity", ".odin")),
    avg.time = avg_plot,
    group = TRUE
  )
}

# Scatter plots: Clarity (y) against ODIN (x) for each size fraction, with a
# linear fit requested through linear = TRUE.
for (pm in pm_fractions) {
  scatterPlot(
    all_data,
    x = paste0(pm, ".odin"),
    y = paste0(pm, ".clarity"),
    linear = TRUE,
    avg.time = avg_plot
  )
}