# PM2.5 spectrometer tests
library('ggplot2')
library('openair')
## Loading required package: lazyeval
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Loading required package: maps
library('lubridate')
library('corrgram')
library('corrplot')


# PMS1003 data
pms1003.data <- read.delim("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/20160329_odin.txt")
pms1003.data$date <- as.POSIXct(paste(pms1003.data$Day,pms1003.data$Time),tz='Pacific/Auckland')
pms1003.data$Day<-NULL
pms1003.data$Time<-NULL
# Number concentrations to pt/cc (raw comes as pt/100cc)
pms1003.data$N300 <-pms1003.data$N300 / 100
pms1003.data$N500 <-pms1003.data$N500 / 100
pms1003.data$N1000 <-pms1003.data$N1000 / 100
pms1003.data$N2500 <-pms1003.data$N2500 / 100
pms1003.data$N5000 <-pms1003.data$N5000 / 100
pms1003.data$N10000 <-pms1003.data$N10000 / 100

pms1003.data$PMc <- pms1003.data$PM10 - pms1003.data$PM2.5
pms1003.data$PMf <- pms1003.data$PM2.5 - pms1003.data$PM1

# GRIMM 107 data
grimm.107 <- read.csv("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/20160329_grimm_107_data.parsed", sep="")
grimm.107$date <- ISOdatetime(year = grimm.107$Year+2000,
                              month = grimm.107$Month, 
                              day = grimm.107$Day,
                              hour = grimm.107$Hour,
                              min = grimm.107$Minute, 
                              sec = grimm.107$Second, tz = "GMT")
# Convert counts data into pt/cc (raw comes as pt/1000cc)
grimm.107[,17:47] <- grimm.107[,17:47] / 1000


# GRIMM 108 data
grimm.108 <- read.csv("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/20160329_grimm_108_data.parsed", sep="")
grimm.108$date <- ISOdatetime(year = grimm.108$Year+2000,
                              month = grimm.108$Month, 
                              day = grimm.108$Day,
                              hour = grimm.108$Hour,
                              min = grimm.108$Minute, 
                              sec = grimm.108$Second, tz = "Pacific/Auckland")
# Convert mass data into ug/m3 (raw comes x10)
grimm.108[,17:31] <- grimm.108[,17:31] / 10
grimm.108$gPM1 <- grimm.108$M0.3 - grimm.108$M1
grimm.108$gPM10 <- grimm.108$M0.3 - grimm.108$M10
# Rough calculation of PM2.5 as the average between PM2 and PM3
grimm.108$gPM2.5 <- ((grimm.108$M0.3 - grimm.108$M2) + (grimm.108$M0.3 - grimm.108$M3))/2




merged_data <- merge(pms1003.data,grimm.107,by = 'date', all = TRUE)
merged_data <- merge(merged_data,grimm.108,by = 'date', all = TRUE)
merged_data.1min <- timeAverage(merged_data,avg.time = '1 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
timePlot(merged_data,pollutant = c('Temperature','x6.x','x6.y'),avg.time = '1 min')
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

lag_test=ccf(merged_data.1min$Temperature,
             merged_data.1min$x6.x,
             na.action=na.pass,
             lag.max=60,
             type='correlation',
             ylab='Correlation',
             main='Temperature correlation as function of clock lag')

lag_grimm_107 = lag_test$lag[which.max(lag_test$acf)]
grimm.107$date <- grimm.107$date + lag_grimm_107*60

lag_test=ccf(merged_data.1min$Temperature,
             merged_data.1min$x6.y,
             na.action=na.pass,
             lag.max=60,
             type='correlation',
             ylab='Correlation',
             main='Temperature correlation as function of clock lag')

lag_grimm_108 = lag_test$lag[which.max(lag_test$acf)]
grimm.108$date <- grimm.108$date + lag_grimm_108*60

merged_data <- merge(pms1003.data,grimm.107,by = 'date', all = TRUE)
merged_data <- merge(merged_data,grimm.108,by = 'date', all = TRUE)
merged_data.1min <- timeAverage(merged_data,avg.time = '1 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
timePlot(merged_data,pollutant = c('Temperature','x6.x','x6.y'),avg.time = '5 min',group = TRUE, normalise = 'mean')
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

merged_data.5min <- timeAverage(merged_data,avg.time = '5 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
merged_data.15min <- timeAverage(merged_data,avg.time = '15 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
merged_data.60min <- timeAverage(merged_data,avg.time = '60 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
# Plotting
ggplot(pms1003.data,aes(x=date))+
  geom_line(aes(y = PM1),colour = 'red')+
  geom_line(aes(y = PM2.5),colour = 'blue')+
  geom_line(aes(y = PM10),colour = 'black')

ggplot(pms1003.data,aes(x=date))+
  geom_line(aes(y = PM1),colour = 'red')+
  geom_line(aes(y = PMf),colour = 'blue')+
  geom_line(aes(y = PMc),colour = 'black')

ggplot(pms1003.data,aes(x=date))+
  geom_line(aes(y = N300),colour = 'red')+
  geom_line(aes(y = N500),colour = 'blue')+
  geom_line(aes(y = N1000),colour = 'black')+
  geom_line(aes(y = N2500),colour = 'green')

ggplot(merged_data.5min,aes(x=date))+
  geom_line(aes(y = N300),colour = 'red')+
  geom_line(aes(y = N500),colour = 'blue')+
  geom_line(aes(y = PM1),colour = 'black')+
  geom_point(aes(y = n265), colour = 'yellow')
## Warning: Removed 3 rows containing missing values (geom_path).
## Warning: Removed 3 rows containing missing values (geom_path).
## Warning: Removed 3 rows containing missing values (geom_path).
## Warning: Removed 3 rows containing missing values (geom_point).

timePlot(merged_data,pollutant = c('N300',
                                   'N500',
                                   'n265',
                                   'n290',
                                   'n324',
                                   'n374',
                                   'n424',
                                   'n539',
                                   'n614')
         ,avg.time = '5 min'
         ,normalise = 'mean'
         ,group = TRUE)
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

timePlot(merged_data
         ,pollutant = c('N1000',
                        'PM1',
                               'N2500',
#                               'N5000',
#                               'N10000',
                               'n894',
                               'n1140',
                               'n2236',
                               'n2739',
                               'n4472')
         ,avg.time = '5 min'
         #,normalise = 'mean'
         ,group = TRUE)
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

# Let's plot the scatter with some fits in the plot
ggplot(timeAverage(merged_data,avg.time = '60 min'), aes(n324, PM1)) +
  geom_point(shape=1) +    # Use hollow circles
  geom_smooth(method="lm", formula = y~x) +   # Add linear regression line (by default includes 95% confidence region)
  ggtitle("With non-zero intercept")
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT

for_correl <- merged_data.1min[,c(3:14,17,18,35:54,82:94)]
find_channels <- cor(for_correl,use = 'pairwise')

corrgram(find_channels)

corrplot(find_channels,method = 'circle')

# So ... apparently the N1000 channel correlates with n374 and the first 8 channels seem to tell the same story
# let's see.
# Let's plot the scatter with some fits in the plot
ggplot(timeAverage(merged_data,avg.time = '5 min'), aes(n374, N1000)) +
  geom_point(shape=1) +    # Use hollow circles
  geom_smooth(method="lm", formula = y~x) +   # Add linear regression line (by default includes 95% confidence region)
  ggtitle("With non-zero intercept")
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
## Warning: Removed 5 rows containing missing values (stat_smooth).
## Warning: Removed 5 rows containing missing values (geom_point).

timePlot(merged_data,pollutant = c('N1000','n374'),avg.time = '10 min',group = TRUE, normalise = 'mean')
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

# Attempting a linear fit of N300 based on the 10min data
summary(n300_linear <- lm(data = timeAverage(merged_data,avg.time = '10 min'),n374~N1000))
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
## 
## Call:
## lm(formula = n374 ~ N1000, data = timeAverage(merged_data, avg.time = "10 min"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.6536 -0.2386 -0.0002  0.2292  1.2870 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.20547    0.03705   5.545 4.13e-08 ***
## N1000       51.04648    0.77788  65.623  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3993 on 713 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.8579, Adjusted R-squared:  0.8578 
## F-statistic:  4306 on 1 and 713 DF,  p-value: < 2.2e-16
merged_data$N1000_corr <- predict(n300_linear,newdata = merged_data)
timePlot(merged_data,pollutant = c('N1000_corr','n374'),avg.time = '10 min',group = TRUE)
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT