# Load libraries
library('ggplot2')
library('openair')
## Loading required package: maps
## 
##  # maps v3.1: updated 'world': all lakes moved to separate new #
##  # 'lakes' database. Type '?world' or 'news(package="maps")'.  #
library('lubridate')
library('corrgram')
library('corrplot')


# PMS1003 data
pms1003.data <- read.delim("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/20160223_pms1003.txt")
pms1003.data$experiment <- 1
tmp_data <- read.delim("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/weekend_garage_20160307.txt")
tmp_data$experiment <- 2
pms1003.data <-rbind(pms1003.data,tmp_data)
tmp_data <- read.delim("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/20160321_garage.txt")
tmp_data$experiment <- 3
pms1003.data <-rbind(pms1003.data,tmp_data)
tmp_data <- read.delim("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/20160329_odin.txt")
tmp_data[,9]<-NULL
tmp_data[,8]<-NULL
tmp_data[,7]<-NULL
tmp_data[,3]<-NULL
tmp_data$experiment <- 4
pms1003.data <-rbind(pms1003.data,tmp_data)

pms1003.data$date <- as.POSIXct(paste(pms1003.data$Day,pms1003.data$Time),tz='Pacific/Auckland')
pms1003.data$Day<-NULL
pms1003.data$Time<-NULL
# Number concentrations to pt/cc (raw comes as pt/100cc)
pms1003.data$N300 <-pms1003.data$N300 / 100
pms1003.data$N500 <-pms1003.data$N500 / 100
pms1003.data$N1000 <-pms1003.data$N1000 / 100
pms1003.data$N2500 <-pms1003.data$N2500 / 100
pms1003.data$N5000 <-pms1003.data$N5000 / 100
pms1003.data$N10000 <-pms1003.data$N10000 / 100

pms1003.data$PMc <- pms1003.data$PM10 - pms1003.data$PM2.5
pms1003.data$PMf <- pms1003.data$PM2.5 - pms1003.data$PM1

# GRIMM 107 data
grimm.107 <- read.csv("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/grimm_20160223.parsed", sep="")
tmp_data <- read.csv("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/20160307_grimm_109.parsed", sep="")
grimm.107 <- rbind(grimm.107,tmp_data)
tmp_data <- read.csv("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/20160321_grimm_107_data.parsed", sep="")
grimm.107 <- rbind(grimm.107,tmp_data)
tmp_data <- read.csv("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/20160329_grimm_107_data.parsed", sep="")
grimm.107 <- rbind(grimm.107,tmp_data)

grimm.107$date <- ISOdatetime(year = grimm.107$Year+2000,
                              month = grimm.107$Month, 
                              day = grimm.107$Day,
                              hour = grimm.107$Hour,
                              min = grimm.107$Minute, 
                              sec = grimm.107$Second, tz = "GMT")

Convert counts data into pt/cc (raw comes as pt/1000cc)

grimm.107[,17:47] <- grimm.107[,17:47] / 1000

merged_data <- merge(pms1003.data,grimm.107,by = 'date', all = TRUE)
merged_data.1min <- timeAverage(merged_data,avg.time = '1 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
timePlot(merged_data,pollutant = c('Temperature','x6'),avg.time = '1 min')
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

lag_test=ccf(merged_data.1min$Temperature,
             merged_data.1min$x6,
             na.action=na.pass,
             lag.max=120,
             type='correlation',
             ylab='Correlation',
             main='Temperature correlation as function of clock lag')

lag_grimm_107 = lag_test$lag[which.max(lag_test$acf)]
grimm.107$date <- grimm.107$date + lag_grimm_107*60


merged_data <- merge(pms1003.data,grimm.107,by = 'date', all = TRUE)
merged_data.1min <- timeAverage(merged_data,avg.time = '1 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
timePlot(merged_data,pollutant = c('Temperature','x6'),avg.time = '5 min',group = TRUE, normalise = 'mean')
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

merged_data.5min <- timeAverage(merged_data,avg.time = '5 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
merged_data.15min <- timeAverage(merged_data,avg.time = '15 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
merged_data.60min <- timeAverage(merged_data,avg.time = '60 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
# Plotting

for_correl <- merged_data.1min[,c(5:10,32:55)]
find_channels <- cor(for_correl,use = 'pairwise')

corrgram(find_channels)

corrplot(find_channels,method = 'circle')

So … apparently the N1000 channel correlates with n374 let’s see. Let’s plot the scatter with some fits in the plot

TestID <- as.factor(merged_data.60min$experiment)
ggplot(merged_data.60min, aes(n324, N1000)) +
  geom_point(aes(colour = TestID),shape=1) +    # Use hollow circles
  geom_smooth(method="lm", formula = y~x) +   # Add linear regression line (by default includes 95% confidence region)
  ggtitle("With non-zero intercept")
## Warning: Removed 611 rows containing non-finite values (stat_smooth).
## Warning: Removed 611 rows containing missing values (geom_point).

timePlot(merged_data,pollutant = c('N1000','n374'),avg.time = '60 min',group = TRUE, normalise = 'mean')
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

Attempting a linear fit of N300 based on the 10min data

summary(n1000_linear <- lm(data = timeAverage(merged_data,avg.time = '60 min'),n374~N1000))
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
## 
## Call:
## lm(formula = n374 ~ N1000, data = timeAverage(merged_data, avg.time = "60 min"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3921 -0.8859 -0.1222  0.6809  6.9663 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.4399     0.1420   3.098  0.00212 ** 
## N1000        68.2651     2.5606  26.660  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.223 on 323 degrees of freedom
##   (611 observations deleted due to missingness)
## Multiple R-squared:  0.6875, Adjusted R-squared:  0.6866 
## F-statistic: 710.8 on 1 and 323 DF,  p-value: < 2.2e-16
merged_data$N1000_corr <- predict(n1000_linear,newdata = merged_data)
timePlot(merged_data,pollutant = c('N1000_corr','n374'),avg.time = '60 min',group = TRUE)
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT