# PM2.5 spectrometer tests
library('ggplot2')
library('openair')
## Loading required package: lazyeval
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Loading required package: maps
library('lubridate')
library('corrgram')
library('corrplot')
# PMS1003 data
pms1003.data <- read.delim("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/20160329_odin.txt")
pms1003.data$date <- as.POSIXct(paste(pms1003.data$Day,pms1003.data$Time),tz='Pacific/Auckland')
pms1003.data$Day<-NULL
pms1003.data$Time<-NULL
# Number concentrations to pt/cc (raw comes as pt/100cc)
pms1003.data$N300 <- pms1003.data$N300 / 100
pms1003.data$N500 <- pms1003.data$N500 / 100
pms1003.data$N1000 <- pms1003.data$N1000 / 100
pms1003.data$N2500 <- pms1003.data$N2500 / 100
pms1003.data$N5000 <- pms1003.data$N5000 / 100
pms1003.data$N10000 <- pms1003.data$N10000 / 100
pms1003.data$PMc <- pms1003.data$PM10 - pms1003.data$PM2.5
pms1003.data$PMf <- pms1003.data$PM2.5 - pms1003.data$PM1
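# Aside: a minimal sketch of the same pt/100cc -> pt/cc conversion written as one
# vectorised step over the column names (an equivalent alternative to the six
# lines above, left commented out here so the columns are not divided twice):
# count_cols <- c('N300', 'N500', 'N1000', 'N2500', 'N5000', 'N10000')
# pms1003.data[count_cols] <- pms1003.data[count_cols] / 100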
# GRIMM 107 data
grimm.107 <- read.csv("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/20160329_grimm_107_data.parsed", sep="")
grimm.107$date <- ISOdatetime(year = grimm.107$Year + 2000,
                              month = grimm.107$Month,
                              day = grimm.107$Day,
                              hour = grimm.107$Hour,
                              min = grimm.107$Minute,
                              sec = grimm.107$Second, tz = "GMT")
# Convert counts data into pt/cc (raw comes as pt/1000cc)
grimm.107[,17:47] <- grimm.107[,17:47] / 1000
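# Aside: the positional index 17:47 is fragile if the file layout ever changes;
# a sketch of an assumed alternative that selects the GRIMM count channels by
# name (assuming they all follow the n-prefixed pattern, e.g. n265, n290, ...):
count_cols_107 <- grep("^n[0-9]", names(grimm.107), value = TRUE)
# grimm.107[count_cols_107] <- grimm.107[count_cols_107] / 1000  # equivalent to the line above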
# GRIMM 108 data
grimm.108 <- read.csv("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/20160329_grimm_108_data.parsed", sep="")
grimm.108$date <- ISOdatetime(year = grimm.108$Year + 2000,
                              month = grimm.108$Month,
                              day = grimm.108$Day,
                              hour = grimm.108$Hour,
                              min = grimm.108$Minute,
                              sec = grimm.108$Second, tz = "Pacific/Auckland")
# Convert mass data into ug/m3 (raw comes x10)
grimm.108[,17:31] <- grimm.108[,17:31] / 10
grimm.108$gPM1 <- grimm.108$M0.3 - grimm.108$M1
grimm.108$gPM10 <- grimm.108$M0.3 - grimm.108$M10
# Rough calculation of PM2.5 as the average of the PM2 and PM3 estimates
grimm.108$gPM2.5 <- ((grimm.108$M0.3 - grimm.108$M2) + (grimm.108$M0.3 - grimm.108$M3))/2
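# Since both terms share M0.3, the expression above simplifies to
# gPM2.5 = M0.3 - (M2 + M3) / 2, i.e. M0.3 minus the mean of the two cumulative
# bins; a quick sanity check of that identity:
all.equal(grimm.108$gPM2.5, grimm.108$M0.3 - (grimm.108$M2 + grimm.108$M3) / 2)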
merged_data <- merge(pms1003.data,grimm.107,by = 'date', all = TRUE)
merged_data <- merge(merged_data,grimm.108,by = 'date', all = TRUE)
merged_data.1min <- timeAverage(merged_data,avg.time = '1 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
timePlot(merged_data,pollutant = c('Temperature','x6.x','x6.y'),avg.time = '1 min')
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

lag_test <- ccf(merged_data.1min$Temperature,
                merged_data.1min$x6.x,
                na.action = na.pass,
                lag.max = 60,
                type = 'correlation',
                ylab = 'Correlation',
                main = 'Temperature correlation as function of clock lag')

lag_grimm_107 <- lag_test$lag[which.max(lag_test$acf)]
grimm.107$date <- grimm.107$date + lag_grimm_107 * 60
lag_test <- ccf(merged_data.1min$Temperature,
                merged_data.1min$x6.y,
                na.action = na.pass,
                lag.max = 60,
                type = 'correlation',
                ylab = 'Correlation',
                main = 'Temperature correlation as function of clock lag')

lag_grimm_108 <- lag_test$lag[which.max(lag_test$acf)]
grimm.108$date <- grimm.108$date + lag_grimm_108 * 60
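# Aside: the two cross-correlation blocks above can be wrapped in a small helper
# that returns the lag (in minutes of the 1 min series) maximising the
# temperature correlation. estimate_clock_lag() is a hypothetical name, not part
# of any package; it is just a sketch of the same ccf() logic used above.
estimate_clock_lag <- function(ref, test, lag.max = 60) {
  ct <- ccf(ref, test, na.action = na.pass, lag.max = lag.max,
            type = 'correlation', plot = FALSE)
  ct$lag[which.max(ct$acf)]
}
# e.g. estimate_clock_lag(merged_data.1min$Temperature, merged_data.1min$x6.x)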
merged_data <- merge(pms1003.data,grimm.107,by = 'date', all = TRUE)
merged_data <- merge(merged_data,grimm.108,by = 'date', all = TRUE)
merged_data.1min <- timeAverage(merged_data,avg.time = '1 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
timePlot(merged_data,pollutant = c('Temperature','x6.x','x6.y'),avg.time = '5 min',group = TRUE, normalise = 'mean')
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

merged_data.5min <- timeAverage(merged_data,avg.time = '5 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
merged_data.15min <- timeAverage(merged_data,avg.time = '15 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
merged_data.60min <- timeAverage(merged_data,avg.time = '60 min')
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
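# Aside: the repeated timeAverage() calls above could also be generated in one
# pass over a vector of averaging periods (same openair call, just looped):
avg_times <- c('5 min', '15 min', '60 min')
averaged <- lapply(avg_times, function(p) timeAverage(merged_data, avg.time = p))
names(averaged) <- avg_times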
# Plotting
ggplot(pms1003.data, aes(x = date)) +
  geom_line(aes(y = PM1), colour = 'red') +
  geom_line(aes(y = PM2.5), colour = 'blue') +
  geom_line(aes(y = PM10), colour = 'black')

ggplot(pms1003.data, aes(x = date)) +
  geom_line(aes(y = PM1), colour = 'red') +
  geom_line(aes(y = PMf), colour = 'blue') +
  geom_line(aes(y = PMc), colour = 'black')

ggplot(pms1003.data, aes(x = date)) +
  geom_line(aes(y = N300), colour = 'red') +
  geom_line(aes(y = N500), colour = 'blue') +
  geom_line(aes(y = N1000), colour = 'black') +
  geom_line(aes(y = N2500), colour = 'green')

ggplot(merged_data.5min, aes(x = date)) +
  geom_line(aes(y = N300), colour = 'red') +
  geom_line(aes(y = N500), colour = 'blue') +
  geom_line(aes(y = PM1), colour = 'black') +
  geom_point(aes(y = n265), colour = 'yellow')
## Warning: Removed 3 rows containing missing values (geom_path).
## Warning: Removed 3 rows containing missing values (geom_path).
## Warning: Removed 3 rows containing missing values (geom_path).
## Warning: Removed 3 rows containing missing values (geom_point).
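# Aside: an alternative to colouring each geom_line() by hand is to reshape the
# channels of interest to long format so ggplot builds the legend itself; a
# sketch, assuming the reshape2 package is available (it is not loaded above):
library('reshape2')
long_5min <- melt(as.data.frame(merged_data.5min)[, c('date', 'N300', 'N500', 'PM1', 'n265')],
                  id.vars = 'date')
ggplot(long_5min, aes(x = date, y = value, colour = variable)) + geom_line()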

timePlot(merged_data,
         pollutant = c('N300',
                       'N500',
                       'n265',
                       'n290',
                       'n324',
                       'n374',
                       'n424',
                       'n539',
                       'n614'),
         avg.time = '5 min',
         normalise = 'mean',
         group = TRUE)
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

timePlot(merged_data,
         pollutant = c('N1000',
                       'PM1',
                       'N2500',
                       # 'N5000',
                       # 'N10000',
                       'n894',
                       'n1140',
                       'n2236',
                       'n2739',
                       'n4472'),
         avg.time = '5 min',
         # normalise = 'mean',
         group = TRUE)
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

# Let's plot the scatter with a linear fit overlaid
ggplot(timeAverage(merged_data, avg.time = '60 min'), aes(n324, PM1)) +
  geom_point(shape = 1) +                        # hollow circles
  geom_smooth(method = "lm", formula = y ~ x) +  # linear fit with 95% confidence region
  ggtitle("With non-zero intercept")
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
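# Aside: the title above ("With non-zero intercept") suggests a companion fit; a
# sketch of the same scatter with the regression forced through the origin:
ggplot(timeAverage(merged_data, avg.time = '60 min'), aes(n324, PM1)) +
  geom_point(shape = 1) +
  geom_smooth(method = "lm", formula = y ~ x + 0) +  # zero-intercept fit
  ggtitle("With zero intercept")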

for_correl <- merged_data.1min[,c(3:14,17,18,35:54,82:94)]
find_channels <- cor(for_correl,use = 'pairwise')
corrgram(find_channels)

corrplot(find_channels,method = 'circle')
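# Aside: a sketch that backs up the visual inspection numerically, pulling for
# each PMS1003 count channel the GRIMM channel with the highest pairwise
# correlation (assumes the naming convention used throughout: N* for PMS1003
# counts, n* for GRIMM counts, and that both sets appear in for_correl):
pms_chan   <- grep("^N[0-9]", colnames(find_channels), value = TRUE)
grimm_chan <- grep("^n[0-9]", colnames(find_channels), value = TRUE)
sapply(pms_chan, function(ch) names(which.max(find_channels[ch, grimm_chan])))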

# So, apparently the N1000 channel correlates with n374, and the first 8 channels
# seem to tell the same story; let's check.
# Let's plot the scatter with a linear fit overlaid
ggplot(timeAverage(merged_data, avg.time = '5 min'), aes(n374, N1000)) +
  geom_point(shape = 1) +                        # hollow circles
  geom_smooth(method = "lm", formula = y ~ x) +  # linear fit with 95% confidence region
  ggtitle("With non-zero intercept")
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
## Warning: Removed 5 rows containing missing values (stat_smooth).
## Warning: Removed 5 rows containing missing values (geom_point).

timePlot(merged_data,pollutant = c('N1000','n374'),avg.time = '10 min',group = TRUE, normalise = 'mean')
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT

# Attempting a linear fit of the GRIMM n374 channel against the PMS1003 N1000
# channel based on the 10 min data
summary(n300_linear <- lm(data = timeAverage(merged_data, avg.time = '10 min'), n374 ~ N1000))
## Warning in checkPrep(mydata, vars, type = "default", remove.calm = FALSE, :
## Detected data with Daylight Saving Time, converting to UTC/GMT
##
## Call:
## lm(formula = n374 ~ N1000, data = timeAverage(merged_data, avg.time = "10 min"))
##
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.6536 -0.2386 -0.0002  0.2292  1.2870 
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.20547    0.03705   5.545 4.13e-08 ***
## N1000       51.04648    0.77788  65.623  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3993 on 713 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.8579, Adjusted R-squared:  0.8578 
## F-statistic:  4306 on 1 and 713 DF,  p-value: < 2.2e-16
merged_data$N1000_corr <- predict(n300_linear,newdata = merged_data)
timePlot(merged_data,pollutant = c('N1000_corr','n374'),avg.time = '10 min',group = TRUE)
## Warning in checkPrep(mydata, vars, type, remove.calm = FALSE): Detected
## data with Daylight Saving Time, converting to UTC/GMT
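# Aside: a quick sketch of how well the fitted channel tracks n374 on the same
# 10 min averages used for the fit (RMSE and correlation of predicted vs observed):
check_10min <- timeAverage(merged_data, avg.time = '10 min')
pred_n374 <- predict(n300_linear, newdata = check_10min)
sqrt(mean((pred_n374 - check_10min$n374)^2, na.rm = TRUE))
cor(pred_n374, check_10min$n374, use = 'pairwise')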
