# PM2.5 spectrometer tests
library('ggplot2')
library('openair')
## Loading required package: maps
##
## # maps v3.1: updated 'world': all lakes moved to separate new #
## # 'lakes' database. Type '?world' or 'news(package="maps")'. #
library('lubridate')
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library('corrgram')
library('corrplot')
# PMS1003 data
pms1003.data <- read.delim("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/Chch/ODIN4_SD.txt")
pms1003.data$date <- as.POSIXct(paste(pms1003.data$Day,pms1003.data$Time),tz='Pacific/Auckland')
pms1003.data$Day<-NULL
pms1003.data$Time<-NULL
# Number concentrations to pt/cc (raw comes as pt/100cc)
pms1003.data$N300 <-pms1003.data$N300 / 100
pms1003.data$N500 <-pms1003.data$N500 / 100
pms1003.data$N1000 <-pms1003.data$N1000 / 100
pms1003.data$N2500 <-pms1003.data$N2500 / 100
pms1003.data$N5000 <-pms1003.data$N5000 / 100
pms1003.data$N10000 <-pms1003.data$N10000 / 100
# Differences instead of cummulative
pms1003.data$dN300 <-pms1003.data$N300 - pms1003.data$N500
pms1003.data$dN500 <-pms1003.data$N500 - pms1003.data$N1000
pms1003.data$dN1000 <-pms1003.data$N1000 - pms1003.data$N2500
pms1003.data$dN2500 <-pms1003.data$N2500 - pms1003.data$N5000
pms1003.data$dN5000 <-pms1003.data$N5000 - pms1003.data$N10000
pms1003.data$dN10000 <-pms1003.data$N10000
pms1003.data$PMc <- pms1003.data$PM10 - pms1003.data$PM2.5
pms1003.data$PMf <- pms1003.data$PM2.5 - pms1003.data$PM1
# ODIN data
odin.data <- read.delim("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/Chch/ODIN6.txt")
odin.data$date <- as.POSIXct(paste(odin.data$Date,odin.data$Time),tz='Pacific/Auckland')
odin.data$Temperature <- odin.data$Battery
# Correct Dust with temperature
odin.data$ODIN_drift<-predict(lm(odin.data$Dust~seq(odin.data$Dust)),newdata = odin.data)
odin.data$Dust.raw <- odin.data$Dust
odin.data$Dust.detrend<-odin.data$Dust.raw - odin.data$ODIN_drift
odin.data$Temperature.bin<-cut(odin.data$Temperature,breaks = c(0,5,10,15,20,25),labels = c('2.5','7.5','12.5','17.5','22.5'))
Temp <- c(2.5,7.5,12.5,17.5,22.5)
Dust<-tapply(odin.data$Dust.detrend,odin.data$Temperature.bin,quantile,0.25)
TC_Dust <- data.frame(Dust.detrend = Dust,Temperature = Temp)
summary(odin.T<-lm(data = TC_Dust,Dust.detrend~Temperature))
##
## Call:
## lm(formula = Dust.detrend ~ Temperature, data = TC_Dust)
##
## Residuals:
## 2.5 7.5 12.5 17.5 22.5
## -0.01576 -0.28807 0.40140 0.12447 -0.22204
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.6094 0.2916 -15.81 0.00055 ***
## Temperature 0.2502 0.0203 12.32 0.00115 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.321 on 3 degrees of freedom
## Multiple R-squared: 0.9806, Adjusted R-squared: 0.9742
## F-statistic: 151.9 on 1 and 3 DF, p-value: 0.00115
odin.data$Dust.corr <- odin.data$Dust.detrend - predict(odin.T,newdata = odin.data)
# ECan Data
ecan_data_raw <- read.csv("/home/gustavo/data/Dust_tests/minispectrometer/WithArduino/Chch/ECan_data.csv",stringsAsFactors=FALSE)
ecan_data_raw$date<-as.POSIXct(ecan_data_raw$DateTime,format = "%d/%m/%Y %H:%M:%S",tz='NZST')
ecan_data_raw <- as.data.frame(ecan_data_raw[,c('date','Temperature..2m','PMcoarse.FDMS','PM10.FDMS','PM2.5.FDMS')])
merged_data <- merge(pms1003.data,odin.data,by = 'date', all = TRUE)
merged_data <- merge(merged_data,ecan_data_raw,by = 'date', all = TRUE)
merged_data.10min <- timeAverage(merged_data,avg.time = '10 min')
merged_data.15min <- timeAverage(merged_data,avg.time = '15 min')
merged_data.60min <- timeAverage(merged_data,avg.time = '60 min')
#Time differences
lag_test=ccf(merged_data.10min$Temperature..2m,
merged_data.10min$Temperature.x,
na.action=na.pass,
lag.max=200,
type='correlation',
ylab='Correlation',
main='Temperature correlation as function of clock lag')

lag_odin_sd = lag_test$lag[which.max(lag_test$acf)]
pms1003.data$date <- pms1003.data$date + lag_odin_sd*60*10
lag_test=ccf(merged_data.10min$Temperature..2m,
merged_data.10min$Temperature.y,
na.action=na.pass,
lag.max=200,
type='correlation',
ylab='Correlation',
main='Temperature correlation as function of clock lag')

lag_odin = lag_test$lag[which.max(lag_test$acf)]
odin.data$date <- odin.data$date + lag_odin*60*10
merged_data <- merge(pms1003.data,odin.data,by = 'date', all = TRUE)
merged_data <- merge(merged_data,ecan_data_raw,by = 'date', all = TRUE)
merged_data.10min <- timeAverage(merged_data,avg.time = '10 min')
merged_data.15min <- timeAverage(merged_data,avg.time = '15 min')
merged_data.60min <- timeAverage(merged_data,avg.time = '60 min')
# Find correlations
for_correl <- merged_data.10min[,c(3:14,17:24,32,34:36)]
find_channels <- cor(for_correl,use = 'pairwise')
corrgram(find_channels)

corrplot(find_channels,method = 'circle')

# Plotting
ggplot(merged_data.10min,aes(x=date))+
geom_line(aes(y = Dust.corr,color = 'Dust.corr'))+
geom_line(aes(y = PM2.5,color = 'PM2.5'))+
geom_line(aes(y = PM2.5.FDMS,color = 'PM2.5.FDMS'))+
geom_line(aes(y=N300,color = 'N300'))+
scale_color_manual("",
breaks = c('Dust.corr','PM2.5','PM2.5.FDMS','N300'),
values = c('blue','red','black','green'))
## Warning: Removed 138 rows containing missing values (geom_path).
## Warning: Removed 139 rows containing missing values (geom_path).
## Warning: Removed 139 rows containing missing values (geom_path).

timePlot(merged_data,pollutant = c('Temperature.x','Temperature.y','Temperature..2m'),avg.time = '30 min')

timePlot(merged_data,pollutant = c('PM2.5.FDMS','PM2.5','PM1'),avg.time = '30 min',group = TRUE)

timePlot(merged_data,pollutant = c('PM10.FDMS','PM10'),avg.time = '30 min',group = TRUE)

timePlot(merged_data,pollutant = c('PMcoarse.FDMS','PMc','PMf'),avg.time = '30 min',group = TRUE)

# Trying a linear fit between PM2.5 and PM2.5.FDMS
summary(linear_pm25 <- lm(data = merged_data.10min, formula = PM2.5.FDMS~PM2.5))
##
## Call:
## lm(formula = PM2.5.FDMS ~ PM2.5, data = merged_data.10min)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.1810 -1.2829 -0.2221 0.9190 9.7342
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.17145 0.08123 26.73 <2e-16 ***
## PM2.5 0.90872 0.01753 51.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.114 on 1106 degrees of freedom
## (189 observations deleted due to missingness)
## Multiple R-squared: 0.7085, Adjusted R-squared: 0.7082
## F-statistic: 2688 on 1 and 1106 DF, p-value: < 2.2e-16
merged_data$PM2.5.PMS1003 <- predict(linear_pm25,newdata = merged_data)
timePlot(merged_data,pollutant = c('PM2.5.FDMS','PM2.5.PMS1003'),cols = c('red','black'),avg.time = '10 min',group = TRUE)

timeVariation(merged_data,pollutant = c('PM2.5.FDMS','PM2.5.PMS1003'),cols = c('red','black'))
