## Loading required package: openair
## Loading required package: lazyeval
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Loading required package: maps
## Loading required package: reshape2

Introduction

The purpose of this analysis is to document the performance of the ODIN units during their pre-CONA deployment in Christchurch.

ODIN data

The units ODIN_05 to ODIN_07 were deployed at Coles Place between the 23\(^{rd}\) and the 31\(^{st}\) of July 2015.

# Read one ODIN unit's log file and prepare it for merging.
#
# path: location of the ODIN log, read with read.table() using a header row
#       and with quote handling disabled.
# Returns the data frame with a POSIXct `date` column built from the Date and
# Time columns (both of which are then dropped) and with Battery rescaled by
# 5/1024.
read_odin <- function(path) {
  unit_data <- read.table(path, header = TRUE, quote = "")
  # Force GMT as the time zone to avoid openair issues with daylight saving
  # switches. The actual time zone is 'NZST'.
  unit_data$date <- as.POSIXct(paste(unit_data$Date, unit_data$Time),
                               tz = "GMT")
  unit_data$Time <- NULL
  unit_data$Date <- NULL
  # NOTE(review): presumably converts raw 10-bit ADC counts to volts on a
  # 5 V reference -- confirm against the ODIN firmware.
  unit_data$Battery <- 5 * unit_data$Battery / 1024
  unit_data
}

## ODIN_05
odin_05 <- read_odin("~/data/ODIN_data/CONA/pre_tests/odin_05.data")

## ODIN_06
odin_06 <- read_odin("~/data/ODIN_data/CONA/pre_tests/odin_06.data")

## ODIN_07
odin_07 <- read_odin("~/data/ODIN_data/CONA/pre_tests/odin_07.data")

ECan data

The data from the Coles Place site was obtained from Environment Canterbury’s data catalogue and then corrected for proper date handling.

# The ECan export was fetched once with the (now disabled) commands below.
# The sed calls rewrite the "a.m."/"p.m." markers as AM/PM, presumably so that
# the %p specifier used further down can parse them.
# download.file(url = "http://data.ecan.govt.nz/data/29/Air/Air%20quality%20data%20for%20a%20monitored%20site/CSV?SiteId=1&StartDate=23%2F07%2F2015&EndDate=31%2F07%2F2015",destfile = "ecan_data.csv",method = "curl")
# system("sed -i 's/a.m./AM/g' ecan_data.csv")
# system("sed -i 's/p.m./PM/g' ecan_data.csv")
ecan_data_raw <- read.csv("ecan_data.csv", stringsAsFactors = FALSE)
# Parse the 12-hour timestamps; GMT is used to match the ODIN records.
ecan_data_raw$date <- as.POSIXct(
  ecan_data_raw$DateTime,
  format = "%d/%m/%Y %I:%M:%S %p",
  tz = "GMT"
)
# Keep only the timestamp, the two FDMS PM series and the 1 m temperature.
ecan_columns <- c("date", "PM10.FDMS", "PM2.5.FDMS", "Temperature..1m")
ecan_data <- as.data.frame(ecan_data_raw[, ecan_columns])

Merging the data

ECan’s data was provided as 10-minute values, while ODIN reports every minute. Before merging the data, the time base must therefore be homogenized.

# Outer-join the three ODIN records on their timestamp, one unit at a time.
merge_by_date <- function(left, right) {
  merge(left, right, by = "date", all = TRUE)
}
odin <- Reduce(merge_by_date, list(odin_05, odin_06, odin_07))

# Average ODIN to 10 minute values and join with the ECan record.
odin.10min <- timeAverage(odin, avg.time = "10 min")
all_merged.10min <- merge_by_date(odin.10min, ecan_data)

# Columns are renamed by position: the date, then four fields per ODIN unit,
# then the three ECan series.
odin_fields <- rep(c("Dust", "Humidity", "Temperature", "Battery"), times = 3)
odin_units <- rep(c("05", "06", "07"), each = 4)
names(all_merged.10min) <- c(
  "date",
  paste(odin_fields, odin_units, sep = "."),
  "PM10.FDMS", "PM2.5.FDMS", "Temperature..1m"
)

# Hourly averages of the merged data.
all_merged.1hr <- timeAverage(all_merged.10min, avg.time = "1 hour")

Time sync

# Cross-correlate the ODIN_05 temperature with ECan's 1 m temperature over
# +/- 100 samples of the 10 minute series; missing values are passed through.
lag_test <- ccf(
  all_merged.10min$Temperature.05,
  all_merged.10min$Temperature..1m,
  lag.max = 100,
  type = "correlation",
  na.action = na.pass,
  main = "Temperature correlation as function of clock lag",
  ylab = "Correlation"
)

# Lag (in samples) at which the correlation is highest.
peak_position <- which.max(lag_test$acf)
odin_lag <- lag_test$lag[peak_position]

The temperature cross-correlation peaks at a lag of -70 minutes, i.e. ECan’s record is offset by -70 minutes with respect to the ODIN data.
The correction was applied to the ODIN data as follows:

# odin_lag is counted in samples of the 10 minute series, so it is converted
# to seconds before being subtracted from the ODIN timestamps.
lag_seconds <- odin_lag * 10 * 60
odin$date <- odin$date - lag_seconds

# Rebuild the 10 minute and hourly merged tables from the shifted ODIN data.
odin.10min <- timeAverage(odin, avg.time = "10 min")
all_merged.10min <- merge(odin.10min, ecan_data, by = "date", all = TRUE)
names(all_merged.10min) <- c(
  "date",
  "Dust.05", "Humidity.05", "Temperature.05", "Battery.05",
  "Dust.06", "Humidity.06", "Temperature.06", "Battery.06",
  "Dust.07", "Humidity.07", "Temperature.07", "Battery.07",
  "PM10.FDMS", "PM2.5.FDMS", "Temperature..1m"
)
all_merged.1hr <- timeAverage(all_merged.10min, avg.time = "1 hour")

# Repeat the cross-correlation on the corrected data.
lag_test <- ccf(
  all_merged.10min$Temperature.05,
  all_merged.10min$Temperature..1m,
  lag.max = 100,
  type = "correlation",
  na.action = na.pass,
  main = "Temperature correlation as function of clock lag",
  ylab = "Correlation"
)

Remove drift from ODIN raw data

It has been documented that the dust sensors suffer from significant drift, therefore a linear fit of the baseline drift is estimated.

# Estimate the baseline from a simple linear regression of each unit's hourly
# dust signal against the row index. The index is built with seq_len() on the
# row count (seq() on a length-one vector would expand to 1:value) and the
# model is fitted through `data =`, so predict() does not depend on looking
# the regressor up in the global environment.
odin_units <- c("05", "06", "07")
drift_input <- data.frame(hour_index = seq_len(nrow(all_merged.1hr)))
for (unit in odin_units) {
  drift_input$dust <- all_merged.1hr[[paste0("Dust.", unit)]]
  drift_fit <- lm(dust ~ hour_index, data = drift_input)
  # Predicting on the full index gives a baseline value for every row,
  # including rows where the dust value itself is missing.
  all_merged.1hr[[paste0("ODIN_drift.", unit)]] <-
    predict(drift_fit, newdata = drift_input)
}

# Remove the baseline drift from the raw ODIN data. The untouched signal is
# kept in Dust.XX.raw and the detrended one in Dust.XX.detrend.
for (unit in odin_units) {
  raw_col <- paste0("Dust.", unit, ".raw")
  all_merged.1hr[[raw_col]] <- all_merged.1hr[[paste0("Dust.", unit)]]
  all_merged.1hr[[paste0("Dust.", unit, ".detrend")]] <-
    all_merged.1hr[[raw_col]] - all_merged.1hr[[paste0("ODIN_drift.", unit)]]
}

Calculate the temperature interference

First we divide the temperature range of each unit into 5 °C bins between 0 and 25 °C and find the minimum detrended dust signal in each bin.

# Temperature bins: five 5-degree intervals, (0,5] up to (20,25], each
# labelled with its midpoint. Temperatures outside (0,25] get an NA bin.
temp_breaks <- c(0, 5, 10, 15, 20, 25)
temp_labels <- c('2.5', '7.5', '12.5', '17.5', '22.5')
Temp <- c(2.5, 7.5, 12.5, 17.5, 22.5)

# Minimum of a bin that tolerates missing values: a single NA no longer turns
# the whole bin into NA, and a bin holding only NAs yields NA (rather than Inf
# with a warning) so that lm() simply drops it.
bin_min <- function(values) {
  if (all(is.na(values))) {
    return(NA_real_)
  }
  min(values, na.rm = TRUE)
}

all_merged.1hr$Temperature.05.bin <- cut(all_merged.1hr$Temperature.05,
                                         breaks = temp_breaks,
                                         labels = temp_labels)
all_merged.1hr$Temperature.06.bin <- cut(all_merged.1hr$Temperature.06,
                                         breaks = temp_breaks,
                                         labels = temp_labels)
all_merged.1hr$Temperature.07.bin <- cut(all_merged.1hr$Temperature.07,
                                         breaks = temp_breaks,
                                         labels = temp_labels)

# Lowest detrended dust signal observed in each temperature bin, per unit.
Dust.05 <- tapply(all_merged.1hr$Dust.05.detrend,
                  all_merged.1hr$Temperature.05.bin, bin_min)
Dust.06 <- tapply(all_merged.1hr$Dust.06.detrend,
                  all_merged.1hr$Temperature.06.bin, bin_min)
Dust.07 <- tapply(all_merged.1hr$Dust.07.detrend,
                  all_merged.1hr$Temperature.07.bin, bin_min)

# One small table per unit pairing each bin midpoint with its minimum dust
# signal; these are the inputs of the temperature regressions.
TC_Dust.05 <- data.frame(Dust.05.detrend = Dust.05, Temperature.05 = Temp)
TC_Dust.06 <- data.frame(Dust.06.detrend = Dust.06, Temperature.06 = Temp)
TC_Dust.07 <- data.frame(Dust.07.detrend = Dust.07, Temperature.07 = Temp)

Now we calculate the linear regression for the minimum dust response in each temperature bin and subtract it from the detrended data

# Linear fit of the per-bin minimum dust signal against temperature, ODIN_05.
odin.05_T <- lm(Dust.05.detrend ~ Temperature.05, data = TC_Dust.05)
summary(odin.05_T)
## 
## Call:
## lm(formula = Dust.05.detrend ~ Temperature.05, data = TC_Dust.05)
## 
## Residuals:
##      2.5      7.5     12.5     17.5     22.5 
##  0.22986  0.07779 -0.71299  0.27317  0.13217 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -6.31806    0.42581  -14.84 0.000664 ***
## Temperature.05  0.41926    0.02965   14.14 0.000766 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4688 on 3 degrees of freedom
## Multiple R-squared:  0.9852, Adjusted R-squared:  0.9803 
## F-statistic:   200 on 1 and 3 DF,  p-value: 0.0007662
# Linear fit of the per-bin minimum dust signal against temperature, ODIN_06.
odin.06_T <- lm(Dust.06.detrend ~ Temperature.06, data = TC_Dust.06)
summary(odin.06_T)
## 
## Call:
## lm(formula = Dust.06.detrend ~ Temperature.06, data = TC_Dust.06)
## 
## Residuals:
##      2.5      7.5     12.5     17.5     22.5 
##  0.02628  0.09066 -0.48609  0.59510 -0.22595 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -5.99930    0.42292  -14.19 0.000759 ***
## Temperature.06  0.40054    0.02945   13.60 0.000860 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4656 on 3 degrees of freedom
## Multiple R-squared:  0.984,  Adjusted R-squared:  0.9787 
## F-statistic:   185 on 1 and 3 DF,  p-value: 0.0008596
# Linear fit of the per-bin minimum dust signal against temperature, ODIN_07.
odin.07_T <- lm(Dust.07.detrend ~ Temperature.07, data = TC_Dust.07)
summary(odin.07_T)
## 
## Call:
## lm(formula = Dust.07.detrend ~ Temperature.07, data = TC_Dust.07)
## 
## Residuals:
##     2.5     7.5    12.5    17.5    22.5 
##  0.7565 -1.2033  0.5839 -0.5838  0.4467 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    -11.3047     0.8933  -12.66  0.00106 **
## Temperature.07   0.7418     0.0622   11.93  0.00127 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9834 on 3 degrees of freedom
## Multiple R-squared:  0.9793, Adjusted R-squared:  0.9725 
## F-statistic: 142.2 on 1 and 3 DF,  p-value: 0.001268
# Evaluate each unit's temperature regression on the hourly data and subtract
# the result from the detrended dust signal.
temp_effect.05 <- predict(odin.05_T, newdata = all_merged.1hr)
all_merged.1hr$Dust.05.corr <- all_merged.1hr$Dust.05.detrend - temp_effect.05

temp_effect.06 <- predict(odin.06_T, newdata = all_merged.1hr)
all_merged.1hr$Dust.06.corr <- all_merged.1hr$Dust.06.detrend - temp_effect.06

temp_effect.07 <- predict(odin.07_T, newdata = all_merged.1hr)
all_merged.1hr$Dust.07.corr <- all_merged.1hr$Dust.07.detrend - temp_effect.07

Dust performance using ECan data for calibration

With ECan’s PM data available, a more accurate calibration can be applied to the raw Dust signal from ODIN.

According to previous analyses, it is expected that the response of the ODIN depends on the ambient temperature and relative humidity. In this analysis, a linear relationship is proposed: \(Dust_{calibrated}=A*Dust_{raw}+B\)

Full dataset 1 hour PM\(_{2.5}\) FDMS

# Calibrate ODIN_05: regress ECan's hourly PM2.5 on the corrected dust signal.
odin5.lm.full.1hr.pm2.5 <- lm(PM2.5.FDMS ~ Dust.05.corr, data = all_merged.1hr)
summary(odin5.lm.full.1hr.pm2.5)
## 
## Call:
## lm(formula = PM2.5.FDMS ~ Dust.05.corr, data = all_merged.1hr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.356  -6.162  -3.609   0.975  37.577 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   10.3168     1.6364   6.304 2.15e-09 ***
## Dust.05.corr   2.8676     0.6359   4.509 1.17e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.48 on 181 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.101,  Adjusted R-squared:  0.09603 
## F-statistic: 20.34 on 1 and 181 DF,  p-value: 1.165e-05
# Calibrate ODIN_06: regress ECan's hourly PM2.5 on the corrected dust signal.
odin6.lm.full.1hr.pm2.5 <- lm(PM2.5.FDMS ~ Dust.06.corr, data = all_merged.1hr)
summary(odin6.lm.full.1hr.pm2.5)
## 
## Call:
## lm(formula = PM2.5.FDMS ~ Dust.06.corr, data = all_merged.1hr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.265  -6.908  -4.247   2.191  41.418 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   17.1196     1.6621  10.300   <2e-16 ***
## Dust.06.corr  -0.1328     0.6353  -0.209    0.835    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.05 on 181 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.0002413,  Adjusted R-squared:  -0.005282 
## F-statistic: 0.04368 on 1 and 181 DF,  p-value: 0.8347
# Calibrate ODIN_07: regress ECan's hourly PM2.5 on the corrected dust signal.
odin7.lm.full.1hr.pm2.5 <- lm(PM2.5.FDMS ~ Dust.07.corr, data = all_merged.1hr)
summary(odin7.lm.full.1hr.pm2.5)
## 
## Call:
## lm(formula = PM2.5.FDMS ~ Dust.07.corr, data = all_merged.1hr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.047  -6.745  -4.122   1.766  39.401 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   15.2662     1.7062   8.948 4.26e-16 ***
## Dust.07.corr   0.3477     0.3360   1.035    0.302    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.02 on 181 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.005878,   Adjusted R-squared:  0.0003859 
## F-statistic:  1.07 on 1 and 181 DF,  p-value: 0.3023

Calibrated Dust

# Apply each unit's calibration model to the hourly data.
all_merged.1hr$Dust.05.cal <- predict(odin5.lm.full.1hr.pm2.5,
                                      newdata = all_merged.1hr)
all_merged.1hr$Dust.06.cal <- predict(odin6.lm.full.1hr.pm2.5,
                                      newdata = all_merged.1hr)
all_merged.1hr$Dust.07.cal <- predict(odin7.lm.full.1hr.pm2.5,
                                      newdata = all_merged.1hr)

# Plot the reference PM2.5 together with the three calibrated ODIN series:
# first as a grouped time series, then with openair's timeVariation().
calibrated_series <- c('PM2.5.FDMS', 'Dust.05.cal', 'Dust.06.cal', 'Dust.07.cal')
timePlot(all_merged.1hr, pollutant = calibrated_series, group = TRUE)

timeVariation(all_merged.1hr, pollutant = calibrated_series)